| { |
| "best_metric": 2.7920210361480713, |
| "best_model_checkpoint": "model_v1_complete_training_wt_init_48_mini/checkpoint-1830000", |
| "epoch": 15.088621802749513, |
| "global_step": 1841687, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 10.2892, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 8.8433, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.5e-06, |
| "loss": 8.1625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 7.85, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.5e-06, |
| "loss": 7.6776, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3e-06, |
| "loss": 7.5065, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 3.5e-06, |
| "loss": 7.3363, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 7.2137, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.5e-06, |
| "loss": 7.1039, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 5e-06, |
| "loss": 7.0197, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 6.9294, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 6e-06, |
| "loss": 6.854, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 6.7942, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7e-06, |
| "loss": 6.7328, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 6.685, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 6.6331, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 8.5e-06, |
| "loss": 6.5921, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9e-06, |
| "loss": 6.5616, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.5e-06, |
| "loss": 6.5268, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1e-05, |
| "loss": 6.5012, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.998356047280082e-06, |
| "loss": 6.4694, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.996712094560161e-06, |
| "loss": 6.4482, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.995068141840242e-06, |
| "loss": 6.4289, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.993424189120321e-06, |
| "loss": 6.4084, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.991780236400402e-06, |
| "loss": 6.3823, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.990136283680483e-06, |
| "loss": 6.3696, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.988492330960563e-06, |
| "loss": 6.359, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.986848378240642e-06, |
| "loss": 6.3447, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.985204425520723e-06, |
| "loss": 6.327, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.983560472800802e-06, |
| "loss": 6.3197, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.981916520080883e-06, |
| "loss": 6.3012, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.980272567360964e-06, |
| "loss": 6.2978, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.978628614641043e-06, |
| "loss": 6.2945, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.976984661921124e-06, |
| "loss": 6.2789, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.975340709201205e-06, |
| "loss": 6.2788, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.973696756481285e-06, |
| "loss": 6.2633, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.972052803761366e-06, |
| "loss": 6.2471, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.970408851041445e-06, |
| "loss": 6.1923, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.968764898321524e-06, |
| "loss": 6.1695, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.967120945601605e-06, |
| "loss": 6.1645, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.965476992881686e-06, |
| "loss": 6.1372, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.963833040161765e-06, |
| "loss": 6.1184, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.962189087441846e-06, |
| "loss": 6.1077, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.960545134721926e-06, |
| "loss": 6.0858, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.958901182002007e-06, |
| "loss": 6.0807, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.957257229282087e-06, |
| "loss": 6.0578, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.955613276562167e-06, |
| "loss": 6.0571, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.953969323842248e-06, |
| "loss": 6.0397, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.952325371122327e-06, |
| "loss": 6.0305, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 9.950681418402408e-06, |
| "loss": 6.0192, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.949037465682487e-06, |
| "loss": 6.003, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.947393512962568e-06, |
| "loss": 5.9987, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.945749560242648e-06, |
| "loss": 5.9946, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 9.944105607522729e-06, |
| "loss": 5.9855, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.942461654802808e-06, |
| "loss": 5.9723, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.940817702082889e-06, |
| "loss": 5.9701, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 9.93917374936297e-06, |
| "loss": 5.9636, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 9.937529796643049e-06, |
| "loss": 5.9503, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 9.93588584392313e-06, |
| "loss": 5.9404, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 9.934241891203211e-06, |
| "loss": 5.9411, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.1518340698223809, |
| "eval_loss": 5.883250713348389, |
| "eval_runtime": 404.4584, |
| "eval_samples_per_second": 762.392, |
| "eval_steps_per_second": 15.885, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 9.93259793848329e-06, |
| "loss": 5.9333, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 9.930953985763371e-06, |
| "loss": 5.9235, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 9.92931003304345e-06, |
| "loss": 5.9169, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 9.92766608032353e-06, |
| "loss": 5.9136, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 9.92602212760361e-06, |
| "loss": 5.9025, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 9.924378174883692e-06, |
| "loss": 5.9008, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 9.922734222163771e-06, |
| "loss": 5.8908, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 9.921090269443852e-06, |
| "loss": 5.8871, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 9.919446316723931e-06, |
| "loss": 5.8785, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 9.917802364004012e-06, |
| "loss": 5.877, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 9.916158411284093e-06, |
| "loss": 5.8796, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 9.914514458564172e-06, |
| "loss": 5.866, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 9.912870505844253e-06, |
| "loss": 5.8613, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 9.911226553124333e-06, |
| "loss": 5.8509, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 9.909582600404412e-06, |
| "loss": 5.8453, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 9.907938647684493e-06, |
| "loss": 5.843, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 9.906294694964574e-06, |
| "loss": 5.8326, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 9.904650742244653e-06, |
| "loss": 5.8313, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 9.903006789524734e-06, |
| "loss": 5.8303, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 9.901362836804813e-06, |
| "loss": 5.825, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 9.899718884084894e-06, |
| "loss": 5.809, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 9.898074931364975e-06, |
| "loss": 5.8117, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 9.896430978645055e-06, |
| "loss": 5.8035, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 9.894787025925136e-06, |
| "loss": 5.7973, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 9.893143073205217e-06, |
| "loss": 5.7966, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 9.891499120485296e-06, |
| "loss": 5.789, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 9.889855167765375e-06, |
| "loss": 5.7827, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 9.888211215045456e-06, |
| "loss": 5.7801, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 9.886567262325535e-06, |
| "loss": 5.7761, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.884923309605616e-06, |
| "loss": 5.7702, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.883279356885697e-06, |
| "loss": 5.7638, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.881635404165777e-06, |
| "loss": 5.7549, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.879991451445858e-06, |
| "loss": 5.7496, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.878347498725937e-06, |
| "loss": 5.7451, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.876703546006018e-06, |
| "loss": 5.7447, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.875059593286099e-06, |
| "loss": 5.7451, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.873415640566178e-06, |
| "loss": 5.7393, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.87177168784626e-06, |
| "loss": 5.7319, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.870127735126338e-06, |
| "loss": 5.73, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.868483782406418e-06, |
| "loss": 5.7289, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.866839829686499e-06, |
| "loss": 5.7158, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.86519587696658e-06, |
| "loss": 5.713, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.863551924246659e-06, |
| "loss": 5.7082, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.86190797152674e-06, |
| "loss": 5.704, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.86026401880682e-06, |
| "loss": 5.7021, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.8586200660869e-06, |
| "loss": 5.7041, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.856976113366981e-06, |
| "loss": 5.6927, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.85533216064706e-06, |
| "loss": 5.6896, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.853688207927141e-06, |
| "loss": 5.6803, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.85204425520722e-06, |
| "loss": 5.6817, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.8504003024873e-06, |
| "loss": 5.6833, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.848756349767381e-06, |
| "loss": 5.6681, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.847112397047462e-06, |
| "loss": 5.6702, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.845468444327541e-06, |
| "loss": 5.6569, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.843824491607622e-06, |
| "loss": 5.6653, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.842180538887703e-06, |
| "loss": 5.6525, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.840536586167782e-06, |
| "loss": 5.6518, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.838892633447863e-06, |
| "loss": 5.6497, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.837248680727943e-06, |
| "loss": 5.6454, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.835604728008024e-06, |
| "loss": 5.6408, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 0.19079809901308453, |
| "eval_loss": 5.526524543762207, |
| "eval_runtime": 411.2166, |
| "eval_samples_per_second": 749.863, |
| "eval_steps_per_second": 15.624, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.833960775288105e-06, |
| "loss": 5.6208, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.832316822568184e-06, |
| "loss": 5.5933, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.830672869848265e-06, |
| "loss": 5.5664, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.829028917128344e-06, |
| "loss": 5.5416, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.827384964408423e-06, |
| "loss": 5.5315, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.825741011688504e-06, |
| "loss": 5.5227, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.824097058968585e-06, |
| "loss": 5.4887, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.822453106248665e-06, |
| "loss": 5.4846, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.820809153528746e-06, |
| "loss": 5.4683, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.819165200808825e-06, |
| "loss": 5.4631, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.817521248088906e-06, |
| "loss": 5.4465, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.815877295368987e-06, |
| "loss": 5.4323, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.814233342649066e-06, |
| "loss": 5.415, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.812589389929147e-06, |
| "loss": 5.4046, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.810945437209226e-06, |
| "loss": 5.3932, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.809301484489306e-06, |
| "loss": 5.3858, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.807657531769387e-06, |
| "loss": 5.3667, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.806013579049468e-06, |
| "loss": 5.3536, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.804369626329547e-06, |
| "loss": 5.3413, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.802725673609628e-06, |
| "loss": 5.327, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.801081720889709e-06, |
| "loss": 5.3028, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.799437768169788e-06, |
| "loss": 5.2809, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.797793815449869e-06, |
| "loss": 5.2594, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.796149862729948e-06, |
| "loss": 5.2433, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.79450591001003e-06, |
| "loss": 5.2326, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.79286195729011e-06, |
| "loss": 5.2018, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.79121800457019e-06, |
| "loss": 5.1855, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.789574051850269e-06, |
| "loss": 5.1629, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.78793009913035e-06, |
| "loss": 5.1439, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.786286146410429e-06, |
| "loss": 5.1354, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.78464219369051e-06, |
| "loss": 5.1111, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.782998240970591e-06, |
| "loss": 5.0945, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.78135428825067e-06, |
| "loss": 5.0785, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.779710335530751e-06, |
| "loss": 5.0583, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.77806638281083e-06, |
| "loss": 5.0422, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.776422430090912e-06, |
| "loss": 5.0259, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.774778477370993e-06, |
| "loss": 5.0104, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.773134524651072e-06, |
| "loss": 4.9894, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.771490571931153e-06, |
| "loss": 4.9678, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.769846619211232e-06, |
| "loss": 4.9547, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.768202666491311e-06, |
| "loss": 4.9365, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.766558713771392e-06, |
| "loss": 4.9222, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.764914761051473e-06, |
| "loss": 4.9002, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.763270808331553e-06, |
| "loss": 4.8928, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.761626855611634e-06, |
| "loss": 4.866, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.759982902891713e-06, |
| "loss": 4.8529, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.758338950171794e-06, |
| "loss": 4.841, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.756694997451875e-06, |
| "loss": 4.8105, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.755051044731954e-06, |
| "loss": 4.7817, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.753407092012035e-06, |
| "loss": 4.7561, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.751763139292116e-06, |
| "loss": 4.7291, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.750119186572195e-06, |
| "loss": 4.6946, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.748475233852275e-06, |
| "loss": 4.6735, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.746831281132356e-06, |
| "loss": 4.6509, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.745187328412435e-06, |
| "loss": 4.6292, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.743543375692516e-06, |
| "loss": 4.6109, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.741899422972597e-06, |
| "loss": 4.5882, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.740255470252676e-06, |
| "loss": 4.573, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.738611517532757e-06, |
| "loss": 4.5576, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.736967564812836e-06, |
| "loss": 4.5385, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_accuracy": 0.31378937363065973, |
| "eval_loss": 4.313334941864014, |
| "eval_runtime": 406.1908, |
| "eval_samples_per_second": 759.141, |
| "eval_steps_per_second": 15.818, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.735323612092917e-06, |
| "loss": 4.5261, |
| "step": 90500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.733679659372998e-06, |
| "loss": 4.5152, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.732035706653078e-06, |
| "loss": 4.5018, |
| "step": 91500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.730391753933157e-06, |
| "loss": 4.4877, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.728747801213238e-06, |
| "loss": 4.4749, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.727103848493317e-06, |
| "loss": 4.4638, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.725459895773398e-06, |
| "loss": 4.4451, |
| "step": 93500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.723815943053479e-06, |
| "loss": 4.437, |
| "step": 94000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.722171990333558e-06, |
| "loss": 4.433, |
| "step": 94500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.72052803761364e-06, |
| "loss": 4.4104, |
| "step": 95000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.718884084893719e-06, |
| "loss": 4.4126, |
| "step": 95500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.7172401321738e-06, |
| "loss": 4.4033, |
| "step": 96000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.71559617945388e-06, |
| "loss": 4.3898, |
| "step": 96500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.71395222673396e-06, |
| "loss": 4.3815, |
| "step": 97000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.71230827401404e-06, |
| "loss": 4.3669, |
| "step": 97500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.71066432129412e-06, |
| "loss": 4.359, |
| "step": 98000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.709020368574201e-06, |
| "loss": 4.3495, |
| "step": 98500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.70737641585428e-06, |
| "loss": 4.3459, |
| "step": 99000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.705732463134361e-06, |
| "loss": 4.3349, |
| "step": 99500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.70408851041444e-06, |
| "loss": 4.3202, |
| "step": 100000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.702444557694522e-06, |
| "loss": 4.32, |
| "step": 100500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.700800604974602e-06, |
| "loss": 4.3064, |
| "step": 101000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.699156652254682e-06, |
| "loss": 4.3046, |
| "step": 101500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.697512699534763e-06, |
| "loss": 4.2962, |
| "step": 102000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.695868746814842e-06, |
| "loss": 4.2827, |
| "step": 102500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.694224794094923e-06, |
| "loss": 4.2798, |
| "step": 103000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.692580841375004e-06, |
| "loss": 4.2678, |
| "step": 103500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.690936888655083e-06, |
| "loss": 4.2663, |
| "step": 104000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.689292935935163e-06, |
| "loss": 4.2641, |
| "step": 104500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.687648983215243e-06, |
| "loss": 4.2555, |
| "step": 105000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.686005030495323e-06, |
| "loss": 4.2571, |
| "step": 105500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.684361077775404e-06, |
| "loss": 4.2354, |
| "step": 106000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.682717125055485e-06, |
| "loss": 4.2328, |
| "step": 106500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.681073172335564e-06, |
| "loss": 4.2232, |
| "step": 107000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.679429219615645e-06, |
| "loss": 4.2218, |
| "step": 107500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.677785266895724e-06, |
| "loss": 4.2115, |
| "step": 108000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.676141314175805e-06, |
| "loss": 4.2085, |
| "step": 108500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.674497361455886e-06, |
| "loss": 4.1947, |
| "step": 109000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.672853408735965e-06, |
| "loss": 4.1982, |
| "step": 109500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.671209456016045e-06, |
| "loss": 4.1942, |
| "step": 110000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.669565503296126e-06, |
| "loss": 4.1869, |
| "step": 110500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.667921550576205e-06, |
| "loss": 4.1827, |
| "step": 111000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.666277597856286e-06, |
| "loss": 4.174, |
| "step": 111500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.664633645136367e-06, |
| "loss": 4.1719, |
| "step": 112000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.662989692416446e-06, |
| "loss": 4.1692, |
| "step": 112500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.661345739696527e-06, |
| "loss": 4.1603, |
| "step": 113000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.659701786976608e-06, |
| "loss": 4.1583, |
| "step": 113500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.658057834256687e-06, |
| "loss": 4.1563, |
| "step": 114000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.656413881536768e-06, |
| "loss": 4.1527, |
| "step": 114500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.654769928816848e-06, |
| "loss": 4.1418, |
| "step": 115000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.653125976096929e-06, |
| "loss": 4.1333, |
| "step": 115500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.65148202337701e-06, |
| "loss": 4.1356, |
| "step": 116000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.649838070657089e-06, |
| "loss": 4.1236, |
| "step": 116500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.648194117937168e-06, |
| "loss": 4.126, |
| "step": 117000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.64655016521725e-06, |
| "loss": 4.1232, |
| "step": 117500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.644906212497328e-06, |
| "loss": 4.1148, |
| "step": 118000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.64326225977741e-06, |
| "loss": 4.1131, |
| "step": 118500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.64161830705749e-06, |
| "loss": 4.1212, |
| "step": 119000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.63997435433757e-06, |
| "loss": 4.1011, |
| "step": 119500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.63833040161765e-06, |
| "loss": 4.1015, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_accuracy": 0.35834025167851113, |
| "eval_loss": 3.899629831314087, |
| "eval_runtime": 406.9878, |
| "eval_samples_per_second": 757.654, |
| "eval_steps_per_second": 15.787, |
| "step": 120000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.63668644889773e-06, |
| "loss": 4.0954, |
| "step": 120500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.635042496177811e-06, |
| "loss": 4.0893, |
| "step": 121000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.633398543457892e-06, |
| "loss": 4.0861, |
| "step": 121500 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.631754590737971e-06, |
| "loss": 4.0811, |
| "step": 122000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.63011063801805e-06, |
| "loss": 4.0771, |
| "step": 122500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 9.628466685298131e-06, |
| "loss": 4.0736, |
| "step": 123000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 9.62682273257821e-06, |
| "loss": 4.0716, |
| "step": 123500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 9.625178779858292e-06, |
| "loss": 4.064, |
| "step": 124000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 9.623534827138373e-06, |
| "loss": 4.0587, |
| "step": 124500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 9.621890874418452e-06, |
| "loss": 4.056, |
| "step": 125000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 9.620246921698533e-06, |
| "loss": 4.0621, |
| "step": 125500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 9.618602968978614e-06, |
| "loss": 4.0473, |
| "step": 126000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 9.616959016258693e-06, |
| "loss": 4.0481, |
| "step": 126500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 9.615315063538774e-06, |
| "loss": 4.0496, |
| "step": 127000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 9.613671110818853e-06, |
| "loss": 4.041, |
| "step": 127500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 9.612027158098934e-06, |
| "loss": 4.0395, |
| "step": 128000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 9.610383205379014e-06, |
| "loss": 4.0357, |
| "step": 128500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 9.608739252659095e-06, |
| "loss": 4.0339, |
| "step": 129000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 9.607095299939174e-06, |
| "loss": 4.0269, |
| "step": 129500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 9.605451347219255e-06, |
| "loss": 4.0204, |
| "step": 130000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 9.603807394499334e-06, |
| "loss": 4.0235, |
| "step": 130500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 9.602163441779415e-06, |
| "loss": 4.0276, |
| "step": 131000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 9.600519489059496e-06, |
| "loss": 4.0172, |
| "step": 131500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 9.598875536339575e-06, |
| "loss": 4.0097, |
| "step": 132000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 9.597231583619656e-06, |
| "loss": 4.0083, |
| "step": 132500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 9.595587630899736e-06, |
| "loss": 4.0063, |
| "step": 133000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 9.593943678179817e-06, |
| "loss": 4.0038, |
| "step": 133500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 9.592299725459898e-06, |
| "loss": 3.995, |
| "step": 134000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 9.590655772739977e-06, |
| "loss": 3.9927, |
| "step": 134500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 9.589011820020056e-06, |
| "loss": 3.9941, |
| "step": 135000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 9.587367867300137e-06, |
| "loss": 3.997, |
| "step": 135500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 9.585723914580216e-06, |
| "loss": 3.986, |
| "step": 136000 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 9.584079961860297e-06, |
| "loss": 3.9807, |
| "step": 136500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 9.582436009140378e-06, |
| "loss": 3.9846, |
| "step": 137000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 9.580792056420458e-06, |
| "loss": 3.9712, |
| "step": 137500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 9.579148103700539e-06, |
| "loss": 3.9697, |
| "step": 138000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 9.57750415098062e-06, |
| "loss": 3.9665, |
| "step": 138500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 9.575860198260699e-06, |
| "loss": 3.9676, |
| "step": 139000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 9.57421624554078e-06, |
| "loss": 3.9659, |
| "step": 139500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 9.572572292820859e-06, |
| "loss": 3.9657, |
| "step": 140000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 9.570928340100938e-06, |
| "loss": 3.9616, |
| "step": 140500 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 9.56928438738102e-06, |
| "loss": 3.95, |
| "step": 141000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 9.5676404346611e-06, |
| "loss": 3.9508, |
| "step": 141500 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 9.56599648194118e-06, |
| "loss": 3.9479, |
| "step": 142000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 9.56435252922126e-06, |
| "loss": 3.9495, |
| "step": 142500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 9.56270857650134e-06, |
| "loss": 3.9443, |
| "step": 143000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 9.561064623781421e-06, |
| "loss": 3.9392, |
| "step": 143500 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 9.559420671061502e-06, |
| "loss": 3.943, |
| "step": 144000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 9.557776718341581e-06, |
| "loss": 3.9397, |
| "step": 144500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 9.556132765621662e-06, |
| "loss": 3.9338, |
| "step": 145000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 9.554488812901741e-06, |
| "loss": 3.9284, |
| "step": 145500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 9.552844860181822e-06, |
| "loss": 3.9276, |
| "step": 146000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 9.551200907461902e-06, |
| "loss": 3.9332, |
| "step": 146500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 9.549556954741983e-06, |
| "loss": 3.9263, |
| "step": 147000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 9.547913002022062e-06, |
| "loss": 3.9198, |
| "step": 147500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 9.546269049302143e-06, |
| "loss": 3.9173, |
| "step": 148000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 9.544625096582222e-06, |
| "loss": 3.919, |
| "step": 148500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 9.542981143862303e-06, |
| "loss": 3.917, |
| "step": 149000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 9.541337191142384e-06, |
| "loss": 3.9189, |
| "step": 149500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 9.539693238422463e-06, |
| "loss": 3.9119, |
| "step": 150000 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_accuracy": 0.3782837597418556, |
| "eval_loss": 3.7199175357818604, |
| "eval_runtime": 410.4253, |
| "eval_samples_per_second": 751.308, |
| "eval_steps_per_second": 15.654, |
| "step": 150000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 9.538049285702544e-06, |
| "loss": 3.9076, |
| "step": 150500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 9.536405332982625e-06, |
| "loss": 3.8992, |
| "step": 151000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 9.534761380262705e-06, |
| "loss": 3.8987, |
| "step": 151500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 9.533117427542786e-06, |
| "loss": 3.8942, |
| "step": 152000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 9.531473474822865e-06, |
| "loss": 3.9044, |
| "step": 152500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 9.529829522102944e-06, |
| "loss": 3.8889, |
| "step": 153000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 9.528185569383025e-06, |
| "loss": 3.8895, |
| "step": 153500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 9.526541616663106e-06, |
| "loss": 3.8905, |
| "step": 154000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 9.524897663943185e-06, |
| "loss": 3.8842, |
| "step": 154500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 9.523253711223266e-06, |
| "loss": 3.8843, |
| "step": 155000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 9.521609758503346e-06, |
| "loss": 3.8713, |
| "step": 155500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 9.519965805783427e-06, |
| "loss": 3.8828, |
| "step": 156000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 9.518321853063508e-06, |
| "loss": 3.8808, |
| "step": 156500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 9.516677900343587e-06, |
| "loss": 3.8747, |
| "step": 157000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 9.515033947623668e-06, |
| "loss": 3.8737, |
| "step": 157500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 9.513389994903747e-06, |
| "loss": 3.8709, |
| "step": 158000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 9.511746042183826e-06, |
| "loss": 3.8574, |
| "step": 158500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 9.510102089463907e-06, |
| "loss": 3.8695, |
| "step": 159000 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 9.508458136743988e-06, |
| "loss": 3.8644, |
| "step": 159500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 9.506814184024068e-06, |
| "loss": 3.8627, |
| "step": 160000 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 9.505170231304149e-06, |
| "loss": 3.8545, |
| "step": 160500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 9.503526278584228e-06, |
| "loss": 3.852, |
| "step": 161000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 9.501882325864309e-06, |
| "loss": 3.8451, |
| "step": 161500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 9.50023837314439e-06, |
| "loss": 3.8514, |
| "step": 162000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 9.498594420424469e-06, |
| "loss": 3.8516, |
| "step": 162500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 9.49695046770455e-06, |
| "loss": 3.8459, |
| "step": 163000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 9.495306514984631e-06, |
| "loss": 3.8456, |
| "step": 163500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 9.49366256226471e-06, |
| "loss": 3.8393, |
| "step": 164000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 9.492018609544791e-06, |
| "loss": 3.8407, |
| "step": 164500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 9.49037465682487e-06, |
| "loss": 3.8337, |
| "step": 165000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 9.48873070410495e-06, |
| "loss": 3.8352, |
| "step": 165500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 9.48708675138503e-06, |
| "loss": 3.8305, |
| "step": 166000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 9.485442798665112e-06, |
| "loss": 3.8327, |
| "step": 166500 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 9.483798845945191e-06, |
| "loss": 3.8242, |
| "step": 167000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 9.482154893225272e-06, |
| "loss": 3.8248, |
| "step": 167500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 9.480510940505351e-06, |
| "loss": 3.8208, |
| "step": 168000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 9.478866987785432e-06, |
| "loss": 3.8195, |
| "step": 168500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 9.477223035065513e-06, |
| "loss": 3.8262, |
| "step": 169000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 9.475579082345593e-06, |
| "loss": 3.8205, |
| "step": 169500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 9.473935129625673e-06, |
| "loss": 3.823, |
| "step": 170000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9.472291176905753e-06, |
| "loss": 3.8127, |
| "step": 170500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9.470647224185832e-06, |
| "loss": 3.813, |
| "step": 171000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 9.469003271465913e-06, |
| "loss": 3.8126, |
| "step": 171500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 9.467359318745994e-06, |
| "loss": 3.808, |
| "step": 172000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 9.465715366026073e-06, |
| "loss": 3.8051, |
| "step": 172500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 9.464071413306154e-06, |
| "loss": 3.8038, |
| "step": 173000 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 9.462427460586234e-06, |
| "loss": 3.7961, |
| "step": 173500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 9.460783507866314e-06, |
| "loss": 3.7979, |
| "step": 174000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 9.459139555146395e-06, |
| "loss": 3.795, |
| "step": 174500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 9.457495602426475e-06, |
| "loss": 3.8004, |
| "step": 175000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.455851649706556e-06, |
| "loss": 3.7958, |
| "step": 175500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.454207696986637e-06, |
| "loss": 3.7909, |
| "step": 176000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.452563744266716e-06, |
| "loss": 3.8, |
| "step": 176500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.450919791546795e-06, |
| "loss": 3.7826, |
| "step": 177000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.449275838826876e-06, |
| "loss": 3.7857, |
| "step": 177500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.447631886106956e-06, |
| "loss": 3.7867, |
| "step": 178000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.445987933387036e-06, |
| "loss": 3.7935, |
| "step": 178500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.444343980667117e-06, |
| "loss": 3.7803, |
| "step": 179000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.442700027947197e-06, |
| "loss": 3.782, |
| "step": 179500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.441056075227278e-06, |
| "loss": 3.7832, |
| "step": 180000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 0.39196367058721415, |
| "eval_loss": 3.6038873195648193, |
| "eval_runtime": 407.692, |
| "eval_samples_per_second": 756.345, |
| "eval_steps_per_second": 15.759, |
| "step": 180000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 9.439412122507357e-06, |
| "loss": 3.775, |
| "step": 180500 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 9.437768169787438e-06, |
| "loss": 3.7777, |
| "step": 181000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 9.436124217067519e-06, |
| "loss": 3.769, |
| "step": 181500 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 9.434480264347598e-06, |
| "loss": 3.7712, |
| "step": 182000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 9.43283631162768e-06, |
| "loss": 3.7638, |
| "step": 182500 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 9.431192358907758e-06, |
| "loss": 3.7686, |
| "step": 183000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 9.429548406187838e-06, |
| "loss": 3.7652, |
| "step": 183500 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 9.427904453467919e-06, |
| "loss": 3.7649, |
| "step": 184000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 9.426260500748e-06, |
| "loss": 3.7608, |
| "step": 184500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.424616548028079e-06, |
| "loss": 3.762, |
| "step": 185000 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.42297259530816e-06, |
| "loss": 3.7551, |
| "step": 185500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 9.42132864258824e-06, |
| "loss": 3.7545, |
| "step": 186000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.41968468986832e-06, |
| "loss": 3.7556, |
| "step": 186500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.418040737148401e-06, |
| "loss": 3.7507, |
| "step": 187000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.41639678442848e-06, |
| "loss": 3.7467, |
| "step": 187500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.414752831708561e-06, |
| "loss": 3.7507, |
| "step": 188000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.413108878988642e-06, |
| "loss": 3.7384, |
| "step": 188500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.41146492626872e-06, |
| "loss": 3.7473, |
| "step": 189000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 9.409820973548801e-06, |
| "loss": 3.7429, |
| "step": 189500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.408177020828882e-06, |
| "loss": 3.7427, |
| "step": 190000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.406533068108961e-06, |
| "loss": 3.74, |
| "step": 190500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.404889115389042e-06, |
| "loss": 3.7361, |
| "step": 191000 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.403245162669123e-06, |
| "loss": 3.7427, |
| "step": 191500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.401601209949202e-06, |
| "loss": 3.7388, |
| "step": 192000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.399957257229283e-06, |
| "loss": 3.7349, |
| "step": 192500 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.398313304509363e-06, |
| "loss": 3.7302, |
| "step": 193000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.396669351789444e-06, |
| "loss": 3.7408, |
| "step": 193500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.395025399069525e-06, |
| "loss": 3.7239, |
| "step": 194000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 9.393381446349604e-06, |
| "loss": 3.7258, |
| "step": 194500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.391737493629683e-06, |
| "loss": 3.7149, |
| "step": 195000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.390093540909764e-06, |
| "loss": 3.7213, |
| "step": 195500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.388449588189843e-06, |
| "loss": 3.7218, |
| "step": 196000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.386805635469924e-06, |
| "loss": 3.7175, |
| "step": 196500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.385161682750005e-06, |
| "loss": 3.7088, |
| "step": 197000 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.383517730030085e-06, |
| "loss": 3.7147, |
| "step": 197500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.381873777310166e-06, |
| "loss": 3.7194, |
| "step": 198000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.380229824590245e-06, |
| "loss": 3.7048, |
| "step": 198500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.378585871870326e-06, |
| "loss": 3.7022, |
| "step": 199000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.376941919150407e-06, |
| "loss": 3.6975, |
| "step": 199500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.375297966430486e-06, |
| "loss": 3.6994, |
| "step": 200000 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.373654013710567e-06, |
| "loss": 3.6963, |
| "step": 200500 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.372010060990646e-06, |
| "loss": 3.6973, |
| "step": 201000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.370366108270726e-06, |
| "loss": 3.6949, |
| "step": 201500 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 9.368722155550807e-06, |
| "loss": 3.6867, |
| "step": 202000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.367078202830888e-06, |
| "loss": 3.6865, |
| "step": 202500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 9.365434250110967e-06, |
| "loss": 3.6862, |
| "step": 203000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 9.363790297391048e-06, |
| "loss": 3.682, |
| "step": 203500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 9.362146344671129e-06, |
| "loss": 3.6823, |
| "step": 204000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 9.360502391951208e-06, |
| "loss": 3.6839, |
| "step": 204500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 9.358858439231289e-06, |
| "loss": 3.6768, |
| "step": 205000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 9.357214486511368e-06, |
| "loss": 3.6763, |
| "step": 205500 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 9.35557053379145e-06, |
| "loss": 3.6763, |
| "step": 206000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 9.35392658107153e-06, |
| "loss": 3.6788, |
| "step": 206500 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 9.35228262835161e-06, |
| "loss": 3.6731, |
| "step": 207000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 9.350638675631689e-06, |
| "loss": 3.6703, |
| "step": 207500 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 9.34899472291177e-06, |
| "loss": 3.6735, |
| "step": 208000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.347350770191849e-06, |
| "loss": 3.6659, |
| "step": 208500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 9.34570681747193e-06, |
| "loss": 3.6618, |
| "step": 209000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 9.344062864752011e-06, |
| "loss": 3.66, |
| "step": 209500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 9.34241891203209e-06, |
| "loss": 3.6686, |
| "step": 210000 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_accuracy": 0.4032999985682802, |
| "eval_loss": 3.505682945251465, |
| "eval_runtime": 407.8472, |
| "eval_samples_per_second": 756.058, |
| "eval_steps_per_second": 15.753, |
| "step": 210000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 9.340774959312171e-06, |
| "loss": 3.6608, |
| "step": 210500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 9.33913100659225e-06, |
| "loss": 3.6528, |
| "step": 211000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 9.337487053872332e-06, |
| "loss": 3.6607, |
| "step": 211500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 9.335843101152413e-06, |
| "loss": 3.6561, |
| "step": 212000 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 9.334199148432492e-06, |
| "loss": 3.6495, |
| "step": 212500 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 9.332555195712573e-06, |
| "loss": 3.6438, |
| "step": 213000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 9.330911242992652e-06, |
| "loss": 3.6483, |
| "step": 213500 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 9.329267290272731e-06, |
| "loss": 3.6504, |
| "step": 214000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 9.327623337552812e-06, |
| "loss": 3.6394, |
| "step": 214500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 9.325979384832893e-06, |
| "loss": 3.6413, |
| "step": 215000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 9.324335432112973e-06, |
| "loss": 3.6425, |
| "step": 215500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 9.322691479393054e-06, |
| "loss": 3.6369, |
| "step": 216000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 9.321047526673135e-06, |
| "loss": 3.6366, |
| "step": 216500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 9.319403573953214e-06, |
| "loss": 3.637, |
| "step": 217000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 9.317759621233295e-06, |
| "loss": 3.638, |
| "step": 217500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 9.316115668513374e-06, |
| "loss": 3.6389, |
| "step": 218000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 9.314471715793455e-06, |
| "loss": 3.6397, |
| "step": 218500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 9.312827763073536e-06, |
| "loss": 3.6262, |
| "step": 219000 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 9.311183810353615e-06, |
| "loss": 3.6296, |
| "step": 219500 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 9.309539857633695e-06, |
| "loss": 3.6228, |
| "step": 220000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 9.307895904913776e-06, |
| "loss": 3.6257, |
| "step": 220500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 9.306251952193855e-06, |
| "loss": 3.6242, |
| "step": 221000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 9.304607999473936e-06, |
| "loss": 3.6263, |
| "step": 221500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 9.302964046754017e-06, |
| "loss": 3.6204, |
| "step": 222000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 9.301320094034096e-06, |
| "loss": 3.6092, |
| "step": 222500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.299676141314177e-06, |
| "loss": 3.6146, |
| "step": 223000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.298032188594256e-06, |
| "loss": 3.6233, |
| "step": 223500 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 9.296388235874337e-06, |
| "loss": 3.62, |
| "step": 224000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 9.294744283154418e-06, |
| "loss": 3.6191, |
| "step": 224500 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 9.293100330434498e-06, |
| "loss": 3.616, |
| "step": 225000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 9.291456377714577e-06, |
| "loss": 3.6147, |
| "step": 225500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 9.289812424994658e-06, |
| "loss": 3.6147, |
| "step": 226000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 9.288168472274737e-06, |
| "loss": 3.6144, |
| "step": 226500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 9.286524519554818e-06, |
| "loss": 3.6047, |
| "step": 227000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 9.284880566834899e-06, |
| "loss": 3.6106, |
| "step": 227500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.283236614114978e-06, |
| "loss": 3.6059, |
| "step": 228000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.28159266139506e-06, |
| "loss": 3.6122, |
| "step": 228500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 9.279948708675139e-06, |
| "loss": 3.6052, |
| "step": 229000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 9.27830475595522e-06, |
| "loss": 3.6047, |
| "step": 229500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 9.2766608032353e-06, |
| "loss": 3.6021, |
| "step": 230000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 9.27501685051538e-06, |
| "loss": 3.6023, |
| "step": 230500 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 9.27337289779546e-06, |
| "loss": 3.6019, |
| "step": 231000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 9.27172894507554e-06, |
| "loss": 3.595, |
| "step": 231500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 9.270084992355621e-06, |
| "loss": 3.5902, |
| "step": 232000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 9.2684410396357e-06, |
| "loss": 3.5964, |
| "step": 232500 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 9.266797086915781e-06, |
| "loss": 3.5953, |
| "step": 233000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 9.26515313419586e-06, |
| "loss": 3.5934, |
| "step": 233500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 9.263509181475942e-06, |
| "loss": 3.5851, |
| "step": 234000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 9.261865228756023e-06, |
| "loss": 3.5861, |
| "step": 234500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 9.260221276036102e-06, |
| "loss": 3.5849, |
| "step": 235000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 9.258577323316183e-06, |
| "loss": 3.5883, |
| "step": 235500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 9.256933370596262e-06, |
| "loss": 3.5822, |
| "step": 236000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 9.255289417876343e-06, |
| "loss": 3.5742, |
| "step": 236500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 9.253645465156424e-06, |
| "loss": 3.5809, |
| "step": 237000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.252001512436503e-06, |
| "loss": 3.5805, |
| "step": 237500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.250357559716583e-06, |
| "loss": 3.5813, |
| "step": 238000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.248713606996664e-06, |
| "loss": 3.5793, |
| "step": 238500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 9.247069654276743e-06, |
| "loss": 3.5819, |
| "step": 239000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 9.245425701556824e-06, |
| "loss": 3.5771, |
| "step": 239500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 9.243781748836905e-06, |
| "loss": 3.5793, |
| "step": 240000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 0.4137328604632164, |
| "eval_loss": 3.4226527214050293, |
| "eval_runtime": 405.6879, |
| "eval_samples_per_second": 760.082, |
| "eval_steps_per_second": 15.837, |
| "step": 240000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 9.242137796116984e-06, |
| "loss": 3.5746, |
| "step": 240500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 9.240493843397065e-06, |
| "loss": 3.571, |
| "step": 241000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 9.238849890677144e-06, |
| "loss": 3.5759, |
| "step": 241500 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 9.237205937957225e-06, |
| "loss": 3.5681, |
| "step": 242000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 9.235561985237306e-06, |
| "loss": 3.5718, |
| "step": 242500 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 9.233918032517386e-06, |
| "loss": 3.5656, |
| "step": 243000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 9.232274079797465e-06, |
| "loss": 3.5635, |
| "step": 243500 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 9.230630127077546e-06, |
| "loss": 3.5619, |
| "step": 244000 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 9.228986174357627e-06, |
| "loss": 3.5596, |
| "step": 244500 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 9.227342221637706e-06, |
| "loss": 3.5599, |
| "step": 245000 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 9.225698268917787e-06, |
| "loss": 3.57, |
| "step": 245500 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.224054316197866e-06, |
| "loss": 3.5608, |
| "step": 246000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.222410363477947e-06, |
| "loss": 3.5601, |
| "step": 246500 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 9.220766410758028e-06, |
| "loss": 3.562, |
| "step": 247000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 9.219122458038107e-06, |
| "loss": 3.5569, |
| "step": 247500 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 9.217478505318188e-06, |
| "loss": 3.5563, |
| "step": 248000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.215834552598268e-06, |
| "loss": 3.5634, |
| "step": 248500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.214190599878349e-06, |
| "loss": 3.55, |
| "step": 249000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 9.212546647158428e-06, |
| "loss": 3.5459, |
| "step": 249500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 9.210902694438509e-06, |
| "loss": 3.5478, |
| "step": 250000 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 9.209258741718588e-06, |
| "loss": 3.5526, |
| "step": 250500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 9.20761478899867e-06, |
| "loss": 3.5476, |
| "step": 251000 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 9.205970836278748e-06, |
| "loss": 3.5567, |
| "step": 251500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 9.20432688355883e-06, |
| "loss": 3.5423, |
| "step": 252000 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 9.20268293083891e-06, |
| "loss": 3.5487, |
| "step": 252500 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 9.20103897811899e-06, |
| "loss": 3.5401, |
| "step": 253000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 9.19939502539907e-06, |
| "loss": 3.5503, |
| "step": 253500 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 9.19775107267915e-06, |
| "loss": 3.5503, |
| "step": 254000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 9.196107119959231e-06, |
| "loss": 3.5316, |
| "step": 254500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 9.194463167239312e-06, |
| "loss": 3.541, |
| "step": 255000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 9.192819214519391e-06, |
| "loss": 3.5368, |
| "step": 255500 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 9.19117526179947e-06, |
| "loss": 3.5351, |
| "step": 256000 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 9.189531309079551e-06, |
| "loss": 3.537, |
| "step": 256500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.18788735635963e-06, |
| "loss": 3.536, |
| "step": 257000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.186243403639712e-06, |
| "loss": 3.5331, |
| "step": 257500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 9.184599450919793e-06, |
| "loss": 3.5408, |
| "step": 258000 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 9.182955498199872e-06, |
| "loss": 3.5391, |
| "step": 258500 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 9.181311545479953e-06, |
| "loss": 3.5339, |
| "step": 259000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.179667592760034e-06, |
| "loss": 3.535, |
| "step": 259500 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.178023640040113e-06, |
| "loss": 3.5261, |
| "step": 260000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 9.176379687320194e-06, |
| "loss": 3.5266, |
| "step": 260500 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 9.174735734600273e-06, |
| "loss": 3.5283, |
| "step": 261000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 9.173091781880353e-06, |
| "loss": 3.5348, |
| "step": 261500 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.171447829160434e-06, |
| "loss": 3.5231, |
| "step": 262000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.169803876440515e-06, |
| "loss": 3.5138, |
| "step": 262500 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 9.168159923720594e-06, |
| "loss": 3.5306, |
| "step": 263000 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 9.166515971000675e-06, |
| "loss": 3.5224, |
| "step": 263500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 9.164872018280754e-06, |
| "loss": 3.5279, |
| "step": 264000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.163228065560835e-06, |
| "loss": 3.5207, |
| "step": 264500 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 9.161584112840916e-06, |
| "loss": 3.5213, |
| "step": 265000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.159940160120995e-06, |
| "loss": 3.5152, |
| "step": 265500 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.158296207401076e-06, |
| "loss": 3.5148, |
| "step": 266000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.156652254681156e-06, |
| "loss": 3.5185, |
| "step": 266500 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 9.155008301961237e-06, |
| "loss": 3.5155, |
| "step": 267000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 9.153364349241318e-06, |
| "loss": 3.5192, |
| "step": 267500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 9.151720396521397e-06, |
| "loss": 3.5185, |
| "step": 268000 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 9.150076443801476e-06, |
| "loss": 3.518, |
| "step": 268500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 9.148432491081557e-06, |
| "loss": 3.5159, |
| "step": 269000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 9.146788538361636e-06, |
| "loss": 3.5211, |
| "step": 269500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 9.145144585641717e-06, |
| "loss": 3.5128, |
| "step": 270000 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_accuracy": 0.42085813479275763, |
| "eval_loss": 3.364504814147949, |
| "eval_runtime": 411.9164, |
| "eval_samples_per_second": 748.589, |
| "eval_steps_per_second": 15.598, |
| "step": 270000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 9.143500632921798e-06, |
| "loss": 3.5162, |
| "step": 270500 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 9.141856680201878e-06, |
| "loss": 3.5144, |
| "step": 271000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 9.140212727481959e-06, |
| "loss": 3.5095, |
| "step": 271500 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 9.13856877476204e-06, |
| "loss": 3.502, |
| "step": 272000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 9.136924822042119e-06, |
| "loss": 3.503, |
| "step": 272500 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 9.1352808693222e-06, |
| "loss": 3.5011, |
| "step": 273000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 9.133636916602279e-06, |
| "loss": 3.505, |
| "step": 273500 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 9.131992963882358e-06, |
| "loss": 3.5071, |
| "step": 274000 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 9.13034901116244e-06, |
| "loss": 3.5018, |
| "step": 274500 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 9.12870505844252e-06, |
| "loss": 3.5058, |
| "step": 275000 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 9.1270611057226e-06, |
| "loss": 3.5056, |
| "step": 275500 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 9.12541715300268e-06, |
| "loss": 3.5085, |
| "step": 276000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 9.12377320028276e-06, |
| "loss": 3.5111, |
| "step": 276500 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 9.122129247562841e-06, |
| "loss": 3.4963, |
| "step": 277000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 9.120485294842922e-06, |
| "loss": 3.5001, |
| "step": 277500 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 9.118841342123001e-06, |
| "loss": 3.5004, |
| "step": 278000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 9.117197389403082e-06, |
| "loss": 3.5027, |
| "step": 278500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 9.115553436683161e-06, |
| "loss": 3.4984, |
| "step": 279000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 9.113909483963242e-06, |
| "loss": 3.5006, |
| "step": 279500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 9.112265531243322e-06, |
| "loss": 3.4952, |
| "step": 280000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 9.110621578523403e-06, |
| "loss": 3.4953, |
| "step": 280500 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 9.108977625803482e-06, |
| "loss": 3.4951, |
| "step": 281000 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 9.107333673083563e-06, |
| "loss": 3.4984, |
| "step": 281500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 9.105689720363642e-06, |
| "loss": 3.4849, |
| "step": 282000 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 9.104045767643723e-06, |
| "loss": 3.4901, |
| "step": 282500 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 9.102401814923804e-06, |
| "loss": 3.4884, |
| "step": 283000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 9.100757862203883e-06, |
| "loss": 3.4937, |
| "step": 283500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.099113909483964e-06, |
| "loss": 3.4915, |
| "step": 284000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.097469956764045e-06, |
| "loss": 3.4922, |
| "step": 284500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 9.095826004044125e-06, |
| "loss": 3.4892, |
| "step": 285000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 9.094182051324206e-06, |
| "loss": 3.4837, |
| "step": 285500 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 9.092538098604285e-06, |
| "loss": 3.4821, |
| "step": 286000 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 9.090894145884364e-06, |
| "loss": 3.4836, |
| "step": 286500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 9.089250193164445e-06, |
| "loss": 3.4826, |
| "step": 287000 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 9.087606240444526e-06, |
| "loss": 3.477, |
| "step": 287500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 9.085962287724605e-06, |
| "loss": 3.485, |
| "step": 288000 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 9.084318335004686e-06, |
| "loss": 3.4778, |
| "step": 288500 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 9.082674382284766e-06, |
| "loss": 3.49, |
| "step": 289000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 9.081030429564847e-06, |
| "loss": 3.4814, |
| "step": 289500 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 9.079386476844928e-06, |
| "loss": 3.4752, |
| "step": 290000 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 9.077742524125007e-06, |
| "loss": 3.4825, |
| "step": 290500 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 9.076098571405088e-06, |
| "loss": 3.4784, |
| "step": 291000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 9.074454618685167e-06, |
| "loss": 3.4804, |
| "step": 291500 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 9.072810665965246e-06, |
| "loss": 3.4815, |
| "step": 292000 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 9.071166713245327e-06, |
| "loss": 3.4706, |
| "step": 292500 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 9.069522760525408e-06, |
| "loss": 3.4753, |
| "step": 293000 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 9.067878807805488e-06, |
| "loss": 3.4782, |
| "step": 293500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.066234855085569e-06, |
| "loss": 3.4705, |
| "step": 294000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.064590902365648e-06, |
| "loss": 3.4739, |
| "step": 294500 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.062946949645729e-06, |
| "loss": 3.4786, |
| "step": 295000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.06130299692581e-06, |
| "loss": 3.4724, |
| "step": 295500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.059659044205889e-06, |
| "loss": 3.4684, |
| "step": 296000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.05801509148597e-06, |
| "loss": 3.4717, |
| "step": 296500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.056371138766051e-06, |
| "loss": 3.4704, |
| "step": 297000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.05472718604613e-06, |
| "loss": 3.4656, |
| "step": 297500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.05308323332621e-06, |
| "loss": 3.4666, |
| "step": 298000 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.05143928060629e-06, |
| "loss": 3.4695, |
| "step": 298500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.04979532788637e-06, |
| "loss": 3.4686, |
| "step": 299000 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.04815137516645e-06, |
| "loss": 3.4652, |
| "step": 299500 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.046507422446532e-06, |
| "loss": 3.4597, |
| "step": 300000 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_accuracy": 0.42611833634349283, |
| "eval_loss": 3.321903944015503, |
| "eval_runtime": 409.6321, |
| "eval_samples_per_second": 752.763, |
| "eval_steps_per_second": 15.685, |
| "step": 300000 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.044863469726611e-06, |
| "loss": 3.4601, |
| "step": 300500 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 9.043219517006692e-06, |
| "loss": 3.4717, |
| "step": 301000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 9.041575564286771e-06, |
| "loss": 3.463, |
| "step": 301500 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 9.039931611566852e-06, |
| "loss": 3.463, |
| "step": 302000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 9.038287658846933e-06, |
| "loss": 3.4575, |
| "step": 302500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 9.036643706127013e-06, |
| "loss": 3.4648, |
| "step": 303000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.034999753407094e-06, |
| "loss": 3.4625, |
| "step": 303500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.033355800687173e-06, |
| "loss": 3.4562, |
| "step": 304000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.031711847967252e-06, |
| "loss": 3.4587, |
| "step": 304500 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.030067895247333e-06, |
| "loss": 3.4486, |
| "step": 305000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.028423942527414e-06, |
| "loss": 3.4609, |
| "step": 305500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 9.026779989807493e-06, |
| "loss": 3.4502, |
| "step": 306000 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 9.025136037087574e-06, |
| "loss": 3.4515, |
| "step": 306500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 9.023492084367654e-06, |
| "loss": 3.4536, |
| "step": 307000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 9.021848131647735e-06, |
| "loss": 3.4527, |
| "step": 307500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 9.020204178927815e-06, |
| "loss": 3.4506, |
| "step": 308000 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 9.018560226207895e-06, |
| "loss": 3.4553, |
| "step": 308500 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 9.016916273487976e-06, |
| "loss": 3.457, |
| "step": 309000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 9.015272320768057e-06, |
| "loss": 3.4517, |
| "step": 309500 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 9.013628368048134e-06, |
| "loss": 3.4471, |
| "step": 310000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 9.011984415328215e-06, |
| "loss": 3.4469, |
| "step": 310500 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 9.010340462608296e-06, |
| "loss": 3.4488, |
| "step": 311000 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 9.008696509888376e-06, |
| "loss": 3.4416, |
| "step": 311500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 9.007052557168457e-06, |
| "loss": 3.4526, |
| "step": 312000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 9.005408604448537e-06, |
| "loss": 3.4508, |
| "step": 312500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 9.003764651728617e-06, |
| "loss": 3.4583, |
| "step": 313000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 9.002120699008698e-06, |
| "loss": 3.4571, |
| "step": 313500 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 9.000476746288777e-06, |
| "loss": 3.454, |
| "step": 314000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 8.998832793568858e-06, |
| "loss": 3.4457, |
| "step": 314500 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 8.997188840848939e-06, |
| "loss": 3.4385, |
| "step": 315000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 8.995544888129018e-06, |
| "loss": 3.4465, |
| "step": 315500 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 8.9939009354091e-06, |
| "loss": 3.4435, |
| "step": 316000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 8.992256982689178e-06, |
| "loss": 3.4489, |
| "step": 316500 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 8.990613029969258e-06, |
| "loss": 3.438, |
| "step": 317000 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 8.988969077249339e-06, |
| "loss": 3.4412, |
| "step": 317500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.98732512452942e-06, |
| "loss": 3.4394, |
| "step": 318000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.985681171809499e-06, |
| "loss": 3.4455, |
| "step": 318500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 8.98403721908958e-06, |
| "loss": 3.4398, |
| "step": 319000 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.98239326636966e-06, |
| "loss": 3.4448, |
| "step": 319500 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 8.98074931364974e-06, |
| "loss": 3.4345, |
| "step": 320000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 8.979105360929821e-06, |
| "loss": 3.4359, |
| "step": 320500 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 8.9774614082099e-06, |
| "loss": 3.4498, |
| "step": 321000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 8.975817455489981e-06, |
| "loss": 3.4363, |
| "step": 321500 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 8.974173502770062e-06, |
| "loss": 3.4381, |
| "step": 322000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 8.97252955005014e-06, |
| "loss": 3.4334, |
| "step": 322500 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 8.970885597330221e-06, |
| "loss": 3.4354, |
| "step": 323000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 8.969241644610302e-06, |
| "loss": 3.4338, |
| "step": 323500 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 8.967597691890381e-06, |
| "loss": 3.4305, |
| "step": 324000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 8.965953739170462e-06, |
| "loss": 3.4322, |
| "step": 324500 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 8.964309786450543e-06, |
| "loss": 3.4363, |
| "step": 325000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 8.962665833730622e-06, |
| "loss": 3.4398, |
| "step": 325500 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 8.961021881010703e-06, |
| "loss": 3.4376, |
| "step": 326000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 8.959377928290783e-06, |
| "loss": 3.4268, |
| "step": 326500 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 8.957733975570864e-06, |
| "loss": 3.4415, |
| "step": 327000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 8.956090022850945e-06, |
| "loss": 3.4253, |
| "step": 327500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 8.954446070131024e-06, |
| "loss": 3.4335, |
| "step": 328000 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 8.952802117411103e-06, |
| "loss": 3.4273, |
| "step": 328500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 8.951158164691184e-06, |
| "loss": 3.4334, |
| "step": 329000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 8.949514211971263e-06, |
| "loss": 3.4288, |
| "step": 329500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 8.947870259251344e-06, |
| "loss": 3.4263, |
| "step": 330000 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_accuracy": 0.43119620464233216, |
| "eval_loss": 3.284120559692383, |
| "eval_runtime": 409.6426, |
| "eval_samples_per_second": 752.744, |
| "eval_steps_per_second": 15.684, |
| "step": 330000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 8.946226306531425e-06, |
| "loss": 3.4205, |
| "step": 330500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 8.944582353811505e-06, |
| "loss": 3.4233, |
| "step": 331000 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 8.942938401091586e-06, |
| "loss": 3.4275, |
| "step": 331500 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 8.941294448371665e-06, |
| "loss": 3.4317, |
| "step": 332000 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 8.939650495651746e-06, |
| "loss": 3.4239, |
| "step": 332500 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 8.938006542931827e-06, |
| "loss": 3.4237, |
| "step": 333000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 8.936362590211906e-06, |
| "loss": 3.4192, |
| "step": 333500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 8.934718637491987e-06, |
| "loss": 3.4193, |
| "step": 334000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 8.933074684772066e-06, |
| "loss": 3.4232, |
| "step": 334500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 8.931430732052146e-06, |
| "loss": 3.422, |
| "step": 335000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 8.929786779332227e-06, |
| "loss": 3.4168, |
| "step": 335500 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 8.928142826612308e-06, |
| "loss": 3.4203, |
| "step": 336000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 8.926498873892387e-06, |
| "loss": 3.4188, |
| "step": 336500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 8.924854921172468e-06, |
| "loss": 3.4202, |
| "step": 337000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 8.923210968452549e-06, |
| "loss": 3.4217, |
| "step": 337500 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 8.921567015732628e-06, |
| "loss": 3.4185, |
| "step": 338000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 8.919923063012709e-06, |
| "loss": 3.4202, |
| "step": 338500 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 8.918279110292788e-06, |
| "loss": 3.4172, |
| "step": 339000 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 8.91663515757287e-06, |
| "loss": 3.4221, |
| "step": 339500 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 8.91499120485295e-06, |
| "loss": 3.4136, |
| "step": 340000 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 8.91334725213303e-06, |
| "loss": 3.4209, |
| "step": 340500 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 8.911703299413109e-06, |
| "loss": 3.4182, |
| "step": 341000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 8.91005934669319e-06, |
| "loss": 3.4152, |
| "step": 341500 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 8.90841539397327e-06, |
| "loss": 3.4124, |
| "step": 342000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 8.90677144125335e-06, |
| "loss": 3.408, |
| "step": 342500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 8.905127488533431e-06, |
| "loss": 3.4112, |
| "step": 343000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 8.90348353581351e-06, |
| "loss": 3.4144, |
| "step": 343500 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 8.901839583093591e-06, |
| "loss": 3.4173, |
| "step": 344000 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 8.90019563037367e-06, |
| "loss": 3.4107, |
| "step": 344500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 8.898551677653752e-06, |
| "loss": 3.4094, |
| "step": 345000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 8.896907724933833e-06, |
| "loss": 3.4165, |
| "step": 345500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 8.895263772213912e-06, |
| "loss": 3.4163, |
| "step": 346000 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 8.893619819493991e-06, |
| "loss": 3.4158, |
| "step": 346500 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 8.891975866774072e-06, |
| "loss": 3.4166, |
| "step": 347000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 8.890331914054151e-06, |
| "loss": 3.4059, |
| "step": 347500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 8.888687961334232e-06, |
| "loss": 3.4079, |
| "step": 348000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 8.887044008614313e-06, |
| "loss": 3.4116, |
| "step": 348500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 8.885400055894393e-06, |
| "loss": 3.4078, |
| "step": 349000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 8.883756103174474e-06, |
| "loss": 3.4109, |
| "step": 349500 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 8.882112150454555e-06, |
| "loss": 3.4022, |
| "step": 350000 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 8.880468197734634e-06, |
| "loss": 3.4004, |
| "step": 350500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.878824245014715e-06, |
| "loss": 3.4024, |
| "step": 351000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.877180292294794e-06, |
| "loss": 3.4056, |
| "step": 351500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 8.875536339574875e-06, |
| "loss": 3.3985, |
| "step": 352000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 8.873892386854954e-06, |
| "loss": 3.3945, |
| "step": 352500 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 8.872248434135035e-06, |
| "loss": 3.3978, |
| "step": 353000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 8.870604481415115e-06, |
| "loss": 3.4052, |
| "step": 353500 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 8.868960528695196e-06, |
| "loss": 3.4048, |
| "step": 354000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 8.867316575975275e-06, |
| "loss": 3.4055, |
| "step": 354500 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 8.865672623255356e-06, |
| "loss": 3.4025, |
| "step": 355000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 8.864028670535437e-06, |
| "loss": 3.3976, |
| "step": 355500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 8.862384717815516e-06, |
| "loss": 3.3962, |
| "step": 356000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 8.860740765095597e-06, |
| "loss": 3.3927, |
| "step": 356500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 8.859096812375676e-06, |
| "loss": 3.4024, |
| "step": 357000 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 8.857452859655757e-06, |
| "loss": 3.3929, |
| "step": 357500 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 8.855808906935838e-06, |
| "loss": 3.3911, |
| "step": 358000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 8.854164954215918e-06, |
| "loss": 3.3976, |
| "step": 358500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 8.852521001495997e-06, |
| "loss": 3.3944, |
| "step": 359000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.850877048776078e-06, |
| "loss": 3.3953, |
| "step": 359500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.849233096056157e-06, |
| "loss": 3.3909, |
| "step": 360000 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_accuracy": 0.43477661532857337, |
| "eval_loss": 3.254718542098999, |
| "eval_runtime": 406.1401, |
| "eval_samples_per_second": 759.236, |
| "eval_steps_per_second": 15.82, |
| "step": 360000 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.847589143336238e-06, |
| "loss": 3.403, |
| "step": 360500 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 8.845945190616319e-06, |
| "loss": 3.3966, |
| "step": 361000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 8.844301237896398e-06, |
| "loss": 3.3835, |
| "step": 361500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 8.84265728517648e-06, |
| "loss": 3.3896, |
| "step": 362000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 8.84101333245656e-06, |
| "loss": 3.3855, |
| "step": 362500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 8.83936937973664e-06, |
| "loss": 3.3851, |
| "step": 363000 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 8.83772542701672e-06, |
| "loss": 3.3927, |
| "step": 363500 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 8.8360814742968e-06, |
| "loss": 3.389, |
| "step": 364000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 8.83443752157688e-06, |
| "loss": 3.3861, |
| "step": 364500 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 8.83279356885696e-06, |
| "loss": 3.3955, |
| "step": 365000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 8.831149616137041e-06, |
| "loss": 3.395, |
| "step": 365500 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 8.82950566341712e-06, |
| "loss": 3.3881, |
| "step": 366000 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 8.827861710697201e-06, |
| "loss": 3.3844, |
| "step": 366500 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 8.82621775797728e-06, |
| "loss": 3.3886, |
| "step": 367000 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 8.824573805257362e-06, |
| "loss": 3.3897, |
| "step": 367500 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 8.822929852537443e-06, |
| "loss": 3.3882, |
| "step": 368000 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 8.821285899817522e-06, |
| "loss": 3.3919, |
| "step": 368500 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 8.819641947097603e-06, |
| "loss": 3.3815, |
| "step": 369000 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 8.817997994377682e-06, |
| "loss": 3.3802, |
| "step": 369500 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 8.816354041657763e-06, |
| "loss": 3.3817, |
| "step": 370000 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 8.814710088937844e-06, |
| "loss": 3.3874, |
| "step": 370500 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 8.813066136217923e-06, |
| "loss": 3.3875, |
| "step": 371000 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 8.811422183498003e-06, |
| "loss": 3.3885, |
| "step": 371500 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 8.809778230778084e-06, |
| "loss": 3.3884, |
| "step": 372000 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 8.808134278058163e-06, |
| "loss": 3.3905, |
| "step": 372500 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 8.806490325338244e-06, |
| "loss": 3.3829, |
| "step": 373000 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 8.804846372618325e-06, |
| "loss": 3.3858, |
| "step": 373500 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 8.803202419898404e-06, |
| "loss": 3.3852, |
| "step": 374000 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 8.801558467178485e-06, |
| "loss": 3.3814, |
| "step": 374500 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 8.799914514458564e-06, |
| "loss": 3.3849, |
| "step": 375000 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 8.798270561738645e-06, |
| "loss": 3.3812, |
| "step": 375500 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 8.796626609018726e-06, |
| "loss": 3.3815, |
| "step": 376000 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 8.794982656298806e-06, |
| "loss": 3.3738, |
| "step": 376500 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 8.793338703578885e-06, |
| "loss": 3.381, |
| "step": 377000 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 8.791694750858966e-06, |
| "loss": 3.3762, |
| "step": 377500 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 8.790050798139047e-06, |
| "loss": 3.3709, |
| "step": 378000 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 8.788406845419126e-06, |
| "loss": 3.3779, |
| "step": 378500 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.786762892699207e-06, |
| "loss": 3.3774, |
| "step": 379000 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.785118939979286e-06, |
| "loss": 3.3692, |
| "step": 379500 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.783474987259367e-06, |
| "loss": 3.3737, |
| "step": 380000 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 8.781831034539448e-06, |
| "loss": 3.3717, |
| "step": 380500 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 8.780187081819528e-06, |
| "loss": 3.3763, |
| "step": 381000 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 8.778543129099608e-06, |
| "loss": 3.3748, |
| "step": 381500 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 8.776899176379688e-06, |
| "loss": 3.3824, |
| "step": 382000 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 8.775255223659769e-06, |
| "loss": 3.3739, |
| "step": 382500 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 8.773611270939848e-06, |
| "loss": 3.3745, |
| "step": 383000 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 8.771967318219929e-06, |
| "loss": 3.3775, |
| "step": 383500 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 8.770323365500008e-06, |
| "loss": 3.3675, |
| "step": 384000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 8.76867941278009e-06, |
| "loss": 3.3676, |
| "step": 384500 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 8.767035460060169e-06, |
| "loss": 3.3667, |
| "step": 385000 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 8.76539150734025e-06, |
| "loss": 3.3638, |
| "step": 385500 |
| }, |
| { |
| "epoch": 3.16, |
| "learning_rate": 8.76374755462033e-06, |
| "loss": 3.3682, |
| "step": 386000 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 8.76210360190041e-06, |
| "loss": 3.3698, |
| "step": 386500 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 8.76045964918049e-06, |
| "loss": 3.371, |
| "step": 387000 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 8.75881569646057e-06, |
| "loss": 3.3797, |
| "step": 387500 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 8.757171743740651e-06, |
| "loss": 3.3701, |
| "step": 388000 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 8.755527791020732e-06, |
| "loss": 3.3725, |
| "step": 388500 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 8.753883838300811e-06, |
| "loss": 3.3784, |
| "step": 389000 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 8.75223988558089e-06, |
| "loss": 3.3677, |
| "step": 389500 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 8.750595932860971e-06, |
| "loss": 3.3635, |
| "step": 390000 |
| }, |
| { |
| "epoch": 3.2, |
| "eval_accuracy": 0.43791422082834985, |
| "eval_loss": 3.228388547897339, |
| "eval_runtime": 413.1728, |
| "eval_samples_per_second": 746.312, |
| "eval_steps_per_second": 15.55, |
| "step": 390000 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 8.748951980141052e-06, |
| "loss": 3.364, |
| "step": 390500 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 8.747308027421132e-06, |
| "loss": 3.3708, |
| "step": 391000 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 8.745664074701213e-06, |
| "loss": 3.3714, |
| "step": 391500 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 8.744020121981292e-06, |
| "loss": 3.3658, |
| "step": 392000 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 8.742376169261373e-06, |
| "loss": 3.3653, |
| "step": 392500 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 8.740732216541454e-06, |
| "loss": 3.3617, |
| "step": 393000 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 8.739088263821533e-06, |
| "loss": 3.367, |
| "step": 393500 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 8.737444311101614e-06, |
| "loss": 3.3608, |
| "step": 394000 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 8.735800358381693e-06, |
| "loss": 3.3643, |
| "step": 394500 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 8.734156405661773e-06, |
| "loss": 3.3607, |
| "step": 395000 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 8.732512452941854e-06, |
| "loss": 3.3638, |
| "step": 395500 |
| }, |
| { |
| "epoch": 3.24, |
| "learning_rate": 8.730868500221935e-06, |
| "loss": 3.3687, |
| "step": 396000 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 8.729224547502014e-06, |
| "loss": 3.3616, |
| "step": 396500 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 8.727580594782095e-06, |
| "loss": 3.3678, |
| "step": 397000 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 8.725936642062174e-06, |
| "loss": 3.3616, |
| "step": 397500 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 8.724292689342255e-06, |
| "loss": 3.3573, |
| "step": 398000 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 8.722648736622336e-06, |
| "loss": 3.3622, |
| "step": 398500 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 8.721004783902415e-06, |
| "loss": 3.3588, |
| "step": 399000 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 8.719360831182496e-06, |
| "loss": 3.3604, |
| "step": 399500 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 8.717716878462576e-06, |
| "loss": 3.361, |
| "step": 400000 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 8.716072925742657e-06, |
| "loss": 3.3546, |
| "step": 400500 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 8.714428973022736e-06, |
| "loss": 3.3613, |
| "step": 401000 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 8.712785020302817e-06, |
| "loss": 3.3619, |
| "step": 401500 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 8.711141067582896e-06, |
| "loss": 3.3623, |
| "step": 402000 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 8.709497114862977e-06, |
| "loss": 3.3552, |
| "step": 402500 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 8.707853162143056e-06, |
| "loss": 3.3554, |
| "step": 403000 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 8.706209209423137e-06, |
| "loss": 3.3587, |
| "step": 403500 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 8.704565256703218e-06, |
| "loss": 3.3558, |
| "step": 404000 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 8.702921303983298e-06, |
| "loss": 3.3582, |
| "step": 404500 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 8.701277351263379e-06, |
| "loss": 3.3627, |
| "step": 405000 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 8.69963339854346e-06, |
| "loss": 3.3572, |
| "step": 405500 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 8.697989445823539e-06, |
| "loss": 3.3658, |
| "step": 406000 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 8.69634549310362e-06, |
| "loss": 3.358, |
| "step": 406500 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 8.6947015403837e-06, |
| "loss": 3.349, |
| "step": 407000 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 8.693057587663778e-06, |
| "loss": 3.3534, |
| "step": 407500 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 8.69141363494386e-06, |
| "loss": 3.3595, |
| "step": 408000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 8.68976968222394e-06, |
| "loss": 3.3551, |
| "step": 408500 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 8.68812572950402e-06, |
| "loss": 3.3574, |
| "step": 409000 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 8.6864817767841e-06, |
| "loss": 3.3534, |
| "step": 409500 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 8.68483782406418e-06, |
| "loss": 3.3541, |
| "step": 410000 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 8.683193871344261e-06, |
| "loss": 3.3545, |
| "step": 410500 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 8.681549918624342e-06, |
| "loss": 3.3506, |
| "step": 411000 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 8.679905965904421e-06, |
| "loss": 3.3454, |
| "step": 411500 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 8.678262013184502e-06, |
| "loss": 3.3557, |
| "step": 412000 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 8.676618060464581e-06, |
| "loss": 3.3449, |
| "step": 412500 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 8.67497410774466e-06, |
| "loss": 3.3565, |
| "step": 413000 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 8.673330155024742e-06, |
| "loss": 3.3423, |
| "step": 413500 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 8.671686202304823e-06, |
| "loss": 3.3515, |
| "step": 414000 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 8.670042249584902e-06, |
| "loss": 3.3462, |
| "step": 414500 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 8.668398296864983e-06, |
| "loss": 3.3515, |
| "step": 415000 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 8.666754344145062e-06, |
| "loss": 3.3514, |
| "step": 415500 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 8.665110391425143e-06, |
| "loss": 3.3498, |
| "step": 416000 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 8.663466438705224e-06, |
| "loss": 3.3514, |
| "step": 416500 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 8.661822485985303e-06, |
| "loss": 3.3517, |
| "step": 417000 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 8.660178533265384e-06, |
| "loss": 3.3476, |
| "step": 417500 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 8.658534580545465e-06, |
| "loss": 3.3381, |
| "step": 418000 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 8.656890627825545e-06, |
| "loss": 3.3461, |
| "step": 418500 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 8.655246675105626e-06, |
| "loss": 3.3492, |
| "step": 419000 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 8.653602722385705e-06, |
| "loss": 3.3478, |
| "step": 419500 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 8.651958769665784e-06, |
| "loss": 3.3488, |
| "step": 420000 |
| }, |
| { |
| "epoch": 3.44, |
| "eval_accuracy": 0.4409229499038142, |
| "eval_loss": 3.20596981048584, |
| "eval_runtime": 409.4361, |
| "eval_samples_per_second": 753.124, |
| "eval_steps_per_second": 15.692, |
| "step": 420000 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 8.650314816945865e-06, |
| "loss": 3.3408, |
| "step": 420500 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 8.648670864225946e-06, |
| "loss": 3.3418, |
| "step": 421000 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 8.647026911506025e-06, |
| "loss": 3.3481, |
| "step": 421500 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 8.645382958786106e-06, |
| "loss": 3.3451, |
| "step": 422000 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 8.643739006066186e-06, |
| "loss": 3.3391, |
| "step": 422500 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 8.642095053346267e-06, |
| "loss": 3.3423, |
| "step": 423000 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 8.640451100626348e-06, |
| "loss": 3.3375, |
| "step": 423500 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 8.638807147906427e-06, |
| "loss": 3.3379, |
| "step": 424000 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 8.637163195186508e-06, |
| "loss": 3.3446, |
| "step": 424500 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 8.635519242466587e-06, |
| "loss": 3.3365, |
| "step": 425000 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 8.633875289746666e-06, |
| "loss": 3.3405, |
| "step": 425500 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 8.632231337026747e-06, |
| "loss": 3.3409, |
| "step": 426000 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 8.630587384306828e-06, |
| "loss": 3.3369, |
| "step": 426500 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 8.628943431586908e-06, |
| "loss": 3.3391, |
| "step": 427000 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 8.627299478866989e-06, |
| "loss": 3.3426, |
| "step": 427500 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 8.625655526147068e-06, |
| "loss": 3.3418, |
| "step": 428000 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 8.624011573427149e-06, |
| "loss": 3.3402, |
| "step": 428500 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 8.62236762070723e-06, |
| "loss": 3.3424, |
| "step": 429000 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 8.620723667987309e-06, |
| "loss": 3.338, |
| "step": 429500 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 8.61907971526739e-06, |
| "loss": 3.3335, |
| "step": 430000 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 8.617435762547471e-06, |
| "loss": 3.3276, |
| "step": 430500 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 8.61579180982755e-06, |
| "loss": 3.3285, |
| "step": 431000 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 8.61414785710763e-06, |
| "loss": 3.3357, |
| "step": 431500 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 8.61250390438771e-06, |
| "loss": 3.337, |
| "step": 432000 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 8.61085995166779e-06, |
| "loss": 3.3378, |
| "step": 432500 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 8.60921599894787e-06, |
| "loss": 3.3323, |
| "step": 433000 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 8.607572046227952e-06, |
| "loss": 3.3337, |
| "step": 433500 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 8.605928093508031e-06, |
| "loss": 3.3325, |
| "step": 434000 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 8.604284140788112e-06, |
| "loss": 3.3287, |
| "step": 434500 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 8.602640188068191e-06, |
| "loss": 3.3334, |
| "step": 435000 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 8.600996235348272e-06, |
| "loss": 3.3308, |
| "step": 435500 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 8.599352282628353e-06, |
| "loss": 3.3421, |
| "step": 436000 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 8.597708329908433e-06, |
| "loss": 3.3361, |
| "step": 436500 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 8.596064377188514e-06, |
| "loss": 3.3349, |
| "step": 437000 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 8.594420424468593e-06, |
| "loss": 3.3278, |
| "step": 437500 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 8.592776471748672e-06, |
| "loss": 3.3309, |
| "step": 438000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 8.591132519028753e-06, |
| "loss": 3.3285, |
| "step": 438500 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 8.589488566308834e-06, |
| "loss": 3.322, |
| "step": 439000 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 8.587844613588913e-06, |
| "loss": 3.3353, |
| "step": 439500 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 8.586200660868994e-06, |
| "loss": 3.3251, |
| "step": 440000 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 8.584556708149074e-06, |
| "loss": 3.3303, |
| "step": 440500 |
| }, |
| { |
| "epoch": 3.61, |
| "learning_rate": 8.582912755429155e-06, |
| "loss": 3.3266, |
| "step": 441000 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 8.581268802709236e-06, |
| "loss": 3.3359, |
| "step": 441500 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 8.579624849989315e-06, |
| "loss": 3.3264, |
| "step": 442000 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 8.577980897269396e-06, |
| "loss": 3.3244, |
| "step": 442500 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 8.576336944549477e-06, |
| "loss": 3.3266, |
| "step": 443000 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 8.574692991829554e-06, |
| "loss": 3.3246, |
| "step": 443500 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 8.573049039109635e-06, |
| "loss": 3.3264, |
| "step": 444000 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 8.571405086389716e-06, |
| "loss": 3.3309, |
| "step": 444500 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 8.569761133669796e-06, |
| "loss": 3.329, |
| "step": 445000 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 8.568117180949877e-06, |
| "loss": 3.3208, |
| "step": 445500 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 8.566473228229958e-06, |
| "loss": 3.3259, |
| "step": 446000 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 8.564829275510037e-06, |
| "loss": 3.3272, |
| "step": 446500 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 8.563185322790118e-06, |
| "loss": 3.3228, |
| "step": 447000 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 8.561541370070197e-06, |
| "loss": 3.3259, |
| "step": 447500 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 8.559897417350278e-06, |
| "loss": 3.3244, |
| "step": 448000 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 8.558253464630359e-06, |
| "loss": 3.3245, |
| "step": 448500 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 8.556609511910438e-06, |
| "loss": 3.3324, |
| "step": 449000 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 8.554965559190518e-06, |
| "loss": 3.3236, |
| "step": 449500 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 8.553321606470599e-06, |
| "loss": 3.3239, |
| "step": 450000 |
| }, |
| { |
| "epoch": 3.69, |
| "eval_accuracy": 0.443556698280814, |
| "eval_loss": 3.187194585800171, |
| "eval_runtime": 414.6673, |
| "eval_samples_per_second": 743.623, |
| "eval_steps_per_second": 15.494, |
| "step": 450000 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 8.551677653750678e-06, |
| "loss": 3.3201, |
| "step": 450500 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 8.550033701030759e-06, |
| "loss": 3.3214, |
| "step": 451000 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 8.54838974831084e-06, |
| "loss": 3.3252, |
| "step": 451500 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 8.546745795590919e-06, |
| "loss": 3.3185, |
| "step": 452000 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 8.545101842871e-06, |
| "loss": 3.324, |
| "step": 452500 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 8.54345789015108e-06, |
| "loss": 3.3222, |
| "step": 453000 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 8.54181393743116e-06, |
| "loss": 3.3129, |
| "step": 453500 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 8.540169984711241e-06, |
| "loss": 3.3245, |
| "step": 454000 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 8.53852603199132e-06, |
| "loss": 3.3226, |
| "step": 454500 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 8.536882079271401e-06, |
| "loss": 3.3162, |
| "step": 455000 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 8.53523812655148e-06, |
| "loss": 3.3124, |
| "step": 455500 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 8.53359417383156e-06, |
| "loss": 3.3155, |
| "step": 456000 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 8.531950221111641e-06, |
| "loss": 3.3179, |
| "step": 456500 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 8.530306268391722e-06, |
| "loss": 3.3139, |
| "step": 457000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 8.528662315671801e-06, |
| "loss": 3.3116, |
| "step": 457500 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 8.527018362951882e-06, |
| "loss": 3.3204, |
| "step": 458000 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 8.525374410231963e-06, |
| "loss": 3.3166, |
| "step": 458500 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 8.523730457512042e-06, |
| "loss": 3.3175, |
| "step": 459000 |
| }, |
| { |
| "epoch": 3.76, |
| "learning_rate": 8.522086504792123e-06, |
| "loss": 3.3107, |
| "step": 459500 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 8.520442552072203e-06, |
| "loss": 3.3189, |
| "step": 460000 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 8.518798599352284e-06, |
| "loss": 3.3178, |
| "step": 460500 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 8.517154646632365e-06, |
| "loss": 3.3001, |
| "step": 461000 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 8.515510693912444e-06, |
| "loss": 3.3178, |
| "step": 461500 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 8.513866741192523e-06, |
| "loss": 3.3168, |
| "step": 462000 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 8.512222788472604e-06, |
| "loss": 3.3178, |
| "step": 462500 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 8.510578835752684e-06, |
| "loss": 3.3187, |
| "step": 463000 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 8.508934883032764e-06, |
| "loss": 3.3136, |
| "step": 463500 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 8.507290930312845e-06, |
| "loss": 3.3138, |
| "step": 464000 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 8.505646977592925e-06, |
| "loss": 3.3117, |
| "step": 464500 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 8.504003024873006e-06, |
| "loss": 3.3167, |
| "step": 465000 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 8.502359072153085e-06, |
| "loss": 3.3172, |
| "step": 465500 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 8.500715119433166e-06, |
| "loss": 3.3083, |
| "step": 466000 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 8.499071166713247e-06, |
| "loss": 3.31, |
| "step": 466500 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 8.497427213993326e-06, |
| "loss": 3.3198, |
| "step": 467000 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 8.495783261273407e-06, |
| "loss": 3.3153, |
| "step": 467500 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 8.494139308553486e-06, |
| "loss": 3.3139, |
| "step": 468000 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 8.492495355833566e-06, |
| "loss": 3.3044, |
| "step": 468500 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 8.490851403113647e-06, |
| "loss": 3.306, |
| "step": 469000 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 8.489207450393728e-06, |
| "loss": 3.3104, |
| "step": 469500 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 8.487563497673807e-06, |
| "loss": 3.3154, |
| "step": 470000 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 8.485919544953888e-06, |
| "loss": 3.3111, |
| "step": 470500 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 8.484275592233969e-06, |
| "loss": 3.3065, |
| "step": 471000 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 8.482631639514048e-06, |
| "loss": 3.3094, |
| "step": 471500 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 8.48098768679413e-06, |
| "loss": 3.307, |
| "step": 472000 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 8.479343734074208e-06, |
| "loss": 3.3099, |
| "step": 472500 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 8.47769978135429e-06, |
| "loss": 3.3043, |
| "step": 473000 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 8.47605582863437e-06, |
| "loss": 3.3068, |
| "step": 473500 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 8.47441187591445e-06, |
| "loss": 3.3114, |
| "step": 474000 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 8.472767923194529e-06, |
| "loss": 3.3066, |
| "step": 474500 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 8.47112397047461e-06, |
| "loss": 3.3085, |
| "step": 475000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 8.46948001775469e-06, |
| "loss": 3.3108, |
| "step": 475500 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 8.46783606503477e-06, |
| "loss": 3.3058, |
| "step": 476000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 8.466192112314851e-06, |
| "loss": 3.3152, |
| "step": 476500 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 8.46454815959493e-06, |
| "loss": 3.2999, |
| "step": 477000 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 8.462904206875011e-06, |
| "loss": 3.3049, |
| "step": 477500 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 8.46126025415509e-06, |
| "loss": 3.3029, |
| "step": 478000 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 8.459616301435172e-06, |
| "loss": 3.3006, |
| "step": 478500 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 8.457972348715253e-06, |
| "loss": 3.3031, |
| "step": 479000 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 8.456328395995332e-06, |
| "loss": 3.3033, |
| "step": 479500 |
| }, |
| { |
| "epoch": 3.93, |
| "learning_rate": 8.454684443275411e-06, |
| "loss": 3.3062, |
| "step": 480000 |
| }, |
| { |
| "epoch": 3.93, |
| "eval_accuracy": 0.4462195104271718, |
| "eval_loss": 3.166045904159546, |
| "eval_runtime": 409.7348, |
| "eval_samples_per_second": 752.575, |
| "eval_steps_per_second": 15.681, |
| "step": 480000 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 8.453040490555492e-06, |
| "loss": 3.3111, |
| "step": 480500 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 8.451396537835571e-06, |
| "loss": 3.2931, |
| "step": 481000 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 8.449752585115652e-06, |
| "loss": 3.2991, |
| "step": 481500 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 8.448108632395733e-06, |
| "loss": 3.3012, |
| "step": 482000 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 8.446464679675813e-06, |
| "loss": 3.3012, |
| "step": 482500 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 8.444820726955894e-06, |
| "loss": 3.3084, |
| "step": 483000 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 8.443176774235975e-06, |
| "loss": 3.3038, |
| "step": 483500 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 8.441532821516054e-06, |
| "loss": 3.2989, |
| "step": 484000 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 8.439888868796135e-06, |
| "loss": 3.3014, |
| "step": 484500 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 8.438244916076214e-06, |
| "loss": 3.3043, |
| "step": 485000 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 8.436600963356295e-06, |
| "loss": 3.305, |
| "step": 485500 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 8.434957010636374e-06, |
| "loss": 3.3027, |
| "step": 486000 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 8.433313057916455e-06, |
| "loss": 3.3018, |
| "step": 486500 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 8.431669105196535e-06, |
| "loss": 3.3057, |
| "step": 487000 |
| }, |
| { |
| "epoch": 3.99, |
| "learning_rate": 8.430025152476616e-06, |
| "loss": 3.2995, |
| "step": 487500 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 8.428381199756695e-06, |
| "loss": 3.3044, |
| "step": 488000 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 8.426737247036776e-06, |
| "loss": 3.2979, |
| "step": 488500 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 8.425093294316857e-06, |
| "loss": 3.2995, |
| "step": 489000 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 8.423449341596936e-06, |
| "loss": 3.2958, |
| "step": 489500 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 8.421805388877017e-06, |
| "loss": 3.3006, |
| "step": 490000 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 8.420161436157096e-06, |
| "loss": 3.3015, |
| "step": 490500 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 8.418517483437177e-06, |
| "loss": 3.2939, |
| "step": 491000 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 8.416873530717258e-06, |
| "loss": 3.2984, |
| "step": 491500 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 8.415229577997338e-06, |
| "loss": 3.294, |
| "step": 492000 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 8.413585625277417e-06, |
| "loss": 3.2979, |
| "step": 492500 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 8.411941672557498e-06, |
| "loss": 3.2938, |
| "step": 493000 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 8.410297719837577e-06, |
| "loss": 3.2961, |
| "step": 493500 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 8.408653767117658e-06, |
| "loss": 3.286, |
| "step": 494000 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 8.407009814397739e-06, |
| "loss": 3.2893, |
| "step": 494500 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 8.405365861677818e-06, |
| "loss": 3.289, |
| "step": 495000 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 8.4037219089579e-06, |
| "loss": 3.2949, |
| "step": 495500 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 8.40207795623798e-06, |
| "loss": 3.2887, |
| "step": 496000 |
| }, |
| { |
| "epoch": 4.07, |
| "learning_rate": 8.40043400351806e-06, |
| "loss": 3.2963, |
| "step": 496500 |
| }, |
| { |
| "epoch": 4.07, |
| "learning_rate": 8.39879005079814e-06, |
| "loss": 3.2874, |
| "step": 497000 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 8.39714609807822e-06, |
| "loss": 3.2941, |
| "step": 497500 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 8.395502145358299e-06, |
| "loss": 3.2907, |
| "step": 498000 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 8.39385819263838e-06, |
| "loss": 3.2968, |
| "step": 498500 |
| }, |
| { |
| "epoch": 4.09, |
| "learning_rate": 8.392214239918461e-06, |
| "loss": 3.2853, |
| "step": 499000 |
| }, |
| { |
| "epoch": 4.09, |
| "learning_rate": 8.39057028719854e-06, |
| "loss": 3.2883, |
| "step": 499500 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 8.388926334478621e-06, |
| "loss": 3.2939, |
| "step": 500000 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 8.3872823817587e-06, |
| "loss": 3.2897, |
| "step": 500500 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 8.385638429038782e-06, |
| "loss": 3.299, |
| "step": 501000 |
| }, |
| { |
| "epoch": 4.11, |
| "learning_rate": 8.383994476318863e-06, |
| "loss": 3.2911, |
| "step": 501500 |
| }, |
| { |
| "epoch": 4.11, |
| "learning_rate": 8.382350523598942e-06, |
| "loss": 3.2926, |
| "step": 502000 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 8.380706570879023e-06, |
| "loss": 3.2913, |
| "step": 502500 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 8.379062618159102e-06, |
| "loss": 3.3006, |
| "step": 503000 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 8.377418665439183e-06, |
| "loss": 3.2884, |
| "step": 503500 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 8.375774712719262e-06, |
| "loss": 3.2926, |
| "step": 504000 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 8.374130759999343e-06, |
| "loss": 3.2898, |
| "step": 504500 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 8.372486807279423e-06, |
| "loss": 3.2941, |
| "step": 505000 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 8.370842854559504e-06, |
| "loss": 3.2886, |
| "step": 505500 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 8.369198901839583e-06, |
| "loss": 3.2888, |
| "step": 506000 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 8.367554949119664e-06, |
| "loss": 3.2876, |
| "step": 506500 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 8.365910996399745e-06, |
| "loss": 3.2868, |
| "step": 507000 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 8.364267043679824e-06, |
| "loss": 3.2854, |
| "step": 507500 |
| }, |
| { |
| "epoch": 4.16, |
| "learning_rate": 8.362623090959905e-06, |
| "loss": 3.2951, |
| "step": 508000 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 8.360979138239986e-06, |
| "loss": 3.2928, |
| "step": 508500 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 8.359335185520065e-06, |
| "loss": 3.286, |
| "step": 509000 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 8.357691232800146e-06, |
| "loss": 3.2894, |
| "step": 509500 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 8.356047280080226e-06, |
| "loss": 3.2841, |
| "step": 510000 |
| }, |
| { |
| "epoch": 4.18, |
| "eval_accuracy": 0.4485220748826658, |
| "eval_loss": 3.1493306159973145, |
| "eval_runtime": 411.9758, |
| "eval_samples_per_second": 748.481, |
| "eval_steps_per_second": 15.596, |
| "step": 510000 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 8.354403327360305e-06, |
| "loss": 3.2809, |
| "step": 510500 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 8.352759374640386e-06, |
| "loss": 3.2861, |
| "step": 511000 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 8.351115421920467e-06, |
| "loss": 3.2792, |
| "step": 511500 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 8.349471469200546e-06, |
| "loss": 3.2864, |
| "step": 512000 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 8.347827516480627e-06, |
| "loss": 3.2761, |
| "step": 512500 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 8.346183563760706e-06, |
| "loss": 3.2821, |
| "step": 513000 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 8.344539611040787e-06, |
| "loss": 3.2797, |
| "step": 513500 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 8.342895658320868e-06, |
| "loss": 3.2854, |
| "step": 514000 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 8.341251705600948e-06, |
| "loss": 3.2826, |
| "step": 514500 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 8.339607752881029e-06, |
| "loss": 3.2923, |
| "step": 515000 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 8.337963800161108e-06, |
| "loss": 3.2826, |
| "step": 515500 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 8.336319847441187e-06, |
| "loss": 3.2886, |
| "step": 516000 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 8.334675894721268e-06, |
| "loss": 3.2822, |
| "step": 516500 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 8.333031942001349e-06, |
| "loss": 3.2828, |
| "step": 517000 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 8.331387989281428e-06, |
| "loss": 3.2848, |
| "step": 517500 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 8.32974403656151e-06, |
| "loss": 3.2878, |
| "step": 518000 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 8.328100083841589e-06, |
| "loss": 3.2879, |
| "step": 518500 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 8.32645613112167e-06, |
| "loss": 3.2825, |
| "step": 519000 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 8.32481217840175e-06, |
| "loss": 3.2823, |
| "step": 519500 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 8.32316822568183e-06, |
| "loss": 3.2841, |
| "step": 520000 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 8.32152427296191e-06, |
| "loss": 3.2854, |
| "step": 520500 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 8.31988032024199e-06, |
| "loss": 3.2817, |
| "step": 521000 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 8.318236367522071e-06, |
| "loss": 3.277, |
| "step": 521500 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 8.316592414802152e-06, |
| "loss": 3.2717, |
| "step": 522000 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 8.314948462082231e-06, |
| "loss": 3.2838, |
| "step": 522500 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 8.31330450936231e-06, |
| "loss": 3.2764, |
| "step": 523000 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 8.311660556642392e-06, |
| "loss": 3.2824, |
| "step": 523500 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 8.310016603922472e-06, |
| "loss": 3.2748, |
| "step": 524000 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 8.308372651202552e-06, |
| "loss": 3.275, |
| "step": 524500 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 8.306728698482633e-06, |
| "loss": 3.2802, |
| "step": 525000 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 8.305084745762712e-06, |
| "loss": 3.2764, |
| "step": 525500 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 8.303440793042793e-06, |
| "loss": 3.2746, |
| "step": 526000 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 8.301796840322874e-06, |
| "loss": 3.2793, |
| "step": 526500 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 8.300152887602953e-06, |
| "loss": 3.2819, |
| "step": 527000 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 8.298508934883034e-06, |
| "loss": 3.2755, |
| "step": 527500 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 8.296864982163114e-06, |
| "loss": 3.2831, |
| "step": 528000 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 8.295221029443193e-06, |
| "loss": 3.2733, |
| "step": 528500 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 8.293577076723274e-06, |
| "loss": 3.277, |
| "step": 529000 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 8.291933124003355e-06, |
| "loss": 3.2783, |
| "step": 529500 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 8.290289171283434e-06, |
| "loss": 3.2877, |
| "step": 530000 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 8.288645218563515e-06, |
| "loss": 3.2762, |
| "step": 530500 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 8.287001265843594e-06, |
| "loss": 3.2744, |
| "step": 531000 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 8.285357313123675e-06, |
| "loss": 3.275, |
| "step": 531500 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 8.283713360403756e-06, |
| "loss": 3.2724, |
| "step": 532000 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 8.282069407683835e-06, |
| "loss": 3.2723, |
| "step": 532500 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 8.280425454963916e-06, |
| "loss": 3.2662, |
| "step": 533000 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 8.278781502243996e-06, |
| "loss": 3.2702, |
| "step": 533500 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 8.277137549524077e-06, |
| "loss": 3.2736, |
| "step": 534000 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 8.275493596804156e-06, |
| "loss": 3.2694, |
| "step": 534500 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 8.273849644084237e-06, |
| "loss": 3.2722, |
| "step": 535000 |
| }, |
| { |
| "epoch": 4.39, |
| "learning_rate": 8.272205691364316e-06, |
| "loss": 3.2708, |
| "step": 535500 |
| }, |
| { |
| "epoch": 4.39, |
| "learning_rate": 8.270561738644397e-06, |
| "loss": 3.274, |
| "step": 536000 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 8.268917785924478e-06, |
| "loss": 3.2798, |
| "step": 536500 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 8.267273833204557e-06, |
| "loss": 3.2723, |
| "step": 537000 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 8.265629880484638e-06, |
| "loss": 3.2728, |
| "step": 537500 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 8.263985927764718e-06, |
| "loss": 3.2721, |
| "step": 538000 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 8.262341975044799e-06, |
| "loss": 3.2774, |
| "step": 538500 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 8.26069802232488e-06, |
| "loss": 3.2741, |
| "step": 539000 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 8.259054069604959e-06, |
| "loss": 3.2655, |
| "step": 539500 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 8.25741011688504e-06, |
| "loss": 3.2663, |
| "step": 540000 |
| }, |
| { |
| "epoch": 4.42, |
| "eval_accuracy": 0.4502872659892971, |
| "eval_loss": 3.1354901790618896, |
| "eval_runtime": 409.1172, |
| "eval_samples_per_second": 753.711, |
| "eval_steps_per_second": 15.705, |
| "step": 540000 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 8.25576616416512e-06, |
| "loss": 3.2669, |
| "step": 540500 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 8.254122211445198e-06, |
| "loss": 3.2673, |
| "step": 541000 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 8.25247825872528e-06, |
| "loss": 3.2689, |
| "step": 541500 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 8.25083430600536e-06, |
| "loss": 3.2659, |
| "step": 542000 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 8.24919035328544e-06, |
| "loss": 3.2657, |
| "step": 542500 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 8.24754640056552e-06, |
| "loss": 3.2651, |
| "step": 543000 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 8.2459024478456e-06, |
| "loss": 3.2673, |
| "step": 543500 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 8.244258495125681e-06, |
| "loss": 3.2719, |
| "step": 544000 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 8.242614542405762e-06, |
| "loss": 3.2714, |
| "step": 544500 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 8.240970589685841e-06, |
| "loss": 3.2702, |
| "step": 545000 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 8.239326636965922e-06, |
| "loss": 3.2649, |
| "step": 545500 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 8.237682684246001e-06, |
| "loss": 3.2791, |
| "step": 546000 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 8.23603873152608e-06, |
| "loss": 3.2627, |
| "step": 546500 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 8.234394778806162e-06, |
| "loss": 3.2707, |
| "step": 547000 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 8.232750826086243e-06, |
| "loss": 3.2704, |
| "step": 547500 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 8.231106873366322e-06, |
| "loss": 3.2624, |
| "step": 548000 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 8.229462920646403e-06, |
| "loss": 3.2661, |
| "step": 548500 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 8.227818967926482e-06, |
| "loss": 3.2724, |
| "step": 549000 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 8.226175015206563e-06, |
| "loss": 3.2653, |
| "step": 549500 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 8.224531062486644e-06, |
| "loss": 3.2676, |
| "step": 550000 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 8.222887109766723e-06, |
| "loss": 3.2621, |
| "step": 550500 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 8.221243157046804e-06, |
| "loss": 3.2705, |
| "step": 551000 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 8.219599204326885e-06, |
| "loss": 3.2677, |
| "step": 551500 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 8.217955251606965e-06, |
| "loss": 3.2643, |
| "step": 552000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 8.216311298887044e-06, |
| "loss": 3.2621, |
| "step": 552500 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 8.214667346167125e-06, |
| "loss": 3.262, |
| "step": 553000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 8.213023393447204e-06, |
| "loss": 3.2641, |
| "step": 553500 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 8.211379440727285e-06, |
| "loss": 3.2677, |
| "step": 554000 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 8.209735488007366e-06, |
| "loss": 3.2587, |
| "step": 554500 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 8.208091535287445e-06, |
| "loss": 3.2668, |
| "step": 555000 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 8.206447582567526e-06, |
| "loss": 3.2543, |
| "step": 555500 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 8.204803629847606e-06, |
| "loss": 3.2612, |
| "step": 556000 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 8.203159677127687e-06, |
| "loss": 3.2631, |
| "step": 556500 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 8.201515724407768e-06, |
| "loss": 3.2678, |
| "step": 557000 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 8.199871771687847e-06, |
| "loss": 3.2658, |
| "step": 557500 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 8.198227818967928e-06, |
| "loss": 3.2576, |
| "step": 558000 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 8.196583866248007e-06, |
| "loss": 3.2639, |
| "step": 558500 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 8.194939913528086e-06, |
| "loss": 3.2549, |
| "step": 559000 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 8.193295960808167e-06, |
| "loss": 3.2605, |
| "step": 559500 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 8.191652008088248e-06, |
| "loss": 3.2623, |
| "step": 560000 |
| }, |
| { |
| "epoch": 4.59, |
| "learning_rate": 8.190008055368328e-06, |
| "loss": 3.2635, |
| "step": 560500 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 8.188364102648409e-06, |
| "loss": 3.2587, |
| "step": 561000 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 8.186720149928488e-06, |
| "loss": 3.266, |
| "step": 561500 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 8.185076197208569e-06, |
| "loss": 3.2584, |
| "step": 562000 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 8.18343224448865e-06, |
| "loss": 3.2625, |
| "step": 562500 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 8.181788291768729e-06, |
| "loss": 3.2598, |
| "step": 563000 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 8.18014433904881e-06, |
| "loss": 3.2545, |
| "step": 563500 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 8.178500386328891e-06, |
| "loss": 3.2591, |
| "step": 564000 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 8.17685643360897e-06, |
| "loss": 3.2561, |
| "step": 564500 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 8.17521248088905e-06, |
| "loss": 3.2604, |
| "step": 565000 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 8.17356852816913e-06, |
| "loss": 3.2529, |
| "step": 565500 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 8.17192457544921e-06, |
| "loss": 3.2568, |
| "step": 566000 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 8.170280622729291e-06, |
| "loss": 3.2554, |
| "step": 566500 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 8.168636670009372e-06, |
| "loss": 3.2528, |
| "step": 567000 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 8.166992717289451e-06, |
| "loss": 3.2633, |
| "step": 567500 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 8.165348764569532e-06, |
| "loss": 3.2545, |
| "step": 568000 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 8.163704811849611e-06, |
| "loss": 3.257, |
| "step": 568500 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 8.162060859129692e-06, |
| "loss": 3.2531, |
| "step": 569000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 8.160416906409773e-06, |
| "loss": 3.2573, |
| "step": 569500 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 8.158772953689853e-06, |
| "loss": 3.259, |
| "step": 570000 |
| }, |
| { |
| "epoch": 4.67, |
| "eval_accuracy": 0.45185906617575416, |
| "eval_loss": 3.122938632965088, |
| "eval_runtime": 406.5956, |
| "eval_samples_per_second": 758.385, |
| "eval_steps_per_second": 15.802, |
| "step": 570000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 8.157129000969934e-06, |
| "loss": 3.2607, |
| "step": 570500 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 8.155485048250013e-06, |
| "loss": 3.2596, |
| "step": 571000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 8.153841095530092e-06, |
| "loss": 3.2551, |
| "step": 571500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 8.152197142810173e-06, |
| "loss": 3.26, |
| "step": 572000 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 8.150553190090254e-06, |
| "loss": 3.2566, |
| "step": 572500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 8.148909237370333e-06, |
| "loss": 3.2509, |
| "step": 573000 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 8.147265284650414e-06, |
| "loss": 3.2506, |
| "step": 573500 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 8.145621331930494e-06, |
| "loss": 3.2583, |
| "step": 574000 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 8.143977379210575e-06, |
| "loss": 3.2572, |
| "step": 574500 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 8.142333426490656e-06, |
| "loss": 3.2544, |
| "step": 575000 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 8.140689473770735e-06, |
| "loss": 3.2544, |
| "step": 575500 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 8.139045521050816e-06, |
| "loss": 3.2497, |
| "step": 576000 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 8.137401568330897e-06, |
| "loss": 3.2496, |
| "step": 576500 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 8.135757615610974e-06, |
| "loss": 3.2468, |
| "step": 577000 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 8.134113662891055e-06, |
| "loss": 3.2497, |
| "step": 577500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 8.132469710171136e-06, |
| "loss": 3.249, |
| "step": 578000 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 8.130825757451216e-06, |
| "loss": 3.2553, |
| "step": 578500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 8.129181804731297e-06, |
| "loss": 3.2604, |
| "step": 579000 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 8.127537852011378e-06, |
| "loss": 3.2497, |
| "step": 579500 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 8.125893899291457e-06, |
| "loss": 3.2503, |
| "step": 580000 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 8.124249946571538e-06, |
| "loss": 3.2484, |
| "step": 580500 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 8.122605993851617e-06, |
| "loss": 3.2596, |
| "step": 581000 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 8.120962041131698e-06, |
| "loss": 3.2464, |
| "step": 581500 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 8.119318088411779e-06, |
| "loss": 3.2508, |
| "step": 582000 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 8.117674135691858e-06, |
| "loss": 3.249, |
| "step": 582500 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 8.116030182971938e-06, |
| "loss": 3.2492, |
| "step": 583000 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 8.114386230252019e-06, |
| "loss": 3.2457, |
| "step": 583500 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 8.112742277532098e-06, |
| "loss": 3.2488, |
| "step": 584000 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 8.111098324812179e-06, |
| "loss": 3.2475, |
| "step": 584500 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 8.10945437209226e-06, |
| "loss": 3.2466, |
| "step": 585000 |
| }, |
| { |
| "epoch": 4.8, |
| "learning_rate": 8.107810419372339e-06, |
| "loss": 3.2452, |
| "step": 585500 |
| }, |
| { |
| "epoch": 4.8, |
| "learning_rate": 8.10616646665242e-06, |
| "loss": 3.2506, |
| "step": 586000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 8.1045225139325e-06, |
| "loss": 3.243, |
| "step": 586500 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 8.10287856121258e-06, |
| "loss": 3.2476, |
| "step": 587000 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 8.101234608492661e-06, |
| "loss": 3.2427, |
| "step": 587500 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 8.09959065577274e-06, |
| "loss": 3.2548, |
| "step": 588000 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 8.097946703052822e-06, |
| "loss": 3.2503, |
| "step": 588500 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 8.0963027503329e-06, |
| "loss": 3.2469, |
| "step": 589000 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 8.09465879761298e-06, |
| "loss": 3.239, |
| "step": 589500 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 8.093014844893061e-06, |
| "loss": 3.2494, |
| "step": 590000 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 8.091370892173142e-06, |
| "loss": 3.2423, |
| "step": 590500 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 8.089726939453221e-06, |
| "loss": 3.2455, |
| "step": 591000 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 8.088082986733302e-06, |
| "loss": 3.2395, |
| "step": 591500 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 8.086439034013383e-06, |
| "loss": 3.244, |
| "step": 592000 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 8.084795081293463e-06, |
| "loss": 3.2469, |
| "step": 592500 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 8.083151128573544e-06, |
| "loss": 3.244, |
| "step": 593000 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 8.081507175853623e-06, |
| "loss": 3.2457, |
| "step": 593500 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 8.079863223133704e-06, |
| "loss": 3.2449, |
| "step": 594000 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 8.078219270413785e-06, |
| "loss": 3.2407, |
| "step": 594500 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 8.076575317693864e-06, |
| "loss": 3.2462, |
| "step": 595000 |
| }, |
| { |
| "epoch": 4.88, |
| "learning_rate": 8.074931364973943e-06, |
| "loss": 3.2402, |
| "step": 595500 |
| }, |
| { |
| "epoch": 4.88, |
| "learning_rate": 8.073287412254024e-06, |
| "loss": 3.2431, |
| "step": 596000 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 8.071643459534104e-06, |
| "loss": 3.2429, |
| "step": 596500 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 8.069999506814185e-06, |
| "loss": 3.2418, |
| "step": 597000 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 8.068355554094265e-06, |
| "loss": 3.2403, |
| "step": 597500 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 8.066711601374345e-06, |
| "loss": 3.2438, |
| "step": 598000 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 8.065067648654426e-06, |
| "loss": 3.2456, |
| "step": 598500 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 8.063423695934505e-06, |
| "loss": 3.2345, |
| "step": 599000 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 8.061779743214586e-06, |
| "loss": 3.2393, |
| "step": 599500 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 8.060135790494667e-06, |
| "loss": 3.2429, |
| "step": 600000 |
| }, |
| { |
| "epoch": 4.92, |
| "eval_accuracy": 0.45349973762719414, |
| "eval_loss": 3.1096389293670654, |
| "eval_runtime": 406.7173, |
| "eval_samples_per_second": 758.158, |
| "eval_steps_per_second": 15.797, |
| "step": 600000 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 8.058491837774746e-06, |
| "loss": 3.2356, |
| "step": 600500 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 8.056847885054826e-06, |
| "loss": 3.2468, |
| "step": 601000 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 8.055203932334907e-06, |
| "loss": 3.2398, |
| "step": 601500 |
| }, |
| { |
| "epoch": 4.93, |
| "learning_rate": 8.053559979614986e-06, |
| "loss": 3.2352, |
| "step": 602000 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 8.051916026895067e-06, |
| "loss": 3.2442, |
| "step": 602500 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 8.050272074175148e-06, |
| "loss": 3.2401, |
| "step": 603000 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 8.048628121455227e-06, |
| "loss": 3.2406, |
| "step": 603500 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 8.046984168735308e-06, |
| "loss": 3.2348, |
| "step": 604000 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 8.045340216015389e-06, |
| "loss": 3.2435, |
| "step": 604500 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 8.043696263295468e-06, |
| "loss": 3.2372, |
| "step": 605000 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 8.04205231057555e-06, |
| "loss": 3.2415, |
| "step": 605500 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 8.040408357855628e-06, |
| "loss": 3.234, |
| "step": 606000 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 8.03876440513571e-06, |
| "loss": 3.2412, |
| "step": 606500 |
| }, |
| { |
| "epoch": 4.97, |
| "learning_rate": 8.037120452415789e-06, |
| "loss": 3.2354, |
| "step": 607000 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 8.03547649969587e-06, |
| "loss": 3.2394, |
| "step": 607500 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 8.033832546975949e-06, |
| "loss": 3.2317, |
| "step": 608000 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 8.03218859425603e-06, |
| "loss": 3.2447, |
| "step": 608500 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 8.03054464153611e-06, |
| "loss": 3.2438, |
| "step": 609000 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 8.02890068881619e-06, |
| "loss": 3.2373, |
| "step": 609500 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 8.027256736096271e-06, |
| "loss": 3.2411, |
| "step": 610000 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 8.02561278337635e-06, |
| "loss": 3.2358, |
| "step": 610500 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 8.023968830656431e-06, |
| "loss": 3.2351, |
| "step": 611000 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 8.02232487793651e-06, |
| "loss": 3.2341, |
| "step": 611500 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 8.020680925216592e-06, |
| "loss": 3.2321, |
| "step": 612000 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 8.019036972496673e-06, |
| "loss": 3.2408, |
| "step": 612500 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 8.017393019776752e-06, |
| "loss": 3.2406, |
| "step": 613000 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 8.015749067056831e-06, |
| "loss": 3.2375, |
| "step": 613500 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 8.014105114336912e-06, |
| "loss": 3.2416, |
| "step": 614000 |
| }, |
| { |
| "epoch": 5.03, |
| "learning_rate": 8.012461161616991e-06, |
| "loss": 3.2331, |
| "step": 614500 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 8.010817208897072e-06, |
| "loss": 3.2473, |
| "step": 615000 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 8.009173256177153e-06, |
| "loss": 3.2345, |
| "step": 615500 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 8.007529303457233e-06, |
| "loss": 3.2292, |
| "step": 616000 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 8.005885350737314e-06, |
| "loss": 3.2327, |
| "step": 616500 |
| }, |
| { |
| "epoch": 5.05, |
| "learning_rate": 8.004241398017395e-06, |
| "loss": 3.2251, |
| "step": 617000 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 8.002597445297474e-06, |
| "loss": 3.2326, |
| "step": 617500 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 8.000953492577555e-06, |
| "loss": 3.2348, |
| "step": 618000 |
| }, |
| { |
| "epoch": 5.07, |
| "learning_rate": 7.999309539857634e-06, |
| "loss": 3.231, |
| "step": 618500 |
| }, |
| { |
| "epoch": 5.07, |
| "learning_rate": 7.997665587137715e-06, |
| "loss": 3.236, |
| "step": 619000 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 7.996021634417794e-06, |
| "loss": 3.2324, |
| "step": 619500 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 7.994377681697875e-06, |
| "loss": 3.2321, |
| "step": 620000 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 7.992733728977955e-06, |
| "loss": 3.2326, |
| "step": 620500 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 7.991089776258036e-06, |
| "loss": 3.2318, |
| "step": 621000 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 7.989445823538115e-06, |
| "loss": 3.2291, |
| "step": 621500 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 7.987801870818196e-06, |
| "loss": 3.2291, |
| "step": 622000 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 7.986157918098277e-06, |
| "loss": 3.2264, |
| "step": 622500 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 7.984513965378356e-06, |
| "loss": 3.2319, |
| "step": 623000 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 7.982870012658437e-06, |
| "loss": 3.231, |
| "step": 623500 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 7.981226059938516e-06, |
| "loss": 3.2294, |
| "step": 624000 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 7.979582107218597e-06, |
| "loss": 3.2212, |
| "step": 624500 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 7.977938154498678e-06, |
| "loss": 3.2315, |
| "step": 625000 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 7.976294201778758e-06, |
| "loss": 3.2388, |
| "step": 625500 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 7.974650249058837e-06, |
| "loss": 3.2373, |
| "step": 626000 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 7.973006296338918e-06, |
| "loss": 3.2268, |
| "step": 626500 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 7.971362343618997e-06, |
| "loss": 3.2271, |
| "step": 627000 |
| }, |
| { |
| "epoch": 5.14, |
| "learning_rate": 7.969718390899078e-06, |
| "loss": 3.23, |
| "step": 627500 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 7.968074438179159e-06, |
| "loss": 3.2272, |
| "step": 628000 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 7.966430485459238e-06, |
| "loss": 3.2258, |
| "step": 628500 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 7.96478653273932e-06, |
| "loss": 3.2267, |
| "step": 629000 |
| }, |
| { |
| "epoch": 5.16, |
| "learning_rate": 7.9631425800194e-06, |
| "loss": 3.2273, |
| "step": 629500 |
| }, |
| { |
| "epoch": 5.16, |
| "learning_rate": 7.96149862729948e-06, |
| "loss": 3.2234, |
| "step": 630000 |
| }, |
| { |
| "epoch": 5.16, |
| "eval_accuracy": 0.4554354208134474, |
| "eval_loss": 3.0947325229644775, |
| "eval_runtime": 407.3576, |
| "eval_samples_per_second": 756.966, |
| "eval_steps_per_second": 15.772, |
| "step": 630000 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 7.95985467457956e-06, |
| "loss": 3.2299, |
| "step": 630500 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 7.95821072185964e-06, |
| "loss": 3.2245, |
| "step": 631000 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 7.95656676913972e-06, |
| "loss": 3.2264, |
| "step": 631500 |
| }, |
| { |
| "epoch": 5.18, |
| "learning_rate": 7.9549228164198e-06, |
| "loss": 3.2198, |
| "step": 632000 |
| }, |
| { |
| "epoch": 5.18, |
| "learning_rate": 7.953278863699881e-06, |
| "loss": 3.2207, |
| "step": 632500 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 7.95163491097996e-06, |
| "loss": 3.225, |
| "step": 633000 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 7.949990958260041e-06, |
| "loss": 3.2346, |
| "step": 633500 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 7.94834700554012e-06, |
| "loss": 3.228, |
| "step": 634000 |
| }, |
| { |
| "epoch": 5.2, |
| "learning_rate": 7.946703052820202e-06, |
| "loss": 3.2245, |
| "step": 634500 |
| }, |
| { |
| "epoch": 5.2, |
| "learning_rate": 7.945059100100283e-06, |
| "loss": 3.2281, |
| "step": 635000 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 7.943415147380362e-06, |
| "loss": 3.2263, |
| "step": 635500 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 7.941771194660443e-06, |
| "loss": 3.2242, |
| "step": 636000 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 7.940127241940522e-06, |
| "loss": 3.2237, |
| "step": 636500 |
| }, |
| { |
| "epoch": 5.22, |
| "learning_rate": 7.938483289220603e-06, |
| "loss": 3.2275, |
| "step": 637000 |
| }, |
| { |
| "epoch": 5.22, |
| "learning_rate": 7.936839336500682e-06, |
| "loss": 3.223, |
| "step": 637500 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 7.935195383780763e-06, |
| "loss": 3.2262, |
| "step": 638000 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 7.933551431060843e-06, |
| "loss": 3.2343, |
| "step": 638500 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 7.931907478340924e-06, |
| "loss": 3.225, |
| "step": 639000 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 7.930263525621003e-06, |
| "loss": 3.2203, |
| "step": 639500 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 7.928619572901084e-06, |
| "loss": 3.2276, |
| "step": 640000 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 7.926975620181165e-06, |
| "loss": 3.2216, |
| "step": 640500 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 7.925331667461244e-06, |
| "loss": 3.2199, |
| "step": 641000 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 7.923687714741325e-06, |
| "loss": 3.226, |
| "step": 641500 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 7.922043762021406e-06, |
| "loss": 3.2282, |
| "step": 642000 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 7.920399809301485e-06, |
| "loss": 3.2232, |
| "step": 642500 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 7.918755856581566e-06, |
| "loss": 3.2266, |
| "step": 643000 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 7.917111903861646e-06, |
| "loss": 3.2136, |
| "step": 643500 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 7.915467951141725e-06, |
| "loss": 3.2274, |
| "step": 644000 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 7.913823998421806e-06, |
| "loss": 3.2235, |
| "step": 644500 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 7.912180045701887e-06, |
| "loss": 3.2288, |
| "step": 645000 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 7.910536092981966e-06, |
| "loss": 3.2166, |
| "step": 645500 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 7.908892140262047e-06, |
| "loss": 3.2206, |
| "step": 646000 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 7.907248187542126e-06, |
| "loss": 3.2212, |
| "step": 646500 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 7.905604234822207e-06, |
| "loss": 3.2169, |
| "step": 647000 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 7.903960282102288e-06, |
| "loss": 3.2286, |
| "step": 647500 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 7.902316329382368e-06, |
| "loss": 3.2228, |
| "step": 648000 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 7.900672376662449e-06, |
| "loss": 3.2177, |
| "step": 648500 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 7.899028423942528e-06, |
| "loss": 3.2192, |
| "step": 649000 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 7.897384471222607e-06, |
| "loss": 3.2179, |
| "step": 649500 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 7.895740518502688e-06, |
| "loss": 3.227, |
| "step": 650000 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 7.894096565782769e-06, |
| "loss": 3.2206, |
| "step": 650500 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 7.892452613062848e-06, |
| "loss": 3.2183, |
| "step": 651000 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 7.89080866034293e-06, |
| "loss": 3.2277, |
| "step": 651500 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 7.889164707623009e-06, |
| "loss": 3.2174, |
| "step": 652000 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 7.88752075490309e-06, |
| "loss": 3.2233, |
| "step": 652500 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 7.88587680218317e-06, |
| "loss": 3.2165, |
| "step": 653000 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 7.88423284946325e-06, |
| "loss": 3.2164, |
| "step": 653500 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 7.88258889674333e-06, |
| "loss": 3.2157, |
| "step": 654000 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 7.880944944023412e-06, |
| "loss": 3.2215, |
| "step": 654500 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 7.879300991303491e-06, |
| "loss": 3.2217, |
| "step": 655000 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 7.87765703858357e-06, |
| "loss": 3.2213, |
| "step": 655500 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 7.876013085863651e-06, |
| "loss": 3.2122, |
| "step": 656000 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 7.87436913314373e-06, |
| "loss": 3.2176, |
| "step": 656500 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 7.872725180423812e-06, |
| "loss": 3.2147, |
| "step": 657000 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 7.871081227703893e-06, |
| "loss": 3.217, |
| "step": 657500 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 7.869437274983972e-06, |
| "loss": 3.219, |
| "step": 658000 |
| }, |
| { |
| "epoch": 5.39, |
| "learning_rate": 7.867793322264053e-06, |
| "loss": 3.2219, |
| "step": 658500 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 7.866149369544132e-06, |
| "loss": 3.215, |
| "step": 659000 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 7.864505416824213e-06, |
| "loss": 3.2098, |
| "step": 659500 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 7.862861464104294e-06, |
| "loss": 3.2115, |
| "step": 660000 |
| }, |
| { |
| "epoch": 5.41, |
| "eval_accuracy": 0.4573150918223206, |
| "eval_loss": 3.0818052291870117, |
| "eval_runtime": 409.6703, |
| "eval_samples_per_second": 752.693, |
| "eval_steps_per_second": 15.683, |
| "step": 660000 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 7.861217511384373e-06, |
| "loss": 3.214, |
| "step": 660500 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 7.859573558664454e-06, |
| "loss": 3.2186, |
| "step": 661000 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 7.857929605944534e-06, |
| "loss": 3.2234, |
| "step": 661500 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 7.856285653224613e-06, |
| "loss": 3.22, |
| "step": 662000 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 7.854641700504694e-06, |
| "loss": 3.2187, |
| "step": 662500 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 7.852997747784775e-06, |
| "loss": 3.2166, |
| "step": 663000 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 7.851353795064854e-06, |
| "loss": 3.218, |
| "step": 663500 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 7.849709842344935e-06, |
| "loss": 3.2183, |
| "step": 664000 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 7.848065889625014e-06, |
| "loss": 3.2065, |
| "step": 664500 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 7.846421936905095e-06, |
| "loss": 3.2123, |
| "step": 665000 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 7.844777984185176e-06, |
| "loss": 3.2178, |
| "step": 665500 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 7.843134031465256e-06, |
| "loss": 3.215, |
| "step": 666000 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 7.841490078745336e-06, |
| "loss": 3.212, |
| "step": 666500 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 7.839846126025416e-06, |
| "loss": 3.2089, |
| "step": 667000 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 7.838202173305495e-06, |
| "loss": 3.2153, |
| "step": 667500 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 7.836558220585576e-06, |
| "loss": 3.2125, |
| "step": 668000 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 7.834914267865657e-06, |
| "loss": 3.2162, |
| "step": 668500 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 7.833270315145736e-06, |
| "loss": 3.2106, |
| "step": 669000 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 7.831626362425817e-06, |
| "loss": 3.2122, |
| "step": 669500 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 7.829982409705898e-06, |
| "loss": 3.2046, |
| "step": 670000 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 7.828338456985978e-06, |
| "loss": 3.202, |
| "step": 670500 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 7.826694504266058e-06, |
| "loss": 3.2076, |
| "step": 671000 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 7.825050551546138e-06, |
| "loss": 3.2012, |
| "step": 671500 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 7.823406598826219e-06, |
| "loss": 3.2134, |
| "step": 672000 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 7.8217626461063e-06, |
| "loss": 3.2088, |
| "step": 672500 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 7.820118693386379e-06, |
| "loss": 3.2135, |
| "step": 673000 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 7.81847474066646e-06, |
| "loss": 3.206, |
| "step": 673500 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 7.81683078794654e-06, |
| "loss": 3.2101, |
| "step": 674000 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 7.815186835226619e-06, |
| "loss": 3.2208, |
| "step": 674500 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 7.8135428825067e-06, |
| "loss": 3.2144, |
| "step": 675000 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 7.81189892978678e-06, |
| "loss": 3.2144, |
| "step": 675500 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 7.81025497706686e-06, |
| "loss": 3.2076, |
| "step": 676000 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 7.80861102434694e-06, |
| "loss": 3.2067, |
| "step": 676500 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 7.80696707162702e-06, |
| "loss": 3.2107, |
| "step": 677000 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 7.805323118907101e-06, |
| "loss": 3.2109, |
| "step": 677500 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 7.803679166187182e-06, |
| "loss": 3.2116, |
| "step": 678000 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 7.802035213467261e-06, |
| "loss": 3.205, |
| "step": 678500 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 7.800391260747342e-06, |
| "loss": 3.2019, |
| "step": 679000 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 7.798747308027421e-06, |
| "loss": 3.2089, |
| "step": 679500 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 7.7971033553075e-06, |
| "loss": 3.2096, |
| "step": 680000 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 7.795459402587582e-06, |
| "loss": 3.2131, |
| "step": 680500 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 7.793815449867663e-06, |
| "loss": 3.2079, |
| "step": 681000 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 7.792171497147742e-06, |
| "loss": 3.2019, |
| "step": 681500 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 7.790527544427823e-06, |
| "loss": 3.2092, |
| "step": 682000 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 7.788883591707904e-06, |
| "loss": 3.2066, |
| "step": 682500 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 7.787239638987983e-06, |
| "loss": 3.1992, |
| "step": 683000 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 7.785595686268064e-06, |
| "loss": 3.2042, |
| "step": 683500 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 7.783951733548143e-06, |
| "loss": 3.1997, |
| "step": 684000 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 7.782307780828224e-06, |
| "loss": 3.2064, |
| "step": 684500 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 7.780663828108305e-06, |
| "loss": 3.2102, |
| "step": 685000 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 7.779019875388385e-06, |
| "loss": 3.202, |
| "step": 685500 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 7.777375922668464e-06, |
| "loss": 3.2049, |
| "step": 686000 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 7.775731969948545e-06, |
| "loss": 3.212, |
| "step": 686500 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 7.774088017228624e-06, |
| "loss": 3.2008, |
| "step": 687000 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 7.772444064508705e-06, |
| "loss": 3.2015, |
| "step": 687500 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 7.770800111788786e-06, |
| "loss": 3.1986, |
| "step": 688000 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 7.769156159068865e-06, |
| "loss": 3.2028, |
| "step": 688500 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 7.767512206348946e-06, |
| "loss": 3.21, |
| "step": 689000 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 7.765868253629026e-06, |
| "loss": 3.2104, |
| "step": 689500 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 7.764224300909107e-06, |
| "loss": 3.2011, |
| "step": 690000 |
| }, |
| { |
| "epoch": 5.65, |
| "eval_accuracy": 0.4590029752907897, |
| "eval_loss": 3.068504810333252, |
| "eval_runtime": 404.8644, |
| "eval_samples_per_second": 761.628, |
| "eval_steps_per_second": 15.87, |
| "step": 690000 |
| }, |
| { |
| "epoch": 5.66, |
| "learning_rate": 7.762580348189188e-06, |
| "loss": 3.2015, |
| "step": 690500 |
| }, |
| { |
| "epoch": 5.66, |
| "learning_rate": 7.760936395469267e-06, |
| "loss": 3.2047, |
| "step": 691000 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 7.759292442749348e-06, |
| "loss": 3.2034, |
| "step": 691500 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 7.757648490029427e-06, |
| "loss": 3.1976, |
| "step": 692000 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 7.756004537309506e-06, |
| "loss": 3.2029, |
| "step": 692500 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 7.754360584589587e-06, |
| "loss": 3.1993, |
| "step": 693000 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 7.752716631869668e-06, |
| "loss": 3.2092, |
| "step": 693500 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 7.751072679149748e-06, |
| "loss": 3.2087, |
| "step": 694000 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 7.749428726429829e-06, |
| "loss": 3.1983, |
| "step": 694500 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 7.747784773709908e-06, |
| "loss": 3.209, |
| "step": 695000 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 7.746140820989989e-06, |
| "loss": 3.1986, |
| "step": 695500 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 7.74449686827007e-06, |
| "loss": 3.2066, |
| "step": 696000 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 7.74285291555015e-06, |
| "loss": 3.2, |
| "step": 696500 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 7.74120896283023e-06, |
| "loss": 3.1934, |
| "step": 697000 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 7.739565010110311e-06, |
| "loss": 3.1966, |
| "step": 697500 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 7.73792105739039e-06, |
| "loss": 3.1951, |
| "step": 698000 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 7.73627710467047e-06, |
| "loss": 3.2011, |
| "step": 698500 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 7.73463315195055e-06, |
| "loss": 3.2043, |
| "step": 699000 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 7.73298919923063e-06, |
| "loss": 3.1936, |
| "step": 699500 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 7.731345246510711e-06, |
| "loss": 3.1944, |
| "step": 700000 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 7.729701293790792e-06, |
| "loss": 3.1976, |
| "step": 700500 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 7.728057341070871e-06, |
| "loss": 3.1919, |
| "step": 701000 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 7.726413388350952e-06, |
| "loss": 3.2013, |
| "step": 701500 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 7.724769435631031e-06, |
| "loss": 3.1973, |
| "step": 702000 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 7.723125482911112e-06, |
| "loss": 3.1948, |
| "step": 702500 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 7.721481530191193e-06, |
| "loss": 3.1967, |
| "step": 703000 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 7.719837577471273e-06, |
| "loss": 3.1862, |
| "step": 703500 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 7.718193624751352e-06, |
| "loss": 3.1962, |
| "step": 704000 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 7.716549672031433e-06, |
| "loss": 3.1963, |
| "step": 704500 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 7.714905719311512e-06, |
| "loss": 3.1839, |
| "step": 705000 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 7.713261766591593e-06, |
| "loss": 3.1969, |
| "step": 705500 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 7.711617813871674e-06, |
| "loss": 3.1906, |
| "step": 706000 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 7.709973861151753e-06, |
| "loss": 3.1996, |
| "step": 706500 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 7.708329908431834e-06, |
| "loss": 3.1966, |
| "step": 707000 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 7.706685955711914e-06, |
| "loss": 3.1923, |
| "step": 707500 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 7.705042002991995e-06, |
| "loss": 3.1893, |
| "step": 708000 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 7.703398050272076e-06, |
| "loss": 3.1887, |
| "step": 708500 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 7.701754097552155e-06, |
| "loss": 3.1983, |
| "step": 709000 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 7.700110144832236e-06, |
| "loss": 3.1923, |
| "step": 709500 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 7.698466192112315e-06, |
| "loss": 3.1964, |
| "step": 710000 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 7.696822239392396e-06, |
| "loss": 3.1965, |
| "step": 710500 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 7.695178286672475e-06, |
| "loss": 3.1941, |
| "step": 711000 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 7.693534333952556e-06, |
| "loss": 3.1893, |
| "step": 711500 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 7.691890381232636e-06, |
| "loss": 3.1945, |
| "step": 712000 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 7.690246428512717e-06, |
| "loss": 3.1931, |
| "step": 712500 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 7.688602475792798e-06, |
| "loss": 3.1853, |
| "step": 713000 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 7.686958523072877e-06, |
| "loss": 3.1902, |
| "step": 713500 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 7.685314570352958e-06, |
| "loss": 3.2006, |
| "step": 714000 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 7.683670617633037e-06, |
| "loss": 3.195, |
| "step": 714500 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 7.682026664913118e-06, |
| "loss": 3.1975, |
| "step": 715000 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 7.680382712193199e-06, |
| "loss": 3.1905, |
| "step": 715500 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 7.678738759473278e-06, |
| "loss": 3.1872, |
| "step": 716000 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 7.677094806753358e-06, |
| "loss": 3.1891, |
| "step": 716500 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 7.675450854033439e-06, |
| "loss": 3.1857, |
| "step": 717000 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 7.673806901313518e-06, |
| "loss": 3.1894, |
| "step": 717500 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 7.672162948593599e-06, |
| "loss": 3.1856, |
| "step": 718000 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 7.67051899587368e-06, |
| "loss": 3.1828, |
| "step": 718500 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 7.668875043153759e-06, |
| "loss": 3.1812, |
| "step": 719000 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 7.66723109043384e-06, |
| "loss": 3.1908, |
| "step": 719500 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 7.66558713771392e-06, |
| "loss": 3.1898, |
| "step": 720000 |
| }, |
| { |
| "epoch": 5.9, |
| "eval_accuracy": 0.46191997629259224, |
| "eval_loss": 3.0464377403259277, |
| "eval_runtime": 413.3901, |
| "eval_samples_per_second": 745.92, |
| "eval_steps_per_second": 15.542, |
| "step": 720000 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 7.663943184994e-06, |
| "loss": 3.1823, |
| "step": 720500 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 7.662299232274081e-06, |
| "loss": 3.1837, |
| "step": 721000 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 7.66065527955416e-06, |
| "loss": 3.1796, |
| "step": 721500 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 7.659011326834242e-06, |
| "loss": 3.188, |
| "step": 722000 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 7.65736737411432e-06, |
| "loss": 3.1883, |
| "step": 722500 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 7.6557234213944e-06, |
| "loss": 3.1769, |
| "step": 723000 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 7.654079468674481e-06, |
| "loss": 3.1887, |
| "step": 723500 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 7.652435515954562e-06, |
| "loss": 3.1806, |
| "step": 724000 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 7.650791563234641e-06, |
| "loss": 3.1816, |
| "step": 724500 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 7.649147610514722e-06, |
| "loss": 3.1748, |
| "step": 725000 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 7.647503657794803e-06, |
| "loss": 3.1888, |
| "step": 725500 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 7.645859705074883e-06, |
| "loss": 3.1823, |
| "step": 726000 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 7.644215752354964e-06, |
| "loss": 3.1763, |
| "step": 726500 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 7.642571799635043e-06, |
| "loss": 3.1843, |
| "step": 727000 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 7.640927846915124e-06, |
| "loss": 3.1856, |
| "step": 727500 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 7.639283894195205e-06, |
| "loss": 3.1812, |
| "step": 728000 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 7.637639941475284e-06, |
| "loss": 3.176, |
| "step": 728500 |
| }, |
| { |
| "epoch": 5.97, |
| "learning_rate": 7.635995988755363e-06, |
| "loss": 3.1842, |
| "step": 729000 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 7.634352036035444e-06, |
| "loss": 3.1775, |
| "step": 729500 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 7.632708083315524e-06, |
| "loss": 3.1803, |
| "step": 730000 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 7.631064130595605e-06, |
| "loss": 3.1818, |
| "step": 730500 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 7.629420177875686e-06, |
| "loss": 3.1754, |
| "step": 731000 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 7.627776225155765e-06, |
| "loss": 3.1827, |
| "step": 731500 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 7.626132272435846e-06, |
| "loss": 3.1745, |
| "step": 732000 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 7.624488319715926e-06, |
| "loss": 3.1769, |
| "step": 732500 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 7.622844366996006e-06, |
| "loss": 3.1717, |
| "step": 733000 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 7.621200414276086e-06, |
| "loss": 3.1804, |
| "step": 733500 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 7.619556461556167e-06, |
| "loss": 3.1726, |
| "step": 734000 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 7.617912508836246e-06, |
| "loss": 3.1754, |
| "step": 734500 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 7.6162685561163265e-06, |
| "loss": 3.177, |
| "step": 735000 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 7.614624603396407e-06, |
| "loss": 3.1697, |
| "step": 735500 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 7.612980650676487e-06, |
| "loss": 3.1712, |
| "step": 736000 |
| }, |
| { |
| "epoch": 6.03, |
| "learning_rate": 7.611336697956567e-06, |
| "loss": 3.1734, |
| "step": 736500 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 7.609692745236648e-06, |
| "loss": 3.1702, |
| "step": 737000 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 7.608048792516728e-06, |
| "loss": 3.1723, |
| "step": 737500 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 7.606404839796808e-06, |
| "loss": 3.1815, |
| "step": 738000 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 7.604760887076888e-06, |
| "loss": 3.1775, |
| "step": 738500 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 7.603116934356969e-06, |
| "loss": 3.1654, |
| "step": 739000 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 7.601472981637049e-06, |
| "loss": 3.1639, |
| "step": 739500 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 7.5998290289171295e-06, |
| "loss": 3.1757, |
| "step": 740000 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 7.598185076197209e-06, |
| "loss": 3.176, |
| "step": 740500 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 7.596541123477289e-06, |
| "loss": 3.1781, |
| "step": 741000 |
| }, |
| { |
| "epoch": 6.07, |
| "learning_rate": 7.594897170757369e-06, |
| "loss": 3.1671, |
| "step": 741500 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 7.59325321803745e-06, |
| "loss": 3.1691, |
| "step": 742000 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 7.59160926531753e-06, |
| "loss": 3.1701, |
| "step": 742500 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 7.58996531259761e-06, |
| "loss": 3.1767, |
| "step": 743000 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 7.58832135987769e-06, |
| "loss": 3.1719, |
| "step": 743500 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 7.5866774071577705e-06, |
| "loss": 3.1576, |
| "step": 744000 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 7.5850334544378515e-06, |
| "loss": 3.167, |
| "step": 744500 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 7.583389501717932e-06, |
| "loss": 3.172, |
| "step": 745000 |
| }, |
| { |
| "epoch": 6.11, |
| "learning_rate": 7.581745548998012e-06, |
| "loss": 3.17, |
| "step": 745500 |
| }, |
| { |
| "epoch": 6.11, |
| "learning_rate": 7.580101596278092e-06, |
| "loss": 3.1622, |
| "step": 746000 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 7.578457643558171e-06, |
| "loss": 3.1708, |
| "step": 746500 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 7.576813690838252e-06, |
| "loss": 3.1658, |
| "step": 747000 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 7.575169738118332e-06, |
| "loss": 3.1665, |
| "step": 747500 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 7.573525785398412e-06, |
| "loss": 3.157, |
| "step": 748000 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 7.5718818326784925e-06, |
| "loss": 3.164, |
| "step": 748500 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 7.570237879958573e-06, |
| "loss": 3.1626, |
| "step": 749000 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 7.568593927238654e-06, |
| "loss": 3.162, |
| "step": 749500 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 7.566949974518734e-06, |
| "loss": 3.1651, |
| "step": 750000 |
| }, |
| { |
| "epoch": 6.14, |
| "eval_accuracy": 0.4657812440113609, |
| "eval_loss": 3.022564172744751, |
| "eval_runtime": 408.1772, |
| "eval_samples_per_second": 755.446, |
| "eval_steps_per_second": 15.741, |
| "step": 750000 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 7.565306021798814e-06, |
| "loss": 3.1641, |
| "step": 750500 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 7.563662069078894e-06, |
| "loss": 3.153, |
| "step": 751000 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 7.562018116358975e-06, |
| "loss": 3.1609, |
| "step": 751500 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 7.560374163639055e-06, |
| "loss": 3.1574, |
| "step": 752000 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 7.558730210919134e-06, |
| "loss": 3.1566, |
| "step": 752500 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 7.5570862581992145e-06, |
| "loss": 3.162, |
| "step": 753000 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 7.555442305479295e-06, |
| "loss": 3.1615, |
| "step": 753500 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 7.553798352759375e-06, |
| "loss": 3.1636, |
| "step": 754000 |
| }, |
| { |
| "epoch": 6.18, |
| "learning_rate": 7.552154400039456e-06, |
| "loss": 3.1591, |
| "step": 754500 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 7.550510447319536e-06, |
| "loss": 3.1607, |
| "step": 755000 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 7.548866494599616e-06, |
| "loss": 3.1572, |
| "step": 755500 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 7.547222541879696e-06, |
| "loss": 3.1628, |
| "step": 756000 |
| }, |
| { |
| "epoch": 6.2, |
| "learning_rate": 7.545578589159776e-06, |
| "loss": 3.1555, |
| "step": 756500 |
| }, |
| { |
| "epoch": 6.2, |
| "learning_rate": 7.543934636439857e-06, |
| "loss": 3.1581, |
| "step": 757000 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 7.542290683719937e-06, |
| "loss": 3.1646, |
| "step": 757500 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 7.5406467310000174e-06, |
| "loss": 3.1626, |
| "step": 758000 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 7.539002778280097e-06, |
| "loss": 3.1525, |
| "step": 758500 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 7.537358825560177e-06, |
| "loss": 3.1567, |
| "step": 759000 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 7.535714872840257e-06, |
| "loss": 3.1599, |
| "step": 759500 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 7.534070920120338e-06, |
| "loss": 3.1513, |
| "step": 760000 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 7.532426967400418e-06, |
| "loss": 3.1621, |
| "step": 760500 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 7.530783014680498e-06, |
| "loss": 3.1586, |
| "step": 761000 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 7.529139061960578e-06, |
| "loss": 3.1493, |
| "step": 761500 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 7.527495109240659e-06, |
| "loss": 3.1613, |
| "step": 762000 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 7.525851156520739e-06, |
| "loss": 3.1542, |
| "step": 762500 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 7.5242072038008195e-06, |
| "loss": 3.1486, |
| "step": 763000 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 7.5225632510809e-06, |
| "loss": 3.1542, |
| "step": 763500 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 7.520919298360981e-06, |
| "loss": 3.1495, |
| "step": 764000 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 7.519275345641059e-06, |
| "loss": 3.1548, |
| "step": 764500 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 7.51763139292114e-06, |
| "loss": 3.1525, |
| "step": 765000 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 7.51598744020122e-06, |
| "loss": 3.1562, |
| "step": 765500 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 7.5143434874813e-06, |
| "loss": 3.163, |
| "step": 766000 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 7.5126995347613804e-06, |
| "loss": 3.1509, |
| "step": 766500 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 7.511055582041461e-06, |
| "loss": 3.1502, |
| "step": 767000 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 7.5094116293215415e-06, |
| "loss": 3.1499, |
| "step": 767500 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 7.507767676601622e-06, |
| "loss": 3.1461, |
| "step": 768000 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 7.506123723881702e-06, |
| "loss": 3.1437, |
| "step": 768500 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 7.504479771161782e-06, |
| "loss": 3.1509, |
| "step": 769000 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 7.502835818441863e-06, |
| "loss": 3.1525, |
| "step": 769500 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 7.501191865721943e-06, |
| "loss": 3.1488, |
| "step": 770000 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 7.499547913002023e-06, |
| "loss": 3.1541, |
| "step": 770500 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 7.497903960282102e-06, |
| "loss": 3.147, |
| "step": 771000 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 7.4962600075621825e-06, |
| "loss": 3.1472, |
| "step": 771500 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 7.494616054842263e-06, |
| "loss": 3.15, |
| "step": 772000 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.492972102122344e-06, |
| "loss": 3.1486, |
| "step": 772500 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.491328149402424e-06, |
| "loss": 3.1611, |
| "step": 773000 |
| }, |
| { |
| "epoch": 6.34, |
| "learning_rate": 7.489684196682504e-06, |
| "loss": 3.1517, |
| "step": 773500 |
| }, |
| { |
| "epoch": 6.34, |
| "learning_rate": 7.488040243962584e-06, |
| "loss": 3.1473, |
| "step": 774000 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 7.486396291242665e-06, |
| "loss": 3.1438, |
| "step": 774500 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 7.484752338522745e-06, |
| "loss": 3.137, |
| "step": 775000 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 7.483108385802825e-06, |
| "loss": 3.15, |
| "step": 775500 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 7.481464433082905e-06, |
| "loss": 3.1464, |
| "step": 776000 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 7.4798204803629855e-06, |
| "loss": 3.1438, |
| "step": 776500 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 7.478176527643065e-06, |
| "loss": 3.1437, |
| "step": 777000 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 7.476532574923146e-06, |
| "loss": 3.1401, |
| "step": 777500 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 7.474888622203226e-06, |
| "loss": 3.144, |
| "step": 778000 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 7.473244669483306e-06, |
| "loss": 3.1513, |
| "step": 778500 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 7.471600716763386e-06, |
| "loss": 3.1559, |
| "step": 779000 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 7.469956764043467e-06, |
| "loss": 3.1461, |
| "step": 779500 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 7.468312811323547e-06, |
| "loss": 3.1477, |
| "step": 780000 |
| }, |
| { |
| "epoch": 6.39, |
| "eval_accuracy": 0.4689297446428191, |
| "eval_loss": 3.002525568008423, |
| "eval_runtime": 411.5642, |
| "eval_samples_per_second": 749.229, |
| "eval_steps_per_second": 15.611, |
| "step": 780000 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 7.466668858603627e-06, |
| "loss": 3.1411, |
| "step": 780500 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 7.4650249058837075e-06, |
| "loss": 3.1422, |
| "step": 781000 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 7.463380953163788e-06, |
| "loss": 3.1464, |
| "step": 781500 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 7.461737000443869e-06, |
| "loss": 3.1429, |
| "step": 782000 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 7.460093047723949e-06, |
| "loss": 3.1451, |
| "step": 782500 |
| }, |
| { |
| "epoch": 6.41, |
| "learning_rate": 7.458449095004028e-06, |
| "loss": 3.1436, |
| "step": 783000 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 7.456805142284108e-06, |
| "loss": 3.1398, |
| "step": 783500 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 7.455161189564188e-06, |
| "loss": 3.1447, |
| "step": 784000 |
| }, |
| { |
| "epoch": 6.43, |
| "learning_rate": 7.453517236844268e-06, |
| "loss": 3.1414, |
| "step": 784500 |
| }, |
| { |
| "epoch": 6.43, |
| "learning_rate": 7.451873284124349e-06, |
| "loss": 3.1408, |
| "step": 785000 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 7.4502293314044295e-06, |
| "loss": 3.1384, |
| "step": 785500 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 7.44858537868451e-06, |
| "loss": 3.1389, |
| "step": 786000 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 7.44694142596459e-06, |
| "loss": 3.149, |
| "step": 786500 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 7.445297473244671e-06, |
| "loss": 3.1404, |
| "step": 787000 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 7.443653520524751e-06, |
| "loss": 3.1353, |
| "step": 787500 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 7.442009567804831e-06, |
| "loss": 3.134, |
| "step": 788000 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 7.440365615084911e-06, |
| "loss": 3.1423, |
| "step": 788500 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 7.43872166236499e-06, |
| "loss": 3.14, |
| "step": 789000 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 7.4370777096450705e-06, |
| "loss": 3.1309, |
| "step": 789500 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 7.4354337569251515e-06, |
| "loss": 3.1384, |
| "step": 790000 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 7.433789804205232e-06, |
| "loss": 3.1371, |
| "step": 790500 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 7.432145851485312e-06, |
| "loss": 3.136, |
| "step": 791000 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 7.430501898765392e-06, |
| "loss": 3.1433, |
| "step": 791500 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 7.428857946045473e-06, |
| "loss": 3.1398, |
| "step": 792000 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 7.427213993325553e-06, |
| "loss": 3.1428, |
| "step": 792500 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 7.425570040605633e-06, |
| "loss": 3.1316, |
| "step": 793000 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 7.423926087885713e-06, |
| "loss": 3.1392, |
| "step": 793500 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 7.422282135165793e-06, |
| "loss": 3.1288, |
| "step": 794000 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 7.420638182445874e-06, |
| "loss": 3.1354, |
| "step": 794500 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 7.4189942297259536e-06, |
| "loss": 3.1339, |
| "step": 795000 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 7.417350277006034e-06, |
| "loss": 3.1419, |
| "step": 795500 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 7.415706324286114e-06, |
| "loss": 3.1363, |
| "step": 796000 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 7.414062371566194e-06, |
| "loss": 3.134, |
| "step": 796500 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 7.412418418846274e-06, |
| "loss": 3.13, |
| "step": 797000 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 7.410774466126355e-06, |
| "loss": 3.1382, |
| "step": 797500 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 7.409130513406435e-06, |
| "loss": 3.1341, |
| "step": 798000 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 7.407486560686515e-06, |
| "loss": 3.1306, |
| "step": 798500 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 7.4058426079665954e-06, |
| "loss": 3.134, |
| "step": 799000 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 7.404198655246676e-06, |
| "loss": 3.1329, |
| "step": 799500 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 7.4025547025267565e-06, |
| "loss": 3.1382, |
| "step": 800000 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 7.400910749806837e-06, |
| "loss": 3.1293, |
| "step": 800500 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 7.399266797086916e-06, |
| "loss": 3.135, |
| "step": 801000 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 7.397622844366996e-06, |
| "loss": 3.1407, |
| "step": 801500 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 7.395978891647076e-06, |
| "loss": 3.1354, |
| "step": 802000 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 7.394334938927157e-06, |
| "loss": 3.1371, |
| "step": 802500 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 7.392690986207237e-06, |
| "loss": 3.1362, |
| "step": 803000 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 7.391047033487317e-06, |
| "loss": 3.129, |
| "step": 803500 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 7.3894030807673975e-06, |
| "loss": 3.1378, |
| "step": 804000 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 7.387759128047478e-06, |
| "loss": 3.125, |
| "step": 804500 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 7.386115175327559e-06, |
| "loss": 3.1285, |
| "step": 805000 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 7.384471222607639e-06, |
| "loss": 3.1344, |
| "step": 805500 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 7.382827269887719e-06, |
| "loss": 3.1241, |
| "step": 806000 |
| }, |
| { |
| "epoch": 6.61, |
| "learning_rate": 7.381183317167799e-06, |
| "loss": 3.1264, |
| "step": 806500 |
| }, |
| { |
| "epoch": 6.61, |
| "learning_rate": 7.379539364447878e-06, |
| "loss": 3.1278, |
| "step": 807000 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 7.377895411727959e-06, |
| "loss": 3.1363, |
| "step": 807500 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 7.376251459008039e-06, |
| "loss": 3.124, |
| "step": 808000 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 7.3746075062881195e-06, |
| "loss": 3.1246, |
| "step": 808500 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 7.3729635535682e-06, |
| "loss": 3.1281, |
| "step": 809000 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 7.37131960084828e-06, |
| "loss": 3.1198, |
| "step": 809500 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 7.369675648128361e-06, |
| "loss": 3.1276, |
| "step": 810000 |
| }, |
| { |
| "epoch": 6.64, |
| "eval_accuracy": 0.47175398712230254, |
| "eval_loss": 2.9837634563446045, |
| "eval_runtime": 427.1705, |
| "eval_samples_per_second": 721.857, |
| "eval_steps_per_second": 15.041, |
| "step": 810000 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 7.368031695408441e-06, |
| "loss": 3.1275, |
| "step": 810500 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 7.366387742688521e-06, |
| "loss": 3.1262, |
| "step": 811000 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 7.364743789968601e-06, |
| "loss": 3.1216, |
| "step": 811500 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 7.363099837248682e-06, |
| "loss": 3.1212, |
| "step": 812000 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 7.361455884528762e-06, |
| "loss": 3.1169, |
| "step": 812500 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 7.3598119318088415e-06, |
| "loss": 3.1221, |
| "step": 813000 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 7.358167979088922e-06, |
| "loss": 3.128, |
| "step": 813500 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 7.356524026369002e-06, |
| "loss": 3.1245, |
| "step": 814000 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 7.354880073649082e-06, |
| "loss": 3.1248, |
| "step": 814500 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 7.353236120929163e-06, |
| "loss": 3.1219, |
| "step": 815000 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 7.351592168209243e-06, |
| "loss": 3.1285, |
| "step": 815500 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 7.349948215489323e-06, |
| "loss": 3.1275, |
| "step": 816000 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 7.348304262769403e-06, |
| "loss": 3.1213, |
| "step": 816500 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 7.346660310049483e-06, |
| "loss": 3.1211, |
| "step": 817000 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 7.345016357329564e-06, |
| "loss": 3.1231, |
| "step": 817500 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 7.3433724046096445e-06, |
| "loss": 3.1306, |
| "step": 818000 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 7.341728451889725e-06, |
| "loss": 3.1169, |
| "step": 818500 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 7.340084499169804e-06, |
| "loss": 3.1254, |
| "step": 819000 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 7.338440546449884e-06, |
| "loss": 3.1182, |
| "step": 819500 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 7.336796593729965e-06, |
| "loss": 3.1185, |
| "step": 820000 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 7.335152641010045e-06, |
| "loss": 3.1197, |
| "step": 820500 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 7.333508688290125e-06, |
| "loss": 3.1243, |
| "step": 821000 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 7.331864735570205e-06, |
| "loss": 3.1223, |
| "step": 821500 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 7.3302207828502855e-06, |
| "loss": 3.1259, |
| "step": 822000 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 7.3285768301303665e-06, |
| "loss": 3.1265, |
| "step": 822500 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 7.326932877410447e-06, |
| "loss": 3.1141, |
| "step": 823000 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 7.325288924690527e-06, |
| "loss": 3.1206, |
| "step": 823500 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 7.323644971970607e-06, |
| "loss": 3.1178, |
| "step": 824000 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 7.322001019250688e-06, |
| "loss": 3.1122, |
| "step": 824500 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 7.320357066530768e-06, |
| "loss": 3.1187, |
| "step": 825000 |
| }, |
| { |
| "epoch": 6.76, |
| "learning_rate": 7.318713113810847e-06, |
| "loss": 3.118, |
| "step": 825500 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 7.317069161090927e-06, |
| "loss": 3.1143, |
| "step": 826000 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 7.3154252083710075e-06, |
| "loss": 3.1176, |
| "step": 826500 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 7.313781255651088e-06, |
| "loss": 3.1117, |
| "step": 827000 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 7.3121373029311686e-06, |
| "loss": 3.1185, |
| "step": 827500 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 7.310493350211249e-06, |
| "loss": 3.1208, |
| "step": 828000 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 7.308849397491329e-06, |
| "loss": 3.1171, |
| "step": 828500 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 7.307205444771409e-06, |
| "loss": 3.1127, |
| "step": 829000 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 7.305561492051489e-06, |
| "loss": 3.1146, |
| "step": 829500 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 7.30391753933157e-06, |
| "loss": 3.1141, |
| "step": 830000 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 7.30227358661165e-06, |
| "loss": 3.1191, |
| "step": 830500 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 7.30062963389173e-06, |
| "loss": 3.123, |
| "step": 831000 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 7.29898568117181e-06, |
| "loss": 3.1178, |
| "step": 831500 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 7.29734172845189e-06, |
| "loss": 3.1133, |
| "step": 832000 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 7.29569777573197e-06, |
| "loss": 3.1174, |
| "step": 832500 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 7.294053823012051e-06, |
| "loss": 3.1201, |
| "step": 833000 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 7.292409870292131e-06, |
| "loss": 3.1199, |
| "step": 833500 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 7.290765917572211e-06, |
| "loss": 3.1157, |
| "step": 834000 |
| }, |
| { |
| "epoch": 6.84, |
| "learning_rate": 7.289121964852291e-06, |
| "loss": 3.1183, |
| "step": 834500 |
| }, |
| { |
| "epoch": 6.84, |
| "learning_rate": 7.287478012132372e-06, |
| "loss": 3.1141, |
| "step": 835000 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 7.285834059412452e-06, |
| "loss": 3.1165, |
| "step": 835500 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 7.284190106692532e-06, |
| "loss": 3.1112, |
| "step": 836000 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 7.2825461539726125e-06, |
| "loss": 3.1217, |
| "step": 836500 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 7.2809022012526935e-06, |
| "loss": 3.1089, |
| "step": 837000 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 7.279258248532772e-06, |
| "loss": 3.118, |
| "step": 837500 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 7.277614295812853e-06, |
| "loss": 3.1127, |
| "step": 838000 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 7.275970343092933e-06, |
| "loss": 3.1117, |
| "step": 838500 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 7.274326390373013e-06, |
| "loss": 3.1112, |
| "step": 839000 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 7.272682437653093e-06, |
| "loss": 3.1084, |
| "step": 839500 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 7.271038484933174e-06, |
| "loss": 3.1102, |
| "step": 840000 |
| }, |
| { |
| "epoch": 6.88, |
| "eval_accuracy": 0.47402583913951796, |
| "eval_loss": 2.969046115875244, |
| "eval_runtime": 428.0247, |
| "eval_samples_per_second": 720.416, |
| "eval_steps_per_second": 15.011, |
| "step": 840000 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 7.269394532213254e-06, |
| "loss": 3.1116, |
| "step": 840500 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 7.2677505794933345e-06, |
| "loss": 3.1169, |
| "step": 841000 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 7.266106626773415e-06, |
| "loss": 3.1087, |
| "step": 841500 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 7.264462674053495e-06, |
| "loss": 3.1085, |
| "step": 842000 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 7.262818721333576e-06, |
| "loss": 3.1109, |
| "step": 842500 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 7.261174768613656e-06, |
| "loss": 3.112, |
| "step": 843000 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 7.259530815893735e-06, |
| "loss": 3.1072, |
| "step": 843500 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 7.257886863173815e-06, |
| "loss": 3.1142, |
| "step": 844000 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 7.256242910453895e-06, |
| "loss": 3.1146, |
| "step": 844500 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 7.2545989577339755e-06, |
| "loss": 3.1098, |
| "step": 845000 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 7.2529550050140565e-06, |
| "loss": 3.1138, |
| "step": 845500 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 7.251311052294137e-06, |
| "loss": 3.1111, |
| "step": 846000 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 7.249667099574217e-06, |
| "loss": 3.1045, |
| "step": 846500 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 7.248023146854297e-06, |
| "loss": 3.1108, |
| "step": 847000 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 7.246379194134378e-06, |
| "loss": 3.109, |
| "step": 847500 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 7.244735241414458e-06, |
| "loss": 3.118, |
| "step": 848000 |
| }, |
| { |
| "epoch": 6.95, |
| "learning_rate": 7.243091288694538e-06, |
| "loss": 3.1119, |
| "step": 848500 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 7.241447335974618e-06, |
| "loss": 3.1125, |
| "step": 849000 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 7.2398033832546975e-06, |
| "loss": 3.1131, |
| "step": 849500 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 7.238159430534778e-06, |
| "loss": 3.1072, |
| "step": 850000 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 7.236515477814859e-06, |
| "loss": 3.1102, |
| "step": 850500 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 7.234871525094939e-06, |
| "loss": 3.1095, |
| "step": 851000 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 7.233227572375019e-06, |
| "loss": 3.1029, |
| "step": 851500 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 7.231583619655099e-06, |
| "loss": 3.1064, |
| "step": 852000 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 7.22993966693518e-06, |
| "loss": 3.1105, |
| "step": 852500 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 7.22829571421526e-06, |
| "loss": 3.1084, |
| "step": 853000 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 7.22665176149534e-06, |
| "loss": 3.107, |
| "step": 853500 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 7.22500780877542e-06, |
| "loss": 3.1091, |
| "step": 854000 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 7.2233638560555005e-06, |
| "loss": 3.1021, |
| "step": 854500 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 7.2217199033355815e-06, |
| "loss": 3.0996, |
| "step": 855000 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 7.220075950615661e-06, |
| "loss": 3.0913, |
| "step": 855500 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 7.218431997895741e-06, |
| "loss": 3.1022, |
| "step": 856000 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 7.216788045175821e-06, |
| "loss": 3.1077, |
| "step": 856500 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 7.215144092455901e-06, |
| "loss": 3.1003, |
| "step": 857000 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 7.213500139735981e-06, |
| "loss": 3.1036, |
| "step": 857500 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 7.211856187016062e-06, |
| "loss": 3.1037, |
| "step": 858000 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 7.210212234296142e-06, |
| "loss": 3.1082, |
| "step": 858500 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 7.2085682815762225e-06, |
| "loss": 3.1052, |
| "step": 859000 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 7.206924328856303e-06, |
| "loss": 3.1078, |
| "step": 859500 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 7.2052803761363836e-06, |
| "loss": 3.1049, |
| "step": 860000 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 7.203636423416464e-06, |
| "loss": 3.0981, |
| "step": 860500 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 7.201992470696544e-06, |
| "loss": 3.1124, |
| "step": 861000 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 7.200348517976623e-06, |
| "loss": 3.1003, |
| "step": 861500 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 7.198704565256703e-06, |
| "loss": 3.1041, |
| "step": 862000 |
| }, |
| { |
| "epoch": 7.07, |
| "learning_rate": 7.197060612536783e-06, |
| "loss": 3.0975, |
| "step": 862500 |
| }, |
| { |
| "epoch": 7.07, |
| "learning_rate": 7.195416659816864e-06, |
| "loss": 3.1068, |
| "step": 863000 |
| }, |
| { |
| "epoch": 7.07, |
| "learning_rate": 7.1937727070969445e-06, |
| "loss": 3.1015, |
| "step": 863500 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 7.192128754377025e-06, |
| "loss": 3.1072, |
| "step": 864000 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 7.190484801657105e-06, |
| "loss": 3.0992, |
| "step": 864500 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 7.188840848937186e-06, |
| "loss": 3.1031, |
| "step": 865000 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 7.187196896217266e-06, |
| "loss": 3.0964, |
| "step": 865500 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 7.185552943497346e-06, |
| "loss": 3.1047, |
| "step": 866000 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 7.183908990777426e-06, |
| "loss": 3.103, |
| "step": 866500 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 7.182265038057506e-06, |
| "loss": 3.0944, |
| "step": 867000 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 7.1806210853375855e-06, |
| "loss": 3.0977, |
| "step": 867500 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 7.1789771326176664e-06, |
| "loss": 3.1003, |
| "step": 868000 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 7.1773331798977466e-06, |
| "loss": 3.1025, |
| "step": 868500 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 7.175689227177827e-06, |
| "loss": 3.097, |
| "step": 869000 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 7.174045274457907e-06, |
| "loss": 3.0947, |
| "step": 869500 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 7.172401321737987e-06, |
| "loss": 3.1046, |
| "step": 870000 |
| }, |
| { |
| "epoch": 7.13, |
| "eval_accuracy": 0.4757021230011519, |
| "eval_loss": 2.956329584121704, |
| "eval_runtime": 428.2951, |
| "eval_samples_per_second": 719.962, |
| "eval_steps_per_second": 15.001, |
| "step": 870000 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 7.170757369018068e-06, |
| "loss": 3.1, |
| "step": 870500 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 7.169113416298148e-06, |
| "loss": 3.0954, |
| "step": 871000 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 7.167469463578228e-06, |
| "loss": 3.0978, |
| "step": 871500 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 7.165825510858308e-06, |
| "loss": 3.097, |
| "step": 872000 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 7.164181558138389e-06, |
| "loss": 3.1062, |
| "step": 872500 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 7.162537605418469e-06, |
| "loss": 3.0963, |
| "step": 873000 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 7.1608936526985495e-06, |
| "loss": 3.0909, |
| "step": 873500 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 7.159249699978629e-06, |
| "loss": 3.0979, |
| "step": 874000 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 7.157605747258709e-06, |
| "loss": 3.0919, |
| "step": 874500 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 7.155961794538789e-06, |
| "loss": 3.0945, |
| "step": 875000 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 7.15431784181887e-06, |
| "loss": 3.0981, |
| "step": 875500 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 7.15267388909895e-06, |
| "loss": 3.0912, |
| "step": 876000 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 7.15102993637903e-06, |
| "loss": 3.0948, |
| "step": 876500 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 7.14938598365911e-06, |
| "loss": 3.1008, |
| "step": 877000 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 7.1477420309391905e-06, |
| "loss": 3.103, |
| "step": 877500 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 7.1460980782192715e-06, |
| "loss": 3.0908, |
| "step": 878000 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 7.144454125499352e-06, |
| "loss": 3.0975, |
| "step": 878500 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 7.142810172779432e-06, |
| "loss": 3.0972, |
| "step": 879000 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 7.141166220059512e-06, |
| "loss": 3.0939, |
| "step": 879500 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 7.139522267339591e-06, |
| "loss": 3.099, |
| "step": 880000 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 7.137878314619672e-06, |
| "loss": 3.0973, |
| "step": 880500 |
| }, |
| { |
| "epoch": 7.22, |
| "learning_rate": 7.136234361899752e-06, |
| "loss": 3.0922, |
| "step": 881000 |
| }, |
| { |
| "epoch": 7.22, |
| "learning_rate": 7.134590409179832e-06, |
| "loss": 3.0964, |
| "step": 881500 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 7.1329464564599125e-06, |
| "loss": 3.0954, |
| "step": 882000 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 7.131302503739993e-06, |
| "loss": 3.1022, |
| "step": 882500 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 7.129658551020074e-06, |
| "loss": 3.099, |
| "step": 883000 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 7.128014598300154e-06, |
| "loss": 3.0887, |
| "step": 883500 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 7.126370645580234e-06, |
| "loss": 3.0924, |
| "step": 884000 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 7.124726692860314e-06, |
| "loss": 3.0895, |
| "step": 884500 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 7.123082740140395e-06, |
| "loss": 3.0968, |
| "step": 885000 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 7.121438787420475e-06, |
| "loss": 3.0948, |
| "step": 885500 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 7.119794834700554e-06, |
| "loss": 3.0969, |
| "step": 886000 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 7.1181508819806345e-06, |
| "loss": 3.0952, |
| "step": 886500 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 7.116506929260715e-06, |
| "loss": 3.0957, |
| "step": 887000 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 7.114862976540795e-06, |
| "loss": 3.0874, |
| "step": 887500 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 7.113219023820876e-06, |
| "loss": 3.0911, |
| "step": 888000 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 7.111575071100956e-06, |
| "loss": 3.0898, |
| "step": 888500 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 7.109931118381036e-06, |
| "loss": 3.0941, |
| "step": 889000 |
| }, |
| { |
| "epoch": 7.29, |
| "learning_rate": 7.108287165661116e-06, |
| "loss": 3.097, |
| "step": 889500 |
| }, |
| { |
| "epoch": 7.29, |
| "learning_rate": 7.106643212941196e-06, |
| "loss": 3.0977, |
| "step": 890000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 7.104999260221277e-06, |
| "loss": 3.0923, |
| "step": 890500 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 7.103355307501357e-06, |
| "loss": 3.0844, |
| "step": 891000 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 7.1017113547814375e-06, |
| "loss": 3.0889, |
| "step": 891500 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 7.100067402061517e-06, |
| "loss": 3.0952, |
| "step": 892000 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 7.098423449341597e-06, |
| "loss": 3.0884, |
| "step": 892500 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 7.096779496621678e-06, |
| "loss": 3.0956, |
| "step": 893000 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 7.095135543901758e-06, |
| "loss": 3.0888, |
| "step": 893500 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 7.093491591181838e-06, |
| "loss": 3.092, |
| "step": 894000 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 7.091847638461918e-06, |
| "loss": 3.0842, |
| "step": 894500 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 7.090203685741998e-06, |
| "loss": 3.092, |
| "step": 895000 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 7.088559733022079e-06, |
| "loss": 3.0886, |
| "step": 895500 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 7.0869157803021594e-06, |
| "loss": 3.0845, |
| "step": 896000 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 7.0852718275822396e-06, |
| "loss": 3.0871, |
| "step": 896500 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 7.08362787486232e-06, |
| "loss": 3.0804, |
| "step": 897000 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 7.081983922142401e-06, |
| "loss": 3.0817, |
| "step": 897500 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 7.080339969422479e-06, |
| "loss": 3.0935, |
| "step": 898000 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 7.07869601670256e-06, |
| "loss": 3.0893, |
| "step": 898500 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 7.07705206398264e-06, |
| "loss": 3.0876, |
| "step": 899000 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 7.07540811126272e-06, |
| "loss": 3.0946, |
| "step": 899500 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 7.0737641585428005e-06, |
| "loss": 3.0817, |
| "step": 900000 |
| }, |
| { |
| "epoch": 7.37, |
| "eval_accuracy": 0.47711464421124683, |
| "eval_loss": 2.9477081298828125, |
| "eval_runtime": 415.7627, |
| "eval_samples_per_second": 741.663, |
| "eval_steps_per_second": 15.454, |
| "step": 900000 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 7.0721202058228814e-06, |
| "loss": 3.0775, |
| "step": 900500 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 7.0704762531029616e-06, |
| "loss": 3.0839, |
| "step": 901000 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 7.068832300383042e-06, |
| "loss": 3.086, |
| "step": 901500 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 7.067188347663122e-06, |
| "loss": 3.0927, |
| "step": 902000 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 7.065544394943202e-06, |
| "loss": 3.0884, |
| "step": 902500 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 7.063900442223283e-06, |
| "loss": 3.0848, |
| "step": 903000 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 7.062256489503363e-06, |
| "loss": 3.0805, |
| "step": 903500 |
| }, |
| { |
| "epoch": 7.41, |
| "learning_rate": 7.060612536783442e-06, |
| "loss": 3.0891, |
| "step": 904000 |
| }, |
| { |
| "epoch": 7.41, |
| "learning_rate": 7.0589685840635224e-06, |
| "loss": 3.0887, |
| "step": 904500 |
| }, |
| { |
| "epoch": 7.41, |
| "learning_rate": 7.0573246313436026e-06, |
| "loss": 3.0889, |
| "step": 905000 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 7.055680678623683e-06, |
| "loss": 3.0918, |
| "step": 905500 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 7.054036725903764e-06, |
| "loss": 3.0735, |
| "step": 906000 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 7.052392773183844e-06, |
| "loss": 3.0821, |
| "step": 906500 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 7.050748820463924e-06, |
| "loss": 3.0843, |
| "step": 907000 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 7.049104867744004e-06, |
| "loss": 3.088, |
| "step": 907500 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 7.047460915024085e-06, |
| "loss": 3.0885, |
| "step": 908000 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 7.045816962304165e-06, |
| "loss": 3.0836, |
| "step": 908500 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 7.044173009584245e-06, |
| "loss": 3.083, |
| "step": 909000 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 7.042529056864325e-06, |
| "loss": 3.0838, |
| "step": 909500 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 7.040885104144405e-06, |
| "loss": 3.0789, |
| "step": 910000 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 7.039241151424485e-06, |
| "loss": 3.082, |
| "step": 910500 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 7.037597198704566e-06, |
| "loss": 3.08, |
| "step": 911000 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 7.035953245984646e-06, |
| "loss": 3.0853, |
| "step": 911500 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 7.034309293264726e-06, |
| "loss": 3.0808, |
| "step": 912000 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 7.032665340544806e-06, |
| "loss": 3.0855, |
| "step": 912500 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 7.031021387824887e-06, |
| "loss": 3.0833, |
| "step": 913000 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 7.029377435104967e-06, |
| "loss": 3.0754, |
| "step": 913500 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 7.027733482385047e-06, |
| "loss": 3.0819, |
| "step": 914000 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 7.0260895296651275e-06, |
| "loss": 3.083, |
| "step": 914500 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 7.024445576945208e-06, |
| "loss": 3.0783, |
| "step": 915000 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 7.022801624225289e-06, |
| "loss": 3.0812, |
| "step": 915500 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 7.021157671505368e-06, |
| "loss": 3.0782, |
| "step": 916000 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 7.019513718785448e-06, |
| "loss": 3.0797, |
| "step": 916500 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 7.017869766065528e-06, |
| "loss": 3.0876, |
| "step": 917000 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 7.016225813345608e-06, |
| "loss": 3.0857, |
| "step": 917500 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 7.014581860625688e-06, |
| "loss": 3.0776, |
| "step": 918000 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 7.012937907905769e-06, |
| "loss": 3.085, |
| "step": 918500 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 7.0112939551858495e-06, |
| "loss": 3.0817, |
| "step": 919000 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 7.00965000246593e-06, |
| "loss": 3.0807, |
| "step": 919500 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 7.00800604974601e-06, |
| "loss": 3.0892, |
| "step": 920000 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 7.006362097026091e-06, |
| "loss": 3.0781, |
| "step": 920500 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 7.004718144306171e-06, |
| "loss": 3.0726, |
| "step": 921000 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 7.003074191586251e-06, |
| "loss": 3.0824, |
| "step": 921500 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 7.001430238866331e-06, |
| "loss": 3.0816, |
| "step": 922000 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 6.99978628614641e-06, |
| "loss": 3.0747, |
| "step": 922500 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 6.9981423334264905e-06, |
| "loss": 3.0819, |
| "step": 923000 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 6.9964983807065715e-06, |
| "loss": 3.0806, |
| "step": 923500 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 6.994854427986652e-06, |
| "loss": 3.0842, |
| "step": 924000 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 6.993210475266732e-06, |
| "loss": 3.0732, |
| "step": 924500 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 6.991566522546812e-06, |
| "loss": 3.0746, |
| "step": 925000 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 6.989922569826893e-06, |
| "loss": 3.0789, |
| "step": 925500 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 6.988278617106973e-06, |
| "loss": 3.0817, |
| "step": 926000 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 6.986634664387053e-06, |
| "loss": 3.0718, |
| "step": 926500 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 6.984990711667133e-06, |
| "loss": 3.0824, |
| "step": 927000 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 6.983346758947213e-06, |
| "loss": 3.0776, |
| "step": 927500 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 6.981702806227294e-06, |
| "loss": 3.075, |
| "step": 928000 |
| }, |
| { |
| "epoch": 7.61, |
| "learning_rate": 6.980058853507374e-06, |
| "loss": 3.0787, |
| "step": 928500 |
| }, |
| { |
| "epoch": 7.61, |
| "learning_rate": 6.978414900787454e-06, |
| "loss": 3.0708, |
| "step": 929000 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 6.976770948067534e-06, |
| "loss": 3.086, |
| "step": 929500 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 6.975126995347614e-06, |
| "loss": 3.0813, |
| "step": 930000 |
| }, |
| { |
| "epoch": 7.62, |
| "eval_accuracy": 0.478479420881626, |
| "eval_loss": 2.939741611480713, |
| "eval_runtime": 412.5363, |
| "eval_samples_per_second": 747.464, |
| "eval_steps_per_second": 15.574, |
| "step": 930000 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 6.973483042627694e-06, |
| "loss": 3.0788, |
| "step": 930500 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 6.971839089907775e-06, |
| "loss": 3.0747, |
| "step": 931000 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 6.970195137187855e-06, |
| "loss": 3.0767, |
| "step": 931500 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 6.968551184467935e-06, |
| "loss": 3.0797, |
| "step": 932000 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 6.9669072317480155e-06, |
| "loss": 3.0759, |
| "step": 932500 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 6.9652632790280964e-06, |
| "loss": 3.0809, |
| "step": 933000 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 6.9636193263081766e-06, |
| "loss": 3.0756, |
| "step": 933500 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 6.961975373588257e-06, |
| "loss": 3.0814, |
| "step": 934000 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 6.960331420868336e-06, |
| "loss": 3.0757, |
| "step": 934500 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 6.958687468148416e-06, |
| "loss": 3.0666, |
| "step": 935000 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 6.957043515428496e-06, |
| "loss": 3.0683, |
| "step": 935500 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 6.955399562708577e-06, |
| "loss": 3.0742, |
| "step": 936000 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 6.953755609988657e-06, |
| "loss": 3.0645, |
| "step": 936500 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 6.9521116572687374e-06, |
| "loss": 3.0785, |
| "step": 937000 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 6.9504677045488176e-06, |
| "loss": 3.0819, |
| "step": 937500 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 6.9488237518288985e-06, |
| "loss": 3.0803, |
| "step": 938000 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 6.947179799108979e-06, |
| "loss": 3.0805, |
| "step": 938500 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 6.945535846389059e-06, |
| "loss": 3.0775, |
| "step": 939000 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 6.943891893669139e-06, |
| "loss": 3.0725, |
| "step": 939500 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 6.942247940949219e-06, |
| "loss": 3.0744, |
| "step": 940000 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 6.940603988229298e-06, |
| "loss": 3.0714, |
| "step": 940500 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 6.938960035509379e-06, |
| "loss": 3.0715, |
| "step": 941000 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 6.9373160827894594e-06, |
| "loss": 3.0773, |
| "step": 941500 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 6.9356721300695396e-06, |
| "loss": 3.0694, |
| "step": 942000 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 6.93402817734962e-06, |
| "loss": 3.0711, |
| "step": 942500 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 6.9323842246297e-06, |
| "loss": 3.0744, |
| "step": 943000 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 6.930740271909781e-06, |
| "loss": 3.0734, |
| "step": 943500 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 6.929096319189861e-06, |
| "loss": 3.0716, |
| "step": 944000 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 6.927452366469941e-06, |
| "loss": 3.0772, |
| "step": 944500 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 6.925808413750021e-06, |
| "loss": 3.0681, |
| "step": 945000 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 6.924164461030102e-06, |
| "loss": 3.0786, |
| "step": 945500 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 6.922520508310182e-06, |
| "loss": 3.075, |
| "step": 946000 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 6.9208765555902615e-06, |
| "loss": 3.0774, |
| "step": 946500 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 6.919232602870342e-06, |
| "loss": 3.0651, |
| "step": 947000 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 6.917588650150422e-06, |
| "loss": 3.0708, |
| "step": 947500 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 6.915944697430502e-06, |
| "loss": 3.0813, |
| "step": 948000 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 6.914300744710583e-06, |
| "loss": 3.0719, |
| "step": 948500 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 6.912656791990663e-06, |
| "loss": 3.0739, |
| "step": 949000 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 6.911012839270743e-06, |
| "loss": 3.07, |
| "step": 949500 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 6.909368886550823e-06, |
| "loss": 3.0715, |
| "step": 950000 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 6.907724933830903e-06, |
| "loss": 3.0675, |
| "step": 950500 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 6.906080981110984e-06, |
| "loss": 3.0693, |
| "step": 951000 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 6.9044370283910645e-06, |
| "loss": 3.0755, |
| "step": 951500 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 6.902793075671145e-06, |
| "loss": 3.0741, |
| "step": 952000 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 6.901149122951224e-06, |
| "loss": 3.068, |
| "step": 952500 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 6.899505170231304e-06, |
| "loss": 3.0707, |
| "step": 953000 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 6.897861217511385e-06, |
| "loss": 3.0731, |
| "step": 953500 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 6.896217264791465e-06, |
| "loss": 3.0658, |
| "step": 954000 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 6.894573312071545e-06, |
| "loss": 3.064, |
| "step": 954500 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 6.892929359351625e-06, |
| "loss": 3.0747, |
| "step": 955000 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 6.8912854066317055e-06, |
| "loss": 3.0731, |
| "step": 955500 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 6.8896414539117865e-06, |
| "loss": 3.0695, |
| "step": 956000 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 6.887997501191867e-06, |
| "loss": 3.0668, |
| "step": 956500 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 6.886353548471947e-06, |
| "loss": 3.0691, |
| "step": 957000 |
| }, |
| { |
| "epoch": 7.84, |
| "learning_rate": 6.884709595752027e-06, |
| "loss": 3.068, |
| "step": 957500 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 6.883065643032108e-06, |
| "loss": 3.0675, |
| "step": 958000 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 6.881421690312186e-06, |
| "loss": 3.0692, |
| "step": 958500 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 6.879777737592267e-06, |
| "loss": 3.0684, |
| "step": 959000 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 6.878133784872347e-06, |
| "loss": 3.0657, |
| "step": 959500 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 6.8764898321524275e-06, |
| "loss": 3.0709, |
| "step": 960000 |
| }, |
| { |
| "epoch": 7.87, |
| "eval_accuracy": 0.4803523362943657, |
| "eval_loss": 2.925929307937622, |
| "eval_runtime": 423.4884, |
| "eval_samples_per_second": 728.133, |
| "eval_steps_per_second": 15.172, |
| "step": 960000 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 6.874845879432508e-06, |
| "loss": 3.0705, |
| "step": 960500 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 6.873201926712589e-06, |
| "loss": 3.0672, |
| "step": 961000 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 6.871557973992669e-06, |
| "loss": 3.064, |
| "step": 961500 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 6.869914021272749e-06, |
| "loss": 3.0644, |
| "step": 962000 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 6.868270068552829e-06, |
| "loss": 3.0636, |
| "step": 962500 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 6.866626115832909e-06, |
| "loss": 3.0693, |
| "step": 963000 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 6.86498216311299e-06, |
| "loss": 3.0597, |
| "step": 963500 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 6.86333821039307e-06, |
| "loss": 3.0646, |
| "step": 964000 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 6.8616942576731495e-06, |
| "loss": 3.0692, |
| "step": 964500 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 6.86005030495323e-06, |
| "loss": 3.0657, |
| "step": 965000 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 6.85840635223331e-06, |
| "loss": 3.061, |
| "step": 965500 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 6.856762399513391e-06, |
| "loss": 3.0604, |
| "step": 966000 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 6.855118446793471e-06, |
| "loss": 3.062, |
| "step": 966500 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 6.853474494073551e-06, |
| "loss": 3.0679, |
| "step": 967000 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 6.851830541353631e-06, |
| "loss": 3.0556, |
| "step": 967500 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 6.850186588633711e-06, |
| "loss": 3.0666, |
| "step": 968000 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 6.848542635913792e-06, |
| "loss": 3.0748, |
| "step": 968500 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 6.846898683193872e-06, |
| "loss": 3.0643, |
| "step": 969000 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 6.8452547304739524e-06, |
| "loss": 3.0599, |
| "step": 969500 |
| }, |
| { |
| "epoch": 7.95, |
| "learning_rate": 6.8436107777540326e-06, |
| "loss": 3.0642, |
| "step": 970000 |
| }, |
| { |
| "epoch": 7.95, |
| "learning_rate": 6.841966825034112e-06, |
| "loss": 3.0588, |
| "step": 970500 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 6.840322872314192e-06, |
| "loss": 3.058, |
| "step": 971000 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 6.838678919594273e-06, |
| "loss": 3.0621, |
| "step": 971500 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 6.837034966874353e-06, |
| "loss": 3.0656, |
| "step": 972000 |
| }, |
| { |
| "epoch": 7.97, |
| "learning_rate": 6.835391014154433e-06, |
| "loss": 3.0681, |
| "step": 972500 |
| }, |
| { |
| "epoch": 7.97, |
| "learning_rate": 6.833747061434513e-06, |
| "loss": 3.0672, |
| "step": 973000 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 6.832103108714594e-06, |
| "loss": 3.068, |
| "step": 973500 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 6.8304591559946744e-06, |
| "loss": 3.0616, |
| "step": 974000 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 6.8288152032747546e-06, |
| "loss": 3.0651, |
| "step": 974500 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 6.827171250554835e-06, |
| "loss": 3.0669, |
| "step": 975000 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 6.825527297834915e-06, |
| "loss": 3.0668, |
| "step": 975500 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 6.823883345114996e-06, |
| "loss": 3.0692, |
| "step": 976000 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 6.822239392395076e-06, |
| "loss": 3.0622, |
| "step": 976500 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 6.820595439675155e-06, |
| "loss": 3.0618, |
| "step": 977000 |
| }, |
| { |
| "epoch": 8.01, |
| "learning_rate": 6.818951486955235e-06, |
| "loss": 3.0672, |
| "step": 977500 |
| }, |
| { |
| "epoch": 8.01, |
| "learning_rate": 6.8173075342353154e-06, |
| "loss": 3.0686, |
| "step": 978000 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 6.8156635815153956e-06, |
| "loss": 3.065, |
| "step": 978500 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 6.8140196287954765e-06, |
| "loss": 3.0568, |
| "step": 979000 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 6.812375676075557e-06, |
| "loss": 3.0605, |
| "step": 979500 |
| }, |
| { |
| "epoch": 8.03, |
| "learning_rate": 6.810731723355637e-06, |
| "loss": 3.0586, |
| "step": 980000 |
| }, |
| { |
| "epoch": 8.03, |
| "learning_rate": 6.809087770635717e-06, |
| "loss": 3.061, |
| "step": 980500 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 6.807443817915798e-06, |
| "loss": 3.0603, |
| "step": 981000 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 6.805799865195878e-06, |
| "loss": 3.0591, |
| "step": 981500 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 6.804155912475958e-06, |
| "loss": 3.0645, |
| "step": 982000 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 6.802511959756038e-06, |
| "loss": 3.0609, |
| "step": 982500 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 6.8008680070361176e-06, |
| "loss": 3.0596, |
| "step": 983000 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 6.799224054316198e-06, |
| "loss": 3.0609, |
| "step": 983500 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 6.797580101596279e-06, |
| "loss": 3.0547, |
| "step": 984000 |
| }, |
| { |
| "epoch": 8.07, |
| "learning_rate": 6.795936148876359e-06, |
| "loss": 3.0566, |
| "step": 984500 |
| }, |
| { |
| "epoch": 8.07, |
| "learning_rate": 6.794292196156439e-06, |
| "loss": 3.0511, |
| "step": 985000 |
| }, |
| { |
| "epoch": 8.07, |
| "learning_rate": 6.792648243436519e-06, |
| "loss": 3.0621, |
| "step": 985500 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 6.7910042907166e-06, |
| "loss": 3.0607, |
| "step": 986000 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 6.78936033799668e-06, |
| "loss": 3.0596, |
| "step": 986500 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 6.78771638527676e-06, |
| "loss": 3.0615, |
| "step": 987000 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 6.78607243255684e-06, |
| "loss": 3.0581, |
| "step": 987500 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 6.7844284798369205e-06, |
| "loss": 3.0586, |
| "step": 988000 |
| }, |
| { |
| "epoch": 8.1, |
| "learning_rate": 6.7827845271170015e-06, |
| "loss": 3.0569, |
| "step": 988500 |
| }, |
| { |
| "epoch": 8.1, |
| "learning_rate": 6.781140574397081e-06, |
| "loss": 3.0522, |
| "step": 989000 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 6.779496621677161e-06, |
| "loss": 3.0566, |
| "step": 989500 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 6.777852668957241e-06, |
| "loss": 3.0528, |
| "step": 990000 |
| }, |
| { |
| "epoch": 8.11, |
| "eval_accuracy": 0.4812049123013876, |
| "eval_loss": 2.920849323272705, |
| "eval_runtime": 415.0385, |
| "eval_samples_per_second": 742.958, |
| "eval_steps_per_second": 15.48, |
| "step": 990000 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 6.776208716237321e-06, |
| "loss": 3.0549, |
| "step": 990500 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 6.774564763517401e-06, |
| "loss": 3.0566, |
| "step": 991000 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 6.772920810797482e-06, |
| "loss": 3.0576, |
| "step": 991500 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 6.771276858077562e-06, |
| "loss": 3.0539, |
| "step": 992000 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 6.7696329053576425e-06, |
| "loss": 3.0525, |
| "step": 992500 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 6.767988952637723e-06, |
| "loss": 3.0545, |
| "step": 993000 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 6.766344999917804e-06, |
| "loss": 3.0543, |
| "step": 993500 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 6.764701047197884e-06, |
| "loss": 3.0492, |
| "step": 994000 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 6.763057094477964e-06, |
| "loss": 3.0608, |
| "step": 994500 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 6.761413141758043e-06, |
| "loss": 3.0615, |
| "step": 995000 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 6.759769189038123e-06, |
| "loss": 3.0605, |
| "step": 995500 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 6.758125236318203e-06, |
| "loss": 3.0584, |
| "step": 996000 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 6.756481283598284e-06, |
| "loss": 3.0554, |
| "step": 996500 |
| }, |
| { |
| "epoch": 8.17, |
| "learning_rate": 6.7548373308783645e-06, |
| "loss": 3.0558, |
| "step": 997000 |
| }, |
| { |
| "epoch": 8.17, |
| "learning_rate": 6.753193378158445e-06, |
| "loss": 3.056, |
| "step": 997500 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 6.751549425438525e-06, |
| "loss": 3.0517, |
| "step": 998000 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 6.749905472718606e-06, |
| "loss": 3.0549, |
| "step": 998500 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 6.748261519998686e-06, |
| "loss": 3.0613, |
| "step": 999000 |
| }, |
| { |
| "epoch": 8.19, |
| "learning_rate": 6.746617567278766e-06, |
| "loss": 3.0511, |
| "step": 999500 |
| }, |
| { |
| "epoch": 8.19, |
| "learning_rate": 6.744973614558846e-06, |
| "loss": 3.0535, |
| "step": 1000000 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 6.743329661838926e-06, |
| "loss": 3.0498, |
| "step": 1000500 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 6.7416857091190055e-06, |
| "loss": 3.0545, |
| "step": 1001000 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 6.7400417563990865e-06, |
| "loss": 3.0522, |
| "step": 1001500 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 6.738397803679167e-06, |
| "loss": 3.0562, |
| "step": 1002000 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 6.736753850959247e-06, |
| "loss": 3.0551, |
| "step": 1002500 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 6.735109898239327e-06, |
| "loss": 3.0594, |
| "step": 1003000 |
| }, |
| { |
| "epoch": 8.22, |
| "learning_rate": 6.733465945519407e-06, |
| "loss": 3.0588, |
| "step": 1003500 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 6.731821992799488e-06, |
| "loss": 3.0536, |
| "step": 1004000 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 6.730178040079568e-06, |
| "loss": 3.0492, |
| "step": 1004500 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 6.728534087359648e-06, |
| "loss": 3.049, |
| "step": 1005000 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 6.726890134639728e-06, |
| "loss": 3.0574, |
| "step": 1005500 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 6.725246181919809e-06, |
| "loss": 3.0467, |
| "step": 1006000 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 6.7236022291998894e-06, |
| "loss": 3.0529, |
| "step": 1006500 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 6.721958276479969e-06, |
| "loss": 3.0568, |
| "step": 1007000 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 6.720314323760049e-06, |
| "loss": 3.0447, |
| "step": 1007500 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 6.718670371040129e-06, |
| "loss": 3.0549, |
| "step": 1008000 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 6.717026418320209e-06, |
| "loss": 3.0504, |
| "step": 1008500 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 6.71538246560029e-06, |
| "loss": 3.0499, |
| "step": 1009000 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 6.71373851288037e-06, |
| "loss": 3.0537, |
| "step": 1009500 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 6.71209456016045e-06, |
| "loss": 3.0496, |
| "step": 1010000 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 6.7104506074405304e-06, |
| "loss": 3.0458, |
| "step": 1010500 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 6.708806654720611e-06, |
| "loss": 3.047, |
| "step": 1011000 |
| }, |
| { |
| "epoch": 8.29, |
| "learning_rate": 6.7071627020006915e-06, |
| "loss": 3.0525, |
| "step": 1011500 |
| }, |
| { |
| "epoch": 8.29, |
| "learning_rate": 6.705518749280772e-06, |
| "loss": 3.0566, |
| "step": 1012000 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 6.703874796560852e-06, |
| "loss": 3.0503, |
| "step": 1012500 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 6.702230843840931e-06, |
| "loss": 3.0531, |
| "step": 1013000 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 6.700586891121011e-06, |
| "loss": 3.0495, |
| "step": 1013500 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 6.698942938401092e-06, |
| "loss": 3.052, |
| "step": 1014000 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 6.697298985681172e-06, |
| "loss": 3.0462, |
| "step": 1014500 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 6.695655032961252e-06, |
| "loss": 3.0589, |
| "step": 1015000 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 6.6940110802413325e-06, |
| "loss": 3.0518, |
| "step": 1015500 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 6.692367127521413e-06, |
| "loss": 3.0572, |
| "step": 1016000 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 6.690723174801494e-06, |
| "loss": 3.0515, |
| "step": 1016500 |
| }, |
| { |
| "epoch": 8.33, |
| "learning_rate": 6.689079222081574e-06, |
| "loss": 3.0498, |
| "step": 1017000 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 6.687435269361654e-06, |
| "loss": 3.0501, |
| "step": 1017500 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 6.685791316641734e-06, |
| "loss": 3.0552, |
| "step": 1018000 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 6.684147363921815e-06, |
| "loss": 3.049, |
| "step": 1018500 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 6.6825034112018934e-06, |
| "loss": 3.0508, |
| "step": 1019000 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 6.680859458481974e-06, |
| "loss": 3.0454, |
| "step": 1019500 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 6.6792155057620545e-06, |
| "loss": 3.0541, |
| "step": 1020000 |
| }, |
| { |
| "epoch": 8.36, |
| "eval_accuracy": 0.4829413695036341, |
| "eval_loss": 2.90887713432312, |
| "eval_runtime": 413.3015, |
| "eval_samples_per_second": 746.08, |
| "eval_steps_per_second": 15.546, |
| "step": 1020000 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 6.677571553042135e-06, |
| "loss": 3.0473, |
| "step": 1020500 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 6.675927600322215e-06, |
| "loss": 3.046, |
| "step": 1021000 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 6.674283647602296e-06, |
| "loss": 3.0498, |
| "step": 1021500 |
| }, |
| { |
| "epoch": 8.37, |
| "learning_rate": 6.672639694882376e-06, |
| "loss": 3.0504, |
| "step": 1022000 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 6.670995742162456e-06, |
| "loss": 3.0417, |
| "step": 1022500 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 6.669351789442536e-06, |
| "loss": 3.0592, |
| "step": 1023000 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 6.667707836722616e-06, |
| "loss": 3.0561, |
| "step": 1023500 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 6.666063884002697e-06, |
| "loss": 3.0394, |
| "step": 1024000 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 6.664419931282777e-06, |
| "loss": 3.0459, |
| "step": 1024500 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 6.6627759785628575e-06, |
| "loss": 3.0518, |
| "step": 1025000 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 6.661132025842937e-06, |
| "loss": 3.0447, |
| "step": 1025500 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 6.659488073123017e-06, |
| "loss": 3.0462, |
| "step": 1026000 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 6.657844120403098e-06, |
| "loss": 3.052, |
| "step": 1026500 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 6.656200167683178e-06, |
| "loss": 3.0463, |
| "step": 1027000 |
| }, |
| { |
| "epoch": 8.42, |
| "learning_rate": 6.654556214963258e-06, |
| "loss": 3.0511, |
| "step": 1027500 |
| }, |
| { |
| "epoch": 8.42, |
| "learning_rate": 6.652912262243338e-06, |
| "loss": 3.0508, |
| "step": 1028000 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 6.651268309523418e-06, |
| "loss": 3.0476, |
| "step": 1028500 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 6.649624356803499e-06, |
| "loss": 3.0537, |
| "step": 1029000 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 6.6479804040835795e-06, |
| "loss": 3.046, |
| "step": 1029500 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 6.64633645136366e-06, |
| "loss": 3.0525, |
| "step": 1030000 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 6.64469249864374e-06, |
| "loss": 3.0457, |
| "step": 1030500 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 6.643048545923821e-06, |
| "loss": 3.0466, |
| "step": 1031000 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 6.641404593203899e-06, |
| "loss": 3.0431, |
| "step": 1031500 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 6.63976064048398e-06, |
| "loss": 3.0475, |
| "step": 1032000 |
| }, |
| { |
| "epoch": 8.46, |
| "learning_rate": 6.63811668776406e-06, |
| "loss": 3.0473, |
| "step": 1032500 |
| }, |
| { |
| "epoch": 8.46, |
| "learning_rate": 6.63647273504414e-06, |
| "loss": 3.0418, |
| "step": 1033000 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 6.6348287823242205e-06, |
| "loss": 3.0426, |
| "step": 1033500 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 6.6331848296043015e-06, |
| "loss": 3.0471, |
| "step": 1034000 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 6.631540876884382e-06, |
| "loss": 3.0357, |
| "step": 1034500 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 6.629896924164462e-06, |
| "loss": 3.0484, |
| "step": 1035000 |
| }, |
| { |
| "epoch": 8.48, |
| "learning_rate": 6.628252971444542e-06, |
| "loss": 3.051, |
| "step": 1035500 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 6.626609018724622e-06, |
| "loss": 3.0439, |
| "step": 1036000 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 6.624965066004703e-06, |
| "loss": 3.0461, |
| "step": 1036500 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 6.623321113284783e-06, |
| "loss": 3.0467, |
| "step": 1037000 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 6.621677160564862e-06, |
| "loss": 3.047, |
| "step": 1037500 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 6.6200332078449425e-06, |
| "loss": 3.0444, |
| "step": 1038000 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 6.618389255125023e-06, |
| "loss": 3.0433, |
| "step": 1038500 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 6.616745302405104e-06, |
| "loss": 3.0416, |
| "step": 1039000 |
| }, |
| { |
| "epoch": 8.52, |
| "learning_rate": 6.615101349685184e-06, |
| "loss": 3.042, |
| "step": 1039500 |
| }, |
| { |
| "epoch": 8.52, |
| "learning_rate": 6.613457396965264e-06, |
| "loss": 3.042, |
| "step": 1040000 |
| }, |
| { |
| "epoch": 8.52, |
| "learning_rate": 6.611813444245344e-06, |
| "loss": 3.0503, |
| "step": 1040500 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 6.610169491525424e-06, |
| "loss": 3.0503, |
| "step": 1041000 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 6.608525538805505e-06, |
| "loss": 3.0391, |
| "step": 1041500 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 6.606881586085585e-06, |
| "loss": 3.0456, |
| "step": 1042000 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 6.605237633365665e-06, |
| "loss": 3.0408, |
| "step": 1042500 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 6.6035936806457454e-06, |
| "loss": 3.0393, |
| "step": 1043000 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 6.601949727925825e-06, |
| "loss": 3.048, |
| "step": 1043500 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 6.600305775205905e-06, |
| "loss": 3.0361, |
| "step": 1044000 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 6.598661822485986e-06, |
| "loss": 3.0415, |
| "step": 1044500 |
| }, |
| { |
| "epoch": 8.56, |
| "learning_rate": 6.597017869766066e-06, |
| "loss": 3.0402, |
| "step": 1045000 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 6.595373917046146e-06, |
| "loss": 3.0475, |
| "step": 1045500 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 6.593729964326226e-06, |
| "loss": 3.044, |
| "step": 1046000 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 6.592086011606307e-06, |
| "loss": 3.0376, |
| "step": 1046500 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 6.590442058886387e-06, |
| "loss": 3.0411, |
| "step": 1047000 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 6.588798106166467e-06, |
| "loss": 3.0327, |
| "step": 1047500 |
| }, |
| { |
| "epoch": 8.59, |
| "learning_rate": 6.5871541534465475e-06, |
| "loss": 3.038, |
| "step": 1048000 |
| }, |
| { |
| "epoch": 8.59, |
| "learning_rate": 6.585510200726628e-06, |
| "loss": 3.0352, |
| "step": 1048500 |
| }, |
| { |
| "epoch": 8.59, |
| "learning_rate": 6.583866248006709e-06, |
| "loss": 3.0367, |
| "step": 1049000 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 6.582222295286788e-06, |
| "loss": 3.0446, |
| "step": 1049500 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 6.580578342566868e-06, |
| "loss": 3.0469, |
| "step": 1050000 |
| }, |
| { |
| "epoch": 8.6, |
| "eval_accuracy": 0.4838990642400208, |
| "eval_loss": 2.9014947414398193, |
| "eval_runtime": 404.752, |
| "eval_samples_per_second": 761.839, |
| "eval_steps_per_second": 15.874, |
| "step": 1050000 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 6.578934389846948e-06, |
| "loss": 3.0436, |
| "step": 1050500 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 6.577290437127028e-06, |
| "loss": 3.0516, |
| "step": 1051000 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 6.5756464844071084e-06, |
| "loss": 3.046, |
| "step": 1051500 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 6.574002531687189e-06, |
| "loss": 3.0435, |
| "step": 1052000 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 6.5723585789672695e-06, |
| "loss": 3.0383, |
| "step": 1052500 |
| }, |
| { |
| "epoch": 8.63, |
| "learning_rate": 6.57071462624735e-06, |
| "loss": 3.0484, |
| "step": 1053000 |
| }, |
| { |
| "epoch": 8.63, |
| "learning_rate": 6.56907067352743e-06, |
| "loss": 3.0375, |
| "step": 1053500 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 6.567426720807511e-06, |
| "loss": 3.0459, |
| "step": 1054000 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 6.565782768087591e-06, |
| "loss": 3.0424, |
| "step": 1054500 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 6.564138815367671e-06, |
| "loss": 3.0355, |
| "step": 1055000 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 6.56249486264775e-06, |
| "loss": 3.0374, |
| "step": 1055500 |
| }, |
| { |
| "epoch": 8.65, |
| "learning_rate": 6.56085090992783e-06, |
| "loss": 3.0422, |
| "step": 1056000 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 6.5592069572079105e-06, |
| "loss": 3.0372, |
| "step": 1056500 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 6.5575630044879915e-06, |
| "loss": 3.0446, |
| "step": 1057000 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 6.555919051768072e-06, |
| "loss": 3.0456, |
| "step": 1057500 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 6.554275099048152e-06, |
| "loss": 3.0372, |
| "step": 1058000 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 6.552631146328232e-06, |
| "loss": 3.0409, |
| "step": 1058500 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 6.550987193608313e-06, |
| "loss": 3.0376, |
| "step": 1059000 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 6.549343240888393e-06, |
| "loss": 3.0434, |
| "step": 1059500 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 6.547699288168473e-06, |
| "loss": 3.0451, |
| "step": 1060000 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 6.546055335448553e-06, |
| "loss": 3.0341, |
| "step": 1060500 |
| }, |
| { |
| "epoch": 8.69, |
| "learning_rate": 6.544411382728633e-06, |
| "loss": 3.0415, |
| "step": 1061000 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 6.542767430008713e-06, |
| "loss": 3.0404, |
| "step": 1061500 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 6.541123477288794e-06, |
| "loss": 3.0347, |
| "step": 1062000 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 6.539479524568874e-06, |
| "loss": 3.0389, |
| "step": 1062500 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 6.537835571848954e-06, |
| "loss": 3.044, |
| "step": 1063000 |
| }, |
| { |
| "epoch": 8.71, |
| "learning_rate": 6.536191619129034e-06, |
| "loss": 3.0347, |
| "step": 1063500 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 6.534547666409114e-06, |
| "loss": 3.0444, |
| "step": 1064000 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 6.532903713689195e-06, |
| "loss": 3.0326, |
| "step": 1064500 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 6.531259760969275e-06, |
| "loss": 3.0371, |
| "step": 1065000 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 6.529615808249355e-06, |
| "loss": 3.0326, |
| "step": 1065500 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 6.5279718555294355e-06, |
| "loss": 3.0373, |
| "step": 1066000 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 6.5263279028095165e-06, |
| "loss": 3.0276, |
| "step": 1066500 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 6.524683950089597e-06, |
| "loss": 3.0328, |
| "step": 1067000 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 6.523039997369676e-06, |
| "loss": 3.0332, |
| "step": 1067500 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 6.521396044649756e-06, |
| "loss": 3.0334, |
| "step": 1068000 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 6.519752091929836e-06, |
| "loss": 3.0387, |
| "step": 1068500 |
| }, |
| { |
| "epoch": 8.76, |
| "learning_rate": 6.518108139209916e-06, |
| "loss": 3.0343, |
| "step": 1069000 |
| }, |
| { |
| "epoch": 8.76, |
| "learning_rate": 6.516464186489997e-06, |
| "loss": 3.0377, |
| "step": 1069500 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 6.514820233770077e-06, |
| "loss": 3.0383, |
| "step": 1070000 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 6.5131762810501575e-06, |
| "loss": 3.0366, |
| "step": 1070500 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 6.511532328330238e-06, |
| "loss": 3.0359, |
| "step": 1071000 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 6.5098883756103186e-06, |
| "loss": 3.0398, |
| "step": 1071500 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 6.508244422890399e-06, |
| "loss": 3.037, |
| "step": 1072000 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 6.506600470170479e-06, |
| "loss": 3.0314, |
| "step": 1072500 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 6.504956517450559e-06, |
| "loss": 3.0387, |
| "step": 1073000 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 6.503312564730639e-06, |
| "loss": 3.0297, |
| "step": 1073500 |
| }, |
| { |
| "epoch": 8.8, |
| "learning_rate": 6.501668612010718e-06, |
| "loss": 3.0405, |
| "step": 1074000 |
| }, |
| { |
| "epoch": 8.8, |
| "learning_rate": 6.500024659290799e-06, |
| "loss": 3.0421, |
| "step": 1074500 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 6.4983807065708795e-06, |
| "loss": 3.0287, |
| "step": 1075000 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 6.49673675385096e-06, |
| "loss": 3.0283, |
| "step": 1075500 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 6.49509280113104e-06, |
| "loss": 3.0377, |
| "step": 1076000 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 6.49344884841112e-06, |
| "loss": 3.0367, |
| "step": 1076500 |
| }, |
| { |
| "epoch": 8.82, |
| "learning_rate": 6.491804895691201e-06, |
| "loss": 3.0373, |
| "step": 1077000 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 6.490160942971281e-06, |
| "loss": 3.0327, |
| "step": 1077500 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 6.488516990251361e-06, |
| "loss": 3.0261, |
| "step": 1078000 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 6.486873037531441e-06, |
| "loss": 3.0347, |
| "step": 1078500 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 6.485229084811522e-06, |
| "loss": 3.0382, |
| "step": 1079000 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 6.483585132091602e-06, |
| "loss": 3.0369, |
| "step": 1079500 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 6.4819411793716816e-06, |
| "loss": 3.0377, |
| "step": 1080000 |
| }, |
| { |
| "epoch": 8.85, |
| "eval_accuracy": 0.4847903545689289, |
| "eval_loss": 2.895965814590454, |
| "eval_runtime": 407.6957, |
| "eval_samples_per_second": 756.339, |
| "eval_steps_per_second": 15.759, |
| "step": 1080000 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 6.480297226651762e-06, |
| "loss": 3.0378, |
| "step": 1080500 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 6.478653273931842e-06, |
| "loss": 3.0374, |
| "step": 1081000 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 6.477009321211922e-06, |
| "loss": 3.0315, |
| "step": 1081500 |
| }, |
| { |
| "epoch": 8.86, |
| "learning_rate": 6.475365368492003e-06, |
| "loss": 3.032, |
| "step": 1082000 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 6.473721415772083e-06, |
| "loss": 3.0362, |
| "step": 1082500 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 6.472077463052163e-06, |
| "loss": 3.0368, |
| "step": 1083000 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 6.470433510332243e-06, |
| "loss": 3.0297, |
| "step": 1083500 |
| }, |
| { |
| "epoch": 8.88, |
| "learning_rate": 6.468789557612324e-06, |
| "loss": 3.031, |
| "step": 1084000 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 6.467145604892404e-06, |
| "loss": 3.0321, |
| "step": 1084500 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 6.4655016521724845e-06, |
| "loss": 3.0335, |
| "step": 1085000 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 6.463857699452565e-06, |
| "loss": 3.0344, |
| "step": 1085500 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 6.462213746732644e-06, |
| "loss": 3.0378, |
| "step": 1086000 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 6.460569794012724e-06, |
| "loss": 3.0306, |
| "step": 1086500 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 6.458925841292805e-06, |
| "loss": 3.0363, |
| "step": 1087000 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 6.457281888572885e-06, |
| "loss": 3.0303, |
| "step": 1087500 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 6.455637935852965e-06, |
| "loss": 3.0305, |
| "step": 1088000 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 6.453993983133045e-06, |
| "loss": 3.0352, |
| "step": 1088500 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 6.4523500304131255e-06, |
| "loss": 3.0338, |
| "step": 1089000 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 6.4507060776932065e-06, |
| "loss": 3.036, |
| "step": 1089500 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 6.449062124973287e-06, |
| "loss": 3.0388, |
| "step": 1090000 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 6.447418172253367e-06, |
| "loss": 3.0292, |
| "step": 1090500 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 6.445774219533447e-06, |
| "loss": 3.0351, |
| "step": 1091000 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 6.444130266813528e-06, |
| "loss": 3.0325, |
| "step": 1091500 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 6.442486314093606e-06, |
| "loss": 3.0258, |
| "step": 1092000 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 6.440842361373687e-06, |
| "loss": 3.0293, |
| "step": 1092500 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 6.439198408653767e-06, |
| "loss": 3.0273, |
| "step": 1093000 |
| }, |
| { |
| "epoch": 8.96, |
| "learning_rate": 6.4375544559338475e-06, |
| "loss": 3.0372, |
| "step": 1093500 |
| }, |
| { |
| "epoch": 8.96, |
| "learning_rate": 6.435910503213928e-06, |
| "loss": 3.0329, |
| "step": 1094000 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 6.434266550494009e-06, |
| "loss": 3.0329, |
| "step": 1094500 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 6.432622597774089e-06, |
| "loss": 3.0295, |
| "step": 1095000 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 6.430978645054169e-06, |
| "loss": 3.0264, |
| "step": 1095500 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 6.429334692334249e-06, |
| "loss": 3.028, |
| "step": 1096000 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 6.427690739614329e-06, |
| "loss": 3.0408, |
| "step": 1096500 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 6.42604678689441e-06, |
| "loss": 3.0269, |
| "step": 1097000 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 6.42440283417449e-06, |
| "loss": 3.0304, |
| "step": 1097500 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 6.4227588814545695e-06, |
| "loss": 3.0328, |
| "step": 1098000 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 6.42111492873465e-06, |
| "loss": 3.0254, |
| "step": 1098500 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 6.41947097601473e-06, |
| "loss": 3.0365, |
| "step": 1099000 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 6.417827023294811e-06, |
| "loss": 3.0218, |
| "step": 1099500 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 6.416183070574891e-06, |
| "loss": 3.0276, |
| "step": 1100000 |
| }, |
| { |
| "epoch": 9.02, |
| "learning_rate": 6.414539117854971e-06, |
| "loss": 3.0299, |
| "step": 1100500 |
| }, |
| { |
| "epoch": 9.02, |
| "learning_rate": 6.412895165135051e-06, |
| "loss": 3.0222, |
| "step": 1101000 |
| }, |
| { |
| "epoch": 9.02, |
| "learning_rate": 6.411251212415131e-06, |
| "loss": 3.0279, |
| "step": 1101500 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 6.409607259695212e-06, |
| "loss": 3.0317, |
| "step": 1102000 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 6.407963306975292e-06, |
| "loss": 3.0244, |
| "step": 1102500 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 6.4063193542553725e-06, |
| "loss": 3.0241, |
| "step": 1103000 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 6.404675401535453e-06, |
| "loss": 3.0223, |
| "step": 1103500 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 6.403031448815532e-06, |
| "loss": 3.0291, |
| "step": 1104000 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 6.401387496095612e-06, |
| "loss": 3.0236, |
| "step": 1104500 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 6.399743543375693e-06, |
| "loss": 3.0248, |
| "step": 1105000 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 6.398099590655773e-06, |
| "loss": 3.0271, |
| "step": 1105500 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 6.396455637935853e-06, |
| "loss": 3.0282, |
| "step": 1106000 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 6.394811685215933e-06, |
| "loss": 3.0316, |
| "step": 1106500 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 6.393167732496014e-06, |
| "loss": 3.0257, |
| "step": 1107000 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 6.3915237797760945e-06, |
| "loss": 3.0205, |
| "step": 1107500 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 6.389879827056175e-06, |
| "loss": 3.0294, |
| "step": 1108000 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 6.388235874336255e-06, |
| "loss": 3.0295, |
| "step": 1108500 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 6.386591921616335e-06, |
| "loss": 3.0257, |
| "step": 1109000 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 6.384947968896416e-06, |
| "loss": 3.0275, |
| "step": 1109500 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 6.383304016176495e-06, |
| "loss": 3.0284, |
| "step": 1110000 |
| }, |
| { |
| "epoch": 9.09, |
| "eval_accuracy": 0.48608889348818574, |
| "eval_loss": 2.885928153991699, |
| "eval_runtime": 407.7381, |
| "eval_samples_per_second": 756.26, |
| "eval_steps_per_second": 15.758, |
| "step": 1110000 |
| }, |
| { |
| "epoch": 9.1, |
| "learning_rate": 6.381660063456575e-06, |
| "loss": 3.0216, |
| "step": 1110500 |
| }, |
| { |
| "epoch": 9.1, |
| "learning_rate": 6.380016110736655e-06, |
| "loss": 3.0199, |
| "step": 1111000 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 6.3783721580167355e-06, |
| "loss": 3.0232, |
| "step": 1111500 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 6.3767282052968164e-06, |
| "loss": 3.0248, |
| "step": 1112000 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 6.3750842525768966e-06, |
| "loss": 3.0276, |
| "step": 1112500 |
| }, |
| { |
| "epoch": 9.12, |
| "learning_rate": 6.373440299856977e-06, |
| "loss": 3.0284, |
| "step": 1113000 |
| }, |
| { |
| "epoch": 9.12, |
| "learning_rate": 6.371796347137057e-06, |
| "loss": 3.0302, |
| "step": 1113500 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 6.370152394417137e-06, |
| "loss": 3.0274, |
| "step": 1114000 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 6.368508441697218e-06, |
| "loss": 3.0335, |
| "step": 1114500 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 6.366864488977298e-06, |
| "loss": 3.0209, |
| "step": 1115000 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 6.365220536257378e-06, |
| "loss": 3.0297, |
| "step": 1115500 |
| }, |
| { |
| "epoch": 9.14, |
| "learning_rate": 6.3635765835374575e-06, |
| "loss": 3.0267, |
| "step": 1116000 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 6.361932630817538e-06, |
| "loss": 3.0261, |
| "step": 1116500 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 6.360288678097618e-06, |
| "loss": 3.0273, |
| "step": 1117000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 6.358644725377699e-06, |
| "loss": 3.0163, |
| "step": 1117500 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 6.357000772657779e-06, |
| "loss": 3.0258, |
| "step": 1118000 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 6.355356819937859e-06, |
| "loss": 3.0321, |
| "step": 1118500 |
| }, |
| { |
| "epoch": 9.17, |
| "learning_rate": 6.353712867217939e-06, |
| "loss": 3.0181, |
| "step": 1119000 |
| }, |
| { |
| "epoch": 9.17, |
| "learning_rate": 6.35206891449802e-06, |
| "loss": 3.0194, |
| "step": 1119500 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 6.3504249617781e-06, |
| "loss": 3.0279, |
| "step": 1120000 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 6.34878100905818e-06, |
| "loss": 3.0236, |
| "step": 1120500 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 6.34713705633826e-06, |
| "loss": 3.0279, |
| "step": 1121000 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 6.3454931036183405e-06, |
| "loss": 3.026, |
| "step": 1121500 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 6.34384915089842e-06, |
| "loss": 3.0217, |
| "step": 1122000 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 6.342205198178501e-06, |
| "loss": 3.0299, |
| "step": 1122500 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 6.340561245458581e-06, |
| "loss": 3.0203, |
| "step": 1123000 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 6.338917292738661e-06, |
| "loss": 3.0224, |
| "step": 1123500 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 6.337273340018741e-06, |
| "loss": 3.0184, |
| "step": 1124000 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 6.335629387298821e-06, |
| "loss": 3.0215, |
| "step": 1124500 |
| }, |
| { |
| "epoch": 9.22, |
| "learning_rate": 6.333985434578902e-06, |
| "loss": 3.0261, |
| "step": 1125000 |
| }, |
| { |
| "epoch": 9.22, |
| "learning_rate": 6.332341481858982e-06, |
| "loss": 3.0242, |
| "step": 1125500 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 6.3306975291390625e-06, |
| "loss": 3.0246, |
| "step": 1126000 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 6.329053576419143e-06, |
| "loss": 3.0327, |
| "step": 1126500 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 6.327409623699224e-06, |
| "loss": 3.0222, |
| "step": 1127000 |
| }, |
| { |
| "epoch": 9.24, |
| "learning_rate": 6.325765670979304e-06, |
| "loss": 3.0257, |
| "step": 1127500 |
| }, |
| { |
| "epoch": 9.24, |
| "learning_rate": 6.324121718259384e-06, |
| "loss": 3.0228, |
| "step": 1128000 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 6.322477765539463e-06, |
| "loss": 3.0246, |
| "step": 1128500 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 6.320833812819543e-06, |
| "loss": 3.0205, |
| "step": 1129000 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 6.319189860099623e-06, |
| "loss": 3.0198, |
| "step": 1129500 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 6.317545907379704e-06, |
| "loss": 3.0227, |
| "step": 1130000 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 6.3159019546597845e-06, |
| "loss": 3.0205, |
| "step": 1130500 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 6.314258001939865e-06, |
| "loss": 3.0218, |
| "step": 1131000 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 6.312614049219945e-06, |
| "loss": 3.0221, |
| "step": 1131500 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 6.310970096500026e-06, |
| "loss": 3.017, |
| "step": 1132000 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 6.309326143780106e-06, |
| "loss": 3.0187, |
| "step": 1132500 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 6.307682191060186e-06, |
| "loss": 3.0173, |
| "step": 1133000 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 6.306038238340266e-06, |
| "loss": 3.0229, |
| "step": 1133500 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 6.304394285620346e-06, |
| "loss": 3.0254, |
| "step": 1134000 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 6.3027503329004255e-06, |
| "loss": 3.0292, |
| "step": 1134500 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 6.3011063801805065e-06, |
| "loss": 3.0172, |
| "step": 1135000 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 6.299462427460587e-06, |
| "loss": 3.0233, |
| "step": 1135500 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 6.297818474740667e-06, |
| "loss": 3.0245, |
| "step": 1136000 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 6.296174522020747e-06, |
| "loss": 3.0165, |
| "step": 1136500 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 6.294530569300827e-06, |
| "loss": 3.0161, |
| "step": 1137000 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 6.292886616580908e-06, |
| "loss": 3.017, |
| "step": 1137500 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 6.291242663860988e-06, |
| "loss": 3.0244, |
| "step": 1138000 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 6.289598711141068e-06, |
| "loss": 3.0189, |
| "step": 1138500 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 6.287954758421148e-06, |
| "loss": 3.0196, |
| "step": 1139000 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 6.286310805701229e-06, |
| "loss": 3.0169, |
| "step": 1139500 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 6.2846668529813095e-06, |
| "loss": 3.0224, |
| "step": 1140000 |
| }, |
| { |
| "epoch": 9.34, |
| "eval_accuracy": 0.4867489885373119, |
| "eval_loss": 2.881913185119629, |
| "eval_runtime": 406.5724, |
| "eval_samples_per_second": 758.428, |
| "eval_steps_per_second": 15.803, |
| "step": 1140000 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 6.283022900261389e-06, |
| "loss": 3.0198, |
| "step": 1140500 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 6.281378947541469e-06, |
| "loss": 3.0248, |
| "step": 1141000 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 6.279734994821549e-06, |
| "loss": 3.0256, |
| "step": 1141500 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 6.278091042101629e-06, |
| "loss": 3.0153, |
| "step": 1142000 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 6.27644708938171e-06, |
| "loss": 3.0159, |
| "step": 1142500 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 6.27480313666179e-06, |
| "loss": 3.0145, |
| "step": 1143000 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 6.27315918394187e-06, |
| "loss": 3.0236, |
| "step": 1143500 |
| }, |
| { |
| "epoch": 9.37, |
| "learning_rate": 6.2715152312219505e-06, |
| "loss": 3.0184, |
| "step": 1144000 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 6.2698712785020314e-06, |
| "loss": 3.0168, |
| "step": 1144500 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 6.2682273257821116e-06, |
| "loss": 3.0201, |
| "step": 1145000 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 6.266583373062192e-06, |
| "loss": 3.0186, |
| "step": 1145500 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 6.264939420342272e-06, |
| "loss": 3.0217, |
| "step": 1146000 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 6.263295467622351e-06, |
| "loss": 3.0154, |
| "step": 1146500 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 6.261651514902431e-06, |
| "loss": 3.0197, |
| "step": 1147000 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 6.260007562182512e-06, |
| "loss": 3.0149, |
| "step": 1147500 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 6.258363609462592e-06, |
| "loss": 3.0203, |
| "step": 1148000 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 6.2567196567426725e-06, |
| "loss": 3.0206, |
| "step": 1148500 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 6.255075704022753e-06, |
| "loss": 3.0151, |
| "step": 1149000 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 6.253431751302833e-06, |
| "loss": 3.0192, |
| "step": 1149500 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 6.251787798582914e-06, |
| "loss": 3.0161, |
| "step": 1150000 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 6.250143845862994e-06, |
| "loss": 3.0224, |
| "step": 1150500 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 6.248499893143074e-06, |
| "loss": 3.02, |
| "step": 1151000 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 6.246855940423154e-06, |
| "loss": 3.0231, |
| "step": 1151500 |
| }, |
| { |
| "epoch": 9.44, |
| "learning_rate": 6.245211987703235e-06, |
| "loss": 3.0199, |
| "step": 1152000 |
| }, |
| { |
| "epoch": 9.44, |
| "learning_rate": 6.2435680349833135e-06, |
| "loss": 3.0159, |
| "step": 1152500 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 6.2419240822633944e-06, |
| "loss": 3.0192, |
| "step": 1153000 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 6.2402801295434746e-06, |
| "loss": 3.0151, |
| "step": 1153500 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 6.238636176823555e-06, |
| "loss": 3.0198, |
| "step": 1154000 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 6.236992224103635e-06, |
| "loss": 3.0142, |
| "step": 1154500 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 6.235348271383716e-06, |
| "loss": 3.0177, |
| "step": 1155000 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 6.233704318663796e-06, |
| "loss": 3.0148, |
| "step": 1155500 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 6.232060365943876e-06, |
| "loss": 3.0101, |
| "step": 1156000 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 6.230416413223956e-06, |
| "loss": 3.0113, |
| "step": 1156500 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 6.228772460504036e-06, |
| "loss": 3.0084, |
| "step": 1157000 |
| }, |
| { |
| "epoch": 9.48, |
| "learning_rate": 6.227128507784117e-06, |
| "loss": 3.0133, |
| "step": 1157500 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 6.225484555064197e-06, |
| "loss": 3.0143, |
| "step": 1158000 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 6.223840602344277e-06, |
| "loss": 3.0167, |
| "step": 1158500 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 6.222196649624357e-06, |
| "loss": 3.0136, |
| "step": 1159000 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 6.220552696904437e-06, |
| "loss": 3.012, |
| "step": 1159500 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 6.218908744184518e-06, |
| "loss": 3.012, |
| "step": 1160000 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 6.217264791464598e-06, |
| "loss": 3.0111, |
| "step": 1160500 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 6.215620838744678e-06, |
| "loss": 3.0147, |
| "step": 1161000 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 6.213976886024758e-06, |
| "loss": 3.015, |
| "step": 1161500 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 6.212332933304838e-06, |
| "loss": 3.017, |
| "step": 1162000 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 6.210688980584919e-06, |
| "loss": 3.0108, |
| "step": 1162500 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 6.2090450278649995e-06, |
| "loss": 3.0125, |
| "step": 1163000 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 6.20740107514508e-06, |
| "loss": 3.0126, |
| "step": 1163500 |
| }, |
| { |
| "epoch": 9.54, |
| "learning_rate": 6.20575712242516e-06, |
| "loss": 3.0209, |
| "step": 1164000 |
| }, |
| { |
| "epoch": 9.54, |
| "learning_rate": 6.204113169705239e-06, |
| "loss": 3.0089, |
| "step": 1164500 |
| }, |
| { |
| "epoch": 9.54, |
| "learning_rate": 6.202469216985319e-06, |
| "loss": 3.0135, |
| "step": 1165000 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 6.2008252642654e-06, |
| "loss": 3.011, |
| "step": 1165500 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 6.19918131154548e-06, |
| "loss": 3.0125, |
| "step": 1166000 |
| }, |
| { |
| "epoch": 9.56, |
| "learning_rate": 6.19753735882556e-06, |
| "loss": 3.0127, |
| "step": 1166500 |
| }, |
| { |
| "epoch": 9.56, |
| "learning_rate": 6.1958934061056405e-06, |
| "loss": 3.0144, |
| "step": 1167000 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 6.1942494533857215e-06, |
| "loss": 3.0174, |
| "step": 1167500 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 6.192605500665802e-06, |
| "loss": 3.0088, |
| "step": 1168000 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 6.190961547945882e-06, |
| "loss": 3.0135, |
| "step": 1168500 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 6.189317595225962e-06, |
| "loss": 3.0141, |
| "step": 1169000 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 6.187673642506042e-06, |
| "loss": 3.0075, |
| "step": 1169500 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 6.186029689786123e-06, |
| "loss": 3.019, |
| "step": 1170000 |
| }, |
| { |
| "epoch": 9.59, |
| "eval_accuracy": 0.4878472453037176, |
| "eval_loss": 2.873086929321289, |
| "eval_runtime": 405.404, |
| "eval_samples_per_second": 760.614, |
| "eval_steps_per_second": 15.848, |
| "step": 1170000 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 6.184385737066202e-06, |
| "loss": 3.0057, |
| "step": 1170500 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 6.182741784346282e-06, |
| "loss": 3.0136, |
| "step": 1171000 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 6.1810978316263625e-06, |
| "loss": 3.0157, |
| "step": 1171500 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 6.179453878906443e-06, |
| "loss": 3.0087, |
| "step": 1172000 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 6.177809926186524e-06, |
| "loss": 3.0223, |
| "step": 1172500 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 6.176165973466604e-06, |
| "loss": 3.0064, |
| "step": 1173000 |
| }, |
| { |
| "epoch": 9.61, |
| "learning_rate": 6.174522020746684e-06, |
| "loss": 3.0084, |
| "step": 1173500 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 6.172878068026764e-06, |
| "loss": 3.0118, |
| "step": 1174000 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 6.171234115306844e-06, |
| "loss": 3.0115, |
| "step": 1174500 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 6.169590162586925e-06, |
| "loss": 3.0137, |
| "step": 1175000 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 6.167946209867005e-06, |
| "loss": 3.0106, |
| "step": 1175500 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 6.166302257147085e-06, |
| "loss": 3.0104, |
| "step": 1176000 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 6.1646583044271655e-06, |
| "loss": 3.0126, |
| "step": 1176500 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 6.163014351707245e-06, |
| "loss": 3.0086, |
| "step": 1177000 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 6.161370398987325e-06, |
| "loss": 3.0148, |
| "step": 1177500 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 6.159726446267406e-06, |
| "loss": 3.0146, |
| "step": 1178000 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 6.158082493547486e-06, |
| "loss": 3.0155, |
| "step": 1178500 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 6.156438540827566e-06, |
| "loss": 3.012, |
| "step": 1179000 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 6.154794588107646e-06, |
| "loss": 3.0118, |
| "step": 1179500 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 6.153150635387727e-06, |
| "loss": 3.0086, |
| "step": 1180000 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 6.151506682667807e-06, |
| "loss": 3.0139, |
| "step": 1180500 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 6.1498627299478875e-06, |
| "loss": 3.0105, |
| "step": 1181000 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 6.148218777227968e-06, |
| "loss": 3.012, |
| "step": 1181500 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 6.146574824508048e-06, |
| "loss": 3.0096, |
| "step": 1182000 |
| }, |
| { |
| "epoch": 9.69, |
| "learning_rate": 6.144930871788129e-06, |
| "loss": 3.0109, |
| "step": 1182500 |
| }, |
| { |
| "epoch": 9.69, |
| "learning_rate": 6.143286919068208e-06, |
| "loss": 3.0119, |
| "step": 1183000 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 6.141642966348288e-06, |
| "loss": 3.0086, |
| "step": 1183500 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 6.139999013628368e-06, |
| "loss": 3.014, |
| "step": 1184000 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 6.138355060908448e-06, |
| "loss": 3.0107, |
| "step": 1184500 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 6.1367111081885285e-06, |
| "loss": 3.0039, |
| "step": 1185000 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 6.1350671554686094e-06, |
| "loss": 3.0066, |
| "step": 1185500 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 6.1334232027486896e-06, |
| "loss": 3.0103, |
| "step": 1186000 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 6.13177925002877e-06, |
| "loss": 3.0094, |
| "step": 1186500 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 6.13013529730885e-06, |
| "loss": 3.011, |
| "step": 1187000 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 6.128491344588931e-06, |
| "loss": 3.0094, |
| "step": 1187500 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 6.126847391869011e-06, |
| "loss": 3.0036, |
| "step": 1188000 |
| }, |
| { |
| "epoch": 9.74, |
| "learning_rate": 6.125203439149091e-06, |
| "loss": 3.0096, |
| "step": 1188500 |
| }, |
| { |
| "epoch": 9.74, |
| "learning_rate": 6.12355948642917e-06, |
| "loss": 3.0105, |
| "step": 1189000 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 6.1219155337092504e-06, |
| "loss": 3.008, |
| "step": 1189500 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 6.1202715809893306e-06, |
| "loss": 3.0032, |
| "step": 1190000 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 6.1186276282694115e-06, |
| "loss": 3.0091, |
| "step": 1190500 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 6.116983675549492e-06, |
| "loss": 3.0035, |
| "step": 1191000 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 6.115339722829572e-06, |
| "loss": 3.0091, |
| "step": 1191500 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 6.113695770109652e-06, |
| "loss": 3.0013, |
| "step": 1192000 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 6.112051817389733e-06, |
| "loss": 3.0043, |
| "step": 1192500 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 6.110407864669813e-06, |
| "loss": 3.0081, |
| "step": 1193000 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 6.108763911949893e-06, |
| "loss": 3.0055, |
| "step": 1193500 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 6.107119959229973e-06, |
| "loss": 3.011, |
| "step": 1194000 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 6.105476006510053e-06, |
| "loss": 3.0094, |
| "step": 1194500 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 6.103832053790133e-06, |
| "loss": 3.0133, |
| "step": 1195000 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 6.102188101070214e-06, |
| "loss": 3.0032, |
| "step": 1195500 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 6.100544148350294e-06, |
| "loss": 3.0075, |
| "step": 1196000 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 6.098900195630374e-06, |
| "loss": 3.0091, |
| "step": 1196500 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 6.097256242910454e-06, |
| "loss": 3.011, |
| "step": 1197000 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 6.095612290190534e-06, |
| "loss": 3.0039, |
| "step": 1197500 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 6.093968337470615e-06, |
| "loss": 3.0036, |
| "step": 1198000 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 6.092324384750695e-06, |
| "loss": 3.0069, |
| "step": 1198500 |
| }, |
| { |
| "epoch": 9.82, |
| "learning_rate": 6.090680432030775e-06, |
| "loss": 3.0114, |
| "step": 1199000 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 6.0890364793108555e-06, |
| "loss": 3.006, |
| "step": 1199500 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 6.0873925265909365e-06, |
| "loss": 3.0094, |
| "step": 1200000 |
| }, |
| { |
| "epoch": 9.83, |
| "eval_accuracy": 0.48851002828245305, |
| "eval_loss": 2.8687474727630615, |
| "eval_runtime": 404.2357, |
| "eval_samples_per_second": 762.812, |
| "eval_steps_per_second": 15.894, |
| "step": 1200000 |
| }, |
| { |
| "epoch": 9.84, |
| "learning_rate": 6.085748573871017e-06, |
| "loss": 2.9995, |
| "step": 1200500 |
| }, |
| { |
| "epoch": 9.84, |
| "learning_rate": 6.084104621151096e-06, |
| "loss": 3.0002, |
| "step": 1201000 |
| }, |
| { |
| "epoch": 9.84, |
| "learning_rate": 6.082460668431176e-06, |
| "loss": 3.0047, |
| "step": 1201500 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 6.080816715711256e-06, |
| "loss": 3.0093, |
| "step": 1202000 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 6.079172762991336e-06, |
| "loss": 3.0018, |
| "step": 1202500 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 6.077528810271417e-06, |
| "loss": 3.0067, |
| "step": 1203000 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 6.075884857551497e-06, |
| "loss": 3.01, |
| "step": 1203500 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 6.0742409048315775e-06, |
| "loss": 3.0108, |
| "step": 1204000 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 6.072596952111658e-06, |
| "loss": 3.0089, |
| "step": 1204500 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 6.070952999391739e-06, |
| "loss": 3.0047, |
| "step": 1205000 |
| }, |
| { |
| "epoch": 9.88, |
| "learning_rate": 6.069309046671819e-06, |
| "loss": 3.0057, |
| "step": 1205500 |
| }, |
| { |
| "epoch": 9.88, |
| "learning_rate": 6.067665093951899e-06, |
| "loss": 3.006, |
| "step": 1206000 |
| }, |
| { |
| "epoch": 9.88, |
| "learning_rate": 6.066021141231979e-06, |
| "loss": 3.0104, |
| "step": 1206500 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 6.064377188512058e-06, |
| "loss": 3.0047, |
| "step": 1207000 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 6.062733235792138e-06, |
| "loss": 3.0062, |
| "step": 1207500 |
| }, |
| { |
| "epoch": 9.9, |
| "learning_rate": 6.061089283072219e-06, |
| "loss": 3.0086, |
| "step": 1208000 |
| }, |
| { |
| "epoch": 9.9, |
| "learning_rate": 6.0594453303522995e-06, |
| "loss": 3.0032, |
| "step": 1208500 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 6.05780137763238e-06, |
| "loss": 3.0042, |
| "step": 1209000 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 6.05615742491246e-06, |
| "loss": 3.0064, |
| "step": 1209500 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 6.05451347219254e-06, |
| "loss": 3.0005, |
| "step": 1210000 |
| }, |
| { |
| "epoch": 9.92, |
| "learning_rate": 6.052869519472621e-06, |
| "loss": 3.0043, |
| "step": 1210500 |
| }, |
| { |
| "epoch": 9.92, |
| "learning_rate": 6.051225566752701e-06, |
| "loss": 3.0002, |
| "step": 1211000 |
| }, |
| { |
| "epoch": 9.93, |
| "learning_rate": 6.049581614032781e-06, |
| "loss": 3.011, |
| "step": 1211500 |
| }, |
| { |
| "epoch": 9.93, |
| "learning_rate": 6.047937661312861e-06, |
| "loss": 3.0027, |
| "step": 1212000 |
| }, |
| { |
| "epoch": 9.93, |
| "learning_rate": 6.046293708592942e-06, |
| "loss": 3.0036, |
| "step": 1212500 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 6.044649755873021e-06, |
| "loss": 3.0021, |
| "step": 1213000 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 6.043005803153102e-06, |
| "loss": 2.9972, |
| "step": 1213500 |
| }, |
| { |
| "epoch": 9.95, |
| "learning_rate": 6.041361850433182e-06, |
| "loss": 3.0094, |
| "step": 1214000 |
| }, |
| { |
| "epoch": 9.95, |
| "learning_rate": 6.039717897713262e-06, |
| "loss": 3.0087, |
| "step": 1214500 |
| }, |
| { |
| "epoch": 9.95, |
| "learning_rate": 6.038073944993342e-06, |
| "loss": 3.0098, |
| "step": 1215000 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 6.036429992273423e-06, |
| "loss": 3.0037, |
| "step": 1215500 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 6.034786039553503e-06, |
| "loss": 3.0057, |
| "step": 1216000 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 6.033142086833583e-06, |
| "loss": 3.0026, |
| "step": 1216500 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 6.031498134113663e-06, |
| "loss": 3.0057, |
| "step": 1217000 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 6.029854181393744e-06, |
| "loss": 3.0037, |
| "step": 1217500 |
| }, |
| { |
| "epoch": 9.98, |
| "learning_rate": 6.0282102286738244e-06, |
| "loss": 3.0066, |
| "step": 1218000 |
| }, |
| { |
| "epoch": 9.98, |
| "learning_rate": 6.0265662759539046e-06, |
| "loss": 3.0039, |
| "step": 1218500 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 6.024922323233984e-06, |
| "loss": 3.0073, |
| "step": 1219000 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 6.023278370514064e-06, |
| "loss": 2.9989, |
| "step": 1219500 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 6.021634417794144e-06, |
| "loss": 3.0029, |
| "step": 1220000 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 6.019990465074225e-06, |
| "loss": 2.9987, |
| "step": 1220500 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 6.018346512354305e-06, |
| "loss": 3.0004, |
| "step": 1221000 |
| }, |
| { |
| "epoch": 10.01, |
| "learning_rate": 6.016702559634385e-06, |
| "loss": 3.006, |
| "step": 1221500 |
| }, |
| { |
| "epoch": 10.01, |
| "learning_rate": 6.0150586069144654e-06, |
| "loss": 3.0002, |
| "step": 1222000 |
| }, |
| { |
| "epoch": 10.02, |
| "learning_rate": 6.0134146541945456e-06, |
| "loss": 3.0044, |
| "step": 1222500 |
| }, |
| { |
| "epoch": 10.02, |
| "learning_rate": 6.0117707014746265e-06, |
| "loss": 2.9957, |
| "step": 1223000 |
| }, |
| { |
| "epoch": 10.02, |
| "learning_rate": 6.010126748754707e-06, |
| "loss": 3.0001, |
| "step": 1223500 |
| }, |
| { |
| "epoch": 10.03, |
| "learning_rate": 6.008482796034787e-06, |
| "loss": 3.0058, |
| "step": 1224000 |
| }, |
| { |
| "epoch": 10.03, |
| "learning_rate": 6.006838843314867e-06, |
| "loss": 2.9946, |
| "step": 1224500 |
| }, |
| { |
| "epoch": 10.04, |
| "learning_rate": 6.005194890594948e-06, |
| "loss": 3.0, |
| "step": 1225000 |
| }, |
| { |
| "epoch": 10.04, |
| "learning_rate": 6.003550937875026e-06, |
| "loss": 2.9992, |
| "step": 1225500 |
| }, |
| { |
| "epoch": 10.04, |
| "learning_rate": 6.001906985155107e-06, |
| "loss": 2.9963, |
| "step": 1226000 |
| }, |
| { |
| "epoch": 10.05, |
| "learning_rate": 6.0002630324351874e-06, |
| "loss": 2.9928, |
| "step": 1226500 |
| }, |
| { |
| "epoch": 10.05, |
| "learning_rate": 5.9986190797152676e-06, |
| "loss": 3.0001, |
| "step": 1227000 |
| }, |
| { |
| "epoch": 10.06, |
| "learning_rate": 5.996975126995348e-06, |
| "loss": 3.0003, |
| "step": 1227500 |
| }, |
| { |
| "epoch": 10.06, |
| "learning_rate": 5.995331174275429e-06, |
| "loss": 2.9989, |
| "step": 1228000 |
| }, |
| { |
| "epoch": 10.06, |
| "learning_rate": 5.993687221555509e-06, |
| "loss": 2.9957, |
| "step": 1228500 |
| }, |
| { |
| "epoch": 10.07, |
| "learning_rate": 5.992043268835589e-06, |
| "loss": 3.0049, |
| "step": 1229000 |
| }, |
| { |
| "epoch": 10.07, |
| "learning_rate": 5.990399316115669e-06, |
| "loss": 2.9962, |
| "step": 1229500 |
| }, |
| { |
| "epoch": 10.08, |
| "learning_rate": 5.988755363395749e-06, |
| "loss": 3.0065, |
| "step": 1230000 |
| }, |
| { |
| "epoch": 10.08, |
| "eval_accuracy": 0.48933464137984234, |
| "eval_loss": 2.8635189533233643, |
| "eval_runtime": 406.4876, |
| "eval_samples_per_second": 758.587, |
| "eval_steps_per_second": 15.806, |
| "step": 1230000 |
| }, |
| { |
| "epoch": 10.08, |
| "learning_rate": 5.98711141067583e-06, |
| "loss": 2.998, |
| "step": 1230500 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 5.98546745795591e-06, |
| "loss": 3.0041, |
| "step": 1231000 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 5.9838235052359895e-06, |
| "loss": 3.0004, |
| "step": 1231500 |
| }, |
| { |
| "epoch": 10.09, |
| "learning_rate": 5.98217955251607e-06, |
| "loss": 3.0011, |
| "step": 1232000 |
| }, |
| { |
| "epoch": 10.1, |
| "learning_rate": 5.98053559979615e-06, |
| "loss": 3.0099, |
| "step": 1232500 |
| }, |
| { |
| "epoch": 10.1, |
| "learning_rate": 5.978891647076231e-06, |
| "loss": 3.0018, |
| "step": 1233000 |
| }, |
| { |
| "epoch": 10.11, |
| "learning_rate": 5.977247694356311e-06, |
| "loss": 2.9946, |
| "step": 1233500 |
| }, |
| { |
| "epoch": 10.11, |
| "learning_rate": 5.975603741636391e-06, |
| "loss": 3.0006, |
| "step": 1234000 |
| }, |
| { |
| "epoch": 10.11, |
| "learning_rate": 5.973959788916471e-06, |
| "loss": 2.9976, |
| "step": 1234500 |
| }, |
| { |
| "epoch": 10.12, |
| "learning_rate": 5.972315836196551e-06, |
| "loss": 2.9933, |
| "step": 1235000 |
| }, |
| { |
| "epoch": 10.12, |
| "learning_rate": 5.970671883476632e-06, |
| "loss": 2.997, |
| "step": 1235500 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 5.969027930756712e-06, |
| "loss": 3.0016, |
| "step": 1236000 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 5.9673839780367925e-06, |
| "loss": 2.9995, |
| "step": 1236500 |
| }, |
| { |
| "epoch": 10.13, |
| "learning_rate": 5.965740025316873e-06, |
| "loss": 3.0035, |
| "step": 1237000 |
| }, |
| { |
| "epoch": 10.14, |
| "learning_rate": 5.964096072596952e-06, |
| "loss": 3.002, |
| "step": 1237500 |
| }, |
| { |
| "epoch": 10.14, |
| "learning_rate": 5.962452119877032e-06, |
| "loss": 2.9986, |
| "step": 1238000 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 5.960808167157113e-06, |
| "loss": 3.001, |
| "step": 1238500 |
| }, |
| { |
| "epoch": 10.15, |
| "learning_rate": 5.959164214437193e-06, |
| "loss": 2.9956, |
| "step": 1239000 |
| }, |
| { |
| "epoch": 10.16, |
| "learning_rate": 5.957520261717273e-06, |
| "loss": 3.0005, |
| "step": 1239500 |
| }, |
| { |
| "epoch": 10.16, |
| "learning_rate": 5.955876308997353e-06, |
| "loss": 2.9997, |
| "step": 1240000 |
| }, |
| { |
| "epoch": 10.16, |
| "learning_rate": 5.954232356277434e-06, |
| "loss": 2.9939, |
| "step": 1240500 |
| }, |
| { |
| "epoch": 10.17, |
| "learning_rate": 5.9525884035575145e-06, |
| "loss": 2.9964, |
| "step": 1241000 |
| }, |
| { |
| "epoch": 10.17, |
| "learning_rate": 5.950944450837595e-06, |
| "loss": 2.9955, |
| "step": 1241500 |
| }, |
| { |
| "epoch": 10.18, |
| "learning_rate": 5.949300498117675e-06, |
| "loss": 3.0, |
| "step": 1242000 |
| }, |
| { |
| "epoch": 10.18, |
| "learning_rate": 5.947656545397755e-06, |
| "loss": 3.0017, |
| "step": 1242500 |
| }, |
| { |
| "epoch": 10.18, |
| "learning_rate": 5.946012592677836e-06, |
| "loss": 3.0005, |
| "step": 1243000 |
| }, |
| { |
| "epoch": 10.19, |
| "learning_rate": 5.944368639957915e-06, |
| "loss": 2.9954, |
| "step": 1243500 |
| }, |
| { |
| "epoch": 10.19, |
| "learning_rate": 5.942724687237995e-06, |
| "loss": 2.9982, |
| "step": 1244000 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 5.941080734518075e-06, |
| "loss": 3.0029, |
| "step": 1244500 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 5.9394367817981555e-06, |
| "loss": 2.9995, |
| "step": 1245000 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 5.9377928290782365e-06, |
| "loss": 3.0004, |
| "step": 1245500 |
| }, |
| { |
| "epoch": 10.21, |
| "learning_rate": 5.936148876358317e-06, |
| "loss": 3.0033, |
| "step": 1246000 |
| }, |
| { |
| "epoch": 10.21, |
| "learning_rate": 5.934504923638397e-06, |
| "loss": 2.9923, |
| "step": 1246500 |
| }, |
| { |
| "epoch": 10.22, |
| "learning_rate": 5.932860970918477e-06, |
| "loss": 2.9955, |
| "step": 1247000 |
| }, |
| { |
| "epoch": 10.22, |
| "learning_rate": 5.931217018198557e-06, |
| "loss": 2.9977, |
| "step": 1247500 |
| }, |
| { |
| "epoch": 10.22, |
| "learning_rate": 5.929573065478638e-06, |
| "loss": 2.9962, |
| "step": 1248000 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 5.927929112758718e-06, |
| "loss": 2.9916, |
| "step": 1248500 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 5.926285160038798e-06, |
| "loss": 2.9935, |
| "step": 1249000 |
| }, |
| { |
| "epoch": 10.24, |
| "learning_rate": 5.9246412073188775e-06, |
| "loss": 2.9927, |
| "step": 1249500 |
| }, |
| { |
| "epoch": 10.24, |
| "learning_rate": 5.922997254598958e-06, |
| "loss": 2.9976, |
| "step": 1250000 |
| }, |
| { |
| "epoch": 10.25, |
| "learning_rate": 5.921353301879038e-06, |
| "loss": 2.9944, |
| "step": 1250500 |
| }, |
| { |
| "epoch": 10.25, |
| "learning_rate": 5.919709349159119e-06, |
| "loss": 3.0006, |
| "step": 1251000 |
| }, |
| { |
| "epoch": 10.25, |
| "learning_rate": 5.918065396439199e-06, |
| "loss": 2.9983, |
| "step": 1251500 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 5.916421443719279e-06, |
| "loss": 2.9885, |
| "step": 1252000 |
| }, |
| { |
| "epoch": 10.26, |
| "learning_rate": 5.914777490999359e-06, |
| "loss": 2.9992, |
| "step": 1252500 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 5.91313353827944e-06, |
| "loss": 3.0021, |
| "step": 1253000 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 5.91148958555952e-06, |
| "loss": 2.9901, |
| "step": 1253500 |
| }, |
| { |
| "epoch": 10.27, |
| "learning_rate": 5.9098456328396e-06, |
| "loss": 2.9947, |
| "step": 1254000 |
| }, |
| { |
| "epoch": 10.28, |
| "learning_rate": 5.9082016801196804e-06, |
| "loss": 2.9951, |
| "step": 1254500 |
| }, |
| { |
| "epoch": 10.28, |
| "learning_rate": 5.9065577273997606e-06, |
| "loss": 2.9973, |
| "step": 1255000 |
| }, |
| { |
| "epoch": 10.29, |
| "learning_rate": 5.90491377467984e-06, |
| "loss": 2.9943, |
| "step": 1255500 |
| }, |
| { |
| "epoch": 10.29, |
| "learning_rate": 5.903269821959921e-06, |
| "loss": 2.9956, |
| "step": 1256000 |
| }, |
| { |
| "epoch": 10.29, |
| "learning_rate": 5.901625869240001e-06, |
| "loss": 2.9963, |
| "step": 1256500 |
| }, |
| { |
| "epoch": 10.3, |
| "learning_rate": 5.899981916520081e-06, |
| "loss": 2.9933, |
| "step": 1257000 |
| }, |
| { |
| "epoch": 10.3, |
| "learning_rate": 5.898337963800161e-06, |
| "loss": 2.9907, |
| "step": 1257500 |
| }, |
| { |
| "epoch": 10.31, |
| "learning_rate": 5.896694011080241e-06, |
| "loss": 2.9903, |
| "step": 1258000 |
| }, |
| { |
| "epoch": 10.31, |
| "learning_rate": 5.895050058360322e-06, |
| "loss": 2.9945, |
| "step": 1258500 |
| }, |
| { |
| "epoch": 10.31, |
| "learning_rate": 5.8934061056404024e-06, |
| "loss": 2.9894, |
| "step": 1259000 |
| }, |
| { |
| "epoch": 10.32, |
| "learning_rate": 5.8917621529204826e-06, |
| "loss": 2.9936, |
| "step": 1259500 |
| }, |
| { |
| "epoch": 10.32, |
| "learning_rate": 5.890118200200563e-06, |
| "loss": 2.9983, |
| "step": 1260000 |
| }, |
| { |
| "epoch": 10.32, |
| "eval_accuracy": 0.4899989186098096, |
| "eval_loss": 2.856139659881592, |
| "eval_runtime": 405.6967, |
| "eval_samples_per_second": 760.065, |
| "eval_steps_per_second": 15.837, |
| "step": 1260000 |
| }, |
| { |
| "epoch": 10.33, |
| "learning_rate": 5.888474247480644e-06, |
| "loss": 2.9992, |
| "step": 1260500 |
| }, |
| { |
| "epoch": 10.33, |
| "learning_rate": 5.886830294760724e-06, |
| "loss": 2.9973, |
| "step": 1261000 |
| }, |
| { |
| "epoch": 10.34, |
| "learning_rate": 5.885186342040803e-06, |
| "loss": 2.996, |
| "step": 1261500 |
| }, |
| { |
| "epoch": 10.34, |
| "learning_rate": 5.883542389320883e-06, |
| "loss": 2.9911, |
| "step": 1262000 |
| }, |
| { |
| "epoch": 10.34, |
| "learning_rate": 5.881898436600963e-06, |
| "loss": 2.9913, |
| "step": 1262500 |
| }, |
| { |
| "epoch": 10.35, |
| "learning_rate": 5.8802544838810434e-06, |
| "loss": 2.9906, |
| "step": 1263000 |
| }, |
| { |
| "epoch": 10.35, |
| "learning_rate": 5.878610531161124e-06, |
| "loss": 3.0007, |
| "step": 1263500 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 5.8769665784412045e-06, |
| "loss": 2.9983, |
| "step": 1264000 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 5.875322625721285e-06, |
| "loss": 2.9929, |
| "step": 1264500 |
| }, |
| { |
| "epoch": 10.36, |
| "learning_rate": 5.873678673001365e-06, |
| "loss": 2.999, |
| "step": 1265000 |
| }, |
| { |
| "epoch": 10.37, |
| "learning_rate": 5.872034720281446e-06, |
| "loss": 2.9903, |
| "step": 1265500 |
| }, |
| { |
| "epoch": 10.37, |
| "learning_rate": 5.870390767561526e-06, |
| "loss": 2.9906, |
| "step": 1266000 |
| }, |
| { |
| "epoch": 10.38, |
| "learning_rate": 5.868746814841606e-06, |
| "loss": 2.99, |
| "step": 1266500 |
| }, |
| { |
| "epoch": 10.38, |
| "learning_rate": 5.867102862121686e-06, |
| "loss": 2.9936, |
| "step": 1267000 |
| }, |
| { |
| "epoch": 10.38, |
| "learning_rate": 5.8654589094017654e-06, |
| "loss": 2.9955, |
| "step": 1267500 |
| }, |
| { |
| "epoch": 10.39, |
| "learning_rate": 5.8638149566818456e-06, |
| "loss": 2.9949, |
| "step": 1268000 |
| }, |
| { |
| "epoch": 10.39, |
| "learning_rate": 5.8621710039619265e-06, |
| "loss": 2.9959, |
| "step": 1268500 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 5.860527051242007e-06, |
| "loss": 2.9974, |
| "step": 1269000 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 5.858883098522087e-06, |
| "loss": 2.9972, |
| "step": 1269500 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 5.857239145802167e-06, |
| "loss": 2.9952, |
| "step": 1270000 |
| }, |
| { |
| "epoch": 10.41, |
| "learning_rate": 5.855595193082247e-06, |
| "loss": 2.9968, |
| "step": 1270500 |
| }, |
| { |
| "epoch": 10.41, |
| "learning_rate": 5.853951240362328e-06, |
| "loss": 2.9893, |
| "step": 1271000 |
| }, |
| { |
| "epoch": 10.42, |
| "learning_rate": 5.852307287642408e-06, |
| "loss": 2.9974, |
| "step": 1271500 |
| }, |
| { |
| "epoch": 10.42, |
| "learning_rate": 5.850663334922488e-06, |
| "loss": 2.9862, |
| "step": 1272000 |
| }, |
| { |
| "epoch": 10.43, |
| "learning_rate": 5.849019382202568e-06, |
| "loss": 2.9944, |
| "step": 1272500 |
| }, |
| { |
| "epoch": 10.43, |
| "learning_rate": 5.847375429482649e-06, |
| "loss": 2.9933, |
| "step": 1273000 |
| }, |
| { |
| "epoch": 10.43, |
| "learning_rate": 5.845731476762729e-06, |
| "loss": 2.993, |
| "step": 1273500 |
| }, |
| { |
| "epoch": 10.44, |
| "learning_rate": 5.844087524042809e-06, |
| "loss": 2.9942, |
| "step": 1274000 |
| }, |
| { |
| "epoch": 10.44, |
| "learning_rate": 5.842443571322889e-06, |
| "loss": 2.9952, |
| "step": 1274500 |
| }, |
| { |
| "epoch": 10.45, |
| "learning_rate": 5.840799618602969e-06, |
| "loss": 2.9978, |
| "step": 1275000 |
| }, |
| { |
| "epoch": 10.45, |
| "learning_rate": 5.839155665883049e-06, |
| "loss": 2.9936, |
| "step": 1275500 |
| }, |
| { |
| "epoch": 10.45, |
| "learning_rate": 5.83751171316313e-06, |
| "loss": 2.9934, |
| "step": 1276000 |
| }, |
| { |
| "epoch": 10.46, |
| "learning_rate": 5.83586776044321e-06, |
| "loss": 2.9946, |
| "step": 1276500 |
| }, |
| { |
| "epoch": 10.46, |
| "learning_rate": 5.83422380772329e-06, |
| "loss": 2.9954, |
| "step": 1277000 |
| }, |
| { |
| "epoch": 10.47, |
| "learning_rate": 5.8325798550033705e-06, |
| "loss": 2.9865, |
| "step": 1277500 |
| }, |
| { |
| "epoch": 10.47, |
| "learning_rate": 5.8309359022834515e-06, |
| "loss": 2.9931, |
| "step": 1278000 |
| }, |
| { |
| "epoch": 10.47, |
| "learning_rate": 5.829291949563532e-06, |
| "loss": 2.9937, |
| "step": 1278500 |
| }, |
| { |
| "epoch": 10.48, |
| "learning_rate": 5.827647996843612e-06, |
| "loss": 2.9888, |
| "step": 1279000 |
| }, |
| { |
| "epoch": 10.48, |
| "learning_rate": 5.826004044123692e-06, |
| "loss": 2.9879, |
| "step": 1279500 |
| }, |
| { |
| "epoch": 10.49, |
| "learning_rate": 5.824360091403771e-06, |
| "loss": 2.9912, |
| "step": 1280000 |
| }, |
| { |
| "epoch": 10.49, |
| "learning_rate": 5.822716138683851e-06, |
| "loss": 3.0024, |
| "step": 1280500 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 5.821072185963932e-06, |
| "loss": 2.9928, |
| "step": 1281000 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 5.819428233244012e-06, |
| "loss": 2.9852, |
| "step": 1281500 |
| }, |
| { |
| "epoch": 10.5, |
| "learning_rate": 5.8177842805240925e-06, |
| "loss": 2.9819, |
| "step": 1282000 |
| }, |
| { |
| "epoch": 10.51, |
| "learning_rate": 5.816140327804173e-06, |
| "loss": 2.9923, |
| "step": 1282500 |
| }, |
| { |
| "epoch": 10.51, |
| "learning_rate": 5.814496375084253e-06, |
| "loss": 2.9913, |
| "step": 1283000 |
| }, |
| { |
| "epoch": 10.52, |
| "learning_rate": 5.812852422364334e-06, |
| "loss": 2.985, |
| "step": 1283500 |
| }, |
| { |
| "epoch": 10.52, |
| "learning_rate": 5.811208469644414e-06, |
| "loss": 2.9962, |
| "step": 1284000 |
| }, |
| { |
| "epoch": 10.52, |
| "learning_rate": 5.809564516924494e-06, |
| "loss": 2.9926, |
| "step": 1284500 |
| }, |
| { |
| "epoch": 10.53, |
| "learning_rate": 5.807920564204574e-06, |
| "loss": 2.9897, |
| "step": 1285000 |
| }, |
| { |
| "epoch": 10.53, |
| "learning_rate": 5.806276611484655e-06, |
| "loss": 2.9893, |
| "step": 1285500 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 5.8046326587647335e-06, |
| "loss": 2.9912, |
| "step": 1286000 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 5.8029887060448145e-06, |
| "loss": 2.9872, |
| "step": 1286500 |
| }, |
| { |
| "epoch": 10.54, |
| "learning_rate": 5.801344753324895e-06, |
| "loss": 2.9919, |
| "step": 1287000 |
| }, |
| { |
| "epoch": 10.55, |
| "learning_rate": 5.799700800604975e-06, |
| "loss": 2.991, |
| "step": 1287500 |
| }, |
| { |
| "epoch": 10.55, |
| "learning_rate": 5.798056847885055e-06, |
| "loss": 2.9917, |
| "step": 1288000 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 5.796412895165136e-06, |
| "loss": 2.9909, |
| "step": 1288500 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 5.794768942445216e-06, |
| "loss": 2.988, |
| "step": 1289000 |
| }, |
| { |
| "epoch": 10.56, |
| "learning_rate": 5.793124989725296e-06, |
| "loss": 2.9947, |
| "step": 1289500 |
| }, |
| { |
| "epoch": 10.57, |
| "learning_rate": 5.791481037005376e-06, |
| "loss": 2.9834, |
| "step": 1290000 |
| }, |
| { |
| "epoch": 10.57, |
| "eval_accuracy": 0.490716405708734, |
| "eval_loss": 2.852367401123047, |
| "eval_runtime": 409.9329, |
| "eval_samples_per_second": 752.211, |
| "eval_steps_per_second": 15.673, |
| "step": 1290000 |
| }, |
| { |
| "epoch": 10.57, |
| "learning_rate": 5.789837084285457e-06, |
| "loss": 2.9875, |
| "step": 1290500 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 5.788193131565537e-06, |
| "loss": 2.99, |
| "step": 1291000 |
| }, |
| { |
| "epoch": 10.58, |
| "learning_rate": 5.7865491788456174e-06, |
| "loss": 2.9915, |
| "step": 1291500 |
| }, |
| { |
| "epoch": 10.59, |
| "learning_rate": 5.784905226125697e-06, |
| "loss": 2.989, |
| "step": 1292000 |
| }, |
| { |
| "epoch": 10.59, |
| "learning_rate": 5.783261273405777e-06, |
| "loss": 2.9908, |
| "step": 1292500 |
| }, |
| { |
| "epoch": 10.59, |
| "learning_rate": 5.781617320685857e-06, |
| "loss": 2.9896, |
| "step": 1293000 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 5.779973367965938e-06, |
| "loss": 2.9904, |
| "step": 1293500 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 5.778329415246018e-06, |
| "loss": 2.9917, |
| "step": 1294000 |
| }, |
| { |
| "epoch": 10.61, |
| "learning_rate": 5.776685462526098e-06, |
| "loss": 2.9959, |
| "step": 1294500 |
| }, |
| { |
| "epoch": 10.61, |
| "learning_rate": 5.775041509806178e-06, |
| "loss": 2.9892, |
| "step": 1295000 |
| }, |
| { |
| "epoch": 10.61, |
| "learning_rate": 5.7733975570862584e-06, |
| "loss": 2.9959, |
| "step": 1295500 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 5.771753604366339e-06, |
| "loss": 2.9883, |
| "step": 1296000 |
| }, |
| { |
| "epoch": 10.62, |
| "learning_rate": 5.7701096516464195e-06, |
| "loss": 2.9849, |
| "step": 1296500 |
| }, |
| { |
| "epoch": 10.63, |
| "learning_rate": 5.7684656989265e-06, |
| "loss": 2.99, |
| "step": 1297000 |
| }, |
| { |
| "epoch": 10.63, |
| "learning_rate": 5.76682174620658e-06, |
| "loss": 2.9891, |
| "step": 1297500 |
| }, |
| { |
| "epoch": 10.63, |
| "learning_rate": 5.765177793486659e-06, |
| "loss": 2.9913, |
| "step": 1298000 |
| }, |
| { |
| "epoch": 10.64, |
| "learning_rate": 5.763533840766739e-06, |
| "loss": 2.9844, |
| "step": 1298500 |
| }, |
| { |
| "epoch": 10.64, |
| "learning_rate": 5.76188988804682e-06, |
| "loss": 2.988, |
| "step": 1299000 |
| }, |
| { |
| "epoch": 10.65, |
| "learning_rate": 5.7602459353269e-06, |
| "loss": 2.9861, |
| "step": 1299500 |
| }, |
| { |
| "epoch": 10.65, |
| "learning_rate": 5.7586019826069804e-06, |
| "loss": 2.9871, |
| "step": 1300000 |
| }, |
| { |
| "epoch": 10.65, |
| "learning_rate": 5.7569580298870606e-06, |
| "loss": 2.9841, |
| "step": 1300500 |
| }, |
| { |
| "epoch": 10.66, |
| "learning_rate": 5.7553140771671415e-06, |
| "loss": 2.9844, |
| "step": 1301000 |
| }, |
| { |
| "epoch": 10.66, |
| "learning_rate": 5.753670124447222e-06, |
| "loss": 2.9895, |
| "step": 1301500 |
| }, |
| { |
| "epoch": 10.67, |
| "learning_rate": 5.752026171727302e-06, |
| "loss": 2.9843, |
| "step": 1302000 |
| }, |
| { |
| "epoch": 10.67, |
| "learning_rate": 5.750382219007382e-06, |
| "loss": 2.9913, |
| "step": 1302500 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 5.748738266287462e-06, |
| "loss": 2.9902, |
| "step": 1303000 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 5.747094313567543e-06, |
| "loss": 2.9856, |
| "step": 1303500 |
| }, |
| { |
| "epoch": 10.68, |
| "learning_rate": 5.745450360847622e-06, |
| "loss": 2.9884, |
| "step": 1304000 |
| }, |
| { |
| "epoch": 10.69, |
| "learning_rate": 5.743806408127702e-06, |
| "loss": 2.9866, |
| "step": 1304500 |
| }, |
| { |
| "epoch": 10.69, |
| "learning_rate": 5.7421624554077825e-06, |
| "loss": 2.9871, |
| "step": 1305000 |
| }, |
| { |
| "epoch": 10.7, |
| "learning_rate": 5.740518502687863e-06, |
| "loss": 2.9865, |
| "step": 1305500 |
| }, |
| { |
| "epoch": 10.7, |
| "learning_rate": 5.738874549967944e-06, |
| "loss": 2.9871, |
| "step": 1306000 |
| }, |
| { |
| "epoch": 10.7, |
| "learning_rate": 5.737230597248024e-06, |
| "loss": 2.9857, |
| "step": 1306500 |
| }, |
| { |
| "epoch": 10.71, |
| "learning_rate": 5.735586644528104e-06, |
| "loss": 2.9791, |
| "step": 1307000 |
| }, |
| { |
| "epoch": 10.71, |
| "learning_rate": 5.733942691808184e-06, |
| "loss": 2.9903, |
| "step": 1307500 |
| }, |
| { |
| "epoch": 10.72, |
| "learning_rate": 5.732298739088264e-06, |
| "loss": 2.9838, |
| "step": 1308000 |
| }, |
| { |
| "epoch": 10.72, |
| "learning_rate": 5.730654786368345e-06, |
| "loss": 2.9891, |
| "step": 1308500 |
| }, |
| { |
| "epoch": 10.72, |
| "learning_rate": 5.729010833648425e-06, |
| "loss": 2.9863, |
| "step": 1309000 |
| }, |
| { |
| "epoch": 10.73, |
| "learning_rate": 5.727366880928505e-06, |
| "loss": 2.9899, |
| "step": 1309500 |
| }, |
| { |
| "epoch": 10.73, |
| "learning_rate": 5.725722928208585e-06, |
| "loss": 2.9845, |
| "step": 1310000 |
| }, |
| { |
| "epoch": 10.74, |
| "learning_rate": 5.724078975488665e-06, |
| "loss": 2.9834, |
| "step": 1310500 |
| }, |
| { |
| "epoch": 10.74, |
| "learning_rate": 5.722435022768745e-06, |
| "loss": 2.9827, |
| "step": 1311000 |
| }, |
| { |
| "epoch": 10.74, |
| "learning_rate": 5.720791070048826e-06, |
| "loss": 2.9816, |
| "step": 1311500 |
| }, |
| { |
| "epoch": 10.75, |
| "learning_rate": 5.719147117328906e-06, |
| "loss": 2.9773, |
| "step": 1312000 |
| }, |
| { |
| "epoch": 10.75, |
| "learning_rate": 5.717503164608986e-06, |
| "loss": 2.9882, |
| "step": 1312500 |
| }, |
| { |
| "epoch": 10.76, |
| "learning_rate": 5.715859211889066e-06, |
| "loss": 2.9822, |
| "step": 1313000 |
| }, |
| { |
| "epoch": 10.76, |
| "learning_rate": 5.714215259169147e-06, |
| "loss": 2.9863, |
| "step": 1313500 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 5.712571306449227e-06, |
| "loss": 2.9802, |
| "step": 1314000 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 5.7109273537293075e-06, |
| "loss": 2.9899, |
| "step": 1314500 |
| }, |
| { |
| "epoch": 10.77, |
| "learning_rate": 5.709283401009388e-06, |
| "loss": 2.9936, |
| "step": 1315000 |
| }, |
| { |
| "epoch": 10.78, |
| "learning_rate": 5.707639448289468e-06, |
| "loss": 2.9852, |
| "step": 1315500 |
| }, |
| { |
| "epoch": 10.78, |
| "learning_rate": 5.705995495569547e-06, |
| "loss": 2.9875, |
| "step": 1316000 |
| }, |
| { |
| "epoch": 10.79, |
| "learning_rate": 5.704351542849628e-06, |
| "loss": 2.9867, |
| "step": 1316500 |
| }, |
| { |
| "epoch": 10.79, |
| "learning_rate": 5.702707590129708e-06, |
| "loss": 2.982, |
| "step": 1317000 |
| }, |
| { |
| "epoch": 10.79, |
| "learning_rate": 5.701063637409788e-06, |
| "loss": 2.9805, |
| "step": 1317500 |
| }, |
| { |
| "epoch": 10.8, |
| "learning_rate": 5.699419684689868e-06, |
| "loss": 2.9825, |
| "step": 1318000 |
| }, |
| { |
| "epoch": 10.8, |
| "learning_rate": 5.697775731969949e-06, |
| "loss": 2.9785, |
| "step": 1318500 |
| }, |
| { |
| "epoch": 10.81, |
| "learning_rate": 5.6961317792500295e-06, |
| "loss": 2.9884, |
| "step": 1319000 |
| }, |
| { |
| "epoch": 10.81, |
| "learning_rate": 5.69448782653011e-06, |
| "loss": 2.9854, |
| "step": 1319500 |
| }, |
| { |
| "epoch": 10.81, |
| "learning_rate": 5.69284387381019e-06, |
| "loss": 2.9873, |
| "step": 1320000 |
| }, |
| { |
| "epoch": 10.81, |
| "eval_accuracy": 0.4910966306213036, |
| "eval_loss": 2.8484363555908203, |
| "eval_runtime": 403.7174, |
| "eval_samples_per_second": 763.792, |
| "eval_steps_per_second": 15.915, |
| "step": 1320000 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 5.69119992109027e-06, |
| "loss": 2.9893, |
| "step": 1320500 |
| }, |
| { |
| "epoch": 10.82, |
| "learning_rate": 5.689555968370351e-06, |
| "loss": 2.9849, |
| "step": 1321000 |
| }, |
| { |
| "epoch": 10.83, |
| "learning_rate": 5.687912015650431e-06, |
| "loss": 2.9878, |
| "step": 1321500 |
| }, |
| { |
| "epoch": 10.83, |
| "learning_rate": 5.68626806293051e-06, |
| "loss": 2.9807, |
| "step": 1322000 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 5.68462411021059e-06, |
| "loss": 2.9778, |
| "step": 1322500 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 5.6829801574906705e-06, |
| "loss": 2.9867, |
| "step": 1323000 |
| }, |
| { |
| "epoch": 10.84, |
| "learning_rate": 5.681336204770751e-06, |
| "loss": 2.9818, |
| "step": 1323500 |
| }, |
| { |
| "epoch": 10.85, |
| "learning_rate": 5.679692252050832e-06, |
| "loss": 2.9833, |
| "step": 1324000 |
| }, |
| { |
| "epoch": 10.85, |
| "learning_rate": 5.678048299330912e-06, |
| "loss": 2.982, |
| "step": 1324500 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 5.676404346610992e-06, |
| "loss": 2.9867, |
| "step": 1325000 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 5.674760393891072e-06, |
| "loss": 2.9878, |
| "step": 1325500 |
| }, |
| { |
| "epoch": 10.86, |
| "learning_rate": 5.673116441171153e-06, |
| "loss": 2.9814, |
| "step": 1326000 |
| }, |
| { |
| "epoch": 10.87, |
| "learning_rate": 5.671472488451233e-06, |
| "loss": 2.9849, |
| "step": 1326500 |
| }, |
| { |
| "epoch": 10.87, |
| "learning_rate": 5.669828535731313e-06, |
| "loss": 2.9777, |
| "step": 1327000 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 5.668184583011393e-06, |
| "loss": 2.9794, |
| "step": 1327500 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 5.6665406302914734e-06, |
| "loss": 2.983, |
| "step": 1328000 |
| }, |
| { |
| "epoch": 10.88, |
| "learning_rate": 5.664896677571553e-06, |
| "loss": 2.9901, |
| "step": 1328500 |
| }, |
| { |
| "epoch": 10.89, |
| "learning_rate": 5.663252724851634e-06, |
| "loss": 2.9791, |
| "step": 1329000 |
| }, |
| { |
| "epoch": 10.89, |
| "learning_rate": 5.661608772131714e-06, |
| "loss": 2.9856, |
| "step": 1329500 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 5.659964819411794e-06, |
| "loss": 2.9838, |
| "step": 1330000 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 5.658320866691874e-06, |
| "loss": 2.9832, |
| "step": 1330500 |
| }, |
| { |
| "epoch": 10.9, |
| "learning_rate": 5.656676913971954e-06, |
| "loss": 2.985, |
| "step": 1331000 |
| }, |
| { |
| "epoch": 10.91, |
| "learning_rate": 5.655032961252035e-06, |
| "loss": 2.9829, |
| "step": 1331500 |
| }, |
| { |
| "epoch": 10.91, |
| "learning_rate": 5.653389008532115e-06, |
| "loss": 2.9868, |
| "step": 1332000 |
| }, |
| { |
| "epoch": 10.92, |
| "learning_rate": 5.6517450558121954e-06, |
| "loss": 2.9833, |
| "step": 1332500 |
| }, |
| { |
| "epoch": 10.92, |
| "learning_rate": 5.6501011030922756e-06, |
| "loss": 2.9865, |
| "step": 1333000 |
| }, |
| { |
| "epoch": 10.93, |
| "learning_rate": 5.6484571503723565e-06, |
| "loss": 2.9829, |
| "step": 1333500 |
| }, |
| { |
| "epoch": 10.93, |
| "learning_rate": 5.646813197652437e-06, |
| "loss": 2.9747, |
| "step": 1334000 |
| }, |
| { |
| "epoch": 10.93, |
| "learning_rate": 5.645169244932516e-06, |
| "loss": 2.9909, |
| "step": 1334500 |
| }, |
| { |
| "epoch": 10.94, |
| "learning_rate": 5.643525292212596e-06, |
| "loss": 2.9836, |
| "step": 1335000 |
| }, |
| { |
| "epoch": 10.94, |
| "learning_rate": 5.641881339492676e-06, |
| "loss": 2.9859, |
| "step": 1335500 |
| }, |
| { |
| "epoch": 10.95, |
| "learning_rate": 5.640237386772756e-06, |
| "loss": 2.9849, |
| "step": 1336000 |
| }, |
| { |
| "epoch": 10.95, |
| "learning_rate": 5.638593434052837e-06, |
| "loss": 2.9826, |
| "step": 1336500 |
| }, |
| { |
| "epoch": 10.95, |
| "learning_rate": 5.636949481332917e-06, |
| "loss": 2.9834, |
| "step": 1337000 |
| }, |
| { |
| "epoch": 10.96, |
| "learning_rate": 5.6353055286129975e-06, |
| "loss": 2.9836, |
| "step": 1337500 |
| }, |
| { |
| "epoch": 10.96, |
| "learning_rate": 5.633661575893078e-06, |
| "loss": 2.9822, |
| "step": 1338000 |
| }, |
| { |
| "epoch": 10.97, |
| "learning_rate": 5.632017623173159e-06, |
| "loss": 2.9908, |
| "step": 1338500 |
| }, |
| { |
| "epoch": 10.97, |
| "learning_rate": 5.630373670453239e-06, |
| "loss": 2.9815, |
| "step": 1339000 |
| }, |
| { |
| "epoch": 10.97, |
| "learning_rate": 5.628729717733319e-06, |
| "loss": 2.9858, |
| "step": 1339500 |
| }, |
| { |
| "epoch": 10.98, |
| "learning_rate": 5.627085765013399e-06, |
| "loss": 2.9802, |
| "step": 1340000 |
| }, |
| { |
| "epoch": 10.98, |
| "learning_rate": 5.625441812293478e-06, |
| "loss": 2.9778, |
| "step": 1340500 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 5.623797859573558e-06, |
| "loss": 2.9915, |
| "step": 1341000 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 5.622153906853639e-06, |
| "loss": 2.9819, |
| "step": 1341500 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 5.6205099541337195e-06, |
| "loss": 2.9803, |
| "step": 1342000 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 5.6188660014138e-06, |
| "loss": 2.9769, |
| "step": 1342500 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 5.61722204869388e-06, |
| "loss": 2.9809, |
| "step": 1343000 |
| }, |
| { |
| "epoch": 11.01, |
| "learning_rate": 5.61557809597396e-06, |
| "loss": 2.981, |
| "step": 1343500 |
| }, |
| { |
| "epoch": 11.01, |
| "learning_rate": 5.613934143254041e-06, |
| "loss": 2.9829, |
| "step": 1344000 |
| }, |
| { |
| "epoch": 11.02, |
| "learning_rate": 5.612290190534121e-06, |
| "loss": 2.9789, |
| "step": 1344500 |
| }, |
| { |
| "epoch": 11.02, |
| "learning_rate": 5.610646237814201e-06, |
| "loss": 2.9819, |
| "step": 1345000 |
| }, |
| { |
| "epoch": 11.02, |
| "learning_rate": 5.609002285094281e-06, |
| "loss": 2.978, |
| "step": 1345500 |
| }, |
| { |
| "epoch": 11.03, |
| "learning_rate": 5.607358332374362e-06, |
| "loss": 2.987, |
| "step": 1346000 |
| }, |
| { |
| "epoch": 11.03, |
| "learning_rate": 5.6057143796544415e-06, |
| "loss": 2.9808, |
| "step": 1346500 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 5.604070426934522e-06, |
| "loss": 2.9786, |
| "step": 1347000 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 5.602426474214602e-06, |
| "loss": 2.9847, |
| "step": 1347500 |
| }, |
| { |
| "epoch": 11.04, |
| "learning_rate": 5.600782521494682e-06, |
| "loss": 2.9829, |
| "step": 1348000 |
| }, |
| { |
| "epoch": 11.05, |
| "learning_rate": 5.599138568774762e-06, |
| "loss": 2.9851, |
| "step": 1348500 |
| }, |
| { |
| "epoch": 11.05, |
| "learning_rate": 5.597494616054843e-06, |
| "loss": 2.9856, |
| "step": 1349000 |
| }, |
| { |
| "epoch": 11.06, |
| "learning_rate": 5.595850663334923e-06, |
| "loss": 2.9804, |
| "step": 1349500 |
| }, |
| { |
| "epoch": 11.06, |
| "learning_rate": 5.594206710615003e-06, |
| "loss": 2.978, |
| "step": 1350000 |
| }, |
| { |
| "epoch": 11.06, |
| "eval_accuracy": 0.4923695335880713, |
| "eval_loss": 2.841365337371826, |
| "eval_runtime": 412.3409, |
| "eval_samples_per_second": 747.818, |
| "eval_steps_per_second": 15.582, |
| "step": 1350000 |
| }, |
| { |
| "epoch": 11.06, |
| "learning_rate": 5.592562757895083e-06, |
| "loss": 2.9795, |
| "step": 1350500 |
| }, |
| { |
| "epoch": 11.07, |
| "learning_rate": 5.590918805175164e-06, |
| "loss": 2.979, |
| "step": 1351000 |
| }, |
| { |
| "epoch": 11.07, |
| "learning_rate": 5.5892748524552445e-06, |
| "loss": 2.9805, |
| "step": 1351500 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 5.587630899735325e-06, |
| "loss": 2.9765, |
| "step": 1352000 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 5.585986947015404e-06, |
| "loss": 2.984, |
| "step": 1352500 |
| }, |
| { |
| "epoch": 11.08, |
| "learning_rate": 5.584342994295484e-06, |
| "loss": 2.9842, |
| "step": 1353000 |
| }, |
| { |
| "epoch": 11.09, |
| "learning_rate": 5.582699041575564e-06, |
| "loss": 2.9697, |
| "step": 1353500 |
| }, |
| { |
| "epoch": 11.09, |
| "learning_rate": 5.581055088855645e-06, |
| "loss": 2.9756, |
| "step": 1354000 |
| }, |
| { |
| "epoch": 11.1, |
| "learning_rate": 5.579411136135725e-06, |
| "loss": 2.974, |
| "step": 1354500 |
| }, |
| { |
| "epoch": 11.1, |
| "learning_rate": 5.577767183415805e-06, |
| "loss": 2.9776, |
| "step": 1355000 |
| }, |
| { |
| "epoch": 11.11, |
| "learning_rate": 5.5761232306958855e-06, |
| "loss": 2.9763, |
| "step": 1355500 |
| }, |
| { |
| "epoch": 11.11, |
| "learning_rate": 5.574479277975966e-06, |
| "loss": 2.9764, |
| "step": 1356000 |
| }, |
| { |
| "epoch": 11.11, |
| "learning_rate": 5.572835325256047e-06, |
| "loss": 2.9828, |
| "step": 1356500 |
| }, |
| { |
| "epoch": 11.12, |
| "learning_rate": 5.571191372536127e-06, |
| "loss": 2.9758, |
| "step": 1357000 |
| }, |
| { |
| "epoch": 11.12, |
| "learning_rate": 5.569547419816207e-06, |
| "loss": 2.979, |
| "step": 1357500 |
| }, |
| { |
| "epoch": 11.13, |
| "learning_rate": 5.567903467096287e-06, |
| "loss": 2.9827, |
| "step": 1358000 |
| }, |
| { |
| "epoch": 11.13, |
| "learning_rate": 5.566259514376366e-06, |
| "loss": 2.9817, |
| "step": 1358500 |
| }, |
| { |
| "epoch": 11.13, |
| "learning_rate": 5.564615561656446e-06, |
| "loss": 2.9774, |
| "step": 1359000 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 5.562971608936527e-06, |
| "loss": 2.9756, |
| "step": 1359500 |
| }, |
| { |
| "epoch": 11.14, |
| "learning_rate": 5.5613276562166075e-06, |
| "loss": 2.9787, |
| "step": 1360000 |
| }, |
| { |
| "epoch": 11.15, |
| "learning_rate": 5.559683703496688e-06, |
| "loss": 2.9761, |
| "step": 1360500 |
| }, |
| { |
| "epoch": 11.15, |
| "learning_rate": 5.558039750776768e-06, |
| "loss": 2.9789, |
| "step": 1361000 |
| }, |
| { |
| "epoch": 11.15, |
| "learning_rate": 5.556395798056849e-06, |
| "loss": 2.9803, |
| "step": 1361500 |
| }, |
| { |
| "epoch": 11.16, |
| "learning_rate": 5.554751845336929e-06, |
| "loss": 2.9784, |
| "step": 1362000 |
| }, |
| { |
| "epoch": 11.16, |
| "learning_rate": 5.553107892617009e-06, |
| "loss": 2.9759, |
| "step": 1362500 |
| }, |
| { |
| "epoch": 11.17, |
| "learning_rate": 5.551463939897089e-06, |
| "loss": 2.9823, |
| "step": 1363000 |
| }, |
| { |
| "epoch": 11.17, |
| "learning_rate": 5.54981998717717e-06, |
| "loss": 2.9767, |
| "step": 1363500 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 5.54817603445725e-06, |
| "loss": 2.978, |
| "step": 1364000 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 5.5465320817373294e-06, |
| "loss": 2.9843, |
| "step": 1364500 |
| }, |
| { |
| "epoch": 11.18, |
| "learning_rate": 5.5448881290174096e-06, |
| "loss": 2.9708, |
| "step": 1365000 |
| }, |
| { |
| "epoch": 11.19, |
| "learning_rate": 5.54324417629749e-06, |
| "loss": 2.9756, |
| "step": 1365500 |
| }, |
| { |
| "epoch": 11.19, |
| "learning_rate": 5.54160022357757e-06, |
| "loss": 2.973, |
| "step": 1366000 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 5.539956270857651e-06, |
| "loss": 2.981, |
| "step": 1366500 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 5.538312318137731e-06, |
| "loss": 2.971, |
| "step": 1367000 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 5.536668365417811e-06, |
| "loss": 2.9819, |
| "step": 1367500 |
| }, |
| { |
| "epoch": 11.21, |
| "learning_rate": 5.535024412697891e-06, |
| "loss": 2.975, |
| "step": 1368000 |
| }, |
| { |
| "epoch": 11.21, |
| "learning_rate": 5.533380459977971e-06, |
| "loss": 2.9697, |
| "step": 1368500 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 5.531736507258052e-06, |
| "loss": 2.9745, |
| "step": 1369000 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 5.530092554538132e-06, |
| "loss": 2.9812, |
| "step": 1369500 |
| }, |
| { |
| "epoch": 11.22, |
| "learning_rate": 5.5284486018182125e-06, |
| "loss": 2.9816, |
| "step": 1370000 |
| }, |
| { |
| "epoch": 11.23, |
| "learning_rate": 5.526804649098292e-06, |
| "loss": 2.9712, |
| "step": 1370500 |
| }, |
| { |
| "epoch": 11.23, |
| "learning_rate": 5.525160696378372e-06, |
| "loss": 2.9771, |
| "step": 1371000 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 5.523516743658452e-06, |
| "loss": 2.9747, |
| "step": 1371500 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 5.521872790938533e-06, |
| "loss": 2.9702, |
| "step": 1372000 |
| }, |
| { |
| "epoch": 11.24, |
| "learning_rate": 5.520228838218613e-06, |
| "loss": 2.9757, |
| "step": 1372500 |
| }, |
| { |
| "epoch": 11.25, |
| "learning_rate": 5.518584885498693e-06, |
| "loss": 2.9752, |
| "step": 1373000 |
| }, |
| { |
| "epoch": 11.25, |
| "learning_rate": 5.516940932778773e-06, |
| "loss": 2.9699, |
| "step": 1373500 |
| }, |
| { |
| "epoch": 11.26, |
| "learning_rate": 5.515296980058854e-06, |
| "loss": 2.9753, |
| "step": 1374000 |
| }, |
| { |
| "epoch": 11.26, |
| "learning_rate": 5.5136530273389345e-06, |
| "loss": 2.9799, |
| "step": 1374500 |
| }, |
| { |
| "epoch": 11.27, |
| "learning_rate": 5.512009074619015e-06, |
| "loss": 2.9813, |
| "step": 1375000 |
| }, |
| { |
| "epoch": 11.27, |
| "learning_rate": 5.510365121899095e-06, |
| "loss": 2.9736, |
| "step": 1375500 |
| }, |
| { |
| "epoch": 11.27, |
| "learning_rate": 5.508721169179175e-06, |
| "loss": 2.9802, |
| "step": 1376000 |
| }, |
| { |
| "epoch": 11.28, |
| "learning_rate": 5.507077216459256e-06, |
| "loss": 2.9686, |
| "step": 1376500 |
| }, |
| { |
| "epoch": 11.28, |
| "learning_rate": 5.505433263739335e-06, |
| "loss": 2.9786, |
| "step": 1377000 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 5.503789311019415e-06, |
| "loss": 2.9808, |
| "step": 1377500 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 5.502145358299495e-06, |
| "loss": 2.9748, |
| "step": 1378000 |
| }, |
| { |
| "epoch": 11.29, |
| "learning_rate": 5.5005014055795755e-06, |
| "loss": 2.9771, |
| "step": 1378500 |
| }, |
| { |
| "epoch": 11.3, |
| "learning_rate": 5.4988574528596565e-06, |
| "loss": 2.9765, |
| "step": 1379000 |
| }, |
| { |
| "epoch": 11.3, |
| "learning_rate": 5.497213500139737e-06, |
| "loss": 2.9745, |
| "step": 1379500 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 5.495569547419817e-06, |
| "loss": 2.9709, |
| "step": 1380000 |
| }, |
| { |
| "epoch": 11.31, |
| "eval_accuracy": 0.4926664563281789, |
| "eval_loss": 2.8374874591827393, |
| "eval_runtime": 405.084, |
| "eval_samples_per_second": 761.215, |
| "eval_steps_per_second": 15.861, |
| "step": 1380000 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 5.493925594699897e-06, |
| "loss": 2.9771, |
| "step": 1380500 |
| }, |
| { |
| "epoch": 11.31, |
| "learning_rate": 5.492281641979977e-06, |
| "loss": 2.9832, |
| "step": 1381000 |
| }, |
| { |
| "epoch": 11.32, |
| "learning_rate": 5.490637689260058e-06, |
| "loss": 2.9741, |
| "step": 1381500 |
| }, |
| { |
| "epoch": 11.32, |
| "learning_rate": 5.488993736540138e-06, |
| "loss": 2.9815, |
| "step": 1382000 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 5.487349783820218e-06, |
| "loss": 2.9758, |
| "step": 1382500 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 5.4857058311002975e-06, |
| "loss": 2.9791, |
| "step": 1383000 |
| }, |
| { |
| "epoch": 11.33, |
| "learning_rate": 5.484061878380378e-06, |
| "loss": 2.9798, |
| "step": 1383500 |
| }, |
| { |
| "epoch": 11.34, |
| "learning_rate": 5.482417925660458e-06, |
| "loss": 2.9725, |
| "step": 1384000 |
| }, |
| { |
| "epoch": 11.34, |
| "learning_rate": 5.480773972940539e-06, |
| "loss": 2.975, |
| "step": 1384500 |
| }, |
| { |
| "epoch": 11.35, |
| "learning_rate": 5.479130020220619e-06, |
| "loss": 2.9717, |
| "step": 1385000 |
| }, |
| { |
| "epoch": 11.35, |
| "learning_rate": 5.477486067500699e-06, |
| "loss": 2.9701, |
| "step": 1385500 |
| }, |
| { |
| "epoch": 11.36, |
| "learning_rate": 5.475842114780779e-06, |
| "loss": 2.9725, |
| "step": 1386000 |
| }, |
| { |
| "epoch": 11.36, |
| "learning_rate": 5.47419816206086e-06, |
| "loss": 2.9766, |
| "step": 1386500 |
| }, |
| { |
| "epoch": 11.36, |
| "learning_rate": 5.47255420934094e-06, |
| "loss": 2.9762, |
| "step": 1387000 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 5.47091025662102e-06, |
| "loss": 2.968, |
| "step": 1387500 |
| }, |
| { |
| "epoch": 11.37, |
| "learning_rate": 5.4692663039011005e-06, |
| "loss": 2.9754, |
| "step": 1388000 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 5.467622351181181e-06, |
| "loss": 2.9663, |
| "step": 1388500 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 5.46597839846126e-06, |
| "loss": 2.973, |
| "step": 1389000 |
| }, |
| { |
| "epoch": 11.38, |
| "learning_rate": 5.464334445741341e-06, |
| "loss": 2.9739, |
| "step": 1389500 |
| }, |
| { |
| "epoch": 11.39, |
| "learning_rate": 5.462690493021421e-06, |
| "loss": 2.9662, |
| "step": 1390000 |
| }, |
| { |
| "epoch": 11.39, |
| "learning_rate": 5.461046540301501e-06, |
| "loss": 2.9719, |
| "step": 1390500 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 5.459402587581581e-06, |
| "loss": 2.9817, |
| "step": 1391000 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 5.457758634861662e-06, |
| "loss": 2.9718, |
| "step": 1391500 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 5.456114682141742e-06, |
| "loss": 2.9796, |
| "step": 1392000 |
| }, |
| { |
| "epoch": 11.41, |
| "learning_rate": 5.4544707294218225e-06, |
| "loss": 2.972, |
| "step": 1392500 |
| }, |
| { |
| "epoch": 11.41, |
| "learning_rate": 5.452826776701903e-06, |
| "loss": 2.9755, |
| "step": 1393000 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 5.451182823981983e-06, |
| "loss": 2.9731, |
| "step": 1393500 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 5.449538871262064e-06, |
| "loss": 2.9719, |
| "step": 1394000 |
| }, |
| { |
| "epoch": 11.42, |
| "learning_rate": 5.447894918542144e-06, |
| "loss": 2.9669, |
| "step": 1394500 |
| }, |
| { |
| "epoch": 11.43, |
| "learning_rate": 5.446250965822223e-06, |
| "loss": 2.974, |
| "step": 1395000 |
| }, |
| { |
| "epoch": 11.43, |
| "learning_rate": 5.444607013102303e-06, |
| "loss": 2.9758, |
| "step": 1395500 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 5.442963060382383e-06, |
| "loss": 2.9695, |
| "step": 1396000 |
| }, |
| { |
| "epoch": 11.44, |
| "learning_rate": 5.4413191076624635e-06, |
| "loss": 2.9685, |
| "step": 1396500 |
| }, |
| { |
| "epoch": 11.45, |
| "learning_rate": 5.4396751549425444e-06, |
| "loss": 2.9747, |
| "step": 1397000 |
| }, |
| { |
| "epoch": 11.45, |
| "learning_rate": 5.4380312022226246e-06, |
| "loss": 2.973, |
| "step": 1397500 |
| }, |
| { |
| "epoch": 11.45, |
| "learning_rate": 5.436387249502705e-06, |
| "loss": 2.9753, |
| "step": 1398000 |
| }, |
| { |
| "epoch": 11.46, |
| "learning_rate": 5.434743296782785e-06, |
| "loss": 2.9692, |
| "step": 1398500 |
| }, |
| { |
| "epoch": 11.46, |
| "learning_rate": 5.433099344062866e-06, |
| "loss": 2.9772, |
| "step": 1399000 |
| }, |
| { |
| "epoch": 11.47, |
| "learning_rate": 5.431455391342946e-06, |
| "loss": 2.9683, |
| "step": 1399500 |
| }, |
| { |
| "epoch": 11.47, |
| "learning_rate": 5.429811438623026e-06, |
| "loss": 2.9707, |
| "step": 1400000 |
| }, |
| { |
| "epoch": 11.47, |
| "learning_rate": 5.428167485903106e-06, |
| "loss": 2.9687, |
| "step": 1400500 |
| }, |
| { |
| "epoch": 11.48, |
| "learning_rate": 5.4265235331831855e-06, |
| "loss": 2.9696, |
| "step": 1401000 |
| }, |
| { |
| "epoch": 11.48, |
| "learning_rate": 5.424879580463266e-06, |
| "loss": 2.9797, |
| "step": 1401500 |
| }, |
| { |
| "epoch": 11.49, |
| "learning_rate": 5.4232356277433466e-06, |
| "loss": 2.9733, |
| "step": 1402000 |
| }, |
| { |
| "epoch": 11.49, |
| "learning_rate": 5.421591675023427e-06, |
| "loss": 2.9671, |
| "step": 1402500 |
| }, |
| { |
| "epoch": 11.49, |
| "learning_rate": 5.419947722303507e-06, |
| "loss": 2.978, |
| "step": 1403000 |
| }, |
| { |
| "epoch": 11.5, |
| "learning_rate": 5.418303769583587e-06, |
| "loss": 2.9727, |
| "step": 1403500 |
| }, |
| { |
| "epoch": 11.5, |
| "learning_rate": 5.416659816863667e-06, |
| "loss": 2.9745, |
| "step": 1404000 |
| }, |
| { |
| "epoch": 11.51, |
| "learning_rate": 5.415015864143748e-06, |
| "loss": 2.9711, |
| "step": 1404500 |
| }, |
| { |
| "epoch": 11.51, |
| "learning_rate": 5.413371911423828e-06, |
| "loss": 2.9749, |
| "step": 1405000 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 5.411727958703908e-06, |
| "loss": 2.972, |
| "step": 1405500 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 5.410084005983988e-06, |
| "loss": 2.9717, |
| "step": 1406000 |
| }, |
| { |
| "epoch": 11.52, |
| "learning_rate": 5.408440053264069e-06, |
| "loss": 2.9745, |
| "step": 1406500 |
| }, |
| { |
| "epoch": 11.53, |
| "learning_rate": 5.406796100544149e-06, |
| "loss": 2.9727, |
| "step": 1407000 |
| }, |
| { |
| "epoch": 11.53, |
| "learning_rate": 5.405152147824229e-06, |
| "loss": 2.9724, |
| "step": 1407500 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 5.403508195104309e-06, |
| "loss": 2.9654, |
| "step": 1408000 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 5.401864242384389e-06, |
| "loss": 2.9749, |
| "step": 1408500 |
| }, |
| { |
| "epoch": 11.54, |
| "learning_rate": 5.400220289664469e-06, |
| "loss": 2.971, |
| "step": 1409000 |
| }, |
| { |
| "epoch": 11.55, |
| "learning_rate": 5.39857633694455e-06, |
| "loss": 2.9723, |
| "step": 1409500 |
| }, |
| { |
| "epoch": 11.55, |
| "learning_rate": 5.39693238422463e-06, |
| "loss": 2.9695, |
| "step": 1410000 |
| }, |
| { |
| "epoch": 11.55, |
| "eval_accuracy": 0.49317043725147325, |
| "eval_loss": 2.835310459136963, |
| "eval_runtime": 406.1684, |
| "eval_samples_per_second": 759.183, |
| "eval_steps_per_second": 15.819, |
| "step": 1410000 |
| }, |
| { |
| "epoch": 11.56, |
| "learning_rate": 5.39528843150471e-06, |
| "loss": 2.9687, |
| "step": 1410500 |
| }, |
| { |
| "epoch": 11.56, |
| "learning_rate": 5.3936444787847905e-06, |
| "loss": 2.9696, |
| "step": 1411000 |
| }, |
| { |
| "epoch": 11.56, |
| "learning_rate": 5.3920005260648715e-06, |
| "loss": 2.9737, |
| "step": 1411500 |
| }, |
| { |
| "epoch": 11.57, |
| "learning_rate": 5.390356573344952e-06, |
| "loss": 2.9715, |
| "step": 1412000 |
| }, |
| { |
| "epoch": 11.57, |
| "learning_rate": 5.388712620625032e-06, |
| "loss": 2.971, |
| "step": 1412500 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 5.387068667905111e-06, |
| "loss": 2.9643, |
| "step": 1413000 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 5.385424715185191e-06, |
| "loss": 2.9683, |
| "step": 1413500 |
| }, |
| { |
| "epoch": 11.58, |
| "learning_rate": 5.383780762465271e-06, |
| "loss": 2.9697, |
| "step": 1414000 |
| }, |
| { |
| "epoch": 11.59, |
| "learning_rate": 5.382136809745352e-06, |
| "loss": 2.9724, |
| "step": 1414500 |
| }, |
| { |
| "epoch": 11.59, |
| "learning_rate": 5.380492857025432e-06, |
| "loss": 2.9723, |
| "step": 1415000 |
| }, |
| { |
| "epoch": 11.6, |
| "learning_rate": 5.3788489043055125e-06, |
| "loss": 2.9665, |
| "step": 1415500 |
| }, |
| { |
| "epoch": 11.6, |
| "learning_rate": 5.377204951585593e-06, |
| "loss": 2.9792, |
| "step": 1416000 |
| }, |
| { |
| "epoch": 11.61, |
| "learning_rate": 5.375560998865673e-06, |
| "loss": 2.9728, |
| "step": 1416500 |
| }, |
| { |
| "epoch": 11.61, |
| "learning_rate": 5.373917046145754e-06, |
| "loss": 2.9705, |
| "step": 1417000 |
| }, |
| { |
| "epoch": 11.61, |
| "learning_rate": 5.372273093425834e-06, |
| "loss": 2.9694, |
| "step": 1417500 |
| }, |
| { |
| "epoch": 11.62, |
| "learning_rate": 5.370629140705914e-06, |
| "loss": 2.9662, |
| "step": 1418000 |
| }, |
| { |
| "epoch": 11.62, |
| "learning_rate": 5.368985187985994e-06, |
| "loss": 2.9696, |
| "step": 1418500 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 5.367341235266073e-06, |
| "loss": 2.9668, |
| "step": 1419000 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 5.365697282546154e-06, |
| "loss": 2.9754, |
| "step": 1419500 |
| }, |
| { |
| "epoch": 11.63, |
| "learning_rate": 5.3640533298262345e-06, |
| "loss": 2.9687, |
| "step": 1420000 |
| }, |
| { |
| "epoch": 11.64, |
| "learning_rate": 5.362409377106315e-06, |
| "loss": 2.9657, |
| "step": 1420500 |
| }, |
| { |
| "epoch": 11.64, |
| "learning_rate": 5.360765424386395e-06, |
| "loss": 2.9687, |
| "step": 1421000 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 5.359121471666475e-06, |
| "loss": 2.963, |
| "step": 1421500 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 5.357477518946556e-06, |
| "loss": 2.9688, |
| "step": 1422000 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 5.355833566226636e-06, |
| "loss": 2.9654, |
| "step": 1422500 |
| }, |
| { |
| "epoch": 11.66, |
| "learning_rate": 5.354189613506716e-06, |
| "loss": 2.973, |
| "step": 1423000 |
| }, |
| { |
| "epoch": 11.66, |
| "learning_rate": 5.352545660786796e-06, |
| "loss": 2.9717, |
| "step": 1423500 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 5.350901708066877e-06, |
| "loss": 2.9654, |
| "step": 1424000 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 5.349257755346957e-06, |
| "loss": 2.96, |
| "step": 1424500 |
| }, |
| { |
| "epoch": 11.67, |
| "learning_rate": 5.347613802627037e-06, |
| "loss": 2.9653, |
| "step": 1425000 |
| }, |
| { |
| "epoch": 11.68, |
| "learning_rate": 5.345969849907117e-06, |
| "loss": 2.9667, |
| "step": 1425500 |
| }, |
| { |
| "epoch": 11.68, |
| "learning_rate": 5.344325897187197e-06, |
| "loss": 2.9697, |
| "step": 1426000 |
| }, |
| { |
| "epoch": 11.69, |
| "learning_rate": 5.342681944467277e-06, |
| "loss": 2.9725, |
| "step": 1426500 |
| }, |
| { |
| "epoch": 11.69, |
| "learning_rate": 5.341037991747358e-06, |
| "loss": 2.9673, |
| "step": 1427000 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 5.339394039027438e-06, |
| "loss": 2.9679, |
| "step": 1427500 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 5.337750086307518e-06, |
| "loss": 2.9735, |
| "step": 1428000 |
| }, |
| { |
| "epoch": 11.7, |
| "learning_rate": 5.336106133587598e-06, |
| "loss": 2.9692, |
| "step": 1428500 |
| }, |
| { |
| "epoch": 11.71, |
| "learning_rate": 5.3344621808676785e-06, |
| "loss": 2.9707, |
| "step": 1429000 |
| }, |
| { |
| "epoch": 11.71, |
| "learning_rate": 5.3328182281477594e-06, |
| "loss": 2.9742, |
| "step": 1429500 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 5.3311742754278396e-06, |
| "loss": 2.9593, |
| "step": 1430000 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 5.32953032270792e-06, |
| "loss": 2.9698, |
| "step": 1430500 |
| }, |
| { |
| "epoch": 11.72, |
| "learning_rate": 5.327886369988e-06, |
| "loss": 2.9685, |
| "step": 1431000 |
| }, |
| { |
| "epoch": 11.73, |
| "learning_rate": 5.326242417268079e-06, |
| "loss": 2.9763, |
| "step": 1431500 |
| }, |
| { |
| "epoch": 11.73, |
| "learning_rate": 5.324598464548159e-06, |
| "loss": 2.9721, |
| "step": 1432000 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 5.32295451182824e-06, |
| "loss": 2.9659, |
| "step": 1432500 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 5.32131055910832e-06, |
| "loss": 2.9669, |
| "step": 1433000 |
| }, |
| { |
| "epoch": 11.74, |
| "learning_rate": 5.3196666063884005e-06, |
| "loss": 2.9627, |
| "step": 1433500 |
| }, |
| { |
| "epoch": 11.75, |
| "learning_rate": 5.318022653668481e-06, |
| "loss": 2.9696, |
| "step": 1434000 |
| }, |
| { |
| "epoch": 11.75, |
| "learning_rate": 5.3163787009485616e-06, |
| "loss": 2.972, |
| "step": 1434500 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 5.314734748228642e-06, |
| "loss": 2.9714, |
| "step": 1435000 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 5.313090795508722e-06, |
| "loss": 2.9683, |
| "step": 1435500 |
| }, |
| { |
| "epoch": 11.76, |
| "learning_rate": 5.311446842788802e-06, |
| "loss": 2.9655, |
| "step": 1436000 |
| }, |
| { |
| "epoch": 11.77, |
| "learning_rate": 5.309802890068883e-06, |
| "loss": 2.9673, |
| "step": 1436500 |
| }, |
| { |
| "epoch": 11.77, |
| "learning_rate": 5.308158937348963e-06, |
| "loss": 2.9678, |
| "step": 1437000 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 5.306514984629042e-06, |
| "loss": 2.969, |
| "step": 1437500 |
| }, |
| { |
| "epoch": 11.78, |
| "learning_rate": 5.3048710319091224e-06, |
| "loss": 2.9631, |
| "step": 1438000 |
| }, |
| { |
| "epoch": 11.79, |
| "learning_rate": 5.3032270791892026e-06, |
| "loss": 2.9681, |
| "step": 1438500 |
| }, |
| { |
| "epoch": 11.79, |
| "learning_rate": 5.301583126469283e-06, |
| "loss": 2.9658, |
| "step": 1439000 |
| }, |
| { |
| "epoch": 11.79, |
| "learning_rate": 5.299939173749364e-06, |
| "loss": 2.9621, |
| "step": 1439500 |
| }, |
| { |
| "epoch": 11.8, |
| "learning_rate": 5.298295221029444e-06, |
| "loss": 2.9607, |
| "step": 1440000 |
| }, |
| { |
| "epoch": 11.8, |
| "eval_accuracy": 0.4940959315942787, |
| "eval_loss": 2.829009771347046, |
| "eval_runtime": 405.0153, |
| "eval_samples_per_second": 761.344, |
| "eval_steps_per_second": 15.864, |
| "step": 1440000 |
| }, |
| { |
| "epoch": 11.8, |
| "learning_rate": 5.296651268309524e-06, |
| "loss": 2.9657, |
| "step": 1440500 |
| }, |
| { |
| "epoch": 11.81, |
| "learning_rate": 5.295007315589604e-06, |
| "loss": 2.9663, |
| "step": 1441000 |
| }, |
| { |
| "epoch": 11.81, |
| "learning_rate": 5.293363362869684e-06, |
| "loss": 2.9679, |
| "step": 1441500 |
| }, |
| { |
| "epoch": 11.81, |
| "learning_rate": 5.291719410149765e-06, |
| "loss": 2.9643, |
| "step": 1442000 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 5.290075457429845e-06, |
| "loss": 2.975, |
| "step": 1442500 |
| }, |
| { |
| "epoch": 11.82, |
| "learning_rate": 5.288431504709925e-06, |
| "loss": 2.963, |
| "step": 1443000 |
| }, |
| { |
| "epoch": 11.83, |
| "learning_rate": 5.286787551990005e-06, |
| "loss": 2.9608, |
| "step": 1443500 |
| }, |
| { |
| "epoch": 11.83, |
| "learning_rate": 5.285143599270085e-06, |
| "loss": 2.962, |
| "step": 1444000 |
| }, |
| { |
| "epoch": 11.83, |
| "learning_rate": 5.283499646550165e-06, |
| "loss": 2.9687, |
| "step": 1444500 |
| }, |
| { |
| "epoch": 11.84, |
| "learning_rate": 5.281855693830246e-06, |
| "loss": 2.9767, |
| "step": 1445000 |
| }, |
| { |
| "epoch": 11.84, |
| "learning_rate": 5.280211741110326e-06, |
| "loss": 2.9664, |
| "step": 1445500 |
| }, |
| { |
| "epoch": 11.85, |
| "learning_rate": 5.278567788390406e-06, |
| "loss": 2.9695, |
| "step": 1446000 |
| }, |
| { |
| "epoch": 11.85, |
| "learning_rate": 5.276923835670486e-06, |
| "loss": 2.9689, |
| "step": 1446500 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 5.275279882950567e-06, |
| "loss": 2.968, |
| "step": 1447000 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 5.273635930230647e-06, |
| "loss": 2.9666, |
| "step": 1447500 |
| }, |
| { |
| "epoch": 11.86, |
| "learning_rate": 5.2719919775107275e-06, |
| "loss": 2.9656, |
| "step": 1448000 |
| }, |
| { |
| "epoch": 11.87, |
| "learning_rate": 5.270348024790808e-06, |
| "loss": 2.9618, |
| "step": 1448500 |
| }, |
| { |
| "epoch": 11.87, |
| "learning_rate": 5.268704072070888e-06, |
| "loss": 2.9627, |
| "step": 1449000 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 5.267060119350967e-06, |
| "loss": 2.9654, |
| "step": 1449500 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 5.265416166631048e-06, |
| "loss": 2.974, |
| "step": 1450000 |
| }, |
| { |
| "epoch": 11.88, |
| "learning_rate": 5.263772213911128e-06, |
| "loss": 2.9723, |
| "step": 1450500 |
| }, |
| { |
| "epoch": 11.89, |
| "learning_rate": 5.262128261191208e-06, |
| "loss": 2.9629, |
| "step": 1451000 |
| }, |
| { |
| "epoch": 11.89, |
| "learning_rate": 5.260484308471288e-06, |
| "loss": 2.9679, |
| "step": 1451500 |
| }, |
| { |
| "epoch": 11.9, |
| "learning_rate": 5.258840355751369e-06, |
| "loss": 2.9637, |
| "step": 1452000 |
| }, |
| { |
| "epoch": 11.9, |
| "learning_rate": 5.2571964030314495e-06, |
| "loss": 2.9677, |
| "step": 1452500 |
| }, |
| { |
| "epoch": 11.9, |
| "learning_rate": 5.25555245031153e-06, |
| "loss": 2.9707, |
| "step": 1453000 |
| }, |
| { |
| "epoch": 11.91, |
| "learning_rate": 5.25390849759161e-06, |
| "loss": 2.9706, |
| "step": 1453500 |
| }, |
| { |
| "epoch": 11.91, |
| "learning_rate": 5.25226454487169e-06, |
| "loss": 2.9627, |
| "step": 1454000 |
| }, |
| { |
| "epoch": 11.92, |
| "learning_rate": 5.250620592151771e-06, |
| "loss": 2.9631, |
| "step": 1454500 |
| }, |
| { |
| "epoch": 11.92, |
| "learning_rate": 5.248976639431851e-06, |
| "loss": 2.9652, |
| "step": 1455000 |
| }, |
| { |
| "epoch": 11.92, |
| "learning_rate": 5.24733268671193e-06, |
| "loss": 2.9613, |
| "step": 1455500 |
| }, |
| { |
| "epoch": 11.93, |
| "learning_rate": 5.24568873399201e-06, |
| "loss": 2.9683, |
| "step": 1456000 |
| }, |
| { |
| "epoch": 11.93, |
| "learning_rate": 5.2440447812720905e-06, |
| "loss": 2.954, |
| "step": 1456500 |
| }, |
| { |
| "epoch": 11.94, |
| "learning_rate": 5.242400828552171e-06, |
| "loss": 2.9646, |
| "step": 1457000 |
| }, |
| { |
| "epoch": 11.94, |
| "learning_rate": 5.240756875832252e-06, |
| "loss": 2.9694, |
| "step": 1457500 |
| }, |
| { |
| "epoch": 11.95, |
| "learning_rate": 5.239112923112332e-06, |
| "loss": 2.9651, |
| "step": 1458000 |
| }, |
| { |
| "epoch": 11.95, |
| "learning_rate": 5.237468970392412e-06, |
| "loss": 2.9675, |
| "step": 1458500 |
| }, |
| { |
| "epoch": 11.95, |
| "learning_rate": 5.235825017672492e-06, |
| "loss": 2.967, |
| "step": 1459000 |
| }, |
| { |
| "epoch": 11.96, |
| "learning_rate": 5.234181064952573e-06, |
| "loss": 2.9687, |
| "step": 1459500 |
| }, |
| { |
| "epoch": 11.96, |
| "learning_rate": 5.232537112232653e-06, |
| "loss": 2.9624, |
| "step": 1460000 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 5.230893159512733e-06, |
| "loss": 2.9649, |
| "step": 1460500 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 5.229249206792813e-06, |
| "loss": 2.9656, |
| "step": 1461000 |
| }, |
| { |
| "epoch": 11.97, |
| "learning_rate": 5.227605254072893e-06, |
| "loss": 2.9581, |
| "step": 1461500 |
| }, |
| { |
| "epoch": 11.98, |
| "learning_rate": 5.225961301352973e-06, |
| "loss": 2.964, |
| "step": 1462000 |
| }, |
| { |
| "epoch": 11.98, |
| "learning_rate": 5.224317348633054e-06, |
| "loss": 2.958, |
| "step": 1462500 |
| }, |
| { |
| "epoch": 11.99, |
| "learning_rate": 5.222673395913134e-06, |
| "loss": 2.9638, |
| "step": 1463000 |
| }, |
| { |
| "epoch": 11.99, |
| "learning_rate": 5.221029443193214e-06, |
| "loss": 2.9679, |
| "step": 1463500 |
| }, |
| { |
| "epoch": 11.99, |
| "learning_rate": 5.219385490473294e-06, |
| "loss": 2.9585, |
| "step": 1464000 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 5.217741537753375e-06, |
| "loss": 2.9664, |
| "step": 1464500 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 5.216097585033455e-06, |
| "loss": 2.9604, |
| "step": 1465000 |
| }, |
| { |
| "epoch": 12.01, |
| "learning_rate": 5.214453632313535e-06, |
| "loss": 2.9489, |
| "step": 1465500 |
| }, |
| { |
| "epoch": 12.01, |
| "learning_rate": 5.2128096795936155e-06, |
| "loss": 2.9678, |
| "step": 1466000 |
| }, |
| { |
| "epoch": 12.01, |
| "learning_rate": 5.211165726873696e-06, |
| "loss": 2.9626, |
| "step": 1466500 |
| }, |
| { |
| "epoch": 12.02, |
| "learning_rate": 5.2095217741537766e-06, |
| "loss": 2.9567, |
| "step": 1467000 |
| }, |
| { |
| "epoch": 12.02, |
| "learning_rate": 5.207877821433856e-06, |
| "loss": 2.9636, |
| "step": 1467500 |
| }, |
| { |
| "epoch": 12.03, |
| "learning_rate": 5.206233868713936e-06, |
| "loss": 2.9654, |
| "step": 1468000 |
| }, |
| { |
| "epoch": 12.03, |
| "learning_rate": 5.204589915994016e-06, |
| "loss": 2.9617, |
| "step": 1468500 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 5.202945963274096e-06, |
| "loss": 2.959, |
| "step": 1469000 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 5.201302010554176e-06, |
| "loss": 2.9599, |
| "step": 1469500 |
| }, |
| { |
| "epoch": 12.04, |
| "learning_rate": 5.199658057834257e-06, |
| "loss": 2.9636, |
| "step": 1470000 |
| }, |
| { |
| "epoch": 12.04, |
| "eval_accuracy": 0.494423367210693, |
| "eval_loss": 2.8266873359680176, |
| "eval_runtime": 407.8041, |
| "eval_samples_per_second": 756.138, |
| "eval_steps_per_second": 15.755, |
| "step": 1470000 |
| }, |
| { |
| "epoch": 12.05, |
| "learning_rate": 5.1980141051143374e-06, |
| "loss": 2.9612, |
| "step": 1470500 |
| }, |
| { |
| "epoch": 12.05, |
| "learning_rate": 5.1963701523944176e-06, |
| "loss": 2.9556, |
| "step": 1471000 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 5.194726199674498e-06, |
| "loss": 2.9642, |
| "step": 1471500 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 5.193082246954579e-06, |
| "loss": 2.9593, |
| "step": 1472000 |
| }, |
| { |
| "epoch": 12.06, |
| "learning_rate": 5.191438294234659e-06, |
| "loss": 2.9578, |
| "step": 1472500 |
| }, |
| { |
| "epoch": 12.07, |
| "learning_rate": 5.189794341514739e-06, |
| "loss": 2.962, |
| "step": 1473000 |
| }, |
| { |
| "epoch": 12.07, |
| "learning_rate": 5.188150388794818e-06, |
| "loss": 2.9613, |
| "step": 1473500 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 5.186506436074898e-06, |
| "loss": 2.96, |
| "step": 1474000 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 5.1848624833549785e-06, |
| "loss": 2.9622, |
| "step": 1474500 |
| }, |
| { |
| "epoch": 12.08, |
| "learning_rate": 5.1832185306350594e-06, |
| "loss": 2.9623, |
| "step": 1475000 |
| }, |
| { |
| "epoch": 12.09, |
| "learning_rate": 5.1815745779151396e-06, |
| "loss": 2.9627, |
| "step": 1475500 |
| }, |
| { |
| "epoch": 12.09, |
| "learning_rate": 5.17993062519522e-06, |
| "loss": 2.9649, |
| "step": 1476000 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 5.1782866724753e-06, |
| "loss": 2.9676, |
| "step": 1476500 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 5.17664271975538e-06, |
| "loss": 2.9661, |
| "step": 1477000 |
| }, |
| { |
| "epoch": 12.1, |
| "learning_rate": 5.174998767035461e-06, |
| "loss": 2.9663, |
| "step": 1477500 |
| }, |
| { |
| "epoch": 12.11, |
| "learning_rate": 5.173354814315541e-06, |
| "loss": 2.9647, |
| "step": 1478000 |
| }, |
| { |
| "epoch": 12.11, |
| "learning_rate": 5.171710861595621e-06, |
| "loss": 2.9625, |
| "step": 1478500 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 5.170066908875701e-06, |
| "loss": 2.9613, |
| "step": 1479000 |
| }, |
| { |
| "epoch": 12.12, |
| "learning_rate": 5.168422956155782e-06, |
| "loss": 2.9616, |
| "step": 1479500 |
| }, |
| { |
| "epoch": 12.13, |
| "learning_rate": 5.1667790034358615e-06, |
| "loss": 2.9624, |
| "step": 1480000 |
| }, |
| { |
| "epoch": 12.13, |
| "learning_rate": 5.165135050715942e-06, |
| "loss": 2.9611, |
| "step": 1480500 |
| }, |
| { |
| "epoch": 12.13, |
| "learning_rate": 5.163491097996022e-06, |
| "loss": 2.9657, |
| "step": 1481000 |
| }, |
| { |
| "epoch": 12.14, |
| "learning_rate": 5.161847145276102e-06, |
| "loss": 2.9649, |
| "step": 1481500 |
| }, |
| { |
| "epoch": 12.14, |
| "learning_rate": 5.160203192556182e-06, |
| "loss": 2.9588, |
| "step": 1482000 |
| }, |
| { |
| "epoch": 12.15, |
| "learning_rate": 5.158559239836263e-06, |
| "loss": 2.9646, |
| "step": 1482500 |
| }, |
| { |
| "epoch": 12.15, |
| "learning_rate": 5.156915287116343e-06, |
| "loss": 2.968, |
| "step": 1483000 |
| }, |
| { |
| "epoch": 12.15, |
| "learning_rate": 5.155271334396423e-06, |
| "loss": 2.9627, |
| "step": 1483500 |
| }, |
| { |
| "epoch": 12.16, |
| "learning_rate": 5.153627381676503e-06, |
| "loss": 2.9611, |
| "step": 1484000 |
| }, |
| { |
| "epoch": 12.16, |
| "learning_rate": 5.151983428956584e-06, |
| "loss": 2.9668, |
| "step": 1484500 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 5.1503394762366645e-06, |
| "loss": 2.954, |
| "step": 1485000 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 5.148695523516745e-06, |
| "loss": 2.963, |
| "step": 1485500 |
| }, |
| { |
| "epoch": 12.17, |
| "learning_rate": 5.147051570796824e-06, |
| "loss": 2.9599, |
| "step": 1486000 |
| }, |
| { |
| "epoch": 12.18, |
| "learning_rate": 5.145407618076904e-06, |
| "loss": 2.9564, |
| "step": 1486500 |
| }, |
| { |
| "epoch": 12.18, |
| "learning_rate": 5.143763665356984e-06, |
| "loss": 2.9578, |
| "step": 1487000 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 5.142119712637065e-06, |
| "loss": 2.9657, |
| "step": 1487500 |
| }, |
| { |
| "epoch": 12.19, |
| "learning_rate": 5.140475759917145e-06, |
| "loss": 2.9635, |
| "step": 1488000 |
| }, |
| { |
| "epoch": 12.2, |
| "learning_rate": 5.138831807197225e-06, |
| "loss": 2.966, |
| "step": 1488500 |
| }, |
| { |
| "epoch": 12.2, |
| "learning_rate": 5.1371878544773055e-06, |
| "loss": 2.96, |
| "step": 1489000 |
| }, |
| { |
| "epoch": 12.2, |
| "learning_rate": 5.135543901757386e-06, |
| "loss": 2.9557, |
| "step": 1489500 |
| }, |
| { |
| "epoch": 12.21, |
| "learning_rate": 5.133899949037467e-06, |
| "loss": 2.96, |
| "step": 1490000 |
| }, |
| { |
| "epoch": 12.21, |
| "learning_rate": 5.132255996317547e-06, |
| "loss": 2.967, |
| "step": 1490500 |
| }, |
| { |
| "epoch": 12.22, |
| "learning_rate": 5.130612043597627e-06, |
| "loss": 2.9576, |
| "step": 1491000 |
| }, |
| { |
| "epoch": 12.22, |
| "learning_rate": 5.128968090877707e-06, |
| "loss": 2.9589, |
| "step": 1491500 |
| }, |
| { |
| "epoch": 12.22, |
| "learning_rate": 5.127324138157786e-06, |
| "loss": 2.9561, |
| "step": 1492000 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 5.125680185437867e-06, |
| "loss": 2.9592, |
| "step": 1492500 |
| }, |
| { |
| "epoch": 12.23, |
| "learning_rate": 5.124036232717947e-06, |
| "loss": 2.9611, |
| "step": 1493000 |
| }, |
| { |
| "epoch": 12.24, |
| "learning_rate": 5.1223922799980275e-06, |
| "loss": 2.9568, |
| "step": 1493500 |
| }, |
| { |
| "epoch": 12.24, |
| "learning_rate": 5.120748327278108e-06, |
| "loss": 2.96, |
| "step": 1494000 |
| }, |
| { |
| "epoch": 12.24, |
| "learning_rate": 5.119104374558188e-06, |
| "loss": 2.961, |
| "step": 1494500 |
| }, |
| { |
| "epoch": 12.25, |
| "learning_rate": 5.117460421838269e-06, |
| "loss": 2.9604, |
| "step": 1495000 |
| }, |
| { |
| "epoch": 12.25, |
| "learning_rate": 5.115816469118349e-06, |
| "loss": 2.9559, |
| "step": 1495500 |
| }, |
| { |
| "epoch": 12.26, |
| "learning_rate": 5.114172516398429e-06, |
| "loss": 2.9636, |
| "step": 1496000 |
| }, |
| { |
| "epoch": 12.26, |
| "learning_rate": 5.112528563678509e-06, |
| "loss": 2.9589, |
| "step": 1496500 |
| }, |
| { |
| "epoch": 12.26, |
| "learning_rate": 5.11088461095859e-06, |
| "loss": 2.9592, |
| "step": 1497000 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 5.10924065823867e-06, |
| "loss": 2.9643, |
| "step": 1497500 |
| }, |
| { |
| "epoch": 12.27, |
| "learning_rate": 5.1075967055187495e-06, |
| "loss": 2.9587, |
| "step": 1498000 |
| }, |
| { |
| "epoch": 12.28, |
| "learning_rate": 5.10595275279883e-06, |
| "loss": 2.9601, |
| "step": 1498500 |
| }, |
| { |
| "epoch": 12.28, |
| "learning_rate": 5.10430880007891e-06, |
| "loss": 2.9574, |
| "step": 1499000 |
| }, |
| { |
| "epoch": 12.29, |
| "learning_rate": 5.10266484735899e-06, |
| "loss": 2.9626, |
| "step": 1499500 |
| }, |
| { |
| "epoch": 12.29, |
| "learning_rate": 5.101020894639071e-06, |
| "loss": 2.9584, |
| "step": 1500000 |
| }, |
| { |
| "epoch": 12.29, |
| "eval_accuracy": 0.49460218852402815, |
| "eval_loss": 2.8247358798980713, |
| "eval_runtime": 404.5149, |
| "eval_samples_per_second": 762.286, |
| "eval_steps_per_second": 15.883, |
| "step": 1500000 |
| }, |
| { |
| "epoch": 12.29, |
| "learning_rate": 5.099376941919151e-06, |
| "loss": 2.9567, |
| "step": 1500500 |
| }, |
| { |
| "epoch": 12.3, |
| "learning_rate": 5.097732989199231e-06, |
| "loss": 2.9534, |
| "step": 1501000 |
| }, |
| { |
| "epoch": 12.3, |
| "learning_rate": 5.096089036479311e-06, |
| "loss": 2.9594, |
| "step": 1501500 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 5.094445083759391e-06, |
| "loss": 2.9505, |
| "step": 1502000 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 5.092801131039472e-06, |
| "loss": 2.9665, |
| "step": 1502500 |
| }, |
| { |
| "epoch": 12.31, |
| "learning_rate": 5.0911571783195524e-06, |
| "loss": 2.9554, |
| "step": 1503000 |
| }, |
| { |
| "epoch": 12.32, |
| "learning_rate": 5.0895132255996326e-06, |
| "loss": 2.9596, |
| "step": 1503500 |
| }, |
| { |
| "epoch": 12.32, |
| "learning_rate": 5.087869272879712e-06, |
| "loss": 2.9574, |
| "step": 1504000 |
| }, |
| { |
| "epoch": 12.33, |
| "learning_rate": 5.086225320159792e-06, |
| "loss": 2.9596, |
| "step": 1504500 |
| }, |
| { |
| "epoch": 12.33, |
| "learning_rate": 5.084581367439872e-06, |
| "loss": 2.9548, |
| "step": 1505000 |
| }, |
| { |
| "epoch": 12.33, |
| "learning_rate": 5.082937414719953e-06, |
| "loss": 2.9619, |
| "step": 1505500 |
| }, |
| { |
| "epoch": 12.34, |
| "learning_rate": 5.081293462000033e-06, |
| "loss": 2.9556, |
| "step": 1506000 |
| }, |
| { |
| "epoch": 12.34, |
| "learning_rate": 5.079649509280113e-06, |
| "loss": 2.9447, |
| "step": 1506500 |
| }, |
| { |
| "epoch": 12.35, |
| "learning_rate": 5.0780055565601935e-06, |
| "loss": 2.951, |
| "step": 1507000 |
| }, |
| { |
| "epoch": 12.35, |
| "learning_rate": 5.0763616038402744e-06, |
| "loss": 2.9613, |
| "step": 1507500 |
| }, |
| { |
| "epoch": 12.35, |
| "learning_rate": 5.0747176511203546e-06, |
| "loss": 2.9594, |
| "step": 1508000 |
| }, |
| { |
| "epoch": 12.36, |
| "learning_rate": 5.073073698400435e-06, |
| "loss": 2.9587, |
| "step": 1508500 |
| }, |
| { |
| "epoch": 12.36, |
| "learning_rate": 5.071429745680515e-06, |
| "loss": 2.9508, |
| "step": 1509000 |
| }, |
| { |
| "epoch": 12.37, |
| "learning_rate": 5.069785792960596e-06, |
| "loss": 2.9505, |
| "step": 1509500 |
| }, |
| { |
| "epoch": 12.37, |
| "learning_rate": 5.068141840240674e-06, |
| "loss": 2.9547, |
| "step": 1510000 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 5.066497887520755e-06, |
| "loss": 2.9588, |
| "step": 1510500 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 5.064853934800835e-06, |
| "loss": 2.9564, |
| "step": 1511000 |
| }, |
| { |
| "epoch": 12.38, |
| "learning_rate": 5.0632099820809154e-06, |
| "loss": 2.953, |
| "step": 1511500 |
| }, |
| { |
| "epoch": 12.39, |
| "learning_rate": 5.0615660293609956e-06, |
| "loss": 2.964, |
| "step": 1512000 |
| }, |
| { |
| "epoch": 12.39, |
| "learning_rate": 5.0599220766410765e-06, |
| "loss": 2.9576, |
| "step": 1512500 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 5.058278123921157e-06, |
| "loss": 2.9596, |
| "step": 1513000 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 5.056634171201237e-06, |
| "loss": 2.9625, |
| "step": 1513500 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 5.054990218481317e-06, |
| "loss": 2.9526, |
| "step": 1514000 |
| }, |
| { |
| "epoch": 12.41, |
| "learning_rate": 5.053346265761397e-06, |
| "loss": 2.957, |
| "step": 1514500 |
| }, |
| { |
| "epoch": 12.41, |
| "learning_rate": 5.051702313041478e-06, |
| "loss": 2.9643, |
| "step": 1515000 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 5.050058360321558e-06, |
| "loss": 2.9526, |
| "step": 1515500 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 5.048414407601637e-06, |
| "loss": 2.9535, |
| "step": 1516000 |
| }, |
| { |
| "epoch": 12.42, |
| "learning_rate": 5.0467704548817175e-06, |
| "loss": 2.9598, |
| "step": 1516500 |
| }, |
| { |
| "epoch": 12.43, |
| "learning_rate": 5.045126502161798e-06, |
| "loss": 2.953, |
| "step": 1517000 |
| }, |
| { |
| "epoch": 12.43, |
| "learning_rate": 5.043482549441878e-06, |
| "loss": 2.9571, |
| "step": 1517500 |
| }, |
| { |
| "epoch": 12.44, |
| "learning_rate": 5.041838596721959e-06, |
| "loss": 2.9583, |
| "step": 1518000 |
| }, |
| { |
| "epoch": 12.44, |
| "learning_rate": 5.040194644002039e-06, |
| "loss": 2.9615, |
| "step": 1518500 |
| }, |
| { |
| "epoch": 12.44, |
| "learning_rate": 5.038550691282119e-06, |
| "loss": 2.9541, |
| "step": 1519000 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 5.036906738562199e-06, |
| "loss": 2.9595, |
| "step": 1519500 |
| }, |
| { |
| "epoch": 12.45, |
| "learning_rate": 5.03526278584228e-06, |
| "loss": 2.9549, |
| "step": 1520000 |
| }, |
| { |
| "epoch": 12.46, |
| "learning_rate": 5.03361883312236e-06, |
| "loss": 2.958, |
| "step": 1520500 |
| }, |
| { |
| "epoch": 12.46, |
| "learning_rate": 5.03197488040244e-06, |
| "loss": 2.9547, |
| "step": 1521000 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 5.0303309276825205e-06, |
| "loss": 2.9544, |
| "step": 1521500 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 5.0286869749626e-06, |
| "loss": 2.9553, |
| "step": 1522000 |
| }, |
| { |
| "epoch": 12.47, |
| "learning_rate": 5.02704302224268e-06, |
| "loss": 2.9533, |
| "step": 1522500 |
| }, |
| { |
| "epoch": 12.48, |
| "learning_rate": 5.025399069522761e-06, |
| "loss": 2.9578, |
| "step": 1523000 |
| }, |
| { |
| "epoch": 12.48, |
| "learning_rate": 5.023755116802841e-06, |
| "loss": 2.9628, |
| "step": 1523500 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 5.022111164082921e-06, |
| "loss": 2.9515, |
| "step": 1524000 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 5.020467211363001e-06, |
| "loss": 2.9511, |
| "step": 1524500 |
| }, |
| { |
| "epoch": 12.49, |
| "learning_rate": 5.018823258643082e-06, |
| "loss": 2.9623, |
| "step": 1525000 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 5.017179305923162e-06, |
| "loss": 2.9648, |
| "step": 1525500 |
| }, |
| { |
| "epoch": 12.5, |
| "learning_rate": 5.0155353532032425e-06, |
| "loss": 2.9571, |
| "step": 1526000 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 5.013891400483323e-06, |
| "loss": 2.9543, |
| "step": 1526500 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 5.012247447763403e-06, |
| "loss": 2.9567, |
| "step": 1527000 |
| }, |
| { |
| "epoch": 12.51, |
| "learning_rate": 5.010603495043484e-06, |
| "loss": 2.9549, |
| "step": 1527500 |
| }, |
| { |
| "epoch": 12.52, |
| "learning_rate": 5.008959542323563e-06, |
| "loss": 2.9604, |
| "step": 1528000 |
| }, |
| { |
| "epoch": 12.52, |
| "learning_rate": 5.007315589603643e-06, |
| "loss": 2.9591, |
| "step": 1528500 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 5.005671636883723e-06, |
| "loss": 2.9592, |
| "step": 1529000 |
| }, |
| { |
| "epoch": 12.53, |
| "learning_rate": 5.004027684163803e-06, |
| "loss": 2.9529, |
| "step": 1529500 |
| }, |
| { |
| "epoch": 12.54, |
| "learning_rate": 5.0023837314438835e-06, |
| "loss": 2.9546, |
| "step": 1530000 |
| }, |
| { |
| "epoch": 12.54, |
| "eval_accuracy": 0.4951152598295225, |
| "eval_loss": 2.8195910453796387, |
| "eval_runtime": 406.5071, |
| "eval_samples_per_second": 758.55, |
| "eval_steps_per_second": 15.805, |
| "step": 1530000 |
| }, |
| { |
| "epoch": 12.54, |
| "learning_rate": 5.0007397787239645e-06, |
| "loss": 2.9556, |
| "step": 1530500 |
| }, |
| { |
| "epoch": 12.54, |
| "learning_rate": 4.999095826004045e-06, |
| "loss": 2.9566, |
| "step": 1531000 |
| }, |
| { |
| "epoch": 12.55, |
| "learning_rate": 4.997451873284125e-06, |
| "loss": 2.9579, |
| "step": 1531500 |
| }, |
| { |
| "epoch": 12.55, |
| "learning_rate": 4.995807920564205e-06, |
| "loss": 2.9552, |
| "step": 1532000 |
| }, |
| { |
| "epoch": 12.56, |
| "learning_rate": 4.994163967844286e-06, |
| "loss": 2.9573, |
| "step": 1532500 |
| }, |
| { |
| "epoch": 12.56, |
| "learning_rate": 4.992520015124365e-06, |
| "loss": 2.9501, |
| "step": 1533000 |
| }, |
| { |
| "epoch": 12.56, |
| "learning_rate": 4.990876062404445e-06, |
| "loss": 2.9542, |
| "step": 1533500 |
| }, |
| { |
| "epoch": 12.57, |
| "learning_rate": 4.989232109684526e-06, |
| "loss": 2.9543, |
| "step": 1534000 |
| }, |
| { |
| "epoch": 12.57, |
| "learning_rate": 4.987588156964606e-06, |
| "loss": 2.9581, |
| "step": 1534500 |
| }, |
| { |
| "epoch": 12.58, |
| "learning_rate": 4.9859442042446865e-06, |
| "loss": 2.9519, |
| "step": 1535000 |
| }, |
| { |
| "epoch": 12.58, |
| "learning_rate": 4.984300251524767e-06, |
| "loss": 2.9525, |
| "step": 1535500 |
| }, |
| { |
| "epoch": 12.58, |
| "learning_rate": 4.982656298804847e-06, |
| "loss": 2.9575, |
| "step": 1536000 |
| }, |
| { |
| "epoch": 12.59, |
| "learning_rate": 4.981012346084927e-06, |
| "loss": 2.951, |
| "step": 1536500 |
| }, |
| { |
| "epoch": 12.59, |
| "learning_rate": 4.979368393365007e-06, |
| "loss": 2.9554, |
| "step": 1537000 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 4.977724440645088e-06, |
| "loss": 2.948, |
| "step": 1537500 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 4.976080487925168e-06, |
| "loss": 2.9571, |
| "step": 1538000 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 4.974436535205248e-06, |
| "loss": 2.9573, |
| "step": 1538500 |
| }, |
| { |
| "epoch": 12.61, |
| "learning_rate": 4.972792582485328e-06, |
| "loss": 2.9505, |
| "step": 1539000 |
| }, |
| { |
| "epoch": 12.61, |
| "learning_rate": 4.9711486297654085e-06, |
| "loss": 2.9538, |
| "step": 1539500 |
| }, |
| { |
| "epoch": 12.62, |
| "learning_rate": 4.969504677045489e-06, |
| "loss": 2.953, |
| "step": 1540000 |
| }, |
| { |
| "epoch": 12.62, |
| "learning_rate": 4.967860724325569e-06, |
| "loss": 2.9508, |
| "step": 1540500 |
| }, |
| { |
| "epoch": 12.63, |
| "learning_rate": 4.966216771605649e-06, |
| "loss": 2.9517, |
| "step": 1541000 |
| }, |
| { |
| "epoch": 12.63, |
| "learning_rate": 4.96457281888573e-06, |
| "loss": 2.9515, |
| "step": 1541500 |
| }, |
| { |
| "epoch": 12.63, |
| "learning_rate": 4.962928866165809e-06, |
| "loss": 2.9529, |
| "step": 1542000 |
| }, |
| { |
| "epoch": 12.64, |
| "learning_rate": 4.961284913445889e-06, |
| "loss": 2.9492, |
| "step": 1542500 |
| }, |
| { |
| "epoch": 12.64, |
| "learning_rate": 4.95964096072597e-06, |
| "loss": 2.9554, |
| "step": 1543000 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 4.95799700800605e-06, |
| "loss": 2.9546, |
| "step": 1543500 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 4.9563530552861304e-06, |
| "loss": 2.9517, |
| "step": 1544000 |
| }, |
| { |
| "epoch": 12.65, |
| "learning_rate": 4.9547091025662106e-06, |
| "loss": 2.9527, |
| "step": 1544500 |
| }, |
| { |
| "epoch": 12.66, |
| "learning_rate": 4.953065149846291e-06, |
| "loss": 2.9507, |
| "step": 1545000 |
| }, |
| { |
| "epoch": 12.66, |
| "learning_rate": 4.951421197126371e-06, |
| "loss": 2.9389, |
| "step": 1545500 |
| }, |
| { |
| "epoch": 12.67, |
| "learning_rate": 4.949777244406451e-06, |
| "loss": 2.9514, |
| "step": 1546000 |
| }, |
| { |
| "epoch": 12.67, |
| "learning_rate": 4.948133291686532e-06, |
| "loss": 2.9557, |
| "step": 1546500 |
| }, |
| { |
| "epoch": 12.67, |
| "learning_rate": 4.946489338966612e-06, |
| "loss": 2.9524, |
| "step": 1547000 |
| }, |
| { |
| "epoch": 12.68, |
| "learning_rate": 4.944845386246692e-06, |
| "loss": 2.9492, |
| "step": 1547500 |
| }, |
| { |
| "epoch": 12.68, |
| "learning_rate": 4.943201433526772e-06, |
| "loss": 2.9557, |
| "step": 1548000 |
| }, |
| { |
| "epoch": 12.69, |
| "learning_rate": 4.941557480806852e-06, |
| "loss": 2.9515, |
| "step": 1548500 |
| }, |
| { |
| "epoch": 12.69, |
| "learning_rate": 4.9399135280869325e-06, |
| "loss": 2.9559, |
| "step": 1549000 |
| }, |
| { |
| "epoch": 12.69, |
| "learning_rate": 4.938269575367013e-06, |
| "loss": 2.9511, |
| "step": 1549500 |
| }, |
| { |
| "epoch": 12.7, |
| "learning_rate": 4.936625622647093e-06, |
| "loss": 2.9542, |
| "step": 1550000 |
| }, |
| { |
| "epoch": 12.7, |
| "learning_rate": 4.934981669927174e-06, |
| "loss": 2.9592, |
| "step": 1550500 |
| }, |
| { |
| "epoch": 12.71, |
| "learning_rate": 4.933337717207253e-06, |
| "loss": 2.95, |
| "step": 1551000 |
| }, |
| { |
| "epoch": 12.71, |
| "learning_rate": 4.931693764487334e-06, |
| "loss": 2.9537, |
| "step": 1551500 |
| }, |
| { |
| "epoch": 12.72, |
| "learning_rate": 4.930049811767414e-06, |
| "loss": 2.9526, |
| "step": 1552000 |
| }, |
| { |
| "epoch": 12.72, |
| "learning_rate": 4.928405859047494e-06, |
| "loss": 2.9549, |
| "step": 1552500 |
| }, |
| { |
| "epoch": 12.72, |
| "learning_rate": 4.926761906327574e-06, |
| "loss": 2.9513, |
| "step": 1553000 |
| }, |
| { |
| "epoch": 12.73, |
| "learning_rate": 4.9251179536076545e-06, |
| "loss": 2.9561, |
| "step": 1553500 |
| }, |
| { |
| "epoch": 12.73, |
| "learning_rate": 4.923474000887735e-06, |
| "loss": 2.9589, |
| "step": 1554000 |
| }, |
| { |
| "epoch": 12.74, |
| "learning_rate": 4.921830048167815e-06, |
| "loss": 2.9527, |
| "step": 1554500 |
| }, |
| { |
| "epoch": 12.74, |
| "learning_rate": 4.920186095447895e-06, |
| "loss": 2.9527, |
| "step": 1555000 |
| }, |
| { |
| "epoch": 12.74, |
| "learning_rate": 4.918542142727976e-06, |
| "loss": 2.954, |
| "step": 1555500 |
| }, |
| { |
| "epoch": 12.75, |
| "learning_rate": 4.916898190008056e-06, |
| "loss": 2.943, |
| "step": 1556000 |
| }, |
| { |
| "epoch": 12.75, |
| "learning_rate": 4.915254237288136e-06, |
| "loss": 2.9428, |
| "step": 1556500 |
| }, |
| { |
| "epoch": 12.76, |
| "learning_rate": 4.913610284568216e-06, |
| "loss": 2.9548, |
| "step": 1557000 |
| }, |
| { |
| "epoch": 12.76, |
| "learning_rate": 4.911966331848296e-06, |
| "loss": 2.9565, |
| "step": 1557500 |
| }, |
| { |
| "epoch": 12.76, |
| "learning_rate": 4.9103223791283765e-06, |
| "loss": 2.9475, |
| "step": 1558000 |
| }, |
| { |
| "epoch": 12.77, |
| "learning_rate": 4.908678426408457e-06, |
| "loss": 2.9534, |
| "step": 1558500 |
| }, |
| { |
| "epoch": 12.77, |
| "learning_rate": 4.907034473688538e-06, |
| "loss": 2.9567, |
| "step": 1559000 |
| }, |
| { |
| "epoch": 12.78, |
| "learning_rate": 4.905390520968618e-06, |
| "loss": 2.9496, |
| "step": 1559500 |
| }, |
| { |
| "epoch": 12.78, |
| "learning_rate": 4.903746568248697e-06, |
| "loss": 2.9544, |
| "step": 1560000 |
| }, |
| { |
| "epoch": 12.78, |
| "eval_accuracy": 0.4959430255891887, |
| "eval_loss": 2.8146307468414307, |
| "eval_runtime": 404.6908, |
| "eval_samples_per_second": 761.955, |
| "eval_steps_per_second": 15.876, |
| "step": 1560000 |
| }, |
| { |
| "epoch": 12.78, |
| "learning_rate": 4.902102615528778e-06, |
| "loss": 2.9512, |
| "step": 1560500 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 4.900458662808858e-06, |
| "loss": 2.9506, |
| "step": 1561000 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 4.898814710088938e-06, |
| "loss": 2.9507, |
| "step": 1561500 |
| }, |
| { |
| "epoch": 12.8, |
| "learning_rate": 4.897170757369018e-06, |
| "loss": 2.9485, |
| "step": 1562000 |
| }, |
| { |
| "epoch": 12.8, |
| "learning_rate": 4.8955268046490985e-06, |
| "loss": 2.9472, |
| "step": 1562500 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 4.893882851929179e-06, |
| "loss": 2.9466, |
| "step": 1563000 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 4.892238899209259e-06, |
| "loss": 2.9461, |
| "step": 1563500 |
| }, |
| { |
| "epoch": 12.81, |
| "learning_rate": 4.890594946489339e-06, |
| "loss": 2.9519, |
| "step": 1564000 |
| }, |
| { |
| "epoch": 12.82, |
| "learning_rate": 4.88895099376942e-06, |
| "loss": 2.9548, |
| "step": 1564500 |
| }, |
| { |
| "epoch": 12.82, |
| "learning_rate": 4.8873070410495e-06, |
| "loss": 2.9514, |
| "step": 1565000 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 4.88566308832958e-06, |
| "loss": 2.9515, |
| "step": 1565500 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 4.88401913560966e-06, |
| "loss": 2.9448, |
| "step": 1566000 |
| }, |
| { |
| "epoch": 12.83, |
| "learning_rate": 4.88237518288974e-06, |
| "loss": 2.9534, |
| "step": 1566500 |
| }, |
| { |
| "epoch": 12.84, |
| "learning_rate": 4.8807312301698205e-06, |
| "loss": 2.9453, |
| "step": 1567000 |
| }, |
| { |
| "epoch": 12.84, |
| "learning_rate": 4.879087277449901e-06, |
| "loss": 2.95, |
| "step": 1567500 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 4.877443324729982e-06, |
| "loss": 2.9518, |
| "step": 1568000 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 4.875799372010062e-06, |
| "loss": 2.9582, |
| "step": 1568500 |
| }, |
| { |
| "epoch": 12.85, |
| "learning_rate": 4.874155419290141e-06, |
| "loss": 2.9535, |
| "step": 1569000 |
| }, |
| { |
| "epoch": 12.86, |
| "learning_rate": 4.872511466570222e-06, |
| "loss": 2.9523, |
| "step": 1569500 |
| }, |
| { |
| "epoch": 12.86, |
| "learning_rate": 4.870867513850302e-06, |
| "loss": 2.9578, |
| "step": 1570000 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 4.869223561130382e-06, |
| "loss": 2.95, |
| "step": 1570500 |
| }, |
| { |
| "epoch": 12.87, |
| "learning_rate": 4.867579608410462e-06, |
| "loss": 2.9472, |
| "step": 1571000 |
| }, |
| { |
| "epoch": 12.88, |
| "learning_rate": 4.865935655690543e-06, |
| "loss": 2.9505, |
| "step": 1571500 |
| }, |
| { |
| "epoch": 12.88, |
| "learning_rate": 4.864291702970623e-06, |
| "loss": 2.9519, |
| "step": 1572000 |
| }, |
| { |
| "epoch": 12.88, |
| "learning_rate": 4.862647750250703e-06, |
| "loss": 2.939, |
| "step": 1572500 |
| }, |
| { |
| "epoch": 12.89, |
| "learning_rate": 4.861003797530784e-06, |
| "loss": 2.9494, |
| "step": 1573000 |
| }, |
| { |
| "epoch": 12.89, |
| "learning_rate": 4.859359844810864e-06, |
| "loss": 2.9491, |
| "step": 1573500 |
| }, |
| { |
| "epoch": 12.9, |
| "learning_rate": 4.857715892090944e-06, |
| "loss": 2.9524, |
| "step": 1574000 |
| }, |
| { |
| "epoch": 12.9, |
| "learning_rate": 4.856071939371024e-06, |
| "loss": 2.9501, |
| "step": 1574500 |
| }, |
| { |
| "epoch": 12.9, |
| "learning_rate": 4.854427986651104e-06, |
| "loss": 2.9494, |
| "step": 1575000 |
| }, |
| { |
| "epoch": 12.91, |
| "learning_rate": 4.852784033931184e-06, |
| "loss": 2.949, |
| "step": 1575500 |
| }, |
| { |
| "epoch": 12.91, |
| "learning_rate": 4.8511400812112645e-06, |
| "loss": 2.9445, |
| "step": 1576000 |
| }, |
| { |
| "epoch": 12.92, |
| "learning_rate": 4.849496128491345e-06, |
| "loss": 2.9457, |
| "step": 1576500 |
| }, |
| { |
| "epoch": 12.92, |
| "learning_rate": 4.8478521757714256e-06, |
| "loss": 2.9505, |
| "step": 1577000 |
| }, |
| { |
| "epoch": 12.92, |
| "learning_rate": 4.846208223051506e-06, |
| "loss": 2.9521, |
| "step": 1577500 |
| }, |
| { |
| "epoch": 12.93, |
| "learning_rate": 4.844564270331585e-06, |
| "loss": 2.9494, |
| "step": 1578000 |
| }, |
| { |
| "epoch": 12.93, |
| "learning_rate": 4.842920317611666e-06, |
| "loss": 2.9524, |
| "step": 1578500 |
| }, |
| { |
| "epoch": 12.94, |
| "learning_rate": 4.841276364891746e-06, |
| "loss": 2.9477, |
| "step": 1579000 |
| }, |
| { |
| "epoch": 12.94, |
| "learning_rate": 4.839632412171826e-06, |
| "loss": 2.947, |
| "step": 1579500 |
| }, |
| { |
| "epoch": 12.94, |
| "learning_rate": 4.837988459451906e-06, |
| "loss": 2.9519, |
| "step": 1580000 |
| }, |
| { |
| "epoch": 12.95, |
| "learning_rate": 4.836344506731987e-06, |
| "loss": 2.95, |
| "step": 1580500 |
| }, |
| { |
| "epoch": 12.95, |
| "learning_rate": 4.834700554012067e-06, |
| "loss": 2.9561, |
| "step": 1581000 |
| }, |
| { |
| "epoch": 12.96, |
| "learning_rate": 4.833056601292147e-06, |
| "loss": 2.9502, |
| "step": 1581500 |
| }, |
| { |
| "epoch": 12.96, |
| "learning_rate": 4.831412648572228e-06, |
| "loss": 2.9511, |
| "step": 1582000 |
| }, |
| { |
| "epoch": 12.97, |
| "learning_rate": 4.829768695852308e-06, |
| "loss": 2.9545, |
| "step": 1582500 |
| }, |
| { |
| "epoch": 12.97, |
| "learning_rate": 4.828124743132388e-06, |
| "loss": 2.9486, |
| "step": 1583000 |
| }, |
| { |
| "epoch": 12.97, |
| "learning_rate": 4.826480790412468e-06, |
| "loss": 2.9494, |
| "step": 1583500 |
| }, |
| { |
| "epoch": 12.98, |
| "learning_rate": 4.824836837692549e-06, |
| "loss": 2.9427, |
| "step": 1584000 |
| }, |
| { |
| "epoch": 12.98, |
| "learning_rate": 4.823192884972628e-06, |
| "loss": 2.9539, |
| "step": 1584500 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 4.8215489322527084e-06, |
| "loss": 2.9488, |
| "step": 1585000 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 4.819904979532789e-06, |
| "loss": 2.9474, |
| "step": 1585500 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 4.8182610268128695e-06, |
| "loss": 2.9436, |
| "step": 1586000 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 4.81661707409295e-06, |
| "loss": 2.9426, |
| "step": 1586500 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 4.81497312137303e-06, |
| "loss": 2.9475, |
| "step": 1587000 |
| }, |
| { |
| "epoch": 13.01, |
| "learning_rate": 4.81332916865311e-06, |
| "loss": 2.9566, |
| "step": 1587500 |
| }, |
| { |
| "epoch": 13.01, |
| "learning_rate": 4.81168521593319e-06, |
| "loss": 2.9528, |
| "step": 1588000 |
| }, |
| { |
| "epoch": 13.01, |
| "learning_rate": 4.81004126321327e-06, |
| "loss": 2.9459, |
| "step": 1588500 |
| }, |
| { |
| "epoch": 13.02, |
| "learning_rate": 4.80839731049335e-06, |
| "loss": 2.9478, |
| "step": 1589000 |
| }, |
| { |
| "epoch": 13.02, |
| "learning_rate": 4.806753357773431e-06, |
| "loss": 2.9457, |
| "step": 1589500 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 4.805109405053511e-06, |
| "loss": 2.9486, |
| "step": 1590000 |
| }, |
| { |
| "epoch": 13.03, |
| "eval_accuracy": 0.4963652221640762, |
| "eval_loss": 2.8131563663482666, |
| "eval_runtime": 407.4078, |
| "eval_samples_per_second": 756.873, |
| "eval_steps_per_second": 15.77, |
| "step": 1590000 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 4.803465452333591e-06, |
| "loss": 2.9509, |
| "step": 1590500 |
| }, |
| { |
| "epoch": 13.03, |
| "learning_rate": 4.801821499613672e-06, |
| "loss": 2.9485, |
| "step": 1591000 |
| }, |
| { |
| "epoch": 13.04, |
| "learning_rate": 4.800177546893752e-06, |
| "loss": 2.9573, |
| "step": 1591500 |
| }, |
| { |
| "epoch": 13.04, |
| "learning_rate": 4.798533594173832e-06, |
| "loss": 2.9446, |
| "step": 1592000 |
| }, |
| { |
| "epoch": 13.05, |
| "learning_rate": 4.796889641453912e-06, |
| "loss": 2.9451, |
| "step": 1592500 |
| }, |
| { |
| "epoch": 13.05, |
| "learning_rate": 4.795245688733993e-06, |
| "loss": 2.9395, |
| "step": 1593000 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 4.793601736014072e-06, |
| "loss": 2.9443, |
| "step": 1593500 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 4.791957783294152e-06, |
| "loss": 2.9481, |
| "step": 1594000 |
| }, |
| { |
| "epoch": 13.06, |
| "learning_rate": 4.790313830574233e-06, |
| "loss": 2.9436, |
| "step": 1594500 |
| }, |
| { |
| "epoch": 13.07, |
| "learning_rate": 4.7886698778543135e-06, |
| "loss": 2.95, |
| "step": 1595000 |
| }, |
| { |
| "epoch": 13.07, |
| "learning_rate": 4.787025925134394e-06, |
| "loss": 2.9522, |
| "step": 1595500 |
| }, |
| { |
| "epoch": 13.08, |
| "learning_rate": 4.785381972414474e-06, |
| "loss": 2.9437, |
| "step": 1596000 |
| }, |
| { |
| "epoch": 13.08, |
| "learning_rate": 4.783738019694554e-06, |
| "loss": 2.9467, |
| "step": 1596500 |
| }, |
| { |
| "epoch": 13.08, |
| "learning_rate": 4.782094066974634e-06, |
| "loss": 2.9474, |
| "step": 1597000 |
| }, |
| { |
| "epoch": 13.09, |
| "learning_rate": 4.780450114254714e-06, |
| "loss": 2.945, |
| "step": 1597500 |
| }, |
| { |
| "epoch": 13.09, |
| "learning_rate": 4.778806161534795e-06, |
| "loss": 2.9562, |
| "step": 1598000 |
| }, |
| { |
| "epoch": 13.1, |
| "learning_rate": 4.777162208814875e-06, |
| "loss": 2.9497, |
| "step": 1598500 |
| }, |
| { |
| "epoch": 13.1, |
| "learning_rate": 4.775518256094955e-06, |
| "loss": 2.9503, |
| "step": 1599000 |
| }, |
| { |
| "epoch": 13.1, |
| "learning_rate": 4.7738743033750355e-06, |
| "loss": 2.9489, |
| "step": 1599500 |
| }, |
| { |
| "epoch": 13.11, |
| "learning_rate": 4.772230350655116e-06, |
| "loss": 2.9467, |
| "step": 1600000 |
| }, |
| { |
| "epoch": 13.11, |
| "learning_rate": 4.770586397935196e-06, |
| "loss": 2.9436, |
| "step": 1600500 |
| }, |
| { |
| "epoch": 13.12, |
| "learning_rate": 4.768942445215276e-06, |
| "loss": 2.9546, |
| "step": 1601000 |
| }, |
| { |
| "epoch": 13.12, |
| "learning_rate": 4.767298492495356e-06, |
| "loss": 2.9411, |
| "step": 1601500 |
| }, |
| { |
| "epoch": 13.12, |
| "learning_rate": 4.765654539775437e-06, |
| "loss": 2.9413, |
| "step": 1602000 |
| }, |
| { |
| "epoch": 13.13, |
| "learning_rate": 4.764010587055516e-06, |
| "loss": 2.9436, |
| "step": 1602500 |
| }, |
| { |
| "epoch": 13.13, |
| "learning_rate": 4.762366634335596e-06, |
| "loss": 2.9467, |
| "step": 1603000 |
| }, |
| { |
| "epoch": 13.14, |
| "learning_rate": 4.760722681615677e-06, |
| "loss": 2.9432, |
| "step": 1603500 |
| }, |
| { |
| "epoch": 13.14, |
| "learning_rate": 4.7590787288957575e-06, |
| "loss": 2.9429, |
| "step": 1604000 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 4.757434776175838e-06, |
| "loss": 2.9474, |
| "step": 1604500 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 4.755790823455918e-06, |
| "loss": 2.9524, |
| "step": 1605000 |
| }, |
| { |
| "epoch": 13.15, |
| "learning_rate": 4.754146870735998e-06, |
| "loss": 2.9471, |
| "step": 1605500 |
| }, |
| { |
| "epoch": 13.16, |
| "learning_rate": 4.752502918016078e-06, |
| "loss": 2.9486, |
| "step": 1606000 |
| }, |
| { |
| "epoch": 13.16, |
| "learning_rate": 4.750858965296158e-06, |
| "loss": 2.9405, |
| "step": 1606500 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 4.749215012576239e-06, |
| "loss": 2.9477, |
| "step": 1607000 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 4.747571059856319e-06, |
| "loss": 2.9465, |
| "step": 1607500 |
| }, |
| { |
| "epoch": 13.17, |
| "learning_rate": 4.745927107136399e-06, |
| "loss": 2.9527, |
| "step": 1608000 |
| }, |
| { |
| "epoch": 13.18, |
| "learning_rate": 4.7442831544164795e-06, |
| "loss": 2.947, |
| "step": 1608500 |
| }, |
| { |
| "epoch": 13.18, |
| "learning_rate": 4.74263920169656e-06, |
| "loss": 2.9366, |
| "step": 1609000 |
| }, |
| { |
| "epoch": 13.19, |
| "learning_rate": 4.74099524897664e-06, |
| "loss": 2.9444, |
| "step": 1609500 |
| }, |
| { |
| "epoch": 13.19, |
| "learning_rate": 4.73935129625672e-06, |
| "loss": 2.9487, |
| "step": 1610000 |
| }, |
| { |
| "epoch": 13.19, |
| "learning_rate": 4.737707343536801e-06, |
| "loss": 2.9506, |
| "step": 1610500 |
| }, |
| { |
| "epoch": 13.2, |
| "learning_rate": 4.736063390816881e-06, |
| "loss": 2.9445, |
| "step": 1611000 |
| }, |
| { |
| "epoch": 13.2, |
| "learning_rate": 4.73441943809696e-06, |
| "loss": 2.9486, |
| "step": 1611500 |
| }, |
| { |
| "epoch": 13.21, |
| "learning_rate": 4.732775485377041e-06, |
| "loss": 2.9435, |
| "step": 1612000 |
| }, |
| { |
| "epoch": 13.21, |
| "learning_rate": 4.731131532657121e-06, |
| "loss": 2.9416, |
| "step": 1612500 |
| }, |
| { |
| "epoch": 13.22, |
| "learning_rate": 4.7294875799372014e-06, |
| "loss": 2.9449, |
| "step": 1613000 |
| }, |
| { |
| "epoch": 13.22, |
| "learning_rate": 4.7278436272172816e-06, |
| "loss": 2.95, |
| "step": 1613500 |
| }, |
| { |
| "epoch": 13.22, |
| "learning_rate": 4.726199674497362e-06, |
| "loss": 2.9506, |
| "step": 1614000 |
| }, |
| { |
| "epoch": 13.23, |
| "learning_rate": 4.724555721777442e-06, |
| "loss": 2.9452, |
| "step": 1614500 |
| }, |
| { |
| "epoch": 13.23, |
| "learning_rate": 4.722911769057522e-06, |
| "loss": 2.9432, |
| "step": 1615000 |
| }, |
| { |
| "epoch": 13.24, |
| "learning_rate": 4.721267816337602e-06, |
| "loss": 2.9425, |
| "step": 1615500 |
| }, |
| { |
| "epoch": 13.24, |
| "learning_rate": 4.719623863617683e-06, |
| "loss": 2.9451, |
| "step": 1616000 |
| }, |
| { |
| "epoch": 13.24, |
| "learning_rate": 4.717979910897763e-06, |
| "loss": 2.9468, |
| "step": 1616500 |
| }, |
| { |
| "epoch": 13.25, |
| "learning_rate": 4.716335958177843e-06, |
| "loss": 2.9504, |
| "step": 1617000 |
| }, |
| { |
| "epoch": 13.25, |
| "learning_rate": 4.7146920054579234e-06, |
| "loss": 2.9464, |
| "step": 1617500 |
| }, |
| { |
| "epoch": 13.26, |
| "learning_rate": 4.7130480527380036e-06, |
| "loss": 2.9482, |
| "step": 1618000 |
| }, |
| { |
| "epoch": 13.26, |
| "learning_rate": 4.711404100018084e-06, |
| "loss": 2.9461, |
| "step": 1618500 |
| }, |
| { |
| "epoch": 13.26, |
| "learning_rate": 4.709760147298164e-06, |
| "loss": 2.9471, |
| "step": 1619000 |
| }, |
| { |
| "epoch": 13.27, |
| "learning_rate": 4.708116194578245e-06, |
| "loss": 2.9439, |
| "step": 1619500 |
| }, |
| { |
| "epoch": 13.27, |
| "learning_rate": 4.706472241858325e-06, |
| "loss": 2.9413, |
| "step": 1620000 |
| }, |
| { |
| "epoch": 13.27, |
| "eval_accuracy": 0.49669974826700375, |
| "eval_loss": 2.809884786605835, |
| "eval_runtime": 410.3222, |
| "eval_samples_per_second": 751.497, |
| "eval_steps_per_second": 15.658, |
| "step": 1620000 |
| }, |
| { |
| "epoch": 13.28, |
| "learning_rate": 4.704828289138404e-06, |
| "loss": 2.9414, |
| "step": 1620500 |
| }, |
| { |
| "epoch": 13.28, |
| "learning_rate": 4.703184336418485e-06, |
| "loss": 2.9411, |
| "step": 1621000 |
| }, |
| { |
| "epoch": 13.28, |
| "learning_rate": 4.701540383698565e-06, |
| "loss": 2.9508, |
| "step": 1621500 |
| }, |
| { |
| "epoch": 13.29, |
| "learning_rate": 4.699896430978645e-06, |
| "loss": 2.9504, |
| "step": 1622000 |
| }, |
| { |
| "epoch": 13.29, |
| "learning_rate": 4.6982524782587255e-06, |
| "loss": 2.9523, |
| "step": 1622500 |
| }, |
| { |
| "epoch": 13.3, |
| "learning_rate": 4.696608525538806e-06, |
| "loss": 2.9378, |
| "step": 1623000 |
| }, |
| { |
| "epoch": 13.3, |
| "learning_rate": 4.694964572818886e-06, |
| "loss": 2.9436, |
| "step": 1623500 |
| }, |
| { |
| "epoch": 13.31, |
| "learning_rate": 4.693320620098966e-06, |
| "loss": 2.9432, |
| "step": 1624000 |
| }, |
| { |
| "epoch": 13.31, |
| "learning_rate": 4.691676667379047e-06, |
| "loss": 2.9492, |
| "step": 1624500 |
| }, |
| { |
| "epoch": 13.31, |
| "learning_rate": 4.690032714659127e-06, |
| "loss": 2.9463, |
| "step": 1625000 |
| }, |
| { |
| "epoch": 13.32, |
| "learning_rate": 4.688388761939207e-06, |
| "loss": 2.9491, |
| "step": 1625500 |
| }, |
| { |
| "epoch": 13.32, |
| "learning_rate": 4.686744809219287e-06, |
| "loss": 2.9386, |
| "step": 1626000 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 4.685100856499367e-06, |
| "loss": 2.9441, |
| "step": 1626500 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 4.6834569037794475e-06, |
| "loss": 2.9448, |
| "step": 1627000 |
| }, |
| { |
| "epoch": 13.33, |
| "learning_rate": 4.681812951059528e-06, |
| "loss": 2.9372, |
| "step": 1627500 |
| }, |
| { |
| "epoch": 13.34, |
| "learning_rate": 4.680168998339608e-06, |
| "loss": 2.9463, |
| "step": 1628000 |
| }, |
| { |
| "epoch": 13.34, |
| "learning_rate": 4.678525045619689e-06, |
| "loss": 2.9438, |
| "step": 1628500 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 4.676881092899769e-06, |
| "loss": 2.9344, |
| "step": 1629000 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 4.675237140179849e-06, |
| "loss": 2.9421, |
| "step": 1629500 |
| }, |
| { |
| "epoch": 13.35, |
| "learning_rate": 4.673593187459929e-06, |
| "loss": 2.9386, |
| "step": 1630000 |
| }, |
| { |
| "epoch": 13.36, |
| "learning_rate": 4.671949234740009e-06, |
| "loss": 2.9458, |
| "step": 1630500 |
| }, |
| { |
| "epoch": 13.36, |
| "learning_rate": 4.670305282020089e-06, |
| "loss": 2.9415, |
| "step": 1631000 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 4.6686613293001695e-06, |
| "loss": 2.9457, |
| "step": 1631500 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 4.6670173765802505e-06, |
| "loss": 2.9435, |
| "step": 1632000 |
| }, |
| { |
| "epoch": 13.37, |
| "learning_rate": 4.665373423860331e-06, |
| "loss": 2.9474, |
| "step": 1632500 |
| }, |
| { |
| "epoch": 13.38, |
| "learning_rate": 4.66372947114041e-06, |
| "loss": 2.9461, |
| "step": 1633000 |
| }, |
| { |
| "epoch": 13.38, |
| "learning_rate": 4.662085518420491e-06, |
| "loss": 2.9435, |
| "step": 1633500 |
| }, |
| { |
| "epoch": 13.39, |
| "learning_rate": 4.660441565700571e-06, |
| "loss": 2.9385, |
| "step": 1634000 |
| }, |
| { |
| "epoch": 13.39, |
| "learning_rate": 4.658797612980651e-06, |
| "loss": 2.9458, |
| "step": 1634500 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 4.657153660260731e-06, |
| "loss": 2.9402, |
| "step": 1635000 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 4.655509707540811e-06, |
| "loss": 2.9385, |
| "step": 1635500 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 4.6538657548208915e-06, |
| "loss": 2.9452, |
| "step": 1636000 |
| }, |
| { |
| "epoch": 13.41, |
| "learning_rate": 4.652221802100972e-06, |
| "loss": 2.9495, |
| "step": 1636500 |
| }, |
| { |
| "epoch": 13.41, |
| "learning_rate": 4.650577849381052e-06, |
| "loss": 2.9466, |
| "step": 1637000 |
| }, |
| { |
| "epoch": 13.42, |
| "learning_rate": 4.648933896661133e-06, |
| "loss": 2.9438, |
| "step": 1637500 |
| }, |
| { |
| "epoch": 13.42, |
| "learning_rate": 4.647289943941213e-06, |
| "loss": 2.9452, |
| "step": 1638000 |
| }, |
| { |
| "epoch": 13.42, |
| "learning_rate": 4.645645991221293e-06, |
| "loss": 2.9386, |
| "step": 1638500 |
| }, |
| { |
| "epoch": 13.43, |
| "learning_rate": 4.644002038501373e-06, |
| "loss": 2.9393, |
| "step": 1639000 |
| }, |
| { |
| "epoch": 13.43, |
| "learning_rate": 4.642358085781453e-06, |
| "loss": 2.9346, |
| "step": 1639500 |
| }, |
| { |
| "epoch": 13.44, |
| "learning_rate": 4.640714133061533e-06, |
| "loss": 2.9463, |
| "step": 1640000 |
| }, |
| { |
| "epoch": 13.44, |
| "learning_rate": 4.6390701803416135e-06, |
| "loss": 2.9433, |
| "step": 1640500 |
| }, |
| { |
| "epoch": 13.44, |
| "learning_rate": 4.6374262276216945e-06, |
| "loss": 2.9408, |
| "step": 1641000 |
| }, |
| { |
| "epoch": 13.45, |
| "learning_rate": 4.635782274901775e-06, |
| "loss": 2.9425, |
| "step": 1641500 |
| }, |
| { |
| "epoch": 13.45, |
| "learning_rate": 4.634138322181854e-06, |
| "loss": 2.9493, |
| "step": 1642000 |
| }, |
| { |
| "epoch": 13.46, |
| "learning_rate": 4.632494369461935e-06, |
| "loss": 2.9481, |
| "step": 1642500 |
| }, |
| { |
| "epoch": 13.46, |
| "learning_rate": 4.630850416742015e-06, |
| "loss": 2.9438, |
| "step": 1643000 |
| }, |
| { |
| "epoch": 13.46, |
| "learning_rate": 4.629206464022095e-06, |
| "loss": 2.9436, |
| "step": 1643500 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 4.627562511302175e-06, |
| "loss": 2.9404, |
| "step": 1644000 |
| }, |
| { |
| "epoch": 13.47, |
| "learning_rate": 4.625918558582256e-06, |
| "loss": 2.9467, |
| "step": 1644500 |
| }, |
| { |
| "epoch": 13.48, |
| "learning_rate": 4.6242746058623355e-06, |
| "loss": 2.9504, |
| "step": 1645000 |
| }, |
| { |
| "epoch": 13.48, |
| "learning_rate": 4.622630653142416e-06, |
| "loss": 2.947, |
| "step": 1645500 |
| }, |
| { |
| "epoch": 13.49, |
| "learning_rate": 4.6209867004224966e-06, |
| "loss": 2.9411, |
| "step": 1646000 |
| }, |
| { |
| "epoch": 13.49, |
| "learning_rate": 4.619342747702577e-06, |
| "loss": 2.9402, |
| "step": 1646500 |
| }, |
| { |
| "epoch": 13.49, |
| "learning_rate": 4.617698794982657e-06, |
| "loss": 2.9397, |
| "step": 1647000 |
| }, |
| { |
| "epoch": 13.5, |
| "learning_rate": 4.616054842262737e-06, |
| "loss": 2.9406, |
| "step": 1647500 |
| }, |
| { |
| "epoch": 13.5, |
| "learning_rate": 4.614410889542817e-06, |
| "loss": 2.9432, |
| "step": 1648000 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 4.612766936822897e-06, |
| "loss": 2.9428, |
| "step": 1648500 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 4.611122984102977e-06, |
| "loss": 2.9471, |
| "step": 1649000 |
| }, |
| { |
| "epoch": 13.51, |
| "learning_rate": 4.6094790313830575e-06, |
| "loss": 2.944, |
| "step": 1649500 |
| }, |
| { |
| "epoch": 13.52, |
| "learning_rate": 4.6078350786631384e-06, |
| "loss": 2.9381, |
| "step": 1650000 |
| }, |
| { |
| "epoch": 13.52, |
| "eval_accuracy": 0.49680267143539775, |
| "eval_loss": 2.8081107139587402, |
| "eval_runtime": 403.5601, |
| "eval_samples_per_second": 764.089, |
| "eval_steps_per_second": 15.921, |
| "step": 1650000 |
| }, |
| { |
| "epoch": 13.52, |
| "learning_rate": 4.6061911259432186e-06, |
| "loss": 2.9407, |
| "step": 1650500 |
| }, |
| { |
| "epoch": 13.53, |
| "learning_rate": 4.604547173223298e-06, |
| "loss": 2.9429, |
| "step": 1651000 |
| }, |
| { |
| "epoch": 13.53, |
| "learning_rate": 4.602903220503379e-06, |
| "loss": 2.9429, |
| "step": 1651500 |
| }, |
| { |
| "epoch": 13.53, |
| "learning_rate": 4.601259267783459e-06, |
| "loss": 2.9433, |
| "step": 1652000 |
| }, |
| { |
| "epoch": 13.54, |
| "learning_rate": 4.599615315063539e-06, |
| "loss": 2.9313, |
| "step": 1652500 |
| }, |
| { |
| "epoch": 13.54, |
| "learning_rate": 4.597971362343619e-06, |
| "loss": 2.942, |
| "step": 1653000 |
| }, |
| { |
| "epoch": 13.55, |
| "learning_rate": 4.5963274096237e-06, |
| "loss": 2.9498, |
| "step": 1653500 |
| }, |
| { |
| "epoch": 13.55, |
| "learning_rate": 4.5946834569037794e-06, |
| "loss": 2.9326, |
| "step": 1654000 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 4.5930395041838596e-06, |
| "loss": 2.9415, |
| "step": 1654500 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 4.5913955514639405e-06, |
| "loss": 2.9337, |
| "step": 1655000 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 4.589751598744021e-06, |
| "loss": 2.9363, |
| "step": 1655500 |
| }, |
| { |
| "epoch": 13.57, |
| "learning_rate": 4.588107646024101e-06, |
| "loss": 2.943, |
| "step": 1656000 |
| }, |
| { |
| "epoch": 13.57, |
| "learning_rate": 4.586463693304181e-06, |
| "loss": 2.9443, |
| "step": 1656500 |
| }, |
| { |
| "epoch": 13.58, |
| "learning_rate": 4.584819740584261e-06, |
| "loss": 2.9424, |
| "step": 1657000 |
| }, |
| { |
| "epoch": 13.58, |
| "learning_rate": 4.583175787864341e-06, |
| "loss": 2.9449, |
| "step": 1657500 |
| }, |
| { |
| "epoch": 13.58, |
| "learning_rate": 4.581531835144421e-06, |
| "loss": 2.9351, |
| "step": 1658000 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 4.579887882424502e-06, |
| "loss": 2.9457, |
| "step": 1658500 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 4.578243929704582e-06, |
| "loss": 2.9411, |
| "step": 1659000 |
| }, |
| { |
| "epoch": 13.6, |
| "learning_rate": 4.5765999769846625e-06, |
| "loss": 2.9469, |
| "step": 1659500 |
| }, |
| { |
| "epoch": 13.6, |
| "learning_rate": 4.574956024264743e-06, |
| "loss": 2.9427, |
| "step": 1660000 |
| }, |
| { |
| "epoch": 13.6, |
| "learning_rate": 4.573312071544823e-06, |
| "loss": 2.9381, |
| "step": 1660500 |
| }, |
| { |
| "epoch": 13.61, |
| "learning_rate": 4.571668118824903e-06, |
| "loss": 2.9367, |
| "step": 1661000 |
| }, |
| { |
| "epoch": 13.61, |
| "learning_rate": 4.570024166104983e-06, |
| "loss": 2.9354, |
| "step": 1661500 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 4.568380213385063e-06, |
| "loss": 2.9368, |
| "step": 1662000 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 4.566736260665144e-06, |
| "loss": 2.948, |
| "step": 1662500 |
| }, |
| { |
| "epoch": 13.62, |
| "learning_rate": 4.565092307945223e-06, |
| "loss": 2.9395, |
| "step": 1663000 |
| }, |
| { |
| "epoch": 13.63, |
| "learning_rate": 4.5634483552253035e-06, |
| "loss": 2.942, |
| "step": 1663500 |
| }, |
| { |
| "epoch": 13.63, |
| "learning_rate": 4.5618044025053845e-06, |
| "loss": 2.933, |
| "step": 1664000 |
| }, |
| { |
| "epoch": 13.64, |
| "learning_rate": 4.560160449785465e-06, |
| "loss": 2.9397, |
| "step": 1664500 |
| }, |
| { |
| "epoch": 13.64, |
| "learning_rate": 4.558516497065545e-06, |
| "loss": 2.9435, |
| "step": 1665000 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 4.556872544345625e-06, |
| "loss": 2.9398, |
| "step": 1665500 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 4.555228591625705e-06, |
| "loss": 2.9433, |
| "step": 1666000 |
| }, |
| { |
| "epoch": 13.65, |
| "learning_rate": 4.553584638905785e-06, |
| "loss": 2.9367, |
| "step": 1666500 |
| }, |
| { |
| "epoch": 13.66, |
| "learning_rate": 4.551940686185865e-06, |
| "loss": 2.9471, |
| "step": 1667000 |
| }, |
| { |
| "epoch": 13.66, |
| "learning_rate": 4.550296733465946e-06, |
| "loss": 2.9446, |
| "step": 1667500 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 4.548652780746026e-06, |
| "loss": 2.9373, |
| "step": 1668000 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 4.5470088280261065e-06, |
| "loss": 2.9354, |
| "step": 1668500 |
| }, |
| { |
| "epoch": 13.67, |
| "learning_rate": 4.545364875306187e-06, |
| "loss": 2.9431, |
| "step": 1669000 |
| }, |
| { |
| "epoch": 13.68, |
| "learning_rate": 4.543720922586267e-06, |
| "loss": 2.9444, |
| "step": 1669500 |
| }, |
| { |
| "epoch": 13.68, |
| "learning_rate": 4.542076969866347e-06, |
| "loss": 2.9413, |
| "step": 1670000 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 4.540433017146427e-06, |
| "loss": 2.9407, |
| "step": 1670500 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 4.538789064426508e-06, |
| "loss": 2.9372, |
| "step": 1671000 |
| }, |
| { |
| "epoch": 13.69, |
| "learning_rate": 4.537145111706588e-06, |
| "loss": 2.9418, |
| "step": 1671500 |
| }, |
| { |
| "epoch": 13.7, |
| "learning_rate": 4.535501158986667e-06, |
| "loss": 2.9401, |
| "step": 1672000 |
| }, |
| { |
| "epoch": 13.7, |
| "learning_rate": 4.533857206266748e-06, |
| "loss": 2.9391, |
| "step": 1672500 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 4.5322132535468285e-06, |
| "loss": 2.9417, |
| "step": 1673000 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 4.530569300826909e-06, |
| "loss": 2.9413, |
| "step": 1673500 |
| }, |
| { |
| "epoch": 13.71, |
| "learning_rate": 4.528925348106989e-06, |
| "loss": 2.9382, |
| "step": 1674000 |
| }, |
| { |
| "epoch": 13.72, |
| "learning_rate": 4.527281395387069e-06, |
| "loss": 2.942, |
| "step": 1674500 |
| }, |
| { |
| "epoch": 13.72, |
| "learning_rate": 4.525637442667149e-06, |
| "loss": 2.9443, |
| "step": 1675000 |
| }, |
| { |
| "epoch": 13.73, |
| "learning_rate": 4.523993489947229e-06, |
| "loss": 2.9417, |
| "step": 1675500 |
| }, |
| { |
| "epoch": 13.73, |
| "learning_rate": 4.522349537227309e-06, |
| "loss": 2.9398, |
| "step": 1676000 |
| }, |
| { |
| "epoch": 13.74, |
| "learning_rate": 4.52070558450739e-06, |
| "loss": 2.943, |
| "step": 1676500 |
| }, |
| { |
| "epoch": 13.74, |
| "learning_rate": 4.51906163178747e-06, |
| "loss": 2.9367, |
| "step": 1677000 |
| }, |
| { |
| "epoch": 13.74, |
| "learning_rate": 4.5174176790675505e-06, |
| "loss": 2.9447, |
| "step": 1677500 |
| }, |
| { |
| "epoch": 13.75, |
| "learning_rate": 4.515773726347631e-06, |
| "loss": 2.9327, |
| "step": 1678000 |
| }, |
| { |
| "epoch": 13.75, |
| "learning_rate": 4.514129773627711e-06, |
| "loss": 2.9379, |
| "step": 1678500 |
| }, |
| { |
| "epoch": 13.76, |
| "learning_rate": 4.512485820907791e-06, |
| "loss": 2.9412, |
| "step": 1679000 |
| }, |
| { |
| "epoch": 13.76, |
| "learning_rate": 4.510841868187871e-06, |
| "loss": 2.9383, |
| "step": 1679500 |
| }, |
| { |
| "epoch": 13.76, |
| "learning_rate": 4.509197915467952e-06, |
| "loss": 2.9389, |
| "step": 1680000 |
| }, |
| { |
| "epoch": 13.76, |
| "eval_accuracy": 0.4972946717627968, |
| "eval_loss": 2.80572772026062, |
| "eval_runtime": 409.9534, |
| "eval_samples_per_second": 752.173, |
| "eval_steps_per_second": 15.673, |
| "step": 1680000 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 4.507553962748032e-06, |
| "loss": 2.9444, |
| "step": 1680500 |
| }, |
| { |
| "epoch": 13.77, |
| "learning_rate": 4.505910010028112e-06, |
| "loss": 2.9413, |
| "step": 1681000 |
| }, |
| { |
| "epoch": 13.78, |
| "learning_rate": 4.504266057308192e-06, |
| "loss": 2.949, |
| "step": 1681500 |
| }, |
| { |
| "epoch": 13.78, |
| "learning_rate": 4.5026221045882725e-06, |
| "loss": 2.9386, |
| "step": 1682000 |
| }, |
| { |
| "epoch": 13.78, |
| "learning_rate": 4.500978151868353e-06, |
| "loss": 2.9448, |
| "step": 1682500 |
| }, |
| { |
| "epoch": 13.79, |
| "learning_rate": 4.499334199148433e-06, |
| "loss": 2.934, |
| "step": 1683000 |
| }, |
| { |
| "epoch": 13.79, |
| "learning_rate": 4.497690246428514e-06, |
| "loss": 2.9403, |
| "step": 1683500 |
| }, |
| { |
| "epoch": 13.8, |
| "learning_rate": 4.496046293708594e-06, |
| "loss": 2.9464, |
| "step": 1684000 |
| }, |
| { |
| "epoch": 13.8, |
| "learning_rate": 4.494402340988673e-06, |
| "loss": 2.9362, |
| "step": 1684500 |
| }, |
| { |
| "epoch": 13.8, |
| "learning_rate": 4.492758388268754e-06, |
| "loss": 2.9427, |
| "step": 1685000 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 4.491114435548834e-06, |
| "loss": 2.939, |
| "step": 1685500 |
| }, |
| { |
| "epoch": 13.81, |
| "learning_rate": 4.489470482828914e-06, |
| "loss": 2.9373, |
| "step": 1686000 |
| }, |
| { |
| "epoch": 13.82, |
| "learning_rate": 4.4878265301089944e-06, |
| "loss": 2.941, |
| "step": 1686500 |
| }, |
| { |
| "epoch": 13.82, |
| "learning_rate": 4.4861825773890746e-06, |
| "loss": 2.9358, |
| "step": 1687000 |
| }, |
| { |
| "epoch": 13.83, |
| "learning_rate": 4.484538624669155e-06, |
| "loss": 2.9371, |
| "step": 1687500 |
| }, |
| { |
| "epoch": 13.83, |
| "learning_rate": 4.482894671949235e-06, |
| "loss": 2.9382, |
| "step": 1688000 |
| }, |
| { |
| "epoch": 13.83, |
| "learning_rate": 4.481250719229315e-06, |
| "loss": 2.931, |
| "step": 1688500 |
| }, |
| { |
| "epoch": 13.84, |
| "learning_rate": 4.479606766509396e-06, |
| "loss": 2.9357, |
| "step": 1689000 |
| }, |
| { |
| "epoch": 13.84, |
| "learning_rate": 4.477962813789476e-06, |
| "loss": 2.9389, |
| "step": 1689500 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 4.476318861069556e-06, |
| "loss": 2.9362, |
| "step": 1690000 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 4.474674908349636e-06, |
| "loss": 2.9395, |
| "step": 1690500 |
| }, |
| { |
| "epoch": 13.85, |
| "learning_rate": 4.473030955629716e-06, |
| "loss": 2.9368, |
| "step": 1691000 |
| }, |
| { |
| "epoch": 13.86, |
| "learning_rate": 4.4713870029097965e-06, |
| "loss": 2.9447, |
| "step": 1691500 |
| }, |
| { |
| "epoch": 13.86, |
| "learning_rate": 4.469743050189877e-06, |
| "loss": 2.9401, |
| "step": 1692000 |
| }, |
| { |
| "epoch": 13.87, |
| "learning_rate": 4.468099097469958e-06, |
| "loss": 2.9357, |
| "step": 1692500 |
| }, |
| { |
| "epoch": 13.87, |
| "learning_rate": 4.466455144750038e-06, |
| "loss": 2.9433, |
| "step": 1693000 |
| }, |
| { |
| "epoch": 13.87, |
| "learning_rate": 4.464811192030117e-06, |
| "loss": 2.9427, |
| "step": 1693500 |
| }, |
| { |
| "epoch": 13.88, |
| "learning_rate": 4.463167239310198e-06, |
| "loss": 2.9336, |
| "step": 1694000 |
| }, |
| { |
| "epoch": 13.88, |
| "learning_rate": 4.461523286590278e-06, |
| "loss": 2.9388, |
| "step": 1694500 |
| }, |
| { |
| "epoch": 13.89, |
| "learning_rate": 4.459879333870358e-06, |
| "loss": 2.9386, |
| "step": 1695000 |
| }, |
| { |
| "epoch": 13.89, |
| "learning_rate": 4.458235381150438e-06, |
| "loss": 2.9384, |
| "step": 1695500 |
| }, |
| { |
| "epoch": 13.9, |
| "learning_rate": 4.4565914284305185e-06, |
| "loss": 2.9361, |
| "step": 1696000 |
| }, |
| { |
| "epoch": 13.9, |
| "learning_rate": 4.454947475710599e-06, |
| "loss": 2.9363, |
| "step": 1696500 |
| }, |
| { |
| "epoch": 13.9, |
| "learning_rate": 4.453303522990679e-06, |
| "loss": 2.9361, |
| "step": 1697000 |
| }, |
| { |
| "epoch": 13.91, |
| "learning_rate": 4.45165957027076e-06, |
| "loss": 2.9355, |
| "step": 1697500 |
| }, |
| { |
| "epoch": 13.91, |
| "learning_rate": 4.45001561755084e-06, |
| "loss": 2.9356, |
| "step": 1698000 |
| }, |
| { |
| "epoch": 13.92, |
| "learning_rate": 4.44837166483092e-06, |
| "loss": 2.9391, |
| "step": 1698500 |
| }, |
| { |
| "epoch": 13.92, |
| "learning_rate": 4.446727712111e-06, |
| "loss": 2.9384, |
| "step": 1699000 |
| }, |
| { |
| "epoch": 13.92, |
| "learning_rate": 4.44508375939108e-06, |
| "loss": 2.9367, |
| "step": 1699500 |
| }, |
| { |
| "epoch": 13.93, |
| "learning_rate": 4.44343980667116e-06, |
| "loss": 2.9446, |
| "step": 1700000 |
| }, |
| { |
| "epoch": 13.93, |
| "learning_rate": 4.4417958539512405e-06, |
| "loss": 2.9439, |
| "step": 1700500 |
| }, |
| { |
| "epoch": 13.94, |
| "learning_rate": 4.440151901231321e-06, |
| "loss": 2.9381, |
| "step": 1701000 |
| }, |
| { |
| "epoch": 13.94, |
| "learning_rate": 4.438507948511402e-06, |
| "loss": 2.9345, |
| "step": 1701500 |
| }, |
| { |
| "epoch": 13.94, |
| "learning_rate": 4.436863995791482e-06, |
| "loss": 2.9391, |
| "step": 1702000 |
| }, |
| { |
| "epoch": 13.95, |
| "learning_rate": 4.435220043071561e-06, |
| "loss": 2.9316, |
| "step": 1702500 |
| }, |
| { |
| "epoch": 13.95, |
| "learning_rate": 4.433576090351642e-06, |
| "loss": 2.9351, |
| "step": 1703000 |
| }, |
| { |
| "epoch": 13.96, |
| "learning_rate": 4.431932137631722e-06, |
| "loss": 2.932, |
| "step": 1703500 |
| }, |
| { |
| "epoch": 13.96, |
| "learning_rate": 4.430288184911802e-06, |
| "loss": 2.938, |
| "step": 1704000 |
| }, |
| { |
| "epoch": 13.96, |
| "learning_rate": 4.428644232191882e-06, |
| "loss": 2.9325, |
| "step": 1704500 |
| }, |
| { |
| "epoch": 13.97, |
| "learning_rate": 4.427000279471963e-06, |
| "loss": 2.9388, |
| "step": 1705000 |
| }, |
| { |
| "epoch": 13.97, |
| "learning_rate": 4.425356326752043e-06, |
| "loss": 2.9338, |
| "step": 1705500 |
| }, |
| { |
| "epoch": 13.98, |
| "learning_rate": 4.423712374032123e-06, |
| "loss": 2.9372, |
| "step": 1706000 |
| }, |
| { |
| "epoch": 13.98, |
| "learning_rate": 4.422068421312204e-06, |
| "loss": 2.9421, |
| "step": 1706500 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 4.420424468592284e-06, |
| "loss": 2.9381, |
| "step": 1707000 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 4.418780515872364e-06, |
| "loss": 2.9352, |
| "step": 1707500 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 4.417136563152444e-06, |
| "loss": 2.9376, |
| "step": 1708000 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 4.415492610432524e-06, |
| "loss": 2.9373, |
| "step": 1708500 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 4.413848657712604e-06, |
| "loss": 2.9386, |
| "step": 1709000 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 4.4122047049926845e-06, |
| "loss": 2.9335, |
| "step": 1709500 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 4.410560752272765e-06, |
| "loss": 2.9374, |
| "step": 1710000 |
| }, |
| { |
| "epoch": 14.01, |
| "eval_accuracy": 0.4976944853269889, |
| "eval_loss": 2.8028008937835693, |
| "eval_runtime": 411.9878, |
| "eval_samples_per_second": 748.459, |
| "eval_steps_per_second": 15.595, |
| "step": 1710000 |
| }, |
| { |
| "epoch": 14.01, |
| "learning_rate": 4.408916799552846e-06, |
| "loss": 2.9313, |
| "step": 1710500 |
| }, |
| { |
| "epoch": 14.02, |
| "learning_rate": 4.407272846832926e-06, |
| "loss": 2.9383, |
| "step": 1711000 |
| }, |
| { |
| "epoch": 14.02, |
| "learning_rate": 4.405628894113006e-06, |
| "loss": 2.939, |
| "step": 1711500 |
| }, |
| { |
| "epoch": 14.03, |
| "learning_rate": 4.403984941393086e-06, |
| "loss": 2.9374, |
| "step": 1712000 |
| }, |
| { |
| "epoch": 14.03, |
| "learning_rate": 4.402340988673166e-06, |
| "loss": 2.9351, |
| "step": 1712500 |
| }, |
| { |
| "epoch": 14.03, |
| "learning_rate": 4.400697035953246e-06, |
| "loss": 2.9335, |
| "step": 1713000 |
| }, |
| { |
| "epoch": 14.04, |
| "learning_rate": 4.399053083233326e-06, |
| "loss": 2.9393, |
| "step": 1713500 |
| }, |
| { |
| "epoch": 14.04, |
| "learning_rate": 4.397409130513407e-06, |
| "loss": 2.9374, |
| "step": 1714000 |
| }, |
| { |
| "epoch": 14.05, |
| "learning_rate": 4.395765177793487e-06, |
| "loss": 2.9355, |
| "step": 1714500 |
| }, |
| { |
| "epoch": 14.05, |
| "learning_rate": 4.394121225073567e-06, |
| "loss": 2.9253, |
| "step": 1715000 |
| }, |
| { |
| "epoch": 14.05, |
| "learning_rate": 4.392477272353648e-06, |
| "loss": 2.9334, |
| "step": 1715500 |
| }, |
| { |
| "epoch": 14.06, |
| "learning_rate": 4.390833319633728e-06, |
| "loss": 2.9287, |
| "step": 1716000 |
| }, |
| { |
| "epoch": 14.06, |
| "learning_rate": 4.389189366913808e-06, |
| "loss": 2.9363, |
| "step": 1716500 |
| }, |
| { |
| "epoch": 14.07, |
| "learning_rate": 4.387545414193888e-06, |
| "loss": 2.9334, |
| "step": 1717000 |
| }, |
| { |
| "epoch": 14.07, |
| "learning_rate": 4.385901461473968e-06, |
| "loss": 2.9396, |
| "step": 1717500 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 4.384257508754048e-06, |
| "loss": 2.9314, |
| "step": 1718000 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 4.3826135560341285e-06, |
| "loss": 2.9284, |
| "step": 1718500 |
| }, |
| { |
| "epoch": 14.08, |
| "learning_rate": 4.3809696033142094e-06, |
| "loss": 2.9397, |
| "step": 1719000 |
| }, |
| { |
| "epoch": 14.09, |
| "learning_rate": 4.3793256505942896e-06, |
| "loss": 2.9268, |
| "step": 1719500 |
| }, |
| { |
| "epoch": 14.09, |
| "learning_rate": 4.37768169787437e-06, |
| "loss": 2.9344, |
| "step": 1720000 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 4.37603774515445e-06, |
| "loss": 2.9336, |
| "step": 1720500 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 4.37439379243453e-06, |
| "loss": 2.9399, |
| "step": 1721000 |
| }, |
| { |
| "epoch": 14.1, |
| "learning_rate": 4.37274983971461e-06, |
| "loss": 2.9335, |
| "step": 1721500 |
| }, |
| { |
| "epoch": 14.11, |
| "learning_rate": 4.37110588699469e-06, |
| "loss": 2.9299, |
| "step": 1722000 |
| }, |
| { |
| "epoch": 14.11, |
| "learning_rate": 4.36946193427477e-06, |
| "loss": 2.9353, |
| "step": 1722500 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 4.367817981554851e-06, |
| "loss": 2.9282, |
| "step": 1723000 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 4.3661740288349306e-06, |
| "loss": 2.9343, |
| "step": 1723500 |
| }, |
| { |
| "epoch": 14.12, |
| "learning_rate": 4.364530076115011e-06, |
| "loss": 2.9299, |
| "step": 1724000 |
| }, |
| { |
| "epoch": 14.13, |
| "learning_rate": 4.362886123395092e-06, |
| "loss": 2.9317, |
| "step": 1724500 |
| }, |
| { |
| "epoch": 14.13, |
| "learning_rate": 4.361242170675172e-06, |
| "loss": 2.9287, |
| "step": 1725000 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 4.359598217955252e-06, |
| "loss": 2.9372, |
| "step": 1725500 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 4.357954265235332e-06, |
| "loss": 2.9415, |
| "step": 1726000 |
| }, |
| { |
| "epoch": 14.14, |
| "learning_rate": 4.356310312515412e-06, |
| "loss": 2.9302, |
| "step": 1726500 |
| }, |
| { |
| "epoch": 14.15, |
| "learning_rate": 4.354666359795492e-06, |
| "loss": 2.9365, |
| "step": 1727000 |
| }, |
| { |
| "epoch": 14.15, |
| "learning_rate": 4.3530224070755724e-06, |
| "loss": 2.9399, |
| "step": 1727500 |
| }, |
| { |
| "epoch": 14.16, |
| "learning_rate": 4.351378454355653e-06, |
| "loss": 2.934, |
| "step": 1728000 |
| }, |
| { |
| "epoch": 14.16, |
| "learning_rate": 4.3497345016357335e-06, |
| "loss": 2.9384, |
| "step": 1728500 |
| }, |
| { |
| "epoch": 14.17, |
| "learning_rate": 4.348090548915814e-06, |
| "loss": 2.9384, |
| "step": 1729000 |
| }, |
| { |
| "epoch": 14.17, |
| "learning_rate": 4.346446596195894e-06, |
| "loss": 2.9356, |
| "step": 1729500 |
| }, |
| { |
| "epoch": 14.17, |
| "learning_rate": 4.344802643475974e-06, |
| "loss": 2.9365, |
| "step": 1730000 |
| }, |
| { |
| "epoch": 14.18, |
| "learning_rate": 4.343158690756054e-06, |
| "loss": 2.941, |
| "step": 1730500 |
| }, |
| { |
| "epoch": 14.18, |
| "learning_rate": 4.341514738036134e-06, |
| "loss": 2.9355, |
| "step": 1731000 |
| }, |
| { |
| "epoch": 14.19, |
| "learning_rate": 4.339870785316215e-06, |
| "loss": 2.9348, |
| "step": 1731500 |
| }, |
| { |
| "epoch": 14.19, |
| "learning_rate": 4.338226832596295e-06, |
| "loss": 2.9331, |
| "step": 1732000 |
| }, |
| { |
| "epoch": 14.19, |
| "learning_rate": 4.336582879876375e-06, |
| "loss": 2.9319, |
| "step": 1732500 |
| }, |
| { |
| "epoch": 14.2, |
| "learning_rate": 4.3349389271564555e-06, |
| "loss": 2.9369, |
| "step": 1733000 |
| }, |
| { |
| "epoch": 14.2, |
| "learning_rate": 4.333294974436536e-06, |
| "loss": 2.9361, |
| "step": 1733500 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 4.331651021716616e-06, |
| "loss": 2.9339, |
| "step": 1734000 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 4.330007068996696e-06, |
| "loss": 2.9307, |
| "step": 1734500 |
| }, |
| { |
| "epoch": 14.21, |
| "learning_rate": 4.328363116276776e-06, |
| "loss": 2.9329, |
| "step": 1735000 |
| }, |
| { |
| "epoch": 14.22, |
| "learning_rate": 4.326719163556857e-06, |
| "loss": 2.9387, |
| "step": 1735500 |
| }, |
| { |
| "epoch": 14.22, |
| "learning_rate": 4.325075210836936e-06, |
| "loss": 2.9352, |
| "step": 1736000 |
| }, |
| { |
| "epoch": 14.23, |
| "learning_rate": 4.323431258117016e-06, |
| "loss": 2.9308, |
| "step": 1736500 |
| }, |
| { |
| "epoch": 14.23, |
| "learning_rate": 4.321787305397097e-06, |
| "loss": 2.9319, |
| "step": 1737000 |
| }, |
| { |
| "epoch": 14.24, |
| "learning_rate": 4.3201433526771775e-06, |
| "loss": 2.9321, |
| "step": 1737500 |
| }, |
| { |
| "epoch": 14.24, |
| "learning_rate": 4.318499399957258e-06, |
| "loss": 2.9276, |
| "step": 1738000 |
| }, |
| { |
| "epoch": 14.24, |
| "learning_rate": 4.316855447237338e-06, |
| "loss": 2.933, |
| "step": 1738500 |
| }, |
| { |
| "epoch": 14.25, |
| "learning_rate": 4.315211494517418e-06, |
| "loss": 2.936, |
| "step": 1739000 |
| }, |
| { |
| "epoch": 14.25, |
| "learning_rate": 4.313567541797498e-06, |
| "loss": 2.9336, |
| "step": 1739500 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 4.311923589077578e-06, |
| "loss": 2.9341, |
| "step": 1740000 |
| }, |
| { |
| "epoch": 14.26, |
| "eval_accuracy": 0.4978347321882909, |
| "eval_loss": 2.8000051975250244, |
| "eval_runtime": 409.1394, |
| "eval_samples_per_second": 753.67, |
| "eval_steps_per_second": 15.704, |
| "step": 1740000 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 4.310279636357659e-06, |
| "loss": 2.9279, |
| "step": 1740500 |
| }, |
| { |
| "epoch": 14.26, |
| "learning_rate": 4.308635683637739e-06, |
| "loss": 2.9396, |
| "step": 1741000 |
| }, |
| { |
| "epoch": 14.27, |
| "learning_rate": 4.306991730917819e-06, |
| "loss": 2.9339, |
| "step": 1741500 |
| }, |
| { |
| "epoch": 14.27, |
| "learning_rate": 4.3053477781978995e-06, |
| "loss": 2.9332, |
| "step": 1742000 |
| }, |
| { |
| "epoch": 14.28, |
| "learning_rate": 4.30370382547798e-06, |
| "loss": 2.9357, |
| "step": 1742500 |
| }, |
| { |
| "epoch": 14.28, |
| "learning_rate": 4.30205987275806e-06, |
| "loss": 2.9298, |
| "step": 1743000 |
| }, |
| { |
| "epoch": 14.28, |
| "learning_rate": 4.30041592003814e-06, |
| "loss": 2.936, |
| "step": 1743500 |
| }, |
| { |
| "epoch": 14.29, |
| "learning_rate": 4.298771967318221e-06, |
| "loss": 2.9274, |
| "step": 1744000 |
| }, |
| { |
| "epoch": 14.29, |
| "learning_rate": 4.297128014598301e-06, |
| "loss": 2.9341, |
| "step": 1744500 |
| }, |
| { |
| "epoch": 14.3, |
| "learning_rate": 4.29548406187838e-06, |
| "loss": 2.9329, |
| "step": 1745000 |
| }, |
| { |
| "epoch": 14.3, |
| "learning_rate": 4.293840109158461e-06, |
| "loss": 2.9282, |
| "step": 1745500 |
| }, |
| { |
| "epoch": 14.3, |
| "learning_rate": 4.292196156438541e-06, |
| "loss": 2.9359, |
| "step": 1746000 |
| }, |
| { |
| "epoch": 14.31, |
| "learning_rate": 4.2905522037186215e-06, |
| "loss": 2.9311, |
| "step": 1746500 |
| }, |
| { |
| "epoch": 14.31, |
| "learning_rate": 4.288908250998702e-06, |
| "loss": 2.9301, |
| "step": 1747000 |
| }, |
| { |
| "epoch": 14.32, |
| "learning_rate": 4.287264298278782e-06, |
| "loss": 2.9277, |
| "step": 1747500 |
| }, |
| { |
| "epoch": 14.32, |
| "learning_rate": 4.285620345558862e-06, |
| "loss": 2.9333, |
| "step": 1748000 |
| }, |
| { |
| "epoch": 14.33, |
| "learning_rate": 4.283976392838942e-06, |
| "loss": 2.9305, |
| "step": 1748500 |
| }, |
| { |
| "epoch": 14.33, |
| "learning_rate": 4.282332440119022e-06, |
| "loss": 2.9342, |
| "step": 1749000 |
| }, |
| { |
| "epoch": 14.33, |
| "learning_rate": 4.280688487399103e-06, |
| "loss": 2.9321, |
| "step": 1749500 |
| }, |
| { |
| "epoch": 14.34, |
| "learning_rate": 4.279044534679183e-06, |
| "loss": 2.9341, |
| "step": 1750000 |
| }, |
| { |
| "epoch": 14.34, |
| "learning_rate": 4.277400581959263e-06, |
| "loss": 2.9312, |
| "step": 1750500 |
| }, |
| { |
| "epoch": 14.35, |
| "learning_rate": 4.2757566292393435e-06, |
| "loss": 2.9252, |
| "step": 1751000 |
| }, |
| { |
| "epoch": 14.35, |
| "learning_rate": 4.274112676519424e-06, |
| "loss": 2.9335, |
| "step": 1751500 |
| }, |
| { |
| "epoch": 14.35, |
| "learning_rate": 4.272468723799504e-06, |
| "loss": 2.9248, |
| "step": 1752000 |
| }, |
| { |
| "epoch": 14.36, |
| "learning_rate": 4.270824771079584e-06, |
| "loss": 2.9322, |
| "step": 1752500 |
| }, |
| { |
| "epoch": 14.36, |
| "learning_rate": 4.269180818359665e-06, |
| "loss": 2.9343, |
| "step": 1753000 |
| }, |
| { |
| "epoch": 14.37, |
| "learning_rate": 4.267536865639745e-06, |
| "loss": 2.9346, |
| "step": 1753500 |
| }, |
| { |
| "epoch": 14.37, |
| "learning_rate": 4.265892912919824e-06, |
| "loss": 2.9312, |
| "step": 1754000 |
| }, |
| { |
| "epoch": 14.37, |
| "learning_rate": 4.264248960199905e-06, |
| "loss": 2.9274, |
| "step": 1754500 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 4.262605007479985e-06, |
| "loss": 2.9308, |
| "step": 1755000 |
| }, |
| { |
| "epoch": 14.38, |
| "learning_rate": 4.2609610547600654e-06, |
| "loss": 2.9315, |
| "step": 1755500 |
| }, |
| { |
| "epoch": 14.39, |
| "learning_rate": 4.2593171020401456e-06, |
| "loss": 2.929, |
| "step": 1756000 |
| }, |
| { |
| "epoch": 14.39, |
| "learning_rate": 4.2576731493202265e-06, |
| "loss": 2.9362, |
| "step": 1756500 |
| }, |
| { |
| "epoch": 14.39, |
| "learning_rate": 4.256029196600306e-06, |
| "loss": 2.9238, |
| "step": 1757000 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 4.254385243880386e-06, |
| "loss": 2.9318, |
| "step": 1757500 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 4.252741291160467e-06, |
| "loss": 2.935, |
| "step": 1758000 |
| }, |
| { |
| "epoch": 14.41, |
| "learning_rate": 4.251097338440547e-06, |
| "loss": 2.9325, |
| "step": 1758500 |
| }, |
| { |
| "epoch": 14.41, |
| "learning_rate": 4.249453385720627e-06, |
| "loss": 2.9388, |
| "step": 1759000 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 4.247809433000707e-06, |
| "loss": 2.9329, |
| "step": 1759500 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 4.2461654802807874e-06, |
| "loss": 2.9317, |
| "step": 1760000 |
| }, |
| { |
| "epoch": 14.42, |
| "learning_rate": 4.2445215275608676e-06, |
| "loss": 2.9316, |
| "step": 1760500 |
| }, |
| { |
| "epoch": 14.43, |
| "learning_rate": 4.242877574840948e-06, |
| "loss": 2.9311, |
| "step": 1761000 |
| }, |
| { |
| "epoch": 14.43, |
| "learning_rate": 4.241233622121028e-06, |
| "loss": 2.932, |
| "step": 1761500 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 4.239589669401109e-06, |
| "loss": 2.9319, |
| "step": 1762000 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 4.237945716681189e-06, |
| "loss": 2.9314, |
| "step": 1762500 |
| }, |
| { |
| "epoch": 14.44, |
| "learning_rate": 4.236301763961268e-06, |
| "loss": 2.934, |
| "step": 1763000 |
| }, |
| { |
| "epoch": 14.45, |
| "learning_rate": 4.234657811241349e-06, |
| "loss": 2.9321, |
| "step": 1763500 |
| }, |
| { |
| "epoch": 14.45, |
| "learning_rate": 4.233013858521429e-06, |
| "loss": 2.936, |
| "step": 1764000 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 4.231369905801509e-06, |
| "loss": 2.9388, |
| "step": 1764500 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 4.2297259530815895e-06, |
| "loss": 2.925, |
| "step": 1765000 |
| }, |
| { |
| "epoch": 14.46, |
| "learning_rate": 4.2280820003616705e-06, |
| "loss": 2.9331, |
| "step": 1765500 |
| }, |
| { |
| "epoch": 14.47, |
| "learning_rate": 4.22643804764175e-06, |
| "loss": 2.9265, |
| "step": 1766000 |
| }, |
| { |
| "epoch": 14.47, |
| "learning_rate": 4.22479409492183e-06, |
| "loss": 2.9322, |
| "step": 1766500 |
| }, |
| { |
| "epoch": 14.48, |
| "learning_rate": 4.223150142201911e-06, |
| "loss": 2.9266, |
| "step": 1767000 |
| }, |
| { |
| "epoch": 14.48, |
| "learning_rate": 4.221506189481991e-06, |
| "loss": 2.9338, |
| "step": 1767500 |
| }, |
| { |
| "epoch": 14.48, |
| "learning_rate": 4.219862236762071e-06, |
| "loss": 2.9362, |
| "step": 1768000 |
| }, |
| { |
| "epoch": 14.49, |
| "learning_rate": 4.218218284042151e-06, |
| "loss": 2.9338, |
| "step": 1768500 |
| }, |
| { |
| "epoch": 14.49, |
| "learning_rate": 4.216574331322231e-06, |
| "loss": 2.9352, |
| "step": 1769000 |
| }, |
| { |
| "epoch": 14.5, |
| "learning_rate": 4.2149303786023115e-06, |
| "loss": 2.9298, |
| "step": 1769500 |
| }, |
| { |
| "epoch": 14.5, |
| "learning_rate": 4.213286425882392e-06, |
| "loss": 2.9275, |
| "step": 1770000 |
| }, |
| { |
| "epoch": 14.5, |
| "eval_accuracy": 0.49835039139690634, |
| "eval_loss": 2.7977969646453857, |
| "eval_runtime": 403.9508, |
| "eval_samples_per_second": 763.35, |
| "eval_steps_per_second": 15.905, |
| "step": 1770000 |
| }, |
| { |
| "epoch": 14.51, |
| "learning_rate": 4.211642473162473e-06, |
| "loss": 2.9349, |
| "step": 1770500 |
| }, |
| { |
| "epoch": 14.51, |
| "learning_rate": 4.209998520442553e-06, |
| "loss": 2.9281, |
| "step": 1771000 |
| }, |
| { |
| "epoch": 14.51, |
| "learning_rate": 4.208354567722633e-06, |
| "loss": 2.9367, |
| "step": 1771500 |
| }, |
| { |
| "epoch": 14.52, |
| "learning_rate": 4.206710615002713e-06, |
| "loss": 2.9272, |
| "step": 1772000 |
| }, |
| { |
| "epoch": 14.52, |
| "learning_rate": 4.205066662282793e-06, |
| "loss": 2.9336, |
| "step": 1772500 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 4.203422709562873e-06, |
| "loss": 2.9265, |
| "step": 1773000 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 4.201778756842953e-06, |
| "loss": 2.9294, |
| "step": 1773500 |
| }, |
| { |
| "epoch": 14.53, |
| "learning_rate": 4.2001348041230335e-06, |
| "loss": 2.9236, |
| "step": 1774000 |
| }, |
| { |
| "epoch": 14.54, |
| "learning_rate": 4.1984908514031145e-06, |
| "loss": 2.9314, |
| "step": 1774500 |
| }, |
| { |
| "epoch": 14.54, |
| "learning_rate": 4.196846898683194e-06, |
| "loss": 2.9274, |
| "step": 1775000 |
| }, |
| { |
| "epoch": 14.55, |
| "learning_rate": 4.195202945963274e-06, |
| "loss": 2.9199, |
| "step": 1775500 |
| }, |
| { |
| "epoch": 14.55, |
| "learning_rate": 4.193558993243355e-06, |
| "loss": 2.9313, |
| "step": 1776000 |
| }, |
| { |
| "epoch": 14.55, |
| "learning_rate": 4.191915040523435e-06, |
| "loss": 2.9253, |
| "step": 1776500 |
| }, |
| { |
| "epoch": 14.56, |
| "learning_rate": 4.190271087803515e-06, |
| "loss": 2.9304, |
| "step": 1777000 |
| }, |
| { |
| "epoch": 14.56, |
| "learning_rate": 4.188627135083595e-06, |
| "loss": 2.9333, |
| "step": 1777500 |
| }, |
| { |
| "epoch": 14.57, |
| "learning_rate": 4.186983182363675e-06, |
| "loss": 2.9317, |
| "step": 1778000 |
| }, |
| { |
| "epoch": 14.57, |
| "learning_rate": 4.1853392296437555e-06, |
| "loss": 2.9276, |
| "step": 1778500 |
| }, |
| { |
| "epoch": 14.58, |
| "learning_rate": 4.183695276923836e-06, |
| "loss": 2.9319, |
| "step": 1779000 |
| }, |
| { |
| "epoch": 14.58, |
| "learning_rate": 4.182051324203917e-06, |
| "loss": 2.933, |
| "step": 1779500 |
| }, |
| { |
| "epoch": 14.58, |
| "learning_rate": 4.180407371483997e-06, |
| "loss": 2.9379, |
| "step": 1780000 |
| }, |
| { |
| "epoch": 14.59, |
| "learning_rate": 4.178763418764077e-06, |
| "loss": 2.9306, |
| "step": 1780500 |
| }, |
| { |
| "epoch": 14.59, |
| "learning_rate": 4.177119466044157e-06, |
| "loss": 2.9328, |
| "step": 1781000 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 4.175475513324237e-06, |
| "loss": 2.9255, |
| "step": 1781500 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 4.173831560604317e-06, |
| "loss": 2.9313, |
| "step": 1782000 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 4.172187607884397e-06, |
| "loss": 2.9252, |
| "step": 1782500 |
| }, |
| { |
| "epoch": 14.61, |
| "learning_rate": 4.1705436551644775e-06, |
| "loss": 2.9281, |
| "step": 1783000 |
| }, |
| { |
| "epoch": 14.61, |
| "learning_rate": 4.1688997024445585e-06, |
| "loss": 2.9268, |
| "step": 1783500 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 4.167255749724639e-06, |
| "loss": 2.9269, |
| "step": 1784000 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 4.165611797004719e-06, |
| "loss": 2.9347, |
| "step": 1784500 |
| }, |
| { |
| "epoch": 14.62, |
| "learning_rate": 4.163967844284799e-06, |
| "loss": 2.9234, |
| "step": 1785000 |
| }, |
| { |
| "epoch": 14.63, |
| "learning_rate": 4.162323891564879e-06, |
| "loss": 2.9271, |
| "step": 1785500 |
| }, |
| { |
| "epoch": 14.63, |
| "learning_rate": 4.160679938844959e-06, |
| "loss": 2.9335, |
| "step": 1786000 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 4.159035986125039e-06, |
| "loss": 2.9314, |
| "step": 1786500 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 4.15739203340512e-06, |
| "loss": 2.9327, |
| "step": 1787000 |
| }, |
| { |
| "epoch": 14.64, |
| "learning_rate": 4.1557480806851995e-06, |
| "loss": 2.9259, |
| "step": 1787500 |
| }, |
| { |
| "epoch": 14.65, |
| "learning_rate": 4.15410412796528e-06, |
| "loss": 2.9288, |
| "step": 1788000 |
| }, |
| { |
| "epoch": 14.65, |
| "learning_rate": 4.1524601752453606e-06, |
| "loss": 2.9282, |
| "step": 1788500 |
| }, |
| { |
| "epoch": 14.66, |
| "learning_rate": 4.150816222525441e-06, |
| "loss": 2.9276, |
| "step": 1789000 |
| }, |
| { |
| "epoch": 14.66, |
| "learning_rate": 4.149172269805521e-06, |
| "loss": 2.9337, |
| "step": 1789500 |
| }, |
| { |
| "epoch": 14.67, |
| "learning_rate": 4.147528317085601e-06, |
| "loss": 2.9284, |
| "step": 1790000 |
| }, |
| { |
| "epoch": 14.67, |
| "learning_rate": 4.145884364365681e-06, |
| "loss": 2.9208, |
| "step": 1790500 |
| }, |
| { |
| "epoch": 14.67, |
| "learning_rate": 4.144240411645761e-06, |
| "loss": 2.9408, |
| "step": 1791000 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 4.142596458925841e-06, |
| "loss": 2.9293, |
| "step": 1791500 |
| }, |
| { |
| "epoch": 14.68, |
| "learning_rate": 4.140952506205922e-06, |
| "loss": 2.93, |
| "step": 1792000 |
| }, |
| { |
| "epoch": 14.69, |
| "learning_rate": 4.1393085534860024e-06, |
| "loss": 2.9313, |
| "step": 1792500 |
| }, |
| { |
| "epoch": 14.69, |
| "learning_rate": 4.1376646007660826e-06, |
| "loss": 2.9321, |
| "step": 1793000 |
| }, |
| { |
| "epoch": 14.69, |
| "learning_rate": 4.136020648046163e-06, |
| "loss": 2.9249, |
| "step": 1793500 |
| }, |
| { |
| "epoch": 14.7, |
| "learning_rate": 4.134376695326243e-06, |
| "loss": 2.9286, |
| "step": 1794000 |
| }, |
| { |
| "epoch": 14.7, |
| "learning_rate": 4.132732742606323e-06, |
| "loss": 2.9354, |
| "step": 1794500 |
| }, |
| { |
| "epoch": 14.71, |
| "learning_rate": 4.131088789886403e-06, |
| "loss": 2.9287, |
| "step": 1795000 |
| }, |
| { |
| "epoch": 14.71, |
| "learning_rate": 4.129444837166483e-06, |
| "loss": 2.9271, |
| "step": 1795500 |
| }, |
| { |
| "epoch": 14.71, |
| "learning_rate": 4.127800884446564e-06, |
| "loss": 2.9335, |
| "step": 1796000 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 4.1261569317266434e-06, |
| "loss": 2.9235, |
| "step": 1796500 |
| }, |
| { |
| "epoch": 14.72, |
| "learning_rate": 4.1245129790067236e-06, |
| "loss": 2.9245, |
| "step": 1797000 |
| }, |
| { |
| "epoch": 14.73, |
| "learning_rate": 4.1228690262868045e-06, |
| "loss": 2.925, |
| "step": 1797500 |
| }, |
| { |
| "epoch": 14.73, |
| "learning_rate": 4.121225073566885e-06, |
| "loss": 2.9244, |
| "step": 1798000 |
| }, |
| { |
| "epoch": 14.73, |
| "learning_rate": 4.119581120846965e-06, |
| "loss": 2.9306, |
| "step": 1798500 |
| }, |
| { |
| "epoch": 14.74, |
| "learning_rate": 4.117937168127045e-06, |
| "loss": 2.9243, |
| "step": 1799000 |
| }, |
| { |
| "epoch": 14.74, |
| "learning_rate": 4.116293215407125e-06, |
| "loss": 2.9272, |
| "step": 1799500 |
| }, |
| { |
| "epoch": 14.75, |
| "learning_rate": 4.114649262687205e-06, |
| "loss": 2.9319, |
| "step": 1800000 |
| }, |
| { |
| "epoch": 14.75, |
| "eval_accuracy": 0.4989067207393835, |
| "eval_loss": 2.794658899307251, |
| "eval_runtime": 404.1884, |
| "eval_samples_per_second": 762.902, |
| "eval_steps_per_second": 15.896, |
| "step": 1800000 |
| }, |
| { |
| "epoch": 14.75, |
| "learning_rate": 4.113005309967285e-06, |
| "loss": 2.9225, |
| "step": 1800500 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 4.111361357247366e-06, |
| "loss": 2.9304, |
| "step": 1801000 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 4.109717404527446e-06, |
| "loss": 2.9258, |
| "step": 1801500 |
| }, |
| { |
| "epoch": 14.76, |
| "learning_rate": 4.1080734518075265e-06, |
| "loss": 2.9214, |
| "step": 1802000 |
| }, |
| { |
| "epoch": 14.77, |
| "learning_rate": 4.106429499087607e-06, |
| "loss": 2.9272, |
| "step": 1802500 |
| }, |
| { |
| "epoch": 14.77, |
| "learning_rate": 4.104785546367687e-06, |
| "loss": 2.9293, |
| "step": 1803000 |
| }, |
| { |
| "epoch": 14.78, |
| "learning_rate": 4.103141593647767e-06, |
| "loss": 2.9307, |
| "step": 1803500 |
| }, |
| { |
| "epoch": 14.78, |
| "learning_rate": 4.101497640927847e-06, |
| "loss": 2.9316, |
| "step": 1804000 |
| }, |
| { |
| "epoch": 14.78, |
| "learning_rate": 4.099853688207928e-06, |
| "loss": 2.9265, |
| "step": 1804500 |
| }, |
| { |
| "epoch": 14.79, |
| "learning_rate": 4.098209735488008e-06, |
| "loss": 2.9225, |
| "step": 1805000 |
| }, |
| { |
| "epoch": 14.79, |
| "learning_rate": 4.096565782768087e-06, |
| "loss": 2.9219, |
| "step": 1805500 |
| }, |
| { |
| "epoch": 14.8, |
| "learning_rate": 4.094921830048168e-06, |
| "loss": 2.9265, |
| "step": 1806000 |
| }, |
| { |
| "epoch": 14.8, |
| "learning_rate": 4.0932778773282485e-06, |
| "loss": 2.9258, |
| "step": 1806500 |
| }, |
| { |
| "epoch": 14.8, |
| "learning_rate": 4.091633924608329e-06, |
| "loss": 2.9324, |
| "step": 1807000 |
| }, |
| { |
| "epoch": 14.81, |
| "learning_rate": 4.089989971888409e-06, |
| "loss": 2.9335, |
| "step": 1807500 |
| }, |
| { |
| "epoch": 14.81, |
| "learning_rate": 4.088346019168489e-06, |
| "loss": 2.9219, |
| "step": 1808000 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 4.086702066448569e-06, |
| "loss": 2.9355, |
| "step": 1808500 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 4.085058113728649e-06, |
| "loss": 2.9275, |
| "step": 1809000 |
| }, |
| { |
| "epoch": 14.82, |
| "learning_rate": 4.083414161008729e-06, |
| "loss": 2.9191, |
| "step": 1809500 |
| }, |
| { |
| "epoch": 14.83, |
| "learning_rate": 4.08177020828881e-06, |
| "loss": 2.9248, |
| "step": 1810000 |
| }, |
| { |
| "epoch": 14.83, |
| "learning_rate": 4.08012625556889e-06, |
| "loss": 2.9263, |
| "step": 1810500 |
| }, |
| { |
| "epoch": 14.84, |
| "learning_rate": 4.0784823028489705e-06, |
| "loss": 2.9324, |
| "step": 1811000 |
| }, |
| { |
| "epoch": 14.84, |
| "learning_rate": 4.076838350129051e-06, |
| "loss": 2.9256, |
| "step": 1811500 |
| }, |
| { |
| "epoch": 14.85, |
| "learning_rate": 4.075194397409131e-06, |
| "loss": 2.9276, |
| "step": 1812000 |
| }, |
| { |
| "epoch": 14.85, |
| "learning_rate": 4.073550444689211e-06, |
| "loss": 2.9319, |
| "step": 1812500 |
| }, |
| { |
| "epoch": 14.85, |
| "learning_rate": 4.071906491969291e-06, |
| "loss": 2.9265, |
| "step": 1813000 |
| }, |
| { |
| "epoch": 14.86, |
| "learning_rate": 4.070262539249372e-06, |
| "loss": 2.9284, |
| "step": 1813500 |
| }, |
| { |
| "epoch": 14.86, |
| "learning_rate": 4.068618586529452e-06, |
| "loss": 2.9313, |
| "step": 1814000 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 4.066974633809531e-06, |
| "loss": 2.9201, |
| "step": 1814500 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 4.065330681089612e-06, |
| "loss": 2.9272, |
| "step": 1815000 |
| }, |
| { |
| "epoch": 14.87, |
| "learning_rate": 4.0636867283696925e-06, |
| "loss": 2.929, |
| "step": 1815500 |
| }, |
| { |
| "epoch": 14.88, |
| "learning_rate": 4.062042775649773e-06, |
| "loss": 2.9221, |
| "step": 1816000 |
| }, |
| { |
| "epoch": 14.88, |
| "learning_rate": 4.060398822929853e-06, |
| "loss": 2.9246, |
| "step": 1816500 |
| }, |
| { |
| "epoch": 14.89, |
| "learning_rate": 4.058754870209934e-06, |
| "loss": 2.9316, |
| "step": 1817000 |
| }, |
| { |
| "epoch": 14.89, |
| "learning_rate": 4.057110917490013e-06, |
| "loss": 2.9285, |
| "step": 1817500 |
| }, |
| { |
| "epoch": 14.89, |
| "learning_rate": 4.055466964770093e-06, |
| "loss": 2.9272, |
| "step": 1818000 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 4.053823012050174e-06, |
| "loss": 2.928, |
| "step": 1818500 |
| }, |
| { |
| "epoch": 14.9, |
| "learning_rate": 4.052179059330254e-06, |
| "loss": 2.9296, |
| "step": 1819000 |
| }, |
| { |
| "epoch": 14.91, |
| "learning_rate": 4.050535106610334e-06, |
| "loss": 2.9252, |
| "step": 1819500 |
| }, |
| { |
| "epoch": 14.91, |
| "learning_rate": 4.0488911538904145e-06, |
| "loss": 2.9287, |
| "step": 1820000 |
| }, |
| { |
| "epoch": 14.92, |
| "learning_rate": 4.047247201170495e-06, |
| "loss": 2.9318, |
| "step": 1820500 |
| }, |
| { |
| "epoch": 14.92, |
| "learning_rate": 4.045603248450575e-06, |
| "loss": 2.927, |
| "step": 1821000 |
| }, |
| { |
| "epoch": 14.92, |
| "learning_rate": 4.043959295730655e-06, |
| "loss": 2.9241, |
| "step": 1821500 |
| }, |
| { |
| "epoch": 14.93, |
| "learning_rate": 4.042315343010735e-06, |
| "loss": 2.9234, |
| "step": 1822000 |
| }, |
| { |
| "epoch": 14.93, |
| "learning_rate": 4.040671390290816e-06, |
| "loss": 2.9218, |
| "step": 1822500 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 4.039027437570896e-06, |
| "loss": 2.9306, |
| "step": 1823000 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 4.037383484850975e-06, |
| "loss": 2.9271, |
| "step": 1823500 |
| }, |
| { |
| "epoch": 14.94, |
| "learning_rate": 4.035739532131056e-06, |
| "loss": 2.9231, |
| "step": 1824000 |
| }, |
| { |
| "epoch": 14.95, |
| "learning_rate": 4.0340955794111365e-06, |
| "loss": 2.9276, |
| "step": 1824500 |
| }, |
| { |
| "epoch": 14.95, |
| "learning_rate": 4.032451626691217e-06, |
| "loss": 2.9269, |
| "step": 1825000 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 4.030807673971297e-06, |
| "loss": 2.9248, |
| "step": 1825500 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 4.029163721251378e-06, |
| "loss": 2.927, |
| "step": 1826000 |
| }, |
| { |
| "epoch": 14.96, |
| "learning_rate": 4.027519768531457e-06, |
| "loss": 2.932, |
| "step": 1826500 |
| }, |
| { |
| "epoch": 14.97, |
| "learning_rate": 4.025875815811537e-06, |
| "loss": 2.9253, |
| "step": 1827000 |
| }, |
| { |
| "epoch": 14.97, |
| "learning_rate": 4.024231863091618e-06, |
| "loss": 2.9283, |
| "step": 1827500 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 4.022587910371698e-06, |
| "loss": 2.9251, |
| "step": 1828000 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 4.020943957651778e-06, |
| "loss": 2.9207, |
| "step": 1828500 |
| }, |
| { |
| "epoch": 14.98, |
| "learning_rate": 4.0193000049318584e-06, |
| "loss": 2.931, |
| "step": 1829000 |
| }, |
| { |
| "epoch": 14.99, |
| "learning_rate": 4.0176560522119386e-06, |
| "loss": 2.922, |
| "step": 1829500 |
| }, |
| { |
| "epoch": 14.99, |
| "learning_rate": 4.016012099492019e-06, |
| "loss": 2.9304, |
| "step": 1830000 |
| }, |
| { |
| "epoch": 14.99, |
| "eval_accuracy": 0.49915539316259305, |
| "eval_loss": 2.7920210361480713, |
| "eval_runtime": 408.9888, |
| "eval_samples_per_second": 753.947, |
| "eval_steps_per_second": 15.709, |
| "step": 1830000 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 4.014368146772099e-06, |
| "loss": 2.924, |
| "step": 1830500 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 4.01272419405218e-06, |
| "loss": 2.9216, |
| "step": 1831000 |
| }, |
| { |
| "epoch": 15.01, |
| "learning_rate": 4.01108024133226e-06, |
| "loss": 2.9301, |
| "step": 1831500 |
| }, |
| { |
| "epoch": 15.01, |
| "learning_rate": 4.00943628861234e-06, |
| "loss": 2.9275, |
| "step": 1832000 |
| }, |
| { |
| "epoch": 15.01, |
| "learning_rate": 4.00779233589242e-06, |
| "loss": 2.9231, |
| "step": 1832500 |
| }, |
| { |
| "epoch": 15.02, |
| "learning_rate": 4.0061483831725e-06, |
| "loss": 2.9206, |
| "step": 1833000 |
| }, |
| { |
| "epoch": 15.02, |
| "learning_rate": 4.0045044304525804e-06, |
| "loss": 2.9231, |
| "step": 1833500 |
| }, |
| { |
| "epoch": 15.03, |
| "learning_rate": 4.0028604777326606e-06, |
| "loss": 2.9246, |
| "step": 1834000 |
| }, |
| { |
| "epoch": 15.03, |
| "learning_rate": 4.001216525012741e-06, |
| "loss": 2.9253, |
| "step": 1834500 |
| }, |
| { |
| "epoch": 15.03, |
| "learning_rate": 3.999572572292822e-06, |
| "loss": 2.9233, |
| "step": 1835000 |
| }, |
| { |
| "epoch": 15.04, |
| "learning_rate": 3.997928619572902e-06, |
| "loss": 2.9254, |
| "step": 1835500 |
| }, |
| { |
| "epoch": 15.04, |
| "learning_rate": 3.996284666852981e-06, |
| "loss": 2.9231, |
| "step": 1836000 |
| }, |
| { |
| "epoch": 15.05, |
| "learning_rate": 3.994640714133062e-06, |
| "loss": 2.9289, |
| "step": 1836500 |
| }, |
| { |
| "epoch": 15.05, |
| "learning_rate": 3.992996761413142e-06, |
| "loss": 2.9245, |
| "step": 1837000 |
| }, |
| { |
| "epoch": 15.05, |
| "learning_rate": 3.991352808693222e-06, |
| "loss": 2.9221, |
| "step": 1837500 |
| }, |
| { |
| "epoch": 15.06, |
| "learning_rate": 3.989708855973302e-06, |
| "loss": 2.9205, |
| "step": 1838000 |
| }, |
| { |
| "epoch": 15.06, |
| "learning_rate": 3.988064903253383e-06, |
| "loss": 2.9306, |
| "step": 1838500 |
| }, |
| { |
| "epoch": 15.07, |
| "learning_rate": 3.986420950533463e-06, |
| "loss": 2.9248, |
| "step": 1839000 |
| }, |
| { |
| "epoch": 15.07, |
| "learning_rate": 3.984776997813543e-06, |
| "loss": 2.9229, |
| "step": 1839500 |
| }, |
| { |
| "epoch": 15.07, |
| "learning_rate": 3.983133045093624e-06, |
| "loss": 2.9208, |
| "step": 1840000 |
| }, |
| { |
| "epoch": 15.08, |
| "learning_rate": 3.981489092373704e-06, |
| "loss": 2.9246, |
| "step": 1840500 |
| }, |
| { |
| "epoch": 15.08, |
| "learning_rate": 3.979845139653784e-06, |
| "loss": 2.9267, |
| "step": 1841000 |
| }, |
| { |
| "epoch": 15.09, |
| "learning_rate": 3.978201186933864e-06, |
| "loss": 2.9254, |
| "step": 1841500 |
| }, |
| { |
| "epoch": 15.09, |
| "step": 1841687, |
| "total_flos": 9.934758834172068e+17, |
| "train_loss": 3.300056499910879, |
| "train_runtime": 215976.5267, |
| "train_samples_per_second": 678.171, |
| "train_steps_per_second": 14.129 |
| } |
| ], |
| "max_steps": 3051450, |
| "num_train_epochs": 25, |
| "total_flos": 9.934758834172068e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|