{ "best_metric": 3.649672269821167, "best_model_checkpoint": "model_v1_complete_training_wt_init_48_tiny/checkpoint-1980000", "epoch": 21.7857751463777, "global_step": 1994357, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "loss": 10.2813, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.0000000000000002e-06, "loss": 9.6482, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.5e-06, "loss": 8.9748, "step": 1500 }, { "epoch": 0.02, "learning_rate": 2.0000000000000003e-06, "loss": 8.6322, "step": 2000 }, { "epoch": 0.03, "learning_rate": 2.5e-06, "loss": 8.41, "step": 2500 }, { "epoch": 0.03, "learning_rate": 3e-06, "loss": 8.2558, "step": 3000 }, { "epoch": 0.04, "learning_rate": 3.5e-06, "loss": 8.0642, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.000000000000001e-06, "loss": 7.9045, "step": 4000 }, { "epoch": 0.05, "learning_rate": 4.5e-06, "loss": 7.7643, "step": 4500 }, { "epoch": 0.05, "learning_rate": 5e-06, "loss": 7.6306, "step": 5000 }, { "epoch": 0.06, "learning_rate": 5.500000000000001e-06, "loss": 7.4948, "step": 5500 }, { "epoch": 0.07, "learning_rate": 6e-06, "loss": 7.3757, "step": 6000 }, { "epoch": 0.07, "learning_rate": 6.5000000000000004e-06, "loss": 7.2508, "step": 6500 }, { "epoch": 0.08, "learning_rate": 7e-06, "loss": 7.1459, "step": 7000 }, { "epoch": 0.08, "learning_rate": 7.500000000000001e-06, "loss": 7.0581, "step": 7500 }, { "epoch": 0.09, "learning_rate": 8.000000000000001e-06, "loss": 6.9748, "step": 8000 }, { "epoch": 0.09, "learning_rate": 8.5e-06, "loss": 6.9143, "step": 8500 }, { "epoch": 0.1, "learning_rate": 9e-06, "loss": 6.8548, "step": 9000 }, { "epoch": 0.1, "learning_rate": 9.5e-06, "loss": 6.7963, "step": 9500 }, { "epoch": 0.11, "learning_rate": 1e-05, "loss": 6.7499, "step": 10000 }, { "epoch": 0.11, "learning_rate": 9.998905237344545e-06, "loss": 6.71, "step": 10500 }, { "epoch": 0.12, "learning_rate": 9.997810474689088e-06, "loss": 6.6699, "step": 11000 }, { "epoch": 0.13, "learning_rate": 9.996715712033632e-06, "loss": 6.6289, "step": 11500 }, { "epoch": 0.13, "learning_rate": 9.995620949378176e-06, "loss": 6.5982, "step": 12000 }, { "epoch": 0.14, "learning_rate": 9.99452618672272e-06, "loss": 6.5775, "step": 12500 }, { "epoch": 0.14, "learning_rate": 9.993431424067263e-06, "loss": 6.5507, "step": 13000 }, { "epoch": 0.15, "learning_rate": 9.992336661411807e-06, "loss": 6.5267, "step": 13500 }, { "epoch": 0.15, "learning_rate": 9.99124189875635e-06, "loss": 6.5071, "step": 14000 }, { "epoch": 0.16, "learning_rate": 9.990147136100895e-06, "loss": 6.4741, "step": 14500 }, { "epoch": 0.16, "learning_rate": 9.989052373445438e-06, "loss": 6.4557, "step": 15000 }, { "epoch": 0.17, "learning_rate": 9.987957610789982e-06, "loss": 6.4293, "step": 15500 }, { "epoch": 0.17, "learning_rate": 9.986862848134526e-06, "loss": 6.4132, "step": 16000 }, { "epoch": 0.18, "learning_rate": 9.98576808547907e-06, "loss": 6.394, "step": 16500 }, { "epoch": 0.19, "learning_rate": 9.984673322823613e-06, "loss": 6.3784, "step": 17000 }, { "epoch": 0.19, "learning_rate": 9.983578560168157e-06, "loss": 6.3588, "step": 17500 }, { "epoch": 0.2, "learning_rate": 9.982483797512699e-06, "loss": 6.3451, "step": 18000 }, { "epoch": 0.2, "learning_rate": 9.981389034857244e-06, "loss": 6.3297, "step": 18500 }, { "epoch": 0.21, "learning_rate": 9.980294272201786e-06, "loss": 6.311, "step": 19000 }, { "epoch": 0.21, "learning_rate": 9.979199509546332e-06, "loss": 6.3009, "step": 19500 }, { "epoch": 0.22, "learning_rate": 9.978104746890876e-06, "loss": 6.2895, "step": 20000 }, { "epoch": 0.22, "learning_rate": 9.977009984235418e-06, "loss": 6.2688, "step": 20500 }, { "epoch": 0.23, "learning_rate": 9.975915221579963e-06, "loss": 6.2517, "step": 21000 }, { "epoch": 0.23, "learning_rate": 9.974820458924505e-06, "loss": 6.2317, "step": 21500 }, { "epoch": 0.24, "learning_rate": 9.97372569626905e-06, "loss": 6.2149, "step": 22000 }, { "epoch": 0.25, "learning_rate": 9.972630933613593e-06, "loss": 6.203, "step": 22500 }, { "epoch": 0.25, "learning_rate": 9.971536170958136e-06, "loss": 6.1911, "step": 23000 }, { "epoch": 0.26, "learning_rate": 9.970441408302682e-06, "loss": 6.1777, "step": 23500 }, { "epoch": 0.26, "learning_rate": 9.969346645647224e-06, "loss": 6.1684, "step": 24000 }, { "epoch": 0.27, "learning_rate": 9.968251882991768e-06, "loss": 6.1529, "step": 24500 }, { "epoch": 0.27, "learning_rate": 9.967157120336311e-06, "loss": 6.1452, "step": 25000 }, { "epoch": 0.28, "learning_rate": 9.966062357680855e-06, "loss": 6.1359, "step": 25500 }, { "epoch": 0.28, "learning_rate": 9.964967595025399e-06, "loss": 6.1262, "step": 26000 }, { "epoch": 0.29, "learning_rate": 9.963872832369943e-06, "loss": 6.1179, "step": 26500 }, { "epoch": 0.29, "learning_rate": 9.962778069714486e-06, "loss": 6.1077, "step": 27000 }, { "epoch": 0.3, "learning_rate": 9.96168330705903e-06, "loss": 6.0977, "step": 27500 }, { "epoch": 0.31, "learning_rate": 9.960588544403574e-06, "loss": 6.0788, "step": 28000 }, { "epoch": 0.31, "learning_rate": 9.959493781748118e-06, "loss": 6.0716, "step": 28500 }, { "epoch": 0.32, "learning_rate": 9.958399019092661e-06, "loss": 6.0501, "step": 29000 }, { "epoch": 0.32, "learning_rate": 9.957304256437205e-06, "loss": 6.0417, "step": 29500 }, { "epoch": 0.33, "learning_rate": 9.956209493781749e-06, "loss": 6.0224, "step": 30000 }, { "epoch": 0.33, "eval_accuracy": 0.15172239107283353, "eval_loss": 5.944677352905273, "eval_runtime": 349.0672, "eval_samples_per_second": 883.371, "eval_steps_per_second": 13.805, "step": 30000 }, { "epoch": 0.33, "learning_rate": 9.955114731126293e-06, "loss": 5.9976, "step": 30500 }, { "epoch": 0.34, "learning_rate": 9.954019968470836e-06, "loss": 5.9858, "step": 31000 }, { "epoch": 0.34, "learning_rate": 9.95292520581538e-06, "loss": 5.9617, "step": 31500 }, { "epoch": 0.35, "learning_rate": 9.951830443159924e-06, "loss": 5.9396, "step": 32000 }, { "epoch": 0.36, "learning_rate": 9.950735680504468e-06, "loss": 5.9186, "step": 32500 }, { "epoch": 0.36, "learning_rate": 9.949640917849011e-06, "loss": 5.9026, "step": 33000 }, { "epoch": 0.37, "learning_rate": 9.948546155193555e-06, "loss": 5.8824, "step": 33500 }, { "epoch": 0.37, "learning_rate": 9.947451392538099e-06, "loss": 5.8617, "step": 34000 }, { "epoch": 0.38, "learning_rate": 9.946356629882643e-06, "loss": 5.8298, "step": 34500 }, { "epoch": 0.38, "learning_rate": 9.945261867227186e-06, "loss": 5.8061, "step": 35000 }, { "epoch": 0.39, "learning_rate": 9.94416710457173e-06, "loss": 5.777, "step": 35500 }, { "epoch": 0.39, "learning_rate": 9.943072341916274e-06, "loss": 5.7525, "step": 36000 }, { "epoch": 0.4, "learning_rate": 9.941977579260817e-06, "loss": 5.7219, "step": 36500 }, { "epoch": 0.4, "learning_rate": 9.940882816605361e-06, "loss": 5.7053, "step": 37000 }, { "epoch": 0.41, "learning_rate": 9.939788053949905e-06, "loss": 5.6758, "step": 37500 }, { "epoch": 0.42, "learning_rate": 9.938693291294447e-06, "loss": 5.6475, "step": 38000 }, { "epoch": 0.42, "learning_rate": 9.937598528638992e-06, "loss": 5.6251, "step": 38500 }, { "epoch": 0.43, "learning_rate": 9.936503765983534e-06, "loss": 5.5986, "step": 39000 }, { "epoch": 0.43, "learning_rate": 9.93540900332808e-06, "loss": 5.5777, "step": 39500 }, { "epoch": 0.44, "learning_rate": 9.934314240672624e-06, "loss": 5.5497, "step": 40000 }, { "epoch": 0.44, "learning_rate": 9.933219478017166e-06, "loss": 5.5252, "step": 40500 }, { "epoch": 0.45, "learning_rate": 9.932124715361711e-06, "loss": 5.5072, "step": 41000 }, { "epoch": 0.45, "learning_rate": 9.931029952706253e-06, "loss": 5.4909, "step": 41500 }, { "epoch": 0.46, "learning_rate": 9.929935190050799e-06, "loss": 5.4723, "step": 42000 }, { "epoch": 0.46, "learning_rate": 9.928840427395342e-06, "loss": 5.4623, "step": 42500 }, { "epoch": 0.47, "learning_rate": 9.927745664739884e-06, "loss": 5.4452, "step": 43000 }, { "epoch": 0.48, "learning_rate": 9.92665090208443e-06, "loss": 5.4397, "step": 43500 }, { "epoch": 0.48, "learning_rate": 9.925556139428972e-06, "loss": 5.4235, "step": 44000 }, { "epoch": 0.49, "learning_rate": 9.924461376773517e-06, "loss": 5.414, "step": 44500 }, { "epoch": 0.49, "learning_rate": 9.92336661411806e-06, "loss": 5.4055, "step": 45000 }, { "epoch": 0.5, "learning_rate": 9.922271851462603e-06, "loss": 5.3926, "step": 45500 }, { "epoch": 0.5, "learning_rate": 9.921177088807149e-06, "loss": 5.378, "step": 46000 }, { "epoch": 0.51, "learning_rate": 9.92008232615169e-06, "loss": 5.3695, "step": 46500 }, { "epoch": 0.51, "learning_rate": 9.918987563496234e-06, "loss": 5.3614, "step": 47000 }, { "epoch": 0.52, "learning_rate": 9.917892800840778e-06, "loss": 5.3548, "step": 47500 }, { "epoch": 0.52, "learning_rate": 9.916798038185322e-06, "loss": 5.348, "step": 48000 }, { "epoch": 0.53, "learning_rate": 9.915703275529866e-06, "loss": 5.339, "step": 48500 }, { "epoch": 0.54, "learning_rate": 9.91460851287441e-06, "loss": 5.3313, "step": 49000 }, { "epoch": 0.54, "learning_rate": 9.913513750218953e-06, "loss": 5.3229, "step": 49500 }, { "epoch": 0.55, "learning_rate": 9.912418987563497e-06, "loss": 5.3117, "step": 50000 }, { "epoch": 0.55, "learning_rate": 9.91132422490804e-06, "loss": 5.3039, "step": 50500 }, { "epoch": 0.56, "learning_rate": 9.910229462252584e-06, "loss": 5.3044, "step": 51000 }, { "epoch": 0.56, "learning_rate": 9.909134699597128e-06, "loss": 5.2932, "step": 51500 }, { "epoch": 0.57, "learning_rate": 9.908039936941672e-06, "loss": 5.2873, "step": 52000 }, { "epoch": 0.57, "learning_rate": 9.906945174286216e-06, "loss": 5.277, "step": 52500 }, { "epoch": 0.58, "learning_rate": 9.90585041163076e-06, "loss": 5.2678, "step": 53000 }, { "epoch": 0.58, "learning_rate": 9.904755648975303e-06, "loss": 5.2644, "step": 53500 }, { "epoch": 0.59, "learning_rate": 9.903660886319847e-06, "loss": 5.2528, "step": 54000 }, { "epoch": 0.6, "learning_rate": 9.90256612366439e-06, "loss": 5.2481, "step": 54500 }, { "epoch": 0.6, "learning_rate": 9.901471361008934e-06, "loss": 5.2411, "step": 55000 }, { "epoch": 0.61, "learning_rate": 9.900376598353478e-06, "loss": 5.2374, "step": 55500 }, { "epoch": 0.61, "learning_rate": 9.899281835698022e-06, "loss": 5.2346, "step": 56000 }, { "epoch": 0.62, "learning_rate": 9.898187073042565e-06, "loss": 5.2198, "step": 56500 }, { "epoch": 0.62, "learning_rate": 9.89709231038711e-06, "loss": 5.2177, "step": 57000 }, { "epoch": 0.63, "learning_rate": 9.895997547731653e-06, "loss": 5.2121, "step": 57500 }, { "epoch": 0.63, "learning_rate": 9.894902785076195e-06, "loss": 5.1945, "step": 58000 }, { "epoch": 0.64, "learning_rate": 9.89380802242074e-06, "loss": 5.2019, "step": 58500 }, { "epoch": 0.64, "learning_rate": 9.892713259765284e-06, "loss": 5.2001, "step": 59000 }, { "epoch": 0.65, "learning_rate": 9.891618497109828e-06, "loss": 5.1926, "step": 59500 }, { "epoch": 0.66, "learning_rate": 9.890523734454372e-06, "loss": 5.1853, "step": 60000 }, { "epoch": 0.66, "eval_accuracy": 0.26149045721222425, "eval_loss": 4.963532447814941, "eval_runtime": 349.2562, "eval_samples_per_second": 882.893, "eval_steps_per_second": 13.798, "step": 60000 }, { "epoch": 0.66, "learning_rate": 9.889428971798914e-06, "loss": 5.1827, "step": 60500 }, { "epoch": 0.67, "learning_rate": 9.88833420914346e-06, "loss": 5.1659, "step": 61000 }, { "epoch": 0.67, "learning_rate": 9.887239446488001e-06, "loss": 5.1699, "step": 61500 }, { "epoch": 0.68, "learning_rate": 9.886144683832547e-06, "loss": 5.1579, "step": 62000 }, { "epoch": 0.68, "learning_rate": 9.88504992117709e-06, "loss": 5.1606, "step": 62500 }, { "epoch": 0.69, "learning_rate": 9.883955158521632e-06, "loss": 5.1452, "step": 63000 }, { "epoch": 0.69, "learning_rate": 9.882860395866178e-06, "loss": 5.1389, "step": 63500 }, { "epoch": 0.7, "learning_rate": 9.88176563321072e-06, "loss": 5.1377, "step": 64000 }, { "epoch": 0.7, "learning_rate": 9.880670870555265e-06, "loss": 5.1376, "step": 64500 }, { "epoch": 0.71, "learning_rate": 9.879576107899807e-06, "loss": 5.1357, "step": 65000 }, { "epoch": 0.72, "learning_rate": 9.878481345244351e-06, "loss": 5.1242, "step": 65500 }, { "epoch": 0.72, "learning_rate": 9.877386582588897e-06, "loss": 5.1228, "step": 66000 }, { "epoch": 0.73, "learning_rate": 9.876291819933439e-06, "loss": 5.1188, "step": 66500 }, { "epoch": 0.73, "learning_rate": 9.875197057277982e-06, "loss": 5.1117, "step": 67000 }, { "epoch": 0.74, "learning_rate": 9.874102294622526e-06, "loss": 5.1086, "step": 67500 }, { "epoch": 0.74, "learning_rate": 9.87300753196707e-06, "loss": 5.1077, "step": 68000 }, { "epoch": 0.75, "learning_rate": 9.871912769311615e-06, "loss": 5.0948, "step": 68500 }, { "epoch": 0.75, "learning_rate": 9.870818006656157e-06, "loss": 5.0959, "step": 69000 }, { "epoch": 0.76, "learning_rate": 9.869723244000701e-06, "loss": 5.0867, "step": 69500 }, { "epoch": 0.76, "learning_rate": 9.868628481345245e-06, "loss": 5.0798, "step": 70000 }, { "epoch": 0.77, "learning_rate": 9.867533718689789e-06, "loss": 5.0802, "step": 70500 }, { "epoch": 0.78, "learning_rate": 9.866438956034332e-06, "loss": 5.0753, "step": 71000 }, { "epoch": 0.78, "learning_rate": 9.865344193378876e-06, "loss": 5.0765, "step": 71500 }, { "epoch": 0.79, "learning_rate": 9.86424943072342e-06, "loss": 5.075, "step": 72000 }, { "epoch": 0.79, "learning_rate": 9.863154668067964e-06, "loss": 5.0685, "step": 72500 }, { "epoch": 0.8, "learning_rate": 9.862059905412507e-06, "loss": 5.0627, "step": 73000 }, { "epoch": 0.8, "learning_rate": 9.860965142757051e-06, "loss": 5.0632, "step": 73500 }, { "epoch": 0.81, "learning_rate": 9.859870380101595e-06, "loss": 5.0536, "step": 74000 }, { "epoch": 0.81, "learning_rate": 9.858775617446139e-06, "loss": 5.0513, "step": 74500 }, { "epoch": 0.82, "learning_rate": 9.857680854790682e-06, "loss": 5.042, "step": 75000 }, { "epoch": 0.82, "learning_rate": 9.856586092135226e-06, "loss": 5.0383, "step": 75500 }, { "epoch": 0.83, "learning_rate": 9.85549132947977e-06, "loss": 5.04, "step": 76000 }, { "epoch": 0.84, "learning_rate": 9.854396566824313e-06, "loss": 5.0351, "step": 76500 }, { "epoch": 0.84, "learning_rate": 9.853301804168857e-06, "loss": 5.0335, "step": 77000 }, { "epoch": 0.85, "learning_rate": 9.852207041513401e-06, "loss": 5.0304, "step": 77500 }, { "epoch": 0.85, "learning_rate": 9.851112278857943e-06, "loss": 5.0217, "step": 78000 }, { "epoch": 0.86, "learning_rate": 9.850017516202488e-06, "loss": 5.0213, "step": 78500 }, { "epoch": 0.86, "learning_rate": 9.848922753547032e-06, "loss": 5.0219, "step": 79000 }, { "epoch": 0.87, "learning_rate": 9.847827990891576e-06, "loss": 5.0151, "step": 79500 }, { "epoch": 0.87, "learning_rate": 9.84673322823612e-06, "loss": 5.0081, "step": 80000 }, { "epoch": 0.88, "learning_rate": 9.845638465580662e-06, "loss": 5.0032, "step": 80500 }, { "epoch": 0.88, "learning_rate": 9.844543702925207e-06, "loss": 5.002, "step": 81000 }, { "epoch": 0.89, "learning_rate": 9.843448940269751e-06, "loss": 4.9959, "step": 81500 }, { "epoch": 0.9, "learning_rate": 9.842354177614295e-06, "loss": 4.9962, "step": 82000 }, { "epoch": 0.9, "learning_rate": 9.841259414958838e-06, "loss": 4.9907, "step": 82500 }, { "epoch": 0.91, "learning_rate": 9.84016465230338e-06, "loss": 4.9905, "step": 83000 }, { "epoch": 0.91, "learning_rate": 9.839069889647926e-06, "loss": 4.9872, "step": 83500 }, { "epoch": 0.92, "learning_rate": 9.837975126992468e-06, "loss": 4.9806, "step": 84000 }, { "epoch": 0.92, "learning_rate": 9.836880364337013e-06, "loss": 4.9851, "step": 84500 }, { "epoch": 0.93, "learning_rate": 9.835785601681557e-06, "loss": 4.9706, "step": 85000 }, { "epoch": 0.93, "learning_rate": 9.8346908390261e-06, "loss": 4.9756, "step": 85500 }, { "epoch": 0.94, "learning_rate": 9.833596076370645e-06, "loss": 4.9761, "step": 86000 }, { "epoch": 0.94, "learning_rate": 9.832501313715187e-06, "loss": 4.9629, "step": 86500 }, { "epoch": 0.95, "learning_rate": 9.83140655105973e-06, "loss": 4.9679, "step": 87000 }, { "epoch": 0.96, "learning_rate": 9.830311788404274e-06, "loss": 4.9575, "step": 87500 }, { "epoch": 0.96, "learning_rate": 9.829217025748818e-06, "loss": 4.9598, "step": 88000 }, { "epoch": 0.97, "learning_rate": 9.828122263093363e-06, "loss": 4.9544, "step": 88500 }, { "epoch": 0.97, "learning_rate": 9.827027500437905e-06, "loss": 4.9529, "step": 89000 }, { "epoch": 0.98, "learning_rate": 9.825932737782449e-06, "loss": 4.9468, "step": 89500 }, { "epoch": 0.98, "learning_rate": 9.824837975126993e-06, "loss": 4.9483, "step": 90000 }, { "epoch": 0.98, "eval_accuracy": 0.2830285537548699, "eval_loss": 4.701588153839111, "eval_runtime": 349.0816, "eval_samples_per_second": 883.335, "eval_steps_per_second": 13.805, "step": 90000 }, { "epoch": 0.99, "learning_rate": 9.823743212471537e-06, "loss": 4.9383, "step": 90500 }, { "epoch": 0.99, "learning_rate": 9.82264844981608e-06, "loss": 4.9377, "step": 91000 }, { "epoch": 1.0, "learning_rate": 9.821553687160624e-06, "loss": 4.9357, "step": 91500 }, { "epoch": 1.0, "learning_rate": 9.820458924505168e-06, "loss": 4.931, "step": 92000 }, { "epoch": 1.01, "learning_rate": 9.819364161849712e-06, "loss": 4.9281, "step": 92500 }, { "epoch": 1.02, "learning_rate": 9.818269399194255e-06, "loss": 4.9201, "step": 93000 }, { "epoch": 1.02, "learning_rate": 9.817174636538799e-06, "loss": 4.9226, "step": 93500 }, { "epoch": 1.03, "learning_rate": 9.816079873883343e-06, "loss": 4.9238, "step": 94000 }, { "epoch": 1.03, "learning_rate": 9.814985111227887e-06, "loss": 4.9143, "step": 94500 }, { "epoch": 1.04, "learning_rate": 9.81389034857243e-06, "loss": 4.9155, "step": 95000 }, { "epoch": 1.04, "learning_rate": 9.812795585916974e-06, "loss": 4.9158, "step": 95500 }, { "epoch": 1.05, "learning_rate": 9.811700823261518e-06, "loss": 4.9112, "step": 96000 }, { "epoch": 1.05, "learning_rate": 9.810606060606061e-06, "loss": 4.9112, "step": 96500 }, { "epoch": 1.06, "learning_rate": 9.809511297950605e-06, "loss": 4.9075, "step": 97000 }, { "epoch": 1.07, "learning_rate": 9.808416535295149e-06, "loss": 4.8964, "step": 97500 }, { "epoch": 1.07, "learning_rate": 9.807321772639693e-06, "loss": 4.9055, "step": 98000 }, { "epoch": 1.08, "learning_rate": 9.806227009984236e-06, "loss": 4.9023, "step": 98500 }, { "epoch": 1.08, "learning_rate": 9.80513224732878e-06, "loss": 4.8878, "step": 99000 }, { "epoch": 1.09, "learning_rate": 9.804037484673324e-06, "loss": 4.8902, "step": 99500 }, { "epoch": 1.09, "learning_rate": 9.802942722017868e-06, "loss": 4.8923, "step": 100000 }, { "epoch": 1.1, "learning_rate": 9.80184795936241e-06, "loss": 4.8848, "step": 100500 }, { "epoch": 1.1, "learning_rate": 9.800753196706955e-06, "loss": 4.8789, "step": 101000 }, { "epoch": 1.11, "learning_rate": 9.799658434051499e-06, "loss": 4.8804, "step": 101500 }, { "epoch": 1.11, "learning_rate": 9.798563671396043e-06, "loss": 4.8778, "step": 102000 }, { "epoch": 1.12, "learning_rate": 9.797468908740586e-06, "loss": 4.8723, "step": 102500 }, { "epoch": 1.13, "learning_rate": 9.796374146085128e-06, "loss": 4.87, "step": 103000 }, { "epoch": 1.13, "learning_rate": 9.795279383429674e-06, "loss": 4.8625, "step": 103500 }, { "epoch": 1.14, "learning_rate": 9.794184620774218e-06, "loss": 4.866, "step": 104000 }, { "epoch": 1.14, "learning_rate": 9.793089858118761e-06, "loss": 4.8584, "step": 104500 }, { "epoch": 1.15, "learning_rate": 9.791995095463305e-06, "loss": 4.8623, "step": 105000 }, { "epoch": 1.15, "learning_rate": 9.790900332807847e-06, "loss": 4.862, "step": 105500 }, { "epoch": 1.16, "learning_rate": 9.789805570152393e-06, "loss": 4.8551, "step": 106000 }, { "epoch": 1.16, "learning_rate": 9.788710807496935e-06, "loss": 4.8444, "step": 106500 }, { "epoch": 1.17, "learning_rate": 9.787616044841478e-06, "loss": 4.8481, "step": 107000 }, { "epoch": 1.17, "learning_rate": 9.786521282186024e-06, "loss": 4.8421, "step": 107500 }, { "epoch": 1.18, "learning_rate": 9.785426519530566e-06, "loss": 4.8498, "step": 108000 }, { "epoch": 1.19, "learning_rate": 9.784331756875111e-06, "loss": 4.8376, "step": 108500 }, { "epoch": 1.19, "learning_rate": 9.783236994219653e-06, "loss": 4.8295, "step": 109000 }, { "epoch": 1.2, "learning_rate": 9.782142231564197e-06, "loss": 4.8282, "step": 109500 }, { "epoch": 1.2, "learning_rate": 9.78104746890874e-06, "loss": 4.8331, "step": 110000 }, { "epoch": 1.21, "learning_rate": 9.779952706253285e-06, "loss": 4.8223, "step": 110500 }, { "epoch": 1.21, "learning_rate": 9.77885794359783e-06, "loss": 4.8167, "step": 111000 }, { "epoch": 1.22, "learning_rate": 9.777763180942372e-06, "loss": 4.8259, "step": 111500 }, { "epoch": 1.22, "learning_rate": 9.776668418286916e-06, "loss": 4.8151, "step": 112000 }, { "epoch": 1.23, "learning_rate": 9.77557365563146e-06, "loss": 4.8211, "step": 112500 }, { "epoch": 1.23, "learning_rate": 9.774478892976003e-06, "loss": 4.81, "step": 113000 }, { "epoch": 1.24, "learning_rate": 9.773384130320547e-06, "loss": 4.8058, "step": 113500 }, { "epoch": 1.25, "learning_rate": 9.77228936766509e-06, "loss": 4.8051, "step": 114000 }, { "epoch": 1.25, "learning_rate": 9.771194605009635e-06, "loss": 4.803, "step": 114500 }, { "epoch": 1.26, "learning_rate": 9.770099842354178e-06, "loss": 4.7962, "step": 115000 }, { "epoch": 1.26, "learning_rate": 9.769005079698722e-06, "loss": 4.7982, "step": 115500 }, { "epoch": 1.27, "learning_rate": 9.767910317043266e-06, "loss": 4.7938, "step": 116000 }, { "epoch": 1.27, "learning_rate": 9.76681555438781e-06, "loss": 4.7823, "step": 116500 }, { "epoch": 1.28, "learning_rate": 9.765720791732353e-06, "loss": 4.7863, "step": 117000 }, { "epoch": 1.28, "learning_rate": 9.764626029076897e-06, "loss": 4.7805, "step": 117500 }, { "epoch": 1.29, "learning_rate": 9.76353126642144e-06, "loss": 4.7808, "step": 118000 }, { "epoch": 1.29, "learning_rate": 9.762436503765984e-06, "loss": 4.781, "step": 118500 }, { "epoch": 1.3, "learning_rate": 9.761341741110528e-06, "loss": 4.7676, "step": 119000 }, { "epoch": 1.31, "learning_rate": 9.760246978455072e-06, "loss": 4.7741, "step": 119500 }, { "epoch": 1.31, "learning_rate": 9.759152215799616e-06, "loss": 4.7679, "step": 120000 }, { "epoch": 1.31, "eval_accuracy": 0.2991870026381957, "eval_loss": 4.515359401702881, "eval_runtime": 351.8184, "eval_samples_per_second": 876.464, "eval_steps_per_second": 13.697, "step": 120000 }, { "epoch": 1.32, "learning_rate": 9.75805745314416e-06, "loss": 4.7663, "step": 120500 }, { "epoch": 1.32, "learning_rate": 9.756962690488703e-06, "loss": 4.7571, "step": 121000 }, { "epoch": 1.33, "learning_rate": 9.755867927833247e-06, "loss": 4.7584, "step": 121500 }, { "epoch": 1.33, "learning_rate": 9.75477316517779e-06, "loss": 4.7573, "step": 122000 }, { "epoch": 1.34, "learning_rate": 9.753678402522334e-06, "loss": 4.7534, "step": 122500 }, { "epoch": 1.34, "learning_rate": 9.752583639866876e-06, "loss": 4.7558, "step": 123000 }, { "epoch": 1.35, "learning_rate": 9.751488877211422e-06, "loss": 4.743, "step": 123500 }, { "epoch": 1.35, "learning_rate": 9.750394114555966e-06, "loss": 4.7517, "step": 124000 }, { "epoch": 1.36, "learning_rate": 9.74929935190051e-06, "loss": 4.7422, "step": 124500 }, { "epoch": 1.37, "learning_rate": 9.748204589245053e-06, "loss": 4.7396, "step": 125000 }, { "epoch": 1.37, "learning_rate": 9.747109826589595e-06, "loss": 4.7369, "step": 125500 }, { "epoch": 1.38, "learning_rate": 9.74601506393414e-06, "loss": 4.7349, "step": 126000 }, { "epoch": 1.38, "learning_rate": 9.744920301278683e-06, "loss": 4.7327, "step": 126500 }, { "epoch": 1.39, "learning_rate": 9.743825538623226e-06, "loss": 4.7354, "step": 127000 }, { "epoch": 1.39, "learning_rate": 9.742730775967772e-06, "loss": 4.7327, "step": 127500 }, { "epoch": 1.4, "learning_rate": 9.741636013312314e-06, "loss": 4.7263, "step": 128000 }, { "epoch": 1.4, "learning_rate": 9.74054125065686e-06, "loss": 4.7255, "step": 128500 }, { "epoch": 1.41, "learning_rate": 9.739446488001401e-06, "loss": 4.7233, "step": 129000 }, { "epoch": 1.41, "learning_rate": 9.738351725345945e-06, "loss": 4.7165, "step": 129500 }, { "epoch": 1.42, "learning_rate": 9.73725696269049e-06, "loss": 4.7175, "step": 130000 }, { "epoch": 1.43, "learning_rate": 9.736162200035033e-06, "loss": 4.7167, "step": 130500 }, { "epoch": 1.43, "learning_rate": 9.735067437379578e-06, "loss": 4.7116, "step": 131000 }, { "epoch": 1.44, "learning_rate": 9.73397267472412e-06, "loss": 4.7145, "step": 131500 }, { "epoch": 1.44, "learning_rate": 9.732877912068664e-06, "loss": 4.7079, "step": 132000 }, { "epoch": 1.45, "learning_rate": 9.731783149413208e-06, "loss": 4.7085, "step": 132500 }, { "epoch": 1.45, "learning_rate": 9.730688386757751e-06, "loss": 4.6998, "step": 133000 }, { "epoch": 1.46, "learning_rate": 9.729593624102297e-06, "loss": 4.7051, "step": 133500 }, { "epoch": 1.46, "learning_rate": 9.728498861446839e-06, "loss": 4.7142, "step": 134000 }, { "epoch": 1.47, "learning_rate": 9.727404098791383e-06, "loss": 4.7002, "step": 134500 }, { "epoch": 1.47, "learning_rate": 9.726309336135926e-06, "loss": 4.7044, "step": 135000 }, { "epoch": 1.48, "learning_rate": 9.72521457348047e-06, "loss": 4.6953, "step": 135500 }, { "epoch": 1.49, "learning_rate": 9.724119810825014e-06, "loss": 4.6942, "step": 136000 }, { "epoch": 1.49, "learning_rate": 9.723025048169558e-06, "loss": 4.6921, "step": 136500 }, { "epoch": 1.5, "learning_rate": 9.721930285514101e-06, "loss": 4.686, "step": 137000 }, { "epoch": 1.5, "learning_rate": 9.720835522858645e-06, "loss": 4.6874, "step": 137500 }, { "epoch": 1.51, "learning_rate": 9.719740760203189e-06, "loss": 4.689, "step": 138000 }, { "epoch": 1.51, "learning_rate": 9.718645997547732e-06, "loss": 4.6867, "step": 138500 }, { "epoch": 1.52, "learning_rate": 9.717551234892276e-06, "loss": 4.6815, "step": 139000 }, { "epoch": 1.52, "learning_rate": 9.71645647223682e-06, "loss": 4.6772, "step": 139500 }, { "epoch": 1.53, "learning_rate": 9.715361709581364e-06, "loss": 4.6811, "step": 140000 }, { "epoch": 1.53, "learning_rate": 9.714266946925907e-06, "loss": 4.6759, "step": 140500 }, { "epoch": 1.54, "learning_rate": 9.713172184270451e-06, "loss": 4.6744, "step": 141000 }, { "epoch": 1.55, "learning_rate": 9.712077421614995e-06, "loss": 4.665, "step": 141500 }, { "epoch": 1.55, "learning_rate": 9.710982658959539e-06, "loss": 4.6768, "step": 142000 }, { "epoch": 1.56, "learning_rate": 9.709887896304082e-06, "loss": 4.6714, "step": 142500 }, { "epoch": 1.56, "learning_rate": 9.708793133648626e-06, "loss": 4.6686, "step": 143000 }, { "epoch": 1.57, "learning_rate": 9.70769837099317e-06, "loss": 4.6707, "step": 143500 }, { "epoch": 1.57, "learning_rate": 9.706603608337714e-06, "loss": 4.6716, "step": 144000 }, { "epoch": 1.58, "learning_rate": 9.705508845682257e-06, "loss": 4.6674, "step": 144500 }, { "epoch": 1.58, "learning_rate": 9.704414083026801e-06, "loss": 4.664, "step": 145000 }, { "epoch": 1.59, "learning_rate": 9.703319320371343e-06, "loss": 4.6601, "step": 145500 }, { "epoch": 1.59, "learning_rate": 9.702224557715889e-06, "loss": 4.6574, "step": 146000 }, { "epoch": 1.6, "learning_rate": 9.701129795060432e-06, "loss": 4.6504, "step": 146500 }, { "epoch": 1.61, "learning_rate": 9.700035032404974e-06, "loss": 4.6634, "step": 147000 }, { "epoch": 1.61, "learning_rate": 9.69894026974952e-06, "loss": 4.6565, "step": 147500 }, { "epoch": 1.62, "learning_rate": 9.697845507094062e-06, "loss": 4.6525, "step": 148000 }, { "epoch": 1.62, "learning_rate": 9.696750744438607e-06, "loss": 4.6573, "step": 148500 }, { "epoch": 1.63, "learning_rate": 9.69565598178315e-06, "loss": 4.6443, "step": 149000 }, { "epoch": 1.63, "learning_rate": 9.694561219127693e-06, "loss": 4.6442, "step": 149500 }, { "epoch": 1.64, "learning_rate": 9.693466456472239e-06, "loss": 4.6448, "step": 150000 }, { "epoch": 1.64, "eval_accuracy": 0.31003917295685746, "eval_loss": 4.388444900512695, "eval_runtime": 352.909, "eval_samples_per_second": 873.755, "eval_steps_per_second": 13.655, "step": 150000 }, { "epoch": 1.64, "learning_rate": 9.69237169381678e-06, "loss": 4.6458, "step": 150500 }, { "epoch": 1.65, "learning_rate": 9.691276931161326e-06, "loss": 4.6417, "step": 151000 }, { "epoch": 1.65, "learning_rate": 9.690182168505868e-06, "loss": 4.6371, "step": 151500 }, { "epoch": 1.66, "learning_rate": 9.689087405850412e-06, "loss": 4.6371, "step": 152000 }, { "epoch": 1.67, "learning_rate": 9.687992643194956e-06, "loss": 4.6384, "step": 152500 }, { "epoch": 1.67, "learning_rate": 9.6868978805395e-06, "loss": 4.6328, "step": 153000 }, { "epoch": 1.68, "learning_rate": 9.685803117884045e-06, "loss": 4.6376, "step": 153500 }, { "epoch": 1.68, "learning_rate": 9.684708355228587e-06, "loss": 4.6353, "step": 154000 }, { "epoch": 1.69, "learning_rate": 9.68361359257313e-06, "loss": 4.6351, "step": 154500 }, { "epoch": 1.69, "learning_rate": 9.682518829917674e-06, "loss": 4.6363, "step": 155000 }, { "epoch": 1.7, "learning_rate": 9.681424067262218e-06, "loss": 4.6306, "step": 155500 }, { "epoch": 1.7, "learning_rate": 9.680329304606762e-06, "loss": 4.632, "step": 156000 }, { "epoch": 1.71, "learning_rate": 9.679234541951306e-06, "loss": 4.6239, "step": 156500 }, { "epoch": 1.72, "learning_rate": 9.67813977929585e-06, "loss": 4.6199, "step": 157000 }, { "epoch": 1.72, "learning_rate": 9.677045016640393e-06, "loss": 4.6305, "step": 157500 }, { "epoch": 1.73, "learning_rate": 9.675950253984937e-06, "loss": 4.6246, "step": 158000 }, { "epoch": 1.73, "learning_rate": 9.67485549132948e-06, "loss": 4.6255, "step": 158500 }, { "epoch": 1.74, "learning_rate": 9.673760728674024e-06, "loss": 4.6228, "step": 159000 }, { "epoch": 1.74, "learning_rate": 9.672665966018568e-06, "loss": 4.6169, "step": 159500 }, { "epoch": 1.75, "learning_rate": 9.671571203363112e-06, "loss": 4.6184, "step": 160000 }, { "epoch": 1.75, "learning_rate": 9.670476440707655e-06, "loss": 4.6159, "step": 160500 }, { "epoch": 1.76, "learning_rate": 9.6693816780522e-06, "loss": 4.6096, "step": 161000 }, { "epoch": 1.76, "learning_rate": 9.668286915396743e-06, "loss": 4.6143, "step": 161500 }, { "epoch": 1.77, "learning_rate": 9.667192152741287e-06, "loss": 4.6108, "step": 162000 }, { "epoch": 1.78, "learning_rate": 9.66609739008583e-06, "loss": 4.6092, "step": 162500 }, { "epoch": 1.78, "learning_rate": 9.665002627430374e-06, "loss": 4.6121, "step": 163000 }, { "epoch": 1.79, "learning_rate": 9.663907864774918e-06, "loss": 4.6107, "step": 163500 }, { "epoch": 1.79, "learning_rate": 9.662813102119462e-06, "loss": 4.6106, "step": 164000 }, { "epoch": 1.8, "learning_rate": 9.661718339464005e-06, "loss": 4.604, "step": 164500 }, { "epoch": 1.8, "learning_rate": 9.660623576808549e-06, "loss": 4.5994, "step": 165000 }, { "epoch": 1.81, "learning_rate": 9.659528814153091e-06, "loss": 4.6051, "step": 165500 }, { "epoch": 1.81, "learning_rate": 9.658434051497637e-06, "loss": 4.6005, "step": 166000 }, { "epoch": 1.82, "learning_rate": 9.65733928884218e-06, "loss": 4.6022, "step": 166500 }, { "epoch": 1.82, "learning_rate": 9.656244526186724e-06, "loss": 4.589, "step": 167000 }, { "epoch": 1.83, "learning_rate": 9.655149763531268e-06, "loss": 4.5974, "step": 167500 }, { "epoch": 1.84, "learning_rate": 9.65405500087581e-06, "loss": 4.5977, "step": 168000 }, { "epoch": 1.84, "learning_rate": 9.652960238220355e-06, "loss": 4.6001, "step": 168500 }, { "epoch": 1.85, "learning_rate": 9.651865475564899e-06, "loss": 4.5985, "step": 169000 }, { "epoch": 1.85, "learning_rate": 9.650770712909441e-06, "loss": 4.5974, "step": 169500 }, { "epoch": 1.86, "learning_rate": 9.649675950253987e-06, "loss": 4.5963, "step": 170000 }, { "epoch": 1.86, "learning_rate": 9.648581187598529e-06, "loss": 4.5955, "step": 170500 }, { "epoch": 1.87, "learning_rate": 9.647486424943074e-06, "loss": 4.5907, "step": 171000 }, { "epoch": 1.87, "learning_rate": 9.646391662287616e-06, "loss": 4.5918, "step": 171500 }, { "epoch": 1.88, "learning_rate": 9.64529689963216e-06, "loss": 4.5894, "step": 172000 }, { "epoch": 1.88, "learning_rate": 9.644202136976705e-06, "loss": 4.5831, "step": 172500 }, { "epoch": 1.89, "learning_rate": 9.643107374321247e-06, "loss": 4.5902, "step": 173000 }, { "epoch": 1.9, "learning_rate": 9.642012611665793e-06, "loss": 4.589, "step": 173500 }, { "epoch": 1.9, "learning_rate": 9.640917849010335e-06, "loss": 4.5823, "step": 174000 }, { "epoch": 1.91, "learning_rate": 9.639823086354879e-06, "loss": 4.5827, "step": 174500 }, { "epoch": 1.91, "learning_rate": 9.638728323699422e-06, "loss": 4.5811, "step": 175000 }, { "epoch": 1.92, "learning_rate": 9.637633561043966e-06, "loss": 4.5791, "step": 175500 }, { "epoch": 1.92, "learning_rate": 9.63653879838851e-06, "loss": 4.5768, "step": 176000 }, { "epoch": 1.93, "learning_rate": 9.635444035733054e-06, "loss": 4.5784, "step": 176500 }, { "epoch": 1.93, "learning_rate": 9.634349273077597e-06, "loss": 4.5751, "step": 177000 }, { "epoch": 1.94, "learning_rate": 9.633254510422141e-06, "loss": 4.5701, "step": 177500 }, { "epoch": 1.94, "learning_rate": 9.632159747766685e-06, "loss": 4.5783, "step": 178000 }, { "epoch": 1.95, "learning_rate": 9.631064985111228e-06, "loss": 4.5732, "step": 178500 }, { "epoch": 1.96, "learning_rate": 9.629970222455772e-06, "loss": 4.5739, "step": 179000 }, { "epoch": 1.96, "learning_rate": 9.628875459800316e-06, "loss": 4.5778, "step": 179500 }, { "epoch": 1.97, "learning_rate": 9.62778069714486e-06, "loss": 4.5688, "step": 180000 }, { "epoch": 1.97, "eval_accuracy": 0.31747385763829217, "eval_loss": 4.309492111206055, "eval_runtime": 371.0441, "eval_samples_per_second": 831.05, "eval_steps_per_second": 12.988, "step": 180000 }, { "epoch": 1.97, "learning_rate": 9.626685934489403e-06, "loss": 4.5673, "step": 180500 }, { "epoch": 1.98, "learning_rate": 9.625591171833947e-06, "loss": 4.5596, "step": 181000 }, { "epoch": 1.98, "learning_rate": 9.624496409178491e-06, "loss": 4.5721, "step": 181500 }, { "epoch": 1.99, "learning_rate": 9.623401646523035e-06, "loss": 4.5707, "step": 182000 }, { "epoch": 1.99, "learning_rate": 9.622306883867578e-06, "loss": 4.5616, "step": 182500 }, { "epoch": 2.0, "learning_rate": 9.621212121212122e-06, "loss": 4.5644, "step": 183000 }, { "epoch": 2.0, "learning_rate": 9.620117358556666e-06, "loss": 4.5598, "step": 183500 }, { "epoch": 2.01, "learning_rate": 9.61902259590121e-06, "loss": 4.5632, "step": 184000 }, { "epoch": 2.02, "learning_rate": 9.617927833245753e-06, "loss": 4.562, "step": 184500 }, { "epoch": 2.02, "learning_rate": 9.616833070590297e-06, "loss": 4.5605, "step": 185000 }, { "epoch": 2.03, "learning_rate": 9.615738307934841e-06, "loss": 4.556, "step": 185500 }, { "epoch": 2.03, "learning_rate": 9.614643545279385e-06, "loss": 4.5584, "step": 186000 }, { "epoch": 2.04, "learning_rate": 9.613548782623928e-06, "loss": 4.5614, "step": 186500 }, { "epoch": 2.04, "learning_rate": 9.612454019968472e-06, "loss": 4.5586, "step": 187000 }, { "epoch": 2.05, "learning_rate": 9.611359257313016e-06, "loss": 4.5562, "step": 187500 }, { "epoch": 2.05, "learning_rate": 9.610264494657558e-06, "loss": 4.5511, "step": 188000 }, { "epoch": 2.06, "learning_rate": 9.609169732002103e-06, "loss": 4.5543, "step": 188500 }, { "epoch": 2.06, "learning_rate": 9.608074969346647e-06, "loss": 4.5499, "step": 189000 }, { "epoch": 2.07, "learning_rate": 9.606980206691189e-06, "loss": 4.548, "step": 189500 }, { "epoch": 2.08, "learning_rate": 9.605885444035735e-06, "loss": 4.5537, "step": 190000 }, { "epoch": 2.08, "learning_rate": 9.604790681380277e-06, "loss": 4.5442, "step": 190500 }, { "epoch": 2.09, "learning_rate": 9.603695918724822e-06, "loss": 4.5411, "step": 191000 }, { "epoch": 2.09, "learning_rate": 9.602601156069366e-06, "loss": 4.5464, "step": 191500 }, { "epoch": 2.1, "learning_rate": 9.601506393413908e-06, "loss": 4.5419, "step": 192000 }, { "epoch": 2.1, "learning_rate": 9.600411630758453e-06, "loss": 4.5485, "step": 192500 }, { "epoch": 2.11, "learning_rate": 9.599316868102995e-06, "loss": 4.5361, "step": 193000 }, { "epoch": 2.11, "learning_rate": 9.59822210544754e-06, "loss": 4.5434, "step": 193500 }, { "epoch": 2.12, "learning_rate": 9.597127342792083e-06, "loss": 4.5412, "step": 194000 }, { "epoch": 2.12, "learning_rate": 9.596032580136627e-06, "loss": 4.5423, "step": 194500 }, { "epoch": 2.13, "learning_rate": 9.594937817481172e-06, "loss": 4.5331, "step": 195000 }, { "epoch": 2.14, "learning_rate": 9.593843054825714e-06, "loss": 4.5306, "step": 195500 }, { "epoch": 2.14, "learning_rate": 9.592748292170258e-06, "loss": 4.5423, "step": 196000 }, { "epoch": 2.15, "learning_rate": 9.591653529514802e-06, "loss": 4.532, "step": 196500 }, { "epoch": 2.15, "learning_rate": 9.590558766859345e-06, "loss": 4.5322, "step": 197000 }, { "epoch": 2.16, "learning_rate": 9.589464004203889e-06, "loss": 4.5359, "step": 197500 }, { "epoch": 2.16, "learning_rate": 9.588369241548433e-06, "loss": 4.5379, "step": 198000 }, { "epoch": 2.17, "learning_rate": 9.587274478892976e-06, "loss": 4.5347, "step": 198500 }, { "epoch": 2.17, "learning_rate": 9.58617971623752e-06, "loss": 4.5281, "step": 199000 }, { "epoch": 2.18, "learning_rate": 9.585084953582064e-06, "loss": 4.5352, "step": 199500 }, { "epoch": 2.18, "learning_rate": 9.583990190926608e-06, "loss": 4.5229, "step": 200000 }, { "epoch": 2.19, "learning_rate": 9.582895428271151e-06, "loss": 4.5315, "step": 200500 }, { "epoch": 2.2, "learning_rate": 9.581800665615695e-06, "loss": 4.5202, "step": 201000 }, { "epoch": 2.2, "learning_rate": 9.580705902960239e-06, "loss": 4.5316, "step": 201500 }, { "epoch": 2.21, "learning_rate": 9.579611140304783e-06, "loss": 4.533, "step": 202000 }, { "epoch": 2.21, "learning_rate": 9.578516377649326e-06, "loss": 4.5264, "step": 202500 }, { "epoch": 2.22, "learning_rate": 9.57742161499387e-06, "loss": 4.5292, "step": 203000 }, { "epoch": 2.22, "learning_rate": 9.576326852338414e-06, "loss": 4.525, "step": 203500 }, { "epoch": 2.23, "learning_rate": 9.575232089682958e-06, "loss": 4.5198, "step": 204000 }, { "epoch": 2.23, "learning_rate": 9.574137327027501e-06, "loss": 4.5153, "step": 204500 }, { "epoch": 2.24, "learning_rate": 9.573042564372045e-06, "loss": 4.5172, "step": 205000 }, { "epoch": 2.24, "learning_rate": 9.571947801716589e-06, "loss": 4.5219, "step": 205500 }, { "epoch": 2.25, "learning_rate": 9.570853039061133e-06, "loss": 4.5195, "step": 206000 }, { "epoch": 2.26, "learning_rate": 9.569758276405676e-06, "loss": 4.5199, "step": 206500 }, { "epoch": 2.26, "learning_rate": 9.56866351375022e-06, "loss": 4.5184, "step": 207000 }, { "epoch": 2.27, "learning_rate": 9.567568751094764e-06, "loss": 4.5202, "step": 207500 }, { "epoch": 2.27, "learning_rate": 9.566473988439308e-06, "loss": 4.511, "step": 208000 }, { "epoch": 2.28, "learning_rate": 9.565379225783851e-06, "loss": 4.5132, "step": 208500 }, { "epoch": 2.28, "learning_rate": 9.564284463128395e-06, "loss": 4.5207, "step": 209000 }, { "epoch": 2.29, "learning_rate": 9.563189700472937e-06, "loss": 4.5132, "step": 209500 }, { "epoch": 2.29, "learning_rate": 9.562094937817483e-06, "loss": 4.5102, "step": 210000 }, { "epoch": 2.29, "eval_accuracy": 0.32362476493299214, "eval_loss": 4.251134872436523, "eval_runtime": 355.8192, "eval_samples_per_second": 866.609, "eval_steps_per_second": 13.543, "step": 210000 }, { "epoch": 2.3, "learning_rate": 9.561000175162025e-06, "loss": 4.516, "step": 210500 }, { "epoch": 2.3, "learning_rate": 9.55990541250657e-06, "loss": 4.5129, "step": 211000 }, { "epoch": 2.31, "learning_rate": 9.558810649851114e-06, "loss": 4.5103, "step": 211500 }, { "epoch": 2.32, "learning_rate": 9.557715887195656e-06, "loss": 4.5111, "step": 212000 }, { "epoch": 2.32, "learning_rate": 9.556621124540201e-06, "loss": 4.5113, "step": 212500 }, { "epoch": 2.33, "learning_rate": 9.555526361884743e-06, "loss": 4.5131, "step": 213000 }, { "epoch": 2.33, "learning_rate": 9.554431599229289e-06, "loss": 4.5085, "step": 213500 }, { "epoch": 2.34, "learning_rate": 9.55333683657383e-06, "loss": 4.5029, "step": 214000 }, { "epoch": 2.34, "learning_rate": 9.552242073918375e-06, "loss": 4.5, "step": 214500 }, { "epoch": 2.35, "learning_rate": 9.55114731126292e-06, "loss": 4.4978, "step": 215000 }, { "epoch": 2.35, "learning_rate": 9.550052548607462e-06, "loss": 4.4991, "step": 215500 }, { "epoch": 2.36, "learning_rate": 9.548957785952006e-06, "loss": 4.5043, "step": 216000 }, { "epoch": 2.36, "learning_rate": 9.54786302329655e-06, "loss": 4.501, "step": 216500 }, { "epoch": 2.37, "learning_rate": 9.546768260641093e-06, "loss": 4.5012, "step": 217000 }, { "epoch": 2.38, "learning_rate": 9.545673497985639e-06, "loss": 4.4922, "step": 217500 }, { "epoch": 2.38, "learning_rate": 9.54457873533018e-06, "loss": 4.5017, "step": 218000 }, { "epoch": 2.39, "learning_rate": 9.543483972674724e-06, "loss": 4.4995, "step": 218500 }, { "epoch": 2.39, "learning_rate": 9.542389210019268e-06, "loss": 4.5025, "step": 219000 }, { "epoch": 2.4, "learning_rate": 9.541294447363812e-06, "loss": 4.4942, "step": 219500 }, { "epoch": 2.4, "learning_rate": 9.540199684708356e-06, "loss": 4.4983, "step": 220000 }, { "epoch": 2.41, "learning_rate": 9.5391049220529e-06, "loss": 4.4954, "step": 220500 }, { "epoch": 2.41, "learning_rate": 9.538010159397443e-06, "loss": 4.4982, "step": 221000 }, { "epoch": 2.42, "learning_rate": 9.536915396741987e-06, "loss": 4.5004, "step": 221500 }, { "epoch": 2.43, "learning_rate": 9.53582063408653e-06, "loss": 4.4925, "step": 222000 }, { "epoch": 2.43, "learning_rate": 9.534725871431074e-06, "loss": 4.4928, "step": 222500 }, { "epoch": 2.44, "learning_rate": 9.533631108775618e-06, "loss": 4.4889, "step": 223000 }, { "epoch": 2.44, "learning_rate": 9.532536346120162e-06, "loss": 4.4877, "step": 223500 }, { "epoch": 2.45, "learning_rate": 9.531441583464706e-06, "loss": 4.4926, "step": 224000 }, { "epoch": 2.45, "learning_rate": 9.53034682080925e-06, "loss": 4.4889, "step": 224500 }, { "epoch": 2.46, "learning_rate": 9.529252058153793e-06, "loss": 4.4871, "step": 225000 }, { "epoch": 2.46, "learning_rate": 9.528157295498337e-06, "loss": 4.4888, "step": 225500 }, { "epoch": 2.47, "learning_rate": 9.52706253284288e-06, "loss": 4.4901, "step": 226000 }, { "epoch": 2.47, "learning_rate": 9.525967770187424e-06, "loss": 4.48, "step": 226500 }, { "epoch": 2.48, "learning_rate": 9.524873007531968e-06, "loss": 4.4787, "step": 227000 }, { "epoch": 2.49, "learning_rate": 9.523778244876512e-06, "loss": 4.4876, "step": 227500 }, { "epoch": 2.49, "learning_rate": 9.522683482221056e-06, "loss": 4.4844, "step": 228000 }, { "epoch": 2.5, "learning_rate": 9.5215887195656e-06, "loss": 4.4844, "step": 228500 }, { "epoch": 2.5, "learning_rate": 9.520493956910143e-06, "loss": 4.4739, "step": 229000 }, { "epoch": 2.51, "learning_rate": 9.519399194254685e-06, "loss": 4.4807, "step": 229500 }, { "epoch": 2.51, "learning_rate": 9.51830443159923e-06, "loss": 4.4771, "step": 230000 }, { "epoch": 2.52, "learning_rate": 9.517209668943774e-06, "loss": 4.4776, "step": 230500 }, { "epoch": 2.52, "learning_rate": 9.516114906288318e-06, "loss": 4.479, "step": 231000 }, { "epoch": 2.53, "learning_rate": 9.515020143632862e-06, "loss": 4.4835, "step": 231500 }, { "epoch": 2.53, "learning_rate": 9.513925380977404e-06, "loss": 4.4845, "step": 232000 }, { "epoch": 2.54, "learning_rate": 9.51283061832195e-06, "loss": 4.4741, "step": 232500 }, { "epoch": 2.55, "learning_rate": 9.511735855666491e-06, "loss": 4.4719, "step": 233000 }, { "epoch": 2.55, "learning_rate": 9.510641093011037e-06, "loss": 4.4703, "step": 233500 }, { "epoch": 2.56, "learning_rate": 9.50954633035558e-06, "loss": 4.4735, "step": 234000 }, { "epoch": 2.56, "learning_rate": 9.508451567700123e-06, "loss": 4.4754, "step": 234500 }, { "epoch": 2.57, "learning_rate": 9.507356805044668e-06, "loss": 4.4754, "step": 235000 }, { "epoch": 2.57, "learning_rate": 9.50626204238921e-06, "loss": 4.4849, "step": 235500 }, { "epoch": 2.58, "learning_rate": 9.505167279733755e-06, "loss": 4.4728, "step": 236000 }, { "epoch": 2.58, "learning_rate": 9.504072517078298e-06, "loss": 4.4678, "step": 236500 }, { "epoch": 2.59, "learning_rate": 9.502977754422841e-06, "loss": 4.4699, "step": 237000 }, { "epoch": 2.59, "learning_rate": 9.501882991767387e-06, "loss": 4.4792, "step": 237500 }, { "epoch": 2.6, "learning_rate": 9.500788229111929e-06, "loss": 4.4735, "step": 238000 }, { "epoch": 2.61, "learning_rate": 9.499693466456472e-06, "loss": 4.4663, "step": 238500 }, { "epoch": 2.61, "learning_rate": 9.498598703801016e-06, "loss": 4.4719, "step": 239000 }, { "epoch": 2.62, "learning_rate": 9.49750394114556e-06, "loss": 4.4621, "step": 239500 }, { "epoch": 2.62, "learning_rate": 9.496409178490104e-06, "loss": 4.4662, "step": 240000 }, { "epoch": 2.62, "eval_accuracy": 0.3294231679378384, "eval_loss": 4.203823566436768, "eval_runtime": 356.8321, "eval_samples_per_second": 864.149, "eval_steps_per_second": 13.505, "step": 240000 }, { "epoch": 2.63, "learning_rate": 9.495314415834647e-06, "loss": 4.4625, "step": 240500 }, { "epoch": 2.63, "learning_rate": 9.494219653179191e-06, "loss": 4.4676, "step": 241000 }, { "epoch": 2.64, "learning_rate": 9.493124890523735e-06, "loss": 4.4689, "step": 241500 }, { "epoch": 2.64, "learning_rate": 9.492030127868279e-06, "loss": 4.46, "step": 242000 }, { "epoch": 2.65, "learning_rate": 9.490935365212822e-06, "loss": 4.4657, "step": 242500 }, { "epoch": 2.65, "learning_rate": 9.489840602557366e-06, "loss": 4.4589, "step": 243000 }, { "epoch": 2.66, "learning_rate": 9.48874583990191e-06, "loss": 4.4617, "step": 243500 }, { "epoch": 2.67, "learning_rate": 9.487651077246454e-06, "loss": 4.4621, "step": 244000 }, { "epoch": 2.67, "learning_rate": 9.486556314590997e-06, "loss": 4.4624, "step": 244500 }, { "epoch": 2.68, "learning_rate": 9.485461551935541e-06, "loss": 4.467, "step": 245000 }, { "epoch": 2.68, "learning_rate": 9.484366789280085e-06, "loss": 4.4566, "step": 245500 }, { "epoch": 2.69, "learning_rate": 9.483272026624629e-06, "loss": 4.4608, "step": 246000 }, { "epoch": 2.69, "learning_rate": 9.482177263969172e-06, "loss": 4.4563, "step": 246500 }, { "epoch": 2.7, "learning_rate": 9.481082501313716e-06, "loss": 4.4621, "step": 247000 }, { "epoch": 2.7, "learning_rate": 9.47998773865826e-06, "loss": 4.4506, "step": 247500 }, { "epoch": 2.71, "learning_rate": 9.478892976002804e-06, "loss": 4.4472, "step": 248000 }, { "epoch": 2.71, "learning_rate": 9.477798213347347e-06, "loss": 4.4565, "step": 248500 }, { "epoch": 2.72, "learning_rate": 9.476703450691891e-06, "loss": 4.4559, "step": 249000 }, { "epoch": 2.73, "learning_rate": 9.475608688036433e-06, "loss": 4.4542, "step": 249500 }, { "epoch": 2.73, "learning_rate": 9.474513925380979e-06, "loss": 4.45, "step": 250000 }, { "epoch": 2.74, "learning_rate": 9.473419162725522e-06, "loss": 4.4497, "step": 250500 }, { "epoch": 2.74, "learning_rate": 9.472324400070066e-06, "loss": 4.4555, "step": 251000 }, { "epoch": 2.75, "learning_rate": 9.47122963741461e-06, "loss": 4.4507, "step": 251500 }, { "epoch": 2.75, "learning_rate": 9.470134874759152e-06, "loss": 4.4486, "step": 252000 }, { "epoch": 2.76, "learning_rate": 9.469040112103697e-06, "loss": 4.4522, "step": 252500 }, { "epoch": 2.76, "learning_rate": 9.46794534944824e-06, "loss": 4.4562, "step": 253000 }, { "epoch": 2.77, "learning_rate": 9.466850586792785e-06, "loss": 4.4452, "step": 253500 }, { "epoch": 2.77, "learning_rate": 9.465755824137329e-06, "loss": 4.4541, "step": 254000 }, { "epoch": 2.78, "learning_rate": 9.46466106148187e-06, "loss": 4.4502, "step": 254500 }, { "epoch": 2.79, "learning_rate": 9.463566298826416e-06, "loss": 4.4495, "step": 255000 }, { "epoch": 2.79, "learning_rate": 9.462471536170958e-06, "loss": 4.4521, "step": 255500 }, { "epoch": 2.8, "learning_rate": 9.461376773515503e-06, "loss": 4.4483, "step": 256000 }, { "epoch": 2.8, "learning_rate": 9.460282010860047e-06, "loss": 4.4433, "step": 256500 }, { "epoch": 2.81, "learning_rate": 9.45918724820459e-06, "loss": 4.4381, "step": 257000 }, { "epoch": 2.81, "learning_rate": 9.458092485549135e-06, "loss": 4.4423, "step": 257500 }, { "epoch": 2.82, "learning_rate": 9.456997722893677e-06, "loss": 4.4412, "step": 258000 }, { "epoch": 2.82, "learning_rate": 9.45590296023822e-06, "loss": 4.4411, "step": 258500 }, { "epoch": 2.83, "learning_rate": 9.454808197582764e-06, "loss": 4.4441, "step": 259000 }, { "epoch": 2.83, "learning_rate": 9.453713434927308e-06, "loss": 4.4486, "step": 259500 }, { "epoch": 2.84, "learning_rate": 9.452618672271853e-06, "loss": 4.4457, "step": 260000 }, { "epoch": 2.85, "learning_rate": 9.451523909616395e-06, "loss": 4.4447, "step": 260500 }, { "epoch": 2.85, "learning_rate": 9.45042914696094e-06, "loss": 4.4342, "step": 261000 }, { "epoch": 2.86, "learning_rate": 9.449334384305483e-06, "loss": 4.4458, "step": 261500 }, { "epoch": 2.86, "learning_rate": 9.448239621650027e-06, "loss": 4.4468, "step": 262000 }, { "epoch": 2.87, "learning_rate": 9.44714485899457e-06, "loss": 4.4425, "step": 262500 }, { "epoch": 2.87, "learning_rate": 9.446050096339114e-06, "loss": 4.4328, "step": 263000 }, { "epoch": 2.88, "learning_rate": 9.444955333683658e-06, "loss": 4.4418, "step": 263500 }, { "epoch": 2.88, "learning_rate": 9.443860571028202e-06, "loss": 4.4357, "step": 264000 }, { "epoch": 2.89, "learning_rate": 9.442765808372745e-06, "loss": 4.4324, "step": 264500 }, { "epoch": 2.89, "learning_rate": 9.44167104571729e-06, "loss": 4.4364, "step": 265000 }, { "epoch": 2.9, "learning_rate": 9.440576283061833e-06, "loss": 4.4398, "step": 265500 }, { "epoch": 2.91, "learning_rate": 9.439481520406377e-06, "loss": 4.4347, "step": 266000 }, { "epoch": 2.91, "learning_rate": 9.43838675775092e-06, "loss": 4.4307, "step": 266500 }, { "epoch": 2.92, "learning_rate": 9.437291995095464e-06, "loss": 4.4275, "step": 267000 }, { "epoch": 2.92, "learning_rate": 9.436197232440008e-06, "loss": 4.4299, "step": 267500 }, { "epoch": 2.93, "learning_rate": 9.435102469784552e-06, "loss": 4.4321, "step": 268000 }, { "epoch": 2.93, "learning_rate": 9.434007707129095e-06, "loss": 4.4265, "step": 268500 }, { "epoch": 2.94, "learning_rate": 9.432912944473639e-06, "loss": 4.4373, "step": 269000 }, { "epoch": 2.94, "learning_rate": 9.431818181818183e-06, "loss": 4.4232, "step": 269500 }, { "epoch": 2.95, "learning_rate": 9.430723419162727e-06, "loss": 4.4269, "step": 270000 }, { "epoch": 2.95, "eval_accuracy": 0.3335691447535404, "eval_loss": 4.167707920074463, "eval_runtime": 379.3356, "eval_samples_per_second": 812.884, "eval_steps_per_second": 12.704, "step": 270000 }, { "epoch": 2.95, "learning_rate": 9.42962865650727e-06, "loss": 4.4366, "step": 270500 }, { "epoch": 2.96, "learning_rate": 9.428533893851814e-06, "loss": 4.4265, "step": 271000 }, { "epoch": 2.97, "learning_rate": 9.427439131196358e-06, "loss": 4.4207, "step": 271500 }, { "epoch": 2.97, "learning_rate": 9.4263443685409e-06, "loss": 4.4219, "step": 272000 }, { "epoch": 2.98, "learning_rate": 9.425249605885445e-06, "loss": 4.4278, "step": 272500 }, { "epoch": 2.98, "learning_rate": 9.424154843229989e-06, "loss": 4.4215, "step": 273000 }, { "epoch": 2.99, "learning_rate": 9.423060080574533e-06, "loss": 4.424, "step": 273500 }, { "epoch": 2.99, "learning_rate": 9.421965317919077e-06, "loss": 4.4283, "step": 274000 }, { "epoch": 3.0, "learning_rate": 9.420870555263619e-06, "loss": 4.4245, "step": 274500 }, { "epoch": 3.0, "learning_rate": 9.419775792608164e-06, "loss": 4.4305, "step": 275000 }, { "epoch": 3.01, "learning_rate": 9.418681029952706e-06, "loss": 4.425, "step": 275500 }, { "epoch": 3.01, "learning_rate": 9.417586267297251e-06, "loss": 4.4305, "step": 276000 }, { "epoch": 3.02, "learning_rate": 9.416491504641795e-06, "loss": 4.4278, "step": 276500 }, { "epoch": 3.03, "learning_rate": 9.415396741986337e-06, "loss": 4.4224, "step": 277000 }, { "epoch": 3.03, "learning_rate": 9.414301979330883e-06, "loss": 4.4214, "step": 277500 }, { "epoch": 3.04, "learning_rate": 9.413207216675425e-06, "loss": 4.4198, "step": 278000 }, { "epoch": 3.04, "learning_rate": 9.412112454019969e-06, "loss": 4.4234, "step": 278500 }, { "epoch": 3.05, "learning_rate": 9.411017691364514e-06, "loss": 4.4272, "step": 279000 }, { "epoch": 3.05, "learning_rate": 9.409922928709056e-06, "loss": 4.421, "step": 279500 }, { "epoch": 3.06, "learning_rate": 9.408828166053601e-06, "loss": 4.4128, "step": 280000 }, { "epoch": 3.06, "learning_rate": 9.407733403398143e-06, "loss": 4.4203, "step": 280500 }, { "epoch": 3.07, "learning_rate": 9.406638640742687e-06, "loss": 4.4181, "step": 281000 }, { "epoch": 3.08, "learning_rate": 9.405543878087231e-06, "loss": 4.4203, "step": 281500 }, { "epoch": 3.08, "learning_rate": 9.404449115431775e-06, "loss": 4.4169, "step": 282000 }, { "epoch": 3.09, "learning_rate": 9.40335435277632e-06, "loss": 4.4139, "step": 282500 }, { "epoch": 3.09, "learning_rate": 9.402259590120862e-06, "loss": 4.4168, "step": 283000 }, { "epoch": 3.1, "learning_rate": 9.401164827465406e-06, "loss": 4.4086, "step": 283500 }, { "epoch": 3.1, "learning_rate": 9.40007006480995e-06, "loss": 4.4152, "step": 284000 }, { "epoch": 3.11, "learning_rate": 9.398975302154493e-06, "loss": 4.4076, "step": 284500 }, { "epoch": 3.11, "learning_rate": 9.397880539499037e-06, "loss": 4.4128, "step": 285000 }, { "epoch": 3.12, "learning_rate": 9.396785776843581e-06, "loss": 4.4113, "step": 285500 }, { "epoch": 3.12, "learning_rate": 9.395691014188125e-06, "loss": 4.4128, "step": 286000 }, { "epoch": 3.13, "learning_rate": 9.394596251532668e-06, "loss": 4.4164, "step": 286500 }, { "epoch": 3.14, "learning_rate": 9.393501488877212e-06, "loss": 4.41, "step": 287000 }, { "epoch": 3.14, "learning_rate": 9.392406726221756e-06, "loss": 4.4091, "step": 287500 }, { "epoch": 3.15, "learning_rate": 9.3913119635663e-06, "loss": 4.4105, "step": 288000 }, { "epoch": 3.15, "learning_rate": 9.390217200910843e-06, "loss": 4.4056, "step": 288500 }, { "epoch": 3.16, "learning_rate": 9.389122438255387e-06, "loss": 4.4048, "step": 289000 }, { "epoch": 3.16, "learning_rate": 9.38802767559993e-06, "loss": 4.406, "step": 289500 }, { "epoch": 3.17, "learning_rate": 9.386932912944475e-06, "loss": 4.4073, "step": 290000 }, { "epoch": 3.17, "learning_rate": 9.385838150289018e-06, "loss": 4.4118, "step": 290500 }, { "epoch": 3.18, "learning_rate": 9.384743387633562e-06, "loss": 4.4115, "step": 291000 }, { "epoch": 3.18, "learning_rate": 9.383648624978106e-06, "loss": 4.4071, "step": 291500 }, { "epoch": 3.19, "learning_rate": 9.38255386232265e-06, "loss": 4.4077, "step": 292000 }, { "epoch": 3.2, "learning_rate": 9.381459099667193e-06, "loss": 4.4054, "step": 292500 }, { "epoch": 3.2, "learning_rate": 9.380364337011737e-06, "loss": 4.4013, "step": 293000 }, { "epoch": 3.21, "learning_rate": 9.37926957435628e-06, "loss": 4.4074, "step": 293500 }, { "epoch": 3.21, "learning_rate": 9.378174811700825e-06, "loss": 4.4053, "step": 294000 }, { "epoch": 3.22, "learning_rate": 9.377080049045367e-06, "loss": 4.4064, "step": 294500 }, { "epoch": 3.22, "learning_rate": 9.375985286389912e-06, "loss": 4.4039, "step": 295000 }, { "epoch": 3.23, "learning_rate": 9.374890523734456e-06, "loss": 4.3979, "step": 295500 }, { "epoch": 3.23, "learning_rate": 9.373795761079e-06, "loss": 4.3988, "step": 296000 }, { "epoch": 3.24, "learning_rate": 9.372700998423543e-06, "loss": 4.4029, "step": 296500 }, { "epoch": 3.24, "learning_rate": 9.371606235768085e-06, "loss": 4.4017, "step": 297000 }, { "epoch": 3.25, "learning_rate": 9.37051147311263e-06, "loss": 4.4062, "step": 297500 }, { "epoch": 3.26, "learning_rate": 9.369416710457173e-06, "loss": 4.4022, "step": 298000 }, { "epoch": 3.26, "learning_rate": 9.368321947801717e-06, "loss": 4.4002, "step": 298500 }, { "epoch": 3.27, "learning_rate": 9.367227185146262e-06, "loss": 4.4008, "step": 299000 }, { "epoch": 3.27, "learning_rate": 9.366132422490804e-06, "loss": 4.3957, "step": 299500 }, { "epoch": 3.28, "learning_rate": 9.36503765983535e-06, "loss": 4.3982, "step": 300000 }, { "epoch": 3.28, "eval_accuracy": 0.3369991324057652, "eval_loss": 4.136691093444824, "eval_runtime": 356.7622, "eval_samples_per_second": 864.318, "eval_steps_per_second": 13.508, "step": 300000 }, { "epoch": 3.28, "learning_rate": 9.363942897179891e-06, "loss": 4.395, "step": 300500 }, { "epoch": 3.29, "learning_rate": 9.362848134524435e-06, "loss": 4.4004, "step": 301000 }, { "epoch": 3.29, "learning_rate": 9.361753371868979e-06, "loss": 4.4025, "step": 301500 }, { "epoch": 3.3, "learning_rate": 9.360658609213523e-06, "loss": 4.4018, "step": 302000 }, { "epoch": 3.3, "learning_rate": 9.359563846558068e-06, "loss": 4.3961, "step": 302500 }, { "epoch": 3.31, "learning_rate": 9.35846908390261e-06, "loss": 4.4, "step": 303000 }, { "epoch": 3.32, "learning_rate": 9.357374321247154e-06, "loss": 4.4008, "step": 303500 }, { "epoch": 3.32, "learning_rate": 9.356279558591698e-06, "loss": 4.3981, "step": 304000 }, { "epoch": 3.33, "learning_rate": 9.355184795936241e-06, "loss": 4.4003, "step": 304500 }, { "epoch": 3.33, "learning_rate": 9.354090033280787e-06, "loss": 4.395, "step": 305000 }, { "epoch": 3.34, "learning_rate": 9.352995270625329e-06, "loss": 4.3902, "step": 305500 }, { "epoch": 3.34, "learning_rate": 9.351900507969873e-06, "loss": 4.3951, "step": 306000 }, { "epoch": 3.35, "learning_rate": 9.350805745314416e-06, "loss": 4.3957, "step": 306500 }, { "epoch": 3.35, "learning_rate": 9.34971098265896e-06, "loss": 4.396, "step": 307000 }, { "epoch": 3.36, "learning_rate": 9.348616220003504e-06, "loss": 4.3921, "step": 307500 }, { "epoch": 3.36, "learning_rate": 9.347521457348048e-06, "loss": 4.3947, "step": 308000 }, { "epoch": 3.37, "learning_rate": 9.346426694692591e-06, "loss": 4.3923, "step": 308500 }, { "epoch": 3.38, "learning_rate": 9.345331932037135e-06, "loss": 4.3965, "step": 309000 }, { "epoch": 3.38, "learning_rate": 9.344237169381679e-06, "loss": 4.384, "step": 309500 }, { "epoch": 3.39, "learning_rate": 9.343142406726223e-06, "loss": 4.3974, "step": 310000 }, { "epoch": 3.39, "learning_rate": 9.342047644070766e-06, "loss": 4.39, "step": 310500 }, { "epoch": 3.4, "learning_rate": 9.34095288141531e-06, "loss": 4.3959, "step": 311000 }, { "epoch": 3.4, "learning_rate": 9.339858118759854e-06, "loss": 4.3921, "step": 311500 }, { "epoch": 3.41, "learning_rate": 9.338763356104398e-06, "loss": 4.3869, "step": 312000 }, { "epoch": 3.41, "learning_rate": 9.337668593448941e-06, "loss": 4.3927, "step": 312500 }, { "epoch": 3.42, "learning_rate": 9.336573830793485e-06, "loss": 4.3784, "step": 313000 }, { "epoch": 3.42, "learning_rate": 9.335479068138029e-06, "loss": 4.3856, "step": 313500 }, { "epoch": 3.43, "learning_rate": 9.334384305482573e-06, "loss": 4.383, "step": 314000 }, { "epoch": 3.44, "learning_rate": 9.333289542827115e-06, "loss": 4.3879, "step": 314500 }, { "epoch": 3.44, "learning_rate": 9.33219478017166e-06, "loss": 4.3899, "step": 315000 }, { "epoch": 3.45, "learning_rate": 9.331100017516204e-06, "loss": 4.3842, "step": 315500 }, { "epoch": 3.45, "learning_rate": 9.330005254860748e-06, "loss": 4.3829, "step": 316000 }, { "epoch": 3.46, "learning_rate": 9.328910492205291e-06, "loss": 4.3859, "step": 316500 }, { "epoch": 3.46, "learning_rate": 9.327815729549833e-06, "loss": 4.3809, "step": 317000 }, { "epoch": 3.47, "learning_rate": 9.326720966894379e-06, "loss": 4.3887, "step": 317500 }, { "epoch": 3.47, "learning_rate": 9.325626204238922e-06, "loss": 4.3841, "step": 318000 }, { "epoch": 3.48, "learning_rate": 9.324531441583465e-06, "loss": 4.3799, "step": 318500 }, { "epoch": 3.48, "learning_rate": 9.32343667892801e-06, "loss": 4.3788, "step": 319000 }, { "epoch": 3.49, "learning_rate": 9.322341916272552e-06, "loss": 4.3847, "step": 319500 }, { "epoch": 3.5, "learning_rate": 9.321247153617097e-06, "loss": 4.3795, "step": 320000 }, { "epoch": 3.5, "learning_rate": 9.32015239096164e-06, "loss": 4.3803, "step": 320500 }, { "epoch": 3.51, "learning_rate": 9.319057628306183e-06, "loss": 4.3877, "step": 321000 }, { "epoch": 3.51, "learning_rate": 9.317962865650729e-06, "loss": 4.3838, "step": 321500 }, { "epoch": 3.52, "learning_rate": 9.31686810299527e-06, "loss": 4.3818, "step": 322000 }, { "epoch": 3.52, "learning_rate": 9.315773340339816e-06, "loss": 4.3812, "step": 322500 }, { "epoch": 3.53, "learning_rate": 9.314678577684358e-06, "loss": 4.374, "step": 323000 }, { "epoch": 3.53, "learning_rate": 9.313583815028902e-06, "loss": 4.374, "step": 323500 }, { "epoch": 3.54, "learning_rate": 9.312489052373446e-06, "loss": 4.3806, "step": 324000 }, { "epoch": 3.54, "learning_rate": 9.31139428971799e-06, "loss": 4.3798, "step": 324500 }, { "epoch": 3.55, "learning_rate": 9.310299527062535e-06, "loss": 4.3759, "step": 325000 }, { "epoch": 3.56, "learning_rate": 9.309204764407077e-06, "loss": 4.3779, "step": 325500 }, { "epoch": 3.56, "learning_rate": 9.30811000175162e-06, "loss": 4.3763, "step": 326000 }, { "epoch": 3.57, "learning_rate": 9.307015239096164e-06, "loss": 4.3761, "step": 326500 }, { "epoch": 3.57, "learning_rate": 9.305920476440708e-06, "loss": 4.379, "step": 327000 }, { "epoch": 3.58, "learning_rate": 9.304825713785252e-06, "loss": 4.3734, "step": 327500 }, { "epoch": 3.58, "learning_rate": 9.303730951129796e-06, "loss": 4.373, "step": 328000 }, { "epoch": 3.59, "learning_rate": 9.30263618847434e-06, "loss": 4.3761, "step": 328500 }, { "epoch": 3.59, "learning_rate": 9.301541425818883e-06, "loss": 4.3763, "step": 329000 }, { "epoch": 3.6, "learning_rate": 9.300446663163427e-06, "loss": 4.3712, "step": 329500 }, { "epoch": 3.6, "learning_rate": 9.29935190050797e-06, "loss": 4.3714, "step": 330000 }, { "epoch": 3.6, "eval_accuracy": 0.3398633059519626, "eval_loss": 4.110254287719727, "eval_runtime": 355.5414, "eval_samples_per_second": 867.286, "eval_steps_per_second": 13.554, "step": 330000 }, { "epoch": 3.61, "learning_rate": 9.298257137852514e-06, "loss": 4.3752, "step": 330500 }, { "epoch": 3.62, "learning_rate": 9.297162375197058e-06, "loss": 4.3761, "step": 331000 }, { "epoch": 3.62, "learning_rate": 9.296067612541602e-06, "loss": 4.3721, "step": 331500 }, { "epoch": 3.63, "learning_rate": 9.294972849886146e-06, "loss": 4.3667, "step": 332000 }, { "epoch": 3.63, "learning_rate": 9.29387808723069e-06, "loss": 4.3764, "step": 332500 }, { "epoch": 3.64, "learning_rate": 9.292783324575233e-06, "loss": 4.3617, "step": 333000 }, { "epoch": 3.64, "learning_rate": 9.291688561919777e-06, "loss": 4.3775, "step": 333500 }, { "epoch": 3.65, "learning_rate": 9.29059379926432e-06, "loss": 4.3707, "step": 334000 }, { "epoch": 3.65, "learning_rate": 9.289499036608864e-06, "loss": 4.3683, "step": 334500 }, { "epoch": 3.66, "learning_rate": 9.288404273953408e-06, "loss": 4.3694, "step": 335000 }, { "epoch": 3.66, "learning_rate": 9.287309511297952e-06, "loss": 4.3677, "step": 335500 }, { "epoch": 3.67, "learning_rate": 9.286214748642496e-06, "loss": 4.3664, "step": 336000 }, { "epoch": 3.68, "learning_rate": 9.28511998598704e-06, "loss": 4.3677, "step": 336500 }, { "epoch": 3.68, "learning_rate": 9.284025223331581e-06, "loss": 4.3716, "step": 337000 }, { "epoch": 3.69, "learning_rate": 9.282930460676127e-06, "loss": 4.3641, "step": 337500 }, { "epoch": 3.69, "learning_rate": 9.28183569802067e-06, "loss": 4.3657, "step": 338000 }, { "epoch": 3.7, "learning_rate": 9.280740935365213e-06, "loss": 4.3695, "step": 338500 }, { "epoch": 3.7, "learning_rate": 9.279646172709758e-06, "loss": 4.36, "step": 339000 }, { "epoch": 3.71, "learning_rate": 9.2785514100543e-06, "loss": 4.3627, "step": 339500 }, { "epoch": 3.71, "learning_rate": 9.277456647398845e-06, "loss": 4.3611, "step": 340000 }, { "epoch": 3.72, "learning_rate": 9.27636188474339e-06, "loss": 4.3652, "step": 340500 }, { "epoch": 3.72, "learning_rate": 9.275267122087931e-06, "loss": 4.3656, "step": 341000 }, { "epoch": 3.73, "learning_rate": 9.274172359432477e-06, "loss": 4.3679, "step": 341500 }, { "epoch": 3.74, "learning_rate": 9.273077596777019e-06, "loss": 4.3611, "step": 342000 }, { "epoch": 3.74, "learning_rate": 9.271982834121564e-06, "loss": 4.3607, "step": 342500 }, { "epoch": 3.75, "learning_rate": 9.270888071466106e-06, "loss": 4.3621, "step": 343000 }, { "epoch": 3.75, "learning_rate": 9.26979330881065e-06, "loss": 4.3661, "step": 343500 }, { "epoch": 3.76, "learning_rate": 9.268698546155195e-06, "loss": 4.3672, "step": 344000 }, { "epoch": 3.76, "learning_rate": 9.267603783499737e-06, "loss": 4.3598, "step": 344500 }, { "epoch": 3.77, "learning_rate": 9.266509020844283e-06, "loss": 4.3615, "step": 345000 }, { "epoch": 3.77, "learning_rate": 9.265414258188825e-06, "loss": 4.3647, "step": 345500 }, { "epoch": 3.78, "learning_rate": 9.264319495533369e-06, "loss": 4.356, "step": 346000 }, { "epoch": 3.79, "learning_rate": 9.263224732877912e-06, "loss": 4.3646, "step": 346500 }, { "epoch": 3.79, "learning_rate": 9.262129970222456e-06, "loss": 4.3605, "step": 347000 }, { "epoch": 3.8, "learning_rate": 9.261035207567e-06, "loss": 4.3613, "step": 347500 }, { "epoch": 3.8, "learning_rate": 9.259940444911544e-06, "loss": 4.36, "step": 348000 }, { "epoch": 3.81, "learning_rate": 9.258845682256087e-06, "loss": 4.3555, "step": 348500 }, { "epoch": 3.81, "learning_rate": 9.257750919600631e-06, "loss": 4.3653, "step": 349000 }, { "epoch": 3.82, "learning_rate": 9.256656156945175e-06, "loss": 4.3626, "step": 349500 }, { "epoch": 3.82, "learning_rate": 9.255561394289719e-06, "loss": 4.3568, "step": 350000 }, { "epoch": 3.83, "learning_rate": 9.254466631634262e-06, "loss": 4.3642, "step": 350500 }, { "epoch": 3.83, "learning_rate": 9.253371868978806e-06, "loss": 4.3549, "step": 351000 }, { "epoch": 3.84, "learning_rate": 9.25227710632335e-06, "loss": 4.3536, "step": 351500 }, { "epoch": 3.85, "learning_rate": 9.251182343667894e-06, "loss": 4.3508, "step": 352000 }, { "epoch": 3.85, "learning_rate": 9.250087581012437e-06, "loss": 4.3611, "step": 352500 }, { "epoch": 3.86, "learning_rate": 9.248992818356981e-06, "loss": 4.3551, "step": 353000 }, { "epoch": 3.86, "learning_rate": 9.247898055701525e-06, "loss": 4.3545, "step": 353500 }, { "epoch": 3.87, "learning_rate": 9.246803293046069e-06, "loss": 4.3582, "step": 354000 }, { "epoch": 3.87, "learning_rate": 9.245708530390612e-06, "loss": 4.3554, "step": 354500 }, { "epoch": 3.88, "learning_rate": 9.244613767735156e-06, "loss": 4.3494, "step": 355000 }, { "epoch": 3.88, "learning_rate": 9.2435190050797e-06, "loss": 4.3537, "step": 355500 }, { "epoch": 3.89, "learning_rate": 9.242424242424244e-06, "loss": 4.3563, "step": 356000 }, { "epoch": 3.89, "learning_rate": 9.241329479768787e-06, "loss": 4.3532, "step": 356500 }, { "epoch": 3.9, "learning_rate": 9.240234717113331e-06, "loss": 4.3529, "step": 357000 }, { "epoch": 3.91, "learning_rate": 9.239139954457875e-06, "loss": 4.3504, "step": 357500 }, { "epoch": 3.91, "learning_rate": 9.238045191802418e-06, "loss": 4.3529, "step": 358000 }, { "epoch": 3.92, "learning_rate": 9.236950429146962e-06, "loss": 4.3503, "step": 358500 }, { "epoch": 3.92, "learning_rate": 9.235855666491506e-06, "loss": 4.3451, "step": 359000 }, { "epoch": 3.93, "learning_rate": 9.234760903836048e-06, "loss": 4.3552, "step": 359500 }, { "epoch": 3.93, "learning_rate": 9.233666141180593e-06, "loss": 4.3493, "step": 360000 }, { "epoch": 3.93, "eval_accuracy": 0.3422635068090456, "eval_loss": 4.086943626403809, "eval_runtime": 352.6284, "eval_samples_per_second": 874.45, "eval_steps_per_second": 13.666, "step": 360000 }, { "epoch": 3.94, "learning_rate": 9.232571378525137e-06, "loss": 4.3552, "step": 360500 }, { "epoch": 3.94, "learning_rate": 9.23147661586968e-06, "loss": 4.3469, "step": 361000 }, { "epoch": 3.95, "learning_rate": 9.230381853214225e-06, "loss": 4.3459, "step": 361500 }, { "epoch": 3.95, "learning_rate": 9.229287090558767e-06, "loss": 4.3465, "step": 362000 }, { "epoch": 3.96, "learning_rate": 9.228192327903312e-06, "loss": 4.3509, "step": 362500 }, { "epoch": 3.97, "learning_rate": 9.227097565247854e-06, "loss": 4.351, "step": 363000 }, { "epoch": 3.97, "learning_rate": 9.226002802592398e-06, "loss": 4.3545, "step": 363500 }, { "epoch": 3.98, "learning_rate": 9.224908039936943e-06, "loss": 4.3491, "step": 364000 }, { "epoch": 3.98, "learning_rate": 9.223813277281485e-06, "loss": 4.3474, "step": 364500 }, { "epoch": 3.99, "learning_rate": 9.222718514626031e-06, "loss": 4.3498, "step": 365000 }, { "epoch": 3.99, "learning_rate": 9.221623751970573e-06, "loss": 4.3487, "step": 365500 }, { "epoch": 4.0, "learning_rate": 9.220528989315117e-06, "loss": 4.3567, "step": 366000 }, { "epoch": 4.0, "learning_rate": 9.219434226659662e-06, "loss": 4.3512, "step": 366500 }, { "epoch": 4.01, "learning_rate": 9.218339464004204e-06, "loss": 4.3413, "step": 367000 }, { "epoch": 4.01, "learning_rate": 9.217244701348748e-06, "loss": 4.3436, "step": 367500 }, { "epoch": 4.02, "learning_rate": 9.216149938693292e-06, "loss": 4.3437, "step": 368000 }, { "epoch": 4.03, "learning_rate": 9.215055176037835e-06, "loss": 4.3402, "step": 368500 }, { "epoch": 4.03, "learning_rate": 9.213960413382379e-06, "loss": 4.3448, "step": 369000 }, { "epoch": 4.04, "learning_rate": 9.212865650726923e-06, "loss": 4.344, "step": 369500 }, { "epoch": 4.04, "learning_rate": 9.211770888071467e-06, "loss": 4.3455, "step": 370000 }, { "epoch": 4.05, "learning_rate": 9.21067612541601e-06, "loss": 4.3386, "step": 370500 }, { "epoch": 4.05, "learning_rate": 9.209581362760554e-06, "loss": 4.3378, "step": 371000 }, { "epoch": 4.06, "learning_rate": 9.208486600105098e-06, "loss": 4.3384, "step": 371500 }, { "epoch": 4.06, "learning_rate": 9.207391837449642e-06, "loss": 4.3393, "step": 372000 }, { "epoch": 4.07, "learning_rate": 9.206297074794185e-06, "loss": 4.3404, "step": 372500 }, { "epoch": 4.07, "learning_rate": 9.205202312138729e-06, "loss": 4.3438, "step": 373000 }, { "epoch": 4.08, "learning_rate": 9.204107549483273e-06, "loss": 4.3394, "step": 373500 }, { "epoch": 4.09, "learning_rate": 9.203012786827817e-06, "loss": 4.3386, "step": 374000 }, { "epoch": 4.09, "learning_rate": 9.20191802417236e-06, "loss": 4.3312, "step": 374500 }, { "epoch": 4.1, "learning_rate": 9.200823261516904e-06, "loss": 4.3454, "step": 375000 }, { "epoch": 4.1, "learning_rate": 9.199728498861448e-06, "loss": 4.342, "step": 375500 }, { "epoch": 4.11, "learning_rate": 9.198633736205992e-06, "loss": 4.3386, "step": 376000 }, { "epoch": 4.11, "learning_rate": 9.197538973550535e-06, "loss": 4.3409, "step": 376500 }, { "epoch": 4.12, "learning_rate": 9.196444210895079e-06, "loss": 4.3444, "step": 377000 }, { "epoch": 4.12, "learning_rate": 9.195349448239623e-06, "loss": 4.3429, "step": 377500 }, { "epoch": 4.13, "learning_rate": 9.194254685584166e-06, "loss": 4.3319, "step": 378000 }, { "epoch": 4.13, "learning_rate": 9.19315992292871e-06, "loss": 4.3407, "step": 378500 }, { "epoch": 4.14, "learning_rate": 9.192065160273254e-06, "loss": 4.3347, "step": 379000 }, { "epoch": 4.15, "learning_rate": 9.190970397617798e-06, "loss": 4.3391, "step": 379500 }, { "epoch": 4.15, "learning_rate": 9.189875634962341e-06, "loss": 4.3348, "step": 380000 }, { "epoch": 4.16, "learning_rate": 9.188780872306885e-06, "loss": 4.3376, "step": 380500 }, { "epoch": 4.16, "learning_rate": 9.187686109651427e-06, "loss": 4.3358, "step": 381000 }, { "epoch": 4.17, "learning_rate": 9.186591346995973e-06, "loss": 4.3393, "step": 381500 }, { "epoch": 4.17, "learning_rate": 9.185496584340515e-06, "loss": 4.3398, "step": 382000 }, { "epoch": 4.18, "learning_rate": 9.18440182168506e-06, "loss": 4.3385, "step": 382500 }, { "epoch": 4.18, "learning_rate": 9.183307059029604e-06, "loss": 4.3324, "step": 383000 }, { "epoch": 4.19, "learning_rate": 9.182212296374146e-06, "loss": 4.3346, "step": 383500 }, { "epoch": 4.19, "learning_rate": 9.181117533718691e-06, "loss": 4.3322, "step": 384000 }, { "epoch": 4.2, "learning_rate": 9.180022771063233e-06, "loss": 4.3264, "step": 384500 }, { "epoch": 4.21, "learning_rate": 9.178928008407779e-06, "loss": 4.3345, "step": 385000 }, { "epoch": 4.21, "learning_rate": 9.177833245752321e-06, "loss": 4.3273, "step": 385500 }, { "epoch": 4.22, "learning_rate": 9.176738483096865e-06, "loss": 4.3283, "step": 386000 }, { "epoch": 4.22, "learning_rate": 9.17564372044141e-06, "loss": 4.3414, "step": 386500 }, { "epoch": 4.23, "learning_rate": 9.174548957785952e-06, "loss": 4.332, "step": 387000 }, { "epoch": 4.23, "learning_rate": 9.173454195130496e-06, "loss": 4.3309, "step": 387500 }, { "epoch": 4.24, "learning_rate": 9.17235943247504e-06, "loss": 4.3335, "step": 388000 }, { "epoch": 4.24, "learning_rate": 9.171264669819583e-06, "loss": 4.3331, "step": 388500 }, { "epoch": 4.25, "learning_rate": 9.170169907164127e-06, "loss": 4.3316, "step": 389000 }, { "epoch": 4.25, "learning_rate": 9.169075144508671e-06, "loss": 4.3293, "step": 389500 }, { "epoch": 4.26, "learning_rate": 9.167980381853215e-06, "loss": 4.3303, "step": 390000 }, { "epoch": 4.26, "eval_accuracy": 0.34389734701532104, "eval_loss": 4.068009853363037, "eval_runtime": 354.6487, "eval_samples_per_second": 869.469, "eval_steps_per_second": 13.588, "step": 390000 }, { "epoch": 4.27, "learning_rate": 9.166885619197758e-06, "loss": 4.3313, "step": 390500 }, { "epoch": 4.27, "learning_rate": 9.165790856542302e-06, "loss": 4.3304, "step": 391000 }, { "epoch": 4.28, "learning_rate": 9.164696093886846e-06, "loss": 4.3144, "step": 391500 }, { "epoch": 4.28, "learning_rate": 9.16360133123139e-06, "loss": 4.3269, "step": 392000 }, { "epoch": 4.29, "learning_rate": 9.162506568575933e-06, "loss": 4.3372, "step": 392500 }, { "epoch": 4.29, "learning_rate": 9.161411805920477e-06, "loss": 4.3253, "step": 393000 }, { "epoch": 4.3, "learning_rate": 9.16031704326502e-06, "loss": 4.3251, "step": 393500 }, { "epoch": 4.3, "learning_rate": 9.159222280609565e-06, "loss": 4.3248, "step": 394000 }, { "epoch": 4.31, "learning_rate": 9.158127517954108e-06, "loss": 4.3228, "step": 394500 }, { "epoch": 4.31, "learning_rate": 9.157032755298652e-06, "loss": 4.3312, "step": 395000 }, { "epoch": 4.32, "learning_rate": 9.155937992643196e-06, "loss": 4.324, "step": 395500 }, { "epoch": 4.33, "learning_rate": 9.15484322998774e-06, "loss": 4.3213, "step": 396000 }, { "epoch": 4.33, "learning_rate": 9.153748467332283e-06, "loss": 4.3238, "step": 396500 }, { "epoch": 4.34, "learning_rate": 9.152653704676827e-06, "loss": 4.3261, "step": 397000 }, { "epoch": 4.34, "learning_rate": 9.15155894202137e-06, "loss": 4.3359, "step": 397500 }, { "epoch": 4.35, "learning_rate": 9.150464179365914e-06, "loss": 4.3286, "step": 398000 }, { "epoch": 4.35, "learning_rate": 9.149369416710458e-06, "loss": 4.3201, "step": 398500 }, { "epoch": 4.36, "learning_rate": 9.148274654055002e-06, "loss": 4.3301, "step": 399000 }, { "epoch": 4.36, "learning_rate": 9.147179891399546e-06, "loss": 4.3196, "step": 399500 }, { "epoch": 4.37, "learning_rate": 9.14608512874409e-06, "loss": 4.3201, "step": 400000 }, { "epoch": 4.37, "learning_rate": 9.144990366088633e-06, "loss": 4.3181, "step": 400500 }, { "epoch": 4.38, "learning_rate": 9.143895603433175e-06, "loss": 4.3223, "step": 401000 }, { "epoch": 4.39, "learning_rate": 9.14280084077772e-06, "loss": 4.3173, "step": 401500 }, { "epoch": 4.39, "learning_rate": 9.141706078122263e-06, "loss": 4.3226, "step": 402000 }, { "epoch": 4.4, "learning_rate": 9.140611315466808e-06, "loss": 4.3275, "step": 402500 }, { "epoch": 4.4, "learning_rate": 9.139516552811352e-06, "loss": 4.3119, "step": 403000 }, { "epoch": 4.41, "learning_rate": 9.138421790155894e-06, "loss": 4.3212, "step": 403500 }, { "epoch": 4.41, "learning_rate": 9.13732702750044e-06, "loss": 4.3189, "step": 404000 }, { "epoch": 4.42, "learning_rate": 9.136232264844981e-06, "loss": 4.3164, "step": 404500 }, { "epoch": 4.42, "learning_rate": 9.135137502189527e-06, "loss": 4.3149, "step": 405000 }, { "epoch": 4.43, "learning_rate": 9.13404273953407e-06, "loss": 4.3197, "step": 405500 }, { "epoch": 4.44, "learning_rate": 9.132947976878613e-06, "loss": 4.3155, "step": 406000 }, { "epoch": 4.44, "learning_rate": 9.131853214223158e-06, "loss": 4.3162, "step": 406500 }, { "epoch": 4.45, "learning_rate": 9.1307584515677e-06, "loss": 4.3181, "step": 407000 }, { "epoch": 4.45, "learning_rate": 9.129663688912244e-06, "loss": 4.3137, "step": 407500 }, { "epoch": 4.46, "learning_rate": 9.128568926256788e-06, "loss": 4.3145, "step": 408000 }, { "epoch": 4.46, "learning_rate": 9.127474163601331e-06, "loss": 4.3221, "step": 408500 }, { "epoch": 4.47, "learning_rate": 9.126379400945877e-06, "loss": 4.315, "step": 409000 }, { "epoch": 4.47, "learning_rate": 9.125284638290419e-06, "loss": 4.3212, "step": 409500 }, { "epoch": 4.48, "learning_rate": 9.124189875634963e-06, "loss": 4.3137, "step": 410000 }, { "epoch": 4.48, "learning_rate": 9.123095112979506e-06, "loss": 4.3188, "step": 410500 }, { "epoch": 4.49, "learning_rate": 9.12200035032405e-06, "loss": 4.3143, "step": 411000 }, { "epoch": 4.5, "learning_rate": 9.120905587668594e-06, "loss": 4.3161, "step": 411500 }, { "epoch": 4.5, "learning_rate": 9.119810825013138e-06, "loss": 4.317, "step": 412000 }, { "epoch": 4.51, "learning_rate": 9.118716062357681e-06, "loss": 4.3172, "step": 412500 }, { "epoch": 4.51, "learning_rate": 9.117621299702225e-06, "loss": 4.3078, "step": 413000 }, { "epoch": 4.52, "learning_rate": 9.116526537046769e-06, "loss": 4.3121, "step": 413500 }, { "epoch": 4.52, "learning_rate": 9.115431774391313e-06, "loss": 4.31, "step": 414000 }, { "epoch": 4.53, "learning_rate": 9.114337011735856e-06, "loss": 4.3102, "step": 414500 }, { "epoch": 4.53, "learning_rate": 9.1132422490804e-06, "loss": 4.3127, "step": 415000 }, { "epoch": 4.54, "learning_rate": 9.112147486424944e-06, "loss": 4.31, "step": 415500 }, { "epoch": 4.54, "learning_rate": 9.111052723769488e-06, "loss": 4.312, "step": 416000 }, { "epoch": 4.55, "learning_rate": 9.109957961114031e-06, "loss": 4.3145, "step": 416500 }, { "epoch": 4.56, "learning_rate": 9.108863198458575e-06, "loss": 4.3041, "step": 417000 }, { "epoch": 4.56, "learning_rate": 9.107768435803119e-06, "loss": 4.3146, "step": 417500 }, { "epoch": 4.57, "learning_rate": 9.106673673147663e-06, "loss": 4.3149, "step": 418000 }, { "epoch": 4.57, "learning_rate": 9.105578910492206e-06, "loss": 4.312, "step": 418500 }, { "epoch": 4.58, "learning_rate": 9.10448414783675e-06, "loss": 4.3091, "step": 419000 }, { "epoch": 4.58, "learning_rate": 9.103389385181294e-06, "loss": 4.3151, "step": 419500 }, { "epoch": 4.59, "learning_rate": 9.102294622525837e-06, "loss": 4.3131, "step": 420000 }, { "epoch": 4.59, "eval_accuracy": 0.346064913012304, "eval_loss": 4.0467329025268555, "eval_runtime": 356.7612, "eval_samples_per_second": 864.321, "eval_steps_per_second": 13.508, "step": 420000 }, { "epoch": 4.59, "learning_rate": 9.101199859870381e-06, "loss": 4.311, "step": 420500 }, { "epoch": 4.6, "learning_rate": 9.100105097214923e-06, "loss": 4.3134, "step": 421000 }, { "epoch": 4.6, "learning_rate": 9.099010334559469e-06, "loss": 4.3053, "step": 421500 }, { "epoch": 4.61, "learning_rate": 9.097915571904012e-06, "loss": 4.3105, "step": 422000 }, { "epoch": 4.62, "learning_rate": 9.096820809248556e-06, "loss": 4.3066, "step": 422500 }, { "epoch": 4.62, "learning_rate": 9.0957260465931e-06, "loss": 4.3061, "step": 423000 }, { "epoch": 4.63, "learning_rate": 9.094631283937642e-06, "loss": 4.3063, "step": 423500 }, { "epoch": 4.63, "learning_rate": 9.093536521282187e-06, "loss": 4.3076, "step": 424000 }, { "epoch": 4.64, "learning_rate": 9.09244175862673e-06, "loss": 4.3108, "step": 424500 }, { "epoch": 4.64, "learning_rate": 9.091346995971275e-06, "loss": 4.3077, "step": 425000 }, { "epoch": 4.65, "learning_rate": 9.090252233315819e-06, "loss": 4.3071, "step": 425500 }, { "epoch": 4.65, "learning_rate": 9.08915747066036e-06, "loss": 4.3064, "step": 426000 }, { "epoch": 4.66, "learning_rate": 9.088062708004906e-06, "loss": 4.3055, "step": 426500 }, { "epoch": 4.66, "learning_rate": 9.086967945349448e-06, "loss": 4.3045, "step": 427000 }, { "epoch": 4.67, "learning_rate": 9.085873182693994e-06, "loss": 4.3059, "step": 427500 }, { "epoch": 4.68, "learning_rate": 9.084778420038537e-06, "loss": 4.3077, "step": 428000 }, { "epoch": 4.68, "learning_rate": 9.08368365738308e-06, "loss": 4.3061, "step": 428500 }, { "epoch": 4.69, "learning_rate": 9.082588894727625e-06, "loss": 4.3105, "step": 429000 }, { "epoch": 4.69, "learning_rate": 9.081494132072167e-06, "loss": 4.2974, "step": 429500 }, { "epoch": 4.7, "learning_rate": 9.08039936941671e-06, "loss": 4.3026, "step": 430000 }, { "epoch": 4.7, "learning_rate": 9.079304606761254e-06, "loss": 4.2997, "step": 430500 }, { "epoch": 4.71, "learning_rate": 9.078209844105798e-06, "loss": 4.2982, "step": 431000 }, { "epoch": 4.71, "learning_rate": 9.077115081450344e-06, "loss": 4.3021, "step": 431500 }, { "epoch": 4.72, "learning_rate": 9.076020318794886e-06, "loss": 4.2976, "step": 432000 }, { "epoch": 4.72, "learning_rate": 9.07492555613943e-06, "loss": 4.2991, "step": 432500 }, { "epoch": 4.73, "learning_rate": 9.073830793483973e-06, "loss": 4.2979, "step": 433000 }, { "epoch": 4.74, "learning_rate": 9.072736030828517e-06, "loss": 4.2968, "step": 433500 }, { "epoch": 4.74, "learning_rate": 9.07164126817306e-06, "loss": 4.3085, "step": 434000 }, { "epoch": 4.75, "learning_rate": 9.070546505517604e-06, "loss": 4.3017, "step": 434500 }, { "epoch": 4.75, "learning_rate": 9.069451742862148e-06, "loss": 4.2967, "step": 435000 }, { "epoch": 4.76, "learning_rate": 9.068356980206692e-06, "loss": 4.302, "step": 435500 }, { "epoch": 4.76, "learning_rate": 9.067262217551236e-06, "loss": 4.2942, "step": 436000 }, { "epoch": 4.77, "learning_rate": 9.06616745489578e-06, "loss": 4.2968, "step": 436500 }, { "epoch": 4.77, "learning_rate": 9.065072692240323e-06, "loss": 4.2924, "step": 437000 }, { "epoch": 4.78, "learning_rate": 9.063977929584867e-06, "loss": 4.2966, "step": 437500 }, { "epoch": 4.78, "learning_rate": 9.06288316692941e-06, "loss": 4.2963, "step": 438000 }, { "epoch": 4.79, "learning_rate": 9.061788404273954e-06, "loss": 4.2952, "step": 438500 }, { "epoch": 4.8, "learning_rate": 9.060693641618498e-06, "loss": 4.2956, "step": 439000 }, { "epoch": 4.8, "learning_rate": 9.059598878963042e-06, "loss": 4.2989, "step": 439500 }, { "epoch": 4.81, "learning_rate": 9.058504116307585e-06, "loss": 4.2889, "step": 440000 }, { "epoch": 4.81, "learning_rate": 9.05740935365213e-06, "loss": 4.2884, "step": 440500 }, { "epoch": 4.82, "learning_rate": 9.056314590996673e-06, "loss": 4.2964, "step": 441000 }, { "epoch": 4.82, "learning_rate": 9.055219828341217e-06, "loss": 4.2965, "step": 441500 }, { "epoch": 4.83, "learning_rate": 9.05412506568576e-06, "loss": 4.2933, "step": 442000 }, { "epoch": 4.83, "learning_rate": 9.053030303030304e-06, "loss": 4.292, "step": 442500 }, { "epoch": 4.84, "learning_rate": 9.051935540374848e-06, "loss": 4.2895, "step": 443000 }, { "epoch": 4.84, "learning_rate": 9.05084077771939e-06, "loss": 4.2904, "step": 443500 }, { "epoch": 4.85, "learning_rate": 9.049746015063935e-06, "loss": 4.2868, "step": 444000 }, { "epoch": 4.86, "learning_rate": 9.04865125240848e-06, "loss": 4.2939, "step": 444500 }, { "epoch": 4.86, "learning_rate": 9.047556489753023e-06, "loss": 4.2941, "step": 445000 }, { "epoch": 4.87, "learning_rate": 9.046461727097567e-06, "loss": 4.2917, "step": 445500 }, { "epoch": 4.87, "learning_rate": 9.045366964442109e-06, "loss": 4.2846, "step": 446000 }, { "epoch": 4.88, "learning_rate": 9.044272201786654e-06, "loss": 4.2867, "step": 446500 }, { "epoch": 4.88, "learning_rate": 9.043177439131196e-06, "loss": 4.2943, "step": 447000 }, { "epoch": 4.89, "learning_rate": 9.042082676475742e-06, "loss": 4.2823, "step": 447500 }, { "epoch": 4.89, "learning_rate": 9.040987913820285e-06, "loss": 4.2857, "step": 448000 }, { "epoch": 4.9, "learning_rate": 9.039893151164827e-06, "loss": 4.2897, "step": 448500 }, { "epoch": 4.9, "learning_rate": 9.038798388509373e-06, "loss": 4.2918, "step": 449000 }, { "epoch": 4.91, "learning_rate": 9.037703625853915e-06, "loss": 4.2822, "step": 449500 }, { "epoch": 4.92, "learning_rate": 9.036608863198459e-06, "loss": 4.2875, "step": 450000 }, { "epoch": 4.92, "eval_accuracy": 0.3477258556120563, "eval_loss": 4.029216289520264, "eval_runtime": 354.7849, "eval_samples_per_second": 869.135, "eval_steps_per_second": 13.583, "step": 450000 }, { "epoch": 4.92, "learning_rate": 9.035514100543002e-06, "loss": 4.284, "step": 450500 }, { "epoch": 4.93, "learning_rate": 9.034419337887546e-06, "loss": 4.2893, "step": 451000 }, { "epoch": 4.93, "learning_rate": 9.033324575232092e-06, "loss": 4.2814, "step": 451500 }, { "epoch": 4.94, "learning_rate": 9.032229812576634e-06, "loss": 4.2877, "step": 452000 }, { "epoch": 4.94, "learning_rate": 9.031135049921177e-06, "loss": 4.2893, "step": 452500 }, { "epoch": 4.95, "learning_rate": 9.030040287265721e-06, "loss": 4.2829, "step": 453000 }, { "epoch": 4.95, "learning_rate": 9.028945524610265e-06, "loss": 4.283, "step": 453500 }, { "epoch": 4.96, "learning_rate": 9.02785076195481e-06, "loss": 4.2874, "step": 454000 }, { "epoch": 4.96, "learning_rate": 9.026755999299352e-06, "loss": 4.2792, "step": 454500 }, { "epoch": 4.97, "learning_rate": 9.025661236643896e-06, "loss": 4.2882, "step": 455000 }, { "epoch": 4.98, "learning_rate": 9.02456647398844e-06, "loss": 4.2862, "step": 455500 }, { "epoch": 4.98, "learning_rate": 9.023471711332984e-06, "loss": 4.2779, "step": 456000 }, { "epoch": 4.99, "learning_rate": 9.022376948677527e-06, "loss": 4.2903, "step": 456500 }, { "epoch": 4.99, "learning_rate": 9.021282186022071e-06, "loss": 4.2849, "step": 457000 }, { "epoch": 5.0, "learning_rate": 9.020187423366615e-06, "loss": 4.2837, "step": 457500 }, { "epoch": 5.0, "learning_rate": 9.019092660711159e-06, "loss": 4.2786, "step": 458000 }, { "epoch": 5.01, "learning_rate": 9.017997898055702e-06, "loss": 4.2842, "step": 458500 }, { "epoch": 5.01, "learning_rate": 9.016903135400246e-06, "loss": 4.2771, "step": 459000 }, { "epoch": 5.02, "learning_rate": 9.01580837274479e-06, "loss": 4.2819, "step": 459500 }, { "epoch": 5.02, "learning_rate": 9.014713610089333e-06, "loss": 4.2781, "step": 460000 }, { "epoch": 5.03, "learning_rate": 9.013618847433877e-06, "loss": 4.285, "step": 460500 }, { "epoch": 5.04, "learning_rate": 9.012524084778421e-06, "loss": 4.2817, "step": 461000 }, { "epoch": 5.04, "learning_rate": 9.011429322122965e-06, "loss": 4.2813, "step": 461500 }, { "epoch": 5.05, "learning_rate": 9.010334559467508e-06, "loss": 4.2801, "step": 462000 }, { "epoch": 5.05, "learning_rate": 9.009239796812052e-06, "loss": 4.2719, "step": 462500 }, { "epoch": 5.06, "learning_rate": 9.008145034156596e-06, "loss": 4.2726, "step": 463000 }, { "epoch": 5.06, "learning_rate": 9.007050271501138e-06, "loss": 4.2776, "step": 463500 }, { "epoch": 5.07, "learning_rate": 9.005955508845683e-06, "loss": 4.2762, "step": 464000 }, { "epoch": 5.07, "learning_rate": 9.004860746190227e-06, "loss": 4.2802, "step": 464500 }, { "epoch": 5.08, "learning_rate": 9.003765983534771e-06, "loss": 4.278, "step": 465000 }, { "epoch": 5.08, "learning_rate": 9.002671220879315e-06, "loss": 4.2768, "step": 465500 }, { "epoch": 5.09, "learning_rate": 9.001576458223857e-06, "loss": 4.2793, "step": 466000 }, { "epoch": 5.1, "learning_rate": 9.000481695568402e-06, "loss": 4.2716, "step": 466500 }, { "epoch": 5.1, "learning_rate": 8.999386932912946e-06, "loss": 4.2711, "step": 467000 }, { "epoch": 5.11, "learning_rate": 8.99829217025749e-06, "loss": 4.2746, "step": 467500 }, { "epoch": 5.11, "learning_rate": 8.997197407602033e-06, "loss": 4.2734, "step": 468000 }, { "epoch": 5.12, "learning_rate": 8.996102644946575e-06, "loss": 4.2669, "step": 468500 }, { "epoch": 5.12, "learning_rate": 8.99500788229112e-06, "loss": 4.2775, "step": 469000 }, { "epoch": 5.13, "learning_rate": 8.993913119635663e-06, "loss": 4.2787, "step": 469500 }, { "epoch": 5.13, "learning_rate": 8.992818356980207e-06, "loss": 4.2719, "step": 470000 }, { "epoch": 5.14, "learning_rate": 8.991723594324752e-06, "loss": 4.269, "step": 470500 }, { "epoch": 5.15, "learning_rate": 8.990628831669294e-06, "loss": 4.2728, "step": 471000 }, { "epoch": 5.15, "learning_rate": 8.98953406901384e-06, "loss": 4.2716, "step": 471500 }, { "epoch": 5.16, "learning_rate": 8.988439306358382e-06, "loss": 4.2718, "step": 472000 }, { "epoch": 5.16, "learning_rate": 8.987344543702925e-06, "loss": 4.2693, "step": 472500 }, { "epoch": 5.17, "learning_rate": 8.986249781047469e-06, "loss": 4.2725, "step": 473000 }, { "epoch": 5.17, "learning_rate": 8.985155018392013e-06, "loss": 4.2737, "step": 473500 }, { "epoch": 5.18, "learning_rate": 8.984060255736558e-06, "loss": 4.2665, "step": 474000 }, { "epoch": 5.18, "learning_rate": 8.9829654930811e-06, "loss": 4.2629, "step": 474500 }, { "epoch": 5.19, "learning_rate": 8.981870730425644e-06, "loss": 4.2712, "step": 475000 }, { "epoch": 5.19, "learning_rate": 8.980775967770188e-06, "loss": 4.2704, "step": 475500 }, { "epoch": 5.2, "learning_rate": 8.979681205114732e-06, "loss": 4.2737, "step": 476000 }, { "epoch": 5.21, "learning_rate": 8.978586442459275e-06, "loss": 4.2665, "step": 476500 }, { "epoch": 5.21, "learning_rate": 8.977491679803819e-06, "loss": 4.263, "step": 477000 }, { "epoch": 5.22, "learning_rate": 8.976396917148363e-06, "loss": 4.2615, "step": 477500 }, { "epoch": 5.22, "learning_rate": 8.975302154492907e-06, "loss": 4.2652, "step": 478000 }, { "epoch": 5.23, "learning_rate": 8.97420739183745e-06, "loss": 4.2688, "step": 478500 }, { "epoch": 5.23, "learning_rate": 8.973112629181994e-06, "loss": 4.2722, "step": 479000 }, { "epoch": 5.24, "learning_rate": 8.972017866526538e-06, "loss": 4.2706, "step": 479500 }, { "epoch": 5.24, "learning_rate": 8.970923103871081e-06, "loss": 4.2629, "step": 480000 }, { "epoch": 5.24, "eval_accuracy": 0.3497467363853022, "eval_loss": 4.01085901260376, "eval_runtime": 355.3308, "eval_samples_per_second": 867.8, "eval_steps_per_second": 13.562, "step": 480000 }, { "epoch": 5.25, "learning_rate": 8.969828341215625e-06, "loss": 4.2604, "step": 480500 }, { "epoch": 5.25, "learning_rate": 8.968733578560169e-06, "loss": 4.2649, "step": 481000 }, { "epoch": 5.26, "learning_rate": 8.967638815904713e-06, "loss": 4.267, "step": 481500 }, { "epoch": 5.27, "learning_rate": 8.966544053249256e-06, "loss": 4.2625, "step": 482000 }, { "epoch": 5.27, "learning_rate": 8.9654492905938e-06, "loss": 4.2633, "step": 482500 }, { "epoch": 5.28, "learning_rate": 8.964354527938344e-06, "loss": 4.2645, "step": 483000 }, { "epoch": 5.28, "learning_rate": 8.963259765282888e-06, "loss": 4.263, "step": 483500 }, { "epoch": 5.29, "learning_rate": 8.962165002627431e-06, "loss": 4.2633, "step": 484000 }, { "epoch": 5.29, "learning_rate": 8.961070239971975e-06, "loss": 4.2609, "step": 484500 }, { "epoch": 5.3, "learning_rate": 8.959975477316519e-06, "loss": 4.2663, "step": 485000 }, { "epoch": 5.3, "learning_rate": 8.958880714661063e-06, "loss": 4.264, "step": 485500 }, { "epoch": 5.31, "learning_rate": 8.957785952005605e-06, "loss": 4.27, "step": 486000 }, { "epoch": 5.31, "learning_rate": 8.95669118935015e-06, "loss": 4.262, "step": 486500 }, { "epoch": 5.32, "learning_rate": 8.955596426694694e-06, "loss": 4.2603, "step": 487000 }, { "epoch": 5.33, "learning_rate": 8.954501664039238e-06, "loss": 4.258, "step": 487500 }, { "epoch": 5.33, "learning_rate": 8.953406901383781e-06, "loss": 4.2597, "step": 488000 }, { "epoch": 5.34, "learning_rate": 8.952312138728323e-06, "loss": 4.2633, "step": 488500 }, { "epoch": 5.34, "learning_rate": 8.951217376072869e-06, "loss": 4.2554, "step": 489000 }, { "epoch": 5.35, "learning_rate": 8.950122613417411e-06, "loss": 4.2555, "step": 489500 }, { "epoch": 5.35, "learning_rate": 8.949027850761955e-06, "loss": 4.2624, "step": 490000 }, { "epoch": 5.36, "learning_rate": 8.9479330881065e-06, "loss": 4.2534, "step": 490500 }, { "epoch": 5.36, "learning_rate": 8.946838325451042e-06, "loss": 4.2653, "step": 491000 }, { "epoch": 5.37, "learning_rate": 8.945743562795588e-06, "loss": 4.2528, "step": 491500 }, { "epoch": 5.37, "learning_rate": 8.94464880014013e-06, "loss": 4.2526, "step": 492000 }, { "epoch": 5.38, "learning_rate": 8.943554037484673e-06, "loss": 4.2584, "step": 492500 }, { "epoch": 5.39, "learning_rate": 8.942459274829219e-06, "loss": 4.2586, "step": 493000 }, { "epoch": 5.39, "learning_rate": 8.94136451217376e-06, "loss": 4.2566, "step": 493500 }, { "epoch": 5.4, "learning_rate": 8.940269749518306e-06, "loss": 4.2573, "step": 494000 }, { "epoch": 5.4, "learning_rate": 8.939174986862848e-06, "loss": 4.2617, "step": 494500 }, { "epoch": 5.41, "learning_rate": 8.938080224207392e-06, "loss": 4.2523, "step": 495000 }, { "epoch": 5.41, "learning_rate": 8.936985461551936e-06, "loss": 4.2564, "step": 495500 }, { "epoch": 5.42, "learning_rate": 8.93589069889648e-06, "loss": 4.2571, "step": 496000 }, { "epoch": 5.42, "learning_rate": 8.934795936241025e-06, "loss": 4.2537, "step": 496500 }, { "epoch": 5.43, "learning_rate": 8.933701173585567e-06, "loss": 4.2573, "step": 497000 }, { "epoch": 5.43, "learning_rate": 8.93260641093011e-06, "loss": 4.256, "step": 497500 }, { "epoch": 5.44, "learning_rate": 8.931511648274655e-06, "loss": 4.255, "step": 498000 }, { "epoch": 5.45, "learning_rate": 8.930416885619198e-06, "loss": 4.2505, "step": 498500 }, { "epoch": 5.45, "learning_rate": 8.929322122963742e-06, "loss": 4.2448, "step": 499000 }, { "epoch": 5.46, "learning_rate": 8.928227360308286e-06, "loss": 4.2552, "step": 499500 }, { "epoch": 5.46, "learning_rate": 8.92713259765283e-06, "loss": 4.2507, "step": 500000 }, { "epoch": 5.47, "learning_rate": 8.926037834997373e-06, "loss": 4.2527, "step": 500500 }, { "epoch": 5.47, "learning_rate": 8.924943072341917e-06, "loss": 4.2522, "step": 501000 }, { "epoch": 5.48, "learning_rate": 8.92384830968646e-06, "loss": 4.2527, "step": 501500 }, { "epoch": 5.48, "learning_rate": 8.922753547031004e-06, "loss": 4.2484, "step": 502000 }, { "epoch": 5.49, "learning_rate": 8.921658784375548e-06, "loss": 4.2475, "step": 502500 }, { "epoch": 5.49, "learning_rate": 8.920564021720092e-06, "loss": 4.2445, "step": 503000 }, { "epoch": 5.5, "learning_rate": 8.919469259064636e-06, "loss": 4.2432, "step": 503500 }, { "epoch": 5.51, "learning_rate": 8.91837449640918e-06, "loss": 4.2476, "step": 504000 }, { "epoch": 5.51, "learning_rate": 8.917279733753723e-06, "loss": 4.2429, "step": 504500 }, { "epoch": 5.52, "learning_rate": 8.916184971098267e-06, "loss": 4.2555, "step": 505000 }, { "epoch": 5.52, "learning_rate": 8.91509020844281e-06, "loss": 4.2436, "step": 505500 }, { "epoch": 5.53, "learning_rate": 8.913995445787354e-06, "loss": 4.2581, "step": 506000 }, { "epoch": 5.53, "learning_rate": 8.912900683131898e-06, "loss": 4.2508, "step": 506500 }, { "epoch": 5.54, "learning_rate": 8.911805920476442e-06, "loss": 4.2448, "step": 507000 }, { "epoch": 5.54, "learning_rate": 8.910711157820986e-06, "loss": 4.2384, "step": 507500 }, { "epoch": 5.55, "learning_rate": 8.90961639516553e-06, "loss": 4.2519, "step": 508000 }, { "epoch": 5.55, "learning_rate": 8.908521632510071e-06, "loss": 4.2459, "step": 508500 }, { "epoch": 5.56, "learning_rate": 8.907426869854617e-06, "loss": 4.2426, "step": 509000 }, { "epoch": 5.57, "learning_rate": 8.90633210719916e-06, "loss": 4.2407, "step": 509500 }, { "epoch": 5.57, "learning_rate": 8.905237344543703e-06, "loss": 4.2413, "step": 510000 }, { "epoch": 5.57, "eval_accuracy": 0.3514999084734722, "eval_loss": 3.9931302070617676, "eval_runtime": 356.6254, "eval_samples_per_second": 864.65, "eval_steps_per_second": 13.513, "step": 510000 }, { "epoch": 5.58, "learning_rate": 8.904142581888248e-06, "loss": 4.2461, "step": 510500 }, { "epoch": 5.58, "learning_rate": 8.90304781923279e-06, "loss": 4.2416, "step": 511000 }, { "epoch": 5.59, "learning_rate": 8.901953056577336e-06, "loss": 4.2487, "step": 511500 }, { "epoch": 5.59, "learning_rate": 8.900858293921878e-06, "loss": 4.2386, "step": 512000 }, { "epoch": 5.6, "learning_rate": 8.899763531266421e-06, "loss": 4.2411, "step": 512500 }, { "epoch": 5.6, "learning_rate": 8.898668768610967e-06, "loss": 4.2397, "step": 513000 }, { "epoch": 5.61, "learning_rate": 8.897574005955509e-06, "loss": 4.2385, "step": 513500 }, { "epoch": 5.61, "learning_rate": 8.896479243300054e-06, "loss": 4.2404, "step": 514000 }, { "epoch": 5.62, "learning_rate": 8.895384480644596e-06, "loss": 4.2388, "step": 514500 }, { "epoch": 5.63, "learning_rate": 8.89428971798914e-06, "loss": 4.2418, "step": 515000 }, { "epoch": 5.63, "learning_rate": 8.893194955333686e-06, "loss": 4.2304, "step": 515500 }, { "epoch": 5.64, "learning_rate": 8.892100192678228e-06, "loss": 4.2318, "step": 516000 }, { "epoch": 5.64, "learning_rate": 8.891005430022773e-06, "loss": 4.2452, "step": 516500 }, { "epoch": 5.65, "learning_rate": 8.889910667367315e-06, "loss": 4.2471, "step": 517000 }, { "epoch": 5.65, "learning_rate": 8.888815904711859e-06, "loss": 4.2458, "step": 517500 }, { "epoch": 5.66, "learning_rate": 8.887721142056403e-06, "loss": 4.2366, "step": 518000 }, { "epoch": 5.66, "learning_rate": 8.886626379400946e-06, "loss": 4.2386, "step": 518500 }, { "epoch": 5.67, "learning_rate": 8.88553161674549e-06, "loss": 4.2348, "step": 519000 }, { "epoch": 5.67, "learning_rate": 8.884436854090034e-06, "loss": 4.2409, "step": 519500 }, { "epoch": 5.68, "learning_rate": 8.883342091434577e-06, "loss": 4.2408, "step": 520000 }, { "epoch": 5.69, "learning_rate": 8.882247328779121e-06, "loss": 4.2416, "step": 520500 }, { "epoch": 5.69, "learning_rate": 8.881152566123665e-06, "loss": 4.2385, "step": 521000 }, { "epoch": 5.7, "learning_rate": 8.880057803468209e-06, "loss": 4.2382, "step": 521500 }, { "epoch": 5.7, "learning_rate": 8.878963040812752e-06, "loss": 4.2427, "step": 522000 }, { "epoch": 5.71, "learning_rate": 8.877868278157296e-06, "loss": 4.2312, "step": 522500 }, { "epoch": 5.71, "learning_rate": 8.87677351550184e-06, "loss": 4.2318, "step": 523000 }, { "epoch": 5.72, "learning_rate": 8.875678752846384e-06, "loss": 4.2306, "step": 523500 }, { "epoch": 5.72, "learning_rate": 8.874583990190927e-06, "loss": 4.2319, "step": 524000 }, { "epoch": 5.73, "learning_rate": 8.873489227535471e-06, "loss": 4.238, "step": 524500 }, { "epoch": 5.73, "learning_rate": 8.872394464880015e-06, "loss": 4.2275, "step": 525000 }, { "epoch": 5.74, "learning_rate": 8.871299702224559e-06, "loss": 4.2375, "step": 525500 }, { "epoch": 5.75, "learning_rate": 8.870204939569102e-06, "loss": 4.2347, "step": 526000 }, { "epoch": 5.75, "learning_rate": 8.869110176913646e-06, "loss": 4.2343, "step": 526500 }, { "epoch": 5.76, "learning_rate": 8.86801541425819e-06, "loss": 4.229, "step": 527000 }, { "epoch": 5.76, "learning_rate": 8.866920651602734e-06, "loss": 4.2344, "step": 527500 }, { "epoch": 5.77, "learning_rate": 8.865825888947277e-06, "loss": 4.2289, "step": 528000 }, { "epoch": 5.77, "learning_rate": 8.864731126291821e-06, "loss": 4.2326, "step": 528500 }, { "epoch": 5.78, "learning_rate": 8.863636363636365e-06, "loss": 4.2251, "step": 529000 }, { "epoch": 5.78, "learning_rate": 8.862541600980909e-06, "loss": 4.2267, "step": 529500 }, { "epoch": 5.79, "learning_rate": 8.86144683832545e-06, "loss": 4.2381, "step": 530000 }, { "epoch": 5.8, "learning_rate": 8.860352075669996e-06, "loss": 4.2256, "step": 530500 }, { "epoch": 5.8, "learning_rate": 8.859257313014538e-06, "loss": 4.2287, "step": 531000 }, { "epoch": 5.81, "learning_rate": 8.858162550359084e-06, "loss": 4.2294, "step": 531500 }, { "epoch": 5.81, "learning_rate": 8.857067787703627e-06, "loss": 4.2319, "step": 532000 }, { "epoch": 5.82, "learning_rate": 8.85597302504817e-06, "loss": 4.2277, "step": 532500 }, { "epoch": 5.82, "learning_rate": 8.854878262392715e-06, "loss": 4.2336, "step": 533000 }, { "epoch": 5.83, "learning_rate": 8.853783499737257e-06, "loss": 4.2272, "step": 533500 }, { "epoch": 5.83, "learning_rate": 8.852688737081802e-06, "loss": 4.2291, "step": 534000 }, { "epoch": 5.84, "learning_rate": 8.851593974426344e-06, "loss": 4.232, "step": 534500 }, { "epoch": 5.84, "learning_rate": 8.850499211770888e-06, "loss": 4.2278, "step": 535000 }, { "epoch": 5.85, "learning_rate": 8.849404449115434e-06, "loss": 4.2362, "step": 535500 }, { "epoch": 5.86, "learning_rate": 8.848309686459976e-06, "loss": 4.2324, "step": 536000 }, { "epoch": 5.86, "learning_rate": 8.847214923804521e-06, "loss": 4.2305, "step": 536500 }, { "epoch": 5.87, "learning_rate": 8.846120161149063e-06, "loss": 4.2251, "step": 537000 }, { "epoch": 5.87, "learning_rate": 8.845025398493607e-06, "loss": 4.2234, "step": 537500 }, { "epoch": 5.88, "learning_rate": 8.84393063583815e-06, "loss": 4.2253, "step": 538000 }, { "epoch": 5.88, "learning_rate": 8.842835873182694e-06, "loss": 4.2254, "step": 538500 }, { "epoch": 5.89, "learning_rate": 8.841741110527238e-06, "loss": 4.2206, "step": 539000 }, { "epoch": 5.89, "learning_rate": 8.840646347871782e-06, "loss": 4.2192, "step": 539500 }, { "epoch": 5.9, "learning_rate": 8.839551585216326e-06, "loss": 4.2282, "step": 540000 }, { "epoch": 5.9, "eval_accuracy": 0.35362820251603594, "eval_loss": 3.975940227508545, "eval_runtime": 370.0914, "eval_samples_per_second": 833.189, "eval_steps_per_second": 13.021, "step": 540000 }, { "epoch": 5.9, "learning_rate": 8.83845682256087e-06, "loss": 4.2245, "step": 540500 }, { "epoch": 5.91, "learning_rate": 8.837362059905413e-06, "loss": 4.2211, "step": 541000 }, { "epoch": 5.92, "learning_rate": 8.836267297249957e-06, "loss": 4.2202, "step": 541500 }, { "epoch": 5.92, "learning_rate": 8.8351725345945e-06, "loss": 4.2252, "step": 542000 }, { "epoch": 5.93, "learning_rate": 8.834077771939044e-06, "loss": 4.2219, "step": 542500 }, { "epoch": 5.93, "learning_rate": 8.832983009283588e-06, "loss": 4.2238, "step": 543000 }, { "epoch": 5.94, "learning_rate": 8.831888246628132e-06, "loss": 4.2212, "step": 543500 }, { "epoch": 5.94, "learning_rate": 8.830793483972675e-06, "loss": 4.2095, "step": 544000 }, { "epoch": 5.95, "learning_rate": 8.82969872131722e-06, "loss": 4.2177, "step": 544500 }, { "epoch": 5.95, "learning_rate": 8.828603958661763e-06, "loss": 4.2181, "step": 545000 }, { "epoch": 5.96, "learning_rate": 8.827509196006307e-06, "loss": 4.2153, "step": 545500 }, { "epoch": 5.96, "learning_rate": 8.82641443335085e-06, "loss": 4.217, "step": 546000 }, { "epoch": 5.97, "learning_rate": 8.825319670695394e-06, "loss": 4.2193, "step": 546500 }, { "epoch": 5.98, "learning_rate": 8.824224908039938e-06, "loss": 4.2199, "step": 547000 }, { "epoch": 5.98, "learning_rate": 8.823130145384482e-06, "loss": 4.2183, "step": 547500 }, { "epoch": 5.99, "learning_rate": 8.822035382729025e-06, "loss": 4.2148, "step": 548000 }, { "epoch": 5.99, "learning_rate": 8.820940620073569e-06, "loss": 4.2139, "step": 548500 }, { "epoch": 6.0, "learning_rate": 8.819845857418113e-06, "loss": 4.2153, "step": 549000 }, { "epoch": 6.0, "learning_rate": 8.818751094762657e-06, "loss": 4.2151, "step": 549500 }, { "epoch": 6.01, "learning_rate": 8.8176563321072e-06, "loss": 4.2149, "step": 550000 }, { "epoch": 6.01, "learning_rate": 8.816561569451744e-06, "loss": 4.2179, "step": 550500 }, { "epoch": 6.02, "learning_rate": 8.815466806796286e-06, "loss": 4.2207, "step": 551000 }, { "epoch": 6.02, "learning_rate": 8.814372044140832e-06, "loss": 4.2181, "step": 551500 }, { "epoch": 6.03, "learning_rate": 8.813277281485375e-06, "loss": 4.2067, "step": 552000 }, { "epoch": 6.04, "learning_rate": 8.812182518829917e-06, "loss": 4.2144, "step": 552500 }, { "epoch": 6.04, "learning_rate": 8.811087756174463e-06, "loss": 4.2194, "step": 553000 }, { "epoch": 6.05, "learning_rate": 8.809992993519005e-06, "loss": 4.2161, "step": 553500 }, { "epoch": 6.05, "learning_rate": 8.80889823086355e-06, "loss": 4.2126, "step": 554000 }, { "epoch": 6.06, "learning_rate": 8.807803468208094e-06, "loss": 4.2094, "step": 554500 }, { "epoch": 6.06, "learning_rate": 8.806708705552636e-06, "loss": 4.2141, "step": 555000 }, { "epoch": 6.07, "learning_rate": 8.805613942897182e-06, "loss": 4.2145, "step": 555500 }, { "epoch": 6.07, "learning_rate": 8.804519180241724e-06, "loss": 4.2191, "step": 556000 }, { "epoch": 6.08, "learning_rate": 8.803424417586269e-06, "loss": 4.2153, "step": 556500 }, { "epoch": 6.08, "learning_rate": 8.802329654930811e-06, "loss": 4.2129, "step": 557000 }, { "epoch": 6.09, "learning_rate": 8.801234892275355e-06, "loss": 4.2123, "step": 557500 }, { "epoch": 6.1, "learning_rate": 8.8001401296199e-06, "loss": 4.2057, "step": 558000 }, { "epoch": 6.1, "learning_rate": 8.799045366964442e-06, "loss": 4.2069, "step": 558500 }, { "epoch": 6.11, "learning_rate": 8.797950604308986e-06, "loss": 4.2161, "step": 559000 }, { "epoch": 6.11, "learning_rate": 8.79685584165353e-06, "loss": 4.2102, "step": 559500 }, { "epoch": 6.12, "learning_rate": 8.795761078998074e-06, "loss": 4.2147, "step": 560000 }, { "epoch": 6.12, "learning_rate": 8.794666316342617e-06, "loss": 4.2103, "step": 560500 }, { "epoch": 6.13, "learning_rate": 8.793571553687161e-06, "loss": 4.2042, "step": 561000 }, { "epoch": 6.13, "learning_rate": 8.792476791031705e-06, "loss": 4.2035, "step": 561500 }, { "epoch": 6.14, "learning_rate": 8.791382028376248e-06, "loss": 4.2075, "step": 562000 }, { "epoch": 6.14, "learning_rate": 8.790287265720792e-06, "loss": 4.2101, "step": 562500 }, { "epoch": 6.15, "learning_rate": 8.789192503065336e-06, "loss": 4.2009, "step": 563000 }, { "epoch": 6.16, "learning_rate": 8.78809774040988e-06, "loss": 4.2032, "step": 563500 }, { "epoch": 6.16, "learning_rate": 8.787002977754423e-06, "loss": 4.2028, "step": 564000 }, { "epoch": 6.17, "learning_rate": 8.785908215098967e-06, "loss": 4.2094, "step": 564500 }, { "epoch": 6.17, "learning_rate": 8.784813452443511e-06, "loss": 4.2035, "step": 565000 }, { "epoch": 6.18, "learning_rate": 8.783718689788055e-06, "loss": 4.2115, "step": 565500 }, { "epoch": 6.18, "learning_rate": 8.782623927132598e-06, "loss": 4.2055, "step": 566000 }, { "epoch": 6.19, "learning_rate": 8.781529164477142e-06, "loss": 4.2055, "step": 566500 }, { "epoch": 6.19, "learning_rate": 8.780434401821686e-06, "loss": 4.2108, "step": 567000 }, { "epoch": 6.2, "learning_rate": 8.77933963916623e-06, "loss": 4.1994, "step": 567500 }, { "epoch": 6.2, "learning_rate": 8.778244876510773e-06, "loss": 4.2092, "step": 568000 }, { "epoch": 6.21, "learning_rate": 8.777150113855317e-06, "loss": 4.207, "step": 568500 }, { "epoch": 6.22, "learning_rate": 8.776055351199861e-06, "loss": 4.1989, "step": 569000 }, { "epoch": 6.22, "learning_rate": 8.774960588544405e-06, "loss": 4.2053, "step": 569500 }, { "epoch": 6.23, "learning_rate": 8.773865825888948e-06, "loss": 4.2003, "step": 570000 }, { "epoch": 6.23, "eval_accuracy": 0.3551333406187099, "eval_loss": 3.9607765674591064, "eval_runtime": 356.4258, "eval_samples_per_second": 865.134, "eval_steps_per_second": 13.52, "step": 570000 }, { "epoch": 6.23, "learning_rate": 8.772771063233492e-06, "loss": 4.2061, "step": 570500 }, { "epoch": 6.24, "learning_rate": 8.771676300578036e-06, "loss": 4.2057, "step": 571000 }, { "epoch": 6.24, "learning_rate": 8.77058153792258e-06, "loss": 4.2021, "step": 571500 }, { "epoch": 6.25, "learning_rate": 8.769486775267123e-06, "loss": 4.2049, "step": 572000 }, { "epoch": 6.25, "learning_rate": 8.768392012611665e-06, "loss": 4.2024, "step": 572500 }, { "epoch": 6.26, "learning_rate": 8.76729724995621e-06, "loss": 4.2034, "step": 573000 }, { "epoch": 6.26, "learning_rate": 8.766202487300753e-06, "loss": 4.1993, "step": 573500 }, { "epoch": 6.27, "learning_rate": 8.765107724645298e-06, "loss": 4.1996, "step": 574000 }, { "epoch": 6.28, "learning_rate": 8.764012961989842e-06, "loss": 4.2121, "step": 574500 }, { "epoch": 6.28, "learning_rate": 8.762918199334384e-06, "loss": 4.1988, "step": 575000 }, { "epoch": 6.29, "learning_rate": 8.76182343667893e-06, "loss": 4.2022, "step": 575500 }, { "epoch": 6.29, "learning_rate": 8.760728674023472e-06, "loss": 4.192, "step": 576000 }, { "epoch": 6.3, "learning_rate": 8.759633911368017e-06, "loss": 4.1901, "step": 576500 }, { "epoch": 6.3, "learning_rate": 8.758539148712559e-06, "loss": 4.2008, "step": 577000 }, { "epoch": 6.31, "learning_rate": 8.757444386057103e-06, "loss": 4.1938, "step": 577500 }, { "epoch": 6.31, "learning_rate": 8.756349623401648e-06, "loss": 4.2004, "step": 578000 }, { "epoch": 6.32, "learning_rate": 8.75525486074619e-06, "loss": 4.2013, "step": 578500 }, { "epoch": 6.32, "learning_rate": 8.754160098090734e-06, "loss": 4.1928, "step": 579000 }, { "epoch": 6.33, "learning_rate": 8.753065335435278e-06, "loss": 4.1987, "step": 579500 }, { "epoch": 6.34, "learning_rate": 8.751970572779822e-06, "loss": 4.2019, "step": 580000 }, { "epoch": 6.34, "learning_rate": 8.750875810124367e-06, "loss": 4.2004, "step": 580500 }, { "epoch": 6.35, "learning_rate": 8.749781047468909e-06, "loss": 4.1942, "step": 581000 }, { "epoch": 6.35, "learning_rate": 8.748686284813453e-06, "loss": 4.1963, "step": 581500 }, { "epoch": 6.36, "learning_rate": 8.747591522157996e-06, "loss": 4.1931, "step": 582000 }, { "epoch": 6.36, "learning_rate": 8.74649675950254e-06, "loss": 4.1939, "step": 582500 }, { "epoch": 6.37, "learning_rate": 8.745401996847084e-06, "loss": 4.1921, "step": 583000 }, { "epoch": 6.37, "learning_rate": 8.744307234191628e-06, "loss": 4.1957, "step": 583500 }, { "epoch": 6.38, "learning_rate": 8.743212471536171e-06, "loss": 4.1969, "step": 584000 }, { "epoch": 6.38, "learning_rate": 8.742117708880715e-06, "loss": 4.1971, "step": 584500 }, { "epoch": 6.39, "learning_rate": 8.741022946225259e-06, "loss": 4.199, "step": 585000 }, { "epoch": 6.4, "learning_rate": 8.739928183569803e-06, "loss": 4.1932, "step": 585500 }, { "epoch": 6.4, "learning_rate": 8.738833420914346e-06, "loss": 4.1938, "step": 586000 }, { "epoch": 6.41, "learning_rate": 8.73773865825889e-06, "loss": 4.1914, "step": 586500 }, { "epoch": 6.41, "learning_rate": 8.736643895603434e-06, "loss": 4.1896, "step": 587000 }, { "epoch": 6.42, "learning_rate": 8.735549132947978e-06, "loss": 4.1923, "step": 587500 }, { "epoch": 6.42, "learning_rate": 8.734454370292521e-06, "loss": 4.1961, "step": 588000 }, { "epoch": 6.43, "learning_rate": 8.733359607637065e-06, "loss": 4.1888, "step": 588500 }, { "epoch": 6.43, "learning_rate": 8.732264844981609e-06, "loss": 4.1962, "step": 589000 }, { "epoch": 6.44, "learning_rate": 8.731170082326153e-06, "loss": 4.1976, "step": 589500 }, { "epoch": 6.44, "learning_rate": 8.730075319670696e-06, "loss": 4.1939, "step": 590000 }, { "epoch": 6.45, "learning_rate": 8.72898055701524e-06, "loss": 4.1863, "step": 590500 }, { "epoch": 6.46, "learning_rate": 8.727885794359784e-06, "loss": 4.1934, "step": 591000 }, { "epoch": 6.46, "learning_rate": 8.726791031704328e-06, "loss": 4.1865, "step": 591500 }, { "epoch": 6.47, "learning_rate": 8.725696269048871e-06, "loss": 4.191, "step": 592000 }, { "epoch": 6.47, "learning_rate": 8.724601506393413e-06, "loss": 4.1854, "step": 592500 }, { "epoch": 6.48, "learning_rate": 8.723506743737959e-06, "loss": 4.1931, "step": 593000 }, { "epoch": 6.48, "learning_rate": 8.722411981082503e-06, "loss": 4.1905, "step": 593500 }, { "epoch": 6.49, "learning_rate": 8.721317218427046e-06, "loss": 4.1955, "step": 594000 }, { "epoch": 6.49, "learning_rate": 8.72022245577159e-06, "loss": 4.1986, "step": 594500 }, { "epoch": 6.5, "learning_rate": 8.719127693116132e-06, "loss": 4.1929, "step": 595000 }, { "epoch": 6.51, "learning_rate": 8.718032930460678e-06, "loss": 4.1828, "step": 595500 }, { "epoch": 6.51, "learning_rate": 8.71693816780522e-06, "loss": 4.1872, "step": 596000 }, { "epoch": 6.52, "learning_rate": 8.715843405149765e-06, "loss": 4.1922, "step": 596500 }, { "epoch": 6.52, "learning_rate": 8.714748642494309e-06, "loss": 4.1953, "step": 597000 }, { "epoch": 6.53, "learning_rate": 8.71365387983885e-06, "loss": 4.1905, "step": 597500 }, { "epoch": 6.53, "learning_rate": 8.712559117183396e-06, "loss": 4.1839, "step": 598000 }, { "epoch": 6.54, "learning_rate": 8.711464354527938e-06, "loss": 4.1882, "step": 598500 }, { "epoch": 6.54, "learning_rate": 8.710369591872482e-06, "loss": 4.1846, "step": 599000 }, { "epoch": 6.55, "learning_rate": 8.709274829217026e-06, "loss": 4.1886, "step": 599500 }, { "epoch": 6.55, "learning_rate": 8.70818006656157e-06, "loss": 4.1867, "step": 600000 }, { "epoch": 6.55, "eval_accuracy": 0.3571117133112928, "eval_loss": 3.944465160369873, "eval_runtime": 355.6615, "eval_samples_per_second": 866.993, "eval_steps_per_second": 13.549, "step": 600000 }, { "epoch": 6.56, "learning_rate": 8.707085303906115e-06, "loss": 4.1837, "step": 600500 }, { "epoch": 6.57, "learning_rate": 8.705990541250657e-06, "loss": 4.1858, "step": 601000 }, { "epoch": 6.57, "learning_rate": 8.7048957785952e-06, "loss": 4.1921, "step": 601500 }, { "epoch": 6.58, "learning_rate": 8.703801015939744e-06, "loss": 4.19, "step": 602000 }, { "epoch": 6.58, "learning_rate": 8.702706253284288e-06, "loss": 4.1859, "step": 602500 }, { "epoch": 6.59, "learning_rate": 8.701611490628834e-06, "loss": 4.1885, "step": 603000 }, { "epoch": 6.59, "learning_rate": 8.700516727973376e-06, "loss": 4.1784, "step": 603500 }, { "epoch": 6.6, "learning_rate": 8.69942196531792e-06, "loss": 4.1788, "step": 604000 }, { "epoch": 6.6, "learning_rate": 8.698327202662463e-06, "loss": 4.1843, "step": 604500 }, { "epoch": 6.61, "learning_rate": 8.697232440007007e-06, "loss": 4.1795, "step": 605000 }, { "epoch": 6.61, "learning_rate": 8.69613767735155e-06, "loss": 4.1837, "step": 605500 }, { "epoch": 6.62, "learning_rate": 8.695042914696094e-06, "loss": 4.1763, "step": 606000 }, { "epoch": 6.63, "learning_rate": 8.693948152040638e-06, "loss": 4.18, "step": 606500 }, { "epoch": 6.63, "learning_rate": 8.692853389385182e-06, "loss": 4.1817, "step": 607000 }, { "epoch": 6.64, "learning_rate": 8.691758626729726e-06, "loss": 4.1797, "step": 607500 }, { "epoch": 6.64, "learning_rate": 8.69066386407427e-06, "loss": 4.1787, "step": 608000 }, { "epoch": 6.65, "learning_rate": 8.689569101418813e-06, "loss": 4.1774, "step": 608500 }, { "epoch": 6.65, "learning_rate": 8.688474338763357e-06, "loss": 4.178, "step": 609000 }, { "epoch": 6.66, "learning_rate": 8.6873795761079e-06, "loss": 4.1807, "step": 609500 }, { "epoch": 6.66, "learning_rate": 8.686284813452444e-06, "loss": 4.1776, "step": 610000 }, { "epoch": 6.67, "learning_rate": 8.685190050796988e-06, "loss": 4.1818, "step": 610500 }, { "epoch": 6.67, "learning_rate": 8.684095288141532e-06, "loss": 4.1749, "step": 611000 }, { "epoch": 6.68, "learning_rate": 8.683000525486076e-06, "loss": 4.175, "step": 611500 }, { "epoch": 6.69, "learning_rate": 8.68190576283062e-06, "loss": 4.1792, "step": 612000 }, { "epoch": 6.69, "learning_rate": 8.680811000175161e-06, "loss": 4.1789, "step": 612500 }, { "epoch": 6.7, "learning_rate": 8.679716237519707e-06, "loss": 4.1803, "step": 613000 }, { "epoch": 6.7, "learning_rate": 8.67862147486425e-06, "loss": 4.1797, "step": 613500 }, { "epoch": 6.71, "learning_rate": 8.677526712208794e-06, "loss": 4.1745, "step": 614000 }, { "epoch": 6.71, "learning_rate": 8.676431949553338e-06, "loss": 4.1818, "step": 614500 }, { "epoch": 6.72, "learning_rate": 8.67533718689788e-06, "loss": 4.1771, "step": 615000 }, { "epoch": 6.72, "learning_rate": 8.674242424242426e-06, "loss": 4.1764, "step": 615500 }, { "epoch": 6.73, "learning_rate": 8.67314766158697e-06, "loss": 4.1728, "step": 616000 }, { "epoch": 6.73, "learning_rate": 8.672052898931513e-06, "loss": 4.18, "step": 616500 }, { "epoch": 6.74, "learning_rate": 8.670958136276057e-06, "loss": 4.1737, "step": 617000 }, { "epoch": 6.75, "learning_rate": 8.669863373620599e-06, "loss": 4.173, "step": 617500 }, { "epoch": 6.75, "learning_rate": 8.668768610965144e-06, "loss": 4.172, "step": 618000 }, { "epoch": 6.76, "learning_rate": 8.667673848309686e-06, "loss": 4.1704, "step": 618500 }, { "epoch": 6.76, "learning_rate": 8.666579085654232e-06, "loss": 4.1702, "step": 619000 }, { "epoch": 6.77, "learning_rate": 8.665484322998775e-06, "loss": 4.1676, "step": 619500 }, { "epoch": 6.77, "learning_rate": 8.664389560343318e-06, "loss": 4.162, "step": 620000 }, { "epoch": 6.78, "learning_rate": 8.663294797687863e-06, "loss": 4.1707, "step": 620500 }, { "epoch": 6.78, "learning_rate": 8.662200035032405e-06, "loss": 4.1698, "step": 621000 }, { "epoch": 6.79, "learning_rate": 8.661105272376949e-06, "loss": 4.1724, "step": 621500 }, { "epoch": 6.79, "learning_rate": 8.660010509721492e-06, "loss": 4.1695, "step": 622000 }, { "epoch": 6.8, "learning_rate": 8.658915747066036e-06, "loss": 4.1681, "step": 622500 }, { "epoch": 6.81, "learning_rate": 8.657820984410582e-06, "loss": 4.1779, "step": 623000 }, { "epoch": 6.81, "learning_rate": 8.656726221755124e-06, "loss": 4.1753, "step": 623500 }, { "epoch": 6.82, "learning_rate": 8.655631459099667e-06, "loss": 4.1771, "step": 624000 }, { "epoch": 6.82, "learning_rate": 8.654536696444211e-06, "loss": 4.1714, "step": 624500 }, { "epoch": 6.83, "learning_rate": 8.653441933788755e-06, "loss": 4.1806, "step": 625000 }, { "epoch": 6.83, "learning_rate": 8.652347171133299e-06, "loss": 4.1684, "step": 625500 }, { "epoch": 6.84, "learning_rate": 8.651252408477842e-06, "loss": 4.164, "step": 626000 }, { "epoch": 6.84, "learning_rate": 8.650157645822386e-06, "loss": 4.1757, "step": 626500 }, { "epoch": 6.85, "learning_rate": 8.64906288316693e-06, "loss": 4.1729, "step": 627000 }, { "epoch": 6.85, "learning_rate": 8.647968120511474e-06, "loss": 4.1731, "step": 627500 }, { "epoch": 6.86, "learning_rate": 8.646873357856017e-06, "loss": 4.1671, "step": 628000 }, { "epoch": 6.87, "learning_rate": 8.645778595200561e-06, "loss": 4.1686, "step": 628500 }, { "epoch": 6.87, "learning_rate": 8.644683832545105e-06, "loss": 4.1645, "step": 629000 }, { "epoch": 6.88, "learning_rate": 8.643589069889649e-06, "loss": 4.1627, "step": 629500 }, { "epoch": 6.88, "learning_rate": 8.642494307234192e-06, "loss": 4.1607, "step": 630000 }, { "epoch": 6.88, "eval_accuracy": 0.35897310975359037, "eval_loss": 3.92728853225708, "eval_runtime": 372.2682, "eval_samples_per_second": 828.317, "eval_steps_per_second": 12.945, "step": 630000 }, { "epoch": 6.89, "learning_rate": 8.641399544578736e-06, "loss": 4.1661, "step": 630500 }, { "epoch": 6.89, "learning_rate": 8.64030478192328e-06, "loss": 4.1655, "step": 631000 }, { "epoch": 6.9, "learning_rate": 8.639210019267824e-06, "loss": 4.1645, "step": 631500 }, { "epoch": 6.9, "learning_rate": 8.638115256612367e-06, "loss": 4.1643, "step": 632000 }, { "epoch": 6.91, "learning_rate": 8.637020493956911e-06, "loss": 4.1649, "step": 632500 }, { "epoch": 6.91, "learning_rate": 8.635925731301455e-06, "loss": 4.1649, "step": 633000 }, { "epoch": 6.92, "learning_rate": 8.634830968645999e-06, "loss": 4.1651, "step": 633500 }, { "epoch": 6.93, "learning_rate": 8.633736205990542e-06, "loss": 4.1646, "step": 634000 }, { "epoch": 6.93, "learning_rate": 8.632641443335086e-06, "loss": 4.166, "step": 634500 }, { "epoch": 6.94, "learning_rate": 8.631546680679628e-06, "loss": 4.1613, "step": 635000 }, { "epoch": 6.94, "learning_rate": 8.630451918024174e-06, "loss": 4.1618, "step": 635500 }, { "epoch": 6.95, "learning_rate": 8.629357155368717e-06, "loss": 4.1733, "step": 636000 }, { "epoch": 6.95, "learning_rate": 8.628262392713261e-06, "loss": 4.1644, "step": 636500 }, { "epoch": 6.96, "learning_rate": 8.627167630057805e-06, "loss": 4.1652, "step": 637000 }, { "epoch": 6.96, "learning_rate": 8.626072867402347e-06, "loss": 4.1573, "step": 637500 }, { "epoch": 6.97, "learning_rate": 8.624978104746892e-06, "loss": 4.1653, "step": 638000 }, { "epoch": 6.97, "learning_rate": 8.623883342091434e-06, "loss": 4.1592, "step": 638500 }, { "epoch": 6.98, "learning_rate": 8.62278857943598e-06, "loss": 4.1599, "step": 639000 }, { "epoch": 6.99, "learning_rate": 8.621693816780523e-06, "loss": 4.1687, "step": 639500 }, { "epoch": 6.99, "learning_rate": 8.620599054125066e-06, "loss": 4.1607, "step": 640000 }, { "epoch": 7.0, "learning_rate": 8.619504291469611e-06, "loss": 4.1614, "step": 640500 }, { "epoch": 7.0, "learning_rate": 8.618409528814153e-06, "loss": 4.1589, "step": 641000 }, { "epoch": 7.01, "learning_rate": 8.617314766158697e-06, "loss": 4.1523, "step": 641500 }, { "epoch": 7.01, "learning_rate": 8.616220003503242e-06, "loss": 4.1574, "step": 642000 }, { "epoch": 7.02, "learning_rate": 8.615125240847784e-06, "loss": 4.1559, "step": 642500 }, { "epoch": 7.02, "learning_rate": 8.61403047819233e-06, "loss": 4.161, "step": 643000 }, { "epoch": 7.03, "learning_rate": 8.612935715536872e-06, "loss": 4.16, "step": 643500 }, { "epoch": 7.03, "learning_rate": 8.611840952881415e-06, "loss": 4.1651, "step": 644000 }, { "epoch": 7.04, "learning_rate": 8.61074619022596e-06, "loss": 4.1636, "step": 644500 }, { "epoch": 7.05, "learning_rate": 8.609651427570503e-06, "loss": 4.1606, "step": 645000 }, { "epoch": 7.05, "learning_rate": 8.608556664915048e-06, "loss": 4.1532, "step": 645500 }, { "epoch": 7.06, "learning_rate": 8.60746190225959e-06, "loss": 4.1568, "step": 646000 }, { "epoch": 7.06, "learning_rate": 8.606367139604134e-06, "loss": 4.1606, "step": 646500 }, { "epoch": 7.07, "learning_rate": 8.605272376948678e-06, "loss": 4.1577, "step": 647000 }, { "epoch": 7.07, "learning_rate": 8.604177614293222e-06, "loss": 4.157, "step": 647500 }, { "epoch": 7.08, "learning_rate": 8.603082851637765e-06, "loss": 4.1634, "step": 648000 }, { "epoch": 7.08, "learning_rate": 8.601988088982309e-06, "loss": 4.1619, "step": 648500 }, { "epoch": 7.09, "learning_rate": 8.600893326326853e-06, "loss": 4.1552, "step": 649000 }, { "epoch": 7.09, "learning_rate": 8.599798563671397e-06, "loss": 4.1579, "step": 649500 }, { "epoch": 7.1, "learning_rate": 8.59870380101594e-06, "loss": 4.1506, "step": 650000 }, { "epoch": 7.11, "learning_rate": 8.597609038360484e-06, "loss": 4.1537, "step": 650500 }, { "epoch": 7.11, "learning_rate": 8.596514275705028e-06, "loss": 4.1568, "step": 651000 }, { "epoch": 7.12, "learning_rate": 8.595419513049572e-06, "loss": 4.1557, "step": 651500 }, { "epoch": 7.12, "learning_rate": 8.594324750394115e-06, "loss": 4.1523, "step": 652000 }, { "epoch": 7.13, "learning_rate": 8.593229987738659e-06, "loss": 4.1547, "step": 652500 }, { "epoch": 7.13, "learning_rate": 8.592135225083203e-06, "loss": 4.1496, "step": 653000 }, { "epoch": 7.14, "learning_rate": 8.591040462427747e-06, "loss": 4.159, "step": 653500 }, { "epoch": 7.14, "learning_rate": 8.58994569977229e-06, "loss": 4.1476, "step": 654000 }, { "epoch": 7.15, "learning_rate": 8.588850937116834e-06, "loss": 4.152, "step": 654500 }, { "epoch": 7.16, "learning_rate": 8.587756174461378e-06, "loss": 4.1484, "step": 655000 }, { "epoch": 7.16, "learning_rate": 8.586661411805922e-06, "loss": 4.1507, "step": 655500 }, { "epoch": 7.17, "learning_rate": 8.585566649150465e-06, "loss": 4.1466, "step": 656000 }, { "epoch": 7.17, "learning_rate": 8.584471886495009e-06, "loss": 4.1536, "step": 656500 }, { "epoch": 7.18, "learning_rate": 8.583377123839553e-06, "loss": 4.148, "step": 657000 }, { "epoch": 7.18, "learning_rate": 8.582282361184095e-06, "loss": 4.1566, "step": 657500 }, { "epoch": 7.19, "learning_rate": 8.58118759852864e-06, "loss": 4.1545, "step": 658000 }, { "epoch": 7.19, "learning_rate": 8.580092835873184e-06, "loss": 4.149, "step": 658500 }, { "epoch": 7.2, "learning_rate": 8.578998073217728e-06, "loss": 4.1533, "step": 659000 }, { "epoch": 7.2, "learning_rate": 8.577903310562271e-06, "loss": 4.1507, "step": 659500 }, { "epoch": 7.21, "learning_rate": 8.576808547906814e-06, "loss": 4.1511, "step": 660000 }, { "epoch": 7.21, "eval_accuracy": 0.36061295861164716, "eval_loss": 3.9130380153656006, "eval_runtime": 405.0854, "eval_samples_per_second": 761.212, "eval_steps_per_second": 11.896, "step": 660000 }, { "epoch": 7.22, "learning_rate": 8.575713785251359e-06, "loss": 4.1472, "step": 660500 }, { "epoch": 7.22, "learning_rate": 8.574619022595901e-06, "loss": 4.1503, "step": 661000 }, { "epoch": 7.23, "learning_rate": 8.573524259940445e-06, "loss": 4.1504, "step": 661500 }, { "epoch": 7.23, "learning_rate": 8.57242949728499e-06, "loss": 4.1518, "step": 662000 }, { "epoch": 7.24, "learning_rate": 8.571334734629532e-06, "loss": 4.1498, "step": 662500 }, { "epoch": 7.24, "learning_rate": 8.570239971974078e-06, "loss": 4.1437, "step": 663000 }, { "epoch": 7.25, "learning_rate": 8.56914520931862e-06, "loss": 4.1455, "step": 663500 }, { "epoch": 7.25, "learning_rate": 8.568050446663163e-06, "loss": 4.1459, "step": 664000 }, { "epoch": 7.26, "learning_rate": 8.566955684007709e-06, "loss": 4.1468, "step": 664500 }, { "epoch": 7.26, "learning_rate": 8.565860921352251e-06, "loss": 4.152, "step": 665000 }, { "epoch": 7.27, "learning_rate": 8.564766158696796e-06, "loss": 4.1408, "step": 665500 }, { "epoch": 7.28, "learning_rate": 8.563671396041338e-06, "loss": 4.147, "step": 666000 }, { "epoch": 7.28, "learning_rate": 8.562576633385882e-06, "loss": 4.1483, "step": 666500 }, { "epoch": 7.29, "learning_rate": 8.561481870730426e-06, "loss": 4.1393, "step": 667000 }, { "epoch": 7.29, "learning_rate": 8.56038710807497e-06, "loss": 4.1487, "step": 667500 }, { "epoch": 7.3, "learning_rate": 8.559292345419513e-06, "loss": 4.1489, "step": 668000 }, { "epoch": 7.3, "learning_rate": 8.558197582764057e-06, "loss": 4.1408, "step": 668500 }, { "epoch": 7.31, "learning_rate": 8.557102820108601e-06, "loss": 4.1451, "step": 669000 }, { "epoch": 7.31, "learning_rate": 8.556008057453145e-06, "loss": 4.1438, "step": 669500 }, { "epoch": 7.32, "learning_rate": 8.554913294797688e-06, "loss": 4.1441, "step": 670000 }, { "epoch": 7.32, "learning_rate": 8.553818532142232e-06, "loss": 4.1428, "step": 670500 }, { "epoch": 7.33, "learning_rate": 8.552723769486776e-06, "loss": 4.1427, "step": 671000 }, { "epoch": 7.34, "learning_rate": 8.55162900683132e-06, "loss": 4.1398, "step": 671500 }, { "epoch": 7.34, "learning_rate": 8.550534244175863e-06, "loss": 4.1395, "step": 672000 }, { "epoch": 7.35, "learning_rate": 8.549439481520407e-06, "loss": 4.1393, "step": 672500 }, { "epoch": 7.35, "learning_rate": 8.54834471886495e-06, "loss": 4.1376, "step": 673000 }, { "epoch": 7.36, "learning_rate": 8.547249956209495e-06, "loss": 4.1446, "step": 673500 }, { "epoch": 7.36, "learning_rate": 8.546155193554038e-06, "loss": 4.1407, "step": 674000 }, { "epoch": 7.37, "learning_rate": 8.545060430898582e-06, "loss": 4.1429, "step": 674500 }, { "epoch": 7.37, "learning_rate": 8.543965668243126e-06, "loss": 4.1434, "step": 675000 }, { "epoch": 7.38, "learning_rate": 8.54287090558767e-06, "loss": 4.136, "step": 675500 }, { "epoch": 7.38, "learning_rate": 8.541776142932213e-06, "loss": 4.1412, "step": 676000 }, { "epoch": 7.39, "learning_rate": 8.540681380276757e-06, "loss": 4.1408, "step": 676500 }, { "epoch": 7.4, "learning_rate": 8.5395866176213e-06, "loss": 4.1442, "step": 677000 }, { "epoch": 7.4, "learning_rate": 8.538491854965845e-06, "loss": 4.1358, "step": 677500 }, { "epoch": 7.41, "learning_rate": 8.537397092310388e-06, "loss": 4.1413, "step": 678000 }, { "epoch": 7.41, "learning_rate": 8.536302329654932e-06, "loss": 4.1447, "step": 678500 }, { "epoch": 7.42, "learning_rate": 8.535207566999476e-06, "loss": 4.1406, "step": 679000 }, { "epoch": 7.42, "learning_rate": 8.53411280434402e-06, "loss": 4.132, "step": 679500 }, { "epoch": 7.43, "learning_rate": 8.533018041688562e-06, "loss": 4.1336, "step": 680000 }, { "epoch": 7.43, "learning_rate": 8.531923279033107e-06, "loss": 4.136, "step": 680500 }, { "epoch": 7.44, "learning_rate": 8.53082851637765e-06, "loss": 4.1338, "step": 681000 }, { "epoch": 7.44, "learning_rate": 8.529733753722193e-06, "loss": 4.1341, "step": 681500 }, { "epoch": 7.45, "learning_rate": 8.528638991066738e-06, "loss": 4.1325, "step": 682000 }, { "epoch": 7.46, "learning_rate": 8.52754422841128e-06, "loss": 4.1388, "step": 682500 }, { "epoch": 7.46, "learning_rate": 8.526449465755826e-06, "loss": 4.1381, "step": 683000 }, { "epoch": 7.47, "learning_rate": 8.525354703100368e-06, "loss": 4.1384, "step": 683500 }, { "epoch": 7.47, "learning_rate": 8.524259940444911e-06, "loss": 4.1324, "step": 684000 }, { "epoch": 7.48, "learning_rate": 8.523165177789457e-06, "loss": 4.1371, "step": 684500 }, { "epoch": 7.48, "learning_rate": 8.522070415133999e-06, "loss": 4.1341, "step": 685000 }, { "epoch": 7.49, "learning_rate": 8.520975652478544e-06, "loss": 4.1358, "step": 685500 }, { "epoch": 7.49, "learning_rate": 8.519880889823086e-06, "loss": 4.1356, "step": 686000 }, { "epoch": 7.5, "learning_rate": 8.51878612716763e-06, "loss": 4.1326, "step": 686500 }, { "epoch": 7.5, "learning_rate": 8.517691364512174e-06, "loss": 4.141, "step": 687000 }, { "epoch": 7.51, "learning_rate": 8.516596601856718e-06, "loss": 4.1375, "step": 687500 }, { "epoch": 7.52, "learning_rate": 8.515501839201263e-06, "loss": 4.1371, "step": 688000 }, { "epoch": 7.52, "learning_rate": 8.514407076545805e-06, "loss": 4.1308, "step": 688500 }, { "epoch": 7.53, "learning_rate": 8.513312313890349e-06, "loss": 4.1361, "step": 689000 }, { "epoch": 7.53, "learning_rate": 8.512217551234893e-06, "loss": 4.1353, "step": 689500 }, { "epoch": 7.54, "learning_rate": 8.511122788579436e-06, "loss": 4.1335, "step": 690000 }, { "epoch": 7.54, "eval_accuracy": 0.36215195760224433, "eval_loss": 3.8971145153045654, "eval_runtime": 370.4132, "eval_samples_per_second": 832.465, "eval_steps_per_second": 13.01, "step": 690000 }, { "epoch": 7.54, "learning_rate": 8.51002802592398e-06, "loss": 4.1272, "step": 690500 }, { "epoch": 7.55, "learning_rate": 8.508933263268524e-06, "loss": 4.1283, "step": 691000 }, { "epoch": 7.55, "learning_rate": 8.507838500613068e-06, "loss": 4.1317, "step": 691500 }, { "epoch": 7.56, "learning_rate": 8.506743737957611e-06, "loss": 4.1308, "step": 692000 }, { "epoch": 7.56, "learning_rate": 8.505648975302155e-06, "loss": 4.1342, "step": 692500 }, { "epoch": 7.57, "learning_rate": 8.504554212646699e-06, "loss": 4.1308, "step": 693000 }, { "epoch": 7.58, "learning_rate": 8.503459449991243e-06, "loss": 4.1276, "step": 693500 }, { "epoch": 7.58, "learning_rate": 8.502364687335786e-06, "loss": 4.1322, "step": 694000 }, { "epoch": 7.59, "learning_rate": 8.50126992468033e-06, "loss": 4.1328, "step": 694500 }, { "epoch": 7.59, "learning_rate": 8.500175162024874e-06, "loss": 4.1367, "step": 695000 }, { "epoch": 7.6, "learning_rate": 8.499080399369418e-06, "loss": 4.1293, "step": 695500 }, { "epoch": 7.6, "learning_rate": 8.497985636713961e-06, "loss": 4.1304, "step": 696000 }, { "epoch": 7.61, "learning_rate": 8.496890874058505e-06, "loss": 4.1289, "step": 696500 }, { "epoch": 7.61, "learning_rate": 8.495796111403049e-06, "loss": 4.1283, "step": 697000 }, { "epoch": 7.62, "learning_rate": 8.494701348747593e-06, "loss": 4.136, "step": 697500 }, { "epoch": 7.62, "learning_rate": 8.493606586092136e-06, "loss": 4.1252, "step": 698000 }, { "epoch": 7.63, "learning_rate": 8.49251182343668e-06, "loss": 4.1329, "step": 698500 }, { "epoch": 7.64, "learning_rate": 8.491417060781224e-06, "loss": 4.1302, "step": 699000 }, { "epoch": 7.64, "learning_rate": 8.490322298125767e-06, "loss": 4.1287, "step": 699500 }, { "epoch": 7.65, "learning_rate": 8.48922753547031e-06, "loss": 4.1253, "step": 700000 }, { "epoch": 7.65, "learning_rate": 8.488132772814855e-06, "loss": 4.1333, "step": 700500 }, { "epoch": 7.66, "learning_rate": 8.487038010159399e-06, "loss": 4.1293, "step": 701000 }, { "epoch": 7.66, "learning_rate": 8.48594324750394e-06, "loss": 4.1265, "step": 701500 }, { "epoch": 7.67, "learning_rate": 8.484848484848486e-06, "loss": 4.1189, "step": 702000 }, { "epoch": 7.67, "learning_rate": 8.483753722193028e-06, "loss": 4.1226, "step": 702500 }, { "epoch": 7.68, "learning_rate": 8.482658959537574e-06, "loss": 4.1286, "step": 703000 }, { "epoch": 7.68, "learning_rate": 8.481564196882117e-06, "loss": 4.1307, "step": 703500 }, { "epoch": 7.69, "learning_rate": 8.48046943422666e-06, "loss": 4.1316, "step": 704000 }, { "epoch": 7.7, "learning_rate": 8.479374671571205e-06, "loss": 4.1238, "step": 704500 }, { "epoch": 7.7, "learning_rate": 8.478279908915747e-06, "loss": 4.1221, "step": 705000 }, { "epoch": 7.71, "learning_rate": 8.477185146260292e-06, "loss": 4.1243, "step": 705500 }, { "epoch": 7.71, "learning_rate": 8.476090383604834e-06, "loss": 4.1273, "step": 706000 }, { "epoch": 7.72, "learning_rate": 8.474995620949378e-06, "loss": 4.1229, "step": 706500 }, { "epoch": 7.72, "learning_rate": 8.473900858293924e-06, "loss": 4.1247, "step": 707000 }, { "epoch": 7.73, "learning_rate": 8.472806095638466e-06, "loss": 4.125, "step": 707500 }, { "epoch": 7.73, "learning_rate": 8.471711332983011e-06, "loss": 4.122, "step": 708000 }, { "epoch": 7.74, "learning_rate": 8.470616570327553e-06, "loss": 4.1269, "step": 708500 }, { "epoch": 7.74, "learning_rate": 8.469521807672097e-06, "loss": 4.1285, "step": 709000 }, { "epoch": 7.75, "learning_rate": 8.46842704501664e-06, "loss": 4.1246, "step": 709500 }, { "epoch": 7.76, "learning_rate": 8.467332282361184e-06, "loss": 4.1248, "step": 710000 }, { "epoch": 7.76, "learning_rate": 8.466237519705728e-06, "loss": 4.1193, "step": 710500 }, { "epoch": 7.77, "learning_rate": 8.465142757050272e-06, "loss": 4.1239, "step": 711000 }, { "epoch": 7.77, "learning_rate": 8.464047994394816e-06, "loss": 4.1176, "step": 711500 }, { "epoch": 7.78, "learning_rate": 8.46295323173936e-06, "loss": 4.1161, "step": 712000 }, { "epoch": 7.78, "learning_rate": 8.461858469083903e-06, "loss": 4.1234, "step": 712500 }, { "epoch": 7.79, "learning_rate": 8.460763706428447e-06, "loss": 4.1195, "step": 713000 }, { "epoch": 7.79, "learning_rate": 8.45966894377299e-06, "loss": 4.1128, "step": 713500 }, { "epoch": 7.8, "learning_rate": 8.458574181117534e-06, "loss": 4.1276, "step": 714000 }, { "epoch": 7.8, "learning_rate": 8.457479418462078e-06, "loss": 4.1213, "step": 714500 }, { "epoch": 7.81, "learning_rate": 8.456384655806622e-06, "loss": 4.1175, "step": 715000 }, { "epoch": 7.82, "learning_rate": 8.455289893151166e-06, "loss": 4.1188, "step": 715500 }, { "epoch": 7.82, "learning_rate": 8.45419513049571e-06, "loss": 4.1171, "step": 716000 }, { "epoch": 7.83, "learning_rate": 8.453100367840253e-06, "loss": 4.1169, "step": 716500 }, { "epoch": 7.83, "learning_rate": 8.452005605184797e-06, "loss": 4.1147, "step": 717000 }, { "epoch": 7.84, "learning_rate": 8.45091084252934e-06, "loss": 4.1127, "step": 717500 }, { "epoch": 7.84, "learning_rate": 8.449816079873884e-06, "loss": 4.1169, "step": 718000 }, { "epoch": 7.85, "learning_rate": 8.448721317218428e-06, "loss": 4.1082, "step": 718500 }, { "epoch": 7.85, "learning_rate": 8.447626554562972e-06, "loss": 4.1162, "step": 719000 }, { "epoch": 7.86, "learning_rate": 8.446531791907516e-06, "loss": 4.1179, "step": 719500 }, { "epoch": 7.87, "learning_rate": 8.44543702925206e-06, "loss": 4.1158, "step": 720000 }, { "epoch": 7.87, "eval_accuracy": 0.3641979743409406, "eval_loss": 3.8798394203186035, "eval_runtime": 406.6473, "eval_samples_per_second": 758.289, "eval_steps_per_second": 11.851, "step": 720000 }, { "epoch": 7.87, "learning_rate": 8.444342266596603e-06, "loss": 4.1193, "step": 720500 }, { "epoch": 7.88, "learning_rate": 8.443247503941147e-06, "loss": 4.115, "step": 721000 }, { "epoch": 7.88, "learning_rate": 8.442152741285689e-06, "loss": 4.1136, "step": 721500 }, { "epoch": 7.89, "learning_rate": 8.441057978630234e-06, "loss": 4.1133, "step": 722000 }, { "epoch": 7.89, "learning_rate": 8.439963215974776e-06, "loss": 4.1154, "step": 722500 }, { "epoch": 7.9, "learning_rate": 8.438868453319322e-06, "loss": 4.1144, "step": 723000 }, { "epoch": 7.9, "learning_rate": 8.437773690663865e-06, "loss": 4.118, "step": 723500 }, { "epoch": 7.91, "learning_rate": 8.436678928008407e-06, "loss": 4.1083, "step": 724000 }, { "epoch": 7.91, "learning_rate": 8.435584165352953e-06, "loss": 4.1087, "step": 724500 }, { "epoch": 7.92, "learning_rate": 8.434489402697495e-06, "loss": 4.1118, "step": 725000 }, { "epoch": 7.93, "learning_rate": 8.43339464004204e-06, "loss": 4.1149, "step": 725500 }, { "epoch": 7.93, "learning_rate": 8.432299877386582e-06, "loss": 4.1126, "step": 726000 }, { "epoch": 7.94, "learning_rate": 8.431205114731126e-06, "loss": 4.1158, "step": 726500 }, { "epoch": 7.94, "learning_rate": 8.430110352075672e-06, "loss": 4.1125, "step": 727000 }, { "epoch": 7.95, "learning_rate": 8.429015589420214e-06, "loss": 4.1092, "step": 727500 }, { "epoch": 7.95, "learning_rate": 8.427920826764759e-06, "loss": 4.1053, "step": 728000 }, { "epoch": 7.96, "learning_rate": 8.426826064109301e-06, "loss": 4.1127, "step": 728500 }, { "epoch": 7.96, "learning_rate": 8.425731301453845e-06, "loss": 4.1119, "step": 729000 }, { "epoch": 7.97, "learning_rate": 8.42463653879839e-06, "loss": 4.1072, "step": 729500 }, { "epoch": 7.97, "learning_rate": 8.423541776142932e-06, "loss": 4.1113, "step": 730000 }, { "epoch": 7.98, "learning_rate": 8.422447013487476e-06, "loss": 4.1098, "step": 730500 }, { "epoch": 7.99, "learning_rate": 8.42135225083202e-06, "loss": 4.1154, "step": 731000 }, { "epoch": 7.99, "learning_rate": 8.420257488176564e-06, "loss": 4.1136, "step": 731500 }, { "epoch": 8.0, "learning_rate": 8.419162725521107e-06, "loss": 4.1149, "step": 732000 }, { "epoch": 8.0, "learning_rate": 8.418067962865651e-06, "loss": 4.1115, "step": 732500 }, { "epoch": 8.01, "learning_rate": 8.416973200210195e-06, "loss": 4.1107, "step": 733000 }, { "epoch": 8.01, "learning_rate": 8.415878437554739e-06, "loss": 4.1078, "step": 733500 }, { "epoch": 8.02, "learning_rate": 8.414783674899282e-06, "loss": 4.1102, "step": 734000 }, { "epoch": 8.02, "learning_rate": 8.413688912243826e-06, "loss": 4.1054, "step": 734500 }, { "epoch": 8.03, "learning_rate": 8.41259414958837e-06, "loss": 4.1076, "step": 735000 }, { "epoch": 8.03, "learning_rate": 8.411499386932914e-06, "loss": 4.1064, "step": 735500 }, { "epoch": 8.04, "learning_rate": 8.410404624277457e-06, "loss": 4.1016, "step": 736000 }, { "epoch": 8.05, "learning_rate": 8.409309861622001e-06, "loss": 4.1095, "step": 736500 }, { "epoch": 8.05, "learning_rate": 8.408215098966545e-06, "loss": 4.1105, "step": 737000 }, { "epoch": 8.06, "learning_rate": 8.407120336311089e-06, "loss": 4.1114, "step": 737500 }, { "epoch": 8.06, "learning_rate": 8.406025573655632e-06, "loss": 4.1062, "step": 738000 }, { "epoch": 8.07, "learning_rate": 8.404930811000176e-06, "loss": 4.0972, "step": 738500 }, { "epoch": 8.07, "learning_rate": 8.40383604834472e-06, "loss": 4.1001, "step": 739000 }, { "epoch": 8.08, "learning_rate": 8.402741285689264e-06, "loss": 4.1067, "step": 739500 }, { "epoch": 8.08, "learning_rate": 8.401646523033807e-06, "loss": 4.1075, "step": 740000 }, { "epoch": 8.09, "learning_rate": 8.400551760378351e-06, "loss": 4.1027, "step": 740500 }, { "epoch": 8.09, "learning_rate": 8.399456997722895e-06, "loss": 4.1058, "step": 741000 }, { "epoch": 8.1, "learning_rate": 8.398362235067438e-06, "loss": 4.1035, "step": 741500 }, { "epoch": 8.11, "learning_rate": 8.397267472411982e-06, "loss": 4.1032, "step": 742000 }, { "epoch": 8.11, "learning_rate": 8.396172709756526e-06, "loss": 4.0998, "step": 742500 }, { "epoch": 8.12, "learning_rate": 8.39507794710107e-06, "loss": 4.1081, "step": 743000 }, { "epoch": 8.12, "learning_rate": 8.393983184445613e-06, "loss": 4.0962, "step": 743500 }, { "epoch": 8.13, "learning_rate": 8.392888421790155e-06, "loss": 4.1031, "step": 744000 }, { "epoch": 8.13, "learning_rate": 8.391793659134701e-06, "loss": 4.0981, "step": 744500 }, { "epoch": 8.14, "learning_rate": 8.390698896479243e-06, "loss": 4.1012, "step": 745000 }, { "epoch": 8.14, "learning_rate": 8.389604133823788e-06, "loss": 4.0952, "step": 745500 }, { "epoch": 8.15, "learning_rate": 8.388509371168332e-06, "loss": 4.1038, "step": 746000 }, { "epoch": 8.15, "learning_rate": 8.387414608512874e-06, "loss": 4.1041, "step": 746500 }, { "epoch": 8.16, "learning_rate": 8.38631984585742e-06, "loss": 4.1021, "step": 747000 }, { "epoch": 8.17, "learning_rate": 8.385225083201962e-06, "loss": 4.1062, "step": 747500 }, { "epoch": 8.17, "learning_rate": 8.384130320546507e-06, "loss": 4.1048, "step": 748000 }, { "epoch": 8.18, "learning_rate": 8.38303555789105e-06, "loss": 4.0978, "step": 748500 }, { "epoch": 8.18, "learning_rate": 8.381940795235593e-06, "loss": 4.101, "step": 749000 }, { "epoch": 8.19, "learning_rate": 8.380846032580138e-06, "loss": 4.1023, "step": 749500 }, { "epoch": 8.19, "learning_rate": 8.37975126992468e-06, "loss": 4.097, "step": 750000 }, { "epoch": 8.19, "eval_accuracy": 0.36634581732509974, "eval_loss": 3.863537311553955, "eval_runtime": 392.1262, "eval_samples_per_second": 786.369, "eval_steps_per_second": 12.289, "step": 750000 }, { "epoch": 8.2, "learning_rate": 8.378656507269224e-06, "loss": 4.0977, "step": 750500 }, { "epoch": 8.2, "learning_rate": 8.377561744613768e-06, "loss": 4.0912, "step": 751000 }, { "epoch": 8.21, "learning_rate": 8.376466981958312e-06, "loss": 4.1035, "step": 751500 }, { "epoch": 8.21, "learning_rate": 8.375372219302857e-06, "loss": 4.1034, "step": 752000 }, { "epoch": 8.22, "learning_rate": 8.374277456647399e-06, "loss": 4.1022, "step": 752500 }, { "epoch": 8.23, "learning_rate": 8.373182693991943e-06, "loss": 4.102, "step": 753000 }, { "epoch": 8.23, "learning_rate": 8.372087931336487e-06, "loss": 4.0954, "step": 753500 }, { "epoch": 8.24, "learning_rate": 8.37099316868103e-06, "loss": 4.1003, "step": 754000 }, { "epoch": 8.24, "learning_rate": 8.369898406025574e-06, "loss": 4.0885, "step": 754500 }, { "epoch": 8.25, "learning_rate": 8.368803643370118e-06, "loss": 4.0951, "step": 755000 }, { "epoch": 8.25, "learning_rate": 8.367708880714662e-06, "loss": 4.0943, "step": 755500 }, { "epoch": 8.26, "learning_rate": 8.366614118059205e-06, "loss": 4.0965, "step": 756000 }, { "epoch": 8.26, "learning_rate": 8.365519355403749e-06, "loss": 4.0922, "step": 756500 }, { "epoch": 8.27, "learning_rate": 8.364424592748293e-06, "loss": 4.0916, "step": 757000 }, { "epoch": 8.27, "learning_rate": 8.363329830092837e-06, "loss": 4.0939, "step": 757500 }, { "epoch": 8.28, "learning_rate": 8.36223506743738e-06, "loss": 4.0928, "step": 758000 }, { "epoch": 8.29, "learning_rate": 8.361140304781924e-06, "loss": 4.0977, "step": 758500 }, { "epoch": 8.29, "learning_rate": 8.360045542126468e-06, "loss": 4.096, "step": 759000 }, { "epoch": 8.3, "learning_rate": 8.358950779471012e-06, "loss": 4.093, "step": 759500 }, { "epoch": 8.3, "learning_rate": 8.357856016815555e-06, "loss": 4.0934, "step": 760000 }, { "epoch": 8.31, "learning_rate": 8.356761254160099e-06, "loss": 4.0885, "step": 760500 }, { "epoch": 8.31, "learning_rate": 8.355666491504643e-06, "loss": 4.0952, "step": 761000 }, { "epoch": 8.32, "learning_rate": 8.354571728849186e-06, "loss": 4.1021, "step": 761500 }, { "epoch": 8.32, "learning_rate": 8.35347696619373e-06, "loss": 4.0914, "step": 762000 }, { "epoch": 8.33, "learning_rate": 8.352382203538274e-06, "loss": 4.0935, "step": 762500 }, { "epoch": 8.33, "learning_rate": 8.351287440882818e-06, "loss": 4.0954, "step": 763000 }, { "epoch": 8.34, "learning_rate": 8.350192678227361e-06, "loss": 4.0942, "step": 763500 }, { "epoch": 8.35, "learning_rate": 8.349097915571904e-06, "loss": 4.0849, "step": 764000 }, { "epoch": 8.35, "learning_rate": 8.348003152916449e-06, "loss": 4.0976, "step": 764500 }, { "epoch": 8.36, "learning_rate": 8.346908390260993e-06, "loss": 4.0936, "step": 765000 }, { "epoch": 8.36, "learning_rate": 8.345813627605536e-06, "loss": 4.0895, "step": 765500 }, { "epoch": 8.37, "learning_rate": 8.34471886495008e-06, "loss": 4.0899, "step": 766000 }, { "epoch": 8.37, "learning_rate": 8.343624102294622e-06, "loss": 4.0894, "step": 766500 }, { "epoch": 8.38, "learning_rate": 8.342529339639168e-06, "loss": 4.0868, "step": 767000 }, { "epoch": 8.38, "learning_rate": 8.34143457698371e-06, "loss": 4.1046, "step": 767500 }, { "epoch": 8.39, "learning_rate": 8.340339814328255e-06, "loss": 4.0868, "step": 768000 }, { "epoch": 8.39, "learning_rate": 8.339245051672799e-06, "loss": 4.0866, "step": 768500 }, { "epoch": 8.4, "learning_rate": 8.338150289017341e-06, "loss": 4.0912, "step": 769000 }, { "epoch": 8.41, "learning_rate": 8.337055526361886e-06, "loss": 4.0871, "step": 769500 }, { "epoch": 8.41, "learning_rate": 8.335960763706428e-06, "loss": 4.0921, "step": 770000 }, { "epoch": 8.42, "learning_rate": 8.334866001050972e-06, "loss": 4.0896, "step": 770500 }, { "epoch": 8.42, "learning_rate": 8.333771238395516e-06, "loss": 4.0921, "step": 771000 }, { "epoch": 8.43, "learning_rate": 8.33267647574006e-06, "loss": 4.0933, "step": 771500 }, { "epoch": 8.43, "learning_rate": 8.331581713084605e-06, "loss": 4.092, "step": 772000 }, { "epoch": 8.44, "learning_rate": 8.330486950429147e-06, "loss": 4.0911, "step": 772500 }, { "epoch": 8.44, "learning_rate": 8.329392187773691e-06, "loss": 4.0849, "step": 773000 }, { "epoch": 8.45, "learning_rate": 8.328297425118235e-06, "loss": 4.0847, "step": 773500 }, { "epoch": 8.45, "learning_rate": 8.327202662462778e-06, "loss": 4.0816, "step": 774000 }, { "epoch": 8.46, "learning_rate": 8.326107899807322e-06, "loss": 4.0877, "step": 774500 }, { "epoch": 8.47, "learning_rate": 8.325013137151866e-06, "loss": 4.0834, "step": 775000 }, { "epoch": 8.47, "learning_rate": 8.32391837449641e-06, "loss": 4.0894, "step": 775500 }, { "epoch": 8.48, "learning_rate": 8.322823611840953e-06, "loss": 4.0789, "step": 776000 }, { "epoch": 8.48, "learning_rate": 8.321728849185497e-06, "loss": 4.0943, "step": 776500 }, { "epoch": 8.49, "learning_rate": 8.32063408653004e-06, "loss": 4.0833, "step": 777000 }, { "epoch": 8.49, "learning_rate": 8.319539323874585e-06, "loss": 4.0923, "step": 777500 }, { "epoch": 8.5, "learning_rate": 8.318444561219128e-06, "loss": 4.0876, "step": 778000 }, { "epoch": 8.5, "learning_rate": 8.317349798563672e-06, "loss": 4.0871, "step": 778500 }, { "epoch": 8.51, "learning_rate": 8.316255035908216e-06, "loss": 4.0859, "step": 779000 }, { "epoch": 8.52, "learning_rate": 8.31516027325276e-06, "loss": 4.0819, "step": 779500 }, { "epoch": 8.52, "learning_rate": 8.314065510597303e-06, "loss": 4.0831, "step": 780000 }, { "epoch": 8.52, "eval_accuracy": 0.3678912442329121, "eval_loss": 3.8493576049804688, "eval_runtime": 372.879, "eval_samples_per_second": 826.96, "eval_steps_per_second": 12.924, "step": 780000 }, { "epoch": 8.53, "learning_rate": 8.312970747941847e-06, "loss": 4.084, "step": 780500 }, { "epoch": 8.53, "learning_rate": 8.31187598528639e-06, "loss": 4.0865, "step": 781000 }, { "epoch": 8.54, "learning_rate": 8.310781222630934e-06, "loss": 4.0872, "step": 781500 }, { "epoch": 8.54, "learning_rate": 8.309686459975478e-06, "loss": 4.0863, "step": 782000 }, { "epoch": 8.55, "learning_rate": 8.308591697320022e-06, "loss": 4.0775, "step": 782500 }, { "epoch": 8.55, "learning_rate": 8.307496934664566e-06, "loss": 4.0818, "step": 783000 }, { "epoch": 8.56, "learning_rate": 8.30640217200911e-06, "loss": 4.0759, "step": 783500 }, { "epoch": 8.56, "learning_rate": 8.305307409353652e-06, "loss": 4.0822, "step": 784000 }, { "epoch": 8.57, "learning_rate": 8.304212646698197e-06, "loss": 4.0823, "step": 784500 }, { "epoch": 8.58, "learning_rate": 8.30311788404274e-06, "loss": 4.0778, "step": 785000 }, { "epoch": 8.58, "learning_rate": 8.302023121387284e-06, "loss": 4.078, "step": 785500 }, { "epoch": 8.59, "learning_rate": 8.300928358731828e-06, "loss": 4.0742, "step": 786000 }, { "epoch": 8.59, "learning_rate": 8.29983359607637e-06, "loss": 4.076, "step": 786500 }, { "epoch": 8.6, "learning_rate": 8.298738833420916e-06, "loss": 4.0805, "step": 787000 }, { "epoch": 8.6, "learning_rate": 8.297644070765458e-06, "loss": 4.0854, "step": 787500 }, { "epoch": 8.61, "learning_rate": 8.296549308110003e-06, "loss": 4.0796, "step": 788000 }, { "epoch": 8.61, "learning_rate": 8.295454545454547e-06, "loss": 4.0889, "step": 788500 }, { "epoch": 8.62, "learning_rate": 8.294359782799089e-06, "loss": 4.082, "step": 789000 }, { "epoch": 8.62, "learning_rate": 8.293265020143634e-06, "loss": 4.0715, "step": 789500 }, { "epoch": 8.63, "learning_rate": 8.292170257488176e-06, "loss": 4.0833, "step": 790000 }, { "epoch": 8.64, "learning_rate": 8.29107549483272e-06, "loss": 4.0767, "step": 790500 }, { "epoch": 8.64, "learning_rate": 8.289980732177266e-06, "loss": 4.0812, "step": 791000 }, { "epoch": 8.65, "learning_rate": 8.288885969521808e-06, "loss": 4.0725, "step": 791500 }, { "epoch": 8.65, "learning_rate": 8.287791206866353e-06, "loss": 4.0757, "step": 792000 }, { "epoch": 8.66, "learning_rate": 8.286696444210895e-06, "loss": 4.0727, "step": 792500 }, { "epoch": 8.66, "learning_rate": 8.285601681555439e-06, "loss": 4.0805, "step": 793000 }, { "epoch": 8.67, "learning_rate": 8.284506918899983e-06, "loss": 4.0808, "step": 793500 }, { "epoch": 8.67, "learning_rate": 8.283412156244526e-06, "loss": 4.0708, "step": 794000 }, { "epoch": 8.68, "learning_rate": 8.282317393589072e-06, "loss": 4.0748, "step": 794500 }, { "epoch": 8.68, "learning_rate": 8.281222630933614e-06, "loss": 4.0785, "step": 795000 }, { "epoch": 8.69, "learning_rate": 8.280127868278158e-06, "loss": 4.0723, "step": 795500 }, { "epoch": 8.7, "learning_rate": 8.279033105622701e-06, "loss": 4.0779, "step": 796000 }, { "epoch": 8.7, "learning_rate": 8.277938342967245e-06, "loss": 4.0734, "step": 796500 }, { "epoch": 8.71, "learning_rate": 8.276843580311789e-06, "loss": 4.0738, "step": 797000 }, { "epoch": 8.71, "learning_rate": 8.275748817656333e-06, "loss": 4.0765, "step": 797500 }, { "epoch": 8.72, "learning_rate": 8.274654055000876e-06, "loss": 4.0774, "step": 798000 }, { "epoch": 8.72, "learning_rate": 8.27355929234542e-06, "loss": 4.0711, "step": 798500 }, { "epoch": 8.73, "learning_rate": 8.272464529689964e-06, "loss": 4.0702, "step": 799000 }, { "epoch": 8.73, "learning_rate": 8.271369767034508e-06, "loss": 4.0772, "step": 799500 }, { "epoch": 8.74, "learning_rate": 8.270275004379051e-06, "loss": 4.068, "step": 800000 }, { "epoch": 8.74, "learning_rate": 8.269180241723595e-06, "loss": 4.073, "step": 800500 }, { "epoch": 8.75, "learning_rate": 8.268085479068139e-06, "loss": 4.0685, "step": 801000 }, { "epoch": 8.76, "learning_rate": 8.266990716412682e-06, "loss": 4.0721, "step": 801500 }, { "epoch": 8.76, "learning_rate": 8.265895953757226e-06, "loss": 4.0739, "step": 802000 }, { "epoch": 8.77, "learning_rate": 8.26480119110177e-06, "loss": 4.0737, "step": 802500 }, { "epoch": 8.77, "learning_rate": 8.263706428446314e-06, "loss": 4.0704, "step": 803000 }, { "epoch": 8.78, "learning_rate": 8.262611665790857e-06, "loss": 4.0764, "step": 803500 }, { "epoch": 8.78, "learning_rate": 8.261516903135401e-06, "loss": 4.0764, "step": 804000 }, { "epoch": 8.79, "learning_rate": 8.260422140479945e-06, "loss": 4.0747, "step": 804500 }, { "epoch": 8.79, "learning_rate": 8.259327377824489e-06, "loss": 4.0671, "step": 805000 }, { "epoch": 8.8, "learning_rate": 8.258232615169032e-06, "loss": 4.0691, "step": 805500 }, { "epoch": 8.8, "learning_rate": 8.257137852513576e-06, "loss": 4.0735, "step": 806000 }, { "epoch": 8.81, "learning_rate": 8.256043089858118e-06, "loss": 4.0712, "step": 806500 }, { "epoch": 8.82, "learning_rate": 8.254948327202664e-06, "loss": 4.0703, "step": 807000 }, { "epoch": 8.82, "learning_rate": 8.253853564547207e-06, "loss": 4.0751, "step": 807500 }, { "epoch": 8.83, "learning_rate": 8.252758801891751e-06, "loss": 4.0703, "step": 808000 }, { "epoch": 8.83, "learning_rate": 8.251664039236295e-06, "loss": 4.0613, "step": 808500 }, { "epoch": 8.84, "learning_rate": 8.250569276580837e-06, "loss": 4.0685, "step": 809000 }, { "epoch": 8.84, "learning_rate": 8.249474513925382e-06, "loss": 4.0715, "step": 809500 }, { "epoch": 8.85, "learning_rate": 8.248379751269924e-06, "loss": 4.0756, "step": 810000 }, { "epoch": 8.85, "eval_accuracy": 0.3696254287201218, "eval_loss": 3.8334319591522217, "eval_runtime": 359.9201, "eval_samples_per_second": 856.735, "eval_steps_per_second": 13.389, "step": 810000 }, { "epoch": 8.85, "learning_rate": 8.24728498861447e-06, "loss": 4.0739, "step": 810500 }, { "epoch": 8.86, "learning_rate": 8.246190225959014e-06, "loss": 4.0678, "step": 811000 }, { "epoch": 8.86, "learning_rate": 8.245095463303556e-06, "loss": 4.0601, "step": 811500 }, { "epoch": 8.87, "learning_rate": 8.244000700648101e-06, "loss": 4.0736, "step": 812000 }, { "epoch": 8.88, "learning_rate": 8.242905937992643e-06, "loss": 4.062, "step": 812500 }, { "epoch": 8.88, "learning_rate": 8.241811175337187e-06, "loss": 4.0656, "step": 813000 }, { "epoch": 8.89, "learning_rate": 8.24071641268173e-06, "loss": 4.0644, "step": 813500 }, { "epoch": 8.89, "learning_rate": 8.239621650026274e-06, "loss": 4.0646, "step": 814000 }, { "epoch": 8.9, "learning_rate": 8.23852688737082e-06, "loss": 4.0703, "step": 814500 }, { "epoch": 8.9, "learning_rate": 8.237432124715362e-06, "loss": 4.0711, "step": 815000 }, { "epoch": 8.91, "learning_rate": 8.236337362059906e-06, "loss": 4.0674, "step": 815500 }, { "epoch": 8.91, "learning_rate": 8.23524259940445e-06, "loss": 4.067, "step": 816000 }, { "epoch": 8.92, "learning_rate": 8.234147836748993e-06, "loss": 4.071, "step": 816500 }, { "epoch": 8.92, "learning_rate": 8.233053074093539e-06, "loss": 4.0701, "step": 817000 }, { "epoch": 8.93, "learning_rate": 8.23195831143808e-06, "loss": 4.0679, "step": 817500 }, { "epoch": 8.94, "learning_rate": 8.230863548782624e-06, "loss": 4.0676, "step": 818000 }, { "epoch": 8.94, "learning_rate": 8.229768786127168e-06, "loss": 4.0739, "step": 818500 }, { "epoch": 8.95, "learning_rate": 8.228674023471712e-06, "loss": 4.0618, "step": 819000 }, { "epoch": 8.95, "learning_rate": 8.227579260816256e-06, "loss": 4.0623, "step": 819500 }, { "epoch": 8.96, "learning_rate": 8.2264844981608e-06, "loss": 4.0708, "step": 820000 }, { "epoch": 8.96, "learning_rate": 8.225389735505343e-06, "loss": 4.0735, "step": 820500 }, { "epoch": 8.97, "learning_rate": 8.224294972849887e-06, "loss": 4.068, "step": 821000 }, { "epoch": 8.97, "learning_rate": 8.22320021019443e-06, "loss": 4.0648, "step": 821500 }, { "epoch": 8.98, "learning_rate": 8.222105447538974e-06, "loss": 4.0611, "step": 822000 }, { "epoch": 8.98, "learning_rate": 8.221010684883518e-06, "loss": 4.0658, "step": 822500 }, { "epoch": 8.99, "learning_rate": 8.219915922228062e-06, "loss": 4.0639, "step": 823000 }, { "epoch": 9.0, "learning_rate": 8.218821159572605e-06, "loss": 4.0705, "step": 823500 }, { "epoch": 9.0, "learning_rate": 8.21772639691715e-06, "loss": 4.0636, "step": 824000 }, { "epoch": 9.01, "learning_rate": 8.216631634261693e-06, "loss": 4.0635, "step": 824500 }, { "epoch": 9.01, "learning_rate": 8.215536871606237e-06, "loss": 4.0574, "step": 825000 }, { "epoch": 9.02, "learning_rate": 8.21444210895078e-06, "loss": 4.0572, "step": 825500 }, { "epoch": 9.02, "learning_rate": 8.213347346295324e-06, "loss": 4.0624, "step": 826000 }, { "epoch": 9.03, "learning_rate": 8.212252583639868e-06, "loss": 4.0648, "step": 826500 }, { "epoch": 9.03, "learning_rate": 8.211157820984412e-06, "loss": 4.0585, "step": 827000 }, { "epoch": 9.04, "learning_rate": 8.210063058328955e-06, "loss": 4.0554, "step": 827500 }, { "epoch": 9.04, "learning_rate": 8.208968295673499e-06, "loss": 4.0614, "step": 828000 }, { "epoch": 9.05, "learning_rate": 8.207873533018043e-06, "loss": 4.0611, "step": 828500 }, { "epoch": 9.06, "learning_rate": 8.206778770362585e-06, "loss": 4.0579, "step": 829000 }, { "epoch": 9.06, "learning_rate": 8.20568400770713e-06, "loss": 4.0595, "step": 829500 }, { "epoch": 9.07, "learning_rate": 8.204589245051674e-06, "loss": 4.06, "step": 830000 }, { "epoch": 9.07, "learning_rate": 8.203494482396218e-06, "loss": 4.0559, "step": 830500 }, { "epoch": 9.08, "learning_rate": 8.202399719740762e-06, "loss": 4.0595, "step": 831000 }, { "epoch": 9.08, "learning_rate": 8.201304957085304e-06, "loss": 4.0577, "step": 831500 }, { "epoch": 9.09, "learning_rate": 8.200210194429849e-06, "loss": 4.0585, "step": 832000 }, { "epoch": 9.09, "learning_rate": 8.199115431774391e-06, "loss": 4.065, "step": 832500 }, { "epoch": 9.1, "learning_rate": 8.198020669118935e-06, "loss": 4.0562, "step": 833000 }, { "epoch": 9.1, "learning_rate": 8.19692590646348e-06, "loss": 4.0597, "step": 833500 }, { "epoch": 9.11, "learning_rate": 8.195831143808022e-06, "loss": 4.0505, "step": 834000 }, { "epoch": 9.12, "learning_rate": 8.194736381152568e-06, "loss": 4.059, "step": 834500 }, { "epoch": 9.12, "learning_rate": 8.19364161849711e-06, "loss": 4.0633, "step": 835000 }, { "epoch": 9.13, "learning_rate": 8.192546855841654e-06, "loss": 4.0565, "step": 835500 }, { "epoch": 9.13, "learning_rate": 8.191452093186197e-06, "loss": 4.0634, "step": 836000 }, { "epoch": 9.14, "learning_rate": 8.190357330530741e-06, "loss": 4.0505, "step": 836500 }, { "epoch": 9.14, "learning_rate": 8.189262567875287e-06, "loss": 4.0624, "step": 837000 }, { "epoch": 9.15, "learning_rate": 8.188167805219829e-06, "loss": 4.0597, "step": 837500 }, { "epoch": 9.15, "learning_rate": 8.187073042564372e-06, "loss": 4.0505, "step": 838000 }, { "epoch": 9.16, "learning_rate": 8.185978279908916e-06, "loss": 4.0599, "step": 838500 }, { "epoch": 9.16, "learning_rate": 8.18488351725346e-06, "loss": 4.0539, "step": 839000 }, { "epoch": 9.17, "learning_rate": 8.183788754598004e-06, "loss": 4.0514, "step": 839500 }, { "epoch": 9.18, "learning_rate": 8.182693991942547e-06, "loss": 4.0533, "step": 840000 }, { "epoch": 9.18, "eval_accuracy": 0.3711509703166781, "eval_loss": 3.8200690746307373, "eval_runtime": 376.8245, "eval_samples_per_second": 818.301, "eval_steps_per_second": 12.788, "step": 840000 }, { "epoch": 9.18, "learning_rate": 8.181599229287091e-06, "loss": 4.0576, "step": 840500 }, { "epoch": 9.19, "learning_rate": 8.180504466631635e-06, "loss": 4.0559, "step": 841000 }, { "epoch": 9.19, "learning_rate": 8.179409703976179e-06, "loss": 4.055, "step": 841500 }, { "epoch": 9.2, "learning_rate": 8.178314941320722e-06, "loss": 4.0522, "step": 842000 }, { "epoch": 9.2, "learning_rate": 8.177220178665266e-06, "loss": 4.0519, "step": 842500 }, { "epoch": 9.21, "learning_rate": 8.17612541600981e-06, "loss": 4.0505, "step": 843000 }, { "epoch": 9.21, "learning_rate": 8.175030653354353e-06, "loss": 4.0524, "step": 843500 }, { "epoch": 9.22, "learning_rate": 8.173935890698897e-06, "loss": 4.0568, "step": 844000 }, { "epoch": 9.23, "learning_rate": 8.172841128043441e-06, "loss": 4.0534, "step": 844500 }, { "epoch": 9.23, "learning_rate": 8.171746365387985e-06, "loss": 4.0562, "step": 845000 }, { "epoch": 9.24, "learning_rate": 8.170651602732528e-06, "loss": 4.0547, "step": 845500 }, { "epoch": 9.24, "learning_rate": 8.169556840077072e-06, "loss": 4.0556, "step": 846000 }, { "epoch": 9.25, "learning_rate": 8.168462077421616e-06, "loss": 4.0557, "step": 846500 }, { "epoch": 9.25, "learning_rate": 8.16736731476616e-06, "loss": 4.0454, "step": 847000 }, { "epoch": 9.26, "learning_rate": 8.166272552110703e-06, "loss": 4.0532, "step": 847500 }, { "epoch": 9.26, "learning_rate": 8.165177789455247e-06, "loss": 4.0549, "step": 848000 }, { "epoch": 9.27, "learning_rate": 8.164083026799791e-06, "loss": 4.0524, "step": 848500 }, { "epoch": 9.27, "learning_rate": 8.162988264144333e-06, "loss": 4.0509, "step": 849000 }, { "epoch": 9.28, "learning_rate": 8.161893501488878e-06, "loss": 4.0471, "step": 849500 }, { "epoch": 9.29, "learning_rate": 8.160798738833422e-06, "loss": 4.0494, "step": 850000 }, { "epoch": 9.29, "learning_rate": 8.159703976177966e-06, "loss": 4.0541, "step": 850500 }, { "epoch": 9.3, "learning_rate": 8.15860921352251e-06, "loss": 4.0497, "step": 851000 }, { "epoch": 9.3, "learning_rate": 8.157514450867052e-06, "loss": 4.0558, "step": 851500 }, { "epoch": 9.31, "learning_rate": 8.156419688211597e-06, "loss": 4.057, "step": 852000 }, { "epoch": 9.31, "learning_rate": 8.15532492555614e-06, "loss": 4.0494, "step": 852500 }, { "epoch": 9.32, "learning_rate": 8.154230162900683e-06, "loss": 4.0437, "step": 853000 }, { "epoch": 9.32, "learning_rate": 8.153135400245228e-06, "loss": 4.0502, "step": 853500 }, { "epoch": 9.33, "learning_rate": 8.15204063758977e-06, "loss": 4.0533, "step": 854000 }, { "epoch": 9.33, "learning_rate": 8.150945874934316e-06, "loss": 4.0491, "step": 854500 }, { "epoch": 9.34, "learning_rate": 8.149851112278858e-06, "loss": 4.0538, "step": 855000 }, { "epoch": 9.35, "learning_rate": 8.148756349623402e-06, "loss": 4.0543, "step": 855500 }, { "epoch": 9.35, "learning_rate": 8.147661586967947e-06, "loss": 4.0545, "step": 856000 }, { "epoch": 9.36, "learning_rate": 8.146566824312489e-06, "loss": 4.0507, "step": 856500 }, { "epoch": 9.36, "learning_rate": 8.145472061657035e-06, "loss": 4.0403, "step": 857000 }, { "epoch": 9.37, "learning_rate": 8.144377299001577e-06, "loss": 4.0464, "step": 857500 }, { "epoch": 9.37, "learning_rate": 8.14328253634612e-06, "loss": 4.0504, "step": 858000 }, { "epoch": 9.38, "learning_rate": 8.142187773690664e-06, "loss": 4.0488, "step": 858500 }, { "epoch": 9.38, "learning_rate": 8.141093011035208e-06, "loss": 4.0441, "step": 859000 }, { "epoch": 9.39, "learning_rate": 8.139998248379752e-06, "loss": 4.0473, "step": 859500 }, { "epoch": 9.39, "learning_rate": 8.138903485724295e-06, "loss": 4.0462, "step": 860000 }, { "epoch": 9.4, "learning_rate": 8.137808723068839e-06, "loss": 4.049, "step": 860500 }, { "epoch": 9.41, "learning_rate": 8.136713960413383e-06, "loss": 4.0501, "step": 861000 }, { "epoch": 9.41, "learning_rate": 8.135619197757927e-06, "loss": 4.0461, "step": 861500 }, { "epoch": 9.42, "learning_rate": 8.13452443510247e-06, "loss": 4.0451, "step": 862000 }, { "epoch": 9.42, "learning_rate": 8.133429672447014e-06, "loss": 4.0427, "step": 862500 }, { "epoch": 9.43, "learning_rate": 8.132334909791558e-06, "loss": 4.0467, "step": 863000 }, { "epoch": 9.43, "learning_rate": 8.131240147136101e-06, "loss": 4.0425, "step": 863500 }, { "epoch": 9.44, "learning_rate": 8.130145384480645e-06, "loss": 4.0483, "step": 864000 }, { "epoch": 9.44, "learning_rate": 8.129050621825189e-06, "loss": 4.0458, "step": 864500 }, { "epoch": 9.45, "learning_rate": 8.127955859169733e-06, "loss": 4.0455, "step": 865000 }, { "epoch": 9.45, "learning_rate": 8.126861096514276e-06, "loss": 4.0451, "step": 865500 }, { "epoch": 9.46, "learning_rate": 8.12576633385882e-06, "loss": 4.0442, "step": 866000 }, { "epoch": 9.47, "learning_rate": 8.124671571203364e-06, "loss": 4.051, "step": 866500 }, { "epoch": 9.47, "learning_rate": 8.123576808547908e-06, "loss": 4.04, "step": 867000 }, { "epoch": 9.48, "learning_rate": 8.122482045892451e-06, "loss": 4.0349, "step": 867500 }, { "epoch": 9.48, "learning_rate": 8.121387283236995e-06, "loss": 4.0389, "step": 868000 }, { "epoch": 9.49, "learning_rate": 8.120292520581539e-06, "loss": 4.0458, "step": 868500 }, { "epoch": 9.49, "learning_rate": 8.119197757926083e-06, "loss": 4.0456, "step": 869000 }, { "epoch": 9.5, "learning_rate": 8.118102995270626e-06, "loss": 4.0392, "step": 869500 }, { "epoch": 9.5, "learning_rate": 8.11700823261517e-06, "loss": 4.0517, "step": 870000 }, { "epoch": 9.5, "eval_accuracy": 0.3724301166312612, "eval_loss": 3.8080055713653564, "eval_runtime": 395.988, "eval_samples_per_second": 778.7, "eval_steps_per_second": 12.17, "step": 870000 }, { "epoch": 9.51, "learning_rate": 8.115913469959714e-06, "loss": 4.046, "step": 870500 }, { "epoch": 9.51, "learning_rate": 8.114818707304258e-06, "loss": 4.0421, "step": 871000 }, { "epoch": 9.52, "learning_rate": 8.1137239446488e-06, "loss": 4.0439, "step": 871500 }, { "epoch": 9.53, "learning_rate": 8.112629181993345e-06, "loss": 4.0463, "step": 872000 }, { "epoch": 9.53, "learning_rate": 8.111534419337889e-06, "loss": 4.0428, "step": 872500 }, { "epoch": 9.54, "learning_rate": 8.110439656682431e-06, "loss": 4.0378, "step": 873000 }, { "epoch": 9.54, "learning_rate": 8.109344894026976e-06, "loss": 4.0421, "step": 873500 }, { "epoch": 9.55, "learning_rate": 8.108250131371518e-06, "loss": 4.0411, "step": 874000 }, { "epoch": 9.55, "learning_rate": 8.107155368716064e-06, "loss": 4.0441, "step": 874500 }, { "epoch": 9.56, "learning_rate": 8.106060606060606e-06, "loss": 4.033, "step": 875000 }, { "epoch": 9.56, "learning_rate": 8.10496584340515e-06, "loss": 4.0378, "step": 875500 }, { "epoch": 9.57, "learning_rate": 8.103871080749695e-06, "loss": 4.0396, "step": 876000 }, { "epoch": 9.57, "learning_rate": 8.102776318094237e-06, "loss": 4.0455, "step": 876500 }, { "epoch": 9.58, "learning_rate": 8.101681555438783e-06, "loss": 4.0437, "step": 877000 }, { "epoch": 9.59, "learning_rate": 8.100586792783325e-06, "loss": 4.0461, "step": 877500 }, { "epoch": 9.59, "learning_rate": 8.099492030127868e-06, "loss": 4.0346, "step": 878000 }, { "epoch": 9.6, "learning_rate": 8.098397267472414e-06, "loss": 4.0388, "step": 878500 }, { "epoch": 9.6, "learning_rate": 8.097302504816956e-06, "loss": 4.0429, "step": 879000 }, { "epoch": 9.61, "learning_rate": 8.096207742161501e-06, "loss": 4.0415, "step": 879500 }, { "epoch": 9.61, "learning_rate": 8.095112979506043e-06, "loss": 4.0349, "step": 880000 }, { "epoch": 9.62, "learning_rate": 8.094018216850587e-06, "loss": 4.04, "step": 880500 }, { "epoch": 9.62, "learning_rate": 8.09292345419513e-06, "loss": 4.0373, "step": 881000 }, { "epoch": 9.63, "learning_rate": 8.091828691539675e-06, "loss": 4.0396, "step": 881500 }, { "epoch": 9.63, "learning_rate": 8.090733928884218e-06, "loss": 4.0374, "step": 882000 }, { "epoch": 9.64, "learning_rate": 8.089639166228762e-06, "loss": 4.0384, "step": 882500 }, { "epoch": 9.65, "learning_rate": 8.088544403573306e-06, "loss": 4.0404, "step": 883000 }, { "epoch": 9.65, "learning_rate": 8.08744964091785e-06, "loss": 4.0398, "step": 883500 }, { "epoch": 9.66, "learning_rate": 8.086354878262393e-06, "loss": 4.0407, "step": 884000 }, { "epoch": 9.66, "learning_rate": 8.085260115606937e-06, "loss": 4.0417, "step": 884500 }, { "epoch": 9.67, "learning_rate": 8.08416535295148e-06, "loss": 4.0338, "step": 885000 }, { "epoch": 9.67, "learning_rate": 8.083070590296024e-06, "loss": 4.0389, "step": 885500 }, { "epoch": 9.68, "learning_rate": 8.081975827640568e-06, "loss": 4.0426, "step": 886000 }, { "epoch": 9.68, "learning_rate": 8.080881064985112e-06, "loss": 4.0395, "step": 886500 }, { "epoch": 9.69, "learning_rate": 8.079786302329656e-06, "loss": 4.0358, "step": 887000 }, { "epoch": 9.69, "learning_rate": 8.0786915396742e-06, "loss": 4.0376, "step": 887500 }, { "epoch": 9.7, "learning_rate": 8.077596777018743e-06, "loss": 4.0341, "step": 888000 }, { "epoch": 9.71, "learning_rate": 8.076502014363287e-06, "loss": 4.0379, "step": 888500 }, { "epoch": 9.71, "learning_rate": 8.07540725170783e-06, "loss": 4.0323, "step": 889000 }, { "epoch": 9.72, "learning_rate": 8.074312489052374e-06, "loss": 4.0319, "step": 889500 }, { "epoch": 9.72, "learning_rate": 8.073217726396918e-06, "loss": 4.0292, "step": 890000 }, { "epoch": 9.73, "learning_rate": 8.072122963741462e-06, "loss": 4.0374, "step": 890500 }, { "epoch": 9.73, "learning_rate": 8.071028201086006e-06, "loss": 4.0295, "step": 891000 }, { "epoch": 9.74, "learning_rate": 8.06993343843055e-06, "loss": 4.0429, "step": 891500 }, { "epoch": 9.74, "learning_rate": 8.068838675775093e-06, "loss": 4.0284, "step": 892000 }, { "epoch": 9.75, "learning_rate": 8.067743913119637e-06, "loss": 4.0341, "step": 892500 }, { "epoch": 9.75, "learning_rate": 8.066649150464179e-06, "loss": 4.0437, "step": 893000 }, { "epoch": 9.76, "learning_rate": 8.065554387808724e-06, "loss": 4.0339, "step": 893500 }, { "epoch": 9.77, "learning_rate": 8.064459625153266e-06, "loss": 4.026, "step": 894000 }, { "epoch": 9.77, "learning_rate": 8.063364862497812e-06, "loss": 4.031, "step": 894500 }, { "epoch": 9.78, "learning_rate": 8.062270099842356e-06, "loss": 4.0345, "step": 895000 }, { "epoch": 9.78, "learning_rate": 8.061175337186898e-06, "loss": 4.0309, "step": 895500 }, { "epoch": 9.79, "learning_rate": 8.060080574531443e-06, "loss": 4.0352, "step": 896000 }, { "epoch": 9.79, "learning_rate": 8.058985811875985e-06, "loss": 4.0315, "step": 896500 }, { "epoch": 9.8, "learning_rate": 8.05789104922053e-06, "loss": 4.0301, "step": 897000 }, { "epoch": 9.8, "learning_rate": 8.056796286565073e-06, "loss": 4.0363, "step": 897500 }, { "epoch": 9.81, "learning_rate": 8.055701523909616e-06, "loss": 4.0341, "step": 898000 }, { "epoch": 9.81, "learning_rate": 8.054606761254162e-06, "loss": 4.0283, "step": 898500 }, { "epoch": 9.82, "learning_rate": 8.053511998598704e-06, "loss": 4.0273, "step": 899000 }, { "epoch": 9.83, "learning_rate": 8.05241723594325e-06, "loss": 4.0301, "step": 899500 }, { "epoch": 9.83, "learning_rate": 8.051322473287791e-06, "loss": 4.0325, "step": 900000 }, { "epoch": 9.83, "eval_accuracy": 0.3734057290823326, "eval_loss": 3.7974772453308105, "eval_runtime": 375.4854, "eval_samples_per_second": 821.22, "eval_steps_per_second": 12.834, "step": 900000 }, { "epoch": 9.84, "learning_rate": 8.050227710632335e-06, "loss": 4.0232, "step": 900500 }, { "epoch": 9.84, "learning_rate": 8.049132947976879e-06, "loss": 4.03, "step": 901000 }, { "epoch": 9.85, "learning_rate": 8.048038185321423e-06, "loss": 4.0312, "step": 901500 }, { "epoch": 9.85, "learning_rate": 8.046943422665966e-06, "loss": 4.0266, "step": 902000 }, { "epoch": 9.86, "learning_rate": 8.04584866001051e-06, "loss": 4.033, "step": 902500 }, { "epoch": 9.86, "learning_rate": 8.044753897355054e-06, "loss": 4.0291, "step": 903000 }, { "epoch": 9.87, "learning_rate": 8.043659134699597e-06, "loss": 4.0298, "step": 903500 }, { "epoch": 9.88, "learning_rate": 8.042564372044141e-06, "loss": 4.0278, "step": 904000 }, { "epoch": 9.88, "learning_rate": 8.041469609388685e-06, "loss": 4.0283, "step": 904500 }, { "epoch": 9.89, "learning_rate": 8.040374846733229e-06, "loss": 4.0335, "step": 905000 }, { "epoch": 9.89, "learning_rate": 8.039280084077772e-06, "loss": 4.0286, "step": 905500 }, { "epoch": 9.9, "learning_rate": 8.038185321422316e-06, "loss": 4.0331, "step": 906000 }, { "epoch": 9.9, "learning_rate": 8.03709055876686e-06, "loss": 4.0231, "step": 906500 }, { "epoch": 9.91, "learning_rate": 8.035995796111404e-06, "loss": 4.0273, "step": 907000 }, { "epoch": 9.91, "learning_rate": 8.034901033455947e-06, "loss": 4.0257, "step": 907500 }, { "epoch": 9.92, "learning_rate": 8.033806270800491e-06, "loss": 4.0286, "step": 908000 }, { "epoch": 9.92, "learning_rate": 8.032711508145035e-06, "loss": 4.0303, "step": 908500 }, { "epoch": 9.93, "learning_rate": 8.031616745489579e-06, "loss": 4.0281, "step": 909000 }, { "epoch": 9.94, "learning_rate": 8.030521982834122e-06, "loss": 4.0256, "step": 909500 }, { "epoch": 9.94, "learning_rate": 8.029427220178666e-06, "loss": 4.0249, "step": 910000 }, { "epoch": 9.95, "learning_rate": 8.02833245752321e-06, "loss": 4.0307, "step": 910500 }, { "epoch": 9.95, "learning_rate": 8.027237694867754e-06, "loss": 4.0319, "step": 911000 }, { "epoch": 9.96, "learning_rate": 8.026142932212297e-06, "loss": 4.0331, "step": 911500 }, { "epoch": 9.96, "learning_rate": 8.025048169556841e-06, "loss": 4.0366, "step": 912000 }, { "epoch": 9.97, "learning_rate": 8.023953406901385e-06, "loss": 4.0305, "step": 912500 }, { "epoch": 9.97, "learning_rate": 8.022858644245929e-06, "loss": 4.0317, "step": 913000 }, { "epoch": 9.98, "learning_rate": 8.021763881590472e-06, "loss": 4.0272, "step": 913500 }, { "epoch": 9.98, "learning_rate": 8.020669118935016e-06, "loss": 4.0252, "step": 914000 }, { "epoch": 9.99, "learning_rate": 8.01957435627956e-06, "loss": 4.0278, "step": 914500 }, { "epoch": 10.0, "learning_rate": 8.018479593624104e-06, "loss": 4.0245, "step": 915000 }, { "epoch": 10.0, "learning_rate": 8.017384830968646e-06, "loss": 4.0211, "step": 915500 }, { "epoch": 10.01, "learning_rate": 8.016290068313191e-06, "loss": 4.025, "step": 916000 }, { "epoch": 10.01, "learning_rate": 8.015195305657733e-06, "loss": 4.0223, "step": 916500 }, { "epoch": 10.02, "learning_rate": 8.014100543002279e-06, "loss": 4.023, "step": 917000 }, { "epoch": 10.02, "learning_rate": 8.013005780346822e-06, "loss": 4.0177, "step": 917500 }, { "epoch": 10.03, "learning_rate": 8.011911017691364e-06, "loss": 4.0322, "step": 918000 }, { "epoch": 10.03, "learning_rate": 8.01081625503591e-06, "loss": 4.0228, "step": 918500 }, { "epoch": 10.04, "learning_rate": 8.009721492380452e-06, "loss": 4.0222, "step": 919000 }, { "epoch": 10.04, "learning_rate": 8.008626729724997e-06, "loss": 4.0196, "step": 919500 }, { "epoch": 10.05, "learning_rate": 8.00753196706954e-06, "loss": 4.0226, "step": 920000 }, { "epoch": 10.06, "learning_rate": 8.006437204414083e-06, "loss": 4.0252, "step": 920500 }, { "epoch": 10.06, "learning_rate": 8.005342441758628e-06, "loss": 4.0254, "step": 921000 }, { "epoch": 10.07, "learning_rate": 8.00424767910317e-06, "loss": 4.0186, "step": 921500 }, { "epoch": 10.07, "learning_rate": 8.003152916447714e-06, "loss": 4.0216, "step": 922000 }, { "epoch": 10.08, "learning_rate": 8.002058153792258e-06, "loss": 4.0275, "step": 922500 }, { "epoch": 10.08, "learning_rate": 8.000963391136802e-06, "loss": 4.0285, "step": 923000 }, { "epoch": 10.09, "learning_rate": 7.999868628481345e-06, "loss": 4.0288, "step": 923500 }, { "epoch": 10.09, "learning_rate": 7.99877386582589e-06, "loss": 4.0238, "step": 924000 }, { "epoch": 10.1, "learning_rate": 7.997679103170433e-06, "loss": 4.0249, "step": 924500 }, { "epoch": 10.1, "learning_rate": 7.996584340514977e-06, "loss": 4.0245, "step": 925000 }, { "epoch": 10.11, "learning_rate": 7.99548957785952e-06, "loss": 4.0196, "step": 925500 }, { "epoch": 10.12, "learning_rate": 7.994394815204064e-06, "loss": 4.0236, "step": 926000 }, { "epoch": 10.12, "learning_rate": 7.993300052548608e-06, "loss": 4.0278, "step": 926500 }, { "epoch": 10.13, "learning_rate": 7.992205289893152e-06, "loss": 4.02, "step": 927000 }, { "epoch": 10.13, "learning_rate": 7.991110527237695e-06, "loss": 4.0171, "step": 927500 }, { "epoch": 10.14, "learning_rate": 7.99001576458224e-06, "loss": 4.0243, "step": 928000 }, { "epoch": 10.14, "learning_rate": 7.988921001926783e-06, "loss": 4.0269, "step": 928500 }, { "epoch": 10.15, "learning_rate": 7.987826239271327e-06, "loss": 4.0219, "step": 929000 }, { "epoch": 10.15, "learning_rate": 7.98673147661587e-06, "loss": 4.0222, "step": 929500 }, { "epoch": 10.16, "learning_rate": 7.985636713960414e-06, "loss": 4.0142, "step": 930000 }, { "epoch": 10.16, "eval_accuracy": 0.37479925868640185, "eval_loss": 3.787224769592285, "eval_runtime": 384.7098, "eval_samples_per_second": 801.529, "eval_steps_per_second": 12.526, "step": 930000 }, { "epoch": 10.16, "learning_rate": 7.984541951304958e-06, "loss": 4.0175, "step": 930500 }, { "epoch": 10.17, "learning_rate": 7.983447188649502e-06, "loss": 4.0263, "step": 931000 }, { "epoch": 10.18, "learning_rate": 7.982352425994045e-06, "loss": 4.0241, "step": 931500 }, { "epoch": 10.18, "learning_rate": 7.981257663338589e-06, "loss": 4.0249, "step": 932000 }, { "epoch": 10.19, "learning_rate": 7.980162900683133e-06, "loss": 4.0254, "step": 932500 }, { "epoch": 10.19, "learning_rate": 7.979068138027677e-06, "loss": 4.019, "step": 933000 }, { "epoch": 10.2, "learning_rate": 7.97797337537222e-06, "loss": 4.0244, "step": 933500 }, { "epoch": 10.2, "learning_rate": 7.976878612716764e-06, "loss": 4.0193, "step": 934000 }, { "epoch": 10.21, "learning_rate": 7.975783850061308e-06, "loss": 4.0267, "step": 934500 }, { "epoch": 10.21, "learning_rate": 7.974689087405852e-06, "loss": 4.0156, "step": 935000 }, { "epoch": 10.22, "learning_rate": 7.973594324750394e-06, "loss": 4.0246, "step": 935500 }, { "epoch": 10.22, "learning_rate": 7.972499562094939e-06, "loss": 4.0165, "step": 936000 }, { "epoch": 10.23, "learning_rate": 7.971404799439481e-06, "loss": 4.0161, "step": 936500 }, { "epoch": 10.24, "learning_rate": 7.970310036784027e-06, "loss": 4.0162, "step": 937000 }, { "epoch": 10.24, "learning_rate": 7.96921527412857e-06, "loss": 4.0191, "step": 937500 }, { "epoch": 10.25, "learning_rate": 7.968120511473112e-06, "loss": 4.0228, "step": 938000 }, { "epoch": 10.25, "learning_rate": 7.967025748817658e-06, "loss": 4.022, "step": 938500 }, { "epoch": 10.26, "learning_rate": 7.9659309861622e-06, "loss": 4.0114, "step": 939000 }, { "epoch": 10.26, "learning_rate": 7.964836223506745e-06, "loss": 4.0193, "step": 939500 }, { "epoch": 10.27, "learning_rate": 7.963741460851289e-06, "loss": 4.0186, "step": 940000 }, { "epoch": 10.27, "learning_rate": 7.962646698195831e-06, "loss": 4.0208, "step": 940500 }, { "epoch": 10.28, "learning_rate": 7.961551935540376e-06, "loss": 4.0213, "step": 941000 }, { "epoch": 10.28, "learning_rate": 7.960457172884919e-06, "loss": 4.0119, "step": 941500 }, { "epoch": 10.29, "learning_rate": 7.959362410229462e-06, "loss": 4.0197, "step": 942000 }, { "epoch": 10.3, "learning_rate": 7.958267647574006e-06, "loss": 4.019, "step": 942500 }, { "epoch": 10.3, "learning_rate": 7.95717288491855e-06, "loss": 4.0103, "step": 943000 }, { "epoch": 10.31, "learning_rate": 7.956078122263095e-06, "loss": 4.0114, "step": 943500 }, { "epoch": 10.31, "learning_rate": 7.954983359607637e-06, "loss": 4.0182, "step": 944000 }, { "epoch": 10.32, "learning_rate": 7.953888596952181e-06, "loss": 4.0123, "step": 944500 }, { "epoch": 10.32, "learning_rate": 7.952793834296725e-06, "loss": 4.022, "step": 945000 }, { "epoch": 10.33, "learning_rate": 7.951699071641268e-06, "loss": 4.0192, "step": 945500 }, { "epoch": 10.33, "learning_rate": 7.950604308985812e-06, "loss": 4.0195, "step": 946000 }, { "epoch": 10.34, "learning_rate": 7.949509546330356e-06, "loss": 4.0086, "step": 946500 }, { "epoch": 10.34, "learning_rate": 7.9484147836749e-06, "loss": 4.0102, "step": 947000 }, { "epoch": 10.35, "learning_rate": 7.947320021019443e-06, "loss": 4.0205, "step": 947500 }, { "epoch": 10.36, "learning_rate": 7.946225258363987e-06, "loss": 4.0209, "step": 948000 }, { "epoch": 10.36, "learning_rate": 7.945130495708531e-06, "loss": 4.0118, "step": 948500 }, { "epoch": 10.37, "learning_rate": 7.944035733053075e-06, "loss": 4.0146, "step": 949000 }, { "epoch": 10.37, "learning_rate": 7.942940970397618e-06, "loss": 4.0167, "step": 949500 }, { "epoch": 10.38, "learning_rate": 7.941846207742162e-06, "loss": 4.0063, "step": 950000 }, { "epoch": 10.38, "learning_rate": 7.940751445086706e-06, "loss": 4.0106, "step": 950500 }, { "epoch": 10.39, "learning_rate": 7.93965668243125e-06, "loss": 4.0172, "step": 951000 }, { "epoch": 10.39, "learning_rate": 7.938561919775793e-06, "loss": 4.0119, "step": 951500 }, { "epoch": 10.4, "learning_rate": 7.937467157120337e-06, "loss": 4.0215, "step": 952000 }, { "epoch": 10.4, "learning_rate": 7.936372394464881e-06, "loss": 4.0229, "step": 952500 }, { "epoch": 10.41, "learning_rate": 7.935277631809425e-06, "loss": 4.0141, "step": 953000 }, { "epoch": 10.42, "learning_rate": 7.934182869153968e-06, "loss": 4.01, "step": 953500 }, { "epoch": 10.42, "learning_rate": 7.933088106498512e-06, "loss": 4.011, "step": 954000 }, { "epoch": 10.43, "learning_rate": 7.931993343843056e-06, "loss": 4.0146, "step": 954500 }, { "epoch": 10.43, "learning_rate": 7.9308985811876e-06, "loss": 4.0153, "step": 955000 }, { "epoch": 10.44, "learning_rate": 7.929803818532142e-06, "loss": 4.0144, "step": 955500 }, { "epoch": 10.44, "learning_rate": 7.928709055876687e-06, "loss": 4.0182, "step": 956000 }, { "epoch": 10.45, "learning_rate": 7.92761429322123e-06, "loss": 4.0179, "step": 956500 }, { "epoch": 10.45, "learning_rate": 7.926519530565775e-06, "loss": 4.0136, "step": 957000 }, { "epoch": 10.46, "learning_rate": 7.925424767910318e-06, "loss": 4.0139, "step": 957500 }, { "epoch": 10.46, "learning_rate": 7.92433000525486e-06, "loss": 4.012, "step": 958000 }, { "epoch": 10.47, "learning_rate": 7.923235242599406e-06, "loss": 4.0102, "step": 958500 }, { "epoch": 10.48, "learning_rate": 7.922140479943948e-06, "loss": 4.0122, "step": 959000 }, { "epoch": 10.48, "learning_rate": 7.921045717288493e-06, "loss": 4.011, "step": 959500 }, { "epoch": 10.49, "learning_rate": 7.919950954633037e-06, "loss": 4.0124, "step": 960000 }, { "epoch": 10.49, "eval_accuracy": 0.37589907025191177, "eval_loss": 3.7787649631500244, "eval_runtime": 356.2318, "eval_samples_per_second": 865.605, "eval_steps_per_second": 13.528, "step": 960000 }, { "epoch": 10.49, "learning_rate": 7.918856191977579e-06, "loss": 4.0222, "step": 960500 }, { "epoch": 10.5, "learning_rate": 7.917761429322124e-06, "loss": 4.0039, "step": 961000 }, { "epoch": 10.5, "learning_rate": 7.916666666666667e-06, "loss": 4.0101, "step": 961500 }, { "epoch": 10.51, "learning_rate": 7.91557190401121e-06, "loss": 4.0158, "step": 962000 }, { "epoch": 10.51, "learning_rate": 7.914477141355754e-06, "loss": 4.0159, "step": 962500 }, { "epoch": 10.52, "learning_rate": 7.913382378700298e-06, "loss": 4.0115, "step": 963000 }, { "epoch": 10.52, "learning_rate": 7.912287616044843e-06, "loss": 4.0135, "step": 963500 }, { "epoch": 10.53, "learning_rate": 7.911192853389385e-06, "loss": 4.0122, "step": 964000 }, { "epoch": 10.54, "learning_rate": 7.910098090733929e-06, "loss": 4.0146, "step": 964500 }, { "epoch": 10.54, "learning_rate": 7.909003328078473e-06, "loss": 4.0101, "step": 965000 }, { "epoch": 10.55, "learning_rate": 7.907908565423016e-06, "loss": 4.0125, "step": 965500 }, { "epoch": 10.55, "learning_rate": 7.906813802767562e-06, "loss": 4.0137, "step": 966000 }, { "epoch": 10.56, "learning_rate": 7.905719040112104e-06, "loss": 4.0091, "step": 966500 }, { "epoch": 10.56, "learning_rate": 7.904624277456648e-06, "loss": 4.0092, "step": 967000 }, { "epoch": 10.57, "learning_rate": 7.903529514801191e-06, "loss": 4.0074, "step": 967500 }, { "epoch": 10.57, "learning_rate": 7.902434752145735e-06, "loss": 4.0077, "step": 968000 }, { "epoch": 10.58, "learning_rate": 7.901339989490279e-06, "loss": 4.0118, "step": 968500 }, { "epoch": 10.59, "learning_rate": 7.900245226834823e-06, "loss": 4.0094, "step": 969000 }, { "epoch": 10.59, "learning_rate": 7.899150464179366e-06, "loss": 4.0131, "step": 969500 }, { "epoch": 10.6, "learning_rate": 7.89805570152391e-06, "loss": 4.0112, "step": 970000 }, { "epoch": 10.6, "learning_rate": 7.896960938868454e-06, "loss": 4.0149, "step": 970500 }, { "epoch": 10.61, "learning_rate": 7.895866176212998e-06, "loss": 4.0105, "step": 971000 }, { "epoch": 10.61, "learning_rate": 7.894771413557541e-06, "loss": 4.0095, "step": 971500 }, { "epoch": 10.62, "learning_rate": 7.893676650902085e-06, "loss": 4.0082, "step": 972000 }, { "epoch": 10.62, "learning_rate": 7.892581888246629e-06, "loss": 4.0111, "step": 972500 }, { "epoch": 10.63, "learning_rate": 7.891487125591173e-06, "loss": 4.0075, "step": 973000 }, { "epoch": 10.63, "learning_rate": 7.890392362935716e-06, "loss": 4.0073, "step": 973500 }, { "epoch": 10.64, "learning_rate": 7.88929760028026e-06, "loss": 4.0113, "step": 974000 }, { "epoch": 10.65, "learning_rate": 7.888202837624804e-06, "loss": 4.0053, "step": 974500 }, { "epoch": 10.65, "learning_rate": 7.887108074969348e-06, "loss": 4.0056, "step": 975000 }, { "epoch": 10.66, "learning_rate": 7.88601331231389e-06, "loss": 4.006, "step": 975500 }, { "epoch": 10.66, "learning_rate": 7.884918549658435e-06, "loss": 4.0023, "step": 976000 }, { "epoch": 10.67, "learning_rate": 7.883823787002979e-06, "loss": 4.0083, "step": 976500 }, { "epoch": 10.67, "learning_rate": 7.882729024347523e-06, "loss": 4.0105, "step": 977000 }, { "epoch": 10.68, "learning_rate": 7.881634261692066e-06, "loss": 4.0063, "step": 977500 }, { "epoch": 10.68, "learning_rate": 7.880539499036608e-06, "loss": 4.005, "step": 978000 }, { "epoch": 10.69, "learning_rate": 7.879444736381154e-06, "loss": 4.0077, "step": 978500 }, { "epoch": 10.69, "learning_rate": 7.878349973725698e-06, "loss": 4.0078, "step": 979000 }, { "epoch": 10.7, "learning_rate": 7.877255211070241e-06, "loss": 4.0065, "step": 979500 }, { "epoch": 10.71, "learning_rate": 7.876160448414785e-06, "loss": 4.0031, "step": 980000 }, { "epoch": 10.71, "learning_rate": 7.875065685759327e-06, "loss": 4.0053, "step": 980500 }, { "epoch": 10.72, "learning_rate": 7.873970923103872e-06, "loss": 4.0077, "step": 981000 }, { "epoch": 10.72, "learning_rate": 7.872876160448415e-06, "loss": 4.0067, "step": 981500 }, { "epoch": 10.73, "learning_rate": 7.87178139779296e-06, "loss": 4.004, "step": 982000 }, { "epoch": 10.73, "learning_rate": 7.870686635137504e-06, "loss": 4.0029, "step": 982500 }, { "epoch": 10.74, "learning_rate": 7.869591872482046e-06, "loss": 3.9984, "step": 983000 }, { "epoch": 10.74, "learning_rate": 7.868497109826591e-06, "loss": 4.0009, "step": 983500 }, { "epoch": 10.75, "learning_rate": 7.867402347171133e-06, "loss": 3.9991, "step": 984000 }, { "epoch": 10.75, "learning_rate": 7.866307584515677e-06, "loss": 4.0029, "step": 984500 }, { "epoch": 10.76, "learning_rate": 7.86521282186022e-06, "loss": 4.0012, "step": 985000 }, { "epoch": 10.77, "learning_rate": 7.864118059204764e-06, "loss": 4.0033, "step": 985500 }, { "epoch": 10.77, "learning_rate": 7.86302329654931e-06, "loss": 4.0084, "step": 986000 }, { "epoch": 10.78, "learning_rate": 7.861928533893852e-06, "loss": 4.0097, "step": 986500 }, { "epoch": 10.78, "learning_rate": 7.860833771238396e-06, "loss": 4.0054, "step": 987000 }, { "epoch": 10.79, "learning_rate": 7.85973900858294e-06, "loss": 4.0023, "step": 987500 }, { "epoch": 10.79, "learning_rate": 7.858644245927483e-06, "loss": 4.0027, "step": 988000 }, { "epoch": 10.8, "learning_rate": 7.857549483272027e-06, "loss": 3.9998, "step": 988500 }, { "epoch": 10.8, "learning_rate": 7.85645472061657e-06, "loss": 4.0056, "step": 989000 }, { "epoch": 10.81, "learning_rate": 7.855359957961114e-06, "loss": 4.0037, "step": 989500 }, { "epoch": 10.81, "learning_rate": 7.854265195305658e-06, "loss": 4.0076, "step": 990000 }, { "epoch": 10.81, "eval_accuracy": 0.37669120696391495, "eval_loss": 3.767904281616211, "eval_runtime": 392.0984, "eval_samples_per_second": 786.425, "eval_steps_per_second": 12.29, "step": 990000 }, { "epoch": 10.82, "learning_rate": 7.853170432650202e-06, "loss": 4.0036, "step": 990500 }, { "epoch": 10.83, "learning_rate": 7.852075669994746e-06, "loss": 4.0046, "step": 991000 }, { "epoch": 10.83, "learning_rate": 7.85098090733929e-06, "loss": 4.0013, "step": 991500 }, { "epoch": 10.84, "learning_rate": 7.849886144683833e-06, "loss": 3.9969, "step": 992000 }, { "epoch": 10.84, "learning_rate": 7.848791382028377e-06, "loss": 4.006, "step": 992500 }, { "epoch": 10.85, "learning_rate": 7.84769661937292e-06, "loss": 3.9994, "step": 993000 }, { "epoch": 10.85, "learning_rate": 7.846601856717464e-06, "loss": 3.9995, "step": 993500 }, { "epoch": 10.86, "learning_rate": 7.845507094062008e-06, "loss": 4.009, "step": 994000 }, { "epoch": 10.86, "learning_rate": 7.844412331406552e-06, "loss": 3.996, "step": 994500 }, { "epoch": 10.87, "learning_rate": 7.843317568751096e-06, "loss": 4.0011, "step": 995000 }, { "epoch": 10.87, "learning_rate": 7.84222280609564e-06, "loss": 3.998, "step": 995500 }, { "epoch": 10.88, "learning_rate": 7.841128043440183e-06, "loss": 4.0059, "step": 996000 }, { "epoch": 10.89, "learning_rate": 7.840033280784727e-06, "loss": 4.0085, "step": 996500 }, { "epoch": 10.89, "learning_rate": 7.83893851812927e-06, "loss": 4.0063, "step": 997000 }, { "epoch": 10.9, "learning_rate": 7.837843755473814e-06, "loss": 4.006, "step": 997500 }, { "epoch": 10.9, "learning_rate": 7.836748992818356e-06, "loss": 4.0072, "step": 998000 }, { "epoch": 10.91, "learning_rate": 7.835654230162902e-06, "loss": 4.0008, "step": 998500 }, { "epoch": 10.91, "learning_rate": 7.834559467507446e-06, "loss": 3.9951, "step": 999000 }, { "epoch": 10.92, "learning_rate": 7.83346470485199e-06, "loss": 4.0045, "step": 999500 }, { "epoch": 10.92, "learning_rate": 7.832369942196533e-06, "loss": 4.0042, "step": 1000000 }, { "epoch": 10.93, "learning_rate": 7.831275179541075e-06, "loss": 3.9975, "step": 1000500 }, { "epoch": 10.93, "learning_rate": 7.83018041688562e-06, "loss": 4.001, "step": 1001000 }, { "epoch": 10.94, "learning_rate": 7.829085654230164e-06, "loss": 4.0053, "step": 1001500 }, { "epoch": 10.95, "learning_rate": 7.827990891574708e-06, "loss": 4.0013, "step": 1002000 }, { "epoch": 10.95, "learning_rate": 7.826896128919252e-06, "loss": 3.9962, "step": 1002500 }, { "epoch": 10.96, "learning_rate": 7.825801366263794e-06, "loss": 4.0011, "step": 1003000 }, { "epoch": 10.96, "learning_rate": 7.82470660360834e-06, "loss": 4.0024, "step": 1003500 }, { "epoch": 10.97, "learning_rate": 7.823611840952881e-06, "loss": 4.0091, "step": 1004000 }, { "epoch": 10.97, "learning_rate": 7.822517078297425e-06, "loss": 4.0033, "step": 1004500 }, { "epoch": 10.98, "learning_rate": 7.82142231564197e-06, "loss": 3.9985, "step": 1005000 }, { "epoch": 10.98, "learning_rate": 7.820327552986512e-06, "loss": 3.998, "step": 1005500 }, { "epoch": 10.99, "learning_rate": 7.819232790331058e-06, "loss": 4.0029, "step": 1006000 }, { "epoch": 10.99, "learning_rate": 7.8181380276756e-06, "loss": 4.0, "step": 1006500 }, { "epoch": 11.0, "learning_rate": 7.817043265020144e-06, "loss": 3.9994, "step": 1007000 }, { "epoch": 11.01, "learning_rate": 7.815948502364687e-06, "loss": 3.9986, "step": 1007500 }, { "epoch": 11.01, "learning_rate": 7.814853739709231e-06, "loss": 3.9923, "step": 1008000 }, { "epoch": 11.02, "learning_rate": 7.813758977053777e-06, "loss": 4.0012, "step": 1008500 }, { "epoch": 11.02, "learning_rate": 7.812664214398319e-06, "loss": 3.9932, "step": 1009000 }, { "epoch": 11.03, "learning_rate": 7.811569451742862e-06, "loss": 3.9999, "step": 1009500 }, { "epoch": 11.03, "learning_rate": 7.810474689087406e-06, "loss": 3.9942, "step": 1010000 }, { "epoch": 11.04, "learning_rate": 7.80937992643195e-06, "loss": 4.0015, "step": 1010500 }, { "epoch": 11.04, "learning_rate": 7.808285163776494e-06, "loss": 3.9961, "step": 1011000 }, { "epoch": 11.05, "learning_rate": 7.807190401121037e-06, "loss": 3.9998, "step": 1011500 }, { "epoch": 11.05, "learning_rate": 7.806095638465581e-06, "loss": 4.003, "step": 1012000 }, { "epoch": 11.06, "learning_rate": 7.805000875810125e-06, "loss": 3.9921, "step": 1012500 }, { "epoch": 11.07, "learning_rate": 7.803906113154669e-06, "loss": 3.9958, "step": 1013000 }, { "epoch": 11.07, "learning_rate": 7.802811350499212e-06, "loss": 3.9996, "step": 1013500 }, { "epoch": 11.08, "learning_rate": 7.801716587843756e-06, "loss": 3.9987, "step": 1014000 }, { "epoch": 11.08, "learning_rate": 7.8006218251883e-06, "loss": 3.9997, "step": 1014500 }, { "epoch": 11.09, "learning_rate": 7.799527062532844e-06, "loss": 3.9934, "step": 1015000 }, { "epoch": 11.09, "learning_rate": 7.798432299877387e-06, "loss": 3.9941, "step": 1015500 }, { "epoch": 11.1, "learning_rate": 7.797337537221931e-06, "loss": 3.9961, "step": 1016000 }, { "epoch": 11.1, "learning_rate": 7.796242774566475e-06, "loss": 3.9942, "step": 1016500 }, { "epoch": 11.11, "learning_rate": 7.795148011911019e-06, "loss": 3.9969, "step": 1017000 }, { "epoch": 11.11, "learning_rate": 7.794053249255562e-06, "loss": 3.9904, "step": 1017500 }, { "epoch": 11.12, "learning_rate": 7.792958486600106e-06, "loss": 3.9997, "step": 1018000 }, { "epoch": 11.13, "learning_rate": 7.79186372394465e-06, "loss": 3.9944, "step": 1018500 }, { "epoch": 11.13, "learning_rate": 7.790768961289194e-06, "loss": 3.9956, "step": 1019000 }, { "epoch": 11.14, "learning_rate": 7.789674198633737e-06, "loss": 3.9878, "step": 1019500 }, { "epoch": 11.14, "learning_rate": 7.788579435978281e-06, "loss": 3.9919, "step": 1020000 }, { "epoch": 11.14, "eval_accuracy": 0.3774963903885396, "eval_loss": 3.760852813720703, "eval_runtime": 406.3879, "eval_samples_per_second": 758.773, "eval_steps_per_second": 11.858, "step": 1020000 }, { "epoch": 11.15, "learning_rate": 7.787484673322823e-06, "loss": 3.9972, "step": 1020500 }, { "epoch": 11.15, "learning_rate": 7.786389910667369e-06, "loss": 3.9968, "step": 1021000 }, { "epoch": 11.16, "learning_rate": 7.785295148011912e-06, "loss": 3.9935, "step": 1021500 }, { "epoch": 11.16, "learning_rate": 7.784200385356456e-06, "loss": 3.9949, "step": 1022000 }, { "epoch": 11.17, "learning_rate": 7.783105622701e-06, "loss": 4.0001, "step": 1022500 }, { "epoch": 11.17, "learning_rate": 7.782010860045542e-06, "loss": 3.9929, "step": 1023000 }, { "epoch": 11.18, "learning_rate": 7.780916097390087e-06, "loss": 3.9951, "step": 1023500 }, { "epoch": 11.19, "learning_rate": 7.77982133473463e-06, "loss": 3.987, "step": 1024000 }, { "epoch": 11.19, "learning_rate": 7.778726572079173e-06, "loss": 3.992, "step": 1024500 }, { "epoch": 11.2, "learning_rate": 7.777631809423718e-06, "loss": 3.9928, "step": 1025000 }, { "epoch": 11.2, "learning_rate": 7.77653704676826e-06, "loss": 3.9946, "step": 1025500 }, { "epoch": 11.21, "learning_rate": 7.775442284112806e-06, "loss": 3.9996, "step": 1026000 }, { "epoch": 11.21, "learning_rate": 7.774347521457348e-06, "loss": 3.9936, "step": 1026500 }, { "epoch": 11.22, "learning_rate": 7.773252758801892e-06, "loss": 3.9937, "step": 1027000 }, { "epoch": 11.22, "learning_rate": 7.772157996146437e-06, "loss": 4.0016, "step": 1027500 }, { "epoch": 11.23, "learning_rate": 7.77106323349098e-06, "loss": 3.9879, "step": 1028000 }, { "epoch": 11.24, "learning_rate": 7.769968470835525e-06, "loss": 3.9966, "step": 1028500 }, { "epoch": 11.24, "learning_rate": 7.768873708180067e-06, "loss": 3.9894, "step": 1029000 }, { "epoch": 11.25, "learning_rate": 7.76777894552461e-06, "loss": 3.997, "step": 1029500 }, { "epoch": 11.25, "learning_rate": 7.766684182869154e-06, "loss": 3.9883, "step": 1030000 }, { "epoch": 11.26, "learning_rate": 7.765589420213698e-06, "loss": 3.9955, "step": 1030500 }, { "epoch": 11.26, "learning_rate": 7.764494657558242e-06, "loss": 3.9957, "step": 1031000 }, { "epoch": 11.27, "learning_rate": 7.763399894902785e-06, "loss": 3.9961, "step": 1031500 }, { "epoch": 11.27, "learning_rate": 7.762305132247329e-06, "loss": 3.9933, "step": 1032000 }, { "epoch": 11.28, "learning_rate": 7.761210369591873e-06, "loss": 3.9899, "step": 1032500 }, { "epoch": 11.28, "learning_rate": 7.760115606936417e-06, "loss": 3.9954, "step": 1033000 }, { "epoch": 11.29, "learning_rate": 7.75902084428096e-06, "loss": 3.9961, "step": 1033500 }, { "epoch": 11.3, "learning_rate": 7.757926081625504e-06, "loss": 3.9972, "step": 1034000 }, { "epoch": 11.3, "learning_rate": 7.756831318970048e-06, "loss": 3.9915, "step": 1034500 }, { "epoch": 11.31, "learning_rate": 7.755736556314592e-06, "loss": 3.9947, "step": 1035000 }, { "epoch": 11.31, "learning_rate": 7.754641793659135e-06, "loss": 3.9975, "step": 1035500 }, { "epoch": 11.32, "learning_rate": 7.753547031003679e-06, "loss": 3.9931, "step": 1036000 }, { "epoch": 11.32, "learning_rate": 7.752452268348223e-06, "loss": 3.9907, "step": 1036500 }, { "epoch": 11.33, "learning_rate": 7.751357505692767e-06, "loss": 3.9955, "step": 1037000 }, { "epoch": 11.33, "learning_rate": 7.75026274303731e-06, "loss": 3.9861, "step": 1037500 }, { "epoch": 11.34, "learning_rate": 7.749167980381854e-06, "loss": 3.9853, "step": 1038000 }, { "epoch": 11.34, "learning_rate": 7.748073217726398e-06, "loss": 3.9911, "step": 1038500 }, { "epoch": 11.35, "learning_rate": 7.746978455070942e-06, "loss": 3.9882, "step": 1039000 }, { "epoch": 11.36, "learning_rate": 7.745883692415485e-06, "loss": 3.9895, "step": 1039500 }, { "epoch": 11.36, "learning_rate": 7.744788929760029e-06, "loss": 3.9937, "step": 1040000 }, { "epoch": 11.37, "learning_rate": 7.743694167104573e-06, "loss": 3.9956, "step": 1040500 }, { "epoch": 11.37, "learning_rate": 7.742599404449117e-06, "loss": 3.9868, "step": 1041000 }, { "epoch": 11.38, "learning_rate": 7.74150464179366e-06, "loss": 3.9879, "step": 1041500 }, { "epoch": 11.38, "learning_rate": 7.740409879138204e-06, "loss": 3.9913, "step": 1042000 }, { "epoch": 11.39, "learning_rate": 7.739315116482748e-06, "loss": 3.9824, "step": 1042500 }, { "epoch": 11.39, "learning_rate": 7.73822035382729e-06, "loss": 3.9922, "step": 1043000 }, { "epoch": 11.4, "learning_rate": 7.737125591171835e-06, "loss": 3.9871, "step": 1043500 }, { "epoch": 11.4, "learning_rate": 7.736030828516379e-06, "loss": 3.9939, "step": 1044000 }, { "epoch": 11.41, "learning_rate": 7.734936065860921e-06, "loss": 3.9925, "step": 1044500 }, { "epoch": 11.42, "learning_rate": 7.733841303205466e-06, "loss": 3.9892, "step": 1045000 }, { "epoch": 11.42, "learning_rate": 7.732746540550008e-06, "loss": 3.9904, "step": 1045500 }, { "epoch": 11.43, "learning_rate": 7.731651777894554e-06, "loss": 3.9866, "step": 1046000 }, { "epoch": 11.43, "learning_rate": 7.730557015239096e-06, "loss": 3.9921, "step": 1046500 }, { "epoch": 11.44, "learning_rate": 7.72946225258364e-06, "loss": 3.9883, "step": 1047000 }, { "epoch": 11.44, "learning_rate": 7.728367489928185e-06, "loss": 3.9877, "step": 1047500 }, { "epoch": 11.45, "learning_rate": 7.727272727272727e-06, "loss": 3.9884, "step": 1048000 }, { "epoch": 11.45, "learning_rate": 7.726177964617273e-06, "loss": 3.9919, "step": 1048500 }, { "epoch": 11.46, "learning_rate": 7.725083201961815e-06, "loss": 3.9851, "step": 1049000 }, { "epoch": 11.46, "learning_rate": 7.723988439306358e-06, "loss": 3.9906, "step": 1049500 }, { "epoch": 11.47, "learning_rate": 7.722893676650902e-06, "loss": 3.9888, "step": 1050000 }, { "epoch": 11.47, "eval_accuracy": 0.3783494649662846, "eval_loss": 3.7550032138824463, "eval_runtime": 354.8036, "eval_samples_per_second": 869.089, "eval_steps_per_second": 13.582, "step": 1050000 }, { "epoch": 11.48, "learning_rate": 7.721798913995446e-06, "loss": 3.9928, "step": 1050500 }, { "epoch": 11.48, "learning_rate": 7.720704151339991e-06, "loss": 3.988, "step": 1051000 }, { "epoch": 11.49, "learning_rate": 7.719609388684533e-06, "loss": 3.9943, "step": 1051500 }, { "epoch": 11.49, "learning_rate": 7.718514626029077e-06, "loss": 3.9922, "step": 1052000 }, { "epoch": 11.5, "learning_rate": 7.717419863373621e-06, "loss": 3.9893, "step": 1052500 }, { "epoch": 11.5, "learning_rate": 7.716325100718165e-06, "loss": 3.9943, "step": 1053000 }, { "epoch": 11.51, "learning_rate": 7.715230338062708e-06, "loss": 3.9921, "step": 1053500 }, { "epoch": 11.51, "learning_rate": 7.714135575407252e-06, "loss": 3.9902, "step": 1054000 }, { "epoch": 11.52, "learning_rate": 7.713040812751796e-06, "loss": 3.988, "step": 1054500 }, { "epoch": 11.52, "learning_rate": 7.71194605009634e-06, "loss": 3.9932, "step": 1055000 }, { "epoch": 11.53, "learning_rate": 7.710851287440883e-06, "loss": 3.9897, "step": 1055500 }, { "epoch": 11.54, "learning_rate": 7.709756524785427e-06, "loss": 3.9843, "step": 1056000 }, { "epoch": 11.54, "learning_rate": 7.70866176212997e-06, "loss": 3.9881, "step": 1056500 }, { "epoch": 11.55, "learning_rate": 7.707566999474515e-06, "loss": 3.9862, "step": 1057000 }, { "epoch": 11.55, "learning_rate": 7.706472236819058e-06, "loss": 3.9913, "step": 1057500 }, { "epoch": 11.56, "learning_rate": 7.705377474163602e-06, "loss": 3.9855, "step": 1058000 }, { "epoch": 11.56, "learning_rate": 7.704282711508146e-06, "loss": 3.9858, "step": 1058500 }, { "epoch": 11.57, "learning_rate": 7.70318794885269e-06, "loss": 3.9919, "step": 1059000 }, { "epoch": 11.57, "learning_rate": 7.702093186197233e-06, "loss": 3.9904, "step": 1059500 }, { "epoch": 11.58, "learning_rate": 7.700998423541777e-06, "loss": 3.9877, "step": 1060000 }, { "epoch": 11.58, "learning_rate": 7.69990366088632e-06, "loss": 3.9787, "step": 1060500 }, { "epoch": 11.59, "learning_rate": 7.698808898230865e-06, "loss": 3.9905, "step": 1061000 }, { "epoch": 11.6, "learning_rate": 7.697714135575408e-06, "loss": 3.9875, "step": 1061500 }, { "epoch": 11.6, "learning_rate": 7.696619372919952e-06, "loss": 3.9914, "step": 1062000 }, { "epoch": 11.61, "learning_rate": 7.695524610264496e-06, "loss": 3.9828, "step": 1062500 }, { "epoch": 11.61, "learning_rate": 7.694429847609038e-06, "loss": 3.9855, "step": 1063000 }, { "epoch": 11.62, "learning_rate": 7.693335084953583e-06, "loss": 3.9892, "step": 1063500 }, { "epoch": 11.62, "learning_rate": 7.692240322298127e-06, "loss": 3.9844, "step": 1064000 }, { "epoch": 11.63, "learning_rate": 7.691145559642669e-06, "loss": 3.9895, "step": 1064500 }, { "epoch": 11.63, "learning_rate": 7.690050796987214e-06, "loss": 3.9914, "step": 1065000 }, { "epoch": 11.64, "learning_rate": 7.688956034331757e-06, "loss": 3.9866, "step": 1065500 }, { "epoch": 11.64, "learning_rate": 7.687861271676302e-06, "loss": 3.9844, "step": 1066000 }, { "epoch": 11.65, "learning_rate": 7.686766509020846e-06, "loss": 3.9797, "step": 1066500 }, { "epoch": 11.66, "learning_rate": 7.685671746365388e-06, "loss": 3.9784, "step": 1067000 }, { "epoch": 11.66, "learning_rate": 7.684576983709933e-06, "loss": 3.9842, "step": 1067500 }, { "epoch": 11.67, "learning_rate": 7.683482221054475e-06, "loss": 3.9793, "step": 1068000 }, { "epoch": 11.67, "learning_rate": 7.68238745839902e-06, "loss": 3.9769, "step": 1068500 }, { "epoch": 11.68, "learning_rate": 7.681292695743563e-06, "loss": 3.986, "step": 1069000 }, { "epoch": 11.68, "learning_rate": 7.680197933088106e-06, "loss": 3.987, "step": 1069500 }, { "epoch": 11.69, "learning_rate": 7.679103170432652e-06, "loss": 3.9801, "step": 1070000 }, { "epoch": 11.69, "learning_rate": 7.678008407777194e-06, "loss": 3.9875, "step": 1070500 }, { "epoch": 11.7, "learning_rate": 7.67691364512174e-06, "loss": 3.9872, "step": 1071000 }, { "epoch": 11.7, "learning_rate": 7.675818882466281e-06, "loss": 3.9821, "step": 1071500 }, { "epoch": 11.71, "learning_rate": 7.674724119810825e-06, "loss": 3.9871, "step": 1072000 }, { "epoch": 11.72, "learning_rate": 7.673629357155369e-06, "loss": 3.9781, "step": 1072500 }, { "epoch": 11.72, "learning_rate": 7.672534594499913e-06, "loss": 3.9858, "step": 1073000 }, { "epoch": 11.73, "learning_rate": 7.671439831844456e-06, "loss": 3.9874, "step": 1073500 }, { "epoch": 11.73, "learning_rate": 7.670345069189e-06, "loss": 3.9895, "step": 1074000 }, { "epoch": 11.74, "learning_rate": 7.669250306533544e-06, "loss": 3.9783, "step": 1074500 }, { "epoch": 11.74, "learning_rate": 7.668155543878088e-06, "loss": 3.9833, "step": 1075000 }, { "epoch": 11.75, "learning_rate": 7.667060781222631e-06, "loss": 3.9795, "step": 1075500 }, { "epoch": 11.75, "learning_rate": 7.665966018567175e-06, "loss": 3.9854, "step": 1076000 }, { "epoch": 11.76, "learning_rate": 7.664871255911719e-06, "loss": 3.9877, "step": 1076500 }, { "epoch": 11.76, "learning_rate": 7.663776493256263e-06, "loss": 3.9786, "step": 1077000 }, { "epoch": 11.77, "learning_rate": 7.662681730600806e-06, "loss": 3.9816, "step": 1077500 }, { "epoch": 11.78, "learning_rate": 7.66158696794535e-06, "loss": 3.9851, "step": 1078000 }, { "epoch": 11.78, "learning_rate": 7.660492205289894e-06, "loss": 3.9868, "step": 1078500 }, { "epoch": 11.79, "learning_rate": 7.659397442634438e-06, "loss": 3.9887, "step": 1079000 }, { "epoch": 11.79, "learning_rate": 7.658302679978981e-06, "loss": 3.9792, "step": 1079500 }, { "epoch": 11.8, "learning_rate": 7.657207917323525e-06, "loss": 3.9796, "step": 1080000 }, { "epoch": 11.8, "eval_accuracy": 0.3788817867775505, "eval_loss": 3.7481019496917725, "eval_runtime": 367.3059, "eval_samples_per_second": 839.507, "eval_steps_per_second": 13.12, "step": 1080000 }, { "epoch": 11.8, "learning_rate": 7.656113154668069e-06, "loss": 3.9814, "step": 1080500 }, { "epoch": 11.81, "learning_rate": 7.655018392012613e-06, "loss": 3.9842, "step": 1081000 }, { "epoch": 11.81, "learning_rate": 7.653923629357156e-06, "loss": 3.9798, "step": 1081500 }, { "epoch": 11.82, "learning_rate": 7.6528288667017e-06, "loss": 3.9862, "step": 1082000 }, { "epoch": 11.82, "learning_rate": 7.651734104046244e-06, "loss": 3.9736, "step": 1082500 }, { "epoch": 11.83, "learning_rate": 7.650639341390787e-06, "loss": 3.9762, "step": 1083000 }, { "epoch": 11.84, "learning_rate": 7.649544578735331e-06, "loss": 3.9857, "step": 1083500 }, { "epoch": 11.84, "learning_rate": 7.648449816079875e-06, "loss": 3.9869, "step": 1084000 }, { "epoch": 11.85, "learning_rate": 7.647355053424417e-06, "loss": 3.9845, "step": 1084500 }, { "epoch": 11.85, "learning_rate": 7.646260290768962e-06, "loss": 3.9846, "step": 1085000 }, { "epoch": 11.86, "learning_rate": 7.645165528113505e-06, "loss": 3.985, "step": 1085500 }, { "epoch": 11.86, "learning_rate": 7.64407076545805e-06, "loss": 3.9791, "step": 1086000 }, { "epoch": 11.87, "learning_rate": 7.642976002802594e-06, "loss": 3.9791, "step": 1086500 }, { "epoch": 11.87, "learning_rate": 7.641881240147136e-06, "loss": 3.9801, "step": 1087000 }, { "epoch": 11.88, "learning_rate": 7.640786477491681e-06, "loss": 3.9826, "step": 1087500 }, { "epoch": 11.88, "learning_rate": 7.639691714836223e-06, "loss": 3.9805, "step": 1088000 }, { "epoch": 11.89, "learning_rate": 7.638596952180769e-06, "loss": 3.9866, "step": 1088500 }, { "epoch": 11.9, "learning_rate": 7.637502189525312e-06, "loss": 3.9777, "step": 1089000 }, { "epoch": 11.9, "learning_rate": 7.636407426869854e-06, "loss": 3.9786, "step": 1089500 }, { "epoch": 11.91, "learning_rate": 7.6353126642144e-06, "loss": 3.9853, "step": 1090000 }, { "epoch": 11.91, "learning_rate": 7.634217901558942e-06, "loss": 3.9856, "step": 1090500 }, { "epoch": 11.92, "learning_rate": 7.633123138903487e-06, "loss": 3.978, "step": 1091000 }, { "epoch": 11.92, "learning_rate": 7.63202837624803e-06, "loss": 3.9751, "step": 1091500 }, { "epoch": 11.93, "learning_rate": 7.630933613592573e-06, "loss": 3.9777, "step": 1092000 }, { "epoch": 11.93, "learning_rate": 7.629838850937119e-06, "loss": 3.9743, "step": 1092500 }, { "epoch": 11.94, "learning_rate": 7.628744088281661e-06, "loss": 3.981, "step": 1093000 }, { "epoch": 11.95, "learning_rate": 7.627649325626205e-06, "loss": 3.9822, "step": 1093500 }, { "epoch": 11.95, "learning_rate": 7.626554562970748e-06, "loss": 3.9781, "step": 1094000 }, { "epoch": 11.96, "learning_rate": 7.625459800315293e-06, "loss": 3.9777, "step": 1094500 }, { "epoch": 11.96, "learning_rate": 7.624365037659836e-06, "loss": 3.9824, "step": 1095000 }, { "epoch": 11.97, "learning_rate": 7.623270275004379e-06, "loss": 3.9773, "step": 1095500 }, { "epoch": 11.97, "learning_rate": 7.622175512348924e-06, "loss": 3.9761, "step": 1096000 }, { "epoch": 11.98, "learning_rate": 7.621080749693467e-06, "loss": 3.9748, "step": 1096500 }, { "epoch": 11.98, "learning_rate": 7.6199859870380114e-06, "loss": 3.9804, "step": 1097000 }, { "epoch": 11.99, "learning_rate": 7.618891224382554e-06, "loss": 3.9809, "step": 1097500 }, { "epoch": 11.99, "learning_rate": 7.617796461727098e-06, "loss": 3.9753, "step": 1098000 }, { "epoch": 12.0, "learning_rate": 7.616701699071641e-06, "loss": 3.9787, "step": 1098500 }, { "epoch": 12.01, "learning_rate": 7.6156069364161856e-06, "loss": 3.9715, "step": 1099000 }, { "epoch": 12.01, "learning_rate": 7.614512173760729e-06, "loss": 3.978, "step": 1099500 }, { "epoch": 12.02, "learning_rate": 7.613417411105273e-06, "loss": 3.9817, "step": 1100000 }, { "epoch": 12.02, "learning_rate": 7.612322648449817e-06, "loss": 3.9819, "step": 1100500 }, { "epoch": 12.03, "learning_rate": 7.61122788579436e-06, "loss": 3.9776, "step": 1101000 }, { "epoch": 12.03, "learning_rate": 7.610133123138904e-06, "loss": 3.9822, "step": 1101500 }, { "epoch": 12.04, "learning_rate": 7.609038360483448e-06, "loss": 3.9735, "step": 1102000 }, { "epoch": 12.04, "learning_rate": 7.607943597827992e-06, "loss": 3.9797, "step": 1102500 }, { "epoch": 12.05, "learning_rate": 7.6068488351725355e-06, "loss": 3.9783, "step": 1103000 }, { "epoch": 12.05, "learning_rate": 7.605754072517078e-06, "loss": 3.9811, "step": 1103500 }, { "epoch": 12.06, "learning_rate": 7.604659309861623e-06, "loss": 3.9754, "step": 1104000 }, { "epoch": 12.07, "learning_rate": 7.603564547206166e-06, "loss": 3.9782, "step": 1104500 }, { "epoch": 12.07, "learning_rate": 7.6024697845507105e-06, "loss": 3.9743, "step": 1105000 }, { "epoch": 12.08, "learning_rate": 7.601375021895254e-06, "loss": 3.973, "step": 1105500 }, { "epoch": 12.08, "learning_rate": 7.600280259239797e-06, "loss": 3.9744, "step": 1106000 }, { "epoch": 12.09, "learning_rate": 7.599185496584342e-06, "loss": 3.9776, "step": 1106500 }, { "epoch": 12.09, "learning_rate": 7.598090733928885e-06, "loss": 3.9781, "step": 1107000 }, { "epoch": 12.1, "learning_rate": 7.596995971273428e-06, "loss": 3.982, "step": 1107500 }, { "epoch": 12.1, "learning_rate": 7.595901208617972e-06, "loss": 3.9759, "step": 1108000 }, { "epoch": 12.11, "learning_rate": 7.594806445962516e-06, "loss": 3.9818, "step": 1108500 }, { "epoch": 12.11, "learning_rate": 7.59371168330706e-06, "loss": 3.9694, "step": 1109000 }, { "epoch": 12.12, "learning_rate": 7.592616920651603e-06, "loss": 3.9738, "step": 1109500 }, { "epoch": 12.13, "learning_rate": 7.591522157996147e-06, "loss": 3.9742, "step": 1110000 }, { "epoch": 12.13, "eval_accuracy": 0.37955094928693195, "eval_loss": 3.7414019107818604, "eval_runtime": 411.3271, "eval_samples_per_second": 749.661, "eval_steps_per_second": 11.716, "step": 1110000 }, { "epoch": 12.13, "learning_rate": 7.590427395340691e-06, "loss": 3.977, "step": 1110500 }, { "epoch": 12.14, "learning_rate": 7.5893326326852345e-06, "loss": 3.98, "step": 1111000 }, { "epoch": 12.14, "learning_rate": 7.5882378700297774e-06, "loss": 3.9745, "step": 1111500 }, { "epoch": 12.15, "learning_rate": 7.587143107374322e-06, "loss": 3.9769, "step": 1112000 }, { "epoch": 12.15, "learning_rate": 7.586048344718866e-06, "loss": 3.9836, "step": 1112500 }, { "epoch": 12.16, "learning_rate": 7.584953582063409e-06, "loss": 3.978, "step": 1113000 }, { "epoch": 12.16, "learning_rate": 7.583858819407953e-06, "loss": 3.9819, "step": 1113500 }, { "epoch": 12.17, "learning_rate": 7.582764056752496e-06, "loss": 3.9723, "step": 1114000 }, { "epoch": 12.17, "learning_rate": 7.581669294097041e-06, "loss": 3.9739, "step": 1114500 }, { "epoch": 12.18, "learning_rate": 7.5805745314415845e-06, "loss": 3.9724, "step": 1115000 }, { "epoch": 12.19, "learning_rate": 7.579479768786127e-06, "loss": 3.9754, "step": 1115500 }, { "epoch": 12.19, "learning_rate": 7.578385006130672e-06, "loss": 3.982, "step": 1116000 }, { "epoch": 12.2, "learning_rate": 7.577290243475215e-06, "loss": 3.9782, "step": 1116500 }, { "epoch": 12.2, "learning_rate": 7.5761954808197594e-06, "loss": 3.9648, "step": 1117000 }, { "epoch": 12.21, "learning_rate": 7.575100718164302e-06, "loss": 3.9748, "step": 1117500 }, { "epoch": 12.21, "learning_rate": 7.574005955508846e-06, "loss": 3.9773, "step": 1118000 }, { "epoch": 12.22, "learning_rate": 7.572911192853391e-06, "loss": 3.9717, "step": 1118500 }, { "epoch": 12.22, "learning_rate": 7.5718164301979336e-06, "loss": 3.9686, "step": 1119000 }, { "epoch": 12.23, "learning_rate": 7.570721667542477e-06, "loss": 3.9772, "step": 1119500 }, { "epoch": 12.23, "learning_rate": 7.569626904887021e-06, "loss": 3.9694, "step": 1120000 }, { "epoch": 12.24, "learning_rate": 7.568532142231565e-06, "loss": 3.9706, "step": 1120500 }, { "epoch": 12.25, "learning_rate": 7.567437379576108e-06, "loss": 3.9753, "step": 1121000 }, { "epoch": 12.25, "learning_rate": 7.566342616920652e-06, "loss": 3.9718, "step": 1121500 }, { "epoch": 12.26, "learning_rate": 7.565247854265196e-06, "loss": 3.9759, "step": 1122000 }, { "epoch": 12.26, "learning_rate": 7.56415309160974e-06, "loss": 3.9762, "step": 1122500 }, { "epoch": 12.27, "learning_rate": 7.5630583289542835e-06, "loss": 3.9711, "step": 1123000 }, { "epoch": 12.27, "learning_rate": 7.561963566298826e-06, "loss": 3.9753, "step": 1123500 }, { "epoch": 12.28, "learning_rate": 7.560868803643371e-06, "loss": 3.9737, "step": 1124000 }, { "epoch": 12.28, "learning_rate": 7.559774040987914e-06, "loss": 3.9759, "step": 1124500 }, { "epoch": 12.29, "learning_rate": 7.5586792783324585e-06, "loss": 3.9723, "step": 1125000 }, { "epoch": 12.29, "learning_rate": 7.557584515677002e-06, "loss": 3.9719, "step": 1125500 }, { "epoch": 12.3, "learning_rate": 7.556489753021545e-06, "loss": 3.9698, "step": 1126000 }, { "epoch": 12.31, "learning_rate": 7.55539499036609e-06, "loss": 3.9655, "step": 1126500 }, { "epoch": 12.31, "learning_rate": 7.554300227710633e-06, "loss": 3.9814, "step": 1127000 }, { "epoch": 12.32, "learning_rate": 7.553205465055176e-06, "loss": 3.9767, "step": 1127500 }, { "epoch": 12.32, "learning_rate": 7.552110702399721e-06, "loss": 3.9752, "step": 1128000 }, { "epoch": 12.33, "learning_rate": 7.551015939744264e-06, "loss": 3.9715, "step": 1128500 }, { "epoch": 12.33, "learning_rate": 7.549921177088808e-06, "loss": 3.9666, "step": 1129000 }, { "epoch": 12.34, "learning_rate": 7.548826414433351e-06, "loss": 3.9746, "step": 1129500 }, { "epoch": 12.34, "learning_rate": 7.547731651777895e-06, "loss": 3.9696, "step": 1130000 }, { "epoch": 12.35, "learning_rate": 7.546636889122439e-06, "loss": 3.9695, "step": 1130500 }, { "epoch": 12.35, "learning_rate": 7.5455421264669825e-06, "loss": 3.9708, "step": 1131000 }, { "epoch": 12.36, "learning_rate": 7.544447363811527e-06, "loss": 3.9711, "step": 1131500 }, { "epoch": 12.37, "learning_rate": 7.54335260115607e-06, "loss": 3.9654, "step": 1132000 }, { "epoch": 12.37, "learning_rate": 7.542257838500614e-06, "loss": 3.9676, "step": 1132500 }, { "epoch": 12.38, "learning_rate": 7.541163075845157e-06, "loss": 3.9668, "step": 1133000 }, { "epoch": 12.38, "learning_rate": 7.540068313189701e-06, "loss": 3.9698, "step": 1133500 }, { "epoch": 12.39, "learning_rate": 7.538973550534244e-06, "loss": 3.9747, "step": 1134000 }, { "epoch": 12.39, "learning_rate": 7.537878787878789e-06, "loss": 3.9671, "step": 1134500 }, { "epoch": 12.4, "learning_rate": 7.5367840252233325e-06, "loss": 3.9778, "step": 1135000 }, { "epoch": 12.4, "learning_rate": 7.535689262567875e-06, "loss": 3.9658, "step": 1135500 }, { "epoch": 12.41, "learning_rate": 7.53459449991242e-06, "loss": 3.9754, "step": 1136000 }, { "epoch": 12.41, "learning_rate": 7.533499737256963e-06, "loss": 3.9751, "step": 1136500 }, { "epoch": 12.42, "learning_rate": 7.5324049746015074e-06, "loss": 3.9713, "step": 1137000 }, { "epoch": 12.43, "learning_rate": 7.53131021194605e-06, "loss": 3.9745, "step": 1137500 }, { "epoch": 12.43, "learning_rate": 7.530215449290594e-06, "loss": 3.9679, "step": 1138000 }, { "epoch": 12.44, "learning_rate": 7.529120686635139e-06, "loss": 3.9747, "step": 1138500 }, { "epoch": 12.44, "learning_rate": 7.5280259239796816e-06, "loss": 3.968, "step": 1139000 }, { "epoch": 12.45, "learning_rate": 7.526931161324226e-06, "loss": 3.9695, "step": 1139500 }, { "epoch": 12.45, "learning_rate": 7.525836398668769e-06, "loss": 3.9667, "step": 1140000 }, { "epoch": 12.45, "eval_accuracy": 0.38018148053890716, "eval_loss": 3.736989974975586, "eval_runtime": 415.6575, "eval_samples_per_second": 741.851, "eval_steps_per_second": 11.594, "step": 1140000 }, { "epoch": 12.46, "learning_rate": 7.524741636013313e-06, "loss": 3.968, "step": 1140500 }, { "epoch": 12.46, "learning_rate": 7.523646873357857e-06, "loss": 3.9702, "step": 1141000 }, { "epoch": 12.47, "learning_rate": 7.5225521107024e-06, "loss": 3.9703, "step": 1141500 }, { "epoch": 12.47, "learning_rate": 7.521457348046944e-06, "loss": 3.9671, "step": 1142000 }, { "epoch": 12.48, "learning_rate": 7.520362585391488e-06, "loss": 3.9744, "step": 1142500 }, { "epoch": 12.49, "learning_rate": 7.5192678227360315e-06, "loss": 3.9659, "step": 1143000 }, { "epoch": 12.49, "learning_rate": 7.518173060080574e-06, "loss": 3.9701, "step": 1143500 }, { "epoch": 12.5, "learning_rate": 7.517078297425119e-06, "loss": 3.9764, "step": 1144000 }, { "epoch": 12.5, "learning_rate": 7.515983534769663e-06, "loss": 3.9709, "step": 1144500 }, { "epoch": 12.51, "learning_rate": 7.5148887721142065e-06, "loss": 3.9705, "step": 1145000 }, { "epoch": 12.51, "learning_rate": 7.51379400945875e-06, "loss": 3.9718, "step": 1145500 }, { "epoch": 12.52, "learning_rate": 7.512699246803293e-06, "loss": 3.9758, "step": 1146000 }, { "epoch": 12.52, "learning_rate": 7.511604484147838e-06, "loss": 3.9747, "step": 1146500 }, { "epoch": 12.53, "learning_rate": 7.510509721492381e-06, "loss": 3.9685, "step": 1147000 }, { "epoch": 12.53, "learning_rate": 7.509414958836924e-06, "loss": 3.9655, "step": 1147500 }, { "epoch": 12.54, "learning_rate": 7.508320196181469e-06, "loss": 3.9658, "step": 1148000 }, { "epoch": 12.55, "learning_rate": 7.507225433526012e-06, "loss": 3.9722, "step": 1148500 }, { "epoch": 12.55, "learning_rate": 7.506130670870556e-06, "loss": 3.9676, "step": 1149000 }, { "epoch": 12.56, "learning_rate": 7.505035908215099e-06, "loss": 3.9688, "step": 1149500 }, { "epoch": 12.56, "learning_rate": 7.503941145559643e-06, "loss": 3.963, "step": 1150000 }, { "epoch": 12.57, "learning_rate": 7.502846382904187e-06, "loss": 3.9672, "step": 1150500 }, { "epoch": 12.57, "learning_rate": 7.5017516202487305e-06, "loss": 3.9721, "step": 1151000 }, { "epoch": 12.58, "learning_rate": 7.500656857593275e-06, "loss": 3.9725, "step": 1151500 }, { "epoch": 12.58, "learning_rate": 7.499562094937818e-06, "loss": 3.9637, "step": 1152000 }, { "epoch": 12.59, "learning_rate": 7.498467332282362e-06, "loss": 3.9694, "step": 1152500 }, { "epoch": 12.6, "learning_rate": 7.497372569626905e-06, "loss": 3.9661, "step": 1153000 }, { "epoch": 12.6, "learning_rate": 7.496277806971449e-06, "loss": 3.9672, "step": 1153500 }, { "epoch": 12.61, "learning_rate": 7.495183044315993e-06, "loss": 3.975, "step": 1154000 }, { "epoch": 12.61, "learning_rate": 7.494088281660537e-06, "loss": 3.9641, "step": 1154500 }, { "epoch": 12.62, "learning_rate": 7.4929935190050805e-06, "loss": 3.9676, "step": 1155000 }, { "epoch": 12.62, "learning_rate": 7.491898756349623e-06, "loss": 3.9665, "step": 1155500 }, { "epoch": 12.63, "learning_rate": 7.490803993694168e-06, "loss": 3.9692, "step": 1156000 }, { "epoch": 12.63, "learning_rate": 7.489709231038711e-06, "loss": 3.9662, "step": 1156500 }, { "epoch": 12.64, "learning_rate": 7.4886144683832555e-06, "loss": 3.9672, "step": 1157000 }, { "epoch": 12.64, "learning_rate": 7.487519705727799e-06, "loss": 3.9641, "step": 1157500 }, { "epoch": 12.65, "learning_rate": 7.486424943072342e-06, "loss": 3.9611, "step": 1158000 }, { "epoch": 12.66, "learning_rate": 7.485330180416887e-06, "loss": 3.9586, "step": 1158500 }, { "epoch": 12.66, "learning_rate": 7.4842354177614296e-06, "loss": 3.9667, "step": 1159000 }, { "epoch": 12.67, "learning_rate": 7.483140655105974e-06, "loss": 3.9604, "step": 1159500 }, { "epoch": 12.67, "learning_rate": 7.482045892450517e-06, "loss": 3.9721, "step": 1160000 }, { "epoch": 12.68, "learning_rate": 7.480951129795061e-06, "loss": 3.97, "step": 1160500 }, { "epoch": 12.68, "learning_rate": 7.479856367139605e-06, "loss": 3.975, "step": 1161000 }, { "epoch": 12.69, "learning_rate": 7.478761604484148e-06, "loss": 3.9581, "step": 1161500 }, { "epoch": 12.69, "learning_rate": 7.477666841828692e-06, "loss": 3.9686, "step": 1162000 }, { "epoch": 12.7, "learning_rate": 7.476572079173236e-06, "loss": 3.9634, "step": 1162500 }, { "epoch": 12.7, "learning_rate": 7.4754773165177795e-06, "loss": 3.9682, "step": 1163000 }, { "epoch": 12.71, "learning_rate": 7.474382553862324e-06, "loss": 3.9643, "step": 1163500 }, { "epoch": 12.72, "learning_rate": 7.473287791206867e-06, "loss": 3.9693, "step": 1164000 }, { "epoch": 12.72, "learning_rate": 7.472193028551411e-06, "loss": 3.9666, "step": 1164500 }, { "epoch": 12.73, "learning_rate": 7.4710982658959545e-06, "loss": 3.9622, "step": 1165000 }, { "epoch": 12.73, "learning_rate": 7.470003503240498e-06, "loss": 3.969, "step": 1165500 }, { "epoch": 12.74, "learning_rate": 7.468908740585041e-06, "loss": 3.9638, "step": 1166000 }, { "epoch": 12.74, "learning_rate": 7.467813977929586e-06, "loss": 3.9663, "step": 1166500 }, { "epoch": 12.75, "learning_rate": 7.4667192152741295e-06, "loss": 3.9671, "step": 1167000 }, { "epoch": 12.75, "learning_rate": 7.465624452618672e-06, "loss": 3.961, "step": 1167500 }, { "epoch": 12.76, "learning_rate": 7.464529689963217e-06, "loss": 3.9751, "step": 1168000 }, { "epoch": 12.76, "learning_rate": 7.46343492730776e-06, "loss": 3.9634, "step": 1168500 }, { "epoch": 12.77, "learning_rate": 7.462340164652304e-06, "loss": 3.9679, "step": 1169000 }, { "epoch": 12.78, "learning_rate": 7.461245401996847e-06, "loss": 3.9638, "step": 1169500 }, { "epoch": 12.78, "learning_rate": 7.460150639341391e-06, "loss": 3.9652, "step": 1170000 }, { "epoch": 12.78, "eval_accuracy": 0.3810010203146202, "eval_loss": 3.7289037704467773, "eval_runtime": 355.255, "eval_samples_per_second": 867.985, "eval_steps_per_second": 13.565, "step": 1170000 }, { "epoch": 12.79, "learning_rate": 7.459055876685936e-06, "loss": 3.9645, "step": 1170500 }, { "epoch": 12.79, "learning_rate": 7.4579611140304785e-06, "loss": 3.96, "step": 1171000 }, { "epoch": 12.8, "learning_rate": 7.456866351375023e-06, "loss": 3.9616, "step": 1171500 }, { "epoch": 12.8, "learning_rate": 7.455771588719566e-06, "loss": 3.9586, "step": 1172000 }, { "epoch": 12.81, "learning_rate": 7.45467682606411e-06, "loss": 3.9671, "step": 1172500 }, { "epoch": 12.81, "learning_rate": 7.453582063408653e-06, "loss": 3.9648, "step": 1173000 }, { "epoch": 12.82, "learning_rate": 7.452487300753197e-06, "loss": 3.9659, "step": 1173500 }, { "epoch": 12.82, "learning_rate": 7.451392538097742e-06, "loss": 3.9619, "step": 1174000 }, { "epoch": 12.83, "learning_rate": 7.450297775442285e-06, "loss": 3.96, "step": 1174500 }, { "epoch": 12.84, "learning_rate": 7.4492030127868285e-06, "loss": 3.9652, "step": 1175000 }, { "epoch": 12.84, "learning_rate": 7.448108250131371e-06, "loss": 3.9621, "step": 1175500 }, { "epoch": 12.85, "learning_rate": 7.447013487475916e-06, "loss": 3.9688, "step": 1176000 }, { "epoch": 12.85, "learning_rate": 7.44591872482046e-06, "loss": 3.963, "step": 1176500 }, { "epoch": 12.86, "learning_rate": 7.4448239621650035e-06, "loss": 3.9655, "step": 1177000 }, { "epoch": 12.86, "learning_rate": 7.443729199509547e-06, "loss": 3.9691, "step": 1177500 }, { "epoch": 12.87, "learning_rate": 7.44263443685409e-06, "loss": 3.9606, "step": 1178000 }, { "epoch": 12.87, "learning_rate": 7.441539674198635e-06, "loss": 3.9634, "step": 1178500 }, { "epoch": 12.88, "learning_rate": 7.440444911543178e-06, "loss": 3.9612, "step": 1179000 }, { "epoch": 12.88, "learning_rate": 7.439350148887722e-06, "loss": 3.9563, "step": 1179500 }, { "epoch": 12.89, "learning_rate": 7.438255386232266e-06, "loss": 3.9581, "step": 1180000 }, { "epoch": 12.9, "learning_rate": 7.437160623576809e-06, "loss": 3.9596, "step": 1180500 }, { "epoch": 12.9, "learning_rate": 7.436065860921353e-06, "loss": 3.963, "step": 1181000 }, { "epoch": 12.91, "learning_rate": 7.434971098265896e-06, "loss": 3.9604, "step": 1181500 }, { "epoch": 12.91, "learning_rate": 7.43387633561044e-06, "loss": 3.9597, "step": 1182000 }, { "epoch": 12.92, "learning_rate": 7.432781572954984e-06, "loss": 3.9566, "step": 1182500 }, { "epoch": 12.92, "learning_rate": 7.4316868102995275e-06, "loss": 3.9614, "step": 1183000 }, { "epoch": 12.93, "learning_rate": 7.430592047644072e-06, "loss": 3.9616, "step": 1183500 }, { "epoch": 12.93, "learning_rate": 7.429497284988615e-06, "loss": 3.9636, "step": 1184000 }, { "epoch": 12.94, "learning_rate": 7.428402522333159e-06, "loss": 3.9597, "step": 1184500 }, { "epoch": 12.94, "learning_rate": 7.4273077596777025e-06, "loss": 3.9612, "step": 1185000 }, { "epoch": 12.95, "learning_rate": 7.426212997022246e-06, "loss": 3.9623, "step": 1185500 }, { "epoch": 12.96, "learning_rate": 7.425118234366789e-06, "loss": 3.9625, "step": 1186000 }, { "epoch": 12.96, "learning_rate": 7.424023471711334e-06, "loss": 3.9614, "step": 1186500 }, { "epoch": 12.97, "learning_rate": 7.4229287090558775e-06, "loss": 3.9677, "step": 1187000 }, { "epoch": 12.97, "learning_rate": 7.42183394640042e-06, "loss": 3.9616, "step": 1187500 }, { "epoch": 12.98, "learning_rate": 7.420739183744965e-06, "loss": 3.957, "step": 1188000 }, { "epoch": 12.98, "learning_rate": 7.419644421089508e-06, "loss": 3.9603, "step": 1188500 }, { "epoch": 12.99, "learning_rate": 7.4185496584340524e-06, "loss": 3.9537, "step": 1189000 }, { "epoch": 12.99, "learning_rate": 7.417454895778596e-06, "loss": 3.9566, "step": 1189500 }, { "epoch": 13.0, "learning_rate": 7.416360133123139e-06, "loss": 3.9581, "step": 1190000 }, { "epoch": 13.0, "learning_rate": 7.415265370467684e-06, "loss": 3.9655, "step": 1190500 }, { "epoch": 13.01, "learning_rate": 7.4141706078122266e-06, "loss": 3.9669, "step": 1191000 }, { "epoch": 13.02, "learning_rate": 7.413075845156771e-06, "loss": 3.9613, "step": 1191500 }, { "epoch": 13.02, "learning_rate": 7.411981082501314e-06, "loss": 3.9548, "step": 1192000 }, { "epoch": 13.03, "learning_rate": 7.410886319845858e-06, "loss": 3.9598, "step": 1192500 }, { "epoch": 13.03, "learning_rate": 7.409791557190402e-06, "loss": 3.9593, "step": 1193000 }, { "epoch": 13.04, "learning_rate": 7.408696794534945e-06, "loss": 3.9632, "step": 1193500 }, { "epoch": 13.04, "learning_rate": 7.40760203187949e-06, "loss": 3.9638, "step": 1194000 }, { "epoch": 13.05, "learning_rate": 7.406507269224033e-06, "loss": 3.9572, "step": 1194500 }, { "epoch": 13.05, "learning_rate": 7.4054125065685765e-06, "loss": 3.9599, "step": 1195000 }, { "epoch": 13.06, "learning_rate": 7.404317743913119e-06, "loss": 3.9598, "step": 1195500 }, { "epoch": 13.06, "learning_rate": 7.403222981257664e-06, "loss": 3.9551, "step": 1196000 }, { "epoch": 13.07, "learning_rate": 7.402128218602208e-06, "loss": 3.9608, "step": 1196500 }, { "epoch": 13.08, "learning_rate": 7.4010334559467515e-06, "loss": 3.96, "step": 1197000 }, { "epoch": 13.08, "learning_rate": 7.399938693291295e-06, "loss": 3.9602, "step": 1197500 }, { "epoch": 13.09, "learning_rate": 7.398843930635838e-06, "loss": 3.9586, "step": 1198000 }, { "epoch": 13.09, "learning_rate": 7.397749167980383e-06, "loss": 3.9606, "step": 1198500 }, { "epoch": 13.1, "learning_rate": 7.396654405324926e-06, "loss": 3.9651, "step": 1199000 }, { "epoch": 13.1, "learning_rate": 7.39555964266947e-06, "loss": 3.9617, "step": 1199500 }, { "epoch": 13.11, "learning_rate": 7.394464880014014e-06, "loss": 3.9548, "step": 1200000 }, { "epoch": 13.11, "eval_accuracy": 0.3812385792091638, "eval_loss": 3.7277634143829346, "eval_runtime": 356.3908, "eval_samples_per_second": 865.219, "eval_steps_per_second": 13.522, "step": 1200000 }, { "epoch": 13.11, "learning_rate": 7.393370117358557e-06, "loss": 3.9629, "step": 1200500 }, { "epoch": 13.12, "learning_rate": 7.392275354703101e-06, "loss": 3.9596, "step": 1201000 }, { "epoch": 13.12, "learning_rate": 7.391180592047644e-06, "loss": 3.952, "step": 1201500 }, { "epoch": 13.13, "learning_rate": 7.390085829392188e-06, "loss": 3.9591, "step": 1202000 }, { "epoch": 13.14, "learning_rate": 7.388991066736733e-06, "loss": 3.9577, "step": 1202500 }, { "epoch": 13.14, "learning_rate": 7.3878963040812755e-06, "loss": 3.9567, "step": 1203000 }, { "epoch": 13.15, "learning_rate": 7.38680154142582e-06, "loss": 3.9607, "step": 1203500 }, { "epoch": 13.15, "learning_rate": 7.385706778770363e-06, "loss": 3.958, "step": 1204000 }, { "epoch": 13.16, "learning_rate": 7.384612016114907e-06, "loss": 3.95, "step": 1204500 }, { "epoch": 13.16, "learning_rate": 7.3835172534594505e-06, "loss": 3.9573, "step": 1205000 }, { "epoch": 13.17, "learning_rate": 7.382422490803994e-06, "loss": 3.962, "step": 1205500 }, { "epoch": 13.17, "learning_rate": 7.381327728148539e-06, "loss": 3.9607, "step": 1206000 }, { "epoch": 13.18, "learning_rate": 7.380232965493082e-06, "loss": 3.9563, "step": 1206500 }, { "epoch": 13.18, "learning_rate": 7.3791382028376255e-06, "loss": 3.9486, "step": 1207000 }, { "epoch": 13.19, "learning_rate": 7.378043440182168e-06, "loss": 3.9618, "step": 1207500 }, { "epoch": 13.2, "learning_rate": 7.376948677526713e-06, "loss": 3.9644, "step": 1208000 }, { "epoch": 13.2, "learning_rate": 7.375853914871256e-06, "loss": 3.9584, "step": 1208500 }, { "epoch": 13.21, "learning_rate": 7.3747591522158004e-06, "loss": 3.957, "step": 1209000 }, { "epoch": 13.21, "learning_rate": 7.373664389560344e-06, "loss": 3.9549, "step": 1209500 }, { "epoch": 13.22, "learning_rate": 7.372569626904887e-06, "loss": 3.9647, "step": 1210000 }, { "epoch": 13.22, "learning_rate": 7.371474864249432e-06, "loss": 3.9613, "step": 1210500 }, { "epoch": 13.23, "learning_rate": 7.3703801015939746e-06, "loss": 3.9564, "step": 1211000 }, { "epoch": 13.23, "learning_rate": 7.369285338938519e-06, "loss": 3.9509, "step": 1211500 }, { "epoch": 13.24, "learning_rate": 7.368190576283062e-06, "loss": 3.9604, "step": 1212000 }, { "epoch": 13.24, "learning_rate": 7.367095813627606e-06, "loss": 3.9545, "step": 1212500 }, { "epoch": 13.25, "learning_rate": 7.36600105097215e-06, "loss": 3.9611, "step": 1213000 }, { "epoch": 13.26, "learning_rate": 7.364906288316693e-06, "loss": 3.96, "step": 1213500 }, { "epoch": 13.26, "learning_rate": 7.363811525661238e-06, "loss": 3.9544, "step": 1214000 }, { "epoch": 13.27, "learning_rate": 7.362716763005781e-06, "loss": 3.9545, "step": 1214500 }, { "epoch": 13.27, "learning_rate": 7.3616220003503245e-06, "loss": 3.9631, "step": 1215000 }, { "epoch": 13.28, "learning_rate": 7.360527237694869e-06, "loss": 3.9562, "step": 1215500 }, { "epoch": 13.28, "learning_rate": 7.359432475039412e-06, "loss": 3.958, "step": 1216000 }, { "epoch": 13.29, "learning_rate": 7.358337712383956e-06, "loss": 3.9627, "step": 1216500 }, { "epoch": 13.29, "learning_rate": 7.3572429497284995e-06, "loss": 3.9589, "step": 1217000 }, { "epoch": 13.3, "learning_rate": 7.356148187073043e-06, "loss": 3.9567, "step": 1217500 }, { "epoch": 13.31, "learning_rate": 7.355053424417586e-06, "loss": 3.9542, "step": 1218000 }, { "epoch": 13.31, "learning_rate": 7.353958661762131e-06, "loss": 3.9567, "step": 1218500 }, { "epoch": 13.32, "learning_rate": 7.3528638991066744e-06, "loss": 3.9589, "step": 1219000 }, { "epoch": 13.32, "learning_rate": 7.351769136451218e-06, "loss": 3.9507, "step": 1219500 }, { "epoch": 13.33, "learning_rate": 7.350674373795762e-06, "loss": 3.9573, "step": 1220000 }, { "epoch": 13.33, "learning_rate": 7.349579611140305e-06, "loss": 3.9642, "step": 1220500 }, { "epoch": 13.34, "learning_rate": 7.348484848484849e-06, "loss": 3.954, "step": 1221000 }, { "epoch": 13.34, "learning_rate": 7.347390085829392e-06, "loss": 3.9544, "step": 1221500 }, { "epoch": 13.35, "learning_rate": 7.346295323173936e-06, "loss": 3.9493, "step": 1222000 }, { "epoch": 13.35, "learning_rate": 7.345200560518481e-06, "loss": 3.9516, "step": 1222500 }, { "epoch": 13.36, "learning_rate": 7.3441057978630235e-06, "loss": 3.9567, "step": 1223000 }, { "epoch": 13.37, "learning_rate": 7.343011035207568e-06, "loss": 3.9559, "step": 1223500 }, { "epoch": 13.37, "learning_rate": 7.341916272552111e-06, "loss": 3.9518, "step": 1224000 }, { "epoch": 13.38, "learning_rate": 7.340821509896655e-06, "loss": 3.9583, "step": 1224500 }, { "epoch": 13.38, "learning_rate": 7.3397267472411985e-06, "loss": 3.9598, "step": 1225000 }, { "epoch": 13.39, "learning_rate": 7.338631984585742e-06, "loss": 3.9533, "step": 1225500 }, { "epoch": 13.39, "learning_rate": 7.337537221930287e-06, "loss": 3.9541, "step": 1226000 }, { "epoch": 13.4, "learning_rate": 7.33644245927483e-06, "loss": 3.9515, "step": 1226500 }, { "epoch": 13.4, "learning_rate": 7.3353476966193735e-06, "loss": 3.9538, "step": 1227000 }, { "epoch": 13.41, "learning_rate": 7.334252933963917e-06, "loss": 3.9587, "step": 1227500 }, { "epoch": 13.41, "learning_rate": 7.333158171308461e-06, "loss": 3.9623, "step": 1228000 }, { "epoch": 13.42, "learning_rate": 7.3320634086530055e-06, "loss": 3.9472, "step": 1228500 }, { "epoch": 13.43, "learning_rate": 7.3309686459975484e-06, "loss": 3.9492, "step": 1229000 }, { "epoch": 13.43, "learning_rate": 7.329873883342092e-06, "loss": 3.9505, "step": 1229500 }, { "epoch": 13.44, "learning_rate": 7.328779120686635e-06, "loss": 3.9556, "step": 1230000 }, { "epoch": 13.44, "eval_accuracy": 0.38170547350072387, "eval_loss": 3.7213144302368164, "eval_runtime": 358.0484, "eval_samples_per_second": 861.213, "eval_steps_per_second": 13.459, "step": 1230000 }, { "epoch": 13.44, "learning_rate": 7.32768435803118e-06, "loss": 3.9587, "step": 1230500 }, { "epoch": 13.45, "learning_rate": 7.3265895953757226e-06, "loss": 3.9591, "step": 1231000 }, { "epoch": 13.45, "learning_rate": 7.325494832720267e-06, "loss": 3.9598, "step": 1231500 }, { "epoch": 13.46, "learning_rate": 7.324400070064811e-06, "loss": 3.9579, "step": 1232000 }, { "epoch": 13.46, "learning_rate": 7.323305307409354e-06, "loss": 3.9577, "step": 1232500 }, { "epoch": 13.47, "learning_rate": 7.322210544753898e-06, "loss": 3.9488, "step": 1233000 }, { "epoch": 13.47, "learning_rate": 7.321115782098441e-06, "loss": 3.9509, "step": 1233500 }, { "epoch": 13.48, "learning_rate": 7.320021019442986e-06, "loss": 3.9543, "step": 1234000 }, { "epoch": 13.49, "learning_rate": 7.318926256787529e-06, "loss": 3.9563, "step": 1234500 }, { "epoch": 13.49, "learning_rate": 7.3178314941320725e-06, "loss": 3.9521, "step": 1235000 }, { "epoch": 13.5, "learning_rate": 7.316736731476617e-06, "loss": 3.9533, "step": 1235500 }, { "epoch": 13.5, "learning_rate": 7.31564196882116e-06, "loss": 3.9509, "step": 1236000 }, { "epoch": 13.51, "learning_rate": 7.314547206165704e-06, "loss": 3.9484, "step": 1236500 }, { "epoch": 13.51, "learning_rate": 7.3134524435102475e-06, "loss": 3.9556, "step": 1237000 }, { "epoch": 13.52, "learning_rate": 7.312357680854791e-06, "loss": 3.9556, "step": 1237500 }, { "epoch": 13.52, "learning_rate": 7.311262918199336e-06, "loss": 3.9534, "step": 1238000 }, { "epoch": 13.53, "learning_rate": 7.310168155543879e-06, "loss": 3.9531, "step": 1238500 }, { "epoch": 13.53, "learning_rate": 7.3090733928884224e-06, "loss": 3.9485, "step": 1239000 }, { "epoch": 13.54, "learning_rate": 7.307978630232966e-06, "loss": 3.9453, "step": 1239500 }, { "epoch": 13.55, "learning_rate": 7.30688386757751e-06, "loss": 3.9572, "step": 1240000 }, { "epoch": 13.55, "learning_rate": 7.305789104922053e-06, "loss": 3.9518, "step": 1240500 }, { "epoch": 13.56, "learning_rate": 7.304694342266597e-06, "loss": 3.952, "step": 1241000 }, { "epoch": 13.56, "learning_rate": 7.303599579611141e-06, "loss": 3.9424, "step": 1241500 }, { "epoch": 13.57, "learning_rate": 7.302504816955684e-06, "loss": 3.9535, "step": 1242000 }, { "epoch": 13.57, "learning_rate": 7.301410054300229e-06, "loss": 3.9522, "step": 1242500 }, { "epoch": 13.58, "learning_rate": 7.3003152916447715e-06, "loss": 3.9539, "step": 1243000 }, { "epoch": 13.58, "learning_rate": 7.299220528989316e-06, "loss": 3.9498, "step": 1243500 }, { "epoch": 13.59, "learning_rate": 7.298125766333859e-06, "loss": 3.9573, "step": 1244000 }, { "epoch": 13.59, "learning_rate": 7.297031003678403e-06, "loss": 3.9522, "step": 1244500 }, { "epoch": 13.6, "learning_rate": 7.295936241022947e-06, "loss": 3.9622, "step": 1245000 }, { "epoch": 13.61, "learning_rate": 7.29484147836749e-06, "loss": 3.9463, "step": 1245500 }, { "epoch": 13.61, "learning_rate": 7.293746715712035e-06, "loss": 3.9534, "step": 1246000 }, { "epoch": 13.62, "learning_rate": 7.292651953056578e-06, "loss": 3.9422, "step": 1246500 }, { "epoch": 13.62, "learning_rate": 7.2915571904011215e-06, "loss": 3.9573, "step": 1247000 }, { "epoch": 13.63, "learning_rate": 7.290462427745665e-06, "loss": 3.9513, "step": 1247500 }, { "epoch": 13.63, "learning_rate": 7.289367665090209e-06, "loss": 3.9417, "step": 1248000 }, { "epoch": 13.64, "learning_rate": 7.2882729024347535e-06, "loss": 3.9519, "step": 1248500 }, { "epoch": 13.64, "learning_rate": 7.2871781397792964e-06, "loss": 3.9529, "step": 1249000 }, { "epoch": 13.65, "learning_rate": 7.28608337712384e-06, "loss": 3.9622, "step": 1249500 }, { "epoch": 13.65, "learning_rate": 7.284988614468383e-06, "loss": 3.9554, "step": 1250000 }, { "epoch": 13.66, "learning_rate": 7.283893851812928e-06, "loss": 3.9562, "step": 1250500 }, { "epoch": 13.67, "learning_rate": 7.282799089157471e-06, "loss": 3.9514, "step": 1251000 }, { "epoch": 13.67, "learning_rate": 7.281704326502015e-06, "loss": 3.9462, "step": 1251500 }, { "epoch": 13.68, "learning_rate": 7.280609563846559e-06, "loss": 3.9551, "step": 1252000 }, { "epoch": 13.68, "learning_rate": 7.279514801191102e-06, "loss": 3.9532, "step": 1252500 }, { "epoch": 13.69, "learning_rate": 7.278420038535646e-06, "loss": 3.9549, "step": 1253000 }, { "epoch": 13.69, "learning_rate": 7.277325275880189e-06, "loss": 3.9459, "step": 1253500 }, { "epoch": 13.7, "learning_rate": 7.276230513224734e-06, "loss": 3.9483, "step": 1254000 }, { "epoch": 13.7, "learning_rate": 7.275135750569278e-06, "loss": 3.9495, "step": 1254500 }, { "epoch": 13.71, "learning_rate": 7.2740409879138205e-06, "loss": 3.9538, "step": 1255000 }, { "epoch": 13.71, "learning_rate": 7.272946225258365e-06, "loss": 3.9454, "step": 1255500 }, { "epoch": 13.72, "learning_rate": 7.271851462602908e-06, "loss": 3.9574, "step": 1256000 }, { "epoch": 13.73, "learning_rate": 7.270756699947452e-06, "loss": 3.9534, "step": 1256500 }, { "epoch": 13.73, "learning_rate": 7.2696619372919955e-06, "loss": 3.956, "step": 1257000 }, { "epoch": 13.74, "learning_rate": 7.268567174636539e-06, "loss": 3.9491, "step": 1257500 }, { "epoch": 13.74, "learning_rate": 7.267472411981084e-06, "loss": 3.9444, "step": 1258000 }, { "epoch": 13.75, "learning_rate": 7.266377649325627e-06, "loss": 3.9488, "step": 1258500 }, { "epoch": 13.75, "learning_rate": 7.2652828866701704e-06, "loss": 3.9497, "step": 1259000 }, { "epoch": 13.76, "learning_rate": 7.264188124014714e-06, "loss": 3.9502, "step": 1259500 }, { "epoch": 13.76, "learning_rate": 7.263093361359258e-06, "loss": 3.9444, "step": 1260000 }, { "epoch": 13.76, "eval_accuracy": 0.3824648488047284, "eval_loss": 3.715151071548462, "eval_runtime": 369.8875, "eval_samples_per_second": 833.648, "eval_steps_per_second": 13.028, "step": 1260000 }, { "epoch": 13.77, "learning_rate": 7.261998598703801e-06, "loss": 3.9524, "step": 1260500 }, { "epoch": 13.77, "learning_rate": 7.260903836048345e-06, "loss": 3.9505, "step": 1261000 }, { "epoch": 13.78, "learning_rate": 7.259809073392889e-06, "loss": 3.9512, "step": 1261500 }, { "epoch": 13.79, "learning_rate": 7.258714310737433e-06, "loss": 3.9517, "step": 1262000 }, { "epoch": 13.79, "learning_rate": 7.257619548081977e-06, "loss": 3.9471, "step": 1262500 }, { "epoch": 13.8, "learning_rate": 7.2565247854265195e-06, "loss": 3.958, "step": 1263000 }, { "epoch": 13.8, "learning_rate": 7.255430022771064e-06, "loss": 3.9511, "step": 1263500 }, { "epoch": 13.81, "learning_rate": 7.254335260115608e-06, "loss": 3.9497, "step": 1264000 }, { "epoch": 13.81, "learning_rate": 7.253240497460151e-06, "loss": 3.9485, "step": 1264500 }, { "epoch": 13.82, "learning_rate": 7.252145734804695e-06, "loss": 3.9534, "step": 1265000 }, { "epoch": 13.82, "learning_rate": 7.251050972149238e-06, "loss": 3.9471, "step": 1265500 }, { "epoch": 13.83, "learning_rate": 7.249956209493783e-06, "loss": 3.9471, "step": 1266000 }, { "epoch": 13.83, "learning_rate": 7.248861446838326e-06, "loss": 3.9384, "step": 1266500 }, { "epoch": 13.84, "learning_rate": 7.2477666841828695e-06, "loss": 3.9487, "step": 1267000 }, { "epoch": 13.85, "learning_rate": 7.246671921527414e-06, "loss": 3.9467, "step": 1267500 }, { "epoch": 13.85, "learning_rate": 7.245577158871957e-06, "loss": 3.9474, "step": 1268000 }, { "epoch": 13.86, "learning_rate": 7.2444823962165016e-06, "loss": 3.9492, "step": 1268500 }, { "epoch": 13.86, "learning_rate": 7.2433876335610445e-06, "loss": 3.9533, "step": 1269000 }, { "epoch": 13.87, "learning_rate": 7.242292870905588e-06, "loss": 3.9495, "step": 1269500 }, { "epoch": 13.87, "learning_rate": 7.241198108250131e-06, "loss": 3.9525, "step": 1270000 }, { "epoch": 13.88, "learning_rate": 7.240103345594676e-06, "loss": 3.9483, "step": 1270500 }, { "epoch": 13.88, "learning_rate": 7.239008582939219e-06, "loss": 3.9612, "step": 1271000 }, { "epoch": 13.89, "learning_rate": 7.237913820283763e-06, "loss": 3.9514, "step": 1271500 }, { "epoch": 13.89, "learning_rate": 7.236819057628307e-06, "loss": 3.9466, "step": 1272000 }, { "epoch": 13.9, "learning_rate": 7.23572429497285e-06, "loss": 3.9471, "step": 1272500 }, { "epoch": 13.91, "learning_rate": 7.234629532317394e-06, "loss": 3.9496, "step": 1273000 }, { "epoch": 13.91, "learning_rate": 7.233534769661937e-06, "loss": 3.9469, "step": 1273500 }, { "epoch": 13.92, "learning_rate": 7.232440007006482e-06, "loss": 3.9451, "step": 1274000 }, { "epoch": 13.92, "learning_rate": 7.231345244351026e-06, "loss": 3.9479, "step": 1274500 }, { "epoch": 13.93, "learning_rate": 7.2302504816955685e-06, "loss": 3.9481, "step": 1275000 }, { "epoch": 13.93, "learning_rate": 7.229155719040113e-06, "loss": 3.9467, "step": 1275500 }, { "epoch": 13.94, "learning_rate": 7.228060956384656e-06, "loss": 3.9408, "step": 1276000 }, { "epoch": 13.94, "learning_rate": 7.2269661937292e-06, "loss": 3.9498, "step": 1276500 }, { "epoch": 13.95, "learning_rate": 7.225871431073744e-06, "loss": 3.9473, "step": 1277000 }, { "epoch": 13.96, "learning_rate": 7.224776668418287e-06, "loss": 3.9513, "step": 1277500 }, { "epoch": 13.96, "learning_rate": 7.223681905762832e-06, "loss": 3.9407, "step": 1278000 }, { "epoch": 13.97, "learning_rate": 7.222587143107375e-06, "loss": 3.9491, "step": 1278500 }, { "epoch": 13.97, "learning_rate": 7.2214923804519185e-06, "loss": 3.9425, "step": 1279000 }, { "epoch": 13.98, "learning_rate": 7.220397617796462e-06, "loss": 3.9432, "step": 1279500 }, { "epoch": 13.98, "learning_rate": 7.219302855141006e-06, "loss": 3.9488, "step": 1280000 }, { "epoch": 13.99, "learning_rate": 7.2182080924855505e-06, "loss": 3.9476, "step": 1280500 }, { "epoch": 13.99, "learning_rate": 7.217113329830093e-06, "loss": 3.9478, "step": 1281000 }, { "epoch": 14.0, "learning_rate": 7.216018567174637e-06, "loss": 3.9533, "step": 1281500 }, { "epoch": 14.0, "learning_rate": 7.214923804519181e-06, "loss": 3.9467, "step": 1282000 }, { "epoch": 14.01, "learning_rate": 7.213829041863725e-06, "loss": 3.9512, "step": 1282500 }, { "epoch": 14.02, "learning_rate": 7.2127342792082675e-06, "loss": 3.9437, "step": 1283000 }, { "epoch": 14.02, "learning_rate": 7.211639516552812e-06, "loss": 3.9467, "step": 1283500 }, { "epoch": 14.03, "learning_rate": 7.210544753897356e-06, "loss": 3.9437, "step": 1284000 }, { "epoch": 14.03, "learning_rate": 7.209449991241899e-06, "loss": 3.945, "step": 1284500 }, { "epoch": 14.04, "learning_rate": 7.208355228586443e-06, "loss": 3.9489, "step": 1285000 }, { "epoch": 14.04, "learning_rate": 7.207260465930986e-06, "loss": 3.9482, "step": 1285500 }, { "epoch": 14.05, "learning_rate": 7.206165703275531e-06, "loss": 3.9404, "step": 1286000 }, { "epoch": 14.05, "learning_rate": 7.205070940620074e-06, "loss": 3.9435, "step": 1286500 }, { "epoch": 14.06, "learning_rate": 7.2039761779646175e-06, "loss": 3.9406, "step": 1287000 }, { "epoch": 14.06, "learning_rate": 7.202881415309162e-06, "loss": 3.9466, "step": 1287500 }, { "epoch": 14.07, "learning_rate": 7.201786652653705e-06, "loss": 3.9518, "step": 1288000 }, { "epoch": 14.08, "learning_rate": 7.2006918899982496e-06, "loss": 3.9404, "step": 1288500 }, { "epoch": 14.08, "learning_rate": 7.1995971273427925e-06, "loss": 3.9422, "step": 1289000 }, { "epoch": 14.09, "learning_rate": 7.198502364687336e-06, "loss": 3.9463, "step": 1289500 }, { "epoch": 14.09, "learning_rate": 7.197407602031881e-06, "loss": 3.9428, "step": 1290000 }, { "epoch": 14.09, "eval_accuracy": 0.38270038741322826, "eval_loss": 3.7119579315185547, "eval_runtime": 357.5786, "eval_samples_per_second": 862.345, "eval_steps_per_second": 13.477, "step": 1290000 }, { "epoch": 14.1, "learning_rate": 7.196312839376424e-06, "loss": 3.946, "step": 1290500 }, { "epoch": 14.1, "learning_rate": 7.1952180767209674e-06, "loss": 3.9497, "step": 1291000 }, { "epoch": 14.11, "learning_rate": 7.194123314065511e-06, "loss": 3.9392, "step": 1291500 }, { "epoch": 14.11, "learning_rate": 7.193028551410055e-06, "loss": 3.9453, "step": 1292000 }, { "epoch": 14.12, "learning_rate": 7.191933788754598e-06, "loss": 3.9497, "step": 1292500 }, { "epoch": 14.12, "learning_rate": 7.190839026099142e-06, "loss": 3.9414, "step": 1293000 }, { "epoch": 14.13, "learning_rate": 7.189744263443686e-06, "loss": 3.9439, "step": 1293500 }, { "epoch": 14.14, "learning_rate": 7.18864950078823e-06, "loss": 3.9432, "step": 1294000 }, { "epoch": 14.14, "learning_rate": 7.187554738132774e-06, "loss": 3.9432, "step": 1294500 }, { "epoch": 14.15, "learning_rate": 7.1864599754773165e-06, "loss": 3.9367, "step": 1295000 }, { "epoch": 14.15, "learning_rate": 7.185365212821861e-06, "loss": 3.9493, "step": 1295500 }, { "epoch": 14.16, "learning_rate": 7.184270450166404e-06, "loss": 3.9438, "step": 1296000 }, { "epoch": 14.16, "learning_rate": 7.183175687510949e-06, "loss": 3.9432, "step": 1296500 }, { "epoch": 14.17, "learning_rate": 7.182080924855492e-06, "loss": 3.9478, "step": 1297000 }, { "epoch": 14.17, "learning_rate": 7.180986162200035e-06, "loss": 3.9458, "step": 1297500 }, { "epoch": 14.18, "learning_rate": 7.17989139954458e-06, "loss": 3.9443, "step": 1298000 }, { "epoch": 14.18, "learning_rate": 7.178796636889123e-06, "loss": 3.9464, "step": 1298500 }, { "epoch": 14.19, "learning_rate": 7.1777018742336665e-06, "loss": 3.9413, "step": 1299000 }, { "epoch": 14.2, "learning_rate": 7.17660711157821e-06, "loss": 3.9419, "step": 1299500 }, { "epoch": 14.2, "learning_rate": 7.175512348922754e-06, "loss": 3.9467, "step": 1300000 }, { "epoch": 14.21, "learning_rate": 7.1744175862672985e-06, "loss": 3.9445, "step": 1300500 }, { "epoch": 14.21, "learning_rate": 7.1733228236118414e-06, "loss": 3.9388, "step": 1301000 }, { "epoch": 14.22, "learning_rate": 7.172228060956385e-06, "loss": 3.9461, "step": 1301500 }, { "epoch": 14.22, "learning_rate": 7.171133298300929e-06, "loss": 3.9436, "step": 1302000 }, { "epoch": 14.23, "learning_rate": 7.170038535645473e-06, "loss": 3.9425, "step": 1302500 }, { "epoch": 14.23, "learning_rate": 7.168943772990017e-06, "loss": 3.9439, "step": 1303000 }, { "epoch": 14.24, "learning_rate": 7.16784901033456e-06, "loss": 3.9418, "step": 1303500 }, { "epoch": 14.24, "learning_rate": 7.166754247679104e-06, "loss": 3.9389, "step": 1304000 }, { "epoch": 14.25, "learning_rate": 7.165659485023647e-06, "loss": 3.9483, "step": 1304500 }, { "epoch": 14.26, "learning_rate": 7.164564722368191e-06, "loss": 3.9428, "step": 1305000 }, { "epoch": 14.26, "learning_rate": 7.163469959712734e-06, "loss": 3.9453, "step": 1305500 }, { "epoch": 14.27, "learning_rate": 7.162375197057279e-06, "loss": 3.947, "step": 1306000 }, { "epoch": 14.27, "learning_rate": 7.161280434401823e-06, "loss": 3.9445, "step": 1306500 }, { "epoch": 14.28, "learning_rate": 7.1601856717463655e-06, "loss": 3.9495, "step": 1307000 }, { "epoch": 14.28, "learning_rate": 7.15909090909091e-06, "loss": 3.9428, "step": 1307500 }, { "epoch": 14.29, "learning_rate": 7.157996146435453e-06, "loss": 3.9387, "step": 1308000 }, { "epoch": 14.29, "learning_rate": 7.1569013837799976e-06, "loss": 3.946, "step": 1308500 }, { "epoch": 14.3, "learning_rate": 7.1558066211245405e-06, "loss": 3.9422, "step": 1309000 }, { "epoch": 14.3, "learning_rate": 7.154711858469084e-06, "loss": 3.941, "step": 1309500 }, { "epoch": 14.31, "learning_rate": 7.153617095813629e-06, "loss": 3.9331, "step": 1310000 }, { "epoch": 14.32, "learning_rate": 7.152522333158172e-06, "loss": 3.9456, "step": 1310500 }, { "epoch": 14.32, "learning_rate": 7.1514275705027154e-06, "loss": 3.9383, "step": 1311000 }, { "epoch": 14.33, "learning_rate": 7.150332807847259e-06, "loss": 3.9414, "step": 1311500 }, { "epoch": 14.33, "learning_rate": 7.149238045191803e-06, "loss": 3.9452, "step": 1312000 }, { "epoch": 14.34, "learning_rate": 7.148143282536346e-06, "loss": 3.9394, "step": 1312500 }, { "epoch": 14.34, "learning_rate": 7.14704851988089e-06, "loss": 3.9454, "step": 1313000 }, { "epoch": 14.35, "learning_rate": 7.145953757225434e-06, "loss": 3.9417, "step": 1313500 }, { "epoch": 14.35, "learning_rate": 7.144858994569978e-06, "loss": 3.9372, "step": 1314000 }, { "epoch": 14.36, "learning_rate": 7.143764231914522e-06, "loss": 3.9409, "step": 1314500 }, { "epoch": 14.36, "learning_rate": 7.1426694692590645e-06, "loss": 3.9476, "step": 1315000 }, { "epoch": 14.37, "learning_rate": 7.141574706603609e-06, "loss": 3.9427, "step": 1315500 }, { "epoch": 14.38, "learning_rate": 7.140479943948153e-06, "loss": 3.9365, "step": 1316000 }, { "epoch": 14.38, "learning_rate": 7.139385181292697e-06, "loss": 3.942, "step": 1316500 }, { "epoch": 14.39, "learning_rate": 7.13829041863724e-06, "loss": 3.9439, "step": 1317000 }, { "epoch": 14.39, "learning_rate": 7.137195655981783e-06, "loss": 3.943, "step": 1317500 }, { "epoch": 14.4, "learning_rate": 7.136100893326328e-06, "loss": 3.9356, "step": 1318000 }, { "epoch": 14.4, "learning_rate": 7.135006130670871e-06, "loss": 3.9435, "step": 1318500 }, { "epoch": 14.41, "learning_rate": 7.1339113680154145e-06, "loss": 3.9422, "step": 1319000 }, { "epoch": 14.41, "learning_rate": 7.132816605359959e-06, "loss": 3.9456, "step": 1319500 }, { "epoch": 14.42, "learning_rate": 7.131721842704502e-06, "loss": 3.9424, "step": 1320000 }, { "epoch": 14.42, "eval_accuracy": 0.38336994812570946, "eval_loss": 3.7072017192840576, "eval_runtime": 355.7685, "eval_samples_per_second": 866.732, "eval_steps_per_second": 13.545, "step": 1320000 }, { "epoch": 14.42, "learning_rate": 7.1306270800490465e-06, "loss": 3.9414, "step": 1320500 }, { "epoch": 14.43, "learning_rate": 7.1295323173935894e-06, "loss": 3.9473, "step": 1321000 }, { "epoch": 14.44, "learning_rate": 7.128437554738133e-06, "loss": 3.941, "step": 1321500 }, { "epoch": 14.44, "learning_rate": 7.127342792082677e-06, "loss": 3.9423, "step": 1322000 }, { "epoch": 14.45, "learning_rate": 7.126248029427221e-06, "loss": 3.9432, "step": 1322500 }, { "epoch": 14.45, "learning_rate": 7.125153266771765e-06, "loss": 3.9402, "step": 1323000 }, { "epoch": 14.46, "learning_rate": 7.124058504116308e-06, "loss": 3.9484, "step": 1323500 }, { "epoch": 14.46, "learning_rate": 7.122963741460852e-06, "loss": 3.9356, "step": 1324000 }, { "epoch": 14.47, "learning_rate": 7.121868978805395e-06, "loss": 3.9348, "step": 1324500 }, { "epoch": 14.47, "learning_rate": 7.120774216149939e-06, "loss": 3.9393, "step": 1325000 }, { "epoch": 14.48, "learning_rate": 7.119679453494483e-06, "loss": 3.9411, "step": 1325500 }, { "epoch": 14.48, "learning_rate": 7.118584690839027e-06, "loss": 3.9457, "step": 1326000 }, { "epoch": 14.49, "learning_rate": 7.117489928183571e-06, "loss": 3.94, "step": 1326500 }, { "epoch": 14.5, "learning_rate": 7.1163951655281135e-06, "loss": 3.9396, "step": 1327000 }, { "epoch": 14.5, "learning_rate": 7.115300402872658e-06, "loss": 3.9436, "step": 1327500 }, { "epoch": 14.51, "learning_rate": 7.114205640217201e-06, "loss": 3.9416, "step": 1328000 }, { "epoch": 14.51, "learning_rate": 7.1131108775617456e-06, "loss": 3.9383, "step": 1328500 }, { "epoch": 14.52, "learning_rate": 7.112016114906289e-06, "loss": 3.9442, "step": 1329000 }, { "epoch": 14.52, "learning_rate": 7.110921352250832e-06, "loss": 3.9386, "step": 1329500 }, { "epoch": 14.53, "learning_rate": 7.109826589595377e-06, "loss": 3.9378, "step": 1330000 }, { "epoch": 14.53, "learning_rate": 7.10873182693992e-06, "loss": 3.9379, "step": 1330500 }, { "epoch": 14.54, "learning_rate": 7.107637064284464e-06, "loss": 3.9407, "step": 1331000 }, { "epoch": 14.54, "learning_rate": 7.106542301629007e-06, "loss": 3.9348, "step": 1331500 }, { "epoch": 14.55, "learning_rate": 7.105447538973551e-06, "loss": 3.9404, "step": 1332000 }, { "epoch": 14.56, "learning_rate": 7.1043527763180955e-06, "loss": 3.9379, "step": 1332500 }, { "epoch": 14.56, "learning_rate": 7.103258013662638e-06, "loss": 3.9396, "step": 1333000 }, { "epoch": 14.57, "learning_rate": 7.102163251007182e-06, "loss": 3.9452, "step": 1333500 }, { "epoch": 14.57, "learning_rate": 7.101068488351726e-06, "loss": 3.9344, "step": 1334000 }, { "epoch": 14.58, "learning_rate": 7.09997372569627e-06, "loss": 3.9446, "step": 1334500 }, { "epoch": 14.58, "learning_rate": 7.0988789630408125e-06, "loss": 3.9481, "step": 1335000 }, { "epoch": 14.59, "learning_rate": 7.097784200385357e-06, "loss": 3.9467, "step": 1335500 }, { "epoch": 14.59, "learning_rate": 7.096689437729901e-06, "loss": 3.9376, "step": 1336000 }, { "epoch": 14.6, "learning_rate": 7.095594675074445e-06, "loss": 3.9352, "step": 1336500 }, { "epoch": 14.6, "learning_rate": 7.094499912418988e-06, "loss": 3.932, "step": 1337000 }, { "epoch": 14.61, "learning_rate": 7.093405149763531e-06, "loss": 3.9377, "step": 1337500 }, { "epoch": 14.62, "learning_rate": 7.092310387108076e-06, "loss": 3.9374, "step": 1338000 }, { "epoch": 14.62, "learning_rate": 7.0912156244526196e-06, "loss": 3.941, "step": 1338500 }, { "epoch": 14.63, "learning_rate": 7.0901208617971625e-06, "loss": 3.9344, "step": 1339000 }, { "epoch": 14.63, "learning_rate": 7.089026099141707e-06, "loss": 3.9428, "step": 1339500 }, { "epoch": 14.64, "learning_rate": 7.08793133648625e-06, "loss": 3.9388, "step": 1340000 }, { "epoch": 14.64, "learning_rate": 7.0868365738307945e-06, "loss": 3.9394, "step": 1340500 }, { "epoch": 14.65, "learning_rate": 7.0857418111753374e-06, "loss": 3.9387, "step": 1341000 }, { "epoch": 14.65, "learning_rate": 7.084647048519881e-06, "loss": 3.9411, "step": 1341500 }, { "epoch": 14.66, "learning_rate": 7.083552285864426e-06, "loss": 3.9395, "step": 1342000 }, { "epoch": 14.67, "learning_rate": 7.082457523208969e-06, "loss": 3.9388, "step": 1342500 }, { "epoch": 14.67, "learning_rate": 7.081362760553513e-06, "loss": 3.9364, "step": 1343000 }, { "epoch": 14.68, "learning_rate": 7.080267997898056e-06, "loss": 3.945, "step": 1343500 }, { "epoch": 14.68, "learning_rate": 7.0791732352426e-06, "loss": 3.9399, "step": 1344000 }, { "epoch": 14.69, "learning_rate": 7.078078472587143e-06, "loss": 3.9382, "step": 1344500 }, { "epoch": 14.69, "learning_rate": 7.076983709931687e-06, "loss": 3.9367, "step": 1345000 }, { "epoch": 14.7, "learning_rate": 7.075888947276232e-06, "loss": 3.935, "step": 1345500 }, { "epoch": 14.7, "learning_rate": 7.074794184620775e-06, "loss": 3.9422, "step": 1346000 }, { "epoch": 14.71, "learning_rate": 7.073699421965319e-06, "loss": 3.9394, "step": 1346500 }, { "epoch": 14.71, "learning_rate": 7.0726046593098615e-06, "loss": 3.9463, "step": 1347000 }, { "epoch": 14.72, "learning_rate": 7.071509896654406e-06, "loss": 3.9348, "step": 1347500 }, { "epoch": 14.73, "learning_rate": 7.070415133998949e-06, "loss": 3.9379, "step": 1348000 }, { "epoch": 14.73, "learning_rate": 7.069320371343494e-06, "loss": 3.9377, "step": 1348500 }, { "epoch": 14.74, "learning_rate": 7.068225608688037e-06, "loss": 3.9358, "step": 1349000 }, { "epoch": 14.74, "learning_rate": 7.06713084603258e-06, "loss": 3.9395, "step": 1349500 }, { "epoch": 14.75, "learning_rate": 7.066036083377125e-06, "loss": 3.9389, "step": 1350000 }, { "epoch": 14.75, "eval_accuracy": 0.3836340076990125, "eval_loss": 3.704674482345581, "eval_runtime": 356.2144, "eval_samples_per_second": 865.647, "eval_steps_per_second": 13.528, "step": 1350000 }, { "epoch": 14.75, "learning_rate": 7.064941320721668e-06, "loss": 3.9306, "step": 1350500 }, { "epoch": 14.76, "learning_rate": 7.063846558066212e-06, "loss": 3.9355, "step": 1351000 }, { "epoch": 14.76, "learning_rate": 7.062751795410756e-06, "loss": 3.9334, "step": 1351500 }, { "epoch": 14.77, "learning_rate": 7.061657032755299e-06, "loss": 3.933, "step": 1352000 }, { "epoch": 14.77, "learning_rate": 7.0605622700998435e-06, "loss": 3.9425, "step": 1352500 }, { "epoch": 14.78, "learning_rate": 7.059467507444386e-06, "loss": 3.9405, "step": 1353000 }, { "epoch": 14.79, "learning_rate": 7.05837274478893e-06, "loss": 3.9349, "step": 1353500 }, { "epoch": 14.79, "learning_rate": 7.057277982133474e-06, "loss": 3.9322, "step": 1354000 }, { "epoch": 14.8, "learning_rate": 7.056183219478018e-06, "loss": 3.9312, "step": 1354500 }, { "epoch": 14.8, "learning_rate": 7.055088456822562e-06, "loss": 3.9372, "step": 1355000 }, { "epoch": 14.81, "learning_rate": 7.053993694167105e-06, "loss": 3.9414, "step": 1355500 }, { "epoch": 14.81, "learning_rate": 7.052898931511649e-06, "loss": 3.932, "step": 1356000 }, { "epoch": 14.82, "learning_rate": 7.051804168856193e-06, "loss": 3.9492, "step": 1356500 }, { "epoch": 14.82, "learning_rate": 7.050709406200736e-06, "loss": 3.9296, "step": 1357000 }, { "epoch": 14.83, "learning_rate": 7.049614643545279e-06, "loss": 3.9368, "step": 1357500 }, { "epoch": 14.83, "learning_rate": 7.048519880889824e-06, "loss": 3.9315, "step": 1358000 }, { "epoch": 14.84, "learning_rate": 7.047425118234368e-06, "loss": 3.9407, "step": 1358500 }, { "epoch": 14.85, "learning_rate": 7.0463303555789105e-06, "loss": 3.9401, "step": 1359000 }, { "epoch": 14.85, "learning_rate": 7.045235592923455e-06, "loss": 3.9438, "step": 1359500 }, { "epoch": 14.86, "learning_rate": 7.044140830267998e-06, "loss": 3.9374, "step": 1360000 }, { "epoch": 14.86, "learning_rate": 7.0430460676125425e-06, "loss": 3.9424, "step": 1360500 }, { "epoch": 14.87, "learning_rate": 7.0419513049570854e-06, "loss": 3.9346, "step": 1361000 }, { "epoch": 14.87, "learning_rate": 7.040856542301629e-06, "loss": 3.9358, "step": 1361500 }, { "epoch": 14.88, "learning_rate": 7.039761779646174e-06, "loss": 3.9351, "step": 1362000 }, { "epoch": 14.88, "learning_rate": 7.038667016990717e-06, "loss": 3.9294, "step": 1362500 }, { "epoch": 14.89, "learning_rate": 7.037572254335261e-06, "loss": 3.9365, "step": 1363000 }, { "epoch": 14.89, "learning_rate": 7.036477491679804e-06, "loss": 3.9353, "step": 1363500 }, { "epoch": 14.9, "learning_rate": 7.035382729024348e-06, "loss": 3.9316, "step": 1364000 }, { "epoch": 14.91, "learning_rate": 7.0342879663688925e-06, "loss": 3.9368, "step": 1364500 }, { "epoch": 14.91, "learning_rate": 7.033193203713435e-06, "loss": 3.9386, "step": 1365000 }, { "epoch": 14.92, "learning_rate": 7.03209844105798e-06, "loss": 3.9365, "step": 1365500 }, { "epoch": 14.92, "learning_rate": 7.031003678402523e-06, "loss": 3.9344, "step": 1366000 }, { "epoch": 14.93, "learning_rate": 7.029908915747067e-06, "loss": 3.9376, "step": 1366500 }, { "epoch": 14.93, "learning_rate": 7.0288141530916095e-06, "loss": 3.9355, "step": 1367000 }, { "epoch": 14.94, "learning_rate": 7.027719390436154e-06, "loss": 3.9342, "step": 1367500 }, { "epoch": 14.94, "learning_rate": 7.026624627780698e-06, "loss": 3.9287, "step": 1368000 }, { "epoch": 14.95, "learning_rate": 7.025529865125242e-06, "loss": 3.9408, "step": 1368500 }, { "epoch": 14.95, "learning_rate": 7.024435102469785e-06, "loss": 3.9405, "step": 1369000 }, { "epoch": 14.96, "learning_rate": 7.023340339814328e-06, "loss": 3.9393, "step": 1369500 }, { "epoch": 14.97, "learning_rate": 7.022245577158873e-06, "loss": 3.9377, "step": 1370000 }, { "epoch": 14.97, "learning_rate": 7.021150814503416e-06, "loss": 3.9278, "step": 1370500 }, { "epoch": 14.98, "learning_rate": 7.02005605184796e-06, "loss": 3.9374, "step": 1371000 }, { "epoch": 14.98, "learning_rate": 7.018961289192504e-06, "loss": 3.9314, "step": 1371500 }, { "epoch": 14.99, "learning_rate": 7.017866526537047e-06, "loss": 3.9345, "step": 1372000 }, { "epoch": 14.99, "learning_rate": 7.0167717638815915e-06, "loss": 3.9415, "step": 1372500 }, { "epoch": 15.0, "learning_rate": 7.015677001226134e-06, "loss": 3.9391, "step": 1373000 }, { "epoch": 15.0, "learning_rate": 7.014582238570678e-06, "loss": 3.9335, "step": 1373500 }, { "epoch": 15.01, "learning_rate": 7.013487475915222e-06, "loss": 3.9381, "step": 1374000 }, { "epoch": 15.01, "learning_rate": 7.012392713259766e-06, "loss": 3.9295, "step": 1374500 }, { "epoch": 15.02, "learning_rate": 7.01129795060431e-06, "loss": 3.9267, "step": 1375000 }, { "epoch": 15.03, "learning_rate": 7.010203187948853e-06, "loss": 3.9301, "step": 1375500 }, { "epoch": 15.03, "learning_rate": 7.009108425293397e-06, "loss": 3.9314, "step": 1376000 }, { "epoch": 15.04, "learning_rate": 7.008013662637941e-06, "loss": 3.9328, "step": 1376500 }, { "epoch": 15.04, "learning_rate": 7.006918899982484e-06, "loss": 3.9337, "step": 1377000 }, { "epoch": 15.05, "learning_rate": 7.005824137327029e-06, "loss": 3.9367, "step": 1377500 }, { "epoch": 15.05, "learning_rate": 7.004729374671572e-06, "loss": 3.9327, "step": 1378000 }, { "epoch": 15.06, "learning_rate": 7.003634612016116e-06, "loss": 3.9319, "step": 1378500 }, { "epoch": 15.06, "learning_rate": 7.0025398493606585e-06, "loss": 3.9348, "step": 1379000 }, { "epoch": 15.07, "learning_rate": 7.001445086705203e-06, "loss": 3.9282, "step": 1379500 }, { "epoch": 15.07, "learning_rate": 7.000350324049746e-06, "loss": 3.936, "step": 1380000 }, { "epoch": 15.07, "eval_accuracy": 0.3843947864036483, "eval_loss": 3.6998095512390137, "eval_runtime": 358.0667, "eval_samples_per_second": 861.169, "eval_steps_per_second": 13.458, "step": 1380000 }, { "epoch": 15.08, "learning_rate": 6.9992555613942906e-06, "loss": 3.9373, "step": 1380500 }, { "epoch": 15.09, "learning_rate": 6.998160798738834e-06, "loss": 3.9359, "step": 1381000 }, { "epoch": 15.09, "learning_rate": 6.997066036083377e-06, "loss": 3.9384, "step": 1381500 }, { "epoch": 15.1, "learning_rate": 6.995971273427922e-06, "loss": 3.9333, "step": 1382000 }, { "epoch": 15.1, "learning_rate": 6.994876510772465e-06, "loss": 3.9297, "step": 1382500 }, { "epoch": 15.11, "learning_rate": 6.993781748117009e-06, "loss": 3.9388, "step": 1383000 }, { "epoch": 15.11, "learning_rate": 6.992686985461552e-06, "loss": 3.9296, "step": 1383500 }, { "epoch": 15.12, "learning_rate": 6.991592222806096e-06, "loss": 3.9291, "step": 1384000 }, { "epoch": 15.12, "learning_rate": 6.9904974601506405e-06, "loss": 3.9309, "step": 1384500 }, { "epoch": 15.13, "learning_rate": 6.989402697495183e-06, "loss": 3.9326, "step": 1385000 }, { "epoch": 15.13, "learning_rate": 6.988307934839728e-06, "loss": 3.9316, "step": 1385500 }, { "epoch": 15.14, "learning_rate": 6.987213172184271e-06, "loss": 3.9299, "step": 1386000 }, { "epoch": 15.15, "learning_rate": 6.986118409528815e-06, "loss": 3.9321, "step": 1386500 }, { "epoch": 15.15, "learning_rate": 6.9850236468733575e-06, "loss": 3.935, "step": 1387000 }, { "epoch": 15.16, "learning_rate": 6.983928884217902e-06, "loss": 3.929, "step": 1387500 }, { "epoch": 15.16, "learning_rate": 6.982834121562446e-06, "loss": 3.9329, "step": 1388000 }, { "epoch": 15.17, "learning_rate": 6.98173935890699e-06, "loss": 3.9354, "step": 1388500 }, { "epoch": 15.17, "learning_rate": 6.980644596251533e-06, "loss": 3.9354, "step": 1389000 }, { "epoch": 15.18, "learning_rate": 6.979549833596076e-06, "loss": 3.9281, "step": 1389500 }, { "epoch": 15.18, "learning_rate": 6.978455070940621e-06, "loss": 3.941, "step": 1390000 }, { "epoch": 15.19, "learning_rate": 6.9773603082851646e-06, "loss": 3.9348, "step": 1390500 }, { "epoch": 15.19, "learning_rate": 6.976265545629708e-06, "loss": 3.938, "step": 1391000 }, { "epoch": 15.2, "learning_rate": 6.975170782974252e-06, "loss": 3.9257, "step": 1391500 }, { "epoch": 15.21, "learning_rate": 6.974076020318795e-06, "loss": 3.9319, "step": 1392000 }, { "epoch": 15.21, "learning_rate": 6.9729812576633395e-06, "loss": 3.9283, "step": 1392500 }, { "epoch": 15.22, "learning_rate": 6.971886495007882e-06, "loss": 3.9355, "step": 1393000 }, { "epoch": 15.22, "learning_rate": 6.970791732352426e-06, "loss": 3.9318, "step": 1393500 }, { "epoch": 15.23, "learning_rate": 6.969696969696971e-06, "loss": 3.9304, "step": 1394000 }, { "epoch": 15.23, "learning_rate": 6.968602207041514e-06, "loss": 3.9313, "step": 1394500 }, { "epoch": 15.24, "learning_rate": 6.967507444386058e-06, "loss": 3.932, "step": 1395000 }, { "epoch": 15.24, "learning_rate": 6.966412681730601e-06, "loss": 3.9267, "step": 1395500 }, { "epoch": 15.25, "learning_rate": 6.965317919075145e-06, "loss": 3.9294, "step": 1396000 }, { "epoch": 15.25, "learning_rate": 6.964223156419689e-06, "loss": 3.9331, "step": 1396500 }, { "epoch": 15.26, "learning_rate": 6.963128393764232e-06, "loss": 3.929, "step": 1397000 }, { "epoch": 15.27, "learning_rate": 6.962033631108777e-06, "loss": 3.9337, "step": 1397500 }, { "epoch": 15.27, "learning_rate": 6.96093886845332e-06, "loss": 3.9366, "step": 1398000 }, { "epoch": 15.28, "learning_rate": 6.959844105797864e-06, "loss": 3.9321, "step": 1398500 }, { "epoch": 15.28, "learning_rate": 6.958749343142407e-06, "loss": 3.9255, "step": 1399000 }, { "epoch": 15.29, "learning_rate": 6.957654580486951e-06, "loss": 3.9318, "step": 1399500 }, { "epoch": 15.29, "learning_rate": 6.956559817831496e-06, "loss": 3.9318, "step": 1400000 }, { "epoch": 15.3, "learning_rate": 6.9554650551760386e-06, "loss": 3.9332, "step": 1400500 }, { "epoch": 15.3, "learning_rate": 6.954370292520582e-06, "loss": 3.9287, "step": 1401000 }, { "epoch": 15.31, "learning_rate": 6.953275529865125e-06, "loss": 3.9351, "step": 1401500 }, { "epoch": 15.32, "learning_rate": 6.95218076720967e-06, "loss": 3.9261, "step": 1402000 }, { "epoch": 15.32, "learning_rate": 6.951086004554213e-06, "loss": 3.9354, "step": 1402500 }, { "epoch": 15.33, "learning_rate": 6.949991241898757e-06, "loss": 3.9293, "step": 1403000 }, { "epoch": 15.33, "learning_rate": 6.948896479243301e-06, "loss": 3.9398, "step": 1403500 }, { "epoch": 15.34, "learning_rate": 6.947801716587844e-06, "loss": 3.9324, "step": 1404000 }, { "epoch": 15.34, "learning_rate": 6.9467069539323885e-06, "loss": 3.9234, "step": 1404500 }, { "epoch": 15.35, "learning_rate": 6.945612191276931e-06, "loss": 3.9349, "step": 1405000 }, { "epoch": 15.35, "learning_rate": 6.944517428621476e-06, "loss": 3.9347, "step": 1405500 }, { "epoch": 15.36, "learning_rate": 6.943422665966019e-06, "loss": 3.9318, "step": 1406000 }, { "epoch": 15.36, "learning_rate": 6.942327903310563e-06, "loss": 3.9236, "step": 1406500 }, { "epoch": 15.37, "learning_rate": 6.941233140655107e-06, "loss": 3.9355, "step": 1407000 }, { "epoch": 15.38, "learning_rate": 6.94013837799965e-06, "loss": 3.9312, "step": 1407500 }, { "epoch": 15.38, "learning_rate": 6.939043615344194e-06, "loss": 3.929, "step": 1408000 }, { "epoch": 15.39, "learning_rate": 6.937948852688738e-06, "loss": 3.9293, "step": 1408500 }, { "epoch": 15.39, "learning_rate": 6.936854090033281e-06, "loss": 3.9324, "step": 1409000 }, { "epoch": 15.4, "learning_rate": 6.935759327377824e-06, "loss": 3.9275, "step": 1409500 }, { "epoch": 15.4, "learning_rate": 6.934664564722369e-06, "loss": 3.9246, "step": 1410000 }, { "epoch": 15.4, "eval_accuracy": 0.38465328780582864, "eval_loss": 3.6968374252319336, "eval_runtime": 354.7199, "eval_samples_per_second": 869.294, "eval_steps_per_second": 13.585, "step": 1410000 }, { "epoch": 15.41, "learning_rate": 6.9335698020669126e-06, "loss": 3.9257, "step": 1410500 }, { "epoch": 15.41, "learning_rate": 6.932475039411456e-06, "loss": 3.9319, "step": 1411000 }, { "epoch": 15.42, "learning_rate": 6.931380276756e-06, "loss": 3.9326, "step": 1411500 }, { "epoch": 15.42, "learning_rate": 6.930285514100543e-06, "loss": 3.9288, "step": 1412000 }, { "epoch": 15.43, "learning_rate": 6.9291907514450875e-06, "loss": 3.9328, "step": 1412500 }, { "epoch": 15.44, "learning_rate": 6.928095988789631e-06, "loss": 3.9279, "step": 1413000 }, { "epoch": 15.44, "learning_rate": 6.927001226134174e-06, "loss": 3.9293, "step": 1413500 }, { "epoch": 15.45, "learning_rate": 6.925906463478719e-06, "loss": 3.9307, "step": 1414000 }, { "epoch": 15.45, "learning_rate": 6.924811700823262e-06, "loss": 3.9308, "step": 1414500 }, { "epoch": 15.46, "learning_rate": 6.923716938167806e-06, "loss": 3.9321, "step": 1415000 }, { "epoch": 15.46, "learning_rate": 6.922622175512349e-06, "loss": 3.9319, "step": 1415500 }, { "epoch": 15.47, "learning_rate": 6.921527412856893e-06, "loss": 3.9381, "step": 1416000 }, { "epoch": 15.47, "learning_rate": 6.9204326502014375e-06, "loss": 3.926, "step": 1416500 }, { "epoch": 15.48, "learning_rate": 6.91933788754598e-06, "loss": 3.9246, "step": 1417000 }, { "epoch": 15.48, "learning_rate": 6.918243124890525e-06, "loss": 3.9306, "step": 1417500 }, { "epoch": 15.49, "learning_rate": 6.917148362235068e-06, "loss": 3.9249, "step": 1418000 }, { "epoch": 15.5, "learning_rate": 6.916053599579612e-06, "loss": 3.932, "step": 1418500 }, { "epoch": 15.5, "learning_rate": 6.914958836924155e-06, "loss": 3.9306, "step": 1419000 }, { "epoch": 15.51, "learning_rate": 6.913864074268699e-06, "loss": 3.9319, "step": 1419500 }, { "epoch": 15.51, "learning_rate": 6.912769311613244e-06, "loss": 3.9241, "step": 1420000 }, { "epoch": 15.52, "learning_rate": 6.9116745489577866e-06, "loss": 3.9292, "step": 1420500 }, { "epoch": 15.52, "learning_rate": 6.91057978630233e-06, "loss": 3.928, "step": 1421000 }, { "epoch": 15.53, "learning_rate": 6.909485023646873e-06, "loss": 3.9252, "step": 1421500 }, { "epoch": 15.53, "learning_rate": 6.908390260991418e-06, "loss": 3.9306, "step": 1422000 }, { "epoch": 15.54, "learning_rate": 6.907295498335961e-06, "loss": 3.93, "step": 1422500 }, { "epoch": 15.54, "learning_rate": 6.906200735680505e-06, "loss": 3.9295, "step": 1423000 }, { "epoch": 15.55, "learning_rate": 6.905105973025049e-06, "loss": 3.93, "step": 1423500 }, { "epoch": 15.56, "learning_rate": 6.904011210369592e-06, "loss": 3.9348, "step": 1424000 }, { "epoch": 15.56, "learning_rate": 6.9029164477141365e-06, "loss": 3.9263, "step": 1424500 }, { "epoch": 15.57, "learning_rate": 6.901821685058679e-06, "loss": 3.9241, "step": 1425000 }, { "epoch": 15.57, "learning_rate": 6.900726922403224e-06, "loss": 3.9206, "step": 1425500 }, { "epoch": 15.58, "learning_rate": 6.899632159747768e-06, "loss": 3.9263, "step": 1426000 }, { "epoch": 15.58, "learning_rate": 6.898537397092311e-06, "loss": 3.9263, "step": 1426500 }, { "epoch": 15.59, "learning_rate": 6.897442634436855e-06, "loss": 3.9258, "step": 1427000 }, { "epoch": 15.59, "learning_rate": 6.896347871781398e-06, "loss": 3.9309, "step": 1427500 }, { "epoch": 15.6, "learning_rate": 6.895253109125942e-06, "loss": 3.9288, "step": 1428000 }, { "epoch": 15.6, "learning_rate": 6.894158346470486e-06, "loss": 3.9257, "step": 1428500 }, { "epoch": 15.61, "learning_rate": 6.893063583815029e-06, "loss": 3.9224, "step": 1429000 }, { "epoch": 15.62, "learning_rate": 6.891968821159574e-06, "loss": 3.9222, "step": 1429500 }, { "epoch": 15.62, "learning_rate": 6.890874058504117e-06, "loss": 3.9308, "step": 1430000 }, { "epoch": 15.63, "learning_rate": 6.8897792958486606e-06, "loss": 3.9339, "step": 1430500 }, { "epoch": 15.63, "learning_rate": 6.888684533193204e-06, "loss": 3.9228, "step": 1431000 }, { "epoch": 15.64, "learning_rate": 6.887589770537748e-06, "loss": 3.9217, "step": 1431500 }, { "epoch": 15.64, "learning_rate": 6.886495007882291e-06, "loss": 3.929, "step": 1432000 }, { "epoch": 15.65, "learning_rate": 6.8854002452268355e-06, "loss": 3.9324, "step": 1432500 }, { "epoch": 15.65, "learning_rate": 6.884305482571379e-06, "loss": 3.931, "step": 1433000 }, { "epoch": 15.66, "learning_rate": 6.883210719915923e-06, "loss": 3.9308, "step": 1433500 }, { "epoch": 15.66, "learning_rate": 6.882115957260467e-06, "loss": 3.9258, "step": 1434000 }, { "epoch": 15.67, "learning_rate": 6.88102119460501e-06, "loss": 3.9259, "step": 1434500 }, { "epoch": 15.68, "learning_rate": 6.879926431949554e-06, "loss": 3.9249, "step": 1435000 }, { "epoch": 15.68, "learning_rate": 6.878831669294097e-06, "loss": 3.9303, "step": 1435500 }, { "epoch": 15.69, "learning_rate": 6.877736906638641e-06, "loss": 3.9268, "step": 1436000 }, { "epoch": 15.69, "learning_rate": 6.8766421439831855e-06, "loss": 3.9345, "step": 1436500 }, { "epoch": 15.7, "learning_rate": 6.875547381327728e-06, "loss": 3.9248, "step": 1437000 }, { "epoch": 15.7, "learning_rate": 6.874452618672273e-06, "loss": 3.9322, "step": 1437500 }, { "epoch": 15.71, "learning_rate": 6.873357856016816e-06, "loss": 3.925, "step": 1438000 }, { "epoch": 15.71, "learning_rate": 6.87226309336136e-06, "loss": 3.9313, "step": 1438500 }, { "epoch": 15.72, "learning_rate": 6.871168330705904e-06, "loss": 3.9266, "step": 1439000 }, { "epoch": 15.72, "learning_rate": 6.870073568050447e-06, "loss": 3.9231, "step": 1439500 }, { "epoch": 15.73, "learning_rate": 6.868978805394992e-06, "loss": 3.9281, "step": 1440000 }, { "epoch": 15.73, "eval_accuracy": 0.38509776975051924, "eval_loss": 3.6924760341644287, "eval_runtime": 353.5448, "eval_samples_per_second": 872.184, "eval_steps_per_second": 13.631, "step": 1440000 }, { "epoch": 15.74, "learning_rate": 6.8678840427395346e-06, "loss": 3.9187, "step": 1440500 }, { "epoch": 15.74, "learning_rate": 6.866789280084078e-06, "loss": 3.9292, "step": 1441000 }, { "epoch": 15.75, "learning_rate": 6.865694517428621e-06, "loss": 3.9272, "step": 1441500 }, { "epoch": 15.75, "learning_rate": 6.864599754773166e-06, "loss": 3.9305, "step": 1442000 }, { "epoch": 15.76, "learning_rate": 6.8635049921177095e-06, "loss": 3.9248, "step": 1442500 }, { "epoch": 15.76, "learning_rate": 6.862410229462253e-06, "loss": 3.9258, "step": 1443000 }, { "epoch": 15.77, "learning_rate": 6.861315466806797e-06, "loss": 3.9257, "step": 1443500 }, { "epoch": 15.77, "learning_rate": 6.86022070415134e-06, "loss": 3.9254, "step": 1444000 }, { "epoch": 15.78, "learning_rate": 6.8591259414958845e-06, "loss": 3.9272, "step": 1444500 }, { "epoch": 15.78, "learning_rate": 6.858031178840427e-06, "loss": 3.9287, "step": 1445000 }, { "epoch": 15.79, "learning_rate": 6.856936416184972e-06, "loss": 3.927, "step": 1445500 }, { "epoch": 15.8, "learning_rate": 6.855841653529516e-06, "loss": 3.9277, "step": 1446000 }, { "epoch": 15.8, "learning_rate": 6.854746890874059e-06, "loss": 3.9317, "step": 1446500 }, { "epoch": 15.81, "learning_rate": 6.853652128218603e-06, "loss": 3.925, "step": 1447000 }, { "epoch": 15.81, "learning_rate": 6.852557365563146e-06, "loss": 3.9231, "step": 1447500 }, { "epoch": 15.82, "learning_rate": 6.85146260290769e-06, "loss": 3.9274, "step": 1448000 }, { "epoch": 15.82, "learning_rate": 6.850367840252234e-06, "loss": 3.9166, "step": 1448500 }, { "epoch": 15.83, "learning_rate": 6.849273077596777e-06, "loss": 3.9231, "step": 1449000 }, { "epoch": 15.83, "learning_rate": 6.848178314941322e-06, "loss": 3.9331, "step": 1449500 }, { "epoch": 15.84, "learning_rate": 6.847083552285865e-06, "loss": 3.9273, "step": 1450000 }, { "epoch": 15.84, "learning_rate": 6.8459887896304086e-06, "loss": 3.9231, "step": 1450500 }, { "epoch": 15.85, "learning_rate": 6.844894026974952e-06, "loss": 3.9267, "step": 1451000 }, { "epoch": 15.86, "learning_rate": 6.843799264319496e-06, "loss": 3.9276, "step": 1451500 }, { "epoch": 15.86, "learning_rate": 6.842704501664041e-06, "loss": 3.9267, "step": 1452000 }, { "epoch": 15.87, "learning_rate": 6.8416097390085835e-06, "loss": 3.924, "step": 1452500 }, { "epoch": 15.87, "learning_rate": 6.840514976353127e-06, "loss": 3.9212, "step": 1453000 }, { "epoch": 15.88, "learning_rate": 6.839420213697671e-06, "loss": 3.9289, "step": 1453500 }, { "epoch": 15.88, "learning_rate": 6.838325451042215e-06, "loss": 3.9296, "step": 1454000 }, { "epoch": 15.89, "learning_rate": 6.837230688386758e-06, "loss": 3.921, "step": 1454500 }, { "epoch": 15.89, "learning_rate": 6.836135925731302e-06, "loss": 3.9248, "step": 1455000 }, { "epoch": 15.9, "learning_rate": 6.835041163075846e-06, "loss": 3.927, "step": 1455500 }, { "epoch": 15.9, "learning_rate": 6.833946400420389e-06, "loss": 3.9354, "step": 1456000 }, { "epoch": 15.91, "learning_rate": 6.8328516377649335e-06, "loss": 3.9222, "step": 1456500 }, { "epoch": 15.92, "learning_rate": 6.831756875109476e-06, "loss": 3.9224, "step": 1457000 }, { "epoch": 15.92, "learning_rate": 6.830662112454021e-06, "loss": 3.9202, "step": 1457500 }, { "epoch": 15.93, "learning_rate": 6.829567349798564e-06, "loss": 3.9253, "step": 1458000 }, { "epoch": 15.93, "learning_rate": 6.828472587143108e-06, "loss": 3.9284, "step": 1458500 }, { "epoch": 15.94, "learning_rate": 6.827377824487652e-06, "loss": 3.924, "step": 1459000 }, { "epoch": 15.94, "learning_rate": 6.826283061832195e-06, "loss": 3.9249, "step": 1459500 }, { "epoch": 15.95, "learning_rate": 6.82518829917674e-06, "loss": 3.9265, "step": 1460000 }, { "epoch": 15.95, "learning_rate": 6.8240935365212826e-06, "loss": 3.927, "step": 1460500 }, { "epoch": 15.96, "learning_rate": 6.822998773865826e-06, "loss": 3.9249, "step": 1461000 }, { "epoch": 15.97, "learning_rate": 6.821904011210369e-06, "loss": 3.9224, "step": 1461500 }, { "epoch": 15.97, "learning_rate": 6.820809248554914e-06, "loss": 3.9199, "step": 1462000 }, { "epoch": 15.98, "learning_rate": 6.8197144858994575e-06, "loss": 3.9293, "step": 1462500 }, { "epoch": 15.98, "learning_rate": 6.818619723244001e-06, "loss": 3.9196, "step": 1463000 }, { "epoch": 15.99, "learning_rate": 6.817524960588545e-06, "loss": 3.9251, "step": 1463500 }, { "epoch": 15.99, "learning_rate": 6.816430197933088e-06, "loss": 3.9216, "step": 1464000 }, { "epoch": 16.0, "learning_rate": 6.8153354352776325e-06, "loss": 3.9307, "step": 1464500 }, { "epoch": 16.0, "learning_rate": 6.814240672622176e-06, "loss": 3.9188, "step": 1465000 }, { "epoch": 16.01, "learning_rate": 6.81314590996672e-06, "loss": 3.9226, "step": 1465500 }, { "epoch": 16.01, "learning_rate": 6.812051147311264e-06, "loss": 3.9234, "step": 1466000 }, { "epoch": 16.02, "learning_rate": 6.810956384655807e-06, "loss": 3.9185, "step": 1466500 }, { "epoch": 16.03, "learning_rate": 6.809861622000351e-06, "loss": 3.9203, "step": 1467000 }, { "epoch": 16.03, "learning_rate": 6.808766859344894e-06, "loss": 3.9206, "step": 1467500 }, { "epoch": 16.04, "learning_rate": 6.807672096689439e-06, "loss": 3.929, "step": 1468000 }, { "epoch": 16.04, "learning_rate": 6.8065773340339825e-06, "loss": 3.9269, "step": 1468500 }, { "epoch": 16.05, "learning_rate": 6.805482571378525e-06, "loss": 3.9268, "step": 1469000 }, { "epoch": 16.05, "learning_rate": 6.80438780872307e-06, "loss": 3.9217, "step": 1469500 }, { "epoch": 16.06, "learning_rate": 6.803293046067613e-06, "loss": 3.9177, "step": 1470000 }, { "epoch": 16.06, "eval_accuracy": 0.3849033392978044, "eval_loss": 3.691620349884033, "eval_runtime": 356.1048, "eval_samples_per_second": 865.914, "eval_steps_per_second": 13.533, "step": 1470000 }, { "epoch": 16.06, "learning_rate": 6.802198283412157e-06, "loss": 3.9219, "step": 1470500 }, { "epoch": 16.07, "learning_rate": 6.8011035207567e-06, "loss": 3.9261, "step": 1471000 }, { "epoch": 16.07, "learning_rate": 6.800008758101244e-06, "loss": 3.9186, "step": 1471500 }, { "epoch": 16.08, "learning_rate": 6.798913995445789e-06, "loss": 3.9235, "step": 1472000 }, { "epoch": 16.09, "learning_rate": 6.7978192327903315e-06, "loss": 3.9201, "step": 1472500 }, { "epoch": 16.09, "learning_rate": 6.796724470134875e-06, "loss": 3.9267, "step": 1473000 }, { "epoch": 16.1, "learning_rate": 6.795629707479419e-06, "loss": 3.9252, "step": 1473500 }, { "epoch": 16.1, "learning_rate": 6.794534944823963e-06, "loss": 3.9237, "step": 1474000 }, { "epoch": 16.11, "learning_rate": 6.793440182168506e-06, "loss": 3.9246, "step": 1474500 }, { "epoch": 16.11, "learning_rate": 6.79234541951305e-06, "loss": 3.9275, "step": 1475000 }, { "epoch": 16.12, "learning_rate": 6.791250656857594e-06, "loss": 3.9222, "step": 1475500 }, { "epoch": 16.12, "learning_rate": 6.790155894202137e-06, "loss": 3.9189, "step": 1476000 }, { "epoch": 16.13, "learning_rate": 6.7890611315466815e-06, "loss": 3.9272, "step": 1476500 }, { "epoch": 16.13, "learning_rate": 6.787966368891224e-06, "loss": 3.9206, "step": 1477000 }, { "epoch": 16.14, "learning_rate": 6.786871606235769e-06, "loss": 3.9235, "step": 1477500 }, { "epoch": 16.15, "learning_rate": 6.785776843580313e-06, "loss": 3.922, "step": 1478000 }, { "epoch": 16.15, "learning_rate": 6.784682080924856e-06, "loss": 3.9243, "step": 1478500 }, { "epoch": 16.16, "learning_rate": 6.7835873182694e-06, "loss": 3.9252, "step": 1479000 }, { "epoch": 16.16, "learning_rate": 6.782492555613943e-06, "loss": 3.9227, "step": 1479500 }, { "epoch": 16.17, "learning_rate": 6.781397792958488e-06, "loss": 3.9192, "step": 1480000 }, { "epoch": 16.17, "learning_rate": 6.780303030303031e-06, "loss": 3.9257, "step": 1480500 }, { "epoch": 16.18, "learning_rate": 6.779208267647574e-06, "loss": 3.9209, "step": 1481000 }, { "epoch": 16.18, "learning_rate": 6.778113504992119e-06, "loss": 3.9285, "step": 1481500 }, { "epoch": 16.19, "learning_rate": 6.777018742336662e-06, "loss": 3.926, "step": 1482000 }, { "epoch": 16.19, "learning_rate": 6.7759239796812056e-06, "loss": 3.92, "step": 1482500 }, { "epoch": 16.2, "learning_rate": 6.774829217025749e-06, "loss": 3.9203, "step": 1483000 }, { "epoch": 16.21, "learning_rate": 6.773734454370293e-06, "loss": 3.9218, "step": 1483500 }, { "epoch": 16.21, "learning_rate": 6.772639691714836e-06, "loss": 3.9223, "step": 1484000 }, { "epoch": 16.22, "learning_rate": 6.7715449290593805e-06, "loss": 3.921, "step": 1484500 }, { "epoch": 16.22, "learning_rate": 6.770450166403924e-06, "loss": 3.913, "step": 1485000 }, { "epoch": 16.23, "learning_rate": 6.769355403748468e-06, "loss": 3.9181, "step": 1485500 }, { "epoch": 16.23, "learning_rate": 6.768260641093012e-06, "loss": 3.9238, "step": 1486000 }, { "epoch": 16.24, "learning_rate": 6.767165878437555e-06, "loss": 3.9203, "step": 1486500 }, { "epoch": 16.24, "learning_rate": 6.766071115782099e-06, "loss": 3.9158, "step": 1487000 }, { "epoch": 16.25, "learning_rate": 6.764976353126643e-06, "loss": 3.9173, "step": 1487500 }, { "epoch": 16.25, "learning_rate": 6.763881590471187e-06, "loss": 3.918, "step": 1488000 }, { "epoch": 16.26, "learning_rate": 6.7627868278157305e-06, "loss": 3.9188, "step": 1488500 }, { "epoch": 16.27, "learning_rate": 6.761692065160273e-06, "loss": 3.9209, "step": 1489000 }, { "epoch": 16.27, "learning_rate": 6.760597302504818e-06, "loss": 3.9245, "step": 1489500 }, { "epoch": 16.28, "learning_rate": 6.759502539849361e-06, "loss": 3.9268, "step": 1490000 }, { "epoch": 16.28, "learning_rate": 6.758407777193905e-06, "loss": 3.9222, "step": 1490500 }, { "epoch": 16.29, "learning_rate": 6.757313014538449e-06, "loss": 3.9275, "step": 1491000 }, { "epoch": 16.29, "learning_rate": 6.756218251882992e-06, "loss": 3.9291, "step": 1491500 }, { "epoch": 16.3, "learning_rate": 6.755123489227537e-06, "loss": 3.9278, "step": 1492000 }, { "epoch": 16.3, "learning_rate": 6.7540287265720796e-06, "loss": 3.9242, "step": 1492500 }, { "epoch": 16.31, "learning_rate": 6.752933963916623e-06, "loss": 3.9207, "step": 1493000 }, { "epoch": 16.31, "learning_rate": 6.751839201261167e-06, "loss": 3.9152, "step": 1493500 }, { "epoch": 16.32, "learning_rate": 6.750744438605711e-06, "loss": 3.925, "step": 1494000 }, { "epoch": 16.33, "learning_rate": 6.749649675950255e-06, "loss": 3.9181, "step": 1494500 }, { "epoch": 16.33, "learning_rate": 6.748554913294798e-06, "loss": 3.9201, "step": 1495000 }, { "epoch": 16.34, "learning_rate": 6.747460150639342e-06, "loss": 3.9197, "step": 1495500 }, { "epoch": 16.34, "learning_rate": 6.746365387983885e-06, "loss": 3.9176, "step": 1496000 }, { "epoch": 16.35, "learning_rate": 6.7452706253284295e-06, "loss": 3.9196, "step": 1496500 }, { "epoch": 16.35, "learning_rate": 6.744175862672972e-06, "loss": 3.9192, "step": 1497000 }, { "epoch": 16.36, "learning_rate": 6.743081100017517e-06, "loss": 3.9269, "step": 1497500 }, { "epoch": 16.36, "learning_rate": 6.741986337362061e-06, "loss": 3.9213, "step": 1498000 }, { "epoch": 16.37, "learning_rate": 6.740891574706604e-06, "loss": 3.9258, "step": 1498500 }, { "epoch": 16.37, "learning_rate": 6.739796812051148e-06, "loss": 3.9256, "step": 1499000 }, { "epoch": 16.38, "learning_rate": 6.738702049395691e-06, "loss": 3.9176, "step": 1499500 }, { "epoch": 16.39, "learning_rate": 6.737607286740236e-06, "loss": 3.9216, "step": 1500000 }, { "epoch": 16.39, "eval_accuracy": 0.3855024946143733, "eval_loss": 3.6870293617248535, "eval_runtime": 353.4983, "eval_samples_per_second": 872.298, "eval_steps_per_second": 13.632, "step": 1500000 }, { "epoch": 16.39, "learning_rate": 6.7365125240847794e-06, "loss": 3.9246, "step": 1500500 }, { "epoch": 16.4, "learning_rate": 6.735417761429322e-06, "loss": 3.9203, "step": 1501000 }, { "epoch": 16.4, "learning_rate": 6.734322998773867e-06, "loss": 3.9211, "step": 1501500 }, { "epoch": 16.41, "learning_rate": 6.73322823611841e-06, "loss": 3.9251, "step": 1502000 }, { "epoch": 16.41, "learning_rate": 6.732133473462954e-06, "loss": 3.9203, "step": 1502500 }, { "epoch": 16.42, "learning_rate": 6.731038710807497e-06, "loss": 3.9225, "step": 1503000 }, { "epoch": 16.42, "learning_rate": 6.729943948152041e-06, "loss": 3.9146, "step": 1503500 }, { "epoch": 16.43, "learning_rate": 6.728849185496586e-06, "loss": 3.927, "step": 1504000 }, { "epoch": 16.43, "learning_rate": 6.7277544228411285e-06, "loss": 3.9232, "step": 1504500 }, { "epoch": 16.44, "learning_rate": 6.726659660185672e-06, "loss": 3.9183, "step": 1505000 }, { "epoch": 16.45, "learning_rate": 6.725564897530216e-06, "loss": 3.9237, "step": 1505500 }, { "epoch": 16.45, "learning_rate": 6.72447013487476e-06, "loss": 3.9235, "step": 1506000 }, { "epoch": 16.46, "learning_rate": 6.723375372219303e-06, "loss": 3.9106, "step": 1506500 }, { "epoch": 16.46, "learning_rate": 6.722280609563847e-06, "loss": 3.9154, "step": 1507000 }, { "epoch": 16.47, "learning_rate": 6.721185846908391e-06, "loss": 3.9194, "step": 1507500 }, { "epoch": 16.47, "learning_rate": 6.720091084252935e-06, "loss": 3.9126, "step": 1508000 }, { "epoch": 16.48, "learning_rate": 6.7189963215974785e-06, "loss": 3.921, "step": 1508500 }, { "epoch": 16.48, "learning_rate": 6.717901558942021e-06, "loss": 3.9192, "step": 1509000 }, { "epoch": 16.49, "learning_rate": 6.716806796286566e-06, "loss": 3.9232, "step": 1509500 }, { "epoch": 16.49, "learning_rate": 6.715712033631109e-06, "loss": 3.9162, "step": 1510000 }, { "epoch": 16.5, "learning_rate": 6.714617270975653e-06, "loss": 3.9169, "step": 1510500 }, { "epoch": 16.51, "learning_rate": 6.713522508320197e-06, "loss": 3.9205, "step": 1511000 }, { "epoch": 16.51, "learning_rate": 6.71242774566474e-06, "loss": 3.9217, "step": 1511500 }, { "epoch": 16.52, "learning_rate": 6.711332983009285e-06, "loss": 3.9191, "step": 1512000 }, { "epoch": 16.52, "learning_rate": 6.7102382203538276e-06, "loss": 3.9195, "step": 1512500 }, { "epoch": 16.53, "learning_rate": 6.709143457698371e-06, "loss": 3.9212, "step": 1513000 }, { "epoch": 16.53, "learning_rate": 6.708048695042916e-06, "loss": 3.9213, "step": 1513500 }, { "epoch": 16.54, "learning_rate": 6.706953932387459e-06, "loss": 3.9139, "step": 1514000 }, { "epoch": 16.54, "learning_rate": 6.705859169732003e-06, "loss": 3.9233, "step": 1514500 }, { "epoch": 16.55, "learning_rate": 6.704764407076546e-06, "loss": 3.917, "step": 1515000 }, { "epoch": 16.55, "learning_rate": 6.70366964442109e-06, "loss": 3.9178, "step": 1515500 }, { "epoch": 16.56, "learning_rate": 6.702574881765633e-06, "loss": 3.9179, "step": 1516000 }, { "epoch": 16.57, "learning_rate": 6.7014801191101775e-06, "loss": 3.9216, "step": 1516500 }, { "epoch": 16.57, "learning_rate": 6.700385356454721e-06, "loss": 3.9244, "step": 1517000 }, { "epoch": 16.58, "learning_rate": 6.699290593799265e-06, "loss": 3.9212, "step": 1517500 }, { "epoch": 16.58, "learning_rate": 6.698195831143809e-06, "loss": 3.9229, "step": 1518000 }, { "epoch": 16.59, "learning_rate": 6.697101068488352e-06, "loss": 3.9183, "step": 1518500 }, { "epoch": 16.59, "learning_rate": 6.696006305832896e-06, "loss": 3.9153, "step": 1519000 }, { "epoch": 16.6, "learning_rate": 6.694911543177439e-06, "loss": 3.9181, "step": 1519500 }, { "epoch": 16.6, "learning_rate": 6.693816780521984e-06, "loss": 3.9151, "step": 1520000 }, { "epoch": 16.61, "learning_rate": 6.6927220178665274e-06, "loss": 3.919, "step": 1520500 }, { "epoch": 16.61, "learning_rate": 6.69162725521107e-06, "loss": 3.9175, "step": 1521000 }, { "epoch": 16.62, "learning_rate": 6.690532492555615e-06, "loss": 3.9133, "step": 1521500 }, { "epoch": 16.63, "learning_rate": 6.689437729900158e-06, "loss": 3.9156, "step": 1522000 }, { "epoch": 16.63, "learning_rate": 6.688342967244702e-06, "loss": 3.9139, "step": 1522500 }, { "epoch": 16.64, "learning_rate": 6.687248204589245e-06, "loss": 3.9209, "step": 1523000 }, { "epoch": 16.64, "learning_rate": 6.686153441933789e-06, "loss": 3.9138, "step": 1523500 }, { "epoch": 16.65, "learning_rate": 6.685058679278334e-06, "loss": 3.9202, "step": 1524000 }, { "epoch": 16.65, "learning_rate": 6.6839639166228765e-06, "loss": 3.9103, "step": 1524500 }, { "epoch": 16.66, "learning_rate": 6.68286915396742e-06, "loss": 3.9237, "step": 1525000 }, { "epoch": 16.66, "learning_rate": 6.681774391311964e-06, "loss": 3.9206, "step": 1525500 }, { "epoch": 16.67, "learning_rate": 6.680679628656508e-06, "loss": 3.9148, "step": 1526000 }, { "epoch": 16.68, "learning_rate": 6.679584866001052e-06, "loss": 3.9145, "step": 1526500 }, { "epoch": 16.68, "learning_rate": 6.678490103345595e-06, "loss": 3.9182, "step": 1527000 }, { "epoch": 16.69, "learning_rate": 6.677395340690139e-06, "loss": 3.9251, "step": 1527500 }, { "epoch": 16.69, "learning_rate": 6.676300578034683e-06, "loss": 3.9217, "step": 1528000 }, { "epoch": 16.7, "learning_rate": 6.6752058153792265e-06, "loss": 3.918, "step": 1528500 }, { "epoch": 16.7, "learning_rate": 6.674111052723769e-06, "loss": 3.9104, "step": 1529000 }, { "epoch": 16.71, "learning_rate": 6.673016290068314e-06, "loss": 3.917, "step": 1529500 }, { "epoch": 16.71, "learning_rate": 6.671921527412858e-06, "loss": 3.9141, "step": 1530000 }, { "epoch": 16.71, "eval_accuracy": 0.3862657473228401, "eval_loss": 3.682206630706787, "eval_runtime": 353.1915, "eval_samples_per_second": 873.056, "eval_steps_per_second": 13.644, "step": 1530000 }, { "epoch": 16.72, "learning_rate": 6.670826764757401e-06, "loss": 3.9259, "step": 1530500 }, { "epoch": 16.72, "learning_rate": 6.669732002101945e-06, "loss": 3.9155, "step": 1531000 }, { "epoch": 16.73, "learning_rate": 6.668637239446488e-06, "loss": 3.9183, "step": 1531500 }, { "epoch": 16.74, "learning_rate": 6.667542476791033e-06, "loss": 3.9213, "step": 1532000 }, { "epoch": 16.74, "learning_rate": 6.6664477141355756e-06, "loss": 3.9161, "step": 1532500 }, { "epoch": 16.75, "learning_rate": 6.665352951480119e-06, "loss": 3.9175, "step": 1533000 }, { "epoch": 16.75, "learning_rate": 6.664258188824664e-06, "loss": 3.9125, "step": 1533500 }, { "epoch": 16.76, "learning_rate": 6.663163426169207e-06, "loss": 3.9192, "step": 1534000 }, { "epoch": 16.76, "learning_rate": 6.662068663513751e-06, "loss": 3.9173, "step": 1534500 }, { "epoch": 16.77, "learning_rate": 6.660973900858294e-06, "loss": 3.9172, "step": 1535000 }, { "epoch": 16.77, "learning_rate": 6.659879138202838e-06, "loss": 3.9095, "step": 1535500 }, { "epoch": 16.78, "learning_rate": 6.658784375547381e-06, "loss": 3.9114, "step": 1536000 }, { "epoch": 16.78, "learning_rate": 6.6576896128919255e-06, "loss": 3.9192, "step": 1536500 }, { "epoch": 16.79, "learning_rate": 6.65659485023647e-06, "loss": 3.9046, "step": 1537000 }, { "epoch": 16.8, "learning_rate": 6.655500087581013e-06, "loss": 3.9132, "step": 1537500 }, { "epoch": 16.8, "learning_rate": 6.654405324925557e-06, "loss": 3.9166, "step": 1538000 }, { "epoch": 16.81, "learning_rate": 6.6533105622701e-06, "loss": 3.9087, "step": 1538500 }, { "epoch": 16.81, "learning_rate": 6.652215799614644e-06, "loss": 3.9141, "step": 1539000 }, { "epoch": 16.82, "learning_rate": 6.651121036959188e-06, "loss": 3.9154, "step": 1539500 }, { "epoch": 16.82, "learning_rate": 6.650026274303732e-06, "loss": 3.9199, "step": 1540000 }, { "epoch": 16.83, "learning_rate": 6.6489315116482754e-06, "loss": 3.9175, "step": 1540500 }, { "epoch": 16.83, "learning_rate": 6.647836748992818e-06, "loss": 3.914, "step": 1541000 }, { "epoch": 16.84, "learning_rate": 6.646741986337363e-06, "loss": 3.9222, "step": 1541500 }, { "epoch": 16.84, "learning_rate": 6.645647223681906e-06, "loss": 3.9155, "step": 1542000 }, { "epoch": 16.85, "learning_rate": 6.64455246102645e-06, "loss": 3.9173, "step": 1542500 }, { "epoch": 16.86, "learning_rate": 6.643457698370994e-06, "loss": 3.9129, "step": 1543000 }, { "epoch": 16.86, "learning_rate": 6.642362935715537e-06, "loss": 3.9157, "step": 1543500 }, { "epoch": 16.87, "learning_rate": 6.641268173060082e-06, "loss": 3.9151, "step": 1544000 }, { "epoch": 16.87, "learning_rate": 6.6401734104046245e-06, "loss": 3.9154, "step": 1544500 }, { "epoch": 16.88, "learning_rate": 6.639078647749168e-06, "loss": 3.9086, "step": 1545000 }, { "epoch": 16.88, "learning_rate": 6.637983885093712e-06, "loss": 3.9143, "step": 1545500 }, { "epoch": 16.89, "learning_rate": 6.636889122438256e-06, "loss": 3.9125, "step": 1546000 }, { "epoch": 16.89, "learning_rate": 6.6357943597828e-06, "loss": 3.9128, "step": 1546500 }, { "epoch": 16.9, "learning_rate": 6.634699597127343e-06, "loss": 3.9135, "step": 1547000 }, { "epoch": 16.9, "learning_rate": 6.633604834471887e-06, "loss": 3.9152, "step": 1547500 }, { "epoch": 16.91, "learning_rate": 6.632510071816431e-06, "loss": 3.9117, "step": 1548000 }, { "epoch": 16.92, "learning_rate": 6.6314153091609745e-06, "loss": 3.9127, "step": 1548500 }, { "epoch": 16.92, "learning_rate": 6.630320546505517e-06, "loss": 3.9134, "step": 1549000 }, { "epoch": 16.93, "learning_rate": 6.629225783850062e-06, "loss": 3.9129, "step": 1549500 }, { "epoch": 16.93, "learning_rate": 6.628131021194606e-06, "loss": 3.9133, "step": 1550000 }, { "epoch": 16.94, "learning_rate": 6.627036258539149e-06, "loss": 3.9166, "step": 1550500 }, { "epoch": 16.94, "learning_rate": 6.625941495883693e-06, "loss": 3.9077, "step": 1551000 }, { "epoch": 16.95, "learning_rate": 6.624846733228236e-06, "loss": 3.9189, "step": 1551500 }, { "epoch": 16.95, "learning_rate": 6.623751970572781e-06, "loss": 3.9167, "step": 1552000 }, { "epoch": 16.96, "learning_rate": 6.622657207917324e-06, "loss": 3.9164, "step": 1552500 }, { "epoch": 16.96, "learning_rate": 6.621562445261867e-06, "loss": 3.9167, "step": 1553000 }, { "epoch": 16.97, "learning_rate": 6.620467682606412e-06, "loss": 3.905, "step": 1553500 }, { "epoch": 16.98, "learning_rate": 6.619372919950955e-06, "loss": 3.9113, "step": 1554000 }, { "epoch": 16.98, "learning_rate": 6.618278157295499e-06, "loss": 3.9177, "step": 1554500 }, { "epoch": 16.99, "learning_rate": 6.617183394640042e-06, "loss": 3.9184, "step": 1555000 }, { "epoch": 16.99, "learning_rate": 6.616088631984586e-06, "loss": 3.9163, "step": 1555500 }, { "epoch": 17.0, "learning_rate": 6.614993869329131e-06, "loss": 3.9162, "step": 1556000 }, { "epoch": 17.0, "learning_rate": 6.6138991066736735e-06, "loss": 3.9147, "step": 1556500 }, { "epoch": 17.01, "learning_rate": 6.612804344018218e-06, "loss": 3.9142, "step": 1557000 }, { "epoch": 17.01, "learning_rate": 6.611709581362761e-06, "loss": 3.9126, "step": 1557500 }, { "epoch": 17.02, "learning_rate": 6.610614818707305e-06, "loss": 3.9097, "step": 1558000 }, { "epoch": 17.02, "learning_rate": 6.609520056051848e-06, "loss": 3.9183, "step": 1558500 }, { "epoch": 17.03, "learning_rate": 6.608425293396392e-06, "loss": 3.9083, "step": 1559000 }, { "epoch": 17.04, "learning_rate": 6.607330530740936e-06, "loss": 3.9093, "step": 1559500 }, { "epoch": 17.04, "learning_rate": 6.60623576808548e-06, "loss": 3.9154, "step": 1560000 }, { "epoch": 17.04, "eval_accuracy": 0.38636553099073784, "eval_loss": 3.6804287433624268, "eval_runtime": 354.9937, "eval_samples_per_second": 868.624, "eval_steps_per_second": 13.575, "step": 1560000 }, { "epoch": 17.05, "learning_rate": 6.6051410054300235e-06, "loss": 3.9088, "step": 1560500 }, { "epoch": 17.05, "learning_rate": 6.604046242774566e-06, "loss": 3.9174, "step": 1561000 }, { "epoch": 17.06, "learning_rate": 6.602951480119111e-06, "loss": 3.9127, "step": 1561500 }, { "epoch": 17.06, "learning_rate": 6.601856717463655e-06, "loss": 3.9127, "step": 1562000 }, { "epoch": 17.07, "learning_rate": 6.600761954808198e-06, "loss": 3.9148, "step": 1562500 }, { "epoch": 17.07, "learning_rate": 6.599667192152742e-06, "loss": 3.9081, "step": 1563000 }, { "epoch": 17.08, "learning_rate": 6.598572429497285e-06, "loss": 3.9151, "step": 1563500 }, { "epoch": 17.08, "learning_rate": 6.59747766684183e-06, "loss": 3.9136, "step": 1564000 }, { "epoch": 17.09, "learning_rate": 6.5963829041863725e-06, "loss": 3.9137, "step": 1564500 }, { "epoch": 17.1, "learning_rate": 6.595288141530916e-06, "loss": 3.9093, "step": 1565000 }, { "epoch": 17.1, "learning_rate": 6.594193378875461e-06, "loss": 3.9113, "step": 1565500 }, { "epoch": 17.11, "learning_rate": 6.593098616220004e-06, "loss": 3.9125, "step": 1566000 }, { "epoch": 17.11, "learning_rate": 6.592003853564548e-06, "loss": 3.9094, "step": 1566500 }, { "epoch": 17.12, "learning_rate": 6.590909090909091e-06, "loss": 3.904, "step": 1567000 }, { "epoch": 17.12, "learning_rate": 6.589814328253635e-06, "loss": 3.9109, "step": 1567500 }, { "epoch": 17.13, "learning_rate": 6.588719565598179e-06, "loss": 3.9092, "step": 1568000 }, { "epoch": 17.13, "learning_rate": 6.5876248029427225e-06, "loss": 3.917, "step": 1568500 }, { "epoch": 17.14, "learning_rate": 6.586530040287267e-06, "loss": 3.915, "step": 1569000 }, { "epoch": 17.14, "learning_rate": 6.58543527763181e-06, "loss": 3.909, "step": 1569500 }, { "epoch": 17.15, "learning_rate": 6.584340514976354e-06, "loss": 3.9167, "step": 1570000 }, { "epoch": 17.16, "learning_rate": 6.583245752320897e-06, "loss": 3.9163, "step": 1570500 }, { "epoch": 17.16, "learning_rate": 6.582150989665441e-06, "loss": 3.9151, "step": 1571000 }, { "epoch": 17.17, "learning_rate": 6.581056227009984e-06, "loss": 3.9175, "step": 1571500 }, { "epoch": 17.17, "learning_rate": 6.579961464354529e-06, "loss": 3.9139, "step": 1572000 }, { "epoch": 17.18, "learning_rate": 6.578866701699072e-06, "loss": 3.9084, "step": 1572500 }, { "epoch": 17.18, "learning_rate": 6.577771939043615e-06, "loss": 3.9124, "step": 1573000 }, { "epoch": 17.19, "learning_rate": 6.57667717638816e-06, "loss": 3.9165, "step": 1573500 }, { "epoch": 17.19, "learning_rate": 6.575582413732703e-06, "loss": 3.9099, "step": 1574000 }, { "epoch": 17.2, "learning_rate": 6.574487651077247e-06, "loss": 3.9185, "step": 1574500 }, { "epoch": 17.2, "learning_rate": 6.573392888421791e-06, "loss": 3.9144, "step": 1575000 }, { "epoch": 17.21, "learning_rate": 6.572298125766334e-06, "loss": 3.9055, "step": 1575500 }, { "epoch": 17.22, "learning_rate": 6.571203363110879e-06, "loss": 3.9171, "step": 1576000 }, { "epoch": 17.22, "learning_rate": 6.5701086004554215e-06, "loss": 3.9103, "step": 1576500 }, { "epoch": 17.23, "learning_rate": 6.569013837799966e-06, "loss": 3.9056, "step": 1577000 }, { "epoch": 17.23, "learning_rate": 6.567919075144509e-06, "loss": 3.908, "step": 1577500 }, { "epoch": 17.24, "learning_rate": 6.566824312489053e-06, "loss": 3.9129, "step": 1578000 }, { "epoch": 17.24, "learning_rate": 6.565729549833597e-06, "loss": 3.9147, "step": 1578500 }, { "epoch": 17.25, "learning_rate": 6.56463478717814e-06, "loss": 3.9112, "step": 1579000 }, { "epoch": 17.25, "learning_rate": 6.563540024522684e-06, "loss": 3.9163, "step": 1579500 }, { "epoch": 17.26, "learning_rate": 6.562445261867228e-06, "loss": 3.9104, "step": 1580000 }, { "epoch": 17.26, "learning_rate": 6.5613504992117715e-06, "loss": 3.9079, "step": 1580500 }, { "epoch": 17.27, "learning_rate": 6.560255736556314e-06, "loss": 3.9133, "step": 1581000 }, { "epoch": 17.28, "learning_rate": 6.559160973900859e-06, "loss": 3.9155, "step": 1581500 }, { "epoch": 17.28, "learning_rate": 6.558066211245403e-06, "loss": 3.9069, "step": 1582000 }, { "epoch": 17.29, "learning_rate": 6.5569714485899464e-06, "loss": 3.9116, "step": 1582500 }, { "epoch": 17.29, "learning_rate": 6.55587668593449e-06, "loss": 3.9112, "step": 1583000 }, { "epoch": 17.3, "learning_rate": 6.554781923279033e-06, "loss": 3.9147, "step": 1583500 }, { "epoch": 17.3, "learning_rate": 6.553687160623578e-06, "loss": 3.918, "step": 1584000 }, { "epoch": 17.31, "learning_rate": 6.5525923979681205e-06, "loss": 3.9127, "step": 1584500 }, { "epoch": 17.31, "learning_rate": 6.551497635312664e-06, "loss": 3.9142, "step": 1585000 }, { "epoch": 17.32, "learning_rate": 6.550402872657209e-06, "loss": 3.9123, "step": 1585500 }, { "epoch": 17.33, "learning_rate": 6.549308110001752e-06, "loss": 3.9104, "step": 1586000 }, { "epoch": 17.33, "learning_rate": 6.548213347346296e-06, "loss": 3.9107, "step": 1586500 }, { "epoch": 17.34, "learning_rate": 6.547118584690839e-06, "loss": 3.9132, "step": 1587000 }, { "epoch": 17.34, "learning_rate": 6.546023822035383e-06, "loss": 3.916, "step": 1587500 }, { "epoch": 17.35, "learning_rate": 6.544929059379928e-06, "loss": 3.9088, "step": 1588000 }, { "epoch": 17.35, "learning_rate": 6.5438342967244705e-06, "loss": 3.9115, "step": 1588500 }, { "epoch": 17.36, "learning_rate": 6.542739534069015e-06, "loss": 3.9054, "step": 1589000 }, { "epoch": 17.36, "learning_rate": 6.541644771413558e-06, "loss": 3.9107, "step": 1589500 }, { "epoch": 17.37, "learning_rate": 6.540550008758102e-06, "loss": 3.9145, "step": 1590000 }, { "epoch": 17.37, "eval_accuracy": 0.3863452109828558, "eval_loss": 3.679518461227417, "eval_runtime": 349.8541, "eval_samples_per_second": 881.384, "eval_steps_per_second": 13.774, "step": 1590000 }, { "epoch": 17.37, "learning_rate": 6.5394552461026455e-06, "loss": 3.9067, "step": 1590500 }, { "epoch": 17.38, "learning_rate": 6.538360483447189e-06, "loss": 3.9119, "step": 1591000 }, { "epoch": 17.39, "learning_rate": 6.537265720791734e-06, "loss": 3.9098, "step": 1591500 }, { "epoch": 17.39, "learning_rate": 6.536170958136277e-06, "loss": 3.907, "step": 1592000 }, { "epoch": 17.4, "learning_rate": 6.5350761954808204e-06, "loss": 3.9085, "step": 1592500 }, { "epoch": 17.4, "learning_rate": 6.533981432825363e-06, "loss": 3.9164, "step": 1593000 }, { "epoch": 17.41, "learning_rate": 6.532886670169908e-06, "loss": 3.9094, "step": 1593500 }, { "epoch": 17.41, "learning_rate": 6.531791907514451e-06, "loss": 3.905, "step": 1594000 }, { "epoch": 17.42, "learning_rate": 6.530697144858995e-06, "loss": 3.9196, "step": 1594500 }, { "epoch": 17.42, "learning_rate": 6.529602382203539e-06, "loss": 3.9053, "step": 1595000 }, { "epoch": 17.43, "learning_rate": 6.528507619548082e-06, "loss": 3.9185, "step": 1595500 }, { "epoch": 17.43, "learning_rate": 6.527412856892627e-06, "loss": 3.9018, "step": 1596000 }, { "epoch": 17.44, "learning_rate": 6.5263180942371695e-06, "loss": 3.9016, "step": 1596500 }, { "epoch": 17.45, "learning_rate": 6.525223331581714e-06, "loss": 3.9083, "step": 1597000 }, { "epoch": 17.45, "learning_rate": 6.524128568926257e-06, "loss": 3.9109, "step": 1597500 }, { "epoch": 17.46, "learning_rate": 6.523033806270801e-06, "loss": 3.91, "step": 1598000 }, { "epoch": 17.46, "learning_rate": 6.521939043615345e-06, "loss": 3.9127, "step": 1598500 }, { "epoch": 17.47, "learning_rate": 6.520844280959888e-06, "loss": 3.9106, "step": 1599000 }, { "epoch": 17.47, "learning_rate": 6.519749518304432e-06, "loss": 3.9066, "step": 1599500 }, { "epoch": 17.48, "learning_rate": 6.518654755648976e-06, "loss": 3.9105, "step": 1600000 }, { "epoch": 17.48, "learning_rate": 6.5175599929935195e-06, "loss": 3.9064, "step": 1600500 }, { "epoch": 17.49, "learning_rate": 6.516465230338064e-06, "loss": 3.9136, "step": 1601000 }, { "epoch": 17.49, "learning_rate": 6.515370467682607e-06, "loss": 3.906, "step": 1601500 }, { "epoch": 17.5, "learning_rate": 6.514275705027151e-06, "loss": 3.9088, "step": 1602000 }, { "epoch": 17.51, "learning_rate": 6.5131809423716944e-06, "loss": 3.9097, "step": 1602500 }, { "epoch": 17.51, "learning_rate": 6.512086179716238e-06, "loss": 3.913, "step": 1603000 }, { "epoch": 17.52, "learning_rate": 6.510991417060781e-06, "loss": 3.9061, "step": 1603500 }, { "epoch": 17.52, "learning_rate": 6.509896654405326e-06, "loss": 3.9154, "step": 1604000 }, { "epoch": 17.53, "learning_rate": 6.508801891749869e-06, "loss": 3.9091, "step": 1604500 }, { "epoch": 17.53, "learning_rate": 6.507707129094412e-06, "loss": 3.9069, "step": 1605000 }, { "epoch": 17.54, "learning_rate": 6.506612366438957e-06, "loss": 3.904, "step": 1605500 }, { "epoch": 17.54, "learning_rate": 6.5055176037835e-06, "loss": 3.9139, "step": 1606000 }, { "epoch": 17.55, "learning_rate": 6.504422841128044e-06, "loss": 3.9069, "step": 1606500 }, { "epoch": 17.55, "learning_rate": 6.503328078472587e-06, "loss": 3.9108, "step": 1607000 }, { "epoch": 17.56, "learning_rate": 6.502233315817131e-06, "loss": 3.9109, "step": 1607500 }, { "epoch": 17.57, "learning_rate": 6.501138553161676e-06, "loss": 3.9089, "step": 1608000 }, { "epoch": 17.57, "learning_rate": 6.5000437905062185e-06, "loss": 3.9161, "step": 1608500 }, { "epoch": 17.58, "learning_rate": 6.498949027850763e-06, "loss": 3.9168, "step": 1609000 }, { "epoch": 17.58, "learning_rate": 6.497854265195306e-06, "loss": 3.9157, "step": 1609500 }, { "epoch": 17.59, "learning_rate": 6.49675950253985e-06, "loss": 3.9096, "step": 1610000 }, { "epoch": 17.59, "learning_rate": 6.4956647398843935e-06, "loss": 3.9148, "step": 1610500 }, { "epoch": 17.6, "learning_rate": 6.494569977228937e-06, "loss": 3.9082, "step": 1611000 }, { "epoch": 17.6, "learning_rate": 6.493475214573482e-06, "loss": 3.9103, "step": 1611500 }, { "epoch": 17.61, "learning_rate": 6.492380451918025e-06, "loss": 3.903, "step": 1612000 }, { "epoch": 17.61, "learning_rate": 6.4912856892625684e-06, "loss": 3.9112, "step": 1612500 }, { "epoch": 17.62, "learning_rate": 6.490190926607111e-06, "loss": 3.9101, "step": 1613000 }, { "epoch": 17.63, "learning_rate": 6.489096163951656e-06, "loss": 3.9004, "step": 1613500 }, { "epoch": 17.63, "learning_rate": 6.4880014012962e-06, "loss": 3.9039, "step": 1614000 }, { "epoch": 17.64, "learning_rate": 6.486906638640743e-06, "loss": 3.9087, "step": 1614500 }, { "epoch": 17.64, "learning_rate": 6.485811875985287e-06, "loss": 3.903, "step": 1615000 }, { "epoch": 17.65, "learning_rate": 6.48471711332983e-06, "loss": 3.911, "step": 1615500 }, { "epoch": 17.65, "learning_rate": 6.483622350674375e-06, "loss": 3.906, "step": 1616000 }, { "epoch": 17.66, "learning_rate": 6.4825275880189175e-06, "loss": 3.9148, "step": 1616500 }, { "epoch": 17.66, "learning_rate": 6.481432825363462e-06, "loss": 3.908, "step": 1617000 }, { "epoch": 17.67, "learning_rate": 6.480338062708006e-06, "loss": 3.9116, "step": 1617500 }, { "epoch": 17.67, "learning_rate": 6.479243300052549e-06, "loss": 3.9115, "step": 1618000 }, { "epoch": 17.68, "learning_rate": 6.478148537397093e-06, "loss": 3.9023, "step": 1618500 }, { "epoch": 17.69, "learning_rate": 6.477053774741636e-06, "loss": 3.914, "step": 1619000 }, { "epoch": 17.69, "learning_rate": 6.47595901208618e-06, "loss": 3.9132, "step": 1619500 }, { "epoch": 17.7, "learning_rate": 6.474864249430724e-06, "loss": 3.9103, "step": 1620000 }, { "epoch": 17.7, "eval_accuracy": 0.38690473041233925, "eval_loss": 3.67338490486145, "eval_runtime": 354.5582, "eval_samples_per_second": 869.691, "eval_steps_per_second": 13.592, "step": 1620000 }, { "epoch": 17.7, "learning_rate": 6.4737694867752675e-06, "loss": 3.9105, "step": 1620500 }, { "epoch": 17.71, "learning_rate": 6.472674724119812e-06, "loss": 3.9015, "step": 1621000 }, { "epoch": 17.71, "learning_rate": 6.471579961464355e-06, "loss": 3.9058, "step": 1621500 }, { "epoch": 17.72, "learning_rate": 6.470485198808899e-06, "loss": 3.9065, "step": 1622000 }, { "epoch": 17.72, "learning_rate": 6.4693904361534424e-06, "loss": 3.9055, "step": 1622500 }, { "epoch": 17.73, "learning_rate": 6.468295673497986e-06, "loss": 3.9055, "step": 1623000 }, { "epoch": 17.73, "learning_rate": 6.467200910842529e-06, "loss": 3.9035, "step": 1623500 }, { "epoch": 17.74, "learning_rate": 6.466106148187074e-06, "loss": 3.9064, "step": 1624000 }, { "epoch": 17.75, "learning_rate": 6.465011385531617e-06, "loss": 3.9138, "step": 1624500 }, { "epoch": 17.75, "learning_rate": 6.463916622876161e-06, "loss": 3.9047, "step": 1625000 }, { "epoch": 17.76, "learning_rate": 6.462821860220705e-06, "loss": 3.9131, "step": 1625500 }, { "epoch": 17.76, "learning_rate": 6.461727097565248e-06, "loss": 3.9063, "step": 1626000 }, { "epoch": 17.77, "learning_rate": 6.460632334909792e-06, "loss": 3.9083, "step": 1626500 }, { "epoch": 17.77, "learning_rate": 6.459537572254336e-06, "loss": 3.9072, "step": 1627000 }, { "epoch": 17.78, "learning_rate": 6.458442809598879e-06, "loss": 3.9035, "step": 1627500 }, { "epoch": 17.78, "learning_rate": 6.457348046943424e-06, "loss": 3.908, "step": 1628000 }, { "epoch": 17.79, "learning_rate": 6.4562532842879665e-06, "loss": 3.8997, "step": 1628500 }, { "epoch": 17.79, "learning_rate": 6.455158521632511e-06, "loss": 3.9095, "step": 1629000 }, { "epoch": 17.8, "learning_rate": 6.454063758977054e-06, "loss": 3.9082, "step": 1629500 }, { "epoch": 17.81, "learning_rate": 6.452968996321598e-06, "loss": 3.9035, "step": 1630000 }, { "epoch": 17.81, "learning_rate": 6.451874233666142e-06, "loss": 3.9103, "step": 1630500 }, { "epoch": 17.82, "learning_rate": 6.450779471010685e-06, "loss": 3.9089, "step": 1631000 }, { "epoch": 17.82, "learning_rate": 6.44968470835523e-06, "loss": 3.9072, "step": 1631500 }, { "epoch": 17.83, "learning_rate": 6.448589945699773e-06, "loss": 3.9024, "step": 1632000 }, { "epoch": 17.83, "learning_rate": 6.4474951830443164e-06, "loss": 3.9046, "step": 1632500 }, { "epoch": 17.84, "learning_rate": 6.446400420388859e-06, "loss": 3.8997, "step": 1633000 }, { "epoch": 17.84, "learning_rate": 6.445305657733404e-06, "loss": 3.9033, "step": 1633500 }, { "epoch": 17.85, "learning_rate": 6.444210895077948e-06, "loss": 3.9054, "step": 1634000 }, { "epoch": 17.85, "learning_rate": 6.443116132422491e-06, "loss": 3.9103, "step": 1634500 }, { "epoch": 17.86, "learning_rate": 6.442021369767035e-06, "loss": 3.9046, "step": 1635000 }, { "epoch": 17.87, "learning_rate": 6.440926607111578e-06, "loss": 3.9112, "step": 1635500 }, { "epoch": 17.87, "learning_rate": 6.439831844456123e-06, "loss": 3.9056, "step": 1636000 }, { "epoch": 17.88, "learning_rate": 6.4387370818006655e-06, "loss": 3.9113, "step": 1636500 }, { "epoch": 17.88, "learning_rate": 6.43764231914521e-06, "loss": 3.9141, "step": 1637000 }, { "epoch": 17.89, "learning_rate": 6.436547556489754e-06, "loss": 3.9111, "step": 1637500 }, { "epoch": 17.89, "learning_rate": 6.435452793834297e-06, "loss": 3.9062, "step": 1638000 }, { "epoch": 17.9, "learning_rate": 6.434358031178841e-06, "loss": 3.9061, "step": 1638500 }, { "epoch": 17.9, "learning_rate": 6.433263268523384e-06, "loss": 3.9086, "step": 1639000 }, { "epoch": 17.91, "learning_rate": 6.432168505867928e-06, "loss": 3.9035, "step": 1639500 }, { "epoch": 17.91, "learning_rate": 6.431073743212473e-06, "loss": 3.9049, "step": 1640000 }, { "epoch": 17.92, "learning_rate": 6.4299789805570155e-06, "loss": 3.9054, "step": 1640500 }, { "epoch": 17.93, "learning_rate": 6.42888421790156e-06, "loss": 3.9128, "step": 1641000 }, { "epoch": 17.93, "learning_rate": 6.427789455246103e-06, "loss": 3.9079, "step": 1641500 }, { "epoch": 17.94, "learning_rate": 6.426694692590647e-06, "loss": 3.8988, "step": 1642000 }, { "epoch": 17.94, "learning_rate": 6.4255999299351904e-06, "loss": 3.9069, "step": 1642500 }, { "epoch": 17.95, "learning_rate": 6.424505167279734e-06, "loss": 3.9108, "step": 1643000 }, { "epoch": 17.95, "learning_rate": 6.423410404624279e-06, "loss": 3.9087, "step": 1643500 }, { "epoch": 17.96, "learning_rate": 6.422315641968822e-06, "loss": 3.9042, "step": 1644000 }, { "epoch": 17.96, "learning_rate": 6.421220879313365e-06, "loss": 3.9082, "step": 1644500 }, { "epoch": 17.97, "learning_rate": 6.420126116657909e-06, "loss": 3.9137, "step": 1645000 }, { "epoch": 17.97, "learning_rate": 6.419031354002453e-06, "loss": 3.9075, "step": 1645500 }, { "epoch": 17.98, "learning_rate": 6.417936591346996e-06, "loss": 3.9008, "step": 1646000 }, { "epoch": 17.99, "learning_rate": 6.41684182869154e-06, "loss": 3.9001, "step": 1646500 }, { "epoch": 17.99, "learning_rate": 6.415747066036084e-06, "loss": 3.9077, "step": 1647000 }, { "epoch": 18.0, "learning_rate": 6.414652303380627e-06, "loss": 3.9072, "step": 1647500 }, { "epoch": 18.0, "learning_rate": 6.413557540725172e-06, "loss": 3.9061, "step": 1648000 }, { "epoch": 18.01, "learning_rate": 6.4124627780697145e-06, "loss": 3.8976, "step": 1648500 }, { "epoch": 18.01, "learning_rate": 6.411368015414259e-06, "loss": 3.9076, "step": 1649000 }, { "epoch": 18.02, "learning_rate": 6.410273252758803e-06, "loss": 3.9075, "step": 1649500 }, { "epoch": 18.02, "learning_rate": 6.409178490103346e-06, "loss": 3.9079, "step": 1650000 }, { "epoch": 18.02, "eval_accuracy": 0.3872627506117312, "eval_loss": 3.6724419593811035, "eval_runtime": 354.6131, "eval_samples_per_second": 869.556, "eval_steps_per_second": 13.589, "step": 1650000 }, { "epoch": 18.03, "learning_rate": 6.40808372744789e-06, "loss": 3.9034, "step": 1650500 }, { "epoch": 18.04, "learning_rate": 6.406988964792433e-06, "loss": 3.9024, "step": 1651000 }, { "epoch": 18.04, "learning_rate": 6.405894202136978e-06, "loss": 3.904, "step": 1651500 }, { "epoch": 18.05, "learning_rate": 6.404799439481521e-06, "loss": 3.9106, "step": 1652000 }, { "epoch": 18.05, "learning_rate": 6.4037046768260644e-06, "loss": 3.9038, "step": 1652500 }, { "epoch": 18.06, "learning_rate": 6.402609914170609e-06, "loss": 3.9027, "step": 1653000 }, { "epoch": 18.06, "learning_rate": 6.401515151515152e-06, "loss": 3.9036, "step": 1653500 }, { "epoch": 18.07, "learning_rate": 6.400420388859696e-06, "loss": 3.9054, "step": 1654000 }, { "epoch": 18.07, "learning_rate": 6.399325626204239e-06, "loss": 3.9016, "step": 1654500 }, { "epoch": 18.08, "learning_rate": 6.398230863548783e-06, "loss": 3.91, "step": 1655000 }, { "epoch": 18.08, "learning_rate": 6.397136100893326e-06, "loss": 3.9046, "step": 1655500 }, { "epoch": 18.09, "learning_rate": 6.396041338237871e-06, "loss": 3.9021, "step": 1656000 }, { "epoch": 18.1, "learning_rate": 6.394946575582414e-06, "loss": 3.8975, "step": 1656500 }, { "epoch": 18.1, "learning_rate": 6.393851812926958e-06, "loss": 3.9123, "step": 1657000 }, { "epoch": 18.11, "learning_rate": 6.392757050271502e-06, "loss": 3.9081, "step": 1657500 }, { "epoch": 18.11, "learning_rate": 6.391662287616045e-06, "loss": 3.9062, "step": 1658000 }, { "epoch": 18.12, "learning_rate": 6.390567524960589e-06, "loss": 3.9049, "step": 1658500 }, { "epoch": 18.12, "learning_rate": 6.389472762305132e-06, "loss": 3.9065, "step": 1659000 }, { "epoch": 18.13, "learning_rate": 6.388377999649677e-06, "loss": 3.9002, "step": 1659500 }, { "epoch": 18.13, "learning_rate": 6.387283236994221e-06, "loss": 3.8942, "step": 1660000 }, { "epoch": 18.14, "learning_rate": 6.3861884743387635e-06, "loss": 3.8977, "step": 1660500 }, { "epoch": 18.14, "learning_rate": 6.385093711683308e-06, "loss": 3.9067, "step": 1661000 }, { "epoch": 18.15, "learning_rate": 6.383998949027851e-06, "loss": 3.9109, "step": 1661500 }, { "epoch": 18.16, "learning_rate": 6.382904186372395e-06, "loss": 3.9092, "step": 1662000 }, { "epoch": 18.16, "learning_rate": 6.381809423716939e-06, "loss": 3.9038, "step": 1662500 }, { "epoch": 18.17, "learning_rate": 6.380714661061482e-06, "loss": 3.9031, "step": 1663000 }, { "epoch": 18.17, "learning_rate": 6.379619898406027e-06, "loss": 3.9024, "step": 1663500 }, { "epoch": 18.18, "learning_rate": 6.37852513575057e-06, "loss": 3.909, "step": 1664000 }, { "epoch": 18.18, "learning_rate": 6.377430373095113e-06, "loss": 3.9014, "step": 1664500 }, { "epoch": 18.19, "learning_rate": 6.376335610439657e-06, "loss": 3.9041, "step": 1665000 }, { "epoch": 18.19, "learning_rate": 6.375240847784201e-06, "loss": 3.9036, "step": 1665500 }, { "epoch": 18.2, "learning_rate": 6.3741460851287455e-06, "loss": 3.8996, "step": 1666000 }, { "epoch": 18.2, "learning_rate": 6.373051322473288e-06, "loss": 3.9035, "step": 1666500 }, { "epoch": 18.21, "learning_rate": 6.371956559817832e-06, "loss": 3.9016, "step": 1667000 }, { "epoch": 18.22, "learning_rate": 6.370861797162375e-06, "loss": 3.9011, "step": 1667500 }, { "epoch": 18.22, "learning_rate": 6.36976703450692e-06, "loss": 3.9086, "step": 1668000 }, { "epoch": 18.23, "learning_rate": 6.3686722718514625e-06, "loss": 3.9045, "step": 1668500 }, { "epoch": 18.23, "learning_rate": 6.367577509196007e-06, "loss": 3.9086, "step": 1669000 }, { "epoch": 18.24, "learning_rate": 6.366482746540551e-06, "loss": 3.902, "step": 1669500 }, { "epoch": 18.24, "learning_rate": 6.365387983885094e-06, "loss": 3.9015, "step": 1670000 }, { "epoch": 18.25, "learning_rate": 6.364293221229638e-06, "loss": 3.9031, "step": 1670500 }, { "epoch": 18.25, "learning_rate": 6.363198458574181e-06, "loss": 3.8995, "step": 1671000 }, { "epoch": 18.26, "learning_rate": 6.362103695918726e-06, "loss": 3.9053, "step": 1671500 }, { "epoch": 18.26, "learning_rate": 6.361008933263269e-06, "loss": 3.9037, "step": 1672000 }, { "epoch": 18.27, "learning_rate": 6.3599141706078125e-06, "loss": 3.9098, "step": 1672500 }, { "epoch": 18.28, "learning_rate": 6.358819407952357e-06, "loss": 3.9087, "step": 1673000 }, { "epoch": 18.28, "learning_rate": 6.3577246452969e-06, "loss": 3.9028, "step": 1673500 }, { "epoch": 18.29, "learning_rate": 6.356629882641444e-06, "loss": 3.9001, "step": 1674000 }, { "epoch": 18.29, "learning_rate": 6.355535119985987e-06, "loss": 3.9037, "step": 1674500 }, { "epoch": 18.3, "learning_rate": 6.354440357330531e-06, "loss": 3.9029, "step": 1675000 }, { "epoch": 18.3, "learning_rate": 6.353345594675076e-06, "loss": 3.9056, "step": 1675500 }, { "epoch": 18.31, "learning_rate": 6.352250832019619e-06, "loss": 3.9118, "step": 1676000 }, { "epoch": 18.31, "learning_rate": 6.351156069364162e-06, "loss": 3.9063, "step": 1676500 }, { "epoch": 18.32, "learning_rate": 6.350061306708706e-06, "loss": 3.899, "step": 1677000 }, { "epoch": 18.32, "learning_rate": 6.34896654405325e-06, "loss": 3.9056, "step": 1677500 }, { "epoch": 18.33, "learning_rate": 6.347871781397793e-06, "loss": 3.9006, "step": 1678000 }, { "epoch": 18.34, "learning_rate": 6.346777018742337e-06, "loss": 3.8994, "step": 1678500 }, { "epoch": 18.34, "learning_rate": 6.345682256086881e-06, "loss": 3.9046, "step": 1679000 }, { "epoch": 18.35, "learning_rate": 6.344587493431425e-06, "loss": 3.909, "step": 1679500 }, { "epoch": 18.35, "learning_rate": 6.343492730775969e-06, "loss": 3.901, "step": 1680000 }, { "epoch": 18.35, "eval_accuracy": 0.3872235282097076, "eval_loss": 3.6707475185394287, "eval_runtime": 352.873, "eval_samples_per_second": 873.844, "eval_steps_per_second": 13.656, "step": 1680000 }, { "epoch": 18.36, "learning_rate": 6.3423979681205115e-06, "loss": 3.902, "step": 1680500 }, { "epoch": 18.36, "learning_rate": 6.341303205465056e-06, "loss": 3.9031, "step": 1681000 }, { "epoch": 18.37, "learning_rate": 6.340208442809599e-06, "loss": 3.8978, "step": 1681500 }, { "epoch": 18.37, "learning_rate": 6.339113680154143e-06, "loss": 3.9122, "step": 1682000 }, { "epoch": 18.38, "learning_rate": 6.338018917498687e-06, "loss": 3.9101, "step": 1682500 }, { "epoch": 18.38, "learning_rate": 6.33692415484323e-06, "loss": 3.8983, "step": 1683000 }, { "epoch": 18.39, "learning_rate": 6.335829392187775e-06, "loss": 3.9012, "step": 1683500 }, { "epoch": 18.4, "learning_rate": 6.334734629532318e-06, "loss": 3.9031, "step": 1684000 }, { "epoch": 18.4, "learning_rate": 6.333639866876861e-06, "loss": 3.905, "step": 1684500 }, { "epoch": 18.41, "learning_rate": 6.332545104221405e-06, "loss": 3.9061, "step": 1685000 }, { "epoch": 18.41, "learning_rate": 6.331450341565949e-06, "loss": 3.9003, "step": 1685500 }, { "epoch": 18.42, "learning_rate": 6.3303555789104935e-06, "loss": 3.8996, "step": 1686000 }, { "epoch": 18.42, "learning_rate": 6.329260816255036e-06, "loss": 3.9036, "step": 1686500 }, { "epoch": 18.43, "learning_rate": 6.32816605359958e-06, "loss": 3.8943, "step": 1687000 }, { "epoch": 18.43, "learning_rate": 6.327071290944123e-06, "loss": 3.9016, "step": 1687500 }, { "epoch": 18.44, "learning_rate": 6.325976528288668e-06, "loss": 3.8999, "step": 1688000 }, { "epoch": 18.44, "learning_rate": 6.324881765633211e-06, "loss": 3.8997, "step": 1688500 }, { "epoch": 18.45, "learning_rate": 6.323787002977755e-06, "loss": 3.9063, "step": 1689000 }, { "epoch": 18.46, "learning_rate": 6.322692240322299e-06, "loss": 3.9046, "step": 1689500 }, { "epoch": 18.46, "learning_rate": 6.321597477666842e-06, "loss": 3.9076, "step": 1690000 }, { "epoch": 18.47, "learning_rate": 6.320502715011386e-06, "loss": 3.9046, "step": 1690500 }, { "epoch": 18.47, "learning_rate": 6.319407952355929e-06, "loss": 3.8971, "step": 1691000 }, { "epoch": 18.48, "learning_rate": 6.318313189700474e-06, "loss": 3.9078, "step": 1691500 }, { "epoch": 18.48, "learning_rate": 6.3172184270450176e-06, "loss": 3.9011, "step": 1692000 }, { "epoch": 18.49, "learning_rate": 6.3161236643895605e-06, "loss": 3.9066, "step": 1692500 }, { "epoch": 18.49, "learning_rate": 6.315028901734105e-06, "loss": 3.8958, "step": 1693000 }, { "epoch": 18.5, "learning_rate": 6.313934139078648e-06, "loss": 3.9001, "step": 1693500 }, { "epoch": 18.5, "learning_rate": 6.3128393764231925e-06, "loss": 3.9033, "step": 1694000 }, { "epoch": 18.51, "learning_rate": 6.3117446137677354e-06, "loss": 3.9037, "step": 1694500 }, { "epoch": 18.52, "learning_rate": 6.310649851112279e-06, "loss": 3.9089, "step": 1695000 }, { "epoch": 18.52, "learning_rate": 6.309555088456824e-06, "loss": 3.9082, "step": 1695500 }, { "epoch": 18.53, "learning_rate": 6.308460325801367e-06, "loss": 3.8976, "step": 1696000 }, { "epoch": 18.53, "learning_rate": 6.30736556314591e-06, "loss": 3.9084, "step": 1696500 }, { "epoch": 18.54, "learning_rate": 6.306270800490454e-06, "loss": 3.9059, "step": 1697000 }, { "epoch": 18.54, "learning_rate": 6.305176037834998e-06, "loss": 3.8994, "step": 1697500 }, { "epoch": 18.55, "learning_rate": 6.304081275179541e-06, "loss": 3.9011, "step": 1698000 }, { "epoch": 18.55, "learning_rate": 6.302986512524085e-06, "loss": 3.9084, "step": 1698500 }, { "epoch": 18.56, "learning_rate": 6.301891749868629e-06, "loss": 3.8992, "step": 1699000 }, { "epoch": 18.56, "learning_rate": 6.300796987213173e-06, "loss": 3.9027, "step": 1699500 }, { "epoch": 18.57, "learning_rate": 6.299702224557717e-06, "loss": 3.9054, "step": 1700000 }, { "epoch": 18.58, "learning_rate": 6.2986074619022595e-06, "loss": 3.9017, "step": 1700500 }, { "epoch": 18.58, "learning_rate": 6.297512699246804e-06, "loss": 3.9042, "step": 1701000 }, { "epoch": 18.59, "learning_rate": 6.296417936591348e-06, "loss": 3.9025, "step": 1701500 }, { "epoch": 18.59, "learning_rate": 6.295323173935891e-06, "loss": 3.9052, "step": 1702000 }, { "epoch": 18.6, "learning_rate": 6.294228411280435e-06, "loss": 3.8997, "step": 1702500 }, { "epoch": 18.6, "learning_rate": 6.293133648624978e-06, "loss": 3.8992, "step": 1703000 }, { "epoch": 18.61, "learning_rate": 6.292038885969523e-06, "loss": 3.8974, "step": 1703500 }, { "epoch": 18.61, "learning_rate": 6.290944123314066e-06, "loss": 3.903, "step": 1704000 }, { "epoch": 18.62, "learning_rate": 6.2898493606586094e-06, "loss": 3.8966, "step": 1704500 }, { "epoch": 18.62, "learning_rate": 6.288754598003154e-06, "loss": 3.9039, "step": 1705000 }, { "epoch": 18.63, "learning_rate": 6.287659835347697e-06, "loss": 3.9088, "step": 1705500 }, { "epoch": 18.64, "learning_rate": 6.2865650726922415e-06, "loss": 3.9073, "step": 1706000 }, { "epoch": 18.64, "learning_rate": 6.285470310036784e-06, "loss": 3.9004, "step": 1706500 }, { "epoch": 18.65, "learning_rate": 6.284375547381328e-06, "loss": 3.9005, "step": 1707000 }, { "epoch": 18.65, "learning_rate": 6.283280784725871e-06, "loss": 3.8977, "step": 1707500 }, { "epoch": 18.66, "learning_rate": 6.282186022070416e-06, "loss": 3.8967, "step": 1708000 }, { "epoch": 18.66, "learning_rate": 6.281091259414959e-06, "loss": 3.8975, "step": 1708500 }, { "epoch": 18.67, "learning_rate": 6.279996496759503e-06, "loss": 3.8958, "step": 1709000 }, { "epoch": 18.67, "learning_rate": 6.278901734104047e-06, "loss": 3.8981, "step": 1709500 }, { "epoch": 18.68, "learning_rate": 6.27780697144859e-06, "loss": 3.9015, "step": 1710000 }, { "epoch": 18.68, "eval_accuracy": 0.38728990982236305, "eval_loss": 3.6694722175598145, "eval_runtime": 350.4552, "eval_samples_per_second": 879.873, "eval_steps_per_second": 13.751, "step": 1710000 }, { "epoch": 18.69, "learning_rate": 6.276712208793134e-06, "loss": 3.9018, "step": 1710500 }, { "epoch": 18.69, "learning_rate": 6.275617446137677e-06, "loss": 3.8978, "step": 1711000 }, { "epoch": 18.7, "learning_rate": 6.274522683482222e-06, "loss": 3.9029, "step": 1711500 }, { "epoch": 18.7, "learning_rate": 6.2734279208267656e-06, "loss": 3.8977, "step": 1712000 }, { "epoch": 18.71, "learning_rate": 6.2723331581713085e-06, "loss": 3.898, "step": 1712500 }, { "epoch": 18.71, "learning_rate": 6.271238395515853e-06, "loss": 3.902, "step": 1713000 }, { "epoch": 18.72, "learning_rate": 6.270143632860396e-06, "loss": 3.9007, "step": 1713500 }, { "epoch": 18.72, "learning_rate": 6.2690488702049405e-06, "loss": 3.8933, "step": 1714000 }, { "epoch": 18.73, "learning_rate": 6.267954107549484e-06, "loss": 3.8957, "step": 1714500 }, { "epoch": 18.73, "learning_rate": 6.266859344894027e-06, "loss": 3.8996, "step": 1715000 }, { "epoch": 18.74, "learning_rate": 6.265764582238572e-06, "loss": 3.8966, "step": 1715500 }, { "epoch": 18.75, "learning_rate": 6.264669819583115e-06, "loss": 3.9029, "step": 1716000 }, { "epoch": 18.75, "learning_rate": 6.263575056927658e-06, "loss": 3.8979, "step": 1716500 }, { "epoch": 18.76, "learning_rate": 6.262480294272202e-06, "loss": 3.8986, "step": 1717000 }, { "epoch": 18.76, "learning_rate": 6.261385531616746e-06, "loss": 3.9022, "step": 1717500 }, { "epoch": 18.77, "learning_rate": 6.2602907689612905e-06, "loss": 3.9032, "step": 1718000 }, { "epoch": 18.77, "learning_rate": 6.259196006305833e-06, "loss": 3.8996, "step": 1718500 }, { "epoch": 18.78, "learning_rate": 6.258101243650377e-06, "loss": 3.903, "step": 1719000 }, { "epoch": 18.78, "learning_rate": 6.257006480994921e-06, "loss": 3.9045, "step": 1719500 }, { "epoch": 18.79, "learning_rate": 6.255911718339465e-06, "loss": 3.8954, "step": 1720000 }, { "epoch": 18.79, "learning_rate": 6.2548169556840075e-06, "loss": 3.8924, "step": 1720500 }, { "epoch": 18.8, "learning_rate": 6.253722193028552e-06, "loss": 3.8958, "step": 1721000 }, { "epoch": 18.81, "learning_rate": 6.252627430373096e-06, "loss": 3.8984, "step": 1721500 }, { "epoch": 18.81, "learning_rate": 6.251532667717639e-06, "loss": 3.9013, "step": 1722000 }, { "epoch": 18.82, "learning_rate": 6.250437905062183e-06, "loss": 3.9007, "step": 1722500 }, { "epoch": 18.82, "learning_rate": 6.249343142406726e-06, "loss": 3.8967, "step": 1723000 }, { "epoch": 18.83, "learning_rate": 6.248248379751271e-06, "loss": 3.8961, "step": 1723500 }, { "epoch": 18.83, "learning_rate": 6.2471536170958145e-06, "loss": 3.8977, "step": 1724000 }, { "epoch": 18.84, "learning_rate": 6.2460588544403574e-06, "loss": 3.8994, "step": 1724500 }, { "epoch": 18.84, "learning_rate": 6.244964091784902e-06, "loss": 3.9011, "step": 1725000 }, { "epoch": 18.85, "learning_rate": 6.243869329129445e-06, "loss": 3.8932, "step": 1725500 }, { "epoch": 18.85, "learning_rate": 6.2427745664739895e-06, "loss": 3.9041, "step": 1726000 }, { "epoch": 18.86, "learning_rate": 6.241679803818532e-06, "loss": 3.9024, "step": 1726500 }, { "epoch": 18.87, "learning_rate": 6.240585041163076e-06, "loss": 3.9036, "step": 1727000 }, { "epoch": 18.87, "learning_rate": 6.239490278507621e-06, "loss": 3.8994, "step": 1727500 }, { "epoch": 18.88, "learning_rate": 6.238395515852164e-06, "loss": 3.8994, "step": 1728000 }, { "epoch": 18.88, "learning_rate": 6.237300753196708e-06, "loss": 3.8991, "step": 1728500 }, { "epoch": 18.89, "learning_rate": 6.236205990541251e-06, "loss": 3.8967, "step": 1729000 }, { "epoch": 18.89, "learning_rate": 6.235111227885795e-06, "loss": 3.9001, "step": 1729500 }, { "epoch": 18.9, "learning_rate": 6.234016465230338e-06, "loss": 3.901, "step": 1730000 }, { "epoch": 18.9, "learning_rate": 6.232921702574882e-06, "loss": 3.8938, "step": 1730500 }, { "epoch": 18.91, "learning_rate": 6.231826939919426e-06, "loss": 3.8981, "step": 1731000 }, { "epoch": 18.91, "learning_rate": 6.23073217726397e-06, "loss": 3.9019, "step": 1731500 }, { "epoch": 18.92, "learning_rate": 6.2296374146085136e-06, "loss": 3.9064, "step": 1732000 }, { "epoch": 18.93, "learning_rate": 6.2285426519530565e-06, "loss": 3.904, "step": 1732500 }, { "epoch": 18.93, "learning_rate": 6.227447889297601e-06, "loss": 3.9001, "step": 1733000 }, { "epoch": 18.94, "learning_rate": 6.226353126642144e-06, "loss": 3.8994, "step": 1733500 }, { "epoch": 18.94, "learning_rate": 6.2252583639866885e-06, "loss": 3.9036, "step": 1734000 }, { "epoch": 18.95, "learning_rate": 6.224163601331232e-06, "loss": 3.8973, "step": 1734500 }, { "epoch": 18.95, "learning_rate": 6.223068838675775e-06, "loss": 3.8994, "step": 1735000 }, { "epoch": 18.96, "learning_rate": 6.22197407602032e-06, "loss": 3.8982, "step": 1735500 }, { "epoch": 18.96, "learning_rate": 6.220879313364863e-06, "loss": 3.894, "step": 1736000 }, { "epoch": 18.97, "learning_rate": 6.219784550709406e-06, "loss": 3.8981, "step": 1736500 }, { "epoch": 18.97, "learning_rate": 6.218689788053951e-06, "loss": 3.8973, "step": 1737000 }, { "epoch": 18.98, "learning_rate": 6.217595025398494e-06, "loss": 3.8979, "step": 1737500 }, { "epoch": 18.99, "learning_rate": 6.2165002627430385e-06, "loss": 3.896, "step": 1738000 }, { "epoch": 18.99, "learning_rate": 6.215405500087581e-06, "loss": 3.8991, "step": 1738500 }, { "epoch": 19.0, "learning_rate": 6.214310737432125e-06, "loss": 3.9004, "step": 1739000 }, { "epoch": 19.0, "learning_rate": 6.213215974776669e-06, "loss": 3.8986, "step": 1739500 }, { "epoch": 19.01, "learning_rate": 6.212121212121213e-06, "loss": 3.8987, "step": 1740000 }, { "epoch": 19.01, "eval_accuracy": 0.3876880126513826, "eval_loss": 3.6671955585479736, "eval_runtime": 353.6881, "eval_samples_per_second": 871.83, "eval_steps_per_second": 13.625, "step": 1740000 }, { "epoch": 19.01, "learning_rate": 6.211026449465757e-06, "loss": 3.8954, "step": 1740500 }, { "epoch": 19.02, "learning_rate": 6.2099316868103e-06, "loss": 3.8949, "step": 1741000 }, { "epoch": 19.02, "learning_rate": 6.208836924154844e-06, "loss": 3.8928, "step": 1741500 }, { "epoch": 19.03, "learning_rate": 6.207742161499387e-06, "loss": 3.8953, "step": 1742000 }, { "epoch": 19.03, "learning_rate": 6.206647398843931e-06, "loss": 3.8992, "step": 1742500 }, { "epoch": 19.04, "learning_rate": 6.205552636188474e-06, "loss": 3.9029, "step": 1743000 }, { "epoch": 19.05, "learning_rate": 6.204457873533019e-06, "loss": 3.8976, "step": 1743500 }, { "epoch": 19.05, "learning_rate": 6.2033631108775625e-06, "loss": 3.8949, "step": 1744000 }, { "epoch": 19.06, "learning_rate": 6.2022683482221054e-06, "loss": 3.895, "step": 1744500 }, { "epoch": 19.06, "learning_rate": 6.20117358556665e-06, "loss": 3.8986, "step": 1745000 }, { "epoch": 19.07, "learning_rate": 6.200078822911193e-06, "loss": 3.8992, "step": 1745500 }, { "epoch": 19.07, "learning_rate": 6.1989840602557375e-06, "loss": 3.8984, "step": 1746000 }, { "epoch": 19.08, "learning_rate": 6.19788929760028e-06, "loss": 3.8964, "step": 1746500 }, { "epoch": 19.08, "learning_rate": 6.196794534944824e-06, "loss": 3.9027, "step": 1747000 }, { "epoch": 19.09, "learning_rate": 6.195699772289369e-06, "loss": 3.897, "step": 1747500 }, { "epoch": 19.09, "learning_rate": 6.194605009633912e-06, "loss": 3.9023, "step": 1748000 }, { "epoch": 19.1, "learning_rate": 6.193510246978456e-06, "loss": 3.898, "step": 1748500 }, { "epoch": 19.11, "learning_rate": 6.192415484322999e-06, "loss": 3.9006, "step": 1749000 }, { "epoch": 19.11, "learning_rate": 6.191320721667543e-06, "loss": 3.8954, "step": 1749500 }, { "epoch": 19.12, "learning_rate": 6.1902259590120875e-06, "loss": 3.8958, "step": 1750000 }, { "epoch": 19.12, "learning_rate": 6.18913119635663e-06, "loss": 3.8938, "step": 1750500 }, { "epoch": 19.13, "learning_rate": 6.188036433701174e-06, "loss": 3.9003, "step": 1751000 }, { "epoch": 19.13, "learning_rate": 6.186941671045718e-06, "loss": 3.8958, "step": 1751500 }, { "epoch": 19.14, "learning_rate": 6.1858469083902616e-06, "loss": 3.8923, "step": 1752000 }, { "epoch": 19.14, "learning_rate": 6.1847521457348045e-06, "loss": 3.8971, "step": 1752500 }, { "epoch": 19.15, "learning_rate": 6.183657383079349e-06, "loss": 3.8947, "step": 1753000 }, { "epoch": 19.15, "learning_rate": 6.182562620423893e-06, "loss": 3.9014, "step": 1753500 }, { "epoch": 19.16, "learning_rate": 6.1814678577684365e-06, "loss": 3.9038, "step": 1754000 }, { "epoch": 19.17, "learning_rate": 6.18037309511298e-06, "loss": 3.8962, "step": 1754500 }, { "epoch": 19.17, "learning_rate": 6.179278332457523e-06, "loss": 3.8915, "step": 1755000 }, { "epoch": 19.18, "learning_rate": 6.178183569802068e-06, "loss": 3.9013, "step": 1755500 }, { "epoch": 19.18, "learning_rate": 6.177088807146611e-06, "loss": 3.9001, "step": 1756000 }, { "epoch": 19.19, "learning_rate": 6.175994044491154e-06, "loss": 3.8944, "step": 1756500 }, { "epoch": 19.19, "learning_rate": 6.174899281835699e-06, "loss": 3.8993, "step": 1757000 }, { "epoch": 19.2, "learning_rate": 6.173804519180242e-06, "loss": 3.9003, "step": 1757500 }, { "epoch": 19.2, "learning_rate": 6.1727097565247865e-06, "loss": 3.9007, "step": 1758000 }, { "epoch": 19.21, "learning_rate": 6.171614993869329e-06, "loss": 3.8952, "step": 1758500 }, { "epoch": 19.21, "learning_rate": 6.170520231213873e-06, "loss": 3.8966, "step": 1759000 }, { "epoch": 19.22, "learning_rate": 6.169425468558417e-06, "loss": 3.8929, "step": 1759500 }, { "epoch": 19.23, "learning_rate": 6.168330705902961e-06, "loss": 3.8992, "step": 1760000 }, { "epoch": 19.23, "learning_rate": 6.167235943247505e-06, "loss": 3.8962, "step": 1760500 }, { "epoch": 19.24, "learning_rate": 6.166141180592048e-06, "loss": 3.8946, "step": 1761000 }, { "epoch": 19.24, "learning_rate": 6.165046417936592e-06, "loss": 3.8971, "step": 1761500 }, { "epoch": 19.25, "learning_rate": 6.163951655281135e-06, "loss": 3.8928, "step": 1762000 }, { "epoch": 19.25, "learning_rate": 6.162856892625679e-06, "loss": 3.8956, "step": 1762500 }, { "epoch": 19.26, "learning_rate": 6.161762129970224e-06, "loss": 3.894, "step": 1763000 }, { "epoch": 19.26, "learning_rate": 6.160667367314767e-06, "loss": 3.8996, "step": 1763500 }, { "epoch": 19.27, "learning_rate": 6.1595726046593105e-06, "loss": 3.8982, "step": 1764000 }, { "epoch": 19.27, "learning_rate": 6.1584778420038534e-06, "loss": 3.9006, "step": 1764500 }, { "epoch": 19.28, "learning_rate": 6.157383079348398e-06, "loss": 3.8925, "step": 1765000 }, { "epoch": 19.29, "learning_rate": 6.156288316692941e-06, "loss": 3.8985, "step": 1765500 }, { "epoch": 19.29, "learning_rate": 6.1551935540374855e-06, "loss": 3.9012, "step": 1766000 }, { "epoch": 19.3, "learning_rate": 6.154098791382029e-06, "loss": 3.8947, "step": 1766500 }, { "epoch": 19.3, "learning_rate": 6.153004028726572e-06, "loss": 3.8922, "step": 1767000 }, { "epoch": 19.31, "learning_rate": 6.151909266071117e-06, "loss": 3.8974, "step": 1767500 }, { "epoch": 19.31, "learning_rate": 6.15081450341566e-06, "loss": 3.8959, "step": 1768000 }, { "epoch": 19.32, "learning_rate": 6.149719740760204e-06, "loss": 3.8965, "step": 1768500 }, { "epoch": 19.32, "learning_rate": 6.148624978104747e-06, "loss": 3.902, "step": 1769000 }, { "epoch": 19.33, "learning_rate": 6.147530215449291e-06, "loss": 3.8941, "step": 1769500 }, { "epoch": 19.33, "learning_rate": 6.1464354527938355e-06, "loss": 3.8929, "step": 1770000 }, { "epoch": 19.33, "eval_accuracy": 0.38775617922349437, "eval_loss": 3.6647467613220215, "eval_runtime": 351.7413, "eval_samples_per_second": 876.656, "eval_steps_per_second": 13.7, "step": 1770000 }, { "epoch": 19.34, "learning_rate": 6.145340690138378e-06, "loss": 3.8952, "step": 1770500 }, { "epoch": 19.35, "learning_rate": 6.144245927482922e-06, "loss": 3.8947, "step": 1771000 }, { "epoch": 19.35, "learning_rate": 6.143151164827466e-06, "loss": 3.8843, "step": 1771500 }, { "epoch": 19.36, "learning_rate": 6.14205640217201e-06, "loss": 3.8964, "step": 1772000 }, { "epoch": 19.36, "learning_rate": 6.1409616395165525e-06, "loss": 3.902, "step": 1772500 }, { "epoch": 19.37, "learning_rate": 6.139866876861097e-06, "loss": 3.8965, "step": 1773000 }, { "epoch": 19.37, "learning_rate": 6.138772114205641e-06, "loss": 3.8914, "step": 1773500 }, { "epoch": 19.38, "learning_rate": 6.1376773515501846e-06, "loss": 3.89, "step": 1774000 }, { "epoch": 19.38, "learning_rate": 6.136582588894728e-06, "loss": 3.9018, "step": 1774500 }, { "epoch": 19.39, "learning_rate": 6.135487826239271e-06, "loss": 3.8915, "step": 1775000 }, { "epoch": 19.4, "learning_rate": 6.134393063583816e-06, "loss": 3.8966, "step": 1775500 }, { "epoch": 19.4, "learning_rate": 6.1332983009283595e-06, "loss": 3.8978, "step": 1776000 }, { "epoch": 19.41, "learning_rate": 6.132203538272902e-06, "loss": 3.8954, "step": 1776500 }, { "epoch": 19.41, "learning_rate": 6.131108775617447e-06, "loss": 3.8926, "step": 1777000 }, { "epoch": 19.42, "learning_rate": 6.13001401296199e-06, "loss": 3.8939, "step": 1777500 }, { "epoch": 19.42, "learning_rate": 6.1289192503065345e-06, "loss": 3.8956, "step": 1778000 }, { "epoch": 19.43, "learning_rate": 6.127824487651077e-06, "loss": 3.8998, "step": 1778500 }, { "epoch": 19.43, "learning_rate": 6.126729724995621e-06, "loss": 3.8965, "step": 1779000 }, { "epoch": 19.44, "learning_rate": 6.125634962340166e-06, "loss": 3.8905, "step": 1779500 }, { "epoch": 19.44, "learning_rate": 6.124540199684709e-06, "loss": 3.8925, "step": 1780000 }, { "epoch": 19.45, "learning_rate": 6.123445437029253e-06, "loss": 3.8992, "step": 1780500 }, { "epoch": 19.46, "learning_rate": 6.122350674373796e-06, "loss": 3.8989, "step": 1781000 }, { "epoch": 19.46, "learning_rate": 6.12125591171834e-06, "loss": 3.895, "step": 1781500 }, { "epoch": 19.47, "learning_rate": 6.120161149062884e-06, "loss": 3.8956, "step": 1782000 }, { "epoch": 19.47, "learning_rate": 6.119066386407427e-06, "loss": 3.8951, "step": 1782500 }, { "epoch": 19.48, "learning_rate": 6.117971623751972e-06, "loss": 3.8941, "step": 1783000 }, { "epoch": 19.48, "learning_rate": 6.116876861096515e-06, "loss": 3.8994, "step": 1783500 }, { "epoch": 19.49, "learning_rate": 6.1157820984410586e-06, "loss": 3.8987, "step": 1784000 }, { "epoch": 19.49, "learning_rate": 6.1146873357856014e-06, "loss": 3.8978, "step": 1784500 }, { "epoch": 19.5, "learning_rate": 6.113592573130146e-06, "loss": 3.8893, "step": 1785000 }, { "epoch": 19.5, "learning_rate": 6.112497810474689e-06, "loss": 3.8954, "step": 1785500 }, { "epoch": 19.51, "learning_rate": 6.1114030478192335e-06, "loss": 3.8993, "step": 1786000 }, { "epoch": 19.52, "learning_rate": 6.110308285163777e-06, "loss": 3.8986, "step": 1786500 }, { "epoch": 19.52, "learning_rate": 6.10921352250832e-06, "loss": 3.8898, "step": 1787000 }, { "epoch": 19.53, "learning_rate": 6.108118759852865e-06, "loss": 3.8925, "step": 1787500 }, { "epoch": 19.53, "learning_rate": 6.107023997197408e-06, "loss": 3.8949, "step": 1788000 }, { "epoch": 19.54, "learning_rate": 6.105929234541952e-06, "loss": 3.8903, "step": 1788500 }, { "epoch": 19.54, "learning_rate": 6.104834471886496e-06, "loss": 3.8908, "step": 1789000 }, { "epoch": 19.55, "learning_rate": 6.103739709231039e-06, "loss": 3.892, "step": 1789500 }, { "epoch": 19.55, "learning_rate": 6.1026449465755835e-06, "loss": 3.8913, "step": 1790000 }, { "epoch": 19.56, "learning_rate": 6.101550183920126e-06, "loss": 3.893, "step": 1790500 }, { "epoch": 19.56, "learning_rate": 6.10045542126467e-06, "loss": 3.8957, "step": 1791000 }, { "epoch": 19.57, "learning_rate": 6.099360658609214e-06, "loss": 3.8913, "step": 1791500 }, { "epoch": 19.58, "learning_rate": 6.098265895953758e-06, "loss": 3.8952, "step": 1792000 }, { "epoch": 19.58, "learning_rate": 6.097171133298302e-06, "loss": 3.8938, "step": 1792500 }, { "epoch": 19.59, "learning_rate": 6.096076370642845e-06, "loss": 3.8958, "step": 1793000 }, { "epoch": 19.59, "learning_rate": 6.094981607987389e-06, "loss": 3.8951, "step": 1793500 }, { "epoch": 19.6, "learning_rate": 6.0938868453319326e-06, "loss": 3.8959, "step": 1794000 }, { "epoch": 19.6, "learning_rate": 6.092792082676476e-06, "loss": 3.8919, "step": 1794500 }, { "epoch": 19.61, "learning_rate": 6.091697320021019e-06, "loss": 3.8916, "step": 1795000 }, { "epoch": 19.61, "learning_rate": 6.090602557365564e-06, "loss": 3.8925, "step": 1795500 }, { "epoch": 19.62, "learning_rate": 6.0895077947101075e-06, "loss": 3.894, "step": 1796000 }, { "epoch": 19.62, "learning_rate": 6.08841303205465e-06, "loss": 3.8903, "step": 1796500 }, { "epoch": 19.63, "learning_rate": 6.087318269399195e-06, "loss": 3.8955, "step": 1797000 }, { "epoch": 19.64, "learning_rate": 6.086223506743738e-06, "loss": 3.8983, "step": 1797500 }, { "epoch": 19.64, "learning_rate": 6.0851287440882825e-06, "loss": 3.8888, "step": 1798000 }, { "epoch": 19.65, "learning_rate": 6.084033981432825e-06, "loss": 3.8919, "step": 1798500 }, { "epoch": 19.65, "learning_rate": 6.082939218777369e-06, "loss": 3.8914, "step": 1799000 }, { "epoch": 19.66, "learning_rate": 6.081844456121914e-06, "loss": 3.8977, "step": 1799500 }, { "epoch": 19.66, "learning_rate": 6.080749693466457e-06, "loss": 3.892, "step": 1800000 }, { "epoch": 19.66, "eval_accuracy": 0.3883897021930646, "eval_loss": 3.6609106063842773, "eval_runtime": 355.8534, "eval_samples_per_second": 866.525, "eval_steps_per_second": 13.542, "step": 1800000 }, { "epoch": 19.67, "learning_rate": 6.079654930811001e-06, "loss": 3.8923, "step": 1800500 }, { "epoch": 19.67, "learning_rate": 6.078560168155544e-06, "loss": 3.8866, "step": 1801000 }, { "epoch": 19.68, "learning_rate": 6.077465405500088e-06, "loss": 3.8937, "step": 1801500 }, { "epoch": 19.68, "learning_rate": 6.0763706428446324e-06, "loss": 3.8949, "step": 1802000 }, { "epoch": 19.69, "learning_rate": 6.075275880189175e-06, "loss": 3.8849, "step": 1802500 }, { "epoch": 19.7, "learning_rate": 6.07418111753372e-06, "loss": 3.8898, "step": 1803000 }, { "epoch": 19.7, "learning_rate": 6.073086354878263e-06, "loss": 3.8959, "step": 1803500 }, { "epoch": 19.71, "learning_rate": 6.0719915922228066e-06, "loss": 3.8904, "step": 1804000 }, { "epoch": 19.71, "learning_rate": 6.0708968295673495e-06, "loss": 3.8925, "step": 1804500 }, { "epoch": 19.72, "learning_rate": 6.069802066911894e-06, "loss": 3.8879, "step": 1805000 }, { "epoch": 19.72, "learning_rate": 6.068707304256438e-06, "loss": 3.8897, "step": 1805500 }, { "epoch": 19.73, "learning_rate": 6.0676125416009815e-06, "loss": 3.8977, "step": 1806000 }, { "epoch": 19.73, "learning_rate": 6.066517778945525e-06, "loss": 3.8984, "step": 1806500 }, { "epoch": 19.74, "learning_rate": 6.065423016290068e-06, "loss": 3.8905, "step": 1807000 }, { "epoch": 19.74, "learning_rate": 6.064328253634613e-06, "loss": 3.8898, "step": 1807500 }, { "epoch": 19.75, "learning_rate": 6.063233490979156e-06, "loss": 3.8932, "step": 1808000 }, { "epoch": 19.76, "learning_rate": 6.0621387283237e-06, "loss": 3.893, "step": 1808500 }, { "epoch": 19.76, "learning_rate": 6.061043965668244e-06, "loss": 3.894, "step": 1809000 }, { "epoch": 19.77, "learning_rate": 6.059949203012787e-06, "loss": 3.8922, "step": 1809500 }, { "epoch": 19.77, "learning_rate": 6.0588544403573315e-06, "loss": 3.8931, "step": 1810000 }, { "epoch": 19.78, "learning_rate": 6.057759677701874e-06, "loss": 3.8906, "step": 1810500 }, { "epoch": 19.78, "learning_rate": 6.056664915046418e-06, "loss": 3.8912, "step": 1811000 }, { "epoch": 19.79, "learning_rate": 6.055570152390963e-06, "loss": 3.891, "step": 1811500 }, { "epoch": 19.79, "learning_rate": 6.054475389735506e-06, "loss": 3.8815, "step": 1812000 }, { "epoch": 19.8, "learning_rate": 6.05338062708005e-06, "loss": 3.8871, "step": 1812500 }, { "epoch": 19.8, "learning_rate": 6.052285864424593e-06, "loss": 3.891, "step": 1813000 }, { "epoch": 19.81, "learning_rate": 6.051191101769137e-06, "loss": 3.8929, "step": 1813500 }, { "epoch": 19.82, "learning_rate": 6.0500963391136806e-06, "loss": 3.8947, "step": 1814000 }, { "epoch": 19.82, "learning_rate": 6.049001576458224e-06, "loss": 3.8959, "step": 1814500 }, { "epoch": 19.83, "learning_rate": 6.047906813802769e-06, "loss": 3.8856, "step": 1815000 }, { "epoch": 19.83, "learning_rate": 6.046812051147312e-06, "loss": 3.895, "step": 1815500 }, { "epoch": 19.84, "learning_rate": 6.0457172884918555e-06, "loss": 3.8896, "step": 1816000 }, { "epoch": 19.84, "learning_rate": 6.044622525836399e-06, "loss": 3.8962, "step": 1816500 }, { "epoch": 19.85, "learning_rate": 6.043527763180943e-06, "loss": 3.8916, "step": 1817000 }, { "epoch": 19.85, "learning_rate": 6.042433000525486e-06, "loss": 3.8921, "step": 1817500 }, { "epoch": 19.86, "learning_rate": 6.0413382378700305e-06, "loss": 3.8864, "step": 1818000 }, { "epoch": 19.86, "learning_rate": 6.040243475214574e-06, "loss": 3.891, "step": 1818500 }, { "epoch": 19.87, "learning_rate": 6.039148712559117e-06, "loss": 3.8882, "step": 1819000 }, { "epoch": 19.88, "learning_rate": 6.038053949903662e-06, "loss": 3.892, "step": 1819500 }, { "epoch": 19.88, "learning_rate": 6.036959187248205e-06, "loss": 3.8903, "step": 1820000 }, { "epoch": 19.89, "learning_rate": 6.035864424592749e-06, "loss": 3.8896, "step": 1820500 }, { "epoch": 19.89, "learning_rate": 6.034769661937292e-06, "loss": 3.8892, "step": 1821000 }, { "epoch": 19.9, "learning_rate": 6.033674899281836e-06, "loss": 3.8906, "step": 1821500 }, { "epoch": 19.9, "learning_rate": 6.0325801366263804e-06, "loss": 3.8896, "step": 1822000 }, { "epoch": 19.91, "learning_rate": 6.031485373970923e-06, "loss": 3.8905, "step": 1822500 }, { "epoch": 19.91, "learning_rate": 6.030390611315468e-06, "loss": 3.8901, "step": 1823000 }, { "epoch": 19.92, "learning_rate": 6.029295848660011e-06, "loss": 3.8878, "step": 1823500 }, { "epoch": 19.92, "learning_rate": 6.0282010860045546e-06, "loss": 3.8916, "step": 1824000 }, { "epoch": 19.93, "learning_rate": 6.027106323349099e-06, "loss": 3.896, "step": 1824500 }, { "epoch": 19.94, "learning_rate": 6.026011560693642e-06, "loss": 3.889, "step": 1825000 }, { "epoch": 19.94, "learning_rate": 6.024916798038186e-06, "loss": 3.8987, "step": 1825500 }, { "epoch": 19.95, "learning_rate": 6.0238220353827295e-06, "loss": 3.8887, "step": 1826000 }, { "epoch": 19.95, "learning_rate": 6.022727272727273e-06, "loss": 3.899, "step": 1826500 }, { "epoch": 19.96, "learning_rate": 6.021632510071816e-06, "loss": 3.8932, "step": 1827000 }, { "epoch": 19.96, "learning_rate": 6.020537747416361e-06, "loss": 3.8892, "step": 1827500 }, { "epoch": 19.97, "learning_rate": 6.0194429847609045e-06, "loss": 3.8916, "step": 1828000 }, { "epoch": 19.97, "learning_rate": 6.018348222105448e-06, "loss": 3.8849, "step": 1828500 }, { "epoch": 19.98, "learning_rate": 6.017253459449992e-06, "loss": 3.8915, "step": 1829000 }, { "epoch": 19.98, "learning_rate": 6.016158696794535e-06, "loss": 3.8913, "step": 1829500 }, { "epoch": 19.99, "learning_rate": 6.0150639341390795e-06, "loss": 3.8906, "step": 1830000 }, { "epoch": 19.99, "eval_accuracy": 0.3886366541941414, "eval_loss": 3.6594605445861816, "eval_runtime": 352.2542, "eval_samples_per_second": 875.379, "eval_steps_per_second": 13.68, "step": 1830000 }, { "epoch": 20.0, "learning_rate": 6.013969171483622e-06, "loss": 3.891, "step": 1830500 }, { "epoch": 20.0, "learning_rate": 6.012874408828166e-06, "loss": 3.8869, "step": 1831000 }, { "epoch": 20.01, "learning_rate": 6.011779646172711e-06, "loss": 3.8943, "step": 1831500 }, { "epoch": 20.01, "learning_rate": 6.010684883517254e-06, "loss": 3.8902, "step": 1832000 }, { "epoch": 20.02, "learning_rate": 6.009590120861798e-06, "loss": 3.887, "step": 1832500 }, { "epoch": 20.02, "learning_rate": 6.008495358206341e-06, "loss": 3.8921, "step": 1833000 }, { "epoch": 20.03, "learning_rate": 6.007400595550885e-06, "loss": 3.894, "step": 1833500 }, { "epoch": 20.03, "learning_rate": 6.0063058328954286e-06, "loss": 3.8946, "step": 1834000 }, { "epoch": 20.04, "learning_rate": 6.005211070239972e-06, "loss": 3.8913, "step": 1834500 }, { "epoch": 20.05, "learning_rate": 6.004116307584517e-06, "loss": 3.884, "step": 1835000 }, { "epoch": 20.05, "learning_rate": 6.00302154492906e-06, "loss": 3.8903, "step": 1835500 }, { "epoch": 20.06, "learning_rate": 6.0019267822736035e-06, "loss": 3.8929, "step": 1836000 }, { "epoch": 20.06, "learning_rate": 6.000832019618147e-06, "loss": 3.8883, "step": 1836500 }, { "epoch": 20.07, "learning_rate": 5.999737256962691e-06, "loss": 3.8891, "step": 1837000 }, { "epoch": 20.07, "learning_rate": 5.998642494307236e-06, "loss": 3.891, "step": 1837500 }, { "epoch": 20.08, "learning_rate": 5.9975477316517785e-06, "loss": 3.8976, "step": 1838000 }, { "epoch": 20.08, "learning_rate": 5.996452968996322e-06, "loss": 3.898, "step": 1838500 }, { "epoch": 20.09, "learning_rate": 5.995358206340865e-06, "loss": 3.8834, "step": 1839000 }, { "epoch": 20.09, "learning_rate": 5.99426344368541e-06, "loss": 3.8825, "step": 1839500 }, { "epoch": 20.1, "learning_rate": 5.993168681029953e-06, "loss": 3.8956, "step": 1840000 }, { "epoch": 20.11, "learning_rate": 5.992073918374497e-06, "loss": 3.8882, "step": 1840500 }, { "epoch": 20.11, "learning_rate": 5.990979155719041e-06, "loss": 3.8904, "step": 1841000 }, { "epoch": 20.12, "learning_rate": 5.989884393063584e-06, "loss": 3.8975, "step": 1841500 }, { "epoch": 20.12, "learning_rate": 5.9887896304081284e-06, "loss": 3.8924, "step": 1842000 }, { "epoch": 20.13, "learning_rate": 5.987694867752671e-06, "loss": 3.889, "step": 1842500 }, { "epoch": 20.13, "learning_rate": 5.986600105097216e-06, "loss": 3.8885, "step": 1843000 }, { "epoch": 20.14, "learning_rate": 5.985505342441759e-06, "loss": 3.8933, "step": 1843500 }, { "epoch": 20.14, "learning_rate": 5.9844105797863026e-06, "loss": 3.8937, "step": 1844000 }, { "epoch": 20.15, "learning_rate": 5.983315817130847e-06, "loss": 3.8929, "step": 1844500 }, { "epoch": 20.15, "learning_rate": 5.98222105447539e-06, "loss": 3.8916, "step": 1845000 }, { "epoch": 20.16, "learning_rate": 5.981126291819934e-06, "loss": 3.8871, "step": 1845500 }, { "epoch": 20.17, "learning_rate": 5.9800315291644775e-06, "loss": 3.8904, "step": 1846000 }, { "epoch": 20.17, "learning_rate": 5.978936766509021e-06, "loss": 3.8926, "step": 1846500 }, { "epoch": 20.18, "learning_rate": 5.977842003853564e-06, "loss": 3.8947, "step": 1847000 }, { "epoch": 20.18, "learning_rate": 5.976747241198109e-06, "loss": 3.8915, "step": 1847500 }, { "epoch": 20.19, "learning_rate": 5.9756524785426525e-06, "loss": 3.8917, "step": 1848000 }, { "epoch": 20.19, "learning_rate": 5.974557715887196e-06, "loss": 3.8816, "step": 1848500 }, { "epoch": 20.2, "learning_rate": 5.97346295323174e-06, "loss": 3.8938, "step": 1849000 }, { "epoch": 20.2, "learning_rate": 5.972368190576283e-06, "loss": 3.8973, "step": 1849500 }, { "epoch": 20.21, "learning_rate": 5.9712734279208275e-06, "loss": 3.8864, "step": 1850000 }, { "epoch": 20.21, "learning_rate": 5.970178665265371e-06, "loss": 3.8891, "step": 1850500 }, { "epoch": 20.22, "learning_rate": 5.969083902609915e-06, "loss": 3.8937, "step": 1851000 }, { "epoch": 20.23, "learning_rate": 5.967989139954459e-06, "loss": 3.8905, "step": 1851500 }, { "epoch": 20.23, "learning_rate": 5.966894377299002e-06, "loss": 3.8902, "step": 1852000 }, { "epoch": 20.24, "learning_rate": 5.965799614643546e-06, "loss": 3.8892, "step": 1852500 }, { "epoch": 20.24, "learning_rate": 5.964704851988089e-06, "loss": 3.8932, "step": 1853000 }, { "epoch": 20.25, "learning_rate": 5.963610089332633e-06, "loss": 3.8863, "step": 1853500 }, { "epoch": 20.25, "learning_rate": 5.962515326677177e-06, "loss": 3.8936, "step": 1854000 }, { "epoch": 20.26, "learning_rate": 5.96142056402172e-06, "loss": 3.8912, "step": 1854500 }, { "epoch": 20.26, "learning_rate": 5.960325801366265e-06, "loss": 3.8826, "step": 1855000 }, { "epoch": 20.27, "learning_rate": 5.959231038710808e-06, "loss": 3.8895, "step": 1855500 }, { "epoch": 20.27, "learning_rate": 5.9581362760553515e-06, "loss": 3.8985, "step": 1856000 }, { "epoch": 20.28, "learning_rate": 5.957041513399895e-06, "loss": 3.8958, "step": 1856500 }, { "epoch": 20.29, "learning_rate": 5.955946750744439e-06, "loss": 3.8896, "step": 1857000 }, { "epoch": 20.29, "learning_rate": 5.954851988088984e-06, "loss": 3.8929, "step": 1857500 }, { "epoch": 20.3, "learning_rate": 5.9537572254335265e-06, "loss": 3.8894, "step": 1858000 }, { "epoch": 20.3, "learning_rate": 5.95266246277807e-06, "loss": 3.8879, "step": 1858500 }, { "epoch": 20.31, "learning_rate": 5.951567700122613e-06, "loss": 3.889, "step": 1859000 }, { "epoch": 20.31, "learning_rate": 5.950472937467158e-06, "loss": 3.8812, "step": 1859500 }, { "epoch": 20.32, "learning_rate": 5.949378174811701e-06, "loss": 3.8923, "step": 1860000 }, { "epoch": 20.32, "eval_accuracy": 0.38845568346944853, "eval_loss": 3.659417152404785, "eval_runtime": 353.5165, "eval_samples_per_second": 872.253, "eval_steps_per_second": 13.632, "step": 1860000 }, { "epoch": 20.32, "learning_rate": 5.948283412156245e-06, "loss": 3.8862, "step": 1860500 }, { "epoch": 20.33, "learning_rate": 5.947188649500789e-06, "loss": 3.8816, "step": 1861000 }, { "epoch": 20.33, "learning_rate": 5.946093886845332e-06, "loss": 3.8868, "step": 1861500 }, { "epoch": 20.34, "learning_rate": 5.9449991241898765e-06, "loss": 3.8924, "step": 1862000 }, { "epoch": 20.35, "learning_rate": 5.943904361534419e-06, "loss": 3.8918, "step": 1862500 }, { "epoch": 20.35, "learning_rate": 5.942809598878964e-06, "loss": 3.8909, "step": 1863000 }, { "epoch": 20.36, "learning_rate": 5.941714836223508e-06, "loss": 3.8842, "step": 1863500 }, { "epoch": 20.36, "learning_rate": 5.9406200735680506e-06, "loss": 3.8857, "step": 1864000 }, { "epoch": 20.37, "learning_rate": 5.939525310912595e-06, "loss": 3.8948, "step": 1864500 }, { "epoch": 20.37, "learning_rate": 5.938430548257138e-06, "loss": 3.8799, "step": 1865000 }, { "epoch": 20.38, "learning_rate": 5.937335785601682e-06, "loss": 3.8878, "step": 1865500 }, { "epoch": 20.38, "learning_rate": 5.9362410229462255e-06, "loss": 3.8815, "step": 1866000 }, { "epoch": 20.39, "learning_rate": 5.935146260290769e-06, "loss": 3.887, "step": 1866500 }, { "epoch": 20.39, "learning_rate": 5.934051497635314e-06, "loss": 3.893, "step": 1867000 }, { "epoch": 20.4, "learning_rate": 5.932956734979857e-06, "loss": 3.8881, "step": 1867500 }, { "epoch": 20.41, "learning_rate": 5.9318619723244005e-06, "loss": 3.8825, "step": 1868000 }, { "epoch": 20.41, "learning_rate": 5.930767209668944e-06, "loss": 3.8956, "step": 1868500 }, { "epoch": 20.42, "learning_rate": 5.929672447013488e-06, "loss": 3.8895, "step": 1869000 }, { "epoch": 20.42, "learning_rate": 5.928577684358031e-06, "loss": 3.893, "step": 1869500 }, { "epoch": 20.43, "learning_rate": 5.9274829217025755e-06, "loss": 3.8963, "step": 1870000 }, { "epoch": 20.43, "learning_rate": 5.926388159047119e-06, "loss": 3.8902, "step": 1870500 }, { "epoch": 20.44, "learning_rate": 5.925293396391663e-06, "loss": 3.8919, "step": 1871000 }, { "epoch": 20.44, "learning_rate": 5.924198633736207e-06, "loss": 3.8888, "step": 1871500 }, { "epoch": 20.45, "learning_rate": 5.92310387108075e-06, "loss": 3.8901, "step": 1872000 }, { "epoch": 20.45, "learning_rate": 5.922009108425294e-06, "loss": 3.8936, "step": 1872500 }, { "epoch": 20.46, "learning_rate": 5.920914345769837e-06, "loss": 3.8837, "step": 1873000 }, { "epoch": 20.47, "learning_rate": 5.919819583114381e-06, "loss": 3.8829, "step": 1873500 }, { "epoch": 20.47, "learning_rate": 5.9187248204589254e-06, "loss": 3.8893, "step": 1874000 }, { "epoch": 20.48, "learning_rate": 5.917630057803468e-06, "loss": 3.8765, "step": 1874500 }, { "epoch": 20.48, "learning_rate": 5.916535295148013e-06, "loss": 3.8876, "step": 1875000 }, { "epoch": 20.49, "learning_rate": 5.915440532492556e-06, "loss": 3.8852, "step": 1875500 }, { "epoch": 20.49, "learning_rate": 5.9143457698370995e-06, "loss": 3.8925, "step": 1876000 }, { "epoch": 20.5, "learning_rate": 5.913251007181644e-06, "loss": 3.8869, "step": 1876500 }, { "epoch": 20.5, "learning_rate": 5.912156244526187e-06, "loss": 3.8855, "step": 1877000 }, { "epoch": 20.51, "learning_rate": 5.911061481870732e-06, "loss": 3.8946, "step": 1877500 }, { "epoch": 20.51, "learning_rate": 5.9099667192152745e-06, "loss": 3.8883, "step": 1878000 }, { "epoch": 20.52, "learning_rate": 5.908871956559818e-06, "loss": 3.886, "step": 1878500 }, { "epoch": 20.53, "learning_rate": 5.907777193904361e-06, "loss": 3.89, "step": 1879000 }, { "epoch": 20.53, "learning_rate": 5.906682431248906e-06, "loss": 3.8938, "step": 1879500 }, { "epoch": 20.54, "learning_rate": 5.9055876685934495e-06, "loss": 3.8864, "step": 1880000 }, { "epoch": 20.54, "learning_rate": 5.904492905937993e-06, "loss": 3.8823, "step": 1880500 }, { "epoch": 20.55, "learning_rate": 5.903398143282537e-06, "loss": 3.8917, "step": 1881000 }, { "epoch": 20.55, "learning_rate": 5.90230338062708e-06, "loss": 3.8848, "step": 1881500 }, { "epoch": 20.56, "learning_rate": 5.9012086179716245e-06, "loss": 3.8908, "step": 1882000 }, { "epoch": 20.56, "learning_rate": 5.900113855316167e-06, "loss": 3.8802, "step": 1882500 }, { "epoch": 20.57, "learning_rate": 5.899019092660712e-06, "loss": 3.8837, "step": 1883000 }, { "epoch": 20.57, "learning_rate": 5.897924330005256e-06, "loss": 3.8907, "step": 1883500 }, { "epoch": 20.58, "learning_rate": 5.896829567349799e-06, "loss": 3.8815, "step": 1884000 }, { "epoch": 20.59, "learning_rate": 5.895734804694343e-06, "loss": 3.8842, "step": 1884500 }, { "epoch": 20.59, "learning_rate": 5.894640042038886e-06, "loss": 3.883, "step": 1885000 }, { "epoch": 20.6, "learning_rate": 5.893545279383431e-06, "loss": 3.8898, "step": 1885500 }, { "epoch": 20.6, "learning_rate": 5.892450516727974e-06, "loss": 3.891, "step": 1886000 }, { "epoch": 20.61, "learning_rate": 5.891355754072517e-06, "loss": 3.8911, "step": 1886500 }, { "epoch": 20.61, "learning_rate": 5.890260991417062e-06, "loss": 3.8837, "step": 1887000 }, { "epoch": 20.62, "learning_rate": 5.889166228761605e-06, "loss": 3.8939, "step": 1887500 }, { "epoch": 20.62, "learning_rate": 5.8880714661061485e-06, "loss": 3.886, "step": 1888000 }, { "epoch": 20.63, "learning_rate": 5.886976703450692e-06, "loss": 3.8907, "step": 1888500 }, { "epoch": 20.63, "learning_rate": 5.885881940795236e-06, "loss": 3.8798, "step": 1889000 }, { "epoch": 20.64, "learning_rate": 5.884787178139781e-06, "loss": 3.8845, "step": 1889500 }, { "epoch": 20.65, "learning_rate": 5.8836924154843235e-06, "loss": 3.8901, "step": 1890000 }, { "epoch": 20.65, "eval_accuracy": 0.3892746700541783, "eval_loss": 3.654066562652588, "eval_runtime": 355.2767, "eval_samples_per_second": 867.932, "eval_steps_per_second": 13.564, "step": 1890000 }, { "epoch": 20.65, "learning_rate": 5.882597652828867e-06, "loss": 3.883, "step": 1890500 }, { "epoch": 20.66, "learning_rate": 5.881502890173411e-06, "loss": 3.8821, "step": 1891000 }, { "epoch": 20.66, "learning_rate": 5.880408127517955e-06, "loss": 3.8872, "step": 1891500 }, { "epoch": 20.67, "learning_rate": 5.879313364862498e-06, "loss": 3.8881, "step": 1892000 }, { "epoch": 20.67, "learning_rate": 5.878218602207042e-06, "loss": 3.8839, "step": 1892500 }, { "epoch": 20.68, "learning_rate": 5.877123839551586e-06, "loss": 3.8837, "step": 1893000 }, { "epoch": 20.68, "learning_rate": 5.876029076896129e-06, "loss": 3.8802, "step": 1893500 }, { "epoch": 20.69, "learning_rate": 5.8749343142406734e-06, "loss": 3.8807, "step": 1894000 }, { "epoch": 20.69, "learning_rate": 5.873839551585216e-06, "loss": 3.8894, "step": 1894500 }, { "epoch": 20.7, "learning_rate": 5.872744788929761e-06, "loss": 3.881, "step": 1895000 }, { "epoch": 20.71, "learning_rate": 5.871650026274304e-06, "loss": 3.8849, "step": 1895500 }, { "epoch": 20.71, "learning_rate": 5.8705552636188476e-06, "loss": 3.8843, "step": 1896000 }, { "epoch": 20.72, "learning_rate": 5.869460500963392e-06, "loss": 3.8953, "step": 1896500 }, { "epoch": 20.72, "learning_rate": 5.868365738307935e-06, "loss": 3.89, "step": 1897000 }, { "epoch": 20.73, "learning_rate": 5.86727097565248e-06, "loss": 3.891, "step": 1897500 }, { "epoch": 20.73, "learning_rate": 5.8661762129970225e-06, "loss": 3.8867, "step": 1898000 }, { "epoch": 20.74, "learning_rate": 5.865081450341566e-06, "loss": 3.886, "step": 1898500 }, { "epoch": 20.74, "learning_rate": 5.863986687686111e-06, "loss": 3.8869, "step": 1899000 }, { "epoch": 20.75, "learning_rate": 5.862891925030654e-06, "loss": 3.8968, "step": 1899500 }, { "epoch": 20.76, "learning_rate": 5.8617971623751975e-06, "loss": 3.8872, "step": 1900000 }, { "epoch": 20.76, "learning_rate": 5.860702399719741e-06, "loss": 3.8849, "step": 1900500 }, { "epoch": 20.77, "learning_rate": 5.859607637064285e-06, "loss": 3.8895, "step": 1901000 }, { "epoch": 20.77, "learning_rate": 5.858512874408828e-06, "loss": 3.8891, "step": 1901500 }, { "epoch": 20.78, "learning_rate": 5.8574181117533725e-06, "loss": 3.8945, "step": 1902000 }, { "epoch": 20.78, "learning_rate": 5.856323349097916e-06, "loss": 3.8827, "step": 1902500 }, { "epoch": 20.79, "learning_rate": 5.85522858644246e-06, "loss": 3.8826, "step": 1903000 }, { "epoch": 20.79, "learning_rate": 5.854133823787004e-06, "loss": 3.8917, "step": 1903500 }, { "epoch": 20.8, "learning_rate": 5.853039061131547e-06, "loss": 3.8873, "step": 1904000 }, { "epoch": 20.8, "learning_rate": 5.851944298476091e-06, "loss": 3.8823, "step": 1904500 }, { "epoch": 20.81, "learning_rate": 5.850849535820634e-06, "loss": 3.8859, "step": 1905000 }, { "epoch": 20.82, "learning_rate": 5.849754773165179e-06, "loss": 3.887, "step": 1905500 }, { "epoch": 20.82, "learning_rate": 5.848660010509722e-06, "loss": 3.8878, "step": 1906000 }, { "epoch": 20.83, "learning_rate": 5.847565247854265e-06, "loss": 3.8824, "step": 1906500 }, { "epoch": 20.83, "learning_rate": 5.84647048519881e-06, "loss": 3.8893, "step": 1907000 }, { "epoch": 20.84, "learning_rate": 5.845375722543353e-06, "loss": 3.8795, "step": 1907500 }, { "epoch": 20.84, "learning_rate": 5.8442809598878965e-06, "loss": 3.8878, "step": 1908000 }, { "epoch": 20.85, "learning_rate": 5.84318619723244e-06, "loss": 3.8832, "step": 1908500 }, { "epoch": 20.85, "learning_rate": 5.842091434576984e-06, "loss": 3.8898, "step": 1909000 }, { "epoch": 20.86, "learning_rate": 5.840996671921529e-06, "loss": 3.884, "step": 1909500 }, { "epoch": 20.86, "learning_rate": 5.8399019092660715e-06, "loss": 3.8849, "step": 1910000 }, { "epoch": 20.87, "learning_rate": 5.838807146610615e-06, "loss": 3.8896, "step": 1910500 }, { "epoch": 20.88, "learning_rate": 5.837712383955159e-06, "loss": 3.8864, "step": 1911000 }, { "epoch": 20.88, "learning_rate": 5.836617621299703e-06, "loss": 3.8824, "step": 1911500 }, { "epoch": 20.89, "learning_rate": 5.835522858644247e-06, "loss": 3.8874, "step": 1912000 }, { "epoch": 20.89, "learning_rate": 5.83442809598879e-06, "loss": 3.8887, "step": 1912500 }, { "epoch": 20.9, "learning_rate": 5.833333333333334e-06, "loss": 3.8787, "step": 1913000 }, { "epoch": 20.9, "learning_rate": 5.832238570677877e-06, "loss": 3.8881, "step": 1913500 }, { "epoch": 20.91, "learning_rate": 5.8311438080224214e-06, "loss": 3.8851, "step": 1914000 }, { "epoch": 20.91, "learning_rate": 5.830049045366964e-06, "loss": 3.8869, "step": 1914500 }, { "epoch": 20.92, "learning_rate": 5.828954282711509e-06, "loss": 3.8897, "step": 1915000 }, { "epoch": 20.92, "learning_rate": 5.827859520056053e-06, "loss": 3.8844, "step": 1915500 }, { "epoch": 20.93, "learning_rate": 5.8267647574005956e-06, "loss": 3.8818, "step": 1916000 }, { "epoch": 20.94, "learning_rate": 5.82566999474514e-06, "loss": 3.8862, "step": 1916500 }, { "epoch": 20.94, "learning_rate": 5.824575232089683e-06, "loss": 3.8902, "step": 1917000 }, { "epoch": 20.95, "learning_rate": 5.823480469434228e-06, "loss": 3.8833, "step": 1917500 }, { "epoch": 20.95, "learning_rate": 5.8223857067787705e-06, "loss": 3.8893, "step": 1918000 }, { "epoch": 20.96, "learning_rate": 5.821290944123314e-06, "loss": 3.8829, "step": 1918500 }, { "epoch": 20.96, "learning_rate": 5.820196181467859e-06, "loss": 3.8897, "step": 1919000 }, { "epoch": 20.97, "learning_rate": 5.819101418812402e-06, "loss": 3.8822, "step": 1919500 }, { "epoch": 20.97, "learning_rate": 5.818006656156946e-06, "loss": 3.8853, "step": 1920000 }, { "epoch": 20.97, "eval_accuracy": 0.3891463309430576, "eval_loss": 3.6538729667663574, "eval_runtime": 356.8555, "eval_samples_per_second": 864.092, "eval_steps_per_second": 13.504, "step": 1920000 }, { "epoch": 20.98, "learning_rate": 5.816911893501489e-06, "loss": 3.8836, "step": 1920500 }, { "epoch": 20.98, "learning_rate": 5.815817130846033e-06, "loss": 3.8895, "step": 1921000 }, { "epoch": 20.99, "learning_rate": 5.814722368190576e-06, "loss": 3.8834, "step": 1921500 }, { "epoch": 21.0, "learning_rate": 5.8136276055351205e-06, "loss": 3.8859, "step": 1922000 }, { "epoch": 21.0, "learning_rate": 5.812532842879664e-06, "loss": 3.8836, "step": 1922500 }, { "epoch": 21.01, "learning_rate": 5.811438080224208e-06, "loss": 3.8834, "step": 1923000 }, { "epoch": 21.01, "learning_rate": 5.810343317568752e-06, "loss": 3.8826, "step": 1923500 }, { "epoch": 21.02, "learning_rate": 5.809248554913295e-06, "loss": 3.8799, "step": 1924000 }, { "epoch": 21.02, "learning_rate": 5.808153792257839e-06, "loss": 3.8885, "step": 1924500 }, { "epoch": 21.03, "learning_rate": 5.807059029602383e-06, "loss": 3.8818, "step": 1925000 }, { "epoch": 21.03, "learning_rate": 5.805964266946927e-06, "loss": 3.8806, "step": 1925500 }, { "epoch": 21.04, "learning_rate": 5.80486950429147e-06, "loss": 3.8794, "step": 1926000 }, { "epoch": 21.04, "learning_rate": 5.803774741636013e-06, "loss": 3.8826, "step": 1926500 }, { "epoch": 21.05, "learning_rate": 5.802679978980558e-06, "loss": 3.8804, "step": 1927000 }, { "epoch": 21.06, "learning_rate": 5.801585216325101e-06, "loss": 3.8822, "step": 1927500 }, { "epoch": 21.06, "learning_rate": 5.8004904536696445e-06, "loss": 3.8814, "step": 1928000 }, { "epoch": 21.07, "learning_rate": 5.799395691014189e-06, "loss": 3.8861, "step": 1928500 }, { "epoch": 21.07, "learning_rate": 5.798300928358732e-06, "loss": 3.8859, "step": 1929000 }, { "epoch": 21.08, "learning_rate": 5.797206165703277e-06, "loss": 3.8813, "step": 1929500 }, { "epoch": 21.08, "learning_rate": 5.7961114030478195e-06, "loss": 3.8839, "step": 1930000 }, { "epoch": 21.09, "learning_rate": 5.795016640392363e-06, "loss": 3.8818, "step": 1930500 }, { "epoch": 21.09, "learning_rate": 5.793921877736907e-06, "loss": 3.8776, "step": 1931000 }, { "epoch": 21.1, "learning_rate": 5.792827115081451e-06, "loss": 3.8879, "step": 1931500 }, { "epoch": 21.1, "learning_rate": 5.791732352425995e-06, "loss": 3.8783, "step": 1932000 }, { "epoch": 21.11, "learning_rate": 5.790637589770538e-06, "loss": 3.8802, "step": 1932500 }, { "epoch": 21.12, "learning_rate": 5.789542827115082e-06, "loss": 3.8852, "step": 1933000 }, { "epoch": 21.12, "learning_rate": 5.788448064459625e-06, "loss": 3.8831, "step": 1933500 }, { "epoch": 21.13, "learning_rate": 5.7873533018041694e-06, "loss": 3.8834, "step": 1934000 }, { "epoch": 21.13, "learning_rate": 5.786258539148712e-06, "loss": 3.8801, "step": 1934500 }, { "epoch": 21.14, "learning_rate": 5.785163776493257e-06, "loss": 3.8793, "step": 1935000 }, { "epoch": 21.14, "learning_rate": 5.784069013837801e-06, "loss": 3.8825, "step": 1935500 }, { "epoch": 21.15, "learning_rate": 5.7829742511823436e-06, "loss": 3.8816, "step": 1936000 }, { "epoch": 21.15, "learning_rate": 5.781879488526888e-06, "loss": 3.8819, "step": 1936500 }, { "epoch": 21.16, "learning_rate": 5.780784725871431e-06, "loss": 3.8849, "step": 1937000 }, { "epoch": 21.16, "learning_rate": 5.779689963215976e-06, "loss": 3.8788, "step": 1937500 }, { "epoch": 21.17, "learning_rate": 5.778595200560519e-06, "loss": 3.8831, "step": 1938000 }, { "epoch": 21.18, "learning_rate": 5.777500437905062e-06, "loss": 3.8846, "step": 1938500 }, { "epoch": 21.18, "learning_rate": 5.776405675249607e-06, "loss": 3.8861, "step": 1939000 }, { "epoch": 21.19, "learning_rate": 5.77531091259415e-06, "loss": 3.884, "step": 1939500 }, { "epoch": 21.19, "learning_rate": 5.774216149938694e-06, "loss": 3.8887, "step": 1940000 }, { "epoch": 21.2, "learning_rate": 5.773121387283237e-06, "loss": 3.8784, "step": 1940500 }, { "epoch": 21.2, "learning_rate": 5.772026624627781e-06, "loss": 3.8787, "step": 1941000 }, { "epoch": 21.21, "learning_rate": 5.770931861972326e-06, "loss": 3.883, "step": 1941500 }, { "epoch": 21.21, "learning_rate": 5.7698370993168685e-06, "loss": 3.8807, "step": 1942000 }, { "epoch": 21.22, "learning_rate": 5.768742336661412e-06, "loss": 3.8846, "step": 1942500 }, { "epoch": 21.22, "learning_rate": 5.767647574005956e-06, "loss": 3.8866, "step": 1943000 }, { "epoch": 21.23, "learning_rate": 5.7665528113505e-06, "loss": 3.8835, "step": 1943500 }, { "epoch": 21.24, "learning_rate": 5.765458048695043e-06, "loss": 3.8843, "step": 1944000 }, { "epoch": 21.24, "learning_rate": 5.764363286039587e-06, "loss": 3.8809, "step": 1944500 }, { "epoch": 21.25, "learning_rate": 5.763268523384131e-06, "loss": 3.88, "step": 1945000 }, { "epoch": 21.25, "learning_rate": 5.762173760728675e-06, "loss": 3.8805, "step": 1945500 }, { "epoch": 21.26, "learning_rate": 5.761078998073218e-06, "loss": 3.8819, "step": 1946000 }, { "epoch": 21.26, "learning_rate": 5.759984235417761e-06, "loss": 3.8807, "step": 1946500 }, { "epoch": 21.27, "learning_rate": 5.758889472762306e-06, "loss": 3.8841, "step": 1947000 }, { "epoch": 21.27, "learning_rate": 5.757794710106849e-06, "loss": 3.8796, "step": 1947500 }, { "epoch": 21.28, "learning_rate": 5.7566999474513925e-06, "loss": 3.8837, "step": 1948000 }, { "epoch": 21.28, "learning_rate": 5.755605184795937e-06, "loss": 3.8834, "step": 1948500 }, { "epoch": 21.29, "learning_rate": 5.75451042214048e-06, "loss": 3.8844, "step": 1949000 }, { "epoch": 21.3, "learning_rate": 5.753415659485025e-06, "loss": 3.8826, "step": 1949500 }, { "epoch": 21.3, "learning_rate": 5.7523208968295675e-06, "loss": 3.8808, "step": 1950000 }, { "epoch": 21.3, "eval_accuracy": 0.3893762728345541, "eval_loss": 3.652740240097046, "eval_runtime": 395.8277, "eval_samples_per_second": 779.016, "eval_steps_per_second": 12.174, "step": 1950000 }, { "epoch": 21.31, "learning_rate": 5.751226134174111e-06, "loss": 3.881, "step": 1950500 }, { "epoch": 21.31, "learning_rate": 5.750131371518656e-06, "loss": 3.8785, "step": 1951000 }, { "epoch": 21.32, "learning_rate": 5.749036608863199e-06, "loss": 3.8859, "step": 1951500 }, { "epoch": 21.32, "learning_rate": 5.747941846207743e-06, "loss": 3.8824, "step": 1952000 }, { "epoch": 21.33, "learning_rate": 5.746847083552286e-06, "loss": 3.8812, "step": 1952500 }, { "epoch": 21.33, "learning_rate": 5.74575232089683e-06, "loss": 3.883, "step": 1953000 }, { "epoch": 21.34, "learning_rate": 5.744657558241373e-06, "loss": 3.8868, "step": 1953500 }, { "epoch": 21.34, "learning_rate": 5.7435627955859174e-06, "loss": 3.8841, "step": 1954000 }, { "epoch": 21.35, "learning_rate": 5.742468032930462e-06, "loss": 3.8822, "step": 1954500 }, { "epoch": 21.36, "learning_rate": 5.741373270275005e-06, "loss": 3.8864, "step": 1955000 }, { "epoch": 21.36, "learning_rate": 5.740278507619549e-06, "loss": 3.8845, "step": 1955500 }, { "epoch": 21.37, "learning_rate": 5.7391837449640916e-06, "loss": 3.8811, "step": 1956000 }, { "epoch": 21.37, "learning_rate": 5.738088982308636e-06, "loss": 3.8846, "step": 1956500 }, { "epoch": 21.38, "learning_rate": 5.736994219653179e-06, "loss": 3.8855, "step": 1957000 }, { "epoch": 21.38, "learning_rate": 5.735899456997724e-06, "loss": 3.8808, "step": 1957500 }, { "epoch": 21.39, "learning_rate": 5.734804694342267e-06, "loss": 3.8822, "step": 1958000 }, { "epoch": 21.39, "learning_rate": 5.73370993168681e-06, "loss": 3.8846, "step": 1958500 }, { "epoch": 21.4, "learning_rate": 5.732615169031355e-06, "loss": 3.8778, "step": 1959000 }, { "epoch": 21.41, "learning_rate": 5.731520406375898e-06, "loss": 3.8822, "step": 1959500 }, { "epoch": 21.41, "learning_rate": 5.730425643720442e-06, "loss": 3.8883, "step": 1960000 }, { "epoch": 21.42, "learning_rate": 5.729330881064985e-06, "loss": 3.8806, "step": 1960500 }, { "epoch": 21.42, "learning_rate": 5.728236118409529e-06, "loss": 3.8871, "step": 1961000 }, { "epoch": 21.43, "learning_rate": 5.727141355754074e-06, "loss": 3.8836, "step": 1961500 }, { "epoch": 21.43, "learning_rate": 5.7260465930986165e-06, "loss": 3.8787, "step": 1962000 }, { "epoch": 21.44, "learning_rate": 5.72495183044316e-06, "loss": 3.8872, "step": 1962500 }, { "epoch": 21.44, "learning_rate": 5.723857067787704e-06, "loss": 3.8894, "step": 1963000 }, { "epoch": 21.45, "learning_rate": 5.722762305132248e-06, "loss": 3.8841, "step": 1963500 }, { "epoch": 21.45, "learning_rate": 5.721667542476792e-06, "loss": 3.8803, "step": 1964000 }, { "epoch": 21.46, "learning_rate": 5.720572779821335e-06, "loss": 3.8909, "step": 1964500 }, { "epoch": 21.47, "learning_rate": 5.719478017165879e-06, "loss": 3.8788, "step": 1965000 }, { "epoch": 21.47, "learning_rate": 5.718383254510423e-06, "loss": 3.888, "step": 1965500 }, { "epoch": 21.48, "learning_rate": 5.717288491854966e-06, "loss": 3.8786, "step": 1966000 }, { "epoch": 21.48, "learning_rate": 5.716193729199509e-06, "loss": 3.8852, "step": 1966500 }, { "epoch": 21.49, "learning_rate": 5.715098966544054e-06, "loss": 3.8801, "step": 1967000 }, { "epoch": 21.49, "learning_rate": 5.714004203888598e-06, "loss": 3.8853, "step": 1967500 }, { "epoch": 21.5, "learning_rate": 5.7129094412331405e-06, "loss": 3.8803, "step": 1968000 }, { "epoch": 21.5, "learning_rate": 5.711814678577685e-06, "loss": 3.8792, "step": 1968500 }, { "epoch": 21.51, "learning_rate": 5.710719915922228e-06, "loss": 3.8836, "step": 1969000 }, { "epoch": 21.51, "learning_rate": 5.709625153266773e-06, "loss": 3.8796, "step": 1969500 }, { "epoch": 21.52, "learning_rate": 5.7085303906113155e-06, "loss": 3.8824, "step": 1970000 }, { "epoch": 21.53, "learning_rate": 5.707435627955859e-06, "loss": 3.8816, "step": 1970500 }, { "epoch": 21.53, "learning_rate": 5.706340865300404e-06, "loss": 3.8762, "step": 1971000 }, { "epoch": 21.54, "learning_rate": 5.705246102644947e-06, "loss": 3.888, "step": 1971500 }, { "epoch": 21.54, "learning_rate": 5.704151339989491e-06, "loss": 3.8817, "step": 1972000 }, { "epoch": 21.55, "learning_rate": 5.703056577334034e-06, "loss": 3.8886, "step": 1972500 }, { "epoch": 21.55, "learning_rate": 5.701961814678578e-06, "loss": 3.8863, "step": 1973000 }, { "epoch": 21.56, "learning_rate": 5.7008670520231226e-06, "loss": 3.8845, "step": 1973500 }, { "epoch": 21.56, "learning_rate": 5.6997722893676655e-06, "loss": 3.8833, "step": 1974000 }, { "epoch": 21.57, "learning_rate": 5.69867752671221e-06, "loss": 3.8715, "step": 1974500 }, { "epoch": 21.57, "learning_rate": 5.697582764056753e-06, "loss": 3.8858, "step": 1975000 }, { "epoch": 21.58, "learning_rate": 5.696488001401297e-06, "loss": 3.8783, "step": 1975500 }, { "epoch": 21.59, "learning_rate": 5.6953932387458396e-06, "loss": 3.884, "step": 1976000 }, { "epoch": 21.59, "learning_rate": 5.694298476090384e-06, "loss": 3.8832, "step": 1976500 }, { "epoch": 21.6, "learning_rate": 5.693203713434928e-06, "loss": 3.8814, "step": 1977000 }, { "epoch": 21.6, "learning_rate": 5.692108950779472e-06, "loss": 3.8781, "step": 1977500 }, { "epoch": 21.61, "learning_rate": 5.691014188124015e-06, "loss": 3.8794, "step": 1978000 }, { "epoch": 21.61, "learning_rate": 5.689919425468558e-06, "loss": 3.8845, "step": 1978500 }, { "epoch": 21.62, "learning_rate": 5.688824662813103e-06, "loss": 3.8815, "step": 1979000 }, { "epoch": 21.62, "learning_rate": 5.687729900157646e-06, "loss": 3.8823, "step": 1979500 }, { "epoch": 21.63, "learning_rate": 5.68663513750219e-06, "loss": 3.8835, "step": 1980000 }, { "epoch": 21.63, "eval_accuracy": 0.38958513743599005, "eval_loss": 3.649672269821167, "eval_runtime": 404.9494, "eval_samples_per_second": 761.468, "eval_steps_per_second": 11.9, "step": 1980000 }, { "epoch": 21.63, "learning_rate": 5.685540374846734e-06, "loss": 3.8815, "step": 1980500 }, { "epoch": 21.64, "learning_rate": 5.684445612191277e-06, "loss": 3.8769, "step": 1981000 }, { "epoch": 21.65, "learning_rate": 5.683350849535822e-06, "loss": 3.8883, "step": 1981500 }, { "epoch": 21.65, "learning_rate": 5.6822560868803645e-06, "loss": 3.8805, "step": 1982000 }, { "epoch": 21.66, "learning_rate": 5.681161324224908e-06, "loss": 3.8871, "step": 1982500 }, { "epoch": 21.66, "learning_rate": 5.680066561569452e-06, "loss": 3.8752, "step": 1983000 }, { "epoch": 21.67, "learning_rate": 5.678971798913996e-06, "loss": 3.8753, "step": 1983500 }, { "epoch": 21.67, "learning_rate": 5.67787703625854e-06, "loss": 3.8818, "step": 1984000 }, { "epoch": 21.68, "learning_rate": 5.676782273603083e-06, "loss": 3.8775, "step": 1984500 }, { "epoch": 21.68, "learning_rate": 5.675687510947627e-06, "loss": 3.8854, "step": 1985000 }, { "epoch": 21.69, "learning_rate": 5.674592748292171e-06, "loss": 3.894, "step": 1985500 }, { "epoch": 21.69, "learning_rate": 5.6734979856367144e-06, "loss": 3.8828, "step": 1986000 }, { "epoch": 21.7, "learning_rate": 5.672403222981259e-06, "loss": 3.8832, "step": 1986500 }, { "epoch": 21.71, "learning_rate": 5.671308460325802e-06, "loss": 3.8834, "step": 1987000 }, { "epoch": 21.71, "learning_rate": 5.670213697670346e-06, "loss": 3.8839, "step": 1987500 }, { "epoch": 21.72, "learning_rate": 5.6691189350148885e-06, "loss": 3.8807, "step": 1988000 }, { "epoch": 21.72, "learning_rate": 5.668024172359433e-06, "loss": 3.8837, "step": 1988500 }, { "epoch": 21.73, "learning_rate": 5.666929409703976e-06, "loss": 3.8822, "step": 1989000 }, { "epoch": 21.73, "learning_rate": 5.665834647048521e-06, "loss": 3.882, "step": 1989500 }, { "epoch": 21.74, "learning_rate": 5.664739884393064e-06, "loss": 3.8776, "step": 1990000 }, { "epoch": 21.74, "learning_rate": 5.663645121737607e-06, "loss": 3.8801, "step": 1990500 }, { "epoch": 21.75, "learning_rate": 5.662550359082152e-06, "loss": 3.8786, "step": 1991000 }, { "epoch": 21.75, "learning_rate": 5.661455596426695e-06, "loss": 3.8813, "step": 1991500 }, { "epoch": 21.76, "learning_rate": 5.660360833771239e-06, "loss": 3.8861, "step": 1992000 }, { "epoch": 21.77, "learning_rate": 5.659266071115782e-06, "loss": 3.8843, "step": 1992500 }, { "epoch": 21.77, "learning_rate": 5.658171308460326e-06, "loss": 3.8804, "step": 1993000 }, { "epoch": 21.78, "learning_rate": 5.6570765458048706e-06, "loss": 3.8767, "step": 1993500 }, { "epoch": 21.78, "learning_rate": 5.6559817831494135e-06, "loss": 3.8802, "step": 1994000 }, { "epoch": 21.79, "step": 1994357, "total_flos": 1.9405559434104013e+17, "train_loss": 4.1639687691688305, "train_runtime": 215975.8067, "train_samples_per_second": 1356.346, "train_steps_per_second": 21.193 } ], "max_steps": 4577200, "num_train_epochs": 50, "total_flos": 1.9405559434104013e+17, "trial_name": null, "trial_params": null }