diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,77328 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.8370592777695176, + "global_step": 6400000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.7664e-06, + "loss": 5.2774, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 5.5664e-06, + "loss": 2.8368, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 8.3664e-06, + "loss": 2.7102, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 1.1166399999999999e-05, + "loss": 2.563, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 1.39664e-05, + "loss": 2.4577, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 1.67664e-05, + "loss": 2.3862, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 1.95664e-05, + "loss": 2.3317, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 2.23664e-05, + "loss": 2.2799, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 2.51664e-05, + "loss": 2.2089, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 2.79664e-05, + "loss": 2.1503, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 3.07664e-05, + "loss": 2.1188, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 3.3566400000000004e-05, + "loss": 2.0651, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 3.63664e-05, + "loss": 2.01, + "step": 6500 + }, + { + "epoch": 0.0, + "learning_rate": 3.91664e-05, + "loss": 1.952, + "step": 7000 + }, + { + "epoch": 0.0, + "learning_rate": 4.19664e-05, + "loss": 1.8711, + "step": 7500 + }, + { + "epoch": 0.0, + "learning_rate": 4.47664e-05, + "loss": 1.8241, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.75664e-05, + "loss": 1.7604, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 5.03664e-05, + "loss": 1.7357, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 5.31664e-05, + "loss": 1.6838, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 5.59664e-05, + "loss": 1.6501, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 5.599792523402617e-05, + "loss": 1.5994, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5995825268465594e-05, + "loss": 1.5364, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 5.599372530290503e-05, + "loss": 1.5108, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 5.599162533734447e-05, + "loss": 1.4591, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 5.59895253717839e-05, + "loss": 1.4329, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5987425406223334e-05, + "loss": 1.3767, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5985325440662774e-05, + "loss": 1.3685, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 5.598322967503333e-05, + "loss": 1.3409, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 5.598112970947276e-05, + "loss": 1.2896, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5979029743912195e-05, + "loss": 1.2794, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5976929778351635e-05, + "loss": 1.267, + "step": 15500 + }, + { + "epoch": 0.01, + "learning_rate": 5.597483401272219e-05, + "loss": 1.2646, + "step": 16000 + }, + { + "epoch": 0.01, + "learning_rate": 5.597273404716162e-05, + "loss": 1.2289, + "step": 16500 + }, + { + "epoch": 0.01, + "learning_rate": 5.597063828153218e-05, + "loss": 1.2068, + "step": 17000 + }, + { + "epoch": 0.01, + "learning_rate": 5.596853831597162e-05, + "loss": 1.1855, + "step": 17500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5966442550342176e-05, + "loss": 1.157, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 5.596434258478161e-05, + "loss": 1.1478, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 5.596224261922104e-05, + "loss": 1.1547, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5960142653660483e-05, + "loss": 1.1175, + "step": 19500 + }, + { + "epoch": 0.01, + "learning_rate": 5.595804268809992e-05, + "loss": 1.1145, + "step": 20000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5955942722539344e-05, + "loss": 1.1134, + "step": 20500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5953842756978784e-05, + "loss": 1.0724, + "step": 21000 + }, + { + "epoch": 0.01, + "learning_rate": 5.595174279141822e-05, + "loss": 1.0697, + "step": 21500 + }, + { + "epoch": 0.01, + "learning_rate": 5.594964282585766e-05, + "loss": 1.0638, + "step": 22000 + }, + { + "epoch": 0.01, + "learning_rate": 5.594754286029709e-05, + "loss": 1.0493, + "step": 22500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5945442894736524e-05, + "loss": 1.0498, + "step": 23000 + }, + { + "epoch": 0.01, + "learning_rate": 5.5943342929175965e-05, + "loss": 1.0211, + "step": 23500 + }, + { + "epoch": 0.01, + "learning_rate": 5.594124716354652e-05, + "loss": 1.0341, + "step": 24000 + }, + { + "epoch": 0.01, + "learning_rate": 5.593914719798595e-05, + "loss": 1.0237, + "step": 24500 + }, + { + "epoch": 0.01, + "learning_rate": 5.5937047232425385e-05, + "loss": 1.0052, + "step": 25000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5934947266864825e-05, + "loss": 0.9896, + "step": 25500 + }, + { + "epoch": 0.02, + "learning_rate": 5.593285150123538e-05, + "loss": 1.0035, + "step": 26000 + }, + { + "epoch": 0.02, + "learning_rate": 5.593075153567481e-05, + "loss": 0.971, + "step": 26500 + }, + { + "epoch": 0.02, + "learning_rate": 5.592865577004537e-05, + "loss": 0.9709, + "step": 27000 + }, + { + "epoch": 0.02, + "learning_rate": 5.59265558044848e-05, + "loss": 0.9857, + "step": 27500 + }, + { + "epoch": 0.02, + "learning_rate": 5.592445583892424e-05, + "loss": 0.9686, + "step": 28000 + }, + { + "epoch": 0.02, + "learning_rate": 5.592235587336367e-05, + "loss": 0.9634, + "step": 28500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5920260107734234e-05, + "loss": 0.9445, + "step": 29000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5918160142173674e-05, + "loss": 0.9376, + "step": 29500 + }, + { + "epoch": 0.02, + "learning_rate": 5.59160601766131e-05, + "loss": 0.9412, + "step": 30000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5913960211052534e-05, + "loss": 0.9431, + "step": 30500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5911860245491974e-05, + "loss": 0.9063, + "step": 31000 + }, + { + "epoch": 0.02, + "learning_rate": 5.590976027993141e-05, + "loss": 0.9193, + "step": 31500 + }, + { + "epoch": 0.02, + "learning_rate": 5.590766031437084e-05, + "loss": 0.9074, + "step": 32000 + }, + { + "epoch": 0.02, + "learning_rate": 5.590556034881028e-05, + "loss": 0.9163, + "step": 32500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5903460383249715e-05, + "loss": 0.8946, + "step": 33000 + }, + { + "epoch": 0.02, + "learning_rate": 5.590136041768915e-05, + "loss": 0.8955, + "step": 33500 + }, + { + "epoch": 0.02, + "learning_rate": 5.589926045212859e-05, + "loss": 0.886, + "step": 34000 + }, + { + "epoch": 0.02, + "learning_rate": 5.589716048656802e-05, + "loss": 0.8956, + "step": 34500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5895060521007455e-05, + "loss": 0.8857, + "step": 35000 + }, + { + "epoch": 0.02, + "learning_rate": 5.589296055544689e-05, + "loss": 0.891, + "step": 35500 + }, + { + "epoch": 0.02, + "learning_rate": 5.589086058988632e-05, + "loss": 0.8747, + "step": 36000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5888760624325756e-05, + "loss": 0.8796, + "step": 36500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5886664858696316e-05, + "loss": 0.8558, + "step": 37000 + }, + { + "epoch": 0.02, + "learning_rate": 5.588456489313575e-05, + "loss": 0.8488, + "step": 37500 + }, + { + "epoch": 0.02, + "learning_rate": 5.588246492757518e-05, + "loss": 0.8656, + "step": 38000 + }, + { + "epoch": 0.02, + "learning_rate": 5.588036496201462e-05, + "loss": 0.8456, + "step": 38500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5878269196385184e-05, + "loss": 0.8532, + "step": 39000 + }, + { + "epoch": 0.02, + "learning_rate": 5.587616923082462e-05, + "loss": 0.8454, + "step": 39500 + }, + { + "epoch": 0.02, + "learning_rate": 5.587407346519517e-05, + "loss": 0.8487, + "step": 40000 + }, + { + "epoch": 0.02, + "learning_rate": 5.5871973499634604e-05, + "loss": 0.839, + "step": 40500 + }, + { + "epoch": 0.02, + "learning_rate": 5.5869873534074044e-05, + "loss": 0.8438, + "step": 41000 + }, + { + "epoch": 0.02, + "learning_rate": 5.586777356851348e-05, + "loss": 0.8217, + "step": 41500 + }, + { + "epoch": 0.03, + "learning_rate": 5.586567360295291e-05, + "loss": 0.8215, + "step": 42000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5863573637392345e-05, + "loss": 0.8145, + "step": 42500 + }, + { + "epoch": 0.03, + "learning_rate": 5.586147367183178e-05, + "loss": 0.825, + "step": 43000 + }, + { + "epoch": 0.03, + "learning_rate": 5.585937370627121e-05, + "loss": 0.8251, + "step": 43500 + }, + { + "epoch": 0.03, + "learning_rate": 5.585727374071065e-05, + "loss": 0.8294, + "step": 44000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5855173775150085e-05, + "loss": 0.83, + "step": 44500 + }, + { + "epoch": 0.03, + "learning_rate": 5.585307380958952e-05, + "loss": 0.813, + "step": 45000 + }, + { + "epoch": 0.03, + "learning_rate": 5.585097384402896e-05, + "loss": 0.8084, + "step": 45500 + }, + { + "epoch": 0.03, + "learning_rate": 5.584888227833064e-05, + "loss": 0.7944, + "step": 46000 + }, + { + "epoch": 0.03, + "learning_rate": 5.584678651270119e-05, + "loss": 0.8026, + "step": 46500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5844686547140627e-05, + "loss": 0.8034, + "step": 47000 + }, + { + "epoch": 0.03, + "learning_rate": 5.584258658158006e-05, + "loss": 0.7919, + "step": 47500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5840490815950614e-05, + "loss": 0.7927, + "step": 48000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5838390850390054e-05, + "loss": 0.8037, + "step": 48500 + }, + { + "epoch": 0.03, + "learning_rate": 5.583629088482949e-05, + "loss": 0.7751, + "step": 49000 + }, + { + "epoch": 0.03, + "learning_rate": 5.583419091926892e-05, + "loss": 0.7858, + "step": 49500 + }, + { + "epoch": 0.03, + "learning_rate": 5.583209095370836e-05, + "loss": 0.7772, + "step": 50000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5829995188078915e-05, + "loss": 0.7833, + "step": 50500 + }, + { + "epoch": 0.03, + "learning_rate": 5.582789522251835e-05, + "loss": 0.7686, + "step": 51000 + }, + { + "epoch": 0.03, + "learning_rate": 5.582579525695779e-05, + "loss": 0.7825, + "step": 51500 + }, + { + "epoch": 0.03, + "learning_rate": 5.582369529139722e-05, + "loss": 0.7745, + "step": 52000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5821595325836655e-05, + "loss": 0.7813, + "step": 52500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5819495360276095e-05, + "loss": 0.7818, + "step": 53000 + }, + { + "epoch": 0.03, + "learning_rate": 5.581739539471553e-05, + "loss": 0.7688, + "step": 53500 + }, + { + "epoch": 0.03, + "learning_rate": 5.581529542915496e-05, + "loss": 0.7534, + "step": 54000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5813195463594396e-05, + "loss": 0.7563, + "step": 54500 + }, + { + "epoch": 0.03, + "learning_rate": 5.581109549803383e-05, + "loss": 0.7565, + "step": 55000 + }, + { + "epoch": 0.03, + "learning_rate": 5.580899553247326e-05, + "loss": 0.7614, + "step": 55500 + }, + { + "epoch": 0.03, + "learning_rate": 5.58068955669127e-05, + "loss": 0.7446, + "step": 56000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5804795601352136e-05, + "loss": 0.7602, + "step": 56500 + }, + { + "epoch": 0.03, + "learning_rate": 5.580269563579157e-05, + "loss": 0.7522, + "step": 57000 + }, + { + "epoch": 0.03, + "learning_rate": 5.5800599870162123e-05, + "loss": 0.7555, + "step": 57500 + }, + { + "epoch": 0.03, + "learning_rate": 5.5798499904601564e-05, + "loss": 0.7548, + "step": 58000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5796399939041e-05, + "loss": 0.7483, + "step": 58500 + }, + { + "epoch": 0.04, + "learning_rate": 5.579429997348043e-05, + "loss": 0.7502, + "step": 59000 + }, + { + "epoch": 0.04, + "learning_rate": 5.579220000791987e-05, + "loss": 0.7493, + "step": 59500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5790100042359304e-05, + "loss": 0.7501, + "step": 60000 + }, + { + "epoch": 0.04, + "learning_rate": 5.578800007679874e-05, + "loss": 0.7415, + "step": 60500 + }, + { + "epoch": 0.04, + "learning_rate": 5.578590011123818e-05, + "loss": 0.7266, + "step": 61000 + }, + { + "epoch": 0.04, + "learning_rate": 5.578380434560873e-05, + "loss": 0.7446, + "step": 61500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5781704380048165e-05, + "loss": 0.7204, + "step": 62000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5779604414487605e-05, + "loss": 0.7352, + "step": 62500 + }, + { + "epoch": 0.04, + "learning_rate": 5.577750444892704e-05, + "loss": 0.7359, + "step": 63000 + }, + { + "epoch": 0.04, + "learning_rate": 5.577540448336647e-05, + "loss": 0.725, + "step": 63500 + }, + { + "epoch": 0.04, + "learning_rate": 5.577330451780591e-05, + "loss": 0.7196, + "step": 64000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5771208752176466e-05, + "loss": 0.7326, + "step": 64500 + }, + { + "epoch": 0.04, + "learning_rate": 5.57691087866159e-05, + "loss": 0.7366, + "step": 65000 + }, + { + "epoch": 0.04, + "learning_rate": 5.576700882105533e-05, + "loss": 0.7191, + "step": 65500 + }, + { + "epoch": 0.04, + "learning_rate": 5.576490885549477e-05, + "loss": 0.7332, + "step": 66000 + }, + { + "epoch": 0.04, + "learning_rate": 5.576281308986533e-05, + "loss": 0.72, + "step": 66500 + }, + { + "epoch": 0.04, + "learning_rate": 5.576071312430476e-05, + "loss": 0.7274, + "step": 67000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5758613158744194e-05, + "loss": 0.7141, + "step": 67500 + }, + { + "epoch": 0.04, + "learning_rate": 5.5756513193183634e-05, + "loss": 0.7202, + "step": 68000 + }, + { + "epoch": 0.04, + "learning_rate": 5.575441322762307e-05, + "loss": 0.7082, + "step": 68500 + }, + { + "epoch": 0.04, + "learning_rate": 5.575231326206251e-05, + "loss": 0.708, + "step": 69000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5750213296501934e-05, + "loss": 0.7177, + "step": 69500 + }, + { + "epoch": 0.04, + "learning_rate": 5.574811333094137e-05, + "loss": 0.7094, + "step": 70000 + }, + { + "epoch": 0.04, + "learning_rate": 5.574601336538081e-05, + "loss": 0.7085, + "step": 70500 + }, + { + "epoch": 0.04, + "learning_rate": 5.574391759975137e-05, + "loss": 0.7188, + "step": 71000 + }, + { + "epoch": 0.04, + "learning_rate": 5.57418176341908e-05, + "loss": 0.7026, + "step": 71500 + }, + { + "epoch": 0.04, + "learning_rate": 5.573971766863023e-05, + "loss": 0.7047, + "step": 72000 + }, + { + "epoch": 0.04, + "learning_rate": 5.573761770306967e-05, + "loss": 0.6978, + "step": 72500 + }, + { + "epoch": 0.04, + "learning_rate": 5.57355177375091e-05, + "loss": 0.6972, + "step": 73000 + }, + { + "epoch": 0.04, + "learning_rate": 5.5733417771948536e-05, + "loss": 0.6942, + "step": 73500 + }, + { + "epoch": 0.04, + "learning_rate": 5.573132200631909e-05, + "loss": 0.7024, + "step": 74000 + }, + { + "epoch": 0.04, + "learning_rate": 5.572922204075853e-05, + "loss": 0.6919, + "step": 74500 + }, + { + "epoch": 0.04, + "learning_rate": 5.572712207519796e-05, + "loss": 0.7045, + "step": 75000 + }, + { + "epoch": 0.05, + "learning_rate": 5.57250221096374e-05, + "loss": 0.7054, + "step": 75500 + }, + { + "epoch": 0.05, + "learning_rate": 5.572292634400796e-05, + "loss": 0.6966, + "step": 76000 + }, + { + "epoch": 0.05, + "learning_rate": 5.572082637844739e-05, + "loss": 0.6926, + "step": 76500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5718726412886824e-05, + "loss": 0.6988, + "step": 77000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5716626447326264e-05, + "loss": 0.7083, + "step": 77500 + }, + { + "epoch": 0.05, + "learning_rate": 5.57145264817657e-05, + "loss": 0.7026, + "step": 78000 + }, + { + "epoch": 0.05, + "learning_rate": 5.571242651620513e-05, + "loss": 0.6944, + "step": 78500 + }, + { + "epoch": 0.05, + "learning_rate": 5.571032655064457e-05, + "loss": 0.696, + "step": 79000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5708226585084004e-05, + "loss": 0.6893, + "step": 79500 + }, + { + "epoch": 0.05, + "learning_rate": 5.570613081945456e-05, + "loss": 0.6844, + "step": 80000 + }, + { + "epoch": 0.05, + "learning_rate": 5.570403085389399e-05, + "loss": 0.7073, + "step": 80500 + }, + { + "epoch": 0.05, + "learning_rate": 5.570193088833343e-05, + "loss": 0.6991, + "step": 81000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5699835122703985e-05, + "loss": 0.684, + "step": 81500 + }, + { + "epoch": 0.05, + "learning_rate": 5.569773515714342e-05, + "loss": 0.678, + "step": 82000 + }, + { + "epoch": 0.05, + "learning_rate": 5.569563519158286e-05, + "loss": 0.6667, + "step": 82500 + }, + { + "epoch": 0.05, + "learning_rate": 5.569353522602229e-05, + "loss": 0.6921, + "step": 83000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5691435260461726e-05, + "loss": 0.6854, + "step": 83500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5689335294901166e-05, + "loss": 0.6604, + "step": 84000 + }, + { + "epoch": 0.05, + "learning_rate": 5.56872353293406e-05, + "loss": 0.6793, + "step": 84500 + }, + { + "epoch": 0.05, + "learning_rate": 5.568513536378003e-05, + "loss": 0.6776, + "step": 85000 + }, + { + "epoch": 0.05, + "learning_rate": 5.568303539821947e-05, + "loss": 0.6625, + "step": 85500 + }, + { + "epoch": 0.05, + "learning_rate": 5.568093963259003e-05, + "loss": 0.6535, + "step": 86000 + }, + { + "epoch": 0.05, + "learning_rate": 5.567883966702946e-05, + "loss": 0.6705, + "step": 86500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5676743901400014e-05, + "loss": 0.6665, + "step": 87000 + }, + { + "epoch": 0.05, + "learning_rate": 5.567464393583945e-05, + "loss": 0.6646, + "step": 87500 + }, + { + "epoch": 0.05, + "learning_rate": 5.567254817021001e-05, + "loss": 0.6637, + "step": 88000 + }, + { + "epoch": 0.05, + "learning_rate": 5.567044820464944e-05, + "loss": 0.6533, + "step": 88500 + }, + { + "epoch": 0.05, + "learning_rate": 5.5668348239088875e-05, + "loss": 0.6846, + "step": 89000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5666248273528315e-05, + "loss": 0.653, + "step": 89500 + }, + { + "epoch": 0.05, + "learning_rate": 5.566414830796775e-05, + "loss": 0.6661, + "step": 90000 + }, + { + "epoch": 0.05, + "learning_rate": 5.566204834240718e-05, + "loss": 0.6696, + "step": 90500 + }, + { + "epoch": 0.05, + "learning_rate": 5.565994837684662e-05, + "loss": 0.6534, + "step": 91000 + }, + { + "epoch": 0.05, + "learning_rate": 5.5657848411286055e-05, + "loss": 0.6579, + "step": 91500 + }, + { + "epoch": 0.06, + "learning_rate": 5.565574844572549e-05, + "loss": 0.6549, + "step": 92000 + }, + { + "epoch": 0.06, + "learning_rate": 5.565364848016493e-05, + "loss": 0.6649, + "step": 92500 + }, + { + "epoch": 0.06, + "learning_rate": 5.565154851460436e-05, + "loss": 0.6772, + "step": 93000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5649448549043796e-05, + "loss": 0.641, + "step": 93500 + }, + { + "epoch": 0.06, + "learning_rate": 5.564735278341435e-05, + "loss": 0.6538, + "step": 94000 + }, + { + "epoch": 0.06, + "learning_rate": 5.564525281785379e-05, + "loss": 0.6657, + "step": 94500 + }, + { + "epoch": 0.06, + "learning_rate": 5.564315285229322e-05, + "loss": 0.6514, + "step": 95000 + }, + { + "epoch": 0.06, + "learning_rate": 5.564105288673266e-05, + "loss": 0.6577, + "step": 95500 + }, + { + "epoch": 0.06, + "learning_rate": 5.563895712110321e-05, + "loss": 0.6627, + "step": 96000 + }, + { + "epoch": 0.06, + "learning_rate": 5.563685715554265e-05, + "loss": 0.6378, + "step": 96500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5634757189982084e-05, + "loss": 0.6515, + "step": 97000 + }, + { + "epoch": 0.06, + "learning_rate": 5.563265722442152e-05, + "loss": 0.6565, + "step": 97500 + }, + { + "epoch": 0.06, + "learning_rate": 5.563056145879208e-05, + "loss": 0.6562, + "step": 98000 + }, + { + "epoch": 0.06, + "learning_rate": 5.562846149323151e-05, + "loss": 0.6596, + "step": 98500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5626365727602065e-05, + "loss": 0.645, + "step": 99000 + }, + { + "epoch": 0.06, + "learning_rate": 5.56242657620415e-05, + "loss": 0.6391, + "step": 99500 + }, + { + "epoch": 0.06, + "learning_rate": 5.562216999641206e-05, + "loss": 0.643, + "step": 100000 + }, + { + "epoch": 0.06, + "eval_loss": 0.5988019108772278, + "eval_runtime": 1105.0166, + "eval_samples_per_second": 476.663, + "eval_steps_per_second": 79.444, + "step": 100000 + }, + { + "epoch": 0.06, + "learning_rate": 5.562007003085149e-05, + "loss": 0.651, + "step": 100500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5617970065290926e-05, + "loss": 0.6459, + "step": 101000 + }, + { + "epoch": 0.06, + "learning_rate": 5.561587009973036e-05, + "loss": 0.6533, + "step": 101500 + }, + { + "epoch": 0.06, + "learning_rate": 5.56137701341698e-05, + "loss": 0.6464, + "step": 102000 + }, + { + "epoch": 0.06, + "learning_rate": 5.561167016860923e-05, + "loss": 0.6404, + "step": 102500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5609570203048666e-05, + "loss": 0.6339, + "step": 103000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5607470237488106e-05, + "loss": 0.6304, + "step": 103500 + }, + { + "epoch": 0.06, + "learning_rate": 5.560537027192754e-05, + "loss": 0.6243, + "step": 104000 + }, + { + "epoch": 0.06, + "learning_rate": 5.560327870622922e-05, + "loss": 0.6351, + "step": 104500 + }, + { + "epoch": 0.06, + "learning_rate": 5.560117874066865e-05, + "loss": 0.6334, + "step": 105000 + }, + { + "epoch": 0.06, + "learning_rate": 5.559907877510809e-05, + "loss": 0.653, + "step": 105500 + }, + { + "epoch": 0.06, + "learning_rate": 5.559697880954752e-05, + "loss": 0.629, + "step": 106000 + }, + { + "epoch": 0.06, + "learning_rate": 5.5594878843986954e-05, + "loss": 0.6343, + "step": 106500 + }, + { + "epoch": 0.06, + "learning_rate": 5.5592778878426394e-05, + "loss": 0.6432, + "step": 107000 + }, + { + "epoch": 0.06, + "learning_rate": 5.559067891286583e-05, + "loss": 0.6456, + "step": 107500 + }, + { + "epoch": 0.06, + "learning_rate": 5.558857894730526e-05, + "loss": 0.6396, + "step": 108000 + }, + { + "epoch": 0.07, + "learning_rate": 5.55864789817447e-05, + "loss": 0.6394, + "step": 108500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5584379016184135e-05, + "loss": 0.6285, + "step": 109000 + }, + { + "epoch": 0.07, + "learning_rate": 5.558227905062357e-05, + "loss": 0.6253, + "step": 109500 + }, + { + "epoch": 0.07, + "learning_rate": 5.558017908506301e-05, + "loss": 0.6239, + "step": 110000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5578079119502435e-05, + "loss": 0.6322, + "step": 110500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5575983353872996e-05, + "loss": 0.628, + "step": 111000 + }, + { + "epoch": 0.07, + "learning_rate": 5.557388338831243e-05, + "loss": 0.6487, + "step": 111500 + }, + { + "epoch": 0.07, + "learning_rate": 5.557178342275187e-05, + "loss": 0.636, + "step": 112000 + }, + { + "epoch": 0.07, + "learning_rate": 5.55696834571913e-05, + "loss": 0.6543, + "step": 112500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5567587691561856e-05, + "loss": 0.6227, + "step": 113000 + }, + { + "epoch": 0.07, + "learning_rate": 5.55654877260013e-05, + "loss": 0.6214, + "step": 113500 + }, + { + "epoch": 0.07, + "learning_rate": 5.556338776044073e-05, + "loss": 0.6391, + "step": 114000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5561287794880164e-05, + "loss": 0.6198, + "step": 114500 + }, + { + "epoch": 0.07, + "learning_rate": 5.555919622918184e-05, + "loss": 0.625, + "step": 115000 + }, + { + "epoch": 0.07, + "learning_rate": 5.555709626362127e-05, + "loss": 0.6129, + "step": 115500 + }, + { + "epoch": 0.07, + "learning_rate": 5.555499629806071e-05, + "loss": 0.6249, + "step": 116000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5552896332500144e-05, + "loss": 0.6214, + "step": 116500 + }, + { + "epoch": 0.07, + "learning_rate": 5.55508005668707e-05, + "loss": 0.6385, + "step": 117000 + }, + { + "epoch": 0.07, + "learning_rate": 5.554870060131014e-05, + "loss": 0.614, + "step": 117500 + }, + { + "epoch": 0.07, + "learning_rate": 5.554660063574957e-05, + "loss": 0.623, + "step": 118000 + }, + { + "epoch": 0.07, + "learning_rate": 5.554450487012013e-05, + "loss": 0.6309, + "step": 118500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5542404904559566e-05, + "loss": 0.6279, + "step": 119000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5540304938999e-05, + "loss": 0.6299, + "step": 119500 + }, + { + "epoch": 0.07, + "learning_rate": 5.553820497343843e-05, + "loss": 0.6167, + "step": 120000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5536105007877866e-05, + "loss": 0.6138, + "step": 120500 + }, + { + "epoch": 0.07, + "learning_rate": 5.5534005042317306e-05, + "loss": 0.619, + "step": 121000 + }, + { + "epoch": 0.07, + "learning_rate": 5.553190507675674e-05, + "loss": 0.617, + "step": 121500 + }, + { + "epoch": 0.07, + "learning_rate": 5.552980511119617e-05, + "loss": 0.6174, + "step": 122000 + }, + { + "epoch": 0.07, + "learning_rate": 5.552770514563561e-05, + "loss": 0.6095, + "step": 122500 + }, + { + "epoch": 0.07, + "learning_rate": 5.552560518007505e-05, + "loss": 0.6124, + "step": 123000 + }, + { + "epoch": 0.07, + "learning_rate": 5.552350521451448e-05, + "loss": 0.6111, + "step": 123500 + }, + { + "epoch": 0.07, + "learning_rate": 5.552140524895392e-05, + "loss": 0.6188, + "step": 124000 + }, + { + "epoch": 0.07, + "learning_rate": 5.5519309483324474e-05, + "loss": 0.6072, + "step": 124500 + }, + { + "epoch": 0.07, + "learning_rate": 5.551720951776391e-05, + "loss": 0.6157, + "step": 125000 + }, + { + "epoch": 0.08, + "learning_rate": 5.551510955220334e-05, + "loss": 0.6249, + "step": 125500 + }, + { + "epoch": 0.08, + "learning_rate": 5.551300958664278e-05, + "loss": 0.6178, + "step": 126000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5510913821013335e-05, + "loss": 0.5944, + "step": 126500 + }, + { + "epoch": 0.08, + "learning_rate": 5.550881805538389e-05, + "loss": 0.6196, + "step": 127000 + }, + { + "epoch": 0.08, + "learning_rate": 5.550671808982332e-05, + "loss": 0.6001, + "step": 127500 + }, + { + "epoch": 0.08, + "learning_rate": 5.550461812426276e-05, + "loss": 0.6155, + "step": 128000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5502518158702196e-05, + "loss": 0.6237, + "step": 128500 + }, + { + "epoch": 0.08, + "learning_rate": 5.550041819314163e-05, + "loss": 0.614, + "step": 129000 + }, + { + "epoch": 0.08, + "learning_rate": 5.549831822758107e-05, + "loss": 0.6185, + "step": 129500 + }, + { + "epoch": 0.08, + "learning_rate": 5.54962182620205e-05, + "loss": 0.6114, + "step": 130000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5494118296459936e-05, + "loss": 0.6114, + "step": 130500 + }, + { + "epoch": 0.08, + "learning_rate": 5.549202253083049e-05, + "loss": 0.6071, + "step": 131000 + }, + { + "epoch": 0.08, + "learning_rate": 5.548992256526993e-05, + "loss": 0.6046, + "step": 131500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5487822599709363e-05, + "loss": 0.5886, + "step": 132000 + }, + { + "epoch": 0.08, + "learning_rate": 5.54857226341488e-05, + "loss": 0.6095, + "step": 132500 + }, + { + "epoch": 0.08, + "learning_rate": 5.548362686851936e-05, + "loss": 0.6063, + "step": 133000 + }, + { + "epoch": 0.08, + "learning_rate": 5.548152690295879e-05, + "loss": 0.6083, + "step": 133500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5479426937398224e-05, + "loss": 0.6127, + "step": 134000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5477326971837664e-05, + "loss": 0.5944, + "step": 134500 + }, + { + "epoch": 0.08, + "learning_rate": 5.547523120620822e-05, + "loss": 0.6011, + "step": 135000 + }, + { + "epoch": 0.08, + "learning_rate": 5.547313124064765e-05, + "loss": 0.6024, + "step": 135500 + }, + { + "epoch": 0.08, + "learning_rate": 5.5471031275087085e-05, + "loss": 0.5883, + "step": 136000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5468931309526525e-05, + "loss": 0.6046, + "step": 136500 + }, + { + "epoch": 0.08, + "learning_rate": 5.546683554389708e-05, + "loss": 0.6, + "step": 137000 + }, + { + "epoch": 0.08, + "learning_rate": 5.546473977826764e-05, + "loss": 0.6035, + "step": 137500 + }, + { + "epoch": 0.08, + "learning_rate": 5.546263981270707e-05, + "loss": 0.5914, + "step": 138000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5460539847146506e-05, + "loss": 0.5996, + "step": 138500 + }, + { + "epoch": 0.08, + "learning_rate": 5.545843988158594e-05, + "loss": 0.5983, + "step": 139000 + }, + { + "epoch": 0.08, + "learning_rate": 5.545633991602537e-05, + "loss": 0.6085, + "step": 139500 + }, + { + "epoch": 0.08, + "learning_rate": 5.545423995046481e-05, + "loss": 0.6016, + "step": 140000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5452139984904247e-05, + "loss": 0.6105, + "step": 140500 + }, + { + "epoch": 0.08, + "learning_rate": 5.545004001934368e-05, + "loss": 0.586, + "step": 141000 + }, + { + "epoch": 0.08, + "learning_rate": 5.5447944253714234e-05, + "loss": 0.5941, + "step": 141500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5445844288153674e-05, + "loss": 0.5863, + "step": 142000 + }, + { + "epoch": 0.09, + "learning_rate": 5.544374432259311e-05, + "loss": 0.5876, + "step": 142500 + }, + { + "epoch": 0.09, + "learning_rate": 5.544164855696367e-05, + "loss": 0.5826, + "step": 143000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5439548591403094e-05, + "loss": 0.6028, + "step": 143500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5437448625842535e-05, + "loss": 0.5828, + "step": 144000 + }, + { + "epoch": 0.09, + "learning_rate": 5.543534866028197e-05, + "loss": 0.593, + "step": 144500 + }, + { + "epoch": 0.09, + "learning_rate": 5.54332486947214e-05, + "loss": 0.603, + "step": 145000 + }, + { + "epoch": 0.09, + "learning_rate": 5.543115292909196e-05, + "loss": 0.5927, + "step": 145500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5429052963531395e-05, + "loss": 0.586, + "step": 146000 + }, + { + "epoch": 0.09, + "learning_rate": 5.542695299797083e-05, + "loss": 0.5799, + "step": 146500 + }, + { + "epoch": 0.09, + "learning_rate": 5.542485303241027e-05, + "loss": 0.5945, + "step": 147000 + }, + { + "epoch": 0.09, + "learning_rate": 5.54227530668497e-05, + "loss": 0.5945, + "step": 147500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5420653101289136e-05, + "loss": 0.5845, + "step": 148000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5418553135728576e-05, + "loss": 0.5927, + "step": 148500 + }, + { + "epoch": 0.09, + "learning_rate": 5.541645317016801e-05, + "loss": 0.5795, + "step": 149000 + }, + { + "epoch": 0.09, + "learning_rate": 5.541435740453856e-05, + "loss": 0.5943, + "step": 149500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5412257438978e-05, + "loss": 0.5831, + "step": 150000 + }, + { + "epoch": 0.09, + "learning_rate": 5.541016167334855e-05, + "loss": 0.5892, + "step": 150500 + }, + { + "epoch": 0.09, + "learning_rate": 5.540806170778799e-05, + "loss": 0.5704, + "step": 151000 + }, + { + "epoch": 0.09, + "learning_rate": 5.5405961742227424e-05, + "loss": 0.5815, + "step": 151500 + }, + { + "epoch": 0.09, + "learning_rate": 5.540386177666686e-05, + "loss": 0.5885, + "step": 152000 + }, + { + "epoch": 0.09, + "learning_rate": 5.54017618111063e-05, + "loss": 0.5842, + "step": 152500 + }, + { + "epoch": 0.09, + "learning_rate": 5.539966184554573e-05, + "loss": 0.5934, + "step": 153000 + }, + { + "epoch": 0.09, + "learning_rate": 5.539756187998517e-05, + "loss": 0.5923, + "step": 153500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5395466114355725e-05, + "loss": 0.5913, + "step": 154000 + }, + { + "epoch": 0.09, + "learning_rate": 5.539336614879516e-05, + "loss": 0.5814, + "step": 154500 + }, + { + "epoch": 0.09, + "learning_rate": 5.539126618323459e-05, + "loss": 0.5746, + "step": 155000 + }, + { + "epoch": 0.09, + "learning_rate": 5.538916621767403e-05, + "loss": 0.5825, + "step": 155500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5387066252113465e-05, + "loss": 0.5735, + "step": 156000 + }, + { + "epoch": 0.09, + "learning_rate": 5.53849662865529e-05, + "loss": 0.5981, + "step": 156500 + }, + { + "epoch": 0.09, + "learning_rate": 5.538286632099234e-05, + "loss": 0.577, + "step": 157000 + }, + { + "epoch": 0.09, + "learning_rate": 5.538076635543177e-05, + "loss": 0.5785, + "step": 157500 + }, + { + "epoch": 0.09, + "learning_rate": 5.5378670589802326e-05, + "loss": 0.5823, + "step": 158000 + }, + { + "epoch": 0.1, + "learning_rate": 5.537657062424176e-05, + "loss": 0.585, + "step": 158500 + }, + { + "epoch": 0.1, + "learning_rate": 5.53744706586812e-05, + "loss": 0.5801, + "step": 159000 + }, + { + "epoch": 0.1, + "learning_rate": 5.537237069312063e-05, + "loss": 0.5874, + "step": 159500 + }, + { + "epoch": 0.1, + "learning_rate": 5.537027072756007e-05, + "loss": 0.5864, + "step": 160000 + }, + { + "epoch": 0.1, + "learning_rate": 5.536817076199951e-05, + "loss": 0.5837, + "step": 160500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5366070796438934e-05, + "loss": 0.575, + "step": 161000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5363970830878374e-05, + "loss": 0.5878, + "step": 161500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5361875065248934e-05, + "loss": 0.5764, + "step": 162000 + }, + { + "epoch": 0.1, + "learning_rate": 5.535977509968837e-05, + "loss": 0.578, + "step": 162500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5357675134127794e-05, + "loss": 0.577, + "step": 163000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5355575168567235e-05, + "loss": 0.589, + "step": 163500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5353479402937795e-05, + "loss": 0.5692, + "step": 164000 + }, + { + "epoch": 0.1, + "learning_rate": 5.535137943737723e-05, + "loss": 0.5749, + "step": 164500 + }, + { + "epoch": 0.1, + "learning_rate": 5.534927947181666e-05, + "loss": 0.5844, + "step": 165000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5347179506256095e-05, + "loss": 0.5632, + "step": 165500 + }, + { + "epoch": 0.1, + "learning_rate": 5.534507954069553e-05, + "loss": 0.5652, + "step": 166000 + }, + { + "epoch": 0.1, + "learning_rate": 5.534297957513496e-05, + "loss": 0.5785, + "step": 166500 + }, + { + "epoch": 0.1, + "learning_rate": 5.53408796095744e-05, + "loss": 0.5914, + "step": 167000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5338779644013836e-05, + "loss": 0.5692, + "step": 167500 + }, + { + "epoch": 0.1, + "learning_rate": 5.533668387838439e-05, + "loss": 0.5671, + "step": 168000 + }, + { + "epoch": 0.1, + "learning_rate": 5.533458391282383e-05, + "loss": 0.5776, + "step": 168500 + }, + { + "epoch": 0.1, + "learning_rate": 5.533248394726326e-05, + "loss": 0.5768, + "step": 169000 + }, + { + "epoch": 0.1, + "learning_rate": 5.53303839817027e-05, + "loss": 0.5734, + "step": 169500 + }, + { + "epoch": 0.1, + "learning_rate": 5.532828821607326e-05, + "loss": 0.5654, + "step": 170000 + }, + { + "epoch": 0.1, + "learning_rate": 5.532618825051269e-05, + "loss": 0.5818, + "step": 170500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5324088284952124e-05, + "loss": 0.5662, + "step": 171000 + }, + { + "epoch": 0.1, + "learning_rate": 5.532198831939156e-05, + "loss": 0.5571, + "step": 171500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5319888353831e-05, + "loss": 0.5798, + "step": 172000 + }, + { + "epoch": 0.1, + "learning_rate": 5.531778838827043e-05, + "loss": 0.575, + "step": 172500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5315688422709865e-05, + "loss": 0.5658, + "step": 173000 + }, + { + "epoch": 0.1, + "learning_rate": 5.5313588457149305e-05, + "loss": 0.5768, + "step": 173500 + }, + { + "epoch": 0.1, + "learning_rate": 5.531149269151986e-05, + "loss": 0.5787, + "step": 174000 + }, + { + "epoch": 0.1, + "learning_rate": 5.530939272595929e-05, + "loss": 0.569, + "step": 174500 + }, + { + "epoch": 0.1, + "learning_rate": 5.5307292760398725e-05, + "loss": 0.5707, + "step": 175000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5305196994769286e-05, + "loss": 0.5553, + "step": 175500 + }, + { + "epoch": 0.11, + "learning_rate": 5.530309702920872e-05, + "loss": 0.5654, + "step": 176000 + }, + { + "epoch": 0.11, + "learning_rate": 5.530099706364815e-05, + "loss": 0.5799, + "step": 176500 + }, + { + "epoch": 0.11, + "learning_rate": 5.529889709808759e-05, + "loss": 0.562, + "step": 177000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5296797132527026e-05, + "loss": 0.5681, + "step": 177500 + }, + { + "epoch": 0.11, + "learning_rate": 5.529469716696646e-05, + "loss": 0.5594, + "step": 178000 + }, + { + "epoch": 0.11, + "learning_rate": 5.52925972014059e-05, + "loss": 0.5675, + "step": 178500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5290497235845333e-05, + "loss": 0.5614, + "step": 179000 + }, + { + "epoch": 0.11, + "learning_rate": 5.528840147021589e-05, + "loss": 0.572, + "step": 179500 + }, + { + "epoch": 0.11, + "learning_rate": 5.528630150465532e-05, + "loss": 0.5761, + "step": 180000 + }, + { + "epoch": 0.11, + "learning_rate": 5.528420153909476e-05, + "loss": 0.564, + "step": 180500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5282101573534194e-05, + "loss": 0.5656, + "step": 181000 + }, + { + "epoch": 0.11, + "learning_rate": 5.528000580790475e-05, + "loss": 0.5641, + "step": 181500 + }, + { + "epoch": 0.11, + "learning_rate": 5.527790584234418e-05, + "loss": 0.55, + "step": 182000 + }, + { + "epoch": 0.11, + "learning_rate": 5.527580587678362e-05, + "loss": 0.563, + "step": 182500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5273705911223055e-05, + "loss": 0.5729, + "step": 183000 + }, + { + "epoch": 0.11, + "learning_rate": 5.527161014559361e-05, + "loss": 0.5712, + "step": 183500 + }, + { + "epoch": 0.11, + "learning_rate": 5.526951018003305e-05, + "loss": 0.564, + "step": 184000 + }, + { + "epoch": 0.11, + "learning_rate": 5.526741021447248e-05, + "loss": 0.5646, + "step": 184500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5265314448843036e-05, + "loss": 0.5621, + "step": 185000 + }, + { + "epoch": 0.11, + "learning_rate": 5.526321448328247e-05, + "loss": 0.5797, + "step": 185500 + }, + { + "epoch": 0.11, + "learning_rate": 5.526111451772191e-05, + "loss": 0.5627, + "step": 186000 + }, + { + "epoch": 0.11, + "learning_rate": 5.525901455216134e-05, + "loss": 0.5543, + "step": 186500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5256914586600776e-05, + "loss": 0.5549, + "step": 187000 + }, + { + "epoch": 0.11, + "learning_rate": 5.525481462104022e-05, + "loss": 0.561, + "step": 187500 + }, + { + "epoch": 0.11, + "learning_rate": 5.525271465547965e-05, + "loss": 0.5621, + "step": 188000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5250614689919084e-05, + "loss": 0.5659, + "step": 188500 + }, + { + "epoch": 0.11, + "learning_rate": 5.524851892428964e-05, + "loss": 0.5729, + "step": 189000 + }, + { + "epoch": 0.11, + "learning_rate": 5.524641895872908e-05, + "loss": 0.5564, + "step": 189500 + }, + { + "epoch": 0.11, + "learning_rate": 5.524431899316851e-05, + "loss": 0.5502, + "step": 190000 + }, + { + "epoch": 0.11, + "learning_rate": 5.5242219027607944e-05, + "loss": 0.5512, + "step": 190500 + }, + { + "epoch": 0.11, + "learning_rate": 5.5240123261978505e-05, + "loss": 0.573, + "step": 191000 + }, + { + "epoch": 0.11, + "learning_rate": 5.523802329641794e-05, + "loss": 0.5524, + "step": 191500 + }, + { + "epoch": 0.12, + "learning_rate": 5.523592333085737e-05, + "loss": 0.5603, + "step": 192000 + }, + { + "epoch": 0.12, + "learning_rate": 5.523382336529681e-05, + "loss": 0.5545, + "step": 192500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5231723399736245e-05, + "loss": 0.5635, + "step": 193000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522962343417568e-05, + "loss": 0.5609, + "step": 193500 + }, + { + "epoch": 0.12, + "learning_rate": 5.522752346861512e-05, + "loss": 0.5582, + "step": 194000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522542770298567e-05, + "loss": 0.5688, + "step": 194500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5223331937356226e-05, + "loss": 0.558, + "step": 195000 + }, + { + "epoch": 0.12, + "learning_rate": 5.522123197179566e-05, + "loss": 0.5551, + "step": 195500 + }, + { + "epoch": 0.12, + "learning_rate": 5.521913200623509e-05, + "loss": 0.5647, + "step": 196000 + }, + { + "epoch": 0.12, + "learning_rate": 5.521703204067453e-05, + "loss": 0.5512, + "step": 196500 + }, + { + "epoch": 0.12, + "learning_rate": 5.521493207511397e-05, + "loss": 0.5618, + "step": 197000 + }, + { + "epoch": 0.12, + "learning_rate": 5.52128321095534e-05, + "loss": 0.5632, + "step": 197500 + }, + { + "epoch": 0.12, + "learning_rate": 5.521073214399284e-05, + "loss": 0.5519, + "step": 198000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5208632178432274e-05, + "loss": 0.5499, + "step": 198500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5206532212871714e-05, + "loss": 0.5545, + "step": 199000 + }, + { + "epoch": 0.12, + "learning_rate": 5.520443224731114e-05, + "loss": 0.5632, + "step": 199500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5202332281750574e-05, + "loss": 0.5529, + "step": 200000 + }, + { + "epoch": 0.12, + "eval_loss": 0.5203812718391418, + "eval_runtime": 1102.9266, + "eval_samples_per_second": 477.566, + "eval_steps_per_second": 79.595, + "step": 200000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5200232316190014e-05, + "loss": 0.5682, + "step": 200500 + }, + { + "epoch": 0.12, + "learning_rate": 5.519813235062945e-05, + "loss": 0.552, + "step": 201000 + }, + { + "epoch": 0.12, + "learning_rate": 5.519603658500001e-05, + "loss": 0.5572, + "step": 201500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5193936619439435e-05, + "loss": 0.5527, + "step": 202000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5191836653878875e-05, + "loss": 0.558, + "step": 202500 + }, + { + "epoch": 0.12, + "learning_rate": 5.518973668831831e-05, + "loss": 0.5685, + "step": 203000 + }, + { + "epoch": 0.12, + "learning_rate": 5.518764092268887e-05, + "loss": 0.552, + "step": 203500 + }, + { + "epoch": 0.12, + "learning_rate": 5.51855409571283e-05, + "loss": 0.5501, + "step": 204000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5183440991567736e-05, + "loss": 0.5613, + "step": 204500 + }, + { + "epoch": 0.12, + "learning_rate": 5.518134102600717e-05, + "loss": 0.5478, + "step": 205000 + }, + { + "epoch": 0.12, + "learning_rate": 5.517924526037773e-05, + "loss": 0.5551, + "step": 205500 + }, + { + "epoch": 0.12, + "learning_rate": 5.517714529481717e-05, + "loss": 0.562, + "step": 206000 + }, + { + "epoch": 0.12, + "learning_rate": 5.5175049529187724e-05, + "loss": 0.5332, + "step": 206500 + }, + { + "epoch": 0.12, + "learning_rate": 5.517295376355828e-05, + "loss": 0.5469, + "step": 207000 + }, + { + "epoch": 0.12, + "learning_rate": 5.517085379799771e-05, + "loss": 0.5506, + "step": 207500 + }, + { + "epoch": 0.12, + "learning_rate": 5.5168753832437144e-05, + "loss": 0.553, + "step": 208000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5166653866876584e-05, + "loss": 0.5563, + "step": 208500 + }, + { + "epoch": 0.13, + "learning_rate": 5.516455810124714e-05, + "loss": 0.5437, + "step": 209000 + }, + { + "epoch": 0.13, + "learning_rate": 5.516245813568657e-05, + "loss": 0.5451, + "step": 209500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5160358170126005e-05, + "loss": 0.5472, + "step": 210000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5158258204565445e-05, + "loss": 0.5546, + "step": 210500 + }, + { + "epoch": 0.13, + "learning_rate": 5.515615823900488e-05, + "loss": 0.5433, + "step": 211000 + }, + { + "epoch": 0.13, + "learning_rate": 5.515405827344431e-05, + "loss": 0.5362, + "step": 211500 + }, + { + "epoch": 0.13, + "learning_rate": 5.515195830788375e-05, + "loss": 0.5573, + "step": 212000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5149858342323186e-05, + "loss": 0.5527, + "step": 212500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5147758376762626e-05, + "loss": 0.5508, + "step": 213000 + }, + { + "epoch": 0.13, + "learning_rate": 5.514565841120206e-05, + "loss": 0.5515, + "step": 213500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5143558445641486e-05, + "loss": 0.5471, + "step": 214000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5141458480080926e-05, + "loss": 0.544, + "step": 214500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5139362714451487e-05, + "loss": 0.5456, + "step": 215000 + }, + { + "epoch": 0.13, + "learning_rate": 5.513726274889092e-05, + "loss": 0.5522, + "step": 215500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5135162783330353e-05, + "loss": 0.5364, + "step": 216000 + }, + { + "epoch": 0.13, + "learning_rate": 5.513306281776979e-05, + "loss": 0.5532, + "step": 216500 + }, + { + "epoch": 0.13, + "learning_rate": 5.513096705214035e-05, + "loss": 0.5588, + "step": 217000 + }, + { + "epoch": 0.13, + "learning_rate": 5.512886708657978e-05, + "loss": 0.5398, + "step": 217500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5126767121019214e-05, + "loss": 0.5508, + "step": 218000 + }, + { + "epoch": 0.13, + "learning_rate": 5.512466715545865e-05, + "loss": 0.5418, + "step": 218500 + }, + { + "epoch": 0.13, + "learning_rate": 5.512257138982921e-05, + "loss": 0.5413, + "step": 219000 + }, + { + "epoch": 0.13, + "learning_rate": 5.512047142426864e-05, + "loss": 0.5562, + "step": 219500 + }, + { + "epoch": 0.13, + "learning_rate": 5.511837145870808e-05, + "loss": 0.5446, + "step": 220000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5116271493147515e-05, + "loss": 0.5451, + "step": 220500 + }, + { + "epoch": 0.13, + "learning_rate": 5.511417152758694e-05, + "loss": 0.5441, + "step": 221000 + }, + { + "epoch": 0.13, + "learning_rate": 5.51120757619575e-05, + "loss": 0.54, + "step": 221500 + }, + { + "epoch": 0.13, + "learning_rate": 5.510997579639694e-05, + "loss": 0.5524, + "step": 222000 + }, + { + "epoch": 0.13, + "learning_rate": 5.5107875830836376e-05, + "loss": 0.5398, + "step": 222500 + }, + { + "epoch": 0.13, + "learning_rate": 5.510577586527581e-05, + "loss": 0.556, + "step": 223000 + }, + { + "epoch": 0.13, + "learning_rate": 5.510368009964636e-05, + "loss": 0.5415, + "step": 223500 + }, + { + "epoch": 0.13, + "learning_rate": 5.5101584334016917e-05, + "loss": 0.5344, + "step": 224000 + }, + { + "epoch": 0.13, + "learning_rate": 5.509948436845636e-05, + "loss": 0.5388, + "step": 224500 + }, + { + "epoch": 0.13, + "learning_rate": 5.509738440289579e-05, + "loss": 0.5386, + "step": 225000 + }, + { + "epoch": 0.14, + "learning_rate": 5.509528443733523e-05, + "loss": 0.5383, + "step": 225500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5093184471774664e-05, + "loss": 0.5424, + "step": 226000 + }, + { + "epoch": 0.14, + "learning_rate": 5.50910845062141e-05, + "loss": 0.5463, + "step": 226500 + }, + { + "epoch": 0.14, + "learning_rate": 5.508898454065354e-05, + "loss": 0.5426, + "step": 227000 + }, + { + "epoch": 0.14, + "learning_rate": 5.508688457509297e-05, + "loss": 0.5406, + "step": 227500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5084788809463525e-05, + "loss": 0.5405, + "step": 228000 + }, + { + "epoch": 0.14, + "learning_rate": 5.508268884390296e-05, + "loss": 0.5319, + "step": 228500 + }, + { + "epoch": 0.14, + "learning_rate": 5.50805888783424e-05, + "loss": 0.5378, + "step": 229000 + }, + { + "epoch": 0.14, + "learning_rate": 5.507848891278183e-05, + "loss": 0.5393, + "step": 229500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5076393147152385e-05, + "loss": 0.5587, + "step": 230000 + }, + { + "epoch": 0.14, + "learning_rate": 5.507429318159182e-05, + "loss": 0.545, + "step": 230500 + }, + { + "epoch": 0.14, + "learning_rate": 5.507219321603126e-05, + "loss": 0.5439, + "step": 231000 + }, + { + "epoch": 0.14, + "learning_rate": 5.507009325047069e-05, + "loss": 0.5283, + "step": 231500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5067997484841246e-05, + "loss": 0.538, + "step": 232000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5065897519280686e-05, + "loss": 0.5309, + "step": 232500 + }, + { + "epoch": 0.14, + "learning_rate": 5.506379755372012e-05, + "loss": 0.5331, + "step": 233000 + }, + { + "epoch": 0.14, + "learning_rate": 5.506169758815955e-05, + "loss": 0.5485, + "step": 233500 + }, + { + "epoch": 0.14, + "learning_rate": 5.505960182253011e-05, + "loss": 0.5486, + "step": 234000 + }, + { + "epoch": 0.14, + "learning_rate": 5.505750185696955e-05, + "loss": 0.5277, + "step": 234500 + }, + { + "epoch": 0.14, + "learning_rate": 5.505540189140898e-05, + "loss": 0.5366, + "step": 235000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5053301925848414e-05, + "loss": 0.5368, + "step": 235500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5051201960287854e-05, + "loss": 0.5366, + "step": 236000 + }, + { + "epoch": 0.14, + "learning_rate": 5.504910619465841e-05, + "loss": 0.5314, + "step": 236500 + }, + { + "epoch": 0.14, + "learning_rate": 5.504700622909784e-05, + "loss": 0.5275, + "step": 237000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5044910463468395e-05, + "loss": 0.5336, + "step": 237500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5042810497907835e-05, + "loss": 0.5381, + "step": 238000 + }, + { + "epoch": 0.14, + "learning_rate": 5.504071053234727e-05, + "loss": 0.5449, + "step": 238500 + }, + { + "epoch": 0.14, + "learning_rate": 5.50386105667867e-05, + "loss": 0.5279, + "step": 239000 + }, + { + "epoch": 0.14, + "learning_rate": 5.503651060122614e-05, + "loss": 0.5159, + "step": 239500 + }, + { + "epoch": 0.14, + "learning_rate": 5.5034410635665576e-05, + "loss": 0.5428, + "step": 240000 + }, + { + "epoch": 0.14, + "learning_rate": 5.503231067010501e-05, + "loss": 0.5263, + "step": 240500 + }, + { + "epoch": 0.14, + "learning_rate": 5.503021070454445e-05, + "loss": 0.5401, + "step": 241000 + }, + { + "epoch": 0.14, + "learning_rate": 5.5028114938915e-05, + "loss": 0.5342, + "step": 241500 + }, + { + "epoch": 0.15, + "learning_rate": 5.5026014973354436e-05, + "loss": 0.5334, + "step": 242000 + }, + { + "epoch": 0.15, + "learning_rate": 5.502391500779387e-05, + "loss": 0.5385, + "step": 242500 + }, + { + "epoch": 0.15, + "learning_rate": 5.502181504223331e-05, + "loss": 0.5353, + "step": 243000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5019719276603864e-05, + "loss": 0.533, + "step": 243500 + }, + { + "epoch": 0.15, + "learning_rate": 5.50176193110433e-05, + "loss": 0.5379, + "step": 244000 + }, + { + "epoch": 0.15, + "learning_rate": 5.501551934548273e-05, + "loss": 0.5242, + "step": 244500 + }, + { + "epoch": 0.15, + "learning_rate": 5.501341937992217e-05, + "loss": 0.5278, + "step": 245000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5011323614292724e-05, + "loss": 0.5375, + "step": 245500 + }, + { + "epoch": 0.15, + "learning_rate": 5.500922364873216e-05, + "loss": 0.5347, + "step": 246000 + }, + { + "epoch": 0.15, + "learning_rate": 5.500712788310271e-05, + "loss": 0.5258, + "step": 246500 + }, + { + "epoch": 0.15, + "learning_rate": 5.500502791754215e-05, + "loss": 0.5321, + "step": 247000 + }, + { + "epoch": 0.15, + "learning_rate": 5.5002927951981585e-05, + "loss": 0.5381, + "step": 247500 + }, + { + "epoch": 0.15, + "learning_rate": 5.500082798642102e-05, + "loss": 0.5345, + "step": 248000 + }, + { + "epoch": 0.15, + "learning_rate": 5.499872802086046e-05, + "loss": 0.5367, + "step": 248500 + }, + { + "epoch": 0.15, + "learning_rate": 5.499662805529989e-05, + "loss": 0.54, + "step": 249000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4994528089739326e-05, + "loss": 0.5262, + "step": 249500 + }, + { + "epoch": 0.15, + "learning_rate": 5.499243232410988e-05, + "loss": 0.5303, + "step": 250000 + }, + { + "epoch": 0.15, + "learning_rate": 5.499033235854932e-05, + "loss": 0.5282, + "step": 250500 + }, + { + "epoch": 0.15, + "learning_rate": 5.498823239298875e-05, + "loss": 0.5223, + "step": 251000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4986136627359307e-05, + "loss": 0.5503, + "step": 251500 + }, + { + "epoch": 0.15, + "learning_rate": 5.498403666179875e-05, + "loss": 0.5315, + "step": 252000 + }, + { + "epoch": 0.15, + "learning_rate": 5.498193669623818e-05, + "loss": 0.5339, + "step": 252500 + }, + { + "epoch": 0.15, + "learning_rate": 5.4979836730677614e-05, + "loss": 0.5316, + "step": 253000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4977736765117054e-05, + "loss": 0.5194, + "step": 253500 + }, + { + "epoch": 0.15, + "learning_rate": 5.497563679955649e-05, + "loss": 0.5397, + "step": 254000 + }, + { + "epoch": 0.15, + "learning_rate": 5.497353683399592e-05, + "loss": 0.521, + "step": 254500 + }, + { + "epoch": 0.15, + "learning_rate": 5.497143686843536e-05, + "loss": 0.5335, + "step": 255000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4969336902874795e-05, + "loss": 0.5279, + "step": 255500 + }, + { + "epoch": 0.15, + "learning_rate": 5.496723693731423e-05, + "loss": 0.5201, + "step": 256000 + }, + { + "epoch": 0.15, + "learning_rate": 5.496513697175367e-05, + "loss": 0.5292, + "step": 256500 + }, + { + "epoch": 0.15, + "learning_rate": 5.4963037006193095e-05, + "loss": 0.5234, + "step": 257000 + }, + { + "epoch": 0.15, + "learning_rate": 5.4960941240563655e-05, + "loss": 0.5251, + "step": 257500 + }, + { + "epoch": 0.15, + "learning_rate": 5.495884127500309e-05, + "loss": 0.5207, + "step": 258000 + }, + { + "epoch": 0.15, + "learning_rate": 5.495674130944253e-05, + "loss": 0.5232, + "step": 258500 + }, + { + "epoch": 0.16, + "learning_rate": 5.495464134388196e-05, + "loss": 0.5224, + "step": 259000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4952545578252516e-05, + "loss": 0.515, + "step": 259500 + }, + { + "epoch": 0.16, + "learning_rate": 5.495044561269195e-05, + "loss": 0.5319, + "step": 260000 + }, + { + "epoch": 0.16, + "learning_rate": 5.494834564713139e-05, + "loss": 0.5267, + "step": 260500 + }, + { + "epoch": 0.16, + "learning_rate": 5.494624568157082e-05, + "loss": 0.5378, + "step": 261000 + }, + { + "epoch": 0.16, + "learning_rate": 5.494414991594138e-05, + "loss": 0.5316, + "step": 261500 + }, + { + "epoch": 0.16, + "learning_rate": 5.494204995038082e-05, + "loss": 0.5228, + "step": 262000 + }, + { + "epoch": 0.16, + "learning_rate": 5.493995418475137e-05, + "loss": 0.5419, + "step": 262500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4937854219190804e-05, + "loss": 0.5258, + "step": 263000 + }, + { + "epoch": 0.16, + "learning_rate": 5.493575425363024e-05, + "loss": 0.529, + "step": 263500 + }, + { + "epoch": 0.16, + "learning_rate": 5.493365428806968e-05, + "loss": 0.523, + "step": 264000 + }, + { + "epoch": 0.16, + "learning_rate": 5.493155432250911e-05, + "loss": 0.5303, + "step": 264500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4929454356948545e-05, + "loss": 0.5383, + "step": 265000 + }, + { + "epoch": 0.16, + "learning_rate": 5.49273585913191e-05, + "loss": 0.5379, + "step": 265500 + }, + { + "epoch": 0.16, + "learning_rate": 5.492525862575854e-05, + "loss": 0.5254, + "step": 266000 + }, + { + "epoch": 0.16, + "learning_rate": 5.492315866019797e-05, + "loss": 0.5297, + "step": 266500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4921058694637405e-05, + "loss": 0.5203, + "step": 267000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4918958729076846e-05, + "loss": 0.5228, + "step": 267500 + }, + { + "epoch": 0.16, + "learning_rate": 5.491685876351628e-05, + "loss": 0.5192, + "step": 268000 + }, + { + "epoch": 0.16, + "learning_rate": 5.491475879795571e-05, + "loss": 0.5326, + "step": 268500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4912658832395146e-05, + "loss": 0.5255, + "step": 269000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4910563066765706e-05, + "loss": 0.5241, + "step": 269500 + }, + { + "epoch": 0.16, + "learning_rate": 5.490846310120514e-05, + "loss": 0.5347, + "step": 270000 + }, + { + "epoch": 0.16, + "learning_rate": 5.490636733557569e-05, + "loss": 0.5259, + "step": 270500 + }, + { + "epoch": 0.16, + "learning_rate": 5.490427156994625e-05, + "loss": 0.5275, + "step": 271000 + }, + { + "epoch": 0.16, + "learning_rate": 5.490217160438569e-05, + "loss": 0.5208, + "step": 271500 + }, + { + "epoch": 0.16, + "learning_rate": 5.490007163882512e-05, + "loss": 0.5212, + "step": 272000 + }, + { + "epoch": 0.16, + "learning_rate": 5.4897971673264554e-05, + "loss": 0.519, + "step": 272500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4895871707703994e-05, + "loss": 0.523, + "step": 273000 + }, + { + "epoch": 0.16, + "learning_rate": 5.489377594207455e-05, + "loss": 0.5421, + "step": 273500 + }, + { + "epoch": 0.16, + "learning_rate": 5.489167597651398e-05, + "loss": 0.5139, + "step": 274000 + }, + { + "epoch": 0.16, + "learning_rate": 5.488957601095342e-05, + "loss": 0.5294, + "step": 274500 + }, + { + "epoch": 0.16, + "learning_rate": 5.4887476045392855e-05, + "loss": 0.5096, + "step": 275000 + }, + { + "epoch": 0.17, + "learning_rate": 5.488537607983229e-05, + "loss": 0.5301, + "step": 275500 + }, + { + "epoch": 0.17, + "learning_rate": 5.488327611427173e-05, + "loss": 0.529, + "step": 276000 + }, + { + "epoch": 0.17, + "learning_rate": 5.488117614871116e-05, + "loss": 0.5144, + "step": 276500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4879076183150596e-05, + "loss": 0.5175, + "step": 277000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4876976217590036e-05, + "loss": 0.5146, + "step": 277500 + }, + { + "epoch": 0.17, + "learning_rate": 5.487487625202947e-05, + "loss": 0.5327, + "step": 278000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4872776286468896e-05, + "loss": 0.5261, + "step": 278500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4870676320908336e-05, + "loss": 0.5025, + "step": 279000 + }, + { + "epoch": 0.17, + "learning_rate": 5.48685805552789e-05, + "loss": 0.532, + "step": 279500 + }, + { + "epoch": 0.17, + "learning_rate": 5.486648058971833e-05, + "loss": 0.5191, + "step": 280000 + }, + { + "epoch": 0.17, + "learning_rate": 5.486438062415776e-05, + "loss": 0.5193, + "step": 280500 + }, + { + "epoch": 0.17, + "learning_rate": 5.48622806585972e-05, + "loss": 0.5327, + "step": 281000 + }, + { + "epoch": 0.17, + "learning_rate": 5.486018069303663e-05, + "loss": 0.5121, + "step": 281500 + }, + { + "epoch": 0.17, + "learning_rate": 5.485808492740719e-05, + "loss": 0.5212, + "step": 282000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4855984961846624e-05, + "loss": 0.5116, + "step": 282500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4853889196217185e-05, + "loss": 0.5175, + "step": 283000 + }, + { + "epoch": 0.17, + "learning_rate": 5.485178923065662e-05, + "loss": 0.5248, + "step": 283500 + }, + { + "epoch": 0.17, + "learning_rate": 5.484968926509605e-05, + "loss": 0.5194, + "step": 284000 + }, + { + "epoch": 0.17, + "learning_rate": 5.484758929953549e-05, + "loss": 0.518, + "step": 284500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4845489333974925e-05, + "loss": 0.5274, + "step": 285000 + }, + { + "epoch": 0.17, + "learning_rate": 5.484338936841435e-05, + "loss": 0.5129, + "step": 285500 + }, + { + "epoch": 0.17, + "learning_rate": 5.484128940285379e-05, + "loss": 0.5142, + "step": 286000 + }, + { + "epoch": 0.17, + "learning_rate": 5.4839189437293226e-05, + "loss": 0.5305, + "step": 286500 + }, + { + "epoch": 0.17, + "learning_rate": 5.4837093671663786e-05, + "loss": 0.5205, + "step": 287000 + }, + { + "epoch": 0.17, + "learning_rate": 5.483499370610322e-05, + "loss": 0.5129, + "step": 287500 + }, + { + "epoch": 0.17, + "learning_rate": 5.483289794047377e-05, + "loss": 0.505, + "step": 288000 + }, + { + "epoch": 0.17, + "learning_rate": 5.483079797491321e-05, + "loss": 0.5062, + "step": 288500 + }, + { + "epoch": 0.17, + "learning_rate": 5.482869800935265e-05, + "loss": 0.5175, + "step": 289000 + }, + { + "epoch": 0.17, + "learning_rate": 5.482659804379208e-05, + "loss": 0.5239, + "step": 289500 + }, + { + "epoch": 0.17, + "learning_rate": 5.482449807823152e-05, + "loss": 0.5136, + "step": 290000 + }, + { + "epoch": 0.17, + "learning_rate": 5.482239811267095e-05, + "loss": 0.5101, + "step": 290500 + }, + { + "epoch": 0.17, + "learning_rate": 5.482029814711039e-05, + "loss": 0.5223, + "step": 291000 + }, + { + "epoch": 0.17, + "learning_rate": 5.481819818154982e-05, + "loss": 0.5275, + "step": 291500 + }, + { + "epoch": 0.18, + "learning_rate": 5.481610241592038e-05, + "loss": 0.5201, + "step": 292000 + }, + { + "epoch": 0.18, + "learning_rate": 5.481400245035981e-05, + "loss": 0.504, + "step": 292500 + }, + { + "epoch": 0.18, + "learning_rate": 5.481190668473037e-05, + "loss": 0.5262, + "step": 293000 + }, + { + "epoch": 0.18, + "learning_rate": 5.480980671916981e-05, + "loss": 0.5258, + "step": 293500 + }, + { + "epoch": 0.18, + "learning_rate": 5.480770675360924e-05, + "loss": 0.5064, + "step": 294000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4805606788048675e-05, + "loss": 0.5209, + "step": 294500 + }, + { + "epoch": 0.18, + "learning_rate": 5.480350682248811e-05, + "loss": 0.5096, + "step": 295000 + }, + { + "epoch": 0.18, + "learning_rate": 5.480140685692754e-05, + "loss": 0.5157, + "step": 295500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4799306891366976e-05, + "loss": 0.5102, + "step": 296000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4797206925806416e-05, + "loss": 0.5095, + "step": 296500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4795111160176976e-05, + "loss": 0.5077, + "step": 297000 + }, + { + "epoch": 0.18, + "learning_rate": 5.47930111946164e-05, + "loss": 0.5097, + "step": 297500 + }, + { + "epoch": 0.18, + "learning_rate": 5.479091122905584e-05, + "loss": 0.5095, + "step": 298000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478881126349528e-05, + "loss": 0.502, + "step": 298500 + }, + { + "epoch": 0.18, + "learning_rate": 5.478671129793471e-05, + "loss": 0.526, + "step": 299000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478461553230527e-05, + "loss": 0.5113, + "step": 299500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4782515566744704e-05, + "loss": 0.5203, + "step": 300000 + }, + { + "epoch": 0.18, + "eval_loss": 0.4822831451892853, + "eval_runtime": 1102.9313, + "eval_samples_per_second": 477.564, + "eval_steps_per_second": 79.594, + "step": 300000 + }, + { + "epoch": 0.18, + "learning_rate": 5.478041560118414e-05, + "loss": 0.5233, + "step": 300500 + }, + { + "epoch": 0.18, + "learning_rate": 5.477831563562357e-05, + "loss": 0.5089, + "step": 301000 + }, + { + "epoch": 0.18, + "learning_rate": 5.477621986999413e-05, + "loss": 0.5203, + "step": 301500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4774119904433565e-05, + "loss": 0.5129, + "step": 302000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4772019938873e-05, + "loss": 0.5119, + "step": 302500 + }, + { + "epoch": 0.18, + "learning_rate": 5.476991997331244e-05, + "loss": 0.5185, + "step": 303000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4767824207683e-05, + "loss": 0.5022, + "step": 303500 + }, + { + "epoch": 0.18, + "learning_rate": 5.476572424212243e-05, + "loss": 0.5123, + "step": 304000 + }, + { + "epoch": 0.18, + "learning_rate": 5.476362427656186e-05, + "loss": 0.5113, + "step": 304500 + }, + { + "epoch": 0.18, + "learning_rate": 5.47615243110013e-05, + "loss": 0.515, + "step": 305000 + }, + { + "epoch": 0.18, + "learning_rate": 5.475942854537186e-05, + "loss": 0.511, + "step": 305500 + }, + { + "epoch": 0.18, + "learning_rate": 5.475732857981129e-05, + "loss": 0.5163, + "step": 306000 + }, + { + "epoch": 0.18, + "learning_rate": 5.4755232814181846e-05, + "loss": 0.5108, + "step": 306500 + }, + { + "epoch": 0.18, + "learning_rate": 5.475313284862128e-05, + "loss": 0.5065, + "step": 307000 + }, + { + "epoch": 0.18, + "learning_rate": 5.475103288306072e-05, + "loss": 0.5166, + "step": 307500 + }, + { + "epoch": 0.18, + "learning_rate": 5.4748932917500154e-05, + "loss": 0.5103, + "step": 308000 + }, + { + "epoch": 0.18, + "learning_rate": 5.474683295193959e-05, + "loss": 0.5047, + "step": 308500 + }, + { + "epoch": 0.19, + "learning_rate": 5.474473298637903e-05, + "loss": 0.5089, + "step": 309000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4742633020818454e-05, + "loss": 0.523, + "step": 309500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4740533055257894e-05, + "loss": 0.5083, + "step": 310000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4738437289628455e-05, + "loss": 0.5164, + "step": 310500 + }, + { + "epoch": 0.19, + "learning_rate": 5.473633732406789e-05, + "loss": 0.5302, + "step": 311000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4734237358507315e-05, + "loss": 0.5004, + "step": 311500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4732137392946755e-05, + "loss": 0.5141, + "step": 312000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4730041627317315e-05, + "loss": 0.5105, + "step": 312500 + }, + { + "epoch": 0.19, + "learning_rate": 5.472794166175675e-05, + "loss": 0.5103, + "step": 313000 + }, + { + "epoch": 0.19, + "learning_rate": 5.472584169619618e-05, + "loss": 0.5096, + "step": 313500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4723741730635616e-05, + "loss": 0.5134, + "step": 314000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4721645965006176e-05, + "loss": 0.4984, + "step": 314500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471954599944561e-05, + "loss": 0.5093, + "step": 315000 + }, + { + "epoch": 0.19, + "learning_rate": 5.471744603388504e-05, + "loss": 0.5057, + "step": 315500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471534606832448e-05, + "loss": 0.5081, + "step": 316000 + }, + { + "epoch": 0.19, + "learning_rate": 5.471325030269504e-05, + "loss": 0.5256, + "step": 316500 + }, + { + "epoch": 0.19, + "learning_rate": 5.471115033713447e-05, + "loss": 0.5071, + "step": 317000 + }, + { + "epoch": 0.19, + "learning_rate": 5.470905037157391e-05, + "loss": 0.5014, + "step": 317500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4706954605944464e-05, + "loss": 0.5024, + "step": 318000 + }, + { + "epoch": 0.19, + "learning_rate": 5.470485884031502e-05, + "loss": 0.503, + "step": 318500 + }, + { + "epoch": 0.19, + "learning_rate": 5.470275887475445e-05, + "loss": 0.5149, + "step": 319000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4700658909193884e-05, + "loss": 0.5047, + "step": 319500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4698558943633325e-05, + "loss": 0.5071, + "step": 320000 + }, + { + "epoch": 0.19, + "learning_rate": 5.469645897807276e-05, + "loss": 0.5055, + "step": 320500 + }, + { + "epoch": 0.19, + "learning_rate": 5.469435901251219e-05, + "loss": 0.4914, + "step": 321000 + }, + { + "epoch": 0.19, + "learning_rate": 5.469225904695163e-05, + "loss": 0.5039, + "step": 321500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4690159081391065e-05, + "loss": 0.514, + "step": 322000 + }, + { + "epoch": 0.19, + "learning_rate": 5.46880591158305e-05, + "loss": 0.511, + "step": 322500 + }, + { + "epoch": 0.19, + "learning_rate": 5.468595915026994e-05, + "loss": 0.505, + "step": 323000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4683859184709366e-05, + "loss": 0.5123, + "step": 323500 + }, + { + "epoch": 0.19, + "learning_rate": 5.4681759219148806e-05, + "loss": 0.5041, + "step": 324000 + }, + { + "epoch": 0.19, + "learning_rate": 5.4679663453519366e-05, + "loss": 0.5104, + "step": 324500 + }, + { + "epoch": 0.19, + "learning_rate": 5.46775634879588e-05, + "loss": 0.511, + "step": 325000 + }, + { + "epoch": 0.2, + "learning_rate": 5.467546352239823e-05, + "loss": 0.5168, + "step": 325500 + }, + { + "epoch": 0.2, + "learning_rate": 5.467336355683767e-05, + "loss": 0.5044, + "step": 326000 + }, + { + "epoch": 0.2, + "learning_rate": 5.467126779120823e-05, + "loss": 0.5054, + "step": 326500 + }, + { + "epoch": 0.2, + "learning_rate": 5.466917202557878e-05, + "loss": 0.5026, + "step": 327000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4667072060018214e-05, + "loss": 0.5107, + "step": 327500 + }, + { + "epoch": 0.2, + "learning_rate": 5.466497209445765e-05, + "loss": 0.508, + "step": 328000 + }, + { + "epoch": 0.2, + "learning_rate": 5.466287212889709e-05, + "loss": 0.5086, + "step": 328500 + }, + { + "epoch": 0.2, + "learning_rate": 5.466077216333652e-05, + "loss": 0.505, + "step": 329000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4658672197775955e-05, + "loss": 0.5076, + "step": 329500 + }, + { + "epoch": 0.2, + "learning_rate": 5.4656572232215395e-05, + "loss": 0.5088, + "step": 330000 + }, + { + "epoch": 0.2, + "learning_rate": 5.465447226665483e-05, + "loss": 0.5191, + "step": 330500 + }, + { + "epoch": 0.2, + "learning_rate": 5.465237650102538e-05, + "loss": 0.5025, + "step": 331000 + }, + { + "epoch": 0.2, + "learning_rate": 5.465027653546482e-05, + "loss": 0.4919, + "step": 331500 + }, + { + "epoch": 0.2, + "learning_rate": 5.4648176569904256e-05, + "loss": 0.5094, + "step": 332000 + }, + { + "epoch": 0.2, + "learning_rate": 5.464607660434369e-05, + "loss": 0.5123, + "step": 332500 + }, + { + "epoch": 0.2, + "learning_rate": 5.464398083871424e-05, + "loss": 0.5047, + "step": 333000 + }, + { + "epoch": 0.2, + "learning_rate": 5.464188087315368e-05, + "loss": 0.5108, + "step": 333500 + }, + { + "epoch": 0.2, + "learning_rate": 5.4639780907593116e-05, + "loss": 0.4939, + "step": 334000 + }, + { + "epoch": 0.2, + "learning_rate": 5.463768094203255e-05, + "loss": 0.4956, + "step": 334500 + }, + { + "epoch": 0.2, + "learning_rate": 5.463558097647199e-05, + "loss": 0.5011, + "step": 335000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4633485210842544e-05, + "loss": 0.5076, + "step": 335500 + }, + { + "epoch": 0.2, + "learning_rate": 5.463138524528198e-05, + "loss": 0.5078, + "step": 336000 + }, + { + "epoch": 0.2, + "learning_rate": 5.462928527972141e-05, + "loss": 0.4944, + "step": 336500 + }, + { + "epoch": 0.2, + "learning_rate": 5.462718531416085e-05, + "loss": 0.5047, + "step": 337000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4625089548531404e-05, + "loss": 0.5066, + "step": 337500 + }, + { + "epoch": 0.2, + "learning_rate": 5.462298958297084e-05, + "loss": 0.5039, + "step": 338000 + }, + { + "epoch": 0.2, + "learning_rate": 5.462088961741028e-05, + "loss": 0.5119, + "step": 338500 + }, + { + "epoch": 0.2, + "learning_rate": 5.461878965184971e-05, + "loss": 0.5187, + "step": 339000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4616693886220265e-05, + "loss": 0.4974, + "step": 339500 + }, + { + "epoch": 0.2, + "learning_rate": 5.46145939206597e-05, + "loss": 0.4936, + "step": 340000 + }, + { + "epoch": 0.2, + "learning_rate": 5.461249395509914e-05, + "loss": 0.5076, + "step": 340500 + }, + { + "epoch": 0.2, + "learning_rate": 5.461039398953857e-05, + "loss": 0.5111, + "step": 341000 + }, + { + "epoch": 0.2, + "learning_rate": 5.4608298223909126e-05, + "loss": 0.5019, + "step": 341500 + }, + { + "epoch": 0.21, + "learning_rate": 5.460620245827968e-05, + "loss": 0.5021, + "step": 342000 + }, + { + "epoch": 0.21, + "learning_rate": 5.460410249271912e-05, + "loss": 0.5026, + "step": 342500 + }, + { + "epoch": 0.21, + "learning_rate": 5.460200252715855e-05, + "loss": 0.4984, + "step": 343000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4599902561597987e-05, + "loss": 0.502, + "step": 343500 + }, + { + "epoch": 0.21, + "learning_rate": 5.459780259603743e-05, + "loss": 0.4965, + "step": 344000 + }, + { + "epoch": 0.21, + "learning_rate": 5.459570263047686e-05, + "loss": 0.5082, + "step": 344500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4593602664916294e-05, + "loss": 0.503, + "step": 345000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4591502699355734e-05, + "loss": 0.505, + "step": 345500 + }, + { + "epoch": 0.21, + "learning_rate": 5.458940693372629e-05, + "loss": 0.5069, + "step": 346000 + }, + { + "epoch": 0.21, + "learning_rate": 5.458730696816572e-05, + "loss": 0.5096, + "step": 346500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4585207002605154e-05, + "loss": 0.504, + "step": 347000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4583107037044595e-05, + "loss": 0.4978, + "step": 347500 + }, + { + "epoch": 0.21, + "learning_rate": 5.458101127141515e-05, + "loss": 0.5064, + "step": 348000 + }, + { + "epoch": 0.21, + "learning_rate": 5.457891130585458e-05, + "loss": 0.5108, + "step": 348500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4576811340294015e-05, + "loss": 0.5069, + "step": 349000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4574711374733455e-05, + "loss": 0.4942, + "step": 349500 + }, + { + "epoch": 0.21, + "learning_rate": 5.457261560910401e-05, + "loss": 0.4912, + "step": 350000 + }, + { + "epoch": 0.21, + "learning_rate": 5.457051564354344e-05, + "loss": 0.4906, + "step": 350500 + }, + { + "epoch": 0.21, + "learning_rate": 5.456841567798288e-05, + "loss": 0.5016, + "step": 351000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4566315712422316e-05, + "loss": 0.4988, + "step": 351500 + }, + { + "epoch": 0.21, + "learning_rate": 5.456421994679287e-05, + "loss": 0.4995, + "step": 352000 + }, + { + "epoch": 0.21, + "learning_rate": 5.45621199812323e-05, + "loss": 0.5008, + "step": 352500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4560020015671743e-05, + "loss": 0.5023, + "step": 353000 + }, + { + "epoch": 0.21, + "learning_rate": 5.455792005011118e-05, + "loss": 0.5017, + "step": 353500 + }, + { + "epoch": 0.21, + "learning_rate": 5.455582428448173e-05, + "loss": 0.5088, + "step": 354000 + }, + { + "epoch": 0.21, + "learning_rate": 5.4553724318921164e-05, + "loss": 0.4994, + "step": 354500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4551624353360604e-05, + "loss": 0.5017, + "step": 355000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454952438780004e-05, + "loss": 0.498, + "step": 355500 + }, + { + "epoch": 0.21, + "learning_rate": 5.454742442223947e-05, + "loss": 0.4991, + "step": 356000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454532445667891e-05, + "loss": 0.5074, + "step": 356500 + }, + { + "epoch": 0.21, + "learning_rate": 5.4543224491118345e-05, + "loss": 0.4835, + "step": 357000 + }, + { + "epoch": 0.21, + "learning_rate": 5.454112452555778e-05, + "loss": 0.4909, + "step": 357500 + }, + { + "epoch": 0.21, + "learning_rate": 5.453902875992834e-05, + "loss": 0.501, + "step": 358000 + }, + { + "epoch": 0.21, + "learning_rate": 5.453693299429889e-05, + "loss": 0.4967, + "step": 358500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4534833028738326e-05, + "loss": 0.5016, + "step": 359000 + }, + { + "epoch": 0.22, + "learning_rate": 5.453273306317776e-05, + "loss": 0.5087, + "step": 359500 + }, + { + "epoch": 0.22, + "learning_rate": 5.45306330976172e-05, + "loss": 0.4931, + "step": 360000 + }, + { + "epoch": 0.22, + "learning_rate": 5.452853313205663e-05, + "loss": 0.5016, + "step": 360500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4526433166496066e-05, + "loss": 0.4924, + "step": 361000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4524333200935506e-05, + "loss": 0.4941, + "step": 361500 + }, + { + "epoch": 0.22, + "learning_rate": 5.452223743530606e-05, + "loss": 0.5112, + "step": 362000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4520137469745493e-05, + "loss": 0.5016, + "step": 362500 + }, + { + "epoch": 0.22, + "learning_rate": 5.451803750418493e-05, + "loss": 0.4971, + "step": 363000 + }, + { + "epoch": 0.22, + "learning_rate": 5.451593753862437e-05, + "loss": 0.4935, + "step": 363500 + }, + { + "epoch": 0.22, + "learning_rate": 5.45138375730638e-05, + "loss": 0.4995, + "step": 364000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4511737607503234e-05, + "loss": 0.4911, + "step": 364500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4509637641942674e-05, + "loss": 0.4947, + "step": 365000 + }, + { + "epoch": 0.22, + "learning_rate": 5.450753767638211e-05, + "loss": 0.4967, + "step": 365500 + }, + { + "epoch": 0.22, + "learning_rate": 5.450543771082154e-05, + "loss": 0.4976, + "step": 366000 + }, + { + "epoch": 0.22, + "learning_rate": 5.45033419451921e-05, + "loss": 0.4922, + "step": 366500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4501241979631535e-05, + "loss": 0.5062, + "step": 367000 + }, + { + "epoch": 0.22, + "learning_rate": 5.449914201407097e-05, + "loss": 0.4856, + "step": 367500 + }, + { + "epoch": 0.22, + "learning_rate": 5.449704204851041e-05, + "loss": 0.4983, + "step": 368000 + }, + { + "epoch": 0.22, + "learning_rate": 5.449494628288096e-05, + "loss": 0.4996, + "step": 368500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4492846317320396e-05, + "loss": 0.5002, + "step": 369000 + }, + { + "epoch": 0.22, + "learning_rate": 5.449074635175983e-05, + "loss": 0.4971, + "step": 369500 + }, + { + "epoch": 0.22, + "learning_rate": 5.448864638619927e-05, + "loss": 0.5007, + "step": 370000 + }, + { + "epoch": 0.22, + "learning_rate": 5.448655062056982e-05, + "loss": 0.5089, + "step": 370500 + }, + { + "epoch": 0.22, + "learning_rate": 5.4484450655009256e-05, + "loss": 0.4842, + "step": 371000 + }, + { + "epoch": 0.22, + "learning_rate": 5.448235068944869e-05, + "loss": 0.4895, + "step": 371500 + }, + { + "epoch": 0.22, + "learning_rate": 5.448025072388813e-05, + "loss": 0.4975, + "step": 372000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4478154958258684e-05, + "loss": 0.5121, + "step": 372500 + }, + { + "epoch": 0.22, + "learning_rate": 5.447605499269812e-05, + "loss": 0.5017, + "step": 373000 + }, + { + "epoch": 0.22, + "learning_rate": 5.447395502713756e-05, + "loss": 0.4865, + "step": 373500 + }, + { + "epoch": 0.22, + "learning_rate": 5.447185926150811e-05, + "loss": 0.5054, + "step": 374000 + }, + { + "epoch": 0.22, + "learning_rate": 5.4469759295947544e-05, + "loss": 0.5035, + "step": 374500 + }, + { + "epoch": 0.22, + "learning_rate": 5.446765933038698e-05, + "loss": 0.4888, + "step": 375000 + }, + { + "epoch": 0.23, + "learning_rate": 5.446555936482642e-05, + "loss": 0.5082, + "step": 375500 + }, + { + "epoch": 0.23, + "learning_rate": 5.446345939926585e-05, + "loss": 0.49, + "step": 376000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4461359433705285e-05, + "loss": 0.4938, + "step": 376500 + }, + { + "epoch": 0.23, + "learning_rate": 5.445926366807584e-05, + "loss": 0.5101, + "step": 377000 + }, + { + "epoch": 0.23, + "learning_rate": 5.445716370251528e-05, + "loss": 0.5033, + "step": 377500 + }, + { + "epoch": 0.23, + "learning_rate": 5.445506373695471e-05, + "loss": 0.4911, + "step": 378000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4452963771394146e-05, + "loss": 0.4924, + "step": 378500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4450863805833586e-05, + "loss": 0.4894, + "step": 379000 + }, + { + "epoch": 0.23, + "learning_rate": 5.444876384027302e-05, + "loss": 0.4997, + "step": 379500 + }, + { + "epoch": 0.23, + "learning_rate": 5.444666387471245e-05, + "loss": 0.4942, + "step": 380000 + }, + { + "epoch": 0.23, + "learning_rate": 5.444456390915189e-05, + "loss": 0.4815, + "step": 380500 + }, + { + "epoch": 0.23, + "learning_rate": 5.444246814352245e-05, + "loss": 0.4945, + "step": 381000 + }, + { + "epoch": 0.23, + "learning_rate": 5.444036817796188e-05, + "loss": 0.501, + "step": 381500 + }, + { + "epoch": 0.23, + "learning_rate": 5.443826821240132e-05, + "loss": 0.4842, + "step": 382000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4436172446771874e-05, + "loss": 0.5063, + "step": 382500 + }, + { + "epoch": 0.23, + "learning_rate": 5.443407248121131e-05, + "loss": 0.4981, + "step": 383000 + }, + { + "epoch": 0.23, + "learning_rate": 5.443197251565074e-05, + "loss": 0.4929, + "step": 383500 + }, + { + "epoch": 0.23, + "learning_rate": 5.442987255009018e-05, + "loss": 0.4935, + "step": 384000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4427772584529615e-05, + "loss": 0.4904, + "step": 384500 + }, + { + "epoch": 0.23, + "learning_rate": 5.442567681890017e-05, + "loss": 0.5077, + "step": 385000 + }, + { + "epoch": 0.23, + "learning_rate": 5.44235768533396e-05, + "loss": 0.4849, + "step": 385500 + }, + { + "epoch": 0.23, + "learning_rate": 5.442147688777904e-05, + "loss": 0.4817, + "step": 386000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4419376922218475e-05, + "loss": 0.4919, + "step": 386500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4417276956657916e-05, + "loss": 0.4935, + "step": 387000 + }, + { + "epoch": 0.23, + "learning_rate": 5.441517699109735e-05, + "loss": 0.4954, + "step": 387500 + }, + { + "epoch": 0.23, + "learning_rate": 5.4413077025536776e-05, + "loss": 0.4895, + "step": 388000 + }, + { + "epoch": 0.23, + "learning_rate": 5.4410977059976216e-05, + "loss": 0.4913, + "step": 388500 + }, + { + "epoch": 0.23, + "learning_rate": 5.440887709441565e-05, + "loss": 0.4849, + "step": 389000 + }, + { + "epoch": 0.23, + "learning_rate": 5.440678132878621e-05, + "loss": 0.4769, + "step": 389500 + }, + { + "epoch": 0.23, + "learning_rate": 5.440468136322564e-05, + "loss": 0.4958, + "step": 390000 + }, + { + "epoch": 0.23, + "learning_rate": 5.44025855975962e-05, + "loss": 0.4968, + "step": 390500 + }, + { + "epoch": 0.23, + "learning_rate": 5.440048563203564e-05, + "loss": 0.4991, + "step": 391000 + }, + { + "epoch": 0.23, + "learning_rate": 5.439838566647507e-05, + "loss": 0.4975, + "step": 391500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4396285700914504e-05, + "loss": 0.5008, + "step": 392000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4394185735353944e-05, + "loss": 0.4803, + "step": 392500 + }, + { + "epoch": 0.24, + "learning_rate": 5.439208576979337e-05, + "loss": 0.4931, + "step": 393000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438998580423281e-05, + "loss": 0.4944, + "step": 393500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4387885838672245e-05, + "loss": 0.4975, + "step": 394000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438578587311168e-05, + "loss": 0.4966, + "step": 394500 + }, + { + "epoch": 0.24, + "learning_rate": 5.438369010748223e-05, + "loss": 0.4939, + "step": 395000 + }, + { + "epoch": 0.24, + "learning_rate": 5.438159014192167e-05, + "loss": 0.4853, + "step": 395500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4379490176361105e-05, + "loss": 0.4925, + "step": 396000 + }, + { + "epoch": 0.24, + "learning_rate": 5.437739021080054e-05, + "loss": 0.4904, + "step": 396500 + }, + { + "epoch": 0.24, + "learning_rate": 5.43752944451711e-05, + "loss": 0.5037, + "step": 397000 + }, + { + "epoch": 0.24, + "learning_rate": 5.437319447961053e-05, + "loss": 0.4943, + "step": 397500 + }, + { + "epoch": 0.24, + "learning_rate": 5.437109871398109e-05, + "loss": 0.4858, + "step": 398000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4368998748420526e-05, + "loss": 0.4913, + "step": 398500 + }, + { + "epoch": 0.24, + "learning_rate": 5.436689878285996e-05, + "loss": 0.4948, + "step": 399000 + }, + { + "epoch": 0.24, + "learning_rate": 5.43647988172994e-05, + "loss": 0.4953, + "step": 399500 + }, + { + "epoch": 0.24, + "learning_rate": 5.436269885173883e-05, + "loss": 0.4932, + "step": 400000 + }, + { + "epoch": 0.24, + "eval_loss": 0.4640251398086548, + "eval_runtime": 1106.9436, + "eval_samples_per_second": 475.833, + "eval_steps_per_second": 79.306, + "step": 400000 + }, + { + "epoch": 0.24, + "learning_rate": 5.436059888617827e-05, + "loss": 0.4821, + "step": 400500 + }, + { + "epoch": 0.24, + "learning_rate": 5.43584989206177e-05, + "loss": 0.4923, + "step": 401000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4356398955057134e-05, + "loss": 0.4936, + "step": 401500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4354298989496574e-05, + "loss": 0.4886, + "step": 402000 + }, + { + "epoch": 0.24, + "learning_rate": 5.435220322386713e-05, + "loss": 0.4754, + "step": 402500 + }, + { + "epoch": 0.24, + "learning_rate": 5.435010325830656e-05, + "loss": 0.5005, + "step": 403000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4348003292745995e-05, + "loss": 0.4906, + "step": 403500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4345903327185435e-05, + "loss": 0.5004, + "step": 404000 + }, + { + "epoch": 0.24, + "learning_rate": 5.434380756155599e-05, + "loss": 0.4803, + "step": 404500 + }, + { + "epoch": 0.24, + "learning_rate": 5.434171179592655e-05, + "loss": 0.4901, + "step": 405000 + }, + { + "epoch": 0.24, + "learning_rate": 5.433961183036598e-05, + "loss": 0.5002, + "step": 405500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4337511864805416e-05, + "loss": 0.479, + "step": 406000 + }, + { + "epoch": 0.24, + "learning_rate": 5.4335411899244856e-05, + "loss": 0.5007, + "step": 406500 + }, + { + "epoch": 0.24, + "learning_rate": 5.433331193368428e-05, + "loss": 0.4883, + "step": 407000 + }, + { + "epoch": 0.24, + "learning_rate": 5.433121196812372e-05, + "loss": 0.4812, + "step": 407500 + }, + { + "epoch": 0.24, + "learning_rate": 5.4329112002563156e-05, + "loss": 0.4963, + "step": 408000 + }, + { + "epoch": 0.24, + "learning_rate": 5.432701203700259e-05, + "loss": 0.4825, + "step": 408500 + }, + { + "epoch": 0.25, + "learning_rate": 5.432491627137315e-05, + "loss": 0.5059, + "step": 409000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4322816305812584e-05, + "loss": 0.4886, + "step": 409500 + }, + { + "epoch": 0.25, + "learning_rate": 5.432071634025202e-05, + "loss": 0.4886, + "step": 410000 + }, + { + "epoch": 0.25, + "learning_rate": 5.431861637469145e-05, + "loss": 0.4839, + "step": 410500 + }, + { + "epoch": 0.25, + "learning_rate": 5.431651640913089e-05, + "loss": 0.49, + "step": 411000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4314416443570324e-05, + "loss": 0.5029, + "step": 411500 + }, + { + "epoch": 0.25, + "learning_rate": 5.431231647800976e-05, + "loss": 0.4902, + "step": 412000 + }, + { + "epoch": 0.25, + "learning_rate": 5.43102165124492e-05, + "loss": 0.4839, + "step": 412500 + }, + { + "epoch": 0.25, + "learning_rate": 5.430812074681975e-05, + "loss": 0.4861, + "step": 413000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4306020781259185e-05, + "loss": 0.494, + "step": 413500 + }, + { + "epoch": 0.25, + "learning_rate": 5.430392081569862e-05, + "loss": 0.4821, + "step": 414000 + }, + { + "epoch": 0.25, + "learning_rate": 5.430182085013806e-05, + "loss": 0.4898, + "step": 414500 + }, + { + "epoch": 0.25, + "learning_rate": 5.429972508450861e-05, + "loss": 0.4891, + "step": 415000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4297625118948046e-05, + "loss": 0.4907, + "step": 415500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4295525153387486e-05, + "loss": 0.4775, + "step": 416000 + }, + { + "epoch": 0.25, + "learning_rate": 5.429342518782692e-05, + "loss": 0.4801, + "step": 416500 + }, + { + "epoch": 0.25, + "learning_rate": 5.429132942219747e-05, + "loss": 0.4925, + "step": 417000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4289229456636906e-05, + "loss": 0.487, + "step": 417500 + }, + { + "epoch": 0.25, + "learning_rate": 5.428712949107635e-05, + "loss": 0.4831, + "step": 418000 + }, + { + "epoch": 0.25, + "learning_rate": 5.428502952551578e-05, + "loss": 0.4823, + "step": 418500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4282933759886334e-05, + "loss": 0.4925, + "step": 419000 + }, + { + "epoch": 0.25, + "learning_rate": 5.428083379432577e-05, + "loss": 0.4827, + "step": 419500 + }, + { + "epoch": 0.25, + "learning_rate": 5.427873382876521e-05, + "loss": 0.4905, + "step": 420000 + }, + { + "epoch": 0.25, + "learning_rate": 5.427663386320464e-05, + "loss": 0.4942, + "step": 420500 + }, + { + "epoch": 0.25, + "learning_rate": 5.42745380975752e-05, + "loss": 0.488, + "step": 421000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4272438132014635e-05, + "loss": 0.484, + "step": 421500 + }, + { + "epoch": 0.25, + "learning_rate": 5.4270342366385195e-05, + "loss": 0.4881, + "step": 422000 + }, + { + "epoch": 0.25, + "learning_rate": 5.426824240082463e-05, + "loss": 0.4892, + "step": 422500 + }, + { + "epoch": 0.25, + "learning_rate": 5.426614243526406e-05, + "loss": 0.4962, + "step": 423000 + }, + { + "epoch": 0.25, + "learning_rate": 5.4264042469703495e-05, + "loss": 0.5008, + "step": 423500 + }, + { + "epoch": 0.25, + "learning_rate": 5.426194250414293e-05, + "loss": 0.4817, + "step": 424000 + }, + { + "epoch": 0.25, + "learning_rate": 5.425984253858236e-05, + "loss": 0.4851, + "step": 424500 + }, + { + "epoch": 0.25, + "learning_rate": 5.42577425730218e-05, + "loss": 0.4779, + "step": 425000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4255642607461236e-05, + "loss": 0.4865, + "step": 425500 + }, + { + "epoch": 0.26, + "learning_rate": 5.425354684183179e-05, + "loss": 0.4877, + "step": 426000 + }, + { + "epoch": 0.26, + "learning_rate": 5.425144687627122e-05, + "loss": 0.4845, + "step": 426500 + }, + { + "epoch": 0.26, + "learning_rate": 5.424934691071066e-05, + "loss": 0.4875, + "step": 427000 + }, + { + "epoch": 0.26, + "learning_rate": 5.42472469451501e-05, + "loss": 0.485, + "step": 427500 + }, + { + "epoch": 0.26, + "learning_rate": 5.424515117952066e-05, + "loss": 0.488, + "step": 428000 + }, + { + "epoch": 0.26, + "learning_rate": 5.424305121396009e-05, + "loss": 0.4798, + "step": 428500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4240951248399524e-05, + "loss": 0.4893, + "step": 429000 + }, + { + "epoch": 0.26, + "learning_rate": 5.423885128283896e-05, + "loss": 0.4907, + "step": 429500 + }, + { + "epoch": 0.26, + "learning_rate": 5.423675551720952e-05, + "loss": 0.4784, + "step": 430000 + }, + { + "epoch": 0.26, + "learning_rate": 5.423465555164896e-05, + "loss": 0.4802, + "step": 430500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4232555586088385e-05, + "loss": 0.4829, + "step": 431000 + }, + { + "epoch": 0.26, + "learning_rate": 5.423045562052782e-05, + "loss": 0.4844, + "step": 431500 + }, + { + "epoch": 0.26, + "learning_rate": 5.422835985489838e-05, + "loss": 0.493, + "step": 432000 + }, + { + "epoch": 0.26, + "learning_rate": 5.422625988933782e-05, + "loss": 0.4844, + "step": 432500 + }, + { + "epoch": 0.26, + "learning_rate": 5.422415992377725e-05, + "loss": 0.475, + "step": 433000 + }, + { + "epoch": 0.26, + "learning_rate": 5.422205995821668e-05, + "loss": 0.5024, + "step": 433500 + }, + { + "epoch": 0.26, + "learning_rate": 5.421995999265612e-05, + "loss": 0.4863, + "step": 434000 + }, + { + "epoch": 0.26, + "learning_rate": 5.421786422702668e-05, + "loss": 0.4783, + "step": 434500 + }, + { + "epoch": 0.26, + "learning_rate": 5.421576426146611e-05, + "loss": 0.4873, + "step": 435000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4213664295905546e-05, + "loss": 0.4879, + "step": 435500 + }, + { + "epoch": 0.26, + "learning_rate": 5.421156433034498e-05, + "loss": 0.4712, + "step": 436000 + }, + { + "epoch": 0.26, + "learning_rate": 5.420946436478441e-05, + "loss": 0.4924, + "step": 436500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4207364399223854e-05, + "loss": 0.4926, + "step": 437000 + }, + { + "epoch": 0.26, + "learning_rate": 5.420526443366329e-05, + "loss": 0.4782, + "step": 437500 + }, + { + "epoch": 0.26, + "learning_rate": 5.420316446810272e-05, + "loss": 0.4805, + "step": 438000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4201068702473274e-05, + "loss": 0.4836, + "step": 438500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4198968736912714e-05, + "loss": 0.5047, + "step": 439000 + }, + { + "epoch": 0.26, + "learning_rate": 5.419686877135215e-05, + "loss": 0.4813, + "step": 439500 + }, + { + "epoch": 0.26, + "learning_rate": 5.419476880579158e-05, + "loss": 0.4868, + "step": 440000 + }, + { + "epoch": 0.26, + "learning_rate": 5.419266884023102e-05, + "loss": 0.4884, + "step": 440500 + }, + { + "epoch": 0.26, + "learning_rate": 5.4190573074601575e-05, + "loss": 0.493, + "step": 441000 + }, + { + "epoch": 0.26, + "learning_rate": 5.4188477308972135e-05, + "loss": 0.4917, + "step": 441500 + }, + { + "epoch": 0.26, + "learning_rate": 5.418637734341157e-05, + "loss": 0.4769, + "step": 442000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4184277377851e-05, + "loss": 0.4872, + "step": 442500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4182177412290436e-05, + "loss": 0.4839, + "step": 443000 + }, + { + "epoch": 0.27, + "learning_rate": 5.418007744672987e-05, + "loss": 0.473, + "step": 443500 + }, + { + "epoch": 0.27, + "learning_rate": 5.417797748116931e-05, + "loss": 0.4828, + "step": 444000 + }, + { + "epoch": 0.27, + "learning_rate": 5.417587751560874e-05, + "loss": 0.4802, + "step": 444500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4173777550048176e-05, + "loss": 0.481, + "step": 445000 + }, + { + "epoch": 0.27, + "learning_rate": 5.417168178441873e-05, + "loss": 0.4829, + "step": 445500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416958181885817e-05, + "loss": 0.4798, + "step": 446000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4167481853297604e-05, + "loss": 0.4819, + "step": 446500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416538188773704e-05, + "loss": 0.4796, + "step": 447000 + }, + { + "epoch": 0.27, + "learning_rate": 5.416328612210759e-05, + "loss": 0.4809, + "step": 447500 + }, + { + "epoch": 0.27, + "learning_rate": 5.416118615654703e-05, + "loss": 0.488, + "step": 448000 + }, + { + "epoch": 0.27, + "learning_rate": 5.415909039091759e-05, + "loss": 0.4986, + "step": 448500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4156990425357025e-05, + "loss": 0.4803, + "step": 449000 + }, + { + "epoch": 0.27, + "learning_rate": 5.415489045979646e-05, + "loss": 0.4834, + "step": 449500 + }, + { + "epoch": 0.27, + "learning_rate": 5.415279049423589e-05, + "loss": 0.4773, + "step": 450000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4150690528675325e-05, + "loss": 0.4835, + "step": 450500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4148590563114765e-05, + "loss": 0.4915, + "step": 451000 + }, + { + "epoch": 0.27, + "learning_rate": 5.41464905975542e-05, + "loss": 0.4809, + "step": 451500 + }, + { + "epoch": 0.27, + "learning_rate": 5.414439483192476e-05, + "loss": 0.492, + "step": 452000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4142294866364186e-05, + "loss": 0.4793, + "step": 452500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4140194900803626e-05, + "loss": 0.4843, + "step": 453000 + }, + { + "epoch": 0.27, + "learning_rate": 5.413809493524306e-05, + "loss": 0.4828, + "step": 453500 + }, + { + "epoch": 0.27, + "learning_rate": 5.413599496968249e-05, + "loss": 0.4858, + "step": 454000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4133899204053047e-05, + "loss": 0.4831, + "step": 454500 + }, + { + "epoch": 0.27, + "learning_rate": 5.413179923849249e-05, + "loss": 0.4877, + "step": 455000 + }, + { + "epoch": 0.27, + "learning_rate": 5.412969927293192e-05, + "loss": 0.4842, + "step": 455500 + }, + { + "epoch": 0.27, + "learning_rate": 5.4127599307371354e-05, + "loss": 0.4774, + "step": 456000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4125503541741914e-05, + "loss": 0.4777, + "step": 456500 + }, + { + "epoch": 0.27, + "learning_rate": 5.412340357618135e-05, + "loss": 0.4871, + "step": 457000 + }, + { + "epoch": 0.27, + "learning_rate": 5.412130361062078e-05, + "loss": 0.485, + "step": 457500 + }, + { + "epoch": 0.27, + "learning_rate": 5.411920364506022e-05, + "loss": 0.4824, + "step": 458000 + }, + { + "epoch": 0.27, + "learning_rate": 5.4117103679499655e-05, + "loss": 0.4786, + "step": 458500 + }, + { + "epoch": 0.28, + "learning_rate": 5.411500371393909e-05, + "loss": 0.4775, + "step": 459000 + }, + { + "epoch": 0.28, + "learning_rate": 5.411290374837853e-05, + "loss": 0.4765, + "step": 459500 + }, + { + "epoch": 0.28, + "learning_rate": 5.411080378281796e-05, + "loss": 0.468, + "step": 460000 + }, + { + "epoch": 0.28, + "learning_rate": 5.410871221711964e-05, + "loss": 0.4827, + "step": 460500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4106612251559076e-05, + "loss": 0.4798, + "step": 461000 + }, + { + "epoch": 0.28, + "learning_rate": 5.410451228599851e-05, + "loss": 0.484, + "step": 461500 + }, + { + "epoch": 0.28, + "learning_rate": 5.410241232043794e-05, + "loss": 0.4744, + "step": 462000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4100312354877376e-05, + "loss": 0.4888, + "step": 462500 + }, + { + "epoch": 0.28, + "learning_rate": 5.409821238931681e-05, + "loss": 0.4786, + "step": 463000 + }, + { + "epoch": 0.28, + "learning_rate": 5.409611242375625e-05, + "loss": 0.4777, + "step": 463500 + }, + { + "epoch": 0.28, + "learning_rate": 5.409401245819568e-05, + "loss": 0.4813, + "step": 464000 + }, + { + "epoch": 0.28, + "learning_rate": 5.409191669256624e-05, + "loss": 0.486, + "step": 464500 + }, + { + "epoch": 0.28, + "learning_rate": 5.408981672700568e-05, + "loss": 0.4769, + "step": 465000 + }, + { + "epoch": 0.28, + "learning_rate": 5.408772096137624e-05, + "loss": 0.4723, + "step": 465500 + }, + { + "epoch": 0.28, + "learning_rate": 5.408562099581567e-05, + "loss": 0.4762, + "step": 466000 + }, + { + "epoch": 0.28, + "learning_rate": 5.40835210302551e-05, + "loss": 0.4793, + "step": 466500 + }, + { + "epoch": 0.28, + "learning_rate": 5.408142106469454e-05, + "loss": 0.4789, + "step": 467000 + }, + { + "epoch": 0.28, + "learning_rate": 5.407932109913397e-05, + "loss": 0.4817, + "step": 467500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4077221133573405e-05, + "loss": 0.469, + "step": 468000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4075121168012845e-05, + "loss": 0.4803, + "step": 468500 + }, + { + "epoch": 0.28, + "learning_rate": 5.40730254023834e-05, + "loss": 0.4751, + "step": 469000 + }, + { + "epoch": 0.28, + "learning_rate": 5.407092543682283e-05, + "loss": 0.4776, + "step": 469500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4068825471262265e-05, + "loss": 0.4821, + "step": 470000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4066725505701706e-05, + "loss": 0.4766, + "step": 470500 + }, + { + "epoch": 0.28, + "learning_rate": 5.406462554014114e-05, + "loss": 0.4848, + "step": 471000 + }, + { + "epoch": 0.28, + "learning_rate": 5.406252557458058e-05, + "loss": 0.4782, + "step": 471500 + }, + { + "epoch": 0.28, + "learning_rate": 5.406042560902001e-05, + "loss": 0.4766, + "step": 472000 + }, + { + "epoch": 0.28, + "learning_rate": 5.4058325643459446e-05, + "loss": 0.4727, + "step": 472500 + }, + { + "epoch": 0.28, + "learning_rate": 5.405623407776113e-05, + "loss": 0.4806, + "step": 473000 + }, + { + "epoch": 0.28, + "learning_rate": 5.405413411220056e-05, + "loss": 0.4798, + "step": 473500 + }, + { + "epoch": 0.28, + "learning_rate": 5.4052034146639994e-05, + "loss": 0.4663, + "step": 474000 + }, + { + "epoch": 0.28, + "learning_rate": 5.404993418107943e-05, + "loss": 0.4745, + "step": 474500 + }, + { + "epoch": 0.28, + "learning_rate": 5.404783421551886e-05, + "loss": 0.4759, + "step": 475000 + }, + { + "epoch": 0.29, + "learning_rate": 5.40457342499583e-05, + "loss": 0.4731, + "step": 475500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4043634284397734e-05, + "loss": 0.4838, + "step": 476000 + }, + { + "epoch": 0.29, + "learning_rate": 5.404153431883717e-05, + "loss": 0.4786, + "step": 476500 + }, + { + "epoch": 0.29, + "learning_rate": 5.403943855320772e-05, + "loss": 0.4884, + "step": 477000 + }, + { + "epoch": 0.29, + "learning_rate": 5.403734278757829e-05, + "loss": 0.4771, + "step": 477500 + }, + { + "epoch": 0.29, + "learning_rate": 5.403524282201772e-05, + "loss": 0.4685, + "step": 478000 + }, + { + "epoch": 0.29, + "learning_rate": 5.403314285645715e-05, + "loss": 0.4844, + "step": 478500 + }, + { + "epoch": 0.29, + "learning_rate": 5.403104289089659e-05, + "loss": 0.4735, + "step": 479000 + }, + { + "epoch": 0.29, + "learning_rate": 5.402894292533602e-05, + "loss": 0.4749, + "step": 479500 + }, + { + "epoch": 0.29, + "learning_rate": 5.402684715970658e-05, + "loss": 0.4834, + "step": 480000 + }, + { + "epoch": 0.29, + "learning_rate": 5.4024747194146016e-05, + "loss": 0.4872, + "step": 480500 + }, + { + "epoch": 0.29, + "learning_rate": 5.402264722858545e-05, + "loss": 0.4707, + "step": 481000 + }, + { + "epoch": 0.29, + "learning_rate": 5.402054726302488e-05, + "loss": 0.4784, + "step": 481500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4018447297464317e-05, + "loss": 0.4714, + "step": 482000 + }, + { + "epoch": 0.29, + "learning_rate": 5.401634733190376e-05, + "loss": 0.4916, + "step": 482500 + }, + { + "epoch": 0.29, + "learning_rate": 5.401424736634319e-05, + "loss": 0.4772, + "step": 483000 + }, + { + "epoch": 0.29, + "learning_rate": 5.4012151600713744e-05, + "loss": 0.4774, + "step": 483500 + }, + { + "epoch": 0.29, + "learning_rate": 5.401005163515318e-05, + "loss": 0.4721, + "step": 484000 + }, + { + "epoch": 0.29, + "learning_rate": 5.400795166959262e-05, + "loss": 0.4684, + "step": 484500 + }, + { + "epoch": 0.29, + "learning_rate": 5.400585170403205e-05, + "loss": 0.4806, + "step": 485000 + }, + { + "epoch": 0.29, + "learning_rate": 5.400375173847149e-05, + "loss": 0.474, + "step": 485500 + }, + { + "epoch": 0.29, + "learning_rate": 5.4001651772910925e-05, + "loss": 0.4765, + "step": 486000 + }, + { + "epoch": 0.29, + "learning_rate": 5.399955180735036e-05, + "loss": 0.4791, + "step": 486500 + }, + { + "epoch": 0.29, + "learning_rate": 5.39974518417898e-05, + "loss": 0.4719, + "step": 487000 + }, + { + "epoch": 0.29, + "learning_rate": 5.399535187622923e-05, + "loss": 0.4824, + "step": 487500 + }, + { + "epoch": 0.29, + "learning_rate": 5.3993256110599785e-05, + "loss": 0.4636, + "step": 488000 + }, + { + "epoch": 0.29, + "learning_rate": 5.399115614503922e-05, + "loss": 0.4664, + "step": 488500 + }, + { + "epoch": 0.29, + "learning_rate": 5.398906037940977e-05, + "loss": 0.4824, + "step": 489000 + }, + { + "epoch": 0.29, + "learning_rate": 5.398696041384921e-05, + "loss": 0.4752, + "step": 489500 + }, + { + "epoch": 0.29, + "learning_rate": 5.3984860448288646e-05, + "loss": 0.4637, + "step": 490000 + }, + { + "epoch": 0.29, + "learning_rate": 5.398276048272808e-05, + "loss": 0.4761, + "step": 490500 + }, + { + "epoch": 0.29, + "learning_rate": 5.398066051716752e-05, + "loss": 0.4694, + "step": 491000 + }, + { + "epoch": 0.29, + "learning_rate": 5.397856475153807e-05, + "loss": 0.462, + "step": 491500 + }, + { + "epoch": 0.29, + "learning_rate": 5.397646478597751e-05, + "loss": 0.4713, + "step": 492000 + }, + { + "epoch": 0.3, + "learning_rate": 5.397436482041695e-05, + "loss": 0.4831, + "step": 492500 + }, + { + "epoch": 0.3, + "learning_rate": 5.397226485485638e-05, + "loss": 0.486, + "step": 493000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3970164889295814e-05, + "loss": 0.4889, + "step": 493500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3968064923735254e-05, + "loss": 0.4764, + "step": 494000 + }, + { + "epoch": 0.3, + "learning_rate": 5.396596495817469e-05, + "loss": 0.4812, + "step": 494500 + }, + { + "epoch": 0.3, + "learning_rate": 5.396386499261412e-05, + "loss": 0.4633, + "step": 495000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3961769226984675e-05, + "loss": 0.4737, + "step": 495500 + }, + { + "epoch": 0.3, + "learning_rate": 5.395967346135523e-05, + "loss": 0.4946, + "step": 496000 + }, + { + "epoch": 0.3, + "learning_rate": 5.395757349579467e-05, + "loss": 0.4614, + "step": 496500 + }, + { + "epoch": 0.3, + "learning_rate": 5.39554735302341e-05, + "loss": 0.4759, + "step": 497000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3953373564673535e-05, + "loss": 0.469, + "step": 497500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3951273599112976e-05, + "loss": 0.4852, + "step": 498000 + }, + { + "epoch": 0.3, + "learning_rate": 5.394917363355241e-05, + "loss": 0.4739, + "step": 498500 + }, + { + "epoch": 0.3, + "learning_rate": 5.394707366799184e-05, + "loss": 0.4711, + "step": 499000 + }, + { + "epoch": 0.3, + "learning_rate": 5.394497370243128e-05, + "loss": 0.4733, + "step": 499500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3942877936801836e-05, + "loss": 0.4654, + "step": 500000 + }, + { + "epoch": 0.3, + "eval_loss": 0.44848358631134033, + "eval_runtime": 1105.3231, + "eval_samples_per_second": 476.53, + "eval_steps_per_second": 79.422, + "step": 500000 + }, + { + "epoch": 0.3, + "learning_rate": 5.394077797124127e-05, + "loss": 0.4744, + "step": 500500 + }, + { + "epoch": 0.3, + "learning_rate": 5.393867800568071e-05, + "loss": 0.4725, + "step": 501000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3936578040120144e-05, + "loss": 0.4721, + "step": 501500 + }, + { + "epoch": 0.3, + "learning_rate": 5.393447807455958e-05, + "loss": 0.4745, + "step": 502000 + }, + { + "epoch": 0.3, + "learning_rate": 5.393238230893013e-05, + "loss": 0.4814, + "step": 502500 + }, + { + "epoch": 0.3, + "learning_rate": 5.393028234336957e-05, + "loss": 0.47, + "step": 503000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3928182377809004e-05, + "loss": 0.4721, + "step": 503500 + }, + { + "epoch": 0.3, + "learning_rate": 5.392608241224844e-05, + "loss": 0.4668, + "step": 504000 + }, + { + "epoch": 0.3, + "learning_rate": 5.392398664661899e-05, + "loss": 0.4679, + "step": 504500 + }, + { + "epoch": 0.3, + "learning_rate": 5.392188668105843e-05, + "loss": 0.4823, + "step": 505000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3919786715497865e-05, + "loss": 0.4748, + "step": 505500 + }, + { + "epoch": 0.3, + "learning_rate": 5.39176867499373e-05, + "loss": 0.4733, + "step": 506000 + }, + { + "epoch": 0.3, + "learning_rate": 5.391559098430786e-05, + "loss": 0.4842, + "step": 506500 + }, + { + "epoch": 0.3, + "learning_rate": 5.391349101874729e-05, + "loss": 0.476, + "step": 507000 + }, + { + "epoch": 0.3, + "learning_rate": 5.3911391053186726e-05, + "loss": 0.4767, + "step": 507500 + }, + { + "epoch": 0.3, + "learning_rate": 5.3909291087626166e-05, + "loss": 0.4652, + "step": 508000 + }, + { + "epoch": 0.3, + "learning_rate": 5.390719952192784e-05, + "loss": 0.476, + "step": 508500 + }, + { + "epoch": 0.31, + "learning_rate": 5.390509955636728e-05, + "loss": 0.4767, + "step": 509000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3902999590806707e-05, + "loss": 0.4742, + "step": 509500 + }, + { + "epoch": 0.31, + "learning_rate": 5.390089962524614e-05, + "loss": 0.4767, + "step": 510000 + }, + { + "epoch": 0.31, + "learning_rate": 5.389879965968558e-05, + "loss": 0.4709, + "step": 510500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3896699694125014e-05, + "loss": 0.4737, + "step": 511000 + }, + { + "epoch": 0.31, + "learning_rate": 5.389459972856445e-05, + "loss": 0.4719, + "step": 511500 + }, + { + "epoch": 0.31, + "learning_rate": 5.389249976300389e-05, + "loss": 0.4696, + "step": 512000 + }, + { + "epoch": 0.31, + "learning_rate": 5.389039979744332e-05, + "loss": 0.4794, + "step": 512500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3888304031813874e-05, + "loss": 0.4704, + "step": 513000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3886204066253315e-05, + "loss": 0.4682, + "step": 513500 + }, + { + "epoch": 0.31, + "learning_rate": 5.388410410069275e-05, + "loss": 0.4628, + "step": 514000 + }, + { + "epoch": 0.31, + "learning_rate": 5.388200413513218e-05, + "loss": 0.474, + "step": 514500 + }, + { + "epoch": 0.31, + "learning_rate": 5.387991256943386e-05, + "loss": 0.471, + "step": 515000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3877816803804416e-05, + "loss": 0.4628, + "step": 515500 + }, + { + "epoch": 0.31, + "learning_rate": 5.387571683824385e-05, + "loss": 0.4751, + "step": 516000 + }, + { + "epoch": 0.31, + "learning_rate": 5.387361687268329e-05, + "loss": 0.4658, + "step": 516500 + }, + { + "epoch": 0.31, + "learning_rate": 5.387151690712272e-05, + "loss": 0.4581, + "step": 517000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3869416941562156e-05, + "loss": 0.4749, + "step": 517500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3867316976001596e-05, + "loss": 0.4818, + "step": 518000 + }, + { + "epoch": 0.31, + "learning_rate": 5.386521701044103e-05, + "loss": 0.4768, + "step": 518500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3863117044880463e-05, + "loss": 0.4707, + "step": 519000 + }, + { + "epoch": 0.31, + "learning_rate": 5.38610170793199e-05, + "loss": 0.4678, + "step": 519500 + }, + { + "epoch": 0.31, + "learning_rate": 5.385891711375933e-05, + "loss": 0.4559, + "step": 520000 + }, + { + "epoch": 0.31, + "learning_rate": 5.385681714819877e-05, + "loss": 0.4681, + "step": 520500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3854717182638204e-05, + "loss": 0.4787, + "step": 521000 + }, + { + "epoch": 0.31, + "learning_rate": 5.385262141700876e-05, + "loss": 0.4673, + "step": 521500 + }, + { + "epoch": 0.31, + "learning_rate": 5.385052145144819e-05, + "loss": 0.4693, + "step": 522000 + }, + { + "epoch": 0.31, + "learning_rate": 5.384842148588763e-05, + "loss": 0.4629, + "step": 522500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3846321520327065e-05, + "loss": 0.4697, + "step": 523000 + }, + { + "epoch": 0.31, + "learning_rate": 5.3844225754697625e-05, + "loss": 0.4682, + "step": 523500 + }, + { + "epoch": 0.31, + "learning_rate": 5.384212578913705e-05, + "loss": 0.4765, + "step": 524000 + }, + { + "epoch": 0.31, + "learning_rate": 5.384002582357649e-05, + "loss": 0.4713, + "step": 524500 + }, + { + "epoch": 0.31, + "learning_rate": 5.3837925858015925e-05, + "loss": 0.462, + "step": 525000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3835830092386486e-05, + "loss": 0.487, + "step": 525500 + }, + { + "epoch": 0.32, + "learning_rate": 5.383373012682592e-05, + "loss": 0.4732, + "step": 526000 + }, + { + "epoch": 0.32, + "learning_rate": 5.383163016126535e-05, + "loss": 0.4689, + "step": 526500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3829530195704786e-05, + "loss": 0.4776, + "step": 527000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3827430230144226e-05, + "loss": 0.4669, + "step": 527500 + }, + { + "epoch": 0.32, + "learning_rate": 5.382533026458366e-05, + "loss": 0.4559, + "step": 528000 + }, + { + "epoch": 0.32, + "learning_rate": 5.382323029902309e-05, + "loss": 0.4742, + "step": 528500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3821130333462534e-05, + "loss": 0.4723, + "step": 529000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381903456783309e-05, + "loss": 0.4701, + "step": 529500 + }, + { + "epoch": 0.32, + "learning_rate": 5.381693460227252e-05, + "loss": 0.4639, + "step": 530000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381483883664308e-05, + "loss": 0.4688, + "step": 530500 + }, + { + "epoch": 0.32, + "learning_rate": 5.381273887108251e-05, + "loss": 0.4621, + "step": 531000 + }, + { + "epoch": 0.32, + "learning_rate": 5.381063890552195e-05, + "loss": 0.4747, + "step": 531500 + }, + { + "epoch": 0.32, + "learning_rate": 5.380853893996138e-05, + "loss": 0.4659, + "step": 532000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3806438974400815e-05, + "loss": 0.4702, + "step": 532500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3804339008840255e-05, + "loss": 0.4744, + "step": 533000 + }, + { + "epoch": 0.32, + "learning_rate": 5.380223904327969e-05, + "loss": 0.4646, + "step": 533500 + }, + { + "epoch": 0.32, + "learning_rate": 5.380013907771912e-05, + "loss": 0.4655, + "step": 534000 + }, + { + "epoch": 0.32, + "learning_rate": 5.379804331208968e-05, + "loss": 0.4674, + "step": 534500 + }, + { + "epoch": 0.32, + "learning_rate": 5.379594754646024e-05, + "loss": 0.4582, + "step": 535000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3793851780830796e-05, + "loss": 0.4678, + "step": 535500 + }, + { + "epoch": 0.32, + "learning_rate": 5.379175181527023e-05, + "loss": 0.4647, + "step": 536000 + }, + { + "epoch": 0.32, + "learning_rate": 5.378965184970966e-05, + "loss": 0.4683, + "step": 536500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3787551884149103e-05, + "loss": 0.4691, + "step": 537000 + }, + { + "epoch": 0.32, + "learning_rate": 5.378545191858854e-05, + "loss": 0.4705, + "step": 537500 + }, + { + "epoch": 0.32, + "learning_rate": 5.378335615295909e-05, + "loss": 0.4659, + "step": 538000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3781256187398524e-05, + "loss": 0.4598, + "step": 538500 + }, + { + "epoch": 0.32, + "learning_rate": 5.3779156221837964e-05, + "loss": 0.4767, + "step": 539000 + }, + { + "epoch": 0.32, + "learning_rate": 5.37770562562774e-05, + "loss": 0.4741, + "step": 539500 + }, + { + "epoch": 0.32, + "learning_rate": 5.377495629071683e-05, + "loss": 0.4665, + "step": 540000 + }, + { + "epoch": 0.32, + "learning_rate": 5.3772856325156264e-05, + "loss": 0.4662, + "step": 540500 + }, + { + "epoch": 0.32, + "learning_rate": 5.37707563595957e-05, + "loss": 0.4796, + "step": 541000 + }, + { + "epoch": 0.32, + "learning_rate": 5.376865639403514e-05, + "loss": 0.473, + "step": 541500 + }, + { + "epoch": 0.32, + "learning_rate": 5.376655642847457e-05, + "loss": 0.4599, + "step": 542000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3764456462914005e-05, + "loss": 0.4725, + "step": 542500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3762356497353445e-05, + "loss": 0.47, + "step": 543000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3760260731724e-05, + "loss": 0.4776, + "step": 543500 + }, + { + "epoch": 0.33, + "learning_rate": 5.375816076616343e-05, + "loss": 0.4765, + "step": 544000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3756060800602866e-05, + "loss": 0.4611, + "step": 544500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3753960835042306e-05, + "loss": 0.4681, + "step": 545000 + }, + { + "epoch": 0.33, + "learning_rate": 5.375186086948174e-05, + "loss": 0.4627, + "step": 545500 + }, + { + "epoch": 0.33, + "learning_rate": 5.374976090392117e-05, + "loss": 0.4707, + "step": 546000 + }, + { + "epoch": 0.33, + "learning_rate": 5.374766093836061e-05, + "loss": 0.4587, + "step": 546500 + }, + { + "epoch": 0.33, + "learning_rate": 5.374556097280005e-05, + "loss": 0.4564, + "step": 547000 + }, + { + "epoch": 0.33, + "learning_rate": 5.37434652071706e-05, + "loss": 0.4745, + "step": 547500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3741365241610034e-05, + "loss": 0.4713, + "step": 548000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3739269475980594e-05, + "loss": 0.4714, + "step": 548500 + }, + { + "epoch": 0.33, + "learning_rate": 5.373716951042003e-05, + "loss": 0.4618, + "step": 549000 + }, + { + "epoch": 0.33, + "learning_rate": 5.373507374479059e-05, + "loss": 0.4738, + "step": 549500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3732973779230015e-05, + "loss": 0.4768, + "step": 550000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3730873813669455e-05, + "loss": 0.4796, + "step": 550500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372877384810889e-05, + "loss": 0.4629, + "step": 551000 + }, + { + "epoch": 0.33, + "learning_rate": 5.372667388254832e-05, + "loss": 0.4726, + "step": 551500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372457391698776e-05, + "loss": 0.4692, + "step": 552000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3722473951427195e-05, + "loss": 0.4632, + "step": 552500 + }, + { + "epoch": 0.33, + "learning_rate": 5.372037398586663e-05, + "loss": 0.4545, + "step": 553000 + }, + { + "epoch": 0.33, + "learning_rate": 5.371827402030607e-05, + "loss": 0.458, + "step": 553500 + }, + { + "epoch": 0.33, + "learning_rate": 5.37161740547455e-05, + "loss": 0.4636, + "step": 554000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3714078289116056e-05, + "loss": 0.4788, + "step": 554500 + }, + { + "epoch": 0.33, + "learning_rate": 5.371197832355549e-05, + "loss": 0.4675, + "step": 555000 + }, + { + "epoch": 0.33, + "learning_rate": 5.370987835799493e-05, + "loss": 0.4563, + "step": 555500 + }, + { + "epoch": 0.33, + "learning_rate": 5.370777839243436e-05, + "loss": 0.4649, + "step": 556000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3705678426873804e-05, + "loss": 0.4569, + "step": 556500 + }, + { + "epoch": 0.33, + "learning_rate": 5.370357846131324e-05, + "loss": 0.4584, + "step": 557000 + }, + { + "epoch": 0.33, + "learning_rate": 5.370147849575267e-05, + "loss": 0.463, + "step": 557500 + }, + { + "epoch": 0.33, + "learning_rate": 5.3699378530192104e-05, + "loss": 0.4636, + "step": 558000 + }, + { + "epoch": 0.33, + "learning_rate": 5.3697282764562664e-05, + "loss": 0.4629, + "step": 558500 + }, + { + "epoch": 0.34, + "learning_rate": 5.36951827990021e-05, + "loss": 0.4681, + "step": 559000 + }, + { + "epoch": 0.34, + "learning_rate": 5.369308283344153e-05, + "loss": 0.4707, + "step": 559500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3690987067812085e-05, + "loss": 0.4832, + "step": 560000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3688887102251525e-05, + "loss": 0.46, + "step": 560500 + }, + { + "epoch": 0.34, + "learning_rate": 5.368678713669096e-05, + "loss": 0.4679, + "step": 561000 + }, + { + "epoch": 0.34, + "learning_rate": 5.368468717113039e-05, + "loss": 0.4642, + "step": 561500 + }, + { + "epoch": 0.34, + "learning_rate": 5.368258720556983e-05, + "loss": 0.4681, + "step": 562000 + }, + { + "epoch": 0.34, + "learning_rate": 5.368048724000926e-05, + "loss": 0.4787, + "step": 562500 + }, + { + "epoch": 0.34, + "learning_rate": 5.36783872744487e-05, + "loss": 0.4644, + "step": 563000 + }, + { + "epoch": 0.34, + "learning_rate": 5.367628730888813e-05, + "loss": 0.4619, + "step": 563500 + }, + { + "epoch": 0.34, + "learning_rate": 5.367419154325869e-05, + "loss": 0.4637, + "step": 564000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3672091577698126e-05, + "loss": 0.4603, + "step": 564500 + }, + { + "epoch": 0.34, + "learning_rate": 5.366999161213756e-05, + "loss": 0.4748, + "step": 565000 + }, + { + "epoch": 0.34, + "learning_rate": 5.366789584650812e-05, + "loss": 0.4696, + "step": 565500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3665795880947554e-05, + "loss": 0.4617, + "step": 566000 + }, + { + "epoch": 0.34, + "learning_rate": 5.366369591538699e-05, + "loss": 0.4513, + "step": 566500 + }, + { + "epoch": 0.34, + "learning_rate": 5.366159594982643e-05, + "loss": 0.4597, + "step": 567000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3659495984265854e-05, + "loss": 0.4643, + "step": 567500 + }, + { + "epoch": 0.34, + "learning_rate": 5.365739601870529e-05, + "loss": 0.4605, + "step": 568000 + }, + { + "epoch": 0.34, + "learning_rate": 5.365529605314473e-05, + "loss": 0.4665, + "step": 568500 + }, + { + "epoch": 0.34, + "learning_rate": 5.365319608758416e-05, + "loss": 0.4612, + "step": 569000 + }, + { + "epoch": 0.34, + "learning_rate": 5.365110032195472e-05, + "loss": 0.4624, + "step": 569500 + }, + { + "epoch": 0.34, + "learning_rate": 5.3649000356394155e-05, + "loss": 0.4799, + "step": 570000 + }, + { + "epoch": 0.34, + "learning_rate": 5.364690039083359e-05, + "loss": 0.469, + "step": 570500 + }, + { + "epoch": 0.34, + "learning_rate": 5.364480042527302e-05, + "loss": 0.4845, + "step": 571000 + }, + { + "epoch": 0.34, + "learning_rate": 5.364270465964358e-05, + "loss": 0.4607, + "step": 571500 + }, + { + "epoch": 0.34, + "learning_rate": 5.364060469408302e-05, + "loss": 0.4676, + "step": 572000 + }, + { + "epoch": 0.34, + "learning_rate": 5.3638508928453576e-05, + "loss": 0.4709, + "step": 572500 + }, + { + "epoch": 0.34, + "learning_rate": 5.363640896289301e-05, + "loss": 0.4588, + "step": 573000 + }, + { + "epoch": 0.34, + "learning_rate": 5.363430899733244e-05, + "loss": 0.4667, + "step": 573500 + }, + { + "epoch": 0.34, + "learning_rate": 5.363220903177188e-05, + "loss": 0.4727, + "step": 574000 + }, + { + "epoch": 0.34, + "learning_rate": 5.363011326614244e-05, + "loss": 0.4633, + "step": 574500 + }, + { + "epoch": 0.34, + "learning_rate": 5.362801330058187e-05, + "loss": 0.4648, + "step": 575000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3625913335021304e-05, + "loss": 0.4565, + "step": 575500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3623813369460744e-05, + "loss": 0.4652, + "step": 576000 + }, + { + "epoch": 0.35, + "learning_rate": 5.362171340390018e-05, + "loss": 0.4649, + "step": 576500 + }, + { + "epoch": 0.35, + "learning_rate": 5.361961343833961e-05, + "loss": 0.4619, + "step": 577000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3617513472779044e-05, + "loss": 0.4586, + "step": 577500 + }, + { + "epoch": 0.35, + "learning_rate": 5.361541350721848e-05, + "loss": 0.4515, + "step": 578000 + }, + { + "epoch": 0.35, + "learning_rate": 5.361331354165792e-05, + "loss": 0.4511, + "step": 578500 + }, + { + "epoch": 0.35, + "learning_rate": 5.361121777602848e-05, + "loss": 0.479, + "step": 579000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3609117810467905e-05, + "loss": 0.4606, + "step": 579500 + }, + { + "epoch": 0.35, + "learning_rate": 5.360701784490734e-05, + "loss": 0.4543, + "step": 580000 + }, + { + "epoch": 0.35, + "learning_rate": 5.360491787934678e-05, + "loss": 0.4661, + "step": 580500 + }, + { + "epoch": 0.35, + "learning_rate": 5.360281791378621e-05, + "loss": 0.47, + "step": 581000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3600717948225646e-05, + "loss": 0.4654, + "step": 581500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3598617982665086e-05, + "loss": 0.4672, + "step": 582000 + }, + { + "epoch": 0.35, + "learning_rate": 5.359651801710452e-05, + "loss": 0.4666, + "step": 582500 + }, + { + "epoch": 0.35, + "learning_rate": 5.359442225147507e-05, + "loss": 0.4505, + "step": 583000 + }, + { + "epoch": 0.35, + "learning_rate": 5.359232648584563e-05, + "loss": 0.4659, + "step": 583500 + }, + { + "epoch": 0.35, + "learning_rate": 5.359022652028507e-05, + "loss": 0.4575, + "step": 584000 + }, + { + "epoch": 0.35, + "learning_rate": 5.35881265547245e-05, + "loss": 0.4572, + "step": 584500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3586026589163934e-05, + "loss": 0.46, + "step": 585000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3583926623603374e-05, + "loss": 0.463, + "step": 585500 + }, + { + "epoch": 0.35, + "learning_rate": 5.358182665804281e-05, + "loss": 0.4617, + "step": 586000 + }, + { + "epoch": 0.35, + "learning_rate": 5.357972669248224e-05, + "loss": 0.4695, + "step": 586500 + }, + { + "epoch": 0.35, + "learning_rate": 5.3577630926852794e-05, + "loss": 0.4552, + "step": 587000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3575530961292235e-05, + "loss": 0.471, + "step": 587500 + }, + { + "epoch": 0.35, + "learning_rate": 5.357343099573167e-05, + "loss": 0.4603, + "step": 588000 + }, + { + "epoch": 0.35, + "learning_rate": 5.35713310301711e-05, + "loss": 0.4541, + "step": 588500 + }, + { + "epoch": 0.35, + "learning_rate": 5.356923106461054e-05, + "loss": 0.4583, + "step": 589000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3567131099049975e-05, + "loss": 0.4849, + "step": 589500 + }, + { + "epoch": 0.35, + "learning_rate": 5.356503113348941e-05, + "loss": 0.4642, + "step": 590000 + }, + { + "epoch": 0.35, + "learning_rate": 5.356293536785996e-05, + "loss": 0.4594, + "step": 590500 + }, + { + "epoch": 0.35, + "learning_rate": 5.35608354022994e-05, + "loss": 0.4704, + "step": 591000 + }, + { + "epoch": 0.35, + "learning_rate": 5.3558735436738836e-05, + "loss": 0.4581, + "step": 591500 + }, + { + "epoch": 0.35, + "learning_rate": 5.355663547117827e-05, + "loss": 0.4688, + "step": 592000 + }, + { + "epoch": 0.36, + "learning_rate": 5.355453970554883e-05, + "loss": 0.4616, + "step": 592500 + }, + { + "epoch": 0.36, + "learning_rate": 5.355243973998826e-05, + "loss": 0.4545, + "step": 593000 + }, + { + "epoch": 0.36, + "learning_rate": 5.35503397744277e-05, + "loss": 0.4565, + "step": 593500 + }, + { + "epoch": 0.36, + "learning_rate": 5.354823980886714e-05, + "loss": 0.4676, + "step": 594000 + }, + { + "epoch": 0.36, + "learning_rate": 5.354613984330657e-05, + "loss": 0.46, + "step": 594500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3544039877746004e-05, + "loss": 0.4544, + "step": 595000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3541939912185444e-05, + "loss": 0.4461, + "step": 595500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3539844146556e-05, + "loss": 0.4712, + "step": 596000 + }, + { + "epoch": 0.36, + "learning_rate": 5.353774418099543e-05, + "loss": 0.467, + "step": 596500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3535644215434865e-05, + "loss": 0.4713, + "step": 597000 + }, + { + "epoch": 0.36, + "learning_rate": 5.3533544249874305e-05, + "loss": 0.4754, + "step": 597500 + }, + { + "epoch": 0.36, + "learning_rate": 5.353144428431374e-05, + "loss": 0.4569, + "step": 598000 + }, + { + "epoch": 0.36, + "learning_rate": 5.352934431875317e-05, + "loss": 0.4629, + "step": 598500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3527244353192605e-05, + "loss": 0.463, + "step": 599000 + }, + { + "epoch": 0.36, + "learning_rate": 5.352514438763204e-05, + "loss": 0.4675, + "step": 599500 + }, + { + "epoch": 0.36, + "learning_rate": 5.35230486220026e-05, + "loss": 0.4593, + "step": 600000 + }, + { + "epoch": 0.36, + "eval_loss": 0.43542608618736267, + "eval_runtime": 1110.5829, + "eval_samples_per_second": 474.273, + "eval_steps_per_second": 79.046, + "step": 600000 + }, + { + "epoch": 0.36, + "learning_rate": 5.352094865644203e-05, + "loss": 0.4673, + "step": 600500 + }, + { + "epoch": 0.36, + "learning_rate": 5.351884869088147e-05, + "loss": 0.468, + "step": 601000 + }, + { + "epoch": 0.36, + "learning_rate": 5.35167487253209e-05, + "loss": 0.4468, + "step": 601500 + }, + { + "epoch": 0.36, + "learning_rate": 5.351465295969146e-05, + "loss": 0.4703, + "step": 602000 + }, + { + "epoch": 0.36, + "learning_rate": 5.35125529941309e-05, + "loss": 0.4562, + "step": 602500 + }, + { + "epoch": 0.36, + "learning_rate": 5.351045302857033e-05, + "loss": 0.4582, + "step": 603000 + }, + { + "epoch": 0.36, + "learning_rate": 5.350835306300977e-05, + "loss": 0.4558, + "step": 603500 + }, + { + "epoch": 0.36, + "learning_rate": 5.350625729738032e-05, + "loss": 0.4562, + "step": 604000 + }, + { + "epoch": 0.36, + "learning_rate": 5.350415733181976e-05, + "loss": 0.4624, + "step": 604500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3502057366259194e-05, + "loss": 0.4595, + "step": 605000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349996160062975e-05, + "loss": 0.4726, + "step": 605500 + }, + { + "epoch": 0.36, + "learning_rate": 5.349786163506918e-05, + "loss": 0.4626, + "step": 606000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349576166950862e-05, + "loss": 0.464, + "step": 606500 + }, + { + "epoch": 0.36, + "learning_rate": 5.3493661703948055e-05, + "loss": 0.4572, + "step": 607000 + }, + { + "epoch": 0.36, + "learning_rate": 5.349156173838749e-05, + "loss": 0.46, + "step": 607500 + }, + { + "epoch": 0.36, + "learning_rate": 5.348946177282693e-05, + "loss": 0.4675, + "step": 608000 + }, + { + "epoch": 0.36, + "learning_rate": 5.348736180726636e-05, + "loss": 0.4633, + "step": 608500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3485261841705795e-05, + "loss": 0.4541, + "step": 609000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3483166076076356e-05, + "loss": 0.4592, + "step": 609500 + }, + { + "epoch": 0.37, + "learning_rate": 5.348107031044691e-05, + "loss": 0.4706, + "step": 610000 + }, + { + "epoch": 0.37, + "learning_rate": 5.347897034488634e-05, + "loss": 0.4518, + "step": 610500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3476870379325776e-05, + "loss": 0.4584, + "step": 611000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3474770413765217e-05, + "loss": 0.466, + "step": 611500 + }, + { + "epoch": 0.37, + "learning_rate": 5.347267044820465e-05, + "loss": 0.4534, + "step": 612000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3470570482644083e-05, + "loss": 0.4687, + "step": 612500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3468470517083524e-05, + "loss": 0.4647, + "step": 613000 + }, + { + "epoch": 0.37, + "learning_rate": 5.346637055152295e-05, + "loss": 0.4613, + "step": 613500 + }, + { + "epoch": 0.37, + "learning_rate": 5.346427478589351e-05, + "loss": 0.4587, + "step": 614000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3462174820332944e-05, + "loss": 0.4569, + "step": 614500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3460074854772384e-05, + "loss": 0.4707, + "step": 615000 + }, + { + "epoch": 0.37, + "learning_rate": 5.345797488921182e-05, + "loss": 0.464, + "step": 615500 + }, + { + "epoch": 0.37, + "learning_rate": 5.345587492365125e-05, + "loss": 0.4617, + "step": 616000 + }, + { + "epoch": 0.37, + "learning_rate": 5.345377915802181e-05, + "loss": 0.4549, + "step": 616500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3451679192461245e-05, + "loss": 0.4706, + "step": 617000 + }, + { + "epoch": 0.37, + "learning_rate": 5.344957922690068e-05, + "loss": 0.4793, + "step": 617500 + }, + { + "epoch": 0.37, + "learning_rate": 5.344747926134012e-05, + "loss": 0.4565, + "step": 618000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3445379295779545e-05, + "loss": 0.4618, + "step": 618500 + }, + { + "epoch": 0.37, + "learning_rate": 5.344327933021898e-05, + "loss": 0.4499, + "step": 619000 + }, + { + "epoch": 0.37, + "learning_rate": 5.344117936465842e-05, + "loss": 0.4606, + "step": 619500 + }, + { + "epoch": 0.37, + "learning_rate": 5.343908359902898e-05, + "loss": 0.4765, + "step": 620000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3436983633468406e-05, + "loss": 0.4644, + "step": 620500 + }, + { + "epoch": 0.37, + "learning_rate": 5.343488366790784e-05, + "loss": 0.4584, + "step": 621000 + }, + { + "epoch": 0.37, + "learning_rate": 5.343278370234728e-05, + "loss": 0.4528, + "step": 621500 + }, + { + "epoch": 0.37, + "learning_rate": 5.343068373678671e-05, + "loss": 0.4561, + "step": 622000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3428587971157274e-05, + "loss": 0.459, + "step": 622500 + }, + { + "epoch": 0.37, + "learning_rate": 5.342648800559671e-05, + "loss": 0.4615, + "step": 623000 + }, + { + "epoch": 0.37, + "learning_rate": 5.342438804003614e-05, + "loss": 0.4616, + "step": 623500 + }, + { + "epoch": 0.37, + "learning_rate": 5.3422288074475574e-05, + "loss": 0.4645, + "step": 624000 + }, + { + "epoch": 0.37, + "learning_rate": 5.3420188108915014e-05, + "loss": 0.4638, + "step": 624500 + }, + { + "epoch": 0.37, + "learning_rate": 5.341808814335445e-05, + "loss": 0.4588, + "step": 625000 + }, + { + "epoch": 0.38, + "learning_rate": 5.341598817779388e-05, + "loss": 0.4619, + "step": 625500 + }, + { + "epoch": 0.38, + "learning_rate": 5.341388821223332e-05, + "loss": 0.4573, + "step": 626000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3411792446603875e-05, + "loss": 0.4668, + "step": 626500 + }, + { + "epoch": 0.38, + "learning_rate": 5.340969248104331e-05, + "loss": 0.4602, + "step": 627000 + }, + { + "epoch": 0.38, + "learning_rate": 5.340759251548274e-05, + "loss": 0.4508, + "step": 627500 + }, + { + "epoch": 0.38, + "learning_rate": 5.340549254992218e-05, + "loss": 0.4435, + "step": 628000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3403392584361616e-05, + "loss": 0.4509, + "step": 628500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3401301018663296e-05, + "loss": 0.4595, + "step": 629000 + }, + { + "epoch": 0.38, + "learning_rate": 5.339920105310273e-05, + "loss": 0.4596, + "step": 629500 + }, + { + "epoch": 0.38, + "learning_rate": 5.339710108754216e-05, + "loss": 0.4466, + "step": 630000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3395001121981597e-05, + "loss": 0.4616, + "step": 630500 + }, + { + "epoch": 0.38, + "learning_rate": 5.339290115642103e-05, + "loss": 0.4483, + "step": 631000 + }, + { + "epoch": 0.38, + "learning_rate": 5.339080119086047e-05, + "loss": 0.4559, + "step": 631500 + }, + { + "epoch": 0.38, + "learning_rate": 5.338870542523103e-05, + "loss": 0.4635, + "step": 632000 + }, + { + "epoch": 0.38, + "learning_rate": 5.338660545967046e-05, + "loss": 0.444, + "step": 632500 + }, + { + "epoch": 0.38, + "learning_rate": 5.338450549410989e-05, + "loss": 0.4643, + "step": 633000 + }, + { + "epoch": 0.38, + "learning_rate": 5.338240552854933e-05, + "loss": 0.4559, + "step": 633500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3380305562988764e-05, + "loss": 0.4588, + "step": 634000 + }, + { + "epoch": 0.38, + "learning_rate": 5.33782055974282e-05, + "loss": 0.4573, + "step": 634500 + }, + { + "epoch": 0.38, + "learning_rate": 5.337610563186764e-05, + "loss": 0.4722, + "step": 635000 + }, + { + "epoch": 0.38, + "learning_rate": 5.337400566630707e-05, + "loss": 0.4547, + "step": 635500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3371909900677625e-05, + "loss": 0.4554, + "step": 636000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3369809935117065e-05, + "loss": 0.4545, + "step": 636500 + }, + { + "epoch": 0.38, + "learning_rate": 5.33677099695565e-05, + "loss": 0.4566, + "step": 637000 + }, + { + "epoch": 0.38, + "learning_rate": 5.336561000399593e-05, + "loss": 0.4576, + "step": 637500 + }, + { + "epoch": 0.38, + "learning_rate": 5.336351003843537e-05, + "loss": 0.4555, + "step": 638000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3361414272805926e-05, + "loss": 0.4577, + "step": 638500 + }, + { + "epoch": 0.38, + "learning_rate": 5.335931430724536e-05, + "loss": 0.4507, + "step": 639000 + }, + { + "epoch": 0.38, + "learning_rate": 5.335721434168479e-05, + "loss": 0.4561, + "step": 639500 + }, + { + "epoch": 0.38, + "learning_rate": 5.335511437612423e-05, + "loss": 0.4609, + "step": 640000 + }, + { + "epoch": 0.38, + "learning_rate": 5.335301861049479e-05, + "loss": 0.461, + "step": 640500 + }, + { + "epoch": 0.38, + "learning_rate": 5.335091864493422e-05, + "loss": 0.4559, + "step": 641000 + }, + { + "epoch": 0.38, + "learning_rate": 5.3348818679373654e-05, + "loss": 0.463, + "step": 641500 + }, + { + "epoch": 0.38, + "learning_rate": 5.3346718713813094e-05, + "loss": 0.4551, + "step": 642000 + }, + { + "epoch": 0.39, + "learning_rate": 5.334462294818365e-05, + "loss": 0.4758, + "step": 642500 + }, + { + "epoch": 0.39, + "learning_rate": 5.334252298262308e-05, + "loss": 0.4667, + "step": 643000 + }, + { + "epoch": 0.39, + "learning_rate": 5.334042301706252e-05, + "loss": 0.4464, + "step": 643500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3338323051501955e-05, + "loss": 0.4622, + "step": 644000 + }, + { + "epoch": 0.39, + "learning_rate": 5.333622308594139e-05, + "loss": 0.4475, + "step": 644500 + }, + { + "epoch": 0.39, + "learning_rate": 5.333412732031194e-05, + "loss": 0.4538, + "step": 645000 + }, + { + "epoch": 0.39, + "learning_rate": 5.333202735475138e-05, + "loss": 0.454, + "step": 645500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3329927389190815e-05, + "loss": 0.4572, + "step": 646000 + }, + { + "epoch": 0.39, + "learning_rate": 5.332782742363025e-05, + "loss": 0.4578, + "step": 646500 + }, + { + "epoch": 0.39, + "learning_rate": 5.332572745806969e-05, + "loss": 0.4602, + "step": 647000 + }, + { + "epoch": 0.39, + "learning_rate": 5.332362749250912e-05, + "loss": 0.4535, + "step": 647500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3321527526948556e-05, + "loss": 0.4637, + "step": 648000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3319427561387996e-05, + "loss": 0.4508, + "step": 648500 + }, + { + "epoch": 0.39, + "learning_rate": 5.331733179575855e-05, + "loss": 0.45, + "step": 649000 + }, + { + "epoch": 0.39, + "learning_rate": 5.331523183019798e-05, + "loss": 0.4555, + "step": 649500 + }, + { + "epoch": 0.39, + "learning_rate": 5.331313186463742e-05, + "loss": 0.4494, + "step": 650000 + }, + { + "epoch": 0.39, + "learning_rate": 5.331103189907686e-05, + "loss": 0.4599, + "step": 650500 + }, + { + "epoch": 0.39, + "learning_rate": 5.330893613344741e-05, + "loss": 0.4592, + "step": 651000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3306836167886844e-05, + "loss": 0.4575, + "step": 651500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3304736202326284e-05, + "loss": 0.4576, + "step": 652000 + }, + { + "epoch": 0.39, + "learning_rate": 5.330263623676572e-05, + "loss": 0.4569, + "step": 652500 + }, + { + "epoch": 0.39, + "learning_rate": 5.330054047113627e-05, + "loss": 0.4508, + "step": 653000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3298440505575705e-05, + "loss": 0.4563, + "step": 653500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3296340540015145e-05, + "loss": 0.4494, + "step": 654000 + }, + { + "epoch": 0.39, + "learning_rate": 5.329424057445458e-05, + "loss": 0.4573, + "step": 654500 + }, + { + "epoch": 0.39, + "learning_rate": 5.329214060889401e-05, + "loss": 0.4545, + "step": 655000 + }, + { + "epoch": 0.39, + "learning_rate": 5.3290044843264565e-05, + "loss": 0.4551, + "step": 655500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3287944877704006e-05, + "loss": 0.4568, + "step": 656000 + }, + { + "epoch": 0.39, + "learning_rate": 5.328584491214344e-05, + "loss": 0.4645, + "step": 656500 + }, + { + "epoch": 0.39, + "learning_rate": 5.328374914651399e-05, + "loss": 0.4496, + "step": 657000 + }, + { + "epoch": 0.39, + "learning_rate": 5.328164918095343e-05, + "loss": 0.4406, + "step": 657500 + }, + { + "epoch": 0.39, + "learning_rate": 5.3279549215392866e-05, + "loss": 0.4547, + "step": 658000 + }, + { + "epoch": 0.39, + "learning_rate": 5.32774492498323e-05, + "loss": 0.4476, + "step": 658500 + }, + { + "epoch": 0.4, + "learning_rate": 5.327534928427174e-05, + "loss": 0.4633, + "step": 659000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3273249318711174e-05, + "loss": 0.4662, + "step": 659500 + }, + { + "epoch": 0.4, + "learning_rate": 5.327114935315061e-05, + "loss": 0.4619, + "step": 660000 + }, + { + "epoch": 0.4, + "learning_rate": 5.326904938759005e-05, + "loss": 0.4683, + "step": 660500 + }, + { + "epoch": 0.4, + "learning_rate": 5.32669536219606e-05, + "loss": 0.4612, + "step": 661000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3264853656400034e-05, + "loss": 0.4559, + "step": 661500 + }, + { + "epoch": 0.4, + "learning_rate": 5.326275369083947e-05, + "loss": 0.4529, + "step": 662000 + }, + { + "epoch": 0.4, + "learning_rate": 5.326065372527891e-05, + "loss": 0.4654, + "step": 662500 + }, + { + "epoch": 0.4, + "learning_rate": 5.325855375971834e-05, + "loss": 0.4486, + "step": 663000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3256453794157775e-05, + "loss": 0.4569, + "step": 663500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3254353828597215e-05, + "loss": 0.4553, + "step": 664000 + }, + { + "epoch": 0.4, + "learning_rate": 5.325225386303664e-05, + "loss": 0.4501, + "step": 664500 + }, + { + "epoch": 0.4, + "learning_rate": 5.32501580974072e-05, + "loss": 0.4534, + "step": 665000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3248062331777756e-05, + "loss": 0.4603, + "step": 665500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3245962366217196e-05, + "loss": 0.4658, + "step": 666000 + }, + { + "epoch": 0.4, + "learning_rate": 5.324386240065663e-05, + "loss": 0.4492, + "step": 666500 + }, + { + "epoch": 0.4, + "learning_rate": 5.324176243509606e-05, + "loss": 0.4495, + "step": 667000 + }, + { + "epoch": 0.4, + "learning_rate": 5.32396624695355e-05, + "loss": 0.4616, + "step": 667500 + }, + { + "epoch": 0.4, + "learning_rate": 5.323756250397494e-05, + "loss": 0.462, + "step": 668000 + }, + { + "epoch": 0.4, + "learning_rate": 5.323546673834549e-05, + "loss": 0.4633, + "step": 668500 + }, + { + "epoch": 0.4, + "learning_rate": 5.3233366772784924e-05, + "loss": 0.4594, + "step": 669000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3231266807224364e-05, + "loss": 0.4669, + "step": 669500 + }, + { + "epoch": 0.4, + "learning_rate": 5.32291668416638e-05, + "loss": 0.4514, + "step": 670000 + }, + { + "epoch": 0.4, + "learning_rate": 5.322706687610323e-05, + "loss": 0.4523, + "step": 670500 + }, + { + "epoch": 0.4, + "learning_rate": 5.322496691054267e-05, + "loss": 0.4481, + "step": 671000 + }, + { + "epoch": 0.4, + "learning_rate": 5.32228669449821e-05, + "loss": 0.4509, + "step": 671500 + }, + { + "epoch": 0.4, + "learning_rate": 5.322077117935266e-05, + "loss": 0.4422, + "step": 672000 + }, + { + "epoch": 0.4, + "learning_rate": 5.321867121379209e-05, + "loss": 0.4521, + "step": 672500 + }, + { + "epoch": 0.4, + "learning_rate": 5.321657124823153e-05, + "loss": 0.4576, + "step": 673000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3214471282670965e-05, + "loss": 0.4526, + "step": 673500 + }, + { + "epoch": 0.4, + "learning_rate": 5.321237551704152e-05, + "loss": 0.4625, + "step": 674000 + }, + { + "epoch": 0.4, + "learning_rate": 5.321027555148096e-05, + "loss": 0.4474, + "step": 674500 + }, + { + "epoch": 0.4, + "learning_rate": 5.320817558592039e-05, + "loss": 0.4539, + "step": 675000 + }, + { + "epoch": 0.4, + "learning_rate": 5.3206075620359826e-05, + "loss": 0.4556, + "step": 675500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3203975654799266e-05, + "loss": 0.4568, + "step": 676000 + }, + { + "epoch": 0.41, + "learning_rate": 5.320187568923869e-05, + "loss": 0.4549, + "step": 676500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3199775723678126e-05, + "loss": 0.4563, + "step": 677000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3197675758117567e-05, + "loss": 0.4653, + "step": 677500 + }, + { + "epoch": 0.41, + "learning_rate": 5.319558419241924e-05, + "loss": 0.4474, + "step": 678000 + }, + { + "epoch": 0.41, + "learning_rate": 5.319348422685868e-05, + "loss": 0.4481, + "step": 678500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3191384261298114e-05, + "loss": 0.4532, + "step": 679000 + }, + { + "epoch": 0.41, + "learning_rate": 5.318928429573755e-05, + "loss": 0.4522, + "step": 679500 + }, + { + "epoch": 0.41, + "learning_rate": 5.318718433017699e-05, + "loss": 0.4516, + "step": 680000 + }, + { + "epoch": 0.41, + "learning_rate": 5.318508856454754e-05, + "loss": 0.4476, + "step": 680500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3182988598986975e-05, + "loss": 0.4529, + "step": 681000 + }, + { + "epoch": 0.41, + "learning_rate": 5.318089283335753e-05, + "loss": 0.4511, + "step": 681500 + }, + { + "epoch": 0.41, + "learning_rate": 5.317879706772809e-05, + "loss": 0.4487, + "step": 682000 + }, + { + "epoch": 0.41, + "learning_rate": 5.317669710216752e-05, + "loss": 0.4599, + "step": 682500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3174597136606956e-05, + "loss": 0.4508, + "step": 683000 + }, + { + "epoch": 0.41, + "learning_rate": 5.317249717104639e-05, + "loss": 0.459, + "step": 683500 + }, + { + "epoch": 0.41, + "learning_rate": 5.317039720548583e-05, + "loss": 0.448, + "step": 684000 + }, + { + "epoch": 0.41, + "learning_rate": 5.316829723992526e-05, + "loss": 0.4419, + "step": 684500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3166197274364696e-05, + "loss": 0.448, + "step": 685000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3164097308804136e-05, + "loss": 0.4495, + "step": 685500 + }, + { + "epoch": 0.41, + "learning_rate": 5.316199734324357e-05, + "loss": 0.4488, + "step": 686000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3159897377683e-05, + "loss": 0.4545, + "step": 686500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3157797412122444e-05, + "loss": 0.4565, + "step": 687000 + }, + { + "epoch": 0.41, + "learning_rate": 5.315569744656188e-05, + "loss": 0.4453, + "step": 687500 + }, + { + "epoch": 0.41, + "learning_rate": 5.315359748100131e-05, + "loss": 0.4494, + "step": 688000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3151497515440744e-05, + "loss": 0.4491, + "step": 688500 + }, + { + "epoch": 0.41, + "learning_rate": 5.3149401749811304e-05, + "loss": 0.4695, + "step": 689000 + }, + { + "epoch": 0.41, + "learning_rate": 5.314730178425074e-05, + "loss": 0.4485, + "step": 689500 + }, + { + "epoch": 0.41, + "learning_rate": 5.314520181869018e-05, + "loss": 0.4554, + "step": 690000 + }, + { + "epoch": 0.41, + "learning_rate": 5.3143101853129605e-05, + "loss": 0.4438, + "step": 690500 + }, + { + "epoch": 0.41, + "learning_rate": 5.314100188756904e-05, + "loss": 0.4545, + "step": 691000 + }, + { + "epoch": 0.41, + "learning_rate": 5.313890192200848e-05, + "loss": 0.4571, + "step": 691500 + }, + { + "epoch": 0.41, + "learning_rate": 5.313680195644791e-05, + "loss": 0.458, + "step": 692000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3134701990887345e-05, + "loss": 0.454, + "step": 692500 + }, + { + "epoch": 0.42, + "learning_rate": 5.31326062252579e-05, + "loss": 0.4603, + "step": 693000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3130510459628466e-05, + "loss": 0.453, + "step": 693500 + }, + { + "epoch": 0.42, + "learning_rate": 5.31284104940679e-05, + "loss": 0.4468, + "step": 694000 + }, + { + "epoch": 0.42, + "learning_rate": 5.312631052850733e-05, + "loss": 0.4517, + "step": 694500 + }, + { + "epoch": 0.42, + "learning_rate": 5.312421056294677e-05, + "loss": 0.452, + "step": 695000 + }, + { + "epoch": 0.42, + "learning_rate": 5.312211479731733e-05, + "loss": 0.4458, + "step": 695500 + }, + { + "epoch": 0.42, + "learning_rate": 5.312001483175676e-05, + "loss": 0.4548, + "step": 696000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3117914866196194e-05, + "loss": 0.4561, + "step": 696500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3115814900635634e-05, + "loss": 0.4511, + "step": 697000 + }, + { + "epoch": 0.42, + "learning_rate": 5.311371493507506e-05, + "loss": 0.4478, + "step": 697500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3111614969514494e-05, + "loss": 0.4571, + "step": 698000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3109515003953934e-05, + "loss": 0.4482, + "step": 698500 + }, + { + "epoch": 0.42, + "learning_rate": 5.310741503839337e-05, + "loss": 0.4553, + "step": 699000 + }, + { + "epoch": 0.42, + "learning_rate": 5.31053150728328e-05, + "loss": 0.4543, + "step": 699500 + }, + { + "epoch": 0.42, + "learning_rate": 5.310321510727224e-05, + "loss": 0.4582, + "step": 700000 + }, + { + "epoch": 0.42, + "eval_loss": 0.4269881248474121, + "eval_runtime": 1108.4352, + "eval_samples_per_second": 475.192, + "eval_steps_per_second": 79.199, + "step": 700000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3101115141711675e-05, + "loss": 0.4571, + "step": 700500 + }, + { + "epoch": 0.42, + "learning_rate": 5.309901937608223e-05, + "loss": 0.4621, + "step": 701000 + }, + { + "epoch": 0.42, + "learning_rate": 5.309691941052167e-05, + "loss": 0.4606, + "step": 701500 + }, + { + "epoch": 0.42, + "learning_rate": 5.30948194449611e-05, + "loss": 0.4538, + "step": 702000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3092719479400536e-05, + "loss": 0.4627, + "step": 702500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3090619513839976e-05, + "loss": 0.4532, + "step": 703000 + }, + { + "epoch": 0.42, + "learning_rate": 5.308851954827941e-05, + "loss": 0.4618, + "step": 703500 + }, + { + "epoch": 0.42, + "learning_rate": 5.308642378264996e-05, + "loss": 0.455, + "step": 704000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3084323817089396e-05, + "loss": 0.4536, + "step": 704500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3082223851528837e-05, + "loss": 0.4464, + "step": 705000 + }, + { + "epoch": 0.42, + "learning_rate": 5.308012388596827e-05, + "loss": 0.4518, + "step": 705500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3078023920407703e-05, + "loss": 0.4437, + "step": 706000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3075923954847144e-05, + "loss": 0.4384, + "step": 706500 + }, + { + "epoch": 0.42, + "learning_rate": 5.307382398928658e-05, + "loss": 0.4546, + "step": 707000 + }, + { + "epoch": 0.42, + "learning_rate": 5.307172402372601e-05, + "loss": 0.4364, + "step": 707500 + }, + { + "epoch": 0.42, + "learning_rate": 5.3069628258096564e-05, + "loss": 0.4505, + "step": 708000 + }, + { + "epoch": 0.42, + "learning_rate": 5.3067532492467125e-05, + "loss": 0.4478, + "step": 708500 + }, + { + "epoch": 0.43, + "learning_rate": 5.306543252690656e-05, + "loss": 0.4562, + "step": 709000 + }, + { + "epoch": 0.43, + "learning_rate": 5.306333256134599e-05, + "loss": 0.4514, + "step": 709500 + }, + { + "epoch": 0.43, + "learning_rate": 5.306123259578543e-05, + "loss": 0.454, + "step": 710000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3059132630224865e-05, + "loss": 0.4432, + "step": 710500 + }, + { + "epoch": 0.43, + "learning_rate": 5.30570326646643e-05, + "loss": 0.4397, + "step": 711000 + }, + { + "epoch": 0.43, + "learning_rate": 5.305493269910374e-05, + "loss": 0.4571, + "step": 711500 + }, + { + "epoch": 0.43, + "learning_rate": 5.305283273354317e-05, + "loss": 0.4442, + "step": 712000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3050736967913726e-05, + "loss": 0.445, + "step": 712500 + }, + { + "epoch": 0.43, + "learning_rate": 5.304864120228428e-05, + "loss": 0.4491, + "step": 713000 + }, + { + "epoch": 0.43, + "learning_rate": 5.304654123672371e-05, + "loss": 0.4447, + "step": 713500 + }, + { + "epoch": 0.43, + "learning_rate": 5.304444127116315e-05, + "loss": 0.4656, + "step": 714000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3042341305602587e-05, + "loss": 0.4451, + "step": 714500 + }, + { + "epoch": 0.43, + "learning_rate": 5.304024134004202e-05, + "loss": 0.444, + "step": 715000 + }, + { + "epoch": 0.43, + "learning_rate": 5.303814137448146e-05, + "loss": 0.4479, + "step": 715500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3036041408920894e-05, + "loss": 0.4543, + "step": 716000 + }, + { + "epoch": 0.43, + "learning_rate": 5.303394564329145e-05, + "loss": 0.4549, + "step": 716500 + }, + { + "epoch": 0.43, + "learning_rate": 5.303184567773089e-05, + "loss": 0.4589, + "step": 717000 + }, + { + "epoch": 0.43, + "learning_rate": 5.302974571217032e-05, + "loss": 0.4537, + "step": 717500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3027645746609754e-05, + "loss": 0.4489, + "step": 718000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3025545781049195e-05, + "loss": 0.4559, + "step": 718500 + }, + { + "epoch": 0.43, + "learning_rate": 5.302344581548863e-05, + "loss": 0.4496, + "step": 719000 + }, + { + "epoch": 0.43, + "learning_rate": 5.302134584992806e-05, + "loss": 0.4671, + "step": 719500 + }, + { + "epoch": 0.43, + "learning_rate": 5.3019245884367495e-05, + "loss": 0.4498, + "step": 720000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3017150118738055e-05, + "loss": 0.4406, + "step": 720500 + }, + { + "epoch": 0.43, + "learning_rate": 5.301505015317749e-05, + "loss": 0.4509, + "step": 721000 + }, + { + "epoch": 0.43, + "learning_rate": 5.301295018761692e-05, + "loss": 0.4578, + "step": 721500 + }, + { + "epoch": 0.43, + "learning_rate": 5.301085022205636e-05, + "loss": 0.4481, + "step": 722000 + }, + { + "epoch": 0.43, + "learning_rate": 5.3008754456426916e-05, + "loss": 0.4471, + "step": 722500 + }, + { + "epoch": 0.43, + "learning_rate": 5.300665449086635e-05, + "loss": 0.4489, + "step": 723000 + }, + { + "epoch": 0.43, + "learning_rate": 5.300455452530578e-05, + "loss": 0.4407, + "step": 723500 + }, + { + "epoch": 0.43, + "learning_rate": 5.300245455974522e-05, + "loss": 0.4565, + "step": 724000 + }, + { + "epoch": 0.43, + "learning_rate": 5.300035879411578e-05, + "loss": 0.4565, + "step": 724500 + }, + { + "epoch": 0.43, + "learning_rate": 5.299825882855521e-05, + "loss": 0.447, + "step": 725000 + }, + { + "epoch": 0.43, + "learning_rate": 5.299615886299465e-05, + "loss": 0.4546, + "step": 725500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2994063097365204e-05, + "loss": 0.4453, + "step": 726000 + }, + { + "epoch": 0.44, + "learning_rate": 5.299196313180464e-05, + "loss": 0.4492, + "step": 726500 + }, + { + "epoch": 0.44, + "learning_rate": 5.298986316624407e-05, + "loss": 0.4428, + "step": 727000 + }, + { + "epoch": 0.44, + "learning_rate": 5.298776320068351e-05, + "loss": 0.4514, + "step": 727500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2985663235122945e-05, + "loss": 0.4514, + "step": 728000 + }, + { + "epoch": 0.44, + "learning_rate": 5.298356326956238e-05, + "loss": 0.4422, + "step": 728500 + }, + { + "epoch": 0.44, + "learning_rate": 5.298146330400182e-05, + "loss": 0.441, + "step": 729000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2979363338441245e-05, + "loss": 0.4472, + "step": 729500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2977267572811806e-05, + "loss": 0.4494, + "step": 730000 + }, + { + "epoch": 0.44, + "learning_rate": 5.297516760725124e-05, + "loss": 0.4444, + "step": 730500 + }, + { + "epoch": 0.44, + "learning_rate": 5.297306764169068e-05, + "loss": 0.4507, + "step": 731000 + }, + { + "epoch": 0.44, + "learning_rate": 5.297097187606123e-05, + "loss": 0.4621, + "step": 731500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2968871910500666e-05, + "loss": 0.4545, + "step": 732000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2966771944940106e-05, + "loss": 0.4473, + "step": 732500 + }, + { + "epoch": 0.44, + "learning_rate": 5.296467197937954e-05, + "loss": 0.4438, + "step": 733000 + }, + { + "epoch": 0.44, + "learning_rate": 5.296257201381897e-05, + "loss": 0.4627, + "step": 733500 + }, + { + "epoch": 0.44, + "learning_rate": 5.296047204825841e-05, + "loss": 0.4542, + "step": 734000 + }, + { + "epoch": 0.44, + "learning_rate": 5.295837208269784e-05, + "loss": 0.4528, + "step": 734500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2956272117137274e-05, + "loss": 0.44, + "step": 735000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2954172151576714e-05, + "loss": 0.4465, + "step": 735500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2952076385947274e-05, + "loss": 0.4552, + "step": 736000 + }, + { + "epoch": 0.44, + "learning_rate": 5.29499764203867e-05, + "loss": 0.4448, + "step": 736500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2947876454826134e-05, + "loss": 0.4532, + "step": 737000 + }, + { + "epoch": 0.44, + "learning_rate": 5.2945776489265575e-05, + "loss": 0.4518, + "step": 737500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2943684923567255e-05, + "loss": 0.4466, + "step": 738000 + }, + { + "epoch": 0.44, + "learning_rate": 5.294158495800669e-05, + "loss": 0.4488, + "step": 738500 + }, + { + "epoch": 0.44, + "learning_rate": 5.293948499244612e-05, + "loss": 0.4484, + "step": 739000 + }, + { + "epoch": 0.44, + "learning_rate": 5.293738502688556e-05, + "loss": 0.4589, + "step": 739500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2935285061324996e-05, + "loss": 0.4436, + "step": 740000 + }, + { + "epoch": 0.44, + "learning_rate": 5.293318929569555e-05, + "loss": 0.4546, + "step": 740500 + }, + { + "epoch": 0.44, + "learning_rate": 5.293108933013498e-05, + "loss": 0.4572, + "step": 741000 + }, + { + "epoch": 0.44, + "learning_rate": 5.292898936457442e-05, + "loss": 0.4454, + "step": 741500 + }, + { + "epoch": 0.44, + "learning_rate": 5.2926889399013857e-05, + "loss": 0.4524, + "step": 742000 + }, + { + "epoch": 0.45, + "learning_rate": 5.292478943345329e-05, + "loss": 0.4532, + "step": 742500 + }, + { + "epoch": 0.45, + "learning_rate": 5.292268946789273e-05, + "loss": 0.4563, + "step": 743000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2920589502332164e-05, + "loss": 0.4524, + "step": 743500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291848953677159e-05, + "loss": 0.4459, + "step": 744000 + }, + { + "epoch": 0.45, + "learning_rate": 5.291639377114215e-05, + "loss": 0.4542, + "step": 744500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291429380558159e-05, + "loss": 0.4523, + "step": 745000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2912193840021024e-05, + "loss": 0.4449, + "step": 745500 + }, + { + "epoch": 0.45, + "learning_rate": 5.291009387446046e-05, + "loss": 0.4411, + "step": 746000 + }, + { + "epoch": 0.45, + "learning_rate": 5.290799810883102e-05, + "loss": 0.4444, + "step": 746500 + }, + { + "epoch": 0.45, + "learning_rate": 5.290589814327045e-05, + "loss": 0.4446, + "step": 747000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2903798177709885e-05, + "loss": 0.4558, + "step": 747500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2901698212149325e-05, + "loss": 0.4553, + "step": 748000 + }, + { + "epoch": 0.45, + "learning_rate": 5.289959824658875e-05, + "loss": 0.4408, + "step": 748500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2897498281028186e-05, + "loss": 0.4449, + "step": 749000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2895398315467626e-05, + "loss": 0.4527, + "step": 749500 + }, + { + "epoch": 0.45, + "learning_rate": 5.289329834990706e-05, + "loss": 0.4487, + "step": 750000 + }, + { + "epoch": 0.45, + "learning_rate": 5.289119838434649e-05, + "loss": 0.4545, + "step": 750500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2889102618717046e-05, + "loss": 0.4516, + "step": 751000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2887002653156486e-05, + "loss": 0.4579, + "step": 751500 + }, + { + "epoch": 0.45, + "learning_rate": 5.288490268759592e-05, + "loss": 0.4509, + "step": 752000 + }, + { + "epoch": 0.45, + "learning_rate": 5.288280272203536e-05, + "loss": 0.4582, + "step": 752500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2880711156337034e-05, + "loss": 0.4493, + "step": 753000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2878611190776474e-05, + "loss": 0.4605, + "step": 753500 + }, + { + "epoch": 0.45, + "learning_rate": 5.287651122521591e-05, + "loss": 0.4387, + "step": 754000 + }, + { + "epoch": 0.45, + "learning_rate": 5.287441125965534e-05, + "loss": 0.4578, + "step": 754500 + }, + { + "epoch": 0.45, + "learning_rate": 5.287231129409478e-05, + "loss": 0.4476, + "step": 755000 + }, + { + "epoch": 0.45, + "learning_rate": 5.287021972839645e-05, + "loss": 0.4494, + "step": 755500 + }, + { + "epoch": 0.45, + "learning_rate": 5.286811976283589e-05, + "loss": 0.4319, + "step": 756000 + }, + { + "epoch": 0.45, + "learning_rate": 5.286601979727532e-05, + "loss": 0.4443, + "step": 756500 + }, + { + "epoch": 0.45, + "learning_rate": 5.2863919831714755e-05, + "loss": 0.4436, + "step": 757000 + }, + { + "epoch": 0.45, + "learning_rate": 5.2861819866154196e-05, + "loss": 0.4432, + "step": 757500 + }, + { + "epoch": 0.45, + "learning_rate": 5.285971990059363e-05, + "loss": 0.4387, + "step": 758000 + }, + { + "epoch": 0.45, + "learning_rate": 5.285761993503306e-05, + "loss": 0.4475, + "step": 758500 + }, + { + "epoch": 0.46, + "learning_rate": 5.28555199694725e-05, + "loss": 0.4494, + "step": 759000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2853420003911936e-05, + "loss": 0.4418, + "step": 759500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2851320038351376e-05, + "loss": 0.4497, + "step": 760000 + }, + { + "epoch": 0.46, + "learning_rate": 5.28492200727908e-05, + "loss": 0.4469, + "step": 760500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2847120107230237e-05, + "loss": 0.4565, + "step": 761000 + }, + { + "epoch": 0.46, + "learning_rate": 5.28450243416008e-05, + "loss": 0.4353, + "step": 761500 + }, + { + "epoch": 0.46, + "learning_rate": 5.284292857597135e-05, + "loss": 0.4496, + "step": 762000 + }, + { + "epoch": 0.46, + "learning_rate": 5.284082861041079e-05, + "loss": 0.4501, + "step": 762500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2838728644850224e-05, + "loss": 0.4437, + "step": 763000 + }, + { + "epoch": 0.46, + "learning_rate": 5.283662867928966e-05, + "loss": 0.4548, + "step": 763500 + }, + { + "epoch": 0.46, + "learning_rate": 5.28345287137291e-05, + "loss": 0.4443, + "step": 764000 + }, + { + "epoch": 0.46, + "learning_rate": 5.283242874816853e-05, + "loss": 0.4521, + "step": 764500 + }, + { + "epoch": 0.46, + "learning_rate": 5.283032878260796e-05, + "loss": 0.4486, + "step": 765000 + }, + { + "epoch": 0.46, + "learning_rate": 5.28282288170474e-05, + "loss": 0.461, + "step": 765500 + }, + { + "epoch": 0.46, + "learning_rate": 5.282613305141796e-05, + "loss": 0.4415, + "step": 766000 + }, + { + "epoch": 0.46, + "learning_rate": 5.282403308585739e-05, + "loss": 0.4386, + "step": 766500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2821937320227946e-05, + "loss": 0.4413, + "step": 767000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2819837354667386e-05, + "loss": 0.4485, + "step": 767500 + }, + { + "epoch": 0.46, + "learning_rate": 5.281773738910682e-05, + "loss": 0.4467, + "step": 768000 + }, + { + "epoch": 0.46, + "learning_rate": 5.281563742354625e-05, + "loss": 0.4419, + "step": 768500 + }, + { + "epoch": 0.46, + "learning_rate": 5.281353745798569e-05, + "loss": 0.4469, + "step": 769000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2811437492425126e-05, + "loss": 0.4462, + "step": 769500 + }, + { + "epoch": 0.46, + "learning_rate": 5.280933752686455e-05, + "loss": 0.4403, + "step": 770000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2807241761235114e-05, + "loss": 0.4349, + "step": 770500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2805141795674554e-05, + "loss": 0.4533, + "step": 771000 + }, + { + "epoch": 0.46, + "learning_rate": 5.280304183011399e-05, + "loss": 0.449, + "step": 771500 + }, + { + "epoch": 0.46, + "learning_rate": 5.280094186455342e-05, + "loss": 0.4477, + "step": 772000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2798841898992854e-05, + "loss": 0.4441, + "step": 772500 + }, + { + "epoch": 0.46, + "learning_rate": 5.2796746133363414e-05, + "loss": 0.4492, + "step": 773000 + }, + { + "epoch": 0.46, + "learning_rate": 5.279464616780285e-05, + "loss": 0.4552, + "step": 773500 + }, + { + "epoch": 0.46, + "learning_rate": 5.279254620224229e-05, + "loss": 0.4507, + "step": 774000 + }, + { + "epoch": 0.46, + "learning_rate": 5.2790446236681715e-05, + "loss": 0.4546, + "step": 774500 + }, + { + "epoch": 0.46, + "learning_rate": 5.278834627112115e-05, + "loss": 0.4438, + "step": 775000 + }, + { + "epoch": 0.46, + "learning_rate": 5.278624630556059e-05, + "loss": 0.4503, + "step": 775500 + }, + { + "epoch": 0.47, + "learning_rate": 5.278414634000002e-05, + "loss": 0.4452, + "step": 776000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2782046374439455e-05, + "loss": 0.4427, + "step": 776500 + }, + { + "epoch": 0.47, + "learning_rate": 5.277995060881001e-05, + "loss": 0.4503, + "step": 777000 + }, + { + "epoch": 0.47, + "learning_rate": 5.277785064324945e-05, + "loss": 0.4369, + "step": 777500 + }, + { + "epoch": 0.47, + "learning_rate": 5.277575067768888e-05, + "loss": 0.4345, + "step": 778000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2773650712128316e-05, + "loss": 0.4388, + "step": 778500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2771550746567756e-05, + "loss": 0.4378, + "step": 779000 + }, + { + "epoch": 0.47, + "learning_rate": 5.276945078100719e-05, + "loss": 0.4496, + "step": 779500 + }, + { + "epoch": 0.47, + "learning_rate": 5.276735081544662e-05, + "loss": 0.4411, + "step": 780000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2765250849886064e-05, + "loss": 0.4408, + "step": 780500 + }, + { + "epoch": 0.47, + "learning_rate": 5.276315508425662e-05, + "loss": 0.4326, + "step": 781000 + }, + { + "epoch": 0.47, + "learning_rate": 5.276105931862718e-05, + "loss": 0.4496, + "step": 781500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2758959353066604e-05, + "loss": 0.4357, + "step": 782000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2756859387506044e-05, + "loss": 0.4396, + "step": 782500 + }, + { + "epoch": 0.47, + "learning_rate": 5.275475942194548e-05, + "loss": 0.4417, + "step": 783000 + }, + { + "epoch": 0.47, + "learning_rate": 5.275265945638491e-05, + "loss": 0.4503, + "step": 783500 + }, + { + "epoch": 0.47, + "learning_rate": 5.275055949082435e-05, + "loss": 0.4496, + "step": 784000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2748459525263785e-05, + "loss": 0.4564, + "step": 784500 + }, + { + "epoch": 0.47, + "learning_rate": 5.274635955970322e-05, + "loss": 0.445, + "step": 785000 + }, + { + "epoch": 0.47, + "learning_rate": 5.274425959414266e-05, + "loss": 0.4512, + "step": 785500 + }, + { + "epoch": 0.47, + "learning_rate": 5.274216382851321e-05, + "loss": 0.4452, + "step": 786000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2740063862952646e-05, + "loss": 0.4423, + "step": 786500 + }, + { + "epoch": 0.47, + "learning_rate": 5.273796389739208e-05, + "loss": 0.4485, + "step": 787000 + }, + { + "epoch": 0.47, + "learning_rate": 5.273586393183152e-05, + "loss": 0.4407, + "step": 787500 + }, + { + "epoch": 0.47, + "learning_rate": 5.273376816620207e-05, + "loss": 0.4463, + "step": 788000 + }, + { + "epoch": 0.47, + "learning_rate": 5.273167240057263e-05, + "loss": 0.4499, + "step": 788500 + }, + { + "epoch": 0.47, + "learning_rate": 5.272957243501206e-05, + "loss": 0.4491, + "step": 789000 + }, + { + "epoch": 0.47, + "learning_rate": 5.27274724694515e-05, + "loss": 0.4454, + "step": 789500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2725372503890934e-05, + "loss": 0.4461, + "step": 790000 + }, + { + "epoch": 0.47, + "learning_rate": 5.2723276738261494e-05, + "loss": 0.4367, + "step": 790500 + }, + { + "epoch": 0.47, + "learning_rate": 5.272117677270093e-05, + "loss": 0.4494, + "step": 791000 + }, + { + "epoch": 0.47, + "learning_rate": 5.271907680714036e-05, + "loss": 0.4404, + "step": 791500 + }, + { + "epoch": 0.47, + "learning_rate": 5.2716976841579794e-05, + "loss": 0.4535, + "step": 792000 + }, + { + "epoch": 0.48, + "learning_rate": 5.271487687601923e-05, + "loss": 0.4564, + "step": 792500 + }, + { + "epoch": 0.48, + "learning_rate": 5.271277691045867e-05, + "loss": 0.4513, + "step": 793000 + }, + { + "epoch": 0.48, + "learning_rate": 5.27106769448981e-05, + "loss": 0.4486, + "step": 793500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2708576979337535e-05, + "loss": 0.4489, + "step": 794000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2706481213708095e-05, + "loss": 0.436, + "step": 794500 + }, + { + "epoch": 0.48, + "learning_rate": 5.270438124814753e-05, + "loss": 0.4431, + "step": 795000 + }, + { + "epoch": 0.48, + "learning_rate": 5.270228128258696e-05, + "loss": 0.4457, + "step": 795500 + }, + { + "epoch": 0.48, + "learning_rate": 5.27001813170264e-05, + "loss": 0.4371, + "step": 796000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2698089751328076e-05, + "loss": 0.4413, + "step": 796500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2695989785767517e-05, + "loss": 0.4503, + "step": 797000 + }, + { + "epoch": 0.48, + "learning_rate": 5.269388982020695e-05, + "loss": 0.4531, + "step": 797500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2691789854646383e-05, + "loss": 0.4475, + "step": 798000 + }, + { + "epoch": 0.48, + "learning_rate": 5.268968988908582e-05, + "loss": 0.4366, + "step": 798500 + }, + { + "epoch": 0.48, + "learning_rate": 5.268758992352525e-05, + "loss": 0.4336, + "step": 799000 + }, + { + "epoch": 0.48, + "learning_rate": 5.268549415789581e-05, + "loss": 0.4513, + "step": 799500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2683394192335244e-05, + "loss": 0.4385, + "step": 800000 + }, + { + "epoch": 0.48, + "eval_loss": 0.4203203022480011, + "eval_runtime": 1112.9021, + "eval_samples_per_second": 473.285, + "eval_steps_per_second": 78.881, + "step": 800000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2681294226774684e-05, + "loss": 0.4351, + "step": 800500 + }, + { + "epoch": 0.48, + "learning_rate": 5.267919426121411e-05, + "loss": 0.4367, + "step": 801000 + }, + { + "epoch": 0.48, + "learning_rate": 5.267709429565355e-05, + "loss": 0.4444, + "step": 801500 + }, + { + "epoch": 0.48, + "learning_rate": 5.267499853002411e-05, + "loss": 0.4397, + "step": 802000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2672898564463545e-05, + "loss": 0.4394, + "step": 802500 + }, + { + "epoch": 0.48, + "learning_rate": 5.267079859890298e-05, + "loss": 0.438, + "step": 803000 + }, + { + "epoch": 0.48, + "learning_rate": 5.266869863334241e-05, + "loss": 0.4468, + "step": 803500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2666598667781846e-05, + "loss": 0.441, + "step": 804000 + }, + { + "epoch": 0.48, + "learning_rate": 5.266449870222128e-05, + "loss": 0.4408, + "step": 804500 + }, + { + "epoch": 0.48, + "learning_rate": 5.266239873666072e-05, + "loss": 0.4369, + "step": 805000 + }, + { + "epoch": 0.48, + "learning_rate": 5.266030297103127e-05, + "loss": 0.4508, + "step": 805500 + }, + { + "epoch": 0.48, + "learning_rate": 5.2658203005470706e-05, + "loss": 0.4282, + "step": 806000 + }, + { + "epoch": 0.48, + "learning_rate": 5.265610303991014e-05, + "loss": 0.436, + "step": 806500 + }, + { + "epoch": 0.48, + "learning_rate": 5.265400307434958e-05, + "loss": 0.4386, + "step": 807000 + }, + { + "epoch": 0.48, + "learning_rate": 5.2651903108789013e-05, + "loss": 0.4434, + "step": 807500 + }, + { + "epoch": 0.48, + "learning_rate": 5.264980314322845e-05, + "loss": 0.4483, + "step": 808000 + }, + { + "epoch": 0.48, + "learning_rate": 5.264770317766789e-05, + "loss": 0.4415, + "step": 808500 + }, + { + "epoch": 0.49, + "learning_rate": 5.264560321210732e-05, + "loss": 0.449, + "step": 809000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2643503246546754e-05, + "loss": 0.4413, + "step": 809500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2641407480917314e-05, + "loss": 0.4402, + "step": 810000 + }, + { + "epoch": 0.49, + "learning_rate": 5.263930751535675e-05, + "loss": 0.4547, + "step": 810500 + }, + { + "epoch": 0.49, + "learning_rate": 5.263720754979618e-05, + "loss": 0.4462, + "step": 811000 + }, + { + "epoch": 0.49, + "learning_rate": 5.263510758423562e-05, + "loss": 0.4455, + "step": 811500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2633011818606175e-05, + "loss": 0.4442, + "step": 812000 + }, + { + "epoch": 0.49, + "learning_rate": 5.263091185304561e-05, + "loss": 0.4463, + "step": 812500 + }, + { + "epoch": 0.49, + "learning_rate": 5.262881188748504e-05, + "loss": 0.4529, + "step": 813000 + }, + { + "epoch": 0.49, + "learning_rate": 5.262671192192448e-05, + "loss": 0.4384, + "step": 813500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2624616156295036e-05, + "loss": 0.4476, + "step": 814000 + }, + { + "epoch": 0.49, + "learning_rate": 5.262251619073447e-05, + "loss": 0.4426, + "step": 814500 + }, + { + "epoch": 0.49, + "learning_rate": 5.26204162251739e-05, + "loss": 0.4441, + "step": 815000 + }, + { + "epoch": 0.49, + "learning_rate": 5.261831625961334e-05, + "loss": 0.4477, + "step": 815500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2616220493983897e-05, + "loss": 0.4424, + "step": 816000 + }, + { + "epoch": 0.49, + "learning_rate": 5.261412472835446e-05, + "loss": 0.4465, + "step": 816500 + }, + { + "epoch": 0.49, + "learning_rate": 5.261202476279389e-05, + "loss": 0.4302, + "step": 817000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2609924797233324e-05, + "loss": 0.4305, + "step": 817500 + }, + { + "epoch": 0.49, + "learning_rate": 5.260782483167276e-05, + "loss": 0.452, + "step": 818000 + }, + { + "epoch": 0.49, + "learning_rate": 5.260572486611219e-05, + "loss": 0.4447, + "step": 818500 + }, + { + "epoch": 0.49, + "learning_rate": 5.260362490055163e-05, + "loss": 0.4432, + "step": 819000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2601524934991064e-05, + "loss": 0.434, + "step": 819500 + }, + { + "epoch": 0.49, + "learning_rate": 5.25994249694305e-05, + "loss": 0.4301, + "step": 820000 + }, + { + "epoch": 0.49, + "learning_rate": 5.259732920380105e-05, + "loss": 0.4529, + "step": 820500 + }, + { + "epoch": 0.49, + "learning_rate": 5.259523343817161e-05, + "loss": 0.4438, + "step": 821000 + }, + { + "epoch": 0.49, + "learning_rate": 5.259313347261105e-05, + "loss": 0.4324, + "step": 821500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2591033507050486e-05, + "loss": 0.4466, + "step": 822000 + }, + { + "epoch": 0.49, + "learning_rate": 5.258893354148992e-05, + "loss": 0.4382, + "step": 822500 + }, + { + "epoch": 0.49, + "learning_rate": 5.258683777586048e-05, + "loss": 0.4459, + "step": 823000 + }, + { + "epoch": 0.49, + "learning_rate": 5.258473781029991e-05, + "loss": 0.4475, + "step": 823500 + }, + { + "epoch": 0.49, + "learning_rate": 5.2582637844739346e-05, + "loss": 0.4397, + "step": 824000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2580537879178786e-05, + "loss": 0.4388, + "step": 824500 + }, + { + "epoch": 0.49, + "learning_rate": 5.257843791361821e-05, + "loss": 0.4389, + "step": 825000 + }, + { + "epoch": 0.49, + "learning_rate": 5.2576337948057647e-05, + "loss": 0.4387, + "step": 825500 + }, + { + "epoch": 0.5, + "learning_rate": 5.257423798249709e-05, + "loss": 0.4586, + "step": 826000 + }, + { + "epoch": 0.5, + "learning_rate": 5.257213801693652e-05, + "loss": 0.4368, + "step": 826500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2570042251307074e-05, + "loss": 0.4423, + "step": 827000 + }, + { + "epoch": 0.5, + "learning_rate": 5.256794228574651e-05, + "loss": 0.4436, + "step": 827500 + }, + { + "epoch": 0.5, + "learning_rate": 5.256584232018595e-05, + "loss": 0.4402, + "step": 828000 + }, + { + "epoch": 0.5, + "learning_rate": 5.256374655455651e-05, + "loss": 0.449, + "step": 828500 + }, + { + "epoch": 0.5, + "learning_rate": 5.256164658899594e-05, + "loss": 0.4468, + "step": 829000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2559546623435375e-05, + "loss": 0.4408, + "step": 829500 + }, + { + "epoch": 0.5, + "learning_rate": 5.255744665787481e-05, + "loss": 0.4328, + "step": 830000 + }, + { + "epoch": 0.5, + "learning_rate": 5.255535089224537e-05, + "loss": 0.4445, + "step": 830500 + }, + { + "epoch": 0.5, + "learning_rate": 5.25532509266848e-05, + "loss": 0.4388, + "step": 831000 + }, + { + "epoch": 0.5, + "learning_rate": 5.255115096112424e-05, + "loss": 0.4425, + "step": 831500 + }, + { + "epoch": 0.5, + "learning_rate": 5.254905099556367e-05, + "loss": 0.4386, + "step": 832000 + }, + { + "epoch": 0.5, + "learning_rate": 5.254695522993423e-05, + "loss": 0.4458, + "step": 832500 + }, + { + "epoch": 0.5, + "learning_rate": 5.254485526437366e-05, + "loss": 0.4364, + "step": 833000 + }, + { + "epoch": 0.5, + "learning_rate": 5.25427552988131e-05, + "loss": 0.447, + "step": 833500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2540655333252537e-05, + "loss": 0.4501, + "step": 834000 + }, + { + "epoch": 0.5, + "learning_rate": 5.253855536769196e-05, + "loss": 0.4403, + "step": 834500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2536455402131403e-05, + "loss": 0.449, + "step": 835000 + }, + { + "epoch": 0.5, + "learning_rate": 5.253435543657084e-05, + "loss": 0.4283, + "step": 835500 + }, + { + "epoch": 0.5, + "learning_rate": 5.253225547101027e-05, + "loss": 0.4387, + "step": 836000 + }, + { + "epoch": 0.5, + "learning_rate": 5.253015550544971e-05, + "loss": 0.449, + "step": 836500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2528055539889144e-05, + "loss": 0.4357, + "step": 837000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2525955574328584e-05, + "loss": 0.4258, + "step": 837500 + }, + { + "epoch": 0.5, + "learning_rate": 5.252385560876802e-05, + "loss": 0.4414, + "step": 838000 + }, + { + "epoch": 0.5, + "learning_rate": 5.252175984313857e-05, + "loss": 0.4506, + "step": 838500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2519659877578005e-05, + "loss": 0.442, + "step": 839000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2517559912017445e-05, + "loss": 0.439, + "step": 839500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2515464146388e-05, + "loss": 0.4314, + "step": 840000 + }, + { + "epoch": 0.5, + "learning_rate": 5.251336418082743e-05, + "loss": 0.4413, + "step": 840500 + }, + { + "epoch": 0.5, + "learning_rate": 5.2511264215266866e-05, + "loss": 0.4541, + "step": 841000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2509164249706306e-05, + "loss": 0.4392, + "step": 841500 + }, + { + "epoch": 0.5, + "learning_rate": 5.250706428414574e-05, + "loss": 0.4559, + "step": 842000 + }, + { + "epoch": 0.51, + "learning_rate": 5.250496431858517e-05, + "loss": 0.4517, + "step": 842500 + }, + { + "epoch": 0.51, + "learning_rate": 5.250286435302461e-05, + "loss": 0.4498, + "step": 843000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2500764387464046e-05, + "loss": 0.4395, + "step": 843500 + }, + { + "epoch": 0.51, + "learning_rate": 5.24986686218346e-05, + "loss": 0.4561, + "step": 844000 + }, + { + "epoch": 0.51, + "learning_rate": 5.249656865627404e-05, + "loss": 0.4453, + "step": 844500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2494472890644594e-05, + "loss": 0.4404, + "step": 845000 + }, + { + "epoch": 0.51, + "learning_rate": 5.249237292508403e-05, + "loss": 0.4414, + "step": 845500 + }, + { + "epoch": 0.51, + "learning_rate": 5.249027295952346e-05, + "loss": 0.4364, + "step": 846000 + }, + { + "epoch": 0.51, + "learning_rate": 5.24881729939629e-05, + "loss": 0.4453, + "step": 846500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2486073028402334e-05, + "loss": 0.4529, + "step": 847000 + }, + { + "epoch": 0.51, + "learning_rate": 5.248397726277289e-05, + "loss": 0.4461, + "step": 847500 + }, + { + "epoch": 0.51, + "learning_rate": 5.248187729721232e-05, + "loss": 0.4478, + "step": 848000 + }, + { + "epoch": 0.51, + "learning_rate": 5.247977733165176e-05, + "loss": 0.4356, + "step": 848500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2477677366091195e-05, + "loss": 0.4385, + "step": 849000 + }, + { + "epoch": 0.51, + "learning_rate": 5.247557740053063e-05, + "loss": 0.4217, + "step": 849500 + }, + { + "epoch": 0.51, + "learning_rate": 5.247347743497007e-05, + "loss": 0.4523, + "step": 850000 + }, + { + "epoch": 0.51, + "learning_rate": 5.24713774694095e-05, + "loss": 0.4332, + "step": 850500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2469277503848936e-05, + "loss": 0.43, + "step": 851000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2467181738219496e-05, + "loss": 0.4423, + "step": 851500 + }, + { + "epoch": 0.51, + "learning_rate": 5.246508177265893e-05, + "loss": 0.4496, + "step": 852000 + }, + { + "epoch": 0.51, + "learning_rate": 5.246298180709836e-05, + "loss": 0.4419, + "step": 852500 + }, + { + "epoch": 0.51, + "learning_rate": 5.24608818415378e-05, + "loss": 0.4351, + "step": 853000 + }, + { + "epoch": 0.51, + "learning_rate": 5.245878607590836e-05, + "loss": 0.4407, + "step": 853500 + }, + { + "epoch": 0.51, + "learning_rate": 5.245668611034779e-05, + "loss": 0.447, + "step": 854000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2454586144787224e-05, + "loss": 0.4428, + "step": 854500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2452486179226664e-05, + "loss": 0.4479, + "step": 855000 + }, + { + "epoch": 0.51, + "learning_rate": 5.24503862136661e-05, + "loss": 0.4365, + "step": 855500 + }, + { + "epoch": 0.51, + "learning_rate": 5.244828624810553e-05, + "loss": 0.4379, + "step": 856000 + }, + { + "epoch": 0.51, + "learning_rate": 5.2446190482476084e-05, + "loss": 0.4265, + "step": 856500 + }, + { + "epoch": 0.51, + "learning_rate": 5.2444090516915525e-05, + "loss": 0.4365, + "step": 857000 + }, + { + "epoch": 0.51, + "learning_rate": 5.244199055135496e-05, + "loss": 0.4359, + "step": 857500 + }, + { + "epoch": 0.51, + "learning_rate": 5.243989058579439e-05, + "loss": 0.4384, + "step": 858000 + }, + { + "epoch": 0.51, + "learning_rate": 5.243779062023383e-05, + "loss": 0.4293, + "step": 858500 + }, + { + "epoch": 0.52, + "learning_rate": 5.243569065467326e-05, + "loss": 0.441, + "step": 859000 + }, + { + "epoch": 0.52, + "learning_rate": 5.24335906891127e-05, + "loss": 0.4328, + "step": 859500 + }, + { + "epoch": 0.52, + "learning_rate": 5.243149072355213e-05, + "loss": 0.4431, + "step": 860000 + }, + { + "epoch": 0.52, + "learning_rate": 5.242939495792269e-05, + "loss": 0.4451, + "step": 860500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2427299192293246e-05, + "loss": 0.431, + "step": 861000 + }, + { + "epoch": 0.52, + "learning_rate": 5.242519922673268e-05, + "loss": 0.4271, + "step": 861500 + }, + { + "epoch": 0.52, + "learning_rate": 5.242309926117212e-05, + "loss": 0.4258, + "step": 862000 + }, + { + "epoch": 0.52, + "learning_rate": 5.242099929561155e-05, + "loss": 0.4463, + "step": 862500 + }, + { + "epoch": 0.52, + "learning_rate": 5.241889933005099e-05, + "loss": 0.4352, + "step": 863000 + }, + { + "epoch": 0.52, + "learning_rate": 5.241679936449042e-05, + "loss": 0.4383, + "step": 863500 + }, + { + "epoch": 0.52, + "learning_rate": 5.24147077987921e-05, + "loss": 0.4434, + "step": 864000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2412607833231534e-05, + "loss": 0.4448, + "step": 864500 + }, + { + "epoch": 0.52, + "learning_rate": 5.241050786767097e-05, + "loss": 0.4315, + "step": 865000 + }, + { + "epoch": 0.52, + "learning_rate": 5.240840790211041e-05, + "loss": 0.4317, + "step": 865500 + }, + { + "epoch": 0.52, + "learning_rate": 5.240630793654984e-05, + "loss": 0.4381, + "step": 866000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2404207970989275e-05, + "loss": 0.4474, + "step": 866500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2402108005428715e-05, + "loss": 0.4266, + "step": 867000 + }, + { + "epoch": 0.52, + "learning_rate": 5.240000803986815e-05, + "loss": 0.4413, + "step": 867500 + }, + { + "epoch": 0.52, + "learning_rate": 5.239790807430758e-05, + "loss": 0.4382, + "step": 868000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2395808108747015e-05, + "loss": 0.4455, + "step": 868500 + }, + { + "epoch": 0.52, + "learning_rate": 5.239370814318645e-05, + "loss": 0.4281, + "step": 869000 + }, + { + "epoch": 0.52, + "learning_rate": 5.239161237755701e-05, + "loss": 0.4289, + "step": 869500 + }, + { + "epoch": 0.52, + "learning_rate": 5.238951241199644e-05, + "loss": 0.4458, + "step": 870000 + }, + { + "epoch": 0.52, + "learning_rate": 5.238741244643588e-05, + "loss": 0.4403, + "step": 870500 + }, + { + "epoch": 0.52, + "learning_rate": 5.238531248087531e-05, + "loss": 0.442, + "step": 871000 + }, + { + "epoch": 0.52, + "learning_rate": 5.238321251531474e-05, + "loss": 0.4279, + "step": 871500 + }, + { + "epoch": 0.52, + "learning_rate": 5.238111254975418e-05, + "loss": 0.4286, + "step": 872000 + }, + { + "epoch": 0.52, + "learning_rate": 5.2379016784124744e-05, + "loss": 0.4269, + "step": 872500 + }, + { + "epoch": 0.52, + "learning_rate": 5.237691681856417e-05, + "loss": 0.438, + "step": 873000 + }, + { + "epoch": 0.52, + "learning_rate": 5.237481685300361e-05, + "loss": 0.4402, + "step": 873500 + }, + { + "epoch": 0.52, + "learning_rate": 5.2372716887443044e-05, + "loss": 0.4438, + "step": 874000 + }, + { + "epoch": 0.52, + "learning_rate": 5.237061692188248e-05, + "loss": 0.4337, + "step": 874500 + }, + { + "epoch": 0.52, + "learning_rate": 5.236851695632192e-05, + "loss": 0.4355, + "step": 875000 + }, + { + "epoch": 0.52, + "learning_rate": 5.236641699076135e-05, + "loss": 0.4369, + "step": 875500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2364317025200785e-05, + "loss": 0.4411, + "step": 876000 + }, + { + "epoch": 0.53, + "learning_rate": 5.236222125957134e-05, + "loss": 0.4394, + "step": 876500 + }, + { + "epoch": 0.53, + "learning_rate": 5.236012129401078e-05, + "loss": 0.4257, + "step": 877000 + }, + { + "epoch": 0.53, + "learning_rate": 5.235802552838134e-05, + "loss": 0.4427, + "step": 877500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2355925562820765e-05, + "loss": 0.4423, + "step": 878000 + }, + { + "epoch": 0.53, + "learning_rate": 5.23538255972602e-05, + "loss": 0.448, + "step": 878500 + }, + { + "epoch": 0.53, + "learning_rate": 5.235172563169964e-05, + "loss": 0.4434, + "step": 879000 + }, + { + "epoch": 0.53, + "learning_rate": 5.234962566613907e-05, + "loss": 0.4384, + "step": 879500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2347525700578506e-05, + "loss": 0.444, + "step": 880000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2345425735017946e-05, + "loss": 0.4334, + "step": 880500 + }, + { + "epoch": 0.53, + "learning_rate": 5.234332576945738e-05, + "loss": 0.4278, + "step": 881000 + }, + { + "epoch": 0.53, + "learning_rate": 5.234123000382793e-05, + "loss": 0.4432, + "step": 881500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2339134238198494e-05, + "loss": 0.439, + "step": 882000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2337034272637934e-05, + "loss": 0.4417, + "step": 882500 + }, + { + "epoch": 0.53, + "learning_rate": 5.233493430707736e-05, + "loss": 0.4345, + "step": 883000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2332834341516794e-05, + "loss": 0.4369, + "step": 883500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2330734375956234e-05, + "loss": 0.4323, + "step": 884000 + }, + { + "epoch": 0.53, + "learning_rate": 5.232863441039567e-05, + "loss": 0.4316, + "step": 884500 + }, + { + "epoch": 0.53, + "learning_rate": 5.23265344448351e-05, + "loss": 0.4399, + "step": 885000 + }, + { + "epoch": 0.53, + "learning_rate": 5.232443447927454e-05, + "loss": 0.4341, + "step": 885500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2322342913576215e-05, + "loss": 0.435, + "step": 886000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2320242948015655e-05, + "loss": 0.4418, + "step": 886500 + }, + { + "epoch": 0.53, + "learning_rate": 5.231814298245509e-05, + "loss": 0.4267, + "step": 887000 + }, + { + "epoch": 0.53, + "learning_rate": 5.231604301689452e-05, + "loss": 0.4392, + "step": 887500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2313943051333956e-05, + "loss": 0.444, + "step": 888000 + }, + { + "epoch": 0.53, + "learning_rate": 5.231184308577339e-05, + "loss": 0.4307, + "step": 888500 + }, + { + "epoch": 0.53, + "learning_rate": 5.230974312021283e-05, + "loss": 0.4339, + "step": 889000 + }, + { + "epoch": 0.53, + "learning_rate": 5.230764315465226e-05, + "loss": 0.4391, + "step": 889500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2305547389022816e-05, + "loss": 0.4305, + "step": 890000 + }, + { + "epoch": 0.53, + "learning_rate": 5.230344742346225e-05, + "loss": 0.4453, + "step": 890500 + }, + { + "epoch": 0.53, + "learning_rate": 5.230134745790169e-05, + "loss": 0.4355, + "step": 891000 + }, + { + "epoch": 0.53, + "learning_rate": 5.2299247492341124e-05, + "loss": 0.4408, + "step": 891500 + }, + { + "epoch": 0.53, + "learning_rate": 5.2297155926642804e-05, + "loss": 0.4354, + "step": 892000 + }, + { + "epoch": 0.54, + "learning_rate": 5.229505596108224e-05, + "loss": 0.443, + "step": 892500 + }, + { + "epoch": 0.54, + "learning_rate": 5.229295599552167e-05, + "loss": 0.4412, + "step": 893000 + }, + { + "epoch": 0.54, + "learning_rate": 5.229085602996111e-05, + "loss": 0.4339, + "step": 893500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2288756064400545e-05, + "loss": 0.4364, + "step": 894000 + }, + { + "epoch": 0.54, + "learning_rate": 5.22866602987711e-05, + "loss": 0.4447, + "step": 894500 + }, + { + "epoch": 0.54, + "learning_rate": 5.228456033321054e-05, + "loss": 0.4455, + "step": 895000 + }, + { + "epoch": 0.54, + "learning_rate": 5.228246036764997e-05, + "loss": 0.4344, + "step": 895500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2280360402089405e-05, + "loss": 0.4252, + "step": 896000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2278260436528846e-05, + "loss": 0.4314, + "step": 896500 + }, + { + "epoch": 0.54, + "learning_rate": 5.227616047096827e-05, + "loss": 0.437, + "step": 897000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2274060505407706e-05, + "loss": 0.4309, + "step": 897500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2271960539847146e-05, + "loss": 0.4412, + "step": 898000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2269864774217706e-05, + "loss": 0.4329, + "step": 898500 + }, + { + "epoch": 0.54, + "learning_rate": 5.226776480865714e-05, + "loss": 0.4397, + "step": 899000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2265664843096566e-05, + "loss": 0.4336, + "step": 899500 + }, + { + "epoch": 0.54, + "learning_rate": 5.226356907746713e-05, + "loss": 0.4423, + "step": 900000 + }, + { + "epoch": 0.54, + "eval_loss": 0.41384953260421753, + "eval_runtime": 1113.6597, + "eval_samples_per_second": 472.963, + "eval_steps_per_second": 78.827, + "step": 900000 + }, + { + "epoch": 0.54, + "learning_rate": 5.226146911190657e-05, + "loss": 0.4501, + "step": 900500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2259369146346e-05, + "loss": 0.4412, + "step": 901000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2257269180785434e-05, + "loss": 0.4312, + "step": 901500 + }, + { + "epoch": 0.54, + "learning_rate": 5.225516921522487e-05, + "loss": 0.4328, + "step": 902000 + }, + { + "epoch": 0.54, + "learning_rate": 5.225307344959543e-05, + "loss": 0.4359, + "step": 902500 + }, + { + "epoch": 0.54, + "learning_rate": 5.225097348403486e-05, + "loss": 0.4201, + "step": 903000 + }, + { + "epoch": 0.54, + "learning_rate": 5.22488735184743e-05, + "loss": 0.4353, + "step": 903500 + }, + { + "epoch": 0.54, + "learning_rate": 5.224677355291373e-05, + "loss": 0.4411, + "step": 904000 + }, + { + "epoch": 0.54, + "learning_rate": 5.224467778728429e-05, + "loss": 0.4302, + "step": 904500 + }, + { + "epoch": 0.54, + "learning_rate": 5.224257782172372e-05, + "loss": 0.4395, + "step": 905000 + }, + { + "epoch": 0.54, + "learning_rate": 5.224047785616316e-05, + "loss": 0.428, + "step": 905500 + }, + { + "epoch": 0.54, + "learning_rate": 5.2238377890602596e-05, + "loss": 0.4344, + "step": 906000 + }, + { + "epoch": 0.54, + "learning_rate": 5.223627792504202e-05, + "loss": 0.4424, + "step": 906500 + }, + { + "epoch": 0.54, + "learning_rate": 5.223417795948146e-05, + "loss": 0.4332, + "step": 907000 + }, + { + "epoch": 0.54, + "learning_rate": 5.2232077993920896e-05, + "loss": 0.434, + "step": 907500 + }, + { + "epoch": 0.54, + "learning_rate": 5.222997802836033e-05, + "loss": 0.4299, + "step": 908000 + }, + { + "epoch": 0.54, + "learning_rate": 5.22278822627309e-05, + "loss": 0.4345, + "step": 908500 + }, + { + "epoch": 0.54, + "learning_rate": 5.222578229717032e-05, + "loss": 0.4472, + "step": 909000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2223686531540884e-05, + "loss": 0.4365, + "step": 909500 + }, + { + "epoch": 0.55, + "learning_rate": 5.222158656598032e-05, + "loss": 0.4291, + "step": 910000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221948660041976e-05, + "loss": 0.4303, + "step": 910500 + }, + { + "epoch": 0.55, + "learning_rate": 5.221738663485919e-05, + "loss": 0.4381, + "step": 911000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221528666929862e-05, + "loss": 0.4342, + "step": 911500 + }, + { + "epoch": 0.55, + "learning_rate": 5.221318670373806e-05, + "loss": 0.4281, + "step": 912000 + }, + { + "epoch": 0.55, + "learning_rate": 5.221108673817749e-05, + "loss": 0.4367, + "step": 912500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2208986772616925e-05, + "loss": 0.4348, + "step": 913000 + }, + { + "epoch": 0.55, + "learning_rate": 5.220689100698748e-05, + "loss": 0.452, + "step": 913500 + }, + { + "epoch": 0.55, + "learning_rate": 5.220479104142692e-05, + "loss": 0.432, + "step": 914000 + }, + { + "epoch": 0.55, + "learning_rate": 5.220269107586635e-05, + "loss": 0.4419, + "step": 914500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2200591110305785e-05, + "loss": 0.4372, + "step": 915000 + }, + { + "epoch": 0.55, + "learning_rate": 5.219849534467635e-05, + "loss": 0.438, + "step": 915500 + }, + { + "epoch": 0.55, + "learning_rate": 5.219639537911578e-05, + "loss": 0.4308, + "step": 916000 + }, + { + "epoch": 0.55, + "learning_rate": 5.219429541355521e-05, + "loss": 0.4305, + "step": 916500 + }, + { + "epoch": 0.55, + "learning_rate": 5.219219544799465e-05, + "loss": 0.4293, + "step": 917000 + }, + { + "epoch": 0.55, + "learning_rate": 5.219009968236521e-05, + "loss": 0.4417, + "step": 917500 + }, + { + "epoch": 0.55, + "learning_rate": 5.218799971680465e-05, + "loss": 0.4457, + "step": 918000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2185899751244073e-05, + "loss": 0.4206, + "step": 918500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2183799785683514e-05, + "loss": 0.438, + "step": 919000 + }, + { + "epoch": 0.55, + "learning_rate": 5.2181704020054074e-05, + "loss": 0.4321, + "step": 919500 + }, + { + "epoch": 0.55, + "learning_rate": 5.217960405449351e-05, + "loss": 0.4331, + "step": 920000 + }, + { + "epoch": 0.55, + "learning_rate": 5.217750408893294e-05, + "loss": 0.4373, + "step": 920500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2175404123372374e-05, + "loss": 0.4381, + "step": 921000 + }, + { + "epoch": 0.55, + "learning_rate": 5.217330415781181e-05, + "loss": 0.435, + "step": 921500 + }, + { + "epoch": 0.55, + "learning_rate": 5.217120419225125e-05, + "loss": 0.4228, + "step": 922000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216910422669068e-05, + "loss": 0.4324, + "step": 922500 + }, + { + "epoch": 0.55, + "learning_rate": 5.216700846106124e-05, + "loss": 0.4335, + "step": 923000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216490849550067e-05, + "loss": 0.4376, + "step": 923500 + }, + { + "epoch": 0.55, + "learning_rate": 5.216280852994011e-05, + "loss": 0.4229, + "step": 924000 + }, + { + "epoch": 0.55, + "learning_rate": 5.216070856437954e-05, + "loss": 0.4261, + "step": 924500 + }, + { + "epoch": 0.55, + "learning_rate": 5.2158608598818976e-05, + "loss": 0.4385, + "step": 925000 + }, + { + "epoch": 0.55, + "learning_rate": 5.215651283318953e-05, + "loss": 0.4417, + "step": 925500 + }, + { + "epoch": 0.56, + "learning_rate": 5.215441286762897e-05, + "loss": 0.4465, + "step": 926000 + }, + { + "epoch": 0.56, + "learning_rate": 5.21523129020684e-05, + "loss": 0.4265, + "step": 926500 + }, + { + "epoch": 0.56, + "learning_rate": 5.215021713643896e-05, + "loss": 0.4352, + "step": 927000 + }, + { + "epoch": 0.56, + "learning_rate": 5.21481171708784e-05, + "loss": 0.4338, + "step": 927500 + }, + { + "epoch": 0.56, + "learning_rate": 5.214601720531783e-05, + "loss": 0.4342, + "step": 928000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2143917239757264e-05, + "loss": 0.4419, + "step": 928500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2141817274196704e-05, + "loss": 0.4385, + "step": 929000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2139721508567264e-05, + "loss": 0.4368, + "step": 929500 + }, + { + "epoch": 0.56, + "learning_rate": 5.21376215430067e-05, + "loss": 0.4364, + "step": 930000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2135521577446124e-05, + "loss": 0.4367, + "step": 930500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2133421611885565e-05, + "loss": 0.4379, + "step": 931000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2131321646325e-05, + "loss": 0.4332, + "step": 931500 + }, + { + "epoch": 0.56, + "learning_rate": 5.212922168076443e-05, + "loss": 0.437, + "step": 932000 + }, + { + "epoch": 0.56, + "learning_rate": 5.212712171520387e-05, + "loss": 0.4465, + "step": 932500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2125021749643305e-05, + "loss": 0.4364, + "step": 933000 + }, + { + "epoch": 0.56, + "learning_rate": 5.212292178408274e-05, + "loss": 0.4251, + "step": 933500 + }, + { + "epoch": 0.56, + "learning_rate": 5.212082181852218e-05, + "loss": 0.4339, + "step": 934000 + }, + { + "epoch": 0.56, + "learning_rate": 5.211872185296161e-05, + "loss": 0.4384, + "step": 934500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2116621887401046e-05, + "loss": 0.4342, + "step": 935000 + }, + { + "epoch": 0.56, + "learning_rate": 5.21145261217716e-05, + "loss": 0.4378, + "step": 935500 + }, + { + "epoch": 0.56, + "learning_rate": 5.211242615621104e-05, + "loss": 0.432, + "step": 936000 + }, + { + "epoch": 0.56, + "learning_rate": 5.211032619065047e-05, + "loss": 0.4215, + "step": 936500 + }, + { + "epoch": 0.56, + "learning_rate": 5.210822622508991e-05, + "loss": 0.4411, + "step": 937000 + }, + { + "epoch": 0.56, + "learning_rate": 5.210612625952935e-05, + "loss": 0.4399, + "step": 937500 + }, + { + "epoch": 0.56, + "learning_rate": 5.210402629396878e-05, + "loss": 0.437, + "step": 938000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2101926328408214e-05, + "loss": 0.437, + "step": 938500 + }, + { + "epoch": 0.56, + "learning_rate": 5.2099830562778774e-05, + "loss": 0.4355, + "step": 939000 + }, + { + "epoch": 0.56, + "learning_rate": 5.209773059721821e-05, + "loss": 0.4399, + "step": 939500 + }, + { + "epoch": 0.56, + "learning_rate": 5.209563063165764e-05, + "loss": 0.4376, + "step": 940000 + }, + { + "epoch": 0.56, + "learning_rate": 5.2093530666097075e-05, + "loss": 0.4273, + "step": 940500 + }, + { + "epoch": 0.56, + "learning_rate": 5.209143070053651e-05, + "loss": 0.4274, + "step": 941000 + }, + { + "epoch": 0.56, + "learning_rate": 5.208933493490707e-05, + "loss": 0.4377, + "step": 941500 + }, + { + "epoch": 0.56, + "learning_rate": 5.20872349693465e-05, + "loss": 0.4497, + "step": 942000 + }, + { + "epoch": 0.57, + "learning_rate": 5.208513500378594e-05, + "loss": 0.4283, + "step": 942500 + }, + { + "epoch": 0.57, + "learning_rate": 5.208303503822537e-05, + "loss": 0.4406, + "step": 943000 + }, + { + "epoch": 0.57, + "learning_rate": 5.20809350726648e-05, + "loss": 0.423, + "step": 943500 + }, + { + "epoch": 0.57, + "learning_rate": 5.207883510710424e-05, + "loss": 0.437, + "step": 944000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2076735141543676e-05, + "loss": 0.4338, + "step": 944500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2074639375914236e-05, + "loss": 0.4248, + "step": 945000 + }, + { + "epoch": 0.57, + "learning_rate": 5.207253941035367e-05, + "loss": 0.4395, + "step": 945500 + }, + { + "epoch": 0.57, + "learning_rate": 5.20704394447931e-05, + "loss": 0.4367, + "step": 946000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2068339479232537e-05, + "loss": 0.4402, + "step": 946500 + }, + { + "epoch": 0.57, + "learning_rate": 5.206623951367198e-05, + "loss": 0.4373, + "step": 947000 + }, + { + "epoch": 0.57, + "learning_rate": 5.206413954811141e-05, + "loss": 0.4383, + "step": 947500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2062043782481964e-05, + "loss": 0.4357, + "step": 948000 + }, + { + "epoch": 0.57, + "learning_rate": 5.20599438169214e-05, + "loss": 0.4258, + "step": 948500 + }, + { + "epoch": 0.57, + "learning_rate": 5.205784385136084e-05, + "loss": 0.4272, + "step": 949000 + }, + { + "epoch": 0.57, + "learning_rate": 5.205574388580027e-05, + "loss": 0.4341, + "step": 949500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2053648120170825e-05, + "loss": 0.4331, + "step": 950000 + }, + { + "epoch": 0.57, + "learning_rate": 5.205154815461026e-05, + "loss": 0.4322, + "step": 950500 + }, + { + "epoch": 0.57, + "learning_rate": 5.20494481890497e-05, + "loss": 0.4364, + "step": 951000 + }, + { + "epoch": 0.57, + "learning_rate": 5.204734822348913e-05, + "loss": 0.4312, + "step": 951500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2045248257928565e-05, + "loss": 0.4386, + "step": 952000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2043148292368005e-05, + "loss": 0.4341, + "step": 952500 + }, + { + "epoch": 0.57, + "learning_rate": 5.204104832680744e-05, + "loss": 0.4412, + "step": 953000 + }, + { + "epoch": 0.57, + "learning_rate": 5.203894836124687e-05, + "loss": 0.4374, + "step": 953500 + }, + { + "epoch": 0.57, + "learning_rate": 5.203685259561743e-05, + "loss": 0.4242, + "step": 954000 + }, + { + "epoch": 0.57, + "learning_rate": 5.2034752630056866e-05, + "loss": 0.4296, + "step": 954500 + }, + { + "epoch": 0.57, + "learning_rate": 5.20326526644963e-05, + "loss": 0.4266, + "step": 955000 + }, + { + "epoch": 0.57, + "learning_rate": 5.203055269893574e-05, + "loss": 0.4289, + "step": 955500 + }, + { + "epoch": 0.57, + "learning_rate": 5.202845273337517e-05, + "loss": 0.4276, + "step": 956000 + }, + { + "epoch": 0.57, + "learning_rate": 5.202635276781461e-05, + "loss": 0.441, + "step": 956500 + }, + { + "epoch": 0.57, + "learning_rate": 5.202425280225405e-05, + "loss": 0.4311, + "step": 957000 + }, + { + "epoch": 0.57, + "learning_rate": 5.202215283669348e-05, + "loss": 0.4326, + "step": 957500 + }, + { + "epoch": 0.57, + "learning_rate": 5.2020057071064034e-05, + "loss": 0.431, + "step": 958000 + }, + { + "epoch": 0.57, + "learning_rate": 5.201795710550347e-05, + "loss": 0.4346, + "step": 958500 + }, + { + "epoch": 0.57, + "learning_rate": 5.201585713994291e-05, + "loss": 0.4277, + "step": 959000 + }, + { + "epoch": 0.58, + "learning_rate": 5.201376137431346e-05, + "loss": 0.4418, + "step": 959500 + }, + { + "epoch": 0.58, + "learning_rate": 5.2011661408752895e-05, + "loss": 0.4346, + "step": 960000 + }, + { + "epoch": 0.58, + "learning_rate": 5.200956144319233e-05, + "loss": 0.4239, + "step": 960500 + }, + { + "epoch": 0.58, + "learning_rate": 5.200746147763177e-05, + "loss": 0.4354, + "step": 961000 + }, + { + "epoch": 0.58, + "learning_rate": 5.20053615120712e-05, + "loss": 0.426, + "step": 961500 + }, + { + "epoch": 0.58, + "learning_rate": 5.200326154651064e-05, + "loss": 0.4412, + "step": 962000 + }, + { + "epoch": 0.58, + "learning_rate": 5.2001161580950076e-05, + "loss": 0.4387, + "step": 962500 + }, + { + "epoch": 0.58, + "learning_rate": 5.19990616153895e-05, + "loss": 0.4223, + "step": 963000 + }, + { + "epoch": 0.58, + "learning_rate": 5.199697004969118e-05, + "loss": 0.4381, + "step": 963500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1994870084130616e-05, + "loss": 0.4403, + "step": 964000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1992770118570056e-05, + "loss": 0.4336, + "step": 964500 + }, + { + "epoch": 0.58, + "learning_rate": 5.199067015300949e-05, + "loss": 0.4251, + "step": 965000 + }, + { + "epoch": 0.58, + "learning_rate": 5.198857018744892e-05, + "loss": 0.4273, + "step": 965500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1986470221888364e-05, + "loss": 0.4305, + "step": 966000 + }, + { + "epoch": 0.58, + "learning_rate": 5.19843702563278e-05, + "loss": 0.4311, + "step": 966500 + }, + { + "epoch": 0.58, + "learning_rate": 5.198227449069835e-05, + "loss": 0.4429, + "step": 967000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1980174525137784e-05, + "loss": 0.4323, + "step": 967500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1978074559577224e-05, + "loss": 0.4343, + "step": 968000 + }, + { + "epoch": 0.58, + "learning_rate": 5.197597459401666e-05, + "loss": 0.4308, + "step": 968500 + }, + { + "epoch": 0.58, + "learning_rate": 5.19738746284561e-05, + "loss": 0.4362, + "step": 969000 + }, + { + "epoch": 0.58, + "learning_rate": 5.197177886282665e-05, + "loss": 0.4365, + "step": 969500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1969678897266085e-05, + "loss": 0.4323, + "step": 970000 + }, + { + "epoch": 0.58, + "learning_rate": 5.196758313163664e-05, + "loss": 0.4396, + "step": 970500 + }, + { + "epoch": 0.58, + "learning_rate": 5.196548316607607e-05, + "loss": 0.434, + "step": 971000 + }, + { + "epoch": 0.58, + "learning_rate": 5.196338320051551e-05, + "loss": 0.4144, + "step": 971500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1961283234954946e-05, + "loss": 0.4275, + "step": 972000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195918326939438e-05, + "loss": 0.4501, + "step": 972500 + }, + { + "epoch": 0.58, + "learning_rate": 5.195708330383382e-05, + "loss": 0.4291, + "step": 973000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195498333827325e-05, + "loss": 0.4297, + "step": 973500 + }, + { + "epoch": 0.58, + "learning_rate": 5.1952883372712686e-05, + "loss": 0.4263, + "step": 974000 + }, + { + "epoch": 0.58, + "learning_rate": 5.195078760708325e-05, + "loss": 0.4368, + "step": 974500 + }, + { + "epoch": 0.58, + "learning_rate": 5.194868764152268e-05, + "loss": 0.4304, + "step": 975000 + }, + { + "epoch": 0.58, + "learning_rate": 5.1946587675962114e-05, + "loss": 0.4361, + "step": 975500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1944487710401554e-05, + "loss": 0.4307, + "step": 976000 + }, + { + "epoch": 0.59, + "learning_rate": 5.194238774484099e-05, + "loss": 0.4344, + "step": 976500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1940287779280414e-05, + "loss": 0.4342, + "step": 977000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1938187813719854e-05, + "loss": 0.433, + "step": 977500 + }, + { + "epoch": 0.59, + "learning_rate": 5.193608784815929e-05, + "loss": 0.4325, + "step": 978000 + }, + { + "epoch": 0.59, + "learning_rate": 5.193398788259872e-05, + "loss": 0.4247, + "step": 978500 + }, + { + "epoch": 0.59, + "learning_rate": 5.19318963169004e-05, + "loss": 0.4297, + "step": 979000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1929796351339835e-05, + "loss": 0.4373, + "step": 979500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1927696385779275e-05, + "loss": 0.4279, + "step": 980000 + }, + { + "epoch": 0.59, + "learning_rate": 5.192559642021871e-05, + "loss": 0.4307, + "step": 980500 + }, + { + "epoch": 0.59, + "learning_rate": 5.192349645465814e-05, + "loss": 0.439, + "step": 981000 + }, + { + "epoch": 0.59, + "learning_rate": 5.192139648909758e-05, + "loss": 0.4377, + "step": 981500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191929652353701e-05, + "loss": 0.4369, + "step": 982000 + }, + { + "epoch": 0.59, + "learning_rate": 5.191719655797645e-05, + "loss": 0.4363, + "step": 982500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191510079234701e-05, + "loss": 0.4242, + "step": 983000 + }, + { + "epoch": 0.59, + "learning_rate": 5.191300082678644e-05, + "loss": 0.4264, + "step": 983500 + }, + { + "epoch": 0.59, + "learning_rate": 5.191090086122588e-05, + "loss": 0.4293, + "step": 984000 + }, + { + "epoch": 0.59, + "learning_rate": 5.190880089566531e-05, + "loss": 0.4302, + "step": 984500 + }, + { + "epoch": 0.59, + "learning_rate": 5.190670513003587e-05, + "loss": 0.4386, + "step": 985000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1904605164475304e-05, + "loss": 0.4313, + "step": 985500 + }, + { + "epoch": 0.59, + "learning_rate": 5.190250519891474e-05, + "loss": 0.4399, + "step": 986000 + }, + { + "epoch": 0.59, + "learning_rate": 5.190040523335417e-05, + "loss": 0.4361, + "step": 986500 + }, + { + "epoch": 0.59, + "learning_rate": 5.189830946772473e-05, + "loss": 0.4386, + "step": 987000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1896209502164165e-05, + "loss": 0.4256, + "step": 987500 + }, + { + "epoch": 0.59, + "learning_rate": 5.18941095366036e-05, + "loss": 0.4304, + "step": 988000 + }, + { + "epoch": 0.59, + "learning_rate": 5.189200957104304e-05, + "loss": 0.4324, + "step": 988500 + }, + { + "epoch": 0.59, + "learning_rate": 5.188991380541359e-05, + "loss": 0.4414, + "step": 989000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1887813839853025e-05, + "loss": 0.4335, + "step": 989500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1885713874292466e-05, + "loss": 0.4296, + "step": 990000 + }, + { + "epoch": 0.59, + "learning_rate": 5.18836139087319e-05, + "loss": 0.4324, + "step": 990500 + }, + { + "epoch": 0.59, + "learning_rate": 5.188151814310245e-05, + "loss": 0.4354, + "step": 991000 + }, + { + "epoch": 0.59, + "learning_rate": 5.1879418177541886e-05, + "loss": 0.4305, + "step": 991500 + }, + { + "epoch": 0.59, + "learning_rate": 5.1877318211981326e-05, + "loss": 0.4283, + "step": 992000 + }, + { + "epoch": 0.6, + "learning_rate": 5.187522244635188e-05, + "loss": 0.4307, + "step": 992500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1873122480791313e-05, + "loss": 0.4306, + "step": 993000 + }, + { + "epoch": 0.6, + "learning_rate": 5.187102251523075e-05, + "loss": 0.4329, + "step": 993500 + }, + { + "epoch": 0.6, + "learning_rate": 5.186892254967019e-05, + "loss": 0.4255, + "step": 994000 + }, + { + "epoch": 0.6, + "learning_rate": 5.186682258410962e-05, + "loss": 0.4252, + "step": 994500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1864722618549054e-05, + "loss": 0.4255, + "step": 995000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1862626852919614e-05, + "loss": 0.4262, + "step": 995500 + }, + { + "epoch": 0.6, + "learning_rate": 5.186052688735905e-05, + "loss": 0.4298, + "step": 996000 + }, + { + "epoch": 0.6, + "learning_rate": 5.185842692179848e-05, + "loss": 0.433, + "step": 996500 + }, + { + "epoch": 0.6, + "learning_rate": 5.185632695623792e-05, + "loss": 0.4348, + "step": 997000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1854231190608475e-05, + "loss": 0.4303, + "step": 997500 + }, + { + "epoch": 0.6, + "learning_rate": 5.185213122504791e-05, + "loss": 0.4285, + "step": 998000 + }, + { + "epoch": 0.6, + "learning_rate": 5.185003125948734e-05, + "loss": 0.4397, + "step": 998500 + }, + { + "epoch": 0.6, + "learning_rate": 5.184793129392678e-05, + "loss": 0.4273, + "step": 999000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1845831328366216e-05, + "loss": 0.4321, + "step": 999500 + }, + { + "epoch": 0.6, + "learning_rate": 5.184373136280565e-05, + "loss": 0.4303, + "step": 1000000 + }, + { + "epoch": 0.6, + "eval_loss": 0.4063897132873535, + "eval_runtime": 1123.4797, + "eval_samples_per_second": 468.829, + "eval_steps_per_second": 78.138, + "step": 1000000 + }, + { + "epoch": 0.6, + "learning_rate": 5.184163139724509e-05, + "loss": 0.4253, + "step": 1000500 + }, + { + "epoch": 0.6, + "learning_rate": 5.183953563161564e-05, + "loss": 0.4352, + "step": 1001000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1837435666055076e-05, + "loss": 0.4338, + "step": 1001500 + }, + { + "epoch": 0.6, + "learning_rate": 5.183533570049451e-05, + "loss": 0.442, + "step": 1002000 + }, + { + "epoch": 0.6, + "learning_rate": 5.183323573493395e-05, + "loss": 0.4312, + "step": 1002500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1831135769373384e-05, + "loss": 0.4391, + "step": 1003000 + }, + { + "epoch": 0.6, + "learning_rate": 5.182903580381282e-05, + "loss": 0.4255, + "step": 1003500 + }, + { + "epoch": 0.6, + "learning_rate": 5.182693583825225e-05, + "loss": 0.4237, + "step": 1004000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1824835872691684e-05, + "loss": 0.4345, + "step": 1004500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1822740107062244e-05, + "loss": 0.4283, + "step": 1005000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1820640141501685e-05, + "loss": 0.4265, + "step": 1005500 + }, + { + "epoch": 0.6, + "learning_rate": 5.181854017594111e-05, + "loss": 0.4265, + "step": 1006000 + }, + { + "epoch": 0.6, + "learning_rate": 5.1816440210380545e-05, + "loss": 0.4351, + "step": 1006500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1814348644682225e-05, + "loss": 0.4318, + "step": 1007000 + }, + { + "epoch": 0.6, + "learning_rate": 5.181224867912166e-05, + "loss": 0.4328, + "step": 1007500 + }, + { + "epoch": 0.6, + "learning_rate": 5.18101487135611e-05, + "loss": 0.4298, + "step": 1008000 + }, + { + "epoch": 0.6, + "learning_rate": 5.180804874800053e-05, + "loss": 0.4331, + "step": 1008500 + }, + { + "epoch": 0.6, + "learning_rate": 5.1805948782439966e-05, + "loss": 0.4421, + "step": 1009000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1803848816879406e-05, + "loss": 0.4348, + "step": 1009500 + }, + { + "epoch": 0.61, + "learning_rate": 5.180174885131884e-05, + "loss": 0.4194, + "step": 1010000 + }, + { + "epoch": 0.61, + "learning_rate": 5.179964888575827e-05, + "loss": 0.4284, + "step": 1010500 + }, + { + "epoch": 0.61, + "learning_rate": 5.179755312012883e-05, + "loss": 0.4319, + "step": 1011000 + }, + { + "epoch": 0.61, + "learning_rate": 5.179545315456827e-05, + "loss": 0.4284, + "step": 1011500 + }, + { + "epoch": 0.61, + "learning_rate": 5.179335738893882e-05, + "loss": 0.4334, + "step": 1012000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1791257423378254e-05, + "loss": 0.4372, + "step": 1012500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1789157457817694e-05, + "loss": 0.418, + "step": 1013000 + }, + { + "epoch": 0.61, + "learning_rate": 5.178705749225713e-05, + "loss": 0.4341, + "step": 1013500 + }, + { + "epoch": 0.61, + "learning_rate": 5.178495752669656e-05, + "loss": 0.4225, + "step": 1014000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1782857561136e-05, + "loss": 0.4301, + "step": 1014500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1780757595575435e-05, + "loss": 0.4302, + "step": 1015000 + }, + { + "epoch": 0.61, + "learning_rate": 5.177865763001486e-05, + "loss": 0.4269, + "step": 1015500 + }, + { + "epoch": 0.61, + "learning_rate": 5.177656186438542e-05, + "loss": 0.4262, + "step": 1016000 + }, + { + "epoch": 0.61, + "learning_rate": 5.177446609875598e-05, + "loss": 0.4248, + "step": 1016500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1772366133195415e-05, + "loss": 0.4266, + "step": 1017000 + }, + { + "epoch": 0.61, + "learning_rate": 5.177026616763485e-05, + "loss": 0.4268, + "step": 1017500 + }, + { + "epoch": 0.61, + "learning_rate": 5.176816620207429e-05, + "loss": 0.4256, + "step": 1018000 + }, + { + "epoch": 0.61, + "learning_rate": 5.176606623651372e-05, + "loss": 0.4295, + "step": 1018500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1763966270953156e-05, + "loss": 0.4351, + "step": 1019000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1761866305392596e-05, + "loss": 0.4265, + "step": 1019500 + }, + { + "epoch": 0.61, + "learning_rate": 5.175976633983202e-05, + "loss": 0.4316, + "step": 1020000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1757674774133703e-05, + "loss": 0.4287, + "step": 1020500 + }, + { + "epoch": 0.61, + "learning_rate": 5.175557480857314e-05, + "loss": 0.4393, + "step": 1021000 + }, + { + "epoch": 0.61, + "learning_rate": 5.175347484301257e-05, + "loss": 0.4313, + "step": 1021500 + }, + { + "epoch": 0.61, + "learning_rate": 5.175137487745201e-05, + "loss": 0.4253, + "step": 1022000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1749274911891444e-05, + "loss": 0.4292, + "step": 1022500 + }, + { + "epoch": 0.61, + "learning_rate": 5.174717494633088e-05, + "loss": 0.4342, + "step": 1023000 + }, + { + "epoch": 0.61, + "learning_rate": 5.174507498077032e-05, + "loss": 0.4202, + "step": 1023500 + }, + { + "epoch": 0.61, + "learning_rate": 5.174297501520975e-05, + "loss": 0.4403, + "step": 1024000 + }, + { + "epoch": 0.61, + "learning_rate": 5.1740879249580305e-05, + "loss": 0.4241, + "step": 1024500 + }, + { + "epoch": 0.61, + "learning_rate": 5.1738779284019745e-05, + "loss": 0.4406, + "step": 1025000 + }, + { + "epoch": 0.61, + "learning_rate": 5.173667931845918e-05, + "loss": 0.4216, + "step": 1025500 + }, + { + "epoch": 0.62, + "learning_rate": 5.173457935289861e-05, + "loss": 0.4347, + "step": 1026000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1732483587269166e-05, + "loss": 0.4272, + "step": 1026500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1730383621708606e-05, + "loss": 0.4202, + "step": 1027000 + }, + { + "epoch": 0.62, + "learning_rate": 5.172828365614804e-05, + "loss": 0.4246, + "step": 1027500 + }, + { + "epoch": 0.62, + "learning_rate": 5.172618369058747e-05, + "loss": 0.44, + "step": 1028000 + }, + { + "epoch": 0.62, + "learning_rate": 5.172409212488915e-05, + "loss": 0.4282, + "step": 1028500 + }, + { + "epoch": 0.62, + "learning_rate": 5.172199215932859e-05, + "loss": 0.4367, + "step": 1029000 + }, + { + "epoch": 0.62, + "learning_rate": 5.171989639369915e-05, + "loss": 0.4466, + "step": 1029500 + }, + { + "epoch": 0.62, + "learning_rate": 5.171779642813858e-05, + "loss": 0.4307, + "step": 1030000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1715696462578014e-05, + "loss": 0.4184, + "step": 1030500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1713596497017454e-05, + "loss": 0.4456, + "step": 1031000 + }, + { + "epoch": 0.62, + "learning_rate": 5.171149653145688e-05, + "loss": 0.4273, + "step": 1031500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1709396565896314e-05, + "loss": 0.4226, + "step": 1032000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1707296600335755e-05, + "loss": 0.4193, + "step": 1032500 + }, + { + "epoch": 0.62, + "learning_rate": 5.170519663477519e-05, + "loss": 0.4315, + "step": 1033000 + }, + { + "epoch": 0.62, + "learning_rate": 5.170310086914574e-05, + "loss": 0.4436, + "step": 1033500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1701000903585175e-05, + "loss": 0.4384, + "step": 1034000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1698900938024615e-05, + "loss": 0.4245, + "step": 1034500 + }, + { + "epoch": 0.62, + "learning_rate": 5.169680097246405e-05, + "loss": 0.4325, + "step": 1035000 + }, + { + "epoch": 0.62, + "learning_rate": 5.169470100690348e-05, + "loss": 0.4235, + "step": 1035500 + }, + { + "epoch": 0.62, + "learning_rate": 5.169260104134292e-05, + "loss": 0.4347, + "step": 1036000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1690501075782356e-05, + "loss": 0.4322, + "step": 1036500 + }, + { + "epoch": 0.62, + "learning_rate": 5.168840111022179e-05, + "loss": 0.4348, + "step": 1037000 + }, + { + "epoch": 0.62, + "learning_rate": 5.168630534459235e-05, + "loss": 0.4265, + "step": 1037500 + }, + { + "epoch": 0.62, + "learning_rate": 5.168420957896291e-05, + "loss": 0.4334, + "step": 1038000 + }, + { + "epoch": 0.62, + "learning_rate": 5.168210961340234e-05, + "loss": 0.4261, + "step": 1038500 + }, + { + "epoch": 0.62, + "learning_rate": 5.168000964784177e-05, + "loss": 0.4183, + "step": 1039000 + }, + { + "epoch": 0.62, + "learning_rate": 5.167790968228121e-05, + "loss": 0.4369, + "step": 1039500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1675809716720644e-05, + "loss": 0.4241, + "step": 1040000 + }, + { + "epoch": 0.62, + "learning_rate": 5.1673713951091204e-05, + "loss": 0.4242, + "step": 1040500 + }, + { + "epoch": 0.62, + "learning_rate": 5.167161398553063e-05, + "loss": 0.4239, + "step": 1041000 + }, + { + "epoch": 0.62, + "learning_rate": 5.166951401997007e-05, + "loss": 0.4282, + "step": 1041500 + }, + { + "epoch": 0.62, + "learning_rate": 5.1667414054409505e-05, + "loss": 0.4168, + "step": 1042000 + }, + { + "epoch": 0.63, + "learning_rate": 5.166531408884894e-05, + "loss": 0.4336, + "step": 1042500 + }, + { + "epoch": 0.63, + "learning_rate": 5.166321412328838e-05, + "loss": 0.4288, + "step": 1043000 + }, + { + "epoch": 0.63, + "learning_rate": 5.166111415772781e-05, + "loss": 0.4299, + "step": 1043500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1659014192167245e-05, + "loss": 0.4252, + "step": 1044000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1656918426537806e-05, + "loss": 0.4207, + "step": 1044500 + }, + { + "epoch": 0.63, + "learning_rate": 5.165481846097724e-05, + "loss": 0.4217, + "step": 1045000 + }, + { + "epoch": 0.63, + "learning_rate": 5.165272269534779e-05, + "loss": 0.427, + "step": 1045500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1650622729787226e-05, + "loss": 0.431, + "step": 1046000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1648522764226666e-05, + "loss": 0.4284, + "step": 1046500 + }, + { + "epoch": 0.63, + "learning_rate": 5.16464227986661e-05, + "loss": 0.4316, + "step": 1047000 + }, + { + "epoch": 0.63, + "learning_rate": 5.164432283310553e-05, + "loss": 0.4263, + "step": 1047500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1642222867544973e-05, + "loss": 0.4364, + "step": 1048000 + }, + { + "epoch": 0.63, + "learning_rate": 5.164012290198441e-05, + "loss": 0.4186, + "step": 1048500 + }, + { + "epoch": 0.63, + "learning_rate": 5.163802713635496e-05, + "loss": 0.4335, + "step": 1049000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1635927170794394e-05, + "loss": 0.4167, + "step": 1049500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1633827205233834e-05, + "loss": 0.4219, + "step": 1050000 + }, + { + "epoch": 0.63, + "learning_rate": 5.163172723967327e-05, + "loss": 0.4269, + "step": 1050500 + }, + { + "epoch": 0.63, + "learning_rate": 5.16296272741127e-05, + "loss": 0.4346, + "step": 1051000 + }, + { + "epoch": 0.63, + "learning_rate": 5.162752730855214e-05, + "loss": 0.4185, + "step": 1051500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1625427342991575e-05, + "loss": 0.4341, + "step": 1052000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1623327377431015e-05, + "loss": 0.4147, + "step": 1052500 + }, + { + "epoch": 0.63, + "learning_rate": 5.162123161180157e-05, + "loss": 0.4327, + "step": 1053000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1619131646241e-05, + "loss": 0.426, + "step": 1053500 + }, + { + "epoch": 0.63, + "learning_rate": 5.161704008054268e-05, + "loss": 0.4319, + "step": 1054000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1614940114982116e-05, + "loss": 0.4247, + "step": 1054500 + }, + { + "epoch": 0.63, + "learning_rate": 5.161284014942154e-05, + "loss": 0.4279, + "step": 1055000 + }, + { + "epoch": 0.63, + "learning_rate": 5.161074018386098e-05, + "loss": 0.4407, + "step": 1055500 + }, + { + "epoch": 0.63, + "learning_rate": 5.1608640218300416e-05, + "loss": 0.432, + "step": 1056000 + }, + { + "epoch": 0.63, + "learning_rate": 5.160654025273985e-05, + "loss": 0.4339, + "step": 1056500 + }, + { + "epoch": 0.63, + "learning_rate": 5.160444028717929e-05, + "loss": 0.4185, + "step": 1057000 + }, + { + "epoch": 0.63, + "learning_rate": 5.1602340321618723e-05, + "loss": 0.4159, + "step": 1057500 + }, + { + "epoch": 0.63, + "learning_rate": 5.160024035605816e-05, + "loss": 0.4201, + "step": 1058000 + }, + { + "epoch": 0.63, + "learning_rate": 5.15981403904976e-05, + "loss": 0.4264, + "step": 1058500 + }, + { + "epoch": 0.63, + "learning_rate": 5.159604042493703e-05, + "loss": 0.4204, + "step": 1059000 + }, + { + "epoch": 0.64, + "learning_rate": 5.159394045937647e-05, + "loss": 0.4253, + "step": 1059500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1591844693747024e-05, + "loss": 0.421, + "step": 1060000 + }, + { + "epoch": 0.64, + "learning_rate": 5.158974472818646e-05, + "loss": 0.4313, + "step": 1060500 + }, + { + "epoch": 0.64, + "learning_rate": 5.158764896255701e-05, + "loss": 0.4245, + "step": 1061000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1585548996996445e-05, + "loss": 0.4168, + "step": 1061500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1583449031435885e-05, + "loss": 0.4276, + "step": 1062000 + }, + { + "epoch": 0.64, + "learning_rate": 5.158134906587532e-05, + "loss": 0.4215, + "step": 1062500 + }, + { + "epoch": 0.64, + "learning_rate": 5.157924910031475e-05, + "loss": 0.4236, + "step": 1063000 + }, + { + "epoch": 0.64, + "learning_rate": 5.157714913475419e-05, + "loss": 0.4312, + "step": 1063500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1575049169193626e-05, + "loss": 0.4229, + "step": 1064000 + }, + { + "epoch": 0.64, + "learning_rate": 5.157294920363306e-05, + "loss": 0.4199, + "step": 1064500 + }, + { + "epoch": 0.64, + "learning_rate": 5.157085343800362e-05, + "loss": 0.424, + "step": 1065000 + }, + { + "epoch": 0.64, + "learning_rate": 5.156875767237417e-05, + "loss": 0.4309, + "step": 1065500 + }, + { + "epoch": 0.64, + "learning_rate": 5.156665770681361e-05, + "loss": 0.4326, + "step": 1066000 + }, + { + "epoch": 0.64, + "learning_rate": 5.156455774125304e-05, + "loss": 0.4249, + "step": 1066500 + }, + { + "epoch": 0.64, + "learning_rate": 5.156245777569248e-05, + "loss": 0.4282, + "step": 1067000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1560357810131914e-05, + "loss": 0.4274, + "step": 1067500 + }, + { + "epoch": 0.64, + "learning_rate": 5.155826204450247e-05, + "loss": 0.4345, + "step": 1068000 + }, + { + "epoch": 0.64, + "learning_rate": 5.15561620789419e-05, + "loss": 0.4236, + "step": 1068500 + }, + { + "epoch": 0.64, + "learning_rate": 5.155406211338134e-05, + "loss": 0.4273, + "step": 1069000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1551962147820775e-05, + "loss": 0.4159, + "step": 1069500 + }, + { + "epoch": 0.64, + "learning_rate": 5.154986218226021e-05, + "loss": 0.4257, + "step": 1070000 + }, + { + "epoch": 0.64, + "learning_rate": 5.154776641663076e-05, + "loss": 0.4326, + "step": 1070500 + }, + { + "epoch": 0.64, + "learning_rate": 5.15456664510702e-05, + "loss": 0.4267, + "step": 1071000 + }, + { + "epoch": 0.64, + "learning_rate": 5.1543566485509635e-05, + "loss": 0.4296, + "step": 1071500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1541466519949075e-05, + "loss": 0.4308, + "step": 1072000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153936655438851e-05, + "loss": 0.4332, + "step": 1072500 + }, + { + "epoch": 0.64, + "learning_rate": 5.153726658882794e-05, + "loss": 0.4193, + "step": 1073000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153516662326738e-05, + "loss": 0.4313, + "step": 1073500 + }, + { + "epoch": 0.64, + "learning_rate": 5.1533070857637936e-05, + "loss": 0.4253, + "step": 1074000 + }, + { + "epoch": 0.64, + "learning_rate": 5.153097089207737e-05, + "loss": 0.4305, + "step": 1074500 + }, + { + "epoch": 0.64, + "learning_rate": 5.15288709265168e-05, + "loss": 0.4157, + "step": 1075000 + }, + { + "epoch": 0.64, + "learning_rate": 5.152677096095624e-05, + "loss": 0.4267, + "step": 1075500 + }, + { + "epoch": 0.65, + "learning_rate": 5.152467099539568e-05, + "loss": 0.4186, + "step": 1076000 + }, + { + "epoch": 0.65, + "learning_rate": 5.152257522976623e-05, + "loss": 0.4297, + "step": 1076500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1520475264205664e-05, + "loss": 0.4335, + "step": 1077000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1518375298645104e-05, + "loss": 0.4354, + "step": 1077500 + }, + { + "epoch": 0.65, + "learning_rate": 5.151627533308454e-05, + "loss": 0.4163, + "step": 1078000 + }, + { + "epoch": 0.65, + "learning_rate": 5.151417536752397e-05, + "loss": 0.4302, + "step": 1078500 + }, + { + "epoch": 0.65, + "learning_rate": 5.151207540196341e-05, + "loss": 0.4207, + "step": 1079000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1509979636333965e-05, + "loss": 0.4325, + "step": 1079500 + }, + { + "epoch": 0.65, + "learning_rate": 5.15078796707734e-05, + "loss": 0.4284, + "step": 1080000 + }, + { + "epoch": 0.65, + "learning_rate": 5.150577970521284e-05, + "loss": 0.4343, + "step": 1080500 + }, + { + "epoch": 0.65, + "learning_rate": 5.150367973965227e-05, + "loss": 0.414, + "step": 1081000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1501579774091705e-05, + "loss": 0.416, + "step": 1081500 + }, + { + "epoch": 0.65, + "learning_rate": 5.149947980853114e-05, + "loss": 0.438, + "step": 1082000 + }, + { + "epoch": 0.65, + "learning_rate": 5.149737984297057e-05, + "loss": 0.4386, + "step": 1082500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1495279877410006e-05, + "loss": 0.424, + "step": 1083000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1493184111780566e-05, + "loss": 0.4227, + "step": 1083500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1491084146220006e-05, + "loss": 0.4279, + "step": 1084000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148898418065943e-05, + "loss": 0.442, + "step": 1084500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1486884215098867e-05, + "loss": 0.4321, + "step": 1085000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148478844946943e-05, + "loss": 0.4257, + "step": 1085500 + }, + { + "epoch": 0.65, + "learning_rate": 5.148268848390887e-05, + "loss": 0.4245, + "step": 1086000 + }, + { + "epoch": 0.65, + "learning_rate": 5.148059271827942e-05, + "loss": 0.4393, + "step": 1086500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1478492752718854e-05, + "loss": 0.418, + "step": 1087000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1476392787158294e-05, + "loss": 0.4185, + "step": 1087500 + }, + { + "epoch": 0.65, + "learning_rate": 5.147429282159773e-05, + "loss": 0.4312, + "step": 1088000 + }, + { + "epoch": 0.65, + "learning_rate": 5.147219705596828e-05, + "loss": 0.4259, + "step": 1088500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1470097090407715e-05, + "loss": 0.4187, + "step": 1089000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1467997124847155e-05, + "loss": 0.4306, + "step": 1089500 + }, + { + "epoch": 0.65, + "learning_rate": 5.146589715928659e-05, + "loss": 0.4247, + "step": 1090000 + }, + { + "epoch": 0.65, + "learning_rate": 5.146379719372602e-05, + "loss": 0.4307, + "step": 1090500 + }, + { + "epoch": 0.65, + "learning_rate": 5.1461701428096576e-05, + "loss": 0.4234, + "step": 1091000 + }, + { + "epoch": 0.65, + "learning_rate": 5.1459601462536016e-05, + "loss": 0.4291, + "step": 1091500 + }, + { + "epoch": 0.65, + "learning_rate": 5.145750149697545e-05, + "loss": 0.4307, + "step": 1092000 + }, + { + "epoch": 0.65, + "learning_rate": 5.145540153141488e-05, + "loss": 0.426, + "step": 1092500 + }, + { + "epoch": 0.66, + "learning_rate": 5.145330156585432e-05, + "loss": 0.4222, + "step": 1093000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1451201600293756e-05, + "loss": 0.4234, + "step": 1093500 + }, + { + "epoch": 0.66, + "learning_rate": 5.144910583466431e-05, + "loss": 0.435, + "step": 1094000 + }, + { + "epoch": 0.66, + "learning_rate": 5.144700586910375e-05, + "loss": 0.4268, + "step": 1094500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1444905903543184e-05, + "loss": 0.4227, + "step": 1095000 + }, + { + "epoch": 0.66, + "learning_rate": 5.144280593798262e-05, + "loss": 0.4119, + "step": 1095500 + }, + { + "epoch": 0.66, + "learning_rate": 5.144070597242206e-05, + "loss": 0.4323, + "step": 1096000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1438606006861484e-05, + "loss": 0.4235, + "step": 1096500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1436510241232044e-05, + "loss": 0.4196, + "step": 1097000 + }, + { + "epoch": 0.66, + "learning_rate": 5.143441027567148e-05, + "loss": 0.4246, + "step": 1097500 + }, + { + "epoch": 0.66, + "learning_rate": 5.143231031011092e-05, + "loss": 0.4261, + "step": 1098000 + }, + { + "epoch": 0.66, + "learning_rate": 5.143021034455035e-05, + "loss": 0.426, + "step": 1098500 + }, + { + "epoch": 0.66, + "learning_rate": 5.142811037898978e-05, + "loss": 0.4259, + "step": 1099000 + }, + { + "epoch": 0.66, + "learning_rate": 5.142601461336034e-05, + "loss": 0.42, + "step": 1099500 + }, + { + "epoch": 0.66, + "learning_rate": 5.142391464779978e-05, + "loss": 0.4198, + "step": 1100000 + }, + { + "epoch": 0.66, + "eval_loss": 0.40232425928115845, + "eval_runtime": 1114.6441, + "eval_samples_per_second": 472.545, + "eval_steps_per_second": 78.758, + "step": 1100000 + }, + { + "epoch": 0.66, + "learning_rate": 5.142181468223921e-05, + "loss": 0.4262, + "step": 1100500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1419714716678646e-05, + "loss": 0.4156, + "step": 1101000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1417618951049206e-05, + "loss": 0.4152, + "step": 1101500 + }, + { + "epoch": 0.66, + "learning_rate": 5.141551898548864e-05, + "loss": 0.4241, + "step": 1102000 + }, + { + "epoch": 0.66, + "learning_rate": 5.141341901992807e-05, + "loss": 0.4178, + "step": 1102500 + }, + { + "epoch": 0.66, + "learning_rate": 5.141131905436751e-05, + "loss": 0.427, + "step": 1103000 + }, + { + "epoch": 0.66, + "learning_rate": 5.140921908880694e-05, + "loss": 0.4144, + "step": 1103500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1407119123246373e-05, + "loss": 0.4336, + "step": 1104000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1405019157685814e-05, + "loss": 0.425, + "step": 1104500 + }, + { + "epoch": 0.66, + "learning_rate": 5.140291919212525e-05, + "loss": 0.4254, + "step": 1105000 + }, + { + "epoch": 0.66, + "learning_rate": 5.140082342649581e-05, + "loss": 0.4198, + "step": 1105500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1398723460935234e-05, + "loss": 0.4296, + "step": 1106000 + }, + { + "epoch": 0.66, + "learning_rate": 5.1396623495374674e-05, + "loss": 0.4291, + "step": 1106500 + }, + { + "epoch": 0.66, + "learning_rate": 5.139452352981411e-05, + "loss": 0.4133, + "step": 1107000 + }, + { + "epoch": 0.66, + "learning_rate": 5.139242776418467e-05, + "loss": 0.4294, + "step": 1107500 + }, + { + "epoch": 0.66, + "learning_rate": 5.139033199855522e-05, + "loss": 0.4264, + "step": 1108000 + }, + { + "epoch": 0.66, + "learning_rate": 5.138823203299466e-05, + "loss": 0.4242, + "step": 1108500 + }, + { + "epoch": 0.66, + "learning_rate": 5.1386132067434095e-05, + "loss": 0.4334, + "step": 1109000 + }, + { + "epoch": 0.67, + "learning_rate": 5.138403210187353e-05, + "loss": 0.4201, + "step": 1109500 + }, + { + "epoch": 0.67, + "learning_rate": 5.138193213631297e-05, + "loss": 0.4229, + "step": 1110000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1379832170752396e-05, + "loss": 0.4126, + "step": 1110500 + }, + { + "epoch": 0.67, + "learning_rate": 5.137773220519183e-05, + "loss": 0.4253, + "step": 1111000 + }, + { + "epoch": 0.67, + "learning_rate": 5.137563643956239e-05, + "loss": 0.424, + "step": 1111500 + }, + { + "epoch": 0.67, + "learning_rate": 5.137353647400183e-05, + "loss": 0.4263, + "step": 1112000 + }, + { + "epoch": 0.67, + "learning_rate": 5.137143650844126e-05, + "loss": 0.4231, + "step": 1112500 + }, + { + "epoch": 0.67, + "learning_rate": 5.136934074281182e-05, + "loss": 0.4317, + "step": 1113000 + }, + { + "epoch": 0.67, + "learning_rate": 5.136724077725125e-05, + "loss": 0.4292, + "step": 1113500 + }, + { + "epoch": 0.67, + "learning_rate": 5.136514081169069e-05, + "loss": 0.4216, + "step": 1114000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1363040846130124e-05, + "loss": 0.416, + "step": 1114500 + }, + { + "epoch": 0.67, + "learning_rate": 5.136094088056956e-05, + "loss": 0.4285, + "step": 1115000 + }, + { + "epoch": 0.67, + "learning_rate": 5.135884091500899e-05, + "loss": 0.4246, + "step": 1115500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1356740949448424e-05, + "loss": 0.4106, + "step": 1116000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1354640983887865e-05, + "loss": 0.4361, + "step": 1116500 + }, + { + "epoch": 0.67, + "learning_rate": 5.13525410183273e-05, + "loss": 0.4286, + "step": 1117000 + }, + { + "epoch": 0.67, + "learning_rate": 5.135044525269786e-05, + "loss": 0.4185, + "step": 1117500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1348345287137285e-05, + "loss": 0.4226, + "step": 1118000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1346245321576725e-05, + "loss": 0.423, + "step": 1118500 + }, + { + "epoch": 0.67, + "learning_rate": 5.134414535601616e-05, + "loss": 0.4265, + "step": 1119000 + }, + { + "epoch": 0.67, + "learning_rate": 5.134204539045559e-05, + "loss": 0.4235, + "step": 1119500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1339949624826146e-05, + "loss": 0.4245, + "step": 1120000 + }, + { + "epoch": 0.67, + "learning_rate": 5.1337849659265586e-05, + "loss": 0.4301, + "step": 1120500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1335753893636146e-05, + "loss": 0.4318, + "step": 1121000 + }, + { + "epoch": 0.67, + "learning_rate": 5.133365392807558e-05, + "loss": 0.4159, + "step": 1121500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1331553962515013e-05, + "loss": 0.4385, + "step": 1122000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132945399695445e-05, + "loss": 0.4205, + "step": 1122500 + }, + { + "epoch": 0.67, + "learning_rate": 5.132735403139388e-05, + "loss": 0.4231, + "step": 1123000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132525406583332e-05, + "loss": 0.4233, + "step": 1123500 + }, + { + "epoch": 0.67, + "learning_rate": 5.1323154100272754e-05, + "loss": 0.4202, + "step": 1124000 + }, + { + "epoch": 0.67, + "learning_rate": 5.132105413471219e-05, + "loss": 0.4294, + "step": 1124500 + }, + { + "epoch": 0.67, + "learning_rate": 5.131895416915163e-05, + "loss": 0.422, + "step": 1125000 + }, + { + "epoch": 0.67, + "learning_rate": 5.131685420359106e-05, + "loss": 0.4174, + "step": 1125500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1314754238030495e-05, + "loss": 0.4222, + "step": 1126000 + }, + { + "epoch": 0.68, + "learning_rate": 5.131265847240105e-05, + "loss": 0.413, + "step": 1126500 + }, + { + "epoch": 0.68, + "learning_rate": 5.131056270677161e-05, + "loss": 0.4288, + "step": 1127000 + }, + { + "epoch": 0.68, + "learning_rate": 5.130846274121104e-05, + "loss": 0.4288, + "step": 1127500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1306362775650475e-05, + "loss": 0.4215, + "step": 1128000 + }, + { + "epoch": 0.68, + "learning_rate": 5.130426281008991e-05, + "loss": 0.4237, + "step": 1128500 + }, + { + "epoch": 0.68, + "learning_rate": 5.130216284452935e-05, + "loss": 0.4307, + "step": 1129000 + }, + { + "epoch": 0.68, + "learning_rate": 5.130006287896878e-05, + "loss": 0.4291, + "step": 1129500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1297962913408216e-05, + "loss": 0.4185, + "step": 1130000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1295862947847656e-05, + "loss": 0.4271, + "step": 1130500 + }, + { + "epoch": 0.68, + "learning_rate": 5.129376718221821e-05, + "loss": 0.4406, + "step": 1131000 + }, + { + "epoch": 0.68, + "learning_rate": 5.129166721665764e-05, + "loss": 0.4191, + "step": 1131500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1289567251097084e-05, + "loss": 0.4267, + "step": 1132000 + }, + { + "epoch": 0.68, + "learning_rate": 5.128746728553652e-05, + "loss": 0.4189, + "step": 1132500 + }, + { + "epoch": 0.68, + "learning_rate": 5.128536731997595e-05, + "loss": 0.428, + "step": 1133000 + }, + { + "epoch": 0.68, + "learning_rate": 5.128326735441539e-05, + "loss": 0.4212, + "step": 1133500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1281171588785944e-05, + "loss": 0.4221, + "step": 1134000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127907162322538e-05, + "loss": 0.432, + "step": 1134500 + }, + { + "epoch": 0.68, + "learning_rate": 5.127697165766481e-05, + "loss": 0.4322, + "step": 1135000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127487169210425e-05, + "loss": 0.412, + "step": 1135500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1272771726543685e-05, + "loss": 0.4221, + "step": 1136000 + }, + { + "epoch": 0.68, + "learning_rate": 5.127067176098312e-05, + "loss": 0.428, + "step": 1136500 + }, + { + "epoch": 0.68, + "learning_rate": 5.126857599535368e-05, + "loss": 0.4189, + "step": 1137000 + }, + { + "epoch": 0.68, + "learning_rate": 5.126647602979311e-05, + "loss": 0.4214, + "step": 1137500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1264380264163666e-05, + "loss": 0.4361, + "step": 1138000 + }, + { + "epoch": 0.68, + "learning_rate": 5.12622802986031e-05, + "loss": 0.4225, + "step": 1138500 + }, + { + "epoch": 0.68, + "learning_rate": 5.126018033304254e-05, + "loss": 0.4206, + "step": 1139000 + }, + { + "epoch": 0.68, + "learning_rate": 5.125808036748197e-05, + "loss": 0.4294, + "step": 1139500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1255980401921406e-05, + "loss": 0.4178, + "step": 1140000 + }, + { + "epoch": 0.68, + "learning_rate": 5.125388043636085e-05, + "loss": 0.4172, + "step": 1140500 + }, + { + "epoch": 0.68, + "learning_rate": 5.125178047080028e-05, + "loss": 0.4263, + "step": 1141000 + }, + { + "epoch": 0.68, + "learning_rate": 5.1249680505239714e-05, + "loss": 0.4161, + "step": 1141500 + }, + { + "epoch": 0.68, + "learning_rate": 5.1247580539679154e-05, + "loss": 0.4191, + "step": 1142000 + }, + { + "epoch": 0.68, + "learning_rate": 5.124548477404971e-05, + "loss": 0.4238, + "step": 1142500 + }, + { + "epoch": 0.69, + "learning_rate": 5.124338480848914e-05, + "loss": 0.4215, + "step": 1143000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1241284842928574e-05, + "loss": 0.4367, + "step": 1143500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1239184877368015e-05, + "loss": 0.4106, + "step": 1144000 + }, + { + "epoch": 0.69, + "learning_rate": 5.123708491180745e-05, + "loss": 0.4255, + "step": 1144500 + }, + { + "epoch": 0.69, + "learning_rate": 5.123498494624688e-05, + "loss": 0.4124, + "step": 1145000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1232884980686315e-05, + "loss": 0.4257, + "step": 1145500 + }, + { + "epoch": 0.69, + "learning_rate": 5.123078501512575e-05, + "loss": 0.4265, + "step": 1146000 + }, + { + "epoch": 0.69, + "learning_rate": 5.122868924949631e-05, + "loss": 0.4199, + "step": 1146500 + }, + { + "epoch": 0.69, + "learning_rate": 5.122659348386686e-05, + "loss": 0.4251, + "step": 1147000 + }, + { + "epoch": 0.69, + "learning_rate": 5.12244935183063e-05, + "loss": 0.4153, + "step": 1147500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1222393552745736e-05, + "loss": 0.4214, + "step": 1148000 + }, + { + "epoch": 0.69, + "learning_rate": 5.122029358718517e-05, + "loss": 0.429, + "step": 1148500 + }, + { + "epoch": 0.69, + "learning_rate": 5.121819362162461e-05, + "loss": 0.4193, + "step": 1149000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1216093656064036e-05, + "loss": 0.4087, + "step": 1149500 + }, + { + "epoch": 0.69, + "learning_rate": 5.121399369050347e-05, + "loss": 0.4249, + "step": 1150000 + }, + { + "epoch": 0.69, + "learning_rate": 5.121189372494291e-05, + "loss": 0.4277, + "step": 1150500 + }, + { + "epoch": 0.69, + "learning_rate": 5.120979795931347e-05, + "loss": 0.4142, + "step": 1151000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1207697993752904e-05, + "loss": 0.4337, + "step": 1151500 + }, + { + "epoch": 0.69, + "learning_rate": 5.120559802819234e-05, + "loss": 0.4258, + "step": 1152000 + }, + { + "epoch": 0.69, + "learning_rate": 5.120349806263177e-05, + "loss": 0.4307, + "step": 1152500 + }, + { + "epoch": 0.69, + "learning_rate": 5.120141069686457e-05, + "loss": 0.4306, + "step": 1153000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1199310731304005e-05, + "loss": 0.4171, + "step": 1153500 + }, + { + "epoch": 0.69, + "learning_rate": 5.119721076574344e-05, + "loss": 0.4229, + "step": 1154000 + }, + { + "epoch": 0.69, + "learning_rate": 5.119511080018287e-05, + "loss": 0.4133, + "step": 1154500 + }, + { + "epoch": 0.69, + "learning_rate": 5.119301083462231e-05, + "loss": 0.4238, + "step": 1155000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1190910869061745e-05, + "loss": 0.4175, + "step": 1155500 + }, + { + "epoch": 0.69, + "learning_rate": 5.118881090350118e-05, + "loss": 0.4287, + "step": 1156000 + }, + { + "epoch": 0.69, + "learning_rate": 5.118671093794062e-05, + "loss": 0.4244, + "step": 1156500 + }, + { + "epoch": 0.69, + "learning_rate": 5.118461097238005e-05, + "loss": 0.4269, + "step": 1157000 + }, + { + "epoch": 0.69, + "learning_rate": 5.1182511006819486e-05, + "loss": 0.4163, + "step": 1157500 + }, + { + "epoch": 0.69, + "learning_rate": 5.1180411041258926e-05, + "loss": 0.4108, + "step": 1158000 + }, + { + "epoch": 0.69, + "learning_rate": 5.117831107569836e-05, + "loss": 0.4283, + "step": 1158500 + }, + { + "epoch": 0.69, + "learning_rate": 5.117621531006891e-05, + "loss": 0.4275, + "step": 1159000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1174115344508354e-05, + "loss": 0.4307, + "step": 1159500 + }, + { + "epoch": 0.7, + "learning_rate": 5.117201537894779e-05, + "loss": 0.4123, + "step": 1160000 + }, + { + "epoch": 0.7, + "learning_rate": 5.116991541338722e-05, + "loss": 0.4219, + "step": 1160500 + }, + { + "epoch": 0.7, + "learning_rate": 5.116781544782666e-05, + "loss": 0.4217, + "step": 1161000 + }, + { + "epoch": 0.7, + "learning_rate": 5.116572388212833e-05, + "loss": 0.4191, + "step": 1161500 + }, + { + "epoch": 0.7, + "learning_rate": 5.116362391656777e-05, + "loss": 0.4208, + "step": 1162000 + }, + { + "epoch": 0.7, + "learning_rate": 5.11615239510072e-05, + "loss": 0.4129, + "step": 1162500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1159423985446635e-05, + "loss": 0.4259, + "step": 1163000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1157324019886075e-05, + "loss": 0.4188, + "step": 1163500 + }, + { + "epoch": 0.7, + "learning_rate": 5.115522405432551e-05, + "loss": 0.4243, + "step": 1164000 + }, + { + "epoch": 0.7, + "learning_rate": 5.115312408876494e-05, + "loss": 0.4228, + "step": 1164500 + }, + { + "epoch": 0.7, + "learning_rate": 5.115102412320438e-05, + "loss": 0.4224, + "step": 1165000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1148928357574936e-05, + "loss": 0.4296, + "step": 1165500 + }, + { + "epoch": 0.7, + "learning_rate": 5.114682839201437e-05, + "loss": 0.4224, + "step": 1166000 + }, + { + "epoch": 0.7, + "learning_rate": 5.114473262638492e-05, + "loss": 0.4255, + "step": 1166500 + }, + { + "epoch": 0.7, + "learning_rate": 5.114263266082436e-05, + "loss": 0.4274, + "step": 1167000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1140532695263796e-05, + "loss": 0.416, + "step": 1167500 + }, + { + "epoch": 0.7, + "learning_rate": 5.113843272970323e-05, + "loss": 0.4241, + "step": 1168000 + }, + { + "epoch": 0.7, + "learning_rate": 5.113633276414267e-05, + "loss": 0.4316, + "step": 1168500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1134236998513224e-05, + "loss": 0.42, + "step": 1169000 + }, + { + "epoch": 0.7, + "learning_rate": 5.113213703295266e-05, + "loss": 0.4224, + "step": 1169500 + }, + { + "epoch": 0.7, + "learning_rate": 5.113003706739209e-05, + "loss": 0.4198, + "step": 1170000 + }, + { + "epoch": 0.7, + "learning_rate": 5.112793710183153e-05, + "loss": 0.4273, + "step": 1170500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1125837136270964e-05, + "loss": 0.4149, + "step": 1171000 + }, + { + "epoch": 0.7, + "learning_rate": 5.112374137064152e-05, + "loss": 0.4163, + "step": 1171500 + }, + { + "epoch": 0.7, + "learning_rate": 5.112164140508096e-05, + "loss": 0.4161, + "step": 1172000 + }, + { + "epoch": 0.7, + "learning_rate": 5.111954143952039e-05, + "loss": 0.4071, + "step": 1172500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1117441473959825e-05, + "loss": 0.4223, + "step": 1173000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1115341508399265e-05, + "loss": 0.4234, + "step": 1173500 + }, + { + "epoch": 0.7, + "learning_rate": 5.11132415428387e-05, + "loss": 0.4391, + "step": 1174000 + }, + { + "epoch": 0.7, + "learning_rate": 5.111114157727813e-05, + "loss": 0.4234, + "step": 1174500 + }, + { + "epoch": 0.7, + "learning_rate": 5.1109045811648686e-05, + "loss": 0.4167, + "step": 1175000 + }, + { + "epoch": 0.7, + "learning_rate": 5.1106945846088126e-05, + "loss": 0.4147, + "step": 1175500 + }, + { + "epoch": 0.71, + "learning_rate": 5.110484588052756e-05, + "loss": 0.4323, + "step": 1176000 + }, + { + "epoch": 0.71, + "learning_rate": 5.110274591496699e-05, + "loss": 0.4333, + "step": 1176500 + }, + { + "epoch": 0.71, + "learning_rate": 5.110064594940643e-05, + "loss": 0.4199, + "step": 1177000 + }, + { + "epoch": 0.71, + "learning_rate": 5.109854598384587e-05, + "loss": 0.423, + "step": 1177500 + }, + { + "epoch": 0.71, + "learning_rate": 5.109644601828529e-05, + "loss": 0.4223, + "step": 1178000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1094346052724734e-05, + "loss": 0.4351, + "step": 1178500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1092250287095294e-05, + "loss": 0.4261, + "step": 1179000 + }, + { + "epoch": 0.71, + "learning_rate": 5.109015032153473e-05, + "loss": 0.4202, + "step": 1179500 + }, + { + "epoch": 0.71, + "learning_rate": 5.108805035597416e-05, + "loss": 0.4222, + "step": 1180000 + }, + { + "epoch": 0.71, + "learning_rate": 5.108595459034472e-05, + "loss": 0.4298, + "step": 1180500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1083854624784155e-05, + "loss": 0.425, + "step": 1181000 + }, + { + "epoch": 0.71, + "learning_rate": 5.108175465922359e-05, + "loss": 0.4165, + "step": 1181500 + }, + { + "epoch": 0.71, + "learning_rate": 5.107965469366303e-05, + "loss": 0.4253, + "step": 1182000 + }, + { + "epoch": 0.71, + "learning_rate": 5.107755472810246e-05, + "loss": 0.4171, + "step": 1182500 + }, + { + "epoch": 0.71, + "learning_rate": 5.107545476254189e-05, + "loss": 0.4211, + "step": 1183000 + }, + { + "epoch": 0.71, + "learning_rate": 5.107335479698133e-05, + "loss": 0.4272, + "step": 1183500 + }, + { + "epoch": 0.71, + "learning_rate": 5.107125483142076e-05, + "loss": 0.4159, + "step": 1184000 + }, + { + "epoch": 0.71, + "learning_rate": 5.106915906579132e-05, + "loss": 0.4126, + "step": 1184500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1067059100230756e-05, + "loss": 0.4183, + "step": 1185000 + }, + { + "epoch": 0.71, + "learning_rate": 5.106495913467019e-05, + "loss": 0.427, + "step": 1185500 + }, + { + "epoch": 0.71, + "learning_rate": 5.106286336904075e-05, + "loss": 0.4245, + "step": 1186000 + }, + { + "epoch": 0.71, + "learning_rate": 5.106076340348018e-05, + "loss": 0.4251, + "step": 1186500 + }, + { + "epoch": 0.71, + "learning_rate": 5.105866343791962e-05, + "loss": 0.4232, + "step": 1187000 + }, + { + "epoch": 0.71, + "learning_rate": 5.105656347235905e-05, + "loss": 0.4339, + "step": 1187500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1054463506798484e-05, + "loss": 0.4258, + "step": 1188000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1052363541237924e-05, + "loss": 0.4245, + "step": 1188500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1050267775608484e-05, + "loss": 0.4253, + "step": 1189000 + }, + { + "epoch": 0.71, + "learning_rate": 5.104816781004792e-05, + "loss": 0.4101, + "step": 1189500 + }, + { + "epoch": 0.71, + "learning_rate": 5.1046067844487344e-05, + "loss": 0.4279, + "step": 1190000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1043967878926785e-05, + "loss": 0.4183, + "step": 1190500 + }, + { + "epoch": 0.71, + "learning_rate": 5.104186791336622e-05, + "loss": 0.4224, + "step": 1191000 + }, + { + "epoch": 0.71, + "learning_rate": 5.103977214773678e-05, + "loss": 0.4087, + "step": 1191500 + }, + { + "epoch": 0.71, + "learning_rate": 5.103767218217621e-05, + "loss": 0.4216, + "step": 1192000 + }, + { + "epoch": 0.71, + "learning_rate": 5.1035572216615645e-05, + "loss": 0.4271, + "step": 1192500 + }, + { + "epoch": 0.72, + "learning_rate": 5.103347225105508e-05, + "loss": 0.4262, + "step": 1193000 + }, + { + "epoch": 0.72, + "learning_rate": 5.103137228549451e-05, + "loss": 0.4285, + "step": 1193500 + }, + { + "epoch": 0.72, + "learning_rate": 5.102927231993395e-05, + "loss": 0.4259, + "step": 1194000 + }, + { + "epoch": 0.72, + "learning_rate": 5.102717655430451e-05, + "loss": 0.423, + "step": 1194500 + }, + { + "epoch": 0.72, + "learning_rate": 5.102507658874394e-05, + "loss": 0.4205, + "step": 1195000 + }, + { + "epoch": 0.72, + "learning_rate": 5.102297662318338e-05, + "loss": 0.4189, + "step": 1195500 + }, + { + "epoch": 0.72, + "learning_rate": 5.102087665762281e-05, + "loss": 0.4218, + "step": 1196000 + }, + { + "epoch": 0.72, + "learning_rate": 5.1018780891993374e-05, + "loss": 0.4241, + "step": 1196500 + }, + { + "epoch": 0.72, + "learning_rate": 5.101668092643281e-05, + "loss": 0.4068, + "step": 1197000 + }, + { + "epoch": 0.72, + "learning_rate": 5.101458096087224e-05, + "loss": 0.4185, + "step": 1197500 + }, + { + "epoch": 0.72, + "learning_rate": 5.1012480995311674e-05, + "loss": 0.419, + "step": 1198000 + }, + { + "epoch": 0.72, + "learning_rate": 5.101038102975111e-05, + "loss": 0.4192, + "step": 1198500 + }, + { + "epoch": 0.72, + "learning_rate": 5.100828106419055e-05, + "loss": 0.4205, + "step": 1199000 + }, + { + "epoch": 0.72, + "learning_rate": 5.100618109862998e-05, + "loss": 0.4233, + "step": 1199500 + }, + { + "epoch": 0.72, + "learning_rate": 5.1004081133069415e-05, + "loss": 0.413, + "step": 1200000 + }, + { + "epoch": 0.72, + "eval_loss": 0.4001677334308624, + "eval_runtime": 1118.6829, + "eval_samples_per_second": 470.839, + "eval_steps_per_second": 78.474, + "step": 1200000 + }, + { + "epoch": 0.72, + "learning_rate": 5.1001981167508855e-05, + "loss": 0.4221, + "step": 1200500 + }, + { + "epoch": 0.72, + "learning_rate": 5.099988120194829e-05, + "loss": 0.4216, + "step": 1201000 + }, + { + "epoch": 0.72, + "learning_rate": 5.099778543631884e-05, + "loss": 0.4179, + "step": 1201500 + }, + { + "epoch": 0.72, + "learning_rate": 5.0995685470758275e-05, + "loss": 0.4188, + "step": 1202000 + }, + { + "epoch": 0.72, + "learning_rate": 5.0993585505197715e-05, + "loss": 0.4226, + "step": 1202500 + }, + { + "epoch": 0.72, + "learning_rate": 5.099148553963715e-05, + "loss": 0.4315, + "step": 1203000 + }, + { + "epoch": 0.72, + "learning_rate": 5.09893897740077e-05, + "loss": 0.4292, + "step": 1203500 + }, + { + "epoch": 0.72, + "learning_rate": 5.098728980844714e-05, + "loss": 0.4196, + "step": 1204000 + }, + { + "epoch": 0.72, + "learning_rate": 5.0985189842886576e-05, + "loss": 0.4163, + "step": 1204500 + }, + { + "epoch": 0.72, + "learning_rate": 5.098308987732601e-05, + "loss": 0.4115, + "step": 1205000 + }, + { + "epoch": 0.72, + "learning_rate": 5.098098991176545e-05, + "loss": 0.4162, + "step": 1205500 + }, + { + "epoch": 0.72, + "learning_rate": 5.097888994620488e-05, + "loss": 0.4283, + "step": 1206000 + }, + { + "epoch": 0.72, + "learning_rate": 5.097678998064432e-05, + "loss": 0.4301, + "step": 1206500 + }, + { + "epoch": 0.72, + "learning_rate": 5.097469001508376e-05, + "loss": 0.4174, + "step": 1207000 + }, + { + "epoch": 0.72, + "learning_rate": 5.097259424945431e-05, + "loss": 0.4195, + "step": 1207500 + }, + { + "epoch": 0.72, + "learning_rate": 5.0970494283893744e-05, + "loss": 0.4296, + "step": 1208000 + }, + { + "epoch": 0.72, + "learning_rate": 5.09683985182643e-05, + "loss": 0.4221, + "step": 1208500 + }, + { + "epoch": 0.72, + "learning_rate": 5.096629855270374e-05, + "loss": 0.4087, + "step": 1209000 + }, + { + "epoch": 0.73, + "learning_rate": 5.096419858714317e-05, + "loss": 0.4188, + "step": 1209500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0962098621582605e-05, + "loss": 0.4359, + "step": 1210000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0959998656022045e-05, + "loss": 0.4234, + "step": 1210500 + }, + { + "epoch": 0.73, + "learning_rate": 5.095789869046148e-05, + "loss": 0.4074, + "step": 1211000 + }, + { + "epoch": 0.73, + "learning_rate": 5.095579872490091e-05, + "loss": 0.4187, + "step": 1211500 + }, + { + "epoch": 0.73, + "learning_rate": 5.095369875934035e-05, + "loss": 0.418, + "step": 1212000 + }, + { + "epoch": 0.73, + "learning_rate": 5.095159879377978e-05, + "loss": 0.419, + "step": 1212500 + }, + { + "epoch": 0.73, + "learning_rate": 5.094949882821921e-05, + "loss": 0.4158, + "step": 1213000 + }, + { + "epoch": 0.73, + "learning_rate": 5.094739886265865e-05, + "loss": 0.4067, + "step": 1213500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0945298897098086e-05, + "loss": 0.4161, + "step": 1214000 + }, + { + "epoch": 0.73, + "learning_rate": 5.094319893153752e-05, + "loss": 0.4167, + "step": 1214500 + }, + { + "epoch": 0.73, + "learning_rate": 5.094110316590807e-05, + "loss": 0.4151, + "step": 1215000 + }, + { + "epoch": 0.73, + "learning_rate": 5.093900320034751e-05, + "loss": 0.4243, + "step": 1215500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0936907434718074e-05, + "loss": 0.4097, + "step": 1216000 + }, + { + "epoch": 0.73, + "learning_rate": 5.093480746915751e-05, + "loss": 0.4176, + "step": 1216500 + }, + { + "epoch": 0.73, + "learning_rate": 5.093270750359694e-05, + "loss": 0.4211, + "step": 1217000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0930607538036374e-05, + "loss": 0.4217, + "step": 1217500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0928511772406934e-05, + "loss": 0.4155, + "step": 1218000 + }, + { + "epoch": 0.73, + "learning_rate": 5.092641180684637e-05, + "loss": 0.4236, + "step": 1218500 + }, + { + "epoch": 0.73, + "learning_rate": 5.092431184128581e-05, + "loss": 0.4204, + "step": 1219000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0922211875725235e-05, + "loss": 0.4201, + "step": 1219500 + }, + { + "epoch": 0.73, + "learning_rate": 5.092011191016467e-05, + "loss": 0.4295, + "step": 1220000 + }, + { + "epoch": 0.73, + "learning_rate": 5.091801614453523e-05, + "loss": 0.4118, + "step": 1220500 + }, + { + "epoch": 0.73, + "learning_rate": 5.091591617897467e-05, + "loss": 0.4067, + "step": 1221000 + }, + { + "epoch": 0.73, + "learning_rate": 5.09138162134141e-05, + "loss": 0.4166, + "step": 1221500 + }, + { + "epoch": 0.73, + "learning_rate": 5.091171624785353e-05, + "loss": 0.4133, + "step": 1222000 + }, + { + "epoch": 0.73, + "learning_rate": 5.090961628229297e-05, + "loss": 0.4106, + "step": 1222500 + }, + { + "epoch": 0.73, + "learning_rate": 5.09075163167324e-05, + "loss": 0.4201, + "step": 1223000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0905416351171836e-05, + "loss": 0.4175, + "step": 1223500 + }, + { + "epoch": 0.73, + "learning_rate": 5.0903320585542396e-05, + "loss": 0.4153, + "step": 1224000 + }, + { + "epoch": 0.73, + "learning_rate": 5.090122061998183e-05, + "loss": 0.4222, + "step": 1224500 + }, + { + "epoch": 0.73, + "learning_rate": 5.089912485435239e-05, + "loss": 0.4162, + "step": 1225000 + }, + { + "epoch": 0.73, + "learning_rate": 5.0897024888791824e-05, + "loss": 0.4311, + "step": 1225500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0894924923231264e-05, + "loss": 0.4196, + "step": 1226000 + }, + { + "epoch": 0.74, + "learning_rate": 5.089282495767069e-05, + "loss": 0.4123, + "step": 1226500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0890724992110124e-05, + "loss": 0.418, + "step": 1227000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0888625026549564e-05, + "loss": 0.4239, + "step": 1227500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0886525060989e-05, + "loss": 0.4164, + "step": 1228000 + }, + { + "epoch": 0.74, + "learning_rate": 5.088442509542843e-05, + "loss": 0.4261, + "step": 1228500 + }, + { + "epoch": 0.74, + "learning_rate": 5.088232512986787e-05, + "loss": 0.4121, + "step": 1229000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0880225164307305e-05, + "loss": 0.4165, + "step": 1229500 + }, + { + "epoch": 0.74, + "learning_rate": 5.087812519874674e-05, + "loss": 0.4118, + "step": 1230000 + }, + { + "epoch": 0.74, + "learning_rate": 5.087602523318618e-05, + "loss": 0.4152, + "step": 1230500 + }, + { + "epoch": 0.74, + "learning_rate": 5.087392946755673e-05, + "loss": 0.4226, + "step": 1231000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0871829501996166e-05, + "loss": 0.4125, + "step": 1231500 + }, + { + "epoch": 0.74, + "learning_rate": 5.08697295364356e-05, + "loss": 0.4132, + "step": 1232000 + }, + { + "epoch": 0.74, + "learning_rate": 5.086762957087504e-05, + "loss": 0.4197, + "step": 1232500 + }, + { + "epoch": 0.74, + "learning_rate": 5.086552960531447e-05, + "loss": 0.4232, + "step": 1233000 + }, + { + "epoch": 0.74, + "learning_rate": 5.086343803961615e-05, + "loss": 0.4196, + "step": 1233500 + }, + { + "epoch": 0.74, + "learning_rate": 5.086133807405558e-05, + "loss": 0.4156, + "step": 1234000 + }, + { + "epoch": 0.74, + "learning_rate": 5.085923810849502e-05, + "loss": 0.426, + "step": 1234500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0857138142934454e-05, + "loss": 0.4141, + "step": 1235000 + }, + { + "epoch": 0.74, + "learning_rate": 5.085503817737389e-05, + "loss": 0.4254, + "step": 1235500 + }, + { + "epoch": 0.74, + "learning_rate": 5.085293821181333e-05, + "loss": 0.4089, + "step": 1236000 + }, + { + "epoch": 0.74, + "learning_rate": 5.085083824625276e-05, + "loss": 0.4215, + "step": 1236500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0848738280692194e-05, + "loss": 0.421, + "step": 1237000 + }, + { + "epoch": 0.74, + "learning_rate": 5.084664251506275e-05, + "loss": 0.4188, + "step": 1237500 + }, + { + "epoch": 0.74, + "learning_rate": 5.084454254950219e-05, + "loss": 0.4194, + "step": 1238000 + }, + { + "epoch": 0.74, + "learning_rate": 5.084244258394162e-05, + "loss": 0.4121, + "step": 1238500 + }, + { + "epoch": 0.74, + "learning_rate": 5.0840346818312175e-05, + "loss": 0.4152, + "step": 1239000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0838246852751615e-05, + "loss": 0.422, + "step": 1239500 + }, + { + "epoch": 0.74, + "learning_rate": 5.083614688719105e-05, + "loss": 0.4217, + "step": 1240000 + }, + { + "epoch": 0.74, + "learning_rate": 5.083404692163048e-05, + "loss": 0.425, + "step": 1240500 + }, + { + "epoch": 0.74, + "learning_rate": 5.083194695606992e-05, + "loss": 0.4169, + "step": 1241000 + }, + { + "epoch": 0.74, + "learning_rate": 5.0829846990509356e-05, + "loss": 0.4073, + "step": 1241500 + }, + { + "epoch": 0.74, + "learning_rate": 5.082774702494879e-05, + "loss": 0.416, + "step": 1242000 + }, + { + "epoch": 0.74, + "learning_rate": 5.082564705938823e-05, + "loss": 0.4188, + "step": 1242500 + }, + { + "epoch": 0.75, + "learning_rate": 5.082355129375878e-05, + "loss": 0.4249, + "step": 1243000 + }, + { + "epoch": 0.75, + "learning_rate": 5.082145132819822e-05, + "loss": 0.4148, + "step": 1243500 + }, + { + "epoch": 0.75, + "learning_rate": 5.081935136263765e-05, + "loss": 0.4173, + "step": 1244000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0817255597008204e-05, + "loss": 0.4187, + "step": 1244500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0815155631447644e-05, + "loss": 0.4164, + "step": 1245000 + }, + { + "epoch": 0.75, + "learning_rate": 5.081305566588708e-05, + "loss": 0.4082, + "step": 1245500 + }, + { + "epoch": 0.75, + "learning_rate": 5.081095570032651e-05, + "loss": 0.4085, + "step": 1246000 + }, + { + "epoch": 0.75, + "learning_rate": 5.080885573476595e-05, + "loss": 0.4102, + "step": 1246500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0806755769205385e-05, + "loss": 0.418, + "step": 1247000 + }, + { + "epoch": 0.75, + "learning_rate": 5.080465580364482e-05, + "loss": 0.4142, + "step": 1247500 + }, + { + "epoch": 0.75, + "learning_rate": 5.080255583808426e-05, + "loss": 0.4135, + "step": 1248000 + }, + { + "epoch": 0.75, + "learning_rate": 5.080046427238593e-05, + "loss": 0.4023, + "step": 1248500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0798364306825365e-05, + "loss": 0.4111, + "step": 1249000 + }, + { + "epoch": 0.75, + "learning_rate": 5.07962643412648e-05, + "loss": 0.4068, + "step": 1249500 + }, + { + "epoch": 0.75, + "learning_rate": 5.079416437570424e-05, + "loss": 0.4075, + "step": 1250000 + }, + { + "epoch": 0.75, + "learning_rate": 5.079206441014367e-05, + "loss": 0.4117, + "step": 1250500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0789964444583106e-05, + "loss": 0.4054, + "step": 1251000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0787864479022546e-05, + "loss": 0.4194, + "step": 1251500 + }, + { + "epoch": 0.75, + "learning_rate": 5.078576451346198e-05, + "loss": 0.424, + "step": 1252000 + }, + { + "epoch": 0.75, + "learning_rate": 5.078366874783253e-05, + "loss": 0.4109, + "step": 1252500 + }, + { + "epoch": 0.75, + "learning_rate": 5.078157298220309e-05, + "loss": 0.411, + "step": 1253000 + }, + { + "epoch": 0.75, + "learning_rate": 5.077947301664253e-05, + "loss": 0.4137, + "step": 1253500 + }, + { + "epoch": 0.75, + "learning_rate": 5.077737305108196e-05, + "loss": 0.4032, + "step": 1254000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0775273085521394e-05, + "loss": 0.4101, + "step": 1254500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0773173119960834e-05, + "loss": 0.4101, + "step": 1255000 + }, + { + "epoch": 0.75, + "learning_rate": 5.077108155426251e-05, + "loss": 0.4122, + "step": 1255500 + }, + { + "epoch": 0.75, + "learning_rate": 5.076898158870195e-05, + "loss": 0.4179, + "step": 1256000 + }, + { + "epoch": 0.75, + "learning_rate": 5.076688162314138e-05, + "loss": 0.41, + "step": 1256500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0764781657580815e-05, + "loss": 0.4026, + "step": 1257000 + }, + { + "epoch": 0.75, + "learning_rate": 5.076268169202025e-05, + "loss": 0.4138, + "step": 1257500 + }, + { + "epoch": 0.75, + "learning_rate": 5.076058172645968e-05, + "loss": 0.4016, + "step": 1258000 + }, + { + "epoch": 0.75, + "learning_rate": 5.0758481760899116e-05, + "loss": 0.4108, + "step": 1258500 + }, + { + "epoch": 0.75, + "learning_rate": 5.0756381795338556e-05, + "loss": 0.403, + "step": 1259000 + }, + { + "epoch": 0.76, + "learning_rate": 5.075428182977799e-05, + "loss": 0.3953, + "step": 1259500 + }, + { + "epoch": 0.76, + "learning_rate": 5.075218606414854e-05, + "loss": 0.4147, + "step": 1260000 + }, + { + "epoch": 0.76, + "learning_rate": 5.075008609858798e-05, + "loss": 0.4173, + "step": 1260500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0747986133027416e-05, + "loss": 0.4167, + "step": 1261000 + }, + { + "epoch": 0.76, + "learning_rate": 5.074588616746685e-05, + "loss": 0.3962, + "step": 1261500 + }, + { + "epoch": 0.76, + "learning_rate": 5.074379040183741e-05, + "loss": 0.4032, + "step": 1262000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0741690436276844e-05, + "loss": 0.4167, + "step": 1262500 + }, + { + "epoch": 0.76, + "learning_rate": 5.073959047071628e-05, + "loss": 0.4224, + "step": 1263000 + }, + { + "epoch": 0.76, + "learning_rate": 5.073749050515571e-05, + "loss": 0.4127, + "step": 1263500 + }, + { + "epoch": 0.76, + "learning_rate": 5.073539053959515e-05, + "loss": 0.4167, + "step": 1264000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0733290574034584e-05, + "loss": 0.4101, + "step": 1264500 + }, + { + "epoch": 0.76, + "learning_rate": 5.073119060847402e-05, + "loss": 0.4053, + "step": 1265000 + }, + { + "epoch": 0.76, + "learning_rate": 5.072909064291346e-05, + "loss": 0.4107, + "step": 1265500 + }, + { + "epoch": 0.76, + "learning_rate": 5.072699487728401e-05, + "loss": 0.4021, + "step": 1266000 + }, + { + "epoch": 0.76, + "learning_rate": 5.072489911165457e-05, + "loss": 0.4078, + "step": 1266500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0722799146094e-05, + "loss": 0.412, + "step": 1267000 + }, + { + "epoch": 0.76, + "learning_rate": 5.072069918053344e-05, + "loss": 0.4141, + "step": 1267500 + }, + { + "epoch": 0.76, + "learning_rate": 5.071859921497287e-05, + "loss": 0.4115, + "step": 1268000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0716499249412306e-05, + "loss": 0.4152, + "step": 1268500 + }, + { + "epoch": 0.76, + "learning_rate": 5.0714399283851746e-05, + "loss": 0.4168, + "step": 1269000 + }, + { + "epoch": 0.76, + "learning_rate": 5.071229931829118e-05, + "loss": 0.4047, + "step": 1269500 + }, + { + "epoch": 0.76, + "learning_rate": 5.071019935273061e-05, + "loss": 0.4223, + "step": 1270000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0708103587101167e-05, + "loss": 0.3979, + "step": 1270500 + }, + { + "epoch": 0.76, + "learning_rate": 5.070600362154061e-05, + "loss": 0.4087, + "step": 1271000 + }, + { + "epoch": 0.76, + "learning_rate": 5.070390785591117e-05, + "loss": 0.3993, + "step": 1271500 + }, + { + "epoch": 0.76, + "learning_rate": 5.070181209028172e-05, + "loss": 0.4067, + "step": 1272000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0699712124721154e-05, + "loss": 0.4099, + "step": 1272500 + }, + { + "epoch": 0.76, + "learning_rate": 5.069761215916059e-05, + "loss": 0.4029, + "step": 1273000 + }, + { + "epoch": 0.76, + "learning_rate": 5.069551219360003e-05, + "loss": 0.4029, + "step": 1273500 + }, + { + "epoch": 0.76, + "learning_rate": 5.069341222803946e-05, + "loss": 0.4077, + "step": 1274000 + }, + { + "epoch": 0.76, + "learning_rate": 5.0691312262478895e-05, + "loss": 0.4085, + "step": 1274500 + }, + { + "epoch": 0.76, + "learning_rate": 5.068921229691833e-05, + "loss": 0.4064, + "step": 1275000 + }, + { + "epoch": 0.76, + "learning_rate": 5.068711233135776e-05, + "loss": 0.4131, + "step": 1275500 + }, + { + "epoch": 0.77, + "learning_rate": 5.06850123657972e-05, + "loss": 0.4063, + "step": 1276000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0682916600167756e-05, + "loss": 0.403, + "step": 1276500 + }, + { + "epoch": 0.77, + "learning_rate": 5.068081663460719e-05, + "loss": 0.4129, + "step": 1277000 + }, + { + "epoch": 0.77, + "learning_rate": 5.067871666904662e-05, + "loss": 0.4199, + "step": 1277500 + }, + { + "epoch": 0.77, + "learning_rate": 5.067661670348606e-05, + "loss": 0.4191, + "step": 1278000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0674516737925496e-05, + "loss": 0.4061, + "step": 1278500 + }, + { + "epoch": 0.77, + "learning_rate": 5.067241677236493e-05, + "loss": 0.4061, + "step": 1279000 + }, + { + "epoch": 0.77, + "learning_rate": 5.067031680680437e-05, + "loss": 0.4067, + "step": 1279500 + }, + { + "epoch": 0.77, + "learning_rate": 5.066822104117492e-05, + "loss": 0.4134, + "step": 1280000 + }, + { + "epoch": 0.77, + "learning_rate": 5.066612107561436e-05, + "loss": 0.4171, + "step": 1280500 + }, + { + "epoch": 0.77, + "learning_rate": 5.06640211100538e-05, + "loss": 0.4077, + "step": 1281000 + }, + { + "epoch": 0.77, + "learning_rate": 5.066192114449323e-05, + "loss": 0.414, + "step": 1281500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0659821178932664e-05, + "loss": 0.4095, + "step": 1282000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0657721213372104e-05, + "loss": 0.4032, + "step": 1282500 + }, + { + "epoch": 0.77, + "learning_rate": 5.065562124781154e-05, + "loss": 0.4105, + "step": 1283000 + }, + { + "epoch": 0.77, + "learning_rate": 5.065352128225097e-05, + "loss": 0.4066, + "step": 1283500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0651425516621525e-05, + "loss": 0.4177, + "step": 1284000 + }, + { + "epoch": 0.77, + "learning_rate": 5.064932975099208e-05, + "loss": 0.4106, + "step": 1284500 + }, + { + "epoch": 0.77, + "learning_rate": 5.064722978543152e-05, + "loss": 0.413, + "step": 1285000 + }, + { + "epoch": 0.77, + "learning_rate": 5.064512981987095e-05, + "loss": 0.406, + "step": 1285500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0643029854310385e-05, + "loss": 0.4005, + "step": 1286000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0640929888749826e-05, + "loss": 0.4172, + "step": 1286500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063883412312038e-05, + "loss": 0.4044, + "step": 1287000 + }, + { + "epoch": 0.77, + "learning_rate": 5.063673415755981e-05, + "loss": 0.4057, + "step": 1287500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063463419199925e-05, + "loss": 0.4115, + "step": 1288000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0632534226438686e-05, + "loss": 0.3972, + "step": 1288500 + }, + { + "epoch": 0.77, + "learning_rate": 5.063043426087812e-05, + "loss": 0.4065, + "step": 1289000 + }, + { + "epoch": 0.77, + "learning_rate": 5.062833429531756e-05, + "loss": 0.4076, + "step": 1289500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0626234329756994e-05, + "loss": 0.407, + "step": 1290000 + }, + { + "epoch": 0.77, + "learning_rate": 5.062413436419643e-05, + "loss": 0.4129, + "step": 1290500 + }, + { + "epoch": 0.77, + "learning_rate": 5.06220427984981e-05, + "loss": 0.4074, + "step": 1291000 + }, + { + "epoch": 0.77, + "learning_rate": 5.0619942832937534e-05, + "loss": 0.4036, + "step": 1291500 + }, + { + "epoch": 0.77, + "learning_rate": 5.0617842867376974e-05, + "loss": 0.4116, + "step": 1292000 + }, + { + "epoch": 0.77, + "learning_rate": 5.061574290181641e-05, + "loss": 0.4154, + "step": 1292500 + }, + { + "epoch": 0.78, + "learning_rate": 5.061364713618697e-05, + "loss": 0.3998, + "step": 1293000 + }, + { + "epoch": 0.78, + "learning_rate": 5.06115471706264e-05, + "loss": 0.4044, + "step": 1293500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0609447205065835e-05, + "loss": 0.3992, + "step": 1294000 + }, + { + "epoch": 0.78, + "learning_rate": 5.060734723950527e-05, + "loss": 0.401, + "step": 1294500 + }, + { + "epoch": 0.78, + "learning_rate": 5.060524727394471e-05, + "loss": 0.4153, + "step": 1295000 + }, + { + "epoch": 0.78, + "learning_rate": 5.060314730838414e-05, + "loss": 0.4019, + "step": 1295500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0601047342823576e-05, + "loss": 0.4131, + "step": 1296000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0598947377263016e-05, + "loss": 0.4051, + "step": 1296500 + }, + { + "epoch": 0.78, + "learning_rate": 5.059684741170245e-05, + "loss": 0.4008, + "step": 1297000 + }, + { + "epoch": 0.78, + "learning_rate": 5.059474744614188e-05, + "loss": 0.4086, + "step": 1297500 + }, + { + "epoch": 0.78, + "learning_rate": 5.059264748058132e-05, + "loss": 0.4184, + "step": 1298000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0590547515020757e-05, + "loss": 0.4122, + "step": 1298500 + }, + { + "epoch": 0.78, + "learning_rate": 5.058845174939131e-05, + "loss": 0.4187, + "step": 1299000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0586355983761864e-05, + "loss": 0.4066, + "step": 1299500 + }, + { + "epoch": 0.78, + "learning_rate": 5.05842560182013e-05, + "loss": 0.4035, + "step": 1300000 + }, + { + "epoch": 0.78, + "eval_loss": 0.3874254822731018, + "eval_runtime": 1121.4615, + "eval_samples_per_second": 469.673, + "eval_steps_per_second": 78.279, + "step": 1300000 + }, + { + "epoch": 0.78, + "learning_rate": 5.058215605264074e-05, + "loss": 0.407, + "step": 1300500 + }, + { + "epoch": 0.78, + "learning_rate": 5.058005608708017e-05, + "loss": 0.4223, + "step": 1301000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0577960321450724e-05, + "loss": 0.4132, + "step": 1301500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0575860355890165e-05, + "loss": 0.4059, + "step": 1302000 + }, + { + "epoch": 0.78, + "learning_rate": 5.05737603903296e-05, + "loss": 0.4021, + "step": 1302500 + }, + { + "epoch": 0.78, + "learning_rate": 5.057166042476903e-05, + "loss": 0.4105, + "step": 1303000 + }, + { + "epoch": 0.78, + "learning_rate": 5.0569564659139585e-05, + "loss": 0.4061, + "step": 1303500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0567464693579025e-05, + "loss": 0.4021, + "step": 1304000 + }, + { + "epoch": 0.78, + "learning_rate": 5.056536472801846e-05, + "loss": 0.417, + "step": 1304500 + }, + { + "epoch": 0.78, + "learning_rate": 5.056326476245789e-05, + "loss": 0.402, + "step": 1305000 + }, + { + "epoch": 0.78, + "learning_rate": 5.056116479689733e-05, + "loss": 0.4079, + "step": 1305500 + }, + { + "epoch": 0.78, + "learning_rate": 5.0559064831336766e-05, + "loss": 0.4027, + "step": 1306000 + }, + { + "epoch": 0.78, + "learning_rate": 5.05569648657762e-05, + "loss": 0.4067, + "step": 1306500 + }, + { + "epoch": 0.78, + "learning_rate": 5.055486490021564e-05, + "loss": 0.4151, + "step": 1307000 + }, + { + "epoch": 0.78, + "learning_rate": 5.055276913458619e-05, + "loss": 0.409, + "step": 1307500 + }, + { + "epoch": 0.78, + "learning_rate": 5.055066916902563e-05, + "loss": 0.4003, + "step": 1308000 + }, + { + "epoch": 0.78, + "learning_rate": 5.054856920346506e-05, + "loss": 0.4037, + "step": 1308500 + }, + { + "epoch": 0.78, + "learning_rate": 5.05464692379045e-05, + "loss": 0.4047, + "step": 1309000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0544373472275054e-05, + "loss": 0.4042, + "step": 1309500 + }, + { + "epoch": 0.79, + "learning_rate": 5.054227350671449e-05, + "loss": 0.4024, + "step": 1310000 + }, + { + "epoch": 0.79, + "learning_rate": 5.054017774108504e-05, + "loss": 0.4058, + "step": 1310500 + }, + { + "epoch": 0.79, + "learning_rate": 5.053807777552448e-05, + "loss": 0.4074, + "step": 1311000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0535977809963915e-05, + "loss": 0.4049, + "step": 1311500 + }, + { + "epoch": 0.79, + "learning_rate": 5.053387784440335e-05, + "loss": 0.4063, + "step": 1312000 + }, + { + "epoch": 0.79, + "learning_rate": 5.05317820787739e-05, + "loss": 0.399, + "step": 1312500 + }, + { + "epoch": 0.79, + "learning_rate": 5.052968211321334e-05, + "loss": 0.4023, + "step": 1313000 + }, + { + "epoch": 0.79, + "learning_rate": 5.05275863475839e-05, + "loss": 0.4061, + "step": 1313500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0525486382023336e-05, + "loss": 0.4068, + "step": 1314000 + }, + { + "epoch": 0.79, + "learning_rate": 5.052338641646277e-05, + "loss": 0.4107, + "step": 1314500 + }, + { + "epoch": 0.79, + "learning_rate": 5.05212864509022e-05, + "loss": 0.4019, + "step": 1315000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0519186485341636e-05, + "loss": 0.4154, + "step": 1315500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0517086519781076e-05, + "loss": 0.4081, + "step": 1316000 + }, + { + "epoch": 0.79, + "learning_rate": 5.051498655422051e-05, + "loss": 0.4075, + "step": 1316500 + }, + { + "epoch": 0.79, + "learning_rate": 5.051288658865994e-05, + "loss": 0.4083, + "step": 1317000 + }, + { + "epoch": 0.79, + "learning_rate": 5.05107908230305e-05, + "loss": 0.4058, + "step": 1317500 + }, + { + "epoch": 0.79, + "learning_rate": 5.050869085746994e-05, + "loss": 0.4032, + "step": 1318000 + }, + { + "epoch": 0.79, + "learning_rate": 5.050659089190937e-05, + "loss": 0.3966, + "step": 1318500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0504490926348804e-05, + "loss": 0.4087, + "step": 1319000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0502390960788244e-05, + "loss": 0.4063, + "step": 1319500 + }, + { + "epoch": 0.79, + "learning_rate": 5.050029099522768e-05, + "loss": 0.3991, + "step": 1320000 + }, + { + "epoch": 0.79, + "learning_rate": 5.049819102966711e-05, + "loss": 0.4113, + "step": 1320500 + }, + { + "epoch": 0.79, + "learning_rate": 5.049609106410655e-05, + "loss": 0.3931, + "step": 1321000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0493991098545985e-05, + "loss": 0.4023, + "step": 1321500 + }, + { + "epoch": 0.79, + "learning_rate": 5.049189533291654e-05, + "loss": 0.4125, + "step": 1322000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048979956728709e-05, + "loss": 0.4033, + "step": 1322500 + }, + { + "epoch": 0.79, + "learning_rate": 5.048769960172653e-05, + "loss": 0.4134, + "step": 1323000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048560383609709e-05, + "loss": 0.4148, + "step": 1323500 + }, + { + "epoch": 0.79, + "learning_rate": 5.0483503870536526e-05, + "loss": 0.4131, + "step": 1324000 + }, + { + "epoch": 0.79, + "learning_rate": 5.048140390497595e-05, + "loss": 0.398, + "step": 1324500 + }, + { + "epoch": 0.79, + "learning_rate": 5.047930393941539e-05, + "loss": 0.4058, + "step": 1325000 + }, + { + "epoch": 0.79, + "learning_rate": 5.0477203973854827e-05, + "loss": 0.4169, + "step": 1325500 + }, + { + "epoch": 0.79, + "learning_rate": 5.047510820822539e-05, + "loss": 0.4119, + "step": 1326000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0473008242664814e-05, + "loss": 0.3973, + "step": 1326500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0470908277104254e-05, + "loss": 0.4126, + "step": 1327000 + }, + { + "epoch": 0.8, + "learning_rate": 5.046880831154369e-05, + "loss": 0.4157, + "step": 1327500 + }, + { + "epoch": 0.8, + "learning_rate": 5.046670834598312e-05, + "loss": 0.4021, + "step": 1328000 + }, + { + "epoch": 0.8, + "learning_rate": 5.046460838042256e-05, + "loss": 0.4005, + "step": 1328500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0462512614793115e-05, + "loss": 0.4148, + "step": 1329000 + }, + { + "epoch": 0.8, + "learning_rate": 5.046041264923255e-05, + "loss": 0.4081, + "step": 1329500 + }, + { + "epoch": 0.8, + "learning_rate": 5.045831268367199e-05, + "loss": 0.4098, + "step": 1330000 + }, + { + "epoch": 0.8, + "learning_rate": 5.045621271811142e-05, + "loss": 0.4137, + "step": 1330500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0454112752550855e-05, + "loss": 0.4131, + "step": 1331000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0452012786990295e-05, + "loss": 0.4041, + "step": 1331500 + }, + { + "epoch": 0.8, + "learning_rate": 5.044991282142973e-05, + "loss": 0.3975, + "step": 1332000 + }, + { + "epoch": 0.8, + "learning_rate": 5.044781285586916e-05, + "loss": 0.4074, + "step": 1332500 + }, + { + "epoch": 0.8, + "learning_rate": 5.04457128903086e-05, + "loss": 0.4013, + "step": 1333000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0443612924748036e-05, + "loss": 0.4122, + "step": 1333500 + }, + { + "epoch": 0.8, + "learning_rate": 5.044151715911859e-05, + "loss": 0.4143, + "step": 1334000 + }, + { + "epoch": 0.8, + "learning_rate": 5.043941719355802e-05, + "loss": 0.4051, + "step": 1334500 + }, + { + "epoch": 0.8, + "learning_rate": 5.043731722799746e-05, + "loss": 0.413, + "step": 1335000 + }, + { + "epoch": 0.8, + "learning_rate": 5.04352172624369e-05, + "loss": 0.4021, + "step": 1335500 + }, + { + "epoch": 0.8, + "learning_rate": 5.043311729687633e-05, + "loss": 0.4134, + "step": 1336000 + }, + { + "epoch": 0.8, + "learning_rate": 5.043101733131577e-05, + "loss": 0.4122, + "step": 1336500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0428921565686324e-05, + "loss": 0.4096, + "step": 1337000 + }, + { + "epoch": 0.8, + "learning_rate": 5.042682160012576e-05, + "loss": 0.4011, + "step": 1337500 + }, + { + "epoch": 0.8, + "learning_rate": 5.042472163456519e-05, + "loss": 0.4122, + "step": 1338000 + }, + { + "epoch": 0.8, + "learning_rate": 5.042262166900463e-05, + "loss": 0.404, + "step": 1338500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0420521703444065e-05, + "loss": 0.3982, + "step": 1339000 + }, + { + "epoch": 0.8, + "learning_rate": 5.04184217378835e-05, + "loss": 0.3985, + "step": 1339500 + }, + { + "epoch": 0.8, + "learning_rate": 5.041632177232293e-05, + "loss": 0.4128, + "step": 1340000 + }, + { + "epoch": 0.8, + "learning_rate": 5.0414221806762365e-05, + "loss": 0.4028, + "step": 1340500 + }, + { + "epoch": 0.8, + "learning_rate": 5.0412130241064045e-05, + "loss": 0.4053, + "step": 1341000 + }, + { + "epoch": 0.8, + "learning_rate": 5.041003027550348e-05, + "loss": 0.3961, + "step": 1341500 + }, + { + "epoch": 0.8, + "learning_rate": 5.040793030994292e-05, + "loss": 0.4104, + "step": 1342000 + }, + { + "epoch": 0.8, + "learning_rate": 5.040583034438235e-05, + "loss": 0.4004, + "step": 1342500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0403734578752906e-05, + "loss": 0.4171, + "step": 1343000 + }, + { + "epoch": 0.81, + "learning_rate": 5.040163461319234e-05, + "loss": 0.4146, + "step": 1343500 + }, + { + "epoch": 0.81, + "learning_rate": 5.039953464763178e-05, + "loss": 0.3999, + "step": 1344000 + }, + { + "epoch": 0.81, + "learning_rate": 5.039743468207121e-05, + "loss": 0.4086, + "step": 1344500 + }, + { + "epoch": 0.81, + "learning_rate": 5.039533471651065e-05, + "loss": 0.3986, + "step": 1345000 + }, + { + "epoch": 0.81, + "learning_rate": 5.039323475095009e-05, + "loss": 0.4019, + "step": 1345500 + }, + { + "epoch": 0.81, + "learning_rate": 5.039113478538952e-05, + "loss": 0.4184, + "step": 1346000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0389034819828954e-05, + "loss": 0.4062, + "step": 1346500 + }, + { + "epoch": 0.81, + "learning_rate": 5.038693485426839e-05, + "loss": 0.4172, + "step": 1347000 + }, + { + "epoch": 0.81, + "learning_rate": 5.038483488870782e-05, + "loss": 0.4052, + "step": 1347500 + }, + { + "epoch": 0.81, + "learning_rate": 5.038273912307838e-05, + "loss": 0.4044, + "step": 1348000 + }, + { + "epoch": 0.81, + "learning_rate": 5.038063915751782e-05, + "loss": 0.4091, + "step": 1348500 + }, + { + "epoch": 0.81, + "learning_rate": 5.037853919195725e-05, + "loss": 0.4006, + "step": 1349000 + }, + { + "epoch": 0.81, + "learning_rate": 5.037643922639668e-05, + "loss": 0.4108, + "step": 1349500 + }, + { + "epoch": 0.81, + "learning_rate": 5.037433926083612e-05, + "loss": 0.4125, + "step": 1350000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0372239295275555e-05, + "loss": 0.4072, + "step": 1350500 + }, + { + "epoch": 0.81, + "learning_rate": 5.037013932971499e-05, + "loss": 0.4081, + "step": 1351000 + }, + { + "epoch": 0.81, + "learning_rate": 5.036804356408554e-05, + "loss": 0.4048, + "step": 1351500 + }, + { + "epoch": 0.81, + "learning_rate": 5.036594359852498e-05, + "loss": 0.4077, + "step": 1352000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0363843632964416e-05, + "loss": 0.4053, + "step": 1352500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0361747867334976e-05, + "loss": 0.4095, + "step": 1353000 + }, + { + "epoch": 0.81, + "learning_rate": 5.035964790177441e-05, + "loss": 0.4097, + "step": 1353500 + }, + { + "epoch": 0.81, + "learning_rate": 5.035754793621384e-05, + "loss": 0.3952, + "step": 1354000 + }, + { + "epoch": 0.81, + "learning_rate": 5.035544797065328e-05, + "loss": 0.4074, + "step": 1354500 + }, + { + "epoch": 0.81, + "learning_rate": 5.035334800509272e-05, + "loss": 0.3991, + "step": 1355000 + }, + { + "epoch": 0.81, + "learning_rate": 5.035125223946328e-05, + "loss": 0.4139, + "step": 1355500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0349152273902704e-05, + "loss": 0.4092, + "step": 1356000 + }, + { + "epoch": 0.81, + "learning_rate": 5.034705230834214e-05, + "loss": 0.4067, + "step": 1356500 + }, + { + "epoch": 0.81, + "learning_rate": 5.034495234278158e-05, + "loss": 0.4006, + "step": 1357000 + }, + { + "epoch": 0.81, + "learning_rate": 5.034285237722101e-05, + "loss": 0.397, + "step": 1357500 + }, + { + "epoch": 0.81, + "learning_rate": 5.0340752411660445e-05, + "loss": 0.4086, + "step": 1358000 + }, + { + "epoch": 0.81, + "learning_rate": 5.0338652446099885e-05, + "loss": 0.4042, + "step": 1358500 + }, + { + "epoch": 0.81, + "learning_rate": 5.033655248053932e-05, + "loss": 0.3964, + "step": 1359000 + }, + { + "epoch": 0.82, + "learning_rate": 5.033445251497875e-05, + "loss": 0.4081, + "step": 1359500 + }, + { + "epoch": 0.82, + "learning_rate": 5.033235674934931e-05, + "loss": 0.4124, + "step": 1360000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0330256783788746e-05, + "loss": 0.411, + "step": 1360500 + }, + { + "epoch": 0.82, + "learning_rate": 5.032815681822818e-05, + "loss": 0.3958, + "step": 1361000 + }, + { + "epoch": 0.82, + "learning_rate": 5.032605685266762e-05, + "loss": 0.4021, + "step": 1361500 + }, + { + "epoch": 0.82, + "learning_rate": 5.032395688710705e-05, + "loss": 0.4041, + "step": 1362000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0321861121477606e-05, + "loss": 0.4099, + "step": 1362500 + }, + { + "epoch": 0.82, + "learning_rate": 5.031976115591704e-05, + "loss": 0.4012, + "step": 1363000 + }, + { + "epoch": 0.82, + "learning_rate": 5.031766119035648e-05, + "loss": 0.4072, + "step": 1363500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0315561224795913e-05, + "loss": 0.4098, + "step": 1364000 + }, + { + "epoch": 0.82, + "learning_rate": 5.031346125923535e-05, + "loss": 0.4088, + "step": 1364500 + }, + { + "epoch": 0.82, + "learning_rate": 5.03113654936059e-05, + "loss": 0.395, + "step": 1365000 + }, + { + "epoch": 0.82, + "learning_rate": 5.030926552804534e-05, + "loss": 0.4086, + "step": 1365500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0307165562484774e-05, + "loss": 0.4002, + "step": 1366000 + }, + { + "epoch": 0.82, + "learning_rate": 5.030506559692421e-05, + "loss": 0.4028, + "step": 1366500 + }, + { + "epoch": 0.82, + "learning_rate": 5.030296563136365e-05, + "loss": 0.4034, + "step": 1367000 + }, + { + "epoch": 0.82, + "learning_rate": 5.030086566580308e-05, + "loss": 0.4054, + "step": 1367500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0298765700242515e-05, + "loss": 0.4111, + "step": 1368000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0296669934613075e-05, + "loss": 0.4076, + "step": 1368500 + }, + { + "epoch": 0.82, + "learning_rate": 5.029456996905251e-05, + "loss": 0.4053, + "step": 1369000 + }, + { + "epoch": 0.82, + "learning_rate": 5.029247000349194e-05, + "loss": 0.4098, + "step": 1369500 + }, + { + "epoch": 0.82, + "learning_rate": 5.029037003793138e-05, + "loss": 0.4013, + "step": 1370000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0288270072370816e-05, + "loss": 0.4029, + "step": 1370500 + }, + { + "epoch": 0.82, + "learning_rate": 5.028617010681024e-05, + "loss": 0.41, + "step": 1371000 + }, + { + "epoch": 0.82, + "learning_rate": 5.028407014124968e-05, + "loss": 0.3986, + "step": 1371500 + }, + { + "epoch": 0.82, + "learning_rate": 5.028197437562024e-05, + "loss": 0.4092, + "step": 1372000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0279874410059676e-05, + "loss": 0.4187, + "step": 1372500 + }, + { + "epoch": 0.82, + "learning_rate": 5.027777444449911e-05, + "loss": 0.4107, + "step": 1373000 + }, + { + "epoch": 0.82, + "learning_rate": 5.0275678678869664e-05, + "loss": 0.416, + "step": 1373500 + }, + { + "epoch": 0.82, + "learning_rate": 5.0273578713309104e-05, + "loss": 0.3992, + "step": 1374000 + }, + { + "epoch": 0.82, + "learning_rate": 5.027147874774854e-05, + "loss": 0.4124, + "step": 1374500 + }, + { + "epoch": 0.82, + "learning_rate": 5.026937878218797e-05, + "loss": 0.4115, + "step": 1375000 + }, + { + "epoch": 0.82, + "learning_rate": 5.026727881662741e-05, + "loss": 0.3955, + "step": 1375500 + }, + { + "epoch": 0.82, + "learning_rate": 5.026517885106684e-05, + "loss": 0.4033, + "step": 1376000 + }, + { + "epoch": 0.83, + "learning_rate": 5.026307888550628e-05, + "loss": 0.4079, + "step": 1376500 + }, + { + "epoch": 0.83, + "learning_rate": 5.026097891994571e-05, + "loss": 0.4036, + "step": 1377000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0258878954385145e-05, + "loss": 0.3996, + "step": 1377500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0256778988824585e-05, + "loss": 0.4056, + "step": 1378000 + }, + { + "epoch": 0.83, + "learning_rate": 5.025468322319514e-05, + "loss": 0.3982, + "step": 1378500 + }, + { + "epoch": 0.83, + "learning_rate": 5.025258325763457e-05, + "loss": 0.4081, + "step": 1379000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0250483292074005e-05, + "loss": 0.3993, + "step": 1379500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0248383326513446e-05, + "loss": 0.4044, + "step": 1380000 + }, + { + "epoch": 0.83, + "learning_rate": 5.024628336095288e-05, + "loss": 0.3992, + "step": 1380500 + }, + { + "epoch": 0.83, + "learning_rate": 5.024418339539231e-05, + "loss": 0.4105, + "step": 1381000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0242087629762866e-05, + "loss": 0.4042, + "step": 1381500 + }, + { + "epoch": 0.83, + "learning_rate": 5.0239987664202306e-05, + "loss": 0.4127, + "step": 1382000 + }, + { + "epoch": 0.83, + "learning_rate": 5.023788769864174e-05, + "loss": 0.4054, + "step": 1382500 + }, + { + "epoch": 0.83, + "learning_rate": 5.023578773308117e-05, + "loss": 0.409, + "step": 1383000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0233687767520614e-05, + "loss": 0.4057, + "step": 1383500 + }, + { + "epoch": 0.83, + "learning_rate": 5.023159200189117e-05, + "loss": 0.4165, + "step": 1384000 + }, + { + "epoch": 0.83, + "learning_rate": 5.02294920363306e-05, + "loss": 0.4007, + "step": 1384500 + }, + { + "epoch": 0.83, + "learning_rate": 5.022739207077004e-05, + "loss": 0.4075, + "step": 1385000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0225292105209474e-05, + "loss": 0.3994, + "step": 1385500 + }, + { + "epoch": 0.83, + "learning_rate": 5.022319213964891e-05, + "loss": 0.4091, + "step": 1386000 + }, + { + "epoch": 0.83, + "learning_rate": 5.022109217408835e-05, + "loss": 0.4039, + "step": 1386500 + }, + { + "epoch": 0.83, + "learning_rate": 5.021899220852778e-05, + "loss": 0.4052, + "step": 1387000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0216892242967215e-05, + "loss": 0.4039, + "step": 1387500 + }, + { + "epoch": 0.83, + "learning_rate": 5.021480067726889e-05, + "loss": 0.4095, + "step": 1388000 + }, + { + "epoch": 0.83, + "learning_rate": 5.021270071170832e-05, + "loss": 0.3968, + "step": 1388500 + }, + { + "epoch": 0.83, + "learning_rate": 5.021060074614776e-05, + "loss": 0.4012, + "step": 1389000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0208500780587196e-05, + "loss": 0.3925, + "step": 1389500 + }, + { + "epoch": 0.83, + "learning_rate": 5.020640501495775e-05, + "loss": 0.4009, + "step": 1390000 + }, + { + "epoch": 0.83, + "learning_rate": 5.020430924932831e-05, + "loss": 0.3926, + "step": 1390500 + }, + { + "epoch": 0.83, + "learning_rate": 5.020220928376775e-05, + "loss": 0.4065, + "step": 1391000 + }, + { + "epoch": 0.83, + "learning_rate": 5.0200109318207183e-05, + "loss": 0.4038, + "step": 1391500 + }, + { + "epoch": 0.83, + "learning_rate": 5.019800935264662e-05, + "loss": 0.3961, + "step": 1392000 + }, + { + "epoch": 0.83, + "learning_rate": 5.019590938708605e-05, + "loss": 0.4038, + "step": 1392500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0193809421525484e-05, + "loss": 0.4106, + "step": 1393000 + }, + { + "epoch": 0.84, + "learning_rate": 5.019170945596492e-05, + "loss": 0.416, + "step": 1393500 + }, + { + "epoch": 0.84, + "learning_rate": 5.018960949040436e-05, + "loss": 0.4141, + "step": 1394000 + }, + { + "epoch": 0.84, + "learning_rate": 5.018750952484379e-05, + "loss": 0.3983, + "step": 1394500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0185409559283224e-05, + "loss": 0.4127, + "step": 1395000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0183309593722665e-05, + "loss": 0.4036, + "step": 1395500 + }, + { + "epoch": 0.84, + "learning_rate": 5.018121382809322e-05, + "loss": 0.3991, + "step": 1396000 + }, + { + "epoch": 0.84, + "learning_rate": 5.017911386253265e-05, + "loss": 0.401, + "step": 1396500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0177013896972085e-05, + "loss": 0.4076, + "step": 1397000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0174913931411525e-05, + "loss": 0.4047, + "step": 1397500 + }, + { + "epoch": 0.84, + "learning_rate": 5.017281396585096e-05, + "loss": 0.3983, + "step": 1398000 + }, + { + "epoch": 0.84, + "learning_rate": 5.017071820022151e-05, + "loss": 0.3931, + "step": 1398500 + }, + { + "epoch": 0.84, + "learning_rate": 5.016861823466095e-05, + "loss": 0.3963, + "step": 1399000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0166518269100386e-05, + "loss": 0.4083, + "step": 1399500 + }, + { + "epoch": 0.84, + "learning_rate": 5.016441830353982e-05, + "loss": 0.4021, + "step": 1400000 + }, + { + "epoch": 0.84, + "eval_loss": 0.3852459192276001, + "eval_runtime": 1130.5133, + "eval_samples_per_second": 465.912, + "eval_steps_per_second": 77.652, + "step": 1400000 + }, + { + "epoch": 0.84, + "learning_rate": 5.016231833797926e-05, + "loss": 0.4059, + "step": 1400500 + }, + { + "epoch": 0.84, + "learning_rate": 5.016021837241869e-05, + "loss": 0.41, + "step": 1401000 + }, + { + "epoch": 0.84, + "learning_rate": 5.015811840685813e-05, + "loss": 0.3999, + "step": 1401500 + }, + { + "epoch": 0.84, + "learning_rate": 5.015602264122868e-05, + "loss": 0.3988, + "step": 1402000 + }, + { + "epoch": 0.84, + "learning_rate": 5.015392267566812e-05, + "loss": 0.3889, + "step": 1402500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0151822710107554e-05, + "loss": 0.4051, + "step": 1403000 + }, + { + "epoch": 0.84, + "learning_rate": 5.014972274454699e-05, + "loss": 0.3999, + "step": 1403500 + }, + { + "epoch": 0.84, + "learning_rate": 5.014762277898643e-05, + "loss": 0.4024, + "step": 1404000 + }, + { + "epoch": 0.84, + "learning_rate": 5.014552701335698e-05, + "loss": 0.3961, + "step": 1404500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0143427047796415e-05, + "loss": 0.4119, + "step": 1405000 + }, + { + "epoch": 0.84, + "learning_rate": 5.0141327082235855e-05, + "loss": 0.4088, + "step": 1405500 + }, + { + "epoch": 0.84, + "learning_rate": 5.013922711667529e-05, + "loss": 0.4121, + "step": 1406000 + }, + { + "epoch": 0.84, + "learning_rate": 5.013712715111472e-05, + "loss": 0.4036, + "step": 1406500 + }, + { + "epoch": 0.84, + "learning_rate": 5.013502718555416e-05, + "loss": 0.4122, + "step": 1407000 + }, + { + "epoch": 0.84, + "learning_rate": 5.013292721999359e-05, + "loss": 0.412, + "step": 1407500 + }, + { + "epoch": 0.84, + "learning_rate": 5.013082725443302e-05, + "loss": 0.4032, + "step": 1408000 + }, + { + "epoch": 0.84, + "learning_rate": 5.01287356887347e-05, + "loss": 0.4128, + "step": 1408500 + }, + { + "epoch": 0.84, + "learning_rate": 5.0126635723174136e-05, + "loss": 0.403, + "step": 1409000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0124535757613576e-05, + "loss": 0.4054, + "step": 1409500 + }, + { + "epoch": 0.85, + "learning_rate": 5.012243579205301e-05, + "loss": 0.4034, + "step": 1410000 + }, + { + "epoch": 0.85, + "learning_rate": 5.012033582649244e-05, + "loss": 0.4079, + "step": 1410500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0118235860931884e-05, + "loss": 0.4091, + "step": 1411000 + }, + { + "epoch": 0.85, + "learning_rate": 5.011613589537132e-05, + "loss": 0.4042, + "step": 1411500 + }, + { + "epoch": 0.85, + "learning_rate": 5.011403592981075e-05, + "loss": 0.4029, + "step": 1412000 + }, + { + "epoch": 0.85, + "learning_rate": 5.011194016418131e-05, + "loss": 0.401, + "step": 1412500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0109840198620744e-05, + "loss": 0.3962, + "step": 1413000 + }, + { + "epoch": 0.85, + "learning_rate": 5.010774023306018e-05, + "loss": 0.4038, + "step": 1413500 + }, + { + "epoch": 0.85, + "learning_rate": 5.010564026749962e-05, + "loss": 0.403, + "step": 1414000 + }, + { + "epoch": 0.85, + "learning_rate": 5.010354450187017e-05, + "loss": 0.4061, + "step": 1414500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0101444536309605e-05, + "loss": 0.3967, + "step": 1415000 + }, + { + "epoch": 0.85, + "learning_rate": 5.009934457074904e-05, + "loss": 0.4102, + "step": 1415500 + }, + { + "epoch": 0.85, + "learning_rate": 5.009724460518848e-05, + "loss": 0.4101, + "step": 1416000 + }, + { + "epoch": 0.85, + "learning_rate": 5.009514463962791e-05, + "loss": 0.4026, + "step": 1416500 + }, + { + "epoch": 0.85, + "learning_rate": 5.009304467406734e-05, + "loss": 0.4097, + "step": 1417000 + }, + { + "epoch": 0.85, + "learning_rate": 5.009094470850678e-05, + "loss": 0.4048, + "step": 1417500 + }, + { + "epoch": 0.85, + "learning_rate": 5.008884894287734e-05, + "loss": 0.4115, + "step": 1418000 + }, + { + "epoch": 0.85, + "learning_rate": 5.008674897731677e-05, + "loss": 0.397, + "step": 1418500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0084649011756206e-05, + "loss": 0.4046, + "step": 1419000 + }, + { + "epoch": 0.85, + "learning_rate": 5.008254904619564e-05, + "loss": 0.3951, + "step": 1419500 + }, + { + "epoch": 0.85, + "learning_rate": 5.008044908063507e-05, + "loss": 0.3988, + "step": 1420000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0078349115074513e-05, + "loss": 0.4105, + "step": 1420500 + }, + { + "epoch": 0.85, + "learning_rate": 5.007624914951395e-05, + "loss": 0.414, + "step": 1421000 + }, + { + "epoch": 0.85, + "learning_rate": 5.007415338388451e-05, + "loss": 0.4008, + "step": 1421500 + }, + { + "epoch": 0.85, + "learning_rate": 5.0072053418323934e-05, + "loss": 0.4069, + "step": 1422000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0069953452763374e-05, + "loss": 0.4062, + "step": 1422500 + }, + { + "epoch": 0.85, + "learning_rate": 5.006785348720281e-05, + "loss": 0.3949, + "step": 1423000 + }, + { + "epoch": 0.85, + "learning_rate": 5.006575352164224e-05, + "loss": 0.4017, + "step": 1423500 + }, + { + "epoch": 0.85, + "learning_rate": 5.00636577560128e-05, + "loss": 0.4014, + "step": 1424000 + }, + { + "epoch": 0.85, + "learning_rate": 5.0061557790452235e-05, + "loss": 0.402, + "step": 1424500 + }, + { + "epoch": 0.85, + "learning_rate": 5.005945782489167e-05, + "loss": 0.4058, + "step": 1425000 + }, + { + "epoch": 0.85, + "learning_rate": 5.00573578593311e-05, + "loss": 0.3918, + "step": 1425500 + }, + { + "epoch": 0.85, + "learning_rate": 5.005525789377054e-05, + "loss": 0.4159, + "step": 1426000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0053162128141096e-05, + "loss": 0.3947, + "step": 1426500 + }, + { + "epoch": 0.86, + "learning_rate": 5.005106216258053e-05, + "loss": 0.4117, + "step": 1427000 + }, + { + "epoch": 0.86, + "learning_rate": 5.004896219701997e-05, + "loss": 0.3985, + "step": 1427500 + }, + { + "epoch": 0.86, + "learning_rate": 5.00468622314594e-05, + "loss": 0.41, + "step": 1428000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0044762265898836e-05, + "loss": 0.3975, + "step": 1428500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0042662300338277e-05, + "loss": 0.4008, + "step": 1429000 + }, + { + "epoch": 0.86, + "learning_rate": 5.004056233477771e-05, + "loss": 0.4091, + "step": 1429500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0038466569148264e-05, + "loss": 0.3979, + "step": 1430000 + }, + { + "epoch": 0.86, + "learning_rate": 5.00363666035877e-05, + "loss": 0.3948, + "step": 1430500 + }, + { + "epoch": 0.86, + "learning_rate": 5.003426663802714e-05, + "loss": 0.4046, + "step": 1431000 + }, + { + "epoch": 0.86, + "learning_rate": 5.003216667246657e-05, + "loss": 0.3988, + "step": 1431500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0030066706906004e-05, + "loss": 0.3981, + "step": 1432000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0027966741345444e-05, + "loss": 0.4074, + "step": 1432500 + }, + { + "epoch": 0.86, + "learning_rate": 5.002586677578488e-05, + "loss": 0.4058, + "step": 1433000 + }, + { + "epoch": 0.86, + "learning_rate": 5.002376681022431e-05, + "loss": 0.402, + "step": 1433500 + }, + { + "epoch": 0.86, + "learning_rate": 5.0021671044594865e-05, + "loss": 0.3948, + "step": 1434000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0019575278965425e-05, + "loss": 0.4009, + "step": 1434500 + }, + { + "epoch": 0.86, + "learning_rate": 5.001747531340486e-05, + "loss": 0.3991, + "step": 1435000 + }, + { + "epoch": 0.86, + "learning_rate": 5.001537534784429e-05, + "loss": 0.4049, + "step": 1435500 + }, + { + "epoch": 0.86, + "learning_rate": 5.001327538228373e-05, + "loss": 0.4014, + "step": 1436000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0011175416723166e-05, + "loss": 0.4044, + "step": 1436500 + }, + { + "epoch": 0.86, + "learning_rate": 5.000907965109372e-05, + "loss": 0.4175, + "step": 1437000 + }, + { + "epoch": 0.86, + "learning_rate": 5.000697968553315e-05, + "loss": 0.403, + "step": 1437500 + }, + { + "epoch": 0.86, + "learning_rate": 5.000487971997259e-05, + "loss": 0.4026, + "step": 1438000 + }, + { + "epoch": 0.86, + "learning_rate": 5.0002779754412027e-05, + "loss": 0.4022, + "step": 1438500 + }, + { + "epoch": 0.86, + "learning_rate": 5.000067978885146e-05, + "loss": 0.4008, + "step": 1439000 + }, + { + "epoch": 0.86, + "learning_rate": 4.9998584023222014e-05, + "loss": 0.3995, + "step": 1439500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9996484057661454e-05, + "loss": 0.3989, + "step": 1440000 + }, + { + "epoch": 0.86, + "learning_rate": 4.999438409210089e-05, + "loss": 0.4022, + "step": 1440500 + }, + { + "epoch": 0.86, + "learning_rate": 4.999228412654032e-05, + "loss": 0.3912, + "step": 1441000 + }, + { + "epoch": 0.86, + "learning_rate": 4.999018416097976e-05, + "loss": 0.4076, + "step": 1441500 + }, + { + "epoch": 0.86, + "learning_rate": 4.9988088395350315e-05, + "loss": 0.4023, + "step": 1442000 + }, + { + "epoch": 0.86, + "learning_rate": 4.998598842978975e-05, + "loss": 0.4136, + "step": 1442500 + }, + { + "epoch": 0.87, + "learning_rate": 4.998388846422919e-05, + "loss": 0.4029, + "step": 1443000 + }, + { + "epoch": 0.87, + "learning_rate": 4.998178849866862e-05, + "loss": 0.4037, + "step": 1443500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9979688533108055e-05, + "loss": 0.3998, + "step": 1444000 + }, + { + "epoch": 0.87, + "learning_rate": 4.997759276747861e-05, + "loss": 0.4144, + "step": 1444500 + }, + { + "epoch": 0.87, + "learning_rate": 4.997549280191805e-05, + "loss": 0.3977, + "step": 1445000 + }, + { + "epoch": 0.87, + "learning_rate": 4.997339283635748e-05, + "loss": 0.4052, + "step": 1445500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9971292870796916e-05, + "loss": 0.4064, + "step": 1446000 + }, + { + "epoch": 0.87, + "learning_rate": 4.996919710516747e-05, + "loss": 0.401, + "step": 1446500 + }, + { + "epoch": 0.87, + "learning_rate": 4.996709713960691e-05, + "loss": 0.3947, + "step": 1447000 + }, + { + "epoch": 0.87, + "learning_rate": 4.996499717404634e-05, + "loss": 0.4047, + "step": 1447500 + }, + { + "epoch": 0.87, + "learning_rate": 4.996289720848578e-05, + "loss": 0.4045, + "step": 1448000 + }, + { + "epoch": 0.87, + "learning_rate": 4.996079724292522e-05, + "loss": 0.432, + "step": 1448500 + }, + { + "epoch": 0.87, + "learning_rate": 4.995869727736465e-05, + "loss": 0.3994, + "step": 1449000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9956597311804084e-05, + "loss": 0.4011, + "step": 1449500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9954497346243524e-05, + "loss": 0.3977, + "step": 1450000 + }, + { + "epoch": 0.87, + "learning_rate": 4.995240158061408e-05, + "loss": 0.3981, + "step": 1450500 + }, + { + "epoch": 0.87, + "learning_rate": 4.995030161505351e-05, + "loss": 0.4046, + "step": 1451000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9948205849424065e-05, + "loss": 0.4028, + "step": 1451500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9946105883863505e-05, + "loss": 0.3943, + "step": 1452000 + }, + { + "epoch": 0.87, + "learning_rate": 4.994400591830294e-05, + "loss": 0.4071, + "step": 1452500 + }, + { + "epoch": 0.87, + "learning_rate": 4.994190595274237e-05, + "loss": 0.398, + "step": 1453000 + }, + { + "epoch": 0.87, + "learning_rate": 4.993980598718181e-05, + "loss": 0.4154, + "step": 1453500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9937710221552366e-05, + "loss": 0.4076, + "step": 1454000 + }, + { + "epoch": 0.87, + "learning_rate": 4.99356102559918e-05, + "loss": 0.3971, + "step": 1454500 + }, + { + "epoch": 0.87, + "learning_rate": 4.993351029043123e-05, + "loss": 0.4051, + "step": 1455000 + }, + { + "epoch": 0.87, + "learning_rate": 4.993141032487067e-05, + "loss": 0.418, + "step": 1455500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9929310359310106e-05, + "loss": 0.4116, + "step": 1456000 + }, + { + "epoch": 0.87, + "learning_rate": 4.992721039374954e-05, + "loss": 0.3935, + "step": 1456500 + }, + { + "epoch": 0.87, + "learning_rate": 4.99251146281201e-05, + "loss": 0.4023, + "step": 1457000 + }, + { + "epoch": 0.87, + "learning_rate": 4.9923014662559533e-05, + "loss": 0.4001, + "step": 1457500 + }, + { + "epoch": 0.87, + "learning_rate": 4.992091469699897e-05, + "loss": 0.3998, + "step": 1458000 + }, + { + "epoch": 0.87, + "learning_rate": 4.991881473143841e-05, + "loss": 0.4061, + "step": 1458500 + }, + { + "epoch": 0.87, + "learning_rate": 4.991671476587784e-05, + "loss": 0.3977, + "step": 1459000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9914619000248394e-05, + "loss": 0.3896, + "step": 1459500 + }, + { + "epoch": 0.88, + "learning_rate": 4.991251903468783e-05, + "loss": 0.4001, + "step": 1460000 + }, + { + "epoch": 0.88, + "learning_rate": 4.991041906912727e-05, + "loss": 0.4089, + "step": 1460500 + }, + { + "epoch": 0.88, + "learning_rate": 4.99083191035667e-05, + "loss": 0.4105, + "step": 1461000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9906223337937255e-05, + "loss": 0.4008, + "step": 1461500 + }, + { + "epoch": 0.88, + "learning_rate": 4.990412337237669e-05, + "loss": 0.4138, + "step": 1462000 + }, + { + "epoch": 0.88, + "learning_rate": 4.990202340681613e-05, + "loss": 0.4091, + "step": 1462500 + }, + { + "epoch": 0.88, + "learning_rate": 4.989992344125556e-05, + "loss": 0.4117, + "step": 1463000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9897823475694996e-05, + "loss": 0.4029, + "step": 1463500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9895723510134436e-05, + "loss": 0.4021, + "step": 1464000 + }, + { + "epoch": 0.88, + "learning_rate": 4.989362354457387e-05, + "loss": 0.4018, + "step": 1464500 + }, + { + "epoch": 0.88, + "learning_rate": 4.989152357901331e-05, + "loss": 0.4049, + "step": 1465000 + }, + { + "epoch": 0.88, + "learning_rate": 4.988942781338386e-05, + "loss": 0.3994, + "step": 1465500 + }, + { + "epoch": 0.88, + "learning_rate": 4.988733204775442e-05, + "loss": 0.4045, + "step": 1466000 + }, + { + "epoch": 0.88, + "learning_rate": 4.988523208219385e-05, + "loss": 0.3953, + "step": 1466500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9883132116633284e-05, + "loss": 0.3998, + "step": 1467000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9881032151072724e-05, + "loss": 0.3889, + "step": 1467500 + }, + { + "epoch": 0.88, + "learning_rate": 4.987893218551216e-05, + "loss": 0.4007, + "step": 1468000 + }, + { + "epoch": 0.88, + "learning_rate": 4.987683221995159e-05, + "loss": 0.399, + "step": 1468500 + }, + { + "epoch": 0.88, + "learning_rate": 4.987473225439103e-05, + "loss": 0.4028, + "step": 1469000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9872632288830464e-05, + "loss": 0.4031, + "step": 1469500 + }, + { + "epoch": 0.88, + "learning_rate": 4.987053652320102e-05, + "loss": 0.3927, + "step": 1470000 + }, + { + "epoch": 0.88, + "learning_rate": 4.986843655764046e-05, + "loss": 0.3958, + "step": 1470500 + }, + { + "epoch": 0.88, + "learning_rate": 4.986633659207989e-05, + "loss": 0.4044, + "step": 1471000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9864236626519325e-05, + "loss": 0.3999, + "step": 1471500 + }, + { + "epoch": 0.88, + "learning_rate": 4.986214086088988e-05, + "loss": 0.4079, + "step": 1472000 + }, + { + "epoch": 0.88, + "learning_rate": 4.986004089532932e-05, + "loss": 0.3988, + "step": 1472500 + }, + { + "epoch": 0.88, + "learning_rate": 4.985794092976875e-05, + "loss": 0.3995, + "step": 1473000 + }, + { + "epoch": 0.88, + "learning_rate": 4.9855840964208186e-05, + "loss": 0.3984, + "step": 1473500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9853740998647626e-05, + "loss": 0.4, + "step": 1474000 + }, + { + "epoch": 0.88, + "learning_rate": 4.985164523301818e-05, + "loss": 0.4049, + "step": 1474500 + }, + { + "epoch": 0.88, + "learning_rate": 4.984954526745761e-05, + "loss": 0.3961, + "step": 1475000 + }, + { + "epoch": 0.88, + "learning_rate": 4.984744950182817e-05, + "loss": 0.4049, + "step": 1475500 + }, + { + "epoch": 0.88, + "learning_rate": 4.98453495362676e-05, + "loss": 0.4066, + "step": 1476000 + }, + { + "epoch": 0.89, + "learning_rate": 4.984324957070704e-05, + "loss": 0.403, + "step": 1476500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9841149605146474e-05, + "loss": 0.4142, + "step": 1477000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9839049639585914e-05, + "loss": 0.4006, + "step": 1477500 + }, + { + "epoch": 0.89, + "learning_rate": 4.983694967402535e-05, + "loss": 0.4, + "step": 1478000 + }, + { + "epoch": 0.89, + "learning_rate": 4.983484970846478e-05, + "loss": 0.4009, + "step": 1478500 + }, + { + "epoch": 0.89, + "learning_rate": 4.983274974290422e-05, + "loss": 0.3986, + "step": 1479000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9830653977274775e-05, + "loss": 0.3993, + "step": 1479500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982855401171421e-05, + "loss": 0.4014, + "step": 1480000 + }, + { + "epoch": 0.89, + "learning_rate": 4.982645404615364e-05, + "loss": 0.4023, + "step": 1480500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982435408059308e-05, + "loss": 0.4029, + "step": 1481000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9822254115032515e-05, + "loss": 0.408, + "step": 1481500 + }, + { + "epoch": 0.89, + "learning_rate": 4.982015834940307e-05, + "loss": 0.4104, + "step": 1482000 + }, + { + "epoch": 0.89, + "learning_rate": 4.98180583838425e-05, + "loss": 0.4011, + "step": 1482500 + }, + { + "epoch": 0.89, + "learning_rate": 4.981595841828194e-05, + "loss": 0.3961, + "step": 1483000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9813858452721376e-05, + "loss": 0.3999, + "step": 1483500 + }, + { + "epoch": 0.89, + "learning_rate": 4.981176268709193e-05, + "loss": 0.4042, + "step": 1484000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980966272153137e-05, + "loss": 0.4149, + "step": 1484500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9807562755970803e-05, + "loss": 0.4107, + "step": 1485000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980546279041024e-05, + "loss": 0.3992, + "step": 1485500 + }, + { + "epoch": 0.89, + "learning_rate": 4.980336282484968e-05, + "loss": 0.4042, + "step": 1486000 + }, + { + "epoch": 0.89, + "learning_rate": 4.980126285928911e-05, + "loss": 0.4078, + "step": 1486500 + }, + { + "epoch": 0.89, + "learning_rate": 4.979916289372854e-05, + "loss": 0.4061, + "step": 1487000 + }, + { + "epoch": 0.89, + "learning_rate": 4.979706292816798e-05, + "loss": 0.3908, + "step": 1487500 + }, + { + "epoch": 0.89, + "learning_rate": 4.979496296260741e-05, + "loss": 0.4056, + "step": 1488000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9792862997046844e-05, + "loss": 0.4031, + "step": 1488500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9790763031486285e-05, + "loss": 0.3924, + "step": 1489000 + }, + { + "epoch": 0.89, + "learning_rate": 4.978866306592572e-05, + "loss": 0.3941, + "step": 1489500 + }, + { + "epoch": 0.89, + "learning_rate": 4.978656730029627e-05, + "loss": 0.3941, + "step": 1490000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9784467334735705e-05, + "loss": 0.4008, + "step": 1490500 + }, + { + "epoch": 0.89, + "learning_rate": 4.9782367369175145e-05, + "loss": 0.4003, + "step": 1491000 + }, + { + "epoch": 0.89, + "learning_rate": 4.978026740361458e-05, + "loss": 0.3981, + "step": 1491500 + }, + { + "epoch": 0.89, + "learning_rate": 4.977817163798513e-05, + "loss": 0.3932, + "step": 1492000 + }, + { + "epoch": 0.89, + "learning_rate": 4.977607167242457e-05, + "loss": 0.3994, + "step": 1492500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9773971706864006e-05, + "loss": 0.3996, + "step": 1493000 + }, + { + "epoch": 0.9, + "learning_rate": 4.977187174130344e-05, + "loss": 0.3983, + "step": 1493500 + }, + { + "epoch": 0.9, + "learning_rate": 4.976977597567399e-05, + "loss": 0.406, + "step": 1494000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9767680210044553e-05, + "loss": 0.4002, + "step": 1494500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9765580244483994e-05, + "loss": 0.4054, + "step": 1495000 + }, + { + "epoch": 0.9, + "learning_rate": 4.976348027892343e-05, + "loss": 0.3898, + "step": 1495500 + }, + { + "epoch": 0.9, + "learning_rate": 4.976138031336286e-05, + "loss": 0.3999, + "step": 1496000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9759280347802294e-05, + "loss": 0.4097, + "step": 1496500 + }, + { + "epoch": 0.9, + "learning_rate": 4.975718038224173e-05, + "loss": 0.4069, + "step": 1497000 + }, + { + "epoch": 0.9, + "learning_rate": 4.975508041668116e-05, + "loss": 0.4037, + "step": 1497500 + }, + { + "epoch": 0.9, + "learning_rate": 4.97529804511206e-05, + "loss": 0.4046, + "step": 1498000 + }, + { + "epoch": 0.9, + "learning_rate": 4.975088468549116e-05, + "loss": 0.3988, + "step": 1498500 + }, + { + "epoch": 0.9, + "learning_rate": 4.974878471993059e-05, + "loss": 0.4027, + "step": 1499000 + }, + { + "epoch": 0.9, + "learning_rate": 4.974668475437003e-05, + "loss": 0.407, + "step": 1499500 + }, + { + "epoch": 0.9, + "learning_rate": 4.974458478880946e-05, + "loss": 0.4007, + "step": 1500000 + }, + { + "epoch": 0.9, + "eval_loss": 0.3794560134410858, + "eval_runtime": 1129.597, + "eval_samples_per_second": 466.29, + "eval_steps_per_second": 77.715, + "step": 1500000 + }, + { + "epoch": 0.9, + "learning_rate": 4.974248902318002e-05, + "loss": 0.4074, + "step": 1500500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9740389057619456e-05, + "loss": 0.4007, + "step": 1501000 + }, + { + "epoch": 0.9, + "learning_rate": 4.973828909205889e-05, + "loss": 0.396, + "step": 1501500 + }, + { + "epoch": 0.9, + "learning_rate": 4.973618912649832e-05, + "loss": 0.3875, + "step": 1502000 + }, + { + "epoch": 0.9, + "learning_rate": 4.973409336086888e-05, + "loss": 0.403, + "step": 1502500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9731993395308317e-05, + "loss": 0.4012, + "step": 1503000 + }, + { + "epoch": 0.9, + "learning_rate": 4.972989342974775e-05, + "loss": 0.4021, + "step": 1503500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9727793464187183e-05, + "loss": 0.3887, + "step": 1504000 + }, + { + "epoch": 0.9, + "learning_rate": 4.972569349862662e-05, + "loss": 0.4006, + "step": 1504500 + }, + { + "epoch": 0.9, + "learning_rate": 4.972359353306606e-05, + "loss": 0.4025, + "step": 1505000 + }, + { + "epoch": 0.9, + "learning_rate": 4.972149356750549e-05, + "loss": 0.4123, + "step": 1505500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9719393601944924e-05, + "loss": 0.3975, + "step": 1506000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9717302036246605e-05, + "loss": 0.4035, + "step": 1506500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9715202070686045e-05, + "loss": 0.4016, + "step": 1507000 + }, + { + "epoch": 0.9, + "learning_rate": 4.971310210512548e-05, + "loss": 0.4217, + "step": 1507500 + }, + { + "epoch": 0.9, + "learning_rate": 4.971100213956491e-05, + "loss": 0.3977, + "step": 1508000 + }, + { + "epoch": 0.9, + "learning_rate": 4.9708902174004345e-05, + "loss": 0.4075, + "step": 1508500 + }, + { + "epoch": 0.9, + "learning_rate": 4.970680220844378e-05, + "loss": 0.4105, + "step": 1509000 + }, + { + "epoch": 0.91, + "learning_rate": 4.970470224288321e-05, + "loss": 0.4038, + "step": 1509500 + }, + { + "epoch": 0.91, + "learning_rate": 4.970260227732265e-05, + "loss": 0.4064, + "step": 1510000 + }, + { + "epoch": 0.91, + "learning_rate": 4.970050651169321e-05, + "loss": 0.396, + "step": 1510500 + }, + { + "epoch": 0.91, + "learning_rate": 4.969840654613264e-05, + "loss": 0.3902, + "step": 1511000 + }, + { + "epoch": 0.91, + "learning_rate": 4.969630658057207e-05, + "loss": 0.3979, + "step": 1511500 + }, + { + "epoch": 0.91, + "learning_rate": 4.969420661501151e-05, + "loss": 0.4011, + "step": 1512000 + }, + { + "epoch": 0.91, + "learning_rate": 4.969211084938207e-05, + "loss": 0.3905, + "step": 1512500 + }, + { + "epoch": 0.91, + "learning_rate": 4.969001508375263e-05, + "loss": 0.4015, + "step": 1513000 + }, + { + "epoch": 0.91, + "learning_rate": 4.968791511819206e-05, + "loss": 0.3921, + "step": 1513500 + }, + { + "epoch": 0.91, + "learning_rate": 4.96858151526315e-05, + "loss": 0.3909, + "step": 1514000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9683715187070934e-05, + "loss": 0.3968, + "step": 1514500 + }, + { + "epoch": 0.91, + "learning_rate": 4.968161522151037e-05, + "loss": 0.3973, + "step": 1515000 + }, + { + "epoch": 0.91, + "learning_rate": 4.967951945588092e-05, + "loss": 0.4059, + "step": 1515500 + }, + { + "epoch": 0.91, + "learning_rate": 4.967741949032036e-05, + "loss": 0.3962, + "step": 1516000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9675319524759795e-05, + "loss": 0.3878, + "step": 1516500 + }, + { + "epoch": 0.91, + "learning_rate": 4.967321955919923e-05, + "loss": 0.3941, + "step": 1517000 + }, + { + "epoch": 0.91, + "learning_rate": 4.967111959363867e-05, + "loss": 0.4089, + "step": 1517500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9669019628078095e-05, + "loss": 0.3994, + "step": 1518000 + }, + { + "epoch": 0.91, + "learning_rate": 4.966691966251753e-05, + "loss": 0.3919, + "step": 1518500 + }, + { + "epoch": 0.91, + "learning_rate": 4.966481969695697e-05, + "loss": 0.4007, + "step": 1519000 + }, + { + "epoch": 0.91, + "learning_rate": 4.966272393132753e-05, + "loss": 0.3994, + "step": 1519500 + }, + { + "epoch": 0.91, + "learning_rate": 4.966062396576696e-05, + "loss": 0.3998, + "step": 1520000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9658528200137516e-05, + "loss": 0.3936, + "step": 1520500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9656428234576957e-05, + "loss": 0.3968, + "step": 1521000 + }, + { + "epoch": 0.91, + "learning_rate": 4.965432826901639e-05, + "loss": 0.3943, + "step": 1521500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9652228303455823e-05, + "loss": 0.3983, + "step": 1522000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9650128337895264e-05, + "loss": 0.3962, + "step": 1522500 + }, + { + "epoch": 0.91, + "learning_rate": 4.964802837233469e-05, + "loss": 0.4097, + "step": 1523000 + }, + { + "epoch": 0.91, + "learning_rate": 4.9645928406774124e-05, + "loss": 0.4114, + "step": 1523500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9643828441213564e-05, + "loss": 0.4113, + "step": 1524000 + }, + { + "epoch": 0.91, + "learning_rate": 4.964173687551524e-05, + "loss": 0.4016, + "step": 1524500 + }, + { + "epoch": 0.91, + "learning_rate": 4.963963690995468e-05, + "loss": 0.3945, + "step": 1525000 + }, + { + "epoch": 0.91, + "learning_rate": 4.963753694439411e-05, + "loss": 0.3973, + "step": 1525500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9635436978833545e-05, + "loss": 0.4043, + "step": 1526000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9633337013272985e-05, + "loss": 0.3927, + "step": 1526500 + }, + { + "epoch": 0.92, + "learning_rate": 4.963123704771242e-05, + "loss": 0.3974, + "step": 1527000 + }, + { + "epoch": 0.92, + "learning_rate": 4.962913708215185e-05, + "loss": 0.4042, + "step": 1527500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9627037116591285e-05, + "loss": 0.3977, + "step": 1528000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9624941350961846e-05, + "loss": 0.4043, + "step": 1528500 + }, + { + "epoch": 0.92, + "learning_rate": 4.962284138540128e-05, + "loss": 0.4104, + "step": 1529000 + }, + { + "epoch": 0.92, + "learning_rate": 4.962074141984072e-05, + "loss": 0.408, + "step": 1529500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9618641454280146e-05, + "loss": 0.4034, + "step": 1530000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9616545688650707e-05, + "loss": 0.3955, + "step": 1530500 + }, + { + "epoch": 0.92, + "learning_rate": 4.961444572309014e-05, + "loss": 0.3994, + "step": 1531000 + }, + { + "epoch": 0.92, + "learning_rate": 4.961234575752958e-05, + "loss": 0.3949, + "step": 1531500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9610245791969014e-05, + "loss": 0.4051, + "step": 1532000 + }, + { + "epoch": 0.92, + "learning_rate": 4.960814582640844e-05, + "loss": 0.3973, + "step": 1532500 + }, + { + "epoch": 0.92, + "learning_rate": 4.960604586084788e-05, + "loss": 0.3957, + "step": 1533000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9603945895287314e-05, + "loss": 0.3979, + "step": 1533500 + }, + { + "epoch": 0.92, + "learning_rate": 4.960184592972675e-05, + "loss": 0.4057, + "step": 1534000 + }, + { + "epoch": 0.92, + "learning_rate": 4.959975016409731e-05, + "loss": 0.3929, + "step": 1534500 + }, + { + "epoch": 0.92, + "learning_rate": 4.959765439846787e-05, + "loss": 0.3925, + "step": 1535000 + }, + { + "epoch": 0.92, + "learning_rate": 4.95955544329073e-05, + "loss": 0.4034, + "step": 1535500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9593454467346735e-05, + "loss": 0.4077, + "step": 1536000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9591354501786175e-05, + "loss": 0.3934, + "step": 1536500 + }, + { + "epoch": 0.92, + "learning_rate": 4.95892545362256e-05, + "loss": 0.3951, + "step": 1537000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9587154570665036e-05, + "loss": 0.4154, + "step": 1537500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9585054605104476e-05, + "loss": 0.3942, + "step": 1538000 + }, + { + "epoch": 0.92, + "learning_rate": 4.958295463954391e-05, + "loss": 0.4112, + "step": 1538500 + }, + { + "epoch": 0.92, + "learning_rate": 4.958086307384559e-05, + "loss": 0.4001, + "step": 1539000 + }, + { + "epoch": 0.92, + "learning_rate": 4.957876310828502e-05, + "loss": 0.3966, + "step": 1539500 + }, + { + "epoch": 0.92, + "learning_rate": 4.957666314272446e-05, + "loss": 0.4014, + "step": 1540000 + }, + { + "epoch": 0.92, + "learning_rate": 4.95745631771639e-05, + "loss": 0.3941, + "step": 1540500 + }, + { + "epoch": 0.92, + "learning_rate": 4.957246321160333e-05, + "loss": 0.3895, + "step": 1541000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9570363246042764e-05, + "loss": 0.3986, + "step": 1541500 + }, + { + "epoch": 0.92, + "learning_rate": 4.95682632804822e-05, + "loss": 0.3963, + "step": 1542000 + }, + { + "epoch": 0.92, + "learning_rate": 4.956616751485276e-05, + "loss": 0.405, + "step": 1542500 + }, + { + "epoch": 0.93, + "learning_rate": 4.956406754929219e-05, + "loss": 0.4128, + "step": 1543000 + }, + { + "epoch": 0.93, + "learning_rate": 4.956196758373163e-05, + "loss": 0.4049, + "step": 1543500 + }, + { + "epoch": 0.93, + "learning_rate": 4.955986761817106e-05, + "loss": 0.3983, + "step": 1544000 + }, + { + "epoch": 0.93, + "learning_rate": 4.955776765261049e-05, + "loss": 0.407, + "step": 1544500 + }, + { + "epoch": 0.93, + "learning_rate": 4.955567188698105e-05, + "loss": 0.3942, + "step": 1545000 + }, + { + "epoch": 0.93, + "learning_rate": 4.955357192142049e-05, + "loss": 0.391, + "step": 1545500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9551471955859925e-05, + "loss": 0.3967, + "step": 1546000 + }, + { + "epoch": 0.93, + "learning_rate": 4.954937199029935e-05, + "loss": 0.399, + "step": 1546500 + }, + { + "epoch": 0.93, + "learning_rate": 4.954727202473879e-05, + "loss": 0.4027, + "step": 1547000 + }, + { + "epoch": 0.93, + "learning_rate": 4.954517625910935e-05, + "loss": 0.4001, + "step": 1547500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9543076293548786e-05, + "loss": 0.3956, + "step": 1548000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9540976327988226e-05, + "loss": 0.3947, + "step": 1548500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953887636242765e-05, + "loss": 0.3933, + "step": 1549000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9536776396867087e-05, + "loss": 0.4064, + "step": 1549500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953467643130653e-05, + "loss": 0.4016, + "step": 1550000 + }, + { + "epoch": 0.93, + "learning_rate": 4.953257646574596e-05, + "loss": 0.3944, + "step": 1550500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9530476500185394e-05, + "loss": 0.3909, + "step": 1551000 + }, + { + "epoch": 0.93, + "learning_rate": 4.952838073455595e-05, + "loss": 0.3999, + "step": 1551500 + }, + { + "epoch": 0.93, + "learning_rate": 4.952628076899539e-05, + "loss": 0.404, + "step": 1552000 + }, + { + "epoch": 0.93, + "learning_rate": 4.952418080343482e-05, + "loss": 0.3932, + "step": 1552500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9522080837874254e-05, + "loss": 0.3956, + "step": 1553000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9519985072244815e-05, + "loss": 0.3976, + "step": 1553500 + }, + { + "epoch": 0.93, + "learning_rate": 4.951788510668425e-05, + "loss": 0.392, + "step": 1554000 + }, + { + "epoch": 0.93, + "learning_rate": 4.951578514112368e-05, + "loss": 0.3935, + "step": 1554500 + }, + { + "epoch": 0.93, + "learning_rate": 4.951368937549424e-05, + "loss": 0.3906, + "step": 1555000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9511593609864796e-05, + "loss": 0.3897, + "step": 1555500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9509493644304236e-05, + "loss": 0.4028, + "step": 1556000 + }, + { + "epoch": 0.93, + "learning_rate": 4.950739367874367e-05, + "loss": 0.3969, + "step": 1556500 + }, + { + "epoch": 0.93, + "learning_rate": 4.95052937131831e-05, + "loss": 0.4038, + "step": 1557000 + }, + { + "epoch": 0.93, + "learning_rate": 4.950319374762254e-05, + "loss": 0.3962, + "step": 1557500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9501093782061977e-05, + "loss": 0.4027, + "step": 1558000 + }, + { + "epoch": 0.93, + "learning_rate": 4.94989938165014e-05, + "loss": 0.4158, + "step": 1558500 + }, + { + "epoch": 0.93, + "learning_rate": 4.9496893850940843e-05, + "loss": 0.4029, + "step": 1559000 + }, + { + "epoch": 0.93, + "learning_rate": 4.949479388538028e-05, + "loss": 0.386, + "step": 1559500 + }, + { + "epoch": 0.94, + "learning_rate": 4.949269391981971e-05, + "loss": 0.3911, + "step": 1560000 + }, + { + "epoch": 0.94, + "learning_rate": 4.949059395425915e-05, + "loss": 0.4012, + "step": 1560500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9488493988698584e-05, + "loss": 0.4041, + "step": 1561000 + }, + { + "epoch": 0.94, + "learning_rate": 4.948639822306914e-05, + "loss": 0.3997, + "step": 1561500 + }, + { + "epoch": 0.94, + "learning_rate": 4.948429825750858e-05, + "loss": 0.3929, + "step": 1562000 + }, + { + "epoch": 0.94, + "learning_rate": 4.948220249187914e-05, + "loss": 0.4075, + "step": 1562500 + }, + { + "epoch": 0.94, + "learning_rate": 4.948010252631857e-05, + "loss": 0.3843, + "step": 1563000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9478002560758e-05, + "loss": 0.4202, + "step": 1563500 + }, + { + "epoch": 0.94, + "learning_rate": 4.947590259519744e-05, + "loss": 0.4033, + "step": 1564000 + }, + { + "epoch": 0.94, + "learning_rate": 4.947380262963687e-05, + "loss": 0.3998, + "step": 1564500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9471702664076305e-05, + "loss": 0.3954, + "step": 1565000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9469602698515746e-05, + "loss": 0.3968, + "step": 1565500 + }, + { + "epoch": 0.94, + "learning_rate": 4.94675069328863e-05, + "loss": 0.4035, + "step": 1566000 + }, + { + "epoch": 0.94, + "learning_rate": 4.946540696732573e-05, + "loss": 0.3872, + "step": 1566500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9463307001765166e-05, + "loss": 0.3996, + "step": 1567000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9461207036204606e-05, + "loss": 0.4081, + "step": 1567500 + }, + { + "epoch": 0.94, + "learning_rate": 4.945910707064404e-05, + "loss": 0.3933, + "step": 1568000 + }, + { + "epoch": 0.94, + "learning_rate": 4.9457011305014593e-05, + "loss": 0.3907, + "step": 1568500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9454911339454034e-05, + "loss": 0.4086, + "step": 1569000 + }, + { + "epoch": 0.94, + "learning_rate": 4.945281137389347e-05, + "loss": 0.4096, + "step": 1569500 + }, + { + "epoch": 0.94, + "learning_rate": 4.94507114083329e-05, + "loss": 0.4034, + "step": 1570000 + }, + { + "epoch": 0.94, + "learning_rate": 4.944861144277234e-05, + "loss": 0.3939, + "step": 1570500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9446511477211774e-05, + "loss": 0.3917, + "step": 1571000 + }, + { + "epoch": 0.94, + "learning_rate": 4.944441151165121e-05, + "loss": 0.3904, + "step": 1571500 + }, + { + "epoch": 0.94, + "learning_rate": 4.944231574602176e-05, + "loss": 0.4004, + "step": 1572000 + }, + { + "epoch": 0.94, + "learning_rate": 4.94402157804612e-05, + "loss": 0.3958, + "step": 1572500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9438115814900635e-05, + "loss": 0.4039, + "step": 1573000 + }, + { + "epoch": 0.94, + "learning_rate": 4.943601584934007e-05, + "loss": 0.3906, + "step": 1573500 + }, + { + "epoch": 0.94, + "learning_rate": 4.943392008371062e-05, + "loss": 0.3972, + "step": 1574000 + }, + { + "epoch": 0.94, + "learning_rate": 4.943182011815006e-05, + "loss": 0.4021, + "step": 1574500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9429720152589496e-05, + "loss": 0.3954, + "step": 1575000 + }, + { + "epoch": 0.94, + "learning_rate": 4.942762018702893e-05, + "loss": 0.4037, + "step": 1575500 + }, + { + "epoch": 0.94, + "learning_rate": 4.942552022146837e-05, + "loss": 0.3884, + "step": 1576000 + }, + { + "epoch": 0.95, + "learning_rate": 4.94234202559078e-05, + "loss": 0.3984, + "step": 1576500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9421320290347236e-05, + "loss": 0.3922, + "step": 1577000 + }, + { + "epoch": 0.95, + "learning_rate": 4.94192245247178e-05, + "loss": 0.3999, + "step": 1577500 + }, + { + "epoch": 0.95, + "learning_rate": 4.941712455915723e-05, + "loss": 0.3917, + "step": 1578000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9415024593596664e-05, + "loss": 0.4033, + "step": 1578500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9412924628036104e-05, + "loss": 0.3927, + "step": 1579000 + }, + { + "epoch": 0.95, + "learning_rate": 4.941082466247554e-05, + "loss": 0.4096, + "step": 1579500 + }, + { + "epoch": 0.95, + "learning_rate": 4.940872469691497e-05, + "loss": 0.3933, + "step": 1580000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9406624731354404e-05, + "loss": 0.3939, + "step": 1580500 + }, + { + "epoch": 0.95, + "learning_rate": 4.940452476579384e-05, + "loss": 0.4043, + "step": 1581000 + }, + { + "epoch": 0.95, + "learning_rate": 4.94024290001644e-05, + "loss": 0.3971, + "step": 1581500 + }, + { + "epoch": 0.95, + "learning_rate": 4.940032903460383e-05, + "loss": 0.4013, + "step": 1582000 + }, + { + "epoch": 0.95, + "learning_rate": 4.939822906904327e-05, + "loss": 0.4005, + "step": 1582500 + }, + { + "epoch": 0.95, + "learning_rate": 4.93961291034827e-05, + "loss": 0.3918, + "step": 1583000 + }, + { + "epoch": 0.95, + "learning_rate": 4.939403333785326e-05, + "loss": 0.4037, + "step": 1583500 + }, + { + "epoch": 0.95, + "learning_rate": 4.939193337229269e-05, + "loss": 0.4008, + "step": 1584000 + }, + { + "epoch": 0.95, + "learning_rate": 4.938983340673213e-05, + "loss": 0.4018, + "step": 1584500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9387737641102686e-05, + "loss": 0.4125, + "step": 1585000 + }, + { + "epoch": 0.95, + "learning_rate": 4.938563767554212e-05, + "loss": 0.393, + "step": 1585500 + }, + { + "epoch": 0.95, + "learning_rate": 4.938353770998156e-05, + "loss": 0.3916, + "step": 1586000 + }, + { + "epoch": 0.95, + "learning_rate": 4.938143774442099e-05, + "loss": 0.3972, + "step": 1586500 + }, + { + "epoch": 0.95, + "learning_rate": 4.937933777886043e-05, + "loss": 0.4013, + "step": 1587000 + }, + { + "epoch": 0.95, + "learning_rate": 4.937723781329987e-05, + "loss": 0.4045, + "step": 1587500 + }, + { + "epoch": 0.95, + "learning_rate": 4.9375137847739294e-05, + "loss": 0.3985, + "step": 1588000 + }, + { + "epoch": 0.95, + "learning_rate": 4.937303788217873e-05, + "loss": 0.4055, + "step": 1588500 + }, + { + "epoch": 0.95, + "learning_rate": 4.937094211654929e-05, + "loss": 0.3989, + "step": 1589000 + }, + { + "epoch": 0.95, + "learning_rate": 4.936884215098873e-05, + "loss": 0.3932, + "step": 1589500 + }, + { + "epoch": 0.95, + "learning_rate": 4.936674638535928e-05, + "loss": 0.3932, + "step": 1590000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9364646419798715e-05, + "loss": 0.3953, + "step": 1590500 + }, + { + "epoch": 0.95, + "learning_rate": 4.936254645423815e-05, + "loss": 0.3987, + "step": 1591000 + }, + { + "epoch": 0.95, + "learning_rate": 4.936044648867759e-05, + "loss": 0.3867, + "step": 1591500 + }, + { + "epoch": 0.95, + "learning_rate": 4.935834652311702e-05, + "loss": 0.3898, + "step": 1592000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9356246557556455e-05, + "loss": 0.4007, + "step": 1592500 + }, + { + "epoch": 0.96, + "learning_rate": 4.935414659199589e-05, + "loss": 0.4088, + "step": 1593000 + }, + { + "epoch": 0.96, + "learning_rate": 4.935204662643532e-05, + "loss": 0.3968, + "step": 1593500 + }, + { + "epoch": 0.96, + "learning_rate": 4.934995086080588e-05, + "loss": 0.4114, + "step": 1594000 + }, + { + "epoch": 0.96, + "learning_rate": 4.934785089524532e-05, + "loss": 0.4046, + "step": 1594500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9345755129615876e-05, + "loss": 0.4072, + "step": 1595000 + }, + { + "epoch": 0.96, + "learning_rate": 4.934365516405531e-05, + "loss": 0.4097, + "step": 1595500 + }, + { + "epoch": 0.96, + "learning_rate": 4.934155519849474e-05, + "loss": 0.3958, + "step": 1596000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9339455232934184e-05, + "loss": 0.3986, + "step": 1596500 + }, + { + "epoch": 0.96, + "learning_rate": 4.933735946730474e-05, + "loss": 0.3877, + "step": 1597000 + }, + { + "epoch": 0.96, + "learning_rate": 4.933525950174417e-05, + "loss": 0.3983, + "step": 1597500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9333159536183604e-05, + "loss": 0.3961, + "step": 1598000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9331059570623044e-05, + "loss": 0.4036, + "step": 1598500 + }, + { + "epoch": 0.96, + "learning_rate": 4.93289638049936e-05, + "loss": 0.3901, + "step": 1599000 + }, + { + "epoch": 0.96, + "learning_rate": 4.932686383943303e-05, + "loss": 0.4081, + "step": 1599500 + }, + { + "epoch": 0.96, + "learning_rate": 4.932476387387247e-05, + "loss": 0.4044, + "step": 1600000 + }, + { + "epoch": 0.96, + "eval_loss": 0.37684500217437744, + "eval_runtime": 1131.2002, + "eval_samples_per_second": 465.629, + "eval_steps_per_second": 77.605, + "step": 1600000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9322663908311905e-05, + "loss": 0.3948, + "step": 1600500 + }, + { + "epoch": 0.96, + "learning_rate": 4.932056394275134e-05, + "loss": 0.3825, + "step": 1601000 + }, + { + "epoch": 0.96, + "learning_rate": 4.931846397719078e-05, + "loss": 0.3892, + "step": 1601500 + }, + { + "epoch": 0.96, + "learning_rate": 4.931636821156133e-05, + "loss": 0.3969, + "step": 1602000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9314268246000766e-05, + "loss": 0.3916, + "step": 1602500 + }, + { + "epoch": 0.96, + "learning_rate": 4.93121682804402e-05, + "loss": 0.3976, + "step": 1603000 + }, + { + "epoch": 0.96, + "learning_rate": 4.931006831487964e-05, + "loss": 0.3865, + "step": 1603500 + }, + { + "epoch": 0.96, + "learning_rate": 4.930796834931907e-05, + "loss": 0.3893, + "step": 1604000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9305872583689626e-05, + "loss": 0.3903, + "step": 1604500 + }, + { + "epoch": 0.96, + "learning_rate": 4.930377261812906e-05, + "loss": 0.4042, + "step": 1605000 + }, + { + "epoch": 0.96, + "learning_rate": 4.93016726525685e-05, + "loss": 0.3973, + "step": 1605500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9299572687007934e-05, + "loss": 0.3906, + "step": 1606000 + }, + { + "epoch": 0.96, + "learning_rate": 4.929747272144737e-05, + "loss": 0.3997, + "step": 1606500 + }, + { + "epoch": 0.96, + "learning_rate": 4.929537695581793e-05, + "loss": 0.3955, + "step": 1607000 + }, + { + "epoch": 0.96, + "learning_rate": 4.929327699025736e-05, + "loss": 0.3981, + "step": 1607500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9291177024696794e-05, + "loss": 0.4023, + "step": 1608000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9289077059136235e-05, + "loss": 0.401, + "step": 1608500 + }, + { + "epoch": 0.96, + "learning_rate": 4.928697709357567e-05, + "loss": 0.4034, + "step": 1609000 + }, + { + "epoch": 0.96, + "learning_rate": 4.9284877128015095e-05, + "loss": 0.3957, + "step": 1609500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9282777162454535e-05, + "loss": 0.3997, + "step": 1610000 + }, + { + "epoch": 0.97, + "learning_rate": 4.928067719689397e-05, + "loss": 0.3997, + "step": 1610500 + }, + { + "epoch": 0.97, + "learning_rate": 4.927858143126453e-05, + "loss": 0.3998, + "step": 1611000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9276481465703955e-05, + "loss": 0.4005, + "step": 1611500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9274385700074516e-05, + "loss": 0.3922, + "step": 1612000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9272285734513956e-05, + "loss": 0.4072, + "step": 1612500 + }, + { + "epoch": 0.97, + "learning_rate": 4.927018576895339e-05, + "loss": 0.4047, + "step": 1613000 + }, + { + "epoch": 0.97, + "learning_rate": 4.926808580339282e-05, + "loss": 0.3941, + "step": 1613500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9265985837832256e-05, + "loss": 0.3874, + "step": 1614000 + }, + { + "epoch": 0.97, + "learning_rate": 4.926389007220282e-05, + "loss": 0.386, + "step": 1614500 + }, + { + "epoch": 0.97, + "learning_rate": 4.926179010664225e-05, + "loss": 0.4041, + "step": 1615000 + }, + { + "epoch": 0.97, + "learning_rate": 4.925969014108169e-05, + "loss": 0.4068, + "step": 1615500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9257590175521124e-05, + "loss": 0.4056, + "step": 1616000 + }, + { + "epoch": 0.97, + "learning_rate": 4.925549440989168e-05, + "loss": 0.4118, + "step": 1616500 + }, + { + "epoch": 0.97, + "learning_rate": 4.925339444433111e-05, + "loss": 0.4037, + "step": 1617000 + }, + { + "epoch": 0.97, + "learning_rate": 4.925129447877055e-05, + "loss": 0.4001, + "step": 1617500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9249194513209985e-05, + "loss": 0.3976, + "step": 1618000 + }, + { + "epoch": 0.97, + "learning_rate": 4.924709874758054e-05, + "loss": 0.3889, + "step": 1618500 + }, + { + "epoch": 0.97, + "learning_rate": 4.924499878201997e-05, + "loss": 0.3877, + "step": 1619000 + }, + { + "epoch": 0.97, + "learning_rate": 4.924289881645941e-05, + "loss": 0.4045, + "step": 1619500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9240798850898845e-05, + "loss": 0.4037, + "step": 1620000 + }, + { + "epoch": 0.97, + "learning_rate": 4.92387030852694e-05, + "loss": 0.3958, + "step": 1620500 + }, + { + "epoch": 0.97, + "learning_rate": 4.923660311970884e-05, + "loss": 0.4147, + "step": 1621000 + }, + { + "epoch": 0.97, + "learning_rate": 4.923450315414827e-05, + "loss": 0.3963, + "step": 1621500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9232403188587706e-05, + "loss": 0.3923, + "step": 1622000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9230303223027146e-05, + "loss": 0.387, + "step": 1622500 + }, + { + "epoch": 0.97, + "learning_rate": 4.922820325746658e-05, + "loss": 0.3952, + "step": 1623000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9226103291906006e-05, + "loss": 0.3988, + "step": 1623500 + }, + { + "epoch": 0.97, + "learning_rate": 4.922400332634545e-05, + "loss": 0.4009, + "step": 1624000 + }, + { + "epoch": 0.97, + "learning_rate": 4.922190336078488e-05, + "loss": 0.3952, + "step": 1624500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9219803395224314e-05, + "loss": 0.3908, + "step": 1625000 + }, + { + "epoch": 0.97, + "learning_rate": 4.9217703429663754e-05, + "loss": 0.3975, + "step": 1625500 + }, + { + "epoch": 0.97, + "learning_rate": 4.921560346410319e-05, + "loss": 0.3918, + "step": 1626000 + }, + { + "epoch": 0.98, + "learning_rate": 4.921350769847374e-05, + "loss": 0.3953, + "step": 1626500 + }, + { + "epoch": 0.98, + "learning_rate": 4.921140773291318e-05, + "loss": 0.3985, + "step": 1627000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9209307767352615e-05, + "loss": 0.389, + "step": 1627500 + }, + { + "epoch": 0.98, + "learning_rate": 4.920720780179205e-05, + "loss": 0.3956, + "step": 1628000 + }, + { + "epoch": 0.98, + "learning_rate": 4.92051120361626e-05, + "loss": 0.3997, + "step": 1628500 + }, + { + "epoch": 0.98, + "learning_rate": 4.920301207060204e-05, + "loss": 0.3925, + "step": 1629000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9200912105041475e-05, + "loss": 0.3974, + "step": 1629500 + }, + { + "epoch": 0.98, + "learning_rate": 4.919881213948091e-05, + "loss": 0.3942, + "step": 1630000 + }, + { + "epoch": 0.98, + "learning_rate": 4.919672057378259e-05, + "loss": 0.4006, + "step": 1630500 + }, + { + "epoch": 0.98, + "learning_rate": 4.919462060822202e-05, + "loss": 0.3958, + "step": 1631000 + }, + { + "epoch": 0.98, + "learning_rate": 4.919252064266146e-05, + "loss": 0.3854, + "step": 1631500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9190420677100896e-05, + "loss": 0.3993, + "step": 1632000 + }, + { + "epoch": 0.98, + "learning_rate": 4.918832071154033e-05, + "loss": 0.4024, + "step": 1632500 + }, + { + "epoch": 0.98, + "learning_rate": 4.918622074597976e-05, + "loss": 0.3893, + "step": 1633000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9184124980350324e-05, + "loss": 0.3971, + "step": 1633500 + }, + { + "epoch": 0.98, + "learning_rate": 4.918202501478976e-05, + "loss": 0.3952, + "step": 1634000 + }, + { + "epoch": 0.98, + "learning_rate": 4.91799250492292e-05, + "loss": 0.4021, + "step": 1634500 + }, + { + "epoch": 0.98, + "learning_rate": 4.917782508366863e-05, + "loss": 0.3965, + "step": 1635000 + }, + { + "epoch": 0.98, + "learning_rate": 4.917572511810806e-05, + "loss": 0.4006, + "step": 1635500 + }, + { + "epoch": 0.98, + "learning_rate": 4.91736251525475e-05, + "loss": 0.3991, + "step": 1636000 + }, + { + "epoch": 0.98, + "learning_rate": 4.917152518698693e-05, + "loss": 0.3962, + "step": 1636500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9169425221426365e-05, + "loss": 0.3996, + "step": 1637000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9167329455796925e-05, + "loss": 0.3964, + "step": 1637500 + }, + { + "epoch": 0.98, + "learning_rate": 4.916522949023636e-05, + "loss": 0.3975, + "step": 1638000 + }, + { + "epoch": 0.98, + "learning_rate": 4.916312952467579e-05, + "loss": 0.3948, + "step": 1638500 + }, + { + "epoch": 0.98, + "learning_rate": 4.916103375904635e-05, + "loss": 0.4023, + "step": 1639000 + }, + { + "epoch": 0.98, + "learning_rate": 4.9158933793485786e-05, + "loss": 0.3981, + "step": 1639500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9156838027856346e-05, + "loss": 0.399, + "step": 1640000 + }, + { + "epoch": 0.98, + "learning_rate": 4.915473806229578e-05, + "loss": 0.3992, + "step": 1640500 + }, + { + "epoch": 0.98, + "learning_rate": 4.915263809673521e-05, + "loss": 0.39, + "step": 1641000 + }, + { + "epoch": 0.98, + "learning_rate": 4.915053813117465e-05, + "loss": 0.401, + "step": 1641500 + }, + { + "epoch": 0.98, + "learning_rate": 4.914843816561409e-05, + "loss": 0.3941, + "step": 1642000 + }, + { + "epoch": 0.98, + "learning_rate": 4.914633820005351e-05, + "loss": 0.4024, + "step": 1642500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9144238234492954e-05, + "loss": 0.3974, + "step": 1643000 + }, + { + "epoch": 0.99, + "learning_rate": 4.914213826893239e-05, + "loss": 0.4107, + "step": 1643500 + }, + { + "epoch": 0.99, + "learning_rate": 4.914003830337182e-05, + "loss": 0.3924, + "step": 1644000 + }, + { + "epoch": 0.99, + "learning_rate": 4.913793833781126e-05, + "loss": 0.3987, + "step": 1644500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9135838372250694e-05, + "loss": 0.4004, + "step": 1645000 + }, + { + "epoch": 0.99, + "learning_rate": 4.913373840669013e-05, + "loss": 0.3931, + "step": 1645500 + }, + { + "epoch": 0.99, + "learning_rate": 4.913164264106068e-05, + "loss": 0.3895, + "step": 1646000 + }, + { + "epoch": 0.99, + "learning_rate": 4.912954687543124e-05, + "loss": 0.3896, + "step": 1646500 + }, + { + "epoch": 0.99, + "learning_rate": 4.912744690987068e-05, + "loss": 0.4015, + "step": 1647000 + }, + { + "epoch": 0.99, + "learning_rate": 4.912534694431011e-05, + "loss": 0.4104, + "step": 1647500 + }, + { + "epoch": 0.99, + "learning_rate": 4.912324697874955e-05, + "loss": 0.3931, + "step": 1648000 + }, + { + "epoch": 0.99, + "learning_rate": 4.912114701318898e-05, + "loss": 0.3975, + "step": 1648500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9119047047628416e-05, + "loss": 0.3963, + "step": 1649000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9116947082067856e-05, + "loss": 0.3865, + "step": 1649500 + }, + { + "epoch": 0.99, + "learning_rate": 4.911484711650729e-05, + "loss": 0.3903, + "step": 1650000 + }, + { + "epoch": 0.99, + "learning_rate": 4.911274715094672e-05, + "loss": 0.3948, + "step": 1650500 + }, + { + "epoch": 0.99, + "learning_rate": 4.911064718538616e-05, + "loss": 0.3986, + "step": 1651000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9108547219825597e-05, + "loss": 0.4044, + "step": 1651500 + }, + { + "epoch": 0.99, + "learning_rate": 4.910644725426503e-05, + "loss": 0.3924, + "step": 1652000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9104351488635584e-05, + "loss": 0.3915, + "step": 1652500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9102251523075024e-05, + "loss": 0.3964, + "step": 1653000 + }, + { + "epoch": 0.99, + "learning_rate": 4.910015575744558e-05, + "loss": 0.3976, + "step": 1653500 + }, + { + "epoch": 0.99, + "learning_rate": 4.909805579188501e-05, + "loss": 0.392, + "step": 1654000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9095955826324444e-05, + "loss": 0.3941, + "step": 1654500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9093855860763885e-05, + "loss": 0.3861, + "step": 1655000 + }, + { + "epoch": 0.99, + "learning_rate": 4.909175589520332e-05, + "loss": 0.3955, + "step": 1655500 + }, + { + "epoch": 0.99, + "learning_rate": 4.908965592964275e-05, + "loss": 0.395, + "step": 1656000 + }, + { + "epoch": 0.99, + "learning_rate": 4.908755596408219e-05, + "loss": 0.3958, + "step": 1656500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9085460198452745e-05, + "loss": 0.3962, + "step": 1657000 + }, + { + "epoch": 0.99, + "learning_rate": 4.908336023289218e-05, + "loss": 0.3939, + "step": 1657500 + }, + { + "epoch": 0.99, + "learning_rate": 4.908126026733162e-05, + "loss": 0.3936, + "step": 1658000 + }, + { + "epoch": 0.99, + "learning_rate": 4.907916030177105e-05, + "loss": 0.3839, + "step": 1658500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9077060336210486e-05, + "loss": 0.39, + "step": 1659000 + }, + { + "epoch": 0.99, + "learning_rate": 4.907496457058104e-05, + "loss": 0.3937, + "step": 1659500 + }, + { + "epoch": 1.0, + "learning_rate": 4.907286460502048e-05, + "loss": 0.381, + "step": 1660000 + }, + { + "epoch": 1.0, + "learning_rate": 4.907076463945991e-05, + "loss": 0.3848, + "step": 1660500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9068664673899347e-05, + "loss": 0.3931, + "step": 1661000 + }, + { + "epoch": 1.0, + "learning_rate": 4.906656470833879e-05, + "loss": 0.3917, + "step": 1661500 + }, + { + "epoch": 1.0, + "learning_rate": 4.906446474277822e-05, + "loss": 0.3957, + "step": 1662000 + }, + { + "epoch": 1.0, + "learning_rate": 4.906236477721765e-05, + "loss": 0.3948, + "step": 1662500 + }, + { + "epoch": 1.0, + "learning_rate": 4.906026901158821e-05, + "loss": 0.4079, + "step": 1663000 + }, + { + "epoch": 1.0, + "learning_rate": 4.905816904602765e-05, + "loss": 0.3979, + "step": 1663500 + }, + { + "epoch": 1.0, + "learning_rate": 4.905606908046708e-05, + "loss": 0.39, + "step": 1664000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9053969114906514e-05, + "loss": 0.3934, + "step": 1664500 + }, + { + "epoch": 1.0, + "learning_rate": 4.905186914934595e-05, + "loss": 0.4007, + "step": 1665000 + }, + { + "epoch": 1.0, + "learning_rate": 4.904976918378538e-05, + "loss": 0.3971, + "step": 1665500 + }, + { + "epoch": 1.0, + "learning_rate": 4.904766921822482e-05, + "loss": 0.3962, + "step": 1666000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9045569252664255e-05, + "loss": 0.3955, + "step": 1666500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9043473487034815e-05, + "loss": 0.3834, + "step": 1667000 + }, + { + "epoch": 1.0, + "learning_rate": 4.904137352147424e-05, + "loss": 0.3998, + "step": 1667500 + }, + { + "epoch": 1.0, + "learning_rate": 4.90392777558448e-05, + "loss": 0.3958, + "step": 1668000 + }, + { + "epoch": 1.0, + "learning_rate": 4.903717779028424e-05, + "loss": 0.3835, + "step": 1668500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9035077824723676e-05, + "loss": 0.3999, + "step": 1669000 + }, + { + "epoch": 1.0, + "learning_rate": 4.90329778591631e-05, + "loss": 0.3916, + "step": 1669500 + }, + { + "epoch": 1.0, + "learning_rate": 4.903087789360254e-05, + "loss": 0.3804, + "step": 1670000 + }, + { + "epoch": 1.0, + "learning_rate": 4.9028777928041977e-05, + "loss": 0.3816, + "step": 1670500 + }, + { + "epoch": 1.0, + "learning_rate": 4.902667796248141e-05, + "loss": 0.3923, + "step": 1671000 + }, + { + "epoch": 1.0, + "learning_rate": 4.902457799692085e-05, + "loss": 0.384, + "step": 1671500 + }, + { + "epoch": 1.0, + "learning_rate": 4.9022482231291404e-05, + "loss": 0.3817, + "step": 1672000 + }, + { + "epoch": 1.0, + "learning_rate": 4.902038226573084e-05, + "loss": 0.384, + "step": 1672500 + }, + { + "epoch": 1.0, + "learning_rate": 4.901828230017028e-05, + "loss": 0.3898, + "step": 1673000 + }, + { + "epoch": 1.0, + "learning_rate": 4.901618653454084e-05, + "loss": 0.3948, + "step": 1673500 + }, + { + "epoch": 1.0, + "learning_rate": 4.901408656898027e-05, + "loss": 0.3915, + "step": 1674000 + }, + { + "epoch": 1.0, + "learning_rate": 4.90119866034197e-05, + "loss": 0.3865, + "step": 1674500 + }, + { + "epoch": 1.0, + "learning_rate": 4.900989083779026e-05, + "loss": 0.3964, + "step": 1675000 + }, + { + "epoch": 1.0, + "learning_rate": 4.90077908722297e-05, + "loss": 0.3939, + "step": 1675500 + }, + { + "epoch": 1.0, + "learning_rate": 4.900569090666913e-05, + "loss": 0.3875, + "step": 1676000 + }, + { + "epoch": 1.01, + "learning_rate": 4.9003590941108566e-05, + "loss": 0.3883, + "step": 1676500 + }, + { + "epoch": 1.01, + "learning_rate": 4.9001490975548e-05, + "loss": 0.3887, + "step": 1677000 + }, + { + "epoch": 1.01, + "learning_rate": 4.899939100998743e-05, + "loss": 0.3924, + "step": 1677500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8997291044426866e-05, + "loss": 0.3882, + "step": 1678000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8995191078866306e-05, + "loss": 0.3929, + "step": 1678500 + }, + { + "epoch": 1.01, + "learning_rate": 4.899309531323686e-05, + "loss": 0.3843, + "step": 1679000 + }, + { + "epoch": 1.01, + "learning_rate": 4.899099534767629e-05, + "loss": 0.3836, + "step": 1679500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8988895382115733e-05, + "loss": 0.3873, + "step": 1680000 + }, + { + "epoch": 1.01, + "learning_rate": 4.898679541655517e-05, + "loss": 0.3921, + "step": 1680500 + }, + { + "epoch": 1.01, + "learning_rate": 4.89846954509946e-05, + "loss": 0.3881, + "step": 1681000 + }, + { + "epoch": 1.01, + "learning_rate": 4.898259548543404e-05, + "loss": 0.3794, + "step": 1681500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8980495519873474e-05, + "loss": 0.3859, + "step": 1682000 + }, + { + "epoch": 1.01, + "learning_rate": 4.897839975424403e-05, + "loss": 0.3885, + "step": 1682500 + }, + { + "epoch": 1.01, + "learning_rate": 4.897629978868346e-05, + "loss": 0.3768, + "step": 1683000 + }, + { + "epoch": 1.01, + "learning_rate": 4.89741998231229e-05, + "loss": 0.3897, + "step": 1683500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8972099857562335e-05, + "loss": 0.3781, + "step": 1684000 + }, + { + "epoch": 1.01, + "learning_rate": 4.896999989200177e-05, + "loss": 0.3733, + "step": 1684500 + }, + { + "epoch": 1.01, + "learning_rate": 4.896789992644121e-05, + "loss": 0.3935, + "step": 1685000 + }, + { + "epoch": 1.01, + "learning_rate": 4.896579996088064e-05, + "loss": 0.3858, + "step": 1685500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8963704195251195e-05, + "loss": 0.3901, + "step": 1686000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8961604229690636e-05, + "loss": 0.3872, + "step": 1686500 + }, + { + "epoch": 1.01, + "learning_rate": 4.895950426413007e-05, + "loss": 0.3861, + "step": 1687000 + }, + { + "epoch": 1.01, + "learning_rate": 4.89574042985695e-05, + "loss": 0.3813, + "step": 1687500 + }, + { + "epoch": 1.01, + "learning_rate": 4.895530433300894e-05, + "loss": 0.3831, + "step": 1688000 + }, + { + "epoch": 1.01, + "learning_rate": 4.8953208567379496e-05, + "loss": 0.3923, + "step": 1688500 + }, + { + "epoch": 1.01, + "learning_rate": 4.895110860181893e-05, + "loss": 0.3831, + "step": 1689000 + }, + { + "epoch": 1.01, + "learning_rate": 4.894900863625836e-05, + "loss": 0.4015, + "step": 1689500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8946908670697804e-05, + "loss": 0.3896, + "step": 1690000 + }, + { + "epoch": 1.01, + "learning_rate": 4.894480870513724e-05, + "loss": 0.3847, + "step": 1690500 + }, + { + "epoch": 1.01, + "learning_rate": 4.894270873957667e-05, + "loss": 0.3929, + "step": 1691000 + }, + { + "epoch": 1.01, + "learning_rate": 4.894060877401611e-05, + "loss": 0.3883, + "step": 1691500 + }, + { + "epoch": 1.01, + "learning_rate": 4.8938513008386664e-05, + "loss": 0.3757, + "step": 1692000 + }, + { + "epoch": 1.01, + "learning_rate": 4.89364130428261e-05, + "loss": 0.4014, + "step": 1692500 + }, + { + "epoch": 1.02, + "learning_rate": 4.893431307726553e-05, + "loss": 0.3943, + "step": 1693000 + }, + { + "epoch": 1.02, + "learning_rate": 4.893221311170497e-05, + "loss": 0.3872, + "step": 1693500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8930113146144405e-05, + "loss": 0.3849, + "step": 1694000 + }, + { + "epoch": 1.02, + "learning_rate": 4.892801318058384e-05, + "loss": 0.3798, + "step": 1694500 + }, + { + "epoch": 1.02, + "learning_rate": 4.89259174149544e-05, + "loss": 0.3853, + "step": 1695000 + }, + { + "epoch": 1.02, + "learning_rate": 4.892381744939383e-05, + "loss": 0.3878, + "step": 1695500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8921717483833266e-05, + "loss": 0.3919, + "step": 1696000 + }, + { + "epoch": 1.02, + "learning_rate": 4.89196175182727e-05, + "loss": 0.3887, + "step": 1696500 + }, + { + "epoch": 1.02, + "learning_rate": 4.891751755271213e-05, + "loss": 0.3884, + "step": 1697000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8915417587151566e-05, + "loss": 0.3883, + "step": 1697500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8913321821522126e-05, + "loss": 0.3882, + "step": 1698000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8911221855961567e-05, + "loss": 0.3787, + "step": 1698500 + }, + { + "epoch": 1.02, + "learning_rate": 4.890912189040099e-05, + "loss": 0.3788, + "step": 1699000 + }, + { + "epoch": 1.02, + "learning_rate": 4.890702192484043e-05, + "loss": 0.3907, + "step": 1699500 + }, + { + "epoch": 1.02, + "learning_rate": 4.890492195927987e-05, + "loss": 0.3863, + "step": 1700000 + }, + { + "epoch": 1.02, + "eval_loss": 0.37541428208351135, + "eval_runtime": 1122.3076, + "eval_samples_per_second": 469.319, + "eval_steps_per_second": 78.22, + "step": 1700000 + }, + { + "epoch": 1.02, + "learning_rate": 4.89028219937193e-05, + "loss": 0.3904, + "step": 1700500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8900722028158734e-05, + "loss": 0.3878, + "step": 1701000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8898622062598174e-05, + "loss": 0.3831, + "step": 1701500 + }, + { + "epoch": 1.02, + "learning_rate": 4.889652629696873e-05, + "loss": 0.3891, + "step": 1702000 + }, + { + "epoch": 1.02, + "learning_rate": 4.889442633140816e-05, + "loss": 0.3819, + "step": 1702500 + }, + { + "epoch": 1.02, + "learning_rate": 4.88923263658476e-05, + "loss": 0.3874, + "step": 1703000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8890226400287035e-05, + "loss": 0.3772, + "step": 1703500 + }, + { + "epoch": 1.02, + "learning_rate": 4.888813063465759e-05, + "loss": 0.385, + "step": 1704000 + }, + { + "epoch": 1.02, + "learning_rate": 4.888603066909702e-05, + "loss": 0.3935, + "step": 1704500 + }, + { + "epoch": 1.02, + "learning_rate": 4.888393070353646e-05, + "loss": 0.3841, + "step": 1705000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8881830737975896e-05, + "loss": 0.3909, + "step": 1705500 + }, + { + "epoch": 1.02, + "learning_rate": 4.887973077241533e-05, + "loss": 0.386, + "step": 1706000 + }, + { + "epoch": 1.02, + "learning_rate": 4.887763080685477e-05, + "loss": 0.3844, + "step": 1706500 + }, + { + "epoch": 1.02, + "learning_rate": 4.88755308412942e-05, + "loss": 0.3867, + "step": 1707000 + }, + { + "epoch": 1.02, + "learning_rate": 4.8873435075664756e-05, + "loss": 0.3864, + "step": 1707500 + }, + { + "epoch": 1.02, + "learning_rate": 4.887133511010419e-05, + "loss": 0.3833, + "step": 1708000 + }, + { + "epoch": 1.02, + "learning_rate": 4.886923514454363e-05, + "loss": 0.3811, + "step": 1708500 + }, + { + "epoch": 1.02, + "learning_rate": 4.8867135178983063e-05, + "loss": 0.3793, + "step": 1709000 + }, + { + "epoch": 1.02, + "learning_rate": 4.88650352134225e-05, + "loss": 0.3873, + "step": 1709500 + }, + { + "epoch": 1.03, + "learning_rate": 4.886293524786194e-05, + "loss": 0.3955, + "step": 1710000 + }, + { + "epoch": 1.03, + "learning_rate": 4.886083528230137e-05, + "loss": 0.3893, + "step": 1710500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8858735316740804e-05, + "loss": 0.3769, + "step": 1711000 + }, + { + "epoch": 1.03, + "learning_rate": 4.885663535118024e-05, + "loss": 0.3864, + "step": 1711500 + }, + { + "epoch": 1.03, + "learning_rate": 4.88545395855508e-05, + "loss": 0.3862, + "step": 1712000 + }, + { + "epoch": 1.03, + "learning_rate": 4.885244381992135e-05, + "loss": 0.3922, + "step": 1712500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8850343854360785e-05, + "loss": 0.379, + "step": 1713000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8848243888800225e-05, + "loss": 0.3893, + "step": 1713500 + }, + { + "epoch": 1.03, + "learning_rate": 4.884614392323966e-05, + "loss": 0.3862, + "step": 1714000 + }, + { + "epoch": 1.03, + "learning_rate": 4.884404395767909e-05, + "loss": 0.3768, + "step": 1714500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8841948192049646e-05, + "loss": 0.3982, + "step": 1715000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8839848226489086e-05, + "loss": 0.3833, + "step": 1715500 + }, + { + "epoch": 1.03, + "learning_rate": 4.883774826092852e-05, + "loss": 0.3927, + "step": 1716000 + }, + { + "epoch": 1.03, + "learning_rate": 4.883564829536795e-05, + "loss": 0.3921, + "step": 1716500 + }, + { + "epoch": 1.03, + "learning_rate": 4.883354832980739e-05, + "loss": 0.3847, + "step": 1717000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8831448364246827e-05, + "loss": 0.3878, + "step": 1717500 + }, + { + "epoch": 1.03, + "learning_rate": 4.882934839868626e-05, + "loss": 0.3921, + "step": 1718000 + }, + { + "epoch": 1.03, + "learning_rate": 4.88272484331257e-05, + "loss": 0.3954, + "step": 1718500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8825152667496254e-05, + "loss": 0.3725, + "step": 1719000 + }, + { + "epoch": 1.03, + "learning_rate": 4.882305270193569e-05, + "loss": 0.3896, + "step": 1719500 + }, + { + "epoch": 1.03, + "learning_rate": 4.882095693630624e-05, + "loss": 0.3899, + "step": 1720000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8818861170676794e-05, + "loss": 0.3885, + "step": 1720500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8816761205116235e-05, + "loss": 0.3774, + "step": 1721000 + }, + { + "epoch": 1.03, + "learning_rate": 4.881466123955567e-05, + "loss": 0.3846, + "step": 1721500 + }, + { + "epoch": 1.03, + "learning_rate": 4.88125612739951e-05, + "loss": 0.3727, + "step": 1722000 + }, + { + "epoch": 1.03, + "learning_rate": 4.881046130843454e-05, + "loss": 0.3976, + "step": 1722500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8808361342873975e-05, + "loss": 0.3755, + "step": 1723000 + }, + { + "epoch": 1.03, + "learning_rate": 4.880626137731341e-05, + "loss": 0.3887, + "step": 1723500 + }, + { + "epoch": 1.03, + "learning_rate": 4.880416141175285e-05, + "loss": 0.3837, + "step": 1724000 + }, + { + "epoch": 1.03, + "learning_rate": 4.880206144619228e-05, + "loss": 0.3811, + "step": 1724500 + }, + { + "epoch": 1.03, + "learning_rate": 4.8799965680562836e-05, + "loss": 0.3925, + "step": 1725000 + }, + { + "epoch": 1.03, + "learning_rate": 4.8797865715002276e-05, + "loss": 0.3876, + "step": 1725500 + }, + { + "epoch": 1.03, + "learning_rate": 4.879576574944171e-05, + "loss": 0.3947, + "step": 1726000 + }, + { + "epoch": 1.04, + "learning_rate": 4.879366578388114e-05, + "loss": 0.3963, + "step": 1726500 + }, + { + "epoch": 1.04, + "learning_rate": 4.879156581832058e-05, + "loss": 0.3801, + "step": 1727000 + }, + { + "epoch": 1.04, + "learning_rate": 4.878946585276002e-05, + "loss": 0.3892, + "step": 1727500 + }, + { + "epoch": 1.04, + "learning_rate": 4.878736588719945e-05, + "loss": 0.3846, + "step": 1728000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8785270121570004e-05, + "loss": 0.392, + "step": 1728500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8783170156009444e-05, + "loss": 0.3906, + "step": 1729000 + }, + { + "epoch": 1.04, + "learning_rate": 4.878107019044888e-05, + "loss": 0.3756, + "step": 1729500 + }, + { + "epoch": 1.04, + "learning_rate": 4.877897022488831e-05, + "loss": 0.3881, + "step": 1730000 + }, + { + "epoch": 1.04, + "learning_rate": 4.877687025932775e-05, + "loss": 0.3846, + "step": 1730500 + }, + { + "epoch": 1.04, + "learning_rate": 4.877477029376718e-05, + "loss": 0.3888, + "step": 1731000 + }, + { + "epoch": 1.04, + "learning_rate": 4.877267032820661e-05, + "loss": 0.3821, + "step": 1731500 + }, + { + "epoch": 1.04, + "learning_rate": 4.877057456257717e-05, + "loss": 0.399, + "step": 1732000 + }, + { + "epoch": 1.04, + "learning_rate": 4.876847459701661e-05, + "loss": 0.3959, + "step": 1732500 + }, + { + "epoch": 1.04, + "learning_rate": 4.876637463145604e-05, + "loss": 0.3828, + "step": 1733000 + }, + { + "epoch": 1.04, + "learning_rate": 4.876427466589548e-05, + "loss": 0.3772, + "step": 1733500 + }, + { + "epoch": 1.04, + "learning_rate": 4.876217890026604e-05, + "loss": 0.3855, + "step": 1734000 + }, + { + "epoch": 1.04, + "learning_rate": 4.876007893470547e-05, + "loss": 0.3872, + "step": 1734500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8757978969144906e-05, + "loss": 0.3823, + "step": 1735000 + }, + { + "epoch": 1.04, + "learning_rate": 4.875587900358434e-05, + "loss": 0.3869, + "step": 1735500 + }, + { + "epoch": 1.04, + "learning_rate": 4.87537832379549e-05, + "loss": 0.391, + "step": 1736000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8751683272394333e-05, + "loss": 0.3868, + "step": 1736500 + }, + { + "epoch": 1.04, + "learning_rate": 4.874958330683377e-05, + "loss": 0.3862, + "step": 1737000 + }, + { + "epoch": 1.04, + "learning_rate": 4.874748334127321e-05, + "loss": 0.383, + "step": 1737500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8745383375712634e-05, + "loss": 0.3899, + "step": 1738000 + }, + { + "epoch": 1.04, + "learning_rate": 4.874328341015207e-05, + "loss": 0.3857, + "step": 1738500 + }, + { + "epoch": 1.04, + "learning_rate": 4.874118344459151e-05, + "loss": 0.3899, + "step": 1739000 + }, + { + "epoch": 1.04, + "learning_rate": 4.873908347903094e-05, + "loss": 0.3954, + "step": 1739500 + }, + { + "epoch": 1.04, + "learning_rate": 4.87369877134015e-05, + "loss": 0.3873, + "step": 1740000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8734887747840935e-05, + "loss": 0.3944, + "step": 1740500 + }, + { + "epoch": 1.04, + "learning_rate": 4.873278778228037e-05, + "loss": 0.393, + "step": 1741000 + }, + { + "epoch": 1.04, + "learning_rate": 4.873069201665093e-05, + "loss": 0.3877, + "step": 1741500 + }, + { + "epoch": 1.04, + "learning_rate": 4.872859625102148e-05, + "loss": 0.3905, + "step": 1742000 + }, + { + "epoch": 1.04, + "learning_rate": 4.8726496285460916e-05, + "loss": 0.395, + "step": 1742500 + }, + { + "epoch": 1.04, + "learning_rate": 4.8724396319900356e-05, + "loss": 0.3855, + "step": 1743000 + }, + { + "epoch": 1.05, + "learning_rate": 4.872229635433979e-05, + "loss": 0.3909, + "step": 1743500 + }, + { + "epoch": 1.05, + "learning_rate": 4.872019638877922e-05, + "loss": 0.3915, + "step": 1744000 + }, + { + "epoch": 1.05, + "learning_rate": 4.871809642321866e-05, + "loss": 0.3826, + "step": 1744500 + }, + { + "epoch": 1.05, + "learning_rate": 4.871599645765809e-05, + "loss": 0.3906, + "step": 1745000 + }, + { + "epoch": 1.05, + "learning_rate": 4.871389649209753e-05, + "loss": 0.3871, + "step": 1745500 + }, + { + "epoch": 1.05, + "learning_rate": 4.871179652653696e-05, + "loss": 0.385, + "step": 1746000 + }, + { + "epoch": 1.05, + "learning_rate": 4.87096965609764e-05, + "loss": 0.3826, + "step": 1746500 + }, + { + "epoch": 1.05, + "learning_rate": 4.870760079534696e-05, + "loss": 0.3924, + "step": 1747000 + }, + { + "epoch": 1.05, + "learning_rate": 4.870550082978639e-05, + "loss": 0.401, + "step": 1747500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8703400864225824e-05, + "loss": 0.3897, + "step": 1748000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8701305098596384e-05, + "loss": 0.3883, + "step": 1748500 + }, + { + "epoch": 1.05, + "learning_rate": 4.869920513303582e-05, + "loss": 0.3856, + "step": 1749000 + }, + { + "epoch": 1.05, + "learning_rate": 4.869710516747526e-05, + "loss": 0.3912, + "step": 1749500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8695005201914685e-05, + "loss": 0.3805, + "step": 1750000 + }, + { + "epoch": 1.05, + "learning_rate": 4.869290523635412e-05, + "loss": 0.3994, + "step": 1750500 + }, + { + "epoch": 1.05, + "learning_rate": 4.869080527079356e-05, + "loss": 0.3863, + "step": 1751000 + }, + { + "epoch": 1.05, + "learning_rate": 4.868870530523299e-05, + "loss": 0.3833, + "step": 1751500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8686605339672425e-05, + "loss": 0.3917, + "step": 1752000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8684505374111866e-05, + "loss": 0.3894, + "step": 1752500 + }, + { + "epoch": 1.05, + "learning_rate": 4.86824054085513e-05, + "loss": 0.3819, + "step": 1753000 + }, + { + "epoch": 1.05, + "learning_rate": 4.868030964292185e-05, + "loss": 0.3797, + "step": 1753500 + }, + { + "epoch": 1.05, + "learning_rate": 4.867820967736129e-05, + "loss": 0.3961, + "step": 1754000 + }, + { + "epoch": 1.05, + "learning_rate": 4.8676109711800726e-05, + "loss": 0.3866, + "step": 1754500 + }, + { + "epoch": 1.05, + "learning_rate": 4.867400974624016e-05, + "loss": 0.3865, + "step": 1755000 + }, + { + "epoch": 1.05, + "learning_rate": 4.86719097806796e-05, + "loss": 0.3816, + "step": 1755500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8669809815119034e-05, + "loss": 0.376, + "step": 1756000 + }, + { + "epoch": 1.05, + "learning_rate": 4.866770984955847e-05, + "loss": 0.3793, + "step": 1756500 + }, + { + "epoch": 1.05, + "learning_rate": 4.866560988399791e-05, + "loss": 0.3793, + "step": 1757000 + }, + { + "epoch": 1.05, + "learning_rate": 4.866351411836846e-05, + "loss": 0.3906, + "step": 1757500 + }, + { + "epoch": 1.05, + "learning_rate": 4.8661414152807894e-05, + "loss": 0.3885, + "step": 1758000 + }, + { + "epoch": 1.05, + "learning_rate": 4.865931418724733e-05, + "loss": 0.3825, + "step": 1758500 + }, + { + "epoch": 1.05, + "learning_rate": 4.865721842161788e-05, + "loss": 0.3931, + "step": 1759000 + }, + { + "epoch": 1.05, + "learning_rate": 4.865511845605732e-05, + "loss": 0.3815, + "step": 1759500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8653018490496755e-05, + "loss": 0.3938, + "step": 1760000 + }, + { + "epoch": 1.06, + "learning_rate": 4.865091852493619e-05, + "loss": 0.3911, + "step": 1760500 + }, + { + "epoch": 1.06, + "learning_rate": 4.864882275930675e-05, + "loss": 0.3852, + "step": 1761000 + }, + { + "epoch": 1.06, + "learning_rate": 4.864672699367731e-05, + "loss": 0.3847, + "step": 1761500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8644627028116736e-05, + "loss": 0.3929, + "step": 1762000 + }, + { + "epoch": 1.06, + "learning_rate": 4.864252706255617e-05, + "loss": 0.3846, + "step": 1762500 + }, + { + "epoch": 1.06, + "learning_rate": 4.864043129692673e-05, + "loss": 0.3799, + "step": 1763000 + }, + { + "epoch": 1.06, + "learning_rate": 4.863833133136617e-05, + "loss": 0.3945, + "step": 1763500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8636231365805597e-05, + "loss": 0.3853, + "step": 1764000 + }, + { + "epoch": 1.06, + "learning_rate": 4.863413140024503e-05, + "loss": 0.3918, + "step": 1764500 + }, + { + "epoch": 1.06, + "learning_rate": 4.863203143468447e-05, + "loss": 0.391, + "step": 1765000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8629931469123904e-05, + "loss": 0.3959, + "step": 1765500 + }, + { + "epoch": 1.06, + "learning_rate": 4.862783150356334e-05, + "loss": 0.3926, + "step": 1766000 + }, + { + "epoch": 1.06, + "learning_rate": 4.862573153800278e-05, + "loss": 0.3802, + "step": 1766500 + }, + { + "epoch": 1.06, + "learning_rate": 4.862363157244221e-05, + "loss": 0.3799, + "step": 1767000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8621531606881644e-05, + "loss": 0.3911, + "step": 1767500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8619431641321085e-05, + "loss": 0.3793, + "step": 1768000 + }, + { + "epoch": 1.06, + "learning_rate": 4.861733167576052e-05, + "loss": 0.3913, + "step": 1768500 + }, + { + "epoch": 1.06, + "learning_rate": 4.861523171019995e-05, + "loss": 0.385, + "step": 1769000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8613131744639385e-05, + "loss": 0.391, + "step": 1769500 + }, + { + "epoch": 1.06, + "learning_rate": 4.8611035979009945e-05, + "loss": 0.3795, + "step": 1770000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860893601344938e-05, + "loss": 0.3885, + "step": 1770500 + }, + { + "epoch": 1.06, + "learning_rate": 4.860683604788882e-05, + "loss": 0.3787, + "step": 1771000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860473608232825e-05, + "loss": 0.3874, + "step": 1771500 + }, + { + "epoch": 1.06, + "learning_rate": 4.860263611676768e-05, + "loss": 0.3908, + "step": 1772000 + }, + { + "epoch": 1.06, + "learning_rate": 4.860053615120712e-05, + "loss": 0.3824, + "step": 1772500 + }, + { + "epoch": 1.06, + "learning_rate": 4.859843618564655e-05, + "loss": 0.3894, + "step": 1773000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8596336220085986e-05, + "loss": 0.3823, + "step": 1773500 + }, + { + "epoch": 1.06, + "learning_rate": 4.859424045445655e-05, + "loss": 0.3939, + "step": 1774000 + }, + { + "epoch": 1.06, + "learning_rate": 4.85921446888271e-05, + "loss": 0.3903, + "step": 1774500 + }, + { + "epoch": 1.06, + "learning_rate": 4.859004472326654e-05, + "loss": 0.3919, + "step": 1775000 + }, + { + "epoch": 1.06, + "learning_rate": 4.8587944757705974e-05, + "loss": 0.3901, + "step": 1775500 + }, + { + "epoch": 1.06, + "learning_rate": 4.858584479214541e-05, + "loss": 0.3819, + "step": 1776000 + }, + { + "epoch": 1.07, + "learning_rate": 4.858374482658485e-05, + "loss": 0.3833, + "step": 1776500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8581644861024274e-05, + "loss": 0.4008, + "step": 1777000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8579549095394835e-05, + "loss": 0.3826, + "step": 1777500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8577449129834275e-05, + "loss": 0.3932, + "step": 1778000 + }, + { + "epoch": 1.07, + "learning_rate": 4.857534916427371e-05, + "loss": 0.3849, + "step": 1778500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8573249198713135e-05, + "loss": 0.3826, + "step": 1779000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8571149233152575e-05, + "loss": 0.3908, + "step": 1779500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8569053467523136e-05, + "loss": 0.3839, + "step": 1780000 + }, + { + "epoch": 1.07, + "learning_rate": 4.856695350196257e-05, + "loss": 0.3907, + "step": 1780500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8564853536402e-05, + "loss": 0.3857, + "step": 1781000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8562753570841436e-05, + "loss": 0.384, + "step": 1781500 + }, + { + "epoch": 1.07, + "learning_rate": 4.856065360528087e-05, + "loss": 0.3784, + "step": 1782000 + }, + { + "epoch": 1.07, + "learning_rate": 4.85585536397203e-05, + "loss": 0.3901, + "step": 1782500 + }, + { + "epoch": 1.07, + "learning_rate": 4.855645787409086e-05, + "loss": 0.3793, + "step": 1783000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8554357908530303e-05, + "loss": 0.3789, + "step": 1783500 + }, + { + "epoch": 1.07, + "learning_rate": 4.855225794296973e-05, + "loss": 0.384, + "step": 1784000 + }, + { + "epoch": 1.07, + "learning_rate": 4.855015797740917e-05, + "loss": 0.3803, + "step": 1784500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8548058011848604e-05, + "loss": 0.3828, + "step": 1785000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8545966446150284e-05, + "loss": 0.4057, + "step": 1785500 + }, + { + "epoch": 1.07, + "learning_rate": 4.854387068052084e-05, + "loss": 0.4014, + "step": 1786000 + }, + { + "epoch": 1.07, + "learning_rate": 4.854177071496027e-05, + "loss": 0.383, + "step": 1786500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8539670749399705e-05, + "loss": 0.3803, + "step": 1787000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8537570783839145e-05, + "loss": 0.379, + "step": 1787500 + }, + { + "epoch": 1.07, + "learning_rate": 4.853547081827858e-05, + "loss": 0.3838, + "step": 1788000 + }, + { + "epoch": 1.07, + "learning_rate": 4.853337085271801e-05, + "loss": 0.382, + "step": 1788500 + }, + { + "epoch": 1.07, + "learning_rate": 4.853127088715745e-05, + "loss": 0.3894, + "step": 1789000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8529170921596886e-05, + "loss": 0.3936, + "step": 1789500 + }, + { + "epoch": 1.07, + "learning_rate": 4.852707095603632e-05, + "loss": 0.3914, + "step": 1790000 + }, + { + "epoch": 1.07, + "learning_rate": 4.852497099047576e-05, + "loss": 0.3938, + "step": 1790500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8522871024915186e-05, + "loss": 0.3932, + "step": 1791000 + }, + { + "epoch": 1.07, + "learning_rate": 4.8520771059354626e-05, + "loss": 0.3812, + "step": 1791500 + }, + { + "epoch": 1.07, + "learning_rate": 4.851867109379406e-05, + "loss": 0.3874, + "step": 1792000 + }, + { + "epoch": 1.07, + "learning_rate": 4.851657532816462e-05, + "loss": 0.3901, + "step": 1792500 + }, + { + "epoch": 1.07, + "learning_rate": 4.8514475362604054e-05, + "loss": 0.3874, + "step": 1793000 + }, + { + "epoch": 1.08, + "learning_rate": 4.851237539704349e-05, + "loss": 0.3868, + "step": 1793500 + }, + { + "epoch": 1.08, + "learning_rate": 4.851027543148292e-05, + "loss": 0.3923, + "step": 1794000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8508175465922354e-05, + "loss": 0.3934, + "step": 1794500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8506075500361794e-05, + "loss": 0.3794, + "step": 1795000 + }, + { + "epoch": 1.08, + "learning_rate": 4.850397553480123e-05, + "loss": 0.3842, + "step": 1795500 + }, + { + "epoch": 1.08, + "learning_rate": 4.850187556924066e-05, + "loss": 0.3918, + "step": 1796000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8499779803611215e-05, + "loss": 0.3919, + "step": 1796500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8497679838050655e-05, + "loss": 0.3865, + "step": 1797000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8495584072421215e-05, + "loss": 0.3809, + "step": 1797500 + }, + { + "epoch": 1.08, + "learning_rate": 4.849348410686065e-05, + "loss": 0.3839, + "step": 1798000 + }, + { + "epoch": 1.08, + "learning_rate": 4.849138414130008e-05, + "loss": 0.378, + "step": 1798500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8489284175739516e-05, + "loss": 0.3871, + "step": 1799000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8487188410110076e-05, + "loss": 0.3898, + "step": 1799500 + }, + { + "epoch": 1.08, + "learning_rate": 4.848508844454951e-05, + "loss": 0.3869, + "step": 1800000 + }, + { + "epoch": 1.08, + "eval_loss": 0.3730049133300781, + "eval_runtime": 1122.2369, + "eval_samples_per_second": 469.348, + "eval_steps_per_second": 78.225, + "step": 1800000 + }, + { + "epoch": 1.08, + "learning_rate": 4.848298847898894e-05, + "loss": 0.3832, + "step": 1800500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8480888513428376e-05, + "loss": 0.3851, + "step": 1801000 + }, + { + "epoch": 1.08, + "learning_rate": 4.847878854786781e-05, + "loss": 0.3911, + "step": 1801500 + }, + { + "epoch": 1.08, + "learning_rate": 4.847668858230725e-05, + "loss": 0.3917, + "step": 1802000 + }, + { + "epoch": 1.08, + "learning_rate": 4.847459281667781e-05, + "loss": 0.3871, + "step": 1802500 + }, + { + "epoch": 1.08, + "learning_rate": 4.847249285111724e-05, + "loss": 0.3916, + "step": 1803000 + }, + { + "epoch": 1.08, + "learning_rate": 4.847039288555667e-05, + "loss": 0.3815, + "step": 1803500 + }, + { + "epoch": 1.08, + "learning_rate": 4.846829711992723e-05, + "loss": 0.3927, + "step": 1804000 + }, + { + "epoch": 1.08, + "learning_rate": 4.846620135429779e-05, + "loss": 0.3978, + "step": 1804500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8464101388737225e-05, + "loss": 0.3767, + "step": 1805000 + }, + { + "epoch": 1.08, + "learning_rate": 4.846200142317666e-05, + "loss": 0.38, + "step": 1805500 + }, + { + "epoch": 1.08, + "learning_rate": 4.84599014576161e-05, + "loss": 0.3917, + "step": 1806000 + }, + { + "epoch": 1.08, + "learning_rate": 4.845780149205553e-05, + "loss": 0.391, + "step": 1806500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8455701526494965e-05, + "loss": 0.3864, + "step": 1807000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8453601560934406e-05, + "loss": 0.3811, + "step": 1807500 + }, + { + "epoch": 1.08, + "learning_rate": 4.845150159537383e-05, + "loss": 0.3821, + "step": 1808000 + }, + { + "epoch": 1.08, + "learning_rate": 4.8449401629813266e-05, + "loss": 0.3937, + "step": 1808500 + }, + { + "epoch": 1.08, + "learning_rate": 4.8447301664252706e-05, + "loss": 0.3879, + "step": 1809000 + }, + { + "epoch": 1.08, + "learning_rate": 4.844520169869214e-05, + "loss": 0.3912, + "step": 1809500 + }, + { + "epoch": 1.09, + "learning_rate": 4.844310173313157e-05, + "loss": 0.3915, + "step": 1810000 + }, + { + "epoch": 1.09, + "learning_rate": 4.844100176757101e-05, + "loss": 0.3805, + "step": 1810500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8438901802010447e-05, + "loss": 0.3835, + "step": 1811000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8436806036381e-05, + "loss": 0.3797, + "step": 1811500 + }, + { + "epoch": 1.09, + "learning_rate": 4.843470607082044e-05, + "loss": 0.3893, + "step": 1812000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8432606105259874e-05, + "loss": 0.3827, + "step": 1812500 + }, + { + "epoch": 1.09, + "learning_rate": 4.843050613969931e-05, + "loss": 0.3751, + "step": 1813000 + }, + { + "epoch": 1.09, + "learning_rate": 4.842840617413875e-05, + "loss": 0.3842, + "step": 1813500 + }, + { + "epoch": 1.09, + "learning_rate": 4.842630620857818e-05, + "loss": 0.3922, + "step": 1814000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8424206243017614e-05, + "loss": 0.3774, + "step": 1814500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8422106277457055e-05, + "loss": 0.3884, + "step": 1815000 + }, + { + "epoch": 1.09, + "learning_rate": 4.842001051182761e-05, + "loss": 0.3936, + "step": 1815500 + }, + { + "epoch": 1.09, + "learning_rate": 4.841791054626704e-05, + "loss": 0.388, + "step": 1816000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8415810580706475e-05, + "loss": 0.3834, + "step": 1816500 + }, + { + "epoch": 1.09, + "learning_rate": 4.841371481507703e-05, + "loss": 0.381, + "step": 1817000 + }, + { + "epoch": 1.09, + "learning_rate": 4.841161484951647e-05, + "loss": 0.3799, + "step": 1817500 + }, + { + "epoch": 1.09, + "learning_rate": 4.84095148839559e-05, + "loss": 0.3851, + "step": 1818000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8407414918395336e-05, + "loss": 0.385, + "step": 1818500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8405314952834776e-05, + "loss": 0.3944, + "step": 1819000 + }, + { + "epoch": 1.09, + "learning_rate": 4.840321498727421e-05, + "loss": 0.3894, + "step": 1819500 + }, + { + "epoch": 1.09, + "learning_rate": 4.840111502171364e-05, + "loss": 0.3807, + "step": 1820000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8399015056153076e-05, + "loss": 0.3926, + "step": 1820500 + }, + { + "epoch": 1.09, + "learning_rate": 4.839691509059251e-05, + "loss": 0.3945, + "step": 1821000 + }, + { + "epoch": 1.09, + "learning_rate": 4.839481512503195e-05, + "loss": 0.3791, + "step": 1821500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8392715159471384e-05, + "loss": 0.3839, + "step": 1822000 + }, + { + "epoch": 1.09, + "learning_rate": 4.8390619393841944e-05, + "loss": 0.3792, + "step": 1822500 + }, + { + "epoch": 1.09, + "learning_rate": 4.838851942828137e-05, + "loss": 0.3857, + "step": 1823000 + }, + { + "epoch": 1.09, + "learning_rate": 4.838641946272081e-05, + "loss": 0.3857, + "step": 1823500 + }, + { + "epoch": 1.09, + "learning_rate": 4.8384319497160244e-05, + "loss": 0.3929, + "step": 1824000 + }, + { + "epoch": 1.09, + "learning_rate": 4.838221953159968e-05, + "loss": 0.3916, + "step": 1824500 + }, + { + "epoch": 1.09, + "learning_rate": 4.838012796590136e-05, + "loss": 0.3908, + "step": 1825000 + }, + { + "epoch": 1.09, + "learning_rate": 4.837802800034079e-05, + "loss": 0.3962, + "step": 1825500 + }, + { + "epoch": 1.09, + "learning_rate": 4.837592803478023e-05, + "loss": 0.3875, + "step": 1826000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8373828069219665e-05, + "loss": 0.4018, + "step": 1826500 + }, + { + "epoch": 1.1, + "learning_rate": 4.83717281036591e-05, + "loss": 0.387, + "step": 1827000 + }, + { + "epoch": 1.1, + "learning_rate": 4.836962813809853e-05, + "loss": 0.3838, + "step": 1827500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8367528172537966e-05, + "loss": 0.3786, + "step": 1828000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8365428206977406e-05, + "loss": 0.3841, + "step": 1828500 + }, + { + "epoch": 1.1, + "learning_rate": 4.836332824141684e-05, + "loss": 0.3852, + "step": 1829000 + }, + { + "epoch": 1.1, + "learning_rate": 4.836122827585627e-05, + "loss": 0.3851, + "step": 1829500 + }, + { + "epoch": 1.1, + "learning_rate": 4.835912831029571e-05, + "loss": 0.3858, + "step": 1830000 + }, + { + "epoch": 1.1, + "learning_rate": 4.835703254466627e-05, + "loss": 0.3832, + "step": 1830500 + }, + { + "epoch": 1.1, + "learning_rate": 4.83549325791057e-05, + "loss": 0.3832, + "step": 1831000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8352832613545134e-05, + "loss": 0.3763, + "step": 1831500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8350732647984574e-05, + "loss": 0.3858, + "step": 1832000 + }, + { + "epoch": 1.1, + "learning_rate": 4.834863268242401e-05, + "loss": 0.3895, + "step": 1832500 + }, + { + "epoch": 1.1, + "learning_rate": 4.834653271686344e-05, + "loss": 0.3862, + "step": 1833000 + }, + { + "epoch": 1.1, + "learning_rate": 4.834443275130288e-05, + "loss": 0.3824, + "step": 1833500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8342332785742315e-05, + "loss": 0.3773, + "step": 1834000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8340241220043995e-05, + "loss": 0.3891, + "step": 1834500 + }, + { + "epoch": 1.1, + "learning_rate": 4.833814125448342e-05, + "loss": 0.3788, + "step": 1835000 + }, + { + "epoch": 1.1, + "learning_rate": 4.833604128892286e-05, + "loss": 0.3841, + "step": 1835500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8333941323362295e-05, + "loss": 0.3895, + "step": 1836000 + }, + { + "epoch": 1.1, + "learning_rate": 4.833184135780173e-05, + "loss": 0.3887, + "step": 1836500 + }, + { + "epoch": 1.1, + "learning_rate": 4.832974139224117e-05, + "loss": 0.3787, + "step": 1837000 + }, + { + "epoch": 1.1, + "learning_rate": 4.832764562661172e-05, + "loss": 0.3877, + "step": 1837500 + }, + { + "epoch": 1.1, + "learning_rate": 4.8325545661051156e-05, + "loss": 0.3863, + "step": 1838000 + }, + { + "epoch": 1.1, + "learning_rate": 4.832344569549059e-05, + "loss": 0.3794, + "step": 1838500 + }, + { + "epoch": 1.1, + "learning_rate": 4.832134572993003e-05, + "loss": 0.3775, + "step": 1839000 + }, + { + "epoch": 1.1, + "learning_rate": 4.831924576436946e-05, + "loss": 0.3843, + "step": 1839500 + }, + { + "epoch": 1.1, + "learning_rate": 4.83171457988089e-05, + "loss": 0.3916, + "step": 1840000 + }, + { + "epoch": 1.1, + "learning_rate": 4.831505003317945e-05, + "loss": 0.3876, + "step": 1840500 + }, + { + "epoch": 1.1, + "learning_rate": 4.831295006761889e-05, + "loss": 0.3765, + "step": 1841000 + }, + { + "epoch": 1.1, + "learning_rate": 4.8310850102058324e-05, + "loss": 0.3802, + "step": 1841500 + }, + { + "epoch": 1.1, + "learning_rate": 4.830875013649776e-05, + "loss": 0.3904, + "step": 1842000 + }, + { + "epoch": 1.1, + "learning_rate": 4.83066501709372e-05, + "loss": 0.3806, + "step": 1842500 + }, + { + "epoch": 1.1, + "learning_rate": 4.830455020537663e-05, + "loss": 0.3879, + "step": 1843000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8302454439747185e-05, + "loss": 0.3845, + "step": 1843500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8300354474186625e-05, + "loss": 0.3817, + "step": 1844000 + }, + { + "epoch": 1.11, + "learning_rate": 4.829825450862606e-05, + "loss": 0.3811, + "step": 1844500 + }, + { + "epoch": 1.11, + "learning_rate": 4.829615454306549e-05, + "loss": 0.3963, + "step": 1845000 + }, + { + "epoch": 1.11, + "learning_rate": 4.829405457750493e-05, + "loss": 0.38, + "step": 1845500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8291954611944366e-05, + "loss": 0.3761, + "step": 1846000 + }, + { + "epoch": 1.11, + "learning_rate": 4.828985884631492e-05, + "loss": 0.3914, + "step": 1846500 + }, + { + "epoch": 1.11, + "learning_rate": 4.828775888075435e-05, + "loss": 0.3886, + "step": 1847000 + }, + { + "epoch": 1.11, + "learning_rate": 4.828565891519379e-05, + "loss": 0.3822, + "step": 1847500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8283558949633226e-05, + "loss": 0.3911, + "step": 1848000 + }, + { + "epoch": 1.11, + "learning_rate": 4.828145898407266e-05, + "loss": 0.3821, + "step": 1848500 + }, + { + "epoch": 1.11, + "learning_rate": 4.82793590185121e-05, + "loss": 0.3819, + "step": 1849000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8277263252882654e-05, + "loss": 0.3834, + "step": 1849500 + }, + { + "epoch": 1.11, + "learning_rate": 4.827516328732209e-05, + "loss": 0.3818, + "step": 1850000 + }, + { + "epoch": 1.11, + "learning_rate": 4.827306332176152e-05, + "loss": 0.3919, + "step": 1850500 + }, + { + "epoch": 1.11, + "learning_rate": 4.827096335620096e-05, + "loss": 0.3839, + "step": 1851000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8268867590571514e-05, + "loss": 0.3853, + "step": 1851500 + }, + { + "epoch": 1.11, + "learning_rate": 4.826676762501095e-05, + "loss": 0.3892, + "step": 1852000 + }, + { + "epoch": 1.11, + "learning_rate": 4.826466765945039e-05, + "loss": 0.3789, + "step": 1852500 + }, + { + "epoch": 1.11, + "learning_rate": 4.826256769388982e-05, + "loss": 0.3776, + "step": 1853000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8260467728329255e-05, + "loss": 0.3923, + "step": 1853500 + }, + { + "epoch": 1.11, + "learning_rate": 4.825837196269981e-05, + "loss": 0.3838, + "step": 1854000 + }, + { + "epoch": 1.11, + "learning_rate": 4.825627199713925e-05, + "loss": 0.3759, + "step": 1854500 + }, + { + "epoch": 1.11, + "learning_rate": 4.82541762315098e-05, + "loss": 0.3762, + "step": 1855000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8252076265949236e-05, + "loss": 0.3844, + "step": 1855500 + }, + { + "epoch": 1.11, + "learning_rate": 4.824997630038867e-05, + "loss": 0.394, + "step": 1856000 + }, + { + "epoch": 1.11, + "learning_rate": 4.824787633482811e-05, + "loss": 0.3876, + "step": 1856500 + }, + { + "epoch": 1.11, + "learning_rate": 4.824577636926754e-05, + "loss": 0.3875, + "step": 1857000 + }, + { + "epoch": 1.11, + "learning_rate": 4.824367640370698e-05, + "loss": 0.391, + "step": 1857500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8241576438146417e-05, + "loss": 0.3835, + "step": 1858000 + }, + { + "epoch": 1.11, + "learning_rate": 4.823947647258585e-05, + "loss": 0.3869, + "step": 1858500 + }, + { + "epoch": 1.11, + "learning_rate": 4.8237380706956404e-05, + "loss": 0.3798, + "step": 1859000 + }, + { + "epoch": 1.11, + "learning_rate": 4.8235280741395844e-05, + "loss": 0.4017, + "step": 1859500 + }, + { + "epoch": 1.12, + "learning_rate": 4.823318077583528e-05, + "loss": 0.3846, + "step": 1860000 + }, + { + "epoch": 1.12, + "learning_rate": 4.823108081027471e-05, + "loss": 0.396, + "step": 1860500 + }, + { + "epoch": 1.12, + "learning_rate": 4.822898084471415e-05, + "loss": 0.3842, + "step": 1861000 + }, + { + "epoch": 1.12, + "learning_rate": 4.822688087915358e-05, + "loss": 0.382, + "step": 1861500 + }, + { + "epoch": 1.12, + "learning_rate": 4.822478511352414e-05, + "loss": 0.3866, + "step": 1862000 + }, + { + "epoch": 1.12, + "learning_rate": 4.822268514796357e-05, + "loss": 0.3776, + "step": 1862500 + }, + { + "epoch": 1.12, + "learning_rate": 4.822058518240301e-05, + "loss": 0.3908, + "step": 1863000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8218485216842445e-05, + "loss": 0.3894, + "step": 1863500 + }, + { + "epoch": 1.12, + "learning_rate": 4.821638525128188e-05, + "loss": 0.3822, + "step": 1864000 + }, + { + "epoch": 1.12, + "learning_rate": 4.821428948565244e-05, + "loss": 0.388, + "step": 1864500 + }, + { + "epoch": 1.12, + "learning_rate": 4.821218952009187e-05, + "loss": 0.3909, + "step": 1865000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8210089554531306e-05, + "loss": 0.381, + "step": 1865500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8207989588970746e-05, + "loss": 0.3861, + "step": 1866000 + }, + { + "epoch": 1.12, + "learning_rate": 4.820588962341017e-05, + "loss": 0.3803, + "step": 1866500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8203789657849606e-05, + "loss": 0.383, + "step": 1867000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8201689692289047e-05, + "loss": 0.3778, + "step": 1867500 + }, + { + "epoch": 1.12, + "learning_rate": 4.819959392665961e-05, + "loss": 0.381, + "step": 1868000 + }, + { + "epoch": 1.12, + "learning_rate": 4.819749396109904e-05, + "loss": 0.3945, + "step": 1868500 + }, + { + "epoch": 1.12, + "learning_rate": 4.819539399553847e-05, + "loss": 0.3919, + "step": 1869000 + }, + { + "epoch": 1.12, + "learning_rate": 4.819329822990903e-05, + "loss": 0.3882, + "step": 1869500 + }, + { + "epoch": 1.12, + "learning_rate": 4.819119826434847e-05, + "loss": 0.3737, + "step": 1870000 + }, + { + "epoch": 1.12, + "learning_rate": 4.81890982987879e-05, + "loss": 0.3878, + "step": 1870500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8186998333227335e-05, + "loss": 0.3902, + "step": 1871000 + }, + { + "epoch": 1.12, + "learning_rate": 4.818489836766677e-05, + "loss": 0.3831, + "step": 1871500 + }, + { + "epoch": 1.12, + "learning_rate": 4.81827984021062e-05, + "loss": 0.3848, + "step": 1872000 + }, + { + "epoch": 1.12, + "learning_rate": 4.818069843654564e-05, + "loss": 0.3829, + "step": 1872500 + }, + { + "epoch": 1.12, + "learning_rate": 4.8178598470985075e-05, + "loss": 0.3838, + "step": 1873000 + }, + { + "epoch": 1.12, + "learning_rate": 4.817649850542451e-05, + "loss": 0.3757, + "step": 1873500 + }, + { + "epoch": 1.12, + "learning_rate": 4.817440273979506e-05, + "loss": 0.3717, + "step": 1874000 + }, + { + "epoch": 1.12, + "learning_rate": 4.81723027742345e-05, + "loss": 0.3832, + "step": 1874500 + }, + { + "epoch": 1.12, + "learning_rate": 4.817020700860506e-05, + "loss": 0.3815, + "step": 1875000 + }, + { + "epoch": 1.12, + "learning_rate": 4.8168107043044496e-05, + "loss": 0.3904, + "step": 1875500 + }, + { + "epoch": 1.12, + "learning_rate": 4.816600707748392e-05, + "loss": 0.3786, + "step": 1876000 + }, + { + "epoch": 1.13, + "learning_rate": 4.816390711192336e-05, + "loss": 0.3805, + "step": 1876500 + }, + { + "epoch": 1.13, + "learning_rate": 4.81618071463628e-05, + "loss": 0.3818, + "step": 1877000 + }, + { + "epoch": 1.13, + "learning_rate": 4.815970718080223e-05, + "loss": 0.3885, + "step": 1877500 + }, + { + "epoch": 1.13, + "learning_rate": 4.815760721524167e-05, + "loss": 0.3829, + "step": 1878000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8155507249681104e-05, + "loss": 0.3866, + "step": 1878500 + }, + { + "epoch": 1.13, + "learning_rate": 4.815340728412054e-05, + "loss": 0.383, + "step": 1879000 + }, + { + "epoch": 1.13, + "learning_rate": 4.815130731855998e-05, + "loss": 0.3857, + "step": 1879500 + }, + { + "epoch": 1.13, + "learning_rate": 4.814920735299941e-05, + "loss": 0.3828, + "step": 1880000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8147107387438844e-05, + "loss": 0.3854, + "step": 1880500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8145011621809405e-05, + "loss": 0.3746, + "step": 1881000 + }, + { + "epoch": 1.13, + "learning_rate": 4.814291585617996e-05, + "loss": 0.3822, + "step": 1881500 + }, + { + "epoch": 1.13, + "learning_rate": 4.814081589061939e-05, + "loss": 0.3891, + "step": 1882000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8138715925058825e-05, + "loss": 0.396, + "step": 1882500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8136615959498265e-05, + "loss": 0.3855, + "step": 1883000 + }, + { + "epoch": 1.13, + "learning_rate": 4.813452019386882e-05, + "loss": 0.3837, + "step": 1883500 + }, + { + "epoch": 1.13, + "learning_rate": 4.813242022830825e-05, + "loss": 0.388, + "step": 1884000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8130320262747686e-05, + "loss": 0.3806, + "step": 1884500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8128220297187126e-05, + "loss": 0.3927, + "step": 1885000 + }, + { + "epoch": 1.13, + "learning_rate": 4.812612033162656e-05, + "loss": 0.3793, + "step": 1885500 + }, + { + "epoch": 1.13, + "learning_rate": 4.812402036606599e-05, + "loss": 0.3837, + "step": 1886000 + }, + { + "epoch": 1.13, + "learning_rate": 4.812192040050543e-05, + "loss": 0.382, + "step": 1886500 + }, + { + "epoch": 1.13, + "learning_rate": 4.811982463487599e-05, + "loss": 0.3724, + "step": 1887000 + }, + { + "epoch": 1.13, + "learning_rate": 4.811772466931542e-05, + "loss": 0.384, + "step": 1887500 + }, + { + "epoch": 1.13, + "learning_rate": 4.811562470375486e-05, + "loss": 0.3807, + "step": 1888000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8113524738194294e-05, + "loss": 0.3765, + "step": 1888500 + }, + { + "epoch": 1.13, + "learning_rate": 4.811142477263373e-05, + "loss": 0.4014, + "step": 1889000 + }, + { + "epoch": 1.13, + "learning_rate": 4.810932480707317e-05, + "loss": 0.3786, + "step": 1889500 + }, + { + "epoch": 1.13, + "learning_rate": 4.81072248415126e-05, + "loss": 0.3835, + "step": 1890000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8105124875952035e-05, + "loss": 0.3775, + "step": 1890500 + }, + { + "epoch": 1.13, + "learning_rate": 4.810302911032259e-05, + "loss": 0.3855, + "step": 1891000 + }, + { + "epoch": 1.13, + "learning_rate": 4.810092914476203e-05, + "loss": 0.3814, + "step": 1891500 + }, + { + "epoch": 1.13, + "learning_rate": 4.809882917920146e-05, + "loss": 0.3844, + "step": 1892000 + }, + { + "epoch": 1.13, + "learning_rate": 4.8096729213640895e-05, + "loss": 0.3847, + "step": 1892500 + }, + { + "epoch": 1.13, + "learning_rate": 4.8094629248080336e-05, + "loss": 0.3794, + "step": 1893000 + }, + { + "epoch": 1.14, + "learning_rate": 4.809252928251976e-05, + "loss": 0.3843, + "step": 1893500 + }, + { + "epoch": 1.14, + "learning_rate": 4.809043351689032e-05, + "loss": 0.3811, + "step": 1894000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8088333551329756e-05, + "loss": 0.3879, + "step": 1894500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8086233585769196e-05, + "loss": 0.3712, + "step": 1895000 + }, + { + "epoch": 1.14, + "learning_rate": 4.808413362020863e-05, + "loss": 0.3842, + "step": 1895500 + }, + { + "epoch": 1.14, + "learning_rate": 4.808203365464806e-05, + "loss": 0.3808, + "step": 1896000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8079937889018624e-05, + "loss": 0.3806, + "step": 1896500 + }, + { + "epoch": 1.14, + "learning_rate": 4.807783792345806e-05, + "loss": 0.382, + "step": 1897000 + }, + { + "epoch": 1.14, + "learning_rate": 4.807573795789749e-05, + "loss": 0.3868, + "step": 1897500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8073637992336924e-05, + "loss": 0.3776, + "step": 1898000 + }, + { + "epoch": 1.14, + "learning_rate": 4.807153802677636e-05, + "loss": 0.3823, + "step": 1898500 + }, + { + "epoch": 1.14, + "learning_rate": 4.806944646107804e-05, + "loss": 0.3941, + "step": 1899000 + }, + { + "epoch": 1.14, + "learning_rate": 4.806734649551747e-05, + "loss": 0.3788, + "step": 1899500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8065246529956905e-05, + "loss": 0.3895, + "step": 1900000 + }, + { + "epoch": 1.14, + "eval_loss": 0.36924830079078674, + "eval_runtime": 1118.768, + "eval_samples_per_second": 470.804, + "eval_steps_per_second": 78.468, + "step": 1900000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8063146564396345e-05, + "loss": 0.3742, + "step": 1900500 + }, + { + "epoch": 1.14, + "learning_rate": 4.806104659883578e-05, + "loss": 0.3917, + "step": 1901000 + }, + { + "epoch": 1.14, + "learning_rate": 4.805895083320633e-05, + "loss": 0.3923, + "step": 1901500 + }, + { + "epoch": 1.14, + "learning_rate": 4.805685086764577e-05, + "loss": 0.3892, + "step": 1902000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8054750902085206e-05, + "loss": 0.3879, + "step": 1902500 + }, + { + "epoch": 1.14, + "learning_rate": 4.805265093652464e-05, + "loss": 0.3825, + "step": 1903000 + }, + { + "epoch": 1.14, + "learning_rate": 4.805055097096408e-05, + "loss": 0.379, + "step": 1903500 + }, + { + "epoch": 1.14, + "learning_rate": 4.804845100540351e-05, + "loss": 0.3899, + "step": 1904000 + }, + { + "epoch": 1.14, + "learning_rate": 4.8046351039842946e-05, + "loss": 0.3952, + "step": 1904500 + }, + { + "epoch": 1.14, + "learning_rate": 4.804425107428239e-05, + "loss": 0.3812, + "step": 1905000 + }, + { + "epoch": 1.14, + "learning_rate": 4.804215110872181e-05, + "loss": 0.3904, + "step": 1905500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8040055343092374e-05, + "loss": 0.3722, + "step": 1906000 + }, + { + "epoch": 1.14, + "learning_rate": 4.803795537753181e-05, + "loss": 0.3799, + "step": 1906500 + }, + { + "epoch": 1.14, + "learning_rate": 4.803585541197125e-05, + "loss": 0.3802, + "step": 1907000 + }, + { + "epoch": 1.14, + "learning_rate": 4.803375544641068e-05, + "loss": 0.3858, + "step": 1907500 + }, + { + "epoch": 1.14, + "learning_rate": 4.8031659680781234e-05, + "loss": 0.3761, + "step": 1908000 + }, + { + "epoch": 1.14, + "learning_rate": 4.802955971522067e-05, + "loss": 0.3877, + "step": 1908500 + }, + { + "epoch": 1.14, + "learning_rate": 4.802745974966011e-05, + "loss": 0.391, + "step": 1909000 + }, + { + "epoch": 1.14, + "learning_rate": 4.802535978409954e-05, + "loss": 0.3751, + "step": 1909500 + }, + { + "epoch": 1.15, + "learning_rate": 4.8023259818538975e-05, + "loss": 0.3732, + "step": 1910000 + }, + { + "epoch": 1.15, + "learning_rate": 4.802115985297841e-05, + "loss": 0.3749, + "step": 1910500 + }, + { + "epoch": 1.15, + "learning_rate": 4.801905988741784e-05, + "loss": 0.3757, + "step": 1911000 + }, + { + "epoch": 1.15, + "learning_rate": 4.801695992185728e-05, + "loss": 0.3848, + "step": 1911500 + }, + { + "epoch": 1.15, + "learning_rate": 4.801486415622784e-05, + "loss": 0.3764, + "step": 1912000 + }, + { + "epoch": 1.15, + "learning_rate": 4.801276419066727e-05, + "loss": 0.3785, + "step": 1912500 + }, + { + "epoch": 1.15, + "learning_rate": 4.801066842503783e-05, + "loss": 0.3768, + "step": 1913000 + }, + { + "epoch": 1.15, + "learning_rate": 4.800856845947726e-05, + "loss": 0.3748, + "step": 1913500 + }, + { + "epoch": 1.15, + "learning_rate": 4.80064684939167e-05, + "loss": 0.3885, + "step": 1914000 + }, + { + "epoch": 1.15, + "learning_rate": 4.800436852835614e-05, + "loss": 0.3809, + "step": 1914500 + }, + { + "epoch": 1.15, + "learning_rate": 4.8002268562795563e-05, + "loss": 0.3818, + "step": 1915000 + }, + { + "epoch": 1.15, + "learning_rate": 4.8000172797166124e-05, + "loss": 0.3903, + "step": 1915500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7998072831605564e-05, + "loss": 0.3884, + "step": 1916000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7995972866045e-05, + "loss": 0.3864, + "step": 1916500 + }, + { + "epoch": 1.15, + "learning_rate": 4.799387290048444e-05, + "loss": 0.3831, + "step": 1917000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7991772934923864e-05, + "loss": 0.3859, + "step": 1917500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7989677169294425e-05, + "loss": 0.3852, + "step": 1918000 + }, + { + "epoch": 1.15, + "learning_rate": 4.798757720373386e-05, + "loss": 0.3775, + "step": 1918500 + }, + { + "epoch": 1.15, + "learning_rate": 4.79854772381733e-05, + "loss": 0.3841, + "step": 1919000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7983377272612725e-05, + "loss": 0.3954, + "step": 1919500 + }, + { + "epoch": 1.15, + "learning_rate": 4.798127730705216e-05, + "loss": 0.3829, + "step": 1920000 + }, + { + "epoch": 1.15, + "learning_rate": 4.797918154142272e-05, + "loss": 0.3759, + "step": 1920500 + }, + { + "epoch": 1.15, + "learning_rate": 4.797708157586216e-05, + "loss": 0.3911, + "step": 1921000 + }, + { + "epoch": 1.15, + "learning_rate": 4.797498161030159e-05, + "loss": 0.3777, + "step": 1921500 + }, + { + "epoch": 1.15, + "learning_rate": 4.797288164474102e-05, + "loss": 0.3838, + "step": 1922000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7970785879111586e-05, + "loss": 0.3774, + "step": 1922500 + }, + { + "epoch": 1.15, + "learning_rate": 4.796868591355102e-05, + "loss": 0.3844, + "step": 1923000 + }, + { + "epoch": 1.15, + "learning_rate": 4.796658594799045e-05, + "loss": 0.3995, + "step": 1923500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7964485982429894e-05, + "loss": 0.3827, + "step": 1924000 + }, + { + "epoch": 1.15, + "learning_rate": 4.796238601686932e-05, + "loss": 0.3918, + "step": 1924500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7960286051308754e-05, + "loss": 0.3963, + "step": 1925000 + }, + { + "epoch": 1.15, + "learning_rate": 4.7958190285679314e-05, + "loss": 0.3845, + "step": 1925500 + }, + { + "epoch": 1.15, + "learning_rate": 4.7956090320118754e-05, + "loss": 0.3806, + "step": 1926000 + }, + { + "epoch": 1.16, + "learning_rate": 4.795399035455819e-05, + "loss": 0.3864, + "step": 1926500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7951890388997614e-05, + "loss": 0.3938, + "step": 1927000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7949790423437055e-05, + "loss": 0.3857, + "step": 1927500 + }, + { + "epoch": 1.16, + "learning_rate": 4.794769045787649e-05, + "loss": 0.3889, + "step": 1928000 + }, + { + "epoch": 1.16, + "learning_rate": 4.794559049231592e-05, + "loss": 0.3838, + "step": 1928500 + }, + { + "epoch": 1.16, + "learning_rate": 4.794349052675536e-05, + "loss": 0.3804, + "step": 1929000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7941394761125915e-05, + "loss": 0.3869, + "step": 1929500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7939298995496476e-05, + "loss": 0.3834, + "step": 1930000 + }, + { + "epoch": 1.16, + "learning_rate": 4.793719902993591e-05, + "loss": 0.3811, + "step": 1930500 + }, + { + "epoch": 1.16, + "learning_rate": 4.793509906437535e-05, + "loss": 0.3825, + "step": 1931000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7932999098814776e-05, + "loss": 0.384, + "step": 1931500 + }, + { + "epoch": 1.16, + "learning_rate": 4.793089913325421e-05, + "loss": 0.3805, + "step": 1932000 + }, + { + "epoch": 1.16, + "learning_rate": 4.792879916769365e-05, + "loss": 0.3806, + "step": 1932500 + }, + { + "epoch": 1.16, + "learning_rate": 4.792670340206421e-05, + "loss": 0.3967, + "step": 1933000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7924603436503644e-05, + "loss": 0.3778, + "step": 1933500 + }, + { + "epoch": 1.16, + "learning_rate": 4.792250347094307e-05, + "loss": 0.3866, + "step": 1934000 + }, + { + "epoch": 1.16, + "learning_rate": 4.792040350538251e-05, + "loss": 0.3874, + "step": 1934500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7918303539821944e-05, + "loss": 0.38, + "step": 1935000 + }, + { + "epoch": 1.16, + "learning_rate": 4.791620357426138e-05, + "loss": 0.388, + "step": 1935500 + }, + { + "epoch": 1.16, + "learning_rate": 4.791410360870082e-05, + "loss": 0.3874, + "step": 1936000 + }, + { + "epoch": 1.16, + "learning_rate": 4.791200364314025e-05, + "loss": 0.3827, + "step": 1936500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7909903677579685e-05, + "loss": 0.3782, + "step": 1937000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7907807911950245e-05, + "loss": 0.3859, + "step": 1937500 + }, + { + "epoch": 1.16, + "learning_rate": 4.790570794638968e-05, + "loss": 0.382, + "step": 1938000 + }, + { + "epoch": 1.16, + "learning_rate": 4.790360798082911e-05, + "loss": 0.3858, + "step": 1938500 + }, + { + "epoch": 1.16, + "learning_rate": 4.790150801526855e-05, + "loss": 0.3842, + "step": 1939000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7899412249639106e-05, + "loss": 0.3806, + "step": 1939500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7897316484009666e-05, + "loss": 0.3809, + "step": 1940000 + }, + { + "epoch": 1.16, + "learning_rate": 4.78952165184491e-05, + "loss": 0.3978, + "step": 1940500 + }, + { + "epoch": 1.16, + "learning_rate": 4.789312075281965e-05, + "loss": 0.3888, + "step": 1941000 + }, + { + "epoch": 1.16, + "learning_rate": 4.7891020787259087e-05, + "loss": 0.3844, + "step": 1941500 + }, + { + "epoch": 1.16, + "learning_rate": 4.788892082169853e-05, + "loss": 0.3857, + "step": 1942000 + }, + { + "epoch": 1.16, + "learning_rate": 4.788682085613796e-05, + "loss": 0.3876, + "step": 1942500 + }, + { + "epoch": 1.16, + "learning_rate": 4.7884720890577394e-05, + "loss": 0.372, + "step": 1943000 + }, + { + "epoch": 1.17, + "learning_rate": 4.788262092501683e-05, + "loss": 0.3844, + "step": 1943500 + }, + { + "epoch": 1.17, + "learning_rate": 4.788052095945626e-05, + "loss": 0.3816, + "step": 1944000 + }, + { + "epoch": 1.17, + "learning_rate": 4.78784209938957e-05, + "loss": 0.3845, + "step": 1944500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7876321028335134e-05, + "loss": 0.3802, + "step": 1945000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7874225262705695e-05, + "loss": 0.3851, + "step": 1945500 + }, + { + "epoch": 1.17, + "learning_rate": 4.787212529714512e-05, + "loss": 0.3804, + "step": 1946000 + }, + { + "epoch": 1.17, + "learning_rate": 4.787002533158456e-05, + "loss": 0.3918, + "step": 1946500 + }, + { + "epoch": 1.17, + "learning_rate": 4.786792956595512e-05, + "loss": 0.3813, + "step": 1947000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7865829600394555e-05, + "loss": 0.3795, + "step": 1947500 + }, + { + "epoch": 1.17, + "learning_rate": 4.786372963483399e-05, + "loss": 0.3822, + "step": 1948000 + }, + { + "epoch": 1.17, + "learning_rate": 4.786162966927342e-05, + "loss": 0.3961, + "step": 1948500 + }, + { + "epoch": 1.17, + "learning_rate": 4.785953390364398e-05, + "loss": 0.3847, + "step": 1949000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7857433938083416e-05, + "loss": 0.3928, + "step": 1949500 + }, + { + "epoch": 1.17, + "learning_rate": 4.785533397252285e-05, + "loss": 0.3765, + "step": 1950000 + }, + { + "epoch": 1.17, + "learning_rate": 4.785323400696228e-05, + "loss": 0.3932, + "step": 1950500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7851134041401717e-05, + "loss": 0.3844, + "step": 1951000 + }, + { + "epoch": 1.17, + "learning_rate": 4.784903407584116e-05, + "loss": 0.3831, + "step": 1951500 + }, + { + "epoch": 1.17, + "learning_rate": 4.784693411028059e-05, + "loss": 0.3851, + "step": 1952000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7844834144720024e-05, + "loss": 0.3806, + "step": 1952500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7842734179159464e-05, + "loss": 0.3881, + "step": 1953000 + }, + { + "epoch": 1.17, + "learning_rate": 4.78406342135989e-05, + "loss": 0.3781, + "step": 1953500 + }, + { + "epoch": 1.17, + "learning_rate": 4.783853424803833e-05, + "loss": 0.3839, + "step": 1954000 + }, + { + "epoch": 1.17, + "learning_rate": 4.783643428247777e-05, + "loss": 0.383, + "step": 1954500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7834338516848325e-05, + "loss": 0.3829, + "step": 1955000 + }, + { + "epoch": 1.17, + "learning_rate": 4.783223855128776e-05, + "loss": 0.3821, + "step": 1955500 + }, + { + "epoch": 1.17, + "learning_rate": 4.783014278565831e-05, + "loss": 0.383, + "step": 1956000 + }, + { + "epoch": 1.17, + "learning_rate": 4.7828042820097745e-05, + "loss": 0.3812, + "step": 1956500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7825942854537185e-05, + "loss": 0.3856, + "step": 1957000 + }, + { + "epoch": 1.17, + "learning_rate": 4.782384288897662e-05, + "loss": 0.3901, + "step": 1957500 + }, + { + "epoch": 1.17, + "learning_rate": 4.782174292341605e-05, + "loss": 0.3861, + "step": 1958000 + }, + { + "epoch": 1.17, + "learning_rate": 4.781964295785549e-05, + "loss": 0.3864, + "step": 1958500 + }, + { + "epoch": 1.17, + "learning_rate": 4.7817542992294926e-05, + "loss": 0.3855, + "step": 1959000 + }, + { + "epoch": 1.17, + "learning_rate": 4.781544722666548e-05, + "loss": 0.3791, + "step": 1959500 + }, + { + "epoch": 1.18, + "learning_rate": 4.781334726110492e-05, + "loss": 0.389, + "step": 1960000 + }, + { + "epoch": 1.18, + "learning_rate": 4.781124729554435e-05, + "loss": 0.3903, + "step": 1960500 + }, + { + "epoch": 1.18, + "learning_rate": 4.780914732998379e-05, + "loss": 0.3869, + "step": 1961000 + }, + { + "epoch": 1.18, + "learning_rate": 4.780704736442323e-05, + "loss": 0.3897, + "step": 1961500 + }, + { + "epoch": 1.18, + "learning_rate": 4.780494739886266e-05, + "loss": 0.3886, + "step": 1962000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7802847433302094e-05, + "loss": 0.3893, + "step": 1962500 + }, + { + "epoch": 1.18, + "learning_rate": 4.780075166767265e-05, + "loss": 0.396, + "step": 1963000 + }, + { + "epoch": 1.18, + "learning_rate": 4.779865170211209e-05, + "loss": 0.3808, + "step": 1963500 + }, + { + "epoch": 1.18, + "learning_rate": 4.779655173655152e-05, + "loss": 0.3953, + "step": 1964000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7794451770990955e-05, + "loss": 0.3825, + "step": 1964500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7792351805430395e-05, + "loss": 0.3894, + "step": 1965000 + }, + { + "epoch": 1.18, + "learning_rate": 4.779025603980095e-05, + "loss": 0.3956, + "step": 1965500 + }, + { + "epoch": 1.18, + "learning_rate": 4.778815607424038e-05, + "loss": 0.3729, + "step": 1966000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7786056108679815e-05, + "loss": 0.3888, + "step": 1966500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7783956143119256e-05, + "loss": 0.383, + "step": 1967000 + }, + { + "epoch": 1.18, + "learning_rate": 4.778185617755869e-05, + "loss": 0.3859, + "step": 1967500 + }, + { + "epoch": 1.18, + "learning_rate": 4.777975621199812e-05, + "loss": 0.393, + "step": 1968000 + }, + { + "epoch": 1.18, + "learning_rate": 4.777766044636868e-05, + "loss": 0.3928, + "step": 1968500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7775560480808116e-05, + "loss": 0.3824, + "step": 1969000 + }, + { + "epoch": 1.18, + "learning_rate": 4.777346051524755e-05, + "loss": 0.3878, + "step": 1969500 + }, + { + "epoch": 1.18, + "learning_rate": 4.777136054968699e-05, + "loss": 0.3797, + "step": 1970000 + }, + { + "epoch": 1.18, + "learning_rate": 4.776926058412642e-05, + "loss": 0.3845, + "step": 1970500 + }, + { + "epoch": 1.18, + "learning_rate": 4.776716061856585e-05, + "loss": 0.3777, + "step": 1971000 + }, + { + "epoch": 1.18, + "learning_rate": 4.776506065300529e-05, + "loss": 0.3801, + "step": 1971500 + }, + { + "epoch": 1.18, + "learning_rate": 4.776296488737585e-05, + "loss": 0.3868, + "step": 1972000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7760864921815284e-05, + "loss": 0.3875, + "step": 1972500 + }, + { + "epoch": 1.18, + "learning_rate": 4.775876495625471e-05, + "loss": 0.3846, + "step": 1973000 + }, + { + "epoch": 1.18, + "learning_rate": 4.775666499069415e-05, + "loss": 0.3823, + "step": 1973500 + }, + { + "epoch": 1.18, + "learning_rate": 4.7754565025133585e-05, + "loss": 0.381, + "step": 1974000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7752473459435265e-05, + "loss": 0.3853, + "step": 1974500 + }, + { + "epoch": 1.18, + "learning_rate": 4.77503734938747e-05, + "loss": 0.3674, + "step": 1975000 + }, + { + "epoch": 1.18, + "learning_rate": 4.774827352831414e-05, + "loss": 0.3857, + "step": 1975500 + }, + { + "epoch": 1.18, + "learning_rate": 4.774617356275357e-05, + "loss": 0.3834, + "step": 1976000 + }, + { + "epoch": 1.18, + "learning_rate": 4.7744073597193006e-05, + "loss": 0.3871, + "step": 1976500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7741973631632446e-05, + "loss": 0.3764, + "step": 1977000 + }, + { + "epoch": 1.19, + "learning_rate": 4.773987366607187e-05, + "loss": 0.383, + "step": 1977500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7737773700511306e-05, + "loss": 0.3762, + "step": 1978000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7735673734950746e-05, + "loss": 0.3885, + "step": 1978500 + }, + { + "epoch": 1.19, + "learning_rate": 4.773357376939018e-05, + "loss": 0.3815, + "step": 1979000 + }, + { + "epoch": 1.19, + "learning_rate": 4.773147380382961e-05, + "loss": 0.3829, + "step": 1979500 + }, + { + "epoch": 1.19, + "learning_rate": 4.772937383826905e-05, + "loss": 0.3937, + "step": 1980000 + }, + { + "epoch": 1.19, + "learning_rate": 4.772727807263961e-05, + "loss": 0.3793, + "step": 1980500 + }, + { + "epoch": 1.19, + "learning_rate": 4.772517810707904e-05, + "loss": 0.3852, + "step": 1981000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7723078141518474e-05, + "loss": 0.3826, + "step": 1981500 + }, + { + "epoch": 1.19, + "learning_rate": 4.772098237588904e-05, + "loss": 0.3974, + "step": 1982000 + }, + { + "epoch": 1.19, + "learning_rate": 4.771888241032847e-05, + "loss": 0.3876, + "step": 1982500 + }, + { + "epoch": 1.19, + "learning_rate": 4.77167824447679e-05, + "loss": 0.382, + "step": 1983000 + }, + { + "epoch": 1.19, + "learning_rate": 4.771468247920734e-05, + "loss": 0.387, + "step": 1983500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7712582513646775e-05, + "loss": 0.3876, + "step": 1984000 + }, + { + "epoch": 1.19, + "learning_rate": 4.771048254808621e-05, + "loss": 0.3867, + "step": 1984500 + }, + { + "epoch": 1.19, + "learning_rate": 4.770838258252565e-05, + "loss": 0.3843, + "step": 1985000 + }, + { + "epoch": 1.19, + "learning_rate": 4.770628261696508e-05, + "loss": 0.3883, + "step": 1985500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7704186851335636e-05, + "loss": 0.3821, + "step": 1986000 + }, + { + "epoch": 1.19, + "learning_rate": 4.770208688577507e-05, + "loss": 0.387, + "step": 1986500 + }, + { + "epoch": 1.19, + "learning_rate": 4.769998692021451e-05, + "loss": 0.371, + "step": 1987000 + }, + { + "epoch": 1.19, + "learning_rate": 4.769788695465394e-05, + "loss": 0.3829, + "step": 1987500 + }, + { + "epoch": 1.19, + "learning_rate": 4.7695791189024496e-05, + "loss": 0.3859, + "step": 1988000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7693691223463937e-05, + "loss": 0.3903, + "step": 1988500 + }, + { + "epoch": 1.19, + "learning_rate": 4.769159125790337e-05, + "loss": 0.3897, + "step": 1989000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7689491292342803e-05, + "loss": 0.3785, + "step": 1989500 + }, + { + "epoch": 1.19, + "learning_rate": 4.768739552671336e-05, + "loss": 0.3821, + "step": 1990000 + }, + { + "epoch": 1.19, + "learning_rate": 4.76852955611528e-05, + "loss": 0.3855, + "step": 1990500 + }, + { + "epoch": 1.19, + "learning_rate": 4.768319559559223e-05, + "loss": 0.3799, + "step": 1991000 + }, + { + "epoch": 1.19, + "learning_rate": 4.7681095630031664e-05, + "loss": 0.387, + "step": 1991500 + }, + { + "epoch": 1.19, + "learning_rate": 4.767899986440222e-05, + "loss": 0.379, + "step": 1992000 + }, + { + "epoch": 1.19, + "learning_rate": 4.767690409877278e-05, + "loss": 0.3794, + "step": 1992500 + }, + { + "epoch": 1.19, + "learning_rate": 4.767480413321222e-05, + "loss": 0.3823, + "step": 1993000 + }, + { + "epoch": 1.2, + "learning_rate": 4.767270416765165e-05, + "loss": 0.3913, + "step": 1993500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7670604202091085e-05, + "loss": 0.3726, + "step": 1994000 + }, + { + "epoch": 1.2, + "learning_rate": 4.766850423653052e-05, + "loss": 0.3849, + "step": 1994500 + }, + { + "epoch": 1.2, + "learning_rate": 4.766640427096995e-05, + "loss": 0.3878, + "step": 1995000 + }, + { + "epoch": 1.2, + "learning_rate": 4.766430430540939e-05, + "loss": 0.3811, + "step": 1995500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7662204339848826e-05, + "loss": 0.3835, + "step": 1996000 + }, + { + "epoch": 1.2, + "learning_rate": 4.766010857421938e-05, + "loss": 0.3889, + "step": 1996500 + }, + { + "epoch": 1.2, + "learning_rate": 4.765800860865881e-05, + "loss": 0.3848, + "step": 1997000 + }, + { + "epoch": 1.2, + "learning_rate": 4.765590864309825e-05, + "loss": 0.3764, + "step": 1997500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7653808677537687e-05, + "loss": 0.392, + "step": 1998000 + }, + { + "epoch": 1.2, + "learning_rate": 4.765171291190825e-05, + "loss": 0.3937, + "step": 1998500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7649612946347674e-05, + "loss": 0.3831, + "step": 1999000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7647512980787114e-05, + "loss": 0.3791, + "step": 1999500 + }, + { + "epoch": 1.2, + "learning_rate": 4.764541301522655e-05, + "loss": 0.3742, + "step": 2000000 + }, + { + "epoch": 1.2, + "eval_loss": 0.36836302280426025, + "eval_runtime": 1117.5583, + "eval_samples_per_second": 471.313, + "eval_steps_per_second": 78.552, + "step": 2000000 + }, + { + "epoch": 1.2, + "learning_rate": 4.764331724959711e-05, + "loss": 0.3824, + "step": 2000500 + }, + { + "epoch": 1.2, + "learning_rate": 4.764121728403654e-05, + "loss": 0.3833, + "step": 2001000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7639117318475975e-05, + "loss": 0.3765, + "step": 2001500 + }, + { + "epoch": 1.2, + "learning_rate": 4.763701735291541e-05, + "loss": 0.3751, + "step": 2002000 + }, + { + "epoch": 1.2, + "learning_rate": 4.763491738735485e-05, + "loss": 0.3793, + "step": 2002500 + }, + { + "epoch": 1.2, + "learning_rate": 4.763282162172541e-05, + "loss": 0.3828, + "step": 2003000 + }, + { + "epoch": 1.2, + "learning_rate": 4.763072165616484e-05, + "loss": 0.3819, + "step": 2003500 + }, + { + "epoch": 1.2, + "learning_rate": 4.762862169060427e-05, + "loss": 0.3824, + "step": 2004000 + }, + { + "epoch": 1.2, + "learning_rate": 4.762652172504371e-05, + "loss": 0.3872, + "step": 2004500 + }, + { + "epoch": 1.2, + "learning_rate": 4.762442595941427e-05, + "loss": 0.3886, + "step": 2005000 + }, + { + "epoch": 1.2, + "learning_rate": 4.76223259938537e-05, + "loss": 0.3804, + "step": 2005500 + }, + { + "epoch": 1.2, + "learning_rate": 4.762022602829313e-05, + "loss": 0.3851, + "step": 2006000 + }, + { + "epoch": 1.2, + "learning_rate": 4.761813026266369e-05, + "loss": 0.3854, + "step": 2006500 + }, + { + "epoch": 1.2, + "learning_rate": 4.761603029710313e-05, + "loss": 0.3822, + "step": 2007000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7613930331542564e-05, + "loss": 0.3736, + "step": 2007500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7611830365982e-05, + "loss": 0.3848, + "step": 2008000 + }, + { + "epoch": 1.2, + "learning_rate": 4.760973040042143e-05, + "loss": 0.3863, + "step": 2008500 + }, + { + "epoch": 1.2, + "learning_rate": 4.7607630434860864e-05, + "loss": 0.3813, + "step": 2009000 + }, + { + "epoch": 1.2, + "learning_rate": 4.7605530469300304e-05, + "loss": 0.377, + "step": 2009500 + }, + { + "epoch": 1.21, + "learning_rate": 4.760343050373974e-05, + "loss": 0.3831, + "step": 2010000 + }, + { + "epoch": 1.21, + "learning_rate": 4.760133053817917e-05, + "loss": 0.3836, + "step": 2010500 + }, + { + "epoch": 1.21, + "learning_rate": 4.759923057261861e-05, + "loss": 0.3782, + "step": 2011000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7597130607058045e-05, + "loss": 0.3734, + "step": 2011500 + }, + { + "epoch": 1.21, + "learning_rate": 4.759503064149748e-05, + "loss": 0.3896, + "step": 2012000 + }, + { + "epoch": 1.21, + "learning_rate": 4.759293067593692e-05, + "loss": 0.3785, + "step": 2012500 + }, + { + "epoch": 1.21, + "learning_rate": 4.759083911023859e-05, + "loss": 0.3845, + "step": 2013000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7588739144678026e-05, + "loss": 0.3829, + "step": 2013500 + }, + { + "epoch": 1.21, + "learning_rate": 4.758663917911746e-05, + "loss": 0.3828, + "step": 2014000 + }, + { + "epoch": 1.21, + "learning_rate": 4.758453921355689e-05, + "loss": 0.3889, + "step": 2014500 + }, + { + "epoch": 1.21, + "learning_rate": 4.758243924799633e-05, + "loss": 0.3894, + "step": 2015000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7580339282435766e-05, + "loss": 0.3861, + "step": 2015500 + }, + { + "epoch": 1.21, + "learning_rate": 4.757824351680632e-05, + "loss": 0.3844, + "step": 2016000 + }, + { + "epoch": 1.21, + "learning_rate": 4.757614355124576e-05, + "loss": 0.3892, + "step": 2016500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7574043585685193e-05, + "loss": 0.3861, + "step": 2017000 + }, + { + "epoch": 1.21, + "learning_rate": 4.757194362012463e-05, + "loss": 0.3928, + "step": 2017500 + }, + { + "epoch": 1.21, + "learning_rate": 4.756984785449518e-05, + "loss": 0.3736, + "step": 2018000 + }, + { + "epoch": 1.21, + "learning_rate": 4.756774788893462e-05, + "loss": 0.3869, + "step": 2018500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7565647923374054e-05, + "loss": 0.3886, + "step": 2019000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7563552157744615e-05, + "loss": 0.3861, + "step": 2019500 + }, + { + "epoch": 1.21, + "learning_rate": 4.756145219218405e-05, + "loss": 0.3835, + "step": 2020000 + }, + { + "epoch": 1.21, + "learning_rate": 4.755935222662348e-05, + "loss": 0.3864, + "step": 2020500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7557252261062915e-05, + "loss": 0.3801, + "step": 2021000 + }, + { + "epoch": 1.21, + "learning_rate": 4.755515229550235e-05, + "loss": 0.3816, + "step": 2021500 + }, + { + "epoch": 1.21, + "learning_rate": 4.755305232994179e-05, + "loss": 0.3797, + "step": 2022000 + }, + { + "epoch": 1.21, + "learning_rate": 4.755095236438122e-05, + "loss": 0.3841, + "step": 2022500 + }, + { + "epoch": 1.21, + "learning_rate": 4.7548852398820656e-05, + "loss": 0.3815, + "step": 2023000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7546756633191216e-05, + "loss": 0.3877, + "step": 2023500 + }, + { + "epoch": 1.21, + "learning_rate": 4.754465666763065e-05, + "loss": 0.3938, + "step": 2024000 + }, + { + "epoch": 1.21, + "learning_rate": 4.754255670207008e-05, + "loss": 0.3915, + "step": 2024500 + }, + { + "epoch": 1.21, + "learning_rate": 4.754046093644064e-05, + "loss": 0.3897, + "step": 2025000 + }, + { + "epoch": 1.21, + "learning_rate": 4.753836097088008e-05, + "loss": 0.3855, + "step": 2025500 + }, + { + "epoch": 1.21, + "learning_rate": 4.753626100531951e-05, + "loss": 0.378, + "step": 2026000 + }, + { + "epoch": 1.21, + "learning_rate": 4.7534161039758944e-05, + "loss": 0.3861, + "step": 2026500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7532061074198384e-05, + "loss": 0.3895, + "step": 2027000 + }, + { + "epoch": 1.22, + "learning_rate": 4.752996110863782e-05, + "loss": 0.3771, + "step": 2027500 + }, + { + "epoch": 1.22, + "learning_rate": 4.752786114307725e-05, + "loss": 0.3743, + "step": 2028000 + }, + { + "epoch": 1.22, + "learning_rate": 4.752576117751669e-05, + "loss": 0.3753, + "step": 2028500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7523661211956124e-05, + "loss": 0.3781, + "step": 2029000 + }, + { + "epoch": 1.22, + "learning_rate": 4.752156124639556e-05, + "loss": 0.381, + "step": 2029500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7519461280835e-05, + "loss": 0.3912, + "step": 2030000 + }, + { + "epoch": 1.22, + "learning_rate": 4.751736551520555e-05, + "loss": 0.3873, + "step": 2030500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7515265549644985e-05, + "loss": 0.396, + "step": 2031000 + }, + { + "epoch": 1.22, + "learning_rate": 4.751316558408442e-05, + "loss": 0.39, + "step": 2031500 + }, + { + "epoch": 1.22, + "learning_rate": 4.751106561852386e-05, + "loss": 0.3745, + "step": 2032000 + }, + { + "epoch": 1.22, + "learning_rate": 4.750896565296329e-05, + "loss": 0.3771, + "step": 2032500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7506865687402726e-05, + "loss": 0.3815, + "step": 2033000 + }, + { + "epoch": 1.22, + "learning_rate": 4.750476572184216e-05, + "loss": 0.3797, + "step": 2033500 + }, + { + "epoch": 1.22, + "learning_rate": 4.750266575628159e-05, + "loss": 0.3771, + "step": 2034000 + }, + { + "epoch": 1.22, + "learning_rate": 4.750056999065215e-05, + "loss": 0.3864, + "step": 2034500 + }, + { + "epoch": 1.22, + "learning_rate": 4.749847002509159e-05, + "loss": 0.3828, + "step": 2035000 + }, + { + "epoch": 1.22, + "learning_rate": 4.749637425946215e-05, + "loss": 0.3857, + "step": 2035500 + }, + { + "epoch": 1.22, + "learning_rate": 4.749427429390158e-05, + "loss": 0.3892, + "step": 2036000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7492174328341014e-05, + "loss": 0.3812, + "step": 2036500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7490074362780454e-05, + "loss": 0.3858, + "step": 2037000 + }, + { + "epoch": 1.22, + "learning_rate": 4.748797859715101e-05, + "loss": 0.3798, + "step": 2037500 + }, + { + "epoch": 1.22, + "learning_rate": 4.748587863159044e-05, + "loss": 0.3865, + "step": 2038000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7483778666029874e-05, + "loss": 0.3879, + "step": 2038500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7481678700469315e-05, + "loss": 0.3907, + "step": 2039000 + }, + { + "epoch": 1.22, + "learning_rate": 4.747958293483987e-05, + "loss": 0.3838, + "step": 2039500 + }, + { + "epoch": 1.22, + "learning_rate": 4.74774829692793e-05, + "loss": 0.3731, + "step": 2040000 + }, + { + "epoch": 1.22, + "learning_rate": 4.747538300371874e-05, + "loss": 0.3857, + "step": 2040500 + }, + { + "epoch": 1.22, + "learning_rate": 4.7473283038158175e-05, + "loss": 0.379, + "step": 2041000 + }, + { + "epoch": 1.22, + "learning_rate": 4.747118307259761e-05, + "loss": 0.3827, + "step": 2041500 + }, + { + "epoch": 1.22, + "learning_rate": 4.746908310703705e-05, + "loss": 0.3782, + "step": 2042000 + }, + { + "epoch": 1.22, + "learning_rate": 4.7466983141476476e-05, + "loss": 0.3816, + "step": 2042500 + }, + { + "epoch": 1.22, + "learning_rate": 4.746488317591591e-05, + "loss": 0.3947, + "step": 2043000 + }, + { + "epoch": 1.23, + "learning_rate": 4.746278321035535e-05, + "loss": 0.3789, + "step": 2043500 + }, + { + "epoch": 1.23, + "learning_rate": 4.746068324479478e-05, + "loss": 0.3891, + "step": 2044000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7458583279234216e-05, + "loss": 0.3894, + "step": 2044500 + }, + { + "epoch": 1.23, + "learning_rate": 4.745648331367366e-05, + "loss": 0.3804, + "step": 2045000 + }, + { + "epoch": 1.23, + "learning_rate": 4.745438754804421e-05, + "loss": 0.3752, + "step": 2045500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7452287582483644e-05, + "loss": 0.3869, + "step": 2046000 + }, + { + "epoch": 1.23, + "learning_rate": 4.745018761692308e-05, + "loss": 0.3875, + "step": 2046500 + }, + { + "epoch": 1.23, + "learning_rate": 4.744808765136252e-05, + "loss": 0.3801, + "step": 2047000 + }, + { + "epoch": 1.23, + "learning_rate": 4.744599188573307e-05, + "loss": 0.3842, + "step": 2047500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7443891920172504e-05, + "loss": 0.3797, + "step": 2048000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7441791954611945e-05, + "loss": 0.3749, + "step": 2048500 + }, + { + "epoch": 1.23, + "learning_rate": 4.743969198905138e-05, + "loss": 0.3782, + "step": 2049000 + }, + { + "epoch": 1.23, + "learning_rate": 4.743759622342194e-05, + "loss": 0.3748, + "step": 2049500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7435496257861365e-05, + "loss": 0.3911, + "step": 2050000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7433396292300805e-05, + "loss": 0.3959, + "step": 2050500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7431300526671366e-05, + "loss": 0.3914, + "step": 2051000 + }, + { + "epoch": 1.23, + "learning_rate": 4.74292005611108e-05, + "loss": 0.3817, + "step": 2051500 + }, + { + "epoch": 1.23, + "learning_rate": 4.742710059555023e-05, + "loss": 0.387, + "step": 2052000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7425000629989666e-05, + "loss": 0.3771, + "step": 2052500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74229006644291e-05, + "loss": 0.3796, + "step": 2053000 + }, + { + "epoch": 1.23, + "learning_rate": 4.742080069886853e-05, + "loss": 0.3805, + "step": 2053500 + }, + { + "epoch": 1.23, + "learning_rate": 4.74187049332391e-05, + "loss": 0.3957, + "step": 2054000 + }, + { + "epoch": 1.23, + "learning_rate": 4.741660496767853e-05, + "loss": 0.3787, + "step": 2054500 + }, + { + "epoch": 1.23, + "learning_rate": 4.741450500211796e-05, + "loss": 0.3796, + "step": 2055000 + }, + { + "epoch": 1.23, + "learning_rate": 4.74124050365574e-05, + "loss": 0.3905, + "step": 2055500 + }, + { + "epoch": 1.23, + "learning_rate": 4.741030927092796e-05, + "loss": 0.3822, + "step": 2056000 + }, + { + "epoch": 1.23, + "learning_rate": 4.7408209305367394e-05, + "loss": 0.3777, + "step": 2056500 + }, + { + "epoch": 1.23, + "learning_rate": 4.740610933980682e-05, + "loss": 0.389, + "step": 2057000 + }, + { + "epoch": 1.23, + "learning_rate": 4.740400937424626e-05, + "loss": 0.3783, + "step": 2057500 + }, + { + "epoch": 1.23, + "learning_rate": 4.7401909408685695e-05, + "loss": 0.3916, + "step": 2058000 + }, + { + "epoch": 1.23, + "learning_rate": 4.739980944312513e-05, + "loss": 0.3771, + "step": 2058500 + }, + { + "epoch": 1.23, + "learning_rate": 4.739771367749569e-05, + "loss": 0.3803, + "step": 2059000 + }, + { + "epoch": 1.23, + "learning_rate": 4.739561371193512e-05, + "loss": 0.3826, + "step": 2059500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7393513746374555e-05, + "loss": 0.3927, + "step": 2060000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7391413780813996e-05, + "loss": 0.3802, + "step": 2060500 + }, + { + "epoch": 1.24, + "learning_rate": 4.738931381525343e-05, + "loss": 0.3782, + "step": 2061000 + }, + { + "epoch": 1.24, + "learning_rate": 4.738721804962399e-05, + "loss": 0.3888, + "step": 2061500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7385118084063416e-05, + "loss": 0.3744, + "step": 2062000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7383018118502856e-05, + "loss": 0.3852, + "step": 2062500 + }, + { + "epoch": 1.24, + "learning_rate": 4.738091815294229e-05, + "loss": 0.3824, + "step": 2063000 + }, + { + "epoch": 1.24, + "learning_rate": 4.737881818738172e-05, + "loss": 0.3891, + "step": 2063500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7376718221821164e-05, + "loss": 0.3875, + "step": 2064000 + }, + { + "epoch": 1.24, + "learning_rate": 4.73746182562606e-05, + "loss": 0.3731, + "step": 2064500 + }, + { + "epoch": 1.24, + "learning_rate": 4.737252249063115e-05, + "loss": 0.3935, + "step": 2065000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7370422525070584e-05, + "loss": 0.3867, + "step": 2065500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7368322559510024e-05, + "loss": 0.3908, + "step": 2066000 + }, + { + "epoch": 1.24, + "learning_rate": 4.736622259394946e-05, + "loss": 0.3786, + "step": 2066500 + }, + { + "epoch": 1.24, + "learning_rate": 4.736412262838889e-05, + "loss": 0.3792, + "step": 2067000 + }, + { + "epoch": 1.24, + "learning_rate": 4.736202686275945e-05, + "loss": 0.384, + "step": 2067500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7359926897198885e-05, + "loss": 0.3803, + "step": 2068000 + }, + { + "epoch": 1.24, + "learning_rate": 4.735782693163832e-05, + "loss": 0.384, + "step": 2068500 + }, + { + "epoch": 1.24, + "learning_rate": 4.735572696607776e-05, + "loss": 0.3814, + "step": 2069000 + }, + { + "epoch": 1.24, + "learning_rate": 4.735362700051719e-05, + "loss": 0.3759, + "step": 2069500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7351527034956626e-05, + "loss": 0.393, + "step": 2070000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7349427069396066e-05, + "loss": 0.3773, + "step": 2070500 + }, + { + "epoch": 1.24, + "learning_rate": 4.734733130376662e-05, + "loss": 0.3841, + "step": 2071000 + }, + { + "epoch": 1.24, + "learning_rate": 4.734523133820605e-05, + "loss": 0.3815, + "step": 2071500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7343131372645486e-05, + "loss": 0.382, + "step": 2072000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7341031407084927e-05, + "loss": 0.3921, + "step": 2072500 + }, + { + "epoch": 1.24, + "learning_rate": 4.733893564145548e-05, + "loss": 0.3723, + "step": 2073000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7336835675894914e-05, + "loss": 0.3785, + "step": 2073500 + }, + { + "epoch": 1.24, + "learning_rate": 4.733473571033435e-05, + "loss": 0.3876, + "step": 2074000 + }, + { + "epoch": 1.24, + "learning_rate": 4.733263574477379e-05, + "loss": 0.3742, + "step": 2074500 + }, + { + "epoch": 1.24, + "learning_rate": 4.733053577921322e-05, + "loss": 0.3832, + "step": 2075000 + }, + { + "epoch": 1.24, + "learning_rate": 4.7328435813652654e-05, + "loss": 0.3768, + "step": 2075500 + }, + { + "epoch": 1.24, + "learning_rate": 4.7326340048023215e-05, + "loss": 0.3881, + "step": 2076000 + }, + { + "epoch": 1.24, + "learning_rate": 4.732424008246265e-05, + "loss": 0.3813, + "step": 2076500 + }, + { + "epoch": 1.25, + "learning_rate": 4.732214011690208e-05, + "loss": 0.3796, + "step": 2077000 + }, + { + "epoch": 1.25, + "learning_rate": 4.732004015134152e-05, + "loss": 0.3862, + "step": 2077500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7317940185780955e-05, + "loss": 0.3836, + "step": 2078000 + }, + { + "epoch": 1.25, + "learning_rate": 4.731584022022039e-05, + "loss": 0.3797, + "step": 2078500 + }, + { + "epoch": 1.25, + "learning_rate": 4.731374025465982e-05, + "loss": 0.3862, + "step": 2079000 + }, + { + "epoch": 1.25, + "learning_rate": 4.731164448903038e-05, + "loss": 0.3836, + "step": 2079500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7309548723400936e-05, + "loss": 0.3851, + "step": 2080000 + }, + { + "epoch": 1.25, + "learning_rate": 4.730744875784037e-05, + "loss": 0.3761, + "step": 2080500 + }, + { + "epoch": 1.25, + "learning_rate": 4.73053487922798e-05, + "loss": 0.3813, + "step": 2081000 + }, + { + "epoch": 1.25, + "learning_rate": 4.730324882671924e-05, + "loss": 0.3886, + "step": 2081500 + }, + { + "epoch": 1.25, + "learning_rate": 4.730114886115868e-05, + "loss": 0.3732, + "step": 2082000 + }, + { + "epoch": 1.25, + "learning_rate": 4.729904889559811e-05, + "loss": 0.3794, + "step": 2082500 + }, + { + "epoch": 1.25, + "learning_rate": 4.729694893003755e-05, + "loss": 0.3728, + "step": 2083000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7294848964476984e-05, + "loss": 0.3782, + "step": 2083500 + }, + { + "epoch": 1.25, + "learning_rate": 4.729274899891642e-05, + "loss": 0.369, + "step": 2084000 + }, + { + "epoch": 1.25, + "learning_rate": 4.729064903335585e-05, + "loss": 0.3723, + "step": 2084500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7288549067795284e-05, + "loss": 0.3769, + "step": 2085000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7286449102234724e-05, + "loss": 0.3806, + "step": 2085500 + }, + { + "epoch": 1.25, + "learning_rate": 4.72843575365364e-05, + "loss": 0.3853, + "step": 2086000 + }, + { + "epoch": 1.25, + "learning_rate": 4.728225757097584e-05, + "loss": 0.3811, + "step": 2086500 + }, + { + "epoch": 1.25, + "learning_rate": 4.728015760541527e-05, + "loss": 0.3834, + "step": 2087000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7278057639854705e-05, + "loss": 0.3783, + "step": 2087500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7275957674294145e-05, + "loss": 0.3758, + "step": 2088000 + }, + { + "epoch": 1.25, + "learning_rate": 4.72738619086647e-05, + "loss": 0.3773, + "step": 2088500 + }, + { + "epoch": 1.25, + "learning_rate": 4.727176194310413e-05, + "loss": 0.3883, + "step": 2089000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7269661977543566e-05, + "loss": 0.3796, + "step": 2089500 + }, + { + "epoch": 1.25, + "learning_rate": 4.7267562011983006e-05, + "loss": 0.3931, + "step": 2090000 + }, + { + "epoch": 1.25, + "learning_rate": 4.726546204642244e-05, + "loss": 0.3785, + "step": 2090500 + }, + { + "epoch": 1.25, + "learning_rate": 4.726336208086187e-05, + "loss": 0.3818, + "step": 2091000 + }, + { + "epoch": 1.25, + "learning_rate": 4.7261266315232433e-05, + "loss": 0.3911, + "step": 2091500 + }, + { + "epoch": 1.25, + "learning_rate": 4.725916634967187e-05, + "loss": 0.3776, + "step": 2092000 + }, + { + "epoch": 1.25, + "learning_rate": 4.72570663841113e-05, + "loss": 0.3892, + "step": 2092500 + }, + { + "epoch": 1.25, + "learning_rate": 4.725496641855074e-05, + "loss": 0.3819, + "step": 2093000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7252870652921294e-05, + "loss": 0.3859, + "step": 2093500 + }, + { + "epoch": 1.26, + "learning_rate": 4.725077068736073e-05, + "loss": 0.3729, + "step": 2094000 + }, + { + "epoch": 1.26, + "learning_rate": 4.724867072180016e-05, + "loss": 0.372, + "step": 2094500 + }, + { + "epoch": 1.26, + "learning_rate": 4.72465707562396e-05, + "loss": 0.3773, + "step": 2095000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7244470790679035e-05, + "loss": 0.3883, + "step": 2095500 + }, + { + "epoch": 1.26, + "learning_rate": 4.724237502504959e-05, + "loss": 0.3827, + "step": 2096000 + }, + { + "epoch": 1.26, + "learning_rate": 4.724027925942014e-05, + "loss": 0.3825, + "step": 2096500 + }, + { + "epoch": 1.26, + "learning_rate": 4.72381834937907e-05, + "loss": 0.3795, + "step": 2097000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7236083528230136e-05, + "loss": 0.3823, + "step": 2097500 + }, + { + "epoch": 1.26, + "learning_rate": 4.723398356266957e-05, + "loss": 0.379, + "step": 2098000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7231883597109e-05, + "loss": 0.3815, + "step": 2098500 + }, + { + "epoch": 1.26, + "learning_rate": 4.722978363154844e-05, + "loss": 0.3753, + "step": 2099000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7227683665987876e-05, + "loss": 0.3782, + "step": 2099500 + }, + { + "epoch": 1.26, + "learning_rate": 4.722558370042731e-05, + "loss": 0.3766, + "step": 2100000 + }, + { + "epoch": 1.26, + "eval_loss": 0.3669387102127075, + "eval_runtime": 1119.9259, + "eval_samples_per_second": 470.317, + "eval_steps_per_second": 78.386, + "step": 2100000 + }, + { + "epoch": 1.26, + "learning_rate": 4.722348373486675e-05, + "loss": 0.3878, + "step": 2100500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7221383769306184e-05, + "loss": 0.3764, + "step": 2101000 + }, + { + "epoch": 1.26, + "learning_rate": 4.721928380374562e-05, + "loss": 0.3789, + "step": 2101500 + }, + { + "epoch": 1.26, + "learning_rate": 4.721718383818506e-05, + "loss": 0.3707, + "step": 2102000 + }, + { + "epoch": 1.26, + "learning_rate": 4.721508387262449e-05, + "loss": 0.3784, + "step": 2102500 + }, + { + "epoch": 1.26, + "learning_rate": 4.721298390706392e-05, + "loss": 0.3842, + "step": 2103000 + }, + { + "epoch": 1.26, + "learning_rate": 4.721088394150336e-05, + "loss": 0.3787, + "step": 2103500 + }, + { + "epoch": 1.26, + "learning_rate": 4.720878397594279e-05, + "loss": 0.3857, + "step": 2104000 + }, + { + "epoch": 1.26, + "learning_rate": 4.720668821031335e-05, + "loss": 0.3738, + "step": 2104500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7204588244752785e-05, + "loss": 0.3862, + "step": 2105000 + }, + { + "epoch": 1.26, + "learning_rate": 4.720248827919222e-05, + "loss": 0.3874, + "step": 2105500 + }, + { + "epoch": 1.26, + "learning_rate": 4.720038831363165e-05, + "loss": 0.37, + "step": 2106000 + }, + { + "epoch": 1.26, + "learning_rate": 4.719828834807109e-05, + "loss": 0.3783, + "step": 2106500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7196188382510526e-05, + "loss": 0.3715, + "step": 2107000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7194092616881086e-05, + "loss": 0.382, + "step": 2107500 + }, + { + "epoch": 1.26, + "learning_rate": 4.719199265132051e-05, + "loss": 0.3834, + "step": 2108000 + }, + { + "epoch": 1.26, + "learning_rate": 4.718989268575995e-05, + "loss": 0.3789, + "step": 2108500 + }, + { + "epoch": 1.26, + "learning_rate": 4.7187792720199386e-05, + "loss": 0.3859, + "step": 2109000 + }, + { + "epoch": 1.26, + "learning_rate": 4.718569275463882e-05, + "loss": 0.375, + "step": 2109500 + }, + { + "epoch": 1.27, + "learning_rate": 4.718359278907826e-05, + "loss": 0.3805, + "step": 2110000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7181492823517693e-05, + "loss": 0.3826, + "step": 2110500 + }, + { + "epoch": 1.27, + "learning_rate": 4.717939705788825e-05, + "loss": 0.3797, + "step": 2111000 + }, + { + "epoch": 1.27, + "learning_rate": 4.717729709232768e-05, + "loss": 0.3766, + "step": 2111500 + }, + { + "epoch": 1.27, + "learning_rate": 4.717519712676712e-05, + "loss": 0.3786, + "step": 2112000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7173097161206554e-05, + "loss": 0.3778, + "step": 2112500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7170997195645994e-05, + "loss": 0.3892, + "step": 2113000 + }, + { + "epoch": 1.27, + "learning_rate": 4.716889723008543e-05, + "loss": 0.3796, + "step": 2113500 + }, + { + "epoch": 1.27, + "learning_rate": 4.716679726452486e-05, + "loss": 0.3807, + "step": 2114000 + }, + { + "epoch": 1.27, + "learning_rate": 4.71646972989643e-05, + "loss": 0.3772, + "step": 2114500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7162601533334855e-05, + "loss": 0.3813, + "step": 2115000 + }, + { + "epoch": 1.27, + "learning_rate": 4.716050156777429e-05, + "loss": 0.3803, + "step": 2115500 + }, + { + "epoch": 1.27, + "learning_rate": 4.715840160221372e-05, + "loss": 0.3749, + "step": 2116000 + }, + { + "epoch": 1.27, + "learning_rate": 4.715630163665316e-05, + "loss": 0.3999, + "step": 2116500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7154210070954836e-05, + "loss": 0.3802, + "step": 2117000 + }, + { + "epoch": 1.27, + "learning_rate": 4.715211010539427e-05, + "loss": 0.371, + "step": 2117500 + }, + { + "epoch": 1.27, + "learning_rate": 4.71500101398337e-05, + "loss": 0.3874, + "step": 2118000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7147910174273136e-05, + "loss": 0.3781, + "step": 2118500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7145814408643703e-05, + "loss": 0.3797, + "step": 2119000 + }, + { + "epoch": 1.27, + "learning_rate": 4.714371444308314e-05, + "loss": 0.3935, + "step": 2119500 + }, + { + "epoch": 1.27, + "learning_rate": 4.7141614477522564e-05, + "loss": 0.3798, + "step": 2120000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7139514511962004e-05, + "loss": 0.3903, + "step": 2120500 + }, + { + "epoch": 1.27, + "learning_rate": 4.713741454640144e-05, + "loss": 0.3859, + "step": 2121000 + }, + { + "epoch": 1.27, + "learning_rate": 4.713531458084087e-05, + "loss": 0.3794, + "step": 2121500 + }, + { + "epoch": 1.27, + "learning_rate": 4.713321461528031e-05, + "loss": 0.3681, + "step": 2122000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7131114649719744e-05, + "loss": 0.375, + "step": 2122500 + }, + { + "epoch": 1.27, + "learning_rate": 4.712901468415918e-05, + "loss": 0.3804, + "step": 2123000 + }, + { + "epoch": 1.27, + "learning_rate": 4.712691471859862e-05, + "loss": 0.3835, + "step": 2123500 + }, + { + "epoch": 1.27, + "learning_rate": 4.712481895296917e-05, + "loss": 0.3855, + "step": 2124000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7122718987408605e-05, + "loss": 0.3779, + "step": 2124500 + }, + { + "epoch": 1.27, + "learning_rate": 4.712061902184804e-05, + "loss": 0.3864, + "step": 2125000 + }, + { + "epoch": 1.27, + "learning_rate": 4.711852325621859e-05, + "loss": 0.3799, + "step": 2125500 + }, + { + "epoch": 1.27, + "learning_rate": 4.711642329065803e-05, + "loss": 0.3747, + "step": 2126000 + }, + { + "epoch": 1.27, + "learning_rate": 4.7114323325097466e-05, + "loss": 0.3841, + "step": 2126500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7112223359536906e-05, + "loss": 0.3809, + "step": 2127000 + }, + { + "epoch": 1.28, + "learning_rate": 4.711012339397634e-05, + "loss": 0.3729, + "step": 2127500 + }, + { + "epoch": 1.28, + "learning_rate": 4.710802342841577e-05, + "loss": 0.378, + "step": 2128000 + }, + { + "epoch": 1.28, + "learning_rate": 4.710592346285521e-05, + "loss": 0.3852, + "step": 2128500 + }, + { + "epoch": 1.28, + "learning_rate": 4.710382349729465e-05, + "loss": 0.3752, + "step": 2129000 + }, + { + "epoch": 1.28, + "learning_rate": 4.710172353173408e-05, + "loss": 0.3793, + "step": 2129500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7099623566173514e-05, + "loss": 0.3783, + "step": 2130000 + }, + { + "epoch": 1.28, + "learning_rate": 4.709752360061295e-05, + "loss": 0.3729, + "step": 2130500 + }, + { + "epoch": 1.28, + "learning_rate": 4.709542363505238e-05, + "loss": 0.3748, + "step": 2131000 + }, + { + "epoch": 1.28, + "learning_rate": 4.709332786942294e-05, + "loss": 0.386, + "step": 2131500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7091232103793494e-05, + "loss": 0.3868, + "step": 2132000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7089132138232935e-05, + "loss": 0.3748, + "step": 2132500 + }, + { + "epoch": 1.28, + "learning_rate": 4.708703217267237e-05, + "loss": 0.3776, + "step": 2133000 + }, + { + "epoch": 1.28, + "learning_rate": 4.70849322071118e-05, + "loss": 0.3861, + "step": 2133500 + }, + { + "epoch": 1.28, + "learning_rate": 4.708283224155124e-05, + "loss": 0.3793, + "step": 2134000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7080732275990675e-05, + "loss": 0.3796, + "step": 2134500 + }, + { + "epoch": 1.28, + "learning_rate": 4.707863231043011e-05, + "loss": 0.3863, + "step": 2135000 + }, + { + "epoch": 1.28, + "learning_rate": 4.707653234486954e-05, + "loss": 0.3846, + "step": 2135500 + }, + { + "epoch": 1.28, + "learning_rate": 4.70744365792401e-05, + "loss": 0.3676, + "step": 2136000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7072336613679536e-05, + "loss": 0.3771, + "step": 2136500 + }, + { + "epoch": 1.28, + "learning_rate": 4.707023664811897e-05, + "loss": 0.369, + "step": 2137000 + }, + { + "epoch": 1.28, + "learning_rate": 4.70681366825584e-05, + "loss": 0.3875, + "step": 2137500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7066045116860083e-05, + "loss": 0.3833, + "step": 2138000 + }, + { + "epoch": 1.28, + "learning_rate": 4.706394515129952e-05, + "loss": 0.3877, + "step": 2138500 + }, + { + "epoch": 1.28, + "learning_rate": 4.706184518573895e-05, + "loss": 0.3782, + "step": 2139000 + }, + { + "epoch": 1.28, + "learning_rate": 4.705974522017839e-05, + "loss": 0.382, + "step": 2139500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7057645254617824e-05, + "loss": 0.3716, + "step": 2140000 + }, + { + "epoch": 1.28, + "learning_rate": 4.705554528905726e-05, + "loss": 0.3772, + "step": 2140500 + }, + { + "epoch": 1.28, + "learning_rate": 4.705344952342782e-05, + "loss": 0.3846, + "step": 2141000 + }, + { + "epoch": 1.28, + "learning_rate": 4.705134955786725e-05, + "loss": 0.3779, + "step": 2141500 + }, + { + "epoch": 1.28, + "learning_rate": 4.7049249592306685e-05, + "loss": 0.3788, + "step": 2142000 + }, + { + "epoch": 1.28, + "learning_rate": 4.7047149626746125e-05, + "loss": 0.3758, + "step": 2142500 + }, + { + "epoch": 1.28, + "learning_rate": 4.704504966118556e-05, + "loss": 0.3768, + "step": 2143000 + }, + { + "epoch": 1.29, + "learning_rate": 4.704294969562499e-05, + "loss": 0.3784, + "step": 2143500 + }, + { + "epoch": 1.29, + "learning_rate": 4.7040853929995546e-05, + "loss": 0.3857, + "step": 2144000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7038753964434986e-05, + "loss": 0.3689, + "step": 2144500 + }, + { + "epoch": 1.29, + "learning_rate": 4.703665399887442e-05, + "loss": 0.3777, + "step": 2145000 + }, + { + "epoch": 1.29, + "learning_rate": 4.703455403331385e-05, + "loss": 0.3832, + "step": 2145500 + }, + { + "epoch": 1.29, + "learning_rate": 4.703245406775329e-05, + "loss": 0.3733, + "step": 2146000 + }, + { + "epoch": 1.29, + "learning_rate": 4.703035410219272e-05, + "loss": 0.3885, + "step": 2146500 + }, + { + "epoch": 1.29, + "learning_rate": 4.702825413663215e-05, + "loss": 0.3874, + "step": 2147000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7026158371002713e-05, + "loss": 0.3848, + "step": 2147500 + }, + { + "epoch": 1.29, + "learning_rate": 4.7024058405442154e-05, + "loss": 0.3892, + "step": 2148000 + }, + { + "epoch": 1.29, + "learning_rate": 4.702195843988159e-05, + "loss": 0.3824, + "step": 2148500 + }, + { + "epoch": 1.29, + "learning_rate": 4.701985847432102e-05, + "loss": 0.3761, + "step": 2149000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7017758508760454e-05, + "loss": 0.3807, + "step": 2149500 + }, + { + "epoch": 1.29, + "learning_rate": 4.7015662743131014e-05, + "loss": 0.3928, + "step": 2150000 + }, + { + "epoch": 1.29, + "learning_rate": 4.701356277757045e-05, + "loss": 0.3877, + "step": 2150500 + }, + { + "epoch": 1.29, + "learning_rate": 4.701146281200989e-05, + "loss": 0.3797, + "step": 2151000 + }, + { + "epoch": 1.29, + "learning_rate": 4.7009362846449315e-05, + "loss": 0.3697, + "step": 2151500 + }, + { + "epoch": 1.29, + "learning_rate": 4.700726288088875e-05, + "loss": 0.3684, + "step": 2152000 + }, + { + "epoch": 1.29, + "learning_rate": 4.700516291532819e-05, + "loss": 0.3808, + "step": 2152500 + }, + { + "epoch": 1.29, + "learning_rate": 4.700306714969875e-05, + "loss": 0.379, + "step": 2153000 + }, + { + "epoch": 1.29, + "learning_rate": 4.700096718413818e-05, + "loss": 0.3768, + "step": 2153500 + }, + { + "epoch": 1.29, + "learning_rate": 4.699886721857761e-05, + "loss": 0.3709, + "step": 2154000 + }, + { + "epoch": 1.29, + "learning_rate": 4.699676725301705e-05, + "loss": 0.383, + "step": 2154500 + }, + { + "epoch": 1.29, + "learning_rate": 4.699467148738761e-05, + "loss": 0.3848, + "step": 2155000 + }, + { + "epoch": 1.29, + "learning_rate": 4.699257152182704e-05, + "loss": 0.3798, + "step": 2155500 + }, + { + "epoch": 1.29, + "learning_rate": 4.6990471556266476e-05, + "loss": 0.3843, + "step": 2156000 + }, + { + "epoch": 1.29, + "learning_rate": 4.698837159070591e-05, + "loss": 0.383, + "step": 2156500 + }, + { + "epoch": 1.29, + "learning_rate": 4.698627162514534e-05, + "loss": 0.3824, + "step": 2157000 + }, + { + "epoch": 1.29, + "learning_rate": 4.6984171659584784e-05, + "loss": 0.3839, + "step": 2157500 + }, + { + "epoch": 1.29, + "learning_rate": 4.698207169402422e-05, + "loss": 0.3808, + "step": 2158000 + }, + { + "epoch": 1.29, + "learning_rate": 4.697997172846365e-05, + "loss": 0.3814, + "step": 2158500 + }, + { + "epoch": 1.29, + "learning_rate": 4.6977875962834204e-05, + "loss": 0.3779, + "step": 2159000 + }, + { + "epoch": 1.29, + "learning_rate": 4.6975775997273644e-05, + "loss": 0.3884, + "step": 2159500 + }, + { + "epoch": 1.3, + "learning_rate": 4.697367603171308e-05, + "loss": 0.3729, + "step": 2160000 + }, + { + "epoch": 1.3, + "learning_rate": 4.697157606615251e-05, + "loss": 0.3818, + "step": 2160500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6969480300523065e-05, + "loss": 0.3765, + "step": 2161000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6967380334962505e-05, + "loss": 0.3795, + "step": 2161500 + }, + { + "epoch": 1.3, + "learning_rate": 4.696528036940194e-05, + "loss": 0.3892, + "step": 2162000 + }, + { + "epoch": 1.3, + "learning_rate": 4.696318040384137e-05, + "loss": 0.3809, + "step": 2162500 + }, + { + "epoch": 1.3, + "learning_rate": 4.696108463821193e-05, + "loss": 0.3755, + "step": 2163000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6958984672651366e-05, + "loss": 0.3707, + "step": 2163500 + }, + { + "epoch": 1.3, + "learning_rate": 4.69568847070908e-05, + "loss": 0.3751, + "step": 2164000 + }, + { + "epoch": 1.3, + "learning_rate": 4.695478474153024e-05, + "loss": 0.3788, + "step": 2164500 + }, + { + "epoch": 1.3, + "learning_rate": 4.695268477596967e-05, + "loss": 0.3816, + "step": 2165000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6950584810409106e-05, + "loss": 0.378, + "step": 2165500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6948484844848547e-05, + "loss": 0.3879, + "step": 2166000 + }, + { + "epoch": 1.3, + "learning_rate": 4.69463890792191e-05, + "loss": 0.3883, + "step": 2166500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6944289113658534e-05, + "loss": 0.3852, + "step": 2167000 + }, + { + "epoch": 1.3, + "learning_rate": 4.694218914809797e-05, + "loss": 0.3973, + "step": 2167500 + }, + { + "epoch": 1.3, + "learning_rate": 4.694008918253741e-05, + "loss": 0.3753, + "step": 2168000 + }, + { + "epoch": 1.3, + "learning_rate": 4.693798921697684e-05, + "loss": 0.3898, + "step": 2168500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6935893451347394e-05, + "loss": 0.3795, + "step": 2169000 + }, + { + "epoch": 1.3, + "learning_rate": 4.693379348578683e-05, + "loss": 0.3796, + "step": 2169500 + }, + { + "epoch": 1.3, + "learning_rate": 4.693169352022627e-05, + "loss": 0.3707, + "step": 2170000 + }, + { + "epoch": 1.3, + "learning_rate": 4.69295935546657e-05, + "loss": 0.3712, + "step": 2170500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6927493589105135e-05, + "loss": 0.3812, + "step": 2171000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6925397823475695e-05, + "loss": 0.3732, + "step": 2171500 + }, + { + "epoch": 1.3, + "learning_rate": 4.692329785791513e-05, + "loss": 0.3796, + "step": 2172000 + }, + { + "epoch": 1.3, + "learning_rate": 4.692119789235456e-05, + "loss": 0.3803, + "step": 2172500 + }, + { + "epoch": 1.3, + "learning_rate": 4.6919097926794e-05, + "loss": 0.3831, + "step": 2173000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6916997961233436e-05, + "loss": 0.3789, + "step": 2173500 + }, + { + "epoch": 1.3, + "learning_rate": 4.691490219560399e-05, + "loss": 0.3801, + "step": 2174000 + }, + { + "epoch": 1.3, + "learning_rate": 4.691280223004342e-05, + "loss": 0.3825, + "step": 2174500 + }, + { + "epoch": 1.3, + "learning_rate": 4.691070226448286e-05, + "loss": 0.3809, + "step": 2175000 + }, + { + "epoch": 1.3, + "learning_rate": 4.69086022989223e-05, + "loss": 0.3675, + "step": 2175500 + }, + { + "epoch": 1.3, + "learning_rate": 4.690650653329285e-05, + "loss": 0.3812, + "step": 2176000 + }, + { + "epoch": 1.3, + "learning_rate": 4.6904406567732284e-05, + "loss": 0.3734, + "step": 2176500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6902306602171724e-05, + "loss": 0.381, + "step": 2177000 + }, + { + "epoch": 1.31, + "learning_rate": 4.690020663661116e-05, + "loss": 0.3741, + "step": 2177500 + }, + { + "epoch": 1.31, + "learning_rate": 4.689810667105059e-05, + "loss": 0.3789, + "step": 2178000 + }, + { + "epoch": 1.31, + "learning_rate": 4.689600670549003e-05, + "loss": 0.3891, + "step": 2178500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6893906739929465e-05, + "loss": 0.3817, + "step": 2179000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6891806774368905e-05, + "loss": 0.3807, + "step": 2179500 + }, + { + "epoch": 1.31, + "learning_rate": 4.688971100873946e-05, + "loss": 0.3801, + "step": 2180000 + }, + { + "epoch": 1.31, + "learning_rate": 4.688761104317889e-05, + "loss": 0.3801, + "step": 2180500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6885511077618325e-05, + "loss": 0.3916, + "step": 2181000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6883411112057766e-05, + "loss": 0.3772, + "step": 2181500 + }, + { + "epoch": 1.31, + "learning_rate": 4.688131534642832e-05, + "loss": 0.3811, + "step": 2182000 + }, + { + "epoch": 1.31, + "learning_rate": 4.687921958079887e-05, + "loss": 0.3748, + "step": 2182500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6877119615238306e-05, + "loss": 0.3752, + "step": 2183000 + }, + { + "epoch": 1.31, + "learning_rate": 4.687501964967774e-05, + "loss": 0.3754, + "step": 2183500 + }, + { + "epoch": 1.31, + "learning_rate": 4.687291968411718e-05, + "loss": 0.3752, + "step": 2184000 + }, + { + "epoch": 1.31, + "learning_rate": 4.687082391848774e-05, + "loss": 0.3901, + "step": 2184500 + }, + { + "epoch": 1.31, + "learning_rate": 4.686872395292717e-05, + "loss": 0.3671, + "step": 2185000 + }, + { + "epoch": 1.31, + "learning_rate": 4.686662398736661e-05, + "loss": 0.384, + "step": 2185500 + }, + { + "epoch": 1.31, + "learning_rate": 4.686452402180604e-05, + "loss": 0.3839, + "step": 2186000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6862424056245474e-05, + "loss": 0.3777, + "step": 2186500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6860324090684914e-05, + "loss": 0.3756, + "step": 2187000 + }, + { + "epoch": 1.31, + "learning_rate": 4.685822412512435e-05, + "loss": 0.3753, + "step": 2187500 + }, + { + "epoch": 1.31, + "learning_rate": 4.685612415956378e-05, + "loss": 0.3721, + "step": 2188000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6854028393934335e-05, + "loss": 0.3831, + "step": 2188500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6851928428373775e-05, + "loss": 0.383, + "step": 2189000 + }, + { + "epoch": 1.31, + "learning_rate": 4.684982846281321e-05, + "loss": 0.3859, + "step": 2189500 + }, + { + "epoch": 1.31, + "learning_rate": 4.684773269718376e-05, + "loss": 0.3742, + "step": 2190000 + }, + { + "epoch": 1.31, + "learning_rate": 4.6845632731623195e-05, + "loss": 0.3763, + "step": 2190500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6843532766062636e-05, + "loss": 0.3756, + "step": 2191000 + }, + { + "epoch": 1.31, + "learning_rate": 4.684143280050207e-05, + "loss": 0.3764, + "step": 2191500 + }, + { + "epoch": 1.31, + "learning_rate": 4.683933283494151e-05, + "loss": 0.3742, + "step": 2192000 + }, + { + "epoch": 1.31, + "learning_rate": 4.683723286938094e-05, + "loss": 0.3771, + "step": 2192500 + }, + { + "epoch": 1.31, + "learning_rate": 4.6835137103751496e-05, + "loss": 0.3931, + "step": 2193000 + }, + { + "epoch": 1.32, + "learning_rate": 4.683303713819093e-05, + "loss": 0.3817, + "step": 2193500 + }, + { + "epoch": 1.32, + "learning_rate": 4.683093717263037e-05, + "loss": 0.3724, + "step": 2194000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6828837207069804e-05, + "loss": 0.3782, + "step": 2194500 + }, + { + "epoch": 1.32, + "learning_rate": 4.682673724150924e-05, + "loss": 0.3781, + "step": 2195000 + }, + { + "epoch": 1.32, + "learning_rate": 4.682463727594868e-05, + "loss": 0.3729, + "step": 2195500 + }, + { + "epoch": 1.32, + "learning_rate": 4.682253731038811e-05, + "loss": 0.3793, + "step": 2196000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6820437344827544e-05, + "loss": 0.3698, + "step": 2196500 + }, + { + "epoch": 1.32, + "learning_rate": 4.68183415791981e-05, + "loss": 0.3768, + "step": 2197000 + }, + { + "epoch": 1.32, + "learning_rate": 4.681624581356866e-05, + "loss": 0.3816, + "step": 2197500 + }, + { + "epoch": 1.32, + "learning_rate": 4.681414584800809e-05, + "loss": 0.3796, + "step": 2198000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6812045882447525e-05, + "loss": 0.3825, + "step": 2198500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6809945916886965e-05, + "loss": 0.3808, + "step": 2199000 + }, + { + "epoch": 1.32, + "learning_rate": 4.68078459513264e-05, + "loss": 0.3802, + "step": 2199500 + }, + { + "epoch": 1.32, + "learning_rate": 4.680574598576583e-05, + "loss": 0.3751, + "step": 2200000 + }, + { + "epoch": 1.32, + "eval_loss": 0.3648931682109833, + "eval_runtime": 1120.548, + "eval_samples_per_second": 470.056, + "eval_steps_per_second": 78.343, + "step": 2200000 + }, + { + "epoch": 1.32, + "learning_rate": 4.680364602020527e-05, + "loss": 0.3758, + "step": 2200500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6801546054644706e-05, + "loss": 0.3872, + "step": 2201000 + }, + { + "epoch": 1.32, + "learning_rate": 4.679945028901526e-05, + "loss": 0.387, + "step": 2201500 + }, + { + "epoch": 1.32, + "learning_rate": 4.679735032345469e-05, + "loss": 0.3761, + "step": 2202000 + }, + { + "epoch": 1.32, + "learning_rate": 4.679525035789413e-05, + "loss": 0.3872, + "step": 2202500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6793150392333567e-05, + "loss": 0.3766, + "step": 2203000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6791050426773e-05, + "loss": 0.3739, + "step": 2203500 + }, + { + "epoch": 1.32, + "learning_rate": 4.678895046121244e-05, + "loss": 0.3754, + "step": 2204000 + }, + { + "epoch": 1.32, + "learning_rate": 4.678685049565187e-05, + "loss": 0.3678, + "step": 2204500 + }, + { + "epoch": 1.32, + "learning_rate": 4.67847505300913e-05, + "loss": 0.3782, + "step": 2205000 + }, + { + "epoch": 1.32, + "learning_rate": 4.678265476446186e-05, + "loss": 0.377, + "step": 2205500 + }, + { + "epoch": 1.32, + "learning_rate": 4.67805547989013e-05, + "loss": 0.3778, + "step": 2206000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6778454833340734e-05, + "loss": 0.3664, + "step": 2206500 + }, + { + "epoch": 1.32, + "learning_rate": 4.677635486778017e-05, + "loss": 0.3808, + "step": 2207000 + }, + { + "epoch": 1.32, + "learning_rate": 4.67742549022196e-05, + "loss": 0.3755, + "step": 2207500 + }, + { + "epoch": 1.32, + "learning_rate": 4.677215913659016e-05, + "loss": 0.3799, + "step": 2208000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6770059171029595e-05, + "loss": 0.3788, + "step": 2208500 + }, + { + "epoch": 1.32, + "learning_rate": 4.6767959205469035e-05, + "loss": 0.384, + "step": 2209000 + }, + { + "epoch": 1.32, + "learning_rate": 4.676585923990846e-05, + "loss": 0.377, + "step": 2209500 + }, + { + "epoch": 1.32, + "learning_rate": 4.676376767421014e-05, + "loss": 0.3808, + "step": 2210000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6761667708649576e-05, + "loss": 0.3833, + "step": 2210500 + }, + { + "epoch": 1.33, + "learning_rate": 4.675956774308901e-05, + "loss": 0.3785, + "step": 2211000 + }, + { + "epoch": 1.33, + "learning_rate": 4.675746777752845e-05, + "loss": 0.381, + "step": 2211500 + }, + { + "epoch": 1.33, + "learning_rate": 4.675536781196788e-05, + "loss": 0.3797, + "step": 2212000 + }, + { + "epoch": 1.33, + "learning_rate": 4.675327204633844e-05, + "loss": 0.3777, + "step": 2212500 + }, + { + "epoch": 1.33, + "learning_rate": 4.675117208077788e-05, + "loss": 0.3831, + "step": 2213000 + }, + { + "epoch": 1.33, + "learning_rate": 4.674907211521731e-05, + "loss": 0.3761, + "step": 2213500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6746972149656744e-05, + "loss": 0.3738, + "step": 2214000 + }, + { + "epoch": 1.33, + "learning_rate": 4.67448763840273e-05, + "loss": 0.3687, + "step": 2214500 + }, + { + "epoch": 1.33, + "learning_rate": 4.674277641846674e-05, + "loss": 0.3718, + "step": 2215000 + }, + { + "epoch": 1.33, + "learning_rate": 4.674067645290617e-05, + "loss": 0.3862, + "step": 2215500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6738576487345605e-05, + "loss": 0.391, + "step": 2216000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6736476521785045e-05, + "loss": 0.3755, + "step": 2216500 + }, + { + "epoch": 1.33, + "learning_rate": 4.67343807561556e-05, + "loss": 0.3759, + "step": 2217000 + }, + { + "epoch": 1.33, + "learning_rate": 4.673228079059503e-05, + "loss": 0.3793, + "step": 2217500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6730180825034465e-05, + "loss": 0.3818, + "step": 2218000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6728080859473906e-05, + "loss": 0.3808, + "step": 2218500 + }, + { + "epoch": 1.33, + "learning_rate": 4.672598089391334e-05, + "loss": 0.3915, + "step": 2219000 + }, + { + "epoch": 1.33, + "learning_rate": 4.672388092835277e-05, + "loss": 0.3809, + "step": 2219500 + }, + { + "epoch": 1.33, + "learning_rate": 4.672178096279221e-05, + "loss": 0.3831, + "step": 2220000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6719680997231646e-05, + "loss": 0.3816, + "step": 2220500 + }, + { + "epoch": 1.33, + "learning_rate": 4.67175852316022e-05, + "loss": 0.3744, + "step": 2221000 + }, + { + "epoch": 1.33, + "learning_rate": 4.6715489465972753e-05, + "loss": 0.3774, + "step": 2221500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6713389500412194e-05, + "loss": 0.371, + "step": 2222000 + }, + { + "epoch": 1.33, + "learning_rate": 4.671128953485163e-05, + "loss": 0.3921, + "step": 2222500 + }, + { + "epoch": 1.33, + "learning_rate": 4.670918956929106e-05, + "loss": 0.3849, + "step": 2223000 + }, + { + "epoch": 1.33, + "learning_rate": 4.67070896037305e-05, + "loss": 0.3767, + "step": 2223500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6704989638169934e-05, + "loss": 0.3715, + "step": 2224000 + }, + { + "epoch": 1.33, + "learning_rate": 4.670288967260937e-05, + "loss": 0.3747, + "step": 2224500 + }, + { + "epoch": 1.33, + "learning_rate": 4.670078970704881e-05, + "loss": 0.3861, + "step": 2225000 + }, + { + "epoch": 1.33, + "learning_rate": 4.669869814135048e-05, + "loss": 0.3703, + "step": 2225500 + }, + { + "epoch": 1.33, + "learning_rate": 4.6696598175789915e-05, + "loss": 0.3815, + "step": 2226000 + }, + { + "epoch": 1.33, + "learning_rate": 4.669449821022935e-05, + "loss": 0.3728, + "step": 2226500 + }, + { + "epoch": 1.34, + "learning_rate": 4.669239824466879e-05, + "loss": 0.3827, + "step": 2227000 + }, + { + "epoch": 1.34, + "learning_rate": 4.669029827910822e-05, + "loss": 0.3726, + "step": 2227500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6688198313547656e-05, + "loss": 0.3756, + "step": 2228000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6686098347987096e-05, + "loss": 0.3748, + "step": 2228500 + }, + { + "epoch": 1.34, + "learning_rate": 4.668399838242653e-05, + "loss": 0.3756, + "step": 2229000 + }, + { + "epoch": 1.34, + "learning_rate": 4.668190261679708e-05, + "loss": 0.3789, + "step": 2229500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6679806851167637e-05, + "loss": 0.3813, + "step": 2230000 + }, + { + "epoch": 1.34, + "learning_rate": 4.667770688560707e-05, + "loss": 0.3898, + "step": 2230500 + }, + { + "epoch": 1.34, + "learning_rate": 4.667560692004651e-05, + "loss": 0.3782, + "step": 2231000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6673506954485944e-05, + "loss": 0.3724, + "step": 2231500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6671411188856504e-05, + "loss": 0.3784, + "step": 2232000 + }, + { + "epoch": 1.34, + "learning_rate": 4.666931542322706e-05, + "loss": 0.3722, + "step": 2232500 + }, + { + "epoch": 1.34, + "learning_rate": 4.66672154576665e-05, + "loss": 0.376, + "step": 2233000 + }, + { + "epoch": 1.34, + "learning_rate": 4.666511549210593e-05, + "loss": 0.3786, + "step": 2233500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6663015526545365e-05, + "loss": 0.3831, + "step": 2234000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6660915560984805e-05, + "loss": 0.3788, + "step": 2234500 + }, + { + "epoch": 1.34, + "learning_rate": 4.665881559542423e-05, + "loss": 0.3804, + "step": 2235000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6656715629863665e-05, + "loss": 0.3775, + "step": 2235500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6654615664303105e-05, + "loss": 0.3751, + "step": 2236000 + }, + { + "epoch": 1.34, + "learning_rate": 4.665251569874254e-05, + "loss": 0.3867, + "step": 2236500 + }, + { + "epoch": 1.34, + "learning_rate": 4.665041573318197e-05, + "loss": 0.3825, + "step": 2237000 + }, + { + "epoch": 1.34, + "learning_rate": 4.664831576762141e-05, + "loss": 0.3839, + "step": 2237500 + }, + { + "epoch": 1.34, + "learning_rate": 4.6646215802060846e-05, + "loss": 0.3806, + "step": 2238000 + }, + { + "epoch": 1.34, + "learning_rate": 4.664411583650028e-05, + "loss": 0.3682, + "step": 2238500 + }, + { + "epoch": 1.34, + "learning_rate": 4.664201587093972e-05, + "loss": 0.3741, + "step": 2239000 + }, + { + "epoch": 1.34, + "learning_rate": 4.663991590537915e-05, + "loss": 0.3744, + "step": 2239500 + }, + { + "epoch": 1.34, + "learning_rate": 4.663781593981859e-05, + "loss": 0.3862, + "step": 2240000 + }, + { + "epoch": 1.34, + "learning_rate": 4.663572017418914e-05, + "loss": 0.3882, + "step": 2240500 + }, + { + "epoch": 1.34, + "learning_rate": 4.663362020862858e-05, + "loss": 0.388, + "step": 2241000 + }, + { + "epoch": 1.34, + "learning_rate": 4.6631520243068014e-05, + "loss": 0.3769, + "step": 2241500 + }, + { + "epoch": 1.34, + "learning_rate": 4.662942027750745e-05, + "loss": 0.3822, + "step": 2242000 + }, + { + "epoch": 1.34, + "learning_rate": 4.662732451187801e-05, + "loss": 0.3854, + "step": 2242500 + }, + { + "epoch": 1.34, + "learning_rate": 4.662522454631744e-05, + "loss": 0.3679, + "step": 2243000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6623128780687995e-05, + "loss": 0.3783, + "step": 2243500 + }, + { + "epoch": 1.35, + "learning_rate": 4.662102881512743e-05, + "loss": 0.3845, + "step": 2244000 + }, + { + "epoch": 1.35, + "learning_rate": 4.661892884956687e-05, + "loss": 0.3828, + "step": 2244500 + }, + { + "epoch": 1.35, + "learning_rate": 4.66168288840063e-05, + "loss": 0.3789, + "step": 2245000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6614728918445735e-05, + "loss": 0.378, + "step": 2245500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6612628952885176e-05, + "loss": 0.3779, + "step": 2246000 + }, + { + "epoch": 1.35, + "learning_rate": 4.661053318725573e-05, + "loss": 0.382, + "step": 2246500 + }, + { + "epoch": 1.35, + "learning_rate": 4.660843322169516e-05, + "loss": 0.3779, + "step": 2247000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6606333256134596e-05, + "loss": 0.3803, + "step": 2247500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6604233290574036e-05, + "loss": 0.3792, + "step": 2248000 + }, + { + "epoch": 1.35, + "learning_rate": 4.660213332501347e-05, + "loss": 0.3802, + "step": 2248500 + }, + { + "epoch": 1.35, + "learning_rate": 4.66000333594529e-05, + "loss": 0.3897, + "step": 2249000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6597933393892343e-05, + "loss": 0.3827, + "step": 2249500 + }, + { + "epoch": 1.35, + "learning_rate": 4.659583342833177e-05, + "loss": 0.3787, + "step": 2250000 + }, + { + "epoch": 1.35, + "learning_rate": 4.659373346277121e-05, + "loss": 0.387, + "step": 2250500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6591641897072884e-05, + "loss": 0.3896, + "step": 2251000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6589541931512324e-05, + "loss": 0.3779, + "step": 2251500 + }, + { + "epoch": 1.35, + "learning_rate": 4.658744196595176e-05, + "loss": 0.365, + "step": 2252000 + }, + { + "epoch": 1.35, + "learning_rate": 4.658534200039119e-05, + "loss": 0.3776, + "step": 2252500 + }, + { + "epoch": 1.35, + "learning_rate": 4.658324203483063e-05, + "loss": 0.386, + "step": 2253000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6581142069270065e-05, + "loss": 0.381, + "step": 2253500 + }, + { + "epoch": 1.35, + "learning_rate": 4.65790421037095e-05, + "loss": 0.387, + "step": 2254000 + }, + { + "epoch": 1.35, + "learning_rate": 4.657694213814893e-05, + "loss": 0.3701, + "step": 2254500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6574842172588365e-05, + "loss": 0.3805, + "step": 2255000 + }, + { + "epoch": 1.35, + "learning_rate": 4.65727422070278e-05, + "loss": 0.3731, + "step": 2255500 + }, + { + "epoch": 1.35, + "learning_rate": 4.657065064132948e-05, + "loss": 0.3831, + "step": 2256000 + }, + { + "epoch": 1.35, + "learning_rate": 4.656855067576892e-05, + "loss": 0.3772, + "step": 2256500 + }, + { + "epoch": 1.35, + "learning_rate": 4.656645071020835e-05, + "loss": 0.3832, + "step": 2257000 + }, + { + "epoch": 1.35, + "learning_rate": 4.6564350744647786e-05, + "loss": 0.3765, + "step": 2257500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6562250779087227e-05, + "loss": 0.3736, + "step": 2258000 + }, + { + "epoch": 1.35, + "learning_rate": 4.656015081352666e-05, + "loss": 0.3868, + "step": 2258500 + }, + { + "epoch": 1.35, + "learning_rate": 4.6558050847966094e-05, + "loss": 0.3737, + "step": 2259000 + }, + { + "epoch": 1.35, + "learning_rate": 4.655595088240553e-05, + "loss": 0.3817, + "step": 2259500 + }, + { + "epoch": 1.35, + "learning_rate": 4.655385091684496e-05, + "loss": 0.3733, + "step": 2260000 + }, + { + "epoch": 1.36, + "learning_rate": 4.655175515121552e-05, + "loss": 0.3868, + "step": 2260500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6549655185654954e-05, + "loss": 0.3679, + "step": 2261000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6547555220094394e-05, + "loss": 0.3763, + "step": 2261500 + }, + { + "epoch": 1.36, + "learning_rate": 4.654545525453382e-05, + "loss": 0.3706, + "step": 2262000 + }, + { + "epoch": 1.36, + "learning_rate": 4.654335948890438e-05, + "loss": 0.3744, + "step": 2262500 + }, + { + "epoch": 1.36, + "learning_rate": 4.654125952334382e-05, + "loss": 0.3709, + "step": 2263000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6539159557783255e-05, + "loss": 0.3789, + "step": 2263500 + }, + { + "epoch": 1.36, + "learning_rate": 4.653705959222269e-05, + "loss": 0.3855, + "step": 2264000 + }, + { + "epoch": 1.36, + "learning_rate": 4.653495962666212e-05, + "loss": 0.3757, + "step": 2264500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6532859661101556e-05, + "loss": 0.3817, + "step": 2265000 + }, + { + "epoch": 1.36, + "learning_rate": 4.653075969554099e-05, + "loss": 0.3774, + "step": 2265500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652865972998043e-05, + "loss": 0.3726, + "step": 2266000 + }, + { + "epoch": 1.36, + "learning_rate": 4.652656396435098e-05, + "loss": 0.3642, + "step": 2266500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652446819872154e-05, + "loss": 0.3721, + "step": 2267000 + }, + { + "epoch": 1.36, + "learning_rate": 4.652236823316098e-05, + "loss": 0.3866, + "step": 2267500 + }, + { + "epoch": 1.36, + "learning_rate": 4.652026826760041e-05, + "loss": 0.372, + "step": 2268000 + }, + { + "epoch": 1.36, + "learning_rate": 4.651816830203985e-05, + "loss": 0.38, + "step": 2268500 + }, + { + "epoch": 1.36, + "learning_rate": 4.651606833647928e-05, + "loss": 0.3748, + "step": 2269000 + }, + { + "epoch": 1.36, + "learning_rate": 4.651396837091872e-05, + "loss": 0.3869, + "step": 2269500 + }, + { + "epoch": 1.36, + "learning_rate": 4.651186840535815e-05, + "loss": 0.3762, + "step": 2270000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6509768439797584e-05, + "loss": 0.375, + "step": 2270500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6507672674168145e-05, + "loss": 0.3801, + "step": 2271000 + }, + { + "epoch": 1.36, + "learning_rate": 4.65055769085387e-05, + "loss": 0.3807, + "step": 2271500 + }, + { + "epoch": 1.36, + "learning_rate": 4.650347694297814e-05, + "loss": 0.3742, + "step": 2272000 + }, + { + "epoch": 1.36, + "learning_rate": 4.650137697741757e-05, + "loss": 0.3795, + "step": 2272500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6499277011857005e-05, + "loss": 0.3908, + "step": 2273000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6497177046296446e-05, + "loss": 0.3818, + "step": 2273500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6495081280667e-05, + "loss": 0.3811, + "step": 2274000 + }, + { + "epoch": 1.36, + "learning_rate": 4.649298131510643e-05, + "loss": 0.3758, + "step": 2274500 + }, + { + "epoch": 1.36, + "learning_rate": 4.6490881349545866e-05, + "loss": 0.3782, + "step": 2275000 + }, + { + "epoch": 1.36, + "learning_rate": 4.6488781383985306e-05, + "loss": 0.3732, + "step": 2275500 + }, + { + "epoch": 1.36, + "learning_rate": 4.648668141842473e-05, + "loss": 0.3847, + "step": 2276000 + }, + { + "epoch": 1.36, + "learning_rate": 4.648458145286417e-05, + "loss": 0.3838, + "step": 2276500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6482485687234734e-05, + "loss": 0.3803, + "step": 2277000 + }, + { + "epoch": 1.37, + "learning_rate": 4.648038572167417e-05, + "loss": 0.3694, + "step": 2277500 + }, + { + "epoch": 1.37, + "learning_rate": 4.64782857561136e-05, + "loss": 0.3763, + "step": 2278000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6476185790553034e-05, + "loss": 0.3793, + "step": 2278500 + }, + { + "epoch": 1.37, + "learning_rate": 4.647408582499247e-05, + "loss": 0.3796, + "step": 2279000 + }, + { + "epoch": 1.37, + "learning_rate": 4.64719858594319e-05, + "loss": 0.376, + "step": 2279500 + }, + { + "epoch": 1.37, + "learning_rate": 4.646988589387134e-05, + "loss": 0.3848, + "step": 2280000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6467785928310774e-05, + "loss": 0.3791, + "step": 2280500 + }, + { + "epoch": 1.37, + "learning_rate": 4.646569016268133e-05, + "loss": 0.3723, + "step": 2281000 + }, + { + "epoch": 1.37, + "learning_rate": 4.646359439705189e-05, + "loss": 0.3806, + "step": 2281500 + }, + { + "epoch": 1.37, + "learning_rate": 4.646149443149132e-05, + "loss": 0.3771, + "step": 2282000 + }, + { + "epoch": 1.37, + "learning_rate": 4.645939446593076e-05, + "loss": 0.3771, + "step": 2282500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6457294500370196e-05, + "loss": 0.3744, + "step": 2283000 + }, + { + "epoch": 1.37, + "learning_rate": 4.645519873474075e-05, + "loss": 0.3771, + "step": 2283500 + }, + { + "epoch": 1.37, + "learning_rate": 4.645309876918019e-05, + "loss": 0.3792, + "step": 2284000 + }, + { + "epoch": 1.37, + "learning_rate": 4.645099880361962e-05, + "loss": 0.3856, + "step": 2284500 + }, + { + "epoch": 1.37, + "learning_rate": 4.6448898838059056e-05, + "loss": 0.3796, + "step": 2285000 + }, + { + "epoch": 1.37, + "learning_rate": 4.644679887249849e-05, + "loss": 0.366, + "step": 2285500 + }, + { + "epoch": 1.37, + "learning_rate": 4.644469890693792e-05, + "loss": 0.3806, + "step": 2286000 + }, + { + "epoch": 1.37, + "learning_rate": 4.644259894137736e-05, + "loss": 0.3825, + "step": 2286500 + }, + { + "epoch": 1.37, + "learning_rate": 4.644050317574792e-05, + "loss": 0.3826, + "step": 2287000 + }, + { + "epoch": 1.37, + "learning_rate": 4.643840741011847e-05, + "loss": 0.3727, + "step": 2287500 + }, + { + "epoch": 1.37, + "learning_rate": 4.643630744455791e-05, + "loss": 0.3722, + "step": 2288000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6434207478997344e-05, + "loss": 0.3869, + "step": 2288500 + }, + { + "epoch": 1.37, + "learning_rate": 4.643210751343678e-05, + "loss": 0.3746, + "step": 2289000 + }, + { + "epoch": 1.37, + "learning_rate": 4.643000754787622e-05, + "loss": 0.3728, + "step": 2289500 + }, + { + "epoch": 1.37, + "learning_rate": 4.642790758231565e-05, + "loss": 0.3672, + "step": 2290000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6425807616755085e-05, + "loss": 0.3789, + "step": 2290500 + }, + { + "epoch": 1.37, + "learning_rate": 4.642370765119452e-05, + "loss": 0.3827, + "step": 2291000 + }, + { + "epoch": 1.37, + "learning_rate": 4.642160768563395e-05, + "loss": 0.3773, + "step": 2291500 + }, + { + "epoch": 1.37, + "learning_rate": 4.641950772007339e-05, + "loss": 0.3734, + "step": 2292000 + }, + { + "epoch": 1.37, + "learning_rate": 4.6417407754512826e-05, + "loss": 0.3707, + "step": 2292500 + }, + { + "epoch": 1.37, + "learning_rate": 4.641530778895226e-05, + "loss": 0.3742, + "step": 2293000 + }, + { + "epoch": 1.38, + "learning_rate": 4.641321202332281e-05, + "loss": 0.3851, + "step": 2293500 + }, + { + "epoch": 1.38, + "learning_rate": 4.641111625769337e-05, + "loss": 0.3646, + "step": 2294000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640901629213281e-05, + "loss": 0.3824, + "step": 2294500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6406916326572247e-05, + "loss": 0.3807, + "step": 2295000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640481636101167e-05, + "loss": 0.3773, + "step": 2295500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6402716395451114e-05, + "loss": 0.3777, + "step": 2296000 + }, + { + "epoch": 1.38, + "learning_rate": 4.640061642989055e-05, + "loss": 0.3756, + "step": 2296500 + }, + { + "epoch": 1.38, + "learning_rate": 4.639851646432998e-05, + "loss": 0.3754, + "step": 2297000 + }, + { + "epoch": 1.38, + "learning_rate": 4.639642489863166e-05, + "loss": 0.3858, + "step": 2297500 + }, + { + "epoch": 1.38, + "learning_rate": 4.63943249330711e-05, + "loss": 0.3843, + "step": 2298000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6392224967510535e-05, + "loss": 0.3801, + "step": 2298500 + }, + { + "epoch": 1.38, + "learning_rate": 4.639012500194997e-05, + "loss": 0.3806, + "step": 2299000 + }, + { + "epoch": 1.38, + "learning_rate": 4.638802503638941e-05, + "loss": 0.3761, + "step": 2299500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6385925070828835e-05, + "loss": 0.3765, + "step": 2300000 + }, + { + "epoch": 1.38, + "eval_loss": 0.3626013994216919, + "eval_runtime": 1121.9655, + "eval_samples_per_second": 469.462, + "eval_steps_per_second": 78.244, + "step": 2300000 + }, + { + "epoch": 1.38, + "learning_rate": 4.638382510526827e-05, + "loss": 0.3778, + "step": 2300500 + }, + { + "epoch": 1.38, + "learning_rate": 4.638172513970771e-05, + "loss": 0.3866, + "step": 2301000 + }, + { + "epoch": 1.38, + "learning_rate": 4.637962517414714e-05, + "loss": 0.3719, + "step": 2301500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6377525208586576e-05, + "loss": 0.3781, + "step": 2302000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6375425243026016e-05, + "loss": 0.3785, + "step": 2302500 + }, + { + "epoch": 1.38, + "learning_rate": 4.637332527746545e-05, + "loss": 0.3824, + "step": 2303000 + }, + { + "epoch": 1.38, + "learning_rate": 4.637122531190488e-05, + "loss": 0.3773, + "step": 2303500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6369129546275436e-05, + "loss": 0.3772, + "step": 2304000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6367033780646e-05, + "loss": 0.3715, + "step": 2304500 + }, + { + "epoch": 1.38, + "learning_rate": 4.636493381508543e-05, + "loss": 0.3798, + "step": 2305000 + }, + { + "epoch": 1.38, + "learning_rate": 4.6362833849524864e-05, + "loss": 0.3759, + "step": 2305500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6360733883964304e-05, + "loss": 0.3819, + "step": 2306000 + }, + { + "epoch": 1.38, + "learning_rate": 4.635863391840374e-05, + "loss": 0.3759, + "step": 2306500 + }, + { + "epoch": 1.38, + "learning_rate": 4.635653395284317e-05, + "loss": 0.3714, + "step": 2307000 + }, + { + "epoch": 1.38, + "learning_rate": 4.635443398728261e-05, + "loss": 0.3808, + "step": 2307500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6352334021722044e-05, + "loss": 0.368, + "step": 2308000 + }, + { + "epoch": 1.38, + "learning_rate": 4.63502382560926e-05, + "loss": 0.3759, + "step": 2308500 + }, + { + "epoch": 1.38, + "learning_rate": 4.634813829053203e-05, + "loss": 0.373, + "step": 2309000 + }, + { + "epoch": 1.38, + "learning_rate": 4.634603832497147e-05, + "loss": 0.3701, + "step": 2309500 + }, + { + "epoch": 1.38, + "learning_rate": 4.6343938359410905e-05, + "loss": 0.3761, + "step": 2310000 + }, + { + "epoch": 1.39, + "learning_rate": 4.634184259378146e-05, + "loss": 0.3793, + "step": 2310500 + }, + { + "epoch": 1.39, + "learning_rate": 4.633974682815202e-05, + "loss": 0.3739, + "step": 2311000 + }, + { + "epoch": 1.39, + "learning_rate": 4.633764686259145e-05, + "loss": 0.3809, + "step": 2311500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6335546897030886e-05, + "loss": 0.3795, + "step": 2312000 + }, + { + "epoch": 1.39, + "learning_rate": 4.633344693147032e-05, + "loss": 0.3809, + "step": 2312500 + }, + { + "epoch": 1.39, + "learning_rate": 4.633134696590976e-05, + "loss": 0.381, + "step": 2313000 + }, + { + "epoch": 1.39, + "learning_rate": 4.632924700034919e-05, + "loss": 0.3692, + "step": 2313500 + }, + { + "epoch": 1.39, + "learning_rate": 4.632714703478863e-05, + "loss": 0.3791, + "step": 2314000 + }, + { + "epoch": 1.39, + "learning_rate": 4.632504706922807e-05, + "loss": 0.3735, + "step": 2314500 + }, + { + "epoch": 1.39, + "learning_rate": 4.63229471036675e-05, + "loss": 0.3838, + "step": 2315000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6320847138106934e-05, + "loss": 0.3797, + "step": 2315500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6318747172546374e-05, + "loss": 0.3814, + "step": 2316000 + }, + { + "epoch": 1.39, + "learning_rate": 4.631665140691693e-05, + "loss": 0.3833, + "step": 2316500 + }, + { + "epoch": 1.39, + "learning_rate": 4.631455564128748e-05, + "loss": 0.376, + "step": 2317000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6312455675726915e-05, + "loss": 0.3752, + "step": 2317500 + }, + { + "epoch": 1.39, + "learning_rate": 4.631035571016635e-05, + "loss": 0.3762, + "step": 2318000 + }, + { + "epoch": 1.39, + "learning_rate": 4.630825574460579e-05, + "loss": 0.377, + "step": 2318500 + }, + { + "epoch": 1.39, + "learning_rate": 4.630615577904522e-05, + "loss": 0.3795, + "step": 2319000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6304055813484655e-05, + "loss": 0.3706, + "step": 2319500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6301960047855216e-05, + "loss": 0.3789, + "step": 2320000 + }, + { + "epoch": 1.39, + "learning_rate": 4.629986008229465e-05, + "loss": 0.3819, + "step": 2320500 + }, + { + "epoch": 1.39, + "learning_rate": 4.629776011673408e-05, + "loss": 0.3767, + "step": 2321000 + }, + { + "epoch": 1.39, + "learning_rate": 4.629566015117352e-05, + "loss": 0.381, + "step": 2321500 + }, + { + "epoch": 1.39, + "learning_rate": 4.6293560185612956e-05, + "loss": 0.3687, + "step": 2322000 + }, + { + "epoch": 1.39, + "learning_rate": 4.629146022005239e-05, + "loss": 0.3649, + "step": 2322500 + }, + { + "epoch": 1.39, + "learning_rate": 4.628936025449183e-05, + "loss": 0.3782, + "step": 2323000 + }, + { + "epoch": 1.39, + "learning_rate": 4.628726028893126e-05, + "loss": 0.37, + "step": 2323500 + }, + { + "epoch": 1.39, + "learning_rate": 4.62851603233707e-05, + "loss": 0.3851, + "step": 2324000 + }, + { + "epoch": 1.39, + "learning_rate": 4.628306035781013e-05, + "loss": 0.3795, + "step": 2324500 + }, + { + "epoch": 1.39, + "learning_rate": 4.628096459218069e-05, + "loss": 0.3756, + "step": 2325000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6278864626620124e-05, + "loss": 0.3737, + "step": 2325500 + }, + { + "epoch": 1.39, + "learning_rate": 4.627676466105956e-05, + "loss": 0.3736, + "step": 2326000 + }, + { + "epoch": 1.39, + "learning_rate": 4.6274664695499e-05, + "loss": 0.3805, + "step": 2326500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6272564729938424e-05, + "loss": 0.3666, + "step": 2327000 + }, + { + "epoch": 1.4, + "learning_rate": 4.627046476437786e-05, + "loss": 0.3725, + "step": 2327500 + }, + { + "epoch": 1.4, + "learning_rate": 4.62683647988173e-05, + "loss": 0.3868, + "step": 2328000 + }, + { + "epoch": 1.4, + "learning_rate": 4.626626483325673e-05, + "loss": 0.3879, + "step": 2328500 + }, + { + "epoch": 1.4, + "learning_rate": 4.626416906762729e-05, + "loss": 0.3778, + "step": 2329000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6262069102066725e-05, + "loss": 0.3798, + "step": 2329500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6259973336437286e-05, + "loss": 0.3773, + "step": 2330000 + }, + { + "epoch": 1.4, + "learning_rate": 4.625787337087672e-05, + "loss": 0.3769, + "step": 2330500 + }, + { + "epoch": 1.4, + "learning_rate": 4.625577340531615e-05, + "loss": 0.3772, + "step": 2331000 + }, + { + "epoch": 1.4, + "learning_rate": 4.625367343975559e-05, + "loss": 0.3806, + "step": 2331500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6251577674126146e-05, + "loss": 0.3756, + "step": 2332000 + }, + { + "epoch": 1.4, + "learning_rate": 4.624947770856558e-05, + "loss": 0.3784, + "step": 2332500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6247377743005013e-05, + "loss": 0.3854, + "step": 2333000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6245277777444454e-05, + "loss": 0.3778, + "step": 2333500 + }, + { + "epoch": 1.4, + "learning_rate": 4.624317781188388e-05, + "loss": 0.384, + "step": 2334000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6241077846323314e-05, + "loss": 0.3766, + "step": 2334500 + }, + { + "epoch": 1.4, + "learning_rate": 4.623898208069388e-05, + "loss": 0.3799, + "step": 2335000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6236882115133314e-05, + "loss": 0.3668, + "step": 2335500 + }, + { + "epoch": 1.4, + "learning_rate": 4.623478214957275e-05, + "loss": 0.3845, + "step": 2336000 + }, + { + "epoch": 1.4, + "learning_rate": 4.623268218401218e-05, + "loss": 0.3697, + "step": 2336500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6230582218451615e-05, + "loss": 0.3693, + "step": 2337000 + }, + { + "epoch": 1.4, + "learning_rate": 4.622848225289105e-05, + "loss": 0.3766, + "step": 2337500 + }, + { + "epoch": 1.4, + "learning_rate": 4.622638228733049e-05, + "loss": 0.3715, + "step": 2338000 + }, + { + "epoch": 1.4, + "learning_rate": 4.622428232176992e-05, + "loss": 0.381, + "step": 2338500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6222182356209355e-05, + "loss": 0.3769, + "step": 2339000 + }, + { + "epoch": 1.4, + "learning_rate": 4.622008659057991e-05, + "loss": 0.3851, + "step": 2339500 + }, + { + "epoch": 1.4, + "learning_rate": 4.621798662501935e-05, + "loss": 0.3799, + "step": 2340000 + }, + { + "epoch": 1.4, + "learning_rate": 4.621588665945878e-05, + "loss": 0.3822, + "step": 2340500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6213786693898216e-05, + "loss": 0.3767, + "step": 2341000 + }, + { + "epoch": 1.4, + "learning_rate": 4.6211686728337656e-05, + "loss": 0.3789, + "step": 2341500 + }, + { + "epoch": 1.4, + "learning_rate": 4.620958676277709e-05, + "loss": 0.3796, + "step": 2342000 + }, + { + "epoch": 1.4, + "learning_rate": 4.620748679721652e-05, + "loss": 0.3741, + "step": 2342500 + }, + { + "epoch": 1.4, + "learning_rate": 4.6205386831655963e-05, + "loss": 0.3828, + "step": 2343000 + }, + { + "epoch": 1.41, + "learning_rate": 4.620329106602652e-05, + "loss": 0.3774, + "step": 2343500 + }, + { + "epoch": 1.41, + "learning_rate": 4.620119110046595e-05, + "loss": 0.3855, + "step": 2344000 + }, + { + "epoch": 1.41, + "learning_rate": 4.619909113490539e-05, + "loss": 0.3811, + "step": 2344500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6196991169344824e-05, + "loss": 0.3728, + "step": 2345000 + }, + { + "epoch": 1.41, + "learning_rate": 4.619489540371538e-05, + "loss": 0.3779, + "step": 2345500 + }, + { + "epoch": 1.41, + "learning_rate": 4.619279543815481e-05, + "loss": 0.3727, + "step": 2346000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6190699672525365e-05, + "loss": 0.3852, + "step": 2346500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6188599706964805e-05, + "loss": 0.3761, + "step": 2347000 + }, + { + "epoch": 1.41, + "learning_rate": 4.618649974140424e-05, + "loss": 0.3745, + "step": 2347500 + }, + { + "epoch": 1.41, + "learning_rate": 4.618439977584367e-05, + "loss": 0.3683, + "step": 2348000 + }, + { + "epoch": 1.41, + "learning_rate": 4.618229981028311e-05, + "loss": 0.3811, + "step": 2348500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6180199844722546e-05, + "loss": 0.3735, + "step": 2349000 + }, + { + "epoch": 1.41, + "learning_rate": 4.61781040790931e-05, + "loss": 0.3803, + "step": 2349500 + }, + { + "epoch": 1.41, + "learning_rate": 4.617600411353254e-05, + "loss": 0.3794, + "step": 2350000 + }, + { + "epoch": 1.41, + "learning_rate": 4.617390414797197e-05, + "loss": 0.3725, + "step": 2350500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6171804182411406e-05, + "loss": 0.3714, + "step": 2351000 + }, + { + "epoch": 1.41, + "learning_rate": 4.616970421685085e-05, + "loss": 0.3674, + "step": 2351500 + }, + { + "epoch": 1.41, + "learning_rate": 4.616760425129028e-05, + "loss": 0.3729, + "step": 2352000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6165504285729714e-05, + "loss": 0.3726, + "step": 2352500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6163404320169154e-05, + "loss": 0.3834, + "step": 2353000 + }, + { + "epoch": 1.41, + "learning_rate": 4.616130855453971e-05, + "loss": 0.3924, + "step": 2353500 + }, + { + "epoch": 1.41, + "learning_rate": 4.615920858897914e-05, + "loss": 0.3708, + "step": 2354000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6157108623418574e-05, + "loss": 0.3685, + "step": 2354500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6155008657858015e-05, + "loss": 0.377, + "step": 2355000 + }, + { + "epoch": 1.41, + "learning_rate": 4.615291289222857e-05, + "loss": 0.3686, + "step": 2355500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6150812926668e-05, + "loss": 0.3763, + "step": 2356000 + }, + { + "epoch": 1.41, + "learning_rate": 4.6148712961107435e-05, + "loss": 0.3719, + "step": 2356500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6146612995546875e-05, + "loss": 0.3728, + "step": 2357000 + }, + { + "epoch": 1.41, + "learning_rate": 4.614451722991743e-05, + "loss": 0.3762, + "step": 2357500 + }, + { + "epoch": 1.41, + "learning_rate": 4.614241726435686e-05, + "loss": 0.3649, + "step": 2358000 + }, + { + "epoch": 1.41, + "learning_rate": 4.61403172987963e-05, + "loss": 0.3684, + "step": 2358500 + }, + { + "epoch": 1.41, + "learning_rate": 4.6138217333235736e-05, + "loss": 0.3685, + "step": 2359000 + }, + { + "epoch": 1.41, + "learning_rate": 4.613612156760629e-05, + "loss": 0.3703, + "step": 2359500 + }, + { + "epoch": 1.41, + "learning_rate": 4.613402580197685e-05, + "loss": 0.3864, + "step": 2360000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6131925836416277e-05, + "loss": 0.3838, + "step": 2360500 + }, + { + "epoch": 1.42, + "learning_rate": 4.612982587085572e-05, + "loss": 0.3677, + "step": 2361000 + }, + { + "epoch": 1.42, + "learning_rate": 4.612772590529515e-05, + "loss": 0.3701, + "step": 2361500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6125625939734584e-05, + "loss": 0.3728, + "step": 2362000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6123525974174024e-05, + "loss": 0.3869, + "step": 2362500 + }, + { + "epoch": 1.42, + "learning_rate": 4.612142600861346e-05, + "loss": 0.385, + "step": 2363000 + }, + { + "epoch": 1.42, + "learning_rate": 4.611933024298401e-05, + "loss": 0.3703, + "step": 2363500 + }, + { + "epoch": 1.42, + "learning_rate": 4.611723027742345e-05, + "loss": 0.3759, + "step": 2364000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6115130311862885e-05, + "loss": 0.3699, + "step": 2364500 + }, + { + "epoch": 1.42, + "learning_rate": 4.611303034630232e-05, + "loss": 0.3802, + "step": 2365000 + }, + { + "epoch": 1.42, + "learning_rate": 4.611093038074176e-05, + "loss": 0.3807, + "step": 2365500 + }, + { + "epoch": 1.42, + "learning_rate": 4.610883041518119e-05, + "loss": 0.3767, + "step": 2366000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6106734649551745e-05, + "loss": 0.3722, + "step": 2366500 + }, + { + "epoch": 1.42, + "learning_rate": 4.610463468399118e-05, + "loss": 0.3811, + "step": 2367000 + }, + { + "epoch": 1.42, + "learning_rate": 4.610253471843062e-05, + "loss": 0.3752, + "step": 2367500 + }, + { + "epoch": 1.42, + "learning_rate": 4.610043475287005e-05, + "loss": 0.3788, + "step": 2368000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6098338987240606e-05, + "loss": 0.3752, + "step": 2368500 + }, + { + "epoch": 1.42, + "learning_rate": 4.609623902168004e-05, + "loss": 0.368, + "step": 2369000 + }, + { + "epoch": 1.42, + "learning_rate": 4.609413905611948e-05, + "loss": 0.3734, + "step": 2369500 + }, + { + "epoch": 1.42, + "learning_rate": 4.609203909055891e-05, + "loss": 0.3715, + "step": 2370000 + }, + { + "epoch": 1.42, + "learning_rate": 4.608993912499835e-05, + "loss": 0.3803, + "step": 2370500 + }, + { + "epoch": 1.42, + "learning_rate": 4.608783915943779e-05, + "loss": 0.3797, + "step": 2371000 + }, + { + "epoch": 1.42, + "learning_rate": 4.608574339380834e-05, + "loss": 0.3817, + "step": 2371500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6083643428247774e-05, + "loss": 0.374, + "step": 2372000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6081543462687214e-05, + "loss": 0.3852, + "step": 2372500 + }, + { + "epoch": 1.42, + "learning_rate": 4.607944349712665e-05, + "loss": 0.3706, + "step": 2373000 + }, + { + "epoch": 1.42, + "learning_rate": 4.607734353156608e-05, + "loss": 0.3845, + "step": 2373500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6075247765936635e-05, + "loss": 0.3783, + "step": 2374000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6073147800376075e-05, + "loss": 0.3811, + "step": 2374500 + }, + { + "epoch": 1.42, + "learning_rate": 4.607104783481551e-05, + "loss": 0.3888, + "step": 2375000 + }, + { + "epoch": 1.42, + "learning_rate": 4.606894786925494e-05, + "loss": 0.3749, + "step": 2375500 + }, + { + "epoch": 1.42, + "learning_rate": 4.6066852103625495e-05, + "loss": 0.3798, + "step": 2376000 + }, + { + "epoch": 1.42, + "learning_rate": 4.6064752138064936e-05, + "loss": 0.3672, + "step": 2376500 + }, + { + "epoch": 1.43, + "learning_rate": 4.606265217250437e-05, + "loss": 0.3832, + "step": 2377000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60605522069438e-05, + "loss": 0.3724, + "step": 2377500 + }, + { + "epoch": 1.43, + "learning_rate": 4.605845224138324e-05, + "loss": 0.3619, + "step": 2378000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6056352275822676e-05, + "loss": 0.3763, + "step": 2378500 + }, + { + "epoch": 1.43, + "learning_rate": 4.605425231026211e-05, + "loss": 0.3775, + "step": 2379000 + }, + { + "epoch": 1.43, + "learning_rate": 4.605215234470155e-05, + "loss": 0.3822, + "step": 2379500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6050056579072104e-05, + "loss": 0.3796, + "step": 2380000 + }, + { + "epoch": 1.43, + "learning_rate": 4.604795661351154e-05, + "loss": 0.3755, + "step": 2380500 + }, + { + "epoch": 1.43, + "learning_rate": 4.604586084788209e-05, + "loss": 0.3783, + "step": 2381000 + }, + { + "epoch": 1.43, + "learning_rate": 4.604376088232153e-05, + "loss": 0.3732, + "step": 2381500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6041660916760964e-05, + "loss": 0.3743, + "step": 2382000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60395609512004e-05, + "loss": 0.3695, + "step": 2382500 + }, + { + "epoch": 1.43, + "learning_rate": 4.603746098563984e-05, + "loss": 0.3755, + "step": 2383000 + }, + { + "epoch": 1.43, + "learning_rate": 4.603536102007927e-05, + "loss": 0.3702, + "step": 2383500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6033261054518705e-05, + "loss": 0.3757, + "step": 2384000 + }, + { + "epoch": 1.43, + "learning_rate": 4.603116528888926e-05, + "loss": 0.3801, + "step": 2384500 + }, + { + "epoch": 1.43, + "learning_rate": 4.60290653233287e-05, + "loss": 0.3708, + "step": 2385000 + }, + { + "epoch": 1.43, + "learning_rate": 4.602696535776813e-05, + "loss": 0.3767, + "step": 2385500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6024865392207566e-05, + "loss": 0.3725, + "step": 2386000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6022765426647006e-05, + "loss": 0.3767, + "step": 2386500 + }, + { + "epoch": 1.43, + "learning_rate": 4.602066546108644e-05, + "loss": 0.3728, + "step": 2387000 + }, + { + "epoch": 1.43, + "learning_rate": 4.601856549552587e-05, + "loss": 0.3682, + "step": 2387500 + }, + { + "epoch": 1.43, + "learning_rate": 4.6016465529965306e-05, + "loss": 0.3769, + "step": 2388000 + }, + { + "epoch": 1.43, + "learning_rate": 4.601436976433587e-05, + "loss": 0.3774, + "step": 2388500 + }, + { + "epoch": 1.43, + "learning_rate": 4.60122697987753e-05, + "loss": 0.3742, + "step": 2389000 + }, + { + "epoch": 1.43, + "learning_rate": 4.6010169833214734e-05, + "loss": 0.3804, + "step": 2389500 + }, + { + "epoch": 1.43, + "learning_rate": 4.600806986765417e-05, + "loss": 0.3785, + "step": 2390000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60059699020936e-05, + "loss": 0.3702, + "step": 2390500 + }, + { + "epoch": 1.43, + "learning_rate": 4.600386993653304e-05, + "loss": 0.3784, + "step": 2391000 + }, + { + "epoch": 1.43, + "learning_rate": 4.60017741709036e-05, + "loss": 0.3719, + "step": 2391500 + }, + { + "epoch": 1.43, + "learning_rate": 4.599967420534303e-05, + "loss": 0.3958, + "step": 2392000 + }, + { + "epoch": 1.43, + "learning_rate": 4.599757423978246e-05, + "loss": 0.3765, + "step": 2392500 + }, + { + "epoch": 1.43, + "learning_rate": 4.59954742742219e-05, + "loss": 0.3765, + "step": 2393000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5993374308661335e-05, + "loss": 0.3809, + "step": 2393500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5991274343100775e-05, + "loss": 0.3735, + "step": 2394000 + }, + { + "epoch": 1.44, + "learning_rate": 4.598917857747133e-05, + "loss": 0.3684, + "step": 2394500 + }, + { + "epoch": 1.44, + "learning_rate": 4.598707861191076e-05, + "loss": 0.3748, + "step": 2395000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5984978646350196e-05, + "loss": 0.3826, + "step": 2395500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5982878680789636e-05, + "loss": 0.3766, + "step": 2396000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5980782915160196e-05, + "loss": 0.3801, + "step": 2396500 + }, + { + "epoch": 1.44, + "learning_rate": 4.597868294959962e-05, + "loss": 0.3804, + "step": 2397000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5976582984039056e-05, + "loss": 0.3714, + "step": 2397500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5974483018478497e-05, + "loss": 0.3776, + "step": 2398000 + }, + { + "epoch": 1.44, + "learning_rate": 4.597238305291793e-05, + "loss": 0.3815, + "step": 2398500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5970283087357363e-05, + "loss": 0.3768, + "step": 2399000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5968183121796804e-05, + "loss": 0.365, + "step": 2399500 + }, + { + "epoch": 1.44, + "learning_rate": 4.596608735616736e-05, + "loss": 0.3779, + "step": 2400000 + }, + { + "epoch": 1.44, + "eval_loss": 0.3611328899860382, + "eval_runtime": 1123.5144, + "eval_samples_per_second": 468.815, + "eval_steps_per_second": 78.136, + "step": 2400000 + }, + { + "epoch": 1.44, + "learning_rate": 4.596398739060679e-05, + "loss": 0.3885, + "step": 2400500 + }, + { + "epoch": 1.44, + "learning_rate": 4.596188742504623e-05, + "loss": 0.3818, + "step": 2401000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5959787459485664e-05, + "loss": 0.3791, + "step": 2401500 + }, + { + "epoch": 1.44, + "learning_rate": 4.595769169385622e-05, + "loss": 0.3697, + "step": 2402000 + }, + { + "epoch": 1.44, + "learning_rate": 4.595559172829565e-05, + "loss": 0.3775, + "step": 2402500 + }, + { + "epoch": 1.44, + "learning_rate": 4.595349176273509e-05, + "loss": 0.3852, + "step": 2403000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5951391797174525e-05, + "loss": 0.3859, + "step": 2403500 + }, + { + "epoch": 1.44, + "learning_rate": 4.594929603154508e-05, + "loss": 0.373, + "step": 2404000 + }, + { + "epoch": 1.44, + "learning_rate": 4.594719606598451e-05, + "loss": 0.3805, + "step": 2404500 + }, + { + "epoch": 1.44, + "learning_rate": 4.594509610042395e-05, + "loss": 0.3867, + "step": 2405000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5942996134863386e-05, + "loss": 0.3654, + "step": 2405500 + }, + { + "epoch": 1.44, + "learning_rate": 4.594089616930282e-05, + "loss": 0.3867, + "step": 2406000 + }, + { + "epoch": 1.44, + "learning_rate": 4.593879620374226e-05, + "loss": 0.3748, + "step": 2406500 + }, + { + "epoch": 1.44, + "learning_rate": 4.593669623818169e-05, + "loss": 0.3799, + "step": 2407000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5934596272621127e-05, + "loss": 0.3745, + "step": 2407500 + }, + { + "epoch": 1.44, + "learning_rate": 4.593250050699169e-05, + "loss": 0.3772, + "step": 2408000 + }, + { + "epoch": 1.44, + "learning_rate": 4.593040054143112e-05, + "loss": 0.3689, + "step": 2408500 + }, + { + "epoch": 1.44, + "learning_rate": 4.5928300575870554e-05, + "loss": 0.3726, + "step": 2409000 + }, + { + "epoch": 1.44, + "learning_rate": 4.5926200610309994e-05, + "loss": 0.372, + "step": 2409500 + }, + { + "epoch": 1.44, + "learning_rate": 4.592410064474943e-05, + "loss": 0.3816, + "step": 2410000 + }, + { + "epoch": 1.45, + "learning_rate": 4.592200067918886e-05, + "loss": 0.3767, + "step": 2410500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5919904913559415e-05, + "loss": 0.3733, + "step": 2411000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5917804947998855e-05, + "loss": 0.3792, + "step": 2411500 + }, + { + "epoch": 1.45, + "learning_rate": 4.591570498243829e-05, + "loss": 0.3719, + "step": 2412000 + }, + { + "epoch": 1.45, + "learning_rate": 4.591360501687772e-05, + "loss": 0.3757, + "step": 2412500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5911509251248275e-05, + "loss": 0.3629, + "step": 2413000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5909409285687715e-05, + "loss": 0.3827, + "step": 2413500 + }, + { + "epoch": 1.45, + "learning_rate": 4.590730932012715e-05, + "loss": 0.3743, + "step": 2414000 + }, + { + "epoch": 1.45, + "learning_rate": 4.590520935456658e-05, + "loss": 0.3722, + "step": 2414500 + }, + { + "epoch": 1.45, + "learning_rate": 4.590310938900602e-05, + "loss": 0.3715, + "step": 2415000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5901009423445456e-05, + "loss": 0.3833, + "step": 2415500 + }, + { + "epoch": 1.45, + "learning_rate": 4.589890945788489e-05, + "loss": 0.372, + "step": 2416000 + }, + { + "epoch": 1.45, + "learning_rate": 4.589680949232432e-05, + "loss": 0.378, + "step": 2416500 + }, + { + "epoch": 1.45, + "learning_rate": 4.589471372669488e-05, + "loss": 0.3835, + "step": 2417000 + }, + { + "epoch": 1.45, + "learning_rate": 4.589261376113432e-05, + "loss": 0.3761, + "step": 2417500 + }, + { + "epoch": 1.45, + "learning_rate": 4.589051379557376e-05, + "loss": 0.3729, + "step": 2418000 + }, + { + "epoch": 1.45, + "learning_rate": 4.588841383001319e-05, + "loss": 0.3716, + "step": 2418500 + }, + { + "epoch": 1.45, + "learning_rate": 4.588631386445262e-05, + "loss": 0.3785, + "step": 2419000 + }, + { + "epoch": 1.45, + "learning_rate": 4.588421389889206e-05, + "loss": 0.3836, + "step": 2419500 + }, + { + "epoch": 1.45, + "learning_rate": 4.588211393333149e-05, + "loss": 0.3837, + "step": 2420000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5880013967770924e-05, + "loss": 0.3707, + "step": 2420500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5877918202141485e-05, + "loss": 0.3638, + "step": 2421000 + }, + { + "epoch": 1.45, + "learning_rate": 4.587582243651204e-05, + "loss": 0.3669, + "step": 2421500 + }, + { + "epoch": 1.45, + "learning_rate": 4.587372247095148e-05, + "loss": 0.383, + "step": 2422000 + }, + { + "epoch": 1.45, + "learning_rate": 4.587162250539091e-05, + "loss": 0.3692, + "step": 2422500 + }, + { + "epoch": 1.45, + "learning_rate": 4.5869522539830345e-05, + "loss": 0.3754, + "step": 2423000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5867422574269786e-05, + "loss": 0.3693, + "step": 2423500 + }, + { + "epoch": 1.45, + "learning_rate": 4.586532260870921e-05, + "loss": 0.3652, + "step": 2424000 + }, + { + "epoch": 1.45, + "learning_rate": 4.586322264314865e-05, + "loss": 0.3701, + "step": 2424500 + }, + { + "epoch": 1.45, + "learning_rate": 4.586112687751921e-05, + "loss": 0.3745, + "step": 2425000 + }, + { + "epoch": 1.45, + "learning_rate": 4.5859026911958646e-05, + "loss": 0.3815, + "step": 2425500 + }, + { + "epoch": 1.45, + "learning_rate": 4.585692694639807e-05, + "loss": 0.3765, + "step": 2426000 + }, + { + "epoch": 1.45, + "learning_rate": 4.585482698083751e-05, + "loss": 0.3766, + "step": 2426500 + }, + { + "epoch": 1.46, + "learning_rate": 4.585272701527695e-05, + "loss": 0.3781, + "step": 2427000 + }, + { + "epoch": 1.46, + "learning_rate": 4.585063124964751e-05, + "loss": 0.3748, + "step": 2427500 + }, + { + "epoch": 1.46, + "learning_rate": 4.584853128408694e-05, + "loss": 0.3913, + "step": 2428000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5846431318526374e-05, + "loss": 0.3714, + "step": 2428500 + }, + { + "epoch": 1.46, + "learning_rate": 4.584433135296581e-05, + "loss": 0.3786, + "step": 2429000 + }, + { + "epoch": 1.46, + "learning_rate": 4.584223138740524e-05, + "loss": 0.3782, + "step": 2429500 + }, + { + "epoch": 1.46, + "learning_rate": 4.584013142184468e-05, + "loss": 0.3755, + "step": 2430000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5838031456284115e-05, + "loss": 0.3716, + "step": 2430500 + }, + { + "epoch": 1.46, + "learning_rate": 4.583593149072355e-05, + "loss": 0.3731, + "step": 2431000 + }, + { + "epoch": 1.46, + "learning_rate": 4.583383572509411e-05, + "loss": 0.3659, + "step": 2431500 + }, + { + "epoch": 1.46, + "learning_rate": 4.583173575953354e-05, + "loss": 0.3701, + "step": 2432000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5829635793972975e-05, + "loss": 0.3727, + "step": 2432500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5827540028343536e-05, + "loss": 0.3871, + "step": 2433000 + }, + { + "epoch": 1.46, + "learning_rate": 4.582544006278297e-05, + "loss": 0.3674, + "step": 2433500 + }, + { + "epoch": 1.46, + "learning_rate": 4.58233400972224e-05, + "loss": 0.3808, + "step": 2434000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5821240131661836e-05, + "loss": 0.374, + "step": 2434500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5819140166101276e-05, + "loss": 0.376, + "step": 2435000 + }, + { + "epoch": 1.46, + "learning_rate": 4.581704020054071e-05, + "loss": 0.3691, + "step": 2435500 + }, + { + "epoch": 1.46, + "learning_rate": 4.581494023498014e-05, + "loss": 0.3723, + "step": 2436000 + }, + { + "epoch": 1.46, + "learning_rate": 4.58128444693507e-05, + "loss": 0.3722, + "step": 2436500 + }, + { + "epoch": 1.46, + "learning_rate": 4.581074450379014e-05, + "loss": 0.3839, + "step": 2437000 + }, + { + "epoch": 1.46, + "learning_rate": 4.580864453822957e-05, + "loss": 0.3722, + "step": 2437500 + }, + { + "epoch": 1.46, + "learning_rate": 4.5806544572669004e-05, + "loss": 0.3801, + "step": 2438000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5804444607108444e-05, + "loss": 0.3708, + "step": 2438500 + }, + { + "epoch": 1.46, + "learning_rate": 4.580234464154788e-05, + "loss": 0.3756, + "step": 2439000 + }, + { + "epoch": 1.46, + "learning_rate": 4.580024467598731e-05, + "loss": 0.3863, + "step": 2439500 + }, + { + "epoch": 1.46, + "learning_rate": 4.579814891035787e-05, + "loss": 0.3638, + "step": 2440000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5796048944797305e-05, + "loss": 0.3745, + "step": 2440500 + }, + { + "epoch": 1.46, + "learning_rate": 4.579395317916786e-05, + "loss": 0.3825, + "step": 2441000 + }, + { + "epoch": 1.46, + "learning_rate": 4.579185321360729e-05, + "loss": 0.3678, + "step": 2441500 + }, + { + "epoch": 1.46, + "learning_rate": 4.578975324804673e-05, + "loss": 0.3824, + "step": 2442000 + }, + { + "epoch": 1.46, + "learning_rate": 4.5787653282486166e-05, + "loss": 0.3624, + "step": 2442500 + }, + { + "epoch": 1.46, + "learning_rate": 4.57855533169256e-05, + "loss": 0.3696, + "step": 2443000 + }, + { + "epoch": 1.46, + "learning_rate": 4.578345335136504e-05, + "loss": 0.3783, + "step": 2443500 + }, + { + "epoch": 1.47, + "learning_rate": 4.578135338580447e-05, + "loss": 0.3729, + "step": 2444000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5779253420243906e-05, + "loss": 0.3705, + "step": 2444500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5777153454683347e-05, + "loss": 0.3801, + "step": 2445000 + }, + { + "epoch": 1.47, + "learning_rate": 4.577505348912278e-05, + "loss": 0.3789, + "step": 2445500 + }, + { + "epoch": 1.47, + "learning_rate": 4.577295352356221e-05, + "loss": 0.3779, + "step": 2446000 + }, + { + "epoch": 1.47, + "learning_rate": 4.577085355800165e-05, + "loss": 0.375, + "step": 2446500 + }, + { + "epoch": 1.47, + "learning_rate": 4.576875779237221e-05, + "loss": 0.3703, + "step": 2447000 + }, + { + "epoch": 1.47, + "learning_rate": 4.576666202674276e-05, + "loss": 0.3788, + "step": 2447500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5764562061182194e-05, + "loss": 0.3805, + "step": 2448000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5762462095621635e-05, + "loss": 0.3698, + "step": 2448500 + }, + { + "epoch": 1.47, + "learning_rate": 4.576036213006107e-05, + "loss": 0.3817, + "step": 2449000 + }, + { + "epoch": 1.47, + "learning_rate": 4.57582621645005e-05, + "loss": 0.3744, + "step": 2449500 + }, + { + "epoch": 1.47, + "learning_rate": 4.575616219893994e-05, + "loss": 0.3743, + "step": 2450000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5754062233379375e-05, + "loss": 0.3727, + "step": 2450500 + }, + { + "epoch": 1.47, + "learning_rate": 4.57519622678188e-05, + "loss": 0.3732, + "step": 2451000 + }, + { + "epoch": 1.47, + "learning_rate": 4.574986650218936e-05, + "loss": 0.378, + "step": 2451500 + }, + { + "epoch": 1.47, + "learning_rate": 4.574777493649104e-05, + "loss": 0.3808, + "step": 2452000 + }, + { + "epoch": 1.47, + "learning_rate": 4.5745674970930476e-05, + "loss": 0.3746, + "step": 2452500 + }, + { + "epoch": 1.47, + "learning_rate": 4.574357500536991e-05, + "loss": 0.3781, + "step": 2453000 + }, + { + "epoch": 1.47, + "learning_rate": 4.574147503980934e-05, + "loss": 0.3776, + "step": 2453500 + }, + { + "epoch": 1.47, + "learning_rate": 4.573937507424878e-05, + "loss": 0.3795, + "step": 2454000 + }, + { + "epoch": 1.47, + "learning_rate": 4.573727510868822e-05, + "loss": 0.3825, + "step": 2454500 + }, + { + "epoch": 1.47, + "learning_rate": 4.573517514312765e-05, + "loss": 0.3811, + "step": 2455000 + }, + { + "epoch": 1.47, + "learning_rate": 4.573307517756709e-05, + "loss": 0.3819, + "step": 2455500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5730975212006524e-05, + "loss": 0.3831, + "step": 2456000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572887524644596e-05, + "loss": 0.3795, + "step": 2456500 + }, + { + "epoch": 1.47, + "learning_rate": 4.57267752808854e-05, + "loss": 0.3689, + "step": 2457000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572467951525595e-05, + "loss": 0.3724, + "step": 2457500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5722579549695385e-05, + "loss": 0.3648, + "step": 2458000 + }, + { + "epoch": 1.47, + "learning_rate": 4.572047958413482e-05, + "loss": 0.3762, + "step": 2458500 + }, + { + "epoch": 1.47, + "learning_rate": 4.571837961857426e-05, + "loss": 0.3793, + "step": 2459000 + }, + { + "epoch": 1.47, + "learning_rate": 4.571627965301369e-05, + "loss": 0.3811, + "step": 2459500 + }, + { + "epoch": 1.47, + "learning_rate": 4.5714179687453125e-05, + "loss": 0.3821, + "step": 2460000 + }, + { + "epoch": 1.48, + "learning_rate": 4.571207972189256e-05, + "loss": 0.3771, + "step": 2460500 + }, + { + "epoch": 1.48, + "learning_rate": 4.570998395626312e-05, + "loss": 0.3827, + "step": 2461000 + }, + { + "epoch": 1.48, + "learning_rate": 4.570788399070255e-05, + "loss": 0.3853, + "step": 2461500 + }, + { + "epoch": 1.48, + "learning_rate": 4.570578402514199e-05, + "loss": 0.376, + "step": 2462000 + }, + { + "epoch": 1.48, + "learning_rate": 4.570368405958142e-05, + "loss": 0.3655, + "step": 2462500 + }, + { + "epoch": 1.48, + "learning_rate": 4.570158409402085e-05, + "loss": 0.3741, + "step": 2463000 + }, + { + "epoch": 1.48, + "learning_rate": 4.569948832839141e-05, + "loss": 0.3754, + "step": 2463500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5697388362830853e-05, + "loss": 0.3761, + "step": 2464000 + }, + { + "epoch": 1.48, + "learning_rate": 4.569528839727029e-05, + "loss": 0.3776, + "step": 2464500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5693188431709714e-05, + "loss": 0.3612, + "step": 2465000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5691088466149154e-05, + "loss": 0.371, + "step": 2465500 + }, + { + "epoch": 1.48, + "learning_rate": 4.568898850058859e-05, + "loss": 0.3716, + "step": 2466000 + }, + { + "epoch": 1.48, + "learning_rate": 4.568688853502802e-05, + "loss": 0.3732, + "step": 2466500 + }, + { + "epoch": 1.48, + "learning_rate": 4.568478856946746e-05, + "loss": 0.379, + "step": 2467000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5682692803838015e-05, + "loss": 0.3749, + "step": 2467500 + }, + { + "epoch": 1.48, + "learning_rate": 4.568059283827745e-05, + "loss": 0.3764, + "step": 2468000 + }, + { + "epoch": 1.48, + "learning_rate": 4.567849707264801e-05, + "loss": 0.3827, + "step": 2468500 + }, + { + "epoch": 1.48, + "learning_rate": 4.567639710708745e-05, + "loss": 0.3728, + "step": 2469000 + }, + { + "epoch": 1.48, + "learning_rate": 4.567429714152688e-05, + "loss": 0.3799, + "step": 2469500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5672201375897436e-05, + "loss": 0.3853, + "step": 2470000 + }, + { + "epoch": 1.48, + "learning_rate": 4.567010141033687e-05, + "loss": 0.3929, + "step": 2470500 + }, + { + "epoch": 1.48, + "learning_rate": 4.566800144477631e-05, + "loss": 0.3737, + "step": 2471000 + }, + { + "epoch": 1.48, + "learning_rate": 4.566590147921574e-05, + "loss": 0.3766, + "step": 2471500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5663801513655176e-05, + "loss": 0.3791, + "step": 2472000 + }, + { + "epoch": 1.48, + "learning_rate": 4.566170154809461e-05, + "loss": 0.3747, + "step": 2472500 + }, + { + "epoch": 1.48, + "learning_rate": 4.565960158253404e-05, + "loss": 0.3875, + "step": 2473000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5657505816904604e-05, + "loss": 0.3848, + "step": 2473500 + }, + { + "epoch": 1.48, + "learning_rate": 4.565540585134404e-05, + "loss": 0.3748, + "step": 2474000 + }, + { + "epoch": 1.48, + "learning_rate": 4.565330588578347e-05, + "loss": 0.3876, + "step": 2474500 + }, + { + "epoch": 1.48, + "learning_rate": 4.5651205920222904e-05, + "loss": 0.361, + "step": 2475000 + }, + { + "epoch": 1.48, + "learning_rate": 4.5649105954662344e-05, + "loss": 0.3833, + "step": 2475500 + }, + { + "epoch": 1.48, + "learning_rate": 4.564700598910178e-05, + "loss": 0.3717, + "step": 2476000 + }, + { + "epoch": 1.48, + "learning_rate": 4.564490602354121e-05, + "loss": 0.3714, + "step": 2476500 + }, + { + "epoch": 1.49, + "learning_rate": 4.564280605798065e-05, + "loss": 0.3602, + "step": 2477000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5640706092420085e-05, + "loss": 0.3673, + "step": 2477500 + }, + { + "epoch": 1.49, + "learning_rate": 4.563860612685952e-05, + "loss": 0.3694, + "step": 2478000 + }, + { + "epoch": 1.49, + "learning_rate": 4.563650616129896e-05, + "loss": 0.373, + "step": 2478500 + }, + { + "epoch": 1.49, + "learning_rate": 4.563440619573839e-05, + "loss": 0.3738, + "step": 2479000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5632310430108945e-05, + "loss": 0.3735, + "step": 2479500 + }, + { + "epoch": 1.49, + "learning_rate": 4.563021046454838e-05, + "loss": 0.3781, + "step": 2480000 + }, + { + "epoch": 1.49, + "learning_rate": 4.562811049898782e-05, + "loss": 0.3787, + "step": 2480500 + }, + { + "epoch": 1.49, + "learning_rate": 4.562601053342725e-05, + "loss": 0.3784, + "step": 2481000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5623910567866686e-05, + "loss": 0.3751, + "step": 2481500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5621810602306126e-05, + "loss": 0.3609, + "step": 2482000 + }, + { + "epoch": 1.49, + "learning_rate": 4.561971063674555e-05, + "loss": 0.3699, + "step": 2482500 + }, + { + "epoch": 1.49, + "learning_rate": 4.561761487111611e-05, + "loss": 0.376, + "step": 2483000 + }, + { + "epoch": 1.49, + "learning_rate": 4.561551490555555e-05, + "loss": 0.3711, + "step": 2483500 + }, + { + "epoch": 1.49, + "learning_rate": 4.561341493999499e-05, + "loss": 0.3786, + "step": 2484000 + }, + { + "epoch": 1.49, + "learning_rate": 4.561131497443442e-05, + "loss": 0.3674, + "step": 2484500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5609215008873854e-05, + "loss": 0.3757, + "step": 2485000 + }, + { + "epoch": 1.49, + "learning_rate": 4.560711504331329e-05, + "loss": 0.3707, + "step": 2485500 + }, + { + "epoch": 1.49, + "learning_rate": 4.560501927768385e-05, + "loss": 0.3698, + "step": 2486000 + }, + { + "epoch": 1.49, + "learning_rate": 4.560291931212328e-05, + "loss": 0.3767, + "step": 2486500 + }, + { + "epoch": 1.49, + "learning_rate": 4.560081934656272e-05, + "loss": 0.3774, + "step": 2487000 + }, + { + "epoch": 1.49, + "learning_rate": 4.559871938100215e-05, + "loss": 0.3761, + "step": 2487500 + }, + { + "epoch": 1.49, + "learning_rate": 4.559662361537271e-05, + "loss": 0.3703, + "step": 2488000 + }, + { + "epoch": 1.49, + "learning_rate": 4.559452784974326e-05, + "loss": 0.3709, + "step": 2488500 + }, + { + "epoch": 1.49, + "learning_rate": 4.5592427884182696e-05, + "loss": 0.3854, + "step": 2489000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5590327918622136e-05, + "loss": 0.3776, + "step": 2489500 + }, + { + "epoch": 1.49, + "learning_rate": 4.558822795306157e-05, + "loss": 0.3741, + "step": 2490000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5586127987501e-05, + "loss": 0.3803, + "step": 2490500 + }, + { + "epoch": 1.49, + "learning_rate": 4.558402802194044e-05, + "loss": 0.3597, + "step": 2491000 + }, + { + "epoch": 1.49, + "learning_rate": 4.5581928056379876e-05, + "loss": 0.3805, + "step": 2491500 + }, + { + "epoch": 1.49, + "learning_rate": 4.557982809081931e-05, + "loss": 0.3714, + "step": 2492000 + }, + { + "epoch": 1.49, + "learning_rate": 4.557772812525874e-05, + "loss": 0.3802, + "step": 2492500 + }, + { + "epoch": 1.49, + "learning_rate": 4.557562815969818e-05, + "loss": 0.3693, + "step": 2493000 + }, + { + "epoch": 1.49, + "learning_rate": 4.557352819413762e-05, + "loss": 0.3766, + "step": 2493500 + }, + { + "epoch": 1.5, + "learning_rate": 4.557142822857705e-05, + "loss": 0.3819, + "step": 2494000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5569332462947604e-05, + "loss": 0.3753, + "step": 2494500 + }, + { + "epoch": 1.5, + "learning_rate": 4.556723249738704e-05, + "loss": 0.3702, + "step": 2495000 + }, + { + "epoch": 1.5, + "learning_rate": 4.556513253182648e-05, + "loss": 0.3665, + "step": 2495500 + }, + { + "epoch": 1.5, + "learning_rate": 4.556303256626591e-05, + "loss": 0.3727, + "step": 2496000 + }, + { + "epoch": 1.5, + "learning_rate": 4.556093680063647e-05, + "loss": 0.3781, + "step": 2496500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5558841035007025e-05, + "loss": 0.3808, + "step": 2497000 + }, + { + "epoch": 1.5, + "learning_rate": 4.555674106944646e-05, + "loss": 0.3794, + "step": 2497500 + }, + { + "epoch": 1.5, + "learning_rate": 4.55546411038859e-05, + "loss": 0.374, + "step": 2498000 + }, + { + "epoch": 1.5, + "learning_rate": 4.555254113832533e-05, + "loss": 0.374, + "step": 2498500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5550441172764766e-05, + "loss": 0.3619, + "step": 2499000 + }, + { + "epoch": 1.5, + "learning_rate": 4.55483412072042e-05, + "loss": 0.3731, + "step": 2499500 + }, + { + "epoch": 1.5, + "learning_rate": 4.554624124164363e-05, + "loss": 0.3674, + "step": 2500000 + }, + { + "epoch": 1.5, + "eval_loss": 0.3600391149520874, + "eval_runtime": 1121.959, + "eval_samples_per_second": 469.465, + "eval_steps_per_second": 78.244, + "step": 2500000 + }, + { + "epoch": 1.5, + "learning_rate": 4.554414547601419e-05, + "loss": 0.3643, + "step": 2500500 + }, + { + "epoch": 1.5, + "learning_rate": 4.554204551045363e-05, + "loss": 0.3667, + "step": 2501000 + }, + { + "epoch": 1.5, + "learning_rate": 4.553994554489306e-05, + "loss": 0.3691, + "step": 2501500 + }, + { + "epoch": 1.5, + "learning_rate": 4.553784557933249e-05, + "loss": 0.3739, + "step": 2502000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5535749813703054e-05, + "loss": 0.3795, + "step": 2502500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5533649848142494e-05, + "loss": 0.3683, + "step": 2503000 + }, + { + "epoch": 1.5, + "learning_rate": 4.553154988258193e-05, + "loss": 0.3815, + "step": 2503500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5529449917021354e-05, + "loss": 0.3759, + "step": 2504000 + }, + { + "epoch": 1.5, + "learning_rate": 4.5527349951460794e-05, + "loss": 0.3702, + "step": 2504500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5525258385762475e-05, + "loss": 0.3709, + "step": 2505000 + }, + { + "epoch": 1.5, + "learning_rate": 4.552315842020191e-05, + "loss": 0.37, + "step": 2505500 + }, + { + "epoch": 1.5, + "learning_rate": 4.552105845464134e-05, + "loss": 0.3645, + "step": 2506000 + }, + { + "epoch": 1.5, + "learning_rate": 4.551895848908078e-05, + "loss": 0.3758, + "step": 2506500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5516858523520215e-05, + "loss": 0.3763, + "step": 2507000 + }, + { + "epoch": 1.5, + "learning_rate": 4.551475855795965e-05, + "loss": 0.3764, + "step": 2507500 + }, + { + "epoch": 1.5, + "learning_rate": 4.55126627923302e-05, + "loss": 0.3706, + "step": 2508000 + }, + { + "epoch": 1.5, + "learning_rate": 4.551056282676964e-05, + "loss": 0.3665, + "step": 2508500 + }, + { + "epoch": 1.5, + "learning_rate": 4.5508462861209076e-05, + "loss": 0.3815, + "step": 2509000 + }, + { + "epoch": 1.5, + "learning_rate": 4.550636289564851e-05, + "loss": 0.3735, + "step": 2509500 + }, + { + "epoch": 1.5, + "learning_rate": 4.550426293008795e-05, + "loss": 0.3733, + "step": 2510000 + }, + { + "epoch": 1.51, + "learning_rate": 4.550216296452738e-05, + "loss": 0.3787, + "step": 2510500 + }, + { + "epoch": 1.51, + "learning_rate": 4.550006299896681e-05, + "loss": 0.3795, + "step": 2511000 + }, + { + "epoch": 1.51, + "learning_rate": 4.549796303340625e-05, + "loss": 0.366, + "step": 2511500 + }, + { + "epoch": 1.51, + "learning_rate": 4.549586726777681e-05, + "loss": 0.3767, + "step": 2512000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5493767302216244e-05, + "loss": 0.3706, + "step": 2512500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5491667336655684e-05, + "loss": 0.3821, + "step": 2513000 + }, + { + "epoch": 1.51, + "learning_rate": 4.548956737109511e-05, + "loss": 0.3747, + "step": 2513500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5487467405534544e-05, + "loss": 0.3774, + "step": 2514000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5485371639905105e-05, + "loss": 0.3765, + "step": 2514500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5483271674344545e-05, + "loss": 0.3715, + "step": 2515000 + }, + { + "epoch": 1.51, + "learning_rate": 4.548117170878398e-05, + "loss": 0.3746, + "step": 2515500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5479071743223405e-05, + "loss": 0.3773, + "step": 2516000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5476971777662845e-05, + "loss": 0.3678, + "step": 2516500 + }, + { + "epoch": 1.51, + "learning_rate": 4.547487181210228e-05, + "loss": 0.3549, + "step": 2517000 + }, + { + "epoch": 1.51, + "learning_rate": 4.547277604647284e-05, + "loss": 0.3744, + "step": 2517500 + }, + { + "epoch": 1.51, + "learning_rate": 4.547067608091227e-05, + "loss": 0.3642, + "step": 2518000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5468576115351706e-05, + "loss": 0.3758, + "step": 2518500 + }, + { + "epoch": 1.51, + "learning_rate": 4.546647614979114e-05, + "loss": 0.3757, + "step": 2519000 + }, + { + "epoch": 1.51, + "learning_rate": 4.546437618423058e-05, + "loss": 0.37, + "step": 2519500 + }, + { + "epoch": 1.51, + "learning_rate": 4.546227621867001e-05, + "loss": 0.3679, + "step": 2520000 + }, + { + "epoch": 1.51, + "learning_rate": 4.546017625310945e-05, + "loss": 0.3671, + "step": 2520500 + }, + { + "epoch": 1.51, + "learning_rate": 4.545807628754889e-05, + "loss": 0.3763, + "step": 2521000 + }, + { + "epoch": 1.51, + "learning_rate": 4.545598052191944e-05, + "loss": 0.363, + "step": 2521500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5453880556358874e-05, + "loss": 0.3704, + "step": 2522000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5451784790729434e-05, + "loss": 0.3776, + "step": 2522500 + }, + { + "epoch": 1.51, + "learning_rate": 4.544968482516886e-05, + "loss": 0.3699, + "step": 2523000 + }, + { + "epoch": 1.51, + "learning_rate": 4.54475848596083e-05, + "loss": 0.3811, + "step": 2523500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5445484894047735e-05, + "loss": 0.368, + "step": 2524000 + }, + { + "epoch": 1.51, + "learning_rate": 4.544338492848717e-05, + "loss": 0.3778, + "step": 2524500 + }, + { + "epoch": 1.51, + "learning_rate": 4.544128916285773e-05, + "loss": 0.3833, + "step": 2525000 + }, + { + "epoch": 1.51, + "learning_rate": 4.543918919729716e-05, + "loss": 0.3784, + "step": 2525500 + }, + { + "epoch": 1.51, + "learning_rate": 4.5437089231736595e-05, + "loss": 0.3842, + "step": 2526000 + }, + { + "epoch": 1.51, + "learning_rate": 4.5434989266176036e-05, + "loss": 0.3759, + "step": 2526500 + }, + { + "epoch": 1.52, + "learning_rate": 4.543288930061547e-05, + "loss": 0.3698, + "step": 2527000 + }, + { + "epoch": 1.52, + "learning_rate": 4.54307893350549e-05, + "loss": 0.3766, + "step": 2527500 + }, + { + "epoch": 1.52, + "learning_rate": 4.542868936949434e-05, + "loss": 0.3812, + "step": 2528000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5426589403933776e-05, + "loss": 0.3808, + "step": 2528500 + }, + { + "epoch": 1.52, + "learning_rate": 4.542449783823546e-05, + "loss": 0.3665, + "step": 2529000 + }, + { + "epoch": 1.52, + "learning_rate": 4.542239787267489e-05, + "loss": 0.3698, + "step": 2529500 + }, + { + "epoch": 1.52, + "learning_rate": 4.542029790711432e-05, + "loss": 0.3711, + "step": 2530000 + }, + { + "epoch": 1.52, + "learning_rate": 4.541819794155376e-05, + "loss": 0.3618, + "step": 2530500 + }, + { + "epoch": 1.52, + "learning_rate": 4.541609797599319e-05, + "loss": 0.3741, + "step": 2531000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5413998010432624e-05, + "loss": 0.3826, + "step": 2531500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5411898044872064e-05, + "loss": 0.3701, + "step": 2532000 + }, + { + "epoch": 1.52, + "learning_rate": 4.54097980793115e-05, + "loss": 0.3664, + "step": 2532500 + }, + { + "epoch": 1.52, + "learning_rate": 4.540770231368205e-05, + "loss": 0.382, + "step": 2533000 + }, + { + "epoch": 1.52, + "learning_rate": 4.540560654805261e-05, + "loss": 0.3785, + "step": 2533500 + }, + { + "epoch": 1.52, + "learning_rate": 4.540350658249205e-05, + "loss": 0.3724, + "step": 2534000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5401406616931485e-05, + "loss": 0.3772, + "step": 2534500 + }, + { + "epoch": 1.52, + "learning_rate": 4.539930665137091e-05, + "loss": 0.377, + "step": 2535000 + }, + { + "epoch": 1.52, + "learning_rate": 4.539720668581035e-05, + "loss": 0.3769, + "step": 2535500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5395106720249786e-05, + "loss": 0.3763, + "step": 2536000 + }, + { + "epoch": 1.52, + "learning_rate": 4.539300675468922e-05, + "loss": 0.3794, + "step": 2536500 + }, + { + "epoch": 1.52, + "learning_rate": 4.539090678912866e-05, + "loss": 0.3704, + "step": 2537000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538881102349921e-05, + "loss": 0.3662, + "step": 2537500 + }, + { + "epoch": 1.52, + "learning_rate": 4.5386711057938646e-05, + "loss": 0.3793, + "step": 2538000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538461109237808e-05, + "loss": 0.372, + "step": 2538500 + }, + { + "epoch": 1.52, + "learning_rate": 4.538251112681752e-05, + "loss": 0.3726, + "step": 2539000 + }, + { + "epoch": 1.52, + "learning_rate": 4.538041536118808e-05, + "loss": 0.3704, + "step": 2539500 + }, + { + "epoch": 1.52, + "learning_rate": 4.537831539562751e-05, + "loss": 0.3718, + "step": 2540000 + }, + { + "epoch": 1.52, + "learning_rate": 4.537621543006695e-05, + "loss": 0.3861, + "step": 2540500 + }, + { + "epoch": 1.52, + "learning_rate": 4.537411546450638e-05, + "loss": 0.3703, + "step": 2541000 + }, + { + "epoch": 1.52, + "learning_rate": 4.537201969887694e-05, + "loss": 0.3755, + "step": 2541500 + }, + { + "epoch": 1.52, + "learning_rate": 4.536991973331637e-05, + "loss": 0.371, + "step": 2542000 + }, + { + "epoch": 1.52, + "learning_rate": 4.536781976775581e-05, + "loss": 0.3727, + "step": 2542500 + }, + { + "epoch": 1.52, + "learning_rate": 4.536571980219524e-05, + "loss": 0.3805, + "step": 2543000 + }, + { + "epoch": 1.52, + "learning_rate": 4.5363619836634675e-05, + "loss": 0.373, + "step": 2543500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5361519871074115e-05, + "loss": 0.3788, + "step": 2544000 + }, + { + "epoch": 1.53, + "learning_rate": 4.535941990551355e-05, + "loss": 0.3735, + "step": 2544500 + }, + { + "epoch": 1.53, + "learning_rate": 4.53573241398841e-05, + "loss": 0.3726, + "step": 2545000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5355224174323536e-05, + "loss": 0.3653, + "step": 2545500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5353124208762976e-05, + "loss": 0.3771, + "step": 2546000 + }, + { + "epoch": 1.53, + "learning_rate": 4.535102424320241e-05, + "loss": 0.3706, + "step": 2546500 + }, + { + "epoch": 1.53, + "learning_rate": 4.534892427764184e-05, + "loss": 0.3754, + "step": 2547000 + }, + { + "epoch": 1.53, + "learning_rate": 4.534682431208128e-05, + "loss": 0.3706, + "step": 2547500 + }, + { + "epoch": 1.53, + "learning_rate": 4.534472434652072e-05, + "loss": 0.372, + "step": 2548000 + }, + { + "epoch": 1.53, + "learning_rate": 4.534262438096015e-05, + "loss": 0.3669, + "step": 2548500 + }, + { + "epoch": 1.53, + "learning_rate": 4.534052861533071e-05, + "loss": 0.3773, + "step": 2549000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5338428649770144e-05, + "loss": 0.3874, + "step": 2549500 + }, + { + "epoch": 1.53, + "learning_rate": 4.533632868420958e-05, + "loss": 0.3683, + "step": 2550000 + }, + { + "epoch": 1.53, + "learning_rate": 4.533422871864902e-05, + "loss": 0.3696, + "step": 2550500 + }, + { + "epoch": 1.53, + "learning_rate": 4.533213295301957e-05, + "loss": 0.3731, + "step": 2551000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5330032987459005e-05, + "loss": 0.3778, + "step": 2551500 + }, + { + "epoch": 1.53, + "learning_rate": 4.532793302189844e-05, + "loss": 0.3779, + "step": 2552000 + }, + { + "epoch": 1.53, + "learning_rate": 4.532583305633788e-05, + "loss": 0.3786, + "step": 2552500 + }, + { + "epoch": 1.53, + "learning_rate": 4.532373309077731e-05, + "loss": 0.3779, + "step": 2553000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5321633125216745e-05, + "loss": 0.3785, + "step": 2553500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5319533159656185e-05, + "loss": 0.3659, + "step": 2554000 + }, + { + "epoch": 1.53, + "learning_rate": 4.531743319409562e-05, + "loss": 0.3693, + "step": 2554500 + }, + { + "epoch": 1.53, + "learning_rate": 4.531533742846617e-05, + "loss": 0.376, + "step": 2555000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5313237462905606e-05, + "loss": 0.3785, + "step": 2555500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5311137497345046e-05, + "loss": 0.378, + "step": 2556000 + }, + { + "epoch": 1.53, + "learning_rate": 4.53090417317156e-05, + "loss": 0.3657, + "step": 2556500 + }, + { + "epoch": 1.53, + "learning_rate": 4.530694176615503e-05, + "loss": 0.3711, + "step": 2557000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5304841800594473e-05, + "loss": 0.3709, + "step": 2557500 + }, + { + "epoch": 1.53, + "learning_rate": 4.530274183503391e-05, + "loss": 0.367, + "step": 2558000 + }, + { + "epoch": 1.53, + "learning_rate": 4.530064606940446e-05, + "loss": 0.3737, + "step": 2558500 + }, + { + "epoch": 1.53, + "learning_rate": 4.5298546103843894e-05, + "loss": 0.3693, + "step": 2559000 + }, + { + "epoch": 1.53, + "learning_rate": 4.5296446138283334e-05, + "loss": 0.373, + "step": 2559500 + }, + { + "epoch": 1.53, + "learning_rate": 4.529434617272277e-05, + "loss": 0.3679, + "step": 2560000 + }, + { + "epoch": 1.54, + "learning_rate": 4.52922462071622e-05, + "loss": 0.3762, + "step": 2560500 + }, + { + "epoch": 1.54, + "learning_rate": 4.529014624160164e-05, + "loss": 0.3646, + "step": 2561000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5288046276041075e-05, + "loss": 0.3617, + "step": 2561500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52859463104805e-05, + "loss": 0.379, + "step": 2562000 + }, + { + "epoch": 1.54, + "learning_rate": 4.528385054485106e-05, + "loss": 0.3637, + "step": 2562500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52817505792905e-05, + "loss": 0.3762, + "step": 2563000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5279650613729936e-05, + "loss": 0.3734, + "step": 2563500 + }, + { + "epoch": 1.54, + "learning_rate": 4.527755064816937e-05, + "loss": 0.3762, + "step": 2564000 + }, + { + "epoch": 1.54, + "learning_rate": 4.52754506826088e-05, + "loss": 0.3785, + "step": 2564500 + }, + { + "epoch": 1.54, + "learning_rate": 4.527335491697936e-05, + "loss": 0.3754, + "step": 2565000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5271254951418796e-05, + "loss": 0.3698, + "step": 2565500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5269154985858236e-05, + "loss": 0.3735, + "step": 2566000 + }, + { + "epoch": 1.54, + "learning_rate": 4.526705502029766e-05, + "loss": 0.3741, + "step": 2566500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52649550547371e-05, + "loss": 0.3681, + "step": 2567000 + }, + { + "epoch": 1.54, + "learning_rate": 4.526285508917654e-05, + "loss": 0.366, + "step": 2567500 + }, + { + "epoch": 1.54, + "learning_rate": 4.526075512361597e-05, + "loss": 0.3773, + "step": 2568000 + }, + { + "epoch": 1.54, + "learning_rate": 4.525865935798653e-05, + "loss": 0.3814, + "step": 2568500 + }, + { + "epoch": 1.54, + "learning_rate": 4.525655939242596e-05, + "loss": 0.3722, + "step": 2569000 + }, + { + "epoch": 1.54, + "learning_rate": 4.525446362679652e-05, + "loss": 0.3724, + "step": 2569500 + }, + { + "epoch": 1.54, + "learning_rate": 4.525236366123596e-05, + "loss": 0.3718, + "step": 2570000 + }, + { + "epoch": 1.54, + "learning_rate": 4.525026369567539e-05, + "loss": 0.3691, + "step": 2570500 + }, + { + "epoch": 1.54, + "learning_rate": 4.524816373011483e-05, + "loss": 0.3738, + "step": 2571000 + }, + { + "epoch": 1.54, + "learning_rate": 4.524606376455426e-05, + "loss": 0.3722, + "step": 2571500 + }, + { + "epoch": 1.54, + "learning_rate": 4.524396379899369e-05, + "loss": 0.372, + "step": 2572000 + }, + { + "epoch": 1.54, + "learning_rate": 4.524186383343313e-05, + "loss": 0.3842, + "step": 2572500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5239763867872565e-05, + "loss": 0.3781, + "step": 2573000 + }, + { + "epoch": 1.54, + "learning_rate": 4.5237663902312e-05, + "loss": 0.383, + "step": 2573500 + }, + { + "epoch": 1.54, + "learning_rate": 4.523556393675144e-05, + "loss": 0.3726, + "step": 2574000 + }, + { + "epoch": 1.54, + "learning_rate": 4.523346397119087e-05, + "loss": 0.3618, + "step": 2574500 + }, + { + "epoch": 1.54, + "learning_rate": 4.5231364005630306e-05, + "loss": 0.3746, + "step": 2575000 + }, + { + "epoch": 1.54, + "learning_rate": 4.522926824000086e-05, + "loss": 0.3742, + "step": 2575500 + }, + { + "epoch": 1.54, + "learning_rate": 4.52271682744403e-05, + "loss": 0.3783, + "step": 2576000 + }, + { + "epoch": 1.54, + "learning_rate": 4.522506830887973e-05, + "loss": 0.3655, + "step": 2576500 + }, + { + "epoch": 1.55, + "learning_rate": 4.522296834331917e-05, + "loss": 0.3725, + "step": 2577000 + }, + { + "epoch": 1.55, + "learning_rate": 4.522087257768972e-05, + "loss": 0.3825, + "step": 2577500 + }, + { + "epoch": 1.55, + "learning_rate": 4.521877261212916e-05, + "loss": 0.3673, + "step": 2578000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5216672646568594e-05, + "loss": 0.3633, + "step": 2578500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5214572681008034e-05, + "loss": 0.3647, + "step": 2579000 + }, + { + "epoch": 1.55, + "learning_rate": 4.521247691537859e-05, + "loss": 0.3736, + "step": 2579500 + }, + { + "epoch": 1.55, + "learning_rate": 4.521037694981802e-05, + "loss": 0.3809, + "step": 2580000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5208276984257455e-05, + "loss": 0.3733, + "step": 2580500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5206177018696895e-05, + "loss": 0.3759, + "step": 2581000 + }, + { + "epoch": 1.55, + "learning_rate": 4.520407705313633e-05, + "loss": 0.3785, + "step": 2581500 + }, + { + "epoch": 1.55, + "learning_rate": 4.520198128750688e-05, + "loss": 0.3736, + "step": 2582000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5199881321946316e-05, + "loss": 0.3662, + "step": 2582500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5197781356385756e-05, + "loss": 0.3661, + "step": 2583000 + }, + { + "epoch": 1.55, + "learning_rate": 4.519568139082519e-05, + "loss": 0.3713, + "step": 2583500 + }, + { + "epoch": 1.55, + "learning_rate": 4.519358142526462e-05, + "loss": 0.3779, + "step": 2584000 + }, + { + "epoch": 1.55, + "learning_rate": 4.519148145970406e-05, + "loss": 0.3782, + "step": 2584500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5189385694074617e-05, + "loss": 0.3655, + "step": 2585000 + }, + { + "epoch": 1.55, + "learning_rate": 4.518728572851405e-05, + "loss": 0.3711, + "step": 2585500 + }, + { + "epoch": 1.55, + "learning_rate": 4.518518576295349e-05, + "loss": 0.3708, + "step": 2586000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5183085797392924e-05, + "loss": 0.3737, + "step": 2586500 + }, + { + "epoch": 1.55, + "learning_rate": 4.518098583183236e-05, + "loss": 0.3679, + "step": 2587000 + }, + { + "epoch": 1.55, + "learning_rate": 4.51788858662718e-05, + "loss": 0.3671, + "step": 2587500 + }, + { + "epoch": 1.55, + "learning_rate": 4.517678590071123e-05, + "loss": 0.3685, + "step": 2588000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5174685935150664e-05, + "loss": 0.3798, + "step": 2588500 + }, + { + "epoch": 1.55, + "learning_rate": 4.517259016952122e-05, + "loss": 0.3678, + "step": 2589000 + }, + { + "epoch": 1.55, + "learning_rate": 4.517049020396066e-05, + "loss": 0.3667, + "step": 2589500 + }, + { + "epoch": 1.55, + "learning_rate": 4.516839023840009e-05, + "loss": 0.3631, + "step": 2590000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5166290272839525e-05, + "loss": 0.3564, + "step": 2590500 + }, + { + "epoch": 1.55, + "learning_rate": 4.516419450721008e-05, + "loss": 0.3664, + "step": 2591000 + }, + { + "epoch": 1.55, + "learning_rate": 4.516209454164952e-05, + "loss": 0.373, + "step": 2591500 + }, + { + "epoch": 1.55, + "learning_rate": 4.515999457608895e-05, + "loss": 0.3756, + "step": 2592000 + }, + { + "epoch": 1.55, + "learning_rate": 4.5157894610528386e-05, + "loss": 0.3842, + "step": 2592500 + }, + { + "epoch": 1.55, + "learning_rate": 4.5155798844898946e-05, + "loss": 0.375, + "step": 2593000 + }, + { + "epoch": 1.55, + "learning_rate": 4.51537030792695e-05, + "loss": 0.373, + "step": 2593500 + }, + { + "epoch": 1.56, + "learning_rate": 4.515160311370893e-05, + "loss": 0.3679, + "step": 2594000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5149503148148367e-05, + "loss": 0.3787, + "step": 2594500 + }, + { + "epoch": 1.56, + "learning_rate": 4.514740318258781e-05, + "loss": 0.3755, + "step": 2595000 + }, + { + "epoch": 1.56, + "learning_rate": 4.514530321702724e-05, + "loss": 0.3633, + "step": 2595500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5143203251466674e-05, + "loss": 0.3653, + "step": 2596000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5141103285906114e-05, + "loss": 0.3724, + "step": 2596500 + }, + { + "epoch": 1.56, + "learning_rate": 4.513900332034555e-05, + "loss": 0.3764, + "step": 2597000 + }, + { + "epoch": 1.56, + "learning_rate": 4.513691175464722e-05, + "loss": 0.377, + "step": 2597500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5134811789086655e-05, + "loss": 0.3775, + "step": 2598000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5132711823526095e-05, + "loss": 0.3699, + "step": 2598500 + }, + { + "epoch": 1.56, + "learning_rate": 4.513061185796553e-05, + "loss": 0.3646, + "step": 2599000 + }, + { + "epoch": 1.56, + "learning_rate": 4.512851189240496e-05, + "loss": 0.3714, + "step": 2599500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5126416126775515e-05, + "loss": 0.3848, + "step": 2600000 + }, + { + "epoch": 1.56, + "eval_loss": 0.35718128085136414, + "eval_runtime": 1116.8014, + "eval_samples_per_second": 471.633, + "eval_steps_per_second": 78.606, + "step": 2600000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5124316161214956e-05, + "loss": 0.3717, + "step": 2600500 + }, + { + "epoch": 1.56, + "learning_rate": 4.512221619565439e-05, + "loss": 0.369, + "step": 2601000 + }, + { + "epoch": 1.56, + "learning_rate": 4.512011623009382e-05, + "loss": 0.3685, + "step": 2601500 + }, + { + "epoch": 1.56, + "learning_rate": 4.511802046446438e-05, + "loss": 0.3805, + "step": 2602000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5115920498903816e-05, + "loss": 0.3729, + "step": 2602500 + }, + { + "epoch": 1.56, + "learning_rate": 4.511382053334325e-05, + "loss": 0.3644, + "step": 2603000 + }, + { + "epoch": 1.56, + "learning_rate": 4.511172056778268e-05, + "loss": 0.3706, + "step": 2603500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5109620602222123e-05, + "loss": 0.3752, + "step": 2604000 + }, + { + "epoch": 1.56, + "learning_rate": 4.510752063666156e-05, + "loss": 0.3626, + "step": 2604500 + }, + { + "epoch": 1.56, + "learning_rate": 4.510542067110099e-05, + "loss": 0.3771, + "step": 2605000 + }, + { + "epoch": 1.56, + "learning_rate": 4.510332070554043e-05, + "loss": 0.3787, + "step": 2605500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5101224939910984e-05, + "loss": 0.3776, + "step": 2606000 + }, + { + "epoch": 1.56, + "learning_rate": 4.509912497435042e-05, + "loss": 0.3756, + "step": 2606500 + }, + { + "epoch": 1.56, + "learning_rate": 4.509702500878986e-05, + "loss": 0.3746, + "step": 2607000 + }, + { + "epoch": 1.56, + "learning_rate": 4.509492924316041e-05, + "loss": 0.3719, + "step": 2607500 + }, + { + "epoch": 1.56, + "learning_rate": 4.509283347753097e-05, + "loss": 0.3645, + "step": 2608000 + }, + { + "epoch": 1.56, + "learning_rate": 4.5090733511970405e-05, + "loss": 0.3676, + "step": 2608500 + }, + { + "epoch": 1.56, + "learning_rate": 4.508863354640984e-05, + "loss": 0.3677, + "step": 2609000 + }, + { + "epoch": 1.56, + "learning_rate": 4.508653358084927e-05, + "loss": 0.3742, + "step": 2609500 + }, + { + "epoch": 1.56, + "learning_rate": 4.5084433615288706e-05, + "loss": 0.3797, + "step": 2610000 + }, + { + "epoch": 1.57, + "learning_rate": 4.508233364972814e-05, + "loss": 0.3596, + "step": 2610500 + }, + { + "epoch": 1.57, + "learning_rate": 4.508023368416758e-05, + "loss": 0.368, + "step": 2611000 + }, + { + "epoch": 1.57, + "learning_rate": 4.507813371860701e-05, + "loss": 0.3715, + "step": 2611500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5076037952977566e-05, + "loss": 0.3785, + "step": 2612000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5073937987417007e-05, + "loss": 0.3624, + "step": 2612500 + }, + { + "epoch": 1.57, + "learning_rate": 4.507183802185644e-05, + "loss": 0.3774, + "step": 2613000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5069738056295873e-05, + "loss": 0.3774, + "step": 2613500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5067638090735314e-05, + "loss": 0.3681, + "step": 2614000 + }, + { + "epoch": 1.57, + "learning_rate": 4.506553812517475e-05, + "loss": 0.3744, + "step": 2614500 + }, + { + "epoch": 1.57, + "learning_rate": 4.506343815961418e-05, + "loss": 0.3684, + "step": 2615000 + }, + { + "epoch": 1.57, + "learning_rate": 4.506133819405362e-05, + "loss": 0.3779, + "step": 2615500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5059242428424174e-05, + "loss": 0.3851, + "step": 2616000 + }, + { + "epoch": 1.57, + "learning_rate": 4.505714246286361e-05, + "loss": 0.3659, + "step": 2616500 + }, + { + "epoch": 1.57, + "learning_rate": 4.505504249730304e-05, + "loss": 0.3737, + "step": 2617000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5052946731673595e-05, + "loss": 0.3691, + "step": 2617500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5050846766113035e-05, + "loss": 0.3698, + "step": 2618000 + }, + { + "epoch": 1.57, + "learning_rate": 4.504874680055247e-05, + "loss": 0.3657, + "step": 2618500 + }, + { + "epoch": 1.57, + "learning_rate": 4.50466468349919e-05, + "loss": 0.3759, + "step": 2619000 + }, + { + "epoch": 1.57, + "learning_rate": 4.504454686943134e-05, + "loss": 0.3757, + "step": 2619500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5042446903870776e-05, + "loss": 0.3745, + "step": 2620000 + }, + { + "epoch": 1.57, + "learning_rate": 4.504035113824133e-05, + "loss": 0.3862, + "step": 2620500 + }, + { + "epoch": 1.57, + "learning_rate": 4.503825117268077e-05, + "loss": 0.3798, + "step": 2621000 + }, + { + "epoch": 1.57, + "learning_rate": 4.50361512071202e-05, + "loss": 0.3732, + "step": 2621500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5034051241559637e-05, + "loss": 0.3854, + "step": 2622000 + }, + { + "epoch": 1.57, + "learning_rate": 4.503195127599908e-05, + "loss": 0.3747, + "step": 2622500 + }, + { + "epoch": 1.57, + "learning_rate": 4.502985131043851e-05, + "loss": 0.3661, + "step": 2623000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5027755544809064e-05, + "loss": 0.381, + "step": 2623500 + }, + { + "epoch": 1.57, + "learning_rate": 4.50256555792485e-05, + "loss": 0.3767, + "step": 2624000 + }, + { + "epoch": 1.57, + "learning_rate": 4.502355561368794e-05, + "loss": 0.3772, + "step": 2624500 + }, + { + "epoch": 1.57, + "learning_rate": 4.502145564812737e-05, + "loss": 0.3886, + "step": 2625000 + }, + { + "epoch": 1.57, + "learning_rate": 4.5019355682566804e-05, + "loss": 0.3701, + "step": 2625500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5017255717006245e-05, + "loss": 0.3709, + "step": 2626000 + }, + { + "epoch": 1.57, + "learning_rate": 4.501515575144568e-05, + "loss": 0.3678, + "step": 2626500 + }, + { + "epoch": 1.57, + "learning_rate": 4.5013055785885105e-05, + "loss": 0.3787, + "step": 2627000 + }, + { + "epoch": 1.58, + "learning_rate": 4.5010960020255665e-05, + "loss": 0.3733, + "step": 2627500 + }, + { + "epoch": 1.58, + "learning_rate": 4.5008864254626225e-05, + "loss": 0.3738, + "step": 2628000 + }, + { + "epoch": 1.58, + "learning_rate": 4.500676428906566e-05, + "loss": 0.3736, + "step": 2628500 + }, + { + "epoch": 1.58, + "learning_rate": 4.500466432350509e-05, + "loss": 0.3765, + "step": 2629000 + }, + { + "epoch": 1.58, + "learning_rate": 4.500256435794453e-05, + "loss": 0.3822, + "step": 2629500 + }, + { + "epoch": 1.58, + "learning_rate": 4.5000464392383966e-05, + "loss": 0.3657, + "step": 2630000 + }, + { + "epoch": 1.58, + "learning_rate": 4.49983644268234e-05, + "loss": 0.3721, + "step": 2630500 + }, + { + "epoch": 1.58, + "learning_rate": 4.499626446126284e-05, + "loss": 0.3862, + "step": 2631000 + }, + { + "epoch": 1.58, + "learning_rate": 4.499416449570227e-05, + "loss": 0.374, + "step": 2631500 + }, + { + "epoch": 1.58, + "learning_rate": 4.49920645301417e-05, + "loss": 0.3676, + "step": 2632000 + }, + { + "epoch": 1.58, + "learning_rate": 4.498996876451226e-05, + "loss": 0.3767, + "step": 2632500 + }, + { + "epoch": 1.58, + "learning_rate": 4.49878687989517e-05, + "loss": 0.3711, + "step": 2633000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4985768833391134e-05, + "loss": 0.3697, + "step": 2633500 + }, + { + "epoch": 1.58, + "learning_rate": 4.498366886783056e-05, + "loss": 0.3681, + "step": 2634000 + }, + { + "epoch": 1.58, + "learning_rate": 4.498157310220112e-05, + "loss": 0.3652, + "step": 2634500 + }, + { + "epoch": 1.58, + "learning_rate": 4.497947313664056e-05, + "loss": 0.3725, + "step": 2635000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4977373171079995e-05, + "loss": 0.3635, + "step": 2635500 + }, + { + "epoch": 1.58, + "learning_rate": 4.497527320551943e-05, + "loss": 0.3584, + "step": 2636000 + }, + { + "epoch": 1.58, + "learning_rate": 4.497317743988999e-05, + "loss": 0.3741, + "step": 2636500 + }, + { + "epoch": 1.58, + "learning_rate": 4.497107747432942e-05, + "loss": 0.3718, + "step": 2637000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4968981708699976e-05, + "loss": 0.3731, + "step": 2637500 + }, + { + "epoch": 1.58, + "learning_rate": 4.496688174313941e-05, + "loss": 0.3666, + "step": 2638000 + }, + { + "epoch": 1.58, + "learning_rate": 4.496478177757885e-05, + "loss": 0.3734, + "step": 2638500 + }, + { + "epoch": 1.58, + "learning_rate": 4.496268181201828e-05, + "loss": 0.3663, + "step": 2639000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4960581846457716e-05, + "loss": 0.379, + "step": 2639500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4958481880897156e-05, + "loss": 0.3676, + "step": 2640000 + }, + { + "epoch": 1.58, + "learning_rate": 4.495638611526771e-05, + "loss": 0.3651, + "step": 2640500 + }, + { + "epoch": 1.58, + "learning_rate": 4.4954286149707143e-05, + "loss": 0.378, + "step": 2641000 + }, + { + "epoch": 1.58, + "learning_rate": 4.495218618414658e-05, + "loss": 0.3784, + "step": 2641500 + }, + { + "epoch": 1.58, + "learning_rate": 4.495008621858602e-05, + "loss": 0.3616, + "step": 2642000 + }, + { + "epoch": 1.58, + "learning_rate": 4.494798625302545e-05, + "loss": 0.3761, + "step": 2642500 + }, + { + "epoch": 1.58, + "learning_rate": 4.494588628746489e-05, + "loss": 0.3734, + "step": 2643000 + }, + { + "epoch": 1.58, + "learning_rate": 4.4943786321904324e-05, + "loss": 0.3794, + "step": 2643500 + }, + { + "epoch": 1.59, + "learning_rate": 4.494168635634375e-05, + "loss": 0.3691, + "step": 2644000 + }, + { + "epoch": 1.59, + "learning_rate": 4.493959059071431e-05, + "loss": 0.365, + "step": 2644500 + }, + { + "epoch": 1.59, + "learning_rate": 4.493749062515375e-05, + "loss": 0.3648, + "step": 2645000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4935390659593185e-05, + "loss": 0.3688, + "step": 2645500 + }, + { + "epoch": 1.59, + "learning_rate": 4.493329069403261e-05, + "loss": 0.3668, + "step": 2646000 + }, + { + "epoch": 1.59, + "learning_rate": 4.493119072847205e-05, + "loss": 0.3834, + "step": 2646500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4929090762911485e-05, + "loss": 0.3795, + "step": 2647000 + }, + { + "epoch": 1.59, + "learning_rate": 4.492699079735092e-05, + "loss": 0.3704, + "step": 2647500 + }, + { + "epoch": 1.59, + "learning_rate": 4.492489503172148e-05, + "loss": 0.3769, + "step": 2648000 + }, + { + "epoch": 1.59, + "learning_rate": 4.492279506616091e-05, + "loss": 0.3696, + "step": 2648500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4920695100600346e-05, + "loss": 0.3739, + "step": 2649000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4918595135039786e-05, + "loss": 0.3643, + "step": 2649500 + }, + { + "epoch": 1.59, + "learning_rate": 4.491649936941035e-05, + "loss": 0.3742, + "step": 2650000 + }, + { + "epoch": 1.59, + "learning_rate": 4.491439940384978e-05, + "loss": 0.363, + "step": 2650500 + }, + { + "epoch": 1.59, + "learning_rate": 4.491229943828921e-05, + "loss": 0.3689, + "step": 2651000 + }, + { + "epoch": 1.59, + "learning_rate": 4.491019947272865e-05, + "loss": 0.37, + "step": 2651500 + }, + { + "epoch": 1.59, + "learning_rate": 4.490809950716808e-05, + "loss": 0.3678, + "step": 2652000 + }, + { + "epoch": 1.59, + "learning_rate": 4.490600374153864e-05, + "loss": 0.3769, + "step": 2652500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4903903775978074e-05, + "loss": 0.3735, + "step": 2653000 + }, + { + "epoch": 1.59, + "learning_rate": 4.490180381041751e-05, + "loss": 0.378, + "step": 2653500 + }, + { + "epoch": 1.59, + "learning_rate": 4.489970384485694e-05, + "loss": 0.3739, + "step": 2654000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4897603879296375e-05, + "loss": 0.3722, + "step": 2654500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4895503913735815e-05, + "loss": 0.3767, + "step": 2655000 + }, + { + "epoch": 1.59, + "learning_rate": 4.489340394817525e-05, + "loss": 0.3751, + "step": 2655500 + }, + { + "epoch": 1.59, + "learning_rate": 4.489130398261468e-05, + "loss": 0.3754, + "step": 2656000 + }, + { + "epoch": 1.59, + "learning_rate": 4.488920821698524e-05, + "loss": 0.3741, + "step": 2656500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4887108251424676e-05, + "loss": 0.3696, + "step": 2657000 + }, + { + "epoch": 1.59, + "learning_rate": 4.4885012485795236e-05, + "loss": 0.366, + "step": 2657500 + }, + { + "epoch": 1.59, + "learning_rate": 4.488291252023466e-05, + "loss": 0.3694, + "step": 2658000 + }, + { + "epoch": 1.59, + "learning_rate": 4.48808125546741e-05, + "loss": 0.3714, + "step": 2658500 + }, + { + "epoch": 1.59, + "learning_rate": 4.4878712589113536e-05, + "loss": 0.3701, + "step": 2659000 + }, + { + "epoch": 1.59, + "learning_rate": 4.487661262355297e-05, + "loss": 0.3675, + "step": 2659500 + }, + { + "epoch": 1.59, + "learning_rate": 4.487451265799241e-05, + "loss": 0.37, + "step": 2660000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4872416892362964e-05, + "loss": 0.3822, + "step": 2660500 + }, + { + "epoch": 1.6, + "learning_rate": 4.48703169268024e-05, + "loss": 0.3715, + "step": 2661000 + }, + { + "epoch": 1.6, + "learning_rate": 4.486821696124183e-05, + "loss": 0.3689, + "step": 2661500 + }, + { + "epoch": 1.6, + "learning_rate": 4.486611699568127e-05, + "loss": 0.3706, + "step": 2662000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4864017030120704e-05, + "loss": 0.3836, + "step": 2662500 + }, + { + "epoch": 1.6, + "learning_rate": 4.486192126449126e-05, + "loss": 0.3769, + "step": 2663000 + }, + { + "epoch": 1.6, + "learning_rate": 4.485982549886182e-05, + "loss": 0.3646, + "step": 2663500 + }, + { + "epoch": 1.6, + "learning_rate": 4.485772553330126e-05, + "loss": 0.3772, + "step": 2664000 + }, + { + "epoch": 1.6, + "learning_rate": 4.485562556774069e-05, + "loss": 0.3682, + "step": 2664500 + }, + { + "epoch": 1.6, + "learning_rate": 4.485352560218012e-05, + "loss": 0.3737, + "step": 2665000 + }, + { + "epoch": 1.6, + "learning_rate": 4.485142563661956e-05, + "loss": 0.3622, + "step": 2665500 + }, + { + "epoch": 1.6, + "learning_rate": 4.484932567105899e-05, + "loss": 0.3702, + "step": 2666000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4847225705498426e-05, + "loss": 0.3708, + "step": 2666500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4845125739937866e-05, + "loss": 0.37, + "step": 2667000 + }, + { + "epoch": 1.6, + "learning_rate": 4.48430257743773e-05, + "loss": 0.3758, + "step": 2667500 + }, + { + "epoch": 1.6, + "learning_rate": 4.484092580881673e-05, + "loss": 0.3725, + "step": 2668000 + }, + { + "epoch": 1.6, + "learning_rate": 4.483882584325617e-05, + "loss": 0.3633, + "step": 2668500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4836725877695607e-05, + "loss": 0.3697, + "step": 2669000 + }, + { + "epoch": 1.6, + "learning_rate": 4.483463011206616e-05, + "loss": 0.3713, + "step": 2669500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4832530146505594e-05, + "loss": 0.3662, + "step": 2670000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4830430180945034e-05, + "loss": 0.367, + "step": 2670500 + }, + { + "epoch": 1.6, + "learning_rate": 4.482833021538447e-05, + "loss": 0.3706, + "step": 2671000 + }, + { + "epoch": 1.6, + "learning_rate": 4.48262302498239e-05, + "loss": 0.3765, + "step": 2671500 + }, + { + "epoch": 1.6, + "learning_rate": 4.482413028426334e-05, + "loss": 0.3695, + "step": 2672000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4822030318702774e-05, + "loss": 0.3682, + "step": 2672500 + }, + { + "epoch": 1.6, + "learning_rate": 4.481993455307333e-05, + "loss": 0.3755, + "step": 2673000 + }, + { + "epoch": 1.6, + "learning_rate": 4.481783458751277e-05, + "loss": 0.3664, + "step": 2673500 + }, + { + "epoch": 1.6, + "learning_rate": 4.48157346219522e-05, + "loss": 0.3811, + "step": 2674000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4813634656391635e-05, + "loss": 0.3797, + "step": 2674500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4811534690831075e-05, + "loss": 0.3632, + "step": 2675000 + }, + { + "epoch": 1.6, + "learning_rate": 4.48094347252705e-05, + "loss": 0.3733, + "step": 2675500 + }, + { + "epoch": 1.6, + "learning_rate": 4.480733895964106e-05, + "loss": 0.3697, + "step": 2676000 + }, + { + "epoch": 1.6, + "learning_rate": 4.4805238994080496e-05, + "loss": 0.361, + "step": 2676500 + }, + { + "epoch": 1.6, + "learning_rate": 4.4803139028519936e-05, + "loss": 0.3725, + "step": 2677000 + }, + { + "epoch": 1.61, + "learning_rate": 4.480103906295937e-05, + "loss": 0.3776, + "step": 2677500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4798939097398796e-05, + "loss": 0.3731, + "step": 2678000 + }, + { + "epoch": 1.61, + "learning_rate": 4.479684333176936e-05, + "loss": 0.3742, + "step": 2678500 + }, + { + "epoch": 1.61, + "learning_rate": 4.47947433662088e-05, + "loss": 0.3668, + "step": 2679000 + }, + { + "epoch": 1.61, + "learning_rate": 4.479264340064823e-05, + "loss": 0.3706, + "step": 2679500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4790543435087664e-05, + "loss": 0.3696, + "step": 2680000 + }, + { + "epoch": 1.61, + "learning_rate": 4.47884434695271e-05, + "loss": 0.378, + "step": 2680500 + }, + { + "epoch": 1.61, + "learning_rate": 4.478634350396653e-05, + "loss": 0.3715, + "step": 2681000 + }, + { + "epoch": 1.61, + "learning_rate": 4.478424353840597e-05, + "loss": 0.3672, + "step": 2681500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4782143572845404e-05, + "loss": 0.3686, + "step": 2682000 + }, + { + "epoch": 1.61, + "learning_rate": 4.478004780721596e-05, + "loss": 0.3621, + "step": 2682500 + }, + { + "epoch": 1.61, + "learning_rate": 4.477794784165539e-05, + "loss": 0.3772, + "step": 2683000 + }, + { + "epoch": 1.61, + "learning_rate": 4.477585207602595e-05, + "loss": 0.3788, + "step": 2683500 + }, + { + "epoch": 1.61, + "learning_rate": 4.477375211046539e-05, + "loss": 0.3654, + "step": 2684000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4771652144904825e-05, + "loss": 0.3669, + "step": 2684500 + }, + { + "epoch": 1.61, + "learning_rate": 4.476955217934425e-05, + "loss": 0.3656, + "step": 2685000 + }, + { + "epoch": 1.61, + "learning_rate": 4.476745221378369e-05, + "loss": 0.3701, + "step": 2685500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4765352248223126e-05, + "loss": 0.3771, + "step": 2686000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4763256482593686e-05, + "loss": 0.376, + "step": 2686500 + }, + { + "epoch": 1.61, + "learning_rate": 4.476115651703312e-05, + "loss": 0.3697, + "step": 2687000 + }, + { + "epoch": 1.61, + "learning_rate": 4.475905655147255e-05, + "loss": 0.3577, + "step": 2687500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4756956585911987e-05, + "loss": 0.3753, + "step": 2688000 + }, + { + "epoch": 1.61, + "learning_rate": 4.475485662035143e-05, + "loss": 0.378, + "step": 2688500 + }, + { + "epoch": 1.61, + "learning_rate": 4.475275665479086e-05, + "loss": 0.3731, + "step": 2689000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4750656689230294e-05, + "loss": 0.3741, + "step": 2689500 + }, + { + "epoch": 1.61, + "learning_rate": 4.4748556723669734e-05, + "loss": 0.372, + "step": 2690000 + }, + { + "epoch": 1.61, + "learning_rate": 4.474646095804029e-05, + "loss": 0.3593, + "step": 2690500 + }, + { + "epoch": 1.61, + "learning_rate": 4.474436099247972e-05, + "loss": 0.3754, + "step": 2691000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4742261026919154e-05, + "loss": 0.3713, + "step": 2691500 + }, + { + "epoch": 1.61, + "learning_rate": 4.474016526128971e-05, + "loss": 0.3711, + "step": 2692000 + }, + { + "epoch": 1.61, + "learning_rate": 4.473806529572915e-05, + "loss": 0.3699, + "step": 2692500 + }, + { + "epoch": 1.61, + "learning_rate": 4.473596533016858e-05, + "loss": 0.3659, + "step": 2693000 + }, + { + "epoch": 1.61, + "learning_rate": 4.4733865364608015e-05, + "loss": 0.3722, + "step": 2693500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4731765399047455e-05, + "loss": 0.3709, + "step": 2694000 + }, + { + "epoch": 1.62, + "learning_rate": 4.472966543348689e-05, + "loss": 0.379, + "step": 2694500 + }, + { + "epoch": 1.62, + "learning_rate": 4.472756966785744e-05, + "loss": 0.3768, + "step": 2695000 + }, + { + "epoch": 1.62, + "learning_rate": 4.472546970229688e-05, + "loss": 0.3681, + "step": 2695500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4723369736736316e-05, + "loss": 0.373, + "step": 2696000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4721273971106877e-05, + "loss": 0.3703, + "step": 2696500 + }, + { + "epoch": 1.62, + "learning_rate": 4.47191740055463e-05, + "loss": 0.3619, + "step": 2697000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4717074039985743e-05, + "loss": 0.373, + "step": 2697500 + }, + { + "epoch": 1.62, + "learning_rate": 4.471497407442518e-05, + "loss": 0.371, + "step": 2698000 + }, + { + "epoch": 1.62, + "learning_rate": 4.471287410886461e-05, + "loss": 0.376, + "step": 2698500 + }, + { + "epoch": 1.62, + "learning_rate": 4.471077414330405e-05, + "loss": 0.3642, + "step": 2699000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4708674177743484e-05, + "loss": 0.3803, + "step": 2699500 + }, + { + "epoch": 1.62, + "learning_rate": 4.470657421218292e-05, + "loss": 0.3724, + "step": 2700000 + }, + { + "epoch": 1.62, + "eval_loss": 0.3561702072620392, + "eval_runtime": 1123.298, + "eval_samples_per_second": 468.905, + "eval_steps_per_second": 78.151, + "step": 2700000 + }, + { + "epoch": 1.62, + "learning_rate": 4.470447424662236e-05, + "loss": 0.3809, + "step": 2700500 + }, + { + "epoch": 1.62, + "learning_rate": 4.470237428106179e-05, + "loss": 0.3651, + "step": 2701000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4700274315501225e-05, + "loss": 0.3678, + "step": 2701500 + }, + { + "epoch": 1.62, + "learning_rate": 4.469817854987178e-05, + "loss": 0.3708, + "step": 2702000 + }, + { + "epoch": 1.62, + "learning_rate": 4.469607858431122e-05, + "loss": 0.3684, + "step": 2702500 + }, + { + "epoch": 1.62, + "learning_rate": 4.469397861875065e-05, + "loss": 0.3687, + "step": 2703000 + }, + { + "epoch": 1.62, + "learning_rate": 4.469187865319009e-05, + "loss": 0.3726, + "step": 2703500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4689778687629526e-05, + "loss": 0.3746, + "step": 2704000 + }, + { + "epoch": 1.62, + "learning_rate": 4.468767872206896e-05, + "loss": 0.3568, + "step": 2704500 + }, + { + "epoch": 1.62, + "learning_rate": 4.468557875650839e-05, + "loss": 0.3676, + "step": 2705000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4683478790947826e-05, + "loss": 0.3725, + "step": 2705500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4681383025318386e-05, + "loss": 0.3569, + "step": 2706000 + }, + { + "epoch": 1.62, + "learning_rate": 4.467928725968894e-05, + "loss": 0.3732, + "step": 2706500 + }, + { + "epoch": 1.62, + "learning_rate": 4.4677187294128373e-05, + "loss": 0.3734, + "step": 2707000 + }, + { + "epoch": 1.62, + "learning_rate": 4.4675087328567814e-05, + "loss": 0.3751, + "step": 2707500 + }, + { + "epoch": 1.62, + "learning_rate": 4.467298736300725e-05, + "loss": 0.376, + "step": 2708000 + }, + { + "epoch": 1.62, + "learning_rate": 4.467088739744668e-05, + "loss": 0.3615, + "step": 2708500 + }, + { + "epoch": 1.62, + "learning_rate": 4.466878743188612e-05, + "loss": 0.3702, + "step": 2709000 + }, + { + "epoch": 1.62, + "learning_rate": 4.466668746632555e-05, + "loss": 0.3705, + "step": 2709500 + }, + { + "epoch": 1.62, + "learning_rate": 4.466458750076499e-05, + "loss": 0.3673, + "step": 2710000 + }, + { + "epoch": 1.63, + "learning_rate": 4.466249173513555e-05, + "loss": 0.3648, + "step": 2710500 + }, + { + "epoch": 1.63, + "learning_rate": 4.466039176957498e-05, + "loss": 0.3733, + "step": 2711000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4658291804014415e-05, + "loss": 0.3647, + "step": 2711500 + }, + { + "epoch": 1.63, + "learning_rate": 4.465619183845385e-05, + "loss": 0.3714, + "step": 2712000 + }, + { + "epoch": 1.63, + "learning_rate": 4.465409607282441e-05, + "loss": 0.3705, + "step": 2712500 + }, + { + "epoch": 1.63, + "learning_rate": 4.465199610726384e-05, + "loss": 0.3655, + "step": 2713000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4649896141703276e-05, + "loss": 0.3647, + "step": 2713500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4647796176142716e-05, + "loss": 0.3684, + "step": 2714000 + }, + { + "epoch": 1.63, + "learning_rate": 4.464569621058214e-05, + "loss": 0.3791, + "step": 2714500 + }, + { + "epoch": 1.63, + "learning_rate": 4.46436004449527e-05, + "loss": 0.3639, + "step": 2715000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4641500479392136e-05, + "loss": 0.3664, + "step": 2715500 + }, + { + "epoch": 1.63, + "learning_rate": 4.463940051383158e-05, + "loss": 0.3688, + "step": 2716000 + }, + { + "epoch": 1.63, + "learning_rate": 4.463730054827101e-05, + "loss": 0.3841, + "step": 2716500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4635200582710444e-05, + "loss": 0.3721, + "step": 2717000 + }, + { + "epoch": 1.63, + "learning_rate": 4.463310061714988e-05, + "loss": 0.3589, + "step": 2717500 + }, + { + "epoch": 1.63, + "learning_rate": 4.463100065158931e-05, + "loss": 0.3752, + "step": 2718000 + }, + { + "epoch": 1.63, + "learning_rate": 4.462890488595987e-05, + "loss": 0.3729, + "step": 2718500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4626804920399304e-05, + "loss": 0.369, + "step": 2719000 + }, + { + "epoch": 1.63, + "learning_rate": 4.462470495483874e-05, + "loss": 0.3649, + "step": 2719500 + }, + { + "epoch": 1.63, + "learning_rate": 4.462260498927817e-05, + "loss": 0.3756, + "step": 2720000 + }, + { + "epoch": 1.63, + "learning_rate": 4.462050922364873e-05, + "loss": 0.3859, + "step": 2720500 + }, + { + "epoch": 1.63, + "learning_rate": 4.4618413458019285e-05, + "loss": 0.366, + "step": 2721000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4616313492458725e-05, + "loss": 0.3605, + "step": 2721500 + }, + { + "epoch": 1.63, + "learning_rate": 4.461421352689816e-05, + "loss": 0.3698, + "step": 2722000 + }, + { + "epoch": 1.63, + "learning_rate": 4.461211356133759e-05, + "loss": 0.3672, + "step": 2722500 + }, + { + "epoch": 1.63, + "learning_rate": 4.461001359577703e-05, + "loss": 0.3542, + "step": 2723000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4607913630216466e-05, + "loss": 0.3676, + "step": 2723500 + }, + { + "epoch": 1.63, + "learning_rate": 4.46058136646559e-05, + "loss": 0.3639, + "step": 2724000 + }, + { + "epoch": 1.63, + "learning_rate": 4.460371369909533e-05, + "loss": 0.3621, + "step": 2724500 + }, + { + "epoch": 1.63, + "learning_rate": 4.460161793346589e-05, + "loss": 0.3666, + "step": 2725000 + }, + { + "epoch": 1.63, + "learning_rate": 4.459951796790533e-05, + "loss": 0.3746, + "step": 2725500 + }, + { + "epoch": 1.63, + "learning_rate": 4.459741800234477e-05, + "loss": 0.3764, + "step": 2726000 + }, + { + "epoch": 1.63, + "learning_rate": 4.4595318036784194e-05, + "loss": 0.3685, + "step": 2726500 + }, + { + "epoch": 1.63, + "learning_rate": 4.459321807122363e-05, + "loss": 0.3716, + "step": 2727000 + }, + { + "epoch": 1.64, + "learning_rate": 4.459111810566307e-05, + "loss": 0.3599, + "step": 2727500 + }, + { + "epoch": 1.64, + "learning_rate": 4.45890181401025e-05, + "loss": 0.3782, + "step": 2728000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4586918174541934e-05, + "loss": 0.3764, + "step": 2728500 + }, + { + "epoch": 1.64, + "learning_rate": 4.458482240891249e-05, + "loss": 0.3667, + "step": 2729000 + }, + { + "epoch": 1.64, + "learning_rate": 4.458272244335193e-05, + "loss": 0.3736, + "step": 2729500 + }, + { + "epoch": 1.64, + "learning_rate": 4.458062667772249e-05, + "loss": 0.3799, + "step": 2730000 + }, + { + "epoch": 1.64, + "learning_rate": 4.457852671216192e-05, + "loss": 0.3764, + "step": 2730500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4576426746601355e-05, + "loss": 0.3659, + "step": 2731000 + }, + { + "epoch": 1.64, + "learning_rate": 4.457432678104079e-05, + "loss": 0.3694, + "step": 2731500 + }, + { + "epoch": 1.64, + "learning_rate": 4.457222681548022e-05, + "loss": 0.359, + "step": 2732000 + }, + { + "epoch": 1.64, + "learning_rate": 4.457012684991966e-05, + "loss": 0.3733, + "step": 2732500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4568026884359096e-05, + "loss": 0.3737, + "step": 2733000 + }, + { + "epoch": 1.64, + "learning_rate": 4.456592691879853e-05, + "loss": 0.3625, + "step": 2733500 + }, + { + "epoch": 1.64, + "learning_rate": 4.456382695323797e-05, + "loss": 0.375, + "step": 2734000 + }, + { + "epoch": 1.64, + "learning_rate": 4.456173118760852e-05, + "loss": 0.3647, + "step": 2734500 + }, + { + "epoch": 1.64, + "learning_rate": 4.455963122204796e-05, + "loss": 0.3656, + "step": 2735000 + }, + { + "epoch": 1.64, + "learning_rate": 4.455753125648739e-05, + "loss": 0.3762, + "step": 2735500 + }, + { + "epoch": 1.64, + "learning_rate": 4.455543129092683e-05, + "loss": 0.3746, + "step": 2736000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4553331325366264e-05, + "loss": 0.3731, + "step": 2736500 + }, + { + "epoch": 1.64, + "learning_rate": 4.455123555973682e-05, + "loss": 0.3629, + "step": 2737000 + }, + { + "epoch": 1.64, + "learning_rate": 4.454913559417625e-05, + "loss": 0.3687, + "step": 2737500 + }, + { + "epoch": 1.64, + "learning_rate": 4.454703562861569e-05, + "loss": 0.3585, + "step": 2738000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4544935663055125e-05, + "loss": 0.3781, + "step": 2738500 + }, + { + "epoch": 1.64, + "learning_rate": 4.454283569749456e-05, + "loss": 0.368, + "step": 2739000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4540735731934e-05, + "loss": 0.3724, + "step": 2739500 + }, + { + "epoch": 1.64, + "learning_rate": 4.453863576637343e-05, + "loss": 0.3601, + "step": 2740000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4536535800812865e-05, + "loss": 0.3627, + "step": 2740500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4534440035183426e-05, + "loss": 0.3678, + "step": 2741000 + }, + { + "epoch": 1.64, + "learning_rate": 4.453234006962286e-05, + "loss": 0.3722, + "step": 2741500 + }, + { + "epoch": 1.64, + "learning_rate": 4.453024430399341e-05, + "loss": 0.3675, + "step": 2742000 + }, + { + "epoch": 1.64, + "learning_rate": 4.4528144338432846e-05, + "loss": 0.3695, + "step": 2742500 + }, + { + "epoch": 1.64, + "learning_rate": 4.4526044372872286e-05, + "loss": 0.3803, + "step": 2743000 + }, + { + "epoch": 1.64, + "learning_rate": 4.452394440731172e-05, + "loss": 0.3764, + "step": 2743500 + }, + { + "epoch": 1.65, + "learning_rate": 4.452184444175115e-05, + "loss": 0.365, + "step": 2744000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4519744476190593e-05, + "loss": 0.3757, + "step": 2744500 + }, + { + "epoch": 1.65, + "learning_rate": 4.451764451063003e-05, + "loss": 0.3602, + "step": 2745000 + }, + { + "epoch": 1.65, + "learning_rate": 4.451554874500058e-05, + "loss": 0.3636, + "step": 2745500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4513448779440014e-05, + "loss": 0.3731, + "step": 2746000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4511348813879454e-05, + "loss": 0.3749, + "step": 2746500 + }, + { + "epoch": 1.65, + "learning_rate": 4.450924884831889e-05, + "loss": 0.367, + "step": 2747000 + }, + { + "epoch": 1.65, + "learning_rate": 4.450714888275832e-05, + "loss": 0.3648, + "step": 2747500 + }, + { + "epoch": 1.65, + "learning_rate": 4.450505311712888e-05, + "loss": 0.3719, + "step": 2748000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4502953151568315e-05, + "loss": 0.3637, + "step": 2748500 + }, + { + "epoch": 1.65, + "learning_rate": 4.450085318600775e-05, + "loss": 0.3752, + "step": 2749000 + }, + { + "epoch": 1.65, + "learning_rate": 4.449875322044719e-05, + "loss": 0.3852, + "step": 2749500 + }, + { + "epoch": 1.65, + "learning_rate": 4.449665745481774e-05, + "loss": 0.3793, + "step": 2750000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4494557489257176e-05, + "loss": 0.3628, + "step": 2750500 + }, + { + "epoch": 1.65, + "learning_rate": 4.449246172362773e-05, + "loss": 0.3705, + "step": 2751000 + }, + { + "epoch": 1.65, + "learning_rate": 4.449036175806716e-05, + "loss": 0.3813, + "step": 2751500 + }, + { + "epoch": 1.65, + "learning_rate": 4.44882617925066e-05, + "loss": 0.3727, + "step": 2752000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4486161826946036e-05, + "loss": 0.3716, + "step": 2752500 + }, + { + "epoch": 1.65, + "learning_rate": 4.448406186138547e-05, + "loss": 0.3764, + "step": 2753000 + }, + { + "epoch": 1.65, + "learning_rate": 4.448196189582491e-05, + "loss": 0.3746, + "step": 2753500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4479861930264343e-05, + "loss": 0.3761, + "step": 2754000 + }, + { + "epoch": 1.65, + "learning_rate": 4.447776196470378e-05, + "loss": 0.3724, + "step": 2754500 + }, + { + "epoch": 1.65, + "learning_rate": 4.447566199914322e-05, + "loss": 0.3714, + "step": 2755000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4473562033582644e-05, + "loss": 0.3753, + "step": 2755500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4471462068022084e-05, + "loss": 0.3661, + "step": 2756000 + }, + { + "epoch": 1.65, + "learning_rate": 4.446936210246152e-05, + "loss": 0.3702, + "step": 2756500 + }, + { + "epoch": 1.65, + "learning_rate": 4.446726633683208e-05, + "loss": 0.3725, + "step": 2757000 + }, + { + "epoch": 1.65, + "learning_rate": 4.446516637127151e-05, + "loss": 0.3683, + "step": 2757500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4463070605642065e-05, + "loss": 0.3627, + "step": 2758000 + }, + { + "epoch": 1.65, + "learning_rate": 4.4460970640081505e-05, + "loss": 0.3651, + "step": 2758500 + }, + { + "epoch": 1.65, + "learning_rate": 4.445887067452094e-05, + "loss": 0.3668, + "step": 2759000 + }, + { + "epoch": 1.65, + "learning_rate": 4.445677070896037e-05, + "loss": 0.3661, + "step": 2759500 + }, + { + "epoch": 1.65, + "learning_rate": 4.4454674943330926e-05, + "loss": 0.3693, + "step": 2760000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4452574977770366e-05, + "loss": 0.3631, + "step": 2760500 + }, + { + "epoch": 1.66, + "learning_rate": 4.44504750122098e-05, + "loss": 0.3701, + "step": 2761000 + }, + { + "epoch": 1.66, + "learning_rate": 4.444837504664924e-05, + "loss": 0.3746, + "step": 2761500 + }, + { + "epoch": 1.66, + "learning_rate": 4.444627508108867e-05, + "loss": 0.3756, + "step": 2762000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4444175115528106e-05, + "loss": 0.3703, + "step": 2762500 + }, + { + "epoch": 1.66, + "learning_rate": 4.444207514996754e-05, + "loss": 0.3685, + "step": 2763000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4439975184406973e-05, + "loss": 0.3674, + "step": 2763500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4437879418777534e-05, + "loss": 0.3674, + "step": 2764000 + }, + { + "epoch": 1.66, + "learning_rate": 4.443577945321697e-05, + "loss": 0.3721, + "step": 2764500 + }, + { + "epoch": 1.66, + "learning_rate": 4.443368368758752e-05, + "loss": 0.361, + "step": 2765000 + }, + { + "epoch": 1.66, + "learning_rate": 4.443158372202696e-05, + "loss": 0.371, + "step": 2765500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4429483756466394e-05, + "loss": 0.367, + "step": 2766000 + }, + { + "epoch": 1.66, + "learning_rate": 4.442738379090583e-05, + "loss": 0.38, + "step": 2766500 + }, + { + "epoch": 1.66, + "learning_rate": 4.442528382534527e-05, + "loss": 0.3637, + "step": 2767000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4423183859784695e-05, + "loss": 0.372, + "step": 2767500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4421083894224135e-05, + "loss": 0.3771, + "step": 2768000 + }, + { + "epoch": 1.66, + "learning_rate": 4.441898392866357e-05, + "loss": 0.3688, + "step": 2768500 + }, + { + "epoch": 1.66, + "learning_rate": 4.441688816303413e-05, + "loss": 0.3575, + "step": 2769000 + }, + { + "epoch": 1.66, + "learning_rate": 4.441478819747356e-05, + "loss": 0.3719, + "step": 2769500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4412692431844116e-05, + "loss": 0.3881, + "step": 2770000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4410592466283556e-05, + "loss": 0.3631, + "step": 2770500 + }, + { + "epoch": 1.66, + "learning_rate": 4.440849250072299e-05, + "loss": 0.3766, + "step": 2771000 + }, + { + "epoch": 1.66, + "learning_rate": 4.440639253516242e-05, + "loss": 0.3662, + "step": 2771500 + }, + { + "epoch": 1.66, + "learning_rate": 4.440429256960186e-05, + "loss": 0.3819, + "step": 2772000 + }, + { + "epoch": 1.66, + "learning_rate": 4.440219260404129e-05, + "loss": 0.3683, + "step": 2772500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4400092638480723e-05, + "loss": 0.365, + "step": 2773000 + }, + { + "epoch": 1.66, + "learning_rate": 4.4397992672920164e-05, + "loss": 0.3757, + "step": 2773500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4395896907290724e-05, + "loss": 0.3674, + "step": 2774000 + }, + { + "epoch": 1.66, + "learning_rate": 4.439379694173015e-05, + "loss": 0.3792, + "step": 2774500 + }, + { + "epoch": 1.66, + "learning_rate": 4.439170117610071e-05, + "loss": 0.3653, + "step": 2775000 + }, + { + "epoch": 1.66, + "learning_rate": 4.438960121054015e-05, + "loss": 0.3663, + "step": 2775500 + }, + { + "epoch": 1.66, + "learning_rate": 4.4387501244979585e-05, + "loss": 0.3785, + "step": 2776000 + }, + { + "epoch": 1.66, + "learning_rate": 4.438540127941902e-05, + "loss": 0.3665, + "step": 2776500 + }, + { + "epoch": 1.66, + "learning_rate": 4.438330131385845e-05, + "loss": 0.3649, + "step": 2777000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4381201348297885e-05, + "loss": 0.3634, + "step": 2777500 + }, + { + "epoch": 1.67, + "learning_rate": 4.437910138273732e-05, + "loss": 0.3592, + "step": 2778000 + }, + { + "epoch": 1.67, + "learning_rate": 4.437700141717676e-05, + "loss": 0.3755, + "step": 2778500 + }, + { + "epoch": 1.67, + "learning_rate": 4.437490565154732e-05, + "loss": 0.38, + "step": 2779000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4372805685986746e-05, + "loss": 0.3726, + "step": 2779500 + }, + { + "epoch": 1.67, + "learning_rate": 4.437070572042618e-05, + "loss": 0.3688, + "step": 2780000 + }, + { + "epoch": 1.67, + "learning_rate": 4.436860995479674e-05, + "loss": 0.3719, + "step": 2780500 + }, + { + "epoch": 1.67, + "learning_rate": 4.436650998923618e-05, + "loss": 0.3646, + "step": 2781000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4364410023675613e-05, + "loss": 0.3622, + "step": 2781500 + }, + { + "epoch": 1.67, + "learning_rate": 4.436231005811505e-05, + "loss": 0.3639, + "step": 2782000 + }, + { + "epoch": 1.67, + "learning_rate": 4.436021009255448e-05, + "loss": 0.363, + "step": 2782500 + }, + { + "epoch": 1.67, + "learning_rate": 4.435811432692504e-05, + "loss": 0.3637, + "step": 2783000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4356014361364474e-05, + "loss": 0.379, + "step": 2783500 + }, + { + "epoch": 1.67, + "learning_rate": 4.435391439580391e-05, + "loss": 0.3725, + "step": 2784000 + }, + { + "epoch": 1.67, + "learning_rate": 4.435181443024334e-05, + "loss": 0.3754, + "step": 2784500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4349714464682775e-05, + "loss": 0.3723, + "step": 2785000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4347614499122215e-05, + "loss": 0.3804, + "step": 2785500 + }, + { + "epoch": 1.67, + "learning_rate": 4.434551453356165e-05, + "loss": 0.3644, + "step": 2786000 + }, + { + "epoch": 1.67, + "learning_rate": 4.434341456800108e-05, + "loss": 0.3587, + "step": 2786500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4341318802371635e-05, + "loss": 0.3614, + "step": 2787000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4339223036742196e-05, + "loss": 0.3786, + "step": 2787500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4337123071181636e-05, + "loss": 0.3725, + "step": 2788000 + }, + { + "epoch": 1.67, + "learning_rate": 4.433502730555219e-05, + "loss": 0.365, + "step": 2788500 + }, + { + "epoch": 1.67, + "learning_rate": 4.433292733999162e-05, + "loss": 0.3564, + "step": 2789000 + }, + { + "epoch": 1.67, + "learning_rate": 4.433082737443106e-05, + "loss": 0.3665, + "step": 2789500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4328727408870497e-05, + "loss": 0.3726, + "step": 2790000 + }, + { + "epoch": 1.67, + "learning_rate": 4.432662744330993e-05, + "loss": 0.3659, + "step": 2790500 + }, + { + "epoch": 1.67, + "learning_rate": 4.432452747774937e-05, + "loss": 0.3692, + "step": 2791000 + }, + { + "epoch": 1.67, + "learning_rate": 4.43224275121888e-05, + "loss": 0.3682, + "step": 2791500 + }, + { + "epoch": 1.67, + "learning_rate": 4.432032754662823e-05, + "loss": 0.3664, + "step": 2792000 + }, + { + "epoch": 1.67, + "learning_rate": 4.431822758106767e-05, + "loss": 0.3706, + "step": 2792500 + }, + { + "epoch": 1.67, + "learning_rate": 4.4316127615507104e-05, + "loss": 0.3632, + "step": 2793000 + }, + { + "epoch": 1.67, + "learning_rate": 4.4314031849877664e-05, + "loss": 0.3718, + "step": 2793500 + }, + { + "epoch": 1.68, + "learning_rate": 4.431193188431709e-05, + "loss": 0.3651, + "step": 2794000 + }, + { + "epoch": 1.68, + "learning_rate": 4.430983191875653e-05, + "loss": 0.374, + "step": 2794500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4307731953195965e-05, + "loss": 0.3714, + "step": 2795000 + }, + { + "epoch": 1.68, + "learning_rate": 4.43056319876354e-05, + "loss": 0.3653, + "step": 2795500 + }, + { + "epoch": 1.68, + "learning_rate": 4.430353202207484e-05, + "loss": 0.3648, + "step": 2796000 + }, + { + "epoch": 1.68, + "learning_rate": 4.430143625644539e-05, + "loss": 0.3703, + "step": 2796500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4299336290884826e-05, + "loss": 0.3704, + "step": 2797000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4297236325324266e-05, + "loss": 0.3738, + "step": 2797500 + }, + { + "epoch": 1.68, + "learning_rate": 4.42951363597637e-05, + "loss": 0.3644, + "step": 2798000 + }, + { + "epoch": 1.68, + "learning_rate": 4.429303639420313e-05, + "loss": 0.3663, + "step": 2798500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4290940628573686e-05, + "loss": 0.3741, + "step": 2799000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4288840663013126e-05, + "loss": 0.3648, + "step": 2799500 + }, + { + "epoch": 1.68, + "learning_rate": 4.428674069745256e-05, + "loss": 0.3699, + "step": 2800000 + }, + { + "epoch": 1.68, + "eval_loss": 0.3532743453979492, + "eval_runtime": 1119.9621, + "eval_samples_per_second": 470.302, + "eval_steps_per_second": 78.384, + "step": 2800000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4284640731891993e-05, + "loss": 0.3731, + "step": 2800500 + }, + { + "epoch": 1.68, + "learning_rate": 4.428254496626255e-05, + "loss": 0.3727, + "step": 2801000 + }, + { + "epoch": 1.68, + "learning_rate": 4.428044500070199e-05, + "loss": 0.3835, + "step": 2801500 + }, + { + "epoch": 1.68, + "learning_rate": 4.427834503514142e-05, + "loss": 0.3702, + "step": 2802000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4276245069580854e-05, + "loss": 0.3781, + "step": 2802500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4274145104020294e-05, + "loss": 0.3682, + "step": 2803000 + }, + { + "epoch": 1.68, + "learning_rate": 4.427204513845973e-05, + "loss": 0.3676, + "step": 2803500 + }, + { + "epoch": 1.68, + "learning_rate": 4.426994937283028e-05, + "loss": 0.3705, + "step": 2804000 + }, + { + "epoch": 1.68, + "learning_rate": 4.426784940726972e-05, + "loss": 0.3662, + "step": 2804500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4265749441709155e-05, + "loss": 0.3656, + "step": 2805000 + }, + { + "epoch": 1.68, + "learning_rate": 4.426364947614859e-05, + "loss": 0.3702, + "step": 2805500 + }, + { + "epoch": 1.68, + "learning_rate": 4.426154951058803e-05, + "loss": 0.363, + "step": 2806000 + }, + { + "epoch": 1.68, + "learning_rate": 4.425944954502746e-05, + "loss": 0.3663, + "step": 2806500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4257349579466896e-05, + "loss": 0.3677, + "step": 2807000 + }, + { + "epoch": 1.68, + "learning_rate": 4.4255249613906336e-05, + "loss": 0.3667, + "step": 2807500 + }, + { + "epoch": 1.68, + "learning_rate": 4.425314964834577e-05, + "loss": 0.37, + "step": 2808000 + }, + { + "epoch": 1.68, + "learning_rate": 4.425105388271632e-05, + "loss": 0.3723, + "step": 2808500 + }, + { + "epoch": 1.68, + "learning_rate": 4.4248953917155756e-05, + "loss": 0.3785, + "step": 2809000 + }, + { + "epoch": 1.68, + "learning_rate": 4.42468539515952e-05, + "loss": 0.3756, + "step": 2809500 + }, + { + "epoch": 1.68, + "learning_rate": 4.424475398603463e-05, + "loss": 0.3645, + "step": 2810000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4242658220405184e-05, + "loss": 0.3697, + "step": 2810500 + }, + { + "epoch": 1.69, + "learning_rate": 4.424055825484462e-05, + "loss": 0.3684, + "step": 2811000 + }, + { + "epoch": 1.69, + "learning_rate": 4.423845828928406e-05, + "loss": 0.3633, + "step": 2811500 + }, + { + "epoch": 1.69, + "learning_rate": 4.423636252365461e-05, + "loss": 0.3754, + "step": 2812000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4234262558094044e-05, + "loss": 0.3648, + "step": 2812500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4232162592533485e-05, + "loss": 0.3685, + "step": 2813000 + }, + { + "epoch": 1.69, + "learning_rate": 4.423006262697292e-05, + "loss": 0.3608, + "step": 2813500 + }, + { + "epoch": 1.69, + "learning_rate": 4.422796266141235e-05, + "loss": 0.356, + "step": 2814000 + }, + { + "epoch": 1.69, + "learning_rate": 4.422586269585179e-05, + "loss": 0.3652, + "step": 2814500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4223766930222345e-05, + "loss": 0.3659, + "step": 2815000 + }, + { + "epoch": 1.69, + "learning_rate": 4.422166696466178e-05, + "loss": 0.3679, + "step": 2815500 + }, + { + "epoch": 1.69, + "learning_rate": 4.421956699910121e-05, + "loss": 0.3662, + "step": 2816000 + }, + { + "epoch": 1.69, + "learning_rate": 4.421746703354065e-05, + "loss": 0.3676, + "step": 2816500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4215367067980086e-05, + "loss": 0.3706, + "step": 2817000 + }, + { + "epoch": 1.69, + "learning_rate": 4.421326710241952e-05, + "loss": 0.3592, + "step": 2817500 + }, + { + "epoch": 1.69, + "learning_rate": 4.421116713685896e-05, + "loss": 0.3625, + "step": 2818000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4209067171298386e-05, + "loss": 0.3711, + "step": 2818500 + }, + { + "epoch": 1.69, + "learning_rate": 4.420697140566895e-05, + "loss": 0.3676, + "step": 2819000 + }, + { + "epoch": 1.69, + "learning_rate": 4.420487144010838e-05, + "loss": 0.3778, + "step": 2819500 + }, + { + "epoch": 1.69, + "learning_rate": 4.420277567447894e-05, + "loss": 0.3729, + "step": 2820000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4200679908849494e-05, + "loss": 0.3621, + "step": 2820500 + }, + { + "epoch": 1.69, + "learning_rate": 4.419857994328893e-05, + "loss": 0.3734, + "step": 2821000 + }, + { + "epoch": 1.69, + "learning_rate": 4.419647997772836e-05, + "loss": 0.3606, + "step": 2821500 + }, + { + "epoch": 1.69, + "learning_rate": 4.41943800121678e-05, + "loss": 0.3646, + "step": 2822000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4192280046607235e-05, + "loss": 0.3721, + "step": 2822500 + }, + { + "epoch": 1.69, + "learning_rate": 4.419018008104667e-05, + "loss": 0.3636, + "step": 2823000 + }, + { + "epoch": 1.69, + "learning_rate": 4.418808011548611e-05, + "loss": 0.3724, + "step": 2823500 + }, + { + "epoch": 1.69, + "learning_rate": 4.418598014992554e-05, + "loss": 0.3642, + "step": 2824000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4183880184364975e-05, + "loss": 0.3723, + "step": 2824500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4181780218804416e-05, + "loss": 0.3635, + "step": 2825000 + }, + { + "epoch": 1.69, + "learning_rate": 4.417968025324384e-05, + "loss": 0.3719, + "step": 2825500 + }, + { + "epoch": 1.69, + "learning_rate": 4.41775844876144e-05, + "loss": 0.3689, + "step": 2826000 + }, + { + "epoch": 1.69, + "learning_rate": 4.4175484522053836e-05, + "loss": 0.3762, + "step": 2826500 + }, + { + "epoch": 1.69, + "learning_rate": 4.4173384556493276e-05, + "loss": 0.3827, + "step": 2827000 + }, + { + "epoch": 1.7, + "learning_rate": 4.417128459093271e-05, + "loss": 0.3725, + "step": 2827500 + }, + { + "epoch": 1.7, + "learning_rate": 4.416918462537214e-05, + "loss": 0.3663, + "step": 2828000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4167088859742704e-05, + "loss": 0.3725, + "step": 2828500 + }, + { + "epoch": 1.7, + "learning_rate": 4.416498889418214e-05, + "loss": 0.3536, + "step": 2829000 + }, + { + "epoch": 1.7, + "learning_rate": 4.416288892862157e-05, + "loss": 0.3755, + "step": 2829500 + }, + { + "epoch": 1.7, + "learning_rate": 4.416078896306101e-05, + "loss": 0.3727, + "step": 2830000 + }, + { + "epoch": 1.7, + "learning_rate": 4.415868899750044e-05, + "loss": 0.3768, + "step": 2830500 + }, + { + "epoch": 1.7, + "learning_rate": 4.415658903193987e-05, + "loss": 0.3672, + "step": 2831000 + }, + { + "epoch": 1.7, + "learning_rate": 4.415448906637931e-05, + "loss": 0.3729, + "step": 2831500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4152389100818745e-05, + "loss": 0.372, + "step": 2832000 + }, + { + "epoch": 1.7, + "learning_rate": 4.41502933351893e-05, + "loss": 0.3713, + "step": 2832500 + }, + { + "epoch": 1.7, + "learning_rate": 4.414819336962873e-05, + "loss": 0.368, + "step": 2833000 + }, + { + "epoch": 1.7, + "learning_rate": 4.414609340406817e-05, + "loss": 0.3687, + "step": 2833500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4143993438507605e-05, + "loss": 0.3793, + "step": 2834000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4141893472947046e-05, + "loss": 0.3721, + "step": 2834500 + }, + { + "epoch": 1.7, + "learning_rate": 4.41397977073176e-05, + "loss": 0.367, + "step": 2835000 + }, + { + "epoch": 1.7, + "learning_rate": 4.413769774175703e-05, + "loss": 0.3648, + "step": 2835500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4135597776196466e-05, + "loss": 0.3621, + "step": 2836000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4133497810635906e-05, + "loss": 0.363, + "step": 2836500 + }, + { + "epoch": 1.7, + "learning_rate": 4.413139784507534e-05, + "loss": 0.3709, + "step": 2837000 + }, + { + "epoch": 1.7, + "learning_rate": 4.412929787951477e-05, + "loss": 0.3624, + "step": 2837500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4127197913954213e-05, + "loss": 0.3699, + "step": 2838000 + }, + { + "epoch": 1.7, + "learning_rate": 4.412510214832477e-05, + "loss": 0.3725, + "step": 2838500 + }, + { + "epoch": 1.7, + "learning_rate": 4.41230021827642e-05, + "loss": 0.3676, + "step": 2839000 + }, + { + "epoch": 1.7, + "learning_rate": 4.4120902217203634e-05, + "loss": 0.3644, + "step": 2839500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4118802251643074e-05, + "loss": 0.366, + "step": 2840000 + }, + { + "epoch": 1.7, + "learning_rate": 4.411670228608251e-05, + "loss": 0.3754, + "step": 2840500 + }, + { + "epoch": 1.7, + "learning_rate": 4.411460232052194e-05, + "loss": 0.3649, + "step": 2841000 + }, + { + "epoch": 1.7, + "learning_rate": 4.411250235496138e-05, + "loss": 0.3588, + "step": 2841500 + }, + { + "epoch": 1.7, + "learning_rate": 4.4110402389400815e-05, + "loss": 0.3559, + "step": 2842000 + }, + { + "epoch": 1.7, + "learning_rate": 4.410830662377137e-05, + "loss": 0.3623, + "step": 2842500 + }, + { + "epoch": 1.7, + "learning_rate": 4.410620665821081e-05, + "loss": 0.3787, + "step": 2843000 + }, + { + "epoch": 1.7, + "learning_rate": 4.410410669265024e-05, + "loss": 0.3612, + "step": 2843500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4102006727089675e-05, + "loss": 0.3692, + "step": 2844000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4099906761529116e-05, + "loss": 0.3628, + "step": 2844500 + }, + { + "epoch": 1.71, + "learning_rate": 4.409781099589967e-05, + "loss": 0.3725, + "step": 2845000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40957110303391e-05, + "loss": 0.3714, + "step": 2845500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4093611064778536e-05, + "loss": 0.3661, + "step": 2846000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4091511099217976e-05, + "loss": 0.3698, + "step": 2846500 + }, + { + "epoch": 1.71, + "learning_rate": 4.408941113365741e-05, + "loss": 0.3768, + "step": 2847000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4087311168096837e-05, + "loss": 0.3625, + "step": 2847500 + }, + { + "epoch": 1.71, + "learning_rate": 4.40852154024674e-05, + "loss": 0.3681, + "step": 2848000 + }, + { + "epoch": 1.71, + "learning_rate": 4.408311543690684e-05, + "loss": 0.3742, + "step": 2848500 + }, + { + "epoch": 1.71, + "learning_rate": 4.408101547134627e-05, + "loss": 0.3601, + "step": 2849000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4078915505785704e-05, + "loss": 0.3703, + "step": 2849500 + }, + { + "epoch": 1.71, + "learning_rate": 4.407681554022514e-05, + "loss": 0.3645, + "step": 2850000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40747197745957e-05, + "loss": 0.3676, + "step": 2850500 + }, + { + "epoch": 1.71, + "learning_rate": 4.407261980903513e-05, + "loss": 0.3674, + "step": 2851000 + }, + { + "epoch": 1.71, + "learning_rate": 4.407051984347457e-05, + "loss": 0.3655, + "step": 2851500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4068419877914005e-05, + "loss": 0.3764, + "step": 2852000 + }, + { + "epoch": 1.71, + "learning_rate": 4.406631991235343e-05, + "loss": 0.3689, + "step": 2852500 + }, + { + "epoch": 1.71, + "learning_rate": 4.406421994679287e-05, + "loss": 0.3709, + "step": 2853000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4062119981232305e-05, + "loss": 0.3771, + "step": 2853500 + }, + { + "epoch": 1.71, + "learning_rate": 4.406002001567174e-05, + "loss": 0.3745, + "step": 2854000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40579242500423e-05, + "loss": 0.3677, + "step": 2854500 + }, + { + "epoch": 1.71, + "learning_rate": 4.405582428448173e-05, + "loss": 0.3754, + "step": 2855000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4053724318921166e-05, + "loss": 0.365, + "step": 2855500 + }, + { + "epoch": 1.71, + "learning_rate": 4.40516243533606e-05, + "loss": 0.3705, + "step": 2856000 + }, + { + "epoch": 1.71, + "learning_rate": 4.404952438780004e-05, + "loss": 0.3798, + "step": 2856500 + }, + { + "epoch": 1.71, + "learning_rate": 4.404742442223947e-05, + "loss": 0.3661, + "step": 2857000 + }, + { + "epoch": 1.71, + "learning_rate": 4.404532445667891e-05, + "loss": 0.3644, + "step": 2857500 + }, + { + "epoch": 1.71, + "learning_rate": 4.404322869104947e-05, + "loss": 0.3636, + "step": 2858000 + }, + { + "epoch": 1.71, + "learning_rate": 4.40411287254889e-05, + "loss": 0.3671, + "step": 2858500 + }, + { + "epoch": 1.71, + "learning_rate": 4.4039028759928334e-05, + "loss": 0.371, + "step": 2859000 + }, + { + "epoch": 1.71, + "learning_rate": 4.4036928794367774e-05, + "loss": 0.372, + "step": 2859500 + }, + { + "epoch": 1.71, + "learning_rate": 4.403482882880721e-05, + "loss": 0.3603, + "step": 2860000 + }, + { + "epoch": 1.71, + "learning_rate": 4.403272886324664e-05, + "loss": 0.3714, + "step": 2860500 + }, + { + "epoch": 1.72, + "learning_rate": 4.4030633097617195e-05, + "loss": 0.3725, + "step": 2861000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4028533132056635e-05, + "loss": 0.3727, + "step": 2861500 + }, + { + "epoch": 1.72, + "learning_rate": 4.402643316649607e-05, + "loss": 0.3676, + "step": 2862000 + }, + { + "epoch": 1.72, + "learning_rate": 4.40243332009355e-05, + "loss": 0.3604, + "step": 2862500 + }, + { + "epoch": 1.72, + "learning_rate": 4.402223323537494e-05, + "loss": 0.3702, + "step": 2863000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4020137469745496e-05, + "loss": 0.3702, + "step": 2863500 + }, + { + "epoch": 1.72, + "learning_rate": 4.401803750418493e-05, + "loss": 0.361, + "step": 2864000 + }, + { + "epoch": 1.72, + "learning_rate": 4.401593753862436e-05, + "loss": 0.3696, + "step": 2864500 + }, + { + "epoch": 1.72, + "learning_rate": 4.40138375730638e-05, + "loss": 0.3603, + "step": 2865000 + }, + { + "epoch": 1.72, + "learning_rate": 4.4011737607503236e-05, + "loss": 0.3607, + "step": 2865500 + }, + { + "epoch": 1.72, + "learning_rate": 4.400964184187379e-05, + "loss": 0.3743, + "step": 2866000 + }, + { + "epoch": 1.72, + "learning_rate": 4.400754187631323e-05, + "loss": 0.3618, + "step": 2866500 + }, + { + "epoch": 1.72, + "learning_rate": 4.4005441910752664e-05, + "loss": 0.3601, + "step": 2867000 + }, + { + "epoch": 1.72, + "learning_rate": 4.40033419451921e-05, + "loss": 0.36, + "step": 2867500 + }, + { + "epoch": 1.72, + "learning_rate": 4.400124197963154e-05, + "loss": 0.3746, + "step": 2868000 + }, + { + "epoch": 1.72, + "learning_rate": 4.399914201407097e-05, + "loss": 0.3686, + "step": 2868500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3997042048510404e-05, + "loss": 0.3748, + "step": 2869000 + }, + { + "epoch": 1.72, + "learning_rate": 4.399494628288096e-05, + "loss": 0.3656, + "step": 2869500 + }, + { + "epoch": 1.72, + "learning_rate": 4.39928463173204e-05, + "loss": 0.371, + "step": 2870000 + }, + { + "epoch": 1.72, + "learning_rate": 4.399074635175983e-05, + "loss": 0.3686, + "step": 2870500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3988646386199265e-05, + "loss": 0.3733, + "step": 2871000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3986546420638705e-05, + "loss": 0.3675, + "step": 2871500 + }, + { + "epoch": 1.72, + "learning_rate": 4.398444645507814e-05, + "loss": 0.3715, + "step": 2872000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3982346489517565e-05, + "loss": 0.3677, + "step": 2872500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3980246523957006e-05, + "loss": 0.3654, + "step": 2873000 + }, + { + "epoch": 1.72, + "learning_rate": 4.3978150758327566e-05, + "loss": 0.369, + "step": 2873500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3976050792767e-05, + "loss": 0.3755, + "step": 2874000 + }, + { + "epoch": 1.72, + "learning_rate": 4.397395082720643e-05, + "loss": 0.3679, + "step": 2874500 + }, + { + "epoch": 1.72, + "learning_rate": 4.3971850861645866e-05, + "loss": 0.3723, + "step": 2875000 + }, + { + "epoch": 1.72, + "learning_rate": 4.396975509601643e-05, + "loss": 0.3697, + "step": 2875500 + }, + { + "epoch": 1.72, + "learning_rate": 4.396765513045586e-05, + "loss": 0.3551, + "step": 2876000 + }, + { + "epoch": 1.72, + "learning_rate": 4.39655551648953e-05, + "loss": 0.3702, + "step": 2876500 + }, + { + "epoch": 1.72, + "learning_rate": 4.396345519933473e-05, + "loss": 0.3674, + "step": 2877000 + }, + { + "epoch": 1.73, + "learning_rate": 4.396135943370529e-05, + "loss": 0.3554, + "step": 2877500 + }, + { + "epoch": 1.73, + "learning_rate": 4.395925946814472e-05, + "loss": 0.3557, + "step": 2878000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3957163702515274e-05, + "loss": 0.3622, + "step": 2878500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3955063736954715e-05, + "loss": 0.3663, + "step": 2879000 + }, + { + "epoch": 1.73, + "learning_rate": 4.395296797132527e-05, + "loss": 0.3645, + "step": 2879500 + }, + { + "epoch": 1.73, + "learning_rate": 4.395087220569583e-05, + "loss": 0.3716, + "step": 2880000 + }, + { + "epoch": 1.73, + "learning_rate": 4.394877644006638e-05, + "loss": 0.378, + "step": 2880500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3946676474505816e-05, + "loss": 0.3552, + "step": 2881000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3944576508945256e-05, + "loss": 0.3595, + "step": 2881500 + }, + { + "epoch": 1.73, + "learning_rate": 4.394247654338469e-05, + "loss": 0.3698, + "step": 2882000 + }, + { + "epoch": 1.73, + "learning_rate": 4.394037657782412e-05, + "loss": 0.363, + "step": 2882500 + }, + { + "epoch": 1.73, + "learning_rate": 4.393827661226356e-05, + "loss": 0.3728, + "step": 2883000 + }, + { + "epoch": 1.73, + "learning_rate": 4.393617664670299e-05, + "loss": 0.3679, + "step": 2883500 + }, + { + "epoch": 1.73, + "learning_rate": 4.393407668114242e-05, + "loss": 0.3702, + "step": 2884000 + }, + { + "epoch": 1.73, + "learning_rate": 4.393197671558186e-05, + "loss": 0.3752, + "step": 2884500 + }, + { + "epoch": 1.73, + "learning_rate": 4.39298767500213e-05, + "loss": 0.3593, + "step": 2885000 + }, + { + "epoch": 1.73, + "learning_rate": 4.392777678446073e-05, + "loss": 0.3691, + "step": 2885500 + }, + { + "epoch": 1.73, + "learning_rate": 4.392567681890017e-05, + "loss": 0.3578, + "step": 2886000 + }, + { + "epoch": 1.73, + "learning_rate": 4.3923576853339604e-05, + "loss": 0.3738, + "step": 2886500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3921476887779044e-05, + "loss": 0.3719, + "step": 2887000 + }, + { + "epoch": 1.73, + "learning_rate": 4.391937692221848e-05, + "loss": 0.3589, + "step": 2887500 + }, + { + "epoch": 1.73, + "learning_rate": 4.391727695665791e-05, + "loss": 0.3716, + "step": 2888000 + }, + { + "epoch": 1.73, + "learning_rate": 4.391517699109735e-05, + "loss": 0.3672, + "step": 2888500 + }, + { + "epoch": 1.73, + "learning_rate": 4.391307702553678e-05, + "loss": 0.3643, + "step": 2889000 + }, + { + "epoch": 1.73, + "learning_rate": 4.391097705997621e-05, + "loss": 0.367, + "step": 2889500 + }, + { + "epoch": 1.73, + "learning_rate": 4.390887709441565e-05, + "loss": 0.3661, + "step": 2890000 + }, + { + "epoch": 1.73, + "learning_rate": 4.390678132878621e-05, + "loss": 0.3714, + "step": 2890500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3904681363225646e-05, + "loss": 0.3666, + "step": 2891000 + }, + { + "epoch": 1.73, + "learning_rate": 4.390258139766507e-05, + "loss": 0.3632, + "step": 2891500 + }, + { + "epoch": 1.73, + "learning_rate": 4.390048143210451e-05, + "loss": 0.3731, + "step": 2892000 + }, + { + "epoch": 1.73, + "learning_rate": 4.389838566647507e-05, + "loss": 0.3788, + "step": 2892500 + }, + { + "epoch": 1.73, + "learning_rate": 4.3896289900845626e-05, + "loss": 0.3768, + "step": 2893000 + }, + { + "epoch": 1.73, + "learning_rate": 4.389418993528506e-05, + "loss": 0.3644, + "step": 2893500 + }, + { + "epoch": 1.74, + "learning_rate": 4.38920899697245e-05, + "loss": 0.3726, + "step": 2894000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3889990004163934e-05, + "loss": 0.3683, + "step": 2894500 + }, + { + "epoch": 1.74, + "learning_rate": 4.388789423853449e-05, + "loss": 0.3712, + "step": 2895000 + }, + { + "epoch": 1.74, + "learning_rate": 4.388579427297392e-05, + "loss": 0.3724, + "step": 2895500 + }, + { + "epoch": 1.74, + "learning_rate": 4.388369430741336e-05, + "loss": 0.3657, + "step": 2896000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3881594341852794e-05, + "loss": 0.3659, + "step": 2896500 + }, + { + "epoch": 1.74, + "learning_rate": 4.387949437629223e-05, + "loss": 0.3651, + "step": 2897000 + }, + { + "epoch": 1.74, + "learning_rate": 4.387739441073167e-05, + "loss": 0.371, + "step": 2897500 + }, + { + "epoch": 1.74, + "learning_rate": 4.38752944451711e-05, + "loss": 0.3717, + "step": 2898000 + }, + { + "epoch": 1.74, + "learning_rate": 4.387319447961053e-05, + "loss": 0.3734, + "step": 2898500 + }, + { + "epoch": 1.74, + "learning_rate": 4.387109871398109e-05, + "loss": 0.3612, + "step": 2899000 + }, + { + "epoch": 1.74, + "learning_rate": 4.386899874842053e-05, + "loss": 0.3662, + "step": 2899500 + }, + { + "epoch": 1.74, + "learning_rate": 4.386689878285996e-05, + "loss": 0.367, + "step": 2900000 + }, + { + "epoch": 1.74, + "eval_loss": 0.35273319482803345, + "eval_runtime": 1140.8155, + "eval_samples_per_second": 461.705, + "eval_steps_per_second": 76.951, + "step": 2900000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3864798817299396e-05, + "loss": 0.365, + "step": 2900500 + }, + { + "epoch": 1.74, + "learning_rate": 4.386269885173883e-05, + "loss": 0.3641, + "step": 2901000 + }, + { + "epoch": 1.74, + "learning_rate": 4.386059888617826e-05, + "loss": 0.3642, + "step": 2901500 + }, + { + "epoch": 1.74, + "learning_rate": 4.385850312054882e-05, + "loss": 0.3628, + "step": 2902000 + }, + { + "epoch": 1.74, + "learning_rate": 4.385640315498826e-05, + "loss": 0.3685, + "step": 2902500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3854303189427697e-05, + "loss": 0.3718, + "step": 2903000 + }, + { + "epoch": 1.74, + "learning_rate": 4.385220322386712e-05, + "loss": 0.3655, + "step": 2903500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3850103258306564e-05, + "loss": 0.3644, + "step": 2904000 + }, + { + "epoch": 1.74, + "learning_rate": 4.3848003292746e-05, + "loss": 0.3744, + "step": 2904500 + }, + { + "epoch": 1.74, + "learning_rate": 4.384590332718543e-05, + "loss": 0.3721, + "step": 2905000 + }, + { + "epoch": 1.74, + "learning_rate": 4.384380336162487e-05, + "loss": 0.3632, + "step": 2905500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3841707595995424e-05, + "loss": 0.3622, + "step": 2906000 + }, + { + "epoch": 1.74, + "learning_rate": 4.383960763043486e-05, + "loss": 0.3646, + "step": 2906500 + }, + { + "epoch": 1.74, + "learning_rate": 4.383750766487429e-05, + "loss": 0.3662, + "step": 2907000 + }, + { + "epoch": 1.74, + "learning_rate": 4.383540769931373e-05, + "loss": 0.3616, + "step": 2907500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3833307733753165e-05, + "loss": 0.3608, + "step": 2908000 + }, + { + "epoch": 1.74, + "learning_rate": 4.38312077681926e-05, + "loss": 0.3641, + "step": 2908500 + }, + { + "epoch": 1.74, + "learning_rate": 4.382911200256316e-05, + "loss": 0.3604, + "step": 2909000 + }, + { + "epoch": 1.74, + "learning_rate": 4.382701203700259e-05, + "loss": 0.3666, + "step": 2909500 + }, + { + "epoch": 1.74, + "learning_rate": 4.3824912071442026e-05, + "loss": 0.3708, + "step": 2910000 + }, + { + "epoch": 1.74, + "learning_rate": 4.382281630581258e-05, + "loss": 0.374, + "step": 2910500 + }, + { + "epoch": 1.75, + "learning_rate": 4.382071634025202e-05, + "loss": 0.3745, + "step": 2911000 + }, + { + "epoch": 1.75, + "learning_rate": 4.381861637469145e-05, + "loss": 0.3596, + "step": 2911500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3816516409130886e-05, + "loss": 0.3801, + "step": 2912000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3814416443570327e-05, + "loss": 0.3706, + "step": 2912500 + }, + { + "epoch": 1.75, + "learning_rate": 4.381232067794088e-05, + "loss": 0.3644, + "step": 2913000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3810220712380314e-05, + "loss": 0.3587, + "step": 2913500 + }, + { + "epoch": 1.75, + "learning_rate": 4.380812074681975e-05, + "loss": 0.369, + "step": 2914000 + }, + { + "epoch": 1.75, + "learning_rate": 4.380602078125919e-05, + "loss": 0.3665, + "step": 2914500 + }, + { + "epoch": 1.75, + "learning_rate": 4.380392081569862e-05, + "loss": 0.3637, + "step": 2915000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3801820850138054e-05, + "loss": 0.3674, + "step": 2915500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3799720884577494e-05, + "loss": 0.3735, + "step": 2916000 + }, + { + "epoch": 1.75, + "learning_rate": 4.379762091901693e-05, + "loss": 0.3657, + "step": 2916500 + }, + { + "epoch": 1.75, + "learning_rate": 4.379552095345636e-05, + "loss": 0.3719, + "step": 2917000 + }, + { + "epoch": 1.75, + "learning_rate": 4.37934209878958e-05, + "loss": 0.3631, + "step": 2917500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3791325222266355e-05, + "loss": 0.367, + "step": 2918000 + }, + { + "epoch": 1.75, + "learning_rate": 4.378922525670579e-05, + "loss": 0.3646, + "step": 2918500 + }, + { + "epoch": 1.75, + "learning_rate": 4.378712529114523e-05, + "loss": 0.369, + "step": 2919000 + }, + { + "epoch": 1.75, + "learning_rate": 4.378502532558466e-05, + "loss": 0.372, + "step": 2919500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3782925360024096e-05, + "loss": 0.3689, + "step": 2920000 + }, + { + "epoch": 1.75, + "learning_rate": 4.378082539446353e-05, + "loss": 0.3661, + "step": 2920500 + }, + { + "epoch": 1.75, + "learning_rate": 4.377872542890296e-05, + "loss": 0.3741, + "step": 2921000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3776625463342396e-05, + "loss": 0.3636, + "step": 2921500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3774529697712956e-05, + "loss": 0.3703, + "step": 2922000 + }, + { + "epoch": 1.75, + "learning_rate": 4.37724297321524e-05, + "loss": 0.3682, + "step": 2922500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3770329766591823e-05, + "loss": 0.3707, + "step": 2923000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3768234000962384e-05, + "loss": 0.3646, + "step": 2923500 + }, + { + "epoch": 1.75, + "learning_rate": 4.376613403540182e-05, + "loss": 0.3694, + "step": 2924000 + }, + { + "epoch": 1.75, + "learning_rate": 4.376403406984126e-05, + "loss": 0.3737, + "step": 2924500 + }, + { + "epoch": 1.75, + "learning_rate": 4.376193410428069e-05, + "loss": 0.3784, + "step": 2925000 + }, + { + "epoch": 1.75, + "learning_rate": 4.3759834138720124e-05, + "loss": 0.3679, + "step": 2925500 + }, + { + "epoch": 1.75, + "learning_rate": 4.3757738373090685e-05, + "loss": 0.361, + "step": 2926000 + }, + { + "epoch": 1.75, + "learning_rate": 4.375563840753012e-05, + "loss": 0.3628, + "step": 2926500 + }, + { + "epoch": 1.75, + "learning_rate": 4.375354264190067e-05, + "loss": 0.3748, + "step": 2927000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3751442676340105e-05, + "loss": 0.3662, + "step": 2927500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3749342710779545e-05, + "loss": 0.3634, + "step": 2928000 + }, + { + "epoch": 1.76, + "learning_rate": 4.374724274521898e-05, + "loss": 0.359, + "step": 2928500 + }, + { + "epoch": 1.76, + "learning_rate": 4.374514277965841e-05, + "loss": 0.3696, + "step": 2929000 + }, + { + "epoch": 1.76, + "learning_rate": 4.374304281409785e-05, + "loss": 0.3772, + "step": 2929500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3740942848537286e-05, + "loss": 0.3656, + "step": 2930000 + }, + { + "epoch": 1.76, + "learning_rate": 4.373884288297671e-05, + "loss": 0.3705, + "step": 2930500 + }, + { + "epoch": 1.76, + "learning_rate": 4.373674291741615e-05, + "loss": 0.3571, + "step": 2931000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3734642951855586e-05, + "loss": 0.3654, + "step": 2931500 + }, + { + "epoch": 1.76, + "learning_rate": 4.373254298629502e-05, + "loss": 0.366, + "step": 2932000 + }, + { + "epoch": 1.76, + "learning_rate": 4.373044302073446e-05, + "loss": 0.3585, + "step": 2932500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3728347255105014e-05, + "loss": 0.3754, + "step": 2933000 + }, + { + "epoch": 1.76, + "learning_rate": 4.372624728954445e-05, + "loss": 0.3601, + "step": 2933500 + }, + { + "epoch": 1.76, + "learning_rate": 4.372414732398389e-05, + "loss": 0.3635, + "step": 2934000 + }, + { + "epoch": 1.76, + "learning_rate": 4.372204735842332e-05, + "loss": 0.3632, + "step": 2934500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3719951592793874e-05, + "loss": 0.3648, + "step": 2935000 + }, + { + "epoch": 1.76, + "learning_rate": 4.371785162723331e-05, + "loss": 0.3714, + "step": 2935500 + }, + { + "epoch": 1.76, + "learning_rate": 4.371575166167275e-05, + "loss": 0.3639, + "step": 2936000 + }, + { + "epoch": 1.76, + "learning_rate": 4.371365589604331e-05, + "loss": 0.3605, + "step": 2936500 + }, + { + "epoch": 1.76, + "learning_rate": 4.371156013041386e-05, + "loss": 0.3647, + "step": 2937000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3709460164853296e-05, + "loss": 0.3654, + "step": 2937500 + }, + { + "epoch": 1.76, + "learning_rate": 4.370736019929273e-05, + "loss": 0.3635, + "step": 2938000 + }, + { + "epoch": 1.76, + "learning_rate": 4.370526023373217e-05, + "loss": 0.3679, + "step": 2938500 + }, + { + "epoch": 1.76, + "learning_rate": 4.37031602681716e-05, + "loss": 0.3562, + "step": 2939000 + }, + { + "epoch": 1.76, + "learning_rate": 4.370106030261104e-05, + "loss": 0.3705, + "step": 2939500 + }, + { + "epoch": 1.76, + "learning_rate": 4.369896033705047e-05, + "loss": 0.3665, + "step": 2940000 + }, + { + "epoch": 1.76, + "learning_rate": 4.36968603714899e-05, + "loss": 0.3665, + "step": 2940500 + }, + { + "epoch": 1.76, + "learning_rate": 4.369476040592934e-05, + "loss": 0.3646, + "step": 2941000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3692664640299904e-05, + "loss": 0.3646, + "step": 2941500 + }, + { + "epoch": 1.76, + "learning_rate": 4.369056467473933e-05, + "loss": 0.3729, + "step": 2942000 + }, + { + "epoch": 1.76, + "learning_rate": 4.3688464709178764e-05, + "loss": 0.3703, + "step": 2942500 + }, + { + "epoch": 1.76, + "learning_rate": 4.3686364743618204e-05, + "loss": 0.366, + "step": 2943000 + }, + { + "epoch": 1.76, + "learning_rate": 4.368426477805764e-05, + "loss": 0.3672, + "step": 2943500 + }, + { + "epoch": 1.77, + "learning_rate": 4.368216481249707e-05, + "loss": 0.368, + "step": 2944000 + }, + { + "epoch": 1.77, + "learning_rate": 4.368006484693651e-05, + "loss": 0.3653, + "step": 2944500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3677964881375945e-05, + "loss": 0.36, + "step": 2945000 + }, + { + "epoch": 1.77, + "learning_rate": 4.36758691157465e-05, + "loss": 0.3674, + "step": 2945500 + }, + { + "epoch": 1.77, + "learning_rate": 4.367376915018594e-05, + "loss": 0.3705, + "step": 2946000 + }, + { + "epoch": 1.77, + "learning_rate": 4.367166918462537e-05, + "loss": 0.3768, + "step": 2946500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3669569219064805e-05, + "loss": 0.3653, + "step": 2947000 + }, + { + "epoch": 1.77, + "learning_rate": 4.366747345343536e-05, + "loss": 0.3731, + "step": 2947500 + }, + { + "epoch": 1.77, + "learning_rate": 4.36653734878748e-05, + "loss": 0.3654, + "step": 2948000 + }, + { + "epoch": 1.77, + "learning_rate": 4.366327772224536e-05, + "loss": 0.3608, + "step": 2948500 + }, + { + "epoch": 1.77, + "learning_rate": 4.366117775668479e-05, + "loss": 0.3696, + "step": 2949000 + }, + { + "epoch": 1.77, + "learning_rate": 4.365907779112422e-05, + "loss": 0.3668, + "step": 2949500 + }, + { + "epoch": 1.77, + "learning_rate": 4.365697782556366e-05, + "loss": 0.3683, + "step": 2950000 + }, + { + "epoch": 1.77, + "learning_rate": 4.365487786000309e-05, + "loss": 0.3671, + "step": 2950500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3652782094373654e-05, + "loss": 0.3645, + "step": 2951000 + }, + { + "epoch": 1.77, + "learning_rate": 4.365068212881308e-05, + "loss": 0.3703, + "step": 2951500 + }, + { + "epoch": 1.77, + "learning_rate": 4.364858216325252e-05, + "loss": 0.3671, + "step": 2952000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3646482197691954e-05, + "loss": 0.3678, + "step": 2952500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3644382232131394e-05, + "loss": 0.364, + "step": 2953000 + }, + { + "epoch": 1.77, + "learning_rate": 4.364228226657083e-05, + "loss": 0.3709, + "step": 2953500 + }, + { + "epoch": 1.77, + "learning_rate": 4.364018230101026e-05, + "loss": 0.3628, + "step": 2954000 + }, + { + "epoch": 1.77, + "learning_rate": 4.36380823354497e-05, + "loss": 0.3672, + "step": 2954500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3635990769751375e-05, + "loss": 0.3702, + "step": 2955000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3633890804190815e-05, + "loss": 0.3598, + "step": 2955500 + }, + { + "epoch": 1.77, + "learning_rate": 4.363179083863025e-05, + "loss": 0.361, + "step": 2956000 + }, + { + "epoch": 1.77, + "learning_rate": 4.3629690873069676e-05, + "loss": 0.3735, + "step": 2956500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3627590907509116e-05, + "loss": 0.3653, + "step": 2957000 + }, + { + "epoch": 1.77, + "learning_rate": 4.362549094194855e-05, + "loss": 0.3704, + "step": 2957500 + }, + { + "epoch": 1.77, + "learning_rate": 4.362339097638798e-05, + "loss": 0.3653, + "step": 2958000 + }, + { + "epoch": 1.77, + "learning_rate": 4.362129101082742e-05, + "loss": 0.3658, + "step": 2958500 + }, + { + "epoch": 1.77, + "learning_rate": 4.3619195245197976e-05, + "loss": 0.3701, + "step": 2959000 + }, + { + "epoch": 1.77, + "learning_rate": 4.361709527963741e-05, + "loss": 0.3635, + "step": 2959500 + }, + { + "epoch": 1.77, + "learning_rate": 4.361499951400797e-05, + "loss": 0.3747, + "step": 2960000 + }, + { + "epoch": 1.77, + "learning_rate": 4.361289954844741e-05, + "loss": 0.3574, + "step": 2960500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3610803782817964e-05, + "loss": 0.3632, + "step": 2961000 + }, + { + "epoch": 1.78, + "learning_rate": 4.36087038172574e-05, + "loss": 0.3727, + "step": 2961500 + }, + { + "epoch": 1.78, + "learning_rate": 4.360660385169683e-05, + "loss": 0.3778, + "step": 2962000 + }, + { + "epoch": 1.78, + "learning_rate": 4.360450388613627e-05, + "loss": 0.3548, + "step": 2962500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3602403920575705e-05, + "loss": 0.3759, + "step": 2963000 + }, + { + "epoch": 1.78, + "learning_rate": 4.360030395501513e-05, + "loss": 0.3556, + "step": 2963500 + }, + { + "epoch": 1.78, + "learning_rate": 4.359820398945457e-05, + "loss": 0.3721, + "step": 2964000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3596104023894005e-05, + "loss": 0.3671, + "step": 2964500 + }, + { + "epoch": 1.78, + "learning_rate": 4.359400405833344e-05, + "loss": 0.3721, + "step": 2965000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3591908292704e-05, + "loss": 0.3629, + "step": 2965500 + }, + { + "epoch": 1.78, + "learning_rate": 4.358980832714343e-05, + "loss": 0.3656, + "step": 2966000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3587708361582866e-05, + "loss": 0.3619, + "step": 2966500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3585608396022306e-05, + "loss": 0.3725, + "step": 2967000 + }, + { + "epoch": 1.78, + "learning_rate": 4.358350843046174e-05, + "loss": 0.3696, + "step": 2967500 + }, + { + "epoch": 1.78, + "learning_rate": 4.358140846490117e-05, + "loss": 0.3562, + "step": 2968000 + }, + { + "epoch": 1.78, + "learning_rate": 4.357930849934061e-05, + "loss": 0.3641, + "step": 2968500 + }, + { + "epoch": 1.78, + "learning_rate": 4.357720853378005e-05, + "loss": 0.3651, + "step": 2969000 + }, + { + "epoch": 1.78, + "learning_rate": 4.35751127681506e-05, + "loss": 0.3553, + "step": 2969500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3573012802590034e-05, + "loss": 0.37, + "step": 2970000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3570912837029474e-05, + "loss": 0.3669, + "step": 2970500 + }, + { + "epoch": 1.78, + "learning_rate": 4.356881707140003e-05, + "loss": 0.3668, + "step": 2971000 + }, + { + "epoch": 1.78, + "learning_rate": 4.356671710583946e-05, + "loss": 0.3647, + "step": 2971500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3564617140278894e-05, + "loss": 0.3633, + "step": 2972000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3562517174718335e-05, + "loss": 0.362, + "step": 2972500 + }, + { + "epoch": 1.78, + "learning_rate": 4.356041720915777e-05, + "loss": 0.3619, + "step": 2973000 + }, + { + "epoch": 1.78, + "learning_rate": 4.355832144352832e-05, + "loss": 0.3547, + "step": 2973500 + }, + { + "epoch": 1.78, + "learning_rate": 4.355622147796776e-05, + "loss": 0.3653, + "step": 2974000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3554121512407195e-05, + "loss": 0.3653, + "step": 2974500 + }, + { + "epoch": 1.78, + "learning_rate": 4.355202154684663e-05, + "loss": 0.3703, + "step": 2975000 + }, + { + "epoch": 1.78, + "learning_rate": 4.354992158128607e-05, + "loss": 0.3604, + "step": 2975500 + }, + { + "epoch": 1.78, + "learning_rate": 4.35478216157255e-05, + "loss": 0.3695, + "step": 2976000 + }, + { + "epoch": 1.78, + "learning_rate": 4.3545721650164936e-05, + "loss": 0.3689, + "step": 2976500 + }, + { + "epoch": 1.78, + "learning_rate": 4.3543621684604376e-05, + "loss": 0.3599, + "step": 2977000 + }, + { + "epoch": 1.79, + "learning_rate": 4.354152591897493e-05, + "loss": 0.3619, + "step": 2977500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3539430153345483e-05, + "loss": 0.3722, + "step": 2978000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3537334387716044e-05, + "loss": 0.3627, + "step": 2978500 + }, + { + "epoch": 1.79, + "learning_rate": 4.353523442215548e-05, + "loss": 0.3668, + "step": 2979000 + }, + { + "epoch": 1.79, + "learning_rate": 4.353313865652603e-05, + "loss": 0.3693, + "step": 2979500 + }, + { + "epoch": 1.79, + "learning_rate": 4.353103869096547e-05, + "loss": 0.3792, + "step": 2980000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3528938725404904e-05, + "loss": 0.3726, + "step": 2980500 + }, + { + "epoch": 1.79, + "learning_rate": 4.352683875984434e-05, + "loss": 0.367, + "step": 2981000 + }, + { + "epoch": 1.79, + "learning_rate": 4.352473879428378e-05, + "loss": 0.3623, + "step": 2981500 + }, + { + "epoch": 1.79, + "learning_rate": 4.352263882872321e-05, + "loss": 0.3653, + "step": 2982000 + }, + { + "epoch": 1.79, + "learning_rate": 4.352053886316264e-05, + "loss": 0.3633, + "step": 2982500 + }, + { + "epoch": 1.79, + "learning_rate": 4.351843889760208e-05, + "loss": 0.3585, + "step": 2983000 + }, + { + "epoch": 1.79, + "learning_rate": 4.351633893204151e-05, + "loss": 0.3628, + "step": 2983500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3514238966480945e-05, + "loss": 0.3609, + "step": 2984000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3512139000920386e-05, + "loss": 0.3607, + "step": 2984500 + }, + { + "epoch": 1.79, + "learning_rate": 4.351003903535982e-05, + "loss": 0.3624, + "step": 2985000 + }, + { + "epoch": 1.79, + "learning_rate": 4.350793906979925e-05, + "loss": 0.3564, + "step": 2985500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3505843304169806e-05, + "loss": 0.3691, + "step": 2986000 + }, + { + "epoch": 1.79, + "learning_rate": 4.3503743338609246e-05, + "loss": 0.3603, + "step": 2986500 + }, + { + "epoch": 1.79, + "learning_rate": 4.350164337304868e-05, + "loss": 0.3701, + "step": 2987000 + }, + { + "epoch": 1.79, + "learning_rate": 4.349954340748811e-05, + "loss": 0.365, + "step": 2987500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3497443441927554e-05, + "loss": 0.368, + "step": 2988000 + }, + { + "epoch": 1.79, + "learning_rate": 4.349534347636699e-05, + "loss": 0.3646, + "step": 2988500 + }, + { + "epoch": 1.79, + "learning_rate": 4.349324771073754e-05, + "loss": 0.3729, + "step": 2989000 + }, + { + "epoch": 1.79, + "learning_rate": 4.349114774517698e-05, + "loss": 0.3631, + "step": 2989500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3489047779616414e-05, + "loss": 0.3728, + "step": 2990000 + }, + { + "epoch": 1.79, + "learning_rate": 4.348694781405585e-05, + "loss": 0.3705, + "step": 2990500 + }, + { + "epoch": 1.79, + "learning_rate": 4.348484784849529e-05, + "loss": 0.3657, + "step": 2991000 + }, + { + "epoch": 1.79, + "learning_rate": 4.348274788293472e-05, + "loss": 0.3694, + "step": 2991500 + }, + { + "epoch": 1.79, + "learning_rate": 4.3480647917374155e-05, + "loss": 0.3658, + "step": 2992000 + }, + { + "epoch": 1.79, + "learning_rate": 4.347855215174471e-05, + "loss": 0.3716, + "step": 2992500 + }, + { + "epoch": 1.79, + "learning_rate": 4.347645218618415e-05, + "loss": 0.3776, + "step": 2993000 + }, + { + "epoch": 1.79, + "learning_rate": 4.347435222062358e-05, + "loss": 0.3796, + "step": 2993500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3472256454994136e-05, + "loss": 0.3714, + "step": 2994000 + }, + { + "epoch": 1.8, + "learning_rate": 4.347015648943357e-05, + "loss": 0.3672, + "step": 2994500 + }, + { + "epoch": 1.8, + "learning_rate": 4.346805652387301e-05, + "loss": 0.3562, + "step": 2995000 + }, + { + "epoch": 1.8, + "learning_rate": 4.346595655831244e-05, + "loss": 0.3638, + "step": 2995500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3463856592751876e-05, + "loss": 0.3641, + "step": 2996000 + }, + { + "epoch": 1.8, + "learning_rate": 4.346175662719132e-05, + "loss": 0.3719, + "step": 2996500 + }, + { + "epoch": 1.8, + "learning_rate": 4.345965666163075e-05, + "loss": 0.3656, + "step": 2997000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3457556696070184e-05, + "loss": 0.3791, + "step": 2997500 + }, + { + "epoch": 1.8, + "learning_rate": 4.345545673050962e-05, + "loss": 0.3749, + "step": 2998000 + }, + { + "epoch": 1.8, + "learning_rate": 4.345335676494905e-05, + "loss": 0.3644, + "step": 2998500 + }, + { + "epoch": 1.8, + "learning_rate": 4.345125679938849e-05, + "loss": 0.3682, + "step": 2999000 + }, + { + "epoch": 1.8, + "learning_rate": 4.344916103375905e-05, + "loss": 0.3564, + "step": 2999500 + }, + { + "epoch": 1.8, + "learning_rate": 4.344706106819848e-05, + "loss": 0.364, + "step": 3000000 + }, + { + "epoch": 1.8, + "eval_loss": 0.35116276144981384, + "eval_runtime": 1120.4699, + "eval_samples_per_second": 470.088, + "eval_steps_per_second": 78.348, + "step": 3000000 + }, + { + "epoch": 1.8, + "learning_rate": 4.344496110263791e-05, + "loss": 0.37, + "step": 3000500 + }, + { + "epoch": 1.8, + "learning_rate": 4.344286533700847e-05, + "loss": 0.3574, + "step": 3001000 + }, + { + "epoch": 1.8, + "learning_rate": 4.344076537144791e-05, + "loss": 0.3713, + "step": 3001500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3438665405887345e-05, + "loss": 0.3561, + "step": 3002000 + }, + { + "epoch": 1.8, + "learning_rate": 4.343656544032677e-05, + "loss": 0.3707, + "step": 3002500 + }, + { + "epoch": 1.8, + "learning_rate": 4.343446547476621e-05, + "loss": 0.359, + "step": 3003000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3432365509205646e-05, + "loss": 0.3627, + "step": 3003500 + }, + { + "epoch": 1.8, + "learning_rate": 4.343026554364508e-05, + "loss": 0.364, + "step": 3004000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3428169778015646e-05, + "loss": 0.3545, + "step": 3004500 + }, + { + "epoch": 1.8, + "learning_rate": 4.342606981245507e-05, + "loss": 0.3729, + "step": 3005000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3423969846894506e-05, + "loss": 0.3588, + "step": 3005500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3421869881333947e-05, + "loss": 0.3579, + "step": 3006000 + }, + { + "epoch": 1.8, + "learning_rate": 4.341976991577338e-05, + "loss": 0.3685, + "step": 3006500 + }, + { + "epoch": 1.8, + "learning_rate": 4.3417669950212813e-05, + "loss": 0.3614, + "step": 3007000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3415569984652254e-05, + "loss": 0.3671, + "step": 3007500 + }, + { + "epoch": 1.8, + "learning_rate": 4.341347001909169e-05, + "loss": 0.3726, + "step": 3008000 + }, + { + "epoch": 1.8, + "learning_rate": 4.341137005353112e-05, + "loss": 0.3607, + "step": 3008500 + }, + { + "epoch": 1.8, + "learning_rate": 4.340927008797056e-05, + "loss": 0.3702, + "step": 3009000 + }, + { + "epoch": 1.8, + "learning_rate": 4.3407170122409994e-05, + "loss": 0.3692, + "step": 3009500 + }, + { + "epoch": 1.8, + "learning_rate": 4.340507015684943e-05, + "loss": 0.3638, + "step": 3010000 + }, + { + "epoch": 1.8, + "learning_rate": 4.340297439121998e-05, + "loss": 0.3654, + "step": 3010500 + }, + { + "epoch": 1.81, + "learning_rate": 4.340087442565942e-05, + "loss": 0.366, + "step": 3011000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3398778660029975e-05, + "loss": 0.3588, + "step": 3011500 + }, + { + "epoch": 1.81, + "learning_rate": 4.339667869446941e-05, + "loss": 0.3627, + "step": 3012000 + }, + { + "epoch": 1.81, + "learning_rate": 4.339457872890885e-05, + "loss": 0.3741, + "step": 3012500 + }, + { + "epoch": 1.81, + "learning_rate": 4.339247876334828e-05, + "loss": 0.3652, + "step": 3013000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3390378797787716e-05, + "loss": 0.3645, + "step": 3013500 + }, + { + "epoch": 1.81, + "learning_rate": 4.338828303215827e-05, + "loss": 0.3618, + "step": 3014000 + }, + { + "epoch": 1.81, + "learning_rate": 4.338618306659771e-05, + "loss": 0.3573, + "step": 3014500 + }, + { + "epoch": 1.81, + "learning_rate": 4.338408310103714e-05, + "loss": 0.3636, + "step": 3015000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3381983135476577e-05, + "loss": 0.361, + "step": 3015500 + }, + { + "epoch": 1.81, + "learning_rate": 4.337988316991602e-05, + "loss": 0.3621, + "step": 3016000 + }, + { + "epoch": 1.81, + "learning_rate": 4.337778320435545e-05, + "loss": 0.3697, + "step": 3016500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3375687438726004e-05, + "loss": 0.3705, + "step": 3017000 + }, + { + "epoch": 1.81, + "learning_rate": 4.337358747316544e-05, + "loss": 0.3675, + "step": 3017500 + }, + { + "epoch": 1.81, + "learning_rate": 4.337148750760488e-05, + "loss": 0.3651, + "step": 3018000 + }, + { + "epoch": 1.81, + "learning_rate": 4.336938754204431e-05, + "loss": 0.3582, + "step": 3018500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3367287576483744e-05, + "loss": 0.3654, + "step": 3019000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3365187610923185e-05, + "loss": 0.3651, + "step": 3019500 + }, + { + "epoch": 1.81, + "learning_rate": 4.336308764536261e-05, + "loss": 0.3646, + "step": 3020000 + }, + { + "epoch": 1.81, + "learning_rate": 4.336099187973317e-05, + "loss": 0.3709, + "step": 3020500 + }, + { + "epoch": 1.81, + "learning_rate": 4.335889191417261e-05, + "loss": 0.369, + "step": 3021000 + }, + { + "epoch": 1.81, + "learning_rate": 4.3356791948612045e-05, + "loss": 0.3687, + "step": 3021500 + }, + { + "epoch": 1.81, + "learning_rate": 4.335469198305148e-05, + "loss": 0.369, + "step": 3022000 + }, + { + "epoch": 1.81, + "learning_rate": 4.335259201749091e-05, + "loss": 0.3658, + "step": 3022500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3350492051930346e-05, + "loss": 0.3638, + "step": 3023000 + }, + { + "epoch": 1.81, + "learning_rate": 4.334839208636978e-05, + "loss": 0.3672, + "step": 3023500 + }, + { + "epoch": 1.81, + "learning_rate": 4.334629212080922e-05, + "loss": 0.3647, + "step": 3024000 + }, + { + "epoch": 1.81, + "learning_rate": 4.334419635517977e-05, + "loss": 0.3729, + "step": 3024500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3342096389619206e-05, + "loss": 0.3612, + "step": 3025000 + }, + { + "epoch": 1.81, + "learning_rate": 4.333999642405864e-05, + "loss": 0.3654, + "step": 3025500 + }, + { + "epoch": 1.81, + "learning_rate": 4.333789645849808e-05, + "loss": 0.3728, + "step": 3026000 + }, + { + "epoch": 1.81, + "learning_rate": 4.333580069286864e-05, + "loss": 0.3656, + "step": 3026500 + }, + { + "epoch": 1.81, + "learning_rate": 4.3333704927239194e-05, + "loss": 0.3666, + "step": 3027000 + }, + { + "epoch": 1.82, + "learning_rate": 4.333160496167863e-05, + "loss": 0.3629, + "step": 3027500 + }, + { + "epoch": 1.82, + "learning_rate": 4.332950499611807e-05, + "loss": 0.3668, + "step": 3028000 + }, + { + "epoch": 1.82, + "learning_rate": 4.33274050305575e-05, + "loss": 0.3577, + "step": 3028500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3325305064996935e-05, + "loss": 0.3645, + "step": 3029000 + }, + { + "epoch": 1.82, + "learning_rate": 4.332320509943637e-05, + "loss": 0.3549, + "step": 3029500 + }, + { + "epoch": 1.82, + "learning_rate": 4.33211051338758e-05, + "loss": 0.3721, + "step": 3030000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3319005168315235e-05, + "loss": 0.3678, + "step": 3030500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3316909402685795e-05, + "loss": 0.3722, + "step": 3031000 + }, + { + "epoch": 1.82, + "learning_rate": 4.331481363705635e-05, + "loss": 0.3631, + "step": 3031500 + }, + { + "epoch": 1.82, + "learning_rate": 4.331271367149579e-05, + "loss": 0.3632, + "step": 3032000 + }, + { + "epoch": 1.82, + "learning_rate": 4.331061370593522e-05, + "loss": 0.3625, + "step": 3032500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3308513740374656e-05, + "loss": 0.3617, + "step": 3033000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3306413774814096e-05, + "loss": 0.3584, + "step": 3033500 + }, + { + "epoch": 1.82, + "learning_rate": 4.330431800918465e-05, + "loss": 0.3618, + "step": 3034000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3302218043624083e-05, + "loss": 0.3632, + "step": 3034500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3300118078063524e-05, + "loss": 0.3663, + "step": 3035000 + }, + { + "epoch": 1.82, + "learning_rate": 4.329801811250296e-05, + "loss": 0.3667, + "step": 3035500 + }, + { + "epoch": 1.82, + "learning_rate": 4.329591814694239e-05, + "loss": 0.3646, + "step": 3036000 + }, + { + "epoch": 1.82, + "learning_rate": 4.3293818181381824e-05, + "loss": 0.3596, + "step": 3036500 + }, + { + "epoch": 1.82, + "learning_rate": 4.329171821582126e-05, + "loss": 0.3682, + "step": 3037000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328961825026069e-05, + "loss": 0.3617, + "step": 3037500 + }, + { + "epoch": 1.82, + "learning_rate": 4.328752248463125e-05, + "loss": 0.3611, + "step": 3038000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328542251907069e-05, + "loss": 0.3604, + "step": 3038500 + }, + { + "epoch": 1.82, + "learning_rate": 4.328332255351012e-05, + "loss": 0.3735, + "step": 3039000 + }, + { + "epoch": 1.82, + "learning_rate": 4.328122258794955e-05, + "loss": 0.3686, + "step": 3039500 + }, + { + "epoch": 1.82, + "learning_rate": 4.327912682232011e-05, + "loss": 0.3661, + "step": 3040000 + }, + { + "epoch": 1.82, + "learning_rate": 4.327702685675955e-05, + "loss": 0.3717, + "step": 3040500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3274926891198986e-05, + "loss": 0.3689, + "step": 3041000 + }, + { + "epoch": 1.82, + "learning_rate": 4.327282692563842e-05, + "loss": 0.3657, + "step": 3041500 + }, + { + "epoch": 1.82, + "learning_rate": 4.327072696007785e-05, + "loss": 0.3566, + "step": 3042000 + }, + { + "epoch": 1.82, + "learning_rate": 4.326863119444841e-05, + "loss": 0.3692, + "step": 3042500 + }, + { + "epoch": 1.82, + "learning_rate": 4.3266531228887846e-05, + "loss": 0.3767, + "step": 3043000 + }, + { + "epoch": 1.82, + "learning_rate": 4.326443126332729e-05, + "loss": 0.368, + "step": 3043500 + }, + { + "epoch": 1.83, + "learning_rate": 4.326233129776671e-05, + "loss": 0.3617, + "step": 3044000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3260235532137274e-05, + "loss": 0.365, + "step": 3044500 + }, + { + "epoch": 1.83, + "learning_rate": 4.325813556657671e-05, + "loss": 0.3707, + "step": 3045000 + }, + { + "epoch": 1.83, + "learning_rate": 4.325603560101615e-05, + "loss": 0.3662, + "step": 3045500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3253935635455574e-05, + "loss": 0.3633, + "step": 3046000 + }, + { + "epoch": 1.83, + "learning_rate": 4.325183566989501e-05, + "loss": 0.3681, + "step": 3046500 + }, + { + "epoch": 1.83, + "learning_rate": 4.324973570433445e-05, + "loss": 0.3525, + "step": 3047000 + }, + { + "epoch": 1.83, + "learning_rate": 4.324763573877388e-05, + "loss": 0.3574, + "step": 3047500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3245535773213315e-05, + "loss": 0.3609, + "step": 3048000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3243440007583875e-05, + "loss": 0.367, + "step": 3048500 + }, + { + "epoch": 1.83, + "learning_rate": 4.324134004202331e-05, + "loss": 0.3698, + "step": 3049000 + }, + { + "epoch": 1.83, + "learning_rate": 4.323924007646274e-05, + "loss": 0.3675, + "step": 3049500 + }, + { + "epoch": 1.83, + "learning_rate": 4.323714011090218e-05, + "loss": 0.3651, + "step": 3050000 + }, + { + "epoch": 1.83, + "learning_rate": 4.323504434527274e-05, + "loss": 0.3621, + "step": 3050500 + }, + { + "epoch": 1.83, + "learning_rate": 4.323294437971217e-05, + "loss": 0.3752, + "step": 3051000 + }, + { + "epoch": 1.83, + "learning_rate": 4.32308444141516e-05, + "loss": 0.3592, + "step": 3051500 + }, + { + "epoch": 1.83, + "learning_rate": 4.322874444859104e-05, + "loss": 0.362, + "step": 3052000 + }, + { + "epoch": 1.83, + "learning_rate": 4.32266486829616e-05, + "loss": 0.371, + "step": 3052500 + }, + { + "epoch": 1.83, + "learning_rate": 4.322455291733216e-05, + "loss": 0.364, + "step": 3053000 + }, + { + "epoch": 1.83, + "learning_rate": 4.322245295177159e-05, + "loss": 0.3675, + "step": 3053500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3220352986211024e-05, + "loss": 0.3654, + "step": 3054000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3218253020650464e-05, + "loss": 0.3696, + "step": 3054500 + }, + { + "epoch": 1.83, + "learning_rate": 4.32161530550899e-05, + "loss": 0.3732, + "step": 3055000 + }, + { + "epoch": 1.83, + "learning_rate": 4.321405308952933e-05, + "loss": 0.3682, + "step": 3055500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3211953123968764e-05, + "loss": 0.3695, + "step": 3056000 + }, + { + "epoch": 1.83, + "learning_rate": 4.3209857358339325e-05, + "loss": 0.3646, + "step": 3056500 + }, + { + "epoch": 1.83, + "learning_rate": 4.320775739277876e-05, + "loss": 0.3692, + "step": 3057000 + }, + { + "epoch": 1.83, + "learning_rate": 4.32056574272182e-05, + "loss": 0.3699, + "step": 3057500 + }, + { + "epoch": 1.83, + "learning_rate": 4.3203557461657625e-05, + "loss": 0.3679, + "step": 3058000 + }, + { + "epoch": 1.83, + "learning_rate": 4.320145749609706e-05, + "loss": 0.3557, + "step": 3058500 + }, + { + "epoch": 1.83, + "learning_rate": 4.31993575305365e-05, + "loss": 0.3651, + "step": 3059000 + }, + { + "epoch": 1.83, + "learning_rate": 4.319726176490706e-05, + "loss": 0.3683, + "step": 3059500 + }, + { + "epoch": 1.83, + "learning_rate": 4.319516179934649e-05, + "loss": 0.3693, + "step": 3060000 + }, + { + "epoch": 1.83, + "learning_rate": 4.319306183378592e-05, + "loss": 0.3642, + "step": 3060500 + }, + { + "epoch": 1.84, + "learning_rate": 4.319096186822536e-05, + "loss": 0.3594, + "step": 3061000 + }, + { + "epoch": 1.84, + "learning_rate": 4.318886190266479e-05, + "loss": 0.3574, + "step": 3061500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3186761937104226e-05, + "loss": 0.3714, + "step": 3062000 + }, + { + "epoch": 1.84, + "learning_rate": 4.318466197154367e-05, + "loss": 0.3619, + "step": 3062500 + }, + { + "epoch": 1.84, + "learning_rate": 4.31825620059831e-05, + "loss": 0.381, + "step": 3063000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3180466240353654e-05, + "loss": 0.371, + "step": 3063500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3178366274793094e-05, + "loss": 0.3656, + "step": 3064000 + }, + { + "epoch": 1.84, + "learning_rate": 4.317626630923253e-05, + "loss": 0.3694, + "step": 3064500 + }, + { + "epoch": 1.84, + "learning_rate": 4.317417054360308e-05, + "loss": 0.3648, + "step": 3065000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3172070578042514e-05, + "loss": 0.3625, + "step": 3065500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3169970612481955e-05, + "loss": 0.3717, + "step": 3066000 + }, + { + "epoch": 1.84, + "learning_rate": 4.316787064692139e-05, + "loss": 0.3621, + "step": 3066500 + }, + { + "epoch": 1.84, + "learning_rate": 4.316577068136082e-05, + "loss": 0.3668, + "step": 3067000 + }, + { + "epoch": 1.84, + "learning_rate": 4.316367071580026e-05, + "loss": 0.3585, + "step": 3067500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3161570750239695e-05, + "loss": 0.3652, + "step": 3068000 + }, + { + "epoch": 1.84, + "learning_rate": 4.315947078467913e-05, + "loss": 0.363, + "step": 3068500 + }, + { + "epoch": 1.84, + "learning_rate": 4.315737501904968e-05, + "loss": 0.3662, + "step": 3069000 + }, + { + "epoch": 1.84, + "learning_rate": 4.315527505348912e-05, + "loss": 0.3648, + "step": 3069500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3153175087928556e-05, + "loss": 0.3615, + "step": 3070000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3151075122367996e-05, + "loss": 0.3704, + "step": 3070500 + }, + { + "epoch": 1.84, + "learning_rate": 4.314897515680743e-05, + "loss": 0.3584, + "step": 3071000 + }, + { + "epoch": 1.84, + "learning_rate": 4.314687519124686e-05, + "loss": 0.3649, + "step": 3071500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3144775225686303e-05, + "loss": 0.3794, + "step": 3072000 + }, + { + "epoch": 1.84, + "learning_rate": 4.314267526012574e-05, + "loss": 0.3587, + "step": 3072500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3140575294565164e-05, + "loss": 0.3716, + "step": 3073000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3138479528935724e-05, + "loss": 0.3622, + "step": 3073500 + }, + { + "epoch": 1.84, + "learning_rate": 4.313638376330628e-05, + "loss": 0.3676, + "step": 3074000 + }, + { + "epoch": 1.84, + "learning_rate": 4.313428379774572e-05, + "loss": 0.3733, + "step": 3074500 + }, + { + "epoch": 1.84, + "learning_rate": 4.313218383218515e-05, + "loss": 0.3585, + "step": 3075000 + }, + { + "epoch": 1.84, + "learning_rate": 4.3130083866624585e-05, + "loss": 0.3638, + "step": 3075500 + }, + { + "epoch": 1.84, + "learning_rate": 4.3127983901064025e-05, + "loss": 0.3549, + "step": 3076000 + }, + { + "epoch": 1.84, + "learning_rate": 4.312588813543458e-05, + "loss": 0.3635, + "step": 3076500 + }, + { + "epoch": 1.84, + "learning_rate": 4.312378816987401e-05, + "loss": 0.3573, + "step": 3077000 + }, + { + "epoch": 1.85, + "learning_rate": 4.312168820431345e-05, + "loss": 0.3663, + "step": 3077500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3119588238752886e-05, + "loss": 0.3595, + "step": 3078000 + }, + { + "epoch": 1.85, + "learning_rate": 4.311748827319232e-05, + "loss": 0.3599, + "step": 3078500 + }, + { + "epoch": 1.85, + "learning_rate": 4.311538830763176e-05, + "loss": 0.3741, + "step": 3079000 + }, + { + "epoch": 1.85, + "learning_rate": 4.311328834207119e-05, + "loss": 0.3584, + "step": 3079500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3111188376510626e-05, + "loss": 0.3696, + "step": 3080000 + }, + { + "epoch": 1.85, + "learning_rate": 4.310909261088118e-05, + "loss": 0.3685, + "step": 3080500 + }, + { + "epoch": 1.85, + "learning_rate": 4.310699264532062e-05, + "loss": 0.3599, + "step": 3081000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3104892679760053e-05, + "loss": 0.3623, + "step": 3081500 + }, + { + "epoch": 1.85, + "learning_rate": 4.310279691413061e-05, + "loss": 0.3563, + "step": 3082000 + }, + { + "epoch": 1.85, + "learning_rate": 4.310070114850116e-05, + "loss": 0.3811, + "step": 3082500 + }, + { + "epoch": 1.85, + "learning_rate": 4.30986011829406e-05, + "loss": 0.3751, + "step": 3083000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3096501217380034e-05, + "loss": 0.3695, + "step": 3083500 + }, + { + "epoch": 1.85, + "learning_rate": 4.309440125181947e-05, + "loss": 0.3691, + "step": 3084000 + }, + { + "epoch": 1.85, + "learning_rate": 4.309230128625891e-05, + "loss": 0.3605, + "step": 3084500 + }, + { + "epoch": 1.85, + "learning_rate": 4.309020552062946e-05, + "loss": 0.3765, + "step": 3085000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3088105555068895e-05, + "loss": 0.3678, + "step": 3085500 + }, + { + "epoch": 1.85, + "learning_rate": 4.308600558950833e-05, + "loss": 0.3685, + "step": 3086000 + }, + { + "epoch": 1.85, + "learning_rate": 4.308390982387888e-05, + "loss": 0.3702, + "step": 3086500 + }, + { + "epoch": 1.85, + "learning_rate": 4.308180985831832e-05, + "loss": 0.3655, + "step": 3087000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3079709892757756e-05, + "loss": 0.3715, + "step": 3087500 + }, + { + "epoch": 1.85, + "learning_rate": 4.307760992719719e-05, + "loss": 0.3687, + "step": 3088000 + }, + { + "epoch": 1.85, + "learning_rate": 4.307550996163663e-05, + "loss": 0.3619, + "step": 3088500 + }, + { + "epoch": 1.85, + "learning_rate": 4.307340999607606e-05, + "loss": 0.3563, + "step": 3089000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3071310030515496e-05, + "loss": 0.3667, + "step": 3089500 + }, + { + "epoch": 1.85, + "learning_rate": 4.306921006495494e-05, + "loss": 0.3672, + "step": 3090000 + }, + { + "epoch": 1.85, + "learning_rate": 4.306711009939437e-05, + "loss": 0.359, + "step": 3090500 + }, + { + "epoch": 1.85, + "learning_rate": 4.3065010133833804e-05, + "loss": 0.3561, + "step": 3091000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3062910168273244e-05, + "loss": 0.3606, + "step": 3091500 + }, + { + "epoch": 1.85, + "learning_rate": 4.306081020271267e-05, + "loss": 0.3678, + "step": 3092000 + }, + { + "epoch": 1.85, + "learning_rate": 4.305871023715211e-05, + "loss": 0.3779, + "step": 3092500 + }, + { + "epoch": 1.85, + "learning_rate": 4.305661447152267e-05, + "loss": 0.3684, + "step": 3093000 + }, + { + "epoch": 1.85, + "learning_rate": 4.3054514505962105e-05, + "loss": 0.3628, + "step": 3093500 + }, + { + "epoch": 1.85, + "learning_rate": 4.305241454040154e-05, + "loss": 0.3605, + "step": 3094000 + }, + { + "epoch": 1.86, + "learning_rate": 4.305031457484097e-05, + "loss": 0.3689, + "step": 3094500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3048214609280405e-05, + "loss": 0.3709, + "step": 3095000 + }, + { + "epoch": 1.86, + "learning_rate": 4.304611464371984e-05, + "loss": 0.3575, + "step": 3095500 + }, + { + "epoch": 1.86, + "learning_rate": 4.304401467815928e-05, + "loss": 0.363, + "step": 3096000 + }, + { + "epoch": 1.86, + "learning_rate": 4.304191891252984e-05, + "loss": 0.3572, + "step": 3096500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3039818946969266e-05, + "loss": 0.3657, + "step": 3097000 + }, + { + "epoch": 1.86, + "learning_rate": 4.30377189814087e-05, + "loss": 0.3703, + "step": 3097500 + }, + { + "epoch": 1.86, + "learning_rate": 4.303561901584814e-05, + "loss": 0.3627, + "step": 3098000 + }, + { + "epoch": 1.86, + "learning_rate": 4.303351905028757e-05, + "loss": 0.3634, + "step": 3098500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3031419084727006e-05, + "loss": 0.3626, + "step": 3099000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3029319119166446e-05, + "loss": 0.3567, + "step": 3099500 + }, + { + "epoch": 1.86, + "learning_rate": 4.302721915360588e-05, + "loss": 0.3623, + "step": 3100000 + }, + { + "epoch": 1.86, + "eval_loss": 0.349761039018631, + "eval_runtime": 1142.9955, + "eval_samples_per_second": 460.824, + "eval_steps_per_second": 76.804, + "step": 3100000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3025123387976434e-05, + "loss": 0.3552, + "step": 3100500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3023023422415874e-05, + "loss": 0.3559, + "step": 3101000 + }, + { + "epoch": 1.86, + "learning_rate": 4.302092765678643e-05, + "loss": 0.3589, + "step": 3101500 + }, + { + "epoch": 1.86, + "learning_rate": 4.301882769122586e-05, + "loss": 0.3675, + "step": 3102000 + }, + { + "epoch": 1.86, + "learning_rate": 4.3016727725665294e-05, + "loss": 0.3617, + "step": 3102500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3014627760104734e-05, + "loss": 0.3608, + "step": 3103000 + }, + { + "epoch": 1.86, + "learning_rate": 4.301252779454417e-05, + "loss": 0.3632, + "step": 3103500 + }, + { + "epoch": 1.86, + "learning_rate": 4.30104278289836e-05, + "loss": 0.363, + "step": 3104000 + }, + { + "epoch": 1.86, + "learning_rate": 4.300832786342304e-05, + "loss": 0.3634, + "step": 3104500 + }, + { + "epoch": 1.86, + "learning_rate": 4.3006227897862475e-05, + "loss": 0.3573, + "step": 3105000 + }, + { + "epoch": 1.86, + "learning_rate": 4.300413213223303e-05, + "loss": 0.3693, + "step": 3105500 + }, + { + "epoch": 1.86, + "learning_rate": 4.300203216667246e-05, + "loss": 0.3641, + "step": 3106000 + }, + { + "epoch": 1.86, + "learning_rate": 4.29999322011119e-05, + "loss": 0.3681, + "step": 3106500 + }, + { + "epoch": 1.86, + "learning_rate": 4.2997832235551336e-05, + "loss": 0.3659, + "step": 3107000 + }, + { + "epoch": 1.86, + "learning_rate": 4.299573226999077e-05, + "loss": 0.3571, + "step": 3107500 + }, + { + "epoch": 1.86, + "learning_rate": 4.299363650436133e-05, + "loss": 0.3664, + "step": 3108000 + }, + { + "epoch": 1.86, + "learning_rate": 4.299153653880076e-05, + "loss": 0.3693, + "step": 3108500 + }, + { + "epoch": 1.86, + "learning_rate": 4.2989436573240197e-05, + "loss": 0.3684, + "step": 3109000 + }, + { + "epoch": 1.86, + "learning_rate": 4.298733660767964e-05, + "loss": 0.3712, + "step": 3109500 + }, + { + "epoch": 1.86, + "learning_rate": 4.298524084205019e-05, + "loss": 0.3694, + "step": 3110000 + }, + { + "epoch": 1.86, + "learning_rate": 4.2983140876489624e-05, + "loss": 0.365, + "step": 3110500 + }, + { + "epoch": 1.87, + "learning_rate": 4.298104091092906e-05, + "loss": 0.367, + "step": 3111000 + }, + { + "epoch": 1.87, + "learning_rate": 4.29789409453685e-05, + "loss": 0.3627, + "step": 3111500 + }, + { + "epoch": 1.87, + "learning_rate": 4.297684517973905e-05, + "loss": 0.3503, + "step": 3112000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2974745214178485e-05, + "loss": 0.3661, + "step": 3112500 + }, + { + "epoch": 1.87, + "learning_rate": 4.297264524861792e-05, + "loss": 0.3568, + "step": 3113000 + }, + { + "epoch": 1.87, + "learning_rate": 4.297054528305736e-05, + "loss": 0.3667, + "step": 3113500 + }, + { + "epoch": 1.87, + "learning_rate": 4.296845371735904e-05, + "loss": 0.3558, + "step": 3114000 + }, + { + "epoch": 1.87, + "learning_rate": 4.296635375179847e-05, + "loss": 0.3668, + "step": 3114500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2964253786237906e-05, + "loss": 0.366, + "step": 3115000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2962153820677346e-05, + "loss": 0.3623, + "step": 3115500 + }, + { + "epoch": 1.87, + "learning_rate": 4.29600580550479e-05, + "loss": 0.3676, + "step": 3116000 + }, + { + "epoch": 1.87, + "learning_rate": 4.295795808948733e-05, + "loss": 0.3569, + "step": 3116500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2955858123926766e-05, + "loss": 0.3632, + "step": 3117000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2953758158366207e-05, + "loss": 0.3554, + "step": 3117500 + }, + { + "epoch": 1.87, + "learning_rate": 4.295165819280564e-05, + "loss": 0.3612, + "step": 3118000 + }, + { + "epoch": 1.87, + "learning_rate": 4.294955822724507e-05, + "loss": 0.3687, + "step": 3118500 + }, + { + "epoch": 1.87, + "learning_rate": 4.294745826168451e-05, + "loss": 0.3688, + "step": 3119000 + }, + { + "epoch": 1.87, + "learning_rate": 4.294535829612394e-05, + "loss": 0.3659, + "step": 3119500 + }, + { + "epoch": 1.87, + "learning_rate": 4.29432625304945e-05, + "loss": 0.363, + "step": 3120000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2941162564933934e-05, + "loss": 0.3648, + "step": 3120500 + }, + { + "epoch": 1.87, + "learning_rate": 4.293906259937337e-05, + "loss": 0.3649, + "step": 3121000 + }, + { + "epoch": 1.87, + "learning_rate": 4.29369626338128e-05, + "loss": 0.3736, + "step": 3121500 + }, + { + "epoch": 1.87, + "learning_rate": 4.293486686818336e-05, + "loss": 0.3695, + "step": 3122000 + }, + { + "epoch": 1.87, + "learning_rate": 4.29327669026228e-05, + "loss": 0.3631, + "step": 3122500 + }, + { + "epoch": 1.87, + "learning_rate": 4.293066693706223e-05, + "loss": 0.3667, + "step": 3123000 + }, + { + "epoch": 1.87, + "learning_rate": 4.292856697150166e-05, + "loss": 0.3694, + "step": 3123500 + }, + { + "epoch": 1.87, + "learning_rate": 4.29264670059411e-05, + "loss": 0.3731, + "step": 3124000 + }, + { + "epoch": 1.87, + "learning_rate": 4.292437124031166e-05, + "loss": 0.3719, + "step": 3124500 + }, + { + "epoch": 1.87, + "learning_rate": 4.2922271274751096e-05, + "loss": 0.367, + "step": 3125000 + }, + { + "epoch": 1.87, + "learning_rate": 4.292017130919052e-05, + "loss": 0.373, + "step": 3125500 + }, + { + "epoch": 1.87, + "learning_rate": 4.291807134362996e-05, + "loss": 0.3597, + "step": 3126000 + }, + { + "epoch": 1.87, + "learning_rate": 4.2915971378069396e-05, + "loss": 0.3631, + "step": 3126500 + }, + { + "epoch": 1.87, + "learning_rate": 4.291387141250883e-05, + "loss": 0.3651, + "step": 3127000 + }, + { + "epoch": 1.88, + "learning_rate": 4.291177144694827e-05, + "loss": 0.3606, + "step": 3127500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2909671481387703e-05, + "loss": 0.3793, + "step": 3128000 + }, + { + "epoch": 1.88, + "learning_rate": 4.290757571575826e-05, + "loss": 0.3559, + "step": 3128500 + }, + { + "epoch": 1.88, + "learning_rate": 4.29054757501977e-05, + "loss": 0.3773, + "step": 3129000 + }, + { + "epoch": 1.88, + "learning_rate": 4.290337578463713e-05, + "loss": 0.3691, + "step": 3129500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2901275819076564e-05, + "loss": 0.3704, + "step": 3130000 + }, + { + "epoch": 1.88, + "learning_rate": 4.289918005344712e-05, + "loss": 0.3657, + "step": 3130500 + }, + { + "epoch": 1.88, + "learning_rate": 4.289708008788656e-05, + "loss": 0.3623, + "step": 3131000 + }, + { + "epoch": 1.88, + "learning_rate": 4.289498012232599e-05, + "loss": 0.3633, + "step": 3131500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2892880156765425e-05, + "loss": 0.3637, + "step": 3132000 + }, + { + "epoch": 1.88, + "learning_rate": 4.289078439113598e-05, + "loss": 0.3578, + "step": 3132500 + }, + { + "epoch": 1.88, + "learning_rate": 4.288868442557542e-05, + "loss": 0.3682, + "step": 3133000 + }, + { + "epoch": 1.88, + "learning_rate": 4.288658446001485e-05, + "loss": 0.3628, + "step": 3133500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2884484494454286e-05, + "loss": 0.3687, + "step": 3134000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2882388728824846e-05, + "loss": 0.3666, + "step": 3134500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2880292963195406e-05, + "loss": 0.3644, + "step": 3135000 + }, + { + "epoch": 1.88, + "learning_rate": 4.287819299763484e-05, + "loss": 0.3679, + "step": 3135500 + }, + { + "epoch": 1.88, + "learning_rate": 4.287609303207427e-05, + "loss": 0.3604, + "step": 3136000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2873993066513713e-05, + "loss": 0.3736, + "step": 3136500 + }, + { + "epoch": 1.88, + "learning_rate": 4.287189310095315e-05, + "loss": 0.3549, + "step": 3137000 + }, + { + "epoch": 1.88, + "learning_rate": 4.2869793135392574e-05, + "loss": 0.3671, + "step": 3137500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2867693169832014e-05, + "loss": 0.3621, + "step": 3138000 + }, + { + "epoch": 1.88, + "learning_rate": 4.286559320427145e-05, + "loss": 0.3526, + "step": 3138500 + }, + { + "epoch": 1.88, + "learning_rate": 4.286349743864201e-05, + "loss": 0.3628, + "step": 3139000 + }, + { + "epoch": 1.88, + "learning_rate": 4.286139747308144e-05, + "loss": 0.36, + "step": 3139500 + }, + { + "epoch": 1.88, + "learning_rate": 4.2859297507520875e-05, + "loss": 0.3615, + "step": 3140000 + }, + { + "epoch": 1.88, + "learning_rate": 4.285719754196031e-05, + "loss": 0.3599, + "step": 3140500 + }, + { + "epoch": 1.88, + "learning_rate": 4.285510177633087e-05, + "loss": 0.3595, + "step": 3141000 + }, + { + "epoch": 1.88, + "learning_rate": 4.285300181077031e-05, + "loss": 0.3637, + "step": 3141500 + }, + { + "epoch": 1.88, + "learning_rate": 4.285090184520974e-05, + "loss": 0.3635, + "step": 3142000 + }, + { + "epoch": 1.88, + "learning_rate": 4.284880187964917e-05, + "loss": 0.3703, + "step": 3142500 + }, + { + "epoch": 1.88, + "learning_rate": 4.284670611401973e-05, + "loss": 0.3622, + "step": 3143000 + }, + { + "epoch": 1.88, + "learning_rate": 4.284461034839028e-05, + "loss": 0.3657, + "step": 3143500 + }, + { + "epoch": 1.88, + "learning_rate": 4.284251038282972e-05, + "loss": 0.3524, + "step": 3144000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2840410417269156e-05, + "loss": 0.3697, + "step": 3144500 + }, + { + "epoch": 1.89, + "learning_rate": 4.283831045170859e-05, + "loss": 0.3654, + "step": 3145000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2836214686079143e-05, + "loss": 0.3804, + "step": 3145500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2834114720518584e-05, + "loss": 0.36, + "step": 3146000 + }, + { + "epoch": 1.89, + "learning_rate": 4.283201475495802e-05, + "loss": 0.357, + "step": 3146500 + }, + { + "epoch": 1.89, + "learning_rate": 4.282991478939745e-05, + "loss": 0.3631, + "step": 3147000 + }, + { + "epoch": 1.89, + "learning_rate": 4.282781482383689e-05, + "loss": 0.366, + "step": 3147500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2825714858276324e-05, + "loss": 0.359, + "step": 3148000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2823614892715765e-05, + "loss": 0.3665, + "step": 3148500 + }, + { + "epoch": 1.89, + "learning_rate": 4.282151912708632e-05, + "loss": 0.3569, + "step": 3149000 + }, + { + "epoch": 1.89, + "learning_rate": 4.281941916152575e-05, + "loss": 0.3589, + "step": 3149500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2817319195965185e-05, + "loss": 0.3571, + "step": 3150000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2815219230404625e-05, + "loss": 0.3744, + "step": 3150500 + }, + { + "epoch": 1.89, + "learning_rate": 4.281311926484406e-05, + "loss": 0.3582, + "step": 3151000 + }, + { + "epoch": 1.89, + "learning_rate": 4.281102349921461e-05, + "loss": 0.3544, + "step": 3151500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2808923533654046e-05, + "loss": 0.3615, + "step": 3152000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2806823568093486e-05, + "loss": 0.3718, + "step": 3152500 + }, + { + "epoch": 1.89, + "learning_rate": 4.280472360253292e-05, + "loss": 0.3573, + "step": 3153000 + }, + { + "epoch": 1.89, + "learning_rate": 4.280262363697235e-05, + "loss": 0.3594, + "step": 3153500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2800527871342906e-05, + "loss": 0.3724, + "step": 3154000 + }, + { + "epoch": 1.89, + "learning_rate": 4.279842790578235e-05, + "loss": 0.357, + "step": 3154500 + }, + { + "epoch": 1.89, + "learning_rate": 4.279632794022178e-05, + "loss": 0.3666, + "step": 3155000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2794232174592334e-05, + "loss": 0.3732, + "step": 3155500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2792132209031774e-05, + "loss": 0.37, + "step": 3156000 + }, + { + "epoch": 1.89, + "learning_rate": 4.279003224347121e-05, + "loss": 0.3816, + "step": 3156500 + }, + { + "epoch": 1.89, + "learning_rate": 4.278793227791064e-05, + "loss": 0.3619, + "step": 3157000 + }, + { + "epoch": 1.89, + "learning_rate": 4.278583231235008e-05, + "loss": 0.3571, + "step": 3157500 + }, + { + "epoch": 1.89, + "learning_rate": 4.2783732346789515e-05, + "loss": 0.3791, + "step": 3158000 + }, + { + "epoch": 1.89, + "learning_rate": 4.278163238122895e-05, + "loss": 0.3732, + "step": 3158500 + }, + { + "epoch": 1.89, + "learning_rate": 4.277953241566838e-05, + "loss": 0.3642, + "step": 3159000 + }, + { + "epoch": 1.89, + "learning_rate": 4.2777432450107815e-05, + "loss": 0.3507, + "step": 3159500 + }, + { + "epoch": 1.89, + "learning_rate": 4.277533248454725e-05, + "loss": 0.363, + "step": 3160000 + }, + { + "epoch": 1.89, + "learning_rate": 4.277323251898669e-05, + "loss": 0.3547, + "step": 3160500 + }, + { + "epoch": 1.9, + "learning_rate": 4.277113255342612e-05, + "loss": 0.3618, + "step": 3161000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2769036787796676e-05, + "loss": 0.3621, + "step": 3161500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2766941022167236e-05, + "loss": 0.3696, + "step": 3162000 + }, + { + "epoch": 1.9, + "learning_rate": 4.276484525653779e-05, + "loss": 0.3612, + "step": 3162500 + }, + { + "epoch": 1.9, + "learning_rate": 4.276274529097723e-05, + "loss": 0.3656, + "step": 3163000 + }, + { + "epoch": 1.9, + "learning_rate": 4.276064532541666e-05, + "loss": 0.3662, + "step": 3163500 + }, + { + "epoch": 1.9, + "learning_rate": 4.27585453598561e-05, + "loss": 0.3565, + "step": 3164000 + }, + { + "epoch": 1.9, + "learning_rate": 4.275644959422665e-05, + "loss": 0.3725, + "step": 3164500 + }, + { + "epoch": 1.9, + "learning_rate": 4.275434962866609e-05, + "loss": 0.3589, + "step": 3165000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2752249663105524e-05, + "loss": 0.3685, + "step": 3165500 + }, + { + "epoch": 1.9, + "learning_rate": 4.275014969754496e-05, + "loss": 0.3689, + "step": 3166000 + }, + { + "epoch": 1.9, + "learning_rate": 4.27480497319844e-05, + "loss": 0.364, + "step": 3166500 + }, + { + "epoch": 1.9, + "learning_rate": 4.274594976642383e-05, + "loss": 0.3631, + "step": 3167000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2743849800863265e-05, + "loss": 0.3781, + "step": 3167500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2741749835302705e-05, + "loss": 0.3628, + "step": 3168000 + }, + { + "epoch": 1.9, + "learning_rate": 4.273964986974213e-05, + "loss": 0.3579, + "step": 3168500 + }, + { + "epoch": 1.9, + "learning_rate": 4.273755410411269e-05, + "loss": 0.3672, + "step": 3169000 + }, + { + "epoch": 1.9, + "learning_rate": 4.273545413855213e-05, + "loss": 0.3632, + "step": 3169500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2733354172991566e-05, + "loss": 0.369, + "step": 3170000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2731254207431e-05, + "loss": 0.3627, + "step": 3170500 + }, + { + "epoch": 1.9, + "learning_rate": 4.272915424187043e-05, + "loss": 0.3617, + "step": 3171000 + }, + { + "epoch": 1.9, + "learning_rate": 4.2727054276309866e-05, + "loss": 0.3558, + "step": 3171500 + }, + { + "epoch": 1.9, + "learning_rate": 4.27249543107493e-05, + "loss": 0.3662, + "step": 3172000 + }, + { + "epoch": 1.9, + "learning_rate": 4.272285434518874e-05, + "loss": 0.3727, + "step": 3172500 + }, + { + "epoch": 1.9, + "learning_rate": 4.272075857955929e-05, + "loss": 0.371, + "step": 3173000 + }, + { + "epoch": 1.9, + "learning_rate": 4.271865861399873e-05, + "loss": 0.365, + "step": 3173500 + }, + { + "epoch": 1.9, + "learning_rate": 4.271655864843816e-05, + "loss": 0.3645, + "step": 3174000 + }, + { + "epoch": 1.9, + "learning_rate": 4.27144586828776e-05, + "loss": 0.3711, + "step": 3174500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2712358717317034e-05, + "loss": 0.3626, + "step": 3175000 + }, + { + "epoch": 1.9, + "learning_rate": 4.271025875175647e-05, + "loss": 0.3608, + "step": 3175500 + }, + { + "epoch": 1.9, + "learning_rate": 4.270816298612703e-05, + "loss": 0.3564, + "step": 3176000 + }, + { + "epoch": 1.9, + "learning_rate": 4.270606302056646e-05, + "loss": 0.3674, + "step": 3176500 + }, + { + "epoch": 1.9, + "learning_rate": 4.2703963055005895e-05, + "loss": 0.3566, + "step": 3177000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2701863089445335e-05, + "loss": 0.3657, + "step": 3177500 + }, + { + "epoch": 1.91, + "learning_rate": 4.269976312388477e-05, + "loss": 0.3615, + "step": 3178000 + }, + { + "epoch": 1.91, + "learning_rate": 4.26976631583242e-05, + "loss": 0.3686, + "step": 3178500 + }, + { + "epoch": 1.91, + "learning_rate": 4.269556319276364e-05, + "loss": 0.3611, + "step": 3179000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2693467427134196e-05, + "loss": 0.361, + "step": 3179500 + }, + { + "epoch": 1.91, + "learning_rate": 4.269136746157363e-05, + "loss": 0.3604, + "step": 3180000 + }, + { + "epoch": 1.91, + "learning_rate": 4.268926749601306e-05, + "loss": 0.3733, + "step": 3180500 + }, + { + "epoch": 1.91, + "learning_rate": 4.26871675304525e-05, + "loss": 0.3553, + "step": 3181000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2685067564891936e-05, + "loss": 0.371, + "step": 3181500 + }, + { + "epoch": 1.91, + "learning_rate": 4.268296759933137e-05, + "loss": 0.3574, + "step": 3182000 + }, + { + "epoch": 1.91, + "learning_rate": 4.268086763377081e-05, + "loss": 0.3553, + "step": 3182500 + }, + { + "epoch": 1.91, + "learning_rate": 4.267876766821024e-05, + "loss": 0.3581, + "step": 3183000 + }, + { + "epoch": 1.91, + "learning_rate": 4.26766719025808e-05, + "loss": 0.3671, + "step": 3183500 + }, + { + "epoch": 1.91, + "learning_rate": 4.267457193702023e-05, + "loss": 0.3617, + "step": 3184000 + }, + { + "epoch": 1.91, + "learning_rate": 4.267247197145967e-05, + "loss": 0.3585, + "step": 3184500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2670372005899104e-05, + "loss": 0.3592, + "step": 3185000 + }, + { + "epoch": 1.91, + "learning_rate": 4.266827624026966e-05, + "loss": 0.3654, + "step": 3185500 + }, + { + "epoch": 1.91, + "learning_rate": 4.26661762747091e-05, + "loss": 0.3653, + "step": 3186000 + }, + { + "epoch": 1.91, + "learning_rate": 4.266407630914853e-05, + "loss": 0.3622, + "step": 3186500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2661976343587965e-05, + "loss": 0.361, + "step": 3187000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265988057795852e-05, + "loss": 0.368, + "step": 3187500 + }, + { + "epoch": 1.91, + "learning_rate": 4.265778061239796e-05, + "loss": 0.3622, + "step": 3188000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265568484676851e-05, + "loss": 0.3555, + "step": 3188500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2653584881207946e-05, + "loss": 0.3583, + "step": 3189000 + }, + { + "epoch": 1.91, + "learning_rate": 4.265148491564738e-05, + "loss": 0.3555, + "step": 3189500 + }, + { + "epoch": 1.91, + "learning_rate": 4.264938495008682e-05, + "loss": 0.3601, + "step": 3190000 + }, + { + "epoch": 1.91, + "learning_rate": 4.264728498452625e-05, + "loss": 0.3609, + "step": 3190500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2645185018965686e-05, + "loss": 0.3633, + "step": 3191000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2643089253336247e-05, + "loss": 0.3673, + "step": 3191500 + }, + { + "epoch": 1.91, + "learning_rate": 4.264098928777568e-05, + "loss": 0.3657, + "step": 3192000 + }, + { + "epoch": 1.91, + "learning_rate": 4.2638889322215113e-05, + "loss": 0.3582, + "step": 3192500 + }, + { + "epoch": 1.91, + "learning_rate": 4.2636789356654554e-05, + "loss": 0.3489, + "step": 3193000 + }, + { + "epoch": 1.91, + "learning_rate": 4.263468939109399e-05, + "loss": 0.361, + "step": 3193500 + }, + { + "epoch": 1.91, + "learning_rate": 4.263258942553342e-05, + "loss": 0.3672, + "step": 3194000 + }, + { + "epoch": 1.92, + "learning_rate": 4.263048945997286e-05, + "loss": 0.366, + "step": 3194500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2628389494412294e-05, + "loss": 0.3582, + "step": 3195000 + }, + { + "epoch": 1.92, + "learning_rate": 4.262629372878285e-05, + "loss": 0.3582, + "step": 3195500 + }, + { + "epoch": 1.92, + "learning_rate": 4.262419376322228e-05, + "loss": 0.3659, + "step": 3196000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2622097997592835e-05, + "loss": 0.3548, + "step": 3196500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2619998032032275e-05, + "loss": 0.3661, + "step": 3197000 + }, + { + "epoch": 1.92, + "learning_rate": 4.261789806647171e-05, + "loss": 0.3605, + "step": 3197500 + }, + { + "epoch": 1.92, + "learning_rate": 4.261579810091114e-05, + "loss": 0.3614, + "step": 3198000 + }, + { + "epoch": 1.92, + "learning_rate": 4.261369813535058e-05, + "loss": 0.3679, + "step": 3198500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2611602369721136e-05, + "loss": 0.3602, + "step": 3199000 + }, + { + "epoch": 1.92, + "learning_rate": 4.260950240416057e-05, + "loss": 0.3669, + "step": 3199500 + }, + { + "epoch": 1.92, + "learning_rate": 4.260740243860001e-05, + "loss": 0.3585, + "step": 3200000 + }, + { + "epoch": 1.92, + "eval_loss": 0.3485775291919708, + "eval_runtime": 1121.258, + "eval_samples_per_second": 469.758, + "eval_steps_per_second": 78.293, + "step": 3200000 + }, + { + "epoch": 1.92, + "learning_rate": 4.260530247303944e-05, + "loss": 0.3697, + "step": 3200500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2603202507478877e-05, + "loss": 0.3736, + "step": 3201000 + }, + { + "epoch": 1.92, + "learning_rate": 4.260110674184943e-05, + "loss": 0.3561, + "step": 3201500 + }, + { + "epoch": 1.92, + "learning_rate": 4.259900677628887e-05, + "loss": 0.363, + "step": 3202000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2596906810728304e-05, + "loss": 0.367, + "step": 3202500 + }, + { + "epoch": 1.92, + "learning_rate": 4.259480684516774e-05, + "loss": 0.3662, + "step": 3203000 + }, + { + "epoch": 1.92, + "learning_rate": 4.259270687960718e-05, + "loss": 0.3612, + "step": 3203500 + }, + { + "epoch": 1.92, + "learning_rate": 4.259060691404661e-05, + "loss": 0.3582, + "step": 3204000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2588506948486044e-05, + "loss": 0.3671, + "step": 3204500 + }, + { + "epoch": 1.92, + "learning_rate": 4.25864111828566e-05, + "loss": 0.3619, + "step": 3205000 + }, + { + "epoch": 1.92, + "learning_rate": 4.258431121729604e-05, + "loss": 0.3689, + "step": 3205500 + }, + { + "epoch": 1.92, + "learning_rate": 4.258221125173547e-05, + "loss": 0.3663, + "step": 3206000 + }, + { + "epoch": 1.92, + "learning_rate": 4.2580111286174905e-05, + "loss": 0.37, + "step": 3206500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2578011320614345e-05, + "loss": 0.3675, + "step": 3207000 + }, + { + "epoch": 1.92, + "learning_rate": 4.257591135505377e-05, + "loss": 0.3585, + "step": 3207500 + }, + { + "epoch": 1.92, + "learning_rate": 4.257381138949321e-05, + "loss": 0.3658, + "step": 3208000 + }, + { + "epoch": 1.92, + "learning_rate": 4.257171562386377e-05, + "loss": 0.3577, + "step": 3208500 + }, + { + "epoch": 1.92, + "learning_rate": 4.2569615658303206e-05, + "loss": 0.3679, + "step": 3209000 + }, + { + "epoch": 1.92, + "learning_rate": 4.256751569274263e-05, + "loss": 0.3689, + "step": 3209500 + }, + { + "epoch": 1.92, + "learning_rate": 4.256541572718207e-05, + "loss": 0.37, + "step": 3210000 + }, + { + "epoch": 1.92, + "learning_rate": 4.256331996155263e-05, + "loss": 0.3554, + "step": 3210500 + }, + { + "epoch": 1.93, + "learning_rate": 4.256121999599207e-05, + "loss": 0.3701, + "step": 3211000 + }, + { + "epoch": 1.93, + "learning_rate": 4.25591200304315e-05, + "loss": 0.3663, + "step": 3211500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2557020064870934e-05, + "loss": 0.3651, + "step": 3212000 + }, + { + "epoch": 1.93, + "learning_rate": 4.255492009931037e-05, + "loss": 0.3621, + "step": 3212500 + }, + { + "epoch": 1.93, + "learning_rate": 4.25528201337498e-05, + "loss": 0.3663, + "step": 3213000 + }, + { + "epoch": 1.93, + "learning_rate": 4.255072016818924e-05, + "loss": 0.3666, + "step": 3213500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2548620202628674e-05, + "loss": 0.3664, + "step": 3214000 + }, + { + "epoch": 1.93, + "learning_rate": 4.254652443699923e-05, + "loss": 0.3633, + "step": 3214500 + }, + { + "epoch": 1.93, + "learning_rate": 4.254442447143867e-05, + "loss": 0.3611, + "step": 3215000 + }, + { + "epoch": 1.93, + "learning_rate": 4.25423245058781e-05, + "loss": 0.3535, + "step": 3215500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2540224540317535e-05, + "loss": 0.3584, + "step": 3216000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2538128774688095e-05, + "loss": 0.3772, + "step": 3216500 + }, + { + "epoch": 1.93, + "learning_rate": 4.253602880912753e-05, + "loss": 0.3561, + "step": 3217000 + }, + { + "epoch": 1.93, + "learning_rate": 4.253392884356696e-05, + "loss": 0.3665, + "step": 3217500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2531828878006396e-05, + "loss": 0.3672, + "step": 3218000 + }, + { + "epoch": 1.93, + "learning_rate": 4.2529728912445836e-05, + "loss": 0.3551, + "step": 3218500 + }, + { + "epoch": 1.93, + "learning_rate": 4.252762894688527e-05, + "loss": 0.3637, + "step": 3219000 + }, + { + "epoch": 1.93, + "learning_rate": 4.252553318125582e-05, + "loss": 0.3562, + "step": 3219500 + }, + { + "epoch": 1.93, + "learning_rate": 4.252343321569526e-05, + "loss": 0.3655, + "step": 3220000 + }, + { + "epoch": 1.93, + "learning_rate": 4.25213332501347e-05, + "loss": 0.3639, + "step": 3220500 + }, + { + "epoch": 1.93, + "learning_rate": 4.251923328457413e-05, + "loss": 0.3724, + "step": 3221000 + }, + { + "epoch": 1.93, + "learning_rate": 4.251713331901357e-05, + "loss": 0.3728, + "step": 3221500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2515037553384124e-05, + "loss": 0.358, + "step": 3222000 + }, + { + "epoch": 1.93, + "learning_rate": 4.251293758782356e-05, + "loss": 0.3652, + "step": 3222500 + }, + { + "epoch": 1.93, + "learning_rate": 4.251083762226299e-05, + "loss": 0.3603, + "step": 3223000 + }, + { + "epoch": 1.93, + "learning_rate": 4.250873765670243e-05, + "loss": 0.3539, + "step": 3223500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2506637691141865e-05, + "loss": 0.3606, + "step": 3224000 + }, + { + "epoch": 1.93, + "learning_rate": 4.25045377255813e-05, + "loss": 0.3689, + "step": 3224500 + }, + { + "epoch": 1.93, + "learning_rate": 4.250244195995185e-05, + "loss": 0.3694, + "step": 3225000 + }, + { + "epoch": 1.93, + "learning_rate": 4.250034199439129e-05, + "loss": 0.3715, + "step": 3225500 + }, + { + "epoch": 1.93, + "learning_rate": 4.2498242028830725e-05, + "loss": 0.3645, + "step": 3226000 + }, + { + "epoch": 1.93, + "learning_rate": 4.249614206327016e-05, + "loss": 0.3526, + "step": 3226500 + }, + { + "epoch": 1.93, + "learning_rate": 4.24940420977096e-05, + "loss": 0.3635, + "step": 3227000 + }, + { + "epoch": 1.94, + "learning_rate": 4.249194213214903e-05, + "loss": 0.363, + "step": 3227500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2489842166588466e-05, + "loss": 0.3636, + "step": 3228000 + }, + { + "epoch": 1.94, + "learning_rate": 4.2487742201027906e-05, + "loss": 0.3641, + "step": 3228500 + }, + { + "epoch": 1.94, + "learning_rate": 4.248564643539846e-05, + "loss": 0.3733, + "step": 3229000 + }, + { + "epoch": 1.94, + "learning_rate": 4.248354646983789e-05, + "loss": 0.3679, + "step": 3229500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2481446504277334e-05, + "loss": 0.3568, + "step": 3230000 + }, + { + "epoch": 1.94, + "learning_rate": 4.247934653871677e-05, + "loss": 0.3595, + "step": 3230500 + }, + { + "epoch": 1.94, + "learning_rate": 4.24772465731562e-05, + "loss": 0.3606, + "step": 3231000 + }, + { + "epoch": 1.94, + "learning_rate": 4.247514660759564e-05, + "loss": 0.3586, + "step": 3231500 + }, + { + "epoch": 1.94, + "learning_rate": 4.247304664203507e-05, + "loss": 0.3602, + "step": 3232000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24709466764745e-05, + "loss": 0.3668, + "step": 3232500 + }, + { + "epoch": 1.94, + "learning_rate": 4.246885091084506e-05, + "loss": 0.3677, + "step": 3233000 + }, + { + "epoch": 1.94, + "learning_rate": 4.24667509452845e-05, + "loss": 0.3542, + "step": 3233500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2464650979723935e-05, + "loss": 0.3657, + "step": 3234000 + }, + { + "epoch": 1.94, + "learning_rate": 4.246255101416336e-05, + "loss": 0.3612, + "step": 3234500 + }, + { + "epoch": 1.94, + "learning_rate": 4.246045944846504e-05, + "loss": 0.3504, + "step": 3235000 + }, + { + "epoch": 1.94, + "learning_rate": 4.245835948290448e-05, + "loss": 0.3672, + "step": 3235500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2456259517343916e-05, + "loss": 0.3574, + "step": 3236000 + }, + { + "epoch": 1.94, + "learning_rate": 4.245415955178335e-05, + "loss": 0.3571, + "step": 3236500 + }, + { + "epoch": 1.94, + "learning_rate": 4.245205958622279e-05, + "loss": 0.3604, + "step": 3237000 + }, + { + "epoch": 1.94, + "learning_rate": 4.244995962066222e-05, + "loss": 0.3578, + "step": 3237500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2447859655101656e-05, + "loss": 0.3579, + "step": 3238000 + }, + { + "epoch": 1.94, + "learning_rate": 4.2445759689541097e-05, + "loss": 0.3596, + "step": 3238500 + }, + { + "epoch": 1.94, + "learning_rate": 4.244365972398052e-05, + "loss": 0.3607, + "step": 3239000 + }, + { + "epoch": 1.94, + "learning_rate": 4.244155975841996e-05, + "loss": 0.3619, + "step": 3239500 + }, + { + "epoch": 1.94, + "learning_rate": 4.24394597928594e-05, + "loss": 0.3651, + "step": 3240000 + }, + { + "epoch": 1.94, + "learning_rate": 4.243736402722996e-05, + "loss": 0.3515, + "step": 3240500 + }, + { + "epoch": 1.94, + "learning_rate": 4.243526406166939e-05, + "loss": 0.379, + "step": 3241000 + }, + { + "epoch": 1.94, + "learning_rate": 4.243316409610882e-05, + "loss": 0.3665, + "step": 3241500 + }, + { + "epoch": 1.94, + "learning_rate": 4.243106413054826e-05, + "loss": 0.3595, + "step": 3242000 + }, + { + "epoch": 1.94, + "learning_rate": 4.242896416498769e-05, + "loss": 0.3716, + "step": 3242500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2426864199427125e-05, + "loss": 0.3652, + "step": 3243000 + }, + { + "epoch": 1.94, + "learning_rate": 4.2424764233866565e-05, + "loss": 0.3568, + "step": 3243500 + }, + { + "epoch": 1.94, + "learning_rate": 4.2422664268306e-05, + "loss": 0.3638, + "step": 3244000 + }, + { + "epoch": 1.95, + "learning_rate": 4.242057270260768e-05, + "loss": 0.3734, + "step": 3244500 + }, + { + "epoch": 1.95, + "learning_rate": 4.241847273704711e-05, + "loss": 0.3595, + "step": 3245000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2416376971417666e-05, + "loss": 0.3792, + "step": 3245500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2414277005857106e-05, + "loss": 0.3656, + "step": 3246000 + }, + { + "epoch": 1.95, + "learning_rate": 4.241218544015878e-05, + "loss": 0.36, + "step": 3246500 + }, + { + "epoch": 1.95, + "learning_rate": 4.241008547459822e-05, + "loss": 0.3595, + "step": 3247000 + }, + { + "epoch": 1.95, + "learning_rate": 4.240798550903765e-05, + "loss": 0.3653, + "step": 3247500 + }, + { + "epoch": 1.95, + "learning_rate": 4.240588554347709e-05, + "loss": 0.3615, + "step": 3248000 + }, + { + "epoch": 1.95, + "learning_rate": 4.240378557791652e-05, + "loss": 0.3538, + "step": 3248500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2401685612355954e-05, + "loss": 0.3677, + "step": 3249000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2399585646795394e-05, + "loss": 0.3725, + "step": 3249500 + }, + { + "epoch": 1.95, + "learning_rate": 4.239748568123483e-05, + "loss": 0.3538, + "step": 3250000 + }, + { + "epoch": 1.95, + "learning_rate": 4.239538571567426e-05, + "loss": 0.3745, + "step": 3250500 + }, + { + "epoch": 1.95, + "learning_rate": 4.23932857501137e-05, + "loss": 0.3572, + "step": 3251000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2391185784553135e-05, + "loss": 0.3628, + "step": 3251500 + }, + { + "epoch": 1.95, + "learning_rate": 4.238908581899257e-05, + "loss": 0.366, + "step": 3252000 + }, + { + "epoch": 1.95, + "learning_rate": 4.238698585343201e-05, + "loss": 0.3655, + "step": 3252500 + }, + { + "epoch": 1.95, + "learning_rate": 4.238488588787144e-05, + "loss": 0.3631, + "step": 3253000 + }, + { + "epoch": 1.95, + "learning_rate": 4.238278592231087e-05, + "loss": 0.3624, + "step": 3253500 + }, + { + "epoch": 1.95, + "learning_rate": 4.238068595675031e-05, + "loss": 0.3507, + "step": 3254000 + }, + { + "epoch": 1.95, + "learning_rate": 4.237858599118974e-05, + "loss": 0.3612, + "step": 3254500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2376486025629176e-05, + "loss": 0.3562, + "step": 3255000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2374390259999736e-05, + "loss": 0.3699, + "step": 3255500 + }, + { + "epoch": 1.95, + "learning_rate": 4.237229029443917e-05, + "loss": 0.3608, + "step": 3256000 + }, + { + "epoch": 1.95, + "learning_rate": 4.23701903288786e-05, + "loss": 0.3601, + "step": 3256500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2368090363318036e-05, + "loss": 0.361, + "step": 3257000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2365990397757477e-05, + "loss": 0.3609, + "step": 3257500 + }, + { + "epoch": 1.95, + "learning_rate": 4.236389043219691e-05, + "loss": 0.3633, + "step": 3258000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2361794666567464e-05, + "loss": 0.3643, + "step": 3258500 + }, + { + "epoch": 1.95, + "learning_rate": 4.2359694701006904e-05, + "loss": 0.3575, + "step": 3259000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2357598935377464e-05, + "loss": 0.3615, + "step": 3259500 + }, + { + "epoch": 1.95, + "learning_rate": 4.23554989698169e-05, + "loss": 0.3584, + "step": 3260000 + }, + { + "epoch": 1.95, + "learning_rate": 4.2353399004256324e-05, + "loss": 0.3541, + "step": 3260500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2351299038695765e-05, + "loss": 0.3582, + "step": 3261000 + }, + { + "epoch": 1.96, + "learning_rate": 4.23491990731352e-05, + "loss": 0.363, + "step": 3261500 + }, + { + "epoch": 1.96, + "learning_rate": 4.234709910757463e-05, + "loss": 0.3605, + "step": 3262000 + }, + { + "epoch": 1.96, + "learning_rate": 4.234499914201407e-05, + "loss": 0.3661, + "step": 3262500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2342899176453505e-05, + "loss": 0.3453, + "step": 3263000 + }, + { + "epoch": 1.96, + "learning_rate": 4.234079921089294e-05, + "loss": 0.3768, + "step": 3263500 + }, + { + "epoch": 1.96, + "learning_rate": 4.233869924533238e-05, + "loss": 0.353, + "step": 3264000 + }, + { + "epoch": 1.96, + "learning_rate": 4.233659927977181e-05, + "loss": 0.357, + "step": 3264500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2334503514142366e-05, + "loss": 0.3658, + "step": 3265000 + }, + { + "epoch": 1.96, + "learning_rate": 4.23324035485818e-05, + "loss": 0.3747, + "step": 3265500 + }, + { + "epoch": 1.96, + "learning_rate": 4.233030358302124e-05, + "loss": 0.364, + "step": 3266000 + }, + { + "epoch": 1.96, + "learning_rate": 4.232820361746067e-05, + "loss": 0.3613, + "step": 3266500 + }, + { + "epoch": 1.96, + "learning_rate": 4.232610785183123e-05, + "loss": 0.3729, + "step": 3267000 + }, + { + "epoch": 1.96, + "learning_rate": 4.232400788627067e-05, + "loss": 0.3606, + "step": 3267500 + }, + { + "epoch": 1.96, + "learning_rate": 4.23219079207101e-05, + "loss": 0.3624, + "step": 3268000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2319807955149534e-05, + "loss": 0.364, + "step": 3268500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2317707989588974e-05, + "loss": 0.3573, + "step": 3269000 + }, + { + "epoch": 1.96, + "learning_rate": 4.231560802402841e-05, + "loss": 0.3696, + "step": 3269500 + }, + { + "epoch": 1.96, + "learning_rate": 4.231350805846784e-05, + "loss": 0.3617, + "step": 3270000 + }, + { + "epoch": 1.96, + "learning_rate": 4.231140809290728e-05, + "loss": 0.3592, + "step": 3270500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2309312327277835e-05, + "loss": 0.3618, + "step": 3271000 + }, + { + "epoch": 1.96, + "learning_rate": 4.230721236171727e-05, + "loss": 0.353, + "step": 3271500 + }, + { + "epoch": 1.96, + "learning_rate": 4.23051123961567e-05, + "loss": 0.3611, + "step": 3272000 + }, + { + "epoch": 1.96, + "learning_rate": 4.230301243059614e-05, + "loss": 0.3608, + "step": 3272500 + }, + { + "epoch": 1.96, + "learning_rate": 4.230091246503557e-05, + "loss": 0.3597, + "step": 3273000 + }, + { + "epoch": 1.96, + "learning_rate": 4.229881249947501e-05, + "loss": 0.3555, + "step": 3273500 + }, + { + "epoch": 1.96, + "learning_rate": 4.229671253391444e-05, + "loss": 0.361, + "step": 3274000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2294616768285e-05, + "loss": 0.3682, + "step": 3274500 + }, + { + "epoch": 1.96, + "learning_rate": 4.2292516802724436e-05, + "loss": 0.3557, + "step": 3275000 + }, + { + "epoch": 1.96, + "learning_rate": 4.229041683716387e-05, + "loss": 0.3583, + "step": 3275500 + }, + { + "epoch": 1.96, + "learning_rate": 4.22883168716033e-05, + "loss": 0.3672, + "step": 3276000 + }, + { + "epoch": 1.96, + "learning_rate": 4.2286216906042736e-05, + "loss": 0.359, + "step": 3276500 + }, + { + "epoch": 1.96, + "learning_rate": 4.22841211404133e-05, + "loss": 0.3572, + "step": 3277000 + }, + { + "epoch": 1.96, + "learning_rate": 4.228202117485274e-05, + "loss": 0.3593, + "step": 3277500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2279921209292164e-05, + "loss": 0.3679, + "step": 3278000 + }, + { + "epoch": 1.97, + "learning_rate": 4.22778212437316e-05, + "loss": 0.3659, + "step": 3278500 + }, + { + "epoch": 1.97, + "learning_rate": 4.227572127817104e-05, + "loss": 0.3583, + "step": 3279000 + }, + { + "epoch": 1.97, + "learning_rate": 4.227362131261047e-05, + "loss": 0.351, + "step": 3279500 + }, + { + "epoch": 1.97, + "learning_rate": 4.227152554698103e-05, + "loss": 0.3718, + "step": 3280000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2269425581420465e-05, + "loss": 0.3554, + "step": 3280500 + }, + { + "epoch": 1.97, + "learning_rate": 4.22673256158599e-05, + "loss": 0.3614, + "step": 3281000 + }, + { + "epoch": 1.97, + "learning_rate": 4.226522565029933e-05, + "loss": 0.3609, + "step": 3281500 + }, + { + "epoch": 1.97, + "learning_rate": 4.226312988466989e-05, + "loss": 0.3661, + "step": 3282000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2261034119040446e-05, + "loss": 0.3505, + "step": 3282500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2258934153479886e-05, + "loss": 0.3607, + "step": 3283000 + }, + { + "epoch": 1.97, + "learning_rate": 4.225683418791932e-05, + "loss": 0.3661, + "step": 3283500 + }, + { + "epoch": 1.97, + "learning_rate": 4.225473422235875e-05, + "loss": 0.3656, + "step": 3284000 + }, + { + "epoch": 1.97, + "learning_rate": 4.225263425679819e-05, + "loss": 0.3582, + "step": 3284500 + }, + { + "epoch": 1.97, + "learning_rate": 4.225053429123762e-05, + "loss": 0.363, + "step": 3285000 + }, + { + "epoch": 1.97, + "learning_rate": 4.224843432567705e-05, + "loss": 0.3613, + "step": 3285500 + }, + { + "epoch": 1.97, + "learning_rate": 4.224633436011649e-05, + "loss": 0.3649, + "step": 3286000 + }, + { + "epoch": 1.97, + "learning_rate": 4.224423439455593e-05, + "loss": 0.3628, + "step": 3286500 + }, + { + "epoch": 1.97, + "learning_rate": 4.224213442899536e-05, + "loss": 0.3572, + "step": 3287000 + }, + { + "epoch": 1.97, + "learning_rate": 4.22400344634348e-05, + "loss": 0.3549, + "step": 3287500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2237934497874234e-05, + "loss": 0.3693, + "step": 3288000 + }, + { + "epoch": 1.97, + "learning_rate": 4.223583873224479e-05, + "loss": 0.3684, + "step": 3288500 + }, + { + "epoch": 1.97, + "learning_rate": 4.223373876668423e-05, + "loss": 0.3578, + "step": 3289000 + }, + { + "epoch": 1.97, + "learning_rate": 4.223163880112366e-05, + "loss": 0.3549, + "step": 3289500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2229538835563095e-05, + "loss": 0.3506, + "step": 3290000 + }, + { + "epoch": 1.97, + "learning_rate": 4.2227438870002535e-05, + "loss": 0.3606, + "step": 3290500 + }, + { + "epoch": 1.97, + "learning_rate": 4.222534310437309e-05, + "loss": 0.3573, + "step": 3291000 + }, + { + "epoch": 1.97, + "learning_rate": 4.222324313881252e-05, + "loss": 0.3749, + "step": 3291500 + }, + { + "epoch": 1.97, + "learning_rate": 4.222114737318308e-05, + "loss": 0.3619, + "step": 3292000 + }, + { + "epoch": 1.97, + "learning_rate": 4.221904740762251e-05, + "loss": 0.3624, + "step": 3292500 + }, + { + "epoch": 1.97, + "learning_rate": 4.221694744206195e-05, + "loss": 0.3684, + "step": 3293000 + }, + { + "epoch": 1.97, + "learning_rate": 4.221484747650138e-05, + "loss": 0.3564, + "step": 3293500 + }, + { + "epoch": 1.97, + "learning_rate": 4.2212747510940816e-05, + "loss": 0.3519, + "step": 3294000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2210651745311376e-05, + "loss": 0.3635, + "step": 3294500 + }, + { + "epoch": 1.98, + "learning_rate": 4.220855177975081e-05, + "loss": 0.3519, + "step": 3295000 + }, + { + "epoch": 1.98, + "learning_rate": 4.220645181419024e-05, + "loss": 0.3731, + "step": 3295500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2204351848629684e-05, + "loss": 0.3678, + "step": 3296000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2202256083000244e-05, + "loss": 0.3692, + "step": 3296500 + }, + { + "epoch": 1.98, + "learning_rate": 4.220015611743967e-05, + "loss": 0.3598, + "step": 3297000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2198056151879104e-05, + "loss": 0.357, + "step": 3297500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2195956186318544e-05, + "loss": 0.3647, + "step": 3298000 + }, + { + "epoch": 1.98, + "learning_rate": 4.219385622075798e-05, + "loss": 0.3591, + "step": 3298500 + }, + { + "epoch": 1.98, + "learning_rate": 4.219175625519741e-05, + "loss": 0.3622, + "step": 3299000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2189660489567965e-05, + "loss": 0.3644, + "step": 3299500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2187564723938525e-05, + "loss": 0.3805, + "step": 3300000 + }, + { + "epoch": 1.98, + "eval_loss": 0.34579384326934814, + "eval_runtime": 1120.1725, + "eval_samples_per_second": 470.213, + "eval_steps_per_second": 78.369, + "step": 3300000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2185464758377965e-05, + "loss": 0.3607, + "step": 3300500 + }, + { + "epoch": 1.98, + "learning_rate": 4.21833647928174e-05, + "loss": 0.3694, + "step": 3301000 + }, + { + "epoch": 1.98, + "learning_rate": 4.218126482725683e-05, + "loss": 0.3614, + "step": 3301500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2179164861696266e-05, + "loss": 0.3571, + "step": 3302000 + }, + { + "epoch": 1.98, + "learning_rate": 4.21770648961357e-05, + "loss": 0.3564, + "step": 3302500 + }, + { + "epoch": 1.98, + "learning_rate": 4.217496493057514e-05, + "loss": 0.3531, + "step": 3303000 + }, + { + "epoch": 1.98, + "learning_rate": 4.217286496501457e-05, + "loss": 0.3571, + "step": 3303500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2170764999454006e-05, + "loss": 0.3514, + "step": 3304000 + }, + { + "epoch": 1.98, + "learning_rate": 4.216866503389345e-05, + "loss": 0.3602, + "step": 3304500 + }, + { + "epoch": 1.98, + "learning_rate": 4.216656506833288e-05, + "loss": 0.3608, + "step": 3305000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2164465102772314e-05, + "loss": 0.3678, + "step": 3305500 + }, + { + "epoch": 1.98, + "learning_rate": 4.216236933714287e-05, + "loss": 0.3614, + "step": 3306000 + }, + { + "epoch": 1.98, + "learning_rate": 4.216026937158231e-05, + "loss": 0.3637, + "step": 3306500 + }, + { + "epoch": 1.98, + "learning_rate": 4.215817360595286e-05, + "loss": 0.3667, + "step": 3307000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2156073640392294e-05, + "loss": 0.3603, + "step": 3307500 + }, + { + "epoch": 1.98, + "learning_rate": 4.215397367483173e-05, + "loss": 0.3588, + "step": 3308000 + }, + { + "epoch": 1.98, + "learning_rate": 4.215187370927117e-05, + "loss": 0.3624, + "step": 3308500 + }, + { + "epoch": 1.98, + "learning_rate": 4.21497737437106e-05, + "loss": 0.3627, + "step": 3309000 + }, + { + "epoch": 1.98, + "learning_rate": 4.2147673778150035e-05, + "loss": 0.3651, + "step": 3309500 + }, + { + "epoch": 1.98, + "learning_rate": 4.2145578012520595e-05, + "loss": 0.3631, + "step": 3310000 + }, + { + "epoch": 1.98, + "learning_rate": 4.214347804696003e-05, + "loss": 0.3606, + "step": 3310500 + }, + { + "epoch": 1.99, + "learning_rate": 4.214137808139946e-05, + "loss": 0.3634, + "step": 3311000 + }, + { + "epoch": 1.99, + "learning_rate": 4.21392781158389e-05, + "loss": 0.3516, + "step": 3311500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2137178150278336e-05, + "loss": 0.355, + "step": 3312000 + }, + { + "epoch": 1.99, + "learning_rate": 4.213507818471777e-05, + "loss": 0.3601, + "step": 3312500 + }, + { + "epoch": 1.99, + "learning_rate": 4.213298241908832e-05, + "loss": 0.3642, + "step": 3313000 + }, + { + "epoch": 1.99, + "learning_rate": 4.213088245352776e-05, + "loss": 0.3616, + "step": 3313500 + }, + { + "epoch": 1.99, + "learning_rate": 4.21287824879672e-05, + "loss": 0.3675, + "step": 3314000 + }, + { + "epoch": 1.99, + "learning_rate": 4.212668252240663e-05, + "loss": 0.3652, + "step": 3314500 + }, + { + "epoch": 1.99, + "learning_rate": 4.212458255684607e-05, + "loss": 0.3533, + "step": 3315000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2122482591285504e-05, + "loss": 0.3666, + "step": 3315500 + }, + { + "epoch": 1.99, + "learning_rate": 4.212038262572494e-05, + "loss": 0.3579, + "step": 3316000 + }, + { + "epoch": 1.99, + "learning_rate": 4.211828266016438e-05, + "loss": 0.363, + "step": 3316500 + }, + { + "epoch": 1.99, + "learning_rate": 4.211618689453493e-05, + "loss": 0.3626, + "step": 3317000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2114086928974365e-05, + "loss": 0.3603, + "step": 3317500 + }, + { + "epoch": 1.99, + "learning_rate": 4.21119869634138e-05, + "loss": 0.3699, + "step": 3318000 + }, + { + "epoch": 1.99, + "learning_rate": 4.210988699785324e-05, + "loss": 0.3706, + "step": 3318500 + }, + { + "epoch": 1.99, + "learning_rate": 4.210778703229267e-05, + "loss": 0.3706, + "step": 3319000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2105687066732105e-05, + "loss": 0.3628, + "step": 3319500 + }, + { + "epoch": 1.99, + "learning_rate": 4.210358710117154e-05, + "loss": 0.3655, + "step": 3320000 + }, + { + "epoch": 1.99, + "learning_rate": 4.210148713561097e-05, + "loss": 0.3576, + "step": 3320500 + }, + { + "epoch": 1.99, + "learning_rate": 4.209939556991265e-05, + "loss": 0.3685, + "step": 3321000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2097295604352086e-05, + "loss": 0.3583, + "step": 3321500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2095195638791526e-05, + "loss": 0.3632, + "step": 3322000 + }, + { + "epoch": 1.99, + "learning_rate": 4.209309567323096e-05, + "loss": 0.3645, + "step": 3322500 + }, + { + "epoch": 1.99, + "learning_rate": 4.209099570767039e-05, + "loss": 0.3538, + "step": 3323000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2088895742109833e-05, + "loss": 0.3573, + "step": 3323500 + }, + { + "epoch": 1.99, + "learning_rate": 4.208679577654926e-05, + "loss": 0.3662, + "step": 3324000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2084695810988694e-05, + "loss": 0.3514, + "step": 3324500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2082600045359254e-05, + "loss": 0.3643, + "step": 3325000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2080504279729814e-05, + "loss": 0.3586, + "step": 3325500 + }, + { + "epoch": 1.99, + "learning_rate": 4.207840431416925e-05, + "loss": 0.3654, + "step": 3326000 + }, + { + "epoch": 1.99, + "learning_rate": 4.20763085485398e-05, + "loss": 0.3687, + "step": 3326500 + }, + { + "epoch": 1.99, + "learning_rate": 4.2074208582979235e-05, + "loss": 0.3581, + "step": 3327000 + }, + { + "epoch": 1.99, + "learning_rate": 4.2072108617418675e-05, + "loss": 0.3599, + "step": 3327500 + }, + { + "epoch": 2.0, + "learning_rate": 4.207000865185811e-05, + "loss": 0.3575, + "step": 3328000 + }, + { + "epoch": 2.0, + "learning_rate": 4.206790868629754e-05, + "loss": 0.3627, + "step": 3328500 + }, + { + "epoch": 2.0, + "learning_rate": 4.206580872073698e-05, + "loss": 0.3653, + "step": 3329000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2063708755176416e-05, + "loss": 0.3603, + "step": 3329500 + }, + { + "epoch": 2.0, + "learning_rate": 4.206160878961585e-05, + "loss": 0.3689, + "step": 3330000 + }, + { + "epoch": 2.0, + "learning_rate": 4.205950882405529e-05, + "loss": 0.3625, + "step": 3330500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2057408858494716e-05, + "loss": 0.3641, + "step": 3331000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2055313092865276e-05, + "loss": 0.3554, + "step": 3331500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2053213127304717e-05, + "loss": 0.3631, + "step": 3332000 + }, + { + "epoch": 2.0, + "learning_rate": 4.205111316174415e-05, + "loss": 0.354, + "step": 3332500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2049013196183583e-05, + "loss": 0.3631, + "step": 3333000 + }, + { + "epoch": 2.0, + "learning_rate": 4.204691743055414e-05, + "loss": 0.3657, + "step": 3333500 + }, + { + "epoch": 2.0, + "learning_rate": 4.204481746499358e-05, + "loss": 0.3536, + "step": 3334000 + }, + { + "epoch": 2.0, + "learning_rate": 4.204271749943301e-05, + "loss": 0.3682, + "step": 3334500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2040621733803564e-05, + "loss": 0.3757, + "step": 3335000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2038521768243e-05, + "loss": 0.3511, + "step": 3335500 + }, + { + "epoch": 2.0, + "learning_rate": 4.203642180268244e-05, + "loss": 0.3625, + "step": 3336000 + }, + { + "epoch": 2.0, + "learning_rate": 4.203432183712187e-05, + "loss": 0.3516, + "step": 3336500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2032221871561305e-05, + "loss": 0.3507, + "step": 3337000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2030121906000745e-05, + "loss": 0.354, + "step": 3337500 + }, + { + "epoch": 2.0, + "learning_rate": 4.202802194044018e-05, + "loss": 0.3475, + "step": 3338000 + }, + { + "epoch": 2.0, + "learning_rate": 4.202592617481073e-05, + "loss": 0.3563, + "step": 3338500 + }, + { + "epoch": 2.0, + "learning_rate": 4.202382620925017e-05, + "loss": 0.3444, + "step": 3339000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2021726243689606e-05, + "loss": 0.354, + "step": 3339500 + }, + { + "epoch": 2.0, + "learning_rate": 4.201962627812904e-05, + "loss": 0.3498, + "step": 3340000 + }, + { + "epoch": 2.0, + "learning_rate": 4.201752631256847e-05, + "loss": 0.3572, + "step": 3340500 + }, + { + "epoch": 2.0, + "learning_rate": 4.2015426347007906e-05, + "loss": 0.3506, + "step": 3341000 + }, + { + "epoch": 2.0, + "learning_rate": 4.201332638144734e-05, + "loss": 0.3627, + "step": 3341500 + }, + { + "epoch": 2.0, + "learning_rate": 4.201122641588678e-05, + "loss": 0.3527, + "step": 3342000 + }, + { + "epoch": 2.0, + "learning_rate": 4.2009126450326213e-05, + "loss": 0.3512, + "step": 3342500 + }, + { + "epoch": 2.0, + "learning_rate": 4.200702648476565e-05, + "loss": 0.3599, + "step": 3343000 + }, + { + "epoch": 2.0, + "learning_rate": 4.20049307191362e-05, + "loss": 0.3481, + "step": 3343500 + }, + { + "epoch": 2.0, + "learning_rate": 4.200283075357564e-05, + "loss": 0.3587, + "step": 3344000 + }, + { + "epoch": 2.01, + "learning_rate": 4.2000730788015074e-05, + "loss": 0.3445, + "step": 3344500 + }, + { + "epoch": 2.01, + "learning_rate": 4.199863082245451e-05, + "loss": 0.3484, + "step": 3345000 + }, + { + "epoch": 2.01, + "learning_rate": 4.199653505682507e-05, + "loss": 0.3571, + "step": 3345500 + }, + { + "epoch": 2.01, + "learning_rate": 4.19944350912645e-05, + "loss": 0.3543, + "step": 3346000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1992335125703935e-05, + "loss": 0.3435, + "step": 3346500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1990235160143375e-05, + "loss": 0.3564, + "step": 3347000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1988139394513935e-05, + "loss": 0.361, + "step": 3347500 + }, + { + "epoch": 2.01, + "learning_rate": 4.198603942895336e-05, + "loss": 0.3691, + "step": 3348000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1983939463392796e-05, + "loss": 0.3596, + "step": 3348500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1981839497832236e-05, + "loss": 0.3428, + "step": 3349000 + }, + { + "epoch": 2.01, + "learning_rate": 4.197973953227167e-05, + "loss": 0.3525, + "step": 3349500 + }, + { + "epoch": 2.01, + "learning_rate": 4.19776395667111e-05, + "loss": 0.3508, + "step": 3350000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1975543801081656e-05, + "loss": 0.3436, + "step": 3350500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1973443835521097e-05, + "loss": 0.3478, + "step": 3351000 + }, + { + "epoch": 2.01, + "learning_rate": 4.197134386996053e-05, + "loss": 0.3598, + "step": 3351500 + }, + { + "epoch": 2.01, + "learning_rate": 4.1969243904399963e-05, + "loss": 0.3591, + "step": 3352000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1967143938839404e-05, + "loss": 0.3491, + "step": 3352500 + }, + { + "epoch": 2.01, + "learning_rate": 4.196504397327884e-05, + "loss": 0.3541, + "step": 3353000 + }, + { + "epoch": 2.01, + "learning_rate": 4.196294820764939e-05, + "loss": 0.3527, + "step": 3353500 + }, + { + "epoch": 2.01, + "learning_rate": 4.196084824208883e-05, + "loss": 0.346, + "step": 3354000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1958748276528264e-05, + "loss": 0.3534, + "step": 3354500 + }, + { + "epoch": 2.01, + "learning_rate": 4.19566483109677e-05, + "loss": 0.3508, + "step": 3355000 + }, + { + "epoch": 2.01, + "learning_rate": 4.195454834540714e-05, + "loss": 0.3498, + "step": 3355500 + }, + { + "epoch": 2.01, + "learning_rate": 4.195244837984657e-05, + "loss": 0.3593, + "step": 3356000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1950348414286005e-05, + "loss": 0.3484, + "step": 3356500 + }, + { + "epoch": 2.01, + "learning_rate": 4.194825264865656e-05, + "loss": 0.3551, + "step": 3357000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1946152683096e-05, + "loss": 0.3616, + "step": 3357500 + }, + { + "epoch": 2.01, + "learning_rate": 4.194405271753543e-05, + "loss": 0.3512, + "step": 3358000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1941952751974866e-05, + "loss": 0.3537, + "step": 3358500 + }, + { + "epoch": 2.01, + "learning_rate": 4.193985698634542e-05, + "loss": 0.3553, + "step": 3359000 + }, + { + "epoch": 2.01, + "learning_rate": 4.193775702078486e-05, + "loss": 0.3467, + "step": 3359500 + }, + { + "epoch": 2.01, + "learning_rate": 4.193565705522429e-05, + "loss": 0.3465, + "step": 3360000 + }, + { + "epoch": 2.01, + "learning_rate": 4.1933557089663727e-05, + "loss": 0.3571, + "step": 3360500 + }, + { + "epoch": 2.02, + "learning_rate": 4.193145712410317e-05, + "loss": 0.349, + "step": 3361000 + }, + { + "epoch": 2.02, + "learning_rate": 4.19293571585426e-05, + "loss": 0.3504, + "step": 3361500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1927257192982034e-05, + "loss": 0.3422, + "step": 3362000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1925157227421474e-05, + "loss": 0.3553, + "step": 3362500 + }, + { + "epoch": 2.02, + "learning_rate": 4.19230572618609e-05, + "loss": 0.3512, + "step": 3363000 + }, + { + "epoch": 2.02, + "learning_rate": 4.192095729630034e-05, + "loss": 0.3454, + "step": 3363500 + }, + { + "epoch": 2.02, + "learning_rate": 4.19188615306709e-05, + "loss": 0.3533, + "step": 3364000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1916761565110335e-05, + "loss": 0.359, + "step": 3364500 + }, + { + "epoch": 2.02, + "learning_rate": 4.191466159954977e-05, + "loss": 0.3509, + "step": 3365000 + }, + { + "epoch": 2.02, + "learning_rate": 4.19125616339892e-05, + "loss": 0.3518, + "step": 3365500 + }, + { + "epoch": 2.02, + "learning_rate": 4.191046586835976e-05, + "loss": 0.3476, + "step": 3366000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1908365902799195e-05, + "loss": 0.3508, + "step": 3366500 + }, + { + "epoch": 2.02, + "learning_rate": 4.190626593723863e-05, + "loss": 0.3429, + "step": 3367000 + }, + { + "epoch": 2.02, + "learning_rate": 4.190416597167806e-05, + "loss": 0.3552, + "step": 3367500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1902066006117496e-05, + "loss": 0.3459, + "step": 3368000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1899970240488056e-05, + "loss": 0.3507, + "step": 3368500 + }, + { + "epoch": 2.02, + "learning_rate": 4.189787027492749e-05, + "loss": 0.3557, + "step": 3369000 + }, + { + "epoch": 2.02, + "learning_rate": 4.189577030936693e-05, + "loss": 0.3542, + "step": 3369500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1893670343806356e-05, + "loss": 0.3528, + "step": 3370000 + }, + { + "epoch": 2.02, + "learning_rate": 4.189157457817692e-05, + "loss": 0.3603, + "step": 3370500 + }, + { + "epoch": 2.02, + "learning_rate": 4.188947461261636e-05, + "loss": 0.3551, + "step": 3371000 + }, + { + "epoch": 2.02, + "learning_rate": 4.188737464705579e-05, + "loss": 0.3511, + "step": 3371500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1885274681495224e-05, + "loss": 0.3597, + "step": 3372000 + }, + { + "epoch": 2.02, + "learning_rate": 4.188317891586578e-05, + "loss": 0.3486, + "step": 3372500 + }, + { + "epoch": 2.02, + "learning_rate": 4.188107895030522e-05, + "loss": 0.3581, + "step": 3373000 + }, + { + "epoch": 2.02, + "learning_rate": 4.187897898474465e-05, + "loss": 0.359, + "step": 3373500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1876879019184085e-05, + "loss": 0.3535, + "step": 3374000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1874779053623525e-05, + "loss": 0.3633, + "step": 3374500 + }, + { + "epoch": 2.02, + "learning_rate": 4.187267908806295e-05, + "loss": 0.3444, + "step": 3375000 + }, + { + "epoch": 2.02, + "learning_rate": 4.187058332243351e-05, + "loss": 0.3419, + "step": 3375500 + }, + { + "epoch": 2.02, + "learning_rate": 4.1868483356872945e-05, + "loss": 0.3621, + "step": 3376000 + }, + { + "epoch": 2.02, + "learning_rate": 4.1866383391312386e-05, + "loss": 0.3562, + "step": 3376500 + }, + { + "epoch": 2.02, + "learning_rate": 4.186428342575181e-05, + "loss": 0.3543, + "step": 3377000 + }, + { + "epoch": 2.02, + "learning_rate": 4.186218766012237e-05, + "loss": 0.363, + "step": 3377500 + }, + { + "epoch": 2.03, + "learning_rate": 4.186008769456181e-05, + "loss": 0.3489, + "step": 3378000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1857987729001246e-05, + "loss": 0.3614, + "step": 3378500 + }, + { + "epoch": 2.03, + "learning_rate": 4.185588776344068e-05, + "loss": 0.3519, + "step": 3379000 + }, + { + "epoch": 2.03, + "learning_rate": 4.185378779788011e-05, + "loss": 0.3567, + "step": 3379500 + }, + { + "epoch": 2.03, + "learning_rate": 4.185168783231955e-05, + "loss": 0.354, + "step": 3380000 + }, + { + "epoch": 2.03, + "learning_rate": 4.184958786675898e-05, + "loss": 0.3541, + "step": 3380500 + }, + { + "epoch": 2.03, + "learning_rate": 4.184748790119842e-05, + "loss": 0.3562, + "step": 3381000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1845387935637854e-05, + "loss": 0.3533, + "step": 3381500 + }, + { + "epoch": 2.03, + "learning_rate": 4.184328797007729e-05, + "loss": 0.3498, + "step": 3382000 + }, + { + "epoch": 2.03, + "learning_rate": 4.184118800451673e-05, + "loss": 0.3471, + "step": 3382500 + }, + { + "epoch": 2.03, + "learning_rate": 4.183908803895616e-05, + "loss": 0.3639, + "step": 3383000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1836992273326715e-05, + "loss": 0.3463, + "step": 3383500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1834896507697275e-05, + "loss": 0.3611, + "step": 3384000 + }, + { + "epoch": 2.03, + "learning_rate": 4.183280074206783e-05, + "loss": 0.3513, + "step": 3384500 + }, + { + "epoch": 2.03, + "learning_rate": 4.183070077650727e-05, + "loss": 0.3594, + "step": 3385000 + }, + { + "epoch": 2.03, + "learning_rate": 4.18286008109467e-05, + "loss": 0.3595, + "step": 3385500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1826500845386136e-05, + "loss": 0.345, + "step": 3386000 + }, + { + "epoch": 2.03, + "learning_rate": 4.182440087982557e-05, + "loss": 0.3513, + "step": 3386500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1822300914265e-05, + "loss": 0.3459, + "step": 3387000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1820200948704436e-05, + "loss": 0.3461, + "step": 3387500 + }, + { + "epoch": 2.03, + "learning_rate": 4.1818100983143876e-05, + "loss": 0.36, + "step": 3388000 + }, + { + "epoch": 2.03, + "learning_rate": 4.181600101758331e-05, + "loss": 0.3505, + "step": 3388500 + }, + { + "epoch": 2.03, + "learning_rate": 4.181390105202274e-05, + "loss": 0.3561, + "step": 3389000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1811801086462183e-05, + "loss": 0.3492, + "step": 3389500 + }, + { + "epoch": 2.03, + "learning_rate": 4.180970532083274e-05, + "loss": 0.3571, + "step": 3390000 + }, + { + "epoch": 2.03, + "learning_rate": 4.18076095552033e-05, + "loss": 0.3528, + "step": 3390500 + }, + { + "epoch": 2.03, + "learning_rate": 4.180550958964273e-05, + "loss": 0.3514, + "step": 3391000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1803409624082164e-05, + "loss": 0.3484, + "step": 3391500 + }, + { + "epoch": 2.03, + "learning_rate": 4.18013096585216e-05, + "loss": 0.343, + "step": 3392000 + }, + { + "epoch": 2.03, + "learning_rate": 4.179920969296103e-05, + "loss": 0.3589, + "step": 3392500 + }, + { + "epoch": 2.03, + "learning_rate": 4.179710972740047e-05, + "loss": 0.345, + "step": 3393000 + }, + { + "epoch": 2.03, + "learning_rate": 4.1795009761839905e-05, + "loss": 0.3511, + "step": 3393500 + }, + { + "epoch": 2.03, + "learning_rate": 4.179290979627934e-05, + "loss": 0.3571, + "step": 3394000 + }, + { + "epoch": 2.04, + "learning_rate": 4.179081403064989e-05, + "loss": 0.353, + "step": 3394500 + }, + { + "epoch": 2.04, + "learning_rate": 4.178871406508933e-05, + "loss": 0.3483, + "step": 3395000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1786614099528766e-05, + "loss": 0.3587, + "step": 3395500 + }, + { + "epoch": 2.04, + "learning_rate": 4.17845141339682e-05, + "loss": 0.3517, + "step": 3396000 + }, + { + "epoch": 2.04, + "learning_rate": 4.178241416840764e-05, + "loss": 0.3511, + "step": 3396500 + }, + { + "epoch": 2.04, + "learning_rate": 4.178031420284707e-05, + "loss": 0.3573, + "step": 3397000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1778214237286506e-05, + "loss": 0.3564, + "step": 3397500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1776114271725947e-05, + "loss": 0.3589, + "step": 3398000 + }, + { + "epoch": 2.04, + "learning_rate": 4.17740185060965e-05, + "loss": 0.3594, + "step": 3398500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1771918540535934e-05, + "loss": 0.3517, + "step": 3399000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1769818574975374e-05, + "loss": 0.3529, + "step": 3399500 + }, + { + "epoch": 2.04, + "learning_rate": 4.176771860941481e-05, + "loss": 0.353, + "step": 3400000 + }, + { + "epoch": 2.04, + "eval_loss": 0.3463619649410248, + "eval_runtime": 1118.9984, + "eval_samples_per_second": 470.707, + "eval_steps_per_second": 78.451, + "step": 3400000 + }, + { + "epoch": 2.04, + "learning_rate": 4.176562284378536e-05, + "loss": 0.3509, + "step": 3400500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1763522878224794e-05, + "loss": 0.36, + "step": 3401000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1761422912664235e-05, + "loss": 0.3477, + "step": 3401500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175932294710367e-05, + "loss": 0.352, + "step": 3402000 + }, + { + "epoch": 2.04, + "learning_rate": 4.175723138140535e-05, + "loss": 0.368, + "step": 3402500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175513141584478e-05, + "loss": 0.3562, + "step": 3403000 + }, + { + "epoch": 2.04, + "learning_rate": 4.175303145028421e-05, + "loss": 0.3582, + "step": 3403500 + }, + { + "epoch": 2.04, + "learning_rate": 4.175093148472365e-05, + "loss": 0.358, + "step": 3404000 + }, + { + "epoch": 2.04, + "learning_rate": 4.174883151916308e-05, + "loss": 0.3594, + "step": 3404500 + }, + { + "epoch": 2.04, + "learning_rate": 4.174673155360252e-05, + "loss": 0.3644, + "step": 3405000 + }, + { + "epoch": 2.04, + "learning_rate": 4.174463578797308e-05, + "loss": 0.3521, + "step": 3405500 + }, + { + "epoch": 2.04, + "learning_rate": 4.174253582241251e-05, + "loss": 0.3513, + "step": 3406000 + }, + { + "epoch": 2.04, + "learning_rate": 4.174043585685194e-05, + "loss": 0.3517, + "step": 3406500 + }, + { + "epoch": 2.04, + "learning_rate": 4.173833589129138e-05, + "loss": 0.3647, + "step": 3407000 + }, + { + "epoch": 2.04, + "learning_rate": 4.173623592573082e-05, + "loss": 0.3679, + "step": 3407500 + }, + { + "epoch": 2.04, + "learning_rate": 4.173413596017025e-05, + "loss": 0.3557, + "step": 3408000 + }, + { + "epoch": 2.04, + "learning_rate": 4.1732040194540804e-05, + "loss": 0.3507, + "step": 3408500 + }, + { + "epoch": 2.04, + "learning_rate": 4.1729940228980244e-05, + "loss": 0.3527, + "step": 3409000 + }, + { + "epoch": 2.04, + "learning_rate": 4.172784026341968e-05, + "loss": 0.3637, + "step": 3409500 + }, + { + "epoch": 2.04, + "learning_rate": 4.172574029785911e-05, + "loss": 0.3514, + "step": 3410000 + }, + { + "epoch": 2.04, + "learning_rate": 4.172364453222967e-05, + "loss": 0.3566, + "step": 3410500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1721544566669105e-05, + "loss": 0.3512, + "step": 3411000 + }, + { + "epoch": 2.05, + "learning_rate": 4.171944460110854e-05, + "loss": 0.3575, + "step": 3411500 + }, + { + "epoch": 2.05, + "learning_rate": 4.171734463554798e-05, + "loss": 0.3475, + "step": 3412000 + }, + { + "epoch": 2.05, + "learning_rate": 4.171524466998741e-05, + "loss": 0.3562, + "step": 3412500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1713144704426845e-05, + "loss": 0.3514, + "step": 3413000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1711044738866286e-05, + "loss": 0.3542, + "step": 3413500 + }, + { + "epoch": 2.05, + "learning_rate": 4.170894477330572e-05, + "loss": 0.3413, + "step": 3414000 + }, + { + "epoch": 2.05, + "learning_rate": 4.170684900767627e-05, + "loss": 0.3551, + "step": 3414500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1704749042115706e-05, + "loss": 0.3593, + "step": 3415000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1702649076555146e-05, + "loss": 0.351, + "step": 3415500 + }, + { + "epoch": 2.05, + "learning_rate": 4.170054911099458e-05, + "loss": 0.344, + "step": 3416000 + }, + { + "epoch": 2.05, + "learning_rate": 4.169845334536513e-05, + "loss": 0.3531, + "step": 3416500 + }, + { + "epoch": 2.05, + "learning_rate": 4.169635337980457e-05, + "loss": 0.3504, + "step": 3417000 + }, + { + "epoch": 2.05, + "learning_rate": 4.169425341424401e-05, + "loss": 0.3558, + "step": 3417500 + }, + { + "epoch": 2.05, + "learning_rate": 4.169215764861456e-05, + "loss": 0.3522, + "step": 3418000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1690057683053994e-05, + "loss": 0.3481, + "step": 3418500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1687957717493434e-05, + "loss": 0.3444, + "step": 3419000 + }, + { + "epoch": 2.05, + "learning_rate": 4.168585775193287e-05, + "loss": 0.3415, + "step": 3419500 + }, + { + "epoch": 2.05, + "learning_rate": 4.16837577863723e-05, + "loss": 0.35, + "step": 3420000 + }, + { + "epoch": 2.05, + "learning_rate": 4.168165782081174e-05, + "loss": 0.3493, + "step": 3420500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1679557855251175e-05, + "loss": 0.348, + "step": 3421000 + }, + { + "epoch": 2.05, + "learning_rate": 4.167745788969061e-05, + "loss": 0.3438, + "step": 3421500 + }, + { + "epoch": 2.05, + "learning_rate": 4.167536212406116e-05, + "loss": 0.3485, + "step": 3422000 + }, + { + "epoch": 2.05, + "learning_rate": 4.16732621585006e-05, + "loss": 0.3541, + "step": 3422500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1671162192940036e-05, + "loss": 0.3534, + "step": 3423000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166906642731059e-05, + "loss": 0.3483, + "step": 3423500 + }, + { + "epoch": 2.05, + "learning_rate": 4.166696646175002e-05, + "loss": 0.3598, + "step": 3424000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166486649618946e-05, + "loss": 0.3549, + "step": 3424500 + }, + { + "epoch": 2.05, + "learning_rate": 4.1662766530628896e-05, + "loss": 0.3711, + "step": 3425000 + }, + { + "epoch": 2.05, + "learning_rate": 4.166066656506833e-05, + "loss": 0.3656, + "step": 3425500 + }, + { + "epoch": 2.05, + "learning_rate": 4.165856659950777e-05, + "loss": 0.3587, + "step": 3426000 + }, + { + "epoch": 2.05, + "learning_rate": 4.1656466633947204e-05, + "loss": 0.356, + "step": 3426500 + }, + { + "epoch": 2.05, + "learning_rate": 4.165436666838664e-05, + "loss": 0.3557, + "step": 3427000 + }, + { + "epoch": 2.05, + "learning_rate": 4.165227510268831e-05, + "loss": 0.3471, + "step": 3427500 + }, + { + "epoch": 2.06, + "learning_rate": 4.165017513712775e-05, + "loss": 0.3626, + "step": 3428000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1648075171567184e-05, + "loss": 0.3562, + "step": 3428500 + }, + { + "epoch": 2.06, + "learning_rate": 4.164597520600662e-05, + "loss": 0.3477, + "step": 3429000 + }, + { + "epoch": 2.06, + "learning_rate": 4.164387524044606e-05, + "loss": 0.3517, + "step": 3429500 + }, + { + "epoch": 2.06, + "learning_rate": 4.164177947481661e-05, + "loss": 0.3577, + "step": 3430000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1639679509256045e-05, + "loss": 0.3547, + "step": 3430500 + }, + { + "epoch": 2.06, + "learning_rate": 4.163757954369548e-05, + "loss": 0.3466, + "step": 3431000 + }, + { + "epoch": 2.06, + "learning_rate": 4.163547957813492e-05, + "loss": 0.3495, + "step": 3431500 + }, + { + "epoch": 2.06, + "learning_rate": 4.163337961257435e-05, + "loss": 0.3605, + "step": 3432000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1631279647013786e-05, + "loss": 0.362, + "step": 3432500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1629183881384346e-05, + "loss": 0.3483, + "step": 3433000 + }, + { + "epoch": 2.06, + "learning_rate": 4.162708391582378e-05, + "loss": 0.3585, + "step": 3433500 + }, + { + "epoch": 2.06, + "learning_rate": 4.162498395026321e-05, + "loss": 0.3536, + "step": 3434000 + }, + { + "epoch": 2.06, + "learning_rate": 4.162288398470265e-05, + "loss": 0.3559, + "step": 3434500 + }, + { + "epoch": 2.06, + "learning_rate": 4.162078821907321e-05, + "loss": 0.3505, + "step": 3435000 + }, + { + "epoch": 2.06, + "learning_rate": 4.161868825351264e-05, + "loss": 0.3514, + "step": 3435500 + }, + { + "epoch": 2.06, + "learning_rate": 4.1616588287952074e-05, + "loss": 0.3573, + "step": 3436000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1614488322391514e-05, + "loss": 0.3696, + "step": 3436500 + }, + { + "epoch": 2.06, + "learning_rate": 4.161238835683095e-05, + "loss": 0.3551, + "step": 3437000 + }, + { + "epoch": 2.06, + "learning_rate": 4.161028839127038e-05, + "loss": 0.3514, + "step": 3437500 + }, + { + "epoch": 2.06, + "learning_rate": 4.160818842570982e-05, + "loss": 0.3599, + "step": 3438000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1606088460149255e-05, + "loss": 0.3582, + "step": 3438500 + }, + { + "epoch": 2.06, + "learning_rate": 4.160399269451981e-05, + "loss": 0.3511, + "step": 3439000 + }, + { + "epoch": 2.06, + "learning_rate": 4.160189692889036e-05, + "loss": 0.3485, + "step": 3439500 + }, + { + "epoch": 2.06, + "learning_rate": 4.15997969633298e-05, + "loss": 0.3528, + "step": 3440000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1597696997769235e-05, + "loss": 0.3497, + "step": 3440500 + }, + { + "epoch": 2.06, + "learning_rate": 4.159559703220867e-05, + "loss": 0.353, + "step": 3441000 + }, + { + "epoch": 2.06, + "learning_rate": 4.159349706664811e-05, + "loss": 0.3527, + "step": 3441500 + }, + { + "epoch": 2.06, + "learning_rate": 4.159140130101866e-05, + "loss": 0.3616, + "step": 3442000 + }, + { + "epoch": 2.06, + "learning_rate": 4.1589301335458096e-05, + "loss": 0.3509, + "step": 3442500 + }, + { + "epoch": 2.06, + "learning_rate": 4.158720136989753e-05, + "loss": 0.3421, + "step": 3443000 + }, + { + "epoch": 2.06, + "learning_rate": 4.158510140433697e-05, + "loss": 0.357, + "step": 3443500 + }, + { + "epoch": 2.06, + "learning_rate": 4.15830014387764e-05, + "loss": 0.3538, + "step": 3444000 + }, + { + "epoch": 2.07, + "learning_rate": 4.158090567314696e-05, + "loss": 0.3598, + "step": 3444500 + }, + { + "epoch": 2.07, + "learning_rate": 4.157880570758639e-05, + "loss": 0.3511, + "step": 3445000 + }, + { + "epoch": 2.07, + "learning_rate": 4.157670574202583e-05, + "loss": 0.364, + "step": 3445500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1574605776465264e-05, + "loss": 0.3551, + "step": 3446000 + }, + { + "epoch": 2.07, + "learning_rate": 4.15725058109047e-05, + "loss": 0.3549, + "step": 3446500 + }, + { + "epoch": 2.07, + "learning_rate": 4.157040584534414e-05, + "loss": 0.3485, + "step": 3447000 + }, + { + "epoch": 2.07, + "learning_rate": 4.156831007971469e-05, + "loss": 0.3496, + "step": 3447500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1566210114154125e-05, + "loss": 0.3545, + "step": 3448000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1564110148593565e-05, + "loss": 0.3531, + "step": 3448500 + }, + { + "epoch": 2.07, + "learning_rate": 4.156201438296412e-05, + "loss": 0.3488, + "step": 3449000 + }, + { + "epoch": 2.07, + "learning_rate": 4.155991441740355e-05, + "loss": 0.3525, + "step": 3449500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1557814451842985e-05, + "loss": 0.3593, + "step": 3450000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1555714486282426e-05, + "loss": 0.3543, + "step": 3450500 + }, + { + "epoch": 2.07, + "learning_rate": 4.155361452072186e-05, + "loss": 0.3583, + "step": 3451000 + }, + { + "epoch": 2.07, + "learning_rate": 4.155151455516129e-05, + "loss": 0.3474, + "step": 3451500 + }, + { + "epoch": 2.07, + "learning_rate": 4.154941458960073e-05, + "loss": 0.3521, + "step": 3452000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1547314624040166e-05, + "loss": 0.3535, + "step": 3452500 + }, + { + "epoch": 2.07, + "learning_rate": 4.15452146584796e-05, + "loss": 0.3464, + "step": 3453000 + }, + { + "epoch": 2.07, + "learning_rate": 4.154311889285015e-05, + "loss": 0.3482, + "step": 3453500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1541018927289594e-05, + "loss": 0.3537, + "step": 3454000 + }, + { + "epoch": 2.07, + "learning_rate": 4.153891896172903e-05, + "loss": 0.3523, + "step": 3454500 + }, + { + "epoch": 2.07, + "learning_rate": 4.153681899616846e-05, + "loss": 0.3518, + "step": 3455000 + }, + { + "epoch": 2.07, + "learning_rate": 4.15347190306079e-05, + "loss": 0.3505, + "step": 3455500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1532619065047334e-05, + "loss": 0.3548, + "step": 3456000 + }, + { + "epoch": 2.07, + "learning_rate": 4.153052329941789e-05, + "loss": 0.3605, + "step": 3456500 + }, + { + "epoch": 2.07, + "learning_rate": 4.152842333385733e-05, + "loss": 0.3586, + "step": 3457000 + }, + { + "epoch": 2.07, + "learning_rate": 4.152632336829676e-05, + "loss": 0.3636, + "step": 3457500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1524223402736195e-05, + "loss": 0.3533, + "step": 3458000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1522123437175635e-05, + "loss": 0.3595, + "step": 3458500 + }, + { + "epoch": 2.07, + "learning_rate": 4.152002347161506e-05, + "loss": 0.3445, + "step": 3459000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1517923506054495e-05, + "loss": 0.3527, + "step": 3459500 + }, + { + "epoch": 2.07, + "learning_rate": 4.1515823540493936e-05, + "loss": 0.359, + "step": 3460000 + }, + { + "epoch": 2.07, + "learning_rate": 4.1513727774864496e-05, + "loss": 0.3449, + "step": 3460500 + }, + { + "epoch": 2.08, + "learning_rate": 4.151162780930393e-05, + "loss": 0.3464, + "step": 3461000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1509527843743356e-05, + "loss": 0.3475, + "step": 3461500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1507427878182796e-05, + "loss": 0.3543, + "step": 3462000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1505332112553357e-05, + "loss": 0.3454, + "step": 3462500 + }, + { + "epoch": 2.08, + "learning_rate": 4.150323214699279e-05, + "loss": 0.3532, + "step": 3463000 + }, + { + "epoch": 2.08, + "learning_rate": 4.150113218143223e-05, + "loss": 0.3458, + "step": 3463500 + }, + { + "epoch": 2.08, + "learning_rate": 4.149903221587166e-05, + "loss": 0.3533, + "step": 3464000 + }, + { + "epoch": 2.08, + "learning_rate": 4.149693645024222e-05, + "loss": 0.3558, + "step": 3464500 + }, + { + "epoch": 2.08, + "learning_rate": 4.149483648468165e-05, + "loss": 0.3447, + "step": 3465000 + }, + { + "epoch": 2.08, + "learning_rate": 4.149273651912109e-05, + "loss": 0.3524, + "step": 3465500 + }, + { + "epoch": 2.08, + "learning_rate": 4.149063655356052e-05, + "loss": 0.3472, + "step": 3466000 + }, + { + "epoch": 2.08, + "learning_rate": 4.148854078793108e-05, + "loss": 0.3524, + "step": 3466500 + }, + { + "epoch": 2.08, + "learning_rate": 4.148644502230163e-05, + "loss": 0.3569, + "step": 3467000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1484345056741065e-05, + "loss": 0.3495, + "step": 3467500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1482245091180505e-05, + "loss": 0.3522, + "step": 3468000 + }, + { + "epoch": 2.08, + "learning_rate": 4.148014512561994e-05, + "loss": 0.3521, + "step": 3468500 + }, + { + "epoch": 2.08, + "learning_rate": 4.147804516005938e-05, + "loss": 0.3532, + "step": 3469000 + }, + { + "epoch": 2.08, + "learning_rate": 4.147594519449881e-05, + "loss": 0.3619, + "step": 3469500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1473845228938246e-05, + "loss": 0.3519, + "step": 3470000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1471745263377686e-05, + "loss": 0.3523, + "step": 3470500 + }, + { + "epoch": 2.08, + "learning_rate": 4.146964949774824e-05, + "loss": 0.35, + "step": 3471000 + }, + { + "epoch": 2.08, + "learning_rate": 4.146754953218767e-05, + "loss": 0.3501, + "step": 3471500 + }, + { + "epoch": 2.08, + "learning_rate": 4.146544956662711e-05, + "loss": 0.3496, + "step": 3472000 + }, + { + "epoch": 2.08, + "learning_rate": 4.146334960106655e-05, + "loss": 0.3592, + "step": 3472500 + }, + { + "epoch": 2.08, + "learning_rate": 4.1461258035368214e-05, + "loss": 0.3551, + "step": 3473000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1459158069807654e-05, + "loss": 0.3505, + "step": 3473500 + }, + { + "epoch": 2.08, + "learning_rate": 4.145705810424709e-05, + "loss": 0.3442, + "step": 3474000 + }, + { + "epoch": 2.08, + "learning_rate": 4.145495813868652e-05, + "loss": 0.3501, + "step": 3474500 + }, + { + "epoch": 2.08, + "learning_rate": 4.145286237305708e-05, + "loss": 0.3507, + "step": 3475000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1450762407496515e-05, + "loss": 0.3564, + "step": 3475500 + }, + { + "epoch": 2.08, + "learning_rate": 4.144866244193595e-05, + "loss": 0.3599, + "step": 3476000 + }, + { + "epoch": 2.08, + "learning_rate": 4.144656247637539e-05, + "loss": 0.3538, + "step": 3476500 + }, + { + "epoch": 2.08, + "learning_rate": 4.144446251081482e-05, + "loss": 0.361, + "step": 3477000 + }, + { + "epoch": 2.08, + "learning_rate": 4.1442362545254255e-05, + "loss": 0.3603, + "step": 3477500 + }, + { + "epoch": 2.09, + "learning_rate": 4.144026677962481e-05, + "loss": 0.3498, + "step": 3478000 + }, + { + "epoch": 2.09, + "learning_rate": 4.143816681406425e-05, + "loss": 0.3508, + "step": 3478500 + }, + { + "epoch": 2.09, + "learning_rate": 4.143606684850368e-05, + "loss": 0.3502, + "step": 3479000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1433966882943116e-05, + "loss": 0.3516, + "step": 3479500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1431866917382556e-05, + "loss": 0.3604, + "step": 3480000 + }, + { + "epoch": 2.09, + "learning_rate": 4.142976695182199e-05, + "loss": 0.3436, + "step": 3480500 + }, + { + "epoch": 2.09, + "learning_rate": 4.142766698626142e-05, + "loss": 0.357, + "step": 3481000 + }, + { + "epoch": 2.09, + "learning_rate": 4.142557122063198e-05, + "loss": 0.352, + "step": 3481500 + }, + { + "epoch": 2.09, + "learning_rate": 4.142347125507142e-05, + "loss": 0.3547, + "step": 3482000 + }, + { + "epoch": 2.09, + "learning_rate": 4.142137128951085e-05, + "loss": 0.3501, + "step": 3482500 + }, + { + "epoch": 2.09, + "learning_rate": 4.141927132395029e-05, + "loss": 0.3534, + "step": 3483000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1417171358389724e-05, + "loss": 0.3469, + "step": 3483500 + }, + { + "epoch": 2.09, + "learning_rate": 4.141507559276028e-05, + "loss": 0.3548, + "step": 3484000 + }, + { + "epoch": 2.09, + "learning_rate": 4.141297562719971e-05, + "loss": 0.3508, + "step": 3484500 + }, + { + "epoch": 2.09, + "learning_rate": 4.141087566163915e-05, + "loss": 0.3543, + "step": 3485000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1408775696078585e-05, + "loss": 0.3453, + "step": 3485500 + }, + { + "epoch": 2.09, + "learning_rate": 4.140667573051802e-05, + "loss": 0.3493, + "step": 3486000 + }, + { + "epoch": 2.09, + "learning_rate": 4.140457996488857e-05, + "loss": 0.3561, + "step": 3486500 + }, + { + "epoch": 2.09, + "learning_rate": 4.140247999932801e-05, + "loss": 0.3485, + "step": 3487000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1400380033767446e-05, + "loss": 0.3558, + "step": 3487500 + }, + { + "epoch": 2.09, + "learning_rate": 4.139828006820688e-05, + "loss": 0.3626, + "step": 3488000 + }, + { + "epoch": 2.09, + "learning_rate": 4.139618010264632e-05, + "loss": 0.3545, + "step": 3488500 + }, + { + "epoch": 2.09, + "learning_rate": 4.139408013708575e-05, + "loss": 0.3467, + "step": 3489000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1391980171525186e-05, + "loss": 0.3578, + "step": 3489500 + }, + { + "epoch": 2.09, + "learning_rate": 4.138988020596462e-05, + "loss": 0.3501, + "step": 3490000 + }, + { + "epoch": 2.09, + "learning_rate": 4.138778444033518e-05, + "loss": 0.3621, + "step": 3490500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1385684474774614e-05, + "loss": 0.3543, + "step": 3491000 + }, + { + "epoch": 2.09, + "learning_rate": 4.138358870914517e-05, + "loss": 0.3553, + "step": 3491500 + }, + { + "epoch": 2.09, + "learning_rate": 4.138148874358461e-05, + "loss": 0.3605, + "step": 3492000 + }, + { + "epoch": 2.09, + "learning_rate": 4.137938877802404e-05, + "loss": 0.3518, + "step": 3492500 + }, + { + "epoch": 2.09, + "learning_rate": 4.1377288812463474e-05, + "loss": 0.3443, + "step": 3493000 + }, + { + "epoch": 2.09, + "learning_rate": 4.1375188846902915e-05, + "loss": 0.3448, + "step": 3493500 + }, + { + "epoch": 2.09, + "learning_rate": 4.137308888134235e-05, + "loss": 0.3544, + "step": 3494000 + }, + { + "epoch": 2.1, + "learning_rate": 4.137098891578178e-05, + "loss": 0.3594, + "step": 3494500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1368888950221215e-05, + "loss": 0.3489, + "step": 3495000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1366793184591775e-05, + "loss": 0.3573, + "step": 3495500 + }, + { + "epoch": 2.1, + "learning_rate": 4.136469741896233e-05, + "loss": 0.356, + "step": 3496000 + }, + { + "epoch": 2.1, + "learning_rate": 4.136259745340176e-05, + "loss": 0.3495, + "step": 3496500 + }, + { + "epoch": 2.1, + "learning_rate": 4.13604974878412e-05, + "loss": 0.3533, + "step": 3497000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1358397522280636e-05, + "loss": 0.3512, + "step": 3497500 + }, + { + "epoch": 2.1, + "learning_rate": 4.135629755672007e-05, + "loss": 0.351, + "step": 3498000 + }, + { + "epoch": 2.1, + "learning_rate": 4.135419759115951e-05, + "loss": 0.3498, + "step": 3498500 + }, + { + "epoch": 2.1, + "learning_rate": 4.135209762559894e-05, + "loss": 0.3454, + "step": 3499000 + }, + { + "epoch": 2.1, + "learning_rate": 4.134999766003837e-05, + "loss": 0.3484, + "step": 3499500 + }, + { + "epoch": 2.1, + "learning_rate": 4.134789769447781e-05, + "loss": 0.3501, + "step": 3500000 + }, + { + "epoch": 2.1, + "eval_loss": 0.34503260254859924, + "eval_runtime": 1117.4396, + "eval_samples_per_second": 471.363, + "eval_steps_per_second": 78.561, + "step": 3500000 + }, + { + "epoch": 2.1, + "learning_rate": 4.134580192884837e-05, + "loss": 0.3546, + "step": 3500500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1343706163218924e-05, + "loss": 0.356, + "step": 3501000 + }, + { + "epoch": 2.1, + "learning_rate": 4.134160619765836e-05, + "loss": 0.3565, + "step": 3501500 + }, + { + "epoch": 2.1, + "learning_rate": 4.133950623209779e-05, + "loss": 0.3592, + "step": 3502000 + }, + { + "epoch": 2.1, + "learning_rate": 4.133740626653723e-05, + "loss": 0.3566, + "step": 3502500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1335310500907785e-05, + "loss": 0.3474, + "step": 3503000 + }, + { + "epoch": 2.1, + "learning_rate": 4.133321473527834e-05, + "loss": 0.3567, + "step": 3503500 + }, + { + "epoch": 2.1, + "learning_rate": 4.133111476971777e-05, + "loss": 0.354, + "step": 3504000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132901480415721e-05, + "loss": 0.3509, + "step": 3504500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1326914838596645e-05, + "loss": 0.3533, + "step": 3505000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132481487303608e-05, + "loss": 0.3493, + "step": 3505500 + }, + { + "epoch": 2.1, + "learning_rate": 4.132271490747552e-05, + "loss": 0.3527, + "step": 3506000 + }, + { + "epoch": 2.1, + "learning_rate": 4.132061494191495e-05, + "loss": 0.3445, + "step": 3506500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1318514976354386e-05, + "loss": 0.3498, + "step": 3507000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1316415010793826e-05, + "loss": 0.3519, + "step": 3507500 + }, + { + "epoch": 2.1, + "learning_rate": 4.131431504523326e-05, + "loss": 0.3545, + "step": 3508000 + }, + { + "epoch": 2.1, + "learning_rate": 4.131221507967269e-05, + "loss": 0.3506, + "step": 3508500 + }, + { + "epoch": 2.1, + "learning_rate": 4.131011511411213e-05, + "loss": 0.3566, + "step": 3509000 + }, + { + "epoch": 2.1, + "learning_rate": 4.130801934848269e-05, + "loss": 0.37, + "step": 3509500 + }, + { + "epoch": 2.1, + "learning_rate": 4.130591938292212e-05, + "loss": 0.3542, + "step": 3510000 + }, + { + "epoch": 2.1, + "learning_rate": 4.1303819417361554e-05, + "loss": 0.3482, + "step": 3510500 + }, + { + "epoch": 2.1, + "learning_rate": 4.1301719451800994e-05, + "loss": 0.3488, + "step": 3511000 + }, + { + "epoch": 2.11, + "learning_rate": 4.129962368617155e-05, + "loss": 0.3468, + "step": 3511500 + }, + { + "epoch": 2.11, + "learning_rate": 4.129752372061098e-05, + "loss": 0.344, + "step": 3512000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1295427954981535e-05, + "loss": 0.3537, + "step": 3512500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1293327989420975e-05, + "loss": 0.3517, + "step": 3513000 + }, + { + "epoch": 2.11, + "learning_rate": 4.129122802386041e-05, + "loss": 0.3532, + "step": 3513500 + }, + { + "epoch": 2.11, + "learning_rate": 4.128912805829984e-05, + "loss": 0.3536, + "step": 3514000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1287032292670395e-05, + "loss": 0.356, + "step": 3514500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1284932327109836e-05, + "loss": 0.3575, + "step": 3515000 + }, + { + "epoch": 2.11, + "learning_rate": 4.128283236154927e-05, + "loss": 0.3437, + "step": 3515500 + }, + { + "epoch": 2.11, + "learning_rate": 4.12807323959887e-05, + "loss": 0.3553, + "step": 3516000 + }, + { + "epoch": 2.11, + "learning_rate": 4.127863243042814e-05, + "loss": 0.353, + "step": 3516500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1276536664798696e-05, + "loss": 0.3583, + "step": 3517000 + }, + { + "epoch": 2.11, + "learning_rate": 4.127443669923813e-05, + "loss": 0.3603, + "step": 3517500 + }, + { + "epoch": 2.11, + "learning_rate": 4.127233673367757e-05, + "loss": 0.3544, + "step": 3518000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1270236768117004e-05, + "loss": 0.3577, + "step": 3518500 + }, + { + "epoch": 2.11, + "learning_rate": 4.126813680255644e-05, + "loss": 0.3403, + "step": 3519000 + }, + { + "epoch": 2.11, + "learning_rate": 4.126603683699588e-05, + "loss": 0.3511, + "step": 3519500 + }, + { + "epoch": 2.11, + "learning_rate": 4.126394107136643e-05, + "loss": 0.3449, + "step": 3520000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1261841105805864e-05, + "loss": 0.355, + "step": 3520500 + }, + { + "epoch": 2.11, + "learning_rate": 4.125974534017642e-05, + "loss": 0.3518, + "step": 3521000 + }, + { + "epoch": 2.11, + "learning_rate": 4.125764537461585e-05, + "loss": 0.3447, + "step": 3521500 + }, + { + "epoch": 2.11, + "learning_rate": 4.125554540905529e-05, + "loss": 0.3532, + "step": 3522000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1253445443494725e-05, + "loss": 0.3522, + "step": 3522500 + }, + { + "epoch": 2.11, + "learning_rate": 4.125134547793416e-05, + "loss": 0.3575, + "step": 3523000 + }, + { + "epoch": 2.11, + "learning_rate": 4.12492455123736e-05, + "loss": 0.3528, + "step": 3523500 + }, + { + "epoch": 2.11, + "learning_rate": 4.124714554681303e-05, + "loss": 0.3547, + "step": 3524000 + }, + { + "epoch": 2.11, + "learning_rate": 4.1245045581252466e-05, + "loss": 0.3611, + "step": 3524500 + }, + { + "epoch": 2.11, + "learning_rate": 4.1242945615691906e-05, + "loss": 0.3548, + "step": 3525000 + }, + { + "epoch": 2.11, + "learning_rate": 4.124084565013133e-05, + "loss": 0.3514, + "step": 3525500 + }, + { + "epoch": 2.11, + "learning_rate": 4.123874988450189e-05, + "loss": 0.3572, + "step": 3526000 + }, + { + "epoch": 2.11, + "learning_rate": 4.123664991894133e-05, + "loss": 0.35, + "step": 3526500 + }, + { + "epoch": 2.11, + "learning_rate": 4.123454995338077e-05, + "loss": 0.3525, + "step": 3527000 + }, + { + "epoch": 2.11, + "learning_rate": 4.12324499878202e-05, + "loss": 0.3551, + "step": 3527500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1230354222190754e-05, + "loss": 0.3557, + "step": 3528000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1228254256630194e-05, + "loss": 0.3615, + "step": 3528500 + }, + { + "epoch": 2.12, + "learning_rate": 4.122615429106963e-05, + "loss": 0.3516, + "step": 3529000 + }, + { + "epoch": 2.12, + "learning_rate": 4.122405432550906e-05, + "loss": 0.344, + "step": 3529500 + }, + { + "epoch": 2.12, + "learning_rate": 4.12219543599485e-05, + "loss": 0.3484, + "step": 3530000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1219858594319055e-05, + "loss": 0.3544, + "step": 3530500 + }, + { + "epoch": 2.12, + "learning_rate": 4.121775862875849e-05, + "loss": 0.3525, + "step": 3531000 + }, + { + "epoch": 2.12, + "learning_rate": 4.121565866319792e-05, + "loss": 0.3511, + "step": 3531500 + }, + { + "epoch": 2.12, + "learning_rate": 4.121355869763736e-05, + "loss": 0.3539, + "step": 3532000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1211458732076795e-05, + "loss": 0.3512, + "step": 3532500 + }, + { + "epoch": 2.12, + "learning_rate": 4.120935876651623e-05, + "loss": 0.3478, + "step": 3533000 + }, + { + "epoch": 2.12, + "learning_rate": 4.120725880095566e-05, + "loss": 0.3532, + "step": 3533500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1205158835395096e-05, + "loss": 0.3554, + "step": 3534000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1203063069765656e-05, + "loss": 0.3571, + "step": 3534500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1200963104205096e-05, + "loss": 0.354, + "step": 3535000 + }, + { + "epoch": 2.12, + "learning_rate": 4.119886313864452e-05, + "loss": 0.3568, + "step": 3535500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1196763173083956e-05, + "loss": 0.3464, + "step": 3536000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1194663207523397e-05, + "loss": 0.3587, + "step": 3536500 + }, + { + "epoch": 2.12, + "learning_rate": 4.119256744189396e-05, + "loss": 0.3615, + "step": 3537000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1190467476333384e-05, + "loss": 0.3673, + "step": 3537500 + }, + { + "epoch": 2.12, + "learning_rate": 4.118836751077282e-05, + "loss": 0.3536, + "step": 3538000 + }, + { + "epoch": 2.12, + "learning_rate": 4.118626754521226e-05, + "loss": 0.3529, + "step": 3538500 + }, + { + "epoch": 2.12, + "learning_rate": 4.118416757965169e-05, + "loss": 0.3682, + "step": 3539000 + }, + { + "epoch": 2.12, + "learning_rate": 4.1182067614091124e-05, + "loss": 0.3608, + "step": 3539500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1179967648530564e-05, + "loss": 0.3625, + "step": 3540000 + }, + { + "epoch": 2.12, + "learning_rate": 4.117787188290112e-05, + "loss": 0.3531, + "step": 3540500 + }, + { + "epoch": 2.12, + "learning_rate": 4.117577191734055e-05, + "loss": 0.3612, + "step": 3541000 + }, + { + "epoch": 2.12, + "learning_rate": 4.117367195177999e-05, + "loss": 0.3604, + "step": 3541500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1171571986219425e-05, + "loss": 0.3443, + "step": 3542000 + }, + { + "epoch": 2.12, + "learning_rate": 4.116947202065886e-05, + "loss": 0.3578, + "step": 3542500 + }, + { + "epoch": 2.12, + "learning_rate": 4.11673720550983e-05, + "loss": 0.356, + "step": 3543000 + }, + { + "epoch": 2.12, + "learning_rate": 4.116527628946885e-05, + "loss": 0.3555, + "step": 3543500 + }, + { + "epoch": 2.12, + "learning_rate": 4.1163176323908286e-05, + "loss": 0.3541, + "step": 3544000 + }, + { + "epoch": 2.13, + "learning_rate": 4.116107635834772e-05, + "loss": 0.3421, + "step": 3544500 + }, + { + "epoch": 2.13, + "learning_rate": 4.115897639278716e-05, + "loss": 0.3547, + "step": 3545000 + }, + { + "epoch": 2.13, + "learning_rate": 4.115687642722659e-05, + "loss": 0.3587, + "step": 3545500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1154776461666027e-05, + "loss": 0.3429, + "step": 3546000 + }, + { + "epoch": 2.13, + "learning_rate": 4.115267649610547e-05, + "loss": 0.3569, + "step": 3546500 + }, + { + "epoch": 2.13, + "learning_rate": 4.115058073047602e-05, + "loss": 0.3614, + "step": 3547000 + }, + { + "epoch": 2.13, + "learning_rate": 4.1148480764915454e-05, + "loss": 0.352, + "step": 3547500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1146380799354894e-05, + "loss": 0.3492, + "step": 3548000 + }, + { + "epoch": 2.13, + "learning_rate": 4.114428083379433e-05, + "loss": 0.3466, + "step": 3548500 + }, + { + "epoch": 2.13, + "learning_rate": 4.114218086823376e-05, + "loss": 0.3561, + "step": 3549000 + }, + { + "epoch": 2.13, + "learning_rate": 4.11400809026732e-05, + "loss": 0.3532, + "step": 3549500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1137980937112635e-05, + "loss": 0.3601, + "step": 3550000 + }, + { + "epoch": 2.13, + "learning_rate": 4.113588517148319e-05, + "loss": 0.3477, + "step": 3550500 + }, + { + "epoch": 2.13, + "learning_rate": 4.113378520592262e-05, + "loss": 0.3559, + "step": 3551000 + }, + { + "epoch": 2.13, + "learning_rate": 4.113168524036206e-05, + "loss": 0.3644, + "step": 3551500 + }, + { + "epoch": 2.13, + "learning_rate": 4.1129585274801495e-05, + "loss": 0.3505, + "step": 3552000 + }, + { + "epoch": 2.13, + "learning_rate": 4.112748530924092e-05, + "loss": 0.3448, + "step": 3552500 + }, + { + "epoch": 2.13, + "learning_rate": 4.112538534368036e-05, + "loss": 0.3517, + "step": 3553000 + }, + { + "epoch": 2.13, + "learning_rate": 4.1123285378119796e-05, + "loss": 0.3498, + "step": 3553500 + }, + { + "epoch": 2.13, + "learning_rate": 4.112118541255923e-05, + "loss": 0.3541, + "step": 3554000 + }, + { + "epoch": 2.13, + "learning_rate": 4.111908964692979e-05, + "loss": 0.3547, + "step": 3554500 + }, + { + "epoch": 2.13, + "learning_rate": 4.111699388130035e-05, + "loss": 0.3557, + "step": 3555000 + }, + { + "epoch": 2.13, + "learning_rate": 4.111489391573978e-05, + "loss": 0.3533, + "step": 3555500 + }, + { + "epoch": 2.13, + "learning_rate": 4.111279395017922e-05, + "loss": 0.3529, + "step": 3556000 + }, + { + "epoch": 2.13, + "learning_rate": 4.111069398461866e-05, + "loss": 0.3524, + "step": 3556500 + }, + { + "epoch": 2.13, + "learning_rate": 4.110859401905809e-05, + "loss": 0.3542, + "step": 3557000 + }, + { + "epoch": 2.13, + "learning_rate": 4.110649405349752e-05, + "loss": 0.3519, + "step": 3557500 + }, + { + "epoch": 2.13, + "learning_rate": 4.110439408793696e-05, + "loss": 0.3618, + "step": 3558000 + }, + { + "epoch": 2.13, + "learning_rate": 4.110229412237639e-05, + "loss": 0.3572, + "step": 3558500 + }, + { + "epoch": 2.13, + "learning_rate": 4.110019835674695e-05, + "loss": 0.3628, + "step": 3559000 + }, + { + "epoch": 2.13, + "learning_rate": 4.1098098391186385e-05, + "loss": 0.3519, + "step": 3559500 + }, + { + "epoch": 2.13, + "learning_rate": 4.109599842562582e-05, + "loss": 0.3478, + "step": 3560000 + }, + { + "epoch": 2.13, + "learning_rate": 4.109389846006525e-05, + "loss": 0.359, + "step": 3560500 + }, + { + "epoch": 2.13, + "learning_rate": 4.109180269443581e-05, + "loss": 0.3565, + "step": 3561000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1089702728875245e-05, + "loss": 0.3588, + "step": 3561500 + }, + { + "epoch": 2.14, + "learning_rate": 4.108760276331468e-05, + "loss": 0.3603, + "step": 3562000 + }, + { + "epoch": 2.14, + "learning_rate": 4.108550279775411e-05, + "loss": 0.349, + "step": 3562500 + }, + { + "epoch": 2.14, + "learning_rate": 4.108340703212467e-05, + "loss": 0.3529, + "step": 3563000 + }, + { + "epoch": 2.14, + "learning_rate": 4.108130706656411e-05, + "loss": 0.3595, + "step": 3563500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1079207101003546e-05, + "loss": 0.3501, + "step": 3564000 + }, + { + "epoch": 2.14, + "learning_rate": 4.107710713544297e-05, + "loss": 0.3511, + "step": 3564500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1075015569744654e-05, + "loss": 0.3493, + "step": 3565000 + }, + { + "epoch": 2.14, + "learning_rate": 4.107291560418409e-05, + "loss": 0.3601, + "step": 3565500 + }, + { + "epoch": 2.14, + "learning_rate": 4.107081563862353e-05, + "loss": 0.3576, + "step": 3566000 + }, + { + "epoch": 2.14, + "learning_rate": 4.106871567306296e-05, + "loss": 0.3574, + "step": 3566500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1066615707502394e-05, + "loss": 0.3543, + "step": 3567000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1064515741941834e-05, + "loss": 0.3553, + "step": 3567500 + }, + { + "epoch": 2.14, + "learning_rate": 4.106241577638127e-05, + "loss": 0.3492, + "step": 3568000 + }, + { + "epoch": 2.14, + "learning_rate": 4.10603158108207e-05, + "loss": 0.3469, + "step": 3568500 + }, + { + "epoch": 2.14, + "learning_rate": 4.105822004519126e-05, + "loss": 0.3548, + "step": 3569000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1056120079630695e-05, + "loss": 0.3564, + "step": 3569500 + }, + { + "epoch": 2.14, + "learning_rate": 4.105402011407013e-05, + "loss": 0.3543, + "step": 3570000 + }, + { + "epoch": 2.14, + "learning_rate": 4.105192434844068e-05, + "loss": 0.3597, + "step": 3570500 + }, + { + "epoch": 2.14, + "learning_rate": 4.104982438288012e-05, + "loss": 0.3573, + "step": 3571000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1047724417319556e-05, + "loss": 0.3516, + "step": 3571500 + }, + { + "epoch": 2.14, + "learning_rate": 4.104562445175899e-05, + "loss": 0.3439, + "step": 3572000 + }, + { + "epoch": 2.14, + "learning_rate": 4.104352448619843e-05, + "loss": 0.3478, + "step": 3572500 + }, + { + "epoch": 2.14, + "learning_rate": 4.104142452063786e-05, + "loss": 0.337, + "step": 3573000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1039324555077296e-05, + "loss": 0.3531, + "step": 3573500 + }, + { + "epoch": 2.14, + "learning_rate": 4.103722878944785e-05, + "loss": 0.3526, + "step": 3574000 + }, + { + "epoch": 2.14, + "learning_rate": 4.103512882388729e-05, + "loss": 0.3487, + "step": 3574500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1033028858326724e-05, + "loss": 0.354, + "step": 3575000 + }, + { + "epoch": 2.14, + "learning_rate": 4.103092889276616e-05, + "loss": 0.3431, + "step": 3575500 + }, + { + "epoch": 2.14, + "learning_rate": 4.10288289272056e-05, + "loss": 0.362, + "step": 3576000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1026728961645024e-05, + "loss": 0.3575, + "step": 3576500 + }, + { + "epoch": 2.14, + "learning_rate": 4.1024628996084464e-05, + "loss": 0.3548, + "step": 3577000 + }, + { + "epoch": 2.14, + "learning_rate": 4.1022533230455025e-05, + "loss": 0.3451, + "step": 3577500 + }, + { + "epoch": 2.15, + "learning_rate": 4.102043326489446e-05, + "loss": 0.3442, + "step": 3578000 + }, + { + "epoch": 2.15, + "learning_rate": 4.101833329933389e-05, + "loss": 0.3429, + "step": 3578500 + }, + { + "epoch": 2.15, + "learning_rate": 4.1016233333773325e-05, + "loss": 0.3536, + "step": 3579000 + }, + { + "epoch": 2.15, + "learning_rate": 4.101413336821276e-05, + "loss": 0.3529, + "step": 3579500 + }, + { + "epoch": 2.15, + "learning_rate": 4.101203760258332e-05, + "loss": 0.3645, + "step": 3580000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100993763702275e-05, + "loss": 0.3533, + "step": 3580500 + }, + { + "epoch": 2.15, + "learning_rate": 4.100783767146219e-05, + "loss": 0.3564, + "step": 3581000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100573770590162e-05, + "loss": 0.352, + "step": 3581500 + }, + { + "epoch": 2.15, + "learning_rate": 4.100363774034105e-05, + "loss": 0.3555, + "step": 3582000 + }, + { + "epoch": 2.15, + "learning_rate": 4.100154197471161e-05, + "loss": 0.3473, + "step": 3582500 + }, + { + "epoch": 2.15, + "learning_rate": 4.099944200915105e-05, + "loss": 0.3577, + "step": 3583000 + }, + { + "epoch": 2.15, + "learning_rate": 4.099734204359048e-05, + "loss": 0.3504, + "step": 3583500 + }, + { + "epoch": 2.15, + "learning_rate": 4.099524207802992e-05, + "loss": 0.3501, + "step": 3584000 + }, + { + "epoch": 2.15, + "learning_rate": 4.0993142112469354e-05, + "loss": 0.3578, + "step": 3584500 + }, + { + "epoch": 2.15, + "learning_rate": 4.099104214690879e-05, + "loss": 0.3541, + "step": 3585000 + }, + { + "epoch": 2.15, + "learning_rate": 4.098894218134823e-05, + "loss": 0.3476, + "step": 3585500 + }, + { + "epoch": 2.15, + "learning_rate": 4.098684641571878e-05, + "loss": 0.3526, + "step": 3586000 + }, + { + "epoch": 2.15, + "learning_rate": 4.0984746450158214e-05, + "loss": 0.3484, + "step": 3586500 + }, + { + "epoch": 2.15, + "learning_rate": 4.098264648459765e-05, + "loss": 0.3538, + "step": 3587000 + }, + { + "epoch": 2.15, + "learning_rate": 4.098054651903709e-05, + "loss": 0.3678, + "step": 3587500 + }, + { + "epoch": 2.15, + "learning_rate": 4.097844655347652e-05, + "loss": 0.3411, + "step": 3588000 + }, + { + "epoch": 2.15, + "learning_rate": 4.0976346587915955e-05, + "loss": 0.3536, + "step": 3588500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0974246622355395e-05, + "loss": 0.3551, + "step": 3589000 + }, + { + "epoch": 2.15, + "learning_rate": 4.097214665679483e-05, + "loss": 0.346, + "step": 3589500 + }, + { + "epoch": 2.15, + "learning_rate": 4.097005509109651e-05, + "loss": 0.3474, + "step": 3590000 + }, + { + "epoch": 2.15, + "learning_rate": 4.096795512553594e-05, + "loss": 0.3514, + "step": 3590500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0965855159975376e-05, + "loss": 0.3498, + "step": 3591000 + }, + { + "epoch": 2.15, + "learning_rate": 4.096375519441481e-05, + "loss": 0.3462, + "step": 3591500 + }, + { + "epoch": 2.15, + "learning_rate": 4.096165522885424e-05, + "loss": 0.3495, + "step": 3592000 + }, + { + "epoch": 2.15, + "learning_rate": 4.0959559463224803e-05, + "loss": 0.3485, + "step": 3592500 + }, + { + "epoch": 2.15, + "learning_rate": 4.095745949766424e-05, + "loss": 0.3596, + "step": 3593000 + }, + { + "epoch": 2.15, + "learning_rate": 4.095535953210367e-05, + "loss": 0.3588, + "step": 3593500 + }, + { + "epoch": 2.15, + "learning_rate": 4.0953259566543104e-05, + "loss": 0.3504, + "step": 3594000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0951159600982544e-05, + "loss": 0.3685, + "step": 3594500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0949063835353104e-05, + "loss": 0.3504, + "step": 3595000 + }, + { + "epoch": 2.16, + "learning_rate": 4.094696386979253e-05, + "loss": 0.3546, + "step": 3595500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0944863904231964e-05, + "loss": 0.3539, + "step": 3596000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0942763938671405e-05, + "loss": 0.3326, + "step": 3596500 + }, + { + "epoch": 2.16, + "learning_rate": 4.094066397311084e-05, + "loss": 0.3473, + "step": 3597000 + }, + { + "epoch": 2.16, + "learning_rate": 4.09385682074814e-05, + "loss": 0.3527, + "step": 3597500 + }, + { + "epoch": 2.16, + "learning_rate": 4.093646824192083e-05, + "loss": 0.3496, + "step": 3598000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0934368276360265e-05, + "loss": 0.3435, + "step": 3598500 + }, + { + "epoch": 2.16, + "learning_rate": 4.09322683107997e-05, + "loss": 0.3522, + "step": 3599000 + }, + { + "epoch": 2.16, + "learning_rate": 4.093016834523914e-05, + "loss": 0.3535, + "step": 3599500 + }, + { + "epoch": 2.16, + "learning_rate": 4.09280725796097e-05, + "loss": 0.3483, + "step": 3600000 + }, + { + "epoch": 2.16, + "eval_loss": 0.3436375558376312, + "eval_runtime": 1120.5227, + "eval_samples_per_second": 470.066, + "eval_steps_per_second": 78.345, + "step": 3600000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0925972614049126e-05, + "loss": 0.3604, + "step": 3600500 + }, + { + "epoch": 2.16, + "learning_rate": 4.092387264848856e-05, + "loss": 0.347, + "step": 3601000 + }, + { + "epoch": 2.16, + "learning_rate": 4.092177688285912e-05, + "loss": 0.3591, + "step": 3601500 + }, + { + "epoch": 2.16, + "learning_rate": 4.091967691729856e-05, + "loss": 0.3539, + "step": 3602000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0917576951737994e-05, + "loss": 0.3507, + "step": 3602500 + }, + { + "epoch": 2.16, + "learning_rate": 4.091547698617742e-05, + "loss": 0.3558, + "step": 3603000 + }, + { + "epoch": 2.16, + "learning_rate": 4.091337702061686e-05, + "loss": 0.3687, + "step": 3603500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0911277055056294e-05, + "loss": 0.3462, + "step": 3604000 + }, + { + "epoch": 2.16, + "learning_rate": 4.090917708949573e-05, + "loss": 0.3461, + "step": 3604500 + }, + { + "epoch": 2.16, + "learning_rate": 4.090707712393517e-05, + "loss": 0.3578, + "step": 3605000 + }, + { + "epoch": 2.16, + "learning_rate": 4.09049771583746e-05, + "loss": 0.3603, + "step": 3605500 + }, + { + "epoch": 2.16, + "learning_rate": 4.0902881392745155e-05, + "loss": 0.358, + "step": 3606000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0900781427184595e-05, + "loss": 0.3639, + "step": 3606500 + }, + { + "epoch": 2.16, + "learning_rate": 4.089868146162403e-05, + "loss": 0.3461, + "step": 3607000 + }, + { + "epoch": 2.16, + "learning_rate": 4.089658149606346e-05, + "loss": 0.3558, + "step": 3607500 + }, + { + "epoch": 2.16, + "learning_rate": 4.08944815305029e-05, + "loss": 0.3626, + "step": 3608000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0892385764873456e-05, + "loss": 0.353, + "step": 3608500 + }, + { + "epoch": 2.16, + "learning_rate": 4.089028579931289e-05, + "loss": 0.3489, + "step": 3609000 + }, + { + "epoch": 2.16, + "learning_rate": 4.088818583375232e-05, + "loss": 0.3402, + "step": 3609500 + }, + { + "epoch": 2.16, + "learning_rate": 4.088608586819176e-05, + "loss": 0.3399, + "step": 3610000 + }, + { + "epoch": 2.16, + "learning_rate": 4.0883985902631196e-05, + "loss": 0.3496, + "step": 3610500 + }, + { + "epoch": 2.16, + "learning_rate": 4.088188593707063e-05, + "loss": 0.3459, + "step": 3611000 + }, + { + "epoch": 2.17, + "learning_rate": 4.087978597151007e-05, + "loss": 0.3395, + "step": 3611500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0877686005949504e-05, + "loss": 0.35, + "step": 3612000 + }, + { + "epoch": 2.17, + "learning_rate": 4.087559024032006e-05, + "loss": 0.3542, + "step": 3612500 + }, + { + "epoch": 2.17, + "learning_rate": 4.08734902747595e-05, + "loss": 0.3483, + "step": 3613000 + }, + { + "epoch": 2.17, + "learning_rate": 4.087139030919893e-05, + "loss": 0.3571, + "step": 3613500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0869290343638364e-05, + "loss": 0.3559, + "step": 3614000 + }, + { + "epoch": 2.17, + "learning_rate": 4.086719457800892e-05, + "loss": 0.3523, + "step": 3614500 + }, + { + "epoch": 2.17, + "learning_rate": 4.086509461244836e-05, + "loss": 0.35, + "step": 3615000 + }, + { + "epoch": 2.17, + "learning_rate": 4.086299464688779e-05, + "loss": 0.3491, + "step": 3615500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0860894681327225e-05, + "loss": 0.3522, + "step": 3616000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085879891569778e-05, + "loss": 0.3542, + "step": 3616500 + }, + { + "epoch": 2.17, + "learning_rate": 4.085669895013722e-05, + "loss": 0.3505, + "step": 3617000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085459898457665e-05, + "loss": 0.3555, + "step": 3617500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0852503218947206e-05, + "loss": 0.362, + "step": 3618000 + }, + { + "epoch": 2.17, + "learning_rate": 4.085040325338664e-05, + "loss": 0.351, + "step": 3618500 + }, + { + "epoch": 2.17, + "learning_rate": 4.084830328782608e-05, + "loss": 0.3463, + "step": 3619000 + }, + { + "epoch": 2.17, + "learning_rate": 4.084620752219663e-05, + "loss": 0.355, + "step": 3619500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0844107556636067e-05, + "loss": 0.3561, + "step": 3620000 + }, + { + "epoch": 2.17, + "learning_rate": 4.084200759107551e-05, + "loss": 0.3553, + "step": 3620500 + }, + { + "epoch": 2.17, + "learning_rate": 4.083990762551494e-05, + "loss": 0.3674, + "step": 3621000 + }, + { + "epoch": 2.17, + "learning_rate": 4.0837807659954374e-05, + "loss": 0.3578, + "step": 3621500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0835707694393814e-05, + "loss": 0.3548, + "step": 3622000 + }, + { + "epoch": 2.17, + "learning_rate": 4.083360772883325e-05, + "loss": 0.3478, + "step": 3622500 + }, + { + "epoch": 2.17, + "learning_rate": 4.083150776327268e-05, + "loss": 0.3489, + "step": 3623000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082940779771212e-05, + "loss": 0.3521, + "step": 3623500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0827307832151555e-05, + "loss": 0.3453, + "step": 3624000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082520786659099e-05, + "loss": 0.356, + "step": 3624500 + }, + { + "epoch": 2.17, + "learning_rate": 4.082310790103042e-05, + "loss": 0.3488, + "step": 3625000 + }, + { + "epoch": 2.17, + "learning_rate": 4.082101213540098e-05, + "loss": 0.3431, + "step": 3625500 + }, + { + "epoch": 2.17, + "learning_rate": 4.0818912169840415e-05, + "loss": 0.3541, + "step": 3626000 + }, + { + "epoch": 2.17, + "learning_rate": 4.081681220427985e-05, + "loss": 0.3549, + "step": 3626500 + }, + { + "epoch": 2.17, + "learning_rate": 4.081471223871929e-05, + "loss": 0.3575, + "step": 3627000 + }, + { + "epoch": 2.17, + "learning_rate": 4.081261647308984e-05, + "loss": 0.3464, + "step": 3627500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0810516507529276e-05, + "loss": 0.3489, + "step": 3628000 + }, + { + "epoch": 2.18, + "learning_rate": 4.080842074189983e-05, + "loss": 0.3466, + "step": 3628500 + }, + { + "epoch": 2.18, + "learning_rate": 4.080632077633927e-05, + "loss": 0.3506, + "step": 3629000 + }, + { + "epoch": 2.18, + "learning_rate": 4.08042208107787e-05, + "loss": 0.3456, + "step": 3629500 + }, + { + "epoch": 2.18, + "learning_rate": 4.080212084521814e-05, + "loss": 0.3593, + "step": 3630000 + }, + { + "epoch": 2.18, + "learning_rate": 4.080002087965758e-05, + "loss": 0.3585, + "step": 3630500 + }, + { + "epoch": 2.18, + "learning_rate": 4.079792091409701e-05, + "loss": 0.3525, + "step": 3631000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0795820948536444e-05, + "loss": 0.3563, + "step": 3631500 + }, + { + "epoch": 2.18, + "learning_rate": 4.079372098297588e-05, + "loss": 0.3521, + "step": 3632000 + }, + { + "epoch": 2.18, + "learning_rate": 4.079162521734644e-05, + "loss": 0.3517, + "step": 3632500 + }, + { + "epoch": 2.18, + "learning_rate": 4.078952525178587e-05, + "loss": 0.3511, + "step": 3633000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0787425286225305e-05, + "loss": 0.3503, + "step": 3633500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0785329520595865e-05, + "loss": 0.3447, + "step": 3634000 + }, + { + "epoch": 2.18, + "learning_rate": 4.07832295550353e-05, + "loss": 0.3561, + "step": 3634500 + }, + { + "epoch": 2.18, + "learning_rate": 4.078112958947473e-05, + "loss": 0.3517, + "step": 3635000 + }, + { + "epoch": 2.18, + "learning_rate": 4.077902962391417e-05, + "loss": 0.3559, + "step": 3635500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0776929658353606e-05, + "loss": 0.3514, + "step": 3636000 + }, + { + "epoch": 2.18, + "learning_rate": 4.077482969279304e-05, + "loss": 0.3568, + "step": 3636500 + }, + { + "epoch": 2.18, + "learning_rate": 4.077272972723247e-05, + "loss": 0.3601, + "step": 3637000 + }, + { + "epoch": 2.18, + "learning_rate": 4.077063396160303e-05, + "loss": 0.3548, + "step": 3637500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0768533996042466e-05, + "loss": 0.3546, + "step": 3638000 + }, + { + "epoch": 2.18, + "learning_rate": 4.07664340304819e-05, + "loss": 0.3532, + "step": 3638500 + }, + { + "epoch": 2.18, + "learning_rate": 4.076433406492134e-05, + "loss": 0.3534, + "step": 3639000 + }, + { + "epoch": 2.18, + "learning_rate": 4.076223409936077e-05, + "loss": 0.3562, + "step": 3639500 + }, + { + "epoch": 2.18, + "learning_rate": 4.07601341338002e-05, + "loss": 0.3502, + "step": 3640000 + }, + { + "epoch": 2.18, + "learning_rate": 4.075803416823964e-05, + "loss": 0.3471, + "step": 3640500 + }, + { + "epoch": 2.18, + "learning_rate": 4.07559384026102e-05, + "loss": 0.3623, + "step": 3641000 + }, + { + "epoch": 2.18, + "learning_rate": 4.075383843704963e-05, + "loss": 0.3519, + "step": 3641500 + }, + { + "epoch": 2.18, + "learning_rate": 4.075173847148907e-05, + "loss": 0.3509, + "step": 3642000 + }, + { + "epoch": 2.18, + "learning_rate": 4.07496385059285e-05, + "loss": 0.3581, + "step": 3642500 + }, + { + "epoch": 2.18, + "learning_rate": 4.0747538540367935e-05, + "loss": 0.3574, + "step": 3643000 + }, + { + "epoch": 2.18, + "learning_rate": 4.0745438574807375e-05, + "loss": 0.3502, + "step": 3643500 + }, + { + "epoch": 2.18, + "learning_rate": 4.074334280917793e-05, + "loss": 0.3544, + "step": 3644000 + }, + { + "epoch": 2.19, + "learning_rate": 4.074124284361736e-05, + "loss": 0.3512, + "step": 3644500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0739142878056795e-05, + "loss": 0.3611, + "step": 3645000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0737042912496236e-05, + "loss": 0.3563, + "step": 3645500 + }, + { + "epoch": 2.19, + "learning_rate": 4.073494294693567e-05, + "loss": 0.3456, + "step": 3646000 + }, + { + "epoch": 2.19, + "learning_rate": 4.07328429813751e-05, + "loss": 0.3555, + "step": 3646500 + }, + { + "epoch": 2.19, + "learning_rate": 4.073074301581454e-05, + "loss": 0.3508, + "step": 3647000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0728643050253976e-05, + "loss": 0.3574, + "step": 3647500 + }, + { + "epoch": 2.19, + "learning_rate": 4.072655568448678e-05, + "loss": 0.3474, + "step": 3648000 + }, + { + "epoch": 2.19, + "learning_rate": 4.072445571892621e-05, + "loss": 0.3511, + "step": 3648500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0722355753365644e-05, + "loss": 0.3468, + "step": 3649000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0720255787805084e-05, + "loss": 0.3615, + "step": 3649500 + }, + { + "epoch": 2.19, + "learning_rate": 4.071815582224452e-05, + "loss": 0.3575, + "step": 3650000 + }, + { + "epoch": 2.19, + "learning_rate": 4.071605585668395e-05, + "loss": 0.3514, + "step": 3650500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0713955891123384e-05, + "loss": 0.3429, + "step": 3651000 + }, + { + "epoch": 2.19, + "learning_rate": 4.071185592556282e-05, + "loss": 0.3563, + "step": 3651500 + }, + { + "epoch": 2.19, + "learning_rate": 4.070975596000225e-05, + "loss": 0.3578, + "step": 3652000 + }, + { + "epoch": 2.19, + "learning_rate": 4.070765599444169e-05, + "loss": 0.355, + "step": 3652500 + }, + { + "epoch": 2.19, + "learning_rate": 4.0705556028881125e-05, + "loss": 0.3511, + "step": 3653000 + }, + { + "epoch": 2.19, + "learning_rate": 4.070345606332056e-05, + "loss": 0.3531, + "step": 3653500 + }, + { + "epoch": 2.19, + "learning_rate": 4.070135609776e-05, + "loss": 0.3475, + "step": 3654000 + }, + { + "epoch": 2.19, + "learning_rate": 4.069926453206167e-05, + "loss": 0.3575, + "step": 3654500 + }, + { + "epoch": 2.19, + "learning_rate": 4.069716456650111e-05, + "loss": 0.3712, + "step": 3655000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0695068800871666e-05, + "loss": 0.3678, + "step": 3655500 + }, + { + "epoch": 2.19, + "learning_rate": 4.06929688353111e-05, + "loss": 0.3474, + "step": 3656000 + }, + { + "epoch": 2.19, + "learning_rate": 4.069086886975054e-05, + "loss": 0.3502, + "step": 3656500 + }, + { + "epoch": 2.19, + "learning_rate": 4.068876890418997e-05, + "loss": 0.3527, + "step": 3657000 + }, + { + "epoch": 2.19, + "learning_rate": 4.068666893862941e-05, + "loss": 0.3504, + "step": 3657500 + }, + { + "epoch": 2.19, + "learning_rate": 4.068456897306885e-05, + "loss": 0.3511, + "step": 3658000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0682469007508274e-05, + "loss": 0.3577, + "step": 3658500 + }, + { + "epoch": 2.19, + "learning_rate": 4.068036904194771e-05, + "loss": 0.3462, + "step": 3659000 + }, + { + "epoch": 2.19, + "learning_rate": 4.067826907638715e-05, + "loss": 0.3611, + "step": 3659500 + }, + { + "epoch": 2.19, + "learning_rate": 4.067616911082658e-05, + "loss": 0.3477, + "step": 3660000 + }, + { + "epoch": 2.19, + "learning_rate": 4.0674069145266014e-05, + "loss": 0.361, + "step": 3660500 + }, + { + "epoch": 2.19, + "learning_rate": 4.067197337963657e-05, + "loss": 0.3645, + "step": 3661000 + }, + { + "epoch": 2.2, + "learning_rate": 4.066987341407601e-05, + "loss": 0.3538, + "step": 3661500 + }, + { + "epoch": 2.2, + "learning_rate": 4.066777344851544e-05, + "loss": 0.3503, + "step": 3662000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0665673482954875e-05, + "loss": 0.3406, + "step": 3662500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0663573517394315e-05, + "loss": 0.3548, + "step": 3663000 + }, + { + "epoch": 2.2, + "learning_rate": 4.066147355183375e-05, + "loss": 0.3466, + "step": 3663500 + }, + { + "epoch": 2.2, + "learning_rate": 4.065937358627318e-05, + "loss": 0.3498, + "step": 3664000 + }, + { + "epoch": 2.2, + "learning_rate": 4.065727362071262e-05, + "loss": 0.3593, + "step": 3664500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0655177855083176e-05, + "loss": 0.3588, + "step": 3665000 + }, + { + "epoch": 2.2, + "learning_rate": 4.065307788952261e-05, + "loss": 0.3614, + "step": 3665500 + }, + { + "epoch": 2.2, + "learning_rate": 4.065097792396205e-05, + "loss": 0.3556, + "step": 3666000 + }, + { + "epoch": 2.2, + "learning_rate": 4.06488821583326e-05, + "loss": 0.3523, + "step": 3666500 + }, + { + "epoch": 2.2, + "learning_rate": 4.064678219277204e-05, + "loss": 0.3564, + "step": 3667000 + }, + { + "epoch": 2.2, + "learning_rate": 4.064468222721147e-05, + "loss": 0.3501, + "step": 3667500 + }, + { + "epoch": 2.2, + "learning_rate": 4.064258226165091e-05, + "loss": 0.3532, + "step": 3668000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0640482296090344e-05, + "loss": 0.3457, + "step": 3668500 + }, + { + "epoch": 2.2, + "learning_rate": 4.063838233052978e-05, + "loss": 0.359, + "step": 3669000 + }, + { + "epoch": 2.2, + "learning_rate": 4.063628236496922e-05, + "loss": 0.3627, + "step": 3669500 + }, + { + "epoch": 2.2, + "learning_rate": 4.063418239940865e-05, + "loss": 0.3568, + "step": 3670000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0632086633779205e-05, + "loss": 0.348, + "step": 3670500 + }, + { + "epoch": 2.2, + "learning_rate": 4.062998666821864e-05, + "loss": 0.3465, + "step": 3671000 + }, + { + "epoch": 2.2, + "learning_rate": 4.062788670265808e-05, + "loss": 0.3566, + "step": 3671500 + }, + { + "epoch": 2.2, + "learning_rate": 4.062579093702863e-05, + "loss": 0.3493, + "step": 3672000 + }, + { + "epoch": 2.2, + "learning_rate": 4.0623690971468065e-05, + "loss": 0.3516, + "step": 3672500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0621591005907505e-05, + "loss": 0.3522, + "step": 3673000 + }, + { + "epoch": 2.2, + "learning_rate": 4.061949104034694e-05, + "loss": 0.3473, + "step": 3673500 + }, + { + "epoch": 2.2, + "learning_rate": 4.061739107478637e-05, + "loss": 0.3577, + "step": 3674000 + }, + { + "epoch": 2.2, + "learning_rate": 4.061529110922581e-05, + "loss": 0.3544, + "step": 3674500 + }, + { + "epoch": 2.2, + "learning_rate": 4.0613191143665246e-05, + "loss": 0.3515, + "step": 3675000 + }, + { + "epoch": 2.2, + "learning_rate": 4.061109117810468e-05, + "loss": 0.351, + "step": 3675500 + }, + { + "epoch": 2.2, + "learning_rate": 4.060899541247523e-05, + "loss": 0.357, + "step": 3676000 + }, + { + "epoch": 2.2, + "learning_rate": 4.060689544691467e-05, + "loss": 0.3531, + "step": 3676500 + }, + { + "epoch": 2.2, + "learning_rate": 4.060479548135411e-05, + "loss": 0.3564, + "step": 3677000 + }, + { + "epoch": 2.2, + "learning_rate": 4.060269551579354e-05, + "loss": 0.347, + "step": 3677500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0600599750164094e-05, + "loss": 0.3516, + "step": 3678000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0598499784603534e-05, + "loss": 0.3581, + "step": 3678500 + }, + { + "epoch": 2.21, + "learning_rate": 4.059639981904297e-05, + "loss": 0.3565, + "step": 3679000 + }, + { + "epoch": 2.21, + "learning_rate": 4.059429985348241e-05, + "loss": 0.3491, + "step": 3679500 + }, + { + "epoch": 2.21, + "learning_rate": 4.059219988792184e-05, + "loss": 0.3498, + "step": 3680000 + }, + { + "epoch": 2.21, + "learning_rate": 4.059009992236127e-05, + "loss": 0.3522, + "step": 3680500 + }, + { + "epoch": 2.21, + "learning_rate": 4.058800415673183e-05, + "loss": 0.3505, + "step": 3681000 + }, + { + "epoch": 2.21, + "learning_rate": 4.058590419117127e-05, + "loss": 0.3452, + "step": 3681500 + }, + { + "epoch": 2.21, + "learning_rate": 4.05838042256107e-05, + "loss": 0.3591, + "step": 3682000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0581704260050135e-05, + "loss": 0.3471, + "step": 3682500 + }, + { + "epoch": 2.21, + "learning_rate": 4.057960849442069e-05, + "loss": 0.3487, + "step": 3683000 + }, + { + "epoch": 2.21, + "learning_rate": 4.057750852886013e-05, + "loss": 0.3523, + "step": 3683500 + }, + { + "epoch": 2.21, + "learning_rate": 4.057540856329956e-05, + "loss": 0.3503, + "step": 3684000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0573308597738996e-05, + "loss": 0.3506, + "step": 3684500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0571208632178436e-05, + "loss": 0.3538, + "step": 3685000 + }, + { + "epoch": 2.21, + "learning_rate": 4.056910866661786e-05, + "loss": 0.3449, + "step": 3685500 + }, + { + "epoch": 2.21, + "learning_rate": 4.05670087010573e-05, + "loss": 0.3455, + "step": 3686000 + }, + { + "epoch": 2.21, + "learning_rate": 4.056490873549674e-05, + "loss": 0.3469, + "step": 3686500 + }, + { + "epoch": 2.21, + "learning_rate": 4.05628129698673e-05, + "loss": 0.3514, + "step": 3687000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0560713004306724e-05, + "loss": 0.3478, + "step": 3687500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0558613038746164e-05, + "loss": 0.3611, + "step": 3688000 + }, + { + "epoch": 2.21, + "learning_rate": 4.05565130731856e-05, + "loss": 0.3491, + "step": 3688500 + }, + { + "epoch": 2.21, + "learning_rate": 4.055441730755616e-05, + "loss": 0.3609, + "step": 3689000 + }, + { + "epoch": 2.21, + "learning_rate": 4.055231734199559e-05, + "loss": 0.358, + "step": 3689500 + }, + { + "epoch": 2.21, + "learning_rate": 4.0550221576366145e-05, + "loss": 0.361, + "step": 3690000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0548121610805585e-05, + "loss": 0.3631, + "step": 3690500 + }, + { + "epoch": 2.21, + "learning_rate": 4.054602164524502e-05, + "loss": 0.3586, + "step": 3691000 + }, + { + "epoch": 2.21, + "learning_rate": 4.054392167968445e-05, + "loss": 0.3449, + "step": 3691500 + }, + { + "epoch": 2.21, + "learning_rate": 4.054182591405501e-05, + "loss": 0.3579, + "step": 3692000 + }, + { + "epoch": 2.21, + "learning_rate": 4.0539725948494446e-05, + "loss": 0.356, + "step": 3692500 + }, + { + "epoch": 2.21, + "learning_rate": 4.053762598293388e-05, + "loss": 0.3544, + "step": 3693000 + }, + { + "epoch": 2.21, + "learning_rate": 4.053552601737332e-05, + "loss": 0.354, + "step": 3693500 + }, + { + "epoch": 2.21, + "learning_rate": 4.053342605181275e-05, + "loss": 0.3488, + "step": 3694000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0531326086252186e-05, + "loss": 0.3495, + "step": 3694500 + }, + { + "epoch": 2.22, + "learning_rate": 4.052923032062274e-05, + "loss": 0.3507, + "step": 3695000 + }, + { + "epoch": 2.22, + "learning_rate": 4.052713035506218e-05, + "loss": 0.3526, + "step": 3695500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0525030389501614e-05, + "loss": 0.3451, + "step": 3696000 + }, + { + "epoch": 2.22, + "learning_rate": 4.052293042394105e-05, + "loss": 0.3466, + "step": 3696500 + }, + { + "epoch": 2.22, + "learning_rate": 4.052083045838048e-05, + "loss": 0.3444, + "step": 3697000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0518730492819914e-05, + "loss": 0.3536, + "step": 3697500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0516634727190474e-05, + "loss": 0.3428, + "step": 3698000 + }, + { + "epoch": 2.22, + "learning_rate": 4.051453896156103e-05, + "loss": 0.3638, + "step": 3698500 + }, + { + "epoch": 2.22, + "learning_rate": 4.051243899600047e-05, + "loss": 0.3536, + "step": 3699000 + }, + { + "epoch": 2.22, + "learning_rate": 4.05103390304399e-05, + "loss": 0.3588, + "step": 3699500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0508239064879335e-05, + "loss": 0.3497, + "step": 3700000 + }, + { + "epoch": 2.22, + "eval_loss": 0.34326010942459106, + "eval_runtime": 1122.4896, + "eval_samples_per_second": 469.243, + "eval_steps_per_second": 78.207, + "step": 3700000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0506139099318775e-05, + "loss": 0.3543, + "step": 3700500 + }, + { + "epoch": 2.22, + "learning_rate": 4.050404333368933e-05, + "loss": 0.357, + "step": 3701000 + }, + { + "epoch": 2.22, + "learning_rate": 4.050194336812876e-05, + "loss": 0.3614, + "step": 3701500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0499843402568196e-05, + "loss": 0.3606, + "step": 3702000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0497743437007636e-05, + "loss": 0.3546, + "step": 3702500 + }, + { + "epoch": 2.22, + "learning_rate": 4.049564347144707e-05, + "loss": 0.356, + "step": 3703000 + }, + { + "epoch": 2.22, + "learning_rate": 4.04935435058865e-05, + "loss": 0.3575, + "step": 3703500 + }, + { + "epoch": 2.22, + "learning_rate": 4.049144354032594e-05, + "loss": 0.3593, + "step": 3704000 + }, + { + "epoch": 2.22, + "learning_rate": 4.048934357476537e-05, + "loss": 0.3489, + "step": 3704500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0487243609204803e-05, + "loss": 0.348, + "step": 3705000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0485147843575364e-05, + "loss": 0.3645, + "step": 3705500 + }, + { + "epoch": 2.22, + "learning_rate": 4.0483047878014804e-05, + "loss": 0.3512, + "step": 3706000 + }, + { + "epoch": 2.22, + "learning_rate": 4.048094791245424e-05, + "loss": 0.3562, + "step": 3706500 + }, + { + "epoch": 2.22, + "learning_rate": 4.047884794689367e-05, + "loss": 0.3501, + "step": 3707000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0476747981333104e-05, + "loss": 0.3525, + "step": 3707500 + }, + { + "epoch": 2.22, + "learning_rate": 4.047464801577254e-05, + "loss": 0.3539, + "step": 3708000 + }, + { + "epoch": 2.22, + "learning_rate": 4.047254805021198e-05, + "loss": 0.3505, + "step": 3708500 + }, + { + "epoch": 2.22, + "learning_rate": 4.047044808465141e-05, + "loss": 0.3471, + "step": 3709000 + }, + { + "epoch": 2.22, + "learning_rate": 4.0468352319021965e-05, + "loss": 0.3583, + "step": 3709500 + }, + { + "epoch": 2.22, + "learning_rate": 4.04662523534614e-05, + "loss": 0.3458, + "step": 3710000 + }, + { + "epoch": 2.22, + "learning_rate": 4.046416078776308e-05, + "loss": 0.3577, + "step": 3710500 + }, + { + "epoch": 2.22, + "learning_rate": 4.046206082220251e-05, + "loss": 0.3483, + "step": 3711000 + }, + { + "epoch": 2.23, + "learning_rate": 4.045996085664195e-05, + "loss": 0.363, + "step": 3711500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0457860891081386e-05, + "loss": 0.3572, + "step": 3712000 + }, + { + "epoch": 2.23, + "learning_rate": 4.045576092552082e-05, + "loss": 0.3602, + "step": 3712500 + }, + { + "epoch": 2.23, + "learning_rate": 4.045366515989138e-05, + "loss": 0.354, + "step": 3713000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0451565194330813e-05, + "loss": 0.3478, + "step": 3713500 + }, + { + "epoch": 2.23, + "learning_rate": 4.044946522877025e-05, + "loss": 0.3576, + "step": 3714000 + }, + { + "epoch": 2.23, + "learning_rate": 4.044736526320969e-05, + "loss": 0.3481, + "step": 3714500 + }, + { + "epoch": 2.23, + "learning_rate": 4.044526529764912e-05, + "loss": 0.3448, + "step": 3715000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0443165332088554e-05, + "loss": 0.3514, + "step": 3715500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0441065366527994e-05, + "loss": 0.3518, + "step": 3716000 + }, + { + "epoch": 2.23, + "learning_rate": 4.043896540096742e-05, + "loss": 0.3559, + "step": 3716500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0436865435406854e-05, + "loss": 0.3571, + "step": 3717000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0434769669777415e-05, + "loss": 0.3621, + "step": 3717500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0432669704216855e-05, + "loss": 0.3487, + "step": 3718000 + }, + { + "epoch": 2.23, + "learning_rate": 4.043056973865628e-05, + "loss": 0.3481, + "step": 3718500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0428469773095715e-05, + "loss": 0.3582, + "step": 3719000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0426369807535155e-05, + "loss": 0.3509, + "step": 3719500 + }, + { + "epoch": 2.23, + "learning_rate": 4.042426984197459e-05, + "loss": 0.3582, + "step": 3720000 + }, + { + "epoch": 2.23, + "learning_rate": 4.042216987641402e-05, + "loss": 0.3487, + "step": 3720500 + }, + { + "epoch": 2.23, + "learning_rate": 4.042007411078458e-05, + "loss": 0.3515, + "step": 3721000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0417974145224016e-05, + "loss": 0.352, + "step": 3721500 + }, + { + "epoch": 2.23, + "learning_rate": 4.041587417966345e-05, + "loss": 0.3565, + "step": 3722000 + }, + { + "epoch": 2.23, + "learning_rate": 4.041377421410289e-05, + "loss": 0.3523, + "step": 3722500 + }, + { + "epoch": 2.23, + "learning_rate": 4.041167424854232e-05, + "loss": 0.3598, + "step": 3723000 + }, + { + "epoch": 2.23, + "learning_rate": 4.040957428298176e-05, + "loss": 0.3499, + "step": 3723500 + }, + { + "epoch": 2.23, + "learning_rate": 4.040747851735231e-05, + "loss": 0.3548, + "step": 3724000 + }, + { + "epoch": 2.23, + "learning_rate": 4.040537855179175e-05, + "loss": 0.3571, + "step": 3724500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0403278586231184e-05, + "loss": 0.3506, + "step": 3725000 + }, + { + "epoch": 2.23, + "learning_rate": 4.040117862067062e-05, + "loss": 0.3606, + "step": 3725500 + }, + { + "epoch": 2.23, + "learning_rate": 4.039907865511006e-05, + "loss": 0.3451, + "step": 3726000 + }, + { + "epoch": 2.23, + "learning_rate": 4.039697868954949e-05, + "loss": 0.3587, + "step": 3726500 + }, + { + "epoch": 2.23, + "learning_rate": 4.0394878723988925e-05, + "loss": 0.3522, + "step": 3727000 + }, + { + "epoch": 2.23, + "learning_rate": 4.0392778758428365e-05, + "loss": 0.3491, + "step": 3727500 + }, + { + "epoch": 2.24, + "learning_rate": 4.039068299279892e-05, + "loss": 0.3503, + "step": 3728000 + }, + { + "epoch": 2.24, + "learning_rate": 4.038858722716947e-05, + "loss": 0.3515, + "step": 3728500 + }, + { + "epoch": 2.24, + "learning_rate": 4.038649146154003e-05, + "loss": 0.3421, + "step": 3729000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0384391495979466e-05, + "loss": 0.3483, + "step": 3729500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0382291530418906e-05, + "loss": 0.3469, + "step": 3730000 + }, + { + "epoch": 2.24, + "learning_rate": 4.038019156485833e-05, + "loss": 0.3584, + "step": 3730500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0378091599297766e-05, + "loss": 0.347, + "step": 3731000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0375991633737206e-05, + "loss": 0.3402, + "step": 3731500 + }, + { + "epoch": 2.24, + "learning_rate": 4.037389166817664e-05, + "loss": 0.3595, + "step": 3732000 + }, + { + "epoch": 2.24, + "learning_rate": 4.037179170261607e-05, + "loss": 0.35, + "step": 3732500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0369691737055514e-05, + "loss": 0.3484, + "step": 3733000 + }, + { + "epoch": 2.24, + "learning_rate": 4.036759177149495e-05, + "loss": 0.3588, + "step": 3733500 + }, + { + "epoch": 2.24, + "learning_rate": 4.036549180593438e-05, + "loss": 0.336, + "step": 3734000 + }, + { + "epoch": 2.24, + "learning_rate": 4.036339184037382e-05, + "loss": 0.3491, + "step": 3734500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0361296074744374e-05, + "loss": 0.3528, + "step": 3735000 + }, + { + "epoch": 2.24, + "learning_rate": 4.035919610918381e-05, + "loss": 0.3465, + "step": 3735500 + }, + { + "epoch": 2.24, + "learning_rate": 4.035709614362324e-05, + "loss": 0.3567, + "step": 3736000 + }, + { + "epoch": 2.24, + "learning_rate": 4.03550003779938e-05, + "loss": 0.3606, + "step": 3736500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0352900412433235e-05, + "loss": 0.3579, + "step": 3737000 + }, + { + "epoch": 2.24, + "learning_rate": 4.035080044687267e-05, + "loss": 0.3617, + "step": 3737500 + }, + { + "epoch": 2.24, + "learning_rate": 4.034870048131211e-05, + "loss": 0.3445, + "step": 3738000 + }, + { + "epoch": 2.24, + "learning_rate": 4.034660051575154e-05, + "loss": 0.3569, + "step": 3738500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0344500550190976e-05, + "loss": 0.3597, + "step": 3739000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0342400584630416e-05, + "loss": 0.3633, + "step": 3739500 + }, + { + "epoch": 2.24, + "learning_rate": 4.034030061906985e-05, + "loss": 0.3489, + "step": 3740000 + }, + { + "epoch": 2.24, + "learning_rate": 4.033820905337152e-05, + "loss": 0.3524, + "step": 3740500 + }, + { + "epoch": 2.24, + "learning_rate": 4.0336109087810957e-05, + "loss": 0.3556, + "step": 3741000 + }, + { + "epoch": 2.24, + "learning_rate": 4.033400912225039e-05, + "loss": 0.3608, + "step": 3741500 + }, + { + "epoch": 2.24, + "learning_rate": 4.033190915668983e-05, + "loss": 0.3557, + "step": 3742000 + }, + { + "epoch": 2.24, + "learning_rate": 4.0329809191129264e-05, + "loss": 0.3487, + "step": 3742500 + }, + { + "epoch": 2.24, + "learning_rate": 4.032771342549982e-05, + "loss": 0.3476, + "step": 3743000 + }, + { + "epoch": 2.24, + "learning_rate": 4.032561765987038e-05, + "loss": 0.357, + "step": 3743500 + }, + { + "epoch": 2.24, + "learning_rate": 4.032352189424093e-05, + "loss": 0.3587, + "step": 3744000 + }, + { + "epoch": 2.24, + "learning_rate": 4.032142192868037e-05, + "loss": 0.3646, + "step": 3744500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0319321963119805e-05, + "loss": 0.3534, + "step": 3745000 + }, + { + "epoch": 2.25, + "learning_rate": 4.031722199755924e-05, + "loss": 0.3471, + "step": 3745500 + }, + { + "epoch": 2.25, + "learning_rate": 4.031512203199868e-05, + "loss": 0.3406, + "step": 3746000 + }, + { + "epoch": 2.25, + "learning_rate": 4.031302206643811e-05, + "loss": 0.3641, + "step": 3746500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0310922100877545e-05, + "loss": 0.3564, + "step": 3747000 + }, + { + "epoch": 2.25, + "learning_rate": 4.030882213531698e-05, + "loss": 0.3577, + "step": 3747500 + }, + { + "epoch": 2.25, + "learning_rate": 4.030672636968754e-05, + "loss": 0.3461, + "step": 3748000 + }, + { + "epoch": 2.25, + "learning_rate": 4.030462640412697e-05, + "loss": 0.3464, + "step": 3748500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0302526438566406e-05, + "loss": 0.3483, + "step": 3749000 + }, + { + "epoch": 2.25, + "learning_rate": 4.030042647300584e-05, + "loss": 0.3426, + "step": 3749500 + }, + { + "epoch": 2.25, + "learning_rate": 4.029832650744527e-05, + "loss": 0.3551, + "step": 3750000 + }, + { + "epoch": 2.25, + "learning_rate": 4.029622654188471e-05, + "loss": 0.3487, + "step": 3750500 + }, + { + "epoch": 2.25, + "learning_rate": 4.029412657632415e-05, + "loss": 0.3459, + "step": 3751000 + }, + { + "epoch": 2.25, + "learning_rate": 4.029202661076358e-05, + "loss": 0.3592, + "step": 3751500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0289930845134134e-05, + "loss": 0.3485, + "step": 3752000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0287830879573574e-05, + "loss": 0.3608, + "step": 3752500 + }, + { + "epoch": 2.25, + "learning_rate": 4.028573091401301e-05, + "loss": 0.3625, + "step": 3753000 + }, + { + "epoch": 2.25, + "learning_rate": 4.028363514838357e-05, + "loss": 0.348, + "step": 3753500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0281535182823e-05, + "loss": 0.3437, + "step": 3754000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0279435217262435e-05, + "loss": 0.3558, + "step": 3754500 + }, + { + "epoch": 2.25, + "learning_rate": 4.027733525170187e-05, + "loss": 0.3595, + "step": 3755000 + }, + { + "epoch": 2.25, + "learning_rate": 4.02752352861413e-05, + "loss": 0.3471, + "step": 3755500 + }, + { + "epoch": 2.25, + "learning_rate": 4.027313532058074e-05, + "loss": 0.3435, + "step": 3756000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0271035355020175e-05, + "loss": 0.3395, + "step": 3756500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0268935389459616e-05, + "loss": 0.3606, + "step": 3757000 + }, + { + "epoch": 2.25, + "learning_rate": 4.026683542389905e-05, + "loss": 0.3551, + "step": 3757500 + }, + { + "epoch": 2.25, + "learning_rate": 4.026473545833848e-05, + "loss": 0.3579, + "step": 3758000 + }, + { + "epoch": 2.25, + "learning_rate": 4.026263549277792e-05, + "loss": 0.3619, + "step": 3758500 + }, + { + "epoch": 2.25, + "learning_rate": 4.0260539727148476e-05, + "loss": 0.3611, + "step": 3759000 + }, + { + "epoch": 2.25, + "learning_rate": 4.025843976158791e-05, + "loss": 0.3477, + "step": 3759500 + }, + { + "epoch": 2.25, + "learning_rate": 4.025633979602734e-05, + "loss": 0.3534, + "step": 3760000 + }, + { + "epoch": 2.25, + "learning_rate": 4.0254239830466784e-05, + "loss": 0.3454, + "step": 3760500 + }, + { + "epoch": 2.25, + "learning_rate": 4.025214406483734e-05, + "loss": 0.3559, + "step": 3761000 + }, + { + "epoch": 2.26, + "learning_rate": 4.025004409927677e-05, + "loss": 0.3506, + "step": 3761500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0247944133716204e-05, + "loss": 0.3463, + "step": 3762000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0245844168155644e-05, + "loss": 0.3505, + "step": 3762500 + }, + { + "epoch": 2.26, + "learning_rate": 4.024374420259508e-05, + "loss": 0.349, + "step": 3763000 + }, + { + "epoch": 2.26, + "learning_rate": 4.024164423703451e-05, + "loss": 0.3578, + "step": 3763500 + }, + { + "epoch": 2.26, + "learning_rate": 4.023954427147395e-05, + "loss": 0.3589, + "step": 3764000 + }, + { + "epoch": 2.26, + "learning_rate": 4.023744430591338e-05, + "loss": 0.3456, + "step": 3764500 + }, + { + "epoch": 2.26, + "learning_rate": 4.023534434035282e-05, + "loss": 0.3535, + "step": 3765000 + }, + { + "epoch": 2.26, + "learning_rate": 4.023324857472338e-05, + "loss": 0.3462, + "step": 3765500 + }, + { + "epoch": 2.26, + "learning_rate": 4.023114860916281e-05, + "loss": 0.3477, + "step": 3766000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0229048643602246e-05, + "loss": 0.3472, + "step": 3766500 + }, + { + "epoch": 2.26, + "learning_rate": 4.022694867804168e-05, + "loss": 0.3583, + "step": 3767000 + }, + { + "epoch": 2.26, + "learning_rate": 4.022485291241224e-05, + "loss": 0.3518, + "step": 3767500 + }, + { + "epoch": 2.26, + "learning_rate": 4.022275294685167e-05, + "loss": 0.3517, + "step": 3768000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0220657181222226e-05, + "loss": 0.3558, + "step": 3768500 + }, + { + "epoch": 2.26, + "learning_rate": 4.021855721566166e-05, + "loss": 0.3488, + "step": 3769000 + }, + { + "epoch": 2.26, + "learning_rate": 4.02164572501011e-05, + "loss": 0.3535, + "step": 3769500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0214357284540534e-05, + "loss": 0.3552, + "step": 3770000 + }, + { + "epoch": 2.26, + "learning_rate": 4.021226151891109e-05, + "loss": 0.3503, + "step": 3770500 + }, + { + "epoch": 2.26, + "learning_rate": 4.021016155335053e-05, + "loss": 0.3528, + "step": 3771000 + }, + { + "epoch": 2.26, + "learning_rate": 4.020806158778996e-05, + "loss": 0.3614, + "step": 3771500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0205961622229394e-05, + "loss": 0.3461, + "step": 3772000 + }, + { + "epoch": 2.26, + "learning_rate": 4.0203861656668835e-05, + "loss": 0.3587, + "step": 3772500 + }, + { + "epoch": 2.26, + "learning_rate": 4.020176169110827e-05, + "loss": 0.3497, + "step": 3773000 + }, + { + "epoch": 2.26, + "learning_rate": 4.01996617255477e-05, + "loss": 0.3485, + "step": 3773500 + }, + { + "epoch": 2.26, + "learning_rate": 4.019756175998714e-05, + "loss": 0.3533, + "step": 3774000 + }, + { + "epoch": 2.26, + "learning_rate": 4.019546179442657e-05, + "loss": 0.3638, + "step": 3774500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0193361828866e-05, + "loss": 0.3475, + "step": 3775000 + }, + { + "epoch": 2.26, + "learning_rate": 4.019126186330544e-05, + "loss": 0.3449, + "step": 3775500 + }, + { + "epoch": 2.26, + "learning_rate": 4.0189161897744876e-05, + "loss": 0.3469, + "step": 3776000 + }, + { + "epoch": 2.26, + "learning_rate": 4.018706613211543e-05, + "loss": 0.3544, + "step": 3776500 + }, + { + "epoch": 2.26, + "learning_rate": 4.018496616655486e-05, + "loss": 0.3482, + "step": 3777000 + }, + { + "epoch": 2.26, + "learning_rate": 4.01828662009943e-05, + "loss": 0.343, + "step": 3777500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0180766235433736e-05, + "loss": 0.3583, + "step": 3778000 + }, + { + "epoch": 2.27, + "learning_rate": 4.017866626987317e-05, + "loss": 0.3552, + "step": 3778500 + }, + { + "epoch": 2.27, + "learning_rate": 4.017656630431261e-05, + "loss": 0.3409, + "step": 3779000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0174466338752043e-05, + "loss": 0.3525, + "step": 3779500 + }, + { + "epoch": 2.27, + "learning_rate": 4.017236637319148e-05, + "loss": 0.3529, + "step": 3780000 + }, + { + "epoch": 2.27, + "learning_rate": 4.017027060756204e-05, + "loss": 0.3525, + "step": 3780500 + }, + { + "epoch": 2.27, + "learning_rate": 4.016817064200147e-05, + "loss": 0.3479, + "step": 3781000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0166070676440904e-05, + "loss": 0.3521, + "step": 3781500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0163970710880344e-05, + "loss": 0.3527, + "step": 3782000 + }, + { + "epoch": 2.27, + "learning_rate": 4.016187074531978e-05, + "loss": 0.3469, + "step": 3782500 + }, + { + "epoch": 2.27, + "learning_rate": 4.015977077975921e-05, + "loss": 0.3504, + "step": 3783000 + }, + { + "epoch": 2.27, + "learning_rate": 4.015767081419865e-05, + "loss": 0.3428, + "step": 3783500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0155570848638085e-05, + "loss": 0.3448, + "step": 3784000 + }, + { + "epoch": 2.27, + "learning_rate": 4.015347508300864e-05, + "loss": 0.3496, + "step": 3784500 + }, + { + "epoch": 2.27, + "learning_rate": 4.015137511744807e-05, + "loss": 0.351, + "step": 3785000 + }, + { + "epoch": 2.27, + "learning_rate": 4.014927515188751e-05, + "loss": 0.3429, + "step": 3785500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0147175186326946e-05, + "loss": 0.3482, + "step": 3786000 + }, + { + "epoch": 2.27, + "learning_rate": 4.01450794206975e-05, + "loss": 0.3498, + "step": 3786500 + }, + { + "epoch": 2.27, + "learning_rate": 4.014297945513693e-05, + "loss": 0.3485, + "step": 3787000 + }, + { + "epoch": 2.27, + "learning_rate": 4.014087948957637e-05, + "loss": 0.3557, + "step": 3787500 + }, + { + "epoch": 2.27, + "learning_rate": 4.0138779524015806e-05, + "loss": 0.3641, + "step": 3788000 + }, + { + "epoch": 2.27, + "learning_rate": 4.013668375838636e-05, + "loss": 0.353, + "step": 3788500 + }, + { + "epoch": 2.27, + "learning_rate": 4.01345837928258e-05, + "loss": 0.3564, + "step": 3789000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0132483827265234e-05, + "loss": 0.3448, + "step": 3789500 + }, + { + "epoch": 2.27, + "learning_rate": 4.013038386170467e-05, + "loss": 0.3459, + "step": 3790000 + }, + { + "epoch": 2.27, + "learning_rate": 4.012828809607522e-05, + "loss": 0.3492, + "step": 3790500 + }, + { + "epoch": 2.27, + "learning_rate": 4.012618813051466e-05, + "loss": 0.3563, + "step": 3791000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0124088164954094e-05, + "loss": 0.362, + "step": 3791500 + }, + { + "epoch": 2.27, + "learning_rate": 4.012199239932465e-05, + "loss": 0.3567, + "step": 3792000 + }, + { + "epoch": 2.27, + "learning_rate": 4.011989243376408e-05, + "loss": 0.3553, + "step": 3792500 + }, + { + "epoch": 2.27, + "learning_rate": 4.011779246820352e-05, + "loss": 0.353, + "step": 3793000 + }, + { + "epoch": 2.27, + "learning_rate": 4.0115692502642955e-05, + "loss": 0.3572, + "step": 3793500 + }, + { + "epoch": 2.27, + "learning_rate": 4.011359253708239e-05, + "loss": 0.35, + "step": 3794000 + }, + { + "epoch": 2.27, + "learning_rate": 4.011149257152183e-05, + "loss": 0.3468, + "step": 3794500 + }, + { + "epoch": 2.28, + "learning_rate": 4.010939260596126e-05, + "loss": 0.35, + "step": 3795000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0107292640400696e-05, + "loss": 0.3552, + "step": 3795500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0105192674840136e-05, + "loss": 0.3521, + "step": 3796000 + }, + { + "epoch": 2.28, + "learning_rate": 4.010309270927956e-05, + "loss": 0.3547, + "step": 3796500 + }, + { + "epoch": 2.28, + "learning_rate": 4.010099694365012e-05, + "loss": 0.3596, + "step": 3797000 + }, + { + "epoch": 2.28, + "learning_rate": 4.009889697808956e-05, + "loss": 0.3541, + "step": 3797500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0096797012529e-05, + "loss": 0.351, + "step": 3798000 + }, + { + "epoch": 2.28, + "learning_rate": 4.009469704696843e-05, + "loss": 0.3438, + "step": 3798500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0092601281338984e-05, + "loss": 0.3554, + "step": 3799000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0090501315778424e-05, + "loss": 0.3512, + "step": 3799500 + }, + { + "epoch": 2.28, + "learning_rate": 4.008840135021786e-05, + "loss": 0.3533, + "step": 3800000 + }, + { + "epoch": 2.28, + "eval_loss": 0.343013197183609, + "eval_runtime": 1123.1851, + "eval_samples_per_second": 468.952, + "eval_steps_per_second": 78.159, + "step": 3800000 + }, + { + "epoch": 2.28, + "learning_rate": 4.008630138465729e-05, + "loss": 0.3532, + "step": 3800500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0084201419096724e-05, + "loss": 0.3649, + "step": 3801000 + }, + { + "epoch": 2.28, + "learning_rate": 4.008210145353616e-05, + "loss": 0.3557, + "step": 3801500 + }, + { + "epoch": 2.28, + "learning_rate": 4.008000568790672e-05, + "loss": 0.3606, + "step": 3802000 + }, + { + "epoch": 2.28, + "learning_rate": 4.007790572234615e-05, + "loss": 0.3594, + "step": 3802500 + }, + { + "epoch": 2.28, + "learning_rate": 4.007580575678559e-05, + "loss": 0.3547, + "step": 3803000 + }, + { + "epoch": 2.28, + "learning_rate": 4.007370579122502e-05, + "loss": 0.3554, + "step": 3803500 + }, + { + "epoch": 2.28, + "learning_rate": 4.007161002559558e-05, + "loss": 0.3507, + "step": 3804000 + }, + { + "epoch": 2.28, + "learning_rate": 4.006951006003502e-05, + "loss": 0.36, + "step": 3804500 + }, + { + "epoch": 2.28, + "learning_rate": 4.006741009447445e-05, + "loss": 0.345, + "step": 3805000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0065310128913886e-05, + "loss": 0.3546, + "step": 3805500 + }, + { + "epoch": 2.28, + "learning_rate": 4.006321436328444e-05, + "loss": 0.3564, + "step": 3806000 + }, + { + "epoch": 2.28, + "learning_rate": 4.006111439772388e-05, + "loss": 0.3572, + "step": 3806500 + }, + { + "epoch": 2.28, + "learning_rate": 4.005901443216331e-05, + "loss": 0.3526, + "step": 3807000 + }, + { + "epoch": 2.28, + "learning_rate": 4.005691446660275e-05, + "loss": 0.3514, + "step": 3807500 + }, + { + "epoch": 2.28, + "learning_rate": 4.005481450104219e-05, + "loss": 0.348, + "step": 3808000 + }, + { + "epoch": 2.28, + "learning_rate": 4.005271873541274e-05, + "loss": 0.3542, + "step": 3808500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0050618769852174e-05, + "loss": 0.3566, + "step": 3809000 + }, + { + "epoch": 2.28, + "learning_rate": 4.0048518804291614e-05, + "loss": 0.3577, + "step": 3809500 + }, + { + "epoch": 2.28, + "learning_rate": 4.004641883873105e-05, + "loss": 0.3479, + "step": 3810000 + }, + { + "epoch": 2.28, + "learning_rate": 4.004431887317048e-05, + "loss": 0.3537, + "step": 3810500 + }, + { + "epoch": 2.28, + "learning_rate": 4.0042223107541035e-05, + "loss": 0.3477, + "step": 3811000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0040123141980475e-05, + "loss": 0.3538, + "step": 3811500 + }, + { + "epoch": 2.29, + "learning_rate": 4.003802317641991e-05, + "loss": 0.3519, + "step": 3812000 + }, + { + "epoch": 2.29, + "learning_rate": 4.003592321085934e-05, + "loss": 0.3544, + "step": 3812500 + }, + { + "epoch": 2.29, + "learning_rate": 4.0033823245298775e-05, + "loss": 0.3534, + "step": 3813000 + }, + { + "epoch": 2.29, + "learning_rate": 4.003172327973821e-05, + "loss": 0.3513, + "step": 3813500 + }, + { + "epoch": 2.29, + "learning_rate": 4.002962331417764e-05, + "loss": 0.3424, + "step": 3814000 + }, + { + "epoch": 2.29, + "learning_rate": 4.002752334861708e-05, + "loss": 0.3561, + "step": 3814500 + }, + { + "epoch": 2.29, + "learning_rate": 4.0025431782918756e-05, + "loss": 0.3433, + "step": 3815000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0023331817358197e-05, + "loss": 0.3484, + "step": 3815500 + }, + { + "epoch": 2.29, + "learning_rate": 4.002123185179763e-05, + "loss": 0.3546, + "step": 3816000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0019136086168184e-05, + "loss": 0.3574, + "step": 3816500 + }, + { + "epoch": 2.29, + "learning_rate": 4.0017036120607624e-05, + "loss": 0.3535, + "step": 3817000 + }, + { + "epoch": 2.29, + "learning_rate": 4.001493615504706e-05, + "loss": 0.3603, + "step": 3817500 + }, + { + "epoch": 2.29, + "learning_rate": 4.001283618948649e-05, + "loss": 0.3481, + "step": 3818000 + }, + { + "epoch": 2.29, + "learning_rate": 4.001073622392593e-05, + "loss": 0.3541, + "step": 3818500 + }, + { + "epoch": 2.29, + "learning_rate": 4.0008636258365364e-05, + "loss": 0.3504, + "step": 3819000 + }, + { + "epoch": 2.29, + "learning_rate": 4.00065362928048e-05, + "loss": 0.3459, + "step": 3819500 + }, + { + "epoch": 2.29, + "learning_rate": 4.000443632724424e-05, + "loss": 0.3478, + "step": 3820000 + }, + { + "epoch": 2.29, + "learning_rate": 4.0002336361683665e-05, + "loss": 0.3537, + "step": 3820500 + }, + { + "epoch": 2.29, + "learning_rate": 4.00002363961231e-05, + "loss": 0.3479, + "step": 3821000 + }, + { + "epoch": 2.29, + "learning_rate": 3.999813643056254e-05, + "loss": 0.3533, + "step": 3821500 + }, + { + "epoch": 2.29, + "learning_rate": 3.999603646500197e-05, + "loss": 0.3533, + "step": 3822000 + }, + { + "epoch": 2.29, + "learning_rate": 3.9993936499441405e-05, + "loss": 0.3491, + "step": 3822500 + }, + { + "epoch": 2.29, + "learning_rate": 3.9991840733811966e-05, + "loss": 0.3528, + "step": 3823000 + }, + { + "epoch": 2.29, + "learning_rate": 3.99897407682514e-05, + "loss": 0.3425, + "step": 3823500 + }, + { + "epoch": 2.29, + "learning_rate": 3.998764080269083e-05, + "loss": 0.3516, + "step": 3824000 + }, + { + "epoch": 2.29, + "learning_rate": 3.998554083713027e-05, + "loss": 0.3548, + "step": 3824500 + }, + { + "epoch": 2.29, + "learning_rate": 3.9983445071500826e-05, + "loss": 0.3478, + "step": 3825000 + }, + { + "epoch": 2.29, + "learning_rate": 3.998134510594026e-05, + "loss": 0.3603, + "step": 3825500 + }, + { + "epoch": 2.29, + "learning_rate": 3.9979245140379693e-05, + "loss": 0.355, + "step": 3826000 + }, + { + "epoch": 2.29, + "learning_rate": 3.9977145174819134e-05, + "loss": 0.3509, + "step": 3826500 + }, + { + "epoch": 2.29, + "learning_rate": 3.997505360912081e-05, + "loss": 0.3489, + "step": 3827000 + }, + { + "epoch": 2.29, + "learning_rate": 3.997295364356025e-05, + "loss": 0.3453, + "step": 3827500 + }, + { + "epoch": 2.3, + "learning_rate": 3.99708578779308e-05, + "loss": 0.3486, + "step": 3828000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9968757912370235e-05, + "loss": 0.3484, + "step": 3828500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9966657946809675e-05, + "loss": 0.348, + "step": 3829000 + }, + { + "epoch": 2.3, + "learning_rate": 3.996455798124911e-05, + "loss": 0.344, + "step": 3829500 + }, + { + "epoch": 2.3, + "learning_rate": 3.996245801568854e-05, + "loss": 0.3557, + "step": 3830000 + }, + { + "epoch": 2.3, + "learning_rate": 3.996035805012798e-05, + "loss": 0.347, + "step": 3830500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9958258084567415e-05, + "loss": 0.3534, + "step": 3831000 + }, + { + "epoch": 2.3, + "learning_rate": 3.995616231893797e-05, + "loss": 0.3509, + "step": 3831500 + }, + { + "epoch": 2.3, + "learning_rate": 3.99540623533774e-05, + "loss": 0.3502, + "step": 3832000 + }, + { + "epoch": 2.3, + "learning_rate": 3.995196238781684e-05, + "loss": 0.3657, + "step": 3832500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9949862422256276e-05, + "loss": 0.3452, + "step": 3833000 + }, + { + "epoch": 2.3, + "learning_rate": 3.994776245669571e-05, + "loss": 0.3419, + "step": 3833500 + }, + { + "epoch": 2.3, + "learning_rate": 3.994566249113515e-05, + "loss": 0.3616, + "step": 3834000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9943562525574577e-05, + "loss": 0.3451, + "step": 3834500 + }, + { + "epoch": 2.3, + "learning_rate": 3.994146256001401e-05, + "loss": 0.3535, + "step": 3835000 + }, + { + "epoch": 2.3, + "learning_rate": 3.993936259445345e-05, + "loss": 0.3444, + "step": 3835500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9937262628892884e-05, + "loss": 0.3414, + "step": 3836000 + }, + { + "epoch": 2.3, + "learning_rate": 3.993516266333232e-05, + "loss": 0.354, + "step": 3836500 + }, + { + "epoch": 2.3, + "learning_rate": 3.993306689770288e-05, + "loss": 0.3508, + "step": 3837000 + }, + { + "epoch": 2.3, + "learning_rate": 3.993096693214231e-05, + "loss": 0.3522, + "step": 3837500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9928866966581744e-05, + "loss": 0.3559, + "step": 3838000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9926767001021185e-05, + "loss": 0.3565, + "step": 3838500 + }, + { + "epoch": 2.3, + "learning_rate": 3.992466703546062e-05, + "loss": 0.3503, + "step": 3839000 + }, + { + "epoch": 2.3, + "learning_rate": 3.992256706990005e-05, + "loss": 0.3566, + "step": 3839500 + }, + { + "epoch": 2.3, + "learning_rate": 3.992046710433949e-05, + "loss": 0.3536, + "step": 3840000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9918367138778925e-05, + "loss": 0.3485, + "step": 3840500 + }, + { + "epoch": 2.3, + "learning_rate": 3.991627137314948e-05, + "loss": 0.3461, + "step": 3841000 + }, + { + "epoch": 2.3, + "learning_rate": 3.991417140758891e-05, + "loss": 0.3542, + "step": 3841500 + }, + { + "epoch": 2.3, + "learning_rate": 3.9912075641959466e-05, + "loss": 0.3532, + "step": 3842000 + }, + { + "epoch": 2.3, + "learning_rate": 3.9909975676398906e-05, + "loss": 0.3464, + "step": 3842500 + }, + { + "epoch": 2.3, + "learning_rate": 3.990787571083834e-05, + "loss": 0.3538, + "step": 3843000 + }, + { + "epoch": 2.3, + "learning_rate": 3.990577574527777e-05, + "loss": 0.3547, + "step": 3843500 + }, + { + "epoch": 2.3, + "learning_rate": 3.990367577971721e-05, + "loss": 0.3596, + "step": 3844000 + }, + { + "epoch": 2.3, + "learning_rate": 3.990158001408777e-05, + "loss": 0.3554, + "step": 3844500 + }, + { + "epoch": 2.31, + "learning_rate": 3.98994800485272e-05, + "loss": 0.3443, + "step": 3845000 + }, + { + "epoch": 2.31, + "learning_rate": 3.989738008296664e-05, + "loss": 0.3387, + "step": 3845500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9895280117406074e-05, + "loss": 0.3406, + "step": 3846000 + }, + { + "epoch": 2.31, + "learning_rate": 3.989318015184551e-05, + "loss": 0.3449, + "step": 3846500 + }, + { + "epoch": 2.31, + "learning_rate": 3.989108438621606e-05, + "loss": 0.3511, + "step": 3847000 + }, + { + "epoch": 2.31, + "learning_rate": 3.98889844206555e-05, + "loss": 0.3519, + "step": 3847500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9886884455094935e-05, + "loss": 0.3542, + "step": 3848000 + }, + { + "epoch": 2.31, + "learning_rate": 3.988478448953437e-05, + "loss": 0.3579, + "step": 3848500 + }, + { + "epoch": 2.31, + "learning_rate": 3.988268452397381e-05, + "loss": 0.3459, + "step": 3849000 + }, + { + "epoch": 2.31, + "learning_rate": 3.988058875834436e-05, + "loss": 0.3642, + "step": 3849500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9878488792783795e-05, + "loss": 0.3511, + "step": 3850000 + }, + { + "epoch": 2.31, + "learning_rate": 3.987638882722323e-05, + "loss": 0.3496, + "step": 3850500 + }, + { + "epoch": 2.31, + "learning_rate": 3.987428886166267e-05, + "loss": 0.3539, + "step": 3851000 + }, + { + "epoch": 2.31, + "learning_rate": 3.98721888961021e-05, + "loss": 0.3527, + "step": 3851500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9870093130472656e-05, + "loss": 0.3457, + "step": 3852000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9867993164912096e-05, + "loss": 0.3436, + "step": 3852500 + }, + { + "epoch": 2.31, + "learning_rate": 3.986589319935153e-05, + "loss": 0.35, + "step": 3853000 + }, + { + "epoch": 2.31, + "learning_rate": 3.986379323379096e-05, + "loss": 0.3437, + "step": 3853500 + }, + { + "epoch": 2.31, + "learning_rate": 3.986169746816152e-05, + "loss": 0.3514, + "step": 3854000 + }, + { + "epoch": 2.31, + "learning_rate": 3.985959750260096e-05, + "loss": 0.352, + "step": 3854500 + }, + { + "epoch": 2.31, + "learning_rate": 3.985749753704039e-05, + "loss": 0.3473, + "step": 3855000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9855397571479824e-05, + "loss": 0.3453, + "step": 3855500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9853297605919264e-05, + "loss": 0.3482, + "step": 3856000 + }, + { + "epoch": 2.31, + "learning_rate": 3.98511976403587e-05, + "loss": 0.3567, + "step": 3856500 + }, + { + "epoch": 2.31, + "learning_rate": 3.984910187472925e-05, + "loss": 0.3453, + "step": 3857000 + }, + { + "epoch": 2.31, + "learning_rate": 3.9847001909168685e-05, + "loss": 0.3527, + "step": 3857500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9844901943608125e-05, + "loss": 0.3503, + "step": 3858000 + }, + { + "epoch": 2.31, + "learning_rate": 3.984280197804756e-05, + "loss": 0.3585, + "step": 3858500 + }, + { + "epoch": 2.31, + "learning_rate": 3.984070201248699e-05, + "loss": 0.3449, + "step": 3859000 + }, + { + "epoch": 2.31, + "learning_rate": 3.983860204692643e-05, + "loss": 0.3476, + "step": 3859500 + }, + { + "epoch": 2.31, + "learning_rate": 3.9836502081365866e-05, + "loss": 0.356, + "step": 3860000 + }, + { + "epoch": 2.31, + "learning_rate": 3.98344021158053e-05, + "loss": 0.3563, + "step": 3860500 + }, + { + "epoch": 2.31, + "learning_rate": 3.983230635017586e-05, + "loss": 0.3518, + "step": 3861000 + }, + { + "epoch": 2.32, + "learning_rate": 3.983020638461529e-05, + "loss": 0.3536, + "step": 3861500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9828106419054726e-05, + "loss": 0.342, + "step": 3862000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9826006453494167e-05, + "loss": 0.3498, + "step": 3862500 + }, + { + "epoch": 2.32, + "learning_rate": 3.982391068786472e-05, + "loss": 0.3559, + "step": 3863000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9821814922235274e-05, + "loss": 0.3406, + "step": 3863500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981971495667471e-05, + "loss": 0.3539, + "step": 3864000 + }, + { + "epoch": 2.32, + "learning_rate": 3.981761499111414e-05, + "loss": 0.3463, + "step": 3864500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981551502555358e-05, + "loss": 0.3507, + "step": 3865000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9813415059993014e-05, + "loss": 0.3422, + "step": 3865500 + }, + { + "epoch": 2.32, + "learning_rate": 3.981131509443245e-05, + "loss": 0.3538, + "step": 3866000 + }, + { + "epoch": 2.32, + "learning_rate": 3.980921932880301e-05, + "loss": 0.3524, + "step": 3866500 + }, + { + "epoch": 2.32, + "learning_rate": 3.980711936324244e-05, + "loss": 0.3532, + "step": 3867000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9805019397681875e-05, + "loss": 0.3557, + "step": 3867500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9802919432121315e-05, + "loss": 0.351, + "step": 3868000 + }, + { + "epoch": 2.32, + "learning_rate": 3.980081946656075e-05, + "loss": 0.3495, + "step": 3868500 + }, + { + "epoch": 2.32, + "learning_rate": 3.979871950100018e-05, + "loss": 0.3436, + "step": 3869000 + }, + { + "epoch": 2.32, + "learning_rate": 3.979661953543962e-05, + "loss": 0.3451, + "step": 3869500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9794519569879056e-05, + "loss": 0.3499, + "step": 3870000 + }, + { + "epoch": 2.32, + "learning_rate": 3.979242380424961e-05, + "loss": 0.3598, + "step": 3870500 + }, + { + "epoch": 2.32, + "learning_rate": 3.979032383868904e-05, + "loss": 0.3512, + "step": 3871000 + }, + { + "epoch": 2.32, + "learning_rate": 3.978822387312848e-05, + "loss": 0.3548, + "step": 3871500 + }, + { + "epoch": 2.32, + "learning_rate": 3.978612390756792e-05, + "loss": 0.3408, + "step": 3872000 + }, + { + "epoch": 2.32, + "learning_rate": 3.978402394200735e-05, + "loss": 0.3399, + "step": 3872500 + }, + { + "epoch": 2.32, + "learning_rate": 3.978192397644679e-05, + "loss": 0.3651, + "step": 3873000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9779828210817344e-05, + "loss": 0.3613, + "step": 3873500 + }, + { + "epoch": 2.32, + "learning_rate": 3.977772824525678e-05, + "loss": 0.344, + "step": 3874000 + }, + { + "epoch": 2.32, + "learning_rate": 3.977562827969622e-05, + "loss": 0.356, + "step": 3874500 + }, + { + "epoch": 2.32, + "learning_rate": 3.977352831413565e-05, + "loss": 0.3515, + "step": 3875000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9771428348575085e-05, + "loss": 0.3458, + "step": 3875500 + }, + { + "epoch": 2.32, + "learning_rate": 3.976932838301452e-05, + "loss": 0.3525, + "step": 3876000 + }, + { + "epoch": 2.32, + "learning_rate": 3.976722841745395e-05, + "loss": 0.34, + "step": 3876500 + }, + { + "epoch": 2.32, + "learning_rate": 3.9765128451893385e-05, + "loss": 0.344, + "step": 3877000 + }, + { + "epoch": 2.32, + "learning_rate": 3.9763032686263945e-05, + "loss": 0.3519, + "step": 3877500 + }, + { + "epoch": 2.33, + "learning_rate": 3.976093272070338e-05, + "loss": 0.351, + "step": 3878000 + }, + { + "epoch": 2.33, + "learning_rate": 3.975883275514281e-05, + "loss": 0.3521, + "step": 3878500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9756732789582246e-05, + "loss": 0.3453, + "step": 3879000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9754641223883926e-05, + "loss": 0.3461, + "step": 3879500 + }, + { + "epoch": 2.33, + "learning_rate": 3.975254125832336e-05, + "loss": 0.3556, + "step": 3880000 + }, + { + "epoch": 2.33, + "learning_rate": 3.97504412927628e-05, + "loss": 0.3558, + "step": 3880500 + }, + { + "epoch": 2.33, + "learning_rate": 3.974834132720223e-05, + "loss": 0.3527, + "step": 3881000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9746241361641674e-05, + "loss": 0.3517, + "step": 3881500 + }, + { + "epoch": 2.33, + "learning_rate": 3.974414139608111e-05, + "loss": 0.3579, + "step": 3882000 + }, + { + "epoch": 2.33, + "learning_rate": 3.974204563045166e-05, + "loss": 0.3572, + "step": 3882500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9739945664891094e-05, + "loss": 0.3539, + "step": 3883000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9737845699330534e-05, + "loss": 0.3415, + "step": 3883500 + }, + { + "epoch": 2.33, + "learning_rate": 3.973574993370109e-05, + "loss": 0.3637, + "step": 3884000 + }, + { + "epoch": 2.33, + "learning_rate": 3.973364996814052e-05, + "loss": 0.3571, + "step": 3884500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9731550002579955e-05, + "loss": 0.3501, + "step": 3885000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9729450037019395e-05, + "loss": 0.3578, + "step": 3885500 + }, + { + "epoch": 2.33, + "learning_rate": 3.972735007145883e-05, + "loss": 0.3552, + "step": 3886000 + }, + { + "epoch": 2.33, + "learning_rate": 3.972525010589826e-05, + "loss": 0.3528, + "step": 3886500 + }, + { + "epoch": 2.33, + "learning_rate": 3.97231501403377e-05, + "loss": 0.3538, + "step": 3887000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9721050174777136e-05, + "loss": 0.3501, + "step": 3887500 + }, + { + "epoch": 2.33, + "learning_rate": 3.971895440914769e-05, + "loss": 0.3556, + "step": 3888000 + }, + { + "epoch": 2.33, + "learning_rate": 3.971685444358713e-05, + "loss": 0.3459, + "step": 3888500 + }, + { + "epoch": 2.33, + "learning_rate": 3.971475447802656e-05, + "loss": 0.357, + "step": 3889000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9712654512465996e-05, + "loss": 0.3497, + "step": 3889500 + }, + { + "epoch": 2.33, + "learning_rate": 3.971055454690543e-05, + "loss": 0.3621, + "step": 3890000 + }, + { + "epoch": 2.33, + "learning_rate": 3.970845878127599e-05, + "loss": 0.359, + "step": 3890500 + }, + { + "epoch": 2.33, + "learning_rate": 3.9706358815715424e-05, + "loss": 0.3517, + "step": 3891000 + }, + { + "epoch": 2.33, + "learning_rate": 3.970425885015486e-05, + "loss": 0.3464, + "step": 3891500 + }, + { + "epoch": 2.33, + "learning_rate": 3.97021588845943e-05, + "loss": 0.3558, + "step": 3892000 + }, + { + "epoch": 2.33, + "learning_rate": 3.9700058919033724e-05, + "loss": 0.3536, + "step": 3892500 + }, + { + "epoch": 2.33, + "learning_rate": 3.969795895347316e-05, + "loss": 0.3472, + "step": 3893000 + }, + { + "epoch": 2.33, + "learning_rate": 3.96958589879126e-05, + "loss": 0.3456, + "step": 3893500 + }, + { + "epoch": 2.33, + "learning_rate": 3.969375902235203e-05, + "loss": 0.3413, + "step": 3894000 + }, + { + "epoch": 2.33, + "learning_rate": 3.969166745665371e-05, + "loss": 0.3457, + "step": 3894500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9689567491093145e-05, + "loss": 0.3418, + "step": 3895000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9687467525532585e-05, + "loss": 0.3556, + "step": 3895500 + }, + { + "epoch": 2.34, + "learning_rate": 3.968536755997202e-05, + "loss": 0.3569, + "step": 3896000 + }, + { + "epoch": 2.34, + "learning_rate": 3.968326759441145e-05, + "loss": 0.3508, + "step": 3896500 + }, + { + "epoch": 2.34, + "learning_rate": 3.968116762885089e-05, + "loss": 0.3414, + "step": 3897000 + }, + { + "epoch": 2.34, + "learning_rate": 3.967906766329032e-05, + "loss": 0.3505, + "step": 3897500 + }, + { + "epoch": 2.34, + "learning_rate": 3.967696769772975e-05, + "loss": 0.3514, + "step": 3898000 + }, + { + "epoch": 2.34, + "learning_rate": 3.967487613203143e-05, + "loss": 0.3548, + "step": 3898500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9672776166470866e-05, + "loss": 0.3397, + "step": 3899000 + }, + { + "epoch": 2.34, + "learning_rate": 3.967067620091031e-05, + "loss": 0.3488, + "step": 3899500 + }, + { + "epoch": 2.34, + "learning_rate": 3.966857623534974e-05, + "loss": 0.3551, + "step": 3900000 + }, + { + "epoch": 2.34, + "eval_loss": 0.34067773818969727, + "eval_runtime": 1122.4626, + "eval_samples_per_second": 469.254, + "eval_steps_per_second": 78.209, + "step": 3900000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9666476269789174e-05, + "loss": 0.3501, + "step": 3900500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9664376304228614e-05, + "loss": 0.347, + "step": 3901000 + }, + { + "epoch": 2.34, + "learning_rate": 3.966227633866805e-05, + "loss": 0.348, + "step": 3901500 + }, + { + "epoch": 2.34, + "learning_rate": 3.966017637310748e-05, + "loss": 0.3535, + "step": 3902000 + }, + { + "epoch": 2.34, + "learning_rate": 3.965808060747804e-05, + "loss": 0.3533, + "step": 3902500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9655980641917475e-05, + "loss": 0.3552, + "step": 3903000 + }, + { + "epoch": 2.34, + "learning_rate": 3.965388487628803e-05, + "loss": 0.3576, + "step": 3903500 + }, + { + "epoch": 2.34, + "learning_rate": 3.965178491072746e-05, + "loss": 0.3488, + "step": 3904000 + }, + { + "epoch": 2.34, + "learning_rate": 3.96496849451669e-05, + "loss": 0.3517, + "step": 3904500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9647584979606335e-05, + "loss": 0.3463, + "step": 3905000 + }, + { + "epoch": 2.34, + "learning_rate": 3.964548501404577e-05, + "loss": 0.3451, + "step": 3905500 + }, + { + "epoch": 2.34, + "learning_rate": 3.964338924841632e-05, + "loss": 0.35, + "step": 3906000 + }, + { + "epoch": 2.34, + "learning_rate": 3.964129348278688e-05, + "loss": 0.3516, + "step": 3906500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9639193517226316e-05, + "loss": 0.3464, + "step": 3907000 + }, + { + "epoch": 2.34, + "learning_rate": 3.963709355166575e-05, + "loss": 0.3483, + "step": 3907500 + }, + { + "epoch": 2.34, + "learning_rate": 3.963499358610519e-05, + "loss": 0.3477, + "step": 3908000 + }, + { + "epoch": 2.34, + "learning_rate": 3.963289362054462e-05, + "loss": 0.3448, + "step": 3908500 + }, + { + "epoch": 2.34, + "learning_rate": 3.963079365498406e-05, + "loss": 0.3605, + "step": 3909000 + }, + { + "epoch": 2.34, + "learning_rate": 3.96286936894235e-05, + "loss": 0.3459, + "step": 3909500 + }, + { + "epoch": 2.34, + "learning_rate": 3.962659372386293e-05, + "loss": 0.3468, + "step": 3910000 + }, + { + "epoch": 2.34, + "learning_rate": 3.9624493758302364e-05, + "loss": 0.3456, + "step": 3910500 + }, + { + "epoch": 2.34, + "learning_rate": 3.9622393792741804e-05, + "loss": 0.3456, + "step": 3911000 + }, + { + "epoch": 2.35, + "learning_rate": 3.962029802711236e-05, + "loss": 0.3562, + "step": 3911500 + }, + { + "epoch": 2.35, + "learning_rate": 3.961819806155179e-05, + "loss": 0.3563, + "step": 3912000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9616098095991225e-05, + "loss": 0.3485, + "step": 3912500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9613998130430665e-05, + "loss": 0.3501, + "step": 3913000 + }, + { + "epoch": 2.35, + "learning_rate": 3.96118981648701e-05, + "loss": 0.3481, + "step": 3913500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9609798199309525e-05, + "loss": 0.3474, + "step": 3914000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9607702433680085e-05, + "loss": 0.3481, + "step": 3914500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9605602468119526e-05, + "loss": 0.3499, + "step": 3915000 + }, + { + "epoch": 2.35, + "learning_rate": 3.960350250255896e-05, + "loss": 0.3517, + "step": 3915500 + }, + { + "epoch": 2.35, + "learning_rate": 3.960140673692951e-05, + "loss": 0.3451, + "step": 3916000 + }, + { + "epoch": 2.35, + "learning_rate": 3.959930677136895e-05, + "loss": 0.3541, + "step": 3916500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9597206805808386e-05, + "loss": 0.353, + "step": 3917000 + }, + { + "epoch": 2.35, + "learning_rate": 3.959510684024782e-05, + "loss": 0.3535, + "step": 3917500 + }, + { + "epoch": 2.35, + "learning_rate": 3.959300687468726e-05, + "loss": 0.3518, + "step": 3918000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9590906909126694e-05, + "loss": 0.3546, + "step": 3918500 + }, + { + "epoch": 2.35, + "learning_rate": 3.958880694356612e-05, + "loss": 0.3445, + "step": 3919000 + }, + { + "epoch": 2.35, + "learning_rate": 3.958670697800556e-05, + "loss": 0.3539, + "step": 3919500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9584607012444994e-05, + "loss": 0.3396, + "step": 3920000 + }, + { + "epoch": 2.35, + "learning_rate": 3.958250704688443e-05, + "loss": 0.3458, + "step": 3920500 + }, + { + "epoch": 2.35, + "learning_rate": 3.958040708132387e-05, + "loss": 0.3515, + "step": 3921000 + }, + { + "epoch": 2.35, + "learning_rate": 3.957831131569442e-05, + "loss": 0.3507, + "step": 3921500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9576211350133855e-05, + "loss": 0.354, + "step": 3922000 + }, + { + "epoch": 2.35, + "learning_rate": 3.957411138457329e-05, + "loss": 0.3522, + "step": 3922500 + }, + { + "epoch": 2.35, + "learning_rate": 3.957201141901273e-05, + "loss": 0.3609, + "step": 3923000 + }, + { + "epoch": 2.35, + "learning_rate": 3.956991145345216e-05, + "loss": 0.3476, + "step": 3923500 + }, + { + "epoch": 2.35, + "learning_rate": 3.9567811487891595e-05, + "loss": 0.3571, + "step": 3924000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9565711522331035e-05, + "loss": 0.3518, + "step": 3924500 + }, + { + "epoch": 2.35, + "learning_rate": 3.956361155677047e-05, + "loss": 0.3518, + "step": 3925000 + }, + { + "epoch": 2.35, + "learning_rate": 3.956151579114102e-05, + "loss": 0.3496, + "step": 3925500 + }, + { + "epoch": 2.35, + "learning_rate": 3.955941582558046e-05, + "loss": 0.3601, + "step": 3926000 + }, + { + "epoch": 2.35, + "learning_rate": 3.9557315860019896e-05, + "loss": 0.3504, + "step": 3926500 + }, + { + "epoch": 2.35, + "learning_rate": 3.955521589445933e-05, + "loss": 0.3587, + "step": 3927000 + }, + { + "epoch": 2.35, + "learning_rate": 3.955311592889877e-05, + "loss": 0.3585, + "step": 3927500 + }, + { + "epoch": 2.35, + "learning_rate": 3.95510159633382e-05, + "loss": 0.3505, + "step": 3928000 + }, + { + "epoch": 2.36, + "learning_rate": 3.954891599777764e-05, + "loss": 0.3371, + "step": 3928500 + }, + { + "epoch": 2.36, + "learning_rate": 3.954681603221707e-05, + "loss": 0.3514, + "step": 3929000 + }, + { + "epoch": 2.36, + "learning_rate": 3.954472026658763e-05, + "loss": 0.3515, + "step": 3929500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9542620301027064e-05, + "loss": 0.345, + "step": 3930000 + }, + { + "epoch": 2.36, + "learning_rate": 3.954052453539762e-05, + "loss": 0.3461, + "step": 3930500 + }, + { + "epoch": 2.36, + "learning_rate": 3.953842456983705e-05, + "loss": 0.3539, + "step": 3931000 + }, + { + "epoch": 2.36, + "learning_rate": 3.953632880420761e-05, + "loss": 0.3468, + "step": 3931500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9534228838647045e-05, + "loss": 0.3464, + "step": 3932000 + }, + { + "epoch": 2.36, + "learning_rate": 3.953212887308648e-05, + "loss": 0.3628, + "step": 3932500 + }, + { + "epoch": 2.36, + "learning_rate": 3.953002890752592e-05, + "loss": 0.3537, + "step": 3933000 + }, + { + "epoch": 2.36, + "learning_rate": 3.952792894196535e-05, + "loss": 0.3411, + "step": 3933500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9525828976404786e-05, + "loss": 0.3518, + "step": 3934000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9523729010844226e-05, + "loss": 0.3458, + "step": 3934500 + }, + { + "epoch": 2.36, + "learning_rate": 3.952162904528366e-05, + "loss": 0.3437, + "step": 3935000 + }, + { + "epoch": 2.36, + "learning_rate": 3.951952907972309e-05, + "loss": 0.3541, + "step": 3935500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9517429114162526e-05, + "loss": 0.3611, + "step": 3936000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9515333348533086e-05, + "loss": 0.3498, + "step": 3936500 + }, + { + "epoch": 2.36, + "learning_rate": 3.951323338297252e-05, + "loss": 0.3528, + "step": 3937000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9511133417411953e-05, + "loss": 0.3601, + "step": 3937500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9509033451851394e-05, + "loss": 0.3596, + "step": 3938000 + }, + { + "epoch": 2.36, + "learning_rate": 3.950693348629082e-05, + "loss": 0.3563, + "step": 3938500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9504833520730254e-05, + "loss": 0.3442, + "step": 3939000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9502737755100814e-05, + "loss": 0.3475, + "step": 3939500 + }, + { + "epoch": 2.36, + "learning_rate": 3.9500637789540254e-05, + "loss": 0.3545, + "step": 3940000 + }, + { + "epoch": 2.36, + "learning_rate": 3.949853782397969e-05, + "loss": 0.3545, + "step": 3940500 + }, + { + "epoch": 2.36, + "learning_rate": 3.949643785841912e-05, + "loss": 0.356, + "step": 3941000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9494337892858555e-05, + "loss": 0.3483, + "step": 3941500 + }, + { + "epoch": 2.36, + "learning_rate": 3.949223792729799e-05, + "loss": 0.3519, + "step": 3942000 + }, + { + "epoch": 2.36, + "learning_rate": 3.949013796173743e-05, + "loss": 0.3496, + "step": 3942500 + }, + { + "epoch": 2.36, + "learning_rate": 3.948803799617686e-05, + "loss": 0.3531, + "step": 3943000 + }, + { + "epoch": 2.36, + "learning_rate": 3.9485942230547415e-05, + "loss": 0.3455, + "step": 3943500 + }, + { + "epoch": 2.36, + "learning_rate": 3.948384226498685e-05, + "loss": 0.3575, + "step": 3944000 + }, + { + "epoch": 2.36, + "learning_rate": 3.948174229942629e-05, + "loss": 0.3419, + "step": 3944500 + }, + { + "epoch": 2.37, + "learning_rate": 3.947964233386572e-05, + "loss": 0.3461, + "step": 3945000 + }, + { + "epoch": 2.37, + "learning_rate": 3.94775507681674e-05, + "loss": 0.3508, + "step": 3945500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9475450802606837e-05, + "loss": 0.3429, + "step": 3946000 + }, + { + "epoch": 2.37, + "learning_rate": 3.947335083704628e-05, + "loss": 0.3357, + "step": 3946500 + }, + { + "epoch": 2.37, + "learning_rate": 3.947125087148571e-05, + "loss": 0.3553, + "step": 3947000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9469150905925144e-05, + "loss": 0.3509, + "step": 3947500 + }, + { + "epoch": 2.37, + "learning_rate": 3.946705094036458e-05, + "loss": 0.3658, + "step": 3948000 + }, + { + "epoch": 2.37, + "learning_rate": 3.946495097480401e-05, + "loss": 0.3393, + "step": 3948500 + }, + { + "epoch": 2.37, + "learning_rate": 3.946285520917457e-05, + "loss": 0.3584, + "step": 3949000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9460755243614004e-05, + "loss": 0.3447, + "step": 3949500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9458655278053445e-05, + "loss": 0.3487, + "step": 3950000 + }, + { + "epoch": 2.37, + "learning_rate": 3.945655531249287e-05, + "loss": 0.3528, + "step": 3950500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9454455346932305e-05, + "loss": 0.3457, + "step": 3951000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9452355381371745e-05, + "loss": 0.3559, + "step": 3951500 + }, + { + "epoch": 2.37, + "learning_rate": 3.945025541581118e-05, + "loss": 0.3548, + "step": 3952000 + }, + { + "epoch": 2.37, + "learning_rate": 3.944815545025061e-05, + "loss": 0.3541, + "step": 3952500 + }, + { + "epoch": 2.37, + "learning_rate": 3.944605968462117e-05, + "loss": 0.3411, + "step": 3953000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9443959719060606e-05, + "loss": 0.3493, + "step": 3953500 + }, + { + "epoch": 2.37, + "learning_rate": 3.944185975350004e-05, + "loss": 0.3549, + "step": 3954000 + }, + { + "epoch": 2.37, + "learning_rate": 3.943975978793948e-05, + "loss": 0.3547, + "step": 3954500 + }, + { + "epoch": 2.37, + "learning_rate": 3.943766402231004e-05, + "loss": 0.3514, + "step": 3955000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9435564056749466e-05, + "loss": 0.3535, + "step": 3955500 + }, + { + "epoch": 2.37, + "learning_rate": 3.94334640911889e-05, + "loss": 0.3424, + "step": 3956000 + }, + { + "epoch": 2.37, + "learning_rate": 3.943136832555946e-05, + "loss": 0.3518, + "step": 3956500 + }, + { + "epoch": 2.37, + "learning_rate": 3.94292683599989e-05, + "loss": 0.343, + "step": 3957000 + }, + { + "epoch": 2.37, + "learning_rate": 3.942716839443833e-05, + "loss": 0.3416, + "step": 3957500 + }, + { + "epoch": 2.37, + "learning_rate": 3.942506842887776e-05, + "loss": 0.3451, + "step": 3958000 + }, + { + "epoch": 2.37, + "learning_rate": 3.94229684633172e-05, + "loss": 0.3552, + "step": 3958500 + }, + { + "epoch": 2.37, + "learning_rate": 3.942087269768776e-05, + "loss": 0.3449, + "step": 3959000 + }, + { + "epoch": 2.37, + "learning_rate": 3.9418772732127195e-05, + "loss": 0.3464, + "step": 3959500 + }, + { + "epoch": 2.37, + "learning_rate": 3.941667276656663e-05, + "loss": 0.3471, + "step": 3960000 + }, + { + "epoch": 2.37, + "learning_rate": 3.941457280100606e-05, + "loss": 0.346, + "step": 3960500 + }, + { + "epoch": 2.37, + "learning_rate": 3.9412472835445495e-05, + "loss": 0.3494, + "step": 3961000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9410372869884935e-05, + "loss": 0.3513, + "step": 3961500 + }, + { + "epoch": 2.38, + "learning_rate": 3.940827290432437e-05, + "loss": 0.3465, + "step": 3962000 + }, + { + "epoch": 2.38, + "learning_rate": 3.94061729387638e-05, + "loss": 0.3447, + "step": 3962500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9404077173134356e-05, + "loss": 0.3589, + "step": 3963000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9401977207573796e-05, + "loss": 0.3459, + "step": 3963500 + }, + { + "epoch": 2.38, + "learning_rate": 3.939987724201323e-05, + "loss": 0.3516, + "step": 3964000 + }, + { + "epoch": 2.38, + "learning_rate": 3.939778147638379e-05, + "loss": 0.3438, + "step": 3964500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9395681510823217e-05, + "loss": 0.3451, + "step": 3965000 + }, + { + "epoch": 2.38, + "learning_rate": 3.939358154526266e-05, + "loss": 0.3543, + "step": 3965500 + }, + { + "epoch": 2.38, + "learning_rate": 3.939148157970209e-05, + "loss": 0.3501, + "step": 3966000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9389381614141524e-05, + "loss": 0.3484, + "step": 3966500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9387281648580964e-05, + "loss": 0.3426, + "step": 3967000 + }, + { + "epoch": 2.38, + "learning_rate": 3.93851816830204e-05, + "loss": 0.3423, + "step": 3967500 + }, + { + "epoch": 2.38, + "learning_rate": 3.938308171745983e-05, + "loss": 0.3504, + "step": 3968000 + }, + { + "epoch": 2.38, + "learning_rate": 3.938098595183039e-05, + "loss": 0.3528, + "step": 3968500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9378885986269825e-05, + "loss": 0.354, + "step": 3969000 + }, + { + "epoch": 2.38, + "learning_rate": 3.937678602070926e-05, + "loss": 0.3466, + "step": 3969500 + }, + { + "epoch": 2.38, + "learning_rate": 3.93746860551487e-05, + "loss": 0.3504, + "step": 3970000 + }, + { + "epoch": 2.38, + "learning_rate": 3.937259028951925e-05, + "loss": 0.3522, + "step": 3970500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9370490323958685e-05, + "loss": 0.344, + "step": 3971000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9368394558329246e-05, + "loss": 0.3465, + "step": 3971500 + }, + { + "epoch": 2.38, + "learning_rate": 3.936629459276867e-05, + "loss": 0.3542, + "step": 3972000 + }, + { + "epoch": 2.38, + "learning_rate": 3.936419462720811e-05, + "loss": 0.3457, + "step": 3972500 + }, + { + "epoch": 2.38, + "learning_rate": 3.9362094661647546e-05, + "loss": 0.3512, + "step": 3973000 + }, + { + "epoch": 2.38, + "learning_rate": 3.935999469608698e-05, + "loss": 0.3541, + "step": 3973500 + }, + { + "epoch": 2.38, + "learning_rate": 3.935789473052642e-05, + "loss": 0.3526, + "step": 3974000 + }, + { + "epoch": 2.38, + "learning_rate": 3.935579476496585e-05, + "loss": 0.3482, + "step": 3974500 + }, + { + "epoch": 2.38, + "learning_rate": 3.935369479940529e-05, + "loss": 0.3557, + "step": 3975000 + }, + { + "epoch": 2.38, + "learning_rate": 3.935159903377585e-05, + "loss": 0.3473, + "step": 3975500 + }, + { + "epoch": 2.38, + "learning_rate": 3.934950326814641e-05, + "loss": 0.3494, + "step": 3976000 + }, + { + "epoch": 2.38, + "learning_rate": 3.9347403302585834e-05, + "loss": 0.3408, + "step": 3976500 + }, + { + "epoch": 2.38, + "learning_rate": 3.934530333702527e-05, + "loss": 0.3608, + "step": 3977000 + }, + { + "epoch": 2.38, + "learning_rate": 3.934320337146471e-05, + "loss": 0.3558, + "step": 3977500 + }, + { + "epoch": 2.38, + "learning_rate": 3.934110340590414e-05, + "loss": 0.3496, + "step": 3978000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9339003440343575e-05, + "loss": 0.3489, + "step": 3978500 + }, + { + "epoch": 2.39, + "learning_rate": 3.933690767471413e-05, + "loss": 0.3557, + "step": 3979000 + }, + { + "epoch": 2.39, + "learning_rate": 3.933480770915357e-05, + "loss": 0.3339, + "step": 3979500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9332707743593e-05, + "loss": 0.3451, + "step": 3980000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9330607778032435e-05, + "loss": 0.3417, + "step": 3980500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9328507812471876e-05, + "loss": 0.3504, + "step": 3981000 + }, + { + "epoch": 2.39, + "learning_rate": 3.932640784691131e-05, + "loss": 0.3472, + "step": 3981500 + }, + { + "epoch": 2.39, + "learning_rate": 3.932430788135074e-05, + "loss": 0.3539, + "step": 3982000 + }, + { + "epoch": 2.39, + "learning_rate": 3.932220791579018e-05, + "loss": 0.356, + "step": 3982500 + }, + { + "epoch": 2.39, + "learning_rate": 3.932011635009186e-05, + "loss": 0.3619, + "step": 3983000 + }, + { + "epoch": 2.39, + "learning_rate": 3.93180163845313e-05, + "loss": 0.3545, + "step": 3983500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9315916418970723e-05, + "loss": 0.355, + "step": 3984000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9313816453410164e-05, + "loss": 0.341, + "step": 3984500 + }, + { + "epoch": 2.39, + "learning_rate": 3.93117164878496e-05, + "loss": 0.3472, + "step": 3985000 + }, + { + "epoch": 2.39, + "learning_rate": 3.930961652228903e-05, + "loss": 0.3482, + "step": 3985500 + }, + { + "epoch": 2.39, + "learning_rate": 3.930751655672847e-05, + "loss": 0.3449, + "step": 3986000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9305416591167904e-05, + "loss": 0.3524, + "step": 3986500 + }, + { + "epoch": 2.39, + "learning_rate": 3.930331662560734e-05, + "loss": 0.3468, + "step": 3987000 + }, + { + "epoch": 2.39, + "learning_rate": 3.930121666004678e-05, + "loss": 0.3407, + "step": 3987500 + }, + { + "epoch": 2.39, + "learning_rate": 3.929911669448621e-05, + "loss": 0.3521, + "step": 3988000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9297016728925645e-05, + "loss": 0.349, + "step": 3988500 + }, + { + "epoch": 2.39, + "learning_rate": 3.92949209632962e-05, + "loss": 0.3544, + "step": 3989000 + }, + { + "epoch": 2.39, + "learning_rate": 3.929282099773564e-05, + "loss": 0.3413, + "step": 3989500 + }, + { + "epoch": 2.39, + "learning_rate": 3.929072103217507e-05, + "loss": 0.3454, + "step": 3990000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9288621066614506e-05, + "loss": 0.3483, + "step": 3990500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9286521101053946e-05, + "loss": 0.3458, + "step": 3991000 + }, + { + "epoch": 2.39, + "learning_rate": 3.928442113549338e-05, + "loss": 0.3537, + "step": 3991500 + }, + { + "epoch": 2.39, + "learning_rate": 3.928232116993281e-05, + "loss": 0.3408, + "step": 3992000 + }, + { + "epoch": 2.39, + "learning_rate": 3.9280221204372246e-05, + "loss": 0.3583, + "step": 3992500 + }, + { + "epoch": 2.39, + "learning_rate": 3.927812543874281e-05, + "loss": 0.3463, + "step": 3993000 + }, + { + "epoch": 2.39, + "learning_rate": 3.927602547318224e-05, + "loss": 0.3457, + "step": 3993500 + }, + { + "epoch": 2.39, + "learning_rate": 3.9273925507621674e-05, + "loss": 0.3519, + "step": 3994000 + }, + { + "epoch": 2.39, + "learning_rate": 3.927182554206111e-05, + "loss": 0.3517, + "step": 3994500 + }, + { + "epoch": 2.4, + "learning_rate": 3.926972557650054e-05, + "loss": 0.3444, + "step": 3995000 + }, + { + "epoch": 2.4, + "learning_rate": 3.926762561093998e-05, + "loss": 0.3458, + "step": 3995500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9265525645379414e-05, + "loss": 0.3466, + "step": 3996000 + }, + { + "epoch": 2.4, + "learning_rate": 3.926342987974997e-05, + "loss": 0.3476, + "step": 3996500 + }, + { + "epoch": 2.4, + "learning_rate": 3.92613299141894e-05, + "loss": 0.3477, + "step": 3997000 + }, + { + "epoch": 2.4, + "learning_rate": 3.925922994862884e-05, + "loss": 0.339, + "step": 3997500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9257129983068275e-05, + "loss": 0.3598, + "step": 3998000 + }, + { + "epoch": 2.4, + "learning_rate": 3.925503001750771e-05, + "loss": 0.3497, + "step": 3998500 + }, + { + "epoch": 2.4, + "learning_rate": 3.925293005194715e-05, + "loss": 0.3415, + "step": 3999000 + }, + { + "epoch": 2.4, + "learning_rate": 3.925083008638658e-05, + "loss": 0.3483, + "step": 3999500 + }, + { + "epoch": 2.4, + "learning_rate": 3.924873012082602e-05, + "loss": 0.3535, + "step": 4000000 + }, + { + "epoch": 2.4, + "eval_loss": 0.3390848636627197, + "eval_runtime": 1120.7204, + "eval_samples_per_second": 469.983, + "eval_steps_per_second": 78.331, + "step": 4000000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9246634355196576e-05, + "loss": 0.3493, + "step": 4000500 + }, + { + "epoch": 2.4, + "learning_rate": 3.924453438963601e-05, + "loss": 0.3542, + "step": 4001000 + }, + { + "epoch": 2.4, + "learning_rate": 3.924243442407544e-05, + "loss": 0.3466, + "step": 4001500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9240338658445996e-05, + "loss": 0.3511, + "step": 4002000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9238238692885437e-05, + "loss": 0.3562, + "step": 4002500 + }, + { + "epoch": 2.4, + "learning_rate": 3.923613872732487e-05, + "loss": 0.347, + "step": 4003000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9234038761764303e-05, + "loss": 0.3421, + "step": 4003500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9231938796203744e-05, + "loss": 0.3518, + "step": 4004000 + }, + { + "epoch": 2.4, + "learning_rate": 3.922983883064318e-05, + "loss": 0.3472, + "step": 4004500 + }, + { + "epoch": 2.4, + "learning_rate": 3.922773886508261e-05, + "loss": 0.3464, + "step": 4005000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9225643099453164e-05, + "loss": 0.3463, + "step": 4005500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9223543133892604e-05, + "loss": 0.3412, + "step": 4006000 + }, + { + "epoch": 2.4, + "learning_rate": 3.922144316833204e-05, + "loss": 0.3514, + "step": 4006500 + }, + { + "epoch": 2.4, + "learning_rate": 3.921934320277148e-05, + "loss": 0.3544, + "step": 4007000 + }, + { + "epoch": 2.4, + "learning_rate": 3.921724323721091e-05, + "loss": 0.3506, + "step": 4007500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9215143271650345e-05, + "loss": 0.3442, + "step": 4008000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9213043306089785e-05, + "loss": 0.3438, + "step": 4008500 + }, + { + "epoch": 2.4, + "learning_rate": 3.921094334052921e-05, + "loss": 0.3456, + "step": 4009000 + }, + { + "epoch": 2.4, + "learning_rate": 3.920884757489977e-05, + "loss": 0.3482, + "step": 4009500 + }, + { + "epoch": 2.4, + "learning_rate": 3.9206747609339206e-05, + "loss": 0.3536, + "step": 4010000 + }, + { + "epoch": 2.4, + "learning_rate": 3.9204647643778646e-05, + "loss": 0.3668, + "step": 4010500 + }, + { + "epoch": 2.4, + "learning_rate": 3.920254767821808e-05, + "loss": 0.3442, + "step": 4011000 + }, + { + "epoch": 2.41, + "learning_rate": 3.920045191258863e-05, + "loss": 0.3416, + "step": 4011500 + }, + { + "epoch": 2.41, + "learning_rate": 3.919835614695919e-05, + "loss": 0.3488, + "step": 4012000 + }, + { + "epoch": 2.41, + "learning_rate": 3.919625618139863e-05, + "loss": 0.3482, + "step": 4012500 + }, + { + "epoch": 2.41, + "learning_rate": 3.919415621583806e-05, + "loss": 0.3417, + "step": 4013000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9192056250277494e-05, + "loss": 0.3474, + "step": 4013500 + }, + { + "epoch": 2.41, + "learning_rate": 3.918996048464805e-05, + "loss": 0.3511, + "step": 4014000 + }, + { + "epoch": 2.41, + "learning_rate": 3.918786051908749e-05, + "loss": 0.3606, + "step": 4014500 + }, + { + "epoch": 2.41, + "learning_rate": 3.918576055352692e-05, + "loss": 0.3507, + "step": 4015000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9183660587966355e-05, + "loss": 0.3467, + "step": 4015500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9181560622405795e-05, + "loss": 0.3453, + "step": 4016000 + }, + { + "epoch": 2.41, + "learning_rate": 3.917946065684523e-05, + "loss": 0.3356, + "step": 4016500 + }, + { + "epoch": 2.41, + "learning_rate": 3.917736489121578e-05, + "loss": 0.3546, + "step": 4017000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9175264925655215e-05, + "loss": 0.3481, + "step": 4017500 + }, + { + "epoch": 2.41, + "learning_rate": 3.917316916002577e-05, + "loss": 0.3452, + "step": 4018000 + }, + { + "epoch": 2.41, + "learning_rate": 3.917106919446521e-05, + "loss": 0.35, + "step": 4018500 + }, + { + "epoch": 2.41, + "learning_rate": 3.916896922890464e-05, + "loss": 0.3542, + "step": 4019000 + }, + { + "epoch": 2.41, + "learning_rate": 3.916686926334408e-05, + "loss": 0.3514, + "step": 4019500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9164769297783516e-05, + "loss": 0.3447, + "step": 4020000 + }, + { + "epoch": 2.41, + "learning_rate": 3.916266933222295e-05, + "loss": 0.3438, + "step": 4020500 + }, + { + "epoch": 2.41, + "learning_rate": 3.916056936666239e-05, + "loss": 0.3535, + "step": 4021000 + }, + { + "epoch": 2.41, + "learning_rate": 3.915846940110182e-05, + "loss": 0.3414, + "step": 4021500 + }, + { + "epoch": 2.41, + "learning_rate": 3.915637363547238e-05, + "loss": 0.3505, + "step": 4022000 + }, + { + "epoch": 2.41, + "learning_rate": 3.915427366991181e-05, + "loss": 0.3454, + "step": 4022500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9152177904282364e-05, + "loss": 0.3457, + "step": 4023000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9150077938721804e-05, + "loss": 0.3439, + "step": 4023500 + }, + { + "epoch": 2.41, + "learning_rate": 3.914797797316124e-05, + "loss": 0.3537, + "step": 4024000 + }, + { + "epoch": 2.41, + "learning_rate": 3.914587800760067e-05, + "loss": 0.3487, + "step": 4024500 + }, + { + "epoch": 2.41, + "learning_rate": 3.914377804204011e-05, + "loss": 0.3543, + "step": 4025000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9141678076479545e-05, + "loss": 0.358, + "step": 4025500 + }, + { + "epoch": 2.41, + "learning_rate": 3.913957811091898e-05, + "loss": 0.3399, + "step": 4026000 + }, + { + "epoch": 2.41, + "learning_rate": 3.913747814535842e-05, + "loss": 0.3561, + "step": 4026500 + }, + { + "epoch": 2.41, + "learning_rate": 3.913537817979785e-05, + "loss": 0.3454, + "step": 4027000 + }, + { + "epoch": 2.41, + "learning_rate": 3.9133278214237285e-05, + "loss": 0.3495, + "step": 4027500 + }, + { + "epoch": 2.41, + "learning_rate": 3.9131182448607846e-05, + "loss": 0.3555, + "step": 4028000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912908248304728e-05, + "loss": 0.3516, + "step": 4028500 + }, + { + "epoch": 2.42, + "learning_rate": 3.912698251748671e-05, + "loss": 0.3534, + "step": 4029000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912488255192615e-05, + "loss": 0.3555, + "step": 4029500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9122782586365586e-05, + "loss": 0.3518, + "step": 4030000 + }, + { + "epoch": 2.42, + "learning_rate": 3.912068262080501e-05, + "loss": 0.3565, + "step": 4030500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9118586855175573e-05, + "loss": 0.347, + "step": 4031000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9116486889615014e-05, + "loss": 0.3424, + "step": 4031500 + }, + { + "epoch": 2.42, + "learning_rate": 3.911438692405445e-05, + "loss": 0.363, + "step": 4032000 + }, + { + "epoch": 2.42, + "learning_rate": 3.911228695849388e-05, + "loss": 0.3485, + "step": 4032500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9110186992933314e-05, + "loss": 0.351, + "step": 4033000 + }, + { + "epoch": 2.42, + "learning_rate": 3.910808702737275e-05, + "loss": 0.3405, + "step": 4033500 + }, + { + "epoch": 2.42, + "learning_rate": 3.910599546167443e-05, + "loss": 0.3656, + "step": 4034000 + }, + { + "epoch": 2.42, + "learning_rate": 3.910389549611386e-05, + "loss": 0.3583, + "step": 4034500 + }, + { + "epoch": 2.42, + "learning_rate": 3.91017955305533e-05, + "loss": 0.3516, + "step": 4035000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9099695564992735e-05, + "loss": 0.3528, + "step": 4035500 + }, + { + "epoch": 2.42, + "learning_rate": 3.909759559943217e-05, + "loss": 0.3414, + "step": 4036000 + }, + { + "epoch": 2.42, + "learning_rate": 3.909549563387161e-05, + "loss": 0.3454, + "step": 4036500 + }, + { + "epoch": 2.42, + "learning_rate": 3.909339566831104e-05, + "loss": 0.3547, + "step": 4037000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9091295702750476e-05, + "loss": 0.3511, + "step": 4037500 + }, + { + "epoch": 2.42, + "learning_rate": 3.908919573718991e-05, + "loss": 0.3595, + "step": 4038000 + }, + { + "epoch": 2.42, + "learning_rate": 3.908709577162934e-05, + "loss": 0.3504, + "step": 4038500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9084995806068776e-05, + "loss": 0.3595, + "step": 4039000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9082895840508216e-05, + "loss": 0.3467, + "step": 4039500 + }, + { + "epoch": 2.42, + "learning_rate": 3.908080007487877e-05, + "loss": 0.3406, + "step": 4040000 + }, + { + "epoch": 2.42, + "learning_rate": 3.90787001093182e-05, + "loss": 0.3609, + "step": 4040500 + }, + { + "epoch": 2.42, + "learning_rate": 3.907660014375764e-05, + "loss": 0.3465, + "step": 4041000 + }, + { + "epoch": 2.42, + "learning_rate": 3.907450017819708e-05, + "loss": 0.3418, + "step": 4041500 + }, + { + "epoch": 2.42, + "learning_rate": 3.907240441256764e-05, + "loss": 0.3352, + "step": 4042000 + }, + { + "epoch": 2.42, + "learning_rate": 3.9070304447007064e-05, + "loss": 0.3501, + "step": 4042500 + }, + { + "epoch": 2.42, + "learning_rate": 3.9068204481446504e-05, + "loss": 0.35, + "step": 4043000 + }, + { + "epoch": 2.42, + "learning_rate": 3.906610451588594e-05, + "loss": 0.3524, + "step": 4043500 + }, + { + "epoch": 2.42, + "learning_rate": 3.906401295018762e-05, + "loss": 0.3495, + "step": 4044000 + }, + { + "epoch": 2.42, + "learning_rate": 3.906191298462705e-05, + "loss": 0.3557, + "step": 4044500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9059813019066485e-05, + "loss": 0.3501, + "step": 4045000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9057713053505925e-05, + "loss": 0.3573, + "step": 4045500 + }, + { + "epoch": 2.43, + "learning_rate": 3.905561308794536e-05, + "loss": 0.3547, + "step": 4046000 + }, + { + "epoch": 2.43, + "learning_rate": 3.905351312238479e-05, + "loss": 0.3573, + "step": 4046500 + }, + { + "epoch": 2.43, + "learning_rate": 3.905141315682423e-05, + "loss": 0.3615, + "step": 4047000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9049317391194786e-05, + "loss": 0.3452, + "step": 4047500 + }, + { + "epoch": 2.43, + "learning_rate": 3.904721742563422e-05, + "loss": 0.3561, + "step": 4048000 + }, + { + "epoch": 2.43, + "learning_rate": 3.904511746007365e-05, + "loss": 0.3481, + "step": 4048500 + }, + { + "epoch": 2.43, + "learning_rate": 3.904301749451309e-05, + "loss": 0.3533, + "step": 4049000 + }, + { + "epoch": 2.43, + "learning_rate": 3.904091752895252e-05, + "loss": 0.3506, + "step": 4049500 + }, + { + "epoch": 2.43, + "learning_rate": 3.903881756339196e-05, + "loss": 0.3435, + "step": 4050000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9036717597831394e-05, + "loss": 0.3443, + "step": 4050500 + }, + { + "epoch": 2.43, + "learning_rate": 3.903461763227083e-05, + "loss": 0.3613, + "step": 4051000 + }, + { + "epoch": 2.43, + "learning_rate": 3.903252186664139e-05, + "loss": 0.3446, + "step": 4051500 + }, + { + "epoch": 2.43, + "learning_rate": 3.903042190108082e-05, + "loss": 0.3525, + "step": 4052000 + }, + { + "epoch": 2.43, + "learning_rate": 3.902832613545138e-05, + "loss": 0.3557, + "step": 4052500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9026226169890815e-05, + "loss": 0.3436, + "step": 4053000 + }, + { + "epoch": 2.43, + "learning_rate": 3.902412620433025e-05, + "loss": 0.3558, + "step": 4053500 + }, + { + "epoch": 2.43, + "learning_rate": 3.902202623876969e-05, + "loss": 0.3518, + "step": 4054000 + }, + { + "epoch": 2.43, + "learning_rate": 3.9019926273209115e-05, + "loss": 0.3403, + "step": 4054500 + }, + { + "epoch": 2.43, + "learning_rate": 3.901782630764855e-05, + "loss": 0.348, + "step": 4055000 + }, + { + "epoch": 2.43, + "learning_rate": 3.901572634208799e-05, + "loss": 0.3543, + "step": 4055500 + }, + { + "epoch": 2.43, + "learning_rate": 3.901362637652742e-05, + "loss": 0.3404, + "step": 4056000 + }, + { + "epoch": 2.43, + "learning_rate": 3.901153061089798e-05, + "loss": 0.3567, + "step": 4056500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9009430645337416e-05, + "loss": 0.3373, + "step": 4057000 + }, + { + "epoch": 2.43, + "learning_rate": 3.900733067977685e-05, + "loss": 0.3503, + "step": 4057500 + }, + { + "epoch": 2.43, + "learning_rate": 3.900523071421628e-05, + "loss": 0.3389, + "step": 4058000 + }, + { + "epoch": 2.43, + "learning_rate": 3.900313494858684e-05, + "loss": 0.348, + "step": 4058500 + }, + { + "epoch": 2.43, + "learning_rate": 3.9001034983026284e-05, + "loss": 0.3475, + "step": 4059000 + }, + { + "epoch": 2.43, + "learning_rate": 3.899893501746571e-05, + "loss": 0.3586, + "step": 4059500 + }, + { + "epoch": 2.43, + "learning_rate": 3.899683925183627e-05, + "loss": 0.3489, + "step": 4060000 + }, + { + "epoch": 2.43, + "learning_rate": 3.8994739286275704e-05, + "loss": 0.3526, + "step": 4060500 + }, + { + "epoch": 2.43, + "learning_rate": 3.899264352064626e-05, + "loss": 0.3406, + "step": 4061000 + }, + { + "epoch": 2.44, + "learning_rate": 3.89905435550857e-05, + "loss": 0.3418, + "step": 4061500 + }, + { + "epoch": 2.44, + "learning_rate": 3.898844358952513e-05, + "loss": 0.3559, + "step": 4062000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8986343623964565e-05, + "loss": 0.3424, + "step": 4062500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8984243658404005e-05, + "loss": 0.3462, + "step": 4063000 + }, + { + "epoch": 2.44, + "learning_rate": 3.898214369284344e-05, + "loss": 0.3599, + "step": 4063500 + }, + { + "epoch": 2.44, + "learning_rate": 3.898004372728287e-05, + "loss": 0.3454, + "step": 4064000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8977943761722305e-05, + "loss": 0.3508, + "step": 4064500 + }, + { + "epoch": 2.44, + "learning_rate": 3.897584379616174e-05, + "loss": 0.3507, + "step": 4065000 + }, + { + "epoch": 2.44, + "learning_rate": 3.897374383060118e-05, + "loss": 0.3504, + "step": 4065500 + }, + { + "epoch": 2.44, + "learning_rate": 3.897164386504061e-05, + "loss": 0.3411, + "step": 4066000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8969543899480046e-05, + "loss": 0.345, + "step": 4066500 + }, + { + "epoch": 2.44, + "learning_rate": 3.89674481338506e-05, + "loss": 0.3457, + "step": 4067000 + }, + { + "epoch": 2.44, + "learning_rate": 3.896534816829004e-05, + "loss": 0.3476, + "step": 4067500 + }, + { + "epoch": 2.44, + "learning_rate": 3.896324820272947e-05, + "loss": 0.3434, + "step": 4068000 + }, + { + "epoch": 2.44, + "learning_rate": 3.896114823716891e-05, + "loss": 0.3487, + "step": 4068500 + }, + { + "epoch": 2.44, + "learning_rate": 3.895905247153946e-05, + "loss": 0.3489, + "step": 4069000 + }, + { + "epoch": 2.44, + "learning_rate": 3.89569525059789e-05, + "loss": 0.3508, + "step": 4069500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8954852540418334e-05, + "loss": 0.3544, + "step": 4070000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8952756774788894e-05, + "loss": 0.3554, + "step": 4070500 + }, + { + "epoch": 2.44, + "learning_rate": 3.895065680922833e-05, + "loss": 0.3495, + "step": 4071000 + }, + { + "epoch": 2.44, + "learning_rate": 3.894855684366776e-05, + "loss": 0.357, + "step": 4071500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8946456878107195e-05, + "loss": 0.347, + "step": 4072000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8944356912546635e-05, + "loss": 0.3504, + "step": 4072500 + }, + { + "epoch": 2.44, + "learning_rate": 3.894225694698607e-05, + "loss": 0.3481, + "step": 4073000 + }, + { + "epoch": 2.44, + "learning_rate": 3.894016118135662e-05, + "loss": 0.3454, + "step": 4073500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8938061215796055e-05, + "loss": 0.3481, + "step": 4074000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8935961250235496e-05, + "loss": 0.3415, + "step": 4074500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8933865484606056e-05, + "loss": 0.3527, + "step": 4075000 + }, + { + "epoch": 2.44, + "learning_rate": 3.893176551904549e-05, + "loss": 0.3436, + "step": 4075500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8929665553484916e-05, + "loss": 0.3549, + "step": 4076000 + }, + { + "epoch": 2.44, + "learning_rate": 3.8927565587924356e-05, + "loss": 0.3478, + "step": 4076500 + }, + { + "epoch": 2.44, + "learning_rate": 3.892546562236379e-05, + "loss": 0.3444, + "step": 4077000 + }, + { + "epoch": 2.44, + "learning_rate": 3.892336565680323e-05, + "loss": 0.349, + "step": 4077500 + }, + { + "epoch": 2.44, + "learning_rate": 3.8921265691242664e-05, + "loss": 0.3484, + "step": 4078000 + }, + { + "epoch": 2.45, + "learning_rate": 3.89191657256821e-05, + "loss": 0.3466, + "step": 4078500 + }, + { + "epoch": 2.45, + "learning_rate": 3.891706996005265e-05, + "loss": 0.3482, + "step": 4079000 + }, + { + "epoch": 2.45, + "learning_rate": 3.891496999449209e-05, + "loss": 0.3466, + "step": 4079500 + }, + { + "epoch": 2.45, + "learning_rate": 3.891287422886265e-05, + "loss": 0.346, + "step": 4080000 + }, + { + "epoch": 2.45, + "learning_rate": 3.891077426330208e-05, + "loss": 0.3438, + "step": 4080500 + }, + { + "epoch": 2.45, + "learning_rate": 3.890867429774151e-05, + "loss": 0.3577, + "step": 4081000 + }, + { + "epoch": 2.45, + "learning_rate": 3.890657433218095e-05, + "loss": 0.3409, + "step": 4081500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8904474366620385e-05, + "loss": 0.3443, + "step": 4082000 + }, + { + "epoch": 2.45, + "learning_rate": 3.890237440105982e-05, + "loss": 0.3457, + "step": 4082500 + }, + { + "epoch": 2.45, + "learning_rate": 3.890027443549926e-05, + "loss": 0.3526, + "step": 4083000 + }, + { + "epoch": 2.45, + "learning_rate": 3.889817446993869e-05, + "loss": 0.3428, + "step": 4083500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8896074504378126e-05, + "loss": 0.357, + "step": 4084000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8893974538817566e-05, + "loss": 0.3434, + "step": 4084500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8891874573257e-05, + "loss": 0.3466, + "step": 4085000 + }, + { + "epoch": 2.45, + "learning_rate": 3.888977460769643e-05, + "loss": 0.3467, + "step": 4085500 + }, + { + "epoch": 2.45, + "learning_rate": 3.888767884206699e-05, + "loss": 0.3506, + "step": 4086000 + }, + { + "epoch": 2.45, + "learning_rate": 3.888557887650643e-05, + "loss": 0.3478, + "step": 4086500 + }, + { + "epoch": 2.45, + "learning_rate": 3.888347891094586e-05, + "loss": 0.3491, + "step": 4087000 + }, + { + "epoch": 2.45, + "learning_rate": 3.88813789453853e-05, + "loss": 0.3475, + "step": 4087500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8879283179755854e-05, + "loss": 0.3572, + "step": 4088000 + }, + { + "epoch": 2.45, + "learning_rate": 3.887718741412641e-05, + "loss": 0.3599, + "step": 4088500 + }, + { + "epoch": 2.45, + "learning_rate": 3.887508744856584e-05, + "loss": 0.3414, + "step": 4089000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8872987483005274e-05, + "loss": 0.3493, + "step": 4089500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8870887517444715e-05, + "loss": 0.3574, + "step": 4090000 + }, + { + "epoch": 2.45, + "learning_rate": 3.886878755188415e-05, + "loss": 0.3556, + "step": 4090500 + }, + { + "epoch": 2.45, + "learning_rate": 3.886668758632358e-05, + "loss": 0.3461, + "step": 4091000 + }, + { + "epoch": 2.45, + "learning_rate": 3.886458762076302e-05, + "loss": 0.3492, + "step": 4091500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8862487655202455e-05, + "loss": 0.3521, + "step": 4092000 + }, + { + "epoch": 2.45, + "learning_rate": 3.886038768964189e-05, + "loss": 0.35, + "step": 4092500 + }, + { + "epoch": 2.45, + "learning_rate": 3.885828772408133e-05, + "loss": 0.3502, + "step": 4093000 + }, + { + "epoch": 2.45, + "learning_rate": 3.885619195845188e-05, + "loss": 0.3449, + "step": 4093500 + }, + { + "epoch": 2.45, + "learning_rate": 3.8854091992891316e-05, + "loss": 0.3532, + "step": 4094000 + }, + { + "epoch": 2.45, + "learning_rate": 3.8851992027330756e-05, + "loss": 0.3453, + "step": 4094500 + }, + { + "epoch": 2.46, + "learning_rate": 3.884989206177019e-05, + "loss": 0.3456, + "step": 4095000 + }, + { + "epoch": 2.46, + "learning_rate": 3.884779209620962e-05, + "loss": 0.3429, + "step": 4095500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8845692130649057e-05, + "loss": 0.3458, + "step": 4096000 + }, + { + "epoch": 2.46, + "learning_rate": 3.884359636501962e-05, + "loss": 0.3518, + "step": 4096500 + }, + { + "epoch": 2.46, + "learning_rate": 3.884149639945905e-05, + "loss": 0.3486, + "step": 4097000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8839396433898484e-05, + "loss": 0.346, + "step": 4097500 + }, + { + "epoch": 2.46, + "learning_rate": 3.883729646833792e-05, + "loss": 0.3493, + "step": 4098000 + }, + { + "epoch": 2.46, + "learning_rate": 3.883519650277735e-05, + "loss": 0.3484, + "step": 4098500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8833096537216784e-05, + "loss": 0.3447, + "step": 4099000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8831000771587345e-05, + "loss": 0.3528, + "step": 4099500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8828900806026785e-05, + "loss": 0.3536, + "step": 4100000 + }, + { + "epoch": 2.46, + "eval_loss": 0.3373085558414459, + "eval_runtime": 1111.3303, + "eval_samples_per_second": 473.954, + "eval_steps_per_second": 78.993, + "step": 4100000 + }, + { + "epoch": 2.46, + "learning_rate": 3.882680084046621e-05, + "loss": 0.349, + "step": 4100500 + }, + { + "epoch": 2.46, + "learning_rate": 3.882470087490565e-05, + "loss": 0.3522, + "step": 4101000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8822600909345085e-05, + "loss": 0.3485, + "step": 4101500 + }, + { + "epoch": 2.46, + "learning_rate": 3.882050094378452e-05, + "loss": 0.358, + "step": 4102000 + }, + { + "epoch": 2.46, + "learning_rate": 3.881840097822396e-05, + "loss": 0.3685, + "step": 4102500 + }, + { + "epoch": 2.46, + "learning_rate": 3.881630521259451e-05, + "loss": 0.3537, + "step": 4103000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8814205247033946e-05, + "loss": 0.3451, + "step": 4103500 + }, + { + "epoch": 2.46, + "learning_rate": 3.881210528147338e-05, + "loss": 0.3456, + "step": 4104000 + }, + { + "epoch": 2.46, + "learning_rate": 3.881000531591282e-05, + "loss": 0.3448, + "step": 4104500 + }, + { + "epoch": 2.46, + "learning_rate": 3.880790955028338e-05, + "loss": 0.3594, + "step": 4105000 + }, + { + "epoch": 2.46, + "learning_rate": 3.880580958472281e-05, + "loss": 0.3482, + "step": 4105500 + }, + { + "epoch": 2.46, + "learning_rate": 3.880370961916224e-05, + "loss": 0.3469, + "step": 4106000 + }, + { + "epoch": 2.46, + "learning_rate": 3.880160965360168e-05, + "loss": 0.3476, + "step": 4106500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8799509688041114e-05, + "loss": 0.3494, + "step": 4107000 + }, + { + "epoch": 2.46, + "learning_rate": 3.879740972248055e-05, + "loss": 0.3451, + "step": 4107500 + }, + { + "epoch": 2.46, + "learning_rate": 3.879531395685111e-05, + "loss": 0.3467, + "step": 4108000 + }, + { + "epoch": 2.46, + "learning_rate": 3.879321399129054e-05, + "loss": 0.339, + "step": 4108500 + }, + { + "epoch": 2.46, + "learning_rate": 3.8791114025729975e-05, + "loss": 0.344, + "step": 4109000 + }, + { + "epoch": 2.46, + "learning_rate": 3.8789014060169415e-05, + "loss": 0.3361, + "step": 4109500 + }, + { + "epoch": 2.46, + "learning_rate": 3.878691409460885e-05, + "loss": 0.3516, + "step": 4110000 + }, + { + "epoch": 2.46, + "learning_rate": 3.878481412904828e-05, + "loss": 0.3557, + "step": 4110500 + }, + { + "epoch": 2.46, + "learning_rate": 3.878271416348772e-05, + "loss": 0.3508, + "step": 4111000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8780614197927155e-05, + "loss": 0.3511, + "step": 4111500 + }, + { + "epoch": 2.47, + "learning_rate": 3.877851843229771e-05, + "loss": 0.3489, + "step": 4112000 + }, + { + "epoch": 2.47, + "learning_rate": 3.877641846673714e-05, + "loss": 0.352, + "step": 4112500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8774322701107696e-05, + "loss": 0.3583, + "step": 4113000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8772222735547136e-05, + "loss": 0.3439, + "step": 4113500 + }, + { + "epoch": 2.47, + "learning_rate": 3.877012276998657e-05, + "loss": 0.3468, + "step": 4114000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8768022804426e-05, + "loss": 0.3591, + "step": 4114500 + }, + { + "epoch": 2.47, + "learning_rate": 3.876592283886544e-05, + "loss": 0.3548, + "step": 4115000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8763827073236e-05, + "loss": 0.3488, + "step": 4115500 + }, + { + "epoch": 2.47, + "learning_rate": 3.876172710767543e-05, + "loss": 0.3482, + "step": 4116000 + }, + { + "epoch": 2.47, + "learning_rate": 3.875962714211487e-05, + "loss": 0.3492, + "step": 4116500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8757527176554304e-05, + "loss": 0.3579, + "step": 4117000 + }, + { + "epoch": 2.47, + "learning_rate": 3.875542721099374e-05, + "loss": 0.359, + "step": 4117500 + }, + { + "epoch": 2.47, + "learning_rate": 3.875333144536429e-05, + "loss": 0.3453, + "step": 4118000 + }, + { + "epoch": 2.47, + "learning_rate": 3.875123147980373e-05, + "loss": 0.3622, + "step": 4118500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8749131514243165e-05, + "loss": 0.3428, + "step": 4119000 + }, + { + "epoch": 2.47, + "learning_rate": 3.87470315486826e-05, + "loss": 0.3484, + "step": 4119500 + }, + { + "epoch": 2.47, + "learning_rate": 3.874493158312204e-05, + "loss": 0.3509, + "step": 4120000 + }, + { + "epoch": 2.47, + "learning_rate": 3.874283161756147e-05, + "loss": 0.3435, + "step": 4120500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8740731652000905e-05, + "loss": 0.3415, + "step": 4121000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8738631686440346e-05, + "loss": 0.3513, + "step": 4121500 + }, + { + "epoch": 2.47, + "learning_rate": 3.873654012074202e-05, + "loss": 0.3418, + "step": 4122000 + }, + { + "epoch": 2.47, + "learning_rate": 3.873444015518145e-05, + "loss": 0.3498, + "step": 4122500 + }, + { + "epoch": 2.47, + "learning_rate": 3.8732340189620886e-05, + "loss": 0.3521, + "step": 4123000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8730240224060327e-05, + "loss": 0.349, + "step": 4123500 + }, + { + "epoch": 2.47, + "learning_rate": 3.872814445843089e-05, + "loss": 0.3503, + "step": 4124000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8726044492870314e-05, + "loss": 0.3621, + "step": 4124500 + }, + { + "epoch": 2.47, + "learning_rate": 3.872394452730975e-05, + "loss": 0.3384, + "step": 4125000 + }, + { + "epoch": 2.47, + "learning_rate": 3.872184456174919e-05, + "loss": 0.3445, + "step": 4125500 + }, + { + "epoch": 2.47, + "learning_rate": 3.871974459618862e-05, + "loss": 0.3412, + "step": 4126000 + }, + { + "epoch": 2.47, + "learning_rate": 3.8717648830559174e-05, + "loss": 0.3571, + "step": 4126500 + }, + { + "epoch": 2.47, + "learning_rate": 3.871554886499861e-05, + "loss": 0.3424, + "step": 4127000 + }, + { + "epoch": 2.47, + "learning_rate": 3.871344889943805e-05, + "loss": 0.3495, + "step": 4127500 + }, + { + "epoch": 2.47, + "learning_rate": 3.871134893387748e-05, + "loss": 0.3533, + "step": 4128000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8709248968316915e-05, + "loss": 0.3523, + "step": 4128500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8707149002756355e-05, + "loss": 0.3502, + "step": 4129000 + }, + { + "epoch": 2.48, + "learning_rate": 3.870504903719579e-05, + "loss": 0.3465, + "step": 4129500 + }, + { + "epoch": 2.48, + "learning_rate": 3.870294907163522e-05, + "loss": 0.352, + "step": 4130000 + }, + { + "epoch": 2.48, + "learning_rate": 3.870085330600578e-05, + "loss": 0.3511, + "step": 4130500 + }, + { + "epoch": 2.48, + "learning_rate": 3.869875754037634e-05, + "loss": 0.3493, + "step": 4131000 + }, + { + "epoch": 2.48, + "learning_rate": 3.869665757481577e-05, + "loss": 0.3481, + "step": 4131500 + }, + { + "epoch": 2.48, + "learning_rate": 3.86945576092552e-05, + "loss": 0.351, + "step": 4132000 + }, + { + "epoch": 2.48, + "learning_rate": 3.869245764369464e-05, + "loss": 0.3514, + "step": 4132500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8690357678134077e-05, + "loss": 0.3371, + "step": 4133000 + }, + { + "epoch": 2.48, + "learning_rate": 3.868826191250464e-05, + "loss": 0.3461, + "step": 4133500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8686161946944064e-05, + "loss": 0.341, + "step": 4134000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8684061981383504e-05, + "loss": 0.3473, + "step": 4134500 + }, + { + "epoch": 2.48, + "learning_rate": 3.868196201582294e-05, + "loss": 0.3405, + "step": 4135000 + }, + { + "epoch": 2.48, + "learning_rate": 3.86798662501935e-05, + "loss": 0.3531, + "step": 4135500 + }, + { + "epoch": 2.48, + "learning_rate": 3.867776628463293e-05, + "loss": 0.3515, + "step": 4136000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8675666319072365e-05, + "loss": 0.351, + "step": 4136500 + }, + { + "epoch": 2.48, + "learning_rate": 3.86735663535118e-05, + "loss": 0.3545, + "step": 4137000 + }, + { + "epoch": 2.48, + "learning_rate": 3.867146638795124e-05, + "loss": 0.347, + "step": 4137500 + }, + { + "epoch": 2.48, + "learning_rate": 3.866936642239067e-05, + "loss": 0.3408, + "step": 4138000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8667266456830105e-05, + "loss": 0.3529, + "step": 4138500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8665166491269545e-05, + "loss": 0.3439, + "step": 4139000 + }, + { + "epoch": 2.48, + "learning_rate": 3.86630707256401e-05, + "loss": 0.3503, + "step": 4139500 + }, + { + "epoch": 2.48, + "learning_rate": 3.866097076007953e-05, + "loss": 0.3629, + "step": 4140000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8658870794518966e-05, + "loss": 0.3495, + "step": 4140500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8656770828958406e-05, + "loss": 0.3411, + "step": 4141000 + }, + { + "epoch": 2.48, + "learning_rate": 3.865467506332896e-05, + "loss": 0.3493, + "step": 4141500 + }, + { + "epoch": 2.48, + "learning_rate": 3.865257509776839e-05, + "loss": 0.3529, + "step": 4142000 + }, + { + "epoch": 2.48, + "learning_rate": 3.865047513220783e-05, + "loss": 0.3481, + "step": 4142500 + }, + { + "epoch": 2.48, + "learning_rate": 3.864837516664727e-05, + "loss": 0.3588, + "step": 4143000 + }, + { + "epoch": 2.48, + "learning_rate": 3.864627940101782e-05, + "loss": 0.3605, + "step": 4143500 + }, + { + "epoch": 2.48, + "learning_rate": 3.8644179435457254e-05, + "loss": 0.3546, + "step": 4144000 + }, + { + "epoch": 2.48, + "learning_rate": 3.8642079469896694e-05, + "loss": 0.3513, + "step": 4144500 + }, + { + "epoch": 2.49, + "learning_rate": 3.863997950433613e-05, + "loss": 0.3533, + "step": 4145000 + }, + { + "epoch": 2.49, + "learning_rate": 3.863787953877556e-05, + "loss": 0.3456, + "step": 4145500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8635779573215e-05, + "loss": 0.3483, + "step": 4146000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8633679607654435e-05, + "loss": 0.3421, + "step": 4146500 + }, + { + "epoch": 2.49, + "learning_rate": 3.863157964209387e-05, + "loss": 0.3387, + "step": 4147000 + }, + { + "epoch": 2.49, + "learning_rate": 3.862948387646442e-05, + "loss": 0.3518, + "step": 4147500 + }, + { + "epoch": 2.49, + "learning_rate": 3.862738391090386e-05, + "loss": 0.35, + "step": 4148000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8625283945343296e-05, + "loss": 0.3491, + "step": 4148500 + }, + { + "epoch": 2.49, + "learning_rate": 3.862318397978273e-05, + "loss": 0.3437, + "step": 4149000 + }, + { + "epoch": 2.49, + "learning_rate": 3.862108821415329e-05, + "loss": 0.3447, + "step": 4149500 + }, + { + "epoch": 2.49, + "learning_rate": 3.861898824859272e-05, + "loss": 0.3501, + "step": 4150000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8616888283032156e-05, + "loss": 0.355, + "step": 4150500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8614788317471596e-05, + "loss": 0.3469, + "step": 4151000 + }, + { + "epoch": 2.49, + "learning_rate": 3.861268835191103e-05, + "loss": 0.3474, + "step": 4151500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8610588386350463e-05, + "loss": 0.3464, + "step": 4152000 + }, + { + "epoch": 2.49, + "learning_rate": 3.860849262072102e-05, + "loss": 0.352, + "step": 4152500 + }, + { + "epoch": 2.49, + "learning_rate": 3.860639265516046e-05, + "loss": 0.3399, + "step": 4153000 + }, + { + "epoch": 2.49, + "learning_rate": 3.860429268959989e-05, + "loss": 0.353, + "step": 4153500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8602192724039324e-05, + "loss": 0.35, + "step": 4154000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8600092758478764e-05, + "loss": 0.3486, + "step": 4154500 + }, + { + "epoch": 2.49, + "learning_rate": 3.85979927929182e-05, + "loss": 0.3407, + "step": 4155000 + }, + { + "epoch": 2.49, + "learning_rate": 3.859589282735763e-05, + "loss": 0.3509, + "step": 4155500 + }, + { + "epoch": 2.49, + "learning_rate": 3.8593792861797065e-05, + "loss": 0.3416, + "step": 4156000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8591697096167625e-05, + "loss": 0.3583, + "step": 4156500 + }, + { + "epoch": 2.49, + "learning_rate": 3.858959713060706e-05, + "loss": 0.3478, + "step": 4157000 + }, + { + "epoch": 2.49, + "learning_rate": 3.858749716504649e-05, + "loss": 0.359, + "step": 4157500 + }, + { + "epoch": 2.49, + "learning_rate": 3.858539719948593e-05, + "loss": 0.347, + "step": 4158000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8583305633787606e-05, + "loss": 0.3456, + "step": 4158500 + }, + { + "epoch": 2.49, + "learning_rate": 3.858120566822704e-05, + "loss": 0.3453, + "step": 4159000 + }, + { + "epoch": 2.49, + "learning_rate": 3.857910570266647e-05, + "loss": 0.351, + "step": 4159500 + }, + { + "epoch": 2.49, + "learning_rate": 3.857700573710591e-05, + "loss": 0.3529, + "step": 4160000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8574905771545347e-05, + "loss": 0.348, + "step": 4160500 + }, + { + "epoch": 2.49, + "learning_rate": 3.857280580598478e-05, + "loss": 0.3483, + "step": 4161000 + }, + { + "epoch": 2.49, + "learning_rate": 3.8570710040355334e-05, + "loss": 0.3484, + "step": 4161500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8568610074794774e-05, + "loss": 0.3526, + "step": 4162000 + }, + { + "epoch": 2.5, + "learning_rate": 3.856651010923421e-05, + "loss": 0.3579, + "step": 4162500 + }, + { + "epoch": 2.5, + "learning_rate": 3.856441014367364e-05, + "loss": 0.3434, + "step": 4163000 + }, + { + "epoch": 2.5, + "learning_rate": 3.85623143780442e-05, + "loss": 0.351, + "step": 4163500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8560214412483635e-05, + "loss": 0.3491, + "step": 4164000 + }, + { + "epoch": 2.5, + "learning_rate": 3.855811444692307e-05, + "loss": 0.3535, + "step": 4164500 + }, + { + "epoch": 2.5, + "learning_rate": 3.855601448136251e-05, + "loss": 0.3515, + "step": 4165000 + }, + { + "epoch": 2.5, + "learning_rate": 3.855391451580194e-05, + "loss": 0.3403, + "step": 4165500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8551818750172495e-05, + "loss": 0.3585, + "step": 4166000 + }, + { + "epoch": 2.5, + "learning_rate": 3.854971878461193e-05, + "loss": 0.3485, + "step": 4166500 + }, + { + "epoch": 2.5, + "learning_rate": 3.854761881905137e-05, + "loss": 0.3415, + "step": 4167000 + }, + { + "epoch": 2.5, + "learning_rate": 3.85455188534908e-05, + "loss": 0.3482, + "step": 4167500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8543418887930236e-05, + "loss": 0.3458, + "step": 4168000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8541318922369676e-05, + "loss": 0.3513, + "step": 4168500 + }, + { + "epoch": 2.5, + "learning_rate": 3.853921895680911e-05, + "loss": 0.3477, + "step": 4169000 + }, + { + "epoch": 2.5, + "learning_rate": 3.853711899124854e-05, + "loss": 0.3475, + "step": 4169500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8535023225619097e-05, + "loss": 0.3497, + "step": 4170000 + }, + { + "epoch": 2.5, + "learning_rate": 3.853292326005854e-05, + "loss": 0.3488, + "step": 4170500 + }, + { + "epoch": 2.5, + "learning_rate": 3.853082749442909e-05, + "loss": 0.3408, + "step": 4171000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8528727528868524e-05, + "loss": 0.3482, + "step": 4171500 + }, + { + "epoch": 2.5, + "learning_rate": 3.852663176323908e-05, + "loss": 0.3481, + "step": 4172000 + }, + { + "epoch": 2.5, + "learning_rate": 3.852453179767852e-05, + "loss": 0.339, + "step": 4172500 + }, + { + "epoch": 2.5, + "learning_rate": 3.852243183211795e-05, + "loss": 0.3548, + "step": 4173000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8520331866557385e-05, + "loss": 0.3488, + "step": 4173500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8518231900996825e-05, + "loss": 0.3461, + "step": 4174000 + }, + { + "epoch": 2.5, + "learning_rate": 3.851613193543626e-05, + "loss": 0.3471, + "step": 4174500 + }, + { + "epoch": 2.5, + "learning_rate": 3.851403196987569e-05, + "loss": 0.3455, + "step": 4175000 + }, + { + "epoch": 2.5, + "learning_rate": 3.851193200431513e-05, + "loss": 0.3463, + "step": 4175500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8509832038754565e-05, + "loss": 0.3397, + "step": 4176000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8507732073194e-05, + "loss": 0.3497, + "step": 4176500 + }, + { + "epoch": 2.5, + "learning_rate": 3.850563210763344e-05, + "loss": 0.3488, + "step": 4177000 + }, + { + "epoch": 2.5, + "learning_rate": 3.8503532142072866e-05, + "loss": 0.3468, + "step": 4177500 + }, + { + "epoch": 2.5, + "learning_rate": 3.8501436376443426e-05, + "loss": 0.3513, + "step": 4178000 + }, + { + "epoch": 2.51, + "learning_rate": 3.849933641088286e-05, + "loss": 0.3525, + "step": 4178500 + }, + { + "epoch": 2.51, + "learning_rate": 3.849724064525342e-05, + "loss": 0.3514, + "step": 4179000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8495140679692853e-05, + "loss": 0.3508, + "step": 4179500 + }, + { + "epoch": 2.51, + "learning_rate": 3.849304071413229e-05, + "loss": 0.3535, + "step": 4180000 + }, + { + "epoch": 2.51, + "learning_rate": 3.849094074857173e-05, + "loss": 0.3488, + "step": 4180500 + }, + { + "epoch": 2.51, + "learning_rate": 3.848884078301116e-05, + "loss": 0.3414, + "step": 4181000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8486740817450594e-05, + "loss": 0.3519, + "step": 4181500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8484640851890034e-05, + "loss": 0.3395, + "step": 4182000 + }, + { + "epoch": 2.51, + "learning_rate": 3.848254508626059e-05, + "loss": 0.3391, + "step": 4182500 + }, + { + "epoch": 2.51, + "learning_rate": 3.848044512070002e-05, + "loss": 0.3499, + "step": 4183000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8478345155139455e-05, + "loss": 0.3456, + "step": 4183500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8476245189578895e-05, + "loss": 0.3271, + "step": 4184000 + }, + { + "epoch": 2.51, + "learning_rate": 3.847414522401832e-05, + "loss": 0.3543, + "step": 4184500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8472045258457755e-05, + "loss": 0.3582, + "step": 4185000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8469949492828316e-05, + "loss": 0.3376, + "step": 4185500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8467849527267756e-05, + "loss": 0.3441, + "step": 4186000 + }, + { + "epoch": 2.51, + "learning_rate": 3.846574956170719e-05, + "loss": 0.3506, + "step": 4186500 + }, + { + "epoch": 2.51, + "learning_rate": 3.846364959614662e-05, + "loss": 0.3519, + "step": 4187000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8461549630586056e-05, + "loss": 0.3438, + "step": 4187500 + }, + { + "epoch": 2.51, + "learning_rate": 3.845944966502549e-05, + "loss": 0.3498, + "step": 4188000 + }, + { + "epoch": 2.51, + "learning_rate": 3.845734969946493e-05, + "loss": 0.3539, + "step": 4188500 + }, + { + "epoch": 2.51, + "learning_rate": 3.845525393383549e-05, + "loss": 0.3474, + "step": 4189000 + }, + { + "epoch": 2.51, + "learning_rate": 3.845315396827492e-05, + "loss": 0.3577, + "step": 4189500 + }, + { + "epoch": 2.51, + "learning_rate": 3.845105400271435e-05, + "loss": 0.3466, + "step": 4190000 + }, + { + "epoch": 2.51, + "learning_rate": 3.844895403715379e-05, + "loss": 0.3405, + "step": 4190500 + }, + { + "epoch": 2.51, + "learning_rate": 3.844685827152435e-05, + "loss": 0.3478, + "step": 4191000 + }, + { + "epoch": 2.51, + "learning_rate": 3.8444758305963784e-05, + "loss": 0.3477, + "step": 4191500 + }, + { + "epoch": 2.51, + "learning_rate": 3.844265834040321e-05, + "loss": 0.3384, + "step": 4192000 + }, + { + "epoch": 2.51, + "learning_rate": 3.844055837484265e-05, + "loss": 0.3461, + "step": 4192500 + }, + { + "epoch": 2.51, + "learning_rate": 3.8438458409282085e-05, + "loss": 0.3404, + "step": 4193000 + }, + { + "epoch": 2.51, + "learning_rate": 3.843635844372152e-05, + "loss": 0.3481, + "step": 4193500 + }, + { + "epoch": 2.51, + "learning_rate": 3.843426267809208e-05, + "loss": 0.3435, + "step": 4194000 + }, + { + "epoch": 2.51, + "learning_rate": 3.843216271253151e-05, + "loss": 0.3519, + "step": 4194500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8430062746970945e-05, + "loss": 0.3579, + "step": 4195000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8427962781410386e-05, + "loss": 0.3392, + "step": 4195500 + }, + { + "epoch": 2.52, + "learning_rate": 3.842586281584982e-05, + "loss": 0.3491, + "step": 4196000 + }, + { + "epoch": 2.52, + "learning_rate": 3.842376285028925e-05, + "loss": 0.3439, + "step": 4196500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8421667084659806e-05, + "loss": 0.354, + "step": 4197000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8419567119099246e-05, + "loss": 0.3542, + "step": 4197500 + }, + { + "epoch": 2.52, + "learning_rate": 3.841746715353868e-05, + "loss": 0.3434, + "step": 4198000 + }, + { + "epoch": 2.52, + "learning_rate": 3.841537138790924e-05, + "loss": 0.3564, + "step": 4198500 + }, + { + "epoch": 2.52, + "learning_rate": 3.841327142234867e-05, + "loss": 0.3387, + "step": 4199000 + }, + { + "epoch": 2.52, + "learning_rate": 3.841117145678811e-05, + "loss": 0.3438, + "step": 4199500 + }, + { + "epoch": 2.52, + "learning_rate": 3.840907149122754e-05, + "loss": 0.344, + "step": 4200000 + }, + { + "epoch": 2.52, + "eval_loss": 0.33738669753074646, + "eval_runtime": 1113.5514, + "eval_samples_per_second": 473.009, + "eval_steps_per_second": 78.835, + "step": 4200000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8406971525666974e-05, + "loss": 0.3491, + "step": 4200500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8404871560106414e-05, + "loss": 0.3482, + "step": 4201000 + }, + { + "epoch": 2.52, + "learning_rate": 3.840277159454585e-05, + "loss": 0.3563, + "step": 4201500 + }, + { + "epoch": 2.52, + "learning_rate": 3.840067162898529e-05, + "loss": 0.3503, + "step": 4202000 + }, + { + "epoch": 2.52, + "learning_rate": 3.839857166342472e-05, + "loss": 0.3481, + "step": 4202500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8396475897795275e-05, + "loss": 0.341, + "step": 4203000 + }, + { + "epoch": 2.52, + "learning_rate": 3.839437593223471e-05, + "loss": 0.3478, + "step": 4203500 + }, + { + "epoch": 2.52, + "learning_rate": 3.839227596667415e-05, + "loss": 0.3421, + "step": 4204000 + }, + { + "epoch": 2.52, + "learning_rate": 3.83901802010447e-05, + "loss": 0.3565, + "step": 4204500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8388080235484136e-05, + "loss": 0.3396, + "step": 4205000 + }, + { + "epoch": 2.52, + "learning_rate": 3.838598026992357e-05, + "loss": 0.3424, + "step": 4205500 + }, + { + "epoch": 2.52, + "learning_rate": 3.838388030436301e-05, + "loss": 0.3539, + "step": 4206000 + }, + { + "epoch": 2.52, + "learning_rate": 3.838178033880244e-05, + "loss": 0.3472, + "step": 4206500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8379680373241876e-05, + "loss": 0.343, + "step": 4207000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8377580407681317e-05, + "loss": 0.3504, + "step": 4207500 + }, + { + "epoch": 2.52, + "learning_rate": 3.837548044212075e-05, + "loss": 0.3562, + "step": 4208000 + }, + { + "epoch": 2.52, + "learning_rate": 3.8373380476560184e-05, + "loss": 0.3525, + "step": 4208500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8371280510999624e-05, + "loss": 0.3439, + "step": 4209000 + }, + { + "epoch": 2.52, + "learning_rate": 3.836918054543905e-05, + "loss": 0.3477, + "step": 4209500 + }, + { + "epoch": 2.52, + "learning_rate": 3.836708057987849e-05, + "loss": 0.3506, + "step": 4210000 + }, + { + "epoch": 2.52, + "learning_rate": 3.836498481424905e-05, + "loss": 0.3509, + "step": 4210500 + }, + { + "epoch": 2.52, + "learning_rate": 3.8362884848688484e-05, + "loss": 0.3588, + "step": 4211000 + }, + { + "epoch": 2.52, + "learning_rate": 3.836078908305904e-05, + "loss": 0.345, + "step": 4211500 + }, + { + "epoch": 2.53, + "learning_rate": 3.835868911749847e-05, + "loss": 0.3386, + "step": 4212000 + }, + { + "epoch": 2.53, + "learning_rate": 3.835658915193791e-05, + "loss": 0.3497, + "step": 4212500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8354489186377345e-05, + "loss": 0.3456, + "step": 4213000 + }, + { + "epoch": 2.53, + "learning_rate": 3.835238922081678e-05, + "loss": 0.3459, + "step": 4213500 + }, + { + "epoch": 2.53, + "learning_rate": 3.835028925525621e-05, + "loss": 0.3447, + "step": 4214000 + }, + { + "epoch": 2.53, + "learning_rate": 3.834819348962677e-05, + "loss": 0.3483, + "step": 4214500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8346093524066206e-05, + "loss": 0.3482, + "step": 4215000 + }, + { + "epoch": 2.53, + "learning_rate": 3.834399355850564e-05, + "loss": 0.3517, + "step": 4215500 + }, + { + "epoch": 2.53, + "learning_rate": 3.834189359294508e-05, + "loss": 0.3504, + "step": 4216000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8339793627384506e-05, + "loss": 0.3465, + "step": 4216500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8337693661823947e-05, + "loss": 0.3481, + "step": 4217000 + }, + { + "epoch": 2.53, + "learning_rate": 3.833559789619451e-05, + "loss": 0.3425, + "step": 4217500 + }, + { + "epoch": 2.53, + "learning_rate": 3.833349793063394e-05, + "loss": 0.3462, + "step": 4218000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8331397965073374e-05, + "loss": 0.3521, + "step": 4218500 + }, + { + "epoch": 2.53, + "learning_rate": 3.832929799951281e-05, + "loss": 0.3527, + "step": 4219000 + }, + { + "epoch": 2.53, + "learning_rate": 3.832719803395224e-05, + "loss": 0.3472, + "step": 4219500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8325098068391674e-05, + "loss": 0.3374, + "step": 4220000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8323002302762235e-05, + "loss": 0.3397, + "step": 4220500 + }, + { + "epoch": 2.53, + "learning_rate": 3.832090233720167e-05, + "loss": 0.3447, + "step": 4221000 + }, + { + "epoch": 2.53, + "learning_rate": 3.83188023716411e-05, + "loss": 0.3496, + "step": 4221500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8316702406080535e-05, + "loss": 0.3456, + "step": 4222000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8314606640451095e-05, + "loss": 0.3415, + "step": 4222500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8312506674890536e-05, + "loss": 0.35, + "step": 4223000 + }, + { + "epoch": 2.53, + "learning_rate": 3.831040670932996e-05, + "loss": 0.3404, + "step": 4223500 + }, + { + "epoch": 2.53, + "learning_rate": 3.83083067437694e-05, + "loss": 0.3444, + "step": 4224000 + }, + { + "epoch": 2.53, + "learning_rate": 3.8306206778208836e-05, + "loss": 0.3483, + "step": 4224500 + }, + { + "epoch": 2.53, + "learning_rate": 3.830410681264827e-05, + "loss": 0.3481, + "step": 4225000 + }, + { + "epoch": 2.53, + "learning_rate": 3.830200684708771e-05, + "loss": 0.3377, + "step": 4225500 + }, + { + "epoch": 2.53, + "learning_rate": 3.829990688152714e-05, + "loss": 0.3435, + "step": 4226000 + }, + { + "epoch": 2.53, + "learning_rate": 3.82978111158977e-05, + "loss": 0.35, + "step": 4226500 + }, + { + "epoch": 2.53, + "learning_rate": 3.829571115033713e-05, + "loss": 0.344, + "step": 4227000 + }, + { + "epoch": 2.53, + "learning_rate": 3.829361118477657e-05, + "loss": 0.3436, + "step": 4227500 + }, + { + "epoch": 2.53, + "learning_rate": 3.8291511219216004e-05, + "loss": 0.3491, + "step": 4228000 + }, + { + "epoch": 2.54, + "learning_rate": 3.828941125365544e-05, + "loss": 0.3444, + "step": 4228500 + }, + { + "epoch": 2.54, + "learning_rate": 3.828731548802599e-05, + "loss": 0.3457, + "step": 4229000 + }, + { + "epoch": 2.54, + "learning_rate": 3.828521552246543e-05, + "loss": 0.3448, + "step": 4229500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8283115556904865e-05, + "loss": 0.3517, + "step": 4230000 + }, + { + "epoch": 2.54, + "learning_rate": 3.82810155913443e-05, + "loss": 0.3417, + "step": 4230500 + }, + { + "epoch": 2.54, + "learning_rate": 3.827891982571486e-05, + "loss": 0.3467, + "step": 4231000 + }, + { + "epoch": 2.54, + "learning_rate": 3.827681986015429e-05, + "loss": 0.3405, + "step": 4231500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8274719894593725e-05, + "loss": 0.3512, + "step": 4232000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8272619929033165e-05, + "loss": 0.3388, + "step": 4232500 + }, + { + "epoch": 2.54, + "learning_rate": 3.82705199634726e-05, + "loss": 0.3527, + "step": 4233000 + }, + { + "epoch": 2.54, + "learning_rate": 3.826841999791203e-05, + "loss": 0.3484, + "step": 4233500 + }, + { + "epoch": 2.54, + "learning_rate": 3.826632003235147e-05, + "loss": 0.343, + "step": 4234000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8264220066790906e-05, + "loss": 0.348, + "step": 4234500 + }, + { + "epoch": 2.54, + "learning_rate": 3.826212430116146e-05, + "loss": 0.3459, + "step": 4235000 + }, + { + "epoch": 2.54, + "learning_rate": 3.826002433560089e-05, + "loss": 0.3482, + "step": 4235500 + }, + { + "epoch": 2.54, + "learning_rate": 3.825792856997145e-05, + "loss": 0.35, + "step": 4236000 + }, + { + "epoch": 2.54, + "learning_rate": 3.825582860441089e-05, + "loss": 0.3523, + "step": 4236500 + }, + { + "epoch": 2.54, + "learning_rate": 3.825372863885032e-05, + "loss": 0.3416, + "step": 4237000 + }, + { + "epoch": 2.54, + "learning_rate": 3.8251628673289754e-05, + "loss": 0.3435, + "step": 4237500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8249528707729194e-05, + "loss": 0.3433, + "step": 4238000 + }, + { + "epoch": 2.54, + "learning_rate": 3.824742874216863e-05, + "loss": 0.3508, + "step": 4238500 + }, + { + "epoch": 2.54, + "learning_rate": 3.824532877660806e-05, + "loss": 0.3492, + "step": 4239000 + }, + { + "epoch": 2.54, + "learning_rate": 3.82432288110475e-05, + "loss": 0.351, + "step": 4239500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8241133045418055e-05, + "loss": 0.3567, + "step": 4240000 + }, + { + "epoch": 2.54, + "learning_rate": 3.823903307985749e-05, + "loss": 0.3543, + "step": 4240500 + }, + { + "epoch": 2.54, + "learning_rate": 3.823693311429693e-05, + "loss": 0.3481, + "step": 4241000 + }, + { + "epoch": 2.54, + "learning_rate": 3.823483314873636e-05, + "loss": 0.3479, + "step": 4241500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8232737383106916e-05, + "loss": 0.3496, + "step": 4242000 + }, + { + "epoch": 2.54, + "learning_rate": 3.823063741754635e-05, + "loss": 0.3502, + "step": 4242500 + }, + { + "epoch": 2.54, + "learning_rate": 3.822853745198579e-05, + "loss": 0.351, + "step": 4243000 + }, + { + "epoch": 2.54, + "learning_rate": 3.822643748642522e-05, + "loss": 0.3419, + "step": 4243500 + }, + { + "epoch": 2.54, + "learning_rate": 3.8224341720795776e-05, + "loss": 0.3437, + "step": 4244000 + }, + { + "epoch": 2.54, + "learning_rate": 3.822224175523521e-05, + "loss": 0.3598, + "step": 4244500 + }, + { + "epoch": 2.55, + "learning_rate": 3.822014178967465e-05, + "loss": 0.3513, + "step": 4245000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8218041824114083e-05, + "loss": 0.3567, + "step": 4245500 + }, + { + "epoch": 2.55, + "learning_rate": 3.821594605848464e-05, + "loss": 0.3458, + "step": 4246000 + }, + { + "epoch": 2.55, + "learning_rate": 3.821384609292408e-05, + "loss": 0.3416, + "step": 4246500 + }, + { + "epoch": 2.55, + "learning_rate": 3.821174612736351e-05, + "loss": 0.3502, + "step": 4247000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8209646161802944e-05, + "loss": 0.3452, + "step": 4247500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8207546196242384e-05, + "loss": 0.353, + "step": 4248000 + }, + { + "epoch": 2.55, + "learning_rate": 3.820545043061294e-05, + "loss": 0.3539, + "step": 4248500 + }, + { + "epoch": 2.55, + "learning_rate": 3.820335046505237e-05, + "loss": 0.3441, + "step": 4249000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8201250499491805e-05, + "loss": 0.3499, + "step": 4249500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8199150533931245e-05, + "loss": 0.3409, + "step": 4250000 + }, + { + "epoch": 2.55, + "learning_rate": 3.819705056837068e-05, + "loss": 0.3493, + "step": 4250500 + }, + { + "epoch": 2.55, + "learning_rate": 3.819495060281011e-05, + "loss": 0.3446, + "step": 4251000 + }, + { + "epoch": 2.55, + "learning_rate": 3.819285063724955e-05, + "loss": 0.3506, + "step": 4251500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8190750671688986e-05, + "loss": 0.3473, + "step": 4252000 + }, + { + "epoch": 2.55, + "learning_rate": 3.818865490605954e-05, + "loss": 0.351, + "step": 4252500 + }, + { + "epoch": 2.55, + "learning_rate": 3.818655494049897e-05, + "loss": 0.3443, + "step": 4253000 + }, + { + "epoch": 2.55, + "learning_rate": 3.818445497493841e-05, + "loss": 0.3483, + "step": 4253500 + }, + { + "epoch": 2.55, + "learning_rate": 3.8182355009377846e-05, + "loss": 0.3494, + "step": 4254000 + }, + { + "epoch": 2.55, + "learning_rate": 3.81802592437484e-05, + "loss": 0.3512, + "step": 4254500 + }, + { + "epoch": 2.55, + "learning_rate": 3.817815927818784e-05, + "loss": 0.3543, + "step": 4255000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8176059312627274e-05, + "loss": 0.3397, + "step": 4255500 + }, + { + "epoch": 2.55, + "learning_rate": 3.817396354699783e-05, + "loss": 0.3489, + "step": 4256000 + }, + { + "epoch": 2.55, + "learning_rate": 3.817186358143726e-05, + "loss": 0.3317, + "step": 4256500 + }, + { + "epoch": 2.55, + "learning_rate": 3.81697636158767e-05, + "loss": 0.361, + "step": 4257000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8167663650316134e-05, + "loss": 0.3323, + "step": 4257500 + }, + { + "epoch": 2.55, + "learning_rate": 3.816556368475557e-05, + "loss": 0.348, + "step": 4258000 + }, + { + "epoch": 2.55, + "learning_rate": 3.816346371919501e-05, + "loss": 0.3458, + "step": 4258500 + }, + { + "epoch": 2.55, + "learning_rate": 3.816136795356556e-05, + "loss": 0.3468, + "step": 4259000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8159267988004995e-05, + "loss": 0.3538, + "step": 4259500 + }, + { + "epoch": 2.55, + "learning_rate": 3.815716802244443e-05, + "loss": 0.3574, + "step": 4260000 + }, + { + "epoch": 2.55, + "learning_rate": 3.815506805688387e-05, + "loss": 0.3418, + "step": 4260500 + }, + { + "epoch": 2.55, + "learning_rate": 3.81529680913233e-05, + "loss": 0.3478, + "step": 4261000 + }, + { + "epoch": 2.55, + "learning_rate": 3.8150872325693856e-05, + "loss": 0.3461, + "step": 4261500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8148772360133296e-05, + "loss": 0.3426, + "step": 4262000 + }, + { + "epoch": 2.56, + "learning_rate": 3.814667239457273e-05, + "loss": 0.3459, + "step": 4262500 + }, + { + "epoch": 2.56, + "learning_rate": 3.814457242901216e-05, + "loss": 0.3551, + "step": 4263000 + }, + { + "epoch": 2.56, + "learning_rate": 3.81424724634516e-05, + "loss": 0.3481, + "step": 4263500 + }, + { + "epoch": 2.56, + "learning_rate": 3.814037669782216e-05, + "loss": 0.3407, + "step": 4264000 + }, + { + "epoch": 2.56, + "learning_rate": 3.813827673226159e-05, + "loss": 0.3568, + "step": 4264500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8136176766701024e-05, + "loss": 0.3486, + "step": 4265000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8134076801140464e-05, + "loss": 0.3508, + "step": 4265500 + }, + { + "epoch": 2.56, + "learning_rate": 3.81319768355799e-05, + "loss": 0.3447, + "step": 4266000 + }, + { + "epoch": 2.56, + "learning_rate": 3.812988106995045e-05, + "loss": 0.353, + "step": 4266500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8127781104389885e-05, + "loss": 0.3517, + "step": 4267000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8125681138829325e-05, + "loss": 0.3502, + "step": 4267500 + }, + { + "epoch": 2.56, + "learning_rate": 3.812358537319988e-05, + "loss": 0.3434, + "step": 4268000 + }, + { + "epoch": 2.56, + "learning_rate": 3.812148540763931e-05, + "loss": 0.3483, + "step": 4268500 + }, + { + "epoch": 2.56, + "learning_rate": 3.811938544207875e-05, + "loss": 0.3401, + "step": 4269000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8117285476518185e-05, + "loss": 0.3389, + "step": 4269500 + }, + { + "epoch": 2.56, + "learning_rate": 3.811518551095762e-05, + "loss": 0.3448, + "step": 4270000 + }, + { + "epoch": 2.56, + "learning_rate": 3.811308554539706e-05, + "loss": 0.3448, + "step": 4270500 + }, + { + "epoch": 2.56, + "learning_rate": 3.811098557983649e-05, + "loss": 0.3536, + "step": 4271000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8108889814207046e-05, + "loss": 0.3452, + "step": 4271500 + }, + { + "epoch": 2.56, + "learning_rate": 3.810678984864648e-05, + "loss": 0.3507, + "step": 4272000 + }, + { + "epoch": 2.56, + "learning_rate": 3.810468988308592e-05, + "loss": 0.3512, + "step": 4272500 + }, + { + "epoch": 2.56, + "learning_rate": 3.810258991752535e-05, + "loss": 0.3543, + "step": 4273000 + }, + { + "epoch": 2.56, + "learning_rate": 3.810048995196479e-05, + "loss": 0.3411, + "step": 4273500 + }, + { + "epoch": 2.56, + "learning_rate": 3.809838998640423e-05, + "loss": 0.3547, + "step": 4274000 + }, + { + "epoch": 2.56, + "learning_rate": 3.8096290020843654e-05, + "loss": 0.3463, + "step": 4274500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8094190055283094e-05, + "loss": 0.3478, + "step": 4275000 + }, + { + "epoch": 2.56, + "learning_rate": 3.809209008972253e-05, + "loss": 0.3485, + "step": 4275500 + }, + { + "epoch": 2.56, + "learning_rate": 3.808999012416196e-05, + "loss": 0.3492, + "step": 4276000 + }, + { + "epoch": 2.56, + "learning_rate": 3.80878901586014e-05, + "loss": 0.3418, + "step": 4276500 + }, + { + "epoch": 2.56, + "learning_rate": 3.8085790193040835e-05, + "loss": 0.337, + "step": 4277000 + }, + { + "epoch": 2.56, + "learning_rate": 3.808369442741139e-05, + "loss": 0.3381, + "step": 4277500 + }, + { + "epoch": 2.56, + "learning_rate": 3.808159446185082e-05, + "loss": 0.348, + "step": 4278000 + }, + { + "epoch": 2.57, + "learning_rate": 3.807949449629026e-05, + "loss": 0.338, + "step": 4278500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8077394530729695e-05, + "loss": 0.3345, + "step": 4279000 + }, + { + "epoch": 2.57, + "learning_rate": 3.807529876510025e-05, + "loss": 0.3516, + "step": 4279500 + }, + { + "epoch": 2.57, + "learning_rate": 3.807319879953968e-05, + "loss": 0.3428, + "step": 4280000 + }, + { + "epoch": 2.57, + "learning_rate": 3.807110303391024e-05, + "loss": 0.3508, + "step": 4280500 + }, + { + "epoch": 2.57, + "learning_rate": 3.806900306834968e-05, + "loss": 0.3551, + "step": 4281000 + }, + { + "epoch": 2.57, + "learning_rate": 3.806690310278911e-05, + "loss": 0.3412, + "step": 4281500 + }, + { + "epoch": 2.57, + "learning_rate": 3.806480313722855e-05, + "loss": 0.3571, + "step": 4282000 + }, + { + "epoch": 2.57, + "learning_rate": 3.806270317166798e-05, + "loss": 0.3514, + "step": 4282500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8060607406038544e-05, + "loss": 0.3458, + "step": 4283000 + }, + { + "epoch": 2.57, + "learning_rate": 3.805850744047798e-05, + "loss": 0.3355, + "step": 4283500 + }, + { + "epoch": 2.57, + "learning_rate": 3.805640747491741e-05, + "loss": 0.3518, + "step": 4284000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8054307509356844e-05, + "loss": 0.3428, + "step": 4284500 + }, + { + "epoch": 2.57, + "learning_rate": 3.805220754379628e-05, + "loss": 0.3417, + "step": 4285000 + }, + { + "epoch": 2.57, + "learning_rate": 3.805010757823572e-05, + "loss": 0.3473, + "step": 4285500 + }, + { + "epoch": 2.57, + "learning_rate": 3.804800761267515e-05, + "loss": 0.3447, + "step": 4286000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8045907647114585e-05, + "loss": 0.357, + "step": 4286500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8043807681554025e-05, + "loss": 0.3426, + "step": 4287000 + }, + { + "epoch": 2.57, + "learning_rate": 3.80417161158557e-05, + "loss": 0.3493, + "step": 4287500 + }, + { + "epoch": 2.57, + "learning_rate": 3.803961615029514e-05, + "loss": 0.3502, + "step": 4288000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8037516184734565e-05, + "loss": 0.3429, + "step": 4288500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8035416219174006e-05, + "loss": 0.3442, + "step": 4289000 + }, + { + "epoch": 2.57, + "learning_rate": 3.803331625361344e-05, + "loss": 0.3515, + "step": 4289500 + }, + { + "epoch": 2.57, + "learning_rate": 3.803121628805287e-05, + "loss": 0.3406, + "step": 4290000 + }, + { + "epoch": 2.57, + "learning_rate": 3.802912052242343e-05, + "loss": 0.348, + "step": 4290500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8027020556862866e-05, + "loss": 0.3455, + "step": 4291000 + }, + { + "epoch": 2.57, + "learning_rate": 3.80249205913023e-05, + "loss": 0.34, + "step": 4291500 + }, + { + "epoch": 2.57, + "learning_rate": 3.802282062574173e-05, + "loss": 0.3498, + "step": 4292000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8020724860112294e-05, + "loss": 0.3529, + "step": 4292500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8018624894551734e-05, + "loss": 0.3491, + "step": 4293000 + }, + { + "epoch": 2.57, + "learning_rate": 3.801652492899116e-05, + "loss": 0.3503, + "step": 4293500 + }, + { + "epoch": 2.57, + "learning_rate": 3.8014424963430594e-05, + "loss": 0.3494, + "step": 4294000 + }, + { + "epoch": 2.57, + "learning_rate": 3.8012324997870034e-05, + "loss": 0.3484, + "step": 4294500 + }, + { + "epoch": 2.58, + "learning_rate": 3.801022503230947e-05, + "loss": 0.3404, + "step": 4295000 + }, + { + "epoch": 2.58, + "learning_rate": 3.80081250667489e-05, + "loss": 0.345, + "step": 4295500 + }, + { + "epoch": 2.58, + "learning_rate": 3.800602510118834e-05, + "loss": 0.3422, + "step": 4296000 + }, + { + "epoch": 2.58, + "learning_rate": 3.8003929335558895e-05, + "loss": 0.3441, + "step": 4296500 + }, + { + "epoch": 2.58, + "learning_rate": 3.800182936999833e-05, + "loss": 0.3453, + "step": 4297000 + }, + { + "epoch": 2.58, + "learning_rate": 3.799972940443777e-05, + "loss": 0.3545, + "step": 4297500 + }, + { + "epoch": 2.58, + "learning_rate": 3.79976294388772e-05, + "loss": 0.3523, + "step": 4298000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7995529473316636e-05, + "loss": 0.3403, + "step": 4298500 + }, + { + "epoch": 2.58, + "learning_rate": 3.799343370768719e-05, + "loss": 0.342, + "step": 4299000 + }, + { + "epoch": 2.58, + "learning_rate": 3.799133374212663e-05, + "loss": 0.3515, + "step": 4299500 + }, + { + "epoch": 2.58, + "learning_rate": 3.798923377656606e-05, + "loss": 0.3408, + "step": 4300000 + }, + { + "epoch": 2.58, + "eval_loss": 0.33534136414527893, + "eval_runtime": 1116.3539, + "eval_samples_per_second": 471.822, + "eval_steps_per_second": 78.637, + "step": 4300000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7987133811005496e-05, + "loss": 0.3505, + "step": 4300500 + }, + { + "epoch": 2.58, + "learning_rate": 3.798503804537605e-05, + "loss": 0.3482, + "step": 4301000 + }, + { + "epoch": 2.58, + "learning_rate": 3.798293807981549e-05, + "loss": 0.3335, + "step": 4301500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7980838114254924e-05, + "loss": 0.3431, + "step": 4302000 + }, + { + "epoch": 2.58, + "learning_rate": 3.797873814869436e-05, + "loss": 0.3535, + "step": 4302500 + }, + { + "epoch": 2.58, + "learning_rate": 3.797664238306492e-05, + "loss": 0.3382, + "step": 4303000 + }, + { + "epoch": 2.58, + "learning_rate": 3.797454241750435e-05, + "loss": 0.3428, + "step": 4303500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7972442451943784e-05, + "loss": 0.356, + "step": 4304000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7970342486383225e-05, + "loss": 0.3498, + "step": 4304500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7968246720753785e-05, + "loss": 0.3386, + "step": 4305000 + }, + { + "epoch": 2.58, + "learning_rate": 3.796615095512434e-05, + "loss": 0.3477, + "step": 4305500 + }, + { + "epoch": 2.58, + "learning_rate": 3.796405098956377e-05, + "loss": 0.352, + "step": 4306000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7961951024003205e-05, + "loss": 0.3598, + "step": 4306500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7959851058442646e-05, + "loss": 0.3581, + "step": 4307000 + }, + { + "epoch": 2.58, + "learning_rate": 3.795775109288208e-05, + "loss": 0.3509, + "step": 4307500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7955651127321506e-05, + "loss": 0.3465, + "step": 4308000 + }, + { + "epoch": 2.58, + "learning_rate": 3.7953555361692066e-05, + "loss": 0.3446, + "step": 4308500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7951455396131506e-05, + "loss": 0.3446, + "step": 4309000 + }, + { + "epoch": 2.58, + "learning_rate": 3.794935543057094e-05, + "loss": 0.3441, + "step": 4309500 + }, + { + "epoch": 2.58, + "learning_rate": 3.794725546501037e-05, + "loss": 0.3484, + "step": 4310000 + }, + { + "epoch": 2.58, + "learning_rate": 3.794515549944981e-05, + "loss": 0.3505, + "step": 4310500 + }, + { + "epoch": 2.58, + "learning_rate": 3.794305973382037e-05, + "loss": 0.3536, + "step": 4311000 + }, + { + "epoch": 2.58, + "learning_rate": 3.79409597682598e-05, + "loss": 0.3431, + "step": 4311500 + }, + { + "epoch": 2.59, + "learning_rate": 3.793885980269924e-05, + "loss": 0.3401, + "step": 4312000 + }, + { + "epoch": 2.59, + "learning_rate": 3.793675983713867e-05, + "loss": 0.3493, + "step": 4312500 + }, + { + "epoch": 2.59, + "learning_rate": 3.79346598715781e-05, + "loss": 0.3491, + "step": 4313000 + }, + { + "epoch": 2.59, + "learning_rate": 3.793256410594866e-05, + "loss": 0.3404, + "step": 4313500 + }, + { + "epoch": 2.59, + "learning_rate": 3.79304641403881e-05, + "loss": 0.3402, + "step": 4314000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7928364174827535e-05, + "loss": 0.3502, + "step": 4314500 + }, + { + "epoch": 2.59, + "learning_rate": 3.792626420926696e-05, + "loss": 0.3608, + "step": 4315000 + }, + { + "epoch": 2.59, + "learning_rate": 3.792416844363752e-05, + "loss": 0.3601, + "step": 4315500 + }, + { + "epoch": 2.59, + "learning_rate": 3.792206847807696e-05, + "loss": 0.3464, + "step": 4316000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7919968512516396e-05, + "loss": 0.3451, + "step": 4316500 + }, + { + "epoch": 2.59, + "learning_rate": 3.791786854695583e-05, + "loss": 0.3454, + "step": 4317000 + }, + { + "epoch": 2.59, + "learning_rate": 3.791577278132639e-05, + "loss": 0.3551, + "step": 4317500 + }, + { + "epoch": 2.59, + "learning_rate": 3.791367281576582e-05, + "loss": 0.3481, + "step": 4318000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7911572850205257e-05, + "loss": 0.3456, + "step": 4318500 + }, + { + "epoch": 2.59, + "learning_rate": 3.79094728846447e-05, + "loss": 0.3462, + "step": 4319000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7907372919084123e-05, + "loss": 0.3465, + "step": 4319500 + }, + { + "epoch": 2.59, + "learning_rate": 3.790527295352356e-05, + "loss": 0.3464, + "step": 4320000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7903172987963e-05, + "loss": 0.3571, + "step": 4320500 + }, + { + "epoch": 2.59, + "learning_rate": 3.790107302240243e-05, + "loss": 0.355, + "step": 4321000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7898973056841864e-05, + "loss": 0.3509, + "step": 4321500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7896873091281304e-05, + "loss": 0.3482, + "step": 4322000 + }, + { + "epoch": 2.59, + "learning_rate": 3.789477312572074e-05, + "loss": 0.3537, + "step": 4322500 + }, + { + "epoch": 2.59, + "learning_rate": 3.789267316016017e-05, + "loss": 0.3442, + "step": 4323000 + }, + { + "epoch": 2.59, + "learning_rate": 3.7890577394530725e-05, + "loss": 0.3488, + "step": 4323500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7888477428970165e-05, + "loss": 0.3492, + "step": 4324000 + }, + { + "epoch": 2.59, + "learning_rate": 3.78863774634096e-05, + "loss": 0.3461, + "step": 4324500 + }, + { + "epoch": 2.59, + "learning_rate": 3.788427749784903e-05, + "loss": 0.3405, + "step": 4325000 + }, + { + "epoch": 2.59, + "learning_rate": 3.788218173221959e-05, + "loss": 0.3384, + "step": 4325500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7880081766659026e-05, + "loss": 0.3461, + "step": 4326000 + }, + { + "epoch": 2.59, + "learning_rate": 3.787798180109846e-05, + "loss": 0.3507, + "step": 4326500 + }, + { + "epoch": 2.59, + "learning_rate": 3.78758818355379e-05, + "loss": 0.3512, + "step": 4327000 + }, + { + "epoch": 2.59, + "learning_rate": 3.787378606990845e-05, + "loss": 0.3394, + "step": 4327500 + }, + { + "epoch": 2.59, + "learning_rate": 3.7871686104347886e-05, + "loss": 0.3443, + "step": 4328000 + }, + { + "epoch": 2.6, + "learning_rate": 3.786958613878732e-05, + "loss": 0.3509, + "step": 4328500 + }, + { + "epoch": 2.6, + "learning_rate": 3.786748617322676e-05, + "loss": 0.345, + "step": 4329000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7865390407597314e-05, + "loss": 0.3452, + "step": 4329500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7863294641967874e-05, + "loss": 0.3439, + "step": 4330000 + }, + { + "epoch": 2.6, + "learning_rate": 3.786119467640731e-05, + "loss": 0.3431, + "step": 4330500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785909471084674e-05, + "loss": 0.352, + "step": 4331000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7856994745286174e-05, + "loss": 0.3557, + "step": 4331500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785489477972561e-05, + "loss": 0.3469, + "step": 4332000 + }, + { + "epoch": 2.6, + "learning_rate": 3.785279481416505e-05, + "loss": 0.3474, + "step": 4332500 + }, + { + "epoch": 2.6, + "learning_rate": 3.785069484860448e-05, + "loss": 0.3547, + "step": 4333000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7848594883043915e-05, + "loss": 0.3502, + "step": 4333500 + }, + { + "epoch": 2.6, + "learning_rate": 3.784649911741447e-05, + "loss": 0.3429, + "step": 4334000 + }, + { + "epoch": 2.6, + "learning_rate": 3.784439915185391e-05, + "loss": 0.342, + "step": 4334500 + }, + { + "epoch": 2.6, + "learning_rate": 3.784229918629334e-05, + "loss": 0.3458, + "step": 4335000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7840199220732776e-05, + "loss": 0.3502, + "step": 4335500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7838099255172216e-05, + "loss": 0.3446, + "step": 4336000 + }, + { + "epoch": 2.6, + "learning_rate": 3.783600348954277e-05, + "loss": 0.3531, + "step": 4336500 + }, + { + "epoch": 2.6, + "learning_rate": 3.78339035239822e-05, + "loss": 0.3488, + "step": 4337000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7831803558421637e-05, + "loss": 0.3498, + "step": 4337500 + }, + { + "epoch": 2.6, + "learning_rate": 3.782970359286108e-05, + "loss": 0.3527, + "step": 4338000 + }, + { + "epoch": 2.6, + "learning_rate": 3.782760362730051e-05, + "loss": 0.3384, + "step": 4338500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7825507861671064e-05, + "loss": 0.3407, + "step": 4339000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7823407896110504e-05, + "loss": 0.3416, + "step": 4339500 + }, + { + "epoch": 2.6, + "learning_rate": 3.782130793054994e-05, + "loss": 0.341, + "step": 4340000 + }, + { + "epoch": 2.6, + "learning_rate": 3.781920796498937e-05, + "loss": 0.3501, + "step": 4340500 + }, + { + "epoch": 2.6, + "learning_rate": 3.781710799942881e-05, + "loss": 0.3479, + "step": 4341000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7815008033868245e-05, + "loss": 0.3483, + "step": 4341500 + }, + { + "epoch": 2.6, + "learning_rate": 3.781290806830768e-05, + "loss": 0.337, + "step": 4342000 + }, + { + "epoch": 2.6, + "learning_rate": 3.781080810274712e-05, + "loss": 0.3445, + "step": 4342500 + }, + { + "epoch": 2.6, + "learning_rate": 3.780871233711767e-05, + "loss": 0.3475, + "step": 4343000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7806616571488225e-05, + "loss": 0.3482, + "step": 4343500 + }, + { + "epoch": 2.6, + "learning_rate": 3.780451660592766e-05, + "loss": 0.3536, + "step": 4344000 + }, + { + "epoch": 2.6, + "learning_rate": 3.780241664036709e-05, + "loss": 0.3435, + "step": 4344500 + }, + { + "epoch": 2.61, + "learning_rate": 3.780031667480653e-05, + "loss": 0.3513, + "step": 4345000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7798216709245966e-05, + "loss": 0.3503, + "step": 4345500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7796116743685406e-05, + "loss": 0.3464, + "step": 4346000 + }, + { + "epoch": 2.61, + "learning_rate": 3.779402097805596e-05, + "loss": 0.3587, + "step": 4346500 + }, + { + "epoch": 2.61, + "learning_rate": 3.779192101249539e-05, + "loss": 0.3456, + "step": 4347000 + }, + { + "epoch": 2.61, + "learning_rate": 3.778982104693483e-05, + "loss": 0.3505, + "step": 4347500 + }, + { + "epoch": 2.61, + "learning_rate": 3.778772108137427e-05, + "loss": 0.3479, + "step": 4348000 + }, + { + "epoch": 2.61, + "learning_rate": 3.77856211158137e-05, + "loss": 0.3504, + "step": 4348500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7783521150253134e-05, + "loss": 0.3474, + "step": 4349000 + }, + { + "epoch": 2.61, + "learning_rate": 3.778142538462369e-05, + "loss": 0.3466, + "step": 4349500 + }, + { + "epoch": 2.61, + "learning_rate": 3.777932541906313e-05, + "loss": 0.3392, + "step": 4350000 + }, + { + "epoch": 2.61, + "learning_rate": 3.777722545350256e-05, + "loss": 0.3404, + "step": 4350500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7775125487941995e-05, + "loss": 0.3503, + "step": 4351000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7773025522381435e-05, + "loss": 0.3494, + "step": 4351500 + }, + { + "epoch": 2.61, + "learning_rate": 3.777092555682087e-05, + "loss": 0.3464, + "step": 4352000 + }, + { + "epoch": 2.61, + "learning_rate": 3.77688255912603e-05, + "loss": 0.3549, + "step": 4352500 + }, + { + "epoch": 2.61, + "learning_rate": 3.776672562569974e-05, + "loss": 0.3507, + "step": 4353000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7764629860070296e-05, + "loss": 0.3574, + "step": 4353500 + }, + { + "epoch": 2.61, + "learning_rate": 3.776252989450973e-05, + "loss": 0.345, + "step": 4354000 + }, + { + "epoch": 2.61, + "learning_rate": 3.776042992894917e-05, + "loss": 0.354, + "step": 4354500 + }, + { + "epoch": 2.61, + "learning_rate": 3.77583299633886e-05, + "loss": 0.3434, + "step": 4355000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7756234197759156e-05, + "loss": 0.3477, + "step": 4355500 + }, + { + "epoch": 2.61, + "learning_rate": 3.775413843212971e-05, + "loss": 0.3554, + "step": 4356000 + }, + { + "epoch": 2.61, + "learning_rate": 3.7752038466569143e-05, + "loss": 0.3524, + "step": 4356500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7749938501008584e-05, + "loss": 0.3491, + "step": 4357000 + }, + { + "epoch": 2.61, + "learning_rate": 3.774783853544802e-05, + "loss": 0.3412, + "step": 4357500 + }, + { + "epoch": 2.61, + "learning_rate": 3.774573856988745e-05, + "loss": 0.3448, + "step": 4358000 + }, + { + "epoch": 2.61, + "learning_rate": 3.774363860432689e-05, + "loss": 0.3435, + "step": 4358500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7741542838697444e-05, + "loss": 0.3456, + "step": 4359000 + }, + { + "epoch": 2.61, + "learning_rate": 3.773944287313688e-05, + "loss": 0.3425, + "step": 4359500 + }, + { + "epoch": 2.61, + "learning_rate": 3.773734290757632e-05, + "loss": 0.3442, + "step": 4360000 + }, + { + "epoch": 2.61, + "learning_rate": 3.773524714194687e-05, + "loss": 0.3426, + "step": 4360500 + }, + { + "epoch": 2.61, + "learning_rate": 3.7733147176386305e-05, + "loss": 0.3441, + "step": 4361000 + }, + { + "epoch": 2.61, + "learning_rate": 3.773104721082574e-05, + "loss": 0.3499, + "step": 4361500 + }, + { + "epoch": 2.62, + "learning_rate": 3.772894724526518e-05, + "loss": 0.3384, + "step": 4362000 + }, + { + "epoch": 2.62, + "learning_rate": 3.772684727970461e-05, + "loss": 0.3501, + "step": 4362500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7724747314144046e-05, + "loss": 0.3369, + "step": 4363000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7722647348583486e-05, + "loss": 0.3476, + "step": 4363500 + }, + { + "epoch": 2.62, + "learning_rate": 3.772054738302292e-05, + "loss": 0.3503, + "step": 4364000 + }, + { + "epoch": 2.62, + "learning_rate": 3.771845161739347e-05, + "loss": 0.3553, + "step": 4364500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7716351651832906e-05, + "loss": 0.3513, + "step": 4365000 + }, + { + "epoch": 2.62, + "learning_rate": 3.771425168627235e-05, + "loss": 0.3422, + "step": 4365500 + }, + { + "epoch": 2.62, + "learning_rate": 3.771215172071178e-05, + "loss": 0.346, + "step": 4366000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7710051755151214e-05, + "loss": 0.3446, + "step": 4366500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7707951789590654e-05, + "loss": 0.3488, + "step": 4367000 + }, + { + "epoch": 2.62, + "learning_rate": 3.770585182403009e-05, + "loss": 0.344, + "step": 4367500 + }, + { + "epoch": 2.62, + "learning_rate": 3.770375185846952e-05, + "loss": 0.3455, + "step": 4368000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7701651892908954e-05, + "loss": 0.3503, + "step": 4368500 + }, + { + "epoch": 2.62, + "learning_rate": 3.769955192734839e-05, + "loss": 0.3467, + "step": 4369000 + }, + { + "epoch": 2.62, + "learning_rate": 3.769745196178783e-05, + "loss": 0.3488, + "step": 4369500 + }, + { + "epoch": 2.62, + "learning_rate": 3.769535619615839e-05, + "loss": 0.3478, + "step": 4370000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7693256230597815e-05, + "loss": 0.3523, + "step": 4370500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7691160464968375e-05, + "loss": 0.3535, + "step": 4371000 + }, + { + "epoch": 2.62, + "learning_rate": 3.768906049940781e-05, + "loss": 0.3469, + "step": 4371500 + }, + { + "epoch": 2.62, + "learning_rate": 3.768696053384725e-05, + "loss": 0.3474, + "step": 4372000 + }, + { + "epoch": 2.62, + "learning_rate": 3.768486056828668e-05, + "loss": 0.3495, + "step": 4372500 + }, + { + "epoch": 2.62, + "learning_rate": 3.768276060272611e-05, + "loss": 0.3451, + "step": 4373000 + }, + { + "epoch": 2.62, + "learning_rate": 3.768066063716555e-05, + "loss": 0.3486, + "step": 4373500 + }, + { + "epoch": 2.62, + "learning_rate": 3.767856067160498e-05, + "loss": 0.3418, + "step": 4374000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7676460706044416e-05, + "loss": 0.3509, + "step": 4374500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7674360740483857e-05, + "loss": 0.3478, + "step": 4375000 + }, + { + "epoch": 2.62, + "learning_rate": 3.767226077492329e-05, + "loss": 0.3501, + "step": 4375500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7670160809362723e-05, + "loss": 0.3342, + "step": 4376000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7668060843802164e-05, + "loss": 0.3432, + "step": 4376500 + }, + { + "epoch": 2.62, + "learning_rate": 3.766596507817272e-05, + "loss": 0.3426, + "step": 4377000 + }, + { + "epoch": 2.62, + "learning_rate": 3.766386931254327e-05, + "loss": 0.3513, + "step": 4377500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7661769346982704e-05, + "loss": 0.35, + "step": 4378000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7659669381422145e-05, + "loss": 0.3454, + "step": 4378500 + }, + { + "epoch": 2.63, + "learning_rate": 3.765756941586158e-05, + "loss": 0.3464, + "step": 4379000 + }, + { + "epoch": 2.63, + "learning_rate": 3.765546945030101e-05, + "loss": 0.3506, + "step": 4379500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7653373684671565e-05, + "loss": 0.3521, + "step": 4380000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7651273719111005e-05, + "loss": 0.3458, + "step": 4380500 + }, + { + "epoch": 2.63, + "learning_rate": 3.764917375355044e-05, + "loss": 0.3482, + "step": 4381000 + }, + { + "epoch": 2.63, + "learning_rate": 3.764707378798987e-05, + "loss": 0.3469, + "step": 4381500 + }, + { + "epoch": 2.63, + "learning_rate": 3.764497382242931e-05, + "loss": 0.3433, + "step": 4382000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7642878056799866e-05, + "loss": 0.3454, + "step": 4382500 + }, + { + "epoch": 2.63, + "learning_rate": 3.76407780912393e-05, + "loss": 0.3398, + "step": 4383000 + }, + { + "epoch": 2.63, + "learning_rate": 3.763867812567874e-05, + "loss": 0.344, + "step": 4383500 + }, + { + "epoch": 2.63, + "learning_rate": 3.763657816011817e-05, + "loss": 0.345, + "step": 4384000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7634478194557607e-05, + "loss": 0.3421, + "step": 4384500 + }, + { + "epoch": 2.63, + "learning_rate": 3.763237822899705e-05, + "loss": 0.3501, + "step": 4385000 + }, + { + "epoch": 2.63, + "learning_rate": 3.76302824633676e-05, + "loss": 0.3477, + "step": 4385500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7628182497807034e-05, + "loss": 0.3508, + "step": 4386000 + }, + { + "epoch": 2.63, + "learning_rate": 3.762608253224647e-05, + "loss": 0.3614, + "step": 4386500 + }, + { + "epoch": 2.63, + "learning_rate": 3.762398256668591e-05, + "loss": 0.3417, + "step": 4387000 + }, + { + "epoch": 2.63, + "learning_rate": 3.762188260112534e-05, + "loss": 0.345, + "step": 4387500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7619782635564774e-05, + "loss": 0.3531, + "step": 4388000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7617682670004215e-05, + "loss": 0.3481, + "step": 4388500 + }, + { + "epoch": 2.63, + "learning_rate": 3.761559110430589e-05, + "loss": 0.3494, + "step": 4389000 + }, + { + "epoch": 2.63, + "learning_rate": 3.761349113874532e-05, + "loss": 0.3407, + "step": 4389500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7611391173184755e-05, + "loss": 0.3503, + "step": 4390000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7609295407555316e-05, + "loss": 0.3484, + "step": 4390500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7607195441994756e-05, + "loss": 0.3389, + "step": 4391000 + }, + { + "epoch": 2.63, + "learning_rate": 3.760509547643419e-05, + "loss": 0.3447, + "step": 4391500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7602995510873616e-05, + "loss": 0.3456, + "step": 4392000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7600895545313056e-05, + "loss": 0.3487, + "step": 4392500 + }, + { + "epoch": 2.63, + "learning_rate": 3.759879557975249e-05, + "loss": 0.3403, + "step": 4393000 + }, + { + "epoch": 2.63, + "learning_rate": 3.759669561419192e-05, + "loss": 0.3547, + "step": 4393500 + }, + { + "epoch": 2.63, + "learning_rate": 3.7594595648631363e-05, + "loss": 0.3339, + "step": 4394000 + }, + { + "epoch": 2.63, + "learning_rate": 3.75924956830708e-05, + "loss": 0.3437, + "step": 4394500 + }, + { + "epoch": 2.63, + "learning_rate": 3.759039571751023e-05, + "loss": 0.3509, + "step": 4395000 + }, + { + "epoch": 2.64, + "learning_rate": 3.758829575194967e-05, + "loss": 0.3444, + "step": 4395500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7586195786389104e-05, + "loss": 0.3363, + "step": 4396000 + }, + { + "epoch": 2.64, + "learning_rate": 3.758409582082854e-05, + "loss": 0.3529, + "step": 4396500 + }, + { + "epoch": 2.64, + "learning_rate": 3.758199585526798e-05, + "loss": 0.3397, + "step": 4397000 + }, + { + "epoch": 2.64, + "learning_rate": 3.757990008963853e-05, + "loss": 0.3511, + "step": 4397500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7577800124077965e-05, + "loss": 0.3452, + "step": 4398000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7575700158517405e-05, + "loss": 0.35, + "step": 4398500 + }, + { + "epoch": 2.64, + "learning_rate": 3.757360019295684e-05, + "loss": 0.3364, + "step": 4399000 + }, + { + "epoch": 2.64, + "learning_rate": 3.757150022739627e-05, + "loss": 0.3456, + "step": 4399500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7569400261835705e-05, + "loss": 0.3535, + "step": 4400000 + }, + { + "epoch": 2.64, + "eval_loss": 0.33438417315483093, + "eval_runtime": 1120.157, + "eval_samples_per_second": 470.22, + "eval_steps_per_second": 78.37, + "step": 4400000 + }, + { + "epoch": 2.64, + "learning_rate": 3.756730029627514e-05, + "loss": 0.361, + "step": 4400500 + }, + { + "epoch": 2.64, + "learning_rate": 3.756520033071457e-05, + "loss": 0.3414, + "step": 4401000 + }, + { + "epoch": 2.64, + "learning_rate": 3.756310456508513e-05, + "loss": 0.3421, + "step": 4401500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7561004599524566e-05, + "loss": 0.3546, + "step": 4402000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7558904633964e-05, + "loss": 0.3477, + "step": 4402500 + }, + { + "epoch": 2.64, + "learning_rate": 3.755680466840343e-05, + "loss": 0.342, + "step": 4403000 + }, + { + "epoch": 2.64, + "learning_rate": 3.755470470284287e-05, + "loss": 0.3524, + "step": 4403500 + }, + { + "epoch": 2.64, + "learning_rate": 3.755260473728231e-05, + "loss": 0.353, + "step": 4404000 + }, + { + "epoch": 2.64, + "learning_rate": 3.755050477172174e-05, + "loss": 0.3433, + "step": 4404500 + }, + { + "epoch": 2.64, + "learning_rate": 3.7548409006092294e-05, + "loss": 0.3643, + "step": 4405000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7546309040531734e-05, + "loss": 0.3458, + "step": 4405500 + }, + { + "epoch": 2.64, + "learning_rate": 3.754420907497117e-05, + "loss": 0.3549, + "step": 4406000 + }, + { + "epoch": 2.64, + "learning_rate": 3.754210910941061e-05, + "loss": 0.3416, + "step": 4406500 + }, + { + "epoch": 2.64, + "learning_rate": 3.754000914385004e-05, + "loss": 0.3425, + "step": 4407000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7537913378220595e-05, + "loss": 0.3555, + "step": 4407500 + }, + { + "epoch": 2.64, + "learning_rate": 3.753581341266003e-05, + "loss": 0.3459, + "step": 4408000 + }, + { + "epoch": 2.64, + "learning_rate": 3.753371344709947e-05, + "loss": 0.3497, + "step": 4408500 + }, + { + "epoch": 2.64, + "learning_rate": 3.75316134815389e-05, + "loss": 0.3408, + "step": 4409000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7529517715909455e-05, + "loss": 0.3571, + "step": 4409500 + }, + { + "epoch": 2.64, + "learning_rate": 3.752741775034889e-05, + "loss": 0.3634, + "step": 4410000 + }, + { + "epoch": 2.64, + "learning_rate": 3.752531778478833e-05, + "loss": 0.3498, + "step": 4410500 + }, + { + "epoch": 2.64, + "learning_rate": 3.752321781922776e-05, + "loss": 0.3519, + "step": 4411000 + }, + { + "epoch": 2.64, + "learning_rate": 3.7521117853667196e-05, + "loss": 0.3345, + "step": 4411500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7519017888106636e-05, + "loss": 0.3445, + "step": 4412000 + }, + { + "epoch": 2.65, + "learning_rate": 3.751692212247719e-05, + "loss": 0.3499, + "step": 4412500 + }, + { + "epoch": 2.65, + "learning_rate": 3.751482215691662e-05, + "loss": 0.3356, + "step": 4413000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7512722191356064e-05, + "loss": 0.3533, + "step": 4413500 + }, + { + "epoch": 2.65, + "learning_rate": 3.75106222257955e-05, + "loss": 0.3435, + "step": 4414000 + }, + { + "epoch": 2.65, + "learning_rate": 3.750852646016605e-05, + "loss": 0.3397, + "step": 4414500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7506426494605484e-05, + "loss": 0.3454, + "step": 4415000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7504326529044924e-05, + "loss": 0.3424, + "step": 4415500 + }, + { + "epoch": 2.65, + "learning_rate": 3.750222656348436e-05, + "loss": 0.3416, + "step": 4416000 + }, + { + "epoch": 2.65, + "learning_rate": 3.750012659792379e-05, + "loss": 0.3448, + "step": 4416500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7498030832294345e-05, + "loss": 0.3412, + "step": 4417000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7495930866733785e-05, + "loss": 0.3478, + "step": 4417500 + }, + { + "epoch": 2.65, + "learning_rate": 3.749383090117322e-05, + "loss": 0.3417, + "step": 4418000 + }, + { + "epoch": 2.65, + "learning_rate": 3.749173093561265e-05, + "loss": 0.3512, + "step": 4418500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748963516998321e-05, + "loss": 0.3438, + "step": 4419000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7487535204422646e-05, + "loss": 0.3456, + "step": 4419500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748543523886208e-05, + "loss": 0.3506, + "step": 4420000 + }, + { + "epoch": 2.65, + "learning_rate": 3.748333527330152e-05, + "loss": 0.3362, + "step": 4420500 + }, + { + "epoch": 2.65, + "learning_rate": 3.748123530774095e-05, + "loss": 0.3443, + "step": 4421000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7479135342180386e-05, + "loss": 0.3429, + "step": 4421500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7477035376619827e-05, + "loss": 0.3413, + "step": 4422000 + }, + { + "epoch": 2.65, + "learning_rate": 3.747493961099038e-05, + "loss": 0.3477, + "step": 4422500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7472839645429814e-05, + "loss": 0.3374, + "step": 4423000 + }, + { + "epoch": 2.65, + "learning_rate": 3.747073967986925e-05, + "loss": 0.3541, + "step": 4423500 + }, + { + "epoch": 2.65, + "learning_rate": 3.746863971430869e-05, + "loss": 0.3473, + "step": 4424000 + }, + { + "epoch": 2.65, + "learning_rate": 3.746653974874812e-05, + "loss": 0.349, + "step": 4424500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7464439783187554e-05, + "loss": 0.3483, + "step": 4425000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7462339817626994e-05, + "loss": 0.3411, + "step": 4425500 + }, + { + "epoch": 2.65, + "learning_rate": 3.746024405199755e-05, + "loss": 0.3388, + "step": 4426000 + }, + { + "epoch": 2.65, + "learning_rate": 3.74581482863681e-05, + "loss": 0.3521, + "step": 4426500 + }, + { + "epoch": 2.65, + "learning_rate": 3.7456048320807535e-05, + "loss": 0.3371, + "step": 4427000 + }, + { + "epoch": 2.65, + "learning_rate": 3.7453948355246975e-05, + "loss": 0.3443, + "step": 4427500 + }, + { + "epoch": 2.65, + "learning_rate": 3.745184838968641e-05, + "loss": 0.355, + "step": 4428000 + }, + { + "epoch": 2.66, + "learning_rate": 3.744974842412584e-05, + "loss": 0.3442, + "step": 4428500 + }, + { + "epoch": 2.66, + "learning_rate": 3.744764845856528e-05, + "loss": 0.3527, + "step": 4429000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7445548493004716e-05, + "loss": 0.3425, + "step": 4429500 + }, + { + "epoch": 2.66, + "learning_rate": 3.744344852744415e-05, + "loss": 0.3496, + "step": 4430000 + }, + { + "epoch": 2.66, + "learning_rate": 3.744134856188359e-05, + "loss": 0.3373, + "step": 4430500 + }, + { + "epoch": 2.66, + "learning_rate": 3.743925279625414e-05, + "loss": 0.3479, + "step": 4431000 + }, + { + "epoch": 2.66, + "learning_rate": 3.74371570306247e-05, + "loss": 0.349, + "step": 4431500 + }, + { + "epoch": 2.66, + "learning_rate": 3.743505706506413e-05, + "loss": 0.3473, + "step": 4432000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7432957099503564e-05, + "loss": 0.3483, + "step": 4432500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7430857133943004e-05, + "loss": 0.3529, + "step": 4433000 + }, + { + "epoch": 2.66, + "learning_rate": 3.742875716838244e-05, + "loss": 0.3475, + "step": 4433500 + }, + { + "epoch": 2.66, + "learning_rate": 3.742665720282187e-05, + "loss": 0.3454, + "step": 4434000 + }, + { + "epoch": 2.66, + "learning_rate": 3.742456143719243e-05, + "loss": 0.3511, + "step": 4434500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7422461471631865e-05, + "loss": 0.3444, + "step": 4435000 + }, + { + "epoch": 2.66, + "learning_rate": 3.74203615060713e-05, + "loss": 0.3479, + "step": 4435500 + }, + { + "epoch": 2.66, + "learning_rate": 3.741826154051074e-05, + "loss": 0.3495, + "step": 4436000 + }, + { + "epoch": 2.66, + "learning_rate": 3.741616157495017e-05, + "loss": 0.3475, + "step": 4436500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7414061609389605e-05, + "loss": 0.3532, + "step": 4437000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7411961643829046e-05, + "loss": 0.3425, + "step": 4437500 + }, + { + "epoch": 2.66, + "learning_rate": 3.740986167826848e-05, + "loss": 0.3523, + "step": 4438000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7407761712707906e-05, + "loss": 0.3484, + "step": 4438500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7405661747147346e-05, + "loss": 0.3459, + "step": 4439000 + }, + { + "epoch": 2.66, + "learning_rate": 3.740356178158678e-05, + "loss": 0.3525, + "step": 4439500 + }, + { + "epoch": 2.66, + "learning_rate": 3.740146181602621e-05, + "loss": 0.3427, + "step": 4440000 + }, + { + "epoch": 2.66, + "learning_rate": 3.739936605039677e-05, + "loss": 0.3441, + "step": 4440500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7397266084836207e-05, + "loss": 0.3496, + "step": 4441000 + }, + { + "epoch": 2.66, + "learning_rate": 3.739516611927564e-05, + "loss": 0.3424, + "step": 4441500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7393066153715074e-05, + "loss": 0.339, + "step": 4442000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7390970388085634e-05, + "loss": 0.3488, + "step": 4442500 + }, + { + "epoch": 2.66, + "learning_rate": 3.7388870422525074e-05, + "loss": 0.3457, + "step": 4443000 + }, + { + "epoch": 2.66, + "learning_rate": 3.73867704569645e-05, + "loss": 0.3505, + "step": 4443500 + }, + { + "epoch": 2.66, + "learning_rate": 3.738467049140394e-05, + "loss": 0.3596, + "step": 4444000 + }, + { + "epoch": 2.66, + "learning_rate": 3.7382570525843374e-05, + "loss": 0.34, + "step": 4444500 + }, + { + "epoch": 2.66, + "learning_rate": 3.738047056028281e-05, + "loss": 0.3396, + "step": 4445000 + }, + { + "epoch": 2.67, + "learning_rate": 3.737837059472225e-05, + "loss": 0.3406, + "step": 4445500 + }, + { + "epoch": 2.67, + "learning_rate": 3.737627062916168e-05, + "loss": 0.3458, + "step": 4446000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7374174863532235e-05, + "loss": 0.3394, + "step": 4446500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7372079097902796e-05, + "loss": 0.3399, + "step": 4447000 + }, + { + "epoch": 2.67, + "learning_rate": 3.736997913234223e-05, + "loss": 0.336, + "step": 4447500 + }, + { + "epoch": 2.67, + "learning_rate": 3.736787916678167e-05, + "loss": 0.3615, + "step": 4448000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7365779201221096e-05, + "loss": 0.3405, + "step": 4448500 + }, + { + "epoch": 2.67, + "learning_rate": 3.736367923566053e-05, + "loss": 0.3404, + "step": 4449000 + }, + { + "epoch": 2.67, + "learning_rate": 3.736157927009997e-05, + "loss": 0.3386, + "step": 4449500 + }, + { + "epoch": 2.67, + "learning_rate": 3.735948350447053e-05, + "loss": 0.3365, + "step": 4450000 + }, + { + "epoch": 2.67, + "learning_rate": 3.735738353890996e-05, + "loss": 0.3494, + "step": 4450500 + }, + { + "epoch": 2.67, + "learning_rate": 3.73552835733494e-05, + "loss": 0.3441, + "step": 4451000 + }, + { + "epoch": 2.67, + "learning_rate": 3.735318360778883e-05, + "loss": 0.3432, + "step": 4451500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7351083642228264e-05, + "loss": 0.3443, + "step": 4452000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7348983676667704e-05, + "loss": 0.343, + "step": 4452500 + }, + { + "epoch": 2.67, + "learning_rate": 3.734688371110714e-05, + "loss": 0.3428, + "step": 4453000 + }, + { + "epoch": 2.67, + "learning_rate": 3.734478374554657e-05, + "loss": 0.3383, + "step": 4453500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7342687979917125e-05, + "loss": 0.3435, + "step": 4454000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7340588014356565e-05, + "loss": 0.3534, + "step": 4454500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7338492248727125e-05, + "loss": 0.341, + "step": 4455000 + }, + { + "epoch": 2.67, + "learning_rate": 3.733639228316655e-05, + "loss": 0.3442, + "step": 4455500 + }, + { + "epoch": 2.67, + "learning_rate": 3.733429651753711e-05, + "loss": 0.3454, + "step": 4456000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7332196551976546e-05, + "loss": 0.3382, + "step": 4456500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7330096586415986e-05, + "loss": 0.3529, + "step": 4457000 + }, + { + "epoch": 2.67, + "learning_rate": 3.732799662085542e-05, + "loss": 0.3387, + "step": 4457500 + }, + { + "epoch": 2.67, + "learning_rate": 3.732589665529485e-05, + "loss": 0.3437, + "step": 4458000 + }, + { + "epoch": 2.67, + "learning_rate": 3.7323796689734286e-05, + "loss": 0.3544, + "step": 4458500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7321700924104847e-05, + "loss": 0.3455, + "step": 4459000 + }, + { + "epoch": 2.67, + "learning_rate": 3.731960095854428e-05, + "loss": 0.356, + "step": 4459500 + }, + { + "epoch": 2.67, + "learning_rate": 3.7317500992983714e-05, + "loss": 0.3413, + "step": 4460000 + }, + { + "epoch": 2.67, + "learning_rate": 3.731540102742315e-05, + "loss": 0.3432, + "step": 4460500 + }, + { + "epoch": 2.67, + "learning_rate": 3.731330106186258e-05, + "loss": 0.3423, + "step": 4461000 + }, + { + "epoch": 2.67, + "learning_rate": 3.731120109630202e-05, + "loss": 0.3571, + "step": 4461500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730910533067258e-05, + "loss": 0.3403, + "step": 4462000 + }, + { + "epoch": 2.68, + "learning_rate": 3.730700536511201e-05, + "loss": 0.341, + "step": 4462500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730490959948257e-05, + "loss": 0.3532, + "step": 4463000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7302809633922e-05, + "loss": 0.3352, + "step": 4463500 + }, + { + "epoch": 2.68, + "learning_rate": 3.730070966836144e-05, + "loss": 0.342, + "step": 4464000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7298609702800875e-05, + "loss": 0.3404, + "step": 4464500 + }, + { + "epoch": 2.68, + "learning_rate": 3.729650973724031e-05, + "loss": 0.3438, + "step": 4465000 + }, + { + "epoch": 2.68, + "learning_rate": 3.729440977167974e-05, + "loss": 0.3454, + "step": 4465500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7292309806119176e-05, + "loss": 0.3421, + "step": 4466000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7290214040489736e-05, + "loss": 0.3559, + "step": 4466500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7288114074929176e-05, + "loss": 0.3504, + "step": 4467000 + }, + { + "epoch": 2.68, + "learning_rate": 3.72860141093686e-05, + "loss": 0.3524, + "step": 4467500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7283914143808036e-05, + "loss": 0.3491, + "step": 4468000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7281814178247477e-05, + "loss": 0.3556, + "step": 4468500 + }, + { + "epoch": 2.68, + "learning_rate": 3.727971421268691e-05, + "loss": 0.3556, + "step": 4469000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7277614247126343e-05, + "loss": 0.3384, + "step": 4469500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7275514281565784e-05, + "loss": 0.3533, + "step": 4470000 + }, + { + "epoch": 2.68, + "learning_rate": 3.727341431600522e-05, + "loss": 0.3408, + "step": 4470500 + }, + { + "epoch": 2.68, + "learning_rate": 3.727131435044465e-05, + "loss": 0.355, + "step": 4471000 + }, + { + "epoch": 2.68, + "learning_rate": 3.726921438488409e-05, + "loss": 0.3463, + "step": 4471500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7267114419323524e-05, + "loss": 0.3637, + "step": 4472000 + }, + { + "epoch": 2.68, + "learning_rate": 3.726501445376296e-05, + "loss": 0.3396, + "step": 4472500 + }, + { + "epoch": 2.68, + "learning_rate": 3.726291448820239e-05, + "loss": 0.3475, + "step": 4473000 + }, + { + "epoch": 2.68, + "learning_rate": 3.726081872257295e-05, + "loss": 0.3367, + "step": 4473500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7258718757012385e-05, + "loss": 0.3506, + "step": 4474000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7256618791451825e-05, + "loss": 0.3336, + "step": 4474500 + }, + { + "epoch": 2.68, + "learning_rate": 3.725451882589125e-05, + "loss": 0.3475, + "step": 4475000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7252418860330685e-05, + "loss": 0.3373, + "step": 4475500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7250318894770126e-05, + "loss": 0.3433, + "step": 4476000 + }, + { + "epoch": 2.68, + "learning_rate": 3.724821892920956e-05, + "loss": 0.3431, + "step": 4476500 + }, + { + "epoch": 2.68, + "learning_rate": 3.724611896364899e-05, + "loss": 0.3417, + "step": 4477000 + }, + { + "epoch": 2.68, + "learning_rate": 3.7244023198019546e-05, + "loss": 0.3468, + "step": 4477500 + }, + { + "epoch": 2.68, + "learning_rate": 3.7241923232458986e-05, + "loss": 0.3364, + "step": 4478000 + }, + { + "epoch": 2.69, + "learning_rate": 3.723982326689842e-05, + "loss": 0.3512, + "step": 4478500 + }, + { + "epoch": 2.69, + "learning_rate": 3.723772330133785e-05, + "loss": 0.3473, + "step": 4479000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7235623335777294e-05, + "loss": 0.3444, + "step": 4479500 + }, + { + "epoch": 2.69, + "learning_rate": 3.723352337021673e-05, + "loss": 0.3326, + "step": 4480000 + }, + { + "epoch": 2.69, + "learning_rate": 3.723142340465616e-05, + "loss": 0.3467, + "step": 4480500 + }, + { + "epoch": 2.69, + "learning_rate": 3.72293234390956e-05, + "loss": 0.3422, + "step": 4481000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7227227673466154e-05, + "loss": 0.3363, + "step": 4481500 + }, + { + "epoch": 2.69, + "learning_rate": 3.722512770790559e-05, + "loss": 0.3455, + "step": 4482000 + }, + { + "epoch": 2.69, + "learning_rate": 3.722302774234503e-05, + "loss": 0.3492, + "step": 4482500 + }, + { + "epoch": 2.69, + "learning_rate": 3.722092777678446e-05, + "loss": 0.3383, + "step": 4483000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7218832011155015e-05, + "loss": 0.3482, + "step": 4483500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7216736245525575e-05, + "loss": 0.3511, + "step": 4484000 + }, + { + "epoch": 2.69, + "learning_rate": 3.721463627996501e-05, + "loss": 0.346, + "step": 4484500 + }, + { + "epoch": 2.69, + "learning_rate": 3.721253631440444e-05, + "loss": 0.3567, + "step": 4485000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7210436348843876e-05, + "loss": 0.3433, + "step": 4485500 + }, + { + "epoch": 2.69, + "learning_rate": 3.720833638328331e-05, + "loss": 0.3423, + "step": 4486000 + }, + { + "epoch": 2.69, + "learning_rate": 3.720624061765387e-05, + "loss": 0.3598, + "step": 4486500 + }, + { + "epoch": 2.69, + "learning_rate": 3.72041406520933e-05, + "loss": 0.3549, + "step": 4487000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7202040686532736e-05, + "loss": 0.3433, + "step": 4487500 + }, + { + "epoch": 2.69, + "learning_rate": 3.719994072097218e-05, + "loss": 0.3427, + "step": 4488000 + }, + { + "epoch": 2.69, + "learning_rate": 3.719784075541161e-05, + "loss": 0.3435, + "step": 4488500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7195740789851044e-05, + "loss": 0.34, + "step": 4489000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7193640824290484e-05, + "loss": 0.343, + "step": 4489500 + }, + { + "epoch": 2.69, + "learning_rate": 3.719154085872992e-05, + "loss": 0.338, + "step": 4490000 + }, + { + "epoch": 2.69, + "learning_rate": 3.71894492930316e-05, + "loss": 0.3482, + "step": 4490500 + }, + { + "epoch": 2.69, + "learning_rate": 3.718734932747103e-05, + "loss": 0.3492, + "step": 4491000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7185249361910465e-05, + "loss": 0.3454, + "step": 4491500 + }, + { + "epoch": 2.69, + "learning_rate": 3.71831493963499e-05, + "loss": 0.3411, + "step": 4492000 + }, + { + "epoch": 2.69, + "learning_rate": 3.718105363072046e-05, + "loss": 0.353, + "step": 4492500 + }, + { + "epoch": 2.69, + "learning_rate": 3.717895366515989e-05, + "loss": 0.3437, + "step": 4493000 + }, + { + "epoch": 2.69, + "learning_rate": 3.7176853699599325e-05, + "loss": 0.3385, + "step": 4493500 + }, + { + "epoch": 2.69, + "learning_rate": 3.7174753734038766e-05, + "loss": 0.3456, + "step": 4494000 + }, + { + "epoch": 2.69, + "learning_rate": 3.717265376847819e-05, + "loss": 0.3466, + "step": 4494500 + }, + { + "epoch": 2.69, + "learning_rate": 3.717055800284875e-05, + "loss": 0.3422, + "step": 4495000 + }, + { + "epoch": 2.7, + "learning_rate": 3.716845803728819e-05, + "loss": 0.3425, + "step": 4495500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7166358071727626e-05, + "loss": 0.3409, + "step": 4496000 + }, + { + "epoch": 2.7, + "learning_rate": 3.716425810616705e-05, + "loss": 0.3455, + "step": 4496500 + }, + { + "epoch": 2.7, + "learning_rate": 3.716215814060649e-05, + "loss": 0.3486, + "step": 4497000 + }, + { + "epoch": 2.7, + "learning_rate": 3.716005817504593e-05, + "loss": 0.3432, + "step": 4497500 + }, + { + "epoch": 2.7, + "learning_rate": 3.715795820948536e-05, + "loss": 0.3506, + "step": 4498000 + }, + { + "epoch": 2.7, + "learning_rate": 3.71558582439248e-05, + "loss": 0.3463, + "step": 4498500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7153762478295354e-05, + "loss": 0.3441, + "step": 4499000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7151666712665914e-05, + "loss": 0.3542, + "step": 4499500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714956674710535e-05, + "loss": 0.347, + "step": 4500000 + }, + { + "epoch": 2.7, + "eval_loss": 0.33365142345428467, + "eval_runtime": 1119.1708, + "eval_samples_per_second": 470.634, + "eval_steps_per_second": 78.439, + "step": 4500000 + }, + { + "epoch": 2.7, + "learning_rate": 3.714746678154478e-05, + "loss": 0.3464, + "step": 4500500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714536681598422e-05, + "loss": 0.3466, + "step": 4501000 + }, + { + "epoch": 2.7, + "learning_rate": 3.714326685042365e-05, + "loss": 0.3465, + "step": 4501500 + }, + { + "epoch": 2.7, + "learning_rate": 3.714116688486309e-05, + "loss": 0.3339, + "step": 4502000 + }, + { + "epoch": 2.7, + "learning_rate": 3.713906691930252e-05, + "loss": 0.3432, + "step": 4502500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7136966953741955e-05, + "loss": 0.3485, + "step": 4503000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7134871188112516e-05, + "loss": 0.3518, + "step": 4503500 + }, + { + "epoch": 2.7, + "learning_rate": 3.713277542248307e-05, + "loss": 0.3489, + "step": 4504000 + }, + { + "epoch": 2.7, + "learning_rate": 3.713067545692251e-05, + "loss": 0.3507, + "step": 4504500 + }, + { + "epoch": 2.7, + "learning_rate": 3.712857549136194e-05, + "loss": 0.3516, + "step": 4505000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7126475525801376e-05, + "loss": 0.3364, + "step": 4505500 + }, + { + "epoch": 2.7, + "learning_rate": 3.712437556024081e-05, + "loss": 0.3461, + "step": 4506000 + }, + { + "epoch": 2.7, + "learning_rate": 3.712227559468024e-05, + "loss": 0.3406, + "step": 4506500 + }, + { + "epoch": 2.7, + "learning_rate": 3.712017562911968e-05, + "loss": 0.3438, + "step": 4507000 + }, + { + "epoch": 2.7, + "learning_rate": 3.711807566355912e-05, + "loss": 0.3368, + "step": 4507500 + }, + { + "epoch": 2.7, + "learning_rate": 3.71159840978608e-05, + "loss": 0.3484, + "step": 4508000 + }, + { + "epoch": 2.7, + "learning_rate": 3.711388413230023e-05, + "loss": 0.3424, + "step": 4508500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7111784166739664e-05, + "loss": 0.3434, + "step": 4509000 + }, + { + "epoch": 2.7, + "learning_rate": 3.7109684201179105e-05, + "loss": 0.3373, + "step": 4509500 + }, + { + "epoch": 2.7, + "learning_rate": 3.710758423561854e-05, + "loss": 0.3457, + "step": 4510000 + }, + { + "epoch": 2.7, + "learning_rate": 3.710548427005797e-05, + "loss": 0.3463, + "step": 4510500 + }, + { + "epoch": 2.7, + "learning_rate": 3.7103384304497405e-05, + "loss": 0.3429, + "step": 4511000 + }, + { + "epoch": 2.7, + "learning_rate": 3.710128433893684e-05, + "loss": 0.3478, + "step": 4511500 + }, + { + "epoch": 2.71, + "learning_rate": 3.70991885733074e-05, + "loss": 0.3382, + "step": 4512000 + }, + { + "epoch": 2.71, + "learning_rate": 3.709708860774683e-05, + "loss": 0.3406, + "step": 4512500 + }, + { + "epoch": 2.71, + "learning_rate": 3.709498864218627e-05, + "loss": 0.3481, + "step": 4513000 + }, + { + "epoch": 2.71, + "learning_rate": 3.70928886766257e-05, + "loss": 0.3378, + "step": 4513500 + }, + { + "epoch": 2.71, + "learning_rate": 3.709078871106513e-05, + "loss": 0.338, + "step": 4514000 + }, + { + "epoch": 2.71, + "learning_rate": 3.708868874550457e-05, + "loss": 0.3504, + "step": 4514500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7086588779944006e-05, + "loss": 0.3421, + "step": 4515000 + }, + { + "epoch": 2.71, + "learning_rate": 3.708448881438344e-05, + "loss": 0.346, + "step": 4515500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7082393048754e-05, + "loss": 0.3389, + "step": 4516000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7080293083193434e-05, + "loss": 0.3495, + "step": 4516500 + }, + { + "epoch": 2.71, + "learning_rate": 3.707819311763287e-05, + "loss": 0.3454, + "step": 4517000 + }, + { + "epoch": 2.71, + "learning_rate": 3.707609315207231e-05, + "loss": 0.3463, + "step": 4517500 + }, + { + "epoch": 2.71, + "learning_rate": 3.707399738644286e-05, + "loss": 0.3505, + "step": 4518000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7071897420882294e-05, + "loss": 0.3404, + "step": 4518500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7069801655252855e-05, + "loss": 0.347, + "step": 4519000 + }, + { + "epoch": 2.71, + "learning_rate": 3.706770168969229e-05, + "loss": 0.349, + "step": 4519500 + }, + { + "epoch": 2.71, + "learning_rate": 3.706560172413173e-05, + "loss": 0.3439, + "step": 4520000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7063501758571155e-05, + "loss": 0.3478, + "step": 4520500 + }, + { + "epoch": 2.71, + "learning_rate": 3.706140179301059e-05, + "loss": 0.3443, + "step": 4521000 + }, + { + "epoch": 2.71, + "learning_rate": 3.705930182745003e-05, + "loss": 0.3587, + "step": 4521500 + }, + { + "epoch": 2.71, + "learning_rate": 3.705720186188946e-05, + "loss": 0.3398, + "step": 4522000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7055101896328896e-05, + "loss": 0.3384, + "step": 4522500 + }, + { + "epoch": 2.71, + "learning_rate": 3.7053010330630576e-05, + "loss": 0.3529, + "step": 4523000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7050910365070016e-05, + "loss": 0.3446, + "step": 4523500 + }, + { + "epoch": 2.71, + "learning_rate": 3.704881459944057e-05, + "loss": 0.3574, + "step": 4524000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7046714633880003e-05, + "loss": 0.3546, + "step": 4524500 + }, + { + "epoch": 2.71, + "learning_rate": 3.704461466831944e-05, + "loss": 0.3499, + "step": 4525000 + }, + { + "epoch": 2.71, + "learning_rate": 3.704251470275888e-05, + "loss": 0.346, + "step": 4525500 + }, + { + "epoch": 2.71, + "learning_rate": 3.704041473719831e-05, + "loss": 0.3459, + "step": 4526000 + }, + { + "epoch": 2.71, + "learning_rate": 3.7038318971568864e-05, + "loss": 0.3527, + "step": 4526500 + }, + { + "epoch": 2.71, + "learning_rate": 3.70362190060083e-05, + "loss": 0.3422, + "step": 4527000 + }, + { + "epoch": 2.71, + "learning_rate": 3.703411904044774e-05, + "loss": 0.3362, + "step": 4527500 + }, + { + "epoch": 2.71, + "learning_rate": 3.703201907488717e-05, + "loss": 0.3412, + "step": 4528000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7029919109326605e-05, + "loss": 0.3459, + "step": 4528500 + }, + { + "epoch": 2.72, + "learning_rate": 3.7027819143766045e-05, + "loss": 0.343, + "step": 4529000 + }, + { + "epoch": 2.72, + "learning_rate": 3.702571917820548e-05, + "loss": 0.3553, + "step": 4529500 + }, + { + "epoch": 2.72, + "learning_rate": 3.702361921264491e-05, + "loss": 0.3511, + "step": 4530000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7021519247084345e-05, + "loss": 0.3413, + "step": 4530500 + }, + { + "epoch": 2.72, + "learning_rate": 3.701941928152378e-05, + "loss": 0.3491, + "step": 4531000 + }, + { + "epoch": 2.72, + "learning_rate": 3.701732351589434e-05, + "loss": 0.3463, + "step": 4531500 + }, + { + "epoch": 2.72, + "learning_rate": 3.701522355033378e-05, + "loss": 0.3485, + "step": 4532000 + }, + { + "epoch": 2.72, + "learning_rate": 3.7013123584773206e-05, + "loss": 0.3518, + "step": 4532500 + }, + { + "epoch": 2.72, + "learning_rate": 3.701102361921264e-05, + "loss": 0.357, + "step": 4533000 + }, + { + "epoch": 2.72, + "learning_rate": 3.700892365365208e-05, + "loss": 0.3372, + "step": 4533500 + }, + { + "epoch": 2.72, + "learning_rate": 3.700682368809151e-05, + "loss": 0.3426, + "step": 4534000 + }, + { + "epoch": 2.72, + "learning_rate": 3.700472372253095e-05, + "loss": 0.3456, + "step": 4534500 + }, + { + "epoch": 2.72, + "learning_rate": 3.700262375697039e-05, + "loss": 0.3448, + "step": 4535000 + }, + { + "epoch": 2.72, + "learning_rate": 3.700052799134094e-05, + "loss": 0.3513, + "step": 4535500 + }, + { + "epoch": 2.72, + "learning_rate": 3.69984322257115e-05, + "loss": 0.342, + "step": 4536000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6996332260150934e-05, + "loss": 0.3362, + "step": 4536500 + }, + { + "epoch": 2.72, + "learning_rate": 3.699423229459037e-05, + "loss": 0.3478, + "step": 4537000 + }, + { + "epoch": 2.72, + "learning_rate": 3.69921323290298e-05, + "loss": 0.345, + "step": 4537500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6990032363469235e-05, + "loss": 0.3466, + "step": 4538000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6987932397908675e-05, + "loss": 0.3343, + "step": 4538500 + }, + { + "epoch": 2.72, + "learning_rate": 3.698583243234811e-05, + "loss": 0.3435, + "step": 4539000 + }, + { + "epoch": 2.72, + "learning_rate": 3.698373246678754e-05, + "loss": 0.3381, + "step": 4539500 + }, + { + "epoch": 2.72, + "learning_rate": 3.698164090108922e-05, + "loss": 0.3438, + "step": 4540000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6979540935528656e-05, + "loss": 0.3414, + "step": 4540500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6977440969968096e-05, + "loss": 0.3438, + "step": 4541000 + }, + { + "epoch": 2.72, + "learning_rate": 3.697534100440753e-05, + "loss": 0.3526, + "step": 4541500 + }, + { + "epoch": 2.72, + "learning_rate": 3.697324523877808e-05, + "loss": 0.3516, + "step": 4542000 + }, + { + "epoch": 2.72, + "learning_rate": 3.697114527321752e-05, + "loss": 0.3368, + "step": 4542500 + }, + { + "epoch": 2.72, + "learning_rate": 3.696904530765696e-05, + "loss": 0.3374, + "step": 4543000 + }, + { + "epoch": 2.72, + "learning_rate": 3.696694534209639e-05, + "loss": 0.3538, + "step": 4543500 + }, + { + "epoch": 2.72, + "learning_rate": 3.696484537653583e-05, + "loss": 0.3529, + "step": 4544000 + }, + { + "epoch": 2.72, + "learning_rate": 3.6962749610906384e-05, + "loss": 0.3433, + "step": 4544500 + }, + { + "epoch": 2.72, + "learning_rate": 3.696064964534582e-05, + "loss": 0.3523, + "step": 4545000 + }, + { + "epoch": 2.73, + "learning_rate": 3.695854967978525e-05, + "loss": 0.3431, + "step": 4545500 + }, + { + "epoch": 2.73, + "learning_rate": 3.695644971422469e-05, + "loss": 0.3523, + "step": 4546000 + }, + { + "epoch": 2.73, + "learning_rate": 3.695434974866412e-05, + "loss": 0.3488, + "step": 4546500 + }, + { + "epoch": 2.73, + "learning_rate": 3.695224978310355e-05, + "loss": 0.3419, + "step": 4547000 + }, + { + "epoch": 2.73, + "learning_rate": 3.695014981754299e-05, + "loss": 0.3418, + "step": 4547500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6948049851982425e-05, + "loss": 0.3388, + "step": 4548000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6945954086352985e-05, + "loss": 0.3384, + "step": 4548500 + }, + { + "epoch": 2.73, + "learning_rate": 3.694385412079242e-05, + "loss": 0.3412, + "step": 4549000 + }, + { + "epoch": 2.73, + "learning_rate": 3.694175835516298e-05, + "loss": 0.3404, + "step": 4549500 + }, + { + "epoch": 2.73, + "learning_rate": 3.693965838960241e-05, + "loss": 0.3396, + "step": 4550000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6937558424041846e-05, + "loss": 0.3348, + "step": 4550500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6935458458481286e-05, + "loss": 0.3352, + "step": 4551000 + }, + { + "epoch": 2.73, + "learning_rate": 3.693335849292071e-05, + "loss": 0.3383, + "step": 4551500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6931258527360147e-05, + "loss": 0.3363, + "step": 4552000 + }, + { + "epoch": 2.73, + "learning_rate": 3.692915856179959e-05, + "loss": 0.3459, + "step": 4552500 + }, + { + "epoch": 2.73, + "learning_rate": 3.692705859623902e-05, + "loss": 0.3455, + "step": 4553000 + }, + { + "epoch": 2.73, + "learning_rate": 3.692496283060958e-05, + "loss": 0.3498, + "step": 4553500 + }, + { + "epoch": 2.73, + "learning_rate": 3.692286286504901e-05, + "loss": 0.3354, + "step": 4554000 + }, + { + "epoch": 2.73, + "learning_rate": 3.692076289948845e-05, + "loss": 0.345, + "step": 4554500 + }, + { + "epoch": 2.73, + "learning_rate": 3.691866293392788e-05, + "loss": 0.3431, + "step": 4555000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6916562968367314e-05, + "loss": 0.3401, + "step": 4555500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6914463002806755e-05, + "loss": 0.3487, + "step": 4556000 + }, + { + "epoch": 2.73, + "learning_rate": 3.691236303724619e-05, + "loss": 0.3473, + "step": 4556500 + }, + { + "epoch": 2.73, + "learning_rate": 3.691026307168562e-05, + "loss": 0.3467, + "step": 4557000 + }, + { + "epoch": 2.73, + "learning_rate": 3.690816730605618e-05, + "loss": 0.3365, + "step": 4557500 + }, + { + "epoch": 2.73, + "learning_rate": 3.690607154042674e-05, + "loss": 0.3454, + "step": 4558000 + }, + { + "epoch": 2.73, + "learning_rate": 3.690397157486617e-05, + "loss": 0.3419, + "step": 4558500 + }, + { + "epoch": 2.73, + "learning_rate": 3.69018716093056e-05, + "loss": 0.3407, + "step": 4559000 + }, + { + "epoch": 2.73, + "learning_rate": 3.689977164374504e-05, + "loss": 0.3509, + "step": 4559500 + }, + { + "epoch": 2.73, + "learning_rate": 3.6897671678184476e-05, + "loss": 0.3515, + "step": 4560000 + }, + { + "epoch": 2.73, + "learning_rate": 3.689557171262391e-05, + "loss": 0.3406, + "step": 4560500 + }, + { + "epoch": 2.73, + "learning_rate": 3.689347594699446e-05, + "loss": 0.3454, + "step": 4561000 + }, + { + "epoch": 2.73, + "learning_rate": 3.68913759814339e-05, + "loss": 0.3369, + "step": 4561500 + }, + { + "epoch": 2.74, + "learning_rate": 3.688927601587334e-05, + "loss": 0.3412, + "step": 4562000 + }, + { + "epoch": 2.74, + "learning_rate": 3.688717605031277e-05, + "loss": 0.3322, + "step": 4562500 + }, + { + "epoch": 2.74, + "learning_rate": 3.688507608475221e-05, + "loss": 0.3401, + "step": 4563000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6882980319122764e-05, + "loss": 0.3608, + "step": 4563500 + }, + { + "epoch": 2.74, + "learning_rate": 3.68808803535622e-05, + "loss": 0.3449, + "step": 4564000 + }, + { + "epoch": 2.74, + "learning_rate": 3.687878038800164e-05, + "loss": 0.3442, + "step": 4564500 + }, + { + "epoch": 2.74, + "learning_rate": 3.687668042244107e-05, + "loss": 0.3343, + "step": 4565000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6874580456880505e-05, + "loss": 0.3454, + "step": 4565500 + }, + { + "epoch": 2.74, + "learning_rate": 3.687248469125106e-05, + "loss": 0.3371, + "step": 4566000 + }, + { + "epoch": 2.74, + "learning_rate": 3.68703847256905e-05, + "loss": 0.336, + "step": 4566500 + }, + { + "epoch": 2.74, + "learning_rate": 3.686828476012993e-05, + "loss": 0.3449, + "step": 4567000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6866184794569365e-05, + "loss": 0.3537, + "step": 4567500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6864084829008806e-05, + "loss": 0.3436, + "step": 4568000 + }, + { + "epoch": 2.74, + "learning_rate": 3.686198906337936e-05, + "loss": 0.3585, + "step": 4568500 + }, + { + "epoch": 2.74, + "learning_rate": 3.685988909781879e-05, + "loss": 0.3536, + "step": 4569000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6857789132258226e-05, + "loss": 0.3406, + "step": 4569500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6855693366628786e-05, + "loss": 0.3512, + "step": 4570000 + }, + { + "epoch": 2.74, + "learning_rate": 3.685359340106822e-05, + "loss": 0.356, + "step": 4570500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6851493435507653e-05, + "loss": 0.3366, + "step": 4571000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6849393469947094e-05, + "loss": 0.3434, + "step": 4571500 + }, + { + "epoch": 2.74, + "learning_rate": 3.684729350438653e-05, + "loss": 0.3409, + "step": 4572000 + }, + { + "epoch": 2.74, + "learning_rate": 3.684519353882596e-05, + "loss": 0.3394, + "step": 4572500 + }, + { + "epoch": 2.74, + "learning_rate": 3.68430935732654e-05, + "loss": 0.3437, + "step": 4573000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6840993607704834e-05, + "loss": 0.3465, + "step": 4573500 + }, + { + "epoch": 2.74, + "learning_rate": 3.683889364214427e-05, + "loss": 0.3429, + "step": 4574000 + }, + { + "epoch": 2.74, + "learning_rate": 3.683679367658371e-05, + "loss": 0.342, + "step": 4574500 + }, + { + "epoch": 2.74, + "learning_rate": 3.683469371102314e-05, + "loss": 0.3358, + "step": 4575000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6832593745462575e-05, + "loss": 0.3515, + "step": 4575500 + }, + { + "epoch": 2.74, + "learning_rate": 3.683049797983313e-05, + "loss": 0.3441, + "step": 4576000 + }, + { + "epoch": 2.74, + "learning_rate": 3.682839801427257e-05, + "loss": 0.3516, + "step": 4576500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6826298048712e-05, + "loss": 0.3423, + "step": 4577000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6824202283082556e-05, + "loss": 0.3373, + "step": 4577500 + }, + { + "epoch": 2.74, + "learning_rate": 3.682210231752199e-05, + "loss": 0.3472, + "step": 4578000 + }, + { + "epoch": 2.74, + "learning_rate": 3.682000235196143e-05, + "loss": 0.3419, + "step": 4578500 + }, + { + "epoch": 2.75, + "learning_rate": 3.681790238640086e-05, + "loss": 0.3386, + "step": 4579000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6815802420840296e-05, + "loss": 0.3574, + "step": 4579500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6813702455279737e-05, + "loss": 0.3428, + "step": 4580000 + }, + { + "epoch": 2.75, + "learning_rate": 3.681160668965029e-05, + "loss": 0.3457, + "step": 4580500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6809506724089724e-05, + "loss": 0.3458, + "step": 4581000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6807406758529164e-05, + "loss": 0.345, + "step": 4581500 + }, + { + "epoch": 2.75, + "learning_rate": 3.68053067929686e-05, + "loss": 0.3325, + "step": 4582000 + }, + { + "epoch": 2.75, + "learning_rate": 3.680320682740803e-05, + "loss": 0.3367, + "step": 4582500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6801106861847464e-05, + "loss": 0.3464, + "step": 4583000 + }, + { + "epoch": 2.75, + "learning_rate": 3.67990068962869e-05, + "loss": 0.3381, + "step": 4583500 + }, + { + "epoch": 2.75, + "learning_rate": 3.679690693072633e-05, + "loss": 0.3477, + "step": 4584000 + }, + { + "epoch": 2.75, + "learning_rate": 3.679481116509689e-05, + "loss": 0.3369, + "step": 4584500 + }, + { + "epoch": 2.75, + "learning_rate": 3.679271119953633e-05, + "loss": 0.3515, + "step": 4585000 + }, + { + "epoch": 2.75, + "learning_rate": 3.679061123397576e-05, + "loss": 0.3455, + "step": 4585500 + }, + { + "epoch": 2.75, + "learning_rate": 3.678851546834632e-05, + "loss": 0.3473, + "step": 4586000 + }, + { + "epoch": 2.75, + "learning_rate": 3.678641970271687e-05, + "loss": 0.3337, + "step": 4586500 + }, + { + "epoch": 2.75, + "learning_rate": 3.678431973715631e-05, + "loss": 0.3346, + "step": 4587000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6782219771595746e-05, + "loss": 0.3532, + "step": 4587500 + }, + { + "epoch": 2.75, + "learning_rate": 3.678011980603518e-05, + "loss": 0.3455, + "step": 4588000 + }, + { + "epoch": 2.75, + "learning_rate": 3.677801984047462e-05, + "loss": 0.3382, + "step": 4588500 + }, + { + "epoch": 2.75, + "learning_rate": 3.677591987491405e-05, + "loss": 0.3401, + "step": 4589000 + }, + { + "epoch": 2.75, + "learning_rate": 3.677381990935349e-05, + "loss": 0.343, + "step": 4589500 + }, + { + "epoch": 2.75, + "learning_rate": 3.677171994379293e-05, + "loss": 0.3463, + "step": 4590000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6769619978232354e-05, + "loss": 0.3455, + "step": 4590500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6767524212602914e-05, + "loss": 0.3369, + "step": 4591000 + }, + { + "epoch": 2.75, + "learning_rate": 3.676542424704235e-05, + "loss": 0.3446, + "step": 4591500 + }, + { + "epoch": 2.75, + "learning_rate": 3.676332428148179e-05, + "loss": 0.3455, + "step": 4592000 + }, + { + "epoch": 2.75, + "learning_rate": 3.676122431592122e-05, + "loss": 0.3432, + "step": 4592500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6759132750222895e-05, + "loss": 0.3487, + "step": 4593000 + }, + { + "epoch": 2.75, + "learning_rate": 3.675703278466233e-05, + "loss": 0.337, + "step": 4593500 + }, + { + "epoch": 2.75, + "learning_rate": 3.675493281910177e-05, + "loss": 0.3486, + "step": 4594000 + }, + { + "epoch": 2.75, + "learning_rate": 3.67528328535412e-05, + "loss": 0.3417, + "step": 4594500 + }, + { + "epoch": 2.75, + "learning_rate": 3.6750732887980635e-05, + "loss": 0.3396, + "step": 4595000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6748632922420076e-05, + "loss": 0.3486, + "step": 4595500 + }, + { + "epoch": 2.76, + "learning_rate": 3.674653295685951e-05, + "loss": 0.3468, + "step": 4596000 + }, + { + "epoch": 2.76, + "learning_rate": 3.674443299129894e-05, + "loss": 0.3465, + "step": 4596500 + }, + { + "epoch": 2.76, + "learning_rate": 3.674233302573838e-05, + "loss": 0.3333, + "step": 4597000 + }, + { + "epoch": 2.76, + "learning_rate": 3.674023306017781e-05, + "loss": 0.3483, + "step": 4597500 + }, + { + "epoch": 2.76, + "learning_rate": 3.673813729454837e-05, + "loss": 0.3393, + "step": 4598000 + }, + { + "epoch": 2.76, + "learning_rate": 3.67360373289878e-05, + "loss": 0.3412, + "step": 4598500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6733937363427243e-05, + "loss": 0.3411, + "step": 4599000 + }, + { + "epoch": 2.76, + "learning_rate": 3.673183739786668e-05, + "loss": 0.3354, + "step": 4599500 + }, + { + "epoch": 2.76, + "learning_rate": 3.672974163223723e-05, + "loss": 0.3462, + "step": 4600000 + }, + { + "epoch": 2.76, + "eval_loss": 0.3324766755104065, + "eval_runtime": 1118.3498, + "eval_samples_per_second": 470.98, + "eval_steps_per_second": 78.497, + "step": 4600000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6727641666676664e-05, + "loss": 0.3462, + "step": 4600500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6725541701116104e-05, + "loss": 0.3484, + "step": 4601000 + }, + { + "epoch": 2.76, + "learning_rate": 3.672344173555554e-05, + "loss": 0.3479, + "step": 4601500 + }, + { + "epoch": 2.76, + "learning_rate": 3.672134176999498e-05, + "loss": 0.3413, + "step": 4602000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6719241804434405e-05, + "loss": 0.3512, + "step": 4602500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6717146038804965e-05, + "loss": 0.3417, + "step": 4603000 + }, + { + "epoch": 2.76, + "learning_rate": 3.67150460732444e-05, + "loss": 0.3425, + "step": 4603500 + }, + { + "epoch": 2.76, + "learning_rate": 3.671294610768384e-05, + "loss": 0.3446, + "step": 4604000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6710846142123265e-05, + "loss": 0.3415, + "step": 4604500 + }, + { + "epoch": 2.76, + "learning_rate": 3.67087461765627e-05, + "loss": 0.3485, + "step": 4605000 + }, + { + "epoch": 2.76, + "learning_rate": 3.670665041093326e-05, + "loss": 0.3444, + "step": 4605500 + }, + { + "epoch": 2.76, + "learning_rate": 3.67045504453727e-05, + "loss": 0.3429, + "step": 4606000 + }, + { + "epoch": 2.76, + "learning_rate": 3.670245047981213e-05, + "loss": 0.3408, + "step": 4606500 + }, + { + "epoch": 2.76, + "learning_rate": 3.670035051425156e-05, + "loss": 0.3403, + "step": 4607000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6698250548691e-05, + "loss": 0.344, + "step": 4607500 + }, + { + "epoch": 2.76, + "learning_rate": 3.669615058313043e-05, + "loss": 0.3493, + "step": 4608000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6694054817500994e-05, + "loss": 0.3511, + "step": 4608500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6691954851940434e-05, + "loss": 0.3416, + "step": 4609000 + }, + { + "epoch": 2.76, + "learning_rate": 3.668985488637986e-05, + "loss": 0.3394, + "step": 4609500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6687754920819294e-05, + "loss": 0.3412, + "step": 4610000 + }, + { + "epoch": 2.76, + "learning_rate": 3.6685654955258734e-05, + "loss": 0.3365, + "step": 4610500 + }, + { + "epoch": 2.76, + "learning_rate": 3.668355498969817e-05, + "loss": 0.3442, + "step": 4611000 + }, + { + "epoch": 2.76, + "learning_rate": 3.668145922406873e-05, + "loss": 0.347, + "step": 4611500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6679359258508155e-05, + "loss": 0.3473, + "step": 4612000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6677259292947595e-05, + "loss": 0.3401, + "step": 4612500 + }, + { + "epoch": 2.77, + "learning_rate": 3.667515932738703e-05, + "loss": 0.3313, + "step": 4613000 + }, + { + "epoch": 2.77, + "learning_rate": 3.667306356175759e-05, + "loss": 0.3418, + "step": 4613500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6670963596197015e-05, + "loss": 0.3465, + "step": 4614000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6668863630636456e-05, + "loss": 0.342, + "step": 4614500 + }, + { + "epoch": 2.77, + "learning_rate": 3.666676366507589e-05, + "loss": 0.3494, + "step": 4615000 + }, + { + "epoch": 2.77, + "learning_rate": 3.666466369951533e-05, + "loss": 0.3392, + "step": 4615500 + }, + { + "epoch": 2.77, + "learning_rate": 3.666256373395476e-05, + "loss": 0.3489, + "step": 4616000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6660463768394196e-05, + "loss": 0.3375, + "step": 4616500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6658363802833636e-05, + "loss": 0.3416, + "step": 4617000 + }, + { + "epoch": 2.77, + "learning_rate": 3.665626803720419e-05, + "loss": 0.3476, + "step": 4617500 + }, + { + "epoch": 2.77, + "learning_rate": 3.665417227157475e-05, + "loss": 0.3473, + "step": 4618000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6652072306014184e-05, + "loss": 0.3474, + "step": 4618500 + }, + { + "epoch": 2.77, + "learning_rate": 3.664997234045361e-05, + "loss": 0.3377, + "step": 4619000 + }, + { + "epoch": 2.77, + "learning_rate": 3.664787237489305e-05, + "loss": 0.3359, + "step": 4619500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6645772409332484e-05, + "loss": 0.3473, + "step": 4620000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6643676643703045e-05, + "loss": 0.3487, + "step": 4620500 + }, + { + "epoch": 2.77, + "learning_rate": 3.664157667814248e-05, + "loss": 0.3428, + "step": 4621000 + }, + { + "epoch": 2.77, + "learning_rate": 3.663948091251304e-05, + "loss": 0.349, + "step": 4621500 + }, + { + "epoch": 2.77, + "learning_rate": 3.663738094695247e-05, + "loss": 0.3503, + "step": 4622000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6635280981391905e-05, + "loss": 0.342, + "step": 4622500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6633181015831346e-05, + "loss": 0.343, + "step": 4623000 + }, + { + "epoch": 2.77, + "learning_rate": 3.663108105027078e-05, + "loss": 0.3386, + "step": 4623500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6628981084710206e-05, + "loss": 0.3465, + "step": 4624000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6626881119149646e-05, + "loss": 0.3517, + "step": 4624500 + }, + { + "epoch": 2.77, + "learning_rate": 3.662478115358908e-05, + "loss": 0.343, + "step": 4625000 + }, + { + "epoch": 2.77, + "learning_rate": 3.662268118802851e-05, + "loss": 0.3486, + "step": 4625500 + }, + { + "epoch": 2.77, + "learning_rate": 3.662058962233019e-05, + "loss": 0.3422, + "step": 4626000 + }, + { + "epoch": 2.77, + "learning_rate": 3.661848965676963e-05, + "loss": 0.3439, + "step": 4626500 + }, + { + "epoch": 2.77, + "learning_rate": 3.661638969120907e-05, + "loss": 0.3355, + "step": 4627000 + }, + { + "epoch": 2.77, + "learning_rate": 3.66142897256485e-05, + "loss": 0.3434, + "step": 4627500 + }, + { + "epoch": 2.77, + "learning_rate": 3.6612189760087934e-05, + "loss": 0.34, + "step": 4628000 + }, + { + "epoch": 2.77, + "learning_rate": 3.661008979452737e-05, + "loss": 0.3427, + "step": 4628500 + }, + { + "epoch": 2.78, + "learning_rate": 3.66079898289668e-05, + "loss": 0.3437, + "step": 4629000 + }, + { + "epoch": 2.78, + "learning_rate": 3.660588986340624e-05, + "loss": 0.3368, + "step": 4629500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6603789897845675e-05, + "loss": 0.346, + "step": 4630000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6601694132216235e-05, + "loss": 0.351, + "step": 4630500 + }, + { + "epoch": 2.78, + "learning_rate": 3.659959416665566e-05, + "loss": 0.3448, + "step": 4631000 + }, + { + "epoch": 2.78, + "learning_rate": 3.659749840102622e-05, + "loss": 0.3539, + "step": 4631500 + }, + { + "epoch": 2.78, + "learning_rate": 3.659539843546566e-05, + "loss": 0.3411, + "step": 4632000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6593298469905096e-05, + "loss": 0.3437, + "step": 4632500 + }, + { + "epoch": 2.78, + "learning_rate": 3.659119850434453e-05, + "loss": 0.3447, + "step": 4633000 + }, + { + "epoch": 2.78, + "learning_rate": 3.658909853878396e-05, + "loss": 0.3328, + "step": 4633500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6586998573223396e-05, + "loss": 0.3369, + "step": 4634000 + }, + { + "epoch": 2.78, + "learning_rate": 3.658489860766283e-05, + "loss": 0.3389, + "step": 4634500 + }, + { + "epoch": 2.78, + "learning_rate": 3.658279864210227e-05, + "loss": 0.3434, + "step": 4635000 + }, + { + "epoch": 2.78, + "learning_rate": 3.65806986765417e-05, + "loss": 0.3435, + "step": 4635500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6578598710981137e-05, + "loss": 0.3317, + "step": 4636000 + }, + { + "epoch": 2.78, + "learning_rate": 3.657649874542058e-05, + "loss": 0.3411, + "step": 4636500 + }, + { + "epoch": 2.78, + "learning_rate": 3.657440297979113e-05, + "loss": 0.3369, + "step": 4637000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6572303014230564e-05, + "loss": 0.3401, + "step": 4637500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6570203048670004e-05, + "loss": 0.3433, + "step": 4638000 + }, + { + "epoch": 2.78, + "learning_rate": 3.656810308310944e-05, + "loss": 0.3333, + "step": 4638500 + }, + { + "epoch": 2.78, + "learning_rate": 3.656600311754887e-05, + "loss": 0.342, + "step": 4639000 + }, + { + "epoch": 2.78, + "learning_rate": 3.656390315198831e-05, + "loss": 0.3436, + "step": 4639500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6561803186427745e-05, + "loss": 0.336, + "step": 4640000 + }, + { + "epoch": 2.78, + "learning_rate": 3.65597074207983e-05, + "loss": 0.3399, + "step": 4640500 + }, + { + "epoch": 2.78, + "learning_rate": 3.655760745523773e-05, + "loss": 0.3441, + "step": 4641000 + }, + { + "epoch": 2.78, + "learning_rate": 3.655550748967717e-05, + "loss": 0.3428, + "step": 4641500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6553407524116605e-05, + "loss": 0.3391, + "step": 4642000 + }, + { + "epoch": 2.78, + "learning_rate": 3.655130755855604e-05, + "loss": 0.3412, + "step": 4642500 + }, + { + "epoch": 2.78, + "learning_rate": 3.654920759299548e-05, + "loss": 0.3445, + "step": 4643000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6547107627434906e-05, + "loss": 0.3436, + "step": 4643500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6545011861805466e-05, + "loss": 0.3487, + "step": 4644000 + }, + { + "epoch": 2.78, + "learning_rate": 3.65429118962449e-05, + "loss": 0.3392, + "step": 4644500 + }, + { + "epoch": 2.78, + "learning_rate": 3.654081193068434e-05, + "loss": 0.3433, + "step": 4645000 + }, + { + "epoch": 2.79, + "learning_rate": 3.653871196512377e-05, + "loss": 0.3447, + "step": 4645500 + }, + { + "epoch": 2.79, + "learning_rate": 3.653661199956321e-05, + "loss": 0.3402, + "step": 4646000 + }, + { + "epoch": 2.79, + "learning_rate": 3.653451623393377e-05, + "loss": 0.3393, + "step": 4646500 + }, + { + "epoch": 2.79, + "learning_rate": 3.65324162683732e-05, + "loss": 0.3499, + "step": 4647000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6530316302812634e-05, + "loss": 0.3452, + "step": 4647500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6528216337252074e-05, + "loss": 0.3376, + "step": 4648000 + }, + { + "epoch": 2.79, + "learning_rate": 3.65261163716915e-05, + "loss": 0.3501, + "step": 4648500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6524016406130934e-05, + "loss": 0.3452, + "step": 4649000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6521916440570375e-05, + "loss": 0.3485, + "step": 4649500 + }, + { + "epoch": 2.79, + "learning_rate": 3.651981647500981e-05, + "loss": 0.346, + "step": 4650000 + }, + { + "epoch": 2.79, + "learning_rate": 3.651772070938036e-05, + "loss": 0.3438, + "step": 4650500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6515620743819795e-05, + "loss": 0.339, + "step": 4651000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6513524978190355e-05, + "loss": 0.3386, + "step": 4651500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6511425012629796e-05, + "loss": 0.3529, + "step": 4652000 + }, + { + "epoch": 2.79, + "learning_rate": 3.650932504706923e-05, + "loss": 0.3315, + "step": 4652500 + }, + { + "epoch": 2.79, + "learning_rate": 3.650722508150866e-05, + "loss": 0.3445, + "step": 4653000 + }, + { + "epoch": 2.79, + "learning_rate": 3.650512931587922e-05, + "loss": 0.3459, + "step": 4653500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6503029350318656e-05, + "loss": 0.3443, + "step": 4654000 + }, + { + "epoch": 2.79, + "learning_rate": 3.650092938475809e-05, + "loss": 0.3391, + "step": 4654500 + }, + { + "epoch": 2.79, + "learning_rate": 3.649882941919753e-05, + "loss": 0.3453, + "step": 4655000 + }, + { + "epoch": 2.79, + "learning_rate": 3.649672945363696e-05, + "loss": 0.3487, + "step": 4655500 + }, + { + "epoch": 2.79, + "learning_rate": 3.649462948807639e-05, + "loss": 0.3319, + "step": 4656000 + }, + { + "epoch": 2.79, + "learning_rate": 3.649252952251583e-05, + "loss": 0.3511, + "step": 4656500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6490429556955264e-05, + "loss": 0.3387, + "step": 4657000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6488333791325824e-05, + "loss": 0.3398, + "step": 4657500 + }, + { + "epoch": 2.79, + "learning_rate": 3.648623382576525e-05, + "loss": 0.3481, + "step": 4658000 + }, + { + "epoch": 2.79, + "learning_rate": 3.648413386020469e-05, + "loss": 0.3543, + "step": 4658500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6482033894644125e-05, + "loss": 0.3388, + "step": 4659000 + }, + { + "epoch": 2.79, + "learning_rate": 3.647993392908356e-05, + "loss": 0.3382, + "step": 4659500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6477833963523e-05, + "loss": 0.3449, + "step": 4660000 + }, + { + "epoch": 2.79, + "learning_rate": 3.647573399796243e-05, + "loss": 0.3443, + "step": 4660500 + }, + { + "epoch": 2.79, + "learning_rate": 3.6473638232332985e-05, + "loss": 0.3451, + "step": 4661000 + }, + { + "epoch": 2.79, + "learning_rate": 3.6471538266772426e-05, + "loss": 0.3436, + "step": 4661500 + }, + { + "epoch": 2.8, + "learning_rate": 3.646943830121186e-05, + "loss": 0.3432, + "step": 4662000 + }, + { + "epoch": 2.8, + "learning_rate": 3.646733833565129e-05, + "loss": 0.3422, + "step": 4662500 + }, + { + "epoch": 2.8, + "learning_rate": 3.646523837009073e-05, + "loss": 0.343, + "step": 4663000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6463142604461286e-05, + "loss": 0.347, + "step": 4663500 + }, + { + "epoch": 2.8, + "learning_rate": 3.646104683883185e-05, + "loss": 0.3387, + "step": 4664000 + }, + { + "epoch": 2.8, + "learning_rate": 3.645894687327128e-05, + "loss": 0.3433, + "step": 4664500 + }, + { + "epoch": 2.8, + "learning_rate": 3.645684690771071e-05, + "loss": 0.3389, + "step": 4665000 + }, + { + "epoch": 2.8, + "learning_rate": 3.645474694215015e-05, + "loss": 0.3501, + "step": 4665500 + }, + { + "epoch": 2.8, + "learning_rate": 3.645264697658958e-05, + "loss": 0.3441, + "step": 4666000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6450547011029014e-05, + "loss": 0.3455, + "step": 4666500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6448447045468454e-05, + "loss": 0.3426, + "step": 4667000 + }, + { + "epoch": 2.8, + "learning_rate": 3.644634707990789e-05, + "loss": 0.3388, + "step": 4667500 + }, + { + "epoch": 2.8, + "learning_rate": 3.644424711434733e-05, + "loss": 0.3372, + "step": 4668000 + }, + { + "epoch": 2.8, + "learning_rate": 3.644214714878676e-05, + "loss": 0.3462, + "step": 4668500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6440047183226195e-05, + "loss": 0.3304, + "step": 4669000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6437947217665635e-05, + "loss": 0.3454, + "step": 4669500 + }, + { + "epoch": 2.8, + "learning_rate": 3.643585145203619e-05, + "loss": 0.3415, + "step": 4670000 + }, + { + "epoch": 2.8, + "learning_rate": 3.643375148647562e-05, + "loss": 0.3403, + "step": 4670500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6431651520915056e-05, + "loss": 0.3418, + "step": 4671000 + }, + { + "epoch": 2.8, + "learning_rate": 3.642955575528561e-05, + "loss": 0.3413, + "step": 4671500 + }, + { + "epoch": 2.8, + "learning_rate": 3.642745578972505e-05, + "loss": 0.3498, + "step": 4672000 + }, + { + "epoch": 2.8, + "learning_rate": 3.642535582416448e-05, + "loss": 0.3436, + "step": 4672500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6423255858603916e-05, + "loss": 0.339, + "step": 4673000 + }, + { + "epoch": 2.8, + "learning_rate": 3.642116009297448e-05, + "loss": 0.356, + "step": 4673500 + }, + { + "epoch": 2.8, + "learning_rate": 3.641906012741391e-05, + "loss": 0.3483, + "step": 4674000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6416960161853344e-05, + "loss": 0.3373, + "step": 4674500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6414860196292784e-05, + "loss": 0.3528, + "step": 4675000 + }, + { + "epoch": 2.8, + "learning_rate": 3.641276023073222e-05, + "loss": 0.3412, + "step": 4675500 + }, + { + "epoch": 2.8, + "learning_rate": 3.641066026517165e-05, + "loss": 0.3441, + "step": 4676000 + }, + { + "epoch": 2.8, + "learning_rate": 3.640856029961109e-05, + "loss": 0.348, + "step": 4676500 + }, + { + "epoch": 2.8, + "learning_rate": 3.6406460334050524e-05, + "loss": 0.3456, + "step": 4677000 + }, + { + "epoch": 2.8, + "learning_rate": 3.64043687683522e-05, + "loss": 0.3415, + "step": 4677500 + }, + { + "epoch": 2.8, + "learning_rate": 3.640226880279163e-05, + "loss": 0.326, + "step": 4678000 + }, + { + "epoch": 2.8, + "learning_rate": 3.6400168837231065e-05, + "loss": 0.3429, + "step": 4678500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6398068871670505e-05, + "loss": 0.3446, + "step": 4679000 + }, + { + "epoch": 2.81, + "learning_rate": 3.639597310604106e-05, + "loss": 0.3498, + "step": 4679500 + }, + { + "epoch": 2.81, + "learning_rate": 3.639387314048049e-05, + "loss": 0.3533, + "step": 4680000 + }, + { + "epoch": 2.81, + "learning_rate": 3.639177317491993e-05, + "loss": 0.3349, + "step": 4680500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6389673209359366e-05, + "loss": 0.3406, + "step": 4681000 + }, + { + "epoch": 2.81, + "learning_rate": 3.638757744372992e-05, + "loss": 0.341, + "step": 4681500 + }, + { + "epoch": 2.81, + "learning_rate": 3.638547747816935e-05, + "loss": 0.3421, + "step": 4682000 + }, + { + "epoch": 2.81, + "learning_rate": 3.638337751260879e-05, + "loss": 0.3422, + "step": 4682500 + }, + { + "epoch": 2.81, + "learning_rate": 3.638127754704823e-05, + "loss": 0.3434, + "step": 4683000 + }, + { + "epoch": 2.81, + "learning_rate": 3.637917758148766e-05, + "loss": 0.3396, + "step": 4683500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6377081815858214e-05, + "loss": 0.3468, + "step": 4684000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6374981850297654e-05, + "loss": 0.3411, + "step": 4684500 + }, + { + "epoch": 2.81, + "learning_rate": 3.637288188473709e-05, + "loss": 0.3473, + "step": 4685000 + }, + { + "epoch": 2.81, + "learning_rate": 3.637078191917652e-05, + "loss": 0.3375, + "step": 4685500 + }, + { + "epoch": 2.81, + "learning_rate": 3.636868195361596e-05, + "loss": 0.3427, + "step": 4686000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6366581988055395e-05, + "loss": 0.3471, + "step": 4686500 + }, + { + "epoch": 2.81, + "learning_rate": 3.636448202249483e-05, + "loss": 0.349, + "step": 4687000 + }, + { + "epoch": 2.81, + "learning_rate": 3.636238205693427e-05, + "loss": 0.3472, + "step": 4687500 + }, + { + "epoch": 2.81, + "learning_rate": 3.636028629130482e-05, + "loss": 0.3432, + "step": 4688000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6358186325744255e-05, + "loss": 0.3434, + "step": 4688500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6356086360183696e-05, + "loss": 0.3393, + "step": 4689000 + }, + { + "epoch": 2.81, + "learning_rate": 3.635398639462313e-05, + "loss": 0.3411, + "step": 4689500 + }, + { + "epoch": 2.81, + "learning_rate": 3.635188642906256e-05, + "loss": 0.3422, + "step": 4690000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6349786463502e-05, + "loss": 0.3472, + "step": 4690500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6347686497941436e-05, + "loss": 0.3415, + "step": 4691000 + }, + { + "epoch": 2.81, + "learning_rate": 3.634558653238087e-05, + "loss": 0.3426, + "step": 4691500 + }, + { + "epoch": 2.81, + "learning_rate": 3.634349076675142e-05, + "loss": 0.3313, + "step": 4692000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6341390801190863e-05, + "loss": 0.3495, + "step": 4692500 + }, + { + "epoch": 2.81, + "learning_rate": 3.63392908356303e-05, + "loss": 0.3439, + "step": 4693000 + }, + { + "epoch": 2.81, + "learning_rate": 3.633719087006973e-05, + "loss": 0.3481, + "step": 4693500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6335099304371404e-05, + "loss": 0.3413, + "step": 4694000 + }, + { + "epoch": 2.81, + "learning_rate": 3.6332999338810844e-05, + "loss": 0.3412, + "step": 4694500 + }, + { + "epoch": 2.81, + "learning_rate": 3.633089937325028e-05, + "loss": 0.3394, + "step": 4695000 + }, + { + "epoch": 2.82, + "learning_rate": 3.632879940768971e-05, + "loss": 0.3382, + "step": 4695500 + }, + { + "epoch": 2.82, + "learning_rate": 3.632669944212915e-05, + "loss": 0.3451, + "step": 4696000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6324599476568585e-05, + "loss": 0.3418, + "step": 4696500 + }, + { + "epoch": 2.82, + "learning_rate": 3.632249951100802e-05, + "loss": 0.3372, + "step": 4697000 + }, + { + "epoch": 2.82, + "learning_rate": 3.632039954544746e-05, + "loss": 0.3337, + "step": 4697500 + }, + { + "epoch": 2.82, + "learning_rate": 3.631830377981801e-05, + "loss": 0.344, + "step": 4698000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6316203814257446e-05, + "loss": 0.3466, + "step": 4698500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6314108048628e-05, + "loss": 0.3416, + "step": 4699000 + }, + { + "epoch": 2.82, + "learning_rate": 3.631200808306743e-05, + "loss": 0.3482, + "step": 4699500 + }, + { + "epoch": 2.82, + "learning_rate": 3.630990811750687e-05, + "loss": 0.3419, + "step": 4700000 + }, + { + "epoch": 2.82, + "eval_loss": 0.3312073349952698, + "eval_runtime": 1117.6715, + "eval_samples_per_second": 471.265, + "eval_steps_per_second": 78.545, + "step": 4700000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6307808151946306e-05, + "loss": 0.3483, + "step": 4700500 + }, + { + "epoch": 2.82, + "learning_rate": 3.630570818638574e-05, + "loss": 0.3387, + "step": 4701000 + }, + { + "epoch": 2.82, + "learning_rate": 3.630360822082518e-05, + "loss": 0.3387, + "step": 4701500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6301508255264614e-05, + "loss": 0.341, + "step": 4702000 + }, + { + "epoch": 2.82, + "learning_rate": 3.629940828970405e-05, + "loss": 0.3437, + "step": 4702500 + }, + { + "epoch": 2.82, + "learning_rate": 3.629731672400572e-05, + "loss": 0.3426, + "step": 4703000 + }, + { + "epoch": 2.82, + "learning_rate": 3.629521675844516e-05, + "loss": 0.3429, + "step": 4703500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6293116792884594e-05, + "loss": 0.3402, + "step": 4704000 + }, + { + "epoch": 2.82, + "learning_rate": 3.629101682732403e-05, + "loss": 0.3385, + "step": 4704500 + }, + { + "epoch": 2.82, + "learning_rate": 3.628891686176347e-05, + "loss": 0.3436, + "step": 4705000 + }, + { + "epoch": 2.82, + "learning_rate": 3.62868168962029e-05, + "loss": 0.3432, + "step": 4705500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6284716930642335e-05, + "loss": 0.3438, + "step": 4706000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6282616965081775e-05, + "loss": 0.3417, + "step": 4706500 + }, + { + "epoch": 2.82, + "learning_rate": 3.628052119945233e-05, + "loss": 0.3386, + "step": 4707000 + }, + { + "epoch": 2.82, + "learning_rate": 3.627842123389176e-05, + "loss": 0.3411, + "step": 4707500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6276325468262316e-05, + "loss": 0.3445, + "step": 4708000 + }, + { + "epoch": 2.82, + "learning_rate": 3.6274229702632876e-05, + "loss": 0.3455, + "step": 4708500 + }, + { + "epoch": 2.82, + "learning_rate": 3.6272129737072316e-05, + "loss": 0.3432, + "step": 4709000 + }, + { + "epoch": 2.82, + "learning_rate": 3.627002977151175e-05, + "loss": 0.3498, + "step": 4709500 + }, + { + "epoch": 2.82, + "learning_rate": 3.626792980595118e-05, + "loss": 0.3352, + "step": 4710000 + }, + { + "epoch": 2.82, + "learning_rate": 3.626582984039062e-05, + "loss": 0.34, + "step": 4710500 + }, + { + "epoch": 2.82, + "learning_rate": 3.626373407476118e-05, + "loss": 0.3451, + "step": 4711000 + }, + { + "epoch": 2.82, + "learning_rate": 3.626163410920061e-05, + "loss": 0.3398, + "step": 4711500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6259534143640044e-05, + "loss": 0.3409, + "step": 4712000 + }, + { + "epoch": 2.83, + "learning_rate": 3.625743417807948e-05, + "loss": 0.3379, + "step": 4712500 + }, + { + "epoch": 2.83, + "learning_rate": 3.625533421251891e-05, + "loss": 0.3434, + "step": 4713000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6253234246958344e-05, + "loss": 0.3377, + "step": 4713500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6251134281397785e-05, + "loss": 0.3416, + "step": 4714000 + }, + { + "epoch": 2.83, + "learning_rate": 3.624903431583722e-05, + "loss": 0.3458, + "step": 4714500 + }, + { + "epoch": 2.83, + "learning_rate": 3.624693435027665e-05, + "loss": 0.3585, + "step": 4715000 + }, + { + "epoch": 2.83, + "learning_rate": 3.624483438471609e-05, + "loss": 0.3396, + "step": 4715500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6242734419155525e-05, + "loss": 0.3479, + "step": 4716000 + }, + { + "epoch": 2.83, + "learning_rate": 3.624063445359496e-05, + "loss": 0.3416, + "step": 4716500 + }, + { + "epoch": 2.83, + "learning_rate": 3.62385344880344e-05, + "loss": 0.3428, + "step": 4717000 + }, + { + "epoch": 2.83, + "learning_rate": 3.623643452247383e-05, + "loss": 0.3462, + "step": 4717500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6234338756844386e-05, + "loss": 0.3409, + "step": 4718000 + }, + { + "epoch": 2.83, + "learning_rate": 3.623224299121494e-05, + "loss": 0.343, + "step": 4718500 + }, + { + "epoch": 2.83, + "learning_rate": 3.623014302565438e-05, + "loss": 0.3411, + "step": 4719000 + }, + { + "epoch": 2.83, + "learning_rate": 3.622804306009381e-05, + "loss": 0.3373, + "step": 4719500 + }, + { + "epoch": 2.83, + "learning_rate": 3.622594309453325e-05, + "loss": 0.3432, + "step": 4720000 + }, + { + "epoch": 2.83, + "learning_rate": 3.622384312897269e-05, + "loss": 0.3386, + "step": 4720500 + }, + { + "epoch": 2.83, + "learning_rate": 3.622174316341212e-05, + "loss": 0.3435, + "step": 4721000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6219643197851554e-05, + "loss": 0.3292, + "step": 4721500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6217543232290994e-05, + "loss": 0.349, + "step": 4722000 + }, + { + "epoch": 2.83, + "learning_rate": 3.621544326673043e-05, + "loss": 0.3394, + "step": 4722500 + }, + { + "epoch": 2.83, + "learning_rate": 3.621334750110098e-05, + "loss": 0.3428, + "step": 4723000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6211247535540415e-05, + "loss": 0.3413, + "step": 4723500 + }, + { + "epoch": 2.83, + "learning_rate": 3.6209147569979855e-05, + "loss": 0.34, + "step": 4724000 + }, + { + "epoch": 2.83, + "learning_rate": 3.620704760441929e-05, + "loss": 0.3453, + "step": 4724500 + }, + { + "epoch": 2.83, + "learning_rate": 3.620494763885872e-05, + "loss": 0.3522, + "step": 4725000 + }, + { + "epoch": 2.83, + "learning_rate": 3.6202847673298155e-05, + "loss": 0.3314, + "step": 4725500 + }, + { + "epoch": 2.83, + "learning_rate": 3.620074770773759e-05, + "loss": 0.3316, + "step": 4726000 + }, + { + "epoch": 2.83, + "learning_rate": 3.619865194210815e-05, + "loss": 0.3396, + "step": 4726500 + }, + { + "epoch": 2.83, + "learning_rate": 3.619655197654759e-05, + "loss": 0.3382, + "step": 4727000 + }, + { + "epoch": 2.83, + "learning_rate": 3.619445201098702e-05, + "loss": 0.3413, + "step": 4727500 + }, + { + "epoch": 2.83, + "learning_rate": 3.619235204542645e-05, + "loss": 0.3473, + "step": 4728000 + }, + { + "epoch": 2.83, + "learning_rate": 3.619025207986589e-05, + "loss": 0.3464, + "step": 4728500 + }, + { + "epoch": 2.84, + "learning_rate": 3.618815211430532e-05, + "loss": 0.3326, + "step": 4729000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6186052148744757e-05, + "loss": 0.3374, + "step": 4729500 + }, + { + "epoch": 2.84, + "learning_rate": 3.618395638311531e-05, + "loss": 0.3454, + "step": 4730000 + }, + { + "epoch": 2.84, + "learning_rate": 3.618185641755475e-05, + "loss": 0.3441, + "step": 4730500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6179756451994184e-05, + "loss": 0.3453, + "step": 4731000 + }, + { + "epoch": 2.84, + "learning_rate": 3.617765648643362e-05, + "loss": 0.3429, + "step": 4731500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6175560720804184e-05, + "loss": 0.3474, + "step": 4732000 + }, + { + "epoch": 2.84, + "learning_rate": 3.617346075524361e-05, + "loss": 0.3527, + "step": 4732500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6171360789683045e-05, + "loss": 0.3378, + "step": 4733000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6169260824122485e-05, + "loss": 0.3509, + "step": 4733500 + }, + { + "epoch": 2.84, + "learning_rate": 3.616716085856192e-05, + "loss": 0.3423, + "step": 4734000 + }, + { + "epoch": 2.84, + "learning_rate": 3.616506509293248e-05, + "loss": 0.3468, + "step": 4734500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6162965127371905e-05, + "loss": 0.3459, + "step": 4735000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6160865161811346e-05, + "loss": 0.3432, + "step": 4735500 + }, + { + "epoch": 2.84, + "learning_rate": 3.615876519625078e-05, + "loss": 0.3369, + "step": 4736000 + }, + { + "epoch": 2.84, + "learning_rate": 3.615666523069021e-05, + "loss": 0.3409, + "step": 4736500 + }, + { + "epoch": 2.84, + "learning_rate": 3.615456526512965e-05, + "loss": 0.3447, + "step": 4737000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6152465299569086e-05, + "loss": 0.3437, + "step": 4737500 + }, + { + "epoch": 2.84, + "learning_rate": 3.615036953393964e-05, + "loss": 0.3383, + "step": 4738000 + }, + { + "epoch": 2.84, + "learning_rate": 3.614826956837907e-05, + "loss": 0.3325, + "step": 4738500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6146169602818513e-05, + "loss": 0.3463, + "step": 4739000 + }, + { + "epoch": 2.84, + "learning_rate": 3.614406963725795e-05, + "loss": 0.347, + "step": 4739500 + }, + { + "epoch": 2.84, + "learning_rate": 3.614196967169739e-05, + "loss": 0.3392, + "step": 4740000 + }, + { + "epoch": 2.84, + "learning_rate": 3.613986970613682e-05, + "loss": 0.3379, + "step": 4740500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6137769740576254e-05, + "loss": 0.3416, + "step": 4741000 + }, + { + "epoch": 2.84, + "learning_rate": 3.6135669775015694e-05, + "loss": 0.3431, + "step": 4741500 + }, + { + "epoch": 2.84, + "learning_rate": 3.613357400938625e-05, + "loss": 0.3519, + "step": 4742000 + }, + { + "epoch": 2.84, + "learning_rate": 3.61314782437568e-05, + "loss": 0.3455, + "step": 4742500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6129378278196235e-05, + "loss": 0.3422, + "step": 4743000 + }, + { + "epoch": 2.84, + "learning_rate": 3.612727831263567e-05, + "loss": 0.3394, + "step": 4743500 + }, + { + "epoch": 2.84, + "learning_rate": 3.612517834707511e-05, + "loss": 0.3527, + "step": 4744000 + }, + { + "epoch": 2.84, + "learning_rate": 3.612307838151454e-05, + "loss": 0.3329, + "step": 4744500 + }, + { + "epoch": 2.84, + "learning_rate": 3.6120978415953976e-05, + "loss": 0.3415, + "step": 4745000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6118878450393416e-05, + "loss": 0.3469, + "step": 4745500 + }, + { + "epoch": 2.85, + "learning_rate": 3.611677848483285e-05, + "loss": 0.3533, + "step": 4746000 + }, + { + "epoch": 2.85, + "learning_rate": 3.611468691913453e-05, + "loss": 0.3368, + "step": 4746500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6112586953573956e-05, + "loss": 0.3431, + "step": 4747000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6110486988013397e-05, + "loss": 0.3488, + "step": 4747500 + }, + { + "epoch": 2.85, + "learning_rate": 3.610838702245283e-05, + "loss": 0.3409, + "step": 4748000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6106287056892264e-05, + "loss": 0.3402, + "step": 4748500 + }, + { + "epoch": 2.85, + "learning_rate": 3.610419129126282e-05, + "loss": 0.3472, + "step": 4749000 + }, + { + "epoch": 2.85, + "learning_rate": 3.610209132570226e-05, + "loss": 0.3455, + "step": 4749500 + }, + { + "epoch": 2.85, + "learning_rate": 3.609999136014169e-05, + "loss": 0.3365, + "step": 4750000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6097891394581124e-05, + "loss": 0.3333, + "step": 4750500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6095791429020564e-05, + "loss": 0.3444, + "step": 4751000 + }, + { + "epoch": 2.85, + "learning_rate": 3.609369146346e-05, + "loss": 0.3397, + "step": 4751500 + }, + { + "epoch": 2.85, + "learning_rate": 3.609159149789943e-05, + "loss": 0.3505, + "step": 4752000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608949573226999e-05, + "loss": 0.3351, + "step": 4752500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6087395766709425e-05, + "loss": 0.3379, + "step": 4753000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608529580114886e-05, + "loss": 0.3467, + "step": 4753500 + }, + { + "epoch": 2.85, + "learning_rate": 3.608320003551941e-05, + "loss": 0.3561, + "step": 4754000 + }, + { + "epoch": 2.85, + "learning_rate": 3.608110006995885e-05, + "loss": 0.3414, + "step": 4754500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6079000104398286e-05, + "loss": 0.3328, + "step": 4755000 + }, + { + "epoch": 2.85, + "learning_rate": 3.607690013883772e-05, + "loss": 0.3419, + "step": 4755500 + }, + { + "epoch": 2.85, + "learning_rate": 3.607480017327716e-05, + "loss": 0.3502, + "step": 4756000 + }, + { + "epoch": 2.85, + "learning_rate": 3.607270020771659e-05, + "loss": 0.3374, + "step": 4756500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6070600242156027e-05, + "loss": 0.3475, + "step": 4757000 + }, + { + "epoch": 2.85, + "learning_rate": 3.606850027659547e-05, + "loss": 0.3491, + "step": 4757500 + }, + { + "epoch": 2.85, + "learning_rate": 3.606640451096602e-05, + "loss": 0.3416, + "step": 4758000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6064304545405454e-05, + "loss": 0.3337, + "step": 4758500 + }, + { + "epoch": 2.85, + "learning_rate": 3.606220457984489e-05, + "loss": 0.3356, + "step": 4759000 + }, + { + "epoch": 2.85, + "learning_rate": 3.606010461428433e-05, + "loss": 0.3377, + "step": 4759500 + }, + { + "epoch": 2.85, + "learning_rate": 3.605800464872376e-05, + "loss": 0.3431, + "step": 4760000 + }, + { + "epoch": 2.85, + "learning_rate": 3.6055904683163194e-05, + "loss": 0.3483, + "step": 4760500 + }, + { + "epoch": 2.85, + "learning_rate": 3.6053808917533755e-05, + "loss": 0.3539, + "step": 4761000 + }, + { + "epoch": 2.85, + "learning_rate": 3.605170895197319e-05, + "loss": 0.3419, + "step": 4761500 + }, + { + "epoch": 2.86, + "learning_rate": 3.604960898641262e-05, + "loss": 0.3376, + "step": 4762000 + }, + { + "epoch": 2.86, + "learning_rate": 3.604750902085206e-05, + "loss": 0.3315, + "step": 4762500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6045409055291495e-05, + "loss": 0.3579, + "step": 4763000 + }, + { + "epoch": 2.86, + "learning_rate": 3.604330908973093e-05, + "loss": 0.3539, + "step": 4763500 + }, + { + "epoch": 2.86, + "learning_rate": 3.604120912417037e-05, + "loss": 0.3506, + "step": 4764000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6039109158609796e-05, + "loss": 0.3405, + "step": 4764500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6037013392980356e-05, + "loss": 0.3489, + "step": 4765000 + }, + { + "epoch": 2.86, + "learning_rate": 3.603491342741979e-05, + "loss": 0.3446, + "step": 4765500 + }, + { + "epoch": 2.86, + "learning_rate": 3.603281346185923e-05, + "loss": 0.3367, + "step": 4766000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6030713496298656e-05, + "loss": 0.338, + "step": 4766500 + }, + { + "epoch": 2.86, + "learning_rate": 3.602861353073809e-05, + "loss": 0.3422, + "step": 4767000 + }, + { + "epoch": 2.86, + "learning_rate": 3.602651356517753e-05, + "loss": 0.3427, + "step": 4767500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6024413599616964e-05, + "loss": 0.3447, + "step": 4768000 + }, + { + "epoch": 2.86, + "learning_rate": 3.60223136340564e-05, + "loss": 0.3385, + "step": 4768500 + }, + { + "epoch": 2.86, + "learning_rate": 3.602021786842696e-05, + "loss": 0.3606, + "step": 4769000 + }, + { + "epoch": 2.86, + "learning_rate": 3.601811790286639e-05, + "loss": 0.3434, + "step": 4769500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6016017937305824e-05, + "loss": 0.3432, + "step": 4770000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6013917971745265e-05, + "loss": 0.3387, + "step": 4770500 + }, + { + "epoch": 2.86, + "learning_rate": 3.60118180061847e-05, + "loss": 0.346, + "step": 4771000 + }, + { + "epoch": 2.86, + "learning_rate": 3.600972224055525e-05, + "loss": 0.3428, + "step": 4771500 + }, + { + "epoch": 2.86, + "learning_rate": 3.6007622274994685e-05, + "loss": 0.3458, + "step": 4772000 + }, + { + "epoch": 2.86, + "learning_rate": 3.6005522309434125e-05, + "loss": 0.3426, + "step": 4772500 + }, + { + "epoch": 2.86, + "learning_rate": 3.600342234387356e-05, + "loss": 0.3395, + "step": 4773000 + }, + { + "epoch": 2.86, + "learning_rate": 3.600132237831299e-05, + "loss": 0.3427, + "step": 4773500 + }, + { + "epoch": 2.86, + "learning_rate": 3.5999226612683546e-05, + "loss": 0.3463, + "step": 4774000 + }, + { + "epoch": 2.86, + "learning_rate": 3.5997126647122986e-05, + "loss": 0.3454, + "step": 4774500 + }, + { + "epoch": 2.86, + "learning_rate": 3.599502668156242e-05, + "loss": 0.3401, + "step": 4775000 + }, + { + "epoch": 2.86, + "learning_rate": 3.599292671600185e-05, + "loss": 0.3381, + "step": 4775500 + }, + { + "epoch": 2.86, + "learning_rate": 3.599082675044129e-05, + "loss": 0.3426, + "step": 4776000 + }, + { + "epoch": 2.86, + "learning_rate": 3.598872678488073e-05, + "loss": 0.3366, + "step": 4776500 + }, + { + "epoch": 2.86, + "learning_rate": 3.598662681932016e-05, + "loss": 0.3419, + "step": 4777000 + }, + { + "epoch": 2.86, + "learning_rate": 3.598453105369072e-05, + "loss": 0.3397, + "step": 4777500 + }, + { + "epoch": 2.86, + "learning_rate": 3.5982431088130154e-05, + "loss": 0.3445, + "step": 4778000 + }, + { + "epoch": 2.86, + "learning_rate": 3.598033532250071e-05, + "loss": 0.3352, + "step": 4778500 + }, + { + "epoch": 2.87, + "learning_rate": 3.597823535694014e-05, + "loss": 0.3414, + "step": 4779000 + }, + { + "epoch": 2.87, + "learning_rate": 3.597613539137958e-05, + "loss": 0.3542, + "step": 4779500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5974035425819015e-05, + "loss": 0.3502, + "step": 4780000 + }, + { + "epoch": 2.87, + "learning_rate": 3.597193546025845e-05, + "loss": 0.3383, + "step": 4780500 + }, + { + "epoch": 2.87, + "learning_rate": 3.596983549469789e-05, + "loss": 0.3458, + "step": 4781000 + }, + { + "epoch": 2.87, + "learning_rate": 3.596773552913732e-05, + "loss": 0.3404, + "step": 4781500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5965635563576755e-05, + "loss": 0.3366, + "step": 4782000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5963535598016196e-05, + "loss": 0.3371, + "step": 4782500 + }, + { + "epoch": 2.87, + "learning_rate": 3.596143563245563e-05, + "loss": 0.3396, + "step": 4783000 + }, + { + "epoch": 2.87, + "learning_rate": 3.595933986682618e-05, + "loss": 0.3392, + "step": 4783500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5957239901265616e-05, + "loss": 0.3394, + "step": 4784000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5955139935705056e-05, + "loss": 0.3451, + "step": 4784500 + }, + { + "epoch": 2.87, + "learning_rate": 3.595303997014449e-05, + "loss": 0.3298, + "step": 4785000 + }, + { + "epoch": 2.87, + "learning_rate": 3.595094420451504e-05, + "loss": 0.3537, + "step": 4785500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5948844238954484e-05, + "loss": 0.3382, + "step": 4786000 + }, + { + "epoch": 2.87, + "learning_rate": 3.594674427339392e-05, + "loss": 0.345, + "step": 4786500 + }, + { + "epoch": 2.87, + "learning_rate": 3.594464430783335e-05, + "loss": 0.3346, + "step": 4787000 + }, + { + "epoch": 2.87, + "learning_rate": 3.594254434227279e-05, + "loss": 0.3455, + "step": 4787500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5940448576643344e-05, + "loss": 0.3408, + "step": 4788000 + }, + { + "epoch": 2.87, + "learning_rate": 3.593834861108278e-05, + "loss": 0.3422, + "step": 4788500 + }, + { + "epoch": 2.87, + "learning_rate": 3.593624864552221e-05, + "loss": 0.3528, + "step": 4789000 + }, + { + "epoch": 2.87, + "learning_rate": 3.593414867996165e-05, + "loss": 0.3407, + "step": 4789500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5932048714401085e-05, + "loss": 0.3431, + "step": 4790000 + }, + { + "epoch": 2.87, + "learning_rate": 3.592994874884052e-05, + "loss": 0.3435, + "step": 4790500 + }, + { + "epoch": 2.87, + "learning_rate": 3.592785298321107e-05, + "loss": 0.3421, + "step": 4791000 + }, + { + "epoch": 2.87, + "learning_rate": 3.592575301765051e-05, + "loss": 0.343, + "step": 4791500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5923653052089946e-05, + "loss": 0.3447, + "step": 4792000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5921553086529386e-05, + "loss": 0.3377, + "step": 4792500 + }, + { + "epoch": 2.87, + "learning_rate": 3.591945312096882e-05, + "loss": 0.3406, + "step": 4793000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5917353155408246e-05, + "loss": 0.3388, + "step": 4793500 + }, + { + "epoch": 2.87, + "learning_rate": 3.5915257389778806e-05, + "loss": 0.3408, + "step": 4794000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5913157424218247e-05, + "loss": 0.3414, + "step": 4794500 + }, + { + "epoch": 2.87, + "learning_rate": 3.591105745865768e-05, + "loss": 0.3515, + "step": 4795000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5908957493097113e-05, + "loss": 0.3438, + "step": 4795500 + }, + { + "epoch": 2.88, + "learning_rate": 3.590685752753655e-05, + "loss": 0.3471, + "step": 4796000 + }, + { + "epoch": 2.88, + "learning_rate": 3.590476176190711e-05, + "loss": 0.3376, + "step": 4796500 + }, + { + "epoch": 2.88, + "learning_rate": 3.590266179634654e-05, + "loss": 0.3329, + "step": 4797000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5900561830785974e-05, + "loss": 0.3409, + "step": 4797500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5898461865225414e-05, + "loss": 0.3399, + "step": 4798000 + }, + { + "epoch": 2.88, + "learning_rate": 3.589636189966484e-05, + "loss": 0.3437, + "step": 4798500 + }, + { + "epoch": 2.88, + "learning_rate": 3.589426193410428e-05, + "loss": 0.341, + "step": 4799000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5892161968543715e-05, + "loss": 0.337, + "step": 4799500 + }, + { + "epoch": 2.88, + "learning_rate": 3.589006200298315e-05, + "loss": 0.334, + "step": 4800000 + }, + { + "epoch": 2.88, + "eval_loss": 0.33047863841056824, + "eval_runtime": 1120.6954, + "eval_samples_per_second": 469.994, + "eval_steps_per_second": 78.333, + "step": 4800000 + }, + { + "epoch": 2.88, + "learning_rate": 3.588796623735371e-05, + "loss": 0.3471, + "step": 4800500 + }, + { + "epoch": 2.88, + "learning_rate": 3.588587047172426e-05, + "loss": 0.3363, + "step": 4801000 + }, + { + "epoch": 2.88, + "learning_rate": 3.58837705061637e-05, + "loss": 0.3381, + "step": 4801500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5881670540603136e-05, + "loss": 0.3363, + "step": 4802000 + }, + { + "epoch": 2.88, + "learning_rate": 3.587957057504257e-05, + "loss": 0.3478, + "step": 4802500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5877470609482e-05, + "loss": 0.347, + "step": 4803000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5875370643921436e-05, + "loss": 0.3462, + "step": 4803500 + }, + { + "epoch": 2.88, + "learning_rate": 3.587327067836087e-05, + "loss": 0.3476, + "step": 4804000 + }, + { + "epoch": 2.88, + "learning_rate": 3.587117071280031e-05, + "loss": 0.3369, + "step": 4804500 + }, + { + "epoch": 2.88, + "learning_rate": 3.586907494717087e-05, + "loss": 0.3403, + "step": 4805000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5866979181541424e-05, + "loss": 0.3448, + "step": 4805500 + }, + { + "epoch": 2.88, + "learning_rate": 3.586487921598086e-05, + "loss": 0.3339, + "step": 4806000 + }, + { + "epoch": 2.88, + "learning_rate": 3.58627792504203e-05, + "loss": 0.3382, + "step": 4806500 + }, + { + "epoch": 2.88, + "learning_rate": 3.586067928485973e-05, + "loss": 0.341, + "step": 4807000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5858579319299164e-05, + "loss": 0.3402, + "step": 4807500 + }, + { + "epoch": 2.88, + "learning_rate": 3.58564793537386e-05, + "loss": 0.3396, + "step": 4808000 + }, + { + "epoch": 2.88, + "learning_rate": 3.585437938817803e-05, + "loss": 0.3341, + "step": 4808500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5852279422617465e-05, + "loss": 0.3527, + "step": 4809000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5850183656988025e-05, + "loss": 0.3442, + "step": 4809500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5848083691427465e-05, + "loss": 0.3447, + "step": 4810000 + }, + { + "epoch": 2.88, + "learning_rate": 3.584598372586689e-05, + "loss": 0.3478, + "step": 4810500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5843883760306326e-05, + "loss": 0.3317, + "step": 4811000 + }, + { + "epoch": 2.88, + "learning_rate": 3.5841787994676886e-05, + "loss": 0.3335, + "step": 4811500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5839688029116326e-05, + "loss": 0.3351, + "step": 4812000 + }, + { + "epoch": 2.89, + "learning_rate": 3.583758806355575e-05, + "loss": 0.3392, + "step": 4812500 + }, + { + "epoch": 2.89, + "learning_rate": 3.583548809799519e-05, + "loss": 0.3289, + "step": 4813000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5833388132434627e-05, + "loss": 0.3478, + "step": 4813500 + }, + { + "epoch": 2.89, + "learning_rate": 3.583129236680519e-05, + "loss": 0.3313, + "step": 4814000 + }, + { + "epoch": 2.89, + "learning_rate": 3.582919240124462e-05, + "loss": 0.3349, + "step": 4814500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5827092435684054e-05, + "loss": 0.3412, + "step": 4815000 + }, + { + "epoch": 2.89, + "learning_rate": 3.582499247012349e-05, + "loss": 0.3342, + "step": 4815500 + }, + { + "epoch": 2.89, + "learning_rate": 3.582289670449405e-05, + "loss": 0.3466, + "step": 4816000 + }, + { + "epoch": 2.89, + "learning_rate": 3.58208009388646e-05, + "loss": 0.3465, + "step": 4816500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5818700973304035e-05, + "loss": 0.3368, + "step": 4817000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5816601007743475e-05, + "loss": 0.3463, + "step": 4817500 + }, + { + "epoch": 2.89, + "learning_rate": 3.581450104218291e-05, + "loss": 0.3332, + "step": 4818000 + }, + { + "epoch": 2.89, + "learning_rate": 3.581240527655346e-05, + "loss": 0.3443, + "step": 4818500 + }, + { + "epoch": 2.89, + "learning_rate": 3.58103053109929e-05, + "loss": 0.3278, + "step": 4819000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5808205345432336e-05, + "loss": 0.3429, + "step": 4819500 + }, + { + "epoch": 2.89, + "learning_rate": 3.580610537987177e-05, + "loss": 0.3324, + "step": 4820000 + }, + { + "epoch": 2.89, + "learning_rate": 3.580400541431121e-05, + "loss": 0.3459, + "step": 4820500 + }, + { + "epoch": 2.89, + "learning_rate": 3.580190544875064e-05, + "loss": 0.3415, + "step": 4821000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5799805483190076e-05, + "loss": 0.3396, + "step": 4821500 + }, + { + "epoch": 2.89, + "learning_rate": 3.579770551762951e-05, + "loss": 0.3406, + "step": 4822000 + }, + { + "epoch": 2.89, + "learning_rate": 3.579560975200007e-05, + "loss": 0.3385, + "step": 4822500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5793509786439504e-05, + "loss": 0.346, + "step": 4823000 + }, + { + "epoch": 2.89, + "learning_rate": 3.579140982087894e-05, + "loss": 0.3406, + "step": 4823500 + }, + { + "epoch": 2.89, + "learning_rate": 3.578930985531838e-05, + "loss": 0.3422, + "step": 4824000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5787209889757804e-05, + "loss": 0.3415, + "step": 4824500 + }, + { + "epoch": 2.89, + "learning_rate": 3.578510992419724e-05, + "loss": 0.3405, + "step": 4825000 + }, + { + "epoch": 2.89, + "learning_rate": 3.57830141585678e-05, + "loss": 0.3421, + "step": 4825500 + }, + { + "epoch": 2.89, + "learning_rate": 3.578091419300724e-05, + "loss": 0.3379, + "step": 4826000 + }, + { + "epoch": 2.89, + "learning_rate": 3.577881842737779e-05, + "loss": 0.345, + "step": 4826500 + }, + { + "epoch": 2.89, + "learning_rate": 3.5776718461817225e-05, + "loss": 0.3375, + "step": 4827000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5774618496256665e-05, + "loss": 0.3389, + "step": 4827500 + }, + { + "epoch": 2.89, + "learning_rate": 3.57725185306961e-05, + "loss": 0.3421, + "step": 4828000 + }, + { + "epoch": 2.89, + "learning_rate": 3.577041856513553e-05, + "loss": 0.3519, + "step": 4828500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5768322799506086e-05, + "loss": 0.3353, + "step": 4829000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5766222833945526e-05, + "loss": 0.3333, + "step": 4829500 + }, + { + "epoch": 2.9, + "learning_rate": 3.576412286838496e-05, + "loss": 0.3373, + "step": 4830000 + }, + { + "epoch": 2.9, + "learning_rate": 3.576202710275551e-05, + "loss": 0.3462, + "step": 4830500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5759927137194946e-05, + "loss": 0.3412, + "step": 4831000 + }, + { + "epoch": 2.9, + "learning_rate": 3.575782717163439e-05, + "loss": 0.3371, + "step": 4831500 + }, + { + "epoch": 2.9, + "learning_rate": 3.575572720607382e-05, + "loss": 0.3495, + "step": 4832000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5753627240513254e-05, + "loss": 0.3405, + "step": 4832500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5751527274952694e-05, + "loss": 0.3512, + "step": 4833000 + }, + { + "epoch": 2.9, + "learning_rate": 3.574942730939213e-05, + "loss": 0.3441, + "step": 4833500 + }, + { + "epoch": 2.9, + "learning_rate": 3.574732734383156e-05, + "loss": 0.3443, + "step": 4834000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5745227378270994e-05, + "loss": 0.3425, + "step": 4834500 + }, + { + "epoch": 2.9, + "learning_rate": 3.574312741271043e-05, + "loss": 0.3486, + "step": 4835000 + }, + { + "epoch": 2.9, + "learning_rate": 3.574102744714987e-05, + "loss": 0.3352, + "step": 4835500 + }, + { + "epoch": 2.9, + "learning_rate": 3.57389274815893e-05, + "loss": 0.3352, + "step": 4836000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5736827516028735e-05, + "loss": 0.3391, + "step": 4836500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5734727550468175e-05, + "loss": 0.3406, + "step": 4837000 + }, + { + "epoch": 2.9, + "learning_rate": 3.573262758490761e-05, + "loss": 0.3482, + "step": 4837500 + }, + { + "epoch": 2.9, + "learning_rate": 3.573052761934704e-05, + "loss": 0.3464, + "step": 4838000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5728431853717596e-05, + "loss": 0.3369, + "step": 4838500 + }, + { + "epoch": 2.9, + "learning_rate": 3.572633608808815e-05, + "loss": 0.3456, + "step": 4839000 + }, + { + "epoch": 2.9, + "learning_rate": 3.572424032245871e-05, + "loss": 0.3485, + "step": 4839500 + }, + { + "epoch": 2.9, + "learning_rate": 3.572214035689815e-05, + "loss": 0.3364, + "step": 4840000 + }, + { + "epoch": 2.9, + "learning_rate": 3.572004039133758e-05, + "loss": 0.3484, + "step": 4840500 + }, + { + "epoch": 2.9, + "learning_rate": 3.571794042577702e-05, + "loss": 0.3361, + "step": 4841000 + }, + { + "epoch": 2.9, + "learning_rate": 3.571584046021645e-05, + "loss": 0.3418, + "step": 4841500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5713740494655884e-05, + "loss": 0.3528, + "step": 4842000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5711640529095324e-05, + "loss": 0.3402, + "step": 4842500 + }, + { + "epoch": 2.9, + "learning_rate": 3.570954056353476e-05, + "loss": 0.3448, + "step": 4843000 + }, + { + "epoch": 2.9, + "learning_rate": 3.570744059797419e-05, + "loss": 0.3321, + "step": 4843500 + }, + { + "epoch": 2.9, + "learning_rate": 3.570534063241363e-05, + "loss": 0.3349, + "step": 4844000 + }, + { + "epoch": 2.9, + "learning_rate": 3.5703240666853064e-05, + "loss": 0.342, + "step": 4844500 + }, + { + "epoch": 2.9, + "learning_rate": 3.57011407012925e-05, + "loss": 0.3451, + "step": 4845000 + }, + { + "epoch": 2.91, + "learning_rate": 3.569904073573194e-05, + "loss": 0.3386, + "step": 4845500 + }, + { + "epoch": 2.91, + "learning_rate": 3.569694497010249e-05, + "loss": 0.3449, + "step": 4846000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5694845004541925e-05, + "loss": 0.3423, + "step": 4846500 + }, + { + "epoch": 2.91, + "learning_rate": 3.569274923891248e-05, + "loss": 0.3477, + "step": 4847000 + }, + { + "epoch": 2.91, + "learning_rate": 3.569064927335191e-05, + "loss": 0.3401, + "step": 4847500 + }, + { + "epoch": 2.91, + "learning_rate": 3.568854930779135e-05, + "loss": 0.3437, + "step": 4848000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5686449342230786e-05, + "loss": 0.3426, + "step": 4848500 + }, + { + "epoch": 2.91, + "learning_rate": 3.568434937667022e-05, + "loss": 0.3313, + "step": 4849000 + }, + { + "epoch": 2.91, + "learning_rate": 3.568225361104078e-05, + "loss": 0.3429, + "step": 4849500 + }, + { + "epoch": 2.91, + "learning_rate": 3.568015364548021e-05, + "loss": 0.3473, + "step": 4850000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5678053679919647e-05, + "loss": 0.3426, + "step": 4850500 + }, + { + "epoch": 2.91, + "learning_rate": 3.567595371435909e-05, + "loss": 0.3459, + "step": 4851000 + }, + { + "epoch": 2.91, + "learning_rate": 3.567385794872964e-05, + "loss": 0.3321, + "step": 4851500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5671757983169074e-05, + "loss": 0.3423, + "step": 4852000 + }, + { + "epoch": 2.91, + "learning_rate": 3.566965801760851e-05, + "loss": 0.3464, + "step": 4852500 + }, + { + "epoch": 2.91, + "learning_rate": 3.566755805204795e-05, + "loss": 0.3433, + "step": 4853000 + }, + { + "epoch": 2.91, + "learning_rate": 3.566545808648738e-05, + "loss": 0.3342, + "step": 4853500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5663362320857935e-05, + "loss": 0.3343, + "step": 4854000 + }, + { + "epoch": 2.91, + "learning_rate": 3.566126235529737e-05, + "loss": 0.3516, + "step": 4854500 + }, + { + "epoch": 2.91, + "learning_rate": 3.565916238973681e-05, + "loss": 0.3265, + "step": 4855000 + }, + { + "epoch": 2.91, + "learning_rate": 3.565706242417624e-05, + "loss": 0.3382, + "step": 4855500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5654962458615675e-05, + "loss": 0.3356, + "step": 4856000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5652862493055115e-05, + "loss": 0.3372, + "step": 4856500 + }, + { + "epoch": 2.91, + "learning_rate": 3.565076672742567e-05, + "loss": 0.3454, + "step": 4857000 + }, + { + "epoch": 2.91, + "learning_rate": 3.56486667618651e-05, + "loss": 0.3425, + "step": 4857500 + }, + { + "epoch": 2.91, + "learning_rate": 3.564656679630454e-05, + "loss": 0.3311, + "step": 4858000 + }, + { + "epoch": 2.91, + "learning_rate": 3.5644466830743976e-05, + "loss": 0.3437, + "step": 4858500 + }, + { + "epoch": 2.91, + "learning_rate": 3.564237106511453e-05, + "loss": 0.3481, + "step": 4859000 + }, + { + "epoch": 2.91, + "learning_rate": 3.564027109955396e-05, + "loss": 0.3455, + "step": 4859500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5638171133993403e-05, + "loss": 0.3479, + "step": 4860000 + }, + { + "epoch": 2.91, + "learning_rate": 3.563607116843284e-05, + "loss": 0.3407, + "step": 4860500 + }, + { + "epoch": 2.91, + "learning_rate": 3.563397120287227e-05, + "loss": 0.3444, + "step": 4861000 + }, + { + "epoch": 2.91, + "learning_rate": 3.563187123731171e-05, + "loss": 0.3443, + "step": 4861500 + }, + { + "epoch": 2.91, + "learning_rate": 3.5629771271751144e-05, + "loss": 0.3387, + "step": 4862000 + }, + { + "epoch": 2.92, + "learning_rate": 3.56276755061217e-05, + "loss": 0.3346, + "step": 4862500 + }, + { + "epoch": 2.92, + "learning_rate": 3.562557554056113e-05, + "loss": 0.3307, + "step": 4863000 + }, + { + "epoch": 2.92, + "learning_rate": 3.562347977493169e-05, + "loss": 0.3418, + "step": 4863500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5621379809371125e-05, + "loss": 0.345, + "step": 4864000 + }, + { + "epoch": 2.92, + "learning_rate": 3.561927984381056e-05, + "loss": 0.3482, + "step": 4864500 + }, + { + "epoch": 2.92, + "learning_rate": 3.561717987825e-05, + "loss": 0.3385, + "step": 4865000 + }, + { + "epoch": 2.92, + "learning_rate": 3.561507991268943e-05, + "loss": 0.347, + "step": 4865500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5612984147059986e-05, + "loss": 0.3456, + "step": 4866000 + }, + { + "epoch": 2.92, + "learning_rate": 3.561088418149942e-05, + "loss": 0.3427, + "step": 4866500 + }, + { + "epoch": 2.92, + "learning_rate": 3.560878421593886e-05, + "loss": 0.3484, + "step": 4867000 + }, + { + "epoch": 2.92, + "learning_rate": 3.560668425037829e-05, + "loss": 0.341, + "step": 4867500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5604584284817726e-05, + "loss": 0.3376, + "step": 4868000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5602484319257166e-05, + "loss": 0.3405, + "step": 4868500 + }, + { + "epoch": 2.92, + "learning_rate": 3.56003843536966e-05, + "loss": 0.3469, + "step": 4869000 + }, + { + "epoch": 2.92, + "learning_rate": 3.559828438813603e-05, + "loss": 0.3483, + "step": 4869500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5596184422575474e-05, + "loss": 0.348, + "step": 4870000 + }, + { + "epoch": 2.92, + "learning_rate": 3.559408865694603e-05, + "loss": 0.3357, + "step": 4870500 + }, + { + "epoch": 2.92, + "learning_rate": 3.559198869138546e-05, + "loss": 0.3349, + "step": 4871000 + }, + { + "epoch": 2.92, + "learning_rate": 3.55898887258249e-05, + "loss": 0.3445, + "step": 4871500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5587788760264334e-05, + "loss": 0.3404, + "step": 4872000 + }, + { + "epoch": 2.92, + "learning_rate": 3.558568879470377e-05, + "loss": 0.3507, + "step": 4872500 + }, + { + "epoch": 2.92, + "learning_rate": 3.55835888291432e-05, + "loss": 0.3419, + "step": 4873000 + }, + { + "epoch": 2.92, + "learning_rate": 3.5581488863582635e-05, + "loss": 0.344, + "step": 4873500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5579397297884315e-05, + "loss": 0.3481, + "step": 4874000 + }, + { + "epoch": 2.92, + "learning_rate": 3.557729733232375e-05, + "loss": 0.3383, + "step": 4874500 + }, + { + "epoch": 2.92, + "learning_rate": 3.557519736676318e-05, + "loss": 0.3356, + "step": 4875000 + }, + { + "epoch": 2.92, + "learning_rate": 3.557309740120262e-05, + "loss": 0.3473, + "step": 4875500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5570997435642056e-05, + "loss": 0.3396, + "step": 4876000 + }, + { + "epoch": 2.92, + "learning_rate": 3.556889747008149e-05, + "loss": 0.342, + "step": 4876500 + }, + { + "epoch": 2.92, + "learning_rate": 3.556679750452093e-05, + "loss": 0.338, + "step": 4877000 + }, + { + "epoch": 2.92, + "learning_rate": 3.556469753896036e-05, + "loss": 0.3389, + "step": 4877500 + }, + { + "epoch": 2.92, + "learning_rate": 3.5562597573399796e-05, + "loss": 0.3527, + "step": 4878000 + }, + { + "epoch": 2.92, + "learning_rate": 3.556049760783923e-05, + "loss": 0.3404, + "step": 4878500 + }, + { + "epoch": 2.93, + "learning_rate": 3.555840184220979e-05, + "loss": 0.3294, + "step": 4879000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5556301876649224e-05, + "loss": 0.3451, + "step": 4879500 + }, + { + "epoch": 2.93, + "learning_rate": 3.555420191108866e-05, + "loss": 0.3253, + "step": 4880000 + }, + { + "epoch": 2.93, + "learning_rate": 3.555210194552809e-05, + "loss": 0.3411, + "step": 4880500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5550001979967524e-05, + "loss": 0.3395, + "step": 4881000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5547902014406964e-05, + "loss": 0.3436, + "step": 4881500 + }, + { + "epoch": 2.93, + "learning_rate": 3.55458020488464e-05, + "loss": 0.3415, + "step": 4882000 + }, + { + "epoch": 2.93, + "learning_rate": 3.554370208328583e-05, + "loss": 0.333, + "step": 4882500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5541606317656385e-05, + "loss": 0.3357, + "step": 4883000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5539510552026945e-05, + "loss": 0.3448, + "step": 4883500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5537410586466385e-05, + "loss": 0.3385, + "step": 4884000 + }, + { + "epoch": 2.93, + "learning_rate": 3.553531062090582e-05, + "loss": 0.3501, + "step": 4884500 + }, + { + "epoch": 2.93, + "learning_rate": 3.553321065534525e-05, + "loss": 0.3388, + "step": 4885000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5531110689784686e-05, + "loss": 0.3396, + "step": 4885500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5529014924155246e-05, + "loss": 0.3337, + "step": 4886000 + }, + { + "epoch": 2.93, + "learning_rate": 3.552691495859468e-05, + "loss": 0.3379, + "step": 4886500 + }, + { + "epoch": 2.93, + "learning_rate": 3.552481499303412e-05, + "loss": 0.3387, + "step": 4887000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5522715027473546e-05, + "loss": 0.3395, + "step": 4887500 + }, + { + "epoch": 2.93, + "learning_rate": 3.552061926184411e-05, + "loss": 0.34, + "step": 4888000 + }, + { + "epoch": 2.93, + "learning_rate": 3.551851929628354e-05, + "loss": 0.3441, + "step": 4888500 + }, + { + "epoch": 2.93, + "learning_rate": 3.551641933072298e-05, + "loss": 0.3389, + "step": 4889000 + }, + { + "epoch": 2.93, + "learning_rate": 3.551431936516241e-05, + "loss": 0.334, + "step": 4889500 + }, + { + "epoch": 2.93, + "learning_rate": 3.551221939960184e-05, + "loss": 0.347, + "step": 4890000 + }, + { + "epoch": 2.93, + "learning_rate": 3.55101236339724e-05, + "loss": 0.3489, + "step": 4890500 + }, + { + "epoch": 2.93, + "learning_rate": 3.550802366841184e-05, + "loss": 0.3519, + "step": 4891000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5505923702851275e-05, + "loss": 0.3409, + "step": 4891500 + }, + { + "epoch": 2.93, + "learning_rate": 3.550382373729071e-05, + "loss": 0.3364, + "step": 4892000 + }, + { + "epoch": 2.93, + "learning_rate": 3.550172377173014e-05, + "loss": 0.338, + "step": 4892500 + }, + { + "epoch": 2.93, + "learning_rate": 3.54996280061007e-05, + "loss": 0.3458, + "step": 4893000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5497528040540135e-05, + "loss": 0.3394, + "step": 4893500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5495428074979576e-05, + "loss": 0.3438, + "step": 4894000 + }, + { + "epoch": 2.93, + "learning_rate": 3.5493328109419e-05, + "loss": 0.3481, + "step": 4894500 + }, + { + "epoch": 2.93, + "learning_rate": 3.5491228143858436e-05, + "loss": 0.3412, + "step": 4895000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5489128178297876e-05, + "loss": 0.3403, + "step": 4895500 + }, + { + "epoch": 2.94, + "learning_rate": 3.548702821273731e-05, + "loss": 0.3388, + "step": 4896000 + }, + { + "epoch": 2.94, + "learning_rate": 3.548493244710787e-05, + "loss": 0.3488, + "step": 4896500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5482832481547297e-05, + "loss": 0.3382, + "step": 4897000 + }, + { + "epoch": 2.94, + "learning_rate": 3.548073251598674e-05, + "loss": 0.3381, + "step": 4897500 + }, + { + "epoch": 2.94, + "learning_rate": 3.547863255042617e-05, + "loss": 0.3476, + "step": 4898000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5476532584865604e-05, + "loss": 0.3351, + "step": 4898500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5474436819236164e-05, + "loss": 0.3383, + "step": 4899000 + }, + { + "epoch": 2.94, + "learning_rate": 3.54723368536756e-05, + "loss": 0.3409, + "step": 4899500 + }, + { + "epoch": 2.94, + "learning_rate": 3.547023688811503e-05, + "loss": 0.3381, + "step": 4900000 + }, + { + "epoch": 2.94, + "eval_loss": 0.33070382475852966, + "eval_runtime": 1120.8105, + "eval_samples_per_second": 469.946, + "eval_steps_per_second": 78.325, + "step": 4900000 + }, + { + "epoch": 2.94, + "learning_rate": 3.546813692255447e-05, + "loss": 0.327, + "step": 4900500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5466036956993905e-05, + "loss": 0.3382, + "step": 4901000 + }, + { + "epoch": 2.94, + "learning_rate": 3.546393699143334e-05, + "loss": 0.3389, + "step": 4901500 + }, + { + "epoch": 2.94, + "learning_rate": 3.546183702587278e-05, + "loss": 0.3382, + "step": 4902000 + }, + { + "epoch": 2.94, + "learning_rate": 3.545973706031221e-05, + "loss": 0.3503, + "step": 4902500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5457641294682765e-05, + "loss": 0.3299, + "step": 4903000 + }, + { + "epoch": 2.94, + "learning_rate": 3.54555413291222e-05, + "loss": 0.3327, + "step": 4903500 + }, + { + "epoch": 2.94, + "learning_rate": 3.545344136356164e-05, + "loss": 0.3377, + "step": 4904000 + }, + { + "epoch": 2.94, + "learning_rate": 3.545134139800107e-05, + "loss": 0.3366, + "step": 4904500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5449245632371626e-05, + "loss": 0.3396, + "step": 4905000 + }, + { + "epoch": 2.94, + "learning_rate": 3.544714566681106e-05, + "loss": 0.3399, + "step": 4905500 + }, + { + "epoch": 2.94, + "learning_rate": 3.54450457012505e-05, + "loss": 0.334, + "step": 4906000 + }, + { + "epoch": 2.94, + "learning_rate": 3.544294573568993e-05, + "loss": 0.3395, + "step": 4906500 + }, + { + "epoch": 2.94, + "learning_rate": 3.544084997006049e-05, + "loss": 0.3378, + "step": 4907000 + }, + { + "epoch": 2.94, + "learning_rate": 3.543875000449993e-05, + "loss": 0.3379, + "step": 4907500 + }, + { + "epoch": 2.94, + "learning_rate": 3.543665003893936e-05, + "loss": 0.3453, + "step": 4908000 + }, + { + "epoch": 2.94, + "learning_rate": 3.543455427330992e-05, + "loss": 0.3405, + "step": 4908500 + }, + { + "epoch": 2.94, + "learning_rate": 3.543245430774935e-05, + "loss": 0.3434, + "step": 4909000 + }, + { + "epoch": 2.94, + "learning_rate": 3.543035434218879e-05, + "loss": 0.346, + "step": 4909500 + }, + { + "epoch": 2.94, + "learning_rate": 3.542825437662822e-05, + "loss": 0.348, + "step": 4910000 + }, + { + "epoch": 2.94, + "learning_rate": 3.5426154411067655e-05, + "loss": 0.3419, + "step": 4910500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5424054445507095e-05, + "loss": 0.3433, + "step": 4911000 + }, + { + "epoch": 2.94, + "learning_rate": 3.542195447994653e-05, + "loss": 0.3459, + "step": 4911500 + }, + { + "epoch": 2.94, + "learning_rate": 3.541985451438596e-05, + "loss": 0.3413, + "step": 4912000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5417758748756515e-05, + "loss": 0.3429, + "step": 4912500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5415658783195956e-05, + "loss": 0.3298, + "step": 4913000 + }, + { + "epoch": 2.95, + "learning_rate": 3.541355881763539e-05, + "loss": 0.3439, + "step": 4913500 + }, + { + "epoch": 2.95, + "learning_rate": 3.541145885207482e-05, + "loss": 0.3358, + "step": 4914000 + }, + { + "epoch": 2.95, + "learning_rate": 3.540936308644538e-05, + "loss": 0.3502, + "step": 4914500 + }, + { + "epoch": 2.95, + "learning_rate": 3.540726732081594e-05, + "loss": 0.3396, + "step": 4915000 + }, + { + "epoch": 2.95, + "learning_rate": 3.540516735525538e-05, + "loss": 0.3483, + "step": 4915500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5403067389694803e-05, + "loss": 0.3446, + "step": 4916000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5400967424134244e-05, + "loss": 0.3304, + "step": 4916500 + }, + { + "epoch": 2.95, + "learning_rate": 3.539886745857368e-05, + "loss": 0.3416, + "step": 4917000 + }, + { + "epoch": 2.95, + "learning_rate": 3.539676749301311e-05, + "loss": 0.3342, + "step": 4917500 + }, + { + "epoch": 2.95, + "learning_rate": 3.539466752745255e-05, + "loss": 0.3345, + "step": 4918000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5392567561891984e-05, + "loss": 0.3381, + "step": 4918500 + }, + { + "epoch": 2.95, + "learning_rate": 3.539047179626254e-05, + "loss": 0.3403, + "step": 4919000 + }, + { + "epoch": 2.95, + "learning_rate": 3.538837183070197e-05, + "loss": 0.3424, + "step": 4919500 + }, + { + "epoch": 2.95, + "learning_rate": 3.538627606507253e-05, + "loss": 0.3378, + "step": 4920000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5384176099511965e-05, + "loss": 0.3363, + "step": 4920500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5382080333882525e-05, + "loss": 0.3503, + "step": 4921000 + }, + { + "epoch": 2.95, + "learning_rate": 3.537998036832196e-05, + "loss": 0.3405, + "step": 4921500 + }, + { + "epoch": 2.95, + "learning_rate": 3.53778804027614e-05, + "loss": 0.3463, + "step": 4922000 + }, + { + "epoch": 2.95, + "learning_rate": 3.537578043720083e-05, + "loss": 0.3484, + "step": 4922500 + }, + { + "epoch": 2.95, + "learning_rate": 3.537368047164026e-05, + "loss": 0.3401, + "step": 4923000 + }, + { + "epoch": 2.95, + "learning_rate": 3.53715805060797e-05, + "loss": 0.3415, + "step": 4923500 + }, + { + "epoch": 2.95, + "learning_rate": 3.536948054051913e-05, + "loss": 0.3375, + "step": 4924000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5367380574958566e-05, + "loss": 0.3402, + "step": 4924500 + }, + { + "epoch": 2.95, + "learning_rate": 3.536528060939801e-05, + "loss": 0.3355, + "step": 4925000 + }, + { + "epoch": 2.95, + "learning_rate": 3.536318064383744e-05, + "loss": 0.3415, + "step": 4925500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5361084878207994e-05, + "loss": 0.3419, + "step": 4926000 + }, + { + "epoch": 2.95, + "learning_rate": 3.535898491264743e-05, + "loss": 0.336, + "step": 4926500 + }, + { + "epoch": 2.95, + "learning_rate": 3.535688494708687e-05, + "loss": 0.3362, + "step": 4927000 + }, + { + "epoch": 2.95, + "learning_rate": 3.53547849815263e-05, + "loss": 0.3372, + "step": 4927500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5352685015965734e-05, + "loss": 0.3377, + "step": 4928000 + }, + { + "epoch": 2.95, + "learning_rate": 3.5350585050405175e-05, + "loss": 0.3318, + "step": 4928500 + }, + { + "epoch": 2.96, + "learning_rate": 3.534848508484461e-05, + "loss": 0.3323, + "step": 4929000 + }, + { + "epoch": 2.96, + "learning_rate": 3.534638931921516e-05, + "loss": 0.3455, + "step": 4929500 + }, + { + "epoch": 2.96, + "learning_rate": 3.53442893536546e-05, + "loss": 0.3429, + "step": 4930000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5342193588025155e-05, + "loss": 0.3481, + "step": 4930500 + }, + { + "epoch": 2.96, + "learning_rate": 3.534009362246459e-05, + "loss": 0.3416, + "step": 4931000 + }, + { + "epoch": 2.96, + "learning_rate": 3.533799365690402e-05, + "loss": 0.342, + "step": 4931500 + }, + { + "epoch": 2.96, + "learning_rate": 3.533589369134346e-05, + "loss": 0.3406, + "step": 4932000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5333793725782896e-05, + "loss": 0.3396, + "step": 4932500 + }, + { + "epoch": 2.96, + "learning_rate": 3.533169376022233e-05, + "loss": 0.3386, + "step": 4933000 + }, + { + "epoch": 2.96, + "learning_rate": 3.532959379466177e-05, + "loss": 0.3375, + "step": 4933500 + }, + { + "epoch": 2.96, + "learning_rate": 3.53274938291012e-05, + "loss": 0.3446, + "step": 4934000 + }, + { + "epoch": 2.96, + "learning_rate": 3.532539386354064e-05, + "loss": 0.3285, + "step": 4934500 + }, + { + "epoch": 2.96, + "learning_rate": 3.532329389798008e-05, + "loss": 0.3417, + "step": 4935000 + }, + { + "epoch": 2.96, + "learning_rate": 3.532119813235063e-05, + "loss": 0.3459, + "step": 4935500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5319098166790064e-05, + "loss": 0.3421, + "step": 4936000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5316998201229504e-05, + "loss": 0.3377, + "step": 4936500 + }, + { + "epoch": 2.96, + "learning_rate": 3.531490243560006e-05, + "loss": 0.3378, + "step": 4937000 + }, + { + "epoch": 2.96, + "learning_rate": 3.531280247003949e-05, + "loss": 0.3428, + "step": 4937500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5310702504478925e-05, + "loss": 0.3383, + "step": 4938000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5308602538918365e-05, + "loss": 0.335, + "step": 4938500 + }, + { + "epoch": 2.96, + "learning_rate": 3.530650677328892e-05, + "loss": 0.3365, + "step": 4939000 + }, + { + "epoch": 2.96, + "learning_rate": 3.530440680772835e-05, + "loss": 0.345, + "step": 4939500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5302306842167785e-05, + "loss": 0.3345, + "step": 4940000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5300206876607226e-05, + "loss": 0.3406, + "step": 4940500 + }, + { + "epoch": 2.96, + "learning_rate": 3.529810691104666e-05, + "loss": 0.3419, + "step": 4941000 + }, + { + "epoch": 2.96, + "learning_rate": 3.529600694548609e-05, + "loss": 0.3371, + "step": 4941500 + }, + { + "epoch": 2.96, + "learning_rate": 3.529390697992553e-05, + "loss": 0.3398, + "step": 4942000 + }, + { + "epoch": 2.96, + "learning_rate": 3.5291807014364966e-05, + "loss": 0.3468, + "step": 4942500 + }, + { + "epoch": 2.96, + "learning_rate": 3.52897070488044e-05, + "loss": 0.3357, + "step": 4943000 + }, + { + "epoch": 2.96, + "learning_rate": 3.528760708324383e-05, + "loss": 0.3402, + "step": 4943500 + }, + { + "epoch": 2.96, + "learning_rate": 3.5285507117683267e-05, + "loss": 0.3426, + "step": 4944000 + }, + { + "epoch": 2.96, + "learning_rate": 3.528340715212271e-05, + "loss": 0.3402, + "step": 4944500 + }, + { + "epoch": 2.96, + "learning_rate": 3.528130718656214e-05, + "loss": 0.3364, + "step": 4945000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5279211420932694e-05, + "loss": 0.3338, + "step": 4945500 + }, + { + "epoch": 2.97, + "learning_rate": 3.527711145537213e-05, + "loss": 0.3345, + "step": 4946000 + }, + { + "epoch": 2.97, + "learning_rate": 3.527501148981157e-05, + "loss": 0.3329, + "step": 4946500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5272911524251e-05, + "loss": 0.352, + "step": 4947000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5270811558690434e-05, + "loss": 0.3372, + "step": 4947500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5268711593129875e-05, + "loss": 0.3426, + "step": 4948000 + }, + { + "epoch": 2.97, + "learning_rate": 3.526661162756931e-05, + "loss": 0.3414, + "step": 4948500 + }, + { + "epoch": 2.97, + "learning_rate": 3.526451586193986e-05, + "loss": 0.3407, + "step": 4949000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5262415896379295e-05, + "loss": 0.3369, + "step": 4949500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5260315930818735e-05, + "loss": 0.3492, + "step": 4950000 + }, + { + "epoch": 2.97, + "learning_rate": 3.525821596525817e-05, + "loss": 0.3453, + "step": 4950500 + }, + { + "epoch": 2.97, + "learning_rate": 3.52561159996976e-05, + "loss": 0.3446, + "step": 4951000 + }, + { + "epoch": 2.97, + "learning_rate": 3.525402023406816e-05, + "loss": 0.3377, + "step": 4951500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5251920268507596e-05, + "loss": 0.3498, + "step": 4952000 + }, + { + "epoch": 2.97, + "learning_rate": 3.524982030294703e-05, + "loss": 0.3407, + "step": 4952500 + }, + { + "epoch": 2.97, + "learning_rate": 3.524772033738647e-05, + "loss": 0.3436, + "step": 4953000 + }, + { + "epoch": 2.97, + "learning_rate": 3.52456203718259e-05, + "loss": 0.3488, + "step": 4953500 + }, + { + "epoch": 2.97, + "learning_rate": 3.524352040626534e-05, + "loss": 0.3342, + "step": 4954000 + }, + { + "epoch": 2.97, + "learning_rate": 3.524142044070478e-05, + "loss": 0.3326, + "step": 4954500 + }, + { + "epoch": 2.97, + "learning_rate": 3.523932047514421e-05, + "loss": 0.3393, + "step": 4955000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5237224709514764e-05, + "loss": 0.3517, + "step": 4955500 + }, + { + "epoch": 2.97, + "learning_rate": 3.52351247439542e-05, + "loss": 0.3416, + "step": 4956000 + }, + { + "epoch": 2.97, + "learning_rate": 3.523302477839364e-05, + "loss": 0.3374, + "step": 4956500 + }, + { + "epoch": 2.97, + "learning_rate": 3.523092481283307e-05, + "loss": 0.3432, + "step": 4957000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5228829047203625e-05, + "loss": 0.3443, + "step": 4957500 + }, + { + "epoch": 2.97, + "learning_rate": 3.522672908164306e-05, + "loss": 0.3434, + "step": 4958000 + }, + { + "epoch": 2.97, + "learning_rate": 3.52246291160825e-05, + "loss": 0.3468, + "step": 4958500 + }, + { + "epoch": 2.97, + "learning_rate": 3.522252915052193e-05, + "loss": 0.3441, + "step": 4959000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5220429184961365e-05, + "loss": 0.3469, + "step": 4959500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5218329219400806e-05, + "loss": 0.3362, + "step": 4960000 + }, + { + "epoch": 2.97, + "learning_rate": 3.521623765370248e-05, + "loss": 0.3374, + "step": 4960500 + }, + { + "epoch": 2.97, + "learning_rate": 3.521413768814191e-05, + "loss": 0.3464, + "step": 4961000 + }, + { + "epoch": 2.97, + "learning_rate": 3.5212037722581346e-05, + "loss": 0.3423, + "step": 4961500 + }, + { + "epoch": 2.97, + "learning_rate": 3.5209937757020786e-05, + "loss": 0.3357, + "step": 4962000 + }, + { + "epoch": 2.98, + "learning_rate": 3.520783779146022e-05, + "loss": 0.3522, + "step": 4962500 + }, + { + "epoch": 2.98, + "learning_rate": 3.520573782589965e-05, + "loss": 0.3466, + "step": 4963000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5203637860339094e-05, + "loss": 0.3349, + "step": 4963500 + }, + { + "epoch": 2.98, + "learning_rate": 3.520153789477853e-05, + "loss": 0.3423, + "step": 4964000 + }, + { + "epoch": 2.98, + "learning_rate": 3.519943792921796e-05, + "loss": 0.3379, + "step": 4964500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5197337963657394e-05, + "loss": 0.3344, + "step": 4965000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5195242198027954e-05, + "loss": 0.3374, + "step": 4965500 + }, + { + "epoch": 2.98, + "learning_rate": 3.519314223246739e-05, + "loss": 0.3415, + "step": 4966000 + }, + { + "epoch": 2.98, + "learning_rate": 3.519104226690682e-05, + "loss": 0.3484, + "step": 4966500 + }, + { + "epoch": 2.98, + "learning_rate": 3.518894650127738e-05, + "loss": 0.3384, + "step": 4967000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5186846535716815e-05, + "loss": 0.3333, + "step": 4967500 + }, + { + "epoch": 2.98, + "learning_rate": 3.518474657015625e-05, + "loss": 0.3452, + "step": 4968000 + }, + { + "epoch": 2.98, + "learning_rate": 3.518264660459569e-05, + "loss": 0.335, + "step": 4968500 + }, + { + "epoch": 2.98, + "learning_rate": 3.518054663903512e-05, + "loss": 0.331, + "step": 4969000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5178446673474556e-05, + "loss": 0.3386, + "step": 4969500 + }, + { + "epoch": 2.98, + "learning_rate": 3.517634670791399e-05, + "loss": 0.3435, + "step": 4970000 + }, + { + "epoch": 2.98, + "learning_rate": 3.517425094228455e-05, + "loss": 0.3357, + "step": 4970500 + }, + { + "epoch": 2.98, + "learning_rate": 3.517215097672398e-05, + "loss": 0.3327, + "step": 4971000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5170051011163416e-05, + "loss": 0.3366, + "step": 4971500 + }, + { + "epoch": 2.98, + "learning_rate": 3.516795104560285e-05, + "loss": 0.335, + "step": 4972000 + }, + { + "epoch": 2.98, + "learning_rate": 3.516585108004228e-05, + "loss": 0.3436, + "step": 4972500 + }, + { + "epoch": 2.98, + "learning_rate": 3.516375111448172e-05, + "loss": 0.3455, + "step": 4973000 + }, + { + "epoch": 2.98, + "learning_rate": 3.516165114892116e-05, + "loss": 0.3414, + "step": 4973500 + }, + { + "epoch": 2.98, + "learning_rate": 3.515955118336059e-05, + "loss": 0.3387, + "step": 4974000 + }, + { + "epoch": 2.98, + "learning_rate": 3.5157451217800024e-05, + "loss": 0.3313, + "step": 4974500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5155351252239464e-05, + "loss": 0.3492, + "step": 4975000 + }, + { + "epoch": 2.98, + "learning_rate": 3.515325548661002e-05, + "loss": 0.3413, + "step": 4975500 + }, + { + "epoch": 2.98, + "learning_rate": 3.515115552104945e-05, + "loss": 0.3388, + "step": 4976000 + }, + { + "epoch": 2.98, + "learning_rate": 3.514905555548889e-05, + "loss": 0.3426, + "step": 4976500 + }, + { + "epoch": 2.98, + "learning_rate": 3.5146955589928325e-05, + "loss": 0.3387, + "step": 4977000 + }, + { + "epoch": 2.98, + "learning_rate": 3.514485562436776e-05, + "loss": 0.3503, + "step": 4977500 + }, + { + "epoch": 2.98, + "learning_rate": 3.514275985873831e-05, + "loss": 0.3432, + "step": 4978000 + }, + { + "epoch": 2.98, + "learning_rate": 3.514065989317775e-05, + "loss": 0.3406, + "step": 4978500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5138559927617186e-05, + "loss": 0.3413, + "step": 4979000 + }, + { + "epoch": 2.99, + "learning_rate": 3.513645996205662e-05, + "loss": 0.3396, + "step": 4979500 + }, + { + "epoch": 2.99, + "learning_rate": 3.513436419642717e-05, + "loss": 0.3439, + "step": 4980000 + }, + { + "epoch": 2.99, + "learning_rate": 3.513226423086661e-05, + "loss": 0.331, + "step": 4980500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5130164265306046e-05, + "loss": 0.3443, + "step": 4981000 + }, + { + "epoch": 2.99, + "learning_rate": 3.512806429974548e-05, + "loss": 0.3364, + "step": 4981500 + }, + { + "epoch": 2.99, + "learning_rate": 3.512596433418492e-05, + "loss": 0.3305, + "step": 4982000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5123864368624354e-05, + "loss": 0.3379, + "step": 4982500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5121764403063794e-05, + "loss": 0.3414, + "step": 4983000 + }, + { + "epoch": 2.99, + "learning_rate": 3.511966443750323e-05, + "loss": 0.3357, + "step": 4983500 + }, + { + "epoch": 2.99, + "learning_rate": 3.511756867187378e-05, + "loss": 0.3426, + "step": 4984000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5115468706313214e-05, + "loss": 0.3472, + "step": 4984500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5113368740752654e-05, + "loss": 0.3371, + "step": 4985000 + }, + { + "epoch": 2.99, + "learning_rate": 3.511126877519209e-05, + "loss": 0.3346, + "step": 4985500 + }, + { + "epoch": 2.99, + "learning_rate": 3.510917300956264e-05, + "loss": 0.3419, + "step": 4986000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5107073044002075e-05, + "loss": 0.3479, + "step": 4986500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5104973078441515e-05, + "loss": 0.3433, + "step": 4987000 + }, + { + "epoch": 2.99, + "learning_rate": 3.510287731281207e-05, + "loss": 0.3569, + "step": 4987500 + }, + { + "epoch": 2.99, + "learning_rate": 3.51007773472515e-05, + "loss": 0.3461, + "step": 4988000 + }, + { + "epoch": 2.99, + "learning_rate": 3.509867738169094e-05, + "loss": 0.3421, + "step": 4988500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5096577416130376e-05, + "loss": 0.3359, + "step": 4989000 + }, + { + "epoch": 2.99, + "learning_rate": 3.509447745056981e-05, + "loss": 0.3415, + "step": 4989500 + }, + { + "epoch": 2.99, + "learning_rate": 3.509237748500925e-05, + "loss": 0.3485, + "step": 4990000 + }, + { + "epoch": 2.99, + "learning_rate": 3.509027751944868e-05, + "loss": 0.3349, + "step": 4990500 + }, + { + "epoch": 2.99, + "learning_rate": 3.5088177553888117e-05, + "loss": 0.3505, + "step": 4991000 + }, + { + "epoch": 2.99, + "learning_rate": 3.508608178825867e-05, + "loss": 0.341, + "step": 4991500 + }, + { + "epoch": 2.99, + "learning_rate": 3.508398182269811e-05, + "loss": 0.3331, + "step": 4992000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5081881857137544e-05, + "loss": 0.3475, + "step": 4992500 + }, + { + "epoch": 2.99, + "learning_rate": 3.50797860915081e-05, + "loss": 0.3364, + "step": 4993000 + }, + { + "epoch": 2.99, + "learning_rate": 3.507768612594753e-05, + "loss": 0.3375, + "step": 4993500 + }, + { + "epoch": 2.99, + "learning_rate": 3.507558616038697e-05, + "loss": 0.3433, + "step": 4994000 + }, + { + "epoch": 2.99, + "learning_rate": 3.5073490394757525e-05, + "loss": 0.3485, + "step": 4994500 + }, + { + "epoch": 2.99, + "learning_rate": 3.507139042919696e-05, + "loss": 0.3566, + "step": 4995000 + }, + { + "epoch": 3.0, + "learning_rate": 3.50692904636364e-05, + "loss": 0.3505, + "step": 4995500 + }, + { + "epoch": 3.0, + "learning_rate": 3.506719469800695e-05, + "loss": 0.336, + "step": 4996000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5065094732446385e-05, + "loss": 0.3339, + "step": 4996500 + }, + { + "epoch": 3.0, + "learning_rate": 3.506299476688582e-05, + "loss": 0.3442, + "step": 4997000 + }, + { + "epoch": 3.0, + "learning_rate": 3.506089480132526e-05, + "loss": 0.3425, + "step": 4997500 + }, + { + "epoch": 3.0, + "learning_rate": 3.505879483576469e-05, + "loss": 0.3285, + "step": 4998000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5056694870204126e-05, + "loss": 0.3422, + "step": 4998500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5054594904643566e-05, + "loss": 0.3358, + "step": 4999000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5052494939083e-05, + "loss": 0.3369, + "step": 4999500 + }, + { + "epoch": 3.0, + "learning_rate": 3.505039497352243e-05, + "loss": 0.339, + "step": 5000000 + }, + { + "epoch": 3.0, + "eval_loss": 0.3291718661785126, + "eval_runtime": 1121.0384, + "eval_samples_per_second": 469.85, + "eval_steps_per_second": 78.309, + "step": 5000000 + }, + { + "epoch": 3.0, + "learning_rate": 3.504829500796187e-05, + "loss": 0.3354, + "step": 5000500 + }, + { + "epoch": 3.0, + "learning_rate": 3.504619504240131e-05, + "loss": 0.3497, + "step": 5001000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5044095076840734e-05, + "loss": 0.3456, + "step": 5001500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5041995111280174e-05, + "loss": 0.3401, + "step": 5002000 + }, + { + "epoch": 3.0, + "learning_rate": 3.5039899345650734e-05, + "loss": 0.3412, + "step": 5002500 + }, + { + "epoch": 3.0, + "learning_rate": 3.503779938009017e-05, + "loss": 0.3382, + "step": 5003000 + }, + { + "epoch": 3.0, + "learning_rate": 3.50356994145296e-05, + "loss": 0.3387, + "step": 5003500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5033599448969034e-05, + "loss": 0.3377, + "step": 5004000 + }, + { + "epoch": 3.0, + "learning_rate": 3.503149948340847e-05, + "loss": 0.3226, + "step": 5004500 + }, + { + "epoch": 3.0, + "learning_rate": 3.502939951784791e-05, + "loss": 0.3159, + "step": 5005000 + }, + { + "epoch": 3.0, + "learning_rate": 3.502729955228734e-05, + "loss": 0.3247, + "step": 5005500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5025199586726775e-05, + "loss": 0.3298, + "step": 5006000 + }, + { + "epoch": 3.0, + "learning_rate": 3.502310382109733e-05, + "loss": 0.3218, + "step": 5006500 + }, + { + "epoch": 3.0, + "learning_rate": 3.502100385553677e-05, + "loss": 0.3299, + "step": 5007000 + }, + { + "epoch": 3.0, + "learning_rate": 3.50189038899762e-05, + "loss": 0.3296, + "step": 5007500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5016803924415636e-05, + "loss": 0.3237, + "step": 5008000 + }, + { + "epoch": 3.0, + "learning_rate": 3.501470815878619e-05, + "loss": 0.3258, + "step": 5008500 + }, + { + "epoch": 3.0, + "learning_rate": 3.501260819322563e-05, + "loss": 0.3215, + "step": 5009000 + }, + { + "epoch": 3.0, + "learning_rate": 3.501050822766506e-05, + "loss": 0.3223, + "step": 5009500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5008408262104497e-05, + "loss": 0.3316, + "step": 5010000 + }, + { + "epoch": 3.0, + "learning_rate": 3.500631249647506e-05, + "loss": 0.3378, + "step": 5010500 + }, + { + "epoch": 3.0, + "learning_rate": 3.500421673084562e-05, + "loss": 0.3298, + "step": 5011000 + }, + { + "epoch": 3.0, + "learning_rate": 3.500211676528505e-05, + "loss": 0.3247, + "step": 5011500 + }, + { + "epoch": 3.0, + "learning_rate": 3.5000016799724484e-05, + "loss": 0.3306, + "step": 5012000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4997916834163924e-05, + "loss": 0.3243, + "step": 5012500 + }, + { + "epoch": 3.01, + "learning_rate": 3.499581686860336e-05, + "loss": 0.3294, + "step": 5013000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4993716903042785e-05, + "loss": 0.3299, + "step": 5013500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4991616937482225e-05, + "loss": 0.3248, + "step": 5014000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4989521171852785e-05, + "loss": 0.3295, + "step": 5014500 + }, + { + "epoch": 3.01, + "learning_rate": 3.498742120629222e-05, + "loss": 0.3232, + "step": 5015000 + }, + { + "epoch": 3.01, + "learning_rate": 3.498532124073165e-05, + "loss": 0.3285, + "step": 5015500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4983221275171086e-05, + "loss": 0.3353, + "step": 5016000 + }, + { + "epoch": 3.01, + "learning_rate": 3.498112130961052e-05, + "loss": 0.3242, + "step": 5016500 + }, + { + "epoch": 3.01, + "learning_rate": 3.497902134404995e-05, + "loss": 0.3198, + "step": 5017000 + }, + { + "epoch": 3.01, + "learning_rate": 3.497692137848939e-05, + "loss": 0.3278, + "step": 5017500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4974821412928826e-05, + "loss": 0.335, + "step": 5018000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4972729847230507e-05, + "loss": 0.3249, + "step": 5018500 + }, + { + "epoch": 3.01, + "learning_rate": 3.497062988166994e-05, + "loss": 0.3348, + "step": 5019000 + }, + { + "epoch": 3.01, + "learning_rate": 3.496852991610938e-05, + "loss": 0.3334, + "step": 5019500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4966429950548814e-05, + "loss": 0.3286, + "step": 5020000 + }, + { + "epoch": 3.01, + "learning_rate": 3.496432998498824e-05, + "loss": 0.3282, + "step": 5020500 + }, + { + "epoch": 3.01, + "learning_rate": 3.496223001942768e-05, + "loss": 0.3286, + "step": 5021000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4960130053867114e-05, + "loss": 0.3361, + "step": 5021500 + }, + { + "epoch": 3.01, + "learning_rate": 3.495803008830655e-05, + "loss": 0.3342, + "step": 5022000 + }, + { + "epoch": 3.01, + "learning_rate": 3.495593852260823e-05, + "loss": 0.3323, + "step": 5022500 + }, + { + "epoch": 3.01, + "learning_rate": 3.495383855704766e-05, + "loss": 0.3262, + "step": 5023000 + }, + { + "epoch": 3.01, + "learning_rate": 3.49517385914871e-05, + "loss": 0.3307, + "step": 5023500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4949638625926535e-05, + "loss": 0.3244, + "step": 5024000 + }, + { + "epoch": 3.01, + "learning_rate": 3.494753866036597e-05, + "loss": 0.3311, + "step": 5024500 + }, + { + "epoch": 3.01, + "learning_rate": 3.494543869480541e-05, + "loss": 0.3233, + "step": 5025000 + }, + { + "epoch": 3.01, + "learning_rate": 3.4943338729244836e-05, + "loss": 0.3299, + "step": 5025500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4941238763684276e-05, + "loss": 0.3227, + "step": 5026000 + }, + { + "epoch": 3.01, + "learning_rate": 3.493913879812371e-05, + "loss": 0.3236, + "step": 5026500 + }, + { + "epoch": 3.01, + "learning_rate": 3.493703883256314e-05, + "loss": 0.3307, + "step": 5027000 + }, + { + "epoch": 3.01, + "learning_rate": 3.493493886700258e-05, + "loss": 0.3275, + "step": 5027500 + }, + { + "epoch": 3.01, + "learning_rate": 3.4932843101373137e-05, + "loss": 0.3258, + "step": 5028000 + }, + { + "epoch": 3.01, + "learning_rate": 3.493074313581257e-05, + "loss": 0.3259, + "step": 5028500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4928643170252003e-05, + "loss": 0.3347, + "step": 5029000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4926543204691444e-05, + "loss": 0.3265, + "step": 5029500 + }, + { + "epoch": 3.02, + "learning_rate": 3.492444323913088e-05, + "loss": 0.3352, + "step": 5030000 + }, + { + "epoch": 3.02, + "learning_rate": 3.492234327357031e-05, + "loss": 0.3329, + "step": 5030500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4920247507940864e-05, + "loss": 0.3327, + "step": 5031000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4918147542380304e-05, + "loss": 0.3255, + "step": 5031500 + }, + { + "epoch": 3.02, + "learning_rate": 3.491604757681974e-05, + "loss": 0.3231, + "step": 5032000 + }, + { + "epoch": 3.02, + "learning_rate": 3.491394761125917e-05, + "loss": 0.3218, + "step": 5032500 + }, + { + "epoch": 3.02, + "learning_rate": 3.491184764569861e-05, + "loss": 0.3368, + "step": 5033000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4909747680138045e-05, + "loss": 0.3366, + "step": 5033500 + }, + { + "epoch": 3.02, + "learning_rate": 3.490764771457748e-05, + "loss": 0.3219, + "step": 5034000 + }, + { + "epoch": 3.02, + "learning_rate": 3.490554774901692e-05, + "loss": 0.3354, + "step": 5034500 + }, + { + "epoch": 3.02, + "learning_rate": 3.490345198338747e-05, + "loss": 0.3237, + "step": 5035000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4901352017826906e-05, + "loss": 0.3323, + "step": 5035500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4899252052266346e-05, + "loss": 0.3282, + "step": 5036000 + }, + { + "epoch": 3.02, + "learning_rate": 3.489715208670578e-05, + "loss": 0.3363, + "step": 5036500 + }, + { + "epoch": 3.02, + "learning_rate": 3.489506052100746e-05, + "loss": 0.3293, + "step": 5037000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4892960555446887e-05, + "loss": 0.3259, + "step": 5037500 + }, + { + "epoch": 3.02, + "learning_rate": 3.489086058988632e-05, + "loss": 0.3309, + "step": 5038000 + }, + { + "epoch": 3.02, + "learning_rate": 3.488876062432576e-05, + "loss": 0.328, + "step": 5038500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4886660658765194e-05, + "loss": 0.3307, + "step": 5039000 + }, + { + "epoch": 3.02, + "learning_rate": 3.488456069320463e-05, + "loss": 0.3284, + "step": 5039500 + }, + { + "epoch": 3.02, + "learning_rate": 3.488246072764407e-05, + "loss": 0.3264, + "step": 5040000 + }, + { + "epoch": 3.02, + "learning_rate": 3.488036496201462e-05, + "loss": 0.3313, + "step": 5040500 + }, + { + "epoch": 3.02, + "learning_rate": 3.4878264996454054e-05, + "loss": 0.3401, + "step": 5041000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4876165030893495e-05, + "loss": 0.3253, + "step": 5041500 + }, + { + "epoch": 3.02, + "learning_rate": 3.487406506533293e-05, + "loss": 0.3268, + "step": 5042000 + }, + { + "epoch": 3.02, + "learning_rate": 3.487196509977236e-05, + "loss": 0.337, + "step": 5042500 + }, + { + "epoch": 3.02, + "learning_rate": 3.48698651342118e-05, + "loss": 0.3267, + "step": 5043000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4867769368582355e-05, + "loss": 0.334, + "step": 5043500 + }, + { + "epoch": 3.02, + "learning_rate": 3.486566940302179e-05, + "loss": 0.3344, + "step": 5044000 + }, + { + "epoch": 3.02, + "learning_rate": 3.486356943746122e-05, + "loss": 0.3336, + "step": 5044500 + }, + { + "epoch": 3.02, + "learning_rate": 3.486146947190066e-05, + "loss": 0.3268, + "step": 5045000 + }, + { + "epoch": 3.02, + "learning_rate": 3.4859369506340096e-05, + "loss": 0.332, + "step": 5045500 + }, + { + "epoch": 3.03, + "learning_rate": 3.485726954077953e-05, + "loss": 0.3367, + "step": 5046000 + }, + { + "epoch": 3.03, + "learning_rate": 3.485517377515008e-05, + "loss": 0.3396, + "step": 5046500 + }, + { + "epoch": 3.03, + "learning_rate": 3.485307380958952e-05, + "loss": 0.3163, + "step": 5047000 + }, + { + "epoch": 3.03, + "learning_rate": 3.485097384402896e-05, + "loss": 0.3205, + "step": 5047500 + }, + { + "epoch": 3.03, + "learning_rate": 3.48488738784684e-05, + "loss": 0.3348, + "step": 5048000 + }, + { + "epoch": 3.03, + "learning_rate": 3.484677391290783e-05, + "loss": 0.3291, + "step": 5048500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4844678147278384e-05, + "loss": 0.3358, + "step": 5049000 + }, + { + "epoch": 3.03, + "learning_rate": 3.484257818171782e-05, + "loss": 0.3279, + "step": 5049500 + }, + { + "epoch": 3.03, + "learning_rate": 3.484047821615726e-05, + "loss": 0.33, + "step": 5050000 + }, + { + "epoch": 3.03, + "learning_rate": 3.483837825059669e-05, + "loss": 0.3322, + "step": 5050500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4836278285036125e-05, + "loss": 0.3257, + "step": 5051000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4834178319475565e-05, + "loss": 0.3285, + "step": 5051500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4832078353915e-05, + "loss": 0.3279, + "step": 5052000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4829978388354425e-05, + "loss": 0.3337, + "step": 5052500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4827882622724985e-05, + "loss": 0.3342, + "step": 5053000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4825786857095546e-05, + "loss": 0.3329, + "step": 5053500 + }, + { + "epoch": 3.03, + "learning_rate": 3.48236910914661e-05, + "loss": 0.3347, + "step": 5054000 + }, + { + "epoch": 3.03, + "learning_rate": 3.482159112590553e-05, + "loss": 0.3471, + "step": 5054500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4819491160344966e-05, + "loss": 0.3342, + "step": 5055000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4817391194784406e-05, + "loss": 0.3319, + "step": 5055500 + }, + { + "epoch": 3.03, + "learning_rate": 3.481529122922384e-05, + "loss": 0.3309, + "step": 5056000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4813195463594394e-05, + "loss": 0.3352, + "step": 5056500 + }, + { + "epoch": 3.03, + "learning_rate": 3.481109549803383e-05, + "loss": 0.3312, + "step": 5057000 + }, + { + "epoch": 3.03, + "learning_rate": 3.480899553247327e-05, + "loss": 0.3415, + "step": 5057500 + }, + { + "epoch": 3.03, + "learning_rate": 3.48068955669127e-05, + "loss": 0.3257, + "step": 5058000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4804795601352134e-05, + "loss": 0.3293, + "step": 5058500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4802695635791574e-05, + "loss": 0.3276, + "step": 5059000 + }, + { + "epoch": 3.03, + "learning_rate": 3.480059567023101e-05, + "loss": 0.3295, + "step": 5059500 + }, + { + "epoch": 3.03, + "learning_rate": 3.479849570467044e-05, + "loss": 0.3289, + "step": 5060000 + }, + { + "epoch": 3.03, + "learning_rate": 3.4796399939041e-05, + "loss": 0.347, + "step": 5060500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4794304173411555e-05, + "loss": 0.3209, + "step": 5061000 + }, + { + "epoch": 3.03, + "learning_rate": 3.479220420785099e-05, + "loss": 0.333, + "step": 5061500 + }, + { + "epoch": 3.03, + "learning_rate": 3.479010424229042e-05, + "loss": 0.3282, + "step": 5062000 + }, + { + "epoch": 3.04, + "learning_rate": 3.478800427672986e-05, + "loss": 0.3267, + "step": 5062500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4785904311169296e-05, + "loss": 0.3295, + "step": 5063000 + }, + { + "epoch": 3.04, + "learning_rate": 3.478380434560873e-05, + "loss": 0.3319, + "step": 5063500 + }, + { + "epoch": 3.04, + "learning_rate": 3.478170438004817e-05, + "loss": 0.3282, + "step": 5064000 + }, + { + "epoch": 3.04, + "learning_rate": 3.47796044144876e-05, + "loss": 0.3236, + "step": 5064500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4777504448927036e-05, + "loss": 0.3352, + "step": 5065000 + }, + { + "epoch": 3.04, + "learning_rate": 3.477540448336648e-05, + "loss": 0.3188, + "step": 5065500 + }, + { + "epoch": 3.04, + "learning_rate": 3.477330451780591e-05, + "loss": 0.3301, + "step": 5066000 + }, + { + "epoch": 3.04, + "learning_rate": 3.477120455224534e-05, + "loss": 0.3349, + "step": 5066500 + }, + { + "epoch": 3.04, + "learning_rate": 3.47691087866159e-05, + "loss": 0.328, + "step": 5067000 + }, + { + "epoch": 3.04, + "learning_rate": 3.476700882105534e-05, + "loss": 0.3226, + "step": 5067500 + }, + { + "epoch": 3.04, + "learning_rate": 3.476490885549477e-05, + "loss": 0.3353, + "step": 5068000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4762808889934204e-05, + "loss": 0.3346, + "step": 5068500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4760713124304765e-05, + "loss": 0.337, + "step": 5069000 + }, + { + "epoch": 3.04, + "learning_rate": 3.47586131587442e-05, + "loss": 0.3397, + "step": 5069500 + }, + { + "epoch": 3.04, + "learning_rate": 3.475651739311475e-05, + "loss": 0.3332, + "step": 5070000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4754417427554185e-05, + "loss": 0.3317, + "step": 5070500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4752317461993625e-05, + "loss": 0.3271, + "step": 5071000 + }, + { + "epoch": 3.04, + "learning_rate": 3.475021749643306e-05, + "loss": 0.3366, + "step": 5071500 + }, + { + "epoch": 3.04, + "learning_rate": 3.474811753087249e-05, + "loss": 0.3365, + "step": 5072000 + }, + { + "epoch": 3.04, + "learning_rate": 3.474601756531193e-05, + "loss": 0.3325, + "step": 5072500 + }, + { + "epoch": 3.04, + "learning_rate": 3.4743917599751366e-05, + "loss": 0.3296, + "step": 5073000 + }, + { + "epoch": 3.04, + "learning_rate": 3.47418176341908e-05, + "loss": 0.3237, + "step": 5073500 + }, + { + "epoch": 3.04, + "learning_rate": 3.473972186856135e-05, + "loss": 0.3341, + "step": 5074000 + }, + { + "epoch": 3.04, + "learning_rate": 3.473762190300079e-05, + "loss": 0.3386, + "step": 5074500 + }, + { + "epoch": 3.04, + "learning_rate": 3.473552193744023e-05, + "loss": 0.3288, + "step": 5075000 + }, + { + "epoch": 3.04, + "learning_rate": 3.473342197187966e-05, + "loss": 0.3325, + "step": 5075500 + }, + { + "epoch": 3.04, + "learning_rate": 3.473132620625022e-05, + "loss": 0.343, + "step": 5076000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4729230440620774e-05, + "loss": 0.3255, + "step": 5076500 + }, + { + "epoch": 3.04, + "learning_rate": 3.472713047506021e-05, + "loss": 0.3264, + "step": 5077000 + }, + { + "epoch": 3.04, + "learning_rate": 3.472503050949964e-05, + "loss": 0.3223, + "step": 5077500 + }, + { + "epoch": 3.04, + "learning_rate": 3.472293054393908e-05, + "loss": 0.3276, + "step": 5078000 + }, + { + "epoch": 3.04, + "learning_rate": 3.4720830578378515e-05, + "loss": 0.3375, + "step": 5078500 + }, + { + "epoch": 3.05, + "learning_rate": 3.471873061281795e-05, + "loss": 0.3292, + "step": 5079000 + }, + { + "epoch": 3.05, + "learning_rate": 3.471663064725739e-05, + "loss": 0.328, + "step": 5079500 + }, + { + "epoch": 3.05, + "learning_rate": 3.471453488162794e-05, + "loss": 0.3288, + "step": 5080000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4712434916067375e-05, + "loss": 0.3355, + "step": 5080500 + }, + { + "epoch": 3.05, + "learning_rate": 3.471033495050681e-05, + "loss": 0.3262, + "step": 5081000 + }, + { + "epoch": 3.05, + "learning_rate": 3.470823498494625e-05, + "loss": 0.3273, + "step": 5081500 + }, + { + "epoch": 3.05, + "learning_rate": 3.470613501938568e-05, + "loss": 0.3374, + "step": 5082000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4704035053825116e-05, + "loss": 0.3287, + "step": 5082500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4701939288195676e-05, + "loss": 0.325, + "step": 5083000 + }, + { + "epoch": 3.05, + "learning_rate": 3.469983932263511e-05, + "loss": 0.3318, + "step": 5083500 + }, + { + "epoch": 3.05, + "learning_rate": 3.469773935707454e-05, + "loss": 0.3299, + "step": 5084000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4695639391513984e-05, + "loss": 0.3329, + "step": 5084500 + }, + { + "epoch": 3.05, + "learning_rate": 3.469353942595342e-05, + "loss": 0.3305, + "step": 5085000 + }, + { + "epoch": 3.05, + "learning_rate": 3.469143946039285e-05, + "loss": 0.3323, + "step": 5085500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4689343694763404e-05, + "loss": 0.3317, + "step": 5086000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4687243729202844e-05, + "loss": 0.3386, + "step": 5086500 + }, + { + "epoch": 3.05, + "learning_rate": 3.468514376364228e-05, + "loss": 0.326, + "step": 5087000 + }, + { + "epoch": 3.05, + "learning_rate": 3.468304379808171e-05, + "loss": 0.3346, + "step": 5087500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4680943832521145e-05, + "loss": 0.3227, + "step": 5088000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4678848066891705e-05, + "loss": 0.333, + "step": 5088500 + }, + { + "epoch": 3.05, + "learning_rate": 3.467674810133114e-05, + "loss": 0.3299, + "step": 5089000 + }, + { + "epoch": 3.05, + "learning_rate": 3.467464813577057e-05, + "loss": 0.3402, + "step": 5089500 + }, + { + "epoch": 3.05, + "learning_rate": 3.467254817021001e-05, + "loss": 0.329, + "step": 5090000 + }, + { + "epoch": 3.05, + "learning_rate": 3.467044820464944e-05, + "loss": 0.3281, + "step": 5090500 + }, + { + "epoch": 3.05, + "learning_rate": 3.466834823908888e-05, + "loss": 0.3306, + "step": 5091000 + }, + { + "epoch": 3.05, + "learning_rate": 3.466624827352831e-05, + "loss": 0.3321, + "step": 5091500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4664148307967746e-05, + "loss": 0.3387, + "step": 5092000 + }, + { + "epoch": 3.05, + "learning_rate": 3.4662052542338306e-05, + "loss": 0.3323, + "step": 5092500 + }, + { + "epoch": 3.05, + "learning_rate": 3.465995257677774e-05, + "loss": 0.3401, + "step": 5093000 + }, + { + "epoch": 3.05, + "learning_rate": 3.46578568111483e-05, + "loss": 0.3251, + "step": 5093500 + }, + { + "epoch": 3.05, + "learning_rate": 3.4655756845587734e-05, + "loss": 0.3334, + "step": 5094000 + }, + { + "epoch": 3.05, + "learning_rate": 3.465365688002717e-05, + "loss": 0.3291, + "step": 5094500 + }, + { + "epoch": 3.05, + "learning_rate": 3.465155691446661e-05, + "loss": 0.3347, + "step": 5095000 + }, + { + "epoch": 3.05, + "learning_rate": 3.464946114883716e-05, + "loss": 0.3377, + "step": 5095500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4647361183276594e-05, + "loss": 0.3306, + "step": 5096000 + }, + { + "epoch": 3.06, + "learning_rate": 3.464526121771603e-05, + "loss": 0.3291, + "step": 5096500 + }, + { + "epoch": 3.06, + "learning_rate": 3.464316125215547e-05, + "loss": 0.3283, + "step": 5097000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4641061286594895e-05, + "loss": 0.333, + "step": 5097500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4638961321034335e-05, + "loss": 0.3229, + "step": 5098000 + }, + { + "epoch": 3.06, + "learning_rate": 3.463686135547377e-05, + "loss": 0.3309, + "step": 5098500 + }, + { + "epoch": 3.06, + "learning_rate": 3.46347613899132e-05, + "loss": 0.3356, + "step": 5099000 + }, + { + "epoch": 3.06, + "learning_rate": 3.463266562428376e-05, + "loss": 0.3252, + "step": 5099500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4630565658723196e-05, + "loss": 0.3226, + "step": 5100000 + }, + { + "epoch": 3.06, + "eval_loss": 0.3272015154361725, + "eval_runtime": 1122.7478, + "eval_samples_per_second": 469.135, + "eval_steps_per_second": 78.189, + "step": 5100000 + }, + { + "epoch": 3.06, + "learning_rate": 3.462846569316263e-05, + "loss": 0.328, + "step": 5100500 + }, + { + "epoch": 3.06, + "learning_rate": 3.462636572760206e-05, + "loss": 0.3282, + "step": 5101000 + }, + { + "epoch": 3.06, + "learning_rate": 3.462426996197262e-05, + "loss": 0.3344, + "step": 5101500 + }, + { + "epoch": 3.06, + "learning_rate": 3.462216999641206e-05, + "loss": 0.3221, + "step": 5102000 + }, + { + "epoch": 3.06, + "learning_rate": 3.462007003085149e-05, + "loss": 0.3368, + "step": 5102500 + }, + { + "epoch": 3.06, + "learning_rate": 3.461797426522205e-05, + "loss": 0.3321, + "step": 5103000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4615874299661484e-05, + "loss": 0.3355, + "step": 5103500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4613774334100924e-05, + "loss": 0.3334, + "step": 5104000 + }, + { + "epoch": 3.06, + "learning_rate": 3.461167436854036e-05, + "loss": 0.3291, + "step": 5104500 + }, + { + "epoch": 3.06, + "learning_rate": 3.460957860291091e-05, + "loss": 0.3293, + "step": 5105000 + }, + { + "epoch": 3.06, + "learning_rate": 3.460747863735035e-05, + "loss": 0.3368, + "step": 5105500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4605378671789785e-05, + "loss": 0.3243, + "step": 5106000 + }, + { + "epoch": 3.06, + "learning_rate": 3.460327870622922e-05, + "loss": 0.3313, + "step": 5106500 + }, + { + "epoch": 3.06, + "learning_rate": 3.460118294059977e-05, + "loss": 0.3349, + "step": 5107000 + }, + { + "epoch": 3.06, + "learning_rate": 3.459908297503921e-05, + "loss": 0.3352, + "step": 5107500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4596983009478645e-05, + "loss": 0.3352, + "step": 5108000 + }, + { + "epoch": 3.06, + "learning_rate": 3.459488304391808e-05, + "loss": 0.3287, + "step": 5108500 + }, + { + "epoch": 3.06, + "learning_rate": 3.459278307835752e-05, + "loss": 0.3265, + "step": 5109000 + }, + { + "epoch": 3.06, + "learning_rate": 3.4590683112796946e-05, + "loss": 0.3362, + "step": 5109500 + }, + { + "epoch": 3.06, + "learning_rate": 3.458858314723638e-05, + "loss": 0.3348, + "step": 5110000 + }, + { + "epoch": 3.06, + "learning_rate": 3.458648318167582e-05, + "loss": 0.3276, + "step": 5110500 + }, + { + "epoch": 3.06, + "learning_rate": 3.458438741604638e-05, + "loss": 0.3243, + "step": 5111000 + }, + { + "epoch": 3.06, + "learning_rate": 3.458228745048581e-05, + "loss": 0.3301, + "step": 5111500 + }, + { + "epoch": 3.06, + "learning_rate": 3.458018748492525e-05, + "loss": 0.3311, + "step": 5112000 + }, + { + "epoch": 3.07, + "learning_rate": 3.457808751936468e-05, + "loss": 0.3288, + "step": 5112500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4575987553804114e-05, + "loss": 0.3293, + "step": 5113000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4573895988105794e-05, + "loss": 0.34, + "step": 5113500 + }, + { + "epoch": 3.07, + "learning_rate": 3.457179602254523e-05, + "loss": 0.3386, + "step": 5114000 + }, + { + "epoch": 3.07, + "learning_rate": 3.456969605698467e-05, + "loss": 0.328, + "step": 5114500 + }, + { + "epoch": 3.07, + "learning_rate": 3.45675960914241e-05, + "loss": 0.3253, + "step": 5115000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4565496125863535e-05, + "loss": 0.3385, + "step": 5115500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4563396160302975e-05, + "loss": 0.3275, + "step": 5116000 + }, + { + "epoch": 3.07, + "learning_rate": 3.456129619474241e-05, + "loss": 0.325, + "step": 5116500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4559196229181835e-05, + "loss": 0.3292, + "step": 5117000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4557096263621275e-05, + "loss": 0.3321, + "step": 5117500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4555000497991836e-05, + "loss": 0.3309, + "step": 5118000 + }, + { + "epoch": 3.07, + "learning_rate": 3.455290053243127e-05, + "loss": 0.3321, + "step": 5118500 + }, + { + "epoch": 3.07, + "learning_rate": 3.45508005668707e-05, + "loss": 0.3262, + "step": 5119000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4548700601310136e-05, + "loss": 0.3309, + "step": 5119500 + }, + { + "epoch": 3.07, + "learning_rate": 3.454660063574957e-05, + "loss": 0.3406, + "step": 5120000 + }, + { + "epoch": 3.07, + "learning_rate": 3.454450067018901e-05, + "loss": 0.3359, + "step": 5120500 + }, + { + "epoch": 3.07, + "learning_rate": 3.454240070462844e-05, + "loss": 0.3361, + "step": 5121000 + }, + { + "epoch": 3.07, + "learning_rate": 3.454030073906788e-05, + "loss": 0.337, + "step": 5121500 + }, + { + "epoch": 3.07, + "learning_rate": 3.453820497343843e-05, + "loss": 0.338, + "step": 5122000 + }, + { + "epoch": 3.07, + "learning_rate": 3.453610920780899e-05, + "loss": 0.3268, + "step": 5122500 + }, + { + "epoch": 3.07, + "learning_rate": 3.453400924224843e-05, + "loss": 0.33, + "step": 5123000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4531909276687864e-05, + "loss": 0.3243, + "step": 5123500 + }, + { + "epoch": 3.07, + "learning_rate": 3.452980931112729e-05, + "loss": 0.3392, + "step": 5124000 + }, + { + "epoch": 3.07, + "learning_rate": 3.452770934556673e-05, + "loss": 0.3332, + "step": 5124500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4525609380006165e-05, + "loss": 0.324, + "step": 5125000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4523509414445605e-05, + "loss": 0.3344, + "step": 5125500 + }, + { + "epoch": 3.07, + "learning_rate": 3.452140944888504e-05, + "loss": 0.3233, + "step": 5126000 + }, + { + "epoch": 3.07, + "learning_rate": 3.451930948332447e-05, + "loss": 0.3272, + "step": 5126500 + }, + { + "epoch": 3.07, + "learning_rate": 3.4517213717695025e-05, + "loss": 0.3235, + "step": 5127000 + }, + { + "epoch": 3.07, + "learning_rate": 3.4515113752134466e-05, + "loss": 0.3272, + "step": 5127500 + }, + { + "epoch": 3.07, + "learning_rate": 3.45130137865739e-05, + "loss": 0.3331, + "step": 5128000 + }, + { + "epoch": 3.07, + "learning_rate": 3.451091802094445e-05, + "loss": 0.3375, + "step": 5128500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4508818055383886e-05, + "loss": 0.3377, + "step": 5129000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4506718089823326e-05, + "loss": 0.3336, + "step": 5129500 + }, + { + "epoch": 3.08, + "learning_rate": 3.450461812426276e-05, + "loss": 0.3319, + "step": 5130000 + }, + { + "epoch": 3.08, + "learning_rate": 3.450251815870219e-05, + "loss": 0.3273, + "step": 5130500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4500418193141634e-05, + "loss": 0.3291, + "step": 5131000 + }, + { + "epoch": 3.08, + "learning_rate": 3.449831822758107e-05, + "loss": 0.327, + "step": 5131500 + }, + { + "epoch": 3.08, + "learning_rate": 3.44962182620205e-05, + "loss": 0.3255, + "step": 5132000 + }, + { + "epoch": 3.08, + "learning_rate": 3.449411829645994e-05, + "loss": 0.3233, + "step": 5132500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4492018330899374e-05, + "loss": 0.3372, + "step": 5133000 + }, + { + "epoch": 3.08, + "learning_rate": 3.448991836533881e-05, + "loss": 0.3184, + "step": 5133500 + }, + { + "epoch": 3.08, + "learning_rate": 3.448782259970937e-05, + "loss": 0.3377, + "step": 5134000 + }, + { + "epoch": 3.08, + "learning_rate": 3.44857226341488e-05, + "loss": 0.3324, + "step": 5134500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4483626868519355e-05, + "loss": 0.3338, + "step": 5135000 + }, + { + "epoch": 3.08, + "learning_rate": 3.448152690295879e-05, + "loss": 0.3267, + "step": 5135500 + }, + { + "epoch": 3.08, + "learning_rate": 3.447942693739823e-05, + "loss": 0.3258, + "step": 5136000 + }, + { + "epoch": 3.08, + "learning_rate": 3.447732697183766e-05, + "loss": 0.3332, + "step": 5136500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4475227006277096e-05, + "loss": 0.3293, + "step": 5137000 + }, + { + "epoch": 3.08, + "learning_rate": 3.447313124064765e-05, + "loss": 0.3352, + "step": 5137500 + }, + { + "epoch": 3.08, + "learning_rate": 3.447103127508709e-05, + "loss": 0.3252, + "step": 5138000 + }, + { + "epoch": 3.08, + "learning_rate": 3.446893130952652e-05, + "loss": 0.3246, + "step": 5138500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4466831343965956e-05, + "loss": 0.332, + "step": 5139000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4464731378405397e-05, + "loss": 0.332, + "step": 5139500 + }, + { + "epoch": 3.08, + "learning_rate": 3.446263141284483e-05, + "loss": 0.3286, + "step": 5140000 + }, + { + "epoch": 3.08, + "learning_rate": 3.4460531447284263e-05, + "loss": 0.3222, + "step": 5140500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4458431481723704e-05, + "loss": 0.3213, + "step": 5141000 + }, + { + "epoch": 3.08, + "learning_rate": 3.445633151616313e-05, + "loss": 0.3344, + "step": 5141500 + }, + { + "epoch": 3.08, + "learning_rate": 3.445423575053369e-05, + "loss": 0.34, + "step": 5142000 + }, + { + "epoch": 3.08, + "learning_rate": 3.445213578497313e-05, + "loss": 0.3264, + "step": 5142500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4450035819412564e-05, + "loss": 0.3349, + "step": 5143000 + }, + { + "epoch": 3.08, + "learning_rate": 3.444793585385199e-05, + "loss": 0.3281, + "step": 5143500 + }, + { + "epoch": 3.08, + "learning_rate": 3.444583588829143e-05, + "loss": 0.3402, + "step": 5144000 + }, + { + "epoch": 3.08, + "learning_rate": 3.444374012266199e-05, + "loss": 0.35, + "step": 5144500 + }, + { + "epoch": 3.08, + "learning_rate": 3.4441640157101425e-05, + "loss": 0.3368, + "step": 5145000 + }, + { + "epoch": 3.08, + "learning_rate": 3.443954019154086e-05, + "loss": 0.3247, + "step": 5145500 + }, + { + "epoch": 3.09, + "learning_rate": 3.443744022598029e-05, + "loss": 0.3224, + "step": 5146000 + }, + { + "epoch": 3.09, + "learning_rate": 3.443534446035085e-05, + "loss": 0.3484, + "step": 5146500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4433244494790286e-05, + "loss": 0.3236, + "step": 5147000 + }, + { + "epoch": 3.09, + "learning_rate": 3.443114452922972e-05, + "loss": 0.3316, + "step": 5147500 + }, + { + "epoch": 3.09, + "learning_rate": 3.442904456366916e-05, + "loss": 0.3351, + "step": 5148000 + }, + { + "epoch": 3.09, + "learning_rate": 3.442694879803971e-05, + "loss": 0.3204, + "step": 5148500 + }, + { + "epoch": 3.09, + "learning_rate": 3.442484883247915e-05, + "loss": 0.3286, + "step": 5149000 + }, + { + "epoch": 3.09, + "learning_rate": 3.442274886691859e-05, + "loss": 0.3335, + "step": 5149500 + }, + { + "epoch": 3.09, + "learning_rate": 3.442064890135802e-05, + "loss": 0.332, + "step": 5150000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4418548935797454e-05, + "loss": 0.329, + "step": 5150500 + }, + { + "epoch": 3.09, + "learning_rate": 3.441644897023689e-05, + "loss": 0.3274, + "step": 5151000 + }, + { + "epoch": 3.09, + "learning_rate": 3.441434900467632e-05, + "loss": 0.3263, + "step": 5151500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4412249039115754e-05, + "loss": 0.3203, + "step": 5152000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4410149073555194e-05, + "loss": 0.3248, + "step": 5152500 + }, + { + "epoch": 3.09, + "learning_rate": 3.440804910799463e-05, + "loss": 0.334, + "step": 5153000 + }, + { + "epoch": 3.09, + "learning_rate": 3.440595334236518e-05, + "loss": 0.3309, + "step": 5153500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4403853376804615e-05, + "loss": 0.3213, + "step": 5154000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4401753411244055e-05, + "loss": 0.3351, + "step": 5154500 + }, + { + "epoch": 3.09, + "learning_rate": 3.439965344568349e-05, + "loss": 0.3308, + "step": 5155000 + }, + { + "epoch": 3.09, + "learning_rate": 3.439755348012292e-05, + "loss": 0.3331, + "step": 5155500 + }, + { + "epoch": 3.09, + "learning_rate": 3.439545351456236e-05, + "loss": 0.3314, + "step": 5156000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4393357748932916e-05, + "loss": 0.3498, + "step": 5156500 + }, + { + "epoch": 3.09, + "learning_rate": 3.439125778337235e-05, + "loss": 0.3312, + "step": 5157000 + }, + { + "epoch": 3.09, + "learning_rate": 3.438915781781179e-05, + "loss": 0.3297, + "step": 5157500 + }, + { + "epoch": 3.09, + "learning_rate": 3.438705785225122e-05, + "loss": 0.3299, + "step": 5158000 + }, + { + "epoch": 3.09, + "learning_rate": 3.4384957886690656e-05, + "loss": 0.3357, + "step": 5158500 + }, + { + "epoch": 3.09, + "learning_rate": 3.438286212106121e-05, + "loss": 0.3312, + "step": 5159000 + }, + { + "epoch": 3.09, + "learning_rate": 3.438076215550065e-05, + "loss": 0.3274, + "step": 5159500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4378662189940084e-05, + "loss": 0.3245, + "step": 5160000 + }, + { + "epoch": 3.09, + "learning_rate": 3.437656222437952e-05, + "loss": 0.3254, + "step": 5160500 + }, + { + "epoch": 3.09, + "learning_rate": 3.437446645875007e-05, + "loss": 0.3256, + "step": 5161000 + }, + { + "epoch": 3.09, + "learning_rate": 3.437236649318951e-05, + "loss": 0.3319, + "step": 5161500 + }, + { + "epoch": 3.09, + "learning_rate": 3.4370266527628944e-05, + "loss": 0.3308, + "step": 5162000 + }, + { + "epoch": 3.1, + "learning_rate": 3.436816656206838e-05, + "loss": 0.3349, + "step": 5162500 + }, + { + "epoch": 3.1, + "learning_rate": 3.436606659650782e-05, + "loss": 0.3316, + "step": 5163000 + }, + { + "epoch": 3.1, + "learning_rate": 3.436396663094725e-05, + "loss": 0.3332, + "step": 5163500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4361870865317805e-05, + "loss": 0.3263, + "step": 5164000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4359770899757245e-05, + "loss": 0.3412, + "step": 5164500 + }, + { + "epoch": 3.1, + "learning_rate": 3.435767093419668e-05, + "loss": 0.3453, + "step": 5165000 + }, + { + "epoch": 3.1, + "learning_rate": 3.435557096863611e-05, + "loss": 0.3286, + "step": 5165500 + }, + { + "epoch": 3.1, + "learning_rate": 3.435347100307555e-05, + "loss": 0.3245, + "step": 5166000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4351371037514986e-05, + "loss": 0.3333, + "step": 5166500 + }, + { + "epoch": 3.1, + "learning_rate": 3.434927107195442e-05, + "loss": 0.3284, + "step": 5167000 + }, + { + "epoch": 3.1, + "learning_rate": 3.434717110639386e-05, + "loss": 0.3249, + "step": 5167500 + }, + { + "epoch": 3.1, + "learning_rate": 3.434507954069553e-05, + "loss": 0.3343, + "step": 5168000 + }, + { + "epoch": 3.1, + "learning_rate": 3.434297957513497e-05, + "loss": 0.3329, + "step": 5168500 + }, + { + "epoch": 3.1, + "learning_rate": 3.43408796095744e-05, + "loss": 0.3317, + "step": 5169000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4338779644013834e-05, + "loss": 0.3232, + "step": 5169500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4336679678453274e-05, + "loss": 0.3349, + "step": 5170000 + }, + { + "epoch": 3.1, + "learning_rate": 3.433458391282383e-05, + "loss": 0.3315, + "step": 5170500 + }, + { + "epoch": 3.1, + "learning_rate": 3.433248394726326e-05, + "loss": 0.3224, + "step": 5171000 + }, + { + "epoch": 3.1, + "learning_rate": 3.43303839817027e-05, + "loss": 0.333, + "step": 5171500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4328284016142135e-05, + "loss": 0.327, + "step": 5172000 + }, + { + "epoch": 3.1, + "learning_rate": 3.432618405058157e-05, + "loss": 0.3259, + "step": 5172500 + }, + { + "epoch": 3.1, + "learning_rate": 3.432408828495212e-05, + "loss": 0.3286, + "step": 5173000 + }, + { + "epoch": 3.1, + "learning_rate": 3.432198831939156e-05, + "loss": 0.3293, + "step": 5173500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4319888353830995e-05, + "loss": 0.3305, + "step": 5174000 + }, + { + "epoch": 3.1, + "learning_rate": 3.431778838827043e-05, + "loss": 0.3282, + "step": 5174500 + }, + { + "epoch": 3.1, + "learning_rate": 3.431568842270987e-05, + "loss": 0.3349, + "step": 5175000 + }, + { + "epoch": 3.1, + "learning_rate": 3.43135884571493e-05, + "loss": 0.3344, + "step": 5175500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4311488491588736e-05, + "loss": 0.33, + "step": 5176000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4309388526028176e-05, + "loss": 0.337, + "step": 5176500 + }, + { + "epoch": 3.1, + "learning_rate": 3.430729696032985e-05, + "loss": 0.3365, + "step": 5177000 + }, + { + "epoch": 3.1, + "learning_rate": 3.4305196994769283e-05, + "loss": 0.3205, + "step": 5177500 + }, + { + "epoch": 3.1, + "learning_rate": 3.430309702920872e-05, + "loss": 0.3301, + "step": 5178000 + }, + { + "epoch": 3.1, + "learning_rate": 3.430099706364816e-05, + "loss": 0.3302, + "step": 5178500 + }, + { + "epoch": 3.11, + "learning_rate": 3.429889709808759e-05, + "loss": 0.3278, + "step": 5179000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4296797132527024e-05, + "loss": 0.3311, + "step": 5179500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4294697166966464e-05, + "loss": 0.3302, + "step": 5180000 + }, + { + "epoch": 3.11, + "learning_rate": 3.42925972014059e-05, + "loss": 0.3376, + "step": 5180500 + }, + { + "epoch": 3.11, + "learning_rate": 3.429049723584533e-05, + "loss": 0.3375, + "step": 5181000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4288401470215885e-05, + "loss": 0.3242, + "step": 5181500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4286301504655325e-05, + "loss": 0.335, + "step": 5182000 + }, + { + "epoch": 3.11, + "learning_rate": 3.428420153909476e-05, + "loss": 0.3268, + "step": 5182500 + }, + { + "epoch": 3.11, + "learning_rate": 3.428210157353419e-05, + "loss": 0.324, + "step": 5183000 + }, + { + "epoch": 3.11, + "learning_rate": 3.428000160797363e-05, + "loss": 0.3313, + "step": 5183500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4277901642413066e-05, + "loss": 0.3364, + "step": 5184000 + }, + { + "epoch": 3.11, + "learning_rate": 3.42758016768525e-05, + "loss": 0.3363, + "step": 5184500 + }, + { + "epoch": 3.11, + "learning_rate": 3.427370171129193e-05, + "loss": 0.3305, + "step": 5185000 + }, + { + "epoch": 3.11, + "learning_rate": 3.427161014559361e-05, + "loss": 0.3347, + "step": 5185500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4269510180033047e-05, + "loss": 0.336, + "step": 5186000 + }, + { + "epoch": 3.11, + "learning_rate": 3.426741021447248e-05, + "loss": 0.3315, + "step": 5186500 + }, + { + "epoch": 3.11, + "learning_rate": 3.426531024891192e-05, + "loss": 0.3428, + "step": 5187000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4263210283351354e-05, + "loss": 0.324, + "step": 5187500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4261118717653034e-05, + "loss": 0.3302, + "step": 5188000 + }, + { + "epoch": 3.11, + "learning_rate": 3.425901875209247e-05, + "loss": 0.3261, + "step": 5188500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4256918786531894e-05, + "loss": 0.3372, + "step": 5189000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4254818820971335e-05, + "loss": 0.3339, + "step": 5189500 + }, + { + "epoch": 3.11, + "learning_rate": 3.425271885541077e-05, + "loss": 0.3303, + "step": 5190000 + }, + { + "epoch": 3.11, + "learning_rate": 3.42506188898502e-05, + "loss": 0.3316, + "step": 5190500 + }, + { + "epoch": 3.11, + "learning_rate": 3.424851892428964e-05, + "loss": 0.3382, + "step": 5191000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4246418958729075e-05, + "loss": 0.3304, + "step": 5191500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4244318993168515e-05, + "loss": 0.3289, + "step": 5192000 + }, + { + "epoch": 3.11, + "learning_rate": 3.424221902760795e-05, + "loss": 0.3316, + "step": 5192500 + }, + { + "epoch": 3.11, + "learning_rate": 3.424011906204738e-05, + "loss": 0.3288, + "step": 5193000 + }, + { + "epoch": 3.11, + "learning_rate": 3.423801909648682e-05, + "loss": 0.3355, + "step": 5193500 + }, + { + "epoch": 3.11, + "learning_rate": 3.4235923330857376e-05, + "loss": 0.327, + "step": 5194000 + }, + { + "epoch": 3.11, + "learning_rate": 3.423382756522793e-05, + "loss": 0.3352, + "step": 5194500 + }, + { + "epoch": 3.11, + "learning_rate": 3.423172759966736e-05, + "loss": 0.3256, + "step": 5195000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4229627634106797e-05, + "loss": 0.3344, + "step": 5195500 + }, + { + "epoch": 3.12, + "learning_rate": 3.422752766854624e-05, + "loss": 0.3334, + "step": 5196000 + }, + { + "epoch": 3.12, + "learning_rate": 3.422542770298567e-05, + "loss": 0.3391, + "step": 5196500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4223327737425104e-05, + "loss": 0.3366, + "step": 5197000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4221231971795664e-05, + "loss": 0.3238, + "step": 5197500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4219136206166224e-05, + "loss": 0.3334, + "step": 5198000 + }, + { + "epoch": 3.12, + "learning_rate": 3.421703624060565e-05, + "loss": 0.3335, + "step": 5198500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4214936275045085e-05, + "loss": 0.3252, + "step": 5199000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4212836309484525e-05, + "loss": 0.3319, + "step": 5199500 + }, + { + "epoch": 3.12, + "learning_rate": 3.421073634392396e-05, + "loss": 0.3408, + "step": 5200000 + }, + { + "epoch": 3.12, + "eval_loss": 0.3276987373828888, + "eval_runtime": 1118.9331, + "eval_samples_per_second": 470.734, + "eval_steps_per_second": 78.456, + "step": 5200000 + }, + { + "epoch": 3.12, + "learning_rate": 3.420863637836339e-05, + "loss": 0.3387, + "step": 5200500 + }, + { + "epoch": 3.12, + "learning_rate": 3.420653641280283e-05, + "loss": 0.3251, + "step": 5201000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4204436447242265e-05, + "loss": 0.3279, + "step": 5201500 + }, + { + "epoch": 3.12, + "learning_rate": 3.420234068161282e-05, + "loss": 0.3385, + "step": 5202000 + }, + { + "epoch": 3.12, + "learning_rate": 3.420024071605225e-05, + "loss": 0.3291, + "step": 5202500 + }, + { + "epoch": 3.12, + "learning_rate": 3.419814075049169e-05, + "loss": 0.328, + "step": 5203000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4196040784931126e-05, + "loss": 0.3264, + "step": 5203500 + }, + { + "epoch": 3.12, + "learning_rate": 3.419394081937056e-05, + "loss": 0.3359, + "step": 5204000 + }, + { + "epoch": 3.12, + "learning_rate": 3.419184505374112e-05, + "loss": 0.3415, + "step": 5204500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4189745088180553e-05, + "loss": 0.3232, + "step": 5205000 + }, + { + "epoch": 3.12, + "learning_rate": 3.418764512261999e-05, + "loss": 0.3374, + "step": 5205500 + }, + { + "epoch": 3.12, + "learning_rate": 3.418554515705943e-05, + "loss": 0.3305, + "step": 5206000 + }, + { + "epoch": 3.12, + "learning_rate": 3.418344519149886e-05, + "loss": 0.3348, + "step": 5206500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4181345225938294e-05, + "loss": 0.3331, + "step": 5207000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4179245260377734e-05, + "loss": 0.3202, + "step": 5207500 + }, + { + "epoch": 3.12, + "learning_rate": 3.417714949474829e-05, + "loss": 0.3256, + "step": 5208000 + }, + { + "epoch": 3.12, + "learning_rate": 3.417504952918772e-05, + "loss": 0.3398, + "step": 5208500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4172949563627155e-05, + "loss": 0.3483, + "step": 5209000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4170849598066595e-05, + "loss": 0.3318, + "step": 5209500 + }, + { + "epoch": 3.12, + "learning_rate": 3.416874963250603e-05, + "loss": 0.3293, + "step": 5210000 + }, + { + "epoch": 3.12, + "learning_rate": 3.416664966694546e-05, + "loss": 0.3284, + "step": 5210500 + }, + { + "epoch": 3.12, + "learning_rate": 3.4164549701384895e-05, + "loss": 0.327, + "step": 5211000 + }, + { + "epoch": 3.12, + "learning_rate": 3.416244973582433e-05, + "loss": 0.3334, + "step": 5211500 + }, + { + "epoch": 3.12, + "learning_rate": 3.416035397019489e-05, + "loss": 0.3408, + "step": 5212000 + }, + { + "epoch": 3.13, + "learning_rate": 3.415825400463432e-05, + "loss": 0.3298, + "step": 5212500 + }, + { + "epoch": 3.13, + "learning_rate": 3.415615403907376e-05, + "loss": 0.3221, + "step": 5213000 + }, + { + "epoch": 3.13, + "learning_rate": 3.415405407351319e-05, + "loss": 0.3308, + "step": 5213500 + }, + { + "epoch": 3.13, + "learning_rate": 3.415195410795263e-05, + "loss": 0.3299, + "step": 5214000 + }, + { + "epoch": 3.13, + "learning_rate": 3.414985414239206e-05, + "loss": 0.3338, + "step": 5214500 + }, + { + "epoch": 3.13, + "learning_rate": 3.41477541768315e-05, + "loss": 0.3295, + "step": 5215000 + }, + { + "epoch": 3.13, + "learning_rate": 3.414565841120206e-05, + "loss": 0.3281, + "step": 5215500 + }, + { + "epoch": 3.13, + "learning_rate": 3.414355844564149e-05, + "loss": 0.334, + "step": 5216000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4141458480080924e-05, + "loss": 0.3265, + "step": 5216500 + }, + { + "epoch": 3.13, + "learning_rate": 3.413935851452036e-05, + "loss": 0.3358, + "step": 5217000 + }, + { + "epoch": 3.13, + "learning_rate": 3.41372585489598e-05, + "loss": 0.3415, + "step": 5217500 + }, + { + "epoch": 3.13, + "learning_rate": 3.413516278333036e-05, + "loss": 0.3279, + "step": 5218000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4133062817769785e-05, + "loss": 0.3477, + "step": 5218500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4130967052140345e-05, + "loss": 0.3349, + "step": 5219000 + }, + { + "epoch": 3.13, + "learning_rate": 3.412886708657978e-05, + "loss": 0.3208, + "step": 5219500 + }, + { + "epoch": 3.13, + "learning_rate": 3.412676712101922e-05, + "loss": 0.3301, + "step": 5220000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4124667155458645e-05, + "loss": 0.3368, + "step": 5220500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4122567189898086e-05, + "loss": 0.324, + "step": 5221000 + }, + { + "epoch": 3.13, + "learning_rate": 3.412046722433752e-05, + "loss": 0.33, + "step": 5221500 + }, + { + "epoch": 3.13, + "learning_rate": 3.411836725877695e-05, + "loss": 0.3368, + "step": 5222000 + }, + { + "epoch": 3.13, + "learning_rate": 3.411626729321639e-05, + "loss": 0.3293, + "step": 5222500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4114167327655826e-05, + "loss": 0.3319, + "step": 5223000 + }, + { + "epoch": 3.13, + "learning_rate": 3.411207156202638e-05, + "loss": 0.3276, + "step": 5223500 + }, + { + "epoch": 3.13, + "learning_rate": 3.410997159646581e-05, + "loss": 0.3305, + "step": 5224000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4107871630905254e-05, + "loss": 0.3428, + "step": 5224500 + }, + { + "epoch": 3.13, + "learning_rate": 3.410577166534469e-05, + "loss": 0.3285, + "step": 5225000 + }, + { + "epoch": 3.13, + "learning_rate": 3.410367169978412e-05, + "loss": 0.3306, + "step": 5225500 + }, + { + "epoch": 3.13, + "learning_rate": 3.4101575934154674e-05, + "loss": 0.3351, + "step": 5226000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4099475968594114e-05, + "loss": 0.342, + "step": 5226500 + }, + { + "epoch": 3.13, + "learning_rate": 3.409737600303355e-05, + "loss": 0.3423, + "step": 5227000 + }, + { + "epoch": 3.13, + "learning_rate": 3.409527603747298e-05, + "loss": 0.3303, + "step": 5227500 + }, + { + "epoch": 3.13, + "learning_rate": 3.409317607191242e-05, + "loss": 0.3377, + "step": 5228000 + }, + { + "epoch": 3.13, + "learning_rate": 3.4091076106351855e-05, + "loss": 0.326, + "step": 5228500 + }, + { + "epoch": 3.13, + "learning_rate": 3.408897614079129e-05, + "loss": 0.3338, + "step": 5229000 + }, + { + "epoch": 3.14, + "learning_rate": 3.408688037516185e-05, + "loss": 0.3308, + "step": 5229500 + }, + { + "epoch": 3.14, + "learning_rate": 3.408478040960128e-05, + "loss": 0.3275, + "step": 5230000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4082680444040716e-05, + "loss": 0.3248, + "step": 5230500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4080580478480156e-05, + "loss": 0.3426, + "step": 5231000 + }, + { + "epoch": 3.14, + "learning_rate": 3.407848051291959e-05, + "loss": 0.3264, + "step": 5231500 + }, + { + "epoch": 3.14, + "learning_rate": 3.407638054735902e-05, + "loss": 0.3277, + "step": 5232000 + }, + { + "epoch": 3.14, + "learning_rate": 3.407428058179846e-05, + "loss": 0.3268, + "step": 5232500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4072184816169017e-05, + "loss": 0.3322, + "step": 5233000 + }, + { + "epoch": 3.14, + "learning_rate": 3.407008485060845e-05, + "loss": 0.3327, + "step": 5233500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4067984885047883e-05, + "loss": 0.3303, + "step": 5234000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4065884919487324e-05, + "loss": 0.3334, + "step": 5234500 + }, + { + "epoch": 3.14, + "learning_rate": 3.406378495392676e-05, + "loss": 0.3247, + "step": 5235000 + }, + { + "epoch": 3.14, + "learning_rate": 3.406168498836619e-05, + "loss": 0.3287, + "step": 5235500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4059585022805624e-05, + "loss": 0.3352, + "step": 5236000 + }, + { + "epoch": 3.14, + "learning_rate": 3.405748505724506e-05, + "loss": 0.3323, + "step": 5236500 + }, + { + "epoch": 3.14, + "learning_rate": 3.405538929161562e-05, + "loss": 0.3347, + "step": 5237000 + }, + { + "epoch": 3.14, + "learning_rate": 3.405328932605506e-05, + "loss": 0.3367, + "step": 5237500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4051189360494485e-05, + "loss": 0.3275, + "step": 5238000 + }, + { + "epoch": 3.14, + "learning_rate": 3.404908939493392e-05, + "loss": 0.3295, + "step": 5238500 + }, + { + "epoch": 3.14, + "learning_rate": 3.404698942937336e-05, + "loss": 0.323, + "step": 5239000 + }, + { + "epoch": 3.14, + "learning_rate": 3.404489366374392e-05, + "loss": 0.3345, + "step": 5239500 + }, + { + "epoch": 3.14, + "learning_rate": 3.404279369818335e-05, + "loss": 0.3243, + "step": 5240000 + }, + { + "epoch": 3.14, + "learning_rate": 3.404069373262278e-05, + "loss": 0.328, + "step": 5240500 + }, + { + "epoch": 3.14, + "learning_rate": 3.403859796699334e-05, + "loss": 0.3289, + "step": 5241000 + }, + { + "epoch": 3.14, + "learning_rate": 3.403649800143278e-05, + "loss": 0.3293, + "step": 5241500 + }, + { + "epoch": 3.14, + "learning_rate": 3.403439803587221e-05, + "loss": 0.3307, + "step": 5242000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4032298070311647e-05, + "loss": 0.3428, + "step": 5242500 + }, + { + "epoch": 3.14, + "learning_rate": 3.403019810475108e-05, + "loss": 0.3242, + "step": 5243000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4028098139190513e-05, + "loss": 0.333, + "step": 5243500 + }, + { + "epoch": 3.14, + "learning_rate": 3.4025998173629954e-05, + "loss": 0.3387, + "step": 5244000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4023902408000514e-05, + "loss": 0.3267, + "step": 5244500 + }, + { + "epoch": 3.14, + "learning_rate": 3.402180244243995e-05, + "loss": 0.3313, + "step": 5245000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4019702476879374e-05, + "loss": 0.3284, + "step": 5245500 + }, + { + "epoch": 3.15, + "learning_rate": 3.4017602511318814e-05, + "loss": 0.3263, + "step": 5246000 + }, + { + "epoch": 3.15, + "learning_rate": 3.401550254575825e-05, + "loss": 0.3314, + "step": 5246500 + }, + { + "epoch": 3.15, + "learning_rate": 3.401340258019768e-05, + "loss": 0.3196, + "step": 5247000 + }, + { + "epoch": 3.15, + "learning_rate": 3.401130261463712e-05, + "loss": 0.3302, + "step": 5247500 + }, + { + "epoch": 3.15, + "learning_rate": 3.4009206849007675e-05, + "loss": 0.3314, + "step": 5248000 + }, + { + "epoch": 3.15, + "learning_rate": 3.400710688344711e-05, + "loss": 0.3375, + "step": 5248500 + }, + { + "epoch": 3.15, + "learning_rate": 3.400500691788654e-05, + "loss": 0.3242, + "step": 5249000 + }, + { + "epoch": 3.15, + "learning_rate": 3.400290695232598e-05, + "loss": 0.3301, + "step": 5249500 + }, + { + "epoch": 3.15, + "learning_rate": 3.4000806986765416e-05, + "loss": 0.3274, + "step": 5250000 + }, + { + "epoch": 3.15, + "learning_rate": 3.399870702120485e-05, + "loss": 0.3446, + "step": 5250500 + }, + { + "epoch": 3.15, + "learning_rate": 3.399661125557541e-05, + "loss": 0.3312, + "step": 5251000 + }, + { + "epoch": 3.15, + "learning_rate": 3.399451129001484e-05, + "loss": 0.3331, + "step": 5251500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3992411324454276e-05, + "loss": 0.3293, + "step": 5252000 + }, + { + "epoch": 3.15, + "learning_rate": 3.399031135889372e-05, + "loss": 0.3361, + "step": 5252500 + }, + { + "epoch": 3.15, + "learning_rate": 3.398821139333315e-05, + "loss": 0.3356, + "step": 5253000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3986111427772584e-05, + "loss": 0.3288, + "step": 5253500 + }, + { + "epoch": 3.15, + "learning_rate": 3.398401566214314e-05, + "loss": 0.3358, + "step": 5254000 + }, + { + "epoch": 3.15, + "learning_rate": 3.398191569658258e-05, + "loss": 0.3279, + "step": 5254500 + }, + { + "epoch": 3.15, + "learning_rate": 3.397981573102201e-05, + "loss": 0.3289, + "step": 5255000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3977715765461444e-05, + "loss": 0.3309, + "step": 5255500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3975615799900885e-05, + "loss": 0.3342, + "step": 5256000 + }, + { + "epoch": 3.15, + "learning_rate": 3.397352003427144e-05, + "loss": 0.3312, + "step": 5256500 + }, + { + "epoch": 3.15, + "learning_rate": 3.397142006871087e-05, + "loss": 0.3323, + "step": 5257000 + }, + { + "epoch": 3.15, + "learning_rate": 3.3969320103150305e-05, + "loss": 0.3345, + "step": 5257500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3967220137589745e-05, + "loss": 0.3247, + "step": 5258000 + }, + { + "epoch": 3.15, + "learning_rate": 3.396512017202918e-05, + "loss": 0.3323, + "step": 5258500 + }, + { + "epoch": 3.15, + "learning_rate": 3.396302440639973e-05, + "loss": 0.3318, + "step": 5259000 + }, + { + "epoch": 3.15, + "learning_rate": 3.396092444083917e-05, + "loss": 0.339, + "step": 5259500 + }, + { + "epoch": 3.15, + "learning_rate": 3.3958824475278606e-05, + "loss": 0.3284, + "step": 5260000 + }, + { + "epoch": 3.15, + "learning_rate": 3.395672450971804e-05, + "loss": 0.3257, + "step": 5260500 + }, + { + "epoch": 3.15, + "learning_rate": 3.395462874408859e-05, + "loss": 0.3331, + "step": 5261000 + }, + { + "epoch": 3.15, + "learning_rate": 3.395252877852803e-05, + "loss": 0.3367, + "step": 5261500 + }, + { + "epoch": 3.15, + "learning_rate": 3.395042881296747e-05, + "loss": 0.3291, + "step": 5262000 + }, + { + "epoch": 3.16, + "learning_rate": 3.39483288474069e-05, + "loss": 0.3303, + "step": 5262500 + }, + { + "epoch": 3.16, + "learning_rate": 3.394622888184634e-05, + "loss": 0.3418, + "step": 5263000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3944128916285774e-05, + "loss": 0.3316, + "step": 5263500 + }, + { + "epoch": 3.16, + "learning_rate": 3.394203315065633e-05, + "loss": 0.3396, + "step": 5264000 + }, + { + "epoch": 3.16, + "learning_rate": 3.393993318509576e-05, + "loss": 0.3449, + "step": 5264500 + }, + { + "epoch": 3.16, + "learning_rate": 3.39378332195352e-05, + "loss": 0.329, + "step": 5265000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3935733253974635e-05, + "loss": 0.3341, + "step": 5265500 + }, + { + "epoch": 3.16, + "learning_rate": 3.393363328841407e-05, + "loss": 0.3359, + "step": 5266000 + }, + { + "epoch": 3.16, + "learning_rate": 3.393153332285351e-05, + "loss": 0.3242, + "step": 5266500 + }, + { + "epoch": 3.16, + "learning_rate": 3.392943335729294e-05, + "loss": 0.3335, + "step": 5267000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3927337591663495e-05, + "loss": 0.3279, + "step": 5267500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3925237626102936e-05, + "loss": 0.3324, + "step": 5268000 + }, + { + "epoch": 3.16, + "learning_rate": 3.392313766054237e-05, + "loss": 0.3391, + "step": 5268500 + }, + { + "epoch": 3.16, + "learning_rate": 3.39210376949818e-05, + "loss": 0.3258, + "step": 5269000 + }, + { + "epoch": 3.16, + "learning_rate": 3.391893772942124e-05, + "loss": 0.3339, + "step": 5269500 + }, + { + "epoch": 3.16, + "learning_rate": 3.391683776386067e-05, + "loss": 0.3317, + "step": 5270000 + }, + { + "epoch": 3.16, + "learning_rate": 3.391474199823123e-05, + "loss": 0.3344, + "step": 5270500 + }, + { + "epoch": 3.16, + "learning_rate": 3.391264203267066e-05, + "loss": 0.3412, + "step": 5271000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3910542067110104e-05, + "loss": 0.3304, + "step": 5271500 + }, + { + "epoch": 3.16, + "learning_rate": 3.390844210154954e-05, + "loss": 0.3313, + "step": 5272000 + }, + { + "epoch": 3.16, + "learning_rate": 3.3906342135988964e-05, + "loss": 0.3382, + "step": 5272500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3904242170428404e-05, + "loss": 0.3321, + "step": 5273000 + }, + { + "epoch": 3.16, + "learning_rate": 3.390214220486784e-05, + "loss": 0.3267, + "step": 5273500 + }, + { + "epoch": 3.16, + "learning_rate": 3.390004223930727e-05, + "loss": 0.3329, + "step": 5274000 + }, + { + "epoch": 3.16, + "learning_rate": 3.389794647367783e-05, + "loss": 0.3313, + "step": 5274500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3895846508117265e-05, + "loss": 0.3376, + "step": 5275000 + }, + { + "epoch": 3.16, + "learning_rate": 3.38937465425567e-05, + "loss": 0.3339, + "step": 5275500 + }, + { + "epoch": 3.16, + "learning_rate": 3.389165077692726e-05, + "loss": 0.3379, + "step": 5276000 + }, + { + "epoch": 3.16, + "learning_rate": 3.38895508113667e-05, + "loss": 0.3154, + "step": 5276500 + }, + { + "epoch": 3.16, + "learning_rate": 3.3887450845806125e-05, + "loss": 0.3347, + "step": 5277000 + }, + { + "epoch": 3.16, + "learning_rate": 3.388535088024556e-05, + "loss": 0.3321, + "step": 5277500 + }, + { + "epoch": 3.16, + "learning_rate": 3.388325511461612e-05, + "loss": 0.3305, + "step": 5278000 + }, + { + "epoch": 3.16, + "learning_rate": 3.388115514905556e-05, + "loss": 0.3374, + "step": 5278500 + }, + { + "epoch": 3.16, + "learning_rate": 3.387905518349499e-05, + "loss": 0.3338, + "step": 5279000 + }, + { + "epoch": 3.17, + "learning_rate": 3.387695521793442e-05, + "loss": 0.3249, + "step": 5279500 + }, + { + "epoch": 3.17, + "learning_rate": 3.387485525237386e-05, + "loss": 0.3285, + "step": 5280000 + }, + { + "epoch": 3.17, + "learning_rate": 3.387275528681329e-05, + "loss": 0.34, + "step": 5280500 + }, + { + "epoch": 3.17, + "learning_rate": 3.387065532125273e-05, + "loss": 0.3256, + "step": 5281000 + }, + { + "epoch": 3.17, + "learning_rate": 3.386855955562329e-05, + "loss": 0.3414, + "step": 5281500 + }, + { + "epoch": 3.17, + "learning_rate": 3.386645959006272e-05, + "loss": 0.3369, + "step": 5282000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3864359624502154e-05, + "loss": 0.339, + "step": 5282500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3862259658941594e-05, + "loss": 0.3375, + "step": 5283000 + }, + { + "epoch": 3.17, + "learning_rate": 3.386015969338103e-05, + "loss": 0.3302, + "step": 5283500 + }, + { + "epoch": 3.17, + "learning_rate": 3.385806392775158e-05, + "loss": 0.34, + "step": 5284000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3855963962191015e-05, + "loss": 0.3363, + "step": 5284500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3853863996630455e-05, + "loss": 0.3352, + "step": 5285000 + }, + { + "epoch": 3.17, + "learning_rate": 3.385176403106989e-05, + "loss": 0.3381, + "step": 5285500 + }, + { + "epoch": 3.17, + "learning_rate": 3.384966406550932e-05, + "loss": 0.332, + "step": 5286000 + }, + { + "epoch": 3.17, + "learning_rate": 3.384756409994876e-05, + "loss": 0.3295, + "step": 5286500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3845464134388196e-05, + "loss": 0.3266, + "step": 5287000 + }, + { + "epoch": 3.17, + "learning_rate": 3.384336836875875e-05, + "loss": 0.3396, + "step": 5287500 + }, + { + "epoch": 3.17, + "learning_rate": 3.384126840319818e-05, + "loss": 0.339, + "step": 5288000 + }, + { + "epoch": 3.17, + "learning_rate": 3.383916843763762e-05, + "loss": 0.3365, + "step": 5288500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3837068472077056e-05, + "loss": 0.3351, + "step": 5289000 + }, + { + "epoch": 3.17, + "learning_rate": 3.383496850651649e-05, + "loss": 0.3315, + "step": 5289500 + }, + { + "epoch": 3.17, + "learning_rate": 3.383286854095593e-05, + "loss": 0.3315, + "step": 5290000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3830772775326484e-05, + "loss": 0.339, + "step": 5290500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382867280976592e-05, + "loss": 0.337, + "step": 5291000 + }, + { + "epoch": 3.17, + "learning_rate": 3.382657284420536e-05, + "loss": 0.3368, + "step": 5291500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382447287864479e-05, + "loss": 0.3273, + "step": 5292000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3822377113015344e-05, + "loss": 0.3259, + "step": 5292500 + }, + { + "epoch": 3.17, + "learning_rate": 3.382027714745478e-05, + "loss": 0.3309, + "step": 5293000 + }, + { + "epoch": 3.17, + "learning_rate": 3.381817718189422e-05, + "loss": 0.3311, + "step": 5293500 + }, + { + "epoch": 3.17, + "learning_rate": 3.381607721633365e-05, + "loss": 0.3211, + "step": 5294000 + }, + { + "epoch": 3.17, + "learning_rate": 3.3813977250773085e-05, + "loss": 0.3333, + "step": 5294500 + }, + { + "epoch": 3.17, + "learning_rate": 3.3811877285212525e-05, + "loss": 0.3257, + "step": 5295000 + }, + { + "epoch": 3.17, + "learning_rate": 3.380977731965196e-05, + "loss": 0.3305, + "step": 5295500 + }, + { + "epoch": 3.18, + "learning_rate": 3.380767735409139e-05, + "loss": 0.33, + "step": 5296000 + }, + { + "epoch": 3.18, + "learning_rate": 3.380557738853083e-05, + "loss": 0.3258, + "step": 5296500 + }, + { + "epoch": 3.18, + "learning_rate": 3.380347742297026e-05, + "loss": 0.3277, + "step": 5297000 + }, + { + "epoch": 3.18, + "learning_rate": 3.38013774574097e-05, + "loss": 0.3259, + "step": 5297500 + }, + { + "epoch": 3.18, + "learning_rate": 3.379927749184913e-05, + "loss": 0.3279, + "step": 5298000 + }, + { + "epoch": 3.18, + "learning_rate": 3.379718172621969e-05, + "loss": 0.3259, + "step": 5298500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3795085960590247e-05, + "loss": 0.3303, + "step": 5299000 + }, + { + "epoch": 3.18, + "learning_rate": 3.37929901949608e-05, + "loss": 0.3234, + "step": 5299500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3790890229400234e-05, + "loss": 0.327, + "step": 5300000 + }, + { + "epoch": 3.18, + "eval_loss": 0.3255656063556671, + "eval_runtime": 1119.0379, + "eval_samples_per_second": 470.69, + "eval_steps_per_second": 78.449, + "step": 5300000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3788790263839674e-05, + "loss": 0.3288, + "step": 5300500 + }, + { + "epoch": 3.18, + "learning_rate": 3.378669029827911e-05, + "loss": 0.3356, + "step": 5301000 + }, + { + "epoch": 3.18, + "learning_rate": 3.378459033271854e-05, + "loss": 0.3257, + "step": 5301500 + }, + { + "epoch": 3.18, + "learning_rate": 3.378249036715798e-05, + "loss": 0.3322, + "step": 5302000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3780390401597414e-05, + "loss": 0.3446, + "step": 5302500 + }, + { + "epoch": 3.18, + "learning_rate": 3.377829043603685e-05, + "loss": 0.3363, + "step": 5303000 + }, + { + "epoch": 3.18, + "learning_rate": 3.377619467040741e-05, + "loss": 0.3346, + "step": 5303500 + }, + { + "epoch": 3.18, + "learning_rate": 3.377409470484684e-05, + "loss": 0.3385, + "step": 5304000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3771994739286275e-05, + "loss": 0.3288, + "step": 5304500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3769894773725715e-05, + "loss": 0.3313, + "step": 5305000 + }, + { + "epoch": 3.18, + "learning_rate": 3.376779480816515e-05, + "loss": 0.3246, + "step": 5305500 + }, + { + "epoch": 3.18, + "learning_rate": 3.376569484260458e-05, + "loss": 0.3286, + "step": 5306000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3763599076975136e-05, + "loss": 0.3325, + "step": 5306500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3761499111414576e-05, + "loss": 0.3232, + "step": 5307000 + }, + { + "epoch": 3.18, + "learning_rate": 3.375939914585401e-05, + "loss": 0.3323, + "step": 5307500 + }, + { + "epoch": 3.18, + "learning_rate": 3.375729918029344e-05, + "loss": 0.3287, + "step": 5308000 + }, + { + "epoch": 3.18, + "learning_rate": 3.375519921473288e-05, + "loss": 0.3333, + "step": 5308500 + }, + { + "epoch": 3.18, + "learning_rate": 3.375309924917231e-05, + "loss": 0.3301, + "step": 5309000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3750999283611743e-05, + "loss": 0.321, + "step": 5309500 + }, + { + "epoch": 3.18, + "learning_rate": 3.3748903517982304e-05, + "loss": 0.3358, + "step": 5310000 + }, + { + "epoch": 3.18, + "learning_rate": 3.3746807752352864e-05, + "loss": 0.3362, + "step": 5310500 + }, + { + "epoch": 3.18, + "learning_rate": 3.37447077867923e-05, + "loss": 0.3252, + "step": 5311000 + }, + { + "epoch": 3.18, + "learning_rate": 3.374260782123173e-05, + "loss": 0.3305, + "step": 5311500 + }, + { + "epoch": 3.18, + "learning_rate": 3.374050785567117e-05, + "loss": 0.3309, + "step": 5312000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3738407890110605e-05, + "loss": 0.3313, + "step": 5312500 + }, + { + "epoch": 3.19, + "learning_rate": 3.373630792455004e-05, + "loss": 0.3358, + "step": 5313000 + }, + { + "epoch": 3.19, + "learning_rate": 3.373420795898947e-05, + "loss": 0.3374, + "step": 5313500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3732107993428905e-05, + "loss": 0.3291, + "step": 5314000 + }, + { + "epoch": 3.19, + "learning_rate": 3.373000802786834e-05, + "loss": 0.3289, + "step": 5314500 + }, + { + "epoch": 3.19, + "learning_rate": 3.372790806230778e-05, + "loss": 0.3316, + "step": 5315000 + }, + { + "epoch": 3.19, + "learning_rate": 3.372580809674721e-05, + "loss": 0.3352, + "step": 5315500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3723708131186646e-05, + "loss": 0.3296, + "step": 5316000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3721616565488326e-05, + "loss": 0.3333, + "step": 5316500 + }, + { + "epoch": 3.19, + "learning_rate": 3.371951659992776e-05, + "loss": 0.3302, + "step": 5317000 + }, + { + "epoch": 3.19, + "learning_rate": 3.37174166343672e-05, + "loss": 0.3355, + "step": 5317500 + }, + { + "epoch": 3.19, + "learning_rate": 3.371531666880663e-05, + "loss": 0.3383, + "step": 5318000 + }, + { + "epoch": 3.19, + "learning_rate": 3.371321670324607e-05, + "loss": 0.3266, + "step": 5318500 + }, + { + "epoch": 3.19, + "learning_rate": 3.37111167376855e-05, + "loss": 0.3336, + "step": 5319000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3709016772124934e-05, + "loss": 0.3208, + "step": 5319500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3706921006495494e-05, + "loss": 0.3331, + "step": 5320000 + }, + { + "epoch": 3.19, + "learning_rate": 3.370482104093493e-05, + "loss": 0.3271, + "step": 5320500 + }, + { + "epoch": 3.19, + "learning_rate": 3.370272107537436e-05, + "loss": 0.3322, + "step": 5321000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3700621109813794e-05, + "loss": 0.3195, + "step": 5321500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3698521144253235e-05, + "loss": 0.3278, + "step": 5322000 + }, + { + "epoch": 3.19, + "learning_rate": 3.369642117869267e-05, + "loss": 0.3322, + "step": 5322500 + }, + { + "epoch": 3.19, + "learning_rate": 3.36943212131321e-05, + "loss": 0.3291, + "step": 5323000 + }, + { + "epoch": 3.19, + "learning_rate": 3.369222124757154e-05, + "loss": 0.3403, + "step": 5323500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3690129681873216e-05, + "loss": 0.328, + "step": 5324000 + }, + { + "epoch": 3.19, + "learning_rate": 3.3688029716312656e-05, + "loss": 0.3271, + "step": 5324500 + }, + { + "epoch": 3.19, + "learning_rate": 3.368592975075209e-05, + "loss": 0.3302, + "step": 5325000 + }, + { + "epoch": 3.19, + "learning_rate": 3.368382978519152e-05, + "loss": 0.3392, + "step": 5325500 + }, + { + "epoch": 3.19, + "learning_rate": 3.3681729819630956e-05, + "loss": 0.328, + "step": 5326000 + }, + { + "epoch": 3.19, + "learning_rate": 3.367962985407039e-05, + "loss": 0.3235, + "step": 5326500 + }, + { + "epoch": 3.19, + "learning_rate": 3.367752988850983e-05, + "loss": 0.3353, + "step": 5327000 + }, + { + "epoch": 3.19, + "learning_rate": 3.367542992294926e-05, + "loss": 0.3303, + "step": 5327500 + }, + { + "epoch": 3.19, + "learning_rate": 3.367333415731982e-05, + "loss": 0.3335, + "step": 5328000 + }, + { + "epoch": 3.19, + "learning_rate": 3.367123419175925e-05, + "loss": 0.3288, + "step": 5328500 + }, + { + "epoch": 3.19, + "learning_rate": 3.366913422619869e-05, + "loss": 0.3275, + "step": 5329000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3667034260638124e-05, + "loss": 0.3224, + "step": 5329500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3664938495008684e-05, + "loss": 0.3289, + "step": 5330000 + }, + { + "epoch": 3.2, + "learning_rate": 3.366283852944811e-05, + "loss": 0.3244, + "step": 5330500 + }, + { + "epoch": 3.2, + "learning_rate": 3.366073856388755e-05, + "loss": 0.3407, + "step": 5331000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3658638598326985e-05, + "loss": 0.3283, + "step": 5331500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3656542832697545e-05, + "loss": 0.3404, + "step": 5332000 + }, + { + "epoch": 3.2, + "learning_rate": 3.365444286713698e-05, + "loss": 0.3342, + "step": 5332500 + }, + { + "epoch": 3.2, + "learning_rate": 3.365234290157641e-05, + "loss": 0.3336, + "step": 5333000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3650242936015845e-05, + "loss": 0.3335, + "step": 5333500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3648142970455286e-05, + "loss": 0.3373, + "step": 5334000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3646047204825846e-05, + "loss": 0.3358, + "step": 5334500 + }, + { + "epoch": 3.2, + "learning_rate": 3.364394723926527e-05, + "loss": 0.3249, + "step": 5335000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3641847273704706e-05, + "loss": 0.337, + "step": 5335500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3639747308144146e-05, + "loss": 0.3243, + "step": 5336000 + }, + { + "epoch": 3.2, + "learning_rate": 3.363764734258358e-05, + "loss": 0.3266, + "step": 5336500 + }, + { + "epoch": 3.2, + "learning_rate": 3.363555157695414e-05, + "loss": 0.3352, + "step": 5337000 + }, + { + "epoch": 3.2, + "learning_rate": 3.363345161139357e-05, + "loss": 0.3274, + "step": 5337500 + }, + { + "epoch": 3.2, + "learning_rate": 3.363135164583301e-05, + "loss": 0.3323, + "step": 5338000 + }, + { + "epoch": 3.2, + "learning_rate": 3.362925168027244e-05, + "loss": 0.327, + "step": 5338500 + }, + { + "epoch": 3.2, + "learning_rate": 3.3627151714711874e-05, + "loss": 0.3241, + "step": 5339000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3625051749151314e-05, + "loss": 0.325, + "step": 5339500 + }, + { + "epoch": 3.2, + "learning_rate": 3.362295178359075e-05, + "loss": 0.3324, + "step": 5340000 + }, + { + "epoch": 3.2, + "learning_rate": 3.362085181803018e-05, + "loss": 0.34, + "step": 5340500 + }, + { + "epoch": 3.2, + "learning_rate": 3.361875605240074e-05, + "loss": 0.3274, + "step": 5341000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3616656086840175e-05, + "loss": 0.3349, + "step": 5341500 + }, + { + "epoch": 3.2, + "learning_rate": 3.361455612127961e-05, + "loss": 0.3326, + "step": 5342000 + }, + { + "epoch": 3.2, + "learning_rate": 3.361245615571905e-05, + "loss": 0.3364, + "step": 5342500 + }, + { + "epoch": 3.2, + "learning_rate": 3.36103603900896e-05, + "loss": 0.3304, + "step": 5343000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3608260424529036e-05, + "loss": 0.3342, + "step": 5343500 + }, + { + "epoch": 3.2, + "learning_rate": 3.360616045896847e-05, + "loss": 0.3195, + "step": 5344000 + }, + { + "epoch": 3.2, + "learning_rate": 3.360406049340791e-05, + "loss": 0.3245, + "step": 5344500 + }, + { + "epoch": 3.2, + "learning_rate": 3.360196472777846e-05, + "loss": 0.3346, + "step": 5345000 + }, + { + "epoch": 3.2, + "learning_rate": 3.3599864762217897e-05, + "loss": 0.3311, + "step": 5345500 + }, + { + "epoch": 3.21, + "learning_rate": 3.359776899658846e-05, + "loss": 0.3318, + "step": 5346000 + }, + { + "epoch": 3.21, + "learning_rate": 3.359566903102789e-05, + "loss": 0.336, + "step": 5346500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3593569065467324e-05, + "loss": 0.334, + "step": 5347000 + }, + { + "epoch": 3.21, + "learning_rate": 3.359146909990676e-05, + "loss": 0.3343, + "step": 5347500 + }, + { + "epoch": 3.21, + "learning_rate": 3.35893691343462e-05, + "loss": 0.3245, + "step": 5348000 + }, + { + "epoch": 3.21, + "learning_rate": 3.358726916878563e-05, + "loss": 0.3308, + "step": 5348500 + }, + { + "epoch": 3.21, + "learning_rate": 3.358517340315619e-05, + "loss": 0.3321, + "step": 5349000 + }, + { + "epoch": 3.21, + "learning_rate": 3.358307343759562e-05, + "loss": 0.3305, + "step": 5349500 + }, + { + "epoch": 3.21, + "learning_rate": 3.358097347203506e-05, + "loss": 0.3405, + "step": 5350000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357887350647449e-05, + "loss": 0.328, + "step": 5350500 + }, + { + "epoch": 3.21, + "learning_rate": 3.357677774084505e-05, + "loss": 0.3346, + "step": 5351000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357467777528448e-05, + "loss": 0.3245, + "step": 5351500 + }, + { + "epoch": 3.21, + "learning_rate": 3.357257780972392e-05, + "loss": 0.3298, + "step": 5352000 + }, + { + "epoch": 3.21, + "learning_rate": 3.357047784416335e-05, + "loss": 0.3371, + "step": 5352500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3568377878602786e-05, + "loss": 0.3334, + "step": 5353000 + }, + { + "epoch": 3.21, + "learning_rate": 3.3566277913042226e-05, + "loss": 0.3325, + "step": 5353500 + }, + { + "epoch": 3.21, + "learning_rate": 3.356417794748166e-05, + "loss": 0.3301, + "step": 5354000 + }, + { + "epoch": 3.21, + "learning_rate": 3.356208218185221e-05, + "loss": 0.3296, + "step": 5354500 + }, + { + "epoch": 3.21, + "learning_rate": 3.355998221629165e-05, + "loss": 0.3348, + "step": 5355000 + }, + { + "epoch": 3.21, + "learning_rate": 3.355788225073109e-05, + "loss": 0.3352, + "step": 5355500 + }, + { + "epoch": 3.21, + "learning_rate": 3.355578228517052e-05, + "loss": 0.3368, + "step": 5356000 + }, + { + "epoch": 3.21, + "learning_rate": 3.355368231960996e-05, + "loss": 0.3301, + "step": 5356500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3551582354049394e-05, + "loss": 0.3291, + "step": 5357000 + }, + { + "epoch": 3.21, + "learning_rate": 3.354948658841995e-05, + "loss": 0.3359, + "step": 5357500 + }, + { + "epoch": 3.21, + "learning_rate": 3.354738662285938e-05, + "loss": 0.3297, + "step": 5358000 + }, + { + "epoch": 3.21, + "learning_rate": 3.354528665729882e-05, + "loss": 0.3269, + "step": 5358500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3543186691738255e-05, + "loss": 0.3233, + "step": 5359000 + }, + { + "epoch": 3.21, + "learning_rate": 3.354108672617769e-05, + "loss": 0.3318, + "step": 5359500 + }, + { + "epoch": 3.21, + "learning_rate": 3.353899096054824e-05, + "loss": 0.3294, + "step": 5360000 + }, + { + "epoch": 3.21, + "learning_rate": 3.353689099498768e-05, + "loss": 0.3308, + "step": 5360500 + }, + { + "epoch": 3.21, + "learning_rate": 3.3534791029427115e-05, + "loss": 0.3305, + "step": 5361000 + }, + { + "epoch": 3.21, + "learning_rate": 3.353269106386655e-05, + "loss": 0.3331, + "step": 5361500 + }, + { + "epoch": 3.21, + "learning_rate": 3.353059109830599e-05, + "loss": 0.3263, + "step": 5362000 + }, + { + "epoch": 3.22, + "learning_rate": 3.352849113274542e-05, + "loss": 0.3291, + "step": 5362500 + }, + { + "epoch": 3.22, + "learning_rate": 3.352639116718486e-05, + "loss": 0.3283, + "step": 5363000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3524291201624296e-05, + "loss": 0.3364, + "step": 5363500 + }, + { + "epoch": 3.22, + "learning_rate": 3.352219543599485e-05, + "loss": 0.3343, + "step": 5364000 + }, + { + "epoch": 3.22, + "learning_rate": 3.352009547043428e-05, + "loss": 0.3454, + "step": 5364500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3517995504873724e-05, + "loss": 0.3351, + "step": 5365000 + }, + { + "epoch": 3.22, + "learning_rate": 3.351589553931316e-05, + "loss": 0.3373, + "step": 5365500 + }, + { + "epoch": 3.22, + "learning_rate": 3.351379977368371e-05, + "loss": 0.3321, + "step": 5366000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3511699808123144e-05, + "loss": 0.3309, + "step": 5366500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3509599842562584e-05, + "loss": 0.3256, + "step": 5367000 + }, + { + "epoch": 3.22, + "learning_rate": 3.350749987700202e-05, + "loss": 0.3307, + "step": 5367500 + }, + { + "epoch": 3.22, + "learning_rate": 3.35054083113037e-05, + "loss": 0.32, + "step": 5368000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3503308345743125e-05, + "loss": 0.3467, + "step": 5368500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3501208380182565e-05, + "loss": 0.3279, + "step": 5369000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3499108414622e-05, + "loss": 0.3271, + "step": 5369500 + }, + { + "epoch": 3.22, + "learning_rate": 3.349700844906143e-05, + "loss": 0.3334, + "step": 5370000 + }, + { + "epoch": 3.22, + "learning_rate": 3.349490848350087e-05, + "loss": 0.3407, + "step": 5370500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3492812717871426e-05, + "loss": 0.3207, + "step": 5371000 + }, + { + "epoch": 3.22, + "learning_rate": 3.349071275231086e-05, + "loss": 0.3358, + "step": 5371500 + }, + { + "epoch": 3.22, + "learning_rate": 3.348861278675029e-05, + "loss": 0.3373, + "step": 5372000 + }, + { + "epoch": 3.22, + "learning_rate": 3.348651282118973e-05, + "loss": 0.3364, + "step": 5372500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3484412855629166e-05, + "loss": 0.33, + "step": 5373000 + }, + { + "epoch": 3.22, + "learning_rate": 3.348231708999972e-05, + "loss": 0.335, + "step": 5373500 + }, + { + "epoch": 3.22, + "learning_rate": 3.3480217124439153e-05, + "loss": 0.3181, + "step": 5374000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3478117158878594e-05, + "loss": 0.3365, + "step": 5374500 + }, + { + "epoch": 3.22, + "learning_rate": 3.347601719331803e-05, + "loss": 0.3245, + "step": 5375000 + }, + { + "epoch": 3.22, + "learning_rate": 3.347391722775747e-05, + "loss": 0.3332, + "step": 5375500 + }, + { + "epoch": 3.22, + "learning_rate": 3.347182146212802e-05, + "loss": 0.3301, + "step": 5376000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3469721496567454e-05, + "loss": 0.3252, + "step": 5376500 + }, + { + "epoch": 3.22, + "learning_rate": 3.346762153100689e-05, + "loss": 0.3327, + "step": 5377000 + }, + { + "epoch": 3.22, + "learning_rate": 3.346552156544633e-05, + "loss": 0.3397, + "step": 5377500 + }, + { + "epoch": 3.22, + "learning_rate": 3.346342159988576e-05, + "loss": 0.3341, + "step": 5378000 + }, + { + "epoch": 3.22, + "learning_rate": 3.3461325834256315e-05, + "loss": 0.3224, + "step": 5378500 + }, + { + "epoch": 3.22, + "learning_rate": 3.345922586869575e-05, + "loss": 0.334, + "step": 5379000 + }, + { + "epoch": 3.23, + "learning_rate": 3.345712590313519e-05, + "loss": 0.3196, + "step": 5379500 + }, + { + "epoch": 3.23, + "learning_rate": 3.345502593757462e-05, + "loss": 0.3314, + "step": 5380000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3452925972014056e-05, + "loss": 0.3372, + "step": 5380500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3450826006453496e-05, + "loss": 0.3265, + "step": 5381000 + }, + { + "epoch": 3.23, + "learning_rate": 3.344872604089293e-05, + "loss": 0.3282, + "step": 5381500 + }, + { + "epoch": 3.23, + "learning_rate": 3.344662607533236e-05, + "loss": 0.3345, + "step": 5382000 + }, + { + "epoch": 3.23, + "learning_rate": 3.344453030970292e-05, + "loss": 0.3298, + "step": 5382500 + }, + { + "epoch": 3.23, + "learning_rate": 3.344243034414236e-05, + "loss": 0.3254, + "step": 5383000 + }, + { + "epoch": 3.23, + "learning_rate": 3.344033457851291e-05, + "loss": 0.3327, + "step": 5383500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3438234612952344e-05, + "loss": 0.3359, + "step": 5384000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3436134647391784e-05, + "loss": 0.3337, + "step": 5384500 + }, + { + "epoch": 3.23, + "learning_rate": 3.343403468183122e-05, + "loss": 0.3269, + "step": 5385000 + }, + { + "epoch": 3.23, + "learning_rate": 3.343193471627065e-05, + "loss": 0.3333, + "step": 5385500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3429838950641205e-05, + "loss": 0.3336, + "step": 5386000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3427738985080645e-05, + "loss": 0.3239, + "step": 5386500 + }, + { + "epoch": 3.23, + "learning_rate": 3.342563901952008e-05, + "loss": 0.3326, + "step": 5387000 + }, + { + "epoch": 3.23, + "learning_rate": 3.342353905395951e-05, + "loss": 0.3465, + "step": 5387500 + }, + { + "epoch": 3.23, + "learning_rate": 3.342143908839895e-05, + "loss": 0.3255, + "step": 5388000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3419343322769505e-05, + "loss": 0.3323, + "step": 5388500 + }, + { + "epoch": 3.23, + "learning_rate": 3.341724335720894e-05, + "loss": 0.3359, + "step": 5389000 + }, + { + "epoch": 3.23, + "learning_rate": 3.341514339164838e-05, + "loss": 0.3255, + "step": 5389500 + }, + { + "epoch": 3.23, + "learning_rate": 3.341304342608781e-05, + "loss": 0.3367, + "step": 5390000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3410943460527246e-05, + "loss": 0.3263, + "step": 5390500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3408843494966686e-05, + "loss": 0.3353, + "step": 5391000 + }, + { + "epoch": 3.23, + "learning_rate": 3.340674352940612e-05, + "loss": 0.3366, + "step": 5391500 + }, + { + "epoch": 3.23, + "learning_rate": 3.340464776377667e-05, + "loss": 0.3292, + "step": 5392000 + }, + { + "epoch": 3.23, + "learning_rate": 3.340254779821611e-05, + "loss": 0.3346, + "step": 5392500 + }, + { + "epoch": 3.23, + "learning_rate": 3.340044783265555e-05, + "loss": 0.3291, + "step": 5393000 + }, + { + "epoch": 3.23, + "learning_rate": 3.339834786709498e-05, + "loss": 0.3333, + "step": 5393500 + }, + { + "epoch": 3.23, + "learning_rate": 3.3396247901534414e-05, + "loss": 0.3406, + "step": 5394000 + }, + { + "epoch": 3.23, + "learning_rate": 3.339415213590497e-05, + "loss": 0.3308, + "step": 5394500 + }, + { + "epoch": 3.23, + "learning_rate": 3.339205217034441e-05, + "loss": 0.3247, + "step": 5395000 + }, + { + "epoch": 3.23, + "learning_rate": 3.338995220478384e-05, + "loss": 0.3274, + "step": 5395500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3387852239223275e-05, + "loss": 0.3332, + "step": 5396000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3385752273662715e-05, + "loss": 0.3318, + "step": 5396500 + }, + { + "epoch": 3.24, + "learning_rate": 3.338365230810215e-05, + "loss": 0.3252, + "step": 5397000 + }, + { + "epoch": 3.24, + "learning_rate": 3.338155234254158e-05, + "loss": 0.3345, + "step": 5397500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3379452376981015e-05, + "loss": 0.3324, + "step": 5398000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3377356611351576e-05, + "loss": 0.3354, + "step": 5398500 + }, + { + "epoch": 3.24, + "learning_rate": 3.337526084572213e-05, + "loss": 0.3307, + "step": 5399000 + }, + { + "epoch": 3.24, + "learning_rate": 3.337316088016156e-05, + "loss": 0.3386, + "step": 5399500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3371060914601e-05, + "loss": 0.3299, + "step": 5400000 + }, + { + "epoch": 3.24, + "eval_loss": 0.32459577918052673, + "eval_runtime": 1114.6554, + "eval_samples_per_second": 472.541, + "eval_steps_per_second": 78.757, + "step": 5400000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3368960949040436e-05, + "loss": 0.3301, + "step": 5400500 + }, + { + "epoch": 3.24, + "learning_rate": 3.336686098347987e-05, + "loss": 0.3289, + "step": 5401000 + }, + { + "epoch": 3.24, + "learning_rate": 3.336476101791931e-05, + "loss": 0.3331, + "step": 5401500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3362661052358744e-05, + "loss": 0.3413, + "step": 5402000 + }, + { + "epoch": 3.24, + "learning_rate": 3.336056108679817e-05, + "loss": 0.3335, + "step": 5402500 + }, + { + "epoch": 3.24, + "learning_rate": 3.335846532116873e-05, + "loss": 0.3361, + "step": 5403000 + }, + { + "epoch": 3.24, + "learning_rate": 3.335636535560817e-05, + "loss": 0.3364, + "step": 5403500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3354265390047604e-05, + "loss": 0.3306, + "step": 5404000 + }, + { + "epoch": 3.24, + "learning_rate": 3.335216542448704e-05, + "loss": 0.3376, + "step": 5404500 + }, + { + "epoch": 3.24, + "learning_rate": 3.335006545892647e-05, + "loss": 0.3378, + "step": 5405000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3347965493365905e-05, + "loss": 0.3262, + "step": 5405500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3345865527805345e-05, + "loss": 0.3259, + "step": 5406000 + }, + { + "epoch": 3.24, + "learning_rate": 3.334376556224478e-05, + "loss": 0.3379, + "step": 5406500 + }, + { + "epoch": 3.24, + "learning_rate": 3.334167399654646e-05, + "loss": 0.3302, + "step": 5407000 + }, + { + "epoch": 3.24, + "learning_rate": 3.333957403098589e-05, + "loss": 0.3287, + "step": 5407500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3337474065425326e-05, + "loss": 0.3277, + "step": 5408000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3335374099864766e-05, + "loss": 0.3225, + "step": 5408500 + }, + { + "epoch": 3.24, + "learning_rate": 3.33332741343042e-05, + "loss": 0.3294, + "step": 5409000 + }, + { + "epoch": 3.24, + "learning_rate": 3.333118256860587e-05, + "loss": 0.3317, + "step": 5409500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3329091002907554e-05, + "loss": 0.3266, + "step": 5410000 + }, + { + "epoch": 3.24, + "learning_rate": 3.332699103734699e-05, + "loss": 0.3337, + "step": 5410500 + }, + { + "epoch": 3.24, + "learning_rate": 3.332489107178642e-05, + "loss": 0.3325, + "step": 5411000 + }, + { + "epoch": 3.24, + "learning_rate": 3.332279110622586e-05, + "loss": 0.3286, + "step": 5411500 + }, + { + "epoch": 3.24, + "learning_rate": 3.3320691140665294e-05, + "loss": 0.3303, + "step": 5412000 + }, + { + "epoch": 3.25, + "learning_rate": 3.331859117510473e-05, + "loss": 0.3283, + "step": 5412500 + }, + { + "epoch": 3.25, + "learning_rate": 3.331649120954417e-05, + "loss": 0.3266, + "step": 5413000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3314391243983595e-05, + "loss": 0.3201, + "step": 5413500 + }, + { + "epoch": 3.25, + "learning_rate": 3.331229127842303e-05, + "loss": 0.3424, + "step": 5414000 + }, + { + "epoch": 3.25, + "learning_rate": 3.331019131286247e-05, + "loss": 0.3308, + "step": 5414500 + }, + { + "epoch": 3.25, + "learning_rate": 3.33080913473019e-05, + "loss": 0.3263, + "step": 5415000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3305991381741335e-05, + "loss": 0.3429, + "step": 5415500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3303891416180775e-05, + "loss": 0.3258, + "step": 5416000 + }, + { + "epoch": 3.25, + "learning_rate": 3.330179145062021e-05, + "loss": 0.337, + "step": 5416500 + }, + { + "epoch": 3.25, + "learning_rate": 3.329969568499076e-05, + "loss": 0.3346, + "step": 5417000 + }, + { + "epoch": 3.25, + "learning_rate": 3.32975957194302e-05, + "loss": 0.3366, + "step": 5417500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3295495753869636e-05, + "loss": 0.3324, + "step": 5418000 + }, + { + "epoch": 3.25, + "learning_rate": 3.329339578830907e-05, + "loss": 0.335, + "step": 5418500 + }, + { + "epoch": 3.25, + "learning_rate": 3.329129582274851e-05, + "loss": 0.3285, + "step": 5419000 + }, + { + "epoch": 3.25, + "learning_rate": 3.328919585718794e-05, + "loss": 0.3353, + "step": 5419500 + }, + { + "epoch": 3.25, + "learning_rate": 3.328709589162738e-05, + "loss": 0.3339, + "step": 5420000 + }, + { + "epoch": 3.25, + "learning_rate": 3.328499592606682e-05, + "loss": 0.3187, + "step": 5420500 + }, + { + "epoch": 3.25, + "learning_rate": 3.328289596050625e-05, + "loss": 0.3235, + "step": 5421000 + }, + { + "epoch": 3.25, + "learning_rate": 3.328079599494568e-05, + "loss": 0.3289, + "step": 5421500 + }, + { + "epoch": 3.25, + "learning_rate": 3.327869602938512e-05, + "loss": 0.3313, + "step": 5422000 + }, + { + "epoch": 3.25, + "learning_rate": 3.327659606382455e-05, + "loss": 0.3273, + "step": 5422500 + }, + { + "epoch": 3.25, + "learning_rate": 3.327450449812623e-05, + "loss": 0.3303, + "step": 5423000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3272404532565665e-05, + "loss": 0.3315, + "step": 5423500 + }, + { + "epoch": 3.25, + "learning_rate": 3.32703045670051e-05, + "loss": 0.3303, + "step": 5424000 + }, + { + "epoch": 3.25, + "learning_rate": 3.326820460144454e-05, + "loss": 0.3305, + "step": 5424500 + }, + { + "epoch": 3.25, + "learning_rate": 3.326610883581509e-05, + "loss": 0.3301, + "step": 5425000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3264008870254525e-05, + "loss": 0.3229, + "step": 5425500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3261908904693966e-05, + "loss": 0.3228, + "step": 5426000 + }, + { + "epoch": 3.25, + "learning_rate": 3.32598089391334e-05, + "loss": 0.3247, + "step": 5426500 + }, + { + "epoch": 3.25, + "learning_rate": 3.325770897357283e-05, + "loss": 0.3348, + "step": 5427000 + }, + { + "epoch": 3.25, + "learning_rate": 3.3255613207943386e-05, + "loss": 0.3212, + "step": 5427500 + }, + { + "epoch": 3.25, + "learning_rate": 3.3253513242382826e-05, + "loss": 0.3328, + "step": 5428000 + }, + { + "epoch": 3.25, + "learning_rate": 3.325141327682226e-05, + "loss": 0.332, + "step": 5428500 + }, + { + "epoch": 3.25, + "learning_rate": 3.324931331126169e-05, + "loss": 0.3367, + "step": 5429000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3247213345701134e-05, + "loss": 0.3411, + "step": 5429500 + }, + { + "epoch": 3.26, + "learning_rate": 3.324511338014057e-05, + "loss": 0.3235, + "step": 5430000 + }, + { + "epoch": 3.26, + "learning_rate": 3.324301341458e-05, + "loss": 0.3328, + "step": 5430500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3240917648950554e-05, + "loss": 0.3353, + "step": 5431000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3238821883321114e-05, + "loss": 0.3201, + "step": 5431500 + }, + { + "epoch": 3.26, + "learning_rate": 3.323672191776055e-05, + "loss": 0.3407, + "step": 5432000 + }, + { + "epoch": 3.26, + "learning_rate": 3.323462195219998e-05, + "loss": 0.3331, + "step": 5432500 + }, + { + "epoch": 3.26, + "learning_rate": 3.323252198663942e-05, + "loss": 0.3334, + "step": 5433000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3230422021078855e-05, + "loss": 0.3443, + "step": 5433500 + }, + { + "epoch": 3.26, + "learning_rate": 3.322832205551829e-05, + "loss": 0.3244, + "step": 5434000 + }, + { + "epoch": 3.26, + "learning_rate": 3.322622208995773e-05, + "loss": 0.3389, + "step": 5434500 + }, + { + "epoch": 3.26, + "learning_rate": 3.322412212439716e-05, + "loss": 0.3308, + "step": 5435000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3222022158836596e-05, + "loss": 0.3289, + "step": 5435500 + }, + { + "epoch": 3.26, + "learning_rate": 3.321992639320715e-05, + "loss": 0.3398, + "step": 5436000 + }, + { + "epoch": 3.26, + "learning_rate": 3.321782642764659e-05, + "loss": 0.3292, + "step": 5436500 + }, + { + "epoch": 3.26, + "learning_rate": 3.321572646208602e-05, + "loss": 0.3363, + "step": 5437000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3213626496525456e-05, + "loss": 0.3244, + "step": 5437500 + }, + { + "epoch": 3.26, + "learning_rate": 3.32115265309649e-05, + "loss": 0.3284, + "step": 5438000 + }, + { + "epoch": 3.26, + "learning_rate": 3.320942656540432e-05, + "loss": 0.3333, + "step": 5438500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3207330799774884e-05, + "loss": 0.3381, + "step": 5439000 + }, + { + "epoch": 3.26, + "learning_rate": 3.320523503414544e-05, + "loss": 0.3274, + "step": 5439500 + }, + { + "epoch": 3.26, + "learning_rate": 3.320313506858488e-05, + "loss": 0.3345, + "step": 5440000 + }, + { + "epoch": 3.26, + "learning_rate": 3.320103510302431e-05, + "loss": 0.3269, + "step": 5440500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3198935137463744e-05, + "loss": 0.332, + "step": 5441000 + }, + { + "epoch": 3.26, + "learning_rate": 3.3196835171903185e-05, + "loss": 0.322, + "step": 5441500 + }, + { + "epoch": 3.26, + "learning_rate": 3.319473520634262e-05, + "loss": 0.3359, + "step": 5442000 + }, + { + "epoch": 3.26, + "learning_rate": 3.319263524078205e-05, + "loss": 0.3329, + "step": 5442500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3190535275221485e-05, + "loss": 0.3341, + "step": 5443000 + }, + { + "epoch": 3.26, + "learning_rate": 3.318843530966092e-05, + "loss": 0.3373, + "step": 5443500 + }, + { + "epoch": 3.26, + "learning_rate": 3.318633534410035e-05, + "loss": 0.331, + "step": 5444000 + }, + { + "epoch": 3.26, + "learning_rate": 3.318423537853979e-05, + "loss": 0.3273, + "step": 5444500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3182135412979226e-05, + "loss": 0.3239, + "step": 5445000 + }, + { + "epoch": 3.26, + "learning_rate": 3.318003964734978e-05, + "loss": 0.3296, + "step": 5445500 + }, + { + "epoch": 3.27, + "learning_rate": 3.317794388172034e-05, + "loss": 0.3457, + "step": 5446000 + }, + { + "epoch": 3.27, + "learning_rate": 3.317584391615978e-05, + "loss": 0.3303, + "step": 5446500 + }, + { + "epoch": 3.27, + "learning_rate": 3.317374395059921e-05, + "loss": 0.3238, + "step": 5447000 + }, + { + "epoch": 3.27, + "learning_rate": 3.317164398503865e-05, + "loss": 0.3352, + "step": 5447500 + }, + { + "epoch": 3.27, + "learning_rate": 3.316954401947808e-05, + "loss": 0.3223, + "step": 5448000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3167444053917514e-05, + "loss": 0.3277, + "step": 5448500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3165348288288074e-05, + "loss": 0.3406, + "step": 5449000 + }, + { + "epoch": 3.27, + "learning_rate": 3.316324832272751e-05, + "loss": 0.3253, + "step": 5449500 + }, + { + "epoch": 3.27, + "learning_rate": 3.316114835716694e-05, + "loss": 0.3325, + "step": 5450000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3159048391606374e-05, + "loss": 0.336, + "step": 5450500 + }, + { + "epoch": 3.27, + "learning_rate": 3.315694842604581e-05, + "loss": 0.3299, + "step": 5451000 + }, + { + "epoch": 3.27, + "learning_rate": 3.315484846048525e-05, + "loss": 0.3349, + "step": 5451500 + }, + { + "epoch": 3.27, + "learning_rate": 3.315275269485581e-05, + "loss": 0.3373, + "step": 5452000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3150652729295235e-05, + "loss": 0.3272, + "step": 5452500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3148552763734675e-05, + "loss": 0.3311, + "step": 5453000 + }, + { + "epoch": 3.27, + "learning_rate": 3.314645279817411e-05, + "loss": 0.3255, + "step": 5453500 + }, + { + "epoch": 3.27, + "learning_rate": 3.314435283261354e-05, + "loss": 0.3305, + "step": 5454000 + }, + { + "epoch": 3.27, + "learning_rate": 3.314225286705298e-05, + "loss": 0.3367, + "step": 5454500 + }, + { + "epoch": 3.27, + "learning_rate": 3.3140152901492416e-05, + "loss": 0.3361, + "step": 5455000 + }, + { + "epoch": 3.27, + "learning_rate": 3.313805293593185e-05, + "loss": 0.3357, + "step": 5455500 + }, + { + "epoch": 3.27, + "learning_rate": 3.31359571703024e-05, + "loss": 0.3314, + "step": 5456000 + }, + { + "epoch": 3.27, + "learning_rate": 3.313385720474184e-05, + "loss": 0.3232, + "step": 5456500 + }, + { + "epoch": 3.27, + "learning_rate": 3.313175723918128e-05, + "loss": 0.3308, + "step": 5457000 + }, + { + "epoch": 3.27, + "learning_rate": 3.312965727362071e-05, + "loss": 0.3274, + "step": 5457500 + }, + { + "epoch": 3.27, + "learning_rate": 3.312755730806015e-05, + "loss": 0.3402, + "step": 5458000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3125457342499584e-05, + "loss": 0.3352, + "step": 5458500 + }, + { + "epoch": 3.27, + "learning_rate": 3.312335737693902e-05, + "loss": 0.3397, + "step": 5459000 + }, + { + "epoch": 3.27, + "learning_rate": 3.312126161130957e-05, + "loss": 0.3327, + "step": 5459500 + }, + { + "epoch": 3.27, + "learning_rate": 3.311916164574901e-05, + "loss": 0.3262, + "step": 5460000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3117061680188445e-05, + "loss": 0.3297, + "step": 5460500 + }, + { + "epoch": 3.27, + "learning_rate": 3.311496171462788e-05, + "loss": 0.3322, + "step": 5461000 + }, + { + "epoch": 3.27, + "learning_rate": 3.311286174906732e-05, + "loss": 0.335, + "step": 5461500 + }, + { + "epoch": 3.27, + "learning_rate": 3.311076178350675e-05, + "loss": 0.3365, + "step": 5462000 + }, + { + "epoch": 3.27, + "learning_rate": 3.3108661817946185e-05, + "loss": 0.3239, + "step": 5462500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3106566052316745e-05, + "loss": 0.3297, + "step": 5463000 + }, + { + "epoch": 3.28, + "learning_rate": 3.310446608675618e-05, + "loss": 0.3298, + "step": 5463500 + }, + { + "epoch": 3.28, + "learning_rate": 3.310236612119561e-05, + "loss": 0.3335, + "step": 5464000 + }, + { + "epoch": 3.28, + "learning_rate": 3.310026615563505e-05, + "loss": 0.3271, + "step": 5464500 + }, + { + "epoch": 3.28, + "learning_rate": 3.309816619007448e-05, + "loss": 0.3253, + "step": 5465000 + }, + { + "epoch": 3.28, + "learning_rate": 3.309607042444504e-05, + "loss": 0.3266, + "step": 5465500 + }, + { + "epoch": 3.28, + "learning_rate": 3.309397045888447e-05, + "loss": 0.3261, + "step": 5466000 + }, + { + "epoch": 3.28, + "learning_rate": 3.309187049332391e-05, + "loss": 0.3345, + "step": 5466500 + }, + { + "epoch": 3.28, + "learning_rate": 3.308977052776335e-05, + "loss": 0.3304, + "step": 5467000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3087670562202773e-05, + "loss": 0.3242, + "step": 5467500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3085570596642214e-05, + "loss": 0.3382, + "step": 5468000 + }, + { + "epoch": 3.28, + "learning_rate": 3.308347063108165e-05, + "loss": 0.3355, + "step": 5468500 + }, + { + "epoch": 3.28, + "learning_rate": 3.308137066552108e-05, + "loss": 0.3251, + "step": 5469000 + }, + { + "epoch": 3.28, + "learning_rate": 3.307927489989164e-05, + "loss": 0.3242, + "step": 5469500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3077174934331074e-05, + "loss": 0.3339, + "step": 5470000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3075079168701635e-05, + "loss": 0.3265, + "step": 5470500 + }, + { + "epoch": 3.28, + "learning_rate": 3.307297920314107e-05, + "loss": 0.3304, + "step": 5471000 + }, + { + "epoch": 3.28, + "learning_rate": 3.307087923758051e-05, + "loss": 0.3267, + "step": 5471500 + }, + { + "epoch": 3.28, + "learning_rate": 3.306877927201994e-05, + "loss": 0.3199, + "step": 5472000 + }, + { + "epoch": 3.28, + "learning_rate": 3.306667930645937e-05, + "loss": 0.3325, + "step": 5472500 + }, + { + "epoch": 3.28, + "learning_rate": 3.306458354082993e-05, + "loss": 0.3286, + "step": 5473000 + }, + { + "epoch": 3.28, + "learning_rate": 3.306248357526937e-05, + "loss": 0.3231, + "step": 5473500 + }, + { + "epoch": 3.28, + "learning_rate": 3.30603836097088e-05, + "loss": 0.3307, + "step": 5474000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3058283644148236e-05, + "loss": 0.3367, + "step": 5474500 + }, + { + "epoch": 3.28, + "learning_rate": 3.305618367858767e-05, + "loss": 0.3329, + "step": 5475000 + }, + { + "epoch": 3.28, + "learning_rate": 3.305408791295823e-05, + "loss": 0.335, + "step": 5475500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3051987947397663e-05, + "loss": 0.3337, + "step": 5476000 + }, + { + "epoch": 3.28, + "learning_rate": 3.30498879818371e-05, + "loss": 0.3297, + "step": 5476500 + }, + { + "epoch": 3.28, + "learning_rate": 3.304778801627653e-05, + "loss": 0.3333, + "step": 5477000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3045688050715964e-05, + "loss": 0.3238, + "step": 5477500 + }, + { + "epoch": 3.28, + "learning_rate": 3.3043592285086524e-05, + "loss": 0.3352, + "step": 5478000 + }, + { + "epoch": 3.28, + "learning_rate": 3.3041492319525964e-05, + "loss": 0.3218, + "step": 5478500 + }, + { + "epoch": 3.28, + "learning_rate": 3.30393923539654e-05, + "loss": 0.3238, + "step": 5479000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3037292388404825e-05, + "loss": 0.3335, + "step": 5479500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3035192422844265e-05, + "loss": 0.3225, + "step": 5480000 + }, + { + "epoch": 3.29, + "learning_rate": 3.30330924572837e-05, + "loss": 0.3268, + "step": 5480500 + }, + { + "epoch": 3.29, + "learning_rate": 3.303099669165426e-05, + "loss": 0.3223, + "step": 5481000 + }, + { + "epoch": 3.29, + "learning_rate": 3.302890092602481e-05, + "loss": 0.3283, + "step": 5481500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3026800960464246e-05, + "loss": 0.3302, + "step": 5482000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3024700994903686e-05, + "loss": 0.329, + "step": 5482500 + }, + { + "epoch": 3.29, + "learning_rate": 3.302260102934312e-05, + "loss": 0.3349, + "step": 5483000 + }, + { + "epoch": 3.29, + "learning_rate": 3.302050106378255e-05, + "loss": 0.3319, + "step": 5483500 + }, + { + "epoch": 3.29, + "learning_rate": 3.301840109822199e-05, + "loss": 0.3308, + "step": 5484000 + }, + { + "epoch": 3.29, + "learning_rate": 3.301630113266142e-05, + "loss": 0.3275, + "step": 5484500 + }, + { + "epoch": 3.29, + "learning_rate": 3.301420116710086e-05, + "loss": 0.3346, + "step": 5485000 + }, + { + "epoch": 3.29, + "learning_rate": 3.301210120154029e-05, + "loss": 0.3323, + "step": 5485500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3010005435910854e-05, + "loss": 0.3325, + "step": 5486000 + }, + { + "epoch": 3.29, + "learning_rate": 3.300790547035028e-05, + "loss": 0.3232, + "step": 5486500 + }, + { + "epoch": 3.29, + "learning_rate": 3.300580550478972e-05, + "loss": 0.3218, + "step": 5487000 + }, + { + "epoch": 3.29, + "learning_rate": 3.3003705539229154e-05, + "loss": 0.3357, + "step": 5487500 + }, + { + "epoch": 3.29, + "learning_rate": 3.300160557366859e-05, + "loss": 0.3492, + "step": 5488000 + }, + { + "epoch": 3.29, + "learning_rate": 3.299950560810803e-05, + "loss": 0.3303, + "step": 5488500 + }, + { + "epoch": 3.29, + "learning_rate": 3.299740564254746e-05, + "loss": 0.3372, + "step": 5489000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2995305676986895e-05, + "loss": 0.3275, + "step": 5489500 + }, + { + "epoch": 3.29, + "learning_rate": 3.299320991135745e-05, + "loss": 0.3252, + "step": 5490000 + }, + { + "epoch": 3.29, + "learning_rate": 3.299110994579689e-05, + "loss": 0.3332, + "step": 5490500 + }, + { + "epoch": 3.29, + "learning_rate": 3.298900998023632e-05, + "loss": 0.3324, + "step": 5491000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2986910014675755e-05, + "loss": 0.3292, + "step": 5491500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2984814249046316e-05, + "loss": 0.329, + "step": 5492000 + }, + { + "epoch": 3.29, + "learning_rate": 3.298271428348575e-05, + "loss": 0.3269, + "step": 5492500 + }, + { + "epoch": 3.29, + "learning_rate": 3.298061851785631e-05, + "loss": 0.3314, + "step": 5493000 + }, + { + "epoch": 3.29, + "learning_rate": 3.297851855229574e-05, + "loss": 0.339, + "step": 5493500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2976418586735177e-05, + "loss": 0.3264, + "step": 5494000 + }, + { + "epoch": 3.29, + "learning_rate": 3.297431862117461e-05, + "loss": 0.3267, + "step": 5494500 + }, + { + "epoch": 3.29, + "learning_rate": 3.2972218655614043e-05, + "loss": 0.3481, + "step": 5495000 + }, + { + "epoch": 3.29, + "learning_rate": 3.2970118690053484e-05, + "loss": 0.3291, + "step": 5495500 + }, + { + "epoch": 3.3, + "learning_rate": 3.296801872449292e-05, + "loss": 0.3313, + "step": 5496000 + }, + { + "epoch": 3.3, + "learning_rate": 3.296591875893235e-05, + "loss": 0.3328, + "step": 5496500 + }, + { + "epoch": 3.3, + "learning_rate": 3.296381879337179e-05, + "loss": 0.3291, + "step": 5497000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2961718827811224e-05, + "loss": 0.3344, + "step": 5497500 + }, + { + "epoch": 3.3, + "learning_rate": 3.295961886225066e-05, + "loss": 0.3278, + "step": 5498000 + }, + { + "epoch": 3.3, + "learning_rate": 3.295752309662121e-05, + "loss": 0.3358, + "step": 5498500 + }, + { + "epoch": 3.3, + "learning_rate": 3.295542313106065e-05, + "loss": 0.3318, + "step": 5499000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2953323165500085e-05, + "loss": 0.3316, + "step": 5499500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2951223199939525e-05, + "loss": 0.3216, + "step": 5500000 + }, + { + "epoch": 3.3, + "eval_loss": 0.3251773416996002, + "eval_runtime": 1118.1228, + "eval_samples_per_second": 471.075, + "eval_steps_per_second": 78.513, + "step": 5500000 + }, + { + "epoch": 3.3, + "learning_rate": 3.294912743431008e-05, + "loss": 0.3249, + "step": 5500500 + }, + { + "epoch": 3.3, + "learning_rate": 3.294702746874951e-05, + "loss": 0.3392, + "step": 5501000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2944927503188946e-05, + "loss": 0.3261, + "step": 5501500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2942827537628386e-05, + "loss": 0.3238, + "step": 5502000 + }, + { + "epoch": 3.3, + "learning_rate": 3.294073177199894e-05, + "loss": 0.3355, + "step": 5502500 + }, + { + "epoch": 3.3, + "learning_rate": 3.293863180643837e-05, + "loss": 0.3302, + "step": 5503000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2936531840877806e-05, + "loss": 0.3293, + "step": 5503500 + }, + { + "epoch": 3.3, + "learning_rate": 3.293443187531725e-05, + "loss": 0.3265, + "step": 5504000 + }, + { + "epoch": 3.3, + "learning_rate": 3.293233190975668e-05, + "loss": 0.3236, + "step": 5504500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2930236144127234e-05, + "loss": 0.3169, + "step": 5505000 + }, + { + "epoch": 3.3, + "learning_rate": 3.292813617856667e-05, + "loss": 0.3369, + "step": 5505500 + }, + { + "epoch": 3.3, + "learning_rate": 3.292603621300611e-05, + "loss": 0.3322, + "step": 5506000 + }, + { + "epoch": 3.3, + "learning_rate": 3.292394044737666e-05, + "loss": 0.3428, + "step": 5506500 + }, + { + "epoch": 3.3, + "learning_rate": 3.2921840481816094e-05, + "loss": 0.3259, + "step": 5507000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2919740516255535e-05, + "loss": 0.3381, + "step": 5507500 + }, + { + "epoch": 3.3, + "learning_rate": 3.291764055069497e-05, + "loss": 0.3184, + "step": 5508000 + }, + { + "epoch": 3.3, + "learning_rate": 3.29155405851344e-05, + "loss": 0.3278, + "step": 5508500 + }, + { + "epoch": 3.3, + "learning_rate": 3.291344061957384e-05, + "loss": 0.3175, + "step": 5509000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2911340654013275e-05, + "loss": 0.3278, + "step": 5509500 + }, + { + "epoch": 3.3, + "learning_rate": 3.290924068845271e-05, + "loss": 0.3289, + "step": 5510000 + }, + { + "epoch": 3.3, + "learning_rate": 3.290714492282326e-05, + "loss": 0.3362, + "step": 5510500 + }, + { + "epoch": 3.3, + "learning_rate": 3.29050449572627e-05, + "loss": 0.3398, + "step": 5511000 + }, + { + "epoch": 3.3, + "learning_rate": 3.2902949191633256e-05, + "loss": 0.3321, + "step": 5511500 + }, + { + "epoch": 3.3, + "learning_rate": 3.290084922607269e-05, + "loss": 0.3325, + "step": 5512000 + }, + { + "epoch": 3.3, + "learning_rate": 3.289874926051213e-05, + "loss": 0.3277, + "step": 5512500 + }, + { + "epoch": 3.31, + "learning_rate": 3.289664929495156e-05, + "loss": 0.3265, + "step": 5513000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2894549329391e-05, + "loss": 0.334, + "step": 5513500 + }, + { + "epoch": 3.31, + "learning_rate": 3.289244936383044e-05, + "loss": 0.3406, + "step": 5514000 + }, + { + "epoch": 3.31, + "learning_rate": 3.289034939826987e-05, + "loss": 0.337, + "step": 5514500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2888249432709304e-05, + "loss": 0.3297, + "step": 5515000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2886149467148744e-05, + "loss": 0.326, + "step": 5515500 + }, + { + "epoch": 3.31, + "learning_rate": 3.28840537015193e-05, + "loss": 0.3235, + "step": 5516000 + }, + { + "epoch": 3.31, + "learning_rate": 3.288195373595873e-05, + "loss": 0.3248, + "step": 5516500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2879853770398165e-05, + "loss": 0.3249, + "step": 5517000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2877753804837605e-05, + "loss": 0.3257, + "step": 5517500 + }, + { + "epoch": 3.31, + "learning_rate": 3.287565383927704e-05, + "loss": 0.3344, + "step": 5518000 + }, + { + "epoch": 3.31, + "learning_rate": 3.287355807364759e-05, + "loss": 0.3288, + "step": 5518500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2871458108087025e-05, + "loss": 0.3333, + "step": 5519000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2869358142526466e-05, + "loss": 0.3305, + "step": 5519500 + }, + { + "epoch": 3.31, + "learning_rate": 3.28672581769659e-05, + "loss": 0.328, + "step": 5520000 + }, + { + "epoch": 3.31, + "learning_rate": 3.286516241133645e-05, + "loss": 0.3355, + "step": 5520500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2863066645707006e-05, + "loss": 0.3281, + "step": 5521000 + }, + { + "epoch": 3.31, + "learning_rate": 3.2860966680146446e-05, + "loss": 0.3283, + "step": 5521500 + }, + { + "epoch": 3.31, + "learning_rate": 3.285886671458588e-05, + "loss": 0.3311, + "step": 5522000 + }, + { + "epoch": 3.31, + "learning_rate": 3.285676674902531e-05, + "loss": 0.3285, + "step": 5522500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2854666783464754e-05, + "loss": 0.3224, + "step": 5523000 + }, + { + "epoch": 3.31, + "learning_rate": 3.285256681790419e-05, + "loss": 0.3236, + "step": 5523500 + }, + { + "epoch": 3.31, + "learning_rate": 3.285046685234362e-05, + "loss": 0.3248, + "step": 5524000 + }, + { + "epoch": 3.31, + "learning_rate": 3.284836688678306e-05, + "loss": 0.342, + "step": 5524500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2846266921222494e-05, + "loss": 0.3374, + "step": 5525000 + }, + { + "epoch": 3.31, + "learning_rate": 3.284417115559305e-05, + "loss": 0.3328, + "step": 5525500 + }, + { + "epoch": 3.31, + "learning_rate": 3.284207119003248e-05, + "loss": 0.3327, + "step": 5526000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283997542440304e-05, + "loss": 0.3268, + "step": 5526500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2837875458842475e-05, + "loss": 0.3345, + "step": 5527000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283577549328191e-05, + "loss": 0.3365, + "step": 5527500 + }, + { + "epoch": 3.31, + "learning_rate": 3.283367552772135e-05, + "loss": 0.3298, + "step": 5528000 + }, + { + "epoch": 3.31, + "learning_rate": 3.283157556216078e-05, + "loss": 0.3351, + "step": 5528500 + }, + { + "epoch": 3.31, + "learning_rate": 3.2829475596600216e-05, + "loss": 0.3323, + "step": 5529000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2827375631039656e-05, + "loss": 0.3242, + "step": 5529500 + }, + { + "epoch": 3.32, + "learning_rate": 3.282527566547909e-05, + "loss": 0.3249, + "step": 5530000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2823175699918516e-05, + "loss": 0.3361, + "step": 5530500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2821079934289076e-05, + "loss": 0.3405, + "step": 5531000 + }, + { + "epoch": 3.32, + "learning_rate": 3.281897996872852e-05, + "loss": 0.327, + "step": 5531500 + }, + { + "epoch": 3.32, + "learning_rate": 3.281688000316795e-05, + "loss": 0.3384, + "step": 5532000 + }, + { + "epoch": 3.32, + "learning_rate": 3.281478003760738e-05, + "loss": 0.323, + "step": 5532500 + }, + { + "epoch": 3.32, + "learning_rate": 3.281268007204682e-05, + "loss": 0.3328, + "step": 5533000 + }, + { + "epoch": 3.32, + "learning_rate": 3.281058010648625e-05, + "loss": 0.3267, + "step": 5533500 + }, + { + "epoch": 3.32, + "learning_rate": 3.280848434085681e-05, + "loss": 0.3292, + "step": 5534000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2806384375296244e-05, + "loss": 0.3375, + "step": 5534500 + }, + { + "epoch": 3.32, + "learning_rate": 3.280428440973568e-05, + "loss": 0.3283, + "step": 5535000 + }, + { + "epoch": 3.32, + "learning_rate": 3.280218444417511e-05, + "loss": 0.3415, + "step": 5535500 + }, + { + "epoch": 3.32, + "learning_rate": 3.280008867854567e-05, + "loss": 0.3366, + "step": 5536000 + }, + { + "epoch": 3.32, + "learning_rate": 3.279798871298511e-05, + "loss": 0.337, + "step": 5536500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2795888747424545e-05, + "loss": 0.3242, + "step": 5537000 + }, + { + "epoch": 3.32, + "learning_rate": 3.279378878186397e-05, + "loss": 0.3287, + "step": 5537500 + }, + { + "epoch": 3.32, + "learning_rate": 3.279168881630341e-05, + "loss": 0.3277, + "step": 5538000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2789588850742846e-05, + "loss": 0.335, + "step": 5538500 + }, + { + "epoch": 3.32, + "learning_rate": 3.278748888518228e-05, + "loss": 0.33, + "step": 5539000 + }, + { + "epoch": 3.32, + "learning_rate": 3.278538891962172e-05, + "loss": 0.3346, + "step": 5539500 + }, + { + "epoch": 3.32, + "learning_rate": 3.278329315399227e-05, + "loss": 0.3274, + "step": 5540000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2781193188431706e-05, + "loss": 0.3263, + "step": 5540500 + }, + { + "epoch": 3.32, + "learning_rate": 3.277909322287114e-05, + "loss": 0.3195, + "step": 5541000 + }, + { + "epoch": 3.32, + "learning_rate": 3.277699325731058e-05, + "loss": 0.3258, + "step": 5541500 + }, + { + "epoch": 3.32, + "learning_rate": 3.277489749168114e-05, + "loss": 0.3361, + "step": 5542000 + }, + { + "epoch": 3.32, + "learning_rate": 3.277279752612057e-05, + "loss": 0.3386, + "step": 5542500 + }, + { + "epoch": 3.32, + "learning_rate": 3.277069756056001e-05, + "loss": 0.3367, + "step": 5543000 + }, + { + "epoch": 3.32, + "learning_rate": 3.276859759499944e-05, + "loss": 0.3204, + "step": 5543500 + }, + { + "epoch": 3.32, + "learning_rate": 3.2766497629438874e-05, + "loss": 0.331, + "step": 5544000 + }, + { + "epoch": 3.32, + "learning_rate": 3.2764397663878314e-05, + "loss": 0.3406, + "step": 5544500 + }, + { + "epoch": 3.32, + "learning_rate": 3.276229769831775e-05, + "loss": 0.3316, + "step": 5545000 + }, + { + "epoch": 3.32, + "learning_rate": 3.276019773275718e-05, + "loss": 0.3294, + "step": 5545500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2758101967127735e-05, + "loss": 0.3214, + "step": 5546000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2756002001567175e-05, + "loss": 0.334, + "step": 5546500 + }, + { + "epoch": 3.33, + "learning_rate": 3.275390203600661e-05, + "loss": 0.327, + "step": 5547000 + }, + { + "epoch": 3.33, + "learning_rate": 3.275180207044604e-05, + "loss": 0.3261, + "step": 5547500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2749706304816596e-05, + "loss": 0.3324, + "step": 5548000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2747606339256036e-05, + "loss": 0.3371, + "step": 5548500 + }, + { + "epoch": 3.33, + "learning_rate": 3.274550637369547e-05, + "loss": 0.3305, + "step": 5549000 + }, + { + "epoch": 3.33, + "learning_rate": 3.274341060806602e-05, + "loss": 0.3249, + "step": 5549500 + }, + { + "epoch": 3.33, + "learning_rate": 3.274131064250546e-05, + "loss": 0.3221, + "step": 5550000 + }, + { + "epoch": 3.33, + "learning_rate": 3.27392106769449e-05, + "loss": 0.3316, + "step": 5550500 + }, + { + "epoch": 3.33, + "learning_rate": 3.273711071138433e-05, + "loss": 0.3278, + "step": 5551000 + }, + { + "epoch": 3.33, + "learning_rate": 3.273501074582377e-05, + "loss": 0.3325, + "step": 5551500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2732910780263204e-05, + "loss": 0.3294, + "step": 5552000 + }, + { + "epoch": 3.33, + "learning_rate": 3.273081081470264e-05, + "loss": 0.3324, + "step": 5552500 + }, + { + "epoch": 3.33, + "learning_rate": 3.272871084914208e-05, + "loss": 0.3294, + "step": 5553000 + }, + { + "epoch": 3.33, + "learning_rate": 3.272661928344375e-05, + "loss": 0.3306, + "step": 5553500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2724519317883185e-05, + "loss": 0.3291, + "step": 5554000 + }, + { + "epoch": 3.33, + "learning_rate": 3.272241935232262e-05, + "loss": 0.3311, + "step": 5554500 + }, + { + "epoch": 3.33, + "learning_rate": 3.272031938676205e-05, + "loss": 0.3291, + "step": 5555000 + }, + { + "epoch": 3.33, + "learning_rate": 3.271821942120149e-05, + "loss": 0.326, + "step": 5555500 + }, + { + "epoch": 3.33, + "learning_rate": 3.271612365557205e-05, + "loss": 0.3228, + "step": 5556000 + }, + { + "epoch": 3.33, + "learning_rate": 3.271402369001148e-05, + "loss": 0.3324, + "step": 5556500 + }, + { + "epoch": 3.33, + "learning_rate": 3.271192372445092e-05, + "loss": 0.3307, + "step": 5557000 + }, + { + "epoch": 3.33, + "learning_rate": 3.270982375889035e-05, + "loss": 0.3306, + "step": 5557500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2707723793329786e-05, + "loss": 0.3303, + "step": 5558000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2705623827769226e-05, + "loss": 0.3371, + "step": 5558500 + }, + { + "epoch": 3.33, + "learning_rate": 3.270352386220866e-05, + "loss": 0.3347, + "step": 5559000 + }, + { + "epoch": 3.33, + "learning_rate": 3.270142809657921e-05, + "loss": 0.3302, + "step": 5559500 + }, + { + "epoch": 3.33, + "learning_rate": 3.269932813101865e-05, + "loss": 0.329, + "step": 5560000 + }, + { + "epoch": 3.33, + "learning_rate": 3.269722816545809e-05, + "loss": 0.3332, + "step": 5560500 + }, + { + "epoch": 3.33, + "learning_rate": 3.269512819989752e-05, + "loss": 0.3354, + "step": 5561000 + }, + { + "epoch": 3.33, + "learning_rate": 3.2693028234336954e-05, + "loss": 0.3285, + "step": 5561500 + }, + { + "epoch": 3.33, + "learning_rate": 3.2690928268776394e-05, + "loss": 0.3368, + "step": 5562000 + }, + { + "epoch": 3.33, + "learning_rate": 3.268883250314695e-05, + "loss": 0.3312, + "step": 5562500 + }, + { + "epoch": 3.34, + "learning_rate": 3.268673253758638e-05, + "loss": 0.3304, + "step": 5563000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2684632572025815e-05, + "loss": 0.3292, + "step": 5563500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2682532606465255e-05, + "loss": 0.328, + "step": 5564000 + }, + { + "epoch": 3.34, + "learning_rate": 3.268043264090469e-05, + "loss": 0.3352, + "step": 5564500 + }, + { + "epoch": 3.34, + "learning_rate": 3.267833267534413e-05, + "loss": 0.3308, + "step": 5565000 + }, + { + "epoch": 3.34, + "learning_rate": 3.267623690971468e-05, + "loss": 0.3411, + "step": 5565500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2674141144085236e-05, + "loss": 0.333, + "step": 5566000 + }, + { + "epoch": 3.34, + "learning_rate": 3.267204117852467e-05, + "loss": 0.3263, + "step": 5566500 + }, + { + "epoch": 3.34, + "learning_rate": 3.26699412129641e-05, + "loss": 0.3302, + "step": 5567000 + }, + { + "epoch": 3.34, + "learning_rate": 3.266784124740354e-05, + "loss": 0.3279, + "step": 5567500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2665741281842976e-05, + "loss": 0.3283, + "step": 5568000 + }, + { + "epoch": 3.34, + "learning_rate": 3.266364131628241e-05, + "loss": 0.3269, + "step": 5568500 + }, + { + "epoch": 3.34, + "learning_rate": 3.266154135072185e-05, + "loss": 0.331, + "step": 5569000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2659441385161283e-05, + "loss": 0.3384, + "step": 5569500 + }, + { + "epoch": 3.34, + "learning_rate": 3.265734141960072e-05, + "loss": 0.3316, + "step": 5570000 + }, + { + "epoch": 3.34, + "learning_rate": 3.265524565397127e-05, + "loss": 0.3369, + "step": 5570500 + }, + { + "epoch": 3.34, + "learning_rate": 3.265314988834183e-05, + "loss": 0.3326, + "step": 5571000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2651049922781264e-05, + "loss": 0.3376, + "step": 5571500 + }, + { + "epoch": 3.34, + "learning_rate": 3.26489499572207e-05, + "loss": 0.3394, + "step": 5572000 + }, + { + "epoch": 3.34, + "learning_rate": 3.264684999166014e-05, + "loss": 0.3291, + "step": 5572500 + }, + { + "epoch": 3.34, + "learning_rate": 3.264475002609957e-05, + "loss": 0.3278, + "step": 5573000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2642650060539005e-05, + "loss": 0.336, + "step": 5573500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2640550094978445e-05, + "loss": 0.3282, + "step": 5574000 + }, + { + "epoch": 3.34, + "learning_rate": 3.263845012941788e-05, + "loss": 0.33, + "step": 5574500 + }, + { + "epoch": 3.34, + "learning_rate": 3.263635016385731e-05, + "loss": 0.3228, + "step": 5575000 + }, + { + "epoch": 3.34, + "learning_rate": 3.2634254398227866e-05, + "loss": 0.3287, + "step": 5575500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2632154432667306e-05, + "loss": 0.3322, + "step": 5576000 + }, + { + "epoch": 3.34, + "learning_rate": 3.263005446710674e-05, + "loss": 0.3205, + "step": 5576500 + }, + { + "epoch": 3.34, + "learning_rate": 3.262795870147729e-05, + "loss": 0.3463, + "step": 5577000 + }, + { + "epoch": 3.34, + "learning_rate": 3.262585873591673e-05, + "loss": 0.3348, + "step": 5577500 + }, + { + "epoch": 3.34, + "learning_rate": 3.2623758770356167e-05, + "loss": 0.33, + "step": 5578000 + }, + { + "epoch": 3.34, + "learning_rate": 3.26216588047956e-05, + "loss": 0.3351, + "step": 5578500 + }, + { + "epoch": 3.34, + "learning_rate": 3.261955883923504e-05, + "loss": 0.3421, + "step": 5579000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2617458873674474e-05, + "loss": 0.3182, + "step": 5579500 + }, + { + "epoch": 3.35, + "learning_rate": 3.261535890811391e-05, + "loss": 0.3266, + "step": 5580000 + }, + { + "epoch": 3.35, + "learning_rate": 3.261325894255335e-05, + "loss": 0.3258, + "step": 5580500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2611158976992774e-05, + "loss": 0.3224, + "step": 5581000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260905901143221e-05, + "loss": 0.3281, + "step": 5581500 + }, + { + "epoch": 3.35, + "learning_rate": 3.260695904587165e-05, + "loss": 0.3268, + "step": 5582000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260486328024221e-05, + "loss": 0.3245, + "step": 5582500 + }, + { + "epoch": 3.35, + "learning_rate": 3.260276331468164e-05, + "loss": 0.3225, + "step": 5583000 + }, + { + "epoch": 3.35, + "learning_rate": 3.260066334912107e-05, + "loss": 0.3245, + "step": 5583500 + }, + { + "epoch": 3.35, + "learning_rate": 3.259856338356051e-05, + "loss": 0.3234, + "step": 5584000 + }, + { + "epoch": 3.35, + "learning_rate": 3.259646341799994e-05, + "loss": 0.3321, + "step": 5584500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2594363452439375e-05, + "loss": 0.3249, + "step": 5585000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2592263486878816e-05, + "loss": 0.3263, + "step": 5585500 + }, + { + "epoch": 3.35, + "learning_rate": 3.259016772124937e-05, + "loss": 0.3291, + "step": 5586000 + }, + { + "epoch": 3.35, + "learning_rate": 3.25880677556888e-05, + "loss": 0.33, + "step": 5586500 + }, + { + "epoch": 3.35, + "learning_rate": 3.258596779012824e-05, + "loss": 0.3243, + "step": 5587000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2583867824567676e-05, + "loss": 0.3314, + "step": 5587500 + }, + { + "epoch": 3.35, + "learning_rate": 3.258177205893824e-05, + "loss": 0.3293, + "step": 5588000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2579672093377663e-05, + "loss": 0.333, + "step": 5588500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2577572127817104e-05, + "loss": 0.325, + "step": 5589000 + }, + { + "epoch": 3.35, + "learning_rate": 3.257547216225654e-05, + "loss": 0.3317, + "step": 5589500 + }, + { + "epoch": 3.35, + "learning_rate": 3.257337219669597e-05, + "loss": 0.3226, + "step": 5590000 + }, + { + "epoch": 3.35, + "learning_rate": 3.257127223113541e-05, + "loss": 0.3255, + "step": 5590500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2569172265574844e-05, + "loss": 0.3264, + "step": 5591000 + }, + { + "epoch": 3.35, + "learning_rate": 3.25670764999454e-05, + "loss": 0.3323, + "step": 5591500 + }, + { + "epoch": 3.35, + "learning_rate": 3.256497653438483e-05, + "loss": 0.3366, + "step": 5592000 + }, + { + "epoch": 3.35, + "learning_rate": 3.256287656882427e-05, + "loss": 0.3209, + "step": 5592500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2560776603263705e-05, + "loss": 0.3329, + "step": 5593000 + }, + { + "epoch": 3.35, + "learning_rate": 3.255867663770314e-05, + "loss": 0.3363, + "step": 5593500 + }, + { + "epoch": 3.35, + "learning_rate": 3.255657667214258e-05, + "loss": 0.3372, + "step": 5594000 + }, + { + "epoch": 3.35, + "learning_rate": 3.255447670658201e-05, + "loss": 0.3205, + "step": 5594500 + }, + { + "epoch": 3.35, + "learning_rate": 3.2552380940952566e-05, + "loss": 0.3348, + "step": 5595000 + }, + { + "epoch": 3.35, + "learning_rate": 3.2550280975392006e-05, + "loss": 0.3324, + "step": 5595500 + }, + { + "epoch": 3.36, + "learning_rate": 3.254818100983144e-05, + "loss": 0.3291, + "step": 5596000 + }, + { + "epoch": 3.36, + "learning_rate": 3.254608104427087e-05, + "loss": 0.3283, + "step": 5596500 + }, + { + "epoch": 3.36, + "learning_rate": 3.254398107871031e-05, + "loss": 0.3305, + "step": 5597000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2541881113149747e-05, + "loss": 0.3278, + "step": 5597500 + }, + { + "epoch": 3.36, + "learning_rate": 3.253978114758918e-05, + "loss": 0.3368, + "step": 5598000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2537681182028614e-05, + "loss": 0.3352, + "step": 5598500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2535585416399174e-05, + "loss": 0.3367, + "step": 5599000 + }, + { + "epoch": 3.36, + "learning_rate": 3.253348965076973e-05, + "loss": 0.3432, + "step": 5599500 + }, + { + "epoch": 3.36, + "learning_rate": 3.253138968520916e-05, + "loss": 0.3385, + "step": 5600000 + }, + { + "epoch": 3.36, + "eval_loss": 0.3224908411502838, + "eval_runtime": 1116.5259, + "eval_samples_per_second": 471.749, + "eval_steps_per_second": 78.625, + "step": 5600000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2529289719648594e-05, + "loss": 0.3322, + "step": 5600500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2527189754088035e-05, + "loss": 0.3274, + "step": 5601000 + }, + { + "epoch": 3.36, + "learning_rate": 3.252508978852747e-05, + "loss": 0.3269, + "step": 5601500 + }, + { + "epoch": 3.36, + "learning_rate": 3.252299402289802e-05, + "loss": 0.3204, + "step": 5602000 + }, + { + "epoch": 3.36, + "learning_rate": 3.252089405733746e-05, + "loss": 0.3278, + "step": 5602500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2518794091776895e-05, + "loss": 0.3321, + "step": 5603000 + }, + { + "epoch": 3.36, + "learning_rate": 3.251669412621633e-05, + "loss": 0.3279, + "step": 5603500 + }, + { + "epoch": 3.36, + "learning_rate": 3.251459416065577e-05, + "loss": 0.3343, + "step": 5604000 + }, + { + "epoch": 3.36, + "learning_rate": 3.251249839502632e-05, + "loss": 0.327, + "step": 5604500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2510398429465756e-05, + "loss": 0.3308, + "step": 5605000 + }, + { + "epoch": 3.36, + "learning_rate": 3.250829846390519e-05, + "loss": 0.3346, + "step": 5605500 + }, + { + "epoch": 3.36, + "learning_rate": 3.250619849834463e-05, + "loss": 0.3261, + "step": 5606000 + }, + { + "epoch": 3.36, + "learning_rate": 3.250410273271518e-05, + "loss": 0.3331, + "step": 5606500 + }, + { + "epoch": 3.36, + "learning_rate": 3.250200276715462e-05, + "loss": 0.3417, + "step": 5607000 + }, + { + "epoch": 3.36, + "learning_rate": 3.249990280159405e-05, + "loss": 0.3311, + "step": 5607500 + }, + { + "epoch": 3.36, + "learning_rate": 3.249780283603349e-05, + "loss": 0.3183, + "step": 5608000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2495702870472924e-05, + "loss": 0.3322, + "step": 5608500 + }, + { + "epoch": 3.36, + "learning_rate": 3.249360710484348e-05, + "loss": 0.336, + "step": 5609000 + }, + { + "epoch": 3.36, + "learning_rate": 3.249150713928292e-05, + "loss": 0.3419, + "step": 5609500 + }, + { + "epoch": 3.36, + "learning_rate": 3.248940717372235e-05, + "loss": 0.3407, + "step": 5610000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2487307208161785e-05, + "loss": 0.3371, + "step": 5610500 + }, + { + "epoch": 3.36, + "learning_rate": 3.2485207242601225e-05, + "loss": 0.3309, + "step": 5611000 + }, + { + "epoch": 3.36, + "learning_rate": 3.248310727704066e-05, + "loss": 0.3289, + "step": 5611500 + }, + { + "epoch": 3.36, + "learning_rate": 3.248101151141121e-05, + "loss": 0.3278, + "step": 5612000 + }, + { + "epoch": 3.36, + "learning_rate": 3.2478911545850645e-05, + "loss": 0.3328, + "step": 5612500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2476811580290086e-05, + "loss": 0.33, + "step": 5613000 + }, + { + "epoch": 3.37, + "learning_rate": 3.247471161472952e-05, + "loss": 0.3284, + "step": 5613500 + }, + { + "epoch": 3.37, + "learning_rate": 3.247261164916895e-05, + "loss": 0.3338, + "step": 5614000 + }, + { + "epoch": 3.37, + "learning_rate": 3.247051168360839e-05, + "loss": 0.3291, + "step": 5614500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2468411718047826e-05, + "loss": 0.3266, + "step": 5615000 + }, + { + "epoch": 3.37, + "learning_rate": 3.246631595241838e-05, + "loss": 0.3275, + "step": 5615500 + }, + { + "epoch": 3.37, + "learning_rate": 3.246421598685781e-05, + "loss": 0.341, + "step": 5616000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2462116021297254e-05, + "loss": 0.335, + "step": 5616500 + }, + { + "epoch": 3.37, + "learning_rate": 3.246001605573669e-05, + "loss": 0.3248, + "step": 5617000 + }, + { + "epoch": 3.37, + "learning_rate": 3.245791609017612e-05, + "loss": 0.3351, + "step": 5617500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2455816124615554e-05, + "loss": 0.3208, + "step": 5618000 + }, + { + "epoch": 3.37, + "learning_rate": 3.245371615905499e-05, + "loss": 0.3358, + "step": 5618500 + }, + { + "epoch": 3.37, + "learning_rate": 3.245161619349443e-05, + "loss": 0.325, + "step": 5619000 + }, + { + "epoch": 3.37, + "learning_rate": 3.244952042786499e-05, + "loss": 0.3278, + "step": 5619500 + }, + { + "epoch": 3.37, + "learning_rate": 3.244742466223554e-05, + "loss": 0.3342, + "step": 5620000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2445324696674975e-05, + "loss": 0.3303, + "step": 5620500 + }, + { + "epoch": 3.37, + "learning_rate": 3.244322473111441e-05, + "loss": 0.3276, + "step": 5621000 + }, + { + "epoch": 3.37, + "learning_rate": 3.244112476555385e-05, + "loss": 0.3339, + "step": 5621500 + }, + { + "epoch": 3.37, + "learning_rate": 3.243902479999328e-05, + "loss": 0.3234, + "step": 5622000 + }, + { + "epoch": 3.37, + "learning_rate": 3.243692483443271e-05, + "loss": 0.3333, + "step": 5622500 + }, + { + "epoch": 3.37, + "learning_rate": 3.243482486887215e-05, + "loss": 0.3407, + "step": 5623000 + }, + { + "epoch": 3.37, + "learning_rate": 3.243272910324271e-05, + "loss": 0.3249, + "step": 5623500 + }, + { + "epoch": 3.37, + "learning_rate": 3.243062913768214e-05, + "loss": 0.3263, + "step": 5624000 + }, + { + "epoch": 3.37, + "learning_rate": 3.242852917212158e-05, + "loss": 0.3273, + "step": 5624500 + }, + { + "epoch": 3.37, + "learning_rate": 3.242642920656101e-05, + "loss": 0.3284, + "step": 5625000 + }, + { + "epoch": 3.37, + "learning_rate": 3.242432924100044e-05, + "loss": 0.3308, + "step": 5625500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2422229275439883e-05, + "loss": 0.3459, + "step": 5626000 + }, + { + "epoch": 3.37, + "learning_rate": 3.242012930987932e-05, + "loss": 0.3321, + "step": 5626500 + }, + { + "epoch": 3.37, + "learning_rate": 3.241803354424987e-05, + "loss": 0.3251, + "step": 5627000 + }, + { + "epoch": 3.37, + "learning_rate": 3.2415933578689304e-05, + "loss": 0.3308, + "step": 5627500 + }, + { + "epoch": 3.37, + "learning_rate": 3.2413833613128744e-05, + "loss": 0.3258, + "step": 5628000 + }, + { + "epoch": 3.37, + "learning_rate": 3.241173364756818e-05, + "loss": 0.3222, + "step": 5628500 + }, + { + "epoch": 3.37, + "learning_rate": 3.240963368200761e-05, + "loss": 0.3361, + "step": 5629000 + }, + { + "epoch": 3.38, + "learning_rate": 3.240753371644705e-05, + "loss": 0.3284, + "step": 5629500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2405437950817605e-05, + "loss": 0.3256, + "step": 5630000 + }, + { + "epoch": 3.38, + "learning_rate": 3.240333798525704e-05, + "loss": 0.3355, + "step": 5630500 + }, + { + "epoch": 3.38, + "learning_rate": 3.240123801969648e-05, + "loss": 0.328, + "step": 5631000 + }, + { + "epoch": 3.38, + "learning_rate": 3.239913805413591e-05, + "loss": 0.3412, + "step": 5631500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2397038088575346e-05, + "loss": 0.326, + "step": 5632000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2394938123014786e-05, + "loss": 0.3264, + "step": 5632500 + }, + { + "epoch": 3.38, + "learning_rate": 3.239283815745422e-05, + "loss": 0.3229, + "step": 5633000 + }, + { + "epoch": 3.38, + "learning_rate": 3.239073819189365e-05, + "loss": 0.3368, + "step": 5633500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2388642426264206e-05, + "loss": 0.333, + "step": 5634000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2386542460703646e-05, + "loss": 0.3301, + "step": 5634500 + }, + { + "epoch": 3.38, + "learning_rate": 3.238444249514308e-05, + "loss": 0.3255, + "step": 5635000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2382346729513634e-05, + "loss": 0.3279, + "step": 5635500 + }, + { + "epoch": 3.38, + "learning_rate": 3.238024676395307e-05, + "loss": 0.3486, + "step": 5636000 + }, + { + "epoch": 3.38, + "learning_rate": 3.237814679839251e-05, + "loss": 0.3261, + "step": 5636500 + }, + { + "epoch": 3.38, + "learning_rate": 3.237604683283194e-05, + "loss": 0.3303, + "step": 5637000 + }, + { + "epoch": 3.38, + "learning_rate": 3.2373946867271374e-05, + "loss": 0.3317, + "step": 5637500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2371846901710814e-05, + "loss": 0.3348, + "step": 5638000 + }, + { + "epoch": 3.38, + "learning_rate": 3.236975113608137e-05, + "loss": 0.3359, + "step": 5638500 + }, + { + "epoch": 3.38, + "learning_rate": 3.23676511705208e-05, + "loss": 0.3309, + "step": 5639000 + }, + { + "epoch": 3.38, + "learning_rate": 3.236555120496024e-05, + "loss": 0.3274, + "step": 5639500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2363451239399675e-05, + "loss": 0.3305, + "step": 5640000 + }, + { + "epoch": 3.38, + "learning_rate": 3.236135127383911e-05, + "loss": 0.3284, + "step": 5640500 + }, + { + "epoch": 3.38, + "learning_rate": 3.235925130827855e-05, + "loss": 0.3241, + "step": 5641000 + }, + { + "epoch": 3.38, + "learning_rate": 3.235715134271798e-05, + "loss": 0.3266, + "step": 5641500 + }, + { + "epoch": 3.38, + "learning_rate": 3.235505137715741e-05, + "loss": 0.3367, + "step": 5642000 + }, + { + "epoch": 3.38, + "learning_rate": 3.235295561152797e-05, + "loss": 0.3309, + "step": 5642500 + }, + { + "epoch": 3.38, + "learning_rate": 3.235085564596741e-05, + "loss": 0.3341, + "step": 5643000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234875568040684e-05, + "loss": 0.34, + "step": 5643500 + }, + { + "epoch": 3.38, + "learning_rate": 3.2346655714846276e-05, + "loss": 0.3297, + "step": 5644000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234455574928571e-05, + "loss": 0.3225, + "step": 5644500 + }, + { + "epoch": 3.38, + "learning_rate": 3.234245578372514e-05, + "loss": 0.3265, + "step": 5645000 + }, + { + "epoch": 3.38, + "learning_rate": 3.234035581816458e-05, + "loss": 0.3356, + "step": 5645500 + }, + { + "epoch": 3.39, + "learning_rate": 3.233825585260402e-05, + "loss": 0.3287, + "step": 5646000 + }, + { + "epoch": 3.39, + "learning_rate": 3.233616008697458e-05, + "loss": 0.3287, + "step": 5646500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2334060121414004e-05, + "loss": 0.3345, + "step": 5647000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2331964355784564e-05, + "loss": 0.3385, + "step": 5647500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2329864390224005e-05, + "loss": 0.3249, + "step": 5648000 + }, + { + "epoch": 3.39, + "learning_rate": 3.232776442466344e-05, + "loss": 0.3483, + "step": 5648500 + }, + { + "epoch": 3.39, + "learning_rate": 3.232566445910287e-05, + "loss": 0.3381, + "step": 5649000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2323568693473425e-05, + "loss": 0.3299, + "step": 5649500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2321468727912865e-05, + "loss": 0.3189, + "step": 5650000 + }, + { + "epoch": 3.39, + "learning_rate": 3.23193687623523e-05, + "loss": 0.3338, + "step": 5650500 + }, + { + "epoch": 3.39, + "learning_rate": 3.231726879679173e-05, + "loss": 0.3268, + "step": 5651000 + }, + { + "epoch": 3.39, + "learning_rate": 3.231516883123117e-05, + "loss": 0.3327, + "step": 5651500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2313073065601726e-05, + "loss": 0.3314, + "step": 5652000 + }, + { + "epoch": 3.39, + "learning_rate": 3.231097310004116e-05, + "loss": 0.3334, + "step": 5652500 + }, + { + "epoch": 3.39, + "learning_rate": 3.230887313448059e-05, + "loss": 0.318, + "step": 5653000 + }, + { + "epoch": 3.39, + "learning_rate": 3.230677316892003e-05, + "loss": 0.3322, + "step": 5653500 + }, + { + "epoch": 3.39, + "learning_rate": 3.230467740329059e-05, + "loss": 0.332, + "step": 5654000 + }, + { + "epoch": 3.39, + "learning_rate": 3.230257743773002e-05, + "loss": 0.3237, + "step": 5654500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2300481672100574e-05, + "loss": 0.3387, + "step": 5655000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2298381706540014e-05, + "loss": 0.3333, + "step": 5655500 + }, + { + "epoch": 3.39, + "learning_rate": 3.229628174097945e-05, + "loss": 0.3295, + "step": 5656000 + }, + { + "epoch": 3.39, + "learning_rate": 3.229418177541888e-05, + "loss": 0.3351, + "step": 5656500 + }, + { + "epoch": 3.39, + "learning_rate": 3.229208180985832e-05, + "loss": 0.3227, + "step": 5657000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2289981844297755e-05, + "loss": 0.3397, + "step": 5657500 + }, + { + "epoch": 3.39, + "learning_rate": 3.228788607866831e-05, + "loss": 0.3284, + "step": 5658000 + }, + { + "epoch": 3.39, + "learning_rate": 3.228578611310774e-05, + "loss": 0.3304, + "step": 5658500 + }, + { + "epoch": 3.39, + "learning_rate": 3.228368614754718e-05, + "loss": 0.3252, + "step": 5659000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2281590381917736e-05, + "loss": 0.3331, + "step": 5659500 + }, + { + "epoch": 3.39, + "learning_rate": 3.227949041635717e-05, + "loss": 0.3333, + "step": 5660000 + }, + { + "epoch": 3.39, + "learning_rate": 3.227739045079661e-05, + "loss": 0.3275, + "step": 5660500 + }, + { + "epoch": 3.39, + "learning_rate": 3.227529048523604e-05, + "loss": 0.3313, + "step": 5661000 + }, + { + "epoch": 3.39, + "learning_rate": 3.2273190519675476e-05, + "loss": 0.3377, + "step": 5661500 + }, + { + "epoch": 3.39, + "learning_rate": 3.2271090554114916e-05, + "loss": 0.331, + "step": 5662000 + }, + { + "epoch": 3.39, + "learning_rate": 3.226899058855435e-05, + "loss": 0.3278, + "step": 5662500 + }, + { + "epoch": 3.4, + "learning_rate": 3.226689062299378e-05, + "loss": 0.3254, + "step": 5663000 + }, + { + "epoch": 3.4, + "learning_rate": 3.226479065743322e-05, + "loss": 0.3291, + "step": 5663500 + }, + { + "epoch": 3.4, + "learning_rate": 3.226269069187265e-05, + "loss": 0.3349, + "step": 5664000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2260590726312084e-05, + "loss": 0.3351, + "step": 5664500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2258490760751524e-05, + "loss": 0.3362, + "step": 5665000 + }, + { + "epoch": 3.4, + "learning_rate": 3.225639079519096e-05, + "loss": 0.3274, + "step": 5665500 + }, + { + "epoch": 3.4, + "learning_rate": 3.225429502956151e-05, + "loss": 0.3264, + "step": 5666000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2252195064000944e-05, + "loss": 0.3348, + "step": 5666500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2250095098440385e-05, + "loss": 0.3318, + "step": 5667000 + }, + { + "epoch": 3.4, + "learning_rate": 3.224799513287982e-05, + "loss": 0.3249, + "step": 5667500 + }, + { + "epoch": 3.4, + "learning_rate": 3.224589516731925e-05, + "loss": 0.3272, + "step": 5668000 + }, + { + "epoch": 3.4, + "learning_rate": 3.224379520175869e-05, + "loss": 0.3353, + "step": 5668500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2241699436129245e-05, + "loss": 0.336, + "step": 5669000 + }, + { + "epoch": 3.4, + "learning_rate": 3.223959947056868e-05, + "loss": 0.3279, + "step": 5669500 + }, + { + "epoch": 3.4, + "learning_rate": 3.223749950500812e-05, + "loss": 0.3229, + "step": 5670000 + }, + { + "epoch": 3.4, + "learning_rate": 3.223539953944755e-05, + "loss": 0.3286, + "step": 5670500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2233299573886986e-05, + "loss": 0.3241, + "step": 5671000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2231199608326426e-05, + "loss": 0.3284, + "step": 5671500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222909964276586e-05, + "loss": 0.3332, + "step": 5672000 + }, + { + "epoch": 3.4, + "learning_rate": 3.222700387713641e-05, + "loss": 0.3336, + "step": 5672500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222490391157585e-05, + "loss": 0.3251, + "step": 5673000 + }, + { + "epoch": 3.4, + "learning_rate": 3.222280394601529e-05, + "loss": 0.3318, + "step": 5673500 + }, + { + "epoch": 3.4, + "learning_rate": 3.222070818038584e-05, + "loss": 0.3262, + "step": 5674000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2218608214825274e-05, + "loss": 0.3326, + "step": 5674500 + }, + { + "epoch": 3.4, + "learning_rate": 3.221650824926471e-05, + "loss": 0.3313, + "step": 5675000 + }, + { + "epoch": 3.4, + "learning_rate": 3.221440828370415e-05, + "loss": 0.3201, + "step": 5675500 + }, + { + "epoch": 3.4, + "learning_rate": 3.221230831814358e-05, + "loss": 0.3317, + "step": 5676000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2210208352583015e-05, + "loss": 0.3239, + "step": 5676500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2208108387022455e-05, + "loss": 0.3277, + "step": 5677000 + }, + { + "epoch": 3.4, + "learning_rate": 3.220600842146189e-05, + "loss": 0.3332, + "step": 5677500 + }, + { + "epoch": 3.4, + "learning_rate": 3.220391265583244e-05, + "loss": 0.3333, + "step": 5678000 + }, + { + "epoch": 3.4, + "learning_rate": 3.220181269027188e-05, + "loss": 0.3339, + "step": 5678500 + }, + { + "epoch": 3.4, + "learning_rate": 3.2199712724711316e-05, + "loss": 0.3336, + "step": 5679000 + }, + { + "epoch": 3.41, + "learning_rate": 3.219761275915075e-05, + "loss": 0.3315, + "step": 5679500 + }, + { + "epoch": 3.41, + "learning_rate": 3.219551279359019e-05, + "loss": 0.3385, + "step": 5680000 + }, + { + "epoch": 3.41, + "learning_rate": 3.219341702796074e-05, + "loss": 0.3303, + "step": 5680500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2191317062400176e-05, + "loss": 0.3255, + "step": 5681000 + }, + { + "epoch": 3.41, + "learning_rate": 3.218921709683961e-05, + "loss": 0.3325, + "step": 5681500 + }, + { + "epoch": 3.41, + "learning_rate": 3.218711713127905e-05, + "loss": 0.3265, + "step": 5682000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2185017165718483e-05, + "loss": 0.3362, + "step": 5682500 + }, + { + "epoch": 3.41, + "learning_rate": 3.218291720015792e-05, + "loss": 0.3343, + "step": 5683000 + }, + { + "epoch": 3.41, + "learning_rate": 3.218081723459735e-05, + "loss": 0.3266, + "step": 5683500 + }, + { + "epoch": 3.41, + "learning_rate": 3.217872146896791e-05, + "loss": 0.3296, + "step": 5684000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2176621503407344e-05, + "loss": 0.3269, + "step": 5684500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2174521537846784e-05, + "loss": 0.3262, + "step": 5685000 + }, + { + "epoch": 3.41, + "learning_rate": 3.217242157228622e-05, + "loss": 0.3289, + "step": 5685500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2170321606725645e-05, + "loss": 0.3347, + "step": 5686000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2168225841096205e-05, + "loss": 0.3317, + "step": 5686500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2166125875535645e-05, + "loss": 0.3263, + "step": 5687000 + }, + { + "epoch": 3.41, + "learning_rate": 3.21640301099062e-05, + "loss": 0.3221, + "step": 5687500 + }, + { + "epoch": 3.41, + "learning_rate": 3.216193014434563e-05, + "loss": 0.3359, + "step": 5688000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2159830178785066e-05, + "loss": 0.3265, + "step": 5688500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2157730213224506e-05, + "loss": 0.3307, + "step": 5689000 + }, + { + "epoch": 3.41, + "learning_rate": 3.215563024766394e-05, + "loss": 0.3238, + "step": 5689500 + }, + { + "epoch": 3.41, + "learning_rate": 3.215353028210337e-05, + "loss": 0.3364, + "step": 5690000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2151430316542806e-05, + "loss": 0.324, + "step": 5690500 + }, + { + "epoch": 3.41, + "learning_rate": 3.214933035098224e-05, + "loss": 0.3272, + "step": 5691000 + }, + { + "epoch": 3.41, + "learning_rate": 3.214723038542168e-05, + "loss": 0.3267, + "step": 5691500 + }, + { + "epoch": 3.41, + "learning_rate": 3.214513461979224e-05, + "loss": 0.3262, + "step": 5692000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2143034654231674e-05, + "loss": 0.3318, + "step": 5692500 + }, + { + "epoch": 3.41, + "learning_rate": 3.21409346886711e-05, + "loss": 0.3329, + "step": 5693000 + }, + { + "epoch": 3.41, + "learning_rate": 3.213883892304166e-05, + "loss": 0.3392, + "step": 5693500 + }, + { + "epoch": 3.41, + "learning_rate": 3.21367389574811e-05, + "loss": 0.3245, + "step": 5694000 + }, + { + "epoch": 3.41, + "learning_rate": 3.2134638991920535e-05, + "loss": 0.3399, + "step": 5694500 + }, + { + "epoch": 3.41, + "learning_rate": 3.213253902635997e-05, + "loss": 0.3277, + "step": 5695000 + }, + { + "epoch": 3.41, + "learning_rate": 3.21304390607994e-05, + "loss": 0.3207, + "step": 5695500 + }, + { + "epoch": 3.41, + "learning_rate": 3.2128339095238835e-05, + "loss": 0.3221, + "step": 5696000 + }, + { + "epoch": 3.42, + "learning_rate": 3.212623912967827e-05, + "loss": 0.3275, + "step": 5696500 + }, + { + "epoch": 3.42, + "learning_rate": 3.212413916411771e-05, + "loss": 0.3354, + "step": 5697000 + }, + { + "epoch": 3.42, + "learning_rate": 3.212203919855714e-05, + "loss": 0.333, + "step": 5697500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2119939232996576e-05, + "loss": 0.3304, + "step": 5698000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2117843467367136e-05, + "loss": 0.3291, + "step": 5698500 + }, + { + "epoch": 3.42, + "learning_rate": 3.211574350180657e-05, + "loss": 0.3352, + "step": 5699000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2113643536246e-05, + "loss": 0.3277, + "step": 5699500 + }, + { + "epoch": 3.42, + "learning_rate": 3.211154357068544e-05, + "loss": 0.3252, + "step": 5700000 + }, + { + "epoch": 3.42, + "eval_loss": 0.32316854596138, + "eval_runtime": 1119.4683, + "eval_samples_per_second": 470.509, + "eval_steps_per_second": 78.418, + "step": 5700000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2109443605124876e-05, + "loss": 0.3347, + "step": 5700500 + }, + { + "epoch": 3.42, + "learning_rate": 3.210734363956431e-05, + "loss": 0.3145, + "step": 5701000 + }, + { + "epoch": 3.42, + "learning_rate": 3.210524367400375e-05, + "loss": 0.3391, + "step": 5701500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2103147908374304e-05, + "loss": 0.3218, + "step": 5702000 + }, + { + "epoch": 3.42, + "learning_rate": 3.210104794281374e-05, + "loss": 0.3225, + "step": 5702500 + }, + { + "epoch": 3.42, + "learning_rate": 3.209894797725317e-05, + "loss": 0.3307, + "step": 5703000 + }, + { + "epoch": 3.42, + "learning_rate": 3.209684801169261e-05, + "loss": 0.324, + "step": 5703500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2094748046132044e-05, + "loss": 0.3306, + "step": 5704000 + }, + { + "epoch": 3.42, + "learning_rate": 3.209264808057148e-05, + "loss": 0.3319, + "step": 5704500 + }, + { + "epoch": 3.42, + "learning_rate": 3.209054811501092e-05, + "loss": 0.318, + "step": 5705000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2088448149450345e-05, + "loss": 0.334, + "step": 5705500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2086352383820905e-05, + "loss": 0.3261, + "step": 5706000 + }, + { + "epoch": 3.42, + "learning_rate": 3.208425661819146e-05, + "loss": 0.3428, + "step": 5706500 + }, + { + "epoch": 3.42, + "learning_rate": 3.20821566526309e-05, + "loss": 0.333, + "step": 5707000 + }, + { + "epoch": 3.42, + "learning_rate": 3.208005668707033e-05, + "loss": 0.3407, + "step": 5707500 + }, + { + "epoch": 3.42, + "learning_rate": 3.2077956721509766e-05, + "loss": 0.3275, + "step": 5708000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2075856755949206e-05, + "loss": 0.3247, + "step": 5708500 + }, + { + "epoch": 3.42, + "learning_rate": 3.207375679038864e-05, + "loss": 0.3274, + "step": 5709000 + }, + { + "epoch": 3.42, + "learning_rate": 3.207165682482807e-05, + "loss": 0.3275, + "step": 5709500 + }, + { + "epoch": 3.42, + "learning_rate": 3.206955685926751e-05, + "loss": 0.323, + "step": 5710000 + }, + { + "epoch": 3.42, + "learning_rate": 3.206746109363807e-05, + "loss": 0.3323, + "step": 5710500 + }, + { + "epoch": 3.42, + "learning_rate": 3.20653611280775e-05, + "loss": 0.3409, + "step": 5711000 + }, + { + "epoch": 3.42, + "learning_rate": 3.2063261162516934e-05, + "loss": 0.3283, + "step": 5711500 + }, + { + "epoch": 3.42, + "learning_rate": 3.206116539688749e-05, + "loss": 0.3297, + "step": 5712000 + }, + { + "epoch": 3.42, + "learning_rate": 3.205906543132693e-05, + "loss": 0.3275, + "step": 5712500 + }, + { + "epoch": 3.43, + "learning_rate": 3.205696546576636e-05, + "loss": 0.3279, + "step": 5713000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2054865500205794e-05, + "loss": 0.3251, + "step": 5713500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2052769734576355e-05, + "loss": 0.3277, + "step": 5714000 + }, + { + "epoch": 3.43, + "learning_rate": 3.205066976901579e-05, + "loss": 0.3338, + "step": 5714500 + }, + { + "epoch": 3.43, + "learning_rate": 3.204856980345522e-05, + "loss": 0.3327, + "step": 5715000 + }, + { + "epoch": 3.43, + "learning_rate": 3.204646983789466e-05, + "loss": 0.3299, + "step": 5715500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2044369872334095e-05, + "loss": 0.3334, + "step": 5716000 + }, + { + "epoch": 3.43, + "learning_rate": 3.204226990677353e-05, + "loss": 0.3313, + "step": 5716500 + }, + { + "epoch": 3.43, + "learning_rate": 3.204016994121297e-05, + "loss": 0.3347, + "step": 5717000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2038069975652396e-05, + "loss": 0.3369, + "step": 5717500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2035978409954076e-05, + "loss": 0.3269, + "step": 5718000 + }, + { + "epoch": 3.43, + "learning_rate": 3.203387844439351e-05, + "loss": 0.3183, + "step": 5718500 + }, + { + "epoch": 3.43, + "learning_rate": 3.203177847883294e-05, + "loss": 0.3335, + "step": 5719000 + }, + { + "epoch": 3.43, + "learning_rate": 3.202967851327238e-05, + "loss": 0.3209, + "step": 5719500 + }, + { + "epoch": 3.43, + "learning_rate": 3.202757854771182e-05, + "loss": 0.3277, + "step": 5720000 + }, + { + "epoch": 3.43, + "learning_rate": 3.202547858215125e-05, + "loss": 0.3279, + "step": 5720500 + }, + { + "epoch": 3.43, + "learning_rate": 3.202338281652181e-05, + "loss": 0.3268, + "step": 5721000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2021282850961244e-05, + "loss": 0.3351, + "step": 5721500 + }, + { + "epoch": 3.43, + "learning_rate": 3.201918288540068e-05, + "loss": 0.3287, + "step": 5722000 + }, + { + "epoch": 3.43, + "learning_rate": 3.201708291984012e-05, + "loss": 0.3304, + "step": 5722500 + }, + { + "epoch": 3.43, + "learning_rate": 3.201498295427955e-05, + "loss": 0.3222, + "step": 5723000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2012887188650105e-05, + "loss": 0.3265, + "step": 5723500 + }, + { + "epoch": 3.43, + "learning_rate": 3.201078722308954e-05, + "loss": 0.3262, + "step": 5724000 + }, + { + "epoch": 3.43, + "learning_rate": 3.200868725752898e-05, + "loss": 0.3274, + "step": 5724500 + }, + { + "epoch": 3.43, + "learning_rate": 3.200659149189953e-05, + "loss": 0.3255, + "step": 5725000 + }, + { + "epoch": 3.43, + "learning_rate": 3.2004491526338966e-05, + "loss": 0.338, + "step": 5725500 + }, + { + "epoch": 3.43, + "learning_rate": 3.2002395760709526e-05, + "loss": 0.3435, + "step": 5726000 + }, + { + "epoch": 3.43, + "learning_rate": 3.200029579514896e-05, + "loss": 0.3247, + "step": 5726500 + }, + { + "epoch": 3.43, + "learning_rate": 3.199819582958839e-05, + "loss": 0.3228, + "step": 5727000 + }, + { + "epoch": 3.43, + "learning_rate": 3.1996095864027826e-05, + "loss": 0.3197, + "step": 5727500 + }, + { + "epoch": 3.43, + "learning_rate": 3.1993995898467267e-05, + "loss": 0.3221, + "step": 5728000 + }, + { + "epoch": 3.43, + "learning_rate": 3.19918959329067e-05, + "loss": 0.3236, + "step": 5728500 + }, + { + "epoch": 3.43, + "learning_rate": 3.1989795967346133e-05, + "loss": 0.3313, + "step": 5729000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1987696001785574e-05, + "loss": 0.3264, + "step": 5729500 + }, + { + "epoch": 3.44, + "learning_rate": 3.198559603622501e-05, + "loss": 0.3358, + "step": 5730000 + }, + { + "epoch": 3.44, + "learning_rate": 3.198349607066444e-05, + "loss": 0.3321, + "step": 5730500 + }, + { + "epoch": 3.44, + "learning_rate": 3.198139610510388e-05, + "loss": 0.3392, + "step": 5731000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1979296139543314e-05, + "loss": 0.3285, + "step": 5731500 + }, + { + "epoch": 3.44, + "learning_rate": 3.197719617398274e-05, + "loss": 0.3321, + "step": 5732000 + }, + { + "epoch": 3.44, + "learning_rate": 3.197509620842218e-05, + "loss": 0.3233, + "step": 5732500 + }, + { + "epoch": 3.44, + "learning_rate": 3.197300044279274e-05, + "loss": 0.3325, + "step": 5733000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1970900477232175e-05, + "loss": 0.3308, + "step": 5733500 + }, + { + "epoch": 3.44, + "learning_rate": 3.196880051167161e-05, + "loss": 0.3265, + "step": 5734000 + }, + { + "epoch": 3.44, + "learning_rate": 3.196670054611104e-05, + "loss": 0.3338, + "step": 5734500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1964600580550475e-05, + "loss": 0.3269, + "step": 5735000 + }, + { + "epoch": 3.44, + "learning_rate": 3.196250061498991e-05, + "loss": 0.331, + "step": 5735500 + }, + { + "epoch": 3.44, + "learning_rate": 3.196040064942935e-05, + "loss": 0.3281, + "step": 5736000 + }, + { + "epoch": 3.44, + "learning_rate": 3.19583048837999e-05, + "loss": 0.3239, + "step": 5736500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1956204918239336e-05, + "loss": 0.3243, + "step": 5737000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1954104952678776e-05, + "loss": 0.3271, + "step": 5737500 + }, + { + "epoch": 3.44, + "learning_rate": 3.195200498711821e-05, + "loss": 0.3255, + "step": 5738000 + }, + { + "epoch": 3.44, + "learning_rate": 3.194990502155764e-05, + "loss": 0.3353, + "step": 5738500 + }, + { + "epoch": 3.44, + "learning_rate": 3.19478092559282e-05, + "loss": 0.3297, + "step": 5739000 + }, + { + "epoch": 3.44, + "learning_rate": 3.194570929036764e-05, + "loss": 0.3336, + "step": 5739500 + }, + { + "epoch": 3.44, + "learning_rate": 3.194360932480707e-05, + "loss": 0.3328, + "step": 5740000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1941509359246504e-05, + "loss": 0.3223, + "step": 5740500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1939413593617064e-05, + "loss": 0.3223, + "step": 5741000 + }, + { + "epoch": 3.44, + "learning_rate": 3.19373136280565e-05, + "loss": 0.3241, + "step": 5741500 + }, + { + "epoch": 3.44, + "learning_rate": 3.193521366249593e-05, + "loss": 0.3292, + "step": 5742000 + }, + { + "epoch": 3.44, + "learning_rate": 3.193311369693537e-05, + "loss": 0.327, + "step": 5742500 + }, + { + "epoch": 3.44, + "learning_rate": 3.193101793130593e-05, + "loss": 0.3304, + "step": 5743000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1928917965745365e-05, + "loss": 0.3389, + "step": 5743500 + }, + { + "epoch": 3.44, + "learning_rate": 3.192681800018479e-05, + "loss": 0.3325, + "step": 5744000 + }, + { + "epoch": 3.44, + "learning_rate": 3.192471803462423e-05, + "loss": 0.3286, + "step": 5744500 + }, + { + "epoch": 3.44, + "learning_rate": 3.1922618069063666e-05, + "loss": 0.3248, + "step": 5745000 + }, + { + "epoch": 3.44, + "learning_rate": 3.1920522303434226e-05, + "loss": 0.3322, + "step": 5745500 + }, + { + "epoch": 3.44, + "learning_rate": 3.191842233787365e-05, + "loss": 0.3276, + "step": 5746000 + }, + { + "epoch": 3.45, + "learning_rate": 3.191632237231309e-05, + "loss": 0.3261, + "step": 5746500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1914222406752526e-05, + "loss": 0.3319, + "step": 5747000 + }, + { + "epoch": 3.45, + "learning_rate": 3.191212244119196e-05, + "loss": 0.3177, + "step": 5747500 + }, + { + "epoch": 3.45, + "learning_rate": 3.19100224756314e-05, + "loss": 0.3279, + "step": 5748000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1907922510070834e-05, + "loss": 0.3384, + "step": 5748500 + }, + { + "epoch": 3.45, + "learning_rate": 3.190582254451027e-05, + "loss": 0.3262, + "step": 5749000 + }, + { + "epoch": 3.45, + "learning_rate": 3.190373097881195e-05, + "loss": 0.3298, + "step": 5749500 + }, + { + "epoch": 3.45, + "learning_rate": 3.190163101325139e-05, + "loss": 0.3292, + "step": 5750000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189953524762194e-05, + "loss": 0.3353, + "step": 5750500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1897435282061375e-05, + "loss": 0.33, + "step": 5751000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189533531650081e-05, + "loss": 0.3196, + "step": 5751500 + }, + { + "epoch": 3.45, + "learning_rate": 3.189323535094025e-05, + "loss": 0.3286, + "step": 5752000 + }, + { + "epoch": 3.45, + "learning_rate": 3.189113538537968e-05, + "loss": 0.3363, + "step": 5752500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1889035419819115e-05, + "loss": 0.3397, + "step": 5753000 + }, + { + "epoch": 3.45, + "learning_rate": 3.188693545425855e-05, + "loss": 0.3395, + "step": 5753500 + }, + { + "epoch": 3.45, + "learning_rate": 3.188483548869798e-05, + "loss": 0.3249, + "step": 5754000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1882735523137416e-05, + "loss": 0.3332, + "step": 5754500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1880639757507976e-05, + "loss": 0.33, + "step": 5755000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1878539791947416e-05, + "loss": 0.3306, + "step": 5755500 + }, + { + "epoch": 3.45, + "learning_rate": 3.187643982638684e-05, + "loss": 0.3267, + "step": 5756000 + }, + { + "epoch": 3.45, + "learning_rate": 3.187433986082628e-05, + "loss": 0.3228, + "step": 5756500 + }, + { + "epoch": 3.45, + "learning_rate": 3.187223989526572e-05, + "loss": 0.3303, + "step": 5757000 + }, + { + "epoch": 3.45, + "learning_rate": 3.187014412963628e-05, + "loss": 0.3268, + "step": 5757500 + }, + { + "epoch": 3.45, + "learning_rate": 3.1868044164075704e-05, + "loss": 0.3238, + "step": 5758000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1865944198515144e-05, + "loss": 0.3288, + "step": 5758500 + }, + { + "epoch": 3.45, + "learning_rate": 3.186384423295458e-05, + "loss": 0.3331, + "step": 5759000 + }, + { + "epoch": 3.45, + "learning_rate": 3.186174846732514e-05, + "loss": 0.3257, + "step": 5759500 + }, + { + "epoch": 3.45, + "learning_rate": 3.185964850176457e-05, + "loss": 0.329, + "step": 5760000 + }, + { + "epoch": 3.45, + "learning_rate": 3.1857548536204005e-05, + "loss": 0.3278, + "step": 5760500 + }, + { + "epoch": 3.45, + "learning_rate": 3.185544857064344e-05, + "loss": 0.3288, + "step": 5761000 + }, + { + "epoch": 3.45, + "learning_rate": 3.185334860508287e-05, + "loss": 0.327, + "step": 5761500 + }, + { + "epoch": 3.45, + "learning_rate": 3.185125283945343e-05, + "loss": 0.3289, + "step": 5762000 + }, + { + "epoch": 3.45, + "learning_rate": 3.184915287389287e-05, + "loss": 0.3251, + "step": 5762500 + }, + { + "epoch": 3.46, + "learning_rate": 3.18470529083323e-05, + "loss": 0.3294, + "step": 5763000 + }, + { + "epoch": 3.46, + "learning_rate": 3.184495294277174e-05, + "loss": 0.3375, + "step": 5763500 + }, + { + "epoch": 3.46, + "learning_rate": 3.18428571771423e-05, + "loss": 0.3332, + "step": 5764000 + }, + { + "epoch": 3.46, + "learning_rate": 3.184075721158173e-05, + "loss": 0.3437, + "step": 5764500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1838657246021166e-05, + "loss": 0.3279, + "step": 5765000 + }, + { + "epoch": 3.46, + "learning_rate": 3.18365572804606e-05, + "loss": 0.3296, + "step": 5765500 + }, + { + "epoch": 3.46, + "learning_rate": 3.183445731490003e-05, + "loss": 0.3365, + "step": 5766000 + }, + { + "epoch": 3.46, + "learning_rate": 3.183235734933947e-05, + "loss": 0.3367, + "step": 5766500 + }, + { + "epoch": 3.46, + "learning_rate": 3.183025738377891e-05, + "loss": 0.331, + "step": 5767000 + }, + { + "epoch": 3.46, + "learning_rate": 3.182815741821834e-05, + "loss": 0.3305, + "step": 5767500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1826061652588894e-05, + "loss": 0.3277, + "step": 5768000 + }, + { + "epoch": 3.46, + "learning_rate": 3.182396168702833e-05, + "loss": 0.3247, + "step": 5768500 + }, + { + "epoch": 3.46, + "learning_rate": 3.182186172146777e-05, + "loss": 0.3275, + "step": 5769000 + }, + { + "epoch": 3.46, + "learning_rate": 3.18197617559072e-05, + "loss": 0.3243, + "step": 5769500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1817661790346635e-05, + "loss": 0.3294, + "step": 5770000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1815566024717195e-05, + "loss": 0.3359, + "step": 5770500 + }, + { + "epoch": 3.46, + "learning_rate": 3.181346605915663e-05, + "loss": 0.3264, + "step": 5771000 + }, + { + "epoch": 3.46, + "learning_rate": 3.181136609359606e-05, + "loss": 0.3274, + "step": 5771500 + }, + { + "epoch": 3.46, + "learning_rate": 3.18092661280355e-05, + "loss": 0.3254, + "step": 5772000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1807170362406056e-05, + "loss": 0.3389, + "step": 5772500 + }, + { + "epoch": 3.46, + "learning_rate": 3.180507039684549e-05, + "loss": 0.3321, + "step": 5773000 + }, + { + "epoch": 3.46, + "learning_rate": 3.180297043128492e-05, + "loss": 0.3251, + "step": 5773500 + }, + { + "epoch": 3.46, + "learning_rate": 3.180087046572436e-05, + "loss": 0.3337, + "step": 5774000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1798770500163796e-05, + "loss": 0.3362, + "step": 5774500 + }, + { + "epoch": 3.46, + "learning_rate": 3.179667473453435e-05, + "loss": 0.3335, + "step": 5775000 + }, + { + "epoch": 3.46, + "learning_rate": 3.1794574768973783e-05, + "loss": 0.3282, + "step": 5775500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1792474803413224e-05, + "loss": 0.3324, + "step": 5776000 + }, + { + "epoch": 3.46, + "learning_rate": 3.179037483785266e-05, + "loss": 0.3303, + "step": 5776500 + }, + { + "epoch": 3.46, + "learning_rate": 3.178827907222321e-05, + "loss": 0.3312, + "step": 5777000 + }, + { + "epoch": 3.46, + "learning_rate": 3.178617910666265e-05, + "loss": 0.3358, + "step": 5777500 + }, + { + "epoch": 3.46, + "learning_rate": 3.1784079141102084e-05, + "loss": 0.3295, + "step": 5778000 + }, + { + "epoch": 3.46, + "learning_rate": 3.178197917554152e-05, + "loss": 0.3298, + "step": 5778500 + }, + { + "epoch": 3.46, + "learning_rate": 3.177987920998096e-05, + "loss": 0.3328, + "step": 5779000 + }, + { + "epoch": 3.47, + "learning_rate": 3.177777924442039e-05, + "loss": 0.3337, + "step": 5779500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1775679278859825e-05, + "loss": 0.3323, + "step": 5780000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1773579313299265e-05, + "loss": 0.3313, + "step": 5780500 + }, + { + "epoch": 3.47, + "learning_rate": 3.177148774760094e-05, + "loss": 0.3268, + "step": 5781000 + }, + { + "epoch": 3.47, + "learning_rate": 3.176938778204038e-05, + "loss": 0.3276, + "step": 5781500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1767287816479806e-05, + "loss": 0.3321, + "step": 5782000 + }, + { + "epoch": 3.47, + "learning_rate": 3.176518785091924e-05, + "loss": 0.3304, + "step": 5782500 + }, + { + "epoch": 3.47, + "learning_rate": 3.176308788535868e-05, + "loss": 0.3343, + "step": 5783000 + }, + { + "epoch": 3.47, + "learning_rate": 3.176099211972924e-05, + "loss": 0.3339, + "step": 5783500 + }, + { + "epoch": 3.47, + "learning_rate": 3.175889215416867e-05, + "loss": 0.3311, + "step": 5784000 + }, + { + "epoch": 3.47, + "learning_rate": 3.175679218860811e-05, + "loss": 0.3382, + "step": 5784500 + }, + { + "epoch": 3.47, + "learning_rate": 3.175469222304754e-05, + "loss": 0.3167, + "step": 5785000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1752592257486974e-05, + "loss": 0.3263, + "step": 5785500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1750496491857534e-05, + "loss": 0.3316, + "step": 5786000 + }, + { + "epoch": 3.47, + "learning_rate": 3.174839652629697e-05, + "loss": 0.3319, + "step": 5786500 + }, + { + "epoch": 3.47, + "learning_rate": 3.17462965607364e-05, + "loss": 0.336, + "step": 5787000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1744196595175834e-05, + "loss": 0.3316, + "step": 5787500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1742096629615275e-05, + "loss": 0.3299, + "step": 5788000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1740000863985835e-05, + "loss": 0.3308, + "step": 5788500 + }, + { + "epoch": 3.47, + "learning_rate": 3.173790089842526e-05, + "loss": 0.3315, + "step": 5789000 + }, + { + "epoch": 3.47, + "learning_rate": 3.1735800932864695e-05, + "loss": 0.325, + "step": 5789500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1733700967304135e-05, + "loss": 0.3268, + "step": 5790000 + }, + { + "epoch": 3.47, + "learning_rate": 3.173160100174357e-05, + "loss": 0.3315, + "step": 5790500 + }, + { + "epoch": 3.47, + "learning_rate": 3.172950523611413e-05, + "loss": 0.3269, + "step": 5791000 + }, + { + "epoch": 3.47, + "learning_rate": 3.172740527055356e-05, + "loss": 0.3316, + "step": 5791500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1725305304992996e-05, + "loss": 0.3297, + "step": 5792000 + }, + { + "epoch": 3.47, + "learning_rate": 3.172320533943243e-05, + "loss": 0.3227, + "step": 5792500 + }, + { + "epoch": 3.47, + "learning_rate": 3.172110537387187e-05, + "loss": 0.3242, + "step": 5793000 + }, + { + "epoch": 3.47, + "learning_rate": 3.17190054083113e-05, + "loss": 0.3337, + "step": 5793500 + }, + { + "epoch": 3.47, + "learning_rate": 3.171690544275074e-05, + "loss": 0.3371, + "step": 5794000 + }, + { + "epoch": 3.47, + "learning_rate": 3.171480547719018e-05, + "loss": 0.3276, + "step": 5794500 + }, + { + "epoch": 3.47, + "learning_rate": 3.171270971156073e-05, + "loss": 0.3327, + "step": 5795000 + }, + { + "epoch": 3.47, + "learning_rate": 3.171061394593129e-05, + "loss": 0.3446, + "step": 5795500 + }, + { + "epoch": 3.47, + "learning_rate": 3.1708513980370724e-05, + "loss": 0.3283, + "step": 5796000 + }, + { + "epoch": 3.48, + "learning_rate": 3.170641401481015e-05, + "loss": 0.3318, + "step": 5796500 + }, + { + "epoch": 3.48, + "learning_rate": 3.170431404924959e-05, + "loss": 0.3218, + "step": 5797000 + }, + { + "epoch": 3.48, + "learning_rate": 3.170221828362015e-05, + "loss": 0.3289, + "step": 5797500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1700118318059585e-05, + "loss": 0.326, + "step": 5798000 + }, + { + "epoch": 3.48, + "learning_rate": 3.169801835249902e-05, + "loss": 0.3279, + "step": 5798500 + }, + { + "epoch": 3.48, + "learning_rate": 3.169591838693845e-05, + "loss": 0.3182, + "step": 5799000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1693818421377885e-05, + "loss": 0.331, + "step": 5799500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1691722655748446e-05, + "loss": 0.3225, + "step": 5800000 + }, + { + "epoch": 3.48, + "eval_loss": 0.32082223892211914, + "eval_runtime": 1121.4189, + "eval_samples_per_second": 469.691, + "eval_steps_per_second": 78.282, + "step": 5800000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1689626890119e-05, + "loss": 0.3266, + "step": 5800500 + }, + { + "epoch": 3.48, + "learning_rate": 3.168752692455844e-05, + "loss": 0.3319, + "step": 5801000 + }, + { + "epoch": 3.48, + "learning_rate": 3.168542695899787e-05, + "loss": 0.3262, + "step": 5801500 + }, + { + "epoch": 3.48, + "learning_rate": 3.1683326993437307e-05, + "loss": 0.3249, + "step": 5802000 + }, + { + "epoch": 3.48, + "learning_rate": 3.168122702787675e-05, + "loss": 0.3226, + "step": 5802500 + }, + { + "epoch": 3.48, + "learning_rate": 3.16791312622473e-05, + "loss": 0.3374, + "step": 5803000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1677031296686734e-05, + "loss": 0.3277, + "step": 5803500 + }, + { + "epoch": 3.48, + "learning_rate": 3.167493133112617e-05, + "loss": 0.324, + "step": 5804000 + }, + { + "epoch": 3.48, + "learning_rate": 3.167283136556561e-05, + "loss": 0.3268, + "step": 5804500 + }, + { + "epoch": 3.48, + "learning_rate": 3.167073140000504e-05, + "loss": 0.3269, + "step": 5805000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1668631434444474e-05, + "loss": 0.3212, + "step": 5805500 + }, + { + "epoch": 3.48, + "learning_rate": 3.166653146888391e-05, + "loss": 0.3215, + "step": 5806000 + }, + { + "epoch": 3.48, + "learning_rate": 3.166443150332334e-05, + "loss": 0.3225, + "step": 5806500 + }, + { + "epoch": 3.48, + "learning_rate": 3.166233153776278e-05, + "loss": 0.3371, + "step": 5807000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1660231572202215e-05, + "loss": 0.3308, + "step": 5807500 + }, + { + "epoch": 3.48, + "learning_rate": 3.165813580657277e-05, + "loss": 0.3245, + "step": 5808000 + }, + { + "epoch": 3.48, + "learning_rate": 3.16560358410122e-05, + "loss": 0.3291, + "step": 5808500 + }, + { + "epoch": 3.48, + "learning_rate": 3.165393587545164e-05, + "loss": 0.3298, + "step": 5809000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1651835909891076e-05, + "loss": 0.3257, + "step": 5809500 + }, + { + "epoch": 3.48, + "learning_rate": 3.164973594433051e-05, + "loss": 0.3221, + "step": 5810000 + }, + { + "epoch": 3.48, + "learning_rate": 3.164763597876995e-05, + "loss": 0.333, + "step": 5810500 + }, + { + "epoch": 3.48, + "learning_rate": 3.164553601320938e-05, + "loss": 0.3329, + "step": 5811000 + }, + { + "epoch": 3.48, + "learning_rate": 3.1643440247579936e-05, + "loss": 0.3315, + "step": 5811500 + }, + { + "epoch": 3.48, + "learning_rate": 3.164134028201937e-05, + "loss": 0.3256, + "step": 5812000 + }, + { + "epoch": 3.48, + "learning_rate": 3.163924031645881e-05, + "loss": 0.3312, + "step": 5812500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1637140350898244e-05, + "loss": 0.3435, + "step": 5813000 + }, + { + "epoch": 3.49, + "learning_rate": 3.163504038533768e-05, + "loss": 0.3223, + "step": 5813500 + }, + { + "epoch": 3.49, + "learning_rate": 3.163294041977712e-05, + "loss": 0.3311, + "step": 5814000 + }, + { + "epoch": 3.49, + "learning_rate": 3.163084045421655e-05, + "loss": 0.3244, + "step": 5814500 + }, + { + "epoch": 3.49, + "learning_rate": 3.162874048865599e-05, + "loss": 0.3293, + "step": 5815000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1626644723026545e-05, + "loss": 0.3369, + "step": 5815500 + }, + { + "epoch": 3.49, + "learning_rate": 3.162454475746598e-05, + "loss": 0.3324, + "step": 5816000 + }, + { + "epoch": 3.49, + "learning_rate": 3.162244899183653e-05, + "loss": 0.3303, + "step": 5816500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1620349026275965e-05, + "loss": 0.3284, + "step": 5817000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1618249060715405e-05, + "loss": 0.3239, + "step": 5817500 + }, + { + "epoch": 3.49, + "learning_rate": 3.161614909515484e-05, + "loss": 0.3264, + "step": 5818000 + }, + { + "epoch": 3.49, + "learning_rate": 3.161404912959427e-05, + "loss": 0.3301, + "step": 5818500 + }, + { + "epoch": 3.49, + "learning_rate": 3.161194916403371e-05, + "loss": 0.3324, + "step": 5819000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1609849198473146e-05, + "loss": 0.3307, + "step": 5819500 + }, + { + "epoch": 3.49, + "learning_rate": 3.16077534328437e-05, + "loss": 0.3246, + "step": 5820000 + }, + { + "epoch": 3.49, + "learning_rate": 3.160565346728314e-05, + "loss": 0.3262, + "step": 5820500 + }, + { + "epoch": 3.49, + "learning_rate": 3.160355350172257e-05, + "loss": 0.3327, + "step": 5821000 + }, + { + "epoch": 3.49, + "learning_rate": 3.160145353616201e-05, + "loss": 0.3268, + "step": 5821500 + }, + { + "epoch": 3.49, + "learning_rate": 3.159935357060145e-05, + "loss": 0.3391, + "step": 5822000 + }, + { + "epoch": 3.49, + "learning_rate": 3.159725360504088e-05, + "loss": 0.3266, + "step": 5822500 + }, + { + "epoch": 3.49, + "learning_rate": 3.159515363948031e-05, + "loss": 0.3286, + "step": 5823000 + }, + { + "epoch": 3.49, + "learning_rate": 3.159305367391975e-05, + "loss": 0.3354, + "step": 5823500 + }, + { + "epoch": 3.49, + "learning_rate": 3.159095790829031e-05, + "loss": 0.3251, + "step": 5824000 + }, + { + "epoch": 3.49, + "learning_rate": 3.158885794272974e-05, + "loss": 0.3258, + "step": 5824500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1586762177100295e-05, + "loss": 0.3373, + "step": 5825000 + }, + { + "epoch": 3.49, + "learning_rate": 3.158466221153973e-05, + "loss": 0.3337, + "step": 5825500 + }, + { + "epoch": 3.49, + "learning_rate": 3.158256224597917e-05, + "loss": 0.3259, + "step": 5826000 + }, + { + "epoch": 3.49, + "learning_rate": 3.15804622804186e-05, + "loss": 0.3313, + "step": 5826500 + }, + { + "epoch": 3.49, + "learning_rate": 3.1578366514789155e-05, + "loss": 0.3281, + "step": 5827000 + }, + { + "epoch": 3.49, + "learning_rate": 3.1576266549228596e-05, + "loss": 0.3194, + "step": 5827500 + }, + { + "epoch": 3.49, + "learning_rate": 3.157416658366803e-05, + "loss": 0.3361, + "step": 5828000 + }, + { + "epoch": 3.49, + "learning_rate": 3.157206661810746e-05, + "loss": 0.3311, + "step": 5828500 + }, + { + "epoch": 3.49, + "learning_rate": 3.15699666525469e-05, + "loss": 0.3256, + "step": 5829000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1567866686986336e-05, + "loss": 0.3268, + "step": 5829500 + }, + { + "epoch": 3.5, + "learning_rate": 3.156576672142577e-05, + "loss": 0.3279, + "step": 5830000 + }, + { + "epoch": 3.5, + "learning_rate": 3.15636667558652e-05, + "loss": 0.3346, + "step": 5830500 + }, + { + "epoch": 3.5, + "learning_rate": 3.156157519016688e-05, + "loss": 0.3348, + "step": 5831000 + }, + { + "epoch": 3.5, + "learning_rate": 3.155947522460632e-05, + "loss": 0.3285, + "step": 5831500 + }, + { + "epoch": 3.5, + "learning_rate": 3.155737525904575e-05, + "loss": 0.3241, + "step": 5832000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1555275293485184e-05, + "loss": 0.3221, + "step": 5832500 + }, + { + "epoch": 3.5, + "learning_rate": 3.155317952785574e-05, + "loss": 0.3332, + "step": 5833000 + }, + { + "epoch": 3.5, + "learning_rate": 3.155107956229518e-05, + "loss": 0.3199, + "step": 5833500 + }, + { + "epoch": 3.5, + "learning_rate": 3.154897959673461e-05, + "loss": 0.3192, + "step": 5834000 + }, + { + "epoch": 3.5, + "learning_rate": 3.154687963117405e-05, + "loss": 0.322, + "step": 5834500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1544779665613485e-05, + "loss": 0.3252, + "step": 5835000 + }, + { + "epoch": 3.5, + "learning_rate": 3.154268389998404e-05, + "loss": 0.3272, + "step": 5835500 + }, + { + "epoch": 3.5, + "learning_rate": 3.154058393442347e-05, + "loss": 0.3275, + "step": 5836000 + }, + { + "epoch": 3.5, + "learning_rate": 3.153848396886291e-05, + "loss": 0.3248, + "step": 5836500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1536384003302346e-05, + "loss": 0.3247, + "step": 5837000 + }, + { + "epoch": 3.5, + "learning_rate": 3.153428403774178e-05, + "loss": 0.33, + "step": 5837500 + }, + { + "epoch": 3.5, + "learning_rate": 3.153218407218122e-05, + "loss": 0.3342, + "step": 5838000 + }, + { + "epoch": 3.5, + "learning_rate": 3.153008830655177e-05, + "loss": 0.324, + "step": 5838500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1527988340991206e-05, + "loss": 0.3269, + "step": 5839000 + }, + { + "epoch": 3.5, + "learning_rate": 3.152588837543064e-05, + "loss": 0.3346, + "step": 5839500 + }, + { + "epoch": 3.5, + "learning_rate": 3.152378840987008e-05, + "loss": 0.3331, + "step": 5840000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1521688444309514e-05, + "loss": 0.3276, + "step": 5840500 + }, + { + "epoch": 3.5, + "learning_rate": 3.151959267868007e-05, + "loss": 0.3369, + "step": 5841000 + }, + { + "epoch": 3.5, + "learning_rate": 3.151749271311951e-05, + "loss": 0.3304, + "step": 5841500 + }, + { + "epoch": 3.5, + "learning_rate": 3.151539274755894e-05, + "loss": 0.3233, + "step": 5842000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1513292781998374e-05, + "loss": 0.3331, + "step": 5842500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1511192816437815e-05, + "loss": 0.3218, + "step": 5843000 + }, + { + "epoch": 3.5, + "learning_rate": 3.150909285087725e-05, + "loss": 0.3246, + "step": 5843500 + }, + { + "epoch": 3.5, + "learning_rate": 3.150699288531668e-05, + "loss": 0.3243, + "step": 5844000 + }, + { + "epoch": 3.5, + "learning_rate": 3.1504892919756115e-05, + "loss": 0.3297, + "step": 5844500 + }, + { + "epoch": 3.5, + "learning_rate": 3.1502797154126675e-05, + "loss": 0.3302, + "step": 5845000 + }, + { + "epoch": 3.5, + "learning_rate": 3.150069718856611e-05, + "loss": 0.3267, + "step": 5845500 + }, + { + "epoch": 3.5, + "learning_rate": 3.149860142293666e-05, + "loss": 0.3268, + "step": 5846000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1496501457376096e-05, + "loss": 0.3244, + "step": 5846500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1494401491815536e-05, + "loss": 0.3319, + "step": 5847000 + }, + { + "epoch": 3.51, + "learning_rate": 3.149230152625497e-05, + "loss": 0.3288, + "step": 5847500 + }, + { + "epoch": 3.51, + "learning_rate": 3.149020576062552e-05, + "loss": 0.3299, + "step": 5848000 + }, + { + "epoch": 3.51, + "learning_rate": 3.148810579506496e-05, + "loss": 0.3285, + "step": 5848500 + }, + { + "epoch": 3.51, + "learning_rate": 3.14860058295044e-05, + "loss": 0.3361, + "step": 5849000 + }, + { + "epoch": 3.51, + "learning_rate": 3.148390586394383e-05, + "loss": 0.3269, + "step": 5849500 + }, + { + "epoch": 3.51, + "learning_rate": 3.148180589838327e-05, + "loss": 0.3434, + "step": 5850000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1479710132753824e-05, + "loss": 0.3257, + "step": 5850500 + }, + { + "epoch": 3.51, + "learning_rate": 3.147761016719326e-05, + "loss": 0.3321, + "step": 5851000 + }, + { + "epoch": 3.51, + "learning_rate": 3.147551020163269e-05, + "loss": 0.3281, + "step": 5851500 + }, + { + "epoch": 3.51, + "learning_rate": 3.147341023607213e-05, + "loss": 0.324, + "step": 5852000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1471314470442685e-05, + "loss": 0.3354, + "step": 5852500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146921450488212e-05, + "loss": 0.3251, + "step": 5853000 + }, + { + "epoch": 3.51, + "learning_rate": 3.146711453932155e-05, + "loss": 0.3231, + "step": 5853500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146501457376099e-05, + "loss": 0.3361, + "step": 5854000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1462914608200425e-05, + "loss": 0.3335, + "step": 5854500 + }, + { + "epoch": 3.51, + "learning_rate": 3.146081464263986e-05, + "loss": 0.3276, + "step": 5855000 + }, + { + "epoch": 3.51, + "learning_rate": 3.14587146770793e-05, + "loss": 0.3268, + "step": 5855500 + }, + { + "epoch": 3.51, + "learning_rate": 3.145661471151873e-05, + "loss": 0.335, + "step": 5856000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1454518945889286e-05, + "loss": 0.3276, + "step": 5856500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1452418980328726e-05, + "loss": 0.3279, + "step": 5857000 + }, + { + "epoch": 3.51, + "learning_rate": 3.145031901476816e-05, + "loss": 0.3302, + "step": 5857500 + }, + { + "epoch": 3.51, + "learning_rate": 3.144821904920759e-05, + "loss": 0.3316, + "step": 5858000 + }, + { + "epoch": 3.51, + "learning_rate": 3.144612328357815e-05, + "loss": 0.3292, + "step": 5858500 + }, + { + "epoch": 3.51, + "learning_rate": 3.144402331801759e-05, + "loss": 0.3173, + "step": 5859000 + }, + { + "epoch": 3.51, + "learning_rate": 3.144192335245702e-05, + "loss": 0.3261, + "step": 5859500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1439823386896454e-05, + "loss": 0.3364, + "step": 5860000 + }, + { + "epoch": 3.51, + "learning_rate": 3.143772762126701e-05, + "loss": 0.3236, + "step": 5860500 + }, + { + "epoch": 3.51, + "learning_rate": 3.143562765570645e-05, + "loss": 0.3314, + "step": 5861000 + }, + { + "epoch": 3.51, + "learning_rate": 3.143352769014588e-05, + "loss": 0.3256, + "step": 5861500 + }, + { + "epoch": 3.51, + "learning_rate": 3.1431427724585315e-05, + "loss": 0.3313, + "step": 5862000 + }, + { + "epoch": 3.51, + "learning_rate": 3.1429327759024755e-05, + "loss": 0.3295, + "step": 5862500 + }, + { + "epoch": 3.52, + "learning_rate": 3.142722779346419e-05, + "loss": 0.3278, + "step": 5863000 + }, + { + "epoch": 3.52, + "learning_rate": 3.142513202783474e-05, + "loss": 0.3247, + "step": 5863500 + }, + { + "epoch": 3.52, + "learning_rate": 3.142303206227418e-05, + "loss": 0.3267, + "step": 5864000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1420932096713616e-05, + "loss": 0.3259, + "step": 5864500 + }, + { + "epoch": 3.52, + "learning_rate": 3.141883213115305e-05, + "loss": 0.3285, + "step": 5865000 + }, + { + "epoch": 3.52, + "learning_rate": 3.14167363655236e-05, + "loss": 0.3414, + "step": 5865500 + }, + { + "epoch": 3.52, + "learning_rate": 3.141463639996304e-05, + "loss": 0.3302, + "step": 5866000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1412536434402476e-05, + "loss": 0.3253, + "step": 5866500 + }, + { + "epoch": 3.52, + "learning_rate": 3.141043646884191e-05, + "loss": 0.3262, + "step": 5867000 + }, + { + "epoch": 3.52, + "learning_rate": 3.140833650328135e-05, + "loss": 0.321, + "step": 5867500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1406240737651904e-05, + "loss": 0.3275, + "step": 5868000 + }, + { + "epoch": 3.52, + "learning_rate": 3.140414077209134e-05, + "loss": 0.3319, + "step": 5868500 + }, + { + "epoch": 3.52, + "learning_rate": 3.140204080653077e-05, + "loss": 0.3385, + "step": 5869000 + }, + { + "epoch": 3.52, + "learning_rate": 3.139994084097021e-05, + "loss": 0.3192, + "step": 5869500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1397840875409644e-05, + "loss": 0.3267, + "step": 5870000 + }, + { + "epoch": 3.52, + "learning_rate": 3.139574090984908e-05, + "loss": 0.3177, + "step": 5870500 + }, + { + "epoch": 3.52, + "learning_rate": 3.139364514421964e-05, + "loss": 0.3389, + "step": 5871000 + }, + { + "epoch": 3.52, + "learning_rate": 3.139154517865907e-05, + "loss": 0.3258, + "step": 5871500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1389445213098505e-05, + "loss": 0.3234, + "step": 5872000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1387345247537945e-05, + "loss": 0.3331, + "step": 5872500 + }, + { + "epoch": 3.52, + "learning_rate": 3.138524528197738e-05, + "loss": 0.3364, + "step": 5873000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1383145316416805e-05, + "loss": 0.3389, + "step": 5873500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1381045350856246e-05, + "loss": 0.3221, + "step": 5874000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1378949585226806e-05, + "loss": 0.3299, + "step": 5874500 + }, + { + "epoch": 3.52, + "learning_rate": 3.137684961966624e-05, + "loss": 0.3165, + "step": 5875000 + }, + { + "epoch": 3.52, + "learning_rate": 3.137475385403679e-05, + "loss": 0.3409, + "step": 5875500 + }, + { + "epoch": 3.52, + "learning_rate": 3.1372653888476226e-05, + "loss": 0.3268, + "step": 5876000 + }, + { + "epoch": 3.52, + "learning_rate": 3.137055392291567e-05, + "loss": 0.3218, + "step": 5876500 + }, + { + "epoch": 3.52, + "learning_rate": 3.13684539573551e-05, + "loss": 0.3307, + "step": 5877000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1366353991794534e-05, + "loss": 0.3298, + "step": 5877500 + }, + { + "epoch": 3.52, + "learning_rate": 3.136425402623397e-05, + "loss": 0.328, + "step": 5878000 + }, + { + "epoch": 3.52, + "learning_rate": 3.13621540606734e-05, + "loss": 0.3333, + "step": 5878500 + }, + { + "epoch": 3.52, + "learning_rate": 3.136005409511284e-05, + "loss": 0.3194, + "step": 5879000 + }, + { + "epoch": 3.52, + "learning_rate": 3.1357954129552274e-05, + "loss": 0.3246, + "step": 5879500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1355858363922835e-05, + "loss": 0.323, + "step": 5880000 + }, + { + "epoch": 3.53, + "learning_rate": 3.135375839836226e-05, + "loss": 0.326, + "step": 5880500 + }, + { + "epoch": 3.53, + "learning_rate": 3.13516584328017e-05, + "loss": 0.3279, + "step": 5881000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1349558467241135e-05, + "loss": 0.3233, + "step": 5881500 + }, + { + "epoch": 3.53, + "learning_rate": 3.134745850168057e-05, + "loss": 0.3295, + "step": 5882000 + }, + { + "epoch": 3.53, + "learning_rate": 3.134535853612001e-05, + "loss": 0.3211, + "step": 5882500 + }, + { + "epoch": 3.53, + "learning_rate": 3.134326277049056e-05, + "loss": 0.3361, + "step": 5883000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1341162804929996e-05, + "loss": 0.3285, + "step": 5883500 + }, + { + "epoch": 3.53, + "learning_rate": 3.133906283936943e-05, + "loss": 0.3266, + "step": 5884000 + }, + { + "epoch": 3.53, + "learning_rate": 3.133696287380887e-05, + "loss": 0.3265, + "step": 5884500 + }, + { + "epoch": 3.53, + "learning_rate": 3.13348629082483e-05, + "loss": 0.33, + "step": 5885000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1332767142618856e-05, + "loss": 0.3285, + "step": 5885500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1330667177058297e-05, + "loss": 0.3258, + "step": 5886000 + }, + { + "epoch": 3.53, + "learning_rate": 3.132856721149773e-05, + "loss": 0.3331, + "step": 5886500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1326467245937164e-05, + "loss": 0.3304, + "step": 5887000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1324367280376604e-05, + "loss": 0.3322, + "step": 5887500 + }, + { + "epoch": 3.53, + "learning_rate": 3.132227151474716e-05, + "loss": 0.3258, + "step": 5888000 + }, + { + "epoch": 3.53, + "learning_rate": 3.132017154918659e-05, + "loss": 0.3328, + "step": 5888500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1318071583626024e-05, + "loss": 0.322, + "step": 5889000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1315971618065464e-05, + "loss": 0.3276, + "step": 5889500 + }, + { + "epoch": 3.53, + "learning_rate": 3.131387585243602e-05, + "loss": 0.322, + "step": 5890000 + }, + { + "epoch": 3.53, + "learning_rate": 3.131177588687545e-05, + "loss": 0.337, + "step": 5890500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1309675921314885e-05, + "loss": 0.3227, + "step": 5891000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1307575955754325e-05, + "loss": 0.3355, + "step": 5891500 + }, + { + "epoch": 3.53, + "learning_rate": 3.1305480190124886e-05, + "loss": 0.3237, + "step": 5892000 + }, + { + "epoch": 3.53, + "learning_rate": 3.130338022456431e-05, + "loss": 0.3284, + "step": 5892500 + }, + { + "epoch": 3.53, + "learning_rate": 3.130128025900375e-05, + "loss": 0.3209, + "step": 5893000 + }, + { + "epoch": 3.53, + "learning_rate": 3.1299180293443186e-05, + "loss": 0.3272, + "step": 5893500 + }, + { + "epoch": 3.53, + "learning_rate": 3.129708032788262e-05, + "loss": 0.3255, + "step": 5894000 + }, + { + "epoch": 3.53, + "learning_rate": 3.129498036232206e-05, + "loss": 0.3201, + "step": 5894500 + }, + { + "epoch": 3.53, + "learning_rate": 3.129288459669261e-05, + "loss": 0.3306, + "step": 5895000 + }, + { + "epoch": 3.53, + "learning_rate": 3.129078463113205e-05, + "loss": 0.3203, + "step": 5895500 + }, + { + "epoch": 3.53, + "learning_rate": 3.128868466557148e-05, + "loss": 0.3299, + "step": 5896000 + }, + { + "epoch": 3.54, + "learning_rate": 3.128658470001092e-05, + "loss": 0.3296, + "step": 5896500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1284488934381474e-05, + "loss": 0.3263, + "step": 5897000 + }, + { + "epoch": 3.54, + "learning_rate": 3.128238896882091e-05, + "loss": 0.3272, + "step": 5897500 + }, + { + "epoch": 3.54, + "learning_rate": 3.128028900326034e-05, + "loss": 0.3321, + "step": 5898000 + }, + { + "epoch": 3.54, + "learning_rate": 3.127818903769978e-05, + "loss": 0.3388, + "step": 5898500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1276089072139215e-05, + "loss": 0.3231, + "step": 5899000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1273989106578655e-05, + "loss": 0.3265, + "step": 5899500 + }, + { + "epoch": 3.54, + "learning_rate": 3.127188914101809e-05, + "loss": 0.3428, + "step": 5900000 + }, + { + "epoch": 3.54, + "eval_loss": 0.32011526823043823, + "eval_runtime": 1121.1763, + "eval_samples_per_second": 469.792, + "eval_steps_per_second": 78.299, + "step": 5900000 + }, + { + "epoch": 3.54, + "learning_rate": 3.126979337538864e-05, + "loss": 0.3309, + "step": 5900500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1267693409828075e-05, + "loss": 0.3252, + "step": 5901000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1265593444267516e-05, + "loss": 0.3157, + "step": 5901500 + }, + { + "epoch": 3.54, + "learning_rate": 3.126349347870695e-05, + "loss": 0.3332, + "step": 5902000 + }, + { + "epoch": 3.54, + "learning_rate": 3.126139351314638e-05, + "loss": 0.3272, + "step": 5902500 + }, + { + "epoch": 3.54, + "learning_rate": 3.125929354758582e-05, + "loss": 0.3124, + "step": 5903000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1257193582025256e-05, + "loss": 0.3287, + "step": 5903500 + }, + { + "epoch": 3.54, + "learning_rate": 3.125509361646469e-05, + "loss": 0.3297, + "step": 5904000 + }, + { + "epoch": 3.54, + "learning_rate": 3.125299365090413e-05, + "loss": 0.335, + "step": 5904500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1250893685343557e-05, + "loss": 0.3339, + "step": 5905000 + }, + { + "epoch": 3.54, + "learning_rate": 3.124879371978299e-05, + "loss": 0.3283, + "step": 5905500 + }, + { + "epoch": 3.54, + "learning_rate": 3.124669375422243e-05, + "loss": 0.3197, + "step": 5906000 + }, + { + "epoch": 3.54, + "learning_rate": 3.124459798859299e-05, + "loss": 0.3319, + "step": 5906500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1242502222963544e-05, + "loss": 0.3292, + "step": 5907000 + }, + { + "epoch": 3.54, + "learning_rate": 3.124040225740298e-05, + "loss": 0.3375, + "step": 5907500 + }, + { + "epoch": 3.54, + "learning_rate": 3.123830229184242e-05, + "loss": 0.3321, + "step": 5908000 + }, + { + "epoch": 3.54, + "learning_rate": 3.123620232628185e-05, + "loss": 0.3206, + "step": 5908500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1234106560652405e-05, + "loss": 0.3228, + "step": 5909000 + }, + { + "epoch": 3.54, + "learning_rate": 3.123200659509184e-05, + "loss": 0.3286, + "step": 5909500 + }, + { + "epoch": 3.54, + "learning_rate": 3.122990662953128e-05, + "loss": 0.3264, + "step": 5910000 + }, + { + "epoch": 3.54, + "learning_rate": 3.122780666397071e-05, + "loss": 0.3307, + "step": 5910500 + }, + { + "epoch": 3.54, + "learning_rate": 3.1225706698410145e-05, + "loss": 0.3273, + "step": 5911000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1223606732849586e-05, + "loss": 0.3351, + "step": 5911500 + }, + { + "epoch": 3.54, + "learning_rate": 3.122150676728901e-05, + "loss": 0.3258, + "step": 5912000 + }, + { + "epoch": 3.54, + "learning_rate": 3.1219406801728446e-05, + "loss": 0.3246, + "step": 5912500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1217311036099006e-05, + "loss": 0.3291, + "step": 5913000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1215215270469567e-05, + "loss": 0.3186, + "step": 5913500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1213115304909e-05, + "loss": 0.3282, + "step": 5914000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1211015339348433e-05, + "loss": 0.3314, + "step": 5914500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1208915373787874e-05, + "loss": 0.3267, + "step": 5915000 + }, + { + "epoch": 3.55, + "learning_rate": 3.120681540822731e-05, + "loss": 0.3209, + "step": 5915500 + }, + { + "epoch": 3.55, + "learning_rate": 3.120471544266674e-05, + "loss": 0.3296, + "step": 5916000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1202619677037294e-05, + "loss": 0.3286, + "step": 5916500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1200519711476734e-05, + "loss": 0.3372, + "step": 5917000 + }, + { + "epoch": 3.55, + "learning_rate": 3.119841974591617e-05, + "loss": 0.3262, + "step": 5917500 + }, + { + "epoch": 3.55, + "learning_rate": 3.11963197803556e-05, + "loss": 0.3236, + "step": 5918000 + }, + { + "epoch": 3.55, + "learning_rate": 3.119421981479504e-05, + "loss": 0.3317, + "step": 5918500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1192124049165595e-05, + "loss": 0.3227, + "step": 5919000 + }, + { + "epoch": 3.55, + "learning_rate": 3.119002408360503e-05, + "loss": 0.3243, + "step": 5919500 + }, + { + "epoch": 3.55, + "learning_rate": 3.118792411804446e-05, + "loss": 0.332, + "step": 5920000 + }, + { + "epoch": 3.55, + "learning_rate": 3.11858241524839e-05, + "loss": 0.3364, + "step": 5920500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1183724186923336e-05, + "loss": 0.3392, + "step": 5921000 + }, + { + "epoch": 3.55, + "learning_rate": 3.118162422136277e-05, + "loss": 0.3237, + "step": 5921500 + }, + { + "epoch": 3.55, + "learning_rate": 3.11795242558022e-05, + "loss": 0.3256, + "step": 5922000 + }, + { + "epoch": 3.55, + "learning_rate": 3.117742849017276e-05, + "loss": 0.3359, + "step": 5922500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1175328524612196e-05, + "loss": 0.3214, + "step": 5923000 + }, + { + "epoch": 3.55, + "learning_rate": 3.117322855905164e-05, + "loss": 0.3269, + "step": 5923500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1171128593491063e-05, + "loss": 0.3328, + "step": 5924000 + }, + { + "epoch": 3.55, + "learning_rate": 3.11690286279305e-05, + "loss": 0.3276, + "step": 5924500 + }, + { + "epoch": 3.55, + "learning_rate": 3.116692866236994e-05, + "loss": 0.3226, + "step": 5925000 + }, + { + "epoch": 3.55, + "learning_rate": 3.116482869680937e-05, + "loss": 0.3355, + "step": 5925500 + }, + { + "epoch": 3.55, + "learning_rate": 3.116273293117993e-05, + "loss": 0.3367, + "step": 5926000 + }, + { + "epoch": 3.55, + "learning_rate": 3.116063296561936e-05, + "loss": 0.3222, + "step": 5926500 + }, + { + "epoch": 3.55, + "learning_rate": 3.11585330000588e-05, + "loss": 0.3215, + "step": 5927000 + }, + { + "epoch": 3.55, + "learning_rate": 3.115643303449823e-05, + "loss": 0.3252, + "step": 5927500 + }, + { + "epoch": 3.55, + "learning_rate": 3.1154333068937665e-05, + "loss": 0.3373, + "step": 5928000 + }, + { + "epoch": 3.55, + "learning_rate": 3.1152233103377105e-05, + "loss": 0.3188, + "step": 5928500 + }, + { + "epoch": 3.55, + "learning_rate": 3.115013733774766e-05, + "loss": 0.336, + "step": 5929000 + }, + { + "epoch": 3.55, + "learning_rate": 3.114803737218709e-05, + "loss": 0.3276, + "step": 5929500 + }, + { + "epoch": 3.56, + "learning_rate": 3.114593740662653e-05, + "loss": 0.3429, + "step": 5930000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1143837441065966e-05, + "loss": 0.3253, + "step": 5930500 + }, + { + "epoch": 3.56, + "learning_rate": 3.11417374755054e-05, + "loss": 0.326, + "step": 5931000 + }, + { + "epoch": 3.56, + "learning_rate": 3.113963750994484e-05, + "loss": 0.3267, + "step": 5931500 + }, + { + "epoch": 3.56, + "learning_rate": 3.113753754438427e-05, + "loss": 0.3378, + "step": 5932000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1135437578823706e-05, + "loss": 0.3283, + "step": 5932500 + }, + { + "epoch": 3.56, + "learning_rate": 3.113334181319426e-05, + "loss": 0.3238, + "step": 5933000 + }, + { + "epoch": 3.56, + "learning_rate": 3.11312418476337e-05, + "loss": 0.3239, + "step": 5933500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1129146082004254e-05, + "loss": 0.3284, + "step": 5934000 + }, + { + "epoch": 3.56, + "learning_rate": 3.112704611644369e-05, + "loss": 0.3288, + "step": 5934500 + }, + { + "epoch": 3.56, + "learning_rate": 3.112494615088312e-05, + "loss": 0.3318, + "step": 5935000 + }, + { + "epoch": 3.56, + "learning_rate": 3.112284618532256e-05, + "loss": 0.3232, + "step": 5935500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1120746219761994e-05, + "loss": 0.3291, + "step": 5936000 + }, + { + "epoch": 3.56, + "learning_rate": 3.111864625420143e-05, + "loss": 0.3305, + "step": 5936500 + }, + { + "epoch": 3.56, + "learning_rate": 3.111654628864087e-05, + "loss": 0.3253, + "step": 5937000 + }, + { + "epoch": 3.56, + "learning_rate": 3.11144463230803e-05, + "loss": 0.3285, + "step": 5937500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1112350557450855e-05, + "loss": 0.3273, + "step": 5938000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1110250591890295e-05, + "loss": 0.3269, + "step": 5938500 + }, + { + "epoch": 3.56, + "learning_rate": 3.110815062632973e-05, + "loss": 0.3279, + "step": 5939000 + }, + { + "epoch": 3.56, + "learning_rate": 3.110605486070028e-05, + "loss": 0.3398, + "step": 5939500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1103954895139716e-05, + "loss": 0.3345, + "step": 5940000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1101854929579156e-05, + "loss": 0.3313, + "step": 5940500 + }, + { + "epoch": 3.56, + "learning_rate": 3.109975496401859e-05, + "loss": 0.3386, + "step": 5941000 + }, + { + "epoch": 3.56, + "learning_rate": 3.109765499845802e-05, + "loss": 0.3205, + "step": 5941500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1095559232828577e-05, + "loss": 0.3209, + "step": 5942000 + }, + { + "epoch": 3.56, + "learning_rate": 3.109345926726802e-05, + "loss": 0.3287, + "step": 5942500 + }, + { + "epoch": 3.56, + "learning_rate": 3.109135930170745e-05, + "loss": 0.3215, + "step": 5943000 + }, + { + "epoch": 3.56, + "learning_rate": 3.1089259336146884e-05, + "loss": 0.331, + "step": 5943500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1087159370586324e-05, + "loss": 0.3203, + "step": 5944000 + }, + { + "epoch": 3.56, + "learning_rate": 3.108506360495688e-05, + "loss": 0.3297, + "step": 5944500 + }, + { + "epoch": 3.56, + "learning_rate": 3.108296363939631e-05, + "loss": 0.3408, + "step": 5945000 + }, + { + "epoch": 3.56, + "learning_rate": 3.108086367383575e-05, + "loss": 0.3205, + "step": 5945500 + }, + { + "epoch": 3.56, + "learning_rate": 3.1078763708275185e-05, + "loss": 0.3259, + "step": 5946000 + }, + { + "epoch": 3.57, + "learning_rate": 3.107666794264574e-05, + "loss": 0.336, + "step": 5946500 + }, + { + "epoch": 3.57, + "learning_rate": 3.107456797708517e-05, + "loss": 0.319, + "step": 5947000 + }, + { + "epoch": 3.57, + "learning_rate": 3.107246801152461e-05, + "loss": 0.3275, + "step": 5947500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1070368045964045e-05, + "loss": 0.3372, + "step": 5948000 + }, + { + "epoch": 3.57, + "learning_rate": 3.10682722803346e-05, + "loss": 0.3273, + "step": 5948500 + }, + { + "epoch": 3.57, + "learning_rate": 3.106617231477403e-05, + "loss": 0.3308, + "step": 5949000 + }, + { + "epoch": 3.57, + "learning_rate": 3.106407234921347e-05, + "loss": 0.3238, + "step": 5949500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1061972383652906e-05, + "loss": 0.3276, + "step": 5950000 + }, + { + "epoch": 3.57, + "learning_rate": 3.105987241809234e-05, + "loss": 0.3256, + "step": 5950500 + }, + { + "epoch": 3.57, + "learning_rate": 3.10577766524629e-05, + "loss": 0.3289, + "step": 5951000 + }, + { + "epoch": 3.57, + "learning_rate": 3.105567668690233e-05, + "loss": 0.3266, + "step": 5951500 + }, + { + "epoch": 3.57, + "learning_rate": 3.105357672134177e-05, + "loss": 0.3308, + "step": 5952000 + }, + { + "epoch": 3.57, + "learning_rate": 3.105147675578121e-05, + "loss": 0.3309, + "step": 5952500 + }, + { + "epoch": 3.57, + "learning_rate": 3.104937679022064e-05, + "loss": 0.3275, + "step": 5953000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1047276824660074e-05, + "loss": 0.3262, + "step": 5953500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1045176859099514e-05, + "loss": 0.3352, + "step": 5954000 + }, + { + "epoch": 3.57, + "learning_rate": 3.104307689353895e-05, + "loss": 0.3283, + "step": 5954500 + }, + { + "epoch": 3.57, + "learning_rate": 3.10409811279095e-05, + "loss": 0.3281, + "step": 5955000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1038885362280055e-05, + "loss": 0.3359, + "step": 5955500 + }, + { + "epoch": 3.57, + "learning_rate": 3.103678539671949e-05, + "loss": 0.3218, + "step": 5956000 + }, + { + "epoch": 3.57, + "learning_rate": 3.103468543115893e-05, + "loss": 0.3229, + "step": 5956500 + }, + { + "epoch": 3.57, + "learning_rate": 3.103258546559836e-05, + "loss": 0.3317, + "step": 5957000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1030485500037795e-05, + "loss": 0.3255, + "step": 5957500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1028385534477236e-05, + "loss": 0.3277, + "step": 5958000 + }, + { + "epoch": 3.57, + "learning_rate": 3.102628976884779e-05, + "loss": 0.3234, + "step": 5958500 + }, + { + "epoch": 3.57, + "learning_rate": 3.102418980328722e-05, + "loss": 0.337, + "step": 5959000 + }, + { + "epoch": 3.57, + "learning_rate": 3.102208983772666e-05, + "loss": 0.3237, + "step": 5959500 + }, + { + "epoch": 3.57, + "learning_rate": 3.1019989872166096e-05, + "loss": 0.3245, + "step": 5960000 + }, + { + "epoch": 3.57, + "learning_rate": 3.101788990660553e-05, + "loss": 0.3317, + "step": 5960500 + }, + { + "epoch": 3.57, + "learning_rate": 3.101578994104497e-05, + "loss": 0.3268, + "step": 5961000 + }, + { + "epoch": 3.57, + "learning_rate": 3.1013689975484404e-05, + "loss": 0.3311, + "step": 5961500 + }, + { + "epoch": 3.57, + "learning_rate": 3.101159420985496e-05, + "loss": 0.3277, + "step": 5962000 + }, + { + "epoch": 3.57, + "learning_rate": 3.100949424429439e-05, + "loss": 0.3193, + "step": 5962500 + }, + { + "epoch": 3.58, + "learning_rate": 3.100739427873383e-05, + "loss": 0.3251, + "step": 5963000 + }, + { + "epoch": 3.58, + "learning_rate": 3.1005294313173264e-05, + "loss": 0.3252, + "step": 5963500 + }, + { + "epoch": 3.58, + "learning_rate": 3.10031943476127e-05, + "loss": 0.3263, + "step": 5964000 + }, + { + "epoch": 3.58, + "learning_rate": 3.100109858198326e-05, + "loss": 0.3302, + "step": 5964500 + }, + { + "epoch": 3.58, + "learning_rate": 3.099899861642269e-05, + "loss": 0.3329, + "step": 5965000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0996898650862125e-05, + "loss": 0.3178, + "step": 5965500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0994798685301565e-05, + "loss": 0.3326, + "step": 5966000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0992698719741e-05, + "loss": 0.3329, + "step": 5966500 + }, + { + "epoch": 3.58, + "learning_rate": 3.099059875418043e-05, + "loss": 0.3279, + "step": 5967000 + }, + { + "epoch": 3.58, + "learning_rate": 3.098849878861987e-05, + "loss": 0.3297, + "step": 5967500 + }, + { + "epoch": 3.58, + "learning_rate": 3.09863988230593e-05, + "loss": 0.3263, + "step": 5968000 + }, + { + "epoch": 3.58, + "learning_rate": 3.098430305742986e-05, + "loss": 0.3284, + "step": 5968500 + }, + { + "epoch": 3.58, + "learning_rate": 3.098220309186929e-05, + "loss": 0.3308, + "step": 5969000 + }, + { + "epoch": 3.58, + "learning_rate": 3.098010312630873e-05, + "loss": 0.3288, + "step": 5969500 + }, + { + "epoch": 3.58, + "learning_rate": 3.097800316074816e-05, + "loss": 0.3302, + "step": 5970000 + }, + { + "epoch": 3.58, + "learning_rate": 3.097590319518759e-05, + "loss": 0.3252, + "step": 5970500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0973807429558154e-05, + "loss": 0.3222, + "step": 5971000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0971707463997594e-05, + "loss": 0.3219, + "step": 5971500 + }, + { + "epoch": 3.58, + "learning_rate": 3.096960749843703e-05, + "loss": 0.3354, + "step": 5972000 + }, + { + "epoch": 3.58, + "learning_rate": 3.096750753287646e-05, + "loss": 0.325, + "step": 5972500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0965407567315894e-05, + "loss": 0.3254, + "step": 5973000 + }, + { + "epoch": 3.58, + "learning_rate": 3.0963311801686455e-05, + "loss": 0.3354, + "step": 5973500 + }, + { + "epoch": 3.58, + "learning_rate": 3.096121183612589e-05, + "loss": 0.3218, + "step": 5974000 + }, + { + "epoch": 3.58, + "learning_rate": 3.095911187056533e-05, + "loss": 0.3271, + "step": 5974500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0957011905004755e-05, + "loss": 0.3244, + "step": 5975000 + }, + { + "epoch": 3.58, + "learning_rate": 3.095491193944419e-05, + "loss": 0.3193, + "step": 5975500 + }, + { + "epoch": 3.58, + "learning_rate": 3.095281197388363e-05, + "loss": 0.3252, + "step": 5976000 + }, + { + "epoch": 3.58, + "learning_rate": 3.095071620825419e-05, + "loss": 0.3271, + "step": 5976500 + }, + { + "epoch": 3.58, + "learning_rate": 3.094861624269362e-05, + "loss": 0.3337, + "step": 5977000 + }, + { + "epoch": 3.58, + "learning_rate": 3.094651627713305e-05, + "loss": 0.3272, + "step": 5977500 + }, + { + "epoch": 3.58, + "learning_rate": 3.094441631157249e-05, + "loss": 0.3282, + "step": 5978000 + }, + { + "epoch": 3.58, + "learning_rate": 3.094231634601192e-05, + "loss": 0.3197, + "step": 5978500 + }, + { + "epoch": 3.58, + "learning_rate": 3.0940216380451356e-05, + "loss": 0.323, + "step": 5979000 + }, + { + "epoch": 3.58, + "learning_rate": 3.093812061482192e-05, + "loss": 0.3213, + "step": 5979500 + }, + { + "epoch": 3.59, + "learning_rate": 3.093602064926135e-05, + "loss": 0.3264, + "step": 5980000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0933920683700784e-05, + "loss": 0.3303, + "step": 5980500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0931820718140224e-05, + "loss": 0.3331, + "step": 5981000 + }, + { + "epoch": 3.59, + "learning_rate": 3.092972075257966e-05, + "loss": 0.3273, + "step": 5981500 + }, + { + "epoch": 3.59, + "learning_rate": 3.092762498695021e-05, + "loss": 0.3237, + "step": 5982000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0925525021389644e-05, + "loss": 0.3249, + "step": 5982500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0923425055829085e-05, + "loss": 0.3208, + "step": 5983000 + }, + { + "epoch": 3.59, + "learning_rate": 3.092132509026852e-05, + "loss": 0.3248, + "step": 5983500 + }, + { + "epoch": 3.59, + "learning_rate": 3.091922512470795e-05, + "loss": 0.3253, + "step": 5984000 + }, + { + "epoch": 3.59, + "learning_rate": 3.091712515914739e-05, + "loss": 0.3322, + "step": 5984500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0915025193586825e-05, + "loss": 0.3253, + "step": 5985000 + }, + { + "epoch": 3.59, + "learning_rate": 3.091292522802626e-05, + "loss": 0.3295, + "step": 5985500 + }, + { + "epoch": 3.59, + "learning_rate": 3.091082946239681e-05, + "loss": 0.3283, + "step": 5986000 + }, + { + "epoch": 3.59, + "learning_rate": 3.090872949683625e-05, + "loss": 0.3259, + "step": 5986500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0906629531275686e-05, + "loss": 0.3215, + "step": 5987000 + }, + { + "epoch": 3.59, + "learning_rate": 3.090452956571512e-05, + "loss": 0.3295, + "step": 5987500 + }, + { + "epoch": 3.59, + "learning_rate": 3.090242960015456e-05, + "loss": 0.3183, + "step": 5988000 + }, + { + "epoch": 3.59, + "learning_rate": 3.090033383452511e-05, + "loss": 0.3286, + "step": 5988500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0898233868964547e-05, + "loss": 0.3252, + "step": 5989000 + }, + { + "epoch": 3.59, + "learning_rate": 3.089613390340399e-05, + "loss": 0.326, + "step": 5989500 + }, + { + "epoch": 3.59, + "learning_rate": 3.089403393784342e-05, + "loss": 0.3257, + "step": 5990000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0891933972282854e-05, + "loss": 0.3299, + "step": 5990500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0889834006722294e-05, + "loss": 0.3347, + "step": 5991000 + }, + { + "epoch": 3.59, + "learning_rate": 3.088773824109285e-05, + "loss": 0.336, + "step": 5991500 + }, + { + "epoch": 3.59, + "learning_rate": 3.08856424754634e-05, + "loss": 0.3386, + "step": 5992000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0883542509902835e-05, + "loss": 0.3289, + "step": 5992500 + }, + { + "epoch": 3.59, + "learning_rate": 3.088144254434227e-05, + "loss": 0.3211, + "step": 5993000 + }, + { + "epoch": 3.59, + "learning_rate": 3.087934257878171e-05, + "loss": 0.3267, + "step": 5993500 + }, + { + "epoch": 3.59, + "learning_rate": 3.087724261322114e-05, + "loss": 0.3226, + "step": 5994000 + }, + { + "epoch": 3.59, + "learning_rate": 3.0875142647660575e-05, + "loss": 0.3197, + "step": 5994500 + }, + { + "epoch": 3.59, + "learning_rate": 3.0873042682100015e-05, + "loss": 0.3237, + "step": 5995000 + }, + { + "epoch": 3.59, + "learning_rate": 3.087094271653945e-05, + "loss": 0.3253, + "step": 5995500 + }, + { + "epoch": 3.59, + "learning_rate": 3.086884275097888e-05, + "loss": 0.334, + "step": 5996000 + }, + { + "epoch": 3.6, + "learning_rate": 3.086674698534944e-05, + "loss": 0.3253, + "step": 5996500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0864647019788876e-05, + "loss": 0.3308, + "step": 5997000 + }, + { + "epoch": 3.6, + "learning_rate": 3.086254705422831e-05, + "loss": 0.3314, + "step": 5997500 + }, + { + "epoch": 3.6, + "learning_rate": 3.086044708866775e-05, + "loss": 0.321, + "step": 5998000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0858351323038303e-05, + "loss": 0.3282, + "step": 5998500 + }, + { + "epoch": 3.6, + "learning_rate": 3.085625135747774e-05, + "loss": 0.3325, + "step": 5999000 + }, + { + "epoch": 3.6, + "learning_rate": 3.085415139191717e-05, + "loss": 0.3162, + "step": 5999500 + }, + { + "epoch": 3.6, + "learning_rate": 3.085205142635661e-05, + "loss": 0.3209, + "step": 6000000 + }, + { + "epoch": 3.6, + "eval_loss": 0.31902313232421875, + "eval_runtime": 1120.2992, + "eval_samples_per_second": 470.16, + "eval_steps_per_second": 78.36, + "step": 6000000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0849951460796044e-05, + "loss": 0.3256, + "step": 6000500 + }, + { + "epoch": 3.6, + "learning_rate": 3.084785149523548e-05, + "loss": 0.3238, + "step": 6001000 + }, + { + "epoch": 3.6, + "learning_rate": 3.084575152967492e-05, + "loss": 0.3316, + "step": 6001500 + }, + { + "epoch": 3.6, + "learning_rate": 3.084365576404547e-05, + "loss": 0.3317, + "step": 6002000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0841555798484905e-05, + "loss": 0.3341, + "step": 6002500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083945583292434e-05, + "loss": 0.3278, + "step": 6003000 + }, + { + "epoch": 3.6, + "learning_rate": 3.083735586736378e-05, + "loss": 0.3286, + "step": 6003500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083525590180321e-05, + "loss": 0.3363, + "step": 6004000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0833155936242645e-05, + "loss": 0.3257, + "step": 6004500 + }, + { + "epoch": 3.6, + "learning_rate": 3.083105597068208e-05, + "loss": 0.3214, + "step": 6005000 + }, + { + "epoch": 3.6, + "learning_rate": 3.082895600512151e-05, + "loss": 0.3261, + "step": 6005500 + }, + { + "epoch": 3.6, + "learning_rate": 3.082686023949207e-05, + "loss": 0.3263, + "step": 6006000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0824764473862626e-05, + "loss": 0.3263, + "step": 6006500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0822664508302066e-05, + "loss": 0.325, + "step": 6007000 + }, + { + "epoch": 3.6, + "learning_rate": 3.08205645427415e-05, + "loss": 0.319, + "step": 6007500 + }, + { + "epoch": 3.6, + "learning_rate": 3.081846457718093e-05, + "loss": 0.3237, + "step": 6008000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0816364611620374e-05, + "loss": 0.3258, + "step": 6008500 + }, + { + "epoch": 3.6, + "learning_rate": 3.08142646460598e-05, + "loss": 0.3238, + "step": 6009000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0812164680499234e-05, + "loss": 0.322, + "step": 6009500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0810068914869794e-05, + "loss": 0.3256, + "step": 6010000 + }, + { + "epoch": 3.6, + "learning_rate": 3.0807968949309234e-05, + "loss": 0.3341, + "step": 6010500 + }, + { + "epoch": 3.6, + "learning_rate": 3.080586898374867e-05, + "loss": 0.3253, + "step": 6011000 + }, + { + "epoch": 3.6, + "learning_rate": 3.08037690181881e-05, + "loss": 0.3199, + "step": 6011500 + }, + { + "epoch": 3.6, + "learning_rate": 3.0801669052627535e-05, + "loss": 0.3271, + "step": 6012000 + }, + { + "epoch": 3.6, + "learning_rate": 3.079956908706697e-05, + "loss": 0.327, + "step": 6012500 + }, + { + "epoch": 3.61, + "learning_rate": 3.079746912150641e-05, + "loss": 0.3307, + "step": 6013000 + }, + { + "epoch": 3.61, + "learning_rate": 3.079536915594584e-05, + "loss": 0.3304, + "step": 6013500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0793273390316395e-05, + "loss": 0.3238, + "step": 6014000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0791177624686956e-05, + "loss": 0.322, + "step": 6014500 + }, + { + "epoch": 3.61, + "learning_rate": 3.078907765912639e-05, + "loss": 0.3308, + "step": 6015000 + }, + { + "epoch": 3.61, + "learning_rate": 3.078697769356583e-05, + "loss": 0.3302, + "step": 6015500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0784877728005256e-05, + "loss": 0.3236, + "step": 6016000 + }, + { + "epoch": 3.61, + "learning_rate": 3.078277776244469e-05, + "loss": 0.3156, + "step": 6016500 + }, + { + "epoch": 3.61, + "learning_rate": 3.078068199681526e-05, + "loss": 0.3277, + "step": 6017000 + }, + { + "epoch": 3.61, + "learning_rate": 3.077858203125469e-05, + "loss": 0.3307, + "step": 6017500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0776482065694124e-05, + "loss": 0.338, + "step": 6018000 + }, + { + "epoch": 3.61, + "learning_rate": 3.077438210013356e-05, + "loss": 0.3242, + "step": 6018500 + }, + { + "epoch": 3.61, + "learning_rate": 3.077228213457299e-05, + "loss": 0.3187, + "step": 6019000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0770182169012424e-05, + "loss": 0.3256, + "step": 6019500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0768082203451864e-05, + "loss": 0.3314, + "step": 6020000 + }, + { + "epoch": 3.61, + "learning_rate": 3.07659822378913e-05, + "loss": 0.3226, + "step": 6020500 + }, + { + "epoch": 3.61, + "learning_rate": 3.076388647226185e-05, + "loss": 0.3236, + "step": 6021000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0761786506701285e-05, + "loss": 0.3255, + "step": 6021500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0759690741071845e-05, + "loss": 0.3288, + "step": 6022000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0757590775511285e-05, + "loss": 0.3225, + "step": 6022500 + }, + { + "epoch": 3.61, + "learning_rate": 3.075549080995072e-05, + "loss": 0.3329, + "step": 6023000 + }, + { + "epoch": 3.61, + "learning_rate": 3.075339084439015e-05, + "loss": 0.3279, + "step": 6023500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0751290878829586e-05, + "loss": 0.3349, + "step": 6024000 + }, + { + "epoch": 3.61, + "learning_rate": 3.0749195113200146e-05, + "loss": 0.3246, + "step": 6024500 + }, + { + "epoch": 3.61, + "learning_rate": 3.074709514763958e-05, + "loss": 0.3296, + "step": 6025000 + }, + { + "epoch": 3.61, + "learning_rate": 3.074499518207901e-05, + "loss": 0.3201, + "step": 6025500 + }, + { + "epoch": 3.61, + "learning_rate": 3.0742895216518446e-05, + "loss": 0.3216, + "step": 6026000 + }, + { + "epoch": 3.61, + "learning_rate": 3.074079945088901e-05, + "loss": 0.3245, + "step": 6026500 + }, + { + "epoch": 3.61, + "learning_rate": 3.073869948532844e-05, + "loss": 0.3324, + "step": 6027000 + }, + { + "epoch": 3.61, + "learning_rate": 3.073659951976788e-05, + "loss": 0.3293, + "step": 6027500 + }, + { + "epoch": 3.61, + "learning_rate": 3.073449955420731e-05, + "loss": 0.3277, + "step": 6028000 + }, + { + "epoch": 3.61, + "learning_rate": 3.073240378857787e-05, + "loss": 0.3408, + "step": 6028500 + }, + { + "epoch": 3.61, + "learning_rate": 3.07303038230173e-05, + "loss": 0.3248, + "step": 6029000 + }, + { + "epoch": 3.61, + "learning_rate": 3.072820385745674e-05, + "loss": 0.3251, + "step": 6029500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0726103891896175e-05, + "loss": 0.3239, + "step": 6030000 + }, + { + "epoch": 3.62, + "learning_rate": 3.072400392633561e-05, + "loss": 0.3302, + "step": 6030500 + }, + { + "epoch": 3.62, + "learning_rate": 3.072190816070617e-05, + "loss": 0.3338, + "step": 6031000 + }, + { + "epoch": 3.62, + "learning_rate": 3.07198081951456e-05, + "loss": 0.3283, + "step": 6031500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0717708229585035e-05, + "loss": 0.3256, + "step": 6032000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0715608264024476e-05, + "loss": 0.3322, + "step": 6032500 + }, + { + "epoch": 3.62, + "learning_rate": 3.07135082984639e-05, + "loss": 0.3256, + "step": 6033000 + }, + { + "epoch": 3.62, + "learning_rate": 3.071141253283446e-05, + "loss": 0.3336, + "step": 6033500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0709312567273896e-05, + "loss": 0.3311, + "step": 6034000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0707212601713336e-05, + "loss": 0.3238, + "step": 6034500 + }, + { + "epoch": 3.62, + "learning_rate": 3.070511263615277e-05, + "loss": 0.3168, + "step": 6035000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0703012670592197e-05, + "loss": 0.3276, + "step": 6035500 + }, + { + "epoch": 3.62, + "learning_rate": 3.070091270503164e-05, + "loss": 0.3351, + "step": 6036000 + }, + { + "epoch": 3.62, + "learning_rate": 3.069881273947107e-05, + "loss": 0.3267, + "step": 6036500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0696712773910504e-05, + "loss": 0.3247, + "step": 6037000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0694617008281064e-05, + "loss": 0.3261, + "step": 6037500 + }, + { + "epoch": 3.62, + "learning_rate": 3.06925170427205e-05, + "loss": 0.321, + "step": 6038000 + }, + { + "epoch": 3.62, + "learning_rate": 3.069041707715993e-05, + "loss": 0.3295, + "step": 6038500 + }, + { + "epoch": 3.62, + "learning_rate": 3.068831711159937e-05, + "loss": 0.335, + "step": 6039000 + }, + { + "epoch": 3.62, + "learning_rate": 3.068622134596993e-05, + "loss": 0.3329, + "step": 6039500 + }, + { + "epoch": 3.62, + "learning_rate": 3.068412138040936e-05, + "loss": 0.3196, + "step": 6040000 + }, + { + "epoch": 3.62, + "learning_rate": 3.068202141484879e-05, + "loss": 0.3253, + "step": 6040500 + }, + { + "epoch": 3.62, + "learning_rate": 3.067992144928823e-05, + "loss": 0.3241, + "step": 6041000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0677821483727665e-05, + "loss": 0.3221, + "step": 6041500 + }, + { + "epoch": 3.62, + "learning_rate": 3.06757215181671e-05, + "loss": 0.3232, + "step": 6042000 + }, + { + "epoch": 3.62, + "learning_rate": 3.067362155260654e-05, + "loss": 0.3283, + "step": 6042500 + }, + { + "epoch": 3.62, + "learning_rate": 3.067152158704597e-05, + "loss": 0.3275, + "step": 6043000 + }, + { + "epoch": 3.62, + "learning_rate": 3.0669425821416526e-05, + "loss": 0.3304, + "step": 6043500 + }, + { + "epoch": 3.62, + "learning_rate": 3.0667330055787086e-05, + "loss": 0.337, + "step": 6044000 + }, + { + "epoch": 3.62, + "learning_rate": 3.066523009022652e-05, + "loss": 0.3281, + "step": 6044500 + }, + { + "epoch": 3.62, + "learning_rate": 3.066313012466595e-05, + "loss": 0.3281, + "step": 6045000 + }, + { + "epoch": 3.62, + "learning_rate": 3.066103015910539e-05, + "loss": 0.3272, + "step": 6045500 + }, + { + "epoch": 3.62, + "learning_rate": 3.065893019354483e-05, + "loss": 0.3198, + "step": 6046000 + }, + { + "epoch": 3.63, + "learning_rate": 3.065683442791539e-05, + "loss": 0.327, + "step": 6046500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0654734462354814e-05, + "loss": 0.3287, + "step": 6047000 + }, + { + "epoch": 3.63, + "learning_rate": 3.065263449679425e-05, + "loss": 0.326, + "step": 6047500 + }, + { + "epoch": 3.63, + "learning_rate": 3.065053453123369e-05, + "loss": 0.3311, + "step": 6048000 + }, + { + "epoch": 3.63, + "learning_rate": 3.064843456567312e-05, + "loss": 0.317, + "step": 6048500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0646334600112555e-05, + "loss": 0.3347, + "step": 6049000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0644234634551995e-05, + "loss": 0.3209, + "step": 6049500 + }, + { + "epoch": 3.63, + "learning_rate": 3.064213466899143e-05, + "loss": 0.3303, + "step": 6050000 + }, + { + "epoch": 3.63, + "learning_rate": 3.064003890336198e-05, + "loss": 0.3318, + "step": 6050500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0637938937801415e-05, + "loss": 0.3261, + "step": 6051000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0635838972240856e-05, + "loss": 0.3285, + "step": 6051500 + }, + { + "epoch": 3.63, + "learning_rate": 3.063374320661141e-05, + "loss": 0.3253, + "step": 6052000 + }, + { + "epoch": 3.63, + "learning_rate": 3.063164324105084e-05, + "loss": 0.3226, + "step": 6052500 + }, + { + "epoch": 3.63, + "learning_rate": 3.062954327549028e-05, + "loss": 0.3259, + "step": 6053000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0627443309929716e-05, + "loss": 0.3272, + "step": 6053500 + }, + { + "epoch": 3.63, + "learning_rate": 3.062534334436915e-05, + "loss": 0.3249, + "step": 6054000 + }, + { + "epoch": 3.63, + "learning_rate": 3.062324337880859e-05, + "loss": 0.331, + "step": 6054500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0621147613179144e-05, + "loss": 0.3283, + "step": 6055000 + }, + { + "epoch": 3.63, + "learning_rate": 3.061904764761858e-05, + "loss": 0.3282, + "step": 6055500 + }, + { + "epoch": 3.63, + "learning_rate": 3.061694768205801e-05, + "loss": 0.3319, + "step": 6056000 + }, + { + "epoch": 3.63, + "learning_rate": 3.061484771649745e-05, + "loss": 0.3291, + "step": 6056500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0612747750936884e-05, + "loss": 0.3302, + "step": 6057000 + }, + { + "epoch": 3.63, + "learning_rate": 3.061065198530744e-05, + "loss": 0.3271, + "step": 6057500 + }, + { + "epoch": 3.63, + "learning_rate": 3.060855201974687e-05, + "loss": 0.3311, + "step": 6058000 + }, + { + "epoch": 3.63, + "learning_rate": 3.060645205418631e-05, + "loss": 0.3288, + "step": 6058500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0604352088625745e-05, + "loss": 0.3252, + "step": 6059000 + }, + { + "epoch": 3.63, + "learning_rate": 3.060225212306518e-05, + "loss": 0.3297, + "step": 6059500 + }, + { + "epoch": 3.63, + "learning_rate": 3.060015635743574e-05, + "loss": 0.3287, + "step": 6060000 + }, + { + "epoch": 3.63, + "learning_rate": 3.059805639187517e-05, + "loss": 0.3327, + "step": 6060500 + }, + { + "epoch": 3.63, + "learning_rate": 3.0595956426314606e-05, + "loss": 0.3203, + "step": 6061000 + }, + { + "epoch": 3.63, + "learning_rate": 3.0593856460754046e-05, + "loss": 0.3293, + "step": 6061500 + }, + { + "epoch": 3.63, + "learning_rate": 3.05917606951246e-05, + "loss": 0.3218, + "step": 6062000 + }, + { + "epoch": 3.63, + "learning_rate": 3.058966072956403e-05, + "loss": 0.3206, + "step": 6062500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0587560764003466e-05, + "loss": 0.3227, + "step": 6063000 + }, + { + "epoch": 3.64, + "learning_rate": 3.058546079844291e-05, + "loss": 0.3391, + "step": 6063500 + }, + { + "epoch": 3.64, + "learning_rate": 3.058336083288234e-05, + "loss": 0.3259, + "step": 6064000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0581260867321774e-05, + "loss": 0.3248, + "step": 6064500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0579160901761214e-05, + "loss": 0.3337, + "step": 6065000 + }, + { + "epoch": 3.64, + "learning_rate": 3.057706093620065e-05, + "loss": 0.3169, + "step": 6065500 + }, + { + "epoch": 3.64, + "learning_rate": 3.05749651705712e-05, + "loss": 0.3265, + "step": 6066000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0572865205010634e-05, + "loss": 0.3234, + "step": 6066500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0570765239450075e-05, + "loss": 0.3358, + "step": 6067000 + }, + { + "epoch": 3.64, + "learning_rate": 3.056866527388951e-05, + "loss": 0.3337, + "step": 6067500 + }, + { + "epoch": 3.64, + "learning_rate": 3.056656950826006e-05, + "loss": 0.3264, + "step": 6068000 + }, + { + "epoch": 3.64, + "learning_rate": 3.05644695426995e-05, + "loss": 0.3293, + "step": 6068500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0562373777070055e-05, + "loss": 0.3295, + "step": 6069000 + }, + { + "epoch": 3.64, + "learning_rate": 3.056027381150949e-05, + "loss": 0.3271, + "step": 6069500 + }, + { + "epoch": 3.64, + "learning_rate": 3.055817384594892e-05, + "loss": 0.3204, + "step": 6070000 + }, + { + "epoch": 3.64, + "learning_rate": 3.055607388038836e-05, + "loss": 0.3318, + "step": 6070500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0553973914827796e-05, + "loss": 0.3199, + "step": 6071000 + }, + { + "epoch": 3.64, + "learning_rate": 3.055187814919835e-05, + "loss": 0.3179, + "step": 6071500 + }, + { + "epoch": 3.64, + "learning_rate": 3.054977818363778e-05, + "loss": 0.3289, + "step": 6072000 + }, + { + "epoch": 3.64, + "learning_rate": 3.054767821807722e-05, + "loss": 0.322, + "step": 6072500 + }, + { + "epoch": 3.64, + "learning_rate": 3.054557825251666e-05, + "loss": 0.3224, + "step": 6073000 + }, + { + "epoch": 3.64, + "learning_rate": 3.054347828695609e-05, + "loss": 0.3215, + "step": 6073500 + }, + { + "epoch": 3.64, + "learning_rate": 3.054137832139553e-05, + "loss": 0.3268, + "step": 6074000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0539282555766084e-05, + "loss": 0.3188, + "step": 6074500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0537186790136644e-05, + "loss": 0.3295, + "step": 6075000 + }, + { + "epoch": 3.64, + "learning_rate": 3.053508682457608e-05, + "loss": 0.326, + "step": 6075500 + }, + { + "epoch": 3.64, + "learning_rate": 3.053298685901551e-05, + "loss": 0.3143, + "step": 6076000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0530886893454945e-05, + "loss": 0.3254, + "step": 6076500 + }, + { + "epoch": 3.64, + "learning_rate": 3.052878692789438e-05, + "loss": 0.3287, + "step": 6077000 + }, + { + "epoch": 3.64, + "learning_rate": 3.052668696233382e-05, + "loss": 0.3288, + "step": 6077500 + }, + { + "epoch": 3.64, + "learning_rate": 3.052458699677325e-05, + "loss": 0.3254, + "step": 6078000 + }, + { + "epoch": 3.64, + "learning_rate": 3.0522487031212685e-05, + "loss": 0.3322, + "step": 6078500 + }, + { + "epoch": 3.64, + "learning_rate": 3.0520387065652126e-05, + "loss": 0.332, + "step": 6079000 + }, + { + "epoch": 3.64, + "learning_rate": 3.051829130002268e-05, + "loss": 0.3222, + "step": 6079500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0516191334462113e-05, + "loss": 0.3175, + "step": 6080000 + }, + { + "epoch": 3.65, + "learning_rate": 3.051409136890155e-05, + "loss": 0.3299, + "step": 6080500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0511991403340986e-05, + "loss": 0.3301, + "step": 6081000 + }, + { + "epoch": 3.65, + "learning_rate": 3.050989563771154e-05, + "loss": 0.3286, + "step": 6081500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0507795672150977e-05, + "loss": 0.3325, + "step": 6082000 + }, + { + "epoch": 3.65, + "learning_rate": 3.050569570659041e-05, + "loss": 0.3308, + "step": 6082500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0503595741029847e-05, + "loss": 0.3221, + "step": 6083000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0501495775469284e-05, + "loss": 0.33, + "step": 6083500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0499395809908717e-05, + "loss": 0.3231, + "step": 6084000 + }, + { + "epoch": 3.65, + "learning_rate": 3.049730004427927e-05, + "loss": 0.3335, + "step": 6084500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0495200078718708e-05, + "loss": 0.3271, + "step": 6085000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0493100113158145e-05, + "loss": 0.328, + "step": 6085500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0491000147597578e-05, + "loss": 0.3248, + "step": 6086000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0488900182037015e-05, + "loss": 0.3287, + "step": 6086500 + }, + { + "epoch": 3.65, + "learning_rate": 3.048680441640757e-05, + "loss": 0.3291, + "step": 6087000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0484704450847005e-05, + "loss": 0.3347, + "step": 6087500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0482604485286442e-05, + "loss": 0.3226, + "step": 6088000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0480504519725876e-05, + "loss": 0.3258, + "step": 6088500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0478404554165313e-05, + "loss": 0.3294, + "step": 6089000 + }, + { + "epoch": 3.65, + "learning_rate": 3.047630458860475e-05, + "loss": 0.3289, + "step": 6089500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0474204623044183e-05, + "loss": 0.3281, + "step": 6090000 + }, + { + "epoch": 3.65, + "learning_rate": 3.047210885741474e-05, + "loss": 0.3207, + "step": 6090500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0470008891854173e-05, + "loss": 0.324, + "step": 6091000 + }, + { + "epoch": 3.65, + "learning_rate": 3.046790892629361e-05, + "loss": 0.3241, + "step": 6091500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0465808960733047e-05, + "loss": 0.3261, + "step": 6092000 + }, + { + "epoch": 3.65, + "learning_rate": 3.046370899517248e-05, + "loss": 0.3308, + "step": 6092500 + }, + { + "epoch": 3.65, + "learning_rate": 3.046160902961191e-05, + "loss": 0.3298, + "step": 6093000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0459509064051347e-05, + "loss": 0.3313, + "step": 6093500 + }, + { + "epoch": 3.65, + "learning_rate": 3.045740909849078e-05, + "loss": 0.319, + "step": 6094000 + }, + { + "epoch": 3.65, + "learning_rate": 3.045531333286134e-05, + "loss": 0.3339, + "step": 6094500 + }, + { + "epoch": 3.65, + "learning_rate": 3.0453213367300778e-05, + "loss": 0.33, + "step": 6095000 + }, + { + "epoch": 3.65, + "learning_rate": 3.0451113401740208e-05, + "loss": 0.3239, + "step": 6095500 + }, + { + "epoch": 3.65, + "learning_rate": 3.044901763611077e-05, + "loss": 0.3253, + "step": 6096000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0446917670550205e-05, + "loss": 0.3351, + "step": 6096500 + }, + { + "epoch": 3.66, + "learning_rate": 3.044481770498964e-05, + "loss": 0.3388, + "step": 6097000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0442717739429076e-05, + "loss": 0.3287, + "step": 6097500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0440617773868506e-05, + "loss": 0.3287, + "step": 6098000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0438517808307942e-05, + "loss": 0.3192, + "step": 6098500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0436422042678503e-05, + "loss": 0.3276, + "step": 6099000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0434322077117936e-05, + "loss": 0.3281, + "step": 6099500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0432222111557373e-05, + "loss": 0.3189, + "step": 6100000 + }, + { + "epoch": 3.66, + "eval_loss": 0.31894803047180176, + "eval_runtime": 1119.5797, + "eval_samples_per_second": 470.462, + "eval_steps_per_second": 78.411, + "step": 6100000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0430122145996803e-05, + "loss": 0.3295, + "step": 6100500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0428022180436237e-05, + "loss": 0.3204, + "step": 6101000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0425926414806797e-05, + "loss": 0.323, + "step": 6101500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0423826449246234e-05, + "loss": 0.3237, + "step": 6102000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0421726483685664e-05, + "loss": 0.3268, + "step": 6102500 + }, + { + "epoch": 3.66, + "learning_rate": 3.04196265181251e-05, + "loss": 0.3271, + "step": 6103000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0417526552564534e-05, + "loss": 0.3263, + "step": 6103500 + }, + { + "epoch": 3.66, + "learning_rate": 3.041542658700397e-05, + "loss": 0.3345, + "step": 6104000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0413326621443408e-05, + "loss": 0.3214, + "step": 6104500 + }, + { + "epoch": 3.66, + "learning_rate": 3.041123085581396e-05, + "loss": 0.3262, + "step": 6105000 + }, + { + "epoch": 3.66, + "learning_rate": 3.04091308902534e-05, + "loss": 0.3189, + "step": 6105500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0407030924692832e-05, + "loss": 0.3267, + "step": 6106000 + }, + { + "epoch": 3.66, + "learning_rate": 3.040493095913227e-05, + "loss": 0.326, + "step": 6106500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0402830993571706e-05, + "loss": 0.3286, + "step": 6107000 + }, + { + "epoch": 3.66, + "learning_rate": 3.040073522794226e-05, + "loss": 0.3325, + "step": 6107500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0398635262381693e-05, + "loss": 0.3269, + "step": 6108000 + }, + { + "epoch": 3.66, + "learning_rate": 3.039653529682113e-05, + "loss": 0.3308, + "step": 6108500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0394435331260566e-05, + "loss": 0.3257, + "step": 6109000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0392335365700003e-05, + "loss": 0.3247, + "step": 6109500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0390239600070557e-05, + "loss": 0.3277, + "step": 6110000 + }, + { + "epoch": 3.66, + "learning_rate": 3.038813963450999e-05, + "loss": 0.3278, + "step": 6110500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0386039668949427e-05, + "loss": 0.32, + "step": 6111000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0383939703388864e-05, + "loss": 0.3309, + "step": 6111500 + }, + { + "epoch": 3.66, + "learning_rate": 3.0381839737828297e-05, + "loss": 0.323, + "step": 6112000 + }, + { + "epoch": 3.66, + "learning_rate": 3.0379739772267734e-05, + "loss": 0.3352, + "step": 6112500 + }, + { + "epoch": 3.66, + "learning_rate": 3.037763980670717e-05, + "loss": 0.3263, + "step": 6113000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0375539841146604e-05, + "loss": 0.3295, + "step": 6113500 + }, + { + "epoch": 3.67, + "learning_rate": 3.037344407551716e-05, + "loss": 0.324, + "step": 6114000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0371344109956595e-05, + "loss": 0.3373, + "step": 6114500 + }, + { + "epoch": 3.67, + "learning_rate": 3.036924414439603e-05, + "loss": 0.3244, + "step": 6115000 + }, + { + "epoch": 3.67, + "learning_rate": 3.036714417883547e-05, + "loss": 0.3263, + "step": 6115500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0365048413206022e-05, + "loss": 0.3393, + "step": 6116000 + }, + { + "epoch": 3.67, + "learning_rate": 3.036294844764546e-05, + "loss": 0.3211, + "step": 6116500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0360848482084892e-05, + "loss": 0.3277, + "step": 6117000 + }, + { + "epoch": 3.67, + "learning_rate": 3.035874851652433e-05, + "loss": 0.3288, + "step": 6117500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0356648550963766e-05, + "loss": 0.3302, + "step": 6118000 + }, + { + "epoch": 3.67, + "learning_rate": 3.03545485854032e-05, + "loss": 0.3178, + "step": 6118500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0352452819773753e-05, + "loss": 0.3312, + "step": 6119000 + }, + { + "epoch": 3.67, + "learning_rate": 3.035035285421319e-05, + "loss": 0.3305, + "step": 6119500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0348252888652627e-05, + "loss": 0.3331, + "step": 6120000 + }, + { + "epoch": 3.67, + "learning_rate": 3.034615292309206e-05, + "loss": 0.3191, + "step": 6120500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0344052957531497e-05, + "loss": 0.3279, + "step": 6121000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0341952991970934e-05, + "loss": 0.3316, + "step": 6121500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0339857226341488e-05, + "loss": 0.3325, + "step": 6122000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0337757260780924e-05, + "loss": 0.3279, + "step": 6122500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0335657295220358e-05, + "loss": 0.3212, + "step": 6123000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0333557329659795e-05, + "loss": 0.3284, + "step": 6123500 + }, + { + "epoch": 3.67, + "learning_rate": 3.033145736409923e-05, + "loss": 0.3209, + "step": 6124000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0329361598469785e-05, + "loss": 0.3332, + "step": 6124500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0327261632909222e-05, + "loss": 0.3167, + "step": 6125000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0325161667348655e-05, + "loss": 0.3195, + "step": 6125500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0323061701788092e-05, + "loss": 0.3274, + "step": 6126000 + }, + { + "epoch": 3.67, + "learning_rate": 3.032096173622753e-05, + "loss": 0.3235, + "step": 6126500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0318865970598083e-05, + "loss": 0.3265, + "step": 6127000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0316766005037516e-05, + "loss": 0.3236, + "step": 6127500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0314666039476953e-05, + "loss": 0.3246, + "step": 6128000 + }, + { + "epoch": 3.67, + "learning_rate": 3.031256607391639e-05, + "loss": 0.3243, + "step": 6128500 + }, + { + "epoch": 3.67, + "learning_rate": 3.0310466108355827e-05, + "loss": 0.3119, + "step": 6129000 + }, + { + "epoch": 3.67, + "learning_rate": 3.0308366142795253e-05, + "loss": 0.3294, + "step": 6129500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0306270377165814e-05, + "loss": 0.333, + "step": 6130000 + }, + { + "epoch": 3.68, + "learning_rate": 3.030417041160525e-05, + "loss": 0.3232, + "step": 6130500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0302070446044687e-05, + "loss": 0.3277, + "step": 6131000 + }, + { + "epoch": 3.68, + "learning_rate": 3.029997048048412e-05, + "loss": 0.3206, + "step": 6131500 + }, + { + "epoch": 3.68, + "learning_rate": 3.029787051492355e-05, + "loss": 0.3239, + "step": 6132000 + }, + { + "epoch": 3.68, + "learning_rate": 3.029577474929411e-05, + "loss": 0.3337, + "step": 6132500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0293674783733548e-05, + "loss": 0.3279, + "step": 6133000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0291574818172985e-05, + "loss": 0.3271, + "step": 6133500 + }, + { + "epoch": 3.68, + "learning_rate": 3.028947485261242e-05, + "loss": 0.3272, + "step": 6134000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0287379086982975e-05, + "loss": 0.3195, + "step": 6134500 + }, + { + "epoch": 3.68, + "learning_rate": 3.028527912142241e-05, + "loss": 0.3189, + "step": 6135000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0283179155861846e-05, + "loss": 0.3268, + "step": 6135500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0281079190301283e-05, + "loss": 0.3235, + "step": 6136000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0278979224740716e-05, + "loss": 0.3261, + "step": 6136500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0276879259180146e-05, + "loss": 0.3223, + "step": 6137000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0274783493550706e-05, + "loss": 0.3258, + "step": 6137500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0272683527990143e-05, + "loss": 0.3312, + "step": 6138000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0270583562429577e-05, + "loss": 0.3178, + "step": 6138500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0268483596869007e-05, + "loss": 0.3185, + "step": 6139000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0266387831239567e-05, + "loss": 0.3206, + "step": 6139500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0264287865679004e-05, + "loss": 0.3182, + "step": 6140000 + }, + { + "epoch": 3.68, + "learning_rate": 3.026218790011844e-05, + "loss": 0.3273, + "step": 6140500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0260087934557874e-05, + "loss": 0.3284, + "step": 6141000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0257987968997304e-05, + "loss": 0.3234, + "step": 6141500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0255892203367865e-05, + "loss": 0.3341, + "step": 6142000 + }, + { + "epoch": 3.68, + "learning_rate": 3.02537922378073e-05, + "loss": 0.3279, + "step": 6142500 + }, + { + "epoch": 3.68, + "learning_rate": 3.025169227224674e-05, + "loss": 0.3228, + "step": 6143000 + }, + { + "epoch": 3.68, + "learning_rate": 3.0249592306686172e-05, + "loss": 0.3214, + "step": 6143500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0247492341125602e-05, + "loss": 0.3264, + "step": 6144000 + }, + { + "epoch": 3.68, + "learning_rate": 3.024539237556504e-05, + "loss": 0.3247, + "step": 6144500 + }, + { + "epoch": 3.68, + "learning_rate": 3.0243292410004472e-05, + "loss": 0.3229, + "step": 6145000 + }, + { + "epoch": 3.68, + "learning_rate": 3.024119244444391e-05, + "loss": 0.3263, + "step": 6145500 + }, + { + "epoch": 3.68, + "learning_rate": 3.023909667881447e-05, + "loss": 0.3271, + "step": 6146000 + }, + { + "epoch": 3.69, + "learning_rate": 3.02369967132539e-05, + "loss": 0.3208, + "step": 6146500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0234896747693336e-05, + "loss": 0.3249, + "step": 6147000 + }, + { + "epoch": 3.69, + "learning_rate": 3.023279678213277e-05, + "loss": 0.3243, + "step": 6147500 + }, + { + "epoch": 3.69, + "learning_rate": 3.023070101650333e-05, + "loss": 0.3308, + "step": 6148000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0228601050942767e-05, + "loss": 0.3253, + "step": 6148500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0226501085382197e-05, + "loss": 0.315, + "step": 6149000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0224401119821634e-05, + "loss": 0.3245, + "step": 6149500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0222305354192194e-05, + "loss": 0.3259, + "step": 6150000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0220205388631628e-05, + "loss": 0.3245, + "step": 6150500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0218105423071058e-05, + "loss": 0.336, + "step": 6151000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0216005457510495e-05, + "loss": 0.3236, + "step": 6151500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0213909691881055e-05, + "loss": 0.333, + "step": 6152000 + }, + { + "epoch": 3.69, + "learning_rate": 3.021180972632049e-05, + "loss": 0.3356, + "step": 6152500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0209709760759925e-05, + "loss": 0.3306, + "step": 6153000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0207609795199355e-05, + "loss": 0.3233, + "step": 6153500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0205509829638792e-05, + "loss": 0.3226, + "step": 6154000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0203414064009353e-05, + "loss": 0.3358, + "step": 6154500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0201314098448786e-05, + "loss": 0.3232, + "step": 6155000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0199214132888223e-05, + "loss": 0.3303, + "step": 6155500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0197114167327653e-05, + "loss": 0.3329, + "step": 6156000 + }, + { + "epoch": 3.69, + "learning_rate": 3.019501420176709e-05, + "loss": 0.3251, + "step": 6156500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0192914236206523e-05, + "loss": 0.3262, + "step": 6157000 + }, + { + "epoch": 3.69, + "learning_rate": 3.019081427064596e-05, + "loss": 0.3311, + "step": 6157500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0188714305085397e-05, + "loss": 0.3195, + "step": 6158000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0186622739387074e-05, + "loss": 0.3423, + "step": 6158500 + }, + { + "epoch": 3.69, + "learning_rate": 3.018452277382651e-05, + "loss": 0.3177, + "step": 6159000 + }, + { + "epoch": 3.69, + "learning_rate": 3.0182422808265944e-05, + "loss": 0.3297, + "step": 6159500 + }, + { + "epoch": 3.69, + "learning_rate": 3.018032284270538e-05, + "loss": 0.3217, + "step": 6160000 + }, + { + "epoch": 3.69, + "learning_rate": 3.017822287714481e-05, + "loss": 0.3234, + "step": 6160500 + }, + { + "epoch": 3.69, + "learning_rate": 3.017612711151537e-05, + "loss": 0.3235, + "step": 6161000 + }, + { + "epoch": 3.69, + "learning_rate": 3.017402714595481e-05, + "loss": 0.3321, + "step": 6161500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0171927180394242e-05, + "loss": 0.3312, + "step": 6162000 + }, + { + "epoch": 3.69, + "learning_rate": 3.016982721483368e-05, + "loss": 0.3281, + "step": 6162500 + }, + { + "epoch": 3.69, + "learning_rate": 3.0167731449204232e-05, + "loss": 0.3277, + "step": 6163000 + }, + { + "epoch": 3.7, + "learning_rate": 3.016563148364367e-05, + "loss": 0.3358, + "step": 6163500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0163531518083106e-05, + "loss": 0.3277, + "step": 6164000 + }, + { + "epoch": 3.7, + "learning_rate": 3.016143155252254e-05, + "loss": 0.3238, + "step": 6164500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0159331586961976e-05, + "loss": 0.3232, + "step": 6165000 + }, + { + "epoch": 3.7, + "learning_rate": 3.015723582133253e-05, + "loss": 0.3324, + "step": 6165500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0155135855771967e-05, + "loss": 0.3264, + "step": 6166000 + }, + { + "epoch": 3.7, + "learning_rate": 3.01530358902114e-05, + "loss": 0.3306, + "step": 6166500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0150935924650837e-05, + "loss": 0.3207, + "step": 6167000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0148835959090274e-05, + "loss": 0.3362, + "step": 6167500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0146735993529704e-05, + "loss": 0.3311, + "step": 6168000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0144640227900264e-05, + "loss": 0.3359, + "step": 6168500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0142540262339698e-05, + "loss": 0.3187, + "step": 6169000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0140440296779135e-05, + "loss": 0.3275, + "step": 6169500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0138340331218565e-05, + "loss": 0.328, + "step": 6170000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0136240365658e-05, + "loss": 0.3251, + "step": 6170500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0134140400097435e-05, + "loss": 0.3203, + "step": 6171000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0132040434536872e-05, + "loss": 0.3386, + "step": 6171500 + }, + { + "epoch": 3.7, + "learning_rate": 3.012994046897631e-05, + "loss": 0.3325, + "step": 6172000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0127844703346862e-05, + "loss": 0.3293, + "step": 6172500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0125748937717423e-05, + "loss": 0.3359, + "step": 6173000 + }, + { + "epoch": 3.7, + "learning_rate": 3.012364897215686e-05, + "loss": 0.3288, + "step": 6173500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0121549006596293e-05, + "loss": 0.3227, + "step": 6174000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011944904103573e-05, + "loss": 0.3282, + "step": 6174500 + }, + { + "epoch": 3.7, + "learning_rate": 3.011734907547516e-05, + "loss": 0.333, + "step": 6175000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011525330984572e-05, + "loss": 0.3331, + "step": 6175500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0113153344285154e-05, + "loss": 0.3194, + "step": 6176000 + }, + { + "epoch": 3.7, + "learning_rate": 3.011105337872459e-05, + "loss": 0.3342, + "step": 6176500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0108953413164027e-05, + "loss": 0.3196, + "step": 6177000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0106853447603458e-05, + "loss": 0.3274, + "step": 6177500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0104757681974018e-05, + "loss": 0.324, + "step": 6178000 + }, + { + "epoch": 3.7, + "learning_rate": 3.010265771641345e-05, + "loss": 0.3343, + "step": 6178500 + }, + { + "epoch": 3.7, + "learning_rate": 3.0100557750852888e-05, + "loss": 0.3239, + "step": 6179000 + }, + { + "epoch": 3.7, + "learning_rate": 3.0098457785292325e-05, + "loss": 0.3357, + "step": 6179500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0096357819731752e-05, + "loss": 0.3267, + "step": 6180000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0094262054102315e-05, + "loss": 0.3322, + "step": 6180500 + }, + { + "epoch": 3.71, + "learning_rate": 3.009216208854175e-05, + "loss": 0.3261, + "step": 6181000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0090062122981186e-05, + "loss": 0.3211, + "step": 6181500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0087962157420616e-05, + "loss": 0.3249, + "step": 6182000 + }, + { + "epoch": 3.71, + "learning_rate": 3.008586219186005e-05, + "loss": 0.3341, + "step": 6182500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0083762226299486e-05, + "loss": 0.3302, + "step": 6183000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0081662260738923e-05, + "loss": 0.3289, + "step": 6183500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0079562295178356e-05, + "loss": 0.3265, + "step": 6184000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0077466529548913e-05, + "loss": 0.3358, + "step": 6184500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0075366563988347e-05, + "loss": 0.329, + "step": 6185000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0073266598427784e-05, + "loss": 0.3254, + "step": 6185500 + }, + { + "epoch": 3.71, + "learning_rate": 3.007116663286722e-05, + "loss": 0.3325, + "step": 6186000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0069066667306654e-05, + "loss": 0.3187, + "step": 6186500 + }, + { + "epoch": 3.71, + "learning_rate": 3.006697090167721e-05, + "loss": 0.3233, + "step": 6187000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0064870936116644e-05, + "loss": 0.3184, + "step": 6187500 + }, + { + "epoch": 3.71, + "learning_rate": 3.006277097055608e-05, + "loss": 0.3293, + "step": 6188000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0060671004995518e-05, + "loss": 0.3313, + "step": 6188500 + }, + { + "epoch": 3.71, + "learning_rate": 3.005857103943495e-05, + "loss": 0.3237, + "step": 6189000 + }, + { + "epoch": 3.71, + "learning_rate": 3.005647107387439e-05, + "loss": 0.3256, + "step": 6189500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0054375308244942e-05, + "loss": 0.3276, + "step": 6190000 + }, + { + "epoch": 3.71, + "learning_rate": 3.005227534268438e-05, + "loss": 0.3191, + "step": 6190500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0050175377123812e-05, + "loss": 0.3209, + "step": 6191000 + }, + { + "epoch": 3.71, + "learning_rate": 3.004807541156325e-05, + "loss": 0.3259, + "step": 6191500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0045975446002686e-05, + "loss": 0.3192, + "step": 6192000 + }, + { + "epoch": 3.71, + "learning_rate": 3.004387968037324e-05, + "loss": 0.331, + "step": 6192500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0041779714812676e-05, + "loss": 0.3223, + "step": 6193000 + }, + { + "epoch": 3.71, + "learning_rate": 3.003967974925211e-05, + "loss": 0.322, + "step": 6193500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0037579783691547e-05, + "loss": 0.3299, + "step": 6194000 + }, + { + "epoch": 3.71, + "learning_rate": 3.00354840180621e-05, + "loss": 0.3237, + "step": 6194500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0033384052501537e-05, + "loss": 0.3243, + "step": 6195000 + }, + { + "epoch": 3.71, + "learning_rate": 3.0031284086940974e-05, + "loss": 0.3287, + "step": 6195500 + }, + { + "epoch": 3.71, + "learning_rate": 3.0029184121380407e-05, + "loss": 0.3392, + "step": 6196000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0027084155819844e-05, + "loss": 0.3283, + "step": 6196500 + }, + { + "epoch": 3.72, + "learning_rate": 3.002498419025928e-05, + "loss": 0.3258, + "step": 6197000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0022884224698715e-05, + "loss": 0.3168, + "step": 6197500 + }, + { + "epoch": 3.72, + "learning_rate": 3.002078425913815e-05, + "loss": 0.3325, + "step": 6198000 + }, + { + "epoch": 3.72, + "learning_rate": 3.001869269343983e-05, + "loss": 0.3295, + "step": 6198500 + }, + { + "epoch": 3.72, + "learning_rate": 3.001659272787926e-05, + "loss": 0.3224, + "step": 6199000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0014492762318695e-05, + "loss": 0.3311, + "step": 6199500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0012392796758132e-05, + "loss": 0.3276, + "step": 6200000 + }, + { + "epoch": 3.72, + "eval_loss": 0.31798064708709717, + "eval_runtime": 1116.1155, + "eval_samples_per_second": 471.922, + "eval_steps_per_second": 78.654, + "step": 6200000 + }, + { + "epoch": 3.72, + "learning_rate": 3.0010292831197566e-05, + "loss": 0.3232, + "step": 6200500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0008192865637003e-05, + "loss": 0.3197, + "step": 6201000 + }, + { + "epoch": 3.72, + "learning_rate": 3.000609290007644e-05, + "loss": 0.3228, + "step": 6201500 + }, + { + "epoch": 3.72, + "learning_rate": 3.0003992934515873e-05, + "loss": 0.3239, + "step": 6202000 + }, + { + "epoch": 3.72, + "learning_rate": 3.000189716888643e-05, + "loss": 0.3245, + "step": 6202500 + }, + { + "epoch": 3.72, + "learning_rate": 2.999980140325699e-05, + "loss": 0.3373, + "step": 6203000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9997701437696417e-05, + "loss": 0.3265, + "step": 6203500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9995601472135854e-05, + "loss": 0.3222, + "step": 6204000 + }, + { + "epoch": 3.72, + "learning_rate": 2.999350150657529e-05, + "loss": 0.3209, + "step": 6204500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9991401541014724e-05, + "loss": 0.3308, + "step": 6205000 + }, + { + "epoch": 3.72, + "learning_rate": 2.998930157545416e-05, + "loss": 0.3285, + "step": 6205500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9987201609893598e-05, + "loss": 0.3234, + "step": 6206000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9985101644333035e-05, + "loss": 0.3241, + "step": 6206500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9983001678772468e-05, + "loss": 0.3282, + "step": 6207000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9980901713211905e-05, + "loss": 0.3279, + "step": 6207500 + }, + { + "epoch": 3.72, + "learning_rate": 2.997880594758246e-05, + "loss": 0.3153, + "step": 6208000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9976705982021895e-05, + "loss": 0.3321, + "step": 6208500 + }, + { + "epoch": 3.72, + "learning_rate": 2.997460601646133e-05, + "loss": 0.3258, + "step": 6209000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9972506050900766e-05, + "loss": 0.3216, + "step": 6209500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9970406085340202e-05, + "loss": 0.3305, + "step": 6210000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9968306119779636e-05, + "loss": 0.3266, + "step": 6210500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9966206154219073e-05, + "loss": 0.3292, + "step": 6211000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9964106188658503e-05, + "loss": 0.3252, + "step": 6211500 + }, + { + "epoch": 3.72, + "learning_rate": 2.996201462296018e-05, + "loss": 0.3228, + "step": 6212000 + }, + { + "epoch": 3.72, + "learning_rate": 2.9959914657399617e-05, + "loss": 0.3154, + "step": 6212500 + }, + { + "epoch": 3.72, + "learning_rate": 2.9957814691839054e-05, + "loss": 0.328, + "step": 6213000 + }, + { + "epoch": 3.73, + "learning_rate": 2.995571472627849e-05, + "loss": 0.3342, + "step": 6213500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9953614760717924e-05, + "loss": 0.3269, + "step": 6214000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9951518995088478e-05, + "loss": 0.3225, + "step": 6214500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9949419029527914e-05, + "loss": 0.3154, + "step": 6215000 + }, + { + "epoch": 3.73, + "learning_rate": 2.994731906396735e-05, + "loss": 0.3262, + "step": 6215500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9945219098406785e-05, + "loss": 0.3252, + "step": 6216000 + }, + { + "epoch": 3.73, + "learning_rate": 2.994311913284622e-05, + "loss": 0.3209, + "step": 6216500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9941023367216775e-05, + "loss": 0.346, + "step": 6217000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9938923401656212e-05, + "loss": 0.3388, + "step": 6217500 + }, + { + "epoch": 3.73, + "learning_rate": 2.993682343609565e-05, + "loss": 0.3217, + "step": 6218000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9934723470535082e-05, + "loss": 0.33, + "step": 6218500 + }, + { + "epoch": 3.73, + "learning_rate": 2.993262350497452e-05, + "loss": 0.3271, + "step": 6219000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9930523539413956e-05, + "loss": 0.3215, + "step": 6219500 + }, + { + "epoch": 3.73, + "learning_rate": 2.992842357385339e-05, + "loss": 0.3245, + "step": 6220000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9926327808223946e-05, + "loss": 0.3286, + "step": 6220500 + }, + { + "epoch": 3.73, + "learning_rate": 2.992422784266338e-05, + "loss": 0.3194, + "step": 6221000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9922127877102817e-05, + "loss": 0.3333, + "step": 6221500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9920027911542254e-05, + "loss": 0.3231, + "step": 6222000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9917932145912807e-05, + "loss": 0.3243, + "step": 6222500 + }, + { + "epoch": 3.73, + "learning_rate": 2.991583218035224e-05, + "loss": 0.3266, + "step": 6223000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9913732214791677e-05, + "loss": 0.3197, + "step": 6223500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9911632249231114e-05, + "loss": 0.3271, + "step": 6224000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9909532283670548e-05, + "loss": 0.3199, + "step": 6224500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9907432318109985e-05, + "loss": 0.3229, + "step": 6225000 + }, + { + "epoch": 3.73, + "learning_rate": 2.990533235254942e-05, + "loss": 0.3199, + "step": 6225500 + }, + { + "epoch": 3.73, + "learning_rate": 2.990323238698885e-05, + "loss": 0.3302, + "step": 6226000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9901136621359412e-05, + "loss": 0.3266, + "step": 6226500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9899036655798845e-05, + "loss": 0.3325, + "step": 6227000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9896936690238282e-05, + "loss": 0.3308, + "step": 6227500 + }, + { + "epoch": 3.73, + "learning_rate": 2.9894836724677712e-05, + "loss": 0.3123, + "step": 6228000 + }, + { + "epoch": 3.73, + "learning_rate": 2.989273675911715e-05, + "loss": 0.3275, + "step": 6228500 + }, + { + "epoch": 3.73, + "learning_rate": 2.989064099348771e-05, + "loss": 0.3206, + "step": 6229000 + }, + { + "epoch": 3.73, + "learning_rate": 2.9888541027927143e-05, + "loss": 0.3293, + "step": 6229500 + }, + { + "epoch": 3.74, + "learning_rate": 2.988644106236658e-05, + "loss": 0.3289, + "step": 6230000 + }, + { + "epoch": 3.74, + "learning_rate": 2.988434109680601e-05, + "loss": 0.3294, + "step": 6230500 + }, + { + "epoch": 3.74, + "learning_rate": 2.988224533117657e-05, + "loss": 0.3309, + "step": 6231000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9880145365616004e-05, + "loss": 0.3216, + "step": 6231500 + }, + { + "epoch": 3.74, + "learning_rate": 2.987804540005544e-05, + "loss": 0.329, + "step": 6232000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9875945434494877e-05, + "loss": 0.3145, + "step": 6232500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9873845468934307e-05, + "loss": 0.3305, + "step": 6233000 + }, + { + "epoch": 3.74, + "learning_rate": 2.987174550337374e-05, + "loss": 0.3263, + "step": 6233500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9869645537813178e-05, + "loss": 0.3309, + "step": 6234000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9867549772183738e-05, + "loss": 0.3289, + "step": 6234500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9865449806623175e-05, + "loss": 0.3267, + "step": 6235000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9863349841062605e-05, + "loss": 0.3256, + "step": 6235500 + }, + { + "epoch": 3.74, + "learning_rate": 2.986124987550204e-05, + "loss": 0.3274, + "step": 6236000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9859149909941475e-05, + "loss": 0.3253, + "step": 6236500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9857049944380912e-05, + "loss": 0.3274, + "step": 6237000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9854954178751466e-05, + "loss": 0.3321, + "step": 6237500 + }, + { + "epoch": 3.74, + "learning_rate": 2.98528542131909e-05, + "loss": 0.3222, + "step": 6238000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9850754247630336e-05, + "loss": 0.3221, + "step": 6238500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9848654282069773e-05, + "loss": 0.3269, + "step": 6239000 + }, + { + "epoch": 3.74, + "learning_rate": 2.984655431650921e-05, + "loss": 0.3249, + "step": 6239500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9844458550879763e-05, + "loss": 0.3266, + "step": 6240000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9842358585319197e-05, + "loss": 0.3249, + "step": 6240500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9840258619758634e-05, + "loss": 0.3301, + "step": 6241000 + }, + { + "epoch": 3.74, + "learning_rate": 2.983815865419807e-05, + "loss": 0.3313, + "step": 6241500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9836058688637504e-05, + "loss": 0.3183, + "step": 6242000 + }, + { + "epoch": 3.74, + "learning_rate": 2.983396292300806e-05, + "loss": 0.318, + "step": 6242500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9831862957447494e-05, + "loss": 0.3234, + "step": 6243000 + }, + { + "epoch": 3.74, + "learning_rate": 2.982976299188693e-05, + "loss": 0.3309, + "step": 6243500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9827663026326368e-05, + "loss": 0.323, + "step": 6244000 + }, + { + "epoch": 3.74, + "learning_rate": 2.98255630607658e-05, + "loss": 0.3311, + "step": 6244500 + }, + { + "epoch": 3.74, + "learning_rate": 2.9823467295136355e-05, + "loss": 0.33, + "step": 6245000 + }, + { + "epoch": 3.74, + "learning_rate": 2.9821367329575792e-05, + "loss": 0.3282, + "step": 6245500 + }, + { + "epoch": 3.74, + "learning_rate": 2.981926736401523e-05, + "loss": 0.3182, + "step": 6246000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9817167398454666e-05, + "loss": 0.3193, + "step": 6246500 + }, + { + "epoch": 3.75, + "learning_rate": 2.98150674328941e-05, + "loss": 0.3323, + "step": 6247000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9812967467333536e-05, + "loss": 0.3247, + "step": 6247500 + }, + { + "epoch": 3.75, + "learning_rate": 2.981087170170409e-05, + "loss": 0.3265, + "step": 6248000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9808771736143526e-05, + "loss": 0.3287, + "step": 6248500 + }, + { + "epoch": 3.75, + "learning_rate": 2.980667177058296e-05, + "loss": 0.3187, + "step": 6249000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9804571805022397e-05, + "loss": 0.3248, + "step": 6249500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9802471839461833e-05, + "loss": 0.3248, + "step": 6250000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9800371873901267e-05, + "loss": 0.3191, + "step": 6250500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9798271908340704e-05, + "loss": 0.3139, + "step": 6251000 + }, + { + "epoch": 3.75, + "learning_rate": 2.979617194278014e-05, + "loss": 0.3262, + "step": 6251500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9794076177150694e-05, + "loss": 0.3269, + "step": 6252000 + }, + { + "epoch": 3.75, + "learning_rate": 2.979197621159013e-05, + "loss": 0.3218, + "step": 6252500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9789880445960685e-05, + "loss": 0.3314, + "step": 6253000 + }, + { + "epoch": 3.75, + "learning_rate": 2.978778048040012e-05, + "loss": 0.3315, + "step": 6253500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9785680514839555e-05, + "loss": 0.3288, + "step": 6254000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9783580549278992e-05, + "loss": 0.3268, + "step": 6254500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9781484783649545e-05, + "loss": 0.3312, + "step": 6255000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9779384818088982e-05, + "loss": 0.3318, + "step": 6255500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9777284852528416e-05, + "loss": 0.3176, + "step": 6256000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9775184886967852e-05, + "loss": 0.3254, + "step": 6256500 + }, + { + "epoch": 3.75, + "learning_rate": 2.977308492140729e-05, + "loss": 0.3211, + "step": 6257000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9770984955846723e-05, + "loss": 0.3241, + "step": 6257500 + }, + { + "epoch": 3.75, + "learning_rate": 2.976888499028616e-05, + "loss": 0.3288, + "step": 6258000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9766785024725596e-05, + "loss": 0.3244, + "step": 6258500 + }, + { + "epoch": 3.75, + "learning_rate": 2.976468925909615e-05, + "loss": 0.3311, + "step": 6259000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9762589293535587e-05, + "loss": 0.3279, + "step": 6259500 + }, + { + "epoch": 3.75, + "learning_rate": 2.976048932797502e-05, + "loss": 0.3286, + "step": 6260000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9758393562345577e-05, + "loss": 0.3348, + "step": 6260500 + }, + { + "epoch": 3.75, + "learning_rate": 2.975629359678501e-05, + "loss": 0.3267, + "step": 6261000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9754193631224448e-05, + "loss": 0.3225, + "step": 6261500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9752093665663884e-05, + "loss": 0.3184, + "step": 6262000 + }, + { + "epoch": 3.75, + "learning_rate": 2.9749993700103318e-05, + "loss": 0.3341, + "step": 6262500 + }, + { + "epoch": 3.75, + "learning_rate": 2.9747893734542755e-05, + "loss": 0.3242, + "step": 6263000 + }, + { + "epoch": 3.76, + "learning_rate": 2.974579376898219e-05, + "loss": 0.3245, + "step": 6263500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9743693803421625e-05, + "loss": 0.3211, + "step": 6264000 + }, + { + "epoch": 3.76, + "learning_rate": 2.974159803779218e-05, + "loss": 0.3234, + "step": 6264500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9739498072231615e-05, + "loss": 0.3198, + "step": 6265000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9737398106671052e-05, + "loss": 0.3247, + "step": 6265500 + }, + { + "epoch": 3.76, + "learning_rate": 2.973529814111049e-05, + "loss": 0.3204, + "step": 6266000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9733198175549923e-05, + "loss": 0.3263, + "step": 6266500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9731098209989353e-05, + "loss": 0.3251, + "step": 6267000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9729002444359913e-05, + "loss": 0.3246, + "step": 6267500 + }, + { + "epoch": 3.76, + "learning_rate": 2.972690247879935e-05, + "loss": 0.3192, + "step": 6268000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9724802513238783e-05, + "loss": 0.327, + "step": 6268500 + }, + { + "epoch": 3.76, + "learning_rate": 2.972270254767822e-05, + "loss": 0.3262, + "step": 6269000 + }, + { + "epoch": 3.76, + "learning_rate": 2.972060258211765e-05, + "loss": 0.3186, + "step": 6269500 + }, + { + "epoch": 3.76, + "learning_rate": 2.971850681648821e-05, + "loss": 0.3351, + "step": 6270000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9716406850927647e-05, + "loss": 0.3245, + "step": 6270500 + }, + { + "epoch": 3.76, + "learning_rate": 2.971430688536708e-05, + "loss": 0.3217, + "step": 6271000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9712206919806518e-05, + "loss": 0.316, + "step": 6271500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9710106954245948e-05, + "loss": 0.3226, + "step": 6272000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9708006988685385e-05, + "loss": 0.3311, + "step": 6272500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9705907023124818e-05, + "loss": 0.318, + "step": 6273000 + }, + { + "epoch": 3.76, + "learning_rate": 2.970381125749538e-05, + "loss": 0.3255, + "step": 6273500 + }, + { + "epoch": 3.76, + "learning_rate": 2.970171129193481e-05, + "loss": 0.3243, + "step": 6274000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9699611326374245e-05, + "loss": 0.3363, + "step": 6274500 + }, + { + "epoch": 3.76, + "learning_rate": 2.969751136081368e-05, + "loss": 0.3284, + "step": 6275000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9695411395253116e-05, + "loss": 0.3297, + "step": 6275500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9693315629623676e-05, + "loss": 0.3274, + "step": 6276000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9691215664063106e-05, + "loss": 0.3231, + "step": 6276500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9689115698502543e-05, + "loss": 0.326, + "step": 6277000 + }, + { + "epoch": 3.76, + "learning_rate": 2.9687015732941976e-05, + "loss": 0.3304, + "step": 6277500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9684915767381413e-05, + "loss": 0.3335, + "step": 6278000 + }, + { + "epoch": 3.76, + "learning_rate": 2.968281580182085e-05, + "loss": 0.3292, + "step": 6278500 + }, + { + "epoch": 3.76, + "learning_rate": 2.9680720036191404e-05, + "loss": 0.3271, + "step": 6279000 + }, + { + "epoch": 3.76, + "learning_rate": 2.967862007063084e-05, + "loss": 0.3229, + "step": 6279500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9676520105070274e-05, + "loss": 0.3268, + "step": 6280000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9674424339440834e-05, + "loss": 0.338, + "step": 6280500 + }, + { + "epoch": 3.77, + "learning_rate": 2.967232437388027e-05, + "loss": 0.322, + "step": 6281000 + }, + { + "epoch": 3.77, + "learning_rate": 2.96702244083197e-05, + "loss": 0.3287, + "step": 6281500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9668124442759135e-05, + "loss": 0.3282, + "step": 6282000 + }, + { + "epoch": 3.77, + "learning_rate": 2.966602447719857e-05, + "loss": 0.3212, + "step": 6282500 + }, + { + "epoch": 3.77, + "learning_rate": 2.966392451163801e-05, + "loss": 0.3247, + "step": 6283000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9661824546077442e-05, + "loss": 0.3199, + "step": 6283500 + }, + { + "epoch": 3.77, + "learning_rate": 2.965972458051688e-05, + "loss": 0.3286, + "step": 6284000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9657624614956316e-05, + "loss": 0.3287, + "step": 6284500 + }, + { + "epoch": 3.77, + "learning_rate": 2.965552884932687e-05, + "loss": 0.3217, + "step": 6285000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9653428883766306e-05, + "loss": 0.324, + "step": 6285500 + }, + { + "epoch": 3.77, + "learning_rate": 2.965132891820574e-05, + "loss": 0.3257, + "step": 6286000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9649228952645176e-05, + "loss": 0.3276, + "step": 6286500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9647128987084613e-05, + "loss": 0.3308, + "step": 6287000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9645029021524047e-05, + "loss": 0.3172, + "step": 6287500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9642933255894604e-05, + "loss": 0.3231, + "step": 6288000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9640833290334037e-05, + "loss": 0.3218, + "step": 6288500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9638733324773474e-05, + "loss": 0.3227, + "step": 6289000 + }, + { + "epoch": 3.77, + "learning_rate": 2.963663335921291e-05, + "loss": 0.333, + "step": 6289500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9634537593583464e-05, + "loss": 0.3257, + "step": 6290000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9632437628022898e-05, + "loss": 0.3283, + "step": 6290500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9630337662462335e-05, + "loss": 0.3159, + "step": 6291000 + }, + { + "epoch": 3.77, + "learning_rate": 2.962823769690177e-05, + "loss": 0.328, + "step": 6291500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9626141931272325e-05, + "loss": 0.3372, + "step": 6292000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9624041965711762e-05, + "loss": 0.3266, + "step": 6292500 + }, + { + "epoch": 3.77, + "learning_rate": 2.9621942000151195e-05, + "loss": 0.3291, + "step": 6293000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9619842034590632e-05, + "loss": 0.3264, + "step": 6293500 + }, + { + "epoch": 3.77, + "learning_rate": 2.961774206903007e-05, + "loss": 0.3294, + "step": 6294000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9615646303400623e-05, + "loss": 0.3335, + "step": 6294500 + }, + { + "epoch": 3.77, + "learning_rate": 2.961354633784006e-05, + "loss": 0.3432, + "step": 6295000 + }, + { + "epoch": 3.77, + "learning_rate": 2.9611446372279493e-05, + "loss": 0.3196, + "step": 6295500 + }, + { + "epoch": 3.77, + "learning_rate": 2.960934640671893e-05, + "loss": 0.3208, + "step": 6296000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9607246441158367e-05, + "loss": 0.3327, + "step": 6296500 + }, + { + "epoch": 3.78, + "learning_rate": 2.960515067552892e-05, + "loss": 0.3255, + "step": 6297000 + }, + { + "epoch": 3.78, + "learning_rate": 2.960305490989948e-05, + "loss": 0.3372, + "step": 6297500 + }, + { + "epoch": 3.78, + "learning_rate": 2.960095494433891e-05, + "loss": 0.3223, + "step": 6298000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9598854978778344e-05, + "loss": 0.3279, + "step": 6298500 + }, + { + "epoch": 3.78, + "learning_rate": 2.959675501321778e-05, + "loss": 0.3296, + "step": 6299000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9594655047657218e-05, + "loss": 0.321, + "step": 6299500 + }, + { + "epoch": 3.78, + "learning_rate": 2.959255508209665e-05, + "loss": 0.3175, + "step": 6300000 + }, + { + "epoch": 3.78, + "eval_loss": 0.3166191577911377, + "eval_runtime": 1117.0284, + "eval_samples_per_second": 471.537, + "eval_steps_per_second": 78.59, + "step": 6300000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9590455116536088e-05, + "loss": 0.3221, + "step": 6300500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9588355150975525e-05, + "loss": 0.3261, + "step": 6301000 + }, + { + "epoch": 3.78, + "learning_rate": 2.958625938534608e-05, + "loss": 0.3218, + "step": 6301500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9584159419785515e-05, + "loss": 0.3312, + "step": 6302000 + }, + { + "epoch": 3.78, + "learning_rate": 2.958205945422495e-05, + "loss": 0.3334, + "step": 6302500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9579959488664386e-05, + "loss": 0.3266, + "step": 6303000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9577859523103823e-05, + "loss": 0.3304, + "step": 6303500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9575759557543256e-05, + "loss": 0.3229, + "step": 6304000 + }, + { + "epoch": 3.78, + "learning_rate": 2.957366379191381e-05, + "loss": 0.3247, + "step": 6304500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9571563826353246e-05, + "loss": 0.3286, + "step": 6305000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9569463860792683e-05, + "loss": 0.3196, + "step": 6305500 + }, + { + "epoch": 3.78, + "learning_rate": 2.956736389523212e-05, + "loss": 0.3315, + "step": 6306000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9565263929671554e-05, + "loss": 0.3316, + "step": 6306500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9563168164042107e-05, + "loss": 0.3245, + "step": 6307000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9561068198481544e-05, + "loss": 0.3229, + "step": 6307500 + }, + { + "epoch": 3.78, + "learning_rate": 2.955896823292098e-05, + "loss": 0.3277, + "step": 6308000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9556868267360414e-05, + "loss": 0.3224, + "step": 6308500 + }, + { + "epoch": 3.78, + "learning_rate": 2.955476830179985e-05, + "loss": 0.3222, + "step": 6309000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9552672536170405e-05, + "loss": 0.3317, + "step": 6309500 + }, + { + "epoch": 3.78, + "learning_rate": 2.955057257060984e-05, + "loss": 0.3252, + "step": 6310000 + }, + { + "epoch": 3.78, + "learning_rate": 2.954847260504928e-05, + "loss": 0.3249, + "step": 6310500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9546372639488712e-05, + "loss": 0.3129, + "step": 6311000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9544281073790392e-05, + "loss": 0.3408, + "step": 6311500 + }, + { + "epoch": 3.78, + "learning_rate": 2.954218110822983e-05, + "loss": 0.3261, + "step": 6312000 + }, + { + "epoch": 3.78, + "learning_rate": 2.9540081142669256e-05, + "loss": 0.3308, + "step": 6312500 + }, + { + "epoch": 3.78, + "learning_rate": 2.9537981177108693e-05, + "loss": 0.3288, + "step": 6313000 + }, + { + "epoch": 3.79, + "learning_rate": 2.953588121154813e-05, + "loss": 0.3286, + "step": 6313500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9533781245987563e-05, + "loss": 0.3264, + "step": 6314000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9531681280427e-05, + "loss": 0.3382, + "step": 6314500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9529581314866437e-05, + "loss": 0.3238, + "step": 6315000 + }, + { + "epoch": 3.79, + "learning_rate": 2.952748134930587e-05, + "loss": 0.323, + "step": 6315500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9525385583676427e-05, + "loss": 0.326, + "step": 6316000 + }, + { + "epoch": 3.79, + "learning_rate": 2.952328561811586e-05, + "loss": 0.3303, + "step": 6316500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9521185652555297e-05, + "loss": 0.3321, + "step": 6317000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9519085686994734e-05, + "loss": 0.3239, + "step": 6317500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9516989921365288e-05, + "loss": 0.3284, + "step": 6318000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9514889955804725e-05, + "loss": 0.3201, + "step": 6318500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9512789990244158e-05, + "loss": 0.3312, + "step": 6319000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9510690024683595e-05, + "loss": 0.3207, + "step": 6319500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9508590059123032e-05, + "loss": 0.3386, + "step": 6320000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9506490093562465e-05, + "loss": 0.3291, + "step": 6320500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9504390128001902e-05, + "loss": 0.3278, + "step": 6321000 + }, + { + "epoch": 3.79, + "learning_rate": 2.950229016244134e-05, + "loss": 0.3262, + "step": 6321500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9500190196880772e-05, + "loss": 0.3212, + "step": 6322000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9498094431251326e-05, + "loss": 0.3174, + "step": 6322500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9495994465690763e-05, + "loss": 0.3273, + "step": 6323000 + }, + { + "epoch": 3.79, + "learning_rate": 2.94938945001302e-05, + "loss": 0.328, + "step": 6323500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9491794534569633e-05, + "loss": 0.3249, + "step": 6324000 + }, + { + "epoch": 3.79, + "learning_rate": 2.948969876894019e-05, + "loss": 0.3292, + "step": 6324500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9487598803379624e-05, + "loss": 0.3272, + "step": 6325000 + }, + { + "epoch": 3.79, + "learning_rate": 2.948549883781906e-05, + "loss": 0.3251, + "step": 6325500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9483398872258497e-05, + "loss": 0.3212, + "step": 6326000 + }, + { + "epoch": 3.79, + "learning_rate": 2.948129890669793e-05, + "loss": 0.3188, + "step": 6326500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9479198941137368e-05, + "loss": 0.3276, + "step": 6327000 + }, + { + "epoch": 3.79, + "learning_rate": 2.947710317550792e-05, + "loss": 0.3262, + "step": 6327500 + }, + { + "epoch": 3.79, + "learning_rate": 2.9475003209947358e-05, + "loss": 0.3155, + "step": 6328000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9472903244386795e-05, + "loss": 0.3256, + "step": 6328500 + }, + { + "epoch": 3.79, + "learning_rate": 2.947080327882623e-05, + "loss": 0.3233, + "step": 6329000 + }, + { + "epoch": 3.79, + "learning_rate": 2.9468703313265665e-05, + "loss": 0.3257, + "step": 6329500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9466603347705095e-05, + "loss": 0.3301, + "step": 6330000 + }, + { + "epoch": 3.8, + "learning_rate": 2.946450338214453e-05, + "loss": 0.325, + "step": 6330500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9462403416583966e-05, + "loss": 0.3301, + "step": 6331000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9460307650954526e-05, + "loss": 0.3255, + "step": 6331500 + }, + { + "epoch": 3.8, + "learning_rate": 2.945821188532508e-05, + "loss": 0.3231, + "step": 6332000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9456111919764516e-05, + "loss": 0.3216, + "step": 6332500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9454011954203953e-05, + "loss": 0.3271, + "step": 6333000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9451911988643387e-05, + "loss": 0.3234, + "step": 6333500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9449816223013944e-05, + "loss": 0.3271, + "step": 6334000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9447716257453377e-05, + "loss": 0.3249, + "step": 6334500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9445616291892814e-05, + "loss": 0.3259, + "step": 6335000 + }, + { + "epoch": 3.8, + "learning_rate": 2.944351632633225e-05, + "loss": 0.3249, + "step": 6335500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9441416360771684e-05, + "loss": 0.3274, + "step": 6336000 + }, + { + "epoch": 3.8, + "learning_rate": 2.943931639521112e-05, + "loss": 0.3198, + "step": 6336500 + }, + { + "epoch": 3.8, + "learning_rate": 2.943721642965055e-05, + "loss": 0.334, + "step": 6337000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9435116464089985e-05, + "loss": 0.3266, + "step": 6337500 + }, + { + "epoch": 3.8, + "learning_rate": 2.943302069846055e-05, + "loss": 0.3263, + "step": 6338000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9430920732899982e-05, + "loss": 0.3336, + "step": 6338500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942882076733942e-05, + "loss": 0.3197, + "step": 6339000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9426725001709972e-05, + "loss": 0.3297, + "step": 6339500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942462503614941e-05, + "loss": 0.322, + "step": 6340000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9422525070588843e-05, + "loss": 0.3302, + "step": 6340500 + }, + { + "epoch": 3.8, + "learning_rate": 2.942042510502828e-05, + "loss": 0.3244, + "step": 6341000 + }, + { + "epoch": 3.8, + "learning_rate": 2.941832513946771e-05, + "loss": 0.3276, + "step": 6341500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9416225173907146e-05, + "loss": 0.3242, + "step": 6342000 + }, + { + "epoch": 3.8, + "learning_rate": 2.941412520834658e-05, + "loss": 0.3234, + "step": 6342500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9412025242786017e-05, + "loss": 0.3187, + "step": 6343000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9409929477156577e-05, + "loss": 0.3379, + "step": 6343500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9407829511596007e-05, + "loss": 0.3302, + "step": 6344000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9405733745966567e-05, + "loss": 0.333, + "step": 6344500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9403633780406004e-05, + "loss": 0.3322, + "step": 6345000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9401533814845438e-05, + "loss": 0.3296, + "step": 6345500 + }, + { + "epoch": 3.8, + "learning_rate": 2.9399433849284875e-05, + "loss": 0.3293, + "step": 6346000 + }, + { + "epoch": 3.8, + "learning_rate": 2.9397333883724305e-05, + "loss": 0.3308, + "step": 6346500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9395233918163738e-05, + "loss": 0.3244, + "step": 6347000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9393133952603175e-05, + "loss": 0.3256, + "step": 6347500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9391033987042612e-05, + "loss": 0.3255, + "step": 6348000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9388938221413172e-05, + "loss": 0.3256, + "step": 6348500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9386838255852602e-05, + "loss": 0.322, + "step": 6349000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9384742490223163e-05, + "loss": 0.3209, + "step": 6349500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9382642524662596e-05, + "loss": 0.3315, + "step": 6350000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9380542559102033e-05, + "loss": 0.3243, + "step": 6350500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9378442593541463e-05, + "loss": 0.3317, + "step": 6351000 + }, + { + "epoch": 3.81, + "learning_rate": 2.93763426279809e-05, + "loss": 0.3301, + "step": 6351500 + }, + { + "epoch": 3.81, + "learning_rate": 2.937424686235146e-05, + "loss": 0.3341, + "step": 6352000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9372146896790894e-05, + "loss": 0.3192, + "step": 6352500 + }, + { + "epoch": 3.81, + "learning_rate": 2.937004693123033e-05, + "loss": 0.3164, + "step": 6353000 + }, + { + "epoch": 3.81, + "learning_rate": 2.936794696566976e-05, + "loss": 0.34, + "step": 6353500 + }, + { + "epoch": 3.81, + "learning_rate": 2.936585120004032e-05, + "loss": 0.3192, + "step": 6354000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9363751234479754e-05, + "loss": 0.3341, + "step": 6354500 + }, + { + "epoch": 3.81, + "learning_rate": 2.936165126891919e-05, + "loss": 0.3217, + "step": 6355000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9359551303358628e-05, + "loss": 0.3321, + "step": 6355500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9357451337798058e-05, + "loss": 0.321, + "step": 6356000 + }, + { + "epoch": 3.81, + "learning_rate": 2.935535137223749e-05, + "loss": 0.3342, + "step": 6356500 + }, + { + "epoch": 3.81, + "learning_rate": 2.935325140667693e-05, + "loss": 0.3265, + "step": 6357000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9351151441116365e-05, + "loss": 0.3207, + "step": 6357500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9349055675486926e-05, + "loss": 0.3234, + "step": 6358000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9346955709926356e-05, + "loss": 0.3233, + "step": 6358500 + }, + { + "epoch": 3.81, + "learning_rate": 2.934485574436579e-05, + "loss": 0.3214, + "step": 6359000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9342755778805226e-05, + "loss": 0.313, + "step": 6359500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9340655813244663e-05, + "loss": 0.3199, + "step": 6360000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9338560047615223e-05, + "loss": 0.3269, + "step": 6360500 + }, + { + "epoch": 3.81, + "learning_rate": 2.933646008205465e-05, + "loss": 0.3294, + "step": 6361000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9334360116494087e-05, + "loss": 0.3326, + "step": 6361500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9332260150933523e-05, + "loss": 0.3297, + "step": 6362000 + }, + { + "epoch": 3.81, + "learning_rate": 2.9330160185372957e-05, + "loss": 0.3286, + "step": 6362500 + }, + { + "epoch": 3.81, + "learning_rate": 2.9328060219812394e-05, + "loss": 0.3293, + "step": 6363000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9325964454182947e-05, + "loss": 0.3326, + "step": 6363500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9323864488622384e-05, + "loss": 0.3397, + "step": 6364000 + }, + { + "epoch": 3.82, + "learning_rate": 2.932176452306182e-05, + "loss": 0.3243, + "step": 6364500 + }, + { + "epoch": 3.82, + "learning_rate": 2.931966875743238e-05, + "loss": 0.3155, + "step": 6365000 + }, + { + "epoch": 3.82, + "learning_rate": 2.931756879187181e-05, + "loss": 0.3364, + "step": 6365500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9315468826311245e-05, + "loss": 0.3348, + "step": 6366000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9313368860750682e-05, + "loss": 0.3181, + "step": 6366500 + }, + { + "epoch": 3.82, + "learning_rate": 2.931126889519012e-05, + "loss": 0.3173, + "step": 6367000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9309168929629552e-05, + "loss": 0.322, + "step": 6367500 + }, + { + "epoch": 3.82, + "learning_rate": 2.930706896406899e-05, + "loss": 0.3199, + "step": 6368000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9304968998508426e-05, + "loss": 0.3262, + "step": 6368500 + }, + { + "epoch": 3.82, + "learning_rate": 2.930286903294786e-05, + "loss": 0.3114, + "step": 6369000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9300773267318413e-05, + "loss": 0.3209, + "step": 6369500 + }, + { + "epoch": 3.82, + "learning_rate": 2.929867330175785e-05, + "loss": 0.3202, + "step": 6370000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9296573336197287e-05, + "loss": 0.3279, + "step": 6370500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9294473370636723e-05, + "loss": 0.328, + "step": 6371000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9292373405076157e-05, + "loss": 0.3277, + "step": 6371500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9290273439515594e-05, + "loss": 0.3296, + "step": 6372000 + }, + { + "epoch": 3.82, + "learning_rate": 2.928817347395503e-05, + "loss": 0.3268, + "step": 6372500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9286077708325584e-05, + "loss": 0.3236, + "step": 6373000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9283977742765018e-05, + "loss": 0.3251, + "step": 6373500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9281877777204454e-05, + "loss": 0.3148, + "step": 6374000 + }, + { + "epoch": 3.82, + "learning_rate": 2.927977781164389e-05, + "loss": 0.3322, + "step": 6374500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9277682046014445e-05, + "loss": 0.3303, + "step": 6375000 + }, + { + "epoch": 3.82, + "learning_rate": 2.927558208045388e-05, + "loss": 0.3296, + "step": 6375500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9273482114893315e-05, + "loss": 0.3279, + "step": 6376000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9271382149332752e-05, + "loss": 0.3234, + "step": 6376500 + }, + { + "epoch": 3.82, + "learning_rate": 2.926928218377219e-05, + "loss": 0.3163, + "step": 6377000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9267182218211622e-05, + "loss": 0.326, + "step": 6377500 + }, + { + "epoch": 3.82, + "learning_rate": 2.9265082252651052e-05, + "loss": 0.3205, + "step": 6378000 + }, + { + "epoch": 3.82, + "learning_rate": 2.926298228709049e-05, + "loss": 0.3284, + "step": 6378500 + }, + { + "epoch": 3.82, + "learning_rate": 2.926088652146105e-05, + "loss": 0.3131, + "step": 6379000 + }, + { + "epoch": 3.82, + "learning_rate": 2.9258790755831603e-05, + "loss": 0.318, + "step": 6379500 + }, + { + "epoch": 3.83, + "learning_rate": 2.925669079027104e-05, + "loss": 0.3218, + "step": 6380000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9254590824710473e-05, + "loss": 0.3199, + "step": 6380500 + }, + { + "epoch": 3.83, + "learning_rate": 2.925249085914991e-05, + "loss": 0.3195, + "step": 6381000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9250390893589347e-05, + "loss": 0.3294, + "step": 6381500 + }, + { + "epoch": 3.83, + "learning_rate": 2.924829092802878e-05, + "loss": 0.3226, + "step": 6382000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9246190962468217e-05, + "loss": 0.3248, + "step": 6382500 + }, + { + "epoch": 3.83, + "learning_rate": 2.924409519683877e-05, + "loss": 0.3224, + "step": 6383000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9241995231278208e-05, + "loss": 0.325, + "step": 6383500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9239895265717645e-05, + "loss": 0.3336, + "step": 6384000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9237795300157078e-05, + "loss": 0.3212, + "step": 6384500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9235695334596515e-05, + "loss": 0.3241, + "step": 6385000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9233595369035945e-05, + "loss": 0.3304, + "step": 6385500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9231495403475382e-05, + "loss": 0.3284, + "step": 6386000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9229399637845942e-05, + "loss": 0.3282, + "step": 6386500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9227299672285376e-05, + "loss": 0.326, + "step": 6387000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9225199706724813e-05, + "loss": 0.3209, + "step": 6387500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9223099741164243e-05, + "loss": 0.3142, + "step": 6388000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9220999775603676e-05, + "loss": 0.3282, + "step": 6388500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9218899810043113e-05, + "loss": 0.3158, + "step": 6389000 + }, + { + "epoch": 3.83, + "learning_rate": 2.921679984448255e-05, + "loss": 0.3205, + "step": 6389500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9214699878921983e-05, + "loss": 0.3291, + "step": 6390000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9212608313223664e-05, + "loss": 0.3238, + "step": 6390500 + }, + { + "epoch": 3.83, + "learning_rate": 2.92105083476631e-05, + "loss": 0.3257, + "step": 6391000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9208408382102534e-05, + "loss": 0.3242, + "step": 6391500 + }, + { + "epoch": 3.83, + "learning_rate": 2.920630841654197e-05, + "loss": 0.3276, + "step": 6392000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9204212650912524e-05, + "loss": 0.3126, + "step": 6392500 + }, + { + "epoch": 3.83, + "learning_rate": 2.920211268535196e-05, + "loss": 0.3321, + "step": 6393000 + }, + { + "epoch": 3.83, + "learning_rate": 2.9200012719791398e-05, + "loss": 0.3163, + "step": 6393500 + }, + { + "epoch": 3.83, + "learning_rate": 2.919791275423083e-05, + "loss": 0.3277, + "step": 6394000 + }, + { + "epoch": 3.83, + "learning_rate": 2.919581278867027e-05, + "loss": 0.3242, + "step": 6394500 + }, + { + "epoch": 3.83, + "learning_rate": 2.91937128231097e-05, + "loss": 0.3259, + "step": 6395000 + }, + { + "epoch": 3.83, + "learning_rate": 2.919161705748026e-05, + "loss": 0.321, + "step": 6395500 + }, + { + "epoch": 3.83, + "learning_rate": 2.9189517091919692e-05, + "loss": 0.3252, + "step": 6396000 + }, + { + "epoch": 3.83, + "learning_rate": 2.918741712635913e-05, + "loss": 0.3325, + "step": 6396500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9185317160798566e-05, + "loss": 0.3307, + "step": 6397000 + }, + { + "epoch": 3.84, + "learning_rate": 2.918322139516912e-05, + "loss": 0.3237, + "step": 6397500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9181121429608556e-05, + "loss": 0.3268, + "step": 6398000 + }, + { + "epoch": 3.84, + "learning_rate": 2.917902566397911e-05, + "loss": 0.3136, + "step": 6398500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9176925698418547e-05, + "loss": 0.324, + "step": 6399000 + }, + { + "epoch": 3.84, + "learning_rate": 2.917482573285798e-05, + "loss": 0.3228, + "step": 6399500 + }, + { + "epoch": 3.84, + "learning_rate": 2.9172725767297417e-05, + "loss": 0.3183, + "step": 6400000 + }, + { + "epoch": 3.84, + "eval_loss": 0.3165391683578491, + "eval_runtime": 1122.9364, + "eval_samples_per_second": 469.056, + "eval_steps_per_second": 78.176, + "step": 6400000 + } + ], + "max_steps": 13343552, + "num_train_epochs": 8, + "total_flos": 1.3064158120701542e+18, + "trial_name": null, + "trial_params": null +}