{ "best_metric": 1.6579999923706055, "best_model_checkpoint": "/scratch/kwamea/llama-output/checkpoint-720", "epoch": 1.8766066838046274, "eval_steps": 5, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.93573264781491e-05, "loss": 1.9824, "step": 5 }, { "epoch": 0.01, "eval_loss": 1.954480528831482, "eval_runtime": 38.0417, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 5 }, { "epoch": 0.03, "learning_rate": 9.87146529562982e-05, "loss": 1.9245, "step": 10 }, { "epoch": 0.03, "eval_loss": 1.8942010402679443, "eval_runtime": 37.8934, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 10 }, { "epoch": 0.04, "learning_rate": 9.80719794344473e-05, "loss": 1.8616, "step": 15 }, { "epoch": 0.04, "eval_loss": 1.8424055576324463, "eval_runtime": 37.8727, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 15 }, { "epoch": 0.05, "learning_rate": 9.742930591259641e-05, "loss": 1.8267, "step": 20 }, { "epoch": 0.05, "eval_loss": 1.8279685974121094, "eval_runtime": 38.166, "eval_samples_per_second": 2.201, "eval_steps_per_second": 0.288, "step": 20 }, { "epoch": 0.06, "learning_rate": 9.67866323907455e-05, "loss": 1.835, "step": 25 }, { "epoch": 0.06, "eval_loss": 1.8134318590164185, "eval_runtime": 38.0411, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 25 }, { "epoch": 0.08, "learning_rate": 9.61439588688946e-05, "loss": 1.8236, "step": 30 }, { "epoch": 0.08, "eval_loss": 1.8010995388031006, "eval_runtime": 37.8753, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 30 }, { "epoch": 0.09, "learning_rate": 9.550128534704372e-05, "loss": 1.7186, "step": 35 }, { "epoch": 0.09, "eval_loss": 1.7901933193206787, "eval_runtime": 38.0374, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 35 }, { "epoch": 0.1, "learning_rate": 9.485861182519281e-05, "loss": 1.7959, "step": 40 }, { "epoch": 0.1, "eval_loss": 1.7807564735412598, "eval_runtime": 37.8784, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 40 }, { "epoch": 0.12, "learning_rate": 9.421593830334192e-05, "loss": 1.715, "step": 45 }, { "epoch": 0.12, "eval_loss": 1.7709146738052368, "eval_runtime": 37.8705, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 45 }, { "epoch": 0.13, "learning_rate": 9.357326478149101e-05, "loss": 1.7581, "step": 50 }, { "epoch": 0.13, "eval_loss": 1.761339783668518, "eval_runtime": 37.661, "eval_samples_per_second": 2.23, "eval_steps_per_second": 0.292, "step": 50 }, { "epoch": 0.14, "learning_rate": 9.29305912596401e-05, "loss": 1.7305, "step": 55 }, { "epoch": 0.14, "eval_loss": 1.7389689683914185, "eval_runtime": 38.0055, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 55 }, { "epoch": 0.15, "learning_rate": 9.228791773778921e-05, "loss": 1.7086, "step": 60 }, { "epoch": 0.15, "eval_loss": 1.713218092918396, "eval_runtime": 37.737, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.291, "step": 60 }, { "epoch": 0.17, "learning_rate": 9.16452442159383e-05, "loss": 1.7057, "step": 65 }, { "epoch": 0.17, "eval_loss": 1.7108122110366821, "eval_runtime": 38.0037, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 65 }, { "epoch": 0.18, "learning_rate": 9.100257069408741e-05, "loss": 1.7243, "step": 70 }, { "epoch": 0.18, "eval_loss": 1.7041622400283813, "eval_runtime": 38.1402, "eval_samples_per_second": 2.202, "eval_steps_per_second": 0.288, "step": 70 }, { "epoch": 0.19, "learning_rate": 9.03598971722365e-05, "loss": 1.7038, "step": 75 }, { "epoch": 0.19, "eval_loss": 1.7010408639907837, "eval_runtime": 37.7193, "eval_samples_per_second": 2.227, "eval_steps_per_second": 0.292, "step": 75 }, { "epoch": 0.21, "learning_rate": 8.97172236503856e-05, "loss": 1.6939, "step": 80 }, { "epoch": 0.21, "eval_loss": 1.6987501382827759, "eval_runtime": 38.0624, "eval_samples_per_second": 2.207, "eval_steps_per_second": 0.289, "step": 80 }, { "epoch": 0.22, "learning_rate": 8.907455012853471e-05, "loss": 1.645, "step": 85 }, { "epoch": 0.22, "eval_loss": 1.6968218088150024, "eval_runtime": 37.9205, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 85 }, { "epoch": 0.23, "learning_rate": 8.84318766066838e-05, "loss": 1.7016, "step": 90 }, { "epoch": 0.23, "eval_loss": 1.6952500343322754, "eval_runtime": 37.9617, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 90 }, { "epoch": 0.24, "learning_rate": 8.778920308483291e-05, "loss": 1.6869, "step": 95 }, { "epoch": 0.24, "eval_loss": 1.6932854652404785, "eval_runtime": 37.8781, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 95 }, { "epoch": 0.26, "learning_rate": 8.7146529562982e-05, "loss": 1.6728, "step": 100 }, { "epoch": 0.26, "eval_loss": 1.6920738220214844, "eval_runtime": 38.1996, "eval_samples_per_second": 2.199, "eval_steps_per_second": 0.288, "step": 100 }, { "epoch": 0.27, "learning_rate": 8.650385604113111e-05, "loss": 1.6674, "step": 105 }, { "epoch": 0.27, "eval_loss": 1.6904469728469849, "eval_runtime": 38.1252, "eval_samples_per_second": 2.203, "eval_steps_per_second": 0.289, "step": 105 }, { "epoch": 0.28, "learning_rate": 8.586118251928022e-05, "loss": 1.7013, "step": 110 }, { "epoch": 0.28, "eval_loss": 1.6895124912261963, "eval_runtime": 38.0968, "eval_samples_per_second": 2.205, "eval_steps_per_second": 0.289, "step": 110 }, { "epoch": 0.3, "learning_rate": 8.521850899742931e-05, "loss": 1.7148, "step": 115 }, { "epoch": 0.3, "eval_loss": 1.6886374950408936, "eval_runtime": 37.8799, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 115 }, { "epoch": 0.31, "learning_rate": 8.457583547557842e-05, "loss": 1.7166, "step": 120 }, { "epoch": 0.31, "eval_loss": 1.6868164539337158, "eval_runtime": 37.8861, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 120 }, { "epoch": 0.32, "learning_rate": 8.393316195372751e-05, "loss": 1.7012, "step": 125 }, { "epoch": 0.32, "eval_loss": 1.6858367919921875, "eval_runtime": 38.1136, "eval_samples_per_second": 2.204, "eval_steps_per_second": 0.289, "step": 125 }, { "epoch": 0.33, "learning_rate": 8.32904884318766e-05, "loss": 1.6827, "step": 130 }, { "epoch": 0.33, "eval_loss": 1.6849361658096313, "eval_runtime": 37.9883, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 130 }, { "epoch": 0.35, "learning_rate": 8.264781491002571e-05, "loss": 1.7157, "step": 135 }, { "epoch": 0.35, "eval_loss": 1.684756875038147, "eval_runtime": 37.8726, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 135 }, { "epoch": 0.36, "learning_rate": 8.200514138817481e-05, "loss": 1.6668, "step": 140 }, { "epoch": 0.36, "eval_loss": 1.6836236715316772, "eval_runtime": 37.9586, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 140 }, { "epoch": 0.37, "learning_rate": 8.136246786632391e-05, "loss": 1.6588, "step": 145 }, { "epoch": 0.37, "eval_loss": 1.6828166246414185, "eval_runtime": 38.0596, "eval_samples_per_second": 2.207, "eval_steps_per_second": 0.289, "step": 145 }, { "epoch": 0.39, "learning_rate": 8.071979434447301e-05, "loss": 1.7005, "step": 150 }, { "epoch": 0.39, "eval_loss": 1.681463360786438, "eval_runtime": 38.0878, "eval_samples_per_second": 2.205, "eval_steps_per_second": 0.289, "step": 150 }, { "epoch": 0.4, "learning_rate": 8.007712082262212e-05, "loss": 1.6893, "step": 155 }, { "epoch": 0.4, "eval_loss": 1.6811552047729492, "eval_runtime": 37.9265, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 155 }, { "epoch": 0.41, "learning_rate": 7.943444730077121e-05, "loss": 1.6682, "step": 160 }, { "epoch": 0.41, "eval_loss": 1.6801096200942993, "eval_runtime": 38.0375, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 160 }, { "epoch": 0.42, "learning_rate": 7.87917737789203e-05, "loss": 1.6481, "step": 165 }, { "epoch": 0.42, "eval_loss": 1.6800144910812378, "eval_runtime": 37.8916, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 165 }, { "epoch": 0.44, "learning_rate": 7.814910025706941e-05, "loss": 1.7042, "step": 170 }, { "epoch": 0.44, "eval_loss": 1.6784976720809937, "eval_runtime": 37.8514, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 170 }, { "epoch": 0.45, "learning_rate": 7.750642673521852e-05, "loss": 1.6555, "step": 175 }, { "epoch": 0.45, "eval_loss": 1.6780468225479126, "eval_runtime": 37.8893, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 175 }, { "epoch": 0.46, "learning_rate": 7.686375321336761e-05, "loss": 1.6717, "step": 180 }, { "epoch": 0.46, "eval_loss": 1.6775085926055908, "eval_runtime": 37.9289, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 180 }, { "epoch": 0.48, "learning_rate": 7.622107969151672e-05, "loss": 1.6716, "step": 185 }, { "epoch": 0.48, "eval_loss": 1.6770914793014526, "eval_runtime": 38.0111, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 185 }, { "epoch": 0.49, "learning_rate": 7.557840616966581e-05, "loss": 1.6618, "step": 190 }, { "epoch": 0.49, "eval_loss": 1.676563024520874, "eval_runtime": 38.0703, "eval_samples_per_second": 2.206, "eval_steps_per_second": 0.289, "step": 190 }, { "epoch": 0.5, "learning_rate": 7.493573264781492e-05, "loss": 1.6411, "step": 195 }, { "epoch": 0.5, "eval_loss": 1.6757832765579224, "eval_runtime": 37.827, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.291, "step": 195 }, { "epoch": 0.51, "learning_rate": 7.429305912596401e-05, "loss": 1.6637, "step": 200 }, { "epoch": 0.51, "eval_loss": 1.6751985549926758, "eval_runtime": 37.836, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.291, "step": 200 }, { "epoch": 0.53, "learning_rate": 7.365038560411311e-05, "loss": 1.6142, "step": 205 }, { "epoch": 0.53, "eval_loss": 1.675147294998169, "eval_runtime": 37.9175, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 205 }, { "epoch": 0.54, "learning_rate": 7.300771208226222e-05, "loss": 1.6741, "step": 210 }, { "epoch": 0.54, "eval_loss": 1.674437165260315, "eval_runtime": 38.087, "eval_samples_per_second": 2.205, "eval_steps_per_second": 0.289, "step": 210 }, { "epoch": 0.55, "learning_rate": 7.236503856041131e-05, "loss": 1.6408, "step": 215 }, { "epoch": 0.55, "eval_loss": 1.6737719774246216, "eval_runtime": 38.0075, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 215 }, { "epoch": 0.57, "learning_rate": 7.172236503856042e-05, "loss": 1.6733, "step": 220 }, { "epoch": 0.57, "eval_loss": 1.6732759475708008, "eval_runtime": 38.1125, "eval_samples_per_second": 2.204, "eval_steps_per_second": 0.289, "step": 220 }, { "epoch": 0.58, "learning_rate": 7.107969151670951e-05, "loss": 1.679, "step": 225 }, { "epoch": 0.58, "eval_loss": 1.6726195812225342, "eval_runtime": 37.9769, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 225 }, { "epoch": 0.59, "learning_rate": 7.043701799485862e-05, "loss": 1.7202, "step": 230 }, { "epoch": 0.59, "eval_loss": 1.671908974647522, "eval_runtime": 37.8215, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.291, "step": 230 }, { "epoch": 0.6, "learning_rate": 6.979434447300771e-05, "loss": 1.6805, "step": 235 }, { "epoch": 0.6, "eval_loss": 1.6715577840805054, "eval_runtime": 37.9343, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 235 }, { "epoch": 0.62, "learning_rate": 6.91516709511568e-05, "loss": 1.6331, "step": 240 }, { "epoch": 0.62, "eval_loss": 1.6715376377105713, "eval_runtime": 37.9904, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 240 }, { "epoch": 0.63, "learning_rate": 6.850899742930593e-05, "loss": 1.6761, "step": 245 }, { "epoch": 0.63, "eval_loss": 1.671446681022644, "eval_runtime": 38.0357, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 245 }, { "epoch": 0.64, "learning_rate": 6.786632390745502e-05, "loss": 1.6994, "step": 250 }, { "epoch": 0.64, "eval_loss": 1.6716604232788086, "eval_runtime": 37.9609, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 250 }, { "epoch": 0.66, "learning_rate": 6.722365038560411e-05, "loss": 1.6305, "step": 255 }, { "epoch": 0.66, "eval_loss": 1.6711723804473877, "eval_runtime": 37.7893, "eval_samples_per_second": 2.223, "eval_steps_per_second": 0.291, "step": 255 }, { "epoch": 0.67, "learning_rate": 6.658097686375322e-05, "loss": 1.6612, "step": 260 }, { "epoch": 0.67, "eval_loss": 1.670398473739624, "eval_runtime": 37.7667, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 260 }, { "epoch": 0.68, "learning_rate": 6.593830334190231e-05, "loss": 1.6576, "step": 265 }, { "epoch": 0.68, "eval_loss": 1.6706459522247314, "eval_runtime": 37.8285, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.291, "step": 265 }, { "epoch": 0.69, "learning_rate": 6.529562982005142e-05, "loss": 1.6837, "step": 270 }, { "epoch": 0.69, "eval_loss": 1.6699285507202148, "eval_runtime": 37.7619, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 270 }, { "epoch": 0.71, "learning_rate": 6.465295629820052e-05, "loss": 1.6493, "step": 275 }, { "epoch": 0.71, "eval_loss": 1.6693135499954224, "eval_runtime": 37.7441, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.291, "step": 275 }, { "epoch": 0.72, "learning_rate": 6.401028277634962e-05, "loss": 1.6427, "step": 280 }, { "epoch": 0.72, "eval_loss": 1.669467568397522, "eval_runtime": 37.8707, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 280 }, { "epoch": 0.73, "learning_rate": 6.336760925449872e-05, "loss": 1.6236, "step": 285 }, { "epoch": 0.73, "eval_loss": 1.6690431833267212, "eval_runtime": 37.8946, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 285 }, { "epoch": 0.75, "learning_rate": 6.272493573264781e-05, "loss": 1.6538, "step": 290 }, { "epoch": 0.75, "eval_loss": 1.6688117980957031, "eval_runtime": 37.9378, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 290 }, { "epoch": 0.76, "learning_rate": 6.208226221079692e-05, "loss": 1.6558, "step": 295 }, { "epoch": 0.76, "eval_loss": 1.668560266494751, "eval_runtime": 38.0448, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 295 }, { "epoch": 0.77, "learning_rate": 6.143958868894601e-05, "loss": 1.6401, "step": 300 }, { "epoch": 0.77, "eval_loss": 1.6680612564086914, "eval_runtime": 37.8571, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 300 }, { "epoch": 0.78, "learning_rate": 6.079691516709511e-05, "loss": 1.6468, "step": 305 }, { "epoch": 0.78, "eval_loss": 1.6675914525985718, "eval_runtime": 37.8491, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 305 }, { "epoch": 0.8, "learning_rate": 6.015424164524421e-05, "loss": 1.6579, "step": 310 }, { "epoch": 0.8, "eval_loss": 1.6671632528305054, "eval_runtime": 37.7658, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 310 }, { "epoch": 0.81, "learning_rate": 5.951156812339333e-05, "loss": 1.6339, "step": 315 }, { "epoch": 0.81, "eval_loss": 1.666803240776062, "eval_runtime": 37.7896, "eval_samples_per_second": 2.223, "eval_steps_per_second": 0.291, "step": 315 }, { "epoch": 0.82, "learning_rate": 5.886889460154242e-05, "loss": 1.6636, "step": 320 }, { "epoch": 0.82, "eval_loss": 1.6664105653762817, "eval_runtime": 38.0586, "eval_samples_per_second": 2.207, "eval_steps_per_second": 0.289, "step": 320 }, { "epoch": 0.84, "learning_rate": 5.822622107969152e-05, "loss": 1.641, "step": 325 }, { "epoch": 0.84, "eval_loss": 1.6665875911712646, "eval_runtime": 37.9046, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 325 }, { "epoch": 0.85, "learning_rate": 5.758354755784062e-05, "loss": 1.6616, "step": 330 }, { "epoch": 0.85, "eval_loss": 1.6664165258407593, "eval_runtime": 37.8575, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 330 }, { "epoch": 0.86, "learning_rate": 5.694087403598972e-05, "loss": 1.6597, "step": 335 }, { "epoch": 0.86, "eval_loss": 1.666045904159546, "eval_runtime": 37.9711, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 335 }, { "epoch": 0.87, "learning_rate": 5.6298200514138824e-05, "loss": 1.67, "step": 340 }, { "epoch": 0.87, "eval_loss": 1.666337490081787, "eval_runtime": 37.8821, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 340 }, { "epoch": 0.89, "learning_rate": 5.5655526992287924e-05, "loss": 1.6344, "step": 345 }, { "epoch": 0.89, "eval_loss": 1.6659029722213745, "eval_runtime": 37.9188, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 345 }, { "epoch": 0.9, "learning_rate": 5.501285347043702e-05, "loss": 1.6623, "step": 350 }, { "epoch": 0.9, "eval_loss": 1.6656525135040283, "eval_runtime": 37.9951, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 350 }, { "epoch": 0.91, "learning_rate": 5.437017994858612e-05, "loss": 1.6623, "step": 355 }, { "epoch": 0.91, "eval_loss": 1.6654421091079712, "eval_runtime": 37.8803, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 355 }, { "epoch": 0.93, "learning_rate": 5.372750642673522e-05, "loss": 1.6741, "step": 360 }, { "epoch": 0.93, "eval_loss": 1.665541648864746, "eval_runtime": 37.7151, "eval_samples_per_second": 2.227, "eval_steps_per_second": 0.292, "step": 360 }, { "epoch": 0.94, "learning_rate": 5.308483290488432e-05, "loss": 1.6973, "step": 365 }, { "epoch": 0.94, "eval_loss": 1.6651471853256226, "eval_runtime": 37.9144, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 365 }, { "epoch": 0.95, "learning_rate": 5.244215938303342e-05, "loss": 1.6829, "step": 370 }, { "epoch": 0.95, "eval_loss": 1.6651064157485962, "eval_runtime": 37.7443, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.291, "step": 370 }, { "epoch": 0.96, "learning_rate": 5.1799485861182514e-05, "loss": 1.6923, "step": 375 }, { "epoch": 0.96, "eval_loss": 1.6646034717559814, "eval_runtime": 37.8783, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 375 }, { "epoch": 0.98, "learning_rate": 5.1156812339331615e-05, "loss": 1.6725, "step": 380 }, { "epoch": 0.98, "eval_loss": 1.6639235019683838, "eval_runtime": 37.7559, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.291, "step": 380 }, { "epoch": 0.99, "learning_rate": 5.051413881748073e-05, "loss": 1.6216, "step": 385 }, { "epoch": 0.99, "eval_loss": 1.6643506288528442, "eval_runtime": 37.9895, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 385 }, { "epoch": 1.0, "learning_rate": 4.987146529562982e-05, "loss": 1.6518, "step": 390 }, { "epoch": 1.0, "eval_loss": 1.6643046140670776, "eval_runtime": 37.9289, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 390 }, { "epoch": 1.02, "learning_rate": 4.922879177377892e-05, "loss": 1.658, "step": 395 }, { "epoch": 1.02, "eval_loss": 1.6641957759857178, "eval_runtime": 37.8638, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.291, "step": 395 }, { "epoch": 1.03, "learning_rate": 4.8586118251928024e-05, "loss": 1.6767, "step": 400 }, { "epoch": 1.03, "eval_loss": 1.663757085800171, "eval_runtime": 37.7728, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 400 }, { "epoch": 1.04, "learning_rate": 4.7943444730077124e-05, "loss": 1.6264, "step": 405 }, { "epoch": 1.04, "eval_loss": 1.6635041236877441, "eval_runtime": 37.7702, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 405 }, { "epoch": 1.05, "learning_rate": 4.7300771208226225e-05, "loss": 1.6527, "step": 410 }, { "epoch": 1.05, "eval_loss": 1.6632280349731445, "eval_runtime": 37.8453, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.291, "step": 410 }, { "epoch": 1.07, "learning_rate": 4.6658097686375325e-05, "loss": 1.6157, "step": 415 }, { "epoch": 1.07, "eval_loss": 1.6636598110198975, "eval_runtime": 37.812, "eval_samples_per_second": 2.222, "eval_steps_per_second": 0.291, "step": 415 }, { "epoch": 1.08, "learning_rate": 4.6015424164524426e-05, "loss": 1.5966, "step": 420 }, { "epoch": 1.08, "eval_loss": 1.663393497467041, "eval_runtime": 37.7743, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 420 }, { "epoch": 1.09, "learning_rate": 4.537275064267352e-05, "loss": 1.6705, "step": 425 }, { "epoch": 1.09, "eval_loss": 1.6631269454956055, "eval_runtime": 38.0953, "eval_samples_per_second": 2.205, "eval_steps_per_second": 0.289, "step": 425 }, { "epoch": 1.11, "learning_rate": 4.473007712082262e-05, "loss": 1.6691, "step": 430 }, { "epoch": 1.11, "eval_loss": 1.6633110046386719, "eval_runtime": 37.8964, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 430 }, { "epoch": 1.12, "learning_rate": 4.408740359897173e-05, "loss": 1.6332, "step": 435 }, { "epoch": 1.12, "eval_loss": 1.6628649234771729, "eval_runtime": 38.0621, "eval_samples_per_second": 2.207, "eval_steps_per_second": 0.289, "step": 435 }, { "epoch": 1.13, "learning_rate": 4.344473007712083e-05, "loss": 1.5916, "step": 440 }, { "epoch": 1.13, "eval_loss": 1.662834882736206, "eval_runtime": 37.9722, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 440 }, { "epoch": 1.14, "learning_rate": 4.280205655526993e-05, "loss": 1.6543, "step": 445 }, { "epoch": 1.14, "eval_loss": 1.6630265712738037, "eval_runtime": 37.9699, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 445 }, { "epoch": 1.16, "learning_rate": 4.215938303341902e-05, "loss": 1.6353, "step": 450 }, { "epoch": 1.16, "eval_loss": 1.6625033617019653, "eval_runtime": 37.7505, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.291, "step": 450 }, { "epoch": 1.17, "learning_rate": 4.151670951156812e-05, "loss": 1.6441, "step": 455 }, { "epoch": 1.17, "eval_loss": 1.662785530090332, "eval_runtime": 37.75, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.291, "step": 455 }, { "epoch": 1.18, "learning_rate": 4.0874035989717224e-05, "loss": 1.6631, "step": 460 }, { "epoch": 1.18, "eval_loss": 1.6627779006958008, "eval_runtime": 37.864, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.291, "step": 460 }, { "epoch": 1.2, "learning_rate": 4.0231362467866324e-05, "loss": 1.6327, "step": 465 }, { "epoch": 1.2, "eval_loss": 1.662458896636963, "eval_runtime": 37.7496, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.291, "step": 465 }, { "epoch": 1.21, "learning_rate": 3.958868894601543e-05, "loss": 1.6238, "step": 470 }, { "epoch": 1.21, "eval_loss": 1.6621876955032349, "eval_runtime": 37.8943, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.29, "step": 470 }, { "epoch": 1.22, "learning_rate": 3.8946015424164526e-05, "loss": 1.6231, "step": 475 }, { "epoch": 1.22, "eval_loss": 1.662013292312622, "eval_runtime": 37.9364, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 475 }, { "epoch": 1.23, "learning_rate": 3.8303341902313626e-05, "loss": 1.6381, "step": 480 }, { "epoch": 1.23, "eval_loss": 1.6615785360336304, "eval_runtime": 37.91, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 480 }, { "epoch": 1.25, "learning_rate": 3.766066838046273e-05, "loss": 1.6419, "step": 485 }, { "epoch": 1.25, "eval_loss": 1.662042498588562, "eval_runtime": 37.7834, "eval_samples_per_second": 2.223, "eval_steps_per_second": 0.291, "step": 485 }, { "epoch": 1.26, "learning_rate": 3.701799485861183e-05, "loss": 1.6319, "step": 490 }, { "epoch": 1.26, "eval_loss": 1.6619195938110352, "eval_runtime": 37.7385, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.291, "step": 490 }, { "epoch": 1.27, "learning_rate": 3.637532133676093e-05, "loss": 1.609, "step": 495 }, { "epoch": 1.27, "eval_loss": 1.6615225076675415, "eval_runtime": 37.8056, "eval_samples_per_second": 2.222, "eval_steps_per_second": 0.291, "step": 495 }, { "epoch": 1.29, "learning_rate": 3.573264781491003e-05, "loss": 1.6435, "step": 500 }, { "epoch": 1.29, "eval_loss": 1.6610548496246338, "eval_runtime": 37.9353, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 500 }, { "epoch": 1.3, "learning_rate": 3.508997429305913e-05, "loss": 1.6381, "step": 505 }, { "epoch": 1.3, "eval_loss": 1.661049246788025, "eval_runtime": 37.9669, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 505 }, { "epoch": 1.31, "learning_rate": 3.444730077120823e-05, "loss": 1.6506, "step": 510 }, { "epoch": 1.31, "eval_loss": 1.6610089540481567, "eval_runtime": 37.9127, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 510 }, { "epoch": 1.32, "learning_rate": 3.380462724935733e-05, "loss": 1.6376, "step": 515 }, { "epoch": 1.32, "eval_loss": 1.6608952283859253, "eval_runtime": 37.9454, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 515 }, { "epoch": 1.34, "learning_rate": 3.316195372750643e-05, "loss": 1.6354, "step": 520 }, { "epoch": 1.34, "eval_loss": 1.6610838174819946, "eval_runtime": 37.7265, "eval_samples_per_second": 2.227, "eval_steps_per_second": 0.292, "step": 520 }, { "epoch": 1.35, "learning_rate": 3.251928020565553e-05, "loss": 1.6201, "step": 525 }, { "epoch": 1.35, "eval_loss": 1.6610314846038818, "eval_runtime": 37.8464, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 525 }, { "epoch": 1.36, "learning_rate": 3.1876606683804625e-05, "loss": 1.6461, "step": 530 }, { "epoch": 1.36, "eval_loss": 1.6605802774429321, "eval_runtime": 37.9503, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 530 }, { "epoch": 1.38, "learning_rate": 3.1233933161953726e-05, "loss": 1.6818, "step": 535 }, { "epoch": 1.38, "eval_loss": 1.6607571840286255, "eval_runtime": 37.9086, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 535 }, { "epoch": 1.39, "learning_rate": 3.059125964010283e-05, "loss": 1.6117, "step": 540 }, { "epoch": 1.39, "eval_loss": 1.6605336666107178, "eval_runtime": 37.947, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 540 }, { "epoch": 1.4, "learning_rate": 2.994858611825193e-05, "loss": 1.6252, "step": 545 }, { "epoch": 1.4, "eval_loss": 1.6603213548660278, "eval_runtime": 37.9826, "eval_samples_per_second": 2.212, "eval_steps_per_second": 0.29, "step": 545 }, { "epoch": 1.41, "learning_rate": 2.930591259640103e-05, "loss": 1.6486, "step": 550 }, { "epoch": 1.41, "eval_loss": 1.6600297689437866, "eval_runtime": 37.9635, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 550 }, { "epoch": 1.43, "learning_rate": 2.866323907455013e-05, "loss": 1.6861, "step": 555 }, { "epoch": 1.43, "eval_loss": 1.6602742671966553, "eval_runtime": 37.8722, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 555 }, { "epoch": 1.44, "learning_rate": 2.802056555269923e-05, "loss": 1.6624, "step": 560 }, { "epoch": 1.44, "eval_loss": 1.6599992513656616, "eval_runtime": 37.9306, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 560 }, { "epoch": 1.45, "learning_rate": 2.737789203084833e-05, "loss": 1.6323, "step": 565 }, { "epoch": 1.45, "eval_loss": 1.6600306034088135, "eval_runtime": 37.9548, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.29, "step": 565 }, { "epoch": 1.47, "learning_rate": 2.673521850899743e-05, "loss": 1.6707, "step": 570 }, { "epoch": 1.47, "eval_loss": 1.6601014137268066, "eval_runtime": 38.0349, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.289, "step": 570 }, { "epoch": 1.48, "learning_rate": 2.6092544987146534e-05, "loss": 1.6478, "step": 575 }, { "epoch": 1.48, "eval_loss": 1.6597967147827148, "eval_runtime": 37.9413, "eval_samples_per_second": 2.214, "eval_steps_per_second": 0.29, "step": 575 }, { "epoch": 1.49, "learning_rate": 2.5449871465295634e-05, "loss": 1.6715, "step": 580 }, { "epoch": 1.49, "eval_loss": 1.6598337888717651, "eval_runtime": 37.8386, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.291, "step": 580 }, { "epoch": 1.5, "learning_rate": 2.480719794344473e-05, "loss": 1.6626, "step": 585 }, { "epoch": 1.5, "eval_loss": 1.6600011587142944, "eval_runtime": 37.7901, "eval_samples_per_second": 2.223, "eval_steps_per_second": 0.291, "step": 585 }, { "epoch": 1.52, "learning_rate": 2.4164524421593832e-05, "loss": 1.651, "step": 590 }, { "epoch": 1.52, "eval_loss": 1.65969717502594, "eval_runtime": 37.7643, "eval_samples_per_second": 2.224, "eval_steps_per_second": 0.291, "step": 590 }, { "epoch": 1.53, "learning_rate": 2.3521850899742933e-05, "loss": 1.6346, "step": 595 }, { "epoch": 1.53, "eval_loss": 1.6598676443099976, "eval_runtime": 37.9026, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 595 }, { "epoch": 1.54, "learning_rate": 2.2879177377892033e-05, "loss": 1.6533, "step": 600 }, { "epoch": 1.54, "eval_loss": 1.659874677658081, "eval_runtime": 37.9871, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 600 }, { "epoch": 1.56, "learning_rate": 2.2236503856041134e-05, "loss": 1.6913, "step": 605 }, { "epoch": 1.56, "eval_loss": 1.659849762916565, "eval_runtime": 37.8712, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 605 }, { "epoch": 1.57, "learning_rate": 2.159383033419023e-05, "loss": 1.6709, "step": 610 }, { "epoch": 1.57, "eval_loss": 1.6598856449127197, "eval_runtime": 37.7043, "eval_samples_per_second": 2.228, "eval_steps_per_second": 0.292, "step": 610 }, { "epoch": 1.58, "learning_rate": 2.095115681233933e-05, "loss": 1.598, "step": 615 }, { "epoch": 1.58, "eval_loss": 1.6597983837127686, "eval_runtime": 37.722, "eval_samples_per_second": 2.227, "eval_steps_per_second": 0.292, "step": 615 }, { "epoch": 1.59, "learning_rate": 2.0308483290488432e-05, "loss": 1.6434, "step": 620 }, { "epoch": 1.59, "eval_loss": 1.6595433950424194, "eval_runtime": 37.6125, "eval_samples_per_second": 2.233, "eval_steps_per_second": 0.292, "step": 620 }, { "epoch": 1.61, "learning_rate": 1.9665809768637533e-05, "loss": 1.6086, "step": 625 }, { "epoch": 1.61, "eval_loss": 1.6594945192337036, "eval_runtime": 38.0046, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 625 }, { "epoch": 1.62, "learning_rate": 1.9023136246786633e-05, "loss": 1.5962, "step": 630 }, { "epoch": 1.62, "eval_loss": 1.6595605611801147, "eval_runtime": 37.8518, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 630 }, { "epoch": 1.63, "learning_rate": 1.8380462724935734e-05, "loss": 1.6695, "step": 635 }, { "epoch": 1.63, "eval_loss": 1.6592403650283813, "eval_runtime": 37.7285, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.292, "step": 635 }, { "epoch": 1.65, "learning_rate": 1.7737789203084834e-05, "loss": 1.6526, "step": 640 }, { "epoch": 1.65, "eval_loss": 1.6591880321502686, "eval_runtime": 37.758, "eval_samples_per_second": 2.225, "eval_steps_per_second": 0.291, "step": 640 }, { "epoch": 1.66, "learning_rate": 1.7095115681233935e-05, "loss": 1.672, "step": 645 }, { "epoch": 1.66, "eval_loss": 1.6588138341903687, "eval_runtime": 37.92, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 645 }, { "epoch": 1.67, "learning_rate": 1.6452442159383032e-05, "loss": 1.6347, "step": 650 }, { "epoch": 1.67, "eval_loss": 1.6589809656143188, "eval_runtime": 37.8021, "eval_samples_per_second": 2.222, "eval_steps_per_second": 0.291, "step": 650 }, { "epoch": 1.68, "learning_rate": 1.5809768637532136e-05, "loss": 1.6227, "step": 655 }, { "epoch": 1.68, "eval_loss": 1.6586792469024658, "eval_runtime": 37.9128, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.29, "step": 655 }, { "epoch": 1.7, "learning_rate": 1.5167095115681235e-05, "loss": 1.6308, "step": 660 }, { "epoch": 1.7, "eval_loss": 1.6585420370101929, "eval_runtime": 37.9881, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.29, "step": 660 }, { "epoch": 1.71, "learning_rate": 1.4524421593830334e-05, "loss": 1.6495, "step": 665 }, { "epoch": 1.71, "eval_loss": 1.6587530374526978, "eval_runtime": 37.9148, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 665 }, { "epoch": 1.72, "learning_rate": 1.3881748071979436e-05, "loss": 1.6712, "step": 670 }, { "epoch": 1.72, "eval_loss": 1.658648133277893, "eval_runtime": 37.9313, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.29, "step": 670 }, { "epoch": 1.74, "learning_rate": 1.3239074550128535e-05, "loss": 1.636, "step": 675 }, { "epoch": 1.74, "eval_loss": 1.6584407091140747, "eval_runtime": 37.7963, "eval_samples_per_second": 2.222, "eval_steps_per_second": 0.291, "step": 675 }, { "epoch": 1.75, "learning_rate": 1.2596401028277636e-05, "loss": 1.6453, "step": 680 }, { "epoch": 1.75, "eval_loss": 1.658594012260437, "eval_runtime": 37.7226, "eval_samples_per_second": 2.227, "eval_steps_per_second": 0.292, "step": 680 }, { "epoch": 1.76, "learning_rate": 1.1953727506426736e-05, "loss": 1.6509, "step": 685 }, { "epoch": 1.76, "eval_loss": 1.658467411994934, "eval_runtime": 37.8179, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.291, "step": 685 }, { "epoch": 1.77, "learning_rate": 1.1311053984575835e-05, "loss": 1.6489, "step": 690 }, { "epoch": 1.77, "eval_loss": 1.6585352420806885, "eval_runtime": 37.6768, "eval_samples_per_second": 2.229, "eval_steps_per_second": 0.292, "step": 690 }, { "epoch": 1.79, "learning_rate": 1.0668380462724936e-05, "loss": 1.6424, "step": 695 }, { "epoch": 1.79, "eval_loss": 1.6582835912704468, "eval_runtime": 38.0083, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.289, "step": 695 }, { "epoch": 1.8, "learning_rate": 1.0025706940874038e-05, "loss": 1.6131, "step": 700 }, { "epoch": 1.8, "eval_loss": 1.658105731010437, "eval_runtime": 37.8711, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 700 }, { "epoch": 1.81, "learning_rate": 9.383033419023137e-06, "loss": 1.6225, "step": 705 }, { "epoch": 1.81, "eval_loss": 1.658282995223999, "eval_runtime": 37.8125, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.291, "step": 705 }, { "epoch": 1.83, "learning_rate": 8.740359897172237e-06, "loss": 1.6928, "step": 710 }, { "epoch": 1.83, "eval_loss": 1.6581056118011475, "eval_runtime": 37.863, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.291, "step": 710 }, { "epoch": 1.84, "learning_rate": 8.097686375321336e-06, "loss": 1.6227, "step": 715 }, { "epoch": 1.84, "eval_loss": 1.6581188440322876, "eval_runtime": 37.8646, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.291, "step": 715 }, { "epoch": 1.85, "learning_rate": 7.4550128534704376e-06, "loss": 1.6593, "step": 720 }, { "epoch": 1.85, "eval_loss": 1.6579999923706055, "eval_runtime": 37.876, "eval_samples_per_second": 2.218, "eval_steps_per_second": 0.29, "step": 720 }, { "epoch": 1.86, "learning_rate": 6.812339331619537e-06, "loss": 1.6642, "step": 725 }, { "epoch": 1.86, "eval_loss": 1.65813410282135, "eval_runtime": 37.7093, "eval_samples_per_second": 2.228, "eval_steps_per_second": 0.292, "step": 725 }, { "epoch": 1.88, "learning_rate": 6.169665809768638e-06, "loss": 1.6414, "step": 730 }, { "epoch": 1.88, "eval_loss": 1.6581989526748657, "eval_runtime": 37.7818, "eval_samples_per_second": 2.223, "eval_steps_per_second": 0.291, "step": 730 } ], "logging_steps": 5, "max_steps": 778, "num_train_epochs": 2, "save_steps": 10, "total_flos": 2.3714708506804224e+17, "trial_name": null, "trial_params": null }