| { |
| "best_metric": 1.657867670059204, |
| "best_model_checkpoint": "/scratch/kwamea/llama-output/checkpoint-750", |
| "epoch": 1.9280205655526992, |
| "eval_steps": 5, |
| "global_step": 750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.93573264781491e-05, |
| "loss": 1.9824, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 1.954480528831482, |
| "eval_runtime": 38.0417, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 9.87146529562982e-05, |
| "loss": 1.9245, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 1.8942010402679443, |
| "eval_runtime": 37.8934, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.80719794344473e-05, |
| "loss": 1.8616, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.8424055576324463, |
| "eval_runtime": 37.8727, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.742930591259641e-05, |
| "loss": 1.8267, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_loss": 1.8279685974121094, |
| "eval_runtime": 38.166, |
| "eval_samples_per_second": 2.201, |
| "eval_steps_per_second": 0.288, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.67866323907455e-05, |
| "loss": 1.835, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 1.8134318590164185, |
| "eval_runtime": 38.0411, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 9.61439588688946e-05, |
| "loss": 1.8236, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_loss": 1.8010995388031006, |
| "eval_runtime": 37.8753, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.550128534704372e-05, |
| "loss": 1.7186, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_loss": 1.7901933193206787, |
| "eval_runtime": 38.0374, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.485861182519281e-05, |
| "loss": 1.7959, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_loss": 1.7807564735412598, |
| "eval_runtime": 37.8784, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.421593830334192e-05, |
| "loss": 1.715, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_loss": 1.7709146738052368, |
| "eval_runtime": 37.8705, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 9.357326478149101e-05, |
| "loss": 1.7581, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_loss": 1.761339783668518, |
| "eval_runtime": 37.661, |
| "eval_samples_per_second": 2.23, |
| "eval_steps_per_second": 0.292, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.29305912596401e-05, |
| "loss": 1.7305, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_loss": 1.7389689683914185, |
| "eval_runtime": 38.0055, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 9.228791773778921e-05, |
| "loss": 1.7086, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_loss": 1.713218092918396, |
| "eval_runtime": 37.737, |
| "eval_samples_per_second": 2.226, |
| "eval_steps_per_second": 0.291, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 9.16452442159383e-05, |
| "loss": 1.7057, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_loss": 1.7108122110366821, |
| "eval_runtime": 38.0037, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.100257069408741e-05, |
| "loss": 1.7243, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 1.7041622400283813, |
| "eval_runtime": 38.1402, |
| "eval_samples_per_second": 2.202, |
| "eval_steps_per_second": 0.288, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.03598971722365e-05, |
| "loss": 1.7038, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 1.7010408639907837, |
| "eval_runtime": 37.7193, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 8.97172236503856e-05, |
| "loss": 1.6939, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 1.6987501382827759, |
| "eval_runtime": 38.0624, |
| "eval_samples_per_second": 2.207, |
| "eval_steps_per_second": 0.289, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 8.907455012853471e-05, |
| "loss": 1.645, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_loss": 1.6968218088150024, |
| "eval_runtime": 37.9205, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.84318766066838e-05, |
| "loss": 1.7016, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_loss": 1.6952500343322754, |
| "eval_runtime": 37.9617, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 8.778920308483291e-05, |
| "loss": 1.6869, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_loss": 1.6932854652404785, |
| "eval_runtime": 37.8781, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.7146529562982e-05, |
| "loss": 1.6728, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 1.6920738220214844, |
| "eval_runtime": 38.1996, |
| "eval_samples_per_second": 2.199, |
| "eval_steps_per_second": 0.288, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 8.650385604113111e-05, |
| "loss": 1.6674, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.27, |
| "eval_loss": 1.6904469728469849, |
| "eval_runtime": 38.1252, |
| "eval_samples_per_second": 2.203, |
| "eval_steps_per_second": 0.289, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.586118251928022e-05, |
| "loss": 1.7013, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 1.6895124912261963, |
| "eval_runtime": 38.0968, |
| "eval_samples_per_second": 2.205, |
| "eval_steps_per_second": 0.289, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.521850899742931e-05, |
| "loss": 1.7148, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 1.6886374950408936, |
| "eval_runtime": 37.8799, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.457583547557842e-05, |
| "loss": 1.7166, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_loss": 1.6868164539337158, |
| "eval_runtime": 37.8861, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.393316195372751e-05, |
| "loss": 1.7012, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 1.6858367919921875, |
| "eval_runtime": 38.1136, |
| "eval_samples_per_second": 2.204, |
| "eval_steps_per_second": 0.289, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.32904884318766e-05, |
| "loss": 1.6827, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_loss": 1.6849361658096313, |
| "eval_runtime": 37.9883, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.264781491002571e-05, |
| "loss": 1.7157, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_loss": 1.684756875038147, |
| "eval_runtime": 37.8726, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.200514138817481e-05, |
| "loss": 1.6668, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_loss": 1.6836236715316772, |
| "eval_runtime": 37.9586, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 8.136246786632391e-05, |
| "loss": 1.6588, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 1.6828166246414185, |
| "eval_runtime": 38.0596, |
| "eval_samples_per_second": 2.207, |
| "eval_steps_per_second": 0.289, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 8.071979434447301e-05, |
| "loss": 1.7005, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_loss": 1.681463360786438, |
| "eval_runtime": 38.0878, |
| "eval_samples_per_second": 2.205, |
| "eval_steps_per_second": 0.289, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 8.007712082262212e-05, |
| "loss": 1.6893, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 1.6811552047729492, |
| "eval_runtime": 37.9265, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 7.943444730077121e-05, |
| "loss": 1.6682, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_loss": 1.6801096200942993, |
| "eval_runtime": 38.0375, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 7.87917737789203e-05, |
| "loss": 1.6481, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 1.6800144910812378, |
| "eval_runtime": 37.8916, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 7.814910025706941e-05, |
| "loss": 1.7042, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_loss": 1.6784976720809937, |
| "eval_runtime": 37.8514, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 7.750642673521852e-05, |
| "loss": 1.6555, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_loss": 1.6780468225479126, |
| "eval_runtime": 37.8893, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 7.686375321336761e-05, |
| "loss": 1.6717, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_loss": 1.6775085926055908, |
| "eval_runtime": 37.9289, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 7.622107969151672e-05, |
| "loss": 1.6716, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 1.6770914793014526, |
| "eval_runtime": 38.0111, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 7.557840616966581e-05, |
| "loss": 1.6618, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_loss": 1.676563024520874, |
| "eval_runtime": 38.0703, |
| "eval_samples_per_second": 2.206, |
| "eval_steps_per_second": 0.289, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 7.493573264781492e-05, |
| "loss": 1.6411, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 1.6757832765579224, |
| "eval_runtime": 37.827, |
| "eval_samples_per_second": 2.221, |
| "eval_steps_per_second": 0.291, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 7.429305912596401e-05, |
| "loss": 1.6637, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 1.6751985549926758, |
| "eval_runtime": 37.836, |
| "eval_samples_per_second": 2.22, |
| "eval_steps_per_second": 0.291, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 7.365038560411311e-05, |
| "loss": 1.6142, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_loss": 1.675147294998169, |
| "eval_runtime": 37.9175, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 7.300771208226222e-05, |
| "loss": 1.6741, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_loss": 1.674437165260315, |
| "eval_runtime": 38.087, |
| "eval_samples_per_second": 2.205, |
| "eval_steps_per_second": 0.289, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 7.236503856041131e-05, |
| "loss": 1.6408, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_loss": 1.6737719774246216, |
| "eval_runtime": 38.0075, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 7.172236503856042e-05, |
| "loss": 1.6733, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_loss": 1.6732759475708008, |
| "eval_runtime": 38.1125, |
| "eval_samples_per_second": 2.204, |
| "eval_steps_per_second": 0.289, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.107969151670951e-05, |
| "loss": 1.679, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 1.6726195812225342, |
| "eval_runtime": 37.9769, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.043701799485862e-05, |
| "loss": 1.7202, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_loss": 1.671908974647522, |
| "eval_runtime": 37.8215, |
| "eval_samples_per_second": 2.221, |
| "eval_steps_per_second": 0.291, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 6.979434447300771e-05, |
| "loss": 1.6805, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 1.6715577840805054, |
| "eval_runtime": 37.9343, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.91516709511568e-05, |
| "loss": 1.6331, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_loss": 1.6715376377105713, |
| "eval_runtime": 37.9904, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.850899742930593e-05, |
| "loss": 1.6761, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_loss": 1.671446681022644, |
| "eval_runtime": 38.0357, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.786632390745502e-05, |
| "loss": 1.6994, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 1.6716604232788086, |
| "eval_runtime": 37.9609, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 6.722365038560411e-05, |
| "loss": 1.6305, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_loss": 1.6711723804473877, |
| "eval_runtime": 37.7893, |
| "eval_samples_per_second": 2.223, |
| "eval_steps_per_second": 0.291, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 6.658097686375322e-05, |
| "loss": 1.6612, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_loss": 1.670398473739624, |
| "eval_runtime": 37.7667, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 6.593830334190231e-05, |
| "loss": 1.6576, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_loss": 1.6706459522247314, |
| "eval_runtime": 37.8285, |
| "eval_samples_per_second": 2.221, |
| "eval_steps_per_second": 0.291, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.529562982005142e-05, |
| "loss": 1.6837, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_loss": 1.6699285507202148, |
| "eval_runtime": 37.7619, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 6.465295629820052e-05, |
| "loss": 1.6493, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_loss": 1.6693135499954224, |
| "eval_runtime": 37.7441, |
| "eval_samples_per_second": 2.226, |
| "eval_steps_per_second": 0.291, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 6.401028277634962e-05, |
| "loss": 1.6427, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_loss": 1.669467568397522, |
| "eval_runtime": 37.8707, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 6.336760925449872e-05, |
| "loss": 1.6236, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_loss": 1.6690431833267212, |
| "eval_runtime": 37.8946, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 6.272493573264781e-05, |
| "loss": 1.6538, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_loss": 1.6688117980957031, |
| "eval_runtime": 37.9378, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 6.208226221079692e-05, |
| "loss": 1.6558, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_loss": 1.668560266494751, |
| "eval_runtime": 38.0448, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 6.143958868894601e-05, |
| "loss": 1.6401, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 1.6680612564086914, |
| "eval_runtime": 37.8571, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 6.079691516709511e-05, |
| "loss": 1.6468, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_loss": 1.6675914525985718, |
| "eval_runtime": 37.8491, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 6.015424164524421e-05, |
| "loss": 1.6579, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 1.6671632528305054, |
| "eval_runtime": 37.7658, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 5.951156812339333e-05, |
| "loss": 1.6339, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 1.666803240776062, |
| "eval_runtime": 37.7896, |
| "eval_samples_per_second": 2.223, |
| "eval_steps_per_second": 0.291, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 5.886889460154242e-05, |
| "loss": 1.6636, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_loss": 1.6664105653762817, |
| "eval_runtime": 38.0586, |
| "eval_samples_per_second": 2.207, |
| "eval_steps_per_second": 0.289, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 5.822622107969152e-05, |
| "loss": 1.641, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 1.6665875911712646, |
| "eval_runtime": 37.9046, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 5.758354755784062e-05, |
| "loss": 1.6616, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_loss": 1.6664165258407593, |
| "eval_runtime": 37.8575, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.694087403598972e-05, |
| "loss": 1.6597, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_loss": 1.666045904159546, |
| "eval_runtime": 37.9711, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 5.6298200514138824e-05, |
| "loss": 1.67, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 1.666337490081787, |
| "eval_runtime": 37.8821, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.5655526992287924e-05, |
| "loss": 1.6344, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_loss": 1.6659029722213745, |
| "eval_runtime": 37.9188, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.501285347043702e-05, |
| "loss": 1.6623, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 1.6656525135040283, |
| "eval_runtime": 37.9951, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 5.437017994858612e-05, |
| "loss": 1.6623, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_loss": 1.6654421091079712, |
| "eval_runtime": 37.8803, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 5.372750642673522e-05, |
| "loss": 1.6741, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_loss": 1.665541648864746, |
| "eval_runtime": 37.7151, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 5.308483290488432e-05, |
| "loss": 1.6973, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_loss": 1.6651471853256226, |
| "eval_runtime": 37.9144, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 5.244215938303342e-05, |
| "loss": 1.6829, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 1.6651064157485962, |
| "eval_runtime": 37.7443, |
| "eval_samples_per_second": 2.226, |
| "eval_steps_per_second": 0.291, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 5.1799485861182514e-05, |
| "loss": 1.6923, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 1.6646034717559814, |
| "eval_runtime": 37.8783, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 5.1156812339331615e-05, |
| "loss": 1.6725, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_loss": 1.6639235019683838, |
| "eval_runtime": 37.7559, |
| "eval_samples_per_second": 2.225, |
| "eval_steps_per_second": 0.291, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.051413881748073e-05, |
| "loss": 1.6216, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_loss": 1.6643506288528442, |
| "eval_runtime": 37.9895, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.987146529562982e-05, |
| "loss": 1.6518, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.6643046140670776, |
| "eval_runtime": 37.9289, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.922879177377892e-05, |
| "loss": 1.658, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_loss": 1.6641957759857178, |
| "eval_runtime": 37.8638, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.291, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 4.8586118251928024e-05, |
| "loss": 1.6767, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 1.663757085800171, |
| "eval_runtime": 37.7728, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 4.7943444730077124e-05, |
| "loss": 1.6264, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 1.6635041236877441, |
| "eval_runtime": 37.7702, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 4.7300771208226225e-05, |
| "loss": 1.6527, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.05, |
| "eval_loss": 1.6632280349731445, |
| "eval_runtime": 37.8453, |
| "eval_samples_per_second": 2.22, |
| "eval_steps_per_second": 0.291, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 4.6658097686375325e-05, |
| "loss": 1.6157, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_loss": 1.6636598110198975, |
| "eval_runtime": 37.812, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.291, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 4.6015424164524426e-05, |
| "loss": 1.5966, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_loss": 1.663393497467041, |
| "eval_runtime": 37.7743, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 4.537275064267352e-05, |
| "loss": 1.6705, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 1.6631269454956055, |
| "eval_runtime": 38.0953, |
| "eval_samples_per_second": 2.205, |
| "eval_steps_per_second": 0.289, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 4.473007712082262e-05, |
| "loss": 1.6691, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_loss": 1.6633110046386719, |
| "eval_runtime": 37.8964, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 4.408740359897173e-05, |
| "loss": 1.6332, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_loss": 1.6628649234771729, |
| "eval_runtime": 38.0621, |
| "eval_samples_per_second": 2.207, |
| "eval_steps_per_second": 0.289, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 4.344473007712083e-05, |
| "loss": 1.5916, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_loss": 1.662834882736206, |
| "eval_runtime": 37.9722, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 4.280205655526993e-05, |
| "loss": 1.6543, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_loss": 1.6630265712738037, |
| "eval_runtime": 37.9699, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 4.215938303341902e-05, |
| "loss": 1.6353, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_loss": 1.6625033617019653, |
| "eval_runtime": 37.7505, |
| "eval_samples_per_second": 2.225, |
| "eval_steps_per_second": 0.291, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.151670951156812e-05, |
| "loss": 1.6441, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_loss": 1.662785530090332, |
| "eval_runtime": 37.75, |
| "eval_samples_per_second": 2.225, |
| "eval_steps_per_second": 0.291, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 4.0874035989717224e-05, |
| "loss": 1.6631, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_loss": 1.6627779006958008, |
| "eval_runtime": 37.864, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.291, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 4.0231362467866324e-05, |
| "loss": 1.6327, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_loss": 1.662458896636963, |
| "eval_runtime": 37.7496, |
| "eval_samples_per_second": 2.225, |
| "eval_steps_per_second": 0.291, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 3.958868894601543e-05, |
| "loss": 1.6238, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 1.6621876955032349, |
| "eval_runtime": 37.8943, |
| "eval_samples_per_second": 2.217, |
| "eval_steps_per_second": 0.29, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 3.8946015424164526e-05, |
| "loss": 1.6231, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_loss": 1.662013292312622, |
| "eval_runtime": 37.9364, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 3.8303341902313626e-05, |
| "loss": 1.6381, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_loss": 1.6615785360336304, |
| "eval_runtime": 37.91, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.766066838046273e-05, |
| "loss": 1.6419, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 1.662042498588562, |
| "eval_runtime": 37.7834, |
| "eval_samples_per_second": 2.223, |
| "eval_steps_per_second": 0.291, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 3.701799485861183e-05, |
| "loss": 1.6319, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_loss": 1.6619195938110352, |
| "eval_runtime": 37.7385, |
| "eval_samples_per_second": 2.226, |
| "eval_steps_per_second": 0.291, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 3.637532133676093e-05, |
| "loss": 1.609, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_loss": 1.6615225076675415, |
| "eval_runtime": 37.8056, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.291, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 3.573264781491003e-05, |
| "loss": 1.6435, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_loss": 1.6610548496246338, |
| "eval_runtime": 37.9353, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 3.508997429305913e-05, |
| "loss": 1.6381, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_loss": 1.661049246788025, |
| "eval_runtime": 37.9669, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 3.444730077120823e-05, |
| "loss": 1.6506, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_loss": 1.6610089540481567, |
| "eval_runtime": 37.9127, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 3.380462724935733e-05, |
| "loss": 1.6376, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_loss": 1.6608952283859253, |
| "eval_runtime": 37.9454, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 3.316195372750643e-05, |
| "loss": 1.6354, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_loss": 1.6610838174819946, |
| "eval_runtime": 37.7265, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 3.251928020565553e-05, |
| "loss": 1.6201, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_loss": 1.6610314846038818, |
| "eval_runtime": 37.8464, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 3.1876606683804625e-05, |
| "loss": 1.6461, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_loss": 1.6605802774429321, |
| "eval_runtime": 37.9503, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 3.1233933161953726e-05, |
| "loss": 1.6818, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_loss": 1.6607571840286255, |
| "eval_runtime": 37.9086, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 3.059125964010283e-05, |
| "loss": 1.6117, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_loss": 1.6605336666107178, |
| "eval_runtime": 37.947, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.994858611825193e-05, |
| "loss": 1.6252, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_loss": 1.6603213548660278, |
| "eval_runtime": 37.9826, |
| "eval_samples_per_second": 2.212, |
| "eval_steps_per_second": 0.29, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.930591259640103e-05, |
| "loss": 1.6486, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 1.6600297689437866, |
| "eval_runtime": 37.9635, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.866323907455013e-05, |
| "loss": 1.6861, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_loss": 1.6602742671966553, |
| "eval_runtime": 37.8722, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.802056555269923e-05, |
| "loss": 1.6624, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_loss": 1.6599992513656616, |
| "eval_runtime": 37.9306, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.737789203084833e-05, |
| "loss": 1.6323, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_loss": 1.6600306034088135, |
| "eval_runtime": 37.9548, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.673521850899743e-05, |
| "loss": 1.6707, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 1.6601014137268066, |
| "eval_runtime": 38.0349, |
| "eval_samples_per_second": 2.208, |
| "eval_steps_per_second": 0.289, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.6092544987146534e-05, |
| "loss": 1.6478, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_loss": 1.6597967147827148, |
| "eval_runtime": 37.9413, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.5449871465295634e-05, |
| "loss": 1.6715, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_loss": 1.6598337888717651, |
| "eval_runtime": 37.8386, |
| "eval_samples_per_second": 2.22, |
| "eval_steps_per_second": 0.291, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.480719794344473e-05, |
| "loss": 1.6626, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_loss": 1.6600011587142944, |
| "eval_runtime": 37.7901, |
| "eval_samples_per_second": 2.223, |
| "eval_steps_per_second": 0.291, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4164524421593832e-05, |
| "loss": 1.651, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_loss": 1.65969717502594, |
| "eval_runtime": 37.7643, |
| "eval_samples_per_second": 2.224, |
| "eval_steps_per_second": 0.291, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.3521850899742933e-05, |
| "loss": 1.6346, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_loss": 1.6598676443099976, |
| "eval_runtime": 37.9026, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.2879177377892033e-05, |
| "loss": 1.6533, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 1.659874677658081, |
| "eval_runtime": 37.9871, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.2236503856041134e-05, |
| "loss": 1.6913, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_loss": 1.659849762916565, |
| "eval_runtime": 37.8712, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.159383033419023e-05, |
| "loss": 1.6709, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_loss": 1.6598856449127197, |
| "eval_runtime": 37.7043, |
| "eval_samples_per_second": 2.228, |
| "eval_steps_per_second": 0.292, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.095115681233933e-05, |
| "loss": 1.598, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_loss": 1.6597983837127686, |
| "eval_runtime": 37.722, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.0308483290488432e-05, |
| "loss": 1.6434, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_loss": 1.6595433950424194, |
| "eval_runtime": 37.6125, |
| "eval_samples_per_second": 2.233, |
| "eval_steps_per_second": 0.292, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 1.9665809768637533e-05, |
| "loss": 1.6086, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 1.6594945192337036, |
| "eval_runtime": 38.0046, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1.9023136246786633e-05, |
| "loss": 1.5962, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_loss": 1.6595605611801147, |
| "eval_runtime": 37.8518, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.8380462724935734e-05, |
| "loss": 1.6695, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_loss": 1.6592403650283813, |
| "eval_runtime": 37.7285, |
| "eval_samples_per_second": 2.226, |
| "eval_steps_per_second": 0.292, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 1.7737789203084834e-05, |
| "loss": 1.6526, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_loss": 1.6591880321502686, |
| "eval_runtime": 37.758, |
| "eval_samples_per_second": 2.225, |
| "eval_steps_per_second": 0.291, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1.7095115681233935e-05, |
| "loss": 1.672, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 1.6588138341903687, |
| "eval_runtime": 37.92, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.6452442159383032e-05, |
| "loss": 1.6347, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 1.6589809656143188, |
| "eval_runtime": 37.8021, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.291, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.5809768637532136e-05, |
| "loss": 1.6227, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_loss": 1.6586792469024658, |
| "eval_runtime": 37.9128, |
| "eval_samples_per_second": 2.216, |
| "eval_steps_per_second": 0.29, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1.5167095115681235e-05, |
| "loss": 1.6308, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_loss": 1.6585420370101929, |
| "eval_runtime": 37.9881, |
| "eval_samples_per_second": 2.211, |
| "eval_steps_per_second": 0.29, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.4524421593830334e-05, |
| "loss": 1.6495, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_loss": 1.6587530374526978, |
| "eval_runtime": 37.9148, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1.3881748071979436e-05, |
| "loss": 1.6712, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_loss": 1.658648133277893, |
| "eval_runtime": 37.9313, |
| "eval_samples_per_second": 2.215, |
| "eval_steps_per_second": 0.29, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1.3239074550128535e-05, |
| "loss": 1.636, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 1.6584407091140747, |
| "eval_runtime": 37.7963, |
| "eval_samples_per_second": 2.222, |
| "eval_steps_per_second": 0.291, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.2596401028277636e-05, |
| "loss": 1.6453, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_loss": 1.658594012260437, |
| "eval_runtime": 37.7226, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.1953727506426736e-05, |
| "loss": 1.6509, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 1.658467411994934, |
| "eval_runtime": 37.8179, |
| "eval_samples_per_second": 2.221, |
| "eval_steps_per_second": 0.291, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.1311053984575835e-05, |
| "loss": 1.6489, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_loss": 1.6585352420806885, |
| "eval_runtime": 37.6768, |
| "eval_samples_per_second": 2.229, |
| "eval_steps_per_second": 0.292, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.0668380462724936e-05, |
| "loss": 1.6424, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 1.6582835912704468, |
| "eval_runtime": 38.0083, |
| "eval_samples_per_second": 2.21, |
| "eval_steps_per_second": 0.289, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.0025706940874038e-05, |
| "loss": 1.6131, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_loss": 1.658105731010437, |
| "eval_runtime": 37.8711, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 9.383033419023137e-06, |
| "loss": 1.6225, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_loss": 1.658282995223999, |
| "eval_runtime": 37.8125, |
| "eval_samples_per_second": 2.221, |
| "eval_steps_per_second": 0.291, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 8.740359897172237e-06, |
| "loss": 1.6928, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 1.6581056118011475, |
| "eval_runtime": 37.863, |
| "eval_samples_per_second": 2.219, |
| "eval_steps_per_second": 0.291, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.097686375321336e-06, |
| "loss": 1.6227, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_loss": 1.6581188440322876, |
| "eval_runtime": 37.8646, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.291, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.4550128534704376e-06, |
| "loss": 1.6593, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_loss": 1.6579999923706055, |
| "eval_runtime": 37.876, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.812339331619537e-06, |
| "loss": 1.6642, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 1.65813410282135, |
| "eval_runtime": 37.7093, |
| "eval_samples_per_second": 2.228, |
| "eval_steps_per_second": 0.292, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.169665809768638e-06, |
| "loss": 1.6414, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_loss": 1.6581989526748657, |
| "eval_runtime": 37.7818, |
| "eval_samples_per_second": 2.223, |
| "eval_steps_per_second": 0.291, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 5.526992287917738e-06, |
| "loss": 1.6519, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_loss": 1.6577394008636475, |
| "eval_runtime": 37.7264, |
| "eval_samples_per_second": 2.227, |
| "eval_steps_per_second": 0.292, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 4.884318766066838e-06, |
| "loss": 1.6485, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_loss": 1.6580238342285156, |
| "eval_runtime": 37.8729, |
| "eval_samples_per_second": 2.218, |
| "eval_steps_per_second": 0.29, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 4.241645244215939e-06, |
| "loss": 1.6261, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 1.6580148935317993, |
| "eval_runtime": 37.9546, |
| "eval_samples_per_second": 2.213, |
| "eval_steps_per_second": 0.29, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 3.598971722365039e-06, |
| "loss": 1.657, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 1.657867670059204, |
| "eval_runtime": 37.9411, |
| "eval_samples_per_second": 2.214, |
| "eval_steps_per_second": 0.29, |
| "step": 750 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 778, |
| "num_train_epochs": 2, |
| "save_steps": 10, |
| "total_flos": 2.4364649130491904e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|