| { | |
| "best_metric": 1.6579999923706055, | |
| "best_model_checkpoint": "/scratch/kwamea/llama-output/checkpoint-720", | |
| "epoch": 1.8766066838046274, | |
| "eval_steps": 5, | |
| "global_step": 730, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.93573264781491e-05, | |
| "loss": 1.9824, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.954480528831482, | |
| "eval_runtime": 38.0417, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.87146529562982e-05, | |
| "loss": 1.9245, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.8942010402679443, | |
| "eval_runtime": 37.8934, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.80719794344473e-05, | |
| "loss": 1.8616, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.8424055576324463, | |
| "eval_runtime": 37.8727, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.742930591259641e-05, | |
| "loss": 1.8267, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.8279685974121094, | |
| "eval_runtime": 38.166, | |
| "eval_samples_per_second": 2.201, | |
| "eval_steps_per_second": 0.288, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.67866323907455e-05, | |
| "loss": 1.835, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.8134318590164185, | |
| "eval_runtime": 38.0411, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.61439588688946e-05, | |
| "loss": 1.8236, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.8010995388031006, | |
| "eval_runtime": 37.8753, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.550128534704372e-05, | |
| "loss": 1.7186, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.7901933193206787, | |
| "eval_runtime": 38.0374, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.485861182519281e-05, | |
| "loss": 1.7959, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.7807564735412598, | |
| "eval_runtime": 37.8784, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.421593830334192e-05, | |
| "loss": 1.715, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.7709146738052368, | |
| "eval_runtime": 37.8705, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.357326478149101e-05, | |
| "loss": 1.7581, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.761339783668518, | |
| "eval_runtime": 37.661, | |
| "eval_samples_per_second": 2.23, | |
| "eval_steps_per_second": 0.292, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.29305912596401e-05, | |
| "loss": 1.7305, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.7389689683914185, | |
| "eval_runtime": 38.0055, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.228791773778921e-05, | |
| "loss": 1.7086, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.713218092918396, | |
| "eval_runtime": 37.737, | |
| "eval_samples_per_second": 2.226, | |
| "eval_steps_per_second": 0.291, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.16452442159383e-05, | |
| "loss": 1.7057, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.7108122110366821, | |
| "eval_runtime": 38.0037, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.100257069408741e-05, | |
| "loss": 1.7243, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.7041622400283813, | |
| "eval_runtime": 38.1402, | |
| "eval_samples_per_second": 2.202, | |
| "eval_steps_per_second": 0.288, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.03598971722365e-05, | |
| "loss": 1.7038, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.7010408639907837, | |
| "eval_runtime": 37.7193, | |
| "eval_samples_per_second": 2.227, | |
| "eval_steps_per_second": 0.292, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 8.97172236503856e-05, | |
| "loss": 1.6939, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.6987501382827759, | |
| "eval_runtime": 38.0624, | |
| "eval_samples_per_second": 2.207, | |
| "eval_steps_per_second": 0.289, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 8.907455012853471e-05, | |
| "loss": 1.645, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.6968218088150024, | |
| "eval_runtime": 37.9205, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 8.84318766066838e-05, | |
| "loss": 1.7016, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.6952500343322754, | |
| "eval_runtime": 37.9617, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 8.778920308483291e-05, | |
| "loss": 1.6869, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.6932854652404785, | |
| "eval_runtime": 37.8781, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.7146529562982e-05, | |
| "loss": 1.6728, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.6920738220214844, | |
| "eval_runtime": 38.1996, | |
| "eval_samples_per_second": 2.199, | |
| "eval_steps_per_second": 0.288, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.650385604113111e-05, | |
| "loss": 1.6674, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.6904469728469849, | |
| "eval_runtime": 38.1252, | |
| "eval_samples_per_second": 2.203, | |
| "eval_steps_per_second": 0.289, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.586118251928022e-05, | |
| "loss": 1.7013, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.6895124912261963, | |
| "eval_runtime": 38.0968, | |
| "eval_samples_per_second": 2.205, | |
| "eval_steps_per_second": 0.289, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.521850899742931e-05, | |
| "loss": 1.7148, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.6886374950408936, | |
| "eval_runtime": 37.8799, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.457583547557842e-05, | |
| "loss": 1.7166, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.6868164539337158, | |
| "eval_runtime": 37.8861, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.393316195372751e-05, | |
| "loss": 1.7012, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.6858367919921875, | |
| "eval_runtime": 38.1136, | |
| "eval_samples_per_second": 2.204, | |
| "eval_steps_per_second": 0.289, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.32904884318766e-05, | |
| "loss": 1.6827, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.6849361658096313, | |
| "eval_runtime": 37.9883, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 8.264781491002571e-05, | |
| "loss": 1.7157, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.684756875038147, | |
| "eval_runtime": 37.8726, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 8.200514138817481e-05, | |
| "loss": 1.6668, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.6836236715316772, | |
| "eval_runtime": 37.9586, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 8.136246786632391e-05, | |
| "loss": 1.6588, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.6828166246414185, | |
| "eval_runtime": 38.0596, | |
| "eval_samples_per_second": 2.207, | |
| "eval_steps_per_second": 0.289, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 8.071979434447301e-05, | |
| "loss": 1.7005, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.681463360786438, | |
| "eval_runtime": 38.0878, | |
| "eval_samples_per_second": 2.205, | |
| "eval_steps_per_second": 0.289, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 8.007712082262212e-05, | |
| "loss": 1.6893, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.6811552047729492, | |
| "eval_runtime": 37.9265, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 7.943444730077121e-05, | |
| "loss": 1.6682, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.6801096200942993, | |
| "eval_runtime": 38.0375, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 7.87917737789203e-05, | |
| "loss": 1.6481, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 1.6800144910812378, | |
| "eval_runtime": 37.8916, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 7.814910025706941e-05, | |
| "loss": 1.7042, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 1.6784976720809937, | |
| "eval_runtime": 37.8514, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 7.750642673521852e-05, | |
| "loss": 1.6555, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 1.6780468225479126, | |
| "eval_runtime": 37.8893, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 7.686375321336761e-05, | |
| "loss": 1.6717, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 1.6775085926055908, | |
| "eval_runtime": 37.9289, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 7.622107969151672e-05, | |
| "loss": 1.6716, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 1.6770914793014526, | |
| "eval_runtime": 38.0111, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 7.557840616966581e-05, | |
| "loss": 1.6618, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 1.676563024520874, | |
| "eval_runtime": 38.0703, | |
| "eval_samples_per_second": 2.206, | |
| "eval_steps_per_second": 0.289, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 7.493573264781492e-05, | |
| "loss": 1.6411, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 1.6757832765579224, | |
| "eval_runtime": 37.827, | |
| "eval_samples_per_second": 2.221, | |
| "eval_steps_per_second": 0.291, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 7.429305912596401e-05, | |
| "loss": 1.6637, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.6751985549926758, | |
| "eval_runtime": 37.836, | |
| "eval_samples_per_second": 2.22, | |
| "eval_steps_per_second": 0.291, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 7.365038560411311e-05, | |
| "loss": 1.6142, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 1.675147294998169, | |
| "eval_runtime": 37.9175, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 7.300771208226222e-05, | |
| "loss": 1.6741, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 1.674437165260315, | |
| "eval_runtime": 38.087, | |
| "eval_samples_per_second": 2.205, | |
| "eval_steps_per_second": 0.289, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 7.236503856041131e-05, | |
| "loss": 1.6408, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_loss": 1.6737719774246216, | |
| "eval_runtime": 38.0075, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 7.172236503856042e-05, | |
| "loss": 1.6733, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 1.6732759475708008, | |
| "eval_runtime": 38.1125, | |
| "eval_samples_per_second": 2.204, | |
| "eval_steps_per_second": 0.289, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.107969151670951e-05, | |
| "loss": 1.679, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 1.6726195812225342, | |
| "eval_runtime": 37.9769, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.043701799485862e-05, | |
| "loss": 1.7202, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 1.671908974647522, | |
| "eval_runtime": 37.8215, | |
| "eval_samples_per_second": 2.221, | |
| "eval_steps_per_second": 0.291, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.979434447300771e-05, | |
| "loss": 1.6805, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 1.6715577840805054, | |
| "eval_runtime": 37.9343, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.91516709511568e-05, | |
| "loss": 1.6331, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 1.6715376377105713, | |
| "eval_runtime": 37.9904, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.850899742930593e-05, | |
| "loss": 1.6761, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 1.671446681022644, | |
| "eval_runtime": 38.0357, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.786632390745502e-05, | |
| "loss": 1.6994, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.6716604232788086, | |
| "eval_runtime": 37.9609, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 6.722365038560411e-05, | |
| "loss": 1.6305, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 1.6711723804473877, | |
| "eval_runtime": 37.7893, | |
| "eval_samples_per_second": 2.223, | |
| "eval_steps_per_second": 0.291, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.658097686375322e-05, | |
| "loss": 1.6612, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 1.670398473739624, | |
| "eval_runtime": 37.7667, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 6.593830334190231e-05, | |
| "loss": 1.6576, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 1.6706459522247314, | |
| "eval_runtime": 37.8285, | |
| "eval_samples_per_second": 2.221, | |
| "eval_steps_per_second": 0.291, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 6.529562982005142e-05, | |
| "loss": 1.6837, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 1.6699285507202148, | |
| "eval_runtime": 37.7619, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 6.465295629820052e-05, | |
| "loss": 1.6493, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 1.6693135499954224, | |
| "eval_runtime": 37.7441, | |
| "eval_samples_per_second": 2.226, | |
| "eval_steps_per_second": 0.291, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 6.401028277634962e-05, | |
| "loss": 1.6427, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 1.669467568397522, | |
| "eval_runtime": 37.8707, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 6.336760925449872e-05, | |
| "loss": 1.6236, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 1.6690431833267212, | |
| "eval_runtime": 37.8946, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 6.272493573264781e-05, | |
| "loss": 1.6538, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 1.6688117980957031, | |
| "eval_runtime": 37.9378, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 6.208226221079692e-05, | |
| "loss": 1.6558, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 1.668560266494751, | |
| "eval_runtime": 38.0448, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.143958868894601e-05, | |
| "loss": 1.6401, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.6680612564086914, | |
| "eval_runtime": 37.8571, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.079691516709511e-05, | |
| "loss": 1.6468, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 1.6675914525985718, | |
| "eval_runtime": 37.8491, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.015424164524421e-05, | |
| "loss": 1.6579, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 1.6671632528305054, | |
| "eval_runtime": 37.7658, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.951156812339333e-05, | |
| "loss": 1.6339, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 1.666803240776062, | |
| "eval_runtime": 37.7896, | |
| "eval_samples_per_second": 2.223, | |
| "eval_steps_per_second": 0.291, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.886889460154242e-05, | |
| "loss": 1.6636, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 1.6664105653762817, | |
| "eval_runtime": 38.0586, | |
| "eval_samples_per_second": 2.207, | |
| "eval_steps_per_second": 0.289, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 5.822622107969152e-05, | |
| "loss": 1.641, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 1.6665875911712646, | |
| "eval_runtime": 37.9046, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.758354755784062e-05, | |
| "loss": 1.6616, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 1.6664165258407593, | |
| "eval_runtime": 37.8575, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.694087403598972e-05, | |
| "loss": 1.6597, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 1.666045904159546, | |
| "eval_runtime": 37.9711, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 5.6298200514138824e-05, | |
| "loss": 1.67, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 1.666337490081787, | |
| "eval_runtime": 37.8821, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.5655526992287924e-05, | |
| "loss": 1.6344, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 1.6659029722213745, | |
| "eval_runtime": 37.9188, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.501285347043702e-05, | |
| "loss": 1.6623, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.6656525135040283, | |
| "eval_runtime": 37.9951, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.437017994858612e-05, | |
| "loss": 1.6623, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 1.6654421091079712, | |
| "eval_runtime": 37.8803, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 5.372750642673522e-05, | |
| "loss": 1.6741, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 1.665541648864746, | |
| "eval_runtime": 37.7151, | |
| "eval_samples_per_second": 2.227, | |
| "eval_steps_per_second": 0.292, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5.308483290488432e-05, | |
| "loss": 1.6973, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 1.6651471853256226, | |
| "eval_runtime": 37.9144, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5.244215938303342e-05, | |
| "loss": 1.6829, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 1.6651064157485962, | |
| "eval_runtime": 37.7443, | |
| "eval_samples_per_second": 2.226, | |
| "eval_steps_per_second": 0.291, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 5.1799485861182514e-05, | |
| "loss": 1.6923, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 1.6646034717559814, | |
| "eval_runtime": 37.8783, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.1156812339331615e-05, | |
| "loss": 1.6725, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 1.6639235019683838, | |
| "eval_runtime": 37.7559, | |
| "eval_samples_per_second": 2.225, | |
| "eval_steps_per_second": 0.291, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.051413881748073e-05, | |
| "loss": 1.6216, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 1.6643506288528442, | |
| "eval_runtime": 37.9895, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.987146529562982e-05, | |
| "loss": 1.6518, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.6643046140670776, | |
| "eval_runtime": 37.9289, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.922879177377892e-05, | |
| "loss": 1.658, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 1.6641957759857178, | |
| "eval_runtime": 37.8638, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.291, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.8586118251928024e-05, | |
| "loss": 1.6767, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 1.663757085800171, | |
| "eval_runtime": 37.7728, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 4.7943444730077124e-05, | |
| "loss": 1.6264, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 1.6635041236877441, | |
| "eval_runtime": 37.7702, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 4.7300771208226225e-05, | |
| "loss": 1.6527, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 1.6632280349731445, | |
| "eval_runtime": 37.8453, | |
| "eval_samples_per_second": 2.22, | |
| "eval_steps_per_second": 0.291, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.6658097686375325e-05, | |
| "loss": 1.6157, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 1.6636598110198975, | |
| "eval_runtime": 37.812, | |
| "eval_samples_per_second": 2.222, | |
| "eval_steps_per_second": 0.291, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.6015424164524426e-05, | |
| "loss": 1.5966, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 1.663393497467041, | |
| "eval_runtime": 37.7743, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.537275064267352e-05, | |
| "loss": 1.6705, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 1.6631269454956055, | |
| "eval_runtime": 38.0953, | |
| "eval_samples_per_second": 2.205, | |
| "eval_steps_per_second": 0.289, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 4.473007712082262e-05, | |
| "loss": 1.6691, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 1.6633110046386719, | |
| "eval_runtime": 37.8964, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.408740359897173e-05, | |
| "loss": 1.6332, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 1.6628649234771729, | |
| "eval_runtime": 38.0621, | |
| "eval_samples_per_second": 2.207, | |
| "eval_steps_per_second": 0.289, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.344473007712083e-05, | |
| "loss": 1.5916, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 1.662834882736206, | |
| "eval_runtime": 37.9722, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.280205655526993e-05, | |
| "loss": 1.6543, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 1.6630265712738037, | |
| "eval_runtime": 37.9699, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 4.215938303341902e-05, | |
| "loss": 1.6353, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 1.6625033617019653, | |
| "eval_runtime": 37.7505, | |
| "eval_samples_per_second": 2.225, | |
| "eval_steps_per_second": 0.291, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.151670951156812e-05, | |
| "loss": 1.6441, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 1.662785530090332, | |
| "eval_runtime": 37.75, | |
| "eval_samples_per_second": 2.225, | |
| "eval_steps_per_second": 0.291, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.0874035989717224e-05, | |
| "loss": 1.6631, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 1.6627779006958008, | |
| "eval_runtime": 37.864, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.291, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.0231362467866324e-05, | |
| "loss": 1.6327, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 1.662458896636963, | |
| "eval_runtime": 37.7496, | |
| "eval_samples_per_second": 2.225, | |
| "eval_steps_per_second": 0.291, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.958868894601543e-05, | |
| "loss": 1.6238, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 1.6621876955032349, | |
| "eval_runtime": 37.8943, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.29, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.8946015424164526e-05, | |
| "loss": 1.6231, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 1.662013292312622, | |
| "eval_runtime": 37.9364, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.8303341902313626e-05, | |
| "loss": 1.6381, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 1.6615785360336304, | |
| "eval_runtime": 37.91, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.766066838046273e-05, | |
| "loss": 1.6419, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 1.662042498588562, | |
| "eval_runtime": 37.7834, | |
| "eval_samples_per_second": 2.223, | |
| "eval_steps_per_second": 0.291, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.701799485861183e-05, | |
| "loss": 1.6319, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 1.6619195938110352, | |
| "eval_runtime": 37.7385, | |
| "eval_samples_per_second": 2.226, | |
| "eval_steps_per_second": 0.291, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.637532133676093e-05, | |
| "loss": 1.609, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 1.6615225076675415, | |
| "eval_runtime": 37.8056, | |
| "eval_samples_per_second": 2.222, | |
| "eval_steps_per_second": 0.291, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.573264781491003e-05, | |
| "loss": 1.6435, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 1.6610548496246338, | |
| "eval_runtime": 37.9353, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.508997429305913e-05, | |
| "loss": 1.6381, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 1.661049246788025, | |
| "eval_runtime": 37.9669, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.444730077120823e-05, | |
| "loss": 1.6506, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 1.6610089540481567, | |
| "eval_runtime": 37.9127, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.380462724935733e-05, | |
| "loss": 1.6376, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 1.6608952283859253, | |
| "eval_runtime": 37.9454, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.316195372750643e-05, | |
| "loss": 1.6354, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 1.6610838174819946, | |
| "eval_runtime": 37.7265, | |
| "eval_samples_per_second": 2.227, | |
| "eval_steps_per_second": 0.292, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.251928020565553e-05, | |
| "loss": 1.6201, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 1.6610314846038818, | |
| "eval_runtime": 37.8464, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.1876606683804625e-05, | |
| "loss": 1.6461, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 1.6605802774429321, | |
| "eval_runtime": 37.9503, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.1233933161953726e-05, | |
| "loss": 1.6818, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 1.6607571840286255, | |
| "eval_runtime": 37.9086, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.059125964010283e-05, | |
| "loss": 1.6117, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 1.6605336666107178, | |
| "eval_runtime": 37.947, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.994858611825193e-05, | |
| "loss": 1.6252, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 1.6603213548660278, | |
| "eval_runtime": 37.9826, | |
| "eval_samples_per_second": 2.212, | |
| "eval_steps_per_second": 0.29, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.930591259640103e-05, | |
| "loss": 1.6486, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 1.6600297689437866, | |
| "eval_runtime": 37.9635, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.866323907455013e-05, | |
| "loss": 1.6861, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 1.6602742671966553, | |
| "eval_runtime": 37.8722, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.802056555269923e-05, | |
| "loss": 1.6624, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 1.6599992513656616, | |
| "eval_runtime": 37.9306, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.737789203084833e-05, | |
| "loss": 1.6323, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 1.6600306034088135, | |
| "eval_runtime": 37.9548, | |
| "eval_samples_per_second": 2.213, | |
| "eval_steps_per_second": 0.29, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.673521850899743e-05, | |
| "loss": 1.6707, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 1.6601014137268066, | |
| "eval_runtime": 38.0349, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 0.289, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.6092544987146534e-05, | |
| "loss": 1.6478, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 1.6597967147827148, | |
| "eval_runtime": 37.9413, | |
| "eval_samples_per_second": 2.214, | |
| "eval_steps_per_second": 0.29, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5449871465295634e-05, | |
| "loss": 1.6715, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 1.6598337888717651, | |
| "eval_runtime": 37.8386, | |
| "eval_samples_per_second": 2.22, | |
| "eval_steps_per_second": 0.291, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.480719794344473e-05, | |
| "loss": 1.6626, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 1.6600011587142944, | |
| "eval_runtime": 37.7901, | |
| "eval_samples_per_second": 2.223, | |
| "eval_steps_per_second": 0.291, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4164524421593832e-05, | |
| "loss": 1.651, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 1.65969717502594, | |
| "eval_runtime": 37.7643, | |
| "eval_samples_per_second": 2.224, | |
| "eval_steps_per_second": 0.291, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.3521850899742933e-05, | |
| "loss": 1.6346, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 1.6598676443099976, | |
| "eval_runtime": 37.9026, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.2879177377892033e-05, | |
| "loss": 1.6533, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 1.659874677658081, | |
| "eval_runtime": 37.9871, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.2236503856041134e-05, | |
| "loss": 1.6913, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 1.659849762916565, | |
| "eval_runtime": 37.8712, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.159383033419023e-05, | |
| "loss": 1.6709, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 1.6598856449127197, | |
| "eval_runtime": 37.7043, | |
| "eval_samples_per_second": 2.228, | |
| "eval_steps_per_second": 0.292, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.095115681233933e-05, | |
| "loss": 1.598, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 1.6597983837127686, | |
| "eval_runtime": 37.722, | |
| "eval_samples_per_second": 2.227, | |
| "eval_steps_per_second": 0.292, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.0308483290488432e-05, | |
| "loss": 1.6434, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 1.6595433950424194, | |
| "eval_runtime": 37.6125, | |
| "eval_samples_per_second": 2.233, | |
| "eval_steps_per_second": 0.292, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.9665809768637533e-05, | |
| "loss": 1.6086, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 1.6594945192337036, | |
| "eval_runtime": 38.0046, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.9023136246786633e-05, | |
| "loss": 1.5962, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 1.6595605611801147, | |
| "eval_runtime": 37.8518, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.8380462724935734e-05, | |
| "loss": 1.6695, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 1.6592403650283813, | |
| "eval_runtime": 37.7285, | |
| "eval_samples_per_second": 2.226, | |
| "eval_steps_per_second": 0.292, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.7737789203084834e-05, | |
| "loss": 1.6526, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 1.6591880321502686, | |
| "eval_runtime": 37.758, | |
| "eval_samples_per_second": 2.225, | |
| "eval_steps_per_second": 0.291, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 1.7095115681233935e-05, | |
| "loss": 1.672, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 1.6588138341903687, | |
| "eval_runtime": 37.92, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.6452442159383032e-05, | |
| "loss": 1.6347, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 1.6589809656143188, | |
| "eval_runtime": 37.8021, | |
| "eval_samples_per_second": 2.222, | |
| "eval_steps_per_second": 0.291, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 1.5809768637532136e-05, | |
| "loss": 1.6227, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 1.6586792469024658, | |
| "eval_runtime": 37.9128, | |
| "eval_samples_per_second": 2.216, | |
| "eval_steps_per_second": 0.29, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.5167095115681235e-05, | |
| "loss": 1.6308, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 1.6585420370101929, | |
| "eval_runtime": 37.9881, | |
| "eval_samples_per_second": 2.211, | |
| "eval_steps_per_second": 0.29, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.4524421593830334e-05, | |
| "loss": 1.6495, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 1.6587530374526978, | |
| "eval_runtime": 37.9148, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.3881748071979436e-05, | |
| "loss": 1.6712, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 1.658648133277893, | |
| "eval_runtime": 37.9313, | |
| "eval_samples_per_second": 2.215, | |
| "eval_steps_per_second": 0.29, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.3239074550128535e-05, | |
| "loss": 1.636, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_loss": 1.6584407091140747, | |
| "eval_runtime": 37.7963, | |
| "eval_samples_per_second": 2.222, | |
| "eval_steps_per_second": 0.291, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.2596401028277636e-05, | |
| "loss": 1.6453, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 1.658594012260437, | |
| "eval_runtime": 37.7226, | |
| "eval_samples_per_second": 2.227, | |
| "eval_steps_per_second": 0.292, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.1953727506426736e-05, | |
| "loss": 1.6509, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 1.658467411994934, | |
| "eval_runtime": 37.8179, | |
| "eval_samples_per_second": 2.221, | |
| "eval_steps_per_second": 0.291, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.1311053984575835e-05, | |
| "loss": 1.6489, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 1.6585352420806885, | |
| "eval_runtime": 37.6768, | |
| "eval_samples_per_second": 2.229, | |
| "eval_steps_per_second": 0.292, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.0668380462724936e-05, | |
| "loss": 1.6424, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 1.6582835912704468, | |
| "eval_runtime": 38.0083, | |
| "eval_samples_per_second": 2.21, | |
| "eval_steps_per_second": 0.289, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.0025706940874038e-05, | |
| "loss": 1.6131, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 1.658105731010437, | |
| "eval_runtime": 37.8711, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 9.383033419023137e-06, | |
| "loss": 1.6225, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 1.658282995223999, | |
| "eval_runtime": 37.8125, | |
| "eval_samples_per_second": 2.221, | |
| "eval_steps_per_second": 0.291, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 8.740359897172237e-06, | |
| "loss": 1.6928, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 1.6581056118011475, | |
| "eval_runtime": 37.863, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.291, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 8.097686375321336e-06, | |
| "loss": 1.6227, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 1.6581188440322876, | |
| "eval_runtime": 37.8646, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.291, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 7.4550128534704376e-06, | |
| "loss": 1.6593, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 1.6579999923706055, | |
| "eval_runtime": 37.876, | |
| "eval_samples_per_second": 2.218, | |
| "eval_steps_per_second": 0.29, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 6.812339331619537e-06, | |
| "loss": 1.6642, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 1.65813410282135, | |
| "eval_runtime": 37.7093, | |
| "eval_samples_per_second": 2.228, | |
| "eval_steps_per_second": 0.292, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 6.169665809768638e-06, | |
| "loss": 1.6414, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 1.6581989526748657, | |
| "eval_runtime": 37.7818, | |
| "eval_samples_per_second": 2.223, | |
| "eval_steps_per_second": 0.291, | |
| "step": 730 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 778, | |
| "num_train_epochs": 2, | |
| "save_steps": 10, | |
| "total_flos": 2.3714708506804224e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |