| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.999433267214508, |
| "global_step": 44110, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.9671276354568124e-06, |
| "loss": 1.5714, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 2.934255270913625e-06, |
| "loss": 1.5492, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 2.9013829063704375e-06, |
| "loss": 1.5441, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2.8685105418272502e-06, |
| "loss": 1.5406, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2.8356381772840626e-06, |
| "loss": 1.5376, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.802765812740875e-06, |
| "loss": 1.5358, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.7698934481976876e-06, |
| "loss": 1.5342, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.7370210836545004e-06, |
| "loss": 1.5329, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 1.5299354791641235, |
| "eval_runtime": 95.1335, |
| "eval_samples_per_second": 299.737, |
| "eval_steps_per_second": 1.882, |
| "step": 4411 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 2.7041487191113127e-06, |
| "loss": 1.5308, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 2.6712763545681255e-06, |
| "loss": 1.5293, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.638403990024938e-06, |
| "loss": 1.5287, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.60553162548175e-06, |
| "loss": 1.5277, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.572659260938563e-06, |
| "loss": 1.5267, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.539786896395375e-06, |
| "loss": 1.5256, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.506914531852188e-06, |
| "loss": 1.525, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2.4740421673090003e-06, |
| "loss": 1.5246, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 2.4411698027658126e-06, |
| "loss": 1.5238, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 1.5252745151519775, |
| "eval_runtime": 95.3162, |
| "eval_samples_per_second": 299.162, |
| "eval_steps_per_second": 1.878, |
| "step": 8822 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 2.4082974382226254e-06, |
| "loss": 1.5238, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 2.3754250736794377e-06, |
| "loss": 1.5222, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.34255270913625e-06, |
| "loss": 1.5216, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.3096803445930628e-06, |
| "loss": 1.5208, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 2.2768079800498755e-06, |
| "loss": 1.5204, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2.243935615506688e-06, |
| "loss": 1.5202, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 2.2110632509635006e-06, |
| "loss": 1.5195, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.178190886420313e-06, |
| "loss": 1.5193, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 2.1453185218771257e-06, |
| "loss": 1.5183, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 1.5211608409881592, |
| "eval_runtime": 96.0051, |
| "eval_samples_per_second": 297.015, |
| "eval_steps_per_second": 1.864, |
| "step": 13233 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 2.112446157333938e-06, |
| "loss": 1.5179, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 2.0795737927907503e-06, |
| "loss": 1.5175, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 2.0467014282475627e-06, |
| "loss": 1.5173, |
| "step": 14500 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 2.0138290637043754e-06, |
| "loss": 1.5166, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 1.980956699161188e-06, |
| "loss": 1.5166, |
| "step": 15500 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 1.9480843346180005e-06, |
| "loss": 1.5163, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 1.9152119700748132e-06, |
| "loss": 1.5158, |
| "step": 16500 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 1.8823396055316256e-06, |
| "loss": 1.5156, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 1.8494672409884385e-06, |
| "loss": 1.5152, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 1.5186687707901, |
| "eval_runtime": 95.8953, |
| "eval_samples_per_second": 297.356, |
| "eval_steps_per_second": 1.867, |
| "step": 17645 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 1.8165948764452506e-06, |
| "loss": 1.5147, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 1.7837225119020632e-06, |
| "loss": 1.5137, |
| "step": 18500 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 1.7508501473588757e-06, |
| "loss": 1.5138, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 1.717977782815688e-06, |
| "loss": 1.5134, |
| "step": 19500 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 1.6851054182725004e-06, |
| "loss": 1.5136, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 1.6522330537293131e-06, |
| "loss": 1.5135, |
| "step": 20500 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 1.6193606891861259e-06, |
| "loss": 1.5128, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 1.5864883246429384e-06, |
| "loss": 1.5133, |
| "step": 21500 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 1.5536159600997505e-06, |
| "loss": 1.5127, |
| "step": 22000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 1.5171175003051758, |
| "eval_runtime": 94.991, |
| "eval_samples_per_second": 300.186, |
| "eval_steps_per_second": 1.884, |
| "step": 22056 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 1.520743595556563e-06, |
| "loss": 1.5109, |
| "step": 22500 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 1.4878712310133756e-06, |
| "loss": 1.512, |
| "step": 23000 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 1.454998866470188e-06, |
| "loss": 1.5115, |
| "step": 23500 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 1.4221265019270007e-06, |
| "loss": 1.5113, |
| "step": 24000 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 1.3892541373838134e-06, |
| "loss": 1.511, |
| "step": 24500 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 1.3563817728406258e-06, |
| "loss": 1.5112, |
| "step": 25000 |
| }, |
| { |
| "epoch": 5.78, |
| "learning_rate": 1.3235094082974385e-06, |
| "loss": 1.5106, |
| "step": 25500 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 1.2906370437542509e-06, |
| "loss": 1.5105, |
| "step": 26000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 1.51548171043396, |
| "eval_runtime": 95.4859, |
| "eval_samples_per_second": 298.631, |
| "eval_steps_per_second": 1.875, |
| "step": 26467 |
| }, |
| { |
| "epoch": 6.01, |
| "learning_rate": 1.2577646792110636e-06, |
| "loss": 1.511, |
| "step": 26500 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 1.224892314667876e-06, |
| "loss": 1.5103, |
| "step": 27000 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 1.1920199501246883e-06, |
| "loss": 1.5095, |
| "step": 27500 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 1.159147585581501e-06, |
| "loss": 1.5096, |
| "step": 28000 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 1.1262752210383133e-06, |
| "loss": 1.5099, |
| "step": 28500 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 1.0934028564951257e-06, |
| "loss": 1.5096, |
| "step": 29000 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 1.0605304919519384e-06, |
| "loss": 1.5091, |
| "step": 29500 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 1.0276581274087507e-06, |
| "loss": 1.5096, |
| "step": 30000 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 9.947857628655633e-07, |
| "loss": 1.5087, |
| "step": 30500 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 1.5147736072540283, |
| "eval_runtime": 95.3411, |
| "eval_samples_per_second": 299.084, |
| "eval_steps_per_second": 1.877, |
| "step": 30878 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 9.61913398322376e-07, |
| "loss": 1.5093, |
| "step": 31000 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 9.290410337791883e-07, |
| "loss": 1.5085, |
| "step": 31500 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 8.96168669236001e-07, |
| "loss": 1.5082, |
| "step": 32000 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 8.632963046928134e-07, |
| "loss": 1.5082, |
| "step": 32500 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 8.304239401496259e-07, |
| "loss": 1.5082, |
| "step": 33000 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 7.975515756064386e-07, |
| "loss": 1.5084, |
| "step": 33500 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 7.64679211063251e-07, |
| "loss": 1.5083, |
| "step": 34000 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 7.318068465200634e-07, |
| "loss": 1.5084, |
| "step": 34500 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 6.989344819768761e-07, |
| "loss": 1.5078, |
| "step": 35000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 1.5141184329986572, |
| "eval_runtime": 95.3059, |
| "eval_samples_per_second": 299.195, |
| "eval_steps_per_second": 1.878, |
| "step": 35290 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 6.660621174336885e-07, |
| "loss": 1.5083, |
| "step": 35500 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 6.331897528905012e-07, |
| "loss": 1.5078, |
| "step": 36000 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 6.003173883473137e-07, |
| "loss": 1.5077, |
| "step": 36500 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 5.67445023804126e-07, |
| "loss": 1.5079, |
| "step": 37000 |
| }, |
| { |
| "epoch": 8.5, |
| "learning_rate": 5.345726592609387e-07, |
| "loss": 1.5073, |
| "step": 37500 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 5.017002947177512e-07, |
| "loss": 1.5072, |
| "step": 38000 |
| }, |
| { |
| "epoch": 8.73, |
| "learning_rate": 4.688279301745635e-07, |
| "loss": 1.5073, |
| "step": 38500 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 4.3595556563137624e-07, |
| "loss": 1.5072, |
| "step": 39000 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 4.030832010881886e-07, |
| "loss": 1.5071, |
| "step": 39500 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 1.5136668682098389, |
| "eval_runtime": 95.2932, |
| "eval_samples_per_second": 299.234, |
| "eval_steps_per_second": 1.878, |
| "step": 39701 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 3.70210836545001e-07, |
| "loss": 1.5066, |
| "step": 40000 |
| }, |
| { |
| "epoch": 9.18, |
| "learning_rate": 3.3733847200181375e-07, |
| "loss": 1.5074, |
| "step": 40500 |
| }, |
| { |
| "epoch": 9.29, |
| "learning_rate": 3.044661074586261e-07, |
| "loss": 1.5069, |
| "step": 41000 |
| }, |
| { |
| "epoch": 9.41, |
| "learning_rate": 2.7159374291543883e-07, |
| "loss": 1.5069, |
| "step": 41500 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 2.387213783722512e-07, |
| "loss": 1.5067, |
| "step": 42000 |
| }, |
| { |
| "epoch": 9.63, |
| "learning_rate": 2.0584901382906362e-07, |
| "loss": 1.5067, |
| "step": 42500 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 1.7297664928587634e-07, |
| "loss": 1.5066, |
| "step": 43000 |
| }, |
| { |
| "epoch": 9.86, |
| "learning_rate": 1.4010428474268872e-07, |
| "loss": 1.5072, |
| "step": 43500 |
| }, |
| { |
| "epoch": 9.97, |
| "learning_rate": 1.0723192019950112e-07, |
| "loss": 1.507, |
| "step": 44000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 1.5134241580963135, |
| "eval_runtime": 95.477, |
| "eval_samples_per_second": 298.658, |
| "eval_steps_per_second": 1.875, |
| "step": 44110 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 44110, |
| "total_flos": 7.740642541156762e+17, |
| "train_loss": 1.5161892493331657, |
| "train_runtime": 101125.7197, |
| "train_samples_per_second": 279.163, |
| "train_steps_per_second": 0.436 |
| } |
| ], |
| "max_steps": 44110, |
| "num_train_epochs": 10, |
| "total_flos": 7.740642541156762e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|