{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999433267214508, "global_step": 44110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 2.9671276354568124e-06, "loss": 1.5714, "step": 500 }, { "epoch": 0.23, "learning_rate": 2.934255270913625e-06, "loss": 1.5492, "step": 1000 }, { "epoch": 0.34, "learning_rate": 2.9013829063704375e-06, "loss": 1.5441, "step": 1500 }, { "epoch": 0.45, "learning_rate": 2.8685105418272502e-06, "loss": 1.5406, "step": 2000 }, { "epoch": 0.57, "learning_rate": 2.8356381772840626e-06, "loss": 1.5376, "step": 2500 }, { "epoch": 0.68, "learning_rate": 2.802765812740875e-06, "loss": 1.5358, "step": 3000 }, { "epoch": 0.79, "learning_rate": 2.7698934481976876e-06, "loss": 1.5342, "step": 3500 }, { "epoch": 0.91, "learning_rate": 2.7370210836545004e-06, "loss": 1.5329, "step": 4000 }, { "epoch": 1.0, "eval_loss": 1.5299354791641235, "eval_runtime": 95.1335, "eval_samples_per_second": 299.737, "eval_steps_per_second": 1.882, "step": 4411 }, { "epoch": 1.02, "learning_rate": 2.7041487191113127e-06, "loss": 1.5308, "step": 4500 }, { "epoch": 1.13, "learning_rate": 2.6712763545681255e-06, "loss": 1.5293, "step": 5000 }, { "epoch": 1.25, "learning_rate": 2.638403990024938e-06, "loss": 1.5287, "step": 5500 }, { "epoch": 1.36, "learning_rate": 2.60553162548175e-06, "loss": 1.5277, "step": 6000 }, { "epoch": 1.47, "learning_rate": 2.572659260938563e-06, "loss": 1.5267, "step": 6500 }, { "epoch": 1.59, "learning_rate": 2.539786896395375e-06, "loss": 1.5256, "step": 7000 }, { "epoch": 1.7, "learning_rate": 2.506914531852188e-06, "loss": 1.525, "step": 7500 }, { "epoch": 1.81, "learning_rate": 2.4740421673090003e-06, "loss": 1.5246, "step": 8000 }, { "epoch": 1.93, "learning_rate": 2.4411698027658126e-06, "loss": 1.5238, "step": 8500 }, { "epoch": 2.0, "eval_loss": 1.5252745151519775, "eval_runtime": 95.3162, "eval_samples_per_second": 299.162, "eval_steps_per_second": 1.878, "step": 8822 }, { "epoch": 2.04, "learning_rate": 2.4082974382226254e-06, "loss": 1.5238, "step": 9000 }, { "epoch": 2.15, "learning_rate": 2.3754250736794377e-06, "loss": 1.5222, "step": 9500 }, { "epoch": 2.27, "learning_rate": 2.34255270913625e-06, "loss": 1.5216, "step": 10000 }, { "epoch": 2.38, "learning_rate": 2.3096803445930628e-06, "loss": 1.5208, "step": 10500 }, { "epoch": 2.49, "learning_rate": 2.2768079800498755e-06, "loss": 1.5204, "step": 11000 }, { "epoch": 2.61, "learning_rate": 2.243935615506688e-06, "loss": 1.5202, "step": 11500 }, { "epoch": 2.72, "learning_rate": 2.2110632509635006e-06, "loss": 1.5195, "step": 12000 }, { "epoch": 2.83, "learning_rate": 2.178190886420313e-06, "loss": 1.5193, "step": 12500 }, { "epoch": 2.95, "learning_rate": 2.1453185218771257e-06, "loss": 1.5183, "step": 13000 }, { "epoch": 3.0, "eval_loss": 1.5211608409881592, "eval_runtime": 96.0051, "eval_samples_per_second": 297.015, "eval_steps_per_second": 1.864, "step": 13233 }, { "epoch": 3.06, "learning_rate": 2.112446157333938e-06, "loss": 1.5179, "step": 13500 }, { "epoch": 3.17, "learning_rate": 2.0795737927907503e-06, "loss": 1.5175, "step": 14000 }, { "epoch": 3.29, "learning_rate": 2.0467014282475627e-06, "loss": 1.5173, "step": 14500 }, { "epoch": 3.4, "learning_rate": 2.0138290637043754e-06, "loss": 1.5166, "step": 15000 }, { "epoch": 3.51, "learning_rate": 1.980956699161188e-06, "loss": 1.5166, "step": 15500 }, { "epoch": 3.63, "learning_rate": 1.9480843346180005e-06, "loss": 1.5163, "step": 16000 }, { "epoch": 3.74, "learning_rate": 1.9152119700748132e-06, "loss": 1.5158, "step": 16500 }, { "epoch": 3.85, "learning_rate": 1.8823396055316256e-06, "loss": 1.5156, "step": 17000 }, { "epoch": 3.97, "learning_rate": 1.8494672409884385e-06, "loss": 1.5152, "step": 17500 }, { "epoch": 4.0, "eval_loss": 1.5186687707901, "eval_runtime": 95.8953, "eval_samples_per_second": 297.356, "eval_steps_per_second": 1.867, "step": 17645 }, { "epoch": 4.08, "learning_rate": 1.8165948764452506e-06, "loss": 1.5147, "step": 18000 }, { "epoch": 4.19, "learning_rate": 1.7837225119020632e-06, "loss": 1.5137, "step": 18500 }, { "epoch": 4.31, "learning_rate": 1.7508501473588757e-06, "loss": 1.5138, "step": 19000 }, { "epoch": 4.42, "learning_rate": 1.717977782815688e-06, "loss": 1.5134, "step": 19500 }, { "epoch": 4.53, "learning_rate": 1.6851054182725004e-06, "loss": 1.5136, "step": 20000 }, { "epoch": 4.65, "learning_rate": 1.6522330537293131e-06, "loss": 1.5135, "step": 20500 }, { "epoch": 4.76, "learning_rate": 1.6193606891861259e-06, "loss": 1.5128, "step": 21000 }, { "epoch": 4.87, "learning_rate": 1.5864883246429384e-06, "loss": 1.5133, "step": 21500 }, { "epoch": 4.99, "learning_rate": 1.5536159600997505e-06, "loss": 1.5127, "step": 22000 }, { "epoch": 5.0, "eval_loss": 1.5171175003051758, "eval_runtime": 94.991, "eval_samples_per_second": 300.186, "eval_steps_per_second": 1.884, "step": 22056 }, { "epoch": 5.1, "learning_rate": 1.520743595556563e-06, "loss": 1.5109, "step": 22500 }, { "epoch": 5.21, "learning_rate": 1.4878712310133756e-06, "loss": 1.512, "step": 23000 }, { "epoch": 5.33, "learning_rate": 1.454998866470188e-06, "loss": 1.5115, "step": 23500 }, { "epoch": 5.44, "learning_rate": 1.4221265019270007e-06, "loss": 1.5113, "step": 24000 }, { "epoch": 5.55, "learning_rate": 1.3892541373838134e-06, "loss": 1.511, "step": 24500 }, { "epoch": 5.67, "learning_rate": 1.3563817728406258e-06, "loss": 1.5112, "step": 25000 }, { "epoch": 5.78, "learning_rate": 1.3235094082974385e-06, "loss": 1.5106, "step": 25500 }, { "epoch": 5.89, "learning_rate": 1.2906370437542509e-06, "loss": 1.5105, "step": 26000 }, { "epoch": 6.0, "eval_loss": 1.51548171043396, "eval_runtime": 95.4859, "eval_samples_per_second": 298.631, "eval_steps_per_second": 1.875, "step": 26467 }, { "epoch": 6.01, "learning_rate": 1.2577646792110636e-06, "loss": 1.511, "step": 26500 }, { "epoch": 6.12, "learning_rate": 1.224892314667876e-06, "loss": 1.5103, "step": 27000 }, { "epoch": 6.23, "learning_rate": 1.1920199501246883e-06, "loss": 1.5095, "step": 27500 }, { "epoch": 6.35, "learning_rate": 1.159147585581501e-06, "loss": 1.5096, "step": 28000 }, { "epoch": 6.46, "learning_rate": 1.1262752210383133e-06, "loss": 1.5099, "step": 28500 }, { "epoch": 6.57, "learning_rate": 1.0934028564951257e-06, "loss": 1.5096, "step": 29000 }, { "epoch": 6.69, "learning_rate": 1.0605304919519384e-06, "loss": 1.5091, "step": 29500 }, { "epoch": 6.8, "learning_rate": 1.0276581274087507e-06, "loss": 1.5096, "step": 30000 }, { "epoch": 6.91, "learning_rate": 9.947857628655633e-07, "loss": 1.5087, "step": 30500 }, { "epoch": 7.0, "eval_loss": 1.5147736072540283, "eval_runtime": 95.3411, "eval_samples_per_second": 299.084, "eval_steps_per_second": 1.877, "step": 30878 }, { "epoch": 7.03, "learning_rate": 9.61913398322376e-07, "loss": 1.5093, "step": 31000 }, { "epoch": 7.14, "learning_rate": 9.290410337791883e-07, "loss": 1.5085, "step": 31500 }, { "epoch": 7.25, "learning_rate": 8.96168669236001e-07, "loss": 1.5082, "step": 32000 }, { "epoch": 7.37, "learning_rate": 8.632963046928134e-07, "loss": 1.5082, "step": 32500 }, { "epoch": 7.48, "learning_rate": 8.304239401496259e-07, "loss": 1.5082, "step": 33000 }, { "epoch": 7.59, "learning_rate": 7.975515756064386e-07, "loss": 1.5084, "step": 33500 }, { "epoch": 7.71, "learning_rate": 7.64679211063251e-07, "loss": 1.5083, "step": 34000 }, { "epoch": 7.82, "learning_rate": 7.318068465200634e-07, "loss": 1.5084, "step": 34500 }, { "epoch": 7.93, "learning_rate": 6.989344819768761e-07, "loss": 1.5078, "step": 35000 }, { "epoch": 8.0, "eval_loss": 1.5141184329986572, "eval_runtime": 95.3059, "eval_samples_per_second": 299.195, "eval_steps_per_second": 1.878, "step": 35290 }, { "epoch": 8.05, "learning_rate": 6.660621174336885e-07, "loss": 1.5083, "step": 35500 }, { "epoch": 8.16, "learning_rate": 6.331897528905012e-07, "loss": 1.5078, "step": 36000 }, { "epoch": 8.27, "learning_rate": 6.003173883473137e-07, "loss": 1.5077, "step": 36500 }, { "epoch": 8.39, "learning_rate": 5.67445023804126e-07, "loss": 1.5079, "step": 37000 }, { "epoch": 8.5, "learning_rate": 5.345726592609387e-07, "loss": 1.5073, "step": 37500 }, { "epoch": 8.61, "learning_rate": 5.017002947177512e-07, "loss": 1.5072, "step": 38000 }, { "epoch": 8.73, "learning_rate": 4.688279301745635e-07, "loss": 1.5073, "step": 38500 }, { "epoch": 8.84, "learning_rate": 4.3595556563137624e-07, "loss": 1.5072, "step": 39000 }, { "epoch": 8.95, "learning_rate": 4.030832010881886e-07, "loss": 1.5071, "step": 39500 }, { "epoch": 9.0, "eval_loss": 1.5136668682098389, "eval_runtime": 95.2932, "eval_samples_per_second": 299.234, "eval_steps_per_second": 1.878, "step": 39701 }, { "epoch": 9.07, "learning_rate": 3.70210836545001e-07, "loss": 1.5066, "step": 40000 }, { "epoch": 9.18, "learning_rate": 3.3733847200181375e-07, "loss": 1.5074, "step": 40500 }, { "epoch": 9.29, "learning_rate": 3.044661074586261e-07, "loss": 1.5069, "step": 41000 }, { "epoch": 9.41, "learning_rate": 2.7159374291543883e-07, "loss": 1.5069, "step": 41500 }, { "epoch": 9.52, "learning_rate": 2.387213783722512e-07, "loss": 1.5067, "step": 42000 }, { "epoch": 9.63, "learning_rate": 2.0584901382906362e-07, "loss": 1.5067, "step": 42500 }, { "epoch": 9.75, "learning_rate": 1.7297664928587634e-07, "loss": 1.5066, "step": 43000 }, { "epoch": 9.86, "learning_rate": 1.4010428474268872e-07, "loss": 1.5072, "step": 43500 }, { "epoch": 9.97, "learning_rate": 1.0723192019950112e-07, "loss": 1.507, "step": 44000 }, { "epoch": 10.0, "eval_loss": 1.5134241580963135, "eval_runtime": 95.477, "eval_samples_per_second": 298.658, "eval_steps_per_second": 1.875, "step": 44110 }, { "epoch": 10.0, "step": 44110, "total_flos": 7.740642541156762e+17, "train_loss": 1.5161892493331657, "train_runtime": 101125.7197, "train_samples_per_second": 279.163, "train_steps_per_second": 0.436 } ], "max_steps": 44110, "num_train_epochs": 10, "total_flos": 7.740642541156762e+17, "trial_name": null, "trial_params": null }