{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.999433267214508,
  "global_step": 44110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 2.9671276354568124e-06,
      "loss": 1.5714,
      "step": 500
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.934255270913625e-06,
      "loss": 1.5492,
      "step": 1000
    },
    {
      "epoch": 0.34,
      "learning_rate": 2.9013829063704375e-06,
      "loss": 1.5441,
      "step": 1500
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.8685105418272502e-06,
      "loss": 1.5406,
      "step": 2000
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.8356381772840626e-06,
      "loss": 1.5376,
      "step": 2500
    },
    {
      "epoch": 0.68,
      "learning_rate": 2.802765812740875e-06,
      "loss": 1.5358,
      "step": 3000
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.7698934481976876e-06,
      "loss": 1.5342,
      "step": 3500
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.7370210836545004e-06,
      "loss": 1.5329,
      "step": 4000
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.5299354791641235,
      "eval_runtime": 95.1335,
      "eval_samples_per_second": 299.737,
      "eval_steps_per_second": 1.882,
      "step": 4411
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.7041487191113127e-06,
      "loss": 1.5308,
      "step": 4500
    },
    {
      "epoch": 1.13,
      "learning_rate": 2.6712763545681255e-06,
      "loss": 1.5293,
      "step": 5000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.638403990024938e-06,
      "loss": 1.5287,
      "step": 5500
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.60553162548175e-06,
      "loss": 1.5277,
      "step": 6000
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.572659260938563e-06,
      "loss": 1.5267,
      "step": 6500
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.539786896395375e-06,
      "loss": 1.5256,
      "step": 7000
    },
    {
      "epoch": 1.7,
      "learning_rate": 2.506914531852188e-06,
      "loss": 1.525,
      "step": 7500
    },
    {
      "epoch": 1.81,
      "learning_rate": 2.4740421673090003e-06,
      "loss": 1.5246,
      "step": 8000
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.4411698027658126e-06,
      "loss": 1.5238,
      "step": 8500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.5252745151519775,
      "eval_runtime": 95.3162,
      "eval_samples_per_second": 299.162,
      "eval_steps_per_second": 1.878,
      "step": 8822
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.4082974382226254e-06,
      "loss": 1.5238,
      "step": 9000
    },
    {
      "epoch": 2.15,
      "learning_rate": 2.3754250736794377e-06,
      "loss": 1.5222,
      "step": 9500
    },
    {
      "epoch": 2.27,
      "learning_rate": 2.34255270913625e-06,
      "loss": 1.5216,
      "step": 10000
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.3096803445930628e-06,
      "loss": 1.5208,
      "step": 10500
    },
    {
      "epoch": 2.49,
      "learning_rate": 2.2768079800498755e-06,
      "loss": 1.5204,
      "step": 11000
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.243935615506688e-06,
      "loss": 1.5202,
      "step": 11500
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.2110632509635006e-06,
      "loss": 1.5195,
      "step": 12000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.178190886420313e-06,
      "loss": 1.5193,
      "step": 12500
    },
    {
      "epoch": 2.95,
      "learning_rate": 2.1453185218771257e-06,
      "loss": 1.5183,
      "step": 13000
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.5211608409881592,
      "eval_runtime": 96.0051,
      "eval_samples_per_second": 297.015,
      "eval_steps_per_second": 1.864,
      "step": 13233
    },
    {
      "epoch": 3.06,
      "learning_rate": 2.112446157333938e-06,
      "loss": 1.5179,
      "step": 13500
    },
    {
      "epoch": 3.17,
      "learning_rate": 2.0795737927907503e-06,
      "loss": 1.5175,
      "step": 14000
    },
    {
      "epoch": 3.29,
      "learning_rate": 2.0467014282475627e-06,
      "loss": 1.5173,
      "step": 14500
    },
    {
      "epoch": 3.4,
      "learning_rate": 2.0138290637043754e-06,
      "loss": 1.5166,
      "step": 15000
    },
    {
      "epoch": 3.51,
      "learning_rate": 1.980956699161188e-06,
      "loss": 1.5166,
      "step": 15500
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.9480843346180005e-06,
      "loss": 1.5163,
      "step": 16000
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.9152119700748132e-06,
      "loss": 1.5158,
      "step": 16500
    },
    {
      "epoch": 3.85,
      "learning_rate": 1.8823396055316256e-06,
      "loss": 1.5156,
      "step": 17000
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.8494672409884385e-06,
      "loss": 1.5152,
      "step": 17500
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.5186687707901,
      "eval_runtime": 95.8953,
      "eval_samples_per_second": 297.356,
      "eval_steps_per_second": 1.867,
      "step": 17645
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.8165948764452506e-06,
      "loss": 1.5147,
      "step": 18000
    },
    {
      "epoch": 4.19,
      "learning_rate": 1.7837225119020632e-06,
      "loss": 1.5137,
      "step": 18500
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.7508501473588757e-06,
      "loss": 1.5138,
      "step": 19000
    },
    {
      "epoch": 4.42,
      "learning_rate": 1.717977782815688e-06,
      "loss": 1.5134,
      "step": 19500
    },
    {
      "epoch": 4.53,
      "learning_rate": 1.6851054182725004e-06,
      "loss": 1.5136,
      "step": 20000
    },
    {
      "epoch": 4.65,
      "learning_rate": 1.6522330537293131e-06,
      "loss": 1.5135,
      "step": 20500
    },
    {
      "epoch": 4.76,
      "learning_rate": 1.6193606891861259e-06,
      "loss": 1.5128,
      "step": 21000
    },
    {
      "epoch": 4.87,
      "learning_rate": 1.5864883246429384e-06,
      "loss": 1.5133,
      "step": 21500
    },
    {
      "epoch": 4.99,
      "learning_rate": 1.5536159600997505e-06,
      "loss": 1.5127,
      "step": 22000
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.5171175003051758,
      "eval_runtime": 94.991,
      "eval_samples_per_second": 300.186,
      "eval_steps_per_second": 1.884,
      "step": 22056
    },
    {
      "epoch": 5.1,
      "learning_rate": 1.520743595556563e-06,
      "loss": 1.5109,
      "step": 22500
    },
    {
      "epoch": 5.21,
      "learning_rate": 1.4878712310133756e-06,
      "loss": 1.512,
      "step": 23000
    },
    {
      "epoch": 5.33,
      "learning_rate": 1.454998866470188e-06,
      "loss": 1.5115,
      "step": 23500
    },
    {
      "epoch": 5.44,
      "learning_rate": 1.4221265019270007e-06,
      "loss": 1.5113,
      "step": 24000
    },
    {
      "epoch": 5.55,
      "learning_rate": 1.3892541373838134e-06,
      "loss": 1.511,
      "step": 24500
    },
    {
      "epoch": 5.67,
      "learning_rate": 1.3563817728406258e-06,
      "loss": 1.5112,
      "step": 25000
    },
    {
      "epoch": 5.78,
      "learning_rate": 1.3235094082974385e-06,
      "loss": 1.5106,
      "step": 25500
    },
    {
      "epoch": 5.89,
      "learning_rate": 1.2906370437542509e-06,
      "loss": 1.5105,
      "step": 26000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.51548171043396,
      "eval_runtime": 95.4859,
      "eval_samples_per_second": 298.631,
      "eval_steps_per_second": 1.875,
      "step": 26467
    },
    {
      "epoch": 6.01,
      "learning_rate": 1.2577646792110636e-06,
      "loss": 1.511,
      "step": 26500
    },
    {
      "epoch": 6.12,
      "learning_rate": 1.224892314667876e-06,
      "loss": 1.5103,
      "step": 27000
    },
    {
      "epoch": 6.23,
      "learning_rate": 1.1920199501246883e-06,
      "loss": 1.5095,
      "step": 27500
    },
    {
      "epoch": 6.35,
      "learning_rate": 1.159147585581501e-06,
      "loss": 1.5096,
      "step": 28000
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.1262752210383133e-06,
      "loss": 1.5099,
      "step": 28500
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.0934028564951257e-06,
      "loss": 1.5096,
      "step": 29000
    },
    {
      "epoch": 6.69,
      "learning_rate": 1.0605304919519384e-06,
      "loss": 1.5091,
      "step": 29500
    },
    {
      "epoch": 6.8,
      "learning_rate": 1.0276581274087507e-06,
      "loss": 1.5096,
      "step": 30000
    },
    {
      "epoch": 6.91,
      "learning_rate": 9.947857628655633e-07,
      "loss": 1.5087,
      "step": 30500
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.5147736072540283,
      "eval_runtime": 95.3411,
      "eval_samples_per_second": 299.084,
      "eval_steps_per_second": 1.877,
      "step": 30878
    },
    {
      "epoch": 7.03,
      "learning_rate": 9.61913398322376e-07,
      "loss": 1.5093,
      "step": 31000
    },
    {
      "epoch": 7.14,
      "learning_rate": 9.290410337791883e-07,
      "loss": 1.5085,
      "step": 31500
    },
    {
      "epoch": 7.25,
      "learning_rate": 8.96168669236001e-07,
      "loss": 1.5082,
      "step": 32000
    },
    {
      "epoch": 7.37,
      "learning_rate": 8.632963046928134e-07,
      "loss": 1.5082,
      "step": 32500
    },
    {
      "epoch": 7.48,
      "learning_rate": 8.304239401496259e-07,
      "loss": 1.5082,
      "step": 33000
    },
    {
      "epoch": 7.59,
      "learning_rate": 7.975515756064386e-07,
      "loss": 1.5084,
      "step": 33500
    },
    {
      "epoch": 7.71,
      "learning_rate": 7.64679211063251e-07,
      "loss": 1.5083,
      "step": 34000
    },
    {
      "epoch": 7.82,
      "learning_rate": 7.318068465200634e-07,
      "loss": 1.5084,
      "step": 34500
    },
    {
      "epoch": 7.93,
      "learning_rate": 6.989344819768761e-07,
      "loss": 1.5078,
      "step": 35000
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.5141184329986572,
      "eval_runtime": 95.3059,
      "eval_samples_per_second": 299.195,
      "eval_steps_per_second": 1.878,
      "step": 35290
    },
    {
      "epoch": 8.05,
      "learning_rate": 6.660621174336885e-07,
      "loss": 1.5083,
      "step": 35500
    },
    {
      "epoch": 8.16,
      "learning_rate": 6.331897528905012e-07,
      "loss": 1.5078,
      "step": 36000
    },
    {
      "epoch": 8.27,
      "learning_rate": 6.003173883473137e-07,
      "loss": 1.5077,
      "step": 36500
    },
    {
      "epoch": 8.39,
      "learning_rate": 5.67445023804126e-07,
      "loss": 1.5079,
      "step": 37000
    },
    {
      "epoch": 8.5,
      "learning_rate": 5.345726592609387e-07,
      "loss": 1.5073,
      "step": 37500
    },
    {
      "epoch": 8.61,
      "learning_rate": 5.017002947177512e-07,
      "loss": 1.5072,
      "step": 38000
    },
    {
      "epoch": 8.73,
      "learning_rate": 4.688279301745635e-07,
      "loss": 1.5073,
      "step": 38500
    },
    {
      "epoch": 8.84,
      "learning_rate": 4.3595556563137624e-07,
      "loss": 1.5072,
      "step": 39000
    },
    {
      "epoch": 8.95,
      "learning_rate": 4.030832010881886e-07,
      "loss": 1.5071,
      "step": 39500
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.5136668682098389,
      "eval_runtime": 95.2932,
      "eval_samples_per_second": 299.234,
      "eval_steps_per_second": 1.878,
      "step": 39701
    },
    {
      "epoch": 9.07,
      "learning_rate": 3.70210836545001e-07,
      "loss": 1.5066,
      "step": 40000
    },
    {
      "epoch": 9.18,
      "learning_rate": 3.3733847200181375e-07,
      "loss": 1.5074,
      "step": 40500
    },
    {
      "epoch": 9.29,
      "learning_rate": 3.044661074586261e-07,
      "loss": 1.5069,
      "step": 41000
    },
    {
      "epoch": 9.41,
      "learning_rate": 2.7159374291543883e-07,
      "loss": 1.5069,
      "step": 41500
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.387213783722512e-07,
      "loss": 1.5067,
      "step": 42000
    },
    {
      "epoch": 9.63,
      "learning_rate": 2.0584901382906362e-07,
      "loss": 1.5067,
      "step": 42500
    },
    {
      "epoch": 9.75,
      "learning_rate": 1.7297664928587634e-07,
      "loss": 1.5066,
      "step": 43000
    },
    {
      "epoch": 9.86,
      "learning_rate": 1.4010428474268872e-07,
      "loss": 1.5072,
      "step": 43500
    },
    {
      "epoch": 9.97,
      "learning_rate": 1.0723192019950112e-07,
      "loss": 1.507,
      "step": 44000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.5134241580963135,
      "eval_runtime": 95.477,
      "eval_samples_per_second": 298.658,
      "eval_steps_per_second": 1.875,
      "step": 44110
    },
    {
      "epoch": 10.0,
      "step": 44110,
      "total_flos": 7.740642541156762e+17,
      "train_loss": 1.5161892493331657,
      "train_runtime": 101125.7197,
      "train_samples_per_second": 279.163,
      "train_steps_per_second": 0.436
    }
  ],
  "max_steps": 44110,
  "num_train_epochs": 10,
  "total_flos": 7.740642541156762e+17,
  "trial_name": null,
  "trial_params": null
}