{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "global_step": 34848,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 4.92825987144169e-05,
      "loss": 3.4212,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.85651974288338e-05,
      "loss": 3.323,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.784779614325069e-05,
      "loss": 3.2791,
      "step": 1500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.7130394857667584e-05,
      "loss": 3.2457,
      "step": 2000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.6412993572084486e-05,
      "loss": 3.2237,
      "step": 2500
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.569559228650138e-05,
      "loss": 3.1983,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.4978191000918276e-05,
      "loss": 3.195,
      "step": 3500
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.426078971533517e-05,
      "loss": 3.1841,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.354338842975207e-05,
      "loss": 3.1802,
      "step": 4500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.282598714416897e-05,
      "loss": 3.1674,
      "step": 5000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.210858585858586e-05,
      "loss": 3.1341,
      "step": 5500
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.139118457300275e-05,
      "loss": 3.101,
      "step": 6000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.0673783287419655e-05,
      "loss": 3.0298,
      "step": 6500
    },
    {
      "epoch": 1.21,
      "learning_rate": 3.995638200183655e-05,
      "loss": 3.0358,
      "step": 7000
    },
    {
      "epoch": 1.29,
      "learning_rate": 3.9238980716253446e-05,
      "loss": 3.0531,
      "step": 7500
    },
    {
      "epoch": 1.38,
      "learning_rate": 3.852157943067034e-05,
      "loss": 3.0409,
      "step": 8000
    },
    {
      "epoch": 1.46,
      "learning_rate": 3.7804178145087236e-05,
      "loss": 3.0291,
      "step": 8500
    },
    {
      "epoch": 1.55,
      "learning_rate": 3.708677685950414e-05,
      "loss": 3.0346,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "learning_rate": 3.6369375573921034e-05,
      "loss": 3.0356,
      "step": 9500
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.565197428833792e-05,
      "loss": 3.0163,
      "step": 10000
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.4934573002754824e-05,
      "loss": 3.026,
      "step": 10500
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.421717171717172e-05,
      "loss": 3.0223,
      "step": 11000
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.3499770431588615e-05,
      "loss": 3.0286,
      "step": 11500
    },
    {
      "epoch": 2.07,
      "learning_rate": 3.278236914600551e-05,
      "loss": 2.9479,
      "step": 12000
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.2064967860422406e-05,
      "loss": 2.9327,
      "step": 12500
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.134756657483931e-05,
      "loss": 2.943,
      "step": 13000
    },
    {
      "epoch": 2.32,
      "learning_rate": 3.06301652892562e-05,
      "loss": 2.9277,
      "step": 13500
    },
    {
      "epoch": 2.41,
      "learning_rate": 2.9912764003673095e-05,
      "loss": 2.9443,
      "step": 14000
    },
    {
      "epoch": 2.5,
      "learning_rate": 2.919536271808999e-05,
      "loss": 2.9452,
      "step": 14500
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.847796143250689e-05,
      "loss": 2.9482,
      "step": 15000
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.7760560146923788e-05,
      "loss": 2.9429,
      "step": 15500
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.704315886134068e-05,
      "loss": 2.9463,
      "step": 16000
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.6325757575757575e-05,
      "loss": 2.9292,
      "step": 16500
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.5608356290174473e-05,
      "loss": 2.9286,
      "step": 17000
    },
    {
      "epoch": 3.01,
      "learning_rate": 2.489095500459137e-05,
      "loss": 2.9312,
      "step": 17500
    },
    {
      "epoch": 3.1,
      "learning_rate": 2.4173553719008264e-05,
      "loss": 2.8712,
      "step": 18000
    },
    {
      "epoch": 3.19,
      "learning_rate": 2.3456152433425163e-05,
      "loss": 2.8583,
      "step": 18500
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.2738751147842058e-05,
      "loss": 2.882,
      "step": 19000
    },
    {
      "epoch": 3.36,
      "learning_rate": 2.2021349862258957e-05,
      "loss": 2.857,
      "step": 19500
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.130394857667585e-05,
      "loss": 2.8815,
      "step": 20000
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.0586547291092747e-05,
      "loss": 2.8775,
      "step": 20500
    },
    {
      "epoch": 3.62,
      "learning_rate": 1.9869146005509643e-05,
      "loss": 2.8606,
      "step": 21000
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.915174471992654e-05,
      "loss": 2.8845,
      "step": 21500
    },
    {
      "epoch": 3.79,
      "learning_rate": 1.8434343434343433e-05,
      "loss": 2.8692,
      "step": 22000
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.7716942148760332e-05,
      "loss": 2.8829,
      "step": 22500
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.6999540863177227e-05,
      "loss": 2.8782,
      "step": 23000
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.6282139577594123e-05,
      "loss": 2.8401,
      "step": 23500
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.5564738292011018e-05,
      "loss": 2.822,
      "step": 24000
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.4847337006427917e-05,
      "loss": 2.8197,
      "step": 24500
    },
    {
      "epoch": 4.3,
      "learning_rate": 1.4129935720844812e-05,
      "loss": 2.8227,
      "step": 25000
    },
    {
      "epoch": 4.39,
      "learning_rate": 1.3412534435261709e-05,
      "loss": 2.8154,
      "step": 25500
    },
    {
      "epoch": 4.48,
      "learning_rate": 1.2695133149678604e-05,
      "loss": 2.8247,
      "step": 26000
    },
    {
      "epoch": 4.56,
      "learning_rate": 1.1977731864095501e-05,
      "loss": 2.8196,
      "step": 26500
    },
    {
      "epoch": 4.65,
      "learning_rate": 1.1260330578512397e-05,
      "loss": 2.8357,
      "step": 27000
    },
    {
      "epoch": 4.73,
      "learning_rate": 1.0542929292929294e-05,
      "loss": 2.8163,
      "step": 27500
    },
    {
      "epoch": 4.82,
      "learning_rate": 9.825528007346189e-06,
      "loss": 2.8381,
      "step": 28000
    },
    {
      "epoch": 4.91,
      "learning_rate": 9.108126721763086e-06,
      "loss": 2.8198,
      "step": 28500
    },
    {
      "epoch": 4.99,
      "learning_rate": 8.390725436179981e-06,
      "loss": 2.8302,
      "step": 29000
    },
    {
      "epoch": 5.08,
      "learning_rate": 7.673324150596878e-06,
      "loss": 2.7897,
      "step": 29500
    },
    {
      "epoch": 5.17,
      "learning_rate": 6.955922865013774e-06,
      "loss": 2.7858,
      "step": 30000
    },
    {
      "epoch": 5.25,
      "learning_rate": 6.238521579430671e-06,
      "loss": 2.7865,
      "step": 30500
    },
    {
      "epoch": 5.34,
      "learning_rate": 5.521120293847567e-06,
      "loss": 2.7889,
      "step": 31000
    },
    {
      "epoch": 5.42,
      "learning_rate": 4.803719008264463e-06,
      "loss": 2.7797,
      "step": 31500
    },
    {
      "epoch": 5.51,
      "learning_rate": 4.086317722681359e-06,
      "loss": 2.7869,
      "step": 32000
    },
    {
      "epoch": 5.6,
      "learning_rate": 3.3689164370982553e-06,
      "loss": 2.7922,
      "step": 32500
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.651515151515152e-06,
      "loss": 2.8032,
      "step": 33000
    },
    {
      "epoch": 5.77,
      "learning_rate": 1.934113865932048e-06,
      "loss": 2.7963,
      "step": 33500
    },
    {
      "epoch": 5.85,
      "learning_rate": 1.216712580348944e-06,
      "loss": 2.7952,
      "step": 34000
    },
    {
      "epoch": 5.94,
      "learning_rate": 4.993112947658403e-07,
      "loss": 2.7968,
      "step": 34500
    },
    {
      "epoch": 6.0,
      "step": 34848,
      "total_flos": 1.8211009462272e+16,
      "train_loss": 2.9473999731030567,
      "train_runtime": 8951.9422,
      "train_samples_per_second": 3.893,
      "train_steps_per_second": 3.893
    }
  ],
  "max_steps": 34848,
  "num_train_epochs": 6,
  "total_flos": 1.8211009462272e+16,
  "trial_name": null,
  "trial_params": null
}