{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 21.693934737413,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.36,
      "learning_rate": 0.0002777777777777778,
      "loss": 7.7201,
      "step": 500
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0005555555555555556,
      "loss": 6.4428,
      "step": 1000
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.0008333333333333334,
      "loss": 5.9072,
      "step": 1500
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0009929078014184398,
      "loss": 3.7123,
      "step": 2000
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.000975177304964539,
      "loss": 2.6271,
      "step": 2500
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.0009574468085106384,
      "loss": 2.2823,
      "step": 3000
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.0009397163120567376,
      "loss": 2.1231,
      "step": 3500
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.0009219858156028368,
      "loss": 2.0297,
      "step": 4000
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.0009042553191489362,
      "loss": 1.9652,
      "step": 4500
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.0008865248226950354,
      "loss": 1.9186,
      "step": 5000
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.0008687943262411348,
      "loss": 1.8837,
      "step": 5500
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.000851063829787234,
      "loss": 1.8546,
      "step": 6000
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.0008333333333333334,
      "loss": 1.8322,
      "step": 6500
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.0008156028368794326,
      "loss": 1.8122,
      "step": 7000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.0007978723404255319,
      "loss": 1.7951,
      "step": 7500
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.0007801418439716312,
      "loss": 1.7814,
      "step": 8000
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.0007624113475177306,
      "loss": 1.7679,
      "step": 8500
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.0007446808510638298,
      "loss": 1.756,
      "step": 9000
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.0007269503546099291,
      "loss": 1.7466,
      "step": 9500
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.0007092198581560284,
      "loss": 1.7355,
      "step": 10000
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.0006914893617021278,
      "loss": 1.7268,
      "step": 10500
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.0006737588652482269,
      "loss": 1.7197,
      "step": 11000
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.0006560283687943263,
      "loss": 1.7111,
      "step": 11500
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.0006382978723404256,
      "loss": 1.7043,
      "step": 12000
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.0006205673758865247,
      "loss": 1.6979,
      "step": 12500
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.0006028368794326241,
      "loss": 1.6909,
      "step": 13000
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.0005851063829787234,
      "loss": 1.6856,
      "step": 13500
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.0005673758865248228,
      "loss": 1.6796,
      "step": 14000
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.0005496453900709219,
      "loss": 1.6744,
      "step": 14500
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.0005319148936170213,
      "loss": 1.6699,
      "step": 15000
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.0005141843971631206,
      "loss": 1.6648,
      "step": 15500
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.0004964539007092199,
      "loss": 1.6595,
      "step": 16000
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.0004787234042553192,
      "loss": 1.6553,
      "step": 16500
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.0004609929078014184,
      "loss": 1.6511,
      "step": 17000
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.0004432624113475177,
      "loss": 1.6466,
      "step": 17500
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.000425531914893617,
      "loss": 1.6431,
      "step": 18000
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.0004078014184397163,
      "loss": 1.6386,
      "step": 18500
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.0003900709219858156,
      "loss": 1.6353,
      "step": 19000
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.0003723404255319149,
      "loss": 1.6307,
      "step": 19500
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.0003546099290780142,
      "loss": 1.6279,
      "step": 20000
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.00033687943262411345,
      "loss": 1.6241,
      "step": 20500
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.0003191489361702128,
      "loss": 1.6215,
      "step": 21000
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.00030141843971631205,
      "loss": 1.6178,
      "step": 21500
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.0002836879432624114,
      "loss": 1.6149,
      "step": 22000
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.00026595744680851064,
      "loss": 1.6118,
      "step": 22500
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.00024822695035460994,
      "loss": 1.6086,
      "step": 23000
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.0002304964539007092,
      "loss": 1.6064,
      "step": 23500
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.0002127659574468085,
      "loss": 1.6028,
      "step": 24000
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.0001950354609929078,
      "loss": 1.5994,
      "step": 24500
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.0001773049645390071,
      "loss": 1.5971,
      "step": 25000
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.0001595744680851064,
      "loss": 1.5938,
      "step": 25500
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.0001418439716312057,
      "loss": 1.5922,
      "step": 26000
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.00012411347517730497,
      "loss": 1.589,
      "step": 26500
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.00010638297872340425,
      "loss": 1.5867,
      "step": 27000
    },
    {
      "epoch": 19.89,
      "learning_rate": 8.865248226950355e-05,
      "loss": 1.5839,
      "step": 27500
    },
    {
      "epoch": 20.25,
      "learning_rate": 7.092198581560285e-05,
      "loss": 1.5811,
      "step": 28000
    },
    {
      "epoch": 20.61,
      "learning_rate": 5.319148936170213e-05,
      "loss": 1.5795,
      "step": 28500
    },
    {
      "epoch": 20.97,
      "learning_rate": 3.5460992907801425e-05,
      "loss": 1.577,
      "step": 29000
    },
    {
      "epoch": 21.33,
      "learning_rate": 1.7730496453900712e-05,
      "loss": 1.575,
      "step": 29500
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.0,
      "loss": 1.5733,
      "step": 30000
    }
  ],
  "max_steps": 30000,
  "num_train_epochs": 22,
  "total_flos": 1.9277555263183258e+19,
  "trial_name": null,
  "trial_params": null
}