{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.944110060189166, "global_step": 23000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 9.787188306104902e-05, "loss": 1.1754, "step": 500 }, { "epoch": 0.21, "learning_rate": 9.572226999140155e-05, "loss": 0.5467, "step": 1000 }, { "epoch": 0.32, "learning_rate": 9.357265692175409e-05, "loss": 0.5189, "step": 1500 }, { "epoch": 0.43, "learning_rate": 9.142304385210663e-05, "loss": 0.5128, "step": 2000 }, { "epoch": 0.54, "learning_rate": 8.927343078245917e-05, "loss": 0.5021, "step": 2500 }, { "epoch": 0.64, "learning_rate": 8.71238177128117e-05, "loss": 0.5089, "step": 3000 }, { "epoch": 0.75, "learning_rate": 8.497420464316423e-05, "loss": 0.4957, "step": 3500 }, { "epoch": 0.86, "learning_rate": 8.282459157351677e-05, "loss": 0.4943, "step": 4000 }, { "epoch": 0.97, "learning_rate": 8.06749785038693e-05, "loss": 0.4853, "step": 4500 }, { "epoch": 1.0, "eval_gen_len": 23.4555, "eval_loss": 0.4496892988681793, "eval_rouge1": 73.323, "eval_rouge2": 54.7538, "eval_rougeL": 68.0299, "eval_rougeLsum": 68.0246, "eval_runtime": 375.6913, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.016, "step": 4652 }, { "epoch": 1.07, "learning_rate": 7.852536543422185e-05, "loss": 0.4764, "step": 5000 }, { "epoch": 1.18, "learning_rate": 7.637575236457438e-05, "loss": 0.4661, "step": 5500 }, { "epoch": 1.29, "learning_rate": 7.422613929492692e-05, "loss": 0.4562, "step": 6000 }, { "epoch": 1.4, "learning_rate": 7.207652622527945e-05, "loss": 0.4591, "step": 6500 }, { "epoch": 1.5, "learning_rate": 6.9926913155632e-05, "loss": 0.46, "step": 7000 }, { "epoch": 1.61, "learning_rate": 6.777730008598453e-05, "loss": 0.462, "step": 7500 }, { "epoch": 1.72, "learning_rate": 6.562768701633706e-05, "loss": 0.457, "step": 8000 }, { "epoch": 1.83, "learning_rate": 6.34780739466896e-05, "loss": 0.4473, "step": 8500 }, { "epoch": 1.93, "learning_rate": 6.132846087704214e-05, "loss": 0.4536, "step": 9000 }, { "epoch": 2.0, "eval_gen_len": 23.089, "eval_loss": 0.4476110339164734, "eval_rouge1": 73.0167, "eval_rouge2": 54.5451, "eval_rougeL": 67.782, "eval_rougeLsum": 67.7971, "eval_runtime": 375.2197, "eval_samples_per_second": 0.509, "eval_steps_per_second": 0.016, "step": 9304 }, { "epoch": 2.04, "learning_rate": 5.917884780739468e-05, "loss": 0.4484, "step": 9500 }, { "epoch": 2.15, "learning_rate": 5.7029234737747204e-05, "loss": 0.4335, "step": 10000 }, { "epoch": 2.26, "learning_rate": 5.4879621668099744e-05, "loss": 0.4341, "step": 10500 }, { "epoch": 2.36, "learning_rate": 5.2730008598452284e-05, "loss": 0.4347, "step": 11000 }, { "epoch": 2.47, "learning_rate": 5.0584694754944115e-05, "loss": 0.4268, "step": 11500 }, { "epoch": 2.58, "learning_rate": 4.843508168529665e-05, "loss": 0.4283, "step": 12000 }, { "epoch": 2.69, "learning_rate": 4.628976784178848e-05, "loss": 0.4353, "step": 12500 }, { "epoch": 2.79, "learning_rate": 4.414015477214102e-05, "loss": 0.4291, "step": 13000 }, { "epoch": 2.9, "learning_rate": 4.1990541702493554e-05, "loss": 0.4333, "step": 13500 }, { "epoch": 3.0, "eval_gen_len": 22.9686, "eval_loss": 0.44937288761138916, "eval_rouge1": 73.1917, "eval_rouge2": 54.579, "eval_rougeL": 67.9593, "eval_rougeLsum": 67.9681, "eval_runtime": 375.7125, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.016, "step": 13956 }, { "epoch": 3.01, "learning_rate": 3.9840928632846094e-05, "loss": 0.429, "step": 14000 }, { "epoch": 3.12, "learning_rate": 3.769561478933792e-05, "loss": 0.4158, "step": 14500 }, { "epoch": 3.22, "learning_rate": 3.554600171969046e-05, "loss": 0.4113, "step": 15000 }, { "epoch": 3.33, "learning_rate": 3.339638865004299e-05, "loss": 0.4131, "step": 15500 }, { "epoch": 3.44, "learning_rate": 3.124677558039553e-05, "loss": 0.4176, "step": 16000 }, { "epoch": 3.55, "learning_rate": 2.9097162510748066e-05, "loss": 0.4179, "step": 16500 }, { "epoch": 3.65, "learning_rate": 2.6947549441100606e-05, "loss": 0.4136, "step": 17000 }, { "epoch": 3.76, "learning_rate": 2.479793637145314e-05, "loss": 0.4158, "step": 17500 }, { "epoch": 3.87, "learning_rate": 2.2648323301805677e-05, "loss": 0.4146, "step": 18000 }, { "epoch": 3.98, "learning_rate": 2.0498710232158214e-05, "loss": 0.4142, "step": 18500 }, { "epoch": 4.0, "eval_gen_len": 22.9162, "eval_loss": 0.4507947266101837, "eval_rouge1": 72.7951, "eval_rouge2": 54.1782, "eval_rougeL": 67.7093, "eval_rougeLsum": 67.7632, "eval_runtime": 376.0977, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.016, "step": 18608 }, { "epoch": 4.08, "learning_rate": 1.8349097162510747e-05, "loss": 0.4057, "step": 19000 }, { "epoch": 4.19, "learning_rate": 1.6199484092863284e-05, "loss": 0.4059, "step": 19500 }, { "epoch": 4.3, "learning_rate": 1.4054170249355117e-05, "loss": 0.4055, "step": 20000 }, { "epoch": 4.41, "learning_rate": 1.1904557179707654e-05, "loss": 0.4022, "step": 20500 }, { "epoch": 4.51, "learning_rate": 9.754944110060189e-06, "loss": 0.4105, "step": 21000 }, { "epoch": 4.62, "learning_rate": 7.605331040412726e-06, "loss": 0.405, "step": 21500 }, { "epoch": 4.73, "learning_rate": 5.455717970765263e-06, "loss": 0.4007, "step": 22000 }, { "epoch": 4.84, "learning_rate": 3.3061049011177993e-06, "loss": 0.4093, "step": 22500 }, { "epoch": 4.94, "learning_rate": 1.1564918314703356e-06, "loss": 0.4011, "step": 23000 } ], "max_steps": 23260, "num_train_epochs": 5, "total_flos": 7.011060475428864e+16, "trial_name": null, "trial_params": null }