{ "best_metric": 1.2624495029449463, "epoch": 2.0, "global_step": 12460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 9.899678972712681e-06, "loss": 1.5273, "step": 500 }, { "epoch": 0.16, "learning_rate": 9.799357945425362e-06, "loss": 1.3678, "step": 1000 }, { "epoch": 0.24, "learning_rate": 9.699036918138043e-06, "loss": 1.2869, "step": 1500 }, { "epoch": 0.32, "learning_rate": 9.598715890850723e-06, "loss": 1.2951, "step": 2000 }, { "epoch": 0.4, "learning_rate": 9.498394863563404e-06, "loss": 1.2578, "step": 2500 }, { "epoch": 0.48, "learning_rate": 9.398073836276083e-06, "loss": 1.2532, "step": 3000 }, { "epoch": 0.56, "learning_rate": 9.297752808988765e-06, "loss": 1.2441, "step": 3500 }, { "epoch": 0.64, "learning_rate": 9.197431781701446e-06, "loss": 1.212, "step": 4000 }, { "epoch": 0.72, "learning_rate": 9.097110754414125e-06, "loss": 1.2009, "step": 4500 }, { "epoch": 0.8, "learning_rate": 8.996789727126807e-06, "loss": 1.1731, "step": 5000 }, { "epoch": 0.88, "learning_rate": 8.896468699839486e-06, "loss": 1.2008, "step": 5500 }, { "epoch": 0.96, "learning_rate": 8.796147672552167e-06, "loss": 1.1732, "step": 6000 }, { "epoch": 1.0, "eval_gen_len": 18.808, "eval_loss": 1.2909361124038696, "eval_rouge1": 38.4799, "eval_rouge2": 14.1438, "eval_rougeL": 32.0853, "eval_rougeLsum": 34.22, "eval_runtime": 725.7342, "eval_samples_per_second": 2.067, "eval_steps_per_second": 1.033, "step": 6230 }, { "epoch": 1.04, "learning_rate": 8.69582664526485e-06, "loss": 1.1773, "step": 6500 }, { "epoch": 1.12, "learning_rate": 8.595505617977528e-06, "loss": 1.1377, "step": 7000 }, { "epoch": 1.2, "learning_rate": 8.49518459069021e-06, "loss": 1.1186, "step": 7500 }, { "epoch": 1.28, "learning_rate": 8.39486356340289e-06, "loss": 1.106, "step": 8000 }, { "epoch": 1.36, "learning_rate": 8.29454253611557e-06, "loss": 1.1346, "step": 8500 }, { "epoch": 1.44, "learning_rate": 
8.19422150882825e-06, "loss": 1.1376, "step": 9000 }, { "epoch": 1.52, "learning_rate": 8.093900481540931e-06, "loss": 1.1118, "step": 9500 }, { "epoch": 1.61, "learning_rate": 7.993579454253612e-06, "loss": 1.1356, "step": 10000 }, { "epoch": 1.69, "learning_rate": 7.893258426966293e-06, "loss": 1.0916, "step": 10500 }, { "epoch": 1.77, "learning_rate": 7.792937399678973e-06, "loss": 1.125, "step": 11000 }, { "epoch": 1.85, "learning_rate": 7.692616372391654e-06, "loss": 1.1061, "step": 11500 }, { "epoch": 1.93, "learning_rate": 7.592295345104335e-06, "loss": 1.1101, "step": 12000 }, { "epoch": 2.0, "eval_gen_len": 18.808, "eval_loss": 1.2624495029449463, "eval_rouge1": 38.462, "eval_rouge2": 14.3899, "eval_rougeL": 32.0676, "eval_rougeLsum": 34.0319, "eval_runtime": 723.1779, "eval_samples_per_second": 2.074, "eval_steps_per_second": 1.037, "step": 12460 } ], "max_steps": 49840, "num_train_epochs": 8, "total_flos": 2.96391223296e+16, "trial_name": null, "trial_params": null }