{ "best_global_step": 7000, "best_metric": 0.4164, "best_model_checkpoint": "models/facebook/bart-large-cnn_30_5e-05/checkpoint-7000", "epoch": 28.225806451612904, "eval_steps": 500, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0161290322580645, "grad_norm": 2.271665573120117, "learning_rate": 4.911573288058857e-05, "loss": 0.6372, "step": 500 }, { "epoch": 2.0161290322580645, "eval_gen_len": 86.6301, "eval_loss": 0.6856603622436523, "eval_rouge1": 0.3918, "eval_rouge2": 0.3151, "eval_rougeL": 0.3653, "eval_rougeLsum": 0.3829, "eval_runtime": 470.8787, "eval_samples_per_second": 2.4, "eval_steps_per_second": 0.076, "step": 500 }, { "epoch": 4.032258064516129, "grad_norm": 1.920154333114624, "learning_rate": 4.557866440294284e-05, "loss": 0.3093, "step": 1000 }, { "epoch": 4.032258064516129, "eval_gen_len": 86.4681, "eval_loss": 0.8830690383911133, "eval_rouge1": 0.4027, "eval_rouge2": 0.328, "eval_rougeL": 0.3763, "eval_rougeLsum": 0.3948, "eval_runtime": 463.6435, "eval_samples_per_second": 2.437, "eval_steps_per_second": 0.078, "step": 1000 }, { "epoch": 6.048387096774194, "grad_norm": 2.545297622680664, "learning_rate": 4.204867006225241e-05, "loss": 0.1443, "step": 1500 }, { "epoch": 6.048387096774194, "eval_gen_len": 85.0513, "eval_loss": 1.0781320333480835, "eval_rouge1": 0.4129, "eval_rouge2": 0.3395, "eval_rougeL": 0.3857, "eval_rougeLsum": 0.4057, "eval_runtime": 467.3383, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.077, "step": 1500 }, { "epoch": 8.064516129032258, "grad_norm": 1.484631061553955, "learning_rate": 3.8511601584606685e-05, "loss": 0.076, "step": 2000 }, { "epoch": 8.064516129032258, "eval_gen_len": 77.6938, "eval_loss": 1.1164271831512451, "eval_rouge1": 0.4342, "eval_rouge2": 0.3594, "eval_rougeL": 0.4087, "eval_rougeLsum": 0.4259, "eval_runtime": 433.3086, "eval_samples_per_second": 2.608, "eval_steps_per_second": 0.083, 
"step": 2000 }, { "epoch": 10.080645161290322, "grad_norm": 0.9293203949928284, "learning_rate": 3.4974533106960955e-05, "loss": 0.046, "step": 2500 }, { "epoch": 10.080645161290322, "eval_gen_len": 79.4717, "eval_loss": 1.4387677907943726, "eval_rouge1": 0.4304, "eval_rouge2": 0.3557, "eval_rougeL": 0.406, "eval_rougeLsum": 0.423, "eval_runtime": 431.9944, "eval_samples_per_second": 2.616, "eval_steps_per_second": 0.083, "step": 2500 }, { "epoch": 12.096774193548388, "grad_norm": 1.3663330078125, "learning_rate": 3.143746462931523e-05, "loss": 0.0322, "step": 3000 }, { "epoch": 12.096774193548388, "eval_gen_len": 78.7823, "eval_loss": 1.2941193580627441, "eval_rouge1": 0.4297, "eval_rouge2": 0.3536, "eval_rougeL": 0.4034, "eval_rougeLsum": 0.4216, "eval_runtime": 429.3366, "eval_samples_per_second": 2.632, "eval_steps_per_second": 0.084, "step": 3000 }, { "epoch": 14.112903225806452, "grad_norm": 3.2132232189178467, "learning_rate": 2.79003961516695e-05, "loss": 0.0231, "step": 3500 }, { "epoch": 14.112903225806452, "eval_gen_len": 80.5938, "eval_loss": 1.4391534328460693, "eval_rouge1": 0.4162, "eval_rouge2": 0.342, "eval_rougeL": 0.3912, "eval_rougeLsum": 0.4083, "eval_runtime": 429.1979, "eval_samples_per_second": 2.633, "eval_steps_per_second": 0.084, "step": 3500 }, { "epoch": 16.129032258064516, "grad_norm": 0.9062560796737671, "learning_rate": 2.436332767402377e-05, "loss": 0.0173, "step": 4000 }, { "epoch": 16.129032258064516, "eval_gen_len": 81.5655, "eval_loss": 1.5033551454544067, "eval_rouge1": 0.4273, "eval_rouge2": 0.3537, "eval_rougeL": 0.4024, "eval_rougeLsum": 0.4199, "eval_runtime": 427.5276, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.084, "step": 4000 }, { "epoch": 18.14516129032258, "grad_norm": null, "learning_rate": 2.082625919637804e-05, "loss": 0.0126, "step": 4500 }, { "epoch": 18.14516129032258, "eval_gen_len": 81.192, "eval_loss": 1.4981940984725952, "eval_rouge1": 0.4252, "eval_rouge2": 0.3528, "eval_rougeL": 
0.4013, "eval_rougeLsum": 0.4175, "eval_runtime": 437.7384, "eval_samples_per_second": 2.581, "eval_steps_per_second": 0.082, "step": 4500 }, { "epoch": 20.161290322580644, "grad_norm": 0.3979549705982208, "learning_rate": 1.729626485568761e-05, "loss": 0.009, "step": 5000 }, { "epoch": 20.161290322580644, "eval_gen_len": 81.9372, "eval_loss": 1.5800154209136963, "eval_rouge1": 0.4257, "eval_rouge2": 0.3534, "eval_rougeL": 0.401, "eval_rougeLsum": 0.418, "eval_runtime": 434.4426, "eval_samples_per_second": 2.601, "eval_steps_per_second": 0.083, "step": 5000 }, { "epoch": 22.177419354838708, "grad_norm": 1.6955211162567139, "learning_rate": 1.375919637804188e-05, "loss": 0.0063, "step": 5500 }, { "epoch": 22.177419354838708, "eval_gen_len": 79.0938, "eval_loss": 1.576202154159546, "eval_rouge1": 0.4323, "eval_rouge2": 0.3582, "eval_rougeL": 0.4074, "eval_rougeLsum": 0.4245, "eval_runtime": 428.2115, "eval_samples_per_second": 2.639, "eval_steps_per_second": 0.084, "step": 5500 }, { "epoch": 24.193548387096776, "grad_norm": 0.2606065273284912, "learning_rate": 1.0222127900396152e-05, "loss": 0.0046, "step": 6000 }, { "epoch": 24.193548387096776, "eval_gen_len": 80.6867, "eval_loss": 1.5904020071029663, "eval_rouge1": 0.4385, "eval_rouge2": 0.3675, "eval_rougeL": 0.4145, "eval_rougeLsum": 0.4313, "eval_runtime": 427.4723, "eval_samples_per_second": 2.643, "eval_steps_per_second": 0.084, "step": 6000 }, { "epoch": 26.20967741935484, "grad_norm": 0.12859472632408142, "learning_rate": 6.685059422750426e-06, "loss": 0.0033, "step": 6500 }, { "epoch": 26.20967741935484, "eval_gen_len": 79.9637, "eval_loss": 1.5917092561721802, "eval_rouge1": 0.4393, "eval_rouge2": 0.3686, "eval_rougeL": 0.415, "eval_rougeLsum": 0.4312, "eval_runtime": 433.2177, "eval_samples_per_second": 2.608, "eval_steps_per_second": 0.083, "step": 6500 }, { "epoch": 28.225806451612904, "grad_norm": 0.3688865005970001, "learning_rate": 3.1479909451046973e-06, "loss": 0.002, "step": 7000 }, { "epoch": 
28.225806451612904, "eval_gen_len": 80.7027, "eval_loss": 1.6072598695755005, "eval_rouge1": 0.4404, "eval_rouge2": 0.3687, "eval_rougeL": 0.4164, "eval_rougeLsum": 0.4327, "eval_runtime": 441.5366, "eval_samples_per_second": 2.559, "eval_steps_per_second": 0.082, "step": 7000 } ], "logging_steps": 500, "max_steps": 7440, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.704962073115853e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }