{ "best_global_step": 200, "best_metric": 4.316298961639404, "best_model_checkpoint": "./pegasus-finetuned/checkpoint-200", "epoch": 4.0, "eval_steps": 20, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": null, "learning_rate": 1.8e-06, "loss": 5.5812, "step": 10 }, { "epoch": 0.4, "grad_norm": 12.937685012817383, "learning_rate": 4.5e-06, "loss": 5.2561, "step": 20 }, { "epoch": 0.4, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 245.78, "eval_loss": 4.909902095794678, "eval_rouge1": 0.44797432353828237, "eval_rouge2": 0.1515012753387767, "eval_rougeL": 0.3128394670569607, "eval_runtime": 20.275, "eval_samples_per_second": 2.466, "eval_steps_per_second": 2.466, "step": 20 }, { "epoch": 0.6, "grad_norm": 8.617694854736328, "learning_rate": 7.2e-06, "loss": 5.6105, "step": 30 }, { "epoch": 0.8, "grad_norm": 5.31163215637207, "learning_rate": 1.02e-05, "loss": 5.1614, "step": 40 }, { "epoch": 0.8, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 257.86, "eval_loss": 4.743632793426514, "eval_rouge1": 0.4397395335167489, "eval_rouge2": 0.15641621115576146, "eval_rougeL": 0.3133924602167529, "eval_runtime": 14.7438, "eval_samples_per_second": 3.391, "eval_steps_per_second": 3.391, "step": 40 }, { "epoch": 1.0, "grad_norm": 22.030437469482422, "learning_rate": 1.29e-05, "loss": 5.0716, "step": 50 }, { "epoch": 1.2, "grad_norm": 6.041182994842529, "learning_rate": 1.59e-05, "loss": 5.0289, "step": 60 }, { "epoch": 1.2, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 266.6, "eval_loss": 4.613500595092773, "eval_rouge1": 0.43309371423798987, "eval_rouge2": 0.1573688721382018, "eval_rougeL": 0.3142031839040855, "eval_runtime": 13.9823, "eval_samples_per_second": 3.576, "eval_steps_per_second": 3.576, "step": 60 }, { "epoch": 1.4, "grad_norm": 6.781141757965088, "learning_rate": 1.8900000000000002e-05, "loss": 4.8995, "step": 70 }, { 
"epoch": 1.6, "grad_norm": 3.802191734313965, "learning_rate": 2.19e-05, "loss": 4.7833, "step": 80 }, { "epoch": 1.6, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 294.64, "eval_loss": 4.50701904296875, "eval_rouge1": 0.40764168234081616, "eval_rouge2": 0.14614275843571056, "eval_rougeL": 0.29674742581027913, "eval_runtime": 13.8586, "eval_samples_per_second": 3.608, "eval_steps_per_second": 3.608, "step": 80 }, { "epoch": 1.8, "grad_norm": 3.317619562149048, "learning_rate": 2.49e-05, "loss": 4.7805, "step": 90 }, { "epoch": 2.0, "grad_norm": 3.339324474334717, "learning_rate": 2.79e-05, "loss": 4.9119, "step": 100 }, { "epoch": 2.0, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 332.78, "eval_loss": 4.426856517791748, "eval_rouge1": 0.37944039950933656, "eval_rouge2": 0.13584556227702202, "eval_rougeL": 0.276265703905275, "eval_runtime": 13.9953, "eval_samples_per_second": 3.573, "eval_steps_per_second": 3.573, "step": 100 }, { "epoch": 2.2, "grad_norm": 4.322465896606445, "learning_rate": 2.91e-05, "loss": 4.652, "step": 110 }, { "epoch": 2.4, "grad_norm": 3.677753210067749, "learning_rate": 2.61e-05, "loss": 4.5762, "step": 120 }, { "epoch": 2.4, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 338.36, "eval_loss": 4.378966331481934, "eval_rouge1": 0.3753125894277081, "eval_rouge2": 0.13362842725075502, "eval_rougeL": 0.27296944731719464, "eval_runtime": 14.2879, "eval_samples_per_second": 3.499, "eval_steps_per_second": 3.499, "step": 120 }, { "epoch": 2.6, "grad_norm": 3.48340106010437, "learning_rate": 2.3100000000000002e-05, "loss": 4.5606, "step": 130 }, { "epoch": 2.8, "grad_norm": 2.9511525630950928, "learning_rate": 2.01e-05, "loss": 4.5311, "step": 140 }, { "epoch": 2.8, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 333.48, "eval_loss": 4.343095302581787, "eval_rouge1": 0.38086615014184255, "eval_rouge2": 0.1373720803961207, "eval_rougeL": 0.2777445759902725, "eval_runtime": 15.7276, "eval_samples_per_second": 
3.179, "eval_steps_per_second": 3.179, "step": 140 }, { "epoch": 3.0, "grad_norm": 4.5145158767700195, "learning_rate": 1.71e-05, "loss": 4.6825, "step": 150 }, { "epoch": 3.2, "grad_norm": 3.4626283645629883, "learning_rate": 1.4099999999999999e-05, "loss": 4.4947, "step": 160 }, { "epoch": 3.2, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 332.84, "eval_loss": 4.33493185043335, "eval_rouge1": 0.3806044223795976, "eval_rouge2": 0.1360227322055919, "eval_rougeL": 0.2777518121856722, "eval_runtime": 14.1172, "eval_samples_per_second": 3.542, "eval_steps_per_second": 3.542, "step": 160 }, { "epoch": 3.4, "grad_norm": 3.6152524948120117, "learning_rate": 1.11e-05, "loss": 4.5716, "step": 170 }, { "epoch": 3.6, "grad_norm": 3.842635154724121, "learning_rate": 8.1e-06, "loss": 4.5307, "step": 180 }, { "epoch": 3.6, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 328.42, "eval_loss": 4.32252311706543, "eval_rouge1": 0.3834980545411381, "eval_rouge2": 0.13811567307518083, "eval_rougeL": 0.28112159619630067, "eval_runtime": 13.9254, "eval_samples_per_second": 3.591, "eval_steps_per_second": 3.591, "step": 180 }, { "epoch": 3.8, "grad_norm": 2.734753131866455, "learning_rate": 5.1e-06, "loss": 4.4624, "step": 190 }, { "epoch": 4.0, "grad_norm": 2.630204677581787, "learning_rate": 2.1000000000000002e-06, "loss": 4.5697, "step": 200 }, { "epoch": 4.0, "eval_avg_label_length": 217.48, "eval_avg_pred_length": 329.0, "eval_loss": 4.316298961639404, "eval_rouge1": 0.38243647130093067, "eval_rouge2": 0.137975069698913, "eval_rougeL": 0.28021824928998074, "eval_runtime": 13.762, "eval_samples_per_second": 3.633, "eval_steps_per_second": 3.633, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} 
} }, "total_flos": 4623143062732800.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }