| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 2.0887, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.9023, | |
| "eval_gen_len": 31.24818181818182, | |
| "eval_loss": 1.7361507415771484, | |
| "eval_precision": 0.9035, | |
| "eval_recall": 0.9015, | |
| "eval_rouge1": 0.4326, | |
| "eval_rouge2": 0.1871, | |
| "eval_rougeL": 0.3375, | |
| "eval_rougeLsum": 0.3373, | |
| "eval_runtime": 386.0456, | |
| "eval_samples_per_second": 2.849, | |
| "eval_steps_per_second": 0.357, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.8362, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.905, | |
| "eval_gen_len": 30.303636363636365, | |
| "eval_loss": 1.6843606233596802, | |
| "eval_precision": 0.9071, | |
| "eval_recall": 0.9032, | |
| "eval_rouge1": 0.4466, | |
| "eval_rouge2": 0.1942, | |
| "eval_rougeL": 0.3511, | |
| "eval_rougeLsum": 0.3507, | |
| "eval_runtime": 351.8932, | |
| "eval_samples_per_second": 3.126, | |
| "eval_steps_per_second": 0.392, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.7784, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.9056, | |
| "eval_gen_len": 30.79909090909091, | |
| "eval_loss": 1.6666187047958374, | |
| "eval_precision": 0.907, | |
| "eval_recall": 0.9045, | |
| "eval_rouge1": 0.451, | |
| "eval_rouge2": 0.1992, | |
| "eval_rougeL": 0.3554, | |
| "eval_rougeLsum": 0.3551, | |
| "eval_runtime": 352.5825, | |
| "eval_samples_per_second": 3.12, | |
| "eval_steps_per_second": 0.391, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.7543, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.7261, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.9064, | |
| "eval_gen_len": 30.85090909090909, | |
| "eval_loss": 1.6605653762817383, | |
| "eval_precision": 0.9078, | |
| "eval_recall": 0.9053, | |
| "eval_rouge1": 0.4557, | |
| "eval_rouge2": 0.2019, | |
| "eval_rougeL": 0.3603, | |
| "eval_rougeLsum": 0.3597, | |
| "eval_runtime": 359.5712, | |
| "eval_samples_per_second": 3.059, | |
| "eval_steps_per_second": 0.384, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 2500, | |
| "total_flos": 1.9957763220524237e+17, | |
| "train_loss": 1.8367231689453125, | |
| "train_runtime": 11495.8338, | |
| "train_samples_per_second": 6.959, | |
| "train_steps_per_second": 0.217 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "total_flos": 1.9957763220524237e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |