| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 345, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 95.44927536231884, | |
| "eval_loss": 0.09570997208356857, | |
| "eval_rouge1": 72.6601, | |
| "eval_rouge2": 71.6824, | |
| "eval_rougeL": 72.6858, | |
| "eval_rougeLsum": 72.4668, | |
| "eval_runtime": 12.0064, | |
| "eval_samples_per_second": 5.747, | |
| "eval_steps_per_second": 1.499, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 92.01449275362319, | |
| "eval_loss": 0.13447459042072296, | |
| "eval_rouge1": 75.0063, | |
| "eval_rouge2": 74.0782, | |
| "eval_rougeL": 75.0597, | |
| "eval_rougeLsum": 74.8943, | |
| "eval_runtime": 11.945, | |
| "eval_samples_per_second": 5.776, | |
| "eval_steps_per_second": 1.507, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 85.46376811594203, | |
| "eval_loss": 0.14119356870651245, | |
| "eval_rouge1": 75.3012, | |
| "eval_rouge2": 74.5492, | |
| "eval_rougeL": 75.4246, | |
| "eval_rougeLsum": 75.324, | |
| "eval_runtime": 10.9494, | |
| "eval_samples_per_second": 6.302, | |
| "eval_steps_per_second": 1.644, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 85.04347826086956, | |
| "eval_loss": 0.10889122635126114, | |
| "eval_rouge1": 74.8426, | |
| "eval_rouge2": 74.0317, | |
| "eval_rougeL": 74.8939, | |
| "eval_rougeLsum": 74.8128, | |
| "eval_runtime": 11.2109, | |
| "eval_samples_per_second": 6.155, | |
| "eval_steps_per_second": 1.606, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 85.31884057971014, | |
| "eval_loss": 0.12416736036539078, | |
| "eval_rouge1": 75.3806, | |
| "eval_rouge2": 74.6735, | |
| "eval_rougeL": 75.5866, | |
| "eval_rougeLsum": 75.5446, | |
| "eval_runtime": 11.151, | |
| "eval_samples_per_second": 6.188, | |
| "eval_steps_per_second": 1.614, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 345, | |
| "total_flos": 2990604350914560.0, | |
| "train_loss": 0.036720043679942256, | |
| "train_runtime": 132.663, | |
| "train_samples_per_second": 10.402, | |
| "train_steps_per_second": 2.601 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 345, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 2990604350914560.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |