| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 2464, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 109.027, | |
| "eval_loss": 1.2415885925292969, | |
| "eval_rouge1": 0.5269433666229263, | |
| "eval_rouge2": 0.28037183395108156, | |
| "eval_rougeL": 0.35995139966897394, | |
| "eval_rougeLsum": 0.37649885084214285, | |
| "eval_runtime": 723.8312, | |
| "eval_samples_per_second": 1.942, | |
| "eval_steps_per_second": 0.122, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4204545454545454, | |
| "grad_norm": 3.200246810913086, | |
| "learning_rate": 4.759018352770229e-06, | |
| "loss": 1.3036, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 112.4431, | |
| "eval_loss": 1.177475094795227, | |
| "eval_rouge1": 0.5430391605521028, | |
| "eval_rouge2": 0.2972281166428693, | |
| "eval_rougeL": 0.37256436707598495, | |
| "eval_rougeLsum": 0.3884270291699813, | |
| "eval_runtime": 682.3343, | |
| "eval_samples_per_second": 2.061, | |
| "eval_steps_per_second": 0.129, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.840909090909091, | |
| "grad_norm": 3.206270217895508, | |
| "learning_rate": 4.075611866937373e-06, | |
| "loss": 1.1054, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 108.6607, | |
| "eval_loss": 1.1586213111877441, | |
| "eval_rouge1": 0.5453459493029312, | |
| "eval_rouge2": 0.30012808902733723, | |
| "eval_rougeL": 0.3766083565770627, | |
| "eval_rougeLsum": 0.39252447019524606, | |
| "eval_runtime": 840.2954, | |
| "eval_samples_per_second": 1.673, | |
| "eval_steps_per_second": 0.105, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 106.1166, | |
| "eval_loss": 1.1364487409591675, | |
| "eval_rouge1": 0.5476054862493738, | |
| "eval_rouge2": 0.3011862619458042, | |
| "eval_rougeL": 0.37811611035117715, | |
| "eval_rougeLsum": 0.3946684605387395, | |
| "eval_runtime": 576.235, | |
| "eval_samples_per_second": 2.44, | |
| "eval_steps_per_second": 0.153, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 4.261363636363637, | |
| "grad_norm": 3.138392925262451, | |
| "learning_rate": 3.0836134096397642e-06, | |
| "loss": 1.0104, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 105.2859, | |
| "eval_loss": 1.1392455101013184, | |
| "eval_rouge1": 0.5465154278801767, | |
| "eval_rouge2": 0.3007552050130359, | |
| "eval_rougeL": 0.3773198844643692, | |
| "eval_rougeLsum": 0.3932806868540929, | |
| "eval_runtime": 1033.7454, | |
| "eval_samples_per_second": 1.36, | |
| "eval_steps_per_second": 0.085, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 5.681818181818182, | |
| "grad_norm": 3.235567092895508, | |
| "learning_rate": 1.977311159362942e-06, | |
| "loss": 0.9495, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 104.6494, | |
| "eval_loss": 1.125599980354309, | |
| "eval_rouge1": 0.5492129139731206, | |
| "eval_rouge2": 0.30319883423234095, | |
| "eval_rougeL": 0.3789239095282904, | |
| "eval_rougeLsum": 0.39465416548270177, | |
| "eval_runtime": 562.8701, | |
| "eval_samples_per_second": 2.498, | |
| "eval_steps_per_second": 0.156, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 103.5974, | |
| "eval_loss": 1.1215816736221313, | |
| "eval_rouge1": 0.5489838414026048, | |
| "eval_rouge2": 0.30370662819639743, | |
| "eval_rougeL": 0.3803871339480277, | |
| "eval_rougeLsum": 0.3957810407664568, | |
| "eval_runtime": 623.0852, | |
| "eval_samples_per_second": 2.257, | |
| "eval_steps_per_second": 0.141, | |
| "step": 2464 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 3520, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.445404124486042e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |