| { | |
| "best_metric": 0.6162260174751282, | |
| "best_model_checkpoint": "flan_t5_summarization/checkpoint-2720", | |
| "epoch": 10.0, | |
| "global_step": 2720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 1.120025634765625, | |
| "eval_rouge1": 9.2565, | |
| "eval_rouge2": 1.2805, | |
| "eval_rougeL": 9.2358, | |
| "eval_rougeLsum": 9.284, | |
| "eval_runtime": 4.0606, | |
| "eval_samples_per_second": 16.5, | |
| "eval_steps_per_second": 1.478, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.0808823529411765e-05, | |
| "loss": 1.5343, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 0.8922988772392273, | |
| "eval_rouge1": 10.9045, | |
| "eval_rouge2": 2.9468, | |
| "eval_rougeL": 10.9112, | |
| "eval_rougeLsum": 10.8827, | |
| "eval_runtime": 4.0719, | |
| "eval_samples_per_second": 16.454, | |
| "eval_steps_per_second": 1.474, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 19.0, | |
| "eval_loss": 0.7884227633476257, | |
| "eval_rouge1": 13.637, | |
| "eval_rouge2": 4.8447, | |
| "eval_rougeL": 13.3594, | |
| "eval_rougeLsum": 13.3459, | |
| "eval_runtime": 4.0204, | |
| "eval_samples_per_second": 16.665, | |
| "eval_steps_per_second": 1.492, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 3.161764705882353e-05, | |
| "loss": 0.9949, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 18.91044776119403, | |
| "eval_loss": 0.7256659269332886, | |
| "eval_rouge1": 15.2005, | |
| "eval_rouge2": 6.3919, | |
| "eval_rougeL": 14.781, | |
| "eval_rougeLsum": 14.7204, | |
| "eval_runtime": 4.0736, | |
| "eval_samples_per_second": 16.447, | |
| "eval_steps_per_second": 1.473, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 18.73134328358209, | |
| "eval_loss": 0.6852018237113953, | |
| "eval_rouge1": 15.1627, | |
| "eval_rouge2": 6.3116, | |
| "eval_rougeL": 14.7187, | |
| "eval_rougeLsum": 14.6983, | |
| "eval_runtime": 4.0455, | |
| "eval_samples_per_second": 16.561, | |
| "eval_steps_per_second": 1.483, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.2426470588235296e-05, | |
| "loss": 0.8504, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 18.73134328358209, | |
| "eval_loss": 0.6580936312675476, | |
| "eval_rouge1": 16.0795, | |
| "eval_rouge2": 6.9284, | |
| "eval_rougeL": 15.3516, | |
| "eval_rougeLsum": 15.3571, | |
| "eval_runtime": 4.0423, | |
| "eval_samples_per_second": 16.575, | |
| "eval_steps_per_second": 1.484, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 18.91044776119403, | |
| "eval_loss": 0.6391794681549072, | |
| "eval_rouge1": 16.0518, | |
| "eval_rouge2": 6.9377, | |
| "eval_rougeL": 15.3914, | |
| "eval_rougeLsum": 15.3748, | |
| "eval_runtime": 4.0453, | |
| "eval_samples_per_second": 16.562, | |
| "eval_steps_per_second": 1.483, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 0.7841, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 18.82089552238806, | |
| "eval_loss": 0.6258341073989868, | |
| "eval_rouge1": 16.1307, | |
| "eval_rouge2": 7.6286, | |
| "eval_rougeL": 15.7398, | |
| "eval_rougeLsum": 15.7627, | |
| "eval_runtime": 4.0536, | |
| "eval_samples_per_second": 16.529, | |
| "eval_steps_per_second": 1.48, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 18.73134328358209, | |
| "eval_loss": 0.6200478672981262, | |
| "eval_rouge1": 15.9488, | |
| "eval_rouge2": 7.4447, | |
| "eval_rougeL": 15.5654, | |
| "eval_rougeLsum": 15.583, | |
| "eval_runtime": 4.042, | |
| "eval_samples_per_second": 16.576, | |
| "eval_steps_per_second": 1.484, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 4.044117647058824e-06, | |
| "loss": 0.7599, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 18.73134328358209, | |
| "eval_loss": 0.6162260174751282, | |
| "eval_rouge1": 15.9418, | |
| "eval_rouge2": 7.4447, | |
| "eval_rougeL": 15.5655, | |
| "eval_rougeLsum": 15.5835, | |
| "eval_runtime": 4.0403, | |
| "eval_samples_per_second": 16.583, | |
| "eval_steps_per_second": 1.485, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 2720, | |
| "total_flos": 1177555285370880.0, | |
| "train_loss": 0.9648406533633962, | |
| "train_runtime": 1843.1414, | |
| "train_samples_per_second": 17.693, | |
| "train_steps_per_second": 1.476 | |
| } | |
| ], | |
| "max_steps": 2720, | |
| "num_train_epochs": 10, | |
| "total_flos": 1177555285370880.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |