{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.345794392523365, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.93, "learning_rate": 0.0002719626168224299, "loss": 2.3351, "step": 100 }, { "epoch": 1.0, "eval_loss": 1.9539090394973755, "eval_rouge1": 0.3928238565889721, "eval_rouge2": 0.24457675199617848, "eval_rougeL": 0.34443833165269067, "eval_rougeLsum": 0.35377968284169653, "eval_runtime": 113.4757, "eval_samples_per_second": 2.503, "eval_steps_per_second": 0.626, "step": 107 }, { "epoch": 1.87, "learning_rate": 0.0002439252336448598, "loss": 1.8616, "step": 200 }, { "epoch": 2.0, "eval_loss": 1.8454691171646118, "eval_rouge1": 0.40135907614173977, "eval_rouge2": 0.2537731004498688, "eval_rougeL": 0.3517340305952086, "eval_rougeLsum": 0.3599553733909089, "eval_runtime": 90.5265, "eval_samples_per_second": 3.137, "eval_steps_per_second": 0.784, "step": 214 }, { "epoch": 2.8, "learning_rate": 0.0002158878504672897, "loss": 1.5474, "step": 300 }, { "epoch": 3.0, "eval_loss": 1.7911098003387451, "eval_rouge1": 0.40516907769034227, "eval_rouge2": 0.2571882790410479, "eval_rougeL": 0.3551430134650718, "eval_rougeLsum": 0.3650237424768551, "eval_runtime": 89.8747, "eval_samples_per_second": 3.16, "eval_steps_per_second": 0.79, "step": 321 }, { "epoch": 3.74, "learning_rate": 0.0001878504672897196, "loss": 1.3416, "step": 400 }, { "epoch": 4.0, "eval_loss": 1.7877225875854492, "eval_rouge1": 0.39727138204448276, "eval_rouge2": 0.2553111741867333, "eval_rougeL": 0.35344418510391534, "eval_rougeLsum": 0.3624411592522736, "eval_runtime": 91.2462, "eval_samples_per_second": 3.112, "eval_steps_per_second": 0.778, "step": 428 }, { "epoch": 4.67, "learning_rate": 0.0001598130841121495, "loss": 1.1667, "step": 500 }, { "epoch": 5.0, "eval_loss": 1.8065179586410522, "eval_rouge1": 0.40226039832787347, "eval_rouge2": 0.25889093132847524, "eval_rougeL": 0.3547361470543734, "eval_rougeLsum": 0.3642271784388902, "eval_runtime": 88.6588, "eval_samples_per_second": 3.203, "eval_steps_per_second": 0.801, "step": 535 }, { "epoch": 5.61, "learning_rate": 0.0001317757009345794, "loss": 1.0582, "step": 600 }, { "epoch": 6.0, "eval_loss": 1.8586112260818481, "eval_rouge1": 0.3957092352506892, "eval_rouge2": 0.2538965597420449, "eval_rougeL": 0.34916932912187554, "eval_rougeLsum": 0.3576929636444268, "eval_runtime": 95.986, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.74, "step": 642 }, { "epoch": 6.54, "learning_rate": 0.00010373831775700933, "loss": 0.9202, "step": 700 }, { "epoch": 7.0, "eval_loss": 1.8837732076644897, "eval_rouge1": 0.40251597070955947, "eval_rouge2": 0.2583209858085714, "eval_rougeL": 0.3540804774162999, "eval_rougeLsum": 0.36370971781604133, "eval_runtime": 94.4477, "eval_samples_per_second": 3.007, "eval_steps_per_second": 0.752, "step": 749 }, { "epoch": 7.48, "learning_rate": 7.570093457943924e-05, "loss": 0.8575, "step": 800 }, { "epoch": 8.0, "eval_loss": 1.9066342115402222, "eval_rouge1": 0.39449493230029453, "eval_rouge2": 0.2549659416128175, "eval_rougeL": 0.348120528239499, "eval_rougeLsum": 0.3568722790464366, "eval_runtime": 82.6343, "eval_samples_per_second": 3.437, "eval_steps_per_second": 0.859, "step": 856 }, { "epoch": 8.41, "learning_rate": 4.766355140186915e-05, "loss": 0.7691, "step": 900 }, { "epoch": 9.0, "eval_loss": 1.970426321029663, "eval_rouge1": 0.393735400589476, "eval_rouge2": 0.25521942254430097, "eval_rougeL": 0.34772527093463, "eval_rougeLsum": 0.356885126635194, "eval_runtime": 88.6133, "eval_samples_per_second": 3.205, "eval_steps_per_second": 0.801, "step": 963 }, { "epoch": 9.35, "learning_rate": 1.9626168224299062e-05, "loss": 0.7361, "step": 1000 } ], "max_steps": 1070, "num_train_epochs": 10, "total_flos": 370753309016064.0, "trial_name": null, "trial_params": null }