| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.175986495041148, | |
| "global_step": 17000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 4.4431, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 3.1658337116241455, | |
| "eval_rouge-1": 0.1104, | |
| "eval_rouge-2": 0.0147, | |
| "eval_rouge-l": 0.1071, | |
| "eval_runtime": 938.5942, | |
| "eval_samples_per_second": 4.447, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.8873366381252815e-05, | |
| "loss": 3.0017, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 2.450249195098877, | |
| "eval_rouge-1": 0.2256, | |
| "eval_rouge-2": 0.0714, | |
| "eval_rouge-l": 0.2134, | |
| "eval_runtime": 828.0492, | |
| "eval_samples_per_second": 5.041, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.662009914375845e-05, | |
| "loss": 2.258, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 1.915558099746704, | |
| "eval_rouge-1": 0.3442, | |
| "eval_rouge-2": 0.1597, | |
| "eval_rouge-l": 0.3284, | |
| "eval_runtime": 854.823, | |
| "eval_samples_per_second": 4.883, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.4366831906264086e-05, | |
| "loss": 1.8476, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 1.7214736938476562, | |
| "eval_rouge-1": 0.3784, | |
| "eval_rouge-2": 0.1998, | |
| "eval_rouge-l": 0.362, | |
| "eval_runtime": 711.98, | |
| "eval_samples_per_second": 5.863, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 4.211356466876972e-05, | |
| "loss": 1.6932, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 1.6499994993209839, | |
| "eval_rouge-1": 0.3992, | |
| "eval_rouge-2": 0.2227, | |
| "eval_rouge-l": 0.3822, | |
| "eval_runtime": 721.767, | |
| "eval_samples_per_second": 5.783, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 3.986029743127535e-05, | |
| "loss": 1.5271, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 1.5838263034820557, | |
| "eval_rouge-1": 0.3999, | |
| "eval_rouge-2": 0.2255, | |
| "eval_rouge-l": 0.3825, | |
| "eval_runtime": 791.7162, | |
| "eval_samples_per_second": 5.272, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.760703019378098e-05, | |
| "loss": 1.4984, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_loss": 1.5331367254257202, | |
| "eval_rouge-1": 0.4063, | |
| "eval_rouge-2": 0.2319, | |
| "eval_rouge-l": 0.39, | |
| "eval_runtime": 787.263, | |
| "eval_samples_per_second": 5.302, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.535376295628662e-05, | |
| "loss": 1.3435, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "eval_loss": 1.5344029664993286, | |
| "eval_rouge-1": 0.419, | |
| "eval_rouge-2": 0.2391, | |
| "eval_rouge-l": 0.4009, | |
| "eval_runtime": 801.0657, | |
| "eval_samples_per_second": 5.211, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 3.310049571879225e-05, | |
| "loss": 1.275, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_loss": 1.5167639255523682, | |
| "eval_rouge-1": 0.4172, | |
| "eval_rouge-2": 0.2399, | |
| "eval_rouge-l": 0.398, | |
| "eval_runtime": 714.2604, | |
| "eval_samples_per_second": 5.844, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 3.0847228481297885e-05, | |
| "loss": 1.2267, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "eval_loss": 1.5133156776428223, | |
| "eval_rouge-1": 0.4215, | |
| "eval_rouge-2": 0.2435, | |
| "eval_rouge-l": 0.402, | |
| "eval_runtime": 712.9933, | |
| "eval_samples_per_second": 5.854, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 2.859396124380352e-05, | |
| "loss": 1.1519, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "eval_loss": 1.4870655536651611, | |
| "eval_rouge-1": 0.4202, | |
| "eval_rouge-2": 0.244, | |
| "eval_rouge-l": 0.4013, | |
| "eval_runtime": 437.1167, | |
| "eval_samples_per_second": 9.549, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.6340694006309152e-05, | |
| "loss": 1.1841, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "eval_loss": 1.5044740438461304, | |
| "eval_rouge-1": 0.4269, | |
| "eval_rouge-2": 0.2471, | |
| "eval_rouge-l": 0.4076, | |
| "eval_runtime": 446.0452, | |
| "eval_samples_per_second": 9.358, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.4087426768814784e-05, | |
| "loss": 1.0586, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "eval_loss": 1.4930425882339478, | |
| "eval_rouge-1": 0.4285, | |
| "eval_rouge-2": 0.2518, | |
| "eval_rouge-l": 0.4109, | |
| "eval_runtime": 430.9672, | |
| "eval_samples_per_second": 9.685, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.1834159531320416e-05, | |
| "loss": 1.0657, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "eval_loss": 1.488339900970459, | |
| "eval_rouge-1": 0.4273, | |
| "eval_rouge-2": 0.2514, | |
| "eval_rouge-l": 0.4084, | |
| "eval_runtime": 438.2586, | |
| "eval_samples_per_second": 9.524, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.958089229382605e-05, | |
| "loss": 0.9725, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "eval_loss": 1.5135878324508667, | |
| "eval_rouge-1": 0.4312, | |
| "eval_rouge-2": 0.2537, | |
| "eval_rouge-l": 0.4116, | |
| "eval_runtime": 436.2911, | |
| "eval_samples_per_second": 9.567, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 1.732762505633168e-05, | |
| "loss": 0.9176, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "eval_loss": 1.5166784524917603, | |
| "eval_rouge-1": 0.433, | |
| "eval_rouge-2": 0.2559, | |
| "eval_rouge-l": 0.4146, | |
| "eval_runtime": 820.6736, | |
| "eval_samples_per_second": 5.086, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 1.5074357818837314e-05, | |
| "loss": 0.8982, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "eval_loss": 1.535448431968689, | |
| "eval_rouge-1": 0.4293, | |
| "eval_rouge-2": 0.2538, | |
| "eval_rouge-l": 0.4101, | |
| "eval_runtime": 828.443, | |
| "eval_samples_per_second": 5.038, | |
| "step": 17000 | |
| } | |
| ], | |
| "max_steps": 23690, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.317233087652327e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |