| { | |
| "best_metric": 31.7626, | |
| "best_model_checkpoint": "/content/drive/Shareddrives/UCM_SHARED/TFM_ESG/Ejemplo Oficial de T5-Summarization de HF/t5-small_adafactor/checkpoint-8100", | |
| "epoch": 0.9880028228652082, | |
| "global_step": 8400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0009823570924488356, | |
| "loss": 2.9361, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_gen_len": 18.8845, | |
| "eval_loss": 2.6215693950653076, | |
| "eval_rouge1": 26.8542, | |
| "eval_rouge2": 6.8667, | |
| "eval_rougeL": 21.1484, | |
| "eval_rougeLsum": 21.1563, | |
| "eval_runtime": 342.0879, | |
| "eval_samples_per_second": 33.126, | |
| "eval_steps_per_second": 1.383, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0009648318042813455, | |
| "loss": 2.8543, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_gen_len": 18.8097, | |
| "eval_loss": 2.5835769176483154, | |
| "eval_rouge1": 27.2234, | |
| "eval_rouge2": 7.1753, | |
| "eval_rougeL": 21.5276, | |
| "eval_rougeLsum": 21.5308, | |
| "eval_runtime": 342.5643, | |
| "eval_samples_per_second": 33.08, | |
| "eval_steps_per_second": 1.381, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0009471888967301812, | |
| "loss": 2.814, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_gen_len": 18.8294, | |
| "eval_loss": 2.564615488052368, | |
| "eval_rouge1": 28.1695, | |
| "eval_rouge2": 7.7873, | |
| "eval_rougeL": 22.2229, | |
| "eval_rougeLsum": 22.2251, | |
| "eval_runtime": 341.6846, | |
| "eval_samples_per_second": 33.165, | |
| "eval_steps_per_second": 1.384, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0009295459891790166, | |
| "loss": 2.7861, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_gen_len": 18.7867, | |
| "eval_loss": 2.5409207344055176, | |
| "eval_rouge1": 28.5349, | |
| "eval_rouge2": 7.9732, | |
| "eval_rougeL": 22.6959, | |
| "eval_rougeLsum": 22.7078, | |
| "eval_runtime": 341.6917, | |
| "eval_samples_per_second": 33.164, | |
| "eval_steps_per_second": 1.384, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0009119030816278523, | |
| "loss": 2.76, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_gen_len": 18.7526, | |
| "eval_loss": 2.5160539150238037, | |
| "eval_rouge1": 28.5629, | |
| "eval_rouge2": 7.9485, | |
| "eval_rougeL": 22.6545, | |
| "eval_rougeLsum": 22.6617, | |
| "eval_runtime": 341.8755, | |
| "eval_samples_per_second": 33.147, | |
| "eval_steps_per_second": 1.384, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0008942601740766878, | |
| "loss": 2.7545, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_gen_len": 18.7481, | |
| "eval_loss": 2.5028903484344482, | |
| "eval_rouge1": 29.1851, | |
| "eval_rouge2": 8.3586, | |
| "eval_rougeL": 23.1781, | |
| "eval_rougeLsum": 23.1815, | |
| "eval_runtime": 341.7767, | |
| "eval_samples_per_second": 33.156, | |
| "eval_steps_per_second": 1.384, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0008767348859091978, | |
| "loss": 2.7294, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_gen_len": 18.6948, | |
| "eval_loss": 2.4874510765075684, | |
| "eval_rouge1": 28.7611, | |
| "eval_rouge2": 8.0309, | |
| "eval_rougeL": 22.8289, | |
| "eval_rougeLsum": 22.8364, | |
| "eval_runtime": 341.7005, | |
| "eval_samples_per_second": 33.164, | |
| "eval_steps_per_second": 1.384, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0008590919783580335, | |
| "loss": 2.7005, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_gen_len": 18.7843, | |
| "eval_loss": 2.4769885540008545, | |
| "eval_rouge1": 29.1072, | |
| "eval_rouge2": 8.2713, | |
| "eval_rougeL": 23.0568, | |
| "eval_rougeLsum": 23.0602, | |
| "eval_runtime": 341.9693, | |
| "eval_samples_per_second": 33.137, | |
| "eval_steps_per_second": 1.383, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0008414490708068689, | |
| "loss": 2.7057, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_gen_len": 18.6973, | |
| "eval_loss": 2.46754789352417, | |
| "eval_rouge1": 29.0607, | |
| "eval_rouge2": 8.4308, | |
| "eval_rougeL": 23.1094, | |
| "eval_rougeLsum": 23.1125, | |
| "eval_runtime": 341.5228, | |
| "eval_samples_per_second": 33.181, | |
| "eval_steps_per_second": 1.385, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0008238061632557046, | |
| "loss": 2.6779, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_gen_len": 18.7927, | |
| "eval_loss": 2.4461910724639893, | |
| "eval_rouge1": 29.4815, | |
| "eval_rouge2": 8.6203, | |
| "eval_rougeL": 23.3742, | |
| "eval_rougeLsum": 23.3745, | |
| "eval_runtime": 341.6546, | |
| "eval_samples_per_second": 33.168, | |
| "eval_steps_per_second": 1.384, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00080616325570454, | |
| "loss": 2.6944, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_gen_len": 18.8219, | |
| "eval_loss": 2.4397881031036377, | |
| "eval_rouge1": 29.3817, | |
| "eval_rouge2": 8.5247, | |
| "eval_rougeL": 23.3088, | |
| "eval_rougeLsum": 23.3137, | |
| "eval_runtime": 341.8781, | |
| "eval_samples_per_second": 33.146, | |
| "eval_steps_per_second": 1.384, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0007885203481533757, | |
| "loss": 2.6589, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_gen_len": 18.7917, | |
| "eval_loss": 2.429356813430786, | |
| "eval_rouge1": 29.1914, | |
| "eval_rouge2": 8.5565, | |
| "eval_rougeL": 23.2017, | |
| "eval_rougeLsum": 23.2039, | |
| "eval_runtime": 341.5952, | |
| "eval_samples_per_second": 33.174, | |
| "eval_steps_per_second": 1.385, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0007708774406022112, | |
| "loss": 2.6473, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_gen_len": 18.7605, | |
| "eval_loss": 2.417249917984009, | |
| "eval_rouge1": 29.9482, | |
| "eval_rouge2": 8.8321, | |
| "eval_rougeL": 23.7848, | |
| "eval_rougeLsum": 23.7954, | |
| "eval_runtime": 341.816, | |
| "eval_samples_per_second": 33.152, | |
| "eval_steps_per_second": 1.384, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0007532345330510469, | |
| "loss": 2.6498, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_gen_len": 18.8079, | |
| "eval_loss": 2.4158124923706055, | |
| "eval_rouge1": 29.5843, | |
| "eval_rouge2": 8.9229, | |
| "eval_rougeL": 23.6193, | |
| "eval_rougeLsum": 23.6255, | |
| "eval_runtime": 341.8291, | |
| "eval_samples_per_second": 33.151, | |
| "eval_steps_per_second": 1.384, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0007355916254998823, | |
| "loss": 2.6321, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_gen_len": 18.7639, | |
| "eval_loss": 2.4022669792175293, | |
| "eval_rouge1": 29.9147, | |
| "eval_rouge2": 9.0085, | |
| "eval_rougeL": 23.8569, | |
| "eval_rougeLsum": 23.8709, | |
| "eval_runtime": 341.59, | |
| "eval_samples_per_second": 33.174, | |
| "eval_steps_per_second": 1.385, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000717948717948718, | |
| "loss": 2.614, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_gen_len": 18.6922, | |
| "eval_loss": 2.3971035480499268, | |
| "eval_rouge1": 29.711, | |
| "eval_rouge2": 9.0017, | |
| "eval_rougeL": 23.761, | |
| "eval_rougeLsum": 23.7742, | |
| "eval_runtime": 341.4293, | |
| "eval_samples_per_second": 33.19, | |
| "eval_steps_per_second": 1.385, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0007003058103975535, | |
| "loss": 2.6212, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_gen_len": 18.7082, | |
| "eval_loss": 2.390623092651367, | |
| "eval_rouge1": 29.498, | |
| "eval_rouge2": 8.6483, | |
| "eval_rougeL": 23.5006, | |
| "eval_rougeLsum": 23.5112, | |
| "eval_runtime": 341.8905, | |
| "eval_samples_per_second": 33.145, | |
| "eval_steps_per_second": 1.383, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0006826629028463891, | |
| "loss": 2.6151, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_gen_len": 18.8923, | |
| "eval_loss": 2.3796188831329346, | |
| "eval_rouge1": 29.8502, | |
| "eval_rouge2": 8.9427, | |
| "eval_rougeL": 23.6665, | |
| "eval_rougeLsum": 23.6781, | |
| "eval_runtime": 341.6707, | |
| "eval_samples_per_second": 33.166, | |
| "eval_steps_per_second": 1.384, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0006650199952952247, | |
| "loss": 2.6123, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_gen_len": 18.7541, | |
| "eval_loss": 2.371220588684082, | |
| "eval_rouge1": 30.0211, | |
| "eval_rouge2": 9.2412, | |
| "eval_rougeL": 24.0388, | |
| "eval_rougeLsum": 24.045, | |
| "eval_runtime": 341.6315, | |
| "eval_samples_per_second": 33.17, | |
| "eval_steps_per_second": 1.385, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0006473770877440603, | |
| "loss": 2.5928, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_gen_len": 18.7963, | |
| "eval_loss": 2.359456777572632, | |
| "eval_rouge1": 30.3185, | |
| "eval_rouge2": 9.2796, | |
| "eval_rougeL": 24.2004, | |
| "eval_rougeLsum": 24.2101, | |
| "eval_runtime": 341.8833, | |
| "eval_samples_per_second": 33.146, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0006297341801928958, | |
| "loss": 2.5746, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_gen_len": 18.8229, | |
| "eval_loss": 2.366935968399048, | |
| "eval_rouge1": 29.8303, | |
| "eval_rouge2": 9.1665, | |
| "eval_rougeL": 23.8553, | |
| "eval_rougeLsum": 23.8672, | |
| "eval_runtime": 341.7148, | |
| "eval_samples_per_second": 33.162, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0006120912726417314, | |
| "loss": 2.5619, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 18.8087, | |
| "eval_loss": 2.3527944087982178, | |
| "eval_rouge1": 30.4684, | |
| "eval_rouge2": 9.4451, | |
| "eval_rougeL": 24.2867, | |
| "eval_rougeLsum": 24.2939, | |
| "eval_runtime": 341.8038, | |
| "eval_samples_per_second": 33.154, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.000594448365090567, | |
| "loss": 2.5677, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_gen_len": 18.8491, | |
| "eval_loss": 2.343312978744507, | |
| "eval_rouge1": 30.496, | |
| "eval_rouge2": 9.4356, | |
| "eval_rougeL": 24.31, | |
| "eval_rougeLsum": 24.3125, | |
| "eval_runtime": 342.0548, | |
| "eval_samples_per_second": 33.129, | |
| "eval_steps_per_second": 1.383, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0005768054575394025, | |
| "loss": 2.5456, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_gen_len": 18.809, | |
| "eval_loss": 2.3360698223114014, | |
| "eval_rouge1": 30.5752, | |
| "eval_rouge2": 9.4803, | |
| "eval_rougeL": 24.3462, | |
| "eval_rougeLsum": 24.3559, | |
| "eval_runtime": 341.7402, | |
| "eval_samples_per_second": 33.16, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0005591625499882381, | |
| "loss": 2.5669, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_gen_len": 18.8287, | |
| "eval_loss": 2.327768325805664, | |
| "eval_rouge1": 30.0561, | |
| "eval_rouge2": 9.2402, | |
| "eval_rougeL": 24.0535, | |
| "eval_rougeLsum": 24.0713, | |
| "eval_runtime": 341.71, | |
| "eval_samples_per_second": 33.163, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0005415196424370737, | |
| "loss": 2.5446, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_gen_len": 18.8118, | |
| "eval_loss": 2.3254404067993164, | |
| "eval_rouge1": 30.3142, | |
| "eval_rouge2": 9.4929, | |
| "eval_rougeL": 24.335, | |
| "eval_rougeLsum": 24.3432, | |
| "eval_runtime": 341.8271, | |
| "eval_samples_per_second": 33.151, | |
| "eval_steps_per_second": 1.384, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0005238767348859092, | |
| "loss": 2.5536, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_gen_len": 18.9035, | |
| "eval_loss": 2.3251070976257324, | |
| "eval_rouge1": 30.2523, | |
| "eval_rouge2": 9.3005, | |
| "eval_rougeL": 24.163, | |
| "eval_rougeLsum": 24.1711, | |
| "eval_runtime": 341.8151, | |
| "eval_samples_per_second": 33.152, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0005062338273347448, | |
| "loss": 2.5154, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_gen_len": 18.7458, | |
| "eval_loss": 2.313904047012329, | |
| "eval_rouge1": 30.7718, | |
| "eval_rouge2": 9.8676, | |
| "eval_rougeL": 24.7111, | |
| "eval_rougeLsum": 24.7266, | |
| "eval_runtime": 341.6852, | |
| "eval_samples_per_second": 33.165, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004885909197835804, | |
| "loss": 2.5176, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_gen_len": 18.7983, | |
| "eval_loss": 2.309589385986328, | |
| "eval_rouge1": 30.5617, | |
| "eval_rouge2": 9.6562, | |
| "eval_rougeL": 24.4754, | |
| "eval_rougeLsum": 24.4862, | |
| "eval_runtime": 341.8284, | |
| "eval_samples_per_second": 33.151, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004709480122324159, | |
| "loss": 2.5307, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_gen_len": 18.8217, | |
| "eval_loss": 2.3089160919189453, | |
| "eval_rouge1": 30.601, | |
| "eval_rouge2": 9.6672, | |
| "eval_rougeL": 24.4465, | |
| "eval_rougeLsum": 24.4567, | |
| "eval_runtime": 341.6213, | |
| "eval_samples_per_second": 33.171, | |
| "eval_steps_per_second": 1.385, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00045330510468125144, | |
| "loss": 2.515, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_gen_len": 18.8096, | |
| "eval_loss": 2.2991859912872314, | |
| "eval_rouge1": 31.1807, | |
| "eval_rouge2": 10.0105, | |
| "eval_rougeL": 24.9947, | |
| "eval_rougeLsum": 25.0096, | |
| "eval_runtime": 341.7452, | |
| "eval_samples_per_second": 33.159, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.000435662197130087, | |
| "loss": 2.5168, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_gen_len": 18.8254, | |
| "eval_loss": 2.292003631591797, | |
| "eval_rouge1": 30.8999, | |
| "eval_rouge2": 9.8832, | |
| "eval_rougeL": 24.7713, | |
| "eval_rougeLsum": 24.7823, | |
| "eval_runtime": 341.8542, | |
| "eval_samples_per_second": 33.149, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0004180192895789226, | |
| "loss": 2.5021, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_gen_len": 18.8787, | |
| "eval_loss": 2.285507917404175, | |
| "eval_rouge1": 31.0569, | |
| "eval_rouge2": 9.8752, | |
| "eval_rougeL": 24.791, | |
| "eval_rougeLsum": 24.8033, | |
| "eval_runtime": 341.7747, | |
| "eval_samples_per_second": 33.156, | |
| "eval_steps_per_second": 1.384, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00040037638202775815, | |
| "loss": 2.501, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_gen_len": 18.8315, | |
| "eval_loss": 2.2868235111236572, | |
| "eval_rouge1": 30.7132, | |
| "eval_rouge2": 9.8654, | |
| "eval_rougeL": 24.6084, | |
| "eval_rougeLsum": 24.6244, | |
| "eval_runtime": 341.9154, | |
| "eval_samples_per_second": 33.143, | |
| "eval_steps_per_second": 1.383, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0003827334744765937, | |
| "loss": 2.4849, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_gen_len": 18.8196, | |
| "eval_loss": 2.2783043384552, | |
| "eval_rouge1": 31.3434, | |
| "eval_rouge2": 10.206, | |
| "eval_rougeL": 25.0954, | |
| "eval_rougeLsum": 25.1114, | |
| "eval_runtime": 341.9076, | |
| "eval_samples_per_second": 33.143, | |
| "eval_steps_per_second": 1.383, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0003650905669254293, | |
| "loss": 2.4939, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_gen_len": 18.8579, | |
| "eval_loss": 2.275907039642334, | |
| "eval_rouge1": 31.1467, | |
| "eval_rouge2": 10.0457, | |
| "eval_rougeL": 24.964, | |
| "eval_rougeLsum": 24.9793, | |
| "eval_runtime": 341.6475, | |
| "eval_samples_per_second": 33.169, | |
| "eval_steps_per_second": 1.384, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00034744765937426485, | |
| "loss": 2.4624, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_gen_len": 18.8098, | |
| "eval_loss": 2.2712931632995605, | |
| "eval_rouge1": 31.4288, | |
| "eval_rouge2": 10.1719, | |
| "eval_rougeL": 25.1096, | |
| "eval_rougeLsum": 25.1276, | |
| "eval_runtime": 341.723, | |
| "eval_samples_per_second": 33.161, | |
| "eval_steps_per_second": 1.384, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0003298047518231004, | |
| "loss": 2.456, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_gen_len": 18.8262, | |
| "eval_loss": 2.2674217224121094, | |
| "eval_rouge1": 31.1515, | |
| "eval_rouge2": 10.1208, | |
| "eval_rougeL": 25.0456, | |
| "eval_rougeLsum": 25.0532, | |
| "eval_runtime": 341.9008, | |
| "eval_samples_per_second": 33.144, | |
| "eval_steps_per_second": 1.383, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.000312161844271936, | |
| "loss": 2.4667, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_gen_len": 18.7816, | |
| "eval_loss": 2.262035608291626, | |
| "eval_rouge1": 31.3741, | |
| "eval_rouge2": 10.1733, | |
| "eval_rougeL": 25.1421, | |
| "eval_rougeLsum": 25.159, | |
| "eval_runtime": 341.6066, | |
| "eval_samples_per_second": 33.173, | |
| "eval_steps_per_second": 1.385, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.000294636556104446, | |
| "loss": 2.4658, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_gen_len": 18.8, | |
| "eval_loss": 2.258843183517456, | |
| "eval_rouge1": 31.3913, | |
| "eval_rouge2": 10.2645, | |
| "eval_rougeL": 25.1746, | |
| "eval_rougeLsum": 25.188, | |
| "eval_runtime": 341.9247, | |
| "eval_samples_per_second": 33.142, | |
| "eval_steps_per_second": 1.383, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00027699364855328156, | |
| "loss": 2.4943, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_gen_len": 18.8074, | |
| "eval_loss": 2.2533156871795654, | |
| "eval_rouge1": 31.3905, | |
| "eval_rouge2": 10.164, | |
| "eval_rougeL": 25.093, | |
| "eval_rougeLsum": 25.107, | |
| "eval_runtime": 341.8317, | |
| "eval_samples_per_second": 33.151, | |
| "eval_steps_per_second": 1.384, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00025935074100211713, | |
| "loss": 2.473, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_gen_len": 18.7439, | |
| "eval_loss": 2.2551848888397217, | |
| "eval_rouge1": 31.1105, | |
| "eval_rouge2": 10.1939, | |
| "eval_rougeL": 24.9214, | |
| "eval_rougeLsum": 24.9321, | |
| "eval_runtime": 341.6034, | |
| "eval_samples_per_second": 33.173, | |
| "eval_steps_per_second": 1.385, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00024170783345095273, | |
| "loss": 2.4687, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_gen_len": 18.7709, | |
| "eval_loss": 2.247098684310913, | |
| "eval_rouge1": 31.3387, | |
| "eval_rouge2": 10.2446, | |
| "eval_rougeL": 25.0746, | |
| "eval_rougeLsum": 25.0936, | |
| "eval_runtime": 341.7347, | |
| "eval_samples_per_second": 33.16, | |
| "eval_steps_per_second": 1.384, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0002240649258997883, | |
| "loss": 2.4286, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_gen_len": 18.8313, | |
| "eval_loss": 2.2467916011810303, | |
| "eval_rouge1": 31.1953, | |
| "eval_rouge2": 10.1277, | |
| "eval_rougeL": 24.9535, | |
| "eval_rougeLsum": 24.9714, | |
| "eval_runtime": 341.8485, | |
| "eval_samples_per_second": 33.149, | |
| "eval_steps_per_second": 1.384, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00020642201834862386, | |
| "loss": 2.4492, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_gen_len": 18.7977, | |
| "eval_loss": 2.2422139644622803, | |
| "eval_rouge1": 31.7203, | |
| "eval_rouge2": 10.4934, | |
| "eval_rougeL": 25.3862, | |
| "eval_rougeLsum": 25.3946, | |
| "eval_runtime": 341.6909, | |
| "eval_samples_per_second": 33.164, | |
| "eval_steps_per_second": 1.384, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00018877911079745943, | |
| "loss": 2.4427, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_gen_len": 18.8144, | |
| "eval_loss": 2.2402756214141846, | |
| "eval_rouge1": 31.5498, | |
| "eval_rouge2": 10.4086, | |
| "eval_rougeL": 25.2384, | |
| "eval_rougeLsum": 25.2593, | |
| "eval_runtime": 341.7269, | |
| "eval_samples_per_second": 33.161, | |
| "eval_steps_per_second": 1.384, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.000171136203246295, | |
| "loss": 2.4641, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_gen_len": 18.7966, | |
| "eval_loss": 2.2365546226501465, | |
| "eval_rouge1": 31.4038, | |
| "eval_rouge2": 10.3691, | |
| "eval_rougeL": 25.1786, | |
| "eval_rougeLsum": 25.195, | |
| "eval_runtime": 341.8284, | |
| "eval_samples_per_second": 33.151, | |
| "eval_steps_per_second": 1.384, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00015349329569513056, | |
| "loss": 2.4276, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_gen_len": 18.7789, | |
| "eval_loss": 2.2344412803649902, | |
| "eval_rouge1": 31.5076, | |
| "eval_rouge2": 10.4403, | |
| "eval_rougeL": 25.299, | |
| "eval_rougeLsum": 25.3201, | |
| "eval_runtime": 342.6233, | |
| "eval_samples_per_second": 33.074, | |
| "eval_steps_per_second": 1.381, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00013585038814396613, | |
| "loss": 2.4402, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_gen_len": 18.7713, | |
| "eval_loss": 2.2317440509796143, | |
| "eval_rouge1": 31.5216, | |
| "eval_rouge2": 10.3901, | |
| "eval_rougeL": 25.276, | |
| "eval_rougeLsum": 25.2943, | |
| "eval_runtime": 345.4971, | |
| "eval_samples_per_second": 32.799, | |
| "eval_steps_per_second": 1.369, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011820748059280171, | |
| "loss": 2.44, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_gen_len": 18.7671, | |
| "eval_loss": 2.2292771339416504, | |
| "eval_rouge1": 31.4244, | |
| "eval_rouge2": 10.4211, | |
| "eval_rougeL": 25.2592, | |
| "eval_rougeLsum": 25.2735, | |
| "eval_runtime": 342.0133, | |
| "eval_samples_per_second": 33.133, | |
| "eval_steps_per_second": 1.383, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00010056457304163728, | |
| "loss": 2.4251, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_gen_len": 18.7972, | |
| "eval_loss": 2.226907968521118, | |
| "eval_rouge1": 31.4887, | |
| "eval_rouge2": 10.3959, | |
| "eval_rougeL": 25.2335, | |
| "eval_rougeLsum": 25.2545, | |
| "eval_runtime": 341.7402, | |
| "eval_samples_per_second": 33.16, | |
| "eval_steps_per_second": 1.384, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.292166549047284e-05, | |
| "loss": 2.456, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_gen_len": 18.786, | |
| "eval_loss": 2.224229097366333, | |
| "eval_rouge1": 31.4508, | |
| "eval_rouge2": 10.4079, | |
| "eval_rougeL": 25.2328, | |
| "eval_rougeLsum": 25.2564, | |
| "eval_runtime": 341.8379, | |
| "eval_samples_per_second": 33.15, | |
| "eval_steps_per_second": 1.384, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.527875793930841e-05, | |
| "loss": 2.4181, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_gen_len": 18.8012, | |
| "eval_loss": 2.2223522663116455, | |
| "eval_rouge1": 31.6181, | |
| "eval_rouge2": 10.5558, | |
| "eval_rougeL": 25.3867, | |
| "eval_rougeLsum": 25.4042, | |
| "eval_runtime": 342.1259, | |
| "eval_samples_per_second": 33.122, | |
| "eval_steps_per_second": 1.383, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.763585038814397e-05, | |
| "loss": 2.4288, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_gen_len": 18.7953, | |
| "eval_loss": 2.22170352935791, | |
| "eval_rouge1": 31.7626, | |
| "eval_rouge2": 10.6059, | |
| "eval_rougeL": 25.4827, | |
| "eval_rougeLsum": 25.4958, | |
| "eval_runtime": 341.8728, | |
| "eval_samples_per_second": 33.147, | |
| "eval_steps_per_second": 1.384, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.9992942836979537e-05, | |
| "loss": 2.4327, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_gen_len": 18.7827, | |
| "eval_loss": 2.220174789428711, | |
| "eval_rouge1": 31.6839, | |
| "eval_rouge2": 10.5615, | |
| "eval_rougeL": 25.4137, | |
| "eval_rougeLsum": 25.433, | |
| "eval_runtime": 342.1089, | |
| "eval_samples_per_second": 33.124, | |
| "eval_steps_per_second": 1.383, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.2350035285815103e-05, | |
| "loss": 2.4118, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_gen_len": 18.7979, | |
| "eval_loss": 2.2196593284606934, | |
| "eval_rouge1": 31.6519, | |
| "eval_rouge2": 10.4949, | |
| "eval_rougeL": 25.3751, | |
| "eval_rougeLsum": 25.3984, | |
| "eval_runtime": 342.1132, | |
| "eval_samples_per_second": 33.124, | |
| "eval_steps_per_second": 1.383, | |
| "step": 8400 | |
| } | |
| ], | |
| "max_steps": 8502, | |
| "num_train_epochs": 1, | |
| "total_flos": 4.261943991730176e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |