| { |
| "best_metric": 54.2663, |
| "best_model_checkpoint": "drive/MyDrive/Models/bart-base-paraphrasing-Review/checkpoint-200", |
| "epoch": 20.0, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2, |
| "learning_rate": 3.9680000000000006e-05, |
| "loss": 1.5002, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_gen_len": 19.33, |
| "eval_loss": 0.5407444834709167, |
| "eval_rouge1": 51.4943, |
| "eval_rouge2": 46.3962, |
| "eval_rougeL": 48.4646, |
| "eval_rougeLsum": 50.6928, |
| "eval_runtime": 6.8406, |
| "eval_samples_per_second": 14.619, |
| "eval_steps_per_second": 1.9, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 3.9280000000000003e-05, |
| "loss": 0.7721, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_gen_len": 19.82, |
| "eval_loss": 0.46091121435165405, |
| "eval_rouge1": 52.542, |
| "eval_rouge2": 47.6572, |
| "eval_rougeL": 49.0919, |
| "eval_rougeLsum": 51.8604, |
| "eval_runtime": 8.2487, |
| "eval_samples_per_second": 12.123, |
| "eval_steps_per_second": 1.576, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.888e-05, |
| "loss": 0.6535, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_gen_len": 19.83, |
| "eval_loss": 0.462110698223114, |
| "eval_rouge1": 51.4575, |
| "eval_rouge2": 45.802, |
| "eval_rougeL": 48.4395, |
| "eval_rougeLsum": 50.6411, |
| "eval_runtime": 8.2208, |
| "eval_samples_per_second": 12.164, |
| "eval_steps_per_second": 1.581, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.8480000000000004e-05, |
| "loss": 0.6574, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4107917547225952, |
| "eval_rouge1": 53.4288, |
| "eval_rouge2": 48.0337, |
| "eval_rougeL": 49.8305, |
| "eval_rougeLsum": 52.5636, |
| "eval_runtime": 7.434, |
| "eval_samples_per_second": 13.452, |
| "eval_steps_per_second": 1.749, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.808e-05, |
| "loss": 0.6373, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 19.6, |
| "eval_loss": 0.417746365070343, |
| "eval_rouge1": 52.4223, |
| "eval_rouge2": 47.4174, |
| "eval_rougeL": 49.3609, |
| "eval_rougeLsum": 51.6379, |
| "eval_runtime": 8.0886, |
| "eval_samples_per_second": 12.363, |
| "eval_steps_per_second": 1.607, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.768e-05, |
| "loss": 0.4648, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_gen_len": 19.78, |
| "eval_loss": 0.4121185541152954, |
| "eval_rouge1": 54.2574, |
| "eval_rouge2": 49.4654, |
| "eval_rougeL": 52.0245, |
| "eval_rougeLsum": 53.4168, |
| "eval_runtime": 6.7135, |
| "eval_samples_per_second": 14.895, |
| "eval_steps_per_second": 1.936, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 3.728e-05, |
| "loss": 0.4845, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_gen_len": 19.81, |
| "eval_loss": 0.3925768733024597, |
| "eval_rouge1": 53.6404, |
| "eval_rouge2": 48.6113, |
| "eval_rougeL": 50.7083, |
| "eval_rougeLsum": 52.7981, |
| "eval_runtime": 8.3261, |
| "eval_samples_per_second": 12.01, |
| "eval_steps_per_second": 1.561, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 3.6880000000000006e-05, |
| "loss": 0.384, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_gen_len": 19.86, |
| "eval_loss": 0.4059392511844635, |
| "eval_rouge1": 53.3219, |
| "eval_rouge2": 48.4794, |
| "eval_rougeL": 51.0572, |
| "eval_rougeLsum": 52.5149, |
| "eval_runtime": 6.9029, |
| "eval_samples_per_second": 14.487, |
| "eval_steps_per_second": 1.883, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 3.648e-05, |
| "loss": 0.424, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_gen_len": 19.86, |
| "eval_loss": 0.40950503945350647, |
| "eval_rouge1": 53.46, |
| "eval_rouge2": 48.5606, |
| "eval_rougeL": 50.3645, |
| "eval_rougeLsum": 52.6534, |
| "eval_runtime": 7.8719, |
| "eval_samples_per_second": 12.703, |
| "eval_steps_per_second": 1.651, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 3.608000000000001e-05, |
| "loss": 0.5296, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 19.82, |
| "eval_loss": 0.3766419291496277, |
| "eval_rouge1": 53.4984, |
| "eval_rouge2": 48.5905, |
| "eval_rougeL": 51.3471, |
| "eval_rougeLsum": 52.7547, |
| "eval_runtime": 9.149, |
| "eval_samples_per_second": 10.93, |
| "eval_steps_per_second": 1.421, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.5680000000000004e-05, |
| "loss": 0.3373, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_gen_len": 19.73, |
| "eval_loss": 0.38662123680114746, |
| "eval_rouge1": 54.0079, |
| "eval_rouge2": 49.2467, |
| "eval_rougeL": 51.8215, |
| "eval_rougeLsum": 53.1045, |
| "eval_runtime": 9.6895, |
| "eval_samples_per_second": 10.32, |
| "eval_steps_per_second": 1.342, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 3.528e-05, |
| "loss": 0.3547, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4074762761592865, |
| "eval_rouge1": 53.6412, |
| "eval_rouge2": 48.7051, |
| "eval_rougeL": 50.9648, |
| "eval_rougeLsum": 52.9864, |
| "eval_runtime": 10.9749, |
| "eval_samples_per_second": 9.112, |
| "eval_steps_per_second": 1.185, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 3.4880000000000005e-05, |
| "loss": 0.3487, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_gen_len": 19.8, |
| "eval_loss": 0.3859531283378601, |
| "eval_rouge1": 52.9323, |
| "eval_rouge2": 47.6675, |
| "eval_rougeL": 49.9928, |
| "eval_rougeLsum": 52.1444, |
| "eval_runtime": 7.7041, |
| "eval_samples_per_second": 12.98, |
| "eval_steps_per_second": 1.687, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.448e-05, |
| "loss": 0.3698, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.38846832513809204, |
| "eval_rouge1": 53.839, |
| "eval_rouge2": 48.7036, |
| "eval_rougeL": 50.9286, |
| "eval_rougeLsum": 52.9169, |
| "eval_runtime": 7.8099, |
| "eval_samples_per_second": 12.804, |
| "eval_steps_per_second": 1.665, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 3.408e-05, |
| "loss": 0.3611, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.3930496275424957, |
| "eval_rouge1": 53.5329, |
| "eval_rouge2": 48.8497, |
| "eval_rougeL": 50.7808, |
| "eval_rougeLsum": 52.8185, |
| "eval_runtime": 7.4198, |
| "eval_samples_per_second": 13.477, |
| "eval_steps_per_second": 1.752, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 3.368e-05, |
| "loss": 0.2646, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.2, |
| "eval_gen_len": 19.72, |
| "eval_loss": 0.3976023197174072, |
| "eval_rouge1": 52.8705, |
| "eval_rouge2": 47.9654, |
| "eval_rougeL": 50.1419, |
| "eval_rougeLsum": 52.2498, |
| "eval_runtime": 7.0404, |
| "eval_samples_per_second": 14.204, |
| "eval_steps_per_second": 1.846, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 3.328e-05, |
| "loss": 0.247, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.4, |
| "eval_gen_len": 19.83, |
| "eval_loss": 0.42816296219825745, |
| "eval_rouge1": 53.8543, |
| "eval_rouge2": 48.9547, |
| "eval_rougeL": 50.8772, |
| "eval_rougeLsum": 53.0736, |
| "eval_runtime": 8.0332, |
| "eval_samples_per_second": 12.448, |
| "eval_steps_per_second": 1.618, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 3.2880000000000004e-05, |
| "loss": 0.3138, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.4122447669506073, |
| "eval_rouge1": 53.352, |
| "eval_rouge2": 48.3843, |
| "eval_rougeL": 50.8596, |
| "eval_rougeLsum": 52.4134, |
| "eval_runtime": 6.6776, |
| "eval_samples_per_second": 14.975, |
| "eval_steps_per_second": 1.947, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 3.248000000000001e-05, |
| "loss": 0.2651, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.8, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4259437322616577, |
| "eval_rouge1": 54.1516, |
| "eval_rouge2": 49.344, |
| "eval_rougeL": 51.9955, |
| "eval_rougeLsum": 53.4336, |
| "eval_runtime": 8.5086, |
| "eval_samples_per_second": 11.753, |
| "eval_steps_per_second": 1.528, |
| "step": 190 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 3.2080000000000005e-05, |
| "loss": 0.2809, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4072933495044708, |
| "eval_rouge1": 54.2663, |
| "eval_rouge2": 49.3906, |
| "eval_rougeL": 51.9168, |
| "eval_rougeLsum": 53.408, |
| "eval_runtime": 6.638, |
| "eval_samples_per_second": 15.065, |
| "eval_steps_per_second": 1.958, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 3.168e-05, |
| "loss": 0.1995, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.2, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.43279576301574707, |
| "eval_rouge1": 52.9244, |
| "eval_rouge2": 47.8818, |
| "eval_rougeL": 50.3287, |
| "eval_rougeLsum": 52.0486, |
| "eval_runtime": 11.2433, |
| "eval_samples_per_second": 8.894, |
| "eval_steps_per_second": 1.156, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 3.1280000000000005e-05, |
| "loss": 0.2221, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.4, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4510229527950287, |
| "eval_rouge1": 53.2115, |
| "eval_rouge2": 47.7244, |
| "eval_rougeL": 50.3561, |
| "eval_rougeLsum": 52.3876, |
| "eval_runtime": 9.8917, |
| "eval_samples_per_second": 10.11, |
| "eval_steps_per_second": 1.314, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 3.088e-05, |
| "loss": 0.198, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.6, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.46202754974365234, |
| "eval_rouge1": 53.6401, |
| "eval_rouge2": 48.3161, |
| "eval_rougeL": 50.5124, |
| "eval_rougeLsum": 52.809, |
| "eval_runtime": 6.6888, |
| "eval_samples_per_second": 14.95, |
| "eval_steps_per_second": 1.944, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.8, |
| "learning_rate": 3.0520000000000006e-05, |
| "loss": 0.2395, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.8, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.42543941736221313, |
| "eval_rouge1": 52.8133, |
| "eval_rouge2": 47.1914, |
| "eval_rougeL": 49.4615, |
| "eval_rougeLsum": 52.0112, |
| "eval_runtime": 8.3162, |
| "eval_samples_per_second": 12.025, |
| "eval_steps_per_second": 1.563, |
| "step": 240 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 3.0120000000000003e-05, |
| "loss": 0.1948, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 19.81, |
| "eval_loss": 0.4100102186203003, |
| "eval_rouge1": 54.5088, |
| "eval_rouge2": 49.4265, |
| "eval_rougeL": 51.1343, |
| "eval_rougeLsum": 53.6923, |
| "eval_runtime": 6.6101, |
| "eval_samples_per_second": 15.128, |
| "eval_steps_per_second": 1.967, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.2, |
| "learning_rate": 2.9720000000000003e-05, |
| "loss": 0.1519, |
| "step": 260 |
| }, |
| { |
| "epoch": 5.2, |
| "eval_gen_len": 19.77, |
| "eval_loss": 0.4444543421268463, |
| "eval_rouge1": 52.762, |
| "eval_rouge2": 47.5185, |
| "eval_rougeL": 49.8893, |
| "eval_rougeLsum": 51.9623, |
| "eval_runtime": 8.1595, |
| "eval_samples_per_second": 12.256, |
| "eval_steps_per_second": 1.593, |
| "step": 260 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 2.932e-05, |
| "loss": 0.1982, |
| "step": 270 |
| }, |
| { |
| "epoch": 5.4, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.4569690227508545, |
| "eval_rouge1": 53.5689, |
| "eval_rouge2": 48.2123, |
| "eval_rougeL": 50.6624, |
| "eval_rougeLsum": 52.7258, |
| "eval_runtime": 7.021, |
| "eval_samples_per_second": 14.243, |
| "eval_steps_per_second": 1.852, |
| "step": 270 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 2.892e-05, |
| "loss": 0.1715, |
| "step": 280 |
| }, |
| { |
| "epoch": 5.6, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.4624459445476532, |
| "eval_rouge1": 52.8072, |
| "eval_rouge2": 47.7219, |
| "eval_rougeL": 50.2858, |
| "eval_rougeLsum": 52.1116, |
| "eval_runtime": 7.4965, |
| "eval_samples_per_second": 13.34, |
| "eval_steps_per_second": 1.734, |
| "step": 280 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 2.852e-05, |
| "loss": 0.2086, |
| "step": 290 |
| }, |
| { |
| "epoch": 5.8, |
| "eval_gen_len": 19.89, |
| "eval_loss": 0.42807042598724365, |
| "eval_rouge1": 53.7821, |
| "eval_rouge2": 48.5423, |
| "eval_rougeL": 51.0908, |
| "eval_rougeLsum": 53.0218, |
| "eval_runtime": 7.7637, |
| "eval_samples_per_second": 12.88, |
| "eval_steps_per_second": 1.674, |
| "step": 290 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 2.8120000000000002e-05, |
| "loss": 0.1586, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.46185803413391113, |
| "eval_rouge1": 54.0695, |
| "eval_rouge2": 49.2271, |
| "eval_rougeL": 51.3253, |
| "eval_rougeLsum": 53.3106, |
| "eval_runtime": 6.9257, |
| "eval_samples_per_second": 14.439, |
| "eval_steps_per_second": 1.877, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.2, |
| "learning_rate": 2.772e-05, |
| "loss": 0.1397, |
| "step": 310 |
| }, |
| { |
| "epoch": 6.2, |
| "eval_gen_len": 19.7, |
| "eval_loss": 0.47624072432518005, |
| "eval_rouge1": 52.9886, |
| "eval_rouge2": 48.0092, |
| "eval_rougeL": 50.6223, |
| "eval_rougeLsum": 52.1953, |
| "eval_runtime": 7.6624, |
| "eval_samples_per_second": 13.051, |
| "eval_steps_per_second": 1.697, |
| "step": 310 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 2.7320000000000003e-05, |
| "loss": 0.1359, |
| "step": 320 |
| }, |
| { |
| "epoch": 6.4, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.5075673460960388, |
| "eval_rouge1": 55.1074, |
| "eval_rouge2": 50.6647, |
| "eval_rougeL": 53.0524, |
| "eval_rougeLsum": 54.4127, |
| "eval_runtime": 10.6561, |
| "eval_samples_per_second": 9.384, |
| "eval_steps_per_second": 1.22, |
| "step": 320 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 2.6920000000000003e-05, |
| "loss": 0.1533, |
| "step": 330 |
| }, |
| { |
| "epoch": 6.6, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4753476679325104, |
| "eval_rouge1": 53.9777, |
| "eval_rouge2": 49.1125, |
| "eval_rougeL": 51.7324, |
| "eval_rougeLsum": 53.2685, |
| "eval_runtime": 8.3237, |
| "eval_samples_per_second": 12.014, |
| "eval_steps_per_second": 1.562, |
| "step": 330 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 2.6520000000000004e-05, |
| "loss": 0.1231, |
| "step": 340 |
| }, |
| { |
| "epoch": 6.8, |
| "eval_gen_len": 19.75, |
| "eval_loss": 0.4629780650138855, |
| "eval_rouge1": 52.8367, |
| "eval_rouge2": 47.6698, |
| "eval_rougeL": 50.0559, |
| "eval_rougeLsum": 51.986, |
| "eval_runtime": 6.6174, |
| "eval_samples_per_second": 15.112, |
| "eval_steps_per_second": 1.965, |
| "step": 340 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 2.6120000000000004e-05, |
| "loss": 0.166, |
| "step": 350 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_gen_len": 19.67, |
| "eval_loss": 0.4622231423854828, |
| "eval_rouge1": 53.2509, |
| "eval_rouge2": 48.2235, |
| "eval_rougeL": 50.8908, |
| "eval_rougeLsum": 52.3717, |
| "eval_runtime": 8.1116, |
| "eval_samples_per_second": 12.328, |
| "eval_steps_per_second": 1.603, |
| "step": 350 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 2.572e-05, |
| "loss": 0.1042, |
| "step": 360 |
| }, |
| { |
| "epoch": 7.2, |
| "eval_gen_len": 19.86, |
| "eval_loss": 0.4717084467411041, |
| "eval_rouge1": 53.4468, |
| "eval_rouge2": 48.3219, |
| "eval_rougeL": 51.074, |
| "eval_rougeLsum": 52.5822, |
| "eval_runtime": 7.0111, |
| "eval_samples_per_second": 14.263, |
| "eval_steps_per_second": 1.854, |
| "step": 360 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 2.532e-05, |
| "loss": 0.0942, |
| "step": 370 |
| }, |
| { |
| "epoch": 7.4, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.5130577683448792, |
| "eval_rouge1": 53.5099, |
| "eval_rouge2": 48.6401, |
| "eval_rougeL": 51.348, |
| "eval_rougeLsum": 52.7033, |
| "eval_runtime": 7.5081, |
| "eval_samples_per_second": 13.319, |
| "eval_steps_per_second": 1.731, |
| "step": 370 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 2.4920000000000002e-05, |
| "loss": 0.108, |
| "step": 380 |
| }, |
| { |
| "epoch": 7.6, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.4888118803501129, |
| "eval_rouge1": 53.9399, |
| "eval_rouge2": 48.9895, |
| "eval_rougeL": 51.4691, |
| "eval_rougeLsum": 53.0923, |
| "eval_runtime": 7.6096, |
| "eval_samples_per_second": 13.141, |
| "eval_steps_per_second": 1.708, |
| "step": 380 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 2.4520000000000002e-05, |
| "loss": 0.396, |
| "step": 390 |
| }, |
| { |
| "epoch": 7.8, |
| "eval_gen_len": 19.79, |
| "eval_loss": 0.48315200209617615, |
| "eval_rouge1": 52.1465, |
| "eval_rouge2": 46.8191, |
| "eval_rougeL": 49.6385, |
| "eval_rougeLsum": 51.3173, |
| "eval_runtime": 6.5648, |
| "eval_samples_per_second": 15.233, |
| "eval_steps_per_second": 1.98, |
| "step": 390 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 2.4120000000000003e-05, |
| "loss": 0.3167, |
| "step": 400 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.505258321762085, |
| "eval_rouge1": 52.4445, |
| "eval_rouge2": 47.0657, |
| "eval_rougeL": 49.9123, |
| "eval_rougeLsum": 51.5409, |
| "eval_runtime": 8.223, |
| "eval_samples_per_second": 12.161, |
| "eval_steps_per_second": 1.581, |
| "step": 400 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 2.372e-05, |
| "loss": 0.0926, |
| "step": 410 |
| }, |
| { |
| "epoch": 8.2, |
| "eval_gen_len": 19.91, |
| "eval_loss": 0.491664320230484, |
| "eval_rouge1": 53.9381, |
| "eval_rouge2": 49.1002, |
| "eval_rougeL": 51.6393, |
| "eval_rougeLsum": 53.07, |
| "eval_runtime": 7.2939, |
| "eval_samples_per_second": 13.71, |
| "eval_steps_per_second": 1.782, |
| "step": 410 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 2.332e-05, |
| "loss": 0.1143, |
| "step": 420 |
| }, |
| { |
| "epoch": 8.4, |
| "eval_gen_len": 19.86, |
| "eval_loss": 0.4843844175338745, |
| "eval_rouge1": 51.9319, |
| "eval_rouge2": 46.4005, |
| "eval_rougeL": 49.5382, |
| "eval_rougeLsum": 51.0042, |
| "eval_runtime": 12.7186, |
| "eval_samples_per_second": 7.863, |
| "eval_steps_per_second": 1.022, |
| "step": 420 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 2.292e-05, |
| "loss": 0.0852, |
| "step": 430 |
| }, |
| { |
| "epoch": 8.6, |
| "eval_gen_len": 19.89, |
| "eval_loss": 0.5236030220985413, |
| "eval_rouge1": 53.0753, |
| "eval_rouge2": 47.999, |
| "eval_rougeL": 50.8066, |
| "eval_rougeLsum": 52.1535, |
| "eval_runtime": 6.9023, |
| "eval_samples_per_second": 14.488, |
| "eval_steps_per_second": 1.883, |
| "step": 430 |
| }, |
| { |
| "epoch": 8.8, |
| "learning_rate": 2.252e-05, |
| "loss": 0.0894, |
| "step": 440 |
| }, |
| { |
| "epoch": 8.8, |
| "eval_gen_len": 19.83, |
| "eval_loss": 0.5296807289123535, |
| "eval_rouge1": 52.3528, |
| "eval_rouge2": 47.0573, |
| "eval_rougeL": 49.9171, |
| "eval_rougeLsum": 51.4175, |
| "eval_runtime": 7.6163, |
| "eval_samples_per_second": 13.13, |
| "eval_steps_per_second": 1.707, |
| "step": 440 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 2.2120000000000005e-05, |
| "loss": 0.092, |
| "step": 450 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_gen_len": 19.79, |
| "eval_loss": 0.545309841632843, |
| "eval_rouge1": 52.9402, |
| "eval_rouge2": 47.6937, |
| "eval_rougeL": 50.4167, |
| "eval_rougeLsum": 52.0807, |
| "eval_runtime": 7.5068, |
| "eval_samples_per_second": 13.321, |
| "eval_steps_per_second": 1.732, |
| "step": 450 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 2.1720000000000002e-05, |
| "loss": 0.0837, |
| "step": 460 |
| }, |
| { |
| "epoch": 9.2, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.5535491108894348, |
| "eval_rouge1": 53.2688, |
| "eval_rouge2": 48.3391, |
| "eval_rougeL": 50.8943, |
| "eval_rougeLsum": 52.5012, |
| "eval_runtime": 6.8049, |
| "eval_samples_per_second": 14.695, |
| "eval_steps_per_second": 1.91, |
| "step": 460 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 2.1320000000000003e-05, |
| "loss": 0.0753, |
| "step": 470 |
| }, |
| { |
| "epoch": 9.4, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.5247593522071838, |
| "eval_rouge1": 53.1878, |
| "eval_rouge2": 47.9215, |
| "eval_rougeL": 50.2844, |
| "eval_rougeLsum": 52.072, |
| "eval_runtime": 8.2937, |
| "eval_samples_per_second": 12.057, |
| "eval_steps_per_second": 1.567, |
| "step": 470 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 2.0920000000000003e-05, |
| "loss": 0.0861, |
| "step": 480 |
| }, |
| { |
| "epoch": 9.6, |
| "eval_gen_len": 19.87, |
| "eval_loss": 0.5427589416503906, |
| "eval_rouge1": 53.4103, |
| "eval_rouge2": 48.1705, |
| "eval_rougeL": 49.9195, |
| "eval_rougeLsum": 52.3346, |
| "eval_runtime": 6.5323, |
| "eval_samples_per_second": 15.309, |
| "eval_steps_per_second": 1.99, |
| "step": 480 |
| }, |
| { |
| "epoch": 9.8, |
| "learning_rate": 2.0520000000000003e-05, |
| "loss": 0.0868, |
| "step": 490 |
| }, |
| { |
| "epoch": 9.8, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.5506805181503296, |
| "eval_rouge1": 52.9177, |
| "eval_rouge2": 47.3222, |
| "eval_rougeL": 49.5766, |
| "eval_rougeLsum": 51.9415, |
| "eval_runtime": 8.204, |
| "eval_samples_per_second": 12.189, |
| "eval_steps_per_second": 1.585, |
| "step": 490 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 2.012e-05, |
| "loss": 0.0956, |
| "step": 500 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_gen_len": 19.84, |
| "eval_loss": 0.5506177544593811, |
| "eval_rouge1": 53.8554, |
| "eval_rouge2": 48.6792, |
| "eval_rougeL": 51.0507, |
| "eval_rougeLsum": 53.0633, |
| "eval_runtime": 6.7592, |
| "eval_samples_per_second": 14.795, |
| "eval_steps_per_second": 1.923, |
| "step": 500 |
| }, |
| { |
| "epoch": 10.2, |
| "learning_rate": 1.972e-05, |
| "loss": 0.063, |
| "step": 510 |
| }, |
| { |
| "epoch": 10.2, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.5823889374732971, |
| "eval_rouge1": 53.384, |
| "eval_rouge2": 48.0079, |
| "eval_rougeL": 50.8212, |
| "eval_rougeLsum": 52.4155, |
| "eval_runtime": 7.9793, |
| "eval_samples_per_second": 12.532, |
| "eval_steps_per_second": 1.629, |
| "step": 510 |
| }, |
| { |
| "epoch": 10.4, |
| "learning_rate": 1.932e-05, |
| "loss": 0.072, |
| "step": 520 |
| }, |
| { |
| "epoch": 10.4, |
| "eval_gen_len": 19.81, |
| "eval_loss": 0.5638877749443054, |
| "eval_rouge1": 53.1868, |
| "eval_rouge2": 48.0509, |
| "eval_rougeL": 50.6999, |
| "eval_rougeLsum": 52.3177, |
| "eval_runtime": 6.4643, |
| "eval_samples_per_second": 15.47, |
| "eval_steps_per_second": 2.011, |
| "step": 520 |
| }, |
| { |
| "epoch": 10.6, |
| "learning_rate": 1.8920000000000002e-05, |
| "loss": 0.0753, |
| "step": 530 |
| }, |
| { |
| "epoch": 10.6, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.5847346782684326, |
| "eval_rouge1": 53.9754, |
| "eval_rouge2": 48.7875, |
| "eval_rougeL": 50.5908, |
| "eval_rougeLsum": 53.0499, |
| "eval_runtime": 8.2095, |
| "eval_samples_per_second": 12.181, |
| "eval_steps_per_second": 1.584, |
| "step": 530 |
| }, |
| { |
| "epoch": 10.8, |
| "learning_rate": 1.8520000000000002e-05, |
| "loss": 0.0832, |
| "step": 540 |
| }, |
| { |
| "epoch": 10.8, |
| "eval_gen_len": 19.91, |
| "eval_loss": 0.5566834807395935, |
| "eval_rouge1": 52.2617, |
| "eval_rouge2": 46.765, |
| "eval_rougeL": 48.9661, |
| "eval_rougeLsum": 51.2406, |
| "eval_runtime": 6.5735, |
| "eval_samples_per_second": 15.213, |
| "eval_steps_per_second": 1.978, |
| "step": 540 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 1.8120000000000003e-05, |
| "loss": 0.0848, |
| "step": 550 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_gen_len": 19.89, |
| "eval_loss": 0.5314372181892395, |
| "eval_rouge1": 53.1077, |
| "eval_rouge2": 47.9591, |
| "eval_rougeL": 50.2941, |
| "eval_rougeLsum": 52.1444, |
| "eval_runtime": 8.0665, |
| "eval_samples_per_second": 12.397, |
| "eval_steps_per_second": 1.612, |
| "step": 550 |
| }, |
| { |
| "epoch": 11.2, |
| "learning_rate": 1.7720000000000003e-05, |
| "loss": 0.059, |
| "step": 560 |
| }, |
| { |
| "epoch": 11.2, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.5482513904571533, |
| "eval_rouge1": 54.0563, |
| "eval_rouge2": 49.0141, |
| "eval_rougeL": 50.7017, |
| "eval_rougeLsum": 53.0877, |
| "eval_runtime": 7.1824, |
| "eval_samples_per_second": 13.923, |
| "eval_steps_per_second": 1.81, |
| "step": 560 |
| }, |
| { |
| "epoch": 11.4, |
| "learning_rate": 1.732e-05, |
| "loss": 0.072, |
| "step": 570 |
| }, |
| { |
| "epoch": 11.4, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.5527663826942444, |
| "eval_rouge1": 53.7555, |
| "eval_rouge2": 48.6013, |
| "eval_rougeL": 50.0226, |
| "eval_rougeLsum": 52.6843, |
| "eval_runtime": 7.3833, |
| "eval_samples_per_second": 13.544, |
| "eval_steps_per_second": 1.761, |
| "step": 570 |
| }, |
| { |
| "epoch": 11.6, |
| "learning_rate": 1.692e-05, |
| "loss": 0.0537, |
| "step": 580 |
| }, |
| { |
| "epoch": 11.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.5718952417373657, |
| "eval_rouge1": 53.3446, |
| "eval_rouge2": 48.156, |
| "eval_rougeL": 50.5391, |
| "eval_rougeLsum": 52.473, |
| "eval_runtime": 7.8512, |
| "eval_samples_per_second": 12.737, |
| "eval_steps_per_second": 1.656, |
| "step": 580 |
| }, |
| { |
| "epoch": 11.8, |
| "learning_rate": 1.652e-05, |
| "loss": 0.064, |
| "step": 590 |
| }, |
| { |
| "epoch": 11.8, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.5569304823875427, |
| "eval_rouge1": 53.7903, |
| "eval_rouge2": 48.7776, |
| "eval_rougeL": 50.6488, |
| "eval_rougeLsum": 52.8657, |
| "eval_runtime": 6.4395, |
| "eval_samples_per_second": 15.529, |
| "eval_steps_per_second": 2.019, |
| "step": 590 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 1.612e-05, |
| "loss": 0.0511, |
| "step": 600 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.5667613744735718, |
| "eval_rouge1": 53.2515, |
| "eval_rouge2": 48.205, |
| "eval_rougeL": 49.9425, |
| "eval_rougeLsum": 52.302, |
| "eval_runtime": 8.1618, |
| "eval_samples_per_second": 12.252, |
| "eval_steps_per_second": 1.593, |
| "step": 600 |
| }, |
| { |
| "epoch": 12.2, |
| "learning_rate": 1.5720000000000002e-05, |
| "loss": 0.0327, |
| "step": 610 |
| }, |
| { |
| "epoch": 12.2, |
| "eval_gen_len": 19.96, |
| "eval_loss": 0.6061870455741882, |
| "eval_rouge1": 52.9566, |
| "eval_rouge2": 47.573, |
| "eval_rougeL": 49.5866, |
| "eval_rougeLsum": 51.8936, |
| "eval_runtime": 7.6757, |
| "eval_samples_per_second": 13.028, |
| "eval_steps_per_second": 1.694, |
| "step": 610 |
| }, |
| { |
| "epoch": 12.4, |
| "learning_rate": 1.5320000000000002e-05, |
| "loss": 0.066, |
| "step": 620 |
| }, |
| { |
| "epoch": 12.4, |
| "eval_gen_len": 19.96, |
| "eval_loss": 0.5923030376434326, |
| "eval_rouge1": 52.8622, |
| "eval_rouge2": 47.4706, |
| "eval_rougeL": 49.6314, |
| "eval_rougeLsum": 51.8334, |
| "eval_runtime": 6.8801, |
| "eval_samples_per_second": 14.535, |
| "eval_steps_per_second": 1.89, |
| "step": 620 |
| }, |
| { |
| "epoch": 12.6, |
| "learning_rate": 1.4920000000000001e-05, |
| "loss": 0.0508, |
| "step": 630 |
| }, |
| { |
| "epoch": 12.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.5727255344390869, |
| "eval_rouge1": 53.0607, |
| "eval_rouge2": 47.7413, |
| "eval_rougeL": 50.1959, |
| "eval_rougeLsum": 52.1585, |
| "eval_runtime": 8.4487, |
| "eval_samples_per_second": 11.836, |
| "eval_steps_per_second": 1.539, |
| "step": 630 |
| }, |
| { |
| "epoch": 12.8, |
| "learning_rate": 1.4520000000000002e-05, |
| "loss": 0.0551, |
| "step": 640 |
| }, |
| { |
| "epoch": 12.8, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.5799768567085266, |
| "eval_rouge1": 52.551, |
| "eval_rouge2": 47.2043, |
| "eval_rougeL": 49.6789, |
| "eval_rougeLsum": 51.5886, |
| "eval_runtime": 6.6599, |
| "eval_samples_per_second": 15.015, |
| "eval_steps_per_second": 1.952, |
| "step": 640 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 1.412e-05, |
| "loss": 0.0393, |
| "step": 650 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_gen_len": 19.89, |
| "eval_loss": 0.5942478775978088, |
| "eval_rouge1": 52.5056, |
| "eval_rouge2": 47.2568, |
| "eval_rougeL": 49.8142, |
| "eval_rougeLsum": 51.6736, |
| "eval_runtime": 8.553, |
| "eval_samples_per_second": 11.692, |
| "eval_steps_per_second": 1.52, |
| "step": 650 |
| }, |
| { |
| "epoch": 13.2, |
| "learning_rate": 1.3720000000000002e-05, |
| "loss": 0.0472, |
| "step": 660 |
| }, |
| { |
| "epoch": 13.2, |
| "eval_gen_len": 19.84, |
| "eval_loss": 0.5964275598526001, |
| "eval_rouge1": 53.9299, |
| "eval_rouge2": 49.2649, |
| "eval_rougeL": 51.1705, |
| "eval_rougeLsum": 53.3046, |
| "eval_runtime": 7.2167, |
| "eval_samples_per_second": 13.857, |
| "eval_steps_per_second": 1.801, |
| "step": 660 |
| }, |
| { |
| "epoch": 13.4, |
| "learning_rate": 1.3320000000000001e-05, |
| "loss": 0.0484, |
| "step": 670 |
| }, |
| { |
| "epoch": 13.4, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6021795868873596, |
| "eval_rouge1": 52.7107, |
| "eval_rouge2": 47.5152, |
| "eval_rougeL": 50.4474, |
| "eval_rougeLsum": 51.8891, |
| "eval_runtime": 7.6455, |
| "eval_samples_per_second": 13.08, |
| "eval_steps_per_second": 1.7, |
| "step": 670 |
| }, |
| { |
| "epoch": 13.6, |
| "learning_rate": 1.2920000000000002e-05, |
| "loss": 0.0493, |
| "step": 680 |
| }, |
| { |
| "epoch": 13.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.5896037220954895, |
| "eval_rouge1": 52.581, |
| "eval_rouge2": 47.2503, |
| "eval_rougeL": 49.8352, |
| "eval_rougeLsum": 51.7529, |
| "eval_runtime": 7.7221, |
| "eval_samples_per_second": 12.95, |
| "eval_steps_per_second": 1.683, |
| "step": 680 |
| }, |
| { |
| "epoch": 13.8, |
| "learning_rate": 1.252e-05, |
| "loss": 0.0373, |
| "step": 690 |
| }, |
| { |
| "epoch": 13.8, |
| "eval_gen_len": 19.9, |
| "eval_loss": 0.5958464741706848, |
| "eval_rouge1": 53.7481, |
| "eval_rouge2": 48.8915, |
| "eval_rougeL": 50.9133, |
| "eval_rougeLsum": 52.8805, |
| "eval_runtime": 7.027, |
| "eval_samples_per_second": 14.231, |
| "eval_steps_per_second": 1.85, |
| "step": 690 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 1.2120000000000001e-05, |
| "loss": 0.0439, |
| "step": 700 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.5903494954109192, |
| "eval_rouge1": 52.5592, |
| "eval_rouge2": 47.4923, |
| "eval_rougeL": 50.2025, |
| "eval_rougeLsum": 51.7545, |
| "eval_runtime": 8.4747, |
| "eval_samples_per_second": 11.8, |
| "eval_steps_per_second": 1.534, |
| "step": 700 |
| }, |
| { |
| "epoch": 14.2, |
| "learning_rate": 1.172e-05, |
| "loss": 0.0495, |
| "step": 710 |
| }, |
| { |
| "epoch": 14.2, |
| "eval_gen_len": 19.96, |
| "eval_loss": 0.6074602603912354, |
| "eval_rouge1": 53.1459, |
| "eval_rouge2": 48.2296, |
| "eval_rougeL": 50.8331, |
| "eval_rougeLsum": 52.3047, |
| "eval_runtime": 6.6639, |
| "eval_samples_per_second": 15.006, |
| "eval_steps_per_second": 1.951, |
| "step": 710 |
| }, |
| { |
| "epoch": 14.4, |
| "learning_rate": 1.132e-05, |
| "loss": 0.0488, |
| "step": 720 |
| }, |
| { |
| "epoch": 14.4, |
| "eval_gen_len": 19.96, |
| "eval_loss": 0.6027532815933228, |
| "eval_rouge1": 53.1743, |
| "eval_rouge2": 48.1621, |
| "eval_rougeL": 50.5616, |
| "eval_rougeLsum": 52.294, |
| "eval_runtime": 8.39, |
| "eval_samples_per_second": 11.919, |
| "eval_steps_per_second": 1.549, |
| "step": 720 |
| }, |
| { |
| "epoch": 14.6, |
| "learning_rate": 1.0920000000000002e-05, |
| "loss": 0.0428, |
| "step": 730 |
| }, |
| { |
| "epoch": 14.6, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.6251101493835449, |
| "eval_rouge1": 53.6698, |
| "eval_rouge2": 48.9146, |
| "eval_rougeL": 51.3211, |
| "eval_rougeLsum": 52.8219, |
| "eval_runtime": 6.6435, |
| "eval_samples_per_second": 15.052, |
| "eval_steps_per_second": 1.957, |
| "step": 730 |
| }, |
| { |
| "epoch": 14.8, |
| "learning_rate": 1.0520000000000001e-05, |
| "loss": 0.0332, |
| "step": 740 |
| }, |
| { |
| "epoch": 14.8, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.5891709327697754, |
| "eval_rouge1": 53.3632, |
| "eval_rouge2": 48.281, |
| "eval_rougeL": 50.8155, |
| "eval_rougeLsum": 52.441, |
| "eval_runtime": 7.9324, |
| "eval_samples_per_second": 12.606, |
| "eval_steps_per_second": 1.639, |
| "step": 740 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 1.0120000000000001e-05, |
| "loss": 0.0443, |
| "step": 750 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_gen_len": 19.92, |
| "eval_loss": 0.6117092370986938, |
| "eval_rouge1": 53.3405, |
| "eval_rouge2": 48.3071, |
| "eval_rougeL": 50.4905, |
| "eval_rougeLsum": 52.5336, |
| "eval_runtime": 7.4624, |
| "eval_samples_per_second": 13.401, |
| "eval_steps_per_second": 1.742, |
| "step": 750 |
| }, |
| { |
| "epoch": 15.2, |
| "learning_rate": 9.72e-06, |
| "loss": 0.0533, |
| "step": 760 |
| }, |
| { |
| "epoch": 15.2, |
| "eval_gen_len": 19.88, |
| "eval_loss": 0.596733033657074, |
| "eval_rouge1": 53.2498, |
| "eval_rouge2": 48.2583, |
| "eval_rougeL": 50.5271, |
| "eval_rougeLsum": 52.4317, |
| "eval_runtime": 7.2087, |
| "eval_samples_per_second": 13.872, |
| "eval_steps_per_second": 1.803, |
| "step": 760 |
| }, |
| { |
| "epoch": 15.4, |
| "learning_rate": 9.32e-06, |
| "loss": 0.0312, |
| "step": 770 |
| }, |
| { |
| "epoch": 15.4, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.611742377281189, |
| "eval_rouge1": 52.9445, |
| "eval_rouge2": 47.6968, |
| "eval_rougeL": 50.0402, |
| "eval_rougeLsum": 52.053, |
| "eval_runtime": 8.3029, |
| "eval_samples_per_second": 12.044, |
| "eval_steps_per_second": 1.566, |
| "step": 770 |
| }, |
| { |
| "epoch": 15.6, |
| "learning_rate": 8.920000000000001e-06, |
| "loss": 0.0363, |
| "step": 780 |
| }, |
| { |
| "epoch": 15.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6265898942947388, |
| "eval_rouge1": 52.6826, |
| "eval_rouge2": 47.3318, |
| "eval_rougeL": 50.0565, |
| "eval_rougeLsum": 51.7117, |
| "eval_runtime": 6.4527, |
| "eval_samples_per_second": 15.497, |
| "eval_steps_per_second": 2.015, |
| "step": 780 |
| }, |
| { |
| "epoch": 15.8, |
| "learning_rate": 8.52e-06, |
| "loss": 0.035, |
| "step": 790 |
| }, |
| { |
| "epoch": 15.8, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6089562773704529, |
| "eval_rouge1": 52.7035, |
| "eval_rouge2": 47.3733, |
| "eval_rougeL": 49.8679, |
| "eval_rougeLsum": 51.8499, |
| "eval_runtime": 8.2045, |
| "eval_samples_per_second": 12.188, |
| "eval_steps_per_second": 1.584, |
| "step": 790 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 8.120000000000002e-06, |
| "loss": 0.0405, |
| "step": 800 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6099843382835388, |
| "eval_rouge1": 52.3781, |
| "eval_rouge2": 46.9794, |
| "eval_rougeL": 49.4781, |
| "eval_rougeLsum": 51.4939, |
| "eval_runtime": 6.4126, |
| "eval_samples_per_second": 15.594, |
| "eval_steps_per_second": 2.027, |
| "step": 800 |
| }, |
| { |
| "epoch": 16.2, |
| "learning_rate": 7.72e-06, |
| "loss": 0.0378, |
| "step": 810 |
| }, |
| { |
| "epoch": 16.2, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.610033392906189, |
| "eval_rouge1": 53.2325, |
| "eval_rouge2": 48.2416, |
| "eval_rougeL": 49.8123, |
| "eval_rougeLsum": 52.4172, |
| "eval_runtime": 7.6476, |
| "eval_samples_per_second": 13.076, |
| "eval_steps_per_second": 1.7, |
| "step": 810 |
| }, |
| { |
| "epoch": 16.4, |
| "learning_rate": 7.32e-06, |
| "loss": 0.0322, |
| "step": 820 |
| }, |
| { |
| "epoch": 16.4, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6167323589324951, |
| "eval_rouge1": 53.1487, |
| "eval_rouge2": 48.2712, |
| "eval_rougeL": 49.9134, |
| "eval_rougeLsum": 52.4232, |
| "eval_runtime": 6.8379, |
| "eval_samples_per_second": 14.624, |
| "eval_steps_per_second": 1.901, |
| "step": 820 |
| }, |
| { |
| "epoch": 16.6, |
| "learning_rate": 6.92e-06, |
| "loss": 0.0274, |
| "step": 830 |
| }, |
| { |
| "epoch": 16.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6255094408988953, |
| "eval_rouge1": 53.1403, |
| "eval_rouge2": 48.0661, |
| "eval_rougeL": 50.3561, |
| "eval_rougeLsum": 52.2934, |
| "eval_runtime": 8.4294, |
| "eval_samples_per_second": 11.863, |
| "eval_steps_per_second": 1.542, |
| "step": 830 |
| }, |
| { |
| "epoch": 16.8, |
| "learning_rate": 6.520000000000001e-06, |
| "loss": 0.0277, |
| "step": 840 |
| }, |
| { |
| "epoch": 16.8, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6320467591285706, |
| "eval_rouge1": 53.1403, |
| "eval_rouge2": 48.0661, |
| "eval_rougeL": 50.3561, |
| "eval_rougeLsum": 52.2934, |
| "eval_runtime": 6.6968, |
| "eval_samples_per_second": 14.933, |
| "eval_steps_per_second": 1.941, |
| "step": 840 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 6.120000000000001e-06, |
| "loss": 0.0302, |
| "step": 850 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6346279978752136, |
| "eval_rouge1": 53.0019, |
| "eval_rouge2": 47.8758, |
| "eval_rougeL": 50.1767, |
| "eval_rougeLsum": 52.0888, |
| "eval_runtime": 8.3668, |
| "eval_samples_per_second": 11.952, |
| "eval_steps_per_second": 1.554, |
| "step": 850 |
| }, |
| { |
| "epoch": 17.2, |
| "learning_rate": 5.72e-06, |
| "loss": 0.0344, |
| "step": 860 |
| }, |
| { |
| "epoch": 17.2, |
| "eval_gen_len": 19.91, |
| "eval_loss": 0.6346395015716553, |
| "eval_rouge1": 53.0135, |
| "eval_rouge2": 48.0249, |
| "eval_rougeL": 49.977, |
| "eval_rougeLsum": 52.1297, |
| "eval_runtime": 6.9398, |
| "eval_samples_per_second": 14.41, |
| "eval_steps_per_second": 1.873, |
| "step": 860 |
| }, |
| { |
| "epoch": 17.4, |
| "learning_rate": 5.320000000000001e-06, |
| "loss": 0.0331, |
| "step": 870 |
| }, |
| { |
| "epoch": 17.4, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6337741613388062, |
| "eval_rouge1": 53.2181, |
| "eval_rouge2": 48.1723, |
| "eval_rougeL": 50.4487, |
| "eval_rougeLsum": 52.4175, |
| "eval_runtime": 7.6152, |
| "eval_samples_per_second": 13.132, |
| "eval_steps_per_second": 1.707, |
| "step": 870 |
| }, |
| { |
| "epoch": 17.6, |
| "learning_rate": 4.92e-06, |
| "loss": 0.0344, |
| "step": 880 |
| }, |
| { |
| "epoch": 17.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6169251799583435, |
| "eval_rouge1": 53.2726, |
| "eval_rouge2": 48.2238, |
| "eval_rougeL": 50.5094, |
| "eval_rougeLsum": 52.4617, |
| "eval_runtime": 7.681, |
| "eval_samples_per_second": 13.019, |
| "eval_steps_per_second": 1.692, |
| "step": 880 |
| }, |
| { |
| "epoch": 17.8, |
| "learning_rate": 4.520000000000001e-06, |
| "loss": 0.04, |
| "step": 890 |
| }, |
| { |
| "epoch": 17.8, |
| "eval_gen_len": 19.89, |
| "eval_loss": 0.6145161986351013, |
| "eval_rouge1": 52.9438, |
| "eval_rouge2": 47.9296, |
| "eval_rougeL": 50.2202, |
| "eval_rougeLsum": 52.1485, |
| "eval_runtime": 6.7635, |
| "eval_samples_per_second": 14.785, |
| "eval_steps_per_second": 1.922, |
| "step": 890 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 4.12e-06, |
| "loss": 0.0291, |
| "step": 900 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_gen_len": 19.91, |
| "eval_loss": 0.6197648048400879, |
| "eval_rouge1": 52.9654, |
| "eval_rouge2": 47.9572, |
| "eval_rougeL": 50.2296, |
| "eval_rougeLsum": 52.2057, |
| "eval_runtime": 8.1949, |
| "eval_samples_per_second": 12.203, |
| "eval_steps_per_second": 1.586, |
| "step": 900 |
| }, |
| { |
| "epoch": 18.2, |
| "learning_rate": 3.7200000000000004e-06, |
| "loss": 0.0258, |
| "step": 910 |
| }, |
| { |
| "epoch": 18.2, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.626217782497406, |
| "eval_rouge1": 52.9077, |
| "eval_rouge2": 47.7606, |
| "eval_rougeL": 50.1641, |
| "eval_rougeLsum": 52.126, |
| "eval_runtime": 6.3753, |
| "eval_samples_per_second": 15.686, |
| "eval_steps_per_second": 2.039, |
| "step": 910 |
| }, |
| { |
| "epoch": 18.4, |
| "learning_rate": 3.3200000000000004e-06, |
| "loss": 0.0374, |
| "step": 920 |
| }, |
| { |
| "epoch": 18.4, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6214368343353271, |
| "eval_rouge1": 52.9374, |
| "eval_rouge2": 47.9047, |
| "eval_rougeL": 50.5323, |
| "eval_rougeLsum": 52.2003, |
| "eval_runtime": 7.6867, |
| "eval_samples_per_second": 13.01, |
| "eval_steps_per_second": 1.691, |
| "step": 920 |
| }, |
| { |
| "epoch": 18.6, |
| "learning_rate": 2.92e-06, |
| "loss": 0.0236, |
| "step": 930 |
| }, |
| { |
| "epoch": 18.6, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.622434139251709, |
| "eval_rouge1": 52.9374, |
| "eval_rouge2": 47.9308, |
| "eval_rougeL": 50.2522, |
| "eval_rougeLsum": 52.2003, |
| "eval_runtime": 7.0784, |
| "eval_samples_per_second": 14.128, |
| "eval_steps_per_second": 1.837, |
| "step": 930 |
| }, |
| { |
| "epoch": 18.8, |
| "learning_rate": 2.52e-06, |
| "loss": 0.0161, |
| "step": 940 |
| }, |
| { |
| "epoch": 18.8, |
| "eval_gen_len": 19.93, |
| "eval_loss": 0.6140244603157043, |
| "eval_rouge1": 52.9019, |
| "eval_rouge2": 47.9184, |
| "eval_rougeL": 50.4912, |
| "eval_rougeLsum": 52.1339, |
| "eval_runtime": 7.1373, |
| "eval_samples_per_second": 14.011, |
| "eval_steps_per_second": 1.821, |
| "step": 940 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 2.12e-06, |
| "loss": 0.0411, |
| "step": 950 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6175794005393982, |
| "eval_rouge1": 53.0345, |
| "eval_rouge2": 48.0268, |
| "eval_rougeL": 50.3072, |
| "eval_rougeLsum": 52.2337, |
| "eval_runtime": 7.454, |
| "eval_samples_per_second": 13.416, |
| "eval_steps_per_second": 1.744, |
| "step": 950 |
| }, |
| { |
| "epoch": 19.2, |
| "learning_rate": 1.72e-06, |
| "loss": 0.0255, |
| "step": 960 |
| }, |
| { |
| "epoch": 19.2, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6201534271240234, |
| "eval_rouge1": 53.0189, |
| "eval_rouge2": 47.8858, |
| "eval_rougeL": 50.2223, |
| "eval_rougeLsum": 52.1154, |
| "eval_runtime": 6.4527, |
| "eval_samples_per_second": 15.497, |
| "eval_steps_per_second": 2.015, |
| "step": 960 |
| }, |
| { |
| "epoch": 19.4, |
| "learning_rate": 1.32e-06, |
| "loss": 0.024, |
| "step": 970 |
| }, |
| { |
| "epoch": 19.4, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6199787855148315, |
| "eval_rouge1": 53.0189, |
| "eval_rouge2": 47.8858, |
| "eval_rougeL": 50.2223, |
| "eval_rougeLsum": 52.1154, |
| "eval_runtime": 8.1992, |
| "eval_samples_per_second": 12.196, |
| "eval_steps_per_second": 1.586, |
| "step": 970 |
| }, |
| { |
| "epoch": 19.6, |
| "learning_rate": 9.200000000000001e-07, |
| "loss": 0.0295, |
| "step": 980 |
| }, |
| { |
| "epoch": 19.6, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.618171215057373, |
| "eval_rouge1": 53.0189, |
| "eval_rouge2": 47.8858, |
| "eval_rougeL": 50.2223, |
| "eval_rougeLsum": 52.1154, |
| "eval_runtime": 6.3503, |
| "eval_samples_per_second": 15.747, |
| "eval_steps_per_second": 2.047, |
| "step": 980 |
| }, |
| { |
| "epoch": 19.8, |
| "learning_rate": 5.2e-07, |
| "loss": 0.0129, |
| "step": 990 |
| }, |
| { |
| "epoch": 19.8, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6194862723350525, |
| "eval_rouge1": 53.0273, |
| "eval_rouge2": 47.8919, |
| "eval_rougeL": 50.2299, |
| "eval_rougeLsum": 52.1227, |
| "eval_runtime": 8.0127, |
| "eval_samples_per_second": 12.48, |
| "eval_steps_per_second": 1.622, |
| "step": 990 |
| }, |
| { |
| "epoch": 20.0, |
| "learning_rate": 1.2000000000000002e-07, |
| "loss": 0.0332, |
| "step": 1000 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_gen_len": 19.95, |
| "eval_loss": 0.6200586557388306, |
| "eval_rouge1": 52.8857, |
| "eval_rouge2": 47.6744, |
| "eval_rougeL": 50.037, |
| "eval_rougeLsum": 52.0023, |
| "eval_runtime": 6.4657, |
| "eval_samples_per_second": 15.466, |
| "eval_steps_per_second": 2.011, |
| "step": 1000 |
| } |
| ], |
| "max_steps": 1000, |
| "num_train_epochs": 20, |
| "total_flos": 644853467381760.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|