| { | |
| "best_metric": 63.1011, | |
| "best_model_checkpoint": "output_train_bart_large_local/checkpoint-48000", | |
| "epoch": 2.0123153700647967, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.9842240824211213e-07, | |
| "loss": 1.8309, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 8.008692852543465e-07, | |
| "loss": 1.0322, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_bertscore/f1": 0.7264, | |
| "eval_bertscore/precision": 0.7345, | |
| "eval_bertscore/recall": 0.7214, | |
| "eval_mean_prediction_length_characters": 779.505, | |
| "eval_mean_prediction_length_tokens": 167.969, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 45.7604, | |
| "eval_rouge/rouge1": 60.4328, | |
| "eval_rouge/rouge2": 39.8155, | |
| "eval_rouge/rougeL": 39.824, | |
| "eval_rouge/rougeLsum": 57.577, | |
| "eval_runtime": 2363.9821, | |
| "eval_samples_per_second": 0.423, | |
| "eval_steps_per_second": 0.423, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.203316162266581e-06, | |
| "loss": 0.8078, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.6057630392788153e-06, | |
| "loss": 0.7401, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_bertscore/f1": 0.767, | |
| "eval_bertscore/precision": 0.7735, | |
| "eval_bertscore/recall": 0.7627, | |
| "eval_mean_prediction_length_characters": 746.987, | |
| "eval_mean_prediction_length_tokens": 167.637, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 49.7632, | |
| "eval_rouge/rouge1": 64.5596, | |
| "eval_rouge/rouge2": 43.5883, | |
| "eval_rouge/rougeL": 43.7919, | |
| "eval_rouge/rougeLsum": 62.268, | |
| "eval_runtime": 2390.2778, | |
| "eval_samples_per_second": 0.418, | |
| "eval_steps_per_second": 0.418, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.00820991629105e-06, | |
| "loss": 0.6952, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.4106567933032843e-06, | |
| "loss": 0.6654, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_bertscore/f1": 0.775, | |
| "eval_bertscore/precision": 0.7801, | |
| "eval_bertscore/recall": 0.7719, | |
| "eval_mean_prediction_length_characters": 750.739, | |
| "eval_mean_prediction_length_tokens": 167.441, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 51.1255, | |
| "eval_rouge/rouge1": 65.8167, | |
| "eval_rouge/rouge2": 45.0215, | |
| "eval_rouge/rougeL": 45.0979, | |
| "eval_rouge/rougeLsum": 63.4694, | |
| "eval_runtime": 2400.8698, | |
| "eval_samples_per_second": 0.417, | |
| "eval_steps_per_second": 0.417, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 2.8131036703155183e-06, | |
| "loss": 0.6532, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.215550547327753e-06, | |
| "loss": 0.6421, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_bertscore/f1": 0.778, | |
| "eval_bertscore/precision": 0.7803, | |
| "eval_bertscore/recall": 0.7778, | |
| "eval_mean_prediction_length_characters": 787.437, | |
| "eval_mean_prediction_length_tokens": 175.145, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 51.6325, | |
| "eval_rouge/rouge1": 66.6546, | |
| "eval_rouge/rouge2": 45.5613, | |
| "eval_rouge/rougeL": 45.3256, | |
| "eval_rouge/rougeLsum": 64.2944, | |
| "eval_runtime": 2500.2502, | |
| "eval_samples_per_second": 0.4, | |
| "eval_steps_per_second": 0.4, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.6179974243399875e-06, | |
| "loss": 0.6253, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.0196394075981975e-06, | |
| "loss": 0.62, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_bertscore/f1": 0.7774, | |
| "eval_bertscore/precision": 0.7652, | |
| "eval_bertscore/recall": 0.7921, | |
| "eval_mean_prediction_length_characters": 948.673, | |
| "eval_mean_prediction_length_tokens": 206.93, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 51.4967, | |
| "eval_rouge/rouge1": 67.0983, | |
| "eval_rouge/rouge2": 45.6949, | |
| "eval_rouge/rougeL": 44.541, | |
| "eval_rouge/rougeLsum": 64.5765, | |
| "eval_runtime": 2980.0062, | |
| "eval_samples_per_second": 0.336, | |
| "eval_steps_per_second": 0.336, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.4220862846104314e-06, | |
| "loss": 0.6041, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.824533161622666e-06, | |
| "loss": 0.5949, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_bertscore/f1": 0.7803, | |
| "eval_bertscore/precision": 0.7732, | |
| "eval_bertscore/recall": 0.7895, | |
| "eval_mean_prediction_length_characters": 879.72, | |
| "eval_mean_prediction_length_tokens": 192.817, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 52.1495, | |
| "eval_rouge/rouge1": 67.3707, | |
| "eval_rouge/rouge2": 46.4072, | |
| "eval_rouge/rougeL": 45.3622, | |
| "eval_rouge/rougeLsum": 65.0424, | |
| "eval_runtime": 2666.0858, | |
| "eval_samples_per_second": 0.375, | |
| "eval_steps_per_second": 0.375, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.2269800386349e-06, | |
| "loss": 0.5762, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 5.629426915647135e-06, | |
| "loss": 0.5719, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_bertscore/f1": 0.7838, | |
| "eval_bertscore/precision": 0.7832, | |
| "eval_bertscore/recall": 0.7866, | |
| "eval_mean_prediction_length_characters": 832.624, | |
| "eval_mean_prediction_length_tokens": 183.52, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 52.9627, | |
| "eval_rouge/rouge1": 67.8554, | |
| "eval_rouge/rouge2": 47.0328, | |
| "eval_rouge/rougeL": 46.5505, | |
| "eval_rouge/rougeLsum": 65.5148, | |
| "eval_runtime": 2493.7707, | |
| "eval_samples_per_second": 0.401, | |
| "eval_steps_per_second": 0.401, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 6.031068898905345e-06, | |
| "loss": 0.5718, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.433515775917579e-06, | |
| "loss": 0.5541, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_bertscore/f1": 0.7856, | |
| "eval_bertscore/precision": 0.7835, | |
| "eval_bertscore/recall": 0.7897, | |
| "eval_mean_prediction_length_characters": 831.437, | |
| "eval_mean_prediction_length_tokens": 184.28, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 53.234, | |
| "eval_rouge/rouge1": 68.2648, | |
| "eval_rouge/rouge2": 47.5788, | |
| "eval_rouge/rougeL": 46.447, | |
| "eval_rouge/rougeLsum": 65.9056, | |
| "eval_runtime": 2476.0861, | |
| "eval_samples_per_second": 0.404, | |
| "eval_steps_per_second": 0.404, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 6.835962652929814e-06, | |
| "loss": 0.5621, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 7.238409529942049e-06, | |
| "loss": 0.5509, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_bertscore/f1": 0.7876, | |
| "eval_bertscore/precision": 0.787, | |
| "eval_bertscore/recall": 0.79, | |
| "eval_mean_prediction_length_characters": 825.034, | |
| "eval_mean_prediction_length_tokens": 181.78, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 53.6596, | |
| "eval_rouge/rouge1": 68.4619, | |
| "eval_rouge/rouge2": 47.9553, | |
| "eval_rouge/rougeL": 47.0605, | |
| "eval_rouge/rougeLsum": 66.229, | |
| "eval_runtime": 2413.4877, | |
| "eval_samples_per_second": 0.414, | |
| "eval_steps_per_second": 0.414, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.640856406954282e-06, | |
| "loss": 0.5319, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 8.042498390212493e-06, | |
| "loss": 0.5317, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_bertscore/f1": 0.7801, | |
| "eval_bertscore/precision": 0.7653, | |
| "eval_bertscore/recall": 0.7976, | |
| "eval_mean_prediction_length_characters": 983.333, | |
| "eval_mean_prediction_length_tokens": 212.132, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 52.2204, | |
| "eval_rouge/rouge1": 67.4114, | |
| "eval_rouge/rouge2": 46.8366, | |
| "eval_rouge/rougeL": 45.1025, | |
| "eval_rouge/rougeLsum": 64.9737, | |
| "eval_runtime": 2850.8965, | |
| "eval_samples_per_second": 0.351, | |
| "eval_steps_per_second": 0.351, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 8.444945267224727e-06, | |
| "loss": 0.5246, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 8.847392144236962e-06, | |
| "loss": 0.5306, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_bertscore/f1": 0.7812, | |
| "eval_bertscore/precision": 0.7727, | |
| "eval_bertscore/recall": 0.7923, | |
| "eval_mean_prediction_length_characters": 929.756, | |
| "eval_mean_prediction_length_tokens": 198.992, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 52.9021, | |
| "eval_rouge/rouge1": 67.7205, | |
| "eval_rouge/rouge2": 47.5736, | |
| "eval_rouge/rougeL": 45.955, | |
| "eval_rouge/rougeLsum": 65.2918, | |
| "eval_runtime": 2641.4953, | |
| "eval_samples_per_second": 0.379, | |
| "eval_steps_per_second": 0.379, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.249839021249196e-06, | |
| "loss": 0.5118, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.65228589826143e-06, | |
| "loss": 0.5125, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_bertscore/f1": 0.7924, | |
| "eval_bertscore/precision": 0.8025, | |
| "eval_bertscore/recall": 0.7847, | |
| "eval_mean_prediction_length_characters": 739.515, | |
| "eval_mean_prediction_length_tokens": 166.617, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 54.3827, | |
| "eval_rouge/rouge1": 68.5657, | |
| "eval_rouge/rouge2": 48.8496, | |
| "eval_rouge/rougeL": 48.0192, | |
| "eval_rouge/rougeLsum": 66.6464, | |
| "eval_runtime": 2173.7087, | |
| "eval_samples_per_second": 0.46, | |
| "eval_steps_per_second": 0.46, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 9.993918580525149e-06, | |
| "loss": 0.4953, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.949202260857123e-06, | |
| "loss": 0.4559, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_bertscore/f1": 0.7876, | |
| "eval_bertscore/precision": 0.782, | |
| "eval_bertscore/recall": 0.7953, | |
| "eval_mean_prediction_length_characters": 872.104, | |
| "eval_mean_prediction_length_tokens": 188.836, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 54.0286, | |
| "eval_rouge/rouge1": 68.9929, | |
| "eval_rouge/rouge2": 48.683, | |
| "eval_rouge/rougeL": 46.9557, | |
| "eval_rouge/rougeLsum": 66.5892, | |
| "eval_runtime": 2531.5352, | |
| "eval_samples_per_second": 0.395, | |
| "eval_steps_per_second": 0.395, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.904575373828433e-06, | |
| "loss": 0.4572, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.859859054160406e-06, | |
| "loss": 0.4455, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_bertscore/f1": 0.792, | |
| "eval_bertscore/precision": 0.7832, | |
| "eval_bertscore/recall": 0.8031, | |
| "eval_mean_prediction_length_characters": 921.871, | |
| "eval_mean_prediction_length_tokens": 200.026, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 54.5933, | |
| "eval_rouge/rouge1": 69.3876, | |
| "eval_rouge/rouge2": 49.1971, | |
| "eval_rouge/rougeL": 47.6645, | |
| "eval_rouge/rougeLsum": 67.1636, | |
| "eval_runtime": 2729.0713, | |
| "eval_samples_per_second": 0.366, | |
| "eval_steps_per_second": 0.366, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 9.81514273449238e-06, | |
| "loss": 0.446, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 9.770426414824355e-06, | |
| "loss": 0.4376, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_bertscore/f1": 0.7943, | |
| "eval_bertscore/precision": 0.7909, | |
| "eval_bertscore/recall": 0.7996, | |
| "eval_mean_prediction_length_characters": 856.751, | |
| "eval_mean_prediction_length_tokens": 186.271, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 55.0336, | |
| "eval_rouge/rouge1": 69.6177, | |
| "eval_rouge/rouge2": 49.6842, | |
| "eval_rouge/rougeL": 48.1889, | |
| "eval_rouge/rougeLsum": 67.4597, | |
| "eval_runtime": 2554.3498, | |
| "eval_samples_per_second": 0.391, | |
| "eval_steps_per_second": 0.391, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 9.725710095156328e-06, | |
| "loss": 0.4338, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 9.680993775488303e-06, | |
| "loss": 0.4333, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_bertscore/f1": 0.7941, | |
| "eval_bertscore/precision": 0.7901, | |
| "eval_bertscore/recall": 0.8003, | |
| "eval_mean_prediction_length_characters": 857.762, | |
| "eval_mean_prediction_length_tokens": 185.042, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 55.3969, | |
| "eval_rouge/rouge1": 69.5818, | |
| "eval_rouge/rouge2": 50.0439, | |
| "eval_rouge/rougeL": 48.8213, | |
| "eval_rouge/rougeLsum": 67.3532, | |
| "eval_runtime": 2568.5908, | |
| "eval_samples_per_second": 0.389, | |
| "eval_steps_per_second": 0.389, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 9.636366888459613e-06, | |
| "loss": 0.4224, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 9.591650568791587e-06, | |
| "loss": 0.4229, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_bertscore/f1": 0.7968, | |
| "eval_bertscore/precision": 0.7936, | |
| "eval_bertscore/recall": 0.802, | |
| "eval_mean_prediction_length_characters": 850.064, | |
| "eval_mean_prediction_length_tokens": 186.438, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 56.2479, | |
| "eval_rouge/rouge1": 70.2657, | |
| "eval_rouge/rouge2": 50.9461, | |
| "eval_rouge/rougeL": 49.7123, | |
| "eval_rouge/rougeLsum": 68.1758, | |
| "eval_runtime": 2596.5556, | |
| "eval_samples_per_second": 0.385, | |
| "eval_steps_per_second": 0.385, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 9.54693424912356e-06, | |
| "loss": 0.4215, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.502217929455535e-06, | |
| "loss": 0.4145, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_bertscore/f1": 0.7982, | |
| "eval_bertscore/precision": 0.7974, | |
| "eval_bertscore/recall": 0.801, | |
| "eval_mean_prediction_length_characters": 821.211, | |
| "eval_mean_prediction_length_tokens": 179.198, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 56.2997, | |
| "eval_rouge/rouge1": 70.2035, | |
| "eval_rouge/rouge2": 51.1438, | |
| "eval_rouge/rougeL": 49.7011, | |
| "eval_rouge/rougeLsum": 68.1054, | |
| "eval_runtime": 2500.5637, | |
| "eval_samples_per_second": 0.4, | |
| "eval_steps_per_second": 0.4, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 9.457501609787508e-06, | |
| "loss": 0.4108, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.41287472275882e-06, | |
| "loss": 0.413, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_bertscore/f1": 0.7981, | |
| "eval_bertscore/precision": 0.7992, | |
| "eval_bertscore/recall": 0.799, | |
| "eval_mean_prediction_length_characters": 806.834, | |
| "eval_mean_prediction_length_tokens": 176.24, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 56.5708, | |
| "eval_rouge/rouge1": 70.2439, | |
| "eval_rouge/rouge2": 51.4876, | |
| "eval_rouge/rougeL": 50.0572, | |
| "eval_rouge/rougeLsum": 68.1941, | |
| "eval_runtime": 2443.1073, | |
| "eval_samples_per_second": 0.409, | |
| "eval_steps_per_second": 0.409, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.368158403090792e-06, | |
| "loss": 0.4064, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.323442083422767e-06, | |
| "loss": 0.4004, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_bertscore/f1": 0.8022, | |
| "eval_bertscore/precision": 0.7971, | |
| "eval_bertscore/recall": 0.8093, | |
| "eval_mean_prediction_length_characters": 860.077, | |
| "eval_mean_prediction_length_tokens": 188.397, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 57.2862, | |
| "eval_rouge/rouge1": 71.0513, | |
| "eval_rouge/rouge2": 52.0715, | |
| "eval_rouge/rougeL": 50.8134, | |
| "eval_rouge/rougeLsum": 69.0639, | |
| "eval_runtime": 2596.2685, | |
| "eval_samples_per_second": 0.385, | |
| "eval_steps_per_second": 0.385, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.278725763754741e-06, | |
| "loss": 0.3953, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 9.234009444086714e-06, | |
| "loss": 0.3905, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_bertscore/f1": 0.8009, | |
| "eval_bertscore/precision": 0.792, | |
| "eval_bertscore/recall": 0.812, | |
| "eval_mean_prediction_length_characters": 922.278, | |
| "eval_mean_prediction_length_tokens": 199.619, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 56.8514, | |
| "eval_rouge/rouge1": 70.8436, | |
| "eval_rouge/rouge2": 51.8156, | |
| "eval_rouge/rougeL": 50.0567, | |
| "eval_rouge/rougeLsum": 68.8517, | |
| "eval_runtime": 2741.6372, | |
| "eval_samples_per_second": 0.365, | |
| "eval_steps_per_second": 0.365, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 9.189382557058024e-06, | |
| "loss": 0.3834, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 9.144666237389999e-06, | |
| "loss": 0.3772, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_bertscore/f1": 0.802, | |
| "eval_bertscore/precision": 0.8014, | |
| "eval_bertscore/recall": 0.8048, | |
| "eval_mean_prediction_length_characters": 821.195, | |
| "eval_mean_prediction_length_tokens": 179.279, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 57.3719, | |
| "eval_rouge/rouge1": 70.4754, | |
| "eval_rouge/rouge2": 52.2981, | |
| "eval_rouge/rougeL": 51.2358, | |
| "eval_rouge/rougeLsum": 68.5129, | |
| "eval_runtime": 2484.708, | |
| "eval_samples_per_second": 0.402, | |
| "eval_steps_per_second": 0.402, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 9.099949917721973e-06, | |
| "loss": 0.3796, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 9.055233598053946e-06, | |
| "loss": 0.3745, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_bertscore/f1": 0.8077, | |
| "eval_bertscore/precision": 0.8096, | |
| "eval_bertscore/recall": 0.8076, | |
| "eval_mean_prediction_length_characters": 797.37, | |
| "eval_mean_prediction_length_tokens": 176.263, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 58.2667, | |
| "eval_rouge/rouge1": 71.7377, | |
| "eval_rouge/rouge2": 53.2732, | |
| "eval_rouge/rougeL": 51.7614, | |
| "eval_rouge/rougeLsum": 69.9972, | |
| "eval_runtime": 2452.4348, | |
| "eval_samples_per_second": 0.408, | |
| "eval_steps_per_second": 0.408, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 9.01051727838592e-06, | |
| "loss": 0.3705, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 8.96589039135723e-06, | |
| "loss": 0.3709, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_bertscore/f1": 0.8108, | |
| "eval_bertscore/precision": 0.8141, | |
| "eval_bertscore/recall": 0.8094, | |
| "eval_mean_prediction_length_characters": 798.81, | |
| "eval_mean_prediction_length_tokens": 176.217, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 58.6206, | |
| "eval_rouge/rouge1": 72.0193, | |
| "eval_rouge/rouge2": 53.6884, | |
| "eval_rouge/rougeL": 52.098, | |
| "eval_rouge/rougeLsum": 70.3419, | |
| "eval_runtime": 2449.0615, | |
| "eval_samples_per_second": 0.408, | |
| "eval_steps_per_second": 0.408, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 8.92126350432854e-06, | |
| "loss": 0.3638, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 8.876547184660515e-06, | |
| "loss": 0.3461, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_bertscore/f1": 0.8092, | |
| "eval_bertscore/precision": 0.8022, | |
| "eval_bertscore/recall": 0.8182, | |
| "eval_mean_prediction_length_characters": 891.59, | |
| "eval_mean_prediction_length_tokens": 193.579, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 58.5159, | |
| "eval_rouge/rouge1": 72.4344, | |
| "eval_rouge/rouge2": 54.0016, | |
| "eval_rouge/rougeL": 51.2235, | |
| "eval_rouge/rougeLsum": 70.5176, | |
| "eval_runtime": 2696.2641, | |
| "eval_samples_per_second": 0.371, | |
| "eval_steps_per_second": 0.371, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 8.83183086499249e-06, | |
| "loss": 0.2926, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 8.787114545324463e-06, | |
| "loss": 0.2935, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_bertscore/f1": 0.8099, | |
| "eval_bertscore/precision": 0.806, | |
| "eval_bertscore/recall": 0.8158, | |
| "eval_mean_prediction_length_characters": 858.323, | |
| "eval_mean_prediction_length_tokens": 186.494, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 59.0206, | |
| "eval_rouge/rouge1": 72.3233, | |
| "eval_rouge/rouge2": 54.4823, | |
| "eval_rouge/rougeL": 52.1769, | |
| "eval_rouge/rougeLsum": 70.5191, | |
| "eval_runtime": 2616.4872, | |
| "eval_samples_per_second": 0.382, | |
| "eval_steps_per_second": 0.382, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 8.742398225656437e-06, | |
| "loss": 0.2945, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 8.69768190598841e-06, | |
| "loss": 0.2859, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_bertscore/f1": 0.8021, | |
| "eval_bertscore/precision": 0.7967, | |
| "eval_bertscore/recall": 0.8098, | |
| "eval_mean_prediction_length_characters": 862.623, | |
| "eval_mean_prediction_length_tokens": 187.108, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 58.0972, | |
| "eval_rouge/rouge1": 71.3169, | |
| "eval_rouge/rouge2": 53.5502, | |
| "eval_rouge/rougeL": 51.3466, | |
| "eval_rouge/rougeLsum": 69.328, | |
| "eval_runtime": 2591.3634, | |
| "eval_samples_per_second": 0.386, | |
| "eval_steps_per_second": 0.386, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 8.65305501895972e-06, | |
| "loss": 0.2913, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 8.608338699291695e-06, | |
| "loss": 0.2871, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_bertscore/f1": 0.8125, | |
| "eval_bertscore/precision": 0.81, | |
| "eval_bertscore/recall": 0.8167, | |
| "eval_mean_prediction_length_characters": 839.348, | |
| "eval_mean_prediction_length_tokens": 182.08, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 59.5943, | |
| "eval_rouge/rouge1": 72.7757, | |
| "eval_rouge/rouge2": 55.0591, | |
| "eval_rouge/rougeL": 52.82, | |
| "eval_rouge/rougeLsum": 70.9482, | |
| "eval_runtime": 2411.5587, | |
| "eval_samples_per_second": 0.415, | |
| "eval_steps_per_second": 0.415, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 8.563622379623669e-06, | |
| "loss": 0.2858, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 8.518906059955642e-06, | |
| "loss": 0.2849, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_bertscore/f1": 0.8151, | |
| "eval_bertscore/precision": 0.8177, | |
| "eval_bertscore/recall": 0.8143, | |
| "eval_mean_prediction_length_characters": 801.734, | |
| "eval_mean_prediction_length_tokens": 175.958, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.3823, | |
| "eval_rouge/rouge1": 73.1311, | |
| "eval_rouge/rouge2": 56.0185, | |
| "eval_rouge/rougeL": 53.7397, | |
| "eval_rouge/rougeLsum": 71.4246, | |
| "eval_runtime": 2330.5152, | |
| "eval_samples_per_second": 0.429, | |
| "eval_steps_per_second": 0.429, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 8.474189740287617e-06, | |
| "loss": 0.2787, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 8.429473420619591e-06, | |
| "loss": 0.2819, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_bertscore/f1": 0.8142, | |
| "eval_bertscore/precision": 0.8077, | |
| "eval_bertscore/recall": 0.8228, | |
| "eval_mean_prediction_length_characters": 894.063, | |
| "eval_mean_prediction_length_tokens": 192.74, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.1592, | |
| "eval_rouge/rouge1": 73.1791, | |
| "eval_rouge/rouge2": 55.8507, | |
| "eval_rouge/rougeL": 53.2711, | |
| "eval_rouge/rougeLsum": 71.4419, | |
| "eval_runtime": 2537.8153, | |
| "eval_samples_per_second": 0.394, | |
| "eval_steps_per_second": 0.394, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 8.384757100951564e-06, | |
| "loss": 0.2799, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.340040781283539e-06, | |
| "loss": 0.2776, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_bertscore/f1": 0.8054, | |
| "eval_bertscore/precision": 0.7925, | |
| "eval_bertscore/recall": 0.821, | |
| "eval_mean_prediction_length_characters": 967.121, | |
| "eval_mean_prediction_length_tokens": 206.944, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 58.7521, | |
| "eval_rouge/rouge1": 71.9074, | |
| "eval_rouge/rouge2": 54.5099, | |
| "eval_rouge/rougeL": 51.7393, | |
| "eval_rouge/rougeLsum": 69.9728, | |
| "eval_runtime": 2713.6577, | |
| "eval_samples_per_second": 0.369, | |
| "eval_steps_per_second": 0.369, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.295413894254849e-06, | |
| "loss": 0.2761, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 8.250697574586823e-06, | |
| "loss": 0.2742, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_bertscore/f1": 0.8187, | |
| "eval_bertscore/precision": 0.8125, | |
| "eval_bertscore/recall": 0.827, | |
| "eval_mean_prediction_length_characters": 880.74, | |
| "eval_mean_prediction_length_tokens": 191.221, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.9358, | |
| "eval_rouge/rouge1": 73.8216, | |
| "eval_rouge/rouge2": 56.8015, | |
| "eval_rouge/rougeL": 53.9603, | |
| "eval_rouge/rougeLsum": 72.1193, | |
| "eval_runtime": 2491.9164, | |
| "eval_samples_per_second": 0.401, | |
| "eval_steps_per_second": 0.401, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 8.205981254918796e-06, | |
| "loss": 0.2737, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 8.16126493525077e-06, | |
| "loss": 0.2663, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_bertscore/f1": 0.8215, | |
| "eval_bertscore/precision": 0.8174, | |
| "eval_bertscore/recall": 0.8272, | |
| "eval_mean_prediction_length_characters": 862.393, | |
| "eval_mean_prediction_length_tokens": 187.552, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.7551, | |
| "eval_rouge/rouge1": 74.5811, | |
| "eval_rouge/rouge2": 57.6565, | |
| "eval_rouge/rougeL": 54.7699, | |
| "eval_rouge/rougeLsum": 72.9841, | |
| "eval_runtime": 2430.2491, | |
| "eval_samples_per_second": 0.411, | |
| "eval_steps_per_second": 0.411, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 8.116548615582743e-06, | |
| "loss": 0.2636, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 8.071921728554053e-06, | |
| "loss": 0.2672, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_bertscore/f1": 0.8194, | |
| "eval_bertscore/precision": 0.8133, | |
| "eval_bertscore/recall": 0.8274, | |
| "eval_mean_prediction_length_characters": 893.688, | |
| "eval_mean_prediction_length_tokens": 193.593, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.3651, | |
| "eval_rouge/rouge1": 74.2269, | |
| "eval_rouge/rouge2": 57.4497, | |
| "eval_rouge/rougeL": 54.1897, | |
| "eval_rouge/rougeLsum": 72.5834, | |
| "eval_runtime": 2499.9317, | |
| "eval_samples_per_second": 0.4, | |
| "eval_steps_per_second": 0.4, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 9.569096202090125e-06, | |
| "loss": 0.2366, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 9.54673714242274e-06, | |
| "loss": 0.2405, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_bertscore/f1": 0.8145, | |
| "eval_bertscore/precision": 0.8054, | |
| "eval_bertscore/recall": 0.8256, | |
| "eval_mean_prediction_length_characters": 927.422, | |
| "eval_mean_prediction_length_tokens": 200.478, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.0655, | |
| "eval_rouge/rouge1": 73.2741, | |
| "eval_rouge/rouge2": 55.8854, | |
| "eval_rouge/rougeL": 52.9208, | |
| "eval_rouge/rougeLsum": 71.6192, | |
| "eval_runtime": 2620.9045, | |
| "eval_samples_per_second": 0.382, | |
| "eval_steps_per_second": 0.382, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 9.524378082755352e-06, | |
| "loss": 0.2489, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.502019023087967e-06, | |
| "loss": 0.2428, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_bertscore/f1": 0.8171, | |
| "eval_bertscore/precision": 0.8136, | |
| "eval_bertscore/recall": 0.8226, | |
| "eval_mean_prediction_length_characters": 847.398, | |
| "eval_mean_prediction_length_tokens": 183.753, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.8081, | |
| "eval_rouge/rouge1": 73.4983, | |
| "eval_rouge/rouge2": 56.5892, | |
| "eval_rouge/rougeL": 54.0596, | |
| "eval_rouge/rougeLsum": 71.9013, | |
| "eval_runtime": 2373.5033, | |
| "eval_samples_per_second": 0.421, | |
| "eval_steps_per_second": 0.421, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 9.47965996342058e-06, | |
| "loss": 0.2511, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 9.457300903753192e-06, | |
| "loss": 0.2416, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_bertscore/f1": 0.8155, | |
| "eval_bertscore/precision": 0.8154, | |
| "eval_bertscore/recall": 0.8179, | |
| "eval_mean_prediction_length_characters": 850.697, | |
| "eval_mean_prediction_length_tokens": 185.038, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.5988, | |
| "eval_rouge/rouge1": 72.9601, | |
| "eval_rouge/rouge2": 56.3526, | |
| "eval_rouge/rougeL": 54.1244, | |
| "eval_rouge/rougeLsum": 71.3786, | |
| "eval_runtime": 2440.9193, | |
| "eval_samples_per_second": 0.41, | |
| "eval_steps_per_second": 0.41, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 9.43498656220514e-06, | |
| "loss": 0.2404, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.412627502537755e-06, | |
| "loss": 0.2597, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_bertscore/f1": 0.8146, | |
| "eval_bertscore/precision": 0.8118, | |
| "eval_bertscore/recall": 0.8194, | |
| "eval_mean_prediction_length_characters": 856.43, | |
| "eval_mean_prediction_length_tokens": 186.628, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 60.5122, | |
| "eval_rouge/rouge1": 73.1873, | |
| "eval_rouge/rouge2": 56.3588, | |
| "eval_rouge/rougeL": 53.7195, | |
| "eval_rouge/rougeLsum": 71.5458, | |
| "eval_runtime": 2434.8964, | |
| "eval_samples_per_second": 0.411, | |
| "eval_steps_per_second": 0.411, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.390268442870366e-06, | |
| "loss": 0.2436, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.367954101322315e-06, | |
| "loss": 0.2526, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_bertscore/f1": 0.8206, | |
| "eval_bertscore/precision": 0.8186, | |
| "eval_bertscore/recall": 0.8245, | |
| "eval_mean_prediction_length_characters": 843.759, | |
| "eval_mean_prediction_length_tokens": 183.492, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.4692, | |
| "eval_rouge/rouge1": 74.1108, | |
| "eval_rouge/rouge2": 57.3353, | |
| "eval_rouge/rougeL": 54.6598, | |
| "eval_rouge/rougeLsum": 72.5858, | |
| "eval_runtime": 2441.5715, | |
| "eval_samples_per_second": 0.41, | |
| "eval_steps_per_second": 0.41, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 9.34559504165493e-06, | |
| "loss": 0.248, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.323235981987542e-06, | |
| "loss": 0.2431, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_bertscore/f1": 0.8214, | |
| "eval_bertscore/precision": 0.8186, | |
| "eval_bertscore/recall": 0.826, | |
| "eval_mean_prediction_length_characters": 854.807, | |
| "eval_mean_prediction_length_tokens": 186.06, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.6121, | |
| "eval_rouge/rouge1": 74.3063, | |
| "eval_rouge/rouge2": 57.6124, | |
| "eval_rouge/rougeL": 54.6331, | |
| "eval_rouge/rougeLsum": 72.7609, | |
| "eval_runtime": 2483.1491, | |
| "eval_samples_per_second": 0.403, | |
| "eval_steps_per_second": 0.403, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.300876922320155e-06, | |
| "loss": 0.2413, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.27851786265277e-06, | |
| "loss": 0.2556, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_bertscore/f1": 0.8212, | |
| "eval_bertscore/precision": 0.8166, | |
| "eval_bertscore/recall": 0.8277, | |
| "eval_mean_prediction_length_characters": 871.94, | |
| "eval_mean_prediction_length_tokens": 189.412, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.6287, | |
| "eval_rouge/rouge1": 74.4291, | |
| "eval_rouge/rouge2": 57.5628, | |
| "eval_rouge/rougeL": 54.6343, | |
| "eval_rouge/rougeLsum": 72.7972, | |
| "eval_runtime": 2504.8302, | |
| "eval_samples_per_second": 0.399, | |
| "eval_steps_per_second": 0.399, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 9.256158802985382e-06, | |
| "loss": 0.2414, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 9.233799743317997e-06, | |
| "loss": 0.2493, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_bertscore/f1": 0.8202, | |
| "eval_bertscore/precision": 0.8115, | |
| "eval_bertscore/recall": 0.831, | |
| "eval_mean_prediction_length_characters": 919.152, | |
| "eval_mean_prediction_length_tokens": 199.285, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.6474, | |
| "eval_rouge/rouge1": 74.2897, | |
| "eval_rouge/rouge2": 57.7571, | |
| "eval_rouge/rougeL": 54.6022, | |
| "eval_rouge/rougeLsum": 72.7195, | |
| "eval_runtime": 2638.78, | |
| "eval_samples_per_second": 0.379, | |
| "eval_steps_per_second": 0.379, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 9.21144068365061e-06, | |
| "loss": 0.2467, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 9.189081623983222e-06, | |
| "loss": 0.2421, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_bertscore/f1": 0.823, | |
| "eval_bertscore/precision": 0.8258, | |
| "eval_bertscore/recall": 0.8222, | |
| "eval_mean_prediction_length_characters": 803.212, | |
| "eval_mean_prediction_length_tokens": 176.172, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.9161, | |
| "eval_rouge/rouge1": 74.3746, | |
| "eval_rouge/rouge2": 57.9399, | |
| "eval_rouge/rougeL": 55.0818, | |
| "eval_rouge/rougeLsum": 72.8967, | |
| "eval_runtime": 2327.757, | |
| "eval_samples_per_second": 0.43, | |
| "eval_steps_per_second": 0.43, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 9.166722564315837e-06, | |
| "loss": 0.2478, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 9.14436350464845e-06, | |
| "loss": 0.229, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_bertscore/f1": 0.8212, | |
| "eval_bertscore/precision": 0.822, | |
| "eval_bertscore/recall": 0.8227, | |
| "eval_mean_prediction_length_characters": 819.677, | |
| "eval_mean_prediction_length_tokens": 178.389, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 61.8804, | |
| "eval_rouge/rouge1": 74.0116, | |
| "eval_rouge/rouge2": 57.9086, | |
| "eval_rouge/rougeL": 55.2862, | |
| "eval_rouge/rougeLsum": 72.466, | |
| "eval_runtime": 2366.2663, | |
| "eval_samples_per_second": 0.423, | |
| "eval_steps_per_second": 0.423, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 9.122049163100398e-06, | |
| "loss": 0.2398, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 9.09969010343301e-06, | |
| "loss": 0.2429, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_bertscore/f1": 0.8243, | |
| "eval_bertscore/precision": 0.8174, | |
| "eval_bertscore/recall": 0.8333, | |
| "eval_mean_prediction_length_characters": 896.966, | |
| "eval_mean_prediction_length_tokens": 195.349, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 62.2879, | |
| "eval_rouge/rouge1": 74.8367, | |
| "eval_rouge/rouge2": 58.4326, | |
| "eval_rouge/rougeL": 55.2638, | |
| "eval_rouge/rougeLsum": 73.3944, | |
| "eval_runtime": 2574.2161, | |
| "eval_samples_per_second": 0.388, | |
| "eval_steps_per_second": 0.388, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 9.077331043765623e-06, | |
| "loss": 0.2355, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 9.054971984098238e-06, | |
| "loss": 0.2375, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_bertscore/f1": 0.824, | |
| "eval_bertscore/precision": 0.82, | |
| "eval_bertscore/recall": 0.8303, | |
| "eval_mean_prediction_length_characters": 873.321, | |
| "eval_mean_prediction_length_tokens": 190.408, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 62.1689, | |
| "eval_rouge/rouge1": 74.7062, | |
| "eval_rouge/rouge2": 58.2978, | |
| "eval_rouge/rougeL": 55.1711, | |
| "eval_rouge/rougeLsum": 73.2347, | |
| "eval_runtime": 2618.1437, | |
| "eval_samples_per_second": 0.382, | |
| "eval_steps_per_second": 0.382, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 9.032657642550186e-06, | |
| "loss": 0.2425, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 9.010298582882799e-06, | |
| "loss": 0.228, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_bertscore/f1": 0.8258, | |
| "eval_bertscore/precision": 0.8189, | |
| "eval_bertscore/recall": 0.8348, | |
| "eval_mean_prediction_length_characters": 904.242, | |
| "eval_mean_prediction_length_tokens": 196.813, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 62.6657, | |
| "eval_rouge/rouge1": 75.0484, | |
| "eval_rouge/rouge2": 59.0138, | |
| "eval_rouge/rougeL": 55.5642, | |
| "eval_rouge/rougeLsum": 73.6606, | |
| "eval_runtime": 2709.9214, | |
| "eval_samples_per_second": 0.369, | |
| "eval_steps_per_second": 0.369, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 8.987939523215412e-06, | |
| "loss": 0.2388, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 8.965580463548026e-06, | |
| "loss": 0.2398, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_bertscore/f1": 0.8287, | |
| "eval_bertscore/precision": 0.8255, | |
| "eval_bertscore/recall": 0.8338, | |
| "eval_mean_prediction_length_characters": 861.415, | |
| "eval_mean_prediction_length_tokens": 187.846, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 63.1011, | |
| "eval_rouge/rouge1": 75.5807, | |
| "eval_rouge/rouge2": 59.457, | |
| "eval_rouge/rougeL": 55.9108, | |
| "eval_rouge/rougeLsum": 74.1699, | |
| "eval_runtime": 2601.0783, | |
| "eval_samples_per_second": 0.384, | |
| "eval_steps_per_second": 0.384, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 8.943221403880639e-06, | |
| "loss": 0.2399, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 8.920862344213252e-06, | |
| "loss": 0.2373, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_bertscore/f1": 0.8253, | |
| "eval_bertscore/precision": 0.8244, | |
| "eval_bertscore/recall": 0.828, | |
| "eval_mean_prediction_length_characters": 840.263, | |
| "eval_mean_prediction_length_tokens": 182.473, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 62.7461, | |
| "eval_rouge/rouge1": 74.8987, | |
| "eval_rouge/rouge2": 59.0142, | |
| "eval_rouge/rougeL": 55.8894, | |
| "eval_rouge/rougeLsum": 73.4189, | |
| "eval_runtime": 2549.6944, | |
| "eval_samples_per_second": 0.392, | |
| "eval_steps_per_second": 0.392, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 8.8985480026652e-06, | |
| "loss": 0.2397, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 8.876233661117149e-06, | |
| "loss": 0.2057, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_bertscore/f1": 0.8258, | |
| "eval_bertscore/precision": 0.8222, | |
| "eval_bertscore/recall": 0.8313, | |
| "eval_mean_prediction_length_characters": 857.49, | |
| "eval_mean_prediction_length_tokens": 186.547, | |
| "eval_num_predicted": 1000, | |
| "eval_rouge/geometric_mean": 62.8166, | |
| "eval_rouge/rouge1": 75.1042, | |
| "eval_rouge/rouge2": 59.0815, | |
| "eval_rouge/rougeL": 55.8608, | |
| "eval_rouge/rougeLsum": 73.5713, | |
| "eval_runtime": 2602.9106, | |
| "eval_samples_per_second": 0.384, | |
| "eval_steps_per_second": 0.384, | |
| "step": 50000 | |
| } | |
| ], | |
| "max_steps": 248470, | |
| "num_train_epochs": 10, | |
| "total_flos": 6.239894115827712e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |