| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.776824034334766, |
| "eval_steps": 500, |
| "global_step": 720, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001, |
| "loss": 3.6024, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 4.4454, |
| "eval_bp": 0.6832, |
| "eval_counts_1": 5645, |
| "eval_counts_2": 1343, |
| "eval_counts_3": 424, |
| "eval_counts_4": 109, |
| "eval_exact_match": 0.0005, |
| "eval_f1": 0.2236, |
| "eval_gen_len": 11.6338, |
| "eval_loss": 2.468198776245117, |
| "eval_precisions_1": 36.6844, |
| "eval_precisions_2": 10.1866, |
| "eval_precisions_3": 3.8616, |
| "eval_precisions_4": 1.242, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2285, |
| "eval_rouge2": 0.0824, |
| "eval_rougeL": 0.2192, |
| "eval_rougeLsum": 0.2188, |
| "eval_runtime": 813.9917, |
| "eval_samples_per_second": 2.708, |
| "eval_steps_per_second": 0.677, |
| "eval_sys_len": 15388, |
| "eval_totals_1": 15388, |
| "eval_totals_2": 13184, |
| "eval_totals_3": 10980, |
| "eval_totals_4": 8776, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 0.0001, |
| "loss": 2.9671, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_bleu": 5.7163, |
| "eval_bp": 0.7259, |
| "eval_counts_1": 5988, |
| "eval_counts_2": 1562, |
| "eval_counts_3": 569, |
| "eval_counts_4": 179, |
| "eval_exact_match": 0.0018, |
| "eval_f1": 0.2401, |
| "eval_gen_len": 12.314, |
| "eval_loss": 2.244511842727661, |
| "eval_precisions_1": 37.2064, |
| "eval_precisions_2": 11.2455, |
| "eval_precisions_3": 4.8691, |
| "eval_precisions_4": 1.8878, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2465, |
| "eval_rouge2": 0.0971, |
| "eval_rougeL": 0.2371, |
| "eval_rougeLsum": 0.2371, |
| "eval_runtime": 802.4783, |
| "eval_samples_per_second": 2.746, |
| "eval_steps_per_second": 0.687, |
| "eval_sys_len": 16094, |
| "eval_totals_1": 16094, |
| "eval_totals_2": 13890, |
| "eval_totals_3": 11686, |
| "eval_totals_4": 9482, |
| "step": 72 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.0001, |
| "loss": 2.6324, |
| "step": 109 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_bleu": 6.9028, |
| "eval_bp": 0.7887, |
| "eval_counts_1": 6539, |
| "eval_counts_2": 1846, |
| "eval_counts_3": 702, |
| "eval_counts_4": 240, |
| "eval_exact_match": 0.0027, |
| "eval_f1": 0.2663, |
| "eval_gen_len": 13.2319, |
| "eval_loss": 2.122749090194702, |
| "eval_precisions_1": 38.0772, |
| "eval_precisions_2": 12.3322, |
| "eval_precisions_3": 5.4994, |
| "eval_precisions_4": 2.2725, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2729, |
| "eval_rouge2": 0.1154, |
| "eval_rougeL": 0.2601, |
| "eval_rougeLsum": 0.2604, |
| "eval_runtime": 822.9261, |
| "eval_samples_per_second": 2.678, |
| "eval_steps_per_second": 0.67, |
| "eval_sys_len": 17173, |
| "eval_totals_1": 17173, |
| "eval_totals_2": 14969, |
| "eval_totals_3": 12765, |
| "eval_totals_4": 10561, |
| "step": 109 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 0.0001, |
| "loss": 2.5557, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.98, |
| "eval_bleu": 7.3331, |
| "eval_bp": 0.7179, |
| "eval_counts_1": 6491, |
| "eval_counts_2": 1923, |
| "eval_counts_3": 752, |
| "eval_counts_4": 275, |
| "eval_exact_match": 0.0059, |
| "eval_f1": 0.2729, |
| "eval_gen_len": 12.0962, |
| "eval_loss": 2.035691022872925, |
| "eval_precisions_1": 40.6679, |
| "eval_precisions_2": 13.9783, |
| "eval_precisions_3": 6.5091, |
| "eval_precisions_4": 2.9415, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2783, |
| "eval_rouge2": 0.1214, |
| "eval_rougeL": 0.2676, |
| "eval_rougeLsum": 0.2678, |
| "eval_runtime": 786.7967, |
| "eval_samples_per_second": 2.801, |
| "eval_steps_per_second": 0.7, |
| "eval_sys_len": 15961, |
| "eval_totals_1": 15961, |
| "eval_totals_2": 13757, |
| "eval_totals_3": 11553, |
| "eval_totals_4": 9349, |
| "step": 145 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.0001, |
| "loss": 2.3785, |
| "step": 182 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_bleu": 8.2007, |
| "eval_bp": 0.7463, |
| "eval_counts_1": 6808, |
| "eval_counts_2": 2113, |
| "eval_counts_3": 855, |
| "eval_counts_4": 328, |
| "eval_exact_match": 0.0064, |
| "eval_f1": 0.2892, |
| "eval_gen_len": 12.6819, |
| "eval_loss": 1.9824347496032715, |
| "eval_precisions_1": 41.4137, |
| "eval_precisions_2": 14.8437, |
| "eval_precisions_3": 7.1066, |
| "eval_precisions_4": 3.3377, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2948, |
| "eval_rouge2": 0.1326, |
| "eval_rougeL": 0.2825, |
| "eval_rougeLsum": 0.2825, |
| "eval_runtime": 806.3535, |
| "eval_samples_per_second": 2.733, |
| "eval_steps_per_second": 0.683, |
| "eval_sys_len": 16439, |
| "eval_totals_1": 16439, |
| "eval_totals_2": 14235, |
| "eval_totals_3": 12031, |
| "eval_totals_4": 9827, |
| "step": 182 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.0001, |
| "loss": 2.3396, |
| "step": 218 |
| }, |
| { |
| "epoch": 5.99, |
| "eval_bleu": 8.639, |
| "eval_bp": 0.7702, |
| "eval_counts_1": 7033, |
| "eval_counts_2": 2194, |
| "eval_counts_3": 886, |
| "eval_counts_4": 364, |
| "eval_exact_match": 0.0086, |
| "eval_f1": 0.3, |
| "eval_gen_len": 13.0254, |
| "eval_loss": 1.9448895454406738, |
| "eval_precisions_1": 41.7364, |
| "eval_precisions_2": 14.9792, |
| "eval_precisions_3": 7.1205, |
| "eval_precisions_4": 3.555, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3044, |
| "eval_rouge2": 0.1373, |
| "eval_rougeL": 0.292, |
| "eval_rougeLsum": 0.2922, |
| "eval_runtime": 473.2306, |
| "eval_samples_per_second": 4.657, |
| "eval_steps_per_second": 1.164, |
| "eval_sys_len": 16851, |
| "eval_totals_1": 16851, |
| "eval_totals_2": 14647, |
| "eval_totals_3": 12443, |
| "eval_totals_4": 10239, |
| "step": 218 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 0.0001, |
| "loss": 2.2557, |
| "step": 254 |
| }, |
| { |
| "epoch": 6.98, |
| "eval_bleu": 9.049, |
| "eval_bp": 0.7515, |
| "eval_counts_1": 7167, |
| "eval_counts_2": 2285, |
| "eval_counts_3": 939, |
| "eval_counts_4": 389, |
| "eval_exact_match": 0.0095, |
| "eval_f1": 0.3119, |
| "eval_gen_len": 12.7119, |
| "eval_loss": 1.8937886953353882, |
| "eval_precisions_1": 43.3602, |
| "eval_precisions_2": 15.9511, |
| "eval_precisions_3": 7.7469, |
| "eval_precisions_4": 3.9226, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3166, |
| "eval_rouge2": 0.1428, |
| "eval_rougeL": 0.3043, |
| "eval_rougeLsum": 0.3046, |
| "eval_runtime": 453.3958, |
| "eval_samples_per_second": 4.861, |
| "eval_steps_per_second": 1.215, |
| "eval_sys_len": 16529, |
| "eval_totals_1": 16529, |
| "eval_totals_2": 14325, |
| "eval_totals_3": 12121, |
| "eval_totals_4": 9917, |
| "step": 254 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 0.0001, |
| "loss": 2.1168, |
| "step": 291 |
| }, |
| { |
| "epoch": 7.99, |
| "eval_bleu": 9.6447, |
| "eval_bp": 0.7708, |
| "eval_counts_1": 7347, |
| "eval_counts_2": 2425, |
| "eval_counts_3": 1021, |
| "eval_counts_4": 425, |
| "eval_exact_match": 0.0104, |
| "eval_f1": 0.3211, |
| "eval_gen_len": 12.9374, |
| "eval_loss": 1.857459306716919, |
| "eval_precisions_1": 43.5765, |
| "eval_precisions_2": 16.5461, |
| "eval_precisions_3": 8.1995, |
| "eval_precisions_4": 4.1472, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3258, |
| "eval_rouge2": 0.1505, |
| "eval_rougeL": 0.3137, |
| "eval_rougeLsum": 0.3142, |
| "eval_runtime": 457.8255, |
| "eval_samples_per_second": 4.814, |
| "eval_steps_per_second": 1.204, |
| "eval_sys_len": 16860, |
| "eval_totals_1": 16860, |
| "eval_totals_2": 14656, |
| "eval_totals_3": 12452, |
| "eval_totals_4": 10248, |
| "step": 291 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 0.0001, |
| "loss": 2.1105, |
| "step": 327 |
| }, |
| { |
| "epoch": 8.98, |
| "eval_bleu": 9.9436, |
| "eval_bp": 0.7807, |
| "eval_counts_1": 7460, |
| "eval_counts_2": 2461, |
| "eval_counts_3": 1061, |
| "eval_counts_4": 449, |
| "eval_exact_match": 0.0095, |
| "eval_f1": 0.3267, |
| "eval_gen_len": 13.1828, |
| "eval_loss": 1.8283559083938599, |
| "eval_precisions_1": 43.7948, |
| "eval_precisions_2": 16.5947, |
| "eval_precisions_3": 8.4033, |
| "eval_precisions_4": 4.3082, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3317, |
| "eval_rouge2": 0.1521, |
| "eval_rougeL": 0.3187, |
| "eval_rougeLsum": 0.3191, |
| "eval_runtime": 464.6, |
| "eval_samples_per_second": 4.744, |
| "eval_steps_per_second": 1.186, |
| "eval_sys_len": 17034, |
| "eval_totals_1": 17034, |
| "eval_totals_2": 14830, |
| "eval_totals_3": 12626, |
| "eval_totals_4": 10422, |
| "step": 327 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.0001, |
| "loss": 1.9913, |
| "step": 364 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bleu": 10.3601, |
| "eval_bp": 0.7791, |
| "eval_counts_1": 7547, |
| "eval_counts_2": 2537, |
| "eval_counts_3": 1105, |
| "eval_counts_4": 487, |
| "eval_exact_match": 0.0113, |
| "eval_f1": 0.3316, |
| "eval_gen_len": 13.0358, |
| "eval_loss": 1.8056522607803345, |
| "eval_precisions_1": 44.3811, |
| "eval_precisions_2": 17.1407, |
| "eval_precisions_3": 8.7719, |
| "eval_precisions_4": 4.6858, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.335, |
| "eval_rouge2": 0.1566, |
| "eval_rougeL": 0.323, |
| "eval_rougeLsum": 0.3233, |
| "eval_runtime": 492.674, |
| "eval_samples_per_second": 4.474, |
| "eval_steps_per_second": 1.118, |
| "eval_sys_len": 17005, |
| "eval_totals_1": 17005, |
| "eval_totals_2": 14801, |
| "eval_totals_3": 12597, |
| "eval_totals_4": 10393, |
| "step": 364 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 0.0001, |
| "loss": 1.9943, |
| "step": 400 |
| }, |
| { |
| "epoch": 10.99, |
| "eval_bleu": 10.5378, |
| "eval_bp": 0.7697, |
| "eval_counts_1": 7629, |
| "eval_counts_2": 2574, |
| "eval_counts_3": 1131, |
| "eval_counts_4": 496, |
| "eval_exact_match": 0.0113, |
| "eval_f1": 0.3385, |
| "eval_gen_len": 13.0154, |
| "eval_loss": 1.7973003387451172, |
| "eval_precisions_1": 45.2975, |
| "eval_precisions_2": 17.5844, |
| "eval_precisions_3": 9.096, |
| "eval_precisions_4": 4.8485, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.343, |
| "eval_rouge2": 0.1594, |
| "eval_rougeL": 0.3296, |
| "eval_rougeLsum": 0.33, |
| "eval_runtime": 454.7448, |
| "eval_samples_per_second": 4.847, |
| "eval_steps_per_second": 1.212, |
| "eval_sys_len": 16842, |
| "eval_totals_1": 16842, |
| "eval_totals_2": 14638, |
| "eval_totals_3": 12434, |
| "eval_totals_4": 10230, |
| "step": 400 |
| }, |
| { |
| "epoch": 11.98, |
| "learning_rate": 0.0001, |
| "loss": 1.941, |
| "step": 436 |
| }, |
| { |
| "epoch": 11.98, |
| "eval_bleu": 10.8273, |
| "eval_bp": 0.7848, |
| "eval_counts_1": 7681, |
| "eval_counts_2": 2606, |
| "eval_counts_3": 1164, |
| "eval_counts_4": 528, |
| "eval_exact_match": 0.0132, |
| "eval_f1": 0.3385, |
| "eval_gen_len": 13.1361, |
| "eval_loss": 1.777303695678711, |
| "eval_precisions_1": 44.905, |
| "eval_precisions_2": 17.4888, |
| "eval_precisions_3": 9.1675, |
| "eval_precisions_4": 5.0319, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3421, |
| "eval_rouge2": 0.1607, |
| "eval_rougeL": 0.3295, |
| "eval_rougeLsum": 0.3294, |
| "eval_runtime": 458.5033, |
| "eval_samples_per_second": 4.807, |
| "eval_steps_per_second": 1.202, |
| "eval_sys_len": 17105, |
| "eval_totals_1": 17105, |
| "eval_totals_2": 14901, |
| "eval_totals_3": 12697, |
| "eval_totals_4": 10493, |
| "step": 436 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.0001, |
| "loss": 1.8453, |
| "step": 473 |
| }, |
| { |
| "epoch": 12.99, |
| "eval_bleu": 11.2687, |
| "eval_bp": 0.7972, |
| "eval_counts_1": 7817, |
| "eval_counts_2": 2700, |
| "eval_counts_3": 1224, |
| "eval_counts_4": 560, |
| "eval_exact_match": 0.0127, |
| "eval_f1": 0.3447, |
| "eval_gen_len": 13.5018, |
| "eval_loss": 1.7595148086547852, |
| "eval_precisions_1": 45.1224, |
| "eval_precisions_2": 17.8571, |
| "eval_precisions_3": 9.4766, |
| "eval_precisions_4": 5.2278, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3492, |
| "eval_rouge2": 0.1662, |
| "eval_rougeL": 0.3367, |
| "eval_rougeLsum": 0.3367, |
| "eval_runtime": 465.5444, |
| "eval_samples_per_second": 4.734, |
| "eval_steps_per_second": 1.184, |
| "eval_sys_len": 17324, |
| "eval_totals_1": 17324, |
| "eval_totals_2": 15120, |
| "eval_totals_3": 12916, |
| "eval_totals_4": 10712, |
| "step": 473 |
| }, |
| { |
| "epoch": 13.98, |
| "learning_rate": 0.0001, |
| "loss": 1.85, |
| "step": 509 |
| }, |
| { |
| "epoch": 13.98, |
| "eval_bleu": 10.9825, |
| "eval_bp": 0.8025, |
| "eval_counts_1": 7792, |
| "eval_counts_2": 2642, |
| "eval_counts_3": 1182, |
| "eval_counts_4": 537, |
| "eval_exact_match": 0.0127, |
| "eval_f1": 0.3416, |
| "eval_gen_len": 13.5395, |
| "eval_loss": 1.7414402961730957, |
| "eval_precisions_1": 44.7379, |
| "eval_precisions_2": 17.3667, |
| "eval_precisions_3": 9.086, |
| "eval_precisions_4": 4.9699, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3458, |
| "eval_rouge2": 0.1632, |
| "eval_rougeL": 0.3322, |
| "eval_rougeLsum": 0.3322, |
| "eval_runtime": 468.8552, |
| "eval_samples_per_second": 4.701, |
| "eval_steps_per_second": 1.175, |
| "eval_sys_len": 17417, |
| "eval_totals_1": 17417, |
| "eval_totals_2": 15213, |
| "eval_totals_3": 13009, |
| "eval_totals_4": 10805, |
| "step": 509 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 0.0001, |
| "loss": 1.7588, |
| "step": 546 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_bleu": 11.3189, |
| "eval_bp": 0.7939, |
| "eval_counts_1": 7827, |
| "eval_counts_2": 2702, |
| "eval_counts_3": 1223, |
| "eval_counts_4": 569, |
| "eval_exact_match": 0.015, |
| "eval_f1": 0.3446, |
| "eval_gen_len": 13.3026, |
| "eval_loss": 1.7346255779266357, |
| "eval_precisions_1": 45.3345, |
| "eval_precisions_2": 17.9404, |
| "eval_precisions_3": 9.5123, |
| "eval_precisions_4": 5.3412, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3487, |
| "eval_rouge2": 0.1661, |
| "eval_rougeL": 0.3355, |
| "eval_rougeLsum": 0.3354, |
| "eval_runtime": 464.8491, |
| "eval_samples_per_second": 4.741, |
| "eval_steps_per_second": 1.185, |
| "eval_sys_len": 17265, |
| "eval_totals_1": 17265, |
| "eval_totals_2": 15061, |
| "eval_totals_3": 12857, |
| "eval_totals_4": 10653, |
| "step": 546 |
| }, |
| { |
| "epoch": 15.99, |
| "learning_rate": 0.0001, |
| "loss": 1.7663, |
| "step": 582 |
| }, |
| { |
| "epoch": 15.99, |
| "eval_bleu": 11.5245, |
| "eval_bp": 0.8032, |
| "eval_counts_1": 7946, |
| "eval_counts_2": 2757, |
| "eval_counts_3": 1245, |
| "eval_counts_4": 581, |
| "eval_exact_match": 0.0154, |
| "eval_f1": 0.3501, |
| "eval_gen_len": 13.4515, |
| "eval_loss": 1.7190728187561035, |
| "eval_precisions_1": 45.5855, |
| "eval_precisions_2": 18.106, |
| "eval_precisions_3": 9.56, |
| "eval_precisions_4": 5.3702, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3544, |
| "eval_rouge2": 0.1695, |
| "eval_rougeL": 0.3418, |
| "eval_rougeLsum": 0.3416, |
| "eval_runtime": 465.8123, |
| "eval_samples_per_second": 4.732, |
| "eval_steps_per_second": 1.183, |
| "eval_sys_len": 17431, |
| "eval_totals_1": 17431, |
| "eval_totals_2": 15227, |
| "eval_totals_3": 13023, |
| "eval_totals_4": 10819, |
| "step": 582 |
| }, |
| { |
| "epoch": 16.98, |
| "learning_rate": 0.0001, |
| "loss": 1.7317, |
| "step": 618 |
| }, |
| { |
| "epoch": 16.98, |
| "eval_bleu": 12.0845, |
| "eval_bp": 0.8212, |
| "eval_counts_1": 8068, |
| "eval_counts_2": 2844, |
| "eval_counts_3": 1325, |
| "eval_counts_4": 633, |
| "eval_exact_match": 0.0163, |
| "eval_f1": 0.3527, |
| "eval_gen_len": 13.77, |
| "eval_loss": 1.7133468389511108, |
| "eval_precisions_1": 45.4484, |
| "eval_precisions_2": 18.2917, |
| "eval_precisions_3": 9.9296, |
| "eval_precisions_4": 5.6822, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3575, |
| "eval_rouge2": 0.1746, |
| "eval_rougeL": 0.3445, |
| "eval_rougeLsum": 0.3447, |
| "eval_runtime": 458.8154, |
| "eval_samples_per_second": 4.804, |
| "eval_steps_per_second": 1.201, |
| "eval_sys_len": 17752, |
| "eval_totals_1": 17752, |
| "eval_totals_2": 15548, |
| "eval_totals_3": 13344, |
| "eval_totals_4": 11140, |
| "step": 618 |
| }, |
| { |
| "epoch": 17.99, |
| "learning_rate": 0.0001, |
| "loss": 1.6421, |
| "step": 655 |
| }, |
| { |
| "epoch": 17.99, |
| "eval_bleu": 11.877, |
| "eval_bp": 0.8091, |
| "eval_counts_1": 8003, |
| "eval_counts_2": 2823, |
| "eval_counts_3": 1301, |
| "eval_counts_4": 609, |
| "eval_exact_match": 0.015, |
| "eval_f1": 0.353, |
| "eval_gen_len": 13.4669, |
| "eval_loss": 1.719835877418518, |
| "eval_precisions_1": 45.6401, |
| "eval_precisions_2": 18.4137, |
| "eval_precisions_3": 9.9109, |
| "eval_precisions_4": 5.5754, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3576, |
| "eval_rouge2": 0.1737, |
| "eval_rougeL": 0.3447, |
| "eval_rougeLsum": 0.3448, |
| "eval_runtime": 467.8501, |
| "eval_samples_per_second": 4.711, |
| "eval_steps_per_second": 1.178, |
| "eval_sys_len": 17535, |
| "eval_totals_1": 17535, |
| "eval_totals_2": 15331, |
| "eval_totals_3": 13127, |
| "eval_totals_4": 10923, |
| "step": 655 |
| }, |
| { |
| "epoch": 18.98, |
| "learning_rate": 0.0001, |
| "loss": 1.6543, |
| "step": 691 |
| }, |
| { |
| "epoch": 18.98, |
| "eval_bleu": 11.8679, |
| "eval_bp": 0.824, |
| "eval_counts_1": 8031, |
| "eval_counts_2": 2817, |
| "eval_counts_3": 1294, |
| "eval_counts_4": 612, |
| "eval_exact_match": 0.015, |
| "eval_f1": 0.351, |
| "eval_gen_len": 13.8648, |
| "eval_loss": 1.715085506439209, |
| "eval_precisions_1": 45.1104, |
| "eval_precisions_2": 18.0588, |
| "eval_precisions_3": 9.6603, |
| "eval_precisions_4": 5.4687, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3567, |
| "eval_rouge2": 0.1734, |
| "eval_rougeL": 0.3435, |
| "eval_rougeLsum": 0.3431, |
| "eval_runtime": 748.2265, |
| "eval_samples_per_second": 2.946, |
| "eval_steps_per_second": 0.736, |
| "eval_sys_len": 17803, |
| "eval_totals_1": 17803, |
| "eval_totals_2": 15599, |
| "eval_totals_3": 13395, |
| "eval_totals_4": 11191, |
| "step": 691 |
| }, |
| { |
| "epoch": 19.78, |
| "learning_rate": 0.0001, |
| "loss": 1.5702, |
| "step": 720 |
| }, |
| { |
| "epoch": 19.78, |
| "eval_bleu": 12.1229, |
| "eval_bp": 0.7945, |
| "eval_counts_1": 7996, |
| "eval_counts_2": 2850, |
| "eval_counts_3": 1330, |
| "eval_counts_4": 639, |
| "eval_exact_match": 0.0168, |
| "eval_f1": 0.3569, |
| "eval_gen_len": 13.3367, |
| "eval_loss": 1.7079344987869263, |
| "eval_precisions_1": 46.2865, |
| "eval_precisions_2": 18.9105, |
| "eval_precisions_3": 10.3365, |
| "eval_precisions_4": 5.9927, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3618, |
| "eval_rouge2": 0.1769, |
| "eval_rougeL": 0.3485, |
| "eval_rougeLsum": 0.348, |
| "eval_runtime": 880.8231, |
| "eval_samples_per_second": 2.502, |
| "eval_steps_per_second": 0.626, |
| "eval_sys_len": 17275, |
| "eval_totals_1": 17275, |
| "eval_totals_2": 15071, |
| "eval_totals_3": 12867, |
| "eval_totals_4": 10663, |
| "step": 720 |
| }, |
| { |
| "epoch": 19.78, |
| "step": 720, |
| "total_flos": 2.52283256045568e+17, |
| "train_loss": 2.1398978657192655, |
| "train_runtime": 23260.8504, |
| "train_samples_per_second": 8.008, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 720, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 2.52283256045568e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|