| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.926991625509984, |
| "eval_steps": 500, |
| "global_step": 2900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0001, |
| "loss": 6.5987, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_bleu": 0.1374, |
| "eval_bp": 1.0, |
| "eval_counts_1": 3804, |
| "eval_counts_2": 134, |
| "eval_counts_3": 2, |
| "eval_counts_4": 0, |
| "eval_exact_match": 0.0, |
| "eval_f1": 0.0814, |
| "eval_gen_len": 16.2899, |
| "eval_loss": 5.069606304168701, |
| "eval_precisions_1": 16.6019, |
| "eval_precisions_2": 0.6471, |
| "eval_precisions_3": 0.0108, |
| "eval_precisions_4": 0.0031, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.0783, |
| "eval_rouge2": 0.007, |
| "eval_rougeL": 0.0769, |
| "eval_rougeLsum": 0.0768, |
| "eval_runtime": 2008.1612, |
| "eval_samples_per_second": 1.098, |
| "eval_steps_per_second": 0.549, |
| "eval_sys_len": 22913, |
| "eval_totals_1": 22913, |
| "eval_totals_2": 20709, |
| "eval_totals_3": 18505, |
| "eval_totals_4": 16301, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 0.0001, |
| "loss": 4.7443, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_bleu": 0.303, |
| "eval_bp": 0.7996, |
| "eval_counts_1": 4022, |
| "eval_counts_2": 188, |
| "eval_counts_3": 20, |
| "eval_counts_4": 0, |
| "eval_exact_match": 0.0, |
| "eval_f1": 0.1073, |
| "eval_gen_len": 12.9038, |
| "eval_loss": 4.227029323577881, |
| "eval_precisions_1": 23.1602, |
| "eval_precisions_2": 1.2399, |
| "eval_precisions_3": 0.1543, |
| "eval_precisions_4": 0.0046, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.1028, |
| "eval_rouge2": 0.012, |
| "eval_rougeL": 0.0991, |
| "eval_rougeLsum": 0.099, |
| "eval_runtime": 2942.0368, |
| "eval_samples_per_second": 0.749, |
| "eval_steps_per_second": 0.375, |
| "eval_sys_len": 17366, |
| "eval_totals_1": 17366, |
| "eval_totals_2": 15162, |
| "eval_totals_3": 12958, |
| "eval_totals_4": 10754, |
| "step": 291 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0001, |
| "loss": 4.1412, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bleu": 0.4488, |
| "eval_bp": 0.7507, |
| "eval_counts_1": 3723, |
| "eval_counts_2": 187, |
| "eval_counts_3": 26, |
| "eval_counts_4": 2, |
| "eval_exact_match": 0.0, |
| "eval_f1": 0.0938, |
| "eval_gen_len": 12.4769, |
| "eval_loss": 3.7837560176849365, |
| "eval_precisions_1": 22.5431, |
| "eval_precisions_2": 1.3067, |
| "eval_precisions_3": 0.2148, |
| "eval_precisions_4": 0.0202, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.0899, |
| "eval_rouge2": 0.0124, |
| "eval_rougeL": 0.0886, |
| "eval_rougeLsum": 0.0884, |
| "eval_runtime": 2963.118, |
| "eval_samples_per_second": 0.744, |
| "eval_steps_per_second": 0.372, |
| "eval_sys_len": 16515, |
| "eval_totals_1": 16515, |
| "eval_totals_2": 14311, |
| "eval_totals_3": 12107, |
| "eval_totals_4": 9903, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.0001, |
| "loss": 3.6791, |
| "step": 582 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 1.6623, |
| "eval_bp": 1.0, |
| "eval_counts_1": 4576, |
| "eval_counts_2": 549, |
| "eval_counts_3": 134, |
| "eval_counts_4": 26, |
| "eval_exact_match": 0.0, |
| "eval_f1": 0.1323, |
| "eval_gen_len": 14.5676, |
| "eval_loss": 3.4246089458465576, |
| "eval_precisions_1": 20.9227, |
| "eval_precisions_2": 2.7915, |
| "eval_precisions_3": 0.7673, |
| "eval_precisions_4": 0.1704, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.1259, |
| "eval_rouge2": 0.0296, |
| "eval_rougeL": 0.1204, |
| "eval_rougeLsum": 0.1201, |
| "eval_runtime": 3118.2455, |
| "eval_samples_per_second": 0.707, |
| "eval_steps_per_second": 0.353, |
| "eval_sys_len": 21871, |
| "eval_totals_1": 21871, |
| "eval_totals_2": 19667, |
| "eval_totals_3": 17463, |
| "eval_totals_4": 15259, |
| "step": 582 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.0001, |
| "loss": 3.3523, |
| "step": 727 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_bleu": 2.4472, |
| "eval_bp": 0.9085, |
| "eval_counts_1": 4900, |
| "eval_counts_2": 796, |
| "eval_counts_3": 210, |
| "eval_counts_4": 41, |
| "eval_exact_match": 0.0005, |
| "eval_f1": 0.1585, |
| "eval_gen_len": 14.3943, |
| "eval_loss": 3.172255277633667, |
| "eval_precisions_1": 25.2721, |
| "eval_precisions_2": 4.6319, |
| "eval_precisions_3": 1.4018, |
| "eval_precisions_4": 0.3209, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.1542, |
| "eval_rouge2": 0.0449, |
| "eval_rougeL": 0.1486, |
| "eval_rougeLsum": 0.1484, |
| "eval_runtime": 3087.9672, |
| "eval_samples_per_second": 0.714, |
| "eval_steps_per_second": 0.357, |
| "eval_sys_len": 19389, |
| "eval_totals_1": 19389, |
| "eval_totals_2": 17185, |
| "eval_totals_3": 14981, |
| "eval_totals_4": 12777, |
| "step": 727 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 0.0001, |
| "loss": 3.0161, |
| "step": 873 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_bleu": 4.1987, |
| "eval_bp": 0.8907, |
| "eval_counts_1": 5633, |
| "eval_counts_2": 1182, |
| "eval_counts_3": 390, |
| "eval_counts_4": 111, |
| "eval_exact_match": 0.0045, |
| "eval_f1": 0.2074, |
| "eval_gen_len": 14.5789, |
| "eval_loss": 2.926840305328369, |
| "eval_precisions_1": 29.5773, |
| "eval_precisions_2": 7.0186, |
| "eval_precisions_3": 2.6645, |
| "eval_precisions_4": 0.8928, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.204, |
| "eval_rouge2": 0.069, |
| "eval_rougeL": 0.196, |
| "eval_rougeLsum": 0.1961, |
| "eval_runtime": 3093.3528, |
| "eval_samples_per_second": 0.712, |
| "eval_steps_per_second": 0.356, |
| "eval_sys_len": 19045, |
| "eval_totals_1": 19045, |
| "eval_totals_2": 16841, |
| "eval_totals_3": 14637, |
| "eval_totals_4": 12433, |
| "step": 873 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 0.0001, |
| "loss": 2.7639, |
| "step": 1018 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_bleu": 5.3362, |
| "eval_bp": 0.8306, |
| "eval_counts_1": 6100, |
| "eval_counts_2": 1461, |
| "eval_counts_3": 499, |
| "eval_counts_4": 165, |
| "eval_exact_match": 0.0073, |
| "eval_f1": 0.2431, |
| "eval_gen_len": 13.8553, |
| "eval_loss": 2.760089635848999, |
| "eval_precisions_1": 34.0326, |
| "eval_precisions_2": 9.2939, |
| "eval_precisions_3": 3.6919, |
| "eval_precisions_4": 1.4586, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2409, |
| "eval_rouge2": 0.0885, |
| "eval_rougeL": 0.2332, |
| "eval_rougeLsum": 0.2331, |
| "eval_runtime": 2991.0063, |
| "eval_samples_per_second": 0.737, |
| "eval_steps_per_second": 0.368, |
| "eval_sys_len": 17924, |
| "eval_totals_1": 17924, |
| "eval_totals_2": 15720, |
| "eval_totals_3": 13516, |
| "eval_totals_4": 11312, |
| "step": 1018 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.0001, |
| "loss": 2.5036, |
| "step": 1164 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_bleu": 7.0633, |
| "eval_bp": 0.9483, |
| "eval_counts_1": 6765, |
| "eval_counts_2": 1845, |
| "eval_counts_3": 701, |
| "eval_counts_4": 273, |
| "eval_exact_match": 0.0059, |
| "eval_f1": 0.2689, |
| "eval_gen_len": 15.7232, |
| "eval_loss": 2.572913885116577, |
| "eval_precisions_1": 33.525, |
| "eval_precisions_2": 10.2643, |
| "eval_precisions_3": 4.4449, |
| "eval_precisions_4": 2.0122, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2682, |
| "eval_rouge2": 0.1079, |
| "eval_rougeL": 0.2589, |
| "eval_rougeLsum": 0.259, |
| "eval_runtime": 3343.9439, |
| "eval_samples_per_second": 0.659, |
| "eval_steps_per_second": 0.33, |
| "eval_sys_len": 20179, |
| "eval_totals_1": 20179, |
| "eval_totals_2": 17975, |
| "eval_totals_3": 15771, |
| "eval_totals_4": 13567, |
| "step": 1164 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 0.0001, |
| "loss": 2.307, |
| "step": 1309 |
| }, |
| { |
| "epoch": 8.99, |
| "eval_bleu": 8.1681, |
| "eval_bp": 0.8911, |
| "eval_counts_1": 7018, |
| "eval_counts_2": 2047, |
| "eval_counts_3": 826, |
| "eval_counts_4": 348, |
| "eval_exact_match": 0.0095, |
| "eval_f1": 0.2907, |
| "eval_gen_len": 14.8076, |
| "eval_loss": 2.4636850357055664, |
| "eval_precisions_1": 36.8322, |
| "eval_precisions_2": 12.1484, |
| "eval_precisions_3": 5.6398, |
| "eval_precisions_4": 2.797, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2907, |
| "eval_rouge2": 0.1218, |
| "eval_rougeL": 0.2799, |
| "eval_rougeLsum": 0.2798, |
| "eval_runtime": 3082.8011, |
| "eval_samples_per_second": 0.715, |
| "eval_steps_per_second": 0.357, |
| "eval_sys_len": 19054, |
| "eval_totals_1": 19054, |
| "eval_totals_2": 16850, |
| "eval_totals_3": 14646, |
| "eval_totals_4": 12442, |
| "step": 1309 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.0001, |
| "loss": 2.1012, |
| "step": 1455 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bleu": 8.6921, |
| "eval_bp": 0.8604, |
| "eval_counts_1": 7147, |
| "eval_counts_2": 2127, |
| "eval_counts_3": 883, |
| "eval_counts_4": 389, |
| "eval_exact_match": 0.0118, |
| "eval_f1": 0.3008, |
| "eval_gen_len": 14.2736, |
| "eval_loss": 2.361370325088501, |
| "eval_precisions_1": 38.6889, |
| "eval_precisions_2": 13.0739, |
| "eval_precisions_3": 6.278, |
| "eval_precisions_4": 3.2797, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3003, |
| "eval_rouge2": 0.1275, |
| "eval_rougeL": 0.289, |
| "eval_rougeLsum": 0.2888, |
| "eval_runtime": 2980.6044, |
| "eval_samples_per_second": 0.739, |
| "eval_steps_per_second": 0.37, |
| "eval_sys_len": 18473, |
| "eval_totals_1": 18473, |
| "eval_totals_2": 16269, |
| "eval_totals_3": 14065, |
| "eval_totals_4": 11861, |
| "step": 1455 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 0.0001, |
| "loss": 1.9538, |
| "step": 1600 |
| }, |
| { |
| "epoch": 10.99, |
| "eval_bleu": 9.67, |
| "eval_bp": 0.8632, |
| "eval_counts_1": 7481, |
| "eval_counts_2": 2339, |
| "eval_counts_3": 997, |
| "eval_counts_4": 459, |
| "eval_exact_match": 0.0127, |
| "eval_f1": 0.3167, |
| "eval_gen_len": 14.3757, |
| "eval_loss": 2.297987461090088, |
| "eval_precisions_1": 40.3854, |
| "eval_precisions_2": 14.3321, |
| "eval_precisions_3": 7.0629, |
| "eval_precisions_4": 3.8533, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3192, |
| "eval_rouge2": 0.1423, |
| "eval_rougeL": 0.3064, |
| "eval_rougeLsum": 0.3068, |
| "eval_runtime": 1745.8738, |
| "eval_samples_per_second": 1.262, |
| "eval_steps_per_second": 0.631, |
| "eval_sys_len": 18524, |
| "eval_totals_1": 18524, |
| "eval_totals_2": 16320, |
| "eval_totals_3": 14116, |
| "eval_totals_4": 11912, |
| "step": 1600 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 0.0001, |
| "loss": 1.7909, |
| "step": 1746 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_bleu": 10.724, |
| "eval_bp": 0.8804, |
| "eval_counts_1": 7675, |
| "eval_counts_2": 2546, |
| "eval_counts_3": 1144, |
| "eval_counts_4": 546, |
| "eval_exact_match": 0.015, |
| "eval_f1": 0.3279, |
| "eval_gen_len": 14.583, |
| "eval_loss": 2.2389414310455322, |
| "eval_precisions_1": 40.7183, |
| "eval_precisions_2": 15.2959, |
| "eval_precisions_3": 7.9219, |
| "eval_precisions_4": 4.4619, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3299, |
| "eval_rouge2": 0.1528, |
| "eval_rougeL": 0.3174, |
| "eval_rougeLsum": 0.3175, |
| "eval_runtime": 1768.3367, |
| "eval_samples_per_second": 1.246, |
| "eval_steps_per_second": 0.623, |
| "eval_sys_len": 18849, |
| "eval_totals_1": 18849, |
| "eval_totals_2": 16645, |
| "eval_totals_3": 14441, |
| "eval_totals_4": 12237, |
| "step": 1746 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.0001, |
| "loss": 1.6691, |
| "step": 1891 |
| }, |
| { |
| "epoch": 12.99, |
| "eval_bleu": 11.1241, |
| "eval_bp": 0.8695, |
| "eval_counts_1": 7858, |
| "eval_counts_2": 2635, |
| "eval_counts_3": 1179, |
| "eval_counts_4": 576, |
| "eval_exact_match": 0.0163, |
| "eval_f1": 0.3395, |
| "eval_gen_len": 14.3848, |
| "eval_loss": 2.181286096572876, |
| "eval_precisions_1": 42.1499, |
| "eval_precisions_2": 16.029, |
| "eval_precisions_3": 8.2824, |
| "eval_precisions_4": 4.7876, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.344, |
| "eval_rouge2": 0.1626, |
| "eval_rougeL": 0.33, |
| "eval_rougeLsum": 0.33, |
| "eval_runtime": 1475.7204, |
| "eval_samples_per_second": 1.494, |
| "eval_steps_per_second": 0.747, |
| "eval_sys_len": 18643, |
| "eval_totals_1": 18643, |
| "eval_totals_2": 16439, |
| "eval_totals_3": 14235, |
| "eval_totals_4": 12031, |
| "step": 1891 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 0.0001, |
| "loss": 1.5361, |
| "step": 2037 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_bleu": 11.5803, |
| "eval_bp": 0.8754, |
| "eval_counts_1": 8016, |
| "eval_counts_2": 2729, |
| "eval_counts_3": 1249, |
| "eval_counts_4": 606, |
| "eval_exact_match": 0.0163, |
| "eval_f1": 0.3462, |
| "eval_gen_len": 14.564, |
| "eval_loss": 2.15460205078125, |
| "eval_precisions_1": 42.7429, |
| "eval_precisions_2": 16.4894, |
| "eval_precisions_3": 8.7063, |
| "eval_precisions_4": 4.9909, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3494, |
| "eval_rouge2": 0.1664, |
| "eval_rougeL": 0.3349, |
| "eval_rougeLsum": 0.3351, |
| "eval_runtime": 2521.9472, |
| "eval_samples_per_second": 0.874, |
| "eval_steps_per_second": 0.437, |
| "eval_sys_len": 18754, |
| "eval_totals_1": 18754, |
| "eval_totals_2": 16550, |
| "eval_totals_3": 14346, |
| "eval_totals_4": 12142, |
| "step": 2037 |
| }, |
| { |
| "epoch": 14.99, |
| "learning_rate": 0.0001, |
| "loss": 1.4365, |
| "step": 2182 |
| }, |
| { |
| "epoch": 14.99, |
| "eval_bleu": 12.1055, |
| "eval_bp": 0.856, |
| "eval_counts_1": 8112, |
| "eval_counts_2": 2839, |
| "eval_counts_3": 1316, |
| "eval_counts_4": 647, |
| "eval_exact_match": 0.02, |
| "eval_f1": 0.3538, |
| "eval_gen_len": 14.1656, |
| "eval_loss": 2.1357789039611816, |
| "eval_precisions_1": 44.1109, |
| "eval_precisions_2": 17.5398, |
| "eval_precisions_3": 9.4121, |
| "eval_precisions_4": 5.4933, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3581, |
| "eval_rouge2": 0.1761, |
| "eval_rougeL": 0.3448, |
| "eval_rougeLsum": 0.3448, |
| "eval_runtime": 2133.8974, |
| "eval_samples_per_second": 1.033, |
| "eval_steps_per_second": 0.516, |
| "eval_sys_len": 18390, |
| "eval_totals_1": 18390, |
| "eval_totals_2": 16186, |
| "eval_totals_3": 13982, |
| "eval_totals_4": 11778, |
| "step": 2182 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 0.0001, |
| "loss": 1.3263, |
| "step": 2328 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_bleu": 12.9765, |
| "eval_bp": 0.8827, |
| "eval_counts_1": 8381, |
| "eval_counts_2": 2990, |
| "eval_counts_3": 1430, |
| "eval_counts_4": 731, |
| "eval_exact_match": 0.0209, |
| "eval_f1": 0.363, |
| "eval_gen_len": 14.5445, |
| "eval_loss": 2.1189985275268555, |
| "eval_precisions_1": 44.3627, |
| "eval_precisions_2": 17.9171, |
| "eval_precisions_3": 9.873, |
| "eval_precisions_4": 5.9528, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3681, |
| "eval_rouge2": 0.1831, |
| "eval_rougeL": 0.3532, |
| "eval_rougeLsum": 0.3534, |
| "eval_runtime": 1849.5796, |
| "eval_samples_per_second": 1.192, |
| "eval_steps_per_second": 0.596, |
| "eval_sys_len": 18892, |
| "eval_totals_1": 18892, |
| "eval_totals_2": 16688, |
| "eval_totals_3": 14484, |
| "eval_totals_4": 12280, |
| "step": 2328 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 0.0001, |
| "loss": 1.2329, |
| "step": 2474 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_bleu": 13.5903, |
| "eval_bp": 0.8678, |
| "eval_counts_1": 8449, |
| "eval_counts_2": 3101, |
| "eval_counts_3": 1520, |
| "eval_counts_4": 786, |
| "eval_exact_match": 0.0227, |
| "eval_f1": 0.3692, |
| "eval_gen_len": 14.1779, |
| "eval_loss": 2.1201868057250977, |
| "eval_precisions_1": 45.3954, |
| "eval_precisions_2": 18.8993, |
| "eval_precisions_3": 10.7012, |
| "eval_precisions_4": 6.55, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3743, |
| "eval_rouge2": 0.1901, |
| "eval_rougeL": 0.3603, |
| "eval_rougeLsum": 0.3603, |
| "eval_runtime": 1363.814, |
| "eval_samples_per_second": 1.616, |
| "eval_steps_per_second": 0.808, |
| "eval_sys_len": 18612, |
| "eval_totals_1": 18612, |
| "eval_totals_2": 16408, |
| "eval_totals_3": 14204, |
| "eval_totals_4": 12000, |
| "step": 2474 |
| }, |
| { |
| "epoch": 18.0, |
| "learning_rate": 0.0001, |
| "loss": 1.1557, |
| "step": 2619 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_bleu": 13.8388, |
| "eval_bp": 0.8325, |
| "eval_counts_1": 8406, |
| "eval_counts_2": 3154, |
| "eval_counts_3": 1558, |
| "eval_counts_4": 804, |
| "eval_exact_match": 0.0277, |
| "eval_f1": 0.371, |
| "eval_gen_len": 13.677, |
| "eval_loss": 2.1282455921173096, |
| "eval_precisions_1": 46.8092, |
| "eval_precisions_2": 20.0203, |
| "eval_precisions_3": 11.4982, |
| "eval_precisions_4": 7.0862, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3761, |
| "eval_rouge2": 0.194, |
| "eval_rougeL": 0.3633, |
| "eval_rougeLsum": 0.3636, |
| "eval_runtime": 1323.8829, |
| "eval_samples_per_second": 1.665, |
| "eval_steps_per_second": 0.832, |
| "eval_sys_len": 17958, |
| "eval_totals_1": 17958, |
| "eval_totals_2": 15754, |
| "eval_totals_3": 13550, |
| "eval_totals_4": 11346, |
| "step": 2619 |
| }, |
| { |
| "epoch": 19.0, |
| "learning_rate": 0.0001, |
| "loss": 1.0658, |
| "step": 2765 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_bleu": 14.2084, |
| "eval_bp": 0.886, |
| "eval_counts_1": 8614, |
| "eval_counts_2": 3241, |
| "eval_counts_3": 1610, |
| "eval_counts_4": 839, |
| "eval_exact_match": 0.0272, |
| "eval_f1": 0.3749, |
| "eval_gen_len": 14.3816, |
| "eval_loss": 2.123244524002075, |
| "eval_precisions_1": 45.4445, |
| "eval_precisions_2": 19.3481, |
| "eval_precisions_3": 11.0676, |
| "eval_precisions_4": 6.7974, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3803, |
| "eval_rouge2": 0.196, |
| "eval_rougeL": 0.3654, |
| "eval_rougeLsum": 0.3656, |
| "eval_runtime": 1378.8855, |
| "eval_samples_per_second": 1.598, |
| "eval_steps_per_second": 0.799, |
| "eval_sys_len": 18955, |
| "eval_totals_1": 18955, |
| "eval_totals_2": 16751, |
| "eval_totals_3": 14547, |
| "eval_totals_4": 12343, |
| "step": 2765 |
| }, |
| { |
| "epoch": 19.93, |
| "learning_rate": 0.0001, |
| "loss": 0.9944, |
| "step": 2900 |
| }, |
| { |
| "epoch": 19.93, |
| "eval_bleu": 14.3883, |
| "eval_bp": 0.8806, |
| "eval_counts_1": 8658, |
| "eval_counts_2": 3273, |
| "eval_counts_3": 1625, |
| "eval_counts_4": 859, |
| "eval_exact_match": 0.0268, |
| "eval_f1": 0.3775, |
| "eval_gen_len": 14.2881, |
| "eval_loss": 2.1203458309173584, |
| "eval_precisions_1": 45.9237, |
| "eval_precisions_2": 19.6588, |
| "eval_precisions_3": 11.2496, |
| "eval_precisions_4": 7.0174, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3833, |
| "eval_rouge2": 0.1977, |
| "eval_rougeL": 0.369, |
| "eval_rougeLsum": 0.3691, |
| "eval_runtime": 1364.695, |
| "eval_samples_per_second": 1.615, |
| "eval_steps_per_second": 0.808, |
| "eval_sys_len": 18853, |
| "eval_totals_1": 18853, |
| "eval_totals_2": 16649, |
| "eval_totals_3": 14445, |
| "eval_totals_4": 12241, |
| "step": 2900 |
| }, |
| { |
| "epoch": 19.93, |
| "step": 2900, |
| "total_flos": 8.55557888016384e+17, |
| "train_loss": 2.472949571280644, |
| "train_runtime": 103540.1577, |
| "train_samples_per_second": 1.799, |
| "train_steps_per_second": 0.028 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 2900, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 8.55557888016384e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|