| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.776824034334766, |
| "eval_steps": 500, |
| "global_step": 1440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001, |
| "loss": 3.458, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 4.6384, |
| "eval_bp": 0.6642, |
| "eval_counts_1": 5618, |
| "eval_counts_2": 1383, |
| "eval_counts_3": 463, |
| "eval_counts_4": 116, |
| "eval_exact_match": 0.0005, |
| "eval_f1": 0.2226, |
| "eval_gen_len": 11.3013, |
| "eval_loss": 2.3696436882019043, |
| "eval_precisions_1": 37.2546, |
| "eval_precisions_2": 10.7409, |
| "eval_precisions_3": 4.3385, |
| "eval_precisions_4": 1.3699, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2266, |
| "eval_rouge2": 0.0841, |
| "eval_rougeL": 0.2197, |
| "eval_rougeLsum": 0.2196, |
| "eval_runtime": 433.2426, |
| "eval_samples_per_second": 5.087, |
| "eval_steps_per_second": 1.272, |
| "eval_sys_len": 15080, |
| "eval_totals_1": 15080, |
| "eval_totals_2": 12876, |
| "eval_totals_3": 10672, |
| "eval_totals_4": 8468, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.0001, |
| "loss": 2.7548, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_bleu": 6.9183, |
| "eval_bp": 0.728, |
| "eval_counts_1": 6361, |
| "eval_counts_2": 1807, |
| "eval_counts_3": 700, |
| "eval_counts_4": 254, |
| "eval_exact_match": 0.0036, |
| "eval_f1": 0.2635, |
| "eval_gen_len": 12.206, |
| "eval_loss": 2.1310036182403564, |
| "eval_precisions_1": 39.4358, |
| "eval_precisions_2": 12.9757, |
| "eval_precisions_3": 5.9717, |
| "eval_precisions_4": 2.6686, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2706, |
| "eval_rouge2": 0.1122, |
| "eval_rougeL": 0.2596, |
| "eval_rougeLsum": 0.2596, |
| "eval_runtime": 445.5518, |
| "eval_samples_per_second": 4.947, |
| "eval_steps_per_second": 1.237, |
| "eval_sys_len": 16130, |
| "eval_totals_1": 16130, |
| "eval_totals_2": 13926, |
| "eval_totals_3": 11722, |
| "eval_totals_4": 9518, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 0.0001, |
| "loss": 2.5084, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_bleu": 7.616, |
| "eval_bp": 0.7714, |
| "eval_counts_1": 6758, |
| "eval_counts_2": 2001, |
| "eval_counts_3": 780, |
| "eval_counts_4": 285, |
| "eval_exact_match": 0.0045, |
| "eval_f1": 0.2832, |
| "eval_gen_len": 12.8825, |
| "eval_loss": 2.0244088172912598, |
| "eval_precisions_1": 40.0569, |
| "eval_precisions_2": 13.6429, |
| "eval_precisions_3": 6.2585, |
| "eval_precisions_4": 2.778, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.2888, |
| "eval_rouge2": 0.1258, |
| "eval_rougeL": 0.2766, |
| "eval_rougeLsum": 0.2767, |
| "eval_runtime": 693.3228, |
| "eval_samples_per_second": 3.179, |
| "eval_steps_per_second": 0.795, |
| "eval_sys_len": 16871, |
| "eval_totals_1": 16871, |
| "eval_totals_2": 14667, |
| "eval_totals_3": 12463, |
| "eval_totals_4": 10259, |
| "step": 218 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.0001, |
| "loss": 2.3562, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 8.6611, |
| "eval_bp": 0.7671, |
| "eval_counts_1": 7011, |
| "eval_counts_2": 2193, |
| "eval_counts_3": 908, |
| "eval_counts_4": 360, |
| "eval_exact_match": 0.0077, |
| "eval_f1": 0.2978, |
| "eval_gen_len": 12.9142, |
| "eval_loss": 1.950147032737732, |
| "eval_precisions_1": 41.7421, |
| "eval_precisions_2": 15.0288, |
| "eval_precisions_3": 7.3297, |
| "eval_precisions_4": 3.535, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.303, |
| "eval_rouge2": 0.1375, |
| "eval_rougeL": 0.2892, |
| "eval_rougeLsum": 0.2894, |
| "eval_runtime": 807.954, |
| "eval_samples_per_second": 2.728, |
| "eval_steps_per_second": 0.682, |
| "eval_sys_len": 16796, |
| "eval_totals_1": 16796, |
| "eval_totals_2": 14592, |
| "eval_totals_3": 12388, |
| "eval_totals_4": 10184, |
| "step": 291 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 0.0001, |
| "loss": 2.2383, |
| "step": 364 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_bleu": 9.6159, |
| "eval_bp": 0.762, |
| "eval_counts_1": 7245, |
| "eval_counts_2": 2386, |
| "eval_counts_3": 1015, |
| "eval_counts_4": 435, |
| "eval_exact_match": 0.0113, |
| "eval_f1": 0.3155, |
| "eval_gen_len": 12.8417, |
| "eval_loss": 1.8873705863952637, |
| "eval_precisions_1": 43.3625, |
| "eval_precisions_2": 16.4506, |
| "eval_precisions_3": 8.252, |
| "eval_precisions_4": 4.3086, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3198, |
| "eval_rouge2": 0.1498, |
| "eval_rougeL": 0.3077, |
| "eval_rougeLsum": 0.3079, |
| "eval_runtime": 789.9213, |
| "eval_samples_per_second": 2.79, |
| "eval_steps_per_second": 0.698, |
| "eval_sys_len": 16708, |
| "eval_totals_1": 16708, |
| "eval_totals_2": 14504, |
| "eval_totals_3": 12300, |
| "eval_totals_4": 10096, |
| "step": 364 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.0001, |
| "loss": 2.1576, |
| "step": 436 |
| }, |
| { |
| "epoch": 5.99, |
| "eval_bleu": 9.5745, |
| "eval_bp": 0.7796, |
| "eval_counts_1": 7378, |
| "eval_counts_2": 2382, |
| "eval_counts_3": 997, |
| "eval_counts_4": 429, |
| "eval_exact_match": 0.0109, |
| "eval_f1": 0.3215, |
| "eval_gen_len": 13.2187, |
| "eval_loss": 1.859336018562317, |
| "eval_precisions_1": 43.3643, |
| "eval_precisions_2": 16.0837, |
| "eval_precisions_3": 7.9089, |
| "eval_precisions_4": 4.1242, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.326, |
| "eval_rouge2": 0.1497, |
| "eval_rougeL": 0.3132, |
| "eval_rougeLsum": 0.3132, |
| "eval_runtime": 806.6141, |
| "eval_samples_per_second": 2.732, |
| "eval_steps_per_second": 0.683, |
| "eval_sys_len": 17014, |
| "eval_totals_1": 17014, |
| "eval_totals_2": 14810, |
| "eval_totals_3": 12606, |
| "eval_totals_4": 10402, |
| "step": 436 |
| }, |
| { |
| "epoch": 6.99, |
| "learning_rate": 0.0001, |
| "loss": 2.0356, |
| "step": 509 |
| }, |
| { |
| "epoch": 6.99, |
| "eval_bleu": 10.3053, |
| "eval_bp": 0.7787, |
| "eval_counts_1": 7570, |
| "eval_counts_2": 2520, |
| "eval_counts_3": 1097, |
| "eval_counts_4": 482, |
| "eval_exact_match": 0.0123, |
| "eval_f1": 0.3339, |
| "eval_gen_len": 13.0368, |
| "eval_loss": 1.8132530450820923, |
| "eval_precisions_1": 44.532, |
| "eval_precisions_2": 17.0328, |
| "eval_precisions_3": 8.7126, |
| "eval_precisions_4": 4.6404, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3384, |
| "eval_rouge2": 0.158, |
| "eval_rougeL": 0.3258, |
| "eval_rougeLsum": 0.3257, |
| "eval_runtime": 454.2622, |
| "eval_samples_per_second": 4.852, |
| "eval_steps_per_second": 1.213, |
| "eval_sys_len": 16999, |
| "eval_totals_1": 16999, |
| "eval_totals_2": 14795, |
| "eval_totals_3": 12591, |
| "eval_totals_4": 10387, |
| "step": 509 |
| }, |
| { |
| "epoch": 7.99, |
| "learning_rate": 0.0001, |
| "loss": 1.9575, |
| "step": 582 |
| }, |
| { |
| "epoch": 7.99, |
| "eval_bleu": 10.993, |
| "eval_bp": 0.8003, |
| "eval_counts_1": 7764, |
| "eval_counts_2": 2637, |
| "eval_counts_3": 1175, |
| "eval_counts_4": 545, |
| "eval_exact_match": 0.0136, |
| "eval_f1": 0.3407, |
| "eval_gen_len": 13.4719, |
| "eval_loss": 1.7855687141418457, |
| "eval_precisions_1": 44.6746, |
| "eval_precisions_2": 17.3773, |
| "eval_precisions_3": 9.0587, |
| "eval_precisions_4": 5.0618, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.345, |
| "eval_rouge2": 0.1625, |
| "eval_rougeL": 0.3322, |
| "eval_rougeLsum": 0.3324, |
| "eval_runtime": 470.7972, |
| "eval_samples_per_second": 4.681, |
| "eval_steps_per_second": 1.17, |
| "eval_sys_len": 17379, |
| "eval_totals_1": 17379, |
| "eval_totals_2": 15175, |
| "eval_totals_3": 12971, |
| "eval_totals_4": 10767, |
| "step": 582 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 0.0001, |
| "loss": 1.8889, |
| "step": 655 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_bleu": 10.9637, |
| "eval_bp": 0.7846, |
| "eval_counts_1": 7766, |
| "eval_counts_2": 2644, |
| "eval_counts_3": 1184, |
| "eval_counts_4": 532, |
| "eval_exact_match": 0.0123, |
| "eval_f1": 0.3438, |
| "eval_gen_len": 13.2164, |
| "eval_loss": 1.7666170597076416, |
| "eval_precisions_1": 45.4099, |
| "eval_precisions_2": 17.7473, |
| "eval_precisions_3": 9.3272, |
| "eval_precisions_4": 5.0715, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3487, |
| "eval_rouge2": 0.1636, |
| "eval_rougeL": 0.3348, |
| "eval_rougeLsum": 0.335, |
| "eval_runtime": 461.5052, |
| "eval_samples_per_second": 4.776, |
| "eval_steps_per_second": 1.194, |
| "eval_sys_len": 17102, |
| "eval_totals_1": 17102, |
| "eval_totals_2": 14898, |
| "eval_totals_3": 12694, |
| "eval_totals_4": 10490, |
| "step": 655 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 0.0001, |
| "loss": 1.8201, |
| "step": 728 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_bleu": 11.3891, |
| "eval_bp": 0.7877, |
| "eval_counts_1": 7737, |
| "eval_counts_2": 2680, |
| "eval_counts_3": 1238, |
| "eval_counts_4": 587, |
| "eval_exact_match": 0.0163, |
| "eval_f1": 0.3406, |
| "eval_gen_len": 13.1388, |
| "eval_loss": 1.7414668798446655, |
| "eval_precisions_1": 45.0979, |
| "eval_precisions_2": 17.924, |
| "eval_precisions_3": 9.7113, |
| "eval_precisions_4": 5.5671, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3453, |
| "eval_rouge2": 0.1666, |
| "eval_rougeL": 0.3332, |
| "eval_rougeLsum": 0.3333, |
| "eval_runtime": 457.4345, |
| "eval_samples_per_second": 4.818, |
| "eval_steps_per_second": 1.205, |
| "eval_sys_len": 17156, |
| "eval_totals_1": 17156, |
| "eval_totals_2": 14952, |
| "eval_totals_3": 12748, |
| "eval_totals_4": 10544, |
| "step": 728 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 0.0001, |
| "loss": 1.7882, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.99, |
| "eval_bleu": 11.4047, |
| "eval_bp": 0.7995, |
| "eval_counts_1": 7859, |
| "eval_counts_2": 2722, |
| "eval_counts_3": 1241, |
| "eval_counts_4": 572, |
| "eval_exact_match": 0.0145, |
| "eval_f1": 0.3473, |
| "eval_gen_len": 13.4052, |
| "eval_loss": 1.7331299781799316, |
| "eval_precisions_1": 45.2603, |
| "eval_precisions_2": 17.9551, |
| "eval_precisions_3": 9.5786, |
| "eval_precisions_4": 5.3199, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3524, |
| "eval_rouge2": 0.1673, |
| "eval_rougeL": 0.3387, |
| "eval_rougeLsum": 0.3385, |
| "eval_runtime": 470.5412, |
| "eval_samples_per_second": 4.684, |
| "eval_steps_per_second": 1.171, |
| "eval_sys_len": 17364, |
| "eval_totals_1": 17364, |
| "eval_totals_2": 15160, |
| "eval_totals_3": 12956, |
| "eval_totals_4": 10752, |
| "step": 800 |
| }, |
| { |
| "epoch": 11.99, |
| "learning_rate": 0.0001, |
| "loss": 1.7095, |
| "step": 873 |
| }, |
| { |
| "epoch": 11.99, |
| "eval_bleu": 11.8807, |
| "eval_bp": 0.8053, |
| "eval_counts_1": 7968, |
| "eval_counts_2": 2783, |
| "eval_counts_3": 1292, |
| "eval_counts_4": 625, |
| "eval_exact_match": 0.0154, |
| "eval_f1": 0.3495, |
| "eval_gen_len": 13.4437, |
| "eval_loss": 1.7193998098373413, |
| "eval_precisions_1": 45.6175, |
| "eval_precisions_2": 18.2336, |
| "eval_precisions_3": 9.8936, |
| "eval_precisions_4": 5.7577, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3547, |
| "eval_rouge2": 0.1708, |
| "eval_rougeL": 0.3418, |
| "eval_rougeLsum": 0.3414, |
| "eval_runtime": 472.913, |
| "eval_samples_per_second": 4.66, |
| "eval_steps_per_second": 1.165, |
| "eval_sys_len": 17467, |
| "eval_totals_1": 17467, |
| "eval_totals_2": 15263, |
| "eval_totals_3": 13059, |
| "eval_totals_4": 10855, |
| "step": 873 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.0001, |
| "loss": 1.6619, |
| "step": 946 |
| }, |
| { |
| "epoch": 12.99, |
| "eval_bleu": 11.7968, |
| "eval_bp": 0.8034, |
| "eval_counts_1": 8011, |
| "eval_counts_2": 2796, |
| "eval_counts_3": 1286, |
| "eval_counts_4": 604, |
| "eval_exact_match": 0.0154, |
| "eval_f1": 0.3526, |
| "eval_gen_len": 13.4964, |
| "eval_loss": 1.703238606452942, |
| "eval_precisions_1": 45.9531, |
| "eval_precisions_2": 18.3597, |
| "eval_precisions_3": 9.8733, |
| "eval_precisions_4": 5.5817, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3584, |
| "eval_rouge2": 0.1736, |
| "eval_rougeL": 0.3454, |
| "eval_rougeLsum": 0.3454, |
| "eval_runtime": 460.9308, |
| "eval_samples_per_second": 4.782, |
| "eval_steps_per_second": 1.195, |
| "eval_sys_len": 17433, |
| "eval_totals_1": 17433, |
| "eval_totals_2": 15229, |
| "eval_totals_3": 13025, |
| "eval_totals_4": 10821, |
| "step": 946 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 0.0001, |
| "loss": 1.6103, |
| "step": 1019 |
| }, |
| { |
| "epoch": 13.99, |
| "eval_bleu": 12.235, |
| "eval_bp": 0.8163, |
| "eval_counts_1": 8154, |
| "eval_counts_2": 2891, |
| "eval_counts_3": 1347, |
| "eval_counts_4": 636, |
| "eval_exact_match": 0.015, |
| "eval_f1": 0.3602, |
| "eval_gen_len": 13.7223, |
| "eval_loss": 1.7027523517608643, |
| "eval_precisions_1": 46.1591, |
| "eval_precisions_2": 18.6987, |
| "eval_precisions_3": 10.1607, |
| "eval_precisions_4": 5.7541, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3659, |
| "eval_rouge2": 0.1795, |
| "eval_rougeL": 0.3509, |
| "eval_rougeLsum": 0.3508, |
| "eval_runtime": 461.3951, |
| "eval_samples_per_second": 4.777, |
| "eval_steps_per_second": 1.194, |
| "eval_sys_len": 17665, |
| "eval_totals_1": 17665, |
| "eval_totals_2": 15461, |
| "eval_totals_3": 13257, |
| "eval_totals_4": 11053, |
| "step": 1019 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 0.0001, |
| "loss": 1.565, |
| "step": 1092 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_bleu": 12.4116, |
| "eval_bp": 0.8088, |
| "eval_counts_1": 8135, |
| "eval_counts_2": 2897, |
| "eval_counts_3": 1362, |
| "eval_counts_4": 665, |
| "eval_exact_match": 0.02, |
| "eval_f1": 0.3603, |
| "eval_gen_len": 13.6107, |
| "eval_loss": 1.6954691410064697, |
| "eval_precisions_1": 46.4062, |
| "eval_precisions_2": 18.9025, |
| "eval_precisions_3": 10.3795, |
| "eval_precisions_4": 6.0909, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3668, |
| "eval_rouge2": 0.1808, |
| "eval_rougeL": 0.3518, |
| "eval_rougeLsum": 0.3516, |
| "eval_runtime": 457.9806, |
| "eval_samples_per_second": 4.812, |
| "eval_steps_per_second": 1.203, |
| "eval_sys_len": 17530, |
| "eval_totals_1": 17530, |
| "eval_totals_2": 15326, |
| "eval_totals_3": 13122, |
| "eval_totals_4": 10918, |
| "step": 1092 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 0.0001, |
| "loss": 1.522, |
| "step": 1165 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_bleu": 12.8008, |
| "eval_bp": 0.8318, |
| "eval_counts_1": 8271, |
| "eval_counts_2": 2982, |
| "eval_counts_3": 1414, |
| "eval_counts_4": 697, |
| "eval_exact_match": 0.0191, |
| "eval_f1": 0.3632, |
| "eval_gen_len": 13.9192, |
| "eval_loss": 1.6792546510696411, |
| "eval_precisions_1": 46.0883, |
| "eval_precisions_2": 18.943, |
| "eval_precisions_3": 10.4447, |
| "eval_precisions_4": 6.1496, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3695, |
| "eval_rouge2": 0.1828, |
| "eval_rougeL": 0.354, |
| "eval_rougeLsum": 0.354, |
| "eval_runtime": 476.6232, |
| "eval_samples_per_second": 4.624, |
| "eval_steps_per_second": 1.156, |
| "eval_sys_len": 17946, |
| "eval_totals_1": 17946, |
| "eval_totals_2": 15742, |
| "eval_totals_3": 13538, |
| "eval_totals_4": 11334, |
| "step": 1165 |
| }, |
| { |
| "epoch": 16.99, |
| "learning_rate": 0.0001, |
| "loss": 1.5022, |
| "step": 1237 |
| }, |
| { |
| "epoch": 16.99, |
| "eval_bleu": 12.6672, |
| "eval_bp": 0.8077, |
| "eval_counts_1": 8244, |
| "eval_counts_2": 2967, |
| "eval_counts_3": 1392, |
| "eval_counts_4": 680, |
| "eval_exact_match": 0.0191, |
| "eval_f1": 0.366, |
| "eval_gen_len": 13.6243, |
| "eval_loss": 1.684873104095459, |
| "eval_precisions_1": 47.0817, |
| "eval_precisions_2": 19.3846, |
| "eval_precisions_3": 10.6243, |
| "eval_precisions_4": 6.2397, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3728, |
| "eval_rouge2": 0.184, |
| "eval_rougeL": 0.3569, |
| "eval_rougeLsum": 0.3569, |
| "eval_runtime": 453.2, |
| "eval_samples_per_second": 4.863, |
| "eval_steps_per_second": 1.216, |
| "eval_sys_len": 17510, |
| "eval_totals_1": 17510, |
| "eval_totals_2": 15306, |
| "eval_totals_3": 13102, |
| "eval_totals_4": 10898, |
| "step": 1237 |
| }, |
| { |
| "epoch": 17.99, |
| "learning_rate": 0.0001, |
| "loss": 1.4359, |
| "step": 1310 |
| }, |
| { |
| "epoch": 17.99, |
| "eval_bleu": 13.0683, |
| "eval_bp": 0.8278, |
| "eval_counts_1": 8328, |
| "eval_counts_2": 3050, |
| "eval_counts_3": 1448, |
| "eval_counts_4": 717, |
| "eval_exact_match": 0.0181, |
| "eval_f1": 0.3671, |
| "eval_gen_len": 13.7255, |
| "eval_loss": 1.686221718788147, |
| "eval_precisions_1": 46.5954, |
| "eval_precisions_2": 19.4652, |
| "eval_precisions_3": 10.7538, |
| "eval_precisions_4": 6.3671, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3742, |
| "eval_rouge2": 0.1866, |
| "eval_rougeL": 0.3582, |
| "eval_rougeLsum": 0.3583, |
| "eval_runtime": 451.92, |
| "eval_samples_per_second": 4.877, |
| "eval_steps_per_second": 1.219, |
| "eval_sys_len": 17873, |
| "eval_totals_1": 17873, |
| "eval_totals_2": 15669, |
| "eval_totals_3": 13465, |
| "eval_totals_4": 11261, |
| "step": 1310 |
| }, |
| { |
| "epoch": 18.99, |
| "learning_rate": 0.0001, |
| "loss": 1.3994, |
| "step": 1383 |
| }, |
| { |
| "epoch": 18.99, |
| "eval_bleu": 12.8728, |
| "eval_bp": 0.8152, |
| "eval_counts_1": 8272, |
| "eval_counts_2": 2998, |
| "eval_counts_3": 1417, |
| "eval_counts_4": 704, |
| "eval_exact_match": 0.0213, |
| "eval_f1": 0.3673, |
| "eval_gen_len": 13.6956, |
| "eval_loss": 1.6775314807891846, |
| "eval_precisions_1": 46.8801, |
| "eval_precisions_2": 19.4158, |
| "eval_precisions_3": 10.7048, |
| "eval_precisions_4": 6.3809, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3739, |
| "eval_rouge2": 0.1866, |
| "eval_rougeL": 0.3583, |
| "eval_rougeLsum": 0.3581, |
| "eval_runtime": 818.5079, |
| "eval_samples_per_second": 2.693, |
| "eval_steps_per_second": 0.673, |
| "eval_sys_len": 17645, |
| "eval_totals_1": 17645, |
| "eval_totals_2": 15441, |
| "eval_totals_3": 13237, |
| "eval_totals_4": 11033, |
| "step": 1383 |
| }, |
| { |
| "epoch": 19.78, |
| "learning_rate": 0.0001, |
| "loss": 1.3609, |
| "step": 1440 |
| }, |
| { |
| "epoch": 19.78, |
| "eval_bleu": 13.1569, |
| "eval_bp": 0.8251, |
| "eval_counts_1": 8347, |
| "eval_counts_2": 3062, |
| "eval_counts_3": 1465, |
| "eval_counts_4": 723, |
| "eval_exact_match": 0.0204, |
| "eval_f1": 0.3692, |
| "eval_gen_len": 13.7328, |
| "eval_loss": 1.688394546508789, |
| "eval_precisions_1": 46.8327, |
| "eval_precisions_2": 19.6043, |
| "eval_precisions_3": 10.9206, |
| "eval_precisions_4": 6.449, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3761, |
| "eval_rouge2": 0.1886, |
| "eval_rougeL": 0.3601, |
| "eval_rougeLsum": 0.3596, |
| "eval_runtime": 834.1703, |
| "eval_samples_per_second": 2.642, |
| "eval_steps_per_second": 0.661, |
| "eval_sys_len": 17823, |
| "eval_totals_1": 17823, |
| "eval_totals_2": 15619, |
| "eval_totals_3": 13415, |
| "eval_totals_4": 11211, |
| "step": 1440 |
| }, |
| { |
| "epoch": 19.78, |
| "step": 1440, |
| "total_flos": 2.52283256045568e+17, |
| "train_loss": 1.9421327537960476, |
| "train_runtime": 22435.6962, |
| "train_samples_per_second": 8.303, |
| "train_steps_per_second": 0.064 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1440, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 2.52283256045568e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|