| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.789564097058193, |
| "eval_steps": 500, |
| "global_step": 1440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0001, |
| "loss": 3.9659, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_bleu": 10.8438, |
| "eval_bp": 0.7379, |
| "eval_counts_1": 7244, |
| "eval_counts_2": 2547, |
| "eval_counts_3": 1183, |
| "eval_counts_4": 565, |
| "eval_exact_match": 0.0136, |
| "eval_f1": 0.3139, |
| "eval_gen_len": 11.7786, |
| "eval_loss": 1.4144511222839355, |
| "eval_precisions_1": 44.4526, |
| "eval_precisions_2": 18.0741, |
| "eval_precisions_3": 9.9512, |
| "eval_precisions_4": 5.8344, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3213, |
| "eval_rouge2": 0.1608, |
| "eval_rougeL": 0.3091, |
| "eval_rougeLsum": 0.309, |
| "eval_runtime": 2106.9539, |
| "eval_samples_per_second": 1.046, |
| "eval_steps_per_second": 1.046, |
| "eval_sys_len": 16296, |
| "eval_totals_1": 16296, |
| "eval_totals_2": 14092, |
| "eval_totals_3": 11888, |
| "eval_totals_4": 9684, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 0.0001, |
| "loss": 1.7081, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_bleu": 13.2044, |
| "eval_bp": 0.7697, |
| "eval_counts_1": 7865, |
| "eval_counts_2": 3037, |
| "eval_counts_3": 1498, |
| "eval_counts_4": 759, |
| "eval_exact_match": 0.0181, |
| "eval_f1": 0.3481, |
| "eval_gen_len": 12.225, |
| "eval_loss": 1.263157844543457, |
| "eval_precisions_1": 46.7015, |
| "eval_precisions_2": 20.7488, |
| "eval_precisions_3": 12.0486, |
| "eval_precisions_4": 7.4201, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3577, |
| "eval_rouge2": 0.189, |
| "eval_rougeL": 0.3438, |
| "eval_rougeLsum": 0.3439, |
| "eval_runtime": 3942.8178, |
| "eval_samples_per_second": 0.559, |
| "eval_steps_per_second": 0.559, |
| "eval_sys_len": 16841, |
| "eval_totals_1": 16841, |
| "eval_totals_2": 14637, |
| "eval_totals_3": 12433, |
| "eval_totals_4": 10229, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0001, |
| "loss": 1.4856, |
| "step": 218 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_bleu": 15.6014, |
| "eval_bp": 0.8142, |
| "eval_counts_1": 8608, |
| "eval_counts_2": 3519, |
| "eval_counts_3": 1818, |
| "eval_counts_4": 969, |
| "eval_exact_match": 0.0268, |
| "eval_f1": 0.3882, |
| "eval_gen_len": 13.0027, |
| "eval_loss": 1.1974213123321533, |
| "eval_precisions_1": 48.8342, |
| "eval_precisions_2": 22.8166, |
| "eval_precisions_3": 13.7529, |
| "eval_precisions_4": 8.7971, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.3969, |
| "eval_rouge2": 0.2181, |
| "eval_rougeL": 0.381, |
| "eval_rougeLsum": 0.3812, |
| "eval_runtime": 4069.754, |
| "eval_samples_per_second": 0.542, |
| "eval_steps_per_second": 0.542, |
| "eval_sys_len": 17627, |
| "eval_totals_1": 17627, |
| "eval_totals_2": 15423, |
| "eval_totals_3": 13219, |
| "eval_totals_4": 11015, |
| "step": 218 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 0.0001, |
| "loss": 1.3277, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_bleu": 16.4313, |
| "eval_bp": 0.8052, |
| "eval_counts_1": 9018, |
| "eval_counts_2": 3702, |
| "eval_counts_3": 1907, |
| "eval_counts_4": 1029, |
| "eval_exact_match": 0.0313, |
| "eval_f1": 0.4156, |
| "eval_gen_len": 12.8716, |
| "eval_loss": 1.1393847465515137, |
| "eval_precisions_1": 51.6347, |
| "eval_precisions_2": 24.2579, |
| "eval_precisions_3": 14.6052, |
| "eval_precisions_4": 9.4812, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.424, |
| "eval_rouge2": 0.2321, |
| "eval_rougeL": 0.4087, |
| "eval_rougeLsum": 0.4085, |
| "eval_runtime": 4037.7601, |
| "eval_samples_per_second": 0.546, |
| "eval_steps_per_second": 0.546, |
| "eval_sys_len": 17465, |
| "eval_totals_1": 17465, |
| "eval_totals_2": 15261, |
| "eval_totals_3": 13057, |
| "eval_totals_4": 10853, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 0.0001, |
| "loss": 1.2314, |
| "step": 363 |
| }, |
| { |
| "epoch": 4.99, |
| "eval_bleu": 17.0718, |
| "eval_bp": 0.8235, |
| "eval_counts_1": 9240, |
| "eval_counts_2": 3869, |
| "eval_counts_3": 1994, |
| "eval_counts_4": 1076, |
| "eval_exact_match": 0.0363, |
| "eval_f1": 0.4256, |
| "eval_gen_len": 13.2137, |
| "eval_loss": 1.1193382740020752, |
| "eval_precisions_1": 51.9276, |
| "eval_precisions_2": 24.8172, |
| "eval_precisions_3": 14.8962, |
| "eval_precisions_4": 9.6226, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4336, |
| "eval_rouge2": 0.2413, |
| "eval_rougeL": 0.4183, |
| "eval_rougeLsum": 0.418, |
| "eval_runtime": 4116.6581, |
| "eval_samples_per_second": 0.535, |
| "eval_steps_per_second": 0.535, |
| "eval_sys_len": 17794, |
| "eval_totals_1": 17794, |
| "eval_totals_2": 15590, |
| "eval_totals_3": 13386, |
| "eval_totals_4": 11182, |
| "step": 363 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 0.0001, |
| "loss": 1.1264, |
| "step": 436 |
| }, |
| { |
| "epoch": 5.99, |
| "eval_bleu": 17.4744, |
| "eval_bp": 0.8072, |
| "eval_counts_1": 9263, |
| "eval_counts_2": 3908, |
| "eval_counts_3": 2055, |
| "eval_counts_4": 1127, |
| "eval_exact_match": 0.0372, |
| "eval_f1": 0.4309, |
| "eval_gen_len": 13.034, |
| "eval_loss": 1.1085509061813354, |
| "eval_precisions_1": 52.9254, |
| "eval_precisions_2": 25.5458, |
| "eval_precisions_3": 15.6942, |
| "eval_precisions_4": 10.3489, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4383, |
| "eval_rouge2": 0.2452, |
| "eval_rougeL": 0.4239, |
| "eval_rougeLsum": 0.4237, |
| "eval_runtime": 3709.3886, |
| "eval_samples_per_second": 0.594, |
| "eval_steps_per_second": 0.594, |
| "eval_sys_len": 17502, |
| "eval_totals_1": 17502, |
| "eval_totals_2": 15298, |
| "eval_totals_3": 13094, |
| "eval_totals_4": 10890, |
| "step": 436 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 0.0001, |
| "loss": 1.0469, |
| "step": 509 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_bleu": 18.0906, |
| "eval_bp": 0.8363, |
| "eval_counts_1": 9434, |
| "eval_counts_2": 4034, |
| "eval_counts_3": 2146, |
| "eval_counts_4": 1189, |
| "eval_exact_match": 0.039, |
| "eval_f1": 0.4348, |
| "eval_gen_len": 13.422, |
| "eval_loss": 1.103752851486206, |
| "eval_precisions_1": 52.3297, |
| "eval_precisions_2": 25.4929, |
| "eval_precisions_3": 15.7562, |
| "eval_precisions_4": 10.4152, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4433, |
| "eval_rouge2": 0.2505, |
| "eval_rougeL": 0.4286, |
| "eval_rougeLsum": 0.4282, |
| "eval_runtime": 4081.2971, |
| "eval_samples_per_second": 0.54, |
| "eval_steps_per_second": 0.54, |
| "eval_sys_len": 18028, |
| "eval_totals_1": 18028, |
| "eval_totals_2": 15824, |
| "eval_totals_3": 13620, |
| "eval_totals_4": 11416, |
| "step": 509 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 0.0001, |
| "loss": 0.9874, |
| "step": 582 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_bleu": 19.1287, |
| "eval_bp": 0.8539, |
| "eval_counts_1": 9746, |
| "eval_counts_2": 4265, |
| "eval_counts_3": 2287, |
| "eval_counts_4": 1285, |
| "eval_exact_match": 0.0454, |
| "eval_f1": 0.4498, |
| "eval_gen_len": 13.6466, |
| "eval_loss": 1.0989724397659302, |
| "eval_precisions_1": 53.1088, |
| "eval_precisions_2": 26.4136, |
| "eval_precisions_3": 16.4025, |
| "eval_precisions_4": 10.9464, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.457, |
| "eval_rouge2": 0.2627, |
| "eval_rougeL": 0.4417, |
| "eval_rougeLsum": 0.4416, |
| "eval_runtime": 2875.9709, |
| "eval_samples_per_second": 0.766, |
| "eval_steps_per_second": 0.766, |
| "eval_sys_len": 18351, |
| "eval_totals_1": 18351, |
| "eval_totals_2": 16147, |
| "eval_totals_3": 13943, |
| "eval_totals_4": 11739, |
| "step": 582 |
| }, |
| { |
| "epoch": 8.99, |
| "learning_rate": 0.0001, |
| "loss": 0.9488, |
| "step": 654 |
| }, |
| { |
| "epoch": 8.99, |
| "eval_bleu": 18.2172, |
| "eval_bp": 0.8255, |
| "eval_counts_1": 9484, |
| "eval_counts_2": 4062, |
| "eval_counts_3": 2158, |
| "eval_counts_4": 1197, |
| "eval_exact_match": 0.0431, |
| "eval_f1": 0.4399, |
| "eval_gen_len": 13.2763, |
| "eval_loss": 1.1175453662872314, |
| "eval_precisions_1": 53.1883, |
| "eval_precisions_2": 25.9935, |
| "eval_precisions_3": 16.0769, |
| "eval_precisions_4": 10.6694, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4482, |
| "eval_rouge2": 0.2548, |
| "eval_rougeL": 0.4338, |
| "eval_rougeLsum": 0.4333, |
| "eval_runtime": 4231.6184, |
| "eval_samples_per_second": 0.521, |
| "eval_steps_per_second": 0.521, |
| "eval_sys_len": 17831, |
| "eval_totals_1": 17831, |
| "eval_totals_2": 15627, |
| "eval_totals_3": 13423, |
| "eval_totals_4": 11219, |
| "step": 654 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 0.0001, |
| "loss": 0.8893, |
| "step": 727 |
| }, |
| { |
| "epoch": 9.99, |
| "eval_bleu": 19.064, |
| "eval_bp": 0.8357, |
| "eval_counts_1": 9650, |
| "eval_counts_2": 4205, |
| "eval_counts_3": 2289, |
| "eval_counts_4": 1289, |
| "eval_exact_match": 0.0463, |
| "eval_f1": 0.4472, |
| "eval_gen_len": 13.4251, |
| "eval_loss": 1.1221915483474731, |
| "eval_precisions_1": 53.5605, |
| "eval_precisions_2": 26.592, |
| "eval_precisions_3": 16.8198, |
| "eval_precisions_4": 11.3021, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4543, |
| "eval_rouge2": 0.262, |
| "eval_rougeL": 0.4396, |
| "eval_rougeLsum": 0.4394, |
| "eval_runtime": 4369.7974, |
| "eval_samples_per_second": 0.504, |
| "eval_steps_per_second": 0.504, |
| "eval_sys_len": 18017, |
| "eval_totals_1": 18017, |
| "eval_totals_2": 15813, |
| "eval_totals_3": 13609, |
| "eval_totals_4": 11405, |
| "step": 727 |
| }, |
| { |
| "epoch": 10.99, |
| "learning_rate": 0.0001, |
| "loss": 0.8362, |
| "step": 800 |
| }, |
| { |
| "epoch": 10.99, |
| "eval_bleu": 19.052, |
| "eval_bp": 0.8474, |
| "eval_counts_1": 9706, |
| "eval_counts_2": 4232, |
| "eval_counts_3": 2279, |
| "eval_counts_4": 1281, |
| "eval_exact_match": 0.0472, |
| "eval_f1": 0.4473, |
| "eval_gen_len": 13.6021, |
| "eval_loss": 1.1342219114303589, |
| "eval_precisions_1": 53.2361, |
| "eval_precisions_2": 26.4038, |
| "eval_precisions_3": 16.4858, |
| "eval_precisions_4": 11.0241, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4551, |
| "eval_rouge2": 0.2632, |
| "eval_rougeL": 0.4395, |
| "eval_rougeLsum": 0.4393, |
| "eval_runtime": 4741.4712, |
| "eval_samples_per_second": 0.465, |
| "eval_steps_per_second": 0.465, |
| "eval_sys_len": 18232, |
| "eval_totals_1": 18232, |
| "eval_totals_2": 16028, |
| "eval_totals_3": 13824, |
| "eval_totals_4": 11620, |
| "step": 800 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 0.0001, |
| "loss": 0.7835, |
| "step": 873 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_bleu": 19.169, |
| "eval_bp": 0.8614, |
| "eval_counts_1": 9802, |
| "eval_counts_2": 4280, |
| "eval_counts_3": 2292, |
| "eval_counts_4": 1285, |
| "eval_exact_match": 0.0472, |
| "eval_f1": 0.4497, |
| "eval_gen_len": 14.0168, |
| "eval_loss": 1.1426819562911987, |
| "eval_precisions_1": 53.0096, |
| "eval_precisions_2": 26.2786, |
| "eval_precisions_3": 16.2749, |
| "eval_precisions_4": 10.8174, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.458, |
| "eval_rouge2": 0.2634, |
| "eval_rougeL": 0.4414, |
| "eval_rougeLsum": 0.4412, |
| "eval_runtime": 2858.9204, |
| "eval_samples_per_second": 0.771, |
| "eval_steps_per_second": 0.771, |
| "eval_sys_len": 18491, |
| "eval_totals_1": 18491, |
| "eval_totals_2": 16287, |
| "eval_totals_3": 14083, |
| "eval_totals_4": 11879, |
| "step": 873 |
| }, |
| { |
| "epoch": 12.99, |
| "learning_rate": 0.0001, |
| "loss": 0.7441, |
| "step": 945 |
| }, |
| { |
| "epoch": 12.99, |
| "eval_bleu": 19.3443, |
| "eval_bp": 0.8618, |
| "eval_counts_1": 9816, |
| "eval_counts_2": 4323, |
| "eval_counts_3": 2334, |
| "eval_counts_4": 1294, |
| "eval_exact_match": 0.0463, |
| "eval_f1": 0.4493, |
| "eval_gen_len": 13.8348, |
| "eval_loss": 1.1669002771377563, |
| "eval_precisions_1": 53.0652, |
| "eval_precisions_2": 26.5312, |
| "eval_precisions_3": 16.5649, |
| "eval_precisions_4": 10.8868, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4577, |
| "eval_rouge2": 0.2659, |
| "eval_rougeL": 0.4418, |
| "eval_rougeLsum": 0.4417, |
| "eval_runtime": 2130.8, |
| "eval_samples_per_second": 1.034, |
| "eval_steps_per_second": 1.034, |
| "eval_sys_len": 18498, |
| "eval_totals_1": 18498, |
| "eval_totals_2": 16294, |
| "eval_totals_3": 14090, |
| "eval_totals_4": 11886, |
| "step": 945 |
| }, |
| { |
| "epoch": 13.99, |
| "learning_rate": 0.0001, |
| "loss": 0.7012, |
| "step": 1018 |
| }, |
| { |
| "epoch": 13.99, |
| "eval_bleu": 19.7341, |
| "eval_bp": 0.8639, |
| "eval_counts_1": 9856, |
| "eval_counts_2": 4364, |
| "eval_counts_3": 2375, |
| "eval_counts_4": 1360, |
| "eval_exact_match": 0.0476, |
| "eval_f1": 0.4514, |
| "eval_gen_len": 13.976, |
| "eval_loss": 1.1739834547042847, |
| "eval_precisions_1": 53.1693, |
| "eval_precisions_2": 26.7189, |
| "eval_precisions_3": 16.8094, |
| "eval_precisions_4": 11.4046, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4591, |
| "eval_rouge2": 0.2653, |
| "eval_rougeL": 0.443, |
| "eval_rougeLsum": 0.4428, |
| "eval_runtime": 2149.1056, |
| "eval_samples_per_second": 1.026, |
| "eval_steps_per_second": 1.026, |
| "eval_sys_len": 18537, |
| "eval_totals_1": 18537, |
| "eval_totals_2": 16333, |
| "eval_totals_3": 14129, |
| "eval_totals_4": 11925, |
| "step": 1018 |
| }, |
| { |
| "epoch": 14.99, |
| "learning_rate": 0.0001, |
| "loss": 0.6597, |
| "step": 1091 |
| }, |
| { |
| "epoch": 14.99, |
| "eval_bleu": 19.3289, |
| "eval_bp": 0.8602, |
| "eval_counts_1": 9780, |
| "eval_counts_2": 4292, |
| "eval_counts_3": 2336, |
| "eval_counts_4": 1302, |
| "eval_exact_match": 0.0485, |
| "eval_f1": 0.4492, |
| "eval_gen_len": 13.8802, |
| "eval_loss": 1.1987030506134033, |
| "eval_precisions_1": 52.9565, |
| "eval_precisions_2": 26.3896, |
| "eval_precisions_3": 16.6145, |
| "eval_precisions_4": 10.9818, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.457, |
| "eval_rouge2": 0.2633, |
| "eval_rougeL": 0.4418, |
| "eval_rougeLsum": 0.4416, |
| "eval_runtime": 2149.2833, |
| "eval_samples_per_second": 1.025, |
| "eval_steps_per_second": 1.025, |
| "eval_sys_len": 18468, |
| "eval_totals_1": 18468, |
| "eval_totals_2": 16264, |
| "eval_totals_3": 14060, |
| "eval_totals_4": 11856, |
| "step": 1091 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 0.0001, |
| "loss": 0.6236, |
| "step": 1164 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_bleu": 19.8055, |
| "eval_bp": 0.8734, |
| "eval_counts_1": 9931, |
| "eval_counts_2": 4388, |
| "eval_counts_3": 2390, |
| "eval_counts_4": 1359, |
| "eval_exact_match": 0.0495, |
| "eval_f1": 0.4538, |
| "eval_gen_len": 14.044, |
| "eval_loss": 1.2135030031204224, |
| "eval_precisions_1": 53.0587, |
| "eval_precisions_2": 26.573, |
| "eval_precisions_3": 16.7028, |
| "eval_precisions_4": 11.2268, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4618, |
| "eval_rouge2": 0.2682, |
| "eval_rougeL": 0.4452, |
| "eval_rougeLsum": 0.445, |
| "eval_runtime": 2168.1341, |
| "eval_samples_per_second": 1.017, |
| "eval_steps_per_second": 1.017, |
| "eval_sys_len": 18717, |
| "eval_totals_1": 18717, |
| "eval_totals_2": 16513, |
| "eval_totals_3": 14309, |
| "eval_totals_4": 12105, |
| "step": 1164 |
| }, |
| { |
| "epoch": 17.0, |
| "learning_rate": 0.0001, |
| "loss": 0.5933, |
| "step": 1237 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_bleu": 19.5893, |
| "eval_bp": 0.8654, |
| "eval_counts_1": 9806, |
| "eval_counts_2": 4316, |
| "eval_counts_3": 2366, |
| "eval_counts_4": 1348, |
| "eval_exact_match": 0.049, |
| "eval_f1": 0.4485, |
| "eval_gen_len": 14.0622, |
| "eval_loss": 1.2305341958999634, |
| "eval_precisions_1": 52.817, |
| "eval_precisions_2": 26.3782, |
| "eval_precisions_3": 16.7114, |
| "eval_precisions_4": 11.2766, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4571, |
| "eval_rouge2": 0.2628, |
| "eval_rougeL": 0.4407, |
| "eval_rougeLsum": 0.4409, |
| "eval_runtime": 2171.7325, |
| "eval_samples_per_second": 1.015, |
| "eval_steps_per_second": 1.015, |
| "eval_sys_len": 18566, |
| "eval_totals_1": 18566, |
| "eval_totals_2": 16362, |
| "eval_totals_3": 14158, |
| "eval_totals_4": 11954, |
| "step": 1237 |
| }, |
| { |
| "epoch": 17.99, |
| "learning_rate": 0.0001, |
| "loss": 0.5622, |
| "step": 1309 |
| }, |
| { |
| "epoch": 17.99, |
| "eval_bleu": 19.4914, |
| "eval_bp": 0.865, |
| "eval_counts_1": 9787, |
| "eval_counts_2": 4306, |
| "eval_counts_3": 2346, |
| "eval_counts_4": 1338, |
| "eval_exact_match": 0.0476, |
| "eval_f1": 0.447, |
| "eval_gen_len": 13.7763, |
| "eval_loss": 1.2796473503112793, |
| "eval_precisions_1": 52.7345, |
| "eval_precisions_2": 26.3283, |
| "eval_precisions_3": 16.5783, |
| "eval_precisions_4": 11.1995, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4549, |
| "eval_rouge2": 0.2609, |
| "eval_rougeL": 0.4383, |
| "eval_rougeLsum": 0.4382, |
| "eval_runtime": 2158.5699, |
| "eval_samples_per_second": 1.021, |
| "eval_steps_per_second": 1.021, |
| "eval_sys_len": 18559, |
| "eval_totals_1": 18559, |
| "eval_totals_2": 16355, |
| "eval_totals_3": 14151, |
| "eval_totals_4": 11947, |
| "step": 1309 |
| }, |
| { |
| "epoch": 18.99, |
| "learning_rate": 0.0001, |
| "loss": 0.5275, |
| "step": 1382 |
| }, |
| { |
| "epoch": 18.99, |
| "eval_bleu": 19.6947, |
| "eval_bp": 0.8857, |
| "eval_counts_1": 9918, |
| "eval_counts_2": 4363, |
| "eval_counts_3": 2374, |
| "eval_counts_4": 1355, |
| "eval_exact_match": 0.0508, |
| "eval_f1": 0.4499, |
| "eval_gen_len": 14.1647, |
| "eval_loss": 1.2833356857299805, |
| "eval_precisions_1": 52.3377, |
| "eval_precisions_2": 26.054, |
| "eval_precisions_3": 16.3251, |
| "eval_precisions_4": 10.9823, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4573, |
| "eval_rouge2": 0.2624, |
| "eval_rougeL": 0.441, |
| "eval_rougeLsum": 0.4408, |
| "eval_runtime": 2190.1704, |
| "eval_samples_per_second": 1.006, |
| "eval_steps_per_second": 1.006, |
| "eval_sys_len": 18950, |
| "eval_totals_1": 18950, |
| "eval_totals_2": 16746, |
| "eval_totals_3": 14542, |
| "eval_totals_4": 12338, |
| "step": 1382 |
| }, |
| { |
| "epoch": 19.79, |
| "learning_rate": 0.0001, |
| "loss": 0.4986, |
| "step": 1440 |
| }, |
| { |
| "epoch": 19.79, |
| "eval_bleu": 19.4544, |
| "eval_bp": 0.8847, |
| "eval_counts_1": 9879, |
| "eval_counts_2": 4315, |
| "eval_counts_3": 2347, |
| "eval_counts_4": 1324, |
| "eval_exact_match": 0.0495, |
| "eval_f1": 0.4478, |
| "eval_gen_len": 14.2827, |
| "eval_loss": 1.3059108257293701, |
| "eval_precisions_1": 52.1842, |
| "eval_precisions_2": 25.7966, |
| "eval_precisions_3": 16.1606, |
| "eval_precisions_4": 10.7476, |
| "eval_ref_len": 21250, |
| "eval_rouge1": 0.4564, |
| "eval_rouge2": 0.2622, |
| "eval_rougeL": 0.4407, |
| "eval_rougeLsum": 0.4403, |
| "eval_runtime": 3646.8693, |
| "eval_samples_per_second": 0.604, |
| "eval_steps_per_second": 0.604, |
| "eval_sys_len": 18931, |
| "eval_totals_1": 18931, |
| "eval_totals_2": 16727, |
| "eval_totals_3": 14523, |
| "eval_totals_4": 12319, |
| "step": 1440 |
| }, |
| { |
| "epoch": 19.79, |
| "step": 1440, |
| "total_flos": 1.102412878184448e+18, |
| "train_loss": 1.0667428798145717, |
| "train_runtime": 140813.6912, |
| "train_samples_per_second": 1.323, |
| "train_steps_per_second": 0.01 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1440, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 1.102412878184448e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|