| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.78531558608845, | |
| "eval_steps": 500, | |
| "global_step": 1440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0001, | |
| "loss": 6.6905, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_bleu": 3.7816, | |
| "eval_bp": 1.0, | |
| "eval_counts_1": 5515, | |
| "eval_counts_2": 1394, | |
| "eval_counts_3": 522, | |
| "eval_counts_4": 191, | |
| "eval_exact_match": 0.0, | |
| "eval_f1": 0.2106, | |
| "eval_gen_len": 11.2786, | |
| "eval_loss": 2.097219705581665, | |
| "eval_precisions_1": 19.5762, | |
| "eval_precisions_2": 5.3681, | |
| "eval_precisions_3": 2.1966, | |
| "eval_precisions_4": 0.8859, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.1942, | |
| "eval_rouge2": 0.0761, | |
| "eval_rougeL": 0.1837, | |
| "eval_rougeLsum": 0.1841, | |
| "eval_runtime": 456.7865, | |
| "eval_samples_per_second": 4.825, | |
| "eval_steps_per_second": 1.206, | |
| "eval_sys_len": 28172, | |
| "eval_totals_1": 28172, | |
| "eval_totals_2": 25968, | |
| "eval_totals_3": 23764, | |
| "eval_totals_4": 21560, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4978, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_bleu": 9.6021, | |
| "eval_bp": 0.7524, | |
| "eval_counts_1": 7079, | |
| "eval_counts_2": 2339, | |
| "eval_counts_3": 1027, | |
| "eval_counts_4": 446, | |
| "eval_exact_match": 0.01, | |
| "eval_f1": 0.3032, | |
| "eval_gen_len": 12.0159, | |
| "eval_loss": 1.6211049556732178, | |
| "eval_precisions_1": 42.7889, | |
| "eval_precisions_2": 16.311, | |
| "eval_precisions_3": 8.4624, | |
| "eval_precisions_4": 4.4905, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3097, | |
| "eval_rouge2": 0.1455, | |
| "eval_rougeL": 0.2971, | |
| "eval_rougeLsum": 0.2969, | |
| "eval_runtime": 435.2772, | |
| "eval_samples_per_second": 5.063, | |
| "eval_steps_per_second": 1.266, | |
| "eval_sys_len": 16544, | |
| "eval_totals_1": 16544, | |
| "eval_totals_2": 14340, | |
| "eval_totals_3": 12136, | |
| "eval_totals_4": 9932, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1021, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 11.162, | |
| "eval_bp": 0.7908, | |
| "eval_counts_1": 7507, | |
| "eval_counts_2": 2637, | |
| "eval_counts_3": 1222, | |
| "eval_counts_4": 575, | |
| "eval_exact_match": 0.0141, | |
| "eval_f1": 0.3228, | |
| "eval_gen_len": 12.6375, | |
| "eval_loss": 1.5342339277267456, | |
| "eval_precisions_1": 43.6175, | |
| "eval_precisions_2": 17.5718, | |
| "eval_precisions_3": 9.5446, | |
| "eval_precisions_4": 5.425, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3304, | |
| "eval_rouge2": 0.1642, | |
| "eval_rougeL": 0.3172, | |
| "eval_rougeLsum": 0.3171, | |
| "eval_runtime": 446.8682, | |
| "eval_samples_per_second": 4.932, | |
| "eval_steps_per_second": 1.233, | |
| "eval_sys_len": 17211, | |
| "eval_totals_1": 17211, | |
| "eval_totals_2": 15007, | |
| "eval_totals_3": 12803, | |
| "eval_totals_4": 10599, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9208, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 11.7136, | |
| "eval_bp": 0.7714, | |
| "eval_counts_1": 7599, | |
| "eval_counts_2": 2755, | |
| "eval_counts_3": 1296, | |
| "eval_counts_4": 620, | |
| "eval_exact_match": 0.015, | |
| "eval_f1": 0.33, | |
| "eval_gen_len": 12.3938, | |
| "eval_loss": 1.4861969947814941, | |
| "eval_precisions_1": 45.0418, | |
| "eval_precisions_2": 18.7837, | |
| "eval_precisions_3": 10.3988, | |
| "eval_precisions_4": 6.0435, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3377, | |
| "eval_rouge2": 0.1721, | |
| "eval_rougeL": 0.3232, | |
| "eval_rougeLsum": 0.3229, | |
| "eval_runtime": 440.9926, | |
| "eval_samples_per_second": 4.998, | |
| "eval_steps_per_second": 1.249, | |
| "eval_sys_len": 16871, | |
| "eval_totals_1": 16871, | |
| "eval_totals_2": 14667, | |
| "eval_totals_3": 12463, | |
| "eval_totals_4": 10259, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8135, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_bleu": 12.6402, | |
| "eval_bp": 0.7893, | |
| "eval_counts_1": 7831, | |
| "eval_counts_2": 2955, | |
| "eval_counts_3": 1424, | |
| "eval_counts_4": 694, | |
| "eval_exact_match": 0.0177, | |
| "eval_f1": 0.3417, | |
| "eval_gen_len": 12.6366, | |
| "eval_loss": 1.4626398086547852, | |
| "eval_precisions_1": 45.5715, | |
| "eval_precisions_2": 19.7263, | |
| "eval_precisions_3": 11.1459, | |
| "eval_precisions_4": 6.5645, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3497, | |
| "eval_rouge2": 0.1837, | |
| "eval_rougeL": 0.3358, | |
| "eval_rougeLsum": 0.3354, | |
| "eval_runtime": 448.9344, | |
| "eval_samples_per_second": 4.909, | |
| "eval_steps_per_second": 1.227, | |
| "eval_sys_len": 17184, | |
| "eval_totals_1": 17184, | |
| "eval_totals_2": 14980, | |
| "eval_totals_3": 12776, | |
| "eval_totals_4": 10572, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6907, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_bleu": 13.0722, | |
| "eval_bp": 0.7735, | |
| "eval_counts_1": 7872, | |
| "eval_counts_2": 3023, | |
| "eval_counts_3": 1482, | |
| "eval_counts_4": 740, | |
| "eval_exact_match": 0.0177, | |
| "eval_f1": 0.3483, | |
| "eval_gen_len": 12.564, | |
| "eval_loss": 1.439197301864624, | |
| "eval_precisions_1": 46.5606, | |
| "eval_precisions_2": 20.5604, | |
| "eval_precisions_3": 11.8569, | |
| "eval_precisions_4": 7.188, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3566, | |
| "eval_rouge2": 0.1896, | |
| "eval_rougeL": 0.3432, | |
| "eval_rougeLsum": 0.343, | |
| "eval_runtime": 718.6776, | |
| "eval_samples_per_second": 3.067, | |
| "eval_steps_per_second": 0.767, | |
| "eval_sys_len": 16907, | |
| "eval_totals_1": 16907, | |
| "eval_totals_2": 14703, | |
| "eval_totals_3": 12499, | |
| "eval_totals_4": 10295, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6159, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_bleu": 13.5053, | |
| "eval_bp": 0.7797, | |
| "eval_counts_1": 7981, | |
| "eval_counts_2": 3128, | |
| "eval_counts_3": 1542, | |
| "eval_counts_4": 773, | |
| "eval_exact_match": 0.0191, | |
| "eval_f1": 0.3543, | |
| "eval_gen_len": 12.5749, | |
| "eval_loss": 1.4288065433502197, | |
| "eval_precisions_1": 46.9029, | |
| "eval_precisions_2": 21.118, | |
| "eval_precisions_3": 12.2303, | |
| "eval_precisions_4": 7.4298, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.363, | |
| "eval_rouge2": 0.1952, | |
| "eval_rougeL": 0.3504, | |
| "eval_rougeLsum": 0.3502, | |
| "eval_runtime": 709.1881, | |
| "eval_samples_per_second": 3.108, | |
| "eval_steps_per_second": 0.777, | |
| "eval_sys_len": 17016, | |
| "eval_totals_1": 17016, | |
| "eval_totals_2": 14812, | |
| "eval_totals_3": 12608, | |
| "eval_totals_4": 10404, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.556, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 13.2095, | |
| "eval_bp": 0.797, | |
| "eval_counts_1": 8014, | |
| "eval_counts_2": 3046, | |
| "eval_counts_3": 1496, | |
| "eval_counts_4": 748, | |
| "eval_exact_match": 0.0222, | |
| "eval_f1": 0.355, | |
| "eval_gen_len": 12.7641, | |
| "eval_loss": 1.4131838083267212, | |
| "eval_precisions_1": 46.2702, | |
| "eval_precisions_2": 20.1508, | |
| "eval_precisions_3": 11.5861, | |
| "eval_precisions_4": 6.9854, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3632, | |
| "eval_rouge2": 0.1903, | |
| "eval_rougeL": 0.3489, | |
| "eval_rougeLsum": 0.3491, | |
| "eval_runtime": 736.4055, | |
| "eval_samples_per_second": 2.993, | |
| "eval_steps_per_second": 0.748, | |
| "eval_sys_len": 17320, | |
| "eval_totals_1": 17320, | |
| "eval_totals_2": 15116, | |
| "eval_totals_3": 12912, | |
| "eval_totals_4": 10708, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4951, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 14.1831, | |
| "eval_bp": 0.789, | |
| "eval_counts_1": 8342, | |
| "eval_counts_2": 3271, | |
| "eval_counts_3": 1622, | |
| "eval_counts_4": 819, | |
| "eval_exact_match": 0.0218, | |
| "eval_f1": 0.3769, | |
| "eval_gen_len": 12.7654, | |
| "eval_loss": 1.3926042318344116, | |
| "eval_precisions_1": 48.5621, | |
| "eval_precisions_2": 21.8445, | |
| "eval_precisions_3": 12.7016, | |
| "eval_precisions_4": 7.7513, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3843, | |
| "eval_rouge2": 0.2059, | |
| "eval_rougeL": 0.3704, | |
| "eval_rougeLsum": 0.3704, | |
| "eval_runtime": 695.8554, | |
| "eval_samples_per_second": 3.167, | |
| "eval_steps_per_second": 0.792, | |
| "eval_sys_len": 17178, | |
| "eval_totals_1": 17178, | |
| "eval_totals_2": 14974, | |
| "eval_totals_3": 12770, | |
| "eval_totals_4": 10566, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4522, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "eval_bleu": 15.0442, | |
| "eval_bp": 0.8187, | |
| "eval_counts_1": 8639, | |
| "eval_counts_2": 3449, | |
| "eval_counts_3": 1740, | |
| "eval_counts_4": 891, | |
| "eval_exact_match": 0.024, | |
| "eval_f1": 0.3895, | |
| "eval_gen_len": 13.1016, | |
| "eval_loss": 1.3769304752349854, | |
| "eval_precisions_1": 48.7859, | |
| "eval_precisions_2": 22.2459, | |
| "eval_precisions_3": 13.0827, | |
| "eval_precisions_4": 8.0299, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3972, | |
| "eval_rouge2": 0.2129, | |
| "eval_rougeL": 0.3821, | |
| "eval_rougeLsum": 0.3823, | |
| "eval_runtime": 733.5109, | |
| "eval_samples_per_second": 3.005, | |
| "eval_steps_per_second": 0.751, | |
| "eval_sys_len": 17708, | |
| "eval_totals_1": 17708, | |
| "eval_totals_2": 15504, | |
| "eval_totals_3": 13300, | |
| "eval_totals_4": 11096, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3663, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_bleu": 15.2622, | |
| "eval_bp": 0.8168, | |
| "eval_counts_1": 8736, | |
| "eval_counts_2": 3468, | |
| "eval_counts_3": 1747, | |
| "eval_counts_4": 924, | |
| "eval_exact_match": 0.0245, | |
| "eval_f1": 0.3946, | |
| "eval_gen_len": 13.0399, | |
| "eval_loss": 1.3676577806472778, | |
| "eval_precisions_1": 49.4285, | |
| "eval_precisions_2": 22.4176, | |
| "eval_precisions_3": 13.169, | |
| "eval_precisions_4": 8.3529, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4027, | |
| "eval_rouge2": 0.215, | |
| "eval_rougeL": 0.3871, | |
| "eval_rougeLsum": 0.387, | |
| "eval_runtime": 746.3261, | |
| "eval_samples_per_second": 2.953, | |
| "eval_steps_per_second": 0.738, | |
| "eval_sys_len": 17674, | |
| "eval_totals_1": 17674, | |
| "eval_totals_2": 15470, | |
| "eval_totals_3": 13266, | |
| "eval_totals_4": 11062, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3122, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "eval_bleu": 15.3943, | |
| "eval_bp": 0.8308, | |
| "eval_counts_1": 8833, | |
| "eval_counts_2": 3533, | |
| "eval_counts_3": 1780, | |
| "eval_counts_4": 915, | |
| "eval_exact_match": 0.0222, | |
| "eval_f1": 0.3975, | |
| "eval_gen_len": 13.3494, | |
| "eval_loss": 1.352068305015564, | |
| "eval_precisions_1": 49.272, | |
| "eval_precisions_2": 22.4703, | |
| "eval_precisions_3": 13.1667, | |
| "eval_precisions_4": 8.0866, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4055, | |
| "eval_rouge2": 0.219, | |
| "eval_rougeL": 0.3915, | |
| "eval_rougeLsum": 0.3915, | |
| "eval_runtime": 815.025, | |
| "eval_samples_per_second": 2.704, | |
| "eval_steps_per_second": 0.676, | |
| "eval_sys_len": 17927, | |
| "eval_totals_1": 17927, | |
| "eval_totals_2": 15723, | |
| "eval_totals_3": 13519, | |
| "eval_totals_4": 11315, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2641, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 16.1011, | |
| "eval_bp": 0.848, | |
| "eval_counts_1": 9048, | |
| "eval_counts_2": 3668, | |
| "eval_counts_3": 1864, | |
| "eval_counts_4": 989, | |
| "eval_exact_match": 0.0268, | |
| "eval_f1": 0.408, | |
| "eval_gen_len": 13.5508, | |
| "eval_loss": 1.3493599891662598, | |
| "eval_precisions_1": 49.5998, | |
| "eval_precisions_2": 22.8707, | |
| "eval_precisions_3": 13.474, | |
| "eval_precisions_4": 8.5039, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4165, | |
| "eval_rouge2": 0.2265, | |
| "eval_rougeL": 0.4011, | |
| "eval_rougeLsum": 0.401, | |
| "eval_runtime": 726.7867, | |
| "eval_samples_per_second": 3.033, | |
| "eval_steps_per_second": 0.758, | |
| "eval_sys_len": 18242, | |
| "eval_totals_1": 18242, | |
| "eval_totals_2": 16038, | |
| "eval_totals_3": 13834, | |
| "eval_totals_4": 11630, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2359, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_bleu": 16.3595, | |
| "eval_bp": 0.8402, | |
| "eval_counts_1": 9075, | |
| "eval_counts_2": 3709, | |
| "eval_counts_3": 1907, | |
| "eval_counts_4": 1013, | |
| "eval_exact_match": 0.0259, | |
| "eval_f1": 0.4113, | |
| "eval_gen_len": 13.5681, | |
| "eval_loss": 1.3488041162490845, | |
| "eval_precisions_1": 50.1437, | |
| "eval_precisions_2": 23.3359, | |
| "eval_precisions_3": 13.9299, | |
| "eval_precisions_4": 8.8194, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4195, | |
| "eval_rouge2": 0.2298, | |
| "eval_rougeL": 0.4041, | |
| "eval_rougeLsum": 0.4038, | |
| "eval_runtime": 701.8557, | |
| "eval_samples_per_second": 3.14, | |
| "eval_steps_per_second": 0.785, | |
| "eval_sys_len": 18098, | |
| "eval_totals_1": 18098, | |
| "eval_totals_2": 15894, | |
| "eval_totals_3": 13690, | |
| "eval_totals_4": 11486, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1754, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_bleu": 16.7083, | |
| "eval_bp": 0.8547, | |
| "eval_counts_1": 9182, | |
| "eval_counts_2": 3777, | |
| "eval_counts_3": 1957, | |
| "eval_counts_4": 1048, | |
| "eval_exact_match": 0.0268, | |
| "eval_f1": 0.4145, | |
| "eval_gen_len": 13.6534, | |
| "eval_loss": 1.3482075929641724, | |
| "eval_precisions_1": 49.9946, | |
| "eval_precisions_2": 23.3696, | |
| "eval_precisions_3": 14.0206, | |
| "eval_precisions_4": 8.9161, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4227, | |
| "eval_rouge2": 0.2314, | |
| "eval_rougeL": 0.406, | |
| "eval_rougeLsum": 0.4058, | |
| "eval_runtime": 469.6435, | |
| "eval_samples_per_second": 4.693, | |
| "eval_steps_per_second": 1.173, | |
| "eval_sys_len": 18366, | |
| "eval_totals_1": 18366, | |
| "eval_totals_2": 16162, | |
| "eval_totals_3": 13958, | |
| "eval_totals_4": 11754, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1367, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_bleu": 16.5803, | |
| "eval_bp": 0.8517, | |
| "eval_counts_1": 9164, | |
| "eval_counts_2": 3761, | |
| "eval_counts_3": 1935, | |
| "eval_counts_4": 1033, | |
| "eval_exact_match": 0.0245, | |
| "eval_f1": 0.4147, | |
| "eval_gen_len": 13.6152, | |
| "eval_loss": 1.3501369953155518, | |
| "eval_precisions_1": 50.0492, | |
| "eval_precisions_2": 23.3515, | |
| "eval_precisions_3": 13.9189, | |
| "eval_precisions_4": 8.8306, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4225, | |
| "eval_rouge2": 0.2316, | |
| "eval_rougeL": 0.4078, | |
| "eval_rougeLsum": 0.4079, | |
| "eval_runtime": 480.2308, | |
| "eval_samples_per_second": 4.589, | |
| "eval_steps_per_second": 1.147, | |
| "eval_sys_len": 18310, | |
| "eval_totals_1": 18310, | |
| "eval_totals_2": 16106, | |
| "eval_totals_3": 13902, | |
| "eval_totals_4": 11698, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.096, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bleu": 16.5513, | |
| "eval_bp": 0.8499, | |
| "eval_counts_1": 9126, | |
| "eval_counts_2": 3712, | |
| "eval_counts_3": 1922, | |
| "eval_counts_4": 1050, | |
| "eval_exact_match": 0.0295, | |
| "eval_f1": 0.4141, | |
| "eval_gen_len": 13.6325, | |
| "eval_loss": 1.358604907989502, | |
| "eval_precisions_1": 49.9316, | |
| "eval_precisions_2": 23.0946, | |
| "eval_precisions_3": 13.8582, | |
| "eval_precisions_4": 9.0013, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4217, | |
| "eval_rouge2": 0.2304, | |
| "eval_rougeL": 0.4066, | |
| "eval_rougeLsum": 0.4066, | |
| "eval_runtime": 465.7019, | |
| "eval_samples_per_second": 4.733, | |
| "eval_steps_per_second": 1.183, | |
| "eval_sys_len": 18277, | |
| "eval_totals_1": 18277, | |
| "eval_totals_2": 16073, | |
| "eval_totals_3": 13869, | |
| "eval_totals_4": 11665, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0571, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 16.4708, | |
| "eval_bp": 0.8446, | |
| "eval_counts_1": 9087, | |
| "eval_counts_2": 3707, | |
| "eval_counts_3": 1923, | |
| "eval_counts_4": 1033, | |
| "eval_exact_match": 0.029, | |
| "eval_f1": 0.4116, | |
| "eval_gen_len": 13.5172, | |
| "eval_loss": 1.3658462762832642, | |
| "eval_precisions_1": 49.9862, | |
| "eval_precisions_2": 23.205, | |
| "eval_precisions_3": 13.9641, | |
| "eval_precisions_4": 8.9306, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4196, | |
| "eval_rouge2": 0.2301, | |
| "eval_rougeL": 0.4049, | |
| "eval_rougeLsum": 0.4049, | |
| "eval_runtime": 463.8447, | |
| "eval_samples_per_second": 4.752, | |
| "eval_steps_per_second": 1.188, | |
| "eval_sys_len": 18179, | |
| "eval_totals_1": 18179, | |
| "eval_totals_2": 15975, | |
| "eval_totals_3": 13771, | |
| "eval_totals_4": 11567, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.036, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_bleu": 16.8386, | |
| "eval_bp": 0.8528, | |
| "eval_counts_1": 9206, | |
| "eval_counts_2": 3806, | |
| "eval_counts_3": 1976, | |
| "eval_counts_4": 1059, | |
| "eval_exact_match": 0.0309, | |
| "eval_f1": 0.4174, | |
| "eval_gen_len": 13.7205, | |
| "eval_loss": 1.367233395576477, | |
| "eval_precisions_1": 50.2182, | |
| "eval_precisions_2": 23.5987, | |
| "eval_precisions_3": 14.1913, | |
| "eval_precisions_4": 9.0358, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4254, | |
| "eval_rouge2": 0.2348, | |
| "eval_rougeL": 0.4106, | |
| "eval_rougeLsum": 0.4107, | |
| "eval_runtime": 489.8628, | |
| "eval_samples_per_second": 4.499, | |
| "eval_steps_per_second": 1.125, | |
| "eval_sys_len": 18332, | |
| "eval_totals_1": 18332, | |
| "eval_totals_2": 16128, | |
| "eval_totals_3": 13924, | |
| "eval_totals_4": 11720, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9785, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "eval_bleu": 16.8234, | |
| "eval_bp": 0.8438, | |
| "eval_counts_1": 9180, | |
| "eval_counts_2": 3796, | |
| "eval_counts_3": 1973, | |
| "eval_counts_4": 1059, | |
| "eval_exact_match": 0.0327, | |
| "eval_f1": 0.4172, | |
| "eval_gen_len": 13.5113, | |
| "eval_loss": 1.381914496421814, | |
| "eval_precisions_1": 50.5395, | |
| "eval_precisions_2": 23.7845, | |
| "eval_precisions_3": 14.3428, | |
| "eval_precisions_4": 9.1672, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4254, | |
| "eval_rouge2": 0.2344, | |
| "eval_rougeL": 0.4116, | |
| "eval_rougeLsum": 0.4117, | |
| "eval_runtime": 465.8344, | |
| "eval_samples_per_second": 4.731, | |
| "eval_steps_per_second": 1.183, | |
| "eval_sys_len": 18164, | |
| "eval_totals_1": 18164, | |
| "eval_totals_2": 15960, | |
| "eval_totals_3": 13756, | |
| "eval_totals_4": 11552, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "step": 1440, | |
| "total_flos": 4.419252384883016e+17, | |
| "train_loss": 1.7299000342686972, | |
| "train_runtime": 27815.7883, | |
| "train_samples_per_second": 6.697, | |
| "train_steps_per_second": 0.052 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1440, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 4.419252384883016e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |