| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.922713610991842, | |
| "eval_steps": 500, | |
| "global_step": 2900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001, | |
| "loss": 5.5131, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 6.2485, | |
| "eval_bp": 0.7216, | |
| "eval_counts_1": 6032, | |
| "eval_counts_2": 1668, | |
| "eval_counts_3": 626, | |
| "eval_counts_4": 216, | |
| "eval_exact_match": 0.0018, | |
| "eval_f1": 0.2406, | |
| "eval_gen_len": 12.6166, | |
| "eval_loss": 1.8697563409805298, | |
| "eval_precisions_1": 37.6459, | |
| "eval_precisions_2": 12.0703, | |
| "eval_precisions_3": 5.3896, | |
| "eval_precisions_4": 2.2952, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.2485, | |
| "eval_rouge2": 0.1011, | |
| "eval_rougeL": 0.2368, | |
| "eval_rougeLsum": 0.2366, | |
| "eval_runtime": 467.1177, | |
| "eval_samples_per_second": 4.718, | |
| "eval_steps_per_second": 1.18, | |
| "eval_sys_len": 16023, | |
| "eval_totals_1": 16023, | |
| "eval_totals_2": 13819, | |
| "eval_totals_3": 11615, | |
| "eval_totals_4": 9411, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3946, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 10.8315, | |
| "eval_bp": 0.7704, | |
| "eval_counts_1": 7325, | |
| "eval_counts_2": 2554, | |
| "eval_counts_3": 1178, | |
| "eval_counts_4": 558, | |
| "eval_exact_match": 0.0145, | |
| "eval_f1": 0.3148, | |
| "eval_gen_len": 12.2582, | |
| "eval_loss": 1.58878493309021, | |
| "eval_precisions_1": 43.4641, | |
| "eval_precisions_2": 17.4346, | |
| "eval_precisions_3": 9.4656, | |
| "eval_precisions_4": 5.4487, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3226, | |
| "eval_rouge2": 0.1585, | |
| "eval_rougeL": 0.31, | |
| "eval_rougeLsum": 0.31, | |
| "eval_runtime": 528.6481, | |
| "eval_samples_per_second": 4.169, | |
| "eval_steps_per_second": 1.042, | |
| "eval_sys_len": 16853, | |
| "eval_totals_1": 16853, | |
| "eval_totals_2": 14649, | |
| "eval_totals_3": 12445, | |
| "eval_totals_4": 10241, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0101, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 11.7891, | |
| "eval_bp": 0.7812, | |
| "eval_counts_1": 7623, | |
| "eval_counts_2": 2764, | |
| "eval_counts_3": 1304, | |
| "eval_counts_4": 629, | |
| "eval_exact_match": 0.0154, | |
| "eval_f1": 0.3315, | |
| "eval_gen_len": 12.6783, | |
| "eval_loss": 1.4997321367263794, | |
| "eval_precisions_1": 44.7307, | |
| "eval_precisions_2": 18.6278, | |
| "eval_precisions_3": 10.3214, | |
| "eval_precisions_4": 6.0307, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3403, | |
| "eval_rouge2": 0.1723, | |
| "eval_rougeL": 0.3263, | |
| "eval_rougeLsum": 0.3263, | |
| "eval_runtime": 451.1882, | |
| "eval_samples_per_second": 4.885, | |
| "eval_steps_per_second": 1.221, | |
| "eval_sys_len": 17042, | |
| "eval_totals_1": 17042, | |
| "eval_totals_2": 14838, | |
| "eval_totals_3": 12634, | |
| "eval_totals_4": 10430, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8073, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 12.6068, | |
| "eval_bp": 0.7588, | |
| "eval_counts_1": 7728, | |
| "eval_counts_2": 2916, | |
| "eval_counts_3": 1415, | |
| "eval_counts_4": 707, | |
| "eval_exact_match": 0.0168, | |
| "eval_f1": 0.3387, | |
| "eval_gen_len": 12.2963, | |
| "eval_loss": 1.4610050916671753, | |
| "eval_precisions_1": 46.4033, | |
| "eval_precisions_2": 20.1799, | |
| "eval_precisions_3": 11.5548, | |
| "eval_precisions_4": 7.0404, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3461, | |
| "eval_rouge2": 0.1818, | |
| "eval_rougeL": 0.3324, | |
| "eval_rougeLsum": 0.3326, | |
| "eval_runtime": 433.3953, | |
| "eval_samples_per_second": 5.085, | |
| "eval_steps_per_second": 1.271, | |
| "eval_sys_len": 16654, | |
| "eval_totals_1": 16654, | |
| "eval_totals_2": 14450, | |
| "eval_totals_3": 12246, | |
| "eval_totals_4": 10042, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6851, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_bleu": 13.0784, | |
| "eval_bp": 0.8004, | |
| "eval_counts_1": 7964, | |
| "eval_counts_2": 3059, | |
| "eval_counts_3": 1483, | |
| "eval_counts_4": 727, | |
| "eval_exact_match": 0.0159, | |
| "eval_f1": 0.3483, | |
| "eval_gen_len": 12.7436, | |
| "eval_loss": 1.4356882572174072, | |
| "eval_precisions_1": 45.8201, | |
| "eval_precisions_2": 20.1555, | |
| "eval_precisions_3": 11.4314, | |
| "eval_precisions_4": 6.7509, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3558, | |
| "eval_rouge2": 0.1888, | |
| "eval_rougeL": 0.3415, | |
| "eval_rougeLsum": 0.3414, | |
| "eval_runtime": 452.1483, | |
| "eval_samples_per_second": 4.875, | |
| "eval_steps_per_second": 1.219, | |
| "eval_sys_len": 17381, | |
| "eval_totals_1": 17381, | |
| "eval_totals_2": 15177, | |
| "eval_totals_3": 12973, | |
| "eval_totals_4": 10769, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5642, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu": 13.9065, | |
| "eval_bp": 0.7987, | |
| "eval_counts_1": 8299, | |
| "eval_counts_2": 3224, | |
| "eval_counts_3": 1592, | |
| "eval_counts_4": 788, | |
| "eval_exact_match": 0.0204, | |
| "eval_f1": 0.3736, | |
| "eval_gen_len": 12.9569, | |
| "eval_loss": 1.4003357887268066, | |
| "eval_precisions_1": 47.8301, | |
| "eval_precisions_2": 21.2847, | |
| "eval_precisions_3": 12.3001, | |
| "eval_precisions_4": 7.3377, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3814, | |
| "eval_rouge2": 0.2025, | |
| "eval_rougeL": 0.3684, | |
| "eval_rougeLsum": 0.3685, | |
| "eval_runtime": 450.2054, | |
| "eval_samples_per_second": 4.896, | |
| "eval_steps_per_second": 1.224, | |
| "eval_sys_len": 17351, | |
| "eval_totals_1": 17351, | |
| "eval_totals_2": 15147, | |
| "eval_totals_3": 12943, | |
| "eval_totals_4": 10739, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4756, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_bleu": 14.9146, | |
| "eval_bp": 0.8165, | |
| "eval_counts_1": 8640, | |
| "eval_counts_2": 3430, | |
| "eval_counts_3": 1712, | |
| "eval_counts_4": 879, | |
| "eval_exact_match": 0.025, | |
| "eval_f1": 0.3892, | |
| "eval_gen_len": 13.1084, | |
| "eval_loss": 1.3778630495071411, | |
| "eval_precisions_1": 48.8992, | |
| "eval_precisions_2": 22.1791, | |
| "eval_precisions_3": 12.91, | |
| "eval_precisions_4": 7.9497, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3971, | |
| "eval_rouge2": 0.2133, | |
| "eval_rougeL": 0.3828, | |
| "eval_rougeLsum": 0.3826, | |
| "eval_runtime": 753.2935, | |
| "eval_samples_per_second": 2.926, | |
| "eval_steps_per_second": 0.731, | |
| "eval_sys_len": 17669, | |
| "eval_totals_1": 17669, | |
| "eval_totals_2": 15465, | |
| "eval_totals_3": 13261, | |
| "eval_totals_4": 11057, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3792, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 14.8859, | |
| "eval_bp": 0.8346, | |
| "eval_counts_1": 8732, | |
| "eval_counts_2": 3417, | |
| "eval_counts_3": 1712, | |
| "eval_counts_4": 871, | |
| "eval_exact_match": 0.0245, | |
| "eval_f1": 0.3917, | |
| "eval_gen_len": 13.3748, | |
| "eval_loss": 1.362410306930542, | |
| "eval_precisions_1": 48.5219, | |
| "eval_precisions_2": 21.6375, | |
| "eval_precisions_3": 12.5994, | |
| "eval_precisions_4": 7.6511, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4003, | |
| "eval_rouge2": 0.2131, | |
| "eval_rougeL": 0.3852, | |
| "eval_rougeLsum": 0.3849, | |
| "eval_runtime": 699.0977, | |
| "eval_samples_per_second": 3.153, | |
| "eval_steps_per_second": 0.788, | |
| "eval_sys_len": 17996, | |
| "eval_totals_1": 17996, | |
| "eval_totals_2": 15792, | |
| "eval_totals_3": 13588, | |
| "eval_totals_4": 11384, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3133, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 15.3264, | |
| "eval_bp": 0.8161, | |
| "eval_counts_1": 8804, | |
| "eval_counts_2": 3500, | |
| "eval_counts_3": 1754, | |
| "eval_counts_4": 920, | |
| "eval_exact_match": 0.025, | |
| "eval_f1": 0.4, | |
| "eval_gen_len": 13.2019, | |
| "eval_loss": 1.3630096912384033, | |
| "eval_precisions_1": 49.85, | |
| "eval_precisions_2": 22.6435, | |
| "eval_precisions_3": 13.2347, | |
| "eval_precisions_4": 8.3265, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4078, | |
| "eval_rouge2": 0.219, | |
| "eval_rougeL": 0.3932, | |
| "eval_rougeLsum": 0.3935, | |
| "eval_runtime": 465.2887, | |
| "eval_samples_per_second": 4.737, | |
| "eval_steps_per_second": 1.184, | |
| "eval_sys_len": 17661, | |
| "eval_totals_1": 17661, | |
| "eval_totals_2": 15457, | |
| "eval_totals_3": 13253, | |
| "eval_totals_4": 11049, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.261, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 16.0163, | |
| "eval_bp": 0.8188, | |
| "eval_counts_1": 8910, | |
| "eval_counts_2": 3602, | |
| "eval_counts_3": 1849, | |
| "eval_counts_4": 1000, | |
| "eval_exact_match": 0.0295, | |
| "eval_f1": 0.4055, | |
| "eval_gen_len": 13.1892, | |
| "eval_loss": 1.3685479164123535, | |
| "eval_precisions_1": 50.3134, | |
| "eval_precisions_2": 23.2312, | |
| "eval_precisions_3": 13.9012, | |
| "eval_precisions_4": 9.0114, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4135, | |
| "eval_rouge2": 0.223, | |
| "eval_rougeL": 0.3991, | |
| "eval_rougeLsum": 0.3992, | |
| "eval_runtime": 491.3102, | |
| "eval_samples_per_second": 4.486, | |
| "eval_steps_per_second": 1.121, | |
| "eval_sys_len": 17709, | |
| "eval_totals_1": 17709, | |
| "eval_totals_2": 15505, | |
| "eval_totals_3": 13301, | |
| "eval_totals_4": 11097, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1897, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bleu": 16.3202, | |
| "eval_bp": 0.849, | |
| "eval_counts_1": 9096, | |
| "eval_counts_2": 3690, | |
| "eval_counts_3": 1902, | |
| "eval_counts_4": 1012, | |
| "eval_exact_match": 0.0281, | |
| "eval_f1": 0.4121, | |
| "eval_gen_len": 13.5077, | |
| "eval_loss": 1.3638867139816284, | |
| "eval_precisions_1": 49.8111, | |
| "eval_precisions_2": 22.9806, | |
| "eval_precisions_3": 13.7299, | |
| "eval_precisions_4": 8.6874, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4201, | |
| "eval_rouge2": 0.2289, | |
| "eval_rougeL": 0.4059, | |
| "eval_rougeLsum": 0.4057, | |
| "eval_runtime": 536.9399, | |
| "eval_samples_per_second": 4.105, | |
| "eval_steps_per_second": 1.026, | |
| "eval_sys_len": 18261, | |
| "eval_totals_1": 18261, | |
| "eval_totals_2": 16057, | |
| "eval_totals_3": 13853, | |
| "eval_totals_4": 11649, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1453, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "eval_bleu": 16.4772, | |
| "eval_bp": 0.8527, | |
| "eval_counts_1": 9106, | |
| "eval_counts_2": 3735, | |
| "eval_counts_3": 1932, | |
| "eval_counts_4": 1023, | |
| "eval_exact_match": 0.0281, | |
| "eval_f1": 0.4099, | |
| "eval_gen_len": 13.8013, | |
| "eval_loss": 1.3609519004821777, | |
| "eval_precisions_1": 49.6808, | |
| "eval_precisions_2": 23.1628, | |
| "eval_precisions_3": 13.8783, | |
| "eval_precisions_4": 8.7309, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4173, | |
| "eval_rouge2": 0.2303, | |
| "eval_rougeL": 0.4026, | |
| "eval_rougeLsum": 0.4025, | |
| "eval_runtime": 617.7899, | |
| "eval_samples_per_second": 3.568, | |
| "eval_steps_per_second": 0.892, | |
| "eval_sys_len": 18329, | |
| "eval_totals_1": 18329, | |
| "eval_totals_2": 16125, | |
| "eval_totals_3": 13921, | |
| "eval_totals_4": 11717, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0858, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 16.7204, | |
| "eval_bp": 0.8649, | |
| "eval_counts_1": 9245, | |
| "eval_counts_2": 3778, | |
| "eval_counts_3": 1955, | |
| "eval_counts_4": 1049, | |
| "eval_exact_match": 0.0322, | |
| "eval_f1": 0.417, | |
| "eval_gen_len": 13.8144, | |
| "eval_loss": 1.3716095685958862, | |
| "eval_precisions_1": 49.8222, | |
| "eval_precisions_2": 23.1042, | |
| "eval_precisions_3": 13.8182, | |
| "eval_precisions_4": 8.7827, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4244, | |
| "eval_rouge2": 0.2327, | |
| "eval_rougeL": 0.409, | |
| "eval_rougeLsum": 0.409, | |
| "eval_runtime": 504.2774, | |
| "eval_samples_per_second": 4.371, | |
| "eval_steps_per_second": 1.093, | |
| "eval_sys_len": 18556, | |
| "eval_totals_1": 18556, | |
| "eval_totals_2": 16352, | |
| "eval_totals_3": 14148, | |
| "eval_totals_4": 11944, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.0472, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_bleu": 16.6825, | |
| "eval_bp": 0.8519, | |
| "eval_counts_1": 9166, | |
| "eval_counts_2": 3756, | |
| "eval_counts_3": 1946, | |
| "eval_counts_4": 1054, | |
| "eval_exact_match": 0.0309, | |
| "eval_f1": 0.4143, | |
| "eval_gen_len": 13.8099, | |
| "eval_loss": 1.3770091533660889, | |
| "eval_precisions_1": 50.0464, | |
| "eval_precisions_2": 23.3133, | |
| "eval_precisions_3": 13.993, | |
| "eval_precisions_4": 9.0062, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4216, | |
| "eval_rouge2": 0.2311, | |
| "eval_rougeL": 0.4068, | |
| "eval_rougeLsum": 0.4067, | |
| "eval_runtime": 581.2707, | |
| "eval_samples_per_second": 3.792, | |
| "eval_steps_per_second": 0.948, | |
| "eval_sys_len": 18315, | |
| "eval_totals_1": 18315, | |
| "eval_totals_2": 16111, | |
| "eval_totals_3": 13907, | |
| "eval_totals_4": 11703, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9953, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bleu": 17.3937, | |
| "eval_bp": 0.842, | |
| "eval_counts_1": 9342, | |
| "eval_counts_2": 3926, | |
| "eval_counts_3": 2046, | |
| "eval_counts_4": 1108, | |
| "eval_exact_match": 0.0327, | |
| "eval_f1": 0.4258, | |
| "eval_gen_len": 13.5023, | |
| "eval_loss": 1.3880597352981567, | |
| "eval_precisions_1": 51.5222, | |
| "eval_precisions_2": 24.6484, | |
| "eval_precisions_3": 14.9082, | |
| "eval_precisions_4": 9.6181, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4328, | |
| "eval_rouge2": 0.2418, | |
| "eval_rougeL": 0.4171, | |
| "eval_rougeLsum": 0.4171, | |
| "eval_runtime": 718.2329, | |
| "eval_samples_per_second": 3.069, | |
| "eval_steps_per_second": 0.767, | |
| "eval_sys_len": 18132, | |
| "eval_totals_1": 18132, | |
| "eval_totals_2": 15928, | |
| "eval_totals_3": 13724, | |
| "eval_totals_4": 11520, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9509, | |
| "step": 2329 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 17.1618, | |
| "eval_bp": 0.871, | |
| "eval_counts_1": 9330, | |
| "eval_counts_2": 3894, | |
| "eval_counts_3": 2024, | |
| "eval_counts_4": 1084, | |
| "eval_exact_match": 0.0313, | |
| "eval_f1": 0.4198, | |
| "eval_gen_len": 13.956, | |
| "eval_loss": 1.401639461517334, | |
| "eval_precisions_1": 49.9679, | |
| "eval_precisions_2": 23.6459, | |
| "eval_precisions_3": 14.1896, | |
| "eval_precisions_4": 8.9884, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4269, | |
| "eval_rouge2": 0.237, | |
| "eval_rougeL": 0.4123, | |
| "eval_rougeLsum": 0.4122, | |
| "eval_runtime": 632.3222, | |
| "eval_samples_per_second": 3.486, | |
| "eval_steps_per_second": 0.871, | |
| "eval_sys_len": 18672, | |
| "eval_totals_1": 18672, | |
| "eval_totals_2": 16468, | |
| "eval_totals_3": 14264, | |
| "eval_totals_4": 12060, | |
| "step": 2329 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9183, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bleu": 16.995, | |
| "eval_bp": 0.8606, | |
| "eval_counts_1": 9303, | |
| "eval_counts_2": 3824, | |
| "eval_counts_3": 1979, | |
| "eval_counts_4": 1084, | |
| "eval_exact_match": 0.0327, | |
| "eval_f1": 0.4199, | |
| "eval_gen_len": 13.7854, | |
| "eval_loss": 1.4152026176452637, | |
| "eval_precisions_1": 50.3518, | |
| "eval_precisions_2": 23.5005, | |
| "eval_precisions_3": 14.0674, | |
| "eval_precisions_4": 9.1369, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4269, | |
| "eval_rouge2": 0.2345, | |
| "eval_rougeL": 0.4121, | |
| "eval_rougeLsum": 0.4122, | |
| "eval_runtime": 466.5423, | |
| "eval_samples_per_second": 4.724, | |
| "eval_steps_per_second": 1.181, | |
| "eval_sys_len": 18476, | |
| "eval_totals_1": 18476, | |
| "eval_totals_2": 16272, | |
| "eval_totals_3": 14068, | |
| "eval_totals_4": 11864, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8696, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 16.9541, | |
| "eval_bp": 0.8554, | |
| "eval_counts_1": 9184, | |
| "eval_counts_2": 3798, | |
| "eval_counts_3": 1993, | |
| "eval_counts_4": 1085, | |
| "eval_exact_match": 0.034, | |
| "eval_f1": 0.4148, | |
| "eval_gen_len": 13.726, | |
| "eval_loss": 1.44040048122406, | |
| "eval_precisions_1": 49.9701, | |
| "eval_precisions_2": 23.4807, | |
| "eval_precisions_3": 14.2653, | |
| "eval_precisions_4": 9.2207, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4218, | |
| "eval_rouge2": 0.2333, | |
| "eval_rougeL": 0.4076, | |
| "eval_rougeLsum": 0.4074, | |
| "eval_runtime": 470.6343, | |
| "eval_samples_per_second": 4.683, | |
| "eval_steps_per_second": 1.171, | |
| "eval_sys_len": 18379, | |
| "eval_totals_1": 18379, | |
| "eval_totals_2": 16175, | |
| "eval_totals_3": 13971, | |
| "eval_totals_4": 11767, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.8389, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_bleu": 17.67, | |
| "eval_bp": 0.8885, | |
| "eval_counts_1": 9476, | |
| "eval_counts_2": 4000, | |
| "eval_counts_3": 2092, | |
| "eval_counts_4": 1139, | |
| "eval_exact_match": 0.0299, | |
| "eval_f1": 0.4239, | |
| "eval_gen_len": 14.2064, | |
| "eval_loss": 1.4360300302505493, | |
| "eval_precisions_1": 49.8658, | |
| "eval_precisions_2": 23.8109, | |
| "eval_precisions_3": 14.3337, | |
| "eval_precisions_4": 9.1922, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4307, | |
| "eval_rouge2": 0.2406, | |
| "eval_rougeL": 0.4161, | |
| "eval_rougeLsum": 0.416, | |
| "eval_runtime": 480.4816, | |
| "eval_samples_per_second": 4.587, | |
| "eval_steps_per_second": 1.147, | |
| "eval_sys_len": 19003, | |
| "eval_totals_1": 19003, | |
| "eval_totals_2": 16799, | |
| "eval_totals_3": 14595, | |
| "eval_totals_4": 12391, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7993, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "eval_bleu": 17.5799, | |
| "eval_bp": 0.8747, | |
| "eval_counts_1": 9464, | |
| "eval_counts_2": 3970, | |
| "eval_counts_3": 2078, | |
| "eval_counts_4": 1126, | |
| "eval_exact_match": 0.0327, | |
| "eval_f1": 0.4269, | |
| "eval_gen_len": 13.9959, | |
| "eval_loss": 1.454466700553894, | |
| "eval_precisions_1": 50.4989, | |
| "eval_precisions_2": 24.0068, | |
| "eval_precisions_3": 14.498, | |
| "eval_precisions_4": 9.2835, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4349, | |
| "eval_rouge2": 0.2424, | |
| "eval_rougeL": 0.4194, | |
| "eval_rougeLsum": 0.4192, | |
| "eval_runtime": 476.8512, | |
| "eval_samples_per_second": 4.622, | |
| "eval_steps_per_second": 1.155, | |
| "eval_sys_len": 18741, | |
| "eval_totals_1": 18741, | |
| "eval_totals_2": 16537, | |
| "eval_totals_3": 14333, | |
| "eval_totals_4": 12129, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "step": 2900, | |
| "total_flos": 4.449947965854843e+17, | |
| "train_loss": 1.5141178552035628, | |
| "train_runtime": 27637.7455, | |
| "train_samples_per_second": 6.74, | |
| "train_steps_per_second": 0.105 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2900, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 4.449947965854843e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |