{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.914163090128756, "eval_steps": 500, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0001, "loss": 3.1671, "step": 145 }, { "epoch": 1.0, "eval_bleu": 5.9441, "eval_bp": 0.7156, "eval_counts_1": 6177, "eval_counts_2": 1669, "eval_counts_3": 604, "eval_counts_4": 179, "eval_exact_match": 0.0023, "eval_f1": 0.2528, "eval_gen_len": 12.0218, "eval_loss": 2.190216541290283, "eval_precisions_1": 38.7954, "eval_precisions_2": 12.1665, "eval_precisions_3": 5.2458, "eval_precisions_4": 1.9227, "eval_ref_len": 21250, "eval_rouge1": 0.2595, "eval_rouge2": 0.1035, "eval_rougeL": 0.2491, "eval_rougeLsum": 0.2492, "eval_runtime": 793.0147, "eval_samples_per_second": 2.779, "eval_steps_per_second": 0.695, "eval_sys_len": 15922, "eval_totals_1": 15922, "eval_totals_2": 13718, "eval_totals_3": 11514, "eval_totals_4": 9310, "step": 145 }, { "epoch": 2.0, "learning_rate": 0.0001, "loss": 2.5597, "step": 291 }, { "epoch": 2.0, "eval_bleu": 7.7787, "eval_bp": 0.7556, "eval_counts_1": 6785, "eval_counts_2": 2044, "eval_counts_3": 804, "eval_counts_4": 293, "eval_exact_match": 0.0064, "eval_f1": 0.2864, "eval_gen_len": 12.6084, "eval_loss": 2.016404151916504, "eval_precisions_1": 40.876, "eval_precisions_2": 14.1994, "eval_precisions_3": 6.595, "eval_precisions_4": 2.9338, "eval_ref_len": 21250, "eval_rouge1": 0.2931, "eval_rouge2": 0.1291, "eval_rougeL": 0.2817, "eval_rougeLsum": 0.2818, "eval_runtime": 817.9822, "eval_samples_per_second": 2.694, "eval_steps_per_second": 0.674, "eval_sys_len": 16599, "eval_totals_1": 16599, "eval_totals_2": 14395, "eval_totals_3": 12191, "eval_totals_4": 9987, "step": 291 }, { "epoch": 2.99, "learning_rate": 0.0001, "loss": 2.3464, "step": 436 }, { "epoch": 2.99, "eval_bleu": 9.2407, "eval_bp": 0.7935, "eval_counts_1": 7251, "eval_counts_2": 2326, "eval_counts_3": 969, "eval_counts_4": 400, "eval_exact_match": 0.0073, "eval_f1": 0.3114, "eval_gen_len": 13.2296, "eval_loss": 1.9138075113296509, "eval_precisions_1": 42.0129, "eval_precisions_2": 15.45, "eval_precisions_3": 7.5403, "eval_precisions_4": 3.7569, "eval_ref_len": 21250, "eval_rouge1": 0.3162, "eval_rouge2": 0.1456, "eval_rougeL": 0.3031, "eval_rougeLsum": 0.3031, "eval_runtime": 765.0466, "eval_samples_per_second": 2.881, "eval_steps_per_second": 0.72, "eval_sys_len": 17259, "eval_totals_1": 17259, "eval_totals_2": 15055, "eval_totals_3": 12851, "eval_totals_4": 10647, "step": 436 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 2.1679, "step": 582 }, { "epoch": 4.0, "eval_bleu": 9.6363, "eval_bp": 0.7795, "eval_counts_1": 7382, "eval_counts_2": 2393, "eval_counts_3": 1006, "eval_counts_4": 434, "eval_exact_match": 0.0109, "eval_f1": 0.3226, "eval_gen_len": 13.1207, "eval_loss": 1.8524010181427002, "eval_precisions_1": 43.3903, "eval_precisions_2": 16.1591, "eval_precisions_3": 7.981, "eval_precisions_4": 4.1727, "eval_ref_len": 21250, "eval_rouge1": 0.3272, "eval_rouge2": 0.1504, "eval_rougeL": 0.3147, "eval_rougeLsum": 0.3149, "eval_runtime": 882.4242, "eval_samples_per_second": 2.498, "eval_steps_per_second": 0.624, "eval_sys_len": 17013, "eval_totals_1": 17013, "eval_totals_2": 14809, "eval_totals_3": 12605, "eval_totals_4": 10401, "step": 582 }, { "epoch": 5.0, "learning_rate": 0.0001, "loss": 2.0454, "step": 728 }, { "epoch": 5.0, "eval_bleu": 10.3812, "eval_bp": 0.7665, "eval_counts_1": 7581, "eval_counts_2": 2555, "eval_counts_3": 1111, "eval_counts_4": 482, "eval_exact_match": 0.0132, "eval_f1": 0.3357, "eval_gen_len": 12.9782, "eval_loss": 1.7996737957000732, "eval_precisions_1": 45.1599, "eval_precisions_2": 17.5204, "eval_precisions_3": 8.9749, "eval_precisions_4": 4.7371, "eval_ref_len": 21250, "eval_rouge1": 0.3401, "eval_rouge2": 0.1606, "eval_rougeL": 0.3278, "eval_rougeLsum": 0.3279, "eval_runtime": 519.8377, "eval_samples_per_second": 4.24, "eval_steps_per_second": 1.06, "eval_sys_len": 16787, "eval_totals_1": 16787, "eval_totals_2": 14583, "eval_totals_3": 12379, "eval_totals_4": 10175, "step": 728 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 1.9502, "step": 873 }, { "epoch": 5.99, "eval_bleu": 10.7668, "eval_bp": 0.7992, "eval_counts_1": 7759, "eval_counts_2": 2618, "eval_counts_3": 1162, "eval_counts_4": 511, "eval_exact_match": 0.0127, "eval_f1": 0.3406, "eval_gen_len": 13.4841, "eval_loss": 1.7696163654327393, "eval_precisions_1": 44.6973, "eval_precisions_2": 17.2748, "eval_precisions_3": 8.9723, "eval_precisions_4": 4.7548, "eval_ref_len": 21250, "eval_rouge1": 0.3452, "eval_rouge2": 0.1631, "eval_rougeL": 0.3321, "eval_rougeLsum": 0.3319, "eval_runtime": 542.6731, "eval_samples_per_second": 4.061, "eval_steps_per_second": 1.015, "eval_sys_len": 17359, "eval_totals_1": 17359, "eval_totals_2": 15155, "eval_totals_3": 12951, "eval_totals_4": 10747, "step": 873 }, { "epoch": 7.0, "learning_rate": 0.0001, "loss": 1.8414, "step": 1019 }, { "epoch": 7.0, "eval_bleu": 11.3408, "eval_bp": 0.7721, "eval_counts_1": 7791, "eval_counts_2": 2693, "eval_counts_3": 1236, "eval_counts_4": 570, "eval_exact_match": 0.015, "eval_f1": 0.347, "eval_gen_len": 13.0563, "eval_loss": 1.7471755743026733, "eval_precisions_1": 46.147, "eval_precisions_2": 18.3459, "eval_precisions_3": 9.9078, "eval_precisions_4": 5.5496, "eval_ref_len": 21250, "eval_rouge1": 0.3513, "eval_rouge2": 0.1679, "eval_rougeL": 0.3391, "eval_rougeLsum": 0.3391, "eval_runtime": 455.2485, "eval_samples_per_second": 4.841, "eval_steps_per_second": 1.21, "eval_sys_len": 16883, "eval_totals_1": 16883, "eval_totals_2": 14679, "eval_totals_3": 12475, "eval_totals_4": 10271, "step": 1019 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 1.7614, "step": 1165 }, { "epoch": 8.0, "eval_bleu": 11.8447, "eval_bp": 0.8198, "eval_counts_1": 8024, "eval_counts_2": 2799, "eval_counts_3": 1296, "eval_counts_4": 610, "eval_exact_match": 0.0145, "eval_f1": 0.352, "eval_gen_len": 13.515, "eval_loss": 1.7203415632247925, "eval_precisions_1": 45.2643, "eval_precisions_2": 18.0313, "eval_precisions_3": 9.7305, "eval_precisions_4": 5.4881, "eval_ref_len": 21250, "eval_rouge1": 0.3565, "eval_rouge2": 0.1711, "eval_rougeL": 0.3422, "eval_rougeLsum": 0.3423, "eval_runtime": 457.6091, "eval_samples_per_second": 4.816, "eval_steps_per_second": 1.204, "eval_sys_len": 17727, "eval_totals_1": 17727, "eval_totals_2": 15523, "eval_totals_3": 13319, "eval_totals_4": 11115, "step": 1165 }, { "epoch": 9.0, "learning_rate": 0.0001, "loss": 1.6997, "step": 1310 }, { "epoch": 9.0, "eval_bleu": 11.9689, "eval_bp": 0.8027, "eval_counts_1": 8046, "eval_counts_2": 2835, "eval_counts_3": 1314, "eval_counts_4": 615, "eval_exact_match": 0.0168, "eval_f1": 0.3568, "eval_gen_len": 13.4306, "eval_loss": 1.7166661024093628, "eval_precisions_1": 46.183, "eval_precisions_2": 18.6293, "eval_precisions_3": 10.0968, "eval_precisions_4": 5.6892, "eval_ref_len": 21250, "eval_rouge1": 0.3613, "eval_rouge2": 0.1746, "eval_rougeL": 0.3466, "eval_rougeLsum": 0.3466, "eval_runtime": 543.9804, "eval_samples_per_second": 4.052, "eval_steps_per_second": 1.013, "eval_sys_len": 17422, "eval_totals_1": 17422, "eval_totals_2": 15218, "eval_totals_3": 13014, "eval_totals_4": 10810, "step": 1310 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 1.6159, "step": 1456 }, { "epoch": 10.0, "eval_bleu": 12.5678, "eval_bp": 0.8182, "eval_counts_1": 8087, "eval_counts_2": 2928, "eval_counts_3": 1395, "eval_counts_4": 681, "eval_exact_match": 0.0181, "eval_f1": 0.3564, "eval_gen_len": 13.5268, "eval_loss": 1.689180612564087, "eval_precisions_1": 45.6944, "eval_precisions_2": 18.8976, "eval_precisions_3": 10.4966, "eval_precisions_4": 6.1429, "eval_ref_len": 21250, "eval_rouge1": 0.3612, "eval_rouge2": 0.1795, "eval_rougeL": 0.3485, "eval_rougeLsum": 0.3482, "eval_runtime": 661.754, "eval_samples_per_second": 3.331, "eval_steps_per_second": 0.833, "eval_sys_len": 17698, "eval_totals_1": 17698, "eval_totals_2": 15494, "eval_totals_3": 13290, "eval_totals_4": 11086, "step": 1456 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.5681, "step": 1601 }, { "epoch": 10.99, "eval_bleu": 12.497, "eval_bp": 0.813, "eval_counts_1": 8154, "eval_counts_2": 2933, "eval_counts_3": 1383, "eval_counts_4": 664, "eval_exact_match": 0.0168, "eval_f1": 0.3605, "eval_gen_len": 13.6044, "eval_loss": 1.6923038959503174, "eval_precisions_1": 46.3164, "eval_precisions_2": 19.0442, "eval_precisions_3": 10.4797, "eval_precisions_4": 6.0402, "eval_ref_len": 21250, "eval_rouge1": 0.3654, "eval_rouge2": 0.1789, "eval_rougeL": 0.3506, "eval_rougeLsum": 0.3505, "eval_runtime": 528.2815, "eval_samples_per_second": 4.172, "eval_steps_per_second": 1.043, "eval_sys_len": 17605, "eval_totals_1": 17605, "eval_totals_2": 15401, "eval_totals_3": 13197, "eval_totals_4": 10993, "step": 1601 }, { "epoch": 12.0, "learning_rate": 0.0001, "loss": 1.4987, "step": 1747 }, { "epoch": 12.0, "eval_bleu": 12.8959, "eval_bp": 0.8169, "eval_counts_1": 8295, "eval_counts_2": 3011, "eval_counts_3": 1432, "eval_counts_4": 697, "eval_exact_match": 0.0181, "eval_f1": 0.3675, "eval_gen_len": 13.6134, "eval_loss": 1.6824951171875, "eval_precisions_1": 46.928, "eval_precisions_2": 19.461, "eval_precisions_3": 10.7929, "eval_precisions_4": 6.2997, "eval_ref_len": 21250, "eval_rouge1": 0.3734, "eval_rouge2": 0.1846, "eval_rougeL": 0.3576, "eval_rougeLsum": 0.3577, "eval_runtime": 636.4551, "eval_samples_per_second": 3.463, "eval_steps_per_second": 0.866, "eval_sys_len": 17676, "eval_totals_1": 17676, "eval_totals_2": 15472, "eval_totals_3": 13268, "eval_totals_4": 11064, "step": 1747 }, { "epoch": 13.0, "learning_rate": 0.0001, "loss": 1.4461, "step": 1893 }, { "epoch": 13.0, "eval_bleu": 12.8688, "eval_bp": 0.8139, "eval_counts_1": 8246, "eval_counts_2": 3005, "eval_counts_3": 1424, "eval_counts_4": 700, "eval_exact_match": 0.0191, "eval_f1": 0.3658, "eval_gen_len": 13.5812, "eval_loss": 1.6783509254455566, "eval_precisions_1": 46.7964, "eval_precisions_2": 19.4915, "eval_precisions_3": 10.7773, "eval_precisions_4": 6.3584, "eval_ref_len": 21250, "eval_rouge1": 0.3725, "eval_rouge2": 0.1857, "eval_rougeL": 0.358, "eval_rougeLsum": 0.3576, "eval_runtime": 521.7174, "eval_samples_per_second": 4.225, "eval_steps_per_second": 1.056, "eval_sys_len": 17621, "eval_totals_1": 17621, "eval_totals_2": 15417, "eval_totals_3": 13213, "eval_totals_4": 11009, "step": 1893 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 1.4002, "step": 2038 }, { "epoch": 13.99, "eval_bleu": 13.4526, "eval_bp": 0.8329, "eval_counts_1": 8457, "eval_counts_2": 3130, "eval_counts_3": 1504, "eval_counts_4": 745, "eval_exact_match": 0.02, "eval_f1": 0.3727, "eval_gen_len": 13.9179, "eval_loss": 1.6725177764892578, "eval_precisions_1": 47.0749, "eval_precisions_2": 19.8591, "eval_precisions_3": 11.0939, "eval_precisions_4": 6.5621, "eval_ref_len": 21250, "eval_rouge1": 0.3797, "eval_rouge2": 0.1915, "eval_rougeL": 0.3637, "eval_rougeLsum": 0.3634, "eval_runtime": 592.5507, "eval_samples_per_second": 3.72, "eval_steps_per_second": 0.93, "eval_sys_len": 17965, "eval_totals_1": 17965, "eval_totals_2": 15761, "eval_totals_3": 13557, "eval_totals_4": 11353, "step": 2038 }, { "epoch": 15.0, "learning_rate": 0.0001, "loss": 1.3391, "step": 2184 }, { "epoch": 15.0, "eval_bleu": 13.211, "eval_bp": 0.8283, "eval_counts_1": 8443, "eval_counts_2": 3091, "eval_counts_3": 1468, "eval_counts_4": 719, "eval_exact_match": 0.0204, "eval_f1": 0.3737, "eval_gen_len": 13.9133, "eval_loss": 1.6783130168914795, "eval_precisions_1": 47.2177, "eval_precisions_2": 19.7168, "eval_precisions_3": 10.8959, "eval_precisions_4": 6.3803, "eval_ref_len": 21250, "eval_rouge1": 0.3804, "eval_rouge2": 0.1901, "eval_rougeL": 0.3634, "eval_rougeLsum": 0.363, "eval_runtime": 547.4964, "eval_samples_per_second": 4.026, "eval_steps_per_second": 1.006, "eval_sys_len": 17881, "eval_totals_1": 17881, "eval_totals_2": 15677, "eval_totals_3": 13473, "eval_totals_4": 11269, "step": 2184 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 1.2921, "step": 2330 }, { "epoch": 16.0, "eval_bleu": 13.4907, "eval_bp": 0.8373, "eval_counts_1": 8457, "eval_counts_2": 3147, "eval_counts_3": 1511, "eval_counts_4": 747, "eval_exact_match": 0.0195, "eval_f1": 0.3716, "eval_gen_len": 13.9882, "eval_loss": 1.6737552881240845, "eval_precisions_1": 46.8662, "eval_precisions_2": 19.8662, "eval_precisions_3": 11.0801, "eval_precisions_4": 6.5337, "eval_ref_len": 21250, "eval_rouge1": 0.3782, "eval_rouge2": 0.1902, "eval_rougeL": 0.3624, "eval_rougeLsum": 0.3624, "eval_runtime": 652.072, "eval_samples_per_second": 3.38, "eval_steps_per_second": 0.845, "eval_sys_len": 18045, "eval_totals_1": 18045, "eval_totals_2": 15841, "eval_totals_3": 13637, "eval_totals_4": 11433, "step": 2330 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 1.2572, "step": 2475 }, { "epoch": 17.0, "eval_bleu": 13.8581, "eval_bp": 0.8267, "eval_counts_1": 8473, "eval_counts_2": 3219, "eval_counts_3": 1561, "eval_counts_4": 783, "eval_exact_match": 0.02, "eval_f1": 0.3753, "eval_gen_len": 13.7618, "eval_loss": 1.676971435546875, "eval_precisions_1": 47.4598, "eval_precisions_2": 20.57, "eval_precisions_3": 11.6103, "eval_precisions_4": 6.9656, "eval_ref_len": 21250, "eval_rouge1": 0.3821, "eval_rouge2": 0.1948, "eval_rougeL": 0.3669, "eval_rougeLsum": 0.3665, "eval_runtime": 452.0799, "eval_samples_per_second": 4.875, "eval_steps_per_second": 1.219, "eval_sys_len": 17853, "eval_totals_1": 17853, "eval_totals_2": 15649, "eval_totals_3": 13445, "eval_totals_4": 11241, "step": 2475 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 1.199, "step": 2621 }, { "epoch": 18.0, "eval_bleu": 13.7496, "eval_bp": 0.8326, "eval_counts_1": 8484, "eval_counts_2": 3190, "eval_counts_3": 1551, "eval_counts_4": 771, "eval_exact_match": 0.0186, "eval_f1": 0.3745, "eval_gen_len": 13.8798, "eval_loss": 1.6934301853179932, "eval_precisions_1": 47.2409, "eval_precisions_2": 20.2475, "eval_precisions_3": 11.4456, "eval_precisions_4": 6.7947, "eval_ref_len": 21250, "eval_rouge1": 0.3812, "eval_rouge2": 0.1922, "eval_rougeL": 0.3657, "eval_rougeLsum": 0.3658, "eval_runtime": 869.0302, "eval_samples_per_second": 2.536, "eval_steps_per_second": 0.634, "eval_sys_len": 17959, "eval_totals_1": 17959, "eval_totals_2": 15755, "eval_totals_3": 13551, "eval_totals_4": 11347, "step": 2621 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 1.1668, "step": 2766 }, { "epoch": 18.99, "eval_bleu": 13.7379, "eval_bp": 0.8395, "eval_counts_1": 8504, "eval_counts_2": 3179, "eval_counts_3": 1541, "eval_counts_4": 776, "eval_exact_match": 0.0204, "eval_f1": 0.376, "eval_gen_len": 13.9256, "eval_loss": 1.6926020383834839, "eval_precisions_1": 47.0198, "eval_precisions_2": 20.0164, "eval_precisions_3": 11.2663, "eval_precisions_4": 6.7631, "eval_ref_len": 21250, "eval_rouge1": 0.3828, "eval_rouge2": 0.1939, "eval_rougeL": 0.3665, "eval_rougeLsum": 0.3665, "eval_runtime": 580.7372, "eval_samples_per_second": 3.795, "eval_steps_per_second": 0.949, "eval_sys_len": 18086, "eval_totals_1": 18086, "eval_totals_2": 15882, "eval_totals_3": 13678, "eval_totals_4": 11474, "step": 2766 }, { "epoch": 19.91, "learning_rate": 0.0001, "loss": 1.1164, "step": 2900 }, { "epoch": 19.91, "eval_bleu": 14.1906, "eval_bp": 0.8529, "eval_counts_1": 8625, "eval_counts_2": 3250, "eval_counts_3": 1609, "eval_counts_4": 820, "eval_exact_match": 0.0204, "eval_f1": 0.3803, "eval_gen_len": 14.069, "eval_loss": 1.7026218175888062, "eval_precisions_1": 47.0463, "eval_precisions_2": 20.15, "eval_precisions_3": 11.5548, "eval_precisions_4": 6.996, "eval_ref_len": 21250, "eval_rouge1": 0.3874, "eval_rouge2": 0.1964, "eval_rougeL": 0.3716, "eval_rougeLsum": 0.3715, "eval_runtime": 462.8982, "eval_samples_per_second": 4.761, "eval_steps_per_second": 1.19, "eval_sys_len": 18333, "eval_totals_1": 18333, "eval_totals_2": 16129, "eval_totals_3": 13925, "eval_totals_4": 11721, "step": 2900 }, { "epoch": 19.91, "step": 2900, "total_flos": 2.54036307345408e+17, "train_loss": 1.724, "train_runtime": 25476.0, "train_samples_per_second": 7.312, "train_steps_per_second": 0.114 } ], "logging_steps": 500, "max_steps": 2900, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.54036307345408e+17, "trial_name": null, "trial_params": null }