| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.914163090128756, | |
| "eval_steps": 500, | |
| "global_step": 2900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1671, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 5.9441, | |
| "eval_bp": 0.7156, | |
| "eval_counts_1": 6177, | |
| "eval_counts_2": 1669, | |
| "eval_counts_3": 604, | |
| "eval_counts_4": 179, | |
| "eval_exact_match": 0.0023, | |
| "eval_f1": 0.2528, | |
| "eval_gen_len": 12.0218, | |
| "eval_loss": 2.190216541290283, | |
| "eval_precisions_1": 38.7954, | |
| "eval_precisions_2": 12.1665, | |
| "eval_precisions_3": 5.2458, | |
| "eval_precisions_4": 1.9227, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.2595, | |
| "eval_rouge2": 0.1035, | |
| "eval_rougeL": 0.2491, | |
| "eval_rougeLsum": 0.2492, | |
| "eval_runtime": 793.0147, | |
| "eval_samples_per_second": 2.779, | |
| "eval_steps_per_second": 0.695, | |
| "eval_sys_len": 15922, | |
| "eval_totals_1": 15922, | |
| "eval_totals_2": 13718, | |
| "eval_totals_3": 11514, | |
| "eval_totals_4": 9310, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.5597, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 7.7787, | |
| "eval_bp": 0.7556, | |
| "eval_counts_1": 6785, | |
| "eval_counts_2": 2044, | |
| "eval_counts_3": 804, | |
| "eval_counts_4": 293, | |
| "eval_exact_match": 0.0064, | |
| "eval_f1": 0.2864, | |
| "eval_gen_len": 12.6084, | |
| "eval_loss": 2.016404151916504, | |
| "eval_precisions_1": 40.876, | |
| "eval_precisions_2": 14.1994, | |
| "eval_precisions_3": 6.595, | |
| "eval_precisions_4": 2.9338, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.2931, | |
| "eval_rouge2": 0.1291, | |
| "eval_rougeL": 0.2817, | |
| "eval_rougeLsum": 0.2818, | |
| "eval_runtime": 817.9822, | |
| "eval_samples_per_second": 2.694, | |
| "eval_steps_per_second": 0.674, | |
| "eval_sys_len": 16599, | |
| "eval_totals_1": 16599, | |
| "eval_totals_2": 14395, | |
| "eval_totals_3": 12191, | |
| "eval_totals_4": 9987, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 0.0001, | |
| "loss": 2.3464, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_bleu": 9.2407, | |
| "eval_bp": 0.7935, | |
| "eval_counts_1": 7251, | |
| "eval_counts_2": 2326, | |
| "eval_counts_3": 969, | |
| "eval_counts_4": 400, | |
| "eval_exact_match": 0.0073, | |
| "eval_f1": 0.3114, | |
| "eval_gen_len": 13.2296, | |
| "eval_loss": 1.9138075113296509, | |
| "eval_precisions_1": 42.0129, | |
| "eval_precisions_2": 15.45, | |
| "eval_precisions_3": 7.5403, | |
| "eval_precisions_4": 3.7569, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3162, | |
| "eval_rouge2": 0.1456, | |
| "eval_rougeL": 0.3031, | |
| "eval_rougeLsum": 0.3031, | |
| "eval_runtime": 765.0466, | |
| "eval_samples_per_second": 2.881, | |
| "eval_steps_per_second": 0.72, | |
| "eval_sys_len": 17259, | |
| "eval_totals_1": 17259, | |
| "eval_totals_2": 15055, | |
| "eval_totals_3": 12851, | |
| "eval_totals_4": 10647, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1679, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 9.6363, | |
| "eval_bp": 0.7795, | |
| "eval_counts_1": 7382, | |
| "eval_counts_2": 2393, | |
| "eval_counts_3": 1006, | |
| "eval_counts_4": 434, | |
| "eval_exact_match": 0.0109, | |
| "eval_f1": 0.3226, | |
| "eval_gen_len": 13.1207, | |
| "eval_loss": 1.8524010181427002, | |
| "eval_precisions_1": 43.3903, | |
| "eval_precisions_2": 16.1591, | |
| "eval_precisions_3": 7.981, | |
| "eval_precisions_4": 4.1727, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3272, | |
| "eval_rouge2": 0.1504, | |
| "eval_rougeL": 0.3147, | |
| "eval_rougeLsum": 0.3149, | |
| "eval_runtime": 882.4242, | |
| "eval_samples_per_second": 2.498, | |
| "eval_steps_per_second": 0.624, | |
| "eval_sys_len": 17013, | |
| "eval_totals_1": 17013, | |
| "eval_totals_2": 14809, | |
| "eval_totals_3": 12605, | |
| "eval_totals_4": 10401, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0454, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu": 10.3812, | |
| "eval_bp": 0.7665, | |
| "eval_counts_1": 7581, | |
| "eval_counts_2": 2555, | |
| "eval_counts_3": 1111, | |
| "eval_counts_4": 482, | |
| "eval_exact_match": 0.0132, | |
| "eval_f1": 0.3357, | |
| "eval_gen_len": 12.9782, | |
| "eval_loss": 1.7996737957000732, | |
| "eval_precisions_1": 45.1599, | |
| "eval_precisions_2": 17.5204, | |
| "eval_precisions_3": 8.9749, | |
| "eval_precisions_4": 4.7371, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3401, | |
| "eval_rouge2": 0.1606, | |
| "eval_rougeL": 0.3278, | |
| "eval_rougeLsum": 0.3279, | |
| "eval_runtime": 519.8377, | |
| "eval_samples_per_second": 4.24, | |
| "eval_steps_per_second": 1.06, | |
| "eval_sys_len": 16787, | |
| "eval_totals_1": 16787, | |
| "eval_totals_2": 14583, | |
| "eval_totals_3": 12379, | |
| "eval_totals_4": 10175, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9502, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_bleu": 10.7668, | |
| "eval_bp": 0.7992, | |
| "eval_counts_1": 7759, | |
| "eval_counts_2": 2618, | |
| "eval_counts_3": 1162, | |
| "eval_counts_4": 511, | |
| "eval_exact_match": 0.0127, | |
| "eval_f1": 0.3406, | |
| "eval_gen_len": 13.4841, | |
| "eval_loss": 1.7696163654327393, | |
| "eval_precisions_1": 44.6973, | |
| "eval_precisions_2": 17.2748, | |
| "eval_precisions_3": 8.9723, | |
| "eval_precisions_4": 4.7548, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3452, | |
| "eval_rouge2": 0.1631, | |
| "eval_rougeL": 0.3321, | |
| "eval_rougeLsum": 0.3319, | |
| "eval_runtime": 542.6731, | |
| "eval_samples_per_second": 4.061, | |
| "eval_steps_per_second": 1.015, | |
| "eval_sys_len": 17359, | |
| "eval_totals_1": 17359, | |
| "eval_totals_2": 15155, | |
| "eval_totals_3": 12951, | |
| "eval_totals_4": 10747, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8414, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu": 11.3408, | |
| "eval_bp": 0.7721, | |
| "eval_counts_1": 7791, | |
| "eval_counts_2": 2693, | |
| "eval_counts_3": 1236, | |
| "eval_counts_4": 570, | |
| "eval_exact_match": 0.015, | |
| "eval_f1": 0.347, | |
| "eval_gen_len": 13.0563, | |
| "eval_loss": 1.7471755743026733, | |
| "eval_precisions_1": 46.147, | |
| "eval_precisions_2": 18.3459, | |
| "eval_precisions_3": 9.9078, | |
| "eval_precisions_4": 5.5496, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3513, | |
| "eval_rouge2": 0.1679, | |
| "eval_rougeL": 0.3391, | |
| "eval_rougeLsum": 0.3391, | |
| "eval_runtime": 455.2485, | |
| "eval_samples_per_second": 4.841, | |
| "eval_steps_per_second": 1.21, | |
| "eval_sys_len": 16883, | |
| "eval_totals_1": 16883, | |
| "eval_totals_2": 14679, | |
| "eval_totals_3": 12475, | |
| "eval_totals_4": 10271, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7614, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 11.8447, | |
| "eval_bp": 0.8198, | |
| "eval_counts_1": 8024, | |
| "eval_counts_2": 2799, | |
| "eval_counts_3": 1296, | |
| "eval_counts_4": 610, | |
| "eval_exact_match": 0.0145, | |
| "eval_f1": 0.352, | |
| "eval_gen_len": 13.515, | |
| "eval_loss": 1.7203415632247925, | |
| "eval_precisions_1": 45.2643, | |
| "eval_precisions_2": 18.0313, | |
| "eval_precisions_3": 9.7305, | |
| "eval_precisions_4": 5.4881, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3565, | |
| "eval_rouge2": 0.1711, | |
| "eval_rougeL": 0.3422, | |
| "eval_rougeLsum": 0.3423, | |
| "eval_runtime": 457.6091, | |
| "eval_samples_per_second": 4.816, | |
| "eval_steps_per_second": 1.204, | |
| "eval_sys_len": 17727, | |
| "eval_totals_1": 17727, | |
| "eval_totals_2": 15523, | |
| "eval_totals_3": 13319, | |
| "eval_totals_4": 11115, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6997, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 11.9689, | |
| "eval_bp": 0.8027, | |
| "eval_counts_1": 8046, | |
| "eval_counts_2": 2835, | |
| "eval_counts_3": 1314, | |
| "eval_counts_4": 615, | |
| "eval_exact_match": 0.0168, | |
| "eval_f1": 0.3568, | |
| "eval_gen_len": 13.4306, | |
| "eval_loss": 1.7166661024093628, | |
| "eval_precisions_1": 46.183, | |
| "eval_precisions_2": 18.6293, | |
| "eval_precisions_3": 10.0968, | |
| "eval_precisions_4": 5.6892, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3613, | |
| "eval_rouge2": 0.1746, | |
| "eval_rougeL": 0.3466, | |
| "eval_rougeLsum": 0.3466, | |
| "eval_runtime": 543.9804, | |
| "eval_samples_per_second": 4.052, | |
| "eval_steps_per_second": 1.013, | |
| "eval_sys_len": 17422, | |
| "eval_totals_1": 17422, | |
| "eval_totals_2": 15218, | |
| "eval_totals_3": 13014, | |
| "eval_totals_4": 10810, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6159, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 12.5678, | |
| "eval_bp": 0.8182, | |
| "eval_counts_1": 8087, | |
| "eval_counts_2": 2928, | |
| "eval_counts_3": 1395, | |
| "eval_counts_4": 681, | |
| "eval_exact_match": 0.0181, | |
| "eval_f1": 0.3564, | |
| "eval_gen_len": 13.5268, | |
| "eval_loss": 1.689180612564087, | |
| "eval_precisions_1": 45.6944, | |
| "eval_precisions_2": 18.8976, | |
| "eval_precisions_3": 10.4966, | |
| "eval_precisions_4": 6.1429, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3612, | |
| "eval_rouge2": 0.1795, | |
| "eval_rougeL": 0.3485, | |
| "eval_rougeLsum": 0.3482, | |
| "eval_runtime": 661.754, | |
| "eval_samples_per_second": 3.331, | |
| "eval_steps_per_second": 0.833, | |
| "eval_sys_len": 17698, | |
| "eval_totals_1": 17698, | |
| "eval_totals_2": 15494, | |
| "eval_totals_3": 13290, | |
| "eval_totals_4": 11086, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5681, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_bleu": 12.497, | |
| "eval_bp": 0.813, | |
| "eval_counts_1": 8154, | |
| "eval_counts_2": 2933, | |
| "eval_counts_3": 1383, | |
| "eval_counts_4": 664, | |
| "eval_exact_match": 0.0168, | |
| "eval_f1": 0.3605, | |
| "eval_gen_len": 13.6044, | |
| "eval_loss": 1.6923038959503174, | |
| "eval_precisions_1": 46.3164, | |
| "eval_precisions_2": 19.0442, | |
| "eval_precisions_3": 10.4797, | |
| "eval_precisions_4": 6.0402, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3654, | |
| "eval_rouge2": 0.1789, | |
| "eval_rougeL": 0.3506, | |
| "eval_rougeLsum": 0.3505, | |
| "eval_runtime": 528.2815, | |
| "eval_samples_per_second": 4.172, | |
| "eval_steps_per_second": 1.043, | |
| "eval_sys_len": 17605, | |
| "eval_totals_1": 17605, | |
| "eval_totals_2": 15401, | |
| "eval_totals_3": 13197, | |
| "eval_totals_4": 10993, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4987, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bleu": 12.8959, | |
| "eval_bp": 0.8169, | |
| "eval_counts_1": 8295, | |
| "eval_counts_2": 3011, | |
| "eval_counts_3": 1432, | |
| "eval_counts_4": 697, | |
| "eval_exact_match": 0.0181, | |
| "eval_f1": 0.3675, | |
| "eval_gen_len": 13.6134, | |
| "eval_loss": 1.6824951171875, | |
| "eval_precisions_1": 46.928, | |
| "eval_precisions_2": 19.461, | |
| "eval_precisions_3": 10.7929, | |
| "eval_precisions_4": 6.2997, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3734, | |
| "eval_rouge2": 0.1846, | |
| "eval_rougeL": 0.3576, | |
| "eval_rougeLsum": 0.3577, | |
| "eval_runtime": 636.4551, | |
| "eval_samples_per_second": 3.463, | |
| "eval_steps_per_second": 0.866, | |
| "eval_sys_len": 17676, | |
| "eval_totals_1": 17676, | |
| "eval_totals_2": 15472, | |
| "eval_totals_3": 13268, | |
| "eval_totals_4": 11064, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4461, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 12.8688, | |
| "eval_bp": 0.8139, | |
| "eval_counts_1": 8246, | |
| "eval_counts_2": 3005, | |
| "eval_counts_3": 1424, | |
| "eval_counts_4": 700, | |
| "eval_exact_match": 0.0191, | |
| "eval_f1": 0.3658, | |
| "eval_gen_len": 13.5812, | |
| "eval_loss": 1.6783509254455566, | |
| "eval_precisions_1": 46.7964, | |
| "eval_precisions_2": 19.4915, | |
| "eval_precisions_3": 10.7773, | |
| "eval_precisions_4": 6.3584, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3725, | |
| "eval_rouge2": 0.1857, | |
| "eval_rougeL": 0.358, | |
| "eval_rougeLsum": 0.3576, | |
| "eval_runtime": 521.7174, | |
| "eval_samples_per_second": 4.225, | |
| "eval_steps_per_second": 1.056, | |
| "eval_sys_len": 17621, | |
| "eval_totals_1": 17621, | |
| "eval_totals_2": 15417, | |
| "eval_totals_3": 13213, | |
| "eval_totals_4": 11009, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4002, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_bleu": 13.4526, | |
| "eval_bp": 0.8329, | |
| "eval_counts_1": 8457, | |
| "eval_counts_2": 3130, | |
| "eval_counts_3": 1504, | |
| "eval_counts_4": 745, | |
| "eval_exact_match": 0.02, | |
| "eval_f1": 0.3727, | |
| "eval_gen_len": 13.9179, | |
| "eval_loss": 1.6725177764892578, | |
| "eval_precisions_1": 47.0749, | |
| "eval_precisions_2": 19.8591, | |
| "eval_precisions_3": 11.0939, | |
| "eval_precisions_4": 6.5621, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3797, | |
| "eval_rouge2": 0.1915, | |
| "eval_rougeL": 0.3637, | |
| "eval_rougeLsum": 0.3634, | |
| "eval_runtime": 592.5507, | |
| "eval_samples_per_second": 3.72, | |
| "eval_steps_per_second": 0.93, | |
| "eval_sys_len": 17965, | |
| "eval_totals_1": 17965, | |
| "eval_totals_2": 15761, | |
| "eval_totals_3": 13557, | |
| "eval_totals_4": 11353, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3391, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bleu": 13.211, | |
| "eval_bp": 0.8283, | |
| "eval_counts_1": 8443, | |
| "eval_counts_2": 3091, | |
| "eval_counts_3": 1468, | |
| "eval_counts_4": 719, | |
| "eval_exact_match": 0.0204, | |
| "eval_f1": 0.3737, | |
| "eval_gen_len": 13.9133, | |
| "eval_loss": 1.6783130168914795, | |
| "eval_precisions_1": 47.2177, | |
| "eval_precisions_2": 19.7168, | |
| "eval_precisions_3": 10.8959, | |
| "eval_precisions_4": 6.3803, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3804, | |
| "eval_rouge2": 0.1901, | |
| "eval_rougeL": 0.3634, | |
| "eval_rougeLsum": 0.363, | |
| "eval_runtime": 547.4964, | |
| "eval_samples_per_second": 4.026, | |
| "eval_steps_per_second": 1.006, | |
| "eval_sys_len": 17881, | |
| "eval_totals_1": 17881, | |
| "eval_totals_2": 15677, | |
| "eval_totals_3": 13473, | |
| "eval_totals_4": 11269, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2921, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 13.4907, | |
| "eval_bp": 0.8373, | |
| "eval_counts_1": 8457, | |
| "eval_counts_2": 3147, | |
| "eval_counts_3": 1511, | |
| "eval_counts_4": 747, | |
| "eval_exact_match": 0.0195, | |
| "eval_f1": 0.3716, | |
| "eval_gen_len": 13.9882, | |
| "eval_loss": 1.6737552881240845, | |
| "eval_precisions_1": 46.8662, | |
| "eval_precisions_2": 19.8662, | |
| "eval_precisions_3": 11.0801, | |
| "eval_precisions_4": 6.5337, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3782, | |
| "eval_rouge2": 0.1902, | |
| "eval_rougeL": 0.3624, | |
| "eval_rougeLsum": 0.3624, | |
| "eval_runtime": 652.072, | |
| "eval_samples_per_second": 3.38, | |
| "eval_steps_per_second": 0.845, | |
| "eval_sys_len": 18045, | |
| "eval_totals_1": 18045, | |
| "eval_totals_2": 15841, | |
| "eval_totals_3": 13637, | |
| "eval_totals_4": 11433, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2572, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bleu": 13.8581, | |
| "eval_bp": 0.8267, | |
| "eval_counts_1": 8473, | |
| "eval_counts_2": 3219, | |
| "eval_counts_3": 1561, | |
| "eval_counts_4": 783, | |
| "eval_exact_match": 0.02, | |
| "eval_f1": 0.3753, | |
| "eval_gen_len": 13.7618, | |
| "eval_loss": 1.676971435546875, | |
| "eval_precisions_1": 47.4598, | |
| "eval_precisions_2": 20.57, | |
| "eval_precisions_3": 11.6103, | |
| "eval_precisions_4": 6.9656, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3821, | |
| "eval_rouge2": 0.1948, | |
| "eval_rougeL": 0.3669, | |
| "eval_rougeLsum": 0.3665, | |
| "eval_runtime": 452.0799, | |
| "eval_samples_per_second": 4.875, | |
| "eval_steps_per_second": 1.219, | |
| "eval_sys_len": 17853, | |
| "eval_totals_1": 17853, | |
| "eval_totals_2": 15649, | |
| "eval_totals_3": 13445, | |
| "eval_totals_4": 11241, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.199, | |
| "step": 2621 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 13.7496, | |
| "eval_bp": 0.8326, | |
| "eval_counts_1": 8484, | |
| "eval_counts_2": 3190, | |
| "eval_counts_3": 1551, | |
| "eval_counts_4": 771, | |
| "eval_exact_match": 0.0186, | |
| "eval_f1": 0.3745, | |
| "eval_gen_len": 13.8798, | |
| "eval_loss": 1.6934301853179932, | |
| "eval_precisions_1": 47.2409, | |
| "eval_precisions_2": 20.2475, | |
| "eval_precisions_3": 11.4456, | |
| "eval_precisions_4": 6.7947, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3812, | |
| "eval_rouge2": 0.1922, | |
| "eval_rougeL": 0.3657, | |
| "eval_rougeLsum": 0.3658, | |
| "eval_runtime": 869.0302, | |
| "eval_samples_per_second": 2.536, | |
| "eval_steps_per_second": 0.634, | |
| "eval_sys_len": 17959, | |
| "eval_totals_1": 17959, | |
| "eval_totals_2": 15755, | |
| "eval_totals_3": 13551, | |
| "eval_totals_4": 11347, | |
| "step": 2621 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1668, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_bleu": 13.7379, | |
| "eval_bp": 0.8395, | |
| "eval_counts_1": 8504, | |
| "eval_counts_2": 3179, | |
| "eval_counts_3": 1541, | |
| "eval_counts_4": 776, | |
| "eval_exact_match": 0.0204, | |
| "eval_f1": 0.376, | |
| "eval_gen_len": 13.9256, | |
| "eval_loss": 1.6926020383834839, | |
| "eval_precisions_1": 47.0198, | |
| "eval_precisions_2": 20.0164, | |
| "eval_precisions_3": 11.2663, | |
| "eval_precisions_4": 6.7631, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3828, | |
| "eval_rouge2": 0.1939, | |
| "eval_rougeL": 0.3665, | |
| "eval_rougeLsum": 0.3665, | |
| "eval_runtime": 580.7372, | |
| "eval_samples_per_second": 3.795, | |
| "eval_steps_per_second": 0.949, | |
| "eval_sys_len": 18086, | |
| "eval_totals_1": 18086, | |
| "eval_totals_2": 15882, | |
| "eval_totals_3": 13678, | |
| "eval_totals_4": 11474, | |
| "step": 2766 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1164, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "eval_bleu": 14.1906, | |
| "eval_bp": 0.8529, | |
| "eval_counts_1": 8625, | |
| "eval_counts_2": 3250, | |
| "eval_counts_3": 1609, | |
| "eval_counts_4": 820, | |
| "eval_exact_match": 0.0204, | |
| "eval_f1": 0.3803, | |
| "eval_gen_len": 14.069, | |
| "eval_loss": 1.7026218175888062, | |
| "eval_precisions_1": 47.0463, | |
| "eval_precisions_2": 20.15, | |
| "eval_precisions_3": 11.5548, | |
| "eval_precisions_4": 6.996, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3874, | |
| "eval_rouge2": 0.1964, | |
| "eval_rougeL": 0.3716, | |
| "eval_rougeLsum": 0.3715, | |
| "eval_runtime": 462.8982, | |
| "eval_samples_per_second": 4.761, | |
| "eval_steps_per_second": 1.19, | |
| "eval_sys_len": 18333, | |
| "eval_totals_1": 18333, | |
| "eval_totals_2": 16129, | |
| "eval_totals_3": 13925, | |
| "eval_totals_4": 11721, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "step": 2900, | |
| "total_flos": 2.54036307345408e+17, | |
| "train_loss": 1.724, | |
| "train_runtime": 25476.0, | |
| "train_samples_per_second": 7.312, | |
| "train_steps_per_second": 0.114 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2900, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 2.54036307345408e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |