| { | |
| "best_metric": 0.8713221549987793, | |
| "best_model_checkpoint": "flan_base_ck/checkpoint-44000", | |
| "epoch": 2.9809220985691574, | |
| "global_step": 45000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.779190955661545e-05, | |
| "loss": 1.2883, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_gen_len": 17.954155844155846, | |
| "eval_loss": 1.085593819618225, | |
| "eval_meteor": 48.7831, | |
| "eval_runtime": 411.5676, | |
| "eval_samples_per_second": 18.709, | |
| "eval_steps_per_second": 1.171, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.55838191132309e-05, | |
| "loss": 1.199, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_gen_len": 17.936233766233766, | |
| "eval_loss": 1.0542693138122559, | |
| "eval_meteor": 49.4398, | |
| "eval_runtime": 409.1897, | |
| "eval_samples_per_second": 18.818, | |
| "eval_steps_per_second": 1.178, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.337572866984632e-05, | |
| "loss": 1.1741, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 17.96727272727273, | |
| "eval_loss": 1.0262038707733154, | |
| "eval_meteor": 49.8428, | |
| "eval_runtime": 410.1577, | |
| "eval_samples_per_second": 18.773, | |
| "eval_steps_per_second": 1.175, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.55838191132309e-05, | |
| "loss": 1.0635, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_gen_len": 17.807662337662336, | |
| "eval_loss": 1.0363563299179077, | |
| "eval_meteor": 49.9292, | |
| "eval_runtime": 526.0268, | |
| "eval_samples_per_second": 14.638, | |
| "eval_steps_per_second": 1.831, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.447977389153861e-05, | |
| "loss": 1.0671, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_gen_len": 18.005974025974027, | |
| "eval_loss": 1.0267610549926758, | |
| "eval_meteor": 50.7066, | |
| "eval_runtime": 523.464, | |
| "eval_samples_per_second": 14.71, | |
| "eval_steps_per_second": 1.84, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.337572866984632e-05, | |
| "loss": 1.0609, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 17.876103896103896, | |
| "eval_loss": 1.0178078413009644, | |
| "eval_meteor": 50.2756, | |
| "eval_runtime": 522.3556, | |
| "eval_samples_per_second": 14.741, | |
| "eval_steps_per_second": 1.844, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.227168344815404e-05, | |
| "loss": 1.1359, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_gen_len": 17.943376623376622, | |
| "eval_loss": 1.0045710802078247, | |
| "eval_meteor": 50.6234, | |
| "eval_runtime": 524.2578, | |
| "eval_samples_per_second": 14.687, | |
| "eval_steps_per_second": 1.837, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 9.116763822646175e-05, | |
| "loss": 1.1026, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_gen_len": 17.898441558441558, | |
| "eval_loss": 0.9982025623321533, | |
| "eval_meteor": 50.4994, | |
| "eval_runtime": 522.8838, | |
| "eval_samples_per_second": 14.726, | |
| "eval_steps_per_second": 1.842, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 9.006359300476947e-05, | |
| "loss": 1.0998, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_gen_len": 17.898441558441558, | |
| "eval_loss": 0.9884146451950073, | |
| "eval_meteor": 50.8928, | |
| "eval_runtime": 534.9829, | |
| "eval_samples_per_second": 14.393, | |
| "eval_steps_per_second": 1.8, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.89595477830772e-05, | |
| "loss": 1.0863, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_gen_len": 17.928, | |
| "eval_loss": 0.9836859703063965, | |
| "eval_meteor": 51.429, | |
| "eval_runtime": 68.5042, | |
| "eval_samples_per_second": 14.598, | |
| "eval_steps_per_second": 1.825, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 8.785550256138491e-05, | |
| "loss": 1.0842, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_gen_len": 17.958, | |
| "eval_loss": 0.9726663827896118, | |
| "eval_meteor": 51.1218, | |
| "eval_runtime": 67.2576, | |
| "eval_samples_per_second": 14.868, | |
| "eval_steps_per_second": 1.859, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 8.675145733969264e-05, | |
| "loss": 1.0805, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_gen_len": 17.893, | |
| "eval_loss": 0.9778503775596619, | |
| "eval_meteor": 51.1678, | |
| "eval_runtime": 67.4631, | |
| "eval_samples_per_second": 14.823, | |
| "eval_steps_per_second": 1.853, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 8.564741211800036e-05, | |
| "loss": 1.0835, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_gen_len": 17.992, | |
| "eval_loss": 0.9666480422019958, | |
| "eval_meteor": 51.247, | |
| "eval_runtime": 68.9408, | |
| "eval_samples_per_second": 14.505, | |
| "eval_steps_per_second": 1.813, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 8.454336689630807e-05, | |
| "loss": 1.0893, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_gen_len": 17.948, | |
| "eval_loss": 0.9660341739654541, | |
| "eval_meteor": 51.4993, | |
| "eval_runtime": 67.538, | |
| "eval_samples_per_second": 14.806, | |
| "eval_steps_per_second": 1.851, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 8.34393216746158e-05, | |
| "loss": 1.0556, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_gen_len": 17.96, | |
| "eval_loss": 0.9647287726402283, | |
| "eval_meteor": 51.504, | |
| "eval_runtime": 67.4324, | |
| "eval_samples_per_second": 14.83, | |
| "eval_steps_per_second": 1.854, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 8.233527645292352e-05, | |
| "loss": 1.0616, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_gen_len": 18.03, | |
| "eval_loss": 0.9583545923233032, | |
| "eval_meteor": 51.345, | |
| "eval_runtime": 66.9475, | |
| "eval_samples_per_second": 14.937, | |
| "eval_steps_per_second": 1.867, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.123123123123123e-05, | |
| "loss": 1.0689, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_gen_len": 18.027, | |
| "eval_loss": 0.9531763792037964, | |
| "eval_meteor": 51.3761, | |
| "eval_runtime": 67.295, | |
| "eval_samples_per_second": 14.86, | |
| "eval_steps_per_second": 1.857, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 8.012718600953896e-05, | |
| "loss": 1.0507, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_gen_len": 17.998, | |
| "eval_loss": 0.9463646411895752, | |
| "eval_meteor": 51.6442, | |
| "eval_runtime": 67.7294, | |
| "eval_samples_per_second": 14.765, | |
| "eval_steps_per_second": 1.846, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.902314078784668e-05, | |
| "loss": 1.0422, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_gen_len": 17.998, | |
| "eval_loss": 0.9440800547599792, | |
| "eval_meteor": 51.7458, | |
| "eval_runtime": 67.0105, | |
| "eval_samples_per_second": 14.923, | |
| "eval_steps_per_second": 1.865, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 7.791909556615439e-05, | |
| "loss": 1.0539, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_gen_len": 17.989, | |
| "eval_loss": 0.9368101954460144, | |
| "eval_meteor": 51.7704, | |
| "eval_runtime": 68.5114, | |
| "eval_samples_per_second": 14.596, | |
| "eval_steps_per_second": 1.825, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 7.681505034446211e-05, | |
| "loss": 1.0395, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_gen_len": 18.021, | |
| "eval_loss": 0.939913809299469, | |
| "eval_meteor": 51.9542, | |
| "eval_runtime": 67.0531, | |
| "eval_samples_per_second": 14.914, | |
| "eval_steps_per_second": 1.864, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 7.571100512276982e-05, | |
| "loss": 1.0512, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_gen_len": 17.964, | |
| "eval_loss": 0.9322188496589661, | |
| "eval_meteor": 51.7557, | |
| "eval_runtime": 68.247, | |
| "eval_samples_per_second": 14.653, | |
| "eval_steps_per_second": 1.832, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.460695990107755e-05, | |
| "loss": 1.0355, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_gen_len": 17.904, | |
| "eval_loss": 0.9345097541809082, | |
| "eval_meteor": 51.9772, | |
| "eval_runtime": 68.722, | |
| "eval_samples_per_second": 14.551, | |
| "eval_steps_per_second": 1.819, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 7.350291467938527e-05, | |
| "loss": 1.0503, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_gen_len": 18.087, | |
| "eval_loss": 0.9308701157569885, | |
| "eval_meteor": 51.7568, | |
| "eval_runtime": 68.4746, | |
| "eval_samples_per_second": 14.604, | |
| "eval_steps_per_second": 1.825, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 7.239886945769298e-05, | |
| "loss": 1.0118, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_gen_len": 17.952, | |
| "eval_loss": 0.9287739396095276, | |
| "eval_meteor": 52.0746, | |
| "eval_runtime": 67.9075, | |
| "eval_samples_per_second": 14.726, | |
| "eval_steps_per_second": 1.841, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.129482423600071e-05, | |
| "loss": 1.02, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_gen_len": 18.035, | |
| "eval_loss": 0.9278337359428406, | |
| "eval_meteor": 52.1522, | |
| "eval_runtime": 68.3733, | |
| "eval_samples_per_second": 14.626, | |
| "eval_steps_per_second": 1.828, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 7.019077901430843e-05, | |
| "loss": 1.0383, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_gen_len": 18.059, | |
| "eval_loss": 0.9207689166069031, | |
| "eval_meteor": 51.7911, | |
| "eval_runtime": 67.1668, | |
| "eval_samples_per_second": 14.888, | |
| "eval_steps_per_second": 1.861, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.908673379261616e-05, | |
| "loss": 1.0112, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_gen_len": 17.973, | |
| "eval_loss": 0.9229653477668762, | |
| "eval_meteor": 51.6625, | |
| "eval_runtime": 68.3229, | |
| "eval_samples_per_second": 14.636, | |
| "eval_steps_per_second": 1.83, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.798268857092387e-05, | |
| "loss": 1.0077, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_gen_len": 18.037, | |
| "eval_loss": 0.9167578220367432, | |
| "eval_meteor": 52.8499, | |
| "eval_runtime": 70.9136, | |
| "eval_samples_per_second": 14.102, | |
| "eval_steps_per_second": 1.763, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.687864334923159e-05, | |
| "loss": 1.0065, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_gen_len": 18.005, | |
| "eval_loss": 0.9148094654083252, | |
| "eval_meteor": 52.443, | |
| "eval_runtime": 68.1553, | |
| "eval_samples_per_second": 14.672, | |
| "eval_steps_per_second": 1.834, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 6.577459812753932e-05, | |
| "loss": 0.9435, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_gen_len": 18.105, | |
| "eval_loss": 0.920024037361145, | |
| "eval_meteor": 51.8196, | |
| "eval_runtime": 68.3233, | |
| "eval_samples_per_second": 14.636, | |
| "eval_steps_per_second": 1.83, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.467055290584703e-05, | |
| "loss": 0.9475, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_gen_len": 17.98, | |
| "eval_loss": 0.9110422134399414, | |
| "eval_meteor": 52.1818, | |
| "eval_runtime": 67.6998, | |
| "eval_samples_per_second": 14.771, | |
| "eval_steps_per_second": 1.846, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.356650768415475e-05, | |
| "loss": 0.9198, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_gen_len": 18.008, | |
| "eval_loss": 0.9214870929718018, | |
| "eval_meteor": 52.1586, | |
| "eval_runtime": 67.5624, | |
| "eval_samples_per_second": 14.801, | |
| "eval_steps_per_second": 1.85, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 6.246246246246246e-05, | |
| "loss": 0.9186, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_gen_len": 18.024, | |
| "eval_loss": 0.9111480712890625, | |
| "eval_meteor": 52.4627, | |
| "eval_runtime": 68.9783, | |
| "eval_samples_per_second": 14.497, | |
| "eval_steps_per_second": 1.812, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 6.135841724077018e-05, | |
| "loss": 0.9232, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_gen_len": 17.962, | |
| "eval_loss": 0.9073123931884766, | |
| "eval_meteor": 52.4152, | |
| "eval_runtime": 69.4815, | |
| "eval_samples_per_second": 14.392, | |
| "eval_steps_per_second": 1.799, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 6.0254372019077906e-05, | |
| "loss": 0.9324, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_gen_len": 17.941, | |
| "eval_loss": 0.9070972800254822, | |
| "eval_meteor": 52.7035, | |
| "eval_runtime": 70.7686, | |
| "eval_samples_per_second": 14.131, | |
| "eval_steps_per_second": 1.766, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5.915032679738562e-05, | |
| "loss": 0.9474, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_gen_len": 17.987, | |
| "eval_loss": 0.9077590107917786, | |
| "eval_meteor": 51.8842, | |
| "eval_runtime": 69.8986, | |
| "eval_samples_per_second": 14.306, | |
| "eval_steps_per_second": 1.788, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 5.804628157569334e-05, | |
| "loss": 0.93, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_gen_len": 17.975, | |
| "eval_loss": 0.9080641865730286, | |
| "eval_meteor": 52.4181, | |
| "eval_runtime": 69.5758, | |
| "eval_samples_per_second": 14.373, | |
| "eval_steps_per_second": 1.797, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 5.6942236354001066e-05, | |
| "loss": 0.9269, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_gen_len": 18.014, | |
| "eval_loss": 0.9048936367034912, | |
| "eval_meteor": 52.6893, | |
| "eval_runtime": 70.2646, | |
| "eval_samples_per_second": 14.232, | |
| "eval_steps_per_second": 1.779, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 5.583819113230878e-05, | |
| "loss": 0.9137, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_gen_len": 17.947, | |
| "eval_loss": 0.9061869382858276, | |
| "eval_meteor": 52.5416, | |
| "eval_runtime": 69.637, | |
| "eval_samples_per_second": 14.36, | |
| "eval_steps_per_second": 1.795, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 5.47341459106165e-05, | |
| "loss": 0.9305, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_gen_len": 18.031, | |
| "eval_loss": 0.9059242010116577, | |
| "eval_meteor": 52.5436, | |
| "eval_runtime": 69.8898, | |
| "eval_samples_per_second": 14.308, | |
| "eval_steps_per_second": 1.789, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 5.3630100688924226e-05, | |
| "loss": 0.9203, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_gen_len": 17.978, | |
| "eval_loss": 0.9046989679336548, | |
| "eval_meteor": 52.6743, | |
| "eval_runtime": 70.1569, | |
| "eval_samples_per_second": 14.254, | |
| "eval_steps_per_second": 1.782, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 5.252605546723194e-05, | |
| "loss": 0.927, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_gen_len": 18.068, | |
| "eval_loss": 0.9016240239143372, | |
| "eval_meteor": 52.7934, | |
| "eval_runtime": 68.7827, | |
| "eval_samples_per_second": 14.539, | |
| "eval_steps_per_second": 1.817, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.142201024553966e-05, | |
| "loss": 0.9247, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_gen_len": 18.021, | |
| "eval_loss": 0.895437479019165, | |
| "eval_meteor": 52.4454, | |
| "eval_runtime": 70.0047, | |
| "eval_samples_per_second": 14.285, | |
| "eval_steps_per_second": 1.786, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 5.031796502384738e-05, | |
| "loss": 0.9209, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_gen_len": 17.989, | |
| "eval_loss": 0.8976706862449646, | |
| "eval_meteor": 52.488, | |
| "eval_runtime": 69.4109, | |
| "eval_samples_per_second": 14.407, | |
| "eval_steps_per_second": 1.801, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.92139198021551e-05, | |
| "loss": 0.9137, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_gen_len": 17.984, | |
| "eval_loss": 0.899140477180481, | |
| "eval_meteor": 52.5371, | |
| "eval_runtime": 70.1117, | |
| "eval_samples_per_second": 14.263, | |
| "eval_steps_per_second": 1.783, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.8109874580462816e-05, | |
| "loss": 0.9235, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_gen_len": 18.034, | |
| "eval_loss": 0.8912692666053772, | |
| "eval_meteor": 52.972, | |
| "eval_runtime": 70.4761, | |
| "eval_samples_per_second": 14.189, | |
| "eval_steps_per_second": 1.774, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.700582935877054e-05, | |
| "loss": 0.9209, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_gen_len": 18.008, | |
| "eval_loss": 0.8921295404434204, | |
| "eval_meteor": 53.0034, | |
| "eval_runtime": 70.2359, | |
| "eval_samples_per_second": 14.238, | |
| "eval_steps_per_second": 1.78, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 4.590178413707826e-05, | |
| "loss": 0.9278, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_gen_len": 18.022, | |
| "eval_loss": 0.8956578373908997, | |
| "eval_meteor": 52.6384, | |
| "eval_runtime": 70.3862, | |
| "eval_samples_per_second": 14.207, | |
| "eval_steps_per_second": 1.776, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 4.4797738915385975e-05, | |
| "loss": 0.9233, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_gen_len": 17.973, | |
| "eval_loss": 0.8917882442474365, | |
| "eval_meteor": 53.2839, | |
| "eval_runtime": 70.6559, | |
| "eval_samples_per_second": 14.153, | |
| "eval_steps_per_second": 1.769, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.369369369369369e-05, | |
| "loss": 0.926, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_gen_len": 18.06, | |
| "eval_loss": 0.8892934322357178, | |
| "eval_meteor": 52.9841, | |
| "eval_runtime": 70.5774, | |
| "eval_samples_per_second": 14.169, | |
| "eval_steps_per_second": 1.771, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.258964847200141e-05, | |
| "loss": 0.9126, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_gen_len": 18.105, | |
| "eval_loss": 0.8900778889656067, | |
| "eval_meteor": 53.6622, | |
| "eval_runtime": 70.1938, | |
| "eval_samples_per_second": 14.246, | |
| "eval_steps_per_second": 1.781, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 4.1485603250309135e-05, | |
| "loss": 0.9089, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_gen_len": 18.106, | |
| "eval_loss": 0.8875311017036438, | |
| "eval_meteor": 52.987, | |
| "eval_runtime": 70.0052, | |
| "eval_samples_per_second": 14.285, | |
| "eval_steps_per_second": 1.786, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 4.038155802861686e-05, | |
| "loss": 0.9146, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_gen_len": 18.073, | |
| "eval_loss": 0.886923611164093, | |
| "eval_meteor": 53.4227, | |
| "eval_runtime": 69.955, | |
| "eval_samples_per_second": 14.295, | |
| "eval_steps_per_second": 1.787, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.927751280692457e-05, | |
| "loss": 0.9028, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_gen_len": 18.004, | |
| "eval_loss": 0.8831958174705505, | |
| "eval_meteor": 53.1025, | |
| "eval_runtime": 69.8967, | |
| "eval_samples_per_second": 14.307, | |
| "eval_steps_per_second": 1.788, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.8173467585232295e-05, | |
| "loss": 0.9211, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_gen_len": 18.0, | |
| "eval_loss": 0.8846908807754517, | |
| "eval_meteor": 53.0378, | |
| "eval_runtime": 69.7539, | |
| "eval_samples_per_second": 14.336, | |
| "eval_steps_per_second": 1.792, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.706942236354001e-05, | |
| "loss": 0.9031, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_gen_len": 18.076, | |
| "eval_loss": 0.8841462135314941, | |
| "eval_meteor": 52.8109, | |
| "eval_runtime": 69.4673, | |
| "eval_samples_per_second": 14.395, | |
| "eval_steps_per_second": 1.799, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.596537714184773e-05, | |
| "loss": 0.908, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_gen_len": 17.95, | |
| "eval_loss": 0.881539523601532, | |
| "eval_meteor": 53.4599, | |
| "eval_runtime": 70.9294, | |
| "eval_samples_per_second": 14.099, | |
| "eval_steps_per_second": 1.762, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.486133192015545e-05, | |
| "loss": 0.9136, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_gen_len": 18.011, | |
| "eval_loss": 0.8800588250160217, | |
| "eval_meteor": 53.4037, | |
| "eval_runtime": 70.4901, | |
| "eval_samples_per_second": 14.186, | |
| "eval_steps_per_second": 1.773, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.375728669846317e-05, | |
| "loss": 0.9177, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_gen_len": 18.066, | |
| "eval_loss": 0.8781697750091553, | |
| "eval_meteor": 53.3926, | |
| "eval_runtime": 70.1677, | |
| "eval_samples_per_second": 14.252, | |
| "eval_steps_per_second": 1.781, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.265324147677089e-05, | |
| "loss": 0.8913, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_gen_len": 18.054, | |
| "eval_loss": 0.8868271708488464, | |
| "eval_meteor": 53.3419, | |
| "eval_runtime": 70.5548, | |
| "eval_samples_per_second": 14.173, | |
| "eval_steps_per_second": 1.772, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.1549196255078614e-05, | |
| "loss": 0.8568, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_gen_len": 18.014, | |
| "eval_loss": 0.888134241104126, | |
| "eval_meteor": 53.4476, | |
| "eval_runtime": 70.8184, | |
| "eval_samples_per_second": 14.121, | |
| "eval_steps_per_second": 1.765, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.0445151033386326e-05, | |
| "loss": 0.839, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_gen_len": 17.947, | |
| "eval_loss": 0.8903856873512268, | |
| "eval_meteor": 53.6926, | |
| "eval_runtime": 70.639, | |
| "eval_samples_per_second": 14.156, | |
| "eval_steps_per_second": 1.77, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 2.9341105811694048e-05, | |
| "loss": 0.8543, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_gen_len": 18.009, | |
| "eval_loss": 0.883150577545166, | |
| "eval_meteor": 53.522, | |
| "eval_runtime": 70.8477, | |
| "eval_samples_per_second": 14.115, | |
| "eval_steps_per_second": 1.764, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 2.8237060590001767e-05, | |
| "loss": 0.8482, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_gen_len": 18.037, | |
| "eval_loss": 0.8839625120162964, | |
| "eval_meteor": 53.0315, | |
| "eval_runtime": 70.3219, | |
| "eval_samples_per_second": 14.22, | |
| "eval_steps_per_second": 1.778, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.713301536830949e-05, | |
| "loss": 0.8446, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_gen_len": 18.017, | |
| "eval_loss": 0.8828043341636658, | |
| "eval_meteor": 53.1681, | |
| "eval_runtime": 70.6648, | |
| "eval_samples_per_second": 14.151, | |
| "eval_steps_per_second": 1.769, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 2.6028970146617204e-05, | |
| "loss": 0.8528, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_gen_len": 17.963, | |
| "eval_loss": 0.8883506059646606, | |
| "eval_meteor": 52.6487, | |
| "eval_runtime": 70.7584, | |
| "eval_samples_per_second": 14.133, | |
| "eval_steps_per_second": 1.767, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.4924924924924926e-05, | |
| "loss": 0.8693, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_gen_len": 17.973, | |
| "eval_loss": 0.8843633532524109, | |
| "eval_meteor": 52.8738, | |
| "eval_runtime": 70.1007, | |
| "eval_samples_per_second": 14.265, | |
| "eval_steps_per_second": 1.783, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.3820879703232645e-05, | |
| "loss": 0.8351, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_gen_len": 18.004, | |
| "eval_loss": 0.881372332572937, | |
| "eval_meteor": 52.9678, | |
| "eval_runtime": 70.705, | |
| "eval_samples_per_second": 14.143, | |
| "eval_steps_per_second": 1.768, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.2716834481540364e-05, | |
| "loss": 0.8466, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_gen_len": 18.04, | |
| "eval_loss": 0.8801607489585876, | |
| "eval_meteor": 52.9869, | |
| "eval_runtime": 70.5358, | |
| "eval_samples_per_second": 14.177, | |
| "eval_steps_per_second": 1.772, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.1612789259848086e-05, | |
| "loss": 0.8534, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_gen_len": 18.038, | |
| "eval_loss": 0.8797491192817688, | |
| "eval_meteor": 53.1901, | |
| "eval_runtime": 70.786, | |
| "eval_samples_per_second": 14.127, | |
| "eval_steps_per_second": 1.766, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 2.05087440381558e-05, | |
| "loss": 0.8375, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_gen_len": 18.018, | |
| "eval_loss": 0.8791442513465881, | |
| "eval_meteor": 53.3976, | |
| "eval_runtime": 70.571, | |
| "eval_samples_per_second": 14.17, | |
| "eval_steps_per_second": 1.771, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.9404698816463523e-05, | |
| "loss": 0.8513, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_gen_len": 18.056, | |
| "eval_loss": 0.878569483757019, | |
| "eval_meteor": 53.3592, | |
| "eval_runtime": 70.24, | |
| "eval_samples_per_second": 14.237, | |
| "eval_steps_per_second": 1.78, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.8300653594771242e-05, | |
| "loss": 0.8476, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_gen_len": 18.022, | |
| "eval_loss": 0.8775736093521118, | |
| "eval_meteor": 53.5136, | |
| "eval_runtime": 70.3201, | |
| "eval_samples_per_second": 14.221, | |
| "eval_steps_per_second": 1.778, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.719660837307896e-05, | |
| "loss": 0.8491, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_gen_len": 18.051, | |
| "eval_loss": 0.8797674775123596, | |
| "eval_meteor": 53.4259, | |
| "eval_runtime": 70.4108, | |
| "eval_samples_per_second": 14.202, | |
| "eval_steps_per_second": 1.775, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.609256315138668e-05, | |
| "loss": 0.8308, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_gen_len": 18.032, | |
| "eval_loss": 0.8773834109306335, | |
| "eval_meteor": 53.6228, | |
| "eval_runtime": 70.3135, | |
| "eval_samples_per_second": 14.222, | |
| "eval_steps_per_second": 1.778, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.4988517929694402e-05, | |
| "loss": 0.8372, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_gen_len": 18.048, | |
| "eval_loss": 0.8753427863121033, | |
| "eval_meteor": 53.4677, | |
| "eval_runtime": 70.4635, | |
| "eval_samples_per_second": 14.192, | |
| "eval_steps_per_second": 1.774, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 1.3884472708002119e-05, | |
| "loss": 0.8585, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_gen_len": 18.041, | |
| "eval_loss": 0.8747315406799316, | |
| "eval_meteor": 53.2041, | |
| "eval_runtime": 70.6628, | |
| "eval_samples_per_second": 14.152, | |
| "eval_steps_per_second": 1.769, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 1.278042748630984e-05, | |
| "loss": 0.8476, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_gen_len": 18.017, | |
| "eval_loss": 0.8767881989479065, | |
| "eval_meteor": 53.4269, | |
| "eval_runtime": 70.2278, | |
| "eval_samples_per_second": 14.239, | |
| "eval_steps_per_second": 1.78, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 1.167638226461756e-05, | |
| "loss": 0.8357, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_gen_len": 18.051, | |
| "eval_loss": 0.8746951222419739, | |
| "eval_meteor": 53.2368, | |
| "eval_runtime": 69.6603, | |
| "eval_samples_per_second": 14.355, | |
| "eval_steps_per_second": 1.794, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 1.0572337042925278e-05, | |
| "loss": 0.8474, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_gen_len": 18.052, | |
| "eval_loss": 0.8737708330154419, | |
| "eval_meteor": 53.4196, | |
| "eval_runtime": 104.6472, | |
| "eval_samples_per_second": 9.556, | |
| "eval_steps_per_second": 1.194, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.468291821232999e-06, | |
| "loss": 0.8489, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_gen_len": 18.048, | |
| "eval_loss": 0.8735561966896057, | |
| "eval_meteor": 53.7004, | |
| "eval_runtime": 70.2575, | |
| "eval_samples_per_second": 14.233, | |
| "eval_steps_per_second": 1.779, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 8.364246599540718e-06, | |
| "loss": 0.8394, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_gen_len": 18.027, | |
| "eval_loss": 0.8742736577987671, | |
| "eval_meteor": 53.5816, | |
| "eval_runtime": 70.1416, | |
| "eval_samples_per_second": 14.257, | |
| "eval_steps_per_second": 1.782, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 7.260201377848437e-06, | |
| "loss": 0.849, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_gen_len": 18.043, | |
| "eval_loss": 0.8737511038780212, | |
| "eval_meteor": 53.6027, | |
| "eval_runtime": 69.8283, | |
| "eval_samples_per_second": 14.321, | |
| "eval_steps_per_second": 1.79, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 6.156156156156157e-06, | |
| "loss": 0.8591, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_gen_len": 18.019, | |
| "eval_loss": 0.8744534850120544, | |
| "eval_meteor": 53.4663, | |
| "eval_runtime": 70.1119, | |
| "eval_samples_per_second": 14.263, | |
| "eval_steps_per_second": 1.783, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 5.052110934463876e-06, | |
| "loss": 0.8391, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_gen_len": 18.03, | |
| "eval_loss": 0.8740697503089905, | |
| "eval_meteor": 53.4725, | |
| "eval_runtime": 70.2567, | |
| "eval_samples_per_second": 14.234, | |
| "eval_steps_per_second": 1.779, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.948065712771595e-06, | |
| "loss": 0.8354, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_gen_len": 18.04, | |
| "eval_loss": 0.8721567988395691, | |
| "eval_meteor": 53.6118, | |
| "eval_runtime": 70.117, | |
| "eval_samples_per_second": 14.262, | |
| "eval_steps_per_second": 1.783, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.8440204910793146e-06, | |
| "loss": 0.8517, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_gen_len": 18.052, | |
| "eval_loss": 0.8713221549987793, | |
| "eval_meteor": 53.5295, | |
| "eval_runtime": 69.6161, | |
| "eval_samples_per_second": 14.365, | |
| "eval_steps_per_second": 1.796, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.7399752693870342e-06, | |
| "loss": 0.8438, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_gen_len": 18.057, | |
| "eval_loss": 0.8717844486236572, | |
| "eval_meteor": 53.4443, | |
| "eval_runtime": 69.4435, | |
| "eval_samples_per_second": 14.4, | |
| "eval_steps_per_second": 1.8, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 6.359300476947536e-07, | |
| "loss": 0.8428, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_gen_len": 18.056, | |
| "eval_loss": 0.8718814849853516, | |
| "eval_meteor": 53.4202, | |
| "eval_runtime": 98.6862, | |
| "eval_samples_per_second": 10.133, | |
| "eval_steps_per_second": 1.267, | |
| "step": 45000 | |
| } | |
| ], | |
| "max_steps": 45288, | |
| "num_train_epochs": 3, | |
| "total_flos": 2.5472014850772173e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |