| { | |
| "best_metric": 3.1630301475524902, | |
| "best_model_checkpoint": "checkpoints/it5-large/checkpoint-78926", | |
| "epoch": 14.251715420729505, | |
| "eval_steps": 4154, | |
| "global_step": 78926, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14987360057782592, | |
| "eval_g2l_cer": 67.2645, | |
| "eval_g2l_gen_len": 4.5733, | |
| "eval_g2l_rouge1": 27.1595, | |
| "eval_g2l_rouge2": 15.5941, | |
| "eval_g2l_rougeL": 26.9535, | |
| "eval_g2l_rougeLsum": 26.9576, | |
| "eval_l2ex_cer": 130.3597, | |
| "eval_l2ex_gen_len": 47.8171, | |
| "eval_l2ex_rouge1": 22.1003, | |
| "eval_l2ex_rouge2": 9.5437, | |
| "eval_l2ex_rougeL": 20.2017, | |
| "eval_l2ex_rougeLsum": 19.2847, | |
| "eval_l2g_cer": 106.9099, | |
| "eval_l2g_gen_len": 30.346, | |
| "eval_l2g_rouge1": 27.2135, | |
| "eval_l2g_rouge2": 14.1149, | |
| "eval_l2g_rougeL": 25.3922, | |
| "eval_l2g_rougeLsum": 25.3986, | |
| "eval_loss": 3.804034948348999, | |
| "eval_runtime": 310.7233, | |
| "eval_samples_per_second": 31.929, | |
| "eval_steps_per_second": 0.502, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.15005417118093176, | |
| "grad_norm": 129.91856384277344, | |
| "learning_rate": 6.004335260115606e-05, | |
| "loss": 4.6945, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.3001083423618635, | |
| "grad_norm": 190.0506591796875, | |
| "learning_rate": 0.00012008670520231212, | |
| "loss": 3.8417, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.45016251354279524, | |
| "grad_norm": 208.42779541015625, | |
| "learning_rate": 0.0001801300578034682, | |
| "loss": 3.712, | |
| "step": 2493 | |
| }, | |
| { | |
| "epoch": 0.600216684723727, | |
| "grad_norm": 293.423583984375, | |
| "learning_rate": 0.00024017341040462423, | |
| "loss": 3.6763, | |
| "step": 3324 | |
| }, | |
| { | |
| "epoch": 0.7500902853015529, | |
| "eval_g2l_cer": 53.5059, | |
| "eval_g2l_gen_len": 3.5087, | |
| "eval_g2l_rouge1": 37.5417, | |
| "eval_g2l_rouge2": 29.1384, | |
| "eval_g2l_rougeL": 37.463, | |
| "eval_g2l_rougeLsum": 37.4022, | |
| "eval_l2ex_cer": 102.0708, | |
| "eval_l2ex_gen_len": 25.9866, | |
| "eval_l2ex_rouge1": 26.7853, | |
| "eval_l2ex_rouge2": 12.9071, | |
| "eval_l2ex_rougeL": 24.0724, | |
| "eval_l2ex_rougeLsum": 24.0445, | |
| "eval_l2g_cer": 86.4648, | |
| "eval_l2g_gen_len": 15.0081, | |
| "eval_l2g_rouge1": 30.7776, | |
| "eval_l2g_rouge2": 18.1789, | |
| "eval_l2g_rougeL": 29.1675, | |
| "eval_l2g_rougeLsum": 29.2136, | |
| "eval_loss": 3.5662293434143066, | |
| "eval_runtime": 296.355, | |
| "eval_samples_per_second": 33.477, | |
| "eval_steps_per_second": 0.526, | |
| "step": 4154 | |
| }, | |
| { | |
| "epoch": 0.7502708559046587, | |
| "grad_norm": 500.86932373046875, | |
| "learning_rate": 0.0002999999989317841, | |
| "loss": 3.6694, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 0.9003250270855905, | |
| "grad_norm": 378.1305236816406, | |
| "learning_rate": 0.00029991745158829114, | |
| "loss": 3.641, | |
| "step": 4986 | |
| }, | |
| { | |
| "epoch": 1.0503791982665223, | |
| "grad_norm": 309.59503173828125, | |
| "learning_rate": 0.0002996710832786393, | |
| "loss": 3.6169, | |
| "step": 5817 | |
| }, | |
| { | |
| "epoch": 1.200433369447454, | |
| "grad_norm": 364.1076965332031, | |
| "learning_rate": 0.00029926116366930635, | |
| "loss": 3.5732, | |
| "step": 6648 | |
| }, | |
| { | |
| "epoch": 1.3504875406283858, | |
| "grad_norm": 402.1815490722656, | |
| "learning_rate": 0.00029868814144453027, | |
| "loss": 3.5547, | |
| "step": 7479 | |
| }, | |
| { | |
| "epoch": 1.500180570603106, | |
| "eval_g2l_cer": 49.6927, | |
| "eval_g2l_gen_len": 4.4371, | |
| "eval_g2l_rouge1": 42.6629, | |
| "eval_g2l_rouge2": 32.7133, | |
| "eval_g2l_rougeL": 42.5078, | |
| "eval_g2l_rougeLsum": 42.487, | |
| "eval_l2ex_cer": 85.9069, | |
| "eval_l2ex_gen_len": 27.3155, | |
| "eval_l2ex_rouge1": 31.0018, | |
| "eval_l2ex_rouge2": 14.7792, | |
| "eval_l2ex_rougeL": 27.5259, | |
| "eval_l2ex_rougeLsum": 27.5817, | |
| "eval_l2g_cer": 76.6936, | |
| "eval_l2g_gen_len": 19.6286, | |
| "eval_l2g_rouge1": 38.3213, | |
| "eval_l2g_rouge2": 24.5167, | |
| "eval_l2g_rougeL": 36.1971, | |
| "eval_l2g_rougeLsum": 36.2764, | |
| "eval_loss": 3.4418885707855225, | |
| "eval_runtime": 302.3378, | |
| "eval_samples_per_second": 32.814, | |
| "eval_steps_per_second": 0.516, | |
| "step": 8308 | |
| }, | |
| { | |
| "epoch": 1.5005417118093174, | |
| "grad_norm": 380.1324157714844, | |
| "learning_rate": 0.0002979526438151941, | |
| "loss": 3.533, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.6505958829902492, | |
| "grad_norm": 358.8219909667969, | |
| "learning_rate": 0.0002970554758323025, | |
| "loss": 3.5167, | |
| "step": 9141 | |
| }, | |
| { | |
| "epoch": 1.800650054171181, | |
| "grad_norm": 323.29583740234375, | |
| "learning_rate": 0.0002959976195057994, | |
| "loss": 3.5114, | |
| "step": 9972 | |
| }, | |
| { | |
| "epoch": 1.9507042253521125, | |
| "grad_norm": 283.41790771484375, | |
| "learning_rate": 0.00029478023272969345, | |
| "loss": 3.4955, | |
| "step": 10803 | |
| }, | |
| { | |
| "epoch": 2.1007583965330445, | |
| "grad_norm": 326.611572265625, | |
| "learning_rate": 0.0002934046480146657, | |
| "loss": 3.4415, | |
| "step": 11634 | |
| }, | |
| { | |
| "epoch": 2.2502708559046587, | |
| "eval_g2l_cer": 49.446, | |
| "eval_g2l_gen_len": 4.5047, | |
| "eval_g2l_rouge1": 43.91, | |
| "eval_g2l_rouge2": 33.662, | |
| "eval_g2l_rougeL": 43.778, | |
| "eval_g2l_rougeLsum": 43.7883, | |
| "eval_l2ex_cer": 86.4808, | |
| "eval_l2ex_gen_len": 30.4358, | |
| "eval_l2ex_rouge1": 30.7974, | |
| "eval_l2ex_rouge2": 14.4266, | |
| "eval_l2ex_rougeL": 27.2278, | |
| "eval_l2ex_rougeLsum": 27.3219, | |
| "eval_l2g_cer": 84.1694, | |
| "eval_l2g_gen_len": 24.5493, | |
| "eval_l2g_rouge1": 38.1858, | |
| "eval_l2g_rouge2": 25.1392, | |
| "eval_l2g_rougeL": 36.1473, | |
| "eval_l2g_rougeLsum": 36.1987, | |
| "eval_loss": 3.374830961227417, | |
| "eval_runtime": 306.8842, | |
| "eval_samples_per_second": 32.328, | |
| "eval_steps_per_second": 0.508, | |
| "step": 12462 | |
| }, | |
| { | |
| "epoch": 2.250812567713976, | |
| "grad_norm": 368.7247314453125, | |
| "learning_rate": 0.0002918723710295482, | |
| "loss": 3.424, | |
| "step": 12465 | |
| }, | |
| { | |
| "epoch": 2.400866738894908, | |
| "grad_norm": 468.8291320800781, | |
| "learning_rate": 0.00029018507895326985, | |
| "loss": 3.4228, | |
| "step": 13296 | |
| }, | |
| { | |
| "epoch": 2.5509209100758397, | |
| "grad_norm": 266.9640808105469, | |
| "learning_rate": 0.00028834461863907226, | |
| "loss": 3.4152, | |
| "step": 14127 | |
| }, | |
| { | |
| "epoch": 2.7009750812567717, | |
| "grad_norm": 237.8248748779297, | |
| "learning_rate": 0.0002863530045930063, | |
| "loss": 3.4187, | |
| "step": 14958 | |
| }, | |
| { | |
| "epoch": 2.851029252437703, | |
| "grad_norm": 371.8949890136719, | |
| "learning_rate": 0.00028421241676892145, | |
| "loss": 3.3965, | |
| "step": 15789 | |
| }, | |
| { | |
| "epoch": 3.0003611412062114, | |
| "eval_g2l_cer": 48.3881, | |
| "eval_g2l_gen_len": 4.4987, | |
| "eval_g2l_rouge1": 44.8641, | |
| "eval_g2l_rouge2": 34.4032, | |
| "eval_g2l_rougeL": 44.6885, | |
| "eval_g2l_rougeLsum": 44.711, | |
| "eval_l2ex_cer": 89.7331, | |
| "eval_l2ex_gen_len": 30.1634, | |
| "eval_l2ex_rouge1": 30.5096, | |
| "eval_l2ex_rouge2": 14.191, | |
| "eval_l2ex_rougeL": 26.9741, | |
| "eval_l2ex_rougeLsum": 27.0965, | |
| "eval_l2g_cer": 81.1389, | |
| "eval_l2g_gen_len": 23.1439, | |
| "eval_l2g_rouge1": 39.3934, | |
| "eval_l2g_rouge2": 25.9597, | |
| "eval_l2g_rougeL": 37.0903, | |
| "eval_l2g_rougeLsum": 37.1641, | |
| "eval_loss": 3.325451612472534, | |
| "eval_runtime": 304.8315, | |
| "eval_samples_per_second": 32.546, | |
| "eval_steps_per_second": 0.512, | |
| "step": 16616 | |
| }, | |
| { | |
| "epoch": 3.001083423618635, | |
| "grad_norm": 298.2984619140625, | |
| "learning_rate": 0.0002819251981823618, | |
| "loss": 3.3917, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 3.151137594799567, | |
| "grad_norm": 318.9201354980469, | |
| "learning_rate": 0.00027949385234597935, | |
| "loss": 3.3406, | |
| "step": 17451 | |
| }, | |
| { | |
| "epoch": 3.3011917659804983, | |
| "grad_norm": 286.3103942871094, | |
| "learning_rate": 0.0002769210405292737, | |
| "loss": 3.3328, | |
| "step": 18282 | |
| }, | |
| { | |
| "epoch": 3.4512459371614304, | |
| "grad_norm": 352.0337829589844, | |
| "learning_rate": 0.0002742095788456554, | |
| "loss": 3.3333, | |
| "step": 19113 | |
| }, | |
| { | |
| "epoch": 3.601300108342362, | |
| "grad_norm": 210.31089782714844, | |
| "learning_rate": 0.0002713624351700232, | |
| "loss": 3.3251, | |
| "step": 19944 | |
| }, | |
| { | |
| "epoch": 3.7504514265077646, | |
| "eval_g2l_cer": 47.0881, | |
| "eval_g2l_gen_len": 4.2162, | |
| "eval_g2l_rouge1": 45.6068, | |
| "eval_g2l_rouge2": 34.9617, | |
| "eval_g2l_rougeL": 45.369, | |
| "eval_g2l_rougeLsum": 45.3992, | |
| "eval_l2ex_cer": 87.2057, | |
| "eval_l2ex_gen_len": 25.9743, | |
| "eval_l2ex_rouge1": 32.044, | |
| "eval_l2ex_rouge2": 15.4907, | |
| "eval_l2ex_rougeL": 28.2386, | |
| "eval_l2ex_rougeLsum": 28.3364, | |
| "eval_l2g_cer": 81.5351, | |
| "eval_l2g_gen_len": 20.6293, | |
| "eval_l2g_rouge1": 39.7177, | |
| "eval_l2g_rouge2": 26.6455, | |
| "eval_l2g_rougeL": 37.5652, | |
| "eval_l2g_rougeLsum": 37.5978, | |
| "eval_loss": 3.267240047454834, | |
| "eval_runtime": 303.1609, | |
| "eval_samples_per_second": 32.725, | |
| "eval_steps_per_second": 0.515, | |
| "step": 20770 | |
| }, | |
| { | |
| "epoch": 3.7513542795232935, | |
| "grad_norm": 240.68614196777344, | |
| "learning_rate": 0.0002683827258902275, | |
| "loss": 3.3215, | |
| "step": 20775 | |
| }, | |
| { | |
| "epoch": 3.9014084507042255, | |
| "grad_norm": 203.9857177734375, | |
| "learning_rate": 0.0002652737124959771, | |
| "loss": 3.311, | |
| "step": 21606 | |
| }, | |
| { | |
| "epoch": 4.0514626218851575, | |
| "grad_norm": 182.24288940429688, | |
| "learning_rate": 0.00026203879800892194, | |
| "loss": 3.2913, | |
| "step": 22437 | |
| }, | |
| { | |
| "epoch": 4.201516793066089, | |
| "grad_norm": 300.8165283203125, | |
| "learning_rate": 0.00025868152325781986, | |
| "loss": 3.2554, | |
| "step": 23268 | |
| }, | |
| { | |
| "epoch": 4.351570964247021, | |
| "grad_norm": 230.65304565429688, | |
| "learning_rate": 0.00025520556300286454, | |
| "loss": 3.2636, | |
| "step": 24099 | |
| }, | |
| { | |
| "epoch": 4.500541711809317, | |
| "eval_g2l_cer": 46.1491, | |
| "eval_g2l_gen_len": 3.8852, | |
| "eval_g2l_rouge1": 45.7526, | |
| "eval_g2l_rouge2": 35.7656, | |
| "eval_g2l_rougeL": 45.6115, | |
| "eval_g2l_rougeLsum": 45.6146, | |
| "eval_l2ex_cer": 82.9457, | |
| "eval_l2ex_gen_len": 17.2662, | |
| "eval_l2ex_rouge1": 31.8116, | |
| "eval_l2ex_rouge2": 16.1098, | |
| "eval_l2ex_rougeL": 28.581, | |
| "eval_l2ex_rougeLsum": 28.6511, | |
| "eval_l2g_cer": 69.3136, | |
| "eval_l2g_gen_len": 12.4397, | |
| "eval_l2g_rouge1": 39.1199, | |
| "eval_l2g_rouge2": 26.5659, | |
| "eval_l2g_rougeL": 37.2837, | |
| "eval_l2g_rougeLsum": 37.3241, | |
| "eval_loss": 3.275228261947632, | |
| "eval_runtime": 264.7397, | |
| "eval_samples_per_second": 37.475, | |
| "eval_steps_per_second": 0.589, | |
| "step": 24924 | |
| }, | |
| { | |
| "epoch": 4.501625135427952, | |
| "grad_norm": 278.4391174316406, | |
| "learning_rate": 0.00025161472191341646, | |
| "loss": 3.2605, | |
| "step": 24930 | |
| }, | |
| { | |
| "epoch": 4.651679306608884, | |
| "grad_norm": 185.57086181640625, | |
| "learning_rate": 0.00024791293040353913, | |
| "loss": 3.2372, | |
| "step": 25761 | |
| }, | |
| { | |
| "epoch": 4.801733477789816, | |
| "grad_norm": 199.41229248046875, | |
| "learning_rate": 0.0002441042403299005, | |
| "loss": 3.2549, | |
| "step": 26592 | |
| }, | |
| { | |
| "epoch": 4.951787648970748, | |
| "grad_norm": 111.84984588623047, | |
| "learning_rate": 0.000240192820556746, | |
| "loss": 3.2505, | |
| "step": 27423 | |
| }, | |
| { | |
| "epoch": 5.101841820151679, | |
| "grad_norm": 231.06040954589844, | |
| "learning_rate": 0.0002361829523928005, | |
| "loss": 3.2162, | |
| "step": 28254 | |
| }, | |
| { | |
| "epoch": 5.250631997110871, | |
| "eval_g2l_cer": 45.3929, | |
| "eval_g2l_gen_len": 4.2856, | |
| "eval_g2l_rouge1": 47.7028, | |
| "eval_g2l_rouge2": 36.7159, | |
| "eval_g2l_rougeL": 47.5076, | |
| "eval_g2l_rougeLsum": 47.5342, | |
| "eval_l2ex_cer": 84.2916, | |
| "eval_l2ex_gen_len": 27.5293, | |
| "eval_l2ex_rouge1": 32.3354, | |
| "eval_l2ex_rouge2": 15.6055, | |
| "eval_l2ex_rougeL": 28.4133, | |
| "eval_l2ex_rougeLsum": 28.5758, | |
| "eval_l2g_cer": 74.8295, | |
| "eval_l2g_gen_len": 19.749, | |
| "eval_l2g_rouge1": 40.6449, | |
| "eval_l2g_rouge2": 27.1184, | |
| "eval_l2g_rougeL": 38.3335, | |
| "eval_l2g_rougeLsum": 38.3945, | |
| "eval_loss": 3.2382800579071045, | |
| "eval_runtime": 300.872, | |
| "eval_samples_per_second": 32.974, | |
| "eval_steps_per_second": 0.518, | |
| "step": 29078 | |
| }, | |
| { | |
| "epoch": 5.251895991332611, | |
| "grad_norm": 298.8398132324219, | |
| "learning_rate": 0.00023207902490509098, | |
| "loss": 3.187, | |
| "step": 29085 | |
| }, | |
| { | |
| "epoch": 5.401950162513542, | |
| "grad_norm": 126.86690521240234, | |
| "learning_rate": 0.0002278855301148215, | |
| "loss": 3.2012, | |
| "step": 29916 | |
| }, | |
| { | |
| "epoch": 5.552004333694475, | |
| "grad_norm": 221.00885009765625, | |
| "learning_rate": 0.0002236070580805574, | |
| "loss": 3.1999, | |
| "step": 30747 | |
| }, | |
| { | |
| "epoch": 5.702058504875406, | |
| "grad_norm": 193.86273193359375, | |
| "learning_rate": 0.00021924829187410153, | |
| "loss": 3.1942, | |
| "step": 31578 | |
| }, | |
| { | |
| "epoch": 5.852112676056338, | |
| "grad_norm": 126.05673217773438, | |
| "learning_rate": 0.00021481400245456104, | |
| "loss": 3.1947, | |
| "step": 32409 | |
| }, | |
| { | |
| "epoch": 6.000722282412423, | |
| "eval_g2l_cer": 45.9412, | |
| "eval_g2l_gen_len": 4.4229, | |
| "eval_g2l_rouge1": 47.5003, | |
| "eval_g2l_rouge2": 36.6595, | |
| "eval_g2l_rougeL": 47.3175, | |
| "eval_g2l_rougeLsum": 47.3017, | |
| "eval_l2ex_cer": 82.4504, | |
| "eval_l2ex_gen_len": 23.3741, | |
| "eval_l2ex_rouge1": 32.8857, | |
| "eval_l2ex_rouge2": 15.6166, | |
| "eval_l2ex_rougeL": 28.7672, | |
| "eval_l2ex_rougeLsum": 28.8746, | |
| "eval_l2g_cer": 73.7451, | |
| "eval_l2g_gen_len": 18.5067, | |
| "eval_l2g_rouge1": 40.8866, | |
| "eval_l2g_rouge2": 27.3687, | |
| "eval_l2g_rougeL": 38.5521, | |
| "eval_l2g_rougeLsum": 38.621, | |
| "eval_loss": 3.2279489040374756, | |
| "eval_runtime": 300.5167, | |
| "eval_samples_per_second": 33.013, | |
| "eval_steps_per_second": 0.519, | |
| "step": 33232 | |
| }, | |
| { | |
| "epoch": 6.00216684723727, | |
| "grad_norm": 254.65907287597656, | |
| "learning_rate": 0.00021030904344621589, | |
| "loss": 3.1923, | |
| "step": 33240 | |
| }, | |
| { | |
| "epoch": 6.152221018418201, | |
| "grad_norm": 228.19200134277344, | |
| "learning_rate": 0.0002057383458259045, | |
| "loss": 3.1351, | |
| "step": 34071 | |
| }, | |
| { | |
| "epoch": 6.302275189599134, | |
| "grad_norm": 305.9356689453125, | |
| "learning_rate": 0.00020110691252574222, | |
| "loss": 3.1421, | |
| "step": 34902 | |
| }, | |
| { | |
| "epoch": 6.452329360780065, | |
| "grad_norm": 190.717041015625, | |
| "learning_rate": 0.00019641981295707994, | |
| "loss": 3.1515, | |
| "step": 35733 | |
| }, | |
| { | |
| "epoch": 6.602383531960997, | |
| "grad_norm": 241.10513305664062, | |
| "learning_rate": 0.00019168217746169658, | |
| "loss": 3.1506, | |
| "step": 36564 | |
| }, | |
| { | |
| "epoch": 6.750812567713976, | |
| "eval_g2l_cer": 45.0204, | |
| "eval_g2l_gen_len": 4.0829, | |
| "eval_g2l_rouge1": 47.6328, | |
| "eval_g2l_rouge2": 37.0338, | |
| "eval_g2l_rougeL": 47.5319, | |
| "eval_g2l_rougeLsum": 47.5196, | |
| "eval_l2ex_cer": 84.416, | |
| "eval_l2ex_gen_len": 23.2662, | |
| "eval_l2ex_rouge1": 33.1718, | |
| "eval_l2ex_rouge2": 16.167, | |
| "eval_l2ex_rougeL": 29.1263, | |
| "eval_l2ex_rougeLsum": 29.2504, | |
| "eval_l2g_cer": 75.3622, | |
| "eval_l2g_gen_len": 17.9792, | |
| "eval_l2g_rouge1": 40.4989, | |
| "eval_l2g_rouge2": 27.2808, | |
| "eval_l2g_rougeL": 38.3025, | |
| "eval_l2g_rougeLsum": 38.3215, | |
| "eval_loss": 3.1947431564331055, | |
| "eval_runtime": 297.6683, | |
| "eval_samples_per_second": 33.329, | |
| "eval_steps_per_second": 0.524, | |
| "step": 37386 | |
| }, | |
| { | |
| "epoch": 6.752437703141928, | |
| "grad_norm": 194.801513671875, | |
| "learning_rate": 0.0001868991916962991, | |
| "loss": 3.1481, | |
| "step": 37395 | |
| }, | |
| { | |
| "epoch": 6.902491874322861, | |
| "grad_norm": 133.14971923828125, | |
| "learning_rate": 0.00018207609095647728, | |
| "loss": 3.1368, | |
| "step": 38226 | |
| }, | |
| { | |
| "epoch": 7.052546045503792, | |
| "grad_norm": 179.32647705078125, | |
| "learning_rate": 0.00017721815444632445, | |
| "loss": 3.1199, | |
| "step": 39057 | |
| }, | |
| { | |
| "epoch": 7.202600216684724, | |
| "grad_norm": 218.9005889892578, | |
| "learning_rate": 0.00017233069949999837, | |
| "loss": 3.094, | |
| "step": 39888 | |
| }, | |
| { | |
| "epoch": 7.352654387865655, | |
| "grad_norm": 215.17083740234375, | |
| "learning_rate": 0.00016741907576154572, | |
| "loss": 3.0896, | |
| "step": 40719 | |
| }, | |
| { | |
| "epoch": 7.500902853015529, | |
| "eval_g2l_cer": 44.5361, | |
| "eval_g2l_gen_len": 4.0809, | |
| "eval_g2l_rouge1": 48.0404, | |
| "eval_g2l_rouge2": 37.3411, | |
| "eval_g2l_rougeL": 47.8907, | |
| "eval_g2l_rougeLsum": 47.867, | |
| "eval_l2ex_cer": 83.3722, | |
| "eval_l2ex_gen_len": 21.9188, | |
| "eval_l2ex_rouge1": 33.2159, | |
| "eval_l2ex_rouge2": 16.5159, | |
| "eval_l2ex_rougeL": 29.1348, | |
| "eval_l2ex_rougeLsum": 29.2304, | |
| "eval_l2g_cer": 72.9959, | |
| "eval_l2g_gen_len": 15.519, | |
| "eval_l2g_rouge1": 40.681, | |
| "eval_l2g_rouge2": 27.6769, | |
| "eval_l2g_rougeL": 38.6264, | |
| "eval_l2g_rougeLsum": 38.6627, | |
| "eval_loss": 3.1981189250946045, | |
| "eval_runtime": 291.8194, | |
| "eval_samples_per_second": 33.997, | |
| "eval_steps_per_second": 0.535, | |
| "step": 41540 | |
| }, | |
| { | |
| "epoch": 7.502708559046587, | |
| "grad_norm": 153.0230712890625, | |
| "learning_rate": 0.00016248865932936134, | |
| "loss": 3.0927, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 7.6527627302275185, | |
| "grad_norm": 212.92391967773438, | |
| "learning_rate": 0.0001575448468716914, | |
| "loss": 3.0974, | |
| "step": 42381 | |
| }, | |
| { | |
| "epoch": 7.802816901408451, | |
| "grad_norm": 186.11282348632812, | |
| "learning_rate": 0.00015259304971962191, | |
| "loss": 3.09, | |
| "step": 43212 | |
| }, | |
| { | |
| "epoch": 7.9528710725893825, | |
| "grad_norm": 107.77149200439453, | |
| "learning_rate": 0.00014763868794401698, | |
| "loss": 3.0957, | |
| "step": 44043 | |
| }, | |
| { | |
| "epoch": 8.102925243770315, | |
| "grad_norm": 111.45164489746094, | |
| "learning_rate": 0.00014268718442289166, | |
| "loss": 3.0703, | |
| "step": 44874 | |
| }, | |
| { | |
| "epoch": 8.250993138317082, | |
| "eval_g2l_cer": 44.1066, | |
| "eval_g2l_gen_len": 3.9695, | |
| "eval_g2l_rouge1": 48.1237, | |
| "eval_g2l_rouge2": 37.5462, | |
| "eval_g2l_rougeL": 48.0143, | |
| "eval_g2l_rougeLsum": 48.0057, | |
| "eval_l2ex_cer": 83.1439, | |
| "eval_l2ex_gen_len": 22.295, | |
| "eval_l2ex_rouge1": 33.8654, | |
| "eval_l2ex_rouge2": 16.5697, | |
| "eval_l2ex_rougeL": 29.7053, | |
| "eval_l2ex_rougeLsum": 29.8195, | |
| "eval_l2g_cer": 71.4647, | |
| "eval_l2g_gen_len": 15.6419, | |
| "eval_l2g_rouge1": 41.0845, | |
| "eval_l2g_rouge2": 27.5338, | |
| "eval_l2g_rougeL": 38.8182, | |
| "eval_l2g_rougeLsum": 38.8839, | |
| "eval_loss": 3.19246506690979, | |
| "eval_runtime": 290.3246, | |
| "eval_samples_per_second": 34.172, | |
| "eval_steps_per_second": 0.537, | |
| "step": 45694 | |
| }, | |
| { | |
| "epoch": 8.252979414951247, | |
| "grad_norm": 117.11378479003906, | |
| "learning_rate": 0.0001377439589057116, | |
| "loss": 3.0554, | |
| "step": 45705 | |
| }, | |
| { | |
| "epoch": 8.403033586132178, | |
| "grad_norm": 98.44864654541016, | |
| "learning_rate": 0.00013281442208111732, | |
| "loss": 3.0581, | |
| "step": 46536 | |
| }, | |
| { | |
| "epoch": 8.55308775731311, | |
| "grad_norm": 110.35213470458984, | |
| "learning_rate": 0.00012790396965456613, | |
| "loss": 3.0478, | |
| "step": 47367 | |
| }, | |
| { | |
| "epoch": 8.703141928494041, | |
| "grad_norm": 56.789737701416016, | |
| "learning_rate": 0.00012301797644237423, | |
| "loss": 3.0599, | |
| "step": 48198 | |
| }, | |
| { | |
| "epoch": 8.853196099674973, | |
| "grad_norm": 111.45304107666016, | |
| "learning_rate": 0.00011816179048862318, | |
| "loss": 3.0381, | |
| "step": 49029 | |
| }, | |
| { | |
| "epoch": 9.001083423618635, | |
| "eval_g2l_cer": 44.1774, | |
| "eval_g2l_gen_len": 4.1188, | |
| "eval_g2l_rouge1": 48.6114, | |
| "eval_g2l_rouge2": 37.8262, | |
| "eval_g2l_rougeL": 48.5072, | |
| "eval_g2l_rougeLsum": 48.4844, | |
| "eval_l2ex_cer": 83.6477, | |
| "eval_l2ex_gen_len": 22.4625, | |
| "eval_l2ex_rouge1": 33.2375, | |
| "eval_l2ex_rouge2": 16.4943, | |
| "eval_l2ex_rougeL": 29.1757, | |
| "eval_l2ex_rougeLsum": 29.2794, | |
| "eval_l2g_cer": 72.9254, | |
| "eval_l2g_gen_len": 17.2116, | |
| "eval_l2g_rouge1": 41.0375, | |
| "eval_l2g_rouge2": 27.5603, | |
| "eval_l2g_rougeL": 38.745, | |
| "eval_l2g_rougeLsum": 38.787, | |
| "eval_loss": 3.180062770843506, | |
| "eval_runtime": 297.1459, | |
| "eval_samples_per_second": 33.388, | |
| "eval_steps_per_second": 0.525, | |
| "step": 49848 | |
| }, | |
| { | |
| "epoch": 9.003250270855904, | |
| "grad_norm": 78.80842590332031, | |
| "learning_rate": 0.00011334072721137046, | |
| "loss": 3.0595, | |
| "step": 49860 | |
| }, | |
| { | |
| "epoch": 9.153304442036836, | |
| "grad_norm": 138.7894287109375, | |
| "learning_rate": 0.00010856006358457137, | |
| "loss": 3.0096, | |
| "step": 50691 | |
| }, | |
| { | |
| "epoch": 9.303358613217767, | |
| "grad_norm": 132.17127990722656, | |
| "learning_rate": 0.00010382503236208064, | |
| "loss": 3.0273, | |
| "step": 51522 | |
| }, | |
| { | |
| "epoch": 9.453412784398699, | |
| "grad_norm": 130.38265991210938, | |
| "learning_rate": 9.914081635005574e-05, | |
| "loss": 3.0237, | |
| "step": 52353 | |
| }, | |
| { | |
| "epoch": 9.603466955579632, | |
| "grad_norm": 100.80162811279297, | |
| "learning_rate": 9.451254273403124e-05, | |
| "loss": 3.0167, | |
| "step": 53184 | |
| }, | |
| { | |
| "epoch": 9.751173708920188, | |
| "eval_g2l_cer": 44.0723, | |
| "eval_g2l_gen_len": 4.1516, | |
| "eval_g2l_rouge1": 48.7144, | |
| "eval_g2l_rouge2": 37.9052, | |
| "eval_g2l_rougeL": 48.5889, | |
| "eval_g2l_rougeLsum": 48.5704, | |
| "eval_l2ex_cer": 82.0577, | |
| "eval_l2ex_gen_len": 22.3731, | |
| "eval_l2ex_rouge1": 33.8214, | |
| "eval_l2ex_rouge2": 17.2047, | |
| "eval_l2ex_rougeL": 29.9782, | |
| "eval_l2ex_rougeLsum": 30.0546, | |
| "eval_l2g_cer": 72.335, | |
| "eval_l2g_gen_len": 17.0699, | |
| "eval_l2g_rouge1": 41.6605, | |
| "eval_l2g_rouge2": 28.2593, | |
| "eval_l2g_rougeL": 39.3968, | |
| "eval_l2g_rougeLsum": 39.4309, | |
| "eval_loss": 3.1734836101531982, | |
| "eval_runtime": 297.1857, | |
| "eval_samples_per_second": 33.383, | |
| "eval_steps_per_second": 0.525, | |
| "step": 54002 | |
| }, | |
| { | |
| "epoch": 9.753521126760564, | |
| "grad_norm": 98.55856323242188, | |
| "learning_rate": 8.994527746687389e-05, | |
| "loss": 3.0202, | |
| "step": 54015 | |
| }, | |
| { | |
| "epoch": 9.903575297941495, | |
| "grad_norm": 110.90308380126953, | |
| "learning_rate": 8.544401972376058e-05, | |
| "loss": 3.0123, | |
| "step": 54846 | |
| }, | |
| { | |
| "epoch": 10.053629469122427, | |
| "grad_norm": 103.5262451171875, | |
| "learning_rate": 8.10136964302491e-05, | |
| "loss": 3.0112, | |
| "step": 55677 | |
| }, | |
| { | |
| "epoch": 10.203683640303359, | |
| "grad_norm": 73.44245147705078, | |
| "learning_rate": 7.665915686943095e-05, | |
| "loss": 2.9824, | |
| "step": 56508 | |
| }, | |
| { | |
| "epoch": 10.35373781148429, | |
| "grad_norm": 77.93965148925781, | |
| "learning_rate": 7.238516737406908e-05, | |
| "loss": 2.9999, | |
| "step": 57339 | |
| }, | |
| { | |
| "epoch": 10.501263994221741, | |
| "eval_g2l_cer": 44.1363, | |
| "eval_g2l_gen_len": 4.1471, | |
| "eval_g2l_rouge1": 48.6933, | |
| "eval_g2l_rouge2": 38.0423, | |
| "eval_g2l_rougeL": 48.565, | |
| "eval_g2l_rougeLsum": 48.5648, | |
| "eval_l2ex_cer": 81.2579, | |
| "eval_l2ex_gen_len": 21.4666, | |
| "eval_l2ex_rouge1": 33.958, | |
| "eval_l2ex_rouge2": 16.8411, | |
| "eval_l2ex_rougeL": 29.5656, | |
| "eval_l2ex_rougeLsum": 29.6795, | |
| "eval_l2g_cer": 71.0675, | |
| "eval_l2g_gen_len": 16.2517, | |
| "eval_l2g_rouge1": 41.5203, | |
| "eval_l2g_rouge2": 28.0296, | |
| "eval_l2g_rougeL": 39.1863, | |
| "eval_l2g_rougeLsum": 39.2508, | |
| "eval_loss": 3.1717426776885986, | |
| "eval_runtime": 290.2937, | |
| "eval_samples_per_second": 34.176, | |
| "eval_steps_per_second": 0.537, | |
| "step": 58156 | |
| }, | |
| { | |
| "epoch": 10.503791982665222, | |
| "grad_norm": 129.3556365966797, | |
| "learning_rate": 6.81964061095297e-05, | |
| "loss": 2.9888, | |
| "step": 58170 | |
| }, | |
| { | |
| "epoch": 10.653846153846153, | |
| "grad_norm": 112.28192138671875, | |
| "learning_rate": 6.409745795321991e-05, | |
| "loss": 2.9878, | |
| "step": 59001 | |
| }, | |
| { | |
| "epoch": 10.803900325027085, | |
| "grad_norm": 76.26856231689453, | |
| "learning_rate": 6.009280947613472e-05, | |
| "loss": 2.9817, | |
| "step": 59832 | |
| }, | |
| { | |
| "epoch": 10.953954496208016, | |
| "grad_norm": 58.20437240600586, | |
| "learning_rate": 5.618684403200737e-05, | |
| "loss": 2.9851, | |
| "step": 60663 | |
| }, | |
| { | |
| "epoch": 11.10400866738895, | |
| "grad_norm": 108.53790283203125, | |
| "learning_rate": 5.238383695943713e-05, | |
| "loss": 2.9823, | |
| "step": 61494 | |
| }, | |
| { | |
| "epoch": 11.251354279523294, | |
| "eval_g2l_cer": 44.0289, | |
| "eval_g2l_gen_len": 4.1275, | |
| "eval_g2l_rouge1": 48.9057, | |
| "eval_g2l_rouge2": 38.3159, | |
| "eval_g2l_rougeL": 48.7647, | |
| "eval_g2l_rougeLsum": 48.766, | |
| "eval_l2ex_cer": 82.4492, | |
| "eval_l2ex_gen_len": 22.9445, | |
| "eval_l2ex_rouge1": 33.8799, | |
| "eval_l2ex_rouge2": 16.7295, | |
| "eval_l2ex_rougeL": 29.4575, | |
| "eval_l2ex_rougeLsum": 29.6104, | |
| "eval_l2g_cer": 71.7288, | |
| "eval_l2g_gen_len": 16.7564, | |
| "eval_l2g_rouge1": 41.5535, | |
| "eval_l2g_rouge2": 28.1997, | |
| "eval_l2g_rougeL": 39.2564, | |
| "eval_l2g_rougeLsum": 39.323, | |
| "eval_loss": 3.1673169136047363, | |
| "eval_runtime": 295.6043, | |
| "eval_samples_per_second": 33.562, | |
| "eval_steps_per_second": 0.528, | |
| "step": 62310 | |
| }, | |
| { | |
| "epoch": 11.254062838569881, | |
| "grad_norm": 78.87760162353516, | |
| "learning_rate": 4.868795090224752e-05, | |
| "loss": 2.9644, | |
| "step": 62325 | |
| }, | |
| { | |
| "epoch": 11.404117009750813, | |
| "grad_norm": 63.00550079345703, | |
| "learning_rate": 4.510323125319609e-05, | |
| "loss": 2.9714, | |
| "step": 63156 | |
| }, | |
| { | |
| "epoch": 11.554171180931744, | |
| "grad_norm": 107.51451110839844, | |
| "learning_rate": 4.1633601726023533e-05, | |
| "loss": 2.972, | |
| "step": 63987 | |
| }, | |
| { | |
| "epoch": 11.704225352112676, | |
| "grad_norm": 66.15150451660156, | |
| "learning_rate": 3.82828600606881e-05, | |
| "loss": 2.9604, | |
| "step": 64818 | |
| }, | |
| { | |
| "epoch": 11.854279523293608, | |
| "grad_norm": 65.40077209472656, | |
| "learning_rate": 3.505467386648718e-05, | |
| "loss": 2.9667, | |
| "step": 65649 | |
| }, | |
| { | |
| "epoch": 12.001444564824846, | |
| "eval_g2l_cer": 43.9512, | |
| "eval_g2l_gen_len": 4.0624, | |
| "eval_g2l_rouge1": 48.889, | |
| "eval_g2l_rouge2": 38.1288, | |
| "eval_g2l_rougeL": 48.7444, | |
| "eval_g2l_rougeLsum": 48.751, | |
| "eval_l2ex_cer": 83.3432, | |
| "eval_l2ex_gen_len": 22.5889, | |
| "eval_l2ex_rouge1": 33.672, | |
| "eval_l2ex_rouge2": 16.682, | |
| "eval_l2ex_rougeL": 29.3383, | |
| "eval_l2ex_rougeLsum": 29.4381, | |
| "eval_l2g_cer": 72.969, | |
| "eval_l2g_gen_len": 17.2907, | |
| "eval_l2g_rouge1": 41.384, | |
| "eval_l2g_rouge2": 28.0121, | |
| "eval_l2g_rougeL": 39.0564, | |
| "eval_l2g_rougeLsum": 39.1247, | |
| "eval_loss": 3.163238286972046, | |
| "eval_runtime": 296.9381, | |
| "eval_samples_per_second": 33.411, | |
| "eval_steps_per_second": 0.525, | |
| "step": 66464 | |
| }, | |
| { | |
| "epoch": 12.00433369447454, | |
| "grad_norm": 80.00790405273438, | |
| "learning_rate": 3.195257660761534e-05, | |
| "loss": 2.9548, | |
| "step": 66480 | |
| }, | |
| { | |
| "epoch": 12.15438786565547, | |
| "grad_norm": 50.12080001831055, | |
| "learning_rate": 2.897996373555297e-05, | |
| "loss": 2.9599, | |
| "step": 67311 | |
| }, | |
| { | |
| "epoch": 12.304442036836402, | |
| "grad_norm": 137.98057556152344, | |
| "learning_rate": 2.6140088972519277e-05, | |
| "loss": 2.9426, | |
| "step": 68142 | |
| }, | |
| { | |
| "epoch": 12.454496208017336, | |
| "grad_norm": 74.86833190917969, | |
| "learning_rate": 2.343606075005708e-05, | |
| "loss": 2.9445, | |
| "step": 68973 | |
| }, | |
| { | |
| "epoch": 12.604550379198267, | |
| "grad_norm": 85.49880981445312, | |
| "learning_rate": 2.0870838806648037e-05, | |
| "loss": 2.9445, | |
| "step": 69804 | |
| }, | |
| { | |
| "epoch": 12.751534850126399, | |
| "eval_g2l_cer": 44.0472, | |
| "eval_g2l_gen_len": 4.109, | |
| "eval_g2l_rouge1": 48.9965, | |
| "eval_g2l_rouge2": 38.1664, | |
| "eval_g2l_rougeL": 48.8442, | |
| "eval_g2l_rougeLsum": 48.8419, | |
| "eval_l2ex_cer": 81.2857, | |
| "eval_l2ex_gen_len": 21.2364, | |
| "eval_l2ex_rouge1": 34.1658, | |
| "eval_l2ex_rouge2": 17.3387, | |
| "eval_l2ex_rougeL": 29.9082, | |
| "eval_l2ex_rougeLsum": 30.0362, | |
| "eval_l2g_cer": 70.6762, | |
| "eval_l2g_gen_len": 15.9381, | |
| "eval_l2g_rouge1": 41.6215, | |
| "eval_l2g_rouge2": 28.1386, | |
| "eval_l2g_rougeL": 39.3091, | |
| "eval_l2g_rougeLsum": 39.3715, | |
| "eval_loss": 3.163139581680298, | |
| "eval_runtime": 291.4682, | |
| "eval_samples_per_second": 34.038, | |
| "eval_steps_per_second": 0.535, | |
| "step": 70618 | |
| }, | |
| { | |
| "epoch": 12.754604550379199, | |
| "grad_norm": 60.18415451049805, | |
| "learning_rate": 1.844723094808244e-05, | |
| "loss": 2.9515, | |
| "step": 70635 | |
| }, | |
| { | |
| "epoch": 12.90465872156013, | |
| "grad_norm": 39.721649169921875, | |
| "learning_rate": 1.6167889974129134e-05, | |
| "loss": 2.9545, | |
| "step": 71466 | |
| }, | |
| { | |
| "epoch": 13.054712892741062, | |
| "grad_norm": 49.22962188720703, | |
| "learning_rate": 1.4035310774870041e-05, | |
| "loss": 2.9433, | |
| "step": 72297 | |
| }, | |
| { | |
| "epoch": 13.204767063921993, | |
| "grad_norm": 55.78800964355469, | |
| "learning_rate": 1.205182759987737e-05, | |
| "loss": 2.9241, | |
| "step": 73128 | |
| }, | |
| { | |
| "epoch": 13.354821235102925, | |
| "grad_norm": 69.98873138427734, | |
| "learning_rate": 1.0219611503222213e-05, | |
| "loss": 2.939, | |
| "step": 73959 | |
| }, | |
| { | |
| "epoch": 13.501625135427952, | |
| "eval_g2l_cer": 43.9078, | |
| "eval_g2l_gen_len": 4.0981, | |
| "eval_g2l_rouge1": 48.969, | |
| "eval_g2l_rouge2": 38.1559, | |
| "eval_g2l_rougeL": 48.8152, | |
| "eval_g2l_rougeLsum": 48.8193, | |
| "eval_l2ex_cer": 81.5515, | |
| "eval_l2ex_gen_len": 21.7205, | |
| "eval_l2ex_rouge1": 33.9427, | |
| "eval_l2ex_rouge2": 17.0266, | |
| "eval_l2ex_rougeL": 29.5977, | |
| "eval_l2ex_rougeLsum": 29.7301, | |
| "eval_l2g_cer": 70.7346, | |
| "eval_l2g_gen_len": 16.1531, | |
| "eval_l2g_rouge1": 41.7374, | |
| "eval_l2g_rouge2": 28.1793, | |
| "eval_l2g_rougeL": 39.3779, | |
| "eval_l2g_rougeLsum": 39.4426, | |
| "eval_loss": 3.1640655994415283, | |
| "eval_runtime": 292.5347, | |
| "eval_samples_per_second": 33.914, | |
| "eval_steps_per_second": 0.533, | |
| "step": 74772 | |
| }, | |
| { | |
| "epoch": 13.504875406283857, | |
| "grad_norm": 69.49555206298828, | |
| "learning_rate": 8.54066796711184e-06, | |
| "loss": 2.9465, | |
| "step": 74790 | |
| }, | |
| { | |
| "epoch": 13.654929577464788, | |
| "grad_norm": 73.59809112548828, | |
| "learning_rate": 7.016834706756168e-06, | |
| "loss": 2.9391, | |
| "step": 75621 | |
| }, | |
| { | |
| "epoch": 13.804983748645721, | |
| "grad_norm": 64.26115417480469, | |
| "learning_rate": 5.649779658866368e-06, | |
| "loss": 2.9356, | |
| "step": 76452 | |
| }, | |
| { | |
| "epoch": 13.955037919826653, | |
| "grad_norm": 37.47693634033203, | |
| "learning_rate": 4.440999155987467e-06, | |
| "loss": 2.9523, | |
| "step": 77283 | |
| }, | |
| { | |
| "epoch": 14.105092091007585, | |
| "grad_norm": 97.73049926757812, | |
| "learning_rate": 3.391816288662864e-06, | |
| "loss": 2.9394, | |
| "step": 78114 | |
| }, | |
| { | |
| "epoch": 14.251715420729505, | |
| "eval_g2l_cer": 44.1271, | |
| "eval_g2l_gen_len": 4.1016, | |
| "eval_g2l_rouge1": 48.8563, | |
| "eval_g2l_rouge2": 38.0804, | |
| "eval_g2l_rougeL": 48.7034, | |
| "eval_g2l_rougeLsum": 48.679, | |
| "eval_l2ex_cer": 81.746, | |
| "eval_l2ex_gen_len": 21.9486, | |
| "eval_l2ex_rouge1": 34.1479, | |
| "eval_l2ex_rouge2": 17.1381, | |
| "eval_l2ex_rougeL": 29.7996, | |
| "eval_l2ex_rougeLsum": 29.9184, | |
| "eval_l2g_cer": 71.184, | |
| "eval_l2g_gen_len": 16.3747, | |
| "eval_l2g_rouge1": 41.7919, | |
| "eval_l2g_rouge2": 28.2088, | |
| "eval_l2g_rougeL": 39.4284, | |
| "eval_l2g_rougeLsum": 39.4987, | |
| "eval_loss": 3.1630301475524902, | |
| "eval_runtime": 294.6002, | |
| "eval_samples_per_second": 33.676, | |
| "eval_steps_per_second": 0.53, | |
| "step": 78926 | |
| } | |
| ], | |
| "logging_steps": 831, | |
| "max_steps": 83070, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 4154, | |
| "total_flos": 1.4551992475225948e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |