{
  "best_metric": 3.5637874603271484,
  "best_model_checkpoint": "checkpoints/mt5-base/checkpoint-37386",
  "epoch": 13.501625135427952,
  "eval_steps": 2077,
  "global_step": 37386,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14987360057782592,
      "eval_g2l_cer": 286.0084,
      "eval_g2l_gen_len": 8.1826,
      "eval_g2l_rouge1": 5.5622,
      "eval_g2l_rouge2": 1.1913,
      "eval_g2l_rougeL": 5.4996,
      "eval_g2l_rougeLsum": 5.5114,
      "eval_l2ex_cer": 86.6247,
      "eval_l2ex_gen_len": 7.9383,
      "eval_l2ex_rouge1": 16.8571,
      "eval_l2ex_rouge2": 5.0595,
      "eval_l2ex_rougeL": 15.4605,
      "eval_l2ex_rougeLsum": 15.4778,
      "eval_l2g_cer": 87.9265,
      "eval_l2g_gen_len": 5.5834,
      "eval_l2g_rouge1": 12.3664,
      "eval_l2g_rouge2": 1.6791,
      "eval_l2g_rougeL": 11.57,
      "eval_l2g_rougeLsum": 11.5871,
      "eval_loss": 8.359071731567383,
      "eval_runtime": 145.7254,
      "eval_samples_per_second": 68.08,
      "eval_steps_per_second": 2.134,
      "step": 415
    },
    {
      "epoch": 0.15023474178403756,
      "grad_norm": 1659.9119873046875,
      "learning_rate": 2.0038535645472063e-05,
      "loss": 15.088,
      "step": 416
    },
    {
      "epoch": 0.3004694835680751,
      "grad_norm": 8.369742393493652,
      "learning_rate": 4.0077071290944125e-05,
      "loss": 7.1403,
      "step": 832
    },
    {
      "epoch": 0.4507042253521127,
      "grad_norm": 1.8832136392593384,
      "learning_rate": 6.0115606936416195e-05,
      "loss": 5.3841,
      "step": 1248
    },
    {
      "epoch": 0.6009389671361502,
      "grad_norm": 23.656179428100586,
      "learning_rate": 8.015414258188825e-05,
      "loss": 4.9928,
      "step": 1664
    },
    {
      "epoch": 0.7500902853015529,
      "eval_g2l_cer": 59.4485,
      "eval_g2l_gen_len": 2.9556,
      "eval_g2l_rouge1": 27.1975,
      "eval_g2l_rouge2": 17.2281,
      "eval_g2l_rougeL": 27.1916,
      "eval_g2l_rougeLsum": 27.2117,
      "eval_l2ex_cer": 95.5123,
      "eval_l2ex_gen_len": 23.6742,
      "eval_l2ex_rouge1": 25.7497,
      "eval_l2ex_rouge2": 11.1192,
      "eval_l2ex_rougeL": 22.9461,
      "eval_l2ex_rougeLsum": 22.9441,
      "eval_l2g_cer": 83.2502,
      "eval_l2g_gen_len": 15.9711,
      "eval_l2g_rouge1": 27.2934,
      "eval_l2g_rouge2": 14.8195,
      "eval_l2g_rougeL": 25.9582,
      "eval_l2g_rougeLsum": 25.9617,
      "eval_loss": 4.2178425788879395,
      "eval_runtime": 203.8137,
      "eval_samples_per_second": 48.677,
      "eval_steps_per_second": 1.526,
      "step": 2077
    },
    {
      "epoch": 0.7511737089201878,
      "grad_norm": 1.5714240074157715,
      "learning_rate": 9.999999747638704e-05,
      "loss": 4.7614,
      "step": 2080
    },
    {
      "epoch": 0.9014084507042254,
      "grad_norm": 2.348027467727661,
      "learning_rate": 9.997217976013284e-05,
      "loss": 4.6037,
      "step": 2496
    },
    {
      "epoch": 1.051643192488263,
      "grad_norm": 2.6275577545166016,
      "learning_rate": 9.98898067640237e-05,
      "loss": 4.5136,
      "step": 2912
    },
    {
      "epoch": 1.2018779342723005,
      "grad_norm": 1.1122652292251587,
      "learning_rate": 9.975296886788363e-05,
      "loss": 4.4057,
      "step": 3328
    },
    {
      "epoch": 1.352112676056338,
      "grad_norm": 1.2248876094818115,
      "learning_rate": 9.956181621053908e-05,
      "loss": 4.3513,
      "step": 3744
    },
    {
      "epoch": 1.500180570603106,
      "eval_g2l_cer": 56.9353,
      "eval_g2l_gen_len": 3.093,
      "eval_g2l_rouge1": 32.8366,
      "eval_g2l_rouge2": 24.3793,
      "eval_g2l_rougeL": 32.7964,
      "eval_g2l_rougeLsum": 32.7681,
      "eval_l2ex_cer": 84.2827,
      "eval_l2ex_gen_len": 20.4604,
      "eval_l2ex_rouge1": 28.5353,
      "eval_l2ex_rouge2": 12.5551,
      "eval_l2ex_rougeL": 25.5058,
      "eval_l2ex_rougeLsum": 25.5427,
      "eval_l2g_cer": 81.1104,
      "eval_l2g_gen_len": 18.0072,
      "eval_l2g_rouge1": 32.688,
      "eval_l2g_rouge2": 18.9467,
      "eval_l2g_rougeL": 30.7295,
      "eval_l2g_rougeLsum": 30.7676,
      "eval_loss": 3.949233293533325,
      "eval_runtime": 197.691,
      "eval_samples_per_second": 50.184,
      "eval_steps_per_second": 1.573,
      "step": 4154
    },
    {
      "epoch": 1.5023474178403755,
      "grad_norm": 1.4981008768081665,
      "learning_rate": 9.931655852508637e-05,
      "loss": 4.3061,
      "step": 4160
    },
    {
      "epoch": 1.652582159624413,
      "grad_norm": 1.050997018814087,
      "learning_rate": 9.901746490877203e-05,
      "loss": 4.2525,
      "step": 4576
    },
    {
      "epoch": 1.8028169014084507,
      "grad_norm": 0.8377422094345093,
      "learning_rate": 9.866486352773886e-05,
      "loss": 4.2289,
      "step": 4992
    },
    {
      "epoch": 1.9530516431924883,
      "grad_norm": 0.968815267086029,
      "learning_rate": 9.82591412569612e-05,
      "loss": 4.1958,
      "step": 5408
    },
    {
      "epoch": 2.103286384976526,
      "grad_norm": 0.9952152967453003,
      "learning_rate": 9.780074325576496e-05,
      "loss": 4.1187,
      "step": 5824
    },
    {
      "epoch": 2.2502708559046587,
      "eval_g2l_cer": 53.8806,
      "eval_g2l_gen_len": 3.0548,
      "eval_g2l_rouge1": 35.439,
      "eval_g2l_rouge2": 27.2433,
      "eval_g2l_rougeL": 35.4384,
      "eval_g2l_rougeLsum": 35.3985,
      "eval_l2ex_cer": 89.3431,
      "eval_l2ex_gen_len": 23.4573,
      "eval_l2ex_rouge1": 27.8815,
      "eval_l2ex_rouge2": 12.1568,
      "eval_l2ex_rougeL": 24.5796,
      "eval_l2ex_rougeLsum": 24.6286,
      "eval_l2g_cer": 78.589,
      "eval_l2g_gen_len": 17.4946,
      "eval_l2g_rouge1": 35.6236,
      "eval_l2g_rouge2": 22.8027,
      "eval_l2g_rougeL": 33.8966,
      "eval_l2g_rougeLsum": 33.9001,
      "eval_loss": 3.8344309329986572,
      "eval_runtime": 202.9852,
      "eval_samples_per_second": 48.875,
      "eval_steps_per_second": 1.532,
      "step": 6231
    },
    {
      "epoch": 2.2535211267605635,
      "grad_norm": 24.567110061645508,
      "learning_rate": 9.72901724793979e-05,
      "loss": 4.0993,
      "step": 6240
    },
    {
      "epoch": 2.403755868544601,
      "grad_norm": 0.9726364612579346,
      "learning_rate": 9.672798912718604e-05,
      "loss": 4.0734,
      "step": 6656
    },
    {
      "epoch": 2.5539906103286385,
      "grad_norm": 0.9216151833534241,
      "learning_rate": 9.611481002788184e-05,
      "loss": 4.0584,
      "step": 7072
    },
    {
      "epoch": 2.704225352112676,
      "grad_norm": 0.7880883812904358,
      "learning_rate": 9.545130796287832e-05,
      "loss": 4.0312,
      "step": 7488
    },
    {
      "epoch": 2.8544600938967135,
      "grad_norm": 0.9635422229766846,
      "learning_rate": 9.473821092803199e-05,
      "loss": 4.0046,
      "step": 7904
    },
    {
      "epoch": 3.0003611412062114,
      "eval_g2l_cer": 52.062,
      "eval_g2l_gen_len": 3.0702,
      "eval_g2l_rouge1": 36.8811,
      "eval_g2l_rouge2": 28.8156,
      "eval_g2l_rougeL": 36.8925,
      "eval_g2l_rougeLsum": 36.8317,
      "eval_l2ex_cer": 90.1083,
      "eval_l2ex_gen_len": 22.4645,
      "eval_l2ex_rouge1": 27.5056,
      "eval_l2ex_rouge2": 12.5248,
      "eval_l2ex_rougeL": 24.4085,
      "eval_l2ex_rougeLsum": 24.4463,
      "eval_l2g_cer": 78.1779,
      "eval_l2g_gen_len": 17.8095,
      "eval_l2g_rouge1": 36.8332,
      "eval_l2g_rouge2": 23.9422,
      "eval_l2g_rougeL": 34.9672,
      "eval_l2g_rougeLsum": 34.995,
      "eval_loss": 3.7595808506011963,
      "eval_runtime": 202.3409,
      "eval_samples_per_second": 49.031,
      "eval_steps_per_second": 1.537,
      "step": 8308
    },
    {
      "epoch": 3.004694835680751,
      "grad_norm": 0.900855541229248,
      "learning_rate": 9.397630133490413e-05,
      "loss": 3.992,
      "step": 8320
    },
    {
      "epoch": 3.1549295774647885,
      "grad_norm": 0.8881470561027527,
      "learning_rate": 9.316641515229741e-05,
      "loss": 3.9362,
      "step": 8736
    },
    {
      "epoch": 3.3051643192488265,
      "grad_norm": 0.7969784140586853,
      "learning_rate": 9.230944098902894e-05,
      "loss": 3.9143,
      "step": 9152
    },
    {
      "epoch": 3.455399061032864,
      "grad_norm": 0.8603357672691345,
      "learning_rate": 9.1406319118947e-05,
      "loss": 3.9162,
      "step": 9568
    },
    {
      "epoch": 3.6056338028169015,
      "grad_norm": 0.9974511861801147,
      "learning_rate": 9.045804044926044e-05,
      "loss": 3.8987,
      "step": 9984
    },
    {
      "epoch": 3.7504514265077646,
      "eval_g2l_cer": 50.7917,
      "eval_g2l_gen_len": 3.0031,
      "eval_g2l_rouge1": 37.7135,
      "eval_g2l_rouge2": 29.9526,
      "eval_g2l_rougeL": 37.7649,
      "eval_g2l_rougeLsum": 37.7041,
      "eval_l2ex_cer": 86.8671,
      "eval_l2ex_gen_len": 22.2271,
      "eval_l2ex_rouge1": 28.7692,
      "eval_l2ex_rouge2": 12.8536,
      "eval_l2ex_rougeL": 25.3768,
      "eval_l2ex_rougeLsum": 25.4158,
      "eval_l2g_cer": 73.3411,
      "eval_l2g_gen_len": 15.6692,
      "eval_l2g_rouge1": 37.5152,
      "eval_l2g_rouge2": 24.5536,
      "eval_l2g_rougeL": 35.5225,
      "eval_l2g_rougeLsum": 35.5437,
      "eval_loss": 3.7121169567108154,
      "eval_runtime": 192.3527,
      "eval_samples_per_second": 51.577,
      "eval_steps_per_second": 1.617,
      "step": 10385
    },
    {
      "epoch": 3.755868544600939,
      "grad_norm": 0.8458616733551025,
      "learning_rate": 8.94656454333133e-05,
      "loss": 3.8883,
      "step": 10400
    },
    {
      "epoch": 3.9061032863849765,
      "grad_norm": 3.1263327598571777,
      "learning_rate": 8.843022292899726e-05,
      "loss": 3.8775,
      "step": 10816
    },
    {
      "epoch": 4.056338028169014,
      "grad_norm": 1.013489842414856,
      "learning_rate": 8.735290900405437e-05,
      "loss": 3.8514,
      "step": 11232
    },
    {
      "epoch": 4.206572769953052,
      "grad_norm": 0.9674685001373291,
      "learning_rate": 8.623488568958123e-05,
      "loss": 3.7962,
      "step": 11648
    },
    {
      "epoch": 4.356807511737089,
      "grad_norm": 1.0607421398162842,
      "learning_rate": 8.507737968310197e-05,
      "loss": 3.8043,
      "step": 12064
    },
    {
      "epoch": 4.500541711809317,
      "eval_g2l_cer": 50.088,
      "eval_g2l_gen_len": 3.0488,
      "eval_g2l_rouge1": 38.7702,
      "eval_g2l_rouge2": 30.6004,
      "eval_g2l_rougeL": 38.7959,
      "eval_g2l_rougeLsum": 38.7454,
      "eval_l2ex_cer": 84.5143,
      "eval_l2ex_gen_len": 20.52,
      "eval_l2ex_rouge1": 28.9181,
      "eval_l2ex_rouge2": 13.2853,
      "eval_l2ex_rougeL": 25.6409,
      "eval_l2ex_rougeLsum": 25.6588,
      "eval_l2g_cer": 72.4949,
      "eval_l2g_gen_len": 15.2432,
      "eval_l2g_rouge1": 37.6479,
      "eval_l2g_rouge2": 24.833,
      "eval_l2g_rougeL": 35.7678,
      "eval_l2g_rougeLsum": 35.776,
      "eval_loss": 3.674677848815918,
      "eval_runtime": 190.2532,
      "eval_samples_per_second": 52.146,
      "eval_steps_per_second": 1.635,
      "step": 12462
    },
    {
      "epoch": 4.507042253521127,
      "grad_norm": 0.9242987632751465,
      "learning_rate": 8.388166100263313e-05,
      "loss": 3.804,
      "step": 12480
    },
    {
      "epoch": 4.657276995305164,
      "grad_norm": 0.8233311772346497,
      "learning_rate": 8.264904159321721e-05,
      "loss": 3.7844,
      "step": 12896
    },
    {
      "epoch": 4.807511737089202,
      "grad_norm": 1.918661117553711,
      "learning_rate": 8.138087388745395e-05,
      "loss": 3.7948,
      "step": 13312
    },
    {
      "epoch": 4.957746478873239,
      "grad_norm": 0.8277648091316223,
      "learning_rate": 8.00785493216083e-05,
      "loss": 3.7951,
      "step": 13728
    },
    {
      "epoch": 5.107981220657277,
      "grad_norm": 1.0518523454666138,
      "learning_rate": 7.874349680892367e-05,
      "loss": 3.7423,
      "step": 14144
    },
    {
      "epoch": 5.250631997110871,
      "eval_g2l_cer": 49.743,
      "eval_g2l_gen_len": 3.0201,
      "eval_g2l_rouge1": 38.8263,
      "eval_g2l_rouge2": 31.1673,
      "eval_g2l_rougeL": 38.8286,
      "eval_g2l_rougeLsum": 38.7898,
      "eval_l2ex_cer": 86.565,
      "eval_l2ex_gen_len": 21.7523,
      "eval_l2ex_rouge1": 28.4984,
      "eval_l2ex_rouge2": 13.072,
      "eval_l2ex_rougeL": 25.2667,
      "eval_l2ex_rougeLsum": 25.2757,
      "eval_l2g_cer": 73.2917,
      "eval_l2g_gen_len": 16.0011,
      "eval_l2g_rouge1": 38.0438,
      "eval_l2g_rouge2": 25.3209,
      "eval_l2g_rougeL": 36.1091,
      "eval_l2g_rougeLsum": 36.1243,
      "eval_loss": 3.649608850479126,
      "eval_runtime": 197.4229,
      "eval_samples_per_second": 50.253,
      "eval_steps_per_second": 1.575,
      "step": 14539
    },
    {
      "epoch": 5.258215962441315,
      "grad_norm": 0.8540360331535339,
      "learning_rate": 7.737718117181538e-05,
      "loss": 3.7126,
      "step": 14560
    },
    {
      "epoch": 5.408450704225352,
      "grad_norm": 0.9189392328262329,
      "learning_rate": 7.598110153466441e-05,
      "loss": 3.7223,
      "step": 14976
    },
    {
      "epoch": 5.55868544600939,
      "grad_norm": 0.92618727684021,
      "learning_rate": 7.45567896789749e-05,
      "loss": 3.7139,
      "step": 15392
    },
    {
      "epoch": 5.708920187793427,
      "grad_norm": 0.7882264852523804,
      "learning_rate": 7.310580836270044e-05,
      "loss": 3.7179,
      "step": 15808
    },
    {
      "epoch": 5.859154929577465,
      "grad_norm": 0.8529959321022034,
      "learning_rate": 7.162974960558259e-05,
      "loss": 3.7121,
      "step": 16224
    },
    {
      "epoch": 6.000722282412423,
      "eval_g2l_cer": 49.3934,
      "eval_g2l_gen_len": 3.0096,
      "eval_g2l_rouge1": 39.4408,
      "eval_g2l_rouge2": 31.7057,
      "eval_g2l_rougeL": 39.4639,
      "eval_g2l_rougeLsum": 39.4161,
      "eval_l2ex_cer": 86.119,
      "eval_l2ex_gen_len": 20.7112,
      "eval_l2ex_rouge1": 28.8739,
      "eval_l2ex_rouge2": 13.2661,
      "eval_l2ex_rougeL": 25.7042,
      "eval_l2ex_rougeLsum": 25.7118,
      "eval_l2g_cer": 73.625,
      "eval_l2g_gen_len": 15.9897,
      "eval_l2g_rouge1": 38.1171,
      "eval_l2g_rouge2": 25.6405,
      "eval_l2g_rougeL": 36.2592,
      "eval_l2g_rougeLsum": 36.2666,
      "eval_loss": 3.6273715496063232,
      "eval_runtime": 193.9276,
      "eval_samples_per_second": 51.158,
      "eval_steps_per_second": 1.604,
      "step": 16616
    },
    {
      "epoch": 6.009389671361502,
      "grad_norm": 0.7976297736167908,
      "learning_rate": 7.013023294238368e-05,
      "loss": 3.7191,
      "step": 16640
    },
    {
      "epoch": 6.15962441314554,
      "grad_norm": 0.8516309261322021,
      "learning_rate": 6.860890364592963e-05,
      "loss": 3.6428,
      "step": 17056
    },
    {
      "epoch": 6.309859154929577,
      "grad_norm": 0.9273515343666077,
      "learning_rate": 6.706743092191335e-05,
      "loss": 3.6566,
      "step": 17472
    },
    {
      "epoch": 6.460093896713615,
      "grad_norm": 0.932829737663269,
      "learning_rate": 6.550750607743873e-05,
      "loss": 3.6627,
      "step": 17888
    },
    {
      "epoch": 6.610328638497653,
      "grad_norm": 0.9968202114105225,
      "learning_rate": 6.393084066531485e-05,
      "loss": 3.6652,
      "step": 18304
    },
    {
      "epoch": 6.750812567713976,
      "eval_g2l_cer": 49.5579,
      "eval_g2l_gen_len": 2.9938,
      "eval_g2l_rouge1": 39.6581,
      "eval_g2l_rouge2": 32.026,
      "eval_g2l_rougeL": 39.6932,
      "eval_g2l_rougeLsum": 39.6518,
      "eval_l2ex_cer": 88.4427,
      "eval_l2ex_gen_len": 23.11,
      "eval_l2ex_rouge1": 28.1485,
      "eval_l2ex_rouge2": 12.4558,
      "eval_l2ex_rougeL": 24.9414,
      "eval_l2ex_rougeLsum": 24.9605,
      "eval_l2g_cer": 73.3296,
      "eval_l2g_gen_len": 16.3263,
      "eval_l2g_rouge1": 38.4506,
      "eval_l2g_rouge2": 25.7696,
      "eval_l2g_rougeL": 36.5748,
      "eval_l2g_rougeLsum": 36.6091,
      "eval_loss": 3.6120047569274902,
      "eval_runtime": 197.9501,
      "eval_samples_per_second": 50.119,
      "eval_steps_per_second": 1.571,
      "step": 18693
    },
    {
      "epoch": 6.76056338028169,
      "grad_norm": 0.7791869640350342,
      "learning_rate": 6.233916460613673e-05,
      "loss": 3.6614,
      "step": 18720
    },
    {
      "epoch": 6.910798122065728,
      "grad_norm": 0.9385781288146973,
      "learning_rate": 6.0734224290212784e-05,
      "loss": 3.6471,
      "step": 19136
    },
    {
      "epoch": 7.061032863849765,
      "grad_norm": 0.8267916440963745,
      "learning_rate": 5.9117780661421754e-05,
      "loss": 3.6264,
      "step": 19552
    },
    {
      "epoch": 7.211267605633803,
      "grad_norm": 0.794131875038147,
      "learning_rate": 5.7491607285101345e-05,
      "loss": 3.6015,
      "step": 19968
    },
    {
      "epoch": 7.36150234741784,
      "grad_norm": 0.8748852610588074,
      "learning_rate": 5.585748840208869e-05,
      "loss": 3.5993,
      "step": 20384
    },
    {
      "epoch": 7.500902853015529,
      "eval_g2l_cer": 50.088,
      "eval_g2l_gen_len": 3.0582,
      "eval_g2l_rouge1": 39.9874,
      "eval_g2l_rouge2": 32.4432,
      "eval_g2l_rougeL": 40.0195,
      "eval_g2l_rougeLsum": 39.9365,
      "eval_l2ex_cer": 87.6165,
      "eval_l2ex_gen_len": 22.7133,
      "eval_l2ex_rouge1": 28.1937,
      "eval_l2ex_rouge2": 12.5673,
      "eval_l2ex_rougeL": 24.9397,
      "eval_l2ex_rougeLsum": 24.921,
      "eval_l2g_cer": 72.7284,
      "eval_l2g_gen_len": 15.6759,
      "eval_l2g_rouge1": 38.4813,
      "eval_l2g_rouge2": 25.936,
      "eval_l2g_rougeL": 36.5693,
      "eval_l2g_rougeLsum": 36.5729,
      "eval_loss": 3.6013987064361572,
      "eval_runtime": 195.438,
      "eval_samples_per_second": 50.763,
      "eval_steps_per_second": 1.591,
      "step": 20770
    },
    {
      "epoch": 7.511737089201878,
      "grad_norm": 0.9019631743431091,
      "learning_rate": 5.4217216971047445e-05,
      "loss": 3.5978,
      "step": 20800
    },
    {
      "epoch": 7.661971830985916,
      "grad_norm": 0.8872570395469666,
      "learning_rate": 5.257259270122993e-05,
      "loss": 3.6113,
      "step": 21216
    },
    {
      "epoch": 7.812206572769953,
      "grad_norm": 0.7394893169403076,
      "learning_rate": 5.0925420077832285e-05,
      "loss": 3.593,
      "step": 21632
    },
    {
      "epoch": 7.962441314553991,
      "grad_norm": 0.8534842133522034,
      "learning_rate": 4.927750638210947e-05,
      "loss": 3.5963,
      "step": 22048
    },
    {
      "epoch": 8.112676056338028,
      "grad_norm": 0.9047814607620239,
      "learning_rate": 4.7630659708422666e-05,
      "loss": 3.5722,
      "step": 22464
    },
    {
      "epoch": 8.250993138317082,
      "eval_g2l_cer": 49.5716,
      "eval_g2l_gen_len": 3.0388,
      "eval_g2l_rouge1": 40.4088,
      "eval_g2l_rouge2": 32.7272,
      "eval_g2l_rougeL": 40.4374,
      "eval_g2l_rougeLsum": 40.3677,
      "eval_l2ex_cer": 83.5858,
      "eval_l2ex_gen_len": 20.4851,
      "eval_l2ex_rouge1": 29.084,
      "eval_l2ex_rouge2": 12.9208,
      "eval_l2ex_rougeL": 25.6832,
      "eval_l2ex_rougeLsum": 25.7033,
      "eval_l2g_cer": 72.1741,
      "eval_l2g_gen_len": 15.6461,
      "eval_l2g_rouge1": 38.8628,
      "eval_l2g_rouge2": 26.1912,
      "eval_l2g_rougeL": 36.9072,
      "eval_l2g_rougeLsum": 36.9086,
      "eval_loss": 3.5901942253112793,
      "eval_runtime": 190.412,
      "eval_samples_per_second": 52.103,
      "eval_steps_per_second": 1.633,
      "step": 22847
    },
    {
      "epoch": 8.262910798122066,
      "grad_norm": 0.8366677761077881,
      "learning_rate": 4.598668698039414e-05,
      "loss": 3.5641,
      "step": 22880
    },
    {
      "epoch": 8.413145539906104,
      "grad_norm": 0.8628195524215698,
      "learning_rate": 4.4347391968347015e-05,
      "loss": 3.5702,
      "step": 23296
    },
    {
      "epoch": 8.56338028169014,
      "grad_norm": 0.9060849547386169,
      "learning_rate": 4.27145733102046e-05,
      "loss": 3.5508,
      "step": 23712
    },
    {
      "epoch": 8.713615023474178,
      "grad_norm": 0.8726539015769958,
      "learning_rate": 4.109002253802116e-05,
      "loss": 3.5637,
      "step": 24128
    },
    {
      "epoch": 8.863849765258216,
      "grad_norm": 0.9154978394508362,
      "learning_rate": 3.947552211230913e-05,
      "loss": 3.5435,
      "step": 24544
    },
    {
      "epoch": 9.001083423618635,
      "eval_g2l_cer": 48.6326,
      "eval_g2l_gen_len": 3.008,
      "eval_g2l_rouge1": 40.6427,
      "eval_g2l_rouge2": 33.0447,
      "eval_g2l_rougeL": 40.6651,
      "eval_g2l_rougeLsum": 40.6197,
      "eval_l2ex_cer": 85.6816,
      "eval_l2ex_gen_len": 20.9753,
      "eval_l2ex_rouge1": 28.5827,
      "eval_l2ex_rouge2": 12.8213,
      "eval_l2ex_rougeL": 25.352,
      "eval_l2ex_rougeLsum": 25.3642,
      "eval_l2g_cer": 72.7802,
      "eval_l2g_gen_len": 15.8102,
      "eval_l2g_rouge1": 38.814,
      "eval_l2g_rouge2": 26.1373,
      "eval_l2g_rougeL": 36.8943,
      "eval_l2g_rougeLsum": 36.9272,
      "eval_loss": 3.5814104080200195,
      "eval_runtime": 193.5202,
      "eval_samples_per_second": 51.266,
      "eval_steps_per_second": 1.607,
      "step": 24924
    },
    {
      "epoch": 9.014084507042254,
      "grad_norm": 0.9910312294960022,
      "learning_rate": 3.7872843466319744e-05,
      "loss": 3.5601,
      "step": 24960
    },
    {
      "epoch": 9.164319248826292,
      "grad_norm": 0.913223922252655,
      "learning_rate": 3.6283745062422726e-05,
      "loss": 3.5156,
      "step": 25376
    },
    {
      "epoch": 9.314553990610328,
      "grad_norm": 0.9026065468788147,
      "learning_rate": 3.470997046271774e-05,
      "loss": 3.5337,
      "step": 25792
    },
    {
      "epoch": 9.464788732394366,
      "grad_norm": 0.9726517796516418,
      "learning_rate": 3.315324641599434e-05,
      "loss": 3.5294,
      "step": 26208
    },
    {
      "epoch": 9.615023474178404,
      "grad_norm": 0.954593300819397,
      "learning_rate": 3.161528096313964e-05,
      "loss": 3.5242,
      "step": 26624
    },
    {
      "epoch": 9.751173708920188,
      "eval_g2l_cer": 48.3196,
      "eval_g2l_gen_len": 3.0196,
      "eval_g2l_rouge1": 41.1733,
      "eval_g2l_rouge2": 33.4761,
      "eval_g2l_rougeL": 41.172,
      "eval_g2l_rougeLsum": 41.1111,
      "eval_l2ex_cer": 86.3469,
      "eval_l2ex_gen_len": 21.333,
      "eval_l2ex_rouge1": 28.6196,
      "eval_l2ex_rouge2": 12.797,
      "eval_l2ex_rougeL": 25.331,
      "eval_l2ex_rougeLsum": 25.3251,
      "eval_l2g_cer": 71.8519,
      "eval_l2g_gen_len": 15.5771,
      "eval_l2g_rouge1": 38.9877,
      "eval_l2g_rouge2": 26.3016,
      "eval_l2g_rougeL": 36.97,
      "eval_l2g_rougeLsum": 37.0109,
      "eval_loss": 3.5751187801361084,
      "eval_runtime": 190.5769,
      "eval_samples_per_second": 52.058,
      "eval_steps_per_second": 1.632,
      "step": 27001
    },
    {
      "epoch": 9.765258215962442,
      "grad_norm": 0.7817335724830627,
      "learning_rate": 3.00977615630722e-05,
      "loss": 3.5332,
      "step": 27040
    },
    {
      "epoch": 9.915492957746478,
      "grad_norm": 0.8576836585998535,
      "learning_rate": 2.8602353241258667e-05,
      "loss": 3.5247,
      "step": 27456
    },
    {
      "epoch": 10.065727699530516,
      "grad_norm": 0.924045741558075,
      "learning_rate": 2.7130696762844198e-05,
      "loss": 3.5171,
      "step": 27872
    },
    {
      "epoch": 10.215962441314554,
      "grad_norm": 0.9701129198074341,
      "learning_rate": 2.568440683240166e-05,
      "loss": 3.4886,
      "step": 28288
    },
    {
      "epoch": 10.366197183098592,
      "grad_norm": 0.8473976850509644,
      "learning_rate": 2.426507032227427e-05,
      "loss": 3.5134,
      "step": 28704
    },
    {
      "epoch": 10.501263994221741,
      "eval_g2l_cer": 48.8336,
      "eval_g2l_gen_len": 3.0502,
      "eval_g2l_rouge1": 41.0241,
      "eval_g2l_rouge2": 33.2994,
      "eval_g2l_rougeL": 41.0374,
      "eval_g2l_rougeLsum": 40.9554,
      "eval_l2ex_cer": 85.2795,
      "eval_l2ex_gen_len": 21.6999,
      "eval_l2ex_rouge1": 28.6576,
      "eval_l2ex_rouge2": 12.5848,
      "eval_l2ex_rougeL": 25.1057,
      "eval_l2ex_rougeLsum": 25.1478,
      "eval_l2g_cer": 71.5555,
      "eval_l2g_gen_len": 15.5923,
      "eval_l2g_rouge1": 39.111,
      "eval_l2g_rouge2": 26.3632,
      "eval_l2g_rougeL": 37.134,
      "eval_l2g_rougeLsum": 37.1562,
      "eval_loss": 3.5716097354888916,
      "eval_runtime": 190.1354,
      "eval_samples_per_second": 52.179,
      "eval_steps_per_second": 1.636,
      "step": 29078
    },
    {
      "epoch": 10.51643192488263,
      "grad_norm": 0.9222161769866943,
      "learning_rate": 2.2874244531456016e-05,
      "loss": 3.4995,
      "step": 29120
    },
    {
      "epoch": 10.666666666666666,
      "grad_norm": 0.8834406137466431,
      "learning_rate": 2.1513455476919875e-05,
      "loss": 3.5005,
      "step": 29536
    },
    {
      "epoch": 10.816901408450704,
      "grad_norm": 1.2534151077270508,
      "learning_rate": 2.0184196219268805e-05,
      "loss": 3.4956,
      "step": 29952
    },
    {
      "epoch": 10.967136150234742,
      "grad_norm": 1.0579476356506348,
      "learning_rate": 1.8887925224546575e-05,
      "loss": 3.4984,
      "step": 30368
    },
    {
      "epoch": 11.11737089201878,
      "grad_norm": 0.9352797269821167,
      "learning_rate": 1.7626064764005655e-05,
      "loss": 3.4891,
      "step": 30784
    },
    {
      "epoch": 11.251354279523294,
      "eval_g2l_cer": 48.1779,
      "eval_g2l_gen_len": 3.0241,
      "eval_g2l_rouge1": 41.3076,
      "eval_g2l_rouge2": 33.5874,
      "eval_g2l_rougeL": 41.3381,
      "eval_g2l_rougeLsum": 41.2834,
      "eval_l2ex_cer": 86.303,
      "eval_l2ex_gen_len": 21.6927,
      "eval_l2ex_rouge1": 28.5306,
      "eval_l2ex_rouge2": 12.66,
      "eval_l2ex_rougeL": 25.107,
      "eval_l2ex_rougeLsum": 25.1229,
      "eval_l2g_cer": 71.7607,
      "eval_l2g_gen_len": 15.6002,
      "eval_l2g_rouge1": 39.1998,
      "eval_l2g_rouge2": 26.5146,
      "eval_l2g_rougeL": 37.2299,
      "eval_l2g_rougeLsum": 37.2583,
      "eval_loss": 3.5692920684814453,
      "eval_runtime": 191.2935,
      "eval_samples_per_second": 51.863,
      "eval_steps_per_second": 1.626,
      "step": 31155
    },
    {
      "epoch": 11.267605633802816,
      "grad_norm": 0.8403520584106445,
      "learning_rate": 1.6399999353588347e-05,
      "loss": 3.4762,
      "step": 31200
    },
    {
      "epoch": 11.417840375586854,
      "grad_norm": 0.8685266375541687,
      "learning_rate": 1.5211074234832911e-05,
      "loss": 3.491,
      "step": 31616
    },
    {
      "epoch": 11.568075117370892,
      "grad_norm": 0.8662200570106506,
      "learning_rate": 1.4060593898871712e-05,
      "loss": 3.4818,
      "step": 32032
    },
    {
      "epoch": 11.71830985915493,
      "grad_norm": 0.915972888469696,
      "learning_rate": 1.2949820655140888e-05,
      "loss": 3.4729,
      "step": 32448
    },
    {
      "epoch": 11.868544600938968,
      "grad_norm": 0.9427916407585144,
      "learning_rate": 1.187997324637174e-05,
      "loss": 3.4837,
      "step": 32864
    },
    {
      "epoch": 12.001444564824846,
      "eval_g2l_cer": 48.4635,
      "eval_g2l_gen_len": 3.0374,
      "eval_g2l_rouge1": 41.42,
      "eval_g2l_rouge2": 33.7871,
      "eval_g2l_rougeL": 41.41,
      "eval_g2l_rougeLsum": 41.3653,
      "eval_l2ex_cer": 84.6873,
      "eval_l2ex_gen_len": 21.5406,
      "eval_l2ex_rouge1": 28.7533,
      "eval_l2ex_rouge2": 12.7721,
      "eval_l2ex_rougeL": 25.3715,
      "eval_l2ex_rougeLsum": 25.3817,
      "eval_l2g_cer": 71.4847,
      "eval_l2g_gen_len": 15.5437,
      "eval_l2g_rouge1": 39.2147,
      "eval_l2g_rouge2": 26.5099,
      "eval_l2g_rougeL": 37.2362,
      "eval_l2g_rougeLsum": 37.2641,
      "eval_loss": 3.5653076171875,
      "eval_runtime": 189.8727,
      "eval_samples_per_second": 52.251,
      "eval_steps_per_second": 1.638,
      "step": 33232
    },
    {
      "epoch": 12.018779342723004,
      "grad_norm": 0.8259687423706055,
      "learning_rate": 1.0852225511383663e-05,
      "loss": 3.4764,
      "step": 33280
    },
    {
      "epoch": 12.169014084507042,
      "grad_norm": 0.904097855091095,
      "learning_rate": 9.86770509714574e-06,
      "loss": 3.4791,
      "step": 33696
    },
    {
      "epoch": 12.31924882629108,
      "grad_norm": 0.9662612080574036,
      "learning_rate": 8.927492221520133e-06,
      "loss": 3.4593,
      "step": 34112
    },
    {
      "epoch": 12.469483568075118,
      "grad_norm": 0.9324942231178284,
      "learning_rate": 8.032618488044715e-06,
      "loss": 3.4564,
      "step": 34528
    },
    {
      "epoch": 12.619718309859154,
      "grad_norm": 0.9966897964477539,
      "learning_rate": 7.184065754055608e-06,
      "loss": 3.4576,
      "step": 34944
    },
    {
      "epoch": 12.751534850126399,
      "eval_g2l_cer": 47.8718,
      "eval_g2l_gen_len": 3.0243,
      "eval_g2l_rouge1": 41.399,
      "eval_g2l_rouge2": 33.8189,
      "eval_g2l_rougeL": 41.4105,
      "eval_g2l_rougeLsum": 41.3515,
      "eval_l2ex_cer": 84.0524,
      "eval_l2ex_gen_len": 21.0206,
      "eval_l2ex_rouge1": 28.7814,
      "eval_l2ex_rouge2": 12.7663,
      "eval_l2ex_rougeL": 25.3724,
      "eval_l2ex_rougeLsum": 25.3895,
      "eval_l2g_cer": 71.6622,
      "eval_l2g_gen_len": 15.563,
      "eval_l2g_rouge1": 39.1666,
      "eval_l2g_rouge2": 26.5275,
      "eval_l2g_rougeL": 37.1881,
      "eval_l2g_rougeLsum": 37.2249,
      "eval_loss": 3.564103841781616,
      "eval_runtime": 190.2806,
      "eval_samples_per_second": 52.139,
      "eval_steps_per_second": 1.634,
      "step": 35309
    },
    {
      "epoch": 12.769953051643192,
      "grad_norm": 1.0099953413009644,
      "learning_rate": 6.382765053391182e-06,
      "loss": 3.4757,
      "step": 35360
    },
    {
      "epoch": 12.92018779342723,
      "grad_norm": 0.8347458243370056,
      "learning_rate": 5.629595574859816e-06,
      "loss": 3.4814,
      "step": 35776
    },
    {
      "epoch": 13.070422535211268,
      "grad_norm": 0.8532468676567078,
      "learning_rate": 4.925383697592043e-06,
      "loss": 3.4667,
      "step": 36192
    },
    {
      "epoch": 13.220657276995306,
      "grad_norm": 0.8852038383483887,
      "learning_rate": 4.2709020843357075e-06,
      "loss": 3.4512,
      "step": 36608
    },
    {
      "epoch": 13.370892018779342,
      "grad_norm": 1.058424472808838,
      "learning_rate": 3.666868833688726e-06,
      "loss": 3.4616,
      "step": 37024
    },
    {
      "epoch": 13.501625135427952,
      "eval_g2l_cer": 47.8581,
      "eval_g2l_gen_len": 3.0221,
      "eval_g2l_rouge1": 41.4693,
      "eval_g2l_rouge2": 33.7773,
      "eval_g2l_rougeL": 41.4822,
      "eval_g2l_rougeLsum": 41.4356,
      "eval_l2ex_cer": 84.3083,
      "eval_l2ex_gen_len": 21.0319,
      "eval_l2ex_rouge1": 28.654,
      "eval_l2ex_rouge2": 12.8413,
      "eval_l2ex_rougeL": 25.3941,
      "eval_l2ex_rougeLsum": 25.4326,
      "eval_l2g_cer": 71.0018,
      "eval_l2g_gen_len": 15.3407,
      "eval_l2g_rouge1": 39.2009,
      "eval_l2g_rouge2": 26.5422,
      "eval_l2g_rougeL": 37.2433,
      "eval_l2g_rougeLsum": 37.2693,
      "eval_loss": 3.5637874603271484,
      "eval_runtime": 187.7571,
      "eval_samples_per_second": 52.84,
      "eval_steps_per_second": 1.656,
      "step": 37386
    }
  ],
  "logging_steps": 416,
  "max_steps": 41535,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 2077,
  "total_flos": 7.17240637379838e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}