{ "best_global_step": 2793, "best_metric": 0.2159090909090909, "best_model_checkpoint": "./ocr_model_output/checkpoint-2793", "epoch": 25.0, "eval_steps": 500, "global_step": 3675, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06802721088435375, "grad_norm": 65.23433685302734, "learning_rate": 4.9877551020408165e-05, "loss": 8.6208, "step": 10 }, { "epoch": 0.1360544217687075, "grad_norm": 70.44102478027344, "learning_rate": 4.974149659863946e-05, "loss": 4.5903, "step": 20 }, { "epoch": 0.20408163265306123, "grad_norm": 35.348358154296875, "learning_rate": 4.960544217687075e-05, "loss": 1.9034, "step": 30 }, { "epoch": 0.272108843537415, "grad_norm": 22.223546981811523, "learning_rate": 4.9469387755102045e-05, "loss": 1.3092, "step": 40 }, { "epoch": 0.3401360544217687, "grad_norm": 35.46092987060547, "learning_rate": 4.933333333333334e-05, "loss": 1.0594, "step": 50 }, { "epoch": 0.40816326530612246, "grad_norm": 22.021345138549805, "learning_rate": 4.9197278911564624e-05, "loss": 1.0721, "step": 60 }, { "epoch": 0.47619047619047616, "grad_norm": 16.65888214111328, "learning_rate": 4.9061224489795924e-05, "loss": 0.825, "step": 70 }, { "epoch": 0.54421768707483, "grad_norm": 13.323294639587402, "learning_rate": 4.892517006802722e-05, "loss": 0.8588, "step": 80 }, { "epoch": 0.6122448979591837, "grad_norm": 16.98369598388672, "learning_rate": 4.87891156462585e-05, "loss": 0.6796, "step": 90 }, { "epoch": 0.6802721088435374, "grad_norm": 14.879609107971191, "learning_rate": 4.8653061224489796e-05, "loss": 0.715, "step": 100 }, { "epoch": 0.7482993197278912, "grad_norm": 13.820905685424805, "learning_rate": 4.8517006802721096e-05, "loss": 0.6773, "step": 110 }, { "epoch": 0.8163265306122449, "grad_norm": 14.802565574645996, "learning_rate": 4.838095238095238e-05, "loss": 0.6992, "step": 120 }, { "epoch": 0.8843537414965986, "grad_norm": 15.70506763458252, "learning_rate": 4.8244897959183675e-05, "loss": 0.7254, "step": 130 }, { "epoch": 0.9523809523809523, "grad_norm": 16.910625457763672, "learning_rate": 4.810884353741497e-05, "loss": 0.8015, "step": 140 }, { "epoch": 1.0, "eval_cer": 0.8118686868686869, "eval_loss": 0.8795642256736755, "eval_runtime": 3.4405, "eval_samples_per_second": 85.161, "eval_steps_per_second": 42.726, "step": 147 }, { "epoch": 1.0204081632653061, "grad_norm": 12.733670234680176, "learning_rate": 4.797278911564626e-05, "loss": 0.6511, "step": 150 }, { "epoch": 1.08843537414966, "grad_norm": 13.70749568939209, "learning_rate": 4.7836734693877554e-05, "loss": 0.7377, "step": 160 }, { "epoch": 1.1564625850340136, "grad_norm": 15.44306468963623, "learning_rate": 4.770068027210885e-05, "loss": 0.6797, "step": 170 }, { "epoch": 1.2244897959183674, "grad_norm": 19.31881332397461, "learning_rate": 4.756462585034014e-05, "loss": 0.6719, "step": 180 }, { "epoch": 1.2925170068027212, "grad_norm": 12.049680709838867, "learning_rate": 4.742857142857143e-05, "loss": 0.6491, "step": 190 }, { "epoch": 1.3605442176870748, "grad_norm": 14.28982925415039, "learning_rate": 4.729251700680272e-05, "loss": 0.7678, "step": 200 }, { "epoch": 1.4285714285714286, "grad_norm": 25.51521110534668, "learning_rate": 4.715646258503402e-05, "loss": 0.7534, "step": 210 }, { "epoch": 1.4965986394557822, "grad_norm": 14.158224105834961, "learning_rate": 4.7020408163265306e-05, "loss": 0.5653, "step": 220 }, { "epoch": 1.564625850340136, "grad_norm": 10.563309669494629, "learning_rate": 4.68843537414966e-05, "loss": 0.6038, "step": 230 }, { "epoch": 1.6326530612244898, "grad_norm": 14.436025619506836, "learning_rate": 4.67482993197279e-05, "loss": 0.5794, "step": 240 }, { "epoch": 1.7006802721088436, "grad_norm": 14.293322563171387, "learning_rate": 4.6612244897959185e-05, "loss": 0.6102, "step": 250 }, { "epoch": 1.7687074829931972, "grad_norm": 6.588962078094482, "learning_rate": 4.647619047619048e-05, "loss": 0.6853, "step": 260 }, { "epoch": 1.836734693877551, "grad_norm": 16.505081176757812, "learning_rate": 4.634013605442177e-05, "loss": 0.554, "step": 270 }, { "epoch": 1.9047619047619047, "grad_norm": 16.433975219726562, "learning_rate": 4.6204081632653064e-05, "loss": 0.5481, "step": 280 }, { "epoch": 1.9727891156462585, "grad_norm": 14.68355655670166, "learning_rate": 4.606802721088436e-05, "loss": 0.4728, "step": 290 }, { "epoch": 2.0, "eval_cer": 0.571969696969697, "eval_loss": 0.6962071061134338, "eval_runtime": 3.8738, "eval_samples_per_second": 75.637, "eval_steps_per_second": 37.948, "step": 294 }, { "epoch": 2.0408163265306123, "grad_norm": 9.795095443725586, "learning_rate": 4.593197278911564e-05, "loss": 0.4635, "step": 300 }, { "epoch": 2.108843537414966, "grad_norm": 17.107149124145508, "learning_rate": 4.579591836734694e-05, "loss": 0.4794, "step": 310 }, { "epoch": 2.17687074829932, "grad_norm": 11.940792083740234, "learning_rate": 4.5659863945578236e-05, "loss": 0.677, "step": 320 }, { "epoch": 2.2448979591836733, "grad_norm": 8.351872444152832, "learning_rate": 4.552380952380952e-05, "loss": 0.6732, "step": 330 }, { "epoch": 2.312925170068027, "grad_norm": 2.6622140407562256, "learning_rate": 4.538775510204082e-05, "loss": 0.3907, "step": 340 }, { "epoch": 2.380952380952381, "grad_norm": 10.555298805236816, "learning_rate": 4.5251700680272115e-05, "loss": 0.5443, "step": 350 }, { "epoch": 2.4489795918367347, "grad_norm": 7.857567310333252, "learning_rate": 4.51156462585034e-05, "loss": 0.5688, "step": 360 }, { "epoch": 2.5170068027210886, "grad_norm": 15.215005874633789, "learning_rate": 4.4979591836734694e-05, "loss": 0.5548, "step": 370 }, { "epoch": 2.5850340136054424, "grad_norm": 8.438752174377441, "learning_rate": 4.484353741496599e-05, "loss": 0.3828, "step": 380 }, { "epoch": 2.6530612244897958, "grad_norm": 9.824787139892578, "learning_rate": 4.470748299319728e-05, "loss": 0.3945, "step": 390 }, { "epoch": 2.7210884353741496, "grad_norm": 13.708135604858398, "learning_rate": 4.4571428571428574e-05, "loss": 0.3751, "step": 400 }, { "epoch": 2.7891156462585034, "grad_norm": 10.32359790802002, "learning_rate": 4.4435374149659867e-05, "loss": 0.4604, "step": 410 }, { "epoch": 2.857142857142857, "grad_norm": 4.256906509399414, "learning_rate": 4.429931972789116e-05, "loss": 0.4558, "step": 420 }, { "epoch": 2.925170068027211, "grad_norm": 12.131647109985352, "learning_rate": 4.416326530612245e-05, "loss": 0.4292, "step": 430 }, { "epoch": 2.9931972789115644, "grad_norm": 14.156941413879395, "learning_rate": 4.4027210884353746e-05, "loss": 0.3713, "step": 440 }, { "epoch": 3.0, "eval_cer": 0.5454545454545454, "eval_loss": 0.5351251363754272, "eval_runtime": 3.9015, "eval_samples_per_second": 75.099, "eval_steps_per_second": 37.678, "step": 441 }, { "epoch": 3.061224489795918, "grad_norm": 5.429965496063232, "learning_rate": 4.389115646258504e-05, "loss": 0.6159, "step": 450 }, { "epoch": 3.129251700680272, "grad_norm": 11.695433616638184, "learning_rate": 4.3755102040816325e-05, "loss": 0.4156, "step": 460 }, { "epoch": 3.197278911564626, "grad_norm": 14.333072662353516, "learning_rate": 4.361904761904762e-05, "loss": 0.4372, "step": 470 }, { "epoch": 3.2653061224489797, "grad_norm": 14.760481834411621, "learning_rate": 4.348299319727892e-05, "loss": 0.4279, "step": 480 }, { "epoch": 3.3333333333333335, "grad_norm": 12.052332878112793, "learning_rate": 4.3346938775510204e-05, "loss": 0.3854, "step": 490 }, { "epoch": 3.4013605442176873, "grad_norm": 14.331747055053711, "learning_rate": 4.32108843537415e-05, "loss": 0.3839, "step": 500 }, { "epoch": 3.4693877551020407, "grad_norm": 21.911863327026367, "learning_rate": 4.307482993197279e-05, "loss": 0.4815, "step": 510 }, { "epoch": 3.5374149659863945, "grad_norm": 6.438183307647705, "learning_rate": 4.293877551020408e-05, "loss": 0.2597, "step": 520 }, { "epoch": 3.6054421768707483, "grad_norm": 13.568741798400879, "learning_rate": 4.2802721088435376e-05, "loss": 0.3735, "step": 530 }, { "epoch": 3.673469387755102, "grad_norm": 0.18484297394752502, "learning_rate": 4.266666666666667e-05, "loss": 0.3343, "step": 540 }, { "epoch": 3.741496598639456, "grad_norm": 0.7180817127227783, "learning_rate": 4.253061224489796e-05, "loss": 0.1867, "step": 550 }, { "epoch": 3.8095238095238093, "grad_norm": 0.15185348689556122, "learning_rate": 4.2394557823129255e-05, "loss": 0.4185, "step": 560 }, { "epoch": 3.877551020408163, "grad_norm": 10.095551490783691, "learning_rate": 4.225850340136054e-05, "loss": 0.1698, "step": 570 }, { "epoch": 3.945578231292517, "grad_norm": 15.650616645812988, "learning_rate": 4.212244897959184e-05, "loss": 0.4307, "step": 580 }, { "epoch": 4.0, "eval_cer": 0.4911616161616162, "eval_loss": 0.4221580922603607, "eval_runtime": 3.8592, "eval_samples_per_second": 75.922, "eval_steps_per_second": 38.091, "step": 588 }, { "epoch": 4.01360544217687, "grad_norm": 8.345415115356445, "learning_rate": 4.1986394557823134e-05, "loss": 0.2111, "step": 590 }, { "epoch": 4.081632653061225, "grad_norm": 4.688040256500244, "learning_rate": 4.185034013605442e-05, "loss": 0.3211, "step": 600 }, { "epoch": 4.149659863945578, "grad_norm": 19.075292587280273, "learning_rate": 4.1714285714285714e-05, "loss": 0.3103, "step": 610 }, { "epoch": 4.217687074829932, "grad_norm": 7.607833385467529, "learning_rate": 4.1578231292517014e-05, "loss": 0.2308, "step": 620 }, { "epoch": 4.285714285714286, "grad_norm": 8.69675350189209, "learning_rate": 4.14421768707483e-05, "loss": 0.2981, "step": 630 }, { "epoch": 4.35374149659864, "grad_norm": 2.994023323059082, "learning_rate": 4.130612244897959e-05, "loss": 0.2922, "step": 640 }, { "epoch": 4.421768707482993, "grad_norm": 6.535342693328857, "learning_rate": 4.1170068027210886e-05, "loss": 0.3441, "step": 650 }, { "epoch": 4.489795918367347, "grad_norm": 0.3404290974140167, "learning_rate": 4.103401360544218e-05, "loss": 0.3523, "step": 660 }, { "epoch": 4.557823129251701, "grad_norm": 10.70188045501709, "learning_rate": 4.089795918367347e-05, "loss": 0.4148, "step": 670 }, { "epoch": 4.625850340136054, "grad_norm": 14.900300979614258, "learning_rate": 4.0761904761904765e-05, "loss": 0.4041, "step": 680 }, { "epoch": 4.6938775510204085, "grad_norm": 14.758822441101074, "learning_rate": 4.062585034013606e-05, "loss": 0.2453, "step": 690 }, { "epoch": 4.761904761904762, "grad_norm": 0.7187572717666626, "learning_rate": 4.048979591836735e-05, "loss": 0.3703, "step": 700 }, { "epoch": 4.829931972789115, "grad_norm": 9.603391647338867, "learning_rate": 4.035374149659864e-05, "loss": 0.3354, "step": 710 }, { "epoch": 4.8979591836734695, "grad_norm": 1.358739972114563, "learning_rate": 4.021768707482994e-05, "loss": 0.3297, "step": 720 }, { "epoch": 4.965986394557823, "grad_norm": 0.37522122263908386, "learning_rate": 4.008163265306122e-05, "loss": 0.2072, "step": 730 }, { "epoch": 5.0, "eval_cer": 0.5921717171717171, "eval_loss": 0.358783096075058, "eval_runtime": 3.5411, "eval_samples_per_second": 82.742, "eval_steps_per_second": 41.512, "step": 735 }, { "epoch": 5.034013605442177, "grad_norm": 3.8917222023010254, "learning_rate": 3.9945578231292516e-05, "loss": 0.2488, "step": 740 }, { "epoch": 5.1020408163265305, "grad_norm": 16.159465789794922, "learning_rate": 3.9809523809523816e-05, "loss": 0.3769, "step": 750 }, { "epoch": 5.170068027210885, "grad_norm": 17.226072311401367, "learning_rate": 3.96734693877551e-05, "loss": 0.2559, "step": 760 }, { "epoch": 5.238095238095238, "grad_norm": 0.8567410707473755, "learning_rate": 3.9537414965986396e-05, "loss": 0.248, "step": 770 }, { "epoch": 5.3061224489795915, "grad_norm": 1.5087652206420898, "learning_rate": 3.940136054421769e-05, "loss": 0.2737, "step": 780 }, { "epoch": 5.374149659863946, "grad_norm": 3.331455945968628, "learning_rate": 3.926530612244898e-05, "loss": 0.1665, "step": 790 }, { "epoch": 5.442176870748299, "grad_norm": 11.933197021484375, "learning_rate": 3.9129251700680275e-05, "loss": 0.322, "step": 800 }, { "epoch": 5.510204081632653, "grad_norm": 13.032449722290039, "learning_rate": 3.899319727891156e-05, "loss": 0.239, "step": 810 }, { "epoch": 5.578231292517007, "grad_norm": 5.0442047119140625, "learning_rate": 3.885714285714286e-05, "loss": 0.157, "step": 820 }, { "epoch": 5.646258503401361, "grad_norm": 1.6076925992965698, "learning_rate": 3.8721088435374154e-05, "loss": 0.1926, "step": 830 }, { "epoch": 5.714285714285714, "grad_norm": 22.085569381713867, "learning_rate": 3.858503401360544e-05, "loss": 0.3699, "step": 840 }, { "epoch": 5.782312925170068, "grad_norm": 10.831768989562988, "learning_rate": 3.844897959183674e-05, "loss": 0.291, "step": 850 }, { "epoch": 5.850340136054422, "grad_norm": 0.5946142077445984, "learning_rate": 3.831292517006803e-05, "loss": 0.2154, "step": 860 }, { "epoch": 5.918367346938775, "grad_norm": 1.5171136856079102, "learning_rate": 3.817687074829932e-05, "loss": 0.212, "step": 870 }, { "epoch": 5.986394557823129, "grad_norm": 18.592945098876953, "learning_rate": 3.804081632653061e-05, "loss": 0.2963, "step": 880 }, { "epoch": 6.0, "eval_cer": 0.5151515151515151, "eval_loss": 0.2578863501548767, "eval_runtime": 3.7328, "eval_samples_per_second": 78.493, "eval_steps_per_second": 39.38, "step": 882 }, { "epoch": 6.054421768707483, "grad_norm": 0.10681638866662979, "learning_rate": 3.7904761904761905e-05, "loss": 0.1761, "step": 890 }, { "epoch": 6.122448979591836, "grad_norm": 7.661993503570557, "learning_rate": 3.77687074829932e-05, "loss": 0.2027, "step": 900 }, { "epoch": 6.190476190476191, "grad_norm": 12.70997428894043, "learning_rate": 3.763265306122449e-05, "loss": 0.1828, "step": 910 }, { "epoch": 6.258503401360544, "grad_norm": 0.06931126117706299, "learning_rate": 3.7496598639455784e-05, "loss": 0.1242, "step": 920 }, { "epoch": 6.326530612244898, "grad_norm": 17.21261215209961, "learning_rate": 3.736054421768708e-05, "loss": 0.3785, "step": 930 }, { "epoch": 6.394557823129252, "grad_norm": 0.10957188904285431, "learning_rate": 3.722448979591837e-05, "loss": 0.1184, "step": 940 }, { "epoch": 6.462585034013605, "grad_norm": 14.104228019714355, "learning_rate": 3.7088435374149663e-05, "loss": 0.2253, "step": 950 }, { "epoch": 6.530612244897959, "grad_norm": 0.2823491394519806, "learning_rate": 3.6952380952380956e-05, "loss": 0.1337, "step": 960 }, { "epoch": 6.598639455782313, "grad_norm": 0.17527176439762115, "learning_rate": 3.681632653061224e-05, "loss": 0.1745, "step": 970 }, { "epoch": 6.666666666666667, "grad_norm": 12.178832054138184, "learning_rate": 3.6680272108843536e-05, "loss": 0.3322, "step": 980 }, { "epoch": 6.73469387755102, "grad_norm": 18.27240753173828, "learning_rate": 3.6544217687074836e-05, "loss": 0.245, "step": 990 }, { "epoch": 6.802721088435375, "grad_norm": 8.510261535644531, "learning_rate": 3.640816326530612e-05, "loss": 0.2928, "step": 1000 }, { "epoch": 6.870748299319728, "grad_norm": 8.47603988647461, "learning_rate": 3.6272108843537415e-05, "loss": 0.2048, "step": 1010 }, { "epoch": 6.938775510204081, "grad_norm": 9.21681022644043, "learning_rate": 3.6136054421768715e-05, "loss": 0.1416, "step": 1020 }, { "epoch": 7.0, "eval_cer": 0.4166666666666667, "eval_loss": 0.1848345547914505, "eval_runtime": 4.0307, "eval_samples_per_second": 72.692, "eval_steps_per_second": 36.47, "step": 1029 }, { "epoch": 7.006802721088436, "grad_norm": 8.593953132629395, "learning_rate": 3.6e-05, "loss": 0.1307, "step": 1030 }, { "epoch": 7.074829931972789, "grad_norm": 6.022789001464844, "learning_rate": 3.5863945578231294e-05, "loss": 0.1292, "step": 1040 }, { "epoch": 7.142857142857143, "grad_norm": 10.953206062316895, "learning_rate": 3.572789115646259e-05, "loss": 0.0853, "step": 1050 }, { "epoch": 7.210884353741497, "grad_norm": 5.332366943359375, "learning_rate": 3.559183673469388e-05, "loss": 0.112, "step": 1060 }, { "epoch": 7.27891156462585, "grad_norm": 12.159287452697754, "learning_rate": 3.545578231292517e-05, "loss": 0.1007, "step": 1070 }, { "epoch": 7.346938775510204, "grad_norm": 0.3854842483997345, "learning_rate": 3.531972789115646e-05, "loss": 0.2318, "step": 1080 }, { "epoch": 7.414965986394558, "grad_norm": 11.151751518249512, "learning_rate": 3.518367346938776e-05, "loss": 0.1768, "step": 1090 }, { "epoch": 7.482993197278912, "grad_norm": 20.415531158447266, "learning_rate": 3.504761904761905e-05, "loss": 0.1839, "step": 1100 }, { "epoch": 7.551020408163265, "grad_norm": 0.7234401106834412, "learning_rate": 3.491156462585034e-05, "loss": 0.2944, "step": 1110 }, { "epoch": 7.619047619047619, "grad_norm": 0.522950291633606, "learning_rate": 3.477551020408164e-05, "loss": 0.087, "step": 1120 }, { "epoch": 7.687074829931973, "grad_norm": 8.606940269470215, "learning_rate": 3.463945578231293e-05, "loss": 0.1253, "step": 1130 }, { "epoch": 7.755102040816326, "grad_norm": 22.922000885009766, "learning_rate": 3.450340136054422e-05, "loss": 0.2799, "step": 1140 }, { "epoch": 7.8231292517006805, "grad_norm": 21.035017013549805, "learning_rate": 3.436734693877551e-05, "loss": 0.1441, "step": 1150 }, { "epoch": 7.891156462585034, "grad_norm": 5.825491905212402, "learning_rate": 3.4231292517006804e-05, "loss": 0.0218, "step": 1160 }, { "epoch": 7.959183673469388, "grad_norm": 12.801454544067383, "learning_rate": 3.40952380952381e-05, "loss": 0.2319, "step": 1170 }, { "epoch": 8.0, "eval_cer": 0.42424242424242425, "eval_loss": 0.13813678920269012, "eval_runtime": 4.039, "eval_samples_per_second": 72.542, "eval_steps_per_second": 36.395, "step": 1176 }, { "epoch": 8.02721088435374, "grad_norm": 1.3456509113311768, "learning_rate": 3.395918367346939e-05, "loss": 0.1622, "step": 1180 }, { "epoch": 8.095238095238095, "grad_norm": 0.21156376600265503, "learning_rate": 3.382312925170068e-05, "loss": 0.0447, "step": 1190 }, { "epoch": 8.16326530612245, "grad_norm": 0.08529641479253769, "learning_rate": 3.3687074829931976e-05, "loss": 0.3241, "step": 1200 }, { "epoch": 8.231292517006803, "grad_norm": 8.225408554077148, "learning_rate": 3.355102040816327e-05, "loss": 0.1136, "step": 1210 }, { "epoch": 8.299319727891156, "grad_norm": 0.0486108660697937, "learning_rate": 3.341496598639456e-05, "loss": 0.2109, "step": 1220 }, { "epoch": 8.36734693877551, "grad_norm": 0.3150612413883209, "learning_rate": 3.3278911564625855e-05, "loss": 0.0678, "step": 1230 }, { "epoch": 8.435374149659864, "grad_norm": 2.7837414741516113, "learning_rate": 3.314285714285714e-05, "loss": 0.0582, "step": 1240 }, { "epoch": 8.503401360544217, "grad_norm": 0.08137867599725723, "learning_rate": 3.3006802721088434e-05, "loss": 0.2023, "step": 1250 }, { "epoch": 8.571428571428571, "grad_norm": 0.12031784653663635, "learning_rate": 3.2870748299319734e-05, "loss": 0.2334, "step": 1260 }, { "epoch": 8.639455782312925, "grad_norm": 0.7248769998550415, "learning_rate": 3.273469387755102e-05, "loss": 0.0562, "step": 1270 }, { "epoch": 8.70748299319728, "grad_norm": 9.878806114196777, "learning_rate": 3.259863945578231e-05, "loss": 0.1711, "step": 1280 }, { "epoch": 8.775510204081632, "grad_norm": 11.397530555725098, "learning_rate": 3.2462585034013606e-05, "loss": 0.2418, "step": 1290 }, { "epoch": 8.843537414965986, "grad_norm": 13.183833122253418, "learning_rate": 3.23265306122449e-05, "loss": 0.056, "step": 1300 }, { "epoch": 8.91156462585034, "grad_norm": 0.8917256593704224, "learning_rate": 3.219047619047619e-05, "loss": 0.0436, "step": 1310 }, { "epoch": 8.979591836734693, "grad_norm": 7.448833465576172, "learning_rate": 3.2054421768707485e-05, "loss": 0.0476, "step": 1320 }, { "epoch": 9.0, "eval_cer": 0.37752525252525254, "eval_loss": 0.11764977127313614, "eval_runtime": 3.9588, "eval_samples_per_second": 74.013, "eval_steps_per_second": 37.133, "step": 1323 }, { "epoch": 9.047619047619047, "grad_norm": 7.476083278656006, "learning_rate": 3.191836734693878e-05, "loss": 0.098, "step": 1330 }, { "epoch": 9.115646258503402, "grad_norm": 0.2827729880809784, "learning_rate": 3.178231292517007e-05, "loss": 0.1332, "step": 1340 }, { "epoch": 9.183673469387756, "grad_norm": 1.02791166305542, "learning_rate": 3.164625850340136e-05, "loss": 0.0211, "step": 1350 }, { "epoch": 9.251700680272108, "grad_norm": 14.376386642456055, "learning_rate": 3.151020408163266e-05, "loss": 0.1141, "step": 1360 }, { "epoch": 9.319727891156463, "grad_norm": 0.9921436309814453, "learning_rate": 3.137414965986395e-05, "loss": 0.0931, "step": 1370 }, { "epoch": 9.387755102040817, "grad_norm": 0.1799956113100052, "learning_rate": 3.123809523809524e-05, "loss": 0.0095, "step": 1380 }, { "epoch": 9.45578231292517, "grad_norm": 0.05473727360367775, "learning_rate": 3.110204081632653e-05, "loss": 0.1565, "step": 1390 }, { "epoch": 9.523809523809524, "grad_norm": 0.5269390344619751, "learning_rate": 3.096598639455782e-05, "loss": 0.0674, "step": 1400 }, { "epoch": 9.591836734693878, "grad_norm": 2.706407070159912, "learning_rate": 3.0829931972789116e-05, "loss": 0.0966, "step": 1410 }, { "epoch": 9.65986394557823, "grad_norm": 0.16416242718696594, "learning_rate": 3.069387755102041e-05, "loss": 0.0667, "step": 1420 }, { "epoch": 9.727891156462585, "grad_norm": 15.035467147827148, "learning_rate": 3.05578231292517e-05, "loss": 0.1272, "step": 1430 }, { "epoch": 9.795918367346939, "grad_norm": 0.19053949415683746, "learning_rate": 3.0421768707482995e-05, "loss": 0.2847, "step": 1440 }, { "epoch": 9.863945578231293, "grad_norm": 0.12774477899074554, "learning_rate": 3.0285714285714288e-05, "loss": 0.1103, "step": 1450 }, { "epoch": 9.931972789115646, "grad_norm": 0.474401593208313, "learning_rate": 3.0149659863945578e-05, "loss": 0.0972, "step": 1460 }, { "epoch": 10.0, "grad_norm": 0.0236662644892931, "learning_rate": 3.0013605442176874e-05, "loss": 0.0626, "step": 1470 }, { "epoch": 10.0, "eval_cer": 0.4305555555555556, "eval_loss": 0.09693024307489395, "eval_runtime": 4.1253, "eval_samples_per_second": 71.025, "eval_steps_per_second": 35.633, "step": 1470 }, { "epoch": 10.068027210884354, "grad_norm": 0.03369349241256714, "learning_rate": 2.987755102040816e-05, "loss": 0.0542, "step": 1480 }, { "epoch": 10.136054421768707, "grad_norm": 0.2802339792251587, "learning_rate": 2.9741496598639457e-05, "loss": 0.1356, "step": 1490 }, { "epoch": 10.204081632653061, "grad_norm": 0.48600488901138306, "learning_rate": 2.960544217687075e-05, "loss": 0.1529, "step": 1500 }, { "epoch": 10.272108843537415, "grad_norm": 2.2791683673858643, "learning_rate": 2.946938775510204e-05, "loss": 0.1232, "step": 1510 }, { "epoch": 10.34013605442177, "grad_norm": 6.162140369415283, "learning_rate": 2.9333333333333336e-05, "loss": 0.1948, "step": 1520 }, { "epoch": 10.408163265306122, "grad_norm": 1.365488886833191, "learning_rate": 2.919727891156463e-05, "loss": 0.0417, "step": 1530 }, { "epoch": 10.476190476190476, "grad_norm": 14.345423698425293, "learning_rate": 2.906122448979592e-05, "loss": 0.0859, "step": 1540 }, { "epoch": 10.54421768707483, "grad_norm": 0.1597384810447693, "learning_rate": 2.892517006802721e-05, "loss": 0.0376, "step": 1550 }, { "epoch": 10.612244897959183, "grad_norm": 0.24928878247737885, "learning_rate": 2.87891156462585e-05, "loss": 0.5477, "step": 1560 }, { "epoch": 10.680272108843537, "grad_norm": 0.3629794418811798, "learning_rate": 2.8653061224489798e-05, "loss": 0.0102, "step": 1570 }, { "epoch": 10.748299319727892, "grad_norm": 17.98668098449707, "learning_rate": 2.851700680272109e-05, "loss": 0.0821, "step": 1580 }, { "epoch": 10.816326530612244, "grad_norm": 5.073668479919434, "learning_rate": 2.838095238095238e-05, "loss": 0.053, "step": 1590 }, { "epoch": 10.884353741496598, "grad_norm": 12.528962135314941, "learning_rate": 2.8244897959183673e-05, "loss": 0.0463, "step": 1600 }, { "epoch": 10.952380952380953, "grad_norm": 1.0358648300170898, "learning_rate": 2.810884353741497e-05, "loss": 0.0124, "step": 1610 }, { "epoch": 11.0, "eval_cer": 0.39646464646464646, "eval_loss": 0.08722148090600967, "eval_runtime": 3.8388, "eval_samples_per_second": 76.326, "eval_steps_per_second": 38.293, "step": 1617 }, { "epoch": 11.020408163265307, "grad_norm": 0.19600285589694977, "learning_rate": 2.797278911564626e-05, "loss": 0.118, "step": 1620 }, { "epoch": 11.08843537414966, "grad_norm": 26.618637084960938, "learning_rate": 2.7836734693877553e-05, "loss": 0.3147, "step": 1630 }, { "epoch": 11.156462585034014, "grad_norm": 0.05680645629763603, "learning_rate": 2.7700680272108842e-05, "loss": 0.2198, "step": 1640 }, { "epoch": 11.224489795918368, "grad_norm": 13.787897109985352, "learning_rate": 2.7564625850340135e-05, "loss": 0.2091, "step": 1650 }, { "epoch": 11.29251700680272, "grad_norm": 1.339880108833313, "learning_rate": 2.742857142857143e-05, "loss": 0.0221, "step": 1660 }, { "epoch": 11.360544217687075, "grad_norm": 0.18231110274791718, "learning_rate": 2.729251700680272e-05, "loss": 0.0567, "step": 1670 }, { "epoch": 11.428571428571429, "grad_norm": 0.15762682259082794, "learning_rate": 2.7156462585034014e-05, "loss": 0.0118, "step": 1680 }, { "epoch": 11.496598639455783, "grad_norm": 0.19339053332805634, "learning_rate": 2.702040816326531e-05, "loss": 0.0082, "step": 1690 }, { "epoch": 11.564625850340136, "grad_norm": 11.242050170898438, "learning_rate": 2.6884353741496597e-05, "loss": 0.0398, "step": 1700 }, { "epoch": 11.63265306122449, "grad_norm": 0.104960598051548, "learning_rate": 2.6748299319727893e-05, "loss": 0.0745, "step": 1710 }, { "epoch": 11.700680272108844, "grad_norm": 0.4059283137321472, "learning_rate": 2.6612244897959187e-05, "loss": 0.0854, "step": 1720 }, { "epoch": 11.768707482993197, "grad_norm": 0.11370517313480377, "learning_rate": 2.6476190476190476e-05, "loss": 0.084, "step": 1730 }, { "epoch": 11.83673469387755, "grad_norm": 6.6930365562438965, "learning_rate": 2.6340136054421773e-05, "loss": 0.082, "step": 1740 }, { "epoch": 11.904761904761905, "grad_norm": 0.035495854914188385, "learning_rate": 2.620408163265306e-05, "loss": 0.0704, "step": 1750 }, { "epoch": 11.972789115646258, "grad_norm": 0.19433455169200897, "learning_rate": 2.6068027210884355e-05, "loss": 0.0755, "step": 1760 }, { "epoch": 12.0, "eval_cer": 0.4090909090909091, "eval_loss": 0.06354419887065887, "eval_runtime": 3.8826, "eval_samples_per_second": 75.466, "eval_steps_per_second": 37.862, "step": 1764 }, { "epoch": 12.040816326530612, "grad_norm": 0.22618860006332397, "learning_rate": 2.593197278911565e-05, "loss": 0.0842, "step": 1770 }, { "epoch": 12.108843537414966, "grad_norm": 0.2526053786277771, "learning_rate": 2.5795918367346938e-05, "loss": 0.0257, "step": 1780 }, { "epoch": 12.17687074829932, "grad_norm": 5.3073649406433105, "learning_rate": 2.5659863945578234e-05, "loss": 0.0723, "step": 1790 }, { "epoch": 12.244897959183673, "grad_norm": 0.07300838083028793, "learning_rate": 2.5523809523809527e-05, "loss": 0.0447, "step": 1800 }, { "epoch": 12.312925170068027, "grad_norm": 1.9569120407104492, "learning_rate": 2.5387755102040817e-05, "loss": 0.0437, "step": 1810 }, { "epoch": 12.380952380952381, "grad_norm": 4.616933822631836, "learning_rate": 2.525170068027211e-05, "loss": 0.0802, "step": 1820 }, { "epoch": 12.448979591836734, "grad_norm": 0.09218256920576096, "learning_rate": 2.51156462585034e-05, "loss": 0.0345, "step": 1830 }, { "epoch": 12.517006802721088, "grad_norm": 0.10980120301246643, "learning_rate": 2.4979591836734696e-05, "loss": 0.0167, "step": 1840 }, { "epoch": 12.585034013605442, "grad_norm": 1.9402213096618652, "learning_rate": 2.4843537414965986e-05, "loss": 0.0189, "step": 1850 }, { "epoch": 12.653061224489797, "grad_norm": 0.28110960125923157, "learning_rate": 2.4707482993197282e-05, "loss": 0.0939, "step": 1860 }, { "epoch": 12.72108843537415, "grad_norm": 14.07165813446045, "learning_rate": 2.4571428571428572e-05, "loss": 0.1164, "step": 1870 }, { "epoch": 12.789115646258503, "grad_norm": 0.09300262480974197, "learning_rate": 2.4435374149659865e-05, "loss": 0.0511, "step": 1880 }, { "epoch": 12.857142857142858, "grad_norm": 0.08766383677721024, "learning_rate": 2.4299319727891158e-05, "loss": 0.0821, "step": 1890 }, { "epoch": 12.92517006802721, "grad_norm": 4.915824890136719, "learning_rate": 2.416326530612245e-05, "loss": 0.0425, "step": 1900 }, { "epoch": 12.993197278911564, "grad_norm": 0.25317126512527466, "learning_rate": 2.4027210884353744e-05, "loss": 0.048, "step": 1910 }, { "epoch": 13.0, "eval_cer": 0.43813131313131315, "eval_loss": 0.044823333621025085, "eval_runtime": 3.9907, "eval_samples_per_second": 73.42, "eval_steps_per_second": 36.835, "step": 1911 }, { "epoch": 13.061224489795919, "grad_norm": 11.498345375061035, "learning_rate": 2.3891156462585034e-05, "loss": 0.0573, "step": 1920 }, { "epoch": 13.129251700680273, "grad_norm": 9.521465301513672, "learning_rate": 2.3755102040816327e-05, "loss": 0.0186, "step": 1930 }, { "epoch": 13.197278911564625, "grad_norm": 5.737509250640869, "learning_rate": 2.361904761904762e-05, "loss": 0.0189, "step": 1940 }, { "epoch": 13.26530612244898, "grad_norm": 0.3337570130825043, "learning_rate": 2.3482993197278913e-05, "loss": 0.0249, "step": 1950 }, { "epoch": 13.333333333333334, "grad_norm": 0.21208225190639496, "learning_rate": 2.3346938775510206e-05, "loss": 0.0266, "step": 1960 }, { "epoch": 13.401360544217686, "grad_norm": 0.034205980598926544, "learning_rate": 2.3210884353741495e-05, "loss": 0.0062, "step": 1970 }, { "epoch": 13.46938775510204, "grad_norm": 6.113767623901367, "learning_rate": 2.3074829931972792e-05, "loss": 0.1134, "step": 1980 }, { "epoch": 13.537414965986395, "grad_norm": 1.1116629838943481, "learning_rate": 2.293877551020408e-05, "loss": 0.0873, "step": 1990 }, { "epoch": 13.60544217687075, "grad_norm": 8.336064338684082, "learning_rate": 2.2802721088435375e-05, "loss": 0.0285, "step": 2000 }, { "epoch": 13.673469387755102, "grad_norm": 0.6493708491325378, "learning_rate": 2.2666666666666668e-05, "loss": 0.0107, "step": 2010 }, { "epoch": 13.741496598639456, "grad_norm": 4.806843280792236, "learning_rate": 2.253061224489796e-05, "loss": 0.0163, "step": 2020 }, { "epoch": 13.80952380952381, "grad_norm": 0.049966610968112946, "learning_rate": 2.2394557823129254e-05, "loss": 0.01, "step": 2030 }, { "epoch": 13.877551020408163, "grad_norm": 0.1961314082145691, "learning_rate": 2.2258503401360543e-05, "loss": 0.107, "step": 2040 }, { "epoch": 13.945578231292517, "grad_norm": 0.04836405813694, "learning_rate": 2.2122448979591836e-05, "loss": 0.0518, "step": 2050 }, { "epoch": 14.0, "eval_cer": 0.37752525252525254, "eval_loss": 0.04542345553636551, "eval_runtime": 3.7709, "eval_samples_per_second": 77.701, "eval_steps_per_second": 38.983, "step": 2058 }, { "epoch": 14.013605442176871, "grad_norm": 0.12112589925527573, "learning_rate": 2.1986394557823133e-05, "loss": 0.007, "step": 2060 }, { "epoch": 14.081632653061224, "grad_norm": 0.18053178489208221, "learning_rate": 2.1850340136054422e-05, "loss": 0.1291, "step": 2070 }, { "epoch": 14.149659863945578, "grad_norm": 0.2066822201013565, "learning_rate": 2.1714285714285715e-05, "loss": 0.0257, "step": 2080 }, { "epoch": 14.217687074829932, "grad_norm": 0.11014904081821442, "learning_rate": 2.1578231292517005e-05, "loss": 0.0535, "step": 2090 }, { "epoch": 14.285714285714286, "grad_norm": 12.59344482421875, "learning_rate": 2.14421768707483e-05, "loss": 0.1287, "step": 2100 }, { "epoch": 14.353741496598639, "grad_norm": 0.045586470514535904, "learning_rate": 2.1306122448979595e-05, "loss": 0.0348, "step": 2110 }, { "epoch": 14.421768707482993, "grad_norm": 0.16031715273857117, "learning_rate": 2.1170068027210884e-05, "loss": 0.0664, "step": 2120 }, { "epoch": 14.489795918367347, "grad_norm": 2.561408281326294, "learning_rate": 2.1034013605442177e-05, "loss": 0.04, "step": 2130 }, { "epoch": 14.5578231292517, "grad_norm": 0.2670551538467407, "learning_rate": 2.089795918367347e-05, "loss": 0.0092, "step": 2140 }, { "epoch": 14.625850340136054, "grad_norm": 0.12540097534656525, "learning_rate": 2.0761904761904763e-05, "loss": 0.0111, "step": 2150 }, { "epoch": 14.693877551020408, "grad_norm": 1.2808445692062378, "learning_rate": 2.0625850340136056e-05, "loss": 0.0595, "step": 2160 }, { "epoch": 14.761904761904763, "grad_norm": 0.03977341949939728, "learning_rate": 2.0489795918367346e-05, "loss": 0.0437, "step": 2170 }, { "epoch": 14.829931972789115, "grad_norm": 0.40472060441970825, "learning_rate": 2.0353741496598642e-05, "loss": 0.0268, "step": 2180 }, { "epoch": 14.89795918367347, "grad_norm": 0.08766451478004456, "learning_rate": 2.0217687074829932e-05, "loss": 0.0072, "step": 2190 }, { "epoch": 14.965986394557824, "grad_norm": 0.34755828976631165, "learning_rate": 2.0081632653061225e-05, "loss": 0.0482, "step": 2200 }, { "epoch": 15.0, "eval_cer": 0.38257575757575757, "eval_loss": 0.03393391892313957, "eval_runtime": 3.9624, "eval_samples_per_second": 73.945, "eval_steps_per_second": 37.098, "step": 2205 }, { "epoch": 15.034013605442176, "grad_norm": 0.07853918522596359, "learning_rate": 1.9945578231292518e-05, "loss": 0.0218, "step": 2210 }, { "epoch": 15.10204081632653, "grad_norm": 0.08187614381313324, "learning_rate": 1.980952380952381e-05, "loss": 0.0059, "step": 2220 }, { "epoch": 15.170068027210885, "grad_norm": 0.5369409918785095, "learning_rate": 1.9673469387755104e-05, "loss": 0.0105, "step": 2230 }, { "epoch": 15.238095238095237, "grad_norm": 0.46228042244911194, "learning_rate": 1.9537414965986394e-05, "loss": 0.0028, "step": 2240 }, { "epoch": 15.306122448979592, "grad_norm": 0.1829945147037506, "learning_rate": 1.940136054421769e-05, "loss": 0.0355, "step": 2250 }, { "epoch": 15.374149659863946, "grad_norm": 0.08586379885673523, "learning_rate": 1.926530612244898e-05, "loss": 0.0096, "step": 2260 }, { "epoch": 15.4421768707483, "grad_norm": 0.13552436232566833, "learning_rate": 1.9129251700680273e-05, "loss": 0.1455, "step": 2270 }, { "epoch": 15.510204081632653, "grad_norm": 0.3371906876564026, "learning_rate": 1.8993197278911566e-05, "loss": 0.0229, "step": 2280 }, { "epoch": 15.578231292517007, "grad_norm": 0.03920818492770195, "learning_rate": 1.885714285714286e-05, "loss": 0.0224, "step": 2290 }, { "epoch": 15.646258503401361, "grad_norm": 0.8000497817993164, "learning_rate": 1.8721088435374152e-05, "loss": 0.0681, "step": 2300 }, { "epoch": 15.714285714285714, "grad_norm": 0.15928244590759277, "learning_rate": 1.8585034013605442e-05, "loss": 0.0338, "step": 2310 }, { "epoch": 15.782312925170068, "grad_norm": 2.3006467819213867, "learning_rate": 1.8448979591836735e-05, "loss": 0.0085, "step": 2320 }, { "epoch": 15.850340136054422, "grad_norm": 0.3312649726867676, "learning_rate": 1.8312925170068028e-05, "loss": 0.0015, "step": 2330 }, { "epoch": 15.918367346938776, "grad_norm": 11.605361938476562, "learning_rate": 1.817687074829932e-05, "loss": 0.0616, "step": 2340 }, { "epoch": 15.986394557823129, "grad_norm": 0.06783591210842133, "learning_rate": 1.8040816326530614e-05, "loss": 0.0061, "step": 2350 }, { "epoch": 16.0, "eval_cer": 0.36742424242424243, "eval_loss": 0.01737603358924389, "eval_runtime": 3.7608, "eval_samples_per_second": 77.91, "eval_steps_per_second": 39.088, "step": 2352 }, { "epoch": 16.05442176870748, "grad_norm": 0.03763847053050995, "learning_rate": 1.7904761904761904e-05, "loss": 0.018, "step": 2360 }, { "epoch": 16.122448979591837, "grad_norm": 1.681272268295288, "learning_rate": 1.77687074829932e-05, "loss": 0.0158, "step": 2370 }, { "epoch": 16.19047619047619, "grad_norm": 0.31114622950553894, "learning_rate": 1.763265306122449e-05, "loss": 0.0045, "step": 2380 }, { "epoch": 16.258503401360546, "grad_norm": 3.3073906898498535, "learning_rate": 1.7496598639455783e-05, "loss": 0.0266, "step": 2390 }, { "epoch": 16.3265306122449, "grad_norm": 0.05872774124145508, "learning_rate": 1.7360544217687076e-05, "loss": 0.0094, "step": 2400 }, { "epoch": 16.39455782312925, "grad_norm": 0.36872556805610657, "learning_rate": 1.722448979591837e-05, "loss": 0.0031, "step": 2410 }, { "epoch": 16.462585034013607, "grad_norm": 0.18953841924667358, "learning_rate": 1.7088435374149662e-05, "loss": 0.0072, "step": 2420 }, { "epoch": 16.53061224489796, "grad_norm": 0.053531669080257416, "learning_rate": 1.695238095238095e-05, "loss": 0.0214, "step": 2430 }, { "epoch": 16.598639455782312, "grad_norm": 0.9419485926628113, "learning_rate": 1.6816326530612244e-05, "loss": 0.0311, "step": 2440 }, { "epoch": 16.666666666666668, "grad_norm": 0.10363417118787766, "learning_rate": 1.668027210884354e-05, "loss": 0.0145, "step": 2450 }, { "epoch": 16.73469387755102, "grad_norm": 11.27441692352295, "learning_rate": 1.654421768707483e-05, "loss": 0.0174, "step": 2460 }, { "epoch": 16.802721088435373, "grad_norm": 0.07533001154661179, "learning_rate": 1.6408163265306124e-05, "loss": 0.0043, "step": 2470 }, { "epoch": 16.87074829931973, "grad_norm": 0.09791432321071625, "learning_rate": 1.6272108843537413e-05, "loss": 0.0192, "step": 2480 }, { "epoch": 16.93877551020408, "grad_norm": 1.773054838180542, "learning_rate": 1.613605442176871e-05, "loss": 0.0846, "step": 2490 }, { "epoch": 17.0, "eval_cer": 0.22853535353535354, "eval_loss": 0.016882039606571198, "eval_runtime": 3.9198, "eval_samples_per_second": 74.749, "eval_steps_per_second": 37.502, "step": 2499 }, { "epoch": 17.006802721088434, "grad_norm": 0.08635395020246506, "learning_rate": 1.6000000000000003e-05, "loss": 0.0117, "step": 2500 }, { "epoch": 17.07482993197279, "grad_norm": 0.04216604679822922, "learning_rate": 1.5863945578231292e-05, "loss": 0.0075, "step": 2510 }, { "epoch": 17.142857142857142, "grad_norm": 0.3129735589027405, "learning_rate": 1.5727891156462585e-05, "loss": 0.0024, "step": 2520 }, { "epoch": 17.2108843537415, "grad_norm": 0.0337909497320652, "learning_rate": 1.559183673469388e-05, "loss": 0.0032, "step": 2530 }, { "epoch": 17.27891156462585, "grad_norm": 0.3642733097076416, "learning_rate": 1.545578231292517e-05, "loss": 0.1344, "step": 2540 }, { "epoch": 17.346938775510203, "grad_norm": 0.06059624254703522, "learning_rate": 1.5319727891156464e-05, "loss": 0.0039, "step": 2550 }, { "epoch": 17.41496598639456, "grad_norm": 0.5465549826622009, "learning_rate": 1.5183673469387754e-05, "loss": 0.0395, "step": 2560 }, { "epoch": 17.482993197278912, "grad_norm": 0.048258326947689056, "learning_rate": 1.5047619047619049e-05, "loss": 0.0226, "step": 2570 }, { "epoch": 17.551020408163264, "grad_norm": 0.5764261484146118, "learning_rate": 1.4911564625850342e-05, "loss": 0.0348, "step": 2580 }, { "epoch": 17.61904761904762, "grad_norm": 1.802079439163208, "learning_rate": 1.4775510204081633e-05, "loss": 0.004, "step": 2590 }, { "epoch": 17.687074829931973, "grad_norm": 0.03979931399226189, "learning_rate": 1.4639455782312925e-05, "loss": 0.004, "step": 2600 }, { "epoch": 17.755102040816325, "grad_norm": 0.25388839840888977, "learning_rate": 1.450340136054422e-05, "loss": 0.0039, "step": 2610 }, { "epoch": 17.82312925170068, "grad_norm": 0.44963565468788147, "learning_rate": 1.436734693877551e-05, "loss": 0.006, "step": 2620 }, { "epoch": 17.891156462585034, "grad_norm": 0.0887552797794342, "learning_rate": 1.4231292517006804e-05, "loss": 0.0042, "step": 2630 }, { "epoch": 17.959183673469386, "grad_norm": 0.11289983987808228, "learning_rate": 1.4095238095238095e-05, "loss": 0.0034, "step": 2640 }, { "epoch": 18.0, "eval_cer": 0.24242424242424243, "eval_loss": 0.013762996532022953, "eval_runtime": 3.9761, "eval_samples_per_second": 73.691, "eval_steps_per_second": 36.971, "step": 2646 }, { "epoch": 18.027210884353742, "grad_norm": 0.06268062442541122, "learning_rate": 1.395918367346939e-05, "loss": 0.0031, "step": 2650 }, { "epoch": 18.095238095238095, "grad_norm": 0.03095332533121109, "learning_rate": 1.3823129251700681e-05, "loss": 0.0356, "step": 2660 }, { "epoch": 18.163265306122447, "grad_norm": 0.6670628786087036, "learning_rate": 1.3687074829931972e-05, "loss": 0.004, "step": 2670 }, { "epoch": 18.231292517006803, "grad_norm": 0.09079564362764359, "learning_rate": 1.3551020408163265e-05, "loss": 0.0036, "step": 2680 }, { "epoch": 18.299319727891156, "grad_norm": 0.17814789712429047, "learning_rate": 1.3414965986394558e-05, "loss": 0.0016, "step": 2690 }, { "epoch": 18.367346938775512, "grad_norm": 0.053088486194610596, "learning_rate": 1.3278911564625852e-05, "loss": 0.0037, "step": 2700 }, { "epoch": 18.435374149659864, "grad_norm": 0.05287722125649452, "learning_rate": 1.3142857142857143e-05, "loss": 0.0031, "step": 2710 }, { "epoch": 18.503401360544217, "grad_norm": 0.22168047726154327, "learning_rate": 1.3006802721088434e-05, "loss": 0.003, "step": 2720 }, { "epoch": 18.571428571428573, "grad_norm": 0.2615916430950165, "learning_rate": 1.2870748299319729e-05, "loss": 0.0022, "step": 2730 }, { "epoch": 18.639455782312925, "grad_norm": 0.04484458267688751, "learning_rate": 1.273469387755102e-05, "loss": 0.083, "step": 2740 }, { "epoch": 18.707482993197278, "grad_norm": 0.4530847370624542, "learning_rate": 1.2598639455782313e-05, "loss": 0.0034, "step": 2750 }, { "epoch": 18.775510204081634, "grad_norm": 0.11792109161615372, "learning_rate": 1.2462585034013606e-05, "loss": 0.0295, "step": 2760 }, { "epoch": 18.843537414965986, "grad_norm": 0.049426767975091934, "learning_rate": 1.2326530612244898e-05, "loss": 0.0019, "step": 2770 }, { "epoch": 18.91156462585034, "grad_norm": 0.04500193893909454, "learning_rate": 1.219047619047619e-05, "loss": 0.0134, "step": 2780 }, { "epoch": 18.979591836734695, "grad_norm": 0.14980462193489075, "learning_rate": 1.2054421768707484e-05, "loss": 0.0032, "step": 2790 }, { "epoch": 19.0, "eval_cer": 0.2159090909090909, "eval_loss": 0.012968610972166061, "eval_runtime": 3.6907, "eval_samples_per_second": 79.389, "eval_steps_per_second": 39.83, "step": 2793 }, { "epoch": 19.047619047619047, "grad_norm": 0.5596031546592712, "learning_rate": 1.1918367346938777e-05, "loss": 0.004, "step": 2800 }, { "epoch": 19.1156462585034, "grad_norm": 0.09450047463178635, "learning_rate": 1.178231292517007e-05, "loss": 0.0176, "step": 2810 }, { "epoch": 19.183673469387756, "grad_norm": 0.030270878225564957, "learning_rate": 1.1646258503401361e-05, "loss": 0.0031, "step": 2820 }, { "epoch": 19.25170068027211, "grad_norm": 0.5096073746681213, "learning_rate": 1.1510204081632654e-05, "loss": 0.0231, "step": 2830 }, { "epoch": 19.31972789115646, "grad_norm": 0.2736698389053345, "learning_rate": 1.1374149659863946e-05, "loss": 0.0016, "step": 2840 }, { "epoch": 19.387755102040817, "grad_norm": 0.06535348296165466, "learning_rate": 1.1238095238095239e-05, "loss": 0.0013, "step": 2850 }, { "epoch": 19.45578231292517, "grad_norm": 0.0859360322356224, "learning_rate": 1.1102040816326532e-05, "loss": 0.003, "step": 2860 }, { "epoch": 19.523809523809526, "grad_norm": 0.037795525044202805, "learning_rate": 1.0965986394557825e-05, "loss": 0.0026, "step": 2870 }, { "epoch": 19.591836734693878, "grad_norm": 1.3488638401031494, "learning_rate": 1.0829931972789116e-05, "loss": 0.004, "step": 2880 }, { "epoch": 19.65986394557823, "grad_norm": 0.05746370553970337, "learning_rate": 1.0693877551020409e-05, "loss": 0.0192, "step": 2890 }, { "epoch": 19.727891156462587, "grad_norm": 0.025979384779930115, "learning_rate": 1.05578231292517e-05, "loss": 0.0091, "step": 2900 }, { "epoch": 19.79591836734694, "grad_norm": 4.972421646118164, "learning_rate": 1.0421768707482993e-05, "loss": 0.0082, "step": 2910 }, { "epoch": 19.86394557823129, "grad_norm": 0.49525704979896545, "learning_rate": 1.0285714285714286e-05, "loss": 0.0034, "step": 2920 }, { "epoch": 19.931972789115648, "grad_norm": 0.02950323186814785, "learning_rate": 1.014965986394558e-05, "loss": 0.07, "step": 2930 }, { "epoch": 20.0, "grad_norm": 0.047243040055036545, "learning_rate": 1.001360544217687e-05, "loss": 0.0019, "step": 2940 }, { "epoch": 20.0, "eval_cer": 0.30176767676767674, "eval_loss": 0.011288419365882874, "eval_runtime": 4.0348, "eval_samples_per_second": 72.618, "eval_steps_per_second": 36.433, "step": 2940 }, { "epoch": 20.068027210884352, "grad_norm": 8.58004093170166, "learning_rate": 9.877551020408164e-06, "loss": 0.02, "step": 2950 }, { "epoch": 20.13605442176871, "grad_norm": 0.2544482946395874, "learning_rate": 9.741496598639455e-06, "loss": 0.0156, "step": 2960 }, { "epoch": 20.20408163265306, "grad_norm": 0.5715163350105286, "learning_rate": 9.60544217687075e-06, "loss": 0.0027, "step": 2970 }, { "epoch": 20.272108843537413, "grad_norm": 0.134610116481781, "learning_rate": 9.469387755102041e-06, "loss": 0.0394, "step": 2980 }, { "epoch": 20.34013605442177, "grad_norm": 0.23469507694244385, "learning_rate": 9.333333333333334e-06, "loss": 0.0142, "step": 2990 }, { "epoch": 20.408163265306122, "grad_norm": 0.19277207553386688, "learning_rate": 9.197278911564626e-06, "loss": 0.0214, "step": 3000 }, { "epoch": 20.476190476190474, "grad_norm": 0.04216855764389038, "learning_rate": 9.061224489795919e-06, "loss": 0.0014, "step": 3010 }, { "epoch": 20.54421768707483, "grad_norm": 0.02860959619283676, "learning_rate": 8.925170068027212e-06, "loss": 0.0049, "step": 3020 }, { "epoch": 20.612244897959183, "grad_norm": 0.38055145740509033, "learning_rate": 8.789115646258505e-06, "loss": 0.0193, "step": 3030 }, { "epoch": 20.68027210884354, "grad_norm": 0.034134916961193085, "learning_rate": 8.653061224489796e-06, "loss": 0.0015, "step": 3040 }, { "epoch": 20.74829931972789, "grad_norm": 0.6501132845878601, "learning_rate": 8.517006802721089e-06, "loss": 0.003, "step": 3050 }, { "epoch": 20.816326530612244, "grad_norm": 0.26927316188812256, "learning_rate": 8.38095238095238e-06, "loss": 0.0064, "step": 3060 }, { "epoch": 20.8843537414966, "grad_norm": 0.308063805103302, "learning_rate": 8.244897959183674e-06, "loss": 0.0171, "step": 3070 }, { "epoch": 20.952380952380953, "grad_norm": 0.0912749320268631, "learning_rate": 8.108843537414967e-06, "loss": 0.0034, "step": 3080 }, { "epoch": 21.0, "eval_cer": 0.23863636363636365, "eval_loss": 0.009314554743468761, "eval_runtime": 3.7562, "eval_samples_per_second": 78.005, "eval_steps_per_second": 39.135, "step": 3087 }, { "epoch": 21.020408163265305, "grad_norm": 0.031063944101333618, "learning_rate": 7.97278911564626e-06, "loss": 0.0025, "step": 3090 }, { "epoch": 21.08843537414966, "grad_norm": 0.45678919553756714, "learning_rate": 7.836734693877551e-06, "loss": 0.0017, "step": 3100 }, { "epoch": 21.156462585034014, "grad_norm": 0.06373850256204605, "learning_rate": 7.700680272108844e-06, "loss": 0.0203, "step": 3110 }, { "epoch": 21.224489795918366, "grad_norm": 0.04051206260919571, "learning_rate": 7.564625850340136e-06, "loss": 0.0028, "step": 3120 }, { "epoch": 21.292517006802722, "grad_norm": 0.20778831839561462, "learning_rate": 7.428571428571429e-06, "loss": 0.0032, "step": 3130 }, { "epoch": 21.360544217687075, "grad_norm": 0.23982657492160797, "learning_rate": 7.292517006802721e-06, "loss": 0.0097, "step": 3140 }, { "epoch": 21.428571428571427, "grad_norm": 0.30359897017478943, "learning_rate": 7.1564625850340144e-06, "loss": 0.002, "step": 3150 }, { "epoch": 21.496598639455783, "grad_norm": 0.844930112361908, "learning_rate": 7.020408163265306e-06, "loss": 0.064, "step": 3160 }, { "epoch": 21.564625850340136, "grad_norm": 0.2660425305366516, "learning_rate": 6.884353741496599e-06, "loss": 0.011, "step": 3170 }, { "epoch": 21.632653061224488, "grad_norm": 0.1279953122138977, "learning_rate": 6.748299319727891e-06, "loss": 0.0027, "step": 3180 }, { "epoch": 21.700680272108844, "grad_norm": 0.05603710934519768, "learning_rate": 6.612244897959184e-06, "loss": 0.0012, "step": 3190 }, { "epoch": 21.768707482993197, "grad_norm": 0.05168928578495979, "learning_rate": 6.476190476190476e-06, "loss": 0.0176, "step": 3200 }, { "epoch": 21.836734693877553, "grad_norm": 0.046198636293411255, "learning_rate": 6.340136054421769e-06, "loss": 0.0013, "step": 3210 }, { "epoch": 21.904761904761905, "grad_norm": 0.042502377182245255, "learning_rate": 6.2040816326530614e-06, "loss": 0.0015, "step": 3220 }, { "epoch": 21.972789115646258, "grad_norm": 0.24547749757766724, "learning_rate": 6.0680272108843545e-06, "loss": 0.0023, "step": 3230 }, { "epoch": 22.0, "eval_cer": 0.23106060606060605, "eval_loss": 0.009045995771884918, "eval_runtime": 3.8812, "eval_samples_per_second": 75.492, "eval_steps_per_second": 37.875, "step": 3234 }, { "epoch": 22.040816326530614, "grad_norm": 1.8699299097061157, "learning_rate": 5.931972789115647e-06, "loss": 0.0095, "step": 3240 }, { "epoch": 22.108843537414966, "grad_norm": 0.028658084571361542, "learning_rate": 5.795918367346939e-06, "loss": 0.002, "step": 3250 }, { "epoch": 22.17687074829932, "grad_norm": 0.11185970157384872, "learning_rate": 5.659863945578232e-06, "loss": 0.0139, "step": 3260 }, { "epoch": 22.244897959183675, "grad_norm": 0.08078885078430176, "learning_rate": 5.523809523809524e-06, "loss": 0.0093, "step": 3270 }, { "epoch": 22.312925170068027, "grad_norm": 0.033784542232751846, "learning_rate": 5.387755102040816e-06, "loss": 0.0035, "step": 3280 }, { "epoch": 22.38095238095238, "grad_norm": 0.04999591037631035, "learning_rate": 5.251700680272109e-06, "loss": 0.0158, "step": 3290 }, { "epoch": 22.448979591836736, "grad_norm": 0.47869572043418884, "learning_rate": 5.1156462585034015e-06, "loss": 0.002, "step": 3300 }, { "epoch": 22.517006802721088, "grad_norm": 0.5939333438873291, "learning_rate": 4.9795918367346945e-06, "loss": 0.0025, "step": 3310 }, { "epoch": 22.58503401360544, "grad_norm": 0.02597820572555065, "learning_rate": 4.843537414965987e-06, "loss": 0.017, "step": 3320 }, { "epoch": 22.653061224489797, "grad_norm": 0.06343343108892441, "learning_rate": 4.707482993197279e-06, "loss": 0.0414, "step": 3330 }, { "epoch": 22.72108843537415, "grad_norm": 0.40962278842926025, "learning_rate": 4.571428571428572e-06, "loss": 0.0114, "step": 3340 }, { "epoch": 22.7891156462585, "grad_norm": 0.29765334725379944, "learning_rate": 4.435374149659864e-06, "loss": 0.0023, "step": 3350 }, { "epoch": 22.857142857142858, "grad_norm": 0.17841386795043945, "learning_rate": 4.299319727891156e-06, "loss": 0.0016, "step": 3360 }, { "epoch": 22.92517006802721, "grad_norm": 0.44667163491249084, "learning_rate": 4.163265306122449e-06, "loss": 0.0021, "step": 3370 }, { "epoch": 22.993197278911566, "grad_norm": 0.19756975769996643, "learning_rate": 4.0272108843537416e-06, "loss": 0.0073, "step": 3380 }, { "epoch": 23.0, "eval_cer": 0.23737373737373738, "eval_loss": 0.008361349813640118, "eval_runtime": 3.9088, "eval_samples_per_second": 74.958, "eval_steps_per_second": 37.607, "step": 3381 }, { "epoch": 23.06122448979592, "grad_norm": 4.8710503578186035, "learning_rate": 3.891156462585034e-06, "loss": 0.0383, "step": 3390 }, { "epoch": 23.12925170068027, "grad_norm": 0.05327881500124931, "learning_rate": 3.7551020408163268e-06, "loss": 0.0026, "step": 3400 }, { "epoch": 23.197278911564627, "grad_norm": 0.4828534722328186, "learning_rate": 3.619047619047619e-06, "loss": 0.0024, "step": 3410 }, { "epoch": 23.26530612244898, "grad_norm": 0.03481818363070488, "learning_rate": 3.4829931972789116e-06, "loss": 0.0068, "step": 3420 }, { "epoch": 23.333333333333332, "grad_norm": 0.19071730971336365, "learning_rate": 3.346938775510204e-06, "loss": 0.0171, "step": 3430 }, { "epoch": 23.401360544217688, "grad_norm": 0.08115135878324509, "learning_rate": 3.210884353741497e-06, "loss": 0.0015, "step": 3440 }, { "epoch": 23.46938775510204, "grad_norm": 0.13966763019561768, "learning_rate": 3.074829931972789e-06, "loss": 0.0309, "step": 3450 }, { "epoch": 23.537414965986393, "grad_norm": 0.08014482259750366, "learning_rate": 2.9387755102040816e-06, "loss": 0.0028, "step": 3460 }, { "epoch": 23.60544217687075, "grad_norm": 0.7266091108322144, "learning_rate": 2.8027210884353742e-06, "loss": 0.0019, "step": 3470 }, { "epoch": 23.6734693877551, "grad_norm": 0.763943076133728, "learning_rate": 2.666666666666667e-06, "loss": 0.0029, "step": 3480 }, { "epoch": 23.741496598639454, "grad_norm": 0.20366428792476654, "learning_rate": 2.5306122448979594e-06, "loss": 0.0077, "step": 3490 }, { "epoch": 23.80952380952381, "grad_norm": 0.06424231082201004, "learning_rate": 2.394557823129252e-06, "loss": 0.0048, "step": 3500 }, { "epoch": 23.877551020408163, "grad_norm": 0.0471784844994545, "learning_rate": 2.2585034013605447e-06, "loss": 0.0027, "step": 3510 }, { "epoch": 23.94557823129252, "grad_norm": 0.6746675968170166, "learning_rate": 2.122448979591837e-06, "loss": 0.0022, "step": 3520 }, { "epoch": 24.0, "eval_cer": 0.2702020202020202, "eval_loss": 0.008024842478334904, "eval_runtime": 3.9491, "eval_samples_per_second": 74.195, "eval_steps_per_second": 37.224, "step": 3528 }, { "epoch": 24.01360544217687, "grad_norm": 0.248748779296875, "learning_rate": 1.9863945578231295e-06, "loss": 0.0013, "step": 3530 }, { "epoch": 24.081632653061224, "grad_norm": 5.284445762634277, "learning_rate": 1.8503401360544219e-06, "loss": 0.0097, "step": 3540 }, { "epoch": 24.14965986394558, "grad_norm": 0.07442311942577362, "learning_rate": 1.7142857142857145e-06, "loss": 0.011, "step": 3550 }, { "epoch": 24.217687074829932, "grad_norm": 0.1644800305366516, "learning_rate": 1.5782312925170069e-06, "loss": 0.0014, "step": 3560 }, { "epoch": 24.285714285714285, "grad_norm": 0.05962975695729256, "learning_rate": 1.4421768707482995e-06, "loss": 0.0018, "step": 3570 }, { "epoch": 24.35374149659864, "grad_norm": 0.039682451635599136, "learning_rate": 1.306122448979592e-06, "loss": 0.0025, "step": 3580 }, { "epoch": 24.421768707482993, "grad_norm": 0.0817071795463562, "learning_rate": 1.1700680272108845e-06, "loss": 0.0032, "step": 3590 }, { "epoch": 24.489795918367346, "grad_norm": 0.07747264206409454, "learning_rate": 1.034013605442177e-06, "loss": 0.015, "step": 3600 }, { "epoch": 24.5578231292517, "grad_norm": 0.04547140747308731, "learning_rate": 8.979591836734694e-07, "loss": 0.0015, "step": 3610 }, { "epoch": 24.625850340136054, "grad_norm": 0.04727374389767647, "learning_rate": 7.619047619047619e-07, "loss": 0.0023, "step": 3620 }, { "epoch": 24.693877551020407, "grad_norm": 0.14271779358386993, "learning_rate": 6.258503401360544e-07, "loss": 0.0025, "step": 3630 }, { "epoch": 24.761904761904763, "grad_norm": 0.4749351441860199, "learning_rate": 4.897959183673469e-07, "loss": 0.0026, "step": 3640 }, { "epoch": 24.829931972789115, "grad_norm": 10.92783260345459, "learning_rate": 3.537414965986395e-07, "loss": 0.0217, "step": 3650 }, { "epoch": 24.897959183673468, "grad_norm": 0.14225248992443085, "learning_rate": 2.1768707482993197e-07, "loss": 0.0016, "step": 3660 }, { "epoch": 24.965986394557824, "grad_norm": 0.06080883741378784, "learning_rate": 8.16326530612245e-08, "loss": 0.0391, "step": 3670 }, { "epoch": 25.0, "eval_cer": 0.26515151515151514, "eval_loss": 0.007971594110131264, "eval_runtime": 3.9963, "eval_samples_per_second": 73.317, "eval_steps_per_second": 36.784, "step": 3675 } ], "logging_steps": 10, "max_steps": 3675, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.759922930951168e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }