{ "best_metric": 28.375, "best_model_checkpoint": "whisper-medium-clean/checkpoint-225000", "epoch": 60.0, "eval_steps": 1000, "global_step": 225000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.5, "grad_norm": 28.940319061279297, "learning_rate": 1.0000000000000002e-06, "loss": 3.7819, "step": 10 }, { "epoch": 2.5, "eval_cer": 81.94444444444444, "eval_loss": 7.813055038452148, "eval_runtime": 6.8451, "eval_samples_per_second": 1.315, "eval_steps_per_second": 1.315, "step": 10 }, { "epoch": 5.0, "grad_norm": 26.397560119628906, "learning_rate": 2.0000000000000003e-06, "loss": 3.1011, "step": 20 }, { "epoch": 5.0, "eval_cer": 81.94444444444444, "eval_loss": 6.869063377380371, "eval_runtime": 6.9299, "eval_samples_per_second": 1.299, "eval_steps_per_second": 1.299, "step": 20 }, { "epoch": 7.5, "grad_norm": 25.42325782775879, "learning_rate": 3e-06, "loss": 2.9982, "step": 30 }, { "epoch": 7.5, "eval_cer": 81.94444444444444, "eval_loss": 8.201728820800781, "eval_runtime": 6.9228, "eval_samples_per_second": 1.3, "eval_steps_per_second": 1.3, "step": 30 }, { "epoch": 10.0, "grad_norm": 26.787145614624023, "learning_rate": 4.000000000000001e-06, "loss": 2.1424, "step": 40 }, { "epoch": 10.0, "eval_cer": 80.55555555555556, "eval_loss": 6.396951675415039, "eval_runtime": 6.9264, "eval_samples_per_second": 1.299, "eval_steps_per_second": 1.299, "step": 40 }, { "epoch": 12.5, "grad_norm": 12.553820610046387, "learning_rate": 5e-06, "loss": 1.1901, "step": 50 }, { "epoch": 12.5, "eval_cer": 77.77777777777779, "eval_loss": 5.309788227081299, "eval_runtime": 6.8991, "eval_samples_per_second": 1.305, "eval_steps_per_second": 1.305, "step": 50 }, { "epoch": 15.0, "grad_norm": 11.38632583618164, "learning_rate": 6e-06, "loss": 0.6653, "step": 60 }, { "epoch": 15.0, "eval_cer": 77.77777777777779, "eval_loss": 4.889711380004883, "eval_runtime": 6.8936, "eval_samples_per_second": 1.306, "eval_steps_per_second": 1.306, "step": 60 }, { "epoch": 17.5, "grad_norm": 1.9933379888534546, "learning_rate": 7e-06, "loss": 0.4194, "step": 70 }, { "epoch": 17.5, "eval_cer": 79.16666666666666, "eval_loss": 4.77920389175415, "eval_runtime": 6.9368, "eval_samples_per_second": 1.297, "eval_steps_per_second": 1.297, "step": 70 }, { "epoch": 20.0, "grad_norm": 1.9275708198547363, "learning_rate": 8.000000000000001e-06, "loss": 0.346, "step": 80 }, { "epoch": 20.0, "eval_cer": 81.94444444444444, "eval_loss": 4.646539211273193, "eval_runtime": 7.0626, "eval_samples_per_second": 1.274, "eval_steps_per_second": 1.274, "step": 80 }, { "epoch": 22.5, "grad_norm": 9.696667671203613, "learning_rate": 9e-06, "loss": 0.1948, "step": 90 }, { "epoch": 22.5, "eval_cer": 83.33333333333334, "eval_loss": 4.1625494956970215, "eval_runtime": 6.9072, "eval_samples_per_second": 1.303, "eval_steps_per_second": 1.303, "step": 90 }, { "epoch": 25.0, "grad_norm": 4.2763776779174805, "learning_rate": 0.0, "loss": 0.0696, "step": 100 }, { "epoch": 25.0, "eval_cer": 62.5, "eval_loss": 3.3797714710235596, "eval_runtime": 7.009, "eval_samples_per_second": 1.284, "eval_steps_per_second": 1.284, "step": 100 }, { "epoch": 0.029333333333333333, "grad_norm": 30.060693740844727, "learning_rate": 9.88888888888889e-06, "loss": 2.9007, "step": 110 }, { "epoch": 0.029333333333333333, "eval_cer": 74.27184466019418, "eval_loss": 2.2464828491210938, "eval_runtime": 77.163, "eval_samples_per_second": 1.283, "eval_steps_per_second": 1.283, "step": 110 }, { "epoch": 0.032, "grad_norm": 23.69278907775879, "learning_rate": 9.777777777777779e-06, "loss": 2.3313, "step": 120 }, { "epoch": 0.032, "eval_cer": 73.05825242718447, "eval_loss": 1.8683823347091675, "eval_runtime": 77.5893, "eval_samples_per_second": 1.276, "eval_steps_per_second": 1.276, "step": 120 }, { "epoch": 0.034666666666666665, "grad_norm": 22.04669761657715, "learning_rate": 9.666666666666667e-06, "loss": 2.1455, "step": 130 }, { "epoch": 0.034666666666666665, "eval_cer": 65.77669902912622, "eval_loss": 1.8104327917099, "eval_runtime": 77.4412, "eval_samples_per_second": 1.278, "eval_steps_per_second": 1.278, "step": 130 }, { "epoch": 0.037333333333333336, "grad_norm": 22.959644317626953, "learning_rate": 9.555555555555556e-06, "loss": 1.9559, "step": 140 }, { "epoch": 0.037333333333333336, "eval_cer": 84.10194174757282, "eval_loss": 1.7080837488174438, "eval_runtime": 79.7762, "eval_samples_per_second": 1.241, "eval_steps_per_second": 1.241, "step": 140 }, { "epoch": 0.04, "grad_norm": 20.132877349853516, "learning_rate": 9.444444444444445e-06, "loss": 1.7925, "step": 150 }, { "epoch": 0.04, "eval_cer": 57.76699029126213, "eval_loss": 1.6740916967391968, "eval_runtime": 77.3782, "eval_samples_per_second": 1.279, "eval_steps_per_second": 1.279, "step": 150 }, { "epoch": 0.042666666666666665, "grad_norm": 24.77916717529297, "learning_rate": 9.333333333333334e-06, "loss": 1.6834, "step": 160 }, { "epoch": 0.042666666666666665, "eval_cer": 56.432038834951456, "eval_loss": 1.6661072969436646, "eval_runtime": 77.4224, "eval_samples_per_second": 1.279, "eval_steps_per_second": 1.279, "step": 160 }, { "epoch": 0.04533333333333334, "grad_norm": 18.739564895629883, "learning_rate": 9.222222222222224e-06, "loss": 1.8451, "step": 170 }, { "epoch": 0.04533333333333334, "eval_cer": 55.46116504854369, "eval_loss": 1.6888178586959839, "eval_runtime": 77.204, "eval_samples_per_second": 1.282, "eval_steps_per_second": 1.282, "step": 170 }, { "epoch": 0.048, "grad_norm": 21.450254440307617, "learning_rate": 9.111111111111112e-06, "loss": 1.7342, "step": 180 }, { "epoch": 0.048, "eval_cer": 58.37378640776699, "eval_loss": 1.6961427927017212, "eval_runtime": 76.8506, "eval_samples_per_second": 1.288, "eval_steps_per_second": 1.288, "step": 180 }, { "epoch": 0.050666666666666665, "grad_norm": 21.943395614624023, "learning_rate": 9e-06, "loss": 1.6679, "step": 190 }, { "epoch": 0.050666666666666665, "eval_cer": 57.16019417475729, "eval_loss": 1.7804681062698364, "eval_runtime": 77.6099, "eval_samples_per_second": 1.276, "eval_steps_per_second": 1.276, "step": 190 }, { "epoch": 0.05333333333333334, "grad_norm": 22.061710357666016, "learning_rate": 8.888888888888888e-06, "loss": 1.609, "step": 200 }, { "epoch": 0.05333333333333334, "eval_cer": 54.490291262135926, "eval_loss": 1.565760612487793, "eval_runtime": 77.5643, "eval_samples_per_second": 1.276, "eval_steps_per_second": 1.276, "step": 200 }, { "epoch": 0.056, "grad_norm": 15.489553451538086, "learning_rate": 8.777777777777778e-06, "loss": 1.598, "step": 210 }, { "epoch": 0.056, "eval_cer": 52.54854368932039, "eval_loss": 1.6144999265670776, "eval_runtime": 77.4908, "eval_samples_per_second": 1.278, "eval_steps_per_second": 1.278, "step": 210 }, { "epoch": 0.058666666666666666, "grad_norm": 18.321216583251953, "learning_rate": 8.666666666666668e-06, "loss": 1.506, "step": 220 }, { "epoch": 0.058666666666666666, "eval_cer": 55.94660194174757, "eval_loss": 1.6863118410110474, "eval_runtime": 76.3715, "eval_samples_per_second": 1.296, "eval_steps_per_second": 1.296, "step": 220 }, { "epoch": 0.06133333333333333, "grad_norm": 27.31436538696289, "learning_rate": 8.555555555555556e-06, "loss": 1.6391, "step": 230 }, { "epoch": 0.06133333333333333, "eval_cer": 55.582524271844655, "eval_loss": 1.6335813999176025, "eval_runtime": 77.1677, "eval_samples_per_second": 1.283, "eval_steps_per_second": 1.283, "step": 230 }, { "epoch": 0.064, "grad_norm": 16.303030014038086, "learning_rate": 8.444444444444446e-06, "loss": 1.2887, "step": 240 }, { "epoch": 0.064, "eval_cer": 55.21844660194175, "eval_loss": 1.7295167446136475, "eval_runtime": 76.984, "eval_samples_per_second": 1.286, "eval_steps_per_second": 1.286, "step": 240 }, { "epoch": 0.06666666666666667, "grad_norm": 15.233794212341309, "learning_rate": 8.333333333333334e-06, "loss": 1.5345, "step": 250 }, { "epoch": 0.06666666666666667, "eval_cer": 55.582524271844655, "eval_loss": 1.6172842979431152, "eval_runtime": 77.6597, "eval_samples_per_second": 1.275, "eval_steps_per_second": 1.275, "step": 250 }, { "epoch": 0.06933333333333333, "grad_norm": 23.4495906829834, "learning_rate": 8.222222222222222e-06, "loss": 1.5216, "step": 260 }, { "epoch": 0.06933333333333333, "eval_cer": 55.33980582524271, "eval_loss": 1.6310383081436157, "eval_runtime": 77.6226, "eval_samples_per_second": 1.275, "eval_steps_per_second": 1.275, "step": 260 }, { "epoch": 0.072, "grad_norm": 14.853434562683105, "learning_rate": 8.111111111111112e-06, "loss": 1.6175, "step": 270 }, { "epoch": 0.072, "eval_cer": 52.54854368932039, "eval_loss": 1.6513257026672363, "eval_runtime": 77.2293, "eval_samples_per_second": 1.282, "eval_steps_per_second": 1.282, "step": 270 }, { "epoch": 0.07466666666666667, "grad_norm": 25.87192153930664, "learning_rate": 8.000000000000001e-06, "loss": 1.5837, "step": 280 }, { "epoch": 0.07466666666666667, "eval_cer": 53.7621359223301, "eval_loss": 1.6197609901428223, "eval_runtime": 77.8112, "eval_samples_per_second": 1.272, "eval_steps_per_second": 1.272, "step": 280 }, { "epoch": 0.07733333333333334, "grad_norm": 23.910364151000977, "learning_rate": 7.88888888888889e-06, "loss": 1.4979, "step": 290 }, { "epoch": 0.07733333333333334, "eval_cer": 51.334951456310684, "eval_loss": 1.6021751165390015, "eval_runtime": 77.6869, "eval_samples_per_second": 1.274, "eval_steps_per_second": 1.274, "step": 290 }, { "epoch": 0.08, "grad_norm": 18.366762161254883, "learning_rate": 7.77777777777778e-06, "loss": 1.4745, "step": 300 }, { "epoch": 0.08, "eval_cer": 52.79126213592234, "eval_loss": 1.5048097372055054, "eval_runtime": 78.2047, "eval_samples_per_second": 1.266, "eval_steps_per_second": 1.266, "step": 300 }, { "epoch": 0.08266666666666667, "grad_norm": 18.047597885131836, "learning_rate": 7.666666666666667e-06, "loss": 1.5534, "step": 310 }, { "epoch": 0.08266666666666667, "eval_cer": 52.79126213592234, "eval_loss": 1.4332234859466553, "eval_runtime": 77.7797, "eval_samples_per_second": 1.273, "eval_steps_per_second": 1.273, "step": 310 }, { "epoch": 0.08533333333333333, "grad_norm": 18.885778427124023, "learning_rate": 7.555555555555556e-06, "loss": 1.3683, "step": 320 }, { "epoch": 0.08533333333333333, "eval_cer": 50.8495145631068, "eval_loss": 1.466812252998352, "eval_runtime": 77.7517, "eval_samples_per_second": 1.273, "eval_steps_per_second": 1.273, "step": 320 }, { "epoch": 0.088, "grad_norm": 17.74494743347168, "learning_rate": 7.444444444444445e-06, "loss": 1.5475, "step": 330 }, { "epoch": 0.088, "eval_cer": 48.786407766990294, "eval_loss": 1.3928931951522827, "eval_runtime": 78.0245, "eval_samples_per_second": 1.269, "eval_steps_per_second": 1.269, "step": 330 }, { "epoch": 0.09066666666666667, "grad_norm": 22.675891876220703, "learning_rate": 7.333333333333333e-06, "loss": 1.4288, "step": 340 }, { "epoch": 0.09066666666666667, "eval_cer": 48.66504854368932, "eval_loss": 1.3858826160430908, "eval_runtime": 78.4821, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 340 }, { "epoch": 0.09333333333333334, "grad_norm": 16.487394332885742, "learning_rate": 7.222222222222223e-06, "loss": 1.4127, "step": 350 }, { "epoch": 0.09333333333333334, "eval_cer": 51.45631067961165, "eval_loss": 1.5098240375518799, "eval_runtime": 77.7435, "eval_samples_per_second": 1.273, "eval_steps_per_second": 1.273, "step": 350 }, { "epoch": 0.096, "grad_norm": 26.096511840820312, "learning_rate": 7.111111111111112e-06, "loss": 1.3516, "step": 360 }, { "epoch": 0.096, "eval_cer": 52.54854368932039, "eval_loss": 1.582693338394165, "eval_runtime": 78.1295, "eval_samples_per_second": 1.267, "eval_steps_per_second": 1.267, "step": 360 }, { "epoch": 0.09866666666666667, "grad_norm": 14.613516807556152, "learning_rate": 7e-06, "loss": 1.5494, "step": 370 }, { "epoch": 0.09866666666666667, "eval_cer": 87.01456310679612, "eval_loss": 1.5777655839920044, "eval_runtime": 79.9586, "eval_samples_per_second": 1.238, "eval_steps_per_second": 1.238, "step": 370 }, { "epoch": 0.10133333333333333, "grad_norm": 33.40306091308594, "learning_rate": 6.88888888888889e-06, "loss": 1.3882, "step": 380 }, { "epoch": 0.10133333333333333, "eval_cer": 87.86407766990291, "eval_loss": 1.5355967283248901, "eval_runtime": 80.2797, "eval_samples_per_second": 1.233, "eval_steps_per_second": 1.233, "step": 380 }, { "epoch": 0.104, "grad_norm": 22.47502899169922, "learning_rate": 6.777777777777779e-06, "loss": 1.388, "step": 390 }, { "epoch": 0.104, "eval_cer": 52.42718446601942, "eval_loss": 1.5560169219970703, "eval_runtime": 77.0569, "eval_samples_per_second": 1.285, "eval_steps_per_second": 1.285, "step": 390 }, { "epoch": 0.10666666666666667, "grad_norm": 14.884334564208984, "learning_rate": 6.666666666666667e-06, "loss": 1.3382, "step": 400 }, { "epoch": 0.10666666666666667, "eval_cer": 50.60679611650486, "eval_loss": 1.4680137634277344, "eval_runtime": 77.1152, "eval_samples_per_second": 1.284, "eval_steps_per_second": 1.284, "step": 400 }, { "epoch": 0.10933333333333334, "grad_norm": 25.081527709960938, "learning_rate": 6.555555555555556e-06, "loss": 1.2751, "step": 410 }, { "epoch": 0.10933333333333334, "eval_cer": 51.69902912621359, "eval_loss": 1.4957218170166016, "eval_runtime": 77.3634, "eval_samples_per_second": 1.28, "eval_steps_per_second": 1.28, "step": 410 }, { "epoch": 0.112, "grad_norm": 15.062318801879883, "learning_rate": 6.444444444444445e-06, "loss": 1.0807, "step": 420 }, { "epoch": 0.112, "eval_cer": 51.94174757281553, "eval_loss": 1.5191024541854858, "eval_runtime": 77.1298, "eval_samples_per_second": 1.284, "eval_steps_per_second": 1.284, "step": 420 }, { "epoch": 0.11466666666666667, "grad_norm": 17.136804580688477, "learning_rate": 6.333333333333333e-06, "loss": 1.2305, "step": 430 }, { "epoch": 0.11466666666666667, "eval_cer": 50.8495145631068, "eval_loss": 1.4538663625717163, "eval_runtime": 77.2074, "eval_samples_per_second": 1.282, "eval_steps_per_second": 1.282, "step": 430 }, { "epoch": 0.11733333333333333, "grad_norm": 20.760255813598633, "learning_rate": 6.222222222222223e-06, "loss": 1.201, "step": 440 }, { "epoch": 0.11733333333333333, "eval_cer": 50.36407766990292, "eval_loss": 1.4003076553344727, "eval_runtime": 77.7921, "eval_samples_per_second": 1.273, "eval_steps_per_second": 1.273, "step": 440 }, { "epoch": 0.12, "grad_norm": 15.381857872009277, "learning_rate": 6.111111111111112e-06, "loss": 1.2639, "step": 450 }, { "epoch": 0.12, "eval_cer": 51.334951456310684, "eval_loss": 1.4118114709854126, "eval_runtime": 77.581, "eval_samples_per_second": 1.276, "eval_steps_per_second": 1.276, "step": 450 }, { "epoch": 0.12266666666666666, "grad_norm": 15.08823013305664, "learning_rate": 6e-06, "loss": 1.2348, "step": 460 }, { "epoch": 0.12266666666666666, "eval_cer": 51.334951456310684, "eval_loss": 1.3688950538635254, "eval_runtime": 78.0141, "eval_samples_per_second": 1.269, "eval_steps_per_second": 1.269, "step": 460 }, { "epoch": 0.12533333333333332, "grad_norm": 13.176762580871582, "learning_rate": 5.88888888888889e-06, "loss": 1.1162, "step": 470 }, { "epoch": 0.12533333333333332, "eval_cer": 69.05339805825243, "eval_loss": 1.4524732828140259, "eval_runtime": 80.7402, "eval_samples_per_second": 1.226, "eval_steps_per_second": 1.226, "step": 470 }, { "epoch": 0.128, "grad_norm": 17.04905891418457, "learning_rate": 5.777777777777778e-06, "loss": 1.3585, "step": 480 }, { "epoch": 0.128, "eval_cer": 51.213592233009706, "eval_loss": 1.4150296449661255, "eval_runtime": 78.0509, "eval_samples_per_second": 1.268, "eval_steps_per_second": 1.268, "step": 480 }, { "epoch": 0.13066666666666665, "grad_norm": 14.833456039428711, "learning_rate": 5.666666666666667e-06, "loss": 1.1588, "step": 490 }, { "epoch": 0.13066666666666665, "eval_cer": 51.213592233009706, "eval_loss": 1.4175814390182495, "eval_runtime": 77.9995, "eval_samples_per_second": 1.269, "eval_steps_per_second": 1.269, "step": 490 }, { "epoch": 0.13333333333333333, "grad_norm": 16.29265594482422, "learning_rate": 5.555555555555557e-06, "loss": 1.3108, "step": 500 }, { "epoch": 0.13333333333333333, "eval_cer": 52.18446601941748, "eval_loss": 1.4835201501846313, "eval_runtime": 77.8901, "eval_samples_per_second": 1.271, "eval_steps_per_second": 1.271, "step": 500 }, { "epoch": 0.136, "grad_norm": 24.089468002319336, "learning_rate": 5.444444444444445e-06, "loss": 1.2215, "step": 510 }, { "epoch": 0.136, "eval_cer": 53.03398058252428, "eval_loss": 1.4001387357711792, "eval_runtime": 77.5673, "eval_samples_per_second": 1.276, "eval_steps_per_second": 1.276, "step": 510 }, { "epoch": 0.13866666666666666, "grad_norm": 19.17269515991211, "learning_rate": 5.333333333333334e-06, "loss": 1.1983, "step": 520 }, { "epoch": 0.13866666666666666, "eval_cer": 50.12135922330098, "eval_loss": 1.3796579837799072, "eval_runtime": 78.2009, "eval_samples_per_second": 1.266, "eval_steps_per_second": 1.266, "step": 520 }, { "epoch": 0.14133333333333334, "grad_norm": 16.18842887878418, "learning_rate": 5.2222222222222226e-06, "loss": 1.1601, "step": 530 }, { "epoch": 0.14133333333333334, "eval_cer": 52.79126213592234, "eval_loss": 1.3958033323287964, "eval_runtime": 78.062, "eval_samples_per_second": 1.268, "eval_steps_per_second": 1.268, "step": 530 }, { "epoch": 0.144, "grad_norm": 18.497060775756836, "learning_rate": 5.1111111111111115e-06, "loss": 1.1819, "step": 540 }, { "epoch": 0.144, "eval_cer": 51.94174757281553, "eval_loss": 1.3504077196121216, "eval_runtime": 78.1284, "eval_samples_per_second": 1.267, "eval_steps_per_second": 1.267, "step": 540 }, { "epoch": 0.14666666666666667, "grad_norm": 18.69913673400879, "learning_rate": 5e-06, "loss": 1.1447, "step": 550 }, { "epoch": 0.14666666666666667, "eval_cer": 50.970873786407765, "eval_loss": 1.3514248132705688, "eval_runtime": 77.9101, "eval_samples_per_second": 1.271, "eval_steps_per_second": 1.271, "step": 550 }, { "epoch": 0.14933333333333335, "grad_norm": 17.90473747253418, "learning_rate": 4.888888888888889e-06, "loss": 1.1678, "step": 560 }, { "epoch": 0.14933333333333335, "eval_cer": 51.334951456310684, "eval_loss": 1.2929948568344116, "eval_runtime": 78.5632, "eval_samples_per_second": 1.26, "eval_steps_per_second": 1.26, "step": 560 }, { "epoch": 0.152, "grad_norm": 15.230451583862305, "learning_rate": 4.777777777777778e-06, "loss": 1.0188, "step": 570 }, { "epoch": 0.152, "eval_cer": 53.15533980582524, "eval_loss": 1.2831701040267944, "eval_runtime": 78.5138, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 570 }, { "epoch": 0.15466666666666667, "grad_norm": 29.844749450683594, "learning_rate": 4.666666666666667e-06, "loss": 1.1046, "step": 580 }, { "epoch": 0.15466666666666667, "eval_cer": 54.36893203883495, "eval_loss": 1.3857146501541138, "eval_runtime": 78.0718, "eval_samples_per_second": 1.268, "eval_steps_per_second": 1.268, "step": 580 }, { "epoch": 0.15733333333333333, "grad_norm": 16.595338821411133, "learning_rate": 4.555555555555556e-06, "loss": 1.0125, "step": 590 }, { "epoch": 0.15733333333333333, "eval_cer": 51.45631067961165, "eval_loss": 1.3539527654647827, "eval_runtime": 77.8613, "eval_samples_per_second": 1.271, "eval_steps_per_second": 1.271, "step": 590 }, { "epoch": 0.16, "grad_norm": 14.425292015075684, "learning_rate": 4.444444444444444e-06, "loss": 1.1293, "step": 600 }, { "epoch": 0.16, "eval_cer": 52.9126213592233, "eval_loss": 1.3646886348724365, "eval_runtime": 77.96, "eval_samples_per_second": 1.27, "eval_steps_per_second": 1.27, "step": 600 }, { "epoch": 0.16266666666666665, "grad_norm": 20.61813735961914, "learning_rate": 4.333333333333334e-06, "loss": 1.2416, "step": 610 }, { "epoch": 0.16266666666666665, "eval_cer": 54.73300970873787, "eval_loss": 1.3387656211853027, "eval_runtime": 78.0058, "eval_samples_per_second": 1.269, "eval_steps_per_second": 1.269, "step": 610 }, { "epoch": 0.16533333333333333, "grad_norm": 17.798131942749023, "learning_rate": 4.222222222222223e-06, "loss": 1.0653, "step": 620 }, { "epoch": 0.16533333333333333, "eval_cer": 85.43689320388349, "eval_loss": 1.3320649862289429, "eval_runtime": 81.5533, "eval_samples_per_second": 1.214, "eval_steps_per_second": 1.214, "step": 620 }, { "epoch": 0.168, "grad_norm": 19.9289608001709, "learning_rate": 4.111111111111111e-06, "loss": 1.2892, "step": 630 }, { "epoch": 0.168, "eval_cer": 53.15533980582524, "eval_loss": 1.307315707206726, "eval_runtime": 78.4977, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 630 }, { "epoch": 0.17066666666666666, "grad_norm": 15.202763557434082, "learning_rate": 4.000000000000001e-06, "loss": 1.2046, "step": 640 }, { "epoch": 0.17066666666666666, "eval_cer": 53.640776699029125, "eval_loss": 1.3364214897155762, "eval_runtime": 78.8468, "eval_samples_per_second": 1.256, "eval_steps_per_second": 1.256, "step": 640 }, { "epoch": 0.17333333333333334, "grad_norm": 15.811935424804688, "learning_rate": 3.88888888888889e-06, "loss": 1.0636, "step": 650 }, { "epoch": 0.17333333333333334, "eval_cer": 54.97572815533981, "eval_loss": 1.3250948190689087, "eval_runtime": 78.9646, "eval_samples_per_second": 1.254, "eval_steps_per_second": 1.254, "step": 650 }, { "epoch": 0.176, "grad_norm": 15.582385063171387, "learning_rate": 3.777777777777778e-06, "loss": 1.1544, "step": 660 }, { "epoch": 0.176, "eval_cer": 54.490291262135926, "eval_loss": 1.2882179021835327, "eval_runtime": 78.5958, "eval_samples_per_second": 1.26, "eval_steps_per_second": 1.26, "step": 660 }, { "epoch": 0.17866666666666667, "grad_norm": 14.485274314880371, "learning_rate": 3.6666666666666666e-06, "loss": 0.8095, "step": 670 }, { "epoch": 0.17866666666666667, "eval_cer": 54.004854368932044, "eval_loss": 1.2490488290786743, "eval_runtime": 79.0105, "eval_samples_per_second": 1.253, "eval_steps_per_second": 1.253, "step": 670 }, { "epoch": 0.18133333333333335, "grad_norm": 14.364302635192871, "learning_rate": 3.555555555555556e-06, "loss": 1.0163, "step": 680 }, { "epoch": 0.18133333333333335, "eval_cer": 53.27669902912622, "eval_loss": 1.251212477684021, "eval_runtime": 78.8342, "eval_samples_per_second": 1.256, "eval_steps_per_second": 1.256, "step": 680 }, { "epoch": 0.184, "grad_norm": 18.270353317260742, "learning_rate": 3.444444444444445e-06, "loss": 1.1736, "step": 690 }, { "epoch": 0.184, "eval_cer": 52.06310679611651, "eval_loss": 1.2570871114730835, "eval_runtime": 78.3257, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 690 }, { "epoch": 0.18666666666666668, "grad_norm": 15.880080223083496, "learning_rate": 3.3333333333333333e-06, "loss": 0.9871, "step": 700 }, { "epoch": 0.18666666666666668, "eval_cer": 51.577669902912625, "eval_loss": 1.249259352684021, "eval_runtime": 78.4177, "eval_samples_per_second": 1.262, "eval_steps_per_second": 1.262, "step": 700 }, { "epoch": 0.18933333333333333, "grad_norm": 21.195627212524414, "learning_rate": 3.2222222222222227e-06, "loss": 1.0884, "step": 710 }, { "epoch": 0.18933333333333333, "eval_cer": 52.42718446601942, "eval_loss": 1.227440595626831, "eval_runtime": 78.3084, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 710 }, { "epoch": 0.192, "grad_norm": 23.771629333496094, "learning_rate": 3.1111111111111116e-06, "loss": 1.1232, "step": 720 }, { "epoch": 0.192, "eval_cer": 52.06310679611651, "eval_loss": 1.2512867450714111, "eval_runtime": 78.3655, "eval_samples_per_second": 1.263, "eval_steps_per_second": 1.263, "step": 720 }, { "epoch": 0.19466666666666665, "grad_norm": 10.434704780578613, "learning_rate": 3e-06, "loss": 1.0498, "step": 730 }, { "epoch": 0.19466666666666665, "eval_cer": 52.42718446601942, "eval_loss": 1.2518444061279297, "eval_runtime": 78.1699, "eval_samples_per_second": 1.266, "eval_steps_per_second": 1.266, "step": 730 }, { "epoch": 0.19733333333333333, "grad_norm": 21.347713470458984, "learning_rate": 2.888888888888889e-06, "loss": 1.0718, "step": 740 }, { "epoch": 0.19733333333333333, "eval_cer": 51.213592233009706, "eval_loss": 1.2372485399246216, "eval_runtime": 78.2758, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 740 }, { "epoch": 0.2, "grad_norm": 16.782791137695312, "learning_rate": 2.7777777777777783e-06, "loss": 1.1631, "step": 750 }, { "epoch": 0.2, "eval_cer": 50.8495145631068, "eval_loss": 1.2331024408340454, "eval_runtime": 78.246, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 750 }, { "epoch": 0.20266666666666666, "grad_norm": 12.992459297180176, "learning_rate": 2.666666666666667e-06, "loss": 1.0654, "step": 760 }, { "epoch": 0.20266666666666666, "eval_cer": 52.18446601941748, "eval_loss": 1.2558914422988892, "eval_runtime": 78.4164, "eval_samples_per_second": 1.262, "eval_steps_per_second": 1.262, "step": 760 }, { "epoch": 0.20533333333333334, "grad_norm": 13.584222793579102, "learning_rate": 2.5555555555555557e-06, "loss": 0.9858, "step": 770 }, { "epoch": 0.20533333333333334, "eval_cer": 53.7621359223301, "eval_loss": 1.2489798069000244, "eval_runtime": 78.2835, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 770 }, { "epoch": 0.208, "grad_norm": 12.311653137207031, "learning_rate": 2.4444444444444447e-06, "loss": 1.155, "step": 780 }, { "epoch": 0.208, "eval_cer": 52.18446601941748, "eval_loss": 1.2482852935791016, "eval_runtime": 77.9862, "eval_samples_per_second": 1.269, "eval_steps_per_second": 1.269, "step": 780 }, { "epoch": 0.21066666666666667, "grad_norm": 14.416386604309082, "learning_rate": 2.3333333333333336e-06, "loss": 1.077, "step": 790 }, { "epoch": 0.21066666666666667, "eval_cer": 51.94174757281553, "eval_loss": 1.2321821451187134, "eval_runtime": 78.3289, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 790 }, { "epoch": 0.21333333333333335, "grad_norm": 20.134000778198242, "learning_rate": 2.222222222222222e-06, "loss": 0.9659, "step": 800 }, { "epoch": 0.21333333333333335, "eval_cer": 52.18446601941748, "eval_loss": 1.2238733768463135, "eval_runtime": 78.379, "eval_samples_per_second": 1.263, "eval_steps_per_second": 1.263, "step": 800 }, { "epoch": 0.216, "grad_norm": 16.14183807373047, "learning_rate": 2.1111111111111114e-06, "loss": 1.0858, "step": 810 }, { "epoch": 0.216, "eval_cer": 53.51941747572816, "eval_loss": 1.2186323404312134, "eval_runtime": 78.305, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 810 }, { "epoch": 0.21866666666666668, "grad_norm": 14.142354965209961, "learning_rate": 2.0000000000000003e-06, "loss": 1.0114, "step": 820 }, { "epoch": 0.21866666666666668, "eval_cer": 92.35436893203884, "eval_loss": 1.210742712020874, "eval_runtime": 81.017, "eval_samples_per_second": 1.222, "eval_steps_per_second": 1.222, "step": 820 }, { "epoch": 0.22133333333333333, "grad_norm": 17.690200805664062, "learning_rate": 1.888888888888889e-06, "loss": 1.0711, "step": 830 }, { "epoch": 0.22133333333333333, "eval_cer": 91.99029126213593, "eval_loss": 1.2100653648376465, "eval_runtime": 81.155, "eval_samples_per_second": 1.22, "eval_steps_per_second": 1.22, "step": 830 }, { "epoch": 0.224, "grad_norm": 12.702727317810059, "learning_rate": 1.777777777777778e-06, "loss": 1.0218, "step": 840 }, { "epoch": 0.224, "eval_cer": 53.03398058252428, "eval_loss": 1.197860598564148, "eval_runtime": 78.126, "eval_samples_per_second": 1.267, "eval_steps_per_second": 1.267, "step": 840 }, { "epoch": 0.22666666666666666, "grad_norm": 12.827967643737793, "learning_rate": 1.6666666666666667e-06, "loss": 0.9593, "step": 850 }, { "epoch": 0.22666666666666666, "eval_cer": 52.54854368932039, "eval_loss": 1.1930850744247437, "eval_runtime": 78.0654, "eval_samples_per_second": 1.268, "eval_steps_per_second": 1.268, "step": 850 }, { "epoch": 0.22933333333333333, "grad_norm": 16.459444046020508, "learning_rate": 1.5555555555555558e-06, "loss": 1.0961, "step": 860 }, { "epoch": 0.22933333333333333, "eval_cer": 52.42718446601942, "eval_loss": 1.168959140777588, "eval_runtime": 78.3283, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 860 }, { "epoch": 0.232, "grad_norm": 13.297350883483887, "learning_rate": 1.4444444444444445e-06, "loss": 0.8744, "step": 870 }, { "epoch": 0.232, "eval_cer": 51.577669902912625, "eval_loss": 1.1725083589553833, "eval_runtime": 78.1274, "eval_samples_per_second": 1.267, "eval_steps_per_second": 1.267, "step": 870 }, { "epoch": 0.23466666666666666, "grad_norm": 14.628539085388184, "learning_rate": 1.3333333333333334e-06, "loss": 1.0125, "step": 880 }, { "epoch": 0.23466666666666666, "eval_cer": 53.398058252427184, "eval_loss": 1.1670215129852295, "eval_runtime": 78.3518, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 880 }, { "epoch": 0.23733333333333334, "grad_norm": 17.61985206604004, "learning_rate": 1.2222222222222223e-06, "loss": 1.0657, "step": 890 }, { "epoch": 0.23733333333333334, "eval_cer": 54.12621359223301, "eval_loss": 1.1755800247192383, "eval_runtime": 79.0194, "eval_samples_per_second": 1.253, "eval_steps_per_second": 1.253, "step": 890 }, { "epoch": 0.24, "grad_norm": 13.798720359802246, "learning_rate": 1.111111111111111e-06, "loss": 1.049, "step": 900 }, { "epoch": 0.24, "eval_cer": 53.883495145631066, "eval_loss": 1.1638038158416748, "eval_runtime": 78.5113, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 900 }, { "epoch": 0.24266666666666667, "grad_norm": 23.81746482849121, "learning_rate": 1.0000000000000002e-06, "loss": 1.0409, "step": 910 }, { "epoch": 0.24266666666666667, "eval_cer": 53.883495145631066, "eval_loss": 1.1673129796981812, "eval_runtime": 78.4064, "eval_samples_per_second": 1.263, "eval_steps_per_second": 1.263, "step": 910 }, { "epoch": 0.24533333333333332, "grad_norm": 10.653603553771973, "learning_rate": 8.88888888888889e-07, "loss": 1.0895, "step": 920 }, { "epoch": 0.24533333333333332, "eval_cer": 90.53398058252428, "eval_loss": 1.1688292026519775, "eval_runtime": 80.6808, "eval_samples_per_second": 1.227, "eval_steps_per_second": 1.227, "step": 920 }, { "epoch": 0.248, "grad_norm": 25.826753616333008, "learning_rate": 7.777777777777779e-07, "loss": 0.9301, "step": 930 }, { "epoch": 0.248, "eval_cer": 89.32038834951457, "eval_loss": 1.1726011037826538, "eval_runtime": 80.7112, "eval_samples_per_second": 1.227, "eval_steps_per_second": 1.227, "step": 930 }, { "epoch": 0.25066666666666665, "grad_norm": 18.675426483154297, "learning_rate": 6.666666666666667e-07, "loss": 0.9795, "step": 940 }, { "epoch": 0.25066666666666665, "eval_cer": 89.44174757281553, "eval_loss": 1.1721538305282593, "eval_runtime": 80.2393, "eval_samples_per_second": 1.234, "eval_steps_per_second": 1.234, "step": 940 }, { "epoch": 0.25333333333333335, "grad_norm": 19.179977416992188, "learning_rate": 5.555555555555555e-07, "loss": 0.9664, "step": 950 }, { "epoch": 0.25333333333333335, "eval_cer": 89.32038834951457, "eval_loss": 1.1713266372680664, "eval_runtime": 80.8455, "eval_samples_per_second": 1.225, "eval_steps_per_second": 1.225, "step": 950 }, { "epoch": 0.256, "grad_norm": 12.702851295471191, "learning_rate": 4.444444444444445e-07, "loss": 0.8043, "step": 960 }, { "epoch": 0.256, "eval_cer": 89.44174757281553, "eval_loss": 1.1796256303787231, "eval_runtime": 80.7217, "eval_samples_per_second": 1.226, "eval_steps_per_second": 1.226, "step": 960 }, { "epoch": 0.25866666666666666, "grad_norm": 14.771796226501465, "learning_rate": 3.3333333333333335e-07, "loss": 1.0233, "step": 970 }, { "epoch": 0.25866666666666666, "eval_cer": 89.44174757281553, "eval_loss": 1.183519959449768, "eval_runtime": 80.3959, "eval_samples_per_second": 1.231, "eval_steps_per_second": 1.231, "step": 970 }, { "epoch": 0.2613333333333333, "grad_norm": 21.99308204650879, "learning_rate": 2.2222222222222224e-07, "loss": 0.9277, "step": 980 }, { "epoch": 0.2613333333333333, "eval_cer": 89.32038834951457, "eval_loss": 1.1782771348953247, "eval_runtime": 81.6692, "eval_samples_per_second": 1.212, "eval_steps_per_second": 1.212, "step": 980 }, { "epoch": 0.264, "grad_norm": 15.665560722351074, "learning_rate": 1.1111111111111112e-07, "loss": 1.029, "step": 990 }, { "epoch": 0.264, "eval_cer": 89.32038834951457, "eval_loss": 1.1745351552963257, "eval_runtime": 80.7175, "eval_samples_per_second": 1.226, "eval_steps_per_second": 1.226, "step": 990 }, { "epoch": 0.26666666666666666, "grad_norm": 16.09244728088379, "learning_rate": 0.0, "loss": 1.0365, "step": 1000 }, { "epoch": 0.26666666666666666, "eval_cer": 89.32038834951457, "eval_loss": 1.1732313632965088, "eval_runtime": 80.9658, "eval_samples_per_second": 1.223, "eval_steps_per_second": 1.223, "step": 1000 }, { "epoch": 0.2693333333333333, "grad_norm": 16.427200317382812, "learning_rate": 9.080808080808081e-06, "loss": 1.0599, "step": 1010 }, { "epoch": 0.2693333333333333, "eval_cer": 50.31525851197982, "eval_loss": 1.0901631116867065, "eval_runtime": 76.9254, "eval_samples_per_second": 1.287, "eval_steps_per_second": 1.287, "step": 1010 }, { "epoch": 0.272, "grad_norm": 23.313121795654297, "learning_rate": 9.070707070707072e-06, "loss": 1.0114, "step": 1020 }, { "epoch": 0.272, "eval_cer": 49.18032786885246, "eval_loss": 1.0451587438583374, "eval_runtime": 77.2747, "eval_samples_per_second": 1.281, "eval_steps_per_second": 1.281, "step": 1020 }, { "epoch": 0.27466666666666667, "grad_norm": 16.21845245361328, "learning_rate": 9.06060606060606e-06, "loss": 1.1568, "step": 1030 }, { "epoch": 0.27466666666666667, "eval_cer": 69.4829760403531, "eval_loss": 1.1285865306854248, "eval_runtime": 80.287, "eval_samples_per_second": 1.233, "eval_steps_per_second": 1.233, "step": 1030 }, { "epoch": 0.2773333333333333, "grad_norm": 21.60283660888672, "learning_rate": 9.050505050505052e-06, "loss": 1.1959, "step": 1040 }, { "epoch": 0.2773333333333333, "eval_cer": 53.84615384615385, "eval_loss": 1.1765520572662354, "eval_runtime": 78.8941, "eval_samples_per_second": 1.255, "eval_steps_per_second": 1.255, "step": 1040 }, { "epoch": 0.28, "grad_norm": 13.436543464660645, "learning_rate": 9.040404040404042e-06, "loss": 1.0342, "step": 1050 }, { "epoch": 0.28, "eval_cer": 52.96343001261034, "eval_loss": 1.1394500732421875, "eval_runtime": 78.5797, "eval_samples_per_second": 1.26, "eval_steps_per_second": 1.26, "step": 1050 }, { "epoch": 0.2826666666666667, "grad_norm": 15.90085220336914, "learning_rate": 9.030303030303031e-06, "loss": 1.0229, "step": 1060 }, { "epoch": 0.2826666666666667, "eval_cer": 54.09836065573771, "eval_loss": 1.1051841974258423, "eval_runtime": 78.2017, "eval_samples_per_second": 1.266, "eval_steps_per_second": 1.266, "step": 1060 }, { "epoch": 0.2853333333333333, "grad_norm": 17.880510330200195, "learning_rate": 9.020202020202021e-06, "loss": 1.0015, "step": 1070 }, { "epoch": 0.2853333333333333, "eval_cer": 59.2686002522068, "eval_loss": 1.1580164432525635, "eval_runtime": 78.8364, "eval_samples_per_second": 1.256, "eval_steps_per_second": 1.256, "step": 1070 }, { "epoch": 0.288, "grad_norm": 16.91229248046875, "learning_rate": 9.010101010101012e-06, "loss": 1.1533, "step": 1080 }, { "epoch": 0.288, "eval_cer": 56.49432534678437, "eval_loss": 1.168419599533081, "eval_runtime": 78.6616, "eval_samples_per_second": 1.259, "eval_steps_per_second": 1.259, "step": 1080 }, { "epoch": 0.2906666666666667, "grad_norm": 26.38800621032715, "learning_rate": 9e-06, "loss": 1.1976, "step": 1090 }, { "epoch": 0.2906666666666667, "eval_cer": 51.19798234552333, "eval_loss": 1.0737069845199585, "eval_runtime": 78.97, "eval_samples_per_second": 1.254, "eval_steps_per_second": 1.254, "step": 1090 }, { "epoch": 0.29333333333333333, "grad_norm": 11.324457168579102, "learning_rate": 8.98989898989899e-06, "loss": 1.0131, "step": 1100 }, { "epoch": 0.29333333333333333, "eval_cer": 51.45018915510718, "eval_loss": 1.1314114332199097, "eval_runtime": 78.2488, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 1100 }, { "epoch": 0.296, "grad_norm": 20.900928497314453, "learning_rate": 8.97979797979798e-06, "loss": 1.0431, "step": 1110 }, { "epoch": 0.296, "eval_cer": 54.09836065573771, "eval_loss": 1.2488269805908203, "eval_runtime": 78.5131, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 1110 }, { "epoch": 0.2986666666666667, "grad_norm": 26.65207862854004, "learning_rate": 8.969696969696971e-06, "loss": 1.1839, "step": 1120 }, { "epoch": 0.2986666666666667, "eval_cer": 52.459016393442624, "eval_loss": 1.1696010828018188, "eval_runtime": 78.4873, "eval_samples_per_second": 1.261, "eval_steps_per_second": 1.261, "step": 1120 }, { "epoch": 0.30133333333333334, "grad_norm": 14.266066551208496, "learning_rate": 8.95959595959596e-06, "loss": 1.0215, "step": 1130 }, { "epoch": 0.30133333333333334, "eval_cer": 51.32408575031526, "eval_loss": 1.2062351703643799, "eval_runtime": 78.0558, "eval_samples_per_second": 1.268, "eval_steps_per_second": 1.268, "step": 1130 }, { "epoch": 0.304, "grad_norm": 13.581376075744629, "learning_rate": 8.94949494949495e-06, "loss": 0.9763, "step": 1140 }, { "epoch": 0.304, "eval_cer": 51.19798234552333, "eval_loss": 1.1882985830307007, "eval_runtime": 78.2858, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 1140 }, { "epoch": 0.30666666666666664, "grad_norm": 15.974523544311523, "learning_rate": 8.93939393939394e-06, "loss": 1.1372, "step": 1150 }, { "epoch": 0.30666666666666664, "eval_cer": 48.675914249684745, "eval_loss": 1.1872459650039673, "eval_runtime": 77.7901, "eval_samples_per_second": 1.273, "eval_steps_per_second": 1.273, "step": 1150 }, { "epoch": 0.30933333333333335, "grad_norm": 16.938993453979492, "learning_rate": 8.92929292929293e-06, "loss": 1.1522, "step": 1160 }, { "epoch": 0.30933333333333335, "eval_cer": 47.91929382093317, "eval_loss": 1.1992875337600708, "eval_runtime": 78.9052, "eval_samples_per_second": 1.255, "eval_steps_per_second": 1.255, "step": 1160 }, { "epoch": 0.312, "grad_norm": 13.329106330871582, "learning_rate": 8.919191919191919e-06, "loss": 1.0531, "step": 1170 }, { "epoch": 0.312, "eval_cer": 49.43253467843632, "eval_loss": 1.1403378248214722, "eval_runtime": 78.4504, "eval_samples_per_second": 1.262, "eval_steps_per_second": 1.262, "step": 1170 }, { "epoch": 0.31466666666666665, "grad_norm": 19.03329086303711, "learning_rate": 8.90909090909091e-06, "loss": 0.9934, "step": 1180 }, { "epoch": 0.31466666666666665, "eval_cer": 49.68474148802018, "eval_loss": 1.1829012632369995, "eval_runtime": 78.8873, "eval_samples_per_second": 1.255, "eval_steps_per_second": 1.255, "step": 1180 }, { "epoch": 0.31733333333333336, "grad_norm": 13.096351623535156, "learning_rate": 8.8989898989899e-06, "loss": 0.9284, "step": 1190 }, { "epoch": 0.31733333333333336, "eval_cer": 48.9281210592686, "eval_loss": 1.1716654300689697, "eval_runtime": 78.6464, "eval_samples_per_second": 1.259, "eval_steps_per_second": 1.259, "step": 1190 }, { "epoch": 0.32, "grad_norm": 15.558133125305176, "learning_rate": 8.888888888888888e-06, "loss": 1.0007, "step": 1200 }, { "epoch": 0.32, "eval_cer": 50.945775535939475, "eval_loss": 1.1625868082046509, "eval_runtime": 78.2452, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 1200 }, { "epoch": 0.32266666666666666, "grad_norm": 13.843978881835938, "learning_rate": 8.87878787878788e-06, "loss": 1.2634, "step": 1210 }, { "epoch": 0.32266666666666666, "eval_cer": 49.68474148802018, "eval_loss": 1.131119966506958, "eval_runtime": 78.4445, "eval_samples_per_second": 1.262, "eval_steps_per_second": 1.262, "step": 1210 }, { "epoch": 0.3253333333333333, "grad_norm": 18.294511795043945, "learning_rate": 8.86868686868687e-06, "loss": 1.0039, "step": 1220 }, { "epoch": 0.3253333333333333, "eval_cer": 46.406052963430014, "eval_loss": 1.0547950267791748, "eval_runtime": 78.7958, "eval_samples_per_second": 1.256, "eval_steps_per_second": 1.256, "step": 1220 }, { "epoch": 0.328, "grad_norm": 16.219919204711914, "learning_rate": 8.85858585858586e-06, "loss": 0.9441, "step": 1230 }, { "epoch": 0.328, "eval_cer": 49.30643127364439, "eval_loss": 1.1434153318405151, "eval_runtime": 79.0104, "eval_samples_per_second": 1.253, "eval_steps_per_second": 1.253, "step": 1230 }, { "epoch": 0.33066666666666666, "grad_norm": 14.341035842895508, "learning_rate": 8.84848484848485e-06, "loss": 0.8696, "step": 1240 }, { "epoch": 0.33066666666666666, "eval_cer": 48.9281210592686, "eval_loss": 1.1886433362960815, "eval_runtime": 78.6773, "eval_samples_per_second": 1.258, "eval_steps_per_second": 1.258, "step": 1240 }, { "epoch": 0.3333333333333333, "grad_norm": 22.90915870666504, "learning_rate": 8.83838383838384e-06, "loss": 0.8534, "step": 1250 }, { "epoch": 0.3333333333333333, "eval_cer": 48.80201765447667, "eval_loss": 1.1410605907440186, "eval_runtime": 79.4849, "eval_samples_per_second": 1.246, "eval_steps_per_second": 1.246, "step": 1250 }, { "epoch": 0.336, "grad_norm": 19.806961059570312, "learning_rate": 8.82828282828283e-06, "loss": 1.2614, "step": 1260 }, { "epoch": 0.336, "eval_cer": 46.910466582597735, "eval_loss": 0.9779375791549683, "eval_runtime": 79.2948, "eval_samples_per_second": 1.249, "eval_steps_per_second": 1.249, "step": 1260 }, { "epoch": 0.33866666666666667, "grad_norm": 17.70494270324707, "learning_rate": 8.818181818181819e-06, "loss": 0.8782, "step": 1270 }, { "epoch": 0.33866666666666667, "eval_cer": 51.576292559899116, "eval_loss": 1.040684700012207, "eval_runtime": 79.608, "eval_samples_per_second": 1.244, "eval_steps_per_second": 1.244, "step": 1270 }, { "epoch": 0.3413333333333333, "grad_norm": 20.204618453979492, "learning_rate": 8.808080808080809e-06, "loss": 0.8056, "step": 1280 }, { "epoch": 0.3413333333333333, "eval_cer": 49.18032786885246, "eval_loss": 1.0853091478347778, "eval_runtime": 79.6188, "eval_samples_per_second": 1.243, "eval_steps_per_second": 1.243, "step": 1280 }, { "epoch": 0.344, "grad_norm": 19.028974533081055, "learning_rate": 8.7979797979798e-06, "loss": 1.0951, "step": 1290 }, { "epoch": 0.344, "eval_cer": 51.071878940731395, "eval_loss": 1.1110036373138428, "eval_runtime": 79.392, "eval_samples_per_second": 1.247, "eval_steps_per_second": 1.247, "step": 1290 }, { "epoch": 0.3466666666666667, "grad_norm": 16.38770294189453, "learning_rate": 8.787878787878788e-06, "loss": 0.8958, "step": 1300 }, { "epoch": 0.3466666666666667, "eval_cer": 50.31525851197982, "eval_loss": 1.1046932935714722, "eval_runtime": 79.913, "eval_samples_per_second": 1.239, "eval_steps_per_second": 1.239, "step": 1300 }, { "epoch": 0.37333333333333335, "grad_norm": 11.964598655700684, "learning_rate": 3.157894736842105e-06, "loss": 0.9507, "step": 1400 }, { "epoch": 0.37333333333333335, "eval_cer": 46.84014869888476, "eval_loss": 1.150152325630188, "eval_runtime": 78.5836, "eval_samples_per_second": 1.26, "eval_steps_per_second": 1.26, "step": 1400 }, { "epoch": 0.4, "grad_norm": 14.202909469604492, "learning_rate": 2.631578947368421e-06, "loss": 0.9197, "step": 1500 }, { "epoch": 0.4, "eval_cer": 45.72490706319702, "eval_loss": 1.0882861614227295, "eval_runtime": 79.236, "eval_samples_per_second": 1.249, "eval_steps_per_second": 1.249, "step": 1500 }, { "epoch": 0.4266666666666667, "grad_norm": 8.897924423217773, "learning_rate": 2.105263157894737e-06, "loss": 0.9193, "step": 1600 }, { "epoch": 0.4266666666666667, "eval_cer": 45.84882280049566, "eval_loss": 1.081554651260376, "eval_runtime": 78.458, "eval_samples_per_second": 1.262, "eval_steps_per_second": 1.262, "step": 1600 }, { "epoch": 0.4533333333333333, "grad_norm": 16.955913543701172, "learning_rate": 1.5789473684210526e-06, "loss": 0.8502, "step": 1700 }, { "epoch": 0.4533333333333333, "eval_cer": 47.21189591078067, "eval_loss": 1.0790964365005493, "eval_runtime": 78.3325, "eval_samples_per_second": 1.264, "eval_steps_per_second": 1.264, "step": 1700 }, { "epoch": 0.48, "grad_norm": 11.926959991455078, "learning_rate": 1.0526315789473685e-06, "loss": 0.8387, "step": 1800 }, { "epoch": 0.48, "eval_cer": 45.353159851301115, "eval_loss": 1.054158091545105, "eval_runtime": 79.2789, "eval_samples_per_second": 1.249, "eval_steps_per_second": 1.249, "step": 1800 }, { "epoch": 0.5066666666666667, "grad_norm": 9.69470500946045, "learning_rate": 5.263157894736843e-07, "loss": 0.8734, "step": 1900 }, { "epoch": 0.5066666666666667, "eval_cer": 45.9727385377943, "eval_loss": 1.0481570959091187, "eval_runtime": 79.0464, "eval_samples_per_second": 1.252, "eval_steps_per_second": 1.252, "step": 1900 }, { "epoch": 0.5333333333333333, "grad_norm": 21.345584869384766, "learning_rate": 0.0, "loss": 0.8679, "step": 2000 }, { "epoch": 0.5333333333333333, "eval_cer": 45.9727385377943, "eval_loss": 1.0438498258590698, "eval_runtime": 79.1208, "eval_samples_per_second": 1.251, "eval_steps_per_second": 1.251, "step": 2000 }, { "epoch": 0.8, "grad_norm": 8.957441329956055, "learning_rate": 9.637046307884857e-06, "loss": 0.8834, "step": 3000 }, { "epoch": 0.8, "eval_cer": 45.393858477970625, "eval_loss": 1.0715968608856201, "eval_runtime": 77.3709, "eval_samples_per_second": 1.28, "eval_steps_per_second": 1.28, "step": 3000 }, { "epoch": 1.0666666666666667, "grad_norm": 8.574357986450195, "learning_rate": 9.51188986232791e-06, "loss": 0.757, "step": 4000 }, { "epoch": 1.0666666666666667, "eval_cer": 42.72363150867824, "eval_loss": 1.043562650680542, "eval_runtime": 78.9351, "eval_samples_per_second": 1.254, "eval_steps_per_second": 1.254, "step": 4000 }, { "epoch": 1.3333333333333333, "grad_norm": 10.907554626464844, "learning_rate": 9.386733416770964e-06, "loss": 0.6228, "step": 5000 }, { "epoch": 1.3333333333333333, "eval_cer": 41.388518024032045, "eval_loss": 0.9789133667945862, "eval_runtime": 83.325, "eval_samples_per_second": 1.188, "eval_steps_per_second": 1.188, "step": 5000 }, { "epoch": 1.6, "grad_norm": 15.36947250366211, "learning_rate": 9.261576971214017e-06, "loss": 0.594, "step": 6000 }, { "epoch": 1.6, "eval_cer": 35.647530040053404, "eval_loss": 0.7933751940727234, "eval_runtime": 79.3561, "eval_samples_per_second": 1.248, "eval_steps_per_second": 1.248, "step": 6000 }, { "epoch": 1.8666666666666667, "grad_norm": 6.123430252075195, "learning_rate": 9.136420525657072e-06, "loss": 0.5759, "step": 7000 }, { "epoch": 1.8666666666666667, "eval_cer": 36.715620827770366, "eval_loss": 0.8272244334220886, "eval_runtime": 79.0213, "eval_samples_per_second": 1.253, "eval_steps_per_second": 1.253, "step": 7000 }, { "epoch": 2.1333333333333333, "grad_norm": 9.646613121032715, "learning_rate": 9.011264080100126e-06, "loss": 0.4532, "step": 8000 }, { "epoch": 2.1333333333333333, "eval_cer": 36.18157543391188, "eval_loss": 0.810710608959198, "eval_runtime": 83.7244, "eval_samples_per_second": 1.182, "eval_steps_per_second": 1.182, "step": 8000 }, { "epoch": 2.4, "grad_norm": 11.642197608947754, "learning_rate": 8.88610763454318e-06, "loss": 0.3469, "step": 9000 }, { "epoch": 2.4, "eval_cer": 32.97730307076102, "eval_loss": 0.7934185266494751, "eval_runtime": 78.596, "eval_samples_per_second": 1.26, "eval_steps_per_second": 1.26, "step": 9000 }, { "epoch": 2.6666666666666665, "grad_norm": 6.712106227874756, "learning_rate": 8.760951188986233e-06, "loss": 0.3617, "step": 10000 }, { "epoch": 2.6666666666666665, "eval_cer": 36.9826435246996, "eval_loss": 0.8574942350387573, "eval_runtime": 93.5531, "eval_samples_per_second": 1.058, "eval_steps_per_second": 1.058, "step": 10000 }, { "epoch": 2.9333333333333336, "grad_norm": 8.81134033203125, "learning_rate": 8.635794743429288e-06, "loss": 0.3462, "step": 11000 }, { "epoch": 2.9333333333333336, "eval_cer": 35.11348464619493, "eval_loss": 0.8061275482177734, "eval_runtime": 78.1131, "eval_samples_per_second": 1.267, "eval_steps_per_second": 1.267, "step": 11000 }, { "epoch": 3.2, "grad_norm": 7.097293376922607, "learning_rate": 8.510638297872341e-06, "loss": 0.2267, "step": 12000 }, { "epoch": 3.2, "eval_cer": 35.51401869158878, "eval_loss": 0.8908804059028625, "eval_runtime": 89.1656, "eval_samples_per_second": 1.11, "eval_steps_per_second": 1.11, "step": 12000 }, { "epoch": 3.466666666666667, "grad_norm": 5.570465564727783, "learning_rate": 8.385481852315395e-06, "loss": 0.1918, "step": 13000 }, { "epoch": 3.466666666666667, "eval_cer": 40.053404539385845, "eval_loss": 0.896449625492096, "eval_runtime": 97.4805, "eval_samples_per_second": 1.016, "eval_steps_per_second": 1.016, "step": 13000 }, { "epoch": 3.7333333333333334, "grad_norm": 13.11853313446045, "learning_rate": 8.260325406758448e-06, "loss": 0.1983, "step": 14000 }, { "epoch": 3.7333333333333334, "eval_cer": 32.176234979973295, "eval_loss": 0.8650264739990234, "eval_runtime": 78.8752, "eval_samples_per_second": 1.255, "eval_steps_per_second": 1.255, "step": 14000 }, { "epoch": 4.0, "grad_norm": 8.057831764221191, "learning_rate": 8.135168961201503e-06, "loss": 0.2064, "step": 15000 }, { "epoch": 4.0, "eval_cer": 34.31241655540721, "eval_loss": 0.8947641253471375, "eval_runtime": 101.8113, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.972, "step": 15000 }, { "epoch": 4.266666666666667, "grad_norm": 6.645699977874756, "learning_rate": 8.010012515644557e-06, "loss": 0.1008, "step": 16000 }, { "epoch": 4.266666666666667, "eval_cer": 33.64485981308411, "eval_loss": 0.9120751023292542, "eval_runtime": 95.4926, "eval_samples_per_second": 1.037, "eval_steps_per_second": 1.037, "step": 16000 }, { "epoch": 4.533333333333333, "grad_norm": 10.748971939086914, "learning_rate": 7.88485607008761e-06, "loss": 0.1036, "step": 17000 }, { "epoch": 4.533333333333333, "eval_cer": 40.32042723631509, "eval_loss": 1.0235034227371216, "eval_runtime": 110.9242, "eval_samples_per_second": 0.893, "eval_steps_per_second": 0.893, "step": 17000 }, { "epoch": 4.8, "grad_norm": 3.087092638015747, "learning_rate": 7.759699624530665e-06, "loss": 0.1069, "step": 18000 }, { "epoch": 4.8, "eval_cer": 35.51401869158878, "eval_loss": 1.0253996849060059, "eval_runtime": 101.8801, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.972, "step": 18000 }, { "epoch": 5.066666666666666, "grad_norm": 18.12947654724121, "learning_rate": 7.634543178973719e-06, "loss": 0.6101, "step": 19000 }, { "epoch": 5.066666666666666, "eval_cer": 40.693196405648266, "eval_loss": 0.8904989957809448, "eval_runtime": 110.361, "eval_samples_per_second": 0.897, "eval_steps_per_second": 0.897, "step": 19000 }, { "epoch": 5.333333333333333, "grad_norm": 9.397184371948242, "learning_rate": 7.509386733416771e-06, "loss": 0.5802, "step": 20000 }, { "epoch": 5.333333333333333, "eval_cer": 34.981905910735826, "eval_loss": 0.9554204344749451, "eval_runtime": 114.8149, "eval_samples_per_second": 0.862, "eval_steps_per_second": 0.862, "step": 20000 }, { "epoch": 5.6, "grad_norm": 10.324832916259766, "learning_rate": 7.384230287859825e-06, "loss": 0.5739, "step": 21000 }, { "epoch": 5.6, "eval_cer": 34.981905910735826, "eval_loss": 0.9567387104034424, "eval_runtime": 105.411, "eval_samples_per_second": 0.939, "eval_steps_per_second": 0.939, "step": 21000 }, { "epoch": 5.866666666666667, "grad_norm": 29.525453567504883, "learning_rate": 7.25907384230288e-06, "loss": 0.5524, "step": 22000 }, { "epoch": 5.866666666666667, "eval_cer": 34.016887816646566, "eval_loss": 1.0537254810333252, "eval_runtime": 121.4049, "eval_samples_per_second": 0.815, "eval_steps_per_second": 0.815, "step": 22000 }, { "epoch": 6.133333333333334, "grad_norm": 12.727578163146973, "learning_rate": 7.133917396745933e-06, "loss": 0.4384, "step": 23000 }, { "epoch": 6.133333333333334, "eval_cer": 36.55006031363088, "eval_loss": 1.0797538757324219, "eval_runtime": 120.6543, "eval_samples_per_second": 0.821, "eval_steps_per_second": 0.821, "step": 23000 }, { "epoch": 6.4, "grad_norm": 16.795650482177734, "learning_rate": 7.008760951188987e-06, "loss": 0.3206, "step": 24000 }, { "epoch": 6.4, "eval_cer": 34.499396863691196, "eval_loss": 0.9962962865829468, "eval_runtime": 121.1564, "eval_samples_per_second": 0.817, "eval_steps_per_second": 0.817, "step": 24000 }, { "epoch": 6.666666666666667, "grad_norm": 9.25767707824707, "learning_rate": 6.883604505632041e-06, "loss": 0.3354, "step": 25000 }, { "epoch": 6.666666666666667, "eval_cer": 36.55006031363088, "eval_loss": 1.092819333076477, "eval_runtime": 113.9745, "eval_samples_per_second": 0.869, "eval_steps_per_second": 0.869, "step": 25000 }, { "epoch": 6.933333333333334, "grad_norm": 10.387906074523926, "learning_rate": 6.758448060075094e-06, "loss": 0.3382, "step": 26000 }, { "epoch": 6.933333333333334, "eval_cer": 33.8962605548854, "eval_loss": 1.0058472156524658, "eval_runtime": 95.2523, "eval_samples_per_second": 1.039, "eval_steps_per_second": 1.039, "step": 26000 }, { "epoch": 7.2, "grad_norm": 6.268089294433594, "learning_rate": 6.633291614518148e-06, "loss": 0.2082, "step": 27000 }, { "epoch": 7.2, "eval_cer": 35.10253317249698, "eval_loss": 1.116202712059021, "eval_runtime": 98.445, "eval_samples_per_second": 1.006, "eval_steps_per_second": 1.006, "step": 27000 }, { "epoch": 7.466666666666667, "grad_norm": 3.8901703357696533, "learning_rate": 6.508135168961201e-06, "loss": 0.1632, "step": 28000 }, { "epoch": 7.466666666666667, "eval_cer": 32.68998793727383, "eval_loss": 1.1056886911392212, "eval_runtime": 125.755, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.787, "step": 28000 }, { "epoch": 7.733333333333333, "grad_norm": 3.7998549938201904, "learning_rate": 6.382978723404256e-06, "loss": 0.1736, "step": 29000 }, { "epoch": 7.733333333333333, "eval_cer": 34.37876960193004, "eval_loss": 1.151659607887268, "eval_runtime": 123.6571, "eval_samples_per_second": 0.801, "eval_steps_per_second": 0.801, "step": 29000 }, { "epoch": 8.0, "grad_norm": 5.119089126586914, "learning_rate": 6.25782227784731e-06, "loss": 0.1724, "step": 30000 }, { "epoch": 8.0, "eval_cer": 36.67068757539204, "eval_loss": 1.1902227401733398, "eval_runtime": 123.7933, "eval_samples_per_second": 0.8, "eval_steps_per_second": 0.8, "step": 30000 }, { "epoch": 8.266666666666667, "grad_norm": 6.640472888946533, "learning_rate": 6.132665832290363e-06, "loss": 0.0758, "step": 31000 }, { "epoch": 8.266666666666667, "eval_cer": 35.826296743063935, "eval_loss": 1.2155195474624634, "eval_runtime": 109.4112, "eval_samples_per_second": 0.905, "eval_steps_per_second": 0.905, "step": 31000 }, { "epoch": 8.533333333333333, "grad_norm": 9.896190643310547, "learning_rate": 6.0075093867334175e-06, "loss": 0.0769, "step": 32000 }, { "epoch": 8.533333333333333, "eval_cer": 34.981905910735826, "eval_loss": 1.3153955936431885, "eval_runtime": 143.3287, "eval_samples_per_second": 0.691, "eval_steps_per_second": 0.691, "step": 32000 }, { "epoch": 8.8, "grad_norm": 5.391891956329346, "learning_rate": 5.882352941176471e-06, "loss": 0.0815, "step": 33000 }, { "epoch": 8.8, "eval_cer": 35.94692400482509, "eval_loss": 1.218248963356018, "eval_runtime": 102.2091, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.969, "step": 33000 }, { "epoch": 9.066666666666666, "grad_norm": 1.8143051862716675, "learning_rate": 5.757196495619524e-06, "loss": 0.0698, "step": 34000 }, { "epoch": 9.066666666666666, "eval_cer": 35.94692400482509, "eval_loss": 1.2726112604141235, "eval_runtime": 126.9759, "eval_samples_per_second": 0.78, "eval_steps_per_second": 0.78, "step": 34000 }, { "epoch": 9.333333333333334, "grad_norm": 1.9652811288833618, "learning_rate": 5.632040050062579e-06, "loss": 0.0389, "step": 35000 }, { "epoch": 9.333333333333334, "eval_cer": 33.41375150784077, "eval_loss": 1.3238201141357422, "eval_runtime": 118.6725, "eval_samples_per_second": 0.834, "eval_steps_per_second": 0.834, "step": 35000 }, { "epoch": 9.6, "grad_norm": 5.921022891998291, "learning_rate": 5.506883604505633e-06, "loss": 0.0397, "step": 36000 }, { "epoch": 9.6, "eval_cer": 37.273823884197824, "eval_loss": 1.3997122049331665, "eval_runtime": 118.4204, "eval_samples_per_second": 0.836, "eval_steps_per_second": 0.836, "step": 36000 }, { "epoch": 9.866666666666667, "grad_norm": 4.224556922912598, "learning_rate": 5.381727158948686e-06, "loss": 0.0385, "step": 37000 }, { "epoch": 9.866666666666667, "eval_cer": 37.15319662243667, "eval_loss": 1.3291655778884888, "eval_runtime": 125.4751, "eval_samples_per_second": 0.789, "eval_steps_per_second": 0.789, "step": 37000 }, { "epoch": 10.133333333333333, "grad_norm": 2.9837727546691895, "learning_rate": 5.25657071339174e-06, "loss": 0.0298, "step": 38000 }, { "epoch": 10.133333333333333, "eval_cer": 33.65500603136309, "eval_loss": 1.3235493898391724, "eval_runtime": 119.5982, "eval_samples_per_second": 0.828, "eval_steps_per_second": 0.828, "step": 38000 }, { "epoch": 10.4, "grad_norm": 8.313368797302246, "learning_rate": 5.131414267834794e-06, "loss": 0.0215, "step": 39000 }, { "epoch": 10.4, "eval_cer": 34.25814234016888, "eval_loss": 1.3103588819503784, "eval_runtime": 129.8548, "eval_samples_per_second": 0.762, "eval_steps_per_second": 0.762, "step": 39000 }, { "epoch": 10.666666666666666, "grad_norm": 4.944197177886963, "learning_rate": 5.006257822277848e-06, "loss": 0.0237, "step": 40000 }, { "epoch": 10.666666666666666, "eval_cer": 37.635705669481304, "eval_loss": 1.4052081108093262, "eval_runtime": 128.7688, "eval_samples_per_second": 0.769, "eval_steps_per_second": 0.769, "step": 40000 }, { "epoch": 10.933333333333334, "grad_norm": 2.447826623916626, "learning_rate": 4.881101376720902e-06, "loss": 0.022, "step": 41000 }, { "epoch": 10.933333333333334, "eval_cer": 34.016887816646566, "eval_loss": 1.3398523330688477, "eval_runtime": 128.1594, "eval_samples_per_second": 0.772, "eval_steps_per_second": 0.772, "step": 41000 }, { "epoch": 11.2, "grad_norm": 4.827037334442139, "learning_rate": 4.755944931163955e-06, "loss": 0.0157, "step": 42000 }, { "epoch": 11.2, "eval_cer": 35.10253317249698, "eval_loss": 1.4211913347244263, "eval_runtime": 147.5015, "eval_samples_per_second": 0.671, "eval_steps_per_second": 0.671, "step": 42000 }, { "epoch": 11.466666666666667, "grad_norm": 4.327616214752197, "learning_rate": 4.630788485607009e-06, "loss": 0.0134, "step": 43000 }, { "epoch": 11.466666666666667, "eval_cer": 37.997587454764776, "eval_loss": 1.4274876117706299, "eval_runtime": 104.6876, "eval_samples_per_second": 0.946, "eval_steps_per_second": 0.946, "step": 43000 }, { "epoch": 11.733333333333333, "grad_norm": 1.2661657333374023, "learning_rate": 4.505632040050063e-06, "loss": 0.0147, "step": 44000 }, { "epoch": 11.733333333333333, "eval_cer": 34.74065138721351, "eval_loss": 1.4563192129135132, "eval_runtime": 127.7157, "eval_samples_per_second": 0.775, "eval_steps_per_second": 0.775, "step": 44000 }, { "epoch": 12.0, "grad_norm": 1.5173884630203247, "learning_rate": 4.380475594493116e-06, "loss": 0.0144, "step": 45000 }, { "epoch": 12.0, "eval_cer": 33.0518697225573, "eval_loss": 1.374932050704956, "eval_runtime": 117.8977, "eval_samples_per_second": 0.84, "eval_steps_per_second": 0.84, "step": 45000 }, { "epoch": 12.266666666666667, "grad_norm": 3.4065911769866943, "learning_rate": 4.255319148936171e-06, "loss": 0.0091, "step": 46000 }, { "epoch": 12.266666666666667, "eval_cer": 33.775633293124244, "eval_loss": 1.4892818927764893, "eval_runtime": 127.9403, "eval_samples_per_second": 0.774, "eval_steps_per_second": 0.774, "step": 46000 }, { "epoch": 12.533333333333333, "grad_norm": 0.3180766701698303, "learning_rate": 4.130162703379224e-06, "loss": 0.0088, "step": 47000 }, { "epoch": 12.533333333333333, "eval_cer": 37.03256936067551, "eval_loss": 1.4363093376159668, "eval_runtime": 125.1911, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.791, "step": 47000 }, { "epoch": 12.8, "grad_norm": 1.2687398195266724, "learning_rate": 4.005006257822278e-06, "loss": 0.0093, "step": 48000 }, { "epoch": 12.8, "eval_cer": 37.51507840772014, "eval_loss": 1.5040009021759033, "eval_runtime": 128.1399, "eval_samples_per_second": 0.773, "eval_steps_per_second": 0.773, "step": 48000 }, { "epoch": 13.066666666666666, "grad_norm": 0.379712849855423, "learning_rate": 3.879849812265333e-06, "loss": 0.0087, "step": 49000 }, { "epoch": 13.066666666666666, "eval_cer": 35.46441495778046, "eval_loss": 1.4787448644638062, "eval_runtime": 141.6422, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.699, "step": 49000 }, { "epoch": 13.333333333333334, "grad_norm": 0.10000209510326385, "learning_rate": 3.7546933667083856e-06, "loss": 0.0066, "step": 50000 }, { "epoch": 13.333333333333334, "eval_cer": 35.343787696019305, "eval_loss": 1.4243420362472534, "eval_runtime": 136.9845, "eval_samples_per_second": 0.723, "eval_steps_per_second": 0.723, "step": 50000 }, { "epoch": 13.6, "grad_norm": 9.813934326171875, "learning_rate": 3.62953692115144e-06, "loss": 0.0063, "step": 51000 }, { "epoch": 13.6, "eval_cer": 32.810615199034984, "eval_loss": 1.4343503713607788, "eval_runtime": 132.5107, "eval_samples_per_second": 0.747, "eval_steps_per_second": 0.747, "step": 51000 }, { "epoch": 13.866666666666667, "grad_norm": 2.027151107788086, "learning_rate": 3.5043804755944933e-06, "loss": 0.0071, "step": 52000 }, { "epoch": 13.866666666666667, "eval_cer": 33.0518697225573, "eval_loss": 1.5221620798110962, "eval_runtime": 127.4995, "eval_samples_per_second": 0.776, "eval_steps_per_second": 0.776, "step": 52000 }, { "epoch": 14.133333333333333, "grad_norm": 0.10521159321069717, "learning_rate": 3.379224030037547e-06, "loss": 0.0054, "step": 53000 }, { "epoch": 14.133333333333333, "eval_cer": 32.93124246079614, "eval_loss": 1.5020145177841187, "eval_runtime": 135.115, "eval_samples_per_second": 0.733, "eval_steps_per_second": 0.733, "step": 53000 }, { "epoch": 14.4, "grad_norm": 2.590341567993164, "learning_rate": 3.2540675844806006e-06, "loss": 0.0047, "step": 54000 }, { "epoch": 14.4, "eval_cer": 32.56936067551266, "eval_loss": 1.494498610496521, "eval_runtime": 149.7186, "eval_samples_per_second": 0.661, "eval_steps_per_second": 0.661, "step": 54000 }, { "epoch": 14.666666666666666, "grad_norm": 0.30251413583755493, "learning_rate": 3.128911138923655e-06, "loss": 0.0048, "step": 55000 }, { "epoch": 14.666666666666666, "eval_cer": 34.25814234016888, "eval_loss": 1.53855562210083, "eval_runtime": 150.0723, "eval_samples_per_second": 0.66, "eval_steps_per_second": 0.66, "step": 55000 }, { "epoch": 14.933333333333334, "grad_norm": 0.06477013975381851, "learning_rate": 3.0037546933667087e-06, "loss": 0.0054, "step": 56000 }, { "epoch": 14.933333333333334, "eval_cer": 34.37876960193004, "eval_loss": 1.538918137550354, "eval_runtime": 95.0361, "eval_samples_per_second": 1.042, "eval_steps_per_second": 1.042, "step": 56000 }, { "epoch": 15.2, "grad_norm": 20.067230224609375, "learning_rate": 2.878598247809762e-06, "loss": 0.0038, "step": 57000 }, { "epoch": 15.2, "eval_cer": 32.93124246079614, "eval_loss": 1.4607055187225342, "eval_runtime": 87.2648, "eval_samples_per_second": 1.134, "eval_steps_per_second": 1.134, "step": 57000 }, { "epoch": 15.466666666666667, "grad_norm": 0.16094616055488586, "learning_rate": 2.7534418022528165e-06, "loss": 0.0031, "step": 58000 }, { "epoch": 15.466666666666667, "eval_cer": 32.810615199034984, "eval_loss": 1.5141421556472778, "eval_runtime": 94.6547, "eval_samples_per_second": 1.046, "eval_steps_per_second": 1.046, "step": 58000 }, { "epoch": 15.733333333333333, "grad_norm": 0.37339481711387634, "learning_rate": 2.62828535669587e-06, "loss": 0.0032, "step": 59000 }, { "epoch": 15.733333333333333, "eval_cer": 38.96260554885404, "eval_loss": 1.6072230339050293, "eval_runtime": 96.1338, "eval_samples_per_second": 1.03, "eval_steps_per_second": 1.03, "step": 59000 }, { "epoch": 16.0, "grad_norm": 0.0806913673877716, "learning_rate": 2.503128911138924e-06, "loss": 0.0027, "step": 60000 }, { "epoch": 16.0, "eval_cer": 35.94692400482509, "eval_loss": 1.617326259613037, "eval_runtime": 98.9687, "eval_samples_per_second": 1.0, "eval_steps_per_second": 1.0, "step": 60000 }, { "epoch": 16.266666666666666, "grad_norm": 0.02615281380712986, "learning_rate": 2.3779724655819776e-06, "loss": 0.0023, "step": 61000 }, { "epoch": 16.266666666666666, "eval_cer": 36.308805790108565, "eval_loss": 1.6615768671035767, "eval_runtime": 115.1739, "eval_samples_per_second": 0.86, "eval_steps_per_second": 0.86, "step": 61000 }, { "epoch": 16.533333333333335, "grad_norm": 0.11968094855546951, "learning_rate": 2.2528160200250315e-06, "loss": 0.0023, "step": 62000 }, { "epoch": 16.533333333333335, "eval_cer": 35.22316043425814, "eval_loss": 1.6153184175491333, "eval_runtime": 140.3831, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.705, "step": 62000 }, { "epoch": 16.8, "grad_norm": 0.06228575110435486, "learning_rate": 2.1276595744680853e-06, "loss": 0.0021, "step": 63000 }, { "epoch": 16.8, "eval_cer": 34.016887816646566, "eval_loss": 1.6445003747940063, "eval_runtime": 133.4409, "eval_samples_per_second": 0.742, "eval_steps_per_second": 0.742, "step": 63000 }, { "epoch": 17.066666666666666, "grad_norm": 0.11423548310995102, "learning_rate": 2.002503128911139e-06, "loss": 0.0021, "step": 64000 }, { "epoch": 17.066666666666666, "eval_cer": 33.8962605548854, "eval_loss": 1.6439446210861206, "eval_runtime": 133.5071, "eval_samples_per_second": 0.742, "eval_steps_per_second": 0.742, "step": 64000 }, { "epoch": 17.333333333333332, "grad_norm": 0.06907499581575394, "learning_rate": 1.8773466833541928e-06, "loss": 0.0015, "step": 65000 }, { "epoch": 17.333333333333332, "eval_cer": 32.93124246079614, "eval_loss": 1.6491665840148926, "eval_runtime": 132.7692, "eval_samples_per_second": 0.746, "eval_steps_per_second": 0.746, "step": 65000 }, { "epoch": 17.6, "grad_norm": 0.033104829490184784, "learning_rate": 1.7521902377972467e-06, "loss": 0.0015, "step": 66000 }, { "epoch": 17.6, "eval_cer": 34.13751507840772, "eval_loss": 1.651322841644287, "eval_runtime": 143.0837, "eval_samples_per_second": 0.692, "eval_steps_per_second": 0.692, "step": 66000 }, { "epoch": 17.866666666666667, "grad_norm": 0.01781543157994747, "learning_rate": 1.6270337922403003e-06, "loss": 0.0013, "step": 67000 }, { "epoch": 17.866666666666667, "eval_cer": 34.74065138721351, "eval_loss": 1.7133798599243164, "eval_runtime": 133.6812, "eval_samples_per_second": 0.741, "eval_steps_per_second": 0.741, "step": 67000 }, { "epoch": 18.133333333333333, "grad_norm": 0.010594404302537441, "learning_rate": 1.5018773466833544e-06, "loss": 0.0014, "step": 68000 }, { "epoch": 18.133333333333333, "eval_cer": 34.37876960193004, "eval_loss": 1.6600372791290283, "eval_runtime": 151.4685, "eval_samples_per_second": 0.654, "eval_steps_per_second": 0.654, "step": 68000 }, { "epoch": 18.4, "grad_norm": 0.1840209811925888, "learning_rate": 1.3767209011264082e-06, "loss": 0.0009, "step": 69000 }, { "epoch": 18.4, "eval_cer": 33.0518697225573, "eval_loss": 1.666447639465332, "eval_runtime": 138.1603, "eval_samples_per_second": 0.717, "eval_steps_per_second": 0.717, "step": 69000 }, { "epoch": 18.666666666666668, "grad_norm": 0.018077213317155838, "learning_rate": 1.251564455569462e-06, "loss": 0.0008, "step": 70000 }, { "epoch": 18.666666666666668, "eval_cer": 33.53437876960193, "eval_loss": 1.6725918054580688, "eval_runtime": 131.4776, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.753, "step": 70000 }, { "epoch": 18.933333333333334, "grad_norm": 0.13267947733402252, "learning_rate": 1.1264080100125157e-06, "loss": 0.0009, "step": 71000 }, { "epoch": 18.933333333333334, "eval_cer": 35.10253317249698, "eval_loss": 1.6664079427719116, "eval_runtime": 133.5644, "eval_samples_per_second": 0.741, "eval_steps_per_second": 0.741, "step": 71000 }, { "epoch": 19.2, "grad_norm": 0.005200152285397053, "learning_rate": 1.0012515644555696e-06, "loss": 0.0007, "step": 72000 }, { "epoch": 19.2, "eval_cer": 33.8962605548854, "eval_loss": 1.6933456659317017, "eval_runtime": 141.6099, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.699, "step": 72000 }, { "epoch": 19.466666666666665, "grad_norm": 0.07510178536176682, "learning_rate": 8.760951188986233e-07, "loss": 0.0005, "step": 73000 }, { "epoch": 19.466666666666665, "eval_cer": 35.94692400482509, "eval_loss": 1.7604153156280518, "eval_runtime": 140.6369, "eval_samples_per_second": 0.704, "eval_steps_per_second": 0.704, "step": 73000 }, { "epoch": 19.733333333333334, "grad_norm": 0.027590099722146988, "learning_rate": 7.509386733416772e-07, "loss": 0.0005, "step": 74000 }, { "epoch": 19.733333333333334, "eval_cer": 35.70566948130278, "eval_loss": 1.736212968826294, "eval_runtime": 139.15, "eval_samples_per_second": 0.711, "eval_steps_per_second": 0.711, "step": 74000 }, { "epoch": 20.0, "grad_norm": 0.015025763772428036, "learning_rate": 6.25782227784731e-07, "loss": 0.0004, "step": 75000 }, { "epoch": 20.0, "eval_cer": 33.65500603136309, "eval_loss": 1.7256932258605957, "eval_runtime": 151.3285, "eval_samples_per_second": 0.654, "eval_steps_per_second": 0.654, "step": 75000 }, { "epoch": 20.266666666666666, "grad_norm": 0.0052428352646529675, "learning_rate": 5.006257822277848e-07, "loss": 0.0001, "step": 76000 }, { "epoch": 20.266666666666666, "eval_cer": 34.13751507840772, "eval_loss": 1.7596440315246582, "eval_runtime": 152.8671, "eval_samples_per_second": 0.648, "eval_steps_per_second": 0.648, "step": 76000 }, { "epoch": 20.533333333333335, "grad_norm": 0.010569967329502106, "learning_rate": 3.754693366708386e-07, "loss": 0.0001, "step": 77000 }, { "epoch": 20.533333333333335, "eval_cer": 34.25814234016888, "eval_loss": 1.7473951578140259, "eval_runtime": 97.5521, "eval_samples_per_second": 1.015, "eval_steps_per_second": 1.015, "step": 77000 }, { "epoch": 20.8, "grad_norm": 0.004566431976854801, "learning_rate": 2.503128911138924e-07, "loss": 0.0001, "step": 78000 }, { "epoch": 20.8, "eval_cer": 35.10253317249698, "eval_loss": 1.765366554260254, "eval_runtime": 95.7556, "eval_samples_per_second": 1.034, "eval_steps_per_second": 1.034, "step": 78000 }, { "epoch": 21.066666666666666, "grad_norm": 0.008088044822216034, "learning_rate": 1.251564455569462e-07, "loss": 0.0001, "step": 79000 }, { "epoch": 21.066666666666666, "eval_cer": 34.499396863691196, "eval_loss": 1.7550737857818604, "eval_runtime": 101.9958, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.971, "step": 79000 }, { "epoch": 21.333333333333332, "grad_norm": 0.004822420887649059, "learning_rate": 0.0, "loss": 0.0001, "step": 80000 }, { "epoch": 21.333333333333332, "eval_cer": 34.37876960193004, "eval_loss": 1.755260944366455, "eval_runtime": 90.994, "eval_samples_per_second": 1.088, "eval_steps_per_second": 1.088, "step": 80000 }, { "epoch": 21.6, "grad_norm": 14.32432746887207, "learning_rate": 3.252710592160134e-06, "loss": 0.5875, "step": 81000 }, { "epoch": 21.6, "eval_cer": 31.193838254172015, "eval_loss": 0.8317356109619141, "eval_runtime": 105.8745, "eval_samples_per_second": 0.935, "eval_steps_per_second": 0.935, "step": 81000 }, { "epoch": 21.866666666666667, "grad_norm": 11.412335395812988, "learning_rate": 3.1693077564637197e-06, "loss": 0.5407, "step": 82000 }, { "epoch": 21.866666666666667, "eval_cer": 34.65982028241335, "eval_loss": 0.9225997924804688, "eval_runtime": 118.4773, "eval_samples_per_second": 0.836, "eval_steps_per_second": 0.836, "step": 82000 }, { "epoch": 22.133333333333333, "grad_norm": 10.880860328674316, "learning_rate": 3.0859049207673064e-06, "loss": 0.4377, "step": 83000 }, { "epoch": 22.133333333333333, "eval_cer": 30.038510911424904, "eval_loss": 0.859394907951355, "eval_runtime": 100.9655, "eval_samples_per_second": 0.981, "eval_steps_per_second": 0.981, "step": 83000 }, { "epoch": 22.4, "grad_norm": 7.9051361083984375, "learning_rate": 3.0025020850708923e-06, "loss": 0.3746, "step": 84000 }, { "epoch": 22.4, "eval_cer": 32.09242618741977, "eval_loss": 0.8850279450416565, "eval_runtime": 101.1338, "eval_samples_per_second": 0.979, "eval_steps_per_second": 0.979, "step": 84000 }, { "epoch": 22.666666666666668, "grad_norm": 8.64020824432373, "learning_rate": 2.919099249374479e-06, "loss": 0.3562, "step": 85000 }, { "epoch": 22.666666666666668, "eval_cer": 30.29525032092426, "eval_loss": 0.8574259877204895, "eval_runtime": 99.1014, "eval_samples_per_second": 0.999, "eval_steps_per_second": 0.999, "step": 85000 }, { "epoch": 22.933333333333334, "grad_norm": 7.546156406402588, "learning_rate": 2.835696413678065e-06, "loss": 0.3537, "step": 86000 }, { "epoch": 22.933333333333334, "eval_cer": 30.551989730423617, "eval_loss": 0.8776237964630127, "eval_runtime": 131.3735, "eval_samples_per_second": 0.754, "eval_steps_per_second": 0.754, "step": 86000 }, { "epoch": 23.2, "grad_norm": 9.436278343200684, "learning_rate": 2.7522935779816517e-06, "loss": 0.2377, "step": 87000 }, { "epoch": 23.2, "eval_cer": 33.889602053915276, "eval_loss": 0.9568796753883362, "eval_runtime": 127.0826, "eval_samples_per_second": 0.779, "eval_steps_per_second": 0.779, "step": 87000 }, { "epoch": 23.466666666666665, "grad_norm": 5.736475944519043, "learning_rate": 2.668890742285238e-06, "loss": 0.1894, "step": 88000 }, { "epoch": 23.466666666666665, "eval_cer": 33.1193838254172, "eval_loss": 0.9892663955688477, "eval_runtime": 122.0811, "eval_samples_per_second": 0.811, "eval_steps_per_second": 0.811, "step": 88000 }, { "epoch": 23.733333333333334, "grad_norm": 6.387293338775635, "learning_rate": 2.605042016806723e-06, "loss": 0.1418, "step": 89000 }, { "epoch": 23.733333333333334, "eval_cer": 34.27471116816431, "eval_loss": 1.0336097478866577, "eval_runtime": 96.6819, "eval_samples_per_second": 1.024, "eval_steps_per_second": 1.024, "step": 89000 }, { "epoch": 24.0, "grad_norm": 19.179807662963867, "learning_rate": 2.521008403361345e-06, "loss": 0.1461, "step": 90000 }, { "epoch": 24.0, "eval_cer": 36.071887034659824, "eval_loss": 1.0472933053970337, "eval_runtime": 93.7113, "eval_samples_per_second": 1.056, "eval_steps_per_second": 1.056, "step": 90000 }, { "epoch": 24.266666666666666, "grad_norm": 7.663275718688965, "learning_rate": 2.4369747899159667e-06, "loss": 0.1028, "step": 91000 }, { "epoch": 24.266666666666666, "eval_cer": 36.32862644415918, "eval_loss": 1.0862057209014893, "eval_runtime": 93.406, "eval_samples_per_second": 1.06, "eval_steps_per_second": 1.06, "step": 91000 }, { "epoch": 24.533333333333335, "grad_norm": 1.3407938480377197, "learning_rate": 2.3529411764705885e-06, "loss": 0.103, "step": 92000 }, { "epoch": 24.533333333333335, "eval_cer": 34.91655969191271, "eval_loss": 1.1298171281814575, "eval_runtime": 89.034, "eval_samples_per_second": 1.112, "eval_steps_per_second": 1.112, "step": 92000 }, { "epoch": 24.8, "grad_norm": 11.514740943908691, "learning_rate": 2.2689075630252102e-06, "loss": 0.1052, "step": 93000 }, { "epoch": 24.8, "eval_cer": 34.65982028241335, "eval_loss": 1.1279983520507812, "eval_runtime": 91.3819, "eval_samples_per_second": 1.083, "eval_steps_per_second": 1.083, "step": 93000 }, { "epoch": 25.066666666666666, "grad_norm": 2.231095790863037, "learning_rate": 2.184873949579832e-06, "loss": 0.0968, "step": 94000 }, { "epoch": 25.066666666666666, "eval_cer": 36.071887034659824, "eval_loss": 1.2260563373565674, "eval_runtime": 88.8767, "eval_samples_per_second": 1.114, "eval_steps_per_second": 1.114, "step": 94000 }, { "epoch": 25.333333333333332, "grad_norm": 1.5772309303283691, "learning_rate": 2.100840336134454e-06, "loss": 0.0366, "step": 95000 }, { "epoch": 25.333333333333332, "eval_cer": 36.58536585365854, "eval_loss": 1.2595826387405396, "eval_runtime": 126.9284, "eval_samples_per_second": 0.78, "eval_steps_per_second": 0.78, "step": 95000 }, { "epoch": 25.6, "grad_norm": 1.9487134218215942, "learning_rate": 2.0168067226890756e-06, "loss": 0.0393, "step": 96000 }, { "epoch": 25.6, "eval_cer": 34.53145057766367, "eval_loss": 1.2439144849777222, "eval_runtime": 120.7517, "eval_samples_per_second": 0.82, "eval_steps_per_second": 0.82, "step": 96000 }, { "epoch": 25.866666666666667, "grad_norm": 1.347065806388855, "learning_rate": 1.932773109243698e-06, "loss": 0.0402, "step": 97000 }, { "epoch": 25.866666666666667, "eval_cer": 34.27471116816431, "eval_loss": 1.2496284246444702, "eval_runtime": 139.8733, "eval_samples_per_second": 0.708, "eval_steps_per_second": 0.708, "step": 97000 }, { "epoch": 26.133333333333333, "grad_norm": 0.21094612777233124, "learning_rate": 1.8487394957983196e-06, "loss": 0.0282, "step": 98000 }, { "epoch": 26.133333333333333, "eval_cer": 34.017971758664956, "eval_loss": 1.30524742603302, "eval_runtime": 138.4425, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.715, "step": 98000 }, { "epoch": 26.4, "grad_norm": 1.7374966144561768, "learning_rate": 1.7647058823529414e-06, "loss": 0.0141, "step": 99000 }, { "epoch": 26.4, "eval_cer": 36.58536585365854, "eval_loss": 1.3404479026794434, "eval_runtime": 139.2585, "eval_samples_per_second": 0.711, "eval_steps_per_second": 0.711, "step": 99000 }, { "epoch": 26.666666666666668, "grad_norm": 5.082799911499023, "learning_rate": 1.6806722689075632e-06, "loss": 0.0163, "step": 100000 }, { "epoch": 26.666666666666668, "eval_cer": 32.60590500641848, "eval_loss": 1.3241037130355835, "eval_runtime": 130.5198, "eval_samples_per_second": 0.759, "eval_steps_per_second": 0.759, "step": 100000 }, { "epoch": 26.933333333333334, "grad_norm": 2.0732219219207764, "learning_rate": 1.5966386554621848e-06, "loss": 0.0139, "step": 101000 }, { "epoch": 26.933333333333334, "eval_cer": 32.73427471116817, "eval_loss": 1.342888593673706, "eval_runtime": 139.288, "eval_samples_per_second": 0.711, "eval_steps_per_second": 0.711, "step": 101000 }, { "epoch": 27.2, "grad_norm": 0.7511962056159973, "learning_rate": 1.5126050420168068e-06, "loss": 0.0075, "step": 102000 }, { "epoch": 27.2, "eval_cer": 32.60590500641848, "eval_loss": 1.4031819105148315, "eval_runtime": 138.6922, "eval_samples_per_second": 0.714, "eval_steps_per_second": 0.714, "step": 102000 }, { "epoch": 27.466666666666665, "grad_norm": 0.33939579129219055, "learning_rate": 1.4285714285714286e-06, "loss": 0.0058, "step": 103000 }, { "epoch": 27.466666666666665, "eval_cer": 33.889602053915276, "eval_loss": 1.4162280559539795, "eval_runtime": 138.3665, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.715, "step": 103000 }, { "epoch": 27.733333333333334, "grad_norm": 1.3353774547576904, "learning_rate": 1.3445378151260504e-06, "loss": 0.0053, "step": 104000 }, { "epoch": 27.733333333333334, "eval_cer": 32.86264441591784, "eval_loss": 1.4276233911514282, "eval_runtime": 138.4317, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.715, "step": 104000 }, { "epoch": 28.0, "grad_norm": 4.673173904418945, "learning_rate": 1.2605042016806724e-06, "loss": 0.0058, "step": 105000 }, { "epoch": 28.0, "eval_cer": 33.889602053915276, "eval_loss": 1.4254465103149414, "eval_runtime": 126.82, "eval_samples_per_second": 0.781, "eval_steps_per_second": 0.781, "step": 105000 }, { "epoch": 28.266666666666666, "grad_norm": 0.5961702466011047, "learning_rate": 1.1764705882352942e-06, "loss": 0.0024, "step": 106000 }, { "epoch": 28.266666666666666, "eval_cer": 31.964056482670088, "eval_loss": 1.4501264095306396, "eval_runtime": 141.7081, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.699, "step": 106000 }, { "epoch": 28.533333333333335, "grad_norm": 0.3718901574611664, "learning_rate": 1.092436974789916e-06, "loss": 0.0025, "step": 107000 }, { "epoch": 28.533333333333335, "eval_cer": 33.632862644415916, "eval_loss": 1.456369400024414, "eval_runtime": 139.6234, "eval_samples_per_second": 0.709, "eval_steps_per_second": 0.709, "step": 107000 }, { "epoch": 28.8, "grad_norm": 1.3455713987350464, "learning_rate": 1.0084033613445378e-06, "loss": 0.0031, "step": 108000 }, { "epoch": 28.8, "eval_cer": 33.761232349165596, "eval_loss": 1.4892566204071045, "eval_runtime": 155.9343, "eval_samples_per_second": 0.635, "eval_steps_per_second": 0.635, "step": 108000 }, { "epoch": 29.066666666666666, "grad_norm": 0.1176782175898552, "learning_rate": 9.243697478991598e-07, "loss": 0.0025, "step": 109000 }, { "epoch": 29.066666666666666, "eval_cer": 33.24775353016688, "eval_loss": 1.5334933996200562, "eval_runtime": 148.9794, "eval_samples_per_second": 0.665, "eval_steps_per_second": 0.665, "step": 109000 }, { "epoch": 29.333333333333332, "grad_norm": 0.22500748932361603, "learning_rate": 8.403361344537816e-07, "loss": 0.0014, "step": 110000 }, { "epoch": 29.333333333333332, "eval_cer": 31.322207958921695, "eval_loss": 1.5306977033615112, "eval_runtime": 148.8071, "eval_samples_per_second": 0.665, "eval_steps_per_second": 0.665, "step": 110000 }, { "epoch": 29.6, "grad_norm": 13.704888343811035, "learning_rate": 7.563025210084034e-07, "loss": 0.0011, "step": 111000 }, { "epoch": 29.6, "eval_cer": 33.1193838254172, "eval_loss": 1.5167608261108398, "eval_runtime": 101.0691, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.98, "step": 111000 }, { "epoch": 29.866666666666667, "grad_norm": 0.059027571231126785, "learning_rate": 6.722689075630252e-07, "loss": 0.0011, "step": 112000 }, { "epoch": 29.866666666666667, "eval_cer": 32.47753530166881, "eval_loss": 1.5043797492980957, "eval_runtime": 98.5539, "eval_samples_per_second": 1.005, "eval_steps_per_second": 1.005, "step": 112000 }, { "epoch": 30.133333333333333, "grad_norm": 0.023644184693694115, "learning_rate": 5.882352941176471e-07, "loss": 0.0008, "step": 113000 }, { "epoch": 30.133333333333333, "eval_cer": 32.73427471116817, "eval_loss": 1.5240405797958374, "eval_runtime": 93.2907, "eval_samples_per_second": 1.061, "eval_steps_per_second": 1.061, "step": 113000 }, { "epoch": 30.4, "grad_norm": 0.0645926371216774, "learning_rate": 5.042016806722689e-07, "loss": 0.0009, "step": 114000 }, { "epoch": 30.4, "eval_cer": 32.09242618741977, "eval_loss": 1.5282586812973022, "eval_runtime": 81.359, "eval_samples_per_second": 1.217, "eval_steps_per_second": 1.217, "step": 114000 }, { "epoch": 30.666666666666668, "grad_norm": 0.04764328524470329, "learning_rate": 4.201680672268908e-07, "loss": 0.0006, "step": 115000 }, { "epoch": 30.666666666666668, "eval_cer": 31.964056482670088, "eval_loss": 1.5477633476257324, "eval_runtime": 79.1799, "eval_samples_per_second": 1.25, "eval_steps_per_second": 1.25, "step": 115000 }, { "epoch": 30.933333333333334, "grad_norm": 0.33984529972076416, "learning_rate": 3.361344537815126e-07, "loss": 0.0007, "step": 116000 }, { "epoch": 30.933333333333334, "eval_cer": 33.761232349165596, "eval_loss": 1.5459610223770142, "eval_runtime": 101.4106, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.976, "step": 116000 }, { "epoch": 31.2, "grad_norm": 0.028893066570162773, "learning_rate": 2.5210084033613445e-07, "loss": 0.0004, "step": 117000 }, { "epoch": 31.2, "eval_cer": 34.017971758664956, "eval_loss": 1.5544700622558594, "eval_runtime": 78.702, "eval_samples_per_second": 1.258, "eval_steps_per_second": 1.258, "step": 117000 }, { "epoch": 31.466666666666665, "grad_norm": 0.03760391101241112, "learning_rate": 1.680672268907563e-07, "loss": 0.0004, "step": 118000 }, { "epoch": 31.466666666666665, "eval_cer": 34.403080872913996, "eval_loss": 1.5584869384765625, "eval_runtime": 78.6792, "eval_samples_per_second": 1.258, "eval_steps_per_second": 1.258, "step": 118000 }, { "epoch": 31.733333333333334, "grad_norm": 0.02360822632908821, "learning_rate": 8.403361344537815e-08, "loss": 0.0003, "step": 119000 }, { "epoch": 31.733333333333334, "eval_cer": 34.017971758664956, "eval_loss": 1.5569974184036255, "eval_runtime": 78.2713, "eval_samples_per_second": 1.265, "eval_steps_per_second": 1.265, "step": 119000 }, { "epoch": 32.0, "grad_norm": 0.05067881569266319, "learning_rate": 0.0, "loss": 0.0003, "step": 120000 }, { "epoch": 32.0, "eval_cer": 34.017971758664956, "eval_loss": 1.5553821325302124, "eval_runtime": 79.0045, "eval_samples_per_second": 1.253, "eval_steps_per_second": 1.253, "step": 120000 }, { "epoch": 32.266666666666666, "grad_norm": 8.542917251586914, "learning_rate": 2.4528301886792453e-06, "loss": 0.2438, "step": 121000 }, { "epoch": 32.266666666666666, "eval_cer": 32.49097472924188, "eval_loss": 0.9014519453048706, "eval_runtime": 78.9647, "eval_samples_per_second": 1.254, "eval_steps_per_second": 1.254, "step": 121000 }, { "epoch": 32.53333333333333, "grad_norm": 10.492444038391113, "learning_rate": 2.389937106918239e-06, "loss": 0.2189, "step": 122000 }, { "epoch": 32.53333333333333, "eval_cer": 34.05535499398315, "eval_loss": 0.8799840211868286, "eval_runtime": 79.6839, "eval_samples_per_second": 1.242, "eval_steps_per_second": 1.242, "step": 122000 }, { "epoch": 32.8, "grad_norm": 1.4531430006027222, "learning_rate": 2.327044025157233e-06, "loss": 0.2178, "step": 123000 }, { "epoch": 32.8, "eval_cer": 28.51985559566787, "eval_loss": 0.8893880248069763, "eval_runtime": 79.9682, "eval_samples_per_second": 1.238, "eval_steps_per_second": 1.238, "step": 123000 }, { "epoch": 33.06666666666667, "grad_norm": 0.9759045243263245, "learning_rate": 2.2641509433962266e-06, "loss": 0.1867, "step": 124000 }, { "epoch": 33.06666666666667, "eval_cer": 33.33333333333333, "eval_loss": 0.9809114336967468, "eval_runtime": 79.1595, "eval_samples_per_second": 1.251, "eval_steps_per_second": 1.251, "step": 124000 }, { "epoch": 33.333333333333336, "grad_norm": 13.012406349182129, "learning_rate": 2.2012578616352204e-06, "loss": 0.0829, "step": 125000 }, { "epoch": 33.333333333333336, "eval_cer": 33.2129963898917, "eval_loss": 1.0518907308578491, "eval_runtime": 80.5248, "eval_samples_per_second": 1.229, "eval_steps_per_second": 1.229, "step": 125000 }, { "epoch": 33.6, "grad_norm": 7.532169342041016, "learning_rate": 2.138364779874214e-06, "loss": 0.0889, "step": 126000 }, { "epoch": 33.6, "eval_cer": 30.806257521058967, "eval_loss": 1.0699083805084229, "eval_runtime": 138.2949, "eval_samples_per_second": 0.716, "eval_steps_per_second": 0.716, "step": 126000 }, { "epoch": 33.86666666666667, "grad_norm": 8.014657020568848, "learning_rate": 2.075471698113208e-06, "loss": 0.086, "step": 127000 }, { "epoch": 33.86666666666667, "eval_cer": 33.33333333333333, "eval_loss": 1.0419602394104004, "eval_runtime": 130.1457, "eval_samples_per_second": 0.761, "eval_steps_per_second": 0.761, "step": 127000 }, { "epoch": 34.13333333333333, "grad_norm": 3.8977677822113037, "learning_rate": 2.0125786163522013e-06, "loss": 0.0633, "step": 128000 }, { "epoch": 34.13333333333333, "eval_cer": 31.889290012033694, "eval_loss": 1.1622824668884277, "eval_runtime": 130.9914, "eval_samples_per_second": 0.756, "eval_steps_per_second": 0.756, "step": 128000 }, { "epoch": 34.4, "grad_norm": 1.6470654010772705, "learning_rate": 1.949685534591195e-06, "loss": 0.0334, "step": 129000 }, { "epoch": 34.4, "eval_cer": 31.28760529482551, "eval_loss": 1.1540066003799438, "eval_runtime": 140.5397, "eval_samples_per_second": 0.704, "eval_steps_per_second": 0.704, "step": 129000 }, { "epoch": 34.666666666666664, "grad_norm": 2.1039841175079346, "learning_rate": 1.8867924528301889e-06, "loss": 0.0295, "step": 130000 }, { "epoch": 34.666666666666664, "eval_cer": 29.482551143200965, "eval_loss": 1.1704764366149902, "eval_runtime": 132.9282, "eval_samples_per_second": 0.745, "eval_steps_per_second": 0.745, "step": 130000 }, { "epoch": 34.93333333333333, "grad_norm": 7.380443096160889, "learning_rate": 1.8238993710691824e-06, "loss": 0.0311, "step": 131000 }, { "epoch": 34.93333333333333, "eval_cer": 30.445246690734056, "eval_loss": 1.169384241104126, "eval_runtime": 131.4972, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.753, "step": 131000 }, { "epoch": 35.2, "grad_norm": 0.5855227708816528, "learning_rate": 1.7610062893081762e-06, "loss": 0.0164, "step": 132000 }, { "epoch": 35.2, "eval_cer": 33.2129963898917, "eval_loss": 1.2629327774047852, "eval_runtime": 141.6079, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.699, "step": 132000 }, { "epoch": 35.46666666666667, "grad_norm": 1.5775542259216309, "learning_rate": 1.6981132075471698e-06, "loss": 0.0105, "step": 133000 }, { "epoch": 35.46666666666667, "eval_cer": 30.9265944645006, "eval_loss": 1.2349437475204468, "eval_runtime": 141.4731, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.7, "step": 133000 }, { "epoch": 35.733333333333334, "grad_norm": 1.080212950706482, "learning_rate": 1.6352201257861635e-06, "loss": 0.0123, "step": 134000 }, { "epoch": 35.733333333333334, "eval_cer": 33.69434416365824, "eval_loss": 1.2958662509918213, "eval_runtime": 132.1396, "eval_samples_per_second": 0.749, "eval_steps_per_second": 0.749, "step": 134000 }, { "epoch": 36.0, "grad_norm": 2.049687147140503, "learning_rate": 1.5723270440251573e-06, "loss": 0.0123, "step": 135000 }, { "epoch": 36.0, "eval_cer": 33.2129963898917, "eval_loss": 1.2999323606491089, "eval_runtime": 135.3553, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.731, "step": 135000 }, { "epoch": 36.266666666666666, "grad_norm": 0.15309026837348938, "learning_rate": 1.509433962264151e-06, "loss": 0.0054, "step": 136000 }, { "epoch": 36.266666666666666, "eval_cer": 34.05535499398315, "eval_loss": 1.3017274141311646, "eval_runtime": 140.5992, "eval_samples_per_second": 0.704, "eval_steps_per_second": 0.704, "step": 136000 }, { "epoch": 36.53333333333333, "grad_norm": 7.4319658279418945, "learning_rate": 1.4465408805031447e-06, "loss": 0.0046, "step": 137000 }, { "epoch": 36.53333333333333, "eval_cer": 32.851985559566785, "eval_loss": 1.3190721273422241, "eval_runtime": 139.5862, "eval_samples_per_second": 0.709, "eval_steps_per_second": 0.709, "step": 137000 }, { "epoch": 36.8, "grad_norm": 0.05639100819826126, "learning_rate": 1.3836477987421384e-06, "loss": 0.0048, "step": 138000 }, { "epoch": 36.8, "eval_cer": 33.935018050541515, "eval_loss": 1.312601089477539, "eval_runtime": 136.2621, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.727, "step": 138000 }, { "epoch": 37.06666666666667, "grad_norm": 0.4124259054660797, "learning_rate": 1.3207547169811322e-06, "loss": 0.0042, "step": 139000 }, { "epoch": 37.06666666666667, "eval_cer": 33.45367027677497, "eval_loss": 1.338242530822754, "eval_runtime": 141.1632, "eval_samples_per_second": 0.701, "eval_steps_per_second": 0.701, "step": 139000 }, { "epoch": 37.333333333333336, "grad_norm": 0.04568612948060036, "learning_rate": 1.257861635220126e-06, "loss": 0.0028, "step": 140000 }, { "epoch": 37.333333333333336, "eval_cer": 33.092659446450064, "eval_loss": 1.3590086698532104, "eval_runtime": 130.7233, "eval_samples_per_second": 0.757, "eval_steps_per_second": 0.757, "step": 140000 }, { "epoch": 37.6, "grad_norm": 6.521934509277344, "learning_rate": 1.1949685534591195e-06, "loss": 0.002, "step": 141000 }, { "epoch": 37.6, "eval_cer": 35.619735258724425, "eval_loss": 1.3925501108169556, "eval_runtime": 136.222, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.727, "step": 141000 }, { "epoch": 37.86666666666667, "grad_norm": 0.37722429633140564, "learning_rate": 1.1320754716981133e-06, "loss": 0.0025, "step": 142000 }, { "epoch": 37.86666666666667, "eval_cer": 33.2129963898917, "eval_loss": 1.3863495588302612, "eval_runtime": 142.1041, "eval_samples_per_second": 0.697, "eval_steps_per_second": 0.697, "step": 142000 }, { "epoch": 38.13333333333333, "grad_norm": 0.0507730133831501, "learning_rate": 1.069182389937107e-06, "loss": 0.0019, "step": 143000 }, { "epoch": 38.13333333333333, "eval_cer": 32.49097472924188, "eval_loss": 1.4475711584091187, "eval_runtime": 128.8649, "eval_samples_per_second": 0.768, "eval_steps_per_second": 0.768, "step": 143000 }, { "epoch": 38.4, "grad_norm": 0.029291188344359398, "learning_rate": 1.0062893081761007e-06, "loss": 0.0012, "step": 144000 }, { "epoch": 38.4, "eval_cer": 32.49097472924188, "eval_loss": 1.4133862257003784, "eval_runtime": 138.9534, "eval_samples_per_second": 0.712, "eval_steps_per_second": 0.712, "step": 144000 }, { "epoch": 38.666666666666664, "grad_norm": 0.029871659353375435, "learning_rate": 9.433962264150944e-07, "loss": 0.0015, "step": 145000 }, { "epoch": 38.666666666666664, "eval_cer": 32.61131167268351, "eval_loss": 1.4183237552642822, "eval_runtime": 141.5271, "eval_samples_per_second": 0.7, "eval_steps_per_second": 0.7, "step": 145000 }, { "epoch": 38.93333333333333, "grad_norm": 0.04213670641183853, "learning_rate": 8.805031446540881e-07, "loss": 0.0012, "step": 146000 }, { "epoch": 38.93333333333333, "eval_cer": 34.29602888086642, "eval_loss": 1.4451824426651, "eval_runtime": 138.8658, "eval_samples_per_second": 0.713, "eval_steps_per_second": 0.713, "step": 146000 }, { "epoch": 39.2, "grad_norm": 0.025760957971215248, "learning_rate": 8.176100628930818e-07, "loss": 0.0008, "step": 147000 }, { "epoch": 39.2, "eval_cer": 34.777376654632974, "eval_loss": 1.4834933280944824, "eval_runtime": 127.478, "eval_samples_per_second": 0.777, "eval_steps_per_second": 0.777, "step": 147000 }, { "epoch": 39.46666666666667, "grad_norm": 0.015352617017924786, "learning_rate": 7.547169811320755e-07, "loss": 0.0007, "step": 148000 }, { "epoch": 39.46666666666667, "eval_cer": 34.5367027677497, "eval_loss": 1.492653727531433, "eval_runtime": 138.6569, "eval_samples_per_second": 0.714, "eval_steps_per_second": 0.714, "step": 148000 }, { "epoch": 39.733333333333334, "grad_norm": 0.0862458199262619, "learning_rate": 6.918238993710692e-07, "loss": 0.0007, "step": 149000 }, { "epoch": 39.733333333333334, "eval_cer": 32.49097472924188, "eval_loss": 1.4610902070999146, "eval_runtime": 142.2105, "eval_samples_per_second": 0.696, "eval_steps_per_second": 0.696, "step": 149000 }, { "epoch": 40.0, "grad_norm": 0.03945042937994003, "learning_rate": 6.28930817610063e-07, "loss": 0.0009, "step": 150000 }, { "epoch": 40.0, "eval_cer": 32.61131167268351, "eval_loss": 1.513584017753601, "eval_runtime": 133.4522, "eval_samples_per_second": 0.742, "eval_steps_per_second": 0.742, "step": 150000 }, { "epoch": 40.266666666666666, "grad_norm": 0.037018537521362305, "learning_rate": 5.660377358490567e-07, "loss": 0.0003, "step": 151000 }, { "epoch": 40.266666666666666, "eval_cer": 34.777376654632974, "eval_loss": 1.5051733255386353, "eval_runtime": 152.3438, "eval_samples_per_second": 0.65, "eval_steps_per_second": 0.65, "step": 151000 }, { "epoch": 40.53333333333333, "grad_norm": 0.04961505904793739, "learning_rate": 5.031446540880503e-07, "loss": 0.0005, "step": 152000 }, { "epoch": 40.53333333333333, "eval_cer": 33.57400722021661, "eval_loss": 1.5090794563293457, "eval_runtime": 152.6527, "eval_samples_per_second": 0.649, "eval_steps_per_second": 0.649, "step": 152000 }, { "epoch": 40.8, "grad_norm": 0.043190281838178635, "learning_rate": 4.4025157232704405e-07, "loss": 0.0004, "step": 153000 }, { "epoch": 40.8, "eval_cer": 33.2129963898917, "eval_loss": 1.508406639099121, "eval_runtime": 153.8282, "eval_samples_per_second": 0.644, "eval_steps_per_second": 0.644, "step": 153000 }, { "epoch": 41.06666666666667, "grad_norm": 0.036041852086782455, "learning_rate": 3.773584905660378e-07, "loss": 0.0002, "step": 154000 }, { "epoch": 41.06666666666667, "eval_cer": 33.935018050541515, "eval_loss": 1.5361274480819702, "eval_runtime": 82.6142, "eval_samples_per_second": 1.198, "eval_steps_per_second": 1.198, "step": 154000 }, { "epoch": 41.333333333333336, "grad_norm": 0.01777110993862152, "learning_rate": 3.144654088050315e-07, "loss": 0.0002, "step": 155000 }, { "epoch": 41.333333333333336, "eval_cer": 33.092659446450064, "eval_loss": 1.5560057163238525, "eval_runtime": 83.0022, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 155000 }, { "epoch": 41.6, "grad_norm": 0.006947483401745558, "learning_rate": 2.5157232704402517e-07, "loss": 0.0001, "step": 156000 }, { "epoch": 41.6, "eval_cer": 33.2129963898917, "eval_loss": 1.5325310230255127, "eval_runtime": 82.2544, "eval_samples_per_second": 1.204, "eval_steps_per_second": 1.204, "step": 156000 }, { "epoch": 41.86666666666667, "grad_norm": 0.03959225118160248, "learning_rate": 1.886792452830189e-07, "loss": 0.0002, "step": 157000 }, { "epoch": 41.86666666666667, "eval_cer": 33.45367027677497, "eval_loss": 1.5379877090454102, "eval_runtime": 82.6778, "eval_samples_per_second": 1.197, "eval_steps_per_second": 1.197, "step": 157000 }, { "epoch": 42.13333333333333, "grad_norm": 0.17936809360980988, "learning_rate": 1.2578616352201258e-07, "loss": 0.0002, "step": 158000 }, { "epoch": 42.13333333333333, "eval_cer": 32.731648616125156, "eval_loss": 1.5386216640472412, "eval_runtime": 82.9423, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "step": 158000 }, { "epoch": 42.4, "grad_norm": 0.3688109219074249, "learning_rate": 6.289308176100629e-08, "loss": 0.0001, "step": 159000 }, { "epoch": 42.4, "eval_cer": 32.12996389891697, "eval_loss": 1.5448355674743652, "eval_runtime": 82.8733, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 159000 }, { "epoch": 42.666666666666664, "grad_norm": 0.01078395452350378, "learning_rate": 0.0, "loss": 0.0001, "step": 160000 }, { "epoch": 42.666666666666664, "eval_cer": 32.37063778580024, "eval_loss": 1.5441501140594482, "eval_runtime": 82.7104, "eval_samples_per_second": 1.197, "eval_steps_per_second": 1.197, "step": 160000 }, { "epoch": 42.93333333333333, "grad_norm": 21.12993049621582, "learning_rate": 1.9597989949748746e-06, "loss": 0.2437, "step": 161000 }, { "epoch": 42.93333333333333, "eval_cer": 34.413965087281795, "eval_loss": 1.1097244024276733, "eval_runtime": 82.2475, "eval_samples_per_second": 1.204, "eval_steps_per_second": 1.204, "step": 161000 }, { "epoch": 43.2, "grad_norm": 24.733867645263672, "learning_rate": 1.9095477386934674e-06, "loss": 0.1948, "step": 162000 }, { "epoch": 43.2, "eval_cer": 33.042394014962596, "eval_loss": 1.0970935821533203, "eval_runtime": 82.1258, "eval_samples_per_second": 1.205, "eval_steps_per_second": 1.205, "step": 162000 }, { "epoch": 43.46666666666667, "grad_norm": 2.1681711673736572, "learning_rate": 1.8592964824120604e-06, "loss": 0.1795, "step": 163000 }, { "epoch": 43.46666666666667, "eval_cer": 34.1645885286783, "eval_loss": 1.1059696674346924, "eval_runtime": 82.8179, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 163000 }, { "epoch": 43.733333333333334, "grad_norm": 7.347228527069092, "learning_rate": 1.8090452261306535e-06, "loss": 0.1736, "step": 164000 }, { "epoch": 43.733333333333334, "eval_cer": 34.78802992518703, "eval_loss": 1.1568942070007324, "eval_runtime": 82.9427, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "step": 164000 }, { "epoch": 44.0, "grad_norm": 16.711997985839844, "learning_rate": 1.7587939698492465e-06, "loss": 0.1728, "step": 165000 }, { "epoch": 44.0, "eval_cer": 32.04488778054863, "eval_loss": 1.1192028522491455, "eval_runtime": 82.551, "eval_samples_per_second": 1.199, "eval_steps_per_second": 1.199, "step": 165000 }, { "epoch": 44.266666666666666, "grad_norm": 0.13528181612491608, "learning_rate": 1.7085427135678393e-06, "loss": 0.0722, "step": 166000 }, { "epoch": 44.266666666666666, "eval_cer": 32.418952618453865, "eval_loss": 1.2699230909347534, "eval_runtime": 83.0431, "eval_samples_per_second": 1.192, "eval_steps_per_second": 1.192, "step": 166000 }, { "epoch": 44.53333333333333, "grad_norm": 0.9568387866020203, "learning_rate": 1.6582914572864323e-06, "loss": 0.0745, "step": 167000 }, { "epoch": 44.53333333333333, "eval_cer": 34.03990024937656, "eval_loss": 1.3056560754776, "eval_runtime": 82.9267, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "step": 167000 }, { "epoch": 44.8, "grad_norm": 0.2640259563922882, "learning_rate": 1.6080402010050254e-06, "loss": 0.0739, "step": 168000 }, { "epoch": 44.8, "eval_cer": 35.41147132169576, "eval_loss": 1.3166236877441406, "eval_runtime": 82.7459, "eval_samples_per_second": 1.196, "eval_steps_per_second": 1.196, "step": 168000 }, { "epoch": 45.06666666666667, "grad_norm": 0.09736265242099762, "learning_rate": 1.5577889447236184e-06, "loss": 0.0579, "step": 169000 }, { "epoch": 45.06666666666667, "eval_cer": 35.785536159601, "eval_loss": 1.4574410915374756, "eval_runtime": 82.7625, "eval_samples_per_second": 1.196, "eval_steps_per_second": 1.196, "step": 169000 }, { "epoch": 45.333333333333336, "grad_norm": 9.942195892333984, "learning_rate": 1.507537688442211e-06, "loss": 0.0264, "step": 170000 }, { "epoch": 45.333333333333336, "eval_cer": 35.785536159601, "eval_loss": 1.5011882781982422, "eval_runtime": 82.3432, "eval_samples_per_second": 1.202, "eval_steps_per_second": 1.202, "step": 170000 }, { "epoch": 45.6, "grad_norm": 0.22312113642692566, "learning_rate": 1.457286432160804e-06, "loss": 0.029, "step": 171000 }, { "epoch": 45.6, "eval_cer": 33.66583541147132, "eval_loss": 1.4832149744033813, "eval_runtime": 82.3824, "eval_samples_per_second": 1.202, "eval_steps_per_second": 1.202, "step": 171000 }, { "epoch": 45.86666666666667, "grad_norm": 7.559528827667236, "learning_rate": 1.407035175879397e-06, "loss": 0.0264, "step": 172000 }, { "epoch": 45.86666666666667, "eval_cer": 35.28678304239401, "eval_loss": 1.5051957368850708, "eval_runtime": 83.0512, "eval_samples_per_second": 1.192, "eval_steps_per_second": 1.192, "step": 172000 }, { "epoch": 46.13333333333333, "grad_norm": 0.13890038430690765, "learning_rate": 1.35678391959799e-06, "loss": 0.0198, "step": 173000 }, { "epoch": 46.13333333333333, "eval_cer": 35.910224438902745, "eval_loss": 1.5969452857971191, "eval_runtime": 83.0972, "eval_samples_per_second": 1.191, "eval_steps_per_second": 1.191, "step": 173000 }, { "epoch": 46.4, "grad_norm": 23.76213264465332, "learning_rate": 1.3065326633165831e-06, "loss": 0.0093, "step": 174000 }, { "epoch": 46.4, "eval_cer": 35.16209476309227, "eval_loss": 1.5947421789169312, "eval_runtime": 82.6136, "eval_samples_per_second": 1.198, "eval_steps_per_second": 1.198, "step": 174000 }, { "epoch": 46.666666666666664, "grad_norm": 0.32037144899368286, "learning_rate": 1.256281407035176e-06, "loss": 0.0108, "step": 175000 }, { "epoch": 46.666666666666664, "eval_cer": 36.78304239401496, "eval_loss": 1.58470618724823, "eval_runtime": 82.3123, "eval_samples_per_second": 1.203, "eval_steps_per_second": 1.203, "step": 175000 }, { "epoch": 46.93333333333333, "grad_norm": 0.17757229506969452, "learning_rate": 1.206030150753769e-06, "loss": 0.0106, "step": 176000 }, { "epoch": 46.93333333333333, "eval_cer": 36.907730673316706, "eval_loss": 1.6137627363204956, "eval_runtime": 82.6988, "eval_samples_per_second": 1.197, "eval_steps_per_second": 1.197, "step": 176000 }, { "epoch": 47.2, "grad_norm": 1.0095301866531372, "learning_rate": 1.155778894472362e-06, "loss": 0.0059, "step": 177000 }, { "epoch": 47.2, "eval_cer": 36.408977556109726, "eval_loss": 1.6764711141586304, "eval_runtime": 82.559, "eval_samples_per_second": 1.199, "eval_steps_per_second": 1.199, "step": 177000 }, { "epoch": 47.46666666666667, "grad_norm": 0.2900920808315277, "learning_rate": 1.105527638190955e-06, "loss": 0.0048, "step": 178000 }, { "epoch": 47.46666666666667, "eval_cer": 34.66334164588529, "eval_loss": 1.62752366065979, "eval_runtime": 82.6072, "eval_samples_per_second": 1.198, "eval_steps_per_second": 1.198, "step": 178000 }, { "epoch": 47.733333333333334, "grad_norm": 1.6907705068588257, "learning_rate": 1.0552763819095479e-06, "loss": 0.0047, "step": 179000 }, { "epoch": 47.733333333333334, "eval_cer": 36.15960099750624, "eval_loss": 1.680855393409729, "eval_runtime": 82.3981, "eval_samples_per_second": 1.201, "eval_steps_per_second": 1.201, "step": 179000 }, { "epoch": 48.0, "grad_norm": 0.05522582679986954, "learning_rate": 1.0050251256281409e-06, "loss": 0.004, "step": 180000 }, { "epoch": 48.0, "eval_cer": 36.53366583541147, "eval_loss": 1.673848032951355, "eval_runtime": 83.5612, "eval_samples_per_second": 1.185, "eval_steps_per_second": 1.185, "step": 180000 }, { "epoch": 48.266666666666666, "grad_norm": 0.05876125767827034, "learning_rate": 9.547738693467337e-07, "loss": 0.0019, "step": 181000 }, { "epoch": 48.266666666666666, "eval_cer": 35.785536159601, "eval_loss": 1.7074730396270752, "eval_runtime": 82.6963, "eval_samples_per_second": 1.197, "eval_steps_per_second": 1.197, "step": 181000 }, { "epoch": 48.53333333333333, "grad_norm": 0.25979533791542053, "learning_rate": 9.045226130653267e-07, "loss": 0.0023, "step": 182000 }, { "epoch": 48.53333333333333, "eval_cer": 35.16209476309227, "eval_loss": 1.7071537971496582, "eval_runtime": 82.873, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 182000 }, { "epoch": 48.8, "grad_norm": 1.9857276678085327, "learning_rate": 8.542713567839197e-07, "loss": 0.0019, "step": 183000 }, { "epoch": 48.8, "eval_cer": 35.66084788029925, "eval_loss": 1.7293034791946411, "eval_runtime": 82.7894, "eval_samples_per_second": 1.196, "eval_steps_per_second": 1.196, "step": 183000 }, { "epoch": 49.06666666666667, "grad_norm": 0.021623745560646057, "learning_rate": 8.040201005025127e-07, "loss": 0.0019, "step": 184000 }, { "epoch": 49.06666666666667, "eval_cer": 36.53366583541147, "eval_loss": 1.7291345596313477, "eval_runtime": 82.8955, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "step": 184000 }, { "epoch": 49.333333333333336, "grad_norm": 0.13192743062973022, "learning_rate": 7.537688442211055e-07, "loss": 0.0008, "step": 185000 }, { "epoch": 49.333333333333336, "eval_cer": 36.78304239401496, "eval_loss": 1.7502952814102173, "eval_runtime": 83.2448, "eval_samples_per_second": 1.189, "eval_steps_per_second": 1.189, "step": 185000 }, { "epoch": 49.6, "grad_norm": 0.07062412798404694, "learning_rate": 7.035175879396985e-07, "loss": 0.0009, "step": 186000 }, { "epoch": 49.6, "eval_cer": 36.53366583541147, "eval_loss": 1.72696852684021, "eval_runtime": 83.1626, "eval_samples_per_second": 1.19, "eval_steps_per_second": 1.19, "step": 186000 }, { "epoch": 49.86666666666667, "grad_norm": 0.038012657314538956, "learning_rate": 6.532663316582916e-07, "loss": 0.0009, "step": 187000 }, { "epoch": 49.86666666666667, "eval_cer": 36.53366583541147, "eval_loss": 1.7277522087097168, "eval_runtime": 83.1004, "eval_samples_per_second": 1.191, "eval_steps_per_second": 1.191, "step": 187000 }, { "epoch": 50.13333333333333, "grad_norm": 0.01662178337574005, "learning_rate": 6.030150753768845e-07, "loss": 0.001, "step": 188000 }, { "epoch": 50.13333333333333, "eval_cer": 34.66334164588529, "eval_loss": 1.7733581066131592, "eval_runtime": 82.9672, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 188000 }, { "epoch": 50.4, "grad_norm": 0.047121066600084305, "learning_rate": 5.527638190954775e-07, "loss": 0.0005, "step": 189000 }, { "epoch": 50.4, "eval_cer": 36.03491271820449, "eval_loss": 1.7523555755615234, "eval_runtime": 82.8793, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 189000 }, { "epoch": 50.666666666666664, "grad_norm": 0.020025352016091347, "learning_rate": 5.025125628140704e-07, "loss": 0.0008, "step": 190000 }, { "epoch": 50.666666666666664, "eval_cer": 34.66334164588529, "eval_loss": 1.7309192419052124, "eval_runtime": 82.6175, "eval_samples_per_second": 1.198, "eval_steps_per_second": 1.198, "step": 190000 }, { "epoch": 50.93333333333333, "grad_norm": 0.020201655104756355, "learning_rate": 4.5226130653266337e-07, "loss": 0.0005, "step": 191000 }, { "epoch": 50.93333333333333, "eval_cer": 36.28428927680798, "eval_loss": 1.7656340599060059, "eval_runtime": 82.9749, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 191000 }, { "epoch": 51.2, "grad_norm": 0.027762647718191147, "learning_rate": 4.0201005025125634e-07, "loss": 0.0003, "step": 192000 }, { "epoch": 51.2, "eval_cer": 36.15960099750624, "eval_loss": 1.7615617513656616, "eval_runtime": 82.7808, "eval_samples_per_second": 1.196, "eval_steps_per_second": 1.196, "step": 192000 }, { "epoch": 51.46666666666667, "grad_norm": 0.021721765398979187, "learning_rate": 3.5175879396984927e-07, "loss": 0.0003, "step": 193000 }, { "epoch": 51.46666666666667, "eval_cer": 35.16209476309227, "eval_loss": 1.772621989250183, "eval_runtime": 82.9625, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 193000 }, { "epoch": 51.733333333333334, "grad_norm": 0.021287057548761368, "learning_rate": 3.0150753768844224e-07, "loss": 0.0002, "step": 194000 }, { "epoch": 51.733333333333334, "eval_cer": 36.28428927680798, "eval_loss": 1.7833250761032104, "eval_runtime": 83.1006, "eval_samples_per_second": 1.191, "eval_steps_per_second": 1.191, "step": 194000 }, { "epoch": 52.0, "grad_norm": 0.020342178642749786, "learning_rate": 2.512562814070352e-07, "loss": 0.0004, "step": 195000 }, { "epoch": 52.0, "eval_cer": 34.66334164588529, "eval_loss": 1.7803997993469238, "eval_runtime": 82.7795, "eval_samples_per_second": 1.196, "eval_steps_per_second": 1.196, "step": 195000 }, { "epoch": 52.266666666666666, "grad_norm": 0.013502071611583233, "learning_rate": 2.0100502512562817e-07, "loss": 0.0001, "step": 196000 }, { "epoch": 52.266666666666666, "eval_cer": 34.413965087281795, "eval_loss": 1.786993145942688, "eval_runtime": 82.9528, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 196000 }, { "epoch": 52.53333333333333, "grad_norm": 0.013876430690288544, "learning_rate": 1.5075376884422112e-07, "loss": 0.0001, "step": 197000 }, { "epoch": 52.53333333333333, "eval_cer": 34.413965087281795, "eval_loss": 1.8022912740707397, "eval_runtime": 82.844, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 197000 }, { "epoch": 52.8, "grad_norm": 0.007446631323546171, "learning_rate": 1.0050251256281409e-07, "loss": 0.0002, "step": 198000 }, { "epoch": 52.8, "eval_cer": 34.28927680798005, "eval_loss": 1.8034993410110474, "eval_runtime": 82.9855, "eval_samples_per_second": 1.193, "eval_steps_per_second": 1.193, "step": 198000 }, { "epoch": 53.06666666666667, "grad_norm": 0.03002343513071537, "learning_rate": 5.025125628140704e-08, "loss": 0.0001, "step": 199000 }, { "epoch": 53.06666666666667, "eval_cer": 35.03740648379052, "eval_loss": 1.8082956075668335, "eval_runtime": 82.8861, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "step": 199000 }, { "epoch": 53.333333333333336, "grad_norm": 0.005953139625489712, "learning_rate": 0.0, "loss": 0.0001, "step": 200000 }, { "epoch": 53.333333333333336, "eval_cer": 35.16209476309227, "eval_loss": 1.8082053661346436, "eval_runtime": 82.8236, "eval_samples_per_second": 1.195, "eval_steps_per_second": 1.195, "step": 200000 }, { "epoch": 53.6, "grad_norm": 30.62691879272461, "learning_rate": 1.6317991631799166e-06, "loss": 0.248, "step": 201000 }, { "epoch": 53.6, "eval_cer": 30.375000000000004, "eval_loss": 1.015176773071289, "eval_runtime": 81.9262, "eval_samples_per_second": 1.208, "eval_steps_per_second": 1.208, "step": 201000 }, { "epoch": 53.86666666666667, "grad_norm": 1.1325130462646484, "learning_rate": 1.589958158995816e-06, "loss": 0.2154, "step": 202000 }, { "epoch": 53.86666666666667, "eval_cer": 28.875, "eval_loss": 1.0113086700439453, "eval_runtime": 81.4097, "eval_samples_per_second": 1.216, "eval_steps_per_second": 1.216, "step": 202000 }, { "epoch": 54.13333333333333, "grad_norm": 7.334270000457764, "learning_rate": 1.5481171548117155e-06, "loss": 0.1798, "step": 203000 }, { "epoch": 54.13333333333333, "eval_cer": 31.0, "eval_loss": 1.1168731451034546, "eval_runtime": 81.9464, "eval_samples_per_second": 1.208, "eval_steps_per_second": 1.208, "step": 203000 }, { "epoch": 54.4, "grad_norm": 0.5677210092544556, "learning_rate": 1.5062761506276152e-06, "loss": 0.1294, "step": 204000 }, { "epoch": 54.4, "eval_cer": 30.5, "eval_loss": 1.128467082977295, "eval_runtime": 81.5886, "eval_samples_per_second": 1.213, "eval_steps_per_second": 1.213, "step": 204000 }, { "epoch": 54.666666666666664, "grad_norm": 11.697511672973633, "learning_rate": 1.4644351464435146e-06, "loss": 0.1363, "step": 205000 }, { "epoch": 54.666666666666664, "eval_cer": 32.25, "eval_loss": 1.1191054582595825, "eval_runtime": 81.6688, "eval_samples_per_second": 1.212, "eval_steps_per_second": 1.212, "step": 205000 }, { "epoch": 54.93333333333333, "grad_norm": 11.908825874328613, "learning_rate": 1.4225941422594145e-06, "loss": 0.1334, "step": 206000 }, { "epoch": 54.93333333333333, "eval_cer": 32.125, "eval_loss": 1.1015084981918335, "eval_runtime": 81.5921, "eval_samples_per_second": 1.213, "eval_steps_per_second": 1.213, "step": 206000 }, { "epoch": 55.2, "grad_norm": 1.5424853563308716, "learning_rate": 1.380753138075314e-06, "loss": 0.0702, "step": 207000 }, { "epoch": 55.2, "eval_cer": 32.125, "eval_loss": 1.2612977027893066, "eval_runtime": 82.3504, "eval_samples_per_second": 1.202, "eval_steps_per_second": 1.202, "step": 207000 }, { "epoch": 55.46666666666667, "grad_norm": 10.856619834899902, "learning_rate": 1.3389121338912134e-06, "loss": 0.0579, "step": 208000 }, { "epoch": 55.46666666666667, "eval_cer": 30.375000000000004, "eval_loss": 1.2345651388168335, "eval_runtime": 80.9654, "eval_samples_per_second": 1.223, "eval_steps_per_second": 1.223, "step": 208000 }, { "epoch": 55.733333333333334, "grad_norm": 16.02048683166504, "learning_rate": 1.297071129707113e-06, "loss": 0.0529, "step": 209000 }, { "epoch": 55.733333333333334, "eval_cer": 31.374999999999996, "eval_loss": 1.2531167268753052, "eval_runtime": 81.8525, "eval_samples_per_second": 1.209, "eval_steps_per_second": 1.209, "step": 209000 }, { "epoch": 56.0, "grad_norm": 2.857239246368408, "learning_rate": 1.2552301255230125e-06, "loss": 0.0555, "step": 210000 }, { "epoch": 56.0, "eval_cer": 30.375000000000004, "eval_loss": 1.2908639907836914, "eval_runtime": 80.9554, "eval_samples_per_second": 1.223, "eval_steps_per_second": 1.223, "step": 210000 }, { "epoch": 56.266666666666666, "grad_norm": 0.15558059513568878, "learning_rate": 1.2133891213389122e-06, "loss": 0.0197, "step": 211000 }, { "epoch": 56.266666666666666, "eval_cer": 30.375000000000004, "eval_loss": 1.3364766836166382, "eval_runtime": 82.074, "eval_samples_per_second": 1.206, "eval_steps_per_second": 1.206, "step": 211000 }, { "epoch": 56.53333333333333, "grad_norm": 0.6175013780593872, "learning_rate": 1.1715481171548119e-06, "loss": 0.0204, "step": 212000 }, { "epoch": 56.53333333333333, "eval_cer": 34.375, "eval_loss": 1.3453211784362793, "eval_runtime": 82.0799, "eval_samples_per_second": 1.206, "eval_steps_per_second": 1.206, "step": 212000 }, { "epoch": 56.8, "grad_norm": 0.8239704966545105, "learning_rate": 1.1297071129707113e-06, "loss": 0.0196, "step": 213000 }, { "epoch": 56.8, "eval_cer": 32.5, "eval_loss": 1.3590368032455444, "eval_runtime": 81.7135, "eval_samples_per_second": 1.212, "eval_steps_per_second": 1.212, "step": 213000 }, { "epoch": 57.06666666666667, "grad_norm": 0.026584114879369736, "learning_rate": 1.087866108786611e-06, "loss": 0.0191, "step": 214000 }, { "epoch": 57.06666666666667, "eval_cer": 31.5, "eval_loss": 1.3954163789749146, "eval_runtime": 81.1063, "eval_samples_per_second": 1.221, "eval_steps_per_second": 1.221, "step": 214000 }, { "epoch": 57.333333333333336, "grad_norm": 9.560175895690918, "learning_rate": 1.0460251046025104e-06, "loss": 0.0062, "step": 215000 }, { "epoch": 57.333333333333336, "eval_cer": 30.75, "eval_loss": 1.425083041191101, "eval_runtime": 81.957, "eval_samples_per_second": 1.208, "eval_steps_per_second": 1.208, "step": 215000 }, { "epoch": 57.6, "grad_norm": 0.09854646027088165, "learning_rate": 1.0041841004184101e-06, "loss": 0.0083, "step": 216000 }, { "epoch": 57.6, "eval_cer": 32.0, "eval_loss": 1.4339793920516968, "eval_runtime": 81.544, "eval_samples_per_second": 1.214, "eval_steps_per_second": 1.214, "step": 216000 }, { "epoch": 57.86666666666667, "grad_norm": 1.014176845550537, "learning_rate": 9.623430962343098e-07, "loss": 0.0086, "step": 217000 }, { "epoch": 57.86666666666667, "eval_cer": 31.5, "eval_loss": 1.4078923463821411, "eval_runtime": 81.621, "eval_samples_per_second": 1.213, "eval_steps_per_second": 1.213, "step": 217000 }, { "epoch": 58.13333333333333, "grad_norm": 0.11759959161281586, "learning_rate": 9.205020920502093e-07, "loss": 0.0069, "step": 218000 }, { "epoch": 58.13333333333333, "eval_cer": 31.75, "eval_loss": 1.4461549520492554, "eval_runtime": 81.585, "eval_samples_per_second": 1.213, "eval_steps_per_second": 1.213, "step": 218000 }, { "epoch": 58.4, "grad_norm": 0.05193065479397774, "learning_rate": 8.786610878661088e-07, "loss": 0.003, "step": 219000 }, { "epoch": 58.4, "eval_cer": 29.25, "eval_loss": 1.4369601011276245, "eval_runtime": 81.4708, "eval_samples_per_second": 1.215, "eval_steps_per_second": 1.215, "step": 219000 }, { "epoch": 58.666666666666664, "grad_norm": 0.04452740028500557, "learning_rate": 8.368200836820084e-07, "loss": 0.0036, "step": 220000 }, { "epoch": 58.666666666666664, "eval_cer": 30.375000000000004, "eval_loss": 1.4363255500793457, "eval_runtime": 81.968, "eval_samples_per_second": 1.208, "eval_steps_per_second": 1.208, "step": 220000 }, { "epoch": 58.93333333333333, "grad_norm": 0.09197155386209488, "learning_rate": 7.94979079497908e-07, "loss": 0.0034, "step": 221000 }, { "epoch": 58.93333333333333, "eval_cer": 28.625, "eval_loss": 1.4594810009002686, "eval_runtime": 81.2508, "eval_samples_per_second": 1.218, "eval_steps_per_second": 1.218, "step": 221000 }, { "epoch": 59.2, "grad_norm": 0.09820029139518738, "learning_rate": 7.531380753138076e-07, "loss": 0.0021, "step": 222000 }, { "epoch": 59.2, "eval_cer": 29.125, "eval_loss": 1.4578285217285156, "eval_runtime": 82.0546, "eval_samples_per_second": 1.207, "eval_steps_per_second": 1.207, "step": 222000 }, { "epoch": 59.46666666666667, "grad_norm": 0.0352126844227314, "learning_rate": 7.112970711297073e-07, "loss": 0.0017, "step": 223000 }, { "epoch": 59.46666666666667, "eval_cer": 28.999999999999996, "eval_loss": 1.4742412567138672, "eval_runtime": 81.9935, "eval_samples_per_second": 1.207, "eval_steps_per_second": 1.207, "step": 223000 }, { "epoch": 59.733333333333334, "grad_norm": 2.8213016986846924, "learning_rate": 6.694560669456067e-07, "loss": 0.0016, "step": 224000 }, { "epoch": 59.733333333333334, "eval_cer": 29.75, "eval_loss": 1.46147882938385, "eval_runtime": 81.9577, "eval_samples_per_second": 1.208, "eval_steps_per_second": 1.208, "step": 224000 }, { "epoch": 60.0, "grad_norm": 0.026645859703421593, "learning_rate": 6.276150627615063e-07, "loss": 0.0011, "step": 225000 }, { "epoch": 60.0, "eval_cer": 28.375, "eval_loss": 1.4601441621780396, "eval_runtime": 82.2098, "eval_samples_per_second": 1.204, "eval_steps_per_second": 1.204, "step": 225000 } ], "logging_steps": 1000, "max_steps": 240000, "num_input_tokens_seen": 0, "num_train_epochs": 64, "save_steps": 1000, "total_flos": 9.184321502281728e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }