| { | |
| "best_metric": 28.375, | |
| "best_model_checkpoint": "whisper-medium-clean/checkpoint-225000", | |
| "epoch": 60.0, | |
| "eval_steps": 1000, | |
| "global_step": 225000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 28.940319061279297, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 3.7819, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_cer": 81.94444444444444, | |
| "eval_loss": 7.813055038452148, | |
| "eval_runtime": 6.8451, | |
| "eval_samples_per_second": 1.315, | |
| "eval_steps_per_second": 1.315, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 26.397560119628906, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 3.1011, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_cer": 81.94444444444444, | |
| "eval_loss": 6.869063377380371, | |
| "eval_runtime": 6.9299, | |
| "eval_samples_per_second": 1.299, | |
| "eval_steps_per_second": 1.299, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "grad_norm": 25.42325782775879, | |
| "learning_rate": 3e-06, | |
| "loss": 2.9982, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "eval_cer": 81.94444444444444, | |
| "eval_loss": 8.201728820800781, | |
| "eval_runtime": 6.9228, | |
| "eval_samples_per_second": 1.3, | |
| "eval_steps_per_second": 1.3, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 26.787145614624023, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.1424, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_cer": 80.55555555555556, | |
| "eval_loss": 6.396951675415039, | |
| "eval_runtime": 6.9264, | |
| "eval_samples_per_second": 1.299, | |
| "eval_steps_per_second": 1.299, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "grad_norm": 12.553820610046387, | |
| "learning_rate": 5e-06, | |
| "loss": 1.1901, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "eval_cer": 77.77777777777779, | |
| "eval_loss": 5.309788227081299, | |
| "eval_runtime": 6.8991, | |
| "eval_samples_per_second": 1.305, | |
| "eval_steps_per_second": 1.305, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 11.38632583618164, | |
| "learning_rate": 6e-06, | |
| "loss": 0.6653, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_cer": 77.77777777777779, | |
| "eval_loss": 4.889711380004883, | |
| "eval_runtime": 6.8936, | |
| "eval_samples_per_second": 1.306, | |
| "eval_steps_per_second": 1.306, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 1.9933379888534546, | |
| "learning_rate": 7e-06, | |
| "loss": 0.4194, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "eval_cer": 79.16666666666666, | |
| "eval_loss": 4.77920389175415, | |
| "eval_runtime": 6.9368, | |
| "eval_samples_per_second": 1.297, | |
| "eval_steps_per_second": 1.297, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.9275708198547363, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.346, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_cer": 81.94444444444444, | |
| "eval_loss": 4.646539211273193, | |
| "eval_runtime": 7.0626, | |
| "eval_samples_per_second": 1.274, | |
| "eval_steps_per_second": 1.274, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "grad_norm": 9.696667671203613, | |
| "learning_rate": 9e-06, | |
| "loss": 0.1948, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "eval_cer": 83.33333333333334, | |
| "eval_loss": 4.1625494956970215, | |
| "eval_runtime": 6.9072, | |
| "eval_samples_per_second": 1.303, | |
| "eval_steps_per_second": 1.303, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 4.2763776779174805, | |
| "learning_rate": 0.0, | |
| "loss": 0.0696, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_cer": 62.5, | |
| "eval_loss": 3.3797714710235596, | |
| "eval_runtime": 7.009, | |
| "eval_samples_per_second": 1.284, | |
| "eval_steps_per_second": 1.284, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.029333333333333333, | |
| "grad_norm": 30.060693740844727, | |
| "learning_rate": 9.88888888888889e-06, | |
| "loss": 2.9007, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.029333333333333333, | |
| "eval_cer": 74.27184466019418, | |
| "eval_loss": 2.2464828491210938, | |
| "eval_runtime": 77.163, | |
| "eval_samples_per_second": 1.283, | |
| "eval_steps_per_second": 1.283, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 23.69278907775879, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 2.3313, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "eval_cer": 73.05825242718447, | |
| "eval_loss": 1.8683823347091675, | |
| "eval_runtime": 77.5893, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 1.276, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.034666666666666665, | |
| "grad_norm": 22.04669761657715, | |
| "learning_rate": 9.666666666666667e-06, | |
| "loss": 2.1455, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.034666666666666665, | |
| "eval_cer": 65.77669902912622, | |
| "eval_loss": 1.8104327917099, | |
| "eval_runtime": 77.4412, | |
| "eval_samples_per_second": 1.278, | |
| "eval_steps_per_second": 1.278, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.037333333333333336, | |
| "grad_norm": 22.959644317626953, | |
| "learning_rate": 9.555555555555556e-06, | |
| "loss": 1.9559, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.037333333333333336, | |
| "eval_cer": 84.10194174757282, | |
| "eval_loss": 1.7080837488174438, | |
| "eval_runtime": 79.7762, | |
| "eval_samples_per_second": 1.241, | |
| "eval_steps_per_second": 1.241, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 20.132877349853516, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 1.7925, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_cer": 57.76699029126213, | |
| "eval_loss": 1.6740916967391968, | |
| "eval_runtime": 77.3782, | |
| "eval_samples_per_second": 1.279, | |
| "eval_steps_per_second": 1.279, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "grad_norm": 24.77916717529297, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 1.6834, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "eval_cer": 56.432038834951456, | |
| "eval_loss": 1.6661072969436646, | |
| "eval_runtime": 77.4224, | |
| "eval_samples_per_second": 1.279, | |
| "eval_steps_per_second": 1.279, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04533333333333334, | |
| "grad_norm": 18.739564895629883, | |
| "learning_rate": 9.222222222222224e-06, | |
| "loss": 1.8451, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04533333333333334, | |
| "eval_cer": 55.46116504854369, | |
| "eval_loss": 1.6888178586959839, | |
| "eval_runtime": 77.204, | |
| "eval_samples_per_second": 1.282, | |
| "eval_steps_per_second": 1.282, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 21.450254440307617, | |
| "learning_rate": 9.111111111111112e-06, | |
| "loss": 1.7342, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "eval_cer": 58.37378640776699, | |
| "eval_loss": 1.6961427927017212, | |
| "eval_runtime": 76.8506, | |
| "eval_samples_per_second": 1.288, | |
| "eval_steps_per_second": 1.288, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.050666666666666665, | |
| "grad_norm": 21.943395614624023, | |
| "learning_rate": 9e-06, | |
| "loss": 1.6679, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.050666666666666665, | |
| "eval_cer": 57.16019417475729, | |
| "eval_loss": 1.7804681062698364, | |
| "eval_runtime": 77.6099, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 1.276, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 22.061710357666016, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 1.609, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "eval_cer": 54.490291262135926, | |
| "eval_loss": 1.565760612487793, | |
| "eval_runtime": 77.5643, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 1.276, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 15.489553451538086, | |
| "learning_rate": 8.777777777777778e-06, | |
| "loss": 1.598, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "eval_cer": 52.54854368932039, | |
| "eval_loss": 1.6144999265670776, | |
| "eval_runtime": 77.4908, | |
| "eval_samples_per_second": 1.278, | |
| "eval_steps_per_second": 1.278, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.058666666666666666, | |
| "grad_norm": 18.321216583251953, | |
| "learning_rate": 8.666666666666668e-06, | |
| "loss": 1.506, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.058666666666666666, | |
| "eval_cer": 55.94660194174757, | |
| "eval_loss": 1.6863118410110474, | |
| "eval_runtime": 76.3715, | |
| "eval_samples_per_second": 1.296, | |
| "eval_steps_per_second": 1.296, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.06133333333333333, | |
| "grad_norm": 27.31436538696289, | |
| "learning_rate": 8.555555555555556e-06, | |
| "loss": 1.6391, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06133333333333333, | |
| "eval_cer": 55.582524271844655, | |
| "eval_loss": 1.6335813999176025, | |
| "eval_runtime": 77.1677, | |
| "eval_samples_per_second": 1.283, | |
| "eval_steps_per_second": 1.283, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 16.303030014038086, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 1.2887, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "eval_cer": 55.21844660194175, | |
| "eval_loss": 1.7295167446136475, | |
| "eval_runtime": 76.984, | |
| "eval_samples_per_second": 1.286, | |
| "eval_steps_per_second": 1.286, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 15.233794212341309, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.5345, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "eval_cer": 55.582524271844655, | |
| "eval_loss": 1.6172842979431152, | |
| "eval_runtime": 77.6597, | |
| "eval_samples_per_second": 1.275, | |
| "eval_steps_per_second": 1.275, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06933333333333333, | |
| "grad_norm": 23.4495906829834, | |
| "learning_rate": 8.222222222222222e-06, | |
| "loss": 1.5216, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06933333333333333, | |
| "eval_cer": 55.33980582524271, | |
| "eval_loss": 1.6310383081436157, | |
| "eval_runtime": 77.6226, | |
| "eval_samples_per_second": 1.275, | |
| "eval_steps_per_second": 1.275, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 14.853434562683105, | |
| "learning_rate": 8.111111111111112e-06, | |
| "loss": 1.6175, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "eval_cer": 52.54854368932039, | |
| "eval_loss": 1.6513257026672363, | |
| "eval_runtime": 77.2293, | |
| "eval_samples_per_second": 1.282, | |
| "eval_steps_per_second": 1.282, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 25.87192153930664, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.5837, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "eval_cer": 53.7621359223301, | |
| "eval_loss": 1.6197609901428223, | |
| "eval_runtime": 77.8112, | |
| "eval_samples_per_second": 1.272, | |
| "eval_steps_per_second": 1.272, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.07733333333333334, | |
| "grad_norm": 23.910364151000977, | |
| "learning_rate": 7.88888888888889e-06, | |
| "loss": 1.4979, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07733333333333334, | |
| "eval_cer": 51.334951456310684, | |
| "eval_loss": 1.6021751165390015, | |
| "eval_runtime": 77.6869, | |
| "eval_samples_per_second": 1.274, | |
| "eval_steps_per_second": 1.274, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 18.366762161254883, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 1.4745, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_cer": 52.79126213592234, | |
| "eval_loss": 1.5048097372055054, | |
| "eval_runtime": 78.2047, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 1.266, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08266666666666667, | |
| "grad_norm": 18.047597885131836, | |
| "learning_rate": 7.666666666666667e-06, | |
| "loss": 1.5534, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08266666666666667, | |
| "eval_cer": 52.79126213592234, | |
| "eval_loss": 1.4332234859466553, | |
| "eval_runtime": 77.7797, | |
| "eval_samples_per_second": 1.273, | |
| "eval_steps_per_second": 1.273, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 18.885778427124023, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 1.3683, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "eval_cer": 50.8495145631068, | |
| "eval_loss": 1.466812252998352, | |
| "eval_runtime": 77.7517, | |
| "eval_samples_per_second": 1.273, | |
| "eval_steps_per_second": 1.273, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 17.74494743347168, | |
| "learning_rate": 7.444444444444445e-06, | |
| "loss": 1.5475, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "eval_cer": 48.786407766990294, | |
| "eval_loss": 1.3928931951522827, | |
| "eval_runtime": 78.0245, | |
| "eval_samples_per_second": 1.269, | |
| "eval_steps_per_second": 1.269, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.09066666666666667, | |
| "grad_norm": 22.675891876220703, | |
| "learning_rate": 7.333333333333333e-06, | |
| "loss": 1.4288, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09066666666666667, | |
| "eval_cer": 48.66504854368932, | |
| "eval_loss": 1.3858826160430908, | |
| "eval_runtime": 78.4821, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.09333333333333334, | |
| "grad_norm": 16.487394332885742, | |
| "learning_rate": 7.222222222222223e-06, | |
| "loss": 1.4127, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.09333333333333334, | |
| "eval_cer": 51.45631067961165, | |
| "eval_loss": 1.5098240375518799, | |
| "eval_runtime": 77.7435, | |
| "eval_samples_per_second": 1.273, | |
| "eval_steps_per_second": 1.273, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 26.096511840820312, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 1.3516, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "eval_cer": 52.54854368932039, | |
| "eval_loss": 1.582693338394165, | |
| "eval_runtime": 78.1295, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 1.267, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09866666666666667, | |
| "grad_norm": 14.613516807556152, | |
| "learning_rate": 7e-06, | |
| "loss": 1.5494, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.09866666666666667, | |
| "eval_cer": 87.01456310679612, | |
| "eval_loss": 1.5777655839920044, | |
| "eval_runtime": 79.9586, | |
| "eval_samples_per_second": 1.238, | |
| "eval_steps_per_second": 1.238, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.10133333333333333, | |
| "grad_norm": 33.40306091308594, | |
| "learning_rate": 6.88888888888889e-06, | |
| "loss": 1.3882, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.10133333333333333, | |
| "eval_cer": 87.86407766990291, | |
| "eval_loss": 1.5355967283248901, | |
| "eval_runtime": 80.2797, | |
| "eval_samples_per_second": 1.233, | |
| "eval_steps_per_second": 1.233, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 22.47502899169922, | |
| "learning_rate": 6.777777777777779e-06, | |
| "loss": 1.388, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "eval_cer": 52.42718446601942, | |
| "eval_loss": 1.5560169219970703, | |
| "eval_runtime": 77.0569, | |
| "eval_samples_per_second": 1.285, | |
| "eval_steps_per_second": 1.285, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 14.884334564208984, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 1.3382, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "eval_cer": 50.60679611650486, | |
| "eval_loss": 1.4680137634277344, | |
| "eval_runtime": 77.1152, | |
| "eval_samples_per_second": 1.284, | |
| "eval_steps_per_second": 1.284, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10933333333333334, | |
| "grad_norm": 25.081527709960938, | |
| "learning_rate": 6.555555555555556e-06, | |
| "loss": 1.2751, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.10933333333333334, | |
| "eval_cer": 51.69902912621359, | |
| "eval_loss": 1.4957218170166016, | |
| "eval_runtime": 77.3634, | |
| "eval_samples_per_second": 1.28, | |
| "eval_steps_per_second": 1.28, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 15.062318801879883, | |
| "learning_rate": 6.444444444444445e-06, | |
| "loss": 1.0807, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "eval_cer": 51.94174757281553, | |
| "eval_loss": 1.5191024541854858, | |
| "eval_runtime": 77.1298, | |
| "eval_samples_per_second": 1.284, | |
| "eval_steps_per_second": 1.284, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.11466666666666667, | |
| "grad_norm": 17.136804580688477, | |
| "learning_rate": 6.333333333333333e-06, | |
| "loss": 1.2305, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.11466666666666667, | |
| "eval_cer": 50.8495145631068, | |
| "eval_loss": 1.4538663625717163, | |
| "eval_runtime": 77.2074, | |
| "eval_samples_per_second": 1.282, | |
| "eval_steps_per_second": 1.282, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 20.760255813598633, | |
| "learning_rate": 6.222222222222223e-06, | |
| "loss": 1.201, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "eval_cer": 50.36407766990292, | |
| "eval_loss": 1.4003076553344727, | |
| "eval_runtime": 77.7921, | |
| "eval_samples_per_second": 1.273, | |
| "eval_steps_per_second": 1.273, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 15.381857872009277, | |
| "learning_rate": 6.111111111111112e-06, | |
| "loss": 1.2639, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_cer": 51.334951456310684, | |
| "eval_loss": 1.4118114709854126, | |
| "eval_runtime": 77.581, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 1.276, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.12266666666666666, | |
| "grad_norm": 15.08823013305664, | |
| "learning_rate": 6e-06, | |
| "loss": 1.2348, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.12266666666666666, | |
| "eval_cer": 51.334951456310684, | |
| "eval_loss": 1.3688950538635254, | |
| "eval_runtime": 78.0141, | |
| "eval_samples_per_second": 1.269, | |
| "eval_steps_per_second": 1.269, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.12533333333333332, | |
| "grad_norm": 13.176762580871582, | |
| "learning_rate": 5.88888888888889e-06, | |
| "loss": 1.1162, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.12533333333333332, | |
| "eval_cer": 69.05339805825243, | |
| "eval_loss": 1.4524732828140259, | |
| "eval_runtime": 80.7402, | |
| "eval_samples_per_second": 1.226, | |
| "eval_steps_per_second": 1.226, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 17.04905891418457, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 1.3585, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "eval_cer": 51.213592233009706, | |
| "eval_loss": 1.4150296449661255, | |
| "eval_runtime": 78.0509, | |
| "eval_samples_per_second": 1.268, | |
| "eval_steps_per_second": 1.268, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.13066666666666665, | |
| "grad_norm": 14.833456039428711, | |
| "learning_rate": 5.666666666666667e-06, | |
| "loss": 1.1588, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.13066666666666665, | |
| "eval_cer": 51.213592233009706, | |
| "eval_loss": 1.4175814390182495, | |
| "eval_runtime": 77.9995, | |
| "eval_samples_per_second": 1.269, | |
| "eval_steps_per_second": 1.269, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 16.29265594482422, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 1.3108, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "eval_cer": 52.18446601941748, | |
| "eval_loss": 1.4835201501846313, | |
| "eval_runtime": 77.8901, | |
| "eval_samples_per_second": 1.271, | |
| "eval_steps_per_second": 1.271, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 24.089468002319336, | |
| "learning_rate": 5.444444444444445e-06, | |
| "loss": 1.2215, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "eval_cer": 53.03398058252428, | |
| "eval_loss": 1.4001387357711792, | |
| "eval_runtime": 77.5673, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 1.276, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 19.17269515991211, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 1.1983, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "eval_cer": 50.12135922330098, | |
| "eval_loss": 1.3796579837799072, | |
| "eval_runtime": 78.2009, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 1.266, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.14133333333333334, | |
| "grad_norm": 16.18842887878418, | |
| "learning_rate": 5.2222222222222226e-06, | |
| "loss": 1.1601, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.14133333333333334, | |
| "eval_cer": 52.79126213592234, | |
| "eval_loss": 1.3958033323287964, | |
| "eval_runtime": 78.062, | |
| "eval_samples_per_second": 1.268, | |
| "eval_steps_per_second": 1.268, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 18.497060775756836, | |
| "learning_rate": 5.1111111111111115e-06, | |
| "loss": 1.1819, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "eval_cer": 51.94174757281553, | |
| "eval_loss": 1.3504077196121216, | |
| "eval_runtime": 78.1284, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 1.267, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14666666666666667, | |
| "grad_norm": 18.69913673400879, | |
| "learning_rate": 5e-06, | |
| "loss": 1.1447, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.14666666666666667, | |
| "eval_cer": 50.970873786407765, | |
| "eval_loss": 1.3514248132705688, | |
| "eval_runtime": 77.9101, | |
| "eval_samples_per_second": 1.271, | |
| "eval_steps_per_second": 1.271, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 17.90473747253418, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 1.1678, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "eval_cer": 51.334951456310684, | |
| "eval_loss": 1.2929948568344116, | |
| "eval_runtime": 78.5632, | |
| "eval_samples_per_second": 1.26, | |
| "eval_steps_per_second": 1.26, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 15.230451583862305, | |
| "learning_rate": 4.777777777777778e-06, | |
| "loss": 1.0188, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "eval_cer": 53.15533980582524, | |
| "eval_loss": 1.2831701040267944, | |
| "eval_runtime": 78.5138, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.15466666666666667, | |
| "grad_norm": 29.844749450683594, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 1.1046, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15466666666666667, | |
| "eval_cer": 54.36893203883495, | |
| "eval_loss": 1.3857146501541138, | |
| "eval_runtime": 78.0718, | |
| "eval_samples_per_second": 1.268, | |
| "eval_steps_per_second": 1.268, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15733333333333333, | |
| "grad_norm": 16.595338821411133, | |
| "learning_rate": 4.555555555555556e-06, | |
| "loss": 1.0125, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.15733333333333333, | |
| "eval_cer": 51.45631067961165, | |
| "eval_loss": 1.3539527654647827, | |
| "eval_runtime": 77.8613, | |
| "eval_samples_per_second": 1.271, | |
| "eval_steps_per_second": 1.271, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 14.425292015075684, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 1.1293, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_cer": 52.9126213592233, | |
| "eval_loss": 1.3646886348724365, | |
| "eval_runtime": 77.96, | |
| "eval_samples_per_second": 1.27, | |
| "eval_steps_per_second": 1.27, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.16266666666666665, | |
| "grad_norm": 20.61813735961914, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 1.2416, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.16266666666666665, | |
| "eval_cer": 54.73300970873787, | |
| "eval_loss": 1.3387656211853027, | |
| "eval_runtime": 78.0058, | |
| "eval_samples_per_second": 1.269, | |
| "eval_steps_per_second": 1.269, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.16533333333333333, | |
| "grad_norm": 17.798131942749023, | |
| "learning_rate": 4.222222222222223e-06, | |
| "loss": 1.0653, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.16533333333333333, | |
| "eval_cer": 85.43689320388349, | |
| "eval_loss": 1.3320649862289429, | |
| "eval_runtime": 81.5533, | |
| "eval_samples_per_second": 1.214, | |
| "eval_steps_per_second": 1.214, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 19.9289608001709, | |
| "learning_rate": 4.111111111111111e-06, | |
| "loss": 1.2892, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "eval_cer": 53.15533980582524, | |
| "eval_loss": 1.307315707206726, | |
| "eval_runtime": 78.4977, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 15.202763557434082, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.2046, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "eval_cer": 53.640776699029125, | |
| "eval_loss": 1.3364214897155762, | |
| "eval_runtime": 78.8468, | |
| "eval_samples_per_second": 1.256, | |
| "eval_steps_per_second": 1.256, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.17333333333333334, | |
| "grad_norm": 15.811935424804688, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 1.0636, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.17333333333333334, | |
| "eval_cer": 54.97572815533981, | |
| "eval_loss": 1.3250948190689087, | |
| "eval_runtime": 78.9646, | |
| "eval_samples_per_second": 1.254, | |
| "eval_steps_per_second": 1.254, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 15.582385063171387, | |
| "learning_rate": 3.777777777777778e-06, | |
| "loss": 1.1544, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "eval_cer": 54.490291262135926, | |
| "eval_loss": 1.2882179021835327, | |
| "eval_runtime": 78.5958, | |
| "eval_samples_per_second": 1.26, | |
| "eval_steps_per_second": 1.26, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.17866666666666667, | |
| "grad_norm": 14.485274314880371, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.8095, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.17866666666666667, | |
| "eval_cer": 54.004854368932044, | |
| "eval_loss": 1.2490488290786743, | |
| "eval_runtime": 79.0105, | |
| "eval_samples_per_second": 1.253, | |
| "eval_steps_per_second": 1.253, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 14.364302635192871, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 1.0163, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "eval_cer": 53.27669902912622, | |
| "eval_loss": 1.251212477684021, | |
| "eval_runtime": 78.8342, | |
| "eval_samples_per_second": 1.256, | |
| "eval_steps_per_second": 1.256, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 18.270353317260742, | |
| "learning_rate": 3.444444444444445e-06, | |
| "loss": 1.1736, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "eval_cer": 52.06310679611651, | |
| "eval_loss": 1.2570871114730835, | |
| "eval_runtime": 78.3257, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.18666666666666668, | |
| "grad_norm": 15.880080223083496, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.9871, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18666666666666668, | |
| "eval_cer": 51.577669902912625, | |
| "eval_loss": 1.249259352684021, | |
| "eval_runtime": 78.4177, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 1.262, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18933333333333333, | |
| "grad_norm": 21.195627212524414, | |
| "learning_rate": 3.2222222222222227e-06, | |
| "loss": 1.0884, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.18933333333333333, | |
| "eval_cer": 52.42718446601942, | |
| "eval_loss": 1.227440595626831, | |
| "eval_runtime": 78.3084, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 23.771629333496094, | |
| "learning_rate": 3.1111111111111116e-06, | |
| "loss": 1.1232, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "eval_cer": 52.06310679611651, | |
| "eval_loss": 1.2512867450714111, | |
| "eval_runtime": 78.3655, | |
| "eval_samples_per_second": 1.263, | |
| "eval_steps_per_second": 1.263, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.19466666666666665, | |
| "grad_norm": 10.434704780578613, | |
| "learning_rate": 3e-06, | |
| "loss": 1.0498, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.19466666666666665, | |
| "eval_cer": 52.42718446601942, | |
| "eval_loss": 1.2518444061279297, | |
| "eval_runtime": 78.1699, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 1.266, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.19733333333333333, | |
| "grad_norm": 21.347713470458984, | |
| "learning_rate": 2.888888888888889e-06, | |
| "loss": 1.0718, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.19733333333333333, | |
| "eval_cer": 51.213592233009706, | |
| "eval_loss": 1.2372485399246216, | |
| "eval_runtime": 78.2758, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 16.782791137695312, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 1.1631, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_cer": 50.8495145631068, | |
| "eval_loss": 1.2331024408340454, | |
| "eval_runtime": 78.246, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 12.992459297180176, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 1.0654, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "eval_cer": 52.18446601941748, | |
| "eval_loss": 1.2558914422988892, | |
| "eval_runtime": 78.4164, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 1.262, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.20533333333333334, | |
| "grad_norm": 13.584222793579102, | |
| "learning_rate": 2.5555555555555557e-06, | |
| "loss": 0.9858, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.20533333333333334, | |
| "eval_cer": 53.7621359223301, | |
| "eval_loss": 1.2489798069000244, | |
| "eval_runtime": 78.2835, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 12.311653137207031, | |
| "learning_rate": 2.4444444444444447e-06, | |
| "loss": 1.155, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "eval_cer": 52.18446601941748, | |
| "eval_loss": 1.2482852935791016, | |
| "eval_runtime": 77.9862, | |
| "eval_samples_per_second": 1.269, | |
| "eval_steps_per_second": 1.269, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.21066666666666667, | |
| "grad_norm": 14.416386604309082, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 1.077, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.21066666666666667, | |
| "eval_cer": 51.94174757281553, | |
| "eval_loss": 1.2321821451187134, | |
| "eval_runtime": 78.3289, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 20.134000778198242, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.9659, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "eval_cer": 52.18446601941748, | |
| "eval_loss": 1.2238733768463135, | |
| "eval_runtime": 78.379, | |
| "eval_samples_per_second": 1.263, | |
| "eval_steps_per_second": 1.263, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 16.14183807373047, | |
| "learning_rate": 2.1111111111111114e-06, | |
| "loss": 1.0858, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "eval_cer": 53.51941747572816, | |
| "eval_loss": 1.2186323404312134, | |
| "eval_runtime": 78.305, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.21866666666666668, | |
| "grad_norm": 14.142354965209961, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.0114, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.21866666666666668, | |
| "eval_cer": 92.35436893203884, | |
| "eval_loss": 1.210742712020874, | |
| "eval_runtime": 81.017, | |
| "eval_samples_per_second": 1.222, | |
| "eval_steps_per_second": 1.222, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.22133333333333333, | |
| "grad_norm": 17.690200805664062, | |
| "learning_rate": 1.888888888888889e-06, | |
| "loss": 1.0711, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.22133333333333333, | |
| "eval_cer": 91.99029126213593, | |
| "eval_loss": 1.2100653648376465, | |
| "eval_runtime": 81.155, | |
| "eval_samples_per_second": 1.22, | |
| "eval_steps_per_second": 1.22, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 12.702727317810059, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 1.0218, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "eval_cer": 53.03398058252428, | |
| "eval_loss": 1.197860598564148, | |
| "eval_runtime": 78.126, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 1.267, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.22666666666666666, | |
| "grad_norm": 12.827967643737793, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.9593, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.22666666666666666, | |
| "eval_cer": 52.54854368932039, | |
| "eval_loss": 1.1930850744247437, | |
| "eval_runtime": 78.0654, | |
| "eval_samples_per_second": 1.268, | |
| "eval_steps_per_second": 1.268, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.22933333333333333, | |
| "grad_norm": 16.459444046020508, | |
| "learning_rate": 1.5555555555555558e-06, | |
| "loss": 1.0961, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.22933333333333333, | |
| "eval_cer": 52.42718446601942, | |
| "eval_loss": 1.168959140777588, | |
| "eval_runtime": 78.3283, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 13.297350883483887, | |
| "learning_rate": 1.4444444444444445e-06, | |
| "loss": 0.8744, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "eval_cer": 51.577669902912625, | |
| "eval_loss": 1.1725083589553833, | |
| "eval_runtime": 78.1274, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 1.267, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.23466666666666666, | |
| "grad_norm": 14.628539085388184, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 1.0125, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.23466666666666666, | |
| "eval_cer": 53.398058252427184, | |
| "eval_loss": 1.1670215129852295, | |
| "eval_runtime": 78.3518, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.23733333333333334, | |
| "grad_norm": 17.61985206604004, | |
| "learning_rate": 1.2222222222222223e-06, | |
| "loss": 1.0657, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.23733333333333334, | |
| "eval_cer": 54.12621359223301, | |
| "eval_loss": 1.1755800247192383, | |
| "eval_runtime": 79.0194, | |
| "eval_samples_per_second": 1.253, | |
| "eval_steps_per_second": 1.253, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 13.798720359802246, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.049, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_cer": 53.883495145631066, | |
| "eval_loss": 1.1638038158416748, | |
| "eval_runtime": 78.5113, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.24266666666666667, | |
| "grad_norm": 23.81746482849121, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.0409, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.24266666666666667, | |
| "eval_cer": 53.883495145631066, | |
| "eval_loss": 1.1673129796981812, | |
| "eval_runtime": 78.4064, | |
| "eval_samples_per_second": 1.263, | |
| "eval_steps_per_second": 1.263, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.24533333333333332, | |
| "grad_norm": 10.653603553771973, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 1.0895, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.24533333333333332, | |
| "eval_cer": 90.53398058252428, | |
| "eval_loss": 1.1688292026519775, | |
| "eval_runtime": 80.6808, | |
| "eval_samples_per_second": 1.227, | |
| "eval_steps_per_second": 1.227, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 25.826753616333008, | |
| "learning_rate": 7.777777777777779e-07, | |
| "loss": 0.9301, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "eval_cer": 89.32038834951457, | |
| "eval_loss": 1.1726011037826538, | |
| "eval_runtime": 80.7112, | |
| "eval_samples_per_second": 1.227, | |
| "eval_steps_per_second": 1.227, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.25066666666666665, | |
| "grad_norm": 18.675426483154297, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.9795, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.25066666666666665, | |
| "eval_cer": 89.44174757281553, | |
| "eval_loss": 1.1721538305282593, | |
| "eval_runtime": 80.2393, | |
| "eval_samples_per_second": 1.234, | |
| "eval_steps_per_second": 1.234, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.25333333333333335, | |
| "grad_norm": 19.179977416992188, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 0.9664, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.25333333333333335, | |
| "eval_cer": 89.32038834951457, | |
| "eval_loss": 1.1713266372680664, | |
| "eval_runtime": 80.8455, | |
| "eval_samples_per_second": 1.225, | |
| "eval_steps_per_second": 1.225, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 12.702851295471191, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 0.8043, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "eval_cer": 89.44174757281553, | |
| "eval_loss": 1.1796256303787231, | |
| "eval_runtime": 80.7217, | |
| "eval_samples_per_second": 1.226, | |
| "eval_steps_per_second": 1.226, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.25866666666666666, | |
| "grad_norm": 14.771796226501465, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 1.0233, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.25866666666666666, | |
| "eval_cer": 89.44174757281553, | |
| "eval_loss": 1.183519959449768, | |
| "eval_runtime": 80.3959, | |
| "eval_samples_per_second": 1.231, | |
| "eval_steps_per_second": 1.231, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.2613333333333333, | |
| "grad_norm": 21.99308204650879, | |
| "learning_rate": 2.2222222222222224e-07, | |
| "loss": 0.9277, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2613333333333333, | |
| "eval_cer": 89.32038834951457, | |
| "eval_loss": 1.1782771348953247, | |
| "eval_runtime": 81.6692, | |
| "eval_samples_per_second": 1.212, | |
| "eval_steps_per_second": 1.212, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 15.665560722351074, | |
| "learning_rate": 1.1111111111111112e-07, | |
| "loss": 1.029, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "eval_cer": 89.32038834951457, | |
| "eval_loss": 1.1745351552963257, | |
| "eval_runtime": 80.7175, | |
| "eval_samples_per_second": 1.226, | |
| "eval_steps_per_second": 1.226, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 16.09244728088379, | |
| "learning_rate": 0.0, | |
| "loss": 1.0365, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "eval_cer": 89.32038834951457, | |
| "eval_loss": 1.1732313632965088, | |
| "eval_runtime": 80.9658, | |
| "eval_samples_per_second": 1.223, | |
| "eval_steps_per_second": 1.223, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2693333333333333, | |
| "grad_norm": 16.427200317382812, | |
| "learning_rate": 9.080808080808081e-06, | |
| "loss": 1.0599, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.2693333333333333, | |
| "eval_cer": 50.31525851197982, | |
| "eval_loss": 1.0901631116867065, | |
| "eval_runtime": 76.9254, | |
| "eval_samples_per_second": 1.287, | |
| "eval_steps_per_second": 1.287, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 23.313121795654297, | |
| "learning_rate": 9.070707070707072e-06, | |
| "loss": 1.0114, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "eval_cer": 49.18032786885246, | |
| "eval_loss": 1.0451587438583374, | |
| "eval_runtime": 77.2747, | |
| "eval_samples_per_second": 1.281, | |
| "eval_steps_per_second": 1.281, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.27466666666666667, | |
| "grad_norm": 16.21845245361328, | |
| "learning_rate": 9.06060606060606e-06, | |
| "loss": 1.1568, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.27466666666666667, | |
| "eval_cer": 69.4829760403531, | |
| "eval_loss": 1.1285865306854248, | |
| "eval_runtime": 80.287, | |
| "eval_samples_per_second": 1.233, | |
| "eval_steps_per_second": 1.233, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2773333333333333, | |
| "grad_norm": 21.60283660888672, | |
| "learning_rate": 9.050505050505052e-06, | |
| "loss": 1.1959, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2773333333333333, | |
| "eval_cer": 53.84615384615385, | |
| "eval_loss": 1.1765520572662354, | |
| "eval_runtime": 78.8941, | |
| "eval_samples_per_second": 1.255, | |
| "eval_steps_per_second": 1.255, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 13.436543464660645, | |
| "learning_rate": 9.040404040404042e-06, | |
| "loss": 1.0342, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_cer": 52.96343001261034, | |
| "eval_loss": 1.1394500732421875, | |
| "eval_runtime": 78.5797, | |
| "eval_samples_per_second": 1.26, | |
| "eval_steps_per_second": 1.26, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2826666666666667, | |
| "grad_norm": 15.90085220336914, | |
| "learning_rate": 9.030303030303031e-06, | |
| "loss": 1.0229, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2826666666666667, | |
| "eval_cer": 54.09836065573771, | |
| "eval_loss": 1.1051841974258423, | |
| "eval_runtime": 78.2017, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 1.266, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2853333333333333, | |
| "grad_norm": 17.880510330200195, | |
| "learning_rate": 9.020202020202021e-06, | |
| "loss": 1.0015, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.2853333333333333, | |
| "eval_cer": 59.2686002522068, | |
| "eval_loss": 1.1580164432525635, | |
| "eval_runtime": 78.8364, | |
| "eval_samples_per_second": 1.256, | |
| "eval_steps_per_second": 1.256, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 16.91229248046875, | |
| "learning_rate": 9.010101010101012e-06, | |
| "loss": 1.1533, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "eval_cer": 56.49432534678437, | |
| "eval_loss": 1.168419599533081, | |
| "eval_runtime": 78.6616, | |
| "eval_samples_per_second": 1.259, | |
| "eval_steps_per_second": 1.259, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2906666666666667, | |
| "grad_norm": 26.38800621032715, | |
| "learning_rate": 9e-06, | |
| "loss": 1.1976, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.2906666666666667, | |
| "eval_cer": 51.19798234552333, | |
| "eval_loss": 1.0737069845199585, | |
| "eval_runtime": 78.97, | |
| "eval_samples_per_second": 1.254, | |
| "eval_steps_per_second": 1.254, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.29333333333333333, | |
| "grad_norm": 11.324457168579102, | |
| "learning_rate": 8.98989898989899e-06, | |
| "loss": 1.0131, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.29333333333333333, | |
| "eval_cer": 51.45018915510718, | |
| "eval_loss": 1.1314114332199097, | |
| "eval_runtime": 78.2488, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 20.900928497314453, | |
| "learning_rate": 8.97979797979798e-06, | |
| "loss": 1.0431, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "eval_cer": 54.09836065573771, | |
| "eval_loss": 1.2488269805908203, | |
| "eval_runtime": 78.5131, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.2986666666666667, | |
| "grad_norm": 26.65207862854004, | |
| "learning_rate": 8.969696969696971e-06, | |
| "loss": 1.1839, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.2986666666666667, | |
| "eval_cer": 52.459016393442624, | |
| "eval_loss": 1.1696010828018188, | |
| "eval_runtime": 78.4873, | |
| "eval_samples_per_second": 1.261, | |
| "eval_steps_per_second": 1.261, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.30133333333333334, | |
| "grad_norm": 14.266066551208496, | |
| "learning_rate": 8.95959595959596e-06, | |
| "loss": 1.0215, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.30133333333333334, | |
| "eval_cer": 51.32408575031526, | |
| "eval_loss": 1.2062351703643799, | |
| "eval_runtime": 78.0558, | |
| "eval_samples_per_second": 1.268, | |
| "eval_steps_per_second": 1.268, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 13.581376075744629, | |
| "learning_rate": 8.94949494949495e-06, | |
| "loss": 0.9763, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "eval_cer": 51.19798234552333, | |
| "eval_loss": 1.1882985830307007, | |
| "eval_runtime": 78.2858, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.30666666666666664, | |
| "grad_norm": 15.974523544311523, | |
| "learning_rate": 8.93939393939394e-06, | |
| "loss": 1.1372, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.30666666666666664, | |
| "eval_cer": 48.675914249684745, | |
| "eval_loss": 1.1872459650039673, | |
| "eval_runtime": 77.7901, | |
| "eval_samples_per_second": 1.273, | |
| "eval_steps_per_second": 1.273, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.30933333333333335, | |
| "grad_norm": 16.938993453979492, | |
| "learning_rate": 8.92929292929293e-06, | |
| "loss": 1.1522, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.30933333333333335, | |
| "eval_cer": 47.91929382093317, | |
| "eval_loss": 1.1992875337600708, | |
| "eval_runtime": 78.9052, | |
| "eval_samples_per_second": 1.255, | |
| "eval_steps_per_second": 1.255, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 13.329106330871582, | |
| "learning_rate": 8.919191919191919e-06, | |
| "loss": 1.0531, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "eval_cer": 49.43253467843632, | |
| "eval_loss": 1.1403378248214722, | |
| "eval_runtime": 78.4504, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 1.262, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.31466666666666665, | |
| "grad_norm": 19.03329086303711, | |
| "learning_rate": 8.90909090909091e-06, | |
| "loss": 0.9934, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.31466666666666665, | |
| "eval_cer": 49.68474148802018, | |
| "eval_loss": 1.1829012632369995, | |
| "eval_runtime": 78.8873, | |
| "eval_samples_per_second": 1.255, | |
| "eval_steps_per_second": 1.255, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.31733333333333336, | |
| "grad_norm": 13.096351623535156, | |
| "learning_rate": 8.8989898989899e-06, | |
| "loss": 0.9284, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.31733333333333336, | |
| "eval_cer": 48.9281210592686, | |
| "eval_loss": 1.1716654300689697, | |
| "eval_runtime": 78.6464, | |
| "eval_samples_per_second": 1.259, | |
| "eval_steps_per_second": 1.259, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 15.558133125305176, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 1.0007, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_cer": 50.945775535939475, | |
| "eval_loss": 1.1625868082046509, | |
| "eval_runtime": 78.2452, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.32266666666666666, | |
| "grad_norm": 13.843978881835938, | |
| "learning_rate": 8.87878787878788e-06, | |
| "loss": 1.2634, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.32266666666666666, | |
| "eval_cer": 49.68474148802018, | |
| "eval_loss": 1.131119966506958, | |
| "eval_runtime": 78.4445, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 1.262, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3253333333333333, | |
| "grad_norm": 18.294511795043945, | |
| "learning_rate": 8.86868686868687e-06, | |
| "loss": 1.0039, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.3253333333333333, | |
| "eval_cer": 46.406052963430014, | |
| "eval_loss": 1.0547950267791748, | |
| "eval_runtime": 78.7958, | |
| "eval_samples_per_second": 1.256, | |
| "eval_steps_per_second": 1.256, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 16.219919204711914, | |
| "learning_rate": 8.85858585858586e-06, | |
| "loss": 0.9441, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "eval_cer": 49.30643127364439, | |
| "eval_loss": 1.1434153318405151, | |
| "eval_runtime": 79.0104, | |
| "eval_samples_per_second": 1.253, | |
| "eval_steps_per_second": 1.253, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.33066666666666666, | |
| "grad_norm": 14.341035842895508, | |
| "learning_rate": 8.84848484848485e-06, | |
| "loss": 0.8696, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.33066666666666666, | |
| "eval_cer": 48.9281210592686, | |
| "eval_loss": 1.1886433362960815, | |
| "eval_runtime": 78.6773, | |
| "eval_samples_per_second": 1.258, | |
| "eval_steps_per_second": 1.258, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 22.90915870666504, | |
| "learning_rate": 8.83838383838384e-06, | |
| "loss": 0.8534, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "eval_cer": 48.80201765447667, | |
| "eval_loss": 1.1410605907440186, | |
| "eval_runtime": 79.4849, | |
| "eval_samples_per_second": 1.246, | |
| "eval_steps_per_second": 1.246, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 19.806961059570312, | |
| "learning_rate": 8.82828282828283e-06, | |
| "loss": 1.2614, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "eval_cer": 46.910466582597735, | |
| "eval_loss": 0.9779375791549683, | |
| "eval_runtime": 79.2948, | |
| "eval_samples_per_second": 1.249, | |
| "eval_steps_per_second": 1.249, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.33866666666666667, | |
| "grad_norm": 17.70494270324707, | |
| "learning_rate": 8.818181818181819e-06, | |
| "loss": 0.8782, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.33866666666666667, | |
| "eval_cer": 51.576292559899116, | |
| "eval_loss": 1.040684700012207, | |
| "eval_runtime": 79.608, | |
| "eval_samples_per_second": 1.244, | |
| "eval_steps_per_second": 1.244, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3413333333333333, | |
| "grad_norm": 20.204618453979492, | |
| "learning_rate": 8.808080808080809e-06, | |
| "loss": 0.8056, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.3413333333333333, | |
| "eval_cer": 49.18032786885246, | |
| "eval_loss": 1.0853091478347778, | |
| "eval_runtime": 79.6188, | |
| "eval_samples_per_second": 1.243, | |
| "eval_steps_per_second": 1.243, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 19.028974533081055, | |
| "learning_rate": 8.7979797979798e-06, | |
| "loss": 1.0951, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "eval_cer": 51.071878940731395, | |
| "eval_loss": 1.1110036373138428, | |
| "eval_runtime": 79.392, | |
| "eval_samples_per_second": 1.247, | |
| "eval_steps_per_second": 1.247, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3466666666666667, | |
| "grad_norm": 16.38770294189453, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 0.8958, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3466666666666667, | |
| "eval_cer": 50.31525851197982, | |
| "eval_loss": 1.1046932935714722, | |
| "eval_runtime": 79.913, | |
| "eval_samples_per_second": 1.239, | |
| "eval_steps_per_second": 1.239, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 11.964598655700684, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 0.9507, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "eval_cer": 46.84014869888476, | |
| "eval_loss": 1.150152325630188, | |
| "eval_runtime": 78.5836, | |
| "eval_samples_per_second": 1.26, | |
| "eval_steps_per_second": 1.26, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 14.202909469604492, | |
| "learning_rate": 2.631578947368421e-06, | |
| "loss": 0.9197, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_cer": 45.72490706319702, | |
| "eval_loss": 1.0882861614227295, | |
| "eval_runtime": 79.236, | |
| "eval_samples_per_second": 1.249, | |
| "eval_steps_per_second": 1.249, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 8.897924423217773, | |
| "learning_rate": 2.105263157894737e-06, | |
| "loss": 0.9193, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "eval_cer": 45.84882280049566, | |
| "eval_loss": 1.081554651260376, | |
| "eval_runtime": 78.458, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 1.262, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4533333333333333, | |
| "grad_norm": 16.955913543701172, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "loss": 0.8502, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4533333333333333, | |
| "eval_cer": 47.21189591078067, | |
| "eval_loss": 1.0790964365005493, | |
| "eval_runtime": 78.3325, | |
| "eval_samples_per_second": 1.264, | |
| "eval_steps_per_second": 1.264, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 11.926959991455078, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "loss": 0.8387, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_cer": 45.353159851301115, | |
| "eval_loss": 1.054158091545105, | |
| "eval_runtime": 79.2789, | |
| "eval_samples_per_second": 1.249, | |
| "eval_steps_per_second": 1.249, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5066666666666667, | |
| "grad_norm": 9.69470500946045, | |
| "learning_rate": 5.263157894736843e-07, | |
| "loss": 0.8734, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5066666666666667, | |
| "eval_cer": 45.9727385377943, | |
| "eval_loss": 1.0481570959091187, | |
| "eval_runtime": 79.0464, | |
| "eval_samples_per_second": 1.252, | |
| "eval_steps_per_second": 1.252, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 21.345584869384766, | |
| "learning_rate": 0.0, | |
| "loss": 0.8679, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "eval_cer": 45.9727385377943, | |
| "eval_loss": 1.0438498258590698, | |
| "eval_runtime": 79.1208, | |
| "eval_samples_per_second": 1.251, | |
| "eval_steps_per_second": 1.251, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 8.957441329956055, | |
| "learning_rate": 9.637046307884857e-06, | |
| "loss": 0.8834, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_cer": 45.393858477970625, | |
| "eval_loss": 1.0715968608856201, | |
| "eval_runtime": 77.3709, | |
| "eval_samples_per_second": 1.28, | |
| "eval_steps_per_second": 1.28, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 8.574357986450195, | |
| "learning_rate": 9.51188986232791e-06, | |
| "loss": 0.757, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "eval_cer": 42.72363150867824, | |
| "eval_loss": 1.043562650680542, | |
| "eval_runtime": 78.9351, | |
| "eval_samples_per_second": 1.254, | |
| "eval_steps_per_second": 1.254, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 10.907554626464844, | |
| "learning_rate": 9.386733416770964e-06, | |
| "loss": 0.6228, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "eval_cer": 41.388518024032045, | |
| "eval_loss": 0.9789133667945862, | |
| "eval_runtime": 83.325, | |
| "eval_samples_per_second": 1.188, | |
| "eval_steps_per_second": 1.188, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 15.36947250366211, | |
| "learning_rate": 9.261576971214017e-06, | |
| "loss": 0.594, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_cer": 35.647530040053404, | |
| "eval_loss": 0.7933751940727234, | |
| "eval_runtime": 79.3561, | |
| "eval_samples_per_second": 1.248, | |
| "eval_steps_per_second": 1.248, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 6.123430252075195, | |
| "learning_rate": 9.136420525657072e-06, | |
| "loss": 0.5759, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "eval_cer": 36.715620827770366, | |
| "eval_loss": 0.8272244334220886, | |
| "eval_runtime": 79.0213, | |
| "eval_samples_per_second": 1.253, | |
| "eval_steps_per_second": 1.253, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 9.646613121032715, | |
| "learning_rate": 9.011264080100126e-06, | |
| "loss": 0.4532, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "eval_cer": 36.18157543391188, | |
| "eval_loss": 0.810710608959198, | |
| "eval_runtime": 83.7244, | |
| "eval_samples_per_second": 1.182, | |
| "eval_steps_per_second": 1.182, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 11.642197608947754, | |
| "learning_rate": 8.88610763454318e-06, | |
| "loss": 0.3469, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_cer": 32.97730307076102, | |
| "eval_loss": 0.7934185266494751, | |
| "eval_runtime": 78.596, | |
| "eval_samples_per_second": 1.26, | |
| "eval_steps_per_second": 1.26, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 6.712106227874756, | |
| "learning_rate": 8.760951188986233e-06, | |
| "loss": 0.3617, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "eval_cer": 36.9826435246996, | |
| "eval_loss": 0.8574942350387573, | |
| "eval_runtime": 93.5531, | |
| "eval_samples_per_second": 1.058, | |
| "eval_steps_per_second": 1.058, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 8.81134033203125, | |
| "learning_rate": 8.635794743429288e-06, | |
| "loss": 0.3462, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "eval_cer": 35.11348464619493, | |
| "eval_loss": 0.8061275482177734, | |
| "eval_runtime": 78.1131, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 1.267, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 7.097293376922607, | |
| "learning_rate": 8.510638297872341e-06, | |
| "loss": 0.2267, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_cer": 35.51401869158878, | |
| "eval_loss": 0.8908804059028625, | |
| "eval_runtime": 89.1656, | |
| "eval_samples_per_second": 1.11, | |
| "eval_steps_per_second": 1.11, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 5.570465564727783, | |
| "learning_rate": 8.385481852315395e-06, | |
| "loss": 0.1918, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "eval_cer": 40.053404539385845, | |
| "eval_loss": 0.896449625492096, | |
| "eval_runtime": 97.4805, | |
| "eval_samples_per_second": 1.016, | |
| "eval_steps_per_second": 1.016, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "grad_norm": 13.11853313446045, | |
| "learning_rate": 8.260325406758448e-06, | |
| "loss": 0.1983, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "eval_cer": 32.176234979973295, | |
| "eval_loss": 0.8650264739990234, | |
| "eval_runtime": 78.8752, | |
| "eval_samples_per_second": 1.255, | |
| "eval_steps_per_second": 1.255, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 8.057831764221191, | |
| "learning_rate": 8.135168961201503e-06, | |
| "loss": 0.2064, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_cer": 34.31241655540721, | |
| "eval_loss": 0.8947641253471375, | |
| "eval_runtime": 101.8113, | |
| "eval_samples_per_second": 0.972, | |
| "eval_steps_per_second": 0.972, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 6.645699977874756, | |
| "learning_rate": 8.010012515644557e-06, | |
| "loss": 0.1008, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "eval_cer": 33.64485981308411, | |
| "eval_loss": 0.9120751023292542, | |
| "eval_runtime": 95.4926, | |
| "eval_samples_per_second": 1.037, | |
| "eval_steps_per_second": 1.037, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 10.748971939086914, | |
| "learning_rate": 7.88485607008761e-06, | |
| "loss": 0.1036, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "eval_cer": 40.32042723631509, | |
| "eval_loss": 1.0235034227371216, | |
| "eval_runtime": 110.9242, | |
| "eval_samples_per_second": 0.893, | |
| "eval_steps_per_second": 0.893, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 3.087092638015747, | |
| "learning_rate": 7.759699624530665e-06, | |
| "loss": 0.1069, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_cer": 35.51401869158878, | |
| "eval_loss": 1.0253996849060059, | |
| "eval_runtime": 101.8801, | |
| "eval_samples_per_second": 0.972, | |
| "eval_steps_per_second": 0.972, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.066666666666666, | |
| "grad_norm": 18.12947654724121, | |
| "learning_rate": 7.634543178973719e-06, | |
| "loss": 0.6101, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.066666666666666, | |
| "eval_cer": 40.693196405648266, | |
| "eval_loss": 0.8904989957809448, | |
| "eval_runtime": 110.361, | |
| "eval_samples_per_second": 0.897, | |
| "eval_steps_per_second": 0.897, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 9.397184371948242, | |
| "learning_rate": 7.509386733416771e-06, | |
| "loss": 0.5802, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "eval_cer": 34.981905910735826, | |
| "eval_loss": 0.9554204344749451, | |
| "eval_runtime": 114.8149, | |
| "eval_samples_per_second": 0.862, | |
| "eval_steps_per_second": 0.862, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 10.324832916259766, | |
| "learning_rate": 7.384230287859825e-06, | |
| "loss": 0.5739, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_cer": 34.981905910735826, | |
| "eval_loss": 0.9567387104034424, | |
| "eval_runtime": 105.411, | |
| "eval_samples_per_second": 0.939, | |
| "eval_steps_per_second": 0.939, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 5.866666666666667, | |
| "grad_norm": 29.525453567504883, | |
| "learning_rate": 7.25907384230288e-06, | |
| "loss": 0.5524, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 5.866666666666667, | |
| "eval_cer": 34.016887816646566, | |
| "eval_loss": 1.0537254810333252, | |
| "eval_runtime": 121.4049, | |
| "eval_samples_per_second": 0.815, | |
| "eval_steps_per_second": 0.815, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.133333333333334, | |
| "grad_norm": 12.727578163146973, | |
| "learning_rate": 7.133917396745933e-06, | |
| "loss": 0.4384, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.133333333333334, | |
| "eval_cer": 36.55006031363088, | |
| "eval_loss": 1.0797538757324219, | |
| "eval_runtime": 120.6543, | |
| "eval_samples_per_second": 0.821, | |
| "eval_steps_per_second": 0.821, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 16.795650482177734, | |
| "learning_rate": 7.008760951188987e-06, | |
| "loss": 0.3206, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_cer": 34.499396863691196, | |
| "eval_loss": 0.9962962865829468, | |
| "eval_runtime": 121.1564, | |
| "eval_samples_per_second": 0.817, | |
| "eval_steps_per_second": 0.817, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 9.25767707824707, | |
| "learning_rate": 6.883604505632041e-06, | |
| "loss": 0.3354, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "eval_cer": 36.55006031363088, | |
| "eval_loss": 1.092819333076477, | |
| "eval_runtime": 113.9745, | |
| "eval_samples_per_second": 0.869, | |
| "eval_steps_per_second": 0.869, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 6.933333333333334, | |
| "grad_norm": 10.387906074523926, | |
| "learning_rate": 6.758448060075094e-06, | |
| "loss": 0.3382, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 6.933333333333334, | |
| "eval_cer": 33.8962605548854, | |
| "eval_loss": 1.0058472156524658, | |
| "eval_runtime": 95.2523, | |
| "eval_samples_per_second": 1.039, | |
| "eval_steps_per_second": 1.039, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 6.268089294433594, | |
| "learning_rate": 6.633291614518148e-06, | |
| "loss": 0.2082, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_cer": 35.10253317249698, | |
| "eval_loss": 1.116202712059021, | |
| "eval_runtime": 98.445, | |
| "eval_samples_per_second": 1.006, | |
| "eval_steps_per_second": 1.006, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 7.466666666666667, | |
| "grad_norm": 3.8901703357696533, | |
| "learning_rate": 6.508135168961201e-06, | |
| "loss": 0.1632, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 7.466666666666667, | |
| "eval_cer": 32.68998793727383, | |
| "eval_loss": 1.1056886911392212, | |
| "eval_runtime": 125.755, | |
| "eval_samples_per_second": 0.787, | |
| "eval_steps_per_second": 0.787, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 7.733333333333333, | |
| "grad_norm": 3.7998549938201904, | |
| "learning_rate": 6.382978723404256e-06, | |
| "loss": 0.1736, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 7.733333333333333, | |
| "eval_cer": 34.37876960193004, | |
| "eval_loss": 1.151659607887268, | |
| "eval_runtime": 123.6571, | |
| "eval_samples_per_second": 0.801, | |
| "eval_steps_per_second": 0.801, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 5.119089126586914, | |
| "learning_rate": 6.25782227784731e-06, | |
| "loss": 0.1724, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_cer": 36.67068757539204, | |
| "eval_loss": 1.1902227401733398, | |
| "eval_runtime": 123.7933, | |
| "eval_samples_per_second": 0.8, | |
| "eval_steps_per_second": 0.8, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 8.266666666666667, | |
| "grad_norm": 6.640472888946533, | |
| "learning_rate": 6.132665832290363e-06, | |
| "loss": 0.0758, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 8.266666666666667, | |
| "eval_cer": 35.826296743063935, | |
| "eval_loss": 1.2155195474624634, | |
| "eval_runtime": 109.4112, | |
| "eval_samples_per_second": 0.905, | |
| "eval_steps_per_second": 0.905, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 8.533333333333333, | |
| "grad_norm": 9.896190643310547, | |
| "learning_rate": 6.0075093867334175e-06, | |
| "loss": 0.0769, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 8.533333333333333, | |
| "eval_cer": 34.981905910735826, | |
| "eval_loss": 1.3153955936431885, | |
| "eval_runtime": 143.3287, | |
| "eval_samples_per_second": 0.691, | |
| "eval_steps_per_second": 0.691, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 5.391891956329346, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.0815, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_cer": 35.94692400482509, | |
| "eval_loss": 1.218248963356018, | |
| "eval_runtime": 102.2091, | |
| "eval_samples_per_second": 0.969, | |
| "eval_steps_per_second": 0.969, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 9.066666666666666, | |
| "grad_norm": 1.8143051862716675, | |
| "learning_rate": 5.757196495619524e-06, | |
| "loss": 0.0698, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 9.066666666666666, | |
| "eval_cer": 35.94692400482509, | |
| "eval_loss": 1.2726112604141235, | |
| "eval_runtime": 126.9759, | |
| "eval_samples_per_second": 0.78, | |
| "eval_steps_per_second": 0.78, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 9.333333333333334, | |
| "grad_norm": 1.9652811288833618, | |
| "learning_rate": 5.632040050062579e-06, | |
| "loss": 0.0389, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 9.333333333333334, | |
| "eval_cer": 33.41375150784077, | |
| "eval_loss": 1.3238201141357422, | |
| "eval_runtime": 118.6725, | |
| "eval_samples_per_second": 0.834, | |
| "eval_steps_per_second": 0.834, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 5.921022891998291, | |
| "learning_rate": 5.506883604505633e-06, | |
| "loss": 0.0397, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "eval_cer": 37.273823884197824, | |
| "eval_loss": 1.3997122049331665, | |
| "eval_runtime": 118.4204, | |
| "eval_samples_per_second": 0.836, | |
| "eval_steps_per_second": 0.836, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 9.866666666666667, | |
| "grad_norm": 4.224556922912598, | |
| "learning_rate": 5.381727158948686e-06, | |
| "loss": 0.0385, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 9.866666666666667, | |
| "eval_cer": 37.15319662243667, | |
| "eval_loss": 1.3291655778884888, | |
| "eval_runtime": 125.4751, | |
| "eval_samples_per_second": 0.789, | |
| "eval_steps_per_second": 0.789, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 10.133333333333333, | |
| "grad_norm": 2.9837727546691895, | |
| "learning_rate": 5.25657071339174e-06, | |
| "loss": 0.0298, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 10.133333333333333, | |
| "eval_cer": 33.65500603136309, | |
| "eval_loss": 1.3235493898391724, | |
| "eval_runtime": 119.5982, | |
| "eval_samples_per_second": 0.828, | |
| "eval_steps_per_second": 0.828, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "grad_norm": 8.313368797302246, | |
| "learning_rate": 5.131414267834794e-06, | |
| "loss": 0.0215, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "eval_cer": 34.25814234016888, | |
| "eval_loss": 1.3103588819503784, | |
| "eval_runtime": 129.8548, | |
| "eval_samples_per_second": 0.762, | |
| "eval_steps_per_second": 0.762, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 10.666666666666666, | |
| "grad_norm": 4.944197177886963, | |
| "learning_rate": 5.006257822277848e-06, | |
| "loss": 0.0237, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 10.666666666666666, | |
| "eval_cer": 37.635705669481304, | |
| "eval_loss": 1.4052081108093262, | |
| "eval_runtime": 128.7688, | |
| "eval_samples_per_second": 0.769, | |
| "eval_steps_per_second": 0.769, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 10.933333333333334, | |
| "grad_norm": 2.447826623916626, | |
| "learning_rate": 4.881101376720902e-06, | |
| "loss": 0.022, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 10.933333333333334, | |
| "eval_cer": 34.016887816646566, | |
| "eval_loss": 1.3398523330688477, | |
| "eval_runtime": 128.1594, | |
| "eval_samples_per_second": 0.772, | |
| "eval_steps_per_second": 0.772, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "grad_norm": 4.827037334442139, | |
| "learning_rate": 4.755944931163955e-06, | |
| "loss": 0.0157, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "eval_cer": 35.10253317249698, | |
| "eval_loss": 1.4211913347244263, | |
| "eval_runtime": 147.5015, | |
| "eval_samples_per_second": 0.671, | |
| "eval_steps_per_second": 0.671, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 11.466666666666667, | |
| "grad_norm": 4.327616214752197, | |
| "learning_rate": 4.630788485607009e-06, | |
| "loss": 0.0134, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 11.466666666666667, | |
| "eval_cer": 37.997587454764776, | |
| "eval_loss": 1.4274876117706299, | |
| "eval_runtime": 104.6876, | |
| "eval_samples_per_second": 0.946, | |
| "eval_steps_per_second": 0.946, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 11.733333333333333, | |
| "grad_norm": 1.2661657333374023, | |
| "learning_rate": 4.505632040050063e-06, | |
| "loss": 0.0147, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 11.733333333333333, | |
| "eval_cer": 34.74065138721351, | |
| "eval_loss": 1.4563192129135132, | |
| "eval_runtime": 127.7157, | |
| "eval_samples_per_second": 0.775, | |
| "eval_steps_per_second": 0.775, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 1.5173884630203247, | |
| "learning_rate": 4.380475594493116e-06, | |
| "loss": 0.0144, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_cer": 33.0518697225573, | |
| "eval_loss": 1.374932050704956, | |
| "eval_runtime": 117.8977, | |
| "eval_samples_per_second": 0.84, | |
| "eval_steps_per_second": 0.84, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 12.266666666666667, | |
| "grad_norm": 3.4065911769866943, | |
| "learning_rate": 4.255319148936171e-06, | |
| "loss": 0.0091, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 12.266666666666667, | |
| "eval_cer": 33.775633293124244, | |
| "eval_loss": 1.4892818927764893, | |
| "eval_runtime": 127.9403, | |
| "eval_samples_per_second": 0.774, | |
| "eval_steps_per_second": 0.774, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 12.533333333333333, | |
| "grad_norm": 0.3180766701698303, | |
| "learning_rate": 4.130162703379224e-06, | |
| "loss": 0.0088, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 12.533333333333333, | |
| "eval_cer": 37.03256936067551, | |
| "eval_loss": 1.4363093376159668, | |
| "eval_runtime": 125.1911, | |
| "eval_samples_per_second": 0.791, | |
| "eval_steps_per_second": 0.791, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 1.2687398195266724, | |
| "learning_rate": 4.005006257822278e-06, | |
| "loss": 0.0093, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "eval_cer": 37.51507840772014, | |
| "eval_loss": 1.5040009021759033, | |
| "eval_runtime": 128.1399, | |
| "eval_samples_per_second": 0.773, | |
| "eval_steps_per_second": 0.773, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 13.066666666666666, | |
| "grad_norm": 0.379712849855423, | |
| "learning_rate": 3.879849812265333e-06, | |
| "loss": 0.0087, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 13.066666666666666, | |
| "eval_cer": 35.46441495778046, | |
| "eval_loss": 1.4787448644638062, | |
| "eval_runtime": 141.6422, | |
| "eval_samples_per_second": 0.699, | |
| "eval_steps_per_second": 0.699, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "grad_norm": 0.10000209510326385, | |
| "learning_rate": 3.7546933667083856e-06, | |
| "loss": 0.0066, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "eval_cer": 35.343787696019305, | |
| "eval_loss": 1.4243420362472534, | |
| "eval_runtime": 136.9845, | |
| "eval_samples_per_second": 0.723, | |
| "eval_steps_per_second": 0.723, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 9.813934326171875, | |
| "learning_rate": 3.62953692115144e-06, | |
| "loss": 0.0063, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "eval_cer": 32.810615199034984, | |
| "eval_loss": 1.4343503713607788, | |
| "eval_runtime": 132.5107, | |
| "eval_samples_per_second": 0.747, | |
| "eval_steps_per_second": 0.747, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 13.866666666666667, | |
| "grad_norm": 2.027151107788086, | |
| "learning_rate": 3.5043804755944933e-06, | |
| "loss": 0.0071, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 13.866666666666667, | |
| "eval_cer": 33.0518697225573, | |
| "eval_loss": 1.5221620798110962, | |
| "eval_runtime": 127.4995, | |
| "eval_samples_per_second": 0.776, | |
| "eval_steps_per_second": 0.776, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 14.133333333333333, | |
| "grad_norm": 0.10521159321069717, | |
| "learning_rate": 3.379224030037547e-06, | |
| "loss": 0.0054, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 14.133333333333333, | |
| "eval_cer": 32.93124246079614, | |
| "eval_loss": 1.5020145177841187, | |
| "eval_runtime": 135.115, | |
| "eval_samples_per_second": 0.733, | |
| "eval_steps_per_second": 0.733, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "grad_norm": 2.590341567993164, | |
| "learning_rate": 3.2540675844806006e-06, | |
| "loss": 0.0047, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "eval_cer": 32.56936067551266, | |
| "eval_loss": 1.494498610496521, | |
| "eval_runtime": 149.7186, | |
| "eval_samples_per_second": 0.661, | |
| "eval_steps_per_second": 0.661, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 14.666666666666666, | |
| "grad_norm": 0.30251413583755493, | |
| "learning_rate": 3.128911138923655e-06, | |
| "loss": 0.0048, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 14.666666666666666, | |
| "eval_cer": 34.25814234016888, | |
| "eval_loss": 1.53855562210083, | |
| "eval_runtime": 150.0723, | |
| "eval_samples_per_second": 0.66, | |
| "eval_steps_per_second": 0.66, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 14.933333333333334, | |
| "grad_norm": 0.06477013975381851, | |
| "learning_rate": 3.0037546933667087e-06, | |
| "loss": 0.0054, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 14.933333333333334, | |
| "eval_cer": 34.37876960193004, | |
| "eval_loss": 1.538918137550354, | |
| "eval_runtime": 95.0361, | |
| "eval_samples_per_second": 1.042, | |
| "eval_steps_per_second": 1.042, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 20.067230224609375, | |
| "learning_rate": 2.878598247809762e-06, | |
| "loss": 0.0038, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "eval_cer": 32.93124246079614, | |
| "eval_loss": 1.4607055187225342, | |
| "eval_runtime": 87.2648, | |
| "eval_samples_per_second": 1.134, | |
| "eval_steps_per_second": 1.134, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 15.466666666666667, | |
| "grad_norm": 0.16094616055488586, | |
| "learning_rate": 2.7534418022528165e-06, | |
| "loss": 0.0031, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 15.466666666666667, | |
| "eval_cer": 32.810615199034984, | |
| "eval_loss": 1.5141421556472778, | |
| "eval_runtime": 94.6547, | |
| "eval_samples_per_second": 1.046, | |
| "eval_steps_per_second": 1.046, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 15.733333333333333, | |
| "grad_norm": 0.37339481711387634, | |
| "learning_rate": 2.62828535669587e-06, | |
| "loss": 0.0032, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 15.733333333333333, | |
| "eval_cer": 38.96260554885404, | |
| "eval_loss": 1.6072230339050293, | |
| "eval_runtime": 96.1338, | |
| "eval_samples_per_second": 1.03, | |
| "eval_steps_per_second": 1.03, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.0806913673877716, | |
| "learning_rate": 2.503128911138924e-06, | |
| "loss": 0.0027, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_cer": 35.94692400482509, | |
| "eval_loss": 1.617326259613037, | |
| "eval_runtime": 98.9687, | |
| "eval_samples_per_second": 1.0, | |
| "eval_steps_per_second": 1.0, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 16.266666666666666, | |
| "grad_norm": 0.02615281380712986, | |
| "learning_rate": 2.3779724655819776e-06, | |
| "loss": 0.0023, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 16.266666666666666, | |
| "eval_cer": 36.308805790108565, | |
| "eval_loss": 1.6615768671035767, | |
| "eval_runtime": 115.1739, | |
| "eval_samples_per_second": 0.86, | |
| "eval_steps_per_second": 0.86, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 16.533333333333335, | |
| "grad_norm": 0.11968094855546951, | |
| "learning_rate": 2.2528160200250315e-06, | |
| "loss": 0.0023, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 16.533333333333335, | |
| "eval_cer": 35.22316043425814, | |
| "eval_loss": 1.6153184175491333, | |
| "eval_runtime": 140.3831, | |
| "eval_samples_per_second": 0.705, | |
| "eval_steps_per_second": 0.705, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "grad_norm": 0.06228575110435486, | |
| "learning_rate": 2.1276595744680853e-06, | |
| "loss": 0.0021, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "eval_cer": 34.016887816646566, | |
| "eval_loss": 1.6445003747940063, | |
| "eval_runtime": 133.4409, | |
| "eval_samples_per_second": 0.742, | |
| "eval_steps_per_second": 0.742, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 17.066666666666666, | |
| "grad_norm": 0.11423548310995102, | |
| "learning_rate": 2.002503128911139e-06, | |
| "loss": 0.0021, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 17.066666666666666, | |
| "eval_cer": 33.8962605548854, | |
| "eval_loss": 1.6439446210861206, | |
| "eval_runtime": 133.5071, | |
| "eval_samples_per_second": 0.742, | |
| "eval_steps_per_second": 0.742, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 17.333333333333332, | |
| "grad_norm": 0.06907499581575394, | |
| "learning_rate": 1.8773466833541928e-06, | |
| "loss": 0.0015, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 17.333333333333332, | |
| "eval_cer": 32.93124246079614, | |
| "eval_loss": 1.6491665840148926, | |
| "eval_runtime": 132.7692, | |
| "eval_samples_per_second": 0.746, | |
| "eval_steps_per_second": 0.746, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "grad_norm": 0.033104829490184784, | |
| "learning_rate": 1.7521902377972467e-06, | |
| "loss": 0.0015, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "eval_cer": 34.13751507840772, | |
| "eval_loss": 1.651322841644287, | |
| "eval_runtime": 143.0837, | |
| "eval_samples_per_second": 0.692, | |
| "eval_steps_per_second": 0.692, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 17.866666666666667, | |
| "grad_norm": 0.01781543157994747, | |
| "learning_rate": 1.6270337922403003e-06, | |
| "loss": 0.0013, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 17.866666666666667, | |
| "eval_cer": 34.74065138721351, | |
| "eval_loss": 1.7133798599243164, | |
| "eval_runtime": 133.6812, | |
| "eval_samples_per_second": 0.741, | |
| "eval_steps_per_second": 0.741, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 18.133333333333333, | |
| "grad_norm": 0.010594404302537441, | |
| "learning_rate": 1.5018773466833544e-06, | |
| "loss": 0.0014, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 18.133333333333333, | |
| "eval_cer": 34.37876960193004, | |
| "eval_loss": 1.6600372791290283, | |
| "eval_runtime": 151.4685, | |
| "eval_samples_per_second": 0.654, | |
| "eval_steps_per_second": 0.654, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "grad_norm": 0.1840209811925888, | |
| "learning_rate": 1.3767209011264082e-06, | |
| "loss": 0.0009, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "eval_cer": 33.0518697225573, | |
| "eval_loss": 1.666447639465332, | |
| "eval_runtime": 138.1603, | |
| "eval_samples_per_second": 0.717, | |
| "eval_steps_per_second": 0.717, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 18.666666666666668, | |
| "grad_norm": 0.018077213317155838, | |
| "learning_rate": 1.251564455569462e-06, | |
| "loss": 0.0008, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 18.666666666666668, | |
| "eval_cer": 33.53437876960193, | |
| "eval_loss": 1.6725918054580688, | |
| "eval_runtime": 131.4776, | |
| "eval_samples_per_second": 0.753, | |
| "eval_steps_per_second": 0.753, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 18.933333333333334, | |
| "grad_norm": 0.13267947733402252, | |
| "learning_rate": 1.1264080100125157e-06, | |
| "loss": 0.0009, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 18.933333333333334, | |
| "eval_cer": 35.10253317249698, | |
| "eval_loss": 1.6664079427719116, | |
| "eval_runtime": 133.5644, | |
| "eval_samples_per_second": 0.741, | |
| "eval_steps_per_second": 0.741, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "grad_norm": 0.005200152285397053, | |
| "learning_rate": 1.0012515644555696e-06, | |
| "loss": 0.0007, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "eval_cer": 33.8962605548854, | |
| "eval_loss": 1.6933456659317017, | |
| "eval_runtime": 141.6099, | |
| "eval_samples_per_second": 0.699, | |
| "eval_steps_per_second": 0.699, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 19.466666666666665, | |
| "grad_norm": 0.07510178536176682, | |
| "learning_rate": 8.760951188986233e-07, | |
| "loss": 0.0005, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 19.466666666666665, | |
| "eval_cer": 35.94692400482509, | |
| "eval_loss": 1.7604153156280518, | |
| "eval_runtime": 140.6369, | |
| "eval_samples_per_second": 0.704, | |
| "eval_steps_per_second": 0.704, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 19.733333333333334, | |
| "grad_norm": 0.027590099722146988, | |
| "learning_rate": 7.509386733416772e-07, | |
| "loss": 0.0005, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 19.733333333333334, | |
| "eval_cer": 35.70566948130278, | |
| "eval_loss": 1.736212968826294, | |
| "eval_runtime": 139.15, | |
| "eval_samples_per_second": 0.711, | |
| "eval_steps_per_second": 0.711, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.015025763772428036, | |
| "learning_rate": 6.25782227784731e-07, | |
| "loss": 0.0004, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_cer": 33.65500603136309, | |
| "eval_loss": 1.7256932258605957, | |
| "eval_runtime": 151.3285, | |
| "eval_samples_per_second": 0.654, | |
| "eval_steps_per_second": 0.654, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 20.266666666666666, | |
| "grad_norm": 0.0052428352646529675, | |
| "learning_rate": 5.006257822277848e-07, | |
| "loss": 0.0001, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 20.266666666666666, | |
| "eval_cer": 34.13751507840772, | |
| "eval_loss": 1.7596440315246582, | |
| "eval_runtime": 152.8671, | |
| "eval_samples_per_second": 0.648, | |
| "eval_steps_per_second": 0.648, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 20.533333333333335, | |
| "grad_norm": 0.010569967329502106, | |
| "learning_rate": 3.754693366708386e-07, | |
| "loss": 0.0001, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 20.533333333333335, | |
| "eval_cer": 34.25814234016888, | |
| "eval_loss": 1.7473951578140259, | |
| "eval_runtime": 97.5521, | |
| "eval_samples_per_second": 1.015, | |
| "eval_steps_per_second": 1.015, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "grad_norm": 0.004566431976854801, | |
| "learning_rate": 2.503128911138924e-07, | |
| "loss": 0.0001, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "eval_cer": 35.10253317249698, | |
| "eval_loss": 1.765366554260254, | |
| "eval_runtime": 95.7556, | |
| "eval_samples_per_second": 1.034, | |
| "eval_steps_per_second": 1.034, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 21.066666666666666, | |
| "grad_norm": 0.008088044822216034, | |
| "learning_rate": 1.251564455569462e-07, | |
| "loss": 0.0001, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 21.066666666666666, | |
| "eval_cer": 34.499396863691196, | |
| "eval_loss": 1.7550737857818604, | |
| "eval_runtime": 101.9958, | |
| "eval_samples_per_second": 0.971, | |
| "eval_steps_per_second": 0.971, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 21.333333333333332, | |
| "grad_norm": 0.004822420887649059, | |
| "learning_rate": 0.0, | |
| "loss": 0.0001, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 21.333333333333332, | |
| "eval_cer": 34.37876960193004, | |
| "eval_loss": 1.755260944366455, | |
| "eval_runtime": 90.994, | |
| "eval_samples_per_second": 1.088, | |
| "eval_steps_per_second": 1.088, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "grad_norm": 14.32432746887207, | |
| "learning_rate": 3.252710592160134e-06, | |
| "loss": 0.5875, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "eval_cer": 31.193838254172015, | |
| "eval_loss": 0.8317356109619141, | |
| "eval_runtime": 105.8745, | |
| "eval_samples_per_second": 0.935, | |
| "eval_steps_per_second": 0.935, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 21.866666666666667, | |
| "grad_norm": 11.412335395812988, | |
| "learning_rate": 3.1693077564637197e-06, | |
| "loss": 0.5407, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 21.866666666666667, | |
| "eval_cer": 34.65982028241335, | |
| "eval_loss": 0.9225997924804688, | |
| "eval_runtime": 118.4773, | |
| "eval_samples_per_second": 0.836, | |
| "eval_steps_per_second": 0.836, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 22.133333333333333, | |
| "grad_norm": 10.880860328674316, | |
| "learning_rate": 3.0859049207673064e-06, | |
| "loss": 0.4377, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 22.133333333333333, | |
| "eval_cer": 30.038510911424904, | |
| "eval_loss": 0.859394907951355, | |
| "eval_runtime": 100.9655, | |
| "eval_samples_per_second": 0.981, | |
| "eval_steps_per_second": 0.981, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "grad_norm": 7.9051361083984375, | |
| "learning_rate": 3.0025020850708923e-06, | |
| "loss": 0.3746, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "eval_cer": 32.09242618741977, | |
| "eval_loss": 0.8850279450416565, | |
| "eval_runtime": 101.1338, | |
| "eval_samples_per_second": 0.979, | |
| "eval_steps_per_second": 0.979, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 22.666666666666668, | |
| "grad_norm": 8.64020824432373, | |
| "learning_rate": 2.919099249374479e-06, | |
| "loss": 0.3562, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 22.666666666666668, | |
| "eval_cer": 30.29525032092426, | |
| "eval_loss": 0.8574259877204895, | |
| "eval_runtime": 99.1014, | |
| "eval_samples_per_second": 0.999, | |
| "eval_steps_per_second": 0.999, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 22.933333333333334, | |
| "grad_norm": 7.546156406402588, | |
| "learning_rate": 2.835696413678065e-06, | |
| "loss": 0.3537, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 22.933333333333334, | |
| "eval_cer": 30.551989730423617, | |
| "eval_loss": 0.8776237964630127, | |
| "eval_runtime": 131.3735, | |
| "eval_samples_per_second": 0.754, | |
| "eval_steps_per_second": 0.754, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "grad_norm": 9.436278343200684, | |
| "learning_rate": 2.7522935779816517e-06, | |
| "loss": 0.2377, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "eval_cer": 33.889602053915276, | |
| "eval_loss": 0.9568796753883362, | |
| "eval_runtime": 127.0826, | |
| "eval_samples_per_second": 0.779, | |
| "eval_steps_per_second": 0.779, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 23.466666666666665, | |
| "grad_norm": 5.736475944519043, | |
| "learning_rate": 2.668890742285238e-06, | |
| "loss": 0.1894, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 23.466666666666665, | |
| "eval_cer": 33.1193838254172, | |
| "eval_loss": 0.9892663955688477, | |
| "eval_runtime": 122.0811, | |
| "eval_samples_per_second": 0.811, | |
| "eval_steps_per_second": 0.811, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 23.733333333333334, | |
| "grad_norm": 6.387293338775635, | |
| "learning_rate": 2.605042016806723e-06, | |
| "loss": 0.1418, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 23.733333333333334, | |
| "eval_cer": 34.27471116816431, | |
| "eval_loss": 1.0336097478866577, | |
| "eval_runtime": 96.6819, | |
| "eval_samples_per_second": 1.024, | |
| "eval_steps_per_second": 1.024, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 19.179807662963867, | |
| "learning_rate": 2.521008403361345e-06, | |
| "loss": 0.1461, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_cer": 36.071887034659824, | |
| "eval_loss": 1.0472933053970337, | |
| "eval_runtime": 93.7113, | |
| "eval_samples_per_second": 1.056, | |
| "eval_steps_per_second": 1.056, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 24.266666666666666, | |
| "grad_norm": 7.663275718688965, | |
| "learning_rate": 2.4369747899159667e-06, | |
| "loss": 0.1028, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 24.266666666666666, | |
| "eval_cer": 36.32862644415918, | |
| "eval_loss": 1.0862057209014893, | |
| "eval_runtime": 93.406, | |
| "eval_samples_per_second": 1.06, | |
| "eval_steps_per_second": 1.06, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 24.533333333333335, | |
| "grad_norm": 1.3407938480377197, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 0.103, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 24.533333333333335, | |
| "eval_cer": 34.91655969191271, | |
| "eval_loss": 1.1298171281814575, | |
| "eval_runtime": 89.034, | |
| "eval_samples_per_second": 1.112, | |
| "eval_steps_per_second": 1.112, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "grad_norm": 11.514740943908691, | |
| "learning_rate": 2.2689075630252102e-06, | |
| "loss": 0.1052, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "eval_cer": 34.65982028241335, | |
| "eval_loss": 1.1279983520507812, | |
| "eval_runtime": 91.3819, | |
| "eval_samples_per_second": 1.083, | |
| "eval_steps_per_second": 1.083, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 25.066666666666666, | |
| "grad_norm": 2.231095790863037, | |
| "learning_rate": 2.184873949579832e-06, | |
| "loss": 0.0968, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 25.066666666666666, | |
| "eval_cer": 36.071887034659824, | |
| "eval_loss": 1.2260563373565674, | |
| "eval_runtime": 88.8767, | |
| "eval_samples_per_second": 1.114, | |
| "eval_steps_per_second": 1.114, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 25.333333333333332, | |
| "grad_norm": 1.5772309303283691, | |
| "learning_rate": 2.100840336134454e-06, | |
| "loss": 0.0366, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 25.333333333333332, | |
| "eval_cer": 36.58536585365854, | |
| "eval_loss": 1.2595826387405396, | |
| "eval_runtime": 126.9284, | |
| "eval_samples_per_second": 0.78, | |
| "eval_steps_per_second": 0.78, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "grad_norm": 1.9487134218215942, | |
| "learning_rate": 2.0168067226890756e-06, | |
| "loss": 0.0393, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "eval_cer": 34.53145057766367, | |
| "eval_loss": 1.2439144849777222, | |
| "eval_runtime": 120.7517, | |
| "eval_samples_per_second": 0.82, | |
| "eval_steps_per_second": 0.82, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 25.866666666666667, | |
| "grad_norm": 1.347065806388855, | |
| "learning_rate": 1.932773109243698e-06, | |
| "loss": 0.0402, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 25.866666666666667, | |
| "eval_cer": 34.27471116816431, | |
| "eval_loss": 1.2496284246444702, | |
| "eval_runtime": 139.8733, | |
| "eval_samples_per_second": 0.708, | |
| "eval_steps_per_second": 0.708, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 26.133333333333333, | |
| "grad_norm": 0.21094612777233124, | |
| "learning_rate": 1.8487394957983196e-06, | |
| "loss": 0.0282, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 26.133333333333333, | |
| "eval_cer": 34.017971758664956, | |
| "eval_loss": 1.30524742603302, | |
| "eval_runtime": 138.4425, | |
| "eval_samples_per_second": 0.715, | |
| "eval_steps_per_second": 0.715, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "grad_norm": 1.7374966144561768, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 0.0141, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "eval_cer": 36.58536585365854, | |
| "eval_loss": 1.3404479026794434, | |
| "eval_runtime": 139.2585, | |
| "eval_samples_per_second": 0.711, | |
| "eval_steps_per_second": 0.711, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "grad_norm": 5.082799911499023, | |
| "learning_rate": 1.6806722689075632e-06, | |
| "loss": 0.0163, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "eval_cer": 32.60590500641848, | |
| "eval_loss": 1.3241037130355835, | |
| "eval_runtime": 130.5198, | |
| "eval_samples_per_second": 0.759, | |
| "eval_steps_per_second": 0.759, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 26.933333333333334, | |
| "grad_norm": 2.0732219219207764, | |
| "learning_rate": 1.5966386554621848e-06, | |
| "loss": 0.0139, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 26.933333333333334, | |
| "eval_cer": 32.73427471116817, | |
| "eval_loss": 1.342888593673706, | |
| "eval_runtime": 139.288, | |
| "eval_samples_per_second": 0.711, | |
| "eval_steps_per_second": 0.711, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "grad_norm": 0.7511962056159973, | |
| "learning_rate": 1.5126050420168068e-06, | |
| "loss": 0.0075, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "eval_cer": 32.60590500641848, | |
| "eval_loss": 1.4031819105148315, | |
| "eval_runtime": 138.6922, | |
| "eval_samples_per_second": 0.714, | |
| "eval_steps_per_second": 0.714, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 27.466666666666665, | |
| "grad_norm": 0.33939579129219055, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.0058, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 27.466666666666665, | |
| "eval_cer": 33.889602053915276, | |
| "eval_loss": 1.4162280559539795, | |
| "eval_runtime": 138.3665, | |
| "eval_samples_per_second": 0.715, | |
| "eval_steps_per_second": 0.715, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 27.733333333333334, | |
| "grad_norm": 1.3353774547576904, | |
| "learning_rate": 1.3445378151260504e-06, | |
| "loss": 0.0053, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 27.733333333333334, | |
| "eval_cer": 32.86264441591784, | |
| "eval_loss": 1.4276233911514282, | |
| "eval_runtime": 138.4317, | |
| "eval_samples_per_second": 0.715, | |
| "eval_steps_per_second": 0.715, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 4.673173904418945, | |
| "learning_rate": 1.2605042016806724e-06, | |
| "loss": 0.0058, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_cer": 33.889602053915276, | |
| "eval_loss": 1.4254465103149414, | |
| "eval_runtime": 126.82, | |
| "eval_samples_per_second": 0.781, | |
| "eval_steps_per_second": 0.781, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 28.266666666666666, | |
| "grad_norm": 0.5961702466011047, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 0.0024, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 28.266666666666666, | |
| "eval_cer": 31.964056482670088, | |
| "eval_loss": 1.4501264095306396, | |
| "eval_runtime": 141.7081, | |
| "eval_samples_per_second": 0.699, | |
| "eval_steps_per_second": 0.699, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 28.533333333333335, | |
| "grad_norm": 0.3718901574611664, | |
| "learning_rate": 1.092436974789916e-06, | |
| "loss": 0.0025, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 28.533333333333335, | |
| "eval_cer": 33.632862644415916, | |
| "eval_loss": 1.456369400024414, | |
| "eval_runtime": 139.6234, | |
| "eval_samples_per_second": 0.709, | |
| "eval_steps_per_second": 0.709, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "grad_norm": 1.3455713987350464, | |
| "learning_rate": 1.0084033613445378e-06, | |
| "loss": 0.0031, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "eval_cer": 33.761232349165596, | |
| "eval_loss": 1.4892566204071045, | |
| "eval_runtime": 155.9343, | |
| "eval_samples_per_second": 0.635, | |
| "eval_steps_per_second": 0.635, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 29.066666666666666, | |
| "grad_norm": 0.1176782175898552, | |
| "learning_rate": 9.243697478991598e-07, | |
| "loss": 0.0025, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 29.066666666666666, | |
| "eval_cer": 33.24775353016688, | |
| "eval_loss": 1.5334933996200562, | |
| "eval_runtime": 148.9794, | |
| "eval_samples_per_second": 0.665, | |
| "eval_steps_per_second": 0.665, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 29.333333333333332, | |
| "grad_norm": 0.22500748932361603, | |
| "learning_rate": 8.403361344537816e-07, | |
| "loss": 0.0014, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 29.333333333333332, | |
| "eval_cer": 31.322207958921695, | |
| "eval_loss": 1.5306977033615112, | |
| "eval_runtime": 148.8071, | |
| "eval_samples_per_second": 0.665, | |
| "eval_steps_per_second": 0.665, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "grad_norm": 13.704888343811035, | |
| "learning_rate": 7.563025210084034e-07, | |
| "loss": 0.0011, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "eval_cer": 33.1193838254172, | |
| "eval_loss": 1.5167608261108398, | |
| "eval_runtime": 101.0691, | |
| "eval_samples_per_second": 0.98, | |
| "eval_steps_per_second": 0.98, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 29.866666666666667, | |
| "grad_norm": 0.059027571231126785, | |
| "learning_rate": 6.722689075630252e-07, | |
| "loss": 0.0011, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 29.866666666666667, | |
| "eval_cer": 32.47753530166881, | |
| "eval_loss": 1.5043797492980957, | |
| "eval_runtime": 98.5539, | |
| "eval_samples_per_second": 1.005, | |
| "eval_steps_per_second": 1.005, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 30.133333333333333, | |
| "grad_norm": 0.023644184693694115, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 0.0008, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 30.133333333333333, | |
| "eval_cer": 32.73427471116817, | |
| "eval_loss": 1.5240405797958374, | |
| "eval_runtime": 93.2907, | |
| "eval_samples_per_second": 1.061, | |
| "eval_steps_per_second": 1.061, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "grad_norm": 0.0645926371216774, | |
| "learning_rate": 5.042016806722689e-07, | |
| "loss": 0.0009, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "eval_cer": 32.09242618741977, | |
| "eval_loss": 1.5282586812973022, | |
| "eval_runtime": 81.359, | |
| "eval_samples_per_second": 1.217, | |
| "eval_steps_per_second": 1.217, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 30.666666666666668, | |
| "grad_norm": 0.04764328524470329, | |
| "learning_rate": 4.201680672268908e-07, | |
| "loss": 0.0006, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 30.666666666666668, | |
| "eval_cer": 31.964056482670088, | |
| "eval_loss": 1.5477633476257324, | |
| "eval_runtime": 79.1799, | |
| "eval_samples_per_second": 1.25, | |
| "eval_steps_per_second": 1.25, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 30.933333333333334, | |
| "grad_norm": 0.33984529972076416, | |
| "learning_rate": 3.361344537815126e-07, | |
| "loss": 0.0007, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 30.933333333333334, | |
| "eval_cer": 33.761232349165596, | |
| "eval_loss": 1.5459610223770142, | |
| "eval_runtime": 101.4106, | |
| "eval_samples_per_second": 0.976, | |
| "eval_steps_per_second": 0.976, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 31.2, | |
| "grad_norm": 0.028893066570162773, | |
| "learning_rate": 2.5210084033613445e-07, | |
| "loss": 0.0004, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 31.2, | |
| "eval_cer": 34.017971758664956, | |
| "eval_loss": 1.5544700622558594, | |
| "eval_runtime": 78.702, | |
| "eval_samples_per_second": 1.258, | |
| "eval_steps_per_second": 1.258, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 31.466666666666665, | |
| "grad_norm": 0.03760391101241112, | |
| "learning_rate": 1.680672268907563e-07, | |
| "loss": 0.0004, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 31.466666666666665, | |
| "eval_cer": 34.403080872913996, | |
| "eval_loss": 1.5584869384765625, | |
| "eval_runtime": 78.6792, | |
| "eval_samples_per_second": 1.258, | |
| "eval_steps_per_second": 1.258, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 31.733333333333334, | |
| "grad_norm": 0.02360822632908821, | |
| "learning_rate": 8.403361344537815e-08, | |
| "loss": 0.0003, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 31.733333333333334, | |
| "eval_cer": 34.017971758664956, | |
| "eval_loss": 1.5569974184036255, | |
| "eval_runtime": 78.2713, | |
| "eval_samples_per_second": 1.265, | |
| "eval_steps_per_second": 1.265, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.05067881569266319, | |
| "learning_rate": 0.0, | |
| "loss": 0.0003, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_cer": 34.017971758664956, | |
| "eval_loss": 1.5553821325302124, | |
| "eval_runtime": 79.0045, | |
| "eval_samples_per_second": 1.253, | |
| "eval_steps_per_second": 1.253, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 32.266666666666666, | |
| "grad_norm": 8.542917251586914, | |
| "learning_rate": 2.4528301886792453e-06, | |
| "loss": 0.2438, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 32.266666666666666, | |
| "eval_cer": 32.49097472924188, | |
| "eval_loss": 0.9014519453048706, | |
| "eval_runtime": 78.9647, | |
| "eval_samples_per_second": 1.254, | |
| "eval_steps_per_second": 1.254, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 32.53333333333333, | |
| "grad_norm": 10.492444038391113, | |
| "learning_rate": 2.389937106918239e-06, | |
| "loss": 0.2189, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 32.53333333333333, | |
| "eval_cer": 34.05535499398315, | |
| "eval_loss": 0.8799840211868286, | |
| "eval_runtime": 79.6839, | |
| "eval_samples_per_second": 1.242, | |
| "eval_steps_per_second": 1.242, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "grad_norm": 1.4531430006027222, | |
| "learning_rate": 2.327044025157233e-06, | |
| "loss": 0.2178, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "eval_cer": 28.51985559566787, | |
| "eval_loss": 0.8893880248069763, | |
| "eval_runtime": 79.9682, | |
| "eval_samples_per_second": 1.238, | |
| "eval_steps_per_second": 1.238, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 33.06666666666667, | |
| "grad_norm": 0.9759045243263245, | |
| "learning_rate": 2.2641509433962266e-06, | |
| "loss": 0.1867, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 33.06666666666667, | |
| "eval_cer": 33.33333333333333, | |
| "eval_loss": 0.9809114336967468, | |
| "eval_runtime": 79.1595, | |
| "eval_samples_per_second": 1.251, | |
| "eval_steps_per_second": 1.251, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 33.333333333333336, | |
| "grad_norm": 13.012406349182129, | |
| "learning_rate": 2.2012578616352204e-06, | |
| "loss": 0.0829, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 33.333333333333336, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.0518907308578491, | |
| "eval_runtime": 80.5248, | |
| "eval_samples_per_second": 1.229, | |
| "eval_steps_per_second": 1.229, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 33.6, | |
| "grad_norm": 7.532169342041016, | |
| "learning_rate": 2.138364779874214e-06, | |
| "loss": 0.0889, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 33.6, | |
| "eval_cer": 30.806257521058967, | |
| "eval_loss": 1.0699083805084229, | |
| "eval_runtime": 138.2949, | |
| "eval_samples_per_second": 0.716, | |
| "eval_steps_per_second": 0.716, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 33.86666666666667, | |
| "grad_norm": 8.014657020568848, | |
| "learning_rate": 2.075471698113208e-06, | |
| "loss": 0.086, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 33.86666666666667, | |
| "eval_cer": 33.33333333333333, | |
| "eval_loss": 1.0419602394104004, | |
| "eval_runtime": 130.1457, | |
| "eval_samples_per_second": 0.761, | |
| "eval_steps_per_second": 0.761, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 34.13333333333333, | |
| "grad_norm": 3.8977677822113037, | |
| "learning_rate": 2.0125786163522013e-06, | |
| "loss": 0.0633, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 34.13333333333333, | |
| "eval_cer": 31.889290012033694, | |
| "eval_loss": 1.1622824668884277, | |
| "eval_runtime": 130.9914, | |
| "eval_samples_per_second": 0.756, | |
| "eval_steps_per_second": 0.756, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 34.4, | |
| "grad_norm": 1.6470654010772705, | |
| "learning_rate": 1.949685534591195e-06, | |
| "loss": 0.0334, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 34.4, | |
| "eval_cer": 31.28760529482551, | |
| "eval_loss": 1.1540066003799438, | |
| "eval_runtime": 140.5397, | |
| "eval_samples_per_second": 0.704, | |
| "eval_steps_per_second": 0.704, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 34.666666666666664, | |
| "grad_norm": 2.1039841175079346, | |
| "learning_rate": 1.8867924528301889e-06, | |
| "loss": 0.0295, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 34.666666666666664, | |
| "eval_cer": 29.482551143200965, | |
| "eval_loss": 1.1704764366149902, | |
| "eval_runtime": 132.9282, | |
| "eval_samples_per_second": 0.745, | |
| "eval_steps_per_second": 0.745, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 34.93333333333333, | |
| "grad_norm": 7.380443096160889, | |
| "learning_rate": 1.8238993710691824e-06, | |
| "loss": 0.0311, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 34.93333333333333, | |
| "eval_cer": 30.445246690734056, | |
| "eval_loss": 1.169384241104126, | |
| "eval_runtime": 131.4972, | |
| "eval_samples_per_second": 0.753, | |
| "eval_steps_per_second": 0.753, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 35.2, | |
| "grad_norm": 0.5855227708816528, | |
| "learning_rate": 1.7610062893081762e-06, | |
| "loss": 0.0164, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 35.2, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.2629327774047852, | |
| "eval_runtime": 141.6079, | |
| "eval_samples_per_second": 0.699, | |
| "eval_steps_per_second": 0.699, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 35.46666666666667, | |
| "grad_norm": 1.5775542259216309, | |
| "learning_rate": 1.6981132075471698e-06, | |
| "loss": 0.0105, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 35.46666666666667, | |
| "eval_cer": 30.9265944645006, | |
| "eval_loss": 1.2349437475204468, | |
| "eval_runtime": 141.4731, | |
| "eval_samples_per_second": 0.7, | |
| "eval_steps_per_second": 0.7, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 35.733333333333334, | |
| "grad_norm": 1.080212950706482, | |
| "learning_rate": 1.6352201257861635e-06, | |
| "loss": 0.0123, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 35.733333333333334, | |
| "eval_cer": 33.69434416365824, | |
| "eval_loss": 1.2958662509918213, | |
| "eval_runtime": 132.1396, | |
| "eval_samples_per_second": 0.749, | |
| "eval_steps_per_second": 0.749, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "grad_norm": 2.049687147140503, | |
| "learning_rate": 1.5723270440251573e-06, | |
| "loss": 0.0123, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.2999323606491089, | |
| "eval_runtime": 135.3553, | |
| "eval_samples_per_second": 0.731, | |
| "eval_steps_per_second": 0.731, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 36.266666666666666, | |
| "grad_norm": 0.15309026837348938, | |
| "learning_rate": 1.509433962264151e-06, | |
| "loss": 0.0054, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 36.266666666666666, | |
| "eval_cer": 34.05535499398315, | |
| "eval_loss": 1.3017274141311646, | |
| "eval_runtime": 140.5992, | |
| "eval_samples_per_second": 0.704, | |
| "eval_steps_per_second": 0.704, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 36.53333333333333, | |
| "grad_norm": 7.4319658279418945, | |
| "learning_rate": 1.4465408805031447e-06, | |
| "loss": 0.0046, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 36.53333333333333, | |
| "eval_cer": 32.851985559566785, | |
| "eval_loss": 1.3190721273422241, | |
| "eval_runtime": 139.5862, | |
| "eval_samples_per_second": 0.709, | |
| "eval_steps_per_second": 0.709, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "grad_norm": 0.05639100819826126, | |
| "learning_rate": 1.3836477987421384e-06, | |
| "loss": 0.0048, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "eval_cer": 33.935018050541515, | |
| "eval_loss": 1.312601089477539, | |
| "eval_runtime": 136.2621, | |
| "eval_samples_per_second": 0.727, | |
| "eval_steps_per_second": 0.727, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 37.06666666666667, | |
| "grad_norm": 0.4124259054660797, | |
| "learning_rate": 1.3207547169811322e-06, | |
| "loss": 0.0042, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 37.06666666666667, | |
| "eval_cer": 33.45367027677497, | |
| "eval_loss": 1.338242530822754, | |
| "eval_runtime": 141.1632, | |
| "eval_samples_per_second": 0.701, | |
| "eval_steps_per_second": 0.701, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 37.333333333333336, | |
| "grad_norm": 0.04568612948060036, | |
| "learning_rate": 1.257861635220126e-06, | |
| "loss": 0.0028, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 37.333333333333336, | |
| "eval_cer": 33.092659446450064, | |
| "eval_loss": 1.3590086698532104, | |
| "eval_runtime": 130.7233, | |
| "eval_samples_per_second": 0.757, | |
| "eval_steps_per_second": 0.757, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 37.6, | |
| "grad_norm": 6.521934509277344, | |
| "learning_rate": 1.1949685534591195e-06, | |
| "loss": 0.002, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 37.6, | |
| "eval_cer": 35.619735258724425, | |
| "eval_loss": 1.3925501108169556, | |
| "eval_runtime": 136.222, | |
| "eval_samples_per_second": 0.727, | |
| "eval_steps_per_second": 0.727, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 37.86666666666667, | |
| "grad_norm": 0.37722429633140564, | |
| "learning_rate": 1.1320754716981133e-06, | |
| "loss": 0.0025, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 37.86666666666667, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.3863495588302612, | |
| "eval_runtime": 142.1041, | |
| "eval_samples_per_second": 0.697, | |
| "eval_steps_per_second": 0.697, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 38.13333333333333, | |
| "grad_norm": 0.0507730133831501, | |
| "learning_rate": 1.069182389937107e-06, | |
| "loss": 0.0019, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 38.13333333333333, | |
| "eval_cer": 32.49097472924188, | |
| "eval_loss": 1.4475711584091187, | |
| "eval_runtime": 128.8649, | |
| "eval_samples_per_second": 0.768, | |
| "eval_steps_per_second": 0.768, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 38.4, | |
| "grad_norm": 0.029291188344359398, | |
| "learning_rate": 1.0062893081761007e-06, | |
| "loss": 0.0012, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 38.4, | |
| "eval_cer": 32.49097472924188, | |
| "eval_loss": 1.4133862257003784, | |
| "eval_runtime": 138.9534, | |
| "eval_samples_per_second": 0.712, | |
| "eval_steps_per_second": 0.712, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 38.666666666666664, | |
| "grad_norm": 0.029871659353375435, | |
| "learning_rate": 9.433962264150944e-07, | |
| "loss": 0.0015, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 38.666666666666664, | |
| "eval_cer": 32.61131167268351, | |
| "eval_loss": 1.4183237552642822, | |
| "eval_runtime": 141.5271, | |
| "eval_samples_per_second": 0.7, | |
| "eval_steps_per_second": 0.7, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 38.93333333333333, | |
| "grad_norm": 0.04213670641183853, | |
| "learning_rate": 8.805031446540881e-07, | |
| "loss": 0.0012, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 38.93333333333333, | |
| "eval_cer": 34.29602888086642, | |
| "eval_loss": 1.4451824426651, | |
| "eval_runtime": 138.8658, | |
| "eval_samples_per_second": 0.713, | |
| "eval_steps_per_second": 0.713, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 39.2, | |
| "grad_norm": 0.025760957971215248, | |
| "learning_rate": 8.176100628930818e-07, | |
| "loss": 0.0008, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 39.2, | |
| "eval_cer": 34.777376654632974, | |
| "eval_loss": 1.4834933280944824, | |
| "eval_runtime": 127.478, | |
| "eval_samples_per_second": 0.777, | |
| "eval_steps_per_second": 0.777, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 39.46666666666667, | |
| "grad_norm": 0.015352617017924786, | |
| "learning_rate": 7.547169811320755e-07, | |
| "loss": 0.0007, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 39.46666666666667, | |
| "eval_cer": 34.5367027677497, | |
| "eval_loss": 1.492653727531433, | |
| "eval_runtime": 138.6569, | |
| "eval_samples_per_second": 0.714, | |
| "eval_steps_per_second": 0.714, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 39.733333333333334, | |
| "grad_norm": 0.0862458199262619, | |
| "learning_rate": 6.918238993710692e-07, | |
| "loss": 0.0007, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 39.733333333333334, | |
| "eval_cer": 32.49097472924188, | |
| "eval_loss": 1.4610902070999146, | |
| "eval_runtime": 142.2105, | |
| "eval_samples_per_second": 0.696, | |
| "eval_steps_per_second": 0.696, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.03945042937994003, | |
| "learning_rate": 6.28930817610063e-07, | |
| "loss": 0.0009, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_cer": 32.61131167268351, | |
| "eval_loss": 1.513584017753601, | |
| "eval_runtime": 133.4522, | |
| "eval_samples_per_second": 0.742, | |
| "eval_steps_per_second": 0.742, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 40.266666666666666, | |
| "grad_norm": 0.037018537521362305, | |
| "learning_rate": 5.660377358490567e-07, | |
| "loss": 0.0003, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 40.266666666666666, | |
| "eval_cer": 34.777376654632974, | |
| "eval_loss": 1.5051733255386353, | |
| "eval_runtime": 152.3438, | |
| "eval_samples_per_second": 0.65, | |
| "eval_steps_per_second": 0.65, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 40.53333333333333, | |
| "grad_norm": 0.04961505904793739, | |
| "learning_rate": 5.031446540880503e-07, | |
| "loss": 0.0005, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 40.53333333333333, | |
| "eval_cer": 33.57400722021661, | |
| "eval_loss": 1.5090794563293457, | |
| "eval_runtime": 152.6527, | |
| "eval_samples_per_second": 0.649, | |
| "eval_steps_per_second": 0.649, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 40.8, | |
| "grad_norm": 0.043190281838178635, | |
| "learning_rate": 4.4025157232704405e-07, | |
| "loss": 0.0004, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 40.8, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.508406639099121, | |
| "eval_runtime": 153.8282, | |
| "eval_samples_per_second": 0.644, | |
| "eval_steps_per_second": 0.644, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 41.06666666666667, | |
| "grad_norm": 0.036041852086782455, | |
| "learning_rate": 3.773584905660378e-07, | |
| "loss": 0.0002, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 41.06666666666667, | |
| "eval_cer": 33.935018050541515, | |
| "eval_loss": 1.5361274480819702, | |
| "eval_runtime": 82.6142, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 1.198, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 41.333333333333336, | |
| "grad_norm": 0.01777110993862152, | |
| "learning_rate": 3.144654088050315e-07, | |
| "loss": 0.0002, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 41.333333333333336, | |
| "eval_cer": 33.092659446450064, | |
| "eval_loss": 1.5560057163238525, | |
| "eval_runtime": 83.0022, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 41.6, | |
| "grad_norm": 0.006947483401745558, | |
| "learning_rate": 2.5157232704402517e-07, | |
| "loss": 0.0001, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 41.6, | |
| "eval_cer": 33.2129963898917, | |
| "eval_loss": 1.5325310230255127, | |
| "eval_runtime": 82.2544, | |
| "eval_samples_per_second": 1.204, | |
| "eval_steps_per_second": 1.204, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 41.86666666666667, | |
| "grad_norm": 0.03959225118160248, | |
| "learning_rate": 1.886792452830189e-07, | |
| "loss": 0.0002, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 41.86666666666667, | |
| "eval_cer": 33.45367027677497, | |
| "eval_loss": 1.5379877090454102, | |
| "eval_runtime": 82.6778, | |
| "eval_samples_per_second": 1.197, | |
| "eval_steps_per_second": 1.197, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 42.13333333333333, | |
| "grad_norm": 0.17936809360980988, | |
| "learning_rate": 1.2578616352201258e-07, | |
| "loss": 0.0002, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 42.13333333333333, | |
| "eval_cer": 32.731648616125156, | |
| "eval_loss": 1.5386216640472412, | |
| "eval_runtime": 82.9423, | |
| "eval_samples_per_second": 1.194, | |
| "eval_steps_per_second": 1.194, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 42.4, | |
| "grad_norm": 0.3688109219074249, | |
| "learning_rate": 6.289308176100629e-08, | |
| "loss": 0.0001, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 42.4, | |
| "eval_cer": 32.12996389891697, | |
| "eval_loss": 1.5448355674743652, | |
| "eval_runtime": 82.8733, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 42.666666666666664, | |
| "grad_norm": 0.01078395452350378, | |
| "learning_rate": 0.0, | |
| "loss": 0.0001, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 42.666666666666664, | |
| "eval_cer": 32.37063778580024, | |
| "eval_loss": 1.5441501140594482, | |
| "eval_runtime": 82.7104, | |
| "eval_samples_per_second": 1.197, | |
| "eval_steps_per_second": 1.197, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 42.93333333333333, | |
| "grad_norm": 21.12993049621582, | |
| "learning_rate": 1.9597989949748746e-06, | |
| "loss": 0.2437, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 42.93333333333333, | |
| "eval_cer": 34.413965087281795, | |
| "eval_loss": 1.1097244024276733, | |
| "eval_runtime": 82.2475, | |
| "eval_samples_per_second": 1.204, | |
| "eval_steps_per_second": 1.204, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 43.2, | |
| "grad_norm": 24.733867645263672, | |
| "learning_rate": 1.9095477386934674e-06, | |
| "loss": 0.1948, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 43.2, | |
| "eval_cer": 33.042394014962596, | |
| "eval_loss": 1.0970935821533203, | |
| "eval_runtime": 82.1258, | |
| "eval_samples_per_second": 1.205, | |
| "eval_steps_per_second": 1.205, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 43.46666666666667, | |
| "grad_norm": 2.1681711673736572, | |
| "learning_rate": 1.8592964824120604e-06, | |
| "loss": 0.1795, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 43.46666666666667, | |
| "eval_cer": 34.1645885286783, | |
| "eval_loss": 1.1059696674346924, | |
| "eval_runtime": 82.8179, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 43.733333333333334, | |
| "grad_norm": 7.347228527069092, | |
| "learning_rate": 1.8090452261306535e-06, | |
| "loss": 0.1736, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 43.733333333333334, | |
| "eval_cer": 34.78802992518703, | |
| "eval_loss": 1.1568942070007324, | |
| "eval_runtime": 82.9427, | |
| "eval_samples_per_second": 1.194, | |
| "eval_steps_per_second": 1.194, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "grad_norm": 16.711997985839844, | |
| "learning_rate": 1.7587939698492465e-06, | |
| "loss": 0.1728, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_cer": 32.04488778054863, | |
| "eval_loss": 1.1192028522491455, | |
| "eval_runtime": 82.551, | |
| "eval_samples_per_second": 1.199, | |
| "eval_steps_per_second": 1.199, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 44.266666666666666, | |
| "grad_norm": 0.13528181612491608, | |
| "learning_rate": 1.7085427135678393e-06, | |
| "loss": 0.0722, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 44.266666666666666, | |
| "eval_cer": 32.418952618453865, | |
| "eval_loss": 1.2699230909347534, | |
| "eval_runtime": 83.0431, | |
| "eval_samples_per_second": 1.192, | |
| "eval_steps_per_second": 1.192, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 44.53333333333333, | |
| "grad_norm": 0.9568387866020203, | |
| "learning_rate": 1.6582914572864323e-06, | |
| "loss": 0.0745, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 44.53333333333333, | |
| "eval_cer": 34.03990024937656, | |
| "eval_loss": 1.3056560754776, | |
| "eval_runtime": 82.9267, | |
| "eval_samples_per_second": 1.194, | |
| "eval_steps_per_second": 1.194, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 44.8, | |
| "grad_norm": 0.2640259563922882, | |
| "learning_rate": 1.6080402010050254e-06, | |
| "loss": 0.0739, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 44.8, | |
| "eval_cer": 35.41147132169576, | |
| "eval_loss": 1.3166236877441406, | |
| "eval_runtime": 82.7459, | |
| "eval_samples_per_second": 1.196, | |
| "eval_steps_per_second": 1.196, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 45.06666666666667, | |
| "grad_norm": 0.09736265242099762, | |
| "learning_rate": 1.5577889447236184e-06, | |
| "loss": 0.0579, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 45.06666666666667, | |
| "eval_cer": 35.785536159601, | |
| "eval_loss": 1.4574410915374756, | |
| "eval_runtime": 82.7625, | |
| "eval_samples_per_second": 1.196, | |
| "eval_steps_per_second": 1.196, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 45.333333333333336, | |
| "grad_norm": 9.942195892333984, | |
| "learning_rate": 1.507537688442211e-06, | |
| "loss": 0.0264, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 45.333333333333336, | |
| "eval_cer": 35.785536159601, | |
| "eval_loss": 1.5011882781982422, | |
| "eval_runtime": 82.3432, | |
| "eval_samples_per_second": 1.202, | |
| "eval_steps_per_second": 1.202, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 45.6, | |
| "grad_norm": 0.22312113642692566, | |
| "learning_rate": 1.457286432160804e-06, | |
| "loss": 0.029, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 45.6, | |
| "eval_cer": 33.66583541147132, | |
| "eval_loss": 1.4832149744033813, | |
| "eval_runtime": 82.3824, | |
| "eval_samples_per_second": 1.202, | |
| "eval_steps_per_second": 1.202, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 45.86666666666667, | |
| "grad_norm": 7.559528827667236, | |
| "learning_rate": 1.407035175879397e-06, | |
| "loss": 0.0264, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 45.86666666666667, | |
| "eval_cer": 35.28678304239401, | |
| "eval_loss": 1.5051957368850708, | |
| "eval_runtime": 83.0512, | |
| "eval_samples_per_second": 1.192, | |
| "eval_steps_per_second": 1.192, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 46.13333333333333, | |
| "grad_norm": 0.13890038430690765, | |
| "learning_rate": 1.35678391959799e-06, | |
| "loss": 0.0198, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 46.13333333333333, | |
| "eval_cer": 35.910224438902745, | |
| "eval_loss": 1.5969452857971191, | |
| "eval_runtime": 83.0972, | |
| "eval_samples_per_second": 1.191, | |
| "eval_steps_per_second": 1.191, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 46.4, | |
| "grad_norm": 23.76213264465332, | |
| "learning_rate": 1.3065326633165831e-06, | |
| "loss": 0.0093, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 46.4, | |
| "eval_cer": 35.16209476309227, | |
| "eval_loss": 1.5947421789169312, | |
| "eval_runtime": 82.6136, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 1.198, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 46.666666666666664, | |
| "grad_norm": 0.32037144899368286, | |
| "learning_rate": 1.256281407035176e-06, | |
| "loss": 0.0108, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 46.666666666666664, | |
| "eval_cer": 36.78304239401496, | |
| "eval_loss": 1.58470618724823, | |
| "eval_runtime": 82.3123, | |
| "eval_samples_per_second": 1.203, | |
| "eval_steps_per_second": 1.203, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 46.93333333333333, | |
| "grad_norm": 0.17757229506969452, | |
| "learning_rate": 1.206030150753769e-06, | |
| "loss": 0.0106, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 46.93333333333333, | |
| "eval_cer": 36.907730673316706, | |
| "eval_loss": 1.6137627363204956, | |
| "eval_runtime": 82.6988, | |
| "eval_samples_per_second": 1.197, | |
| "eval_steps_per_second": 1.197, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "grad_norm": 1.0095301866531372, | |
| "learning_rate": 1.155778894472362e-06, | |
| "loss": 0.0059, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "eval_cer": 36.408977556109726, | |
| "eval_loss": 1.6764711141586304, | |
| "eval_runtime": 82.559, | |
| "eval_samples_per_second": 1.199, | |
| "eval_steps_per_second": 1.199, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 47.46666666666667, | |
| "grad_norm": 0.2900920808315277, | |
| "learning_rate": 1.105527638190955e-06, | |
| "loss": 0.0048, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 47.46666666666667, | |
| "eval_cer": 34.66334164588529, | |
| "eval_loss": 1.62752366065979, | |
| "eval_runtime": 82.6072, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 1.198, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 47.733333333333334, | |
| "grad_norm": 1.6907705068588257, | |
| "learning_rate": 1.0552763819095479e-06, | |
| "loss": 0.0047, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 47.733333333333334, | |
| "eval_cer": 36.15960099750624, | |
| "eval_loss": 1.680855393409729, | |
| "eval_runtime": 82.3981, | |
| "eval_samples_per_second": 1.201, | |
| "eval_steps_per_second": 1.201, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "grad_norm": 0.05522582679986954, | |
| "learning_rate": 1.0050251256281409e-06, | |
| "loss": 0.004, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_cer": 36.53366583541147, | |
| "eval_loss": 1.673848032951355, | |
| "eval_runtime": 83.5612, | |
| "eval_samples_per_second": 1.185, | |
| "eval_steps_per_second": 1.185, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 48.266666666666666, | |
| "grad_norm": 0.05876125767827034, | |
| "learning_rate": 9.547738693467337e-07, | |
| "loss": 0.0019, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 48.266666666666666, | |
| "eval_cer": 35.785536159601, | |
| "eval_loss": 1.7074730396270752, | |
| "eval_runtime": 82.6963, | |
| "eval_samples_per_second": 1.197, | |
| "eval_steps_per_second": 1.197, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 48.53333333333333, | |
| "grad_norm": 0.25979533791542053, | |
| "learning_rate": 9.045226130653267e-07, | |
| "loss": 0.0023, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 48.53333333333333, | |
| "eval_cer": 35.16209476309227, | |
| "eval_loss": 1.7071537971496582, | |
| "eval_runtime": 82.873, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 48.8, | |
| "grad_norm": 1.9857276678085327, | |
| "learning_rate": 8.542713567839197e-07, | |
| "loss": 0.0019, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 48.8, | |
| "eval_cer": 35.66084788029925, | |
| "eval_loss": 1.7293034791946411, | |
| "eval_runtime": 82.7894, | |
| "eval_samples_per_second": 1.196, | |
| "eval_steps_per_second": 1.196, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 49.06666666666667, | |
| "grad_norm": 0.021623745560646057, | |
| "learning_rate": 8.040201005025127e-07, | |
| "loss": 0.0019, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 49.06666666666667, | |
| "eval_cer": 36.53366583541147, | |
| "eval_loss": 1.7291345596313477, | |
| "eval_runtime": 82.8955, | |
| "eval_samples_per_second": 1.194, | |
| "eval_steps_per_second": 1.194, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 49.333333333333336, | |
| "grad_norm": 0.13192743062973022, | |
| "learning_rate": 7.537688442211055e-07, | |
| "loss": 0.0008, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 49.333333333333336, | |
| "eval_cer": 36.78304239401496, | |
| "eval_loss": 1.7502952814102173, | |
| "eval_runtime": 83.2448, | |
| "eval_samples_per_second": 1.189, | |
| "eval_steps_per_second": 1.189, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 49.6, | |
| "grad_norm": 0.07062412798404694, | |
| "learning_rate": 7.035175879396985e-07, | |
| "loss": 0.0009, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 49.6, | |
| "eval_cer": 36.53366583541147, | |
| "eval_loss": 1.72696852684021, | |
| "eval_runtime": 83.1626, | |
| "eval_samples_per_second": 1.19, | |
| "eval_steps_per_second": 1.19, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 49.86666666666667, | |
| "grad_norm": 0.038012657314538956, | |
| "learning_rate": 6.532663316582916e-07, | |
| "loss": 0.0009, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 49.86666666666667, | |
| "eval_cer": 36.53366583541147, | |
| "eval_loss": 1.7277522087097168, | |
| "eval_runtime": 83.1004, | |
| "eval_samples_per_second": 1.191, | |
| "eval_steps_per_second": 1.191, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 50.13333333333333, | |
| "grad_norm": 0.01662178337574005, | |
| "learning_rate": 6.030150753768845e-07, | |
| "loss": 0.001, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 50.13333333333333, | |
| "eval_cer": 34.66334164588529, | |
| "eval_loss": 1.7733581066131592, | |
| "eval_runtime": 82.9672, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 50.4, | |
| "grad_norm": 0.047121066600084305, | |
| "learning_rate": 5.527638190954775e-07, | |
| "loss": 0.0005, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 50.4, | |
| "eval_cer": 36.03491271820449, | |
| "eval_loss": 1.7523555755615234, | |
| "eval_runtime": 82.8793, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 50.666666666666664, | |
| "grad_norm": 0.020025352016091347, | |
| "learning_rate": 5.025125628140704e-07, | |
| "loss": 0.0008, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 50.666666666666664, | |
| "eval_cer": 34.66334164588529, | |
| "eval_loss": 1.7309192419052124, | |
| "eval_runtime": 82.6175, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 1.198, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 50.93333333333333, | |
| "grad_norm": 0.020201655104756355, | |
| "learning_rate": 4.5226130653266337e-07, | |
| "loss": 0.0005, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 50.93333333333333, | |
| "eval_cer": 36.28428927680798, | |
| "eval_loss": 1.7656340599060059, | |
| "eval_runtime": 82.9749, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 51.2, | |
| "grad_norm": 0.027762647718191147, | |
| "learning_rate": 4.0201005025125634e-07, | |
| "loss": 0.0003, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 51.2, | |
| "eval_cer": 36.15960099750624, | |
| "eval_loss": 1.7615617513656616, | |
| "eval_runtime": 82.7808, | |
| "eval_samples_per_second": 1.196, | |
| "eval_steps_per_second": 1.196, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 51.46666666666667, | |
| "grad_norm": 0.021721765398979187, | |
| "learning_rate": 3.5175879396984927e-07, | |
| "loss": 0.0003, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 51.46666666666667, | |
| "eval_cer": 35.16209476309227, | |
| "eval_loss": 1.772621989250183, | |
| "eval_runtime": 82.9625, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 51.733333333333334, | |
| "grad_norm": 0.021287057548761368, | |
| "learning_rate": 3.0150753768844224e-07, | |
| "loss": 0.0002, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 51.733333333333334, | |
| "eval_cer": 36.28428927680798, | |
| "eval_loss": 1.7833250761032104, | |
| "eval_runtime": 83.1006, | |
| "eval_samples_per_second": 1.191, | |
| "eval_steps_per_second": 1.191, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "grad_norm": 0.020342178642749786, | |
| "learning_rate": 2.512562814070352e-07, | |
| "loss": 0.0004, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_cer": 34.66334164588529, | |
| "eval_loss": 1.7803997993469238, | |
| "eval_runtime": 82.7795, | |
| "eval_samples_per_second": 1.196, | |
| "eval_steps_per_second": 1.196, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 52.266666666666666, | |
| "grad_norm": 0.013502071611583233, | |
| "learning_rate": 2.0100502512562817e-07, | |
| "loss": 0.0001, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 52.266666666666666, | |
| "eval_cer": 34.413965087281795, | |
| "eval_loss": 1.786993145942688, | |
| "eval_runtime": 82.9528, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 52.53333333333333, | |
| "grad_norm": 0.013876430690288544, | |
| "learning_rate": 1.5075376884422112e-07, | |
| "loss": 0.0001, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 52.53333333333333, | |
| "eval_cer": 34.413965087281795, | |
| "eval_loss": 1.8022912740707397, | |
| "eval_runtime": 82.844, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 52.8, | |
| "grad_norm": 0.007446631323546171, | |
| "learning_rate": 1.0050251256281409e-07, | |
| "loss": 0.0002, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 52.8, | |
| "eval_cer": 34.28927680798005, | |
| "eval_loss": 1.8034993410110474, | |
| "eval_runtime": 82.9855, | |
| "eval_samples_per_second": 1.193, | |
| "eval_steps_per_second": 1.193, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 53.06666666666667, | |
| "grad_norm": 0.03002343513071537, | |
| "learning_rate": 5.025125628140704e-08, | |
| "loss": 0.0001, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 53.06666666666667, | |
| "eval_cer": 35.03740648379052, | |
| "eval_loss": 1.8082956075668335, | |
| "eval_runtime": 82.8861, | |
| "eval_samples_per_second": 1.194, | |
| "eval_steps_per_second": 1.194, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 53.333333333333336, | |
| "grad_norm": 0.005953139625489712, | |
| "learning_rate": 0.0, | |
| "loss": 0.0001, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 53.333333333333336, | |
| "eval_cer": 35.16209476309227, | |
| "eval_loss": 1.8082053661346436, | |
| "eval_runtime": 82.8236, | |
| "eval_samples_per_second": 1.195, | |
| "eval_steps_per_second": 1.195, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 53.6, | |
| "grad_norm": 30.62691879272461, | |
| "learning_rate": 1.6317991631799166e-06, | |
| "loss": 0.248, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 53.6, | |
| "eval_cer": 30.375000000000004, | |
| "eval_loss": 1.015176773071289, | |
| "eval_runtime": 81.9262, | |
| "eval_samples_per_second": 1.208, | |
| "eval_steps_per_second": 1.208, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 53.86666666666667, | |
| "grad_norm": 1.1325130462646484, | |
| "learning_rate": 1.589958158995816e-06, | |
| "loss": 0.2154, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 53.86666666666667, | |
| "eval_cer": 28.875, | |
| "eval_loss": 1.0113086700439453, | |
| "eval_runtime": 81.4097, | |
| "eval_samples_per_second": 1.216, | |
| "eval_steps_per_second": 1.216, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 54.13333333333333, | |
| "grad_norm": 7.334270000457764, | |
| "learning_rate": 1.5481171548117155e-06, | |
| "loss": 0.1798, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 54.13333333333333, | |
| "eval_cer": 31.0, | |
| "eval_loss": 1.1168731451034546, | |
| "eval_runtime": 81.9464, | |
| "eval_samples_per_second": 1.208, | |
| "eval_steps_per_second": 1.208, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 54.4, | |
| "grad_norm": 0.5677210092544556, | |
| "learning_rate": 1.5062761506276152e-06, | |
| "loss": 0.1294, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 54.4, | |
| "eval_cer": 30.5, | |
| "eval_loss": 1.128467082977295, | |
| "eval_runtime": 81.5886, | |
| "eval_samples_per_second": 1.213, | |
| "eval_steps_per_second": 1.213, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 54.666666666666664, | |
| "grad_norm": 11.697511672973633, | |
| "learning_rate": 1.4644351464435146e-06, | |
| "loss": 0.1363, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 54.666666666666664, | |
| "eval_cer": 32.25, | |
| "eval_loss": 1.1191054582595825, | |
| "eval_runtime": 81.6688, | |
| "eval_samples_per_second": 1.212, | |
| "eval_steps_per_second": 1.212, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 54.93333333333333, | |
| "grad_norm": 11.908825874328613, | |
| "learning_rate": 1.4225941422594145e-06, | |
| "loss": 0.1334, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 54.93333333333333, | |
| "eval_cer": 32.125, | |
| "eval_loss": 1.1015084981918335, | |
| "eval_runtime": 81.5921, | |
| "eval_samples_per_second": 1.213, | |
| "eval_steps_per_second": 1.213, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 55.2, | |
| "grad_norm": 1.5424853563308716, | |
| "learning_rate": 1.380753138075314e-06, | |
| "loss": 0.0702, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 55.2, | |
| "eval_cer": 32.125, | |
| "eval_loss": 1.2612977027893066, | |
| "eval_runtime": 82.3504, | |
| "eval_samples_per_second": 1.202, | |
| "eval_steps_per_second": 1.202, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 55.46666666666667, | |
| "grad_norm": 10.856619834899902, | |
| "learning_rate": 1.3389121338912134e-06, | |
| "loss": 0.0579, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 55.46666666666667, | |
| "eval_cer": 30.375000000000004, | |
| "eval_loss": 1.2345651388168335, | |
| "eval_runtime": 80.9654, | |
| "eval_samples_per_second": 1.223, | |
| "eval_steps_per_second": 1.223, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 55.733333333333334, | |
| "grad_norm": 16.02048683166504, | |
| "learning_rate": 1.297071129707113e-06, | |
| "loss": 0.0529, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 55.733333333333334, | |
| "eval_cer": 31.374999999999996, | |
| "eval_loss": 1.2531167268753052, | |
| "eval_runtime": 81.8525, | |
| "eval_samples_per_second": 1.209, | |
| "eval_steps_per_second": 1.209, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "grad_norm": 2.857239246368408, | |
| "learning_rate": 1.2552301255230125e-06, | |
| "loss": 0.0555, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_cer": 30.375000000000004, | |
| "eval_loss": 1.2908639907836914, | |
| "eval_runtime": 80.9554, | |
| "eval_samples_per_second": 1.223, | |
| "eval_steps_per_second": 1.223, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 56.266666666666666, | |
| "grad_norm": 0.15558059513568878, | |
| "learning_rate": 1.2133891213389122e-06, | |
| "loss": 0.0197, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 56.266666666666666, | |
| "eval_cer": 30.375000000000004, | |
| "eval_loss": 1.3364766836166382, | |
| "eval_runtime": 82.074, | |
| "eval_samples_per_second": 1.206, | |
| "eval_steps_per_second": 1.206, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 56.53333333333333, | |
| "grad_norm": 0.6175013780593872, | |
| "learning_rate": 1.1715481171548119e-06, | |
| "loss": 0.0204, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 56.53333333333333, | |
| "eval_cer": 34.375, | |
| "eval_loss": 1.3453211784362793, | |
| "eval_runtime": 82.0799, | |
| "eval_samples_per_second": 1.206, | |
| "eval_steps_per_second": 1.206, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 56.8, | |
| "grad_norm": 0.8239704966545105, | |
| "learning_rate": 1.1297071129707113e-06, | |
| "loss": 0.0196, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 56.8, | |
| "eval_cer": 32.5, | |
| "eval_loss": 1.3590368032455444, | |
| "eval_runtime": 81.7135, | |
| "eval_samples_per_second": 1.212, | |
| "eval_steps_per_second": 1.212, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 57.06666666666667, | |
| "grad_norm": 0.026584114879369736, | |
| "learning_rate": 1.087866108786611e-06, | |
| "loss": 0.0191, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 57.06666666666667, | |
| "eval_cer": 31.5, | |
| "eval_loss": 1.3954163789749146, | |
| "eval_runtime": 81.1063, | |
| "eval_samples_per_second": 1.221, | |
| "eval_steps_per_second": 1.221, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 57.333333333333336, | |
| "grad_norm": 9.560175895690918, | |
| "learning_rate": 1.0460251046025104e-06, | |
| "loss": 0.0062, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 57.333333333333336, | |
| "eval_cer": 30.75, | |
| "eval_loss": 1.425083041191101, | |
| "eval_runtime": 81.957, | |
| "eval_samples_per_second": 1.208, | |
| "eval_steps_per_second": 1.208, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 57.6, | |
| "grad_norm": 0.09854646027088165, | |
| "learning_rate": 1.0041841004184101e-06, | |
| "loss": 0.0083, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 57.6, | |
| "eval_cer": 32.0, | |
| "eval_loss": 1.4339793920516968, | |
| "eval_runtime": 81.544, | |
| "eval_samples_per_second": 1.214, | |
| "eval_steps_per_second": 1.214, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 57.86666666666667, | |
| "grad_norm": 1.014176845550537, | |
| "learning_rate": 9.623430962343098e-07, | |
| "loss": 0.0086, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 57.86666666666667, | |
| "eval_cer": 31.5, | |
| "eval_loss": 1.4078923463821411, | |
| "eval_runtime": 81.621, | |
| "eval_samples_per_second": 1.213, | |
| "eval_steps_per_second": 1.213, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 58.13333333333333, | |
| "grad_norm": 0.11759959161281586, | |
| "learning_rate": 9.205020920502093e-07, | |
| "loss": 0.0069, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 58.13333333333333, | |
| "eval_cer": 31.75, | |
| "eval_loss": 1.4461549520492554, | |
| "eval_runtime": 81.585, | |
| "eval_samples_per_second": 1.213, | |
| "eval_steps_per_second": 1.213, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 58.4, | |
| "grad_norm": 0.05193065479397774, | |
| "learning_rate": 8.786610878661088e-07, | |
| "loss": 0.003, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 58.4, | |
| "eval_cer": 29.25, | |
| "eval_loss": 1.4369601011276245, | |
| "eval_runtime": 81.4708, | |
| "eval_samples_per_second": 1.215, | |
| "eval_steps_per_second": 1.215, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 58.666666666666664, | |
| "grad_norm": 0.04452740028500557, | |
| "learning_rate": 8.368200836820084e-07, | |
| "loss": 0.0036, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 58.666666666666664, | |
| "eval_cer": 30.375000000000004, | |
| "eval_loss": 1.4363255500793457, | |
| "eval_runtime": 81.968, | |
| "eval_samples_per_second": 1.208, | |
| "eval_steps_per_second": 1.208, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 58.93333333333333, | |
| "grad_norm": 0.09197155386209488, | |
| "learning_rate": 7.94979079497908e-07, | |
| "loss": 0.0034, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 58.93333333333333, | |
| "eval_cer": 28.625, | |
| "eval_loss": 1.4594810009002686, | |
| "eval_runtime": 81.2508, | |
| "eval_samples_per_second": 1.218, | |
| "eval_steps_per_second": 1.218, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 59.2, | |
| "grad_norm": 0.09820029139518738, | |
| "learning_rate": 7.531380753138076e-07, | |
| "loss": 0.0021, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 59.2, | |
| "eval_cer": 29.125, | |
| "eval_loss": 1.4578285217285156, | |
| "eval_runtime": 82.0546, | |
| "eval_samples_per_second": 1.207, | |
| "eval_steps_per_second": 1.207, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 59.46666666666667, | |
| "grad_norm": 0.0352126844227314, | |
| "learning_rate": 7.112970711297073e-07, | |
| "loss": 0.0017, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 59.46666666666667, | |
| "eval_cer": 28.999999999999996, | |
| "eval_loss": 1.4742412567138672, | |
| "eval_runtime": 81.9935, | |
| "eval_samples_per_second": 1.207, | |
| "eval_steps_per_second": 1.207, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 59.733333333333334, | |
| "grad_norm": 2.8213016986846924, | |
| "learning_rate": 6.694560669456067e-07, | |
| "loss": 0.0016, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 59.733333333333334, | |
| "eval_cer": 29.75, | |
| "eval_loss": 1.46147882938385, | |
| "eval_runtime": 81.9577, | |
| "eval_samples_per_second": 1.208, | |
| "eval_steps_per_second": 1.208, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 0.026645859703421593, | |
| "learning_rate": 6.276150627615063e-07, | |
| "loss": 0.0011, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_cer": 28.375, | |
| "eval_loss": 1.4601441621780396, | |
| "eval_runtime": 82.2098, | |
| "eval_samples_per_second": 1.204, | |
| "eval_steps_per_second": 1.204, | |
| "step": 225000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 240000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 64, | |
| "save_steps": 1000, | |
| "total_flos": 9.184321502281728e+20, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |