{ "best_metric": 26.328800988875155, "best_model_checkpoint": "results/whisper-tiny/marathi/checkpoint-12000", "epoch": 14.775016789791806, "eval_steps": 1000, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 74.15543365478516, "learning_rate": 4.4e-07, "loss": 3.7766, "step": 25 }, { "epoch": 0.03, "grad_norm": 39.68334197998047, "learning_rate": 9.200000000000001e-07, "loss": 3.2026, "step": 50 }, { "epoch": 0.05, "grad_norm": 14.481256484985352, "learning_rate": 1.42e-06, "loss": 2.4672, "step": 75 }, { "epoch": 0.07, "grad_norm": 8.309144020080566, "learning_rate": 1.9200000000000003e-06, "loss": 1.9195, "step": 100 }, { "epoch": 0.08, "grad_norm": 6.247703552246094, "learning_rate": 2.42e-06, "loss": 1.5361, "step": 125 }, { "epoch": 0.1, "grad_norm": 5.86753511428833, "learning_rate": 2.92e-06, "loss": 1.2775, "step": 150 }, { "epoch": 0.12, "grad_norm": 5.9364752769470215, "learning_rate": 3.4200000000000007e-06, "loss": 1.0933, "step": 175 }, { "epoch": 0.13, "grad_norm": 5.17349910736084, "learning_rate": 3.920000000000001e-06, "loss": 0.9505, "step": 200 }, { "epoch": 0.15, "grad_norm": 5.331369400024414, "learning_rate": 4.42e-06, "loss": 0.8507, "step": 225 }, { "epoch": 0.17, "grad_norm": 5.466459274291992, "learning_rate": 4.92e-06, "loss": 0.7655, "step": 250 }, { "epoch": 0.18, "grad_norm": 4.921384811401367, "learning_rate": 5.420000000000001e-06, "loss": 0.7153, "step": 275 }, { "epoch": 0.2, "grad_norm": 5.227000713348389, "learning_rate": 5.92e-06, "loss": 0.6886, "step": 300 }, { "epoch": 0.22, "grad_norm": 4.924015045166016, "learning_rate": 6.42e-06, "loss": 0.6324, "step": 325 }, { "epoch": 0.24, "grad_norm": 4.0168986320495605, "learning_rate": 6.92e-06, "loss": 0.6107, "step": 350 }, { "epoch": 0.25, "grad_norm": 4.831826686859131, "learning_rate": 7.420000000000001e-06, "loss": 0.5784, "step": 375 }, { "epoch": 0.27, "grad_norm": 4.7476935386657715, "learning_rate": 7.92e-06, "loss": 0.5509, "step": 400 }, { "epoch": 0.29, "grad_norm": 4.2020978927612305, "learning_rate": 8.42e-06, "loss": 0.5442, "step": 425 }, { "epoch": 0.3, "grad_norm": 4.830783843994141, "learning_rate": 8.920000000000001e-06, "loss": 0.5297, "step": 450 }, { "epoch": 0.32, "grad_norm": 4.747669696807861, "learning_rate": 9.42e-06, "loss": 0.5059, "step": 475 }, { "epoch": 0.34, "grad_norm": 4.504109859466553, "learning_rate": 9.920000000000002e-06, "loss": 0.4927, "step": 500 }, { "epoch": 0.35, "grad_norm": 4.707924842834473, "learning_rate": 9.997889447236182e-06, "loss": 0.4721, "step": 525 }, { "epoch": 0.37, "grad_norm": 4.621720790863037, "learning_rate": 9.995376884422112e-06, "loss": 0.464, "step": 550 }, { "epoch": 0.39, "grad_norm": 4.50490140914917, "learning_rate": 9.992864321608041e-06, "loss": 0.4518, "step": 575 }, { "epoch": 0.4, "grad_norm": 4.592816352844238, "learning_rate": 9.99035175879397e-06, "loss": 0.4335, "step": 600 }, { "epoch": 0.42, "grad_norm": 4.791091442108154, "learning_rate": 9.9878391959799e-06, "loss": 0.4348, "step": 625 }, { "epoch": 0.44, "grad_norm": 4.221704959869385, "learning_rate": 9.98532663316583e-06, "loss": 0.4203, "step": 650 }, { "epoch": 0.45, "grad_norm": 4.549515724182129, "learning_rate": 9.98281407035176e-06, "loss": 0.4086, "step": 675 }, { "epoch": 0.47, "grad_norm": 4.485387802124023, "learning_rate": 9.98030150753769e-06, "loss": 0.405, "step": 700 }, { "epoch": 0.49, "grad_norm": 4.758955001831055, "learning_rate": 9.977788944723619e-06, "loss": 0.4016, "step": 725 }, { "epoch": 0.5, "grad_norm": 4.615067005157471, "learning_rate": 9.975276381909548e-06, "loss": 0.393, "step": 750 }, { "epoch": 0.52, "grad_norm": 4.661777019500732, "learning_rate": 9.972763819095477e-06, "loss": 0.3843, "step": 775 }, { "epoch": 0.54, "grad_norm": 4.5793609619140625, "learning_rate": 9.970251256281408e-06, "loss": 0.3832, "step": 800 }, { "epoch": 0.55, "grad_norm": 5.030839443206787, "learning_rate": 9.967738693467338e-06, "loss": 0.3789, "step": 825 }, { "epoch": 0.57, "grad_norm": 4.351238250732422, "learning_rate": 9.965226130653267e-06, "loss": 0.3576, "step": 850 }, { "epoch": 0.59, "grad_norm": 4.560535907745361, "learning_rate": 9.962713567839198e-06, "loss": 0.3589, "step": 875 }, { "epoch": 0.6, "grad_norm": 4.39430046081543, "learning_rate": 9.960201005025126e-06, "loss": 0.3554, "step": 900 }, { "epoch": 0.62, "grad_norm": 4.813572883605957, "learning_rate": 9.957688442211057e-06, "loss": 0.3561, "step": 925 }, { "epoch": 0.64, "grad_norm": 3.895594358444214, "learning_rate": 9.955175879396986e-06, "loss": 0.3532, "step": 950 }, { "epoch": 0.65, "grad_norm": 4.176882266998291, "learning_rate": 9.952663316582915e-06, "loss": 0.3501, "step": 975 }, { "epoch": 0.67, "grad_norm": 4.483668327331543, "learning_rate": 9.950150753768845e-06, "loss": 0.3485, "step": 1000 }, { "epoch": 0.67, "eval_loss": 0.23381204903125763, "eval_runtime": 566.2474, "eval_samples_per_second": 2.448, "eval_steps_per_second": 2.448, "eval_wer": 47.28059332509271, "step": 1000 }, { "epoch": 0.69, "grad_norm": 4.013958930969238, "learning_rate": 9.947638190954774e-06, "loss": 0.3408, "step": 1025 }, { "epoch": 0.71, "grad_norm": 4.734582424163818, "learning_rate": 9.945125628140703e-06, "loss": 0.3313, "step": 1050 }, { "epoch": 0.72, "grad_norm": 4.5922722816467285, "learning_rate": 9.942613065326634e-06, "loss": 0.3364, "step": 1075 }, { "epoch": 0.74, "grad_norm": 3.997859001159668, "learning_rate": 9.940100502512564e-06, "loss": 0.3283, "step": 1100 }, { "epoch": 0.76, "grad_norm": 4.432836532592773, "learning_rate": 9.937587939698493e-06, "loss": 0.3258, "step": 1125 }, { "epoch": 0.77, "grad_norm": 4.074716091156006, "learning_rate": 9.935075376884424e-06, "loss": 0.3338, "step": 1150 }, { "epoch": 0.79, "grad_norm": 4.509114742279053, "learning_rate": 9.932562814070352e-06, "loss": 0.3121, "step": 1175 }, { "epoch": 0.81, "grad_norm": 4.589898586273193, "learning_rate": 9.930050251256283e-06, "loss": 0.3161, "step": 1200 }, { "epoch": 0.82, "grad_norm": 4.0301079750061035, "learning_rate": 9.927537688442212e-06, "loss": 0.3248, "step": 1225 }, { "epoch": 0.84, "grad_norm": 4.21639347076416, "learning_rate": 9.925025125628141e-06, "loss": 0.3096, "step": 1250 }, { "epoch": 0.86, "grad_norm": 4.40596866607666, "learning_rate": 9.922512562814072e-06, "loss": 0.3136, "step": 1275 }, { "epoch": 0.87, "grad_norm": 4.144809722900391, "learning_rate": 9.920000000000002e-06, "loss": 0.3068, "step": 1300 }, { "epoch": 0.89, "grad_norm": 3.97633695602417, "learning_rate": 9.917487437185931e-06, "loss": 0.3044, "step": 1325 }, { "epoch": 0.91, "grad_norm": 4.247403144836426, "learning_rate": 9.91497487437186e-06, "loss": 0.307, "step": 1350 }, { "epoch": 0.92, "grad_norm": 3.9145348072052, "learning_rate": 9.91246231155779e-06, "loss": 0.3007, "step": 1375 }, { "epoch": 0.94, "grad_norm": 4.151167869567871, "learning_rate": 9.909949748743719e-06, "loss": 0.2931, "step": 1400 }, { "epoch": 0.96, "grad_norm": 4.783816337585449, "learning_rate": 9.90743718592965e-06, "loss": 0.2939, "step": 1425 }, { "epoch": 0.97, "grad_norm": 4.319779872894287, "learning_rate": 9.904924623115578e-06, "loss": 0.294, "step": 1450 }, { "epoch": 0.99, "grad_norm": 4.233304500579834, "learning_rate": 9.902412060301509e-06, "loss": 0.2918, "step": 1475 }, { "epoch": 1.01, "grad_norm": 5.045380592346191, "learning_rate": 9.899899497487438e-06, "loss": 0.2839, "step": 1500 }, { "epoch": 1.02, "grad_norm": 4.171890735626221, "learning_rate": 9.897386934673367e-06, "loss": 0.277, "step": 1525 }, { "epoch": 1.04, "grad_norm": 5.11909818649292, "learning_rate": 9.894874371859298e-06, "loss": 0.2708, "step": 1550 }, { "epoch": 1.06, "grad_norm": 4.329667568206787, "learning_rate": 9.892361809045228e-06, "loss": 0.278, "step": 1575 }, { "epoch": 1.07, "grad_norm": 4.358795166015625, "learning_rate": 9.889849246231157e-06, "loss": 0.2702, "step": 1600 }, { "epoch": 1.09, "grad_norm": 4.983689308166504, "learning_rate": 9.887336683417086e-06, "loss": 0.2594, "step": 1625 }, { "epoch": 1.11, "grad_norm": 4.065433502197266, "learning_rate": 9.884824120603015e-06, "loss": 0.2628, "step": 1650 }, { "epoch": 1.12, "grad_norm": 4.138759136199951, "learning_rate": 9.882311557788945e-06, "loss": 0.2692, "step": 1675 }, { "epoch": 1.14, "grad_norm": 4.43567419052124, "learning_rate": 9.879798994974876e-06, "loss": 0.2688, "step": 1700 }, { "epoch": 1.16, "grad_norm": 4.089324951171875, "learning_rate": 9.877286432160805e-06, "loss": 0.2641, "step": 1725 }, { "epoch": 1.18, "grad_norm": 4.174434661865234, "learning_rate": 9.874773869346734e-06, "loss": 0.2638, "step": 1750 }, { "epoch": 1.19, "grad_norm": 4.324215888977051, "learning_rate": 9.872261306532664e-06, "loss": 0.2621, "step": 1775 }, { "epoch": 1.21, "grad_norm": 4.167600631713867, "learning_rate": 9.869748743718593e-06, "loss": 0.2568, "step": 1800 }, { "epoch": 1.23, "grad_norm": 4.090190410614014, "learning_rate": 9.867236180904524e-06, "loss": 0.2579, "step": 1825 }, { "epoch": 1.24, "grad_norm": 3.862471580505371, "learning_rate": 9.864723618090453e-06, "loss": 0.2549, "step": 1850 }, { "epoch": 1.26, "grad_norm": 3.9046545028686523, "learning_rate": 9.862211055276383e-06, "loss": 0.2512, "step": 1875 }, { "epoch": 1.28, "grad_norm": 3.973026990890503, "learning_rate": 9.859698492462312e-06, "loss": 0.2535, "step": 1900 }, { "epoch": 1.29, "grad_norm": 3.875776529312134, "learning_rate": 9.857185929648241e-06, "loss": 0.2454, "step": 1925 }, { "epoch": 1.31, "grad_norm": 3.815830707550049, "learning_rate": 9.854673366834172e-06, "loss": 0.2509, "step": 1950 }, { "epoch": 1.33, "grad_norm": 3.9826467037200928, "learning_rate": 9.852160804020102e-06, "loss": 0.2469, "step": 1975 }, { "epoch": 1.34, "grad_norm": 4.199316024780273, "learning_rate": 9.849648241206031e-06, "loss": 0.2543, "step": 2000 }, { "epoch": 1.34, "eval_loss": 0.1726374477148056, "eval_runtime": 531.5792, "eval_samples_per_second": 2.607, "eval_steps_per_second": 2.607, "eval_wer": 38.892812996644885, "step": 2000 }, { "epoch": 1.36, "grad_norm": 4.188065052032471, "learning_rate": 9.84713567839196e-06, "loss": 0.253, "step": 2025 }, { "epoch": 1.38, "grad_norm": 4.348769187927246, "learning_rate": 9.84462311557789e-06, "loss": 0.2439, "step": 2050 }, { "epoch": 1.39, "grad_norm": 4.025571823120117, "learning_rate": 9.842110552763819e-06, "loss": 0.2499, "step": 2075 }, { "epoch": 1.41, "grad_norm": 3.441206216812134, "learning_rate": 9.83959798994975e-06, "loss": 0.2419, "step": 2100 }, { "epoch": 1.43, "grad_norm": 4.062358856201172, "learning_rate": 9.83708542713568e-06, "loss": 0.2428, "step": 2125 }, { "epoch": 1.44, "grad_norm": 4.701034069061279, "learning_rate": 9.834572864321609e-06, "loss": 0.2435, "step": 2150 }, { "epoch": 1.46, "grad_norm": 4.011937618255615, "learning_rate": 9.832060301507538e-06, "loss": 0.2429, "step": 2175 }, { "epoch": 1.48, "grad_norm": 3.9073057174682617, "learning_rate": 9.829547738693467e-06, "loss": 0.2371, "step": 2200 }, { "epoch": 1.49, "grad_norm": 4.053809642791748, "learning_rate": 9.827035175879398e-06, "loss": 0.236, "step": 2225 }, { "epoch": 1.51, "grad_norm": 3.983830690383911, "learning_rate": 9.824522613065328e-06, "loss": 0.2393, "step": 2250 }, { "epoch": 1.53, "grad_norm": 4.095301151275635, "learning_rate": 9.822010050251257e-06, "loss": 0.2329, "step": 2275 }, { "epoch": 1.54, "grad_norm": 3.980642318725586, "learning_rate": 9.819497487437186e-06, "loss": 0.24, "step": 2300 }, { "epoch": 1.56, "grad_norm": 4.499876976013184, "learning_rate": 9.816984924623116e-06, "loss": 0.2307, "step": 2325 }, { "epoch": 1.58, "grad_norm": 4.50550651550293, "learning_rate": 9.814472361809047e-06, "loss": 0.2336, "step": 2350 }, { "epoch": 1.6, "grad_norm": 4.186766147613525, "learning_rate": 9.811959798994976e-06, "loss": 0.233, "step": 2375 }, { "epoch": 1.61, "grad_norm": 4.362492084503174, "learning_rate": 9.809447236180905e-06, "loss": 0.2281, "step": 2400 }, { "epoch": 1.63, "grad_norm": 4.026979446411133, "learning_rate": 9.806934673366835e-06, "loss": 0.227, "step": 2425 }, { "epoch": 1.65, "grad_norm": 3.556326389312744, "learning_rate": 9.804422110552764e-06, "loss": 0.2331, "step": 2450 }, { "epoch": 1.66, "grad_norm": 4.109285831451416, "learning_rate": 9.801909547738693e-06, "loss": 0.2369, "step": 2475 }, { "epoch": 1.68, "grad_norm": 4.070573329925537, "learning_rate": 9.799396984924624e-06, "loss": 0.2307, "step": 2500 }, { "epoch": 1.7, "grad_norm": 4.282459735870361, "learning_rate": 9.796884422110554e-06, "loss": 0.2276, "step": 2525 }, { "epoch": 1.71, "grad_norm": 3.989485263824463, "learning_rate": 9.794371859296483e-06, "loss": 0.2325, "step": 2550 }, { "epoch": 1.73, "grad_norm": 4.217010021209717, "learning_rate": 9.791859296482414e-06, "loss": 0.222, "step": 2575 }, { "epoch": 1.75, "grad_norm": 4.2022199630737305, "learning_rate": 9.789346733668342e-06, "loss": 0.2281, "step": 2600 }, { "epoch": 1.76, "grad_norm": 3.7386114597320557, "learning_rate": 9.786834170854273e-06, "loss": 0.2165, "step": 2625 }, { "epoch": 1.78, "grad_norm": 4.048258304595947, "learning_rate": 9.784321608040202e-06, "loss": 0.2218, "step": 2650 }, { "epoch": 1.8, "grad_norm": 4.0867133140563965, "learning_rate": 9.781809045226131e-06, "loss": 0.2197, "step": 2675 }, { "epoch": 1.81, "grad_norm": 3.8676252365112305, "learning_rate": 9.77929648241206e-06, "loss": 0.2261, "step": 2700 }, { "epoch": 1.83, "grad_norm": 3.8840291500091553, "learning_rate": 9.77678391959799e-06, "loss": 0.2188, "step": 2725 }, { "epoch": 1.85, "grad_norm": 4.130185127258301, "learning_rate": 9.774271356783921e-06, "loss": 0.2176, "step": 2750 }, { "epoch": 1.86, "grad_norm": 3.8641357421875, "learning_rate": 9.77175879396985e-06, "loss": 0.2165, "step": 2775 }, { "epoch": 1.88, "grad_norm": 3.8261783123016357, "learning_rate": 9.76924623115578e-06, "loss": 0.2149, "step": 2800 }, { "epoch": 1.9, "grad_norm": 3.861722946166992, "learning_rate": 9.766733668341709e-06, "loss": 0.2122, "step": 2825 }, { "epoch": 1.91, "grad_norm": 4.013296127319336, "learning_rate": 9.76422110552764e-06, "loss": 0.2183, "step": 2850 }, { "epoch": 1.93, "grad_norm": 3.78545880317688, "learning_rate": 9.761708542713568e-06, "loss": 0.2151, "step": 2875 }, { "epoch": 1.95, "grad_norm": 4.247804641723633, "learning_rate": 9.759195979899499e-06, "loss": 0.2213, "step": 2900 }, { "epoch": 1.96, "grad_norm": 4.548637390136719, "learning_rate": 9.756683417085428e-06, "loss": 0.2198, "step": 2925 }, { "epoch": 1.98, "grad_norm": 3.617631435394287, "learning_rate": 9.754170854271357e-06, "loss": 0.2103, "step": 2950 }, { "epoch": 2.0, "grad_norm": 3.9520044326782227, "learning_rate": 9.751658291457288e-06, "loss": 0.2053, "step": 2975 }, { "epoch": 2.01, "grad_norm": 3.903465747833252, "learning_rate": 9.749145728643216e-06, "loss": 0.1961, "step": 3000 }, { "epoch": 2.01, "eval_loss": 0.1471080482006073, "eval_runtime": 533.9926, "eval_samples_per_second": 2.596, "eval_steps_per_second": 2.596, "eval_wer": 33.365707222320324, "step": 3000 }, { "epoch": 2.03, "grad_norm": 3.3298327922821045, "learning_rate": 9.746633165829147e-06, "loss": 0.1968, "step": 3025 }, { "epoch": 2.05, "grad_norm": 3.907670259475708, "learning_rate": 9.744120603015076e-06, "loss": 0.1938, "step": 3050 }, { "epoch": 2.07, "grad_norm": 3.819309711456299, "learning_rate": 9.741608040201006e-06, "loss": 0.1879, "step": 3075 }, { "epoch": 2.08, "grad_norm": 4.644184112548828, "learning_rate": 9.739095477386935e-06, "loss": 0.1933, "step": 3100 }, { "epoch": 2.1, "grad_norm": 3.5478782653808594, "learning_rate": 9.736582914572866e-06, "loss": 0.19, "step": 3125 }, { "epoch": 2.12, "grad_norm": 3.4926066398620605, "learning_rate": 9.734070351758794e-06, "loss": 0.1929, "step": 3150 }, { "epoch": 2.13, "grad_norm": 3.6318588256835938, "learning_rate": 9.731557788944725e-06, "loss": 0.1921, "step": 3175 }, { "epoch": 2.15, "grad_norm": 4.020270824432373, "learning_rate": 9.729045226130654e-06, "loss": 0.19, "step": 3200 }, { "epoch": 2.17, "grad_norm": 3.391878128051758, "learning_rate": 9.726532663316583e-06, "loss": 0.1912, "step": 3225 }, { "epoch": 2.18, "grad_norm": 3.8649306297302246, "learning_rate": 9.724020100502514e-06, "loss": 0.1965, "step": 3250 }, { "epoch": 2.2, "grad_norm": 3.8927695751190186, "learning_rate": 9.721507537688444e-06, "loss": 0.1901, "step": 3275 }, { "epoch": 2.22, "grad_norm": 3.7473957538604736, "learning_rate": 9.718994974874373e-06, "loss": 0.1932, "step": 3300 }, { "epoch": 2.23, "grad_norm": 3.2613677978515625, "learning_rate": 9.716482412060302e-06, "loss": 0.1925, "step": 3325 }, { "epoch": 2.25, "grad_norm": 4.175868988037109, "learning_rate": 9.713969849246232e-06, "loss": 0.1898, "step": 3350 }, { "epoch": 2.27, "grad_norm": 4.236743450164795, "learning_rate": 9.711457286432163e-06, "loss": 0.1899, "step": 3375 }, { "epoch": 2.28, "grad_norm": 4.136856555938721, "learning_rate": 9.708944723618092e-06, "loss": 0.1912, "step": 3400 }, { "epoch": 2.3, "grad_norm": 3.826167345046997, "learning_rate": 9.706432160804021e-06, "loss": 0.1909, "step": 3425 }, { "epoch": 2.32, "grad_norm": 3.949150323867798, "learning_rate": 9.70391959798995e-06, "loss": 0.1889, "step": 3450 }, { "epoch": 2.33, "grad_norm": 4.023538589477539, "learning_rate": 9.70140703517588e-06, "loss": 0.1903, "step": 3475 }, { "epoch": 2.35, "grad_norm": 3.7844576835632324, "learning_rate": 9.698894472361809e-06, "loss": 0.1928, "step": 3500 }, { "epoch": 2.37, "grad_norm": 3.364312171936035, "learning_rate": 9.69638190954774e-06, "loss": 0.193, "step": 3525 }, { "epoch": 2.38, "grad_norm": 3.4202849864959717, "learning_rate": 9.69386934673367e-06, "loss": 0.186, "step": 3550 }, { "epoch": 2.4, "grad_norm": 3.6285476684570312, "learning_rate": 9.691356783919599e-06, "loss": 0.1863, "step": 3575 }, { "epoch": 2.42, "grad_norm": 4.26074743270874, "learning_rate": 9.688844221105528e-06, "loss": 0.1831, "step": 3600 }, { "epoch": 2.43, "grad_norm": 3.6059014797210693, "learning_rate": 9.686331658291457e-06, "loss": 0.182, "step": 3625 }, { "epoch": 2.45, "grad_norm": 3.773573637008667, "learning_rate": 9.683819095477388e-06, "loss": 0.1824, "step": 3650 }, { "epoch": 2.47, "grad_norm": 4.112974643707275, "learning_rate": 9.681306532663318e-06, "loss": 0.1828, "step": 3675 }, { "epoch": 2.48, "grad_norm": 3.467885732650757, "learning_rate": 9.678793969849247e-06, "loss": 0.1819, "step": 3700 }, { "epoch": 2.5, "grad_norm": 3.418673038482666, "learning_rate": 9.676281407035176e-06, "loss": 0.1831, "step": 3725 }, { "epoch": 2.52, "grad_norm": 4.263250350952148, "learning_rate": 9.673768844221106e-06, "loss": 0.186, "step": 3750 }, { "epoch": 2.54, "grad_norm": 3.5524044036865234, "learning_rate": 9.671256281407035e-06, "loss": 0.1889, "step": 3775 }, { "epoch": 2.55, "grad_norm": 3.693559408187866, "learning_rate": 9.668743718592966e-06, "loss": 0.1842, "step": 3800 }, { "epoch": 2.57, "grad_norm": 3.682617425918579, "learning_rate": 9.666231155778895e-06, "loss": 0.1826, "step": 3825 }, { "epoch": 2.59, "grad_norm": 3.4766149520874023, "learning_rate": 9.663718592964825e-06, "loss": 0.1766, "step": 3850 }, { "epoch": 2.6, "grad_norm": 3.3245768547058105, "learning_rate": 9.661206030150754e-06, "loss": 0.1816, "step": 3875 }, { "epoch": 2.62, "grad_norm": 4.028345584869385, "learning_rate": 9.658693467336683e-06, "loss": 0.1803, "step": 3900 }, { "epoch": 2.64, "grad_norm": 3.665334463119507, "learning_rate": 9.656180904522614e-06, "loss": 0.178, "step": 3925 }, { "epoch": 2.65, "grad_norm": 4.212314128875732, "learning_rate": 9.653668341708544e-06, "loss": 0.1818, "step": 3950 }, { "epoch": 2.67, "grad_norm": 4.093043804168701, "learning_rate": 9.651155778894473e-06, "loss": 0.1784, "step": 3975 }, { "epoch": 2.69, "grad_norm": 3.8363521099090576, "learning_rate": 9.648643216080404e-06, "loss": 0.1786, "step": 4000 }, { "epoch": 2.69, "eval_loss": 0.1332446187734604, "eval_runtime": 541.0077, "eval_samples_per_second": 2.562, "eval_steps_per_second": 2.562, "eval_wer": 30.64630054741303, "step": 4000 }, { "epoch": 2.7, "grad_norm": 3.6020116806030273, "learning_rate": 9.646130653266332e-06, "loss": 0.1755, "step": 4025 }, { "epoch": 2.72, "grad_norm": 3.253662586212158, "learning_rate": 9.643618090452263e-06, "loss": 0.1767, "step": 4050 }, { "epoch": 2.74, "grad_norm": 3.3012306690216064, "learning_rate": 9.641105527638192e-06, "loss": 0.1813, "step": 4075 }, { "epoch": 2.75, "grad_norm": 3.7597391605377197, "learning_rate": 9.638592964824121e-06, "loss": 0.1806, "step": 4100 }, { "epoch": 2.77, "grad_norm": 3.914498805999756, "learning_rate": 9.63608040201005e-06, "loss": 0.1781, "step": 4125 }, { "epoch": 2.79, "grad_norm": 3.5466084480285645, "learning_rate": 9.63356783919598e-06, "loss": 0.1784, "step": 4150 }, { "epoch": 2.8, "grad_norm": 3.5035176277160645, "learning_rate": 9.63105527638191e-06, "loss": 0.1693, "step": 4175 }, { "epoch": 2.82, "grad_norm": 3.6211013793945312, "learning_rate": 9.62854271356784e-06, "loss": 0.1831, "step": 4200 }, { "epoch": 2.84, "grad_norm": 3.355555772781372, "learning_rate": 9.62603015075377e-06, "loss": 0.1737, "step": 4225 }, { "epoch": 2.85, "grad_norm": 4.14884614944458, "learning_rate": 9.623517587939699e-06, "loss": 0.1773, "step": 4250 }, { "epoch": 2.87, "grad_norm": 3.896099805831909, "learning_rate": 9.62100502512563e-06, "loss": 0.1712, "step": 4275 }, { "epoch": 2.89, "grad_norm": 3.69228196144104, "learning_rate": 9.618492462311558e-06, "loss": 0.1747, "step": 4300 }, { "epoch": 2.9, "grad_norm": 3.4385323524475098, "learning_rate": 9.615979899497489e-06, "loss": 0.1806, "step": 4325 }, { "epoch": 2.92, "grad_norm": 4.37261438369751, "learning_rate": 9.613467336683418e-06, "loss": 0.1705, "step": 4350 }, { "epoch": 2.94, "grad_norm": 3.549129009246826, "learning_rate": 9.610954773869347e-06, "loss": 0.1683, "step": 4375 }, { "epoch": 2.96, "grad_norm": 3.8860154151916504, "learning_rate": 9.608442211055277e-06, "loss": 0.1732, "step": 4400 }, { "epoch": 2.97, "grad_norm": 3.8280348777770996, "learning_rate": 9.605929648241206e-06, "loss": 0.1739, "step": 4425 }, { "epoch": 2.99, "grad_norm": 3.9021239280700684, "learning_rate": 9.603417085427137e-06, "loss": 0.1744, "step": 4450 }, { "epoch": 3.01, "grad_norm": 3.436377763748169, "learning_rate": 9.600904522613066e-06, "loss": 0.1674, "step": 4475 }, { "epoch": 3.02, "grad_norm": 3.5898520946502686, "learning_rate": 9.598391959798996e-06, "loss": 0.1546, "step": 4500 }, { "epoch": 3.04, "grad_norm": 3.295307159423828, "learning_rate": 9.595879396984925e-06, "loss": 0.1543, "step": 4525 }, { "epoch": 3.06, "grad_norm": 3.3402857780456543, "learning_rate": 9.593366834170856e-06, "loss": 0.1585, "step": 4550 }, { "epoch": 3.07, "grad_norm": 3.4992740154266357, "learning_rate": 9.590854271356784e-06, "loss": 0.1584, "step": 4575 }, { "epoch": 3.09, "grad_norm": 3.301234245300293, "learning_rate": 9.588341708542715e-06, "loss": 0.1556, "step": 4600 }, { "epoch": 3.11, "grad_norm": 3.2298898696899414, "learning_rate": 9.585829145728644e-06, "loss": 0.1557, "step": 4625 }, { "epoch": 3.12, "grad_norm": 3.81208872795105, "learning_rate": 9.583316582914573e-06, "loss": 0.1488, "step": 4650 }, { "epoch": 3.14, "grad_norm": 3.7610058784484863, "learning_rate": 9.580804020100504e-06, "loss": 0.1537, "step": 4675 }, { "epoch": 3.16, "grad_norm": 3.394169569015503, "learning_rate": 9.578291457286432e-06, "loss": 0.1546, "step": 4700 }, { "epoch": 3.17, "grad_norm": 3.5936498641967773, "learning_rate": 9.575778894472363e-06, "loss": 0.1544, "step": 4725 }, { "epoch": 3.19, "grad_norm": 3.714808464050293, "learning_rate": 9.573266331658292e-06, "loss": 0.1554, "step": 4750 }, { "epoch": 3.21, "grad_norm": 3.731008768081665, "learning_rate": 9.570753768844222e-06, "loss": 0.157, "step": 4775 }, { "epoch": 3.22, "grad_norm": 3.4987032413482666, "learning_rate": 9.568241206030151e-06, "loss": 0.1506, "step": 4800 }, { "epoch": 3.24, "grad_norm": 3.487567186355591, "learning_rate": 9.565728643216082e-06, "loss": 0.1553, "step": 4825 }, { "epoch": 3.26, "grad_norm": 3.537971258163452, "learning_rate": 9.563216080402011e-06, "loss": 0.1518, "step": 4850 }, { "epoch": 3.27, "grad_norm": 3.5595390796661377, "learning_rate": 9.56070351758794e-06, "loss": 0.1519, "step": 4875 }, { "epoch": 3.29, "grad_norm": 3.397580146789551, "learning_rate": 9.55819095477387e-06, "loss": 0.1516, "step": 4900 }, { "epoch": 3.31, "grad_norm": 3.758497714996338, "learning_rate": 9.5556783919598e-06, "loss": 0.1562, "step": 4925 }, { "epoch": 3.32, "grad_norm": 3.1611812114715576, "learning_rate": 9.55316582914573e-06, "loss": 0.1522, "step": 4950 }, { "epoch": 3.34, "grad_norm": 3.4654600620269775, "learning_rate": 9.550653266331658e-06, "loss": 0.1544, "step": 4975 }, { "epoch": 3.36, "grad_norm": 3.0207717418670654, "learning_rate": 9.548140703517589e-06, "loss": 0.1574, "step": 5000 }, { "epoch": 3.36, "eval_loss": 0.1250012367963791, "eval_runtime": 532.3886, "eval_samples_per_second": 2.603, "eval_steps_per_second": 2.603, "eval_wer": 28.50962387427159, "step": 5000 }, { "epoch": 3.37, "grad_norm": 3.4317171573638916, "learning_rate": 9.545628140703518e-06, "loss": 0.1563, "step": 5025 }, { "epoch": 3.39, "grad_norm": 3.7062742710113525, "learning_rate": 9.543115577889448e-06, "loss": 0.1559, "step": 5050 }, { "epoch": 3.41, "grad_norm": 3.580521821975708, "learning_rate": 9.540603015075379e-06, "loss": 0.1521, "step": 5075 }, { "epoch": 3.43, "grad_norm": 3.364760160446167, "learning_rate": 9.538090452261308e-06, "loss": 0.1637, "step": 5100 }, { "epoch": 3.44, "grad_norm": 3.812782049179077, "learning_rate": 9.535577889447237e-06, "loss": 0.1564, "step": 5125 }, { "epoch": 3.46, "grad_norm": 3.065197229385376, "learning_rate": 9.533065326633166e-06, "loss": 0.1484, "step": 5150 }, { "epoch": 3.48, "grad_norm": 3.456214427947998, "learning_rate": 9.530552763819096e-06, "loss": 0.1517, "step": 5175 }, { "epoch": 3.49, "grad_norm": 3.731849193572998, "learning_rate": 9.528040201005025e-06, "loss": 0.1499, "step": 5200 }, { "epoch": 3.51, "grad_norm": 4.133150577545166, "learning_rate": 9.525527638190956e-06, "loss": 0.1475, "step": 5225 }, { "epoch": 3.53, "grad_norm": 3.353069543838501, "learning_rate": 9.523015075376885e-06, "loss": 0.1479, "step": 5250 }, { "epoch": 3.54, "grad_norm": 3.885782480239868, "learning_rate": 9.520502512562815e-06, "loss": 0.1458, "step": 5275 }, { "epoch": 3.56, "grad_norm": 3.5367889404296875, "learning_rate": 9.517989949748744e-06, "loss": 0.1531, "step": 5300 }, { "epoch": 3.58, "grad_norm": 3.8021907806396484, "learning_rate": 9.515477386934673e-06, "loss": 0.1498, "step": 5325 }, { "epoch": 3.59, "grad_norm": 3.308176279067993, "learning_rate": 9.512964824120604e-06, "loss": 0.1519, "step": 5350 }, { "epoch": 3.61, "grad_norm": 3.668410539627075, "learning_rate": 9.510452261306534e-06, "loss": 0.1526, "step": 5375 }, { "epoch": 3.63, "grad_norm": 3.228257417678833, "learning_rate": 9.507939698492463e-06, "loss": 0.1509, "step": 5400 }, { "epoch": 3.64, "grad_norm": 3.433962345123291, "learning_rate": 9.505427135678392e-06, "loss": 0.1521, "step": 5425 }, { "epoch": 3.66, "grad_norm": 3.707969903945923, "learning_rate": 9.502914572864322e-06, "loss": 0.1439, "step": 5450 }, { "epoch": 3.68, "grad_norm": 3.447314739227295, "learning_rate": 9.500402010050253e-06, "loss": 0.1473, "step": 5475 }, { "epoch": 3.69, "grad_norm": 3.579751968383789, "learning_rate": 9.497889447236182e-06, "loss": 0.1507, "step": 5500 }, { "epoch": 3.71, "grad_norm": 3.470454454421997, "learning_rate": 9.495376884422111e-06, "loss": 0.1473, "step": 5525 }, { "epoch": 3.73, "grad_norm": 3.2754967212677, "learning_rate": 9.49286432160804e-06, "loss": 0.1458, "step": 5550 }, { "epoch": 3.74, "grad_norm": 3.724622964859009, "learning_rate": 9.49035175879397e-06, "loss": 0.1474, "step": 5575 }, { "epoch": 3.76, "grad_norm": 3.176765203475952, "learning_rate": 9.4878391959799e-06, "loss": 0.1484, "step": 5600 }, { "epoch": 3.78, "grad_norm": 3.3496909141540527, "learning_rate": 9.48532663316583e-06, "loss": 0.1485, "step": 5625 }, { "epoch": 3.79, "grad_norm": 3.3814542293548584, "learning_rate": 9.48281407035176e-06, "loss": 0.1491, "step": 5650 }, { "epoch": 3.81, "grad_norm": 3.9236228466033936, "learning_rate": 9.480301507537689e-06, "loss": 0.1522, "step": 5675 }, { "epoch": 3.83, "grad_norm": 3.3441123962402344, "learning_rate": 9.47778894472362e-06, "loss": 0.1442, "step": 5700 }, { "epoch": 3.84, "grad_norm": 3.3952231407165527, "learning_rate": 9.475276381909548e-06, "loss": 0.1487, "step": 5725 }, { "epoch": 3.86, "grad_norm": 3.1410765647888184, "learning_rate": 9.472763819095479e-06, "loss": 0.1522, "step": 5750 }, { "epoch": 3.88, "grad_norm": 3.267335891723633, "learning_rate": 9.470251256281408e-06, "loss": 0.1425, "step": 5775 }, { "epoch": 3.9, "grad_norm": 3.547773838043213, "learning_rate": 9.467738693467337e-06, "loss": 0.1416, "step": 5800 }, { "epoch": 3.91, "grad_norm": 3.4462673664093018, "learning_rate": 9.465226130653267e-06, "loss": 0.1453, "step": 5825 }, { "epoch": 3.93, "grad_norm": 3.4584672451019287, "learning_rate": 9.462713567839196e-06, "loss": 0.1445, "step": 5850 }, { "epoch": 3.95, "grad_norm": 3.501403331756592, "learning_rate": 9.460201005025127e-06, "loss": 0.1423, "step": 5875 }, { "epoch": 3.96, "grad_norm": 3.912052631378174, "learning_rate": 9.457688442211056e-06, "loss": 0.1481, "step": 5900 }, { "epoch": 3.98, "grad_norm": 3.257798433303833, "learning_rate": 9.455175879396986e-06, "loss": 0.1385, "step": 5925 }, { "epoch": 4.0, "grad_norm": 3.7633163928985596, "learning_rate": 9.452663316582915e-06, "loss": 0.1444, "step": 5950 }, { "epoch": 4.01, "grad_norm": 3.1884329319000244, "learning_rate": 9.450150753768846e-06, "loss": 0.1314, "step": 5975 }, { "epoch": 4.03, "grad_norm": 3.5105104446411133, "learning_rate": 9.447638190954774e-06, "loss": 0.1284, "step": 6000 }, { "epoch": 4.03, "eval_loss": 0.11968862265348434, "eval_runtime": 533.7582, "eval_samples_per_second": 2.597, "eval_steps_per_second": 2.597, "eval_wer": 27.644358114073814, "step": 6000 }, { "epoch": 4.05, "grad_norm": 3.4444563388824463, "learning_rate": 9.445125628140705e-06, "loss": 0.1278, "step": 6025 }, { "epoch": 4.06, "grad_norm": 3.446941375732422, "learning_rate": 9.442613065326634e-06, "loss": 0.1316, "step": 6050 }, { "epoch": 4.08, "grad_norm": 3.251770496368408, "learning_rate": 9.440100502512563e-06, "loss": 0.1289, "step": 6075 }, { "epoch": 4.1, "grad_norm": 3.1929450035095215, "learning_rate": 9.437587939698494e-06, "loss": 0.1257, "step": 6100 }, { "epoch": 4.11, "grad_norm": 3.137993097305298, "learning_rate": 9.435075376884422e-06, "loss": 0.1259, "step": 6125 }, { "epoch": 4.13, "grad_norm": 3.5924248695373535, "learning_rate": 9.432562814070353e-06, "loss": 0.1246, "step": 6150 }, { "epoch": 4.15, "grad_norm": 3.7657840251922607, "learning_rate": 9.430050251256282e-06, "loss": 0.1263, "step": 6175 }, { "epoch": 4.16, "grad_norm": 3.8803839683532715, "learning_rate": 9.427537688442212e-06, "loss": 0.1278, "step": 6200 }, { "epoch": 4.18, "grad_norm": 3.049147844314575, "learning_rate": 9.425025125628141e-06, "loss": 0.1248, "step": 6225 }, { "epoch": 4.2, "grad_norm": 3.5847809314727783, "learning_rate": 9.422512562814072e-06, "loss": 0.1326, "step": 6250 }, { "epoch": 4.21, "grad_norm": 3.208193063735962, "learning_rate": 9.42e-06, "loss": 0.1278, "step": 6275 }, { "epoch": 4.23, "grad_norm": 3.787940740585327, "learning_rate": 9.41748743718593e-06, "loss": 0.1286, "step": 6300 }, { "epoch": 4.25, "grad_norm": 2.801053762435913, "learning_rate": 9.41497487437186e-06, "loss": 0.1309, "step": 6325 }, { "epoch": 4.26, "grad_norm": 3.1014838218688965, "learning_rate": 9.41246231155779e-06, "loss": 0.1265, "step": 6350 }, { "epoch": 4.28, "grad_norm": 3.3319690227508545, "learning_rate": 9.40994974874372e-06, "loss": 0.1273, "step": 6375 }, { "epoch": 4.3, "grad_norm": 3.366464376449585, "learning_rate": 9.407437185929648e-06, "loss": 0.1266, "step": 6400 }, { "epoch": 4.31, "grad_norm": 3.5356907844543457, "learning_rate": 9.404924623115579e-06, "loss": 0.13, "step": 6425 }, { "epoch": 4.33, "grad_norm": 3.325680732727051, "learning_rate": 9.402412060301508e-06, "loss": 0.1262, "step": 6450 }, { "epoch": 4.35, "grad_norm": 3.4266843795776367, "learning_rate": 9.399899497487438e-06, "loss": 0.1284, "step": 6475 }, { "epoch": 4.37, "grad_norm": 3.1395492553710938, "learning_rate": 9.397386934673369e-06, "loss": 0.1276, "step": 6500 }, { "epoch": 4.38, "grad_norm": 3.323065757751465, "learning_rate": 9.394874371859298e-06, "loss": 0.1246, "step": 6525 }, { "epoch": 4.4, "grad_norm": 3.3577070236206055, "learning_rate": 9.392361809045227e-06, "loss": 0.1326, "step": 6550 }, { "epoch": 4.42, "grad_norm": 3.4483211040496826, "learning_rate": 9.389849246231157e-06, "loss": 0.1287, "step": 6575 }, { "epoch": 4.43, "grad_norm": 3.939202308654785, "learning_rate": 9.387336683417086e-06, "loss": 0.1295, "step": 6600 }, { "epoch": 4.45, "grad_norm": 3.5882346630096436, "learning_rate": 9.384824120603015e-06, "loss": 0.1257, "step": 6625 }, { "epoch": 4.47, "grad_norm": 3.9268131256103516, "learning_rate": 9.382311557788946e-06, "loss": 0.1308, "step": 6650 }, { "epoch": 4.48, "grad_norm": 3.2181479930877686, "learning_rate": 9.379798994974874e-06, "loss": 0.1289, "step": 6675 }, { "epoch": 4.5, "grad_norm": 3.4467923641204834, "learning_rate": 9.377286432160805e-06, "loss": 0.1286, "step": 6700 }, { "epoch": 4.52, "grad_norm": 3.227398157119751, "learning_rate": 9.374773869346734e-06, "loss": 0.1279, "step": 6725 }, { "epoch": 4.53, "grad_norm": 3.5086820125579834, "learning_rate": 9.372261306532664e-06, "loss": 0.1295, "step": 6750 }, { "epoch": 4.55, "grad_norm": 3.0712101459503174, "learning_rate": 9.369748743718595e-06, "loss": 0.1285, "step": 6775 }, { "epoch": 4.57, "grad_norm": 3.3961784839630127, "learning_rate": 9.367236180904524e-06, "loss": 0.1311, "step": 6800 }, { "epoch": 4.58, "grad_norm": 3.6800429821014404, "learning_rate": 9.364723618090453e-06, "loss": 0.1284, "step": 6825 }, { "epoch": 4.6, "grad_norm": 3.6793227195739746, "learning_rate": 9.362211055276383e-06, "loss": 0.1311, "step": 6850 }, { "epoch": 4.62, "grad_norm": 3.1020681858062744, "learning_rate": 9.359698492462312e-06, "loss": 0.1287, "step": 6875 }, { "epoch": 4.63, "grad_norm": 3.738802909851074, "learning_rate": 9.357185929648241e-06, "loss": 0.1241, "step": 6900 }, { "epoch": 4.65, "grad_norm": 3.344667911529541, "learning_rate": 9.354673366834172e-06, "loss": 0.1247, "step": 6925 }, { "epoch": 4.67, "grad_norm": 3.011655330657959, "learning_rate": 9.352160804020101e-06, "loss": 0.1237, "step": 6950 }, { "epoch": 4.68, "grad_norm": 3.486971139907837, "learning_rate": 9.34964824120603e-06, "loss": 0.1275, "step": 6975 }, { "epoch": 4.7, "grad_norm": 3.265568971633911, "learning_rate": 9.34713567839196e-06, "loss": 0.1216, "step": 7000 }, { "epoch": 4.7, "eval_loss": 0.11660390347242355, "eval_runtime": 534.0823, "eval_samples_per_second": 2.595, "eval_steps_per_second": 2.595, "eval_wer": 26.823238566131025, "step": 7000 }, { "epoch": 4.72, "grad_norm": 3.0894901752471924, "learning_rate": 9.34462311557789e-06, "loss": 0.1258, "step": 7025 }, { "epoch": 4.73, "grad_norm": 3.5530054569244385, "learning_rate": 9.34211055276382e-06, "loss": 0.1294, "step": 7050 }, { "epoch": 4.75, "grad_norm": 3.127763271331787, "learning_rate": 9.33959798994975e-06, "loss": 0.129, "step": 7075 }, { "epoch": 4.77, "grad_norm": 3.453204393386841, "learning_rate": 9.337085427135679e-06, "loss": 0.1264, "step": 7100 }, { "epoch": 4.79, "grad_norm": 3.470991611480713, "learning_rate": 9.334572864321608e-06, "loss": 0.1272, "step": 7125 }, { "epoch": 4.8, "grad_norm": 3.498213768005371, "learning_rate": 9.332060301507538e-06, "loss": 0.1257, "step": 7150 }, { "epoch": 4.82, "grad_norm": 3.052225351333618, "learning_rate": 9.329547738693469e-06, "loss": 0.1242, "step": 7175 }, { "epoch": 4.84, "grad_norm": 2.9512875080108643, "learning_rate": 9.327035175879398e-06, "loss": 0.1226, "step": 7200 }, { "epoch": 4.85, "grad_norm": 3.124257802963257, "learning_rate": 9.324522613065327e-06, "loss": 0.1276, "step": 7225 }, { "epoch": 4.87, "grad_norm": 3.763948678970337, "learning_rate": 9.322010050251257e-06, "loss": 0.123, "step": 7250 }, { "epoch": 4.89, "grad_norm": 3.859360694885254, "learning_rate": 9.319497487437186e-06, "loss": 0.1288, "step": 7275 }, { "epoch": 4.9, "grad_norm": 3.406261682510376, "learning_rate": 9.316984924623115e-06, "loss": 0.1268, "step": 7300 }, { "epoch": 4.92, "grad_norm": 3.5981762409210205, "learning_rate": 9.314472361809046e-06, "loss": 0.1267, "step": 7325 }, { "epoch": 4.94, "grad_norm": 3.2677414417266846, "learning_rate": 9.311959798994976e-06, "loss": 0.1228, "step": 7350 }, { "epoch": 4.95, "grad_norm": 3.4176025390625, "learning_rate": 9.309447236180905e-06, "loss": 0.1292, "step": 7375 }, { "epoch": 4.97, "grad_norm": 3.702085018157959, "learning_rate": 9.306934673366836e-06, "loss": 0.1212, "step": 7400 }, { "epoch": 4.99, "grad_norm": 3.075143337249756, "learning_rate": 9.304422110552764e-06, "loss": 0.1208, "step": 7425 }, { "epoch": 5.0, "grad_norm": 2.96437406539917, "learning_rate": 9.301909547738695e-06, "loss": 0.1223, "step": 7450 }, { "epoch": 5.02, "grad_norm": 3.359867572784424, "learning_rate": 9.299396984924624e-06, "loss": 0.1083, "step": 7475 }, { "epoch": 5.04, "grad_norm": 3.1340601444244385, "learning_rate": 9.296884422110553e-06, "loss": 0.1088, "step": 7500 }, { "epoch": 5.05, "grad_norm": 3.4933323860168457, "learning_rate": 9.294371859296483e-06, "loss": 0.1103, "step": 7525 }, { "epoch": 5.07, "grad_norm": 2.8419055938720703, "learning_rate": 9.291859296482412e-06, "loss": 0.1074, "step": 7550 }, { "epoch": 5.09, "grad_norm": 2.699908971786499, "learning_rate": 9.289346733668343e-06, "loss": 0.1074, "step": 7575 }, { "epoch": 5.1, "grad_norm": 3.4752280712127686, "learning_rate": 9.286834170854272e-06, "loss": 0.1074, "step": 7600 }, { "epoch": 5.12, "grad_norm": 3.5037472248077393, "learning_rate": 9.284321608040202e-06, "loss": 0.1114, "step": 7625 }, { "epoch": 5.14, "grad_norm": 3.3195717334747314, "learning_rate": 9.281809045226131e-06, "loss": 0.1111, "step": 7650 }, { "epoch": 5.15, "grad_norm": 3.210256338119507, "learning_rate": 9.279296482412062e-06, "loss": 0.1095, "step": 7675 }, { "epoch": 5.17, "grad_norm": 3.4619410037994385, "learning_rate": 9.27678391959799e-06, "loss": 0.1118, "step": 7700 }, { "epoch": 5.19, "grad_norm": 3.2132604122161865, "learning_rate": 9.27427135678392e-06, "loss": 0.1065, "step": 7725 }, { "epoch": 5.2, "grad_norm": 3.379657030105591, "learning_rate": 9.27175879396985e-06, "loss": 0.1121, "step": 7750 }, { "epoch": 5.22, "grad_norm": 3.6748008728027344, "learning_rate": 9.26924623115578e-06, "loss": 0.1062, "step": 7775 }, { "epoch": 5.24, "grad_norm": 3.063694715499878, "learning_rate": 9.26673366834171e-06, "loss": 0.1077, "step": 7800 }, { "epoch": 5.26, "grad_norm": 3.237032413482666, "learning_rate": 9.264221105527638e-06, "loss": 0.1079, "step": 7825 }, { "epoch": 5.27, "grad_norm": 3.2364072799682617, "learning_rate": 9.261708542713569e-06, "loss": 0.1075, "step": 7850 }, { "epoch": 5.29, "grad_norm": 3.2186496257781982, "learning_rate": 9.259195979899498e-06, "loss": 0.107, "step": 7875 }, { "epoch": 5.31, "grad_norm": 3.249338150024414, "learning_rate": 9.256683417085428e-06, "loss": 0.1101, "step": 7900 }, { "epoch": 5.32, "grad_norm": 2.9037253856658936, "learning_rate": 9.254170854271357e-06, "loss": 0.1049, "step": 7925 }, { "epoch": 5.34, "grad_norm": 3.467984914779663, "learning_rate": 9.251658291457288e-06, "loss": 0.1164, "step": 7950 }, { "epoch": 5.36, "grad_norm": 3.047340154647827, "learning_rate": 9.249145728643217e-06, "loss": 0.1079, "step": 7975 }, { "epoch": 5.37, "grad_norm": 3.2782435417175293, "learning_rate": 9.246633165829147e-06, "loss": 0.1063, "step": 8000 }, { "epoch": 5.37, "eval_loss": 0.11793605983257294, "eval_runtime": 535.0689, "eval_samples_per_second": 2.59, "eval_steps_per_second": 2.59, "eval_wer": 27.19406674907293, "step": 8000 }, { "epoch": 5.39, "grad_norm": 3.045055389404297, "learning_rate": 9.244120603015076e-06, "loss": 0.1068, "step": 8025 }, { "epoch": 5.41, "grad_norm": 3.3729374408721924, "learning_rate": 9.241608040201005e-06, "loss": 0.1097, "step": 8050 }, { "epoch": 5.42, "grad_norm": 3.581709861755371, "learning_rate": 9.239095477386936e-06, "loss": 0.109, "step": 8075 }, { "epoch": 5.44, "grad_norm": 3.690354585647583, "learning_rate": 9.236582914572864e-06, "loss": 0.1105, "step": 8100 }, { "epoch": 5.46, "grad_norm": 3.4395689964294434, "learning_rate": 9.234070351758795e-06, "loss": 0.1047, "step": 8125 }, { "epoch": 5.47, "grad_norm": 3.5904619693756104, "learning_rate": 9.231557788944724e-06, "loss": 0.1098, "step": 8150 }, { "epoch": 5.49, "grad_norm": 3.4449338912963867, "learning_rate": 9.229045226130654e-06, "loss": 0.1094, "step": 8175 }, { "epoch": 5.51, "grad_norm": 3.081770181655884, "learning_rate": 9.226532663316585e-06, "loss": 0.1046, "step": 8200 }, { "epoch": 5.52, "grad_norm": 3.2109663486480713, "learning_rate": 9.224020100502514e-06, "loss": 0.11, "step": 8225 }, { "epoch": 5.54, "grad_norm": 3.8002219200134277, "learning_rate": 9.221507537688443e-06, "loss": 0.1103, "step": 8250 }, { "epoch": 5.56, "grad_norm": 3.423508644104004, "learning_rate": 9.218994974874373e-06, "loss": 0.1046, "step": 8275 }, { "epoch": 5.57, "grad_norm": 3.408816337585449, "learning_rate": 9.216482412060302e-06, "loss": 0.1137, "step": 8300 }, { "epoch": 5.59, "grad_norm": 3.349015235900879, "learning_rate": 9.213969849246231e-06, "loss": 0.1063, "step": 8325 }, { "epoch": 5.61, "grad_norm": 3.255462169647217, "learning_rate": 9.211457286432162e-06, "loss": 0.1081, "step": 8350 }, { "epoch": 5.62, "grad_norm": 3.0760374069213867, "learning_rate": 9.20894472361809e-06, "loss": 0.1124, "step": 8375 }, { "epoch": 5.64, "grad_norm": 3.469221830368042, "learning_rate": 9.206432160804021e-06, "loss": 0.1095, "step": 8400 }, { "epoch": 5.66, "grad_norm": 3.20563006401062, "learning_rate": 9.20391959798995e-06, "loss": 0.1091, "step": 8425 }, { "epoch": 5.67, "grad_norm": 3.58202862739563, "learning_rate": 9.20140703517588e-06, "loss": 0.1084, "step": 8450 }, { "epoch": 5.69, "grad_norm": 3.2912611961364746, "learning_rate": 9.19889447236181e-06, "loss": 0.109, "step": 8475 }, { "epoch": 5.71, "grad_norm": 3.2603135108947754, "learning_rate": 9.19638190954774e-06, "loss": 0.1051, "step": 8500 }, { "epoch": 5.73, "grad_norm": 3.3398075103759766, "learning_rate": 9.19386934673367e-06, "loss": 0.1086, "step": 8525 }, { "epoch": 5.74, "grad_norm": 3.480815887451172, "learning_rate": 9.191356783919599e-06, "loss": 0.1059, "step": 8550 }, { "epoch": 5.76, "grad_norm": 3.1898598670959473, "learning_rate": 9.188844221105528e-06, "loss": 0.104, "step": 8575 }, { "epoch": 5.78, "grad_norm": 3.3440845012664795, "learning_rate": 9.186331658291459e-06, "loss": 0.1126, "step": 8600 }, { "epoch": 5.79, "grad_norm": 3.6762936115264893, "learning_rate": 9.183819095477388e-06, "loss": 0.1064, "step": 8625 }, { "epoch": 5.81, "grad_norm": 3.66489315032959, "learning_rate": 9.181306532663317e-06, "loss": 0.1096, "step": 8650 }, { "epoch": 5.83, "grad_norm": 3.3494789600372314, "learning_rate": 9.178793969849247e-06, "loss": 0.1111, "step": 8675 }, { "epoch": 5.84, "grad_norm": 3.4388012886047363, "learning_rate": 9.176281407035176e-06, "loss": 0.1107, "step": 8700 }, { "epoch": 5.86, "grad_norm": 3.2451605796813965, "learning_rate": 9.173768844221105e-06, "loss": 0.1089, "step": 8725 }, { "epoch": 5.88, "grad_norm": 3.6606147289276123, "learning_rate": 9.171256281407036e-06, "loss": 0.1084, "step": 8750 }, { "epoch": 5.89, "grad_norm": 3.2145121097564697, "learning_rate": 9.168743718592966e-06, "loss": 0.1063, "step": 8775 }, { "epoch": 5.91, "grad_norm": 3.6518869400024414, "learning_rate": 9.166231155778895e-06, "loss": 0.1043, "step": 8800 }, { "epoch": 5.93, "grad_norm": 2.9405784606933594, "learning_rate": 9.163718592964826e-06, "loss": 0.1117, "step": 8825 }, { "epoch": 5.94, "grad_norm": 3.5626678466796875, "learning_rate": 9.161206030150754e-06, "loss": 0.1048, "step": 8850 }, { "epoch": 5.96, "grad_norm": 3.2351441383361816, "learning_rate": 9.158693467336685e-06, "loss": 0.1093, "step": 8875 }, { "epoch": 5.98, "grad_norm": 3.439530372619629, "learning_rate": 9.156180904522614e-06, "loss": 0.1073, "step": 8900 }, { "epoch": 5.99, "grad_norm": 3.4655144214630127, "learning_rate": 9.153668341708543e-06, "loss": 0.1067, "step": 8925 }, { "epoch": 6.01, "grad_norm": 2.794478178024292, "learning_rate": 9.151155778894473e-06, "loss": 0.0995, "step": 8950 }, { "epoch": 6.03, "grad_norm": 3.5291810035705566, "learning_rate": 9.148643216080402e-06, "loss": 0.0925, "step": 8975 }, { "epoch": 6.04, "grad_norm": 2.9376721382141113, "learning_rate": 9.146130653266331e-06, "loss": 0.0879, "step": 9000 }, { "epoch": 6.04, "eval_loss": 0.1166120246052742, "eval_runtime": 531.7233, "eval_samples_per_second": 2.607, "eval_steps_per_second": 2.607, "eval_wer": 26.876214020837015, "step": 9000 }, { "epoch": 6.06, "grad_norm": 3.121159076690674, "learning_rate": 9.143618090452262e-06, "loss": 0.0925, "step": 9025 }, { "epoch": 6.08, "grad_norm": 3.190279722213745, "learning_rate": 9.141105527638192e-06, "loss": 0.0966, "step": 9050 }, { "epoch": 6.09, "grad_norm": 2.9551713466644287, "learning_rate": 9.138592964824121e-06, "loss": 0.0927, "step": 9075 }, { "epoch": 6.11, "grad_norm": 2.6916284561157227, "learning_rate": 9.136080402010052e-06, "loss": 0.0905, "step": 9100 }, { "epoch": 6.13, "grad_norm": 3.1297528743743896, "learning_rate": 9.13356783919598e-06, "loss": 0.0966, "step": 9125 }, { "epoch": 6.15, "grad_norm": 3.3253660202026367, "learning_rate": 9.13105527638191e-06, "loss": 0.0977, "step": 9150 }, { "epoch": 6.16, "grad_norm": 3.1732029914855957, "learning_rate": 9.12854271356784e-06, "loss": 0.0943, "step": 9175 }, { "epoch": 6.18, "grad_norm": 3.00846791267395, "learning_rate": 9.12603015075377e-06, "loss": 0.095, "step": 9200 }, { "epoch": 6.2, "grad_norm": 3.4318153858184814, "learning_rate": 9.1235175879397e-06, "loss": 0.0918, "step": 9225 }, { "epoch": 6.21, "grad_norm": 2.615586519241333, "learning_rate": 9.121005025125628e-06, "loss": 0.0918, "step": 9250 }, { "epoch": 6.23, "grad_norm": 3.2654173374176025, "learning_rate": 9.118492462311559e-06, "loss": 0.0892, "step": 9275 }, { "epoch": 6.25, "grad_norm": 3.255948066711426, "learning_rate": 9.115979899497488e-06, "loss": 0.0955, "step": 9300 }, { "epoch": 6.26, "grad_norm": 3.593632221221924, "learning_rate": 9.113467336683418e-06, "loss": 0.0951, "step": 9325 }, { "epoch": 6.28, "grad_norm": 3.3398244380950928, "learning_rate": 9.110954773869347e-06, "loss": 0.0926, "step": 9350 }, { "epoch": 6.3, "grad_norm": 3.4789888858795166, "learning_rate": 9.108442211055278e-06, "loss": 0.0946, "step": 9375 }, { "epoch": 6.31, "grad_norm": 3.1585254669189453, "learning_rate": 9.105929648241206e-06, "loss": 0.0921, "step": 9400 }, { "epoch": 6.33, "grad_norm": 3.3125743865966797, "learning_rate": 9.103417085427137e-06, "loss": 0.0912, "step": 9425 }, { "epoch": 6.35, "grad_norm": 2.899616241455078, "learning_rate": 9.100904522613066e-06, "loss": 0.0935, "step": 9450 }, { "epoch": 6.36, "grad_norm": 2.9725539684295654, "learning_rate": 9.098391959798995e-06, "loss": 0.0934, "step": 9475 }, { "epoch": 6.38, "grad_norm": 3.340712070465088, "learning_rate": 9.095879396984926e-06, "loss": 0.0968, "step": 9500 }, { "epoch": 6.4, "grad_norm": 3.4166252613067627, "learning_rate": 9.093366834170854e-06, "loss": 0.0902, "step": 9525 }, { "epoch": 6.41, "grad_norm": 3.42030930519104, "learning_rate": 9.090854271356785e-06, "loss": 0.0903, "step": 9550 }, { "epoch": 6.43, "grad_norm": 2.913060188293457, "learning_rate": 9.088341708542714e-06, "loss": 0.0967, "step": 9575 }, { "epoch": 6.45, "grad_norm": 2.9808599948883057, "learning_rate": 9.085829145728644e-06, "loss": 0.0898, "step": 9600 }, { "epoch": 6.46, "grad_norm": 3.318812847137451, "learning_rate": 9.083316582914573e-06, "loss": 0.0964, "step": 9625 }, { "epoch": 6.48, "grad_norm": 2.8281571865081787, "learning_rate": 9.080804020100504e-06, "loss": 0.0925, "step": 9650 }, { "epoch": 6.5, "grad_norm": 3.3148748874664307, "learning_rate": 9.078291457286433e-06, "loss": 0.0955, "step": 9675 }, { "epoch": 6.51, "grad_norm": 3.047445297241211, "learning_rate": 9.075778894472363e-06, "loss": 0.0907, "step": 9700 }, { "epoch": 6.53, "grad_norm": 3.201747417449951, "learning_rate": 9.073266331658292e-06, "loss": 0.0891, "step": 9725 }, { "epoch": 6.55, "grad_norm": 3.5526840686798096, "learning_rate": 9.070753768844221e-06, "loss": 0.0937, "step": 9750 }, { "epoch": 6.56, "grad_norm": 3.3490021228790283, "learning_rate": 9.068241206030152e-06, "loss": 0.0924, "step": 9775 }, { "epoch": 6.58, "grad_norm": 3.195934534072876, "learning_rate": 9.06572864321608e-06, "loss": 0.0941, "step": 9800 }, { "epoch": 6.6, "grad_norm": 3.1133546829223633, "learning_rate": 9.063216080402011e-06, "loss": 0.093, "step": 9825 }, { "epoch": 6.62, "grad_norm": 3.5979671478271484, "learning_rate": 9.06070351758794e-06, "loss": 0.0881, "step": 9850 }, { "epoch": 6.63, "grad_norm": 3.7291669845581055, "learning_rate": 9.05819095477387e-06, "loss": 0.0955, "step": 9875 }, { "epoch": 6.65, "grad_norm": 3.2835400104522705, "learning_rate": 9.0556783919598e-06, "loss": 0.0902, "step": 9900 }, { "epoch": 6.67, "grad_norm": 3.1277029514312744, "learning_rate": 9.05316582914573e-06, "loss": 0.0936, "step": 9925 }, { "epoch": 6.68, "grad_norm": 3.2376766204833984, "learning_rate": 9.05065326633166e-06, "loss": 0.0896, "step": 9950 }, { "epoch": 6.7, "grad_norm": 2.698474168777466, "learning_rate": 9.048140703517589e-06, "loss": 0.0915, "step": 9975 }, { "epoch": 6.72, "grad_norm": 3.623647451400757, "learning_rate": 9.045628140703518e-06, "loss": 0.0924, "step": 10000 }, { "epoch": 6.72, "eval_loss": 0.1171552762389183, "eval_runtime": 533.9147, "eval_samples_per_second": 2.596, "eval_steps_per_second": 2.596, "eval_wer": 26.558361292601095, "step": 10000 }, { "epoch": 6.73, "grad_norm": 3.608774423599243, "learning_rate": 9.043115577889447e-06, "loss": 0.0927, "step": 10025 }, { "epoch": 6.75, "grad_norm": Infinity, "learning_rate": 9.04070351758794e-06, "loss": 0.0952, "step": 10050 }, { "epoch": 6.77, "grad_norm": 2.832880735397339, "learning_rate": 9.03819095477387e-06, "loss": 0.0909, "step": 10075 }, { "epoch": 6.78, "grad_norm": 3.0156736373901367, "learning_rate": 9.0356783919598e-06, "loss": 0.0944, "step": 10100 }, { "epoch": 6.8, "grad_norm": 3.3390650749206543, "learning_rate": 9.033165829145728e-06, "loss": 0.0919, "step": 10125 }, { "epoch": 6.82, "grad_norm": 3.394937515258789, "learning_rate": 9.03065326633166e-06, "loss": 0.0932, "step": 10150 }, { "epoch": 6.83, "grad_norm": 3.443366765975952, "learning_rate": 9.028140703517589e-06, "loss": 0.0934, "step": 10175 }, { "epoch": 6.85, "grad_norm": 3.167790174484253, "learning_rate": 9.025628140703518e-06, "loss": 0.0934, "step": 10200 }, { "epoch": 6.87, "grad_norm": 3.151536464691162, "learning_rate": 9.023115577889447e-06, "loss": 0.0935, "step": 10225 }, { "epoch": 6.88, "grad_norm": 3.475541114807129, "learning_rate": 9.020603015075378e-06, "loss": 0.0924, "step": 10250 }, { "epoch": 6.9, "grad_norm": 3.254150629043579, "learning_rate": 9.018090452261308e-06, "loss": 0.0946, "step": 10275 }, { "epoch": 6.92, "grad_norm": 3.126755714416504, "learning_rate": 9.015577889447237e-06, "loss": 0.0921, "step": 10300 }, { "epoch": 6.93, "grad_norm": 3.1626737117767334, "learning_rate": 9.013065326633166e-06, "loss": 0.0908, "step": 10325 }, { "epoch": 6.95, "grad_norm": 3.488074779510498, "learning_rate": 9.010552763819096e-06, "loss": 0.0956, "step": 10350 }, { "epoch": 6.97, "grad_norm": 3.0085911750793457, "learning_rate": 9.008040201005027e-06, "loss": 0.0915, "step": 10375 }, { "epoch": 6.98, "grad_norm": 3.424804925918579, "learning_rate": 9.005527638190954e-06, "loss": 0.0968, "step": 10400 }, { "epoch": 7.0, "grad_norm": 3.1618521213531494, "learning_rate": 9.003015075376885e-06, "loss": 0.0911, "step": 10425 }, { "epoch": 7.02, "grad_norm": 3.355823040008545, "learning_rate": 9.000502512562815e-06, "loss": 0.0775, "step": 10450 }, { "epoch": 7.03, "grad_norm": 2.7716736793518066, "learning_rate": 8.997989949748744e-06, "loss": 0.0776, "step": 10475 }, { "epoch": 7.05, "grad_norm": 2.89070987701416, "learning_rate": 8.995477386934675e-06, "loss": 0.0803, "step": 10500 }, { "epoch": 7.07, "grad_norm": 3.0273945331573486, "learning_rate": 8.992964824120604e-06, "loss": 0.0731, "step": 10525 }, { "epoch": 7.09, "grad_norm": 2.902979612350464, "learning_rate": 8.990452261306534e-06, "loss": 0.0805, "step": 10550 }, { "epoch": 7.1, "grad_norm": 2.9858810901641846, "learning_rate": 8.987939698492463e-06, "loss": 0.0761, "step": 10575 }, { "epoch": 7.12, "grad_norm": 2.780200958251953, "learning_rate": 8.985427135678392e-06, "loss": 0.0786, "step": 10600 }, { "epoch": 7.14, "grad_norm": 3.0452048778533936, "learning_rate": 8.982914572864322e-06, "loss": 0.078, "step": 10625 }, { "epoch": 7.15, "grad_norm": 3.0429253578186035, "learning_rate": 8.980402010050253e-06, "loss": 0.078, "step": 10650 }, { "epoch": 7.17, "grad_norm": 2.758443593978882, "learning_rate": 8.977889447236182e-06, "loss": 0.0787, "step": 10675 }, { "epoch": 7.19, "grad_norm": 3.3259782791137695, "learning_rate": 8.975376884422111e-06, "loss": 0.0818, "step": 10700 }, { "epoch": 7.2, "grad_norm": 3.1599812507629395, "learning_rate": 8.97286432160804e-06, "loss": 0.0788, "step": 10725 }, { "epoch": 7.22, "grad_norm": 3.163283348083496, "learning_rate": 8.97035175879397e-06, "loss": 0.0801, "step": 10750 }, { "epoch": 7.24, "grad_norm": 3.883058547973633, "learning_rate": 8.967839195979901e-06, "loss": 0.0818, "step": 10775 }, { "epoch": 7.25, "grad_norm": 3.0166139602661133, "learning_rate": 8.96532663316583e-06, "loss": 0.079, "step": 10800 }, { "epoch": 7.27, "grad_norm": 3.532127857208252, "learning_rate": 8.96281407035176e-06, "loss": 0.0764, "step": 10825 }, { "epoch": 7.29, "grad_norm": 2.8934993743896484, "learning_rate": 8.960301507537689e-06, "loss": 0.0791, "step": 10850 }, { "epoch": 7.3, "grad_norm": 3.4274938106536865, "learning_rate": 8.957788944723618e-06, "loss": 0.0788, "step": 10875 }, { "epoch": 7.32, "grad_norm": 2.964526891708374, "learning_rate": 8.95527638190955e-06, "loss": 0.0781, "step": 10900 }, { "epoch": 7.34, "grad_norm": 3.1131231784820557, "learning_rate": 8.952763819095479e-06, "loss": 0.0776, "step": 10925 }, { "epoch": 7.35, "grad_norm": 2.757322072982788, "learning_rate": 8.950251256281408e-06, "loss": 0.0793, "step": 10950 }, { "epoch": 7.37, "grad_norm": 2.8853962421417236, "learning_rate": 8.947738693467337e-06, "loss": 0.08, "step": 10975 }, { "epoch": 7.39, "grad_norm": 3.2388052940368652, "learning_rate": 8.945226130653267e-06, "loss": 0.0837, "step": 11000 }, { "epoch": 7.39, "eval_loss": 0.11983851343393326, "eval_runtime": 541.1838, "eval_samples_per_second": 2.561, "eval_steps_per_second": 2.561, "eval_wer": 27.052798869856964, "step": 11000 }, { "epoch": 7.4, "grad_norm": 3.431065559387207, "learning_rate": 8.942713567839196e-06, "loss": 0.0797, "step": 11025 }, { "epoch": 7.42, "grad_norm": 3.1514389514923096, "learning_rate": 8.940201005025127e-06, "loss": 0.0809, "step": 11050 }, { "epoch": 7.44, "grad_norm": 3.1348989009857178, "learning_rate": 8.937688442211056e-06, "loss": 0.0796, "step": 11075 }, { "epoch": 7.45, "grad_norm": 3.4892783164978027, "learning_rate": 8.935175879396986e-06, "loss": 0.0813, "step": 11100 }, { "epoch": 7.47, "grad_norm": 3.3423171043395996, "learning_rate": 8.932663316582915e-06, "loss": 0.0768, "step": 11125 }, { "epoch": 7.49, "grad_norm": 3.119539499282837, "learning_rate": 8.930150753768844e-06, "loss": 0.0833, "step": 11150 }, { "epoch": 7.51, "grad_norm": 3.181475877761841, "learning_rate": 8.927638190954775e-06, "loss": 0.0803, "step": 11175 }, { "epoch": 7.52, "grad_norm": 3.3543057441711426, "learning_rate": 8.925125628140705e-06, "loss": 0.0806, "step": 11200 }, { "epoch": 7.54, "grad_norm": 3.1575417518615723, "learning_rate": 8.922613065326634e-06, "loss": 0.0812, "step": 11225 }, { "epoch": 7.56, "grad_norm": 3.0198452472686768, "learning_rate": 8.920100502512563e-06, "loss": 0.0805, "step": 11250 }, { "epoch": 7.57, "grad_norm": 2.9735798835754395, "learning_rate": 8.917587939698493e-06, "loss": 0.0791, "step": 11275 }, { "epoch": 7.59, "grad_norm": 3.363503932952881, "learning_rate": 8.915075376884424e-06, "loss": 0.0817, "step": 11300 }, { "epoch": 7.61, "grad_norm": 3.10579514503479, "learning_rate": 8.912562814070353e-06, "loss": 0.0833, "step": 11325 }, { "epoch": 7.62, "grad_norm": 3.5427165031433105, "learning_rate": 8.910050251256282e-06, "loss": 0.0827, "step": 11350 }, { "epoch": 7.64, "grad_norm": 2.9739034175872803, "learning_rate": 8.907537688442212e-06, "loss": 0.0795, "step": 11375 }, { "epoch": 7.66, "grad_norm": 3.0262250900268555, "learning_rate": 8.905025125628143e-06, "loss": 0.0777, "step": 11400 }, { "epoch": 7.67, "grad_norm": 2.9359376430511475, "learning_rate": 8.90251256281407e-06, "loss": 0.0807, "step": 11425 }, { "epoch": 7.69, "grad_norm": 3.158572196960449, "learning_rate": 8.900000000000001e-06, "loss": 0.0772, "step": 11450 }, { "epoch": 7.71, "grad_norm": 3.330089807510376, "learning_rate": 8.89748743718593e-06, "loss": 0.0793, "step": 11475 }, { "epoch": 7.72, "grad_norm": 3.2174530029296875, "learning_rate": 8.89497487437186e-06, "loss": 0.079, "step": 11500 }, { "epoch": 7.74, "grad_norm": 3.673243522644043, "learning_rate": 8.892462311557791e-06, "loss": 0.0775, "step": 11525 }, { "epoch": 7.76, "grad_norm": 3.3094096183776855, "learning_rate": 8.889949748743718e-06, "loss": 0.078, "step": 11550 }, { "epoch": 7.77, "grad_norm": 3.426079273223877, "learning_rate": 8.88743718592965e-06, "loss": 0.0777, "step": 11575 }, { "epoch": 7.79, "grad_norm": 3.517086982727051, "learning_rate": 8.884924623115579e-06, "loss": 0.0776, "step": 11600 }, { "epoch": 7.81, "grad_norm": 2.9824516773223877, "learning_rate": 8.882412060301508e-06, "loss": 0.0805, "step": 11625 }, { "epoch": 7.82, "grad_norm": 2.965653896331787, "learning_rate": 8.879899497487437e-06, "loss": 0.0786, "step": 11650 }, { "epoch": 7.84, "grad_norm": 2.9882099628448486, "learning_rate": 8.877386934673368e-06, "loss": 0.0822, "step": 11675 }, { "epoch": 7.86, "grad_norm": 3.118823289871216, "learning_rate": 8.874874371859296e-06, "loss": 0.0773, "step": 11700 }, { "epoch": 7.87, "grad_norm": 4.2748188972473145, "learning_rate": 8.872361809045227e-06, "loss": 0.0812, "step": 11725 }, { "epoch": 7.89, "grad_norm": 3.5226612091064453, "learning_rate": 8.869849246231156e-06, "loss": 0.0801, "step": 11750 }, { "epoch": 7.91, "grad_norm": 3.2962095737457275, "learning_rate": 8.867336683417086e-06, "loss": 0.0779, "step": 11775 }, { "epoch": 7.92, "grad_norm": 3.037177801132202, "learning_rate": 8.864824120603017e-06, "loss": 0.0811, "step": 11800 }, { "epoch": 7.94, "grad_norm": 3.207000255584717, "learning_rate": 8.862311557788944e-06, "loss": 0.0813, "step": 11825 }, { "epoch": 7.96, "grad_norm": 3.5045995712280273, "learning_rate": 8.859798994974875e-06, "loss": 0.0793, "step": 11850 }, { "epoch": 7.98, "grad_norm": 2.9062917232513428, "learning_rate": 8.857286432160805e-06, "loss": 0.0808, "step": 11875 }, { "epoch": 7.99, "grad_norm": 3.086449146270752, "learning_rate": 8.854773869346734e-06, "loss": 0.0757, "step": 11900 }, { "epoch": 8.01, "grad_norm": 3.4503021240234375, "learning_rate": 8.852261306532665e-06, "loss": 0.0698, "step": 11925 }, { "epoch": 8.03, "grad_norm": 2.755633592605591, "learning_rate": 8.849748743718594e-06, "loss": 0.0619, "step": 11950 }, { "epoch": 8.04, "grad_norm": 3.3875789642333984, "learning_rate": 8.847236180904524e-06, "loss": 0.0651, "step": 11975 }, { "epoch": 8.06, "grad_norm": 2.697042465209961, "learning_rate": 8.844723618090453e-06, "loss": 0.0654, "step": 12000 }, { "epoch": 8.06, "eval_loss": 0.12158209830522537, "eval_runtime": 532.8467, "eval_samples_per_second": 2.601, "eval_steps_per_second": 2.601, "eval_wer": 26.328800988875155, "step": 12000 }, { "epoch": 8.08, "grad_norm": 2.8202855587005615, "learning_rate": 8.842211055276382e-06, "loss": 0.0658, "step": 12025 }, { "epoch": 8.09, "grad_norm": 2.7945172786712646, "learning_rate": 8.839698492462312e-06, "loss": 0.0627, "step": 12050 }, { "epoch": 8.11, "grad_norm": 3.1584692001342773, "learning_rate": 8.837185929648243e-06, "loss": 0.0673, "step": 12075 }, { "epoch": 8.13, "grad_norm": 3.1642470359802246, "learning_rate": 8.83467336683417e-06, "loss": 0.0683, "step": 12100 }, { "epoch": 8.14, "grad_norm": 2.9188601970672607, "learning_rate": 8.832160804020101e-06, "loss": 0.0682, "step": 12125 }, { "epoch": 8.16, "grad_norm": 3.276679039001465, "learning_rate": 8.829748743718593e-06, "loss": 0.0656, "step": 12150 }, { "epoch": 8.18, "grad_norm": 2.683711051940918, "learning_rate": 8.827236180904524e-06, "loss": 0.0625, "step": 12175 }, { "epoch": 8.19, "grad_norm": 3.232003688812256, "learning_rate": 8.824723618090453e-06, "loss": 0.066, "step": 12200 }, { "epoch": 8.21, "grad_norm": 2.7374961376190186, "learning_rate": 8.822211055276383e-06, "loss": 0.0647, "step": 12225 }, { "epoch": 8.23, "grad_norm": 3.423482656478882, "learning_rate": 8.819698492462312e-06, "loss": 0.0673, "step": 12250 }, { "epoch": 8.24, "grad_norm": 2.9813687801361084, "learning_rate": 8.817185929648241e-06, "loss": 0.0685, "step": 12275 }, { "epoch": 8.26, "grad_norm": 3.047753095626831, "learning_rate": 8.81467336683417e-06, "loss": 0.0658, "step": 12300 }, { "epoch": 8.28, "grad_norm": 3.4329652786254883, "learning_rate": 8.812160804020102e-06, "loss": 0.0662, "step": 12325 }, { "epoch": 8.29, "grad_norm": 3.080573081970215, "learning_rate": 8.809648241206031e-06, "loss": 0.0674, "step": 12350 }, { "epoch": 8.31, "grad_norm": 2.828704833984375, "learning_rate": 8.80713567839196e-06, "loss": 0.0694, "step": 12375 }, { "epoch": 8.33, "grad_norm": 3.132976531982422, "learning_rate": 8.804623115577891e-06, "loss": 0.0685, "step": 12400 }, { "epoch": 8.34, "grad_norm": 3.154456615447998, "learning_rate": 8.802110552763819e-06, "loss": 0.0679, "step": 12425 }, { "epoch": 8.36, "grad_norm": 3.4193313121795654, "learning_rate": 8.79959798994975e-06, "loss": 0.0674, "step": 12450 }, { "epoch": 8.38, "grad_norm": 3.2318356037139893, "learning_rate": 8.79708542713568e-06, "loss": 0.0658, "step": 12475 }, { "epoch": 8.39, "grad_norm": 2.9559836387634277, "learning_rate": 8.794572864321609e-06, "loss": 0.0647, "step": 12500 }, { "epoch": 8.41, "grad_norm": 3.459628105163574, "learning_rate": 8.792060301507538e-06, "loss": 0.0693, "step": 12525 }, { "epoch": 8.43, "grad_norm": 3.2934398651123047, "learning_rate": 8.789547738693467e-06, "loss": 0.0696, "step": 12550 }, { "epoch": 8.45, "grad_norm": 3.2100000381469727, "learning_rate": 8.787035175879398e-06, "loss": 0.0703, "step": 12575 }, { "epoch": 8.46, "grad_norm": 3.280884265899658, "learning_rate": 8.784522613065328e-06, "loss": 0.0661, "step": 12600 }, { "epoch": 8.48, "grad_norm": 3.1474897861480713, "learning_rate": 8.782010050251257e-06, "loss": 0.0663, "step": 12625 }, { "epoch": 8.5, "grad_norm": 2.9876487255096436, "learning_rate": 8.779497487437186e-06, "loss": 0.0693, "step": 12650 }, { "epoch": 8.51, "grad_norm": 3.278313159942627, "learning_rate": 8.776984924623117e-06, "loss": 0.0699, "step": 12675 }, { "epoch": 8.53, "grad_norm": 3.023169755935669, "learning_rate": 8.774472361809045e-06, "loss": 0.0712, "step": 12700 }, { "epoch": 8.55, "grad_norm": 3.168148994445801, "learning_rate": 8.771959798994976e-06, "loss": 0.0698, "step": 12725 }, { "epoch": 8.56, "grad_norm": 3.177262544631958, "learning_rate": 8.769447236180905e-06, "loss": 0.0686, "step": 12750 }, { "epoch": 8.58, "grad_norm": 3.1487865447998047, "learning_rate": 8.766934673366834e-06, "loss": 0.0684, "step": 12775 }, { "epoch": 8.6, "grad_norm": 2.9590165615081787, "learning_rate": 8.764422110552765e-06, "loss": 0.0691, "step": 12800 }, { "epoch": 8.61, "grad_norm": 3.0423812866210938, "learning_rate": 8.761909547738693e-06, "loss": 0.0682, "step": 12825 }, { "epoch": 8.63, "grad_norm": 3.3768019676208496, "learning_rate": 8.759396984924624e-06, "loss": 0.0709, "step": 12850 }, { "epoch": 8.65, "grad_norm": 3.7296512126922607, "learning_rate": 8.756884422110553e-06, "loss": 0.0701, "step": 12875 }, { "epoch": 8.66, "grad_norm": 3.148634433746338, "learning_rate": 8.754371859296483e-06, "loss": 0.0634, "step": 12900 }, { "epoch": 8.68, "grad_norm": 2.908444881439209, "learning_rate": 8.751859296482412e-06, "loss": 0.0659, "step": 12925 }, { "epoch": 8.7, "grad_norm": 3.3164865970611572, "learning_rate": 8.749346733668343e-06, "loss": 0.0655, "step": 12950 }, { "epoch": 8.71, "grad_norm": 2.9725685119628906, "learning_rate": 8.746834170854272e-06, "loss": 0.0659, "step": 12975 }, { "epoch": 8.73, "grad_norm": 3.171374797821045, "learning_rate": 8.744321608040202e-06, "loss": 0.068, "step": 13000 }, { "epoch": 8.73, "eval_loss": 0.12423743307590485, "eval_runtime": 533.8353, "eval_samples_per_second": 2.596, "eval_steps_per_second": 2.596, "eval_wer": 26.86738477838602, "step": 13000 }, { "epoch": 8.75, "grad_norm": 3.3160324096679688, "learning_rate": 8.741809045226131e-06, "loss": 0.0692, "step": 13025 }, { "epoch": 8.76, "grad_norm": 3.2802672386169434, "learning_rate": 8.73929648241206e-06, "loss": 0.067, "step": 13050 }, { "epoch": 8.78, "grad_norm": 3.2849535942077637, "learning_rate": 8.736783919597991e-06, "loss": 0.0654, "step": 13075 }, { "epoch": 8.8, "grad_norm": 3.685974359512329, "learning_rate": 8.734271356783919e-06, "loss": 0.0687, "step": 13100 }, { "epoch": 8.81, "grad_norm": 2.9581081867218018, "learning_rate": 8.73175879396985e-06, "loss": 0.0658, "step": 13125 }, { "epoch": 8.83, "grad_norm": 3.3408470153808594, "learning_rate": 8.72924623115578e-06, "loss": 0.0726, "step": 13150 }, { "epoch": 8.85, "grad_norm": 3.5375308990478516, "learning_rate": 8.726733668341709e-06, "loss": 0.0688, "step": 13175 }, { "epoch": 8.87, "grad_norm": 2.7572827339172363, "learning_rate": 8.72422110552764e-06, "loss": 0.0688, "step": 13200 }, { "epoch": 8.88, "grad_norm": 3.0948410034179688, "learning_rate": 8.721708542713569e-06, "loss": 0.0686, "step": 13225 }, { "epoch": 8.9, "grad_norm": 3.076904773712158, "learning_rate": 8.719195979899498e-06, "loss": 0.0683, "step": 13250 }, { "epoch": 8.92, "grad_norm": 3.060412645339966, "learning_rate": 8.716683417085428e-06, "loss": 0.0692, "step": 13275 }, { "epoch": 8.93, "grad_norm": 3.1852357387542725, "learning_rate": 8.714170854271357e-06, "loss": 0.0647, "step": 13300 }, { "epoch": 8.95, "grad_norm": 3.427971601486206, "learning_rate": 8.711658291457286e-06, "loss": 0.0675, "step": 13325 }, { "epoch": 8.97, "grad_norm": 3.221360683441162, "learning_rate": 8.709145728643217e-06, "loss": 0.0702, "step": 13350 }, { "epoch": 8.98, "grad_norm": 3.490898847579956, "learning_rate": 8.706633165829147e-06, "loss": 0.0693, "step": 13375 }, { "epoch": 9.0, "grad_norm": 3.1776282787323, "learning_rate": 8.704120603015076e-06, "loss": 0.0725, "step": 13400 }, { "epoch": 9.02, "grad_norm": 2.52174973487854, "learning_rate": 8.701608040201005e-06, "loss": 0.0542, "step": 13425 }, { "epoch": 9.03, "grad_norm": 2.8436169624328613, "learning_rate": 8.699095477386935e-06, "loss": 0.0543, "step": 13450 }, { "epoch": 9.05, "grad_norm": 3.0883164405822754, "learning_rate": 8.696582914572866e-06, "loss": 0.0565, "step": 13475 }, { "epoch": 9.07, "grad_norm": 3.2945592403411865, "learning_rate": 8.694070351758795e-06, "loss": 0.0554, "step": 13500 }, { "epoch": 9.08, "grad_norm": 3.1277835369110107, "learning_rate": 8.691557788944724e-06, "loss": 0.0575, "step": 13525 }, { "epoch": 9.1, "grad_norm": 2.555258274078369, "learning_rate": 8.689045226130654e-06, "loss": 0.0557, "step": 13550 }, { "epoch": 9.12, "grad_norm": 2.6981780529022217, "learning_rate": 8.686532663316583e-06, "loss": 0.056, "step": 13575 }, { "epoch": 9.13, "grad_norm": 2.9988884925842285, "learning_rate": 8.684020100502514e-06, "loss": 0.0575, "step": 13600 }, { "epoch": 9.15, "grad_norm": 2.7814390659332275, "learning_rate": 8.681507537688443e-06, "loss": 0.0543, "step": 13625 }, { "epoch": 9.17, "grad_norm": 2.8165695667266846, "learning_rate": 8.678994974874373e-06, "loss": 0.0542, "step": 13650 }, { "epoch": 9.18, "grad_norm": 2.8924388885498047, "learning_rate": 8.676482412060302e-06, "loss": 0.0584, "step": 13675 }, { "epoch": 9.2, "grad_norm": 2.8846709728240967, "learning_rate": 8.673969849246231e-06, "loss": 0.0546, "step": 13700 }, { "epoch": 9.22, "grad_norm": 3.0931618213653564, "learning_rate": 8.67145728643216e-06, "loss": 0.0541, "step": 13725 }, { "epoch": 9.23, "grad_norm": 3.0044896602630615, "learning_rate": 8.668944723618092e-06, "loss": 0.0566, "step": 13750 }, { "epoch": 9.25, "grad_norm": 2.992866039276123, "learning_rate": 8.666432160804021e-06, "loss": 0.0568, "step": 13775 }, { "epoch": 9.27, "grad_norm": 3.3243565559387207, "learning_rate": 8.66391959798995e-06, "loss": 0.0575, "step": 13800 }, { "epoch": 9.28, "grad_norm": 3.164736747741699, "learning_rate": 8.661407035175881e-06, "loss": 0.0565, "step": 13825 }, { "epoch": 9.3, "grad_norm": 2.89432430267334, "learning_rate": 8.658894472361809e-06, "loss": 0.0571, "step": 13850 }, { "epoch": 9.32, "grad_norm": 3.053514242172241, "learning_rate": 8.65638190954774e-06, "loss": 0.0582, "step": 13875 }, { "epoch": 9.34, "grad_norm": 2.7615840435028076, "learning_rate": 8.65386934673367e-06, "loss": 0.0566, "step": 13900 }, { "epoch": 9.35, "grad_norm": 3.1976537704467773, "learning_rate": 8.651356783919599e-06, "loss": 0.0578, "step": 13925 }, { "epoch": 9.37, "grad_norm": 3.1072587966918945, "learning_rate": 8.648844221105528e-06, "loss": 0.0577, "step": 13950 }, { "epoch": 9.39, "grad_norm": 3.4911906719207764, "learning_rate": 8.646331658291457e-06, "loss": 0.0548, "step": 13975 }, { "epoch": 9.4, "grad_norm": 2.923501968383789, "learning_rate": 8.643819095477388e-06, "loss": 0.0586, "step": 14000 }, { "epoch": 9.4, "eval_loss": 0.1282009482383728, "eval_runtime": 533.8178, "eval_samples_per_second": 2.596, "eval_steps_per_second": 2.596, "eval_wer": 26.982164930248985, "step": 14000 }, { "epoch": 9.42, "grad_norm": 3.0205700397491455, "learning_rate": 8.641306532663318e-06, "loss": 0.0552, "step": 14025 }, { "epoch": 9.44, "grad_norm": 3.022747278213501, "learning_rate": 8.638793969849247e-06, "loss": 0.0574, "step": 14050 }, { "epoch": 9.45, "grad_norm": 3.2978105545043945, "learning_rate": 8.636281407035176e-06, "loss": 0.0571, "step": 14075 }, { "epoch": 9.47, "grad_norm": 3.0741355419158936, "learning_rate": 8.633768844221107e-06, "loss": 0.0556, "step": 14100 }, { "epoch": 9.49, "grad_norm": 2.8877174854278564, "learning_rate": 8.631256281407035e-06, "loss": 0.0538, "step": 14125 }, { "epoch": 9.5, "grad_norm": 3.618729591369629, "learning_rate": 8.628743718592966e-06, "loss": 0.0592, "step": 14150 }, { "epoch": 9.52, "grad_norm": 3.005646228790283, "learning_rate": 8.626231155778895e-06, "loss": 0.057, "step": 14175 }, { "epoch": 9.54, "grad_norm": 3.3048083782196045, "learning_rate": 8.623718592964825e-06, "loss": 0.0564, "step": 14200 }, { "epoch": 9.55, "grad_norm": 3.2562224864959717, "learning_rate": 8.621206030150756e-06, "loss": 0.0571, "step": 14225 }, { "epoch": 9.57, "grad_norm": 2.980013608932495, "learning_rate": 8.618693467336683e-06, "loss": 0.0564, "step": 14250 }, { "epoch": 9.59, "grad_norm": 3.220036745071411, "learning_rate": 8.616180904522614e-06, "loss": 0.0588, "step": 14275 }, { "epoch": 9.6, "grad_norm": 3.4643850326538086, "learning_rate": 8.613668341708544e-06, "loss": 0.0565, "step": 14300 }, { "epoch": 9.62, "grad_norm": 3.2021632194519043, "learning_rate": 8.611155778894473e-06, "loss": 0.0586, "step": 14325 }, { "epoch": 9.64, "grad_norm": 3.2279539108276367, "learning_rate": 8.608643216080402e-06, "loss": 0.0562, "step": 14350 }, { "epoch": 9.65, "grad_norm": 3.429431438446045, "learning_rate": 8.606130653266333e-06, "loss": 0.0585, "step": 14375 }, { "epoch": 9.67, "grad_norm": 3.278526544570923, "learning_rate": 8.60361809045226e-06, "loss": 0.0584, "step": 14400 }, { "epoch": 9.69, "grad_norm": 3.5569005012512207, "learning_rate": 8.601105527638192e-06, "loss": 0.0587, "step": 14425 }, { "epoch": 9.7, "grad_norm": 3.0540413856506348, "learning_rate": 8.598592964824121e-06, "loss": 0.0582, "step": 14450 }, { "epoch": 9.72, "grad_norm": 2.9771244525909424, "learning_rate": 8.59608040201005e-06, "loss": 0.0544, "step": 14475 }, { "epoch": 9.74, "grad_norm": 3.271925926208496, "learning_rate": 8.593567839195981e-06, "loss": 0.0556, "step": 14500 }, { "epoch": 9.75, "grad_norm": 3.2107813358306885, "learning_rate": 8.591055276381909e-06, "loss": 0.0556, "step": 14525 }, { "epoch": 9.77, "grad_norm": 2.9411368370056152, "learning_rate": 8.58854271356784e-06, "loss": 0.06, "step": 14550 }, { "epoch": 9.79, "grad_norm": 2.9419991970062256, "learning_rate": 8.58603015075377e-06, "loss": 0.055, "step": 14575 }, { "epoch": 9.81, "grad_norm": 3.3104031085968018, "learning_rate": 8.583517587939699e-06, "loss": 0.0586, "step": 14600 }, { "epoch": 9.82, "grad_norm": 3.488868236541748, "learning_rate": 8.58100502512563e-06, "loss": 0.0608, "step": 14625 }, { "epoch": 9.84, "grad_norm": 2.7537827491760254, "learning_rate": 8.578492462311559e-06, "loss": 0.061, "step": 14650 }, { "epoch": 9.86, "grad_norm": 2.967761278152466, "learning_rate": 8.575979899497488e-06, "loss": 0.0616, "step": 14675 }, { "epoch": 9.87, "grad_norm": 2.6756021976470947, "learning_rate": 8.573467336683418e-06, "loss": 0.0572, "step": 14700 }, { "epoch": 9.89, "grad_norm": 3.6669530868530273, "learning_rate": 8.570954773869347e-06, "loss": 0.0545, "step": 14725 }, { "epoch": 9.91, "grad_norm": 3.402998208999634, "learning_rate": 8.568442211055276e-06, "loss": 0.0595, "step": 14750 }, { "epoch": 9.92, "grad_norm": 3.397134304046631, "learning_rate": 8.565929648241207e-06, "loss": 0.0582, "step": 14775 }, { "epoch": 9.94, "grad_norm": 3.193824291229248, "learning_rate": 8.563417085427135e-06, "loss": 0.0558, "step": 14800 }, { "epoch": 9.96, "grad_norm": 3.0948803424835205, "learning_rate": 8.560904522613066e-06, "loss": 0.0572, "step": 14825 }, { "epoch": 9.97, "grad_norm": 3.6509146690368652, "learning_rate": 8.558391959798995e-06, "loss": 0.0595, "step": 14850 }, { "epoch": 9.99, "grad_norm": 3.0662288665771484, "learning_rate": 8.555879396984925e-06, "loss": 0.057, "step": 14875 }, { "epoch": 10.01, "grad_norm": 2.2760088443756104, "learning_rate": 8.553366834170856e-06, "loss": 0.0524, "step": 14900 }, { "epoch": 10.02, "grad_norm": 2.8303427696228027, "learning_rate": 8.550854271356785e-06, "loss": 0.0494, "step": 14925 }, { "epoch": 10.04, "grad_norm": 3.1542868614196777, "learning_rate": 8.548341708542714e-06, "loss": 0.0445, "step": 14950 }, { "epoch": 10.06, "grad_norm": 2.8265697956085205, "learning_rate": 8.545829145728644e-06, "loss": 0.0464, "step": 14975 }, { "epoch": 10.07, "grad_norm": 3.163896322250366, "learning_rate": 8.543316582914573e-06, "loss": 0.047, "step": 15000 }, { "epoch": 10.07, "eval_loss": 0.13359740376472473, "eval_runtime": 533.7428, "eval_samples_per_second": 2.597, "eval_steps_per_second": 2.597, "eval_wer": 27.405968567896878, "step": 15000 }, { "epoch": 10.09, "grad_norm": 2.813354253768921, "learning_rate": 8.540804020100502e-06, "loss": 0.0476, "step": 15025 }, { "epoch": 10.11, "grad_norm": 2.448727607727051, "learning_rate": 8.538291457286433e-06, "loss": 0.0448, "step": 15050 }, { "epoch": 10.12, "grad_norm": 2.798645257949829, "learning_rate": 8.535778894472363e-06, "loss": 0.0458, "step": 15075 }, { "epoch": 10.14, "grad_norm": 2.969273090362549, "learning_rate": 8.533266331658292e-06, "loss": 0.0442, "step": 15100 }, { "epoch": 10.16, "grad_norm": 2.901127576828003, "learning_rate": 8.530753768844221e-06, "loss": 0.0431, "step": 15125 }, { "epoch": 10.17, "grad_norm": 3.0042836666107178, "learning_rate": 8.52824120603015e-06, "loss": 0.049, "step": 15150 }, { "epoch": 10.19, "grad_norm": 2.694744825363159, "learning_rate": 8.525728643216082e-06, "loss": 0.0474, "step": 15175 }, { "epoch": 10.21, "grad_norm": 2.79301118850708, "learning_rate": 8.523216080402011e-06, "loss": 0.0459, "step": 15200 }, { "epoch": 10.22, "grad_norm": 3.328848123550415, "learning_rate": 8.52070351758794e-06, "loss": 0.0481, "step": 15225 }, { "epoch": 10.24, "grad_norm": 3.0490903854370117, "learning_rate": 8.518190954773871e-06, "loss": 0.0467, "step": 15250 }, { "epoch": 10.26, "grad_norm": 2.891860246658325, "learning_rate": 8.515678391959799e-06, "loss": 0.0482, "step": 15275 }, { "epoch": 10.28, "grad_norm": 3.29339599609375, "learning_rate": 8.51316582914573e-06, "loss": 0.0468, "step": 15300 }, { "epoch": 10.29, "grad_norm": 2.871262550354004, "learning_rate": 8.51065326633166e-06, "loss": 0.0465, "step": 15325 }, { "epoch": 10.31, "grad_norm": 2.673008680343628, "learning_rate": 8.508140703517589e-06, "loss": 0.0457, "step": 15350 }, { "epoch": 10.33, "grad_norm": 2.5940115451812744, "learning_rate": 8.505628140703518e-06, "loss": 0.049, "step": 15375 }, { "epoch": 10.34, "grad_norm": 2.8226072788238525, "learning_rate": 8.503115577889447e-06, "loss": 0.0472, "step": 15400 }, { "epoch": 10.36, "grad_norm": 2.800179958343506, "learning_rate": 8.500603015075377e-06, "loss": 0.0477, "step": 15425 }, { "epoch": 10.38, "grad_norm": 3.0697898864746094, "learning_rate": 8.498090452261308e-06, "loss": 0.0448, "step": 15450 }, { "epoch": 10.39, "grad_norm": 2.9394161701202393, "learning_rate": 8.495577889447237e-06, "loss": 0.0464, "step": 15475 }, { "epoch": 10.41, "grad_norm": 3.055058479309082, "learning_rate": 8.493065326633166e-06, "loss": 0.0486, "step": 15500 }, { "epoch": 10.43, "grad_norm": 3.4436676502227783, "learning_rate": 8.490552763819097e-06, "loss": 0.0479, "step": 15525 }, { "epoch": 10.44, "grad_norm": 3.167590379714966, "learning_rate": 8.488040201005025e-06, "loss": 0.049, "step": 15550 }, { "epoch": 10.46, "grad_norm": 2.786879539489746, "learning_rate": 8.485527638190956e-06, "loss": 0.0476, "step": 15575 }, { "epoch": 10.48, "grad_norm": 3.0949158668518066, "learning_rate": 8.483015075376885e-06, "loss": 0.0463, "step": 15600 }, { "epoch": 10.49, "grad_norm": 3.426304340362549, "learning_rate": 8.480502512562815e-06, "loss": 0.0475, "step": 15625 }, { "epoch": 10.51, "grad_norm": 3.1173408031463623, "learning_rate": 8.477989949748744e-06, "loss": 0.0476, "step": 15650 }, { "epoch": 10.53, "grad_norm": 2.856600046157837, "learning_rate": 8.475477386934673e-06, "loss": 0.0471, "step": 15675 }, { "epoch": 10.54, "grad_norm": 3.2512564659118652, "learning_rate": 8.472964824120604e-06, "loss": 0.0483, "step": 15700 }, { "epoch": 10.56, "grad_norm": 3.3549506664276123, "learning_rate": 8.470452261306534e-06, "loss": 0.0462, "step": 15725 }, { "epoch": 10.58, "grad_norm": 2.7729334831237793, "learning_rate": 8.467939698492463e-06, "loss": 0.0472, "step": 15750 }, { "epoch": 10.59, "grad_norm": 2.711257219314575, "learning_rate": 8.465427135678392e-06, "loss": 0.0472, "step": 15775 }, { "epoch": 10.61, "grad_norm": 3.229771375656128, "learning_rate": 8.462914572864323e-06, "loss": 0.0479, "step": 15800 }, { "epoch": 10.63, "grad_norm": 3.0402400493621826, "learning_rate": 8.460402010050251e-06, "loss": 0.0503, "step": 15825 }, { "epoch": 10.64, "grad_norm": 2.9210867881774902, "learning_rate": 8.457989949748744e-06, "loss": 0.0497, "step": 15850 }, { "epoch": 10.66, "grad_norm": 3.3483831882476807, "learning_rate": 8.455577889447237e-06, "loss": 0.0475, "step": 15875 }, { "epoch": 10.68, "grad_norm": 3.053593873977661, "learning_rate": 8.453065326633167e-06, "loss": 0.046, "step": 15900 }, { "epoch": 10.7, "grad_norm": 3.136958599090576, "learning_rate": 8.450552763819096e-06, "loss": 0.0509, "step": 15925 }, { "epoch": 10.71, "grad_norm": 3.1040425300598145, "learning_rate": 8.448040201005025e-06, "loss": 0.0504, "step": 15950 }, { "epoch": 10.73, "grad_norm": 2.8489692211151123, "learning_rate": 8.445527638190956e-06, "loss": 0.0484, "step": 15975 }, { "epoch": 10.75, "grad_norm": 2.8868560791015625, "learning_rate": 8.443015075376884e-06, "loss": 0.0475, "step": 16000 }, { "epoch": 10.75, "eval_loss": 0.1362370103597641, "eval_runtime": 536.1147, "eval_samples_per_second": 2.585, "eval_steps_per_second": 2.585, "eval_wer": 27.441285537700864, "step": 16000 }, { "epoch": 10.76, "grad_norm": 3.188688039779663, "learning_rate": 8.440502512562815e-06, "loss": 0.0502, "step": 16025 }, { "epoch": 10.78, "grad_norm": 2.4469282627105713, "learning_rate": 8.437989949748744e-06, "loss": 0.0459, "step": 16050 }, { "epoch": 10.8, "grad_norm": 2.948697328567505, "learning_rate": 8.435477386934674e-06, "loss": 0.0472, "step": 16075 }, { "epoch": 10.81, "grad_norm": 3.236891508102417, "learning_rate": 8.432964824120605e-06, "loss": 0.0494, "step": 16100 }, { "epoch": 10.83, "grad_norm": 3.0507919788360596, "learning_rate": 8.430452261306534e-06, "loss": 0.0494, "step": 16125 }, { "epoch": 10.85, "grad_norm": 2.8577802181243896, "learning_rate": 8.427939698492463e-06, "loss": 0.0487, "step": 16150 }, { "epoch": 10.86, "grad_norm": 3.035109758377075, "learning_rate": 8.425427135678393e-06, "loss": 0.0486, "step": 16175 }, { "epoch": 10.88, "grad_norm": 3.5497820377349854, "learning_rate": 8.422914572864322e-06, "loss": 0.0497, "step": 16200 }, { "epoch": 10.9, "grad_norm": 2.838867664337158, "learning_rate": 8.420402010050251e-06, "loss": 0.0451, "step": 16225 }, { "epoch": 10.91, "grad_norm": 3.316819190979004, "learning_rate": 8.417889447236182e-06, "loss": 0.0489, "step": 16250 }, { "epoch": 10.93, "grad_norm": 3.3198862075805664, "learning_rate": 8.415376884422112e-06, "loss": 0.0528, "step": 16275 }, { "epoch": 10.95, "grad_norm": 3.4924492835998535, "learning_rate": 8.412864321608041e-06, "loss": 0.0492, "step": 16300 }, { "epoch": 10.96, "grad_norm": 3.0983831882476807, "learning_rate": 8.41035175879397e-06, "loss": 0.0498, "step": 16325 }, { "epoch": 10.98, "grad_norm": 3.4345991611480713, "learning_rate": 8.4078391959799e-06, "loss": 0.0483, "step": 16350 }, { "epoch": 11.0, "grad_norm": 3.294377326965332, "learning_rate": 8.40532663316583e-06, "loss": 0.0485, "step": 16375 }, { "epoch": 11.01, "grad_norm": 2.1766245365142822, "learning_rate": 8.40281407035176e-06, "loss": 0.0371, "step": 16400 }, { "epoch": 11.03, "grad_norm": 2.683638334274292, "learning_rate": 8.40030150753769e-06, "loss": 0.0355, "step": 16425 }, { "epoch": 11.05, "grad_norm": 2.8458847999572754, "learning_rate": 8.397788944723619e-06, "loss": 0.038, "step": 16450 }, { "epoch": 11.06, "grad_norm": 2.7042036056518555, "learning_rate": 8.395276381909548e-06, "loss": 0.0375, "step": 16475 }, { "epoch": 11.08, "grad_norm": 2.0865659713745117, "learning_rate": 8.392763819095479e-06, "loss": 0.039, "step": 16500 }, { "epoch": 11.1, "grad_norm": 2.3241260051727295, "learning_rate": 8.390251256281408e-06, "loss": 0.0365, "step": 16525 }, { "epoch": 11.11, "grad_norm": 2.7509355545043945, "learning_rate": 8.387738693467338e-06, "loss": 0.0392, "step": 16550 }, { "epoch": 11.13, "grad_norm": 2.3158955574035645, "learning_rate": 8.385226130653267e-06, "loss": 0.0399, "step": 16575 }, { "epoch": 11.15, "grad_norm": 2.368791103363037, "learning_rate": 8.382713567839196e-06, "loss": 0.0366, "step": 16600 }, { "epoch": 11.17, "grad_norm": 3.157816171646118, "learning_rate": 8.380201005025126e-06, "loss": 0.0386, "step": 16625 }, { "epoch": 11.18, "grad_norm": 2.391731023788452, "learning_rate": 8.377688442211057e-06, "loss": 0.0409, "step": 16650 }, { "epoch": 11.2, "grad_norm": 2.881032943725586, "learning_rate": 8.375175879396986e-06, "loss": 0.0399, "step": 16675 }, { "epoch": 11.22, "grad_norm": 2.8162527084350586, "learning_rate": 8.372663316582915e-06, "loss": 0.0386, "step": 16700 }, { "epoch": 11.23, "grad_norm": 2.798832654953003, "learning_rate": 8.370150753768845e-06, "loss": 0.0389, "step": 16725 }, { "epoch": 11.25, "grad_norm": 2.4073362350463867, "learning_rate": 8.367638190954774e-06, "loss": 0.038, "step": 16750 }, { "epoch": 11.27, "grad_norm": 3.539222002029419, "learning_rate": 8.365125628140705e-06, "loss": 0.0385, "step": 16775 }, { "epoch": 11.28, "grad_norm": 3.047471761703491, "learning_rate": 8.362613065326634e-06, "loss": 0.0386, "step": 16800 }, { "epoch": 11.3, "grad_norm": 2.62675142288208, "learning_rate": 8.360100502512563e-06, "loss": 0.0388, "step": 16825 }, { "epoch": 11.32, "grad_norm": 2.6403391361236572, "learning_rate": 8.357587939698493e-06, "loss": 0.041, "step": 16850 }, { "epoch": 11.33, "grad_norm": 2.7048850059509277, "learning_rate": 8.355075376884422e-06, "loss": 0.0405, "step": 16875 }, { "epoch": 11.35, "grad_norm": 2.8291220664978027, "learning_rate": 8.352562814070353e-06, "loss": 0.0375, "step": 16900 }, { "epoch": 11.37, "grad_norm": 2.9671170711517334, "learning_rate": 8.350050251256282e-06, "loss": 0.0377, "step": 16925 }, { "epoch": 11.38, "grad_norm": 3.0989413261413574, "learning_rate": 8.347537688442212e-06, "loss": 0.039, "step": 16950 }, { "epoch": 11.4, "grad_norm": 2.738807201385498, "learning_rate": 8.345025125628141e-06, "loss": 0.0399, "step": 16975 }, { "epoch": 11.42, "grad_norm": 2.9761691093444824, "learning_rate": 8.34251256281407e-06, "loss": 0.0402, "step": 17000 }, { "epoch": 11.42, "eval_loss": 0.13800786435604095, "eval_runtime": 531.8418, "eval_samples_per_second": 2.606, "eval_steps_per_second": 2.606, "eval_wer": 27.76796750838778, "step": 17000 }, { "epoch": 11.43, "grad_norm": 3.1192235946655273, "learning_rate": 8.34e-06, "loss": 0.0389, "step": 17025 }, { "epoch": 11.45, "grad_norm": 3.019216299057007, "learning_rate": 8.33748743718593e-06, "loss": 0.0413, "step": 17050 }, { "epoch": 11.47, "grad_norm": 2.6235885620117188, "learning_rate": 8.33497487437186e-06, "loss": 0.043, "step": 17075 }, { "epoch": 11.48, "grad_norm": 3.3072292804718018, "learning_rate": 8.33246231155779e-06, "loss": 0.0384, "step": 17100 }, { "epoch": 11.5, "grad_norm": 3.032578706741333, "learning_rate": 8.32994974874372e-06, "loss": 0.0394, "step": 17125 }, { "epoch": 11.52, "grad_norm": 3.0692577362060547, "learning_rate": 8.327437185929648e-06, "loss": 0.0402, "step": 17150 }, { "epoch": 11.53, "grad_norm": 3.113739252090454, "learning_rate": 8.324924623115579e-06, "loss": 0.038, "step": 17175 }, { "epoch": 11.55, "grad_norm": 3.1510965824127197, "learning_rate": 8.322412060301508e-06, "loss": 0.0423, "step": 17200 }, { "epoch": 11.57, "grad_norm": 3.110407590866089, "learning_rate": 8.319899497487438e-06, "loss": 0.0381, "step": 17225 }, { "epoch": 11.58, "grad_norm": 2.9603676795959473, "learning_rate": 8.317386934673367e-06, "loss": 0.0421, "step": 17250 }, { "epoch": 11.6, "grad_norm": 2.7330162525177, "learning_rate": 8.314874371859298e-06, "loss": 0.04, "step": 17275 }, { "epoch": 11.62, "grad_norm": 3.783348798751831, "learning_rate": 8.312361809045226e-06, "loss": 0.0428, "step": 17300 }, { "epoch": 11.64, "grad_norm": 3.3141326904296875, "learning_rate": 8.309849246231157e-06, "loss": 0.04, "step": 17325 }, { "epoch": 11.65, "grad_norm": 3.1341404914855957, "learning_rate": 8.307336683417086e-06, "loss": 0.0389, "step": 17350 }, { "epoch": 11.67, "grad_norm": 2.5702879428863525, "learning_rate": 8.304824120603015e-06, "loss": 0.0411, "step": 17375 }, { "epoch": 11.69, "grad_norm": 2.7597875595092773, "learning_rate": 8.302311557788946e-06, "loss": 0.0387, "step": 17400 }, { "epoch": 11.7, "grad_norm": 3.1602911949157715, "learning_rate": 8.299798994974874e-06, "loss": 0.0401, "step": 17425 }, { "epoch": 11.72, "grad_norm": 2.9719858169555664, "learning_rate": 8.297286432160805e-06, "loss": 0.04, "step": 17450 }, { "epoch": 11.74, "grad_norm": 2.7361767292022705, "learning_rate": 8.294773869346734e-06, "loss": 0.041, "step": 17475 }, { "epoch": 11.75, "grad_norm": 2.7034785747528076, "learning_rate": 8.292261306532664e-06, "loss": 0.0413, "step": 17500 }, { "epoch": 11.77, "grad_norm": 3.2431066036224365, "learning_rate": 8.289748743718595e-06, "loss": 0.0396, "step": 17525 }, { "epoch": 11.79, "grad_norm": 2.7960753440856934, "learning_rate": 8.287236180904524e-06, "loss": 0.0406, "step": 17550 }, { "epoch": 11.8, "grad_norm": 3.0115575790405273, "learning_rate": 8.284723618090453e-06, "loss": 0.0395, "step": 17575 }, { "epoch": 11.82, "grad_norm": 2.4014508724212646, "learning_rate": 8.282211055276383e-06, "loss": 0.0404, "step": 17600 }, { "epoch": 11.84, "grad_norm": 3.1004748344421387, "learning_rate": 8.279698492462312e-06, "loss": 0.0385, "step": 17625 }, { "epoch": 11.85, "grad_norm": 2.5941948890686035, "learning_rate": 8.277185929648241e-06, "loss": 0.0398, "step": 17650 }, { "epoch": 11.87, "grad_norm": 2.6056137084960938, "learning_rate": 8.274673366834172e-06, "loss": 0.0381, "step": 17675 }, { "epoch": 11.89, "grad_norm": 2.8399932384490967, "learning_rate": 8.2721608040201e-06, "loss": 0.0401, "step": 17700 }, { "epoch": 11.9, "grad_norm": 2.9396562576293945, "learning_rate": 8.269648241206031e-06, "loss": 0.0409, "step": 17725 }, { "epoch": 11.92, "grad_norm": 3.1237053871154785, "learning_rate": 8.26713567839196e-06, "loss": 0.039, "step": 17750 }, { "epoch": 11.94, "grad_norm": 3.0028700828552246, "learning_rate": 8.26462311557789e-06, "loss": 0.0421, "step": 17775 }, { "epoch": 11.95, "grad_norm": 3.055807590484619, "learning_rate": 8.26211055276382e-06, "loss": 0.0405, "step": 17800 }, { "epoch": 11.97, "grad_norm": 3.251986026763916, "learning_rate": 8.25959798994975e-06, "loss": 0.0433, "step": 17825 }, { "epoch": 11.99, "grad_norm": 2.845550537109375, "learning_rate": 8.25708542713568e-06, "loss": 0.0385, "step": 17850 }, { "epoch": 12.0, "grad_norm": 2.913346290588379, "learning_rate": 8.254572864321609e-06, "loss": 0.0378, "step": 17875 }, { "epoch": 12.02, "grad_norm": 2.3991270065307617, "learning_rate": 8.252060301507538e-06, "loss": 0.0294, "step": 17900 }, { "epoch": 12.04, "grad_norm": 2.4414055347442627, "learning_rate": 8.249547738693467e-06, "loss": 0.0306, "step": 17925 }, { "epoch": 12.06, "grad_norm": 2.274725914001465, "learning_rate": 8.247035175879398e-06, "loss": 0.0295, "step": 17950 }, { "epoch": 12.07, "grad_norm": 2.767655849456787, "learning_rate": 8.244522613065328e-06, "loss": 0.0307, "step": 17975 }, { "epoch": 12.09, "grad_norm": 2.5598373413085938, "learning_rate": 8.242010050251257e-06, "loss": 0.0307, "step": 18000 }, { "epoch": 12.09, "eval_loss": 0.1446864753961563, "eval_runtime": 537.4834, "eval_samples_per_second": 2.579, "eval_steps_per_second": 2.579, "eval_wer": 27.238212961327918, "step": 18000 }, { "epoch": 12.11, "grad_norm": 2.654730796813965, "learning_rate": 8.239497487437186e-06, "loss": 0.0303, "step": 18025 }, { "epoch": 12.12, "grad_norm": 2.6578266620635986, "learning_rate": 8.236984924623116e-06, "loss": 0.0298, "step": 18050 }, { "epoch": 12.14, "grad_norm": 3.2597641944885254, "learning_rate": 8.234472361809047e-06, "loss": 0.0307, "step": 18075 }, { "epoch": 12.16, "grad_norm": 3.1756911277770996, "learning_rate": 8.231959798994976e-06, "loss": 0.0303, "step": 18100 }, { "epoch": 12.17, "grad_norm": 2.3517801761627197, "learning_rate": 8.229447236180905e-06, "loss": 0.0299, "step": 18125 }, { "epoch": 12.19, "grad_norm": 2.7081449031829834, "learning_rate": 8.226934673366835e-06, "loss": 0.0317, "step": 18150 }, { "epoch": 12.21, "grad_norm": 2.9442265033721924, "learning_rate": 8.224422110552764e-06, "loss": 0.0309, "step": 18175 }, { "epoch": 12.22, "grad_norm": 2.202742099761963, "learning_rate": 8.221909547738695e-06, "loss": 0.0299, "step": 18200 }, { "epoch": 12.24, "grad_norm": 2.683105230331421, "learning_rate": 8.219396984924624e-06, "loss": 0.0303, "step": 18225 }, { "epoch": 12.26, "grad_norm": 2.4034810066223145, "learning_rate": 8.216884422110554e-06, "loss": 0.0319, "step": 18250 }, { "epoch": 12.27, "grad_norm": 2.621290683746338, "learning_rate": 8.214371859296483e-06, "loss": 0.0318, "step": 18275 }, { "epoch": 12.29, "grad_norm": 2.842874765396118, "learning_rate": 8.211859296482412e-06, "loss": 0.0332, "step": 18300 }, { "epoch": 12.31, "grad_norm": 2.4797563552856445, "learning_rate": 8.209346733668342e-06, "loss": 0.0325, "step": 18325 }, { "epoch": 12.32, "grad_norm": 2.8069446086883545, "learning_rate": 8.206834170854273e-06, "loss": 0.033, "step": 18350 }, { "epoch": 12.34, "grad_norm": 2.9851083755493164, "learning_rate": 8.204321608040202e-06, "loss": 0.0321, "step": 18375 }, { "epoch": 12.36, "grad_norm": 2.948084592819214, "learning_rate": 8.201809045226131e-06, "loss": 0.0338, "step": 18400 }, { "epoch": 12.37, "grad_norm": 2.7898919582366943, "learning_rate": 8.19929648241206e-06, "loss": 0.0315, "step": 18425 }, { "epoch": 12.39, "grad_norm": 2.366434097290039, "learning_rate": 8.19678391959799e-06, "loss": 0.032, "step": 18450 }, { "epoch": 12.41, "grad_norm": 2.9562463760375977, "learning_rate": 8.194271356783921e-06, "loss": 0.0334, "step": 18475 }, { "epoch": 12.42, "grad_norm": 2.5975656509399414, "learning_rate": 8.19175879396985e-06, "loss": 0.0331, "step": 18500 }, { "epoch": 12.44, "grad_norm": 2.8374183177948, "learning_rate": 8.18924623115578e-06, "loss": 0.0318, "step": 18525 }, { "epoch": 12.46, "grad_norm": 2.839860439300537, "learning_rate": 8.186733668341709e-06, "loss": 0.0324, "step": 18550 }, { "epoch": 12.47, "grad_norm": 2.800180196762085, "learning_rate": 8.184221105527638e-06, "loss": 0.0309, "step": 18575 }, { "epoch": 12.49, "grad_norm": 2.644583225250244, "learning_rate": 8.18170854271357e-06, "loss": 0.0331, "step": 18600 }, { "epoch": 12.51, "grad_norm": 3.0358402729034424, "learning_rate": 8.179195979899498e-06, "loss": 0.0327, "step": 18625 }, { "epoch": 12.53, "grad_norm": 2.807608127593994, "learning_rate": 8.176683417085428e-06, "loss": 0.032, "step": 18650 }, { "epoch": 12.54, "grad_norm": 3.115736961364746, "learning_rate": 8.174170854271357e-06, "loss": 0.034, "step": 18675 }, { "epoch": 12.56, "grad_norm": 2.563960313796997, "learning_rate": 8.171658291457286e-06, "loss": 0.0325, "step": 18700 }, { "epoch": 12.58, "grad_norm": 2.6218457221984863, "learning_rate": 8.169145728643216e-06, "loss": 0.0312, "step": 18725 }, { "epoch": 12.59, "grad_norm": 2.6230452060699463, "learning_rate": 8.166633165829147e-06, "loss": 0.0318, "step": 18750 }, { "epoch": 12.61, "grad_norm": 3.0028395652770996, "learning_rate": 8.164120603015076e-06, "loss": 0.0339, "step": 18775 }, { "epoch": 12.63, "grad_norm": 2.810173273086548, "learning_rate": 8.161608040201005e-06, "loss": 0.0337, "step": 18800 }, { "epoch": 12.64, "grad_norm": 2.7154364585876465, "learning_rate": 8.159095477386936e-06, "loss": 0.0315, "step": 18825 }, { "epoch": 12.66, "grad_norm": 2.9645156860351562, "learning_rate": 8.156582914572864e-06, "loss": 0.0341, "step": 18850 }, { "epoch": 12.68, "grad_norm": 2.558562755584717, "learning_rate": 8.154070351758795e-06, "loss": 0.0321, "step": 18875 }, { "epoch": 12.69, "grad_norm": 3.045975923538208, "learning_rate": 8.151557788944724e-06, "loss": 0.0328, "step": 18900 }, { "epoch": 12.71, "grad_norm": 2.605736494064331, "learning_rate": 8.149045226130654e-06, "loss": 0.0338, "step": 18925 }, { "epoch": 12.73, "grad_norm": 2.6503992080688477, "learning_rate": 8.146532663316583e-06, "loss": 0.0349, "step": 18950 }, { "epoch": 12.74, "grad_norm": 2.7485363483428955, "learning_rate": 8.144020100502512e-06, "loss": 0.0331, "step": 18975 }, { "epoch": 12.76, "grad_norm": 3.0558133125305176, "learning_rate": 8.141507537688443e-06, "loss": 0.0331, "step": 19000 }, { "epoch": 12.76, "eval_loss": 0.15126191079616547, "eval_runtime": 542.0176, "eval_samples_per_second": 2.557, "eval_steps_per_second": 2.557, "eval_wer": 28.129966448878683, "step": 19000 }, { "epoch": 12.78, "grad_norm": 3.117704391479492, "learning_rate": 8.138994974874373e-06, "loss": 0.0336, "step": 19025 }, { "epoch": 12.79, "grad_norm": 2.7645487785339355, "learning_rate": 8.136482412060302e-06, "loss": 0.0324, "step": 19050 }, { "epoch": 12.81, "grad_norm": 2.742771625518799, "learning_rate": 8.133969849246231e-06, "loss": 0.0331, "step": 19075 }, { "epoch": 12.83, "grad_norm": 2.8407609462738037, "learning_rate": 8.131457286432162e-06, "loss": 0.0317, "step": 19100 }, { "epoch": 12.84, "grad_norm": 2.5845396518707275, "learning_rate": 8.12894472361809e-06, "loss": 0.0335, "step": 19125 }, { "epoch": 12.86, "grad_norm": 2.8739688396453857, "learning_rate": 8.126432160804021e-06, "loss": 0.0333, "step": 19150 }, { "epoch": 12.88, "grad_norm": 3.1160261631011963, "learning_rate": 8.12391959798995e-06, "loss": 0.033, "step": 19175 }, { "epoch": 12.89, "grad_norm": 2.978895902633667, "learning_rate": 8.12140703517588e-06, "loss": 0.0358, "step": 19200 }, { "epoch": 12.91, "grad_norm": 3.0800576210021973, "learning_rate": 8.11889447236181e-06, "loss": 0.0335, "step": 19225 }, { "epoch": 12.93, "grad_norm": 2.4890170097351074, "learning_rate": 8.11638190954774e-06, "loss": 0.034, "step": 19250 }, { "epoch": 12.94, "grad_norm": 2.8995964527130127, "learning_rate": 8.11386934673367e-06, "loss": 0.0342, "step": 19275 }, { "epoch": 12.96, "grad_norm": 2.8822238445281982, "learning_rate": 8.111356783919599e-06, "loss": 0.0338, "step": 19300 }, { "epoch": 12.98, "grad_norm": 2.3847439289093018, "learning_rate": 8.108844221105528e-06, "loss": 0.0345, "step": 19325 }, { "epoch": 13.0, "grad_norm": 2.5077168941497803, "learning_rate": 8.106331658291457e-06, "loss": 0.0323, "step": 19350 }, { "epoch": 13.01, "grad_norm": 2.0860869884490967, "learning_rate": 8.103819095477388e-06, "loss": 0.0256, "step": 19375 }, { "epoch": 13.03, "grad_norm": 2.4186856746673584, "learning_rate": 8.101306532663318e-06, "loss": 0.025, "step": 19400 }, { "epoch": 13.05, "grad_norm": 2.169545888900757, "learning_rate": 8.098793969849247e-06, "loss": 0.024, "step": 19425 }, { "epoch": 13.06, "grad_norm": 2.250295877456665, "learning_rate": 8.096281407035176e-06, "loss": 0.0227, "step": 19450 }, { "epoch": 13.08, "grad_norm": 2.8207223415374756, "learning_rate": 8.093768844221106e-06, "loss": 0.0254, "step": 19475 }, { "epoch": 13.1, "grad_norm": 2.4845900535583496, "learning_rate": 8.091256281407037e-06, "loss": 0.0251, "step": 19500 }, { "epoch": 13.11, "grad_norm": 2.9678895473480225, "learning_rate": 8.088743718592966e-06, "loss": 0.0255, "step": 19525 }, { "epoch": 13.13, "grad_norm": 3.0639657974243164, "learning_rate": 8.086231155778895e-06, "loss": 0.0266, "step": 19550 }, { "epoch": 13.15, "grad_norm": 2.5778753757476807, "learning_rate": 8.083718592964825e-06, "loss": 0.0258, "step": 19575 }, { "epoch": 13.16, "grad_norm": 2.3090131282806396, "learning_rate": 8.081206030150754e-06, "loss": 0.0234, "step": 19600 }, { "epoch": 13.18, "grad_norm": 2.645989418029785, "learning_rate": 8.078693467336685e-06, "loss": 0.0243, "step": 19625 }, { "epoch": 13.2, "grad_norm": 2.4817280769348145, "learning_rate": 8.076180904522614e-06, "loss": 0.0274, "step": 19650 }, { "epoch": 13.21, "grad_norm": 2.17031192779541, "learning_rate": 8.073668341708544e-06, "loss": 0.024, "step": 19675 }, { "epoch": 13.23, "grad_norm": 2.587280035018921, "learning_rate": 8.071155778894473e-06, "loss": 0.0258, "step": 19700 }, { "epoch": 13.25, "grad_norm": 2.3844306468963623, "learning_rate": 8.068643216080402e-06, "loss": 0.0264, "step": 19725 }, { "epoch": 13.26, "grad_norm": 2.440300226211548, "learning_rate": 8.066130653266332e-06, "loss": 0.0259, "step": 19750 }, { "epoch": 13.28, "grad_norm": 2.120274543762207, "learning_rate": 8.063618090452263e-06, "loss": 0.0253, "step": 19775 }, { "epoch": 13.3, "grad_norm": 2.412203073501587, "learning_rate": 8.061105527638192e-06, "loss": 0.0256, "step": 19800 }, { "epoch": 13.31, "grad_norm": 2.3215441703796387, "learning_rate": 8.058592964824121e-06, "loss": 0.0247, "step": 19825 }, { "epoch": 13.33, "grad_norm": 2.0729939937591553, "learning_rate": 8.05608040201005e-06, "loss": 0.0248, "step": 19850 }, { "epoch": 13.35, "grad_norm": 2.622880697250366, "learning_rate": 8.05356783919598e-06, "loss": 0.0271, "step": 19875 }, { "epoch": 13.36, "grad_norm": 2.5304481983184814, "learning_rate": 8.051055276381911e-06, "loss": 0.0255, "step": 19900 }, { "epoch": 13.38, "grad_norm": 2.6204922199249268, "learning_rate": 8.04854271356784e-06, "loss": 0.0261, "step": 19925 }, { "epoch": 13.4, "grad_norm": 2.284783363342285, "learning_rate": 8.04603015075377e-06, "loss": 0.0257, "step": 19950 }, { "epoch": 13.42, "grad_norm": 3.0914671421051025, "learning_rate": 8.043517587939699e-06, "loss": 0.027, "step": 19975 }, { "epoch": 13.43, "grad_norm": 2.8612654209136963, "learning_rate": 8.041005025125628e-06, "loss": 0.0258, "step": 20000 }, { "epoch": 13.43, "eval_loss": 0.15857619047164917, "eval_runtime": 534.77, "eval_samples_per_second": 2.592, "eval_steps_per_second": 2.592, "eval_wer": 28.809818117605506, "step": 20000 }, { "epoch": 13.45, "grad_norm": 3.074786424636841, "learning_rate": 8.03849246231156e-06, "loss": 0.026, "step": 20025 }, { "epoch": 13.47, "grad_norm": 2.40915584564209, "learning_rate": 8.035979899497489e-06, "loss": 0.029, "step": 20050 }, { "epoch": 13.48, "grad_norm": 2.7619211673736572, "learning_rate": 8.033467336683418e-06, "loss": 0.0261, "step": 20075 }, { "epoch": 13.5, "grad_norm": 2.8454036712646484, "learning_rate": 8.030954773869347e-06, "loss": 0.0257, "step": 20100 }, { "epoch": 13.52, "grad_norm": 2.519239664077759, "learning_rate": 8.028442211055277e-06, "loss": 0.0255, "step": 20125 }, { "epoch": 13.53, "grad_norm": 2.798295736312866, "learning_rate": 8.025929648241206e-06, "loss": 0.0256, "step": 20150 }, { "epoch": 13.55, "grad_norm": 2.658249855041504, "learning_rate": 8.023417085427137e-06, "loss": 0.0252, "step": 20175 }, { "epoch": 13.57, "grad_norm": 2.55195689201355, "learning_rate": 8.020904522613066e-06, "loss": 0.0281, "step": 20200 }, { "epoch": 13.58, "grad_norm": 2.282550096511841, "learning_rate": 8.018391959798996e-06, "loss": 0.0262, "step": 20225 }, { "epoch": 13.6, "grad_norm": 2.6260697841644287, "learning_rate": 8.015879396984927e-06, "loss": 0.0249, "step": 20250 }, { "epoch": 13.62, "grad_norm": 2.61671781539917, "learning_rate": 8.013366834170854e-06, "loss": 0.0276, "step": 20275 }, { "epoch": 13.63, "grad_norm": 2.5859358310699463, "learning_rate": 8.010854271356785e-06, "loss": 0.0265, "step": 20300 }, { "epoch": 13.65, "grad_norm": 2.6100573539733887, "learning_rate": 8.008341708542714e-06, "loss": 0.0258, "step": 20325 }, { "epoch": 13.67, "grad_norm": 2.5182266235351562, "learning_rate": 8.005829145728644e-06, "loss": 0.028, "step": 20350 }, { "epoch": 13.68, "grad_norm": 3.105220317840576, "learning_rate": 8.003316582914573e-06, "loss": 0.027, "step": 20375 }, { "epoch": 13.7, "grad_norm": 2.7697339057922363, "learning_rate": 8.000804020100502e-06, "loss": 0.0274, "step": 20400 }, { "epoch": 13.72, "grad_norm": 2.74824857711792, "learning_rate": 7.998291457286432e-06, "loss": 0.0264, "step": 20425 }, { "epoch": 13.73, "grad_norm": 2.1460442543029785, "learning_rate": 7.995778894472363e-06, "loss": 0.0266, "step": 20450 }, { "epoch": 13.75, "grad_norm": 2.700098991394043, "learning_rate": 7.993266331658292e-06, "loss": 0.0271, "step": 20475 }, { "epoch": 13.77, "grad_norm": 3.0646328926086426, "learning_rate": 7.990753768844221e-06, "loss": 0.0273, "step": 20500 }, { "epoch": 13.78, "grad_norm": 2.4817585945129395, "learning_rate": 7.988241206030152e-06, "loss": 0.0267, "step": 20525 }, { "epoch": 13.8, "grad_norm": 2.383892059326172, "learning_rate": 7.98572864321608e-06, "loss": 0.0281, "step": 20550 }, { "epoch": 13.82, "grad_norm": 2.6712028980255127, "learning_rate": 7.983216080402011e-06, "loss": 0.0262, "step": 20575 }, { "epoch": 13.83, "grad_norm": 2.8054888248443604, "learning_rate": 7.98070351758794e-06, "loss": 0.0277, "step": 20600 }, { "epoch": 13.85, "grad_norm": 2.520451545715332, "learning_rate": 7.97819095477387e-06, "loss": 0.0256, "step": 20625 }, { "epoch": 13.87, "grad_norm": 2.6715471744537354, "learning_rate": 7.975678391959799e-06, "loss": 0.0271, "step": 20650 }, { "epoch": 13.89, "grad_norm": 2.936898946762085, "learning_rate": 7.973165829145728e-06, "loss": 0.0271, "step": 20675 }, { "epoch": 13.9, "grad_norm": 2.5876598358154297, "learning_rate": 7.97065326633166e-06, "loss": 0.0254, "step": 20700 }, { "epoch": 13.92, "grad_norm": 2.576573133468628, "learning_rate": 7.968140703517589e-06, "loss": 0.0268, "step": 20725 }, { "epoch": 13.94, "grad_norm": 2.962134838104248, "learning_rate": 7.965628140703518e-06, "loss": 0.028, "step": 20750 }, { "epoch": 13.95, "grad_norm": 2.4978857040405273, "learning_rate": 7.963115577889447e-06, "loss": 0.0268, "step": 20775 }, { "epoch": 13.97, "grad_norm": 2.7507359981536865, "learning_rate": 7.960603015075378e-06, "loss": 0.0264, "step": 20800 }, { "epoch": 13.99, "grad_norm": 2.290602922439575, "learning_rate": 7.958090452261306e-06, "loss": 0.0268, "step": 20825 }, { "epoch": 14.0, "grad_norm": 1.895709753036499, "learning_rate": 7.955577889447237e-06, "loss": 0.0267, "step": 20850 }, { "epoch": 14.02, "grad_norm": 2.577284097671509, "learning_rate": 7.953065326633166e-06, "loss": 0.02, "step": 20875 }, { "epoch": 14.04, "grad_norm": 2.139061450958252, "learning_rate": 7.950552763819096e-06, "loss": 0.0182, "step": 20900 }, { "epoch": 14.05, "grad_norm": 2.31142520904541, "learning_rate": 7.948040201005027e-06, "loss": 0.0189, "step": 20925 }, { "epoch": 14.07, "grad_norm": 2.4628167152404785, "learning_rate": 7.945527638190954e-06, "loss": 0.0191, "step": 20950 }, { "epoch": 14.09, "grad_norm": 2.2550642490386963, "learning_rate": 7.943015075376885e-06, "loss": 0.0205, "step": 20975 }, { "epoch": 14.1, "grad_norm": 2.5067131519317627, "learning_rate": 7.940502512562815e-06, "loss": 0.0193, "step": 21000 }, { "epoch": 14.1, "eval_loss": 0.16441361606121063, "eval_runtime": 532.0683, "eval_samples_per_second": 2.605, "eval_steps_per_second": 2.605, "eval_wer": 28.28006357054565, "step": 21000 }, { "epoch": 14.12, "grad_norm": 2.0792436599731445, "learning_rate": 7.937989949748744e-06, "loss": 0.0202, "step": 21025 }, { "epoch": 14.14, "grad_norm": 2.0055572986602783, "learning_rate": 7.935477386934673e-06, "loss": 0.02, "step": 21050 }, { "epoch": 14.15, "grad_norm": 2.557342052459717, "learning_rate": 7.932964824120604e-06, "loss": 0.0202, "step": 21075 }, { "epoch": 14.17, "grad_norm": 2.351605176925659, "learning_rate": 7.930452261306534e-06, "loss": 0.0205, "step": 21100 }, { "epoch": 14.19, "grad_norm": 2.4522876739501953, "learning_rate": 7.927939698492463e-06, "loss": 0.0197, "step": 21125 }, { "epoch": 14.2, "grad_norm": 1.9259110689163208, "learning_rate": 7.925527638190955e-06, "loss": 0.019, "step": 21150 }, { "epoch": 14.22, "grad_norm": 2.6869237422943115, "learning_rate": 7.923015075376886e-06, "loss": 0.0191, "step": 21175 }, { "epoch": 14.24, "grad_norm": 2.1610636711120605, "learning_rate": 7.920502512562815e-06, "loss": 0.0199, "step": 21200 }, { "epoch": 14.25, "grad_norm": 2.3419833183288574, "learning_rate": 7.917989949748744e-06, "loss": 0.0205, "step": 21225 }, { "epoch": 14.27, "grad_norm": 2.655822277069092, "learning_rate": 7.915477386934674e-06, "loss": 0.0211, "step": 21250 }, { "epoch": 14.29, "grad_norm": 2.3895249366760254, "learning_rate": 7.912964824120603e-06, "loss": 0.02, "step": 21275 }, { "epoch": 14.3, "grad_norm": 2.626079559326172, "learning_rate": 7.910452261306534e-06, "loss": 0.0204, "step": 21300 }, { "epoch": 14.32, "grad_norm": 2.4946000576019287, "learning_rate": 7.907939698492463e-06, "loss": 0.0211, "step": 21325 }, { "epoch": 14.34, "grad_norm": 2.2254092693328857, "learning_rate": 7.905427135678393e-06, "loss": 0.0209, "step": 21350 }, { "epoch": 14.36, "grad_norm": 2.813023328781128, "learning_rate": 7.902914572864322e-06, "loss": 0.0205, "step": 21375 }, { "epoch": 14.37, "grad_norm": 2.3448939323425293, "learning_rate": 7.900402010050253e-06, "loss": 0.0204, "step": 21400 }, { "epoch": 14.39, "grad_norm": 2.1861133575439453, "learning_rate": 7.89788944723618e-06, "loss": 0.0211, "step": 21425 }, { "epoch": 14.41, "grad_norm": 2.1422207355499268, "learning_rate": 7.895376884422111e-06, "loss": 0.0224, "step": 21450 }, { "epoch": 14.42, "grad_norm": 2.713761329650879, "learning_rate": 7.89286432160804e-06, "loss": 0.02, "step": 21475 }, { "epoch": 14.44, "grad_norm": 2.430680274963379, "learning_rate": 7.89035175879397e-06, "loss": 0.0218, "step": 21500 }, { "epoch": 14.46, "grad_norm": 2.974393606185913, "learning_rate": 7.887839195979901e-06, "loss": 0.0197, "step": 21525 }, { "epoch": 14.47, "grad_norm": 2.530994415283203, "learning_rate": 7.885326633165829e-06, "loss": 0.0221, "step": 21550 }, { "epoch": 14.49, "grad_norm": 2.5071282386779785, "learning_rate": 7.88281407035176e-06, "loss": 0.0214, "step": 21575 }, { "epoch": 14.51, "grad_norm": 2.2111854553222656, "learning_rate": 7.880301507537689e-06, "loss": 0.0208, "step": 21600 }, { "epoch": 14.52, "grad_norm": 2.194091320037842, "learning_rate": 7.877788944723618e-06, "loss": 0.0203, "step": 21625 }, { "epoch": 14.54, "grad_norm": 2.2206263542175293, "learning_rate": 7.875276381909548e-06, "loss": 0.0221, "step": 21650 }, { "epoch": 14.56, "grad_norm": 2.425065279006958, "learning_rate": 7.872763819095479e-06, "loss": 0.0211, "step": 21675 }, { "epoch": 14.57, "grad_norm": 2.6152865886688232, "learning_rate": 7.870251256281408e-06, "loss": 0.0207, "step": 21700 }, { "epoch": 14.59, "grad_norm": 2.2612714767456055, "learning_rate": 7.867738693467337e-06, "loss": 0.0209, "step": 21725 }, { "epoch": 14.61, "grad_norm": 2.1470086574554443, "learning_rate": 7.865226130653267e-06, "loss": 0.021, "step": 21750 }, { "epoch": 14.62, "grad_norm": 2.484851598739624, "learning_rate": 7.862713567839196e-06, "loss": 0.02, "step": 21775 }, { "epoch": 14.64, "grad_norm": 2.4667041301727295, "learning_rate": 7.860201005025127e-06, "loss": 0.0206, "step": 21800 }, { "epoch": 14.66, "grad_norm": 2.9903693199157715, "learning_rate": 7.857688442211055e-06, "loss": 0.0219, "step": 21825 }, { "epoch": 14.67, "grad_norm": 2.6542530059814453, "learning_rate": 7.855175879396986e-06, "loss": 0.0213, "step": 21850 }, { "epoch": 14.69, "grad_norm": 2.333191394805908, "learning_rate": 7.852663316582915e-06, "loss": 0.0214, "step": 21875 }, { "epoch": 14.71, "grad_norm": 2.71769380569458, "learning_rate": 7.850150753768844e-06, "loss": 0.0215, "step": 21900 }, { "epoch": 14.72, "grad_norm": 2.4674861431121826, "learning_rate": 7.847638190954775e-06, "loss": 0.0211, "step": 21925 }, { "epoch": 14.74, "grad_norm": 2.931941270828247, "learning_rate": 7.845125628140705e-06, "loss": 0.0244, "step": 21950 }, { "epoch": 14.76, "grad_norm": 2.738786458969116, "learning_rate": 7.842613065326634e-06, "loss": 0.0218, "step": 21975 }, { "epoch": 14.78, "grad_norm": 2.375138521194458, "learning_rate": 7.840100502512563e-06, "loss": 0.0219, "step": 22000 }, { "epoch": 14.78, "eval_loss": 0.16828645765781403, "eval_runtime": 539.6518, "eval_samples_per_second": 2.568, "eval_steps_per_second": 2.568, "eval_wer": 28.11230796397669, "step": 22000 } ], "logging_steps": 25, "max_steps": 100000, "num_input_tokens_seen": 0, "num_train_epochs": 68, "save_steps": 1000, "total_flos": 3.465787561869312e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }