jes3275's picture
Added checkpoints
ab70710
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_metric": 26.328800988875155,
"best_model_checkpoint": "results/whisper-tiny/marathi/checkpoint-12000",
"epoch": 14.775016789791806,
"eval_steps": 1000,
"global_step": 22000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"grad_norm": 74.15543365478516,
"learning_rate": 4.4e-07,
"loss": 3.7766,
"step": 25
},
{
"epoch": 0.03,
"grad_norm": 39.68334197998047,
"learning_rate": 9.200000000000001e-07,
"loss": 3.2026,
"step": 50
},
{
"epoch": 0.05,
"grad_norm": 14.481256484985352,
"learning_rate": 1.42e-06,
"loss": 2.4672,
"step": 75
},
{
"epoch": 0.07,
"grad_norm": 8.309144020080566,
"learning_rate": 1.9200000000000003e-06,
"loss": 1.9195,
"step": 100
},
{
"epoch": 0.08,
"grad_norm": 6.247703552246094,
"learning_rate": 2.42e-06,
"loss": 1.5361,
"step": 125
},
{
"epoch": 0.1,
"grad_norm": 5.86753511428833,
"learning_rate": 2.92e-06,
"loss": 1.2775,
"step": 150
},
{
"epoch": 0.12,
"grad_norm": 5.9364752769470215,
"learning_rate": 3.4200000000000007e-06,
"loss": 1.0933,
"step": 175
},
{
"epoch": 0.13,
"grad_norm": 5.17349910736084,
"learning_rate": 3.920000000000001e-06,
"loss": 0.9505,
"step": 200
},
{
"epoch": 0.15,
"grad_norm": 5.331369400024414,
"learning_rate": 4.42e-06,
"loss": 0.8507,
"step": 225
},
{
"epoch": 0.17,
"grad_norm": 5.466459274291992,
"learning_rate": 4.92e-06,
"loss": 0.7655,
"step": 250
},
{
"epoch": 0.18,
"grad_norm": 4.921384811401367,
"learning_rate": 5.420000000000001e-06,
"loss": 0.7153,
"step": 275
},
{
"epoch": 0.2,
"grad_norm": 5.227000713348389,
"learning_rate": 5.92e-06,
"loss": 0.6886,
"step": 300
},
{
"epoch": 0.22,
"grad_norm": 4.924015045166016,
"learning_rate": 6.42e-06,
"loss": 0.6324,
"step": 325
},
{
"epoch": 0.24,
"grad_norm": 4.0168986320495605,
"learning_rate": 6.92e-06,
"loss": 0.6107,
"step": 350
},
{
"epoch": 0.25,
"grad_norm": 4.831826686859131,
"learning_rate": 7.420000000000001e-06,
"loss": 0.5784,
"step": 375
},
{
"epoch": 0.27,
"grad_norm": 4.7476935386657715,
"learning_rate": 7.92e-06,
"loss": 0.5509,
"step": 400
},
{
"epoch": 0.29,
"grad_norm": 4.2020978927612305,
"learning_rate": 8.42e-06,
"loss": 0.5442,
"step": 425
},
{
"epoch": 0.3,
"grad_norm": 4.830783843994141,
"learning_rate": 8.920000000000001e-06,
"loss": 0.5297,
"step": 450
},
{
"epoch": 0.32,
"grad_norm": 4.747669696807861,
"learning_rate": 9.42e-06,
"loss": 0.5059,
"step": 475
},
{
"epoch": 0.34,
"grad_norm": 4.504109859466553,
"learning_rate": 9.920000000000002e-06,
"loss": 0.4927,
"step": 500
},
{
"epoch": 0.35,
"grad_norm": 4.707924842834473,
"learning_rate": 9.997889447236182e-06,
"loss": 0.4721,
"step": 525
},
{
"epoch": 0.37,
"grad_norm": 4.621720790863037,
"learning_rate": 9.995376884422112e-06,
"loss": 0.464,
"step": 550
},
{
"epoch": 0.39,
"grad_norm": 4.50490140914917,
"learning_rate": 9.992864321608041e-06,
"loss": 0.4518,
"step": 575
},
{
"epoch": 0.4,
"grad_norm": 4.592816352844238,
"learning_rate": 9.99035175879397e-06,
"loss": 0.4335,
"step": 600
},
{
"epoch": 0.42,
"grad_norm": 4.791091442108154,
"learning_rate": 9.9878391959799e-06,
"loss": 0.4348,
"step": 625
},
{
"epoch": 0.44,
"grad_norm": 4.221704959869385,
"learning_rate": 9.98532663316583e-06,
"loss": 0.4203,
"step": 650
},
{
"epoch": 0.45,
"grad_norm": 4.549515724182129,
"learning_rate": 9.98281407035176e-06,
"loss": 0.4086,
"step": 675
},
{
"epoch": 0.47,
"grad_norm": 4.485387802124023,
"learning_rate": 9.98030150753769e-06,
"loss": 0.405,
"step": 700
},
{
"epoch": 0.49,
"grad_norm": 4.758955001831055,
"learning_rate": 9.977788944723619e-06,
"loss": 0.4016,
"step": 725
},
{
"epoch": 0.5,
"grad_norm": 4.615067005157471,
"learning_rate": 9.975276381909548e-06,
"loss": 0.393,
"step": 750
},
{
"epoch": 0.52,
"grad_norm": 4.661777019500732,
"learning_rate": 9.972763819095477e-06,
"loss": 0.3843,
"step": 775
},
{
"epoch": 0.54,
"grad_norm": 4.5793609619140625,
"learning_rate": 9.970251256281408e-06,
"loss": 0.3832,
"step": 800
},
{
"epoch": 0.55,
"grad_norm": 5.030839443206787,
"learning_rate": 9.967738693467338e-06,
"loss": 0.3789,
"step": 825
},
{
"epoch": 0.57,
"grad_norm": 4.351238250732422,
"learning_rate": 9.965226130653267e-06,
"loss": 0.3576,
"step": 850
},
{
"epoch": 0.59,
"grad_norm": 4.560535907745361,
"learning_rate": 9.962713567839198e-06,
"loss": 0.3589,
"step": 875
},
{
"epoch": 0.6,
"grad_norm": 4.39430046081543,
"learning_rate": 9.960201005025126e-06,
"loss": 0.3554,
"step": 900
},
{
"epoch": 0.62,
"grad_norm": 4.813572883605957,
"learning_rate": 9.957688442211057e-06,
"loss": 0.3561,
"step": 925
},
{
"epoch": 0.64,
"grad_norm": 3.895594358444214,
"learning_rate": 9.955175879396986e-06,
"loss": 0.3532,
"step": 950
},
{
"epoch": 0.65,
"grad_norm": 4.176882266998291,
"learning_rate": 9.952663316582915e-06,
"loss": 0.3501,
"step": 975
},
{
"epoch": 0.67,
"grad_norm": 4.483668327331543,
"learning_rate": 9.950150753768845e-06,
"loss": 0.3485,
"step": 1000
},
{
"epoch": 0.67,
"eval_loss": 0.23381204903125763,
"eval_runtime": 566.2474,
"eval_samples_per_second": 2.448,
"eval_steps_per_second": 2.448,
"eval_wer": 47.28059332509271,
"step": 1000
},
{
"epoch": 0.69,
"grad_norm": 4.013958930969238,
"learning_rate": 9.947638190954774e-06,
"loss": 0.3408,
"step": 1025
},
{
"epoch": 0.71,
"grad_norm": 4.734582424163818,
"learning_rate": 9.945125628140703e-06,
"loss": 0.3313,
"step": 1050
},
{
"epoch": 0.72,
"grad_norm": 4.5922722816467285,
"learning_rate": 9.942613065326634e-06,
"loss": 0.3364,
"step": 1075
},
{
"epoch": 0.74,
"grad_norm": 3.997859001159668,
"learning_rate": 9.940100502512564e-06,
"loss": 0.3283,
"step": 1100
},
{
"epoch": 0.76,
"grad_norm": 4.432836532592773,
"learning_rate": 9.937587939698493e-06,
"loss": 0.3258,
"step": 1125
},
{
"epoch": 0.77,
"grad_norm": 4.074716091156006,
"learning_rate": 9.935075376884424e-06,
"loss": 0.3338,
"step": 1150
},
{
"epoch": 0.79,
"grad_norm": 4.509114742279053,
"learning_rate": 9.932562814070352e-06,
"loss": 0.3121,
"step": 1175
},
{
"epoch": 0.81,
"grad_norm": 4.589898586273193,
"learning_rate": 9.930050251256283e-06,
"loss": 0.3161,
"step": 1200
},
{
"epoch": 0.82,
"grad_norm": 4.0301079750061035,
"learning_rate": 9.927537688442212e-06,
"loss": 0.3248,
"step": 1225
},
{
"epoch": 0.84,
"grad_norm": 4.21639347076416,
"learning_rate": 9.925025125628141e-06,
"loss": 0.3096,
"step": 1250
},
{
"epoch": 0.86,
"grad_norm": 4.40596866607666,
"learning_rate": 9.922512562814072e-06,
"loss": 0.3136,
"step": 1275
},
{
"epoch": 0.87,
"grad_norm": 4.144809722900391,
"learning_rate": 9.920000000000002e-06,
"loss": 0.3068,
"step": 1300
},
{
"epoch": 0.89,
"grad_norm": 3.97633695602417,
"learning_rate": 9.917487437185931e-06,
"loss": 0.3044,
"step": 1325
},
{
"epoch": 0.91,
"grad_norm": 4.247403144836426,
"learning_rate": 9.91497487437186e-06,
"loss": 0.307,
"step": 1350
},
{
"epoch": 0.92,
"grad_norm": 3.9145348072052,
"learning_rate": 9.91246231155779e-06,
"loss": 0.3007,
"step": 1375
},
{
"epoch": 0.94,
"grad_norm": 4.151167869567871,
"learning_rate": 9.909949748743719e-06,
"loss": 0.2931,
"step": 1400
},
{
"epoch": 0.96,
"grad_norm": 4.783816337585449,
"learning_rate": 9.90743718592965e-06,
"loss": 0.2939,
"step": 1425
},
{
"epoch": 0.97,
"grad_norm": 4.319779872894287,
"learning_rate": 9.904924623115578e-06,
"loss": 0.294,
"step": 1450
},
{
"epoch": 0.99,
"grad_norm": 4.233304500579834,
"learning_rate": 9.902412060301509e-06,
"loss": 0.2918,
"step": 1475
},
{
"epoch": 1.01,
"grad_norm": 5.045380592346191,
"learning_rate": 9.899899497487438e-06,
"loss": 0.2839,
"step": 1500
},
{
"epoch": 1.02,
"grad_norm": 4.171890735626221,
"learning_rate": 9.897386934673367e-06,
"loss": 0.277,
"step": 1525
},
{
"epoch": 1.04,
"grad_norm": 5.11909818649292,
"learning_rate": 9.894874371859298e-06,
"loss": 0.2708,
"step": 1550
},
{
"epoch": 1.06,
"grad_norm": 4.329667568206787,
"learning_rate": 9.892361809045228e-06,
"loss": 0.278,
"step": 1575
},
{
"epoch": 1.07,
"grad_norm": 4.358795166015625,
"learning_rate": 9.889849246231157e-06,
"loss": 0.2702,
"step": 1600
},
{
"epoch": 1.09,
"grad_norm": 4.983689308166504,
"learning_rate": 9.887336683417086e-06,
"loss": 0.2594,
"step": 1625
},
{
"epoch": 1.11,
"grad_norm": 4.065433502197266,
"learning_rate": 9.884824120603015e-06,
"loss": 0.2628,
"step": 1650
},
{
"epoch": 1.12,
"grad_norm": 4.138759136199951,
"learning_rate": 9.882311557788945e-06,
"loss": 0.2692,
"step": 1675
},
{
"epoch": 1.14,
"grad_norm": 4.43567419052124,
"learning_rate": 9.879798994974876e-06,
"loss": 0.2688,
"step": 1700
},
{
"epoch": 1.16,
"grad_norm": 4.089324951171875,
"learning_rate": 9.877286432160805e-06,
"loss": 0.2641,
"step": 1725
},
{
"epoch": 1.18,
"grad_norm": 4.174434661865234,
"learning_rate": 9.874773869346734e-06,
"loss": 0.2638,
"step": 1750
},
{
"epoch": 1.19,
"grad_norm": 4.324215888977051,
"learning_rate": 9.872261306532664e-06,
"loss": 0.2621,
"step": 1775
},
{
"epoch": 1.21,
"grad_norm": 4.167600631713867,
"learning_rate": 9.869748743718593e-06,
"loss": 0.2568,
"step": 1800
},
{
"epoch": 1.23,
"grad_norm": 4.090190410614014,
"learning_rate": 9.867236180904524e-06,
"loss": 0.2579,
"step": 1825
},
{
"epoch": 1.24,
"grad_norm": 3.862471580505371,
"learning_rate": 9.864723618090453e-06,
"loss": 0.2549,
"step": 1850
},
{
"epoch": 1.26,
"grad_norm": 3.9046545028686523,
"learning_rate": 9.862211055276383e-06,
"loss": 0.2512,
"step": 1875
},
{
"epoch": 1.28,
"grad_norm": 3.973026990890503,
"learning_rate": 9.859698492462312e-06,
"loss": 0.2535,
"step": 1900
},
{
"epoch": 1.29,
"grad_norm": 3.875776529312134,
"learning_rate": 9.857185929648241e-06,
"loss": 0.2454,
"step": 1925
},
{
"epoch": 1.31,
"grad_norm": 3.815830707550049,
"learning_rate": 9.854673366834172e-06,
"loss": 0.2509,
"step": 1950
},
{
"epoch": 1.33,
"grad_norm": 3.9826467037200928,
"learning_rate": 9.852160804020102e-06,
"loss": 0.2469,
"step": 1975
},
{
"epoch": 1.34,
"grad_norm": 4.199316024780273,
"learning_rate": 9.849648241206031e-06,
"loss": 0.2543,
"step": 2000
},
{
"epoch": 1.34,
"eval_loss": 0.1726374477148056,
"eval_runtime": 531.5792,
"eval_samples_per_second": 2.607,
"eval_steps_per_second": 2.607,
"eval_wer": 38.892812996644885,
"step": 2000
},
{
"epoch": 1.36,
"grad_norm": 4.188065052032471,
"learning_rate": 9.84713567839196e-06,
"loss": 0.253,
"step": 2025
},
{
"epoch": 1.38,
"grad_norm": 4.348769187927246,
"learning_rate": 9.84462311557789e-06,
"loss": 0.2439,
"step": 2050
},
{
"epoch": 1.39,
"grad_norm": 4.025571823120117,
"learning_rate": 9.842110552763819e-06,
"loss": 0.2499,
"step": 2075
},
{
"epoch": 1.41,
"grad_norm": 3.441206216812134,
"learning_rate": 9.83959798994975e-06,
"loss": 0.2419,
"step": 2100
},
{
"epoch": 1.43,
"grad_norm": 4.062358856201172,
"learning_rate": 9.83708542713568e-06,
"loss": 0.2428,
"step": 2125
},
{
"epoch": 1.44,
"grad_norm": 4.701034069061279,
"learning_rate": 9.834572864321609e-06,
"loss": 0.2435,
"step": 2150
},
{
"epoch": 1.46,
"grad_norm": 4.011937618255615,
"learning_rate": 9.832060301507538e-06,
"loss": 0.2429,
"step": 2175
},
{
"epoch": 1.48,
"grad_norm": 3.9073057174682617,
"learning_rate": 9.829547738693467e-06,
"loss": 0.2371,
"step": 2200
},
{
"epoch": 1.49,
"grad_norm": 4.053809642791748,
"learning_rate": 9.827035175879398e-06,
"loss": 0.236,
"step": 2225
},
{
"epoch": 1.51,
"grad_norm": 3.983830690383911,
"learning_rate": 9.824522613065328e-06,
"loss": 0.2393,
"step": 2250
},
{
"epoch": 1.53,
"grad_norm": 4.095301151275635,
"learning_rate": 9.822010050251257e-06,
"loss": 0.2329,
"step": 2275
},
{
"epoch": 1.54,
"grad_norm": 3.980642318725586,
"learning_rate": 9.819497487437186e-06,
"loss": 0.24,
"step": 2300
},
{
"epoch": 1.56,
"grad_norm": 4.499876976013184,
"learning_rate": 9.816984924623116e-06,
"loss": 0.2307,
"step": 2325
},
{
"epoch": 1.58,
"grad_norm": 4.50550651550293,
"learning_rate": 9.814472361809047e-06,
"loss": 0.2336,
"step": 2350
},
{
"epoch": 1.6,
"grad_norm": 4.186766147613525,
"learning_rate": 9.811959798994976e-06,
"loss": 0.233,
"step": 2375
},
{
"epoch": 1.61,
"grad_norm": 4.362492084503174,
"learning_rate": 9.809447236180905e-06,
"loss": 0.2281,
"step": 2400
},
{
"epoch": 1.63,
"grad_norm": 4.026979446411133,
"learning_rate": 9.806934673366835e-06,
"loss": 0.227,
"step": 2425
},
{
"epoch": 1.65,
"grad_norm": 3.556326389312744,
"learning_rate": 9.804422110552764e-06,
"loss": 0.2331,
"step": 2450
},
{
"epoch": 1.66,
"grad_norm": 4.109285831451416,
"learning_rate": 9.801909547738693e-06,
"loss": 0.2369,
"step": 2475
},
{
"epoch": 1.68,
"grad_norm": 4.070573329925537,
"learning_rate": 9.799396984924624e-06,
"loss": 0.2307,
"step": 2500
},
{
"epoch": 1.7,
"grad_norm": 4.282459735870361,
"learning_rate": 9.796884422110554e-06,
"loss": 0.2276,
"step": 2525
},
{
"epoch": 1.71,
"grad_norm": 3.989485263824463,
"learning_rate": 9.794371859296483e-06,
"loss": 0.2325,
"step": 2550
},
{
"epoch": 1.73,
"grad_norm": 4.217010021209717,
"learning_rate": 9.791859296482414e-06,
"loss": 0.222,
"step": 2575
},
{
"epoch": 1.75,
"grad_norm": 4.2022199630737305,
"learning_rate": 9.789346733668342e-06,
"loss": 0.2281,
"step": 2600
},
{
"epoch": 1.76,
"grad_norm": 3.7386114597320557,
"learning_rate": 9.786834170854273e-06,
"loss": 0.2165,
"step": 2625
},
{
"epoch": 1.78,
"grad_norm": 4.048258304595947,
"learning_rate": 9.784321608040202e-06,
"loss": 0.2218,
"step": 2650
},
{
"epoch": 1.8,
"grad_norm": 4.0867133140563965,
"learning_rate": 9.781809045226131e-06,
"loss": 0.2197,
"step": 2675
},
{
"epoch": 1.81,
"grad_norm": 3.8676252365112305,
"learning_rate": 9.77929648241206e-06,
"loss": 0.2261,
"step": 2700
},
{
"epoch": 1.83,
"grad_norm": 3.8840291500091553,
"learning_rate": 9.77678391959799e-06,
"loss": 0.2188,
"step": 2725
},
{
"epoch": 1.85,
"grad_norm": 4.130185127258301,
"learning_rate": 9.774271356783921e-06,
"loss": 0.2176,
"step": 2750
},
{
"epoch": 1.86,
"grad_norm": 3.8641357421875,
"learning_rate": 9.77175879396985e-06,
"loss": 0.2165,
"step": 2775
},
{
"epoch": 1.88,
"grad_norm": 3.8261783123016357,
"learning_rate": 9.76924623115578e-06,
"loss": 0.2149,
"step": 2800
},
{
"epoch": 1.9,
"grad_norm": 3.861722946166992,
"learning_rate": 9.766733668341709e-06,
"loss": 0.2122,
"step": 2825
},
{
"epoch": 1.91,
"grad_norm": 4.013296127319336,
"learning_rate": 9.76422110552764e-06,
"loss": 0.2183,
"step": 2850
},
{
"epoch": 1.93,
"grad_norm": 3.78545880317688,
"learning_rate": 9.761708542713568e-06,
"loss": 0.2151,
"step": 2875
},
{
"epoch": 1.95,
"grad_norm": 4.247804641723633,
"learning_rate": 9.759195979899499e-06,
"loss": 0.2213,
"step": 2900
},
{
"epoch": 1.96,
"grad_norm": 4.548637390136719,
"learning_rate": 9.756683417085428e-06,
"loss": 0.2198,
"step": 2925
},
{
"epoch": 1.98,
"grad_norm": 3.617631435394287,
"learning_rate": 9.754170854271357e-06,
"loss": 0.2103,
"step": 2950
},
{
"epoch": 2.0,
"grad_norm": 3.9520044326782227,
"learning_rate": 9.751658291457288e-06,
"loss": 0.2053,
"step": 2975
},
{
"epoch": 2.01,
"grad_norm": 3.903465747833252,
"learning_rate": 9.749145728643216e-06,
"loss": 0.1961,
"step": 3000
},
{
"epoch": 2.01,
"eval_loss": 0.1471080482006073,
"eval_runtime": 533.9926,
"eval_samples_per_second": 2.596,
"eval_steps_per_second": 2.596,
"eval_wer": 33.365707222320324,
"step": 3000
},
{
"epoch": 2.03,
"grad_norm": 3.3298327922821045,
"learning_rate": 9.746633165829147e-06,
"loss": 0.1968,
"step": 3025
},
{
"epoch": 2.05,
"grad_norm": 3.907670259475708,
"learning_rate": 9.744120603015076e-06,
"loss": 0.1938,
"step": 3050
},
{
"epoch": 2.07,
"grad_norm": 3.819309711456299,
"learning_rate": 9.741608040201006e-06,
"loss": 0.1879,
"step": 3075
},
{
"epoch": 2.08,
"grad_norm": 4.644184112548828,
"learning_rate": 9.739095477386935e-06,
"loss": 0.1933,
"step": 3100
},
{
"epoch": 2.1,
"grad_norm": 3.5478782653808594,
"learning_rate": 9.736582914572866e-06,
"loss": 0.19,
"step": 3125
},
{
"epoch": 2.12,
"grad_norm": 3.4926066398620605,
"learning_rate": 9.734070351758794e-06,
"loss": 0.1929,
"step": 3150
},
{
"epoch": 2.13,
"grad_norm": 3.6318588256835938,
"learning_rate": 9.731557788944725e-06,
"loss": 0.1921,
"step": 3175
},
{
"epoch": 2.15,
"grad_norm": 4.020270824432373,
"learning_rate": 9.729045226130654e-06,
"loss": 0.19,
"step": 3200
},
{
"epoch": 2.17,
"grad_norm": 3.391878128051758,
"learning_rate": 9.726532663316583e-06,
"loss": 0.1912,
"step": 3225
},
{
"epoch": 2.18,
"grad_norm": 3.8649306297302246,
"learning_rate": 9.724020100502514e-06,
"loss": 0.1965,
"step": 3250
},
{
"epoch": 2.2,
"grad_norm": 3.8927695751190186,
"learning_rate": 9.721507537688444e-06,
"loss": 0.1901,
"step": 3275
},
{
"epoch": 2.22,
"grad_norm": 3.7473957538604736,
"learning_rate": 9.718994974874373e-06,
"loss": 0.1932,
"step": 3300
},
{
"epoch": 2.23,
"grad_norm": 3.2613677978515625,
"learning_rate": 9.716482412060302e-06,
"loss": 0.1925,
"step": 3325
},
{
"epoch": 2.25,
"grad_norm": 4.175868988037109,
"learning_rate": 9.713969849246232e-06,
"loss": 0.1898,
"step": 3350
},
{
"epoch": 2.27,
"grad_norm": 4.236743450164795,
"learning_rate": 9.711457286432163e-06,
"loss": 0.1899,
"step": 3375
},
{
"epoch": 2.28,
"grad_norm": 4.136856555938721,
"learning_rate": 9.708944723618092e-06,
"loss": 0.1912,
"step": 3400
},
{
"epoch": 2.3,
"grad_norm": 3.826167345046997,
"learning_rate": 9.706432160804021e-06,
"loss": 0.1909,
"step": 3425
},
{
"epoch": 2.32,
"grad_norm": 3.949150323867798,
"learning_rate": 9.70391959798995e-06,
"loss": 0.1889,
"step": 3450
},
{
"epoch": 2.33,
"grad_norm": 4.023538589477539,
"learning_rate": 9.70140703517588e-06,
"loss": 0.1903,
"step": 3475
},
{
"epoch": 2.35,
"grad_norm": 3.7844576835632324,
"learning_rate": 9.698894472361809e-06,
"loss": 0.1928,
"step": 3500
},
{
"epoch": 2.37,
"grad_norm": 3.364312171936035,
"learning_rate": 9.69638190954774e-06,
"loss": 0.193,
"step": 3525
},
{
"epoch": 2.38,
"grad_norm": 3.4202849864959717,
"learning_rate": 9.69386934673367e-06,
"loss": 0.186,
"step": 3550
},
{
"epoch": 2.4,
"grad_norm": 3.6285476684570312,
"learning_rate": 9.691356783919599e-06,
"loss": 0.1863,
"step": 3575
},
{
"epoch": 2.42,
"grad_norm": 4.26074743270874,
"learning_rate": 9.688844221105528e-06,
"loss": 0.1831,
"step": 3600
},
{
"epoch": 2.43,
"grad_norm": 3.6059014797210693,
"learning_rate": 9.686331658291457e-06,
"loss": 0.182,
"step": 3625
},
{
"epoch": 2.45,
"grad_norm": 3.773573637008667,
"learning_rate": 9.683819095477388e-06,
"loss": 0.1824,
"step": 3650
},
{
"epoch": 2.47,
"grad_norm": 4.112974643707275,
"learning_rate": 9.681306532663318e-06,
"loss": 0.1828,
"step": 3675
},
{
"epoch": 2.48,
"grad_norm": 3.467885732650757,
"learning_rate": 9.678793969849247e-06,
"loss": 0.1819,
"step": 3700
},
{
"epoch": 2.5,
"grad_norm": 3.418673038482666,
"learning_rate": 9.676281407035176e-06,
"loss": 0.1831,
"step": 3725
},
{
"epoch": 2.52,
"grad_norm": 4.263250350952148,
"learning_rate": 9.673768844221106e-06,
"loss": 0.186,
"step": 3750
},
{
"epoch": 2.54,
"grad_norm": 3.5524044036865234,
"learning_rate": 9.671256281407035e-06,
"loss": 0.1889,
"step": 3775
},
{
"epoch": 2.55,
"grad_norm": 3.693559408187866,
"learning_rate": 9.668743718592966e-06,
"loss": 0.1842,
"step": 3800
},
{
"epoch": 2.57,
"grad_norm": 3.682617425918579,
"learning_rate": 9.666231155778895e-06,
"loss": 0.1826,
"step": 3825
},
{
"epoch": 2.59,
"grad_norm": 3.4766149520874023,
"learning_rate": 9.663718592964825e-06,
"loss": 0.1766,
"step": 3850
},
{
"epoch": 2.6,
"grad_norm": 3.3245768547058105,
"learning_rate": 9.661206030150754e-06,
"loss": 0.1816,
"step": 3875
},
{
"epoch": 2.62,
"grad_norm": 4.028345584869385,
"learning_rate": 9.658693467336683e-06,
"loss": 0.1803,
"step": 3900
},
{
"epoch": 2.64,
"grad_norm": 3.665334463119507,
"learning_rate": 9.656180904522614e-06,
"loss": 0.178,
"step": 3925
},
{
"epoch": 2.65,
"grad_norm": 4.212314128875732,
"learning_rate": 9.653668341708544e-06,
"loss": 0.1818,
"step": 3950
},
{
"epoch": 2.67,
"grad_norm": 4.093043804168701,
"learning_rate": 9.651155778894473e-06,
"loss": 0.1784,
"step": 3975
},
{
"epoch": 2.69,
"grad_norm": 3.8363521099090576,
"learning_rate": 9.648643216080404e-06,
"loss": 0.1786,
"step": 4000
},
{
"epoch": 2.69,
"eval_loss": 0.1332446187734604,
"eval_runtime": 541.0077,
"eval_samples_per_second": 2.562,
"eval_steps_per_second": 2.562,
"eval_wer": 30.64630054741303,
"step": 4000
},
{
"epoch": 2.7,
"grad_norm": 3.6020116806030273,
"learning_rate": 9.646130653266332e-06,
"loss": 0.1755,
"step": 4025
},
{
"epoch": 2.72,
"grad_norm": 3.253662586212158,
"learning_rate": 9.643618090452263e-06,
"loss": 0.1767,
"step": 4050
},
{
"epoch": 2.74,
"grad_norm": 3.3012306690216064,
"learning_rate": 9.641105527638192e-06,
"loss": 0.1813,
"step": 4075
},
{
"epoch": 2.75,
"grad_norm": 3.7597391605377197,
"learning_rate": 9.638592964824121e-06,
"loss": 0.1806,
"step": 4100
},
{
"epoch": 2.77,
"grad_norm": 3.914498805999756,
"learning_rate": 9.63608040201005e-06,
"loss": 0.1781,
"step": 4125
},
{
"epoch": 2.79,
"grad_norm": 3.5466084480285645,
"learning_rate": 9.63356783919598e-06,
"loss": 0.1784,
"step": 4150
},
{
"epoch": 2.8,
"grad_norm": 3.5035176277160645,
"learning_rate": 9.63105527638191e-06,
"loss": 0.1693,
"step": 4175
},
{
"epoch": 2.82,
"grad_norm": 3.6211013793945312,
"learning_rate": 9.62854271356784e-06,
"loss": 0.1831,
"step": 4200
},
{
"epoch": 2.84,
"grad_norm": 3.355555772781372,
"learning_rate": 9.62603015075377e-06,
"loss": 0.1737,
"step": 4225
},
{
"epoch": 2.85,
"grad_norm": 4.14884614944458,
"learning_rate": 9.623517587939699e-06,
"loss": 0.1773,
"step": 4250
},
{
"epoch": 2.87,
"grad_norm": 3.896099805831909,
"learning_rate": 9.62100502512563e-06,
"loss": 0.1712,
"step": 4275
},
{
"epoch": 2.89,
"grad_norm": 3.69228196144104,
"learning_rate": 9.618492462311558e-06,
"loss": 0.1747,
"step": 4300
},
{
"epoch": 2.9,
"grad_norm": 3.4385323524475098,
"learning_rate": 9.615979899497489e-06,
"loss": 0.1806,
"step": 4325
},
{
"epoch": 2.92,
"grad_norm": 4.37261438369751,
"learning_rate": 9.613467336683418e-06,
"loss": 0.1705,
"step": 4350
},
{
"epoch": 2.94,
"grad_norm": 3.549129009246826,
"learning_rate": 9.610954773869347e-06,
"loss": 0.1683,
"step": 4375
},
{
"epoch": 2.96,
"grad_norm": 3.8860154151916504,
"learning_rate": 9.608442211055277e-06,
"loss": 0.1732,
"step": 4400
},
{
"epoch": 2.97,
"grad_norm": 3.8280348777770996,
"learning_rate": 9.605929648241206e-06,
"loss": 0.1739,
"step": 4425
},
{
"epoch": 2.99,
"grad_norm": 3.9021239280700684,
"learning_rate": 9.603417085427137e-06,
"loss": 0.1744,
"step": 4450
},
{
"epoch": 3.01,
"grad_norm": 3.436377763748169,
"learning_rate": 9.600904522613066e-06,
"loss": 0.1674,
"step": 4475
},
{
"epoch": 3.02,
"grad_norm": 3.5898520946502686,
"learning_rate": 9.598391959798996e-06,
"loss": 0.1546,
"step": 4500
},
{
"epoch": 3.04,
"grad_norm": 3.295307159423828,
"learning_rate": 9.595879396984925e-06,
"loss": 0.1543,
"step": 4525
},
{
"epoch": 3.06,
"grad_norm": 3.3402857780456543,
"learning_rate": 9.593366834170856e-06,
"loss": 0.1585,
"step": 4550
},
{
"epoch": 3.07,
"grad_norm": 3.4992740154266357,
"learning_rate": 9.590854271356784e-06,
"loss": 0.1584,
"step": 4575
},
{
"epoch": 3.09,
"grad_norm": 3.301234245300293,
"learning_rate": 9.588341708542715e-06,
"loss": 0.1556,
"step": 4600
},
{
"epoch": 3.11,
"grad_norm": 3.2298898696899414,
"learning_rate": 9.585829145728644e-06,
"loss": 0.1557,
"step": 4625
},
{
"epoch": 3.12,
"grad_norm": 3.81208872795105,
"learning_rate": 9.583316582914573e-06,
"loss": 0.1488,
"step": 4650
},
{
"epoch": 3.14,
"grad_norm": 3.7610058784484863,
"learning_rate": 9.580804020100504e-06,
"loss": 0.1537,
"step": 4675
},
{
"epoch": 3.16,
"grad_norm": 3.394169569015503,
"learning_rate": 9.578291457286432e-06,
"loss": 0.1546,
"step": 4700
},
{
"epoch": 3.17,
"grad_norm": 3.5936498641967773,
"learning_rate": 9.575778894472363e-06,
"loss": 0.1544,
"step": 4725
},
{
"epoch": 3.19,
"grad_norm": 3.714808464050293,
"learning_rate": 9.573266331658292e-06,
"loss": 0.1554,
"step": 4750
},
{
"epoch": 3.21,
"grad_norm": 3.731008768081665,
"learning_rate": 9.570753768844222e-06,
"loss": 0.157,
"step": 4775
},
{
"epoch": 3.22,
"grad_norm": 3.4987032413482666,
"learning_rate": 9.568241206030151e-06,
"loss": 0.1506,
"step": 4800
},
{
"epoch": 3.24,
"grad_norm": 3.487567186355591,
"learning_rate": 9.565728643216082e-06,
"loss": 0.1553,
"step": 4825
},
{
"epoch": 3.26,
"grad_norm": 3.537971258163452,
"learning_rate": 9.563216080402011e-06,
"loss": 0.1518,
"step": 4850
},
{
"epoch": 3.27,
"grad_norm": 3.5595390796661377,
"learning_rate": 9.56070351758794e-06,
"loss": 0.1519,
"step": 4875
},
{
"epoch": 3.29,
"grad_norm": 3.397580146789551,
"learning_rate": 9.55819095477387e-06,
"loss": 0.1516,
"step": 4900
},
{
"epoch": 3.31,
"grad_norm": 3.758497714996338,
"learning_rate": 9.5556783919598e-06,
"loss": 0.1562,
"step": 4925
},
{
"epoch": 3.32,
"grad_norm": 3.1611812114715576,
"learning_rate": 9.55316582914573e-06,
"loss": 0.1522,
"step": 4950
},
{
"epoch": 3.34,
"grad_norm": 3.4654600620269775,
"learning_rate": 9.550653266331658e-06,
"loss": 0.1544,
"step": 4975
},
{
"epoch": 3.36,
"grad_norm": 3.0207717418670654,
"learning_rate": 9.548140703517589e-06,
"loss": 0.1574,
"step": 5000
},
{
"epoch": 3.36,
"eval_loss": 0.1250012367963791,
"eval_runtime": 532.3886,
"eval_samples_per_second": 2.603,
"eval_steps_per_second": 2.603,
"eval_wer": 28.50962387427159,
"step": 5000
},
{
"epoch": 3.37,
"grad_norm": 3.4317171573638916,
"learning_rate": 9.545628140703518e-06,
"loss": 0.1563,
"step": 5025
},
{
"epoch": 3.39,
"grad_norm": 3.7062742710113525,
"learning_rate": 9.543115577889448e-06,
"loss": 0.1559,
"step": 5050
},
{
"epoch": 3.41,
"grad_norm": 3.580521821975708,
"learning_rate": 9.540603015075379e-06,
"loss": 0.1521,
"step": 5075
},
{
"epoch": 3.43,
"grad_norm": 3.364760160446167,
"learning_rate": 9.538090452261308e-06,
"loss": 0.1637,
"step": 5100
},
{
"epoch": 3.44,
"grad_norm": 3.812782049179077,
"learning_rate": 9.535577889447237e-06,
"loss": 0.1564,
"step": 5125
},
{
"epoch": 3.46,
"grad_norm": 3.065197229385376,
"learning_rate": 9.533065326633166e-06,
"loss": 0.1484,
"step": 5150
},
{
"epoch": 3.48,
"grad_norm": 3.456214427947998,
"learning_rate": 9.530552763819096e-06,
"loss": 0.1517,
"step": 5175
},
{
"epoch": 3.49,
"grad_norm": 3.731849193572998,
"learning_rate": 9.528040201005025e-06,
"loss": 0.1499,
"step": 5200
},
{
"epoch": 3.51,
"grad_norm": 4.133150577545166,
"learning_rate": 9.525527638190956e-06,
"loss": 0.1475,
"step": 5225
},
{
"epoch": 3.53,
"grad_norm": 3.353069543838501,
"learning_rate": 9.523015075376885e-06,
"loss": 0.1479,
"step": 5250
},
{
"epoch": 3.54,
"grad_norm": 3.885782480239868,
"learning_rate": 9.520502512562815e-06,
"loss": 0.1458,
"step": 5275
},
{
"epoch": 3.56,
"grad_norm": 3.5367889404296875,
"learning_rate": 9.517989949748744e-06,
"loss": 0.1531,
"step": 5300
},
{
"epoch": 3.58,
"grad_norm": 3.8021907806396484,
"learning_rate": 9.515477386934673e-06,
"loss": 0.1498,
"step": 5325
},
{
"epoch": 3.59,
"grad_norm": 3.308176279067993,
"learning_rate": 9.512964824120604e-06,
"loss": 0.1519,
"step": 5350
},
{
"epoch": 3.61,
"grad_norm": 3.668410539627075,
"learning_rate": 9.510452261306534e-06,
"loss": 0.1526,
"step": 5375
},
{
"epoch": 3.63,
"grad_norm": 3.228257417678833,
"learning_rate": 9.507939698492463e-06,
"loss": 0.1509,
"step": 5400
},
{
"epoch": 3.64,
"grad_norm": 3.433962345123291,
"learning_rate": 9.505427135678392e-06,
"loss": 0.1521,
"step": 5425
},
{
"epoch": 3.66,
"grad_norm": 3.707969903945923,
"learning_rate": 9.502914572864322e-06,
"loss": 0.1439,
"step": 5450
},
{
"epoch": 3.68,
"grad_norm": 3.447314739227295,
"learning_rate": 9.500402010050253e-06,
"loss": 0.1473,
"step": 5475
},
{
"epoch": 3.69,
"grad_norm": 3.579751968383789,
"learning_rate": 9.497889447236182e-06,
"loss": 0.1507,
"step": 5500
},
{
"epoch": 3.71,
"grad_norm": 3.470454454421997,
"learning_rate": 9.495376884422111e-06,
"loss": 0.1473,
"step": 5525
},
{
"epoch": 3.73,
"grad_norm": 3.2754967212677,
"learning_rate": 9.49286432160804e-06,
"loss": 0.1458,
"step": 5550
},
{
"epoch": 3.74,
"grad_norm": 3.724622964859009,
"learning_rate": 9.49035175879397e-06,
"loss": 0.1474,
"step": 5575
},
{
"epoch": 3.76,
"grad_norm": 3.176765203475952,
"learning_rate": 9.4878391959799e-06,
"loss": 0.1484,
"step": 5600
},
{
"epoch": 3.78,
"grad_norm": 3.3496909141540527,
"learning_rate": 9.48532663316583e-06,
"loss": 0.1485,
"step": 5625
},
{
"epoch": 3.79,
"grad_norm": 3.3814542293548584,
"learning_rate": 9.48281407035176e-06,
"loss": 0.1491,
"step": 5650
},
{
"epoch": 3.81,
"grad_norm": 3.9236228466033936,
"learning_rate": 9.480301507537689e-06,
"loss": 0.1522,
"step": 5675
},
{
"epoch": 3.83,
"grad_norm": 3.3441123962402344,
"learning_rate": 9.47778894472362e-06,
"loss": 0.1442,
"step": 5700
},
{
"epoch": 3.84,
"grad_norm": 3.3952231407165527,
"learning_rate": 9.475276381909548e-06,
"loss": 0.1487,
"step": 5725
},
{
"epoch": 3.86,
"grad_norm": 3.1410765647888184,
"learning_rate": 9.472763819095479e-06,
"loss": 0.1522,
"step": 5750
},
{
"epoch": 3.88,
"grad_norm": 3.267335891723633,
"learning_rate": 9.470251256281408e-06,
"loss": 0.1425,
"step": 5775
},
{
"epoch": 3.9,
"grad_norm": 3.547773838043213,
"learning_rate": 9.467738693467337e-06,
"loss": 0.1416,
"step": 5800
},
{
"epoch": 3.91,
"grad_norm": 3.4462673664093018,
"learning_rate": 9.465226130653267e-06,
"loss": 0.1453,
"step": 5825
},
{
"epoch": 3.93,
"grad_norm": 3.4584672451019287,
"learning_rate": 9.462713567839196e-06,
"loss": 0.1445,
"step": 5850
},
{
"epoch": 3.95,
"grad_norm": 3.501403331756592,
"learning_rate": 9.460201005025127e-06,
"loss": 0.1423,
"step": 5875
},
{
"epoch": 3.96,
"grad_norm": 3.912052631378174,
"learning_rate": 9.457688442211056e-06,
"loss": 0.1481,
"step": 5900
},
{
"epoch": 3.98,
"grad_norm": 3.257798433303833,
"learning_rate": 9.455175879396986e-06,
"loss": 0.1385,
"step": 5925
},
{
"epoch": 4.0,
"grad_norm": 3.7633163928985596,
"learning_rate": 9.452663316582915e-06,
"loss": 0.1444,
"step": 5950
},
{
"epoch": 4.01,
"grad_norm": 3.1884329319000244,
"learning_rate": 9.450150753768846e-06,
"loss": 0.1314,
"step": 5975
},
{
"epoch": 4.03,
"grad_norm": 3.5105104446411133,
"learning_rate": 9.447638190954774e-06,
"loss": 0.1284,
"step": 6000
},
{
"epoch": 4.03,
"eval_loss": 0.11968862265348434,
"eval_runtime": 533.7582,
"eval_samples_per_second": 2.597,
"eval_steps_per_second": 2.597,
"eval_wer": 27.644358114073814,
"step": 6000
},
{
"epoch": 4.05,
"grad_norm": 3.4444563388824463,
"learning_rate": 9.445125628140705e-06,
"loss": 0.1278,
"step": 6025
},
{
"epoch": 4.06,
"grad_norm": 3.446941375732422,
"learning_rate": 9.442613065326634e-06,
"loss": 0.1316,
"step": 6050
},
{
"epoch": 4.08,
"grad_norm": 3.251770496368408,
"learning_rate": 9.440100502512563e-06,
"loss": 0.1289,
"step": 6075
},
{
"epoch": 4.1,
"grad_norm": 3.1929450035095215,
"learning_rate": 9.437587939698494e-06,
"loss": 0.1257,
"step": 6100
},
{
"epoch": 4.11,
"grad_norm": 3.137993097305298,
"learning_rate": 9.435075376884422e-06,
"loss": 0.1259,
"step": 6125
},
{
"epoch": 4.13,
"grad_norm": 3.5924248695373535,
"learning_rate": 9.432562814070353e-06,
"loss": 0.1246,
"step": 6150
},
{
"epoch": 4.15,
"grad_norm": 3.7657840251922607,
"learning_rate": 9.430050251256282e-06,
"loss": 0.1263,
"step": 6175
},
{
"epoch": 4.16,
"grad_norm": 3.8803839683532715,
"learning_rate": 9.427537688442212e-06,
"loss": 0.1278,
"step": 6200
},
{
"epoch": 4.18,
"grad_norm": 3.049147844314575,
"learning_rate": 9.425025125628141e-06,
"loss": 0.1248,
"step": 6225
},
{
"epoch": 4.2,
"grad_norm": 3.5847809314727783,
"learning_rate": 9.422512562814072e-06,
"loss": 0.1326,
"step": 6250
},
{
"epoch": 4.21,
"grad_norm": 3.208193063735962,
"learning_rate": 9.42e-06,
"loss": 0.1278,
"step": 6275
},
{
"epoch": 4.23,
"grad_norm": 3.787940740585327,
"learning_rate": 9.41748743718593e-06,
"loss": 0.1286,
"step": 6300
},
{
"epoch": 4.25,
"grad_norm": 2.801053762435913,
"learning_rate": 9.41497487437186e-06,
"loss": 0.1309,
"step": 6325
},
{
"epoch": 4.26,
"grad_norm": 3.1014838218688965,
"learning_rate": 9.41246231155779e-06,
"loss": 0.1265,
"step": 6350
},
{
"epoch": 4.28,
"grad_norm": 3.3319690227508545,
"learning_rate": 9.40994974874372e-06,
"loss": 0.1273,
"step": 6375
},
{
"epoch": 4.3,
"grad_norm": 3.366464376449585,
"learning_rate": 9.407437185929648e-06,
"loss": 0.1266,
"step": 6400
},
{
"epoch": 4.31,
"grad_norm": 3.5356907844543457,
"learning_rate": 9.404924623115579e-06,
"loss": 0.13,
"step": 6425
},
{
"epoch": 4.33,
"grad_norm": 3.325680732727051,
"learning_rate": 9.402412060301508e-06,
"loss": 0.1262,
"step": 6450
},
{
"epoch": 4.35,
"grad_norm": 3.4266843795776367,
"learning_rate": 9.399899497487438e-06,
"loss": 0.1284,
"step": 6475
},
{
"epoch": 4.37,
"grad_norm": 3.1395492553710938,
"learning_rate": 9.397386934673369e-06,
"loss": 0.1276,
"step": 6500
},
{
"epoch": 4.38,
"grad_norm": 3.323065757751465,
"learning_rate": 9.394874371859298e-06,
"loss": 0.1246,
"step": 6525
},
{
"epoch": 4.4,
"grad_norm": 3.3577070236206055,
"learning_rate": 9.392361809045227e-06,
"loss": 0.1326,
"step": 6550
},
{
"epoch": 4.42,
"grad_norm": 3.4483211040496826,
"learning_rate": 9.389849246231157e-06,
"loss": 0.1287,
"step": 6575
},
{
"epoch": 4.43,
"grad_norm": 3.939202308654785,
"learning_rate": 9.387336683417086e-06,
"loss": 0.1295,
"step": 6600
},
{
"epoch": 4.45,
"grad_norm": 3.5882346630096436,
"learning_rate": 9.384824120603015e-06,
"loss": 0.1257,
"step": 6625
},
{
"epoch": 4.47,
"grad_norm": 3.9268131256103516,
"learning_rate": 9.382311557788946e-06,
"loss": 0.1308,
"step": 6650
},
{
"epoch": 4.48,
"grad_norm": 3.2181479930877686,
"learning_rate": 9.379798994974874e-06,
"loss": 0.1289,
"step": 6675
},
{
"epoch": 4.5,
"grad_norm": 3.4467923641204834,
"learning_rate": 9.377286432160805e-06,
"loss": 0.1286,
"step": 6700
},
{
"epoch": 4.52,
"grad_norm": 3.227398157119751,
"learning_rate": 9.374773869346734e-06,
"loss": 0.1279,
"step": 6725
},
{
"epoch": 4.53,
"grad_norm": 3.5086820125579834,
"learning_rate": 9.372261306532664e-06,
"loss": 0.1295,
"step": 6750
},
{
"epoch": 4.55,
"grad_norm": 3.0712101459503174,
"learning_rate": 9.369748743718595e-06,
"loss": 0.1285,
"step": 6775
},
{
"epoch": 4.57,
"grad_norm": 3.3961784839630127,
"learning_rate": 9.367236180904524e-06,
"loss": 0.1311,
"step": 6800
},
{
"epoch": 4.58,
"grad_norm": 3.6800429821014404,
"learning_rate": 9.364723618090453e-06,
"loss": 0.1284,
"step": 6825
},
{
"epoch": 4.6,
"grad_norm": 3.6793227195739746,
"learning_rate": 9.362211055276383e-06,
"loss": 0.1311,
"step": 6850
},
{
"epoch": 4.62,
"grad_norm": 3.1020681858062744,
"learning_rate": 9.359698492462312e-06,
"loss": 0.1287,
"step": 6875
},
{
"epoch": 4.63,
"grad_norm": 3.738802909851074,
"learning_rate": 9.357185929648241e-06,
"loss": 0.1241,
"step": 6900
},
{
"epoch": 4.65,
"grad_norm": 3.344667911529541,
"learning_rate": 9.354673366834172e-06,
"loss": 0.1247,
"step": 6925
},
{
"epoch": 4.67,
"grad_norm": 3.011655330657959,
"learning_rate": 9.352160804020101e-06,
"loss": 0.1237,
"step": 6950
},
{
"epoch": 4.68,
"grad_norm": 3.486971139907837,
"learning_rate": 9.34964824120603e-06,
"loss": 0.1275,
"step": 6975
},
{
"epoch": 4.7,
"grad_norm": 3.265568971633911,
"learning_rate": 9.34713567839196e-06,
"loss": 0.1216,
"step": 7000
},
{
"epoch": 4.7,
"eval_loss": 0.11660390347242355,
"eval_runtime": 534.0823,
"eval_samples_per_second": 2.595,
"eval_steps_per_second": 2.595,
"eval_wer": 26.823238566131025,
"step": 7000
},
{
"epoch": 4.72,
"grad_norm": 3.0894901752471924,
"learning_rate": 9.34462311557789e-06,
"loss": 0.1258,
"step": 7025
},
{
"epoch": 4.73,
"grad_norm": 3.5530054569244385,
"learning_rate": 9.34211055276382e-06,
"loss": 0.1294,
"step": 7050
},
{
"epoch": 4.75,
"grad_norm": 3.127763271331787,
"learning_rate": 9.33959798994975e-06,
"loss": 0.129,
"step": 7075
},
{
"epoch": 4.77,
"grad_norm": 3.453204393386841,
"learning_rate": 9.337085427135679e-06,
"loss": 0.1264,
"step": 7100
},
{
"epoch": 4.79,
"grad_norm": 3.470991611480713,
"learning_rate": 9.334572864321608e-06,
"loss": 0.1272,
"step": 7125
},
{
"epoch": 4.8,
"grad_norm": 3.498213768005371,
"learning_rate": 9.332060301507538e-06,
"loss": 0.1257,
"step": 7150
},
{
"epoch": 4.82,
"grad_norm": 3.052225351333618,
"learning_rate": 9.329547738693469e-06,
"loss": 0.1242,
"step": 7175
},
{
"epoch": 4.84,
"grad_norm": 2.9512875080108643,
"learning_rate": 9.327035175879398e-06,
"loss": 0.1226,
"step": 7200
},
{
"epoch": 4.85,
"grad_norm": 3.124257802963257,
"learning_rate": 9.324522613065327e-06,
"loss": 0.1276,
"step": 7225
},
{
"epoch": 4.87,
"grad_norm": 3.763948678970337,
"learning_rate": 9.322010050251257e-06,
"loss": 0.123,
"step": 7250
},
{
"epoch": 4.89,
"grad_norm": 3.859360694885254,
"learning_rate": 9.319497487437186e-06,
"loss": 0.1288,
"step": 7275
},
{
"epoch": 4.9,
"grad_norm": 3.406261682510376,
"learning_rate": 9.316984924623115e-06,
"loss": 0.1268,
"step": 7300
},
{
"epoch": 4.92,
"grad_norm": 3.5981762409210205,
"learning_rate": 9.314472361809046e-06,
"loss": 0.1267,
"step": 7325
},
{
"epoch": 4.94,
"grad_norm": 3.2677414417266846,
"learning_rate": 9.311959798994976e-06,
"loss": 0.1228,
"step": 7350
},
{
"epoch": 4.95,
"grad_norm": 3.4176025390625,
"learning_rate": 9.309447236180905e-06,
"loss": 0.1292,
"step": 7375
},
{
"epoch": 4.97,
"grad_norm": 3.702085018157959,
"learning_rate": 9.306934673366836e-06,
"loss": 0.1212,
"step": 7400
},
{
"epoch": 4.99,
"grad_norm": 3.075143337249756,
"learning_rate": 9.304422110552764e-06,
"loss": 0.1208,
"step": 7425
},
{
"epoch": 5.0,
"grad_norm": 2.96437406539917,
"learning_rate": 9.301909547738695e-06,
"loss": 0.1223,
"step": 7450
},
{
"epoch": 5.02,
"grad_norm": 3.359867572784424,
"learning_rate": 9.299396984924624e-06,
"loss": 0.1083,
"step": 7475
},
{
"epoch": 5.04,
"grad_norm": 3.1340601444244385,
"learning_rate": 9.296884422110553e-06,
"loss": 0.1088,
"step": 7500
},
{
"epoch": 5.05,
"grad_norm": 3.4933323860168457,
"learning_rate": 9.294371859296483e-06,
"loss": 0.1103,
"step": 7525
},
{
"epoch": 5.07,
"grad_norm": 2.8419055938720703,
"learning_rate": 9.291859296482412e-06,
"loss": 0.1074,
"step": 7550
},
{
"epoch": 5.09,
"grad_norm": 2.699908971786499,
"learning_rate": 9.289346733668343e-06,
"loss": 0.1074,
"step": 7575
},
{
"epoch": 5.1,
"grad_norm": 3.4752280712127686,
"learning_rate": 9.286834170854272e-06,
"loss": 0.1074,
"step": 7600
},
{
"epoch": 5.12,
"grad_norm": 3.5037472248077393,
"learning_rate": 9.284321608040202e-06,
"loss": 0.1114,
"step": 7625
},
{
"epoch": 5.14,
"grad_norm": 3.3195717334747314,
"learning_rate": 9.281809045226131e-06,
"loss": 0.1111,
"step": 7650
},
{
"epoch": 5.15,
"grad_norm": 3.210256338119507,
"learning_rate": 9.279296482412062e-06,
"loss": 0.1095,
"step": 7675
},
{
"epoch": 5.17,
"grad_norm": 3.4619410037994385,
"learning_rate": 9.27678391959799e-06,
"loss": 0.1118,
"step": 7700
},
{
"epoch": 5.19,
"grad_norm": 3.2132604122161865,
"learning_rate": 9.27427135678392e-06,
"loss": 0.1065,
"step": 7725
},
{
"epoch": 5.2,
"grad_norm": 3.379657030105591,
"learning_rate": 9.27175879396985e-06,
"loss": 0.1121,
"step": 7750
},
{
"epoch": 5.22,
"grad_norm": 3.6748008728027344,
"learning_rate": 9.26924623115578e-06,
"loss": 0.1062,
"step": 7775
},
{
"epoch": 5.24,
"grad_norm": 3.063694715499878,
"learning_rate": 9.26673366834171e-06,
"loss": 0.1077,
"step": 7800
},
{
"epoch": 5.26,
"grad_norm": 3.237032413482666,
"learning_rate": 9.264221105527638e-06,
"loss": 0.1079,
"step": 7825
},
{
"epoch": 5.27,
"grad_norm": 3.2364072799682617,
"learning_rate": 9.261708542713569e-06,
"loss": 0.1075,
"step": 7850
},
{
"epoch": 5.29,
"grad_norm": 3.2186496257781982,
"learning_rate": 9.259195979899498e-06,
"loss": 0.107,
"step": 7875
},
{
"epoch": 5.31,
"grad_norm": 3.249338150024414,
"learning_rate": 9.256683417085428e-06,
"loss": 0.1101,
"step": 7900
},
{
"epoch": 5.32,
"grad_norm": 2.9037253856658936,
"learning_rate": 9.254170854271357e-06,
"loss": 0.1049,
"step": 7925
},
{
"epoch": 5.34,
"grad_norm": 3.467984914779663,
"learning_rate": 9.251658291457288e-06,
"loss": 0.1164,
"step": 7950
},
{
"epoch": 5.36,
"grad_norm": 3.047340154647827,
"learning_rate": 9.249145728643217e-06,
"loss": 0.1079,
"step": 7975
},
{
"epoch": 5.37,
"grad_norm": 3.2782435417175293,
"learning_rate": 9.246633165829147e-06,
"loss": 0.1063,
"step": 8000
},
{
"epoch": 5.37,
"eval_loss": 0.11793605983257294,
"eval_runtime": 535.0689,
"eval_samples_per_second": 2.59,
"eval_steps_per_second": 2.59,
"eval_wer": 27.19406674907293,
"step": 8000
},
{
"epoch": 5.39,
"grad_norm": 3.045055389404297,
"learning_rate": 9.244120603015076e-06,
"loss": 0.1068,
"step": 8025
},
{
"epoch": 5.41,
"grad_norm": 3.3729374408721924,
"learning_rate": 9.241608040201005e-06,
"loss": 0.1097,
"step": 8050
},
{
"epoch": 5.42,
"grad_norm": 3.581709861755371,
"learning_rate": 9.239095477386936e-06,
"loss": 0.109,
"step": 8075
},
{
"epoch": 5.44,
"grad_norm": 3.690354585647583,
"learning_rate": 9.236582914572864e-06,
"loss": 0.1105,
"step": 8100
},
{
"epoch": 5.46,
"grad_norm": 3.4395689964294434,
"learning_rate": 9.234070351758795e-06,
"loss": 0.1047,
"step": 8125
},
{
"epoch": 5.47,
"grad_norm": 3.5904619693756104,
"learning_rate": 9.231557788944724e-06,
"loss": 0.1098,
"step": 8150
},
{
"epoch": 5.49,
"grad_norm": 3.4449338912963867,
"learning_rate": 9.229045226130654e-06,
"loss": 0.1094,
"step": 8175
},
{
"epoch": 5.51,
"grad_norm": 3.081770181655884,
"learning_rate": 9.226532663316585e-06,
"loss": 0.1046,
"step": 8200
},
{
"epoch": 5.52,
"grad_norm": 3.2109663486480713,
"learning_rate": 9.224020100502514e-06,
"loss": 0.11,
"step": 8225
},
{
"epoch": 5.54,
"grad_norm": 3.8002219200134277,
"learning_rate": 9.221507537688443e-06,
"loss": 0.1103,
"step": 8250
},
{
"epoch": 5.56,
"grad_norm": 3.423508644104004,
"learning_rate": 9.218994974874373e-06,
"loss": 0.1046,
"step": 8275
},
{
"epoch": 5.57,
"grad_norm": 3.408816337585449,
"learning_rate": 9.216482412060302e-06,
"loss": 0.1137,
"step": 8300
},
{
"epoch": 5.59,
"grad_norm": 3.349015235900879,
"learning_rate": 9.213969849246231e-06,
"loss": 0.1063,
"step": 8325
},
{
"epoch": 5.61,
"grad_norm": 3.255462169647217,
"learning_rate": 9.211457286432162e-06,
"loss": 0.1081,
"step": 8350
},
{
"epoch": 5.62,
"grad_norm": 3.0760374069213867,
"learning_rate": 9.20894472361809e-06,
"loss": 0.1124,
"step": 8375
},
{
"epoch": 5.64,
"grad_norm": 3.469221830368042,
"learning_rate": 9.206432160804021e-06,
"loss": 0.1095,
"step": 8400
},
{
"epoch": 5.66,
"grad_norm": 3.20563006401062,
"learning_rate": 9.20391959798995e-06,
"loss": 0.1091,
"step": 8425
},
{
"epoch": 5.67,
"grad_norm": 3.58202862739563,
"learning_rate": 9.20140703517588e-06,
"loss": 0.1084,
"step": 8450
},
{
"epoch": 5.69,
"grad_norm": 3.2912611961364746,
"learning_rate": 9.19889447236181e-06,
"loss": 0.109,
"step": 8475
},
{
"epoch": 5.71,
"grad_norm": 3.2603135108947754,
"learning_rate": 9.19638190954774e-06,
"loss": 0.1051,
"step": 8500
},
{
"epoch": 5.73,
"grad_norm": 3.3398075103759766,
"learning_rate": 9.19386934673367e-06,
"loss": 0.1086,
"step": 8525
},
{
"epoch": 5.74,
"grad_norm": 3.480815887451172,
"learning_rate": 9.191356783919599e-06,
"loss": 0.1059,
"step": 8550
},
{
"epoch": 5.76,
"grad_norm": 3.1898598670959473,
"learning_rate": 9.188844221105528e-06,
"loss": 0.104,
"step": 8575
},
{
"epoch": 5.78,
"grad_norm": 3.3440845012664795,
"learning_rate": 9.186331658291459e-06,
"loss": 0.1126,
"step": 8600
},
{
"epoch": 5.79,
"grad_norm": 3.6762936115264893,
"learning_rate": 9.183819095477388e-06,
"loss": 0.1064,
"step": 8625
},
{
"epoch": 5.81,
"grad_norm": 3.66489315032959,
"learning_rate": 9.181306532663317e-06,
"loss": 0.1096,
"step": 8650
},
{
"epoch": 5.83,
"grad_norm": 3.3494789600372314,
"learning_rate": 9.178793969849247e-06,
"loss": 0.1111,
"step": 8675
},
{
"epoch": 5.84,
"grad_norm": 3.4388012886047363,
"learning_rate": 9.176281407035176e-06,
"loss": 0.1107,
"step": 8700
},
{
"epoch": 5.86,
"grad_norm": 3.2451605796813965,
"learning_rate": 9.173768844221105e-06,
"loss": 0.1089,
"step": 8725
},
{
"epoch": 5.88,
"grad_norm": 3.6606147289276123,
"learning_rate": 9.171256281407036e-06,
"loss": 0.1084,
"step": 8750
},
{
"epoch": 5.89,
"grad_norm": 3.2145121097564697,
"learning_rate": 9.168743718592966e-06,
"loss": 0.1063,
"step": 8775
},
{
"epoch": 5.91,
"grad_norm": 3.6518869400024414,
"learning_rate": 9.166231155778895e-06,
"loss": 0.1043,
"step": 8800
},
{
"epoch": 5.93,
"grad_norm": 2.9405784606933594,
"learning_rate": 9.163718592964826e-06,
"loss": 0.1117,
"step": 8825
},
{
"epoch": 5.94,
"grad_norm": 3.5626678466796875,
"learning_rate": 9.161206030150754e-06,
"loss": 0.1048,
"step": 8850
},
{
"epoch": 5.96,
"grad_norm": 3.2351441383361816,
"learning_rate": 9.158693467336685e-06,
"loss": 0.1093,
"step": 8875
},
{
"epoch": 5.98,
"grad_norm": 3.439530372619629,
"learning_rate": 9.156180904522614e-06,
"loss": 0.1073,
"step": 8900
},
{
"epoch": 5.99,
"grad_norm": 3.4655144214630127,
"learning_rate": 9.153668341708543e-06,
"loss": 0.1067,
"step": 8925
},
{
"epoch": 6.01,
"grad_norm": 2.794478178024292,
"learning_rate": 9.151155778894473e-06,
"loss": 0.0995,
"step": 8950
},
{
"epoch": 6.03,
"grad_norm": 3.5291810035705566,
"learning_rate": 9.148643216080402e-06,
"loss": 0.0925,
"step": 8975
},
{
"epoch": 6.04,
"grad_norm": 2.9376721382141113,
"learning_rate": 9.146130653266331e-06,
"loss": 0.0879,
"step": 9000
},
{
"epoch": 6.04,
"eval_loss": 0.1166120246052742,
"eval_runtime": 531.7233,
"eval_samples_per_second": 2.607,
"eval_steps_per_second": 2.607,
"eval_wer": 26.876214020837015,
"step": 9000
},
{
"epoch": 6.06,
"grad_norm": 3.121159076690674,
"learning_rate": 9.143618090452262e-06,
"loss": 0.0925,
"step": 9025
},
{
"epoch": 6.08,
"grad_norm": 3.190279722213745,
"learning_rate": 9.141105527638192e-06,
"loss": 0.0966,
"step": 9050
},
{
"epoch": 6.09,
"grad_norm": 2.9551713466644287,
"learning_rate": 9.138592964824121e-06,
"loss": 0.0927,
"step": 9075
},
{
"epoch": 6.11,
"grad_norm": 2.6916284561157227,
"learning_rate": 9.136080402010052e-06,
"loss": 0.0905,
"step": 9100
},
{
"epoch": 6.13,
"grad_norm": 3.1297528743743896,
"learning_rate": 9.13356783919598e-06,
"loss": 0.0966,
"step": 9125
},
{
"epoch": 6.15,
"grad_norm": 3.3253660202026367,
"learning_rate": 9.13105527638191e-06,
"loss": 0.0977,
"step": 9150
},
{
"epoch": 6.16,
"grad_norm": 3.1732029914855957,
"learning_rate": 9.12854271356784e-06,
"loss": 0.0943,
"step": 9175
},
{
"epoch": 6.18,
"grad_norm": 3.00846791267395,
"learning_rate": 9.12603015075377e-06,
"loss": 0.095,
"step": 9200
},
{
"epoch": 6.2,
"grad_norm": 3.4318153858184814,
"learning_rate": 9.1235175879397e-06,
"loss": 0.0918,
"step": 9225
},
{
"epoch": 6.21,
"grad_norm": 2.615586519241333,
"learning_rate": 9.121005025125628e-06,
"loss": 0.0918,
"step": 9250
},
{
"epoch": 6.23,
"grad_norm": 3.2654173374176025,
"learning_rate": 9.118492462311559e-06,
"loss": 0.0892,
"step": 9275
},
{
"epoch": 6.25,
"grad_norm": 3.255948066711426,
"learning_rate": 9.115979899497488e-06,
"loss": 0.0955,
"step": 9300
},
{
"epoch": 6.26,
"grad_norm": 3.593632221221924,
"learning_rate": 9.113467336683418e-06,
"loss": 0.0951,
"step": 9325
},
{
"epoch": 6.28,
"grad_norm": 3.3398244380950928,
"learning_rate": 9.110954773869347e-06,
"loss": 0.0926,
"step": 9350
},
{
"epoch": 6.3,
"grad_norm": 3.4789888858795166,
"learning_rate": 9.108442211055278e-06,
"loss": 0.0946,
"step": 9375
},
{
"epoch": 6.31,
"grad_norm": 3.1585254669189453,
"learning_rate": 9.105929648241206e-06,
"loss": 0.0921,
"step": 9400
},
{
"epoch": 6.33,
"grad_norm": 3.3125743865966797,
"learning_rate": 9.103417085427137e-06,
"loss": 0.0912,
"step": 9425
},
{
"epoch": 6.35,
"grad_norm": 2.899616241455078,
"learning_rate": 9.100904522613066e-06,
"loss": 0.0935,
"step": 9450
},
{
"epoch": 6.36,
"grad_norm": 2.9725539684295654,
"learning_rate": 9.098391959798995e-06,
"loss": 0.0934,
"step": 9475
},
{
"epoch": 6.38,
"grad_norm": 3.340712070465088,
"learning_rate": 9.095879396984926e-06,
"loss": 0.0968,
"step": 9500
},
{
"epoch": 6.4,
"grad_norm": 3.4166252613067627,
"learning_rate": 9.093366834170854e-06,
"loss": 0.0902,
"step": 9525
},
{
"epoch": 6.41,
"grad_norm": 3.42030930519104,
"learning_rate": 9.090854271356785e-06,
"loss": 0.0903,
"step": 9550
},
{
"epoch": 6.43,
"grad_norm": 2.913060188293457,
"learning_rate": 9.088341708542714e-06,
"loss": 0.0967,
"step": 9575
},
{
"epoch": 6.45,
"grad_norm": 2.9808599948883057,
"learning_rate": 9.085829145728644e-06,
"loss": 0.0898,
"step": 9600
},
{
"epoch": 6.46,
"grad_norm": 3.318812847137451,
"learning_rate": 9.083316582914573e-06,
"loss": 0.0964,
"step": 9625
},
{
"epoch": 6.48,
"grad_norm": 2.8281571865081787,
"learning_rate": 9.080804020100504e-06,
"loss": 0.0925,
"step": 9650
},
{
"epoch": 6.5,
"grad_norm": 3.3148748874664307,
"learning_rate": 9.078291457286433e-06,
"loss": 0.0955,
"step": 9675
},
{
"epoch": 6.51,
"grad_norm": 3.047445297241211,
"learning_rate": 9.075778894472363e-06,
"loss": 0.0907,
"step": 9700
},
{
"epoch": 6.53,
"grad_norm": 3.201747417449951,
"learning_rate": 9.073266331658292e-06,
"loss": 0.0891,
"step": 9725
},
{
"epoch": 6.55,
"grad_norm": 3.5526840686798096,
"learning_rate": 9.070753768844221e-06,
"loss": 0.0937,
"step": 9750
},
{
"epoch": 6.56,
"grad_norm": 3.3490021228790283,
"learning_rate": 9.068241206030152e-06,
"loss": 0.0924,
"step": 9775
},
{
"epoch": 6.58,
"grad_norm": 3.195934534072876,
"learning_rate": 9.06572864321608e-06,
"loss": 0.0941,
"step": 9800
},
{
"epoch": 6.6,
"grad_norm": 3.1133546829223633,
"learning_rate": 9.063216080402011e-06,
"loss": 0.093,
"step": 9825
},
{
"epoch": 6.62,
"grad_norm": 3.5979671478271484,
"learning_rate": 9.06070351758794e-06,
"loss": 0.0881,
"step": 9850
},
{
"epoch": 6.63,
"grad_norm": 3.7291669845581055,
"learning_rate": 9.05819095477387e-06,
"loss": 0.0955,
"step": 9875
},
{
"epoch": 6.65,
"grad_norm": 3.2835400104522705,
"learning_rate": 9.0556783919598e-06,
"loss": 0.0902,
"step": 9900
},
{
"epoch": 6.67,
"grad_norm": 3.1277029514312744,
"learning_rate": 9.05316582914573e-06,
"loss": 0.0936,
"step": 9925
},
{
"epoch": 6.68,
"grad_norm": 3.2376766204833984,
"learning_rate": 9.05065326633166e-06,
"loss": 0.0896,
"step": 9950
},
{
"epoch": 6.7,
"grad_norm": 2.698474168777466,
"learning_rate": 9.048140703517589e-06,
"loss": 0.0915,
"step": 9975
},
{
"epoch": 6.72,
"grad_norm": 3.623647451400757,
"learning_rate": 9.045628140703518e-06,
"loss": 0.0924,
"step": 10000
},
{
"epoch": 6.72,
"eval_loss": 0.1171552762389183,
"eval_runtime": 533.9147,
"eval_samples_per_second": 2.596,
"eval_steps_per_second": 2.596,
"eval_wer": 26.558361292601095,
"step": 10000
},
{
"epoch": 6.73,
"grad_norm": 3.608774423599243,
"learning_rate": 9.043115577889447e-06,
"loss": 0.0927,
"step": 10025
},
{
"epoch": 6.75,
"grad_norm": null,
"learning_rate": 9.04070351758794e-06,
"loss": 0.0952,
"step": 10050
},
{
"epoch": 6.77,
"grad_norm": 2.832880735397339,
"learning_rate": 9.03819095477387e-06,
"loss": 0.0909,
"step": 10075
},
{
"epoch": 6.78,
"grad_norm": 3.0156736373901367,
"learning_rate": 9.0356783919598e-06,
"loss": 0.0944,
"step": 10100
},
{
"epoch": 6.8,
"grad_norm": 3.3390650749206543,
"learning_rate": 9.033165829145728e-06,
"loss": 0.0919,
"step": 10125
},
{
"epoch": 6.82,
"grad_norm": 3.394937515258789,
"learning_rate": 9.03065326633166e-06,
"loss": 0.0932,
"step": 10150
},
{
"epoch": 6.83,
"grad_norm": 3.443366765975952,
"learning_rate": 9.028140703517589e-06,
"loss": 0.0934,
"step": 10175
},
{
"epoch": 6.85,
"grad_norm": 3.167790174484253,
"learning_rate": 9.025628140703518e-06,
"loss": 0.0934,
"step": 10200
},
{
"epoch": 6.87,
"grad_norm": 3.151536464691162,
"learning_rate": 9.023115577889447e-06,
"loss": 0.0935,
"step": 10225
},
{
"epoch": 6.88,
"grad_norm": 3.475541114807129,
"learning_rate": 9.020603015075378e-06,
"loss": 0.0924,
"step": 10250
},
{
"epoch": 6.9,
"grad_norm": 3.254150629043579,
"learning_rate": 9.018090452261308e-06,
"loss": 0.0946,
"step": 10275
},
{
"epoch": 6.92,
"grad_norm": 3.126755714416504,
"learning_rate": 9.015577889447237e-06,
"loss": 0.0921,
"step": 10300
},
{
"epoch": 6.93,
"grad_norm": 3.1626737117767334,
"learning_rate": 9.013065326633166e-06,
"loss": 0.0908,
"step": 10325
},
{
"epoch": 6.95,
"grad_norm": 3.488074779510498,
"learning_rate": 9.010552763819096e-06,
"loss": 0.0956,
"step": 10350
},
{
"epoch": 6.97,
"grad_norm": 3.0085911750793457,
"learning_rate": 9.008040201005027e-06,
"loss": 0.0915,
"step": 10375
},
{
"epoch": 6.98,
"grad_norm": 3.424804925918579,
"learning_rate": 9.005527638190954e-06,
"loss": 0.0968,
"step": 10400
},
{
"epoch": 7.0,
"grad_norm": 3.1618521213531494,
"learning_rate": 9.003015075376885e-06,
"loss": 0.0911,
"step": 10425
},
{
"epoch": 7.02,
"grad_norm": 3.355823040008545,
"learning_rate": 9.000502512562815e-06,
"loss": 0.0775,
"step": 10450
},
{
"epoch": 7.03,
"grad_norm": 2.7716736793518066,
"learning_rate": 8.997989949748744e-06,
"loss": 0.0776,
"step": 10475
},
{
"epoch": 7.05,
"grad_norm": 2.89070987701416,
"learning_rate": 8.995477386934675e-06,
"loss": 0.0803,
"step": 10500
},
{
"epoch": 7.07,
"grad_norm": 3.0273945331573486,
"learning_rate": 8.992964824120604e-06,
"loss": 0.0731,
"step": 10525
},
{
"epoch": 7.09,
"grad_norm": 2.902979612350464,
"learning_rate": 8.990452261306534e-06,
"loss": 0.0805,
"step": 10550
},
{
"epoch": 7.1,
"grad_norm": 2.9858810901641846,
"learning_rate": 8.987939698492463e-06,
"loss": 0.0761,
"step": 10575
},
{
"epoch": 7.12,
"grad_norm": 2.780200958251953,
"learning_rate": 8.985427135678392e-06,
"loss": 0.0786,
"step": 10600
},
{
"epoch": 7.14,
"grad_norm": 3.0452048778533936,
"learning_rate": 8.982914572864322e-06,
"loss": 0.078,
"step": 10625
},
{
"epoch": 7.15,
"grad_norm": 3.0429253578186035,
"learning_rate": 8.980402010050253e-06,
"loss": 0.078,
"step": 10650
},
{
"epoch": 7.17,
"grad_norm": 2.758443593978882,
"learning_rate": 8.977889447236182e-06,
"loss": 0.0787,
"step": 10675
},
{
"epoch": 7.19,
"grad_norm": 3.3259782791137695,
"learning_rate": 8.975376884422111e-06,
"loss": 0.0818,
"step": 10700
},
{
"epoch": 7.2,
"grad_norm": 3.1599812507629395,
"learning_rate": 8.97286432160804e-06,
"loss": 0.0788,
"step": 10725
},
{
"epoch": 7.22,
"grad_norm": 3.163283348083496,
"learning_rate": 8.97035175879397e-06,
"loss": 0.0801,
"step": 10750
},
{
"epoch": 7.24,
"grad_norm": 3.883058547973633,
"learning_rate": 8.967839195979901e-06,
"loss": 0.0818,
"step": 10775
},
{
"epoch": 7.25,
"grad_norm": 3.0166139602661133,
"learning_rate": 8.96532663316583e-06,
"loss": 0.079,
"step": 10800
},
{
"epoch": 7.27,
"grad_norm": 3.532127857208252,
"learning_rate": 8.96281407035176e-06,
"loss": 0.0764,
"step": 10825
},
{
"epoch": 7.29,
"grad_norm": 2.8934993743896484,
"learning_rate": 8.960301507537689e-06,
"loss": 0.0791,
"step": 10850
},
{
"epoch": 7.3,
"grad_norm": 3.4274938106536865,
"learning_rate": 8.957788944723618e-06,
"loss": 0.0788,
"step": 10875
},
{
"epoch": 7.32,
"grad_norm": 2.964526891708374,
"learning_rate": 8.95527638190955e-06,
"loss": 0.0781,
"step": 10900
},
{
"epoch": 7.34,
"grad_norm": 3.1131231784820557,
"learning_rate": 8.952763819095479e-06,
"loss": 0.0776,
"step": 10925
},
{
"epoch": 7.35,
"grad_norm": 2.757322072982788,
"learning_rate": 8.950251256281408e-06,
"loss": 0.0793,
"step": 10950
},
{
"epoch": 7.37,
"grad_norm": 2.8853962421417236,
"learning_rate": 8.947738693467337e-06,
"loss": 0.08,
"step": 10975
},
{
"epoch": 7.39,
"grad_norm": 3.2388052940368652,
"learning_rate": 8.945226130653267e-06,
"loss": 0.0837,
"step": 11000
},
{
"epoch": 7.39,
"eval_loss": 0.11983851343393326,
"eval_runtime": 541.1838,
"eval_samples_per_second": 2.561,
"eval_steps_per_second": 2.561,
"eval_wer": 27.052798869856964,
"step": 11000
},
{
"epoch": 7.4,
"grad_norm": 3.431065559387207,
"learning_rate": 8.942713567839196e-06,
"loss": 0.0797,
"step": 11025
},
{
"epoch": 7.42,
"grad_norm": 3.1514389514923096,
"learning_rate": 8.940201005025127e-06,
"loss": 0.0809,
"step": 11050
},
{
"epoch": 7.44,
"grad_norm": 3.1348989009857178,
"learning_rate": 8.937688442211056e-06,
"loss": 0.0796,
"step": 11075
},
{
"epoch": 7.45,
"grad_norm": 3.4892783164978027,
"learning_rate": 8.935175879396986e-06,
"loss": 0.0813,
"step": 11100
},
{
"epoch": 7.47,
"grad_norm": 3.3423171043395996,
"learning_rate": 8.932663316582915e-06,
"loss": 0.0768,
"step": 11125
},
{
"epoch": 7.49,
"grad_norm": 3.119539499282837,
"learning_rate": 8.930150753768844e-06,
"loss": 0.0833,
"step": 11150
},
{
"epoch": 7.51,
"grad_norm": 3.181475877761841,
"learning_rate": 8.927638190954775e-06,
"loss": 0.0803,
"step": 11175
},
{
"epoch": 7.52,
"grad_norm": 3.3543057441711426,
"learning_rate": 8.925125628140705e-06,
"loss": 0.0806,
"step": 11200
},
{
"epoch": 7.54,
"grad_norm": 3.1575417518615723,
"learning_rate": 8.922613065326634e-06,
"loss": 0.0812,
"step": 11225
},
{
"epoch": 7.56,
"grad_norm": 3.0198452472686768,
"learning_rate": 8.920100502512563e-06,
"loss": 0.0805,
"step": 11250
},
{
"epoch": 7.57,
"grad_norm": 2.9735798835754395,
"learning_rate": 8.917587939698493e-06,
"loss": 0.0791,
"step": 11275
},
{
"epoch": 7.59,
"grad_norm": 3.363503932952881,
"learning_rate": 8.915075376884424e-06,
"loss": 0.0817,
"step": 11300
},
{
"epoch": 7.61,
"grad_norm": 3.10579514503479,
"learning_rate": 8.912562814070353e-06,
"loss": 0.0833,
"step": 11325
},
{
"epoch": 7.62,
"grad_norm": 3.5427165031433105,
"learning_rate": 8.910050251256282e-06,
"loss": 0.0827,
"step": 11350
},
{
"epoch": 7.64,
"grad_norm": 2.9739034175872803,
"learning_rate": 8.907537688442212e-06,
"loss": 0.0795,
"step": 11375
},
{
"epoch": 7.66,
"grad_norm": 3.0262250900268555,
"learning_rate": 8.905025125628143e-06,
"loss": 0.0777,
"step": 11400
},
{
"epoch": 7.67,
"grad_norm": 2.9359376430511475,
"learning_rate": 8.90251256281407e-06,
"loss": 0.0807,
"step": 11425
},
{
"epoch": 7.69,
"grad_norm": 3.158572196960449,
"learning_rate": 8.900000000000001e-06,
"loss": 0.0772,
"step": 11450
},
{
"epoch": 7.71,
"grad_norm": 3.330089807510376,
"learning_rate": 8.89748743718593e-06,
"loss": 0.0793,
"step": 11475
},
{
"epoch": 7.72,
"grad_norm": 3.2174530029296875,
"learning_rate": 8.89497487437186e-06,
"loss": 0.079,
"step": 11500
},
{
"epoch": 7.74,
"grad_norm": 3.673243522644043,
"learning_rate": 8.892462311557791e-06,
"loss": 0.0775,
"step": 11525
},
{
"epoch": 7.76,
"grad_norm": 3.3094096183776855,
"learning_rate": 8.889949748743718e-06,
"loss": 0.078,
"step": 11550
},
{
"epoch": 7.77,
"grad_norm": 3.426079273223877,
"learning_rate": 8.88743718592965e-06,
"loss": 0.0777,
"step": 11575
},
{
"epoch": 7.79,
"grad_norm": 3.517086982727051,
"learning_rate": 8.884924623115579e-06,
"loss": 0.0776,
"step": 11600
},
{
"epoch": 7.81,
"grad_norm": 2.9824516773223877,
"learning_rate": 8.882412060301508e-06,
"loss": 0.0805,
"step": 11625
},
{
"epoch": 7.82,
"grad_norm": 2.965653896331787,
"learning_rate": 8.879899497487437e-06,
"loss": 0.0786,
"step": 11650
},
{
"epoch": 7.84,
"grad_norm": 2.9882099628448486,
"learning_rate": 8.877386934673368e-06,
"loss": 0.0822,
"step": 11675
},
{
"epoch": 7.86,
"grad_norm": 3.118823289871216,
"learning_rate": 8.874874371859296e-06,
"loss": 0.0773,
"step": 11700
},
{
"epoch": 7.87,
"grad_norm": 4.2748188972473145,
"learning_rate": 8.872361809045227e-06,
"loss": 0.0812,
"step": 11725
},
{
"epoch": 7.89,
"grad_norm": 3.5226612091064453,
"learning_rate": 8.869849246231156e-06,
"loss": 0.0801,
"step": 11750
},
{
"epoch": 7.91,
"grad_norm": 3.2962095737457275,
"learning_rate": 8.867336683417086e-06,
"loss": 0.0779,
"step": 11775
},
{
"epoch": 7.92,
"grad_norm": 3.037177801132202,
"learning_rate": 8.864824120603017e-06,
"loss": 0.0811,
"step": 11800
},
{
"epoch": 7.94,
"grad_norm": 3.207000255584717,
"learning_rate": 8.862311557788944e-06,
"loss": 0.0813,
"step": 11825
},
{
"epoch": 7.96,
"grad_norm": 3.5045995712280273,
"learning_rate": 8.859798994974875e-06,
"loss": 0.0793,
"step": 11850
},
{
"epoch": 7.98,
"grad_norm": 2.9062917232513428,
"learning_rate": 8.857286432160805e-06,
"loss": 0.0808,
"step": 11875
},
{
"epoch": 7.99,
"grad_norm": 3.086449146270752,
"learning_rate": 8.854773869346734e-06,
"loss": 0.0757,
"step": 11900
},
{
"epoch": 8.01,
"grad_norm": 3.4503021240234375,
"learning_rate": 8.852261306532665e-06,
"loss": 0.0698,
"step": 11925
},
{
"epoch": 8.03,
"grad_norm": 2.755633592605591,
"learning_rate": 8.849748743718594e-06,
"loss": 0.0619,
"step": 11950
},
{
"epoch": 8.04,
"grad_norm": 3.3875789642333984,
"learning_rate": 8.847236180904524e-06,
"loss": 0.0651,
"step": 11975
},
{
"epoch": 8.06,
"grad_norm": 2.697042465209961,
"learning_rate": 8.844723618090453e-06,
"loss": 0.0654,
"step": 12000
},
{
"epoch": 8.06,
"eval_loss": 0.12158209830522537,
"eval_runtime": 532.8467,
"eval_samples_per_second": 2.601,
"eval_steps_per_second": 2.601,
"eval_wer": 26.328800988875155,
"step": 12000
},
{
"epoch": 8.08,
"grad_norm": 2.8202855587005615,
"learning_rate": 8.842211055276382e-06,
"loss": 0.0658,
"step": 12025
},
{
"epoch": 8.09,
"grad_norm": 2.7945172786712646,
"learning_rate": 8.839698492462312e-06,
"loss": 0.0627,
"step": 12050
},
{
"epoch": 8.11,
"grad_norm": 3.1584692001342773,
"learning_rate": 8.837185929648243e-06,
"loss": 0.0673,
"step": 12075
},
{
"epoch": 8.13,
"grad_norm": 3.1642470359802246,
"learning_rate": 8.83467336683417e-06,
"loss": 0.0683,
"step": 12100
},
{
"epoch": 8.14,
"grad_norm": 2.9188601970672607,
"learning_rate": 8.832160804020101e-06,
"loss": 0.0682,
"step": 12125
},
{
"epoch": 8.16,
"grad_norm": 3.276679039001465,
"learning_rate": 8.829748743718593e-06,
"loss": 0.0656,
"step": 12150
},
{
"epoch": 8.18,
"grad_norm": 2.683711051940918,
"learning_rate": 8.827236180904524e-06,
"loss": 0.0625,
"step": 12175
},
{
"epoch": 8.19,
"grad_norm": 3.232003688812256,
"learning_rate": 8.824723618090453e-06,
"loss": 0.066,
"step": 12200
},
{
"epoch": 8.21,
"grad_norm": 2.7374961376190186,
"learning_rate": 8.822211055276383e-06,
"loss": 0.0647,
"step": 12225
},
{
"epoch": 8.23,
"grad_norm": 3.423482656478882,
"learning_rate": 8.819698492462312e-06,
"loss": 0.0673,
"step": 12250
},
{
"epoch": 8.24,
"grad_norm": 2.9813687801361084,
"learning_rate": 8.817185929648241e-06,
"loss": 0.0685,
"step": 12275
},
{
"epoch": 8.26,
"grad_norm": 3.047753095626831,
"learning_rate": 8.81467336683417e-06,
"loss": 0.0658,
"step": 12300
},
{
"epoch": 8.28,
"grad_norm": 3.4329652786254883,
"learning_rate": 8.812160804020102e-06,
"loss": 0.0662,
"step": 12325
},
{
"epoch": 8.29,
"grad_norm": 3.080573081970215,
"learning_rate": 8.809648241206031e-06,
"loss": 0.0674,
"step": 12350
},
{
"epoch": 8.31,
"grad_norm": 2.828704833984375,
"learning_rate": 8.80713567839196e-06,
"loss": 0.0694,
"step": 12375
},
{
"epoch": 8.33,
"grad_norm": 3.132976531982422,
"learning_rate": 8.804623115577891e-06,
"loss": 0.0685,
"step": 12400
},
{
"epoch": 8.34,
"grad_norm": 3.154456615447998,
"learning_rate": 8.802110552763819e-06,
"loss": 0.0679,
"step": 12425
},
{
"epoch": 8.36,
"grad_norm": 3.4193313121795654,
"learning_rate": 8.79959798994975e-06,
"loss": 0.0674,
"step": 12450
},
{
"epoch": 8.38,
"grad_norm": 3.2318356037139893,
"learning_rate": 8.79708542713568e-06,
"loss": 0.0658,
"step": 12475
},
{
"epoch": 8.39,
"grad_norm": 2.9559836387634277,
"learning_rate": 8.794572864321609e-06,
"loss": 0.0647,
"step": 12500
},
{
"epoch": 8.41,
"grad_norm": 3.459628105163574,
"learning_rate": 8.792060301507538e-06,
"loss": 0.0693,
"step": 12525
},
{
"epoch": 8.43,
"grad_norm": 3.2934398651123047,
"learning_rate": 8.789547738693467e-06,
"loss": 0.0696,
"step": 12550
},
{
"epoch": 8.45,
"grad_norm": 3.2100000381469727,
"learning_rate": 8.787035175879398e-06,
"loss": 0.0703,
"step": 12575
},
{
"epoch": 8.46,
"grad_norm": 3.280884265899658,
"learning_rate": 8.784522613065328e-06,
"loss": 0.0661,
"step": 12600
},
{
"epoch": 8.48,
"grad_norm": 3.1474897861480713,
"learning_rate": 8.782010050251257e-06,
"loss": 0.0663,
"step": 12625
},
{
"epoch": 8.5,
"grad_norm": 2.9876487255096436,
"learning_rate": 8.779497487437186e-06,
"loss": 0.0693,
"step": 12650
},
{
"epoch": 8.51,
"grad_norm": 3.278313159942627,
"learning_rate": 8.776984924623117e-06,
"loss": 0.0699,
"step": 12675
},
{
"epoch": 8.53,
"grad_norm": 3.023169755935669,
"learning_rate": 8.774472361809045e-06,
"loss": 0.0712,
"step": 12700
},
{
"epoch": 8.55,
"grad_norm": 3.168148994445801,
"learning_rate": 8.771959798994976e-06,
"loss": 0.0698,
"step": 12725
},
{
"epoch": 8.56,
"grad_norm": 3.177262544631958,
"learning_rate": 8.769447236180905e-06,
"loss": 0.0686,
"step": 12750
},
{
"epoch": 8.58,
"grad_norm": 3.1487865447998047,
"learning_rate": 8.766934673366834e-06,
"loss": 0.0684,
"step": 12775
},
{
"epoch": 8.6,
"grad_norm": 2.9590165615081787,
"learning_rate": 8.764422110552765e-06,
"loss": 0.0691,
"step": 12800
},
{
"epoch": 8.61,
"grad_norm": 3.0423812866210938,
"learning_rate": 8.761909547738693e-06,
"loss": 0.0682,
"step": 12825
},
{
"epoch": 8.63,
"grad_norm": 3.3768019676208496,
"learning_rate": 8.759396984924624e-06,
"loss": 0.0709,
"step": 12850
},
{
"epoch": 8.65,
"grad_norm": 3.7296512126922607,
"learning_rate": 8.756884422110553e-06,
"loss": 0.0701,
"step": 12875
},
{
"epoch": 8.66,
"grad_norm": 3.148634433746338,
"learning_rate": 8.754371859296483e-06,
"loss": 0.0634,
"step": 12900
},
{
"epoch": 8.68,
"grad_norm": 2.908444881439209,
"learning_rate": 8.751859296482412e-06,
"loss": 0.0659,
"step": 12925
},
{
"epoch": 8.7,
"grad_norm": 3.3164865970611572,
"learning_rate": 8.749346733668343e-06,
"loss": 0.0655,
"step": 12950
},
{
"epoch": 8.71,
"grad_norm": 2.9725685119628906,
"learning_rate": 8.746834170854272e-06,
"loss": 0.0659,
"step": 12975
},
{
"epoch": 8.73,
"grad_norm": 3.171374797821045,
"learning_rate": 8.744321608040202e-06,
"loss": 0.068,
"step": 13000
},
{
"epoch": 8.73,
"eval_loss": 0.12423743307590485,
"eval_runtime": 533.8353,
"eval_samples_per_second": 2.596,
"eval_steps_per_second": 2.596,
"eval_wer": 26.86738477838602,
"step": 13000
},
{
"epoch": 8.75,
"grad_norm": 3.3160324096679688,
"learning_rate": 8.741809045226131e-06,
"loss": 0.0692,
"step": 13025
},
{
"epoch": 8.76,
"grad_norm": 3.2802672386169434,
"learning_rate": 8.73929648241206e-06,
"loss": 0.067,
"step": 13050
},
{
"epoch": 8.78,
"grad_norm": 3.2849535942077637,
"learning_rate": 8.736783919597991e-06,
"loss": 0.0654,
"step": 13075
},
{
"epoch": 8.8,
"grad_norm": 3.685974359512329,
"learning_rate": 8.734271356783919e-06,
"loss": 0.0687,
"step": 13100
},
{
"epoch": 8.81,
"grad_norm": 2.9581081867218018,
"learning_rate": 8.73175879396985e-06,
"loss": 0.0658,
"step": 13125
},
{
"epoch": 8.83,
"grad_norm": 3.3408470153808594,
"learning_rate": 8.72924623115578e-06,
"loss": 0.0726,
"step": 13150
},
{
"epoch": 8.85,
"grad_norm": 3.5375308990478516,
"learning_rate": 8.726733668341709e-06,
"loss": 0.0688,
"step": 13175
},
{
"epoch": 8.87,
"grad_norm": 2.7572827339172363,
"learning_rate": 8.72422110552764e-06,
"loss": 0.0688,
"step": 13200
},
{
"epoch": 8.88,
"grad_norm": 3.0948410034179688,
"learning_rate": 8.721708542713569e-06,
"loss": 0.0686,
"step": 13225
},
{
"epoch": 8.9,
"grad_norm": 3.076904773712158,
"learning_rate": 8.719195979899498e-06,
"loss": 0.0683,
"step": 13250
},
{
"epoch": 8.92,
"grad_norm": 3.060412645339966,
"learning_rate": 8.716683417085428e-06,
"loss": 0.0692,
"step": 13275
},
{
"epoch": 8.93,
"grad_norm": 3.1852357387542725,
"learning_rate": 8.714170854271357e-06,
"loss": 0.0647,
"step": 13300
},
{
"epoch": 8.95,
"grad_norm": 3.427971601486206,
"learning_rate": 8.711658291457286e-06,
"loss": 0.0675,
"step": 13325
},
{
"epoch": 8.97,
"grad_norm": 3.221360683441162,
"learning_rate": 8.709145728643217e-06,
"loss": 0.0702,
"step": 13350
},
{
"epoch": 8.98,
"grad_norm": 3.490898847579956,
"learning_rate": 8.706633165829147e-06,
"loss": 0.0693,
"step": 13375
},
{
"epoch": 9.0,
"grad_norm": 3.1776282787323,
"learning_rate": 8.704120603015076e-06,
"loss": 0.0725,
"step": 13400
},
{
"epoch": 9.02,
"grad_norm": 2.52174973487854,
"learning_rate": 8.701608040201005e-06,
"loss": 0.0542,
"step": 13425
},
{
"epoch": 9.03,
"grad_norm": 2.8436169624328613,
"learning_rate": 8.699095477386935e-06,
"loss": 0.0543,
"step": 13450
},
{
"epoch": 9.05,
"grad_norm": 3.0883164405822754,
"learning_rate": 8.696582914572866e-06,
"loss": 0.0565,
"step": 13475
},
{
"epoch": 9.07,
"grad_norm": 3.2945592403411865,
"learning_rate": 8.694070351758795e-06,
"loss": 0.0554,
"step": 13500
},
{
"epoch": 9.08,
"grad_norm": 3.1277835369110107,
"learning_rate": 8.691557788944724e-06,
"loss": 0.0575,
"step": 13525
},
{
"epoch": 9.1,
"grad_norm": 2.555258274078369,
"learning_rate": 8.689045226130654e-06,
"loss": 0.0557,
"step": 13550
},
{
"epoch": 9.12,
"grad_norm": 2.6981780529022217,
"learning_rate": 8.686532663316583e-06,
"loss": 0.056,
"step": 13575
},
{
"epoch": 9.13,
"grad_norm": 2.9988884925842285,
"learning_rate": 8.684020100502514e-06,
"loss": 0.0575,
"step": 13600
},
{
"epoch": 9.15,
"grad_norm": 2.7814390659332275,
"learning_rate": 8.681507537688443e-06,
"loss": 0.0543,
"step": 13625
},
{
"epoch": 9.17,
"grad_norm": 2.8165695667266846,
"learning_rate": 8.678994974874373e-06,
"loss": 0.0542,
"step": 13650
},
{
"epoch": 9.18,
"grad_norm": 2.8924388885498047,
"learning_rate": 8.676482412060302e-06,
"loss": 0.0584,
"step": 13675
},
{
"epoch": 9.2,
"grad_norm": 2.8846709728240967,
"learning_rate": 8.673969849246231e-06,
"loss": 0.0546,
"step": 13700
},
{
"epoch": 9.22,
"grad_norm": 3.0931618213653564,
"learning_rate": 8.67145728643216e-06,
"loss": 0.0541,
"step": 13725
},
{
"epoch": 9.23,
"grad_norm": 3.0044896602630615,
"learning_rate": 8.668944723618092e-06,
"loss": 0.0566,
"step": 13750
},
{
"epoch": 9.25,
"grad_norm": 2.992866039276123,
"learning_rate": 8.666432160804021e-06,
"loss": 0.0568,
"step": 13775
},
{
"epoch": 9.27,
"grad_norm": 3.3243565559387207,
"learning_rate": 8.66391959798995e-06,
"loss": 0.0575,
"step": 13800
},
{
"epoch": 9.28,
"grad_norm": 3.164736747741699,
"learning_rate": 8.661407035175881e-06,
"loss": 0.0565,
"step": 13825
},
{
"epoch": 9.3,
"grad_norm": 2.89432430267334,
"learning_rate": 8.658894472361809e-06,
"loss": 0.0571,
"step": 13850
},
{
"epoch": 9.32,
"grad_norm": 3.053514242172241,
"learning_rate": 8.65638190954774e-06,
"loss": 0.0582,
"step": 13875
},
{
"epoch": 9.34,
"grad_norm": 2.7615840435028076,
"learning_rate": 8.65386934673367e-06,
"loss": 0.0566,
"step": 13900
},
{
"epoch": 9.35,
"grad_norm": 3.1976537704467773,
"learning_rate": 8.651356783919599e-06,
"loss": 0.0578,
"step": 13925
},
{
"epoch": 9.37,
"grad_norm": 3.1072587966918945,
"learning_rate": 8.648844221105528e-06,
"loss": 0.0577,
"step": 13950
},
{
"epoch": 9.39,
"grad_norm": 3.4911906719207764,
"learning_rate": 8.646331658291457e-06,
"loss": 0.0548,
"step": 13975
},
{
"epoch": 9.4,
"grad_norm": 2.923501968383789,
"learning_rate": 8.643819095477388e-06,
"loss": 0.0586,
"step": 14000
},
{
"epoch": 9.4,
"eval_loss": 0.1282009482383728,
"eval_runtime": 533.8178,
"eval_samples_per_second": 2.596,
"eval_steps_per_second": 2.596,
"eval_wer": 26.982164930248985,
"step": 14000
},
{
"epoch": 9.42,
"grad_norm": 3.0205700397491455,
"learning_rate": 8.641306532663318e-06,
"loss": 0.0552,
"step": 14025
},
{
"epoch": 9.44,
"grad_norm": 3.022747278213501,
"learning_rate": 8.638793969849247e-06,
"loss": 0.0574,
"step": 14050
},
{
"epoch": 9.45,
"grad_norm": 3.2978105545043945,
"learning_rate": 8.636281407035176e-06,
"loss": 0.0571,
"step": 14075
},
{
"epoch": 9.47,
"grad_norm": 3.0741355419158936,
"learning_rate": 8.633768844221107e-06,
"loss": 0.0556,
"step": 14100
},
{
"epoch": 9.49,
"grad_norm": 2.8877174854278564,
"learning_rate": 8.631256281407035e-06,
"loss": 0.0538,
"step": 14125
},
{
"epoch": 9.5,
"grad_norm": 3.618729591369629,
"learning_rate": 8.628743718592966e-06,
"loss": 0.0592,
"step": 14150
},
{
"epoch": 9.52,
"grad_norm": 3.005646228790283,
"learning_rate": 8.626231155778895e-06,
"loss": 0.057,
"step": 14175
},
{
"epoch": 9.54,
"grad_norm": 3.3048083782196045,
"learning_rate": 8.623718592964825e-06,
"loss": 0.0564,
"step": 14200
},
{
"epoch": 9.55,
"grad_norm": 3.2562224864959717,
"learning_rate": 8.621206030150756e-06,
"loss": 0.0571,
"step": 14225
},
{
"epoch": 9.57,
"grad_norm": 2.980013608932495,
"learning_rate": 8.618693467336683e-06,
"loss": 0.0564,
"step": 14250
},
{
"epoch": 9.59,
"grad_norm": 3.220036745071411,
"learning_rate": 8.616180904522614e-06,
"loss": 0.0588,
"step": 14275
},
{
"epoch": 9.6,
"grad_norm": 3.4643850326538086,
"learning_rate": 8.613668341708544e-06,
"loss": 0.0565,
"step": 14300
},
{
"epoch": 9.62,
"grad_norm": 3.2021632194519043,
"learning_rate": 8.611155778894473e-06,
"loss": 0.0586,
"step": 14325
},
{
"epoch": 9.64,
"grad_norm": 3.2279539108276367,
"learning_rate": 8.608643216080402e-06,
"loss": 0.0562,
"step": 14350
},
{
"epoch": 9.65,
"grad_norm": 3.429431438446045,
"learning_rate": 8.606130653266333e-06,
"loss": 0.0585,
"step": 14375
},
{
"epoch": 9.67,
"grad_norm": 3.278526544570923,
"learning_rate": 8.60361809045226e-06,
"loss": 0.0584,
"step": 14400
},
{
"epoch": 9.69,
"grad_norm": 3.5569005012512207,
"learning_rate": 8.601105527638192e-06,
"loss": 0.0587,
"step": 14425
},
{
"epoch": 9.7,
"grad_norm": 3.0540413856506348,
"learning_rate": 8.598592964824121e-06,
"loss": 0.0582,
"step": 14450
},
{
"epoch": 9.72,
"grad_norm": 2.9771244525909424,
"learning_rate": 8.59608040201005e-06,
"loss": 0.0544,
"step": 14475
},
{
"epoch": 9.74,
"grad_norm": 3.271925926208496,
"learning_rate": 8.593567839195981e-06,
"loss": 0.0556,
"step": 14500
},
{
"epoch": 9.75,
"grad_norm": 3.2107813358306885,
"learning_rate": 8.591055276381909e-06,
"loss": 0.0556,
"step": 14525
},
{
"epoch": 9.77,
"grad_norm": 2.9411368370056152,
"learning_rate": 8.58854271356784e-06,
"loss": 0.06,
"step": 14550
},
{
"epoch": 9.79,
"grad_norm": 2.9419991970062256,
"learning_rate": 8.58603015075377e-06,
"loss": 0.055,
"step": 14575
},
{
"epoch": 9.81,
"grad_norm": 3.3104031085968018,
"learning_rate": 8.583517587939699e-06,
"loss": 0.0586,
"step": 14600
},
{
"epoch": 9.82,
"grad_norm": 3.488868236541748,
"learning_rate": 8.58100502512563e-06,
"loss": 0.0608,
"step": 14625
},
{
"epoch": 9.84,
"grad_norm": 2.7537827491760254,
"learning_rate": 8.578492462311559e-06,
"loss": 0.061,
"step": 14650
},
{
"epoch": 9.86,
"grad_norm": 2.967761278152466,
"learning_rate": 8.575979899497488e-06,
"loss": 0.0616,
"step": 14675
},
{
"epoch": 9.87,
"grad_norm": 2.6756021976470947,
"learning_rate": 8.573467336683418e-06,
"loss": 0.0572,
"step": 14700
},
{
"epoch": 9.89,
"grad_norm": 3.6669530868530273,
"learning_rate": 8.570954773869347e-06,
"loss": 0.0545,
"step": 14725
},
{
"epoch": 9.91,
"grad_norm": 3.402998208999634,
"learning_rate": 8.568442211055276e-06,
"loss": 0.0595,
"step": 14750
},
{
"epoch": 9.92,
"grad_norm": 3.397134304046631,
"learning_rate": 8.565929648241207e-06,
"loss": 0.0582,
"step": 14775
},
{
"epoch": 9.94,
"grad_norm": 3.193824291229248,
"learning_rate": 8.563417085427135e-06,
"loss": 0.0558,
"step": 14800
},
{
"epoch": 9.96,
"grad_norm": 3.0948803424835205,
"learning_rate": 8.560904522613066e-06,
"loss": 0.0572,
"step": 14825
},
{
"epoch": 9.97,
"grad_norm": 3.6509146690368652,
"learning_rate": 8.558391959798995e-06,
"loss": 0.0595,
"step": 14850
},
{
"epoch": 9.99,
"grad_norm": 3.0662288665771484,
"learning_rate": 8.555879396984925e-06,
"loss": 0.057,
"step": 14875
},
{
"epoch": 10.01,
"grad_norm": 2.2760088443756104,
"learning_rate": 8.553366834170856e-06,
"loss": 0.0524,
"step": 14900
},
{
"epoch": 10.02,
"grad_norm": 2.8303427696228027,
"learning_rate": 8.550854271356785e-06,
"loss": 0.0494,
"step": 14925
},
{
"epoch": 10.04,
"grad_norm": 3.1542868614196777,
"learning_rate": 8.548341708542714e-06,
"loss": 0.0445,
"step": 14950
},
{
"epoch": 10.06,
"grad_norm": 2.8265697956085205,
"learning_rate": 8.545829145728644e-06,
"loss": 0.0464,
"step": 14975
},
{
"epoch": 10.07,
"grad_norm": 3.163896322250366,
"learning_rate": 8.543316582914573e-06,
"loss": 0.047,
"step": 15000
},
{
"epoch": 10.07,
"eval_loss": 0.13359740376472473,
"eval_runtime": 533.7428,
"eval_samples_per_second": 2.597,
"eval_steps_per_second": 2.597,
"eval_wer": 27.405968567896878,
"step": 15000
},
{
"epoch": 10.09,
"grad_norm": 2.813354253768921,
"learning_rate": 8.540804020100502e-06,
"loss": 0.0476,
"step": 15025
},
{
"epoch": 10.11,
"grad_norm": 2.448727607727051,
"learning_rate": 8.538291457286433e-06,
"loss": 0.0448,
"step": 15050
},
{
"epoch": 10.12,
"grad_norm": 2.798645257949829,
"learning_rate": 8.535778894472363e-06,
"loss": 0.0458,
"step": 15075
},
{
"epoch": 10.14,
"grad_norm": 2.969273090362549,
"learning_rate": 8.533266331658292e-06,
"loss": 0.0442,
"step": 15100
},
{
"epoch": 10.16,
"grad_norm": 2.901127576828003,
"learning_rate": 8.530753768844221e-06,
"loss": 0.0431,
"step": 15125
},
{
"epoch": 10.17,
"grad_norm": 3.0042836666107178,
"learning_rate": 8.52824120603015e-06,
"loss": 0.049,
"step": 15150
},
{
"epoch": 10.19,
"grad_norm": 2.694744825363159,
"learning_rate": 8.525728643216082e-06,
"loss": 0.0474,
"step": 15175
},
{
"epoch": 10.21,
"grad_norm": 2.79301118850708,
"learning_rate": 8.523216080402011e-06,
"loss": 0.0459,
"step": 15200
},
{
"epoch": 10.22,
"grad_norm": 3.328848123550415,
"learning_rate": 8.52070351758794e-06,
"loss": 0.0481,
"step": 15225
},
{
"epoch": 10.24,
"grad_norm": 3.0490903854370117,
"learning_rate": 8.518190954773871e-06,
"loss": 0.0467,
"step": 15250
},
{
"epoch": 10.26,
"grad_norm": 2.891860246658325,
"learning_rate": 8.515678391959799e-06,
"loss": 0.0482,
"step": 15275
},
{
"epoch": 10.28,
"grad_norm": 3.29339599609375,
"learning_rate": 8.51316582914573e-06,
"loss": 0.0468,
"step": 15300
},
{
"epoch": 10.29,
"grad_norm": 2.871262550354004,
"learning_rate": 8.51065326633166e-06,
"loss": 0.0465,
"step": 15325
},
{
"epoch": 10.31,
"grad_norm": 2.673008680343628,
"learning_rate": 8.508140703517589e-06,
"loss": 0.0457,
"step": 15350
},
{
"epoch": 10.33,
"grad_norm": 2.5940115451812744,
"learning_rate": 8.505628140703518e-06,
"loss": 0.049,
"step": 15375
},
{
"epoch": 10.34,
"grad_norm": 2.8226072788238525,
"learning_rate": 8.503115577889447e-06,
"loss": 0.0472,
"step": 15400
},
{
"epoch": 10.36,
"grad_norm": 2.800179958343506,
"learning_rate": 8.500603015075377e-06,
"loss": 0.0477,
"step": 15425
},
{
"epoch": 10.38,
"grad_norm": 3.0697898864746094,
"learning_rate": 8.498090452261308e-06,
"loss": 0.0448,
"step": 15450
},
{
"epoch": 10.39,
"grad_norm": 2.9394161701202393,
"learning_rate": 8.495577889447237e-06,
"loss": 0.0464,
"step": 15475
},
{
"epoch": 10.41,
"grad_norm": 3.055058479309082,
"learning_rate": 8.493065326633166e-06,
"loss": 0.0486,
"step": 15500
},
{
"epoch": 10.43,
"grad_norm": 3.4436676502227783,
"learning_rate": 8.490552763819097e-06,
"loss": 0.0479,
"step": 15525
},
{
"epoch": 10.44,
"grad_norm": 3.167590379714966,
"learning_rate": 8.488040201005025e-06,
"loss": 0.049,
"step": 15550
},
{
"epoch": 10.46,
"grad_norm": 2.786879539489746,
"learning_rate": 8.485527638190956e-06,
"loss": 0.0476,
"step": 15575
},
{
"epoch": 10.48,
"grad_norm": 3.0949158668518066,
"learning_rate": 8.483015075376885e-06,
"loss": 0.0463,
"step": 15600
},
{
"epoch": 10.49,
"grad_norm": 3.426304340362549,
"learning_rate": 8.480502512562815e-06,
"loss": 0.0475,
"step": 15625
},
{
"epoch": 10.51,
"grad_norm": 3.1173408031463623,
"learning_rate": 8.477989949748744e-06,
"loss": 0.0476,
"step": 15650
},
{
"epoch": 10.53,
"grad_norm": 2.856600046157837,
"learning_rate": 8.475477386934673e-06,
"loss": 0.0471,
"step": 15675
},
{
"epoch": 10.54,
"grad_norm": 3.2512564659118652,
"learning_rate": 8.472964824120604e-06,
"loss": 0.0483,
"step": 15700
},
{
"epoch": 10.56,
"grad_norm": 3.3549506664276123,
"learning_rate": 8.470452261306534e-06,
"loss": 0.0462,
"step": 15725
},
{
"epoch": 10.58,
"grad_norm": 2.7729334831237793,
"learning_rate": 8.467939698492463e-06,
"loss": 0.0472,
"step": 15750
},
{
"epoch": 10.59,
"grad_norm": 2.711257219314575,
"learning_rate": 8.465427135678392e-06,
"loss": 0.0472,
"step": 15775
},
{
"epoch": 10.61,
"grad_norm": 3.229771375656128,
"learning_rate": 8.462914572864323e-06,
"loss": 0.0479,
"step": 15800
},
{
"epoch": 10.63,
"grad_norm": 3.0402400493621826,
"learning_rate": 8.460402010050251e-06,
"loss": 0.0503,
"step": 15825
},
{
"epoch": 10.64,
"grad_norm": 2.9210867881774902,
"learning_rate": 8.457989949748744e-06,
"loss": 0.0497,
"step": 15850
},
{
"epoch": 10.66,
"grad_norm": 3.3483831882476807,
"learning_rate": 8.455577889447237e-06,
"loss": 0.0475,
"step": 15875
},
{
"epoch": 10.68,
"grad_norm": 3.053593873977661,
"learning_rate": 8.453065326633167e-06,
"loss": 0.046,
"step": 15900
},
{
"epoch": 10.7,
"grad_norm": 3.136958599090576,
"learning_rate": 8.450552763819096e-06,
"loss": 0.0509,
"step": 15925
},
{
"epoch": 10.71,
"grad_norm": 3.1040425300598145,
"learning_rate": 8.448040201005025e-06,
"loss": 0.0504,
"step": 15950
},
{
"epoch": 10.73,
"grad_norm": 2.8489692211151123,
"learning_rate": 8.445527638190956e-06,
"loss": 0.0484,
"step": 15975
},
{
"epoch": 10.75,
"grad_norm": 2.8868560791015625,
"learning_rate": 8.443015075376884e-06,
"loss": 0.0475,
"step": 16000
},
{
"epoch": 10.75,
"eval_loss": 0.1362370103597641,
"eval_runtime": 536.1147,
"eval_samples_per_second": 2.585,
"eval_steps_per_second": 2.585,
"eval_wer": 27.441285537700864,
"step": 16000
},
{
"epoch": 10.76,
"grad_norm": 3.188688039779663,
"learning_rate": 8.440502512562815e-06,
"loss": 0.0502,
"step": 16025
},
{
"epoch": 10.78,
"grad_norm": 2.4469282627105713,
"learning_rate": 8.437989949748744e-06,
"loss": 0.0459,
"step": 16050
},
{
"epoch": 10.8,
"grad_norm": 2.948697328567505,
"learning_rate": 8.435477386934674e-06,
"loss": 0.0472,
"step": 16075
},
{
"epoch": 10.81,
"grad_norm": 3.236891508102417,
"learning_rate": 8.432964824120605e-06,
"loss": 0.0494,
"step": 16100
},
{
"epoch": 10.83,
"grad_norm": 3.0507919788360596,
"learning_rate": 8.430452261306534e-06,
"loss": 0.0494,
"step": 16125
},
{
"epoch": 10.85,
"grad_norm": 2.8577802181243896,
"learning_rate": 8.427939698492463e-06,
"loss": 0.0487,
"step": 16150
},
{
"epoch": 10.86,
"grad_norm": 3.035109758377075,
"learning_rate": 8.425427135678393e-06,
"loss": 0.0486,
"step": 16175
},
{
"epoch": 10.88,
"grad_norm": 3.5497820377349854,
"learning_rate": 8.422914572864322e-06,
"loss": 0.0497,
"step": 16200
},
{
"epoch": 10.9,
"grad_norm": 2.838867664337158,
"learning_rate": 8.420402010050251e-06,
"loss": 0.0451,
"step": 16225
},
{
"epoch": 10.91,
"grad_norm": 3.316819190979004,
"learning_rate": 8.417889447236182e-06,
"loss": 0.0489,
"step": 16250
},
{
"epoch": 10.93,
"grad_norm": 3.3198862075805664,
"learning_rate": 8.415376884422112e-06,
"loss": 0.0528,
"step": 16275
},
{
"epoch": 10.95,
"grad_norm": 3.4924492835998535,
"learning_rate": 8.412864321608041e-06,
"loss": 0.0492,
"step": 16300
},
{
"epoch": 10.96,
"grad_norm": 3.0983831882476807,
"learning_rate": 8.41035175879397e-06,
"loss": 0.0498,
"step": 16325
},
{
"epoch": 10.98,
"grad_norm": 3.4345991611480713,
"learning_rate": 8.4078391959799e-06,
"loss": 0.0483,
"step": 16350
},
{
"epoch": 11.0,
"grad_norm": 3.294377326965332,
"learning_rate": 8.40532663316583e-06,
"loss": 0.0485,
"step": 16375
},
{
"epoch": 11.01,
"grad_norm": 2.1766245365142822,
"learning_rate": 8.40281407035176e-06,
"loss": 0.0371,
"step": 16400
},
{
"epoch": 11.03,
"grad_norm": 2.683638334274292,
"learning_rate": 8.40030150753769e-06,
"loss": 0.0355,
"step": 16425
},
{
"epoch": 11.05,
"grad_norm": 2.8458847999572754,
"learning_rate": 8.397788944723619e-06,
"loss": 0.038,
"step": 16450
},
{
"epoch": 11.06,
"grad_norm": 2.7042036056518555,
"learning_rate": 8.395276381909548e-06,
"loss": 0.0375,
"step": 16475
},
{
"epoch": 11.08,
"grad_norm": 2.0865659713745117,
"learning_rate": 8.392763819095479e-06,
"loss": 0.039,
"step": 16500
},
{
"epoch": 11.1,
"grad_norm": 2.3241260051727295,
"learning_rate": 8.390251256281408e-06,
"loss": 0.0365,
"step": 16525
},
{
"epoch": 11.11,
"grad_norm": 2.7509355545043945,
"learning_rate": 8.387738693467338e-06,
"loss": 0.0392,
"step": 16550
},
{
"epoch": 11.13,
"grad_norm": 2.3158955574035645,
"learning_rate": 8.385226130653267e-06,
"loss": 0.0399,
"step": 16575
},
{
"epoch": 11.15,
"grad_norm": 2.368791103363037,
"learning_rate": 8.382713567839196e-06,
"loss": 0.0366,
"step": 16600
},
{
"epoch": 11.17,
"grad_norm": 3.157816171646118,
"learning_rate": 8.380201005025126e-06,
"loss": 0.0386,
"step": 16625
},
{
"epoch": 11.18,
"grad_norm": 2.391731023788452,
"learning_rate": 8.377688442211057e-06,
"loss": 0.0409,
"step": 16650
},
{
"epoch": 11.2,
"grad_norm": 2.881032943725586,
"learning_rate": 8.375175879396986e-06,
"loss": 0.0399,
"step": 16675
},
{
"epoch": 11.22,
"grad_norm": 2.8162527084350586,
"learning_rate": 8.372663316582915e-06,
"loss": 0.0386,
"step": 16700
},
{
"epoch": 11.23,
"grad_norm": 2.798832654953003,
"learning_rate": 8.370150753768845e-06,
"loss": 0.0389,
"step": 16725
},
{
"epoch": 11.25,
"grad_norm": 2.4073362350463867,
"learning_rate": 8.367638190954774e-06,
"loss": 0.038,
"step": 16750
},
{
"epoch": 11.27,
"grad_norm": 3.539222002029419,
"learning_rate": 8.365125628140705e-06,
"loss": 0.0385,
"step": 16775
},
{
"epoch": 11.28,
"grad_norm": 3.047471761703491,
"learning_rate": 8.362613065326634e-06,
"loss": 0.0386,
"step": 16800
},
{
"epoch": 11.3,
"grad_norm": 2.62675142288208,
"learning_rate": 8.360100502512563e-06,
"loss": 0.0388,
"step": 16825
},
{
"epoch": 11.32,
"grad_norm": 2.6403391361236572,
"learning_rate": 8.357587939698493e-06,
"loss": 0.041,
"step": 16850
},
{
"epoch": 11.33,
"grad_norm": 2.7048850059509277,
"learning_rate": 8.355075376884422e-06,
"loss": 0.0405,
"step": 16875
},
{
"epoch": 11.35,
"grad_norm": 2.8291220664978027,
"learning_rate": 8.352562814070353e-06,
"loss": 0.0375,
"step": 16900
},
{
"epoch": 11.37,
"grad_norm": 2.9671170711517334,
"learning_rate": 8.350050251256282e-06,
"loss": 0.0377,
"step": 16925
},
{
"epoch": 11.38,
"grad_norm": 3.0989413261413574,
"learning_rate": 8.347537688442212e-06,
"loss": 0.039,
"step": 16950
},
{
"epoch": 11.4,
"grad_norm": 2.738807201385498,
"learning_rate": 8.345025125628141e-06,
"loss": 0.0399,
"step": 16975
},
{
"epoch": 11.42,
"grad_norm": 2.9761691093444824,
"learning_rate": 8.34251256281407e-06,
"loss": 0.0402,
"step": 17000
},
{
"epoch": 11.42,
"eval_loss": 0.13800786435604095,
"eval_runtime": 531.8418,
"eval_samples_per_second": 2.606,
"eval_steps_per_second": 2.606,
"eval_wer": 27.76796750838778,
"step": 17000
},
{
"epoch": 11.43,
"grad_norm": 3.1192235946655273,
"learning_rate": 8.34e-06,
"loss": 0.0389,
"step": 17025
},
{
"epoch": 11.45,
"grad_norm": 3.019216299057007,
"learning_rate": 8.33748743718593e-06,
"loss": 0.0413,
"step": 17050
},
{
"epoch": 11.47,
"grad_norm": 2.6235885620117188,
"learning_rate": 8.33497487437186e-06,
"loss": 0.043,
"step": 17075
},
{
"epoch": 11.48,
"grad_norm": 3.3072292804718018,
"learning_rate": 8.33246231155779e-06,
"loss": 0.0384,
"step": 17100
},
{
"epoch": 11.5,
"grad_norm": 3.032578706741333,
"learning_rate": 8.32994974874372e-06,
"loss": 0.0394,
"step": 17125
},
{
"epoch": 11.52,
"grad_norm": 3.0692577362060547,
"learning_rate": 8.327437185929648e-06,
"loss": 0.0402,
"step": 17150
},
{
"epoch": 11.53,
"grad_norm": 3.113739252090454,
"learning_rate": 8.324924623115579e-06,
"loss": 0.038,
"step": 17175
},
{
"epoch": 11.55,
"grad_norm": 3.1510965824127197,
"learning_rate": 8.322412060301508e-06,
"loss": 0.0423,
"step": 17200
},
{
"epoch": 11.57,
"grad_norm": 3.110407590866089,
"learning_rate": 8.319899497487438e-06,
"loss": 0.0381,
"step": 17225
},
{
"epoch": 11.58,
"grad_norm": 2.9603676795959473,
"learning_rate": 8.317386934673367e-06,
"loss": 0.0421,
"step": 17250
},
{
"epoch": 11.6,
"grad_norm": 2.7330162525177,
"learning_rate": 8.314874371859298e-06,
"loss": 0.04,
"step": 17275
},
{
"epoch": 11.62,
"grad_norm": 3.783348798751831,
"learning_rate": 8.312361809045226e-06,
"loss": 0.0428,
"step": 17300
},
{
"epoch": 11.64,
"grad_norm": 3.3141326904296875,
"learning_rate": 8.309849246231157e-06,
"loss": 0.04,
"step": 17325
},
{
"epoch": 11.65,
"grad_norm": 3.1341404914855957,
"learning_rate": 8.307336683417086e-06,
"loss": 0.0389,
"step": 17350
},
{
"epoch": 11.67,
"grad_norm": 2.5702879428863525,
"learning_rate": 8.304824120603015e-06,
"loss": 0.0411,
"step": 17375
},
{
"epoch": 11.69,
"grad_norm": 2.7597875595092773,
"learning_rate": 8.302311557788946e-06,
"loss": 0.0387,
"step": 17400
},
{
"epoch": 11.7,
"grad_norm": 3.1602911949157715,
"learning_rate": 8.299798994974874e-06,
"loss": 0.0401,
"step": 17425
},
{
"epoch": 11.72,
"grad_norm": 2.9719858169555664,
"learning_rate": 8.297286432160805e-06,
"loss": 0.04,
"step": 17450
},
{
"epoch": 11.74,
"grad_norm": 2.7361767292022705,
"learning_rate": 8.294773869346734e-06,
"loss": 0.041,
"step": 17475
},
{
"epoch": 11.75,
"grad_norm": 2.7034785747528076,
"learning_rate": 8.292261306532664e-06,
"loss": 0.0413,
"step": 17500
},
{
"epoch": 11.77,
"grad_norm": 3.2431066036224365,
"learning_rate": 8.289748743718595e-06,
"loss": 0.0396,
"step": 17525
},
{
"epoch": 11.79,
"grad_norm": 2.7960753440856934,
"learning_rate": 8.287236180904524e-06,
"loss": 0.0406,
"step": 17550
},
{
"epoch": 11.8,
"grad_norm": 3.0115575790405273,
"learning_rate": 8.284723618090453e-06,
"loss": 0.0395,
"step": 17575
},
{
"epoch": 11.82,
"grad_norm": 2.4014508724212646,
"learning_rate": 8.282211055276383e-06,
"loss": 0.0404,
"step": 17600
},
{
"epoch": 11.84,
"grad_norm": 3.1004748344421387,
"learning_rate": 8.279698492462312e-06,
"loss": 0.0385,
"step": 17625
},
{
"epoch": 11.85,
"grad_norm": 2.5941948890686035,
"learning_rate": 8.277185929648241e-06,
"loss": 0.0398,
"step": 17650
},
{
"epoch": 11.87,
"grad_norm": 2.6056137084960938,
"learning_rate": 8.274673366834172e-06,
"loss": 0.0381,
"step": 17675
},
{
"epoch": 11.89,
"grad_norm": 2.8399932384490967,
"learning_rate": 8.2721608040201e-06,
"loss": 0.0401,
"step": 17700
},
{
"epoch": 11.9,
"grad_norm": 2.9396562576293945,
"learning_rate": 8.269648241206031e-06,
"loss": 0.0409,
"step": 17725
},
{
"epoch": 11.92,
"grad_norm": 3.1237053871154785,
"learning_rate": 8.26713567839196e-06,
"loss": 0.039,
"step": 17750
},
{
"epoch": 11.94,
"grad_norm": 3.0028700828552246,
"learning_rate": 8.26462311557789e-06,
"loss": 0.0421,
"step": 17775
},
{
"epoch": 11.95,
"grad_norm": 3.055807590484619,
"learning_rate": 8.26211055276382e-06,
"loss": 0.0405,
"step": 17800
},
{
"epoch": 11.97,
"grad_norm": 3.251986026763916,
"learning_rate": 8.25959798994975e-06,
"loss": 0.0433,
"step": 17825
},
{
"epoch": 11.99,
"grad_norm": 2.845550537109375,
"learning_rate": 8.25708542713568e-06,
"loss": 0.0385,
"step": 17850
},
{
"epoch": 12.0,
"grad_norm": 2.913346290588379,
"learning_rate": 8.254572864321609e-06,
"loss": 0.0378,
"step": 17875
},
{
"epoch": 12.02,
"grad_norm": 2.3991270065307617,
"learning_rate": 8.252060301507538e-06,
"loss": 0.0294,
"step": 17900
},
{
"epoch": 12.04,
"grad_norm": 2.4414055347442627,
"learning_rate": 8.249547738693467e-06,
"loss": 0.0306,
"step": 17925
},
{
"epoch": 12.06,
"grad_norm": 2.274725914001465,
"learning_rate": 8.247035175879398e-06,
"loss": 0.0295,
"step": 17950
},
{
"epoch": 12.07,
"grad_norm": 2.767655849456787,
"learning_rate": 8.244522613065328e-06,
"loss": 0.0307,
"step": 17975
},
{
"epoch": 12.09,
"grad_norm": 2.5598373413085938,
"learning_rate": 8.242010050251257e-06,
"loss": 0.0307,
"step": 18000
},
{
"epoch": 12.09,
"eval_loss": 0.1446864753961563,
"eval_runtime": 537.4834,
"eval_samples_per_second": 2.579,
"eval_steps_per_second": 2.579,
"eval_wer": 27.238212961327918,
"step": 18000
},
{
"epoch": 12.11,
"grad_norm": 2.654730796813965,
"learning_rate": 8.239497487437186e-06,
"loss": 0.0303,
"step": 18025
},
{
"epoch": 12.12,
"grad_norm": 2.6578266620635986,
"learning_rate": 8.236984924623116e-06,
"loss": 0.0298,
"step": 18050
},
{
"epoch": 12.14,
"grad_norm": 3.2597641944885254,
"learning_rate": 8.234472361809047e-06,
"loss": 0.0307,
"step": 18075
},
{
"epoch": 12.16,
"grad_norm": 3.1756911277770996,
"learning_rate": 8.231959798994976e-06,
"loss": 0.0303,
"step": 18100
},
{
"epoch": 12.17,
"grad_norm": 2.3517801761627197,
"learning_rate": 8.229447236180905e-06,
"loss": 0.0299,
"step": 18125
},
{
"epoch": 12.19,
"grad_norm": 2.7081449031829834,
"learning_rate": 8.226934673366835e-06,
"loss": 0.0317,
"step": 18150
},
{
"epoch": 12.21,
"grad_norm": 2.9442265033721924,
"learning_rate": 8.224422110552764e-06,
"loss": 0.0309,
"step": 18175
},
{
"epoch": 12.22,
"grad_norm": 2.202742099761963,
"learning_rate": 8.221909547738695e-06,
"loss": 0.0299,
"step": 18200
},
{
"epoch": 12.24,
"grad_norm": 2.683105230331421,
"learning_rate": 8.219396984924624e-06,
"loss": 0.0303,
"step": 18225
},
{
"epoch": 12.26,
"grad_norm": 2.4034810066223145,
"learning_rate": 8.216884422110554e-06,
"loss": 0.0319,
"step": 18250
},
{
"epoch": 12.27,
"grad_norm": 2.621290683746338,
"learning_rate": 8.214371859296483e-06,
"loss": 0.0318,
"step": 18275
},
{
"epoch": 12.29,
"grad_norm": 2.842874765396118,
"learning_rate": 8.211859296482412e-06,
"loss": 0.0332,
"step": 18300
},
{
"epoch": 12.31,
"grad_norm": 2.4797563552856445,
"learning_rate": 8.209346733668342e-06,
"loss": 0.0325,
"step": 18325
},
{
"epoch": 12.32,
"grad_norm": 2.8069446086883545,
"learning_rate": 8.206834170854273e-06,
"loss": 0.033,
"step": 18350
},
{
"epoch": 12.34,
"grad_norm": 2.9851083755493164,
"learning_rate": 8.204321608040202e-06,
"loss": 0.0321,
"step": 18375
},
{
"epoch": 12.36,
"grad_norm": 2.948084592819214,
"learning_rate": 8.201809045226131e-06,
"loss": 0.0338,
"step": 18400
},
{
"epoch": 12.37,
"grad_norm": 2.7898919582366943,
"learning_rate": 8.19929648241206e-06,
"loss": 0.0315,
"step": 18425
},
{
"epoch": 12.39,
"grad_norm": 2.366434097290039,
"learning_rate": 8.19678391959799e-06,
"loss": 0.032,
"step": 18450
},
{
"epoch": 12.41,
"grad_norm": 2.9562463760375977,
"learning_rate": 8.194271356783921e-06,
"loss": 0.0334,
"step": 18475
},
{
"epoch": 12.42,
"grad_norm": 2.5975656509399414,
"learning_rate": 8.19175879396985e-06,
"loss": 0.0331,
"step": 18500
},
{
"epoch": 12.44,
"grad_norm": 2.8374183177948,
"learning_rate": 8.18924623115578e-06,
"loss": 0.0318,
"step": 18525
},
{
"epoch": 12.46,
"grad_norm": 2.839860439300537,
"learning_rate": 8.186733668341709e-06,
"loss": 0.0324,
"step": 18550
},
{
"epoch": 12.47,
"grad_norm": 2.800180196762085,
"learning_rate": 8.184221105527638e-06,
"loss": 0.0309,
"step": 18575
},
{
"epoch": 12.49,
"grad_norm": 2.644583225250244,
"learning_rate": 8.18170854271357e-06,
"loss": 0.0331,
"step": 18600
},
{
"epoch": 12.51,
"grad_norm": 3.0358402729034424,
"learning_rate": 8.179195979899498e-06,
"loss": 0.0327,
"step": 18625
},
{
"epoch": 12.53,
"grad_norm": 2.807608127593994,
"learning_rate": 8.176683417085428e-06,
"loss": 0.032,
"step": 18650
},
{
"epoch": 12.54,
"grad_norm": 3.115736961364746,
"learning_rate": 8.174170854271357e-06,
"loss": 0.034,
"step": 18675
},
{
"epoch": 12.56,
"grad_norm": 2.563960313796997,
"learning_rate": 8.171658291457286e-06,
"loss": 0.0325,
"step": 18700
},
{
"epoch": 12.58,
"grad_norm": 2.6218457221984863,
"learning_rate": 8.169145728643216e-06,
"loss": 0.0312,
"step": 18725
},
{
"epoch": 12.59,
"grad_norm": 2.6230452060699463,
"learning_rate": 8.166633165829147e-06,
"loss": 0.0318,
"step": 18750
},
{
"epoch": 12.61,
"grad_norm": 3.0028395652770996,
"learning_rate": 8.164120603015076e-06,
"loss": 0.0339,
"step": 18775
},
{
"epoch": 12.63,
"grad_norm": 2.810173273086548,
"learning_rate": 8.161608040201005e-06,
"loss": 0.0337,
"step": 18800
},
{
"epoch": 12.64,
"grad_norm": 2.7154364585876465,
"learning_rate": 8.159095477386936e-06,
"loss": 0.0315,
"step": 18825
},
{
"epoch": 12.66,
"grad_norm": 2.9645156860351562,
"learning_rate": 8.156582914572864e-06,
"loss": 0.0341,
"step": 18850
},
{
"epoch": 12.68,
"grad_norm": 2.558562755584717,
"learning_rate": 8.154070351758795e-06,
"loss": 0.0321,
"step": 18875
},
{
"epoch": 12.69,
"grad_norm": 3.045975923538208,
"learning_rate": 8.151557788944724e-06,
"loss": 0.0328,
"step": 18900
},
{
"epoch": 12.71,
"grad_norm": 2.605736494064331,
"learning_rate": 8.149045226130654e-06,
"loss": 0.0338,
"step": 18925
},
{
"epoch": 12.73,
"grad_norm": 2.6503992080688477,
"learning_rate": 8.146532663316583e-06,
"loss": 0.0349,
"step": 18950
},
{
"epoch": 12.74,
"grad_norm": 2.7485363483428955,
"learning_rate": 8.144020100502512e-06,
"loss": 0.0331,
"step": 18975
},
{
"epoch": 12.76,
"grad_norm": 3.0558133125305176,
"learning_rate": 8.141507537688443e-06,
"loss": 0.0331,
"step": 19000
},
{
"epoch": 12.76,
"eval_loss": 0.15126191079616547,
"eval_runtime": 542.0176,
"eval_samples_per_second": 2.557,
"eval_steps_per_second": 2.557,
"eval_wer": 28.129966448878683,
"step": 19000
},
{
"epoch": 12.78,
"grad_norm": 3.117704391479492,
"learning_rate": 8.138994974874373e-06,
"loss": 0.0336,
"step": 19025
},
{
"epoch": 12.79,
"grad_norm": 2.7645487785339355,
"learning_rate": 8.136482412060302e-06,
"loss": 0.0324,
"step": 19050
},
{
"epoch": 12.81,
"grad_norm": 2.742771625518799,
"learning_rate": 8.133969849246231e-06,
"loss": 0.0331,
"step": 19075
},
{
"epoch": 12.83,
"grad_norm": 2.8407609462738037,
"learning_rate": 8.131457286432162e-06,
"loss": 0.0317,
"step": 19100
},
{
"epoch": 12.84,
"grad_norm": 2.5845396518707275,
"learning_rate": 8.12894472361809e-06,
"loss": 0.0335,
"step": 19125
},
{
"epoch": 12.86,
"grad_norm": 2.8739688396453857,
"learning_rate": 8.126432160804021e-06,
"loss": 0.0333,
"step": 19150
},
{
"epoch": 12.88,
"grad_norm": 3.1160261631011963,
"learning_rate": 8.12391959798995e-06,
"loss": 0.033,
"step": 19175
},
{
"epoch": 12.89,
"grad_norm": 2.978895902633667,
"learning_rate": 8.12140703517588e-06,
"loss": 0.0358,
"step": 19200
},
{
"epoch": 12.91,
"grad_norm": 3.0800576210021973,
"learning_rate": 8.11889447236181e-06,
"loss": 0.0335,
"step": 19225
},
{
"epoch": 12.93,
"grad_norm": 2.4890170097351074,
"learning_rate": 8.11638190954774e-06,
"loss": 0.034,
"step": 19250
},
{
"epoch": 12.94,
"grad_norm": 2.8995964527130127,
"learning_rate": 8.11386934673367e-06,
"loss": 0.0342,
"step": 19275
},
{
"epoch": 12.96,
"grad_norm": 2.8822238445281982,
"learning_rate": 8.111356783919599e-06,
"loss": 0.0338,
"step": 19300
},
{
"epoch": 12.98,
"grad_norm": 2.3847439289093018,
"learning_rate": 8.108844221105528e-06,
"loss": 0.0345,
"step": 19325
},
{
"epoch": 13.0,
"grad_norm": 2.5077168941497803,
"learning_rate": 8.106331658291457e-06,
"loss": 0.0323,
"step": 19350
},
{
"epoch": 13.01,
"grad_norm": 2.0860869884490967,
"learning_rate": 8.103819095477388e-06,
"loss": 0.0256,
"step": 19375
},
{
"epoch": 13.03,
"grad_norm": 2.4186856746673584,
"learning_rate": 8.101306532663318e-06,
"loss": 0.025,
"step": 19400
},
{
"epoch": 13.05,
"grad_norm": 2.169545888900757,
"learning_rate": 8.098793969849247e-06,
"loss": 0.024,
"step": 19425
},
{
"epoch": 13.06,
"grad_norm": 2.250295877456665,
"learning_rate": 8.096281407035176e-06,
"loss": 0.0227,
"step": 19450
},
{
"epoch": 13.08,
"grad_norm": 2.8207223415374756,
"learning_rate": 8.093768844221106e-06,
"loss": 0.0254,
"step": 19475
},
{
"epoch": 13.1,
"grad_norm": 2.4845900535583496,
"learning_rate": 8.091256281407037e-06,
"loss": 0.0251,
"step": 19500
},
{
"epoch": 13.11,
"grad_norm": 2.9678895473480225,
"learning_rate": 8.088743718592966e-06,
"loss": 0.0255,
"step": 19525
},
{
"epoch": 13.13,
"grad_norm": 3.0639657974243164,
"learning_rate": 8.086231155778895e-06,
"loss": 0.0266,
"step": 19550
},
{
"epoch": 13.15,
"grad_norm": 2.5778753757476807,
"learning_rate": 8.083718592964825e-06,
"loss": 0.0258,
"step": 19575
},
{
"epoch": 13.16,
"grad_norm": 2.3090131282806396,
"learning_rate": 8.081206030150754e-06,
"loss": 0.0234,
"step": 19600
},
{
"epoch": 13.18,
"grad_norm": 2.645989418029785,
"learning_rate": 8.078693467336685e-06,
"loss": 0.0243,
"step": 19625
},
{
"epoch": 13.2,
"grad_norm": 2.4817280769348145,
"learning_rate": 8.076180904522614e-06,
"loss": 0.0274,
"step": 19650
},
{
"epoch": 13.21,
"grad_norm": 2.17031192779541,
"learning_rate": 8.073668341708544e-06,
"loss": 0.024,
"step": 19675
},
{
"epoch": 13.23,
"grad_norm": 2.587280035018921,
"learning_rate": 8.071155778894473e-06,
"loss": 0.0258,
"step": 19700
},
{
"epoch": 13.25,
"grad_norm": 2.3844306468963623,
"learning_rate": 8.068643216080402e-06,
"loss": 0.0264,
"step": 19725
},
{
"epoch": 13.26,
"grad_norm": 2.440300226211548,
"learning_rate": 8.066130653266332e-06,
"loss": 0.0259,
"step": 19750
},
{
"epoch": 13.28,
"grad_norm": 2.120274543762207,
"learning_rate": 8.063618090452263e-06,
"loss": 0.0253,
"step": 19775
},
{
"epoch": 13.3,
"grad_norm": 2.412203073501587,
"learning_rate": 8.061105527638192e-06,
"loss": 0.0256,
"step": 19800
},
{
"epoch": 13.31,
"grad_norm": 2.3215441703796387,
"learning_rate": 8.058592964824121e-06,
"loss": 0.0247,
"step": 19825
},
{
"epoch": 13.33,
"grad_norm": 2.0729939937591553,
"learning_rate": 8.05608040201005e-06,
"loss": 0.0248,
"step": 19850
},
{
"epoch": 13.35,
"grad_norm": 2.622880697250366,
"learning_rate": 8.05356783919598e-06,
"loss": 0.0271,
"step": 19875
},
{
"epoch": 13.36,
"grad_norm": 2.5304481983184814,
"learning_rate": 8.051055276381911e-06,
"loss": 0.0255,
"step": 19900
},
{
"epoch": 13.38,
"grad_norm": 2.6204922199249268,
"learning_rate": 8.04854271356784e-06,
"loss": 0.0261,
"step": 19925
},
{
"epoch": 13.4,
"grad_norm": 2.284783363342285,
"learning_rate": 8.04603015075377e-06,
"loss": 0.0257,
"step": 19950
},
{
"epoch": 13.42,
"grad_norm": 3.0914671421051025,
"learning_rate": 8.043517587939699e-06,
"loss": 0.027,
"step": 19975
},
{
"epoch": 13.43,
"grad_norm": 2.8612654209136963,
"learning_rate": 8.041005025125628e-06,
"loss": 0.0258,
"step": 20000
},
{
"epoch": 13.43,
"eval_loss": 0.15857619047164917,
"eval_runtime": 534.77,
"eval_samples_per_second": 2.592,
"eval_steps_per_second": 2.592,
"eval_wer": 28.809818117605506,
"step": 20000
},
{
"epoch": 13.45,
"grad_norm": 3.074786424636841,
"learning_rate": 8.03849246231156e-06,
"loss": 0.026,
"step": 20025
},
{
"epoch": 13.47,
"grad_norm": 2.40915584564209,
"learning_rate": 8.035979899497489e-06,
"loss": 0.029,
"step": 20050
},
{
"epoch": 13.48,
"grad_norm": 2.7619211673736572,
"learning_rate": 8.033467336683418e-06,
"loss": 0.0261,
"step": 20075
},
{
"epoch": 13.5,
"grad_norm": 2.8454036712646484,
"learning_rate": 8.030954773869347e-06,
"loss": 0.0257,
"step": 20100
},
{
"epoch": 13.52,
"grad_norm": 2.519239664077759,
"learning_rate": 8.028442211055277e-06,
"loss": 0.0255,
"step": 20125
},
{
"epoch": 13.53,
"grad_norm": 2.798295736312866,
"learning_rate": 8.025929648241206e-06,
"loss": 0.0256,
"step": 20150
},
{
"epoch": 13.55,
"grad_norm": 2.658249855041504,
"learning_rate": 8.023417085427137e-06,
"loss": 0.0252,
"step": 20175
},
{
"epoch": 13.57,
"grad_norm": 2.55195689201355,
"learning_rate": 8.020904522613066e-06,
"loss": 0.0281,
"step": 20200
},
{
"epoch": 13.58,
"grad_norm": 2.282550096511841,
"learning_rate": 8.018391959798996e-06,
"loss": 0.0262,
"step": 20225
},
{
"epoch": 13.6,
"grad_norm": 2.6260697841644287,
"learning_rate": 8.015879396984927e-06,
"loss": 0.0249,
"step": 20250
},
{
"epoch": 13.62,
"grad_norm": 2.61671781539917,
"learning_rate": 8.013366834170854e-06,
"loss": 0.0276,
"step": 20275
},
{
"epoch": 13.63,
"grad_norm": 2.5859358310699463,
"learning_rate": 8.010854271356785e-06,
"loss": 0.0265,
"step": 20300
},
{
"epoch": 13.65,
"grad_norm": 2.6100573539733887,
"learning_rate": 8.008341708542714e-06,
"loss": 0.0258,
"step": 20325
},
{
"epoch": 13.67,
"grad_norm": 2.5182266235351562,
"learning_rate": 8.005829145728644e-06,
"loss": 0.028,
"step": 20350
},
{
"epoch": 13.68,
"grad_norm": 3.105220317840576,
"learning_rate": 8.003316582914573e-06,
"loss": 0.027,
"step": 20375
},
{
"epoch": 13.7,
"grad_norm": 2.7697339057922363,
"learning_rate": 8.000804020100502e-06,
"loss": 0.0274,
"step": 20400
},
{
"epoch": 13.72,
"grad_norm": 2.74824857711792,
"learning_rate": 7.998291457286432e-06,
"loss": 0.0264,
"step": 20425
},
{
"epoch": 13.73,
"grad_norm": 2.1460442543029785,
"learning_rate": 7.995778894472363e-06,
"loss": 0.0266,
"step": 20450
},
{
"epoch": 13.75,
"grad_norm": 2.700098991394043,
"learning_rate": 7.993266331658292e-06,
"loss": 0.0271,
"step": 20475
},
{
"epoch": 13.77,
"grad_norm": 3.0646328926086426,
"learning_rate": 7.990753768844221e-06,
"loss": 0.0273,
"step": 20500
},
{
"epoch": 13.78,
"grad_norm": 2.4817585945129395,
"learning_rate": 7.988241206030152e-06,
"loss": 0.0267,
"step": 20525
},
{
"epoch": 13.8,
"grad_norm": 2.383892059326172,
"learning_rate": 7.98572864321608e-06,
"loss": 0.0281,
"step": 20550
},
{
"epoch": 13.82,
"grad_norm": 2.6712028980255127,
"learning_rate": 7.983216080402011e-06,
"loss": 0.0262,
"step": 20575
},
{
"epoch": 13.83,
"grad_norm": 2.8054888248443604,
"learning_rate": 7.98070351758794e-06,
"loss": 0.0277,
"step": 20600
},
{
"epoch": 13.85,
"grad_norm": 2.520451545715332,
"learning_rate": 7.97819095477387e-06,
"loss": 0.0256,
"step": 20625
},
{
"epoch": 13.87,
"grad_norm": 2.6715471744537354,
"learning_rate": 7.975678391959799e-06,
"loss": 0.0271,
"step": 20650
},
{
"epoch": 13.89,
"grad_norm": 2.936898946762085,
"learning_rate": 7.973165829145728e-06,
"loss": 0.0271,
"step": 20675
},
{
"epoch": 13.9,
"grad_norm": 2.5876598358154297,
"learning_rate": 7.97065326633166e-06,
"loss": 0.0254,
"step": 20700
},
{
"epoch": 13.92,
"grad_norm": 2.576573133468628,
"learning_rate": 7.968140703517589e-06,
"loss": 0.0268,
"step": 20725
},
{
"epoch": 13.94,
"grad_norm": 2.962134838104248,
"learning_rate": 7.965628140703518e-06,
"loss": 0.028,
"step": 20750
},
{
"epoch": 13.95,
"grad_norm": 2.4978857040405273,
"learning_rate": 7.963115577889447e-06,
"loss": 0.0268,
"step": 20775
},
{
"epoch": 13.97,
"grad_norm": 2.7507359981536865,
"learning_rate": 7.960603015075378e-06,
"loss": 0.0264,
"step": 20800
},
{
"epoch": 13.99,
"grad_norm": 2.290602922439575,
"learning_rate": 7.958090452261306e-06,
"loss": 0.0268,
"step": 20825
},
{
"epoch": 14.0,
"grad_norm": 1.895709753036499,
"learning_rate": 7.955577889447237e-06,
"loss": 0.0267,
"step": 20850
},
{
"epoch": 14.02,
"grad_norm": 2.577284097671509,
"learning_rate": 7.953065326633166e-06,
"loss": 0.02,
"step": 20875
},
{
"epoch": 14.04,
"grad_norm": 2.139061450958252,
"learning_rate": 7.950552763819096e-06,
"loss": 0.0182,
"step": 20900
},
{
"epoch": 14.05,
"grad_norm": 2.31142520904541,
"learning_rate": 7.948040201005027e-06,
"loss": 0.0189,
"step": 20925
},
{
"epoch": 14.07,
"grad_norm": 2.4628167152404785,
"learning_rate": 7.945527638190954e-06,
"loss": 0.0191,
"step": 20950
},
{
"epoch": 14.09,
"grad_norm": 2.2550642490386963,
"learning_rate": 7.943015075376885e-06,
"loss": 0.0205,
"step": 20975
},
{
"epoch": 14.1,
"grad_norm": 2.5067131519317627,
"learning_rate": 7.940502512562815e-06,
"loss": 0.0193,
"step": 21000
},
{
"epoch": 14.1,
"eval_loss": 0.16441361606121063,
"eval_runtime": 532.0683,
"eval_samples_per_second": 2.605,
"eval_steps_per_second": 2.605,
"eval_wer": 28.28006357054565,
"step": 21000
},
{
"epoch": 14.12,
"grad_norm": 2.0792436599731445,
"learning_rate": 7.937989949748744e-06,
"loss": 0.0202,
"step": 21025
},
{
"epoch": 14.14,
"grad_norm": 2.0055572986602783,
"learning_rate": 7.935477386934673e-06,
"loss": 0.02,
"step": 21050
},
{
"epoch": 14.15,
"grad_norm": 2.557342052459717,
"learning_rate": 7.932964824120604e-06,
"loss": 0.0202,
"step": 21075
},
{
"epoch": 14.17,
"grad_norm": 2.351605176925659,
"learning_rate": 7.930452261306534e-06,
"loss": 0.0205,
"step": 21100
},
{
"epoch": 14.19,
"grad_norm": 2.4522876739501953,
"learning_rate": 7.927939698492463e-06,
"loss": 0.0197,
"step": 21125
},
{
"epoch": 14.2,
"grad_norm": 1.9259110689163208,
"learning_rate": 7.925527638190955e-06,
"loss": 0.019,
"step": 21150
},
{
"epoch": 14.22,
"grad_norm": 2.6869237422943115,
"learning_rate": 7.923015075376886e-06,
"loss": 0.0191,
"step": 21175
},
{
"epoch": 14.24,
"grad_norm": 2.1610636711120605,
"learning_rate": 7.920502512562815e-06,
"loss": 0.0199,
"step": 21200
},
{
"epoch": 14.25,
"grad_norm": 2.3419833183288574,
"learning_rate": 7.917989949748744e-06,
"loss": 0.0205,
"step": 21225
},
{
"epoch": 14.27,
"grad_norm": 2.655822277069092,
"learning_rate": 7.915477386934674e-06,
"loss": 0.0211,
"step": 21250
},
{
"epoch": 14.29,
"grad_norm": 2.3895249366760254,
"learning_rate": 7.912964824120603e-06,
"loss": 0.02,
"step": 21275
},
{
"epoch": 14.3,
"grad_norm": 2.626079559326172,
"learning_rate": 7.910452261306534e-06,
"loss": 0.0204,
"step": 21300
},
{
"epoch": 14.32,
"grad_norm": 2.4946000576019287,
"learning_rate": 7.907939698492463e-06,
"loss": 0.0211,
"step": 21325
},
{
"epoch": 14.34,
"grad_norm": 2.2254092693328857,
"learning_rate": 7.905427135678393e-06,
"loss": 0.0209,
"step": 21350
},
{
"epoch": 14.36,
"grad_norm": 2.813023328781128,
"learning_rate": 7.902914572864322e-06,
"loss": 0.0205,
"step": 21375
},
{
"epoch": 14.37,
"grad_norm": 2.3448939323425293,
"learning_rate": 7.900402010050253e-06,
"loss": 0.0204,
"step": 21400
},
{
"epoch": 14.39,
"grad_norm": 2.1861133575439453,
"learning_rate": 7.89788944723618e-06,
"loss": 0.0211,
"step": 21425
},
{
"epoch": 14.41,
"grad_norm": 2.1422207355499268,
"learning_rate": 7.895376884422111e-06,
"loss": 0.0224,
"step": 21450
},
{
"epoch": 14.42,
"grad_norm": 2.713761329650879,
"learning_rate": 7.89286432160804e-06,
"loss": 0.02,
"step": 21475
},
{
"epoch": 14.44,
"grad_norm": 2.430680274963379,
"learning_rate": 7.89035175879397e-06,
"loss": 0.0218,
"step": 21500
},
{
"epoch": 14.46,
"grad_norm": 2.974393606185913,
"learning_rate": 7.887839195979901e-06,
"loss": 0.0197,
"step": 21525
},
{
"epoch": 14.47,
"grad_norm": 2.530994415283203,
"learning_rate": 7.885326633165829e-06,
"loss": 0.0221,
"step": 21550
},
{
"epoch": 14.49,
"grad_norm": 2.5071282386779785,
"learning_rate": 7.88281407035176e-06,
"loss": 0.0214,
"step": 21575
},
{
"epoch": 14.51,
"grad_norm": 2.2111854553222656,
"learning_rate": 7.880301507537689e-06,
"loss": 0.0208,
"step": 21600
},
{
"epoch": 14.52,
"grad_norm": 2.194091320037842,
"learning_rate": 7.877788944723618e-06,
"loss": 0.0203,
"step": 21625
},
{
"epoch": 14.54,
"grad_norm": 2.2206263542175293,
"learning_rate": 7.875276381909548e-06,
"loss": 0.0221,
"step": 21650
},
{
"epoch": 14.56,
"grad_norm": 2.425065279006958,
"learning_rate": 7.872763819095479e-06,
"loss": 0.0211,
"step": 21675
},
{
"epoch": 14.57,
"grad_norm": 2.6152865886688232,
"learning_rate": 7.870251256281408e-06,
"loss": 0.0207,
"step": 21700
},
{
"epoch": 14.59,
"grad_norm": 2.2612714767456055,
"learning_rate": 7.867738693467337e-06,
"loss": 0.0209,
"step": 21725
},
{
"epoch": 14.61,
"grad_norm": 2.1470086574554443,
"learning_rate": 7.865226130653267e-06,
"loss": 0.021,
"step": 21750
},
{
"epoch": 14.62,
"grad_norm": 2.484851598739624,
"learning_rate": 7.862713567839196e-06,
"loss": 0.02,
"step": 21775
},
{
"epoch": 14.64,
"grad_norm": 2.4667041301727295,
"learning_rate": 7.860201005025127e-06,
"loss": 0.0206,
"step": 21800
},
{
"epoch": 14.66,
"grad_norm": 2.9903693199157715,
"learning_rate": 7.857688442211055e-06,
"loss": 0.0219,
"step": 21825
},
{
"epoch": 14.67,
"grad_norm": 2.6542530059814453,
"learning_rate": 7.855175879396986e-06,
"loss": 0.0213,
"step": 21850
},
{
"epoch": 14.69,
"grad_norm": 2.333191394805908,
"learning_rate": 7.852663316582915e-06,
"loss": 0.0214,
"step": 21875
},
{
"epoch": 14.71,
"grad_norm": 2.71769380569458,
"learning_rate": 7.850150753768844e-06,
"loss": 0.0215,
"step": 21900
},
{
"epoch": 14.72,
"grad_norm": 2.4674861431121826,
"learning_rate": 7.847638190954775e-06,
"loss": 0.0211,
"step": 21925
},
{
"epoch": 14.74,
"grad_norm": 2.931941270828247,
"learning_rate": 7.845125628140705e-06,
"loss": 0.0244,
"step": 21950
},
{
"epoch": 14.76,
"grad_norm": 2.738786458969116,
"learning_rate": 7.842613065326634e-06,
"loss": 0.0218,
"step": 21975
},
{
"epoch": 14.78,
"grad_norm": 2.375138521194458,
"learning_rate": 7.840100502512563e-06,
"loss": 0.0219,
"step": 22000
},
{
"epoch": 14.78,
"eval_loss": 0.16828645765781403,
"eval_runtime": 539.6518,
"eval_samples_per_second": 2.568,
"eval_steps_per_second": 2.568,
"eval_wer": 28.11230796397669,
"step": 22000
}
],
"logging_steps": 25,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 68,
"save_steps": 1000,
"total_flos": 3.465787561869312e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}