jes3275's picture
Added checkpoints
ab70710
Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN, "... is not valid JSON
{
"best_metric": 22.172258734002074,
"best_model_checkpoint": "results/whisper-base/maithili/checkpoint-56000",
"epoch": 33.249370277078086,
"eval_steps": 1000,
"global_step": 66000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 22.37806510925293,
"learning_rate": 4.6000000000000004e-07,
"loss": 2.3423,
"step": 25
},
{
"epoch": 0.03,
"grad_norm": 14.617953300476074,
"learning_rate": 9.600000000000001e-07,
"loss": 2.0051,
"step": 50
},
{
"epoch": 0.04,
"grad_norm": 8.992124557495117,
"learning_rate": 1.46e-06,
"loss": 1.4891,
"step": 75
},
{
"epoch": 0.05,
"grad_norm": 6.06942081451416,
"learning_rate": 1.9600000000000003e-06,
"loss": 1.1894,
"step": 100
},
{
"epoch": 0.06,
"grad_norm": 5.8592963218688965,
"learning_rate": 2.46e-06,
"loss": 0.9395,
"step": 125
},
{
"epoch": 0.08,
"grad_norm": 4.838193893432617,
"learning_rate": 2.96e-06,
"loss": 0.8116,
"step": 150
},
{
"epoch": 0.09,
"grad_norm": 4.628206729888916,
"learning_rate": 3.46e-06,
"loss": 0.7077,
"step": 175
},
{
"epoch": 0.1,
"grad_norm": 4.1971611976623535,
"learning_rate": 3.96e-06,
"loss": 0.654,
"step": 200
},
{
"epoch": 0.11,
"grad_norm": 4.9267659187316895,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.5982,
"step": 225
},
{
"epoch": 0.13,
"grad_norm": 4.413062572479248,
"learning_rate": 4.960000000000001e-06,
"loss": 0.5504,
"step": 250
},
{
"epoch": 0.14,
"grad_norm": 4.560253620147705,
"learning_rate": 5.460000000000001e-06,
"loss": 0.5413,
"step": 275
},
{
"epoch": 0.15,
"grad_norm": 3.9538028240203857,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.5037,
"step": 300
},
{
"epoch": 0.16,
"grad_norm": 3.8549587726593018,
"learning_rate": 6.460000000000001e-06,
"loss": 0.4715,
"step": 325
},
{
"epoch": 0.18,
"grad_norm": 3.935253620147705,
"learning_rate": 6.96e-06,
"loss": 0.4453,
"step": 350
},
{
"epoch": 0.19,
"grad_norm": 4.225427627563477,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.4322,
"step": 375
},
{
"epoch": 0.2,
"grad_norm": 4.021173477172852,
"learning_rate": 7.960000000000002e-06,
"loss": 0.4302,
"step": 400
},
{
"epoch": 0.21,
"grad_norm": 4.08791446685791,
"learning_rate": 8.46e-06,
"loss": 0.4109,
"step": 425
},
{
"epoch": 0.23,
"grad_norm": 4.559580326080322,
"learning_rate": 8.96e-06,
"loss": 0.391,
"step": 450
},
{
"epoch": 0.24,
"grad_norm": 4.0354790687561035,
"learning_rate": 9.460000000000001e-06,
"loss": 0.3832,
"step": 475
},
{
"epoch": 0.25,
"grad_norm": 4.030752182006836,
"learning_rate": 9.960000000000001e-06,
"loss": 0.373,
"step": 500
},
{
"epoch": 0.26,
"grad_norm": 4.1658034324646,
"learning_rate": 9.997688442211056e-06,
"loss": 0.3686,
"step": 525
},
{
"epoch": 0.28,
"grad_norm": 4.24349308013916,
"learning_rate": 9.995175879396986e-06,
"loss": 0.3444,
"step": 550
},
{
"epoch": 0.29,
"grad_norm": 4.5939555168151855,
"learning_rate": 9.992663316582915e-06,
"loss": 0.3418,
"step": 575
},
{
"epoch": 0.3,
"grad_norm": 3.8061537742614746,
"learning_rate": 9.990150753768844e-06,
"loss": 0.3393,
"step": 600
},
{
"epoch": 0.31,
"grad_norm": 4.4001922607421875,
"learning_rate": 9.987638190954775e-06,
"loss": 0.3248,
"step": 625
},
{
"epoch": 0.33,
"grad_norm": 3.4623498916625977,
"learning_rate": 9.985125628140705e-06,
"loss": 0.3191,
"step": 650
},
{
"epoch": 0.34,
"grad_norm": 3.7342793941497803,
"learning_rate": 9.982613065326634e-06,
"loss": 0.3123,
"step": 675
},
{
"epoch": 0.35,
"grad_norm": 4.150409698486328,
"learning_rate": 9.980100502512565e-06,
"loss": 0.3102,
"step": 700
},
{
"epoch": 0.37,
"grad_norm": 3.992783308029175,
"learning_rate": 9.977587939698493e-06,
"loss": 0.3089,
"step": 725
},
{
"epoch": 0.38,
"grad_norm": 3.3655505180358887,
"learning_rate": 9.975075376884424e-06,
"loss": 0.3045,
"step": 750
},
{
"epoch": 0.39,
"grad_norm": 3.5667428970336914,
"learning_rate": 9.972562814070353e-06,
"loss": 0.2959,
"step": 775
},
{
"epoch": 0.4,
"grad_norm": 4.166114807128906,
"learning_rate": 9.970050251256282e-06,
"loss": 0.2918,
"step": 800
},
{
"epoch": 0.42,
"grad_norm": 3.6910595893859863,
"learning_rate": 9.967537688442212e-06,
"loss": 0.2931,
"step": 825
},
{
"epoch": 0.43,
"grad_norm": 3.8466129302978516,
"learning_rate": 9.965025125628141e-06,
"loss": 0.2741,
"step": 850
},
{
"epoch": 0.44,
"grad_norm": 3.8253138065338135,
"learning_rate": 9.96251256281407e-06,
"loss": 0.2733,
"step": 875
},
{
"epoch": 0.45,
"grad_norm": 3.8832590579986572,
"learning_rate": 9.960000000000001e-06,
"loss": 0.2754,
"step": 900
},
{
"epoch": 0.47,
"grad_norm": 3.8620283603668213,
"learning_rate": 9.95748743718593e-06,
"loss": 0.2747,
"step": 925
},
{
"epoch": 0.48,
"grad_norm": 3.543933629989624,
"learning_rate": 9.95497487437186e-06,
"loss": 0.2651,
"step": 950
},
{
"epoch": 0.49,
"grad_norm": 3.5218117237091064,
"learning_rate": 9.952462311557791e-06,
"loss": 0.2769,
"step": 975
},
{
"epoch": 0.5,
"grad_norm": 3.711573600769043,
"learning_rate": 9.949949748743718e-06,
"loss": 0.2647,
"step": 1000
},
{
"epoch": 0.5,
"eval_loss": 0.23160116374492645,
"eval_runtime": 649.8053,
"eval_samples_per_second": 2.168,
"eval_steps_per_second": 2.168,
"eval_wer": 35.17813905223106,
"step": 1000
},
{
"epoch": 0.52,
"grad_norm": 3.524477005004883,
"learning_rate": 9.94743718592965e-06,
"loss": 0.2592,
"step": 1025
},
{
"epoch": 0.53,
"grad_norm": 3.2609434127807617,
"learning_rate": 9.944924623115579e-06,
"loss": 0.2532,
"step": 1050
},
{
"epoch": 0.54,
"grad_norm": 3.6249492168426514,
"learning_rate": 9.942412060301508e-06,
"loss": 0.2562,
"step": 1075
},
{
"epoch": 0.55,
"grad_norm": 3.7930243015289307,
"learning_rate": 9.93989949748744e-06,
"loss": 0.2592,
"step": 1100
},
{
"epoch": 0.57,
"grad_norm": 4.007348537445068,
"learning_rate": 9.937386934673367e-06,
"loss": 0.2517,
"step": 1125
},
{
"epoch": 0.58,
"grad_norm": 3.8038876056671143,
"learning_rate": 9.934874371859298e-06,
"loss": 0.2409,
"step": 1150
},
{
"epoch": 0.59,
"grad_norm": 3.6129648685455322,
"learning_rate": 9.932361809045227e-06,
"loss": 0.2468,
"step": 1175
},
{
"epoch": 0.6,
"grad_norm": 3.1642048358917236,
"learning_rate": 9.929849246231156e-06,
"loss": 0.2483,
"step": 1200
},
{
"epoch": 0.62,
"grad_norm": 3.557328939437866,
"learning_rate": 9.927336683417086e-06,
"loss": 0.2388,
"step": 1225
},
{
"epoch": 0.63,
"grad_norm": 3.287649154663086,
"learning_rate": 9.924824120603017e-06,
"loss": 0.2472,
"step": 1250
},
{
"epoch": 0.64,
"grad_norm": 2.858637809753418,
"learning_rate": 9.922311557788944e-06,
"loss": 0.2377,
"step": 1275
},
{
"epoch": 0.65,
"grad_norm": 3.016263484954834,
"learning_rate": 9.919798994974875e-06,
"loss": 0.2376,
"step": 1300
},
{
"epoch": 0.67,
"grad_norm": 3.3102822303771973,
"learning_rate": 9.917286432160805e-06,
"loss": 0.2345,
"step": 1325
},
{
"epoch": 0.68,
"grad_norm": 3.1757044792175293,
"learning_rate": 9.914773869346734e-06,
"loss": 0.2315,
"step": 1350
},
{
"epoch": 0.69,
"grad_norm": 3.720780849456787,
"learning_rate": 9.912261306532665e-06,
"loss": 0.2362,
"step": 1375
},
{
"epoch": 0.71,
"grad_norm": 3.6007885932922363,
"learning_rate": 9.909748743718593e-06,
"loss": 0.2306,
"step": 1400
},
{
"epoch": 0.72,
"grad_norm": 3.1506540775299072,
"learning_rate": 9.907236180904524e-06,
"loss": 0.2266,
"step": 1425
},
{
"epoch": 0.73,
"grad_norm": 3.562501907348633,
"learning_rate": 9.904723618090453e-06,
"loss": 0.2244,
"step": 1450
},
{
"epoch": 0.74,
"grad_norm": 3.256913900375366,
"learning_rate": 9.902211055276382e-06,
"loss": 0.2313,
"step": 1475
},
{
"epoch": 0.76,
"grad_norm": 3.431098222732544,
"learning_rate": 9.899698492462312e-06,
"loss": 0.2194,
"step": 1500
},
{
"epoch": 0.77,
"grad_norm": 3.5509750843048096,
"learning_rate": 9.897185929648243e-06,
"loss": 0.2253,
"step": 1525
},
{
"epoch": 0.78,
"grad_norm": 3.666264295578003,
"learning_rate": 9.894673366834172e-06,
"loss": 0.2214,
"step": 1550
},
{
"epoch": 0.79,
"grad_norm": 3.291407585144043,
"learning_rate": 9.892160804020101e-06,
"loss": 0.2139,
"step": 1575
},
{
"epoch": 0.81,
"grad_norm": 3.3444149494171143,
"learning_rate": 9.88964824120603e-06,
"loss": 0.2172,
"step": 1600
},
{
"epoch": 0.82,
"grad_norm": 3.133206367492676,
"learning_rate": 9.88713567839196e-06,
"loss": 0.2199,
"step": 1625
},
{
"epoch": 0.83,
"grad_norm": 3.0405352115631104,
"learning_rate": 9.884623115577891e-06,
"loss": 0.2103,
"step": 1650
},
{
"epoch": 0.84,
"grad_norm": 3.157695770263672,
"learning_rate": 9.882110552763819e-06,
"loss": 0.2155,
"step": 1675
},
{
"epoch": 0.86,
"grad_norm": 3.13808274269104,
"learning_rate": 9.87959798994975e-06,
"loss": 0.215,
"step": 1700
},
{
"epoch": 0.87,
"grad_norm": 3.28182315826416,
"learning_rate": 9.877085427135679e-06,
"loss": 0.2139,
"step": 1725
},
{
"epoch": 0.88,
"grad_norm": 3.1201610565185547,
"learning_rate": 9.874572864321608e-06,
"loss": 0.2018,
"step": 1750
},
{
"epoch": 0.89,
"grad_norm": 3.426795721054077,
"learning_rate": 9.87206030150754e-06,
"loss": 0.2125,
"step": 1775
},
{
"epoch": 0.91,
"grad_norm": 3.0987160205841064,
"learning_rate": 9.869547738693469e-06,
"loss": 0.2079,
"step": 1800
},
{
"epoch": 0.92,
"grad_norm": 3.6653778553009033,
"learning_rate": 9.867035175879398e-06,
"loss": 0.1991,
"step": 1825
},
{
"epoch": 0.93,
"grad_norm": 3.522376537322998,
"learning_rate": 9.864522613065327e-06,
"loss": 0.1966,
"step": 1850
},
{
"epoch": 0.94,
"grad_norm": 3.2122714519500732,
"learning_rate": 9.862010050251257e-06,
"loss": 0.1962,
"step": 1875
},
{
"epoch": 0.96,
"grad_norm": 3.3362936973571777,
"learning_rate": 9.859497487437186e-06,
"loss": 0.2056,
"step": 1900
},
{
"epoch": 0.97,
"grad_norm": 2.8921637535095215,
"learning_rate": 9.856984924623117e-06,
"loss": 0.192,
"step": 1925
},
{
"epoch": 0.98,
"grad_norm": 3.2778878211975098,
"learning_rate": 9.854472361809046e-06,
"loss": 0.1953,
"step": 1950
},
{
"epoch": 0.99,
"grad_norm": 3.4213788509368896,
"learning_rate": 9.851959798994976e-06,
"loss": 0.199,
"step": 1975
},
{
"epoch": 1.01,
"grad_norm": 2.996750593185425,
"learning_rate": 9.849447236180905e-06,
"loss": 0.1877,
"step": 2000
},
{
"epoch": 1.01,
"eval_loss": 0.18496885895729065,
"eval_runtime": 642.3954,
"eval_samples_per_second": 2.193,
"eval_steps_per_second": 2.193,
"eval_wer": 29.546869595295743,
"step": 2000
},
{
"epoch": 1.02,
"grad_norm": 2.9950320720672607,
"learning_rate": 9.846934673366834e-06,
"loss": 0.1852,
"step": 2025
},
{
"epoch": 1.03,
"grad_norm": 3.2103137969970703,
"learning_rate": 9.844422110552765e-06,
"loss": 0.1808,
"step": 2050
},
{
"epoch": 1.05,
"grad_norm": 2.737065076828003,
"learning_rate": 9.841909547738695e-06,
"loss": 0.1712,
"step": 2075
},
{
"epoch": 1.06,
"grad_norm": 3.0664756298065186,
"learning_rate": 9.839396984924624e-06,
"loss": 0.1744,
"step": 2100
},
{
"epoch": 1.07,
"grad_norm": 3.143113374710083,
"learning_rate": 9.836884422110553e-06,
"loss": 0.1828,
"step": 2125
},
{
"epoch": 1.08,
"grad_norm": 3.1511785984039307,
"learning_rate": 9.834371859296483e-06,
"loss": 0.1724,
"step": 2150
},
{
"epoch": 1.1,
"grad_norm": 3.206463575363159,
"learning_rate": 9.831859296482414e-06,
"loss": 0.1742,
"step": 2175
},
{
"epoch": 1.11,
"grad_norm": 3.2733755111694336,
"learning_rate": 9.829346733668343e-06,
"loss": 0.178,
"step": 2200
},
{
"epoch": 1.12,
"grad_norm": 2.9530861377716064,
"learning_rate": 9.826834170854272e-06,
"loss": 0.1705,
"step": 2225
},
{
"epoch": 1.13,
"grad_norm": 3.1909892559051514,
"learning_rate": 9.824321608040202e-06,
"loss": 0.1759,
"step": 2250
},
{
"epoch": 1.15,
"grad_norm": 2.792212724685669,
"learning_rate": 9.821809045226131e-06,
"loss": 0.1641,
"step": 2275
},
{
"epoch": 1.16,
"grad_norm": 3.1717071533203125,
"learning_rate": 9.81929648241206e-06,
"loss": 0.1709,
"step": 2300
},
{
"epoch": 1.17,
"grad_norm": 2.979113817214966,
"learning_rate": 9.816783919597991e-06,
"loss": 0.1655,
"step": 2325
},
{
"epoch": 1.18,
"grad_norm": 2.8337669372558594,
"learning_rate": 9.81427135678392e-06,
"loss": 0.1691,
"step": 2350
},
{
"epoch": 1.2,
"grad_norm": 2.9283900260925293,
"learning_rate": 9.81175879396985e-06,
"loss": 0.171,
"step": 2375
},
{
"epoch": 1.21,
"grad_norm": 3.450836420059204,
"learning_rate": 9.809246231155781e-06,
"loss": 0.1695,
"step": 2400
},
{
"epoch": 1.22,
"grad_norm": 2.9222114086151123,
"learning_rate": 9.806733668341709e-06,
"loss": 0.1696,
"step": 2425
},
{
"epoch": 1.23,
"grad_norm": 2.5073487758636475,
"learning_rate": 9.80422110552764e-06,
"loss": 0.1672,
"step": 2450
},
{
"epoch": 1.25,
"grad_norm": 3.157297372817993,
"learning_rate": 9.801708542713569e-06,
"loss": 0.165,
"step": 2475
},
{
"epoch": 1.26,
"grad_norm": 3.0700457096099854,
"learning_rate": 9.799195979899498e-06,
"loss": 0.169,
"step": 2500
},
{
"epoch": 1.27,
"grad_norm": 3.2579360008239746,
"learning_rate": 9.796683417085428e-06,
"loss": 0.1607,
"step": 2525
},
{
"epoch": 1.28,
"grad_norm": 2.8195056915283203,
"learning_rate": 9.794170854271357e-06,
"loss": 0.16,
"step": 2550
},
{
"epoch": 1.3,
"grad_norm": 2.526740312576294,
"learning_rate": 9.791658291457288e-06,
"loss": 0.1678,
"step": 2575
},
{
"epoch": 1.31,
"grad_norm": 3.2605721950531006,
"learning_rate": 9.789145728643217e-06,
"loss": 0.1597,
"step": 2600
},
{
"epoch": 1.32,
"grad_norm": 2.9491026401519775,
"learning_rate": 9.786633165829147e-06,
"loss": 0.1643,
"step": 2625
},
{
"epoch": 1.34,
"grad_norm": 3.419473886489868,
"learning_rate": 9.784120603015076e-06,
"loss": 0.1631,
"step": 2650
},
{
"epoch": 1.35,
"grad_norm": 3.4714505672454834,
"learning_rate": 9.781608040201007e-06,
"loss": 0.1637,
"step": 2675
},
{
"epoch": 1.36,
"grad_norm": 2.919762372970581,
"learning_rate": 9.779095477386934e-06,
"loss": 0.1614,
"step": 2700
},
{
"epoch": 1.37,
"grad_norm": 2.6376657485961914,
"learning_rate": 9.776582914572866e-06,
"loss": 0.1617,
"step": 2725
},
{
"epoch": 1.39,
"grad_norm": 2.929567575454712,
"learning_rate": 9.774070351758795e-06,
"loss": 0.1595,
"step": 2750
},
{
"epoch": 1.4,
"grad_norm": 2.939025402069092,
"learning_rate": 9.771557788944724e-06,
"loss": 0.1589,
"step": 2775
},
{
"epoch": 1.41,
"grad_norm": 3.043203115463257,
"learning_rate": 9.769045226130655e-06,
"loss": 0.1623,
"step": 2800
},
{
"epoch": 1.42,
"grad_norm": 2.872321605682373,
"learning_rate": 9.766532663316583e-06,
"loss": 0.1527,
"step": 2825
},
{
"epoch": 1.44,
"grad_norm": 2.8521175384521484,
"learning_rate": 9.764020100502514e-06,
"loss": 0.1582,
"step": 2850
},
{
"epoch": 1.45,
"grad_norm": 2.8888206481933594,
"learning_rate": 9.761507537688443e-06,
"loss": 0.162,
"step": 2875
},
{
"epoch": 1.46,
"grad_norm": 2.583472490310669,
"learning_rate": 9.758994974874372e-06,
"loss": 0.1559,
"step": 2900
},
{
"epoch": 1.47,
"grad_norm": 3.0001070499420166,
"learning_rate": 9.756482412060302e-06,
"loss": 0.1557,
"step": 2925
},
{
"epoch": 1.49,
"grad_norm": 3.3756625652313232,
"learning_rate": 9.753969849246233e-06,
"loss": 0.1603,
"step": 2950
},
{
"epoch": 1.5,
"grad_norm": 2.995574951171875,
"learning_rate": 9.75145728643216e-06,
"loss": 0.1539,
"step": 2975
},
{
"epoch": 1.51,
"grad_norm": 2.927722692489624,
"learning_rate": 9.748944723618091e-06,
"loss": 0.1579,
"step": 3000
},
{
"epoch": 1.51,
"eval_loss": 0.16459061205387115,
"eval_runtime": 645.71,
"eval_samples_per_second": 2.182,
"eval_steps_per_second": 2.182,
"eval_wer": 26.675890695261156,
"step": 3000
},
{
"epoch": 1.52,
"grad_norm": 3.3339335918426514,
"learning_rate": 9.74643216080402e-06,
"loss": 0.1559,
"step": 3025
},
{
"epoch": 1.54,
"grad_norm": 3.0809624195098877,
"learning_rate": 9.74391959798995e-06,
"loss": 0.1561,
"step": 3050
},
{
"epoch": 1.55,
"grad_norm": 2.9823570251464844,
"learning_rate": 9.741407035175881e-06,
"loss": 0.1599,
"step": 3075
},
{
"epoch": 1.56,
"grad_norm": 2.7149410247802734,
"learning_rate": 9.738894472361809e-06,
"loss": 0.1469,
"step": 3100
},
{
"epoch": 1.57,
"grad_norm": 3.8664979934692383,
"learning_rate": 9.73638190954774e-06,
"loss": 0.1545,
"step": 3125
},
{
"epoch": 1.59,
"grad_norm": 2.9406516551971436,
"learning_rate": 9.733869346733669e-06,
"loss": 0.154,
"step": 3150
},
{
"epoch": 1.6,
"grad_norm": 3.1379194259643555,
"learning_rate": 9.731356783919598e-06,
"loss": 0.1545,
"step": 3175
},
{
"epoch": 1.61,
"grad_norm": 3.072883129119873,
"learning_rate": 9.72884422110553e-06,
"loss": 0.1545,
"step": 3200
},
{
"epoch": 1.62,
"grad_norm": 3.329160213470459,
"learning_rate": 9.726331658291459e-06,
"loss": 0.151,
"step": 3225
},
{
"epoch": 1.64,
"grad_norm": 2.8267323970794678,
"learning_rate": 9.723819095477388e-06,
"loss": 0.1471,
"step": 3250
},
{
"epoch": 1.65,
"grad_norm": 3.380469799041748,
"learning_rate": 9.721306532663317e-06,
"loss": 0.1563,
"step": 3275
},
{
"epoch": 1.66,
"grad_norm": 2.6737258434295654,
"learning_rate": 9.718793969849247e-06,
"loss": 0.1505,
"step": 3300
},
{
"epoch": 1.68,
"grad_norm": 2.920942783355713,
"learning_rate": 9.716281407035176e-06,
"loss": 0.1474,
"step": 3325
},
{
"epoch": 1.69,
"grad_norm": 2.8349497318267822,
"learning_rate": 9.713768844221107e-06,
"loss": 0.1499,
"step": 3350
},
{
"epoch": 1.7,
"grad_norm": 2.9991960525512695,
"learning_rate": 9.711256281407035e-06,
"loss": 0.1457,
"step": 3375
},
{
"epoch": 1.71,
"grad_norm": 3.211735486984253,
"learning_rate": 9.708743718592966e-06,
"loss": 0.1488,
"step": 3400
},
{
"epoch": 1.73,
"grad_norm": 2.8068394660949707,
"learning_rate": 9.706231155778895e-06,
"loss": 0.144,
"step": 3425
},
{
"epoch": 1.74,
"grad_norm": 3.149648904800415,
"learning_rate": 9.703718592964824e-06,
"loss": 0.1476,
"step": 3450
},
{
"epoch": 1.75,
"grad_norm": 2.5642662048339844,
"learning_rate": 9.701206030150755e-06,
"loss": 0.1452,
"step": 3475
},
{
"epoch": 1.76,
"grad_norm": 3.3796427249908447,
"learning_rate": 9.698693467336685e-06,
"loss": 0.1498,
"step": 3500
},
{
"epoch": 1.78,
"grad_norm": 2.480526924133301,
"learning_rate": 9.696180904522614e-06,
"loss": 0.1438,
"step": 3525
},
{
"epoch": 1.79,
"grad_norm": 2.742117166519165,
"learning_rate": 9.693668341708543e-06,
"loss": 0.1454,
"step": 3550
},
{
"epoch": 1.8,
"grad_norm": 3.248408555984497,
"learning_rate": 9.691155778894473e-06,
"loss": 0.1449,
"step": 3575
},
{
"epoch": 1.81,
"grad_norm": 2.808727264404297,
"learning_rate": 9.688643216080402e-06,
"loss": 0.1417,
"step": 3600
},
{
"epoch": 1.83,
"grad_norm": 2.612992525100708,
"learning_rate": 9.686130653266333e-06,
"loss": 0.1491,
"step": 3625
},
{
"epoch": 1.84,
"grad_norm": 2.978003978729248,
"learning_rate": 9.683618090452262e-06,
"loss": 0.1454,
"step": 3650
},
{
"epoch": 1.85,
"grad_norm": 2.9820547103881836,
"learning_rate": 9.681105527638192e-06,
"loss": 0.1381,
"step": 3675
},
{
"epoch": 1.86,
"grad_norm": 2.7569639682769775,
"learning_rate": 9.678592964824121e-06,
"loss": 0.1471,
"step": 3700
},
{
"epoch": 1.88,
"grad_norm": 3.0688931941986084,
"learning_rate": 9.67608040201005e-06,
"loss": 0.1425,
"step": 3725
},
{
"epoch": 1.89,
"grad_norm": 2.921603202819824,
"learning_rate": 9.673567839195981e-06,
"loss": 0.1472,
"step": 3750
},
{
"epoch": 1.9,
"grad_norm": 2.9307756423950195,
"learning_rate": 9.67105527638191e-06,
"loss": 0.1438,
"step": 3775
},
{
"epoch": 1.91,
"grad_norm": 3.2151060104370117,
"learning_rate": 9.66854271356784e-06,
"loss": 0.1428,
"step": 3800
},
{
"epoch": 1.93,
"grad_norm": 2.9010095596313477,
"learning_rate": 9.666030150753771e-06,
"loss": 0.1465,
"step": 3825
},
{
"epoch": 1.94,
"grad_norm": 2.832845687866211,
"learning_rate": 9.663517587939699e-06,
"loss": 0.1429,
"step": 3850
},
{
"epoch": 1.95,
"grad_norm": 2.6402933597564697,
"learning_rate": 9.66100502512563e-06,
"loss": 0.1342,
"step": 3875
},
{
"epoch": 1.96,
"grad_norm": 2.6498653888702393,
"learning_rate": 9.658492462311559e-06,
"loss": 0.1402,
"step": 3900
},
{
"epoch": 1.98,
"grad_norm": 2.972980260848999,
"learning_rate": 9.655979899497488e-06,
"loss": 0.1387,
"step": 3925
},
{
"epoch": 1.99,
"grad_norm": 3.229097843170166,
"learning_rate": 9.653467336683418e-06,
"loss": 0.1432,
"step": 3950
},
{
"epoch": 2.0,
"grad_norm": 2.3604576587677,
"learning_rate": 9.650954773869347e-06,
"loss": 0.1356,
"step": 3975
},
{
"epoch": 2.02,
"grad_norm": 2.725226402282715,
"learning_rate": 9.648442211055276e-06,
"loss": 0.1179,
"step": 4000
},
{
"epoch": 2.02,
"eval_loss": 0.15386536717414856,
"eval_runtime": 646.9871,
"eval_samples_per_second": 2.178,
"eval_steps_per_second": 2.178,
"eval_wer": 25.105499827049467,
"step": 4000
},
{
"epoch": 2.03,
"grad_norm": 3.045943260192871,
"learning_rate": 9.645929648241207e-06,
"loss": 0.1157,
"step": 4025
},
{
"epoch": 2.04,
"grad_norm": 2.593027353286743,
"learning_rate": 9.643417085427137e-06,
"loss": 0.116,
"step": 4050
},
{
"epoch": 2.05,
"grad_norm": 2.6555795669555664,
"learning_rate": 9.640904522613066e-06,
"loss": 0.1187,
"step": 4075
},
{
"epoch": 2.07,
"grad_norm": 2.776094913482666,
"learning_rate": 9.638391959798997e-06,
"loss": 0.1155,
"step": 4100
},
{
"epoch": 2.08,
"grad_norm": 3.2772326469421387,
"learning_rate": 9.635879396984925e-06,
"loss": 0.1152,
"step": 4125
},
{
"epoch": 2.09,
"grad_norm": 3.0243771076202393,
"learning_rate": 9.633366834170856e-06,
"loss": 0.1199,
"step": 4150
},
{
"epoch": 2.1,
"grad_norm": 2.6247713565826416,
"learning_rate": 9.630854271356785e-06,
"loss": 0.117,
"step": 4175
},
{
"epoch": 2.12,
"grad_norm": 2.80692720413208,
"learning_rate": 9.628341708542714e-06,
"loss": 0.1115,
"step": 4200
},
{
"epoch": 2.13,
"grad_norm": 2.899824380874634,
"learning_rate": 9.625829145728644e-06,
"loss": 0.1155,
"step": 4225
},
{
"epoch": 2.14,
"grad_norm": 2.8152291774749756,
"learning_rate": 9.623316582914573e-06,
"loss": 0.1129,
"step": 4250
},
{
"epoch": 2.15,
"grad_norm": 2.659745931625366,
"learning_rate": 9.620804020100504e-06,
"loss": 0.1143,
"step": 4275
},
{
"epoch": 2.17,
"grad_norm": 2.8360331058502197,
"learning_rate": 9.618291457286433e-06,
"loss": 0.1207,
"step": 4300
},
{
"epoch": 2.18,
"grad_norm": 3.03417706489563,
"learning_rate": 9.615778894472363e-06,
"loss": 0.1245,
"step": 4325
},
{
"epoch": 2.19,
"grad_norm": 2.5418951511383057,
"learning_rate": 9.613266331658292e-06,
"loss": 0.1174,
"step": 4350
},
{
"epoch": 2.2,
"grad_norm": 2.902958631515503,
"learning_rate": 9.610753768844223e-06,
"loss": 0.1158,
"step": 4375
},
{
"epoch": 2.22,
"grad_norm": 3.026547431945801,
"learning_rate": 9.60824120603015e-06,
"loss": 0.1104,
"step": 4400
},
{
"epoch": 2.23,
"grad_norm": 2.8732357025146484,
"learning_rate": 9.605728643216082e-06,
"loss": 0.1163,
"step": 4425
},
{
"epoch": 2.24,
"grad_norm": 2.730586528778076,
"learning_rate": 9.60321608040201e-06,
"loss": 0.1128,
"step": 4450
},
{
"epoch": 2.25,
"grad_norm": 2.59441876411438,
"learning_rate": 9.60070351758794e-06,
"loss": 0.1205,
"step": 4475
},
{
"epoch": 2.27,
"grad_norm": 2.5764012336730957,
"learning_rate": 9.598190954773871e-06,
"loss": 0.1198,
"step": 4500
},
{
"epoch": 2.28,
"grad_norm": 2.6175150871276855,
"learning_rate": 9.595678391959799e-06,
"loss": 0.117,
"step": 4525
},
{
"epoch": 2.29,
"grad_norm": 2.7798826694488525,
"learning_rate": 9.59316582914573e-06,
"loss": 0.1101,
"step": 4550
},
{
"epoch": 2.3,
"grad_norm": 2.900200605392456,
"learning_rate": 9.59065326633166e-06,
"loss": 0.1102,
"step": 4575
},
{
"epoch": 2.32,
"grad_norm": 2.6330630779266357,
"learning_rate": 9.588140703517588e-06,
"loss": 0.1164,
"step": 4600
},
{
"epoch": 2.33,
"grad_norm": 3.1691510677337646,
"learning_rate": 9.585628140703518e-06,
"loss": 0.1127,
"step": 4625
},
{
"epoch": 2.34,
"grad_norm": 2.9986257553100586,
"learning_rate": 9.583115577889449e-06,
"loss": 0.1118,
"step": 4650
},
{
"epoch": 2.36,
"grad_norm": 2.9816648960113525,
"learning_rate": 9.580603015075378e-06,
"loss": 0.1124,
"step": 4675
},
{
"epoch": 2.37,
"grad_norm": 2.7380642890930176,
"learning_rate": 9.578090452261307e-06,
"loss": 0.1101,
"step": 4700
},
{
"epoch": 2.38,
"grad_norm": 2.506922721862793,
"learning_rate": 9.575577889447237e-06,
"loss": 0.1101,
"step": 4725
},
{
"epoch": 2.39,
"grad_norm": 2.7849462032318115,
"learning_rate": 9.573065326633166e-06,
"loss": 0.1128,
"step": 4750
},
{
"epoch": 2.41,
"grad_norm": 2.457066774368286,
"learning_rate": 9.570552763819097e-06,
"loss": 0.1139,
"step": 4775
},
{
"epoch": 2.42,
"grad_norm": 2.9274463653564453,
"learning_rate": 9.568040201005025e-06,
"loss": 0.1099,
"step": 4800
},
{
"epoch": 2.43,
"grad_norm": 2.54990291595459,
"learning_rate": 9.565527638190956e-06,
"loss": 0.1099,
"step": 4825
},
{
"epoch": 2.44,
"grad_norm": 3.0029959678649902,
"learning_rate": 9.563015075376885e-06,
"loss": 0.1132,
"step": 4850
},
{
"epoch": 2.46,
"grad_norm": 2.9913110733032227,
"learning_rate": 9.560502512562814e-06,
"loss": 0.1134,
"step": 4875
},
{
"epoch": 2.47,
"grad_norm": 2.846057653427124,
"learning_rate": 9.557989949748745e-06,
"loss": 0.112,
"step": 4900
},
{
"epoch": 2.48,
"grad_norm": 2.566232681274414,
"learning_rate": 9.555477386934675e-06,
"loss": 0.1106,
"step": 4925
},
{
"epoch": 2.49,
"grad_norm": 2.8931262493133545,
"learning_rate": 9.552964824120604e-06,
"loss": 0.1099,
"step": 4950
},
{
"epoch": 2.51,
"grad_norm": 2.9756979942321777,
"learning_rate": 9.550552763819096e-06,
"loss": 0.1125,
"step": 4975
},
{
"epoch": 2.52,
"grad_norm": 2.837172508239746,
"learning_rate": 9.548040201005025e-06,
"loss": 0.1083,
"step": 5000
},
{
"epoch": 2.52,
"eval_loss": 0.1495467722415924,
"eval_runtime": 644.1684,
"eval_samples_per_second": 2.187,
"eval_steps_per_second": 2.187,
"eval_wer": 24.628156347284676,
"step": 5000
},
{
"epoch": 2.53,
"grad_norm": 2.499260902404785,
"learning_rate": 9.545527638190956e-06,
"loss": 0.1115,
"step": 5025
},
{
"epoch": 2.54,
"grad_norm": 2.9325485229492188,
"learning_rate": 9.543015075376885e-06,
"loss": 0.1087,
"step": 5050
},
{
"epoch": 2.56,
"grad_norm": 2.4854938983917236,
"learning_rate": 9.540502512562815e-06,
"loss": 0.1046,
"step": 5075
},
{
"epoch": 2.57,
"grad_norm": 3.0302836894989014,
"learning_rate": 9.537989949748746e-06,
"loss": 0.1105,
"step": 5100
},
{
"epoch": 2.58,
"grad_norm": 2.6300787925720215,
"learning_rate": 9.535477386934673e-06,
"loss": 0.1093,
"step": 5125
},
{
"epoch": 2.59,
"grad_norm": 3.281339168548584,
"learning_rate": 9.532964824120604e-06,
"loss": 0.1133,
"step": 5150
},
{
"epoch": 2.61,
"grad_norm": 2.9684898853302,
"learning_rate": 9.530452261306534e-06,
"loss": 0.1072,
"step": 5175
},
{
"epoch": 2.62,
"grad_norm": 2.7068192958831787,
"learning_rate": 9.527939698492463e-06,
"loss": 0.1108,
"step": 5200
},
{
"epoch": 2.63,
"grad_norm": 2.7589058876037598,
"learning_rate": 9.525427135678392e-06,
"loss": 0.1094,
"step": 5225
},
{
"epoch": 2.64,
"grad_norm": 2.988163709640503,
"learning_rate": 9.522914572864322e-06,
"loss": 0.1055,
"step": 5250
},
{
"epoch": 2.66,
"grad_norm": 2.748220443725586,
"learning_rate": 9.520402010050253e-06,
"loss": 0.1028,
"step": 5275
},
{
"epoch": 2.67,
"grad_norm": 2.696606397628784,
"learning_rate": 9.517889447236182e-06,
"loss": 0.1013,
"step": 5300
},
{
"epoch": 2.68,
"grad_norm": 2.4986040592193604,
"learning_rate": 9.515376884422111e-06,
"loss": 0.1094,
"step": 5325
},
{
"epoch": 2.7,
"grad_norm": 2.414533853530884,
"learning_rate": 9.51286432160804e-06,
"loss": 0.1085,
"step": 5350
},
{
"epoch": 2.71,
"grad_norm": 2.5464062690734863,
"learning_rate": 9.510351758793972e-06,
"loss": 0.1029,
"step": 5375
},
{
"epoch": 2.72,
"grad_norm": 3.065047264099121,
"learning_rate": 9.5078391959799e-06,
"loss": 0.1013,
"step": 5400
},
{
"epoch": 2.73,
"grad_norm": 3.1086618900299072,
"learning_rate": 9.50532663316583e-06,
"loss": 0.1066,
"step": 5425
},
{
"epoch": 2.75,
"grad_norm": 3.5446290969848633,
"learning_rate": 9.50281407035176e-06,
"loss": 0.1098,
"step": 5450
},
{
"epoch": 2.76,
"grad_norm": 2.7708733081817627,
"learning_rate": 9.500301507537689e-06,
"loss": 0.109,
"step": 5475
},
{
"epoch": 2.77,
"grad_norm": 2.786681652069092,
"learning_rate": 9.49778894472362e-06,
"loss": 0.1062,
"step": 5500
},
{
"epoch": 2.78,
"grad_norm": 2.927002429962158,
"learning_rate": 9.49527638190955e-06,
"loss": 0.1053,
"step": 5525
},
{
"epoch": 2.8,
"grad_norm": 2.6356334686279297,
"learning_rate": 9.492763819095479e-06,
"loss": 0.1098,
"step": 5550
},
{
"epoch": 2.81,
"grad_norm": 2.5846285820007324,
"learning_rate": 9.490251256281408e-06,
"loss": 0.1026,
"step": 5575
},
{
"epoch": 2.82,
"grad_norm": 3.0148589611053467,
"learning_rate": 9.487738693467337e-06,
"loss": 0.108,
"step": 5600
},
{
"epoch": 2.83,
"grad_norm": 2.6467926502227783,
"learning_rate": 9.485226130653267e-06,
"loss": 0.1031,
"step": 5625
},
{
"epoch": 2.85,
"grad_norm": 2.8061394691467285,
"learning_rate": 9.482713567839198e-06,
"loss": 0.1045,
"step": 5650
},
{
"epoch": 2.86,
"grad_norm": 2.6581783294677734,
"learning_rate": 9.480201005025125e-06,
"loss": 0.105,
"step": 5675
},
{
"epoch": 2.87,
"grad_norm": 2.814573049545288,
"learning_rate": 9.477688442211056e-06,
"loss": 0.107,
"step": 5700
},
{
"epoch": 2.88,
"grad_norm": 2.7229998111724854,
"learning_rate": 9.475175879396985e-06,
"loss": 0.0983,
"step": 5725
},
{
"epoch": 2.9,
"grad_norm": 2.648622989654541,
"learning_rate": 9.472663316582915e-06,
"loss": 0.108,
"step": 5750
},
{
"epoch": 2.91,
"grad_norm": 2.546680212020874,
"learning_rate": 9.470150753768846e-06,
"loss": 0.106,
"step": 5775
},
{
"epoch": 2.92,
"grad_norm": 2.91450834274292,
"learning_rate": 9.467638190954775e-06,
"loss": 0.1057,
"step": 5800
},
{
"epoch": 2.93,
"grad_norm": 2.5046870708465576,
"learning_rate": 9.465125628140704e-06,
"loss": 0.1045,
"step": 5825
},
{
"epoch": 2.95,
"grad_norm": 2.452519178390503,
"learning_rate": 9.462613065326634e-06,
"loss": 0.1026,
"step": 5850
},
{
"epoch": 2.96,
"grad_norm": 2.6275572776794434,
"learning_rate": 9.460100502512563e-06,
"loss": 0.1016,
"step": 5875
},
{
"epoch": 2.97,
"grad_norm": 2.612506628036499,
"learning_rate": 9.457587939698494e-06,
"loss": 0.1037,
"step": 5900
},
{
"epoch": 2.98,
"grad_norm": 2.721682548522949,
"learning_rate": 9.455075376884423e-06,
"loss": 0.1022,
"step": 5925
},
{
"epoch": 3.0,
"grad_norm": 2.6826882362365723,
"learning_rate": 9.452562814070353e-06,
"loss": 0.1005,
"step": 5950
},
{
"epoch": 3.01,
"grad_norm": 2.7688844203948975,
"learning_rate": 9.450050251256282e-06,
"loss": 0.0843,
"step": 5975
},
{
"epoch": 3.02,
"grad_norm": 2.4665892124176025,
"learning_rate": 9.447537688442211e-06,
"loss": 0.0804,
"step": 6000
},
{
"epoch": 3.02,
"eval_loss": 0.1464279592037201,
"eval_runtime": 645.9237,
"eval_samples_per_second": 2.181,
"eval_steps_per_second": 2.181,
"eval_wer": 24.03320650294016,
"step": 6000
},
{
"epoch": 3.04,
"grad_norm": 1.9517433643341064,
"learning_rate": 9.44502512562814e-06,
"loss": 0.081,
"step": 6025
},
{
"epoch": 3.05,
"grad_norm": 2.6357505321502686,
"learning_rate": 9.442512562814072e-06,
"loss": 0.078,
"step": 6050
},
{
"epoch": 3.06,
"grad_norm": 2.9394261837005615,
"learning_rate": 9.440000000000001e-06,
"loss": 0.083,
"step": 6075
},
{
"epoch": 3.07,
"grad_norm": 2.944277048110962,
"learning_rate": 9.43748743718593e-06,
"loss": 0.079,
"step": 6100
},
{
"epoch": 3.09,
"grad_norm": 2.566026210784912,
"learning_rate": 9.43497487437186e-06,
"loss": 0.0831,
"step": 6125
},
{
"epoch": 3.1,
"grad_norm": 2.319978713989258,
"learning_rate": 9.432462311557789e-06,
"loss": 0.0795,
"step": 6150
},
{
"epoch": 3.11,
"grad_norm": 2.8877954483032227,
"learning_rate": 9.42994974874372e-06,
"loss": 0.0785,
"step": 6175
},
{
"epoch": 3.12,
"grad_norm": 2.5460472106933594,
"learning_rate": 9.42743718592965e-06,
"loss": 0.0801,
"step": 6200
},
{
"epoch": 3.14,
"grad_norm": 2.396923303604126,
"learning_rate": 9.424924623115579e-06,
"loss": 0.0826,
"step": 6225
},
{
"epoch": 3.15,
"grad_norm": 2.4849960803985596,
"learning_rate": 9.422412060301508e-06,
"loss": 0.081,
"step": 6250
},
{
"epoch": 3.16,
"grad_norm": 2.4838786125183105,
"learning_rate": 9.419899497487437e-06,
"loss": 0.0838,
"step": 6275
},
{
"epoch": 3.17,
"grad_norm": 2.7214527130126953,
"learning_rate": 9.417386934673367e-06,
"loss": 0.082,
"step": 6300
},
{
"epoch": 3.19,
"grad_norm": 2.787931203842163,
"learning_rate": 9.414874371859298e-06,
"loss": 0.0817,
"step": 6325
},
{
"epoch": 3.2,
"grad_norm": 2.625025987625122,
"learning_rate": 9.412361809045227e-06,
"loss": 0.0805,
"step": 6350
},
{
"epoch": 3.21,
"grad_norm": 2.490147113800049,
"learning_rate": 9.409849246231156e-06,
"loss": 0.0796,
"step": 6375
},
{
"epoch": 3.22,
"grad_norm": 2.4763355255126953,
"learning_rate": 9.407336683417086e-06,
"loss": 0.083,
"step": 6400
},
{
"epoch": 3.24,
"grad_norm": 3.0030245780944824,
"learning_rate": 9.404824120603015e-06,
"loss": 0.0818,
"step": 6425
},
{
"epoch": 3.25,
"grad_norm": 2.714149236679077,
"learning_rate": 9.402311557788946e-06,
"loss": 0.0819,
"step": 6450
},
{
"epoch": 3.26,
"grad_norm": 2.4112088680267334,
"learning_rate": 9.399798994974875e-06,
"loss": 0.0825,
"step": 6475
},
{
"epoch": 3.27,
"grad_norm": 2.633383274078369,
"learning_rate": 9.397286432160805e-06,
"loss": 0.0804,
"step": 6500
},
{
"epoch": 3.29,
"grad_norm": 2.265183687210083,
"learning_rate": 9.394773869346736e-06,
"loss": 0.0799,
"step": 6525
},
{
"epoch": 3.3,
"grad_norm": 2.4261343479156494,
"learning_rate": 9.392261306532663e-06,
"loss": 0.0763,
"step": 6550
},
{
"epoch": 3.31,
"grad_norm": 2.6179676055908203,
"learning_rate": 9.389748743718594e-06,
"loss": 0.0796,
"step": 6575
},
{
"epoch": 3.32,
"grad_norm": 2.648509979248047,
"learning_rate": 9.387236180904524e-06,
"loss": 0.0823,
"step": 6600
},
{
"epoch": 3.34,
"grad_norm": 2.4044175148010254,
"learning_rate": 9.384723618090453e-06,
"loss": 0.0791,
"step": 6625
},
{
"epoch": 3.35,
"grad_norm": 2.3800647258758545,
"learning_rate": 9.382211055276382e-06,
"loss": 0.0788,
"step": 6650
},
{
"epoch": 3.36,
"grad_norm": 3.1068170070648193,
"learning_rate": 9.379698492462312e-06,
"loss": 0.0811,
"step": 6675
},
{
"epoch": 3.38,
"grad_norm": 2.5507326126098633,
"learning_rate": 9.377185929648241e-06,
"loss": 0.0792,
"step": 6700
},
{
"epoch": 3.39,
"grad_norm": 2.522341728210449,
"learning_rate": 9.374673366834172e-06,
"loss": 0.0784,
"step": 6725
},
{
"epoch": 3.4,
"grad_norm": 2.739595890045166,
"learning_rate": 9.372160804020101e-06,
"loss": 0.0791,
"step": 6750
},
{
"epoch": 3.41,
"grad_norm": 2.401925802230835,
"learning_rate": 9.36964824120603e-06,
"loss": 0.082,
"step": 6775
},
{
"epoch": 3.43,
"grad_norm": 3.219940662384033,
"learning_rate": 9.367135678391962e-06,
"loss": 0.0817,
"step": 6800
},
{
"epoch": 3.44,
"grad_norm": 3.2623674869537354,
"learning_rate": 9.36462311557789e-06,
"loss": 0.0791,
"step": 6825
},
{
"epoch": 3.45,
"grad_norm": 2.358572244644165,
"learning_rate": 9.36211055276382e-06,
"loss": 0.0755,
"step": 6850
},
{
"epoch": 3.46,
"grad_norm": 3.0506913661956787,
"learning_rate": 9.35959798994975e-06,
"loss": 0.0792,
"step": 6875
},
{
"epoch": 3.48,
"grad_norm": 2.2486371994018555,
"learning_rate": 9.357085427135679e-06,
"loss": 0.0807,
"step": 6900
},
{
"epoch": 3.49,
"grad_norm": 2.8625311851501465,
"learning_rate": 9.354572864321608e-06,
"loss": 0.0787,
"step": 6925
},
{
"epoch": 3.5,
"grad_norm": 2.4400510787963867,
"learning_rate": 9.352060301507538e-06,
"loss": 0.0804,
"step": 6950
},
{
"epoch": 3.51,
"grad_norm": 2.5003409385681152,
"learning_rate": 9.349547738693469e-06,
"loss": 0.0789,
"step": 6975
},
{
"epoch": 3.53,
"grad_norm": 2.5204198360443115,
"learning_rate": 9.347035175879398e-06,
"loss": 0.077,
"step": 7000
},
{
"epoch": 3.53,
"eval_loss": 0.15057513117790222,
"eval_runtime": 646.2962,
"eval_samples_per_second": 2.18,
"eval_steps_per_second": 2.18,
"eval_wer": 24.240747146316153,
"step": 7000
},
{
"epoch": 3.54,
"grad_norm": 2.7666544914245605,
"learning_rate": 9.344522613065327e-06,
"loss": 0.0779,
"step": 7025
},
{
"epoch": 3.55,
"grad_norm": 3.256955146789551,
"learning_rate": 9.342010050251257e-06,
"loss": 0.0799,
"step": 7050
},
{
"epoch": 3.56,
"grad_norm": 2.829012155532837,
"learning_rate": 9.339497487437188e-06,
"loss": 0.0811,
"step": 7075
},
{
"epoch": 3.58,
"grad_norm": 2.6960537433624268,
"learning_rate": 9.336984924623115e-06,
"loss": 0.0787,
"step": 7100
},
{
"epoch": 3.59,
"grad_norm": 2.7486023902893066,
"learning_rate": 9.334472361809046e-06,
"loss": 0.0828,
"step": 7125
},
{
"epoch": 3.6,
"grad_norm": 2.8527791500091553,
"learning_rate": 9.331959798994976e-06,
"loss": 0.0813,
"step": 7150
},
{
"epoch": 3.61,
"grad_norm": 2.6692473888397217,
"learning_rate": 9.329447236180905e-06,
"loss": 0.0768,
"step": 7175
},
{
"epoch": 3.63,
"grad_norm": 2.2904937267303467,
"learning_rate": 9.326934673366836e-06,
"loss": 0.0788,
"step": 7200
},
{
"epoch": 3.64,
"grad_norm": 3.499237060546875,
"learning_rate": 9.324422110552764e-06,
"loss": 0.0764,
"step": 7225
},
{
"epoch": 3.65,
"grad_norm": 2.6315267086029053,
"learning_rate": 9.321909547738695e-06,
"loss": 0.0746,
"step": 7250
},
{
"epoch": 3.66,
"grad_norm": 3.006561040878296,
"learning_rate": 9.319396984924624e-06,
"loss": 0.076,
"step": 7275
},
{
"epoch": 3.68,
"grad_norm": 2.663254976272583,
"learning_rate": 9.316884422110553e-06,
"loss": 0.0784,
"step": 7300
},
{
"epoch": 3.69,
"grad_norm": 2.6093807220458984,
"learning_rate": 9.314371859296483e-06,
"loss": 0.0768,
"step": 7325
},
{
"epoch": 3.7,
"grad_norm": 2.7296223640441895,
"learning_rate": 9.311859296482414e-06,
"loss": 0.0761,
"step": 7350
},
{
"epoch": 3.72,
"grad_norm": 2.5256307125091553,
"learning_rate": 9.309346733668343e-06,
"loss": 0.0771,
"step": 7375
},
{
"epoch": 3.73,
"grad_norm": 2.707585573196411,
"learning_rate": 9.306834170854272e-06,
"loss": 0.0753,
"step": 7400
},
{
"epoch": 3.74,
"grad_norm": 3.1183390617370605,
"learning_rate": 9.304321608040201e-06,
"loss": 0.0794,
"step": 7425
},
{
"epoch": 3.75,
"grad_norm": 2.302847385406494,
"learning_rate": 9.30180904522613e-06,
"loss": 0.0772,
"step": 7450
},
{
"epoch": 3.77,
"grad_norm": 2.5927348136901855,
"learning_rate": 9.299296482412062e-06,
"loss": 0.0749,
"step": 7475
},
{
"epoch": 3.78,
"grad_norm": 2.6165075302124023,
"learning_rate": 9.296783919597991e-06,
"loss": 0.0788,
"step": 7500
},
{
"epoch": 3.79,
"grad_norm": 2.674424171447754,
"learning_rate": 9.29427135678392e-06,
"loss": 0.0768,
"step": 7525
},
{
"epoch": 3.8,
"grad_norm": 2.872770309448242,
"learning_rate": 9.29175879396985e-06,
"loss": 0.0811,
"step": 7550
},
{
"epoch": 3.82,
"grad_norm": 2.9125661849975586,
"learning_rate": 9.289246231155779e-06,
"loss": 0.0739,
"step": 7575
},
{
"epoch": 3.83,
"grad_norm": 2.61698317527771,
"learning_rate": 9.28673366834171e-06,
"loss": 0.0771,
"step": 7600
},
{
"epoch": 3.84,
"grad_norm": 3.0637826919555664,
"learning_rate": 9.28422110552764e-06,
"loss": 0.0791,
"step": 7625
},
{
"epoch": 3.85,
"grad_norm": 2.3239142894744873,
"learning_rate": 9.281708542713569e-06,
"loss": 0.0773,
"step": 7650
},
{
"epoch": 3.87,
"grad_norm": 2.6094796657562256,
"learning_rate": 9.279195979899498e-06,
"loss": 0.0755,
"step": 7675
},
{
"epoch": 3.88,
"grad_norm": 2.530613422393799,
"learning_rate": 9.276683417085427e-06,
"loss": 0.0749,
"step": 7700
},
{
"epoch": 3.89,
"grad_norm": 3.653653621673584,
"learning_rate": 9.274170854271357e-06,
"loss": 0.0743,
"step": 7725
},
{
"epoch": 3.9,
"grad_norm": 2.6792755126953125,
"learning_rate": 9.271658291457288e-06,
"loss": 0.0763,
"step": 7750
},
{
"epoch": 3.92,
"grad_norm": 2.9613704681396484,
"learning_rate": 9.269145728643217e-06,
"loss": 0.0723,
"step": 7775
},
{
"epoch": 3.93,
"grad_norm": 2.2027602195739746,
"learning_rate": 9.266633165829146e-06,
"loss": 0.0789,
"step": 7800
},
{
"epoch": 3.94,
"grad_norm": 2.569223165512085,
"learning_rate": 9.264120603015076e-06,
"loss": 0.072,
"step": 7825
},
{
"epoch": 3.95,
"grad_norm": 2.3976686000823975,
"learning_rate": 9.261608040201005e-06,
"loss": 0.0737,
"step": 7850
},
{
"epoch": 3.97,
"grad_norm": 2.5629305839538574,
"learning_rate": 9.259095477386936e-06,
"loss": 0.0762,
"step": 7875
},
{
"epoch": 3.98,
"grad_norm": 2.397019147872925,
"learning_rate": 9.256582914572865e-06,
"loss": 0.0758,
"step": 7900
},
{
"epoch": 3.99,
"grad_norm": 2.764029026031494,
"learning_rate": 9.254070351758795e-06,
"loss": 0.0723,
"step": 7925
},
{
"epoch": 4.01,
"grad_norm": 2.1665878295898438,
"learning_rate": 9.251557788944724e-06,
"loss": 0.0654,
"step": 7950
},
{
"epoch": 4.02,
"grad_norm": 2.1100118160247803,
"learning_rate": 9.249045226130653e-06,
"loss": 0.0557,
"step": 7975
},
{
"epoch": 4.03,
"grad_norm": 2.4302258491516113,
"learning_rate": 9.246532663316584e-06,
"loss": 0.0539,
"step": 8000
},
{
"epoch": 4.03,
"eval_loss": 0.15238162875175476,
"eval_runtime": 644.4842,
"eval_samples_per_second": 2.186,
"eval_steps_per_second": 2.186,
"eval_wer": 23.85333794534763,
"step": 8000
},
{
"epoch": 4.04,
"grad_norm": 2.350156545639038,
"learning_rate": 9.244020100502514e-06,
"loss": 0.0541,
"step": 8025
},
{
"epoch": 4.06,
"grad_norm": 2.4464669227600098,
"learning_rate": 9.241507537688443e-06,
"loss": 0.0538,
"step": 8050
},
{
"epoch": 4.07,
"grad_norm": 2.123314619064331,
"learning_rate": 9.238994974874372e-06,
"loss": 0.0545,
"step": 8075
},
{
"epoch": 4.08,
"grad_norm": 2.584456443786621,
"learning_rate": 9.236482412060302e-06,
"loss": 0.0563,
"step": 8100
},
{
"epoch": 4.09,
"grad_norm": 2.46744704246521,
"learning_rate": 9.233969849246231e-06,
"loss": 0.0548,
"step": 8125
},
{
"epoch": 4.11,
"grad_norm": 2.7734973430633545,
"learning_rate": 9.231457286432162e-06,
"loss": 0.0593,
"step": 8150
},
{
"epoch": 4.12,
"grad_norm": 2.5305910110473633,
"learning_rate": 9.228944723618091e-06,
"loss": 0.058,
"step": 8175
},
{
"epoch": 4.13,
"grad_norm": 2.668431043624878,
"learning_rate": 9.22643216080402e-06,
"loss": 0.0559,
"step": 8200
},
{
"epoch": 4.14,
"grad_norm": 2.23030161857605,
"learning_rate": 9.223919597989952e-06,
"loss": 0.0553,
"step": 8225
},
{
"epoch": 4.16,
"grad_norm": 2.2469186782836914,
"learning_rate": 9.22140703517588e-06,
"loss": 0.0546,
"step": 8250
},
{
"epoch": 4.17,
"grad_norm": 2.3184828758239746,
"learning_rate": 9.21889447236181e-06,
"loss": 0.0551,
"step": 8275
},
{
"epoch": 4.18,
"grad_norm": 2.3341612815856934,
"learning_rate": 9.21638190954774e-06,
"loss": 0.0567,
"step": 8300
},
{
"epoch": 4.19,
"grad_norm": 2.4817066192626953,
"learning_rate": 9.213869346733669e-06,
"loss": 0.0531,
"step": 8325
},
{
"epoch": 4.21,
"grad_norm": 2.299858808517456,
"learning_rate": 9.211356783919598e-06,
"loss": 0.0545,
"step": 8350
},
{
"epoch": 4.22,
"grad_norm": 2.6612911224365234,
"learning_rate": 9.208844221105528e-06,
"loss": 0.0546,
"step": 8375
},
{
"epoch": 4.23,
"grad_norm": 2.7073473930358887,
"learning_rate": 9.206331658291459e-06,
"loss": 0.0551,
"step": 8400
},
{
"epoch": 4.24,
"grad_norm": 2.435814142227173,
"learning_rate": 9.203819095477388e-06,
"loss": 0.0538,
"step": 8425
},
{
"epoch": 4.26,
"grad_norm": 2.920555353164673,
"learning_rate": 9.201306532663317e-06,
"loss": 0.0581,
"step": 8450
},
{
"epoch": 4.27,
"grad_norm": 2.4426980018615723,
"learning_rate": 9.198793969849247e-06,
"loss": 0.0527,
"step": 8475
},
{
"epoch": 4.28,
"grad_norm": 2.282799243927002,
"learning_rate": 9.196281407035178e-06,
"loss": 0.0539,
"step": 8500
},
{
"epoch": 4.29,
"grad_norm": 2.3802311420440674,
"learning_rate": 9.193768844221105e-06,
"loss": 0.0544,
"step": 8525
},
{
"epoch": 4.31,
"grad_norm": 2.6062004566192627,
"learning_rate": 9.191256281407036e-06,
"loss": 0.0539,
"step": 8550
},
{
"epoch": 4.32,
"grad_norm": 2.3153014183044434,
"learning_rate": 9.188743718592966e-06,
"loss": 0.0576,
"step": 8575
},
{
"epoch": 4.33,
"grad_norm": 2.6936705112457275,
"learning_rate": 9.186231155778895e-06,
"loss": 0.0518,
"step": 8600
},
{
"epoch": 4.35,
"grad_norm": 2.5648863315582275,
"learning_rate": 9.183718592964826e-06,
"loss": 0.0585,
"step": 8625
},
{
"epoch": 4.36,
"grad_norm": 2.5685312747955322,
"learning_rate": 9.181206030150754e-06,
"loss": 0.057,
"step": 8650
},
{
"epoch": 4.37,
"grad_norm": 2.8490381240844727,
"learning_rate": 9.178693467336685e-06,
"loss": 0.0543,
"step": 8675
},
{
"epoch": 4.38,
"grad_norm": 2.2152018547058105,
"learning_rate": 9.176180904522614e-06,
"loss": 0.0563,
"step": 8700
},
{
"epoch": 4.4,
"grad_norm": 2.69919490814209,
"learning_rate": 9.173668341708543e-06,
"loss": 0.0553,
"step": 8725
},
{
"epoch": 4.41,
"grad_norm": 2.7225608825683594,
"learning_rate": 9.171155778894473e-06,
"loss": 0.0575,
"step": 8750
},
{
"epoch": 4.42,
"grad_norm": 2.559675455093384,
"learning_rate": 9.168643216080404e-06,
"loss": 0.0553,
"step": 8775
},
{
"epoch": 4.43,
"grad_norm": 2.781768798828125,
"learning_rate": 9.166130653266331e-06,
"loss": 0.0578,
"step": 8800
},
{
"epoch": 4.45,
"grad_norm": 2.8981781005859375,
"learning_rate": 9.163618090452262e-06,
"loss": 0.0542,
"step": 8825
},
{
"epoch": 4.46,
"grad_norm": 2.6946628093719482,
"learning_rate": 9.161105527638192e-06,
"loss": 0.059,
"step": 8850
},
{
"epoch": 4.47,
"grad_norm": 2.129403591156006,
"learning_rate": 9.158592964824121e-06,
"loss": 0.0509,
"step": 8875
},
{
"epoch": 4.48,
"grad_norm": 3.163231372833252,
"learning_rate": 9.156080402010052e-06,
"loss": 0.0549,
"step": 8900
},
{
"epoch": 4.5,
"grad_norm": 2.8405816555023193,
"learning_rate": 9.15356783919598e-06,
"loss": 0.0563,
"step": 8925
},
{
"epoch": 4.51,
"grad_norm": 2.1925594806671143,
"learning_rate": 9.15105527638191e-06,
"loss": 0.0553,
"step": 8950
},
{
"epoch": 4.52,
"grad_norm": 2.4375970363616943,
"learning_rate": 9.14854271356784e-06,
"loss": 0.0584,
"step": 8975
},
{
"epoch": 4.53,
"grad_norm": 2.4587666988372803,
"learning_rate": 9.14603015075377e-06,
"loss": 0.0553,
"step": 9000
},
{
"epoch": 4.53,
"eval_loss": 0.1637195497751236,
"eval_runtime": 651.0456,
"eval_samples_per_second": 2.164,
"eval_steps_per_second": 2.164,
"eval_wer": 24.344517468004153,
"step": 9000
},
{
"epoch": 4.55,
"grad_norm": 2.439188003540039,
"learning_rate": 9.1435175879397e-06,
"loss": 0.0542,
"step": 9025
},
{
"epoch": 4.56,
"grad_norm": null,
"learning_rate": 9.141105527638192e-06,
"loss": 0.0547,
"step": 9050
},
{
"epoch": 4.57,
"grad_norm": 2.4117727279663086,
"learning_rate": 9.138592964824121e-06,
"loss": 0.0526,
"step": 9075
},
{
"epoch": 4.58,
"grad_norm": 2.603896379470825,
"learning_rate": 9.136080402010052e-06,
"loss": 0.0527,
"step": 9100
},
{
"epoch": 4.6,
"grad_norm": 2.533517360687256,
"learning_rate": 9.13356783919598e-06,
"loss": 0.0566,
"step": 9125
},
{
"epoch": 4.61,
"grad_norm": 2.7762629985809326,
"learning_rate": 9.13105527638191e-06,
"loss": 0.0547,
"step": 9150
},
{
"epoch": 4.62,
"grad_norm": 2.8527615070343018,
"learning_rate": 9.12854271356784e-06,
"loss": 0.0558,
"step": 9175
},
{
"epoch": 4.63,
"grad_norm": 2.600090503692627,
"learning_rate": 9.12603015075377e-06,
"loss": 0.0503,
"step": 9200
},
{
"epoch": 4.65,
"grad_norm": 2.3100574016571045,
"learning_rate": 9.123618090452263e-06,
"loss": 0.0534,
"step": 9225
},
{
"epoch": 4.66,
"grad_norm": 2.8445324897766113,
"learning_rate": 9.121105527638192e-06,
"loss": 0.0538,
"step": 9250
},
{
"epoch": 4.67,
"grad_norm": 3.1814417839050293,
"learning_rate": 9.118592964824121e-06,
"loss": 0.0546,
"step": 9275
},
{
"epoch": 4.69,
"grad_norm": 2.835566759109497,
"learning_rate": 9.11608040201005e-06,
"loss": 0.0539,
"step": 9300
},
{
"epoch": 4.7,
"grad_norm": 2.6004786491394043,
"learning_rate": 9.11356783919598e-06,
"loss": 0.0497,
"step": 9325
},
{
"epoch": 4.71,
"grad_norm": 2.2775909900665283,
"learning_rate": 9.111055276381911e-06,
"loss": 0.053,
"step": 9350
},
{
"epoch": 4.72,
"grad_norm": 2.242342233657837,
"learning_rate": 9.10854271356784e-06,
"loss": 0.0492,
"step": 9375
},
{
"epoch": 4.74,
"grad_norm": 2.781096935272217,
"learning_rate": 9.10603015075377e-06,
"loss": 0.0559,
"step": 9400
},
{
"epoch": 4.75,
"grad_norm": 2.561607837677002,
"learning_rate": 9.1035175879397e-06,
"loss": 0.0537,
"step": 9425
},
{
"epoch": 4.76,
"grad_norm": 3.002260208129883,
"learning_rate": 9.101005025125628e-06,
"loss": 0.0564,
"step": 9450
},
{
"epoch": 4.77,
"grad_norm": 2.7912750244140625,
"learning_rate": 9.09849246231156e-06,
"loss": 0.0542,
"step": 9475
},
{
"epoch": 4.79,
"grad_norm": 2.549391508102417,
"learning_rate": 9.095979899497489e-06,
"loss": 0.0529,
"step": 9500
},
{
"epoch": 4.8,
"grad_norm": 2.707965612411499,
"learning_rate": 9.093467336683418e-06,
"loss": 0.0526,
"step": 9525
},
{
"epoch": 4.81,
"grad_norm": 2.5057213306427,
"learning_rate": 9.090954773869347e-06,
"loss": 0.0513,
"step": 9550
},
{
"epoch": 4.82,
"grad_norm": 3.1057217121124268,
"learning_rate": 9.088442211055277e-06,
"loss": 0.0515,
"step": 9575
},
{
"epoch": 4.84,
"grad_norm": 2.3811659812927246,
"learning_rate": 9.085929648241206e-06,
"loss": 0.0518,
"step": 9600
},
{
"epoch": 4.85,
"grad_norm": 2.412745714187622,
"learning_rate": 9.083417085427137e-06,
"loss": 0.053,
"step": 9625
},
{
"epoch": 4.86,
"grad_norm": 3.048144578933716,
"learning_rate": 9.080904522613066e-06,
"loss": 0.053,
"step": 9650
},
{
"epoch": 4.87,
"grad_norm": 2.3766226768493652,
"learning_rate": 9.078391959798996e-06,
"loss": 0.0526,
"step": 9675
},
{
"epoch": 4.89,
"grad_norm": 2.6831417083740234,
"learning_rate": 9.075879396984927e-06,
"loss": 0.0525,
"step": 9700
},
{
"epoch": 4.9,
"grad_norm": 2.7392430305480957,
"learning_rate": 9.073366834170854e-06,
"loss": 0.0548,
"step": 9725
},
{
"epoch": 4.91,
"grad_norm": 2.7497315406799316,
"learning_rate": 9.070854271356785e-06,
"loss": 0.0528,
"step": 9750
},
{
"epoch": 4.92,
"grad_norm": 2.9299416542053223,
"learning_rate": 9.068341708542715e-06,
"loss": 0.0542,
"step": 9775
},
{
"epoch": 4.94,
"grad_norm": 2.889383316040039,
"learning_rate": 9.065829145728644e-06,
"loss": 0.0527,
"step": 9800
},
{
"epoch": 4.95,
"grad_norm": 2.648606777191162,
"learning_rate": 9.063316582914573e-06,
"loss": 0.0513,
"step": 9825
},
{
"epoch": 4.96,
"grad_norm": 2.225612163543701,
"learning_rate": 9.060804020100502e-06,
"loss": 0.0526,
"step": 9850
},
{
"epoch": 4.97,
"grad_norm": 2.932143211364746,
"learning_rate": 9.058291457286433e-06,
"loss": 0.051,
"step": 9875
},
{
"epoch": 4.99,
"grad_norm": 2.607672691345215,
"learning_rate": 9.055778894472363e-06,
"loss": 0.053,
"step": 9900
},
{
"epoch": 5.0,
"grad_norm": 2.8472115993499756,
"learning_rate": 9.053266331658292e-06,
"loss": 0.0557,
"step": 9925
},
{
"epoch": 5.01,
"grad_norm": 2.171074151992798,
"learning_rate": 9.050753768844221e-06,
"loss": 0.0356,
"step": 9950
},
{
"epoch": 5.03,
"grad_norm": 1.950042486190796,
"learning_rate": 9.048241206030152e-06,
"loss": 0.0381,
"step": 9975
},
{
"epoch": 5.04,
"grad_norm": 1.7547545433044434,
"learning_rate": 9.04572864321608e-06,
"loss": 0.036,
"step": 10000
},
{
"epoch": 5.04,
"eval_loss": 0.16767631471157074,
"eval_runtime": 646.7265,
"eval_samples_per_second": 2.179,
"eval_steps_per_second": 2.179,
"eval_wer": 23.65963334486337,
"step": 10000
},
{
"epoch": 5.05,
"grad_norm": 1.8145438432693481,
"learning_rate": 9.043216080402011e-06,
"loss": 0.0339,
"step": 10025
},
{
"epoch": 5.06,
"grad_norm": 2.891932725906372,
"learning_rate": 9.04070351758794e-06,
"loss": 0.0351,
"step": 10050
},
{
"epoch": 5.08,
"grad_norm": 1.8098477125167847,
"learning_rate": 9.03819095477387e-06,
"loss": 0.0362,
"step": 10075
},
{
"epoch": 5.09,
"grad_norm": 2.386594772338867,
"learning_rate": 9.0356783919598e-06,
"loss": 0.0373,
"step": 10100
},
{
"epoch": 5.1,
"grad_norm": 2.028424024581909,
"learning_rate": 9.033165829145728e-06,
"loss": 0.0367,
"step": 10125
},
{
"epoch": 5.11,
"grad_norm": 2.1175694465637207,
"learning_rate": 9.03065326633166e-06,
"loss": 0.0386,
"step": 10150
},
{
"epoch": 5.13,
"grad_norm": 1.9647341966629028,
"learning_rate": 9.028140703517589e-06,
"loss": 0.0347,
"step": 10175
},
{
"epoch": 5.14,
"grad_norm": 2.252744197845459,
"learning_rate": 9.025628140703518e-06,
"loss": 0.0354,
"step": 10200
},
{
"epoch": 5.15,
"grad_norm": 2.485556125640869,
"learning_rate": 9.023115577889447e-06,
"loss": 0.0358,
"step": 10225
},
{
"epoch": 5.16,
"grad_norm": 2.0512161254882812,
"learning_rate": 9.020603015075378e-06,
"loss": 0.0365,
"step": 10250
},
{
"epoch": 5.18,
"grad_norm": 2.402486801147461,
"learning_rate": 9.018090452261308e-06,
"loss": 0.0337,
"step": 10275
},
{
"epoch": 5.19,
"grad_norm": 2.146170139312744,
"learning_rate": 9.015577889447237e-06,
"loss": 0.035,
"step": 10300
},
{
"epoch": 5.2,
"grad_norm": 2.5019354820251465,
"learning_rate": 9.013065326633166e-06,
"loss": 0.0378,
"step": 10325
},
{
"epoch": 5.21,
"grad_norm": 2.4359068870544434,
"learning_rate": 9.010552763819096e-06,
"loss": 0.0366,
"step": 10350
},
{
"epoch": 5.23,
"grad_norm": 2.1449923515319824,
"learning_rate": 9.008040201005027e-06,
"loss": 0.0364,
"step": 10375
},
{
"epoch": 5.24,
"grad_norm": 2.202234983444214,
"learning_rate": 9.005527638190954e-06,
"loss": 0.0378,
"step": 10400
},
{
"epoch": 5.25,
"grad_norm": 2.267660140991211,
"learning_rate": 9.003015075376885e-06,
"loss": 0.0358,
"step": 10425
},
{
"epoch": 5.26,
"grad_norm": 2.0055835247039795,
"learning_rate": 9.000502512562815e-06,
"loss": 0.0368,
"step": 10450
},
{
"epoch": 5.28,
"grad_norm": 1.9618968963623047,
"learning_rate": 8.997989949748744e-06,
"loss": 0.0351,
"step": 10475
},
{
"epoch": 5.29,
"grad_norm": 2.122114896774292,
"learning_rate": 8.995477386934675e-06,
"loss": 0.0356,
"step": 10500
},
{
"epoch": 5.3,
"grad_norm": 2.236201047897339,
"learning_rate": 8.992964824120604e-06,
"loss": 0.037,
"step": 10525
},
{
"epoch": 5.31,
"grad_norm": 2.286752939224243,
"learning_rate": 8.990452261306534e-06,
"loss": 0.0353,
"step": 10550
},
{
"epoch": 5.33,
"grad_norm": 2.0843496322631836,
"learning_rate": 8.987939698492463e-06,
"loss": 0.0343,
"step": 10575
},
{
"epoch": 5.34,
"grad_norm": 3.129362106323242,
"learning_rate": 8.985427135678392e-06,
"loss": 0.0376,
"step": 10600
},
{
"epoch": 5.35,
"grad_norm": 2.6233270168304443,
"learning_rate": 8.982914572864322e-06,
"loss": 0.0338,
"step": 10625
},
{
"epoch": 5.37,
"grad_norm": 2.1038076877593994,
"learning_rate": 8.980402010050253e-06,
"loss": 0.0367,
"step": 10650
},
{
"epoch": 5.38,
"grad_norm": 2.270951509475708,
"learning_rate": 8.977889447236182e-06,
"loss": 0.0371,
"step": 10675
},
{
"epoch": 5.39,
"grad_norm": 2.929248094558716,
"learning_rate": 8.975376884422111e-06,
"loss": 0.0375,
"step": 10700
},
{
"epoch": 5.4,
"grad_norm": 1.823087453842163,
"learning_rate": 8.97286432160804e-06,
"loss": 0.0357,
"step": 10725
},
{
"epoch": 5.42,
"grad_norm": 2.542175054550171,
"learning_rate": 8.97035175879397e-06,
"loss": 0.0355,
"step": 10750
},
{
"epoch": 5.43,
"grad_norm": 2.396777391433716,
"learning_rate": 8.967839195979901e-06,
"loss": 0.0369,
"step": 10775
},
{
"epoch": 5.44,
"grad_norm": 2.5692787170410156,
"learning_rate": 8.96532663316583e-06,
"loss": 0.0369,
"step": 10800
},
{
"epoch": 5.45,
"grad_norm": 2.5763325691223145,
"learning_rate": 8.96281407035176e-06,
"loss": 0.0356,
"step": 10825
},
{
"epoch": 5.47,
"grad_norm": 1.9143195152282715,
"learning_rate": 8.960301507537689e-06,
"loss": 0.0361,
"step": 10850
},
{
"epoch": 5.48,
"grad_norm": 2.463517904281616,
"learning_rate": 8.957788944723618e-06,
"loss": 0.0347,
"step": 10875
},
{
"epoch": 5.49,
"grad_norm": 2.0476324558258057,
"learning_rate": 8.95527638190955e-06,
"loss": 0.0389,
"step": 10900
},
{
"epoch": 5.5,
"grad_norm": 3.286231517791748,
"learning_rate": 8.952763819095479e-06,
"loss": 0.0353,
"step": 10925
},
{
"epoch": 5.52,
"grad_norm": 2.1984260082244873,
"learning_rate": 8.950251256281408e-06,
"loss": 0.0366,
"step": 10950
},
{
"epoch": 5.53,
"grad_norm": 2.0697944164276123,
"learning_rate": 8.947738693467337e-06,
"loss": 0.0356,
"step": 10975
},
{
"epoch": 5.54,
"grad_norm": 2.1701056957244873,
"learning_rate": 8.945226130653267e-06,
"loss": 0.0349,
"step": 11000
},
{
"epoch": 5.54,
"eval_loss": 0.17827929556369781,
"eval_runtime": 649.3211,
"eval_samples_per_second": 2.17,
"eval_steps_per_second": 2.17,
"eval_wer": 24.047042545831893,
"step": 11000
},
{
"epoch": 5.55,
"grad_norm": 2.3994946479797363,
"learning_rate": 8.942713567839196e-06,
"loss": 0.0345,
"step": 11025
},
{
"epoch": 5.57,
"grad_norm": 2.7759196758270264,
"learning_rate": 8.940201005025127e-06,
"loss": 0.0357,
"step": 11050
},
{
"epoch": 5.58,
"grad_norm": 2.57523775100708,
"learning_rate": 8.937688442211056e-06,
"loss": 0.0327,
"step": 11075
},
{
"epoch": 5.59,
"grad_norm": 2.1448755264282227,
"learning_rate": 8.935175879396986e-06,
"loss": 0.0372,
"step": 11100
},
{
"epoch": 5.6,
"grad_norm": 2.378547191619873,
"learning_rate": 8.932663316582915e-06,
"loss": 0.0357,
"step": 11125
},
{
"epoch": 5.62,
"grad_norm": 2.524625539779663,
"learning_rate": 8.930150753768844e-06,
"loss": 0.0366,
"step": 11150
},
{
"epoch": 5.63,
"grad_norm": 2.485322952270508,
"learning_rate": 8.927638190954775e-06,
"loss": 0.0347,
"step": 11175
},
{
"epoch": 5.64,
"grad_norm": 2.4604809284210205,
"learning_rate": 8.925125628140705e-06,
"loss": 0.0356,
"step": 11200
},
{
"epoch": 5.65,
"grad_norm": 2.805788516998291,
"learning_rate": 8.922613065326634e-06,
"loss": 0.0378,
"step": 11225
},
{
"epoch": 5.67,
"grad_norm": 2.620722770690918,
"learning_rate": 8.920100502512563e-06,
"loss": 0.0352,
"step": 11250
},
{
"epoch": 5.68,
"grad_norm": 2.9701807498931885,
"learning_rate": 8.917587939698493e-06,
"loss": 0.036,
"step": 11275
},
{
"epoch": 5.69,
"grad_norm": 2.5234711170196533,
"learning_rate": 8.915075376884424e-06,
"loss": 0.0392,
"step": 11300
},
{
"epoch": 5.71,
"grad_norm": 2.4073734283447266,
"learning_rate": 8.912562814070353e-06,
"loss": 0.0373,
"step": 11325
},
{
"epoch": 5.72,
"grad_norm": 2.699392318725586,
"learning_rate": 8.910050251256282e-06,
"loss": 0.0377,
"step": 11350
},
{
"epoch": 5.73,
"grad_norm": 2.1058201789855957,
"learning_rate": 8.907537688442212e-06,
"loss": 0.0353,
"step": 11375
},
{
"epoch": 5.74,
"grad_norm": 2.494295597076416,
"learning_rate": 8.905025125628143e-06,
"loss": 0.0365,
"step": 11400
},
{
"epoch": 5.76,
"grad_norm": 2.452155828475952,
"learning_rate": 8.90251256281407e-06,
"loss": 0.0356,
"step": 11425
},
{
"epoch": 5.77,
"grad_norm": 2.7919886112213135,
"learning_rate": 8.900000000000001e-06,
"loss": 0.035,
"step": 11450
},
{
"epoch": 5.78,
"grad_norm": 2.2973413467407227,
"learning_rate": 8.89748743718593e-06,
"loss": 0.034,
"step": 11475
},
{
"epoch": 5.79,
"grad_norm": 2.4735491275787354,
"learning_rate": 8.89497487437186e-06,
"loss": 0.0346,
"step": 11500
},
{
"epoch": 5.81,
"grad_norm": 2.2433793544769287,
"learning_rate": 8.892462311557791e-06,
"loss": 0.0357,
"step": 11525
},
{
"epoch": 5.82,
"grad_norm": 2.586491107940674,
"learning_rate": 8.889949748743718e-06,
"loss": 0.0373,
"step": 11550
},
{
"epoch": 5.83,
"grad_norm": 2.7574408054351807,
"learning_rate": 8.88743718592965e-06,
"loss": 0.0368,
"step": 11575
},
{
"epoch": 5.84,
"grad_norm": 2.4347455501556396,
"learning_rate": 8.884924623115579e-06,
"loss": 0.0377,
"step": 11600
},
{
"epoch": 5.86,
"grad_norm": 2.858201503753662,
"learning_rate": 8.882412060301508e-06,
"loss": 0.036,
"step": 11625
},
{
"epoch": 5.87,
"grad_norm": 2.416962146759033,
"learning_rate": 8.879899497487437e-06,
"loss": 0.0356,
"step": 11650
},
{
"epoch": 5.88,
"grad_norm": 2.5054562091827393,
"learning_rate": 8.877386934673368e-06,
"loss": 0.0356,
"step": 11675
},
{
"epoch": 5.89,
"grad_norm": 2.83569073677063,
"learning_rate": 8.874874371859296e-06,
"loss": 0.0354,
"step": 11700
},
{
"epoch": 5.91,
"grad_norm": 3.158905029296875,
"learning_rate": 8.872361809045227e-06,
"loss": 0.0348,
"step": 11725
},
{
"epoch": 5.92,
"grad_norm": 2.396923780441284,
"learning_rate": 8.869849246231156e-06,
"loss": 0.0339,
"step": 11750
},
{
"epoch": 5.93,
"grad_norm": 2.8449628353118896,
"learning_rate": 8.867336683417086e-06,
"loss": 0.037,
"step": 11775
},
{
"epoch": 5.94,
"grad_norm": 2.5769689083099365,
"learning_rate": 8.864824120603017e-06,
"loss": 0.0354,
"step": 11800
},
{
"epoch": 5.96,
"grad_norm": 2.1106691360473633,
"learning_rate": 8.862311557788944e-06,
"loss": 0.0343,
"step": 11825
},
{
"epoch": 5.97,
"grad_norm": 2.392437696456909,
"learning_rate": 8.859798994974875e-06,
"loss": 0.0353,
"step": 11850
},
{
"epoch": 5.98,
"grad_norm": 2.2878997325897217,
"learning_rate": 8.857286432160805e-06,
"loss": 0.0354,
"step": 11875
},
{
"epoch": 5.99,
"grad_norm": 2.6576852798461914,
"learning_rate": 8.854773869346734e-06,
"loss": 0.0363,
"step": 11900
},
{
"epoch": 6.01,
"grad_norm": 1.76813542842865,
"learning_rate": 8.852261306532665e-06,
"loss": 0.0284,
"step": 11925
},
{
"epoch": 6.02,
"grad_norm": 1.7315902709960938,
"learning_rate": 8.849748743718594e-06,
"loss": 0.0213,
"step": 11950
},
{
"epoch": 6.03,
"grad_norm": 2.2705891132354736,
"learning_rate": 8.847236180904524e-06,
"loss": 0.0219,
"step": 11975
},
{
"epoch": 6.05,
"grad_norm": 1.689342737197876,
"learning_rate": 8.844723618090453e-06,
"loss": 0.0231,
"step": 12000
},
{
"epoch": 6.05,
"eval_loss": 0.18743818998336792,
"eval_runtime": 649.1371,
"eval_samples_per_second": 2.171,
"eval_steps_per_second": 2.171,
"eval_wer": 23.784157730888968,
"step": 12000
},
{
"epoch": 6.06,
"grad_norm": 1.7385754585266113,
"learning_rate": 8.842211055276382e-06,
"loss": 0.0216,
"step": 12025
},
{
"epoch": 6.07,
"grad_norm": 1.7763196229934692,
"learning_rate": 8.839698492462312e-06,
"loss": 0.0225,
"step": 12050
},
{
"epoch": 6.08,
"grad_norm": 1.9993950128555298,
"learning_rate": 8.837185929648243e-06,
"loss": 0.0232,
"step": 12075
},
{
"epoch": 6.1,
"grad_norm": 1.7563095092773438,
"learning_rate": 8.83467336683417e-06,
"loss": 0.0224,
"step": 12100
},
{
"epoch": 6.11,
"grad_norm": 1.6103582382202148,
"learning_rate": 8.832160804020101e-06,
"loss": 0.0225,
"step": 12125
},
{
"epoch": 6.12,
"grad_norm": 1.9436490535736084,
"learning_rate": 8.82964824120603e-06,
"loss": 0.021,
"step": 12150
},
{
"epoch": 6.13,
"grad_norm": 2.0505597591400146,
"learning_rate": 8.82713567839196e-06,
"loss": 0.0221,
"step": 12175
},
{
"epoch": 6.15,
"grad_norm": 1.8634746074676514,
"learning_rate": 8.824623115577891e-06,
"loss": 0.0238,
"step": 12200
},
{
"epoch": 6.16,
"grad_norm": 2.337662696838379,
"learning_rate": 8.82211055276382e-06,
"loss": 0.0227,
"step": 12225
},
{
"epoch": 6.17,
"grad_norm": 2.1642353534698486,
"learning_rate": 8.81959798994975e-06,
"loss": 0.0224,
"step": 12250
},
{
"epoch": 6.18,
"grad_norm": 1.816988468170166,
"learning_rate": 8.817085427135679e-06,
"loss": 0.0238,
"step": 12275
},
{
"epoch": 6.2,
"grad_norm": 1.9968360662460327,
"learning_rate": 8.814572864321608e-06,
"loss": 0.0234,
"step": 12300
},
{
"epoch": 6.21,
"grad_norm": 2.105729579925537,
"learning_rate": 8.812060301507538e-06,
"loss": 0.0231,
"step": 12325
},
{
"epoch": 6.22,
"grad_norm": 2.2226688861846924,
"learning_rate": 8.809547738693469e-06,
"loss": 0.023,
"step": 12350
},
{
"epoch": 6.23,
"grad_norm": 1.8855944871902466,
"learning_rate": 8.807035175879398e-06,
"loss": 0.0215,
"step": 12375
},
{
"epoch": 6.25,
"grad_norm": 1.9994747638702393,
"learning_rate": 8.804522613065327e-06,
"loss": 0.0242,
"step": 12400
},
{
"epoch": 6.26,
"grad_norm": 1.9194012880325317,
"learning_rate": 8.802010050251257e-06,
"loss": 0.022,
"step": 12425
},
{
"epoch": 6.27,
"grad_norm": 2.565969944000244,
"learning_rate": 8.799497487437186e-06,
"loss": 0.0235,
"step": 12450
},
{
"epoch": 6.28,
"grad_norm": 2.2084851264953613,
"learning_rate": 8.796984924623117e-06,
"loss": 0.0223,
"step": 12475
},
{
"epoch": 6.3,
"grad_norm": 2.1252388954162598,
"learning_rate": 8.794472361809046e-06,
"loss": 0.0241,
"step": 12500
},
{
"epoch": 6.31,
"grad_norm": 2.299900531768799,
"learning_rate": 8.791959798994976e-06,
"loss": 0.0238,
"step": 12525
},
{
"epoch": 6.32,
"grad_norm": 2.038466215133667,
"learning_rate": 8.789447236180905e-06,
"loss": 0.0217,
"step": 12550
},
{
"epoch": 6.34,
"grad_norm": 2.136720657348633,
"learning_rate": 8.786934673366834e-06,
"loss": 0.0232,
"step": 12575
},
{
"epoch": 6.35,
"grad_norm": 2.5748748779296875,
"learning_rate": 8.784422110552765e-06,
"loss": 0.0238,
"step": 12600
},
{
"epoch": 6.36,
"grad_norm": 3.1436619758605957,
"learning_rate": 8.781909547738695e-06,
"loss": 0.025,
"step": 12625
},
{
"epoch": 6.37,
"grad_norm": 1.8555638790130615,
"learning_rate": 8.779396984924624e-06,
"loss": 0.0237,
"step": 12650
},
{
"epoch": 6.39,
"grad_norm": 1.8384400606155396,
"learning_rate": 8.776884422110553e-06,
"loss": 0.0239,
"step": 12675
},
{
"epoch": 6.4,
"grad_norm": 2.0759224891662598,
"learning_rate": 8.774371859296483e-06,
"loss": 0.0215,
"step": 12700
},
{
"epoch": 6.41,
"grad_norm": 2.09233021736145,
"learning_rate": 8.771859296482412e-06,
"loss": 0.0246,
"step": 12725
},
{
"epoch": 6.42,
"grad_norm": 2.437635660171509,
"learning_rate": 8.769346733668343e-06,
"loss": 0.0229,
"step": 12750
},
{
"epoch": 6.44,
"grad_norm": 2.3498973846435547,
"learning_rate": 8.766834170854272e-06,
"loss": 0.0237,
"step": 12775
},
{
"epoch": 6.45,
"grad_norm": 2.1013572216033936,
"learning_rate": 8.764321608040202e-06,
"loss": 0.0233,
"step": 12800
},
{
"epoch": 6.46,
"grad_norm": 2.2095558643341064,
"learning_rate": 8.761809045226131e-06,
"loss": 0.0236,
"step": 12825
},
{
"epoch": 6.47,
"grad_norm": 3.4572582244873047,
"learning_rate": 8.75929648241206e-06,
"loss": 0.0219,
"step": 12850
},
{
"epoch": 6.49,
"grad_norm": 2.6476633548736572,
"learning_rate": 8.756783919597991e-06,
"loss": 0.0231,
"step": 12875
},
{
"epoch": 6.5,
"grad_norm": 2.358466863632202,
"learning_rate": 8.75427135678392e-06,
"loss": 0.0227,
"step": 12900
},
{
"epoch": 6.51,
"grad_norm": 2.037827253341675,
"learning_rate": 8.75175879396985e-06,
"loss": 0.024,
"step": 12925
},
{
"epoch": 6.52,
"grad_norm": 2.3307857513427734,
"learning_rate": 8.74924623115578e-06,
"loss": 0.022,
"step": 12950
},
{
"epoch": 6.54,
"grad_norm": 2.1784884929656982,
"learning_rate": 8.746733668341709e-06,
"loss": 0.0218,
"step": 12975
},
{
"epoch": 6.55,
"grad_norm": 2.4882514476776123,
"learning_rate": 8.74422110552764e-06,
"loss": 0.023,
"step": 13000
},
{
"epoch": 6.55,
"eval_loss": 0.20282986760139465,
"eval_runtime": 651.7625,
"eval_samples_per_second": 2.162,
"eval_steps_per_second": 2.162,
"eval_wer": 24.57973019716361,
"step": 13000
},
{
"epoch": 6.56,
"grad_norm": 3.0175468921661377,
"learning_rate": 8.741809045226131e-06,
"loss": 0.0246,
"step": 13025
},
{
"epoch": 6.57,
"grad_norm": 2.1591644287109375,
"learning_rate": 8.73929648241206e-06,
"loss": 0.0238,
"step": 13050
},
{
"epoch": 6.59,
"grad_norm": 2.0232603549957275,
"learning_rate": 8.736783919597991e-06,
"loss": 0.0231,
"step": 13075
},
{
"epoch": 6.6,
"grad_norm": 1.9856449365615845,
"learning_rate": 8.734271356783919e-06,
"loss": 0.023,
"step": 13100
},
{
"epoch": 6.61,
"grad_norm": 1.9815651178359985,
"learning_rate": 8.73175879396985e-06,
"loss": 0.0237,
"step": 13125
},
{
"epoch": 6.62,
"grad_norm": 1.602400302886963,
"learning_rate": 8.72924623115578e-06,
"loss": 0.0255,
"step": 13150
},
{
"epoch": 6.64,
"grad_norm": 2.4619295597076416,
"learning_rate": 8.726733668341709e-06,
"loss": 0.0226,
"step": 13175
},
{
"epoch": 6.65,
"grad_norm": 2.190075397491455,
"learning_rate": 8.72422110552764e-06,
"loss": 0.0245,
"step": 13200
},
{
"epoch": 6.66,
"grad_norm": 1.8968470096588135,
"learning_rate": 8.721708542713569e-06,
"loss": 0.0221,
"step": 13225
},
{
"epoch": 6.68,
"grad_norm": 2.0752451419830322,
"learning_rate": 8.719195979899498e-06,
"loss": 0.0227,
"step": 13250
},
{
"epoch": 6.69,
"grad_norm": 1.8338621854782104,
"learning_rate": 8.716683417085428e-06,
"loss": 0.0231,
"step": 13275
},
{
"epoch": 6.7,
"grad_norm": 2.2742509841918945,
"learning_rate": 8.714170854271357e-06,
"loss": 0.0247,
"step": 13300
},
{
"epoch": 6.71,
"grad_norm": 2.536423921585083,
"learning_rate": 8.711658291457286e-06,
"loss": 0.0234,
"step": 13325
},
{
"epoch": 6.73,
"grad_norm": 1.7209787368774414,
"learning_rate": 8.709145728643217e-06,
"loss": 0.0237,
"step": 13350
},
{
"epoch": 6.74,
"grad_norm": 2.257042646408081,
"learning_rate": 8.706633165829147e-06,
"loss": 0.0229,
"step": 13375
},
{
"epoch": 6.75,
"grad_norm": 1.924156665802002,
"learning_rate": 8.704120603015076e-06,
"loss": 0.0212,
"step": 13400
},
{
"epoch": 6.76,
"grad_norm": 2.343059778213501,
"learning_rate": 8.701608040201005e-06,
"loss": 0.0236,
"step": 13425
},
{
"epoch": 6.78,
"grad_norm": 2.157313108444214,
"learning_rate": 8.699095477386935e-06,
"loss": 0.0214,
"step": 13450
},
{
"epoch": 6.79,
"grad_norm": 2.0653934478759766,
"learning_rate": 8.696582914572866e-06,
"loss": 0.024,
"step": 13475
},
{
"epoch": 6.8,
"grad_norm": 2.4266180992126465,
"learning_rate": 8.694070351758795e-06,
"loss": 0.0239,
"step": 13500
},
{
"epoch": 6.81,
"grad_norm": 2.243062973022461,
"learning_rate": 8.691557788944724e-06,
"loss": 0.0224,
"step": 13525
},
{
"epoch": 6.83,
"grad_norm": 2.309316873550415,
"learning_rate": 8.689045226130654e-06,
"loss": 0.0238,
"step": 13550
},
{
"epoch": 6.84,
"grad_norm": 1.8502180576324463,
"learning_rate": 8.686532663316583e-06,
"loss": 0.0214,
"step": 13575
},
{
"epoch": 6.85,
"grad_norm": 2.0743794441223145,
"learning_rate": 8.684020100502514e-06,
"loss": 0.0239,
"step": 13600
},
{
"epoch": 6.86,
"grad_norm": 2.225356101989746,
"learning_rate": 8.681507537688443e-06,
"loss": 0.0212,
"step": 13625
},
{
"epoch": 6.88,
"grad_norm": 2.3651390075683594,
"learning_rate": 8.678994974874373e-06,
"loss": 0.0246,
"step": 13650
},
{
"epoch": 6.89,
"grad_norm": 2.4614925384521484,
"learning_rate": 8.676482412060302e-06,
"loss": 0.0237,
"step": 13675
},
{
"epoch": 6.9,
"grad_norm": 2.4582881927490234,
"learning_rate": 8.673969849246231e-06,
"loss": 0.0232,
"step": 13700
},
{
"epoch": 6.91,
"grad_norm": 2.3366026878356934,
"learning_rate": 8.67145728643216e-06,
"loss": 0.0236,
"step": 13725
},
{
"epoch": 6.93,
"grad_norm": 2.092181444168091,
"learning_rate": 8.668944723618092e-06,
"loss": 0.0237,
"step": 13750
},
{
"epoch": 6.94,
"grad_norm": 2.092453956604004,
"learning_rate": 8.666432160804021e-06,
"loss": 0.022,
"step": 13775
},
{
"epoch": 6.95,
"grad_norm": 2.475292682647705,
"learning_rate": 8.66391959798995e-06,
"loss": 0.0229,
"step": 13800
},
{
"epoch": 6.96,
"grad_norm": 2.873953104019165,
"learning_rate": 8.661407035175881e-06,
"loss": 0.0231,
"step": 13825
},
{
"epoch": 6.98,
"grad_norm": 2.619523763656616,
"learning_rate": 8.658894472361809e-06,
"loss": 0.0252,
"step": 13850
},
{
"epoch": 6.99,
"grad_norm": 2.5735602378845215,
"learning_rate": 8.65638190954774e-06,
"loss": 0.0231,
"step": 13875
},
{
"epoch": 7.0,
"grad_norm": 1.6024060249328613,
"learning_rate": 8.65386934673367e-06,
"loss": 0.0213,
"step": 13900
},
{
"epoch": 7.02,
"grad_norm": 1.5747658014297485,
"learning_rate": 8.651356783919599e-06,
"loss": 0.0136,
"step": 13925
},
{
"epoch": 7.03,
"grad_norm": 1.8268576860427856,
"learning_rate": 8.648844221105528e-06,
"loss": 0.0146,
"step": 13950
},
{
"epoch": 7.04,
"grad_norm": 1.4051076173782349,
"learning_rate": 8.646331658291457e-06,
"loss": 0.0145,
"step": 13975
},
{
"epoch": 7.05,
"grad_norm": 1.6383821964263916,
"learning_rate": 8.643819095477388e-06,
"loss": 0.0145,
"step": 14000
},
{
"epoch": 7.05,
"eval_loss": 0.20823825895786285,
"eval_runtime": 650.647,
"eval_samples_per_second": 2.166,
"eval_steps_per_second": 2.166,
"eval_wer": 24.088550674507093,
"step": 14000
},
{
"epoch": 7.07,
"grad_norm": 1.911106824874878,
"learning_rate": 8.641306532663318e-06,
"loss": 0.014,
"step": 14025
},
{
"epoch": 7.08,
"grad_norm": 1.6214771270751953,
"learning_rate": 8.638793969849247e-06,
"loss": 0.0141,
"step": 14050
},
{
"epoch": 7.09,
"grad_norm": 1.3229256868362427,
"learning_rate": 8.636281407035176e-06,
"loss": 0.0147,
"step": 14075
},
{
"epoch": 7.1,
"grad_norm": 1.4375226497650146,
"learning_rate": 8.633768844221107e-06,
"loss": 0.0137,
"step": 14100
},
{
"epoch": 7.12,
"grad_norm": 1.7731012105941772,
"learning_rate": 8.631256281407035e-06,
"loss": 0.0138,
"step": 14125
},
{
"epoch": 7.13,
"grad_norm": 1.825411319732666,
"learning_rate": 8.628743718592966e-06,
"loss": 0.0141,
"step": 14150
},
{
"epoch": 7.14,
"grad_norm": 2.0631155967712402,
"learning_rate": 8.626231155778895e-06,
"loss": 0.0128,
"step": 14175
},
{
"epoch": 7.15,
"grad_norm": 1.8785593509674072,
"learning_rate": 8.623718592964825e-06,
"loss": 0.0135,
"step": 14200
},
{
"epoch": 7.17,
"grad_norm": 1.9748233556747437,
"learning_rate": 8.621206030150756e-06,
"loss": 0.0135,
"step": 14225
},
{
"epoch": 7.18,
"grad_norm": 1.8081029653549194,
"learning_rate": 8.618693467336683e-06,
"loss": 0.0148,
"step": 14250
},
{
"epoch": 7.19,
"grad_norm": 1.8967790603637695,
"learning_rate": 8.616180904522614e-06,
"loss": 0.0152,
"step": 14275
},
{
"epoch": 7.2,
"grad_norm": 1.9241995811462402,
"learning_rate": 8.613668341708544e-06,
"loss": 0.0144,
"step": 14300
},
{
"epoch": 7.22,
"grad_norm": 1.5693718194961548,
"learning_rate": 8.611155778894473e-06,
"loss": 0.015,
"step": 14325
},
{
"epoch": 7.23,
"grad_norm": 1.7336968183517456,
"learning_rate": 8.608643216080402e-06,
"loss": 0.0146,
"step": 14350
},
{
"epoch": 7.24,
"grad_norm": 1.6103991270065308,
"learning_rate": 8.606130653266333e-06,
"loss": 0.0143,
"step": 14375
},
{
"epoch": 7.25,
"grad_norm": 2.3941450119018555,
"learning_rate": 8.60361809045226e-06,
"loss": 0.0159,
"step": 14400
},
{
"epoch": 7.27,
"grad_norm": 1.8727612495422363,
"learning_rate": 8.601105527638192e-06,
"loss": 0.0136,
"step": 14425
},
{
"epoch": 7.28,
"grad_norm": 1.4570417404174805,
"learning_rate": 8.598592964824121e-06,
"loss": 0.0149,
"step": 14450
},
{
"epoch": 7.29,
"grad_norm": 2.069019317626953,
"learning_rate": 8.59608040201005e-06,
"loss": 0.0139,
"step": 14475
},
{
"epoch": 7.3,
"grad_norm": 2.3598926067352295,
"learning_rate": 8.593567839195981e-06,
"loss": 0.0148,
"step": 14500
},
{
"epoch": 7.32,
"grad_norm": 1.342768907546997,
"learning_rate": 8.591055276381909e-06,
"loss": 0.0139,
"step": 14525
},
{
"epoch": 7.33,
"grad_norm": 1.2774055004119873,
"learning_rate": 8.58854271356784e-06,
"loss": 0.0153,
"step": 14550
},
{
"epoch": 7.34,
"grad_norm": 1.7923460006713867,
"learning_rate": 8.58603015075377e-06,
"loss": 0.0137,
"step": 14575
},
{
"epoch": 7.36,
"grad_norm": 1.9054781198501587,
"learning_rate": 8.583618090452261e-06,
"loss": 0.015,
"step": 14600
},
{
"epoch": 7.37,
"grad_norm": 1.9677022695541382,
"learning_rate": 8.581105527638192e-06,
"loss": 0.0151,
"step": 14625
},
{
"epoch": 7.38,
"grad_norm": 1.8171250820159912,
"learning_rate": 8.578592964824121e-06,
"loss": 0.0145,
"step": 14650
},
{
"epoch": 7.39,
"grad_norm": 1.4638991355895996,
"learning_rate": 8.57608040201005e-06,
"loss": 0.0138,
"step": 14675
},
{
"epoch": 7.41,
"grad_norm": 2.3554527759552,
"learning_rate": 8.573567839195982e-06,
"loss": 0.0148,
"step": 14700
},
{
"epoch": 7.42,
"grad_norm": 1.273007869720459,
"learning_rate": 8.57105527638191e-06,
"loss": 0.0145,
"step": 14725
},
{
"epoch": 7.43,
"grad_norm": 2.289254665374756,
"learning_rate": 8.56854271356784e-06,
"loss": 0.015,
"step": 14750
},
{
"epoch": 7.44,
"grad_norm": 1.6287271976470947,
"learning_rate": 8.56603015075377e-06,
"loss": 0.0153,
"step": 14775
},
{
"epoch": 7.46,
"grad_norm": 2.6135382652282715,
"learning_rate": 8.563517587939699e-06,
"loss": 0.0151,
"step": 14800
},
{
"epoch": 7.47,
"grad_norm": 3.0830838680267334,
"learning_rate": 8.56100502512563e-06,
"loss": 0.0161,
"step": 14825
},
{
"epoch": 7.48,
"grad_norm": 2.3788559436798096,
"learning_rate": 8.558492462311558e-06,
"loss": 0.0149,
"step": 14850
},
{
"epoch": 7.49,
"grad_norm": 1.2800523042678833,
"learning_rate": 8.555979899497489e-06,
"loss": 0.0146,
"step": 14875
},
{
"epoch": 7.51,
"grad_norm": 2.244983434677124,
"learning_rate": 8.553467336683418e-06,
"loss": 0.0151,
"step": 14900
},
{
"epoch": 7.52,
"grad_norm": 2.2439663410186768,
"learning_rate": 8.550954773869347e-06,
"loss": 0.0154,
"step": 14925
},
{
"epoch": 7.53,
"grad_norm": 1.619199514389038,
"learning_rate": 8.548442211055277e-06,
"loss": 0.0149,
"step": 14950
},
{
"epoch": 7.54,
"grad_norm": 2.012608289718628,
"learning_rate": 8.545929648241208e-06,
"loss": 0.0161,
"step": 14975
},
{
"epoch": 7.56,
"grad_norm": 1.9494653940200806,
"learning_rate": 8.543417085427135e-06,
"loss": 0.0151,
"step": 15000
},
{
"epoch": 7.56,
"eval_loss": 0.21707138419151306,
"eval_runtime": 647.0406,
"eval_samples_per_second": 2.178,
"eval_steps_per_second": 2.178,
"eval_wer": 24.047042545831893,
"step": 15000
},
{
"epoch": 7.57,
"grad_norm": 1.4061529636383057,
"learning_rate": 8.540904522613066e-06,
"loss": 0.0143,
"step": 15025
},
{
"epoch": 7.58,
"grad_norm": 1.8309212923049927,
"learning_rate": 8.538391959798996e-06,
"loss": 0.0149,
"step": 15050
},
{
"epoch": 7.59,
"grad_norm": 2.8870351314544678,
"learning_rate": 8.535879396984925e-06,
"loss": 0.0148,
"step": 15075
},
{
"epoch": 7.61,
"grad_norm": 2.2940802574157715,
"learning_rate": 8.533366834170856e-06,
"loss": 0.015,
"step": 15100
},
{
"epoch": 7.62,
"grad_norm": 2.183642625808716,
"learning_rate": 8.530854271356784e-06,
"loss": 0.0144,
"step": 15125
},
{
"epoch": 7.63,
"grad_norm": 1.9402830600738525,
"learning_rate": 8.528341708542715e-06,
"loss": 0.0147,
"step": 15150
},
{
"epoch": 7.64,
"grad_norm": 2.082012414932251,
"learning_rate": 8.525829145728644e-06,
"loss": 0.0161,
"step": 15175
},
{
"epoch": 7.66,
"grad_norm": 1.9440219402313232,
"learning_rate": 8.523316582914573e-06,
"loss": 0.0143,
"step": 15200
},
{
"epoch": 7.67,
"grad_norm": 1.9995568990707397,
"learning_rate": 8.520804020100503e-06,
"loss": 0.0157,
"step": 15225
},
{
"epoch": 7.68,
"grad_norm": 2.117058515548706,
"learning_rate": 8.518291457286434e-06,
"loss": 0.0145,
"step": 15250
},
{
"epoch": 7.7,
"grad_norm": 1.6028704643249512,
"learning_rate": 8.515778894472363e-06,
"loss": 0.0146,
"step": 15275
},
{
"epoch": 7.71,
"grad_norm": 2.337704658508301,
"learning_rate": 8.513266331658292e-06,
"loss": 0.0161,
"step": 15300
},
{
"epoch": 7.72,
"grad_norm": 2.1473734378814697,
"learning_rate": 8.510753768844222e-06,
"loss": 0.015,
"step": 15325
},
{
"epoch": 7.73,
"grad_norm": 2.084581136703491,
"learning_rate": 8.508241206030151e-06,
"loss": 0.0169,
"step": 15350
},
{
"epoch": 7.75,
"grad_norm": 1.7122375965118408,
"learning_rate": 8.505728643216082e-06,
"loss": 0.0158,
"step": 15375
},
{
"epoch": 7.76,
"grad_norm": 1.8660895824432373,
"learning_rate": 8.50321608040201e-06,
"loss": 0.0159,
"step": 15400
},
{
"epoch": 7.77,
"grad_norm": 1.6958028078079224,
"learning_rate": 8.50070351758794e-06,
"loss": 0.0157,
"step": 15425
},
{
"epoch": 7.78,
"grad_norm": 1.7803666591644287,
"learning_rate": 8.49819095477387e-06,
"loss": 0.016,
"step": 15450
},
{
"epoch": 7.8,
"grad_norm": 2.085075855255127,
"learning_rate": 8.4956783919598e-06,
"loss": 0.0153,
"step": 15475
},
{
"epoch": 7.81,
"grad_norm": 1.9727402925491333,
"learning_rate": 8.49316582914573e-06,
"loss": 0.0142,
"step": 15500
},
{
"epoch": 7.82,
"grad_norm": 2.111431837081909,
"learning_rate": 8.49065326633166e-06,
"loss": 0.0151,
"step": 15525
},
{
"epoch": 7.83,
"grad_norm": 1.8781590461730957,
"learning_rate": 8.488140703517589e-06,
"loss": 0.0149,
"step": 15550
},
{
"epoch": 7.85,
"grad_norm": 1.771287441253662,
"learning_rate": 8.485628140703518e-06,
"loss": 0.0147,
"step": 15575
},
{
"epoch": 7.86,
"grad_norm": 2.1958467960357666,
"learning_rate": 8.483115577889447e-06,
"loss": 0.0157,
"step": 15600
},
{
"epoch": 7.87,
"grad_norm": 2.4910318851470947,
"learning_rate": 8.480603015075377e-06,
"loss": 0.0154,
"step": 15625
},
{
"epoch": 7.88,
"grad_norm": 2.2781054973602295,
"learning_rate": 8.478090452261308e-06,
"loss": 0.0155,
"step": 15650
},
{
"epoch": 7.9,
"grad_norm": 2.349332332611084,
"learning_rate": 8.475577889447237e-06,
"loss": 0.015,
"step": 15675
},
{
"epoch": 7.91,
"grad_norm": 1.707576870918274,
"learning_rate": 8.473065326633166e-06,
"loss": 0.0159,
"step": 15700
},
{
"epoch": 7.92,
"grad_norm": 2.0872557163238525,
"learning_rate": 8.470552763819096e-06,
"loss": 0.0158,
"step": 15725
},
{
"epoch": 7.93,
"grad_norm": 2.1867074966430664,
"learning_rate": 8.468040201005025e-06,
"loss": 0.0157,
"step": 15750
},
{
"epoch": 7.95,
"grad_norm": 1.594016671180725,
"learning_rate": 8.465527638190956e-06,
"loss": 0.0153,
"step": 15775
},
{
"epoch": 7.96,
"grad_norm": 2.078763246536255,
"learning_rate": 8.463015075376885e-06,
"loss": 0.0155,
"step": 15800
},
{
"epoch": 7.97,
"grad_norm": 1.8778270483016968,
"learning_rate": 8.460502512562815e-06,
"loss": 0.0161,
"step": 15825
},
{
"epoch": 7.98,
"grad_norm": 1.7115347385406494,
"learning_rate": 8.457989949748744e-06,
"loss": 0.0155,
"step": 15850
},
{
"epoch": 8.0,
"grad_norm": 1.6807602643966675,
"learning_rate": 8.455477386934673e-06,
"loss": 0.0144,
"step": 15875
},
{
"epoch": 8.01,
"grad_norm": 1.332097053527832,
"learning_rate": 8.452964824120604e-06,
"loss": 0.0097,
"step": 15900
},
{
"epoch": 8.02,
"grad_norm": 1.1560015678405762,
"learning_rate": 8.450452261306534e-06,
"loss": 0.0079,
"step": 15925
},
{
"epoch": 8.04,
"grad_norm": 1.238347053527832,
"learning_rate": 8.447939698492463e-06,
"loss": 0.0095,
"step": 15950
},
{
"epoch": 8.05,
"grad_norm": 1.5310404300689697,
"learning_rate": 8.445427135678392e-06,
"loss": 0.0092,
"step": 15975
},
{
"epoch": 8.06,
"grad_norm": 1.309462547302246,
"learning_rate": 8.442914572864322e-06,
"loss": 0.0086,
"step": 16000
},
{
"epoch": 8.06,
"eval_loss": 0.22892026603221893,
"eval_runtime": 642.6728,
"eval_samples_per_second": 2.192,
"eval_steps_per_second": 2.192,
"eval_wer": 24.524386025596677,
"step": 16000
},
{
"epoch": 8.07,
"grad_norm": 2.2282941341400146,
"learning_rate": 8.440402010050251e-06,
"loss": 0.0084,
"step": 16025
},
{
"epoch": 8.09,
"grad_norm": 1.7921063899993896,
"learning_rate": 8.437889447236182e-06,
"loss": 0.0096,
"step": 16050
},
{
"epoch": 8.1,
"grad_norm": 1.3115910291671753,
"learning_rate": 8.435376884422111e-06,
"loss": 0.0088,
"step": 16075
},
{
"epoch": 8.11,
"grad_norm": 1.0947102308273315,
"learning_rate": 8.43286432160804e-06,
"loss": 0.0087,
"step": 16100
},
{
"epoch": 8.12,
"grad_norm": 1.5556052923202515,
"learning_rate": 8.430351758793972e-06,
"loss": 0.0096,
"step": 16125
},
{
"epoch": 8.14,
"grad_norm": 1.2743710279464722,
"learning_rate": 8.4278391959799e-06,
"loss": 0.0085,
"step": 16150
},
{
"epoch": 8.15,
"grad_norm": 1.6002846956253052,
"learning_rate": 8.42532663316583e-06,
"loss": 0.0084,
"step": 16175
},
{
"epoch": 8.16,
"grad_norm": 1.713494896888733,
"learning_rate": 8.42281407035176e-06,
"loss": 0.008,
"step": 16200
},
{
"epoch": 8.17,
"grad_norm": 1.761599063873291,
"learning_rate": 8.420301507537689e-06,
"loss": 0.0095,
"step": 16225
},
{
"epoch": 8.19,
"grad_norm": 2.072874069213867,
"learning_rate": 8.417788944723618e-06,
"loss": 0.01,
"step": 16250
},
{
"epoch": 8.2,
"grad_norm": 1.140694499015808,
"learning_rate": 8.415276381909548e-06,
"loss": 0.0098,
"step": 16275
},
{
"epoch": 8.21,
"grad_norm": 1.6233490705490112,
"learning_rate": 8.412763819095479e-06,
"loss": 0.0099,
"step": 16300
},
{
"epoch": 8.22,
"grad_norm": 1.6382660865783691,
"learning_rate": 8.410251256281408e-06,
"loss": 0.0093,
"step": 16325
},
{
"epoch": 8.24,
"grad_norm": 1.4330692291259766,
"learning_rate": 8.407738693467337e-06,
"loss": 0.0093,
"step": 16350
},
{
"epoch": 8.25,
"grad_norm": 1.7515724897384644,
"learning_rate": 8.405226130653267e-06,
"loss": 0.0087,
"step": 16375
},
{
"epoch": 8.26,
"grad_norm": 2.1178219318389893,
"learning_rate": 8.402713567839198e-06,
"loss": 0.0097,
"step": 16400
},
{
"epoch": 8.27,
"grad_norm": 2.0358147621154785,
"learning_rate": 8.400201005025125e-06,
"loss": 0.0099,
"step": 16425
},
{
"epoch": 8.29,
"grad_norm": 1.4251277446746826,
"learning_rate": 8.397688442211056e-06,
"loss": 0.0097,
"step": 16450
},
{
"epoch": 8.3,
"grad_norm": 1.8948729038238525,
"learning_rate": 8.395175879396986e-06,
"loss": 0.01,
"step": 16475
},
{
"epoch": 8.31,
"grad_norm": 2.1116690635681152,
"learning_rate": 8.392663316582915e-06,
"loss": 0.0097,
"step": 16500
},
{
"epoch": 8.32,
"grad_norm": 2.2370381355285645,
"learning_rate": 8.390150753768846e-06,
"loss": 0.0096,
"step": 16525
},
{
"epoch": 8.34,
"grad_norm": 1.6887876987457275,
"learning_rate": 8.387638190954774e-06,
"loss": 0.0097,
"step": 16550
},
{
"epoch": 8.35,
"grad_norm": 1.1379011869430542,
"learning_rate": 8.385125628140705e-06,
"loss": 0.0099,
"step": 16575
},
{
"epoch": 8.36,
"grad_norm": 1.4453860521316528,
"learning_rate": 8.382613065326634e-06,
"loss": 0.0103,
"step": 16600
},
{
"epoch": 8.38,
"grad_norm": 1.388378620147705,
"learning_rate": 8.380100502512563e-06,
"loss": 0.0106,
"step": 16625
},
{
"epoch": 8.39,
"grad_norm": 2.015660285949707,
"learning_rate": 8.377587939698493e-06,
"loss": 0.0098,
"step": 16650
},
{
"epoch": 8.4,
"grad_norm": 1.5107704401016235,
"learning_rate": 8.375075376884424e-06,
"loss": 0.0101,
"step": 16675
},
{
"epoch": 8.41,
"grad_norm": 1.5902012586593628,
"learning_rate": 8.372562814070353e-06,
"loss": 0.01,
"step": 16700
},
{
"epoch": 8.43,
"grad_norm": 1.8684821128845215,
"learning_rate": 8.370050251256282e-06,
"loss": 0.0102,
"step": 16725
},
{
"epoch": 8.44,
"grad_norm": 1.8811595439910889,
"learning_rate": 8.367537688442212e-06,
"loss": 0.0097,
"step": 16750
},
{
"epoch": 8.45,
"grad_norm": 2.1212289333343506,
"learning_rate": 8.365025125628141e-06,
"loss": 0.0109,
"step": 16775
},
{
"epoch": 8.46,
"grad_norm": 1.8294142484664917,
"learning_rate": 8.362512562814072e-06,
"loss": 0.0105,
"step": 16800
},
{
"epoch": 8.48,
"grad_norm": 1.940006971359253,
"learning_rate": 8.36e-06,
"loss": 0.0097,
"step": 16825
},
{
"epoch": 8.49,
"grad_norm": 1.9175909757614136,
"learning_rate": 8.35748743718593e-06,
"loss": 0.0111,
"step": 16850
},
{
"epoch": 8.5,
"grad_norm": 1.57523775100708,
"learning_rate": 8.35497487437186e-06,
"loss": 0.0095,
"step": 16875
},
{
"epoch": 8.51,
"grad_norm": 1.825614094734192,
"learning_rate": 8.35246231155779e-06,
"loss": 0.0106,
"step": 16900
},
{
"epoch": 8.53,
"grad_norm": 2.1004061698913574,
"learning_rate": 8.34994974874372e-06,
"loss": 0.0114,
"step": 16925
},
{
"epoch": 8.54,
"grad_norm": 2.540674924850464,
"learning_rate": 8.34743718592965e-06,
"loss": 0.0099,
"step": 16950
},
{
"epoch": 8.55,
"grad_norm": 1.962494134902954,
"learning_rate": 8.344924623115579e-06,
"loss": 0.0102,
"step": 16975
},
{
"epoch": 8.56,
"grad_norm": 1.7412070035934448,
"learning_rate": 8.342412060301508e-06,
"loss": 0.0093,
"step": 17000
},
{
"epoch": 8.56,
"eval_loss": 0.238552525639534,
"eval_runtime": 647.4251,
"eval_samples_per_second": 2.176,
"eval_steps_per_second": 2.176,
"eval_wer": 24.828778969214802,
"step": 17000
},
{
"epoch": 8.58,
"grad_norm": 2.0897905826568604,
"learning_rate": 8.339899497487438e-06,
"loss": 0.0105,
"step": 17025
},
{
"epoch": 8.59,
"grad_norm": 2.1660778522491455,
"learning_rate": 8.337386934673367e-06,
"loss": 0.0103,
"step": 17050
},
{
"epoch": 8.6,
"grad_norm": 1.5732314586639404,
"learning_rate": 8.334874371859298e-06,
"loss": 0.0089,
"step": 17075
},
{
"epoch": 8.61,
"grad_norm": 1.8174026012420654,
"learning_rate": 8.332361809045226e-06,
"loss": 0.0115,
"step": 17100
},
{
"epoch": 8.63,
"grad_norm": 2.1241679191589355,
"learning_rate": 8.329849246231157e-06,
"loss": 0.0098,
"step": 17125
},
{
"epoch": 8.64,
"grad_norm": 2.3191757202148438,
"learning_rate": 8.327336683417086e-06,
"loss": 0.01,
"step": 17150
},
{
"epoch": 8.65,
"grad_norm": 1.6752322912216187,
"learning_rate": 8.324824120603015e-06,
"loss": 0.01,
"step": 17175
},
{
"epoch": 8.66,
"grad_norm": 2.2477939128875732,
"learning_rate": 8.322311557788946e-06,
"loss": 0.0093,
"step": 17200
},
{
"epoch": 8.68,
"grad_norm": 1.754935622215271,
"learning_rate": 8.319798994974876e-06,
"loss": 0.0104,
"step": 17225
},
{
"epoch": 8.69,
"grad_norm": 1.7442086935043335,
"learning_rate": 8.317286432160805e-06,
"loss": 0.0096,
"step": 17250
},
{
"epoch": 8.7,
"grad_norm": 1.5157816410064697,
"learning_rate": 8.314773869346734e-06,
"loss": 0.0099,
"step": 17275
},
{
"epoch": 8.72,
"grad_norm": 1.630161166191101,
"learning_rate": 8.312261306532663e-06,
"loss": 0.0095,
"step": 17300
},
{
"epoch": 8.73,
"grad_norm": 1.7793052196502686,
"learning_rate": 8.309748743718595e-06,
"loss": 0.0118,
"step": 17325
},
{
"epoch": 8.74,
"grad_norm": 2.110165596008301,
"learning_rate": 8.307236180904524e-06,
"loss": 0.0099,
"step": 17350
},
{
"epoch": 8.75,
"grad_norm": 2.5670361518859863,
"learning_rate": 8.304723618090453e-06,
"loss": 0.0111,
"step": 17375
},
{
"epoch": 8.77,
"grad_norm": 1.9518669843673706,
"learning_rate": 8.302211055276382e-06,
"loss": 0.0107,
"step": 17400
},
{
"epoch": 8.78,
"grad_norm": 1.6892119646072388,
"learning_rate": 8.299698492462312e-06,
"loss": 0.0107,
"step": 17425
},
{
"epoch": 8.79,
"grad_norm": 1.523242473602295,
"learning_rate": 8.297185929648241e-06,
"loss": 0.0108,
"step": 17450
},
{
"epoch": 8.8,
"grad_norm": 1.384037733078003,
"learning_rate": 8.294673366834172e-06,
"loss": 0.0109,
"step": 17475
},
{
"epoch": 8.82,
"grad_norm": 1.202498435974121,
"learning_rate": 8.292160804020101e-06,
"loss": 0.0107,
"step": 17500
},
{
"epoch": 8.83,
"grad_norm": 1.504539966583252,
"learning_rate": 8.28964824120603e-06,
"loss": 0.0105,
"step": 17525
},
{
"epoch": 8.84,
"grad_norm": 1.6238044500350952,
"learning_rate": 8.287135678391962e-06,
"loss": 0.0102,
"step": 17550
},
{
"epoch": 8.85,
"grad_norm": 2.15169358253479,
"learning_rate": 8.28462311557789e-06,
"loss": 0.0096,
"step": 17575
},
{
"epoch": 8.87,
"grad_norm": 1.6884123086929321,
"learning_rate": 8.28211055276382e-06,
"loss": 0.011,
"step": 17600
},
{
"epoch": 8.88,
"grad_norm": 1.7390291690826416,
"learning_rate": 8.27959798994975e-06,
"loss": 0.0097,
"step": 17625
},
{
"epoch": 8.89,
"grad_norm": 2.6695148944854736,
"learning_rate": 8.277085427135679e-06,
"loss": 0.0103,
"step": 17650
},
{
"epoch": 8.9,
"grad_norm": 1.2569199800491333,
"learning_rate": 8.274572864321608e-06,
"loss": 0.0101,
"step": 17675
},
{
"epoch": 8.92,
"grad_norm": 1.746862769126892,
"learning_rate": 8.272060301507538e-06,
"loss": 0.0114,
"step": 17700
},
{
"epoch": 8.93,
"grad_norm": 1.8386530876159668,
"learning_rate": 8.269547738693467e-06,
"loss": 0.0097,
"step": 17725
},
{
"epoch": 8.94,
"grad_norm": 1.7652249336242676,
"learning_rate": 8.267035175879398e-06,
"loss": 0.0094,
"step": 17750
},
{
"epoch": 8.95,
"grad_norm": 1.5770463943481445,
"learning_rate": 8.264522613065327e-06,
"loss": 0.0106,
"step": 17775
},
{
"epoch": 8.97,
"grad_norm": 2.018402338027954,
"learning_rate": 8.262010050251257e-06,
"loss": 0.0104,
"step": 17800
},
{
"epoch": 8.98,
"grad_norm": 1.1194132566452026,
"learning_rate": 8.259497487437188e-06,
"loss": 0.0119,
"step": 17825
},
{
"epoch": 8.99,
"grad_norm": 1.8884706497192383,
"learning_rate": 8.256984924623115e-06,
"loss": 0.0115,
"step": 17850
},
{
"epoch": 9.01,
"grad_norm": 1.9271539449691772,
"learning_rate": 8.254472361809046e-06,
"loss": 0.0096,
"step": 17875
},
{
"epoch": 9.02,
"grad_norm": 1.3741806745529175,
"learning_rate": 8.251959798994976e-06,
"loss": 0.0058,
"step": 17900
},
{
"epoch": 9.03,
"grad_norm": 1.7070800065994263,
"learning_rate": 8.249447236180905e-06,
"loss": 0.0056,
"step": 17925
},
{
"epoch": 9.04,
"grad_norm": 1.0155062675476074,
"learning_rate": 8.246934673366836e-06,
"loss": 0.0057,
"step": 17950
},
{
"epoch": 9.06,
"grad_norm": 1.206286907196045,
"learning_rate": 8.244422110552764e-06,
"loss": 0.0059,
"step": 17975
},
{
"epoch": 9.07,
"grad_norm": 1.463138461112976,
"learning_rate": 8.241909547738695e-06,
"loss": 0.0064,
"step": 18000
},
{
"epoch": 9.07,
"eval_loss": 0.2465205192565918,
"eval_runtime": 640.5205,
"eval_samples_per_second": 2.2,
"eval_steps_per_second": 2.2,
"eval_wer": 23.770321687997235,
"step": 18000
},
{
"epoch": 9.08,
"grad_norm": 1.703255295753479,
"learning_rate": 8.239396984924624e-06,
"loss": 0.0073,
"step": 18025
},
{
"epoch": 9.09,
"grad_norm": 2.1183061599731445,
"learning_rate": 8.236884422110553e-06,
"loss": 0.0068,
"step": 18050
},
{
"epoch": 9.11,
"grad_norm": 1.6237412691116333,
"learning_rate": 8.234371859296483e-06,
"loss": 0.0068,
"step": 18075
},
{
"epoch": 9.12,
"grad_norm": 1.5408103466033936,
"learning_rate": 8.231859296482414e-06,
"loss": 0.0066,
"step": 18100
},
{
"epoch": 9.13,
"grad_norm": 1.5907872915267944,
"learning_rate": 8.229346733668341e-06,
"loss": 0.0062,
"step": 18125
},
{
"epoch": 9.14,
"grad_norm": 1.9507079124450684,
"learning_rate": 8.226834170854272e-06,
"loss": 0.0059,
"step": 18150
},
{
"epoch": 9.16,
"grad_norm": 1.63676118850708,
"learning_rate": 8.224321608040202e-06,
"loss": 0.0061,
"step": 18175
},
{
"epoch": 9.17,
"grad_norm": 1.1874332427978516,
"learning_rate": 8.221809045226131e-06,
"loss": 0.0068,
"step": 18200
},
{
"epoch": 9.18,
"grad_norm": 1.4674066305160522,
"learning_rate": 8.219296482412062e-06,
"loss": 0.0069,
"step": 18225
},
{
"epoch": 9.19,
"grad_norm": 1.2431071996688843,
"learning_rate": 8.21678391959799e-06,
"loss": 0.0069,
"step": 18250
},
{
"epoch": 9.21,
"grad_norm": 1.8287932872772217,
"learning_rate": 8.21427135678392e-06,
"loss": 0.0072,
"step": 18275
},
{
"epoch": 9.22,
"grad_norm": 1.375955581665039,
"learning_rate": 8.21175879396985e-06,
"loss": 0.0064,
"step": 18300
},
{
"epoch": 9.23,
"grad_norm": 1.492646336555481,
"learning_rate": 8.20924623115578e-06,
"loss": 0.0065,
"step": 18325
},
{
"epoch": 9.24,
"grad_norm": 2.215989828109741,
"learning_rate": 8.206733668341709e-06,
"loss": 0.006,
"step": 18350
},
{
"epoch": 9.26,
"grad_norm": 1.6772810220718384,
"learning_rate": 8.20422110552764e-06,
"loss": 0.0069,
"step": 18375
},
{
"epoch": 9.27,
"grad_norm": 1.0609341859817505,
"learning_rate": 8.201708542713569e-06,
"loss": 0.0067,
"step": 18400
},
{
"epoch": 9.28,
"grad_norm": 1.8097535371780396,
"learning_rate": 8.199195979899498e-06,
"loss": 0.0072,
"step": 18425
},
{
"epoch": 9.29,
"grad_norm": 1.3503798246383667,
"learning_rate": 8.196683417085428e-06,
"loss": 0.0066,
"step": 18450
},
{
"epoch": 9.31,
"grad_norm": 1.5325782299041748,
"learning_rate": 8.194170854271357e-06,
"loss": 0.0069,
"step": 18475
},
{
"epoch": 9.32,
"grad_norm": 1.3074427843093872,
"learning_rate": 8.191658291457288e-06,
"loss": 0.0069,
"step": 18500
},
{
"epoch": 9.33,
"grad_norm": 1.6791765689849854,
"learning_rate": 8.189145728643216e-06,
"loss": 0.008,
"step": 18525
},
{
"epoch": 9.35,
"grad_norm": 1.9651422500610352,
"learning_rate": 8.186633165829147e-06,
"loss": 0.0073,
"step": 18550
},
{
"epoch": 9.36,
"grad_norm": 1.8639694452285767,
"learning_rate": 8.184120603015076e-06,
"loss": 0.0071,
"step": 18575
},
{
"epoch": 9.37,
"grad_norm": 1.2917574644088745,
"learning_rate": 8.181608040201005e-06,
"loss": 0.0065,
"step": 18600
},
{
"epoch": 9.38,
"grad_norm": 1.3170853853225708,
"learning_rate": 8.179095477386936e-06,
"loss": 0.0063,
"step": 18625
},
{
"epoch": 9.4,
"grad_norm": 2.0459671020507812,
"learning_rate": 8.176582914572866e-06,
"loss": 0.0064,
"step": 18650
},
{
"epoch": 9.41,
"grad_norm": 2.978271245956421,
"learning_rate": 8.174070351758795e-06,
"loss": 0.007,
"step": 18675
},
{
"epoch": 9.42,
"grad_norm": 1.3300830125808716,
"learning_rate": 8.171557788944724e-06,
"loss": 0.0067,
"step": 18700
},
{
"epoch": 9.43,
"grad_norm": 2.000188112258911,
"learning_rate": 8.169045226130654e-06,
"loss": 0.0072,
"step": 18725
},
{
"epoch": 9.45,
"grad_norm": 2.4457991123199463,
"learning_rate": 8.166532663316583e-06,
"loss": 0.0078,
"step": 18750
},
{
"epoch": 9.46,
"grad_norm": 0.9397627711296082,
"learning_rate": 8.164020100502514e-06,
"loss": 0.0071,
"step": 18775
},
{
"epoch": 9.47,
"grad_norm": 2.0449130535125732,
"learning_rate": 8.161507537688443e-06,
"loss": 0.0067,
"step": 18800
},
{
"epoch": 9.48,
"grad_norm": 1.4966685771942139,
"learning_rate": 8.158994974874373e-06,
"loss": 0.0074,
"step": 18825
},
{
"epoch": 9.5,
"grad_norm": 1.2508461475372314,
"learning_rate": 8.156482412060302e-06,
"loss": 0.0061,
"step": 18850
},
{
"epoch": 9.51,
"grad_norm": 1.4124109745025635,
"learning_rate": 8.154070351758795e-06,
"loss": 0.0071,
"step": 18875
},
{
"epoch": 9.52,
"grad_norm": 1.9999688863754272,
"learning_rate": 8.151557788944724e-06,
"loss": 0.0075,
"step": 18900
},
{
"epoch": 9.53,
"grad_norm": 2.0439536571502686,
"learning_rate": 8.149045226130654e-06,
"loss": 0.0067,
"step": 18925
},
{
"epoch": 9.55,
"grad_norm": 2.8810691833496094,
"learning_rate": 8.146532663316583e-06,
"loss": 0.0069,
"step": 18950
},
{
"epoch": 9.56,
"grad_norm": 1.9366052150726318,
"learning_rate": 8.144020100502512e-06,
"loss": 0.0076,
"step": 18975
},
{
"epoch": 9.57,
"grad_norm": 1.7127236127853394,
"learning_rate": 8.141507537688443e-06,
"loss": 0.0071,
"step": 19000
},
{
"epoch": 9.57,
"eval_loss": 0.2544167637825012,
"eval_runtime": 643.4016,
"eval_samples_per_second": 2.19,
"eval_steps_per_second": 2.19,
"eval_wer": 24.081632653061224,
"step": 19000
},
{
"epoch": 9.58,
"grad_norm": 2.526472806930542,
"learning_rate": 8.138994974874373e-06,
"loss": 0.0084,
"step": 19025
},
{
"epoch": 9.6,
"grad_norm": 1.3105931282043457,
"learning_rate": 8.136482412060302e-06,
"loss": 0.0071,
"step": 19050
},
{
"epoch": 9.61,
"grad_norm": 1.9968011379241943,
"learning_rate": 8.133969849246231e-06,
"loss": 0.0075,
"step": 19075
},
{
"epoch": 9.62,
"grad_norm": 1.2742115259170532,
"learning_rate": 8.131457286432162e-06,
"loss": 0.0074,
"step": 19100
},
{
"epoch": 9.63,
"grad_norm": 1.062193512916565,
"learning_rate": 8.12894472361809e-06,
"loss": 0.0078,
"step": 19125
},
{
"epoch": 9.65,
"grad_norm": 1.0094131231307983,
"learning_rate": 8.126432160804021e-06,
"loss": 0.0069,
"step": 19150
},
{
"epoch": 9.66,
"grad_norm": 1.675285816192627,
"learning_rate": 8.12391959798995e-06,
"loss": 0.0082,
"step": 19175
},
{
"epoch": 9.67,
"grad_norm": 1.96036696434021,
"learning_rate": 8.12140703517588e-06,
"loss": 0.0071,
"step": 19200
},
{
"epoch": 9.69,
"grad_norm": 1.8763169050216675,
"learning_rate": 8.11889447236181e-06,
"loss": 0.0075,
"step": 19225
},
{
"epoch": 9.7,
"grad_norm": 2.053239107131958,
"learning_rate": 8.11638190954774e-06,
"loss": 0.008,
"step": 19250
},
{
"epoch": 9.71,
"grad_norm": 1.5086833238601685,
"learning_rate": 8.11386934673367e-06,
"loss": 0.0072,
"step": 19275
},
{
"epoch": 9.72,
"grad_norm": 1.4810270071029663,
"learning_rate": 8.111356783919599e-06,
"loss": 0.0079,
"step": 19300
},
{
"epoch": 9.74,
"grad_norm": 1.87246835231781,
"learning_rate": 8.108844221105528e-06,
"loss": 0.0082,
"step": 19325
},
{
"epoch": 9.75,
"grad_norm": 2.0673673152923584,
"learning_rate": 8.106331658291457e-06,
"loss": 0.0075,
"step": 19350
},
{
"epoch": 9.76,
"grad_norm": 1.2569661140441895,
"learning_rate": 8.103819095477388e-06,
"loss": 0.0067,
"step": 19375
},
{
"epoch": 9.77,
"grad_norm": 1.6111642122268677,
"learning_rate": 8.101306532663318e-06,
"loss": 0.0075,
"step": 19400
},
{
"epoch": 9.79,
"grad_norm": 2.3493645191192627,
"learning_rate": 8.098793969849247e-06,
"loss": 0.0077,
"step": 19425
},
{
"epoch": 9.8,
"grad_norm": 1.9058961868286133,
"learning_rate": 8.096281407035176e-06,
"loss": 0.0074,
"step": 19450
},
{
"epoch": 9.81,
"grad_norm": 1.802046537399292,
"learning_rate": 8.093768844221106e-06,
"loss": 0.0073,
"step": 19475
},
{
"epoch": 9.82,
"grad_norm": 1.5245673656463623,
"learning_rate": 8.091256281407037e-06,
"loss": 0.0067,
"step": 19500
},
{
"epoch": 9.84,
"grad_norm": 1.501242756843567,
"learning_rate": 8.088743718592966e-06,
"loss": 0.0069,
"step": 19525
},
{
"epoch": 9.85,
"grad_norm": 1.430893898010254,
"learning_rate": 8.086231155778895e-06,
"loss": 0.0083,
"step": 19550
},
{
"epoch": 9.86,
"grad_norm": 1.6655915975570679,
"learning_rate": 8.083718592964825e-06,
"loss": 0.0067,
"step": 19575
},
{
"epoch": 9.87,
"grad_norm": 1.6842190027236938,
"learning_rate": 8.081206030150754e-06,
"loss": 0.0068,
"step": 19600
},
{
"epoch": 9.89,
"grad_norm": 2.076958179473877,
"learning_rate": 8.078693467336685e-06,
"loss": 0.0073,
"step": 19625
},
{
"epoch": 9.9,
"grad_norm": 2.023754119873047,
"learning_rate": 8.076180904522614e-06,
"loss": 0.0081,
"step": 19650
},
{
"epoch": 9.91,
"grad_norm": 1.3943392038345337,
"learning_rate": 8.073668341708544e-06,
"loss": 0.0078,
"step": 19675
},
{
"epoch": 9.92,
"grad_norm": 2.7814877033233643,
"learning_rate": 8.071155778894473e-06,
"loss": 0.0079,
"step": 19700
},
{
"epoch": 9.94,
"grad_norm": 1.6121410131454468,
"learning_rate": 8.068643216080402e-06,
"loss": 0.0077,
"step": 19725
},
{
"epoch": 9.95,
"grad_norm": 1.514812707901001,
"learning_rate": 8.066130653266332e-06,
"loss": 0.0068,
"step": 19750
},
{
"epoch": 9.96,
"grad_norm": 1.439288854598999,
"learning_rate": 8.063618090452263e-06,
"loss": 0.0076,
"step": 19775
},
{
"epoch": 9.97,
"grad_norm": 0.9317819476127625,
"learning_rate": 8.061105527638192e-06,
"loss": 0.0069,
"step": 19800
},
{
"epoch": 9.99,
"grad_norm": 1.976050615310669,
"learning_rate": 8.058592964824121e-06,
"loss": 0.0078,
"step": 19825
},
{
"epoch": 10.0,
"grad_norm": 1.0201984643936157,
"learning_rate": 8.05608040201005e-06,
"loss": 0.0072,
"step": 19850
},
{
"epoch": 10.01,
"grad_norm": 0.900181233882904,
"learning_rate": 8.05356783919598e-06,
"loss": 0.0047,
"step": 19875
},
{
"epoch": 10.03,
"grad_norm": 0.8394168019294739,
"learning_rate": 8.051055276381911e-06,
"loss": 0.0049,
"step": 19900
},
{
"epoch": 10.04,
"grad_norm": 0.7329122424125671,
"learning_rate": 8.04854271356784e-06,
"loss": 0.0042,
"step": 19925
},
{
"epoch": 10.05,
"grad_norm": 1.6395137310028076,
"learning_rate": 8.04603015075377e-06,
"loss": 0.0045,
"step": 19950
},
{
"epoch": 10.06,
"grad_norm": 1.4006826877593994,
"learning_rate": 8.043517587939699e-06,
"loss": 0.0045,
"step": 19975
},
{
"epoch": 10.08,
"grad_norm": 1.6509326696395874,
"learning_rate": 8.041005025125628e-06,
"loss": 0.0045,
"step": 20000
},
{
"epoch": 10.08,
"eval_loss": 0.25544053316116333,
"eval_runtime": 780.2624,
"eval_samples_per_second": 1.806,
"eval_steps_per_second": 1.806,
"eval_wer": 23.528190937391905,
"step": 20000
},
{
"epoch": 10.09,
"grad_norm": 1.6562331914901733,
"learning_rate": 8.03849246231156e-06,
"loss": 0.0046,
"step": 20025
},
{
"epoch": 10.1,
"grad_norm": 1.7131171226501465,
"learning_rate": 8.035979899497489e-06,
"loss": 0.0051,
"step": 20050
},
{
"epoch": 10.11,
"grad_norm": 1.3539303541183472,
"learning_rate": 8.033467336683418e-06,
"loss": 0.0041,
"step": 20075
},
{
"epoch": 10.13,
"grad_norm": 1.5204474925994873,
"learning_rate": 8.030954773869347e-06,
"loss": 0.005,
"step": 20100
},
{
"epoch": 10.14,
"grad_norm": 1.5538609027862549,
"learning_rate": 8.028442211055277e-06,
"loss": 0.005,
"step": 20125
},
{
"epoch": 10.15,
"grad_norm": 1.281728744506836,
"learning_rate": 8.025929648241206e-06,
"loss": 0.0043,
"step": 20150
},
{
"epoch": 10.16,
"grad_norm": 2.3059980869293213,
"learning_rate": 8.023417085427137e-06,
"loss": 0.0046,
"step": 20175
},
{
"epoch": 10.18,
"grad_norm": 0.8961646556854248,
"learning_rate": 8.020904522613066e-06,
"loss": 0.0048,
"step": 20200
},
{
"epoch": 10.19,
"grad_norm": 1.1319265365600586,
"learning_rate": 8.018391959798996e-06,
"loss": 0.0043,
"step": 20225
},
{
"epoch": 10.2,
"grad_norm": 1.8678749799728394,
"learning_rate": 8.015879396984927e-06,
"loss": 0.0052,
"step": 20250
},
{
"epoch": 10.21,
"grad_norm": 1.574100375175476,
"learning_rate": 8.013366834170854e-06,
"loss": 0.0048,
"step": 20275
},
{
"epoch": 10.23,
"grad_norm": 1.676328182220459,
"learning_rate": 8.010854271356785e-06,
"loss": 0.0049,
"step": 20300
},
{
"epoch": 10.24,
"grad_norm": 1.3840720653533936,
"learning_rate": 8.008341708542714e-06,
"loss": 0.005,
"step": 20325
},
{
"epoch": 10.25,
"grad_norm": 1.3118586540222168,
"learning_rate": 8.005829145728644e-06,
"loss": 0.0054,
"step": 20350
},
{
"epoch": 10.26,
"grad_norm": 1.8612523078918457,
"learning_rate": 8.003316582914573e-06,
"loss": 0.0049,
"step": 20375
},
{
"epoch": 10.28,
"grad_norm": 2.0608303546905518,
"learning_rate": 8.000804020100502e-06,
"loss": 0.0053,
"step": 20400
},
{
"epoch": 10.29,
"grad_norm": 1.2124433517456055,
"learning_rate": 7.998291457286432e-06,
"loss": 0.0053,
"step": 20425
},
{
"epoch": 10.3,
"grad_norm": 1.3948249816894531,
"learning_rate": 7.995778894472363e-06,
"loss": 0.0047,
"step": 20450
},
{
"epoch": 10.31,
"grad_norm": 1.4954756498336792,
"learning_rate": 7.993266331658292e-06,
"loss": 0.0051,
"step": 20475
},
{
"epoch": 10.33,
"grad_norm": 1.2343194484710693,
"learning_rate": 7.990753768844221e-06,
"loss": 0.0056,
"step": 20500
},
{
"epoch": 10.34,
"grad_norm": 2.3053460121154785,
"learning_rate": 7.988241206030152e-06,
"loss": 0.0048,
"step": 20525
},
{
"epoch": 10.35,
"grad_norm": 1.0803661346435547,
"learning_rate": 7.98572864321608e-06,
"loss": 0.0051,
"step": 20550
},
{
"epoch": 10.37,
"grad_norm": 1.7742104530334473,
"learning_rate": 7.983216080402011e-06,
"loss": 0.0055,
"step": 20575
},
{
"epoch": 10.38,
"grad_norm": 1.462449073791504,
"learning_rate": 7.98070351758794e-06,
"loss": 0.0049,
"step": 20600
},
{
"epoch": 10.39,
"grad_norm": 1.1726417541503906,
"learning_rate": 7.97819095477387e-06,
"loss": 0.0054,
"step": 20625
},
{
"epoch": 10.4,
"grad_norm": 1.3419547080993652,
"learning_rate": 7.975678391959799e-06,
"loss": 0.0054,
"step": 20650
},
{
"epoch": 10.42,
"grad_norm": 1.3675485849380493,
"learning_rate": 7.973165829145728e-06,
"loss": 0.0052,
"step": 20675
},
{
"epoch": 10.43,
"grad_norm": 2.0673506259918213,
"learning_rate": 7.97065326633166e-06,
"loss": 0.0057,
"step": 20700
},
{
"epoch": 10.44,
"grad_norm": 1.158771276473999,
"learning_rate": 7.968140703517589e-06,
"loss": 0.0052,
"step": 20725
},
{
"epoch": 10.45,
"grad_norm": 0.8775469660758972,
"learning_rate": 7.965628140703518e-06,
"loss": 0.0051,
"step": 20750
},
{
"epoch": 10.47,
"grad_norm": 2.4425106048583984,
"learning_rate": 7.963115577889447e-06,
"loss": 0.0056,
"step": 20775
},
{
"epoch": 10.48,
"grad_norm": 1.8277249336242676,
"learning_rate": 7.960603015075378e-06,
"loss": 0.0055,
"step": 20800
},
{
"epoch": 10.49,
"grad_norm": 1.406618595123291,
"learning_rate": 7.958090452261306e-06,
"loss": 0.0057,
"step": 20825
},
{
"epoch": 10.5,
"grad_norm": 1.2207611799240112,
"learning_rate": 7.955577889447237e-06,
"loss": 0.0055,
"step": 20850
},
{
"epoch": 10.52,
"grad_norm": 1.2478914260864258,
"learning_rate": 7.953065326633166e-06,
"loss": 0.0051,
"step": 20875
},
{
"epoch": 10.53,
"grad_norm": 1.7296686172485352,
"learning_rate": 7.950552763819096e-06,
"loss": 0.0055,
"step": 20900
},
{
"epoch": 10.54,
"grad_norm": 1.705731987953186,
"learning_rate": 7.948040201005027e-06,
"loss": 0.005,
"step": 20925
},
{
"epoch": 10.55,
"grad_norm": 0.8736703991889954,
"learning_rate": 7.945527638190954e-06,
"loss": 0.0057,
"step": 20950
},
{
"epoch": 10.57,
"grad_norm": 1.4896725416183472,
"learning_rate": 7.943015075376885e-06,
"loss": 0.0052,
"step": 20975
},
{
"epoch": 10.58,
"grad_norm": 1.935691237449646,
"learning_rate": 7.940502512562815e-06,
"loss": 0.0055,
"step": 21000
},
{
"epoch": 10.58,
"eval_loss": 0.2658803462982178,
"eval_runtime": 641.6657,
"eval_samples_per_second": 2.196,
"eval_steps_per_second": 2.196,
"eval_wer": 24.116222760290558,
"step": 21000
},
{
"epoch": 10.59,
"grad_norm": 1.7740029096603394,
"learning_rate": 7.937989949748744e-06,
"loss": 0.0049,
"step": 21025
},
{
"epoch": 10.6,
"grad_norm": 1.8998734951019287,
"learning_rate": 7.935477386934673e-06,
"loss": 0.0065,
"step": 21050
},
{
"epoch": 10.62,
"grad_norm": 1.3813358545303345,
"learning_rate": 7.932964824120604e-06,
"loss": 0.006,
"step": 21075
},
{
"epoch": 10.63,
"grad_norm": 1.8003945350646973,
"learning_rate": 7.930452261306534e-06,
"loss": 0.0055,
"step": 21100
},
{
"epoch": 10.64,
"grad_norm": 1.7029547691345215,
"learning_rate": 7.927939698492463e-06,
"loss": 0.0044,
"step": 21125
},
{
"epoch": 10.65,
"grad_norm": 2.208817720413208,
"learning_rate": 7.925427135678392e-06,
"loss": 0.0049,
"step": 21150
},
{
"epoch": 10.67,
"grad_norm": 1.768268346786499,
"learning_rate": 7.922914572864322e-06,
"loss": 0.0057,
"step": 21175
},
{
"epoch": 10.68,
"grad_norm": 1.4503201246261597,
"learning_rate": 7.920402010050253e-06,
"loss": 0.0054,
"step": 21200
},
{
"epoch": 10.69,
"grad_norm": 1.497975468635559,
"learning_rate": 7.91788944723618e-06,
"loss": 0.0058,
"step": 21225
},
{
"epoch": 10.71,
"grad_norm": 1.7116060256958008,
"learning_rate": 7.915477386934674e-06,
"loss": 0.0051,
"step": 21250
},
{
"epoch": 10.72,
"grad_norm": 2.685547113418579,
"learning_rate": 7.912964824120603e-06,
"loss": 0.0062,
"step": 21275
},
{
"epoch": 10.73,
"grad_norm": 1.4006340503692627,
"learning_rate": 7.910452261306534e-06,
"loss": 0.0052,
"step": 21300
},
{
"epoch": 10.74,
"grad_norm": 1.3967355489730835,
"learning_rate": 7.907939698492463e-06,
"loss": 0.0053,
"step": 21325
},
{
"epoch": 10.76,
"grad_norm": 2.3471438884735107,
"learning_rate": 7.905427135678393e-06,
"loss": 0.0064,
"step": 21350
},
{
"epoch": 10.77,
"grad_norm": 1.9880576133728027,
"learning_rate": 7.902914572864322e-06,
"loss": 0.0049,
"step": 21375
},
{
"epoch": 10.78,
"grad_norm": 1.6665552854537964,
"learning_rate": 7.900402010050253e-06,
"loss": 0.0055,
"step": 21400
},
{
"epoch": 10.79,
"grad_norm": 1.3990721702575684,
"learning_rate": 7.89788944723618e-06,
"loss": 0.0061,
"step": 21425
},
{
"epoch": 10.81,
"grad_norm": 1.7920498847961426,
"learning_rate": 7.895376884422111e-06,
"loss": 0.0053,
"step": 21450
},
{
"epoch": 10.82,
"grad_norm": 2.0328104496002197,
"learning_rate": 7.89286432160804e-06,
"loss": 0.0055,
"step": 21475
},
{
"epoch": 10.83,
"grad_norm": 1.8484914302825928,
"learning_rate": 7.89035175879397e-06,
"loss": 0.0057,
"step": 21500
},
{
"epoch": 10.84,
"grad_norm": 1.1959257125854492,
"learning_rate": 7.887839195979901e-06,
"loss": 0.0053,
"step": 21525
},
{
"epoch": 10.86,
"grad_norm": 1.821405291557312,
"learning_rate": 7.885326633165829e-06,
"loss": 0.005,
"step": 21550
},
{
"epoch": 10.87,
"grad_norm": 1.9156001806259155,
"learning_rate": 7.88281407035176e-06,
"loss": 0.0053,
"step": 21575
},
{
"epoch": 10.88,
"grad_norm": 2.608036518096924,
"learning_rate": 7.880301507537689e-06,
"loss": 0.0058,
"step": 21600
},
{
"epoch": 10.89,
"grad_norm": 1.9512995481491089,
"learning_rate": 7.877788944723618e-06,
"loss": 0.0055,
"step": 21625
},
{
"epoch": 10.91,
"grad_norm": 2.440284252166748,
"learning_rate": 7.875276381909548e-06,
"loss": 0.0056,
"step": 21650
},
{
"epoch": 10.92,
"grad_norm": 1.5289582014083862,
"learning_rate": 7.872763819095479e-06,
"loss": 0.0063,
"step": 21675
},
{
"epoch": 10.93,
"grad_norm": 1.6737768650054932,
"learning_rate": 7.870251256281408e-06,
"loss": 0.0051,
"step": 21700
},
{
"epoch": 10.94,
"grad_norm": 1.3897589445114136,
"learning_rate": 7.867738693467337e-06,
"loss": 0.0057,
"step": 21725
},
{
"epoch": 10.96,
"grad_norm": 2.573732852935791,
"learning_rate": 7.865226130653267e-06,
"loss": 0.0063,
"step": 21750
},
{
"epoch": 10.97,
"grad_norm": 1.5241488218307495,
"learning_rate": 7.862713567839196e-06,
"loss": 0.006,
"step": 21775
},
{
"epoch": 10.98,
"grad_norm": 1.3380444049835205,
"learning_rate": 7.860201005025127e-06,
"loss": 0.0049,
"step": 21800
},
{
"epoch": 10.99,
"grad_norm": 1.0626407861709595,
"learning_rate": 7.857688442211055e-06,
"loss": 0.0052,
"step": 21825
},
{
"epoch": 11.01,
"grad_norm": 1.4017066955566406,
"learning_rate": 7.855175879396986e-06,
"loss": 0.0037,
"step": 21850
},
{
"epoch": 11.02,
"grad_norm": 0.9792363047599792,
"learning_rate": 7.852663316582915e-06,
"loss": 0.0035,
"step": 21875
},
{
"epoch": 11.03,
"grad_norm": 0.9123956561088562,
"learning_rate": 7.850150753768844e-06,
"loss": 0.0037,
"step": 21900
},
{
"epoch": 11.05,
"grad_norm": 1.5364477634429932,
"learning_rate": 7.847638190954775e-06,
"loss": 0.0041,
"step": 21925
},
{
"epoch": 11.06,
"grad_norm": 0.9540588855743408,
"learning_rate": 7.845125628140705e-06,
"loss": 0.0042,
"step": 21950
},
{
"epoch": 11.07,
"grad_norm": 1.256990909576416,
"learning_rate": 7.842613065326634e-06,
"loss": 0.0037,
"step": 21975
},
{
"epoch": 11.08,
"grad_norm": 1.1927049160003662,
"learning_rate": 7.840100502512563e-06,
"loss": 0.0034,
"step": 22000
},
{
"epoch": 11.08,
"eval_loss": 0.27246958017349243,
"eval_runtime": 638.6614,
"eval_samples_per_second": 2.206,
"eval_steps_per_second": 2.206,
"eval_wer": 24.24766516776202,
"step": 22000
},
{
"epoch": 11.1,
"grad_norm": 0.5147941708564758,
"learning_rate": 7.837587939698493e-06,
"loss": 0.0033,
"step": 22025
},
{
"epoch": 11.11,
"grad_norm": 0.9543977379798889,
"learning_rate": 7.835075376884422e-06,
"loss": 0.0038,
"step": 22050
},
{
"epoch": 11.12,
"grad_norm": 2.2430100440979004,
"learning_rate": 7.832562814070353e-06,
"loss": 0.0041,
"step": 22075
},
{
"epoch": 11.13,
"grad_norm": 2.2202541828155518,
"learning_rate": 7.830050251256282e-06,
"loss": 0.0041,
"step": 22100
},
{
"epoch": 11.15,
"grad_norm": 1.715667486190796,
"learning_rate": 7.827537688442212e-06,
"loss": 0.0039,
"step": 22125
},
{
"epoch": 11.16,
"grad_norm": 1.9119547605514526,
"learning_rate": 7.825025125628141e-06,
"loss": 0.0041,
"step": 22150
},
{
"epoch": 11.17,
"grad_norm": 1.6011847257614136,
"learning_rate": 7.82251256281407e-06,
"loss": 0.0042,
"step": 22175
},
{
"epoch": 11.18,
"grad_norm": 1.1330533027648926,
"learning_rate": 7.820000000000001e-06,
"loss": 0.0037,
"step": 22200
},
{
"epoch": 11.2,
"grad_norm": 1.442896842956543,
"learning_rate": 7.81748743718593e-06,
"loss": 0.004,
"step": 22225
},
{
"epoch": 11.21,
"grad_norm": 1.1626893281936646,
"learning_rate": 7.81497487437186e-06,
"loss": 0.0041,
"step": 22250
},
{
"epoch": 11.22,
"grad_norm": 1.245456576347351,
"learning_rate": 7.81246231155779e-06,
"loss": 0.0039,
"step": 22275
},
{
"epoch": 11.23,
"grad_norm": 1.619498372077942,
"learning_rate": 7.809949748743719e-06,
"loss": 0.0037,
"step": 22300
},
{
"epoch": 11.25,
"grad_norm": 1.3442145586013794,
"learning_rate": 7.80743718592965e-06,
"loss": 0.004,
"step": 22325
},
{
"epoch": 11.26,
"grad_norm": 1.6045056581497192,
"learning_rate": 7.804924623115579e-06,
"loss": 0.0039,
"step": 22350
},
{
"epoch": 11.27,
"grad_norm": 1.1371634006500244,
"learning_rate": 7.802412060301508e-06,
"loss": 0.0041,
"step": 22375
},
{
"epoch": 11.28,
"grad_norm": 2.101292848587036,
"learning_rate": 7.799899497487438e-06,
"loss": 0.0039,
"step": 22400
},
{
"epoch": 11.3,
"grad_norm": 1.0439993143081665,
"learning_rate": 7.797386934673367e-06,
"loss": 0.0041,
"step": 22425
},
{
"epoch": 11.31,
"grad_norm": 1.2077866792678833,
"learning_rate": 7.794874371859296e-06,
"loss": 0.0042,
"step": 22450
},
{
"epoch": 11.32,
"grad_norm": 1.229236364364624,
"learning_rate": 7.792361809045227e-06,
"loss": 0.0041,
"step": 22475
},
{
"epoch": 11.34,
"grad_norm": 1.1711903810501099,
"learning_rate": 7.789849246231157e-06,
"loss": 0.0046,
"step": 22500
},
{
"epoch": 11.35,
"grad_norm": 1.5802088975906372,
"learning_rate": 7.787336683417086e-06,
"loss": 0.0044,
"step": 22525
},
{
"epoch": 11.36,
"grad_norm": 1.4556586742401123,
"learning_rate": 7.784824120603017e-06,
"loss": 0.0042,
"step": 22550
},
{
"epoch": 11.37,
"grad_norm": 2.2199254035949707,
"learning_rate": 7.782311557788945e-06,
"loss": 0.004,
"step": 22575
},
{
"epoch": 11.39,
"grad_norm": 1.7703883647918701,
"learning_rate": 7.779798994974876e-06,
"loss": 0.0039,
"step": 22600
},
{
"epoch": 11.4,
"grad_norm": 3.0892791748046875,
"learning_rate": 7.777286432160805e-06,
"loss": 0.004,
"step": 22625
},
{
"epoch": 11.41,
"grad_norm": 2.2085580825805664,
"learning_rate": 7.774773869346734e-06,
"loss": 0.0039,
"step": 22650
},
{
"epoch": 11.42,
"grad_norm": 1.9577640295028687,
"learning_rate": 7.772261306532664e-06,
"loss": 0.0043,
"step": 22675
},
{
"epoch": 11.44,
"grad_norm": 2.051767349243164,
"learning_rate": 7.769748743718593e-06,
"loss": 0.0044,
"step": 22700
},
{
"epoch": 11.45,
"grad_norm": 0.8601694703102112,
"learning_rate": 7.767236180904522e-06,
"loss": 0.0041,
"step": 22725
},
{
"epoch": 11.46,
"grad_norm": 1.3375483751296997,
"learning_rate": 7.764723618090453e-06,
"loss": 0.0044,
"step": 22750
},
{
"epoch": 11.47,
"grad_norm": 1.3985751867294312,
"learning_rate": 7.762211055276383e-06,
"loss": 0.0042,
"step": 22775
},
{
"epoch": 11.49,
"grad_norm": 0.8720707297325134,
"learning_rate": 7.759698492462312e-06,
"loss": 0.0047,
"step": 22800
},
{
"epoch": 11.5,
"grad_norm": 0.9330138564109802,
"learning_rate": 7.757185929648243e-06,
"loss": 0.0039,
"step": 22825
},
{
"epoch": 11.51,
"grad_norm": 1.2324626445770264,
"learning_rate": 7.75467336683417e-06,
"loss": 0.0041,
"step": 22850
},
{
"epoch": 11.52,
"grad_norm": 1.7098289728164673,
"learning_rate": 7.752160804020102e-06,
"loss": 0.0043,
"step": 22875
},
{
"epoch": 11.54,
"grad_norm": 1.247007131576538,
"learning_rate": 7.749648241206031e-06,
"loss": 0.0041,
"step": 22900
},
{
"epoch": 11.55,
"grad_norm": 1.9044662714004517,
"learning_rate": 7.74713567839196e-06,
"loss": 0.0046,
"step": 22925
},
{
"epoch": 11.56,
"grad_norm": 2.0409295558929443,
"learning_rate": 7.744623115577891e-06,
"loss": 0.0043,
"step": 22950
},
{
"epoch": 11.57,
"grad_norm": 2.2986319065093994,
"learning_rate": 7.742110552763819e-06,
"loss": 0.0045,
"step": 22975
},
{
"epoch": 11.59,
"grad_norm": 2.2678630352020264,
"learning_rate": 7.73959798994975e-06,
"loss": 0.0042,
"step": 23000
},
{
"epoch": 11.59,
"eval_loss": 0.28508278727531433,
"eval_runtime": 644.32,
"eval_samples_per_second": 2.187,
"eval_steps_per_second": 2.187,
"eval_wer": 24.05396056727776,
"step": 23000
},
{
"epoch": 11.6,
"grad_norm": 1.907650113105774,
"learning_rate": 7.73708542713568e-06,
"loss": 0.0041,
"step": 23025
},
{
"epoch": 11.61,
"grad_norm": 0.9967373013496399,
"learning_rate": 7.734572864321609e-06,
"loss": 0.0044,
"step": 23050
},
{
"epoch": 11.62,
"grad_norm": 1.4243005514144897,
"learning_rate": 7.732060301507538e-06,
"loss": 0.0041,
"step": 23075
},
{
"epoch": 11.64,
"grad_norm": 1.480535864830017,
"learning_rate": 7.729547738693469e-06,
"loss": 0.0041,
"step": 23100
},
{
"epoch": 11.65,
"grad_norm": 1.5228785276412964,
"learning_rate": 7.727035175879396e-06,
"loss": 0.0048,
"step": 23125
},
{
"epoch": 11.66,
"grad_norm": 1.2098692655563354,
"learning_rate": 7.724522613065328e-06,
"loss": 0.0041,
"step": 23150
},
{
"epoch": 11.68,
"grad_norm": 1.1572978496551514,
"learning_rate": 7.722010050251257e-06,
"loss": 0.0044,
"step": 23175
},
{
"epoch": 11.69,
"grad_norm": 1.1368253231048584,
"learning_rate": 7.719497487437186e-06,
"loss": 0.0038,
"step": 23200
},
{
"epoch": 11.7,
"grad_norm": 1.6968629360198975,
"learning_rate": 7.716984924623117e-06,
"loss": 0.004,
"step": 23225
},
{
"epoch": 11.71,
"grad_norm": 1.8138012886047363,
"learning_rate": 7.714472361809045e-06,
"loss": 0.0041,
"step": 23250
},
{
"epoch": 11.73,
"grad_norm": 1.3682103157043457,
"learning_rate": 7.711959798994976e-06,
"loss": 0.0048,
"step": 23275
},
{
"epoch": 11.74,
"grad_norm": 2.3074638843536377,
"learning_rate": 7.709447236180905e-06,
"loss": 0.0043,
"step": 23300
},
{
"epoch": 11.75,
"grad_norm": 1.544771432876587,
"learning_rate": 7.706934673366834e-06,
"loss": 0.0048,
"step": 23325
},
{
"epoch": 11.76,
"grad_norm": 1.589612603187561,
"learning_rate": 7.704422110552764e-06,
"loss": 0.0043,
"step": 23350
},
{
"epoch": 11.78,
"grad_norm": 1.983675241470337,
"learning_rate": 7.701909547738695e-06,
"loss": 0.0042,
"step": 23375
},
{
"epoch": 11.79,
"grad_norm": 1.3556252717971802,
"learning_rate": 7.699396984924624e-06,
"loss": 0.0043,
"step": 23400
},
{
"epoch": 11.8,
"grad_norm": 1.4705439805984497,
"learning_rate": 7.696884422110553e-06,
"loss": 0.0048,
"step": 23425
},
{
"epoch": 11.81,
"grad_norm": 1.4444918632507324,
"learning_rate": 7.694371859296483e-06,
"loss": 0.0047,
"step": 23450
},
{
"epoch": 11.83,
"grad_norm": 1.6264967918395996,
"learning_rate": 7.691859296482412e-06,
"loss": 0.0046,
"step": 23475
},
{
"epoch": 11.84,
"grad_norm": 1.275970697402954,
"learning_rate": 7.689346733668343e-06,
"loss": 0.0044,
"step": 23500
},
{
"epoch": 11.85,
"grad_norm": 1.5057625770568848,
"learning_rate": 7.68683417085427e-06,
"loss": 0.0042,
"step": 23525
},
{
"epoch": 11.86,
"grad_norm": 1.7427964210510254,
"learning_rate": 7.684321608040202e-06,
"loss": 0.0047,
"step": 23550
},
{
"epoch": 11.88,
"grad_norm": NaN,
"learning_rate": 7.681909547738693e-06,
"loss": 0.0047,
"step": 23575
},
{
"epoch": 11.89,
"grad_norm": 2.0153872966766357,
"learning_rate": 7.679396984924624e-06,
"loss": 0.0045,
"step": 23600
},
{
"epoch": 11.9,
"grad_norm": 1.4703736305236816,
"learning_rate": 7.676884422110554e-06,
"loss": 0.005,
"step": 23625
},
{
"epoch": 11.91,
"grad_norm": 2.2874040603637695,
"learning_rate": 7.674371859296483e-06,
"loss": 0.0048,
"step": 23650
},
{
"epoch": 11.93,
"grad_norm": 2.741128921508789,
"learning_rate": 7.671859296482412e-06,
"loss": 0.0043,
"step": 23675
},
{
"epoch": 11.94,
"grad_norm": 1.3430378437042236,
"learning_rate": 7.669346733668343e-06,
"loss": 0.0042,
"step": 23700
},
{
"epoch": 11.95,
"grad_norm": 1.0754338502883911,
"learning_rate": 7.666834170854271e-06,
"loss": 0.0049,
"step": 23725
},
{
"epoch": 11.96,
"grad_norm": 1.6235313415527344,
"learning_rate": 7.664321608040202e-06,
"loss": 0.0046,
"step": 23750
},
{
"epoch": 11.98,
"grad_norm": 1.4375765323638916,
"learning_rate": 7.661809045226131e-06,
"loss": 0.0044,
"step": 23775
},
{
"epoch": 11.99,
"grad_norm": 1.9673402309417725,
"learning_rate": 7.65929648241206e-06,
"loss": 0.0047,
"step": 23800
},
{
"epoch": 12.0,
"grad_norm": 2.2287938594818115,
"learning_rate": 7.656783919597992e-06,
"loss": 0.004,
"step": 23825
},
{
"epoch": 12.02,
"grad_norm": 1.1054763793945312,
"learning_rate": 7.65427135678392e-06,
"loss": 0.0037,
"step": 23850
},
{
"epoch": 12.03,
"grad_norm": 0.7484707236289978,
"learning_rate": 7.65175879396985e-06,
"loss": 0.0033,
"step": 23875
},
{
"epoch": 12.04,
"grad_norm": 1.4415768384933472,
"learning_rate": 7.64924623115578e-06,
"loss": 0.0028,
"step": 23900
},
{
"epoch": 12.05,
"grad_norm": 0.6510108709335327,
"learning_rate": 7.646733668341709e-06,
"loss": 0.0029,
"step": 23925
},
{
"epoch": 12.07,
"grad_norm": 0.967505156993866,
"learning_rate": 7.644221105527638e-06,
"loss": 0.0028,
"step": 23950
},
{
"epoch": 12.08,
"grad_norm": 0.8767016530036926,
"learning_rate": 7.64170854271357e-06,
"loss": 0.0027,
"step": 23975
},
{
"epoch": 12.09,
"grad_norm": 1.4679666757583618,
"learning_rate": 7.639195979899499e-06,
"loss": 0.0031,
"step": 24000
},
{
"epoch": 12.09,
"eval_loss": 0.28861406445503235,
"eval_runtime": 647.0257,
"eval_samples_per_second": 2.178,
"eval_steps_per_second": 2.178,
"eval_wer": 23.825665859564165,
"step": 24000
},
{
"epoch": 12.1,
"grad_norm": 3.264605760574341,
"learning_rate": 7.636683417085428e-06,
"loss": 0.0035,
"step": 24025
},
{
"epoch": 12.12,
"grad_norm": 1.3971178531646729,
"learning_rate": 7.634170854271357e-06,
"loss": 0.0033,
"step": 24050
},
{
"epoch": 12.13,
"grad_norm": 1.240206003189087,
"learning_rate": 7.631658291457287e-06,
"loss": 0.003,
"step": 24075
},
{
"epoch": 12.14,
"grad_norm": 1.3925122022628784,
"learning_rate": 7.629145728643217e-06,
"loss": 0.003,
"step": 24100
},
{
"epoch": 12.15,
"grad_norm": 0.912455677986145,
"learning_rate": 7.626633165829146e-06,
"loss": 0.0029,
"step": 24125
},
{
"epoch": 12.17,
"grad_norm": 1.508727788925171,
"learning_rate": 7.624120603015076e-06,
"loss": 0.0029,
"step": 24150
},
{
"epoch": 12.18,
"grad_norm": 1.45113205909729,
"learning_rate": 7.621608040201006e-06,
"loss": 0.0035,
"step": 24175
},
{
"epoch": 12.19,
"grad_norm": 1.3699947595596313,
"learning_rate": 7.619095477386935e-06,
"loss": 0.0029,
"step": 24200
},
{
"epoch": 12.2,
"grad_norm": 1.319270133972168,
"learning_rate": 7.616582914572865e-06,
"loss": 0.0034,
"step": 24225
},
{
"epoch": 12.22,
"grad_norm": 1.116363286972046,
"learning_rate": 7.614070351758794e-06,
"loss": 0.0034,
"step": 24250
},
{
"epoch": 12.23,
"grad_norm": 1.1260015964508057,
"learning_rate": 7.6115577889447245e-06,
"loss": 0.0034,
"step": 24275
},
{
"epoch": 12.24,
"grad_norm": 1.9116740226745605,
"learning_rate": 7.609045226130654e-06,
"loss": 0.004,
"step": 24300
},
{
"epoch": 12.25,
"grad_norm": 2.0611562728881836,
"learning_rate": 7.606532663316584e-06,
"loss": 0.003,
"step": 24325
},
{
"epoch": 12.27,
"grad_norm": 1.186975359916687,
"learning_rate": 7.6040201005025125e-06,
"loss": 0.0033,
"step": 24350
},
{
"epoch": 12.28,
"grad_norm": 1.219438076019287,
"learning_rate": 7.601507537688443e-06,
"loss": 0.0034,
"step": 24375
},
{
"epoch": 12.29,
"grad_norm": 1.2265121936798096,
"learning_rate": 7.598994974874373e-06,
"loss": 0.0032,
"step": 24400
},
{
"epoch": 12.3,
"grad_norm": 1.6346076726913452,
"learning_rate": 7.596482412060302e-06,
"loss": 0.0034,
"step": 24425
},
{
"epoch": 12.32,
"grad_norm": 0.8696354031562805,
"learning_rate": 7.593969849246232e-06,
"loss": 0.0031,
"step": 24450
},
{
"epoch": 12.33,
"grad_norm": 1.4290732145309448,
"learning_rate": 7.591457286432161e-06,
"loss": 0.0037,
"step": 24475
},
{
"epoch": 12.34,
"grad_norm": 2.6827902793884277,
"learning_rate": 7.588944723618091e-06,
"loss": 0.0038,
"step": 24500
},
{
"epoch": 12.36,
"grad_norm": 1.816667079925537,
"learning_rate": 7.58643216080402e-06,
"loss": 0.0036,
"step": 24525
},
{
"epoch": 12.37,
"grad_norm": 0.5981873869895935,
"learning_rate": 7.5839195979899505e-06,
"loss": 0.0027,
"step": 24550
},
{
"epoch": 12.38,
"grad_norm": 2.2105541229248047,
"learning_rate": 7.58140703517588e-06,
"loss": 0.0035,
"step": 24575
},
{
"epoch": 12.39,
"grad_norm": 1.0556033849716187,
"learning_rate": 7.57889447236181e-06,
"loss": 0.0032,
"step": 24600
},
{
"epoch": 12.41,
"grad_norm": 1.0780327320098877,
"learning_rate": 7.57638190954774e-06,
"loss": 0.0035,
"step": 24625
},
{
"epoch": 12.42,
"grad_norm": 1.9094908237457275,
"learning_rate": 7.573869346733669e-06,
"loss": 0.0036,
"step": 24650
},
{
"epoch": 12.43,
"grad_norm": 1.2956584692001343,
"learning_rate": 7.571356783919599e-06,
"loss": 0.0044,
"step": 24675
},
{
"epoch": 12.44,
"grad_norm": 1.5782350301742554,
"learning_rate": 7.568844221105528e-06,
"loss": 0.0031,
"step": 24700
},
{
"epoch": 12.46,
"grad_norm": 1.1655892133712769,
"learning_rate": 7.566331658291458e-06,
"loss": 0.0033,
"step": 24725
},
{
"epoch": 12.47,
"grad_norm": 1.4031734466552734,
"learning_rate": 7.563819095477387e-06,
"loss": 0.0034,
"step": 24750
},
{
"epoch": 12.48,
"grad_norm": 1.4916791915893555,
"learning_rate": 7.561306532663317e-06,
"loss": 0.0035,
"step": 24775
},
{
"epoch": 12.49,
"grad_norm": 0.40038520097732544,
"learning_rate": 7.558793969849247e-06,
"loss": 0.0035,
"step": 24800
},
{
"epoch": 12.51,
"grad_norm": 1.3992714881896973,
"learning_rate": 7.556281407035176e-06,
"loss": 0.0033,
"step": 24825
},
{
"epoch": 12.52,
"grad_norm": 0.48557183146476746,
"learning_rate": 7.5537688442211066e-06,
"loss": 0.004,
"step": 24850
},
{
"epoch": 12.53,
"grad_norm": 1.111877679824829,
"learning_rate": 7.551256281407036e-06,
"loss": 0.0033,
"step": 24875
},
{
"epoch": 12.54,
"grad_norm": 1.1091711521148682,
"learning_rate": 7.548743718592966e-06,
"loss": 0.0034,
"step": 24900
},
{
"epoch": 12.56,
"grad_norm": 1.899695634841919,
"learning_rate": 7.5462311557788945e-06,
"loss": 0.0039,
"step": 24925
},
{
"epoch": 12.57,
"grad_norm": 1.4109854698181152,
"learning_rate": 7.543718592964825e-06,
"loss": 0.0033,
"step": 24950
},
{
"epoch": 12.58,
"grad_norm": 1.7029036283493042,
"learning_rate": 7.541206030150754e-06,
"loss": 0.0032,
"step": 24975
},
{
"epoch": 12.59,
"grad_norm": 1.6713786125183105,
"learning_rate": 7.538693467336684e-06,
"loss": 0.0035,
"step": 25000
},
{
"epoch": 12.59,
"eval_loss": 0.29140138626098633,
"eval_runtime": 645.0337,
"eval_samples_per_second": 2.184,
"eval_steps_per_second": 2.184,
"eval_wer": 24.399861639571082,
"step": 25000
},
{
"epoch": 12.61,
"grad_norm": 2.3478965759277344,
"learning_rate": 7.536180904522614e-06,
"loss": 0.0034,
"step": 25025
},
{
"epoch": 12.62,
"grad_norm": 0.9575018286705017,
"learning_rate": 7.533668341708543e-06,
"loss": 0.0032,
"step": 25050
},
{
"epoch": 12.63,
"grad_norm": 0.94124835729599,
"learning_rate": 7.531155778894473e-06,
"loss": 0.0034,
"step": 25075
},
{
"epoch": 12.64,
"grad_norm": 1.8110922574996948,
"learning_rate": 7.528643216080402e-06,
"loss": 0.0034,
"step": 25100
},
{
"epoch": 12.66,
"grad_norm": 1.0863940715789795,
"learning_rate": 7.5261306532663325e-06,
"loss": 0.0034,
"step": 25125
},
{
"epoch": 12.67,
"grad_norm": 1.8619202375411987,
"learning_rate": 7.523618090452262e-06,
"loss": 0.0037,
"step": 25150
},
{
"epoch": 12.68,
"grad_norm": 1.9385497570037842,
"learning_rate": 7.521105527638192e-06,
"loss": 0.0034,
"step": 25175
},
{
"epoch": 12.7,
"grad_norm": 1.8523273468017578,
"learning_rate": 7.5185929648241205e-06,
"loss": 0.0035,
"step": 25200
},
{
"epoch": 12.71,
"grad_norm": 1.9459110498428345,
"learning_rate": 7.516080402010051e-06,
"loss": 0.0034,
"step": 25225
},
{
"epoch": 12.72,
"grad_norm": 1.6162514686584473,
"learning_rate": 7.513567839195981e-06,
"loss": 0.0041,
"step": 25250
},
{
"epoch": 12.73,
"grad_norm": 1.7794193029403687,
"learning_rate": 7.51105527638191e-06,
"loss": 0.0037,
"step": 25275
},
{
"epoch": 12.75,
"grad_norm": 1.8779551982879639,
"learning_rate": 7.50854271356784e-06,
"loss": 0.0038,
"step": 25300
},
{
"epoch": 12.76,
"grad_norm": 1.2835701704025269,
"learning_rate": 7.506030150753769e-06,
"loss": 0.0035,
"step": 25325
},
{
"epoch": 12.77,
"grad_norm": 1.1003650426864624,
"learning_rate": 7.503517587939699e-06,
"loss": 0.0037,
"step": 25350
},
{
"epoch": 12.78,
"grad_norm": 1.345335602760315,
"learning_rate": 7.501005025125628e-06,
"loss": 0.0036,
"step": 25375
},
{
"epoch": 12.8,
"grad_norm": 0.9257469177246094,
"learning_rate": 7.4984924623115585e-06,
"loss": 0.0032,
"step": 25400
},
{
"epoch": 12.81,
"grad_norm": 1.3247835636138916,
"learning_rate": 7.495979899497488e-06,
"loss": 0.0042,
"step": 25425
},
{
"epoch": 12.82,
"grad_norm": 1.658199429512024,
"learning_rate": 7.493467336683418e-06,
"loss": 0.0038,
"step": 25450
},
{
"epoch": 12.83,
"grad_norm": 1.2152585983276367,
"learning_rate": 7.490954773869348e-06,
"loss": 0.0035,
"step": 25475
},
{
"epoch": 12.85,
"grad_norm": 1.6380772590637207,
"learning_rate": 7.488442211055277e-06,
"loss": 0.0041,
"step": 25500
},
{
"epoch": 12.86,
"grad_norm": 1.4688934087753296,
"learning_rate": 7.485929648241207e-06,
"loss": 0.0035,
"step": 25525
},
{
"epoch": 12.87,
"grad_norm": 2.3192384243011475,
"learning_rate": 7.483417085427136e-06,
"loss": 0.004,
"step": 25550
},
{
"epoch": 12.88,
"grad_norm": 1.0542503595352173,
"learning_rate": 7.480904522613066e-06,
"loss": 0.0038,
"step": 25575
},
{
"epoch": 12.9,
"grad_norm": 1.6845927238464355,
"learning_rate": 7.478391959798995e-06,
"loss": 0.0034,
"step": 25600
},
{
"epoch": 12.91,
"grad_norm": 1.3695615530014038,
"learning_rate": 7.475879396984925e-06,
"loss": 0.0034,
"step": 25625
},
{
"epoch": 12.92,
"grad_norm": 2.099348306655884,
"learning_rate": 7.473366834170855e-06,
"loss": 0.0041,
"step": 25650
},
{
"epoch": 12.93,
"grad_norm": 1.4898931980133057,
"learning_rate": 7.470854271356784e-06,
"loss": 0.0037,
"step": 25675
},
{
"epoch": 12.95,
"grad_norm": 1.583161473274231,
"learning_rate": 7.4683417085427146e-06,
"loss": 0.0037,
"step": 25700
},
{
"epoch": 12.96,
"grad_norm": 0.7393112778663635,
"learning_rate": 7.465829145728644e-06,
"loss": 0.0042,
"step": 25725
},
{
"epoch": 12.97,
"grad_norm": 1.3594324588775635,
"learning_rate": 7.463316582914574e-06,
"loss": 0.0034,
"step": 25750
},
{
"epoch": 12.98,
"grad_norm": 1.8096977472305298,
"learning_rate": 7.4608040201005025e-06,
"loss": 0.0041,
"step": 25775
},
{
"epoch": 13.0,
"grad_norm": 1.482646107673645,
"learning_rate": 7.458291457286433e-06,
"loss": 0.0036,
"step": 25800
},
{
"epoch": 13.01,
"grad_norm": 0.5999165177345276,
"learning_rate": 7.455778894472362e-06,
"loss": 0.0026,
"step": 25825
},
{
"epoch": 13.02,
"grad_norm": 0.5373625159263611,
"learning_rate": 7.453266331658292e-06,
"loss": 0.0026,
"step": 25850
},
{
"epoch": 13.04,
"grad_norm": 1.5365161895751953,
"learning_rate": 7.450753768844222e-06,
"loss": 0.0025,
"step": 25875
},
{
"epoch": 13.05,
"grad_norm": 0.9741165637969971,
"learning_rate": 7.448241206030151e-06,
"loss": 0.0025,
"step": 25900
},
{
"epoch": 13.06,
"grad_norm": 1.167653203010559,
"learning_rate": 7.445728643216081e-06,
"loss": 0.0027,
"step": 25925
},
{
"epoch": 13.07,
"grad_norm": 0.48862722516059875,
"learning_rate": 7.44321608040201e-06,
"loss": 0.0024,
"step": 25950
},
{
"epoch": 13.09,
"grad_norm": 0.701203465461731,
"learning_rate": 7.4407035175879405e-06,
"loss": 0.0024,
"step": 25975
},
{
"epoch": 13.1,
"grad_norm": 0.8814527988433838,
"learning_rate": 7.43819095477387e-06,
"loss": 0.0022,
"step": 26000
},
{
"epoch": 13.1,
"eval_loss": 0.29991698265075684,
"eval_runtime": 640.3865,
"eval_samples_per_second": 2.2,
"eval_steps_per_second": 2.2,
"eval_wer": 23.86717398823936,
"step": 26000
},
{
"epoch": 13.11,
"grad_norm": 1.1581878662109375,
"learning_rate": 7.4356783919598e-06,
"loss": 0.0021,
"step": 26025
},
{
"epoch": 13.12,
"grad_norm": 0.3536165654659271,
"learning_rate": 7.4331658291457285e-06,
"loss": 0.0025,
"step": 26050
},
{
"epoch": 13.14,
"grad_norm": 2.3617360591888428,
"learning_rate": 7.430653266331659e-06,
"loss": 0.0028,
"step": 26075
},
{
"epoch": 13.15,
"grad_norm": 1.100469708442688,
"learning_rate": 7.428140703517589e-06,
"loss": 0.0029,
"step": 26100
},
{
"epoch": 13.16,
"grad_norm": 1.3083537817001343,
"learning_rate": 7.425628140703518e-06,
"loss": 0.0033,
"step": 26125
},
{
"epoch": 13.17,
"grad_norm": 1.1170566082000732,
"learning_rate": 7.423115577889448e-06,
"loss": 0.0028,
"step": 26150
},
{
"epoch": 13.19,
"grad_norm": 1.4462560415267944,
"learning_rate": 7.420603015075377e-06,
"loss": 0.0027,
"step": 26175
},
{
"epoch": 13.2,
"grad_norm": 0.9630836844444275,
"learning_rate": 7.418090452261307e-06,
"loss": 0.0033,
"step": 26200
},
{
"epoch": 13.21,
"grad_norm": 0.7968631386756897,
"learning_rate": 7.415577889447236e-06,
"loss": 0.0025,
"step": 26225
},
{
"epoch": 13.22,
"grad_norm": 2.100764751434326,
"learning_rate": 7.4130653266331665e-06,
"loss": 0.0026,
"step": 26250
},
{
"epoch": 13.24,
"grad_norm": 1.6591538190841675,
"learning_rate": 7.410552763819097e-06,
"loss": 0.0031,
"step": 26275
},
{
"epoch": 13.25,
"grad_norm": 0.645767867565155,
"learning_rate": 7.408040201005026e-06,
"loss": 0.003,
"step": 26300
},
{
"epoch": 13.26,
"grad_norm": 1.4978662729263306,
"learning_rate": 7.405527638190956e-06,
"loss": 0.0028,
"step": 26325
},
{
"epoch": 13.27,
"grad_norm": 1.357334852218628,
"learning_rate": 7.403015075376885e-06,
"loss": 0.0026,
"step": 26350
},
{
"epoch": 13.29,
"grad_norm": 1.751214861869812,
"learning_rate": 7.400502512562815e-06,
"loss": 0.0027,
"step": 26375
},
{
"epoch": 13.3,
"grad_norm": 0.9337909817695618,
"learning_rate": 7.397989949748744e-06,
"loss": 0.0026,
"step": 26400
},
{
"epoch": 13.31,
"grad_norm": 1.3542364835739136,
"learning_rate": 7.395477386934674e-06,
"loss": 0.0031,
"step": 26425
},
{
"epoch": 13.32,
"grad_norm": 0.9499005675315857,
"learning_rate": 7.392964824120603e-06,
"loss": 0.0031,
"step": 26450
},
{
"epoch": 13.34,
"grad_norm": 1.5429147481918335,
"learning_rate": 7.390452261306533e-06,
"loss": 0.0029,
"step": 26475
},
{
"epoch": 13.35,
"grad_norm": 0.8379466533660889,
"learning_rate": 7.387939698492463e-06,
"loss": 0.0024,
"step": 26500
},
{
"epoch": 13.36,
"grad_norm": 1.3563404083251953,
"learning_rate": 7.385427135678392e-06,
"loss": 0.0029,
"step": 26525
},
{
"epoch": 13.38,
"grad_norm": 2.4341177940368652,
"learning_rate": 7.382914572864323e-06,
"loss": 0.0028,
"step": 26550
},
{
"epoch": 13.39,
"grad_norm": 0.8350504636764526,
"learning_rate": 7.380402010050252e-06,
"loss": 0.0027,
"step": 26575
},
{
"epoch": 13.4,
"grad_norm": 1.5221184492111206,
"learning_rate": 7.377889447236182e-06,
"loss": 0.0027,
"step": 26600
},
{
"epoch": 13.41,
"grad_norm": 2.1058216094970703,
"learning_rate": 7.3753768844221105e-06,
"loss": 0.0027,
"step": 26625
},
{
"epoch": 13.43,
"grad_norm": 1.1398578882217407,
"learning_rate": 7.372864321608041e-06,
"loss": 0.0029,
"step": 26650
},
{
"epoch": 13.44,
"grad_norm": 2.2516255378723145,
"learning_rate": 7.37035175879397e-06,
"loss": 0.0029,
"step": 26675
},
{
"epoch": 13.45,
"grad_norm": 1.5439566373825073,
"learning_rate": 7.3678391959799e-06,
"loss": 0.0028,
"step": 26700
},
{
"epoch": 13.46,
"grad_norm": 0.6818922758102417,
"learning_rate": 7.36532663316583e-06,
"loss": 0.0033,
"step": 26725
},
{
"epoch": 13.48,
"grad_norm": 1.0167973041534424,
"learning_rate": 7.362814070351759e-06,
"loss": 0.0027,
"step": 26750
},
{
"epoch": 13.49,
"grad_norm": 0.8457038998603821,
"learning_rate": 7.360301507537689e-06,
"loss": 0.0027,
"step": 26775
},
{
"epoch": 13.5,
"grad_norm": 1.8538858890533447,
"learning_rate": 7.357788944723618e-06,
"loss": 0.0029,
"step": 26800
},
{
"epoch": 13.51,
"grad_norm": 1.6280864477157593,
"learning_rate": 7.3552763819095485e-06,
"loss": 0.0029,
"step": 26825
},
{
"epoch": 13.53,
"grad_norm": 1.5593993663787842,
"learning_rate": 7.352763819095478e-06,
"loss": 0.0031,
"step": 26850
},
{
"epoch": 13.54,
"grad_norm": 2.2892441749572754,
"learning_rate": 7.350251256281408e-06,
"loss": 0.0032,
"step": 26875
},
{
"epoch": 13.55,
"grad_norm": 1.0337854623794556,
"learning_rate": 7.347738693467338e-06,
"loss": 0.0032,
"step": 26900
},
{
"epoch": 13.56,
"grad_norm": 1.764012336730957,
"learning_rate": 7.345226130653267e-06,
"loss": 0.003,
"step": 26925
},
{
"epoch": 13.58,
"grad_norm": 1.3078733682632446,
"learning_rate": 7.342713567839197e-06,
"loss": 0.003,
"step": 26950
},
{
"epoch": 13.59,
"grad_norm": 1.0039664506912231,
"learning_rate": 7.340201005025126e-06,
"loss": 0.0033,
"step": 26975
},
{
"epoch": 13.6,
"grad_norm": 1.68669593334198,
"learning_rate": 7.337688442211056e-06,
"loss": 0.0032,
"step": 27000
},
{
"epoch": 13.6,
"eval_loss": 0.3001398742198944,
"eval_runtime": 654.1931,
"eval_samples_per_second": 2.154,
"eval_steps_per_second": 2.154,
"eval_wer": 24.538222068488412,
"step": 27000
},
{
"epoch": 13.61,
"grad_norm": 1.84382963180542,
"learning_rate": 7.335175879396985e-06,
"loss": 0.0029,
"step": 27025
},
{
"epoch": 13.63,
"grad_norm": 0.4542626440525055,
"learning_rate": 7.332663316582915e-06,
"loss": 0.0025,
"step": 27050
},
{
"epoch": 13.64,
"grad_norm": 1.7250819206237793,
"learning_rate": 7.330150753768844e-06,
"loss": 0.0029,
"step": 27075
},
{
"epoch": 13.65,
"grad_norm": 2.1424949169158936,
"learning_rate": 7.3276381909547745e-06,
"loss": 0.003,
"step": 27100
},
{
"epoch": 13.66,
"grad_norm": 1.7538329362869263,
"learning_rate": 7.325125628140705e-06,
"loss": 0.0031,
"step": 27125
},
{
"epoch": 13.68,
"grad_norm": 1.9615614414215088,
"learning_rate": 7.322613065326634e-06,
"loss": 0.0034,
"step": 27150
},
{
"epoch": 13.69,
"grad_norm": 0.4876037836074829,
"learning_rate": 7.320100502512564e-06,
"loss": 0.0026,
"step": 27175
},
{
"epoch": 13.7,
"grad_norm": 2.511152744293213,
"learning_rate": 7.317587939698493e-06,
"loss": 0.0036,
"step": 27200
},
{
"epoch": 13.72,
"grad_norm": 1.6201894283294678,
"learning_rate": 7.315075376884423e-06,
"loss": 0.003,
"step": 27225
},
{
"epoch": 13.73,
"grad_norm": 2.252923011779785,
"learning_rate": 7.312562814070352e-06,
"loss": 0.0036,
"step": 27250
},
{
"epoch": 13.74,
"grad_norm": 1.1069494485855103,
"learning_rate": 7.310050251256282e-06,
"loss": 0.0026,
"step": 27275
},
{
"epoch": 13.75,
"grad_norm": 0.6992159485816956,
"learning_rate": 7.307537688442211e-06,
"loss": 0.0031,
"step": 27300
},
{
"epoch": 13.77,
"grad_norm": 1.252886414527893,
"learning_rate": 7.305025125628141e-06,
"loss": 0.0027,
"step": 27325
},
{
"epoch": 13.78,
"grad_norm": 1.4874393939971924,
"learning_rate": 7.302512562814071e-06,
"loss": 0.0026,
"step": 27350
},
{
"epoch": 13.79,
"grad_norm": 0.7340197563171387,
"learning_rate": 7.3e-06,
"loss": 0.0026,
"step": 27375
},
{
"epoch": 13.8,
"grad_norm": 1.225475788116455,
"learning_rate": 7.297587939698493e-06,
"loss": 0.0032,
"step": 27400
},
{
"epoch": 13.82,
"grad_norm": 1.1409143209457397,
"learning_rate": 7.295075376884423e-06,
"loss": 0.0032,
"step": 27425
},
{
"epoch": 13.83,
"grad_norm": 1.395493507385254,
"learning_rate": 7.292562814070352e-06,
"loss": 0.003,
"step": 27450
},
{
"epoch": 13.84,
"grad_norm": 1.5672920942306519,
"learning_rate": 7.2900502512562825e-06,
"loss": 0.0034,
"step": 27475
},
{
"epoch": 13.85,
"grad_norm": 1.7837570905685425,
"learning_rate": 7.287537688442211e-06,
"loss": 0.0034,
"step": 27500
},
{
"epoch": 13.87,
"grad_norm": 2.0357208251953125,
"learning_rate": 7.285025125628141e-06,
"loss": 0.0035,
"step": 27525
},
{
"epoch": 13.88,
"grad_norm": 1.0619276762008667,
"learning_rate": 7.282512562814071e-06,
"loss": 0.0028,
"step": 27550
},
{
"epoch": 13.89,
"grad_norm": 1.7723731994628906,
"learning_rate": 7.280000000000001e-06,
"loss": 0.0035,
"step": 27575
},
{
"epoch": 13.9,
"grad_norm": 2.082310438156128,
"learning_rate": 7.277487437185931e-06,
"loss": 0.0036,
"step": 27600
},
{
"epoch": 13.92,
"grad_norm": 1.348310112953186,
"learning_rate": 7.274974874371859e-06,
"loss": 0.0032,
"step": 27625
},
{
"epoch": 13.93,
"grad_norm": 1.0664507150650024,
"learning_rate": 7.272462311557789e-06,
"loss": 0.003,
"step": 27650
},
{
"epoch": 13.94,
"grad_norm": 1.124415397644043,
"learning_rate": 7.269949748743719e-06,
"loss": 0.0032,
"step": 27675
},
{
"epoch": 13.95,
"grad_norm": 1.4353559017181396,
"learning_rate": 7.267437185929649e-06,
"loss": 0.0033,
"step": 27700
},
{
"epoch": 13.97,
"grad_norm": 1.658118724822998,
"learning_rate": 7.264924623115579e-06,
"loss": 0.0038,
"step": 27725
},
{
"epoch": 13.98,
"grad_norm": 1.0329655408859253,
"learning_rate": 7.262412060301508e-06,
"loss": 0.0032,
"step": 27750
},
{
"epoch": 13.99,
"grad_norm": 1.345921516418457,
"learning_rate": 7.259899497487439e-06,
"loss": 0.0025,
"step": 27775
},
{
"epoch": 14.01,
"grad_norm": 1.0595952272415161,
"learning_rate": 7.257386934673367e-06,
"loss": 0.0027,
"step": 27800
},
{
"epoch": 14.02,
"grad_norm": 1.766713261604309,
"learning_rate": 7.254874371859297e-06,
"loss": 0.0025,
"step": 27825
},
{
"epoch": 14.03,
"grad_norm": 1.3049800395965576,
"learning_rate": 7.2523618090452265e-06,
"loss": 0.0018,
"step": 27850
},
{
"epoch": 14.04,
"grad_norm": 2.0778799057006836,
"learning_rate": 7.249849246231157e-06,
"loss": 0.0025,
"step": 27875
},
{
"epoch": 14.06,
"grad_norm": 0.90594881772995,
"learning_rate": 7.247336683417085e-06,
"loss": 0.0022,
"step": 27900
},
{
"epoch": 14.07,
"grad_norm": 0.35941508412361145,
"learning_rate": 7.244824120603015e-06,
"loss": 0.0022,
"step": 27925
},
{
"epoch": 14.08,
"grad_norm": 0.7269408106803894,
"learning_rate": 7.2423115577889455e-06,
"loss": 0.0021,
"step": 27950
},
{
"epoch": 14.09,
"grad_norm": 1.4327499866485596,
"learning_rate": 7.239798994974875e-06,
"loss": 0.0025,
"step": 27975
},
{
"epoch": 14.11,
"grad_norm": 0.6035653948783875,
"learning_rate": 7.237286432160805e-06,
"loss": 0.0024,
"step": 28000
},
{
"epoch": 14.11,
"eval_loss": 0.3001614212989807,
"eval_runtime": 784.2944,
"eval_samples_per_second": 1.797,
"eval_steps_per_second": 1.797,
"eval_wer": 23.20304392943618,
"step": 28000
},
{
"epoch": 14.12,
"grad_norm": 1.209418535232544,
"learning_rate": 7.234773869346734e-06,
"loss": 0.0022,
"step": 28025
},
{
"epoch": 14.13,
"grad_norm": 1.2968195676803589,
"learning_rate": 7.2322613065326645e-06,
"loss": 0.0024,
"step": 28050
},
{
"epoch": 14.14,
"grad_norm": 1.5654141902923584,
"learning_rate": 7.229748743718593e-06,
"loss": 0.0027,
"step": 28075
},
{
"epoch": 14.16,
"grad_norm": 1.4532732963562012,
"learning_rate": 7.227236180904523e-06,
"loss": 0.0024,
"step": 28100
},
{
"epoch": 14.17,
"grad_norm": 1.7169549465179443,
"learning_rate": 7.2247236180904525e-06,
"loss": 0.0022,
"step": 28125
},
{
"epoch": 14.18,
"grad_norm": 1.32295560836792,
"learning_rate": 7.222211055276383e-06,
"loss": 0.0022,
"step": 28150
},
{
"epoch": 14.19,
"grad_norm": 0.8905125856399536,
"learning_rate": 7.219698492462313e-06,
"loss": 0.0022,
"step": 28175
},
{
"epoch": 14.21,
"grad_norm": 0.8778842687606812,
"learning_rate": 7.217185929648241e-06,
"loss": 0.0021,
"step": 28200
},
{
"epoch": 14.22,
"grad_norm": 1.4678562879562378,
"learning_rate": 7.2146733668341715e-06,
"loss": 0.0025,
"step": 28225
},
{
"epoch": 14.23,
"grad_norm": 0.7229999899864197,
"learning_rate": 7.212160804020101e-06,
"loss": 0.0022,
"step": 28250
},
{
"epoch": 14.24,
"grad_norm": 2.1713428497314453,
"learning_rate": 7.209648241206031e-06,
"loss": 0.0023,
"step": 28275
},
{
"epoch": 14.26,
"grad_norm": 2.5556602478027344,
"learning_rate": 7.20713567839196e-06,
"loss": 0.0022,
"step": 28300
},
{
"epoch": 14.27,
"grad_norm": 0.7470478415489197,
"learning_rate": 7.2046231155778905e-06,
"loss": 0.0022,
"step": 28325
},
{
"epoch": 14.28,
"grad_norm": 1.2497297525405884,
"learning_rate": 7.20211055276382e-06,
"loss": 0.0022,
"step": 28350
},
{
"epoch": 14.29,
"grad_norm": 0.5248861908912659,
"learning_rate": 7.199597989949749e-06,
"loss": 0.0024,
"step": 28375
},
{
"epoch": 14.31,
"grad_norm": 1.0900764465332031,
"learning_rate": 7.197085427135679e-06,
"loss": 0.0025,
"step": 28400
},
{
"epoch": 14.32,
"grad_norm": 1.5685707330703735,
"learning_rate": 7.194572864321609e-06,
"loss": 0.0028,
"step": 28425
},
{
"epoch": 14.33,
"grad_norm": 1.298081874847412,
"learning_rate": 7.192060301507539e-06,
"loss": 0.0026,
"step": 28450
},
{
"epoch": 14.35,
"grad_norm": 1.188835620880127,
"learning_rate": 7.189547738693467e-06,
"loss": 0.0024,
"step": 28475
},
{
"epoch": 14.36,
"grad_norm": 2.094358205795288,
"learning_rate": 7.187035175879397e-06,
"loss": 0.0023,
"step": 28500
},
{
"epoch": 14.37,
"grad_norm": 1.2566583156585693,
"learning_rate": 7.184522613065327e-06,
"loss": 0.0028,
"step": 28525
},
{
"epoch": 14.38,
"grad_norm": 1.1933472156524658,
"learning_rate": 7.182010050251257e-06,
"loss": 0.0025,
"step": 28550
},
{
"epoch": 14.4,
"grad_norm": 1.451371669769287,
"learning_rate": 7.179497487437187e-06,
"loss": 0.0028,
"step": 28575
},
{
"epoch": 14.41,
"grad_norm": 1.804445743560791,
"learning_rate": 7.176984924623116e-06,
"loss": 0.0026,
"step": 28600
},
{
"epoch": 14.42,
"grad_norm": 0.8600190877914429,
"learning_rate": 7.174472361809047e-06,
"loss": 0.0025,
"step": 28625
},
{
"epoch": 14.43,
"grad_norm": 0.6841452121734619,
"learning_rate": 7.171959798994975e-06,
"loss": 0.0027,
"step": 28650
},
{
"epoch": 14.45,
"grad_norm": 0.7692683339118958,
"learning_rate": 7.169447236180905e-06,
"loss": 0.0023,
"step": 28675
},
{
"epoch": 14.46,
"grad_norm": 1.5418920516967773,
"learning_rate": 7.1669346733668345e-06,
"loss": 0.0026,
"step": 28700
},
{
"epoch": 14.47,
"grad_norm": 1.3701914548873901,
"learning_rate": 7.164422110552765e-06,
"loss": 0.0023,
"step": 28725
},
{
"epoch": 14.48,
"grad_norm": 1.2819687128067017,
"learning_rate": 7.161909547738693e-06,
"loss": 0.0031,
"step": 28750
},
{
"epoch": 14.5,
"grad_norm": 0.9504879117012024,
"learning_rate": 7.159396984924623e-06,
"loss": 0.0023,
"step": 28775
},
{
"epoch": 14.51,
"grad_norm": 1.3846092224121094,
"learning_rate": 7.1568844221105535e-06,
"loss": 0.0028,
"step": 28800
},
{
"epoch": 14.52,
"grad_norm": 0.8752845525741577,
"learning_rate": 7.154371859296483e-06,
"loss": 0.0027,
"step": 28825
},
{
"epoch": 14.53,
"grad_norm": 2.0625505447387695,
"learning_rate": 7.151859296482413e-06,
"loss": 0.0028,
"step": 28850
},
{
"epoch": 14.55,
"grad_norm": 1.281076192855835,
"learning_rate": 7.149346733668342e-06,
"loss": 0.0026,
"step": 28875
},
{
"epoch": 14.56,
"grad_norm": 1.1781672239303589,
"learning_rate": 7.1468341708542725e-06,
"loss": 0.0031,
"step": 28900
},
{
"epoch": 14.57,
"grad_norm": 1.7508938312530518,
"learning_rate": 7.144321608040201e-06,
"loss": 0.0027,
"step": 28925
},
{
"epoch": 14.58,
"grad_norm": 1.2148828506469727,
"learning_rate": 7.141809045226131e-06,
"loss": 0.003,
"step": 28950
},
{
"epoch": 14.6,
"grad_norm": 1.6405340433120728,
"learning_rate": 7.139296482412061e-06,
"loss": 0.0025,
"step": 28975
},
{
"epoch": 14.61,
"grad_norm": 0.7206986546516418,
"learning_rate": 7.136783919597991e-06,
"loss": 0.0025,
"step": 29000
},
{
"epoch": 14.61,
"eval_loss": 0.3025396168231964,
"eval_runtime": 646.2169,
"eval_samples_per_second": 2.18,
"eval_steps_per_second": 2.18,
"eval_wer": 23.79107575233483,
"step": 29000
},
{
"epoch": 14.62,
"grad_norm": 1.8807600736618042,
"learning_rate": 7.134271356783921e-06,
"loss": 0.0028,
"step": 29025
},
{
"epoch": 14.63,
"grad_norm": 0.9913462996482849,
"learning_rate": 7.131758793969849e-06,
"loss": 0.0026,
"step": 29050
},
{
"epoch": 14.65,
"grad_norm": 0.6450251340866089,
"learning_rate": 7.1292462311557795e-06,
"loss": 0.002,
"step": 29075
},
{
"epoch": 14.66,
"grad_norm": 1.563607096672058,
"learning_rate": 7.126733668341709e-06,
"loss": 0.0024,
"step": 29100
},
{
"epoch": 14.67,
"grad_norm": 1.7868529558181763,
"learning_rate": 7.124221105527639e-06,
"loss": 0.0023,
"step": 29125
},
{
"epoch": 14.69,
"grad_norm": 1.2877388000488281,
"learning_rate": 7.121708542713568e-06,
"loss": 0.0023,
"step": 29150
},
{
"epoch": 14.7,
"grad_norm": 1.0285090208053589,
"learning_rate": 7.1191959798994985e-06,
"loss": 0.0029,
"step": 29175
},
{
"epoch": 14.71,
"grad_norm": 0.9814359545707703,
"learning_rate": 7.116683417085428e-06,
"loss": 0.0024,
"step": 29200
},
{
"epoch": 14.72,
"grad_norm": 3.390266180038452,
"learning_rate": 7.114170854271357e-06,
"loss": 0.0031,
"step": 29225
},
{
"epoch": 14.74,
"grad_norm": 0.9892065525054932,
"learning_rate": 7.111658291457287e-06,
"loss": 0.0026,
"step": 29250
},
{
"epoch": 14.75,
"grad_norm": 1.4779770374298096,
"learning_rate": 7.109145728643217e-06,
"loss": 0.0027,
"step": 29275
},
{
"epoch": 14.76,
"grad_norm": 1.935717225074768,
"learning_rate": 7.106633165829147e-06,
"loss": 0.0034,
"step": 29300
},
{
"epoch": 14.77,
"grad_norm": 0.9933359622955322,
"learning_rate": 7.104120603015075e-06,
"loss": 0.0026,
"step": 29325
},
{
"epoch": 14.79,
"grad_norm": 1.2649095058441162,
"learning_rate": 7.1016080402010054e-06,
"loss": 0.0025,
"step": 29350
},
{
"epoch": 14.8,
"grad_norm": 0.9197986125946045,
"learning_rate": 7.099095477386935e-06,
"loss": 0.0025,
"step": 29375
},
{
"epoch": 14.81,
"grad_norm": 0.7807173132896423,
"learning_rate": 7.096582914572865e-06,
"loss": 0.0029,
"step": 29400
},
{
"epoch": 14.82,
"grad_norm": 1.1959360837936401,
"learning_rate": 7.094070351758795e-06,
"loss": 0.0029,
"step": 29425
},
{
"epoch": 14.84,
"grad_norm": 1.7630362510681152,
"learning_rate": 7.091557788944724e-06,
"loss": 0.0024,
"step": 29450
},
{
"epoch": 14.85,
"grad_norm": 1.1034917831420898,
"learning_rate": 7.089045226130654e-06,
"loss": 0.0027,
"step": 29475
},
{
"epoch": 14.86,
"grad_norm": 1.185198426246643,
"learning_rate": 7.086532663316583e-06,
"loss": 0.0022,
"step": 29500
},
{
"epoch": 14.87,
"grad_norm": 0.9745866656303406,
"learning_rate": 7.084020100502513e-06,
"loss": 0.0029,
"step": 29525
},
{
"epoch": 14.89,
"grad_norm": 1.3043872117996216,
"learning_rate": 7.0815075376884426e-06,
"loss": 0.0022,
"step": 29550
},
{
"epoch": 14.9,
"grad_norm": 1.9529894590377808,
"learning_rate": 7.078994974874373e-06,
"loss": 0.0026,
"step": 29575
},
{
"epoch": 14.91,
"grad_norm": 1.3992432355880737,
"learning_rate": 7.076482412060303e-06,
"loss": 0.0028,
"step": 29600
},
{
"epoch": 14.92,
"grad_norm": 1.66525399684906,
"learning_rate": 7.073969849246231e-06,
"loss": 0.0026,
"step": 29625
},
{
"epoch": 14.94,
"grad_norm": 1.0909807682037354,
"learning_rate": 7.0714572864321615e-06,
"loss": 0.0024,
"step": 29650
},
{
"epoch": 14.95,
"grad_norm": 1.1937345266342163,
"learning_rate": 7.068944723618091e-06,
"loss": 0.0028,
"step": 29675
},
{
"epoch": 14.96,
"grad_norm": 1.9024869203567505,
"learning_rate": 7.066432160804021e-06,
"loss": 0.0029,
"step": 29700
},
{
"epoch": 14.97,
"grad_norm": 1.720049500465393,
"learning_rate": 7.06391959798995e-06,
"loss": 0.0026,
"step": 29725
},
{
"epoch": 14.99,
"grad_norm": 1.847103476524353,
"learning_rate": 7.0614070351758805e-06,
"loss": 0.0028,
"step": 29750
},
{
"epoch": 15.0,
"grad_norm": 2.2860000133514404,
"learning_rate": 7.058894472361809e-06,
"loss": 0.0031,
"step": 29775
},
{
"epoch": 15.01,
"grad_norm": 0.5796921849250793,
"learning_rate": 7.056381909547739e-06,
"loss": 0.0018,
"step": 29800
},
{
"epoch": 15.03,
"grad_norm": 0.9784811735153198,
"learning_rate": 7.053869346733669e-06,
"loss": 0.0017,
"step": 29825
},
{
"epoch": 15.04,
"grad_norm": 0.5142715573310852,
"learning_rate": 7.051356783919599e-06,
"loss": 0.0016,
"step": 29850
},
{
"epoch": 15.05,
"grad_norm": 0.4503660202026367,
"learning_rate": 7.048844221105529e-06,
"loss": 0.0016,
"step": 29875
},
{
"epoch": 15.06,
"grad_norm": 1.5839601755142212,
"learning_rate": 7.046331658291457e-06,
"loss": 0.0017,
"step": 29900
},
{
"epoch": 15.08,
"grad_norm": 1.3505232334136963,
"learning_rate": 7.0438190954773875e-06,
"loss": 0.0017,
"step": 29925
},
{
"epoch": 15.09,
"grad_norm": 0.8439552783966064,
"learning_rate": 7.041306532663317e-06,
"loss": 0.0021,
"step": 29950
},
{
"epoch": 15.1,
"grad_norm": 0.5884461402893066,
"learning_rate": 7.038793969849247e-06,
"loss": 0.0019,
"step": 29975
},
{
"epoch": 15.11,
"grad_norm": 1.5458250045776367,
"learning_rate": 7.036281407035176e-06,
"loss": 0.0016,
"step": 30000
},
{
"epoch": 15.11,
"eval_loss": 0.3082928955554962,
"eval_runtime": 644.0315,
"eval_samples_per_second": 2.188,
"eval_steps_per_second": 2.188,
"eval_wer": 23.12694569353165,
"step": 30000
},
{
"epoch": 15.13,
"grad_norm": 0.5485078692436218,
"learning_rate": 7.0337688442211065e-06,
"loss": 0.002,
"step": 30025
},
{
"epoch": 15.14,
"grad_norm": 0.7897219657897949,
"learning_rate": 7.031256281407036e-06,
"loss": 0.002,
"step": 30050
},
{
"epoch": 15.15,
"grad_norm": 1.791757345199585,
"learning_rate": 7.028743718592965e-06,
"loss": 0.002,
"step": 30075
},
{
"epoch": 15.16,
"grad_norm": 1.5494959354400635,
"learning_rate": 7.026231155778895e-06,
"loss": 0.0017,
"step": 30100
},
{
"epoch": 15.18,
"grad_norm": 1.4792935848236084,
"learning_rate": 7.023718592964825e-06,
"loss": 0.0021,
"step": 30125
},
{
"epoch": 15.19,
"grad_norm": 0.9007993340492249,
"learning_rate": 7.021306532663317e-06,
"loss": 0.0014,
"step": 30150
},
{
"epoch": 15.2,
"grad_norm": 1.4334176778793335,
"learning_rate": 7.018793969849247e-06,
"loss": 0.0022,
"step": 30175
},
{
"epoch": 15.21,
"grad_norm": 0.3024737238883972,
"learning_rate": 7.016281407035176e-06,
"loss": 0.0018,
"step": 30200
},
{
"epoch": 15.23,
"grad_norm": 0.6104531288146973,
"learning_rate": 7.013768844221106e-06,
"loss": 0.0018,
"step": 30225
},
{
"epoch": 15.24,
"grad_norm": 0.5760412216186523,
"learning_rate": 7.011256281407036e-06,
"loss": 0.0022,
"step": 30250
},
{
"epoch": 15.25,
"grad_norm": 1.2721421718597412,
"learning_rate": 7.008743718592965e-06,
"loss": 0.0023,
"step": 30275
},
{
"epoch": 15.26,
"grad_norm": 1.3227627277374268,
"learning_rate": 7.0062311557788955e-06,
"loss": 0.002,
"step": 30300
},
{
"epoch": 15.28,
"grad_norm": 0.7991645932197571,
"learning_rate": 7.003718592964824e-06,
"loss": 0.0022,
"step": 30325
},
{
"epoch": 15.29,
"grad_norm": 0.947595477104187,
"learning_rate": 7.001206030150754e-06,
"loss": 0.0019,
"step": 30350
},
{
"epoch": 15.3,
"grad_norm": 0.308912456035614,
"learning_rate": 6.9986934673366834e-06,
"loss": 0.0022,
"step": 30375
},
{
"epoch": 15.31,
"grad_norm": 0.7527008056640625,
"learning_rate": 6.996180904522614e-06,
"loss": 0.0021,
"step": 30400
},
{
"epoch": 15.33,
"grad_norm": 1.4596527814865112,
"learning_rate": 6.993668341708544e-06,
"loss": 0.0022,
"step": 30425
},
{
"epoch": 15.34,
"grad_norm": 0.6579309701919556,
"learning_rate": 6.991155778894473e-06,
"loss": 0.0022,
"step": 30450
},
{
"epoch": 15.35,
"grad_norm": 1.1227294206619263,
"learning_rate": 6.988643216080403e-06,
"loss": 0.0025,
"step": 30475
},
{
"epoch": 15.37,
"grad_norm": 1.243522047996521,
"learning_rate": 6.986130653266332e-06,
"loss": 0.0024,
"step": 30500
},
{
"epoch": 15.38,
"grad_norm": 1.4679538011550903,
"learning_rate": 6.983618090452262e-06,
"loss": 0.0025,
"step": 30525
},
{
"epoch": 15.39,
"grad_norm": 0.4298401176929474,
"learning_rate": 6.981105527638191e-06,
"loss": 0.0025,
"step": 30550
},
{
"epoch": 15.4,
"grad_norm": 1.8829938173294067,
"learning_rate": 6.978592964824121e-06,
"loss": 0.0029,
"step": 30575
},
{
"epoch": 15.42,
"grad_norm": 0.4301297068595886,
"learning_rate": 6.976080402010051e-06,
"loss": 0.0022,
"step": 30600
},
{
"epoch": 15.43,
"grad_norm": 0.4856531322002411,
"learning_rate": 6.97356783919598e-06,
"loss": 0.0022,
"step": 30625
},
{
"epoch": 15.44,
"grad_norm": 1.4196797609329224,
"learning_rate": 6.97105527638191e-06,
"loss": 0.0024,
"step": 30650
},
{
"epoch": 15.45,
"grad_norm": 0.9456383585929871,
"learning_rate": 6.9685427135678396e-06,
"loss": 0.0026,
"step": 30675
},
{
"epoch": 15.47,
"grad_norm": 0.5812883973121643,
"learning_rate": 6.96603015075377e-06,
"loss": 0.0023,
"step": 30700
},
{
"epoch": 15.48,
"grad_norm": 1.5798356533050537,
"learning_rate": 6.963517587939699e-06,
"loss": 0.0026,
"step": 30725
},
{
"epoch": 15.49,
"grad_norm": 1.7559192180633545,
"learning_rate": 6.961005025125629e-06,
"loss": 0.0023,
"step": 30750
},
{
"epoch": 15.5,
"grad_norm": 0.9900276064872742,
"learning_rate": 6.958492462311558e-06,
"loss": 0.0023,
"step": 30775
},
{
"epoch": 15.52,
"grad_norm": 1.1745249032974243,
"learning_rate": 6.955979899497488e-06,
"loss": 0.0028,
"step": 30800
},
{
"epoch": 15.53,
"grad_norm": 1.3300117254257202,
"learning_rate": 6.953467336683417e-06,
"loss": 0.0021,
"step": 30825
},
{
"epoch": 15.54,
"grad_norm": 1.642714023590088,
"learning_rate": 6.950954773869347e-06,
"loss": 0.0022,
"step": 30850
},
{
"epoch": 15.55,
"grad_norm": 1.159639835357666,
"learning_rate": 6.9484422110552775e-06,
"loss": 0.0024,
"step": 30875
},
{
"epoch": 15.57,
"grad_norm": 1.291977882385254,
"learning_rate": 6.945929648241206e-06,
"loss": 0.0021,
"step": 30900
},
{
"epoch": 15.58,
"grad_norm": 2.5278995037078857,
"learning_rate": 6.943417085427136e-06,
"loss": 0.0027,
"step": 30925
},
{
"epoch": 15.59,
"grad_norm": 1.345812439918518,
"learning_rate": 6.9409045226130655e-06,
"loss": 0.0023,
"step": 30950
},
{
"epoch": 15.6,
"grad_norm": 0.608741044998169,
"learning_rate": 6.938391959798996e-06,
"loss": 0.0027,
"step": 30975
},
{
"epoch": 15.62,
"grad_norm": 1.752109169960022,
"learning_rate": 6.935879396984925e-06,
"loss": 0.0025,
"step": 31000
},
{
"epoch": 15.62,
"eval_loss": 0.31733256578445435,
"eval_runtime": 645.4398,
"eval_samples_per_second": 2.183,
"eval_steps_per_second": 2.183,
"eval_wer": 23.590453130404704,
"step": 31000
},
{
"epoch": 15.63,
"grad_norm": 0.8238040804862976,
"learning_rate": 6.933366834170855e-06,
"loss": 0.0022,
"step": 31025
},
{
"epoch": 15.64,
"grad_norm": 0.9175546169281006,
"learning_rate": 6.930854271356785e-06,
"loss": 0.0023,
"step": 31050
},
{
"epoch": 15.65,
"grad_norm": 1.2227699756622314,
"learning_rate": 6.928341708542714e-06,
"loss": 0.0022,
"step": 31075
},
{
"epoch": 15.67,
"grad_norm": 1.981120228767395,
"learning_rate": 6.925829145728644e-06,
"loss": 0.0022,
"step": 31100
},
{
"epoch": 15.68,
"grad_norm": 1.4415370225906372,
"learning_rate": 6.923316582914573e-06,
"loss": 0.0025,
"step": 31125
},
{
"epoch": 15.69,
"grad_norm": 1.6438077688217163,
"learning_rate": 6.9208040201005035e-06,
"loss": 0.0024,
"step": 31150
},
{
"epoch": 15.71,
"grad_norm": 2.1774020195007324,
"learning_rate": 6.918291457286432e-06,
"loss": 0.0024,
"step": 31175
},
{
"epoch": 15.72,
"grad_norm": 0.5905573964118958,
"learning_rate": 6.915778894472362e-06,
"loss": 0.0025,
"step": 31200
},
{
"epoch": 15.73,
"grad_norm": 1.7300103902816772,
"learning_rate": 6.9132663316582915e-06,
"loss": 0.0026,
"step": 31225
},
{
"epoch": 15.74,
"grad_norm": 1.1542717218399048,
"learning_rate": 6.910753768844222e-06,
"loss": 0.0023,
"step": 31250
},
{
"epoch": 15.76,
"grad_norm": 1.279700756072998,
"learning_rate": 6.908241206030152e-06,
"loss": 0.0024,
"step": 31275
},
{
"epoch": 15.77,
"grad_norm": 0.8788714408874512,
"learning_rate": 6.905728643216081e-06,
"loss": 0.0026,
"step": 31300
},
{
"epoch": 15.78,
"grad_norm": 0.8979476094245911,
"learning_rate": 6.903216080402011e-06,
"loss": 0.0023,
"step": 31325
},
{
"epoch": 15.79,
"grad_norm": 0.406665176153183,
"learning_rate": 6.90070351758794e-06,
"loss": 0.0021,
"step": 31350
},
{
"epoch": 15.81,
"grad_norm": 0.7604673504829407,
"learning_rate": 6.89819095477387e-06,
"loss": 0.0023,
"step": 31375
},
{
"epoch": 15.82,
"grad_norm": 1.2543871402740479,
"learning_rate": 6.895678391959799e-06,
"loss": 0.0026,
"step": 31400
},
{
"epoch": 15.83,
"grad_norm": 0.8910918235778809,
"learning_rate": 6.8931658291457294e-06,
"loss": 0.002,
"step": 31425
},
{
"epoch": 15.84,
"grad_norm": 0.4898677468299866,
"learning_rate": 6.890653266331658e-06,
"loss": 0.0025,
"step": 31450
},
{
"epoch": 15.86,
"grad_norm": 1.3053447008132935,
"learning_rate": 6.888140703517588e-06,
"loss": 0.003,
"step": 31475
},
{
"epoch": 15.87,
"grad_norm": 1.2385072708129883,
"learning_rate": 6.885628140703518e-06,
"loss": 0.0023,
"step": 31500
},
{
"epoch": 15.88,
"grad_norm": 1.81439208984375,
"learning_rate": 6.8831155778894476e-06,
"loss": 0.0022,
"step": 31525
},
{
"epoch": 15.89,
"grad_norm": 2.093777894973755,
"learning_rate": 6.880603015075378e-06,
"loss": 0.0025,
"step": 31550
},
{
"epoch": 15.91,
"grad_norm": 0.8900623321533203,
"learning_rate": 6.878090452261307e-06,
"loss": 0.0027,
"step": 31575
},
{
"epoch": 15.92,
"grad_norm": 1.2843748331069946,
"learning_rate": 6.875577889447237e-06,
"loss": 0.0021,
"step": 31600
},
{
"epoch": 15.93,
"grad_norm": 0.9298327565193176,
"learning_rate": 6.873065326633166e-06,
"loss": 0.0019,
"step": 31625
},
{
"epoch": 15.94,
"grad_norm": 0.8842711448669434,
"learning_rate": 6.870552763819096e-06,
"loss": 0.0021,
"step": 31650
},
{
"epoch": 15.96,
"grad_norm": 0.9891393184661865,
"learning_rate": 6.868040201005026e-06,
"loss": 0.0024,
"step": 31675
},
{
"epoch": 15.97,
"grad_norm": 1.2643588781356812,
"learning_rate": 6.865527638190955e-06,
"loss": 0.0023,
"step": 31700
},
{
"epoch": 15.98,
"grad_norm": 0.7340218424797058,
"learning_rate": 6.8630150753768855e-06,
"loss": 0.0024,
"step": 31725
},
{
"epoch": 15.99,
"grad_norm": 1.090314507484436,
"learning_rate": 6.860502512562814e-06,
"loss": 0.0025,
"step": 31750
},
{
"epoch": 16.01,
"grad_norm": 2.1509249210357666,
"learning_rate": 6.857989949748744e-06,
"loss": 0.0023,
"step": 31775
},
{
"epoch": 16.02,
"grad_norm": 1.119979977607727,
"learning_rate": 6.8554773869346735e-06,
"loss": 0.0015,
"step": 31800
},
{
"epoch": 16.03,
"grad_norm": 0.9909570217132568,
"learning_rate": 6.852964824120604e-06,
"loss": 0.002,
"step": 31825
},
{
"epoch": 16.05,
"grad_norm": 1.8232905864715576,
"learning_rate": 6.850452261306533e-06,
"loss": 0.0016,
"step": 31850
},
{
"epoch": 16.06,
"grad_norm": 1.7875540256500244,
"learning_rate": 6.847939698492463e-06,
"loss": 0.0015,
"step": 31875
},
{
"epoch": 16.07,
"grad_norm": 1.3473516702651978,
"learning_rate": 6.845427135678393e-06,
"loss": 0.0019,
"step": 31900
},
{
"epoch": 16.08,
"grad_norm": 0.521841824054718,
"learning_rate": 6.842914572864322e-06,
"loss": 0.0017,
"step": 31925
},
{
"epoch": 16.1,
"grad_norm": 1.0813759565353394,
"learning_rate": 6.840402010050252e-06,
"loss": 0.0016,
"step": 31950
},
{
"epoch": 16.11,
"grad_norm": 0.8133834004402161,
"learning_rate": 6.837889447236181e-06,
"loss": 0.0013,
"step": 31975
},
{
"epoch": 16.12,
"grad_norm": 0.6576656103134155,
"learning_rate": 6.8353768844221115e-06,
"loss": 0.0016,
"step": 32000
},
{
"epoch": 16.12,
"eval_loss": 0.3177714943885803,
"eval_runtime": 643.1803,
"eval_samples_per_second": 2.191,
"eval_steps_per_second": 2.191,
"eval_wer": 23.95710826703563,
"step": 32000
},
{
"epoch": 16.13,
"grad_norm": 1.695088267326355,
"learning_rate": 6.83286432160804e-06,
"loss": 0.0019,
"step": 32025
},
{
"epoch": 16.15,
"grad_norm": 0.49403443932533264,
"learning_rate": 6.83035175879397e-06,
"loss": 0.0017,
"step": 32050
},
{
"epoch": 16.16,
"grad_norm": 1.2288966178894043,
"learning_rate": 6.8278391959798995e-06,
"loss": 0.0018,
"step": 32075
},
{
"epoch": 16.17,
"grad_norm": 1.711982011795044,
"learning_rate": 6.82532663316583e-06,
"loss": 0.0018,
"step": 32100
},
{
"epoch": 16.18,
"grad_norm": 0.9123796820640564,
"learning_rate": 6.82281407035176e-06,
"loss": 0.0022,
"step": 32125
},
{
"epoch": 16.2,
"grad_norm": 0.8187395930290222,
"learning_rate": 6.820301507537689e-06,
"loss": 0.0018,
"step": 32150
},
{
"epoch": 16.21,
"grad_norm": 0.676909327507019,
"learning_rate": 6.817788944723619e-06,
"loss": 0.0019,
"step": 32175
},
{
"epoch": 16.22,
"grad_norm": 0.28308579325675964,
"learning_rate": 6.815276381909548e-06,
"loss": 0.0017,
"step": 32200
},
{
"epoch": 16.23,
"grad_norm": 0.8627307415008545,
"learning_rate": 6.812763819095478e-06,
"loss": 0.0017,
"step": 32225
},
{
"epoch": 16.25,
"grad_norm": 0.5035674571990967,
"learning_rate": 6.810251256281407e-06,
"loss": 0.0021,
"step": 32250
},
{
"epoch": 16.26,
"grad_norm": 0.611066997051239,
"learning_rate": 6.8077386934673374e-06,
"loss": 0.0018,
"step": 32275
},
{
"epoch": 16.27,
"grad_norm": 1.7766281366348267,
"learning_rate": 6.805226130653268e-06,
"loss": 0.002,
"step": 32300
},
{
"epoch": 16.28,
"grad_norm": 0.9835132956504822,
"learning_rate": 6.802713567839196e-06,
"loss": 0.0018,
"step": 32325
},
{
"epoch": 16.3,
"grad_norm": 1.363574504852295,
"learning_rate": 6.800201005025126e-06,
"loss": 0.002,
"step": 32350
},
{
"epoch": 16.31,
"grad_norm": 1.0854887962341309,
"learning_rate": 6.7976884422110556e-06,
"loss": 0.0019,
"step": 32375
},
{
"epoch": 16.32,
"grad_norm": 2.8377525806427,
"learning_rate": 6.795175879396986e-06,
"loss": 0.0023,
"step": 32400
},
{
"epoch": 16.34,
"grad_norm": 2.0450568199157715,
"learning_rate": 6.792663316582915e-06,
"loss": 0.0019,
"step": 32425
},
{
"epoch": 16.35,
"grad_norm": 1.6299299001693726,
"learning_rate": 6.790150753768845e-06,
"loss": 0.002,
"step": 32450
},
{
"epoch": 16.36,
"grad_norm": 1.7007014751434326,
"learning_rate": 6.787638190954774e-06,
"loss": 0.0022,
"step": 32475
},
{
"epoch": 16.37,
"grad_norm": 1.5185723304748535,
"learning_rate": 6.785125628140704e-06,
"loss": 0.002,
"step": 32500
},
{
"epoch": 16.39,
"grad_norm": 1.15962815284729,
"learning_rate": 6.782613065326634e-06,
"loss": 0.002,
"step": 32525
},
{
"epoch": 16.4,
"grad_norm": 0.9685630798339844,
"learning_rate": 6.780100502512563e-06,
"loss": 0.0023,
"step": 32550
},
{
"epoch": 16.41,
"grad_norm": 0.7952429056167603,
"learning_rate": 6.7775879396984935e-06,
"loss": 0.0024,
"step": 32575
},
{
"epoch": 16.42,
"grad_norm": 1.4336612224578857,
"learning_rate": 6.775075376884422e-06,
"loss": 0.0019,
"step": 32600
},
{
"epoch": 16.44,
"grad_norm": 1.155975580215454,
"learning_rate": 6.772562814070352e-06,
"loss": 0.0023,
"step": 32625
},
{
"epoch": 16.45,
"grad_norm": 0.660798966884613,
"learning_rate": 6.7700502512562815e-06,
"loss": 0.0024,
"step": 32650
},
{
"epoch": 16.46,
"grad_norm": 1.6323788166046143,
"learning_rate": 6.767537688442212e-06,
"loss": 0.002,
"step": 32675
},
{
"epoch": 16.47,
"grad_norm": 0.47817263007164,
"learning_rate": 6.765125628140704e-06,
"loss": 0.0023,
"step": 32700
},
{
"epoch": 16.49,
"grad_norm": 1.6108895540237427,
"learning_rate": 6.762613065326634e-06,
"loss": 0.0024,
"step": 32725
},
{
"epoch": 16.5,
"grad_norm": 1.8570855855941772,
"learning_rate": 6.7601005025125636e-06,
"loss": 0.0025,
"step": 32750
},
{
"epoch": 16.51,
"grad_norm": 1.3422707319259644,
"learning_rate": 6.757587939698494e-06,
"loss": 0.002,
"step": 32775
},
{
"epoch": 16.52,
"grad_norm": 0.9396295547485352,
"learning_rate": 6.755075376884422e-06,
"loss": 0.0018,
"step": 32800
},
{
"epoch": 16.54,
"grad_norm": 0.6690593361854553,
"learning_rate": 6.752562814070352e-06,
"loss": 0.0024,
"step": 32825
},
{
"epoch": 16.55,
"grad_norm": 0.6064794659614563,
"learning_rate": 6.750050251256282e-06,
"loss": 0.0019,
"step": 32850
},
{
"epoch": 16.56,
"grad_norm": 0.6732133626937866,
"learning_rate": 6.747537688442212e-06,
"loss": 0.0019,
"step": 32875
},
{
"epoch": 16.57,
"grad_norm": 1.029380202293396,
"learning_rate": 6.74502512562814e-06,
"loss": 0.0023,
"step": 32900
},
{
"epoch": 16.59,
"grad_norm": 0.659989058971405,
"learning_rate": 6.7425125628140705e-06,
"loss": 0.0021,
"step": 32925
},
{
"epoch": 16.6,
"grad_norm": 0.6724833846092224,
"learning_rate": 6.740000000000001e-06,
"loss": 0.002,
"step": 32950
},
{
"epoch": 16.61,
"grad_norm": 1.073951005935669,
"learning_rate": 6.73748743718593e-06,
"loss": 0.0018,
"step": 32975
},
{
"epoch": 16.62,
"grad_norm": 0.2644835412502289,
"learning_rate": 6.73497487437186e-06,
"loss": 0.0015,
"step": 33000
},
{
"epoch": 16.62,
"eval_loss": 0.31893399357795715,
"eval_runtime": 638.1335,
"eval_samples_per_second": 2.208,
"eval_steps_per_second": 2.208,
"eval_wer": 23.625043237634035,
"step": 33000
},
{
"epoch": 16.64,
"grad_norm": 0.4319547712802887,
"learning_rate": 6.7324623115577895e-06,
"loss": 0.0021,
"step": 33025
},
{
"epoch": 16.65,
"grad_norm": 0.5117477178573608,
"learning_rate": 6.72994974874372e-06,
"loss": 0.0017,
"step": 33050
},
{
"epoch": 16.66,
"grad_norm": 0.599642276763916,
"learning_rate": 6.727437185929648e-06,
"loss": 0.0022,
"step": 33075
},
{
"epoch": 16.68,
"grad_norm": 2.386610984802246,
"learning_rate": 6.724924623115578e-06,
"loss": 0.0024,
"step": 33100
},
{
"epoch": 16.69,
"grad_norm": 0.7631763219833374,
"learning_rate": 6.7224120603015085e-06,
"loss": 0.0019,
"step": 33125
},
{
"epoch": 16.7,
"grad_norm": 1.534925937652588,
"learning_rate": 6.719899497487438e-06,
"loss": 0.0022,
"step": 33150
},
{
"epoch": 16.71,
"grad_norm": 1.815709114074707,
"learning_rate": 6.717386934673368e-06,
"loss": 0.0022,
"step": 33175
},
{
"epoch": 16.73,
"grad_norm": 0.47216132283210754,
"learning_rate": 6.7148743718592965e-06,
"loss": 0.002,
"step": 33200
},
{
"epoch": 16.74,
"grad_norm": 1.2903132438659668,
"learning_rate": 6.712361809045227e-06,
"loss": 0.0024,
"step": 33225
},
{
"epoch": 16.75,
"grad_norm": 1.4618940353393555,
"learning_rate": 6.709849246231156e-06,
"loss": 0.0021,
"step": 33250
},
{
"epoch": 16.76,
"grad_norm": 2.112274169921875,
"learning_rate": 6.707336683417086e-06,
"loss": 0.0021,
"step": 33275
},
{
"epoch": 16.78,
"grad_norm": 1.849009394645691,
"learning_rate": 6.7048241206030155e-06,
"loss": 0.0023,
"step": 33300
},
{
"epoch": 16.79,
"grad_norm": 1.0181339979171753,
"learning_rate": 6.702311557788946e-06,
"loss": 0.002,
"step": 33325
},
{
"epoch": 16.8,
"grad_norm": 1.6199973821640015,
"learning_rate": 6.699798994974876e-06,
"loss": 0.0024,
"step": 33350
},
{
"epoch": 16.81,
"grad_norm": 0.8824648261070251,
"learning_rate": 6.697286432160804e-06,
"loss": 0.0021,
"step": 33375
},
{
"epoch": 16.83,
"grad_norm": 1.2682048082351685,
"learning_rate": 6.6947738693467344e-06,
"loss": 0.0021,
"step": 33400
},
{
"epoch": 16.84,
"grad_norm": 0.9669592976570129,
"learning_rate": 6.692261306532664e-06,
"loss": 0.0019,
"step": 33425
},
{
"epoch": 16.85,
"grad_norm": 2.088453769683838,
"learning_rate": 6.689748743718594e-06,
"loss": 0.0018,
"step": 33450
},
{
"epoch": 16.86,
"grad_norm": 1.75133216381073,
"learning_rate": 6.687236180904522e-06,
"loss": 0.0023,
"step": 33475
},
{
"epoch": 16.88,
"grad_norm": 2.224334239959717,
"learning_rate": 6.684723618090453e-06,
"loss": 0.0019,
"step": 33500
},
{
"epoch": 16.89,
"grad_norm": 0.6259239315986633,
"learning_rate": 6.682211055276382e-06,
"loss": 0.0026,
"step": 33525
},
{
"epoch": 16.9,
"grad_norm": 1.3590632677078247,
"learning_rate": 6.679698492462312e-06,
"loss": 0.0022,
"step": 33550
},
{
"epoch": 16.91,
"grad_norm": 1.8630064725875854,
"learning_rate": 6.677185929648242e-06,
"loss": 0.0025,
"step": 33575
},
{
"epoch": 16.93,
"grad_norm": 1.7753084897994995,
"learning_rate": 6.6746733668341716e-06,
"loss": 0.0022,
"step": 33600
},
{
"epoch": 16.94,
"grad_norm": 0.30768975615501404,
"learning_rate": 6.672160804020102e-06,
"loss": 0.0021,
"step": 33625
},
{
"epoch": 16.95,
"grad_norm": 1.4861748218536377,
"learning_rate": 6.66964824120603e-06,
"loss": 0.002,
"step": 33650
},
{
"epoch": 16.96,
"grad_norm": 1.5577760934829712,
"learning_rate": 6.66713567839196e-06,
"loss": 0.002,
"step": 33675
},
{
"epoch": 16.98,
"grad_norm": 0.8203927278518677,
"learning_rate": 6.66462311557789e-06,
"loss": 0.0019,
"step": 33700
},
{
"epoch": 16.99,
"grad_norm": 1.1603564023971558,
"learning_rate": 6.66211055276382e-06,
"loss": 0.0021,
"step": 33725
},
{
"epoch": 17.0,
"grad_norm": 0.41753071546554565,
"learning_rate": 6.65959798994975e-06,
"loss": 0.0019,
"step": 33750
},
{
"epoch": 17.02,
"grad_norm": 0.49397504329681396,
"learning_rate": 6.6570854271356785e-06,
"loss": 0.0016,
"step": 33775
},
{
"epoch": 17.03,
"grad_norm": 1.637376070022583,
"learning_rate": 6.654572864321609e-06,
"loss": 0.0017,
"step": 33800
},
{
"epoch": 17.04,
"grad_norm": 0.45649200677871704,
"learning_rate": 6.652060301507538e-06,
"loss": 0.0017,
"step": 33825
},
{
"epoch": 17.05,
"grad_norm": 1.1269056797027588,
"learning_rate": 6.649547738693468e-06,
"loss": 0.0021,
"step": 33850
},
{
"epoch": 17.07,
"grad_norm": 1.279366374015808,
"learning_rate": 6.6470351758793975e-06,
"loss": 0.0016,
"step": 33875
},
{
"epoch": 17.08,
"grad_norm": 1.3553489446640015,
"learning_rate": 6.644522613065328e-06,
"loss": 0.0014,
"step": 33900
},
{
"epoch": 17.09,
"grad_norm": 0.3694764971733093,
"learning_rate": 6.642010050251256e-06,
"loss": 0.0015,
"step": 33925
},
{
"epoch": 17.1,
"grad_norm": 0.7455251812934875,
"learning_rate": 6.639497487437186e-06,
"loss": 0.0015,
"step": 33950
},
{
"epoch": 17.12,
"grad_norm": 1.4663114547729492,
"learning_rate": 6.6369849246231165e-06,
"loss": 0.0014,
"step": 33975
},
{
"epoch": 17.13,
"grad_norm": 0.41065290570259094,
"learning_rate": 6.634472361809046e-06,
"loss": 0.0012,
"step": 34000
},
{
"epoch": 17.13,
"eval_loss": 0.31717780232429504,
"eval_runtime": 640.7627,
"eval_samples_per_second": 2.199,
"eval_steps_per_second": 2.199,
"eval_wer": 22.94707713593912,
"step": 34000
},
{
"epoch": 17.14,
"grad_norm": 0.921389102935791,
"learning_rate": 6.631959798994976e-06,
"loss": 0.0013,
"step": 34025
},
{
"epoch": 17.15,
"grad_norm": 1.2974750995635986,
"learning_rate": 6.6294472361809045e-06,
"loss": 0.0015,
"step": 34050
},
{
"epoch": 17.17,
"grad_norm": 0.946368932723999,
"learning_rate": 6.626934673366835e-06,
"loss": 0.0015,
"step": 34075
},
{
"epoch": 17.18,
"grad_norm": 1.1156178712844849,
"learning_rate": 6.624422110552764e-06,
"loss": 0.0015,
"step": 34100
},
{
"epoch": 17.19,
"grad_norm": 0.447689026594162,
"learning_rate": 6.621909547738694e-06,
"loss": 0.0016,
"step": 34125
},
{
"epoch": 17.2,
"grad_norm": 0.7558609247207642,
"learning_rate": 6.6193969849246235e-06,
"loss": 0.0016,
"step": 34150
},
{
"epoch": 17.22,
"grad_norm": 0.8499734997749329,
"learning_rate": 6.616884422110554e-06,
"loss": 0.0017,
"step": 34175
},
{
"epoch": 17.23,
"grad_norm": 0.40783509612083435,
"learning_rate": 6.614371859296484e-06,
"loss": 0.002,
"step": 34200
},
{
"epoch": 17.24,
"grad_norm": 1.5999126434326172,
"learning_rate": 6.611859296482412e-06,
"loss": 0.002,
"step": 34225
},
{
"epoch": 17.25,
"grad_norm": 0.852052628993988,
"learning_rate": 6.6093467336683424e-06,
"loss": 0.0019,
"step": 34250
},
{
"epoch": 17.27,
"grad_norm": 0.36311525106430054,
"learning_rate": 6.606834170854272e-06,
"loss": 0.0017,
"step": 34275
},
{
"epoch": 17.28,
"grad_norm": 2.138871669769287,
"learning_rate": 6.604321608040202e-06,
"loss": 0.0016,
"step": 34300
},
{
"epoch": 17.29,
"grad_norm": 1.056746482849121,
"learning_rate": 6.60180904522613e-06,
"loss": 0.0015,
"step": 34325
},
{
"epoch": 17.3,
"grad_norm": 0.8301752805709839,
"learning_rate": 6.599296482412061e-06,
"loss": 0.002,
"step": 34350
},
{
"epoch": 17.32,
"grad_norm": 1.781783938407898,
"learning_rate": 6.596783919597991e-06,
"loss": 0.0017,
"step": 34375
},
{
"epoch": 17.33,
"grad_norm": 1.2563107013702393,
"learning_rate": 6.59427135678392e-06,
"loss": 0.0017,
"step": 34400
},
{
"epoch": 17.34,
"grad_norm": 2.3324105739593506,
"learning_rate": 6.59175879396985e-06,
"loss": 0.002,
"step": 34425
},
{
"epoch": 17.36,
"grad_norm": 1.0351413488388062,
"learning_rate": 6.5892462311557796e-06,
"loss": 0.002,
"step": 34450
},
{
"epoch": 17.37,
"grad_norm": 1.9729125499725342,
"learning_rate": 6.58673366834171e-06,
"loss": 0.0019,
"step": 34475
},
{
"epoch": 17.38,
"grad_norm": 0.7360727787017822,
"learning_rate": 6.584221105527638e-06,
"loss": 0.0016,
"step": 34500
},
{
"epoch": 17.39,
"grad_norm": 1.5218764543533325,
"learning_rate": 6.581708542713568e-06,
"loss": 0.0017,
"step": 34525
},
{
"epoch": 17.41,
"grad_norm": 0.9143256545066833,
"learning_rate": 6.579195979899498e-06,
"loss": 0.002,
"step": 34550
},
{
"epoch": 17.42,
"grad_norm": 1.5911108255386353,
"learning_rate": 6.576683417085428e-06,
"loss": 0.0021,
"step": 34575
},
{
"epoch": 17.43,
"grad_norm": 1.1945171356201172,
"learning_rate": 6.574170854271358e-06,
"loss": 0.0021,
"step": 34600
},
{
"epoch": 17.44,
"grad_norm": 0.6065207719802856,
"learning_rate": 6.5716582914572865e-06,
"loss": 0.0015,
"step": 34625
},
{
"epoch": 17.46,
"grad_norm": 1.3287162780761719,
"learning_rate": 6.569145728643217e-06,
"loss": 0.0015,
"step": 34650
},
{
"epoch": 17.47,
"grad_norm": 1.1048755645751953,
"learning_rate": 6.566633165829146e-06,
"loss": 0.0016,
"step": 34675
},
{
"epoch": 17.48,
"grad_norm": 1.8290241956710815,
"learning_rate": 6.564120603015076e-06,
"loss": 0.0017,
"step": 34700
},
{
"epoch": 17.49,
"grad_norm": 0.7153010964393616,
"learning_rate": 6.5616080402010055e-06,
"loss": 0.0017,
"step": 34725
},
{
"epoch": 17.51,
"grad_norm": 1.1753082275390625,
"learning_rate": 6.559095477386936e-06,
"loss": 0.0019,
"step": 34750
},
{
"epoch": 17.52,
"grad_norm": 0.8656060695648193,
"learning_rate": 6.556582914572864e-06,
"loss": 0.0018,
"step": 34775
},
{
"epoch": 17.53,
"grad_norm": 0.9170093536376953,
"learning_rate": 6.554070351758794e-06,
"loss": 0.0017,
"step": 34800
},
{
"epoch": 17.54,
"grad_norm": 0.8003792762756348,
"learning_rate": 6.5515577889447245e-06,
"loss": 0.0016,
"step": 34825
},
{
"epoch": 17.56,
"grad_norm": 0.9868853092193604,
"learning_rate": 6.549145728643217e-06,
"loss": 0.0015,
"step": 34850
},
{
"epoch": 17.57,
"grad_norm": 1.0430176258087158,
"learning_rate": 6.546633165829146e-06,
"loss": 0.0018,
"step": 34875
},
{
"epoch": 17.58,
"grad_norm": 0.36971691250801086,
"learning_rate": 6.544120603015076e-06,
"loss": 0.0018,
"step": 34900
},
{
"epoch": 17.59,
"grad_norm": 0.87406325340271,
"learning_rate": 6.541608040201005e-06,
"loss": 0.002,
"step": 34925
},
{
"epoch": 17.61,
"grad_norm": 2.4083333015441895,
"learning_rate": 6.539095477386935e-06,
"loss": 0.002,
"step": 34950
},
{
"epoch": 17.62,
"grad_norm": 0.49866533279418945,
"learning_rate": 6.536582914572864e-06,
"loss": 0.0021,
"step": 34975
},
{
"epoch": 17.63,
"grad_norm": 0.744525134563446,
"learning_rate": 6.5340703517587945e-06,
"loss": 0.0021,
"step": 35000
},
{
"epoch": 17.63,
"eval_loss": 0.3279932141304016,
"eval_runtime": 640.0538,
"eval_samples_per_second": 2.201,
"eval_steps_per_second": 2.201,
"eval_wer": 23.251470079557247,
"step": 35000
},
{
"epoch": 17.64,
"grad_norm": 1.2228732109069824,
"learning_rate": 6.531557788944725e-06,
"loss": 0.0018,
"step": 35025
},
{
"epoch": 17.66,
"grad_norm": 2.64949631690979,
"learning_rate": 6.529045226130654e-06,
"loss": 0.0019,
"step": 35050
},
{
"epoch": 17.67,
"grad_norm": 0.8812341094017029,
"learning_rate": 6.526532663316583e-06,
"loss": 0.0016,
"step": 35075
},
{
"epoch": 17.68,
"grad_norm": 1.3396104574203491,
"learning_rate": 6.524020100502513e-06,
"loss": 0.0017,
"step": 35100
},
{
"epoch": 17.7,
"grad_norm": 0.6547167897224426,
"learning_rate": 6.521507537688443e-06,
"loss": 0.0019,
"step": 35125
},
{
"epoch": 17.71,
"grad_norm": 1.9075217247009277,
"learning_rate": 6.518994974874372e-06,
"loss": 0.0017,
"step": 35150
},
{
"epoch": 17.72,
"grad_norm": 1.7751950025558472,
"learning_rate": 6.516482412060302e-06,
"loss": 0.0021,
"step": 35175
},
{
"epoch": 17.73,
"grad_norm": 1.1410751342773438,
"learning_rate": 6.514070351758795e-06,
"loss": 0.0021,
"step": 35200
},
{
"epoch": 17.75,
"grad_norm": 0.8662394285202026,
"learning_rate": 6.511557788944725e-06,
"loss": 0.0018,
"step": 35225
},
{
"epoch": 17.76,
"grad_norm": 1.586671233177185,
"learning_rate": 6.509045226130653e-06,
"loss": 0.0025,
"step": 35250
},
{
"epoch": 17.77,
"grad_norm": 1.0892744064331055,
"learning_rate": 6.5065326633165835e-06,
"loss": 0.0024,
"step": 35275
},
{
"epoch": 17.78,
"grad_norm": 0.37060225009918213,
"learning_rate": 6.504020100502513e-06,
"loss": 0.0021,
"step": 35300
},
{
"epoch": 17.8,
"grad_norm": 0.7521613240242004,
"learning_rate": 6.501507537688443e-06,
"loss": 0.0021,
"step": 35325
},
{
"epoch": 17.81,
"grad_norm": 2.0023598670959473,
"learning_rate": 6.4989949748743715e-06,
"loss": 0.0017,
"step": 35350
},
{
"epoch": 17.82,
"grad_norm": 1.1638795137405396,
"learning_rate": 6.496482412060302e-06,
"loss": 0.0019,
"step": 35375
},
{
"epoch": 17.83,
"grad_norm": 1.654910922050476,
"learning_rate": 6.493969849246232e-06,
"loss": 0.002,
"step": 35400
},
{
"epoch": 17.85,
"grad_norm": 1.0789589881896973,
"learning_rate": 6.491457286432161e-06,
"loss": 0.0022,
"step": 35425
},
{
"epoch": 17.86,
"grad_norm": 1.5805654525756836,
"learning_rate": 6.488944723618091e-06,
"loss": 0.002,
"step": 35450
},
{
"epoch": 17.87,
"grad_norm": 1.393417239189148,
"learning_rate": 6.486432160804021e-06,
"loss": 0.0019,
"step": 35475
},
{
"epoch": 17.88,
"grad_norm": 1.4259059429168701,
"learning_rate": 6.483919597989951e-06,
"loss": 0.0018,
"step": 35500
},
{
"epoch": 17.9,
"grad_norm": 1.371140956878662,
"learning_rate": 6.481407035175879e-06,
"loss": 0.0019,
"step": 35525
},
{
"epoch": 17.91,
"grad_norm": 1.099618673324585,
"learning_rate": 6.4788944723618095e-06,
"loss": 0.0019,
"step": 35550
},
{
"epoch": 17.92,
"grad_norm": 0.654387354850769,
"learning_rate": 6.476381909547739e-06,
"loss": 0.0019,
"step": 35575
},
{
"epoch": 17.93,
"grad_norm": 1.3511158227920532,
"learning_rate": 6.473869346733669e-06,
"loss": 0.0017,
"step": 35600
},
{
"epoch": 17.95,
"grad_norm": 1.1146668195724487,
"learning_rate": 6.471356783919599e-06,
"loss": 0.0019,
"step": 35625
},
{
"epoch": 17.96,
"grad_norm": 1.2822802066802979,
"learning_rate": 6.468844221105528e-06,
"loss": 0.0019,
"step": 35650
},
{
"epoch": 17.97,
"grad_norm": 0.6118746399879456,
"learning_rate": 6.466331658291458e-06,
"loss": 0.0021,
"step": 35675
},
{
"epoch": 17.98,
"grad_norm": 1.6765689849853516,
"learning_rate": 6.463819095477387e-06,
"loss": 0.002,
"step": 35700
},
{
"epoch": 18.0,
"grad_norm": 0.8929309844970703,
"learning_rate": 6.461306532663317e-06,
"loss": 0.002,
"step": 35725
},
{
"epoch": 18.01,
"grad_norm": 0.8236270546913147,
"learning_rate": 6.458793969849247e-06,
"loss": 0.0014,
"step": 35750
},
{
"epoch": 18.02,
"grad_norm": 0.3765973150730133,
"learning_rate": 6.456281407035177e-06,
"loss": 0.001,
"step": 35775
},
{
"epoch": 18.04,
"grad_norm": 0.7577652931213379,
"learning_rate": 6.453768844221107e-06,
"loss": 0.0009,
"step": 35800
},
{
"epoch": 18.05,
"grad_norm": 1.3990800380706787,
"learning_rate": 6.451256281407035e-06,
"loss": 0.0012,
"step": 35825
},
{
"epoch": 18.06,
"grad_norm": 0.6104835271835327,
"learning_rate": 6.448743718592966e-06,
"loss": 0.0017,
"step": 35850
},
{
"epoch": 18.07,
"grad_norm": 0.8785896301269531,
"learning_rate": 6.446231155778895e-06,
"loss": 0.0014,
"step": 35875
},
{
"epoch": 18.09,
"grad_norm": 0.3868306875228882,
"learning_rate": 6.443718592964825e-06,
"loss": 0.0014,
"step": 35900
},
{
"epoch": 18.1,
"grad_norm": 1.184727430343628,
"learning_rate": 6.4412060301507536e-06,
"loss": 0.0012,
"step": 35925
},
{
"epoch": 18.11,
"grad_norm": 1.4710326194763184,
"learning_rate": 6.438693467336684e-06,
"loss": 0.0015,
"step": 35950
},
{
"epoch": 18.12,
"grad_norm": 0.4654022753238678,
"learning_rate": 6.436180904522613e-06,
"loss": 0.0011,
"step": 35975
},
{
"epoch": 18.14,
"grad_norm": 0.30787691473960876,
"learning_rate": 6.433668341708543e-06,
"loss": 0.0017,
"step": 36000
},
{
"epoch": 18.14,
"eval_loss": 0.3324070870876312,
"eval_runtime": 779.64,
"eval_samples_per_second": 1.807,
"eval_steps_per_second": 1.807,
"eval_wer": 23.583535108958838,
"step": 36000
},
{
"epoch": 18.15,
"grad_norm": 2.8175275325775146,
"learning_rate": 6.431155778894473e-06,
"loss": 0.0015,
"step": 36025
},
{
"epoch": 18.16,
"grad_norm": 1.644282579421997,
"learning_rate": 6.428643216080403e-06,
"loss": 0.0017,
"step": 36050
},
{
"epoch": 18.17,
"grad_norm": 1.2739876508712769,
"learning_rate": 6.426130653266333e-06,
"loss": 0.0016,
"step": 36075
},
{
"epoch": 18.19,
"grad_norm": 1.1111208200454712,
"learning_rate": 6.423618090452261e-06,
"loss": 0.0011,
"step": 36100
},
{
"epoch": 18.2,
"grad_norm": 0.3262185752391815,
"learning_rate": 6.4211055276381915e-06,
"loss": 0.0012,
"step": 36125
},
{
"epoch": 18.21,
"grad_norm": 1.090649962425232,
"learning_rate": 6.418592964824121e-06,
"loss": 0.0019,
"step": 36150
},
{
"epoch": 18.22,
"grad_norm": 0.6180118322372437,
"learning_rate": 6.416080402010051e-06,
"loss": 0.0016,
"step": 36175
},
{
"epoch": 18.24,
"grad_norm": 1.0317612886428833,
"learning_rate": 6.4135678391959795e-06,
"loss": 0.0019,
"step": 36200
},
{
"epoch": 18.25,
"grad_norm": 0.2697906494140625,
"learning_rate": 6.41105527638191e-06,
"loss": 0.0015,
"step": 36225
},
{
"epoch": 18.26,
"grad_norm": 1.2288458347320557,
"learning_rate": 6.40854271356784e-06,
"loss": 0.0016,
"step": 36250
},
{
"epoch": 18.27,
"grad_norm": 1.6531765460968018,
"learning_rate": 6.406030150753769e-06,
"loss": 0.0013,
"step": 36275
},
{
"epoch": 18.29,
"grad_norm": 0.38945141434669495,
"learning_rate": 6.403517587939699e-06,
"loss": 0.0014,
"step": 36300
},
{
"epoch": 18.3,
"grad_norm": 0.8794446587562561,
"learning_rate": 6.401005025125629e-06,
"loss": 0.0013,
"step": 36325
},
{
"epoch": 18.31,
"grad_norm": 0.6100822687149048,
"learning_rate": 6.398492462311559e-06,
"loss": 0.0016,
"step": 36350
},
{
"epoch": 18.32,
"grad_norm": 1.371356725692749,
"learning_rate": 6.395979899497487e-06,
"loss": 0.0017,
"step": 36375
},
{
"epoch": 18.34,
"grad_norm": 0.791754424571991,
"learning_rate": 6.3934673366834175e-06,
"loss": 0.0014,
"step": 36400
},
{
"epoch": 18.35,
"grad_norm": 1.5653916597366333,
"learning_rate": 6.390954773869347e-06,
"loss": 0.0017,
"step": 36425
},
{
"epoch": 18.36,
"grad_norm": 0.2556110620498657,
"learning_rate": 6.388442211055277e-06,
"loss": 0.0017,
"step": 36450
},
{
"epoch": 18.38,
"grad_norm": 0.7103545665740967,
"learning_rate": 6.385929648241207e-06,
"loss": 0.0016,
"step": 36475
},
{
"epoch": 18.39,
"grad_norm": 1.2815943956375122,
"learning_rate": 6.383417085427136e-06,
"loss": 0.0019,
"step": 36500
},
{
"epoch": 18.4,
"grad_norm": 0.8965465426445007,
"learning_rate": 6.380904522613066e-06,
"loss": 0.0019,
"step": 36525
},
{
"epoch": 18.41,
"grad_norm": 1.0410230159759521,
"learning_rate": 6.378391959798995e-06,
"loss": 0.0015,
"step": 36550
},
{
"epoch": 18.43,
"grad_norm": 1.2315019369125366,
"learning_rate": 6.375879396984925e-06,
"loss": 0.0015,
"step": 36575
},
{
"epoch": 18.44,
"grad_norm": 0.7894676327705383,
"learning_rate": 6.373366834170855e-06,
"loss": 0.0012,
"step": 36600
},
{
"epoch": 18.45,
"grad_norm": 1.8946139812469482,
"learning_rate": 6.370854271356785e-06,
"loss": 0.0016,
"step": 36625
},
{
"epoch": 18.46,
"grad_norm": 0.4600735306739807,
"learning_rate": 6.368341708542715e-06,
"loss": 0.002,
"step": 36650
},
{
"epoch": 18.48,
"grad_norm": 1.5674549341201782,
"learning_rate": 6.365829145728643e-06,
"loss": 0.0018,
"step": 36675
},
{
"epoch": 18.49,
"grad_norm": 0.8131008744239807,
"learning_rate": 6.363316582914574e-06,
"loss": 0.0018,
"step": 36700
},
{
"epoch": 18.5,
"grad_norm": 0.6329362392425537,
"learning_rate": 6.360804020100503e-06,
"loss": 0.0019,
"step": 36725
},
{
"epoch": 18.51,
"grad_norm": 0.8405249714851379,
"learning_rate": 6.358291457286433e-06,
"loss": 0.0017,
"step": 36750
},
{
"epoch": 18.53,
"grad_norm": 1.6821061372756958,
"learning_rate": 6.3557788944723616e-06,
"loss": 0.0014,
"step": 36775
},
{
"epoch": 18.54,
"grad_norm": 0.9368191361427307,
"learning_rate": 6.353266331658292e-06,
"loss": 0.0014,
"step": 36800
},
{
"epoch": 18.55,
"grad_norm": 1.3366914987564087,
"learning_rate": 6.350753768844221e-06,
"loss": 0.0014,
"step": 36825
},
{
"epoch": 18.56,
"grad_norm": 1.3093204498291016,
"learning_rate": 6.348241206030151e-06,
"loss": 0.0018,
"step": 36850
},
{
"epoch": 18.58,
"grad_norm": 1.0798649787902832,
"learning_rate": 6.345728643216081e-06,
"loss": 0.0016,
"step": 36875
},
{
"epoch": 18.59,
"grad_norm": 0.4596012234687805,
"learning_rate": 6.343216080402011e-06,
"loss": 0.0016,
"step": 36900
},
{
"epoch": 18.6,
"grad_norm": 1.961551547050476,
"learning_rate": 6.340703517587941e-06,
"loss": 0.0017,
"step": 36925
},
{
"epoch": 18.61,
"grad_norm": 1.54167640209198,
"learning_rate": 6.338190954773869e-06,
"loss": 0.0018,
"step": 36950
},
{
"epoch": 18.63,
"grad_norm": 1.0647872686386108,
"learning_rate": 6.3356783919597995e-06,
"loss": 0.0015,
"step": 36975
},
{
"epoch": 18.64,
"grad_norm": 0.15062101185321808,
"learning_rate": 6.333165829145729e-06,
"loss": 0.0013,
"step": 37000
},
{
"epoch": 18.64,
"eval_loss": 0.33555132150650024,
"eval_runtime": 644.609,
"eval_samples_per_second": 2.186,
"eval_steps_per_second": 2.186,
"eval_wer": 23.445174680041507,
"step": 37000
},
{
"epoch": 18.65,
"grad_norm": 0.6003009080886841,
"learning_rate": 6.330653266331659e-06,
"loss": 0.0015,
"step": 37025
},
{
"epoch": 18.66,
"grad_norm": 0.7250798344612122,
"learning_rate": 6.3281407035175875e-06,
"loss": 0.0016,
"step": 37050
},
{
"epoch": 18.68,
"grad_norm": 0.7910952568054199,
"learning_rate": 6.325628140703518e-06,
"loss": 0.0016,
"step": 37075
},
{
"epoch": 18.69,
"grad_norm": 1.4833486080169678,
"learning_rate": 6.323115577889448e-06,
"loss": 0.002,
"step": 37100
},
{
"epoch": 18.7,
"grad_norm": 0.8942164182662964,
"learning_rate": 6.320603015075377e-06,
"loss": 0.0018,
"step": 37125
},
{
"epoch": 18.72,
"grad_norm": 0.8438106179237366,
"learning_rate": 6.318090452261307e-06,
"loss": 0.002,
"step": 37150
},
{
"epoch": 18.73,
"grad_norm": 1.0023553371429443,
"learning_rate": 6.315577889447237e-06,
"loss": 0.0013,
"step": 37175
},
{
"epoch": 18.74,
"grad_norm": 0.8116686940193176,
"learning_rate": 6.313065326633167e-06,
"loss": 0.0015,
"step": 37200
},
{
"epoch": 18.75,
"grad_norm": 1.0903185606002808,
"learning_rate": 6.310552763819095e-06,
"loss": 0.0016,
"step": 37225
},
{
"epoch": 18.77,
"grad_norm": 1.1223067045211792,
"learning_rate": 6.3080402010050255e-06,
"loss": 0.0018,
"step": 37250
},
{
"epoch": 18.78,
"grad_norm": 1.5012390613555908,
"learning_rate": 6.305527638190956e-06,
"loss": 0.0018,
"step": 37275
},
{
"epoch": 18.79,
"grad_norm": 1.3460817337036133,
"learning_rate": 6.303015075376885e-06,
"loss": 0.0019,
"step": 37300
},
{
"epoch": 18.8,
"grad_norm": 1.7468082904815674,
"learning_rate": 6.300502512562815e-06,
"loss": 0.0019,
"step": 37325
},
{
"epoch": 18.82,
"grad_norm": 0.5250969529151917,
"learning_rate": 6.297989949748744e-06,
"loss": 0.0017,
"step": 37350
},
{
"epoch": 18.83,
"grad_norm": 0.2302069365978241,
"learning_rate": 6.295477386934674e-06,
"loss": 0.0013,
"step": 37375
},
{
"epoch": 18.84,
"grad_norm": 1.5310719013214111,
"learning_rate": 6.292964824120603e-06,
"loss": 0.0015,
"step": 37400
},
{
"epoch": 18.85,
"grad_norm": 1.8044565916061401,
"learning_rate": 6.290452261306533e-06,
"loss": 0.0014,
"step": 37425
},
{
"epoch": 18.87,
"grad_norm": 0.8181155920028687,
"learning_rate": 6.287939698492463e-06,
"loss": 0.0014,
"step": 37450
},
{
"epoch": 18.88,
"grad_norm": 0.8504043817520142,
"learning_rate": 6.285427135678393e-06,
"loss": 0.0018,
"step": 37475
},
{
"epoch": 18.89,
"grad_norm": 0.27127495408058167,
"learning_rate": 6.282914572864323e-06,
"loss": 0.0016,
"step": 37500
},
{
"epoch": 18.9,
"grad_norm": 1.2492486238479614,
"learning_rate": 6.280402010050251e-06,
"loss": 0.0015,
"step": 37525
},
{
"epoch": 18.92,
"grad_norm": 1.6787339448928833,
"learning_rate": 6.277889447236182e-06,
"loss": 0.0017,
"step": 37550
},
{
"epoch": 18.93,
"grad_norm": 2.014810800552368,
"learning_rate": 6.275376884422111e-06,
"loss": 0.002,
"step": 37575
},
{
"epoch": 18.94,
"grad_norm": 0.9987440705299377,
"learning_rate": 6.272864321608041e-06,
"loss": 0.002,
"step": 37600
},
{
"epoch": 18.95,
"grad_norm": 0.6803994178771973,
"learning_rate": 6.2703517587939696e-06,
"loss": 0.0021,
"step": 37625
},
{
"epoch": 18.97,
"grad_norm": 0.8169840574264526,
"learning_rate": 6.2678391959799e-06,
"loss": 0.0022,
"step": 37650
},
{
"epoch": 18.98,
"grad_norm": 1.3978486061096191,
"learning_rate": 6.265326633165829e-06,
"loss": 0.0018,
"step": 37675
},
{
"epoch": 18.99,
"grad_norm": 1.5592775344848633,
"learning_rate": 6.262814070351759e-06,
"loss": 0.0019,
"step": 37700
},
{
"epoch": 19.01,
"grad_norm": 1.0616681575775146,
"learning_rate": 6.260301507537689e-06,
"loss": 0.0017,
"step": 37725
},
{
"epoch": 19.02,
"grad_norm": 0.9332436919212341,
"learning_rate": 6.257788944723619e-06,
"loss": 0.0014,
"step": 37750
},
{
"epoch": 19.03,
"grad_norm": 1.051811933517456,
"learning_rate": 6.255276381909549e-06,
"loss": 0.0013,
"step": 37775
},
{
"epoch": 19.04,
"grad_norm": 1.1693936586380005,
"learning_rate": 6.252763819095477e-06,
"loss": 0.0014,
"step": 37800
},
{
"epoch": 19.06,
"grad_norm": 1.9111791849136353,
"learning_rate": 6.2502512562814075e-06,
"loss": 0.0014,
"step": 37825
},
{
"epoch": 19.07,
"grad_norm": 0.4390527904033661,
"learning_rate": 6.247738693467337e-06,
"loss": 0.0012,
"step": 37850
},
{
"epoch": 19.08,
"grad_norm": 2.4373393058776855,
"learning_rate": 6.245226130653267e-06,
"loss": 0.0013,
"step": 37875
},
{
"epoch": 19.09,
"grad_norm": 0.6409306526184082,
"learning_rate": 6.242713567839197e-06,
"loss": 0.0014,
"step": 37900
},
{
"epoch": 19.11,
"grad_norm": 0.4275980293750763,
"learning_rate": 6.240201005025126e-06,
"loss": 0.0011,
"step": 37925
},
{
"epoch": 19.12,
"grad_norm": 0.36015447974205017,
"learning_rate": 6.237688442211056e-06,
"loss": 0.0011,
"step": 37950
},
{
"epoch": 19.13,
"grad_norm": 0.4539172351360321,
"learning_rate": 6.235175879396985e-06,
"loss": 0.0012,
"step": 37975
},
{
"epoch": 19.14,
"grad_norm": 0.5768988132476807,
"learning_rate": 6.232663316582915e-06,
"loss": 0.001,
"step": 38000
},
{
"epoch": 19.14,
"eval_loss": 0.3325794041156769,
"eval_runtime": 648.1138,
"eval_samples_per_second": 2.174,
"eval_steps_per_second": 2.174,
"eval_wer": 23.12694569353165,
"step": 38000
},
{
"epoch": 19.16,
"grad_norm": 1.9239482879638672,
"learning_rate": 6.230150753768845e-06,
"loss": 0.0013,
"step": 38025
},
{
"epoch": 19.17,
"grad_norm": 0.30082157254219055,
"learning_rate": 6.227638190954775e-06,
"loss": 0.001,
"step": 38050
},
{
"epoch": 19.18,
"grad_norm": 0.20353496074676514,
"learning_rate": 6.225125628140703e-06,
"loss": 0.0011,
"step": 38075
},
{
"epoch": 19.19,
"grad_norm": 0.6615707278251648,
"learning_rate": 6.2226130653266335e-06,
"loss": 0.0012,
"step": 38100
},
{
"epoch": 19.21,
"grad_norm": 1.4539945125579834,
"learning_rate": 6.220100502512564e-06,
"loss": 0.0012,
"step": 38125
},
{
"epoch": 19.22,
"grad_norm": 0.6011916399002075,
"learning_rate": 6.217587939698493e-06,
"loss": 0.0012,
"step": 38150
},
{
"epoch": 19.23,
"grad_norm": 0.29558372497558594,
"learning_rate": 6.215075376884423e-06,
"loss": 0.0014,
"step": 38175
},
{
"epoch": 19.24,
"grad_norm": 1.2541766166687012,
"learning_rate": 6.212562814070352e-06,
"loss": 0.0011,
"step": 38200
},
{
"epoch": 19.26,
"grad_norm": 2.3364927768707275,
"learning_rate": 6.210050251256282e-06,
"loss": 0.0017,
"step": 38225
},
{
"epoch": 19.27,
"grad_norm": 0.5079712867736816,
"learning_rate": 6.207537688442211e-06,
"loss": 0.0013,
"step": 38250
},
{
"epoch": 19.28,
"grad_norm": 0.9235591292381287,
"learning_rate": 6.205025125628141e-06,
"loss": 0.0014,
"step": 38275
},
{
"epoch": 19.29,
"grad_norm": 0.31673333048820496,
"learning_rate": 6.202512562814071e-06,
"loss": 0.0013,
"step": 38300
},
{
"epoch": 19.31,
"grad_norm": 1.0981833934783936,
"learning_rate": 6.200000000000001e-06,
"loss": 0.0014,
"step": 38325
},
{
"epoch": 19.32,
"grad_norm": 0.6616347432136536,
"learning_rate": 6.197487437185931e-06,
"loss": 0.0014,
"step": 38350
},
{
"epoch": 19.33,
"grad_norm": 0.5207319259643555,
"learning_rate": 6.1949748743718594e-06,
"loss": 0.0013,
"step": 38375
},
{
"epoch": 19.35,
"grad_norm": 1.1700994968414307,
"learning_rate": 6.19246231155779e-06,
"loss": 0.0015,
"step": 38400
},
{
"epoch": 19.36,
"grad_norm": 1.4399977922439575,
"learning_rate": 6.189949748743719e-06,
"loss": 0.0014,
"step": 38425
},
{
"epoch": 19.37,
"grad_norm": 0.9737831354141235,
"learning_rate": 6.187437185929649e-06,
"loss": 0.0013,
"step": 38450
},
{
"epoch": 19.38,
"grad_norm": 0.8050452470779419,
"learning_rate": 6.1849246231155776e-06,
"loss": 0.0011,
"step": 38475
},
{
"epoch": 19.4,
"grad_norm": 0.5391014218330383,
"learning_rate": 6.182412060301508e-06,
"loss": 0.0012,
"step": 38500
},
{
"epoch": 19.41,
"grad_norm": 1.5151572227478027,
"learning_rate": 6.179899497487438e-06,
"loss": 0.0013,
"step": 38525
},
{
"epoch": 19.42,
"grad_norm": 0.6566374897956848,
"learning_rate": 6.177386934673367e-06,
"loss": 0.0012,
"step": 38550
},
{
"epoch": 19.43,
"grad_norm": 0.5387280583381653,
"learning_rate": 6.174874371859297e-06,
"loss": 0.0014,
"step": 38575
},
{
"epoch": 19.45,
"grad_norm": 2.2199933528900146,
"learning_rate": 6.172361809045227e-06,
"loss": 0.0015,
"step": 38600
},
{
"epoch": 19.46,
"grad_norm": 0.5629024505615234,
"learning_rate": 6.169849246231157e-06,
"loss": 0.0014,
"step": 38625
},
{
"epoch": 19.47,
"grad_norm": 1.4785996675491333,
"learning_rate": 6.167336683417085e-06,
"loss": 0.0019,
"step": 38650
},
{
"epoch": 19.48,
"grad_norm": 1.0027951002120972,
"learning_rate": 6.1648241206030155e-06,
"loss": 0.0016,
"step": 38675
},
{
"epoch": 19.5,
"grad_norm": 0.8754851222038269,
"learning_rate": 6.162311557788945e-06,
"loss": 0.0013,
"step": 38700
},
{
"epoch": 19.51,
"grad_norm": 1.2813969850540161,
"learning_rate": 6.159798994974875e-06,
"loss": 0.0012,
"step": 38725
},
{
"epoch": 19.52,
"grad_norm": 0.9958238005638123,
"learning_rate": 6.157286432160805e-06,
"loss": 0.0012,
"step": 38750
},
{
"epoch": 19.53,
"grad_norm": 1.3986196517944336,
"learning_rate": 6.154773869346734e-06,
"loss": 0.0013,
"step": 38775
},
{
"epoch": 19.55,
"grad_norm": 0.44907984137535095,
"learning_rate": 6.152261306532664e-06,
"loss": 0.0013,
"step": 38800
},
{
"epoch": 19.56,
"grad_norm": 1.5967319011688232,
"learning_rate": 6.149748743718593e-06,
"loss": 0.0017,
"step": 38825
},
{
"epoch": 19.57,
"grad_norm": 1.011804223060608,
"learning_rate": 6.147236180904523e-06,
"loss": 0.0013,
"step": 38850
},
{
"epoch": 19.58,
"grad_norm": 0.6981809139251709,
"learning_rate": 6.144723618090453e-06,
"loss": 0.0016,
"step": 38875
},
{
"epoch": 19.6,
"grad_norm": 1.2294851541519165,
"learning_rate": 6.142211055276383e-06,
"loss": 0.0017,
"step": 38900
},
{
"epoch": 19.61,
"grad_norm": 0.9731518030166626,
"learning_rate": 6.139698492462311e-06,
"loss": 0.0016,
"step": 38925
},
{
"epoch": 19.62,
"grad_norm": 1.7019362449645996,
"learning_rate": 6.1371859296482415e-06,
"loss": 0.0016,
"step": 38950
},
{
"epoch": 19.63,
"grad_norm": 0.35968101024627686,
"learning_rate": 6.134673366834172e-06,
"loss": 0.0016,
"step": 38975
},
{
"epoch": 19.65,
"grad_norm": 1.0781711339950562,
"learning_rate": 6.132160804020101e-06,
"loss": 0.0016,
"step": 39000
},
{
"epoch": 19.65,
"eval_loss": 0.34026119112968445,
"eval_runtime": 642.6075,
"eval_samples_per_second": 2.193,
"eval_steps_per_second": 2.193,
"eval_wer": 23.777239709443098,
"step": 39000
},
{
"epoch": 19.66,
"grad_norm": 0.6743261218070984,
"learning_rate": 6.129648241206031e-06,
"loss": 0.0017,
"step": 39025
},
{
"epoch": 19.67,
"grad_norm": 1.3148083686828613,
"learning_rate": 6.12713567839196e-06,
"loss": 0.0014,
"step": 39050
},
{
"epoch": 19.69,
"grad_norm": 1.3721927404403687,
"learning_rate": 6.12462311557789e-06,
"loss": 0.0014,
"step": 39075
},
{
"epoch": 19.7,
"grad_norm": 0.8227803707122803,
"learning_rate": 6.122211055276382e-06,
"loss": 0.0017,
"step": 39100
},
{
"epoch": 19.71,
"grad_norm": 1.0405676364898682,
"learning_rate": 6.1196984924623115e-06,
"loss": 0.0016,
"step": 39125
},
{
"epoch": 19.72,
"grad_norm": 0.7169470191001892,
"learning_rate": 6.117185929648242e-06,
"loss": 0.0017,
"step": 39150
},
{
"epoch": 19.74,
"grad_norm": 0.5327123999595642,
"learning_rate": 6.114673366834172e-06,
"loss": 0.0014,
"step": 39175
},
{
"epoch": 19.75,
"grad_norm": 1.0876247882843018,
"learning_rate": 6.112160804020101e-06,
"loss": 0.0015,
"step": 39200
},
{
"epoch": 19.76,
"grad_norm": 0.7583673596382141,
"learning_rate": 6.109648241206031e-06,
"loss": 0.0011,
"step": 39225
},
{
"epoch": 19.77,
"grad_norm": 0.7003112435340881,
"learning_rate": 6.10713567839196e-06,
"loss": 0.0013,
"step": 39250
},
{
"epoch": 19.79,
"grad_norm": 0.8923580646514893,
"learning_rate": 6.10462311557789e-06,
"loss": 0.0018,
"step": 39275
},
{
"epoch": 19.8,
"grad_norm": 1.0716352462768555,
"learning_rate": 6.102110552763819e-06,
"loss": 0.0012,
"step": 39300
},
{
"epoch": 19.81,
"grad_norm": 1.9225443601608276,
"learning_rate": 6.0995979899497495e-06,
"loss": 0.0016,
"step": 39325
},
{
"epoch": 19.82,
"grad_norm": 0.9054650664329529,
"learning_rate": 6.09708542713568e-06,
"loss": 0.0016,
"step": 39350
},
{
"epoch": 19.84,
"grad_norm": 0.6190009117126465,
"learning_rate": 6.094572864321608e-06,
"loss": 0.0012,
"step": 39375
},
{
"epoch": 19.85,
"grad_norm": 1.1662238836288452,
"learning_rate": 6.092060301507538e-06,
"loss": 0.0013,
"step": 39400
},
{
"epoch": 19.86,
"grad_norm": 1.0806312561035156,
"learning_rate": 6.089547738693468e-06,
"loss": 0.0019,
"step": 39425
},
{
"epoch": 19.87,
"grad_norm": 0.7853173017501831,
"learning_rate": 6.087035175879398e-06,
"loss": 0.0015,
"step": 39450
},
{
"epoch": 19.89,
"grad_norm": 0.9636842012405396,
"learning_rate": 6.084522613065327e-06,
"loss": 0.0014,
"step": 39475
},
{
"epoch": 19.9,
"grad_norm": 1.7559266090393066,
"learning_rate": 6.082010050251257e-06,
"loss": 0.0014,
"step": 39500
},
{
"epoch": 19.91,
"grad_norm": 0.9466189742088318,
"learning_rate": 6.079497487437186e-06,
"loss": 0.0014,
"step": 39525
},
{
"epoch": 19.92,
"grad_norm": 1.7009310722351074,
"learning_rate": 6.076984924623116e-06,
"loss": 0.0015,
"step": 39550
},
{
"epoch": 19.94,
"grad_norm": 1.0113627910614014,
"learning_rate": 6.074472361809046e-06,
"loss": 0.0013,
"step": 39575
},
{
"epoch": 19.95,
"grad_norm": 0.990430474281311,
"learning_rate": 6.071959798994975e-06,
"loss": 0.0016,
"step": 39600
},
{
"epoch": 19.96,
"grad_norm": 1.0662728548049927,
"learning_rate": 6.069447236180906e-06,
"loss": 0.0017,
"step": 39625
},
{
"epoch": 19.97,
"grad_norm": 1.152665138244629,
"learning_rate": 6.066934673366834e-06,
"loss": 0.0019,
"step": 39650
},
{
"epoch": 19.99,
"grad_norm": 2.0811469554901123,
"learning_rate": 6.064422110552764e-06,
"loss": 0.0018,
"step": 39675
},
{
"epoch": 20.0,
"grad_norm": 1.3459393978118896,
"learning_rate": 6.0619095477386936e-06,
"loss": 0.0021,
"step": 39700
},
{
"epoch": 20.01,
"grad_norm": 1.1473982334136963,
"learning_rate": 6.059396984924624e-06,
"loss": 0.0013,
"step": 39725
},
{
"epoch": 20.03,
"grad_norm": 1.0483380556106567,
"learning_rate": 6.056884422110553e-06,
"loss": 0.0012,
"step": 39750
},
{
"epoch": 20.04,
"grad_norm": 0.5056473016738892,
"learning_rate": 6.054371859296483e-06,
"loss": 0.0015,
"step": 39775
},
{
"epoch": 20.05,
"grad_norm": 0.9125507473945618,
"learning_rate": 6.051859296482413e-06,
"loss": 0.0013,
"step": 39800
},
{
"epoch": 20.06,
"grad_norm": 0.17925478518009186,
"learning_rate": 6.049346733668342e-06,
"loss": 0.0013,
"step": 39825
},
{
"epoch": 20.08,
"grad_norm": 0.492924302816391,
"learning_rate": 6.046834170854272e-06,
"loss": 0.0013,
"step": 39850
},
{
"epoch": 20.09,
"grad_norm": 1.4264193773269653,
"learning_rate": 6.044321608040201e-06,
"loss": 0.0014,
"step": 39875
},
{
"epoch": 20.1,
"grad_norm": 0.6481070518493652,
"learning_rate": 6.0418090452261315e-06,
"loss": 0.0014,
"step": 39900
},
{
"epoch": 20.11,
"grad_norm": 0.9014895558357239,
"learning_rate": 6.03929648241206e-06,
"loss": 0.0014,
"step": 39925
},
{
"epoch": 20.13,
"grad_norm": 1.6231021881103516,
"learning_rate": 6.03678391959799e-06,
"loss": 0.0014,
"step": 39950
},
{
"epoch": 20.14,
"grad_norm": 0.2752940058708191,
"learning_rate": 6.03427135678392e-06,
"loss": 0.0009,
"step": 39975
},
{
"epoch": 20.15,
"grad_norm": 0.902050256729126,
"learning_rate": 6.03175879396985e-06,
"loss": 0.0009,
"step": 40000
},
{
"epoch": 20.15,
"eval_loss": 0.3369000256061554,
"eval_runtime": 648.606,
"eval_samples_per_second": 2.172,
"eval_steps_per_second": 2.172,
"eval_wer": 23.24455205811138,
"step": 40000
},
{
"epoch": 20.16,
"grad_norm": 0.18782569468021393,
"learning_rate": 6.02924623115578e-06,
"loss": 0.0007,
"step": 40025
},
{
"epoch": 20.18,
"grad_norm": 0.6005980372428894,
"learning_rate": 6.026733668341709e-06,
"loss": 0.0008,
"step": 40050
},
{
"epoch": 20.19,
"grad_norm": 0.7301942110061646,
"learning_rate": 6.024221105527639e-06,
"loss": 0.0011,
"step": 40075
},
{
"epoch": 20.2,
"grad_norm": 0.4620230793952942,
"learning_rate": 6.021708542713568e-06,
"loss": 0.0012,
"step": 40100
},
{
"epoch": 20.21,
"grad_norm": 0.23639623820781708,
"learning_rate": 6.019195979899498e-06,
"loss": 0.001,
"step": 40125
},
{
"epoch": 20.23,
"grad_norm": 1.1007659435272217,
"learning_rate": 6.016683417085427e-06,
"loss": 0.0011,
"step": 40150
},
{
"epoch": 20.24,
"grad_norm": 0.7579511404037476,
"learning_rate": 6.0141708542713575e-06,
"loss": 0.0013,
"step": 40175
},
{
"epoch": 20.25,
"grad_norm": 0.17022021114826202,
"learning_rate": 6.011658291457288e-06,
"loss": 0.001,
"step": 40200
},
{
"epoch": 20.26,
"grad_norm": 0.919007420539856,
"learning_rate": 6.009145728643216e-06,
"loss": 0.0013,
"step": 40225
},
{
"epoch": 20.28,
"grad_norm": 0.8233655691146851,
"learning_rate": 6.006633165829146e-06,
"loss": 0.0011,
"step": 40250
},
{
"epoch": 20.29,
"grad_norm": 0.6930840611457825,
"learning_rate": 6.004120603015076e-06,
"loss": 0.0012,
"step": 40275
},
{
"epoch": 20.3,
"grad_norm": 0.4709855616092682,
"learning_rate": 6.001608040201006e-06,
"loss": 0.0018,
"step": 40300
},
{
"epoch": 20.31,
"grad_norm": 0.2110186368227005,
"learning_rate": 5.999095477386935e-06,
"loss": 0.0012,
"step": 40325
},
{
"epoch": 20.33,
"grad_norm": 0.48267343640327454,
"learning_rate": 5.996582914572865e-06,
"loss": 0.0012,
"step": 40350
},
{
"epoch": 20.34,
"grad_norm": 0.6853476762771606,
"learning_rate": 5.994070351758794e-06,
"loss": 0.0012,
"step": 40375
},
{
"epoch": 20.35,
"grad_norm": 0.9809117317199707,
"learning_rate": 5.991557788944724e-06,
"loss": 0.0012,
"step": 40400
},
{
"epoch": 20.37,
"grad_norm": 1.3809919357299805,
"learning_rate": 5.989045226130654e-06,
"loss": 0.0015,
"step": 40425
},
{
"epoch": 20.38,
"grad_norm": 1.5639605522155762,
"learning_rate": 5.9865326633165834e-06,
"loss": 0.002,
"step": 40450
},
{
"epoch": 20.39,
"grad_norm": 1.0514106750488281,
"learning_rate": 5.984020100502514e-06,
"loss": 0.0015,
"step": 40475
},
{
"epoch": 20.4,
"grad_norm": 0.9717534780502319,
"learning_rate": 5.981507537688442e-06,
"loss": 0.0017,
"step": 40500
},
{
"epoch": 20.42,
"grad_norm": 1.0924715995788574,
"learning_rate": 5.978994974874372e-06,
"loss": 0.0012,
"step": 40525
},
{
"epoch": 20.43,
"grad_norm": 1.3465650081634521,
"learning_rate": 5.9764824120603016e-06,
"loss": 0.0011,
"step": 40550
},
{
"epoch": 20.44,
"grad_norm": 0.6348648071289062,
"learning_rate": 5.973969849246232e-06,
"loss": 0.0016,
"step": 40575
},
{
"epoch": 20.45,
"grad_norm": 1.0228688716888428,
"learning_rate": 5.971457286432162e-06,
"loss": 0.0011,
"step": 40600
},
{
"epoch": 20.47,
"grad_norm": 1.5299664735794067,
"learning_rate": 5.968944723618091e-06,
"loss": 0.0012,
"step": 40625
},
{
"epoch": 20.48,
"grad_norm": 1.601320743560791,
"learning_rate": 5.966432160804021e-06,
"loss": 0.0014,
"step": 40650
},
{
"epoch": 20.49,
"grad_norm": 0.6638547778129578,
"learning_rate": 5.96391959798995e-06,
"loss": 0.0014,
"step": 40675
},
{
"epoch": 20.5,
"grad_norm": 2.6972315311431885,
"learning_rate": 5.96140703517588e-06,
"loss": 0.0016,
"step": 40700
},
{
"epoch": 20.52,
"grad_norm": 0.6832017302513123,
"learning_rate": 5.958894472361809e-06,
"loss": 0.0016,
"step": 40725
},
{
"epoch": 20.53,
"grad_norm": 0.46338987350463867,
"learning_rate": 5.9563819095477395e-06,
"loss": 0.0012,
"step": 40750
},
{
"epoch": 20.54,
"grad_norm": 0.3584813177585602,
"learning_rate": 5.953869346733668e-06,
"loss": 0.0015,
"step": 40775
},
{
"epoch": 20.55,
"grad_norm": 1.5687421560287476,
"learning_rate": 5.951356783919598e-06,
"loss": 0.0017,
"step": 40800
},
{
"epoch": 20.57,
"grad_norm": 0.5602162480354309,
"learning_rate": 5.948844221105528e-06,
"loss": 0.0012,
"step": 40825
},
{
"epoch": 20.58,
"grad_norm": 0.16096442937850952,
"learning_rate": 5.946331658291458e-06,
"loss": 0.0012,
"step": 40850
},
{
"epoch": 20.59,
"grad_norm": 0.5620841979980469,
"learning_rate": 5.943819095477388e-06,
"loss": 0.0014,
"step": 40875
},
{
"epoch": 20.6,
"grad_norm": 0.5683684349060059,
"learning_rate": 5.941306532663317e-06,
"loss": 0.0017,
"step": 40900
},
{
"epoch": 20.62,
"grad_norm": 1.4936867952346802,
"learning_rate": 5.938793969849247e-06,
"loss": 0.0015,
"step": 40925
},
{
"epoch": 20.63,
"grad_norm": 0.45212438702583313,
"learning_rate": 5.936281407035176e-06,
"loss": 0.0013,
"step": 40950
},
{
"epoch": 20.64,
"grad_norm": 1.8357038497924805,
"learning_rate": 5.933768844221106e-06,
"loss": 0.0011,
"step": 40975
},
{
"epoch": 20.65,
"grad_norm": 0.1379358172416687,
"learning_rate": 5.931256281407035e-06,
"loss": 0.0015,
"step": 41000
},
{
"epoch": 20.65,
"eval_loss": 0.3424818515777588,
"eval_runtime": 650.8582,
"eval_samples_per_second": 2.165,
"eval_steps_per_second": 2.165,
"eval_wer": 23.36215842269111,
"step": 41000
},
{
"epoch": 20.67,
"grad_norm": 0.342557817697525,
"learning_rate": 5.9287437185929655e-06,
"loss": 0.001,
"step": 41025
},
{
"epoch": 20.68,
"grad_norm": 0.7056984901428223,
"learning_rate": 5.926231155778896e-06,
"loss": 0.0013,
"step": 41050
},
{
"epoch": 20.69,
"grad_norm": 1.0098013877868652,
"learning_rate": 5.923718592964824e-06,
"loss": 0.0015,
"step": 41075
},
{
"epoch": 20.71,
"grad_norm": 0.6967382431030273,
"learning_rate": 5.921206030150754e-06,
"loss": 0.0014,
"step": 41100
},
{
"epoch": 20.72,
"grad_norm": 0.544989287853241,
"learning_rate": 5.918693467336684e-06,
"loss": 0.0016,
"step": 41125
},
{
"epoch": 20.73,
"grad_norm": 1.2400965690612793,
"learning_rate": 5.916180904522614e-06,
"loss": 0.0015,
"step": 41150
},
{
"epoch": 20.74,
"grad_norm": 0.926023006439209,
"learning_rate": 5.913668341708543e-06,
"loss": 0.0016,
"step": 41175
},
{
"epoch": 20.76,
"grad_norm": 1.1986762285232544,
"learning_rate": 5.911155778894473e-06,
"loss": 0.0016,
"step": 41200
},
{
"epoch": 20.77,
"grad_norm": 0.6431388854980469,
"learning_rate": 5.908643216080403e-06,
"loss": 0.0014,
"step": 41225
},
{
"epoch": 20.78,
"grad_norm": 1.1368434429168701,
"learning_rate": 5.906130653266332e-06,
"loss": 0.0013,
"step": 41250
},
{
"epoch": 20.79,
"grad_norm": 0.7638266086578369,
"learning_rate": 5.903618090452262e-06,
"loss": 0.0014,
"step": 41275
},
{
"epoch": 20.81,
"grad_norm": 1.2455073595046997,
"learning_rate": 5.9011055276381914e-06,
"loss": 0.0013,
"step": 41300
},
{
"epoch": 20.82,
"grad_norm": 0.9213681817054749,
"learning_rate": 5.898592964824122e-06,
"loss": 0.0015,
"step": 41325
},
{
"epoch": 20.83,
"grad_norm": 0.5151415467262268,
"learning_rate": 5.89608040201005e-06,
"loss": 0.0016,
"step": 41350
},
{
"epoch": 20.84,
"grad_norm": 0.7288360595703125,
"learning_rate": 5.89356783919598e-06,
"loss": 0.0017,
"step": 41375
},
{
"epoch": 20.86,
"grad_norm": 0.4819887578487396,
"learning_rate": 5.8910552763819096e-06,
"loss": 0.0013,
"step": 41400
},
{
"epoch": 20.87,
"grad_norm": 1.1284375190734863,
"learning_rate": 5.88854271356784e-06,
"loss": 0.0015,
"step": 41425
},
{
"epoch": 20.88,
"grad_norm": 0.31427863240242004,
"learning_rate": 5.88603015075377e-06,
"loss": 0.0014,
"step": 41450
},
{
"epoch": 20.89,
"grad_norm": 0.9035623669624329,
"learning_rate": 5.883517587939699e-06,
"loss": 0.0013,
"step": 41475
},
{
"epoch": 20.91,
"grad_norm": 1.357260823249817,
"learning_rate": 5.881005025125629e-06,
"loss": 0.0011,
"step": 41500
},
{
"epoch": 20.92,
"grad_norm": 0.8989688158035278,
"learning_rate": 5.878492462311558e-06,
"loss": 0.0016,
"step": 41525
},
{
"epoch": 20.93,
"grad_norm": 1.1103880405426025,
"learning_rate": 5.875979899497488e-06,
"loss": 0.0016,
"step": 41550
},
{
"epoch": 20.94,
"grad_norm": 0.8313987851142883,
"learning_rate": 5.873467336683417e-06,
"loss": 0.0012,
"step": 41575
},
{
"epoch": 20.96,
"grad_norm": 1.2921781539916992,
"learning_rate": 5.8709547738693475e-06,
"loss": 0.0013,
"step": 41600
},
{
"epoch": 20.97,
"grad_norm": 0.9626322984695435,
"learning_rate": 5.868442211055276e-06,
"loss": 0.0014,
"step": 41625
},
{
"epoch": 20.98,
"grad_norm": 1.2628862857818604,
"learning_rate": 5.865929648241206e-06,
"loss": 0.0012,
"step": 41650
},
{
"epoch": 20.99,
"grad_norm": 0.6756789088249207,
"learning_rate": 5.863417085427136e-06,
"loss": 0.0011,
"step": 41675
},
{
"epoch": 21.01,
"grad_norm": 0.42541012167930603,
"learning_rate": 5.860904522613066e-06,
"loss": 0.0012,
"step": 41700
},
{
"epoch": 21.02,
"grad_norm": 1.8883837461471558,
"learning_rate": 5.858391959798996e-06,
"loss": 0.0012,
"step": 41725
},
{
"epoch": 21.03,
"grad_norm": 0.9031746983528137,
"learning_rate": 5.855879396984925e-06,
"loss": 0.001,
"step": 41750
},
{
"epoch": 21.05,
"grad_norm": 0.30492404103279114,
"learning_rate": 5.853366834170855e-06,
"loss": 0.001,
"step": 41775
},
{
"epoch": 21.06,
"grad_norm": 0.8610237836837769,
"learning_rate": 5.850854271356784e-06,
"loss": 0.0008,
"step": 41800
},
{
"epoch": 21.07,
"grad_norm": 0.36193329095840454,
"learning_rate": 5.848341708542714e-06,
"loss": 0.001,
"step": 41825
},
{
"epoch": 21.08,
"grad_norm": 0.7772315740585327,
"learning_rate": 5.845829145728644e-06,
"loss": 0.0012,
"step": 41850
},
{
"epoch": 21.1,
"grad_norm": 0.4699445068836212,
"learning_rate": 5.8433165829145735e-06,
"loss": 0.0008,
"step": 41875
},
{
"epoch": 21.11,
"grad_norm": 1.3260185718536377,
"learning_rate": 5.840804020100504e-06,
"loss": 0.0009,
"step": 41900
},
{
"epoch": 21.12,
"grad_norm": 0.33898288011550903,
"learning_rate": 5.838291457286432e-06,
"loss": 0.0008,
"step": 41925
},
{
"epoch": 21.13,
"grad_norm": 1.4685719013214111,
"learning_rate": 5.835778894472362e-06,
"loss": 0.0011,
"step": 41950
},
{
"epoch": 21.15,
"grad_norm": 2.672056198120117,
"learning_rate": 5.833266331658292e-06,
"loss": 0.0011,
"step": 41975
},
{
"epoch": 21.16,
"grad_norm": 1.9097732305526733,
"learning_rate": 5.830753768844222e-06,
"loss": 0.0012,
"step": 42000
},
{
"epoch": 21.16,
"eval_loss": 0.33881473541259766,
"eval_runtime": 651.7527,
"eval_samples_per_second": 2.162,
"eval_steps_per_second": 2.162,
"eval_wer": 22.815634728467657,
"step": 42000
},
{
"epoch": 21.17,
"grad_norm": 1.823331356048584,
"learning_rate": 5.828241206030151e-06,
"loss": 0.0012,
"step": 42025
},
{
"epoch": 21.18,
"grad_norm": 0.6585187911987305,
"learning_rate": 5.825728643216081e-06,
"loss": 0.0007,
"step": 42050
},
{
"epoch": 21.2,
"grad_norm": 0.8023566603660583,
"learning_rate": 5.823216080402011e-06,
"loss": 0.0014,
"step": 42075
},
{
"epoch": 21.21,
"grad_norm": 0.2983376979827881,
"learning_rate": 5.82070351758794e-06,
"loss": 0.0011,
"step": 42100
},
{
"epoch": 21.22,
"grad_norm": 0.860016942024231,
"learning_rate": 5.81819095477387e-06,
"loss": 0.0014,
"step": 42125
},
{
"epoch": 21.23,
"grad_norm": 0.5695117115974426,
"learning_rate": 5.8156783919597994e-06,
"loss": 0.0011,
"step": 42150
},
{
"epoch": 21.25,
"grad_norm": 1.583242416381836,
"learning_rate": 5.81316582914573e-06,
"loss": 0.0009,
"step": 42175
},
{
"epoch": 21.26,
"grad_norm": 0.7320623397827148,
"learning_rate": 5.810653266331658e-06,
"loss": 0.0011,
"step": 42200
},
{
"epoch": 21.27,
"grad_norm": 0.40352964401245117,
"learning_rate": 5.808140703517588e-06,
"loss": 0.0011,
"step": 42225
},
{
"epoch": 21.28,
"grad_norm": 1.1269155740737915,
"learning_rate": 5.8056281407035176e-06,
"loss": 0.0012,
"step": 42250
},
{
"epoch": 21.3,
"grad_norm": 0.5032210350036621,
"learning_rate": 5.803216080402011e-06,
"loss": 0.0011,
"step": 42275
},
{
"epoch": 21.31,
"grad_norm": 0.9120995998382568,
"learning_rate": 5.80070351758794e-06,
"loss": 0.0009,
"step": 42300
},
{
"epoch": 21.32,
"grad_norm": 1.0807931423187256,
"learning_rate": 5.79819095477387e-06,
"loss": 0.001,
"step": 42325
},
{
"epoch": 21.34,
"grad_norm": 0.5654991269111633,
"learning_rate": 5.7956783919598e-06,
"loss": 0.0012,
"step": 42350
},
{
"epoch": 21.35,
"grad_norm": 0.4672817885875702,
"learning_rate": 5.793165829145729e-06,
"loss": 0.0013,
"step": 42375
},
{
"epoch": 21.36,
"grad_norm": 0.4968101978302002,
"learning_rate": 5.790653266331658e-06,
"loss": 0.0011,
"step": 42400
},
{
"epoch": 21.37,
"grad_norm": 1.5632412433624268,
"learning_rate": 5.7881407035175884e-06,
"loss": 0.0009,
"step": 42425
},
{
"epoch": 21.39,
"grad_norm": 1.279451847076416,
"learning_rate": 5.785628140703518e-06,
"loss": 0.0012,
"step": 42450
},
{
"epoch": 21.4,
"grad_norm": 0.2353668063879013,
"learning_rate": 5.783115577889448e-06,
"loss": 0.0012,
"step": 42475
},
{
"epoch": 21.41,
"grad_norm": 1.097109079360962,
"learning_rate": 5.780603015075378e-06,
"loss": 0.0013,
"step": 42500
},
{
"epoch": 21.42,
"grad_norm": 0.5137972235679626,
"learning_rate": 5.778090452261307e-06,
"loss": 0.0014,
"step": 42525
},
{
"epoch": 21.44,
"grad_norm": 0.7982520461082458,
"learning_rate": 5.775577889447237e-06,
"loss": 0.001,
"step": 42550
},
{
"epoch": 21.45,
"grad_norm": 4.075470924377441,
"learning_rate": 5.773065326633166e-06,
"loss": 0.0013,
"step": 42575
},
{
"epoch": 21.46,
"grad_norm": 0.4397733211517334,
"learning_rate": 5.770552763819096e-06,
"loss": 0.0012,
"step": 42600
},
{
"epoch": 21.47,
"grad_norm": 0.650394082069397,
"learning_rate": 5.7680402010050256e-06,
"loss": 0.0011,
"step": 42625
},
{
"epoch": 21.49,
"grad_norm": 1.209378957748413,
"learning_rate": 5.765527638190955e-06,
"loss": 0.0011,
"step": 42650
},
{
"epoch": 21.5,
"grad_norm": 0.47554540634155273,
"learning_rate": 5.763015075376885e-06,
"loss": 0.0012,
"step": 42675
},
{
"epoch": 21.51,
"grad_norm": 1.1790157556533813,
"learning_rate": 5.760502512562814e-06,
"loss": 0.0012,
"step": 42700
},
{
"epoch": 21.52,
"grad_norm": 1.1118874549865723,
"learning_rate": 5.7579899497487446e-06,
"loss": 0.0012,
"step": 42725
},
{
"epoch": 21.54,
"grad_norm": 0.956529974937439,
"learning_rate": 5.755477386934674e-06,
"loss": 0.0012,
"step": 42750
},
{
"epoch": 21.55,
"grad_norm": 0.8777883052825928,
"learning_rate": 5.752964824120604e-06,
"loss": 0.0014,
"step": 42775
},
{
"epoch": 21.56,
"grad_norm": 1.8465042114257812,
"learning_rate": 5.7504522613065325e-06,
"loss": 0.0013,
"step": 42800
},
{
"epoch": 21.57,
"grad_norm": 1.209672212600708,
"learning_rate": 5.747939698492463e-06,
"loss": 0.0014,
"step": 42825
},
{
"epoch": 21.59,
"grad_norm": 1.0998687744140625,
"learning_rate": 5.745427135678392e-06,
"loss": 0.0015,
"step": 42850
},
{
"epoch": 21.6,
"grad_norm": 1.5435659885406494,
"learning_rate": 5.742914572864322e-06,
"loss": 0.0016,
"step": 42875
},
{
"epoch": 21.61,
"grad_norm": 1.4470527172088623,
"learning_rate": 5.740402010050252e-06,
"loss": 0.0013,
"step": 42900
},
{
"epoch": 21.62,
"grad_norm": 0.8701749444007874,
"learning_rate": 5.737889447236181e-06,
"loss": 0.0013,
"step": 42925
},
{
"epoch": 21.64,
"grad_norm": 1.1655815839767456,
"learning_rate": 5.735376884422111e-06,
"loss": 0.001,
"step": 42950
},
{
"epoch": 21.65,
"grad_norm": 0.8317478895187378,
"learning_rate": 5.73286432160804e-06,
"loss": 0.0012,
"step": 42975
},
{
"epoch": 21.66,
"grad_norm": 0.2232298105955124,
"learning_rate": 5.7303517587939705e-06,
"loss": 0.0009,
"step": 43000
},
{
"epoch": 21.66,
"eval_loss": 0.34524887800216675,
"eval_runtime": 653.6286,
"eval_samples_per_second": 2.156,
"eval_steps_per_second": 2.156,
"eval_wer": 23.13386371497752,
"step": 43000
},
{
"epoch": 21.68,
"grad_norm": 1.433415412902832,
"learning_rate": 5.7278391959799e-06,
"loss": 0.0012,
"step": 43025
},
{
"epoch": 21.69,
"grad_norm": 1.2699315547943115,
"learning_rate": 5.72532663316583e-06,
"loss": 0.0013,
"step": 43050
},
{
"epoch": 21.7,
"grad_norm": 1.10042405128479,
"learning_rate": 5.7228140703517585e-06,
"loss": 0.0015,
"step": 43075
},
{
"epoch": 21.71,
"grad_norm": 1.3270542621612549,
"learning_rate": 5.720301507537689e-06,
"loss": 0.0012,
"step": 43100
},
{
"epoch": 21.73,
"grad_norm": 1.0051465034484863,
"learning_rate": 5.717788944723619e-06,
"loss": 0.0013,
"step": 43125
},
{
"epoch": 21.74,
"grad_norm": 2.064424514770508,
"learning_rate": 5.715276381909548e-06,
"loss": 0.0013,
"step": 43150
},
{
"epoch": 21.75,
"grad_norm": 1.45639967918396,
"learning_rate": 5.712763819095478e-06,
"loss": 0.0012,
"step": 43175
},
{
"epoch": 21.76,
"grad_norm": 0.627719521522522,
"learning_rate": 5.710251256281407e-06,
"loss": 0.0014,
"step": 43200
},
{
"epoch": 21.78,
"grad_norm": 0.1227678582072258,
"learning_rate": 5.707738693467337e-06,
"loss": 0.001,
"step": 43225
},
{
"epoch": 21.79,
"grad_norm": 0.5857130885124207,
"learning_rate": 5.705226130653266e-06,
"loss": 0.0011,
"step": 43250
},
{
"epoch": 21.8,
"grad_norm": 0.859379231929779,
"learning_rate": 5.7027135678391964e-06,
"loss": 0.0009,
"step": 43275
},
{
"epoch": 21.81,
"grad_norm": 0.30906084179878235,
"learning_rate": 5.700201005025127e-06,
"loss": 0.0012,
"step": 43300
},
{
"epoch": 21.83,
"grad_norm": 0.7923578023910522,
"learning_rate": 5.697688442211056e-06,
"loss": 0.0013,
"step": 43325
},
{
"epoch": 21.84,
"grad_norm": 0.5775353908538818,
"learning_rate": 5.695175879396986e-06,
"loss": 0.0012,
"step": 43350
},
{
"epoch": 21.85,
"grad_norm": 3.3586642742156982,
"learning_rate": 5.692663316582915e-06,
"loss": 0.0012,
"step": 43375
},
{
"epoch": 21.86,
"grad_norm": 1.088348388671875,
"learning_rate": 5.690150753768845e-06,
"loss": 0.0014,
"step": 43400
},
{
"epoch": 21.88,
"grad_norm": 1.9004027843475342,
"learning_rate": 5.687638190954774e-06,
"loss": 0.0015,
"step": 43425
},
{
"epoch": 21.89,
"grad_norm": 1.9646518230438232,
"learning_rate": 5.685125628140704e-06,
"loss": 0.0017,
"step": 43450
},
{
"epoch": 21.9,
"grad_norm": 2.539280891418457,
"learning_rate": 5.6826130653266336e-06,
"loss": 0.0013,
"step": 43475
},
{
"epoch": 21.91,
"grad_norm": 0.3101285398006439,
"learning_rate": 5.680100502512563e-06,
"loss": 0.0012,
"step": 43500
},
{
"epoch": 21.93,
"grad_norm": 2.127978563308716,
"learning_rate": 5.677587939698493e-06,
"loss": 0.0011,
"step": 43525
},
{
"epoch": 21.94,
"grad_norm": 0.44460466504096985,
"learning_rate": 5.675075376884422e-06,
"loss": 0.0016,
"step": 43550
},
{
"epoch": 21.95,
"grad_norm": 1.158146858215332,
"learning_rate": 5.6725628140703526e-06,
"loss": 0.0015,
"step": 43575
},
{
"epoch": 21.96,
"grad_norm": 1.6001086235046387,
"learning_rate": 5.670050251256282e-06,
"loss": 0.0015,
"step": 43600
},
{
"epoch": 21.98,
"grad_norm": 0.6981241703033447,
"learning_rate": 5.667537688442212e-06,
"loss": 0.0013,
"step": 43625
},
{
"epoch": 21.99,
"grad_norm": 0.45709845423698425,
"learning_rate": 5.6650251256281405e-06,
"loss": 0.0015,
"step": 43650
},
{
"epoch": 22.0,
"grad_norm": 1.2674909830093384,
"learning_rate": 5.662512562814071e-06,
"loss": 0.0015,
"step": 43675
},
{
"epoch": 22.02,
"grad_norm": 0.591598629951477,
"learning_rate": 5.66e-06,
"loss": 0.0008,
"step": 43700
},
{
"epoch": 22.03,
"grad_norm": 0.76810222864151,
"learning_rate": 5.65748743718593e-06,
"loss": 0.0009,
"step": 43725
},
{
"epoch": 22.04,
"grad_norm": 1.0374010801315308,
"learning_rate": 5.65497487437186e-06,
"loss": 0.0008,
"step": 43750
},
{
"epoch": 22.05,
"grad_norm": 1.3293951749801636,
"learning_rate": 5.652462311557789e-06,
"loss": 0.001,
"step": 43775
},
{
"epoch": 22.07,
"grad_norm": 1.8345214128494263,
"learning_rate": 5.649949748743719e-06,
"loss": 0.0007,
"step": 43800
},
{
"epoch": 22.08,
"grad_norm": 0.9170948266983032,
"learning_rate": 5.647437185929648e-06,
"loss": 0.0009,
"step": 43825
},
{
"epoch": 22.09,
"grad_norm": 0.6133562326431274,
"learning_rate": 5.6449246231155785e-06,
"loss": 0.0009,
"step": 43850
},
{
"epoch": 22.1,
"grad_norm": 0.9368806481361389,
"learning_rate": 5.642412060301508e-06,
"loss": 0.0011,
"step": 43875
},
{
"epoch": 22.12,
"grad_norm": 0.38559451699256897,
"learning_rate": 5.639899497487438e-06,
"loss": 0.0012,
"step": 43900
},
{
"epoch": 22.13,
"grad_norm": 0.1805734932422638,
"learning_rate": 5.637386934673368e-06,
"loss": 0.0008,
"step": 43925
},
{
"epoch": 22.14,
"grad_norm": 3.672781467437744,
"learning_rate": 5.634874371859297e-06,
"loss": 0.0007,
"step": 43950
},
{
"epoch": 22.15,
"grad_norm": 0.09901037812232971,
"learning_rate": 5.632361809045227e-06,
"loss": 0.0007,
"step": 43975
},
{
"epoch": 22.17,
"grad_norm": 0.24132972955703735,
"learning_rate": 5.629849246231156e-06,
"loss": 0.0007,
"step": 44000
},
{
"epoch": 22.17,
"eval_loss": 0.3424950838088989,
"eval_runtime": 653.4662,
"eval_samples_per_second": 2.156,
"eval_steps_per_second": 2.156,
"eval_wer": 22.725700449671393,
"step": 44000
},
{
"epoch": 22.18,
"grad_norm": 1.1592354774475098,
"learning_rate": 5.627336683417086e-06,
"loss": 0.001,
"step": 44025
},
{
"epoch": 22.19,
"grad_norm": 0.19992341101169586,
"learning_rate": 5.624824120603015e-06,
"loss": 0.0009,
"step": 44050
},
{
"epoch": 22.2,
"grad_norm": 0.8353447914123535,
"learning_rate": 5.622311557788945e-06,
"loss": 0.0009,
"step": 44075
},
{
"epoch": 22.22,
"grad_norm": 0.8957573771476746,
"learning_rate": 5.619798994974874e-06,
"loss": 0.0007,
"step": 44100
},
{
"epoch": 22.23,
"grad_norm": 0.22726494073867798,
"learning_rate": 5.6172864321608044e-06,
"loss": 0.0008,
"step": 44125
},
{
"epoch": 22.24,
"grad_norm": 0.44598016142845154,
"learning_rate": 5.614773869346735e-06,
"loss": 0.0011,
"step": 44150
},
{
"epoch": 22.25,
"grad_norm": 1.2066985368728638,
"learning_rate": 5.612261306532664e-06,
"loss": 0.0007,
"step": 44175
},
{
"epoch": 22.27,
"grad_norm": 0.41478270292282104,
"learning_rate": 5.609748743718594e-06,
"loss": 0.0008,
"step": 44200
},
{
"epoch": 22.28,
"grad_norm": 0.7415564656257629,
"learning_rate": 5.607236180904523e-06,
"loss": 0.0007,
"step": 44225
},
{
"epoch": 22.29,
"grad_norm": 0.26239436864852905,
"learning_rate": 5.604723618090453e-06,
"loss": 0.0007,
"step": 44250
},
{
"epoch": 22.3,
"grad_norm": 0.6609335541725159,
"learning_rate": 5.602211055276382e-06,
"loss": 0.0007,
"step": 44275
},
{
"epoch": 22.32,
"grad_norm": 0.35660025477409363,
"learning_rate": 5.599698492462312e-06,
"loss": 0.0008,
"step": 44300
},
{
"epoch": 22.33,
"grad_norm": 0.5038982629776001,
"learning_rate": 5.597185929648241e-06,
"loss": 0.0008,
"step": 44325
},
{
"epoch": 22.34,
"grad_norm": 1.157596468925476,
"learning_rate": 5.594673366834171e-06,
"loss": 0.0009,
"step": 44350
},
{
"epoch": 22.36,
"grad_norm": 1.0030407905578613,
"learning_rate": 5.592160804020101e-06,
"loss": 0.001,
"step": 44375
},
{
"epoch": 22.37,
"grad_norm": 0.47111421823501587,
"learning_rate": 5.58964824120603e-06,
"loss": 0.0008,
"step": 44400
},
{
"epoch": 22.38,
"grad_norm": 0.3429202139377594,
"learning_rate": 5.5871356783919606e-06,
"loss": 0.0014,
"step": 44425
},
{
"epoch": 22.39,
"grad_norm": 1.4424147605895996,
"learning_rate": 5.58462311557789e-06,
"loss": 0.0011,
"step": 44450
},
{
"epoch": 22.41,
"grad_norm": 0.48875829577445984,
"learning_rate": 5.58211055276382e-06,
"loss": 0.0008,
"step": 44475
},
{
"epoch": 22.42,
"grad_norm": 0.23549383878707886,
"learning_rate": 5.5795979899497485e-06,
"loss": 0.0007,
"step": 44500
},
{
"epoch": 22.43,
"grad_norm": 1.24130117893219,
"learning_rate": 5.577085427135679e-06,
"loss": 0.0014,
"step": 44525
},
{
"epoch": 22.44,
"grad_norm": 0.129581019282341,
"learning_rate": 5.574572864321609e-06,
"loss": 0.001,
"step": 44550
},
{
"epoch": 22.46,
"grad_norm": 1.358135461807251,
"learning_rate": 5.572060301507538e-06,
"loss": 0.0012,
"step": 44575
},
{
"epoch": 22.47,
"grad_norm": 0.5511311292648315,
"learning_rate": 5.569547738693468e-06,
"loss": 0.0011,
"step": 44600
},
{
"epoch": 22.48,
"grad_norm": 0.2945619225502014,
"learning_rate": 5.567035175879397e-06,
"loss": 0.0009,
"step": 44625
},
{
"epoch": 22.49,
"grad_norm": 1.2442690134048462,
"learning_rate": 5.564522613065327e-06,
"loss": 0.0011,
"step": 44650
},
{
"epoch": 22.51,
"grad_norm": 1.0728257894515991,
"learning_rate": 5.562010050251256e-06,
"loss": 0.0009,
"step": 44675
},
{
"epoch": 22.52,
"grad_norm": 0.51957768201828,
"learning_rate": 5.5594974874371865e-06,
"loss": 0.0012,
"step": 44700
},
{
"epoch": 22.53,
"grad_norm": 1.0889358520507812,
"learning_rate": 5.556984924623116e-06,
"loss": 0.0008,
"step": 44725
},
{
"epoch": 22.54,
"grad_norm": 1.2384743690490723,
"learning_rate": 5.554472361809046e-06,
"loss": 0.0009,
"step": 44750
},
{
"epoch": 22.56,
"grad_norm": 1.6399548053741455,
"learning_rate": 5.551959798994976e-06,
"loss": 0.0012,
"step": 44775
},
{
"epoch": 22.57,
"grad_norm": 1.122065544128418,
"learning_rate": 5.549447236180905e-06,
"loss": 0.0013,
"step": 44800
},
{
"epoch": 22.58,
"grad_norm": 1.5687880516052246,
"learning_rate": 5.546934673366835e-06,
"loss": 0.0011,
"step": 44825
},
{
"epoch": 22.59,
"grad_norm": 0.26494044065475464,
"learning_rate": 5.544422110552764e-06,
"loss": 0.0013,
"step": 44850
},
{
"epoch": 22.61,
"grad_norm": 2.1011672019958496,
"learning_rate": 5.541909547738694e-06,
"loss": 0.0012,
"step": 44875
},
{
"epoch": 22.62,
"grad_norm": 0.8654801249504089,
"learning_rate": 5.539396984924623e-06,
"loss": 0.0011,
"step": 44900
},
{
"epoch": 22.63,
"grad_norm": 0.3072070777416229,
"learning_rate": 5.536884422110553e-06,
"loss": 0.0009,
"step": 44925
},
{
"epoch": 22.64,
"grad_norm": 0.3950670659542084,
"learning_rate": 5.534371859296482e-06,
"loss": 0.0009,
"step": 44950
},
{
"epoch": 22.66,
"grad_norm": 0.8394802212715149,
"learning_rate": 5.5318592964824124e-06,
"loss": 0.0007,
"step": 44975
},
{
"epoch": 22.67,
"grad_norm": 0.3768616020679474,
"learning_rate": 5.529346733668343e-06,
"loss": 0.001,
"step": 45000
},
{
"epoch": 22.67,
"eval_loss": 0.34873273968696594,
"eval_runtime": 648.3792,
"eval_samples_per_second": 2.173,
"eval_steps_per_second": 2.173,
"eval_wer": 22.79488066413006,
"step": 45000
},
{
"epoch": 22.68,
"grad_norm": 0.8950992226600647,
"learning_rate": 5.526834170854272e-06,
"loss": 0.0012,
"step": 45025
},
{
"epoch": 22.7,
"grad_norm": 0.6401100754737854,
"learning_rate": 5.524321608040202e-06,
"loss": 0.0013,
"step": 45050
},
{
"epoch": 22.71,
"grad_norm": 1.1628910303115845,
"learning_rate": 5.521809045226131e-06,
"loss": 0.0012,
"step": 45075
},
{
"epoch": 22.72,
"grad_norm": 1.316792607307434,
"learning_rate": 5.519296482412061e-06,
"loss": 0.001,
"step": 45100
},
{
"epoch": 22.73,
"grad_norm": 1.3205770254135132,
"learning_rate": 5.51678391959799e-06,
"loss": 0.0013,
"step": 45125
},
{
"epoch": 22.75,
"grad_norm": 0.2712342441082001,
"learning_rate": 5.51427135678392e-06,
"loss": 0.0011,
"step": 45150
},
{
"epoch": 22.76,
"grad_norm": 0.47601330280303955,
"learning_rate": 5.51175879396985e-06,
"loss": 0.0011,
"step": 45175
},
{
"epoch": 22.77,
"grad_norm": 0.9388231039047241,
"learning_rate": 5.509246231155779e-06,
"loss": 0.0012,
"step": 45200
},
{
"epoch": 22.78,
"grad_norm": 1.183489203453064,
"learning_rate": 5.506834170854271e-06,
"loss": 0.0013,
"step": 45225
},
{
"epoch": 22.8,
"grad_norm": 1.0215598344802856,
"learning_rate": 5.5043216080402015e-06,
"loss": 0.0013,
"step": 45250
},
{
"epoch": 22.81,
"grad_norm": 0.5754547119140625,
"learning_rate": 5.501809045226131e-06,
"loss": 0.0012,
"step": 45275
},
{
"epoch": 22.82,
"grad_norm": 1.5252500772476196,
"learning_rate": 5.499296482412061e-06,
"loss": 0.0014,
"step": 45300
},
{
"epoch": 22.83,
"grad_norm": 0.5785127282142639,
"learning_rate": 5.49678391959799e-06,
"loss": 0.0011,
"step": 45325
},
{
"epoch": 22.85,
"grad_norm": 1.1003527641296387,
"learning_rate": 5.4942713567839204e-06,
"loss": 0.001,
"step": 45350
},
{
"epoch": 22.86,
"grad_norm": 1.1432653665542603,
"learning_rate": 5.491758793969851e-06,
"loss": 0.0012,
"step": 45375
},
{
"epoch": 22.87,
"grad_norm": 0.7556006908416748,
"learning_rate": 5.489246231155779e-06,
"loss": 0.0011,
"step": 45400
},
{
"epoch": 22.88,
"grad_norm": 0.6173690557479858,
"learning_rate": 5.486733668341709e-06,
"loss": 0.001,
"step": 45425
},
{
"epoch": 22.9,
"grad_norm": 0.3593469262123108,
"learning_rate": 5.484221105527639e-06,
"loss": 0.0014,
"step": 45450
},
{
"epoch": 22.91,
"grad_norm": 1.9169950485229492,
"learning_rate": 5.481708542713569e-06,
"loss": 0.0011,
"step": 45475
},
{
"epoch": 22.92,
"grad_norm": 0.8808764815330505,
"learning_rate": 5.479195979899497e-06,
"loss": 0.0014,
"step": 45500
},
{
"epoch": 22.93,
"grad_norm": 0.18010124564170837,
"learning_rate": 5.476683417085427e-06,
"loss": 0.0012,
"step": 45525
},
{
"epoch": 22.95,
"grad_norm": 0.6205843091011047,
"learning_rate": 5.474170854271357e-06,
"loss": 0.001,
"step": 45550
},
{
"epoch": 22.96,
"grad_norm": 1.8465744256973267,
"learning_rate": 5.471658291457287e-06,
"loss": 0.001,
"step": 45575
},
{
"epoch": 22.97,
"grad_norm": 0.47944799065589905,
"learning_rate": 5.469145728643217e-06,
"loss": 0.0012,
"step": 45600
},
{
"epoch": 22.98,
"grad_norm": 1.0778087377548218,
"learning_rate": 5.466633165829146e-06,
"loss": 0.0012,
"step": 45625
},
{
"epoch": 23.0,
"grad_norm": 1.2320517301559448,
"learning_rate": 5.4641206030150766e-06,
"loss": 0.001,
"step": 45650
},
{
"epoch": 23.01,
"grad_norm": 0.2702305316925049,
"learning_rate": 5.461608040201005e-06,
"loss": 0.0009,
"step": 45675
},
{
"epoch": 23.02,
"grad_norm": 0.4143355190753937,
"learning_rate": 5.459095477386935e-06,
"loss": 0.0007,
"step": 45700
},
{
"epoch": 23.04,
"grad_norm": 1.7724355459213257,
"learning_rate": 5.4565829145728645e-06,
"loss": 0.0007,
"step": 45725
},
{
"epoch": 23.05,
"grad_norm": 0.8267619609832764,
"learning_rate": 5.454070351758795e-06,
"loss": 0.0007,
"step": 45750
},
{
"epoch": 23.06,
"grad_norm": 0.7586312890052795,
"learning_rate": 5.451557788944723e-06,
"loss": 0.0007,
"step": 45775
},
{
"epoch": 23.07,
"grad_norm": 0.6827680468559265,
"learning_rate": 5.449045226130653e-06,
"loss": 0.001,
"step": 45800
},
{
"epoch": 23.09,
"grad_norm": 1.9296995401382446,
"learning_rate": 5.4465326633165835e-06,
"loss": 0.0011,
"step": 45825
},
{
"epoch": 23.1,
"grad_norm": 0.6591385006904602,
"learning_rate": 5.444020100502513e-06,
"loss": 0.001,
"step": 45850
},
{
"epoch": 23.11,
"grad_norm": 0.3265586793422699,
"learning_rate": 5.441507537688443e-06,
"loss": 0.0009,
"step": 45875
},
{
"epoch": 23.12,
"grad_norm": 1.2838494777679443,
"learning_rate": 5.438994974874372e-06,
"loss": 0.0008,
"step": 45900
},
{
"epoch": 23.14,
"grad_norm": 2.554136037826538,
"learning_rate": 5.4364824120603025e-06,
"loss": 0.0012,
"step": 45925
},
{
"epoch": 23.15,
"grad_norm": 1.0224181413650513,
"learning_rate": 5.433969849246231e-06,
"loss": 0.001,
"step": 45950
},
{
"epoch": 23.16,
"grad_norm": 1.341222882270813,
"learning_rate": 5.431457286432161e-06,
"loss": 0.0009,
"step": 45975
},
{
"epoch": 23.17,
"grad_norm": 0.49937868118286133,
"learning_rate": 5.428944723618091e-06,
"loss": 0.0007,
"step": 46000
},
{
"epoch": 23.17,
"eval_loss": 0.346920371055603,
"eval_runtime": 644.2541,
"eval_samples_per_second": 2.187,
"eval_steps_per_second": 2.187,
"eval_wer": 22.656520235212728,
"step": 46000
},
{
"epoch": 23.19,
"grad_norm": 0.7111514806747437,
"learning_rate": 5.426432160804021e-06,
"loss": 0.0008,
"step": 46025
},
{
"epoch": 23.2,
"grad_norm": 0.8799687623977661,
"learning_rate": 5.423919597989951e-06,
"loss": 0.0008,
"step": 46050
},
{
"epoch": 23.21,
"grad_norm": 0.3264644742012024,
"learning_rate": 5.421407035175879e-06,
"loss": 0.0012,
"step": 46075
},
{
"epoch": 23.22,
"grad_norm": 1.2488361597061157,
"learning_rate": 5.4188944723618095e-06,
"loss": 0.0011,
"step": 46100
},
{
"epoch": 23.24,
"grad_norm": 0.21208049356937408,
"learning_rate": 5.416381909547739e-06,
"loss": 0.001,
"step": 46125
},
{
"epoch": 23.25,
"grad_norm": 0.593122661113739,
"learning_rate": 5.413869346733669e-06,
"loss": 0.0008,
"step": 46150
},
{
"epoch": 23.26,
"grad_norm": 0.8040767312049866,
"learning_rate": 5.411356783919598e-06,
"loss": 0.0008,
"step": 46175
},
{
"epoch": 23.27,
"grad_norm": 0.21935276687145233,
"learning_rate": 5.4088442211055284e-06,
"loss": 0.0009,
"step": 46200
},
{
"epoch": 23.29,
"grad_norm": 0.16591764986515045,
"learning_rate": 5.406331658291459e-06,
"loss": 0.0007,
"step": 46225
},
{
"epoch": 23.3,
"grad_norm": 0.7102475762367249,
"learning_rate": 5.403819095477387e-06,
"loss": 0.0011,
"step": 46250
},
{
"epoch": 23.31,
"grad_norm": 2.245885133743286,
"learning_rate": 5.401306532663317e-06,
"loss": 0.0011,
"step": 46275
},
{
"epoch": 23.32,
"grad_norm": 0.5632694363594055,
"learning_rate": 5.398793969849247e-06,
"loss": 0.0008,
"step": 46300
},
{
"epoch": 23.34,
"grad_norm": 0.4642152190208435,
"learning_rate": 5.396281407035177e-06,
"loss": 0.0008,
"step": 46325
},
{
"epoch": 23.35,
"grad_norm": 1.0977600812911987,
"learning_rate": 5.393768844221105e-06,
"loss": 0.0012,
"step": 46350
},
{
"epoch": 23.36,
"grad_norm": 1.1424881219863892,
"learning_rate": 5.391256281407035e-06,
"loss": 0.0011,
"step": 46375
},
{
"epoch": 23.38,
"grad_norm": 1.7661696672439575,
"learning_rate": 5.388743718592965e-06,
"loss": 0.0009,
"step": 46400
},
{
"epoch": 23.39,
"grad_norm": 0.5764384269714355,
"learning_rate": 5.386231155778895e-06,
"loss": 0.0011,
"step": 46425
},
{
"epoch": 23.4,
"grad_norm": 0.5465607047080994,
"learning_rate": 5.383718592964825e-06,
"loss": 0.0015,
"step": 46450
},
{
"epoch": 23.41,
"grad_norm": 0.4862133264541626,
"learning_rate": 5.381206030150754e-06,
"loss": 0.0009,
"step": 46475
},
{
"epoch": 23.43,
"grad_norm": 0.23647759854793549,
"learning_rate": 5.3786934673366846e-06,
"loss": 0.0007,
"step": 46500
},
{
"epoch": 23.44,
"grad_norm": 0.3536996841430664,
"learning_rate": 5.376180904522613e-06,
"loss": 0.0007,
"step": 46525
},
{
"epoch": 23.45,
"grad_norm": 1.0129157304763794,
"learning_rate": 5.373668341708543e-06,
"loss": 0.0008,
"step": 46550
},
{
"epoch": 23.46,
"grad_norm": 0.41435906291007996,
"learning_rate": 5.3711557788944725e-06,
"loss": 0.0011,
"step": 46575
},
{
"epoch": 23.48,
"grad_norm": 0.33409273624420166,
"learning_rate": 5.368643216080403e-06,
"loss": 0.001,
"step": 46600
},
{
"epoch": 23.49,
"grad_norm": 0.2821144759654999,
"learning_rate": 5.366130653266333e-06,
"loss": 0.001,
"step": 46625
},
{
"epoch": 23.5,
"grad_norm": 1.0986402034759521,
"learning_rate": 5.363618090452261e-06,
"loss": 0.001,
"step": 46650
},
{
"epoch": 23.51,
"grad_norm": 1.016719102859497,
"learning_rate": 5.3611055276381915e-06,
"loss": 0.001,
"step": 46675
},
{
"epoch": 23.53,
"grad_norm": 0.6379337310791016,
"learning_rate": 5.358592964824121e-06,
"loss": 0.001,
"step": 46700
},
{
"epoch": 23.54,
"grad_norm": 0.8883301019668579,
"learning_rate": 5.356080402010051e-06,
"loss": 0.0008,
"step": 46725
},
{
"epoch": 23.55,
"grad_norm": 0.7816546559333801,
"learning_rate": 5.35356783919598e-06,
"loss": 0.0009,
"step": 46750
},
{
"epoch": 23.56,
"grad_norm": 0.9444398283958435,
"learning_rate": 5.3510552763819105e-06,
"loss": 0.0012,
"step": 46775
},
{
"epoch": 23.58,
"grad_norm": 0.7157164812088013,
"learning_rate": 5.348542713567839e-06,
"loss": 0.001,
"step": 46800
},
{
"epoch": 23.59,
"grad_norm": 0.31857380270957947,
"learning_rate": 5.346030150753769e-06,
"loss": 0.0011,
"step": 46825
},
{
"epoch": 23.6,
"grad_norm": 1.1218419075012207,
"learning_rate": 5.343517587939699e-06,
"loss": 0.0014,
"step": 46850
},
{
"epoch": 23.61,
"grad_norm": 1.2285215854644775,
"learning_rate": 5.341005025125629e-06,
"loss": 0.001,
"step": 46875
},
{
"epoch": 23.63,
"grad_norm": 1.1362957954406738,
"learning_rate": 5.338492462311559e-06,
"loss": 0.001,
"step": 46900
},
{
"epoch": 23.64,
"grad_norm": 1.0595365762710571,
"learning_rate": 5.335979899497487e-06,
"loss": 0.001,
"step": 46925
},
{
"epoch": 23.65,
"grad_norm": 0.9272093176841736,
"learning_rate": 5.3334673366834175e-06,
"loss": 0.0012,
"step": 46950
},
{
"epoch": 23.66,
"grad_norm": 1.4149200916290283,
"learning_rate": 5.330954773869347e-06,
"loss": 0.001,
"step": 46975
},
{
"epoch": 23.68,
"grad_norm": 0.7414202094078064,
"learning_rate": 5.328442211055277e-06,
"loss": 0.0015,
"step": 47000
},
{
"epoch": 23.68,
"eval_loss": 0.3519901931285858,
"eval_runtime": 650.2703,
"eval_samples_per_second": 2.167,
"eval_steps_per_second": 2.167,
"eval_wer": 22.905569007263924,
"step": 47000
},
{
"epoch": 23.69,
"grad_norm": 1.5265312194824219,
"learning_rate": 5.325929648241206e-06,
"loss": 0.0012,
"step": 47025
},
{
"epoch": 23.7,
"grad_norm": 0.3424956500530243,
"learning_rate": 5.3234170854271364e-06,
"loss": 0.001,
"step": 47050
},
{
"epoch": 23.72,
"grad_norm": 1.250054121017456,
"learning_rate": 5.320904522613067e-06,
"loss": 0.0012,
"step": 47075
},
{
"epoch": 23.73,
"grad_norm": 0.7167928218841553,
"learning_rate": 5.318391959798995e-06,
"loss": 0.0011,
"step": 47100
},
{
"epoch": 23.74,
"grad_norm": 1.2113206386566162,
"learning_rate": 5.315879396984925e-06,
"loss": 0.001,
"step": 47125
},
{
"epoch": 23.75,
"grad_norm": 0.683556079864502,
"learning_rate": 5.313366834170855e-06,
"loss": 0.0011,
"step": 47150
},
{
"epoch": 23.77,
"grad_norm": 0.2526809871196747,
"learning_rate": 5.310854271356785e-06,
"loss": 0.001,
"step": 47175
},
{
"epoch": 23.78,
"grad_norm": 1.4190630912780762,
"learning_rate": 5.308341708542713e-06,
"loss": 0.0012,
"step": 47200
},
{
"epoch": 23.79,
"grad_norm": 1.7319457530975342,
"learning_rate": 5.305829145728643e-06,
"loss": 0.0014,
"step": 47225
},
{
"epoch": 23.8,
"grad_norm": 0.9196786284446716,
"learning_rate": 5.3033165829145736e-06,
"loss": 0.0009,
"step": 47250
},
{
"epoch": 23.82,
"grad_norm": 0.6173463463783264,
"learning_rate": 5.300804020100503e-06,
"loss": 0.001,
"step": 47275
},
{
"epoch": 23.83,
"grad_norm": 0.6350324153900146,
"learning_rate": 5.298291457286433e-06,
"loss": 0.0012,
"step": 47300
},
{
"epoch": 23.84,
"grad_norm": 0.24510569870471954,
"learning_rate": 5.295778894472362e-06,
"loss": 0.0007,
"step": 47325
},
{
"epoch": 23.85,
"grad_norm": 0.6556370258331299,
"learning_rate": 5.2932663316582926e-06,
"loss": 0.0012,
"step": 47350
},
{
"epoch": 23.87,
"grad_norm": 0.13942670822143555,
"learning_rate": 5.290753768844221e-06,
"loss": 0.001,
"step": 47375
},
{
"epoch": 23.88,
"grad_norm": 0.5470798015594482,
"learning_rate": 5.288241206030151e-06,
"loss": 0.001,
"step": 47400
},
{
"epoch": 23.89,
"grad_norm": 0.18008272349834442,
"learning_rate": 5.2857286432160805e-06,
"loss": 0.0009,
"step": 47425
},
{
"epoch": 23.9,
"grad_norm": 0.6318380832672119,
"learning_rate": 5.283216080402011e-06,
"loss": 0.0011,
"step": 47450
},
{
"epoch": 23.92,
"grad_norm": 0.3588716387748718,
"learning_rate": 5.280703517587941e-06,
"loss": 0.0013,
"step": 47475
},
{
"epoch": 23.93,
"grad_norm": 0.29687148332595825,
"learning_rate": 5.278190954773869e-06,
"loss": 0.001,
"step": 47500
},
{
"epoch": 23.94,
"grad_norm": 0.864915132522583,
"learning_rate": 5.2756783919597995e-06,
"loss": 0.0012,
"step": 47525
},
{
"epoch": 23.95,
"grad_norm": 1.526944637298584,
"learning_rate": 5.273165829145729e-06,
"loss": 0.0011,
"step": 47550
},
{
"epoch": 23.97,
"grad_norm": 1.1919291019439697,
"learning_rate": 5.270653266331659e-06,
"loss": 0.0012,
"step": 47575
},
{
"epoch": 23.98,
"grad_norm": 0.46352332830429077,
"learning_rate": 5.268140703517588e-06,
"loss": 0.001,
"step": 47600
},
{
"epoch": 23.99,
"grad_norm": 1.840276837348938,
"learning_rate": 5.2656281407035185e-06,
"loss": 0.0011,
"step": 47625
},
{
"epoch": 24.01,
"grad_norm": 0.4287075996398926,
"learning_rate": 5.263115577889447e-06,
"loss": 0.001,
"step": 47650
},
{
"epoch": 24.02,
"grad_norm": 0.6221798062324524,
"learning_rate": 5.260603015075377e-06,
"loss": 0.0008,
"step": 47675
},
{
"epoch": 24.03,
"grad_norm": 0.7368704676628113,
"learning_rate": 5.258090452261307e-06,
"loss": 0.0009,
"step": 47700
},
{
"epoch": 24.04,
"grad_norm": 0.7908537983894348,
"learning_rate": 5.255577889447237e-06,
"loss": 0.0007,
"step": 47725
},
{
"epoch": 24.06,
"grad_norm": 0.32871556282043457,
"learning_rate": 5.253065326633167e-06,
"loss": 0.0006,
"step": 47750
},
{
"epoch": 24.07,
"grad_norm": 0.5463153719902039,
"learning_rate": 5.250552763819095e-06,
"loss": 0.0005,
"step": 47775
},
{
"epoch": 24.08,
"grad_norm": 1.1261115074157715,
"learning_rate": 5.2480402010050255e-06,
"loss": 0.0008,
"step": 47800
},
{
"epoch": 24.09,
"grad_norm": 0.19659703969955444,
"learning_rate": 5.245527638190955e-06,
"loss": 0.0007,
"step": 47825
},
{
"epoch": 24.11,
"grad_norm": 1.187845230102539,
"learning_rate": 5.243015075376885e-06,
"loss": 0.0005,
"step": 47850
},
{
"epoch": 24.12,
"grad_norm": 0.7721708416938782,
"learning_rate": 5.240502512562814e-06,
"loss": 0.0006,
"step": 47875
},
{
"epoch": 24.13,
"grad_norm": 1.14128577709198,
"learning_rate": 5.2379899497487444e-06,
"loss": 0.0007,
"step": 47900
},
{
"epoch": 24.14,
"grad_norm": 0.2921935021877289,
"learning_rate": 5.235477386934675e-06,
"loss": 0.0006,
"step": 47925
},
{
"epoch": 24.16,
"grad_norm": 0.542747437953949,
"learning_rate": 5.232964824120603e-06,
"loss": 0.0007,
"step": 47950
},
{
"epoch": 24.17,
"grad_norm": 0.1501588523387909,
"learning_rate": 5.230452261306533e-06,
"loss": 0.0006,
"step": 47975
},
{
"epoch": 24.18,
"grad_norm": 1.1452776193618774,
"learning_rate": 5.227939698492463e-06,
"loss": 0.0006,
"step": 48000
},
{
"epoch": 24.18,
"eval_loss": 0.3501649498939514,
"eval_runtime": 653.8098,
"eval_samples_per_second": 2.155,
"eval_steps_per_second": 2.155,
"eval_wer": 22.365963334486334,
"step": 48000
},
{
"epoch": 24.19,
"grad_norm": 0.4397837817668915,
"learning_rate": 5.225427135678393e-06,
"loss": 0.0008,
"step": 48025
},
{
"epoch": 24.21,
"grad_norm": 0.2055756002664566,
"learning_rate": 5.222914572864321e-06,
"loss": 0.0007,
"step": 48050
},
{
"epoch": 24.22,
"grad_norm": 0.5878810882568359,
"learning_rate": 5.220402010050251e-06,
"loss": 0.0006,
"step": 48075
},
{
"epoch": 24.23,
"grad_norm": 0.17719118297100067,
"learning_rate": 5.217889447236182e-06,
"loss": 0.0006,
"step": 48100
},
{
"epoch": 24.24,
"grad_norm": 0.6639860272407532,
"learning_rate": 5.215376884422111e-06,
"loss": 0.0007,
"step": 48125
},
{
"epoch": 24.26,
"grad_norm": 0.47352033853530884,
"learning_rate": 5.212864321608041e-06,
"loss": 0.0007,
"step": 48150
},
{
"epoch": 24.27,
"grad_norm": 0.5616294145584106,
"learning_rate": 5.21035175879397e-06,
"loss": 0.0007,
"step": 48175
},
{
"epoch": 24.28,
"grad_norm": 0.23458968102931976,
"learning_rate": 5.2078391959799006e-06,
"loss": 0.0005,
"step": 48200
},
{
"epoch": 24.29,
"grad_norm": 1.2819747924804688,
"learning_rate": 5.205326633165829e-06,
"loss": 0.0009,
"step": 48225
},
{
"epoch": 24.31,
"grad_norm": 0.9145079255104065,
"learning_rate": 5.202814070351759e-06,
"loss": 0.001,
"step": 48250
},
{
"epoch": 24.32,
"grad_norm": 0.3194675147533417,
"learning_rate": 5.2003015075376885e-06,
"loss": 0.0007,
"step": 48275
},
{
"epoch": 24.33,
"grad_norm": 0.7687875032424927,
"learning_rate": 5.197788944723619e-06,
"loss": 0.0007,
"step": 48300
},
{
"epoch": 24.35,
"grad_norm": 0.46240946650505066,
"learning_rate": 5.195276381909549e-06,
"loss": 0.0007,
"step": 48325
},
{
"epoch": 24.36,
"grad_norm": 0.9186506271362305,
"learning_rate": 5.192763819095477e-06,
"loss": 0.0007,
"step": 48350
},
{
"epoch": 24.37,
"grad_norm": 0.2463284581899643,
"learning_rate": 5.1902512562814075e-06,
"loss": 0.0008,
"step": 48375
},
{
"epoch": 24.38,
"grad_norm": 0.2767968773841858,
"learning_rate": 5.187738693467337e-06,
"loss": 0.001,
"step": 48400
},
{
"epoch": 24.4,
"grad_norm": 0.46523067355155945,
"learning_rate": 5.185226130653267e-06,
"loss": 0.0009,
"step": 48425
},
{
"epoch": 24.41,
"grad_norm": 0.23646961152553558,
"learning_rate": 5.182713567839196e-06,
"loss": 0.0009,
"step": 48450
},
{
"epoch": 24.42,
"grad_norm": 0.6883164048194885,
"learning_rate": 5.1802010050251265e-06,
"loss": 0.0007,
"step": 48475
},
{
"epoch": 24.43,
"grad_norm": 0.33344462513923645,
"learning_rate": 5.177688442211055e-06,
"loss": 0.0009,
"step": 48500
},
{
"epoch": 24.45,
"grad_norm": 0.1479116529226303,
"learning_rate": 5.175175879396985e-06,
"loss": 0.0007,
"step": 48525
},
{
"epoch": 24.46,
"grad_norm": 1.2230092287063599,
"learning_rate": 5.172663316582915e-06,
"loss": 0.0008,
"step": 48550
},
{
"epoch": 24.47,
"grad_norm": 0.17954891920089722,
"learning_rate": 5.170150753768845e-06,
"loss": 0.0009,
"step": 48575
},
{
"epoch": 24.48,
"grad_norm": 0.7569608092308044,
"learning_rate": 5.167638190954775e-06,
"loss": 0.0007,
"step": 48600
},
{
"epoch": 24.5,
"grad_norm": 0.9554746747016907,
"learning_rate": 5.165125628140703e-06,
"loss": 0.0009,
"step": 48625
},
{
"epoch": 24.51,
"grad_norm": 0.6516941785812378,
"learning_rate": 5.1626130653266335e-06,
"loss": 0.0009,
"step": 48650
},
{
"epoch": 24.52,
"grad_norm": 0.6526229381561279,
"learning_rate": 5.160100502512563e-06,
"loss": 0.001,
"step": 48675
},
{
"epoch": 24.53,
"grad_norm": 0.17622053623199463,
"learning_rate": 5.157587939698493e-06,
"loss": 0.0007,
"step": 48700
},
{
"epoch": 24.55,
"grad_norm": 0.8113358616828918,
"learning_rate": 5.155075376884423e-06,
"loss": 0.0009,
"step": 48725
},
{
"epoch": 24.56,
"grad_norm": 0.7531803846359253,
"learning_rate": 5.1525628140703525e-06,
"loss": 0.0008,
"step": 48750
},
{
"epoch": 24.57,
"grad_norm": 0.82627934217453,
"learning_rate": 5.150050251256283e-06,
"loss": 0.0007,
"step": 48775
},
{
"epoch": 24.58,
"grad_norm": 1.1042143106460571,
"learning_rate": 5.147537688442211e-06,
"loss": 0.0007,
"step": 48800
},
{
"epoch": 24.6,
"grad_norm": 0.34287524223327637,
"learning_rate": 5.145025125628141e-06,
"loss": 0.0006,
"step": 48825
},
{
"epoch": 24.61,
"grad_norm": 1.404876470565796,
"learning_rate": 5.142512562814071e-06,
"loss": 0.001,
"step": 48850
},
{
"epoch": 24.62,
"grad_norm": 0.3671759068965912,
"learning_rate": 5.140000000000001e-06,
"loss": 0.0009,
"step": 48875
},
{
"epoch": 24.63,
"grad_norm": 0.7027815580368042,
"learning_rate": 5.137487437185929e-06,
"loss": 0.0009,
"step": 48900
},
{
"epoch": 24.65,
"grad_norm": 0.8009297847747803,
"learning_rate": 5.134974874371859e-06,
"loss": 0.001,
"step": 48925
},
{
"epoch": 24.66,
"grad_norm": 1.0700089931488037,
"learning_rate": 5.13246231155779e-06,
"loss": 0.001,
"step": 48950
},
{
"epoch": 24.67,
"grad_norm": 0.4704926609992981,
"learning_rate": 5.129949748743719e-06,
"loss": 0.0014,
"step": 48975
},
{
"epoch": 24.69,
"grad_norm": 0.2424398958683014,
"learning_rate": 5.127437185929649e-06,
"loss": 0.0012,
"step": 49000
},
{
"epoch": 24.69,
"eval_loss": 0.3495071828365326,
"eval_runtime": 650.7727,
"eval_samples_per_second": 2.165,
"eval_steps_per_second": 2.165,
"eval_wer": 22.87097890003459,
"step": 49000
},
{
"epoch": 24.7,
"grad_norm": 0.5802572965621948,
"learning_rate": 5.124924623115578e-06,
"loss": 0.001,
"step": 49025
},
{
"epoch": 24.71,
"grad_norm": 0.9147945046424866,
"learning_rate": 5.1224120603015086e-06,
"loss": 0.001,
"step": 49050
},
{
"epoch": 24.72,
"grad_norm": 0.9649335145950317,
"learning_rate": 5.119899497487437e-06,
"loss": 0.0014,
"step": 49075
},
{
"epoch": 24.74,
"grad_norm": 0.6083235144615173,
"learning_rate": 5.117386934673367e-06,
"loss": 0.0009,
"step": 49100
},
{
"epoch": 24.75,
"grad_norm": 0.29622915387153625,
"learning_rate": 5.1148743718592965e-06,
"loss": 0.0015,
"step": 49125
},
{
"epoch": 24.76,
"grad_norm": 0.1976325958967209,
"learning_rate": 5.112361809045227e-06,
"loss": 0.0012,
"step": 49150
},
{
"epoch": 24.77,
"grad_norm": 0.6826661825180054,
"learning_rate": 5.109849246231157e-06,
"loss": 0.0012,
"step": 49175
},
{
"epoch": 24.79,
"grad_norm": 1.7492895126342773,
"learning_rate": 5.107336683417085e-06,
"loss": 0.0011,
"step": 49200
},
{
"epoch": 24.8,
"grad_norm": 1.210092306137085,
"learning_rate": 5.1048241206030155e-06,
"loss": 0.0009,
"step": 49225
},
{
"epoch": 24.81,
"grad_norm": 0.6438060998916626,
"learning_rate": 5.102311557788945e-06,
"loss": 0.0007,
"step": 49250
},
{
"epoch": 24.82,
"grad_norm": 1.0905343294143677,
"learning_rate": 5.099798994974875e-06,
"loss": 0.0009,
"step": 49275
},
{
"epoch": 24.84,
"grad_norm": 0.9838513135910034,
"learning_rate": 5.097286432160804e-06,
"loss": 0.0008,
"step": 49300
},
{
"epoch": 24.85,
"grad_norm": 0.906428337097168,
"learning_rate": 5.0947738693467345e-06,
"loss": 0.0006,
"step": 49325
},
{
"epoch": 24.86,
"grad_norm": 0.5822015404701233,
"learning_rate": 5.092261306532665e-06,
"loss": 0.001,
"step": 49350
},
{
"epoch": 24.87,
"grad_norm": 0.6224560141563416,
"learning_rate": 5.089748743718593e-06,
"loss": 0.0008,
"step": 49375
},
{
"epoch": 24.89,
"grad_norm": 0.12286537140607834,
"learning_rate": 5.087236180904523e-06,
"loss": 0.0008,
"step": 49400
},
{
"epoch": 24.9,
"grad_norm": 0.958941638469696,
"learning_rate": 5.084723618090453e-06,
"loss": 0.0008,
"step": 49425
},
{
"epoch": 24.91,
"grad_norm": 0.3192073106765747,
"learning_rate": 5.082211055276383e-06,
"loss": 0.0009,
"step": 49450
},
{
"epoch": 24.92,
"grad_norm": null,
"learning_rate": 5.079798994974875e-06,
"loss": 0.0009,
"step": 49475
},
{
"epoch": 24.94,
"grad_norm": 0.13195385038852692,
"learning_rate": 5.077286432160804e-06,
"loss": 0.0008,
"step": 49500
},
{
"epoch": 24.95,
"grad_norm": 1.2665826082229614,
"learning_rate": 5.074773869346734e-06,
"loss": 0.0009,
"step": 49525
},
{
"epoch": 24.96,
"grad_norm": 0.7236266732215881,
"learning_rate": 5.072261306532664e-06,
"loss": 0.0009,
"step": 49550
},
{
"epoch": 24.97,
"grad_norm": 1.4165126085281372,
"learning_rate": 5.069748743718593e-06,
"loss": 0.0011,
"step": 49575
},
{
"epoch": 24.99,
"grad_norm": 0.6968191266059875,
"learning_rate": 5.0672361809045235e-06,
"loss": 0.0011,
"step": 49600
},
{
"epoch": 25.0,
"grad_norm": 0.3853646218776703,
"learning_rate": 5.064723618090453e-06,
"loss": 0.001,
"step": 49625
},
{
"epoch": 25.01,
"grad_norm": 1.0228655338287354,
"learning_rate": 5.062211055276382e-06,
"loss": 0.0006,
"step": 49650
},
{
"epoch": 25.03,
"grad_norm": 0.07551419734954834,
"learning_rate": 5.0596984924623115e-06,
"loss": 0.0009,
"step": 49675
},
{
"epoch": 25.04,
"grad_norm": 0.9682655334472656,
"learning_rate": 5.057185929648242e-06,
"loss": 0.0009,
"step": 49700
},
{
"epoch": 25.05,
"grad_norm": 0.4476355016231537,
"learning_rate": 5.054673366834171e-06,
"loss": 0.0008,
"step": 49725
},
{
"epoch": 25.06,
"grad_norm": 0.44491493701934814,
"learning_rate": 5.052160804020101e-06,
"loss": 0.0009,
"step": 49750
},
{
"epoch": 25.08,
"grad_norm": 1.1377488374710083,
"learning_rate": 5.049648241206031e-06,
"loss": 0.0007,
"step": 49775
},
{
"epoch": 25.09,
"grad_norm": 0.2252766638994217,
"learning_rate": 5.04713567839196e-06,
"loss": 0.0006,
"step": 49800
},
{
"epoch": 25.1,
"grad_norm": 0.3592166006565094,
"learning_rate": 5.04462311557789e-06,
"loss": 0.0008,
"step": 49825
},
{
"epoch": 25.11,
"grad_norm": 0.2291008085012436,
"learning_rate": 5.042110552763819e-06,
"loss": 0.0005,
"step": 49850
},
{
"epoch": 25.13,
"grad_norm": 0.36598822474479675,
"learning_rate": 5.0395979899497495e-06,
"loss": 0.0004,
"step": 49875
},
{
"epoch": 25.14,
"grad_norm": 0.13307011127471924,
"learning_rate": 5.037085427135679e-06,
"loss": 0.0006,
"step": 49900
},
{
"epoch": 25.15,
"grad_norm": 0.5685657858848572,
"learning_rate": 5.034572864321608e-06,
"loss": 0.0007,
"step": 49925
},
{
"epoch": 25.16,
"grad_norm": 0.836059033870697,
"learning_rate": 5.0320603015075374e-06,
"loss": 0.0008,
"step": 49950
},
{
"epoch": 25.18,
"grad_norm": 0.8621478080749512,
"learning_rate": 5.029547738693468e-06,
"loss": 0.0007,
"step": 49975
},
{
"epoch": 25.19,
"grad_norm": 0.750243067741394,
"learning_rate": 5.027035175879398e-06,
"loss": 0.0007,
"step": 50000
},
{
"epoch": 25.19,
"eval_loss": 0.3517380654811859,
"eval_runtime": 645.2524,
"eval_samples_per_second": 2.184,
"eval_steps_per_second": 2.184,
"eval_wer": 23.189207886544448,
"step": 50000
},
{
"epoch": 25.2,
"grad_norm": 0.194803848862648,
"learning_rate": 5.024522613065327e-06,
"loss": 0.0007,
"step": 50025
},
{
"epoch": 25.21,
"grad_norm": 0.9492383599281311,
"learning_rate": 5.022010050251257e-06,
"loss": 0.0008,
"step": 50050
},
{
"epoch": 25.23,
"grad_norm": 0.3268399238586426,
"learning_rate": 5.019497487437186e-06,
"loss": 0.0005,
"step": 50075
},
{
"epoch": 25.24,
"grad_norm": 0.23588715493679047,
"learning_rate": 5.016984924623116e-06,
"loss": 0.0005,
"step": 50100
},
{
"epoch": 25.25,
"grad_norm": 0.3195103704929352,
"learning_rate": 5.014472361809045e-06,
"loss": 0.0006,
"step": 50125
},
{
"epoch": 25.26,
"grad_norm": 0.5224353075027466,
"learning_rate": 5.011959798994975e-06,
"loss": 0.0005,
"step": 50150
},
{
"epoch": 25.28,
"grad_norm": 0.3808083236217499,
"learning_rate": 5.009447236180906e-06,
"loss": 0.0006,
"step": 50175
},
{
"epoch": 25.29,
"grad_norm": 0.8282648921012878,
"learning_rate": 5.006934673366834e-06,
"loss": 0.0005,
"step": 50200
},
{
"epoch": 25.3,
"grad_norm": 1.0018339157104492,
"learning_rate": 5.004422110552764e-06,
"loss": 0.0005,
"step": 50225
},
{
"epoch": 25.31,
"grad_norm": 0.6774661540985107,
"learning_rate": 5.0019095477386935e-06,
"loss": 0.0007,
"step": 50250
},
{
"epoch": 25.33,
"grad_norm": 0.9828543663024902,
"learning_rate": 4.999396984924624e-06,
"loss": 0.0008,
"step": 50275
},
{
"epoch": 25.34,
"grad_norm": 0.5815138220787048,
"learning_rate": 4.996884422110553e-06,
"loss": 0.0007,
"step": 50300
},
{
"epoch": 25.35,
"grad_norm": 0.20348763465881348,
"learning_rate": 4.994371859296483e-06,
"loss": 0.001,
"step": 50325
},
{
"epoch": 25.37,
"grad_norm": 0.4329879879951477,
"learning_rate": 4.9918592964824125e-06,
"loss": 0.0013,
"step": 50350
},
{
"epoch": 25.38,
"grad_norm": 0.5430723428726196,
"learning_rate": 4.989346733668342e-06,
"loss": 0.0008,
"step": 50375
},
{
"epoch": 25.39,
"grad_norm": 1.5767520666122437,
"learning_rate": 4.986834170854272e-06,
"loss": 0.001,
"step": 50400
},
{
"epoch": 25.4,
"grad_norm": 0.2442205548286438,
"learning_rate": 4.984321608040201e-06,
"loss": 0.0005,
"step": 50425
},
{
"epoch": 25.42,
"grad_norm": 0.20690348744392395,
"learning_rate": 4.981809045226131e-06,
"loss": 0.0009,
"step": 50450
},
{
"epoch": 25.43,
"grad_norm": 0.6699907183647156,
"learning_rate": 4.979296482412061e-06,
"loss": 0.0007,
"step": 50475
},
{
"epoch": 25.44,
"grad_norm": 1.3233988285064697,
"learning_rate": 4.97678391959799e-06,
"loss": 0.0008,
"step": 50500
},
{
"epoch": 25.45,
"grad_norm": 0.44620636105537415,
"learning_rate": 4.97427135678392e-06,
"loss": 0.0006,
"step": 50525
},
{
"epoch": 25.47,
"grad_norm": 0.28157714009284973,
"learning_rate": 4.97175879396985e-06,
"loss": 0.0007,
"step": 50550
},
{
"epoch": 25.48,
"grad_norm": 0.821960985660553,
"learning_rate": 4.969246231155779e-06,
"loss": 0.0006,
"step": 50575
},
{
"epoch": 25.49,
"grad_norm": 0.5584134459495544,
"learning_rate": 4.966733668341709e-06,
"loss": 0.0007,
"step": 50600
},
{
"epoch": 25.5,
"grad_norm": 0.801690399646759,
"learning_rate": 4.9642211055276385e-06,
"loss": 0.0006,
"step": 50625
},
{
"epoch": 25.52,
"grad_norm": 1.2869949340820312,
"learning_rate": 4.961708542713568e-06,
"loss": 0.0008,
"step": 50650
},
{
"epoch": 25.53,
"grad_norm": 0.13358135521411896,
"learning_rate": 4.959195979899498e-06,
"loss": 0.0006,
"step": 50675
},
{
"epoch": 25.54,
"grad_norm": 0.5842620134353638,
"learning_rate": 4.956683417085428e-06,
"loss": 0.0008,
"step": 50700
},
{
"epoch": 25.55,
"grad_norm": 0.3312392234802246,
"learning_rate": 4.9541708542713575e-06,
"loss": 0.0007,
"step": 50725
},
{
"epoch": 25.57,
"grad_norm": 0.9442441463470459,
"learning_rate": 4.951658291457287e-06,
"loss": 0.0007,
"step": 50750
},
{
"epoch": 25.58,
"grad_norm": 0.8669309616088867,
"learning_rate": 4.949145728643216e-06,
"loss": 0.0009,
"step": 50775
},
{
"epoch": 25.59,
"grad_norm": 1.1537563800811768,
"learning_rate": 4.946633165829146e-06,
"loss": 0.0007,
"step": 50800
},
{
"epoch": 25.6,
"grad_norm": 0.9006689190864563,
"learning_rate": 4.944120603015076e-06,
"loss": 0.0006,
"step": 50825
},
{
"epoch": 25.62,
"grad_norm": 0.2254357486963272,
"learning_rate": 4.941608040201005e-06,
"loss": 0.0008,
"step": 50850
},
{
"epoch": 25.63,
"grad_norm": 0.6781788468360901,
"learning_rate": 4.939095477386935e-06,
"loss": 0.0009,
"step": 50875
},
{
"epoch": 25.64,
"grad_norm": 0.49428144097328186,
"learning_rate": 4.936582914572865e-06,
"loss": 0.0007,
"step": 50900
},
{
"epoch": 25.65,
"grad_norm": 0.15863998234272003,
"learning_rate": 4.934070351758795e-06,
"loss": 0.0007,
"step": 50925
},
{
"epoch": 25.67,
"grad_norm": 1.1742552518844604,
"learning_rate": 4.931557788944724e-06,
"loss": 0.001,
"step": 50950
},
{
"epoch": 25.68,
"grad_norm": 0.7124210596084595,
"learning_rate": 4.929045226130654e-06,
"loss": 0.0007,
"step": 50975
},
{
"epoch": 25.69,
"grad_norm": 0.11829496920108795,
"learning_rate": 4.926532663316583e-06,
"loss": 0.0007,
"step": 51000
},
{
"epoch": 25.69,
"eval_loss": 0.35887035727500916,
"eval_runtime": 648.5707,
"eval_samples_per_second": 2.172,
"eval_steps_per_second": 2.172,
"eval_wer": 22.656520235212728,
"step": 51000
},
{
"epoch": 25.71,
"grad_norm": 1.2925130128860474,
"learning_rate": 4.924020100502513e-06,
"loss": 0.0009,
"step": 51025
},
{
"epoch": 25.72,
"grad_norm": 0.9622329473495483,
"learning_rate": 4.921507537688442e-06,
"loss": 0.001,
"step": 51050
},
{
"epoch": 25.73,
"grad_norm": 0.15050731599330902,
"learning_rate": 4.918994974874372e-06,
"loss": 0.0009,
"step": 51075
},
{
"epoch": 25.74,
"grad_norm": 0.6646810173988342,
"learning_rate": 4.9164824120603015e-06,
"loss": 0.0008,
"step": 51100
},
{
"epoch": 25.76,
"grad_norm": 1.8066281080245972,
"learning_rate": 4.913969849246232e-06,
"loss": 0.0008,
"step": 51125
},
{
"epoch": 25.77,
"grad_norm": 1.1246289014816284,
"learning_rate": 4.911457286432161e-06,
"loss": 0.0007,
"step": 51150
},
{
"epoch": 25.78,
"grad_norm": 0.20243453979492188,
"learning_rate": 4.908944723618091e-06,
"loss": 0.0009,
"step": 51175
},
{
"epoch": 25.79,
"grad_norm": 1.6287262439727783,
"learning_rate": 4.9064321608040205e-06,
"loss": 0.0006,
"step": 51200
},
{
"epoch": 25.81,
"grad_norm": 0.7835111618041992,
"learning_rate": 4.90391959798995e-06,
"loss": 0.0005,
"step": 51225
},
{
"epoch": 25.82,
"grad_norm": 0.7937703132629395,
"learning_rate": 4.90140703517588e-06,
"loss": 0.0008,
"step": 51250
},
{
"epoch": 25.83,
"grad_norm": 0.4282771944999695,
"learning_rate": 4.898894472361809e-06,
"loss": 0.0009,
"step": 51275
},
{
"epoch": 25.84,
"grad_norm": 0.9795933365821838,
"learning_rate": 4.896381909547739e-06,
"loss": 0.0006,
"step": 51300
},
{
"epoch": 25.86,
"grad_norm": 0.2915053963661194,
"learning_rate": 4.893869346733669e-06,
"loss": 0.0007,
"step": 51325
},
{
"epoch": 25.87,
"grad_norm": 0.899832010269165,
"learning_rate": 4.891356783919598e-06,
"loss": 0.0007,
"step": 51350
},
{
"epoch": 25.88,
"grad_norm": 0.8909317851066589,
"learning_rate": 4.888844221105528e-06,
"loss": 0.0008,
"step": 51375
},
{
"epoch": 25.89,
"grad_norm": 0.46953698992729187,
"learning_rate": 4.886331658291458e-06,
"loss": 0.0009,
"step": 51400
},
{
"epoch": 25.91,
"grad_norm": 0.1676747351884842,
"learning_rate": 4.883819095477387e-06,
"loss": 0.0007,
"step": 51425
},
{
"epoch": 25.92,
"grad_norm": 1.4294859170913696,
"learning_rate": 4.881306532663317e-06,
"loss": 0.0007,
"step": 51450
},
{
"epoch": 25.93,
"grad_norm": 1.2342486381530762,
"learning_rate": 4.8787939698492465e-06,
"loss": 0.0006,
"step": 51475
},
{
"epoch": 25.94,
"grad_norm": 2.795978307723999,
"learning_rate": 4.876281407035176e-06,
"loss": 0.0009,
"step": 51500
},
{
"epoch": 25.96,
"grad_norm": 0.16276022791862488,
"learning_rate": 4.873768844221106e-06,
"loss": 0.0005,
"step": 51525
},
{
"epoch": 25.97,
"grad_norm": 0.4440419673919678,
"learning_rate": 4.871256281407036e-06,
"loss": 0.0007,
"step": 51550
},
{
"epoch": 25.98,
"grad_norm": 0.5978400707244873,
"learning_rate": 4.8687437185929655e-06,
"loss": 0.0007,
"step": 51575
},
{
"epoch": 25.99,
"grad_norm": 0.41632863879203796,
"learning_rate": 4.866231155778895e-06,
"loss": 0.0008,
"step": 51600
},
{
"epoch": 26.01,
"grad_norm": 0.6219983100891113,
"learning_rate": 4.863718592964824e-06,
"loss": 0.0005,
"step": 51625
},
{
"epoch": 26.02,
"grad_norm": 1.240051507949829,
"learning_rate": 4.861206030150754e-06,
"loss": 0.0009,
"step": 51650
},
{
"epoch": 26.03,
"grad_norm": 0.7890909314155579,
"learning_rate": 4.858793969849247e-06,
"loss": 0.0006,
"step": 51675
},
{
"epoch": 26.05,
"grad_norm": 0.1678403913974762,
"learning_rate": 4.856281407035176e-06,
"loss": 0.0006,
"step": 51700
},
{
"epoch": 26.06,
"grad_norm": 0.5384219884872437,
"learning_rate": 4.853768844221106e-06,
"loss": 0.0006,
"step": 51725
},
{
"epoch": 26.07,
"grad_norm": 0.4800323247909546,
"learning_rate": 4.8512562814070355e-06,
"loss": 0.0008,
"step": 51750
},
{
"epoch": 26.08,
"grad_norm": 0.088472880423069,
"learning_rate": 4.848743718592966e-06,
"loss": 0.0007,
"step": 51775
},
{
"epoch": 26.1,
"grad_norm": 0.28733333945274353,
"learning_rate": 4.846231155778895e-06,
"loss": 0.0006,
"step": 51800
},
{
"epoch": 26.11,
"grad_norm": 3.326415777206421,
"learning_rate": 4.843718592964824e-06,
"loss": 0.0004,
"step": 51825
},
{
"epoch": 26.12,
"grad_norm": 0.8401397466659546,
"learning_rate": 4.8412060301507545e-06,
"loss": 0.0004,
"step": 51850
},
{
"epoch": 26.13,
"grad_norm": 1.742140531539917,
"learning_rate": 4.838693467336684e-06,
"loss": 0.0005,
"step": 51875
},
{
"epoch": 26.15,
"grad_norm": 1.9404064416885376,
"learning_rate": 4.836180904522613e-06,
"loss": 0.0005,
"step": 51900
},
{
"epoch": 26.16,
"grad_norm": 0.9949504733085632,
"learning_rate": 4.833668341708543e-06,
"loss": 0.0005,
"step": 51925
},
{
"epoch": 26.17,
"grad_norm": 0.13971653580665588,
"learning_rate": 4.831155778894473e-06,
"loss": 0.0006,
"step": 51950
},
{
"epoch": 26.18,
"grad_norm": 0.31128737330436707,
"learning_rate": 4.828643216080403e-06,
"loss": 0.0005,
"step": 51975
},
{
"epoch": 26.2,
"grad_norm": 0.8576771020889282,
"learning_rate": 4.826130653266332e-06,
"loss": 0.0006,
"step": 52000
},
{
"epoch": 26.2,
"eval_loss": 0.354295015335083,
"eval_runtime": 645.4254,
"eval_samples_per_second": 2.183,
"eval_steps_per_second": 2.183,
"eval_wer": 22.960913178830854,
"step": 52000
},
{
"epoch": 26.21,
"grad_norm": 0.8186060786247253,
"learning_rate": 4.8236180904522614e-06,
"loss": 0.0006,
"step": 52025
},
{
"epoch": 26.22,
"grad_norm": 0.21395763754844666,
"learning_rate": 4.821105527638192e-06,
"loss": 0.0009,
"step": 52050
},
{
"epoch": 26.23,
"grad_norm": 0.28404101729393005,
"learning_rate": 4.818592964824121e-06,
"loss": 0.0005,
"step": 52075
},
{
"epoch": 26.25,
"grad_norm": 0.2926516830921173,
"learning_rate": 4.81608040201005e-06,
"loss": 0.0005,
"step": 52100
},
{
"epoch": 26.26,
"grad_norm": 0.8246799111366272,
"learning_rate": 4.81356783919598e-06,
"loss": 0.0005,
"step": 52125
},
{
"epoch": 26.27,
"grad_norm": 0.19814717769622803,
"learning_rate": 4.81105527638191e-06,
"loss": 0.0007,
"step": 52150
},
{
"epoch": 26.28,
"grad_norm": 0.2942245602607727,
"learning_rate": 4.80854271356784e-06,
"loss": 0.0006,
"step": 52175
},
{
"epoch": 26.3,
"grad_norm": 1.1679245233535767,
"learning_rate": 4.806030150753769e-06,
"loss": 0.0007,
"step": 52200
},
{
"epoch": 26.31,
"grad_norm": 0.19946590065956116,
"learning_rate": 4.8035175879396986e-06,
"loss": 0.0007,
"step": 52225
},
{
"epoch": 26.32,
"grad_norm": 0.5419365763664246,
"learning_rate": 4.801005025125629e-06,
"loss": 0.0004,
"step": 52250
},
{
"epoch": 26.34,
"grad_norm": 1.0101172924041748,
"learning_rate": 4.798492462311558e-06,
"loss": 0.0008,
"step": 52275
},
{
"epoch": 26.35,
"grad_norm": 0.7043997049331665,
"learning_rate": 4.795979899497487e-06,
"loss": 0.0005,
"step": 52300
},
{
"epoch": 26.36,
"grad_norm": 1.9109712839126587,
"learning_rate": 4.7934673366834175e-06,
"loss": 0.0005,
"step": 52325
},
{
"epoch": 26.37,
"grad_norm": 1.100644826889038,
"learning_rate": 4.790954773869348e-06,
"loss": 0.0009,
"step": 52350
},
{
"epoch": 26.39,
"grad_norm": 0.7146306037902832,
"learning_rate": 4.788442211055277e-06,
"loss": 0.0007,
"step": 52375
},
{
"epoch": 26.4,
"grad_norm": 0.21567130088806152,
"learning_rate": 4.785929648241206e-06,
"loss": 0.0007,
"step": 52400
},
{
"epoch": 26.41,
"grad_norm": 1.087694764137268,
"learning_rate": 4.7834170854271365e-06,
"loss": 0.001,
"step": 52425
},
{
"epoch": 26.42,
"grad_norm": 0.6008167862892151,
"learning_rate": 4.780904522613066e-06,
"loss": 0.001,
"step": 52450
},
{
"epoch": 26.44,
"grad_norm": 0.45899203419685364,
"learning_rate": 4.778391959798995e-06,
"loss": 0.0006,
"step": 52475
},
{
"epoch": 26.45,
"grad_norm": 0.5328181982040405,
"learning_rate": 4.7758793969849245e-06,
"loss": 0.0009,
"step": 52500
},
{
"epoch": 26.46,
"grad_norm": 0.1914442628622055,
"learning_rate": 4.773366834170855e-06,
"loss": 0.001,
"step": 52525
},
{
"epoch": 26.47,
"grad_norm": 0.6430690884590149,
"learning_rate": 4.770854271356785e-06,
"loss": 0.0009,
"step": 52550
},
{
"epoch": 26.49,
"grad_norm": 0.5766837000846863,
"learning_rate": 4.768341708542714e-06,
"loss": 0.0009,
"step": 52575
},
{
"epoch": 26.5,
"grad_norm": 0.6997876763343811,
"learning_rate": 4.7658291457286435e-06,
"loss": 0.0007,
"step": 52600
},
{
"epoch": 26.51,
"grad_norm": 1.365090250968933,
"learning_rate": 4.763316582914574e-06,
"loss": 0.001,
"step": 52625
},
{
"epoch": 26.52,
"grad_norm": 1.0649492740631104,
"learning_rate": 4.760804020100503e-06,
"loss": 0.0009,
"step": 52650
},
{
"epoch": 26.54,
"grad_norm": 1.1800321340560913,
"learning_rate": 4.758291457286432e-06,
"loss": 0.001,
"step": 52675
},
{
"epoch": 26.55,
"grad_norm": 0.5055447220802307,
"learning_rate": 4.7557788944723625e-06,
"loss": 0.0008,
"step": 52700
},
{
"epoch": 26.56,
"grad_norm": 0.6218178272247314,
"learning_rate": 4.753266331658292e-06,
"loss": 0.001,
"step": 52725
},
{
"epoch": 26.57,
"grad_norm": 0.49634939432144165,
"learning_rate": 4.750753768844221e-06,
"loss": 0.0007,
"step": 52750
},
{
"epoch": 26.59,
"grad_norm": 0.4345056116580963,
"learning_rate": 4.748241206030151e-06,
"loss": 0.0006,
"step": 52775
},
{
"epoch": 26.6,
"grad_norm": 0.8264731168746948,
"learning_rate": 4.745728643216081e-06,
"loss": 0.0008,
"step": 52800
},
{
"epoch": 26.61,
"grad_norm": 0.7606098651885986,
"learning_rate": 4.743216080402011e-06,
"loss": 0.0008,
"step": 52825
},
{
"epoch": 26.62,
"grad_norm": 0.13701969385147095,
"learning_rate": 4.74070351758794e-06,
"loss": 0.0008,
"step": 52850
},
{
"epoch": 26.64,
"grad_norm": 1.5023452043533325,
"learning_rate": 4.7381909547738694e-06,
"loss": 0.0008,
"step": 52875
},
{
"epoch": 26.65,
"grad_norm": 0.7520101070404053,
"learning_rate": 4.7356783919598e-06,
"loss": 0.0009,
"step": 52900
},
{
"epoch": 26.66,
"grad_norm": 0.5014305114746094,
"learning_rate": 4.733165829145729e-06,
"loss": 0.0007,
"step": 52925
},
{
"epoch": 26.68,
"grad_norm": 0.6294064521789551,
"learning_rate": 4.730653266331658e-06,
"loss": 0.0009,
"step": 52950
},
{
"epoch": 26.69,
"grad_norm": 0.1294821947813034,
"learning_rate": 4.728140703517588e-06,
"loss": 0.0007,
"step": 52975
},
{
"epoch": 26.7,
"grad_norm": 0.0864739790558815,
"learning_rate": 4.725628140703518e-06,
"loss": 0.0009,
"step": 53000
},
{
"epoch": 26.7,
"eval_loss": 0.3559441566467285,
"eval_runtime": 647.2062,
"eval_samples_per_second": 2.177,
"eval_steps_per_second": 2.177,
"eval_wer": 22.760290556900724,
"step": 53000
},
{
"epoch": 26.71,
"grad_norm": 0.9976471662521362,
"learning_rate": 4.723115577889448e-06,
"loss": 0.0007,
"step": 53025
},
{
"epoch": 26.73,
"grad_norm": 0.1680731177330017,
"learning_rate": 4.720603015075377e-06,
"loss": 0.0008,
"step": 53050
},
{
"epoch": 26.74,
"grad_norm": 0.7187339663505554,
"learning_rate": 4.7180904522613066e-06,
"loss": 0.0007,
"step": 53075
},
{
"epoch": 26.75,
"grad_norm": 2.314380645751953,
"learning_rate": 4.715577889447237e-06,
"loss": 0.001,
"step": 53100
},
{
"epoch": 26.76,
"grad_norm": 0.23373447358608246,
"learning_rate": 4.713065326633166e-06,
"loss": 0.0007,
"step": 53125
},
{
"epoch": 26.78,
"grad_norm": 0.07850030809640884,
"learning_rate": 4.710552763819095e-06,
"loss": 0.0008,
"step": 53150
},
{
"epoch": 26.79,
"grad_norm": 0.7005709409713745,
"learning_rate": 4.7080402010050256e-06,
"loss": 0.001,
"step": 53175
},
{
"epoch": 26.8,
"grad_norm": 1.0930556058883667,
"learning_rate": 4.705527638190956e-06,
"loss": 0.0006,
"step": 53200
},
{
"epoch": 26.81,
"grad_norm": 0.5338262319564819,
"learning_rate": 4.703015075376885e-06,
"loss": 0.0005,
"step": 53225
},
{
"epoch": 26.83,
"grad_norm": 0.08169445395469666,
"learning_rate": 4.700502512562814e-06,
"loss": 0.0005,
"step": 53250
},
{
"epoch": 26.84,
"grad_norm": 2.5466127395629883,
"learning_rate": 4.6979899497487445e-06,
"loss": 0.0007,
"step": 53275
},
{
"epoch": 26.85,
"grad_norm": 1.4362409114837646,
"learning_rate": 4.695477386934674e-06,
"loss": 0.0005,
"step": 53300
},
{
"epoch": 26.86,
"grad_norm": 1.0999122858047485,
"learning_rate": 4.692964824120603e-06,
"loss": 0.0008,
"step": 53325
},
{
"epoch": 26.88,
"grad_norm": 0.7979241609573364,
"learning_rate": 4.6904522613065325e-06,
"loss": 0.0009,
"step": 53350
},
{
"epoch": 26.89,
"grad_norm": 0.5806974172592163,
"learning_rate": 4.687939698492463e-06,
"loss": 0.0008,
"step": 53375
},
{
"epoch": 26.9,
"grad_norm": 1.0148537158966064,
"learning_rate": 4.685427135678393e-06,
"loss": 0.0008,
"step": 53400
},
{
"epoch": 26.91,
"grad_norm": 1.2113052606582642,
"learning_rate": 4.682914572864322e-06,
"loss": 0.0013,
"step": 53425
},
{
"epoch": 26.93,
"grad_norm": 0.730830729007721,
"learning_rate": 4.6804020100502515e-06,
"loss": 0.0006,
"step": 53450
},
{
"epoch": 26.94,
"grad_norm": 0.5694770216941833,
"learning_rate": 4.677889447236182e-06,
"loss": 0.0007,
"step": 53475
},
{
"epoch": 26.95,
"grad_norm": 0.7788098454475403,
"learning_rate": 4.675376884422111e-06,
"loss": 0.0006,
"step": 53500
},
{
"epoch": 26.96,
"grad_norm": 0.47340822219848633,
"learning_rate": 4.67286432160804e-06,
"loss": 0.0008,
"step": 53525
},
{
"epoch": 26.98,
"grad_norm": 0.4103614389896393,
"learning_rate": 4.6703517587939705e-06,
"loss": 0.0007,
"step": 53550
},
{
"epoch": 26.99,
"grad_norm": 1.1811082363128662,
"learning_rate": 4.6678391959799e-06,
"loss": 0.0006,
"step": 53575
},
{
"epoch": 27.0,
"grad_norm": 0.34475991129875183,
"learning_rate": 4.66532663316583e-06,
"loss": 0.0004,
"step": 53600
},
{
"epoch": 27.02,
"grad_norm": 0.060179028660058975,
"learning_rate": 4.662814070351759e-06,
"loss": 0.0004,
"step": 53625
},
{
"epoch": 27.03,
"grad_norm": 0.4929451048374176,
"learning_rate": 4.660301507537689e-06,
"loss": 0.0004,
"step": 53650
},
{
"epoch": 27.04,
"grad_norm": 0.3733210265636444,
"learning_rate": 4.657788944723619e-06,
"loss": 0.0005,
"step": 53675
},
{
"epoch": 27.05,
"grad_norm": 1.3154228925704956,
"learning_rate": 4.655276381909548e-06,
"loss": 0.0005,
"step": 53700
},
{
"epoch": 27.07,
"grad_norm": 0.241769477725029,
"learning_rate": 4.6527638190954774e-06,
"loss": 0.0004,
"step": 53725
},
{
"epoch": 27.08,
"grad_norm": 0.19261914491653442,
"learning_rate": 4.650251256281408e-06,
"loss": 0.0003,
"step": 53750
},
{
"epoch": 27.09,
"grad_norm": 0.19151557981967926,
"learning_rate": 4.647738693467337e-06,
"loss": 0.0002,
"step": 53775
},
{
"epoch": 27.1,
"grad_norm": 0.2657971680164337,
"learning_rate": 4.645226130653266e-06,
"loss": 0.0004,
"step": 53800
},
{
"epoch": 27.12,
"grad_norm": 0.09917689114809036,
"learning_rate": 4.6427135678391964e-06,
"loss": 0.0003,
"step": 53825
},
{
"epoch": 27.13,
"grad_norm": 0.08713795244693756,
"learning_rate": 4.640201005025126e-06,
"loss": 0.0002,
"step": 53850
},
{
"epoch": 27.14,
"grad_norm": 0.15994809567928314,
"learning_rate": 4.637688442211056e-06,
"loss": 0.0004,
"step": 53875
},
{
"epoch": 27.15,
"grad_norm": 0.1758558303117752,
"learning_rate": 4.635175879396985e-06,
"loss": 0.0004,
"step": 53900
},
{
"epoch": 27.17,
"grad_norm": 0.04318870231509209,
"learning_rate": 4.6326633165829146e-06,
"loss": 0.0004,
"step": 53925
},
{
"epoch": 27.18,
"grad_norm": 0.07124695926904678,
"learning_rate": 4.630150753768845e-06,
"loss": 0.0003,
"step": 53950
},
{
"epoch": 27.19,
"grad_norm": 0.6460732221603394,
"learning_rate": 4.627638190954774e-06,
"loss": 0.0005,
"step": 53975
},
{
"epoch": 27.2,
"grad_norm": 0.9753907322883606,
"learning_rate": 4.625125628140703e-06,
"loss": 0.0004,
"step": 54000
},
{
"epoch": 27.2,
"eval_loss": 0.3613799214363098,
"eval_runtime": 644.7858,
"eval_samples_per_second": 2.185,
"eval_steps_per_second": 2.185,
"eval_wer": 22.407471463161535,
"step": 54000
},
{
"epoch": 27.22,
"grad_norm": 0.14066103100776672,
"learning_rate": 4.6226130653266336e-06,
"loss": 0.0005,
"step": 54025
},
{
"epoch": 27.23,
"grad_norm": 0.20447216928005219,
"learning_rate": 4.620100502512564e-06,
"loss": 0.0003,
"step": 54050
},
{
"epoch": 27.24,
"grad_norm": 0.13957104086875916,
"learning_rate": 4.617587939698493e-06,
"loss": 0.0002,
"step": 54075
},
{
"epoch": 27.25,
"grad_norm": 0.8711459040641785,
"learning_rate": 4.615075376884422e-06,
"loss": 0.0003,
"step": 54100
},
{
"epoch": 27.27,
"grad_norm": 0.19493000209331512,
"learning_rate": 4.612562814070352e-06,
"loss": 0.0003,
"step": 54125
},
{
"epoch": 27.28,
"grad_norm": 0.16989558935165405,
"learning_rate": 4.610050251256282e-06,
"loss": 0.0004,
"step": 54150
},
{
"epoch": 27.29,
"grad_norm": 0.29881447553634644,
"learning_rate": 4.607537688442211e-06,
"loss": 0.0006,
"step": 54175
},
{
"epoch": 27.3,
"grad_norm": 1.1802809238433838,
"learning_rate": 4.6050251256281405e-06,
"loss": 0.0006,
"step": 54200
},
{
"epoch": 27.32,
"grad_norm": 0.17507942020893097,
"learning_rate": 4.602512562814071e-06,
"loss": 0.0005,
"step": 54225
},
{
"epoch": 27.33,
"grad_norm": 0.2276214212179184,
"learning_rate": 4.600000000000001e-06,
"loss": 0.0004,
"step": 54250
},
{
"epoch": 27.34,
"grad_norm": 0.4339704215526581,
"learning_rate": 4.59748743718593e-06,
"loss": 0.0006,
"step": 54275
},
{
"epoch": 27.36,
"grad_norm": 1.0907328128814697,
"learning_rate": 4.5949748743718595e-06,
"loss": 0.0006,
"step": 54300
},
{
"epoch": 27.37,
"grad_norm": 0.9420053362846375,
"learning_rate": 4.59246231155779e-06,
"loss": 0.0004,
"step": 54325
},
{
"epoch": 27.38,
"grad_norm": 0.5085152387619019,
"learning_rate": 4.589949748743719e-06,
"loss": 0.0006,
"step": 54350
},
{
"epoch": 27.39,
"grad_norm": 0.7022001147270203,
"learning_rate": 4.587437185929648e-06,
"loss": 0.0006,
"step": 54375
},
{
"epoch": 27.41,
"grad_norm": 0.4072805941104889,
"learning_rate": 4.5849246231155785e-06,
"loss": 0.0004,
"step": 54400
},
{
"epoch": 27.42,
"grad_norm": 0.6905921101570129,
"learning_rate": 4.582412060301508e-06,
"loss": 0.0007,
"step": 54425
},
{
"epoch": 27.43,
"grad_norm": 0.5085986852645874,
"learning_rate": 4.579899497487438e-06,
"loss": 0.0006,
"step": 54450
},
{
"epoch": 27.44,
"grad_norm": 0.7813529968261719,
"learning_rate": 4.577386934673367e-06,
"loss": 0.0006,
"step": 54475
},
{
"epoch": 27.46,
"grad_norm": 0.7074769139289856,
"learning_rate": 4.574874371859297e-06,
"loss": 0.0009,
"step": 54500
},
{
"epoch": 27.47,
"grad_norm": 1.1880906820297241,
"learning_rate": 4.572361809045227e-06,
"loss": 0.0008,
"step": 54525
},
{
"epoch": 27.48,
"grad_norm": 1.0316381454467773,
"learning_rate": 4.569849246231156e-06,
"loss": 0.0009,
"step": 54550
},
{
"epoch": 27.49,
"grad_norm": 1.5573595762252808,
"learning_rate": 4.5673366834170854e-06,
"loss": 0.0005,
"step": 54575
},
{
"epoch": 27.51,
"grad_norm": 3.481981039047241,
"learning_rate": 4.564824120603016e-06,
"loss": 0.001,
"step": 54600
},
{
"epoch": 27.52,
"grad_norm": 0.9076539874076843,
"learning_rate": 4.562311557788945e-06,
"loss": 0.0007,
"step": 54625
},
{
"epoch": 27.53,
"grad_norm": 2.5089993476867676,
"learning_rate": 4.559798994974875e-06,
"loss": 0.0007,
"step": 54650
},
{
"epoch": 27.54,
"grad_norm": 0.755713701248169,
"learning_rate": 4.5572864321608044e-06,
"loss": 0.001,
"step": 54675
},
{
"epoch": 27.56,
"grad_norm": 0.1939065009355545,
"learning_rate": 4.554773869346734e-06,
"loss": 0.0007,
"step": 54700
},
{
"epoch": 27.57,
"grad_norm": 0.29305967688560486,
"learning_rate": 4.552261306532664e-06,
"loss": 0.0008,
"step": 54725
},
{
"epoch": 27.58,
"grad_norm": 1.5190849304199219,
"learning_rate": 4.549748743718593e-06,
"loss": 0.0009,
"step": 54750
},
{
"epoch": 27.59,
"grad_norm": 0.15516288578510284,
"learning_rate": 4.5472361809045226e-06,
"loss": 0.0007,
"step": 54775
},
{
"epoch": 27.61,
"grad_norm": 0.9610015749931335,
"learning_rate": 4.544723618090453e-06,
"loss": 0.0007,
"step": 54800
},
{
"epoch": 27.62,
"grad_norm": 0.04598340019583702,
"learning_rate": 4.542211055276382e-06,
"loss": 0.0005,
"step": 54825
},
{
"epoch": 27.63,
"grad_norm": 0.8410586714744568,
"learning_rate": 4.539698492462312e-06,
"loss": 0.0006,
"step": 54850
},
{
"epoch": 27.64,
"grad_norm": 0.09928705543279648,
"learning_rate": 4.5371859296482416e-06,
"loss": 0.0008,
"step": 54875
},
{
"epoch": 27.66,
"grad_norm": 1.5359119176864624,
"learning_rate": 4.534673366834172e-06,
"loss": 0.0007,
"step": 54900
},
{
"epoch": 27.67,
"grad_norm": 0.13479715585708618,
"learning_rate": 4.532160804020101e-06,
"loss": 0.0006,
"step": 54925
},
{
"epoch": 27.68,
"grad_norm": 0.5345453023910522,
"learning_rate": 4.52964824120603e-06,
"loss": 0.0007,
"step": 54950
},
{
"epoch": 27.7,
"grad_norm": 0.29486083984375,
"learning_rate": 4.52713567839196e-06,
"loss": 0.0012,
"step": 54975
},
{
"epoch": 27.71,
"grad_norm": 1.310102939605713,
"learning_rate": 4.52462311557789e-06,
"loss": 0.0008,
"step": 55000
},
{
"epoch": 27.71,
"eval_loss": 0.3659009635448456,
"eval_runtime": 646.1523,
"eval_samples_per_second": 2.181,
"eval_steps_per_second": 2.181,
"eval_wer": 22.98858526461432,
"step": 55000
},
{
"epoch": 27.72,
"grad_norm": 0.18702644109725952,
"learning_rate": 4.522110552763819e-06,
"loss": 0.0009,
"step": 55025
},
{
"epoch": 27.73,
"grad_norm": 0.17531992495059967,
"learning_rate": 4.5195979899497485e-06,
"loss": 0.0006,
"step": 55050
},
{
"epoch": 27.75,
"grad_norm": 0.844012439250946,
"learning_rate": 4.517085427135679e-06,
"loss": 0.0005,
"step": 55075
},
{
"epoch": 27.76,
"grad_norm": 0.20820151269435883,
"learning_rate": 4.514572864321609e-06,
"loss": 0.0008,
"step": 55100
},
{
"epoch": 27.77,
"grad_norm": 2.1439287662506104,
"learning_rate": 4.512060301507538e-06,
"loss": 0.0009,
"step": 55125
},
{
"epoch": 27.78,
"grad_norm": 0.31553953886032104,
"learning_rate": 4.5095477386934675e-06,
"loss": 0.0007,
"step": 55150
},
{
"epoch": 27.8,
"grad_norm": 0.08655881881713867,
"learning_rate": 4.507035175879398e-06,
"loss": 0.0005,
"step": 55175
},
{
"epoch": 27.81,
"grad_norm": 0.0824466124176979,
"learning_rate": 4.504522613065327e-06,
"loss": 0.0005,
"step": 55200
},
{
"epoch": 27.82,
"grad_norm": 0.7539893984794617,
"learning_rate": 4.502010050251256e-06,
"loss": 0.0006,
"step": 55225
},
{
"epoch": 27.83,
"grad_norm": 0.1819518655538559,
"learning_rate": 4.499497487437186e-06,
"loss": 0.0011,
"step": 55250
},
{
"epoch": 27.85,
"grad_norm": 1.464959740638733,
"learning_rate": 4.496984924623116e-06,
"loss": 0.0009,
"step": 55275
},
{
"epoch": 27.86,
"grad_norm": 0.3855617344379425,
"learning_rate": 4.494472361809046e-06,
"loss": 0.0008,
"step": 55300
},
{
"epoch": 27.87,
"grad_norm": 1.3555275201797485,
"learning_rate": 4.491959798994975e-06,
"loss": 0.0012,
"step": 55325
},
{
"epoch": 27.88,
"grad_norm": 0.7826224565505981,
"learning_rate": 4.489447236180905e-06,
"loss": 0.001,
"step": 55350
},
{
"epoch": 27.9,
"grad_norm": 0.3365747630596161,
"learning_rate": 4.486934673366835e-06,
"loss": 0.0005,
"step": 55375
},
{
"epoch": 27.91,
"grad_norm": 0.28341349959373474,
"learning_rate": 4.484522613065327e-06,
"loss": 0.0008,
"step": 55400
},
{
"epoch": 27.92,
"grad_norm": 0.6870297789573669,
"learning_rate": 4.4820100502512565e-06,
"loss": 0.0006,
"step": 55425
},
{
"epoch": 27.93,
"grad_norm": 0.2579886019229889,
"learning_rate": 4.479497487437186e-06,
"loss": 0.0008,
"step": 55450
},
{
"epoch": 27.95,
"grad_norm": 1.2634528875350952,
"learning_rate": 4.476984924623116e-06,
"loss": 0.0008,
"step": 55475
},
{
"epoch": 27.96,
"grad_norm": 0.11361195892095566,
"learning_rate": 4.474472361809046e-06,
"loss": 0.0006,
"step": 55500
},
{
"epoch": 27.97,
"grad_norm": 0.15430136024951935,
"learning_rate": 4.4719597989949755e-06,
"loss": 0.0007,
"step": 55525
},
{
"epoch": 27.98,
"grad_norm": 0.3706219494342804,
"learning_rate": 4.469447236180905e-06,
"loss": 0.0007,
"step": 55550
},
{
"epoch": 28.0,
"grad_norm": 0.853947103023529,
"learning_rate": 4.466934673366834e-06,
"loss": 0.0006,
"step": 55575
},
{
"epoch": 28.01,
"grad_norm": 0.9473939538002014,
"learning_rate": 4.464422110552764e-06,
"loss": 0.0004,
"step": 55600
},
{
"epoch": 28.02,
"grad_norm": 1.0833967924118042,
"learning_rate": 4.461909547738694e-06,
"loss": 0.0006,
"step": 55625
},
{
"epoch": 28.04,
"grad_norm": 0.2835502028465271,
"learning_rate": 4.459396984924623e-06,
"loss": 0.0005,
"step": 55650
},
{
"epoch": 28.05,
"grad_norm": 0.5609690546989441,
"learning_rate": 4.456884422110553e-06,
"loss": 0.0005,
"step": 55675
},
{
"epoch": 28.06,
"grad_norm": 0.07128031551837921,
"learning_rate": 4.454371859296483e-06,
"loss": 0.0004,
"step": 55700
},
{
"epoch": 28.07,
"grad_norm": 0.7161921858787537,
"learning_rate": 4.451859296482413e-06,
"loss": 0.0003,
"step": 55725
},
{
"epoch": 28.09,
"grad_norm": 0.13013307750225067,
"learning_rate": 4.449346733668342e-06,
"loss": 0.0002,
"step": 55750
},
{
"epoch": 28.1,
"grad_norm": 0.13601085543632507,
"learning_rate": 4.446834170854272e-06,
"loss": 0.0004,
"step": 55775
},
{
"epoch": 28.11,
"grad_norm": 0.6023567318916321,
"learning_rate": 4.4443216080402014e-06,
"loss": 0.0003,
"step": 55800
},
{
"epoch": 28.12,
"grad_norm": 0.09424587339162827,
"learning_rate": 4.441809045226131e-06,
"loss": 0.0003,
"step": 55825
},
{
"epoch": 28.14,
"grad_norm": 0.04044128209352493,
"learning_rate": 4.43929648241206e-06,
"loss": 0.0003,
"step": 55850
},
{
"epoch": 28.15,
"grad_norm": 0.16100598871707916,
"learning_rate": 4.43678391959799e-06,
"loss": 0.0002,
"step": 55875
},
{
"epoch": 28.16,
"grad_norm": 0.3516765832901001,
"learning_rate": 4.4342713567839204e-06,
"loss": 0.0002,
"step": 55900
},
{
"epoch": 28.17,
"grad_norm": 0.2532273828983307,
"learning_rate": 4.43175879396985e-06,
"loss": 0.0002,
"step": 55925
},
{
"epoch": 28.19,
"grad_norm": 0.19065579771995544,
"learning_rate": 4.429246231155779e-06,
"loss": 0.0006,
"step": 55950
},
{
"epoch": 28.2,
"grad_norm": 0.30931228399276733,
"learning_rate": 4.426733668341709e-06,
"loss": 0.0006,
"step": 55975
},
{
"epoch": 28.21,
"grad_norm": 0.30858734250068665,
"learning_rate": 4.4242211055276386e-06,
"loss": 0.0003,
"step": 56000
},
{
"epoch": 28.21,
"eval_loss": 0.3624221980571747,
"eval_runtime": 781.7858,
"eval_samples_per_second": 1.802,
"eval_steps_per_second": 1.802,
"eval_wer": 22.172258734002074,
"step": 56000
},
{
"epoch": 28.22,
"grad_norm": 0.08408491313457489,
"learning_rate": 4.421708542713568e-06,
"loss": 0.0002,
"step": 56025
},
{
"epoch": 28.24,
"grad_norm": 0.06299348175525665,
"learning_rate": 4.419195979899498e-06,
"loss": 0.0005,
"step": 56050
},
{
"epoch": 28.25,
"grad_norm": 0.07827432453632355,
"learning_rate": 4.416683417085427e-06,
"loss": 0.0004,
"step": 56075
},
{
"epoch": 28.26,
"grad_norm": 0.8975947499275208,
"learning_rate": 4.4141708542713576e-06,
"loss": 0.0005,
"step": 56100
},
{
"epoch": 28.27,
"grad_norm": 1.2385715246200562,
"learning_rate": 4.411658291457287e-06,
"loss": 0.0005,
"step": 56125
},
{
"epoch": 28.29,
"grad_norm": 0.19437459111213684,
"learning_rate": 4.409145728643216e-06,
"loss": 0.0006,
"step": 56150
},
{
"epoch": 28.3,
"grad_norm": 1.1243386268615723,
"learning_rate": 4.406633165829146e-06,
"loss": 0.0006,
"step": 56175
},
{
"epoch": 28.31,
"grad_norm": 0.16806860268115997,
"learning_rate": 4.404120603015076e-06,
"loss": 0.0005,
"step": 56200
},
{
"epoch": 28.32,
"grad_norm": 0.5347501635551453,
"learning_rate": 4.401608040201005e-06,
"loss": 0.0003,
"step": 56225
},
{
"epoch": 28.34,
"grad_norm": 0.3039199113845825,
"learning_rate": 4.399095477386935e-06,
"loss": 0.0004,
"step": 56250
},
{
"epoch": 28.35,
"grad_norm": 0.7065151929855347,
"learning_rate": 4.3965829145728645e-06,
"loss": 0.0006,
"step": 56275
},
{
"epoch": 28.36,
"grad_norm": 0.7291182279586792,
"learning_rate": 4.394070351758795e-06,
"loss": 0.0006,
"step": 56300
},
{
"epoch": 28.38,
"grad_norm": 0.11983204632997513,
"learning_rate": 4.391557788944724e-06,
"loss": 0.0005,
"step": 56325
},
{
"epoch": 28.39,
"grad_norm": 0.32822325825691223,
"learning_rate": 4.389045226130654e-06,
"loss": 0.0011,
"step": 56350
},
{
"epoch": 28.4,
"grad_norm": 0.3108604848384857,
"learning_rate": 4.3865326633165835e-06,
"loss": 0.0005,
"step": 56375
},
{
"epoch": 28.41,
"grad_norm": 0.2673742175102234,
"learning_rate": 4.384020100502513e-06,
"loss": 0.0004,
"step": 56400
},
{
"epoch": 28.43,
"grad_norm": 0.8592258095741272,
"learning_rate": 4.381507537688442e-06,
"loss": 0.0004,
"step": 56425
},
{
"epoch": 28.44,
"grad_norm": 0.1229337826371193,
"learning_rate": 4.378994974874372e-06,
"loss": 0.0004,
"step": 56450
},
{
"epoch": 28.45,
"grad_norm": 0.470473051071167,
"learning_rate": 4.376482412060302e-06,
"loss": 0.0004,
"step": 56475
},
{
"epoch": 28.46,
"grad_norm": 0.09908214956521988,
"learning_rate": 4.373969849246231e-06,
"loss": 0.0004,
"step": 56500
},
{
"epoch": 28.48,
"grad_norm": 0.11872223764657974,
"learning_rate": 4.371457286432161e-06,
"loss": 0.0008,
"step": 56525
},
{
"epoch": 28.49,
"grad_norm": 1.120409607887268,
"learning_rate": 4.368944723618091e-06,
"loss": 0.0006,
"step": 56550
},
{
"epoch": 28.5,
"grad_norm": 0.6582888960838318,
"learning_rate": 4.366432160804021e-06,
"loss": 0.0008,
"step": 56575
},
{
"epoch": 28.51,
"grad_norm": 0.6543013453483582,
"learning_rate": 4.36391959798995e-06,
"loss": 0.0007,
"step": 56600
},
{
"epoch": 28.53,
"grad_norm": 0.4543153941631317,
"learning_rate": 4.36140703517588e-06,
"loss": 0.0011,
"step": 56625
},
{
"epoch": 28.54,
"grad_norm": 0.8596717119216919,
"learning_rate": 4.3588944723618094e-06,
"loss": 0.0007,
"step": 56650
},
{
"epoch": 28.55,
"grad_norm": 1.2596262693405151,
"learning_rate": 4.356381909547739e-06,
"loss": 0.0008,
"step": 56675
},
{
"epoch": 28.56,
"grad_norm": 0.8046451807022095,
"learning_rate": 4.353869346733668e-06,
"loss": 0.0009,
"step": 56700
},
{
"epoch": 28.58,
"grad_norm": 0.7610066533088684,
"learning_rate": 4.351356783919598e-06,
"loss": 0.0014,
"step": 56725
},
{
"epoch": 28.59,
"grad_norm": 0.3740193247795105,
"learning_rate": 4.3488442211055284e-06,
"loss": 0.0008,
"step": 56750
},
{
"epoch": 28.6,
"grad_norm": 0.3946913480758667,
"learning_rate": 4.346331658291458e-06,
"loss": 0.0013,
"step": 56775
},
{
"epoch": 28.61,
"grad_norm": 1.1675328016281128,
"learning_rate": 4.343819095477387e-06,
"loss": 0.0011,
"step": 56800
},
{
"epoch": 28.63,
"grad_norm": 0.23593860864639282,
"learning_rate": 4.341306532663317e-06,
"loss": 0.001,
"step": 56825
},
{
"epoch": 28.64,
"grad_norm": 0.12008998543024063,
"learning_rate": 4.3387939698492466e-06,
"loss": 0.0008,
"step": 56850
},
{
"epoch": 28.65,
"grad_norm": 0.1624538004398346,
"learning_rate": 4.336281407035176e-06,
"loss": 0.0007,
"step": 56875
},
{
"epoch": 28.66,
"grad_norm": 0.2177920937538147,
"learning_rate": 4.333768844221106e-06,
"loss": 0.0007,
"step": 56900
},
{
"epoch": 28.68,
"grad_norm": 0.23931287229061127,
"learning_rate": 4.331256281407035e-06,
"loss": 0.0005,
"step": 56925
},
{
"epoch": 28.69,
"grad_norm": 0.3201751410961151,
"learning_rate": 4.3287437185929656e-06,
"loss": 0.0009,
"step": 56950
},
{
"epoch": 28.7,
"grad_norm": 0.13656805455684662,
"learning_rate": 4.326231155778895e-06,
"loss": 0.0008,
"step": 56975
},
{
"epoch": 28.72,
"grad_norm": 0.4347066581249237,
"learning_rate": 4.323718592964824e-06,
"loss": 0.0006,
"step": 57000
},
{
"epoch": 28.72,
"eval_loss": 0.3676753342151642,
"eval_runtime": 647.1207,
"eval_samples_per_second": 2.177,
"eval_steps_per_second": 2.177,
"eval_wer": 22.73953649256313,
"step": 57000
},
{
"epoch": 28.73,
"grad_norm": 0.6821175217628479,
"learning_rate": 4.321206030150754e-06,
"loss": 0.0008,
"step": 57025
},
{
"epoch": 28.74,
"grad_norm": 0.8675858974456787,
"learning_rate": 4.318693467336684e-06,
"loss": 0.0007,
"step": 57050
},
{
"epoch": 28.75,
"grad_norm": 0.7905115485191345,
"learning_rate": 4.316180904522613e-06,
"loss": 0.0009,
"step": 57075
},
{
"epoch": 28.77,
"grad_norm": 0.3380034565925598,
"learning_rate": 4.313668341708543e-06,
"loss": 0.0008,
"step": 57100
},
{
"epoch": 28.78,
"grad_norm": 0.5678602457046509,
"learning_rate": 4.3111557788944725e-06,
"loss": 0.0007,
"step": 57125
},
{
"epoch": 28.79,
"grad_norm": 0.4038754105567932,
"learning_rate": 4.308643216080403e-06,
"loss": 0.0006,
"step": 57150
},
{
"epoch": 28.8,
"grad_norm": 0.8682851195335388,
"learning_rate": 4.306130653266332e-06,
"loss": 0.0005,
"step": 57175
},
{
"epoch": 28.82,
"grad_norm": 0.2736469507217407,
"learning_rate": 4.303618090452262e-06,
"loss": 0.0008,
"step": 57200
},
{
"epoch": 28.83,
"grad_norm": 0.056282587349414825,
"learning_rate": 4.3011055276381915e-06,
"loss": 0.0005,
"step": 57225
},
{
"epoch": 28.84,
"grad_norm": 0.7956998944282532,
"learning_rate": 4.298592964824121e-06,
"loss": 0.0006,
"step": 57250
},
{
"epoch": 28.85,
"grad_norm": 0.39681777358055115,
"learning_rate": 4.29608040201005e-06,
"loss": 0.0005,
"step": 57275
},
{
"epoch": 28.87,
"grad_norm": 1.7592027187347412,
"learning_rate": 4.29356783919598e-06,
"loss": 0.0005,
"step": 57300
},
{
"epoch": 28.88,
"grad_norm": 0.40772297978401184,
"learning_rate": 4.29105527638191e-06,
"loss": 0.0003,
"step": 57325
},
{
"epoch": 28.89,
"grad_norm": 0.3130989074707031,
"learning_rate": 4.28854271356784e-06,
"loss": 0.0003,
"step": 57350
},
{
"epoch": 28.9,
"grad_norm": 0.22975189983844757,
"learning_rate": 4.286030150753769e-06,
"loss": 0.0004,
"step": 57375
},
{
"epoch": 28.92,
"grad_norm": 0.8328010439872742,
"learning_rate": 4.283517587939699e-06,
"loss": 0.0006,
"step": 57400
},
{
"epoch": 28.93,
"grad_norm": 0.41630086302757263,
"learning_rate": 4.281005025125629e-06,
"loss": 0.0005,
"step": 57425
},
{
"epoch": 28.94,
"grad_norm": 0.48607903718948364,
"learning_rate": 4.278492462311558e-06,
"loss": 0.0005,
"step": 57450
},
{
"epoch": 28.95,
"grad_norm": 0.3274035155773163,
"learning_rate": 4.275979899497488e-06,
"loss": 0.0005,
"step": 57475
},
{
"epoch": 28.97,
"grad_norm": 0.2861725687980652,
"learning_rate": 4.2734673366834174e-06,
"loss": 0.0006,
"step": 57500
},
{
"epoch": 28.98,
"grad_norm": 0.6594395041465759,
"learning_rate": 4.270954773869347e-06,
"loss": 0.0005,
"step": 57525
},
{
"epoch": 28.99,
"grad_norm": 0.2788364887237549,
"learning_rate": 4.268442211055277e-06,
"loss": 0.0003,
"step": 57550
},
{
"epoch": 29.01,
"grad_norm": 0.4915536344051361,
"learning_rate": 4.265929648241206e-06,
"loss": 0.0004,
"step": 57575
},
{
"epoch": 29.02,
"grad_norm": 0.14363212883472443,
"learning_rate": 4.2634170854271364e-06,
"loss": 0.0003,
"step": 57600
},
{
"epoch": 29.03,
"grad_norm": 0.7467771172523499,
"learning_rate": 4.260904522613066e-06,
"loss": 0.0005,
"step": 57625
},
{
"epoch": 29.04,
"grad_norm": 0.7711644172668457,
"learning_rate": 4.258391959798995e-06,
"loss": 0.0005,
"step": 57650
},
{
"epoch": 29.06,
"grad_norm": 0.3670036494731903,
"learning_rate": 4.255879396984925e-06,
"loss": 0.0004,
"step": 57675
},
{
"epoch": 29.07,
"grad_norm": 0.2002963125705719,
"learning_rate": 4.2533668341708546e-06,
"loss": 0.0005,
"step": 57700
},
{
"epoch": 29.08,
"grad_norm": 0.13585589826107025,
"learning_rate": 4.250854271356784e-06,
"loss": 0.0003,
"step": 57725
},
{
"epoch": 29.09,
"grad_norm": 0.4625987112522125,
"learning_rate": 4.248341708542714e-06,
"loss": 0.0003,
"step": 57750
},
{
"epoch": 29.11,
"grad_norm": 0.43475794792175293,
"learning_rate": 4.245829145728643e-06,
"loss": 0.0003,
"step": 57775
},
{
"epoch": 29.12,
"grad_norm": 0.46216335892677307,
"learning_rate": 4.2433165829145736e-06,
"loss": 0.0004,
"step": 57800
},
{
"epoch": 29.13,
"grad_norm": 0.10933476686477661,
"learning_rate": 4.240804020100503e-06,
"loss": 0.0003,
"step": 57825
},
{
"epoch": 29.14,
"grad_norm": 0.3376076817512512,
"learning_rate": 4.238291457286432e-06,
"loss": 0.0005,
"step": 57850
},
{
"epoch": 29.16,
"grad_norm": 0.09248408675193787,
"learning_rate": 4.235778894472362e-06,
"loss": 0.0004,
"step": 57875
},
{
"epoch": 29.17,
"grad_norm": 0.165025994181633,
"learning_rate": 4.233266331658292e-06,
"loss": 0.0004,
"step": 57900
},
{
"epoch": 29.18,
"grad_norm": 0.20500481128692627,
"learning_rate": 4.230753768844221e-06,
"loss": 0.0002,
"step": 57925
},
{
"epoch": 29.19,
"grad_norm": 1.7176584005355835,
"learning_rate": 4.228241206030151e-06,
"loss": 0.0003,
"step": 57950
},
{
"epoch": 29.21,
"grad_norm": 0.04315100982785225,
"learning_rate": 4.225829145728644e-06,
"loss": 0.0004,
"step": 57975
},
{
"epoch": 29.22,
"grad_norm": 0.7562754154205322,
"learning_rate": 4.223316582914574e-06,
"loss": 0.0005,
"step": 58000
},
{
"epoch": 29.22,
"eval_loss": 0.36634162068367004,
"eval_runtime": 646.3446,
"eval_samples_per_second": 2.18,
"eval_steps_per_second": 2.18,
"eval_wer": 22.877896921480456,
"step": 58000
},
{
"epoch": 29.23,
"grad_norm": 0.18568870425224304,
"learning_rate": 4.220804020100503e-06,
"loss": 0.0003,
"step": 58025
},
{
"epoch": 29.24,
"grad_norm": 0.02903875522315502,
"learning_rate": 4.218291457286432e-06,
"loss": 0.0002,
"step": 58050
},
{
"epoch": 29.26,
"grad_norm": 0.5034275054931641,
"learning_rate": 4.215778894472362e-06,
"loss": 0.0003,
"step": 58075
},
{
"epoch": 29.27,
"grad_norm": 0.24400202929973602,
"learning_rate": 4.213266331658292e-06,
"loss": 0.0004,
"step": 58100
},
{
"epoch": 29.28,
"grad_norm": 0.43526986241340637,
"learning_rate": 4.210753768844221e-06,
"loss": 0.0004,
"step": 58125
},
{
"epoch": 29.29,
"grad_norm": 0.8774734735488892,
"learning_rate": 4.2082412060301505e-06,
"loss": 0.0003,
"step": 58150
},
{
"epoch": 29.31,
"grad_norm": 0.2758621275424957,
"learning_rate": 4.205728643216081e-06,
"loss": 0.0004,
"step": 58175
},
{
"epoch": 29.32,
"grad_norm": 0.09988962113857269,
"learning_rate": 4.203216080402011e-06,
"loss": 0.0002,
"step": 58200
},
{
"epoch": 29.33,
"grad_norm": 0.04445599764585495,
"learning_rate": 4.20070351758794e-06,
"loss": 0.0002,
"step": 58225
},
{
"epoch": 29.35,
"grad_norm": 0.6248559355735779,
"learning_rate": 4.1981909547738695e-06,
"loss": 0.0002,
"step": 58250
},
{
"epoch": 29.36,
"grad_norm": 0.5372090339660645,
"learning_rate": 4.1956783919598e-06,
"loss": 0.0003,
"step": 58275
},
{
"epoch": 29.37,
"grad_norm": 0.05950794741511345,
"learning_rate": 4.193165829145729e-06,
"loss": 0.0003,
"step": 58300
},
{
"epoch": 29.38,
"grad_norm": 0.2952395975589752,
"learning_rate": 4.190653266331658e-06,
"loss": 0.0005,
"step": 58325
},
{
"epoch": 29.4,
"grad_norm": 1.0344479084014893,
"learning_rate": 4.188140703517588e-06,
"loss": 0.0005,
"step": 58350
},
{
"epoch": 29.41,
"grad_norm": 0.07533106207847595,
"learning_rate": 4.185628140703518e-06,
"loss": 0.0008,
"step": 58375
},
{
"epoch": 29.42,
"grad_norm": 0.19977255165576935,
"learning_rate": 4.183115577889448e-06,
"loss": 0.0005,
"step": 58400
},
{
"epoch": 29.43,
"grad_norm": 0.21329541504383087,
"learning_rate": 4.180603015075377e-06,
"loss": 0.0003,
"step": 58425
},
{
"epoch": 29.45,
"grad_norm": 0.2345517873764038,
"learning_rate": 4.178090452261307e-06,
"loss": 0.0006,
"step": 58450
},
{
"epoch": 29.46,
"grad_norm": 0.028459738940000534,
"learning_rate": 4.175577889447237e-06,
"loss": 0.0004,
"step": 58475
},
{
"epoch": 29.47,
"grad_norm": 0.8179320693016052,
"learning_rate": 4.173065326633166e-06,
"loss": 0.0005,
"step": 58500
},
{
"epoch": 29.48,
"grad_norm": 0.8002499341964722,
"learning_rate": 4.1705527638190955e-06,
"loss": 0.0004,
"step": 58525
},
{
"epoch": 29.5,
"grad_norm": 0.18765227496623993,
"learning_rate": 4.168040201005026e-06,
"loss": 0.0004,
"step": 58550
},
{
"epoch": 29.51,
"grad_norm": 0.20206257700920105,
"learning_rate": 4.165527638190955e-06,
"loss": 0.0008,
"step": 58575
},
{
"epoch": 29.52,
"grad_norm": 0.7327660322189331,
"learning_rate": 4.163015075376885e-06,
"loss": 0.0005,
"step": 58600
},
{
"epoch": 29.53,
"grad_norm": 1.6903936862945557,
"learning_rate": 4.1605025125628145e-06,
"loss": 0.0007,
"step": 58625
},
{
"epoch": 29.55,
"grad_norm": 0.523650050163269,
"learning_rate": 4.157989949748744e-06,
"loss": 0.0004,
"step": 58650
},
{
"epoch": 29.56,
"grad_norm": 0.5737274289131165,
"learning_rate": 4.155477386934674e-06,
"loss": 0.0006,
"step": 58675
},
{
"epoch": 29.57,
"grad_norm": 0.43866389989852905,
"learning_rate": 4.152964824120603e-06,
"loss": 0.0005,
"step": 58700
},
{
"epoch": 29.58,
"grad_norm": 0.3836389183998108,
"learning_rate": 4.150452261306533e-06,
"loss": 0.0006,
"step": 58725
},
{
"epoch": 29.6,
"grad_norm": 0.2658005654811859,
"learning_rate": 4.147939698492463e-06,
"loss": 0.0005,
"step": 58750
},
{
"epoch": 29.61,
"grad_norm": 0.9957432150840759,
"learning_rate": 4.145427135678392e-06,
"loss": 0.0006,
"step": 58775
},
{
"epoch": 29.62,
"grad_norm": 0.4388526380062103,
"learning_rate": 4.142914572864322e-06,
"loss": 0.0005,
"step": 58800
},
{
"epoch": 29.63,
"grad_norm": 0.48335981369018555,
"learning_rate": 4.140402010050252e-06,
"loss": 0.0008,
"step": 58825
},
{
"epoch": 29.65,
"grad_norm": 0.6199666261672974,
"learning_rate": 4.137889447236182e-06,
"loss": 0.0006,
"step": 58850
},
{
"epoch": 29.66,
"grad_norm": 0.406076043844223,
"learning_rate": 4.135376884422111e-06,
"loss": 0.0006,
"step": 58875
},
{
"epoch": 29.67,
"grad_norm": 0.1077524796128273,
"learning_rate": 4.13286432160804e-06,
"loss": 0.0005,
"step": 58900
},
{
"epoch": 29.69,
"grad_norm": 0.3811541795730591,
"learning_rate": 4.13035175879397e-06,
"loss": 0.0007,
"step": 58925
},
{
"epoch": 29.7,
"grad_norm": 1.440758228302002,
"learning_rate": 4.1278391959799e-06,
"loss": 0.0006,
"step": 58950
},
{
"epoch": 29.71,
"grad_norm": 0.47515958547592163,
"learning_rate": 4.125326633165829e-06,
"loss": 0.0004,
"step": 58975
},
{
"epoch": 29.72,
"grad_norm": 0.956475019454956,
"learning_rate": 4.122814070351759e-06,
"loss": 0.0006,
"step": 59000
},
{
"epoch": 29.72,
"eval_loss": 0.37018460035324097,
"eval_runtime": 650.5122,
"eval_samples_per_second": 2.166,
"eval_steps_per_second": 2.166,
"eval_wer": 23.348322379799377,
"step": 59000
},
{
"epoch": 29.74,
"grad_norm": 1.6364458799362183,
"learning_rate": 4.120301507537689e-06,
"loss": 0.0008,
"step": 59025
},
{
"epoch": 29.75,
"grad_norm": 0.5471516251564026,
"learning_rate": 4.117788944723619e-06,
"loss": 0.0007,
"step": 59050
},
{
"epoch": 29.76,
"grad_norm": 0.9236280918121338,
"learning_rate": 4.115276381909548e-06,
"loss": 0.0007,
"step": 59075
},
{
"epoch": 29.77,
"grad_norm": 0.1981869339942932,
"learning_rate": 4.1127638190954775e-06,
"loss": 0.0007,
"step": 59100
},
{
"epoch": 29.79,
"grad_norm": 0.7604771852493286,
"learning_rate": 4.110251256281408e-06,
"loss": 0.0004,
"step": 59125
},
{
"epoch": 29.8,
"grad_norm": 0.5981962084770203,
"learning_rate": 4.107738693467337e-06,
"loss": 0.0007,
"step": 59150
},
{
"epoch": 29.81,
"grad_norm": 0.9889633655548096,
"learning_rate": 4.105226130653266e-06,
"loss": 0.0004,
"step": 59175
},
{
"epoch": 29.82,
"grad_norm": 0.33064815402030945,
"learning_rate": 4.102713567839196e-06,
"loss": 0.0007,
"step": 59200
},
{
"epoch": 29.84,
"grad_norm": 0.3253353238105774,
"learning_rate": 4.100201005025126e-06,
"loss": 0.0005,
"step": 59225
},
{
"epoch": 29.85,
"grad_norm": 1.7824808359146118,
"learning_rate": 4.097688442211056e-06,
"loss": 0.0006,
"step": 59250
},
{
"epoch": 29.86,
"grad_norm": 0.1777506172657013,
"learning_rate": 4.095175879396985e-06,
"loss": 0.0004,
"step": 59275
},
{
"epoch": 29.87,
"grad_norm": 0.09130828827619553,
"learning_rate": 4.092663316582915e-06,
"loss": 0.0005,
"step": 59300
},
{
"epoch": 29.89,
"grad_norm": 0.26124000549316406,
"learning_rate": 4.090150753768845e-06,
"loss": 0.0004,
"step": 59325
},
{
"epoch": 29.9,
"grad_norm": 0.2676754295825958,
"learning_rate": 4.087638190954774e-06,
"loss": 0.0004,
"step": 59350
},
{
"epoch": 29.91,
"grad_norm": 0.7923325896263123,
"learning_rate": 4.0851256281407035e-06,
"loss": 0.0004,
"step": 59375
},
{
"epoch": 29.92,
"grad_norm": 0.06875083595514297,
"learning_rate": 4.082613065326634e-06,
"loss": 0.0007,
"step": 59400
},
{
"epoch": 29.94,
"grad_norm": 1.1524797677993774,
"learning_rate": 4.080100502512564e-06,
"loss": 0.0006,
"step": 59425
},
{
"epoch": 29.95,
"grad_norm": 1.1805469989776611,
"learning_rate": 4.077587939698493e-06,
"loss": 0.0006,
"step": 59450
},
{
"epoch": 29.96,
"grad_norm": 0.523131251335144,
"learning_rate": 4.0750753768844225e-06,
"loss": 0.0005,
"step": 59475
},
{
"epoch": 29.97,
"grad_norm": 0.9448018074035645,
"learning_rate": 4.072562814070352e-06,
"loss": 0.0004,
"step": 59500
},
{
"epoch": 29.99,
"grad_norm": 1.3744333982467651,
"learning_rate": 4.070050251256282e-06,
"loss": 0.0007,
"step": 59525
},
{
"epoch": 30.0,
"grad_norm": 0.2589998245239258,
"learning_rate": 4.067537688442211e-06,
"loss": 0.0006,
"step": 59550
},
{
"epoch": 30.01,
"grad_norm": 0.44579869508743286,
"learning_rate": 4.065025125628141e-06,
"loss": 0.0004,
"step": 59575
},
{
"epoch": 30.03,
"grad_norm": 0.35231125354766846,
"learning_rate": 4.062512562814071e-06,
"loss": 0.0003,
"step": 59600
},
{
"epoch": 30.04,
"grad_norm": 1.131148099899292,
"learning_rate": 4.060000000000001e-06,
"loss": 0.0005,
"step": 59625
},
{
"epoch": 30.05,
"grad_norm": 0.154410719871521,
"learning_rate": 4.05748743718593e-06,
"loss": 0.0005,
"step": 59650
},
{
"epoch": 30.06,
"grad_norm": 0.5527713894844055,
"learning_rate": 4.05497487437186e-06,
"loss": 0.0006,
"step": 59675
},
{
"epoch": 30.08,
"grad_norm": 0.12832514941692352,
"learning_rate": 4.05246231155779e-06,
"loss": 0.0003,
"step": 59700
},
{
"epoch": 30.09,
"grad_norm": 0.059667546302080154,
"learning_rate": 4.049949748743719e-06,
"loss": 0.0006,
"step": 59725
},
{
"epoch": 30.1,
"grad_norm": 0.9049032330513,
"learning_rate": 4.047437185929648e-06,
"loss": 0.0004,
"step": 59750
},
{
"epoch": 30.11,
"grad_norm": 0.42407527565956116,
"learning_rate": 4.044924623115578e-06,
"loss": 0.0005,
"step": 59775
},
{
"epoch": 30.13,
"grad_norm": 0.2845335304737091,
"learning_rate": 4.042412060301508e-06,
"loss": 0.0002,
"step": 59800
},
{
"epoch": 30.14,
"grad_norm": 0.2934589087963104,
"learning_rate": 4.039899497487437e-06,
"loss": 0.0003,
"step": 59825
},
{
"epoch": 30.15,
"grad_norm": 0.12243688106536865,
"learning_rate": 4.037386934673367e-06,
"loss": 0.0002,
"step": 59850
},
{
"epoch": 30.16,
"grad_norm": 0.03510167449712753,
"learning_rate": 4.034874371859297e-06,
"loss": 0.0002,
"step": 59875
},
{
"epoch": 30.18,
"grad_norm": 0.10497930645942688,
"learning_rate": 4.032361809045227e-06,
"loss": 0.0004,
"step": 59900
},
{
"epoch": 30.19,
"grad_norm": 1.603574514389038,
"learning_rate": 4.029849246231156e-06,
"loss": 0.0003,
"step": 59925
},
{
"epoch": 30.2,
"grad_norm": 0.07143130153417587,
"learning_rate": 4.0273366834170855e-06,
"loss": 0.0003,
"step": 59950
},
{
"epoch": 30.21,
"grad_norm": 0.9120334386825562,
"learning_rate": 4.024824120603016e-06,
"loss": 0.0004,
"step": 59975
},
{
"epoch": 30.23,
"grad_norm": 0.6783850193023682,
"learning_rate": 4.022311557788945e-06,
"loss": 0.0003,
"step": 60000
},
{
"epoch": 30.23,
"eval_loss": 0.3732685446739197,
"eval_runtime": 646.3241,
"eval_samples_per_second": 2.18,
"eval_steps_per_second": 2.18,
"eval_wer": 22.490487720511933,
"step": 60000
},
{
"epoch": 30.24,
"grad_norm": 0.0949537456035614,
"learning_rate": 4.019798994974874e-06,
"loss": 0.0003,
"step": 60025
},
{
"epoch": 30.25,
"grad_norm": 0.08624821156263351,
"learning_rate": 4.0172864321608045e-06,
"loss": 0.0003,
"step": 60050
},
{
"epoch": 30.26,
"grad_norm": 0.613463819026947,
"learning_rate": 4.014874371859297e-06,
"loss": 0.0004,
"step": 60075
},
{
"epoch": 30.28,
"grad_norm": 1.7669380903244019,
"learning_rate": 4.012361809045226e-06,
"loss": 0.0004,
"step": 60100
},
{
"epoch": 30.29,
"grad_norm": 0.22005651891231537,
"learning_rate": 4.009849246231156e-06,
"loss": 0.0005,
"step": 60125
},
{
"epoch": 30.3,
"grad_norm": 0.448355108499527,
"learning_rate": 4.007336683417086e-06,
"loss": 0.0004,
"step": 60150
},
{
"epoch": 30.31,
"grad_norm": 0.3999320864677429,
"learning_rate": 4.004824120603015e-06,
"loss": 0.0004,
"step": 60175
},
{
"epoch": 30.33,
"grad_norm": 0.5650457143783569,
"learning_rate": 4.002311557788945e-06,
"loss": 0.0003,
"step": 60200
},
{
"epoch": 30.34,
"grad_norm": 0.3573535084724426,
"learning_rate": 3.9997989949748745e-06,
"loss": 0.0003,
"step": 60225
},
{
"epoch": 30.35,
"grad_norm": 0.04291848465800285,
"learning_rate": 3.997286432160805e-06,
"loss": 0.0004,
"step": 60250
},
{
"epoch": 30.37,
"grad_norm": 0.6211608052253723,
"learning_rate": 3.994773869346734e-06,
"loss": 0.0003,
"step": 60275
},
{
"epoch": 30.38,
"grad_norm": 0.09989487379789352,
"learning_rate": 3.992261306532663e-06,
"loss": 0.0003,
"step": 60300
},
{
"epoch": 30.39,
"grad_norm": 0.6828473210334778,
"learning_rate": 3.9897487437185935e-06,
"loss": 0.0005,
"step": 60325
},
{
"epoch": 30.4,
"grad_norm": 0.4750407338142395,
"learning_rate": 3.987236180904523e-06,
"loss": 0.0008,
"step": 60350
},
{
"epoch": 30.42,
"grad_norm": 0.12395069003105164,
"learning_rate": 3.984723618090452e-06,
"loss": 0.0006,
"step": 60375
},
{
"epoch": 30.43,
"grad_norm": 0.19850021600723267,
"learning_rate": 3.982211055276382e-06,
"loss": 0.0007,
"step": 60400
},
{
"epoch": 30.44,
"grad_norm": 1.2987253665924072,
"learning_rate": 3.979698492462312e-06,
"loss": 0.0005,
"step": 60425
},
{
"epoch": 30.45,
"grad_norm": 0.12956413626670837,
"learning_rate": 3.977185929648242e-06,
"loss": 0.0005,
"step": 60450
},
{
"epoch": 30.47,
"grad_norm": 0.8285446166992188,
"learning_rate": 3.974673366834171e-06,
"loss": 0.0004,
"step": 60475
},
{
"epoch": 30.48,
"grad_norm": 0.9099289774894714,
"learning_rate": 3.972160804020101e-06,
"loss": 0.0003,
"step": 60500
},
{
"epoch": 30.49,
"grad_norm": 1.001538634300232,
"learning_rate": 3.969648241206031e-06,
"loss": 0.0004,
"step": 60525
},
{
"epoch": 30.5,
"grad_norm": 0.4828273057937622,
"learning_rate": 3.96713567839196e-06,
"loss": 0.0003,
"step": 60550
},
{
"epoch": 30.52,
"grad_norm": 1.4622454643249512,
"learning_rate": 3.964623115577889e-06,
"loss": 0.0005,
"step": 60575
},
{
"epoch": 30.53,
"grad_norm": 0.2010238915681839,
"learning_rate": 3.9621105527638195e-06,
"loss": 0.0004,
"step": 60600
},
{
"epoch": 30.54,
"grad_norm": 0.11599469929933548,
"learning_rate": 3.959597989949749e-06,
"loss": 0.0007,
"step": 60625
},
{
"epoch": 30.55,
"grad_norm": 0.18616123497486115,
"learning_rate": 3.957085427135678e-06,
"loss": 0.0004,
"step": 60650
},
{
"epoch": 30.57,
"grad_norm": 0.4725811779499054,
"learning_rate": 3.954572864321608e-06,
"loss": 0.0005,
"step": 60675
},
{
"epoch": 30.58,
"grad_norm": 1.0674340724945068,
"learning_rate": 3.9520603015075385e-06,
"loss": 0.0006,
"step": 60700
},
{
"epoch": 30.59,
"grad_norm": 0.5848013758659363,
"learning_rate": 3.949547738693468e-06,
"loss": 0.0004,
"step": 60725
},
{
"epoch": 30.6,
"grad_norm": 0.26734229922294617,
"learning_rate": 3.947035175879397e-06,
"loss": 0.0006,
"step": 60750
},
{
"epoch": 30.62,
"grad_norm": 0.08038675040006638,
"learning_rate": 3.944522613065327e-06,
"loss": 0.0006,
"step": 60775
},
{
"epoch": 30.63,
"grad_norm": 0.4599511921405792,
"learning_rate": 3.942010050251257e-06,
"loss": 0.0005,
"step": 60800
},
{
"epoch": 30.64,
"grad_norm": 1.278779149055481,
"learning_rate": 3.939497487437186e-06,
"loss": 0.0005,
"step": 60825
},
{
"epoch": 30.65,
"grad_norm": 2.023319959640503,
"learning_rate": 3.936984924623115e-06,
"loss": 0.0006,
"step": 60850
},
{
"epoch": 30.67,
"grad_norm": 0.48264050483703613,
"learning_rate": 3.934472361809045e-06,
"loss": 0.0004,
"step": 60875
},
{
"epoch": 30.68,
"grad_norm": 0.313340961933136,
"learning_rate": 3.931959798994976e-06,
"loss": 0.0006,
"step": 60900
},
{
"epoch": 30.69,
"grad_norm": 0.23155102133750916,
"learning_rate": 3.929447236180905e-06,
"loss": 0.0005,
"step": 60925
},
{
"epoch": 30.71,
"grad_norm": 0.7064458727836609,
"learning_rate": 3.926934673366834e-06,
"loss": 0.0006,
"step": 60950
},
{
"epoch": 30.72,
"grad_norm": 0.0884753167629242,
"learning_rate": 3.924422110552764e-06,
"loss": 0.0004,
"step": 60975
},
{
"epoch": 30.73,
"grad_norm": 0.06730210036039352,
"learning_rate": 3.921909547738694e-06,
"loss": 0.0005,
"step": 61000
},
{
"epoch": 30.73,
"eval_loss": 0.37581801414489746,
"eval_runtime": 645.7901,
"eval_samples_per_second": 2.182,
"eval_steps_per_second": 2.182,
"eval_wer": 22.80871670702179,
"step": 61000
},
{
"epoch": 30.74,
"grad_norm": 0.38679373264312744,
"learning_rate": 3.919396984924623e-06,
"loss": 0.0006,
"step": 61025
},
{
"epoch": 30.76,
"grad_norm": 1.362016201019287,
"learning_rate": 3.916884422110553e-06,
"loss": 0.0005,
"step": 61050
},
{
"epoch": 30.77,
"grad_norm": 0.26858440041542053,
"learning_rate": 3.914371859296483e-06,
"loss": 0.0005,
"step": 61075
},
{
"epoch": 30.78,
"grad_norm": 0.19505997002124786,
"learning_rate": 3.911859296482413e-06,
"loss": 0.0003,
"step": 61100
},
{
"epoch": 30.79,
"grad_norm": 0.0629916712641716,
"learning_rate": 3.909346733668342e-06,
"loss": 0.0006,
"step": 61125
},
{
"epoch": 30.81,
"grad_norm": 0.16181036829948425,
"learning_rate": 3.906834170854271e-06,
"loss": 0.0004,
"step": 61150
},
{
"epoch": 30.82,
"grad_norm": 0.8519290685653687,
"learning_rate": 3.9043216080402015e-06,
"loss": 0.0004,
"step": 61175
},
{
"epoch": 30.83,
"grad_norm": 0.3916480541229248,
"learning_rate": 3.901809045226131e-06,
"loss": 0.0003,
"step": 61200
},
{
"epoch": 30.84,
"grad_norm": 0.1823578029870987,
"learning_rate": 3.89929648241206e-06,
"loss": 0.0005,
"step": 61225
},
{
"epoch": 30.86,
"grad_norm": 0.525839626789093,
"learning_rate": 3.89678391959799e-06,
"loss": 0.0006,
"step": 61250
},
{
"epoch": 30.87,
"grad_norm": 0.6914676427841187,
"learning_rate": 3.89427135678392e-06,
"loss": 0.0003,
"step": 61275
},
{
"epoch": 30.88,
"grad_norm": 1.1129640340805054,
"learning_rate": 3.89175879396985e-06,
"loss": 0.0005,
"step": 61300
},
{
"epoch": 30.89,
"grad_norm": 0.04879957437515259,
"learning_rate": 3.889246231155779e-06,
"loss": 0.0006,
"step": 61325
},
{
"epoch": 30.91,
"grad_norm": 1.1155563592910767,
"learning_rate": 3.886733668341709e-06,
"loss": 0.0006,
"step": 61350
},
{
"epoch": 30.92,
"grad_norm": 0.35383549332618713,
"learning_rate": 3.884221105527639e-06,
"loss": 0.0004,
"step": 61375
},
{
"epoch": 30.93,
"grad_norm": 0.38548916578292847,
"learning_rate": 3.881708542713568e-06,
"loss": 0.0004,
"step": 61400
},
{
"epoch": 30.94,
"grad_norm": 0.1265828013420105,
"learning_rate": 3.879195979899497e-06,
"loss": 0.0004,
"step": 61425
},
{
"epoch": 30.96,
"grad_norm": 0.2077447772026062,
"learning_rate": 3.8766834170854275e-06,
"loss": 0.0004,
"step": 61450
},
{
"epoch": 30.97,
"grad_norm": 0.43719515204429626,
"learning_rate": 3.874170854271357e-06,
"loss": 0.0004,
"step": 61475
},
{
"epoch": 30.98,
"grad_norm": 0.25596338510513306,
"learning_rate": 3.871658291457287e-06,
"loss": 0.0004,
"step": 61500
},
{
"epoch": 30.99,
"grad_norm": 0.24157127737998962,
"learning_rate": 3.869145728643216e-06,
"loss": 0.0005,
"step": 61525
},
{
"epoch": 31.01,
"grad_norm": 0.2741214334964752,
"learning_rate": 3.8666331658291465e-06,
"loss": 0.0006,
"step": 61550
},
{
"epoch": 31.02,
"grad_norm": 0.15417811274528503,
"learning_rate": 3.864120603015076e-06,
"loss": 0.0004,
"step": 61575
},
{
"epoch": 31.03,
"grad_norm": 0.18006564676761627,
"learning_rate": 3.861608040201005e-06,
"loss": 0.0004,
"step": 61600
},
{
"epoch": 31.05,
"grad_norm": 0.6540391445159912,
"learning_rate": 3.859095477386935e-06,
"loss": 0.0004,
"step": 61625
},
{
"epoch": 31.06,
"grad_norm": 0.21219852566719055,
"learning_rate": 3.856683417085428e-06,
"loss": 0.0004,
"step": 61650
},
{
"epoch": 31.07,
"grad_norm": 0.12796539068222046,
"learning_rate": 3.854170854271357e-06,
"loss": 0.0002,
"step": 61675
},
{
"epoch": 31.08,
"grad_norm": 0.03786884620785713,
"learning_rate": 3.851658291457287e-06,
"loss": 0.0002,
"step": 61700
},
{
"epoch": 31.1,
"grad_norm": 0.10505225509405136,
"learning_rate": 3.8491457286432165e-06,
"loss": 0.0002,
"step": 61725
},
{
"epoch": 31.11,
"grad_norm": 0.2574862837791443,
"learning_rate": 3.846633165829146e-06,
"loss": 0.0002,
"step": 61750
},
{
"epoch": 31.12,
"grad_norm": 0.09054882079362869,
"learning_rate": 3.844120603015076e-06,
"loss": 0.0002,
"step": 61775
},
{
"epoch": 31.13,
"grad_norm": 0.25638747215270996,
"learning_rate": 3.841608040201005e-06,
"loss": 0.0004,
"step": 61800
},
{
"epoch": 31.15,
"grad_norm": 0.5020123720169067,
"learning_rate": 3.839095477386935e-06,
"loss": 0.0004,
"step": 61825
},
{
"epoch": 31.16,
"grad_norm": 0.1703236848115921,
"learning_rate": 3.836582914572865e-06,
"loss": 0.0003,
"step": 61850
},
{
"epoch": 31.17,
"grad_norm": 0.22640874981880188,
"learning_rate": 3.834070351758794e-06,
"loss": 0.0003,
"step": 61875
},
{
"epoch": 31.18,
"grad_norm": 0.21768644452095032,
"learning_rate": 3.831557788944724e-06,
"loss": 0.0002,
"step": 61900
},
{
"epoch": 31.2,
"grad_norm": 0.32308635115623474,
"learning_rate": 3.829045226130654e-06,
"loss": 0.0003,
"step": 61925
},
{
"epoch": 31.21,
"grad_norm": 0.20229199528694153,
"learning_rate": 3.826532663316583e-06,
"loss": 0.0004,
"step": 61950
},
{
"epoch": 31.22,
"grad_norm": 0.10681883990764618,
"learning_rate": 3.824020100502513e-06,
"loss": 0.0005,
"step": 61975
},
{
"epoch": 31.23,
"grad_norm": 0.28597140312194824,
"learning_rate": 3.8215075376884424e-06,
"loss": 0.0002,
"step": 62000
},
{
"epoch": 31.23,
"eval_loss": 0.37976065278053284,
"eval_runtime": 653.558,
"eval_samples_per_second": 2.156,
"eval_steps_per_second": 2.156,
"eval_wer": 23.037011414735385,
"step": 62000
},
{
"epoch": 31.25,
"grad_norm": 0.4050130248069763,
"learning_rate": 3.818994974874372e-06,
"loss": 0.0002,
"step": 62025
},
{
"epoch": 31.26,
"grad_norm": 0.29295334219932556,
"learning_rate": 3.816482412060302e-06,
"loss": 0.0002,
"step": 62050
},
{
"epoch": 31.27,
"grad_norm": 0.05594494193792343,
"learning_rate": 3.8139698492462312e-06,
"loss": 0.0002,
"step": 62075
},
{
"epoch": 31.28,
"grad_norm": 0.06369101256132126,
"learning_rate": 3.811457286432161e-06,
"loss": 0.0002,
"step": 62100
},
{
"epoch": 31.3,
"grad_norm": 0.04477281868457794,
"learning_rate": 3.808944723618091e-06,
"loss": 0.0001,
"step": 62125
},
{
"epoch": 31.31,
"grad_norm": 0.09269160777330399,
"learning_rate": 3.8064321608040205e-06,
"loss": 0.0002,
"step": 62150
},
{
"epoch": 31.32,
"grad_norm": 0.5558028817176819,
"learning_rate": 3.8039195979899502e-06,
"loss": 0.0002,
"step": 62175
},
{
"epoch": 31.34,
"grad_norm": 0.03976639732718468,
"learning_rate": 3.8014070351758796e-06,
"loss": 0.0002,
"step": 62200
},
{
"epoch": 31.35,
"grad_norm": 0.10835079848766327,
"learning_rate": 3.7988944723618093e-06,
"loss": 0.0002,
"step": 62225
},
{
"epoch": 31.36,
"grad_norm": 1.0740280151367188,
"learning_rate": 3.796381909547739e-06,
"loss": 0.0002,
"step": 62250
},
{
"epoch": 31.37,
"grad_norm": 0.20788809657096863,
"learning_rate": 3.7938693467336684e-06,
"loss": 0.0003,
"step": 62275
},
{
"epoch": 31.39,
"grad_norm": 0.331663578748703,
"learning_rate": 3.791356783919598e-06,
"loss": 0.0005,
"step": 62300
},
{
"epoch": 31.4,
"grad_norm": 0.09669731557369232,
"learning_rate": 3.7888442211055283e-06,
"loss": 0.0002,
"step": 62325
},
{
"epoch": 31.41,
"grad_norm": 0.12564200162887573,
"learning_rate": 3.7863316582914576e-06,
"loss": 0.0002,
"step": 62350
},
{
"epoch": 31.42,
"grad_norm": 0.16016307473182678,
"learning_rate": 3.7838190954773874e-06,
"loss": 0.0003,
"step": 62375
},
{
"epoch": 31.44,
"grad_norm": 0.09345220774412155,
"learning_rate": 3.781306532663317e-06,
"loss": 0.0003,
"step": 62400
},
{
"epoch": 31.45,
"grad_norm": 1.2475889921188354,
"learning_rate": 3.7787939698492464e-06,
"loss": 0.0002,
"step": 62425
},
{
"epoch": 31.46,
"grad_norm": 0.4350406229496002,
"learning_rate": 3.776281407035176e-06,
"loss": 0.0003,
"step": 62450
},
{
"epoch": 31.47,
"grad_norm": 0.13535748422145844,
"learning_rate": 3.7737688442211055e-06,
"loss": 0.0002,
"step": 62475
},
{
"epoch": 31.49,
"grad_norm": 0.34315505623817444,
"learning_rate": 3.7712562814070352e-06,
"loss": 0.0004,
"step": 62500
},
{
"epoch": 31.5,
"grad_norm": 1.264566421508789,
"learning_rate": 3.7687437185929654e-06,
"loss": 0.0004,
"step": 62525
},
{
"epoch": 31.51,
"grad_norm": 0.1144946962594986,
"learning_rate": 3.766231155778895e-06,
"loss": 0.0004,
"step": 62550
},
{
"epoch": 31.52,
"grad_norm": 0.1550832986831665,
"learning_rate": 3.7637185929648245e-06,
"loss": 0.0007,
"step": 62575
},
{
"epoch": 31.54,
"grad_norm": 1.2980326414108276,
"learning_rate": 3.7612060301507542e-06,
"loss": 0.0006,
"step": 62600
},
{
"epoch": 31.55,
"grad_norm": 0.5655810832977295,
"learning_rate": 3.7586934673366836e-06,
"loss": 0.0004,
"step": 62625
},
{
"epoch": 31.56,
"grad_norm": 1.1469320058822632,
"learning_rate": 3.7561809045226133e-06,
"loss": 0.0003,
"step": 62650
},
{
"epoch": 31.57,
"grad_norm": 0.10017550736665726,
"learning_rate": 3.753668341708543e-06,
"loss": 0.0003,
"step": 62675
},
{
"epoch": 31.59,
"grad_norm": 0.0341104120016098,
"learning_rate": 3.7511557788944724e-06,
"loss": 0.0005,
"step": 62700
},
{
"epoch": 31.6,
"grad_norm": 0.1871260702610016,
"learning_rate": 3.748643216080402e-06,
"loss": 0.0004,
"step": 62725
},
{
"epoch": 31.61,
"grad_norm": 0.21561290323734283,
"learning_rate": 3.7461306532663323e-06,
"loss": 0.0003,
"step": 62750
},
{
"epoch": 31.62,
"grad_norm": 0.1019524484872818,
"learning_rate": 3.7436180904522616e-06,
"loss": 0.0004,
"step": 62775
},
{
"epoch": 31.64,
"grad_norm": 0.6602054834365845,
"learning_rate": 3.7411055276381914e-06,
"loss": 0.0003,
"step": 62800
},
{
"epoch": 31.65,
"grad_norm": 0.06545541435480118,
"learning_rate": 3.738592964824121e-06,
"loss": 0.0006,
"step": 62825
},
{
"epoch": 31.66,
"grad_norm": 0.6719912886619568,
"learning_rate": 3.7360804020100504e-06,
"loss": 0.0005,
"step": 62850
},
{
"epoch": 31.68,
"grad_norm": 0.23920761048793793,
"learning_rate": 3.73356783919598e-06,
"loss": 0.0004,
"step": 62875
},
{
"epoch": 31.69,
"grad_norm": 0.36367443203926086,
"learning_rate": 3.7310552763819095e-06,
"loss": 0.0004,
"step": 62900
},
{
"epoch": 31.7,
"grad_norm": 0.1650412529706955,
"learning_rate": 3.7285427135678392e-06,
"loss": 0.0004,
"step": 62925
},
{
"epoch": 31.71,
"grad_norm": 0.22094900906085968,
"learning_rate": 3.7260301507537694e-06,
"loss": 0.0003,
"step": 62950
},
{
"epoch": 31.73,
"grad_norm": 0.06936586648225784,
"learning_rate": 3.723517587939699e-06,
"loss": 0.0003,
"step": 62975
},
{
"epoch": 31.74,
"grad_norm": 0.3549976348876953,
"learning_rate": 3.7210050251256285e-06,
"loss": 0.0003,
"step": 63000
},
{
"epoch": 31.74,
"eval_loss": 0.375165730714798,
"eval_runtime": 654.2645,
"eval_samples_per_second": 2.154,
"eval_steps_per_second": 2.154,
"eval_wer": 22.912487028709787,
"step": 63000
},
{
"epoch": 31.75,
"grad_norm": 0.06242278590798378,
"learning_rate": 3.7184924623115582e-06,
"loss": 0.0003,
"step": 63025
},
{
"epoch": 31.76,
"grad_norm": 0.029787451028823853,
"learning_rate": 3.7159798994974876e-06,
"loss": 0.0005,
"step": 63050
},
{
"epoch": 31.78,
"grad_norm": 0.12795278429985046,
"learning_rate": 3.7134673366834173e-06,
"loss": 0.0004,
"step": 63075
},
{
"epoch": 31.79,
"grad_norm": 0.10121666640043259,
"learning_rate": 3.710954773869347e-06,
"loss": 0.0006,
"step": 63100
},
{
"epoch": 31.8,
"grad_norm": 0.10796695947647095,
"learning_rate": 3.7084422110552764e-06,
"loss": 0.0004,
"step": 63125
},
{
"epoch": 31.81,
"grad_norm": 0.06431049853563309,
"learning_rate": 3.7059296482412065e-06,
"loss": 0.0006,
"step": 63150
},
{
"epoch": 31.83,
"grad_norm": 0.07762473076581955,
"learning_rate": 3.7034170854271363e-06,
"loss": 0.0003,
"step": 63175
},
{
"epoch": 31.84,
"grad_norm": 0.19045744836330414,
"learning_rate": 3.7009045226130656e-06,
"loss": 0.0003,
"step": 63200
},
{
"epoch": 31.85,
"grad_norm": 0.495317667722702,
"learning_rate": 3.6983919597989954e-06,
"loss": 0.0008,
"step": 63225
},
{
"epoch": 31.86,
"grad_norm": 0.44762441515922546,
"learning_rate": 3.695879396984925e-06,
"loss": 0.0003,
"step": 63250
},
{
"epoch": 31.88,
"grad_norm": 0.7618858218193054,
"learning_rate": 3.6933668341708544e-06,
"loss": 0.0007,
"step": 63275
},
{
"epoch": 31.89,
"grad_norm": 0.24009497463703156,
"learning_rate": 3.690854271356784e-06,
"loss": 0.0003,
"step": 63300
},
{
"epoch": 31.9,
"grad_norm": 0.22943466901779175,
"learning_rate": 3.6883417085427135e-06,
"loss": 0.0003,
"step": 63325
},
{
"epoch": 31.91,
"grad_norm": 0.06694609671831131,
"learning_rate": 3.6858291457286432e-06,
"loss": 0.0005,
"step": 63350
},
{
"epoch": 31.93,
"grad_norm": 0.21812428534030914,
"learning_rate": 3.6833165829145734e-06,
"loss": 0.0003,
"step": 63375
},
{
"epoch": 31.94,
"grad_norm": 0.28397077322006226,
"learning_rate": 3.680804020100503e-06,
"loss": 0.0002,
"step": 63400
},
{
"epoch": 31.95,
"grad_norm": 1.7889050245285034,
"learning_rate": 3.6782914572864325e-06,
"loss": 0.0005,
"step": 63425
},
{
"epoch": 31.96,
"grad_norm": 0.6206020712852478,
"learning_rate": 3.6757788944723622e-06,
"loss": 0.0004,
"step": 63450
},
{
"epoch": 31.98,
"grad_norm": 0.1217232197523117,
"learning_rate": 3.6732663316582916e-06,
"loss": 0.0004,
"step": 63475
},
{
"epoch": 31.99,
"grad_norm": 0.545870840549469,
"learning_rate": 3.6707537688442213e-06,
"loss": 0.0003,
"step": 63500
},
{
"epoch": 32.0,
"grad_norm": 0.3442104160785675,
"learning_rate": 3.668241206030151e-06,
"loss": 0.0004,
"step": 63525
},
{
"epoch": 32.02,
"grad_norm": 0.10596601665019989,
"learning_rate": 3.6657286432160804e-06,
"loss": 0.0002,
"step": 63550
},
{
"epoch": 32.03,
"grad_norm": 0.12127941101789474,
"learning_rate": 3.6632160804020105e-06,
"loss": 0.0002,
"step": 63575
},
{
"epoch": 32.04,
"grad_norm": 0.05940447375178337,
"learning_rate": 3.6607035175879403e-06,
"loss": 0.0004,
"step": 63600
},
{
"epoch": 32.05,
"grad_norm": 0.21149925887584686,
"learning_rate": 3.6581909547738696e-06,
"loss": 0.0001,
"step": 63625
},
{
"epoch": 32.07,
"grad_norm": 1.6386170387268066,
"learning_rate": 3.6556783919597994e-06,
"loss": 0.0002,
"step": 63650
},
{
"epoch": 32.08,
"grad_norm": 1.5819129943847656,
"learning_rate": 3.653165829145729e-06,
"loss": 0.0004,
"step": 63675
},
{
"epoch": 32.09,
"grad_norm": 0.5431501865386963,
"learning_rate": 3.6506532663316584e-06,
"loss": 0.0002,
"step": 63700
},
{
"epoch": 32.1,
"grad_norm": 0.059780336916446686,
"learning_rate": 3.648140703517588e-06,
"loss": 0.0004,
"step": 63725
},
{
"epoch": 32.12,
"grad_norm": 0.3245247006416321,
"learning_rate": 3.6456281407035175e-06,
"loss": 0.0002,
"step": 63750
},
{
"epoch": 32.13,
"grad_norm": 0.27430394291877747,
"learning_rate": 3.6431155778894477e-06,
"loss": 0.0002,
"step": 63775
},
{
"epoch": 32.14,
"grad_norm": 0.3374156653881073,
"learning_rate": 3.6406030150753774e-06,
"loss": 0.0003,
"step": 63800
},
{
"epoch": 32.15,
"grad_norm": 0.5428460836410522,
"learning_rate": 3.638090452261307e-06,
"loss": 0.0002,
"step": 63825
},
{
"epoch": 32.17,
"grad_norm": 0.02909483201801777,
"learning_rate": 3.6355778894472365e-06,
"loss": 0.0003,
"step": 63850
},
{
"epoch": 32.18,
"grad_norm": 0.05827973410487175,
"learning_rate": 3.6330653266331662e-06,
"loss": 0.0003,
"step": 63875
},
{
"epoch": 32.19,
"grad_norm": 0.5720663666725159,
"learning_rate": 3.6305527638190956e-06,
"loss": 0.0003,
"step": 63900
},
{
"epoch": 32.2,
"grad_norm": 0.09780346602201462,
"learning_rate": 3.6280402010050253e-06,
"loss": 0.0002,
"step": 63925
},
{
"epoch": 32.22,
"grad_norm": 0.061296120285987854,
"learning_rate": 3.625527638190955e-06,
"loss": 0.0002,
"step": 63950
},
{
"epoch": 32.23,
"grad_norm": 0.14354734122753143,
"learning_rate": 3.6230150753768844e-06,
"loss": 0.0003,
"step": 63975
},
{
"epoch": 32.24,
"grad_norm": 0.1332835853099823,
"learning_rate": 3.6205025125628145e-06,
"loss": 0.0003,
"step": 64000
},
{
"epoch": 32.24,
"eval_loss": 0.37142670154571533,
"eval_runtime": 650.391,
"eval_samples_per_second": 2.166,
"eval_steps_per_second": 2.166,
"eval_wer": 22.359045313040472,
"step": 64000
},
{
"epoch": 32.25,
"grad_norm": 0.06854286044836044,
"learning_rate": 3.6179899497487443e-06,
"loss": 0.0002,
"step": 64025
},
{
"epoch": 32.27,
"grad_norm": 1.202950358390808,
"learning_rate": 3.6154773869346736e-06,
"loss": 0.0002,
"step": 64050
},
{
"epoch": 32.28,
"grad_norm": 0.23010912537574768,
"learning_rate": 3.6129648241206034e-06,
"loss": 0.0002,
"step": 64075
},
{
"epoch": 32.29,
"grad_norm": 0.044724371284246445,
"learning_rate": 3.610452261306533e-06,
"loss": 0.0002,
"step": 64100
},
{
"epoch": 32.3,
"grad_norm": 0.8325422406196594,
"learning_rate": 3.6079396984924624e-06,
"loss": 0.0003,
"step": 64125
},
{
"epoch": 32.32,
"grad_norm": 0.28481706976890564,
"learning_rate": 3.605427135678392e-06,
"loss": 0.0002,
"step": 64150
},
{
"epoch": 32.33,
"grad_norm": 0.5033039450645447,
"learning_rate": 3.6029145728643215e-06,
"loss": 0.0002,
"step": 64175
},
{
"epoch": 32.34,
"grad_norm": 0.07772762328386307,
"learning_rate": 3.6004020100502517e-06,
"loss": 0.0003,
"step": 64200
},
{
"epoch": 32.36,
"grad_norm": 0.13087120652198792,
"learning_rate": 3.5978894472361814e-06,
"loss": 0.0003,
"step": 64225
},
{
"epoch": 32.37,
"grad_norm": 0.05218727886676788,
"learning_rate": 3.595376884422111e-06,
"loss": 0.0002,
"step": 64250
},
{
"epoch": 32.38,
"grad_norm": 0.09140007197856903,
"learning_rate": 3.5928643216080405e-06,
"loss": 0.0002,
"step": 64275
},
{
"epoch": 32.39,
"grad_norm": 0.2148062288761139,
"learning_rate": 3.5903517587939702e-06,
"loss": 0.0002,
"step": 64300
},
{
"epoch": 32.41,
"grad_norm": 0.3405974805355072,
"learning_rate": 3.5878391959798996e-06,
"loss": 0.0003,
"step": 64325
},
{
"epoch": 32.42,
"grad_norm": 0.6667714715003967,
"learning_rate": 3.5853266331658293e-06,
"loss": 0.0006,
"step": 64350
},
{
"epoch": 32.43,
"grad_norm": 0.07142732292413712,
"learning_rate": 3.582814070351759e-06,
"loss": 0.0005,
"step": 64375
},
{
"epoch": 32.44,
"grad_norm": 0.34170547127723694,
"learning_rate": 3.5803015075376884e-06,
"loss": 0.0003,
"step": 64400
},
{
"epoch": 32.46,
"grad_norm": 0.051813945174217224,
"learning_rate": 3.5777889447236185e-06,
"loss": 0.0002,
"step": 64425
},
{
"epoch": 32.47,
"grad_norm": 0.035720087587833405,
"learning_rate": 3.5752763819095483e-06,
"loss": 0.0003,
"step": 64450
},
{
"epoch": 32.48,
"grad_norm": 0.26840466260910034,
"learning_rate": 3.5727638190954776e-06,
"loss": 0.0002,
"step": 64475
},
{
"epoch": 32.49,
"grad_norm": 0.23383192718029022,
"learning_rate": 3.5702512562814074e-06,
"loss": 0.0006,
"step": 64500
},
{
"epoch": 32.51,
"grad_norm": 0.6334074139595032,
"learning_rate": 3.567738693467337e-06,
"loss": 0.0003,
"step": 64525
},
{
"epoch": 32.52,
"grad_norm": 0.14389067888259888,
"learning_rate": 3.5652261306532664e-06,
"loss": 0.0003,
"step": 64550
},
{
"epoch": 32.53,
"grad_norm": 0.1418575793504715,
"learning_rate": 3.562713567839196e-06,
"loss": 0.0005,
"step": 64575
},
{
"epoch": 32.54,
"grad_norm": 1.003585934638977,
"learning_rate": 3.5602010050251255e-06,
"loss": 0.0006,
"step": 64600
},
{
"epoch": 32.56,
"grad_norm": 0.30060967803001404,
"learning_rate": 3.5576884422110557e-06,
"loss": 0.0004,
"step": 64625
},
{
"epoch": 32.57,
"grad_norm": 0.2555244266986847,
"learning_rate": 3.5551758793969854e-06,
"loss": 0.0005,
"step": 64650
},
{
"epoch": 32.58,
"grad_norm": 0.5695326328277588,
"learning_rate": 3.552663316582915e-06,
"loss": 0.0005,
"step": 64675
},
{
"epoch": 32.59,
"grad_norm": 1.1138029098510742,
"learning_rate": 3.5501507537688445e-06,
"loss": 0.0005,
"step": 64700
},
{
"epoch": 32.61,
"grad_norm": 0.999645471572876,
"learning_rate": 3.5476381909547742e-06,
"loss": 0.0002,
"step": 64725
},
{
"epoch": 32.62,
"grad_norm": 0.08828813582658768,
"learning_rate": 3.5451256281407036e-06,
"loss": 0.0005,
"step": 64750
},
{
"epoch": 32.63,
"grad_norm": 0.41186901926994324,
"learning_rate": 3.5426130653266333e-06,
"loss": 0.0004,
"step": 64775
},
{
"epoch": 32.64,
"grad_norm": 0.2822958528995514,
"learning_rate": 3.540100502512563e-06,
"loss": 0.0004,
"step": 64800
},
{
"epoch": 32.66,
"grad_norm": 0.0504324808716774,
"learning_rate": 3.5375879396984932e-06,
"loss": 0.0003,
"step": 64825
},
{
"epoch": 32.67,
"grad_norm": 0.19442777335643768,
"learning_rate": 3.5350753768844225e-06,
"loss": 0.0003,
"step": 64850
},
{
"epoch": 32.68,
"grad_norm": 0.16309334337711334,
"learning_rate": 3.5325628140703523e-06,
"loss": 0.0005,
"step": 64875
},
{
"epoch": 32.7,
"grad_norm": 0.21436728537082672,
"learning_rate": 3.5300502512562816e-06,
"loss": 0.0005,
"step": 64900
},
{
"epoch": 32.71,
"grad_norm": 0.6013718843460083,
"learning_rate": 3.5275376884422114e-06,
"loss": 0.0005,
"step": 64925
},
{
"epoch": 32.72,
"grad_norm": 0.22422859072685242,
"learning_rate": 3.525025125628141e-06,
"loss": 0.0003,
"step": 64950
},
{
"epoch": 32.73,
"grad_norm": 0.1287311613559723,
"learning_rate": 3.5225125628140704e-06,
"loss": 0.0003,
"step": 64975
},
{
"epoch": 32.75,
"grad_norm": 0.11317762732505798,
"learning_rate": 3.52e-06,
"loss": 0.0006,
"step": 65000
},
{
"epoch": 32.75,
"eval_loss": 0.37727558612823486,
"eval_runtime": 645.9956,
"eval_samples_per_second": 2.181,
"eval_steps_per_second": 2.181,
"eval_wer": 22.953995157384988,
"step": 65000
},
{
"epoch": 32.76,
"grad_norm": 0.30392730236053467,
"learning_rate": 3.5174874371859295e-06,
"loss": 0.0003,
"step": 65025
},
{
"epoch": 32.77,
"grad_norm": 0.1481235772371292,
"learning_rate": 3.5149748743718597e-06,
"loss": 0.0006,
"step": 65050
},
{
"epoch": 32.78,
"grad_norm": 0.7122224569320679,
"learning_rate": 3.5124623115577894e-06,
"loss": 0.0005,
"step": 65075
},
{
"epoch": 32.8,
"grad_norm": 0.9053061604499817,
"learning_rate": 3.509949748743719e-06,
"loss": 0.0004,
"step": 65100
},
{
"epoch": 32.81,
"grad_norm": 1.0469900369644165,
"learning_rate": 3.5074371859296485e-06,
"loss": 0.0004,
"step": 65125
},
{
"epoch": 32.82,
"grad_norm": 1.669203519821167,
"learning_rate": 3.5049246231155782e-06,
"loss": 0.0004,
"step": 65150
},
{
"epoch": 32.83,
"grad_norm": 1.147189974784851,
"learning_rate": 3.5024120603015076e-06,
"loss": 0.0006,
"step": 65175
},
{
"epoch": 32.85,
"grad_norm": 0.0693323016166687,
"learning_rate": 3.4998994974874373e-06,
"loss": 0.0004,
"step": 65200
},
{
"epoch": 32.86,
"grad_norm": 0.32327863574028015,
"learning_rate": 3.497386934673367e-06,
"loss": 0.0006,
"step": 65225
},
{
"epoch": 32.87,
"grad_norm": 0.20139850676059723,
"learning_rate": 3.494874371859297e-06,
"loss": 0.0005,
"step": 65250
},
{
"epoch": 32.88,
"grad_norm": 0.3941897749900818,
"learning_rate": 3.4923618090452265e-06,
"loss": 0.0003,
"step": 65275
},
{
"epoch": 32.9,
"grad_norm": 0.036489930003881454,
"learning_rate": 3.4898492462311563e-06,
"loss": 0.0006,
"step": 65300
},
{
"epoch": 32.91,
"grad_norm": 0.09961450099945068,
"learning_rate": 3.4873366834170856e-06,
"loss": 0.0004,
"step": 65325
},
{
"epoch": 32.92,
"grad_norm": 0.10360651463270187,
"learning_rate": 3.4848241206030154e-06,
"loss": 0.0004,
"step": 65350
},
{
"epoch": 32.93,
"grad_norm": 1.0170046091079712,
"learning_rate": 3.482311557788945e-06,
"loss": 0.0005,
"step": 65375
},
{
"epoch": 32.95,
"grad_norm": 0.1749623417854309,
"learning_rate": 3.4797989949748744e-06,
"loss": 0.0003,
"step": 65400
},
{
"epoch": 32.96,
"grad_norm": 1.7965832948684692,
"learning_rate": 3.477286432160804e-06,
"loss": 0.0004,
"step": 65425
},
{
"epoch": 32.97,
"grad_norm": 0.4475008547306061,
"learning_rate": 3.4747738693467344e-06,
"loss": 0.0004,
"step": 65450
},
{
"epoch": 32.98,
"grad_norm": 0.29991576075553894,
"learning_rate": 3.4722613065326637e-06,
"loss": 0.0004,
"step": 65475
},
{
"epoch": 33.0,
"grad_norm": 0.0659874826669693,
"learning_rate": 3.4697487437185934e-06,
"loss": 0.0005,
"step": 65500
},
{
"epoch": 33.01,
"grad_norm": 0.15273946523666382,
"learning_rate": 3.467336683417086e-06,
"loss": 0.0004,
"step": 65525
},
{
"epoch": 33.02,
"grad_norm": 0.09499111026525497,
"learning_rate": 3.464824120603015e-06,
"loss": 0.0005,
"step": 65550
},
{
"epoch": 33.04,
"grad_norm": 0.052725620567798615,
"learning_rate": 3.462311557788945e-06,
"loss": 0.0002,
"step": 65575
},
{
"epoch": 33.05,
"grad_norm": 0.30198460817337036,
"learning_rate": 3.4597989949748746e-06,
"loss": 0.0002,
"step": 65600
},
{
"epoch": 33.06,
"grad_norm": 0.12597815692424774,
"learning_rate": 3.457286432160804e-06,
"loss": 0.0003,
"step": 65625
},
{
"epoch": 33.07,
"grad_norm": 0.7220773100852966,
"learning_rate": 3.454773869346734e-06,
"loss": 0.0002,
"step": 65650
},
{
"epoch": 33.09,
"grad_norm": 0.14168275892734528,
"learning_rate": 3.452261306532664e-06,
"loss": 0.0002,
"step": 65675
},
{
"epoch": 33.1,
"grad_norm": 0.06495074182748795,
"learning_rate": 3.449748743718593e-06,
"loss": 0.0002,
"step": 65700
},
{
"epoch": 33.11,
"grad_norm": 0.06904838979244232,
"learning_rate": 3.447236180904523e-06,
"loss": 0.0001,
"step": 65725
},
{
"epoch": 33.12,
"grad_norm": 0.08821985125541687,
"learning_rate": 3.4447236180904527e-06,
"loss": 0.0001,
"step": 65750
},
{
"epoch": 33.14,
"grad_norm": 0.08516985177993774,
"learning_rate": 3.442211055276382e-06,
"loss": 0.0002,
"step": 65775
},
{
"epoch": 33.15,
"grad_norm": 0.05491633340716362,
"learning_rate": 3.4396984924623118e-06,
"loss": 0.0002,
"step": 65800
},
{
"epoch": 33.16,
"grad_norm": 0.09583797305822372,
"learning_rate": 3.437185929648241e-06,
"loss": 0.0001,
"step": 65825
},
{
"epoch": 33.17,
"grad_norm": 0.046035125851631165,
"learning_rate": 3.4346733668341712e-06,
"loss": 0.0002,
"step": 65850
},
{
"epoch": 33.19,
"grad_norm": 0.04913106560707092,
"learning_rate": 3.432160804020101e-06,
"loss": 0.0002,
"step": 65875
},
{
"epoch": 33.2,
"grad_norm": 0.0908324271440506,
"learning_rate": 3.4296482412060307e-06,
"loss": 0.0002,
"step": 65900
},
{
"epoch": 33.21,
"grad_norm": 0.1352541148662567,
"learning_rate": 3.42713567839196e-06,
"loss": 0.0001,
"step": 65925
},
{
"epoch": 33.22,
"grad_norm": 0.12912270426750183,
"learning_rate": 3.42462311557789e-06,
"loss": 0.0002,
"step": 65950
},
{
"epoch": 33.24,
"grad_norm": 0.19946007430553436,
"learning_rate": 3.422110552763819e-06,
"loss": 0.0001,
"step": 65975
},
{
"epoch": 33.25,
"grad_norm": 0.15442493557929993,
"learning_rate": 3.419597989949749e-06,
"loss": 0.0001,
"step": 66000
},
{
"epoch": 33.25,
"eval_loss": 0.37603169679641724,
"eval_runtime": 780.8153,
"eval_samples_per_second": 1.805,
"eval_steps_per_second": 1.805,
"eval_wer": 22.234520927014874,
"step": 66000
}
],
"logging_steps": 25,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 51,
"save_steps": 1000,
"total_flos": 2.0547646783488e+20,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}