tadabur-Whisper-Small / trainer_state.json
FaisaI's picture
Upload 17 files
5c0156b verified
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_global_step": 32500,
"best_metric": 0.09504964202642441,
"best_model_checkpoint": "./whisper-quran-finetuned_small\\checkpoint-32500",
"epoch": 3.0,
"eval_steps": 2500,
"global_step": 50595,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0029648077322185656,
"grad_norm": 3.1639976501464844,
"learning_rate": 9.800000000000001e-07,
"loss": 0.1027,
"step": 50
},
{
"epoch": 0.005929615464437131,
"grad_norm": 3.5852792263031006,
"learning_rate": 1.98e-06,
"loss": 0.0837,
"step": 100
},
{
"epoch": 0.008894423196655697,
"grad_norm": 4.179174423217773,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.071,
"step": 150
},
{
"epoch": 0.011859230928874262,
"grad_norm": 3.4920785427093506,
"learning_rate": 3.980000000000001e-06,
"loss": 0.0514,
"step": 200
},
{
"epoch": 0.014824038661092828,
"grad_norm": 3.6322460174560547,
"learning_rate": 4.980000000000001e-06,
"loss": 0.0498,
"step": 250
},
{
"epoch": 0.017788846393311394,
"grad_norm": 2.3850507736206055,
"learning_rate": 5.98e-06,
"loss": 0.0449,
"step": 300
},
{
"epoch": 0.020753654125529958,
"grad_norm": 3.212959051132202,
"learning_rate": 6.98e-06,
"loss": 0.0367,
"step": 350
},
{
"epoch": 0.023718461857748525,
"grad_norm": 3.586538553237915,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0408,
"step": 400
},
{
"epoch": 0.02668326958996709,
"grad_norm": 4.874953746795654,
"learning_rate": 8.98e-06,
"loss": 0.0305,
"step": 450
},
{
"epoch": 0.029648077322185655,
"grad_norm": 4.741893291473389,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0334,
"step": 500
},
{
"epoch": 0.03261288505440422,
"grad_norm": 3.2416727542877197,
"learning_rate": 9.999976392890719e-06,
"loss": 0.0335,
"step": 550
},
{
"epoch": 0.03557769278662279,
"grad_norm": 3.642324686050415,
"learning_rate": 9.99990363485342e-06,
"loss": 0.0263,
"step": 600
},
{
"epoch": 0.03854250051884135,
"grad_norm": 4.084117889404297,
"learning_rate": 9.999781716771276e-06,
"loss": 0.0277,
"step": 650
},
{
"epoch": 0.041507308251059916,
"grad_norm": 5.314033031463623,
"learning_rate": 9.999610639843005e-06,
"loss": 0.0698,
"step": 700
},
{
"epoch": 0.044472115983278486,
"grad_norm": 6.3697381019592285,
"learning_rate": 9.999390405750668e-06,
"loss": 0.0742,
"step": 750
},
{
"epoch": 0.04743692371549705,
"grad_norm": 3.1028659343719482,
"learning_rate": 9.999121016659655e-06,
"loss": 0.0678,
"step": 800
},
{
"epoch": 0.05040173144771561,
"grad_norm": 3.242400884628296,
"learning_rate": 9.99880247521865e-06,
"loss": 0.0723,
"step": 850
},
{
"epoch": 0.05336653917993418,
"grad_norm": 5.612239360809326,
"learning_rate": 9.998434784559616e-06,
"loss": 0.0715,
"step": 900
},
{
"epoch": 0.05633134691215275,
"grad_norm": 2.9017271995544434,
"learning_rate": 9.99801794829776e-06,
"loss": 0.0589,
"step": 950
},
{
"epoch": 0.05929615464437131,
"grad_norm": 5.012439727783203,
"learning_rate": 9.997551970531501e-06,
"loss": 0.0705,
"step": 1000
},
{
"epoch": 0.06226096237658988,
"grad_norm": 2.756843328475952,
"learning_rate": 9.997036855842423e-06,
"loss": 0.06,
"step": 1050
},
{
"epoch": 0.06522577010880844,
"grad_norm": 5.2626566886901855,
"learning_rate": 9.996472609295236e-06,
"loss": 0.0528,
"step": 1100
},
{
"epoch": 0.06819057784102701,
"grad_norm": 3.3507096767425537,
"learning_rate": 9.995859236437724e-06,
"loss": 0.0564,
"step": 1150
},
{
"epoch": 0.07115538557324558,
"grad_norm": 3.6945300102233887,
"learning_rate": 9.995196743300693e-06,
"loss": 0.0514,
"step": 1200
},
{
"epoch": 0.07412019330546414,
"grad_norm": 1.9075812101364136,
"learning_rate": 9.994485136397903e-06,
"loss": 0.0526,
"step": 1250
},
{
"epoch": 0.0770850010376827,
"grad_norm": 4.296079635620117,
"learning_rate": 9.993724422726017e-06,
"loss": 0.0519,
"step": 1300
},
{
"epoch": 0.08004980876990127,
"grad_norm": 4.138225078582764,
"learning_rate": 9.992914609764521e-06,
"loss": 0.0529,
"step": 1350
},
{
"epoch": 0.08301461650211983,
"grad_norm": 2.9909701347351074,
"learning_rate": 9.99205570547566e-06,
"loss": 0.0406,
"step": 1400
},
{
"epoch": 0.08597942423433841,
"grad_norm": 5.061216354370117,
"learning_rate": 9.991147718304348e-06,
"loss": 0.0468,
"step": 1450
},
{
"epoch": 0.08894423196655697,
"grad_norm": 3.402036190032959,
"learning_rate": 9.990190657178099e-06,
"loss": 0.0504,
"step": 1500
},
{
"epoch": 0.09190903969877554,
"grad_norm": 4.70094108581543,
"learning_rate": 9.989184531506922e-06,
"loss": 0.0451,
"step": 1550
},
{
"epoch": 0.0948738474309941,
"grad_norm": 3.8658971786499023,
"learning_rate": 9.98812935118325e-06,
"loss": 0.0415,
"step": 1600
},
{
"epoch": 0.09783865516321266,
"grad_norm": 4.501025199890137,
"learning_rate": 9.98702512658182e-06,
"loss": 0.0434,
"step": 1650
},
{
"epoch": 0.10080346289543123,
"grad_norm": 1.1796941757202148,
"learning_rate": 9.985871868559593e-06,
"loss": 0.0476,
"step": 1700
},
{
"epoch": 0.1037682706276498,
"grad_norm": 3.604438066482544,
"learning_rate": 9.984669588455626e-06,
"loss": 0.042,
"step": 1750
},
{
"epoch": 0.10673307835986837,
"grad_norm": 3.441258668899536,
"learning_rate": 9.98341829809098e-06,
"loss": 0.0389,
"step": 1800
},
{
"epoch": 0.10969788609208693,
"grad_norm": 2.7927887439727783,
"learning_rate": 9.982118009768587e-06,
"loss": 0.0417,
"step": 1850
},
{
"epoch": 0.1126626938243055,
"grad_norm": 3.311816453933716,
"learning_rate": 9.98076873627314e-06,
"loss": 0.041,
"step": 1900
},
{
"epoch": 0.11562750155652406,
"grad_norm": 3.63645076751709,
"learning_rate": 9.979370490870968e-06,
"loss": 0.0459,
"step": 1950
},
{
"epoch": 0.11859230928874262,
"grad_norm": 4.9164204597473145,
"learning_rate": 9.97792328730989e-06,
"loss": 0.0397,
"step": 2000
},
{
"epoch": 0.1215571170209612,
"grad_norm": 4.712317943572998,
"learning_rate": 9.976427139819106e-06,
"loss": 0.0356,
"step": 2050
},
{
"epoch": 0.12452192475317976,
"grad_norm": 1.0168401002883911,
"learning_rate": 9.974882063109027e-06,
"loss": 0.0361,
"step": 2100
},
{
"epoch": 0.12748673248539832,
"grad_norm": 2.246588945388794,
"learning_rate": 9.973288072371163e-06,
"loss": 0.0398,
"step": 2150
},
{
"epoch": 0.13045154021761687,
"grad_norm": 3.196394920349121,
"learning_rate": 9.97164518327794e-06,
"loss": 0.0419,
"step": 2200
},
{
"epoch": 0.13341634794983545,
"grad_norm": 2.1984238624572754,
"learning_rate": 9.969953411982574e-06,
"loss": 0.0395,
"step": 2250
},
{
"epoch": 0.13638115568205403,
"grad_norm": 1.9637821912765503,
"learning_rate": 9.968212775118895e-06,
"loss": 0.0396,
"step": 2300
},
{
"epoch": 0.13934596341427258,
"grad_norm": 2.859842300415039,
"learning_rate": 9.966423289801187e-06,
"loss": 0.0412,
"step": 2350
},
{
"epoch": 0.14231077114649116,
"grad_norm": 3.271143674850464,
"learning_rate": 9.96458497362403e-06,
"loss": 0.0393,
"step": 2400
},
{
"epoch": 0.1452755788787097,
"grad_norm": 2.784824848175049,
"learning_rate": 9.96269784466211e-06,
"loss": 0.036,
"step": 2450
},
{
"epoch": 0.14824038661092828,
"grad_norm": 2.1318204402923584,
"learning_rate": 9.960761921470053e-06,
"loss": 0.0361,
"step": 2500
},
{
"epoch": 0.14824038661092828,
"eval_loss": 0.1340717226266861,
"eval_runtime": 1798.3265,
"eval_samples_per_second": 3.256,
"eval_steps_per_second": 0.814,
"eval_wer": 13.782618229661574,
"step": 2500
},
{
"epoch": 0.15120519434314686,
"grad_norm": 3.058884620666504,
"learning_rate": 9.958777223082243e-06,
"loss": 0.0377,
"step": 2550
},
{
"epoch": 0.1541700020753654,
"grad_norm": 1.4118783473968506,
"learning_rate": 9.956743769012627e-06,
"loss": 0.0348,
"step": 2600
},
{
"epoch": 0.15713480980758399,
"grad_norm": 1.7757619619369507,
"learning_rate": 9.954661579254528e-06,
"loss": 0.0348,
"step": 2650
},
{
"epoch": 0.16009961753980254,
"grad_norm": 3.934190511703491,
"learning_rate": 9.95253067428045e-06,
"loss": 0.0335,
"step": 2700
},
{
"epoch": 0.1630644252720211,
"grad_norm": 2.1128933429718018,
"learning_rate": 9.950351075041872e-06,
"loss": 0.0338,
"step": 2750
},
{
"epoch": 0.16602923300423966,
"grad_norm": 1.1197174787521362,
"learning_rate": 9.94812280296905e-06,
"loss": 0.0288,
"step": 2800
},
{
"epoch": 0.16899404073645824,
"grad_norm": 3.3981051445007324,
"learning_rate": 9.945845879970794e-06,
"loss": 0.0348,
"step": 2850
},
{
"epoch": 0.17195884846867682,
"grad_norm": 1.5275200605392456,
"learning_rate": 9.943520328434266e-06,
"loss": 0.0307,
"step": 2900
},
{
"epoch": 0.17492365620089537,
"grad_norm": 1.5511682033538818,
"learning_rate": 9.941146171224745e-06,
"loss": 0.0297,
"step": 2950
},
{
"epoch": 0.17788846393311394,
"grad_norm": 1.4682283401489258,
"learning_rate": 9.93872343168542e-06,
"loss": 0.0385,
"step": 3000
},
{
"epoch": 0.1808532716653325,
"grad_norm": 4.4206085205078125,
"learning_rate": 9.936252133637146e-06,
"loss": 0.0283,
"step": 3050
},
{
"epoch": 0.18381807939755107,
"grad_norm": 3.574676275253296,
"learning_rate": 9.933732301378216e-06,
"loss": 0.0361,
"step": 3100
},
{
"epoch": 0.18678288712976965,
"grad_norm": 3.5772438049316406,
"learning_rate": 9.93116395968412e-06,
"loss": 0.0334,
"step": 3150
},
{
"epoch": 0.1897476948619882,
"grad_norm": 1.9996916055679321,
"learning_rate": 9.928547133807304e-06,
"loss": 0.0313,
"step": 3200
},
{
"epoch": 0.19271250259420677,
"grad_norm": 2.3533923625946045,
"learning_rate": 9.925881849476921e-06,
"loss": 0.0329,
"step": 3250
},
{
"epoch": 0.19567731032642532,
"grad_norm": 2.9052157402038574,
"learning_rate": 9.923168132898573e-06,
"loss": 0.0315,
"step": 3300
},
{
"epoch": 0.1986421180586439,
"grad_norm": 6.513851165771484,
"learning_rate": 9.92040601075406e-06,
"loss": 0.0278,
"step": 3350
},
{
"epoch": 0.20160692579086245,
"grad_norm": 8.648693084716797,
"learning_rate": 9.917595510201115e-06,
"loss": 0.0311,
"step": 3400
},
{
"epoch": 0.20457173352308103,
"grad_norm": 4.101097106933594,
"learning_rate": 9.914736658873138e-06,
"loss": 0.0295,
"step": 3450
},
{
"epoch": 0.2075365412552996,
"grad_norm": 1.969928503036499,
"learning_rate": 9.911829484878925e-06,
"loss": 0.0252,
"step": 3500
},
{
"epoch": 0.21050134898751816,
"grad_norm": 1.3020251989364624,
"learning_rate": 9.908874016802384e-06,
"loss": 0.0282,
"step": 3550
},
{
"epoch": 0.21346615671973673,
"grad_norm": 4.441299915313721,
"learning_rate": 9.905870283702265e-06,
"loss": 0.0341,
"step": 3600
},
{
"epoch": 0.21643096445195528,
"grad_norm": 2.398268222808838,
"learning_rate": 9.90281831511187e-06,
"loss": 0.0268,
"step": 3650
},
{
"epoch": 0.21939577218417386,
"grad_norm": 1.5908421277999878,
"learning_rate": 9.899718141038755e-06,
"loss": 0.0236,
"step": 3700
},
{
"epoch": 0.2223605799163924,
"grad_norm": 1.215734601020813,
"learning_rate": 9.896569791964452e-06,
"loss": 0.0288,
"step": 3750
},
{
"epoch": 0.225325387648611,
"grad_norm": 2.0204992294311523,
"learning_rate": 9.893373298844149e-06,
"loss": 0.0181,
"step": 3800
},
{
"epoch": 0.22829019538082956,
"grad_norm": 1.6711108684539795,
"learning_rate": 9.890128693106407e-06,
"loss": 0.0247,
"step": 3850
},
{
"epoch": 0.2312550031130481,
"grad_norm": 0.4981835186481476,
"learning_rate": 9.886836006652827e-06,
"loss": 0.0265,
"step": 3900
},
{
"epoch": 0.2342198108452667,
"grad_norm": 3.5468335151672363,
"learning_rate": 9.883495271857761e-06,
"loss": 0.0259,
"step": 3950
},
{
"epoch": 0.23718461857748524,
"grad_norm": 1.3472596406936646,
"learning_rate": 9.880106521567974e-06,
"loss": 0.0181,
"step": 4000
},
{
"epoch": 0.24014942630970382,
"grad_norm": 0.5029717683792114,
"learning_rate": 9.87666978910233e-06,
"loss": 0.0247,
"step": 4050
},
{
"epoch": 0.2431142340419224,
"grad_norm": 1.5131045579910278,
"learning_rate": 9.873185108251466e-06,
"loss": 0.0201,
"step": 4100
},
{
"epoch": 0.24607904177414094,
"grad_norm": 3.9095380306243896,
"learning_rate": 9.86965251327746e-06,
"loss": 0.0222,
"step": 4150
},
{
"epoch": 0.24904384950635952,
"grad_norm": 1.2582632303237915,
"learning_rate": 9.866072038913478e-06,
"loss": 0.0202,
"step": 4200
},
{
"epoch": 0.25200865723857807,
"grad_norm": 2.0971181392669678,
"learning_rate": 9.862443720363461e-06,
"loss": 0.0198,
"step": 4250
},
{
"epoch": 0.25497346497079665,
"grad_norm": 0.3940514326095581,
"learning_rate": 9.858767593301757e-06,
"loss": 0.0222,
"step": 4300
},
{
"epoch": 0.2579382727030152,
"grad_norm": 1.9935857057571411,
"learning_rate": 9.855043693872774e-06,
"loss": 0.0249,
"step": 4350
},
{
"epoch": 0.26090308043523375,
"grad_norm": 3.2804274559020996,
"learning_rate": 9.851272058690633e-06,
"loss": 0.0253,
"step": 4400
},
{
"epoch": 0.2638678881674523,
"grad_norm": 1.2694517374038696,
"learning_rate": 9.847452724838797e-06,
"loss": 0.0235,
"step": 4450
},
{
"epoch": 0.2668326958996709,
"grad_norm": 2.894775152206421,
"learning_rate": 9.843585729869716e-06,
"loss": 0.0242,
"step": 4500
},
{
"epoch": 0.2697975036318895,
"grad_norm": 3.3155581951141357,
"learning_rate": 9.839671111804452e-06,
"loss": 0.0206,
"step": 4550
},
{
"epoch": 0.27276231136410806,
"grad_norm": 2.7083659172058105,
"learning_rate": 9.835708909132307e-06,
"loss": 0.0247,
"step": 4600
},
{
"epoch": 0.2757271190963266,
"grad_norm": 3.1053194999694824,
"learning_rate": 9.831699160810443e-06,
"loss": 0.0241,
"step": 4650
},
{
"epoch": 0.27869192682854516,
"grad_norm": 2.450427532196045,
"learning_rate": 9.8276419062635e-06,
"loss": 0.0274,
"step": 4700
},
{
"epoch": 0.28165673456076373,
"grad_norm": 1.419350028038025,
"learning_rate": 9.823537185383214e-06,
"loss": 0.0212,
"step": 4750
},
{
"epoch": 0.2846215422929823,
"grad_norm": 3.7026216983795166,
"learning_rate": 9.819385038528014e-06,
"loss": 0.0208,
"step": 4800
},
{
"epoch": 0.2875863500252009,
"grad_norm": 0.2673606872558594,
"learning_rate": 9.815185506522629e-06,
"loss": 0.0244,
"step": 4850
},
{
"epoch": 0.2905511577574194,
"grad_norm": 1.0690964460372925,
"learning_rate": 9.810938630657693e-06,
"loss": 0.0224,
"step": 4900
},
{
"epoch": 0.293515965489638,
"grad_norm": 2.188352346420288,
"learning_rate": 9.806644452689334e-06,
"loss": 0.0197,
"step": 4950
},
{
"epoch": 0.29648077322185656,
"grad_norm": 4.49456262588501,
"learning_rate": 9.80230301483876e-06,
"loss": 0.0255,
"step": 5000
},
{
"epoch": 0.29648077322185656,
"eval_loss": 0.12216129153966904,
"eval_runtime": 1783.7018,
"eval_samples_per_second": 3.282,
"eval_steps_per_second": 0.821,
"eval_wer": 11.202397485563921,
"step": 5000
},
{
"epoch": 0.29944558095407514,
"grad_norm": 2.8785881996154785,
"learning_rate": 9.79791435979185e-06,
"loss": 0.0255,
"step": 5050
},
{
"epoch": 0.3024103886862937,
"grad_norm": 1.6858680248260498,
"learning_rate": 9.793478530698732e-06,
"loss": 0.0179,
"step": 5100
},
{
"epoch": 0.30537519641851224,
"grad_norm": 2.8224334716796875,
"learning_rate": 9.788995571173356e-06,
"loss": 0.0222,
"step": 5150
},
{
"epoch": 0.3083400041507308,
"grad_norm": 4.539381980895996,
"learning_rate": 9.784465525293075e-06,
"loss": 0.0194,
"step": 5200
},
{
"epoch": 0.3113048118829494,
"grad_norm": 1.027093529701233,
"learning_rate": 9.779888437598191e-06,
"loss": 0.0193,
"step": 5250
},
{
"epoch": 0.31426961961516797,
"grad_norm": 2.6384902000427246,
"learning_rate": 9.775264353091545e-06,
"loss": 0.0206,
"step": 5300
},
{
"epoch": 0.31723442734738655,
"grad_norm": 1.0687259435653687,
"learning_rate": 9.770593317238052e-06,
"loss": 0.0204,
"step": 5350
},
{
"epoch": 0.32019923507960507,
"grad_norm": 3.0316076278686523,
"learning_rate": 9.765875375964263e-06,
"loss": 0.0228,
"step": 5400
},
{
"epoch": 0.32316404281182365,
"grad_norm": 2.7116587162017822,
"learning_rate": 9.761110575657914e-06,
"loss": 0.0196,
"step": 5450
},
{
"epoch": 0.3261288505440422,
"grad_norm": 0.9300628900527954,
"learning_rate": 9.756298963167466e-06,
"loss": 0.0181,
"step": 5500
},
{
"epoch": 0.3290936582762608,
"grad_norm": 1.9629039764404297,
"learning_rate": 9.751440585801648e-06,
"loss": 0.0165,
"step": 5550
},
{
"epoch": 0.3320584660084793,
"grad_norm": 0.2907628118991852,
"learning_rate": 9.746535491328991e-06,
"loss": 0.0216,
"step": 5600
},
{
"epoch": 0.3350232737406979,
"grad_norm": 2.418546199798584,
"learning_rate": 9.741583727977357e-06,
"loss": 0.0228,
"step": 5650
},
{
"epoch": 0.3379880814729165,
"grad_norm": 1.3516942262649536,
"learning_rate": 9.736585344433466e-06,
"loss": 0.0171,
"step": 5700
},
{
"epoch": 0.34095288920513506,
"grad_norm": 1.436192512512207,
"learning_rate": 9.731540389842417e-06,
"loss": 0.0209,
"step": 5750
},
{
"epoch": 0.34391769693735363,
"grad_norm": 2.503681182861328,
"learning_rate": 9.726448913807208e-06,
"loss": 0.02,
"step": 5800
},
{
"epoch": 0.34688250466957216,
"grad_norm": 2.688851833343506,
"learning_rate": 9.721310966388235e-06,
"loss": 0.021,
"step": 5850
},
{
"epoch": 0.34984731240179073,
"grad_norm": 1.2639074325561523,
"learning_rate": 9.716126598102825e-06,
"loss": 0.021,
"step": 5900
},
{
"epoch": 0.3528121201340093,
"grad_norm": 0.9510352611541748,
"learning_rate": 9.710895859924708e-06,
"loss": 0.0221,
"step": 5950
},
{
"epoch": 0.3557769278662279,
"grad_norm": 1.8296053409576416,
"learning_rate": 9.705618803283545e-06,
"loss": 0.0171,
"step": 6000
},
{
"epoch": 0.35874173559844647,
"grad_norm": 0.9670634865760803,
"learning_rate": 9.700295480064402e-06,
"loss": 0.0151,
"step": 6050
},
{
"epoch": 0.361706543330665,
"grad_norm": 1.096726894378662,
"learning_rate": 9.69492594260725e-06,
"loss": 0.019,
"step": 6100
},
{
"epoch": 0.36467135106288356,
"grad_norm": 3.529752731323242,
"learning_rate": 9.689510243706448e-06,
"loss": 0.02,
"step": 6150
},
{
"epoch": 0.36763615879510214,
"grad_norm": 1.7916548252105713,
"learning_rate": 9.684048436610221e-06,
"loss": 0.0179,
"step": 6200
},
{
"epoch": 0.3706009665273207,
"grad_norm": 1.1350276470184326,
"learning_rate": 9.67854057502014e-06,
"loss": 0.0184,
"step": 6250
},
{
"epoch": 0.3735657742595393,
"grad_norm": 2.405104875564575,
"learning_rate": 9.672986713090596e-06,
"loss": 0.0169,
"step": 6300
},
{
"epoch": 0.3765305819917578,
"grad_norm": 1.0677447319030762,
"learning_rate": 9.667386905428258e-06,
"loss": 0.0174,
"step": 6350
},
{
"epoch": 0.3794953897239764,
"grad_norm": 1.0359286069869995,
"learning_rate": 9.661741207091548e-06,
"loss": 0.0171,
"step": 6400
},
{
"epoch": 0.382460197456195,
"grad_norm": 2.040727376937866,
"learning_rate": 9.656049673590093e-06,
"loss": 0.0148,
"step": 6450
},
{
"epoch": 0.38542500518841355,
"grad_norm": 2.2050840854644775,
"learning_rate": 9.65031236088418e-06,
"loss": 0.0163,
"step": 6500
},
{
"epoch": 0.38838981292063207,
"grad_norm": 0.5841448903083801,
"learning_rate": 9.644529325384207e-06,
"loss": 0.0239,
"step": 6550
},
{
"epoch": 0.39135462065285065,
"grad_norm": 1.0610971450805664,
"learning_rate": 9.638700623950124e-06,
"loss": 0.0175,
"step": 6600
},
{
"epoch": 0.3943194283850692,
"grad_norm": 1.2772971391677856,
"learning_rate": 9.632826313890883e-06,
"loss": 0.0144,
"step": 6650
},
{
"epoch": 0.3972842361172878,
"grad_norm": 1.1676218509674072,
"learning_rate": 9.626906452963864e-06,
"loss": 0.0181,
"step": 6700
},
{
"epoch": 0.4002490438495064,
"grad_norm": 3.738065719604492,
"learning_rate": 9.620941099374315e-06,
"loss": 0.0189,
"step": 6750
},
{
"epoch": 0.4032138515817249,
"grad_norm": 3.0982308387756348,
"learning_rate": 9.61493031177478e-06,
"loss": 0.0181,
"step": 6800
},
{
"epoch": 0.4061786593139435,
"grad_norm": 2.3827264308929443,
"learning_rate": 9.608874149264511e-06,
"loss": 0.0181,
"step": 6850
},
{
"epoch": 0.40914346704616206,
"grad_norm": 0.6366240382194519,
"learning_rate": 9.602772671388901e-06,
"loss": 0.0155,
"step": 6900
},
{
"epoch": 0.41210827477838063,
"grad_norm": 0.6223490834236145,
"learning_rate": 9.596625938138892e-06,
"loss": 0.02,
"step": 6950
},
{
"epoch": 0.4150730825105992,
"grad_norm": 1.028148889541626,
"learning_rate": 9.590434009950386e-06,
"loss": 0.0184,
"step": 7000
},
{
"epoch": 0.41803789024281773,
"grad_norm": 2.3406949043273926,
"learning_rate": 9.58419694770365e-06,
"loss": 0.0205,
"step": 7050
},
{
"epoch": 0.4210026979750363,
"grad_norm": 3.0843560695648193,
"learning_rate": 9.577914812722716e-06,
"loss": 0.0176,
"step": 7100
},
{
"epoch": 0.4239675057072549,
"grad_norm": 2.7670023441314697,
"learning_rate": 9.571587666774783e-06,
"loss": 0.0165,
"step": 7150
},
{
"epoch": 0.42693231343947347,
"grad_norm": 1.995368480682373,
"learning_rate": 9.565215572069604e-06,
"loss": 0.0168,
"step": 7200
},
{
"epoch": 0.42989712117169204,
"grad_norm": 2.7526652812957764,
"learning_rate": 9.55879859125888e-06,
"loss": 0.0209,
"step": 7250
},
{
"epoch": 0.43286192890391056,
"grad_norm": 0.437929630279541,
"learning_rate": 9.552336787435641e-06,
"loss": 0.0211,
"step": 7300
},
{
"epoch": 0.43582673663612914,
"grad_norm": 3.0327157974243164,
"learning_rate": 9.545830224133625e-06,
"loss": 0.016,
"step": 7350
},
{
"epoch": 0.4387915443683477,
"grad_norm": 1.455119013786316,
"learning_rate": 9.539278965326653e-06,
"loss": 0.0145,
"step": 7400
},
{
"epoch": 0.4417563521005663,
"grad_norm": 1.194345474243164,
"learning_rate": 9.53281543019193e-06,
"loss": 0.0158,
"step": 7450
},
{
"epoch": 0.4447211598327848,
"grad_norm": 0.1790812462568283,
"learning_rate": 9.526175864740102e-06,
"loss": 0.0165,
"step": 7500
},
{
"epoch": 0.4447211598327848,
"eval_loss": 0.11122792959213257,
"eval_runtime": 1792.6078,
"eval_samples_per_second": 3.266,
"eval_steps_per_second": 0.817,
"eval_wer": 11.146845990790148,
"step": 7500
},
{
"epoch": 0.4476859675650034,
"grad_norm": 3.042560577392578,
"learning_rate": 9.519491797028879e-06,
"loss": 0.0143,
"step": 7550
},
{
"epoch": 0.450650775297222,
"grad_norm": 3.459578275680542,
"learning_rate": 9.512763292777343e-06,
"loss": 0.0188,
"step": 7600
},
{
"epoch": 0.45361558302944055,
"grad_norm": 0.7293067574501038,
"learning_rate": 9.505990418141481e-06,
"loss": 0.0172,
"step": 7650
},
{
"epoch": 0.4565803907616591,
"grad_norm": 2.7804245948791504,
"learning_rate": 9.499173239713543e-06,
"loss": 0.017,
"step": 7700
},
{
"epoch": 0.45954519849387765,
"grad_norm": 2.085571527481079,
"learning_rate": 9.492311824521379e-06,
"loss": 0.0152,
"step": 7750
},
{
"epoch": 0.4625100062260962,
"grad_norm": 2.437695264816284,
"learning_rate": 9.485406240027784e-06,
"loss": 0.0159,
"step": 7800
},
{
"epoch": 0.4654748139583148,
"grad_norm": 1.5121135711669922,
"learning_rate": 9.478456554129837e-06,
"loss": 0.0161,
"step": 7850
},
{
"epoch": 0.4684396216905334,
"grad_norm": 1.3547513484954834,
"learning_rate": 9.471462835158226e-06,
"loss": 0.0195,
"step": 7900
},
{
"epoch": 0.47140442942275196,
"grad_norm": 2.362305164337158,
"learning_rate": 9.46442515187659e-06,
"loss": 0.0129,
"step": 7950
},
{
"epoch": 0.4743692371549705,
"grad_norm": 0.3598177433013916,
"learning_rate": 9.457343573480824e-06,
"loss": 0.0162,
"step": 8000
},
{
"epoch": 0.47733404488718906,
"grad_norm": 2.5027735233306885,
"learning_rate": 9.450218169598411e-06,
"loss": 0.0153,
"step": 8050
},
{
"epoch": 0.48029885261940763,
"grad_norm": 1.3616218566894531,
"learning_rate": 9.443049010287737e-06,
"loss": 0.0137,
"step": 8100
},
{
"epoch": 0.4832636603516262,
"grad_norm": 2.2974565029144287,
"learning_rate": 9.4358361660374e-06,
"loss": 0.0163,
"step": 8150
},
{
"epoch": 0.4862284680838448,
"grad_norm": 0.3149053156375885,
"learning_rate": 9.428579707765515e-06,
"loss": 0.0105,
"step": 8200
},
{
"epoch": 0.4891932758160633,
"grad_norm": 0.2883080244064331,
"learning_rate": 9.421279706819021e-06,
"loss": 0.0174,
"step": 8250
},
{
"epoch": 0.4921580835482819,
"grad_norm": 1.0277905464172363,
"learning_rate": 9.413936234972977e-06,
"loss": 0.018,
"step": 8300
},
{
"epoch": 0.49512289128050047,
"grad_norm": 3.766566514968872,
"learning_rate": 9.406549364429854e-06,
"loss": 0.0135,
"step": 8350
},
{
"epoch": 0.49808769901271904,
"grad_norm": 0.7654131650924683,
"learning_rate": 9.39911916781883e-06,
"loss": 0.0159,
"step": 8400
},
{
"epoch": 0.5010525067449376,
"grad_norm": 2.5445258617401123,
"learning_rate": 9.39164571819507e-06,
"loss": 0.0165,
"step": 8450
},
{
"epoch": 0.5040173144771561,
"grad_norm": 0.7928656935691833,
"learning_rate": 9.384129089039017e-06,
"loss": 0.0152,
"step": 8500
},
{
"epoch": 0.5069821222093748,
"grad_norm": 2.195737600326538,
"learning_rate": 9.376569354255663e-06,
"loss": 0.0138,
"step": 8550
},
{
"epoch": 0.5099469299415933,
"grad_norm": 1.8981691598892212,
"learning_rate": 9.368966588173815e-06,
"loss": 0.0139,
"step": 8600
},
{
"epoch": 0.5129117376738118,
"grad_norm": 0.7925296425819397,
"learning_rate": 9.36132086554538e-06,
"loss": 0.016,
"step": 8650
},
{
"epoch": 0.5158765454060305,
"grad_norm": 1.9348927736282349,
"learning_rate": 9.353632261544623e-06,
"loss": 0.0127,
"step": 8700
},
{
"epoch": 0.518841353138249,
"grad_norm": 0.1095159500837326,
"learning_rate": 9.345900851767422e-06,
"loss": 0.0118,
"step": 8750
},
{
"epoch": 0.5218061608704675,
"grad_norm": 1.476868748664856,
"learning_rate": 9.338126712230533e-06,
"loss": 0.0168,
"step": 8800
},
{
"epoch": 0.5247709686026861,
"grad_norm": 1.4792319536209106,
"learning_rate": 9.330309919370837e-06,
"loss": 0.014,
"step": 8850
},
{
"epoch": 0.5277357763349046,
"grad_norm": 0.9512746930122375,
"learning_rate": 9.322450550044594e-06,
"loss": 0.0126,
"step": 8900
},
{
"epoch": 0.5307005840671233,
"grad_norm": 1.9876360893249512,
"learning_rate": 9.314548681526685e-06,
"loss": 0.015,
"step": 8950
},
{
"epoch": 0.5336653917993418,
"grad_norm": 3.569706916809082,
"learning_rate": 9.306604391509847e-06,
"loss": 0.0164,
"step": 9000
},
{
"epoch": 0.5366301995315603,
"grad_norm": 1.3703384399414062,
"learning_rate": 9.298617758103918e-06,
"loss": 0.0146,
"step": 9050
},
{
"epoch": 0.539595007263779,
"grad_norm": 1.3747698068618774,
"learning_rate": 9.290588859835062e-06,
"loss": 0.012,
"step": 9100
},
{
"epoch": 0.5425598149959975,
"grad_norm": 2.5859375,
"learning_rate": 9.282517775645e-06,
"loss": 0.0191,
"step": 9150
},
{
"epoch": 0.5455246227282161,
"grad_norm": 0.323015958070755,
"learning_rate": 9.274404584890233e-06,
"loss": 0.0153,
"step": 9200
},
{
"epoch": 0.5484894304604346,
"grad_norm": 1.5014373064041138,
"learning_rate": 9.266249367341262e-06,
"loss": 0.0115,
"step": 9250
},
{
"epoch": 0.5514542381926532,
"grad_norm": 2.329052448272705,
"learning_rate": 9.258052203181809e-06,
"loss": 0.0137,
"step": 9300
},
{
"epoch": 0.5544190459248718,
"grad_norm": 2.8387389183044434,
"learning_rate": 9.249813173008014e-06,
"loss": 0.0179,
"step": 9350
},
{
"epoch": 0.5573838536570903,
"grad_norm": 1.3269604444503784,
"learning_rate": 9.241532357827658e-06,
"loss": 0.0119,
"step": 9400
},
{
"epoch": 0.5603486613893089,
"grad_norm": null,
"learning_rate": 9.23320983905936e-06,
"loss": 0.0147,
"step": 9450
},
{
"epoch": 0.5633134691215275,
"grad_norm": 1.0867879390716553,
"learning_rate": 9.225013388704327e-06,
"loss": 0.0129,
"step": 9500
},
{
"epoch": 0.566278276853746,
"grad_norm": 0.15310339629650116,
"learning_rate": 9.2166085386372e-06,
"loss": 0.0142,
"step": 9550
},
{
"epoch": 0.5692430845859646,
"grad_norm": 1.267125129699707,
"learning_rate": 9.208162230038069e-06,
"loss": 0.0132,
"step": 9600
},
{
"epoch": 0.5722078923181831,
"grad_norm": 7.623719692230225,
"learning_rate": 9.199674545952714e-06,
"loss": 0.0113,
"step": 9650
},
{
"epoch": 0.5751727000504018,
"grad_norm": 2.1407358646392822,
"learning_rate": 9.19114556983373e-06,
"loss": 0.0137,
"step": 9700
},
{
"epoch": 0.5781375077826203,
"grad_norm": 1.6153864860534668,
"learning_rate": 9.182575385539705e-06,
"loss": 0.012,
"step": 9750
},
{
"epoch": 0.5811023155148388,
"grad_norm": 0.7798183560371399,
"learning_rate": 9.17396407733439e-06,
"loss": 0.0117,
"step": 9800
},
{
"epoch": 0.5840671232470575,
"grad_norm": 1.1946767568588257,
"learning_rate": 9.165311729885875e-06,
"loss": 0.0129,
"step": 9850
},
{
"epoch": 0.587031930979276,
"grad_norm": 1.4665184020996094,
"learning_rate": 9.15661842826576e-06,
"loss": 0.0128,
"step": 9900
},
{
"epoch": 0.5899967387114946,
"grad_norm": 1.9920064210891724,
"learning_rate": 9.147884257948311e-06,
"loss": 0.0124,
"step": 9950
},
{
"epoch": 0.5929615464437131,
"grad_norm": 1.1829341650009155,
"learning_rate": 9.139109304809624e-06,
"loss": 0.0099,
"step": 10000
},
{
"epoch": 0.5929615464437131,
"eval_loss": 0.10716072469949722,
"eval_runtime": 1812.2336,
"eval_samples_per_second": 3.231,
"eval_steps_per_second": 0.808,
"eval_wer": 12.313427381039398,
"step": 10000
},
{
"epoch": 0.5959263541759316,
"grad_norm": 0.11610784381628036,
"learning_rate": 9.130293655126778e-06,
"loss": 0.0108,
"step": 10050
},
{
"epoch": 0.5988911619081503,
"grad_norm": 2.0204625129699707,
"learning_rate": 9.121437395576994e-06,
"loss": 0.0141,
"step": 10100
},
{
"epoch": 0.6018559696403688,
"grad_norm": 1.855035662651062,
"learning_rate": 9.112540613236769e-06,
"loss": 0.0107,
"step": 10150
},
{
"epoch": 0.6048207773725874,
"grad_norm": 1.2485501766204834,
"learning_rate": 9.103603395581036e-06,
"loss": 0.0131,
"step": 10200
},
{
"epoch": 0.607785585104806,
"grad_norm": 2.173248052597046,
"learning_rate": 9.094625830482292e-06,
"loss": 0.0132,
"step": 10250
},
{
"epoch": 0.6107503928370245,
"grad_norm": 2.0845844745635986,
"learning_rate": 9.08560800620974e-06,
"loss": 0.0162,
"step": 10300
},
{
"epoch": 0.6137152005692431,
"grad_norm": 0.12550178170204163,
"learning_rate": 9.076550011428415e-06,
"loss": 0.0103,
"step": 10350
},
{
"epoch": 0.6166800083014616,
"grad_norm": 1.7097872495651245,
"learning_rate": 9.067451935198322e-06,
"loss": 0.0118,
"step": 10400
},
{
"epoch": 0.6196448160336803,
"grad_norm": 1.811226725578308,
"learning_rate": 9.05831386697355e-06,
"loss": 0.0103,
"step": 10450
},
{
"epoch": 0.6226096237658988,
"grad_norm": 2.809597969055176,
"learning_rate": 9.0491358966014e-06,
"loss": 0.0115,
"step": 10500
},
{
"epoch": 0.6255744314981173,
"grad_norm": 0.09170404076576233,
"learning_rate": 9.0399181143215e-06,
"loss": 0.0109,
"step": 10550
},
{
"epoch": 0.6285392392303359,
"grad_norm": 1.455367922782898,
"learning_rate": 9.030660610764913e-06,
"loss": 0.013,
"step": 10600
},
{
"epoch": 0.6315040469625545,
"grad_norm": 1.2813187837600708,
"learning_rate": 9.021363476953252e-06,
"loss": 0.0142,
"step": 10650
},
{
"epoch": 0.6344688546947731,
"grad_norm": 3.7294063568115234,
"learning_rate": 9.012026804297783e-06,
"loss": 0.0151,
"step": 10700
},
{
"epoch": 0.6374336624269916,
"grad_norm": 1.4082695245742798,
"learning_rate": 9.002650684598519e-06,
"loss": 0.0114,
"step": 10750
},
{
"epoch": 0.6403984701592101,
"grad_norm": 0.6885934472084045,
"learning_rate": 8.993235210043339e-06,
"loss": 0.0132,
"step": 10800
},
{
"epoch": 0.6433632778914288,
"grad_norm": 1.1015677452087402,
"learning_rate": 8.983780473207049e-06,
"loss": 0.0095,
"step": 10850
},
{
"epoch": 0.6463280856236473,
"grad_norm": 2.8200488090515137,
"learning_rate": 8.974286567050503e-06,
"loss": 0.0133,
"step": 10900
},
{
"epoch": 0.6492928933558658,
"grad_norm": 1.1393108367919922,
"learning_rate": 8.964753584919668e-06,
"loss": 0.0097,
"step": 10950
},
{
"epoch": 0.6522577010880845,
"grad_norm": 1.8517667055130005,
"learning_rate": 8.955181620544718e-06,
"loss": 0.0102,
"step": 11000
},
{
"epoch": 0.655222508820303,
"grad_norm": 1.8156200647354126,
"learning_rate": 8.945570768039104e-06,
"loss": 0.0114,
"step": 11050
},
{
"epoch": 0.6581873165525216,
"grad_norm": 0.035381220281124115,
"learning_rate": 8.935921121898639e-06,
"loss": 0.0108,
"step": 11100
},
{
"epoch": 0.6611521242847401,
"grad_norm": 12.99875545501709,
"learning_rate": 8.926232777000556e-06,
"loss": 0.0157,
"step": 11150
},
{
"epoch": 0.6641169320169586,
"grad_norm": 0.34489646553993225,
"learning_rate": 8.916505828602585e-06,
"loss": 0.014,
"step": 11200
},
{
"epoch": 0.6670817397491773,
"grad_norm": 1.3919397592544556,
"learning_rate": 8.906740372342016e-06,
"loss": 0.0124,
"step": 11250
},
{
"epoch": 0.6700465474813958,
"grad_norm": 2.139253854751587,
"learning_rate": 8.896936504234749e-06,
"loss": 0.0124,
"step": 11300
},
{
"epoch": 0.6730113552136144,
"grad_norm": 2.3928396701812744,
"learning_rate": 8.887094320674363e-06,
"loss": 0.0111,
"step": 11350
},
{
"epoch": 0.675976162945833,
"grad_norm": 0.7535815834999084,
"learning_rate": 8.87721391843116e-06,
"loss": 0.0115,
"step": 11400
},
{
"epoch": 0.6789409706780515,
"grad_norm": 1.517271637916565,
"learning_rate": 8.867295394651216e-06,
"loss": 0.0119,
"step": 11450
},
{
"epoch": 0.6819057784102701,
"grad_norm": 1.9762825965881348,
"learning_rate": 8.85753834981413e-06,
"loss": 0.0132,
"step": 11500
},
{
"epoch": 0.6848705861424886,
"grad_norm": 0.9906989932060242,
"learning_rate": 8.847544633457886e-06,
"loss": 0.0118,
"step": 11550
},
{
"epoch": 0.6878353938747073,
"grad_norm": 0.677872896194458,
"learning_rate": 8.837513087279198e-06,
"loss": 0.0128,
"step": 11600
},
{
"epoch": 0.6908002016069258,
"grad_norm": 0.6202975511550903,
"learning_rate": 8.827443809910221e-06,
"loss": 0.012,
"step": 11650
},
{
"epoch": 0.6937650093391443,
"grad_norm": 0.8511649370193481,
"learning_rate": 8.817336900354088e-06,
"loss": 0.0116,
"step": 11700
},
{
"epoch": 0.696729817071363,
"grad_norm": 1.111178994178772,
"learning_rate": 8.807192457983939e-06,
"loss": 0.0119,
"step": 11750
},
{
"epoch": 0.6996946248035815,
"grad_norm": 2.57309889793396,
"learning_rate": 8.797010582541944e-06,
"loss": 0.0108,
"step": 11800
},
{
"epoch": 0.7026594325358001,
"grad_norm": 3.5267422199249268,
"learning_rate": 8.786791374138324e-06,
"loss": 0.0148,
"step": 11850
},
{
"epoch": 0.7056242402680186,
"grad_norm": 0.18291831016540527,
"learning_rate": 8.776534933250364e-06,
"loss": 0.0119,
"step": 11900
},
{
"epoch": 0.7085890480002371,
"grad_norm": 1.7078309059143066,
"learning_rate": 8.766241360721427e-06,
"loss": 0.0095,
"step": 11950
},
{
"epoch": 0.7115538557324558,
"grad_norm": 1.5815846920013428,
"learning_rate": 8.755910757759958e-06,
"loss": 0.0109,
"step": 12000
},
{
"epoch": 0.7145186634646743,
"grad_norm": 0.38515791296958923,
"learning_rate": 8.745543225938502e-06,
"loss": 0.0095,
"step": 12050
},
{
"epoch": 0.7174834711968929,
"grad_norm": 0.8359034061431885,
"learning_rate": 8.735138867192683e-06,
"loss": 0.011,
"step": 12100
},
{
"epoch": 0.7204482789291115,
"grad_norm": 1.3351069688796997,
"learning_rate": 8.724697783820223e-06,
"loss": 0.0115,
"step": 12150
},
{
"epoch": 0.72341308666133,
"grad_norm": 1.6904191970825195,
"learning_rate": 8.714220078479929e-06,
"loss": 0.012,
"step": 12200
},
{
"epoch": 0.7263778943935486,
"grad_norm": 1.6055872440338135,
"learning_rate": 8.703705854190675e-06,
"loss": 0.0125,
"step": 12250
},
{
"epoch": 0.7293427021257671,
"grad_norm": 2.7653372287750244,
"learning_rate": 8.693155214330405e-06,
"loss": 0.0117,
"step": 12300
},
{
"epoch": 0.7323075098579858,
"grad_norm": 3.0810275077819824,
"learning_rate": 8.682568262635101e-06,
"loss": 0.0119,
"step": 12350
},
{
"epoch": 0.7352723175902043,
"grad_norm": 1.5143022537231445,
"learning_rate": 8.671945103197772e-06,
"loss": 0.0103,
"step": 12400
},
{
"epoch": 0.7382371253224228,
"grad_norm": 1.342732548713684,
"learning_rate": 8.661285840467434e-06,
"loss": 0.0107,
"step": 12450
},
{
"epoch": 0.7412019330546414,
"grad_norm": 1.0729478597640991,
"learning_rate": 8.650590579248071e-06,
"loss": 0.0083,
"step": 12500
},
{
"epoch": 0.7412019330546414,
"eval_loss": 0.10575078427791595,
"eval_runtime": 3755.3699,
"eval_samples_per_second": 1.559,
"eval_steps_per_second": 0.39,
"eval_wer": 13.099919596520722,
"step": 12500
},
{
"epoch": 0.74416674078686,
"grad_norm": 0.11206492781639099,
"learning_rate": 8.639859424697615e-06,
"loss": 0.0116,
"step": 12550
},
{
"epoch": 0.7471315485190786,
"grad_norm": 3.2454352378845215,
"learning_rate": 8.629092482326908e-06,
"loss": 0.0106,
"step": 12600
},
{
"epoch": 0.7500963562512971,
"grad_norm": 2.604566812515259,
"learning_rate": 8.618289857998661e-06,
"loss": 0.0104,
"step": 12650
},
{
"epoch": 0.7530611639835156,
"grad_norm": 0.28832483291625977,
"learning_rate": 8.607451657926424e-06,
"loss": 0.011,
"step": 12700
},
{
"epoch": 0.7560259717157343,
"grad_norm": 0.390303373336792,
"learning_rate": 8.59657798867353e-06,
"loss": 0.01,
"step": 12750
},
{
"epoch": 0.7589907794479528,
"grad_norm": 1.1761589050292969,
"learning_rate": 8.58566895715205e-06,
"loss": 0.0113,
"step": 12800
},
{
"epoch": 0.7619555871801713,
"grad_norm": 1.9691640138626099,
"learning_rate": 8.574724670621753e-06,
"loss": 0.0084,
"step": 12850
},
{
"epoch": 0.76492039491239,
"grad_norm": 0.49470266699790955,
"learning_rate": 8.563745236689031e-06,
"loss": 0.0085,
"step": 12900
},
{
"epoch": 0.7678852026446085,
"grad_norm": 0.1691243201494217,
"learning_rate": 8.552730763305862e-06,
"loss": 0.0096,
"step": 12950
},
{
"epoch": 0.7708500103768271,
"grad_norm": 2.035419225692749,
"learning_rate": 8.54168135876873e-06,
"loss": 0.0099,
"step": 13000
},
{
"epoch": 0.7738148181090456,
"grad_norm": 3.7284252643585205,
"learning_rate": 8.530597131717577e-06,
"loss": 0.0105,
"step": 13050
},
{
"epoch": 0.7767796258412641,
"grad_norm": 1.6268372535705566,
"learning_rate": 8.51947819113472e-06,
"loss": 0.0106,
"step": 13100
},
{
"epoch": 0.7797444335734828,
"grad_norm": 3.1598916053771973,
"learning_rate": 8.508324646343791e-06,
"loss": 0.0111,
"step": 13150
},
{
"epoch": 0.7827092413057013,
"grad_norm": 0.1609897017478943,
"learning_rate": 8.497136607008656e-06,
"loss": 0.0122,
"step": 13200
},
{
"epoch": 0.7856740490379199,
"grad_norm": 0.5666967034339905,
"learning_rate": 8.485914183132338e-06,
"loss": 0.0085,
"step": 13250
},
{
"epoch": 0.7886388567701385,
"grad_norm": 1.9069159030914307,
"learning_rate": 8.474657485055936e-06,
"loss": 0.0106,
"step": 13300
},
{
"epoch": 0.791603664502357,
"grad_norm": 1.5358110666275024,
"learning_rate": 8.463366623457538e-06,
"loss": 0.0081,
"step": 13350
},
{
"epoch": 0.7945684722345756,
"grad_norm": 1.486150860786438,
"learning_rate": 8.45204170935114e-06,
"loss": 0.0083,
"step": 13400
},
{
"epoch": 0.7975332799667941,
"grad_norm": 2.6618776321411133,
"learning_rate": 8.440682854085543e-06,
"loss": 0.0112,
"step": 13450
},
{
"epoch": 0.8004980876990128,
"grad_norm": 1.1893341541290283,
"learning_rate": 8.429290169343264e-06,
"loss": 0.012,
"step": 13500
},
{
"epoch": 0.8034628954312313,
"grad_norm": 2.5905745029449463,
"learning_rate": 8.417863767139444e-06,
"loss": 0.0093,
"step": 13550
},
{
"epoch": 0.8064277031634498,
"grad_norm": 1.5279064178466797,
"learning_rate": 8.406403759820734e-06,
"loss": 0.0109,
"step": 13600
},
{
"epoch": 0.8093925108956684,
"grad_norm": 1.5376503467559814,
"learning_rate": 8.394910260064203e-06,
"loss": 0.0098,
"step": 13650
},
{
"epoch": 0.812357318627887,
"grad_norm": 0.1439572125673294,
"learning_rate": 8.38338338087622e-06,
"loss": 0.0077,
"step": 13700
},
{
"epoch": 0.8153221263601056,
"grad_norm": 1.7664012908935547,
"learning_rate": 8.371823235591352e-06,
"loss": 0.0101,
"step": 13750
},
{
"epoch": 0.8182869340923241,
"grad_norm": 0.2659100592136383,
"learning_rate": 8.360229937871237e-06,
"loss": 0.0093,
"step": 13800
},
{
"epoch": 0.8212517418245426,
"grad_norm": 0.21515263617038727,
"learning_rate": 8.348603601703483e-06,
"loss": 0.0069,
"step": 13850
},
{
"epoch": 0.8242165495567613,
"grad_norm": 1.8138411045074463,
"learning_rate": 8.33717784852218e-06,
"loss": 0.011,
"step": 13900
},
{
"epoch": 0.8271813572889798,
"grad_norm": 1.0726341009140015,
"learning_rate": 8.325486433784653e-06,
"loss": 0.0105,
"step": 13950
},
{
"epoch": 0.8301461650211984,
"grad_norm": 3.987571954727173,
"learning_rate": 8.313762322204512e-06,
"loss": 0.0111,
"step": 14000
},
{
"epoch": 0.833110972753417,
"grad_norm": 0.5988409519195557,
"learning_rate": 8.302005629055549e-06,
"loss": 0.0123,
"step": 14050
},
{
"epoch": 0.8360757804856355,
"grad_norm": 1.5194250345230103,
"learning_rate": 8.290216469931907e-06,
"loss": 0.0105,
"step": 14100
},
{
"epoch": 0.8390405882178541,
"grad_norm": 1.19874107837677,
"learning_rate": 8.278394960746936e-06,
"loss": 0.0091,
"step": 14150
},
{
"epoch": 0.8420053959500726,
"grad_norm": 1.3419668674468994,
"learning_rate": 8.26654121773206e-06,
"loss": 0.011,
"step": 14200
},
{
"epoch": 0.8449702036822913,
"grad_norm": 2.4264957904815674,
"learning_rate": 8.254655357435635e-06,
"loss": 0.0099,
"step": 14250
},
{
"epoch": 0.8479350114145098,
"grad_norm": 0.842897355556488,
"learning_rate": 8.242737496721797e-06,
"loss": 0.0083,
"step": 14300
},
{
"epoch": 0.8508998191467283,
"grad_norm": 0.6375104784965515,
"learning_rate": 8.230787752769317e-06,
"loss": 0.0084,
"step": 14350
},
{
"epoch": 0.8538646268789469,
"grad_norm": 0.03742964193224907,
"learning_rate": 8.218806243070451e-06,
"loss": 0.0074,
"step": 14400
},
{
"epoch": 0.8568294346111655,
"grad_norm": 0.6775150299072266,
"learning_rate": 8.206793085429781e-06,
"loss": 0.0096,
"step": 14450
},
{
"epoch": 0.8597942423433841,
"grad_norm": 1.218371868133545,
"learning_rate": 8.194748397963053e-06,
"loss": 0.0106,
"step": 14500
},
{
"epoch": 0.8627590500756026,
"grad_norm": 0.3041909635066986,
"learning_rate": 8.182672299096028e-06,
"loss": 0.01,
"step": 14550
},
{
"epoch": 0.8657238578078211,
"grad_norm": 0.5829514861106873,
"learning_rate": 8.170564907563308e-06,
"loss": 0.0086,
"step": 14600
},
{
"epoch": 0.8686886655400398,
"grad_norm": 1.9637973308563232,
"learning_rate": 8.158426342407164e-06,
"loss": 0.0104,
"step": 14650
},
{
"epoch": 0.8716534732722583,
"grad_norm": 3.4624621868133545,
"learning_rate": 8.146256722976383e-06,
"loss": 0.0077,
"step": 14700
},
{
"epoch": 0.8746182810044768,
"grad_norm": 0.18807068467140198,
"learning_rate": 8.134056168925077e-06,
"loss": 0.0058,
"step": 14750
},
{
"epoch": 0.8775830887366954,
"grad_norm": 2.7400877475738525,
"learning_rate": 8.121824800211514e-06,
"loss": 0.0105,
"step": 14800
},
{
"epoch": 0.880547896468914,
"grad_norm": 0.5604029893875122,
"learning_rate": 8.109562737096938e-06,
"loss": 0.0104,
"step": 14850
},
{
"epoch": 0.8835127042011326,
"grad_norm": 0.2727389335632324,
"learning_rate": 8.09727010014439e-06,
"loss": 0.0094,
"step": 14900
},
{
"epoch": 0.8864775119333511,
"grad_norm": 0.35746484994888306,
"learning_rate": 8.084947010217514e-06,
"loss": 0.0085,
"step": 14950
},
{
"epoch": 0.8894423196655696,
"grad_norm": 0.06409318745136261,
"learning_rate": 8.072593588479378e-06,
"loss": 0.0094,
"step": 15000
},
{
"epoch": 0.8894423196655696,
"eval_loss": 0.09929487109184265,
"eval_runtime": 4386.1659,
"eval_samples_per_second": 1.335,
"eval_steps_per_second": 0.334,
"eval_wer": 24.461662159198887,
"step": 15000
},
{
"epoch": 0.8924071273977883,
"grad_norm": 0.6662552952766418,
"learning_rate": 8.060209956391271e-06,
"loss": 0.0095,
"step": 15050
},
{
"epoch": 0.8953719351300068,
"grad_norm": 3.1687803268432617,
"learning_rate": 8.047796235711527e-06,
"loss": 0.0101,
"step": 15100
},
{
"epoch": 0.8983367428622254,
"grad_norm": 0.07528534531593323,
"learning_rate": 8.035352548494311e-06,
"loss": 0.0096,
"step": 15150
},
{
"epoch": 0.901301550594444,
"grad_norm": 0.8370321989059448,
"learning_rate": 8.022879017088422e-06,
"loss": 0.0113,
"step": 15200
},
{
"epoch": 0.9042663583266625,
"grad_norm": 0.9781240224838257,
"learning_rate": 8.010375764136098e-06,
"loss": 0.0114,
"step": 15250
},
{
"epoch": 0.9072311660588811,
"grad_norm": 0.2995459735393524,
"learning_rate": 7.997842912571805e-06,
"loss": 0.009,
"step": 15300
},
{
"epoch": 0.9101959737910996,
"grad_norm": 1.0502598285675049,
"learning_rate": 7.985280585621024e-06,
"loss": 0.0089,
"step": 15350
},
{
"epoch": 0.9131607815233183,
"grad_norm": 2.38645339012146,
"learning_rate": 7.97268890679905e-06,
"loss": 0.0128,
"step": 15400
},
{
"epoch": 0.9161255892555368,
"grad_norm": 0.21894457936286926,
"learning_rate": 7.960067999909767e-06,
"loss": 0.0088,
"step": 15450
},
{
"epoch": 0.9190903969877553,
"grad_norm": 0.12525251507759094,
"learning_rate": 7.947417989044434e-06,
"loss": 0.0104,
"step": 15500
},
{
"epoch": 0.9220552047199739,
"grad_norm": 0.13473589718341827,
"learning_rate": 7.934738998580473e-06,
"loss": 0.0091,
"step": 15550
},
{
"epoch": 0.9250200124521925,
"grad_norm": 0.14185883104801178,
"learning_rate": 7.92203115318023e-06,
"loss": 0.0092,
"step": 15600
},
{
"epoch": 0.9279848201844111,
"grad_norm": 0.34865042567253113,
"learning_rate": 7.909294577789765e-06,
"loss": 0.0074,
"step": 15650
},
{
"epoch": 0.9309496279166296,
"grad_norm": 0.6784268021583557,
"learning_rate": 7.896529397637615e-06,
"loss": 0.0073,
"step": 15700
},
{
"epoch": 0.9339144356488481,
"grad_norm": 0.7907705903053284,
"learning_rate": 7.883735738233565e-06,
"loss": 0.0084,
"step": 15750
},
{
"epoch": 0.9368792433810668,
"grad_norm": 2.074660301208496,
"learning_rate": 7.870913725367413e-06,
"loss": 0.0113,
"step": 15800
},
{
"epoch": 0.9398440511132853,
"grad_norm": 0.11929433792829514,
"learning_rate": 7.858063485107736e-06,
"loss": 0.007,
"step": 15850
},
{
"epoch": 0.9428088588455039,
"grad_norm": 1.5103803873062134,
"learning_rate": 7.845185143800644e-06,
"loss": 0.0081,
"step": 15900
},
{
"epoch": 0.9457736665777224,
"grad_norm": 2.054821491241455,
"learning_rate": 7.832278828068546e-06,
"loss": 0.0109,
"step": 15950
},
{
"epoch": 0.948738474309941,
"grad_norm": 0.14713267982006073,
"learning_rate": 7.8193446648089e-06,
"loss": 0.0091,
"step": 16000
},
{
"epoch": 0.9517032820421596,
"grad_norm": 0.16898584365844727,
"learning_rate": 7.806382781192962e-06,
"loss": 0.0081,
"step": 16050
},
{
"epoch": 0.9546680897743781,
"grad_norm": 1.8700170516967773,
"learning_rate": 7.793393304664549e-06,
"loss": 0.0103,
"step": 16100
},
{
"epoch": 0.9576328975065967,
"grad_norm": 0.3140694499015808,
"learning_rate": 7.780376362938773e-06,
"loss": 0.0114,
"step": 16150
},
{
"epoch": 0.9605977052388153,
"grad_norm": 2.2821805477142334,
"learning_rate": 7.767332084000784e-06,
"loss": 0.01,
"step": 16200
},
{
"epoch": 0.9635625129710338,
"grad_norm": 0.20296697318553925,
"learning_rate": 7.754522291679406e-06,
"loss": 0.0089,
"step": 16250
},
{
"epoch": 0.9665273207032524,
"grad_norm": 0.31332024931907654,
"learning_rate": 7.741424263693839e-06,
"loss": 0.0092,
"step": 16300
},
{
"epoch": 0.969492128435471,
"grad_norm": 0.6727187633514404,
"learning_rate": 7.728299281480833e-06,
"loss": 0.0102,
"step": 16350
},
{
"epoch": 0.9724569361676896,
"grad_norm": 1.2364569902420044,
"learning_rate": 7.715147474087817e-06,
"loss": 0.0088,
"step": 16400
},
{
"epoch": 0.9754217438999081,
"grad_norm": 0.06689300388097763,
"learning_rate": 7.701968970825973e-06,
"loss": 0.0102,
"step": 16450
},
{
"epoch": 0.9783865516321266,
"grad_norm": 2.35491681098938,
"learning_rate": 7.688763901268955e-06,
"loss": 0.0078,
"step": 16500
},
{
"epoch": 0.9813513593643453,
"grad_norm": 1.78278386592865,
"learning_rate": 7.675532395251633e-06,
"loss": 0.0069,
"step": 16550
},
{
"epoch": 0.9843161670965638,
"grad_norm": 0.45280998945236206,
"learning_rate": 7.6622745828688e-06,
"loss": 0.0081,
"step": 16600
},
{
"epoch": 0.9872809748287823,
"grad_norm": 1.32427978515625,
"learning_rate": 7.648990594473897e-06,
"loss": 0.0077,
"step": 16650
},
{
"epoch": 0.9902457825610009,
"grad_norm": 2.651205062866211,
"learning_rate": 7.635680560677734e-06,
"loss": 0.0094,
"step": 16700
},
{
"epoch": 0.9932105902932195,
"grad_norm": 1.2493482828140259,
"learning_rate": 7.622344612347206e-06,
"loss": 0.0139,
"step": 16750
},
{
"epoch": 0.9961753980254381,
"grad_norm": 0.7914796471595764,
"learning_rate": 7.6089828806040015e-06,
"loss": 0.0086,
"step": 16800
},
{
"epoch": 0.9991402057576566,
"grad_norm": 2.1952311992645264,
"learning_rate": 7.595595496823321e-06,
"loss": 0.011,
"step": 16850
},
{
"epoch": 1.002075365412553,
"grad_norm": 0.2566259801387787,
"learning_rate": 7.582182592632577e-06,
"loss": 0.0042,
"step": 16900
},
{
"epoch": 1.0050401731447716,
"grad_norm": 0.19997215270996094,
"learning_rate": 7.568744299910109e-06,
"loss": 0.0058,
"step": 16950
},
{
"epoch": 1.0080049808769902,
"grad_norm": 0.04884817823767662,
"learning_rate": 7.555280750783876e-06,
"loss": 0.005,
"step": 17000
},
{
"epoch": 1.0109697886092086,
"grad_norm": 0.07050412893295288,
"learning_rate": 7.541792077630163e-06,
"loss": 0.005,
"step": 17050
},
{
"epoch": 1.0139345963414272,
"grad_norm": 0.8894023299217224,
"learning_rate": 7.528278413072285e-06,
"loss": 0.0049,
"step": 17100
},
{
"epoch": 1.0168994040736459,
"grad_norm": 0.611845076084137,
"learning_rate": 7.514739889979272e-06,
"loss": 0.0052,
"step": 17150
},
{
"epoch": 1.0198642118058643,
"grad_norm": 0.12328474968671799,
"learning_rate": 7.501176641464569e-06,
"loss": 0.004,
"step": 17200
},
{
"epoch": 1.022829019538083,
"grad_norm": 1.4184530973434448,
"learning_rate": 7.4875888008847294e-06,
"loss": 0.0029,
"step": 17250
},
{
"epoch": 1.0257938272703016,
"grad_norm": 0.09332071244716644,
"learning_rate": 7.4739765018380986e-06,
"loss": 0.0035,
"step": 17300
},
{
"epoch": 1.0287586350025202,
"grad_norm": 0.786600649356842,
"learning_rate": 7.460339878163501e-06,
"loss": 0.0041,
"step": 17350
},
{
"epoch": 1.0317234427347386,
"grad_norm": 0.8689146041870117,
"learning_rate": 7.446679063938926e-06,
"loss": 0.0029,
"step": 17400
},
{
"epoch": 1.0346882504669572,
"grad_norm": 2.0220930576324463,
"learning_rate": 7.432994193480214e-06,
"loss": 0.0044,
"step": 17450
},
{
"epoch": 1.0376530581991759,
"grad_norm": 0.15765543282032013,
"learning_rate": 7.419285401339723e-06,
"loss": 0.0057,
"step": 17500
},
{
"epoch": 1.0376530581991759,
"eval_loss": 0.10125549137592316,
"eval_runtime": 4053.4099,
"eval_samples_per_second": 1.444,
"eval_steps_per_second": 0.361,
"eval_wer": 17.37738469410131,
"step": 17500
},
{
"epoch": 1.0406178659313943,
"grad_norm": 1.3988970518112183,
"learning_rate": 7.40555282230502e-06,
"loss": 0.0041,
"step": 17550
},
{
"epoch": 1.043582673663613,
"grad_norm": 0.5283538103103638,
"learning_rate": 7.391796591397548e-06,
"loss": 0.005,
"step": 17600
},
{
"epoch": 1.0465474813958315,
"grad_norm": 0.8953123688697815,
"learning_rate": 7.378016843871301e-06,
"loss": 0.0036,
"step": 17650
},
{
"epoch": 1.04951228912805,
"grad_norm": 0.2542392313480377,
"learning_rate": 7.364490006042365e-06,
"loss": 0.0058,
"step": 17700
},
{
"epoch": 1.0524770968602686,
"grad_norm": 0.13877496123313904,
"learning_rate": 7.350664095540969e-06,
"loss": 0.0047,
"step": 17750
},
{
"epoch": 1.0554419045924872,
"grad_norm": 0.668488621711731,
"learning_rate": 7.3368150728436595e-06,
"loss": 0.0041,
"step": 17800
},
{
"epoch": 1.0584067123247058,
"grad_norm": 1.1569958925247192,
"learning_rate": 7.322943074116774e-06,
"loss": 0.0045,
"step": 17850
},
{
"epoch": 1.0613715200569243,
"grad_norm": 0.23624324798583984,
"learning_rate": 7.3090482357525595e-06,
"loss": 0.0047,
"step": 17900
},
{
"epoch": 1.0643363277891429,
"grad_norm": 0.19879819452762604,
"learning_rate": 7.295130694367823e-06,
"loss": 0.0046,
"step": 17950
},
{
"epoch": 1.0673011355213615,
"grad_norm": 0.0767776146531105,
"learning_rate": 7.2811905868025936e-06,
"loss": 0.004,
"step": 18000
},
{
"epoch": 1.07026594325358,
"grad_norm": 0.13302555680274963,
"learning_rate": 7.267228050118776e-06,
"loss": 0.0032,
"step": 18050
},
{
"epoch": 1.0732307509857986,
"grad_norm": 0.2847168743610382,
"learning_rate": 7.253243221598803e-06,
"loss": 0.0044,
"step": 18100
},
{
"epoch": 1.0761955587180172,
"grad_norm": 0.3598785698413849,
"learning_rate": 7.2392362387442815e-06,
"loss": 0.0041,
"step": 18150
},
{
"epoch": 1.0791603664502356,
"grad_norm": 0.021318763494491577,
"learning_rate": 7.22520723927465e-06,
"loss": 0.0038,
"step": 18200
},
{
"epoch": 1.0821251741824542,
"grad_norm": 0.11015797406435013,
"learning_rate": 7.211156361125812e-06,
"loss": 0.0038,
"step": 18250
},
{
"epoch": 1.0850899819146729,
"grad_norm": 1.6273869276046753,
"learning_rate": 7.197083742448792e-06,
"loss": 0.0026,
"step": 18300
},
{
"epoch": 1.0880547896468915,
"grad_norm": 0.035194288939237595,
"learning_rate": 7.182989521608371e-06,
"loss": 0.003,
"step": 18350
},
{
"epoch": 1.09101959737911,
"grad_norm": 0.05955716222524643,
"learning_rate": 7.168873837181725e-06,
"loss": 0.0033,
"step": 18400
},
{
"epoch": 1.0939844051113286,
"grad_norm": 0.02106299065053463,
"learning_rate": 7.154736827957063e-06,
"loss": 0.0032,
"step": 18450
},
{
"epoch": 1.0969492128435472,
"grad_norm": 0.046555496752262115,
"learning_rate": 7.140578632932267e-06,
"loss": 0.0065,
"step": 18500
},
{
"epoch": 1.0999140205757656,
"grad_norm": 1.2249200344085693,
"learning_rate": 7.126399391313519e-06,
"loss": 0.0035,
"step": 18550
},
{
"epoch": 1.1028788283079842,
"grad_norm": 0.06700051575899124,
"learning_rate": 7.112199242513939e-06,
"loss": 0.0034,
"step": 18600
},
{
"epoch": 1.1058436360402029,
"grad_norm": 0.5902153253555298,
"learning_rate": 7.097978326152205e-06,
"loss": 0.0031,
"step": 18650
},
{
"epoch": 1.1088084437724213,
"grad_norm": 0.08541566133499146,
"learning_rate": 7.0837367820511916e-06,
"loss": 0.0038,
"step": 18700
},
{
"epoch": 1.11177325150464,
"grad_norm": 0.33190247416496277,
"learning_rate": 7.069474750236586e-06,
"loss": 0.0043,
"step": 18750
},
{
"epoch": 1.1147380592368585,
"grad_norm": 0.038232989609241486,
"learning_rate": 7.055192370935517e-06,
"loss": 0.004,
"step": 18800
},
{
"epoch": 1.117702866969077,
"grad_norm": 0.06271852552890778,
"learning_rate": 7.040889784575171e-06,
"loss": 0.0025,
"step": 18850
},
{
"epoch": 1.1206676747012956,
"grad_norm": 0.044638846069574356,
"learning_rate": 7.026567131781414e-06,
"loss": 0.0033,
"step": 18900
},
{
"epoch": 1.1236324824335142,
"grad_norm": 2.2346765995025635,
"learning_rate": 7.012224553377413e-06,
"loss": 0.0044,
"step": 18950
},
{
"epoch": 1.1265972901657328,
"grad_norm": 1.7368957996368408,
"learning_rate": 6.9978621903822454e-06,
"loss": 0.0038,
"step": 19000
},
{
"epoch": 1.1295620978979513,
"grad_norm": 0.01738637126982212,
"learning_rate": 6.983480184009515e-06,
"loss": 0.0019,
"step": 19050
},
{
"epoch": 1.1325269056301699,
"grad_norm": 1.5712268352508545,
"learning_rate": 6.969078675665962e-06,
"loss": 0.0031,
"step": 19100
},
{
"epoch": 1.1354917133623885,
"grad_norm": 3.365665912628174,
"learning_rate": 6.954657806950074e-06,
"loss": 0.0045,
"step": 19150
},
{
"epoch": 1.138456521094607,
"grad_norm": 2.0195226669311523,
"learning_rate": 6.940217719650695e-06,
"loss": 0.0044,
"step": 19200
},
{
"epoch": 1.1414213288268256,
"grad_norm": 3.0657689571380615,
"learning_rate": 6.925758555745628e-06,
"loss": 0.0042,
"step": 19250
},
{
"epoch": 1.1443861365590442,
"grad_norm": 0.5422585010528564,
"learning_rate": 6.911280457400244e-06,
"loss": 0.0031,
"step": 19300
},
{
"epoch": 1.1473509442912628,
"grad_norm": 0.1482527256011963,
"learning_rate": 6.896783566966079e-06,
"loss": 0.0037,
"step": 19350
},
{
"epoch": 1.1503157520234812,
"grad_norm": 1.2458192110061646,
"learning_rate": 6.882268026979436e-06,
"loss": 0.0031,
"step": 19400
},
{
"epoch": 1.1532805597556999,
"grad_norm": 0.020457390695810318,
"learning_rate": 6.867733980159986e-06,
"loss": 0.0049,
"step": 19450
},
{
"epoch": 1.1562453674879185,
"grad_norm": 0.8948466181755066,
"learning_rate": 6.853181569409362e-06,
"loss": 0.0037,
"step": 19500
},
{
"epoch": 1.159210175220137,
"grad_norm": 0.7108380794525146,
"learning_rate": 6.8386109378097536e-06,
"loss": 0.0035,
"step": 19550
},
{
"epoch": 1.1621749829523556,
"grad_norm": 1.8224056959152222,
"learning_rate": 6.824022228622502e-06,
"loss": 0.0035,
"step": 19600
},
{
"epoch": 1.1651397906845742,
"grad_norm": 0.9493487477302551,
"learning_rate": 6.8094155852866915e-06,
"loss": 0.0028,
"step": 19650
},
{
"epoch": 1.1681045984167926,
"grad_norm": 0.969045877456665,
"learning_rate": 6.794791151417741e-06,
"loss": 0.0042,
"step": 19700
},
{
"epoch": 1.1710694061490112,
"grad_norm": 0.3356039524078369,
"learning_rate": 6.780149070805983e-06,
"loss": 0.0028,
"step": 19750
},
{
"epoch": 1.1740342138812299,
"grad_norm": 0.19961191713809967,
"learning_rate": 6.765489487415263e-06,
"loss": 0.0042,
"step": 19800
},
{
"epoch": 1.1769990216134483,
"grad_norm": 0.4981558322906494,
"learning_rate": 6.750812545381513e-06,
"loss": 0.0038,
"step": 19850
},
{
"epoch": 1.179963829345667,
"grad_norm": 3.4783573150634766,
"learning_rate": 6.73611838901134e-06,
"loss": 0.0041,
"step": 19900
},
{
"epoch": 1.1829286370778855,
"grad_norm": 0.049179501831531525,
"learning_rate": 6.721407162780605e-06,
"loss": 0.0035,
"step": 19950
},
{
"epoch": 1.185893444810104,
"grad_norm": 1.4858205318450928,
"learning_rate": 6.706679011333004e-06,
"loss": 0.0031,
"step": 20000
},
{
"epoch": 1.185893444810104,
"eval_loss": 0.1006452813744545,
"eval_runtime": 4425.7501,
"eval_samples_per_second": 1.323,
"eval_steps_per_second": 0.331,
"eval_wer": 24.584460200277757,
"step": 20000
},
{
"epoch": 1.1888582525423226,
"grad_norm": 1.8448115587234497,
"learning_rate": 6.691934079478648e-06,
"loss": 0.0041,
"step": 20050
},
{
"epoch": 1.1918230602745412,
"grad_norm": 1.0905754566192627,
"learning_rate": 6.67717251219263e-06,
"loss": 0.0047,
"step": 20100
},
{
"epoch": 1.1947878680067598,
"grad_norm": 0.17178182303905487,
"learning_rate": 6.662394454613611e-06,
"loss": 0.0049,
"step": 20150
},
{
"epoch": 1.1977526757389783,
"grad_norm": 1.1887060403823853,
"learning_rate": 6.647600052042384e-06,
"loss": 0.0041,
"step": 20200
},
{
"epoch": 1.200717483471197,
"grad_norm": 0.19539013504981995,
"learning_rate": 6.632789449940454e-06,
"loss": 0.0029,
"step": 20250
},
{
"epoch": 1.2036822912034155,
"grad_norm": 0.24206945300102234,
"learning_rate": 6.617962793928598e-06,
"loss": 0.0041,
"step": 20300
},
{
"epoch": 1.206647098935634,
"grad_norm": 0.26349180936813354,
"learning_rate": 6.603120229785443e-06,
"loss": 0.0045,
"step": 20350
},
{
"epoch": 1.2096119066678526,
"grad_norm": 0.38725215196609497,
"learning_rate": 6.588261903446022e-06,
"loss": 0.006,
"step": 20400
},
{
"epoch": 1.2125767144000712,
"grad_norm": 0.3836526870727539,
"learning_rate": 6.57338796100035e-06,
"loss": 0.0037,
"step": 20450
},
{
"epoch": 1.2155415221322898,
"grad_norm": 0.07236127555370331,
"learning_rate": 6.558498548691984e-06,
"loss": 0.0049,
"step": 20500
},
{
"epoch": 1.2185063298645082,
"grad_norm": 0.2256426066160202,
"learning_rate": 6.543593812916576e-06,
"loss": 0.0034,
"step": 20550
},
{
"epoch": 1.2214711375967269,
"grad_norm": 0.41789326071739197,
"learning_rate": 6.528673900220449e-06,
"loss": 0.0023,
"step": 20600
},
{
"epoch": 1.2244359453289455,
"grad_norm": 1.1350836753845215,
"learning_rate": 6.513738957299145e-06,
"loss": 0.0034,
"step": 20650
},
{
"epoch": 1.227400753061164,
"grad_norm": 1.7438832521438599,
"learning_rate": 6.4987891309959885e-06,
"loss": 0.0037,
"step": 20700
},
{
"epoch": 1.2303655607933826,
"grad_norm": 0.8658862113952637,
"learning_rate": 6.483824568300636e-06,
"loss": 0.0037,
"step": 20750
},
{
"epoch": 1.2333303685256012,
"grad_norm": 0.012403754517436028,
"learning_rate": 6.46884541634764e-06,
"loss": 0.0038,
"step": 20800
},
{
"epoch": 1.2362951762578196,
"grad_norm": 0.08823706954717636,
"learning_rate": 6.453851822414994e-06,
"loss": 0.0063,
"step": 20850
},
{
"epoch": 1.2392599839900382,
"grad_norm": 2.1406474113464355,
"learning_rate": 6.438843933922691e-06,
"loss": 0.0038,
"step": 20900
},
{
"epoch": 1.2422247917222569,
"grad_norm": 0.23070839047431946,
"learning_rate": 6.423821898431266e-06,
"loss": 0.0043,
"step": 20950
},
{
"epoch": 1.2451895994544753,
"grad_norm": 2.4584333896636963,
"learning_rate": 6.4087858636403565e-06,
"loss": 0.0028,
"step": 21000
},
{
"epoch": 1.248154407186694,
"grad_norm": 0.4119054675102234,
"learning_rate": 6.393735977387239e-06,
"loss": 0.0042,
"step": 21050
},
{
"epoch": 1.2511192149189125,
"grad_norm": 0.22328363358974457,
"learning_rate": 6.3786723876453835e-06,
"loss": 0.0023,
"step": 21100
},
{
"epoch": 1.254084022651131,
"grad_norm": 0.10614093393087387,
"learning_rate": 6.363595242522993e-06,
"loss": 0.0023,
"step": 21150
},
{
"epoch": 1.2570488303833496,
"grad_norm": 1.6743851900100708,
"learning_rate": 6.3485046902615495e-06,
"loss": 0.004,
"step": 21200
},
{
"epoch": 1.2600136381155682,
"grad_norm": 0.046557892113924026,
"learning_rate": 6.333400879234359e-06,
"loss": 0.0019,
"step": 21250
},
{
"epoch": 1.2629784458477868,
"grad_norm": 1.6077167987823486,
"learning_rate": 6.318283957945091e-06,
"loss": 0.0029,
"step": 21300
},
{
"epoch": 1.2659432535800053,
"grad_norm": 0.8150466680526733,
"learning_rate": 6.303154075026312e-06,
"loss": 0.0032,
"step": 21350
},
{
"epoch": 1.268908061312224,
"grad_norm": 1.1461350917816162,
"learning_rate": 6.288011379238037e-06,
"loss": 0.0044,
"step": 21400
},
{
"epoch": 1.2718728690444425,
"grad_norm": 0.021177906543016434,
"learning_rate": 6.272856019466254e-06,
"loss": 0.0046,
"step": 21450
},
{
"epoch": 1.2748376767766612,
"grad_norm": 0.09657502174377441,
"learning_rate": 6.257688144721469e-06,
"loss": 0.0024,
"step": 21500
},
{
"epoch": 1.2778024845088796,
"grad_norm": 0.7494125366210938,
"learning_rate": 6.242507904137237e-06,
"loss": 0.0049,
"step": 21550
},
{
"epoch": 1.2807672922410982,
"grad_norm": 1.2902694940567017,
"learning_rate": 6.227315446968697e-06,
"loss": 0.005,
"step": 21600
},
{
"epoch": 1.2837320999733168,
"grad_norm": 0.059971459209918976,
"learning_rate": 6.212110922591099e-06,
"loss": 0.0045,
"step": 21650
},
{
"epoch": 1.2866969077055352,
"grad_norm": 1.4867457151412964,
"learning_rate": 6.196894480498349e-06,
"loss": 0.0038,
"step": 21700
},
{
"epoch": 1.2896617154377539,
"grad_norm": 0.17376358807086945,
"learning_rate": 6.181666270301524e-06,
"loss": 0.0048,
"step": 21750
},
{
"epoch": 1.2926265231699725,
"grad_norm": 1.8240177631378174,
"learning_rate": 6.166426441727409e-06,
"loss": 0.0031,
"step": 21800
},
{
"epoch": 1.295591330902191,
"grad_norm": 0.4864703118801117,
"learning_rate": 6.151175144617023e-06,
"loss": 0.004,
"step": 21850
},
{
"epoch": 1.2985561386344096,
"grad_norm": 1.6870369911193848,
"learning_rate": 6.135912528924145e-06,
"loss": 0.0042,
"step": 21900
},
{
"epoch": 1.3015209463666282,
"grad_norm": 0.1494980752468109,
"learning_rate": 6.120638744713844e-06,
"loss": 0.0057,
"step": 21950
},
{
"epoch": 1.3044857540988466,
"grad_norm": 0.04229666665196419,
"learning_rate": 6.105353942160994e-06,
"loss": 0.0029,
"step": 22000
},
{
"epoch": 1.3074505618310652,
"grad_norm": 0.2506253123283386,
"learning_rate": 6.090058271548812e-06,
"loss": 0.0025,
"step": 22050
},
{
"epoch": 1.3104153695632839,
"grad_norm": 0.03190387040376663,
"learning_rate": 6.0747518832673625e-06,
"loss": 0.0039,
"step": 22100
},
{
"epoch": 1.3133801772955023,
"grad_norm": 0.17619547247886658,
"learning_rate": 6.059434927812096e-06,
"loss": 0.0029,
"step": 22150
},
{
"epoch": 1.316344985027721,
"grad_norm": 0.07416270673274994,
"learning_rate": 6.044107555782359e-06,
"loss": 0.0022,
"step": 22200
},
{
"epoch": 1.3193097927599395,
"grad_norm": 0.5905612111091614,
"learning_rate": 6.028769917879912e-06,
"loss": 0.0036,
"step": 22250
},
{
"epoch": 1.322274600492158,
"grad_norm": 0.16600991785526276,
"learning_rate": 6.013422164907456e-06,
"loss": 0.0026,
"step": 22300
},
{
"epoch": 1.3252394082243766,
"grad_norm": 0.02765166386961937,
"learning_rate": 5.998064447767145e-06,
"loss": 0.0048,
"step": 22350
},
{
"epoch": 1.3282042159565952,
"grad_norm": 0.03902462497353554,
"learning_rate": 5.982696917459102e-06,
"loss": 0.0036,
"step": 22400
},
{
"epoch": 1.3311690236888138,
"grad_norm": 0.9107586741447449,
"learning_rate": 5.967319725079935e-06,
"loss": 0.0046,
"step": 22450
},
{
"epoch": 1.3341338314210325,
"grad_norm": 0.13513422012329102,
"learning_rate": 5.951933021821251e-06,
"loss": 0.0026,
"step": 22500
},
{
"epoch": 1.3341338314210325,
"eval_loss": 0.1039728969335556,
"eval_runtime": 3797.1697,
"eval_samples_per_second": 1.542,
"eval_steps_per_second": 0.386,
"eval_wer": 13.602806812367518,
"step": 22500
},
{
"epoch": 1.337098639153251,
"grad_norm": 0.12307656556367874,
"learning_rate": 5.93653695896817e-06,
"loss": 0.0036,
"step": 22550
},
{
"epoch": 1.3400634468854695,
"grad_norm": 1.4716602563858032,
"learning_rate": 5.921131687897837e-06,
"loss": 0.004,
"step": 22600
},
{
"epoch": 1.3430282546176882,
"grad_norm": 0.6326406002044678,
"learning_rate": 5.905717360077936e-06,
"loss": 0.0025,
"step": 22650
},
{
"epoch": 1.3459930623499066,
"grad_norm": 0.0671234279870987,
"learning_rate": 5.890294127065196e-06,
"loss": 0.0046,
"step": 22700
},
{
"epoch": 1.3489578700821252,
"grad_norm": 0.08047901093959808,
"learning_rate": 5.8748621405039056e-06,
"loss": 0.0051,
"step": 22750
},
{
"epoch": 1.3519226778143438,
"grad_norm": 0.06256826221942902,
"learning_rate": 5.859730447208026e-06,
"loss": 0.0021,
"step": 22800
},
{
"epoch": 1.3548874855465622,
"grad_norm": 0.02325344830751419,
"learning_rate": 5.844281576337013e-06,
"loss": 0.0053,
"step": 22850
},
{
"epoch": 1.3578522932787809,
"grad_norm": 1.1424200534820557,
"learning_rate": 5.82882440432198e-06,
"loss": 0.0038,
"step": 22900
},
{
"epoch": 1.3608171010109995,
"grad_norm": 0.3276439607143402,
"learning_rate": 5.813359083140911e-06,
"loss": 0.0033,
"step": 22950
},
{
"epoch": 1.363781908743218,
"grad_norm": 0.19697032868862152,
"learning_rate": 5.7978857648519115e-06,
"loss": 0.0035,
"step": 23000
},
{
"epoch": 1.3667467164754366,
"grad_norm": 1.1077672243118286,
"learning_rate": 5.782404601591718e-06,
"loss": 0.0038,
"step": 23050
},
{
"epoch": 1.3697115242076552,
"grad_norm": 1.1511179208755493,
"learning_rate": 5.7669157455742016e-06,
"loss": 0.0052,
"step": 23100
},
{
"epoch": 1.3726763319398736,
"grad_norm": 0.051693856716156006,
"learning_rate": 5.7514193490888685e-06,
"loss": 0.0043,
"step": 23150
},
{
"epoch": 1.3756411396720922,
"grad_norm": 0.1749086230993271,
"learning_rate": 5.735915564499364e-06,
"loss": 0.0039,
"step": 23200
},
{
"epoch": 1.3786059474043109,
"grad_norm": 0.1431511789560318,
"learning_rate": 5.7204045442419735e-06,
"loss": 0.003,
"step": 23250
},
{
"epoch": 1.3815707551365293,
"grad_norm": 0.014357727020978928,
"learning_rate": 5.70488644082413e-06,
"loss": 0.0034,
"step": 23300
},
{
"epoch": 1.384535562868748,
"grad_norm": 0.3107154667377472,
"learning_rate": 5.6893614068229044e-06,
"loss": 0.005,
"step": 23350
},
{
"epoch": 1.3875003706009665,
"grad_norm": 0.5827867388725281,
"learning_rate": 5.673829594883511e-06,
"loss": 0.0036,
"step": 23400
},
{
"epoch": 1.3904651783331852,
"grad_norm": 0.4399601221084595,
"learning_rate": 5.658291157717809e-06,
"loss": 0.0039,
"step": 23450
},
{
"epoch": 1.3934299860654036,
"grad_norm": 0.3615168035030365,
"learning_rate": 5.642746248102795e-06,
"loss": 0.0029,
"step": 23500
},
{
"epoch": 1.3963947937976222,
"grad_norm": 0.10081396996974945,
"learning_rate": 5.627195018879107e-06,
"loss": 0.004,
"step": 23550
},
{
"epoch": 1.3993596015298408,
"grad_norm": 1.02580988407135,
"learning_rate": 5.611637622949517e-06,
"loss": 0.0027,
"step": 23600
},
{
"epoch": 1.4023244092620595,
"grad_norm": 0.8904252648353577,
"learning_rate": 5.59607421327743e-06,
"loss": 0.0031,
"step": 23650
},
{
"epoch": 1.405289216994278,
"grad_norm": 0.36355146765708923,
"learning_rate": 5.5805049428853744e-06,
"loss": 0.0021,
"step": 23700
},
{
"epoch": 1.4082540247264965,
"grad_norm": 0.9062953591346741,
"learning_rate": 5.564929964853512e-06,
"loss": 0.0045,
"step": 23750
},
{
"epoch": 1.4112188324587152,
"grad_norm": 0.27617210149765015,
"learning_rate": 5.549349432318114e-06,
"loss": 0.0028,
"step": 23800
},
{
"epoch": 1.4141836401909336,
"grad_norm": 1.4333345890045166,
"learning_rate": 5.533763498470073e-06,
"loss": 0.0059,
"step": 23850
},
{
"epoch": 1.4171484479231522,
"grad_norm": 0.024424336850643158,
"learning_rate": 5.518172316553378e-06,
"loss": 0.0037,
"step": 23900
},
{
"epoch": 1.4201132556553708,
"grad_norm": 1.163394808769226,
"learning_rate": 5.502576039863629e-06,
"loss": 0.0048,
"step": 23950
},
{
"epoch": 1.4230780633875892,
"grad_norm": 0.915595293045044,
"learning_rate": 5.486974821746512e-06,
"loss": 0.0026,
"step": 24000
},
{
"epoch": 1.4260428711198079,
"grad_norm": 0.47064208984375,
"learning_rate": 5.471368815596299e-06,
"loss": 0.0035,
"step": 24050
},
{
"epoch": 1.4290076788520265,
"grad_norm": 1.8702881336212158,
"learning_rate": 5.455758174854341e-06,
"loss": 0.0028,
"step": 24100
},
{
"epoch": 1.431972486584245,
"grad_norm": 1.371960163116455,
"learning_rate": 5.440143053007554e-06,
"loss": 0.0037,
"step": 24150
},
{
"epoch": 1.4349372943164636,
"grad_norm": 2.1173136234283447,
"learning_rate": 5.424523603586917e-06,
"loss": 0.0048,
"step": 24200
},
{
"epoch": 1.4379021020486822,
"grad_norm": 0.10133637487888336,
"learning_rate": 5.408899980165957e-06,
"loss": 0.0045,
"step": 24250
},
{
"epoch": 1.4408669097809006,
"grad_norm": 0.07063573598861694,
"learning_rate": 5.393272336359238e-06,
"loss": 0.0018,
"step": 24300
},
{
"epoch": 1.4438317175131192,
"grad_norm": 1.8741753101348877,
"learning_rate": 5.377640825820856e-06,
"loss": 0.0038,
"step": 24350
},
{
"epoch": 1.4467965252453379,
"grad_norm": 1.05840003490448,
"learning_rate": 5.362005602242928e-06,
"loss": 0.0028,
"step": 24400
},
{
"epoch": 1.4497613329775563,
"grad_norm": 0.07426287978887558,
"learning_rate": 5.346366819354072e-06,
"loss": 0.0034,
"step": 24450
},
{
"epoch": 1.452726140709775,
"grad_norm": 0.6569198369979858,
"learning_rate": 5.330724630917905e-06,
"loss": 0.0034,
"step": 24500
},
{
"epoch": 1.4556909484419935,
"grad_norm": 1.0480008125305176,
"learning_rate": 5.31507919073153e-06,
"loss": 0.004,
"step": 24550
},
{
"epoch": 1.4586557561742122,
"grad_norm": 3.5745363235473633,
"learning_rate": 5.299430652624019e-06,
"loss": 0.0045,
"step": 24600
},
{
"epoch": 1.4616205639064308,
"grad_norm": 0.3077293634414673,
"learning_rate": 5.2837791704549056e-06,
"loss": 0.002,
"step": 24650
},
{
"epoch": 1.4645853716386492,
"grad_norm": 0.7704965472221375,
"learning_rate": 5.268124898112668e-06,
"loss": 0.0028,
"step": 24700
},
{
"epoch": 1.4675501793708678,
"grad_norm": 1.4390088319778442,
"learning_rate": 5.252467989513217e-06,
"loss": 0.0033,
"step": 24750
},
{
"epoch": 1.4705149871030865,
"grad_norm": 0.019557828083634377,
"learning_rate": 5.236808598598388e-06,
"loss": 0.003,
"step": 24800
},
{
"epoch": 1.473479794835305,
"grad_norm": 0.04724704474210739,
"learning_rate": 5.2211468793344164e-06,
"loss": 0.0023,
"step": 24850
},
{
"epoch": 1.4764446025675235,
"grad_norm": 0.11611367762088776,
"learning_rate": 5.2054829857104395e-06,
"loss": 0.0022,
"step": 24900
},
{
"epoch": 1.4794094102997422,
"grad_norm": 0.8246281743049622,
"learning_rate": 5.189817071736965e-06,
"loss": 0.0024,
"step": 24950
},
{
"epoch": 1.4823742180319606,
"grad_norm": 0.017677294090390205,
"learning_rate": 5.174149291444369e-06,
"loss": 0.0041,
"step": 25000
},
{
"epoch": 1.4823742180319606,
"eval_loss": 0.10340176522731781,
"eval_runtime": 3475.2421,
"eval_samples_per_second": 1.685,
"eval_steps_per_second": 0.421,
"eval_wer": 7.892697902200132,
"step": 25000
},
{
"epoch": 1.4853390257641792,
"grad_norm": 0.24159014225006104,
"learning_rate": 5.158479798881378e-06,
"loss": 0.0043,
"step": 25050
},
{
"epoch": 1.4883038334963978,
"grad_norm": 0.09349126368761063,
"learning_rate": 5.142808748113552e-06,
"loss": 0.0021,
"step": 25100
},
{
"epoch": 1.4912686412286162,
"grad_norm": 9.830806732177734,
"learning_rate": 5.127136293221768e-06,
"loss": 0.003,
"step": 25150
},
{
"epoch": 1.4942334489608349,
"grad_norm": 0.02017897740006447,
"learning_rate": 5.11146258830072e-06,
"loss": 0.0018,
"step": 25200
},
{
"epoch": 1.4971982566930535,
"grad_norm": 0.9748997092247009,
"learning_rate": 5.095787787457381e-06,
"loss": 0.0028,
"step": 25250
},
{
"epoch": 1.500163064425272,
"grad_norm": 0.08710702508687973,
"learning_rate": 5.080112044809507e-06,
"loss": 0.0045,
"step": 25300
},
{
"epoch": 1.5031278721574906,
"grad_norm": 0.12617884576320648,
"learning_rate": 5.064435514484107e-06,
"loss": 0.0041,
"step": 25350
},
{
"epoch": 1.5060926798897092,
"grad_norm": 1.1999905109405518,
"learning_rate": 5.04875835061594e-06,
"loss": 0.0028,
"step": 25400
},
{
"epoch": 1.5090574876219276,
"grad_norm": 0.6879501342773438,
"learning_rate": 5.033080707345994e-06,
"loss": 0.0027,
"step": 25450
},
{
"epoch": 1.5120222953541462,
"grad_norm": 0.3369424641132355,
"learning_rate": 5.017402738819968e-06,
"loss": 0.0036,
"step": 25500
},
{
"epoch": 1.5149871030863649,
"grad_norm": 0.05699804425239563,
"learning_rate": 5.001724599186759e-06,
"loss": 0.003,
"step": 25550
},
{
"epoch": 1.5179519108185833,
"grad_norm": 0.15526382625102997,
"learning_rate": 4.986046442596949e-06,
"loss": 0.0044,
"step": 25600
},
{
"epoch": 1.5209167185508021,
"grad_norm": 0.13832417130470276,
"learning_rate": 4.970368423201286e-06,
"loss": 0.0027,
"step": 25650
},
{
"epoch": 1.5238815262830205,
"grad_norm": 0.35199618339538574,
"learning_rate": 4.954690695149166e-06,
"loss": 0.0028,
"step": 25700
},
{
"epoch": 1.526846334015239,
"grad_norm": 0.03731779381632805,
"learning_rate": 4.939013412587124e-06,
"loss": 0.0022,
"step": 25750
},
{
"epoch": 1.5298111417474578,
"grad_norm": 1.604335904121399,
"learning_rate": 4.923336729657312e-06,
"loss": 0.0014,
"step": 25800
},
{
"epoch": 1.5327759494796762,
"grad_norm": 0.13903003931045532,
"learning_rate": 4.90766080049599e-06,
"loss": 0.0037,
"step": 25850
},
{
"epoch": 1.5357407572118948,
"grad_norm": 0.04218101501464844,
"learning_rate": 4.891985779232004e-06,
"loss": 0.0028,
"step": 25900
},
{
"epoch": 1.5387055649441135,
"grad_norm": 2.855713129043579,
"learning_rate": 4.876311819985274e-06,
"loss": 0.0038,
"step": 25950
},
{
"epoch": 1.541670372676332,
"grad_norm": 0.18099339306354523,
"learning_rate": 4.860639076865275e-06,
"loss": 0.0023,
"step": 26000
},
{
"epoch": 1.5446351804085505,
"grad_norm": 0.24422989785671234,
"learning_rate": 4.844967703969532e-06,
"loss": 0.0019,
"step": 26050
},
{
"epoch": 1.5475999881407692,
"grad_norm": 0.45736047625541687,
"learning_rate": 4.829297855382093e-06,
"loss": 0.0034,
"step": 26100
},
{
"epoch": 1.5505647958729876,
"grad_norm": 0.041498664766550064,
"learning_rate": 4.81362968517202e-06,
"loss": 0.0031,
"step": 26150
},
{
"epoch": 1.5535296036052062,
"grad_norm": 0.7280819416046143,
"learning_rate": 4.797963347391869e-06,
"loss": 0.0027,
"step": 26200
},
{
"epoch": 1.5564944113374248,
"grad_norm": 0.19048987329006195,
"learning_rate": 4.782298996076183e-06,
"loss": 0.0018,
"step": 26250
},
{
"epoch": 1.5594592190696432,
"grad_norm": 0.026411147788167,
"learning_rate": 4.766636785239976e-06,
"loss": 0.0042,
"step": 26300
},
{
"epoch": 1.5624240268018619,
"grad_norm": 0.03540141507983208,
"learning_rate": 4.75097686887721e-06,
"loss": 0.0021,
"step": 26350
},
{
"epoch": 1.5653888345340805,
"grad_norm": 2.208853244781494,
"learning_rate": 4.735319400959291e-06,
"loss": 0.0019,
"step": 26400
},
{
"epoch": 1.568353642266299,
"grad_norm": 0.07108508050441742,
"learning_rate": 4.719664535433553e-06,
"loss": 0.0046,
"step": 26450
},
{
"epoch": 1.5713184499985176,
"grad_norm": 0.6509733200073242,
"learning_rate": 4.70401242622174e-06,
"loss": 0.0034,
"step": 26500
},
{
"epoch": 1.5742832577307362,
"grad_norm": 1.6000564098358154,
"learning_rate": 4.688363227218495e-06,
"loss": 0.0046,
"step": 26550
},
{
"epoch": 1.5772480654629546,
"grad_norm": 0.046783801168203354,
"learning_rate": 4.6727170922898495e-06,
"loss": 0.0026,
"step": 26600
},
{
"epoch": 1.5802128731951735,
"grad_norm": 0.15229929983615875,
"learning_rate": 4.657387001081686e-06,
"loss": 0.0041,
"step": 26650
},
{
"epoch": 1.5831776809273919,
"grad_norm": 0.048166148364543915,
"learning_rate": 4.641747386836947e-06,
"loss": 0.0029,
"step": 26700
},
{
"epoch": 1.5861424886596103,
"grad_norm": 0.14575374126434326,
"learning_rate": 4.626111295003e-06,
"loss": 0.0018,
"step": 26750
},
{
"epoch": 1.5891072963918291,
"grad_norm": 0.3450085520744324,
"learning_rate": 4.610478879317006e-06,
"loss": 0.002,
"step": 26800
},
{
"epoch": 1.5920721041240475,
"grad_norm": 0.020749246701598167,
"learning_rate": 4.594850293479977e-06,
"loss": 0.0036,
"step": 26850
},
{
"epoch": 1.5950369118562662,
"grad_norm": 0.039859525859355927,
"learning_rate": 4.5792256911552716e-06,
"loss": 0.0034,
"step": 26900
},
{
"epoch": 1.5980017195884848,
"grad_norm": 0.11719939112663269,
"learning_rate": 4.563605225967078e-06,
"loss": 0.0035,
"step": 26950
},
{
"epoch": 1.6009665273207032,
"grad_norm": 2.0398378372192383,
"learning_rate": 4.547989051498915e-06,
"loss": 0.003,
"step": 27000
},
{
"epoch": 1.6039313350529218,
"grad_norm": 0.5318859815597534,
"learning_rate": 4.532377321292105e-06,
"loss": 0.0025,
"step": 27050
},
{
"epoch": 1.6068961427851405,
"grad_norm": 0.13859181106090546,
"learning_rate": 4.51677018884428e-06,
"loss": 0.0037,
"step": 27100
},
{
"epoch": 1.609860950517359,
"grad_norm": 0.6273730993270874,
"learning_rate": 4.50116780760786e-06,
"loss": 0.0036,
"step": 27150
},
{
"epoch": 1.6128257582495775,
"grad_norm": 0.09279701858758926,
"learning_rate": 4.485570330988558e-06,
"loss": 0.003,
"step": 27200
},
{
"epoch": 1.6157905659817962,
"grad_norm": 1.7686452865600586,
"learning_rate": 4.4699779123438605e-06,
"loss": 0.0027,
"step": 27250
},
{
"epoch": 1.6187553737140146,
"grad_norm": 0.1289617270231247,
"learning_rate": 4.45470239706685e-06,
"loss": 0.0016,
"step": 27300
},
{
"epoch": 1.6217201814462332,
"grad_norm": 0.01868435926735401,
"learning_rate": 4.4391204454510306e-06,
"loss": 0.0033,
"step": 27350
},
{
"epoch": 1.6246849891784518,
"grad_norm": 0.004626968875527382,
"learning_rate": 4.423544008514307e-06,
"loss": 0.0029,
"step": 27400
},
{
"epoch": 1.6276497969106702,
"grad_norm": 0.17620013654232025,
"learning_rate": 4.407973239407297e-06,
"loss": 0.0025,
"step": 27450
},
{
"epoch": 1.6306146046428889,
"grad_norm": 0.5455169081687927,
"learning_rate": 4.392408291224895e-06,
"loss": 0.0024,
"step": 27500
},
{
"epoch": 1.6306146046428889,
"eval_loss": 0.09911302477121353,
"eval_runtime": 3697.4743,
"eval_samples_per_second": 1.584,
"eval_steps_per_second": 0.396,
"eval_wer": 10.813537022147504,
"step": 27500
},
{
"epoch": 1.6335794123751075,
"grad_norm": 0.31777656078338623,
"learning_rate": 4.376849317004758e-06,
"loss": 0.0021,
"step": 27550
},
{
"epoch": 1.636544220107326,
"grad_norm": 0.11544878035783768,
"learning_rate": 4.361296469725813e-06,
"loss": 0.0034,
"step": 27600
},
{
"epoch": 1.6395090278395446,
"grad_norm": 0.1291680634021759,
"learning_rate": 4.345749902306739e-06,
"loss": 0.0026,
"step": 27650
},
{
"epoch": 1.6424738355717632,
"grad_norm": 0.03346523270010948,
"learning_rate": 4.33020976760447e-06,
"loss": 0.0025,
"step": 27700
},
{
"epoch": 1.6454386433039816,
"grad_norm": 0.014625953510403633,
"learning_rate": 4.3146762184126985e-06,
"loss": 0.0026,
"step": 27750
},
{
"epoch": 1.6484034510362005,
"grad_norm": 1.6268267631530762,
"learning_rate": 4.29914940746036e-06,
"loss": 0.0045,
"step": 27800
},
{
"epoch": 1.6513682587684189,
"grad_norm": 0.19663332402706146,
"learning_rate": 4.283629487410143e-06,
"loss": 0.0044,
"step": 27850
},
{
"epoch": 1.6543330665006373,
"grad_norm": 0.735085666179657,
"learning_rate": 4.26811661085698e-06,
"loss": 0.0025,
"step": 27900
},
{
"epoch": 1.6572978742328561,
"grad_norm": 1.0356351137161255,
"learning_rate": 4.252610930326554e-06,
"loss": 0.0029,
"step": 27950
},
{
"epoch": 1.6602626819650745,
"grad_norm": 1.1497199535369873,
"learning_rate": 4.237112598273793e-06,
"loss": 0.0032,
"step": 28000
},
{
"epoch": 1.6632274896972932,
"grad_norm": 0.06777022778987885,
"learning_rate": 4.221621767081372e-06,
"loss": 0.0028,
"step": 28050
},
{
"epoch": 1.6661922974295118,
"grad_norm": 0.1217995136976242,
"learning_rate": 4.206138589058217e-06,
"loss": 0.0014,
"step": 28100
},
{
"epoch": 1.6691571051617302,
"grad_norm": 2.1839497089385986,
"learning_rate": 4.190663216438011e-06,
"loss": 0.0036,
"step": 28150
},
{
"epoch": 1.6721219128939488,
"grad_norm": 0.40566369891166687,
"learning_rate": 4.1751958013776875e-06,
"loss": 0.0049,
"step": 28200
},
{
"epoch": 1.6750867206261675,
"grad_norm": 0.15384216606616974,
"learning_rate": 4.159736495955937e-06,
"loss": 0.0026,
"step": 28250
},
{
"epoch": 1.678051528358386,
"grad_norm": 0.9062075018882751,
"learning_rate": 4.1442854521717245e-06,
"loss": 0.0027,
"step": 28300
},
{
"epoch": 1.6810163360906045,
"grad_norm": 0.2344817817211151,
"learning_rate": 4.128842821942776e-06,
"loss": 0.0021,
"step": 28350
},
{
"epoch": 1.6839811438228232,
"grad_norm": 0.011048096232116222,
"learning_rate": 4.113408757104098e-06,
"loss": 0.0034,
"step": 28400
},
{
"epoch": 1.6869459515550416,
"grad_norm": 0.42319443821907043,
"learning_rate": 4.097983409406478e-06,
"loss": 0.0033,
"step": 28450
},
{
"epoch": 1.6899107592872602,
"grad_norm": 0.3838003873825073,
"learning_rate": 4.082566930514997e-06,
"loss": 0.0014,
"step": 28500
},
{
"epoch": 1.6928755670194788,
"grad_norm": 0.0286489836871624,
"learning_rate": 4.067159472007533e-06,
"loss": 0.0017,
"step": 28550
},
{
"epoch": 1.6958403747516972,
"grad_norm": 0.014939649030566216,
"learning_rate": 4.051761185373279e-06,
"loss": 0.0039,
"step": 28600
},
{
"epoch": 1.6988051824839159,
"grad_norm": 0.0331413671374321,
"learning_rate": 4.036372222011243e-06,
"loss": 0.0029,
"step": 28650
},
{
"epoch": 1.7017699902161345,
"grad_norm": 0.029739579185843468,
"learning_rate": 4.020992733228767e-06,
"loss": 0.0021,
"step": 28700
},
{
"epoch": 1.704734797948353,
"grad_norm": 0.03251701593399048,
"learning_rate": 4.005622870240038e-06,
"loss": 0.0026,
"step": 28750
},
{
"epoch": 1.7076996056805718,
"grad_norm": 0.49799245595932007,
"learning_rate": 3.990262784164598e-06,
"loss": 0.0026,
"step": 28800
},
{
"epoch": 1.7106644134127902,
"grad_norm": 0.08971042931079865,
"learning_rate": 3.974912626025864e-06,
"loss": 0.0014,
"step": 28850
},
{
"epoch": 1.7136292211450086,
"grad_norm": 0.011349070817232132,
"learning_rate": 3.959572546749634e-06,
"loss": 0.0014,
"step": 28900
},
{
"epoch": 1.7165940288772275,
"grad_norm": 0.015758298337459564,
"learning_rate": 3.9442426971626156e-06,
"loss": 0.002,
"step": 28950
},
{
"epoch": 1.7195588366094459,
"grad_norm": 1.6579985618591309,
"learning_rate": 3.92892322799093e-06,
"loss": 0.0037,
"step": 29000
},
{
"epoch": 1.7225236443416645,
"grad_norm": 0.313670814037323,
"learning_rate": 3.913614289858639e-06,
"loss": 0.003,
"step": 29050
},
{
"epoch": 1.7254884520738831,
"grad_norm": 0.3589758276939392,
"learning_rate": 3.898316033286261e-06,
"loss": 0.0019,
"step": 29100
},
{
"epoch": 1.7284532598061015,
"grad_norm": 0.1357835978269577,
"learning_rate": 3.883028608689291e-06,
"loss": 0.0026,
"step": 29150
},
{
"epoch": 1.7314180675383202,
"grad_norm": 0.12351395934820175,
"learning_rate": 3.86775216637672e-06,
"loss": 0.0034,
"step": 29200
},
{
"epoch": 1.7343828752705388,
"grad_norm": 0.27879443764686584,
"learning_rate": 3.852486856549564e-06,
"loss": 0.0031,
"step": 29250
},
{
"epoch": 1.7373476830027572,
"grad_norm": 0.11169170588254929,
"learning_rate": 3.837232829299375e-06,
"loss": 0.0026,
"step": 29300
},
{
"epoch": 1.7403124907349758,
"grad_norm": 0.17126189172267914,
"learning_rate": 3.821990234606778e-06,
"loss": 0.0026,
"step": 29350
},
{
"epoch": 1.7432772984671945,
"grad_norm": 0.012666056863963604,
"learning_rate": 3.8067592223399908e-06,
"loss": 0.003,
"step": 29400
},
{
"epoch": 1.746242106199413,
"grad_norm": 0.03582064434885979,
"learning_rate": 3.7915399422533466e-06,
"loss": 0.0021,
"step": 29450
},
{
"epoch": 1.7492069139316315,
"grad_norm": 0.26408132910728455,
"learning_rate": 3.7763325439858288e-06,
"loss": 0.0032,
"step": 29500
},
{
"epoch": 1.7521717216638502,
"grad_norm": 0.6645041108131409,
"learning_rate": 3.761137177059594e-06,
"loss": 0.004,
"step": 29550
},
{
"epoch": 1.7551365293960686,
"grad_norm": 0.7976374626159668,
"learning_rate": 3.7459539908785057e-06,
"loss": 0.0026,
"step": 29600
},
{
"epoch": 1.7581013371282872,
"grad_norm": 1.91818106174469,
"learning_rate": 3.7307831347266653e-06,
"loss": 0.0037,
"step": 29650
},
{
"epoch": 1.7610661448605058,
"grad_norm": 0.10215174406766891,
"learning_rate": 3.7156247577669413e-06,
"loss": 0.0058,
"step": 29700
},
{
"epoch": 1.7640309525927242,
"grad_norm": 0.7943007349967957,
"learning_rate": 3.7004790090395043e-06,
"loss": 0.0041,
"step": 29750
},
{
"epoch": 1.7669957603249429,
"grad_norm": 0.08547532558441162,
"learning_rate": 3.6853460374603613e-06,
"loss": 0.0031,
"step": 29800
},
{
"epoch": 1.7699605680571615,
"grad_norm": 0.7651055455207825,
"learning_rate": 3.67022599181989e-06,
"loss": 0.0019,
"step": 29850
},
{
"epoch": 1.77292537578938,
"grad_norm": 0.016290869563817978,
"learning_rate": 3.6551190207813836e-06,
"loss": 0.002,
"step": 29900
},
{
"epoch": 1.7758901835215988,
"grad_norm": 0.018452471122145653,
"learning_rate": 3.640025272879578e-06,
"loss": 0.0038,
"step": 29950
},
{
"epoch": 1.7788549912538172,
"grad_norm": 0.3965455889701843,
"learning_rate": 3.624944896519198e-06,
"loss": 0.0039,
"step": 30000
},
{
"epoch": 1.7788549912538172,
"eval_loss": 0.09763780981302261,
"eval_runtime": 1933.8956,
"eval_samples_per_second": 3.028,
"eval_steps_per_second": 0.757,
"eval_wer": 17.889043198596593,
"step": 30000
},
{
"epoch": 1.7818197989860356,
"grad_norm": 0.29020628333091736,
"learning_rate": 3.609878039973498e-06,
"loss": 0.0019,
"step": 30050
},
{
"epoch": 1.7847846067182545,
"grad_norm": 0.07447979599237442,
"learning_rate": 3.5948248513828e-06,
"loss": 0.0032,
"step": 30100
},
{
"epoch": 1.7877494144504729,
"grad_norm": 0.13018357753753662,
"learning_rate": 3.5797854787530433e-06,
"loss": 0.0022,
"step": 30150
},
{
"epoch": 1.7907142221826915,
"grad_norm": 0.5166714191436768,
"learning_rate": 3.564760069954323e-06,
"loss": 0.0028,
"step": 30200
},
{
"epoch": 1.7936790299149101,
"grad_norm": 0.013915842399001122,
"learning_rate": 3.5497487727194405e-06,
"loss": 0.0024,
"step": 30250
},
{
"epoch": 1.7966438376471285,
"grad_norm": 0.0228273943066597,
"learning_rate": 3.534751734642451e-06,
"loss": 0.0025,
"step": 30300
},
{
"epoch": 1.7996086453793472,
"grad_norm": 0.06491345912218094,
"learning_rate": 3.5197691031772095e-06,
"loss": 0.0032,
"step": 30350
},
{
"epoch": 1.8025734531115658,
"grad_norm": 0.07135559618473053,
"learning_rate": 3.504801025635921e-06,
"loss": 0.0014,
"step": 30400
},
{
"epoch": 1.8055382608437842,
"grad_norm": 1.0214452743530273,
"learning_rate": 3.489847649187693e-06,
"loss": 0.0012,
"step": 30450
},
{
"epoch": 1.8085030685760028,
"grad_norm": 0.09079097956418991,
"learning_rate": 3.474909120857094e-06,
"loss": 0.0035,
"step": 30500
},
{
"epoch": 1.8114678763082215,
"grad_norm": 1.4023202657699585,
"learning_rate": 3.4599855875226967e-06,
"loss": 0.0039,
"step": 30550
},
{
"epoch": 1.81443268404044,
"grad_norm": 2.584686279296875,
"learning_rate": 3.4450771959156437e-06,
"loss": 0.0033,
"step": 30600
},
{
"epoch": 1.8173974917726585,
"grad_norm": 0.04744521901011467,
"learning_rate": 3.430184092618199e-06,
"loss": 0.0038,
"step": 30650
},
{
"epoch": 1.8203622995048772,
"grad_norm": 0.03454584628343582,
"learning_rate": 3.4153064240623113e-06,
"loss": 0.0015,
"step": 30700
},
{
"epoch": 1.8233271072370956,
"grad_norm": 0.012815977446734905,
"learning_rate": 3.4004443365281703e-06,
"loss": 0.0009,
"step": 30750
},
{
"epoch": 1.8262919149693142,
"grad_norm": 1.009682297706604,
"learning_rate": 3.3855979761427705e-06,
"loss": 0.0023,
"step": 30800
},
{
"epoch": 1.8292567227015328,
"grad_norm": 0.026725037023425102,
"learning_rate": 3.370767488878471e-06,
"loss": 0.0025,
"step": 30850
},
{
"epoch": 1.8322215304337512,
"grad_norm": 0.06415297836065292,
"learning_rate": 3.3559530205515705e-06,
"loss": 0.0016,
"step": 30900
},
{
"epoch": 1.83518633816597,
"grad_norm": 0.007746212650090456,
"learning_rate": 3.341154716820857e-06,
"loss": 0.0018,
"step": 30950
},
{
"epoch": 1.8381511458981885,
"grad_norm": 0.04692048206925392,
"learning_rate": 3.3263727231861942e-06,
"loss": 0.0016,
"step": 31000
},
{
"epoch": 1.841115953630407,
"grad_norm": 1.4830511808395386,
"learning_rate": 3.3116071849870746e-06,
"loss": 0.0019,
"step": 31050
},
{
"epoch": 1.8440807613626258,
"grad_norm": 0.0519493967294693,
"learning_rate": 3.2968582474012e-06,
"loss": 0.0009,
"step": 31100
},
{
"epoch": 1.8470455690948442,
"grad_norm": 0.21671843528747559,
"learning_rate": 3.2821260554430538e-06,
"loss": 0.0009,
"step": 31150
},
{
"epoch": 1.8500103768270628,
"grad_norm": 0.04223645478487015,
"learning_rate": 3.26741075396247e-06,
"loss": 0.0027,
"step": 31200
},
{
"epoch": 1.8529751845592815,
"grad_norm": 0.03890330716967583,
"learning_rate": 3.252712487643214e-06,
"loss": 0.0024,
"step": 31250
},
{
"epoch": 1.8559399922914999,
"grad_norm": 0.01904849335551262,
"learning_rate": 3.2380314010015543e-06,
"loss": 0.002,
"step": 31300
},
{
"epoch": 1.8589048000237185,
"grad_norm": 0.03356530889868736,
"learning_rate": 3.2233676383848533e-06,
"loss": 0.0021,
"step": 31350
},
{
"epoch": 1.8618696077559371,
"grad_norm": 0.07836949825286865,
"learning_rate": 3.2087213439701326e-06,
"loss": 0.003,
"step": 31400
},
{
"epoch": 1.8648344154881555,
"grad_norm": 1.2145752906799316,
"learning_rate": 3.1940926617626655e-06,
"loss": 0.0023,
"step": 31450
},
{
"epoch": 1.8677992232203742,
"grad_norm": 0.08941322565078735,
"learning_rate": 3.179481735594558e-06,
"loss": 0.0027,
"step": 31500
},
{
"epoch": 1.8707640309525928,
"grad_norm": 0.030275221914052963,
"learning_rate": 3.164888709123338e-06,
"loss": 0.0043,
"step": 31550
},
{
"epoch": 1.8737288386848112,
"grad_norm": 1.2117986679077148,
"learning_rate": 3.150313725830536e-06,
"loss": 0.0032,
"step": 31600
},
{
"epoch": 1.8766936464170298,
"grad_norm": 0.02491467259824276,
"learning_rate": 3.1357569290202827e-06,
"loss": 0.0024,
"step": 31650
},
{
"epoch": 1.8796584541492485,
"grad_norm": 0.1975928694009781,
"learning_rate": 3.121218461817893e-06,
"loss": 0.0016,
"step": 31700
},
{
"epoch": 1.882623261881467,
"grad_norm": 0.7876911163330078,
"learning_rate": 3.1066984671684595e-06,
"loss": 0.003,
"step": 31750
},
{
"epoch": 1.8855880696136855,
"grad_norm": 0.011536120437085629,
"learning_rate": 3.0921970878354535e-06,
"loss": 0.0025,
"step": 31800
},
{
"epoch": 1.8885528773459042,
"grad_norm": 0.33370065689086914,
"learning_rate": 3.077714466399314e-06,
"loss": 0.0017,
"step": 31850
},
{
"epoch": 1.8915176850781226,
"grad_norm": 0.05288661643862724,
"learning_rate": 3.06325074525605e-06,
"loss": 0.0045,
"step": 31900
},
{
"epoch": 1.8944824928103412,
"grad_norm": 0.03731587901711464,
"learning_rate": 3.048806066615836e-06,
"loss": 0.0016,
"step": 31950
},
{
"epoch": 1.8974473005425598,
"grad_norm": 0.1143055409193039,
"learning_rate": 3.0343805725016218e-06,
"loss": 0.003,
"step": 32000
},
{
"epoch": 1.9004121082747782,
"grad_norm": 0.02334473840892315,
"learning_rate": 3.0199744047477274e-06,
"loss": 0.0015,
"step": 32050
},
{
"epoch": 1.903376916006997,
"grad_norm": 0.8750964403152466,
"learning_rate": 3.005587704998453e-06,
"loss": 0.0033,
"step": 32100
},
{
"epoch": 1.9063417237392155,
"grad_norm": 0.16102011501789093,
"learning_rate": 2.991220614706686e-06,
"loss": 0.0015,
"step": 32150
},
{
"epoch": 1.909306531471434,
"grad_norm": 1.6478341817855835,
"learning_rate": 2.9768732751325102e-06,
"loss": 0.0014,
"step": 32200
},
{
"epoch": 1.9122713392036528,
"grad_norm": 0.01625387743115425,
"learning_rate": 2.962545827341815e-06,
"loss": 0.0013,
"step": 32250
},
{
"epoch": 1.9152361469358712,
"grad_norm": 0.8058168292045593,
"learning_rate": 2.948238412204909e-06,
"loss": 0.0024,
"step": 32300
},
{
"epoch": 1.9182009546680898,
"grad_norm": 0.2690886855125427,
"learning_rate": 2.9339511703951408e-06,
"loss": 0.0028,
"step": 32350
},
{
"epoch": 1.9211657624003085,
"grad_norm": 0.06821112334728241,
"learning_rate": 2.919684242387505e-06,
"loss": 0.0021,
"step": 32400
},
{
"epoch": 1.9241305701325269,
"grad_norm": 0.012448090128600597,
"learning_rate": 2.905437768457272e-06,
"loss": 0.0012,
"step": 32450
},
{
"epoch": 1.9270953778647455,
"grad_norm": 0.33233898878097534,
"learning_rate": 2.8912118886785974e-06,
"loss": 0.0029,
"step": 32500
},
{
"epoch": 1.9270953778647455,
"eval_loss": 0.09504964202642441,
"eval_runtime": 1925.3076,
"eval_samples_per_second": 3.041,
"eval_steps_per_second": 0.76,
"eval_wer": 14.754769388202615,
"step": 32500
},
{
"epoch": 1.9300601855969641,
"grad_norm": 1.714479923248291,
"learning_rate": 2.877006742923155e-06,
"loss": 0.0028,
"step": 32550
},
{
"epoch": 1.9330249933291825,
"grad_norm": 0.18112489581108093,
"learning_rate": 2.8628224708587617e-06,
"loss": 0.0011,
"step": 32600
},
{
"epoch": 1.9359898010614012,
"grad_norm": 0.025406301021575928,
"learning_rate": 2.848659211947989e-06,
"loss": 0.0023,
"step": 32650
},
{
"epoch": 1.9389546087936198,
"grad_norm": 1.0433779954910278,
"learning_rate": 2.8345171054468122e-06,
"loss": 0.0012,
"step": 32700
},
{
"epoch": 1.9419194165258382,
"grad_norm": 0.6328701972961426,
"learning_rate": 2.820396290403232e-06,
"loss": 0.0042,
"step": 32750
},
{
"epoch": 1.9448842242580568,
"grad_norm": 2.384730339050293,
"learning_rate": 2.8062969056558998e-06,
"loss": 0.0029,
"step": 32800
},
{
"epoch": 1.9478490319902755,
"grad_norm": 1.5068167448043823,
"learning_rate": 2.792219089832767e-06,
"loss": 0.003,
"step": 32850
},
{
"epoch": 1.950813839722494,
"grad_norm": 0.24156026542186737,
"learning_rate": 2.778162981349714e-06,
"loss": 0.0015,
"step": 32900
},
{
"epoch": 1.9537786474547125,
"grad_norm": 0.06006159633398056,
"learning_rate": 2.7641287184091835e-06,
"loss": 0.0033,
"step": 32950
},
{
"epoch": 1.9567434551869312,
"grad_norm": 0.1492675542831421,
"learning_rate": 2.7501164389988423e-06,
"loss": 0.0022,
"step": 33000
},
{
"epoch": 1.9597082629191496,
"grad_norm": 0.019522709771990776,
"learning_rate": 2.736126280890196e-06,
"loss": 0.0026,
"step": 33050
},
{
"epoch": 1.9626730706513684,
"grad_norm": 0.2943977117538452,
"learning_rate": 2.7221583816372602e-06,
"loss": 0.0033,
"step": 33100
},
{
"epoch": 1.9656378783835868,
"grad_norm": 0.8795707821846008,
"learning_rate": 2.7082128785751884e-06,
"loss": 0.0025,
"step": 33150
},
{
"epoch": 1.9686026861158052,
"grad_norm": 0.2423001527786255,
"learning_rate": 2.694289908818937e-06,
"loss": 0.0014,
"step": 33200
},
{
"epoch": 1.971567493848024,
"grad_norm": 1.4210988283157349,
"learning_rate": 2.6803896092619073e-06,
"loss": 0.0021,
"step": 33250
},
{
"epoch": 1.9745323015802425,
"grad_norm": 0.1827964335680008,
"learning_rate": 2.6665121165746027e-06,
"loss": 0.0015,
"step": 33300
},
{
"epoch": 1.977497109312461,
"grad_norm": 0.8498244285583496,
"learning_rate": 2.652934432464768e-06,
"loss": 0.0024,
"step": 33350
},
{
"epoch": 1.9804619170446798,
"grad_norm": 0.006696091033518314,
"learning_rate": 2.6391024997058873e-06,
"loss": 0.0012,
"step": 33400
},
{
"epoch": 1.9834267247768982,
"grad_norm": 0.7741572856903076,
"learning_rate": 2.6252937797597878e-06,
"loss": 0.0019,
"step": 33450
},
{
"epoch": 1.9863915325091168,
"grad_norm": 0.9306731820106506,
"learning_rate": 2.6115084083965435e-06,
"loss": 0.0013,
"step": 33500
},
{
"epoch": 1.9893563402413355,
"grad_norm": 0.019881825894117355,
"learning_rate": 2.5977465211566645e-06,
"loss": 0.0031,
"step": 33550
},
{
"epoch": 1.9923211479735539,
"grad_norm": 0.3807990849018097,
"learning_rate": 2.5840082533497534e-06,
"loss": 0.0018,
"step": 33600
},
{
"epoch": 1.9952859557057725,
"grad_norm": 0.04648787900805473,
"learning_rate": 2.570293740053188e-06,
"loss": 0.0013,
"step": 33650
},
{
"epoch": 1.9982507634379911,
"grad_norm": 0.15467403829097748,
"learning_rate": 2.5566031161107856e-06,
"loss": 0.0018,
"step": 33700
},
{
"epoch": 2.0011859230928875,
"grad_norm": 0.016635119915008545,
"learning_rate": 2.5429365161314758e-06,
"loss": 0.0013,
"step": 33750
},
{
"epoch": 2.004150730825106,
"grad_norm": 0.015187140554189682,
"learning_rate": 2.529294074487984e-06,
"loss": 0.0007,
"step": 33800
},
{
"epoch": 2.0071155385573247,
"grad_norm": 0.10330521315336227,
"learning_rate": 2.5156759253155017e-06,
"loss": 0.001,
"step": 33850
},
{
"epoch": 2.010080346289543,
"grad_norm": 0.744099497795105,
"learning_rate": 2.5020822025103752e-06,
"loss": 0.0005,
"step": 33900
},
{
"epoch": 2.0130451540217615,
"grad_norm": 0.02314056269824505,
"learning_rate": 2.4885130397287875e-06,
"loss": 0.0009,
"step": 33950
},
{
"epoch": 2.0160099617539804,
"grad_norm": 0.1130124107003212,
"learning_rate": 2.4749685703854366e-06,
"loss": 0.0007,
"step": 34000
},
{
"epoch": 2.018974769486199,
"grad_norm": 0.010739482939243317,
"learning_rate": 2.461448927652234e-06,
"loss": 0.0005,
"step": 34050
},
{
"epoch": 2.021939577218417,
"grad_norm": 0.07081291824579239,
"learning_rate": 2.447954244456991e-06,
"loss": 0.0008,
"step": 34100
},
{
"epoch": 2.024904384950636,
"grad_norm": 0.02812119945883751,
"learning_rate": 2.434484653482111e-06,
"loss": 0.0008,
"step": 34150
},
{
"epoch": 2.0278691926828545,
"grad_norm": 0.017130475491285324,
"learning_rate": 2.421040287163287e-06,
"loss": 0.0006,
"step": 34200
},
{
"epoch": 2.030834000415073,
"grad_norm": 0.09646749496459961,
"learning_rate": 2.407621277688194e-06,
"loss": 0.0018,
"step": 34250
},
{
"epoch": 2.0337988081472917,
"grad_norm": 0.04550813138484955,
"learning_rate": 2.3942277569951983e-06,
"loss": 0.0009,
"step": 34300
},
{
"epoch": 2.03676361587951,
"grad_norm": 0.06901838630437851,
"learning_rate": 2.380859856772054e-06,
"loss": 0.0008,
"step": 34350
},
{
"epoch": 2.0397284236117286,
"grad_norm": 0.007037996314466,
"learning_rate": 2.367517708454607e-06,
"loss": 0.0005,
"step": 34400
},
{
"epoch": 2.0426932313439474,
"grad_norm": 0.0740680992603302,
"learning_rate": 2.3544675140285617e-06,
"loss": 0.0012,
"step": 34450
},
{
"epoch": 2.045658039076166,
"grad_norm": 0.617061197757721,
"learning_rate": 2.341176741254201e-06,
"loss": 0.0009,
"step": 34500
},
{
"epoch": 2.0486228468083847,
"grad_norm": 0.00723099522292614,
"learning_rate": 2.3279121105578058e-06,
"loss": 0.0008,
"step": 34550
},
{
"epoch": 2.051587654540603,
"grad_norm": 0.03154204040765762,
"learning_rate": 2.3146737523598583e-06,
"loss": 0.0009,
"step": 34600
},
{
"epoch": 2.0545524622728215,
"grad_norm": 0.004429316148161888,
"learning_rate": 2.3014617968225256e-06,
"loss": 0.0005,
"step": 34650
},
{
"epoch": 2.0575172700050404,
"grad_norm": 0.1683816909790039,
"learning_rate": 2.2882763738483724e-06,
"loss": 0.0013,
"step": 34700
},
{
"epoch": 2.0604820777372588,
"grad_norm": 0.020628100261092186,
"learning_rate": 2.275117613079099e-06,
"loss": 0.0006,
"step": 34750
},
{
"epoch": 2.063446885469477,
"grad_norm": 0.038743212819099426,
"learning_rate": 2.2619856438942485e-06,
"loss": 0.0009,
"step": 34800
},
{
"epoch": 2.066411693201696,
"grad_norm": 0.019078753888607025,
"learning_rate": 2.2488805954099498e-06,
"loss": 0.0003,
"step": 34850
},
{
"epoch": 2.0693765009339145,
"grad_norm": 0.008165969513356686,
"learning_rate": 2.2358025964776427e-06,
"loss": 0.0011,
"step": 34900
},
{
"epoch": 2.072341308666133,
"grad_norm": 0.018806306645274162,
"learning_rate": 2.2227517756828036e-06,
"loss": 0.0008,
"step": 34950
},
{
"epoch": 2.0753061163983517,
"grad_norm": 0.010271578095853329,
"learning_rate": 2.2097282613436964e-06,
"loss": 0.0012,
"step": 35000
},
{
"epoch": 2.0753061163983517,
"eval_loss": 0.09633096307516098,
"eval_runtime": 1965.4539,
"eval_samples_per_second": 2.979,
"eval_steps_per_second": 0.745,
"eval_wer": 17.523572838242817,
"step": 35000
},
{
"epoch": 2.07827092413057,
"grad_norm": 0.05877639353275299,
"learning_rate": 2.1967321815100996e-06,
"loss": 0.0007,
"step": 35050
},
{
"epoch": 2.0812357318627885,
"grad_norm": 0.012410519644618034,
"learning_rate": 2.1837636639620454e-06,
"loss": 0.0008,
"step": 35100
},
{
"epoch": 2.0842005395950074,
"grad_norm": 0.6075167059898376,
"learning_rate": 2.170822836208574e-06,
"loss": 0.0007,
"step": 35150
},
{
"epoch": 2.087165347327226,
"grad_norm": 0.022046292200684547,
"learning_rate": 2.157909825486473e-06,
"loss": 0.0006,
"step": 35200
},
{
"epoch": 2.090130155059444,
"grad_norm": 0.06569838523864746,
"learning_rate": 2.1450247587590267e-06,
"loss": 0.0005,
"step": 35250
},
{
"epoch": 2.093094962791663,
"grad_norm": 0.0742030218243599,
"learning_rate": 2.132167762714769e-06,
"loss": 0.0014,
"step": 35300
},
{
"epoch": 2.0960597705238815,
"grad_norm": 0.051597367972135544,
"learning_rate": 2.119338963766234e-06,
"loss": 0.0009,
"step": 35350
},
{
"epoch": 2.0990245782561,
"grad_norm": 0.039865605533123016,
"learning_rate": 2.106538488048722e-06,
"loss": 0.0004,
"step": 35400
},
{
"epoch": 2.1019893859883187,
"grad_norm": 0.008128203451633453,
"learning_rate": 2.093766461419048e-06,
"loss": 0.0009,
"step": 35450
},
{
"epoch": 2.104954193720537,
"grad_norm": 0.01954249106347561,
"learning_rate": 2.0810230094543133e-06,
"loss": 0.0011,
"step": 35500
},
{
"epoch": 2.107919001452756,
"grad_norm": 0.47133609652519226,
"learning_rate": 2.0683082574506698e-06,
"loss": 0.0008,
"step": 35550
},
{
"epoch": 2.1108838091849744,
"grad_norm": 0.1561160683631897,
"learning_rate": 2.055622330422078e-06,
"loss": 0.0004,
"step": 35600
},
{
"epoch": 2.113848616917193,
"grad_norm": 0.1073426902294159,
"learning_rate": 2.0429653530990924e-06,
"loss": 0.0008,
"step": 35650
},
{
"epoch": 2.1168134246494117,
"grad_norm": 0.015249393880367279,
"learning_rate": 2.0303374499276278e-06,
"loss": 0.0009,
"step": 35700
},
{
"epoch": 2.11977823238163,
"grad_norm": 0.03339747339487076,
"learning_rate": 2.017738745067731e-06,
"loss": 0.0006,
"step": 35750
},
{
"epoch": 2.1227430401138485,
"grad_norm": 0.02580416575074196,
"learning_rate": 2.0051693623923706e-06,
"loss": 0.0013,
"step": 35800
},
{
"epoch": 2.1257078478460674,
"grad_norm": 0.05754227936267853,
"learning_rate": 1.9926294254862127e-06,
"loss": 0.0004,
"step": 35850
},
{
"epoch": 2.1286726555782858,
"grad_norm": 0.1823517233133316,
"learning_rate": 1.9801190576444005e-06,
"loss": 0.0007,
"step": 35900
},
{
"epoch": 2.131637463310504,
"grad_norm": 0.011044003069400787,
"learning_rate": 1.9676383818713612e-06,
"loss": 0.0008,
"step": 35950
},
{
"epoch": 2.134602271042723,
"grad_norm": 0.00475548068061471,
"learning_rate": 1.95518752087957e-06,
"loss": 0.0007,
"step": 36000
},
{
"epoch": 2.1375670787749415,
"grad_norm": 1.1900372505187988,
"learning_rate": 1.942766597088367e-06,
"loss": 0.0007,
"step": 36050
},
{
"epoch": 2.14053188650716,
"grad_norm": 0.009477607905864716,
"learning_rate": 1.9303757326227368e-06,
"loss": 0.0005,
"step": 36100
},
{
"epoch": 2.1434966942393787,
"grad_norm": 0.0051522282883524895,
"learning_rate": 1.9180150493121197e-06,
"loss": 0.0006,
"step": 36150
},
{
"epoch": 2.146461501971597,
"grad_norm": 0.0598483607172966,
"learning_rate": 1.9056846686892088e-06,
"loss": 0.0011,
"step": 36200
},
{
"epoch": 2.1494263097038155,
"grad_norm": 0.012758989818394184,
"learning_rate": 1.8933847119887499e-06,
"loss": 0.0008,
"step": 36250
},
{
"epoch": 2.1523911174360344,
"grad_norm": 0.021726280450820923,
"learning_rate": 1.8811153001463606e-06,
"loss": 0.0005,
"step": 36300
},
{
"epoch": 2.155355925168253,
"grad_norm": 0.005376185290515423,
"learning_rate": 1.8688765537973341e-06,
"loss": 0.0005,
"step": 36350
},
{
"epoch": 2.158320732900471,
"grad_norm": 0.006016736384481192,
"learning_rate": 1.8566685932754507e-06,
"loss": 0.0002,
"step": 36400
},
{
"epoch": 2.16128554063269,
"grad_norm": 0.4107031524181366,
"learning_rate": 1.8444915386118029e-06,
"loss": 0.0004,
"step": 36450
},
{
"epoch": 2.1642503483649085,
"grad_norm": 0.06917353719472885,
"learning_rate": 1.83234550953361e-06,
"loss": 0.0005,
"step": 36500
},
{
"epoch": 2.167215156097127,
"grad_norm": 0.007486425340175629,
"learning_rate": 1.8202306254630365e-06,
"loss": 0.001,
"step": 36550
},
{
"epoch": 2.1701799638293457,
"grad_norm": 0.026751527562737465,
"learning_rate": 1.8081470055160322e-06,
"loss": 0.0008,
"step": 36600
},
{
"epoch": 2.173144771561564,
"grad_norm": 0.0032799181062728167,
"learning_rate": 1.7960947685011425e-06,
"loss": 0.0007,
"step": 36650
},
{
"epoch": 2.176109579293783,
"grad_norm": 0.0042535727843642235,
"learning_rate": 1.784074032918356e-06,
"loss": 0.0013,
"step": 36700
},
{
"epoch": 2.1790743870260014,
"grad_norm": 0.045528728514909744,
"learning_rate": 1.7720849169579308e-06,
"loss": 0.0019,
"step": 36750
},
{
"epoch": 2.18203919475822,
"grad_norm": 0.46880343556404114,
"learning_rate": 1.7601275384992317e-06,
"loss": 0.0005,
"step": 36800
},
{
"epoch": 2.1850040024904387,
"grad_norm": 0.3082144260406494,
"learning_rate": 1.7482020151095807e-06,
"loss": 0.0009,
"step": 36850
},
{
"epoch": 2.187968810222657,
"grad_norm": 0.5959653854370117,
"learning_rate": 1.7363084640430867e-06,
"loss": 0.0007,
"step": 36900
},
{
"epoch": 2.1909336179548755,
"grad_norm": 0.018325725570321083,
"learning_rate": 1.724447002239506e-06,
"loss": 0.0013,
"step": 36950
},
{
"epoch": 2.1938984256870944,
"grad_norm": 0.025977754965424538,
"learning_rate": 1.7126177463230875e-06,
"loss": 0.001,
"step": 37000
},
{
"epoch": 2.1968632334193128,
"grad_norm": 0.18662206828594208,
"learning_rate": 1.7010564337675956e-06,
"loss": 0.0016,
"step": 37050
},
{
"epoch": 2.199828041151531,
"grad_norm": 0.025148356333374977,
"learning_rate": 1.6892912883321422e-06,
"loss": 0.0006,
"step": 37100
},
{
"epoch": 2.20279284888375,
"grad_norm": 0.004004855640232563,
"learning_rate": 1.677558694441807e-06,
"loss": 0.0005,
"step": 37150
},
{
"epoch": 2.2057576566159685,
"grad_norm": 0.016185695305466652,
"learning_rate": 1.6658587674537785e-06,
"loss": 0.0014,
"step": 37200
},
{
"epoch": 2.208722464348187,
"grad_norm": 0.0063825915567576885,
"learning_rate": 1.6541916224040617e-06,
"loss": 0.0008,
"step": 37250
},
{
"epoch": 2.2116872720804057,
"grad_norm": 0.7008712887763977,
"learning_rate": 1.6425573740063438e-06,
"loss": 0.0017,
"step": 37300
},
{
"epoch": 2.214652079812624,
"grad_norm": 0.547462522983551,
"learning_rate": 1.6309561366508619e-06,
"loss": 0.0006,
"step": 37350
},
{
"epoch": 2.2176168875448425,
"grad_norm": 0.003590489737689495,
"learning_rate": 1.6193880244032856e-06,
"loss": 0.0002,
"step": 37400
},
{
"epoch": 2.2205816952770614,
"grad_norm": 0.005014427937567234,
"learning_rate": 1.607853151003591e-06,
"loss": 0.0004,
"step": 37450
},
{
"epoch": 2.22354650300928,
"grad_norm": 0.4616244435310364,
"learning_rate": 1.5963516298649401e-06,
"loss": 0.0008,
"step": 37500
},
{
"epoch": 2.22354650300928,
"eval_loss": 0.09748079627752304,
"eval_runtime": 1908.5821,
"eval_samples_per_second": 3.068,
"eval_steps_per_second": 0.767,
"eval_wer": 14.918500109641109,
"step": 37500
},
{
"epoch": 2.226511310741498,
"grad_norm": 0.007188325747847557,
"learning_rate": 1.5848835740725744e-06,
"loss": 0.0014,
"step": 37550
},
{
"epoch": 2.229476118473717,
"grad_norm": 0.004517501685768366,
"learning_rate": 1.5734490963826914e-06,
"loss": 0.0012,
"step": 37600
},
{
"epoch": 2.2324409262059355,
"grad_norm": 0.009022989310324192,
"learning_rate": 1.5620483092213467e-06,
"loss": 0.0009,
"step": 37650
},
{
"epoch": 2.235405733938154,
"grad_norm": 0.038672007620334625,
"learning_rate": 1.5506813246833424e-06,
"loss": 0.0003,
"step": 37700
},
{
"epoch": 2.2383705416703727,
"grad_norm": 0.01938713528215885,
"learning_rate": 1.5393482545311267e-06,
"loss": 0.0006,
"step": 37750
},
{
"epoch": 2.241335349402591,
"grad_norm": 0.12773361802101135,
"learning_rate": 1.5280492101936955e-06,
"loss": 0.0007,
"step": 37800
},
{
"epoch": 2.24430015713481,
"grad_norm": 0.0028612061869353056,
"learning_rate": 1.516784302765492e-06,
"loss": 0.0006,
"step": 37850
},
{
"epoch": 2.2472649648670284,
"grad_norm": 0.15187284350395203,
"learning_rate": 1.5055536430053241e-06,
"loss": 0.0003,
"step": 37900
},
{
"epoch": 2.250229772599247,
"grad_norm": 0.13594093918800354,
"learning_rate": 1.4943573413352685e-06,
"loss": 0.0009,
"step": 37950
},
{
"epoch": 2.2531945803314657,
"grad_norm": 0.0652371272444725,
"learning_rate": 1.4831955078395822e-06,
"loss": 0.001,
"step": 38000
},
{
"epoch": 2.256159388063684,
"grad_norm": 0.09638596326112747,
"learning_rate": 1.4720682522636294e-06,
"loss": 0.0005,
"step": 38050
},
{
"epoch": 2.2591241957959025,
"grad_norm": 0.019569765776395798,
"learning_rate": 1.4609756840127959e-06,
"loss": 0.0013,
"step": 38100
},
{
"epoch": 2.2620890035281214,
"grad_norm": 0.06489352881908417,
"learning_rate": 1.4499179121514118e-06,
"loss": 0.0002,
"step": 38150
},
{
"epoch": 2.2650538112603398,
"grad_norm": 0.0036429071333259344,
"learning_rate": 1.4388950454016876e-06,
"loss": 0.0009,
"step": 38200
},
{
"epoch": 2.268018618992558,
"grad_norm": 0.0040684971027076244,
"learning_rate": 1.4279071921426342e-06,
"loss": 0.0008,
"step": 38250
},
{
"epoch": 2.270983426724777,
"grad_norm": 0.20703789591789246,
"learning_rate": 1.4169544604090063e-06,
"loss": 0.0003,
"step": 38300
},
{
"epoch": 2.2739482344569955,
"grad_norm": 1.6676180362701416,
"learning_rate": 1.4060369578902366e-06,
"loss": 0.0017,
"step": 38350
},
{
"epoch": 2.276913042189214,
"grad_norm": 0.013792523182928562,
"learning_rate": 1.3951547919293762e-06,
"loss": 0.0007,
"step": 38400
},
{
"epoch": 2.2798778499214327,
"grad_norm": 0.25312694907188416,
"learning_rate": 1.3843080695220429e-06,
"loss": 0.0004,
"step": 38450
},
{
"epoch": 2.282842657653651,
"grad_norm": 0.0025230322498828173,
"learning_rate": 1.37349689731536e-06,
"loss": 0.0014,
"step": 38500
},
{
"epoch": 2.2858074653858695,
"grad_norm": 0.3334575593471527,
"learning_rate": 1.3627213816069208e-06,
"loss": 0.0005,
"step": 38550
},
{
"epoch": 2.2887722731180884,
"grad_norm": 0.008739791810512543,
"learning_rate": 1.3519816283437349e-06,
"loss": 0.001,
"step": 38600
},
{
"epoch": 2.291737080850307,
"grad_norm": 0.5587530732154846,
"learning_rate": 1.3412777431211859e-06,
"loss": 0.0006,
"step": 38650
},
{
"epoch": 2.2947018885825257,
"grad_norm": 0.02083914540708065,
"learning_rate": 1.3306098311819982e-06,
"loss": 0.001,
"step": 38700
},
{
"epoch": 2.297666696314744,
"grad_norm": 0.008118952624499798,
"learning_rate": 1.3199779974152011e-06,
"loss": 0.0007,
"step": 38750
},
{
"epoch": 2.3006315040469625,
"grad_norm": 0.0053606764413416386,
"learning_rate": 1.3093823463550909e-06,
"loss": 0.0009,
"step": 38800
},
{
"epoch": 2.303596311779181,
"grad_norm": 0.06790202856063843,
"learning_rate": 1.2988229821802157e-06,
"loss": 0.0003,
"step": 38850
},
{
"epoch": 2.3065611195113997,
"grad_norm": 0.056741103529930115,
"learning_rate": 1.2883000087123355e-06,
"loss": 0.0011,
"step": 38900
},
{
"epoch": 2.309525927243618,
"grad_norm": 0.012334014289081097,
"learning_rate": 1.277813529415416e-06,
"loss": 0.0008,
"step": 38950
},
{
"epoch": 2.312490734975837,
"grad_norm": 0.033796992152929306,
"learning_rate": 1.2673636473946033e-06,
"loss": 0.0006,
"step": 39000
},
{
"epoch": 2.3154555427080554,
"grad_norm": 0.010236002504825592,
"learning_rate": 1.2569504653952103e-06,
"loss": 0.0007,
"step": 39050
},
{
"epoch": 2.318420350440274,
"grad_norm": 0.09120677411556244,
"learning_rate": 1.2465740858017118e-06,
"loss": 0.0004,
"step": 39100
},
{
"epoch": 2.3213851581724927,
"grad_norm": 0.016125334426760674,
"learning_rate": 1.2362346106367268e-06,
"loss": 0.0005,
"step": 39150
},
{
"epoch": 2.324349965904711,
"grad_norm": 0.04164751619100571,
"learning_rate": 1.2259321415600307e-06,
"loss": 0.0012,
"step": 39200
},
{
"epoch": 2.3273147736369295,
"grad_norm": 0.005560677032917738,
"learning_rate": 1.2156667798675436e-06,
"loss": 0.0008,
"step": 39250
},
{
"epoch": 2.3302795813691484,
"grad_norm": 0.005640542134642601,
"learning_rate": 1.2054386264903373e-06,
"loss": 0.0007,
"step": 39300
},
{
"epoch": 2.333244389101367,
"grad_norm": 0.005680414382368326,
"learning_rate": 1.1952477819936465e-06,
"loss": 0.0007,
"step": 39350
},
{
"epoch": 2.336209196833585,
"grad_norm": 0.1653079390525818,
"learning_rate": 1.185094346575879e-06,
"loss": 0.0003,
"step": 39400
},
{
"epoch": 2.339174004565804,
"grad_norm": 0.02139970287680626,
"learning_rate": 1.1749784200676233e-06,
"loss": 0.0002,
"step": 39450
},
{
"epoch": 2.3421388122980225,
"grad_norm": 0.007563546299934387,
"learning_rate": 1.1649001019306799e-06,
"loss": 0.0006,
"step": 39500
},
{
"epoch": 2.345103620030241,
"grad_norm": 0.020477985963225365,
"learning_rate": 1.15485949125707e-06,
"loss": 0.0006,
"step": 39550
},
{
"epoch": 2.3480684277624597,
"grad_norm": 0.007695810403674841,
"learning_rate": 1.1448566867680715e-06,
"loss": 0.0006,
"step": 39600
},
{
"epoch": 2.351033235494678,
"grad_norm": 0.1916559338569641,
"learning_rate": 1.1348917868132452e-06,
"loss": 0.0004,
"step": 39650
},
{
"epoch": 2.3539980432268965,
"grad_norm": 0.006019901018589735,
"learning_rate": 1.124964889369461e-06,
"loss": 0.0006,
"step": 39700
},
{
"epoch": 2.3569628509591154,
"grad_norm": 0.12937164306640625,
"learning_rate": 1.1150760920399501e-06,
"loss": 0.0007,
"step": 39750
},
{
"epoch": 2.359927658691334,
"grad_norm": 2.1091978549957275,
"learning_rate": 1.1052254920533262e-06,
"loss": 0.001,
"step": 39800
},
{
"epoch": 2.3628924664235527,
"grad_norm": 0.014486163854598999,
"learning_rate": 1.095413186262645e-06,
"loss": 0.0004,
"step": 39850
},
{
"epoch": 2.365857274155771,
"grad_norm": 0.021811697632074356,
"learning_rate": 1.0856392711444452e-06,
"loss": 0.0005,
"step": 39900
},
{
"epoch": 2.3688220818879895,
"grad_norm": 0.030487345531582832,
"learning_rate": 1.075903842797798e-06,
"loss": 0.0009,
"step": 39950
},
{
"epoch": 2.371786889620208,
"grad_norm": 0.6815763711929321,
"learning_rate": 1.0662069969433681e-06,
"loss": 0.0004,
"step": 40000
},
{
"epoch": 2.371786889620208,
"eval_loss": 0.0971890240907669,
"eval_runtime": 1820.3139,
"eval_samples_per_second": 3.216,
"eval_steps_per_second": 0.804,
"eval_wer": 11.26525838754477,
"step": 40000
},
{
"epoch": 2.3747516973524267,
"grad_norm": 0.007269201334565878,
"learning_rate": 1.0565488289224695e-06,
"loss": 0.0011,
"step": 40050
},
{
"epoch": 2.377716505084645,
"grad_norm": 0.012200387194752693,
"learning_rate": 1.046929433696125e-06,
"loss": 0.0006,
"step": 40100
},
{
"epoch": 2.380681312816864,
"grad_norm": 0.0072285993956029415,
"learning_rate": 1.037348905844139e-06,
"loss": 0.0007,
"step": 40150
},
{
"epoch": 2.3836461205490824,
"grad_norm": 0.011018377728760242,
"learning_rate": 1.027807339564163e-06,
"loss": 0.0005,
"step": 40200
},
{
"epoch": 2.386610928281301,
"grad_norm": 0.030931444838643074,
"learning_rate": 1.0183048286707686e-06,
"loss": 0.0003,
"step": 40250
},
{
"epoch": 2.3895757360135197,
"grad_norm": 0.0054734209552407265,
"learning_rate": 1.0088414665945312e-06,
"loss": 0.0006,
"step": 40300
},
{
"epoch": 2.392540543745738,
"grad_norm": 0.0037654677871614695,
"learning_rate": 9.994173463811008e-07,
"loss": 0.001,
"step": 40350
},
{
"epoch": 2.3955053514779565,
"grad_norm": 0.008112763054668903,
"learning_rate": 9.900325606903033e-07,
"loss": 0.0004,
"step": 40400
},
{
"epoch": 2.3984701592101754,
"grad_norm": 0.012320293113589287,
"learning_rate": 9.806872017952102e-07,
"loss": 0.0004,
"step": 40450
},
{
"epoch": 2.401434966942394,
"grad_norm": 0.02817295864224434,
"learning_rate": 9.713813615812456e-07,
"loss": 0.001,
"step": 40500
},
{
"epoch": 2.404399774674612,
"grad_norm": 0.06625434756278992,
"learning_rate": 9.621151315452792e-07,
"loss": 0.0007,
"step": 40550
},
{
"epoch": 2.407364582406831,
"grad_norm": 0.04207722470164299,
"learning_rate": 9.528886027947215e-07,
"loss": 0.0015,
"step": 40600
},
{
"epoch": 2.4103293901390495,
"grad_norm": 0.04913393780589104,
"learning_rate": 9.437018660466352e-07,
"loss": 0.001,
"step": 40650
},
{
"epoch": 2.413294197871268,
"grad_norm": 0.007566593121737242,
"learning_rate": 9.345550116268404e-07,
"loss": 0.0013,
"step": 40700
},
{
"epoch": 2.4162590056034867,
"grad_norm": 0.01240174937993288,
"learning_rate": 9.254481294690221e-07,
"loss": 0.0004,
"step": 40750
},
{
"epoch": 2.419223813335705,
"grad_norm": 0.013987046666443348,
"learning_rate": 9.163813091138557e-07,
"loss": 0.0004,
"step": 40800
},
{
"epoch": 2.4221886210679235,
"grad_norm": 0.012768911197781563,
"learning_rate": 9.073546397081185e-07,
"loss": 0.0011,
"step": 40850
},
{
"epoch": 2.4251534288001424,
"grad_norm": 0.010746861808001995,
"learning_rate": 8.983682100038138e-07,
"loss": 0.0005,
"step": 40900
},
{
"epoch": 2.428118236532361,
"grad_norm": 0.11308763176202774,
"learning_rate": 8.894221083573041e-07,
"loss": 0.0003,
"step": 40950
},
{
"epoch": 2.4310830442645797,
"grad_norm": 0.003393119666725397,
"learning_rate": 8.805164227284336e-07,
"loss": 0.0012,
"step": 41000
},
{
"epoch": 2.434047851996798,
"grad_norm": 0.0036778796929866076,
"learning_rate": 8.716512406796724e-07,
"loss": 0.001,
"step": 41050
},
{
"epoch": 2.4370126597290165,
"grad_norm": 0.2601497769355774,
"learning_rate": 8.628266493752496e-07,
"loss": 0.0004,
"step": 41100
},
{
"epoch": 2.439977467461235,
"grad_norm": 0.0293083768337965,
"learning_rate": 8.540427355802988e-07,
"loss": 0.0005,
"step": 41150
},
{
"epoch": 2.4429422751934537,
"grad_norm": 0.11809295415878296,
"learning_rate": 8.45299585660005e-07,
"loss": 0.0019,
"step": 41200
},
{
"epoch": 2.445907082925672,
"grad_norm": null,
"learning_rate": 8.365972855787496e-07,
"loss": 0.0008,
"step": 41250
},
{
"epoch": 2.448871890657891,
"grad_norm": 0.0271657295525074,
"learning_rate": 8.281087464744486e-07,
"loss": 0.0005,
"step": 41300
},
{
"epoch": 2.4518366983901094,
"grad_norm": 0.40079793334007263,
"learning_rate": 8.194875811138108e-07,
"loss": 0.001,
"step": 41350
},
{
"epoch": 2.454801506122328,
"grad_norm": 0.03252971172332764,
"learning_rate": 8.109075193809662e-07,
"loss": 0.0004,
"step": 41400
},
{
"epoch": 2.4577663138545467,
"grad_norm": 0.16962403059005737,
"learning_rate": 8.023686456367818e-07,
"loss": 0.0008,
"step": 41450
},
{
"epoch": 2.460731121586765,
"grad_norm": 0.01961754634976387,
"learning_rate": 7.938710438371617e-07,
"loss": 0.0004,
"step": 41500
},
{
"epoch": 2.4636959293189835,
"grad_norm": 0.4819841682910919,
"learning_rate": 7.854147975322113e-07,
"loss": 0.0004,
"step": 41550
},
{
"epoch": 2.4666607370512024,
"grad_norm": 0.0028431855607777834,
"learning_rate": 7.76999989865424e-07,
"loss": 0.0003,
"step": 41600
},
{
"epoch": 2.469625544783421,
"grad_norm": 0.004796815570443869,
"learning_rate": 7.686267035728595e-07,
"loss": 0.0005,
"step": 41650
},
{
"epoch": 2.472590352515639,
"grad_norm": 0.014351542107760906,
"learning_rate": 7.602950209823279e-07,
"loss": 0.0004,
"step": 41700
},
{
"epoch": 2.475555160247858,
"grad_norm": 0.04429518058896065,
"learning_rate": 7.520050240125876e-07,
"loss": 0.001,
"step": 41750
},
{
"epoch": 2.4785199679800765,
"grad_norm": 0.03187648952007294,
"learning_rate": 7.437567941725348e-07,
"loss": 0.0002,
"step": 41800
},
{
"epoch": 2.481484775712295,
"grad_norm": 0.005897920113056898,
"learning_rate": 7.355504125604007e-07,
"loss": 0.0003,
"step": 41850
},
{
"epoch": 2.4844495834445137,
"grad_norm": 0.009089338593184948,
"learning_rate": 7.273859598629596e-07,
"loss": 0.0004,
"step": 41900
},
{
"epoch": 2.487414391176732,
"grad_norm": 0.01698206551373005,
"learning_rate": 7.192635163547284e-07,
"loss": 0.0003,
"step": 41950
},
{
"epoch": 2.4903791989089505,
"grad_norm": 0.006857879459857941,
"learning_rate": 7.111831618971848e-07,
"loss": 0.0003,
"step": 42000
},
{
"epoch": 2.4933440066411694,
"grad_norm": 0.02485840767621994,
"learning_rate": 7.031449759379799e-07,
"loss": 0.0002,
"step": 42050
},
{
"epoch": 2.496308814373388,
"grad_norm": 0.003701738314703107,
"learning_rate": 6.951490375101494e-07,
"loss": 0.0008,
"step": 42100
},
{
"epoch": 2.4992736221056067,
"grad_norm": 0.6532867550849915,
"learning_rate": 6.871954252313489e-07,
"loss": 0.0008,
"step": 42150
},
{
"epoch": 2.502238429837825,
"grad_norm": 0.047790996730327606,
"learning_rate": 6.792842173030729e-07,
"loss": 0.0004,
"step": 42200
},
{
"epoch": 2.5052032375700435,
"grad_norm": 0.020621391013264656,
"learning_rate": 6.714154915098875e-07,
"loss": 0.0003,
"step": 42250
},
{
"epoch": 2.508168045302262,
"grad_norm": 0.010965166613459587,
"learning_rate": 6.635893252186676e-07,
"loss": 0.0003,
"step": 42300
},
{
"epoch": 2.5111328530344807,
"grad_norm": 0.004576478153467178,
"learning_rate": 6.558057953778313e-07,
"loss": 0.0007,
"step": 42350
},
{
"epoch": 2.514097660766699,
"grad_norm": 0.007650887127965689,
"learning_rate": 6.480649785165899e-07,
"loss": 0.0009,
"step": 42400
},
{
"epoch": 2.517062468498918,
"grad_norm": 0.01692899316549301,
"learning_rate": 6.403669507441917e-07,
"loss": 0.0005,
"step": 42450
},
{
"epoch": 2.5200272762311364,
"grad_norm": 0.03952137380838394,
"learning_rate": 6.327117877491717e-07,
"loss": 0.0007,
"step": 42500
},
{
"epoch": 2.5200272762311364,
"eval_loss": 0.09690303355455399,
"eval_runtime": 1935.2254,
"eval_samples_per_second": 3.025,
"eval_steps_per_second": 0.757,
"eval_wer": 13.773846941013085,
"step": 42500
},
{
"epoch": 2.522992083963355,
"grad_norm": 0.006857364438474178,
"learning_rate": 6.250995647986141e-07,
"loss": 0.0004,
"step": 42550
},
{
"epoch": 2.5259568916955737,
"grad_norm": 0.020079879090189934,
"learning_rate": 6.175303567374036e-07,
"loss": 0.0008,
"step": 42600
},
{
"epoch": 2.528921699427792,
"grad_norm": 0.006522559095174074,
"learning_rate": 6.100042379874971e-07,
"loss": 0.0008,
"step": 42650
},
{
"epoch": 2.5318865071600105,
"grad_norm": 0.18019999563694,
"learning_rate": 6.025212825471882e-07,
"loss": 0.0012,
"step": 42700
},
{
"epoch": 2.5348513148922294,
"grad_norm": 0.03285055235028267,
"learning_rate": 5.950815639903784e-07,
"loss": 0.0005,
"step": 42750
},
{
"epoch": 2.537816122624448,
"grad_norm": 0.9281402826309204,
"learning_rate": 5.876851554658585e-07,
"loss": 0.0006,
"step": 42800
},
{
"epoch": 2.540780930356666,
"grad_norm": 0.004797223024070263,
"learning_rate": 5.803321296965842e-07,
"loss": 0.0011,
"step": 42850
},
{
"epoch": 2.543745738088885,
"grad_norm": 0.006800955627113581,
"learning_rate": 5.730225589789645e-07,
"loss": 0.0007,
"step": 42900
},
{
"epoch": 2.5467105458211035,
"grad_norm": 0.016797848045825958,
"learning_rate": 5.657565151821509e-07,
"loss": 0.0004,
"step": 42950
},
{
"epoch": 2.5496753535533223,
"grad_norm": 0.008737844415009022,
"learning_rate": 5.585340697473257e-07,
"loss": 0.0004,
"step": 43000
},
{
"epoch": 2.5526401612855407,
"grad_norm": 0.002092113019898534,
"learning_rate": 5.513552936870065e-07,
"loss": 0.001,
"step": 43050
},
{
"epoch": 2.555604969017759,
"grad_norm": 0.004055564291775227,
"learning_rate": 5.442202575843458e-07,
"loss": 0.0005,
"step": 43100
},
{
"epoch": 2.5585697767499775,
"grad_norm": 0.027113450691103935,
"learning_rate": 5.372704263215106e-07,
"loss": 0.0008,
"step": 43150
},
{
"epoch": 2.5615345844821964,
"grad_norm": 1.4055336713790894,
"learning_rate": 5.302222018855358e-07,
"loss": 0.0004,
"step": 43200
},
{
"epoch": 2.564499392214415,
"grad_norm": 0.016375090926885605,
"learning_rate": 5.232179251919717e-07,
"loss": 0.0004,
"step": 43250
},
{
"epoch": 2.5674641999466337,
"grad_norm": 0.009804923087358475,
"learning_rate": 5.162576651082541e-07,
"loss": 0.0001,
"step": 43300
},
{
"epoch": 2.570429007678852,
"grad_norm": 1.839759349822998,
"learning_rate": 5.093414900690458e-07,
"loss": 0.001,
"step": 43350
},
{
"epoch": 2.5733938154110705,
"grad_norm": 0.0017718200106173754,
"learning_rate": 5.024694680755493e-07,
"loss": 0.0003,
"step": 43400
},
{
"epoch": 2.576358623143289,
"grad_norm": 0.004344862885773182,
"learning_rate": 4.956416666948494e-07,
"loss": 0.0004,
"step": 43450
},
{
"epoch": 2.5793234308755077,
"grad_norm": 0.0035795283038169146,
"learning_rate": 4.888581530592456e-07,
"loss": 0.0007,
"step": 43500
},
{
"epoch": 2.582288238607726,
"grad_norm": 0.04727310314774513,
"learning_rate": 4.821189938655885e-07,
"loss": 0.0004,
"step": 43550
},
{
"epoch": 2.585253046339945,
"grad_norm": 0.003710675286129117,
"learning_rate": 4.754242553746297e-07,
"loss": 0.0008,
"step": 43600
},
{
"epoch": 2.5882178540721634,
"grad_norm": 0.0148782255128026,
"learning_rate": 4.687740034103672e-07,
"loss": 0.0003,
"step": 43650
},
{
"epoch": 2.591182661804382,
"grad_norm": 0.1427607536315918,
"learning_rate": 4.621683033593971e-07,
"loss": 0.0007,
"step": 43700
},
{
"epoch": 2.5941474695366007,
"grad_norm": 0.5589999556541443,
"learning_rate": 4.556072201702733e-07,
"loss": 0.0002,
"step": 43750
},
{
"epoch": 2.597112277268819,
"grad_norm": 0.008652674965560436,
"learning_rate": 4.490908183528697e-07,
"loss": 0.0003,
"step": 43800
},
{
"epoch": 2.600077085001038,
"grad_norm": 2.5965774059295654,
"learning_rate": 4.4261916197773924e-07,
"loss": 0.001,
"step": 43850
},
{
"epoch": 2.6030418927332564,
"grad_norm": 0.11119319498538971,
"learning_rate": 4.361923146754948e-07,
"loss": 0.0004,
"step": 43900
},
{
"epoch": 2.606006700465475,
"grad_norm": 0.0016937926411628723,
"learning_rate": 4.298103396361719e-07,
"loss": 0.0004,
"step": 43950
},
{
"epoch": 2.608971508197693,
"grad_norm": 0.0073602148331701756,
"learning_rate": 4.234732996086172e-07,
"loss": 0.0005,
"step": 44000
},
{
"epoch": 2.611936315929912,
"grad_norm": 0.08338925987482071,
"learning_rate": 4.1718125689986677e-07,
"loss": 0.0017,
"step": 44050
},
{
"epoch": 2.6149011236621305,
"grad_norm": 0.004543852526694536,
"learning_rate": 4.1093427337453195e-07,
"loss": 0.0005,
"step": 44100
},
{
"epoch": 2.6178659313943493,
"grad_norm": 0.007618986535817385,
"learning_rate": 4.0473241045419554e-07,
"loss": 0.0008,
"step": 44150
},
{
"epoch": 2.6208307391265677,
"grad_norm": 0.31889769434928894,
"learning_rate": 3.9857572911680296e-07,
"loss": 0.0004,
"step": 44200
},
{
"epoch": 2.623795546858786,
"grad_norm": 0.06992673128843307,
"learning_rate": 3.924642898960679e-07,
"loss": 0.0003,
"step": 44250
},
{
"epoch": 2.6267603545910045,
"grad_norm": 0.04514357075095177,
"learning_rate": 3.8639815288087465e-07,
"loss": 0.0004,
"step": 44300
},
{
"epoch": 2.6297251623232234,
"grad_norm": 0.005873105023056269,
"learning_rate": 3.803773777146852e-07,
"loss": 0.0009,
"step": 44350
},
{
"epoch": 2.632689970055442,
"grad_norm": 0.003560218960046768,
"learning_rate": 3.7440202359495583e-07,
"loss": 0.0003,
"step": 44400
},
{
"epoch": 2.6356547777876607,
"grad_norm": 0.0036311550065875053,
"learning_rate": 3.6847214927255517e-07,
"loss": 0.0005,
"step": 44450
},
{
"epoch": 2.638619585519879,
"grad_norm": 0.010604312643408775,
"learning_rate": 3.625878130511834e-07,
"loss": 0.0007,
"step": 44500
},
{
"epoch": 2.6415843932520975,
"grad_norm": 0.015460291877388954,
"learning_rate": 3.567490727868028e-07,
"loss": 0.0007,
"step": 44550
},
{
"epoch": 2.644549200984316,
"grad_norm": 0.1841159462928772,
"learning_rate": 3.5095598588706537e-07,
"loss": 0.0009,
"step": 44600
},
{
"epoch": 2.6475140087165348,
"grad_norm": 0.018142210319638252,
"learning_rate": 3.452086093107515e-07,
"loss": 0.0002,
"step": 44650
},
{
"epoch": 2.650478816448753,
"grad_norm": 0.0007878096075728536,
"learning_rate": 3.3950699956721013e-07,
"loss": 0.0003,
"step": 44700
},
{
"epoch": 2.653443624180972,
"grad_norm": 0.030863964930176735,
"learning_rate": 3.338512127157978e-07,
"loss": 0.0002,
"step": 44750
},
{
"epoch": 2.6564084319131904,
"grad_norm": 0.00733591802418232,
"learning_rate": 3.2824130436533484e-07,
"loss": 0.0013,
"step": 44800
},
{
"epoch": 2.659373239645409,
"grad_norm": 0.00609589321538806,
"learning_rate": 3.2267732967355136e-07,
"loss": 0.0007,
"step": 44850
},
{
"epoch": 2.6623380473776277,
"grad_norm": 1.0534803867340088,
"learning_rate": 3.1715934334655306e-07,
"loss": 0.0007,
"step": 44900
},
{
"epoch": 2.665302855109846,
"grad_norm": 0.005737192463129759,
"learning_rate": 3.1168739963827574e-07,
"loss": 0.0004,
"step": 44950
},
{
"epoch": 2.668267662842065,
"grad_norm": 1.1238080263137817,
"learning_rate": 3.062615523499557e-07,
"loss": 0.0009,
"step": 45000
},
{
"epoch": 2.668267662842065,
"eval_loss": 0.09690423309803009,
"eval_runtime": 1796.5872,
"eval_samples_per_second": 3.259,
"eval_steps_per_second": 0.815,
"eval_wer": 12.171624881222133,
"step": 45000
},
{
"epoch": 2.6712324705742834,
"grad_norm": 0.004947836045175791,
"learning_rate": 3.008818548296e-07,
"loss": 0.001,
"step": 45050
},
{
"epoch": 2.674197278306502,
"grad_norm": 0.017444021999835968,
"learning_rate": 2.955483599714637e-07,
"loss": 0.0004,
"step": 45100
},
{
"epoch": 2.67716208603872,
"grad_norm": 0.004913876764476299,
"learning_rate": 2.902611202155259e-07,
"loss": 0.0001,
"step": 45150
},
{
"epoch": 2.680126893770939,
"grad_norm": 0.012263476848602295,
"learning_rate": 2.850201875469771e-07,
"loss": 0.0005,
"step": 45200
},
{
"epoch": 2.6830917015031575,
"grad_norm": 0.002866186900064349,
"learning_rate": 2.7982561349570724e-07,
"loss": 0.0007,
"step": 45250
},
{
"epoch": 2.6860565092353763,
"grad_norm": 0.029279733076691628,
"learning_rate": 2.7467744913579953e-07,
"loss": 0.0005,
"step": 45300
},
{
"epoch": 2.6890213169675947,
"grad_norm": 0.004325803369283676,
"learning_rate": 2.695757450850284e-07,
"loss": 0.0006,
"step": 45350
},
{
"epoch": 2.691986124699813,
"grad_norm": 0.01715932786464691,
"learning_rate": 2.645205515043597e-07,
"loss": 0.0001,
"step": 45400
},
{
"epoch": 2.6949509324320315,
"grad_norm": 0.23892708122730255,
"learning_rate": 2.5951191809746146e-07,
"loss": 0.0004,
"step": 45450
},
{
"epoch": 2.6979157401642504,
"grad_norm": 0.0025347827468067408,
"learning_rate": 2.5454989411020926e-07,
"loss": 0.0005,
"step": 45500
},
{
"epoch": 2.700880547896469,
"grad_norm": 0.0021720200311392546,
"learning_rate": 2.4963452833020876e-07,
"loss": 0.0014,
"step": 45550
},
{
"epoch": 2.7038453556286877,
"grad_norm": 0.007619790267199278,
"learning_rate": 2.447658690863125e-07,
"loss": 0.0002,
"step": 45600
},
{
"epoch": 2.706810163360906,
"grad_norm": 0.009241044521331787,
"learning_rate": 2.399439642481433e-07,
"loss": 0.0019,
"step": 45650
},
{
"epoch": 2.7097749710931245,
"grad_norm": 0.06682003289461136,
"learning_rate": 2.3516886122562642e-07,
"loss": 0.0012,
"step": 45700
},
{
"epoch": 2.7127397788253433,
"grad_norm": 0.007818753831088543,
"learning_rate": 2.3044060696852444e-07,
"loss": 0.0008,
"step": 45750
},
{
"epoch": 2.7157045865575618,
"grad_norm": 0.011879599653184414,
"learning_rate": 2.2575924796596926e-07,
"loss": 0.0003,
"step": 45800
},
{
"epoch": 2.71866939428978,
"grad_norm": 0.007650415413081646,
"learning_rate": 2.2112483024601228e-07,
"loss": 0.0005,
"step": 45850
},
{
"epoch": 2.721634202021999,
"grad_norm": 0.00841108150780201,
"learning_rate": 2.165373993751696e-07,
"loss": 0.0003,
"step": 45900
},
{
"epoch": 2.7245990097542174,
"grad_norm": 0.007607500068843365,
"learning_rate": 2.1199700045797077e-07,
"loss": 0.0004,
"step": 45950
},
{
"epoch": 2.727563817486436,
"grad_norm": 0.0019570747390389442,
"learning_rate": 2.075036781365186e-07,
"loss": 0.0003,
"step": 46000
},
{
"epoch": 2.7305286252186547,
"grad_norm": 0.00809240061789751,
"learning_rate": 2.0305747659005114e-07,
"loss": 0.0002,
"step": 46050
},
{
"epoch": 2.733493432950873,
"grad_norm": 0.004082467406988144,
"learning_rate": 1.9865843953450424e-07,
"loss": 0.0009,
"step": 46100
},
{
"epoch": 2.736458240683092,
"grad_norm": 0.007133570034056902,
"learning_rate": 1.9430661022208252e-07,
"loss": 0.0003,
"step": 46150
},
{
"epoch": 2.7394230484153104,
"grad_norm": 0.31264781951904297,
"learning_rate": 1.9000203144083628e-07,
"loss": 0.0003,
"step": 46200
},
{
"epoch": 2.742387856147529,
"grad_norm": 0.23617079854011536,
"learning_rate": 1.8574474551423804e-07,
"loss": 0.0002,
"step": 46250
},
{
"epoch": 2.745352663879747,
"grad_norm": 0.004803156014531851,
"learning_rate": 1.815347943007678e-07,
"loss": 0.0005,
"step": 46300
},
{
"epoch": 2.748317471611966,
"grad_norm": 0.04453460872173309,
"learning_rate": 1.7737221919350177e-07,
"loss": 0.0007,
"step": 46350
},
{
"epoch": 2.7512822793441845,
"grad_norm": 0.00791526585817337,
"learning_rate": 1.732570611197043e-07,
"loss": 0.0006,
"step": 46400
},
{
"epoch": 2.7542470870764033,
"grad_norm": 0.016163982450962067,
"learning_rate": 1.6918936054042656e-07,
"loss": 0.0004,
"step": 46450
},
{
"epoch": 2.7572118948086217,
"grad_norm": 0.0028755166567862034,
"learning_rate": 1.651691574501074e-07,
"loss": 0.0003,
"step": 46500
},
{
"epoch": 2.76017670254084,
"grad_norm": 0.002072387607768178,
"learning_rate": 1.611964913761821e-07,
"loss": 0.0007,
"step": 46550
},
{
"epoch": 2.7631415102730585,
"grad_norm": 0.013004027307033539,
"learning_rate": 1.5727140137869134e-07,
"loss": 0.0008,
"step": 46600
},
{
"epoch": 2.7661063180052774,
"grad_norm": 0.008961477316915989,
"learning_rate": 1.53393926049899e-07,
"loss": 0.0006,
"step": 46650
},
{
"epoch": 2.769071125737496,
"grad_norm": 0.015411244705319405,
"learning_rate": 1.4956410351391281e-07,
"loss": 0.0009,
"step": 46700
},
{
"epoch": 2.7720359334697147,
"grad_norm": 0.059329330921173096,
"learning_rate": 1.457819714263081e-07,
"loss": 0.0004,
"step": 46750
},
{
"epoch": 2.775000741201933,
"grad_norm": 0.007074225228279829,
"learning_rate": 1.4204756697375698e-07,
"loss": 0.0009,
"step": 46800
},
{
"epoch": 2.7779655489341515,
"grad_norm": 0.36626216769218445,
"learning_rate": 1.3836092687366575e-07,
"loss": 0.0005,
"step": 46850
},
{
"epoch": 2.7809303566663703,
"grad_norm": 0.3594434857368469,
"learning_rate": 1.3472208737381198e-07,
"loss": 0.0005,
"step": 46900
},
{
"epoch": 2.7838951643985888,
"grad_norm": 0.027519946917891502,
"learning_rate": 1.311310842519875e-07,
"loss": 0.0003,
"step": 46950
},
{
"epoch": 2.786859972130807,
"grad_norm": 0.12820805609226227,
"learning_rate": 1.2758795281564763e-07,
"loss": 0.0002,
"step": 47000
},
{
"epoch": 2.789824779863026,
"grad_norm": 0.016581403091549873,
"learning_rate": 1.2409272790156412e-07,
"loss": 0.0001,
"step": 47050
},
{
"epoch": 2.7927895875952444,
"grad_norm": 0.09786204993724823,
"learning_rate": 1.2064544387548116e-07,
"loss": 0.0003,
"step": 47100
},
{
"epoch": 2.795754395327463,
"grad_norm": 0.00287329638376832,
"learning_rate": 1.1724613463178047e-07,
"loss": 0.0006,
"step": 47150
},
{
"epoch": 2.7987192030596817,
"grad_norm": 0.003532948438078165,
"learning_rate": 1.1389483359314502e-07,
"loss": 0.0005,
"step": 47200
},
{
"epoch": 2.8016840107919,
"grad_norm": 0.015650948509573936,
"learning_rate": 1.1059157371023088e-07,
"loss": 0.0006,
"step": 47250
},
{
"epoch": 2.804648818524119,
"grad_norm": 0.0076681459322571754,
"learning_rate": 1.0733638746134645e-07,
"loss": 0.0007,
"step": 47300
},
{
"epoch": 2.8076136262563374,
"grad_norm": 0.026479622349143028,
"learning_rate": 1.0412930685212652e-07,
"loss": 0.0008,
"step": 47350
},
{
"epoch": 2.810578433988556,
"grad_norm": 0.002863786881789565,
"learning_rate": 1.0097036341522703e-07,
"loss": 0.0006,
"step": 47400
},
{
"epoch": 2.813543241720774,
"grad_norm": 1.0755295753479004,
"learning_rate": 9.785958821000418e-08,
"loss": 0.0009,
"step": 47450
},
{
"epoch": 2.816508049452993,
"grad_norm": 0.028524285182356834,
"learning_rate": 9.479701182221912e-08,
"loss": 0.0006,
"step": 47500
},
{
"epoch": 2.816508049452993,
"eval_loss": 0.09689270704984665,
"eval_runtime": 1776.8698,
"eval_samples_per_second": 3.295,
"eval_steps_per_second": 0.824,
"eval_wer": 11.217016299978072,
"step": 47500
},
{
"epoch": 2.8194728571852115,
"grad_norm": 0.05420482158660889,
"learning_rate": 9.178266436372985e-08,
"loss": 0.0004,
"step": 47550
},
{
"epoch": 2.8224376649174303,
"grad_norm": 0.0069849551655352116,
"learning_rate": 8.881657547219869e-08,
"loss": 0.0002,
"step": 47600
},
{
"epoch": 2.8254024726496487,
"grad_norm": 0.026374366134405136,
"learning_rate": 8.58987743108003e-08,
"loss": 0.0004,
"step": 47650
},
{
"epoch": 2.828367280381867,
"grad_norm": 0.018248042091727257,
"learning_rate": 8.302928956793576e-08,
"loss": 0.0003,
"step": 47700
},
{
"epoch": 2.8313320881140855,
"grad_norm": 1.0653489828109741,
"learning_rate": 8.020814945694733e-08,
"loss": 0.0007,
"step": 47750
},
{
"epoch": 2.8342968958463044,
"grad_norm": 0.03135114163160324,
"learning_rate": 7.743538171584464e-08,
"loss": 0.0002,
"step": 47800
},
{
"epoch": 2.837261703578523,
"grad_norm": 0.0379420705139637,
"learning_rate": 7.471101360703115e-08,
"loss": 0.0009,
"step": 47850
},
{
"epoch": 2.8402265113107417,
"grad_norm": 0.007235648576170206,
"learning_rate": 7.20350719170343e-08,
"loss": 0.0007,
"step": 47900
},
{
"epoch": 2.84319131904296,
"grad_norm": 0.14785419404506683,
"learning_rate": 6.940758295624406e-08,
"loss": 0.0005,
"step": 47950
},
{
"epoch": 2.8461561267751785,
"grad_norm": 0.008452442474663258,
"learning_rate": 6.687967751188796e-08,
"loss": 0.0006,
"step": 48000
},
{
"epoch": 2.8491209345073973,
"grad_norm": 0.01667088270187378,
"learning_rate": 6.434820071100967e-08,
"loss": 0.0006,
"step": 48050
},
{
"epoch": 2.8520857422396158,
"grad_norm": 0.0059527806006371975,
"learning_rate": 6.186525221818119e-08,
"loss": 0.0006,
"step": 48100
},
{
"epoch": 2.855050549971834,
"grad_norm": 0.00854993611574173,
"learning_rate": 5.943085644624536e-08,
"loss": 0.0007,
"step": 48150
},
{
"epoch": 2.858015357704053,
"grad_norm": 0.6749946475028992,
"learning_rate": 5.704503733066358e-08,
"loss": 0.0009,
"step": 48200
},
{
"epoch": 2.8609801654362714,
"grad_norm": 0.15325838327407837,
"learning_rate": 5.470781832928374e-08,
"loss": 0.0006,
"step": 48250
},
{
"epoch": 2.86394497316849,
"grad_norm": 0.013085714541375637,
"learning_rate": 5.241922242210595e-08,
"loss": 0.0003,
"step": 48300
},
{
"epoch": 2.8669097809007087,
"grad_norm": 0.04127352684736252,
"learning_rate": 5.017927211106e-08,
"loss": 0.001,
"step": 48350
},
{
"epoch": 2.869874588632927,
"grad_norm": 0.28855589032173157,
"learning_rate": 4.7987989419782665e-08,
"loss": 0.0014,
"step": 48400
},
{
"epoch": 2.872839396365146,
"grad_norm": 0.06962502002716064,
"learning_rate": 4.584539589339854e-08,
"loss": 0.0002,
"step": 48450
},
{
"epoch": 2.8758042040973644,
"grad_norm": 0.00731350714340806,
"learning_rate": 4.375151259831401e-08,
"loss": 0.0004,
"step": 48500
},
{
"epoch": 2.878769011829583,
"grad_norm": 0.025366390123963356,
"learning_rate": 4.170636012200413e-08,
"loss": 0.0003,
"step": 48550
},
{
"epoch": 2.881733819561801,
"grad_norm": 0.011618382297456264,
"learning_rate": 3.970995857281446e-08,
"loss": 0.0005,
"step": 48600
},
{
"epoch": 2.88469862729402,
"grad_norm": 0.001259606797248125,
"learning_rate": 3.776232757976117e-08,
"loss": 0.0009,
"step": 48650
},
{
"epoch": 2.8876634350262385,
"grad_norm": 0.002274399623274803,
"learning_rate": 3.586348629233849e-08,
"loss": 0.0005,
"step": 48700
},
{
"epoch": 2.8906282427584573,
"grad_norm": 0.009601451456546783,
"learning_rate": 3.401345338033102e-08,
"loss": 0.0005,
"step": 48750
},
{
"epoch": 2.8935930504906757,
"grad_norm": 0.007640424184501171,
"learning_rate": 3.2212247033629465e-08,
"loss": 0.0004,
"step": 48800
},
{
"epoch": 2.896557858222894,
"grad_norm": 0.0022892076522111893,
"learning_rate": 3.045988496205243e-08,
"loss": 0.0012,
"step": 48850
},
{
"epoch": 2.8995226659551125,
"grad_norm": 0.005462869070470333,
"learning_rate": 2.8756384395171032e-08,
"loss": 0.0003,
"step": 48900
},
{
"epoch": 2.9024874736873314,
"grad_norm": 0.008442184887826443,
"learning_rate": 2.7101762082141216e-08,
"loss": 0.0004,
"step": 48950
},
{
"epoch": 2.90545228141955,
"grad_norm": 0.14164365828037262,
"learning_rate": 2.549603429153835e-08,
"loss": 0.0003,
"step": 49000
},
{
"epoch": 2.9084170891517687,
"grad_norm": 0.1371513307094574,
"learning_rate": 2.393921681119571e-08,
"loss": 0.0005,
"step": 49050
},
{
"epoch": 2.911381896883987,
"grad_norm": 0.01292746514081955,
"learning_rate": 2.243132494805289e-08,
"loss": 0.0005,
"step": 49100
},
{
"epoch": 2.9143467046162055,
"grad_norm": 0.09286642074584961,
"learning_rate": 2.0972373528000966e-08,
"loss": 0.0006,
"step": 49150
},
{
"epoch": 2.9173115123484243,
"grad_norm": 0.027824992313981056,
"learning_rate": 1.9562376895740363e-08,
"loss": 0.0007,
"step": 49200
},
{
"epoch": 2.9202763200806428,
"grad_norm": 0.06678981333971024,
"learning_rate": 1.820134891463765e-08,
"loss": 0.0005,
"step": 49250
},
{
"epoch": 2.9232411278128616,
"grad_norm": 0.007972050458192825,
"learning_rate": 1.688930296659064e-08,
"loss": 0.0004,
"step": 49300
},
{
"epoch": 2.92620593554508,
"grad_norm": 0.016727251932024956,
"learning_rate": 1.56262519518946e-08,
"loss": 0.0004,
"step": 49350
},
{
"epoch": 2.9291707432772984,
"grad_norm": 0.8780906796455383,
"learning_rate": 1.4412208289118491e-08,
"loss": 0.0007,
"step": 49400
},
{
"epoch": 2.932135551009517,
"grad_norm": 0.006345132831484079,
"learning_rate": 1.3247183914980033e-08,
"loss": 0.0008,
"step": 49450
},
{
"epoch": 2.9351003587417357,
"grad_norm": 0.5253104567527771,
"learning_rate": 1.2131190284230266e-08,
"loss": 0.0014,
"step": 49500
},
{
"epoch": 2.938065166473954,
"grad_norm": 0.0056500621140003204,
"learning_rate": 1.1064238369540292e-08,
"loss": 0.0002,
"step": 49550
},
{
"epoch": 2.941029974206173,
"grad_norm": 0.06418248265981674,
"learning_rate": 1.0046338661392485e-08,
"loss": 0.0008,
"step": 49600
},
{
"epoch": 2.9439947819383914,
"grad_norm": 1.171954870223999,
"learning_rate": 9.077501167979451e-09,
"loss": 0.0013,
"step": 49650
},
{
"epoch": 2.94695958967061,
"grad_norm": 0.10743585973978043,
"learning_rate": 8.157735415103563e-09,
"loss": 0.0004,
"step": 49700
},
{
"epoch": 2.949924397402828,
"grad_norm": 0.0010991474846377969,
"learning_rate": 7.2870504460842425e-09,
"loss": 0.0006,
"step": 49750
},
{
"epoch": 2.952889205135047,
"grad_norm": 0.0020814514718949795,
"learning_rate": 6.465454821668604e-09,
"loss": 0.0005,
"step": 49800
},
{
"epoch": 2.9558540128672655,
"grad_norm": 0.1303500384092331,
"learning_rate": 5.6929566199481755e-09,
"loss": 0.0005,
"step": 49850
},
{
"epoch": 2.9588188205994843,
"grad_norm": 0.008715493604540825,
"learning_rate": 4.969563436278413e-09,
"loss": 0.0001,
"step": 49900
},
{
"epoch": 2.9617836283317027,
"grad_norm": 0.005461179651319981,
"learning_rate": 4.295282383204868e-09,
"loss": 0.0005,
"step": 49950
},
{
"epoch": 2.964748436063921,
"grad_norm": 0.0482095368206501,
"learning_rate": 3.6701200903921373e-09,
"loss": 0.0005,
"step": 50000
},
{
"epoch": 2.964748436063921,
"eval_loss": 0.09686783701181412,
"eval_runtime": 1785.8542,
"eval_samples_per_second": 3.279,
"eval_steps_per_second": 0.82,
"eval_wer": 11.62341934069147,
"step": 50000
},
{
"epoch": 2.9677132437961395,
"grad_norm": 0.37676167488098145,
"learning_rate": 3.094082704560575e-09,
"loss": 0.0005,
"step": 50050
},
{
"epoch": 2.9706780515283584,
"grad_norm": 0.004809459205716848,
"learning_rate": 2.567175889424123e-09,
"loss": 0.0002,
"step": 50100
},
{
"epoch": 2.973642859260577,
"grad_norm": 0.024880768731236458,
"learning_rate": 2.0894048256348e-09,
"loss": 0.0004,
"step": 50150
},
{
"epoch": 2.9766076669927957,
"grad_norm": 0.007508778013288975,
"learning_rate": 1.6607742107327408e-09,
"loss": 0.0009,
"step": 50200
},
{
"epoch": 2.979572474725014,
"grad_norm": 0.007764613721519709,
"learning_rate": 1.2812882590990116e-09,
"loss": 0.0009,
"step": 50250
},
{
"epoch": 2.9825372824572325,
"grad_norm": 0.04864663630723953,
"learning_rate": 9.509507019145326e-10,
"loss": 0.0005,
"step": 50300
},
{
"epoch": 2.9855020901894513,
"grad_norm": 0.09490835666656494,
"learning_rate": 6.697647871245494e-10,
"loss": 0.0002,
"step": 50350
},
{
"epoch": 2.9884668979216698,
"grad_norm": 0.039669957011938095,
"learning_rate": 4.3773327940477285e-10,
"loss": 0.0005,
"step": 50400
},
{
"epoch": 2.9914317056538886,
"grad_norm": 0.461487352848053,
"learning_rate": 2.5485846013362234e-10,
"loss": 0.0007,
"step": 50450
},
{
"epoch": 2.994396513386107,
"grad_norm": 0.10162738710641861,
"learning_rate": 1.2114212737446286e-10,
"loss": 0.0002,
"step": 50500
},
{
"epoch": 2.9973613211183254,
"grad_norm": 0.023312032222747803,
"learning_rate": 3.658559585173471e-11,
"loss": 0.0012,
"step": 50550
}
],
"logging_steps": 50,
"max_steps": 50595,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 2500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.3366413561430016e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}