whisper_13 / trainer_state.json
MrEzzat's picture
Upload 8 files
15af6a2 verified
{
"best_global_step": 13000,
"best_metric": 13.793103448275861,
"best_model_checkpoint": "./whisper-large-v3-atc-mrezzat/checkpoint-13000",
"epoch": 27.956989247311828,
"eval_steps": 500,
"global_step": 13000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.053763440860215055,
"grad_norm": 7.093221187591553,
"learning_rate": 4.800000000000001e-07,
"loss": 1.2803,
"step": 25
},
{
"epoch": 0.10752688172043011,
"grad_norm": 3.561824321746826,
"learning_rate": 9.800000000000001e-07,
"loss": 1.024,
"step": 50
},
{
"epoch": 0.16129032258064516,
"grad_norm": 3.972370147705078,
"learning_rate": 1.48e-06,
"loss": 0.8323,
"step": 75
},
{
"epoch": 0.21505376344086022,
"grad_norm": 5.043467044830322,
"learning_rate": 1.98e-06,
"loss": 0.7706,
"step": 100
},
{
"epoch": 0.26881720430107525,
"grad_norm": 3.704352855682373,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.6839,
"step": 125
},
{
"epoch": 0.3225806451612903,
"grad_norm": 2.454521894454956,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.6756,
"step": 150
},
{
"epoch": 0.3763440860215054,
"grad_norm": 4.109330654144287,
"learning_rate": 3.48e-06,
"loss": 0.6282,
"step": 175
},
{
"epoch": 0.43010752688172044,
"grad_norm": 3.429811477661133,
"learning_rate": 3.980000000000001e-06,
"loss": 0.5735,
"step": 200
},
{
"epoch": 0.4838709677419355,
"grad_norm": 3.641101121902466,
"learning_rate": 4.48e-06,
"loss": 0.535,
"step": 225
},
{
"epoch": 0.5376344086021505,
"grad_norm": 3.169020652770996,
"learning_rate": 4.980000000000001e-06,
"loss": 0.5174,
"step": 250
},
{
"epoch": 0.5913978494623656,
"grad_norm": 2.9315528869628906,
"learning_rate": 5.480000000000001e-06,
"loss": 0.446,
"step": 275
},
{
"epoch": 0.6451612903225806,
"grad_norm": 3.473388671875,
"learning_rate": 5.98e-06,
"loss": 0.5147,
"step": 300
},
{
"epoch": 0.6989247311827957,
"grad_norm": 3.428112506866455,
"learning_rate": 6.480000000000001e-06,
"loss": 0.466,
"step": 325
},
{
"epoch": 0.7526881720430108,
"grad_norm": 2.7643871307373047,
"learning_rate": 6.98e-06,
"loss": 0.4712,
"step": 350
},
{
"epoch": 0.8064516129032258,
"grad_norm": 2.77138352394104,
"learning_rate": 7.48e-06,
"loss": 0.4456,
"step": 375
},
{
"epoch": 0.8602150537634409,
"grad_norm": 3.0843310356140137,
"learning_rate": 7.980000000000002e-06,
"loss": 0.508,
"step": 400
},
{
"epoch": 0.9139784946236559,
"grad_norm": 2.7599642276763916,
"learning_rate": 8.48e-06,
"loss": 0.4453,
"step": 425
},
{
"epoch": 0.967741935483871,
"grad_norm": 3.672558546066284,
"learning_rate": 8.98e-06,
"loss": 0.4443,
"step": 450
},
{
"epoch": 1.021505376344086,
"grad_norm": 3.0827476978302,
"learning_rate": 9.48e-06,
"loss": 0.3988,
"step": 475
},
{
"epoch": 1.075268817204301,
"grad_norm": 2.4267773628234863,
"learning_rate": 9.980000000000001e-06,
"loss": 0.3737,
"step": 500
},
{
"epoch": 1.075268817204301,
"eval_loss": 0.27068084478378296,
"eval_runtime": 204.8229,
"eval_samples_per_second": 4.619,
"eval_steps_per_second": 0.581,
"eval_wer": 16.612739206804108,
"step": 500
},
{
"epoch": 1.129032258064516,
"grad_norm": 1.7745027542114258,
"learning_rate": 9.982156133828997e-06,
"loss": 0.3749,
"step": 525
},
{
"epoch": 1.1827956989247312,
"grad_norm": 2.5673892498016357,
"learning_rate": 9.963568773234202e-06,
"loss": 0.3547,
"step": 550
},
{
"epoch": 1.2365591397849462,
"grad_norm": 3.1428632736206055,
"learning_rate": 9.944981412639407e-06,
"loss": 0.3335,
"step": 575
},
{
"epoch": 1.2903225806451613,
"grad_norm": 3.2400989532470703,
"learning_rate": 9.92639405204461e-06,
"loss": 0.3382,
"step": 600
},
{
"epoch": 1.3440860215053765,
"grad_norm": 3.075026273727417,
"learning_rate": 9.907806691449815e-06,
"loss": 0.3641,
"step": 625
},
{
"epoch": 1.3978494623655915,
"grad_norm": 2.6721091270446777,
"learning_rate": 9.88921933085502e-06,
"loss": 0.358,
"step": 650
},
{
"epoch": 1.4516129032258065,
"grad_norm": 1.545538306236267,
"learning_rate": 9.870631970260223e-06,
"loss": 0.334,
"step": 675
},
{
"epoch": 1.5053763440860215,
"grad_norm": 2.7524378299713135,
"learning_rate": 9.852044609665428e-06,
"loss": 0.3401,
"step": 700
},
{
"epoch": 1.5591397849462365,
"grad_norm": 2.7014448642730713,
"learning_rate": 9.833457249070633e-06,
"loss": 0.3305,
"step": 725
},
{
"epoch": 1.6129032258064515,
"grad_norm": 2.5529074668884277,
"learning_rate": 9.814869888475837e-06,
"loss": 0.316,
"step": 750
},
{
"epoch": 1.6666666666666665,
"grad_norm": 2.5125572681427,
"learning_rate": 9.796282527881042e-06,
"loss": 0.3324,
"step": 775
},
{
"epoch": 1.7204301075268817,
"grad_norm": 3.541673183441162,
"learning_rate": 9.777695167286247e-06,
"loss": 0.3025,
"step": 800
},
{
"epoch": 1.7741935483870968,
"grad_norm": 2.809391498565674,
"learning_rate": 9.75910780669145e-06,
"loss": 0.3263,
"step": 825
},
{
"epoch": 1.827956989247312,
"grad_norm": 3.627777576446533,
"learning_rate": 9.740520446096655e-06,
"loss": 0.3209,
"step": 850
},
{
"epoch": 1.881720430107527,
"grad_norm": 2.672043561935425,
"learning_rate": 9.721933085501858e-06,
"loss": 0.2879,
"step": 875
},
{
"epoch": 1.935483870967742,
"grad_norm": 2.783828020095825,
"learning_rate": 9.703345724907063e-06,
"loss": 0.2813,
"step": 900
},
{
"epoch": 1.989247311827957,
"grad_norm": 3.2919387817382812,
"learning_rate": 9.684758364312268e-06,
"loss": 0.3037,
"step": 925
},
{
"epoch": 2.043010752688172,
"grad_norm": 2.6845808029174805,
"learning_rate": 9.666171003717473e-06,
"loss": 0.2344,
"step": 950
},
{
"epoch": 2.096774193548387,
"grad_norm": 2.9660229682922363,
"learning_rate": 9.647583643122678e-06,
"loss": 0.1819,
"step": 975
},
{
"epoch": 2.150537634408602,
"grad_norm": 1.8242266178131104,
"learning_rate": 9.628996282527881e-06,
"loss": 0.2016,
"step": 1000
},
{
"epoch": 2.150537634408602,
"eval_loss": 0.27269652485847473,
"eval_runtime": 202.1375,
"eval_samples_per_second": 4.68,
"eval_steps_per_second": 0.589,
"eval_wer": 14.246094111121382,
"step": 1000
},
{
"epoch": 2.204301075268817,
"grad_norm": 2.224641799926758,
"learning_rate": 9.610408921933086e-06,
"loss": 0.2209,
"step": 1025
},
{
"epoch": 2.258064516129032,
"grad_norm": 2.039360761642456,
"learning_rate": 9.59182156133829e-06,
"loss": 0.2072,
"step": 1050
},
{
"epoch": 2.3118279569892475,
"grad_norm": 2.753037929534912,
"learning_rate": 9.573234200743495e-06,
"loss": 0.1707,
"step": 1075
},
{
"epoch": 2.3655913978494625,
"grad_norm": 2.8433048725128174,
"learning_rate": 9.5546468401487e-06,
"loss": 0.2005,
"step": 1100
},
{
"epoch": 2.4193548387096775,
"grad_norm": 2.2583348751068115,
"learning_rate": 9.536059479553905e-06,
"loss": 0.1792,
"step": 1125
},
{
"epoch": 2.4731182795698925,
"grad_norm": 2.151129722595215,
"learning_rate": 9.51747211895911e-06,
"loss": 0.2076,
"step": 1150
},
{
"epoch": 2.5268817204301075,
"grad_norm": 2.289693593978882,
"learning_rate": 9.498884758364313e-06,
"loss": 0.1901,
"step": 1175
},
{
"epoch": 2.5806451612903225,
"grad_norm": 2.0624475479125977,
"learning_rate": 9.480297397769518e-06,
"loss": 0.1938,
"step": 1200
},
{
"epoch": 2.6344086021505375,
"grad_norm": 2.455775737762451,
"learning_rate": 9.461710037174721e-06,
"loss": 0.2116,
"step": 1225
},
{
"epoch": 2.688172043010753,
"grad_norm": 1.853768229484558,
"learning_rate": 9.443122676579926e-06,
"loss": 0.2008,
"step": 1250
},
{
"epoch": 2.741935483870968,
"grad_norm": 2.4691860675811768,
"learning_rate": 9.424535315985131e-06,
"loss": 0.2167,
"step": 1275
},
{
"epoch": 2.795698924731183,
"grad_norm": 1.8937417268753052,
"learning_rate": 9.405947955390336e-06,
"loss": 0.1864,
"step": 1300
},
{
"epoch": 2.849462365591398,
"grad_norm": 2.786158323287964,
"learning_rate": 9.38736059479554e-06,
"loss": 0.1916,
"step": 1325
},
{
"epoch": 2.903225806451613,
"grad_norm": 2.7571518421173096,
"learning_rate": 9.368773234200744e-06,
"loss": 0.1982,
"step": 1350
},
{
"epoch": 2.956989247311828,
"grad_norm": 2.334691286087036,
"learning_rate": 9.35018587360595e-06,
"loss": 0.1864,
"step": 1375
},
{
"epoch": 3.010752688172043,
"grad_norm": 1.4865392446517944,
"learning_rate": 9.331598513011153e-06,
"loss": 0.1593,
"step": 1400
},
{
"epoch": 3.064516129032258,
"grad_norm": 1.797865629196167,
"learning_rate": 9.313011152416358e-06,
"loss": 0.1123,
"step": 1425
},
{
"epoch": 3.118279569892473,
"grad_norm": 1.5422674417495728,
"learning_rate": 9.294423791821563e-06,
"loss": 0.1132,
"step": 1450
},
{
"epoch": 3.172043010752688,
"grad_norm": 1.649880290031433,
"learning_rate": 9.275836431226768e-06,
"loss": 0.1075,
"step": 1475
},
{
"epoch": 3.225806451612903,
"grad_norm": 2.0276639461517334,
"learning_rate": 9.25724907063197e-06,
"loss": 0.1164,
"step": 1500
},
{
"epoch": 3.225806451612903,
"eval_loss": 0.28087103366851807,
"eval_runtime": 202.3199,
"eval_samples_per_second": 4.676,
"eval_steps_per_second": 0.588,
"eval_wer": 15.050383655357308,
"step": 1500
},
{
"epoch": 3.279569892473118,
"grad_norm": 2.4856255054473877,
"learning_rate": 9.238661710037176e-06,
"loss": 0.1112,
"step": 1525
},
{
"epoch": 3.3333333333333335,
"grad_norm": 2.857877492904663,
"learning_rate": 9.220074349442379e-06,
"loss": 0.1131,
"step": 1550
},
{
"epoch": 3.3870967741935485,
"grad_norm": 1.7496925592422485,
"learning_rate": 9.201486988847584e-06,
"loss": 0.1306,
"step": 1575
},
{
"epoch": 3.4408602150537635,
"grad_norm": 1.9851291179656982,
"learning_rate": 9.182899628252789e-06,
"loss": 0.114,
"step": 1600
},
{
"epoch": 3.4946236559139785,
"grad_norm": 2.6501877307891846,
"learning_rate": 9.164312267657994e-06,
"loss": 0.1208,
"step": 1625
},
{
"epoch": 3.5483870967741935,
"grad_norm": 1.6141562461853027,
"learning_rate": 9.145724907063197e-06,
"loss": 0.1194,
"step": 1650
},
{
"epoch": 3.6021505376344085,
"grad_norm": 2.246312141418457,
"learning_rate": 9.127137546468402e-06,
"loss": 0.121,
"step": 1675
},
{
"epoch": 3.6559139784946235,
"grad_norm": 1.7408199310302734,
"learning_rate": 9.108550185873607e-06,
"loss": 0.1178,
"step": 1700
},
{
"epoch": 3.709677419354839,
"grad_norm": 2.1992955207824707,
"learning_rate": 9.08996282527881e-06,
"loss": 0.0901,
"step": 1725
},
{
"epoch": 3.763440860215054,
"grad_norm": 2.057574987411499,
"learning_rate": 9.071375464684016e-06,
"loss": 0.0999,
"step": 1750
},
{
"epoch": 3.817204301075269,
"grad_norm": 2.032602548599243,
"learning_rate": 9.052788104089219e-06,
"loss": 0.1057,
"step": 1775
},
{
"epoch": 3.870967741935484,
"grad_norm": 1.700415849685669,
"learning_rate": 9.034200743494424e-06,
"loss": 0.1127,
"step": 1800
},
{
"epoch": 3.924731182795699,
"grad_norm": 2.944364070892334,
"learning_rate": 9.015613382899629e-06,
"loss": 0.1304,
"step": 1825
},
{
"epoch": 3.978494623655914,
"grad_norm": 2.807861804962158,
"learning_rate": 8.997026022304834e-06,
"loss": 0.1215,
"step": 1850
},
{
"epoch": 4.032258064516129,
"grad_norm": 2.064152717590332,
"learning_rate": 8.978438661710039e-06,
"loss": 0.0942,
"step": 1875
},
{
"epoch": 4.086021505376344,
"grad_norm": 2.315067768096924,
"learning_rate": 8.959851301115242e-06,
"loss": 0.0636,
"step": 1900
},
{
"epoch": 4.139784946236559,
"grad_norm": 1.4923697710037231,
"learning_rate": 8.941263940520447e-06,
"loss": 0.0553,
"step": 1925
},
{
"epoch": 4.193548387096774,
"grad_norm": 1.0652992725372314,
"learning_rate": 8.92267657992565e-06,
"loss": 0.0545,
"step": 1950
},
{
"epoch": 4.247311827956989,
"grad_norm": 1.9834535121917725,
"learning_rate": 8.904089219330855e-06,
"loss": 0.049,
"step": 1975
},
{
"epoch": 4.301075268817204,
"grad_norm": 1.4393575191497803,
"learning_rate": 8.88550185873606e-06,
"loss": 0.0551,
"step": 2000
},
{
"epoch": 4.301075268817204,
"eval_loss": 0.3065280318260193,
"eval_runtime": 202.95,
"eval_samples_per_second": 4.661,
"eval_steps_per_second": 0.586,
"eval_wer": 15.087362484977351,
"step": 2000
},
{
"epoch": 4.354838709677419,
"grad_norm": 1.7886149883270264,
"learning_rate": 8.866914498141265e-06,
"loss": 0.0629,
"step": 2025
},
{
"epoch": 4.408602150537634,
"grad_norm": 1.470372200012207,
"learning_rate": 8.84832713754647e-06,
"loss": 0.0569,
"step": 2050
},
{
"epoch": 4.462365591397849,
"grad_norm": 2.3206701278686523,
"learning_rate": 8.829739776951673e-06,
"loss": 0.0612,
"step": 2075
},
{
"epoch": 4.516129032258064,
"grad_norm": 1.5979121923446655,
"learning_rate": 8.811152416356878e-06,
"loss": 0.0703,
"step": 2100
},
{
"epoch": 4.56989247311828,
"grad_norm": 2.818779945373535,
"learning_rate": 8.792565055762082e-06,
"loss": 0.0637,
"step": 2125
},
{
"epoch": 4.623655913978495,
"grad_norm": 2.848932981491089,
"learning_rate": 8.773977695167287e-06,
"loss": 0.0636,
"step": 2150
},
{
"epoch": 4.67741935483871,
"grad_norm": 1.2150336503982544,
"learning_rate": 8.755390334572492e-06,
"loss": 0.0898,
"step": 2175
},
{
"epoch": 4.731182795698925,
"grad_norm": 2.5077340602874756,
"learning_rate": 8.736802973977697e-06,
"loss": 0.0509,
"step": 2200
},
{
"epoch": 4.78494623655914,
"grad_norm": 2.6455860137939453,
"learning_rate": 8.7182156133829e-06,
"loss": 0.0662,
"step": 2225
},
{
"epoch": 4.838709677419355,
"grad_norm": 2.1598827838897705,
"learning_rate": 8.699628252788105e-06,
"loss": 0.0618,
"step": 2250
},
{
"epoch": 4.89247311827957,
"grad_norm": 1.961423397064209,
"learning_rate": 8.68104089219331e-06,
"loss": 0.0687,
"step": 2275
},
{
"epoch": 4.946236559139785,
"grad_norm": 1.5583302974700928,
"learning_rate": 8.662453531598513e-06,
"loss": 0.0587,
"step": 2300
},
{
"epoch": 5.0,
"grad_norm": 1.7334260940551758,
"learning_rate": 8.643866171003718e-06,
"loss": 0.0639,
"step": 2325
},
{
"epoch": 5.053763440860215,
"grad_norm": 1.385697364807129,
"learning_rate": 8.625278810408923e-06,
"loss": 0.0295,
"step": 2350
},
{
"epoch": 5.10752688172043,
"grad_norm": 1.8390223979949951,
"learning_rate": 8.606691449814128e-06,
"loss": 0.0336,
"step": 2375
},
{
"epoch": 5.161290322580645,
"grad_norm": 1.6100000143051147,
"learning_rate": 8.588104089219331e-06,
"loss": 0.0346,
"step": 2400
},
{
"epoch": 5.21505376344086,
"grad_norm": 1.8762363195419312,
"learning_rate": 8.569516728624536e-06,
"loss": 0.0355,
"step": 2425
},
{
"epoch": 5.268817204301075,
"grad_norm": 0.8988639116287231,
"learning_rate": 8.55092936802974e-06,
"loss": 0.038,
"step": 2450
},
{
"epoch": 5.32258064516129,
"grad_norm": 1.372160792350769,
"learning_rate": 8.532342007434945e-06,
"loss": 0.0443,
"step": 2475
},
{
"epoch": 5.376344086021505,
"grad_norm": 1.3820526599884033,
"learning_rate": 8.51375464684015e-06,
"loss": 0.0345,
"step": 2500
},
{
"epoch": 5.376344086021505,
"eval_loss": 0.31700512766838074,
"eval_runtime": 202.9264,
"eval_samples_per_second": 4.662,
"eval_steps_per_second": 0.586,
"eval_wer": 14.902468336877137,
"step": 2500
},
{
"epoch": 5.43010752688172,
"grad_norm": 1.8369241952896118,
"learning_rate": 8.495167286245355e-06,
"loss": 0.0362,
"step": 2525
},
{
"epoch": 5.483870967741936,
"grad_norm": 1.735297441482544,
"learning_rate": 8.476579925650558e-06,
"loss": 0.0369,
"step": 2550
},
{
"epoch": 5.53763440860215,
"grad_norm": 2.1869583129882812,
"learning_rate": 8.457992565055763e-06,
"loss": 0.0359,
"step": 2575
},
{
"epoch": 5.591397849462366,
"grad_norm": 0.9142827987670898,
"learning_rate": 8.439405204460968e-06,
"loss": 0.0369,
"step": 2600
},
{
"epoch": 5.645161290322581,
"grad_norm": 1.5425326824188232,
"learning_rate": 8.420817843866171e-06,
"loss": 0.0413,
"step": 2625
},
{
"epoch": 5.698924731182796,
"grad_norm": 1.532554268836975,
"learning_rate": 8.402230483271376e-06,
"loss": 0.0455,
"step": 2650
},
{
"epoch": 5.752688172043011,
"grad_norm": 1.7818132638931274,
"learning_rate": 8.38364312267658e-06,
"loss": 0.0351,
"step": 2675
},
{
"epoch": 5.806451612903226,
"grad_norm": 0.8005560040473938,
"learning_rate": 8.365055762081784e-06,
"loss": 0.0446,
"step": 2700
},
{
"epoch": 5.860215053763441,
"grad_norm": 1.37205171585083,
"learning_rate": 8.34646840148699e-06,
"loss": 0.0447,
"step": 2725
},
{
"epoch": 5.913978494623656,
"grad_norm": 0.9380530714988708,
"learning_rate": 8.327881040892194e-06,
"loss": 0.0335,
"step": 2750
},
{
"epoch": 5.967741935483871,
"grad_norm": 2.020190954208374,
"learning_rate": 8.3092936802974e-06,
"loss": 0.0354,
"step": 2775
},
{
"epoch": 6.021505376344086,
"grad_norm": 0.7758223414421082,
"learning_rate": 8.290706319702603e-06,
"loss": 0.0245,
"step": 2800
},
{
"epoch": 6.075268817204301,
"grad_norm": 1.127894639968872,
"learning_rate": 8.272118959107808e-06,
"loss": 0.0233,
"step": 2825
},
{
"epoch": 6.129032258064516,
"grad_norm": 0.7980286478996277,
"learning_rate": 8.253531598513011e-06,
"loss": 0.0234,
"step": 2850
},
{
"epoch": 6.182795698924731,
"grad_norm": 1.1685783863067627,
"learning_rate": 8.234944237918216e-06,
"loss": 0.0311,
"step": 2875
},
{
"epoch": 6.236559139784946,
"grad_norm": 1.3557145595550537,
"learning_rate": 8.216356877323421e-06,
"loss": 0.0219,
"step": 2900
},
{
"epoch": 6.290322580645161,
"grad_norm": 0.7474266290664673,
"learning_rate": 8.197769516728626e-06,
"loss": 0.0244,
"step": 2925
},
{
"epoch": 6.344086021505376,
"grad_norm": 0.9074203968048096,
"learning_rate": 8.179182156133829e-06,
"loss": 0.0195,
"step": 2950
},
{
"epoch": 6.397849462365591,
"grad_norm": 1.0333547592163086,
"learning_rate": 8.160594795539034e-06,
"loss": 0.0284,
"step": 2975
},
{
"epoch": 6.451612903225806,
"grad_norm": 1.9100396633148193,
"learning_rate": 8.142007434944239e-06,
"loss": 0.0186,
"step": 3000
},
{
"epoch": 6.451612903225806,
"eval_loss": 0.3364327549934387,
"eval_runtime": 203.4115,
"eval_samples_per_second": 4.651,
"eval_steps_per_second": 0.585,
"eval_wer": 14.77304243320699,
"step": 3000
},
{
"epoch": 6.505376344086022,
"grad_norm": 0.6991943120956421,
"learning_rate": 8.123420074349442e-06,
"loss": 0.0201,
"step": 3025
},
{
"epoch": 6.559139784946236,
"grad_norm": 2.668675422668457,
"learning_rate": 8.104832713754647e-06,
"loss": 0.0257,
"step": 3050
},
{
"epoch": 6.612903225806452,
"grad_norm": 1.0620919466018677,
"learning_rate": 8.086245353159852e-06,
"loss": 0.0275,
"step": 3075
},
{
"epoch": 6.666666666666667,
"grad_norm": 1.9009549617767334,
"learning_rate": 8.067657992565057e-06,
"loss": 0.0215,
"step": 3100
},
{
"epoch": 6.720430107526882,
"grad_norm": 0.8860704302787781,
"learning_rate": 8.04907063197026e-06,
"loss": 0.0199,
"step": 3125
},
{
"epoch": 6.774193548387097,
"grad_norm": 1.2130790948867798,
"learning_rate": 8.030483271375466e-06,
"loss": 0.0237,
"step": 3150
},
{
"epoch": 6.827956989247312,
"grad_norm": 1.5909550189971924,
"learning_rate": 8.011895910780669e-06,
"loss": 0.0269,
"step": 3175
},
{
"epoch": 6.881720430107527,
"grad_norm": 1.6755486726760864,
"learning_rate": 7.993308550185874e-06,
"loss": 0.0339,
"step": 3200
},
{
"epoch": 6.935483870967742,
"grad_norm": 1.2641445398330688,
"learning_rate": 7.974721189591079e-06,
"loss": 0.0204,
"step": 3225
},
{
"epoch": 6.989247311827957,
"grad_norm": 1.7877347469329834,
"learning_rate": 7.956133828996284e-06,
"loss": 0.0266,
"step": 3250
},
{
"epoch": 7.043010752688172,
"grad_norm": 0.9837028384208679,
"learning_rate": 7.937546468401489e-06,
"loss": 0.02,
"step": 3275
},
{
"epoch": 7.096774193548387,
"grad_norm": 1.2097680568695068,
"learning_rate": 7.918959107806692e-06,
"loss": 0.0126,
"step": 3300
},
{
"epoch": 7.150537634408602,
"grad_norm": 1.447039246559143,
"learning_rate": 7.900371747211897e-06,
"loss": 0.018,
"step": 3325
},
{
"epoch": 7.204301075268817,
"grad_norm": 0.8316716551780701,
"learning_rate": 7.8817843866171e-06,
"loss": 0.0178,
"step": 3350
},
{
"epoch": 7.258064516129032,
"grad_norm": 0.9670646786689758,
"learning_rate": 7.863197026022305e-06,
"loss": 0.0122,
"step": 3375
},
{
"epoch": 7.311827956989247,
"grad_norm": 1.4154245853424072,
"learning_rate": 7.84460966542751e-06,
"loss": 0.0171,
"step": 3400
},
{
"epoch": 7.365591397849462,
"grad_norm": 1.3647488355636597,
"learning_rate": 7.826022304832714e-06,
"loss": 0.0151,
"step": 3425
},
{
"epoch": 7.419354838709677,
"grad_norm": 1.548120141029358,
"learning_rate": 7.807434944237919e-06,
"loss": 0.0149,
"step": 3450
},
{
"epoch": 7.473118279569892,
"grad_norm": 1.6091225147247314,
"learning_rate": 7.788847583643124e-06,
"loss": 0.0168,
"step": 3475
},
{
"epoch": 7.526881720430108,
"grad_norm": 1.1116617918014526,
"learning_rate": 7.770260223048329e-06,
"loss": 0.0161,
"step": 3500
},
{
"epoch": 7.526881720430108,
"eval_loss": 0.34663301706314087,
"eval_runtime": 202.0433,
"eval_samples_per_second": 4.682,
"eval_steps_per_second": 0.589,
"eval_wer": 14.551169455486734,
"step": 3500
},
{
"epoch": 7.580645161290323,
"grad_norm": 1.468459129333496,
"learning_rate": 7.751672862453532e-06,
"loss": 0.0193,
"step": 3525
},
{
"epoch": 7.634408602150538,
"grad_norm": 1.2769989967346191,
"learning_rate": 7.733085501858737e-06,
"loss": 0.0184,
"step": 3550
},
{
"epoch": 7.688172043010753,
"grad_norm": 1.0488286018371582,
"learning_rate": 7.71449814126394e-06,
"loss": 0.019,
"step": 3575
},
{
"epoch": 7.741935483870968,
"grad_norm": 0.8325207829475403,
"learning_rate": 7.695910780669145e-06,
"loss": 0.0252,
"step": 3600
},
{
"epoch": 7.795698924731183,
"grad_norm": 0.7656351923942566,
"learning_rate": 7.67732342007435e-06,
"loss": 0.0155,
"step": 3625
},
{
"epoch": 7.849462365591398,
"grad_norm": 0.9184199571609497,
"learning_rate": 7.658736059479555e-06,
"loss": 0.028,
"step": 3650
},
{
"epoch": 7.903225806451613,
"grad_norm": 1.2135573625564575,
"learning_rate": 7.64014869888476e-06,
"loss": 0.0178,
"step": 3675
},
{
"epoch": 7.956989247311828,
"grad_norm": 0.8172153830528259,
"learning_rate": 7.621561338289963e-06,
"loss": 0.0254,
"step": 3700
},
{
"epoch": 8.010752688172044,
"grad_norm": 0.431659072637558,
"learning_rate": 7.602973977695168e-06,
"loss": 0.0145,
"step": 3725
},
{
"epoch": 8.064516129032258,
"grad_norm": 0.9533307552337646,
"learning_rate": 7.584386617100372e-06,
"loss": 0.0165,
"step": 3750
},
{
"epoch": 8.118279569892474,
"grad_norm": 0.7198922038078308,
"learning_rate": 7.565799256505577e-06,
"loss": 0.0107,
"step": 3775
},
{
"epoch": 8.172043010752688,
"grad_norm": 0.8582783937454224,
"learning_rate": 7.547211895910781e-06,
"loss": 0.0123,
"step": 3800
},
{
"epoch": 8.225806451612904,
"grad_norm": 1.0113513469696045,
"learning_rate": 7.528624535315986e-06,
"loss": 0.014,
"step": 3825
},
{
"epoch": 8.279569892473118,
"grad_norm": 0.7275539040565491,
"learning_rate": 7.51003717472119e-06,
"loss": 0.0115,
"step": 3850
},
{
"epoch": 8.333333333333334,
"grad_norm": 0.4298296570777893,
"learning_rate": 7.491449814126395e-06,
"loss": 0.0104,
"step": 3875
},
{
"epoch": 8.387096774193548,
"grad_norm": 0.7536816596984863,
"learning_rate": 7.4728624535316e-06,
"loss": 0.0132,
"step": 3900
},
{
"epoch": 8.440860215053764,
"grad_norm": 1.0941580533981323,
"learning_rate": 7.454275092936804e-06,
"loss": 0.012,
"step": 3925
},
{
"epoch": 8.494623655913978,
"grad_norm": 1.0508357286453247,
"learning_rate": 7.435687732342009e-06,
"loss": 0.0135,
"step": 3950
},
{
"epoch": 8.548387096774194,
"grad_norm": 0.6876735687255859,
"learning_rate": 7.417100371747212e-06,
"loss": 0.0156,
"step": 3975
},
{
"epoch": 8.602150537634408,
"grad_norm": 0.8525980114936829,
"learning_rate": 7.398513011152417e-06,
"loss": 0.0106,
"step": 4000
},
{
"epoch": 8.602150537634408,
"eval_loss": 0.3538697063922882,
"eval_runtime": 202.0959,
"eval_samples_per_second": 4.681,
"eval_steps_per_second": 0.589,
"eval_wer": 14.338541185171488,
"step": 4000
},
{
"epoch": 8.655913978494624,
"grad_norm": 0.5362399220466614,
"learning_rate": 7.379925650557621e-06,
"loss": 0.0123,
"step": 4025
},
{
"epoch": 8.709677419354838,
"grad_norm": 0.8804866671562195,
"learning_rate": 7.361338289962826e-06,
"loss": 0.0171,
"step": 4050
},
{
"epoch": 8.763440860215054,
"grad_norm": 0.8643043041229248,
"learning_rate": 7.34275092936803e-06,
"loss": 0.0144,
"step": 4075
},
{
"epoch": 8.817204301075268,
"grad_norm": 0.8704060912132263,
"learning_rate": 7.3241635687732344e-06,
"loss": 0.0121,
"step": 4100
},
{
"epoch": 8.870967741935484,
"grad_norm": 0.6113823056221008,
"learning_rate": 7.305576208178439e-06,
"loss": 0.0154,
"step": 4125
},
{
"epoch": 8.924731182795698,
"grad_norm": 1.1631172895431519,
"learning_rate": 7.2869888475836436e-06,
"loss": 0.0109,
"step": 4150
},
{
"epoch": 8.978494623655914,
"grad_norm": 0.8042282462120056,
"learning_rate": 7.2684014869888485e-06,
"loss": 0.0158,
"step": 4175
},
{
"epoch": 9.03225806451613,
"grad_norm": 0.18347720801830292,
"learning_rate": 7.249814126394053e-06,
"loss": 0.0132,
"step": 4200
},
{
"epoch": 9.086021505376344,
"grad_norm": 0.7228168845176697,
"learning_rate": 7.231226765799258e-06,
"loss": 0.0099,
"step": 4225
},
{
"epoch": 9.13978494623656,
"grad_norm": 0.35777589678764343,
"learning_rate": 7.212639405204461e-06,
"loss": 0.0129,
"step": 4250
},
{
"epoch": 9.193548387096774,
"grad_norm": 0.2701317071914673,
"learning_rate": 7.194052044609666e-06,
"loss": 0.0095,
"step": 4275
},
{
"epoch": 9.24731182795699,
"grad_norm": 1.6921519041061401,
"learning_rate": 7.17546468401487e-06,
"loss": 0.0082,
"step": 4300
},
{
"epoch": 9.301075268817204,
"grad_norm": 0.27636006474494934,
"learning_rate": 7.156877323420075e-06,
"loss": 0.0075,
"step": 4325
},
{
"epoch": 9.35483870967742,
"grad_norm": 0.7335753440856934,
"learning_rate": 7.138289962825279e-06,
"loss": 0.0102,
"step": 4350
},
{
"epoch": 9.408602150537634,
"grad_norm": 0.9723600149154663,
"learning_rate": 7.119702602230484e-06,
"loss": 0.0104,
"step": 4375
},
{
"epoch": 9.46236559139785,
"grad_norm": 0.2777242660522461,
"learning_rate": 7.101115241635689e-06,
"loss": 0.0138,
"step": 4400
},
{
"epoch": 9.516129032258064,
"grad_norm": 0.42475125193595886,
"learning_rate": 7.082527881040892e-06,
"loss": 0.0074,
"step": 4425
},
{
"epoch": 9.56989247311828,
"grad_norm": 0.6463161110877991,
"learning_rate": 7.063940520446097e-06,
"loss": 0.0105,
"step": 4450
},
{
"epoch": 9.623655913978494,
"grad_norm": 1.2284172773361206,
"learning_rate": 7.0453531598513015e-06,
"loss": 0.0086,
"step": 4475
},
{
"epoch": 9.67741935483871,
"grad_norm": 0.6127483248710632,
"learning_rate": 7.0267657992565065e-06,
"loss": 0.013,
"step": 4500
},
{
"epoch": 9.67741935483871,
"eval_loss": 0.3432445228099823,
"eval_runtime": 202.5879,
"eval_samples_per_second": 4.67,
"eval_steps_per_second": 0.587,
"eval_wer": 14.883978922067117,
"step": 4500
},
{
"epoch": 9.731182795698924,
"grad_norm": 1.6641124486923218,
"learning_rate": 7.008178438661711e-06,
"loss": 0.0117,
"step": 4525
},
{
"epoch": 9.78494623655914,
"grad_norm": 0.49032703042030334,
"learning_rate": 6.989591078066915e-06,
"loss": 0.0113,
"step": 4550
},
{
"epoch": 9.838709677419354,
"grad_norm": 1.0603209733963013,
"learning_rate": 6.971003717472119e-06,
"loss": 0.0093,
"step": 4575
},
{
"epoch": 9.89247311827957,
"grad_norm": 1.1902903318405151,
"learning_rate": 6.952416356877324e-06,
"loss": 0.009,
"step": 4600
},
{
"epoch": 9.946236559139784,
"grad_norm": 0.4575275182723999,
"learning_rate": 6.933828996282529e-06,
"loss": 0.0103,
"step": 4625
},
{
"epoch": 10.0,
"grad_norm": 2.514280080795288,
"learning_rate": 6.915241635687733e-06,
"loss": 0.0146,
"step": 4650
},
{
"epoch": 10.053763440860216,
"grad_norm": 0.28566455841064453,
"learning_rate": 6.896654275092938e-06,
"loss": 0.008,
"step": 4675
},
{
"epoch": 10.10752688172043,
"grad_norm": 0.3524170219898224,
"learning_rate": 6.878066914498141e-06,
"loss": 0.0057,
"step": 4700
},
{
"epoch": 10.161290322580646,
"grad_norm": 1.7274552583694458,
"learning_rate": 6.859479553903346e-06,
"loss": 0.0083,
"step": 4725
},
{
"epoch": 10.21505376344086,
"grad_norm": 0.31285515427589417,
"learning_rate": 6.84089219330855e-06,
"loss": 0.0071,
"step": 4750
},
{
"epoch": 10.268817204301076,
"grad_norm": 0.8492361307144165,
"learning_rate": 6.822304832713755e-06,
"loss": 0.0086,
"step": 4775
},
{
"epoch": 10.32258064516129,
"grad_norm": 0.39797672629356384,
"learning_rate": 6.8037174721189595e-06,
"loss": 0.0088,
"step": 4800
},
{
"epoch": 10.376344086021506,
"grad_norm": 0.410177618265152,
"learning_rate": 6.7851301115241644e-06,
"loss": 0.0085,
"step": 4825
},
{
"epoch": 10.43010752688172,
"grad_norm": 0.45091158151626587,
"learning_rate": 6.766542750929369e-06,
"loss": 0.0062,
"step": 4850
},
{
"epoch": 10.483870967741936,
"grad_norm": 1.3592181205749512,
"learning_rate": 6.747955390334573e-06,
"loss": 0.01,
"step": 4875
},
{
"epoch": 10.53763440860215,
"grad_norm": 0.4976150691509247,
"learning_rate": 6.729368029739778e-06,
"loss": 0.0069,
"step": 4900
},
{
"epoch": 10.591397849462366,
"grad_norm": 0.14256972074508667,
"learning_rate": 6.710780669144982e-06,
"loss": 0.0064,
"step": 4925
},
{
"epoch": 10.64516129032258,
"grad_norm": 0.7307581901550293,
"learning_rate": 6.692193308550187e-06,
"loss": 0.0075,
"step": 4950
},
{
"epoch": 10.698924731182796,
"grad_norm": 0.8009108901023865,
"learning_rate": 6.673605947955391e-06,
"loss": 0.0071,
"step": 4975
},
{
"epoch": 10.75268817204301,
"grad_norm": 0.7494556307792664,
"learning_rate": 6.655018587360595e-06,
"loss": 0.0092,
"step": 5000
},
{
"epoch": 10.75268817204301,
"eval_loss": 0.3648987114429474,
"eval_runtime": 202.4649,
"eval_samples_per_second": 4.672,
"eval_steps_per_second": 0.588,
"eval_wer": 14.218359988906352,
"step": 5000
},
{
"epoch": 10.806451612903226,
"grad_norm": 1.12769615650177,
"learning_rate": 6.636431226765799e-06,
"loss": 0.0091,
"step": 5025
},
{
"epoch": 10.86021505376344,
"grad_norm": 0.7359474897384644,
"learning_rate": 6.617843866171004e-06,
"loss": 0.0112,
"step": 5050
},
{
"epoch": 10.913978494623656,
"grad_norm": 0.4451664090156555,
"learning_rate": 6.599256505576209e-06,
"loss": 0.0098,
"step": 5075
},
{
"epoch": 10.967741935483872,
"grad_norm": 0.364681214094162,
"learning_rate": 6.580669144981413e-06,
"loss": 0.0087,
"step": 5100
},
{
"epoch": 11.021505376344086,
"grad_norm": 0.416103720664978,
"learning_rate": 6.562081784386618e-06,
"loss": 0.0072,
"step": 5125
},
{
"epoch": 11.075268817204302,
"grad_norm": 0.2710916996002197,
"learning_rate": 6.5434944237918215e-06,
"loss": 0.0063,
"step": 5150
},
{
"epoch": 11.129032258064516,
"grad_norm": 1.4234521389007568,
"learning_rate": 6.5249070631970265e-06,
"loss": 0.0062,
"step": 5175
},
{
"epoch": 11.182795698924732,
"grad_norm": 0.800237238407135,
"learning_rate": 6.506319702602231e-06,
"loss": 0.0075,
"step": 5200
},
{
"epoch": 11.236559139784946,
"grad_norm": 0.4724205732345581,
"learning_rate": 6.487732342007436e-06,
"loss": 0.0053,
"step": 5225
},
{
"epoch": 11.290322580645162,
"grad_norm": 0.12521684169769287,
"learning_rate": 6.46914498141264e-06,
"loss": 0.0079,
"step": 5250
},
{
"epoch": 11.344086021505376,
"grad_norm": 0.2039920538663864,
"learning_rate": 6.450557620817845e-06,
"loss": 0.0069,
"step": 5275
},
{
"epoch": 11.397849462365592,
"grad_norm": 1.678312063217163,
"learning_rate": 6.43197026022305e-06,
"loss": 0.008,
"step": 5300
},
{
"epoch": 11.451612903225806,
"grad_norm": 0.8350504636764526,
"learning_rate": 6.413382899628253e-06,
"loss": 0.0069,
"step": 5325
},
{
"epoch": 11.505376344086022,
"grad_norm": 0.6541998982429504,
"learning_rate": 6.394795539033458e-06,
"loss": 0.009,
"step": 5350
},
{
"epoch": 11.559139784946236,
"grad_norm": 1.2869340181350708,
"learning_rate": 6.376208178438662e-06,
"loss": 0.0051,
"step": 5375
},
{
"epoch": 11.612903225806452,
"grad_norm": 0.987830638885498,
"learning_rate": 6.357620817843867e-06,
"loss": 0.0072,
"step": 5400
},
{
"epoch": 11.666666666666666,
"grad_norm": 0.4543008804321289,
"learning_rate": 6.339033457249071e-06,
"loss": 0.0065,
"step": 5425
},
{
"epoch": 11.720430107526882,
"grad_norm": 0.866301953792572,
"learning_rate": 6.320446096654275e-06,
"loss": 0.0071,
"step": 5450
},
{
"epoch": 11.774193548387096,
"grad_norm": 1.1665536165237427,
"learning_rate": 6.3018587360594795e-06,
"loss": 0.0089,
"step": 5475
},
{
"epoch": 11.827956989247312,
"grad_norm": 0.5745353102684021,
"learning_rate": 6.2832713754646845e-06,
"loss": 0.0086,
"step": 5500
},
{
"epoch": 11.827956989247312,
"eval_loss": 0.3715842068195343,
"eval_runtime": 202.8772,
"eval_samples_per_second": 4.663,
"eval_steps_per_second": 0.587,
"eval_wer": 15.586576684847925,
"step": 5500
},
{
"epoch": 11.881720430107526,
"grad_norm": 0.7137680053710938,
"learning_rate": 6.2646840148698895e-06,
"loss": 0.0063,
"step": 5525
},
{
"epoch": 11.935483870967742,
"grad_norm": 1.8331615924835205,
"learning_rate": 6.246096654275094e-06,
"loss": 0.0052,
"step": 5550
},
{
"epoch": 11.989247311827956,
"grad_norm": 1.306740403175354,
"learning_rate": 6.2275092936802986e-06,
"loss": 0.0084,
"step": 5575
},
{
"epoch": 12.043010752688172,
"grad_norm": 0.4689745008945465,
"learning_rate": 6.208921933085502e-06,
"loss": 0.0054,
"step": 5600
},
{
"epoch": 12.096774193548388,
"grad_norm": 0.8853312134742737,
"learning_rate": 6.190334572490707e-06,
"loss": 0.0038,
"step": 5625
},
{
"epoch": 12.150537634408602,
"grad_norm": 0.18394626677036285,
"learning_rate": 6.171747211895911e-06,
"loss": 0.0058,
"step": 5650
},
{
"epoch": 12.204301075268818,
"grad_norm": 0.35906341671943665,
"learning_rate": 6.153159851301116e-06,
"loss": 0.0048,
"step": 5675
},
{
"epoch": 12.258064516129032,
"grad_norm": 0.0934007316827774,
"learning_rate": 6.13457249070632e-06,
"loss": 0.0056,
"step": 5700
},
{
"epoch": 12.311827956989248,
"grad_norm": 0.6383976340293884,
"learning_rate": 6.115985130111525e-06,
"loss": 0.0049,
"step": 5725
},
{
"epoch": 12.365591397849462,
"grad_norm": 0.3622893989086151,
"learning_rate": 6.097397769516728e-06,
"loss": 0.0064,
"step": 5750
},
{
"epoch": 12.419354838709678,
"grad_norm": 0.21196268498897552,
"learning_rate": 6.078810408921933e-06,
"loss": 0.0064,
"step": 5775
},
{
"epoch": 12.473118279569892,
"grad_norm": 0.3381194472312927,
"learning_rate": 6.060223048327138e-06,
"loss": 0.0054,
"step": 5800
},
{
"epoch": 12.526881720430108,
"grad_norm": 1.9906443357467651,
"learning_rate": 6.041635687732342e-06,
"loss": 0.0061,
"step": 5825
},
{
"epoch": 12.580645161290322,
"grad_norm": 0.3197634220123291,
"learning_rate": 6.023048327137547e-06,
"loss": 0.0053,
"step": 5850
},
{
"epoch": 12.634408602150538,
"grad_norm": 0.18474631011486053,
"learning_rate": 6.0044609665427515e-06,
"loss": 0.0065,
"step": 5875
},
{
"epoch": 12.688172043010752,
"grad_norm": 0.8498281240463257,
"learning_rate": 5.985873605947956e-06,
"loss": 0.004,
"step": 5900
},
{
"epoch": 12.741935483870968,
"grad_norm": 0.4391692578792572,
"learning_rate": 5.96728624535316e-06,
"loss": 0.006,
"step": 5925
},
{
"epoch": 12.795698924731182,
"grad_norm": 0.6688899993896484,
"learning_rate": 5.948698884758365e-06,
"loss": 0.0053,
"step": 5950
},
{
"epoch": 12.849462365591398,
"grad_norm": 0.9713292121887207,
"learning_rate": 5.930111524163569e-06,
"loss": 0.0072,
"step": 5975
},
{
"epoch": 12.903225806451612,
"grad_norm": 0.8484262228012085,
"learning_rate": 5.911524163568774e-06,
"loss": 0.0068,
"step": 6000
},
{
"epoch": 12.903225806451612,
"eval_loss": 0.37204521894454956,
"eval_runtime": 201.9047,
"eval_samples_per_second": 4.685,
"eval_steps_per_second": 0.589,
"eval_wer": 14.588148285106776,
"step": 6000
},
{
"epoch": 12.956989247311828,
"grad_norm": 1.0728837251663208,
"learning_rate": 5.892936802973979e-06,
"loss": 0.0084,
"step": 6025
},
{
"epoch": 13.010752688172044,
"grad_norm": 0.4754142761230469,
"learning_rate": 5.874349442379182e-06,
"loss": 0.0087,
"step": 6050
},
{
"epoch": 13.064516129032258,
"grad_norm": 0.3025985062122345,
"learning_rate": 5.855762081784387e-06,
"loss": 0.0063,
"step": 6075
},
{
"epoch": 13.118279569892474,
"grad_norm": 0.3236280083656311,
"learning_rate": 5.837174721189591e-06,
"loss": 0.0055,
"step": 6100
},
{
"epoch": 13.172043010752688,
"grad_norm": 0.508432924747467,
"learning_rate": 5.818587360594796e-06,
"loss": 0.0053,
"step": 6125
},
{
"epoch": 13.225806451612904,
"grad_norm": 1.6511017084121704,
"learning_rate": 5.8e-06,
"loss": 0.0046,
"step": 6150
},
{
"epoch": 13.279569892473118,
"grad_norm": 0.142063707113266,
"learning_rate": 5.781412639405205e-06,
"loss": 0.0051,
"step": 6175
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.11750756949186325,
"learning_rate": 5.762825278810409e-06,
"loss": 0.0048,
"step": 6200
},
{
"epoch": 13.387096774193548,
"grad_norm": 0.8060685396194458,
"learning_rate": 5.744237918215614e-06,
"loss": 0.0057,
"step": 6225
},
{
"epoch": 13.440860215053764,
"grad_norm": 0.452999472618103,
"learning_rate": 5.725650557620819e-06,
"loss": 0.0059,
"step": 6250
},
{
"epoch": 13.494623655913978,
"grad_norm": 1.3556956052780151,
"learning_rate": 5.707063197026023e-06,
"loss": 0.0049,
"step": 6275
},
{
"epoch": 13.548387096774194,
"grad_norm": 0.1406233310699463,
"learning_rate": 5.688475836431228e-06,
"loss": 0.0041,
"step": 6300
},
{
"epoch": 13.602150537634408,
"grad_norm": 0.6670034527778625,
"learning_rate": 5.669888475836432e-06,
"loss": 0.0057,
"step": 6325
},
{
"epoch": 13.655913978494624,
"grad_norm": 1.7057311534881592,
"learning_rate": 5.651301115241636e-06,
"loss": 0.0056,
"step": 6350
},
{
"epoch": 13.709677419354838,
"grad_norm": 0.7842967510223389,
"learning_rate": 5.63271375464684e-06,
"loss": 0.0062,
"step": 6375
},
{
"epoch": 13.763440860215054,
"grad_norm": 0.7574280500411987,
"learning_rate": 5.614126394052045e-06,
"loss": 0.006,
"step": 6400
},
{
"epoch": 13.817204301075268,
"grad_norm": 1.1247819662094116,
"learning_rate": 5.595539033457249e-06,
"loss": 0.0091,
"step": 6425
},
{
"epoch": 13.870967741935484,
"grad_norm": 0.5980854034423828,
"learning_rate": 5.576951672862454e-06,
"loss": 0.005,
"step": 6450
},
{
"epoch": 13.924731182795698,
"grad_norm": 0.6640056371688843,
"learning_rate": 5.558364312267659e-06,
"loss": 0.0061,
"step": 6475
},
{
"epoch": 13.978494623655914,
"grad_norm": 0.6742274165153503,
"learning_rate": 5.5397769516728625e-06,
"loss": 0.0056,
"step": 6500
},
{
"epoch": 13.978494623655914,
"eval_loss": 0.37743857502937317,
"eval_runtime": 202.8285,
"eval_samples_per_second": 4.664,
"eval_steps_per_second": 0.587,
"eval_wer": 14.819265970232042,
"step": 6500
},
{
"epoch": 14.03225806451613,
"grad_norm": 0.1915878802537918,
"learning_rate": 5.5211895910780674e-06,
"loss": 0.0045,
"step": 6525
},
{
"epoch": 14.086021505376344,
"grad_norm": 0.09815018624067307,
"learning_rate": 5.5026022304832716e-06,
"loss": 0.0057,
"step": 6550
},
{
"epoch": 14.13978494623656,
"grad_norm": 0.04359288886189461,
"learning_rate": 5.4840148698884765e-06,
"loss": 0.005,
"step": 6575
},
{
"epoch": 14.193548387096774,
"grad_norm": 0.28134745359420776,
"learning_rate": 5.465427509293681e-06,
"loss": 0.0029,
"step": 6600
},
{
"epoch": 14.24731182795699,
"grad_norm": 0.6944845914840698,
"learning_rate": 5.446840148698886e-06,
"loss": 0.0056,
"step": 6625
},
{
"epoch": 14.301075268817204,
"grad_norm": 1.5637778043746948,
"learning_rate": 5.428252788104089e-06,
"loss": 0.0037,
"step": 6650
},
{
"epoch": 14.35483870967742,
"grad_norm": 0.49470245838165283,
"learning_rate": 5.409665427509294e-06,
"loss": 0.0064,
"step": 6675
},
{
"epoch": 14.408602150537634,
"grad_norm": 0.055743150413036346,
"learning_rate": 5.391078066914499e-06,
"loss": 0.0033,
"step": 6700
},
{
"epoch": 14.46236559139785,
"grad_norm": 0.20047767460346222,
"learning_rate": 5.372490706319703e-06,
"loss": 0.0047,
"step": 6725
},
{
"epoch": 14.516129032258064,
"grad_norm": 0.36383625864982605,
"learning_rate": 5.353903345724908e-06,
"loss": 0.0037,
"step": 6750
},
{
"epoch": 14.56989247311828,
"grad_norm": 0.07147414237260818,
"learning_rate": 5.335315985130112e-06,
"loss": 0.0026,
"step": 6775
},
{
"epoch": 14.623655913978494,
"grad_norm": 0.28435853123664856,
"learning_rate": 5.316728624535316e-06,
"loss": 0.0043,
"step": 6800
},
{
"epoch": 14.67741935483871,
"grad_norm": 0.27296435832977295,
"learning_rate": 5.29814126394052e-06,
"loss": 0.0068,
"step": 6825
},
{
"epoch": 14.731182795698924,
"grad_norm": 0.9266132116317749,
"learning_rate": 5.279553903345725e-06,
"loss": 0.0065,
"step": 6850
},
{
"epoch": 14.78494623655914,
"grad_norm": 0.4447098970413208,
"learning_rate": 5.2609665427509295e-06,
"loss": 0.0051,
"step": 6875
},
{
"epoch": 14.838709677419354,
"grad_norm": 0.6710329055786133,
"learning_rate": 5.2423791821561345e-06,
"loss": 0.0044,
"step": 6900
},
{
"epoch": 14.89247311827957,
"grad_norm": 0.5553959012031555,
"learning_rate": 5.2237918215613395e-06,
"loss": 0.0062,
"step": 6925
},
{
"epoch": 14.946236559139784,
"grad_norm": 0.867906928062439,
"learning_rate": 5.205204460966543e-06,
"loss": 0.0075,
"step": 6950
},
{
"epoch": 15.0,
"grad_norm": 0.5631603002548218,
"learning_rate": 5.186617100371748e-06,
"loss": 0.0056,
"step": 6975
},
{
"epoch": 15.053763440860216,
"grad_norm": 0.16968116164207458,
"learning_rate": 5.168029739776952e-06,
"loss": 0.0032,
"step": 7000
},
{
"epoch": 15.053763440860216,
"eval_loss": 0.3897517919540405,
"eval_runtime": 202.1138,
"eval_samples_per_second": 4.681,
"eval_steps_per_second": 0.589,
"eval_wer": 14.597392992511788,
"step": 7000
},
{
"epoch": 15.10752688172043,
"grad_norm": 0.43974125385284424,
"learning_rate": 5.149442379182157e-06,
"loss": 0.005,
"step": 7025
},
{
"epoch": 15.161290322580646,
"grad_norm": 0.1777154952287674,
"learning_rate": 5.130855018587361e-06,
"loss": 0.0023,
"step": 7050
},
{
"epoch": 15.21505376344086,
"grad_norm": 0.0768185630440712,
"learning_rate": 5.112267657992566e-06,
"loss": 0.0045,
"step": 7075
},
{
"epoch": 15.268817204301076,
"grad_norm": 0.04717967286705971,
"learning_rate": 5.093680297397769e-06,
"loss": 0.0043,
"step": 7100
},
{
"epoch": 15.32258064516129,
"grad_norm": 0.25022652745246887,
"learning_rate": 5.075092936802974e-06,
"loss": 0.003,
"step": 7125
},
{
"epoch": 15.376344086021506,
"grad_norm": 0.07506144791841507,
"learning_rate": 5.056505576208179e-06,
"loss": 0.0043,
"step": 7150
},
{
"epoch": 15.43010752688172,
"grad_norm": 0.850889265537262,
"learning_rate": 5.037918215613383e-06,
"loss": 0.0055,
"step": 7175
},
{
"epoch": 15.483870967741936,
"grad_norm": 0.5556985139846802,
"learning_rate": 5.019330855018588e-06,
"loss": 0.0051,
"step": 7200
},
{
"epoch": 15.53763440860215,
"grad_norm": 0.0634092465043068,
"learning_rate": 5.0007434944237924e-06,
"loss": 0.003,
"step": 7225
},
{
"epoch": 15.591397849462366,
"grad_norm": 0.2446642518043518,
"learning_rate": 4.982156133828997e-06,
"loss": 0.0047,
"step": 7250
},
{
"epoch": 15.64516129032258,
"grad_norm": 1.191821575164795,
"learning_rate": 4.9635687732342016e-06,
"loss": 0.0045,
"step": 7275
},
{
"epoch": 15.698924731182796,
"grad_norm": 0.4117543399333954,
"learning_rate": 4.944981412639406e-06,
"loss": 0.0062,
"step": 7300
},
{
"epoch": 15.75268817204301,
"grad_norm": 0.8248342275619507,
"learning_rate": 4.92639405204461e-06,
"loss": 0.0043,
"step": 7325
},
{
"epoch": 15.806451612903226,
"grad_norm": 0.29120975732803345,
"learning_rate": 4.907806691449815e-06,
"loss": 0.0061,
"step": 7350
},
{
"epoch": 15.86021505376344,
"grad_norm": 0.0745767205953598,
"learning_rate": 4.889219330855019e-06,
"loss": 0.0048,
"step": 7375
},
{
"epoch": 15.913978494623656,
"grad_norm": 0.10059848427772522,
"learning_rate": 4.870631970260223e-06,
"loss": 0.004,
"step": 7400
},
{
"epoch": 15.967741935483872,
"grad_norm": 0.11489495635032654,
"learning_rate": 4.852044609665428e-06,
"loss": 0.004,
"step": 7425
},
{
"epoch": 16.021505376344088,
"grad_norm": 0.04976237937808037,
"learning_rate": 4.833457249070632e-06,
"loss": 0.0036,
"step": 7450
},
{
"epoch": 16.0752688172043,
"grad_norm": 0.13619866967201233,
"learning_rate": 4.814869888475836e-06,
"loss": 0.0031,
"step": 7475
},
{
"epoch": 16.129032258064516,
"grad_norm": 0.35101068019866943,
"learning_rate": 4.796282527881041e-06,
"loss": 0.0037,
"step": 7500
},
{
"epoch": 16.129032258064516,
"eval_loss": 0.390476793050766,
"eval_runtime": 202.418,
"eval_samples_per_second": 4.673,
"eval_steps_per_second": 0.588,
"eval_wer": 14.708329481371916,
"step": 7500
},
{
"epoch": 16.182795698924732,
"grad_norm": 0.315719872713089,
"learning_rate": 4.777695167286246e-06,
"loss": 0.003,
"step": 7525
},
{
"epoch": 16.236559139784948,
"grad_norm": 1.0123934745788574,
"learning_rate": 4.75910780669145e-06,
"loss": 0.0035,
"step": 7550
},
{
"epoch": 16.29032258064516,
"grad_norm": 0.517242968082428,
"learning_rate": 4.7405204460966545e-06,
"loss": 0.003,
"step": 7575
},
{
"epoch": 16.344086021505376,
"grad_norm": 0.06284263730049133,
"learning_rate": 4.7219330855018595e-06,
"loss": 0.0024,
"step": 7600
},
{
"epoch": 16.397849462365592,
"grad_norm": 0.02318274788558483,
"learning_rate": 4.703345724907064e-06,
"loss": 0.0039,
"step": 7625
},
{
"epoch": 16.451612903225808,
"grad_norm": 0.2524121105670929,
"learning_rate": 4.684758364312268e-06,
"loss": 0.0041,
"step": 7650
},
{
"epoch": 16.50537634408602,
"grad_norm": 0.047711629420518875,
"learning_rate": 4.666171003717473e-06,
"loss": 0.0041,
"step": 7675
},
{
"epoch": 16.559139784946236,
"grad_norm": 0.32103028893470764,
"learning_rate": 4.647583643122677e-06,
"loss": 0.0061,
"step": 7700
},
{
"epoch": 16.612903225806452,
"grad_norm": 1.1334346532821655,
"learning_rate": 4.628996282527882e-06,
"loss": 0.0043,
"step": 7725
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.11029840260744095,
"learning_rate": 4.610408921933086e-06,
"loss": 0.0015,
"step": 7750
},
{
"epoch": 16.72043010752688,
"grad_norm": 0.03998972475528717,
"learning_rate": 4.59182156133829e-06,
"loss": 0.0035,
"step": 7775
},
{
"epoch": 16.774193548387096,
"grad_norm": 0.9175609946250916,
"learning_rate": 4.573234200743495e-06,
"loss": 0.0037,
"step": 7800
},
{
"epoch": 16.827956989247312,
"grad_norm": 0.055633947253227234,
"learning_rate": 4.554646840148699e-06,
"loss": 0.0035,
"step": 7825
},
{
"epoch": 16.881720430107528,
"grad_norm": 0.28876572847366333,
"learning_rate": 4.536059479553903e-06,
"loss": 0.0036,
"step": 7850
},
{
"epoch": 16.93548387096774,
"grad_norm": 0.09186781197786331,
"learning_rate": 4.517472118959108e-06,
"loss": 0.0034,
"step": 7875
},
{
"epoch": 16.989247311827956,
"grad_norm": 0.024787306785583496,
"learning_rate": 4.4988847583643125e-06,
"loss": 0.0035,
"step": 7900
},
{
"epoch": 17.043010752688172,
"grad_norm": 0.30542510747909546,
"learning_rate": 4.480297397769517e-06,
"loss": 0.0027,
"step": 7925
},
{
"epoch": 17.096774193548388,
"grad_norm": 0.20729881525039673,
"learning_rate": 4.461710037174722e-06,
"loss": 0.0016,
"step": 7950
},
{
"epoch": 17.150537634408604,
"grad_norm": 0.17623752355575562,
"learning_rate": 4.4431226765799266e-06,
"loss": 0.003,
"step": 7975
},
{
"epoch": 17.204301075268816,
"grad_norm": 1.2774063348770142,
"learning_rate": 4.424535315985131e-06,
"loss": 0.0041,
"step": 8000
},
{
"epoch": 17.204301075268816,
"eval_loss": 0.38328301906585693,
"eval_runtime": 202.8745,
"eval_samples_per_second": 4.663,
"eval_steps_per_second": 0.587,
"eval_wer": 14.440232966626606,
"step": 8000
},
{
"epoch": 17.258064516129032,
"grad_norm": 0.24027810990810394,
"learning_rate": 4.405947955390335e-06,
"loss": 0.0042,
"step": 8025
},
{
"epoch": 17.311827956989248,
"grad_norm": 0.6575544476509094,
"learning_rate": 4.38736059479554e-06,
"loss": 0.0033,
"step": 8050
},
{
"epoch": 17.365591397849464,
"grad_norm": 0.7652745842933655,
"learning_rate": 4.368773234200744e-06,
"loss": 0.0025,
"step": 8075
},
{
"epoch": 17.419354838709676,
"grad_norm": 1.0893921852111816,
"learning_rate": 4.350185873605948e-06,
"loss": 0.0044,
"step": 8100
},
{
"epoch": 17.473118279569892,
"grad_norm": 0.381245881319046,
"learning_rate": 4.331598513011153e-06,
"loss": 0.0053,
"step": 8125
},
{
"epoch": 17.526881720430108,
"grad_norm": 0.6958642601966858,
"learning_rate": 4.313011152416357e-06,
"loss": 0.003,
"step": 8150
},
{
"epoch": 17.580645161290324,
"grad_norm": 0.3542903661727905,
"learning_rate": 4.294423791821561e-06,
"loss": 0.0031,
"step": 8175
},
{
"epoch": 17.634408602150536,
"grad_norm": 0.12086351215839386,
"learning_rate": 4.275836431226766e-06,
"loss": 0.0024,
"step": 8200
},
{
"epoch": 17.688172043010752,
"grad_norm": 0.41448554396629333,
"learning_rate": 4.2572490706319704e-06,
"loss": 0.0035,
"step": 8225
},
{
"epoch": 17.741935483870968,
"grad_norm": 0.06691323965787888,
"learning_rate": 4.238661710037175e-06,
"loss": 0.0035,
"step": 8250
},
{
"epoch": 17.795698924731184,
"grad_norm": 0.7434226870536804,
"learning_rate": 4.2200743494423795e-06,
"loss": 0.0053,
"step": 8275
},
{
"epoch": 17.849462365591396,
"grad_norm": 0.36501583456993103,
"learning_rate": 4.201486988847584e-06,
"loss": 0.004,
"step": 8300
},
{
"epoch": 17.903225806451612,
"grad_norm": 0.2055322229862213,
"learning_rate": 4.182899628252789e-06,
"loss": 0.0043,
"step": 8325
},
{
"epoch": 17.956989247311828,
"grad_norm": 0.7392027378082275,
"learning_rate": 4.164312267657993e-06,
"loss": 0.004,
"step": 8350
},
{
"epoch": 18.010752688172044,
"grad_norm": 0.07294179499149323,
"learning_rate": 4.145724907063197e-06,
"loss": 0.0055,
"step": 8375
},
{
"epoch": 18.06451612903226,
"grad_norm": 0.0656030923128128,
"learning_rate": 4.127137546468402e-06,
"loss": 0.0024,
"step": 8400
},
{
"epoch": 18.118279569892472,
"grad_norm": 0.05267421901226044,
"learning_rate": 4.108550185873607e-06,
"loss": 0.0033,
"step": 8425
},
{
"epoch": 18.172043010752688,
"grad_norm": 0.704990565776825,
"learning_rate": 4.089962825278811e-06,
"loss": 0.0047,
"step": 8450
},
{
"epoch": 18.225806451612904,
"grad_norm": 0.24530240893363953,
"learning_rate": 4.071375464684015e-06,
"loss": 0.0043,
"step": 8475
},
{
"epoch": 18.27956989247312,
"grad_norm": 0.19997531175613403,
"learning_rate": 4.05278810408922e-06,
"loss": 0.0035,
"step": 8500
},
{
"epoch": 18.27956989247312,
"eval_loss": 0.3821885585784912,
"eval_runtime": 203.3448,
"eval_samples_per_second": 4.652,
"eval_steps_per_second": 0.585,
"eval_wer": 14.412498844411575,
"step": 8500
},
{
"epoch": 18.333333333333332,
"grad_norm": 0.0332474559545517,
"learning_rate": 4.034200743494424e-06,
"loss": 0.0031,
"step": 8525
},
{
"epoch": 18.387096774193548,
"grad_norm": 1.3561875820159912,
"learning_rate": 4.015613382899628e-06,
"loss": 0.0017,
"step": 8550
},
{
"epoch": 18.440860215053764,
"grad_norm": 0.029482562094926834,
"learning_rate": 3.997026022304833e-06,
"loss": 0.0021,
"step": 8575
},
{
"epoch": 18.49462365591398,
"grad_norm": 0.12231668084859848,
"learning_rate": 3.9784386617100375e-06,
"loss": 0.0043,
"step": 8600
},
{
"epoch": 18.548387096774192,
"grad_norm": 0.044476673007011414,
"learning_rate": 3.959851301115242e-06,
"loss": 0.0024,
"step": 8625
},
{
"epoch": 18.602150537634408,
"grad_norm": 0.6735191345214844,
"learning_rate": 3.941263940520447e-06,
"loss": 0.0032,
"step": 8650
},
{
"epoch": 18.655913978494624,
"grad_norm": 1.0479316711425781,
"learning_rate": 3.922676579925651e-06,
"loss": 0.0024,
"step": 8675
},
{
"epoch": 18.70967741935484,
"grad_norm": 0.023525085300207138,
"learning_rate": 3.904089219330856e-06,
"loss": 0.0049,
"step": 8700
},
{
"epoch": 18.763440860215052,
"grad_norm": 0.165565624833107,
"learning_rate": 3.88550185873606e-06,
"loss": 0.0039,
"step": 8725
},
{
"epoch": 18.817204301075268,
"grad_norm": 0.5960690379142761,
"learning_rate": 3.866914498141264e-06,
"loss": 0.003,
"step": 8750
},
{
"epoch": 18.870967741935484,
"grad_norm": 0.23799718916416168,
"learning_rate": 3.848327137546469e-06,
"loss": 0.002,
"step": 8775
},
{
"epoch": 18.9247311827957,
"grad_norm": 0.01600775308907032,
"learning_rate": 3.829739776951673e-06,
"loss": 0.0022,
"step": 8800
},
{
"epoch": 18.978494623655912,
"grad_norm": 0.3210331201553345,
"learning_rate": 3.8111524163568776e-06,
"loss": 0.0033,
"step": 8825
},
{
"epoch": 19.032258064516128,
"grad_norm": 0.05005327984690666,
"learning_rate": 3.7925650557620818e-06,
"loss": 0.0033,
"step": 8850
},
{
"epoch": 19.086021505376344,
"grad_norm": 0.4820277690887451,
"learning_rate": 3.7739776951672863e-06,
"loss": 0.0034,
"step": 8875
},
{
"epoch": 19.13978494623656,
"grad_norm": 0.1907467395067215,
"learning_rate": 3.7553903345724913e-06,
"loss": 0.0025,
"step": 8900
},
{
"epoch": 19.193548387096776,
"grad_norm": 0.023403950035572052,
"learning_rate": 3.7368029739776954e-06,
"loss": 0.0024,
"step": 8925
},
{
"epoch": 19.247311827956988,
"grad_norm": 0.02337467670440674,
"learning_rate": 3.7182156133829e-06,
"loss": 0.0038,
"step": 8950
},
{
"epoch": 19.301075268817204,
"grad_norm": 0.42413467168807983,
"learning_rate": 3.6996282527881046e-06,
"loss": 0.0048,
"step": 8975
},
{
"epoch": 19.35483870967742,
"grad_norm": 0.0469290092587471,
"learning_rate": 3.6810408921933087e-06,
"loss": 0.0034,
"step": 9000
},
{
"epoch": 19.35483870967742,
"eval_loss": 0.38839593529701233,
"eval_runtime": 202.9911,
"eval_samples_per_second": 4.66,
"eval_steps_per_second": 0.586,
"eval_wer": 14.62512711472682,
"step": 9000
},
{
"epoch": 19.408602150537636,
"grad_norm": 0.2083800882101059,
"learning_rate": 3.6624535315985132e-06,
"loss": 0.0029,
"step": 9025
},
{
"epoch": 19.462365591397848,
"grad_norm": 0.16142559051513672,
"learning_rate": 3.643866171003718e-06,
"loss": 0.0028,
"step": 9050
},
{
"epoch": 19.516129032258064,
"grad_norm": 0.02445228025317192,
"learning_rate": 3.625278810408922e-06,
"loss": 0.0031,
"step": 9075
},
{
"epoch": 19.56989247311828,
"grad_norm": 0.10709693282842636,
"learning_rate": 3.6066914498141265e-06,
"loss": 0.0028,
"step": 9100
},
{
"epoch": 19.623655913978496,
"grad_norm": 0.48716649413108826,
"learning_rate": 3.5881040892193315e-06,
"loss": 0.0024,
"step": 9125
},
{
"epoch": 19.677419354838708,
"grad_norm": 0.043807078152894974,
"learning_rate": 3.5695167286245356e-06,
"loss": 0.0026,
"step": 9150
},
{
"epoch": 19.731182795698924,
"grad_norm": 0.008909267373383045,
"learning_rate": 3.55092936802974e-06,
"loss": 0.0027,
"step": 9175
},
{
"epoch": 19.78494623655914,
"grad_norm": 0.0496838316321373,
"learning_rate": 3.5323420074349447e-06,
"loss": 0.0028,
"step": 9200
},
{
"epoch": 19.838709677419356,
"grad_norm": 0.3045809864997864,
"learning_rate": 3.513754646840149e-06,
"loss": 0.0043,
"step": 9225
},
{
"epoch": 19.892473118279568,
"grad_norm": 0.019404035061597824,
"learning_rate": 3.4951672862453534e-06,
"loss": 0.0022,
"step": 9250
},
{
"epoch": 19.946236559139784,
"grad_norm": 0.05067993700504303,
"learning_rate": 3.476579925650558e-06,
"loss": 0.0027,
"step": 9275
},
{
"epoch": 20.0,
"grad_norm": 0.17729219794273376,
"learning_rate": 3.457992565055762e-06,
"loss": 0.0021,
"step": 9300
},
{
"epoch": 20.053763440860216,
"grad_norm": 0.166994109749794,
"learning_rate": 3.4394052044609666e-06,
"loss": 0.0025,
"step": 9325
},
{
"epoch": 20.107526881720432,
"grad_norm": 0.026689428836107254,
"learning_rate": 3.4208178438661716e-06,
"loss": 0.0035,
"step": 9350
},
{
"epoch": 20.161290322580644,
"grad_norm": 0.016895387321710587,
"learning_rate": 3.4022304832713757e-06,
"loss": 0.0026,
"step": 9375
},
{
"epoch": 20.21505376344086,
"grad_norm": 0.06793255358934402,
"learning_rate": 3.3836431226765803e-06,
"loss": 0.0015,
"step": 9400
},
{
"epoch": 20.268817204301076,
"grad_norm": 0.034562163054943085,
"learning_rate": 3.365055762081785e-06,
"loss": 0.0027,
"step": 9425
},
{
"epoch": 20.322580645161292,
"grad_norm": 0.16164565086364746,
"learning_rate": 3.346468401486989e-06,
"loss": 0.0023,
"step": 9450
},
{
"epoch": 20.376344086021504,
"grad_norm": 0.015665782615542412,
"learning_rate": 3.3278810408921935e-06,
"loss": 0.002,
"step": 9475
},
{
"epoch": 20.43010752688172,
"grad_norm": 0.009676897898316383,
"learning_rate": 3.309293680297398e-06,
"loss": 0.0027,
"step": 9500
},
{
"epoch": 20.43010752688172,
"eval_loss": 0.39532387256622314,
"eval_runtime": 202.6591,
"eval_samples_per_second": 4.668,
"eval_steps_per_second": 0.587,
"eval_wer": 14.449477674031616,
"step": 9500
},
{
"epoch": 20.483870967741936,
"grad_norm": 0.16924279928207397,
"learning_rate": 3.2907063197026022e-06,
"loss": 0.0022,
"step": 9525
},
{
"epoch": 20.537634408602152,
"grad_norm": 0.3499106168746948,
"learning_rate": 3.272118959107807e-06,
"loss": 0.0017,
"step": 9550
},
{
"epoch": 20.591397849462364,
"grad_norm": 0.5156524181365967,
"learning_rate": 3.2535315985130113e-06,
"loss": 0.0027,
"step": 9575
},
{
"epoch": 20.64516129032258,
"grad_norm": 0.12964314222335815,
"learning_rate": 3.234944237918216e-06,
"loss": 0.0029,
"step": 9600
},
{
"epoch": 20.698924731182796,
"grad_norm": 0.4109344482421875,
"learning_rate": 3.2163568773234205e-06,
"loss": 0.002,
"step": 9625
},
{
"epoch": 20.752688172043012,
"grad_norm": 0.17528752982616425,
"learning_rate": 3.197769516728625e-06,
"loss": 0.0025,
"step": 9650
},
{
"epoch": 20.806451612903224,
"grad_norm": 0.27459415793418884,
"learning_rate": 3.179182156133829e-06,
"loss": 0.002,
"step": 9675
},
{
"epoch": 20.86021505376344,
"grad_norm": 0.7621147036552429,
"learning_rate": 3.1605947955390337e-06,
"loss": 0.0019,
"step": 9700
},
{
"epoch": 20.913978494623656,
"grad_norm": 0.03115130215883255,
"learning_rate": 3.1420074349442383e-06,
"loss": 0.0021,
"step": 9725
},
{
"epoch": 20.967741935483872,
"grad_norm": 0.3843834698200226,
"learning_rate": 3.1234200743494424e-06,
"loss": 0.0037,
"step": 9750
},
{
"epoch": 21.021505376344088,
"grad_norm": 0.11314116418361664,
"learning_rate": 3.104832713754647e-06,
"loss": 0.0019,
"step": 9775
},
{
"epoch": 21.0752688172043,
"grad_norm": 0.008430559188127518,
"learning_rate": 3.0862453531598515e-06,
"loss": 0.0016,
"step": 9800
},
{
"epoch": 21.129032258064516,
"grad_norm": 0.014893501996994019,
"learning_rate": 3.067657992565056e-06,
"loss": 0.0012,
"step": 9825
},
{
"epoch": 21.182795698924732,
"grad_norm": 0.020304594188928604,
"learning_rate": 3.0490706319702606e-06,
"loss": 0.0014,
"step": 9850
},
{
"epoch": 21.236559139784948,
"grad_norm": 0.12655578553676605,
"learning_rate": 3.030483271375465e-06,
"loss": 0.0026,
"step": 9875
},
{
"epoch": 21.29032258064516,
"grad_norm": 0.014000285416841507,
"learning_rate": 3.0118959107806693e-06,
"loss": 0.0026,
"step": 9900
},
{
"epoch": 21.344086021505376,
"grad_norm": 0.026862381026148796,
"learning_rate": 2.993308550185874e-06,
"loss": 0.0024,
"step": 9925
},
{
"epoch": 21.397849462365592,
"grad_norm": 0.03989304229617119,
"learning_rate": 2.9747211895910784e-06,
"loss": 0.0018,
"step": 9950
},
{
"epoch": 21.451612903225808,
"grad_norm": 0.019757866859436035,
"learning_rate": 2.9561338289962825e-06,
"loss": 0.0031,
"step": 9975
},
{
"epoch": 21.50537634408602,
"grad_norm": 0.02383114956319332,
"learning_rate": 2.937546468401487e-06,
"loss": 0.0022,
"step": 10000
},
{
"epoch": 21.50537634408602,
"eval_loss": 0.40046602487564087,
"eval_runtime": 202.0733,
"eval_samples_per_second": 4.681,
"eval_steps_per_second": 0.589,
"eval_wer": 14.449477674031616,
"step": 10000
},
{
"epoch": 21.559139784946236,
"grad_norm": 0.019417457282543182,
"learning_rate": 2.9189591078066916e-06,
"loss": 0.0027,
"step": 10025
},
{
"epoch": 21.612903225806452,
"grad_norm": 0.15449251234531403,
"learning_rate": 2.900371747211896e-06,
"loss": 0.0017,
"step": 10050
},
{
"epoch": 21.666666666666668,
"grad_norm": 0.16010086238384247,
"learning_rate": 2.8817843866171008e-06,
"loss": 0.0018,
"step": 10075
},
{
"epoch": 21.72043010752688,
"grad_norm": 0.02312368154525757,
"learning_rate": 2.8631970260223053e-06,
"loss": 0.0036,
"step": 10100
},
{
"epoch": 21.774193548387096,
"grad_norm": 0.040190454572439194,
"learning_rate": 2.8446096654275094e-06,
"loss": 0.0024,
"step": 10125
},
{
"epoch": 21.827956989247312,
"grad_norm": 0.030338788405060768,
"learning_rate": 2.826022304832714e-06,
"loss": 0.0024,
"step": 10150
},
{
"epoch": 21.881720430107528,
"grad_norm": 0.18002262711524963,
"learning_rate": 2.8074349442379186e-06,
"loss": 0.0029,
"step": 10175
},
{
"epoch": 21.93548387096774,
"grad_norm": 0.047431185841560364,
"learning_rate": 2.7888475836431227e-06,
"loss": 0.0013,
"step": 10200
},
{
"epoch": 21.989247311827956,
"grad_norm": 0.12101946771144867,
"learning_rate": 2.7702602230483272e-06,
"loss": 0.003,
"step": 10225
},
{
"epoch": 22.043010752688172,
"grad_norm": 0.01733570732176304,
"learning_rate": 2.751672862453532e-06,
"loss": 0.0013,
"step": 10250
},
{
"epoch": 22.096774193548388,
"grad_norm": 0.02413998357951641,
"learning_rate": 2.7330855018587364e-06,
"loss": 0.0024,
"step": 10275
},
{
"epoch": 22.150537634408604,
"grad_norm": 0.006610923912376165,
"learning_rate": 2.714498141263941e-06,
"loss": 0.0015,
"step": 10300
},
{
"epoch": 22.204301075268816,
"grad_norm": 0.11478333920240402,
"learning_rate": 2.6959107806691455e-06,
"loss": 0.0014,
"step": 10325
},
{
"epoch": 22.258064516129032,
"grad_norm": 0.8776764869689941,
"learning_rate": 2.6773234200743496e-06,
"loss": 0.0019,
"step": 10350
},
{
"epoch": 22.311827956989248,
"grad_norm": 0.020020902156829834,
"learning_rate": 2.658736059479554e-06,
"loss": 0.0022,
"step": 10375
},
{
"epoch": 22.365591397849464,
"grad_norm": 0.019508883357048035,
"learning_rate": 2.6401486988847587e-06,
"loss": 0.0017,
"step": 10400
},
{
"epoch": 22.419354838709676,
"grad_norm": 0.02609153278172016,
"learning_rate": 2.621561338289963e-06,
"loss": 0.0013,
"step": 10425
},
{
"epoch": 22.473118279569892,
"grad_norm": 0.027138570323586464,
"learning_rate": 2.6029739776951674e-06,
"loss": 0.0019,
"step": 10450
},
{
"epoch": 22.526881720430108,
"grad_norm": 0.01063444558531046,
"learning_rate": 2.584386617100372e-06,
"loss": 0.0026,
"step": 10475
},
{
"epoch": 22.580645161290324,
"grad_norm": 0.24903129041194916,
"learning_rate": 2.565799256505576e-06,
"loss": 0.0027,
"step": 10500
},
{
"epoch": 22.580645161290324,
"eval_loss": 0.40335774421691895,
"eval_runtime": 202.0679,
"eval_samples_per_second": 4.682,
"eval_steps_per_second": 0.589,
"eval_wer": 13.93177405935102,
"step": 10500
},
{
"epoch": 22.634408602150536,
"grad_norm": 0.1529041826725006,
"learning_rate": 2.547211895910781e-06,
"loss": 0.0018,
"step": 10525
},
{
"epoch": 22.688172043010752,
"grad_norm": 0.02129989117383957,
"learning_rate": 2.5286245353159856e-06,
"loss": 0.0013,
"step": 10550
},
{
"epoch": 22.741935483870968,
"grad_norm": 0.013442150317132473,
"learning_rate": 2.5100371747211898e-06,
"loss": 0.0028,
"step": 10575
},
{
"epoch": 22.795698924731184,
"grad_norm": 0.024951398372650146,
"learning_rate": 2.4914498141263943e-06,
"loss": 0.0022,
"step": 10600
},
{
"epoch": 22.849462365591396,
"grad_norm": 0.3933217525482178,
"learning_rate": 2.472862453531599e-06,
"loss": 0.0033,
"step": 10625
},
{
"epoch": 22.903225806451612,
"grad_norm": 0.030309738591313362,
"learning_rate": 2.454275092936803e-06,
"loss": 0.0012,
"step": 10650
},
{
"epoch": 22.956989247311828,
"grad_norm": 0.1965196579694748,
"learning_rate": 2.4356877323420076e-06,
"loss": 0.0019,
"step": 10675
},
{
"epoch": 23.010752688172044,
"grad_norm": 0.2897844612598419,
"learning_rate": 2.417100371747212e-06,
"loss": 0.0017,
"step": 10700
},
{
"epoch": 23.06451612903226,
"grad_norm": 0.010055635124444962,
"learning_rate": 2.3985130111524167e-06,
"loss": 0.0017,
"step": 10725
},
{
"epoch": 23.118279569892472,
"grad_norm": 0.02669104002416134,
"learning_rate": 2.379925650557621e-06,
"loss": 0.0005,
"step": 10750
},
{
"epoch": 23.172043010752688,
"grad_norm": 0.2305319756269455,
"learning_rate": 2.3613382899628253e-06,
"loss": 0.0015,
"step": 10775
},
{
"epoch": 23.225806451612904,
"grad_norm": 0.009956962428987026,
"learning_rate": 2.34275092936803e-06,
"loss": 0.0024,
"step": 10800
},
{
"epoch": 23.27956989247312,
"grad_norm": 0.1403415948152542,
"learning_rate": 2.3241635687732345e-06,
"loss": 0.0014,
"step": 10825
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.21458983421325684,
"learning_rate": 2.305576208178439e-06,
"loss": 0.0017,
"step": 10850
},
{
"epoch": 23.387096774193548,
"grad_norm": 0.008475505746901035,
"learning_rate": 2.286988847583643e-06,
"loss": 0.001,
"step": 10875
},
{
"epoch": 23.440860215053764,
"grad_norm": 0.02105923928320408,
"learning_rate": 2.2684014869888477e-06,
"loss": 0.0013,
"step": 10900
},
{
"epoch": 23.49462365591398,
"grad_norm": 0.007669220678508282,
"learning_rate": 2.2498141263940523e-06,
"loss": 0.0016,
"step": 10925
},
{
"epoch": 23.548387096774192,
"grad_norm": 0.007480244617909193,
"learning_rate": 2.231226765799257e-06,
"loss": 0.0013,
"step": 10950
},
{
"epoch": 23.602150537634408,
"grad_norm": 0.00940194632858038,
"learning_rate": 2.212639405204461e-06,
"loss": 0.0031,
"step": 10975
},
{
"epoch": 23.655913978494624,
"grad_norm": 0.013057105243206024,
"learning_rate": 2.1940520446096655e-06,
"loss": 0.0012,
"step": 11000
},
{
"epoch": 23.655913978494624,
"eval_loss": 0.40596359968185425,
"eval_runtime": 202.5237,
"eval_samples_per_second": 4.671,
"eval_steps_per_second": 0.588,
"eval_wer": 13.941018766756033,
"step": 11000
},
{
"epoch": 23.70967741935484,
"grad_norm": 0.0065338280983269215,
"learning_rate": 2.17546468401487e-06,
"loss": 0.0015,
"step": 11025
},
{
"epoch": 23.763440860215052,
"grad_norm": 0.013204723596572876,
"learning_rate": 2.1568773234200746e-06,
"loss": 0.003,
"step": 11050
},
{
"epoch": 23.817204301075268,
"grad_norm": 0.027307022362947464,
"learning_rate": 2.138289962825279e-06,
"loss": 0.0027,
"step": 11075
},
{
"epoch": 23.870967741935484,
"grad_norm": 0.014446156099438667,
"learning_rate": 2.1197026022304833e-06,
"loss": 0.0014,
"step": 11100
},
{
"epoch": 23.9247311827957,
"grad_norm": 0.014391875825822353,
"learning_rate": 2.101115241635688e-06,
"loss": 0.0031,
"step": 11125
},
{
"epoch": 23.978494623655912,
"grad_norm": 0.0412728525698185,
"learning_rate": 2.0825278810408924e-06,
"loss": 0.0022,
"step": 11150
},
{
"epoch": 24.032258064516128,
"grad_norm": 0.009646103717386723,
"learning_rate": 2.063940520446097e-06,
"loss": 0.0015,
"step": 11175
},
{
"epoch": 24.086021505376344,
"grad_norm": 0.007044603582471609,
"learning_rate": 2.045353159851301e-06,
"loss": 0.0012,
"step": 11200
},
{
"epoch": 24.13978494623656,
"grad_norm": 0.013142594136297703,
"learning_rate": 2.0267657992565057e-06,
"loss": 0.0013,
"step": 11225
},
{
"epoch": 24.193548387096776,
"grad_norm": 0.006704692263156176,
"learning_rate": 2.00817843866171e-06,
"loss": 0.0015,
"step": 11250
},
{
"epoch": 24.247311827956988,
"grad_norm": 0.005626600701361895,
"learning_rate": 1.9895910780669148e-06,
"loss": 0.0012,
"step": 11275
},
{
"epoch": 24.301075268817204,
"grad_norm": 0.008840459398925304,
"learning_rate": 1.9710037174721193e-06,
"loss": 0.0019,
"step": 11300
},
{
"epoch": 24.35483870967742,
"grad_norm": 0.005357383284717798,
"learning_rate": 1.9524163568773235e-06,
"loss": 0.0011,
"step": 11325
},
{
"epoch": 24.408602150537636,
"grad_norm": 0.005820517428219318,
"learning_rate": 1.933828996282528e-06,
"loss": 0.0014,
"step": 11350
},
{
"epoch": 24.462365591397848,
"grad_norm": 0.12261584401130676,
"learning_rate": 1.9152416356877326e-06,
"loss": 0.0009,
"step": 11375
},
{
"epoch": 24.516129032258064,
"grad_norm": 0.16665996611118317,
"learning_rate": 1.8966542750929371e-06,
"loss": 0.0032,
"step": 11400
},
{
"epoch": 24.56989247311828,
"grad_norm": 0.006091310176998377,
"learning_rate": 1.8780669144981415e-06,
"loss": 0.0016,
"step": 11425
},
{
"epoch": 24.623655913978496,
"grad_norm": 0.027028294280171394,
"learning_rate": 1.8594795539033458e-06,
"loss": 0.0013,
"step": 11450
},
{
"epoch": 24.677419354838708,
"grad_norm": 0.107554592192173,
"learning_rate": 1.8408921933085502e-06,
"loss": 0.0014,
"step": 11475
},
{
"epoch": 24.731182795698924,
"grad_norm": 0.006071150302886963,
"learning_rate": 1.822304832713755e-06,
"loss": 0.0008,
"step": 11500
},
{
"epoch": 24.731182795698924,
"eval_loss": 0.4129054546356201,
"eval_runtime": 202.4824,
"eval_samples_per_second": 4.672,
"eval_steps_per_second": 0.588,
"eval_wer": 13.848571692705928,
"step": 11500
},
{
"epoch": 24.78494623655914,
"grad_norm": 0.005664244759827852,
"learning_rate": 1.8037174721189593e-06,
"loss": 0.0019,
"step": 11525
},
{
"epoch": 24.838709677419356,
"grad_norm": 0.007854313589632511,
"learning_rate": 1.7851301115241638e-06,
"loss": 0.0019,
"step": 11550
},
{
"epoch": 24.892473118279568,
"grad_norm": 0.1173175498843193,
"learning_rate": 1.7665427509293682e-06,
"loss": 0.002,
"step": 11575
},
{
"epoch": 24.946236559139784,
"grad_norm": 0.005052879452705383,
"learning_rate": 1.7479553903345725e-06,
"loss": 0.0021,
"step": 11600
},
{
"epoch": 25.0,
"grad_norm": 0.011530703864991665,
"learning_rate": 1.7293680297397773e-06,
"loss": 0.0017,
"step": 11625
},
{
"epoch": 25.053763440860216,
"grad_norm": 0.07344318926334381,
"learning_rate": 1.7107806691449816e-06,
"loss": 0.002,
"step": 11650
},
{
"epoch": 25.107526881720432,
"grad_norm": 0.12406457215547562,
"learning_rate": 1.692193308550186e-06,
"loss": 0.0017,
"step": 11675
},
{
"epoch": 25.161290322580644,
"grad_norm": 0.005189701449126005,
"learning_rate": 1.6736059479553903e-06,
"loss": 0.0016,
"step": 11700
},
{
"epoch": 25.21505376344086,
"grad_norm": 0.3264636993408203,
"learning_rate": 1.655018587360595e-06,
"loss": 0.0021,
"step": 11725
},
{
"epoch": 25.268817204301076,
"grad_norm": 0.004206045996397734,
"learning_rate": 1.6364312267657994e-06,
"loss": 0.0014,
"step": 11750
},
{
"epoch": 25.322580645161292,
"grad_norm": 0.12464595586061478,
"learning_rate": 1.6178438661710038e-06,
"loss": 0.0019,
"step": 11775
},
{
"epoch": 25.376344086021504,
"grad_norm": 0.0052951849065721035,
"learning_rate": 1.5992565055762083e-06,
"loss": 0.001,
"step": 11800
},
{
"epoch": 25.43010752688172,
"grad_norm": 0.20631186664104462,
"learning_rate": 1.5806691449814127e-06,
"loss": 0.0013,
"step": 11825
},
{
"epoch": 25.483870967741936,
"grad_norm": 0.005665977951139212,
"learning_rate": 1.5620817843866174e-06,
"loss": 0.0023,
"step": 11850
},
{
"epoch": 25.537634408602152,
"grad_norm": 0.18204852938652039,
"learning_rate": 1.5434944237918218e-06,
"loss": 0.0017,
"step": 11875
},
{
"epoch": 25.591397849462364,
"grad_norm": 0.004289372358471155,
"learning_rate": 1.5249070631970261e-06,
"loss": 0.0005,
"step": 11900
},
{
"epoch": 25.64516129032258,
"grad_norm": 0.06818500906229019,
"learning_rate": 1.5063197026022305e-06,
"loss": 0.0005,
"step": 11925
},
{
"epoch": 25.698924731182796,
"grad_norm": 0.14878062903881073,
"learning_rate": 1.487732342007435e-06,
"loss": 0.0016,
"step": 11950
},
{
"epoch": 25.752688172043012,
"grad_norm": 0.003890681779012084,
"learning_rate": 1.4691449814126396e-06,
"loss": 0.0016,
"step": 11975
},
{
"epoch": 25.806451612903224,
"grad_norm": 0.004652164876461029,
"learning_rate": 1.450557620817844e-06,
"loss": 0.001,
"step": 12000
},
{
"epoch": 25.806451612903224,
"eval_loss": 0.4189203381538391,
"eval_runtime": 202.0119,
"eval_samples_per_second": 4.683,
"eval_steps_per_second": 0.589,
"eval_wer": 13.830082277895904,
"step": 12000
},
{
"epoch": 25.86021505376344,
"grad_norm": 0.16533516347408295,
"learning_rate": 1.4319702602230485e-06,
"loss": 0.0011,
"step": 12025
},
{
"epoch": 25.913978494623656,
"grad_norm": 0.0048462748527526855,
"learning_rate": 1.4133828996282528e-06,
"loss": 0.001,
"step": 12050
},
{
"epoch": 25.967741935483872,
"grad_norm": 0.0046990737318992615,
"learning_rate": 1.3947955390334576e-06,
"loss": 0.0017,
"step": 12075
},
{
"epoch": 26.021505376344088,
"grad_norm": 0.003037052694708109,
"learning_rate": 1.376208178438662e-06,
"loss": 0.0024,
"step": 12100
},
{
"epoch": 26.0752688172043,
"grad_norm": 0.08328765630722046,
"learning_rate": 1.3576208178438663e-06,
"loss": 0.0011,
"step": 12125
},
{
"epoch": 26.129032258064516,
"grad_norm": 0.002522684633731842,
"learning_rate": 1.3390334572490706e-06,
"loss": 0.0022,
"step": 12150
},
{
"epoch": 26.182795698924732,
"grad_norm": 0.00458819093182683,
"learning_rate": 1.3204460966542752e-06,
"loss": 0.0018,
"step": 12175
},
{
"epoch": 26.236559139784948,
"grad_norm": 0.004143861588090658,
"learning_rate": 1.3018587360594797e-06,
"loss": 0.0008,
"step": 12200
},
{
"epoch": 26.29032258064516,
"grad_norm": 0.006522686220705509,
"learning_rate": 1.283271375464684e-06,
"loss": 0.0014,
"step": 12225
},
{
"epoch": 26.344086021505376,
"grad_norm": 0.0033553235698491335,
"learning_rate": 1.2646840148698886e-06,
"loss": 0.0018,
"step": 12250
},
{
"epoch": 26.397849462365592,
"grad_norm": 0.004214679356664419,
"learning_rate": 1.2460966542750932e-06,
"loss": 0.0014,
"step": 12275
},
{
"epoch": 26.451612903225808,
"grad_norm": 0.23780201375484467,
"learning_rate": 1.2275092936802975e-06,
"loss": 0.0021,
"step": 12300
},
{
"epoch": 26.50537634408602,
"grad_norm": 0.003071879968047142,
"learning_rate": 1.2089219330855019e-06,
"loss": 0.0021,
"step": 12325
},
{
"epoch": 26.559139784946236,
"grad_norm": 0.003364423755556345,
"learning_rate": 1.1903345724907064e-06,
"loss": 0.0015,
"step": 12350
},
{
"epoch": 26.612903225806452,
"grad_norm": 0.30511873960494995,
"learning_rate": 1.1717472118959108e-06,
"loss": 0.0018,
"step": 12375
},
{
"epoch": 26.666666666666668,
"grad_norm": 0.003765388624742627,
"learning_rate": 1.1531598513011153e-06,
"loss": 0.0026,
"step": 12400
},
{
"epoch": 26.72043010752688,
"grad_norm": 0.13415110111236572,
"learning_rate": 1.1345724907063199e-06,
"loss": 0.0012,
"step": 12425
},
{
"epoch": 26.774193548387096,
"grad_norm": 0.0052949776872992516,
"learning_rate": 1.1159851301115242e-06,
"loss": 0.0006,
"step": 12450
},
{
"epoch": 26.827956989247312,
"grad_norm": 0.0027304012328386307,
"learning_rate": 1.0973977695167288e-06,
"loss": 0.001,
"step": 12475
},
{
"epoch": 26.881720430107528,
"grad_norm": 0.004548298195004463,
"learning_rate": 1.0788104089219331e-06,
"loss": 0.0008,
"step": 12500
},
{
"epoch": 26.881720430107528,
"eval_loss": 0.4191061854362488,
"eval_runtime": 207.3533,
"eval_samples_per_second": 4.562,
"eval_steps_per_second": 0.574,
"eval_wer": 13.959508181566052,
"step": 12500
},
{
"epoch": 26.93548387096774,
"grad_norm": 0.0037072377745062113,
"learning_rate": 1.0602230483271377e-06,
"loss": 0.002,
"step": 12525
},
{
"epoch": 26.989247311827956,
"grad_norm": 0.004865365568548441,
"learning_rate": 1.041635687732342e-06,
"loss": 0.0012,
"step": 12550
},
{
"epoch": 27.043010752688172,
"grad_norm": 0.16591113805770874,
"learning_rate": 1.0230483271375466e-06,
"loss": 0.0008,
"step": 12575
},
{
"epoch": 27.096774193548388,
"grad_norm": 0.003480426501482725,
"learning_rate": 1.004460966542751e-06,
"loss": 0.002,
"step": 12600
},
{
"epoch": 27.150537634408604,
"grad_norm": 0.003888448467478156,
"learning_rate": 9.858736059479555e-07,
"loss": 0.001,
"step": 12625
},
{
"epoch": 27.204301075268816,
"grad_norm": 0.004046307876706123,
"learning_rate": 9.6728624535316e-07,
"loss": 0.0022,
"step": 12650
},
{
"epoch": 27.258064516129032,
"grad_norm": 0.004325231071561575,
"learning_rate": 9.486988847583644e-07,
"loss": 0.0024,
"step": 12675
},
{
"epoch": 27.311827956989248,
"grad_norm": 0.1196964755654335,
"learning_rate": 9.301115241635688e-07,
"loss": 0.001,
"step": 12700
},
{
"epoch": 27.365591397849464,
"grad_norm": 0.003892822889611125,
"learning_rate": 9.115241635687733e-07,
"loss": 0.002,
"step": 12725
},
{
"epoch": 27.419354838709676,
"grad_norm": 0.0024752148892730474,
"learning_rate": 8.929368029739778e-07,
"loss": 0.0007,
"step": 12750
},
{
"epoch": 27.473118279569892,
"grad_norm": 0.00464650196954608,
"learning_rate": 8.743494423791822e-07,
"loss": 0.0019,
"step": 12775
},
{
"epoch": 27.526881720430108,
"grad_norm": 0.2570537328720093,
"learning_rate": 8.557620817843867e-07,
"loss": 0.0022,
"step": 12800
},
{
"epoch": 27.580645161290324,
"grad_norm": 0.003213089657947421,
"learning_rate": 8.371747211895912e-07,
"loss": 0.0008,
"step": 12825
},
{
"epoch": 27.634408602150536,
"grad_norm": 0.0038951928727328777,
"learning_rate": 8.185873605947955e-07,
"loss": 0.0013,
"step": 12850
},
{
"epoch": 27.688172043010752,
"grad_norm": 0.0030759673099964857,
"learning_rate": 8.000000000000001e-07,
"loss": 0.0009,
"step": 12875
},
{
"epoch": 27.741935483870968,
"grad_norm": 0.0037837938871234655,
"learning_rate": 7.814126394052045e-07,
"loss": 0.0022,
"step": 12900
},
{
"epoch": 27.795698924731184,
"grad_norm": 0.0026918076910078526,
"learning_rate": 7.62825278810409e-07,
"loss": 0.0016,
"step": 12925
},
{
"epoch": 27.849462365591396,
"grad_norm": 0.0030537450220435858,
"learning_rate": 7.442379182156134e-07,
"loss": 0.0008,
"step": 12950
},
{
"epoch": 27.903225806451612,
"grad_norm": 0.11770904064178467,
"learning_rate": 7.25650557620818e-07,
"loss": 0.0014,
"step": 12975
},
{
"epoch": 27.956989247311828,
"grad_norm": 0.0030784786213189363,
"learning_rate": 7.070631970260223e-07,
"loss": 0.0018,
"step": 13000
},
{
"epoch": 27.956989247311828,
"eval_loss": 0.421833336353302,
"eval_runtime": 202.7221,
"eval_samples_per_second": 4.666,
"eval_steps_per_second": 0.587,
"eval_wer": 13.793103448275861,
"step": 13000
}
],
"logging_steps": 25,
"max_steps": 13950,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4130840981661286e+21,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}