{
"best_global_step": 28000,
"best_metric": 0.18110816386678455,
"best_model_checkpoint": "./distil-whisper/checkpoint-28000",
"epoch": 30.0,
"eval_steps": 1000,
"global_step": 51210,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05858230814294083,
"grad_norm": 27.49039077758789,
"learning_rate": 1.9e-05,
"loss": 1.7236,
"step": 100
},
{
"epoch": 0.11716461628588166,
"grad_norm": 18.32018280029297,
"learning_rate": 3.9000000000000006e-05,
"loss": 1.796,
"step": 200
},
{
"epoch": 0.1757469244288225,
"grad_norm": 12.191054344177246,
"learning_rate": 5.9e-05,
"loss": 1.8559,
"step": 300
},
{
"epoch": 0.23432923257176333,
"grad_norm": 20.974699020385742,
"learning_rate": 7.900000000000001e-05,
"loss": 1.9804,
"step": 400
},
{
"epoch": 0.29291154071470415,
"grad_norm": 37.665748596191406,
"learning_rate": 9.88e-05,
"loss": 2.2308,
"step": 500
},
{
"epoch": 0.351493848857645,
"grad_norm": 22.89726448059082,
"learning_rate": 9.917896759542318e-05,
"loss": 2.0073,
"step": 600
},
{
"epoch": 0.4100761570005858,
"grad_norm": 28.912738800048828,
"learning_rate": 9.830552886714997e-05,
"loss": 2.2137,
"step": 700
},
{
"epoch": 0.46865846514352666,
"grad_norm": 26.01645851135254,
"learning_rate": 9.743209013887676e-05,
"loss": 2.3668,
"step": 800
},
{
"epoch": 0.5272407732864675,
"grad_norm": 38.67082214355469,
"learning_rate": 9.655865141060355e-05,
"loss": 2.1304,
"step": 900
},
{
"epoch": 0.5858230814294083,
"grad_norm": 42.85211181640625,
"learning_rate": 9.568521268233034e-05,
"loss": 2.1291,
"step": 1000
},
{
"epoch": 0.5858230814294083,
"eval_loss": 0.09121495485305786,
"eval_runtime": 150.358,
"eval_samples_per_second": 3.325,
"eval_steps_per_second": 0.419,
"eval_wer": 0.19776009431181846,
"step": 1000
},
{
"epoch": 0.6444053895723492,
"grad_norm": 41.63334274291992,
"learning_rate": 9.481177395405713e-05,
"loss": 2.176,
"step": 1100
},
{
"epoch": 0.70298769771529,
"grad_norm": 35.67658996582031,
"learning_rate": 9.393833522578391e-05,
"loss": 2.3521,
"step": 1200
},
{
"epoch": 0.7615700058582309,
"grad_norm": 23.11754608154297,
"learning_rate": 9.30648964975107e-05,
"loss": 2.0235,
"step": 1300
},
{
"epoch": 0.8201523140011716,
"grad_norm": 57.13395690917969,
"learning_rate": 9.219145776923749e-05,
"loss": 2.0512,
"step": 1400
},
{
"epoch": 0.8787346221441125,
"grad_norm": 29.889575958251953,
"learning_rate": 9.131801904096428e-05,
"loss": 2.2161,
"step": 1500
},
{
"epoch": 0.9373169302870533,
"grad_norm": 20.80169105529785,
"learning_rate": 9.044458031269108e-05,
"loss": 2.4303,
"step": 1600
},
{
"epoch": 0.9958992384299942,
"grad_norm": 43.849361419677734,
"learning_rate": 8.957114158441786e-05,
"loss": 2.162,
"step": 1700
},
{
"epoch": 1.054481546572935,
"grad_norm": 19.961891174316406,
"learning_rate": 8.869770285614464e-05,
"loss": 1.8381,
"step": 1800
},
{
"epoch": 1.1130638547158758,
"grad_norm": 25.00478744506836,
"learning_rate": 8.782426412787143e-05,
"loss": 1.8102,
"step": 1900
},
{
"epoch": 1.1716461628588166,
"grad_norm": 23.878835678100586,
"learning_rate": 8.695082539959822e-05,
"loss": 1.7057,
"step": 2000
},
{
"epoch": 1.1716461628588166,
"eval_loss": 0.09117516130208969,
"eval_runtime": 145.9897,
"eval_samples_per_second": 3.425,
"eval_steps_per_second": 0.432,
"eval_wer": 0.2002652519893899,
"step": 2000
},
{
"epoch": 1.2302284710017575,
"grad_norm": 36.49687957763672,
"learning_rate": 8.607738667132501e-05,
"loss": 1.7356,
"step": 2100
},
{
"epoch": 1.2888107791446983,
"grad_norm": 23.07692527770996,
"learning_rate": 8.52039479430518e-05,
"loss": 1.6766,
"step": 2200
},
{
"epoch": 1.3473930872876392,
"grad_norm": 23.606229782104492,
"learning_rate": 8.433050921477858e-05,
"loss": 1.7865,
"step": 2300
},
{
"epoch": 1.40597539543058,
"grad_norm": 12.508922576904297,
"learning_rate": 8.345707048650537e-05,
"loss": 1.7503,
"step": 2400
},
{
"epoch": 1.4645577035735209,
"grad_norm": 15.673484802246094,
"learning_rate": 8.258363175823216e-05,
"loss": 1.7208,
"step": 2500
},
{
"epoch": 1.5231400117164617,
"grad_norm": 33.59520721435547,
"learning_rate": 8.171019302995895e-05,
"loss": 1.7469,
"step": 2600
},
{
"epoch": 1.5817223198594026,
"grad_norm": 15.388129234313965,
"learning_rate": 8.083675430168574e-05,
"loss": 1.7075,
"step": 2700
},
{
"epoch": 1.6403046280023434,
"grad_norm": 11.93837833404541,
"learning_rate": 7.996331557341253e-05,
"loss": 1.696,
"step": 2800
},
{
"epoch": 1.698886936145284,
"grad_norm": 25.31481170654297,
"learning_rate": 7.908987684513932e-05,
"loss": 1.7738,
"step": 2900
},
{
"epoch": 1.757469244288225,
"grad_norm": 38.03368377685547,
"learning_rate": 7.821643811686611e-05,
"loss": 1.7162,
"step": 3000
},
{
"epoch": 1.757469244288225,
"eval_loss": 0.09119272977113724,
"eval_runtime": 150.3416,
"eval_samples_per_second": 3.326,
"eval_steps_per_second": 0.419,
"eval_wer": 0.20601237842617154,
"step": 3000
},
{
"epoch": 1.8160515524311658,
"grad_norm": 15.58292293548584,
"learning_rate": 7.73429993885929e-05,
"loss": 1.6062,
"step": 3100
},
{
"epoch": 1.8746338605741066,
"grad_norm": 73.762451171875,
"learning_rate": 7.646956066031969e-05,
"loss": 1.6992,
"step": 3200
},
{
"epoch": 1.9332161687170475,
"grad_norm": 26.838842391967773,
"learning_rate": 7.559612193204648e-05,
"loss": 1.7229,
"step": 3300
},
{
"epoch": 1.9917984768599881,
"grad_norm": 22.824892044067383,
"learning_rate": 7.472268320377325e-05,
"loss": 1.7185,
"step": 3400
},
{
"epoch": 2.050380785002929,
"grad_norm": 13.238216400146484,
"learning_rate": 7.384924447550004e-05,
"loss": 1.4583,
"step": 3500
},
{
"epoch": 2.10896309314587,
"grad_norm": 32.13274383544922,
"learning_rate": 7.297580574722683e-05,
"loss": 1.3799,
"step": 3600
},
{
"epoch": 2.1675454012888107,
"grad_norm": 13.44641399383545,
"learning_rate": 7.210236701895362e-05,
"loss": 1.3713,
"step": 3700
},
{
"epoch": 2.2261277094317515,
"grad_norm": 19.05823516845703,
"learning_rate": 7.122892829068041e-05,
"loss": 1.4489,
"step": 3800
},
{
"epoch": 2.2847100175746924,
"grad_norm": 16.24590492248535,
"learning_rate": 7.03554895624072e-05,
"loss": 1.3858,
"step": 3900
},
{
"epoch": 2.3432923257176332,
"grad_norm": 14.049396514892578,
"learning_rate": 6.948205083413399e-05,
"loss": 1.4996,
"step": 4000
},
{
"epoch": 2.3432923257176332,
"eval_loss": 0.09011241793632507,
"eval_runtime": 149.4815,
"eval_samples_per_second": 3.345,
"eval_steps_per_second": 0.421,
"eval_wer": 0.20468611847922194,
"step": 4000
},
{
"epoch": 2.401874633860574,
"grad_norm": 16.26442527770996,
"learning_rate": 6.860861210586078e-05,
"loss": 1.4692,
"step": 4100
},
{
"epoch": 2.460456942003515,
"grad_norm": 10.367189407348633,
"learning_rate": 6.773517337758757e-05,
"loss": 1.3203,
"step": 4200
},
{
"epoch": 2.519039250146456,
"grad_norm": 21.82206153869629,
"learning_rate": 6.686173464931436e-05,
"loss": 1.5161,
"step": 4300
},
{
"epoch": 2.5776215582893967,
"grad_norm": 10.586897850036621,
"learning_rate": 6.598829592104115e-05,
"loss": 1.3843,
"step": 4400
},
{
"epoch": 2.6362038664323375,
"grad_norm": 18.108095169067383,
"learning_rate": 6.511485719276793e-05,
"loss": 1.446,
"step": 4500
},
{
"epoch": 2.6947861745752784,
"grad_norm": 17.231735229492188,
"learning_rate": 6.424141846449472e-05,
"loss": 1.4083,
"step": 4600
},
{
"epoch": 2.753368482718219,
"grad_norm": 8.833962440490723,
"learning_rate": 6.33679797362215e-05,
"loss": 1.4668,
"step": 4700
},
{
"epoch": 2.81195079086116,
"grad_norm": 14.335036277770996,
"learning_rate": 6.24945410079483e-05,
"loss": 1.3906,
"step": 4800
},
{
"epoch": 2.870533099004101,
"grad_norm": null,
"learning_rate": 6.162110227967508e-05,
"loss": 1.4241,
"step": 4900
},
{
"epoch": 2.9291154071470418,
"grad_norm": 25.987884521484375,
"learning_rate": 6.075639793868461e-05,
"loss": 1.3942,
"step": 5000
},
{
"epoch": 2.9291154071470418,
"eval_loss": 0.0883156806230545,
"eval_runtime": 147.2451,
"eval_samples_per_second": 3.396,
"eval_steps_per_second": 0.428,
"eval_wer": 0.19510757441791923,
"step": 5000
},
{
"epoch": 2.9876977152899826,
"grad_norm": 17.336523056030273,
"learning_rate": 5.988295921041139e-05,
"loss": 1.4093,
"step": 5100
},
{
"epoch": 3.0462800234329235,
"grad_norm": 21.30254364013672,
"learning_rate": 5.900952048213818e-05,
"loss": 1.3729,
"step": 5200
},
{
"epoch": 3.104862331575864,
"grad_norm": 15.282761573791504,
"learning_rate": 5.813608175386497e-05,
"loss": 1.3091,
"step": 5300
},
{
"epoch": 3.1634446397188047,
"grad_norm": 11.485124588012695,
"learning_rate": 5.726264302559175e-05,
"loss": 1.1577,
"step": 5400
},
{
"epoch": 3.2220269478617456,
"grad_norm": 17.49385643005371,
"learning_rate": 5.638920429731854e-05,
"loss": 1.2737,
"step": 5500
},
{
"epoch": 3.2806092560046864,
"grad_norm": 19.74750518798828,
"learning_rate": 5.551576556904533e-05,
"loss": 1.1946,
"step": 5600
},
{
"epoch": 3.3391915641476273,
"grad_norm": 9.402506828308105,
"learning_rate": 5.4642326840772115e-05,
"loss": 1.2035,
"step": 5700
},
{
"epoch": 3.397773872290568,
"grad_norm": 13.279162406921387,
"learning_rate": 5.3768888112498904e-05,
"loss": 1.2891,
"step": 5800
},
{
"epoch": 3.456356180433509,
"grad_norm": 18.554702758789062,
"learning_rate": 5.28954493842257e-05,
"loss": 1.1315,
"step": 5900
},
{
"epoch": 3.51493848857645,
"grad_norm": 10.541516304016113,
"learning_rate": 5.202201065595249e-05,
"loss": 1.2285,
"step": 6000
},
{
"epoch": 3.51493848857645,
"eval_loss": 0.08758817613124847,
"eval_runtime": 148.7151,
"eval_samples_per_second": 3.362,
"eval_steps_per_second": 0.424,
"eval_wer": 0.1956970232832302,
"step": 6000
},
{
"epoch": 3.5735207967193907,
"grad_norm": 17.15789031982422,
"learning_rate": 5.114857192767928e-05,
"loss": 1.278,
"step": 6100
},
{
"epoch": 3.6321031048623316,
"grad_norm": 13.666048049926758,
"learning_rate": 5.027513319940606e-05,
"loss": 1.164,
"step": 6200
},
{
"epoch": 3.6906854130052724,
"grad_norm": 14.286330223083496,
"learning_rate": 4.940169447113285e-05,
"loss": 1.2317,
"step": 6300
},
{
"epoch": 3.7492677211482133,
"grad_norm": 19.070871353149414,
"learning_rate": 4.852825574285964e-05,
"loss": 1.131,
"step": 6400
},
{
"epoch": 3.807850029291154,
"grad_norm": 13.184505462646484,
"learning_rate": 4.7654817014586425e-05,
"loss": 1.2665,
"step": 6500
},
{
"epoch": 3.866432337434095,
"grad_norm": 16.988956451416016,
"learning_rate": 4.6781378286313215e-05,
"loss": 1.2145,
"step": 6600
},
{
"epoch": 3.925014645577036,
"grad_norm": 14.714631080627441,
"learning_rate": 4.590793955804001e-05,
"loss": 1.2228,
"step": 6700
},
{
"epoch": 3.9835969537199767,
"grad_norm": 15.682711601257324,
"learning_rate": 4.5034500829766794e-05,
"loss": 1.2137,
"step": 6800
},
{
"epoch": 4.042179261862917,
"grad_norm": 14.943408012390137,
"learning_rate": 4.4161062101493584e-05,
"loss": 1.2088,
"step": 6900
},
{
"epoch": 4.100761570005858,
"grad_norm": 9.292410850524902,
"learning_rate": 4.3287623373220374e-05,
"loss": 1.0637,
"step": 7000
},
{
"epoch": 4.100761570005858,
"eval_loss": 0.08726315200328827,
"eval_runtime": 149.7325,
"eval_samples_per_second": 3.339,
"eval_steps_per_second": 0.421,
"eval_wer": 0.19201296787503683,
"step": 7000
},
{
"epoch": 4.159343878148799,
"grad_norm": 9.017548561096191,
"learning_rate": 4.241418464494716e-05,
"loss": 1.0308,
"step": 7100
},
{
"epoch": 4.21792618629174,
"grad_norm": 16.15174102783203,
"learning_rate": 4.1540745916673946e-05,
"loss": 1.0464,
"step": 7200
},
{
"epoch": 4.2765084944346805,
"grad_norm": 7.939199447631836,
"learning_rate": 4.0667307188400736e-05,
"loss": 1.1491,
"step": 7300
},
{
"epoch": 4.335090802577621,
"grad_norm": 11.129817008972168,
"learning_rate": 3.9793868460127526e-05,
"loss": 1.0738,
"step": 7400
},
{
"epoch": 4.393673110720562,
"grad_norm": 8.50700855255127,
"learning_rate": 3.8920429731854315e-05,
"loss": 1.0828,
"step": 7500
},
{
"epoch": 4.452255418863503,
"grad_norm": 11.484732627868652,
"learning_rate": 3.80469910035811e-05,
"loss": 1.0418,
"step": 7600
},
{
"epoch": 4.510837727006444,
"grad_norm": 8.929669380187988,
"learning_rate": 3.717355227530789e-05,
"loss": 1.1051,
"step": 7700
},
{
"epoch": 4.569420035149385,
"grad_norm": 12.994172096252441,
"learning_rate": 3.630011354703468e-05,
"loss": 1.0193,
"step": 7800
},
{
"epoch": 4.628002343292326,
"grad_norm": 9.806758880615234,
"learning_rate": 3.542667481876147e-05,
"loss": 1.1699,
"step": 7900
},
{
"epoch": 4.6865846514352665,
"grad_norm": 10.537009239196777,
"learning_rate": 3.455323609048826e-05,
"loss": 1.1144,
"step": 8000
},
{
"epoch": 4.6865846514352665,
"eval_loss": 0.08649158477783203,
"eval_runtime": 148.2812,
"eval_samples_per_second": 3.372,
"eval_steps_per_second": 0.425,
"eval_wer": 0.1927497789566755,
"step": 8000
},
{
"epoch": 4.745166959578207,
"grad_norm": 30.78623390197754,
"learning_rate": 3.367979736221504e-05,
"loss": 1.0902,
"step": 8100
},
{
"epoch": 4.803749267721148,
"grad_norm": 9.64354419708252,
"learning_rate": 3.280635863394183e-05,
"loss": 0.9932,
"step": 8200
},
{
"epoch": 4.862331575864089,
"grad_norm": 11.149614334106445,
"learning_rate": 3.193291990566862e-05,
"loss": 1.0389,
"step": 8300
},
{
"epoch": 4.92091388400703,
"grad_norm": 10.836565971374512,
"learning_rate": 3.10594811773954e-05,
"loss": 1.0626,
"step": 8400
},
{
"epoch": 4.979496192149971,
"grad_norm": 11.348654747009277,
"learning_rate": 3.01860424491222e-05,
"loss": 1.0934,
"step": 8500
},
{
"epoch": 5.038078500292912,
"grad_norm": 8.341979026794434,
"learning_rate": 2.9312603720848985e-05,
"loss": 1.0845,
"step": 8600
},
{
"epoch": 5.0966608084358525,
"grad_norm": 9.784319877624512,
"learning_rate": 2.8439164992575774e-05,
"loss": 0.96,
"step": 8700
},
{
"epoch": 5.155243116578793,
"grad_norm": 11.3285551071167,
"learning_rate": 2.756572626430256e-05,
"loss": 0.9211,
"step": 8800
},
{
"epoch": 5.213825424721734,
"grad_norm": 8.36048698425293,
"learning_rate": 2.6692287536029347e-05,
"loss": 0.9652,
"step": 8900
},
{
"epoch": 5.272407732864675,
"grad_norm": 8.087980270385742,
"learning_rate": 2.5818848807756137e-05,
"loss": 1.0164,
"step": 9000
},
{
"epoch": 5.272407732864675,
"eval_loss": 0.08577600121498108,
"eval_runtime": 149.9869,
"eval_samples_per_second": 3.334,
"eval_steps_per_second": 0.42,
"eval_wer": 0.19230769230769232,
"step": 9000
},
{
"epoch": 5.330990041007616,
"grad_norm": 14.56843090057373,
"learning_rate": 2.4954144466765657e-05,
"loss": 0.9776,
"step": 9100
},
{
"epoch": 5.389572349150557,
"grad_norm": 10.220062255859375,
"learning_rate": 2.4080705738492446e-05,
"loss": 0.9681,
"step": 9200
},
{
"epoch": 5.448154657293498,
"grad_norm": 11.614462852478027,
"learning_rate": 2.3207267010219233e-05,
"loss": 0.9691,
"step": 9300
},
{
"epoch": 5.506736965436438,
"grad_norm": 6.580599784851074,
"learning_rate": 2.2333828281946022e-05,
"loss": 0.9492,
"step": 9400
},
{
"epoch": 5.565319273579379,
"grad_norm": 10.284950256347656,
"learning_rate": 2.1460389553672812e-05,
"loss": 1.0092,
"step": 9500
},
{
"epoch": 5.62390158172232,
"grad_norm": 10.794511795043945,
"learning_rate": 2.0586950825399598e-05,
"loss": 0.9039,
"step": 9600
},
{
"epoch": 5.682483889865261,
"grad_norm": 12.07039737701416,
"learning_rate": 1.9713512097126388e-05,
"loss": 1.0114,
"step": 9700
},
{
"epoch": 5.741066198008202,
"grad_norm": 15.622093200683594,
"learning_rate": 1.8840073368853174e-05,
"loss": 0.9762,
"step": 9800
},
{
"epoch": 5.799648506151143,
"grad_norm": 7.460862636566162,
"learning_rate": 1.7966634640579964e-05,
"loss": 0.974,
"step": 9900
},
{
"epoch": 5.8582308142940835,
"grad_norm": 9.551807403564453,
"learning_rate": 1.7093195912306753e-05,
"loss": 0.9812,
"step": 10000
},
{
"epoch": 5.8582308142940835,
"eval_loss": 0.08563477545976639,
"eval_runtime": 148.6427,
"eval_samples_per_second": 3.364,
"eval_steps_per_second": 0.424,
"eval_wer": 0.1940760389036251,
"step": 10000
},
{
"epoch": 5.916813122437024,
"grad_norm": 10.54980754852295,
"learning_rate": 1.621975718403354e-05,
"loss": 0.9981,
"step": 10100
},
{
"epoch": 5.975395430579965,
"grad_norm": 8.396002769470215,
"learning_rate": 1.534631845576033e-05,
"loss": 0.911,
"step": 10200
},
{
"epoch": 6.033977738722906,
"grad_norm": 6.294841289520264,
"learning_rate": 1.4472879727487117e-05,
"loss": 0.8886,
"step": 10300
},
{
"epoch": 6.092560046865847,
"grad_norm": 10.276989936828613,
"learning_rate": 1.3599440999213905e-05,
"loss": 0.8764,
"step": 10400
},
{
"epoch": 6.151142355008787,
"grad_norm": 9.27648639678955,
"learning_rate": 1.2726002270940693e-05,
"loss": 0.8728,
"step": 10500
},
{
"epoch": 6.209724663151728,
"grad_norm": 10.15081787109375,
"learning_rate": 1.1852563542667483e-05,
"loss": 0.9008,
"step": 10600
},
{
"epoch": 6.268306971294669,
"grad_norm": 7.828310012817383,
"learning_rate": 1.0979124814394271e-05,
"loss": 0.8703,
"step": 10700
},
{
"epoch": 6.3268892794376095,
"grad_norm": 8.747062683105469,
"learning_rate": 1.0105686086121059e-05,
"loss": 0.9343,
"step": 10800
},
{
"epoch": 6.38547158758055,
"grad_norm": 7.199453830718994,
"learning_rate": 9.232247357847849e-06,
"loss": 0.9144,
"step": 10900
},
{
"epoch": 6.444053895723491,
"grad_norm": 7.865077972412109,
"learning_rate": 8.358808629574635e-06,
"loss": 0.8927,
"step": 11000
},
{
"epoch": 6.444053895723491,
"eval_loss": 0.08493725210428238,
"eval_runtime": 152.514,
"eval_samples_per_second": 3.278,
"eval_steps_per_second": 0.413,
"eval_wer": 0.20173887415266725,
"step": 11000
},
{
"epoch": 6.502636203866432,
"grad_norm": 8.560958862304688,
"learning_rate": 7.485369901301424e-06,
"loss": 0.8994,
"step": 11100
},
{
"epoch": 6.561218512009373,
"grad_norm": 8.679183006286621,
"learning_rate": 6.611931173028213e-06,
"loss": 0.9121,
"step": 11200
},
{
"epoch": 6.619800820152314,
"grad_norm": 7.480246067047119,
"learning_rate": 5.7384924447550014e-06,
"loss": 0.9015,
"step": 11300
},
{
"epoch": 6.678383128295255,
"grad_norm": 8.197961807250977,
"learning_rate": 4.865053716481789e-06,
"loss": 0.8958,
"step": 11400
},
{
"epoch": 6.7369654364381955,
"grad_norm": 9.989555358886719,
"learning_rate": 3.991614988208577e-06,
"loss": 0.8816,
"step": 11500
},
{
"epoch": 6.795547744581136,
"grad_norm": 11.78941535949707,
"learning_rate": 3.118176259935366e-06,
"loss": 0.8848,
"step": 11600
},
{
"epoch": 6.854130052724077,
"grad_norm": 8.92465591430664,
"learning_rate": 2.244737531662154e-06,
"loss": 0.8758,
"step": 11700
},
{
"epoch": 6.912712360867018,
"grad_norm": 5.927036762237549,
"learning_rate": 1.3712988033889424e-06,
"loss": 0.8421,
"step": 11800
},
{
"epoch": 6.971294669009959,
"grad_norm": 6.0556960105896,
"learning_rate": 4.978600751157307e-07,
"loss": 0.8566,
"step": 11900
},
{
"epoch": 7.0298769771529,
"grad_norm": 8.540233612060547,
"learning_rate": 3.064574532287266e-05,
"loss": 0.8936,
"step": 12000
},
{
"epoch": 7.0298769771529,
"eval_loss": 0.08440528064966202,
"eval_runtime": 151.0062,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.417,
"eval_wer": 0.19613910993221337,
"step": 12000
},
{
"epoch": 7.088459285295841,
"grad_norm": 8.703925132751465,
"learning_rate": 3.0042245021122513e-05,
"loss": 0.9237,
"step": 12100
},
{
"epoch": 7.147041593438781,
"grad_norm": 11.286332130432129,
"learning_rate": 2.9438744719372363e-05,
"loss": 0.917,
"step": 12200
},
{
"epoch": 7.205623901581722,
"grad_norm": 8.638763427734375,
"learning_rate": 2.884127942063971e-05,
"loss": 0.88,
"step": 12300
},
{
"epoch": 7.264206209724663,
"grad_norm": 9.697504043579102,
"learning_rate": 2.8237779118889563e-05,
"loss": 0.9342,
"step": 12400
},
{
"epoch": 7.322788517867604,
"grad_norm": 8.06286334991455,
"learning_rate": 2.7634278817139407e-05,
"loss": 0.8979,
"step": 12500
},
{
"epoch": 7.381370826010545,
"grad_norm": 15.282951354980469,
"learning_rate": 2.703077851538926e-05,
"loss": 0.9201,
"step": 12600
},
{
"epoch": 7.439953134153486,
"grad_norm": 11.369593620300293,
"learning_rate": 2.6427278213639107e-05,
"loss": 0.9538,
"step": 12700
},
{
"epoch": 7.4985354422964265,
"grad_norm": 10.077091217041016,
"learning_rate": 2.5823777911888958e-05,
"loss": 0.9807,
"step": 12800
},
{
"epoch": 7.557117750439367,
"grad_norm": 9.917128562927246,
"learning_rate": 2.5220277610138805e-05,
"loss": 0.939,
"step": 12900
},
{
"epoch": 7.615700058582308,
"grad_norm": 10.68909740447998,
"learning_rate": 2.4616777308388655e-05,
"loss": 0.8718,
"step": 13000
},
{
"epoch": 7.615700058582308,
"eval_loss": 0.08539500832557678,
"eval_runtime": 149.7688,
"eval_samples_per_second": 3.338,
"eval_steps_per_second": 0.421,
"eval_wer": 0.19790745652814618,
"step": 13000
},
{
"epoch": 7.674282366725249,
"grad_norm": 8.260842323303223,
"learning_rate": 2.4013277006638505e-05,
"loss": 0.9048,
"step": 13100
},
{
"epoch": 7.73286467486819,
"grad_norm": 16.34197235107422,
"learning_rate": 2.3409776704888352e-05,
"loss": 0.9262,
"step": 13200
},
{
"epoch": 7.791446983011131,
"grad_norm": 8.024565696716309,
"learning_rate": 2.2806276403138202e-05,
"loss": 0.9952,
"step": 13300
},
{
"epoch": 7.850029291154072,
"grad_norm": 7.884005069732666,
"learning_rate": 2.220277610138805e-05,
"loss": 0.9656,
"step": 13400
},
{
"epoch": 7.9086115992970125,
"grad_norm": 8.73161506652832,
"learning_rate": 2.15992757996379e-05,
"loss": 0.9205,
"step": 13500
},
{
"epoch": 7.967193907439953,
"grad_norm": 9.00133991241455,
"learning_rate": 2.099577549788775e-05,
"loss": 0.9347,
"step": 13600
},
{
"epoch": 8.025776215582894,
"grad_norm": 6.850646495819092,
"learning_rate": 2.03922751961376e-05,
"loss": 0.8468,
"step": 13700
},
{
"epoch": 8.084358523725834,
"grad_norm": 11.7725248336792,
"learning_rate": 1.978877489438745e-05,
"loss": 0.8948,
"step": 13800
},
{
"epoch": 8.142940831868776,
"grad_norm": 6.4703474044799805,
"learning_rate": 1.9185274592637298e-05,
"loss": 0.8727,
"step": 13900
},
{
"epoch": 8.201523140011716,
"grad_norm": 15.587645530700684,
"learning_rate": 1.8581774290887148e-05,
"loss": 0.9019,
"step": 14000
},
{
"epoch": 8.201523140011716,
"eval_loss": 0.08469171822071075,
"eval_runtime": 147.0496,
"eval_samples_per_second": 3.4,
"eval_steps_per_second": 0.428,
"eval_wer": 0.18538166814028884,
"step": 14000
},
{
"epoch": 8.260105448154658,
"grad_norm": 7.585418701171875,
"learning_rate": 1.7978273989136995e-05,
"loss": 0.8818,
"step": 14100
},
{
"epoch": 8.318687756297598,
"grad_norm": 9.436836242675781,
"learning_rate": 1.7374773687386845e-05,
"loss": 0.8864,
"step": 14200
},
{
"epoch": 8.37727006444054,
"grad_norm": 12.12936019897461,
"learning_rate": 1.6771273385636692e-05,
"loss": 0.8744,
"step": 14300
},
{
"epoch": 8.43585237258348,
"grad_norm": 11.584985733032227,
"learning_rate": 1.6167773083886542e-05,
"loss": 0.8542,
"step": 14400
},
{
"epoch": 8.494434680726421,
"grad_norm": 7.6883440017700195,
"learning_rate": 1.5564272782136393e-05,
"loss": 0.8714,
"step": 14500
},
{
"epoch": 8.553016988869361,
"grad_norm": 13.686609268188477,
"learning_rate": 1.496077248038624e-05,
"loss": 0.8726,
"step": 14600
},
{
"epoch": 8.611599297012303,
"grad_norm": 10.200602531433105,
"learning_rate": 1.436330718165359e-05,
"loss": 0.8839,
"step": 14700
},
{
"epoch": 8.670181605155243,
"grad_norm": 6.929018020629883,
"learning_rate": 1.3759806879903441e-05,
"loss": 0.8628,
"step": 14800
},
{
"epoch": 8.728763913298184,
"grad_norm": 9.72988224029541,
"learning_rate": 1.3156306578153291e-05,
"loss": 0.8672,
"step": 14900
},
{
"epoch": 8.787346221441124,
"grad_norm": 7.273561477661133,
"learning_rate": 1.255280627640314e-05,
"loss": 0.8293,
"step": 15000
},
{
"epoch": 8.787346221441124,
"eval_loss": 0.08473628014326096,
"eval_runtime": 147.7929,
"eval_samples_per_second": 3.383,
"eval_steps_per_second": 0.426,
"eval_wer": 0.19834954317712938,
"step": 15000
},
{
"epoch": 8.845928529584066,
"grad_norm": 6.900521278381348,
"learning_rate": 1.1949305974652989e-05,
"loss": 0.8797,
"step": 15100
},
{
"epoch": 8.904510837727006,
"grad_norm": 13.14035701751709,
"learning_rate": 1.1345805672902837e-05,
"loss": 0.8691,
"step": 15200
},
{
"epoch": 8.963093145869948,
"grad_norm": 6.80872106552124,
"learning_rate": 1.0742305371152686e-05,
"loss": 0.859,
"step": 15300
},
{
"epoch": 9.021675454012888,
"grad_norm": 5.7985520362854,
"learning_rate": 1.0138805069402535e-05,
"loss": 0.8905,
"step": 15400
},
{
"epoch": 9.08025776215583,
"grad_norm": 7.384444236755371,
"learning_rate": 9.535304767652383e-06,
"loss": 0.7981,
"step": 15500
},
{
"epoch": 9.13884007029877,
"grad_norm": 6.441751956939697,
"learning_rate": 8.931804465902233e-06,
"loss": 0.9026,
"step": 15600
},
{
"epoch": 9.197422378441711,
"grad_norm": 5.471485614776611,
"learning_rate": 8.328304164152082e-06,
"loss": 0.8357,
"step": 15700
},
{
"epoch": 9.256004686584651,
"grad_norm": 6.093921661376953,
"learning_rate": 7.724803862401932e-06,
"loss": 0.8427,
"step": 15800
},
{
"epoch": 9.314586994727593,
"grad_norm": 5.414072036743164,
"learning_rate": 7.121303560651781e-06,
"loss": 0.8235,
"step": 15900
},
{
"epoch": 9.373169302870533,
"grad_norm": 5.2771897315979,
"learning_rate": 6.5178032589016296e-06,
"loss": 0.8363,
"step": 16000
},
{
"epoch": 9.373169302870533,
"eval_loss": 0.08421996235847473,
"eval_runtime": 150.6901,
"eval_samples_per_second": 3.318,
"eval_steps_per_second": 0.418,
"eval_wer": 0.19820218096080164,
"step": 16000
},
{
"epoch": 9.431751611013475,
"grad_norm": 5.505492687225342,
"learning_rate": 5.914302957151479e-06,
"loss": 0.7991,
"step": 16100
},
{
"epoch": 9.490333919156415,
"grad_norm": 6.690750598907471,
"learning_rate": 5.310802655401328e-06,
"loss": 0.8071,
"step": 16200
},
{
"epoch": 9.548916227299356,
"grad_norm": 6.654877185821533,
"learning_rate": 4.707302353651177e-06,
"loss": 0.834,
"step": 16300
},
{
"epoch": 9.607498535442296,
"grad_norm": 9.937077522277832,
"learning_rate": 4.1038020519010266e-06,
"loss": 0.8055,
"step": 16400
},
{
"epoch": 9.666080843585238,
"grad_norm": 6.015642166137695,
"learning_rate": 3.500301750150875e-06,
"loss": 0.8734,
"step": 16500
},
{
"epoch": 9.724663151728178,
"grad_norm": 12.434464454650879,
"learning_rate": 2.896801448400724e-06,
"loss": 0.8544,
"step": 16600
},
{
"epoch": 9.783245459871118,
"grad_norm": 6.330708980560303,
"learning_rate": 2.2933011466505732e-06,
"loss": 0.798,
"step": 16700
},
{
"epoch": 9.84182776801406,
"grad_norm": 5.820682048797607,
"learning_rate": 1.6898008449004227e-06,
"loss": 0.7989,
"step": 16800
},
{
"epoch": 9.900410076157002,
"grad_norm": 8.209725379943848,
"learning_rate": 1.0863005431502715e-06,
"loss": 0.8454,
"step": 16900
},
{
"epoch": 9.958992384299941,
"grad_norm": 10.676623344421387,
"learning_rate": 4.828002414001208e-07,
"loss": 0.8034,
"step": 17000
},
{
"epoch": 9.958992384299941,
"eval_loss": 0.08400186896324158,
"eval_runtime": 148.9273,
"eval_samples_per_second": 3.357,
"eval_steps_per_second": 0.423,
"eval_wer": 0.197465369879163,
"step": 17000
},
{
"epoch": 10.017574692442881,
"grad_norm": 8.057076454162598,
"learning_rate": 3.8124720274503957e-05,
"loss": 0.8553,
"step": 17100
},
{
"epoch": 10.076157000585823,
"grad_norm": 6.9262895584106445,
"learning_rate": 3.7751752946441895e-05,
"loss": 0.8579,
"step": 17200
},
{
"epoch": 10.134739308728763,
"grad_norm": 7.844761371612549,
"learning_rate": 3.737878561837983e-05,
"loss": 0.8764,
"step": 17300
},
{
"epoch": 10.193321616871705,
"grad_norm": 17.013307571411133,
"learning_rate": 3.700954796359839e-05,
"loss": 0.9679,
"step": 17400
},
{
"epoch": 10.251903925014645,
"grad_norm": 10.550113677978516,
"learning_rate": 3.663658063553633e-05,
"loss": 0.8919,
"step": 17500
},
{
"epoch": 10.310486233157587,
"grad_norm": 7.075886249542236,
"learning_rate": 3.6263613307474266e-05,
"loss": 0.9601,
"step": 17600
},
{
"epoch": 10.369068541300527,
"grad_norm": 10.027517318725586,
"learning_rate": 3.5890645979412204e-05,
"loss": 0.8596,
"step": 17700
},
{
"epoch": 10.427650849443468,
"grad_norm": 5.811990261077881,
"learning_rate": 3.551767865135014e-05,
"loss": 0.9023,
"step": 17800
},
{
"epoch": 10.486233157586408,
"grad_norm": 12.586297988891602,
"learning_rate": 3.514471132328808e-05,
"loss": 0.93,
"step": 17900
},
{
"epoch": 10.54481546572935,
"grad_norm": 9.313389778137207,
"learning_rate": 3.477174399522602e-05,
"loss": 0.8462,
"step": 18000
},
{
"epoch": 10.54481546572935,
"eval_loss": 0.08548491448163986,
"eval_runtime": 156.5497,
"eval_samples_per_second": 3.194,
"eval_steps_per_second": 0.402,
"eval_wer": 0.19525493663424698,
"step": 18000
},
{
"epoch": 10.60339777387229,
"grad_norm": 9.245261192321777,
"learning_rate": 3.439877666716396e-05,
"loss": 0.9351,
"step": 18100
},
{
"epoch": 10.661980082015232,
"grad_norm": 7.513446807861328,
"learning_rate": 3.4025809339101895e-05,
"loss": 0.8722,
"step": 18200
},
{
"epoch": 10.720562390158172,
"grad_norm": 12.897968292236328,
"learning_rate": 3.365284201103984e-05,
"loss": 0.9576,
"step": 18300
},
{
"epoch": 10.779144698301113,
"grad_norm": 9.550192832946777,
"learning_rate": 3.327987468297777e-05,
"loss": 0.9241,
"step": 18400
},
{
"epoch": 10.837727006444053,
"grad_norm": 6.644899368286133,
"learning_rate": 3.290690735491571e-05,
"loss": 0.9291,
"step": 18500
},
{
"epoch": 10.896309314586995,
"grad_norm": 9.285797119140625,
"learning_rate": 3.253394002685365e-05,
"loss": 0.8971,
"step": 18600
},
{
"epoch": 10.954891622729935,
"grad_norm": 8.316353797912598,
"learning_rate": 3.2160972698791586e-05,
"loss": 0.8892,
"step": 18700
},
{
"epoch": 11.013473930872877,
"grad_norm": 17.100173950195312,
"learning_rate": 3.1788005370729524e-05,
"loss": 0.8732,
"step": 18800
},
{
"epoch": 11.072056239015817,
"grad_norm": 19.12342643737793,
"learning_rate": 3.141503804266747e-05,
"loss": 0.8663,
"step": 18900
},
{
"epoch": 11.130638547158759,
"grad_norm": 7.627189636230469,
"learning_rate": 3.10420707146054e-05,
"loss": 0.8824,
"step": 19000
},
{
"epoch": 11.130638547158759,
"eval_loss": 0.08482780307531357,
"eval_runtime": 146.6345,
"eval_samples_per_second": 3.41,
"eval_steps_per_second": 0.43,
"eval_wer": 0.19304450338933096,
"step": 19000
},
{
"epoch": 11.189220855301699,
"grad_norm": 7.1925950050354,
"learning_rate": 3.066910338654334e-05,
"loss": 0.8375,
"step": 19100
},
{
"epoch": 11.24780316344464,
"grad_norm": 8.55908489227295,
"learning_rate": 3.0296136058481277e-05,
"loss": 0.8335,
"step": 19200
},
{
"epoch": 11.30638547158758,
"grad_norm": 15.089740753173828,
"learning_rate": 2.9923168730419215e-05,
"loss": 0.9117,
"step": 19300
},
{
"epoch": 11.364967779730522,
"grad_norm": 8.451448440551758,
"learning_rate": 2.9550201402357153e-05,
"loss": 0.8586,
"step": 19400
},
{
"epoch": 11.423550087873462,
"grad_norm": 7.994997501373291,
"learning_rate": 2.9177234074295095e-05,
"loss": 0.8459,
"step": 19500
},
{
"epoch": 11.482132396016404,
"grad_norm": 8.656350135803223,
"learning_rate": 2.8804266746233033e-05,
"loss": 0.9268,
"step": 19600
},
{
"epoch": 11.540714704159344,
"grad_norm": 10.245903968811035,
"learning_rate": 2.843129941817097e-05,
"loss": 0.8657,
"step": 19700
},
{
"epoch": 11.599297012302285,
"grad_norm": 8.758448600769043,
"learning_rate": 2.806206176338953e-05,
"loss": 0.8875,
"step": 19800
},
{
"epoch": 11.657879320445225,
"grad_norm": 6.345497131347656,
"learning_rate": 2.7689094435327466e-05,
"loss": 0.9115,
"step": 19900
},
{
"epoch": 11.716461628588167,
"grad_norm": 9.602129936218262,
"learning_rate": 2.7316127107265404e-05,
"loss": 0.8591,
"step": 20000
},
{
"epoch": 11.716461628588167,
"eval_loss": 0.0848940759897232,
"eval_runtime": 147.8483,
"eval_samples_per_second": 3.382,
"eval_steps_per_second": 0.426,
"eval_wer": 0.18376068376068377,
"step": 20000
},
{
"epoch": 11.775043936731107,
"grad_norm": 10.147231101989746,
"learning_rate": 2.6943159779203343e-05,
"loss": 0.8628,
"step": 20100
},
{
"epoch": 11.833626244874049,
"grad_norm": 7.136846542358398,
"learning_rate": 2.657019245114128e-05,
"loss": 0.9034,
"step": 20200
},
{
"epoch": 11.892208553016989,
"grad_norm": 11.765522003173828,
"learning_rate": 2.6197225123079222e-05,
"loss": 0.8635,
"step": 20300
},
{
"epoch": 11.95079086115993,
"grad_norm": 7.662530422210693,
"learning_rate": 2.582425779501716e-05,
"loss": 0.8561,
"step": 20400
},
{
"epoch": 12.00937316930287,
"grad_norm": 10.559505462646484,
"learning_rate": 2.5451290466955095e-05,
"loss": 0.8319,
"step": 20500
},
{
"epoch": 12.067955477445812,
"grad_norm": 6.208855152130127,
"learning_rate": 2.5078323138893034e-05,
"loss": 0.7679,
"step": 20600
},
{
"epoch": 12.126537785588752,
"grad_norm": 10.877766609191895,
"learning_rate": 2.4705355810830972e-05,
"loss": 0.8484,
"step": 20700
},
{
"epoch": 12.185120093731694,
"grad_norm": 8.098186492919922,
"learning_rate": 2.433238848276891e-05,
"loss": 0.8644,
"step": 20800
},
{
"epoch": 12.243702401874634,
"grad_norm": 7.471461772918701,
"learning_rate": 2.3959421154706848e-05,
"loss": 0.8372,
"step": 20900
},
{
"epoch": 12.302284710017574,
"grad_norm": 10.694164276123047,
"learning_rate": 2.3586453826644786e-05,
"loss": 0.8339,
"step": 21000
},
{
"epoch": 12.302284710017574,
"eval_loss": 0.08417258411645889,
"eval_runtime": 146.3017,
"eval_samples_per_second": 3.418,
"eval_steps_per_second": 0.431,
"eval_wer": 0.18626584143825523,
"step": 21000
},
{
"epoch": 12.360867018160516,
"grad_norm": 8.07205581665039,
"learning_rate": 2.3213486498582724e-05,
"loss": 0.7901,
"step": 21100
},
{
"epoch": 12.419449326303456,
"grad_norm": 10.554586410522461,
"learning_rate": 2.2840519170520663e-05,
"loss": 0.8117,
"step": 21200
},
{
"epoch": 12.478031634446397,
"grad_norm": 9.55418872833252,
"learning_rate": 2.24675518424586e-05,
"loss": 0.7946,
"step": 21300
},
{
"epoch": 12.536613942589337,
"grad_norm": 8.738641738891602,
"learning_rate": 2.2094584514396542e-05,
"loss": 0.8026,
"step": 21400
},
{
"epoch": 12.595196250732279,
"grad_norm": 9.077950477600098,
"learning_rate": 2.1721617186334477e-05,
"loss": 0.8102,
"step": 21500
},
{
"epoch": 12.653778558875219,
"grad_norm": 7.797760009765625,
"learning_rate": 2.1348649858272415e-05,
"loss": 0.8608,
"step": 21600
},
{
"epoch": 12.71236086701816,
"grad_norm": 7.361778736114502,
"learning_rate": 2.0975682530210357e-05,
"loss": 0.8448,
"step": 21700
},
{
"epoch": 12.7709431751611,
"grad_norm": 5.896770477294922,
"learning_rate": 2.0602715202148292e-05,
"loss": 0.8394,
"step": 21800
},
{
"epoch": 12.829525483304042,
"grad_norm": 11.283666610717773,
"learning_rate": 2.0233477547366852e-05,
"loss": 0.8326,
"step": 21900
},
{
"epoch": 12.888107791446982,
"grad_norm": 8.800780296325684,
"learning_rate": 1.986051021930479e-05,
"loss": 0.8573,
"step": 22000
},
{
"epoch": 12.888107791446982,
"eval_loss": 0.08360177278518677,
"eval_runtime": 148.5908,
"eval_samples_per_second": 3.365,
"eval_steps_per_second": 0.424,
"eval_wer": 0.19260241674034778,
"step": 22000
},
{
"epoch": 12.946690099589924,
"grad_norm": 7.05850887298584,
"learning_rate": 1.948754289124273e-05,
"loss": 0.7926,
"step": 22100
},
{
"epoch": 13.005272407732864,
"grad_norm": 7.339128017425537,
"learning_rate": 1.9114575563180667e-05,
"loss": 0.8611,
"step": 22200
},
{
"epoch": 13.063854715875806,
"grad_norm": 7.789575576782227,
"learning_rate": 1.8741608235118605e-05,
"loss": 0.8006,
"step": 22300
},
{
"epoch": 13.122437024018746,
"grad_norm": 10.398506164550781,
"learning_rate": 1.8368640907056543e-05,
"loss": 0.8397,
"step": 22400
},
{
"epoch": 13.181019332161688,
"grad_norm": 8.361679077148438,
"learning_rate": 1.799567357899448e-05,
"loss": 0.8027,
"step": 22500
},
{
"epoch": 13.239601640304627,
"grad_norm": 8.431894302368164,
"learning_rate": 1.762270625093242e-05,
"loss": 0.824,
"step": 22600
},
{
"epoch": 13.29818394844757,
"grad_norm": 5.742968559265137,
"learning_rate": 1.7249738922870357e-05,
"loss": 0.792,
"step": 22700
},
{
"epoch": 13.35676625659051,
"grad_norm": 13.589301109313965,
"learning_rate": 1.6876771594808296e-05,
"loss": 0.7701,
"step": 22800
},
{
"epoch": 13.415348564733451,
"grad_norm": 6.760063171386719,
"learning_rate": 1.6503804266746234e-05,
"loss": 0.7756,
"step": 22900
},
{
"epoch": 13.473930872876391,
"grad_norm": 8.249403953552246,
"learning_rate": 1.6130836938684172e-05,
"loss": 0.7445,
"step": 23000
},
{
"epoch": 13.473930872876391,
"eval_loss": 0.08392482995986938,
"eval_runtime": 146.723,
"eval_samples_per_second": 3.408,
"eval_steps_per_second": 0.429,
"eval_wer": 0.18420277040966695,
"step": 23000
},
{
"epoch": 13.532513181019333,
"grad_norm": 6.522210597991943,
"learning_rate": 1.575786961062211e-05,
"loss": 0.7743,
"step": 23100
},
{
"epoch": 13.591095489162273,
"grad_norm": 6.639892101287842,
"learning_rate": 1.538490228256005e-05,
"loss": 0.8147,
"step": 23200
},
{
"epoch": 13.649677797305214,
"grad_norm": 6.0167999267578125,
"learning_rate": 1.5011934954497987e-05,
"loss": 0.781,
"step": 23300
},
{
"epoch": 13.708260105448154,
"grad_norm": 9.794026374816895,
"learning_rate": 1.4638967626435926e-05,
"loss": 0.771,
"step": 23400
},
{
"epoch": 13.766842413591096,
"grad_norm": 8.01543140411377,
"learning_rate": 1.4266000298373863e-05,
"loss": 0.7812,
"step": 23500
},
{
"epoch": 13.825424721734036,
"grad_norm": 10.331818580627441,
"learning_rate": 1.3893032970311801e-05,
"loss": 0.7864,
"step": 23600
},
{
"epoch": 13.884007029876978,
"grad_norm": 8.055398941040039,
"learning_rate": 1.3520065642249741e-05,
"loss": 0.7991,
"step": 23700
},
{
"epoch": 13.942589338019918,
"grad_norm": 8.330449104309082,
"learning_rate": 1.3147098314187677e-05,
"loss": 0.8088,
"step": 23800
},
{
"epoch": 14.00117164616286,
"grad_norm": 6.869382381439209,
"learning_rate": 1.2774130986125616e-05,
"loss": 0.76,
"step": 23900
},
{
"epoch": 14.0597539543058,
"grad_norm": 6.649117469787598,
"learning_rate": 1.2401163658063554e-05,
"loss": 0.783,
"step": 24000
},
{
"epoch": 14.0597539543058,
"eval_loss": 0.08357907831668854,
"eval_runtime": 147.3445,
"eval_samples_per_second": 3.393,
"eval_steps_per_second": 0.428,
"eval_wer": 0.18420277040966695,
"step": 24000
},
{
"epoch": 14.118336262448741,
"grad_norm": 8.647706031799316,
"learning_rate": 1.2028196330001492e-05,
"loss": 0.7496,
"step": 24100
},
{
"epoch": 14.176918570591681,
"grad_norm": 8.944561004638672,
"learning_rate": 1.165522900193943e-05,
"loss": 0.7662,
"step": 24200
},
{
"epoch": 14.235500878734623,
"grad_norm": 8.365220069885254,
"learning_rate": 1.1282261673877368e-05,
"loss": 0.7645,
"step": 24300
},
{
"epoch": 14.294083186877563,
"grad_norm": 9.97271728515625,
"learning_rate": 1.0909294345815308e-05,
"loss": 0.74,
"step": 24400
},
{
"epoch": 14.352665495020505,
"grad_norm": 5.968284606933594,
"learning_rate": 1.0536327017753245e-05,
"loss": 0.7875,
"step": 24500
},
{
"epoch": 14.411247803163445,
"grad_norm": 6.4041008949279785,
"learning_rate": 1.0163359689691183e-05,
"loss": 0.7557,
"step": 24600
},
{
"epoch": 14.469830111306386,
"grad_norm": 6.7281036376953125,
"learning_rate": 9.790392361629123e-06,
"loss": 0.7478,
"step": 24700
},
{
"epoch": 14.528412419449326,
"grad_norm": 9.13178539276123,
"learning_rate": 9.41742503356706e-06,
"loss": 0.7528,
"step": 24800
},
{
"epoch": 14.586994727592266,
"grad_norm": 7.79683780670166,
"learning_rate": 9.044457705504997e-06,
"loss": 0.805,
"step": 24900
},
{
"epoch": 14.645577035735208,
"grad_norm": 6.039112567901611,
"learning_rate": 8.671490377442937e-06,
"loss": 0.7263,
"step": 25000
},
{
"epoch": 14.645577035735208,
"eval_loss": 0.08391948789358139,
"eval_runtime": 147.8059,
"eval_samples_per_second": 3.383,
"eval_steps_per_second": 0.426,
"eval_wer": 0.18243442381373415,
"step": 25000
},
{
"epoch": 14.70415934387815,
"grad_norm": 7.50616979598999,
"learning_rate": 8.298523049380874e-06,
"loss": 0.7722,
"step": 25100
},
{
"epoch": 14.76274165202109,
"grad_norm": 6.403426647186279,
"learning_rate": 7.925555721318812e-06,
"loss": 0.7742,
"step": 25200
},
{
"epoch": 14.82132396016403,
"grad_norm": 7.445984363555908,
"learning_rate": 7.556318066537371e-06,
"loss": 0.7505,
"step": 25300
},
{
"epoch": 14.879906268306971,
"grad_norm": 7.770444869995117,
"learning_rate": 7.1833507384753095e-06,
"loss": 0.7524,
"step": 25400
},
{
"epoch": 14.938488576449911,
"grad_norm": 6.477992057800293,
"learning_rate": 6.8103834104132485e-06,
"loss": 0.7477,
"step": 25500
},
{
"epoch": 14.997070884592853,
"grad_norm": 10.539923667907715,
"learning_rate": 6.437416082351186e-06,
"loss": 0.7285,
"step": 25600
},
{
"epoch": 15.055653192735793,
"grad_norm": 6.943923473358154,
"learning_rate": 6.064448754289125e-06,
"loss": 0.6959,
"step": 25700
},
{
"epoch": 15.114235500878735,
"grad_norm": 4.48841667175293,
"learning_rate": 5.691481426227062e-06,
"loss": 0.7265,
"step": 25800
},
{
"epoch": 15.172817809021675,
"grad_norm": 5.900568008422852,
"learning_rate": 5.318514098165001e-06,
"loss": 0.765,
"step": 25900
},
{
"epoch": 15.231400117164617,
"grad_norm": 5.62386417388916,
"learning_rate": 4.9455467701029394e-06,
"loss": 0.7634,
"step": 26000
},
{
"epoch": 15.231400117164617,
"eval_loss": 0.08351606130599976,
"eval_runtime": 146.6151,
"eval_samples_per_second": 3.41,
"eval_steps_per_second": 0.43,
"eval_wer": 0.1825817860300619,
"step": 26000
},
{
"epoch": 15.289982425307556,
"grad_norm": 6.403947353363037,
"learning_rate": 4.572579442040878e-06,
"loss": 0.728,
"step": 26100
},
{
"epoch": 15.348564733450498,
"grad_norm": 6.236737251281738,
"learning_rate": 4.199612113978816e-06,
"loss": 0.7462,
"step": 26200
},
{
"epoch": 15.407147041593438,
"grad_norm": 5.554813385009766,
"learning_rate": 3.826644785916754e-06,
"loss": 0.7403,
"step": 26300
},
{
"epoch": 15.46572934973638,
"grad_norm": 4.909285068511963,
"learning_rate": 3.453677457854692e-06,
"loss": 0.7653,
"step": 26400
},
{
"epoch": 15.52431165787932,
"grad_norm": 5.502344608306885,
"learning_rate": 3.0807101297926304e-06,
"loss": 0.7319,
"step": 26500
},
{
"epoch": 15.582893966022262,
"grad_norm": 7.525850772857666,
"learning_rate": 2.7077428017305685e-06,
"loss": 0.7295,
"step": 26600
},
{
"epoch": 15.641476274165202,
"grad_norm": 7.245991230010986,
"learning_rate": 2.3347754736685067e-06,
"loss": 0.7382,
"step": 26700
},
{
"epoch": 15.700058582308143,
"grad_norm": 5.762548923492432,
"learning_rate": 1.961808145606445e-06,
"loss": 0.7253,
"step": 26800
},
{
"epoch": 15.758640890451083,
"grad_norm": 6.127166271209717,
"learning_rate": 1.5888408175443833e-06,
"loss": 0.7399,
"step": 26900
},
{
"epoch": 15.817223198594025,
"grad_norm": 6.195973873138428,
"learning_rate": 1.2158734894823213e-06,
"loss": 0.7379,
"step": 27000
},
{
"epoch": 15.817223198594025,
"eval_loss": 0.08341451734304428,
"eval_runtime": 146.5011,
"eval_samples_per_second": 3.413,
"eval_steps_per_second": 0.43,
"eval_wer": 0.18287651046271736,
"step": 27000
},
{
"epoch": 15.875805506736965,
"grad_norm": 8.746485710144043,
"learning_rate": 8.429061614202597e-07,
"loss": 0.7027,
"step": 27100
},
{
"epoch": 15.934387814879907,
"grad_norm": 7.414266109466553,
"learning_rate": 4.699388333581979e-07,
"loss": 0.7047,
"step": 27200
},
{
"epoch": 15.992970123022847,
"grad_norm": 7.6758832931518555,
"learning_rate": 1.0070117857675669e-07,
"loss": 0.7324,
"step": 27300
},
{
"epoch": 16.05155243116579,
"grad_norm": 12.168989181518555,
"learning_rate": 2.7443730771306743e-05,
"loss": 0.7807,
"step": 27400
},
{
"epoch": 16.11013473930873,
"grad_norm": 5.686388969421387,
"learning_rate": 2.717385437469639e-05,
"loss": 0.8098,
"step": 27500
},
{
"epoch": 16.16871704745167,
"grad_norm": 6.920952320098877,
"learning_rate": 2.6903977978086036e-05,
"loss": 0.7786,
"step": 27600
},
{
"epoch": 16.22729935559461,
"grad_norm": 8.017998695373535,
"learning_rate": 2.6634101581475683e-05,
"loss": 0.8257,
"step": 27700
},
{
"epoch": 16.285881663737552,
"grad_norm": 6.6050262451171875,
"learning_rate": 2.6364225184865333e-05,
"loss": 0.7671,
"step": 27800
},
{
"epoch": 16.344463971880494,
"grad_norm": 8.146703720092773,
"learning_rate": 2.609434878825498e-05,
"loss": 0.7733,
"step": 27900
},
{
"epoch": 16.403046280023432,
"grad_norm": 11.652145385742188,
"learning_rate": 2.5824472391644626e-05,
"loss": 0.7902,
"step": 28000
},
{
"epoch": 16.403046280023432,
"eval_loss": 0.08416531980037689,
"eval_runtime": 148.3349,
"eval_samples_per_second": 3.371,
"eval_steps_per_second": 0.425,
"eval_wer": 0.18110816386678455,
"step": 28000
},
{
"epoch": 16.461628588166374,
"grad_norm": 9.010910034179688,
"learning_rate": 2.5554595995034276e-05,
"loss": 0.7898,
"step": 28100
},
{
"epoch": 16.520210896309315,
"grad_norm": 11.093189239501953,
"learning_rate": 2.5284719598423923e-05,
"loss": 0.7682,
"step": 28200
},
{
"epoch": 16.578793204452257,
"grad_norm": 7.964006423950195,
"learning_rate": 2.501484320181357e-05,
"loss": 0.7481,
"step": 28300
},
{
"epoch": 16.637375512595195,
"grad_norm": 10.423765182495117,
"learning_rate": 2.474496680520322e-05,
"loss": 0.7515,
"step": 28400
},
{
"epoch": 16.695957820738137,
"grad_norm": 6.754664897918701,
"learning_rate": 2.4475090408592866e-05,
"loss": 0.8024,
"step": 28500
},
{
"epoch": 16.75454012888108,
"grad_norm": 10.269820213317871,
"learning_rate": 2.4205214011982513e-05,
"loss": 0.7533,
"step": 28600
},
{
"epoch": 16.81312243702402,
"grad_norm": 14.783397674560547,
"learning_rate": 2.3935337615372163e-05,
"loss": 0.7937,
"step": 28700
},
{
"epoch": 16.87170474516696,
"grad_norm": 8.369039535522461,
"learning_rate": 2.366546121876181e-05,
"loss": 0.8246,
"step": 28800
},
{
"epoch": 16.9302870533099,
"grad_norm": 8.320894241333008,
"learning_rate": 2.3395584822151456e-05,
"loss": 0.7703,
"step": 28900
},
{
"epoch": 16.988869361452842,
"grad_norm": 10.491236686706543,
"learning_rate": 2.3125708425541102e-05,
"loss": 0.8261,
"step": 29000
},
{
"epoch": 16.988869361452842,
"eval_loss": 0.0840698629617691,
"eval_runtime": 145.4812,
"eval_samples_per_second": 3.437,
"eval_steps_per_second": 0.433,
"eval_wer": 0.18493958149130563,
"step": 29000
},
{
"epoch": 17.047451669595784,
"grad_norm": 6.7529377937316895,
"learning_rate": 2.2855832028930752e-05,
"loss": 0.8091,
"step": 29100
},
{
"epoch": 17.106033977738722,
"grad_norm": 6.652218341827393,
"learning_rate": 2.25859556323204e-05,
"loss": 0.7695,
"step": 29200
},
{
"epoch": 17.164616285881664,
"grad_norm": 7.654794692993164,
"learning_rate": 2.2316079235710046e-05,
"loss": 0.7926,
"step": 29300
},
{
"epoch": 17.223198594024606,
"grad_norm": 8.277422904968262,
"learning_rate": 2.2046202839099696e-05,
"loss": 0.7702,
"step": 29400
},
{
"epoch": 17.281780902167544,
"grad_norm": 15.797304153442383,
"learning_rate": 2.1776326442489342e-05,
"loss": 0.7597,
"step": 29500
},
{
"epoch": 17.340363210310485,
"grad_norm": 6.769285202026367,
"learning_rate": 2.150645004587899e-05,
"loss": 0.7588,
"step": 29600
},
{
"epoch": 17.398945518453427,
"grad_norm": 8.328302383422852,
"learning_rate": 2.123657364926864e-05,
"loss": 0.7719,
"step": 29700
},
{
"epoch": 17.45752782659637,
"grad_norm": 5.7514190673828125,
"learning_rate": 2.0966697252658282e-05,
"loss": 0.7985,
"step": 29800
},
{
"epoch": 17.516110134739307,
"grad_norm": 5.553383827209473,
"learning_rate": 2.069682085604793e-05,
"loss": 0.7602,
"step": 29900
},
{
"epoch": 17.57469244288225,
"grad_norm": 7.833782196044922,
"learning_rate": 2.042694445943758e-05,
"loss": 0.7531,
"step": 30000
},
{
"epoch": 17.57469244288225,
"eval_loss": 0.08400005102157593,
"eval_runtime": 147.1645,
"eval_samples_per_second": 3.398,
"eval_steps_per_second": 0.428,
"eval_wer": 0.18670792808723843,
"step": 30000
},
{
"epoch": 17.63327475102519,
"grad_norm": 6.075071334838867,
"learning_rate": 2.0157068062827225e-05,
"loss": 0.7471,
"step": 30100
},
{
"epoch": 17.691857059168132,
"grad_norm": 6.894543647766113,
"learning_rate": 1.9887191666216872e-05,
"loss": 0.7926,
"step": 30200
},
{
"epoch": 17.75043936731107,
"grad_norm": 11.549782752990723,
"learning_rate": 1.9617315269606522e-05,
"loss": 0.7308,
"step": 30300
},
{
"epoch": 17.809021675454012,
"grad_norm": 7.361614227294922,
"learning_rate": 1.934743887299617e-05,
"loss": 0.7545,
"step": 30400
},
{
"epoch": 17.867603983596954,
"grad_norm": 12.995938301086426,
"learning_rate": 1.9077562476385815e-05,
"loss": 0.7809,
"step": 30500
},
{
"epoch": 17.926186291739896,
"grad_norm": 4.978572368621826,
"learning_rate": 1.8807686079775465e-05,
"loss": 0.7587,
"step": 30600
},
{
"epoch": 17.984768599882834,
"grad_norm": 6.543401718139648,
"learning_rate": 1.853780968316511e-05,
"loss": 0.7432,
"step": 30700
},
{
"epoch": 18.043350908025776,
"grad_norm": 8.726702690124512,
"learning_rate": 1.8267933286554758e-05,
"loss": 0.7305,
"step": 30800
},
{
"epoch": 18.101933216168717,
"grad_norm": 6.32004976272583,
"learning_rate": 1.7998056889944405e-05,
"loss": 0.7454,
"step": 30900
},
{
"epoch": 18.16051552431166,
"grad_norm": 5.896217346191406,
"learning_rate": 1.7728180493334055e-05,
"loss": 0.7166,
"step": 31000
},
{
"epoch": 18.16051552431166,
"eval_loss": 0.0838567316532135,
"eval_runtime": 147.3922,
"eval_samples_per_second": 3.392,
"eval_steps_per_second": 0.427,
"eval_wer": 0.1905393457117595,
"step": 31000
},
{
"epoch": 18.219097832454597,
"grad_norm": 6.72420597076416,
"learning_rate": 1.74583040967237e-05,
"loss": 0.7028,
"step": 31100
},
{
"epoch": 18.27768014059754,
"grad_norm": 5.8699140548706055,
"learning_rate": 1.7188427700113348e-05,
"loss": 0.7548,
"step": 31200
},
{
"epoch": 18.33626244874048,
"grad_norm": 6.714815139770508,
"learning_rate": 1.6921250067469098e-05,
"loss": 0.7619,
"step": 31300
},
{
"epoch": 18.394844756883423,
"grad_norm": 9.205123901367188,
"learning_rate": 1.6651373670858748e-05,
"loss": 0.7146,
"step": 31400
},
{
"epoch": 18.45342706502636,
"grad_norm": 7.359625339508057,
"learning_rate": 1.6381497274248395e-05,
"loss": 0.7425,
"step": 31500
},
{
"epoch": 18.512009373169303,
"grad_norm": 12.034134864807129,
"learning_rate": 1.6114319641604145e-05,
"loss": 0.7325,
"step": 31600
},
{
"epoch": 18.570591681312244,
"grad_norm": 12.600486755371094,
"learning_rate": 1.584444324499379e-05,
"loss": 0.7628,
"step": 31700
},
{
"epoch": 18.629173989455186,
"grad_norm": 5.2866530418396,
"learning_rate": 1.557456684838344e-05,
"loss": 0.7636,
"step": 31800
},
{
"epoch": 18.687756297598124,
"grad_norm": 5.383816719055176,
"learning_rate": 1.5304690451773088e-05,
"loss": 0.727,
"step": 31900
},
{
"epoch": 18.746338605741066,
"grad_norm": 8.094857215881348,
"learning_rate": 1.5034814055162735e-05,
"loss": 0.7976,
"step": 32000
},
{
"epoch": 18.746338605741066,
"eval_loss": 0.08411071449518204,
"eval_runtime": 145.6438,
"eval_samples_per_second": 3.433,
"eval_steps_per_second": 0.433,
"eval_wer": 0.18376068376068377,
"step": 32000
},
{
"epoch": 18.804920913884008,
"grad_norm": 11.049666404724121,
"learning_rate": 1.4764937658552383e-05,
"loss": 0.7523,
"step": 32100
},
{
"epoch": 18.86350322202695,
"grad_norm": 5.7121782302856445,
"learning_rate": 1.4495061261942031e-05,
"loss": 0.7462,
"step": 32200
},
{
"epoch": 18.922085530169888,
"grad_norm": 6.261909008026123,
"learning_rate": 1.4225184865331678e-05,
"loss": 0.6894,
"step": 32300
},
{
"epoch": 18.98066783831283,
"grad_norm": 9.238897323608398,
"learning_rate": 1.3955308468721326e-05,
"loss": 0.7625,
"step": 32400
},
{
"epoch": 19.03925014645577,
"grad_norm": 5.913239002227783,
"learning_rate": 1.3685432072110973e-05,
"loss": 0.7324,
"step": 32500
},
{
"epoch": 19.097832454598713,
"grad_norm": 6.142359733581543,
"learning_rate": 1.3415555675500621e-05,
"loss": 0.7275,
"step": 32600
},
{
"epoch": 19.15641476274165,
"grad_norm": 8.006820678710938,
"learning_rate": 1.314567927889027e-05,
"loss": 0.7335,
"step": 32700
},
{
"epoch": 19.214997070884593,
"grad_norm": 10.434711456298828,
"learning_rate": 1.2875802882279916e-05,
"loss": 0.7093,
"step": 32800
},
{
"epoch": 19.273579379027534,
"grad_norm": 7.119588375091553,
"learning_rate": 1.2605926485669564e-05,
"loss": 0.7213,
"step": 32900
},
{
"epoch": 19.332161687170476,
"grad_norm": 6.306182861328125,
"learning_rate": 1.2336050089059211e-05,
"loss": 0.7008,
"step": 33000
},
{
"epoch": 19.332161687170476,
"eval_loss": 0.08348561823368073,
"eval_runtime": 148.0399,
"eval_samples_per_second": 3.377,
"eval_steps_per_second": 0.426,
"eval_wer": 0.18641320365458297,
"step": 33000
},
{
"epoch": 19.390743995313414,
"grad_norm": 7.711204528808594,
"learning_rate": 1.206617369244886e-05,
"loss": 0.7326,
"step": 33100
},
{
"epoch": 19.449326303456356,
"grad_norm": 6.137068271636963,
"learning_rate": 1.1796297295838508e-05,
"loss": 0.6976,
"step": 33200
},
{
"epoch": 19.507908611599298,
"grad_norm": 6.798486709594727,
"learning_rate": 1.1526420899228154e-05,
"loss": 0.7118,
"step": 33300
},
{
"epoch": 19.566490919742236,
"grad_norm": 6.276034355163574,
"learning_rate": 1.12565445026178e-05,
"loss": 0.7112,
"step": 33400
},
{
"epoch": 19.625073227885178,
"grad_norm": 7.021731853485107,
"learning_rate": 1.0986668106007449e-05,
"loss": 0.6984,
"step": 33500
},
{
"epoch": 19.68365553602812,
"grad_norm": 5.709362983703613,
"learning_rate": 1.0716791709397096e-05,
"loss": 0.7376,
"step": 33600
},
{
"epoch": 19.74223784417106,
"grad_norm": 7.312982559204102,
"learning_rate": 1.0446915312786744e-05,
"loss": 0.7448,
"step": 33700
},
{
"epoch": 19.800820152314,
"grad_norm": 5.710330486297607,
"learning_rate": 1.0177038916176392e-05,
"loss": 0.7524,
"step": 33800
},
{
"epoch": 19.85940246045694,
"grad_norm": 6.241232395172119,
"learning_rate": 9.907162519566039e-06,
"loss": 0.6942,
"step": 33900
},
{
"epoch": 19.917984768599883,
"grad_norm": 6.5843024253845215,
"learning_rate": 9.637286122955687e-06,
"loss": 0.707,
"step": 34000
},
{
"epoch": 19.917984768599883,
"eval_loss": 0.08329460024833679,
"eval_runtime": 148.0997,
"eval_samples_per_second": 3.376,
"eval_steps_per_second": 0.425,
"eval_wer": 0.18715001473622164,
"step": 34000
},
{
"epoch": 19.976567076742825,
"grad_norm": 7.8604302406311035,
"learning_rate": 9.367409726345334e-06,
"loss": 0.7127,
"step": 34100
},
{
"epoch": 20.035149384885763,
"grad_norm": 6.885805130004883,
"learning_rate": 9.097533329734982e-06,
"loss": 0.6805,
"step": 34200
},
{
"epoch": 20.093731693028705,
"grad_norm": 5.695769786834717,
"learning_rate": 8.82765693312463e-06,
"loss": 0.7139,
"step": 34300
},
{
"epoch": 20.152314001171646,
"grad_norm": 6.634642124176025,
"learning_rate": 8.557780536514277e-06,
"loss": 0.6917,
"step": 34400
},
{
"epoch": 20.210896309314588,
"grad_norm": 5.601979732513428,
"learning_rate": 8.287904139903925e-06,
"loss": 0.6576,
"step": 34500
},
{
"epoch": 20.269478617457526,
"grad_norm": 4.387629508972168,
"learning_rate": 8.018027743293572e-06,
"loss": 0.7389,
"step": 34600
},
{
"epoch": 20.328060925600468,
"grad_norm": 5.184898853302002,
"learning_rate": 7.748151346683219e-06,
"loss": 0.6803,
"step": 34700
},
{
"epoch": 20.38664323374341,
"grad_norm": 9.722779273986816,
"learning_rate": 7.478274950072867e-06,
"loss": 0.7095,
"step": 34800
},
{
"epoch": 20.44522554188635,
"grad_norm": 6.964946269989014,
"learning_rate": 7.208398553462514e-06,
"loss": 0.7372,
"step": 34900
},
{
"epoch": 20.50380785002929,
"grad_norm": 9.976523399353027,
"learning_rate": 6.938522156852162e-06,
"loss": 0.6865,
"step": 35000
},
{
"epoch": 20.50380785002929,
"eval_loss": 0.08354520797729492,
"eval_runtime": 147.4678,
"eval_samples_per_second": 3.391,
"eval_steps_per_second": 0.427,
"eval_wer": 0.1843501326259947,
"step": 35000
},
{
"epoch": 20.56239015817223,
"grad_norm": 6.862576007843018,
"learning_rate": 6.668645760241809e-06,
"loss": 0.6942,
"step": 35100
},
{
"epoch": 20.620972466315173,
"grad_norm": 10.506976127624512,
"learning_rate": 6.3987693636314575e-06,
"loss": 0.6863,
"step": 35200
},
{
"epoch": 20.679554774458115,
"grad_norm": 6.648481845855713,
"learning_rate": 6.128892967021105e-06,
"loss": 0.6987,
"step": 35300
},
{
"epoch": 20.738137082601053,
"grad_norm": 7.607233047485352,
"learning_rate": 5.8590165704107524e-06,
"loss": 0.689,
"step": 35400
},
{
"epoch": 20.796719390743995,
"grad_norm": 6.110599040985107,
"learning_rate": 5.589140173800399e-06,
"loss": 0.7004,
"step": 35500
},
{
"epoch": 20.855301698886937,
"grad_norm": 5.632733345031738,
"learning_rate": 5.319263777190047e-06,
"loss": 0.7299,
"step": 35600
},
{
"epoch": 20.91388400702988,
"grad_norm": 5.644909381866455,
"learning_rate": 5.052086144545798e-06,
"loss": 0.6749,
"step": 35700
},
{
"epoch": 20.972466315172817,
"grad_norm": 5.2816386222839355,
"learning_rate": 4.782209747935446e-06,
"loss": 0.671,
"step": 35800
},
{
"epoch": 21.03104862331576,
"grad_norm": 4.735965728759766,
"learning_rate": 4.512333351325093e-06,
"loss": 0.7137,
"step": 35900
},
{
"epoch": 21.0896309314587,
"grad_norm": 6.491783618927002,
"learning_rate": 4.242456954714741e-06,
"loss": 0.6927,
"step": 36000
},
{
"epoch": 21.0896309314587,
"eval_loss": 0.08344998210668564,
"eval_runtime": 147.4098,
"eval_samples_per_second": 3.392,
"eval_steps_per_second": 0.427,
"eval_wer": 0.18818155025051578,
"step": 36000
},
{
"epoch": 21.148213239601642,
"grad_norm": 6.068084239959717,
"learning_rate": 3.972580558104388e-06,
"loss": 0.7155,
"step": 36100
},
{
"epoch": 21.20679554774458,
"grad_norm": 4.509976863861084,
"learning_rate": 3.702704161494036e-06,
"loss": 0.6615,
"step": 36200
},
{
"epoch": 21.26537785588752,
"grad_norm": 6.346072673797607,
"learning_rate": 3.4328277648836835e-06,
"loss": 0.6682,
"step": 36300
},
{
"epoch": 21.323960164030463,
"grad_norm": 6.095388889312744,
"learning_rate": 3.1629513682733306e-06,
"loss": 0.6588,
"step": 36400
},
{
"epoch": 21.382542472173405,
"grad_norm": 5.079260349273682,
"learning_rate": 2.8930749716629784e-06,
"loss": 0.67,
"step": 36500
},
{
"epoch": 21.441124780316343,
"grad_norm": 4.323718547821045,
"learning_rate": 2.623198575052626e-06,
"loss": 0.6606,
"step": 36600
},
{
"epoch": 21.499707088459285,
"grad_norm": 5.659894943237305,
"learning_rate": 2.3533221784422738e-06,
"loss": 0.69,
"step": 36700
},
{
"epoch": 21.558289396602227,
"grad_norm": 7.08272647857666,
"learning_rate": 2.083445781831921e-06,
"loss": 0.6506,
"step": 36800
},
{
"epoch": 21.61687170474517,
"grad_norm": 4.9979119300842285,
"learning_rate": 1.8135693852215685e-06,
"loss": 0.7215,
"step": 36900
},
{
"epoch": 21.675454012888107,
"grad_norm": 6.872857093811035,
"learning_rate": 1.5436929886112161e-06,
"loss": 0.7014,
"step": 37000
},
{
"epoch": 21.675454012888107,
"eval_loss": 0.08349551260471344,
"eval_runtime": 147.5946,
"eval_samples_per_second": 3.388,
"eval_steps_per_second": 0.427,
"eval_wer": 0.1861184792219275,
"step": 37000
},
{
"epoch": 21.73403632103105,
"grad_norm": 6.6599273681640625,
"learning_rate": 1.2738165920008636e-06,
"loss": 0.6701,
"step": 37100
},
{
"epoch": 21.79261862917399,
"grad_norm": 4.662840366363525,
"learning_rate": 1.0039401953905112e-06,
"loss": 0.7006,
"step": 37200
},
{
"epoch": 21.851200937316932,
"grad_norm": 6.501893043518066,
"learning_rate": 7.340637987801587e-07,
"loss": 0.66,
"step": 37300
},
{
"epoch": 21.90978324545987,
"grad_norm": 5.708708763122559,
"learning_rate": 4.6418740216980626e-07,
"loss": 0.6819,
"step": 37400
},
{
"epoch": 21.968365553602812,
"grad_norm": 4.885653018951416,
"learning_rate": 1.943110055594538e-07,
"loss": 0.7,
"step": 37500
},
{
"epoch": 22.026947861745754,
"grad_norm": 5.802590370178223,
"learning_rate": 1.5496103185816508e-05,
"loss": 0.7875,
"step": 37600
},
{
"epoch": 22.085530169888692,
"grad_norm": 6.833991527557373,
"learning_rate": 1.5268219315436854e-05,
"loss": 0.6936,
"step": 37700
},
{
"epoch": 22.144112478031634,
"grad_norm": 9.980413436889648,
"learning_rate": 1.5042614283760998e-05,
"loss": 0.6935,
"step": 37800
},
{
"epoch": 22.202694786174575,
"grad_norm": 6.407867908477783,
"learning_rate": 1.4814730413381343e-05,
"loss": 0.669,
"step": 37900
},
{
"epoch": 22.261277094317517,
"grad_norm": 7.097990989685059,
"learning_rate": 1.4586846543001687e-05,
"loss": 0.6951,
"step": 38000
},
{
"epoch": 22.261277094317517,
"eval_loss": 0.08332780748605728,
"eval_runtime": 147.6911,
"eval_samples_per_second": 3.385,
"eval_steps_per_second": 0.427,
"eval_wer": 0.1874447391688771,
"step": 38000
},
{
"epoch": 22.319859402460455,
"grad_norm": 7.611706733703613,
"learning_rate": 1.4358962672622033e-05,
"loss": 0.6751,
"step": 38100
},
{
"epoch": 22.378441710603397,
"grad_norm": 11.156392097473145,
"learning_rate": 1.4131078802242378e-05,
"loss": 0.7132,
"step": 38200
},
{
"epoch": 22.43702401874634,
"grad_norm": 7.024628639221191,
"learning_rate": 1.3903194931862724e-05,
"loss": 0.7599,
"step": 38300
},
{
"epoch": 22.49560632688928,
"grad_norm": 6.373144626617432,
"learning_rate": 1.3675311061483068e-05,
"loss": 0.6951,
"step": 38400
},
{
"epoch": 22.55418863503222,
"grad_norm": 5.750803470611572,
"learning_rate": 1.3447427191103415e-05,
"loss": 0.7078,
"step": 38500
},
{
"epoch": 22.61277094317516,
"grad_norm": 6.302127361297607,
"learning_rate": 1.3219543320723759e-05,
"loss": 0.7153,
"step": 38600
},
{
"epoch": 22.671353251318102,
"grad_norm": 6.580202102661133,
"learning_rate": 1.2991659450344103e-05,
"loss": 0.6923,
"step": 38700
},
{
"epoch": 22.729935559461044,
"grad_norm": 5.348507404327393,
"learning_rate": 1.2763775579964451e-05,
"loss": 0.6998,
"step": 38800
},
{
"epoch": 22.788517867603982,
"grad_norm": 7.927694797515869,
"learning_rate": 1.2535891709584796e-05,
"loss": 0.7093,
"step": 38900
},
{
"epoch": 22.847100175746924,
"grad_norm": 4.928946495056152,
"learning_rate": 1.2308007839205142e-05,
"loss": 0.6848,
"step": 39000
},
{
"epoch": 22.847100175746924,
"eval_loss": 0.08338670432567596,
"eval_runtime": 146.104,
"eval_samples_per_second": 3.422,
"eval_steps_per_second": 0.431,
"eval_wer": 0.1927497789566755,
"step": 39000
},
{
"epoch": 22.905682483889866,
"grad_norm": 9.807655334472656,
"learning_rate": 1.2080123968825486e-05,
"loss": 0.6954,
"step": 39100
},
{
"epoch": 22.964264792032807,
"grad_norm": 4.682498931884766,
"learning_rate": 1.1852240098445833e-05,
"loss": 0.7273,
"step": 39200
},
{
"epoch": 23.022847100175746,
"grad_norm": 6.8382086753845215,
"learning_rate": 1.1624356228066179e-05,
"loss": 0.721,
"step": 39300
},
{
"epoch": 23.081429408318687,
"grad_norm": 7.560245037078857,
"learning_rate": 1.1396472357686523e-05,
"loss": 0.689,
"step": 39400
},
{
"epoch": 23.14001171646163,
"grad_norm": 7.024509906768799,
"learning_rate": 1.116858848730687e-05,
"loss": 0.6714,
"step": 39500
},
{
"epoch": 23.19859402460457,
"grad_norm": 8.46679973602295,
"learning_rate": 1.0940704616927216e-05,
"loss": 0.6729,
"step": 39600
},
{
"epoch": 23.25717633274751,
"grad_norm": 7.632561683654785,
"learning_rate": 1.071282074654756e-05,
"loss": 0.7032,
"step": 39700
},
{
"epoch": 23.31575864089045,
"grad_norm": 9.298177719116211,
"learning_rate": 1.0484936876167906e-05,
"loss": 0.6915,
"step": 39800
},
{
"epoch": 23.374340949033392,
"grad_norm": 6.186316013336182,
"learning_rate": 1.0259331844492047e-05,
"loss": 0.6625,
"step": 39900
},
{
"epoch": 23.432923257176334,
"grad_norm": 4.553198337554932,
"learning_rate": 1.0031447974112393e-05,
"loss": 0.7096,
"step": 40000
},
{
"epoch": 23.432923257176334,
"eval_loss": 0.08342915773391724,
"eval_runtime": 146.9046,
"eval_samples_per_second": 3.404,
"eval_steps_per_second": 0.429,
"eval_wer": 0.19363395225464192,
"step": 40000
},
{
"epoch": 23.491505565319272,
"grad_norm": 5.817235946655273,
"learning_rate": 9.803564103732739e-06,
"loss": 0.6733,
"step": 40100
},
{
"epoch": 23.550087873462214,
"grad_norm": 5.511366844177246,
"learning_rate": 9.575680233353083e-06,
"loss": 0.728,
"step": 40200
},
{
"epoch": 23.608670181605156,
"grad_norm": 7.099341869354248,
"learning_rate": 9.34779636297343e-06,
"loss": 0.7007,
"step": 40300
},
{
"epoch": 23.667252489748098,
"grad_norm": 4.963494777679443,
"learning_rate": 9.119912492593774e-06,
"loss": 0.6804,
"step": 40400
},
{
"epoch": 23.725834797891036,
"grad_norm": 7.063295364379883,
"learning_rate": 8.89202862221412e-06,
"loss": 0.7092,
"step": 40500
},
{
"epoch": 23.784417106033978,
"grad_norm": 6.879157543182373,
"learning_rate": 8.664144751834466e-06,
"loss": 0.6753,
"step": 40600
},
{
"epoch": 23.84299941417692,
"grad_norm": 5.31470251083374,
"learning_rate": 8.43626088145481e-06,
"loss": 0.6876,
"step": 40700
},
{
"epoch": 23.90158172231986,
"grad_norm": 6.351736068725586,
"learning_rate": 8.208377011075157e-06,
"loss": 0.6949,
"step": 40800
},
{
"epoch": 23.9601640304628,
"grad_norm": 5.326944351196289,
"learning_rate": 7.980493140695503e-06,
"loss": 0.705,
"step": 40900
},
{
"epoch": 24.01874633860574,
"grad_norm": 6.150857448577881,
"learning_rate": 7.752609270315848e-06,
"loss": 0.6952,
"step": 41000
},
{
"epoch": 24.01874633860574,
"eval_loss": 0.08346738666296005,
"eval_runtime": 145.9289,
"eval_samples_per_second": 3.426,
"eval_steps_per_second": 0.432,
"eval_wer": 0.19333922782198645,
"step": 41000
},
{
"epoch": 24.077328646748683,
"grad_norm": 5.961891174316406,
"learning_rate": 7.524725399936193e-06,
"loss": 0.6498,
"step": 41100
},
{
"epoch": 24.135910954891624,
"grad_norm": 4.94498348236084,
"learning_rate": 7.296841529556539e-06,
"loss": 0.6827,
"step": 41200
},
{
"epoch": 24.194493263034563,
"grad_norm": 6.914667129516602,
"learning_rate": 7.068957659176884e-06,
"loss": 0.6647,
"step": 41300
},
{
"epoch": 24.253075571177504,
"grad_norm": 5.37884521484375,
"learning_rate": 6.84107378879723e-06,
"loss": 0.6447,
"step": 41400
},
{
"epoch": 24.311657879320446,
"grad_norm": 7.762170791625977,
"learning_rate": 6.613189918417574e-06,
"loss": 0.6362,
"step": 41500
},
{
"epoch": 24.370240187463388,
"grad_norm": 5.311378479003906,
"learning_rate": 6.38530604803792e-06,
"loss": 0.7053,
"step": 41600
},
{
"epoch": 24.428822495606326,
"grad_norm": 5.185734748840332,
"learning_rate": 6.157422177658266e-06,
"loss": 0.6724,
"step": 41700
},
{
"epoch": 24.487404803749268,
"grad_norm": 6.12027645111084,
"learning_rate": 5.929538307278611e-06,
"loss": 0.6947,
"step": 41800
},
{
"epoch": 24.54598711189221,
"grad_norm": 11.165681838989258,
"learning_rate": 5.701654436898956e-06,
"loss": 0.6999,
"step": 41900
},
{
"epoch": 24.604569420035148,
"grad_norm": 6.521625995635986,
"learning_rate": 5.473770566519302e-06,
"loss": 0.692,
"step": 42000
},
{
"epoch": 24.604569420035148,
"eval_loss": 0.0832965150475502,
"eval_runtime": 145.9791,
"eval_samples_per_second": 3.425,
"eval_steps_per_second": 0.432,
"eval_wer": 0.19304450338933096,
"step": 42000
},
{
"epoch": 24.66315172817809,
"grad_norm": 7.7173333168029785,
"learning_rate": 5.248165534843444e-06,
"loss": 0.6641,
"step": 42100
},
{
"epoch": 24.72173403632103,
"grad_norm": 5.125652313232422,
"learning_rate": 5.02028166446379e-06,
"loss": 0.6576,
"step": 42200
},
{
"epoch": 24.780316344463973,
"grad_norm": 5.99462366104126,
"learning_rate": 4.792397794084135e-06,
"loss": 0.6733,
"step": 42300
},
{
"epoch": 24.83889865260691,
"grad_norm": 5.6660614013671875,
"learning_rate": 4.5645139237044806e-06,
"loss": 0.7033,
"step": 42400
},
{
"epoch": 24.897480960749853,
"grad_norm": 7.414560317993164,
"learning_rate": 4.336630053324826e-06,
"loss": 0.6756,
"step": 42500
},
{
"epoch": 24.956063268892795,
"grad_norm": 6.082986354827881,
"learning_rate": 4.108746182945172e-06,
"loss": 0.6713,
"step": 42600
},
{
"epoch": 25.014645577035736,
"grad_norm": 5.000583648681641,
"learning_rate": 3.8808623125655165e-06,
"loss": 0.6821,
"step": 42700
},
{
"epoch": 25.073227885178675,
"grad_norm": 6.196038246154785,
"learning_rate": 3.6529784421858623e-06,
"loss": 0.6795,
"step": 42800
},
{
"epoch": 25.131810193321616,
"grad_norm": 7.409509181976318,
"learning_rate": 3.4250945718062076e-06,
"loss": 0.6388,
"step": 42900
},
{
"epoch": 25.190392501464558,
"grad_norm": 6.713177680969238,
"learning_rate": 3.1972107014265533e-06,
"loss": 0.6552,
"step": 43000
},
{
"epoch": 25.190392501464558,
"eval_loss": 0.0831432044506073,
"eval_runtime": 144.9356,
"eval_samples_per_second": 3.45,
"eval_steps_per_second": 0.435,
"eval_wer": 0.18670792808723843,
"step": 43000
},
{
"epoch": 25.2489748096075,
"grad_norm": 4.68192720413208,
"learning_rate": 2.9693268310468986e-06,
"loss": 0.6323,
"step": 43100
},
{
"epoch": 25.307557117750438,
"grad_norm": 4.701879501342773,
"learning_rate": 2.741442960667244e-06,
"loss": 0.6499,
"step": 43200
},
{
"epoch": 25.36613942589338,
"grad_norm": 5.518495559692383,
"learning_rate": 2.5135590902875897e-06,
"loss": 0.6513,
"step": 43300
},
{
"epoch": 25.42472173403632,
"grad_norm": 4.257356643676758,
"learning_rate": 2.285675219907935e-06,
"loss": 0.6957,
"step": 43400
},
{
"epoch": 25.483304042179263,
"grad_norm": 4.883972644805908,
"learning_rate": 2.0577913495282803e-06,
"loss": 0.6552,
"step": 43500
},
{
"epoch": 25.5418863503222,
"grad_norm": 4.6188201904296875,
"learning_rate": 1.8299074791486259e-06,
"loss": 0.6683,
"step": 43600
},
{
"epoch": 25.600468658465143,
"grad_norm": 8.4751558303833,
"learning_rate": 1.6020236087689714e-06,
"loss": 0.636,
"step": 43700
},
{
"epoch": 25.659050966608085,
"grad_norm": 5.713025093078613,
"learning_rate": 1.374139738389317e-06,
"loss": 0.6833,
"step": 43800
},
{
"epoch": 25.717633274751027,
"grad_norm": Infinity,
"learning_rate": 1.1462558680096623e-06,
"loss": 0.6807,
"step": 43900
},
{
"epoch": 25.776215582893965,
"grad_norm": 7.257518291473389,
"learning_rate": 9.206508363338042e-07,
"loss": 0.6641,
"step": 44000
},
{
"epoch": 25.776215582893965,
"eval_loss": 0.0832269936800003,
"eval_runtime": 145.3966,
"eval_samples_per_second": 3.439,
"eval_steps_per_second": 0.433,
"eval_wer": 0.1874447391688771,
"step": 44000
},
{
"epoch": 25.834797891036906,
"grad_norm": 5.92907190322876,
"learning_rate": 6.927669659541498e-07,
"loss": 0.6462,
"step": 44100
},
{
"epoch": 25.89338019917985,
"grad_norm": 4.766348361968994,
"learning_rate": 4.6488309557449525e-07,
"loss": 0.6934,
"step": 44200
},
{
"epoch": 25.95196250732279,
"grad_norm": 6.461400032043457,
"learning_rate": 2.3699922519484072e-07,
"loss": 0.6662,
"step": 44300
},
{
"epoch": 26.010544815465728,
"grad_norm": 5.860207557678223,
"learning_rate": 1.3472687832774603e-05,
"loss": 0.6097,
"step": 44400
},
{
"epoch": 26.06912712360867,
"grad_norm": 5.674122333526611,
"learning_rate": 1.3275488069414319e-05,
"loss": 0.6638,
"step": 44500
},
{
"epoch": 26.12770943175161,
"grad_norm": 7.24979829788208,
"learning_rate": 1.3078288306054034e-05,
"loss": 0.641,
"step": 44600
},
{
"epoch": 26.186291739894553,
"grad_norm": 7.524514198303223,
"learning_rate": 1.288108854269375e-05,
"loss": 0.6722,
"step": 44700
},
{
"epoch": 26.24487404803749,
"grad_norm": 5.542699337005615,
"learning_rate": 1.2683888779333466e-05,
"loss": 0.6386,
"step": 44800
},
{
"epoch": 26.303456356180433,
"grad_norm": 7.757282257080078,
"learning_rate": 1.2486689015973181e-05,
"loss": 0.6976,
"step": 44900
},
{
"epoch": 26.362038664323375,
"grad_norm": 5.828668117523193,
"learning_rate": 1.2289489252612897e-05,
"loss": 0.6921,
"step": 45000
},
{
"epoch": 26.362038664323375,
"eval_loss": 0.08329325169324875,
"eval_runtime": 154.0108,
"eval_samples_per_second": 3.247,
"eval_steps_per_second": 0.409,
"eval_wer": 0.18803418803418803,
"step": 45000
},
{
"epoch": 26.420620972466317,
"grad_norm": 6.481083869934082,
"learning_rate": 1.2092289489252613e-05,
"loss": 0.6643,
"step": 45100
},
{
"epoch": 26.479203280609255,
"grad_norm": 6.508504390716553,
"learning_rate": 1.1895089725892329e-05,
"loss": 0.6986,
"step": 45200
},
{
"epoch": 26.537785588752197,
"grad_norm": 5.0908894538879395,
"learning_rate": 1.1697889962532044e-05,
"loss": 0.6741,
"step": 45300
},
{
"epoch": 26.59636789689514,
"grad_norm": 6.918355464935303,
"learning_rate": 1.1500690199171762e-05,
"loss": 0.6774,
"step": 45400
},
{
"epoch": 26.65495020503808,
"grad_norm": 5.500540733337402,
"learning_rate": 1.1303490435811477e-05,
"loss": 0.6861,
"step": 45500
},
{
"epoch": 26.71353251318102,
"grad_norm": 5.732079029083252,
"learning_rate": 1.1106290672451193e-05,
"loss": 0.6836,
"step": 45600
},
{
"epoch": 26.77211482132396,
"grad_norm": 7.091628074645996,
"learning_rate": 1.0909090909090909e-05,
"loss": 0.7212,
"step": 45700
},
{
"epoch": 26.830697129466902,
"grad_norm": 6.164028644561768,
"learning_rate": 1.0711891145730625e-05,
"loss": 0.6832,
"step": 45800
},
{
"epoch": 26.889279437609844,
"grad_norm": 6.679256916046143,
"learning_rate": 1.051469138237034e-05,
"loss": 0.6886,
"step": 45900
},
{
"epoch": 26.947861745752782,
"grad_norm": 6.822471618652344,
"learning_rate": 1.0317491619010058e-05,
"loss": 0.6894,
"step": 46000
},
{
"epoch": 26.947861745752782,
"eval_loss": 0.08317731320858002,
"eval_runtime": 148.6991,
"eval_samples_per_second": 3.362,
"eval_steps_per_second": 0.424,
"eval_wer": 0.18552903035661655,
"step": 46000
},
{
"epoch": 27.006444053895724,
"grad_norm": 5.017155170440674,
"learning_rate": 1.0120291855649773e-05,
"loss": 0.7585,
"step": 46100
},
{
"epoch": 27.065026362038665,
"grad_norm": 7.552450180053711,
"learning_rate": 9.92309209228949e-06,
"loss": 0.6377,
"step": 46200
},
{
"epoch": 27.123608670181603,
"grad_norm": 5.625948905944824,
"learning_rate": 9.725892328929207e-06,
"loss": 0.6606,
"step": 46300
},
{
"epoch": 27.182190978324545,
"grad_norm": 6.0872039794921875,
"learning_rate": 9.528692565568922e-06,
"loss": 0.6853,
"step": 46400
},
{
"epoch": 27.240773286467487,
"grad_norm": 7.785630226135254,
"learning_rate": 9.331492802208638e-06,
"loss": 0.6862,
"step": 46500
},
{
"epoch": 27.29935559461043,
"grad_norm": 7.472136974334717,
"learning_rate": 9.134293038848354e-06,
"loss": 0.6528,
"step": 46600
},
{
"epoch": 27.357937902753367,
"grad_norm": 5.214349269866943,
"learning_rate": 8.937093275488071e-06,
"loss": 0.6734,
"step": 46700
},
{
"epoch": 27.41652021089631,
"grad_norm": 5.109072208404541,
"learning_rate": 8.739893512127787e-06,
"loss": 0.6936,
"step": 46800
},
{
"epoch": 27.47510251903925,
"grad_norm": 5.236739158630371,
"learning_rate": 8.542693748767503e-06,
"loss": 0.6711,
"step": 46900
},
{
"epoch": 27.533684827182192,
"grad_norm": 6.8590264320373535,
"learning_rate": 8.345493985407218e-06,
"loss": 0.7041,
"step": 47000
},
{
"epoch": 27.533684827182192,
"eval_loss": 0.08270065486431122,
"eval_runtime": 146.6093,
"eval_samples_per_second": 3.41,
"eval_steps_per_second": 0.43,
"eval_wer": 0.18552903035661655,
"step": 47000
},
{
"epoch": 27.59226713532513,
"grad_norm": 6.453911781311035,
"learning_rate": 8.148294222046934e-06,
"loss": 0.7099,
"step": 47100
},
{
"epoch": 27.650849443468072,
"grad_norm": 9.15201473236084,
"learning_rate": 7.95109445868665e-06,
"loss": 0.6753,
"step": 47200
},
{
"epoch": 27.709431751611014,
"grad_norm": 10.103302955627441,
"learning_rate": 7.753894695326366e-06,
"loss": 0.615,
"step": 47300
},
{
"epoch": 27.768014059753956,
"grad_norm": 6.687941551208496,
"learning_rate": 7.556694931966082e-06,
"loss": 0.6812,
"step": 47400
},
{
"epoch": 27.826596367896894,
"grad_norm": 6.050097942352295,
"learning_rate": 7.359495168605799e-06,
"loss": 0.6329,
"step": 47500
},
{
"epoch": 27.885178676039835,
"grad_norm": 6.162942886352539,
"learning_rate": 7.162295405245514e-06,
"loss": 0.6429,
"step": 47600
},
{
"epoch": 27.943760984182777,
"grad_norm": 5.811770915985107,
"learning_rate": 6.96509564188523e-06,
"loss": 0.7052,
"step": 47700
},
{
"epoch": 28.00234329232572,
"grad_norm": 4.816370010375977,
"learning_rate": 6.767895878524947e-06,
"loss": 0.6578,
"step": 47800
},
{
"epoch": 28.060925600468657,
"grad_norm": 4.412487506866455,
"learning_rate": 6.570696115164662e-06,
"loss": 0.6302,
"step": 47900
},
{
"epoch": 28.1195079086116,
"grad_norm": 5.892630100250244,
"learning_rate": 6.373496351804378e-06,
"loss": 0.6452,
"step": 48000
},
{
"epoch": 28.1195079086116,
"eval_loss": 0.08297573775053024,
"eval_runtime": 146.2177,
"eval_samples_per_second": 3.42,
"eval_steps_per_second": 0.431,
"eval_wer": 0.18818155025051578,
"step": 48000
},
{
"epoch": 28.17809021675454,
"grad_norm": 4.816265106201172,
"learning_rate": 6.1782685860776975e-06,
"loss": 0.6636,
"step": 48100
},
{
"epoch": 28.236672524897482,
"grad_norm": 6.337242126464844,
"learning_rate": 5.981068822717413e-06,
"loss": 0.6501,
"step": 48200
},
{
"epoch": 28.29525483304042,
"grad_norm": 7.430861473083496,
"learning_rate": 5.783869059357129e-06,
"loss": 0.6455,
"step": 48300
},
{
"epoch": 28.353837141183362,
"grad_norm": 10.299432754516602,
"learning_rate": 5.586669295996845e-06,
"loss": 0.6569,
"step": 48400
},
{
"epoch": 28.412419449326304,
"grad_norm": 5.967062950134277,
"learning_rate": 5.389469532636561e-06,
"loss": 0.651,
"step": 48500
},
{
"epoch": 28.471001757469246,
"grad_norm": 5.434126853942871,
"learning_rate": 5.192269769276277e-06,
"loss": 0.6563,
"step": 48600
},
{
"epoch": 28.529584065612184,
"grad_norm": 4.6583075523376465,
"learning_rate": 4.995070005915993e-06,
"loss": 0.6512,
"step": 48700
},
{
"epoch": 28.588166373755126,
"grad_norm": 6.697512149810791,
"learning_rate": 4.797870242555709e-06,
"loss": 0.664,
"step": 48800
},
{
"epoch": 28.646748681898067,
"grad_norm": 8.926551818847656,
"learning_rate": 4.600670479195425e-06,
"loss": 0.6354,
"step": 48900
},
{
"epoch": 28.70533099004101,
"grad_norm": 5.536263942718506,
"learning_rate": 4.403470715835141e-06,
"loss": 0.6682,
"step": 49000
},
{
"epoch": 28.70533099004101,
"eval_loss": 0.08284977823495865,
"eval_runtime": 146.4815,
"eval_samples_per_second": 3.413,
"eval_steps_per_second": 0.43,
"eval_wer": 0.18626584143825523,
"step": 49000
},
{
"epoch": 28.763913298183947,
"grad_norm": 8.664471626281738,
"learning_rate": 4.206270952474857e-06,
"loss": 0.6994,
"step": 49100
},
{
"epoch": 28.82249560632689,
"grad_norm": 6.604372978210449,
"learning_rate": 4.009071189114573e-06,
"loss": 0.6788,
"step": 49200
},
{
"epoch": 28.88107791446983,
"grad_norm": 5.231504917144775,
"learning_rate": 3.811871425754289e-06,
"loss": 0.6644,
"step": 49300
},
{
"epoch": 28.939660222612773,
"grad_norm": 5.522902011871338,
"learning_rate": 3.6146716623940048e-06,
"loss": 0.6543,
"step": 49400
},
{
"epoch": 28.99824253075571,
"grad_norm": 4.572765350341797,
"learning_rate": 3.4174718990337218e-06,
"loss": 0.6769,
"step": 49500
},
{
"epoch": 29.056824838898653,
"grad_norm": 5.370816707611084,
"learning_rate": 3.2202721356734375e-06,
"loss": 0.6169,
"step": 49600
},
{
"epoch": 29.115407147041594,
"grad_norm": 4.599956035614014,
"learning_rate": 3.0230723723131532e-06,
"loss": 0.6698,
"step": 49700
},
{
"epoch": 29.173989455184536,
"grad_norm": 4.932507514953613,
"learning_rate": 2.8258726089528694e-06,
"loss": 0.6294,
"step": 49800
},
{
"epoch": 29.232571763327474,
"grad_norm": 8.667673110961914,
"learning_rate": 2.628672845592585e-06,
"loss": 0.6688,
"step": 49900
},
{
"epoch": 29.291154071470416,
"grad_norm": 4.365273952484131,
"learning_rate": 2.4314730822323017e-06,
"loss": 0.6357,
"step": 50000
},
{
"epoch": 29.291154071470416,
"eval_loss": 0.0829482451081276,
"eval_runtime": 146.3076,
"eval_samples_per_second": 3.417,
"eval_steps_per_second": 0.431,
"eval_wer": 0.18773946360153257,
"step": 50000
},
{
"epoch": 29.349736379613358,
"grad_norm": 5.5835652351379395,
"learning_rate": 2.234273318872018e-06,
"loss": 0.633,
"step": 50100
},
{
"epoch": 29.408318687756296,
"grad_norm": 5.577920436859131,
"learning_rate": 2.0390455531453363e-06,
"loss": 0.6456,
"step": 50200
},
{
"epoch": 29.466900995899238,
"grad_norm": 6.7036566734313965,
"learning_rate": 1.8418457897850522e-06,
"loss": 0.6582,
"step": 50300
},
{
"epoch": 29.52548330404218,
"grad_norm": 8.638516426086426,
"learning_rate": 1.6446460264247684e-06,
"loss": 0.6469,
"step": 50400
},
{
"epoch": 29.58406561218512,
"grad_norm": 7.847275257110596,
"learning_rate": 1.4474462630644845e-06,
"loss": 0.6314,
"step": 50500
},
{
"epoch": 29.64264792032806,
"grad_norm": 4.795884609222412,
"learning_rate": 1.2502464997042004e-06,
"loss": 0.6355,
"step": 50600
},
{
"epoch": 29.701230228471,
"grad_norm": 5.026218414306641,
"learning_rate": 1.0530467363439164e-06,
"loss": 0.6476,
"step": 50700
},
{
"epoch": 29.759812536613943,
"grad_norm": 6.32857608795166,
"learning_rate": 8.558469729836324e-07,
"loss": 0.6515,
"step": 50800
},
{
"epoch": 29.818394844756885,
"grad_norm": 8.374032974243164,
"learning_rate": 6.586472096233485e-07,
"loss": 0.6615,
"step": 50900
},
{
"epoch": 29.876977152899823,
"grad_norm": 8.55248737335205,
"learning_rate": 4.6144744626306455e-07,
"loss": 0.6645,
"step": 51000
},
{
"epoch": 29.876977152899823,
"eval_loss": 0.08314584940671921,
"eval_runtime": 145.2309,
"eval_samples_per_second": 3.443,
"eval_steps_per_second": 0.434,
"eval_wer": 0.18980253463012084,
"step": 51000
},
{
"epoch": 29.935559461042764,
"grad_norm": 4.774900436401367,
"learning_rate": 2.6424768290278054e-07,
"loss": 0.6359,
"step": 51100
},
{
"epoch": 29.994141769185706,
"grad_norm": 4.431262493133545,
"learning_rate": 6.704791954249655e-08,
"loss": 0.6618,
"step": 51200
}
],
"logging_steps": 100,
"max_steps": 51210,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.6610187993088e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}