{ "best_global_step": 28000, "best_metric": 0.18110816386678455, "best_model_checkpoint": "./distil-whisper/checkpoint-28000", "epoch": 30.0, "eval_steps": 1000, "global_step": 51210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05858230814294083, "grad_norm": 27.49039077758789, "learning_rate": 1.9e-05, "loss": 1.7236, "step": 100 }, { "epoch": 0.11716461628588166, "grad_norm": 18.32018280029297, "learning_rate": 3.9000000000000006e-05, "loss": 1.796, "step": 200 }, { "epoch": 0.1757469244288225, "grad_norm": 12.191054344177246, "learning_rate": 5.9e-05, "loss": 1.8559, "step": 300 }, { "epoch": 0.23432923257176333, "grad_norm": 20.974699020385742, "learning_rate": 7.900000000000001e-05, "loss": 1.9804, "step": 400 }, { "epoch": 0.29291154071470415, "grad_norm": 37.665748596191406, "learning_rate": 9.88e-05, "loss": 2.2308, "step": 500 }, { "epoch": 0.351493848857645, "grad_norm": 22.89726448059082, "learning_rate": 9.917896759542318e-05, "loss": 2.0073, "step": 600 }, { "epoch": 0.4100761570005858, "grad_norm": 28.912738800048828, "learning_rate": 9.830552886714997e-05, "loss": 2.2137, "step": 700 }, { "epoch": 0.46865846514352666, "grad_norm": 26.01645851135254, "learning_rate": 9.743209013887676e-05, "loss": 2.3668, "step": 800 }, { "epoch": 0.5272407732864675, "grad_norm": 38.67082214355469, "learning_rate": 9.655865141060355e-05, "loss": 2.1304, "step": 900 }, { "epoch": 0.5858230814294083, "grad_norm": 42.85211181640625, "learning_rate": 9.568521268233034e-05, "loss": 2.1291, "step": 1000 }, { "epoch": 0.5858230814294083, "eval_loss": 0.09121495485305786, "eval_runtime": 150.358, "eval_samples_per_second": 3.325, "eval_steps_per_second": 0.419, "eval_wer": 0.19776009431181846, "step": 1000 }, { "epoch": 0.6444053895723492, "grad_norm": 41.63334274291992, "learning_rate": 9.481177395405713e-05, "loss": 2.176, "step": 1100 }, { "epoch": 0.70298769771529, "grad_norm": 
35.67658996582031, "learning_rate": 9.393833522578391e-05, "loss": 2.3521, "step": 1200 }, { "epoch": 0.7615700058582309, "grad_norm": 23.11754608154297, "learning_rate": 9.30648964975107e-05, "loss": 2.0235, "step": 1300 }, { "epoch": 0.8201523140011716, "grad_norm": 57.13395690917969, "learning_rate": 9.219145776923749e-05, "loss": 2.0512, "step": 1400 }, { "epoch": 0.8787346221441125, "grad_norm": 29.889575958251953, "learning_rate": 9.131801904096428e-05, "loss": 2.2161, "step": 1500 }, { "epoch": 0.9373169302870533, "grad_norm": 20.80169105529785, "learning_rate": 9.044458031269108e-05, "loss": 2.4303, "step": 1600 }, { "epoch": 0.9958992384299942, "grad_norm": 43.849361419677734, "learning_rate": 8.957114158441786e-05, "loss": 2.162, "step": 1700 }, { "epoch": 1.054481546572935, "grad_norm": 19.961891174316406, "learning_rate": 8.869770285614464e-05, "loss": 1.8381, "step": 1800 }, { "epoch": 1.1130638547158758, "grad_norm": 25.00478744506836, "learning_rate": 8.782426412787143e-05, "loss": 1.8102, "step": 1900 }, { "epoch": 1.1716461628588166, "grad_norm": 23.878835678100586, "learning_rate": 8.695082539959822e-05, "loss": 1.7057, "step": 2000 }, { "epoch": 1.1716461628588166, "eval_loss": 0.09117516130208969, "eval_runtime": 145.9897, "eval_samples_per_second": 3.425, "eval_steps_per_second": 0.432, "eval_wer": 0.2002652519893899, "step": 2000 }, { "epoch": 1.2302284710017575, "grad_norm": 36.49687957763672, "learning_rate": 8.607738667132501e-05, "loss": 1.7356, "step": 2100 }, { "epoch": 1.2888107791446983, "grad_norm": 23.07692527770996, "learning_rate": 8.52039479430518e-05, "loss": 1.6766, "step": 2200 }, { "epoch": 1.3473930872876392, "grad_norm": 23.606229782104492, "learning_rate": 8.433050921477858e-05, "loss": 1.7865, "step": 2300 }, { "epoch": 1.40597539543058, "grad_norm": 12.508922576904297, "learning_rate": 8.345707048650537e-05, "loss": 1.7503, "step": 2400 }, { "epoch": 1.4645577035735209, "grad_norm": 15.673484802246094, "learning_rate": 
8.258363175823216e-05, "loss": 1.7208, "step": 2500 }, { "epoch": 1.5231400117164617, "grad_norm": 33.59520721435547, "learning_rate": 8.171019302995895e-05, "loss": 1.7469, "step": 2600 }, { "epoch": 1.5817223198594026, "grad_norm": 15.388129234313965, "learning_rate": 8.083675430168574e-05, "loss": 1.7075, "step": 2700 }, { "epoch": 1.6403046280023434, "grad_norm": 11.93837833404541, "learning_rate": 7.996331557341253e-05, "loss": 1.696, "step": 2800 }, { "epoch": 1.698886936145284, "grad_norm": 25.31481170654297, "learning_rate": 7.908987684513932e-05, "loss": 1.7738, "step": 2900 }, { "epoch": 1.757469244288225, "grad_norm": 38.03368377685547, "learning_rate": 7.821643811686611e-05, "loss": 1.7162, "step": 3000 }, { "epoch": 1.757469244288225, "eval_loss": 0.09119272977113724, "eval_runtime": 150.3416, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.419, "eval_wer": 0.20601237842617154, "step": 3000 }, { "epoch": 1.8160515524311658, "grad_norm": 15.58292293548584, "learning_rate": 7.73429993885929e-05, "loss": 1.6062, "step": 3100 }, { "epoch": 1.8746338605741066, "grad_norm": 73.762451171875, "learning_rate": 7.646956066031969e-05, "loss": 1.6992, "step": 3200 }, { "epoch": 1.9332161687170475, "grad_norm": 26.838842391967773, "learning_rate": 7.559612193204648e-05, "loss": 1.7229, "step": 3300 }, { "epoch": 1.9917984768599881, "grad_norm": 22.824892044067383, "learning_rate": 7.472268320377325e-05, "loss": 1.7185, "step": 3400 }, { "epoch": 2.050380785002929, "grad_norm": 13.238216400146484, "learning_rate": 7.384924447550004e-05, "loss": 1.4583, "step": 3500 }, { "epoch": 2.10896309314587, "grad_norm": 32.13274383544922, "learning_rate": 7.297580574722683e-05, "loss": 1.3799, "step": 3600 }, { "epoch": 2.1675454012888107, "grad_norm": 13.44641399383545, "learning_rate": 7.210236701895362e-05, "loss": 1.3713, "step": 3700 }, { "epoch": 2.2261277094317515, "grad_norm": 19.05823516845703, "learning_rate": 7.122892829068041e-05, "loss": 1.4489, 
"step": 3800 }, { "epoch": 2.2847100175746924, "grad_norm": 16.24590492248535, "learning_rate": 7.03554895624072e-05, "loss": 1.3858, "step": 3900 }, { "epoch": 2.3432923257176332, "grad_norm": 14.049396514892578, "learning_rate": 6.948205083413399e-05, "loss": 1.4996, "step": 4000 }, { "epoch": 2.3432923257176332, "eval_loss": 0.09011241793632507, "eval_runtime": 149.4815, "eval_samples_per_second": 3.345, "eval_steps_per_second": 0.421, "eval_wer": 0.20468611847922194, "step": 4000 }, { "epoch": 2.401874633860574, "grad_norm": 16.26442527770996, "learning_rate": 6.860861210586078e-05, "loss": 1.4692, "step": 4100 }, { "epoch": 2.460456942003515, "grad_norm": 10.367189407348633, "learning_rate": 6.773517337758757e-05, "loss": 1.3203, "step": 4200 }, { "epoch": 2.519039250146456, "grad_norm": 21.82206153869629, "learning_rate": 6.686173464931436e-05, "loss": 1.5161, "step": 4300 }, { "epoch": 2.5776215582893967, "grad_norm": 10.586897850036621, "learning_rate": 6.598829592104115e-05, "loss": 1.3843, "step": 4400 }, { "epoch": 2.6362038664323375, "grad_norm": 18.108095169067383, "learning_rate": 6.511485719276793e-05, "loss": 1.446, "step": 4500 }, { "epoch": 2.6947861745752784, "grad_norm": 17.231735229492188, "learning_rate": 6.424141846449472e-05, "loss": 1.4083, "step": 4600 }, { "epoch": 2.753368482718219, "grad_norm": 8.833962440490723, "learning_rate": 6.33679797362215e-05, "loss": 1.4668, "step": 4700 }, { "epoch": 2.81195079086116, "grad_norm": 14.335036277770996, "learning_rate": 6.24945410079483e-05, "loss": 1.3906, "step": 4800 }, { "epoch": 2.870533099004101, "grad_norm": Infinity, "learning_rate": 6.162110227967508e-05, "loss": 1.4241, "step": 4900 }, { "epoch": 2.9291154071470418, "grad_norm": 25.987884521484375, "learning_rate": 6.075639793868461e-05, "loss": 1.3942, "step": 5000 }, { "epoch": 2.9291154071470418, "eval_loss": 0.0883156806230545, "eval_runtime": 147.2451, "eval_samples_per_second": 3.396, "eval_steps_per_second": 0.428, "eval_wer": 
0.19510757441791923, "step": 5000 }, { "epoch": 2.9876977152899826, "grad_norm": 17.336523056030273, "learning_rate": 5.988295921041139e-05, "loss": 1.4093, "step": 5100 }, { "epoch": 3.0462800234329235, "grad_norm": 21.30254364013672, "learning_rate": 5.900952048213818e-05, "loss": 1.3729, "step": 5200 }, { "epoch": 3.104862331575864, "grad_norm": 15.282761573791504, "learning_rate": 5.813608175386497e-05, "loss": 1.3091, "step": 5300 }, { "epoch": 3.1634446397188047, "grad_norm": 11.485124588012695, "learning_rate": 5.726264302559175e-05, "loss": 1.1577, "step": 5400 }, { "epoch": 3.2220269478617456, "grad_norm": 17.49385643005371, "learning_rate": 5.638920429731854e-05, "loss": 1.2737, "step": 5500 }, { "epoch": 3.2806092560046864, "grad_norm": 19.74750518798828, "learning_rate": 5.551576556904533e-05, "loss": 1.1946, "step": 5600 }, { "epoch": 3.3391915641476273, "grad_norm": 9.402506828308105, "learning_rate": 5.4642326840772115e-05, "loss": 1.2035, "step": 5700 }, { "epoch": 3.397773872290568, "grad_norm": 13.279162406921387, "learning_rate": 5.3768888112498904e-05, "loss": 1.2891, "step": 5800 }, { "epoch": 3.456356180433509, "grad_norm": 18.554702758789062, "learning_rate": 5.28954493842257e-05, "loss": 1.1315, "step": 5900 }, { "epoch": 3.51493848857645, "grad_norm": 10.541516304016113, "learning_rate": 5.202201065595249e-05, "loss": 1.2285, "step": 6000 }, { "epoch": 3.51493848857645, "eval_loss": 0.08758817613124847, "eval_runtime": 148.7151, "eval_samples_per_second": 3.362, "eval_steps_per_second": 0.424, "eval_wer": 0.1956970232832302, "step": 6000 }, { "epoch": 3.5735207967193907, "grad_norm": 17.15789031982422, "learning_rate": 5.114857192767928e-05, "loss": 1.278, "step": 6100 }, { "epoch": 3.6321031048623316, "grad_norm": 13.666048049926758, "learning_rate": 5.027513319940606e-05, "loss": 1.164, "step": 6200 }, { "epoch": 3.6906854130052724, "grad_norm": 14.286330223083496, "learning_rate": 4.940169447113285e-05, "loss": 1.2317, "step": 6300 }, { 
"epoch": 3.7492677211482133, "grad_norm": 19.070871353149414, "learning_rate": 4.852825574285964e-05, "loss": 1.131, "step": 6400 }, { "epoch": 3.807850029291154, "grad_norm": 13.184505462646484, "learning_rate": 4.7654817014586425e-05, "loss": 1.2665, "step": 6500 }, { "epoch": 3.866432337434095, "grad_norm": 16.988956451416016, "learning_rate": 4.6781378286313215e-05, "loss": 1.2145, "step": 6600 }, { "epoch": 3.925014645577036, "grad_norm": 14.714631080627441, "learning_rate": 4.590793955804001e-05, "loss": 1.2228, "step": 6700 }, { "epoch": 3.9835969537199767, "grad_norm": 15.682711601257324, "learning_rate": 4.5034500829766794e-05, "loss": 1.2137, "step": 6800 }, { "epoch": 4.042179261862917, "grad_norm": 14.943408012390137, "learning_rate": 4.4161062101493584e-05, "loss": 1.2088, "step": 6900 }, { "epoch": 4.100761570005858, "grad_norm": 9.292410850524902, "learning_rate": 4.3287623373220374e-05, "loss": 1.0637, "step": 7000 }, { "epoch": 4.100761570005858, "eval_loss": 0.08726315200328827, "eval_runtime": 149.7325, "eval_samples_per_second": 3.339, "eval_steps_per_second": 0.421, "eval_wer": 0.19201296787503683, "step": 7000 }, { "epoch": 4.159343878148799, "grad_norm": 9.017548561096191, "learning_rate": 4.241418464494716e-05, "loss": 1.0308, "step": 7100 }, { "epoch": 4.21792618629174, "grad_norm": 16.15174102783203, "learning_rate": 4.1540745916673946e-05, "loss": 1.0464, "step": 7200 }, { "epoch": 4.2765084944346805, "grad_norm": 7.939199447631836, "learning_rate": 4.0667307188400736e-05, "loss": 1.1491, "step": 7300 }, { "epoch": 4.335090802577621, "grad_norm": 11.129817008972168, "learning_rate": 3.9793868460127526e-05, "loss": 1.0738, "step": 7400 }, { "epoch": 4.393673110720562, "grad_norm": 8.50700855255127, "learning_rate": 3.8920429731854315e-05, "loss": 1.0828, "step": 7500 }, { "epoch": 4.452255418863503, "grad_norm": 11.484732627868652, "learning_rate": 3.80469910035811e-05, "loss": 1.0418, "step": 7600 }, { "epoch": 4.510837727006444, 
"grad_norm": 8.929669380187988, "learning_rate": 3.717355227530789e-05, "loss": 1.1051, "step": 7700 }, { "epoch": 4.569420035149385, "grad_norm": 12.994172096252441, "learning_rate": 3.630011354703468e-05, "loss": 1.0193, "step": 7800 }, { "epoch": 4.628002343292326, "grad_norm": 9.806758880615234, "learning_rate": 3.542667481876147e-05, "loss": 1.1699, "step": 7900 }, { "epoch": 4.6865846514352665, "grad_norm": 10.537009239196777, "learning_rate": 3.455323609048826e-05, "loss": 1.1144, "step": 8000 }, { "epoch": 4.6865846514352665, "eval_loss": 0.08649158477783203, "eval_runtime": 148.2812, "eval_samples_per_second": 3.372, "eval_steps_per_second": 0.425, "eval_wer": 0.1927497789566755, "step": 8000 }, { "epoch": 4.745166959578207, "grad_norm": 30.78623390197754, "learning_rate": 3.367979736221504e-05, "loss": 1.0902, "step": 8100 }, { "epoch": 4.803749267721148, "grad_norm": 9.64354419708252, "learning_rate": 3.280635863394183e-05, "loss": 0.9932, "step": 8200 }, { "epoch": 4.862331575864089, "grad_norm": 11.149614334106445, "learning_rate": 3.193291990566862e-05, "loss": 1.0389, "step": 8300 }, { "epoch": 4.92091388400703, "grad_norm": 10.836565971374512, "learning_rate": 3.10594811773954e-05, "loss": 1.0626, "step": 8400 }, { "epoch": 4.979496192149971, "grad_norm": 11.348654747009277, "learning_rate": 3.01860424491222e-05, "loss": 1.0934, "step": 8500 }, { "epoch": 5.038078500292912, "grad_norm": 8.341979026794434, "learning_rate": 2.9312603720848985e-05, "loss": 1.0845, "step": 8600 }, { "epoch": 5.0966608084358525, "grad_norm": 9.784319877624512, "learning_rate": 2.8439164992575774e-05, "loss": 0.96, "step": 8700 }, { "epoch": 5.155243116578793, "grad_norm": 11.3285551071167, "learning_rate": 2.756572626430256e-05, "loss": 0.9211, "step": 8800 }, { "epoch": 5.213825424721734, "grad_norm": 8.36048698425293, "learning_rate": 2.6692287536029347e-05, "loss": 0.9652, "step": 8900 }, { "epoch": 5.272407732864675, "grad_norm": 8.087980270385742, "learning_rate": 
2.5818848807756137e-05, "loss": 1.0164, "step": 9000 }, { "epoch": 5.272407732864675, "eval_loss": 0.08577600121498108, "eval_runtime": 149.9869, "eval_samples_per_second": 3.334, "eval_steps_per_second": 0.42, "eval_wer": 0.19230769230769232, "step": 9000 }, { "epoch": 5.330990041007616, "grad_norm": 14.56843090057373, "learning_rate": 2.4954144466765657e-05, "loss": 0.9776, "step": 9100 }, { "epoch": 5.389572349150557, "grad_norm": 10.220062255859375, "learning_rate": 2.4080705738492446e-05, "loss": 0.9681, "step": 9200 }, { "epoch": 5.448154657293498, "grad_norm": 11.614462852478027, "learning_rate": 2.3207267010219233e-05, "loss": 0.9691, "step": 9300 }, { "epoch": 5.506736965436438, "grad_norm": 6.580599784851074, "learning_rate": 2.2333828281946022e-05, "loss": 0.9492, "step": 9400 }, { "epoch": 5.565319273579379, "grad_norm": 10.284950256347656, "learning_rate": 2.1460389553672812e-05, "loss": 1.0092, "step": 9500 }, { "epoch": 5.62390158172232, "grad_norm": 10.794511795043945, "learning_rate": 2.0586950825399598e-05, "loss": 0.9039, "step": 9600 }, { "epoch": 5.682483889865261, "grad_norm": 12.07039737701416, "learning_rate": 1.9713512097126388e-05, "loss": 1.0114, "step": 9700 }, { "epoch": 5.741066198008202, "grad_norm": 15.622093200683594, "learning_rate": 1.8840073368853174e-05, "loss": 0.9762, "step": 9800 }, { "epoch": 5.799648506151143, "grad_norm": 7.460862636566162, "learning_rate": 1.7966634640579964e-05, "loss": 0.974, "step": 9900 }, { "epoch": 5.8582308142940835, "grad_norm": 9.551807403564453, "learning_rate": 1.7093195912306753e-05, "loss": 0.9812, "step": 10000 }, { "epoch": 5.8582308142940835, "eval_loss": 0.08563477545976639, "eval_runtime": 148.6427, "eval_samples_per_second": 3.364, "eval_steps_per_second": 0.424, "eval_wer": 0.1940760389036251, "step": 10000 }, { "epoch": 5.916813122437024, "grad_norm": 10.54980754852295, "learning_rate": 1.621975718403354e-05, "loss": 0.9981, "step": 10100 }, { "epoch": 5.975395430579965, "grad_norm": 
8.396002769470215, "learning_rate": 1.534631845576033e-05, "loss": 0.911, "step": 10200 }, { "epoch": 6.033977738722906, "grad_norm": 6.294841289520264, "learning_rate": 1.4472879727487117e-05, "loss": 0.8886, "step": 10300 }, { "epoch": 6.092560046865847, "grad_norm": 10.276989936828613, "learning_rate": 1.3599440999213905e-05, "loss": 0.8764, "step": 10400 }, { "epoch": 6.151142355008787, "grad_norm": 9.27648639678955, "learning_rate": 1.2726002270940693e-05, "loss": 0.8728, "step": 10500 }, { "epoch": 6.209724663151728, "grad_norm": 10.15081787109375, "learning_rate": 1.1852563542667483e-05, "loss": 0.9008, "step": 10600 }, { "epoch": 6.268306971294669, "grad_norm": 7.828310012817383, "learning_rate": 1.0979124814394271e-05, "loss": 0.8703, "step": 10700 }, { "epoch": 6.3268892794376095, "grad_norm": 8.747062683105469, "learning_rate": 1.0105686086121059e-05, "loss": 0.9343, "step": 10800 }, { "epoch": 6.38547158758055, "grad_norm": 7.199453830718994, "learning_rate": 9.232247357847849e-06, "loss": 0.9144, "step": 10900 }, { "epoch": 6.444053895723491, "grad_norm": 7.865077972412109, "learning_rate": 8.358808629574635e-06, "loss": 0.8927, "step": 11000 }, { "epoch": 6.444053895723491, "eval_loss": 0.08493725210428238, "eval_runtime": 152.514, "eval_samples_per_second": 3.278, "eval_steps_per_second": 0.413, "eval_wer": 0.20173887415266725, "step": 11000 }, { "epoch": 6.502636203866432, "grad_norm": 8.560958862304688, "learning_rate": 7.485369901301424e-06, "loss": 0.8994, "step": 11100 }, { "epoch": 6.561218512009373, "grad_norm": 8.679183006286621, "learning_rate": 6.611931173028213e-06, "loss": 0.9121, "step": 11200 }, { "epoch": 6.619800820152314, "grad_norm": 7.480246067047119, "learning_rate": 5.7384924447550014e-06, "loss": 0.9015, "step": 11300 }, { "epoch": 6.678383128295255, "grad_norm": 8.197961807250977, "learning_rate": 4.865053716481789e-06, "loss": 0.8958, "step": 11400 }, { "epoch": 6.7369654364381955, "grad_norm": 9.989555358886719, 
"learning_rate": 3.991614988208577e-06, "loss": 0.8816, "step": 11500 }, { "epoch": 6.795547744581136, "grad_norm": 11.78941535949707, "learning_rate": 3.118176259935366e-06, "loss": 0.8848, "step": 11600 }, { "epoch": 6.854130052724077, "grad_norm": 8.92465591430664, "learning_rate": 2.244737531662154e-06, "loss": 0.8758, "step": 11700 }, { "epoch": 6.912712360867018, "grad_norm": 5.927036762237549, "learning_rate": 1.3712988033889424e-06, "loss": 0.8421, "step": 11800 }, { "epoch": 6.971294669009959, "grad_norm": 6.0556960105896, "learning_rate": 4.978600751157307e-07, "loss": 0.8566, "step": 11900 }, { "epoch": 7.0298769771529, "grad_norm": 8.540233612060547, "learning_rate": 3.064574532287266e-05, "loss": 0.8936, "step": 12000 }, { "epoch": 7.0298769771529, "eval_loss": 0.08440528064966202, "eval_runtime": 151.0062, "eval_samples_per_second": 3.311, "eval_steps_per_second": 0.417, "eval_wer": 0.19613910993221337, "step": 12000 }, { "epoch": 7.088459285295841, "grad_norm": 8.703925132751465, "learning_rate": 3.0042245021122513e-05, "loss": 0.9237, "step": 12100 }, { "epoch": 7.147041593438781, "grad_norm": 11.286332130432129, "learning_rate": 2.9438744719372363e-05, "loss": 0.917, "step": 12200 }, { "epoch": 7.205623901581722, "grad_norm": 8.638763427734375, "learning_rate": 2.884127942063971e-05, "loss": 0.88, "step": 12300 }, { "epoch": 7.264206209724663, "grad_norm": 9.697504043579102, "learning_rate": 2.8237779118889563e-05, "loss": 0.9342, "step": 12400 }, { "epoch": 7.322788517867604, "grad_norm": 8.06286334991455, "learning_rate": 2.7634278817139407e-05, "loss": 0.8979, "step": 12500 }, { "epoch": 7.381370826010545, "grad_norm": 15.282951354980469, "learning_rate": 2.703077851538926e-05, "loss": 0.9201, "step": 12600 }, { "epoch": 7.439953134153486, "grad_norm": 11.369593620300293, "learning_rate": 2.6427278213639107e-05, "loss": 0.9538, "step": 12700 }, { "epoch": 7.4985354422964265, "grad_norm": 10.077091217041016, "learning_rate": 
2.5823777911888958e-05, "loss": 0.9807, "step": 12800 }, { "epoch": 7.557117750439367, "grad_norm": 9.917128562927246, "learning_rate": 2.5220277610138805e-05, "loss": 0.939, "step": 12900 }, { "epoch": 7.615700058582308, "grad_norm": 10.68909740447998, "learning_rate": 2.4616777308388655e-05, "loss": 0.8718, "step": 13000 }, { "epoch": 7.615700058582308, "eval_loss": 0.08539500832557678, "eval_runtime": 149.7688, "eval_samples_per_second": 3.338, "eval_steps_per_second": 0.421, "eval_wer": 0.19790745652814618, "step": 13000 }, { "epoch": 7.674282366725249, "grad_norm": 8.260842323303223, "learning_rate": 2.4013277006638505e-05, "loss": 0.9048, "step": 13100 }, { "epoch": 7.73286467486819, "grad_norm": 16.34197235107422, "learning_rate": 2.3409776704888352e-05, "loss": 0.9262, "step": 13200 }, { "epoch": 7.791446983011131, "grad_norm": 8.024565696716309, "learning_rate": 2.2806276403138202e-05, "loss": 0.9952, "step": 13300 }, { "epoch": 7.850029291154072, "grad_norm": 7.884005069732666, "learning_rate": 2.220277610138805e-05, "loss": 0.9656, "step": 13400 }, { "epoch": 7.9086115992970125, "grad_norm": 8.73161506652832, "learning_rate": 2.15992757996379e-05, "loss": 0.9205, "step": 13500 }, { "epoch": 7.967193907439953, "grad_norm": 9.00133991241455, "learning_rate": 2.099577549788775e-05, "loss": 0.9347, "step": 13600 }, { "epoch": 8.025776215582894, "grad_norm": 6.850646495819092, "learning_rate": 2.03922751961376e-05, "loss": 0.8468, "step": 13700 }, { "epoch": 8.084358523725834, "grad_norm": 11.7725248336792, "learning_rate": 1.978877489438745e-05, "loss": 0.8948, "step": 13800 }, { "epoch": 8.142940831868776, "grad_norm": 6.4703474044799805, "learning_rate": 1.9185274592637298e-05, "loss": 0.8727, "step": 13900 }, { "epoch": 8.201523140011716, "grad_norm": 15.587645530700684, "learning_rate": 1.8581774290887148e-05, "loss": 0.9019, "step": 14000 }, { "epoch": 8.201523140011716, "eval_loss": 0.08469171822071075, "eval_runtime": 147.0496, 
"eval_samples_per_second": 3.4, "eval_steps_per_second": 0.428, "eval_wer": 0.18538166814028884, "step": 14000 }, { "epoch": 8.260105448154658, "grad_norm": 7.585418701171875, "learning_rate": 1.7978273989136995e-05, "loss": 0.8818, "step": 14100 }, { "epoch": 8.318687756297598, "grad_norm": 9.436836242675781, "learning_rate": 1.7374773687386845e-05, "loss": 0.8864, "step": 14200 }, { "epoch": 8.37727006444054, "grad_norm": 12.12936019897461, "learning_rate": 1.6771273385636692e-05, "loss": 0.8744, "step": 14300 }, { "epoch": 8.43585237258348, "grad_norm": 11.584985733032227, "learning_rate": 1.6167773083886542e-05, "loss": 0.8542, "step": 14400 }, { "epoch": 8.494434680726421, "grad_norm": 7.6883440017700195, "learning_rate": 1.5564272782136393e-05, "loss": 0.8714, "step": 14500 }, { "epoch": 8.553016988869361, "grad_norm": 13.686609268188477, "learning_rate": 1.496077248038624e-05, "loss": 0.8726, "step": 14600 }, { "epoch": 8.611599297012303, "grad_norm": 10.200602531433105, "learning_rate": 1.436330718165359e-05, "loss": 0.8839, "step": 14700 }, { "epoch": 8.670181605155243, "grad_norm": 6.929018020629883, "learning_rate": 1.3759806879903441e-05, "loss": 0.8628, "step": 14800 }, { "epoch": 8.728763913298184, "grad_norm": 9.72988224029541, "learning_rate": 1.3156306578153291e-05, "loss": 0.8672, "step": 14900 }, { "epoch": 8.787346221441124, "grad_norm": 7.273561477661133, "learning_rate": 1.255280627640314e-05, "loss": 0.8293, "step": 15000 }, { "epoch": 8.787346221441124, "eval_loss": 0.08473628014326096, "eval_runtime": 147.7929, "eval_samples_per_second": 3.383, "eval_steps_per_second": 0.426, "eval_wer": 0.19834954317712938, "step": 15000 }, { "epoch": 8.845928529584066, "grad_norm": 6.900521278381348, "learning_rate": 1.1949305974652989e-05, "loss": 0.8797, "step": 15100 }, { "epoch": 8.904510837727006, "grad_norm": 13.14035701751709, "learning_rate": 1.1345805672902837e-05, "loss": 0.8691, "step": 15200 }, { "epoch": 8.963093145869948, "grad_norm": 
6.80872106552124, "learning_rate": 1.0742305371152686e-05, "loss": 0.859, "step": 15300 }, { "epoch": 9.021675454012888, "grad_norm": 5.7985520362854, "learning_rate": 1.0138805069402535e-05, "loss": 0.8905, "step": 15400 }, { "epoch": 9.08025776215583, "grad_norm": 7.384444236755371, "learning_rate": 9.535304767652383e-06, "loss": 0.7981, "step": 15500 }, { "epoch": 9.13884007029877, "grad_norm": 6.441751956939697, "learning_rate": 8.931804465902233e-06, "loss": 0.9026, "step": 15600 }, { "epoch": 9.197422378441711, "grad_norm": 5.471485614776611, "learning_rate": 8.328304164152082e-06, "loss": 0.8357, "step": 15700 }, { "epoch": 9.256004686584651, "grad_norm": 6.093921661376953, "learning_rate": 7.724803862401932e-06, "loss": 0.8427, "step": 15800 }, { "epoch": 9.314586994727593, "grad_norm": 5.414072036743164, "learning_rate": 7.121303560651781e-06, "loss": 0.8235, "step": 15900 }, { "epoch": 9.373169302870533, "grad_norm": 5.2771897315979, "learning_rate": 6.5178032589016296e-06, "loss": 0.8363, "step": 16000 }, { "epoch": 9.373169302870533, "eval_loss": 0.08421996235847473, "eval_runtime": 150.6901, "eval_samples_per_second": 3.318, "eval_steps_per_second": 0.418, "eval_wer": 0.19820218096080164, "step": 16000 }, { "epoch": 9.431751611013475, "grad_norm": 5.505492687225342, "learning_rate": 5.914302957151479e-06, "loss": 0.7991, "step": 16100 }, { "epoch": 9.490333919156415, "grad_norm": 6.690750598907471, "learning_rate": 5.310802655401328e-06, "loss": 0.8071, "step": 16200 }, { "epoch": 9.548916227299356, "grad_norm": 6.654877185821533, "learning_rate": 4.707302353651177e-06, "loss": 0.834, "step": 16300 }, { "epoch": 9.607498535442296, "grad_norm": 9.937077522277832, "learning_rate": 4.1038020519010266e-06, "loss": 0.8055, "step": 16400 }, { "epoch": 9.666080843585238, "grad_norm": 6.015642166137695, "learning_rate": 3.500301750150875e-06, "loss": 0.8734, "step": 16500 }, { "epoch": 9.724663151728178, "grad_norm": 12.434464454650879, "learning_rate": 
2.896801448400724e-06, "loss": 0.8544, "step": 16600 }, { "epoch": 9.783245459871118, "grad_norm": 6.330708980560303, "learning_rate": 2.2933011466505732e-06, "loss": 0.798, "step": 16700 }, { "epoch": 9.84182776801406, "grad_norm": 5.820682048797607, "learning_rate": 1.6898008449004227e-06, "loss": 0.7989, "step": 16800 }, { "epoch": 9.900410076157002, "grad_norm": 8.209725379943848, "learning_rate": 1.0863005431502715e-06, "loss": 0.8454, "step": 16900 }, { "epoch": 9.958992384299941, "grad_norm": 10.676623344421387, "learning_rate": 4.828002414001208e-07, "loss": 0.8034, "step": 17000 }, { "epoch": 9.958992384299941, "eval_loss": 0.08400186896324158, "eval_runtime": 148.9273, "eval_samples_per_second": 3.357, "eval_steps_per_second": 0.423, "eval_wer": 0.197465369879163, "step": 17000 }, { "epoch": 10.017574692442881, "grad_norm": 8.057076454162598, "learning_rate": 3.8124720274503957e-05, "loss": 0.8553, "step": 17100 }, { "epoch": 10.076157000585823, "grad_norm": 6.9262895584106445, "learning_rate": 3.7751752946441895e-05, "loss": 0.8579, "step": 17200 }, { "epoch": 10.134739308728763, "grad_norm": 7.844761371612549, "learning_rate": 3.737878561837983e-05, "loss": 0.8764, "step": 17300 }, { "epoch": 10.193321616871705, "grad_norm": 17.013307571411133, "learning_rate": 3.700954796359839e-05, "loss": 0.9679, "step": 17400 }, { "epoch": 10.251903925014645, "grad_norm": 10.550113677978516, "learning_rate": 3.663658063553633e-05, "loss": 0.8919, "step": 17500 }, { "epoch": 10.310486233157587, "grad_norm": 7.075886249542236, "learning_rate": 3.6263613307474266e-05, "loss": 0.9601, "step": 17600 }, { "epoch": 10.369068541300527, "grad_norm": 10.027517318725586, "learning_rate": 3.5890645979412204e-05, "loss": 0.8596, "step": 17700 }, { "epoch": 10.427650849443468, "grad_norm": 5.811990261077881, "learning_rate": 3.551767865135014e-05, "loss": 0.9023, "step": 17800 }, { "epoch": 10.486233157586408, "grad_norm": 12.586297988891602, "learning_rate": 
3.514471132328808e-05, "loss": 0.93, "step": 17900 }, { "epoch": 10.54481546572935, "grad_norm": 9.313389778137207, "learning_rate": 3.477174399522602e-05, "loss": 0.8462, "step": 18000 }, { "epoch": 10.54481546572935, "eval_loss": 0.08548491448163986, "eval_runtime": 156.5497, "eval_samples_per_second": 3.194, "eval_steps_per_second": 0.402, "eval_wer": 0.19525493663424698, "step": 18000 }, { "epoch": 10.60339777387229, "grad_norm": 9.245261192321777, "learning_rate": 3.439877666716396e-05, "loss": 0.9351, "step": 18100 }, { "epoch": 10.661980082015232, "grad_norm": 7.513446807861328, "learning_rate": 3.4025809339101895e-05, "loss": 0.8722, "step": 18200 }, { "epoch": 10.720562390158172, "grad_norm": 12.897968292236328, "learning_rate": 3.365284201103984e-05, "loss": 0.9576, "step": 18300 }, { "epoch": 10.779144698301113, "grad_norm": 9.550192832946777, "learning_rate": 3.327987468297777e-05, "loss": 0.9241, "step": 18400 }, { "epoch": 10.837727006444053, "grad_norm": 6.644899368286133, "learning_rate": 3.290690735491571e-05, "loss": 0.9291, "step": 18500 }, { "epoch": 10.896309314586995, "grad_norm": 9.285797119140625, "learning_rate": 3.253394002685365e-05, "loss": 0.8971, "step": 18600 }, { "epoch": 10.954891622729935, "grad_norm": 8.316353797912598, "learning_rate": 3.2160972698791586e-05, "loss": 0.8892, "step": 18700 }, { "epoch": 11.013473930872877, "grad_norm": 17.100173950195312, "learning_rate": 3.1788005370729524e-05, "loss": 0.8732, "step": 18800 }, { "epoch": 11.072056239015817, "grad_norm": 19.12342643737793, "learning_rate": 3.141503804266747e-05, "loss": 0.8663, "step": 18900 }, { "epoch": 11.130638547158759, "grad_norm": 7.627189636230469, "learning_rate": 3.10420707146054e-05, "loss": 0.8824, "step": 19000 }, { "epoch": 11.130638547158759, "eval_loss": 0.08482780307531357, "eval_runtime": 146.6345, "eval_samples_per_second": 3.41, "eval_steps_per_second": 0.43, "eval_wer": 0.19304450338933096, "step": 19000 }, { "epoch": 11.189220855301699, 
"grad_norm": 7.1925950050354, "learning_rate": 3.066910338654334e-05, "loss": 0.8375, "step": 19100 }, { "epoch": 11.24780316344464, "grad_norm": 8.55908489227295, "learning_rate": 3.0296136058481277e-05, "loss": 0.8335, "step": 19200 }, { "epoch": 11.30638547158758, "grad_norm": 15.089740753173828, "learning_rate": 2.9923168730419215e-05, "loss": 0.9117, "step": 19300 }, { "epoch": 11.364967779730522, "grad_norm": 8.451448440551758, "learning_rate": 2.9550201402357153e-05, "loss": 0.8586, "step": 19400 }, { "epoch": 11.423550087873462, "grad_norm": 7.994997501373291, "learning_rate": 2.9177234074295095e-05, "loss": 0.8459, "step": 19500 }, { "epoch": 11.482132396016404, "grad_norm": 8.656350135803223, "learning_rate": 2.8804266746233033e-05, "loss": 0.9268, "step": 19600 }, { "epoch": 11.540714704159344, "grad_norm": 10.245903968811035, "learning_rate": 2.843129941817097e-05, "loss": 0.8657, "step": 19700 }, { "epoch": 11.599297012302285, "grad_norm": 8.758448600769043, "learning_rate": 2.806206176338953e-05, "loss": 0.8875, "step": 19800 }, { "epoch": 11.657879320445225, "grad_norm": 6.345497131347656, "learning_rate": 2.7689094435327466e-05, "loss": 0.9115, "step": 19900 }, { "epoch": 11.716461628588167, "grad_norm": 9.602129936218262, "learning_rate": 2.7316127107265404e-05, "loss": 0.8591, "step": 20000 }, { "epoch": 11.716461628588167, "eval_loss": 0.0848940759897232, "eval_runtime": 147.8483, "eval_samples_per_second": 3.382, "eval_steps_per_second": 0.426, "eval_wer": 0.18376068376068377, "step": 20000 }, { "epoch": 11.775043936731107, "grad_norm": 10.147231101989746, "learning_rate": 2.6943159779203343e-05, "loss": 0.8628, "step": 20100 }, { "epoch": 11.833626244874049, "grad_norm": 7.136846542358398, "learning_rate": 2.657019245114128e-05, "loss": 0.9034, "step": 20200 }, { "epoch": 11.892208553016989, "grad_norm": 11.765522003173828, "learning_rate": 2.6197225123079222e-05, "loss": 0.8635, "step": 20300 }, { "epoch": 11.95079086115993, "grad_norm": 
7.662530422210693, "learning_rate": 2.582425779501716e-05, "loss": 0.8561, "step": 20400 }, { "epoch": 12.00937316930287, "grad_norm": 10.559505462646484, "learning_rate": 2.5451290466955095e-05, "loss": 0.8319, "step": 20500 }, { "epoch": 12.067955477445812, "grad_norm": 6.208855152130127, "learning_rate": 2.5078323138893034e-05, "loss": 0.7679, "step": 20600 }, { "epoch": 12.126537785588752, "grad_norm": 10.877766609191895, "learning_rate": 2.4705355810830972e-05, "loss": 0.8484, "step": 20700 }, { "epoch": 12.185120093731694, "grad_norm": 8.098186492919922, "learning_rate": 2.433238848276891e-05, "loss": 0.8644, "step": 20800 }, { "epoch": 12.243702401874634, "grad_norm": 7.471461772918701, "learning_rate": 2.3959421154706848e-05, "loss": 0.8372, "step": 20900 }, { "epoch": 12.302284710017574, "grad_norm": 10.694164276123047, "learning_rate": 2.3586453826644786e-05, "loss": 0.8339, "step": 21000 }, { "epoch": 12.302284710017574, "eval_loss": 0.08417258411645889, "eval_runtime": 146.3017, "eval_samples_per_second": 3.418, "eval_steps_per_second": 0.431, "eval_wer": 0.18626584143825523, "step": 21000 }, { "epoch": 12.360867018160516, "grad_norm": 8.07205581665039, "learning_rate": 2.3213486498582724e-05, "loss": 0.7901, "step": 21100 }, { "epoch": 12.419449326303456, "grad_norm": 10.554586410522461, "learning_rate": 2.2840519170520663e-05, "loss": 0.8117, "step": 21200 }, { "epoch": 12.478031634446397, "grad_norm": 9.55418872833252, "learning_rate": 2.24675518424586e-05, "loss": 0.7946, "step": 21300 }, { "epoch": 12.536613942589337, "grad_norm": 8.738641738891602, "learning_rate": 2.2094584514396542e-05, "loss": 0.8026, "step": 21400 }, { "epoch": 12.595196250732279, "grad_norm": 9.077950477600098, "learning_rate": 2.1721617186334477e-05, "loss": 0.8102, "step": 21500 }, { "epoch": 12.653778558875219, "grad_norm": 7.797760009765625, "learning_rate": 2.1348649858272415e-05, "loss": 0.8608, "step": 21600 }, { "epoch": 12.71236086701816, "grad_norm": 
7.361778736114502, "learning_rate": 2.0975682530210357e-05, "loss": 0.8448, "step": 21700 }, { "epoch": 12.7709431751611, "grad_norm": 5.896770477294922, "learning_rate": 2.0602715202148292e-05, "loss": 0.8394, "step": 21800 }, { "epoch": 12.829525483304042, "grad_norm": 11.283666610717773, "learning_rate": 2.0233477547366852e-05, "loss": 0.8326, "step": 21900 }, { "epoch": 12.888107791446982, "grad_norm": 8.800780296325684, "learning_rate": 1.986051021930479e-05, "loss": 0.8573, "step": 22000 }, { "epoch": 12.888107791446982, "eval_loss": 0.08360177278518677, "eval_runtime": 148.5908, "eval_samples_per_second": 3.365, "eval_steps_per_second": 0.424, "eval_wer": 0.19260241674034778, "step": 22000 }, { "epoch": 12.946690099589924, "grad_norm": 7.05850887298584, "learning_rate": 1.948754289124273e-05, "loss": 0.7926, "step": 22100 }, { "epoch": 13.005272407732864, "grad_norm": 7.339128017425537, "learning_rate": 1.9114575563180667e-05, "loss": 0.8611, "step": 22200 }, { "epoch": 13.063854715875806, "grad_norm": 7.789575576782227, "learning_rate": 1.8741608235118605e-05, "loss": 0.8006, "step": 22300 }, { "epoch": 13.122437024018746, "grad_norm": 10.398506164550781, "learning_rate": 1.8368640907056543e-05, "loss": 0.8397, "step": 22400 }, { "epoch": 13.181019332161688, "grad_norm": 8.361679077148438, "learning_rate": 1.799567357899448e-05, "loss": 0.8027, "step": 22500 }, { "epoch": 13.239601640304627, "grad_norm": 8.431894302368164, "learning_rate": 1.762270625093242e-05, "loss": 0.824, "step": 22600 }, { "epoch": 13.29818394844757, "grad_norm": 5.742968559265137, "learning_rate": 1.7249738922870357e-05, "loss": 0.792, "step": 22700 }, { "epoch": 13.35676625659051, "grad_norm": 13.589301109313965, "learning_rate": 1.6876771594808296e-05, "loss": 0.7701, "step": 22800 }, { "epoch": 13.415348564733451, "grad_norm": 6.760063171386719, "learning_rate": 1.6503804266746234e-05, "loss": 0.7756, "step": 22900 }, { "epoch": 13.473930872876391, "grad_norm": 8.249403953552246, 
"learning_rate": 1.6130836938684172e-05, "loss": 0.7445, "step": 23000 }, { "epoch": 13.473930872876391, "eval_loss": 0.08392482995986938, "eval_runtime": 146.723, "eval_samples_per_second": 3.408, "eval_steps_per_second": 0.429, "eval_wer": 0.18420277040966695, "step": 23000 }, { "epoch": 13.532513181019333, "grad_norm": 6.522210597991943, "learning_rate": 1.575786961062211e-05, "loss": 0.7743, "step": 23100 }, { "epoch": 13.591095489162273, "grad_norm": 6.639892101287842, "learning_rate": 1.538490228256005e-05, "loss": 0.8147, "step": 23200 }, { "epoch": 13.649677797305214, "grad_norm": 6.0167999267578125, "learning_rate": 1.5011934954497987e-05, "loss": 0.781, "step": 23300 }, { "epoch": 13.708260105448154, "grad_norm": 9.794026374816895, "learning_rate": 1.4638967626435926e-05, "loss": 0.771, "step": 23400 }, { "epoch": 13.766842413591096, "grad_norm": 8.01543140411377, "learning_rate": 1.4266000298373863e-05, "loss": 0.7812, "step": 23500 }, { "epoch": 13.825424721734036, "grad_norm": 10.331818580627441, "learning_rate": 1.3893032970311801e-05, "loss": 0.7864, "step": 23600 }, { "epoch": 13.884007029876978, "grad_norm": 8.055398941040039, "learning_rate": 1.3520065642249741e-05, "loss": 0.7991, "step": 23700 }, { "epoch": 13.942589338019918, "grad_norm": 8.330449104309082, "learning_rate": 1.3147098314187677e-05, "loss": 0.8088, "step": 23800 }, { "epoch": 14.00117164616286, "grad_norm": 6.869382381439209, "learning_rate": 1.2774130986125616e-05, "loss": 0.76, "step": 23900 }, { "epoch": 14.0597539543058, "grad_norm": 6.649117469787598, "learning_rate": 1.2401163658063554e-05, "loss": 0.783, "step": 24000 }, { "epoch": 14.0597539543058, "eval_loss": 0.08357907831668854, "eval_runtime": 147.3445, "eval_samples_per_second": 3.393, "eval_steps_per_second": 0.428, "eval_wer": 0.18420277040966695, "step": 24000 }, { "epoch": 14.118336262448741, "grad_norm": 8.647706031799316, "learning_rate": 1.2028196330001492e-05, "loss": 0.7496, "step": 24100 }, { "epoch": 
14.176918570591681, "grad_norm": 8.944561004638672, "learning_rate": 1.165522900193943e-05, "loss": 0.7662, "step": 24200 }, { "epoch": 14.235500878734623, "grad_norm": 8.365220069885254, "learning_rate": 1.1282261673877368e-05, "loss": 0.7645, "step": 24300 }, { "epoch": 14.294083186877563, "grad_norm": 9.97271728515625, "learning_rate": 1.0909294345815308e-05, "loss": 0.74, "step": 24400 }, { "epoch": 14.352665495020505, "grad_norm": 5.968284606933594, "learning_rate": 1.0536327017753245e-05, "loss": 0.7875, "step": 24500 }, { "epoch": 14.411247803163445, "grad_norm": 6.4041008949279785, "learning_rate": 1.0163359689691183e-05, "loss": 0.7557, "step": 24600 }, { "epoch": 14.469830111306386, "grad_norm": 6.7281036376953125, "learning_rate": 9.790392361629123e-06, "loss": 0.7478, "step": 24700 }, { "epoch": 14.528412419449326, "grad_norm": 9.13178539276123, "learning_rate": 9.41742503356706e-06, "loss": 0.7528, "step": 24800 }, { "epoch": 14.586994727592266, "grad_norm": 7.79683780670166, "learning_rate": 9.044457705504997e-06, "loss": 0.805, "step": 24900 }, { "epoch": 14.645577035735208, "grad_norm": 6.039112567901611, "learning_rate": 8.671490377442937e-06, "loss": 0.7263, "step": 25000 }, { "epoch": 14.645577035735208, "eval_loss": 0.08391948789358139, "eval_runtime": 147.8059, "eval_samples_per_second": 3.383, "eval_steps_per_second": 0.426, "eval_wer": 0.18243442381373415, "step": 25000 }, { "epoch": 14.70415934387815, "grad_norm": 7.50616979598999, "learning_rate": 8.298523049380874e-06, "loss": 0.7722, "step": 25100 }, { "epoch": 14.76274165202109, "grad_norm": 6.403426647186279, "learning_rate": 7.925555721318812e-06, "loss": 0.7742, "step": 25200 }, { "epoch": 14.82132396016403, "grad_norm": 7.445984363555908, "learning_rate": 7.556318066537371e-06, "loss": 0.7505, "step": 25300 }, { "epoch": 14.879906268306971, "grad_norm": 7.770444869995117, "learning_rate": 7.1833507384753095e-06, "loss": 0.7524, "step": 25400 }, { "epoch": 14.938488576449911, 
"grad_norm": 6.477992057800293, "learning_rate": 6.8103834104132485e-06, "loss": 0.7477, "step": 25500 }, { "epoch": 14.997070884592853, "grad_norm": 10.539923667907715, "learning_rate": 6.437416082351186e-06, "loss": 0.7285, "step": 25600 }, { "epoch": 15.055653192735793, "grad_norm": 6.943923473358154, "learning_rate": 6.064448754289125e-06, "loss": 0.6959, "step": 25700 }, { "epoch": 15.114235500878735, "grad_norm": 4.48841667175293, "learning_rate": 5.691481426227062e-06, "loss": 0.7265, "step": 25800 }, { "epoch": 15.172817809021675, "grad_norm": 5.900568008422852, "learning_rate": 5.318514098165001e-06, "loss": 0.765, "step": 25900 }, { "epoch": 15.231400117164617, "grad_norm": 5.62386417388916, "learning_rate": 4.9455467701029394e-06, "loss": 0.7634, "step": 26000 }, { "epoch": 15.231400117164617, "eval_loss": 0.08351606130599976, "eval_runtime": 146.6151, "eval_samples_per_second": 3.41, "eval_steps_per_second": 0.43, "eval_wer": 0.1825817860300619, "step": 26000 }, { "epoch": 15.289982425307556, "grad_norm": 6.403947353363037, "learning_rate": 4.572579442040878e-06, "loss": 0.728, "step": 26100 }, { "epoch": 15.348564733450498, "grad_norm": 6.236737251281738, "learning_rate": 4.199612113978816e-06, "loss": 0.7462, "step": 26200 }, { "epoch": 15.407147041593438, "grad_norm": 5.554813385009766, "learning_rate": 3.826644785916754e-06, "loss": 0.7403, "step": 26300 }, { "epoch": 15.46572934973638, "grad_norm": 4.909285068511963, "learning_rate": 3.453677457854692e-06, "loss": 0.7653, "step": 26400 }, { "epoch": 15.52431165787932, "grad_norm": 5.502344608306885, "learning_rate": 3.0807101297926304e-06, "loss": 0.7319, "step": 26500 }, { "epoch": 15.582893966022262, "grad_norm": 7.525850772857666, "learning_rate": 2.7077428017305685e-06, "loss": 0.7295, "step": 26600 }, { "epoch": 15.641476274165202, "grad_norm": 7.245991230010986, "learning_rate": 2.3347754736685067e-06, "loss": 0.7382, "step": 26700 }, { "epoch": 15.700058582308143, "grad_norm": 
5.762548923492432, "learning_rate": 1.961808145606445e-06, "loss": 0.7253, "step": 26800 }, { "epoch": 15.758640890451083, "grad_norm": 6.127166271209717, "learning_rate": 1.5888408175443833e-06, "loss": 0.7399, "step": 26900 }, { "epoch": 15.817223198594025, "grad_norm": 6.195973873138428, "learning_rate": 1.2158734894823213e-06, "loss": 0.7379, "step": 27000 }, { "epoch": 15.817223198594025, "eval_loss": 0.08341451734304428, "eval_runtime": 146.5011, "eval_samples_per_second": 3.413, "eval_steps_per_second": 0.43, "eval_wer": 0.18287651046271736, "step": 27000 }, { "epoch": 15.875805506736965, "grad_norm": 8.746485710144043, "learning_rate": 8.429061614202597e-07, "loss": 0.7027, "step": 27100 }, { "epoch": 15.934387814879907, "grad_norm": 7.414266109466553, "learning_rate": 4.699388333581979e-07, "loss": 0.7047, "step": 27200 }, { "epoch": 15.992970123022847, "grad_norm": 7.6758832931518555, "learning_rate": 1.0070117857675669e-07, "loss": 0.7324, "step": 27300 }, { "epoch": 16.05155243116579, "grad_norm": 12.168989181518555, "learning_rate": 2.7443730771306743e-05, "loss": 0.7807, "step": 27400 }, { "epoch": 16.11013473930873, "grad_norm": 5.686388969421387, "learning_rate": 2.717385437469639e-05, "loss": 0.8098, "step": 27500 }, { "epoch": 16.16871704745167, "grad_norm": 6.920952320098877, "learning_rate": 2.6903977978086036e-05, "loss": 0.7786, "step": 27600 }, { "epoch": 16.22729935559461, "grad_norm": 8.017998695373535, "learning_rate": 2.6634101581475683e-05, "loss": 0.8257, "step": 27700 }, { "epoch": 16.285881663737552, "grad_norm": 6.6050262451171875, "learning_rate": 2.6364225184865333e-05, "loss": 0.7671, "step": 27800 }, { "epoch": 16.344463971880494, "grad_norm": 8.146703720092773, "learning_rate": 2.609434878825498e-05, "loss": 0.7733, "step": 27900 }, { "epoch": 16.403046280023432, "grad_norm": 11.652145385742188, "learning_rate": 2.5824472391644626e-05, "loss": 0.7902, "step": 28000 }, { "epoch": 16.403046280023432, "eval_loss": 
0.08416531980037689, "eval_runtime": 148.3349, "eval_samples_per_second": 3.371, "eval_steps_per_second": 0.425, "eval_wer": 0.18110816386678455, "step": 28000 }, { "epoch": 16.461628588166374, "grad_norm": 9.010910034179688, "learning_rate": 2.5554595995034276e-05, "loss": 0.7898, "step": 28100 }, { "epoch": 16.520210896309315, "grad_norm": 11.093189239501953, "learning_rate": 2.5284719598423923e-05, "loss": 0.7682, "step": 28200 }, { "epoch": 16.578793204452257, "grad_norm": 7.964006423950195, "learning_rate": 2.501484320181357e-05, "loss": 0.7481, "step": 28300 }, { "epoch": 16.637375512595195, "grad_norm": 10.423765182495117, "learning_rate": 2.474496680520322e-05, "loss": 0.7515, "step": 28400 }, { "epoch": 16.695957820738137, "grad_norm": 6.754664897918701, "learning_rate": 2.4475090408592866e-05, "loss": 0.8024, "step": 28500 }, { "epoch": 16.75454012888108, "grad_norm": 10.269820213317871, "learning_rate": 2.4205214011982513e-05, "loss": 0.7533, "step": 28600 }, { "epoch": 16.81312243702402, "grad_norm": 14.783397674560547, "learning_rate": 2.3935337615372163e-05, "loss": 0.7937, "step": 28700 }, { "epoch": 16.87170474516696, "grad_norm": 8.369039535522461, "learning_rate": 2.366546121876181e-05, "loss": 0.8246, "step": 28800 }, { "epoch": 16.9302870533099, "grad_norm": 8.320894241333008, "learning_rate": 2.3395584822151456e-05, "loss": 0.7703, "step": 28900 }, { "epoch": 16.988869361452842, "grad_norm": 10.491236686706543, "learning_rate": 2.3125708425541102e-05, "loss": 0.8261, "step": 29000 }, { "epoch": 16.988869361452842, "eval_loss": 0.0840698629617691, "eval_runtime": 145.4812, "eval_samples_per_second": 3.437, "eval_steps_per_second": 0.433, "eval_wer": 0.18493958149130563, "step": 29000 }, { "epoch": 17.047451669595784, "grad_norm": 6.7529377937316895, "learning_rate": 2.2855832028930752e-05, "loss": 0.8091, "step": 29100 }, { "epoch": 17.106033977738722, "grad_norm": 6.652218341827393, "learning_rate": 2.25859556323204e-05, "loss": 0.7695, "step": 
29200 }, { "epoch": 17.164616285881664, "grad_norm": 7.654794692993164, "learning_rate": 2.2316079235710046e-05, "loss": 0.7926, "step": 29300 }, { "epoch": 17.223198594024606, "grad_norm": 8.277422904968262, "learning_rate": 2.2046202839099696e-05, "loss": 0.7702, "step": 29400 }, { "epoch": 17.281780902167544, "grad_norm": 15.797304153442383, "learning_rate": 2.1776326442489342e-05, "loss": 0.7597, "step": 29500 }, { "epoch": 17.340363210310485, "grad_norm": 6.769285202026367, "learning_rate": 2.150645004587899e-05, "loss": 0.7588, "step": 29600 }, { "epoch": 17.398945518453427, "grad_norm": 8.328302383422852, "learning_rate": 2.123657364926864e-05, "loss": 0.7719, "step": 29700 }, { "epoch": 17.45752782659637, "grad_norm": 5.7514190673828125, "learning_rate": 2.0966697252658282e-05, "loss": 0.7985, "step": 29800 }, { "epoch": 17.516110134739307, "grad_norm": 5.553383827209473, "learning_rate": 2.069682085604793e-05, "loss": 0.7602, "step": 29900 }, { "epoch": 17.57469244288225, "grad_norm": 7.833782196044922, "learning_rate": 2.042694445943758e-05, "loss": 0.7531, "step": 30000 }, { "epoch": 17.57469244288225, "eval_loss": 0.08400005102157593, "eval_runtime": 147.1645, "eval_samples_per_second": 3.398, "eval_steps_per_second": 0.428, "eval_wer": 0.18670792808723843, "step": 30000 }, { "epoch": 17.63327475102519, "grad_norm": 6.075071334838867, "learning_rate": 2.0157068062827225e-05, "loss": 0.7471, "step": 30100 }, { "epoch": 17.691857059168132, "grad_norm": 6.894543647766113, "learning_rate": 1.9887191666216872e-05, "loss": 0.7926, "step": 30200 }, { "epoch": 17.75043936731107, "grad_norm": 11.549782752990723, "learning_rate": 1.9617315269606522e-05, "loss": 0.7308, "step": 30300 }, { "epoch": 17.809021675454012, "grad_norm": 7.361614227294922, "learning_rate": 1.934743887299617e-05, "loss": 0.7545, "step": 30400 }, { "epoch": 17.867603983596954, "grad_norm": 12.995938301086426, "learning_rate": 1.9077562476385815e-05, "loss": 0.7809, "step": 30500 }, { 
"epoch": 17.926186291739896, "grad_norm": 4.978572368621826, "learning_rate": 1.8807686079775465e-05, "loss": 0.7587, "step": 30600 }, { "epoch": 17.984768599882834, "grad_norm": 6.543401718139648, "learning_rate": 1.853780968316511e-05, "loss": 0.7432, "step": 30700 }, { "epoch": 18.043350908025776, "grad_norm": 8.726702690124512, "learning_rate": 1.8267933286554758e-05, "loss": 0.7305, "step": 30800 }, { "epoch": 18.101933216168717, "grad_norm": 6.32004976272583, "learning_rate": 1.7998056889944405e-05, "loss": 0.7454, "step": 30900 }, { "epoch": 18.16051552431166, "grad_norm": 5.896217346191406, "learning_rate": 1.7728180493334055e-05, "loss": 0.7166, "step": 31000 }, { "epoch": 18.16051552431166, "eval_loss": 0.0838567316532135, "eval_runtime": 147.3922, "eval_samples_per_second": 3.392, "eval_steps_per_second": 0.427, "eval_wer": 0.1905393457117595, "step": 31000 }, { "epoch": 18.219097832454597, "grad_norm": 6.72420597076416, "learning_rate": 1.74583040967237e-05, "loss": 0.7028, "step": 31100 }, { "epoch": 18.27768014059754, "grad_norm": 5.8699140548706055, "learning_rate": 1.7188427700113348e-05, "loss": 0.7548, "step": 31200 }, { "epoch": 18.33626244874048, "grad_norm": 6.714815139770508, "learning_rate": 1.6921250067469098e-05, "loss": 0.7619, "step": 31300 }, { "epoch": 18.394844756883423, "grad_norm": 9.205123901367188, "learning_rate": 1.6651373670858748e-05, "loss": 0.7146, "step": 31400 }, { "epoch": 18.45342706502636, "grad_norm": 7.359625339508057, "learning_rate": 1.6381497274248395e-05, "loss": 0.7425, "step": 31500 }, { "epoch": 18.512009373169303, "grad_norm": 12.034134864807129, "learning_rate": 1.6114319641604145e-05, "loss": 0.7325, "step": 31600 }, { "epoch": 18.570591681312244, "grad_norm": 12.600486755371094, "learning_rate": 1.584444324499379e-05, "loss": 0.7628, "step": 31700 }, { "epoch": 18.629173989455186, "grad_norm": 5.2866530418396, "learning_rate": 1.557456684838344e-05, "loss": 0.7636, "step": 31800 }, { "epoch": 
18.687756297598124, "grad_norm": 5.383816719055176, "learning_rate": 1.5304690451773088e-05, "loss": 0.727, "step": 31900 }, { "epoch": 18.746338605741066, "grad_norm": 8.094857215881348, "learning_rate": 1.5034814055162735e-05, "loss": 0.7976, "step": 32000 }, { "epoch": 18.746338605741066, "eval_loss": 0.08411071449518204, "eval_runtime": 145.6438, "eval_samples_per_second": 3.433, "eval_steps_per_second": 0.433, "eval_wer": 0.18376068376068377, "step": 32000 }, { "epoch": 18.804920913884008, "grad_norm": 11.049666404724121, "learning_rate": 1.4764937658552383e-05, "loss": 0.7523, "step": 32100 }, { "epoch": 18.86350322202695, "grad_norm": 5.7121782302856445, "learning_rate": 1.4495061261942031e-05, "loss": 0.7462, "step": 32200 }, { "epoch": 18.922085530169888, "grad_norm": 6.261909008026123, "learning_rate": 1.4225184865331678e-05, "loss": 0.6894, "step": 32300 }, { "epoch": 18.98066783831283, "grad_norm": 9.238897323608398, "learning_rate": 1.3955308468721326e-05, "loss": 0.7625, "step": 32400 }, { "epoch": 19.03925014645577, "grad_norm": 5.913239002227783, "learning_rate": 1.3685432072110973e-05, "loss": 0.7324, "step": 32500 }, { "epoch": 19.097832454598713, "grad_norm": 6.142359733581543, "learning_rate": 1.3415555675500621e-05, "loss": 0.7275, "step": 32600 }, { "epoch": 19.15641476274165, "grad_norm": 8.006820678710938, "learning_rate": 1.314567927889027e-05, "loss": 0.7335, "step": 32700 }, { "epoch": 19.214997070884593, "grad_norm": 10.434711456298828, "learning_rate": 1.2875802882279916e-05, "loss": 0.7093, "step": 32800 }, { "epoch": 19.273579379027534, "grad_norm": 7.119588375091553, "learning_rate": 1.2605926485669564e-05, "loss": 0.7213, "step": 32900 }, { "epoch": 19.332161687170476, "grad_norm": 6.306182861328125, "learning_rate": 1.2336050089059211e-05, "loss": 0.7008, "step": 33000 }, { "epoch": 19.332161687170476, "eval_loss": 0.08348561823368073, "eval_runtime": 148.0399, "eval_samples_per_second": 3.377, "eval_steps_per_second": 0.426, 
"eval_wer": 0.18641320365458297, "step": 33000 }, { "epoch": 19.390743995313414, "grad_norm": 7.711204528808594, "learning_rate": 1.206617369244886e-05, "loss": 0.7326, "step": 33100 }, { "epoch": 19.449326303456356, "grad_norm": 6.137068271636963, "learning_rate": 1.1796297295838508e-05, "loss": 0.6976, "step": 33200 }, { "epoch": 19.507908611599298, "grad_norm": 6.798486709594727, "learning_rate": 1.1526420899228154e-05, "loss": 0.7118, "step": 33300 }, { "epoch": 19.566490919742236, "grad_norm": 6.276034355163574, "learning_rate": 1.12565445026178e-05, "loss": 0.7112, "step": 33400 }, { "epoch": 19.625073227885178, "grad_norm": 7.021731853485107, "learning_rate": 1.0986668106007449e-05, "loss": 0.6984, "step": 33500 }, { "epoch": 19.68365553602812, "grad_norm": 5.709362983703613, "learning_rate": 1.0716791709397096e-05, "loss": 0.7376, "step": 33600 }, { "epoch": 19.74223784417106, "grad_norm": 7.312982559204102, "learning_rate": 1.0446915312786744e-05, "loss": 0.7448, "step": 33700 }, { "epoch": 19.800820152314, "grad_norm": 5.710330486297607, "learning_rate": 1.0177038916176392e-05, "loss": 0.7524, "step": 33800 }, { "epoch": 19.85940246045694, "grad_norm": 6.241232395172119, "learning_rate": 9.907162519566039e-06, "loss": 0.6942, "step": 33900 }, { "epoch": 19.917984768599883, "grad_norm": 6.5843024253845215, "learning_rate": 9.637286122955687e-06, "loss": 0.707, "step": 34000 }, { "epoch": 19.917984768599883, "eval_loss": 0.08329460024833679, "eval_runtime": 148.0997, "eval_samples_per_second": 3.376, "eval_steps_per_second": 0.425, "eval_wer": 0.18715001473622164, "step": 34000 }, { "epoch": 19.976567076742825, "grad_norm": 7.8604302406311035, "learning_rate": 9.367409726345334e-06, "loss": 0.7127, "step": 34100 }, { "epoch": 20.035149384885763, "grad_norm": 6.885805130004883, "learning_rate": 9.097533329734982e-06, "loss": 0.6805, "step": 34200 }, { "epoch": 20.093731693028705, "grad_norm": 5.695769786834717, "learning_rate": 8.82765693312463e-06, "loss": 
0.7139, "step": 34300 }, { "epoch": 20.152314001171646, "grad_norm": 6.634642124176025, "learning_rate": 8.557780536514277e-06, "loss": 0.6917, "step": 34400 }, { "epoch": 20.210896309314588, "grad_norm": 5.601979732513428, "learning_rate": 8.287904139903925e-06, "loss": 0.6576, "step": 34500 }, { "epoch": 20.269478617457526, "grad_norm": 4.387629508972168, "learning_rate": 8.018027743293572e-06, "loss": 0.7389, "step": 34600 }, { "epoch": 20.328060925600468, "grad_norm": 5.184898853302002, "learning_rate": 7.748151346683219e-06, "loss": 0.6803, "step": 34700 }, { "epoch": 20.38664323374341, "grad_norm": 9.722779273986816, "learning_rate": 7.478274950072867e-06, "loss": 0.7095, "step": 34800 }, { "epoch": 20.44522554188635, "grad_norm": 6.964946269989014, "learning_rate": 7.208398553462514e-06, "loss": 0.7372, "step": 34900 }, { "epoch": 20.50380785002929, "grad_norm": 9.976523399353027, "learning_rate": 6.938522156852162e-06, "loss": 0.6865, "step": 35000 }, { "epoch": 20.50380785002929, "eval_loss": 0.08354520797729492, "eval_runtime": 147.4678, "eval_samples_per_second": 3.391, "eval_steps_per_second": 0.427, "eval_wer": 0.1843501326259947, "step": 35000 }, { "epoch": 20.56239015817223, "grad_norm": 6.862576007843018, "learning_rate": 6.668645760241809e-06, "loss": 0.6942, "step": 35100 }, { "epoch": 20.620972466315173, "grad_norm": 10.506976127624512, "learning_rate": 6.3987693636314575e-06, "loss": 0.6863, "step": 35200 }, { "epoch": 20.679554774458115, "grad_norm": 6.648481845855713, "learning_rate": 6.128892967021105e-06, "loss": 0.6987, "step": 35300 }, { "epoch": 20.738137082601053, "grad_norm": 7.607233047485352, "learning_rate": 5.8590165704107524e-06, "loss": 0.689, "step": 35400 }, { "epoch": 20.796719390743995, "grad_norm": 6.110599040985107, "learning_rate": 5.589140173800399e-06, "loss": 0.7004, "step": 35500 }, { "epoch": 20.855301698886937, "grad_norm": 5.632733345031738, "learning_rate": 5.319263777190047e-06, "loss": 0.7299, "step": 35600 }, { 
"epoch": 20.91388400702988, "grad_norm": 5.644909381866455, "learning_rate": 5.052086144545798e-06, "loss": 0.6749, "step": 35700 }, { "epoch": 20.972466315172817, "grad_norm": 5.2816386222839355, "learning_rate": 4.782209747935446e-06, "loss": 0.671, "step": 35800 }, { "epoch": 21.03104862331576, "grad_norm": 4.735965728759766, "learning_rate": 4.512333351325093e-06, "loss": 0.7137, "step": 35900 }, { "epoch": 21.0896309314587, "grad_norm": 6.491783618927002, "learning_rate": 4.242456954714741e-06, "loss": 0.6927, "step": 36000 }, { "epoch": 21.0896309314587, "eval_loss": 0.08344998210668564, "eval_runtime": 147.4098, "eval_samples_per_second": 3.392, "eval_steps_per_second": 0.427, "eval_wer": 0.18818155025051578, "step": 36000 }, { "epoch": 21.148213239601642, "grad_norm": 6.068084239959717, "learning_rate": 3.972580558104388e-06, "loss": 0.7155, "step": 36100 }, { "epoch": 21.20679554774458, "grad_norm": 4.509976863861084, "learning_rate": 3.702704161494036e-06, "loss": 0.6615, "step": 36200 }, { "epoch": 21.26537785588752, "grad_norm": 6.346072673797607, "learning_rate": 3.4328277648836835e-06, "loss": 0.6682, "step": 36300 }, { "epoch": 21.323960164030463, "grad_norm": 6.095388889312744, "learning_rate": 3.1629513682733306e-06, "loss": 0.6588, "step": 36400 }, { "epoch": 21.382542472173405, "grad_norm": 5.079260349273682, "learning_rate": 2.8930749716629784e-06, "loss": 0.67, "step": 36500 }, { "epoch": 21.441124780316343, "grad_norm": 4.323718547821045, "learning_rate": 2.623198575052626e-06, "loss": 0.6606, "step": 36600 }, { "epoch": 21.499707088459285, "grad_norm": 5.659894943237305, "learning_rate": 2.3533221784422738e-06, "loss": 0.69, "step": 36700 }, { "epoch": 21.558289396602227, "grad_norm": 7.08272647857666, "learning_rate": 2.083445781831921e-06, "loss": 0.6506, "step": 36800 }, { "epoch": 21.61687170474517, "grad_norm": 4.9979119300842285, "learning_rate": 1.8135693852215685e-06, "loss": 0.7215, "step": 36900 }, { "epoch": 21.675454012888107, 
"grad_norm": 6.872857093811035, "learning_rate": 1.5436929886112161e-06, "loss": 0.7014, "step": 37000 }, { "epoch": 21.675454012888107, "eval_loss": 0.08349551260471344, "eval_runtime": 147.5946, "eval_samples_per_second": 3.388, "eval_steps_per_second": 0.427, "eval_wer": 0.1861184792219275, "step": 37000 }, { "epoch": 21.73403632103105, "grad_norm": 6.6599273681640625, "learning_rate": 1.2738165920008636e-06, "loss": 0.6701, "step": 37100 }, { "epoch": 21.79261862917399, "grad_norm": 4.662840366363525, "learning_rate": 1.0039401953905112e-06, "loss": 0.7006, "step": 37200 }, { "epoch": 21.851200937316932, "grad_norm": 6.501893043518066, "learning_rate": 7.340637987801587e-07, "loss": 0.66, "step": 37300 }, { "epoch": 21.90978324545987, "grad_norm": 5.708708763122559, "learning_rate": 4.6418740216980626e-07, "loss": 0.6819, "step": 37400 }, { "epoch": 21.968365553602812, "grad_norm": 4.885653018951416, "learning_rate": 1.943110055594538e-07, "loss": 0.7, "step": 37500 }, { "epoch": 22.026947861745754, "grad_norm": 5.802590370178223, "learning_rate": 1.5496103185816508e-05, "loss": 0.7875, "step": 37600 }, { "epoch": 22.085530169888692, "grad_norm": 6.833991527557373, "learning_rate": 1.5268219315436854e-05, "loss": 0.6936, "step": 37700 }, { "epoch": 22.144112478031634, "grad_norm": 9.980413436889648, "learning_rate": 1.5042614283760998e-05, "loss": 0.6935, "step": 37800 }, { "epoch": 22.202694786174575, "grad_norm": 6.407867908477783, "learning_rate": 1.4814730413381343e-05, "loss": 0.669, "step": 37900 }, { "epoch": 22.261277094317517, "grad_norm": 7.097990989685059, "learning_rate": 1.4586846543001687e-05, "loss": 0.6951, "step": 38000 }, { "epoch": 22.261277094317517, "eval_loss": 0.08332780748605728, "eval_runtime": 147.6911, "eval_samples_per_second": 3.385, "eval_steps_per_second": 0.427, "eval_wer": 0.1874447391688771, "step": 38000 }, { "epoch": 22.319859402460455, "grad_norm": 7.611706733703613, "learning_rate": 1.4358962672622033e-05, "loss": 0.6751, 
"step": 38100 }, { "epoch": 22.378441710603397, "grad_norm": 11.156392097473145, "learning_rate": 1.4131078802242378e-05, "loss": 0.7132, "step": 38200 }, { "epoch": 22.43702401874634, "grad_norm": 7.024628639221191, "learning_rate": 1.3903194931862724e-05, "loss": 0.7599, "step": 38300 }, { "epoch": 22.49560632688928, "grad_norm": 6.373144626617432, "learning_rate": 1.3675311061483068e-05, "loss": 0.6951, "step": 38400 }, { "epoch": 22.55418863503222, "grad_norm": 5.750803470611572, "learning_rate": 1.3447427191103415e-05, "loss": 0.7078, "step": 38500 }, { "epoch": 22.61277094317516, "grad_norm": 6.302127361297607, "learning_rate": 1.3219543320723759e-05, "loss": 0.7153, "step": 38600 }, { "epoch": 22.671353251318102, "grad_norm": 6.580202102661133, "learning_rate": 1.2991659450344103e-05, "loss": 0.6923, "step": 38700 }, { "epoch": 22.729935559461044, "grad_norm": 5.348507404327393, "learning_rate": 1.2763775579964451e-05, "loss": 0.6998, "step": 38800 }, { "epoch": 22.788517867603982, "grad_norm": 7.927694797515869, "learning_rate": 1.2535891709584796e-05, "loss": 0.7093, "step": 38900 }, { "epoch": 22.847100175746924, "grad_norm": 4.928946495056152, "learning_rate": 1.2308007839205142e-05, "loss": 0.6848, "step": 39000 }, { "epoch": 22.847100175746924, "eval_loss": 0.08338670432567596, "eval_runtime": 146.104, "eval_samples_per_second": 3.422, "eval_steps_per_second": 0.431, "eval_wer": 0.1927497789566755, "step": 39000 }, { "epoch": 22.905682483889866, "grad_norm": 9.807655334472656, "learning_rate": 1.2080123968825486e-05, "loss": 0.6954, "step": 39100 }, { "epoch": 22.964264792032807, "grad_norm": 4.682498931884766, "learning_rate": 1.1852240098445833e-05, "loss": 0.7273, "step": 39200 }, { "epoch": 23.022847100175746, "grad_norm": 6.8382086753845215, "learning_rate": 1.1624356228066179e-05, "loss": 0.721, "step": 39300 }, { "epoch": 23.081429408318687, "grad_norm": 7.560245037078857, "learning_rate": 1.1396472357686523e-05, "loss": 0.689, "step": 39400 }, 
{ "epoch": 23.14001171646163, "grad_norm": 7.024509906768799, "learning_rate": 1.116858848730687e-05, "loss": 0.6714, "step": 39500 }, { "epoch": 23.19859402460457, "grad_norm": 8.46679973602295, "learning_rate": 1.0940704616927216e-05, "loss": 0.6729, "step": 39600 }, { "epoch": 23.25717633274751, "grad_norm": 7.632561683654785, "learning_rate": 1.071282074654756e-05, "loss": 0.7032, "step": 39700 }, { "epoch": 23.31575864089045, "grad_norm": 9.298177719116211, "learning_rate": 1.0484936876167906e-05, "loss": 0.6915, "step": 39800 }, { "epoch": 23.374340949033392, "grad_norm": 6.186316013336182, "learning_rate": 1.0259331844492047e-05, "loss": 0.6625, "step": 39900 }, { "epoch": 23.432923257176334, "grad_norm": 4.553198337554932, "learning_rate": 1.0031447974112393e-05, "loss": 0.7096, "step": 40000 }, { "epoch": 23.432923257176334, "eval_loss": 0.08342915773391724, "eval_runtime": 146.9046, "eval_samples_per_second": 3.404, "eval_steps_per_second": 0.429, "eval_wer": 0.19363395225464192, "step": 40000 }, { "epoch": 23.491505565319272, "grad_norm": 5.817235946655273, "learning_rate": 9.803564103732739e-06, "loss": 0.6733, "step": 40100 }, { "epoch": 23.550087873462214, "grad_norm": 5.511366844177246, "learning_rate": 9.575680233353083e-06, "loss": 0.728, "step": 40200 }, { "epoch": 23.608670181605156, "grad_norm": 7.099341869354248, "learning_rate": 9.34779636297343e-06, "loss": 0.7007, "step": 40300 }, { "epoch": 23.667252489748098, "grad_norm": 4.963494777679443, "learning_rate": 9.119912492593774e-06, "loss": 0.6804, "step": 40400 }, { "epoch": 23.725834797891036, "grad_norm": 7.063295364379883, "learning_rate": 8.89202862221412e-06, "loss": 0.7092, "step": 40500 }, { "epoch": 23.784417106033978, "grad_norm": 6.879157543182373, "learning_rate": 8.664144751834466e-06, "loss": 0.6753, "step": 40600 }, { "epoch": 23.84299941417692, "grad_norm": 5.31470251083374, "learning_rate": 8.43626088145481e-06, "loss": 0.6876, "step": 40700 }, { "epoch": 23.90158172231986, 
"grad_norm": 6.351736068725586, "learning_rate": 8.208377011075157e-06, "loss": 0.6949, "step": 40800 }, { "epoch": 23.9601640304628, "grad_norm": 5.326944351196289, "learning_rate": 7.980493140695503e-06, "loss": 0.705, "step": 40900 }, { "epoch": 24.01874633860574, "grad_norm": 6.150857448577881, "learning_rate": 7.752609270315848e-06, "loss": 0.6952, "step": 41000 }, { "epoch": 24.01874633860574, "eval_loss": 0.08346738666296005, "eval_runtime": 145.9289, "eval_samples_per_second": 3.426, "eval_steps_per_second": 0.432, "eval_wer": 0.19333922782198645, "step": 41000 }, { "epoch": 24.077328646748683, "grad_norm": 5.961891174316406, "learning_rate": 7.524725399936193e-06, "loss": 0.6498, "step": 41100 }, { "epoch": 24.135910954891624, "grad_norm": 4.94498348236084, "learning_rate": 7.296841529556539e-06, "loss": 0.6827, "step": 41200 }, { "epoch": 24.194493263034563, "grad_norm": 6.914667129516602, "learning_rate": 7.068957659176884e-06, "loss": 0.6647, "step": 41300 }, { "epoch": 24.253075571177504, "grad_norm": 5.37884521484375, "learning_rate": 6.84107378879723e-06, "loss": 0.6447, "step": 41400 }, { "epoch": 24.311657879320446, "grad_norm": 7.762170791625977, "learning_rate": 6.613189918417574e-06, "loss": 0.6362, "step": 41500 }, { "epoch": 24.370240187463388, "grad_norm": 5.311378479003906, "learning_rate": 6.38530604803792e-06, "loss": 0.7053, "step": 41600 }, { "epoch": 24.428822495606326, "grad_norm": 5.185734748840332, "learning_rate": 6.157422177658266e-06, "loss": 0.6724, "step": 41700 }, { "epoch": 24.487404803749268, "grad_norm": 6.12027645111084, "learning_rate": 5.929538307278611e-06, "loss": 0.6947, "step": 41800 }, { "epoch": 24.54598711189221, "grad_norm": 11.165681838989258, "learning_rate": 5.701654436898956e-06, "loss": 0.6999, "step": 41900 }, { "epoch": 24.604569420035148, "grad_norm": 6.521625995635986, "learning_rate": 5.473770566519302e-06, "loss": 0.692, "step": 42000 }, { "epoch": 24.604569420035148, "eval_loss": 0.0832965150475502, 
"eval_runtime": 145.9791, "eval_samples_per_second": 3.425, "eval_steps_per_second": 0.432, "eval_wer": 0.19304450338933096, "step": 42000 }, { "epoch": 24.66315172817809, "grad_norm": 7.7173333168029785, "learning_rate": 5.248165534843444e-06, "loss": 0.6641, "step": 42100 }, { "epoch": 24.72173403632103, "grad_norm": 5.125652313232422, "learning_rate": 5.02028166446379e-06, "loss": 0.6576, "step": 42200 }, { "epoch": 24.780316344463973, "grad_norm": 5.99462366104126, "learning_rate": 4.792397794084135e-06, "loss": 0.6733, "step": 42300 }, { "epoch": 24.83889865260691, "grad_norm": 5.6660614013671875, "learning_rate": 4.5645139237044806e-06, "loss": 0.7033, "step": 42400 }, { "epoch": 24.897480960749853, "grad_norm": 7.414560317993164, "learning_rate": 4.336630053324826e-06, "loss": 0.6756, "step": 42500 }, { "epoch": 24.956063268892795, "grad_norm": 6.082986354827881, "learning_rate": 4.108746182945172e-06, "loss": 0.6713, "step": 42600 }, { "epoch": 25.014645577035736, "grad_norm": 5.000583648681641, "learning_rate": 3.8808623125655165e-06, "loss": 0.6821, "step": 42700 }, { "epoch": 25.073227885178675, "grad_norm": 6.196038246154785, "learning_rate": 3.6529784421858623e-06, "loss": 0.6795, "step": 42800 }, { "epoch": 25.131810193321616, "grad_norm": 7.409509181976318, "learning_rate": 3.4250945718062076e-06, "loss": 0.6388, "step": 42900 }, { "epoch": 25.190392501464558, "grad_norm": 6.713177680969238, "learning_rate": 3.1972107014265533e-06, "loss": 0.6552, "step": 43000 }, { "epoch": 25.190392501464558, "eval_loss": 0.0831432044506073, "eval_runtime": 144.9356, "eval_samples_per_second": 3.45, "eval_steps_per_second": 0.435, "eval_wer": 0.18670792808723843, "step": 43000 }, { "epoch": 25.2489748096075, "grad_norm": 4.68192720413208, "learning_rate": 2.9693268310468986e-06, "loss": 0.6323, "step": 43100 }, { "epoch": 25.307557117750438, "grad_norm": 4.701879501342773, "learning_rate": 2.741442960667244e-06, "loss": 0.6499, "step": 43200 }, { "epoch": 
25.36613942589338, "grad_norm": 5.518495559692383, "learning_rate": 2.5135590902875897e-06, "loss": 0.6513, "step": 43300 }, { "epoch": 25.42472173403632, "grad_norm": 4.257356643676758, "learning_rate": 2.285675219907935e-06, "loss": 0.6957, "step": 43400 }, { "epoch": 25.483304042179263, "grad_norm": 4.883972644805908, "learning_rate": 2.0577913495282803e-06, "loss": 0.6552, "step": 43500 }, { "epoch": 25.5418863503222, "grad_norm": 4.6188201904296875, "learning_rate": 1.8299074791486259e-06, "loss": 0.6683, "step": 43600 }, { "epoch": 25.600468658465143, "grad_norm": 8.4751558303833, "learning_rate": 1.6020236087689714e-06, "loss": 0.636, "step": 43700 }, { "epoch": 25.659050966608085, "grad_norm": 5.713025093078613, "learning_rate": 1.374139738389317e-06, "loss": 0.6833, "step": 43800 }, { "epoch": 25.717633274751027, "grad_norm": null, "learning_rate": 1.1462558680096623e-06, "loss": 0.6807, "step": 43900 }, { "epoch": 25.776215582893965, "grad_norm": 7.257518291473389, "learning_rate": 9.206508363338042e-07, "loss": 0.6641, "step": 44000 }, { "epoch": 25.776215582893965, "eval_loss": 0.0832269936800003, "eval_runtime": 145.3966, "eval_samples_per_second": 3.439, "eval_steps_per_second": 0.433, "eval_wer": 0.1874447391688771, "step": 44000 }, { "epoch": 25.834797891036906, "grad_norm": 5.92907190322876, "learning_rate": 6.927669659541498e-07, "loss": 0.6462, "step": 44100 }, { "epoch": 25.89338019917985, "grad_norm": 4.766348361968994, "learning_rate": 4.6488309557449525e-07, "loss": 0.6934, "step": 44200 }, { "epoch": 25.95196250732279, "grad_norm": 6.461400032043457, "learning_rate": 2.3699922519484072e-07, "loss": 0.6662, "step": 44300 }, { "epoch": 26.010544815465728, "grad_norm": 5.860207557678223, "learning_rate": 1.3472687832774603e-05, "loss": 0.6097, "step": 44400 }, { "epoch": 26.06912712360867, "grad_norm": 5.674122333526611, "learning_rate": 1.3275488069414319e-05, "loss": 0.6638, "step": 44500 }, { "epoch": 26.12770943175161, "grad_norm": 
7.24979829788208, "learning_rate": 1.3078288306054034e-05, "loss": 0.641, "step": 44600 }, { "epoch": 26.186291739894553, "grad_norm": 7.524514198303223, "learning_rate": 1.288108854269375e-05, "loss": 0.6722, "step": 44700 }, { "epoch": 26.24487404803749, "grad_norm": 5.542699337005615, "learning_rate": 1.2683888779333466e-05, "loss": 0.6386, "step": 44800 }, { "epoch": 26.303456356180433, "grad_norm": 7.757282257080078, "learning_rate": 1.2486689015973181e-05, "loss": 0.6976, "step": 44900 }, { "epoch": 26.362038664323375, "grad_norm": 5.828668117523193, "learning_rate": 1.2289489252612897e-05, "loss": 0.6921, "step": 45000 }, { "epoch": 26.362038664323375, "eval_loss": 0.08329325169324875, "eval_runtime": 154.0108, "eval_samples_per_second": 3.247, "eval_steps_per_second": 0.409, "eval_wer": 0.18803418803418803, "step": 45000 }, { "epoch": 26.420620972466317, "grad_norm": 6.481083869934082, "learning_rate": 1.2092289489252613e-05, "loss": 0.6643, "step": 45100 }, { "epoch": 26.479203280609255, "grad_norm": 6.508504390716553, "learning_rate": 1.1895089725892329e-05, "loss": 0.6986, "step": 45200 }, { "epoch": 26.537785588752197, "grad_norm": 5.0908894538879395, "learning_rate": 1.1697889962532044e-05, "loss": 0.6741, "step": 45300 }, { "epoch": 26.59636789689514, "grad_norm": 6.918355464935303, "learning_rate": 1.1500690199171762e-05, "loss": 0.6774, "step": 45400 }, { "epoch": 26.65495020503808, "grad_norm": 5.500540733337402, "learning_rate": 1.1303490435811477e-05, "loss": 0.6861, "step": 45500 }, { "epoch": 26.71353251318102, "grad_norm": 5.732079029083252, "learning_rate": 1.1106290672451193e-05, "loss": 0.6836, "step": 45600 }, { "epoch": 26.77211482132396, "grad_norm": 7.091628074645996, "learning_rate": 1.0909090909090909e-05, "loss": 0.7212, "step": 45700 }, { "epoch": 26.830697129466902, "grad_norm": 6.164028644561768, "learning_rate": 1.0711891145730625e-05, "loss": 0.6832, "step": 45800 }, { "epoch": 26.889279437609844, "grad_norm": 6.679256916046143, 
"learning_rate": 1.051469138237034e-05, "loss": 0.6886, "step": 45900 }, { "epoch": 26.947861745752782, "grad_norm": 6.822471618652344, "learning_rate": 1.0317491619010058e-05, "loss": 0.6894, "step": 46000 }, { "epoch": 26.947861745752782, "eval_loss": 0.08317731320858002, "eval_runtime": 148.6991, "eval_samples_per_second": 3.362, "eval_steps_per_second": 0.424, "eval_wer": 0.18552903035661655, "step": 46000 }, { "epoch": 27.006444053895724, "grad_norm": 5.017155170440674, "learning_rate": 1.0120291855649773e-05, "loss": 0.7585, "step": 46100 }, { "epoch": 27.065026362038665, "grad_norm": 7.552450180053711, "learning_rate": 9.92309209228949e-06, "loss": 0.6377, "step": 46200 }, { "epoch": 27.123608670181603, "grad_norm": 5.625948905944824, "learning_rate": 9.725892328929207e-06, "loss": 0.6606, "step": 46300 }, { "epoch": 27.182190978324545, "grad_norm": 6.0872039794921875, "learning_rate": 9.528692565568922e-06, "loss": 0.6853, "step": 46400 }, { "epoch": 27.240773286467487, "grad_norm": 7.785630226135254, "learning_rate": 9.331492802208638e-06, "loss": 0.6862, "step": 46500 }, { "epoch": 27.29935559461043, "grad_norm": 7.472136974334717, "learning_rate": 9.134293038848354e-06, "loss": 0.6528, "step": 46600 }, { "epoch": 27.357937902753367, "grad_norm": 5.214349269866943, "learning_rate": 8.937093275488071e-06, "loss": 0.6734, "step": 46700 }, { "epoch": 27.41652021089631, "grad_norm": 5.109072208404541, "learning_rate": 8.739893512127787e-06, "loss": 0.6936, "step": 46800 }, { "epoch": 27.47510251903925, "grad_norm": 5.236739158630371, "learning_rate": 8.542693748767503e-06, "loss": 0.6711, "step": 46900 }, { "epoch": 27.533684827182192, "grad_norm": 6.8590264320373535, "learning_rate": 8.345493985407218e-06, "loss": 0.7041, "step": 47000 }, { "epoch": 27.533684827182192, "eval_loss": 0.08270065486431122, "eval_runtime": 146.6093, "eval_samples_per_second": 3.41, "eval_steps_per_second": 0.43, "eval_wer": 0.18552903035661655, "step": 47000 }, { "epoch": 
27.59226713532513, "grad_norm": 6.453911781311035, "learning_rate": 8.148294222046934e-06, "loss": 0.7099, "step": 47100 }, { "epoch": 27.650849443468072, "grad_norm": 9.15201473236084, "learning_rate": 7.95109445868665e-06, "loss": 0.6753, "step": 47200 }, { "epoch": 27.709431751611014, "grad_norm": 10.103302955627441, "learning_rate": 7.753894695326366e-06, "loss": 0.615, "step": 47300 }, { "epoch": 27.768014059753956, "grad_norm": 6.687941551208496, "learning_rate": 7.556694931966082e-06, "loss": 0.6812, "step": 47400 }, { "epoch": 27.826596367896894, "grad_norm": 6.050097942352295, "learning_rate": 7.359495168605799e-06, "loss": 0.6329, "step": 47500 }, { "epoch": 27.885178676039835, "grad_norm": 6.162942886352539, "learning_rate": 7.162295405245514e-06, "loss": 0.6429, "step": 47600 }, { "epoch": 27.943760984182777, "grad_norm": 5.811770915985107, "learning_rate": 6.96509564188523e-06, "loss": 0.7052, "step": 47700 }, { "epoch": 28.00234329232572, "grad_norm": 4.816370010375977, "learning_rate": 6.767895878524947e-06, "loss": 0.6578, "step": 47800 }, { "epoch": 28.060925600468657, "grad_norm": 4.412487506866455, "learning_rate": 6.570696115164662e-06, "loss": 0.6302, "step": 47900 }, { "epoch": 28.1195079086116, "grad_norm": 5.892630100250244, "learning_rate": 6.373496351804378e-06, "loss": 0.6452, "step": 48000 }, { "epoch": 28.1195079086116, "eval_loss": 0.08297573775053024, "eval_runtime": 146.2177, "eval_samples_per_second": 3.42, "eval_steps_per_second": 0.431, "eval_wer": 0.18818155025051578, "step": 48000 }, { "epoch": 28.17809021675454, "grad_norm": 4.816265106201172, "learning_rate": 6.1782685860776975e-06, "loss": 0.6636, "step": 48100 }, { "epoch": 28.236672524897482, "grad_norm": 6.337242126464844, "learning_rate": 5.981068822717413e-06, "loss": 0.6501, "step": 48200 }, { "epoch": 28.29525483304042, "grad_norm": 7.430861473083496, "learning_rate": 5.783869059357129e-06, "loss": 0.6455, "step": 48300 }, { "epoch": 28.353837141183362, "grad_norm": 
10.299432754516602, "learning_rate": 5.586669295996845e-06, "loss": 0.6569, "step": 48400 }, { "epoch": 28.412419449326304, "grad_norm": 5.967062950134277, "learning_rate": 5.389469532636561e-06, "loss": 0.651, "step": 48500 }, { "epoch": 28.471001757469246, "grad_norm": 5.434126853942871, "learning_rate": 5.192269769276277e-06, "loss": 0.6563, "step": 48600 }, { "epoch": 28.529584065612184, "grad_norm": 4.6583075523376465, "learning_rate": 4.995070005915993e-06, "loss": 0.6512, "step": 48700 }, { "epoch": 28.588166373755126, "grad_norm": 6.697512149810791, "learning_rate": 4.797870242555709e-06, "loss": 0.664, "step": 48800 }, { "epoch": 28.646748681898067, "grad_norm": 8.926551818847656, "learning_rate": 4.600670479195425e-06, "loss": 0.6354, "step": 48900 }, { "epoch": 28.70533099004101, "grad_norm": 5.536263942718506, "learning_rate": 4.403470715835141e-06, "loss": 0.6682, "step": 49000 }, { "epoch": 28.70533099004101, "eval_loss": 0.08284977823495865, "eval_runtime": 146.4815, "eval_samples_per_second": 3.413, "eval_steps_per_second": 0.43, "eval_wer": 0.18626584143825523, "step": 49000 }, { "epoch": 28.763913298183947, "grad_norm": 8.664471626281738, "learning_rate": 4.206270952474857e-06, "loss": 0.6994, "step": 49100 }, { "epoch": 28.82249560632689, "grad_norm": 6.604372978210449, "learning_rate": 4.009071189114573e-06, "loss": 0.6788, "step": 49200 }, { "epoch": 28.88107791446983, "grad_norm": 5.231504917144775, "learning_rate": 3.811871425754289e-06, "loss": 0.6644, "step": 49300 }, { "epoch": 28.939660222612773, "grad_norm": 5.522902011871338, "learning_rate": 3.6146716623940048e-06, "loss": 0.6543, "step": 49400 }, { "epoch": 28.99824253075571, "grad_norm": 4.572765350341797, "learning_rate": 3.4174718990337218e-06, "loss": 0.6769, "step": 49500 }, { "epoch": 29.056824838898653, "grad_norm": 5.370816707611084, "learning_rate": 3.2202721356734375e-06, "loss": 0.6169, "step": 49600 }, { "epoch": 29.115407147041594, "grad_norm": 4.599956035614014, 
"learning_rate": 3.0230723723131532e-06, "loss": 0.6698, "step": 49700 }, { "epoch": 29.173989455184536, "grad_norm": 4.932507514953613, "learning_rate": 2.8258726089528694e-06, "loss": 0.6294, "step": 49800 }, { "epoch": 29.232571763327474, "grad_norm": 8.667673110961914, "learning_rate": 2.628672845592585e-06, "loss": 0.6688, "step": 49900 }, { "epoch": 29.291154071470416, "grad_norm": 4.365273952484131, "learning_rate": 2.4314730822323017e-06, "loss": 0.6357, "step": 50000 }, { "epoch": 29.291154071470416, "eval_loss": 0.0829482451081276, "eval_runtime": 146.3076, "eval_samples_per_second": 3.417, "eval_steps_per_second": 0.431, "eval_wer": 0.18773946360153257, "step": 50000 }, { "epoch": 29.349736379613358, "grad_norm": 5.5835652351379395, "learning_rate": 2.234273318872018e-06, "loss": 0.633, "step": 50100 }, { "epoch": 29.408318687756296, "grad_norm": 5.577920436859131, "learning_rate": 2.0390455531453363e-06, "loss": 0.6456, "step": 50200 }, { "epoch": 29.466900995899238, "grad_norm": 6.7036566734313965, "learning_rate": 1.8418457897850522e-06, "loss": 0.6582, "step": 50300 }, { "epoch": 29.52548330404218, "grad_norm": 8.638516426086426, "learning_rate": 1.6446460264247684e-06, "loss": 0.6469, "step": 50400 }, { "epoch": 29.58406561218512, "grad_norm": 7.847275257110596, "learning_rate": 1.4474462630644845e-06, "loss": 0.6314, "step": 50500 }, { "epoch": 29.64264792032806, "grad_norm": 4.795884609222412, "learning_rate": 1.2502464997042004e-06, "loss": 0.6355, "step": 50600 }, { "epoch": 29.701230228471, "grad_norm": 5.026218414306641, "learning_rate": 1.0530467363439164e-06, "loss": 0.6476, "step": 50700 }, { "epoch": 29.759812536613943, "grad_norm": 6.32857608795166, "learning_rate": 8.558469729836324e-07, "loss": 0.6515, "step": 50800 }, { "epoch": 29.818394844756885, "grad_norm": 8.374032974243164, "learning_rate": 6.586472096233485e-07, "loss": 0.6615, "step": 50900 }, { "epoch": 29.876977152899823, "grad_norm": 8.55248737335205, "learning_rate": 
4.6144744626306455e-07, "loss": 0.6645, "step": 51000 }, { "epoch": 29.876977152899823, "eval_loss": 0.08314584940671921, "eval_runtime": 145.2309, "eval_samples_per_second": 3.443, "eval_steps_per_second": 0.434, "eval_wer": 0.18980253463012084, "step": 51000 }, { "epoch": 29.935559461042764, "grad_norm": 4.774900436401367, "learning_rate": 2.6424768290278054e-07, "loss": 0.6359, "step": 51100 }, { "epoch": 29.994141769185706, "grad_norm": 4.431262493133545, "learning_rate": 6.704791954249655e-08, "loss": 0.6618, "step": 51200 } ], "logging_steps": 100, "max_steps": 51210, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.6610187993088e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }