| { | |
| "best_global_step": 28000, | |
| "best_metric": 0.18110816386678455, | |
| "best_model_checkpoint": "./distil-whisper/checkpoint-28000", | |
| "epoch": 18.16051552431166, | |
| "eval_steps": 1000, | |
| "global_step": 31000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05858230814294083, | |
| "grad_norm": 27.49039077758789, | |
| "learning_rate": 1.9e-05, | |
| "loss": 1.7236, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11716461628588166, | |
| "grad_norm": 18.32018280029297, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 1.796, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1757469244288225, | |
| "grad_norm": 12.191054344177246, | |
| "learning_rate": 5.9e-05, | |
| "loss": 1.8559, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23432923257176333, | |
| "grad_norm": 20.974699020385742, | |
| "learning_rate": 7.900000000000001e-05, | |
| "loss": 1.9804, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.29291154071470415, | |
| "grad_norm": 37.665748596191406, | |
| "learning_rate": 9.88e-05, | |
| "loss": 2.2308, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.351493848857645, | |
| "grad_norm": 22.89726448059082, | |
| "learning_rate": 9.917896759542318e-05, | |
| "loss": 2.0073, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4100761570005858, | |
| "grad_norm": 28.912738800048828, | |
| "learning_rate": 9.830552886714997e-05, | |
| "loss": 2.2137, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.46865846514352666, | |
| "grad_norm": 26.01645851135254, | |
| "learning_rate": 9.743209013887676e-05, | |
| "loss": 2.3668, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5272407732864675, | |
| "grad_norm": 38.67082214355469, | |
| "learning_rate": 9.655865141060355e-05, | |
| "loss": 2.1304, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5858230814294083, | |
| "grad_norm": 42.85211181640625, | |
| "learning_rate": 9.568521268233034e-05, | |
| "loss": 2.1291, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5858230814294083, | |
| "eval_loss": 0.09121495485305786, | |
| "eval_runtime": 150.358, | |
| "eval_samples_per_second": 3.325, | |
| "eval_steps_per_second": 0.419, | |
| "eval_wer": 0.19776009431181846, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6444053895723492, | |
| "grad_norm": 41.63334274291992, | |
| "learning_rate": 9.481177395405713e-05, | |
| "loss": 2.176, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.70298769771529, | |
| "grad_norm": 35.67658996582031, | |
| "learning_rate": 9.393833522578391e-05, | |
| "loss": 2.3521, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7615700058582309, | |
| "grad_norm": 23.11754608154297, | |
| "learning_rate": 9.30648964975107e-05, | |
| "loss": 2.0235, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8201523140011716, | |
| "grad_norm": 57.13395690917969, | |
| "learning_rate": 9.219145776923749e-05, | |
| "loss": 2.0512, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.8787346221441125, | |
| "grad_norm": 29.889575958251953, | |
| "learning_rate": 9.131801904096428e-05, | |
| "loss": 2.2161, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9373169302870533, | |
| "grad_norm": 20.80169105529785, | |
| "learning_rate": 9.044458031269108e-05, | |
| "loss": 2.4303, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.9958992384299942, | |
| "grad_norm": 43.849361419677734, | |
| "learning_rate": 8.957114158441786e-05, | |
| "loss": 2.162, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.054481546572935, | |
| "grad_norm": 19.961891174316406, | |
| "learning_rate": 8.869770285614464e-05, | |
| "loss": 1.8381, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.1130638547158758, | |
| "grad_norm": 25.00478744506836, | |
| "learning_rate": 8.782426412787143e-05, | |
| "loss": 1.8102, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.1716461628588166, | |
| "grad_norm": 23.878835678100586, | |
| "learning_rate": 8.695082539959822e-05, | |
| "loss": 1.7057, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.1716461628588166, | |
| "eval_loss": 0.09117516130208969, | |
| "eval_runtime": 145.9897, | |
| "eval_samples_per_second": 3.425, | |
| "eval_steps_per_second": 0.432, | |
| "eval_wer": 0.2002652519893899, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.2302284710017575, | |
| "grad_norm": 36.49687957763672, | |
| "learning_rate": 8.607738667132501e-05, | |
| "loss": 1.7356, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.2888107791446983, | |
| "grad_norm": 23.07692527770996, | |
| "learning_rate": 8.52039479430518e-05, | |
| "loss": 1.6766, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.3473930872876392, | |
| "grad_norm": 23.606229782104492, | |
| "learning_rate": 8.433050921477858e-05, | |
| "loss": 1.7865, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.40597539543058, | |
| "grad_norm": 12.508922576904297, | |
| "learning_rate": 8.345707048650537e-05, | |
| "loss": 1.7503, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.4645577035735209, | |
| "grad_norm": 15.673484802246094, | |
| "learning_rate": 8.258363175823216e-05, | |
| "loss": 1.7208, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.5231400117164617, | |
| "grad_norm": 33.59520721435547, | |
| "learning_rate": 8.171019302995895e-05, | |
| "loss": 1.7469, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.5817223198594026, | |
| "grad_norm": 15.388129234313965, | |
| "learning_rate": 8.083675430168574e-05, | |
| "loss": 1.7075, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.6403046280023434, | |
| "grad_norm": 11.93837833404541, | |
| "learning_rate": 7.996331557341253e-05, | |
| "loss": 1.696, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.698886936145284, | |
| "grad_norm": 25.31481170654297, | |
| "learning_rate": 7.908987684513932e-05, | |
| "loss": 1.7738, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.757469244288225, | |
| "grad_norm": 38.03368377685547, | |
| "learning_rate": 7.821643811686611e-05, | |
| "loss": 1.7162, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.757469244288225, | |
| "eval_loss": 0.09119272977113724, | |
| "eval_runtime": 150.3416, | |
| "eval_samples_per_second": 3.326, | |
| "eval_steps_per_second": 0.419, | |
| "eval_wer": 0.20601237842617154, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.8160515524311658, | |
| "grad_norm": 15.58292293548584, | |
| "learning_rate": 7.73429993885929e-05, | |
| "loss": 1.6062, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.8746338605741066, | |
| "grad_norm": 73.762451171875, | |
| "learning_rate": 7.646956066031969e-05, | |
| "loss": 1.6992, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.9332161687170475, | |
| "grad_norm": 26.838842391967773, | |
| "learning_rate": 7.559612193204648e-05, | |
| "loss": 1.7229, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.9917984768599881, | |
| "grad_norm": 22.824892044067383, | |
| "learning_rate": 7.472268320377325e-05, | |
| "loss": 1.7185, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.050380785002929, | |
| "grad_norm": 13.238216400146484, | |
| "learning_rate": 7.384924447550004e-05, | |
| "loss": 1.4583, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.10896309314587, | |
| "grad_norm": 32.13274383544922, | |
| "learning_rate": 7.297580574722683e-05, | |
| "loss": 1.3799, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.1675454012888107, | |
| "grad_norm": 13.44641399383545, | |
| "learning_rate": 7.210236701895362e-05, | |
| "loss": 1.3713, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.2261277094317515, | |
| "grad_norm": 19.05823516845703, | |
| "learning_rate": 7.122892829068041e-05, | |
| "loss": 1.4489, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.2847100175746924, | |
| "grad_norm": 16.24590492248535, | |
| "learning_rate": 7.03554895624072e-05, | |
| "loss": 1.3858, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.3432923257176332, | |
| "grad_norm": 14.049396514892578, | |
| "learning_rate": 6.948205083413399e-05, | |
| "loss": 1.4996, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.3432923257176332, | |
| "eval_loss": 0.09011241793632507, | |
| "eval_runtime": 149.4815, | |
| "eval_samples_per_second": 3.345, | |
| "eval_steps_per_second": 0.421, | |
| "eval_wer": 0.20468611847922194, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.401874633860574, | |
| "grad_norm": 16.26442527770996, | |
| "learning_rate": 6.860861210586078e-05, | |
| "loss": 1.4692, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.460456942003515, | |
| "grad_norm": 10.367189407348633, | |
| "learning_rate": 6.773517337758757e-05, | |
| "loss": 1.3203, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.519039250146456, | |
| "grad_norm": 21.82206153869629, | |
| "learning_rate": 6.686173464931436e-05, | |
| "loss": 1.5161, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.5776215582893967, | |
| "grad_norm": 10.586897850036621, | |
| "learning_rate": 6.598829592104115e-05, | |
| "loss": 1.3843, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.6362038664323375, | |
| "grad_norm": 18.108095169067383, | |
| "learning_rate": 6.511485719276793e-05, | |
| "loss": 1.446, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.6947861745752784, | |
| "grad_norm": 17.231735229492188, | |
| "learning_rate": 6.424141846449472e-05, | |
| "loss": 1.4083, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.753368482718219, | |
| "grad_norm": 8.833962440490723, | |
| "learning_rate": 6.33679797362215e-05, | |
| "loss": 1.4668, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.81195079086116, | |
| "grad_norm": 14.335036277770996, | |
| "learning_rate": 6.24945410079483e-05, | |
| "loss": 1.3906, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.870533099004101, | |
| "grad_norm": null, | |
| "learning_rate": 6.162110227967508e-05, | |
| "loss": 1.4241, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.9291154071470418, | |
| "grad_norm": 25.987884521484375, | |
| "learning_rate": 6.075639793868461e-05, | |
| "loss": 1.3942, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.9291154071470418, | |
| "eval_loss": 0.0883156806230545, | |
| "eval_runtime": 147.2451, | |
| "eval_samples_per_second": 3.396, | |
| "eval_steps_per_second": 0.428, | |
| "eval_wer": 0.19510757441791923, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.9876977152899826, | |
| "grad_norm": 17.336523056030273, | |
| "learning_rate": 5.988295921041139e-05, | |
| "loss": 1.4093, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.0462800234329235, | |
| "grad_norm": 21.30254364013672, | |
| "learning_rate": 5.900952048213818e-05, | |
| "loss": 1.3729, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.104862331575864, | |
| "grad_norm": 15.282761573791504, | |
| "learning_rate": 5.813608175386497e-05, | |
| "loss": 1.3091, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.1634446397188047, | |
| "grad_norm": 11.485124588012695, | |
| "learning_rate": 5.726264302559175e-05, | |
| "loss": 1.1577, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.2220269478617456, | |
| "grad_norm": 17.49385643005371, | |
| "learning_rate": 5.638920429731854e-05, | |
| "loss": 1.2737, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.2806092560046864, | |
| "grad_norm": 19.74750518798828, | |
| "learning_rate": 5.551576556904533e-05, | |
| "loss": 1.1946, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.3391915641476273, | |
| "grad_norm": 9.402506828308105, | |
| "learning_rate": 5.4642326840772115e-05, | |
| "loss": 1.2035, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.397773872290568, | |
| "grad_norm": 13.279162406921387, | |
| "learning_rate": 5.3768888112498904e-05, | |
| "loss": 1.2891, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.456356180433509, | |
| "grad_norm": 18.554702758789062, | |
| "learning_rate": 5.28954493842257e-05, | |
| "loss": 1.1315, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.51493848857645, | |
| "grad_norm": 10.541516304016113, | |
| "learning_rate": 5.202201065595249e-05, | |
| "loss": 1.2285, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.51493848857645, | |
| "eval_loss": 0.08758817613124847, | |
| "eval_runtime": 148.7151, | |
| "eval_samples_per_second": 3.362, | |
| "eval_steps_per_second": 0.424, | |
| "eval_wer": 0.1956970232832302, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.5735207967193907, | |
| "grad_norm": 17.15789031982422, | |
| "learning_rate": 5.114857192767928e-05, | |
| "loss": 1.278, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.6321031048623316, | |
| "grad_norm": 13.666048049926758, | |
| "learning_rate": 5.027513319940606e-05, | |
| "loss": 1.164, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.6906854130052724, | |
| "grad_norm": 14.286330223083496, | |
| "learning_rate": 4.940169447113285e-05, | |
| "loss": 1.2317, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.7492677211482133, | |
| "grad_norm": 19.070871353149414, | |
| "learning_rate": 4.852825574285964e-05, | |
| "loss": 1.131, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.807850029291154, | |
| "grad_norm": 13.184505462646484, | |
| "learning_rate": 4.7654817014586425e-05, | |
| "loss": 1.2665, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.866432337434095, | |
| "grad_norm": 16.988956451416016, | |
| "learning_rate": 4.6781378286313215e-05, | |
| "loss": 1.2145, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.925014645577036, | |
| "grad_norm": 14.714631080627441, | |
| "learning_rate": 4.590793955804001e-05, | |
| "loss": 1.2228, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.9835969537199767, | |
| "grad_norm": 15.682711601257324, | |
| "learning_rate": 4.5034500829766794e-05, | |
| "loss": 1.2137, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.042179261862917, | |
| "grad_norm": 14.943408012390137, | |
| "learning_rate": 4.4161062101493584e-05, | |
| "loss": 1.2088, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.100761570005858, | |
| "grad_norm": 9.292410850524902, | |
| "learning_rate": 4.3287623373220374e-05, | |
| "loss": 1.0637, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.100761570005858, | |
| "eval_loss": 0.08726315200328827, | |
| "eval_runtime": 149.7325, | |
| "eval_samples_per_second": 3.339, | |
| "eval_steps_per_second": 0.421, | |
| "eval_wer": 0.19201296787503683, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.159343878148799, | |
| "grad_norm": 9.017548561096191, | |
| "learning_rate": 4.241418464494716e-05, | |
| "loss": 1.0308, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.21792618629174, | |
| "grad_norm": 16.15174102783203, | |
| "learning_rate": 4.1540745916673946e-05, | |
| "loss": 1.0464, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.2765084944346805, | |
| "grad_norm": 7.939199447631836, | |
| "learning_rate": 4.0667307188400736e-05, | |
| "loss": 1.1491, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 4.335090802577621, | |
| "grad_norm": 11.129817008972168, | |
| "learning_rate": 3.9793868460127526e-05, | |
| "loss": 1.0738, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 4.393673110720562, | |
| "grad_norm": 8.50700855255127, | |
| "learning_rate": 3.8920429731854315e-05, | |
| "loss": 1.0828, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.452255418863503, | |
| "grad_norm": 11.484732627868652, | |
| "learning_rate": 3.80469910035811e-05, | |
| "loss": 1.0418, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 4.510837727006444, | |
| "grad_norm": 8.929669380187988, | |
| "learning_rate": 3.717355227530789e-05, | |
| "loss": 1.1051, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 4.569420035149385, | |
| "grad_norm": 12.994172096252441, | |
| "learning_rate": 3.630011354703468e-05, | |
| "loss": 1.0193, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 4.628002343292326, | |
| "grad_norm": 9.806758880615234, | |
| "learning_rate": 3.542667481876147e-05, | |
| "loss": 1.1699, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 4.6865846514352665, | |
| "grad_norm": 10.537009239196777, | |
| "learning_rate": 3.455323609048826e-05, | |
| "loss": 1.1144, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.6865846514352665, | |
| "eval_loss": 0.08649158477783203, | |
| "eval_runtime": 148.2812, | |
| "eval_samples_per_second": 3.372, | |
| "eval_steps_per_second": 0.425, | |
| "eval_wer": 0.1927497789566755, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.745166959578207, | |
| "grad_norm": 30.78623390197754, | |
| "learning_rate": 3.367979736221504e-05, | |
| "loss": 1.0902, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.803749267721148, | |
| "grad_norm": 9.64354419708252, | |
| "learning_rate": 3.280635863394183e-05, | |
| "loss": 0.9932, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.862331575864089, | |
| "grad_norm": 11.149614334106445, | |
| "learning_rate": 3.193291990566862e-05, | |
| "loss": 1.0389, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.92091388400703, | |
| "grad_norm": 10.836565971374512, | |
| "learning_rate": 3.10594811773954e-05, | |
| "loss": 1.0626, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.979496192149971, | |
| "grad_norm": 11.348654747009277, | |
| "learning_rate": 3.01860424491222e-05, | |
| "loss": 1.0934, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.038078500292912, | |
| "grad_norm": 8.341979026794434, | |
| "learning_rate": 2.9312603720848985e-05, | |
| "loss": 1.0845, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 5.0966608084358525, | |
| "grad_norm": 9.784319877624512, | |
| "learning_rate": 2.8439164992575774e-05, | |
| "loss": 0.96, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 5.155243116578793, | |
| "grad_norm": 11.3285551071167, | |
| "learning_rate": 2.756572626430256e-05, | |
| "loss": 0.9211, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 5.213825424721734, | |
| "grad_norm": 8.36048698425293, | |
| "learning_rate": 2.6692287536029347e-05, | |
| "loss": 0.9652, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 5.272407732864675, | |
| "grad_norm": 8.087980270385742, | |
| "learning_rate": 2.5818848807756137e-05, | |
| "loss": 1.0164, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.272407732864675, | |
| "eval_loss": 0.08577600121498108, | |
| "eval_runtime": 149.9869, | |
| "eval_samples_per_second": 3.334, | |
| "eval_steps_per_second": 0.42, | |
| "eval_wer": 0.19230769230769232, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.330990041007616, | |
| "grad_norm": 14.56843090057373, | |
| "learning_rate": 2.4954144466765657e-05, | |
| "loss": 0.9776, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 5.389572349150557, | |
| "grad_norm": 10.220062255859375, | |
| "learning_rate": 2.4080705738492446e-05, | |
| "loss": 0.9681, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 5.448154657293498, | |
| "grad_norm": 11.614462852478027, | |
| "learning_rate": 2.3207267010219233e-05, | |
| "loss": 0.9691, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 5.506736965436438, | |
| "grad_norm": 6.580599784851074, | |
| "learning_rate": 2.2333828281946022e-05, | |
| "loss": 0.9492, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 5.565319273579379, | |
| "grad_norm": 10.284950256347656, | |
| "learning_rate": 2.1460389553672812e-05, | |
| "loss": 1.0092, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.62390158172232, | |
| "grad_norm": 10.794511795043945, | |
| "learning_rate": 2.0586950825399598e-05, | |
| "loss": 0.9039, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 5.682483889865261, | |
| "grad_norm": 12.07039737701416, | |
| "learning_rate": 1.9713512097126388e-05, | |
| "loss": 1.0114, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 5.741066198008202, | |
| "grad_norm": 15.622093200683594, | |
| "learning_rate": 1.8840073368853174e-05, | |
| "loss": 0.9762, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 5.799648506151143, | |
| "grad_norm": 7.460862636566162, | |
| "learning_rate": 1.7966634640579964e-05, | |
| "loss": 0.974, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 5.8582308142940835, | |
| "grad_norm": 9.551807403564453, | |
| "learning_rate": 1.7093195912306753e-05, | |
| "loss": 0.9812, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.8582308142940835, | |
| "eval_loss": 0.08563477545976639, | |
| "eval_runtime": 148.6427, | |
| "eval_samples_per_second": 3.364, | |
| "eval_steps_per_second": 0.424, | |
| "eval_wer": 0.1940760389036251, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.916813122437024, | |
| "grad_norm": 10.54980754852295, | |
| "learning_rate": 1.621975718403354e-05, | |
| "loss": 0.9981, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 5.975395430579965, | |
| "grad_norm": 8.396002769470215, | |
| "learning_rate": 1.534631845576033e-05, | |
| "loss": 0.911, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 6.033977738722906, | |
| "grad_norm": 6.294841289520264, | |
| "learning_rate": 1.4472879727487117e-05, | |
| "loss": 0.8886, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 6.092560046865847, | |
| "grad_norm": 10.276989936828613, | |
| "learning_rate": 1.3599440999213905e-05, | |
| "loss": 0.8764, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 6.151142355008787, | |
| "grad_norm": 9.27648639678955, | |
| "learning_rate": 1.2726002270940693e-05, | |
| "loss": 0.8728, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 6.209724663151728, | |
| "grad_norm": 10.15081787109375, | |
| "learning_rate": 1.1852563542667483e-05, | |
| "loss": 0.9008, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 6.268306971294669, | |
| "grad_norm": 7.828310012817383, | |
| "learning_rate": 1.0979124814394271e-05, | |
| "loss": 0.8703, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 6.3268892794376095, | |
| "grad_norm": 8.747062683105469, | |
| "learning_rate": 1.0105686086121059e-05, | |
| "loss": 0.9343, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 6.38547158758055, | |
| "grad_norm": 7.199453830718994, | |
| "learning_rate": 9.232247357847849e-06, | |
| "loss": 0.9144, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 6.444053895723491, | |
| "grad_norm": 7.865077972412109, | |
| "learning_rate": 8.358808629574635e-06, | |
| "loss": 0.8927, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 6.444053895723491, | |
| "eval_loss": 0.08493725210428238, | |
| "eval_runtime": 152.514, | |
| "eval_samples_per_second": 3.278, | |
| "eval_steps_per_second": 0.413, | |
| "eval_wer": 0.20173887415266725, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 6.502636203866432, | |
| "grad_norm": 8.560958862304688, | |
| "learning_rate": 7.485369901301424e-06, | |
| "loss": 0.8994, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 6.561218512009373, | |
| "grad_norm": 8.679183006286621, | |
| "learning_rate": 6.611931173028213e-06, | |
| "loss": 0.9121, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 6.619800820152314, | |
| "grad_norm": 7.480246067047119, | |
| "learning_rate": 5.7384924447550014e-06, | |
| "loss": 0.9015, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 6.678383128295255, | |
| "grad_norm": 8.197961807250977, | |
| "learning_rate": 4.865053716481789e-06, | |
| "loss": 0.8958, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 6.7369654364381955, | |
| "grad_norm": 9.989555358886719, | |
| "learning_rate": 3.991614988208577e-06, | |
| "loss": 0.8816, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 6.795547744581136, | |
| "grad_norm": 11.78941535949707, | |
| "learning_rate": 3.118176259935366e-06, | |
| "loss": 0.8848, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 6.854130052724077, | |
| "grad_norm": 8.92465591430664, | |
| "learning_rate": 2.244737531662154e-06, | |
| "loss": 0.8758, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 6.912712360867018, | |
| "grad_norm": 5.927036762237549, | |
| "learning_rate": 1.3712988033889424e-06, | |
| "loss": 0.8421, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 6.971294669009959, | |
| "grad_norm": 6.0556960105896, | |
| "learning_rate": 4.978600751157307e-07, | |
| "loss": 0.8566, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 7.0298769771529, | |
| "grad_norm": 8.540233612060547, | |
| "learning_rate": 3.064574532287266e-05, | |
| "loss": 0.8936, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 7.0298769771529, | |
| "eval_loss": 0.08440528064966202, | |
| "eval_runtime": 151.0062, | |
| "eval_samples_per_second": 3.311, | |
| "eval_steps_per_second": 0.417, | |
| "eval_wer": 0.19613910993221337, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 7.088459285295841, | |
| "grad_norm": 8.703925132751465, | |
| "learning_rate": 3.0042245021122513e-05, | |
| "loss": 0.9237, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 7.147041593438781, | |
| "grad_norm": 11.286332130432129, | |
| "learning_rate": 2.9438744719372363e-05, | |
| "loss": 0.917, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 7.205623901581722, | |
| "grad_norm": 8.638763427734375, | |
| "learning_rate": 2.884127942063971e-05, | |
| "loss": 0.88, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 7.264206209724663, | |
| "grad_norm": 9.697504043579102, | |
| "learning_rate": 2.8237779118889563e-05, | |
| "loss": 0.9342, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 7.322788517867604, | |
| "grad_norm": 8.06286334991455, | |
| "learning_rate": 2.7634278817139407e-05, | |
| "loss": 0.8979, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 7.381370826010545, | |
| "grad_norm": 15.282951354980469, | |
| "learning_rate": 2.703077851538926e-05, | |
| "loss": 0.9201, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 7.439953134153486, | |
| "grad_norm": 11.369593620300293, | |
| "learning_rate": 2.6427278213639107e-05, | |
| "loss": 0.9538, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 7.4985354422964265, | |
| "grad_norm": 10.077091217041016, | |
| "learning_rate": 2.5823777911888958e-05, | |
| "loss": 0.9807, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 7.557117750439367, | |
| "grad_norm": 9.917128562927246, | |
| "learning_rate": 2.5220277610138805e-05, | |
| "loss": 0.939, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 7.615700058582308, | |
| "grad_norm": 10.68909740447998, | |
| "learning_rate": 2.4616777308388655e-05, | |
| "loss": 0.8718, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 7.615700058582308, | |
| "eval_loss": 0.08539500832557678, | |
| "eval_runtime": 149.7688, | |
| "eval_samples_per_second": 3.338, | |
| "eval_steps_per_second": 0.421, | |
| "eval_wer": 0.19790745652814618, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 7.674282366725249, | |
| "grad_norm": 8.260842323303223, | |
| "learning_rate": 2.4013277006638505e-05, | |
| "loss": 0.9048, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 7.73286467486819, | |
| "grad_norm": 16.34197235107422, | |
| "learning_rate": 2.3409776704888352e-05, | |
| "loss": 0.9262, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 7.791446983011131, | |
| "grad_norm": 8.024565696716309, | |
| "learning_rate": 2.2806276403138202e-05, | |
| "loss": 0.9952, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 7.850029291154072, | |
| "grad_norm": 7.884005069732666, | |
| "learning_rate": 2.220277610138805e-05, | |
| "loss": 0.9656, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 7.9086115992970125, | |
| "grad_norm": 8.73161506652832, | |
| "learning_rate": 2.15992757996379e-05, | |
| "loss": 0.9205, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 7.967193907439953, | |
| "grad_norm": 9.00133991241455, | |
| "learning_rate": 2.099577549788775e-05, | |
| "loss": 0.9347, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 8.025776215582894, | |
| "grad_norm": 6.850646495819092, | |
| "learning_rate": 2.03922751961376e-05, | |
| "loss": 0.8468, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 8.084358523725834, | |
| "grad_norm": 11.7725248336792, | |
| "learning_rate": 1.978877489438745e-05, | |
| "loss": 0.8948, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 8.142940831868776, | |
| "grad_norm": 6.4703474044799805, | |
| "learning_rate": 1.9185274592637298e-05, | |
| "loss": 0.8727, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 8.201523140011716, | |
| "grad_norm": 15.587645530700684, | |
| "learning_rate": 1.8581774290887148e-05, | |
| "loss": 0.9019, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 8.201523140011716, | |
| "eval_loss": 0.08469171822071075, | |
| "eval_runtime": 147.0496, | |
| "eval_samples_per_second": 3.4, | |
| "eval_steps_per_second": 0.428, | |
| "eval_wer": 0.18538166814028884, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 8.260105448154658, | |
| "grad_norm": 7.585418701171875, | |
| "learning_rate": 1.7978273989136995e-05, | |
| "loss": 0.8818, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 8.318687756297598, | |
| "grad_norm": 9.436836242675781, | |
| "learning_rate": 1.7374773687386845e-05, | |
| "loss": 0.8864, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 8.37727006444054, | |
| "grad_norm": 12.12936019897461, | |
| "learning_rate": 1.6771273385636692e-05, | |
| "loss": 0.8744, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 8.43585237258348, | |
| "grad_norm": 11.584985733032227, | |
| "learning_rate": 1.6167773083886542e-05, | |
| "loss": 0.8542, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 8.494434680726421, | |
| "grad_norm": 7.6883440017700195, | |
| "learning_rate": 1.5564272782136393e-05, | |
| "loss": 0.8714, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 8.553016988869361, | |
| "grad_norm": 13.686609268188477, | |
| "learning_rate": 1.496077248038624e-05, | |
| "loss": 0.8726, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 8.611599297012303, | |
| "grad_norm": 10.200602531433105, | |
| "learning_rate": 1.436330718165359e-05, | |
| "loss": 0.8839, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 8.670181605155243, | |
| "grad_norm": 6.929018020629883, | |
| "learning_rate": 1.3759806879903441e-05, | |
| "loss": 0.8628, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 8.728763913298184, | |
| "grad_norm": 9.72988224029541, | |
| "learning_rate": 1.3156306578153291e-05, | |
| "loss": 0.8672, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 8.787346221441124, | |
| "grad_norm": 7.273561477661133, | |
| "learning_rate": 1.255280627640314e-05, | |
| "loss": 0.8293, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 8.787346221441124, | |
| "eval_loss": 0.08473628014326096, | |
| "eval_runtime": 147.7929, | |
| "eval_samples_per_second": 3.383, | |
| "eval_steps_per_second": 0.426, | |
| "eval_wer": 0.19834954317712938, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 8.845928529584066, | |
| "grad_norm": 6.900521278381348, | |
| "learning_rate": 1.1949305974652989e-05, | |
| "loss": 0.8797, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 8.904510837727006, | |
| "grad_norm": 13.14035701751709, | |
| "learning_rate": 1.1345805672902837e-05, | |
| "loss": 0.8691, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 8.963093145869948, | |
| "grad_norm": 6.80872106552124, | |
| "learning_rate": 1.0742305371152686e-05, | |
| "loss": 0.859, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 9.021675454012888, | |
| "grad_norm": 5.7985520362854, | |
| "learning_rate": 1.0138805069402535e-05, | |
| "loss": 0.8905, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 9.08025776215583, | |
| "grad_norm": 7.384444236755371, | |
| "learning_rate": 9.535304767652383e-06, | |
| "loss": 0.7981, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 9.13884007029877, | |
| "grad_norm": 6.441751956939697, | |
| "learning_rate": 8.931804465902233e-06, | |
| "loss": 0.9026, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 9.197422378441711, | |
| "grad_norm": 5.471485614776611, | |
| "learning_rate": 8.328304164152082e-06, | |
| "loss": 0.8357, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 9.256004686584651, | |
| "grad_norm": 6.093921661376953, | |
| "learning_rate": 7.724803862401932e-06, | |
| "loss": 0.8427, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 9.314586994727593, | |
| "grad_norm": 5.414072036743164, | |
| "learning_rate": 7.121303560651781e-06, | |
| "loss": 0.8235, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 9.373169302870533, | |
| "grad_norm": 5.2771897315979, | |
| "learning_rate": 6.5178032589016296e-06, | |
| "loss": 0.8363, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 9.373169302870533, | |
| "eval_loss": 0.08421996235847473, | |
| "eval_runtime": 150.6901, | |
| "eval_samples_per_second": 3.318, | |
| "eval_steps_per_second": 0.418, | |
| "eval_wer": 0.19820218096080164, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 9.431751611013475, | |
| "grad_norm": 5.505492687225342, | |
| "learning_rate": 5.914302957151479e-06, | |
| "loss": 0.7991, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 9.490333919156415, | |
| "grad_norm": 6.690750598907471, | |
| "learning_rate": 5.310802655401328e-06, | |
| "loss": 0.8071, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 9.548916227299356, | |
| "grad_norm": 6.654877185821533, | |
| "learning_rate": 4.707302353651177e-06, | |
| "loss": 0.834, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 9.607498535442296, | |
| "grad_norm": 9.937077522277832, | |
| "learning_rate": 4.1038020519010266e-06, | |
| "loss": 0.8055, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 9.666080843585238, | |
| "grad_norm": 6.015642166137695, | |
| "learning_rate": 3.500301750150875e-06, | |
| "loss": 0.8734, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 9.724663151728178, | |
| "grad_norm": 12.434464454650879, | |
| "learning_rate": 2.896801448400724e-06, | |
| "loss": 0.8544, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 9.783245459871118, | |
| "grad_norm": 6.330708980560303, | |
| "learning_rate": 2.2933011466505732e-06, | |
| "loss": 0.798, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 9.84182776801406, | |
| "grad_norm": 5.820682048797607, | |
| "learning_rate": 1.6898008449004227e-06, | |
| "loss": 0.7989, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 9.900410076157002, | |
| "grad_norm": 8.209725379943848, | |
| "learning_rate": 1.0863005431502715e-06, | |
| "loss": 0.8454, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 9.958992384299941, | |
| "grad_norm": 10.676623344421387, | |
| "learning_rate": 4.828002414001208e-07, | |
| "loss": 0.8034, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 9.958992384299941, | |
| "eval_loss": 0.08400186896324158, | |
| "eval_runtime": 148.9273, | |
| "eval_samples_per_second": 3.357, | |
| "eval_steps_per_second": 0.423, | |
| "eval_wer": 0.197465369879163, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 10.017574692442881, | |
| "grad_norm": 8.057076454162598, | |
| "learning_rate": 3.8124720274503957e-05, | |
| "loss": 0.8553, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 10.076157000585823, | |
| "grad_norm": 6.9262895584106445, | |
| "learning_rate": 3.7751752946441895e-05, | |
| "loss": 0.8579, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 10.134739308728763, | |
| "grad_norm": 7.844761371612549, | |
| "learning_rate": 3.737878561837983e-05, | |
| "loss": 0.8764, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 10.193321616871705, | |
| "grad_norm": 17.013307571411133, | |
| "learning_rate": 3.700954796359839e-05, | |
| "loss": 0.9679, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 10.251903925014645, | |
| "grad_norm": 10.550113677978516, | |
| "learning_rate": 3.663658063553633e-05, | |
| "loss": 0.8919, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 10.310486233157587, | |
| "grad_norm": 7.075886249542236, | |
| "learning_rate": 3.6263613307474266e-05, | |
| "loss": 0.9601, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 10.369068541300527, | |
| "grad_norm": 10.027517318725586, | |
| "learning_rate": 3.5890645979412204e-05, | |
| "loss": 0.8596, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 10.427650849443468, | |
| "grad_norm": 5.811990261077881, | |
| "learning_rate": 3.551767865135014e-05, | |
| "loss": 0.9023, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 10.486233157586408, | |
| "grad_norm": 12.586297988891602, | |
| "learning_rate": 3.514471132328808e-05, | |
| "loss": 0.93, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 10.54481546572935, | |
| "grad_norm": 9.313389778137207, | |
| "learning_rate": 3.477174399522602e-05, | |
| "loss": 0.8462, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 10.54481546572935, | |
| "eval_loss": 0.08548491448163986, | |
| "eval_runtime": 156.5497, | |
| "eval_samples_per_second": 3.194, | |
| "eval_steps_per_second": 0.402, | |
| "eval_wer": 0.19525493663424698, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 10.60339777387229, | |
| "grad_norm": 9.245261192321777, | |
| "learning_rate": 3.439877666716396e-05, | |
| "loss": 0.9351, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 10.661980082015232, | |
| "grad_norm": 7.513446807861328, | |
| "learning_rate": 3.4025809339101895e-05, | |
| "loss": 0.8722, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 10.720562390158172, | |
| "grad_norm": 12.897968292236328, | |
| "learning_rate": 3.365284201103984e-05, | |
| "loss": 0.9576, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 10.779144698301113, | |
| "grad_norm": 9.550192832946777, | |
| "learning_rate": 3.327987468297777e-05, | |
| "loss": 0.9241, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 10.837727006444053, | |
| "grad_norm": 6.644899368286133, | |
| "learning_rate": 3.290690735491571e-05, | |
| "loss": 0.9291, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 10.896309314586995, | |
| "grad_norm": 9.285797119140625, | |
| "learning_rate": 3.253394002685365e-05, | |
| "loss": 0.8971, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 10.954891622729935, | |
| "grad_norm": 8.316353797912598, | |
| "learning_rate": 3.2160972698791586e-05, | |
| "loss": 0.8892, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 11.013473930872877, | |
| "grad_norm": 17.100173950195312, | |
| "learning_rate": 3.1788005370729524e-05, | |
| "loss": 0.8732, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 11.072056239015817, | |
| "grad_norm": 19.12342643737793, | |
| "learning_rate": 3.141503804266747e-05, | |
| "loss": 0.8663, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 11.130638547158759, | |
| "grad_norm": 7.627189636230469, | |
| "learning_rate": 3.10420707146054e-05, | |
| "loss": 0.8824, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 11.130638547158759, | |
| "eval_loss": 0.08482780307531357, | |
| "eval_runtime": 146.6345, | |
| "eval_samples_per_second": 3.41, | |
| "eval_steps_per_second": 0.43, | |
| "eval_wer": 0.19304450338933096, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 11.189220855301699, | |
| "grad_norm": 7.1925950050354, | |
| "learning_rate": 3.066910338654334e-05, | |
| "loss": 0.8375, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 11.24780316344464, | |
| "grad_norm": 8.55908489227295, | |
| "learning_rate": 3.0296136058481277e-05, | |
| "loss": 0.8335, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 11.30638547158758, | |
| "grad_norm": 15.089740753173828, | |
| "learning_rate": 2.9923168730419215e-05, | |
| "loss": 0.9117, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 11.364967779730522, | |
| "grad_norm": 8.451448440551758, | |
| "learning_rate": 2.9550201402357153e-05, | |
| "loss": 0.8586, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 11.423550087873462, | |
| "grad_norm": 7.994997501373291, | |
| "learning_rate": 2.9177234074295095e-05, | |
| "loss": 0.8459, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 11.482132396016404, | |
| "grad_norm": 8.656350135803223, | |
| "learning_rate": 2.8804266746233033e-05, | |
| "loss": 0.9268, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 11.540714704159344, | |
| "grad_norm": 10.245903968811035, | |
| "learning_rate": 2.843129941817097e-05, | |
| "loss": 0.8657, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 11.599297012302285, | |
| "grad_norm": 8.758448600769043, | |
| "learning_rate": 2.806206176338953e-05, | |
| "loss": 0.8875, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 11.657879320445225, | |
| "grad_norm": 6.345497131347656, | |
| "learning_rate": 2.7689094435327466e-05, | |
| "loss": 0.9115, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 11.716461628588167, | |
| "grad_norm": 9.602129936218262, | |
| "learning_rate": 2.7316127107265404e-05, | |
| "loss": 0.8591, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 11.716461628588167, | |
| "eval_loss": 0.0848940759897232, | |
| "eval_runtime": 147.8483, | |
| "eval_samples_per_second": 3.382, | |
| "eval_steps_per_second": 0.426, | |
| "eval_wer": 0.18376068376068377, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 11.775043936731107, | |
| "grad_norm": 10.147231101989746, | |
| "learning_rate": 2.6943159779203343e-05, | |
| "loss": 0.8628, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 11.833626244874049, | |
| "grad_norm": 7.136846542358398, | |
| "learning_rate": 2.657019245114128e-05, | |
| "loss": 0.9034, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 11.892208553016989, | |
| "grad_norm": 11.765522003173828, | |
| "learning_rate": 2.6197225123079222e-05, | |
| "loss": 0.8635, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 11.95079086115993, | |
| "grad_norm": 7.662530422210693, | |
| "learning_rate": 2.582425779501716e-05, | |
| "loss": 0.8561, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 12.00937316930287, | |
| "grad_norm": 10.559505462646484, | |
| "learning_rate": 2.5451290466955095e-05, | |
| "loss": 0.8319, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 12.067955477445812, | |
| "grad_norm": 6.208855152130127, | |
| "learning_rate": 2.5078323138893034e-05, | |
| "loss": 0.7679, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 12.126537785588752, | |
| "grad_norm": 10.877766609191895, | |
| "learning_rate": 2.4705355810830972e-05, | |
| "loss": 0.8484, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 12.185120093731694, | |
| "grad_norm": 8.098186492919922, | |
| "learning_rate": 2.433238848276891e-05, | |
| "loss": 0.8644, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 12.243702401874634, | |
| "grad_norm": 7.471461772918701, | |
| "learning_rate": 2.3959421154706848e-05, | |
| "loss": 0.8372, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 12.302284710017574, | |
| "grad_norm": 10.694164276123047, | |
| "learning_rate": 2.3586453826644786e-05, | |
| "loss": 0.8339, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 12.302284710017574, | |
| "eval_loss": 0.08417258411645889, | |
| "eval_runtime": 146.3017, | |
| "eval_samples_per_second": 3.418, | |
| "eval_steps_per_second": 0.431, | |
| "eval_wer": 0.18626584143825523, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 12.360867018160516, | |
| "grad_norm": 8.07205581665039, | |
| "learning_rate": 2.3213486498582724e-05, | |
| "loss": 0.7901, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 12.419449326303456, | |
| "grad_norm": 10.554586410522461, | |
| "learning_rate": 2.2840519170520663e-05, | |
| "loss": 0.8117, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 12.478031634446397, | |
| "grad_norm": 9.55418872833252, | |
| "learning_rate": 2.24675518424586e-05, | |
| "loss": 0.7946, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 12.536613942589337, | |
| "grad_norm": 8.738641738891602, | |
| "learning_rate": 2.2094584514396542e-05, | |
| "loss": 0.8026, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 12.595196250732279, | |
| "grad_norm": 9.077950477600098, | |
| "learning_rate": 2.1721617186334477e-05, | |
| "loss": 0.8102, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 12.653778558875219, | |
| "grad_norm": 7.797760009765625, | |
| "learning_rate": 2.1348649858272415e-05, | |
| "loss": 0.8608, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 12.71236086701816, | |
| "grad_norm": 7.361778736114502, | |
| "learning_rate": 2.0975682530210357e-05, | |
| "loss": 0.8448, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 12.7709431751611, | |
| "grad_norm": 5.896770477294922, | |
| "learning_rate": 2.0602715202148292e-05, | |
| "loss": 0.8394, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 12.829525483304042, | |
| "grad_norm": 11.283666610717773, | |
| "learning_rate": 2.0233477547366852e-05, | |
| "loss": 0.8326, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 12.888107791446982, | |
| "grad_norm": 8.800780296325684, | |
| "learning_rate": 1.986051021930479e-05, | |
| "loss": 0.8573, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 12.888107791446982, | |
| "eval_loss": 0.08360177278518677, | |
| "eval_runtime": 148.5908, | |
| "eval_samples_per_second": 3.365, | |
| "eval_steps_per_second": 0.424, | |
| "eval_wer": 0.19260241674034778, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 12.946690099589924, | |
| "grad_norm": 7.05850887298584, | |
| "learning_rate": 1.948754289124273e-05, | |
| "loss": 0.7926, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 13.005272407732864, | |
| "grad_norm": 7.339128017425537, | |
| "learning_rate": 1.9114575563180667e-05, | |
| "loss": 0.8611, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 13.063854715875806, | |
| "grad_norm": 7.789575576782227, | |
| "learning_rate": 1.8741608235118605e-05, | |
| "loss": 0.8006, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 13.122437024018746, | |
| "grad_norm": 10.398506164550781, | |
| "learning_rate": 1.8368640907056543e-05, | |
| "loss": 0.8397, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 13.181019332161688, | |
| "grad_norm": 8.361679077148438, | |
| "learning_rate": 1.799567357899448e-05, | |
| "loss": 0.8027, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 13.239601640304627, | |
| "grad_norm": 8.431894302368164, | |
| "learning_rate": 1.762270625093242e-05, | |
| "loss": 0.824, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 13.29818394844757, | |
| "grad_norm": 5.742968559265137, | |
| "learning_rate": 1.7249738922870357e-05, | |
| "loss": 0.792, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 13.35676625659051, | |
| "grad_norm": 13.589301109313965, | |
| "learning_rate": 1.6876771594808296e-05, | |
| "loss": 0.7701, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 13.415348564733451, | |
| "grad_norm": 6.760063171386719, | |
| "learning_rate": 1.6503804266746234e-05, | |
| "loss": 0.7756, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 13.473930872876391, | |
| "grad_norm": 8.249403953552246, | |
| "learning_rate": 1.6130836938684172e-05, | |
| "loss": 0.7445, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 13.473930872876391, | |
| "eval_loss": 0.08392482995986938, | |
| "eval_runtime": 146.723, | |
| "eval_samples_per_second": 3.408, | |
| "eval_steps_per_second": 0.429, | |
| "eval_wer": 0.18420277040966695, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 13.532513181019333, | |
| "grad_norm": 6.522210597991943, | |
| "learning_rate": 1.575786961062211e-05, | |
| "loss": 0.7743, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 13.591095489162273, | |
| "grad_norm": 6.639892101287842, | |
| "learning_rate": 1.538490228256005e-05, | |
| "loss": 0.8147, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 13.649677797305214, | |
| "grad_norm": 6.0167999267578125, | |
| "learning_rate": 1.5011934954497987e-05, | |
| "loss": 0.781, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 13.708260105448154, | |
| "grad_norm": 9.794026374816895, | |
| "learning_rate": 1.4638967626435926e-05, | |
| "loss": 0.771, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 13.766842413591096, | |
| "grad_norm": 8.01543140411377, | |
| "learning_rate": 1.4266000298373863e-05, | |
| "loss": 0.7812, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 13.825424721734036, | |
| "grad_norm": 10.331818580627441, | |
| "learning_rate": 1.3893032970311801e-05, | |
| "loss": 0.7864, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 13.884007029876978, | |
| "grad_norm": 8.055398941040039, | |
| "learning_rate": 1.3520065642249741e-05, | |
| "loss": 0.7991, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 13.942589338019918, | |
| "grad_norm": 8.330449104309082, | |
| "learning_rate": 1.3147098314187677e-05, | |
| "loss": 0.8088, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 14.00117164616286, | |
| "grad_norm": 6.869382381439209, | |
| "learning_rate": 1.2774130986125616e-05, | |
| "loss": 0.76, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 14.0597539543058, | |
| "grad_norm": 6.649117469787598, | |
| "learning_rate": 1.2401163658063554e-05, | |
| "loss": 0.783, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 14.0597539543058, | |
| "eval_loss": 0.08357907831668854, | |
| "eval_runtime": 147.3445, | |
| "eval_samples_per_second": 3.393, | |
| "eval_steps_per_second": 0.428, | |
| "eval_wer": 0.18420277040966695, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 14.118336262448741, | |
| "grad_norm": 8.647706031799316, | |
| "learning_rate": 1.2028196330001492e-05, | |
| "loss": 0.7496, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 14.176918570591681, | |
| "grad_norm": 8.944561004638672, | |
| "learning_rate": 1.165522900193943e-05, | |
| "loss": 0.7662, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 14.235500878734623, | |
| "grad_norm": 8.365220069885254, | |
| "learning_rate": 1.1282261673877368e-05, | |
| "loss": 0.7645, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 14.294083186877563, | |
| "grad_norm": 9.97271728515625, | |
| "learning_rate": 1.0909294345815308e-05, | |
| "loss": 0.74, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 14.352665495020505, | |
| "grad_norm": 5.968284606933594, | |
| "learning_rate": 1.0536327017753245e-05, | |
| "loss": 0.7875, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 14.411247803163445, | |
| "grad_norm": 6.4041008949279785, | |
| "learning_rate": 1.0163359689691183e-05, | |
| "loss": 0.7557, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 14.469830111306386, | |
| "grad_norm": 6.7281036376953125, | |
| "learning_rate": 9.790392361629123e-06, | |
| "loss": 0.7478, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 14.528412419449326, | |
| "grad_norm": 9.13178539276123, | |
| "learning_rate": 9.41742503356706e-06, | |
| "loss": 0.7528, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 14.586994727592266, | |
| "grad_norm": 7.79683780670166, | |
| "learning_rate": 9.044457705504997e-06, | |
| "loss": 0.805, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 14.645577035735208, | |
| "grad_norm": 6.039112567901611, | |
| "learning_rate": 8.671490377442937e-06, | |
| "loss": 0.7263, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 14.645577035735208, | |
| "eval_loss": 0.08391948789358139, | |
| "eval_runtime": 147.8059, | |
| "eval_samples_per_second": 3.383, | |
| "eval_steps_per_second": 0.426, | |
| "eval_wer": 0.18243442381373415, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 14.70415934387815, | |
| "grad_norm": 7.50616979598999, | |
| "learning_rate": 8.298523049380874e-06, | |
| "loss": 0.7722, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 14.76274165202109, | |
| "grad_norm": 6.403426647186279, | |
| "learning_rate": 7.925555721318812e-06, | |
| "loss": 0.7742, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 14.82132396016403, | |
| "grad_norm": 7.445984363555908, | |
| "learning_rate": 7.556318066537371e-06, | |
| "loss": 0.7505, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 14.879906268306971, | |
| "grad_norm": 7.770444869995117, | |
| "learning_rate": 7.1833507384753095e-06, | |
| "loss": 0.7524, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 14.938488576449911, | |
| "grad_norm": 6.477992057800293, | |
| "learning_rate": 6.8103834104132485e-06, | |
| "loss": 0.7477, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 14.997070884592853, | |
| "grad_norm": 10.539923667907715, | |
| "learning_rate": 6.437416082351186e-06, | |
| "loss": 0.7285, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 15.055653192735793, | |
| "grad_norm": 6.943923473358154, | |
| "learning_rate": 6.064448754289125e-06, | |
| "loss": 0.6959, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 15.114235500878735, | |
| "grad_norm": 4.48841667175293, | |
| "learning_rate": 5.691481426227062e-06, | |
| "loss": 0.7265, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 15.172817809021675, | |
| "grad_norm": 5.900568008422852, | |
| "learning_rate": 5.318514098165001e-06, | |
| "loss": 0.765, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 15.231400117164617, | |
| "grad_norm": 5.62386417388916, | |
| "learning_rate": 4.9455467701029394e-06, | |
| "loss": 0.7634, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 15.231400117164617, | |
| "eval_loss": 0.08351606130599976, | |
| "eval_runtime": 146.6151, | |
| "eval_samples_per_second": 3.41, | |
| "eval_steps_per_second": 0.43, | |
| "eval_wer": 0.1825817860300619, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 15.289982425307556, | |
| "grad_norm": 6.403947353363037, | |
| "learning_rate": 4.572579442040878e-06, | |
| "loss": 0.728, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 15.348564733450498, | |
| "grad_norm": 6.236737251281738, | |
| "learning_rate": 4.199612113978816e-06, | |
| "loss": 0.7462, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 15.407147041593438, | |
| "grad_norm": 5.554813385009766, | |
| "learning_rate": 3.826644785916754e-06, | |
| "loss": 0.7403, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 15.46572934973638, | |
| "grad_norm": 4.909285068511963, | |
| "learning_rate": 3.453677457854692e-06, | |
| "loss": 0.7653, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 15.52431165787932, | |
| "grad_norm": 5.502344608306885, | |
| "learning_rate": 3.0807101297926304e-06, | |
| "loss": 0.7319, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 15.582893966022262, | |
| "grad_norm": 7.525850772857666, | |
| "learning_rate": 2.7077428017305685e-06, | |
| "loss": 0.7295, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 15.641476274165202, | |
| "grad_norm": 7.245991230010986, | |
| "learning_rate": 2.3347754736685067e-06, | |
| "loss": 0.7382, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 15.700058582308143, | |
| "grad_norm": 5.762548923492432, | |
| "learning_rate": 1.961808145606445e-06, | |
| "loss": 0.7253, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 15.758640890451083, | |
| "grad_norm": 6.127166271209717, | |
| "learning_rate": 1.5888408175443833e-06, | |
| "loss": 0.7399, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 15.817223198594025, | |
| "grad_norm": 6.195973873138428, | |
| "learning_rate": 1.2158734894823213e-06, | |
| "loss": 0.7379, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 15.817223198594025, | |
| "eval_loss": 0.08341451734304428, | |
| "eval_runtime": 146.5011, | |
| "eval_samples_per_second": 3.413, | |
| "eval_steps_per_second": 0.43, | |
| "eval_wer": 0.18287651046271736, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 15.875805506736965, | |
| "grad_norm": 8.746485710144043, | |
| "learning_rate": 8.429061614202597e-07, | |
| "loss": 0.7027, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 15.934387814879907, | |
| "grad_norm": 7.414266109466553, | |
| "learning_rate": 4.699388333581979e-07, | |
| "loss": 0.7047, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 15.992970123022847, | |
| "grad_norm": 7.6758832931518555, | |
| "learning_rate": 1.0070117857675669e-07, | |
| "loss": 0.7324, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 16.05155243116579, | |
| "grad_norm": 12.168989181518555, | |
| "learning_rate": 2.7443730771306743e-05, | |
| "loss": 0.7807, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 16.11013473930873, | |
| "grad_norm": 5.686388969421387, | |
| "learning_rate": 2.717385437469639e-05, | |
| "loss": 0.8098, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 16.16871704745167, | |
| "grad_norm": 6.920952320098877, | |
| "learning_rate": 2.6903977978086036e-05, | |
| "loss": 0.7786, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 16.22729935559461, | |
| "grad_norm": 8.017998695373535, | |
| "learning_rate": 2.6634101581475683e-05, | |
| "loss": 0.8257, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 16.285881663737552, | |
| "grad_norm": 6.6050262451171875, | |
| "learning_rate": 2.6364225184865333e-05, | |
| "loss": 0.7671, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 16.344463971880494, | |
| "grad_norm": 8.146703720092773, | |
| "learning_rate": 2.609434878825498e-05, | |
| "loss": 0.7733, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 16.403046280023432, | |
| "grad_norm": 11.652145385742188, | |
| "learning_rate": 2.5824472391644626e-05, | |
| "loss": 0.7902, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 16.403046280023432, | |
| "eval_loss": 0.08416531980037689, | |
| "eval_runtime": 148.3349, | |
| "eval_samples_per_second": 3.371, | |
| "eval_steps_per_second": 0.425, | |
| "eval_wer": 0.18110816386678455, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 16.461628588166374, | |
| "grad_norm": 9.010910034179688, | |
| "learning_rate": 2.5554595995034276e-05, | |
| "loss": 0.7898, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 16.520210896309315, | |
| "grad_norm": 11.093189239501953, | |
| "learning_rate": 2.5284719598423923e-05, | |
| "loss": 0.7682, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 16.578793204452257, | |
| "grad_norm": 7.964006423950195, | |
| "learning_rate": 2.501484320181357e-05, | |
| "loss": 0.7481, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 16.637375512595195, | |
| "grad_norm": 10.423765182495117, | |
| "learning_rate": 2.474496680520322e-05, | |
| "loss": 0.7515, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 16.695957820738137, | |
| "grad_norm": 6.754664897918701, | |
| "learning_rate": 2.4475090408592866e-05, | |
| "loss": 0.8024, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 16.75454012888108, | |
| "grad_norm": 10.269820213317871, | |
| "learning_rate": 2.4205214011982513e-05, | |
| "loss": 0.7533, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 16.81312243702402, | |
| "grad_norm": 14.783397674560547, | |
| "learning_rate": 2.3935337615372163e-05, | |
| "loss": 0.7937, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 16.87170474516696, | |
| "grad_norm": 8.369039535522461, | |
| "learning_rate": 2.366546121876181e-05, | |
| "loss": 0.8246, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 16.9302870533099, | |
| "grad_norm": 8.320894241333008, | |
| "learning_rate": 2.3395584822151456e-05, | |
| "loss": 0.7703, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 16.988869361452842, | |
| "grad_norm": 10.491236686706543, | |
| "learning_rate": 2.3125708425541102e-05, | |
| "loss": 0.8261, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 16.988869361452842, | |
| "eval_loss": 0.0840698629617691, | |
| "eval_runtime": 145.4812, | |
| "eval_samples_per_second": 3.437, | |
| "eval_steps_per_second": 0.433, | |
| "eval_wer": 0.18493958149130563, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 17.047451669595784, | |
| "grad_norm": 6.7529377937316895, | |
| "learning_rate": 2.2855832028930752e-05, | |
| "loss": 0.8091, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 17.106033977738722, | |
| "grad_norm": 6.652218341827393, | |
| "learning_rate": 2.25859556323204e-05, | |
| "loss": 0.7695, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 17.164616285881664, | |
| "grad_norm": 7.654794692993164, | |
| "learning_rate": 2.2316079235710046e-05, | |
| "loss": 0.7926, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 17.223198594024606, | |
| "grad_norm": 8.277422904968262, | |
| "learning_rate": 2.2046202839099696e-05, | |
| "loss": 0.7702, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 17.281780902167544, | |
| "grad_norm": 15.797304153442383, | |
| "learning_rate": 2.1776326442489342e-05, | |
| "loss": 0.7597, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 17.340363210310485, | |
| "grad_norm": 6.769285202026367, | |
| "learning_rate": 2.150645004587899e-05, | |
| "loss": 0.7588, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 17.398945518453427, | |
| "grad_norm": 8.328302383422852, | |
| "learning_rate": 2.123657364926864e-05, | |
| "loss": 0.7719, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 17.45752782659637, | |
| "grad_norm": 5.7514190673828125, | |
| "learning_rate": 2.0966697252658282e-05, | |
| "loss": 0.7985, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 17.516110134739307, | |
| "grad_norm": 5.553383827209473, | |
| "learning_rate": 2.069682085604793e-05, | |
| "loss": 0.7602, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 17.57469244288225, | |
| "grad_norm": 7.833782196044922, | |
| "learning_rate": 2.042694445943758e-05, | |
| "loss": 0.7531, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 17.57469244288225, | |
| "eval_loss": 0.08400005102157593, | |
| "eval_runtime": 147.1645, | |
| "eval_samples_per_second": 3.398, | |
| "eval_steps_per_second": 0.428, | |
| "eval_wer": 0.18670792808723843, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 17.63327475102519, | |
| "grad_norm": 6.075071334838867, | |
| "learning_rate": 2.0157068062827225e-05, | |
| "loss": 0.7471, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 17.691857059168132, | |
| "grad_norm": 6.894543647766113, | |
| "learning_rate": 1.9887191666216872e-05, | |
| "loss": 0.7926, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 17.75043936731107, | |
| "grad_norm": 11.549782752990723, | |
| "learning_rate": 1.9617315269606522e-05, | |
| "loss": 0.7308, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 17.809021675454012, | |
| "grad_norm": 7.361614227294922, | |
| "learning_rate": 1.934743887299617e-05, | |
| "loss": 0.7545, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 17.867603983596954, | |
| "grad_norm": 12.995938301086426, | |
| "learning_rate": 1.9077562476385815e-05, | |
| "loss": 0.7809, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 17.926186291739896, | |
| "grad_norm": 4.978572368621826, | |
| "learning_rate": 1.8807686079775465e-05, | |
| "loss": 0.7587, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 17.984768599882834, | |
| "grad_norm": 6.543401718139648, | |
| "learning_rate": 1.853780968316511e-05, | |
| "loss": 0.7432, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 18.043350908025776, | |
| "grad_norm": 8.726702690124512, | |
| "learning_rate": 1.8267933286554758e-05, | |
| "loss": 0.7305, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 18.101933216168717, | |
| "grad_norm": 6.32004976272583, | |
| "learning_rate": 1.7998056889944405e-05, | |
| "loss": 0.7454, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 18.16051552431166, | |
| "grad_norm": 5.896217346191406, | |
| "learning_rate": 1.7728180493334055e-05, | |
| "loss": 0.7166, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 18.16051552431166, | |
| "eval_loss": 0.0838567316532135, | |
| "eval_runtime": 147.3922, | |
| "eval_samples_per_second": 3.392, | |
| "eval_steps_per_second": 0.427, | |
| "eval_wer": 0.1905393457117595, | |
| "step": 31000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 37554, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 22, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.032253787111424e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |