{ "best_metric": 0.4557079945799458, "best_model_checkpoint": "whisper-tiny-danish-2/checkpoint-12000", "epoch": 3.00555, "eval_steps": 1000, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001, "grad_norm": 22.681325912475586, "learning_rate": 2.35e-06, "loss": 4.0513, "step": 50 }, { "epoch": 0.002, "grad_norm": 7.509825706481934, "learning_rate": 4.85e-06, "loss": 2.4332, "step": 100 }, { "epoch": 0.003, "grad_norm": 7.44932746887207, "learning_rate": 7.35e-06, "loss": 1.8256, "step": 150 }, { "epoch": 0.004, "grad_norm": 6.986607551574707, "learning_rate": 9.85e-06, "loss": 1.6391, "step": 200 }, { "epoch": 0.005, "grad_norm": 7.076284408569336, "learning_rate": 1.235e-05, "loss": 1.5296, "step": 250 }, { "epoch": 0.006, "grad_norm": 6.501603603363037, "learning_rate": 1.485e-05, "loss": 1.4288, "step": 300 }, { "epoch": 0.007, "grad_norm": 6.999312400817871, "learning_rate": 1.7349999999999998e-05, "loss": 1.3068, "step": 350 }, { "epoch": 0.008, "grad_norm": 6.261101722717285, "learning_rate": 1.985e-05, "loss": 1.2373, "step": 400 }, { "epoch": 0.009, "grad_norm": 6.493436813354492, "learning_rate": 2.235e-05, "loss": 1.2242, "step": 450 }, { "epoch": 0.01, "grad_norm": 6.082971096038818, "learning_rate": 2.485e-05, "loss": 1.1575, "step": 500 }, { "epoch": 0.011, "grad_norm": 6.2460174560546875, "learning_rate": 2.7350000000000004e-05, "loss": 1.093, "step": 550 }, { "epoch": 0.012, "grad_norm": 5.896036148071289, "learning_rate": 2.985e-05, "loss": 1.0961, "step": 600 }, { "epoch": 0.013, "grad_norm": 5.539938926696777, "learning_rate": 3.235e-05, "loss": 1.0036, "step": 650 }, { "epoch": 0.014, "grad_norm": 5.470737934112549, "learning_rate": 3.485e-05, "loss": 0.974, "step": 700 }, { "epoch": 0.015, "grad_norm": 5.766882419586182, "learning_rate": 3.735e-05, "loss": 0.9315, "step": 750 }, { "epoch": 0.016, "grad_norm": 5.046978950500488, "learning_rate": 3.9850000000000006e-05, "loss": 0.9166, "step": 800 }, { "epoch": 0.017, "grad_norm": 5.6074957847595215, "learning_rate": 4.235e-05, "loss": 0.9883, "step": 850 }, { "epoch": 0.018, "grad_norm": 5.528851509094238, "learning_rate": 4.4850000000000006e-05, "loss": 0.9578, "step": 900 }, { "epoch": 0.019, "grad_norm": 5.047084331512451, "learning_rate": 4.735e-05, "loss": 0.8865, "step": 950 }, { "epoch": 0.02, "grad_norm": 5.150805473327637, "learning_rate": 4.9850000000000006e-05, "loss": 0.8565, "step": 1000 }, { "epoch": 0.02, "eval_loss": 1.2942239046096802, "eval_runtime": 216.6252, "eval_samples_per_second": 4.653, "eval_steps_per_second": 0.148, "eval_wer": 0.6471036585365854, "step": 1000 }, { "epoch": 0.021, "grad_norm": 4.584412097930908, "learning_rate": 4.9952040816326534e-05, "loss": 0.8296, "step": 1050 }, { "epoch": 0.022, "grad_norm": 5.257778167724609, "learning_rate": 4.9901020408163266e-05, "loss": 0.802, "step": 1100 }, { "epoch": 0.023, "grad_norm": 5.010299205780029, "learning_rate": 4.9850000000000006e-05, "loss": 0.7911, "step": 1150 }, { "epoch": 0.024, "grad_norm": 5.242851734161377, "learning_rate": 4.979897959183674e-05, "loss": 0.8318, "step": 1200 }, { "epoch": 0.025, "grad_norm": 5.349483489990234, "learning_rate": 4.974795918367347e-05, "loss": 0.8327, "step": 1250 }, { "epoch": 0.026, "grad_norm": 6.035111427307129, "learning_rate": 4.96969387755102e-05, "loss": 0.8355, "step": 1300 }, { "epoch": 0.027, "grad_norm": 5.254024982452393, "learning_rate": 4.964591836734694e-05, "loss": 0.8851, "step": 1350 }, { "epoch": 0.028, "grad_norm": 6.512954235076904, "learning_rate": 4.959489795918368e-05, "loss": 0.8718, "step": 1400 }, { "epoch": 0.029, "grad_norm": 4.468319416046143, "learning_rate": 4.954387755102041e-05, "loss": 0.815, "step": 1450 }, { "epoch": 0.03, "grad_norm": 4.5422492027282715, "learning_rate": 4.9492857142857146e-05, "loss": 0.8086, "step": 1500 }, { "epoch": 0.031, "grad_norm": 4.861804008483887, "learning_rate": 4.944183673469388e-05, "loss": 0.7848, "step": 1550 }, { "epoch": 0.032, "grad_norm": 5.152141571044922, "learning_rate": 4.939081632653062e-05, "loss": 0.7382, "step": 1600 }, { "epoch": 0.033, "grad_norm": 4.768085479736328, "learning_rate": 4.933979591836735e-05, "loss": 0.7786, "step": 1650 }, { "epoch": 0.034, "grad_norm": 4.68101167678833, "learning_rate": 4.928877551020408e-05, "loss": 0.733, "step": 1700 }, { "epoch": 0.035, "grad_norm": 4.635968208312988, "learning_rate": 4.9237755102040814e-05, "loss": 0.7032, "step": 1750 }, { "epoch": 0.036, "grad_norm": 5.221863269805908, "learning_rate": 4.918673469387755e-05, "loss": 0.703, "step": 1800 }, { "epoch": 0.037, "grad_norm": 5.017695426940918, "learning_rate": 4.913571428571429e-05, "loss": 0.6421, "step": 1850 }, { "epoch": 0.038, "grad_norm": 4.761963367462158, "learning_rate": 4.9084693877551025e-05, "loss": 0.703, "step": 1900 }, { "epoch": 0.039, "grad_norm": 4.619095325469971, "learning_rate": 4.903367346938776e-05, "loss": 0.7374, "step": 1950 }, { "epoch": 0.04, "grad_norm": 4.652743816375732, "learning_rate": 4.8982653061224496e-05, "loss": 0.7434, "step": 2000 }, { "epoch": 0.04, "eval_loss": 1.0815221071243286, "eval_runtime": 235.89, "eval_samples_per_second": 4.273, "eval_steps_per_second": 0.136, "eval_wer": 0.5818512872628726, "step": 2000 }, { "epoch": 0.041, "grad_norm": 5.1539177894592285, "learning_rate": 4.893163265306123e-05, "loss": 0.7661, "step": 2050 }, { "epoch": 0.042, "grad_norm": 4.277270793914795, "learning_rate": 4.888061224489796e-05, "loss": 0.6908, "step": 2100 }, { "epoch": 0.043, "grad_norm": 4.588935852050781, "learning_rate": 4.882959183673469e-05, "loss": 0.6411, "step": 2150 }, { "epoch": 0.044, "grad_norm": 4.606882572174072, "learning_rate": 4.877857142857143e-05, "loss": 0.6492, "step": 2200 }, { "epoch": 0.045, "grad_norm": 4.498349189758301, "learning_rate": 4.8727551020408165e-05, "loss": 0.6592, "step": 2250 }, { "epoch": 0.046, "grad_norm": 4.668141841888428, "learning_rate": 4.8676530612244904e-05, "loss": 0.6865, "step": 2300 }, { "epoch": 0.047, "grad_norm": 4.357521057128906, "learning_rate": 4.8625510204081636e-05, "loss": 0.676, "step": 2350 }, { "epoch": 0.048, "grad_norm": 4.414557933807373, "learning_rate": 4.857448979591837e-05, "loss": 0.6496, "step": 2400 }, { "epoch": 0.049, "grad_norm": 4.414867877960205, "learning_rate": 4.852346938775511e-05, "loss": 0.6679, "step": 2450 }, { "epoch": 0.05, "grad_norm": 4.020086765289307, "learning_rate": 4.847244897959184e-05, "loss": 0.6364, "step": 2500 }, { "epoch": 0.051, "grad_norm": 5.012465476989746, "learning_rate": 4.842142857142857e-05, "loss": 0.6624, "step": 2550 }, { "epoch": 0.052, "grad_norm": 4.224608421325684, "learning_rate": 4.8370408163265305e-05, "loss": 0.6609, "step": 2600 }, { "epoch": 0.053, "grad_norm": 4.476141929626465, "learning_rate": 4.8319387755102044e-05, "loss": 0.6402, "step": 2650 }, { "epoch": 0.054, "grad_norm": 4.985313892364502, "learning_rate": 4.8268367346938776e-05, "loss": 0.6706, "step": 2700 }, { "epoch": 0.055, "grad_norm": 4.675138473510742, "learning_rate": 4.8217346938775515e-05, "loss": 0.7041, "step": 2750 }, { "epoch": 0.056, "grad_norm": 5.3597846031188965, "learning_rate": 4.816632653061225e-05, "loss": 0.6993, "step": 2800 }, { "epoch": 0.057, "grad_norm": 4.724060535430908, "learning_rate": 4.811530612244898e-05, "loss": 0.6681, "step": 2850 }, { "epoch": 0.058, "grad_norm": 4.375901222229004, "learning_rate": 4.806428571428572e-05, "loss": 0.6205, "step": 2900 }, { "epoch": 0.059, "grad_norm": 4.6154279708862305, "learning_rate": 4.801326530612245e-05, "loss": 0.5887, "step": 2950 }, { "epoch": 0.06, "grad_norm": 4.766662120819092, "learning_rate": 4.7962244897959184e-05, "loss": 0.6056, "step": 3000 }, { "epoch": 0.06, "eval_loss": 1.0121757984161377, "eval_runtime": 218.7463, "eval_samples_per_second": 4.608, "eval_steps_per_second": 0.146, "eval_wer": 0.5472984417344173, "step": 3000 }, { "epoch": 0.061, "grad_norm": 4.1083550453186035, "learning_rate": 4.791122448979592e-05, "loss": 0.5938, "step": 3050 }, { "epoch": 0.062, "grad_norm": 4.648180961608887, "learning_rate": 4.7860204081632655e-05, "loss": 0.6083, "step": 3100 }, { "epoch": 0.063, "grad_norm": 4.021754264831543, "learning_rate": 4.780918367346939e-05, "loss": 0.6132, "step": 3150 }, { "epoch": 0.064, "grad_norm": 3.82786226272583, "learning_rate": 4.775816326530613e-05, "loss": 0.6348, "step": 3200 }, { "epoch": 0.065, "grad_norm": 4.385377407073975, "learning_rate": 4.770714285714286e-05, "loss": 0.602, "step": 3250 }, { "epoch": 0.066, "grad_norm": 5.215423107147217, "learning_rate": 4.76561224489796e-05, "loss": 0.6135, "step": 3300 }, { "epoch": 0.067, "grad_norm": 4.4256486892700195, "learning_rate": 4.760510204081633e-05, "loss": 0.6353, "step": 3350 }, { "epoch": 0.068, "grad_norm": 4.338476181030273, "learning_rate": 4.755408163265306e-05, "loss": 0.6114, "step": 3400 }, { "epoch": 0.069, "grad_norm": 4.410732269287109, "learning_rate": 4.7503061224489795e-05, "loss": 0.612, "step": 3450 }, { "epoch": 0.07, "grad_norm": 4.397231578826904, "learning_rate": 4.7452040816326534e-05, "loss": 0.5984, "step": 3500 }, { "epoch": 0.071, "grad_norm": 4.233676433563232, "learning_rate": 4.740102040816327e-05, "loss": 0.6419, "step": 3550 }, { "epoch": 0.072, "grad_norm": 4.81524658203125, "learning_rate": 4.735e-05, "loss": 0.6158, "step": 3600 }, { "epoch": 0.073, "grad_norm": 4.236979961395264, "learning_rate": 4.729897959183674e-05, "loss": 0.6041, "step": 3650 }, { "epoch": 0.074, "grad_norm": 4.120030403137207, "learning_rate": 4.724795918367347e-05, "loss": 0.6378, "step": 3700 }, { "epoch": 0.075, "grad_norm": 3.8795013427734375, "learning_rate": 4.719693877551021e-05, "loss": 0.6577, "step": 3750 }, { "epoch": 0.076, "grad_norm": 4.2965087890625, "learning_rate": 4.714591836734694e-05, "loss": 0.6341, "step": 3800 }, { "epoch": 0.077, "grad_norm": 4.946217060089111, "learning_rate": 4.7094897959183674e-05, "loss": 0.6217, "step": 3850 }, { "epoch": 0.078, "grad_norm": 4.450223445892334, "learning_rate": 4.7043877551020407e-05, "loss": 0.6383, "step": 3900 }, { "epoch": 1.00085, "grad_norm": 5.3936591148376465, "learning_rate": 4.6992857142857146e-05, "loss": 0.6186, "step": 3950 }, { "epoch": 1.00185, "grad_norm": 4.123908042907715, "learning_rate": 4.694183673469388e-05, "loss": 0.6141, "step": 4000 }, { "epoch": 1.00185, "eval_loss": 0.9478016495704651, "eval_runtime": 215.0961, "eval_samples_per_second": 4.686, "eval_steps_per_second": 0.149, "eval_wer": 0.5331554878048781, "step": 4000 }, { "epoch": 1.00285, "grad_norm": 4.490115642547607, "learning_rate": 4.689081632653061e-05, "loss": 0.6373, "step": 4050 }, { "epoch": 1.00385, "grad_norm": 4.364250659942627, "learning_rate": 4.683979591836735e-05, "loss": 0.641, "step": 4100 }, { "epoch": 1.00485, "grad_norm": 4.241596221923828, "learning_rate": 4.678877551020409e-05, "loss": 0.6284, "step": 4150 }, { "epoch": 1.00585, "grad_norm": 4.362817764282227, "learning_rate": 4.673775510204082e-05, "loss": 0.6346, "step": 4200 }, { "epoch": 1.00685, "grad_norm": 3.9235153198242188, "learning_rate": 4.668673469387755e-05, "loss": 0.6007, "step": 4250 }, { "epoch": 1.00785, "grad_norm": 3.7977848052978516, "learning_rate": 4.6635714285714286e-05, "loss": 0.5891, "step": 4300 }, { "epoch": 1.00885, "grad_norm": 4.0569305419921875, "learning_rate": 4.6584693877551025e-05, "loss": 0.6123, "step": 4350 }, { "epoch": 1.00985, "grad_norm": 4.0435872077941895, "learning_rate": 4.653367346938776e-05, "loss": 0.5948, "step": 4400 }, { "epoch": 1.01085, "grad_norm": 3.7464935779571533, "learning_rate": 4.648265306122449e-05, "loss": 0.5665, "step": 4450 }, { "epoch": 1.01185, "grad_norm": 3.99239182472229, "learning_rate": 4.643163265306122e-05, "loss": 0.6124, "step": 4500 }, { "epoch": 1.01285, "grad_norm": 3.6230008602142334, "learning_rate": 4.638061224489796e-05, "loss": 0.5632, "step": 4550 }, { "epoch": 1.01385, "grad_norm": 3.5754306316375732, "learning_rate": 4.63295918367347e-05, "loss": 0.5425, "step": 4600 }, { "epoch": 1.01485, "grad_norm": 3.7989814281463623, "learning_rate": 4.627857142857143e-05, "loss": 0.5388, "step": 4650 }, { "epoch": 1.01585, "grad_norm": 3.778059959411621, "learning_rate": 4.6227551020408165e-05, "loss": 0.5297, "step": 4700 }, { "epoch": 1.01685, "grad_norm": 3.987022876739502, "learning_rate": 4.61765306122449e-05, "loss": 0.58, "step": 4750 }, { "epoch": 1.01785, "grad_norm": 3.883904218673706, "learning_rate": 4.6125510204081636e-05, "loss": 0.5964, "step": 4800 }, { "epoch": 1.01885, "grad_norm": 4.473369598388672, "learning_rate": 4.607448979591837e-05, "loss": 0.5512, "step": 4850 }, { "epoch": 1.01985, "grad_norm": 4.040229797363281, "learning_rate": 4.60234693877551e-05, "loss": 0.5292, "step": 4900 }, { "epoch": 1.02085, "grad_norm": 4.538361072540283, "learning_rate": 4.597244897959183e-05, "loss": 0.5196, "step": 4950 }, { "epoch": 1.02185, "grad_norm": 4.212509632110596, "learning_rate": 4.592142857142858e-05, "loss": 0.5085, "step": 5000 }, { "epoch": 1.02185, "eval_loss": 0.9282792806625366, "eval_runtime": 224.1163, "eval_samples_per_second": 4.498, "eval_steps_per_second": 0.143, "eval_wer": 0.5289634146341463, "step": 5000 }, { "epoch": 1.02285, "grad_norm": 3.8673441410064697, "learning_rate": 4.587040816326531e-05, "loss": 0.4988, "step": 5050 }, { "epoch": 1.02385, "grad_norm": 3.70070743560791, "learning_rate": 4.5819387755102044e-05, "loss": 0.5424, "step": 5100 }, { "epoch": 1.02485, "grad_norm": 4.379025459289551, "learning_rate": 4.5768367346938776e-05, "loss": 0.5425, "step": 5150 }, { "epoch": 1.02585, "grad_norm": 4.310212135314941, "learning_rate": 4.5717346938775515e-05, "loss": 0.5466, "step": 5200 }, { "epoch": 1.02685, "grad_norm": 4.193725109100342, "learning_rate": 4.566632653061225e-05, "loss": 0.5949, "step": 5250 }, { "epoch": 1.02785, "grad_norm": 4.096522808074951, "learning_rate": 4.561530612244898e-05, "loss": 0.6003, "step": 5300 }, { "epoch": 1.02885, "grad_norm": 4.387059211730957, "learning_rate": 4.556428571428571e-05, "loss": 0.5672, "step": 5350 }, { "epoch": 1.02985, "grad_norm": 4.410869598388672, "learning_rate": 4.551326530612245e-05, "loss": 0.5512, "step": 5400 }, { "epoch": 1.03085, "grad_norm": 4.141435623168945, "learning_rate": 4.546224489795919e-05, "loss": 0.5534, "step": 5450 }, { "epoch": 1.03185, "grad_norm": 3.6259546279907227, "learning_rate": 4.541122448979592e-05, "loss": 0.4962, "step": 5500 }, { "epoch": 1.03285, "grad_norm": 4.343920707702637, "learning_rate": 4.5360204081632655e-05, "loss": 0.5411, "step": 5550 }, { "epoch": 1.03385, "grad_norm": 4.238042831420898, "learning_rate": 4.530918367346939e-05, "loss": 0.5217, "step": 5600 }, { "epoch": 1.03485, "grad_norm": 4.237799167633057, "learning_rate": 4.525816326530613e-05, "loss": 0.4973, "step": 5650 }, { "epoch": 1.03585, "grad_norm": 4.474476337432861, "learning_rate": 4.520714285714286e-05, "loss": 0.4967, "step": 5700 }, { "epoch": 1.03685, "grad_norm": 3.6932973861694336, "learning_rate": 4.515612244897959e-05, "loss": 0.4524, "step": 5750 }, { "epoch": 1.03785, "grad_norm": 4.185140132904053, "learning_rate": 4.5105102040816324e-05, "loss": 0.4851, "step": 5800 }, { "epoch": 1.03885, "grad_norm": 4.114623069763184, "learning_rate": 4.505408163265306e-05, "loss": 0.5371, "step": 5850 }, { "epoch": 1.03985, "grad_norm": 3.961071729660034, "learning_rate": 4.5003061224489795e-05, "loss": 0.5361, "step": 5900 }, { "epoch": 1.04085, "grad_norm": 3.9876036643981934, "learning_rate": 4.4952040816326534e-05, "loss": 0.5576, "step": 5950 }, { "epoch": 1.04185, "grad_norm": 4.096221446990967, "learning_rate": 4.490102040816327e-05, "loss": 0.5108, "step": 6000 }, { "epoch": 1.04185, "eval_loss": 0.8966282606124878, "eval_runtime": 214.6844, "eval_samples_per_second": 4.695, "eval_steps_per_second": 0.149, "eval_wer": 0.4938177506775068, "step": 6000 }, { "epoch": 1.04285, "grad_norm": 3.3264214992523193, "learning_rate": 4.4850000000000006e-05, "loss": 0.4692, "step": 6050 }, { "epoch": 1.04385, "grad_norm": 4.144850730895996, "learning_rate": 4.479897959183674e-05, "loss": 0.4676, "step": 6100 }, { "epoch": 1.04485, "grad_norm": 4.427196502685547, "learning_rate": 4.474795918367347e-05, "loss": 0.4796, "step": 6150 }, { "epoch": 1.04585, "grad_norm": 3.8419079780578613, "learning_rate": 4.46969387755102e-05, "loss": 0.5092, "step": 6200 }, { "epoch": 1.04685, "grad_norm": 4.1315436363220215, "learning_rate": 4.464591836734694e-05, "loss": 0.4992, "step": 6250 }, { "epoch": 1.04785, "grad_norm": 3.6058404445648193, "learning_rate": 4.4594897959183674e-05, "loss": 0.4844, "step": 6300 }, { "epoch": 1.04885, "grad_norm": 3.776262044906616, "learning_rate": 4.454387755102041e-05, "loss": 0.4886, "step": 6350 }, { "epoch": 1.04985, "grad_norm": 3.4971370697021484, "learning_rate": 4.4492857142857146e-05, "loss": 0.4755, "step": 6400 }, { "epoch": 1.05085, "grad_norm": 4.299288272857666, "learning_rate": 4.444183673469388e-05, "loss": 0.4826, "step": 6450 }, { "epoch": 1.05185, "grad_norm": 3.4071223735809326, "learning_rate": 4.439081632653062e-05, "loss": 0.5001, "step": 6500 }, { "epoch": 1.05285, "grad_norm": 3.8092288970947266, "learning_rate": 4.433979591836735e-05, "loss": 0.4797, "step": 6550 }, { "epoch": 1.05385, "grad_norm": 4.502504348754883, "learning_rate": 4.428877551020408e-05, "loss": 0.4928, "step": 6600 }, { "epoch": 1.05485, "grad_norm": 4.641124725341797, "learning_rate": 4.4237755102040814e-05, "loss": 0.5373, "step": 6650 }, { "epoch": 1.05585, "grad_norm": 4.549639701843262, "learning_rate": 4.4186734693877554e-05, "loss": 0.529, "step": 6700 }, { "epoch": 1.05685, "grad_norm": 3.3781280517578125, "learning_rate": 4.4135714285714286e-05, "loss": 0.5129, "step": 6750 }, { "epoch": 1.05785, "grad_norm": 4.276547431945801, "learning_rate": 4.408469387755102e-05, "loss": 0.478, "step": 6800 }, { "epoch": 1.05885, "grad_norm": 4.88979959487915, "learning_rate": 4.403367346938776e-05, "loss": 0.4405, "step": 6850 }, { "epoch": 1.05985, "grad_norm": 5.3919267654418945, "learning_rate": 4.3982653061224497e-05, "loss": 0.4544, "step": 6900 }, { "epoch": 1.06085, "grad_norm": 3.975532293319702, "learning_rate": 4.393163265306123e-05, "loss": 0.4517, "step": 6950 }, { "epoch": 1.06185, "grad_norm": 3.94978404045105, "learning_rate": 4.388061224489796e-05, "loss": 0.4604, "step": 7000 }, { "epoch": 1.06185, "eval_loss": 0.8832055926322937, "eval_runtime": 217.1866, "eval_samples_per_second": 4.641, "eval_steps_per_second": 0.147, "eval_wer": 0.49817920054200543, "step": 7000 }, { "epoch": 1.06285, "grad_norm": 3.804979085922241, "learning_rate": 4.3829591836734694e-05, "loss": 0.4659, "step": 7050 }, { "epoch": 1.06385, "grad_norm": 4.515665054321289, "learning_rate": 4.377857142857143e-05, "loss": 0.4879, "step": 7100 }, { "epoch": 1.06485, "grad_norm": 4.103418827056885, "learning_rate": 4.3727551020408165e-05, "loss": 0.4608, "step": 7150 }, { "epoch": 1.06585, "grad_norm": 3.9994966983795166, "learning_rate": 4.36765306122449e-05, "loss": 0.4712, "step": 7200 }, { "epoch": 1.06685, "grad_norm": 3.973745107650757, "learning_rate": 4.362551020408163e-05, "loss": 0.4873, "step": 7250 }, { "epoch": 1.06785, "grad_norm": 3.7433559894561768, "learning_rate": 4.357448979591837e-05, "loss": 0.4735, "step": 7300 }, { "epoch": 1.06885, "grad_norm": 3.822484254837036, "learning_rate": 4.352346938775511e-05, "loss": 0.4712, "step": 7350 }, { "epoch": 1.06985, "grad_norm": 3.4630181789398193, "learning_rate": 4.347244897959184e-05, "loss": 0.4583, "step": 7400 }, { "epoch": 1.07085, "grad_norm": 4.550868034362793, "learning_rate": 4.342142857142857e-05, "loss": 0.493, "step": 7450 }, { "epoch": 1.07185, "grad_norm": 4.130419731140137, "learning_rate": 4.337142857142857e-05, "loss": 0.4815, "step": 7500 }, { "epoch": 1.07285, "grad_norm": 3.5420877933502197, "learning_rate": 4.332040816326531e-05, "loss": 0.4735, "step": 7550 }, { "epoch": 1.07385, "grad_norm": 4.168927192687988, "learning_rate": 4.326938775510204e-05, "loss": 0.4919, "step": 7600 }, { "epoch": 1.07485, "grad_norm": 3.9668235778808594, "learning_rate": 4.3218367346938775e-05, "loss": 0.5107, "step": 7650 }, { "epoch": 1.07585, "grad_norm": 4.459245681762695, "learning_rate": 4.316734693877551e-05, "loss": 0.5011, "step": 7700 }, { "epoch": 1.07685, "grad_norm": 4.179884910583496, "learning_rate": 4.311632653061225e-05, "loss": 0.4806, "step": 7750 }, { "epoch": 1.07785, "grad_norm": 5.189551830291748, "learning_rate": 4.3065306122448986e-05, "loss": 0.4973, "step": 7800 }, { "epoch": 2.0007, "grad_norm": 5.872171401977539, "learning_rate": 4.301428571428572e-05, "loss": 0.4958, "step": 7850 }, { "epoch": 2.0017, "grad_norm": 3.948779582977295, "learning_rate": 4.296326530612245e-05, "loss": 0.4801, "step": 7900 }, { "epoch": 2.0027, "grad_norm": 3.7434325218200684, "learning_rate": 4.291224489795918e-05, "loss": 0.5009, "step": 7950 }, { "epoch": 2.0037, "grad_norm": 5.404001235961914, "learning_rate": 4.286122448979592e-05, "loss": 0.5144, "step": 8000 }, { "epoch": 2.0037, "eval_loss": 0.8577666878700256, "eval_runtime": 221.0367, "eval_samples_per_second": 4.56, "eval_steps_per_second": 0.145, "eval_wer": 0.4876778455284553, "step": 8000 }, { "epoch": 2.0047, "grad_norm": 3.783438205718994, "learning_rate": 4.2810204081632654e-05, "loss": 0.4987, "step": 8050 }, { "epoch": 2.0057, "grad_norm": 3.9520227909088135, "learning_rate": 4.275918367346939e-05, "loss": 0.5081, "step": 8100 }, { "epoch": 2.0067, "grad_norm": 3.9701645374298096, "learning_rate": 4.2708163265306126e-05, "loss": 0.4799, "step": 8150 }, { "epoch": 2.0077, "grad_norm": 3.5120835304260254, "learning_rate": 4.265714285714286e-05, "loss": 0.4676, "step": 8200 }, { "epoch": 2.0087, "grad_norm": 4.348593235015869, "learning_rate": 4.26061224489796e-05, "loss": 0.4853, "step": 8250 }, { "epoch": 2.0097, "grad_norm": 3.6884608268737793, "learning_rate": 4.255510204081633e-05, "loss": 0.4803, "step": 8300 }, { "epoch": 2.0107, "grad_norm": 3.584364414215088, "learning_rate": 4.250408163265306e-05, "loss": 0.4561, "step": 8350 }, { "epoch": 2.0117, "grad_norm": 3.6865909099578857, "learning_rate": 4.24530612244898e-05, "loss": 0.4871, "step": 8400 }, { "epoch": 2.0127, "grad_norm": 3.9009077548980713, "learning_rate": 4.2402040816326533e-05, "loss": 0.4599, "step": 8450 }, { "epoch": 2.0137, "grad_norm": 3.513470411300659, "learning_rate": 4.2351020408163266e-05, "loss": 0.4316, "step": 8500 }, { "epoch": 2.0147, "grad_norm": 3.8565685749053955, "learning_rate": 4.23e-05, "loss": 0.4379, "step": 8550 }, { "epoch": 2.0157, "grad_norm": 3.2444798946380615, "learning_rate": 4.224897959183674e-05, "loss": 0.4287, "step": 8600 }, { "epoch": 2.0167, "grad_norm": 3.9673781394958496, "learning_rate": 4.219795918367347e-05, "loss": 0.4613, "step": 8650 }, { "epoch": 2.0177, "grad_norm": 8.936363220214844, "learning_rate": 4.214693877551021e-05, "loss": 0.4869, "step": 8700 }, { "epoch": 2.0187, "grad_norm": 3.6102094650268555, "learning_rate": 4.209591836734694e-05, "loss": 0.4523, "step": 8750 }, { "epoch": 2.0197, "grad_norm": 3.8444738388061523, "learning_rate": 4.2044897959183673e-05, "loss": 0.4334, "step": 8800 }, { "epoch": 2.0207, "grad_norm": 3.0468149185180664, "learning_rate": 4.199387755102041e-05, "loss": 0.4219, "step": 8850 }, { "epoch": 2.0217, "grad_norm": 3.563493251800537, "learning_rate": 4.1942857142857145e-05, "loss": 0.413, "step": 8900 }, { "epoch": 2.0227, "grad_norm": 3.6925594806671143, "learning_rate": 4.189183673469388e-05, "loss": 0.4028, "step": 8950 }, { "epoch": 2.0237, "grad_norm": 3.872044086456299, "learning_rate": 4.184081632653061e-05, "loss": 0.4411, "step": 9000 }, { "epoch": 2.0237, "eval_loss": 0.8616846203804016, "eval_runtime": 215.4703, "eval_samples_per_second": 4.678, "eval_steps_per_second": 0.149, "eval_wer": 0.47925135501355015, "step": 9000 }, { "epoch": 2.0247, "grad_norm": 3.9783222675323486, "learning_rate": 4.178979591836735e-05, "loss": 0.44, "step": 9050 }, { "epoch": 2.0257, "grad_norm": 5.306482315063477, "learning_rate": 4.173877551020408e-05, "loss": 0.4384, "step": 9100 }, { "epoch": 2.0267, "grad_norm": 4.326815128326416, "learning_rate": 4.168775510204082e-05, "loss": 0.4873, "step": 9150 }, { "epoch": 2.0277, "grad_norm": 3.9018495082855225, "learning_rate": 4.163673469387755e-05, "loss": 0.4974, "step": 9200 }, { "epoch": 2.0287, "grad_norm": 3.4675261974334717, "learning_rate": 4.158571428571429e-05, "loss": 0.4705, "step": 9250 }, { "epoch": 2.0297, "grad_norm": 4.265820026397705, "learning_rate": 4.1534693877551024e-05, "loss": 0.4473, "step": 9300 }, { "epoch": 2.0307, "grad_norm": 3.4740707874298096, "learning_rate": 4.1483673469387756e-05, "loss": 0.4679, "step": 9350 }, { "epoch": 2.0317, "grad_norm": 3.339444637298584, "learning_rate": 4.143265306122449e-05, "loss": 0.4065, "step": 9400 }, { "epoch": 2.0327, "grad_norm": 3.825657606124878, "learning_rate": 4.138163265306123e-05, "loss": 0.4436, "step": 9450 }, { "epoch": 2.0337, "grad_norm": 3.392925262451172, "learning_rate": 4.133061224489796e-05, "loss": 0.4285, "step": 9500 }, { "epoch": 2.0347, "grad_norm": 3.815762519836426, "learning_rate": 4.127959183673469e-05, "loss": 0.4123, "step": 9550 }, { "epoch": 2.0357, "grad_norm": 4.090697765350342, "learning_rate": 4.122857142857143e-05, "loss": 0.408, "step": 9600 }, { "epoch": 2.0367, "grad_norm": 3.2577061653137207, "learning_rate": 4.1177551020408164e-05, "loss": 0.3751, "step": 9650 }, { "epoch": 2.0377, "grad_norm": 3.391096591949463, "learning_rate": 4.11265306122449e-05, "loss": 0.3885, "step": 9700 }, { "epoch": 2.0387, "grad_norm": 3.540929079055786, "learning_rate": 4.1075510204081636e-05, "loss": 0.4434, "step": 9750 }, { "epoch": 2.0397, "grad_norm": 4.215907096862793, "learning_rate": 4.102448979591837e-05, "loss": 0.436, "step": 9800 }, { "epoch": 2.0407, "grad_norm": 4.207083225250244, "learning_rate": 4.09734693877551e-05, "loss": 0.4699, "step": 9850 }, { "epoch": 2.0417, "grad_norm": 3.9734325408935547, "learning_rate": 4.092244897959184e-05, "loss": 0.4301, "step": 9900 }, { "epoch": 2.0427, "grad_norm": 3.023761510848999, "learning_rate": 4.087142857142857e-05, "loss": 0.3889, "step": 9950 }, { "epoch": 2.0437, "grad_norm": 3.4388654232025146, "learning_rate": 4.0820408163265304e-05, "loss": 0.3835, "step": 10000 }, { "epoch": 2.0437, "eval_loss": 0.8521081805229187, "eval_runtime": 215.0513, "eval_samples_per_second": 4.687, "eval_steps_per_second": 0.149, "eval_wer": 0.47412771002710025, "step": 10000 }, { "epoch": 2.0447, "grad_norm": 3.153373956680298, "learning_rate": 4.076938775510204e-05, "loss": 0.3933, "step": 10050 }, { "epoch": 2.0457, "grad_norm": 4.276921272277832, "learning_rate": 4.071836734693878e-05, "loss": 0.42, "step": 10100 }, { "epoch": 2.0467, "grad_norm": 3.5044806003570557, "learning_rate": 4.0667346938775515e-05, "loss": 0.4131, "step": 10150 }, { "epoch": 2.0477, "grad_norm": 3.862910032272339, "learning_rate": 4.061632653061225e-05, "loss": 0.4076, "step": 10200 }, { "epoch": 2.0487, "grad_norm": 3.593726396560669, "learning_rate": 4.056530612244898e-05, "loss": 0.397, "step": 10250 }, { "epoch": 2.0497, "grad_norm": 3.8984551429748535, "learning_rate": 4.051428571428572e-05, "loss": 0.398, "step": 10300 }, { "epoch": 2.0507, "grad_norm": 2.999417781829834, "learning_rate": 4.046326530612245e-05, "loss": 0.3986, "step": 10350 }, { "epoch": 2.0517, "grad_norm": 3.8147029876708984, "learning_rate": 4.041224489795918e-05, "loss": 0.4151, "step": 10400 }, { "epoch": 2.0527, "grad_norm": 3.7409307956695557, "learning_rate": 4.0361224489795915e-05, "loss": 0.4006, "step": 10450 }, { "epoch": 2.0537, "grad_norm": 4.3039445877075195, "learning_rate": 4.0310204081632655e-05, "loss": 0.3995, "step": 10500 }, { "epoch": 2.0547, "grad_norm": 3.9257168769836426, "learning_rate": 4.0259183673469394e-05, "loss": 0.4485, "step": 10550 }, { "epoch": 2.0557, "grad_norm": 3.795719623565674, "learning_rate": 4.0208163265306126e-05, "loss": 0.4438, "step": 10600 }, { "epoch": 2.0567, "grad_norm": 4.891661643981934, "learning_rate": 4.015714285714286e-05, "loss": 0.4374, "step": 10650 }, { "epoch": 2.0577, "grad_norm": 4.732306003570557, "learning_rate": 4.010612244897959e-05, "loss": 0.4005, "step": 10700 }, { "epoch": 2.0587, "grad_norm": 4.265634536743164, "learning_rate": 4.005510204081633e-05, "loss": 0.3726, "step": 10750 }, { "epoch": 2.0597, "grad_norm": 4.142653942108154, "learning_rate": 4.000408163265306e-05, "loss": 0.3763, "step": 10800 }, { "epoch": 2.0607, "grad_norm": 3.3085813522338867, "learning_rate": 3.9953061224489795e-05, "loss": 0.3716, "step": 10850 }, { "epoch": 2.0617, "grad_norm": 4.671994686126709, "learning_rate": 3.990204081632653e-05, "loss": 0.3767, "step": 10900 }, { "epoch": 2.0627, "grad_norm": 3.506270408630371, "learning_rate": 3.985102040816327e-05, "loss": 0.3868, "step": 10950 }, { "epoch": 2.0637, "grad_norm": 3.7705044746398926, "learning_rate": 3.9800000000000005e-05, "loss": 0.4116, "step": 11000 }, { "epoch": 2.0637, "eval_loss": 0.8389872908592224, "eval_runtime": 222.6276, "eval_samples_per_second": 4.528, "eval_steps_per_second": 0.144, "eval_wer": 0.48407859078590787, "step": 11000 }, { "epoch": 2.0647, "grad_norm": 3.551223039627075, "learning_rate": 3.974897959183674e-05, "loss": 0.3901, "step": 11050 }, { "epoch": 2.0657, "grad_norm": 4.152121543884277, "learning_rate": 3.969795918367347e-05, "loss": 0.3837, "step": 11100 }, { "epoch": 2.0667, "grad_norm": 3.815230369567871, "learning_rate": 3.964693877551021e-05, "loss": 0.4107, "step": 11150 }, { "epoch": 2.0677, "grad_norm": 3.5564286708831787, "learning_rate": 3.959693877551021e-05, "loss": 0.3975, "step": 11200 }, { "epoch": 2.0687, "grad_norm": 3.2500646114349365, "learning_rate": 3.954591836734694e-05, "loss": 0.3993, "step": 11250 }, { "epoch": 2.0697, "grad_norm": 3.974126100540161, "learning_rate": 3.949489795918367e-05, "loss": 0.3784, "step": 11300 }, { "epoch": 2.0707, "grad_norm": 4.25160026550293, "learning_rate": 3.944387755102041e-05, "loss": 0.4069, "step": 11350 }, { "epoch": 2.0717, "grad_norm": 3.363373279571533, "learning_rate": 3.9392857142857144e-05, "loss": 0.4105, "step": 11400 }, { "epoch": 2.0727, "grad_norm": 3.961094379425049, "learning_rate": 3.934183673469388e-05, "loss": 0.3961, "step": 11450 }, { "epoch": 2.0737, "grad_norm": 3.97780704498291, "learning_rate": 3.9290816326530615e-05, "loss": 0.4065, "step": 11500 }, { "epoch": 2.0747, "grad_norm": 4.831082344055176, "learning_rate": 3.923979591836735e-05, "loss": 0.4255, "step": 11550 }, { "epoch": 2.0757, "grad_norm": 3.660353183746338, "learning_rate": 3.918877551020409e-05, "loss": 0.4315, "step": 11600 }, { "epoch": 2.0767, "grad_norm": 3.697075843811035, "learning_rate": 3.913775510204082e-05, "loss": 0.3948, "step": 11650 }, { "epoch": 2.0777, "grad_norm": 5.073598384857178, "learning_rate": 3.908673469387755e-05, "loss": 0.4224, "step": 11700 }, { "epoch": 3.00055, "grad_norm": 4.665965557098389, "learning_rate": 3.9035714285714284e-05, "loss": 0.4212, "step": 11750 }, { "epoch": 3.00155, "grad_norm": 5.204474925994873, "learning_rate": 3.898469387755102e-05, "loss": 0.4048, "step": 11800 }, { "epoch": 3.00255, "grad_norm": 3.746649742126465, "learning_rate": 3.8933673469387755e-05, "loss": 0.4175, "step": 11850 }, { "epoch": 3.00355, "grad_norm": 3.8436436653137207, "learning_rate": 3.8882653061224495e-05, "loss": 0.4425, "step": 11900 }, { "epoch": 3.00455, "grad_norm": 3.9119129180908203, "learning_rate": 3.883163265306123e-05, "loss": 0.4227, "step": 11950 }, { "epoch": 3.00555, "grad_norm": 4.373188018798828, "learning_rate": 3.878061224489796e-05, "loss": 0.4312, "step": 12000 }, { "epoch": 3.00555, "eval_loss": 0.8273130655288696, "eval_runtime": 214.8756, "eval_samples_per_second": 4.691, "eval_steps_per_second": 0.149, "eval_wer": 0.4557079945799458, "step": 12000 } ], "logging_steps": 50, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "total_flos": 1.8907302968866898e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }