whisper-tiny-base / trainer_state.json
abargum's picture
Upload folder using huggingface_hub
38bea69 verified
Raw
History Blame Contribute Delete
42 kB
{
"best_metric": 0.4557079945799458,
"best_model_checkpoint": "whisper-tiny-danish-2/checkpoint-12000",
"epoch": 3.00555,
"eval_steps": 1000,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001,
"grad_norm": 22.681325912475586,
"learning_rate": 2.35e-06,
"loss": 4.0513,
"step": 50
},
{
"epoch": 0.002,
"grad_norm": 7.509825706481934,
"learning_rate": 4.85e-06,
"loss": 2.4332,
"step": 100
},
{
"epoch": 0.003,
"grad_norm": 7.44932746887207,
"learning_rate": 7.35e-06,
"loss": 1.8256,
"step": 150
},
{
"epoch": 0.004,
"grad_norm": 6.986607551574707,
"learning_rate": 9.85e-06,
"loss": 1.6391,
"step": 200
},
{
"epoch": 0.005,
"grad_norm": 7.076284408569336,
"learning_rate": 1.235e-05,
"loss": 1.5296,
"step": 250
},
{
"epoch": 0.006,
"grad_norm": 6.501603603363037,
"learning_rate": 1.485e-05,
"loss": 1.4288,
"step": 300
},
{
"epoch": 0.007,
"grad_norm": 6.999312400817871,
"learning_rate": 1.7349999999999998e-05,
"loss": 1.3068,
"step": 350
},
{
"epoch": 0.008,
"grad_norm": 6.261101722717285,
"learning_rate": 1.985e-05,
"loss": 1.2373,
"step": 400
},
{
"epoch": 0.009,
"grad_norm": 6.493436813354492,
"learning_rate": 2.235e-05,
"loss": 1.2242,
"step": 450
},
{
"epoch": 0.01,
"grad_norm": 6.082971096038818,
"learning_rate": 2.485e-05,
"loss": 1.1575,
"step": 500
},
{
"epoch": 0.011,
"grad_norm": 6.2460174560546875,
"learning_rate": 2.7350000000000004e-05,
"loss": 1.093,
"step": 550
},
{
"epoch": 0.012,
"grad_norm": 5.896036148071289,
"learning_rate": 2.985e-05,
"loss": 1.0961,
"step": 600
},
{
"epoch": 0.013,
"grad_norm": 5.539938926696777,
"learning_rate": 3.235e-05,
"loss": 1.0036,
"step": 650
},
{
"epoch": 0.014,
"grad_norm": 5.470737934112549,
"learning_rate": 3.485e-05,
"loss": 0.974,
"step": 700
},
{
"epoch": 0.015,
"grad_norm": 5.766882419586182,
"learning_rate": 3.735e-05,
"loss": 0.9315,
"step": 750
},
{
"epoch": 0.016,
"grad_norm": 5.046978950500488,
"learning_rate": 3.9850000000000006e-05,
"loss": 0.9166,
"step": 800
},
{
"epoch": 0.017,
"grad_norm": 5.6074957847595215,
"learning_rate": 4.235e-05,
"loss": 0.9883,
"step": 850
},
{
"epoch": 0.018,
"grad_norm": 5.528851509094238,
"learning_rate": 4.4850000000000006e-05,
"loss": 0.9578,
"step": 900
},
{
"epoch": 0.019,
"grad_norm": 5.047084331512451,
"learning_rate": 4.735e-05,
"loss": 0.8865,
"step": 950
},
{
"epoch": 0.02,
"grad_norm": 5.150805473327637,
"learning_rate": 4.9850000000000006e-05,
"loss": 0.8565,
"step": 1000
},
{
"epoch": 0.02,
"eval_loss": 1.2942239046096802,
"eval_runtime": 216.6252,
"eval_samples_per_second": 4.653,
"eval_steps_per_second": 0.148,
"eval_wer": 0.6471036585365854,
"step": 1000
},
{
"epoch": 0.021,
"grad_norm": 4.584412097930908,
"learning_rate": 4.9952040816326534e-05,
"loss": 0.8296,
"step": 1050
},
{
"epoch": 0.022,
"grad_norm": 5.257778167724609,
"learning_rate": 4.9901020408163266e-05,
"loss": 0.802,
"step": 1100
},
{
"epoch": 0.023,
"grad_norm": 5.010299205780029,
"learning_rate": 4.9850000000000006e-05,
"loss": 0.7911,
"step": 1150
},
{
"epoch": 0.024,
"grad_norm": 5.242851734161377,
"learning_rate": 4.979897959183674e-05,
"loss": 0.8318,
"step": 1200
},
{
"epoch": 0.025,
"grad_norm": 5.349483489990234,
"learning_rate": 4.974795918367347e-05,
"loss": 0.8327,
"step": 1250
},
{
"epoch": 0.026,
"grad_norm": 6.035111427307129,
"learning_rate": 4.96969387755102e-05,
"loss": 0.8355,
"step": 1300
},
{
"epoch": 0.027,
"grad_norm": 5.254024982452393,
"learning_rate": 4.964591836734694e-05,
"loss": 0.8851,
"step": 1350
},
{
"epoch": 0.028,
"grad_norm": 6.512954235076904,
"learning_rate": 4.959489795918368e-05,
"loss": 0.8718,
"step": 1400
},
{
"epoch": 0.029,
"grad_norm": 4.468319416046143,
"learning_rate": 4.954387755102041e-05,
"loss": 0.815,
"step": 1450
},
{
"epoch": 0.03,
"grad_norm": 4.5422492027282715,
"learning_rate": 4.9492857142857146e-05,
"loss": 0.8086,
"step": 1500
},
{
"epoch": 0.031,
"grad_norm": 4.861804008483887,
"learning_rate": 4.944183673469388e-05,
"loss": 0.7848,
"step": 1550
},
{
"epoch": 0.032,
"grad_norm": 5.152141571044922,
"learning_rate": 4.939081632653062e-05,
"loss": 0.7382,
"step": 1600
},
{
"epoch": 0.033,
"grad_norm": 4.768085479736328,
"learning_rate": 4.933979591836735e-05,
"loss": 0.7786,
"step": 1650
},
{
"epoch": 0.034,
"grad_norm": 4.68101167678833,
"learning_rate": 4.928877551020408e-05,
"loss": 0.733,
"step": 1700
},
{
"epoch": 0.035,
"grad_norm": 4.635968208312988,
"learning_rate": 4.9237755102040814e-05,
"loss": 0.7032,
"step": 1750
},
{
"epoch": 0.036,
"grad_norm": 5.221863269805908,
"learning_rate": 4.918673469387755e-05,
"loss": 0.703,
"step": 1800
},
{
"epoch": 0.037,
"grad_norm": 5.017695426940918,
"learning_rate": 4.913571428571429e-05,
"loss": 0.6421,
"step": 1850
},
{
"epoch": 0.038,
"grad_norm": 4.761963367462158,
"learning_rate": 4.9084693877551025e-05,
"loss": 0.703,
"step": 1900
},
{
"epoch": 0.039,
"grad_norm": 4.619095325469971,
"learning_rate": 4.903367346938776e-05,
"loss": 0.7374,
"step": 1950
},
{
"epoch": 0.04,
"grad_norm": 4.652743816375732,
"learning_rate": 4.8982653061224496e-05,
"loss": 0.7434,
"step": 2000
},
{
"epoch": 0.04,
"eval_loss": 1.0815221071243286,
"eval_runtime": 235.89,
"eval_samples_per_second": 4.273,
"eval_steps_per_second": 0.136,
"eval_wer": 0.5818512872628726,
"step": 2000
},
{
"epoch": 0.041,
"grad_norm": 5.1539177894592285,
"learning_rate": 4.893163265306123e-05,
"loss": 0.7661,
"step": 2050
},
{
"epoch": 0.042,
"grad_norm": 4.277270793914795,
"learning_rate": 4.888061224489796e-05,
"loss": 0.6908,
"step": 2100
},
{
"epoch": 0.043,
"grad_norm": 4.588935852050781,
"learning_rate": 4.882959183673469e-05,
"loss": 0.6411,
"step": 2150
},
{
"epoch": 0.044,
"grad_norm": 4.606882572174072,
"learning_rate": 4.877857142857143e-05,
"loss": 0.6492,
"step": 2200
},
{
"epoch": 0.045,
"grad_norm": 4.498349189758301,
"learning_rate": 4.8727551020408165e-05,
"loss": 0.6592,
"step": 2250
},
{
"epoch": 0.046,
"grad_norm": 4.668141841888428,
"learning_rate": 4.8676530612244904e-05,
"loss": 0.6865,
"step": 2300
},
{
"epoch": 0.047,
"grad_norm": 4.357521057128906,
"learning_rate": 4.8625510204081636e-05,
"loss": 0.676,
"step": 2350
},
{
"epoch": 0.048,
"grad_norm": 4.414557933807373,
"learning_rate": 4.857448979591837e-05,
"loss": 0.6496,
"step": 2400
},
{
"epoch": 0.049,
"grad_norm": 4.414867877960205,
"learning_rate": 4.852346938775511e-05,
"loss": 0.6679,
"step": 2450
},
{
"epoch": 0.05,
"grad_norm": 4.020086765289307,
"learning_rate": 4.847244897959184e-05,
"loss": 0.6364,
"step": 2500
},
{
"epoch": 0.051,
"grad_norm": 5.012465476989746,
"learning_rate": 4.842142857142857e-05,
"loss": 0.6624,
"step": 2550
},
{
"epoch": 0.052,
"grad_norm": 4.224608421325684,
"learning_rate": 4.8370408163265305e-05,
"loss": 0.6609,
"step": 2600
},
{
"epoch": 0.053,
"grad_norm": 4.476141929626465,
"learning_rate": 4.8319387755102044e-05,
"loss": 0.6402,
"step": 2650
},
{
"epoch": 0.054,
"grad_norm": 4.985313892364502,
"learning_rate": 4.8268367346938776e-05,
"loss": 0.6706,
"step": 2700
},
{
"epoch": 0.055,
"grad_norm": 4.675138473510742,
"learning_rate": 4.8217346938775515e-05,
"loss": 0.7041,
"step": 2750
},
{
"epoch": 0.056,
"grad_norm": 5.3597846031188965,
"learning_rate": 4.816632653061225e-05,
"loss": 0.6993,
"step": 2800
},
{
"epoch": 0.057,
"grad_norm": 4.724060535430908,
"learning_rate": 4.811530612244898e-05,
"loss": 0.6681,
"step": 2850
},
{
"epoch": 0.058,
"grad_norm": 4.375901222229004,
"learning_rate": 4.806428571428572e-05,
"loss": 0.6205,
"step": 2900
},
{
"epoch": 0.059,
"grad_norm": 4.6154279708862305,
"learning_rate": 4.801326530612245e-05,
"loss": 0.5887,
"step": 2950
},
{
"epoch": 0.06,
"grad_norm": 4.766662120819092,
"learning_rate": 4.7962244897959184e-05,
"loss": 0.6056,
"step": 3000
},
{
"epoch": 0.06,
"eval_loss": 1.0121757984161377,
"eval_runtime": 218.7463,
"eval_samples_per_second": 4.608,
"eval_steps_per_second": 0.146,
"eval_wer": 0.5472984417344173,
"step": 3000
},
{
"epoch": 0.061,
"grad_norm": 4.1083550453186035,
"learning_rate": 4.791122448979592e-05,
"loss": 0.5938,
"step": 3050
},
{
"epoch": 0.062,
"grad_norm": 4.648180961608887,
"learning_rate": 4.7860204081632655e-05,
"loss": 0.6083,
"step": 3100
},
{
"epoch": 0.063,
"grad_norm": 4.021754264831543,
"learning_rate": 4.780918367346939e-05,
"loss": 0.6132,
"step": 3150
},
{
"epoch": 0.064,
"grad_norm": 3.82786226272583,
"learning_rate": 4.775816326530613e-05,
"loss": 0.6348,
"step": 3200
},
{
"epoch": 0.065,
"grad_norm": 4.385377407073975,
"learning_rate": 4.770714285714286e-05,
"loss": 0.602,
"step": 3250
},
{
"epoch": 0.066,
"grad_norm": 5.215423107147217,
"learning_rate": 4.76561224489796e-05,
"loss": 0.6135,
"step": 3300
},
{
"epoch": 0.067,
"grad_norm": 4.4256486892700195,
"learning_rate": 4.760510204081633e-05,
"loss": 0.6353,
"step": 3350
},
{
"epoch": 0.068,
"grad_norm": 4.338476181030273,
"learning_rate": 4.755408163265306e-05,
"loss": 0.6114,
"step": 3400
},
{
"epoch": 0.069,
"grad_norm": 4.410732269287109,
"learning_rate": 4.7503061224489795e-05,
"loss": 0.612,
"step": 3450
},
{
"epoch": 0.07,
"grad_norm": 4.397231578826904,
"learning_rate": 4.7452040816326534e-05,
"loss": 0.5984,
"step": 3500
},
{
"epoch": 0.071,
"grad_norm": 4.233676433563232,
"learning_rate": 4.740102040816327e-05,
"loss": 0.6419,
"step": 3550
},
{
"epoch": 0.072,
"grad_norm": 4.81524658203125,
"learning_rate": 4.735e-05,
"loss": 0.6158,
"step": 3600
},
{
"epoch": 0.073,
"grad_norm": 4.236979961395264,
"learning_rate": 4.729897959183674e-05,
"loss": 0.6041,
"step": 3650
},
{
"epoch": 0.074,
"grad_norm": 4.120030403137207,
"learning_rate": 4.724795918367347e-05,
"loss": 0.6378,
"step": 3700
},
{
"epoch": 0.075,
"grad_norm": 3.8795013427734375,
"learning_rate": 4.719693877551021e-05,
"loss": 0.6577,
"step": 3750
},
{
"epoch": 0.076,
"grad_norm": 4.2965087890625,
"learning_rate": 4.714591836734694e-05,
"loss": 0.6341,
"step": 3800
},
{
"epoch": 0.077,
"grad_norm": 4.946217060089111,
"learning_rate": 4.7094897959183674e-05,
"loss": 0.6217,
"step": 3850
},
{
"epoch": 0.078,
"grad_norm": 4.450223445892334,
"learning_rate": 4.7043877551020407e-05,
"loss": 0.6383,
"step": 3900
},
{
"epoch": 1.00085,
"grad_norm": 5.3936591148376465,
"learning_rate": 4.6992857142857146e-05,
"loss": 0.6186,
"step": 3950
},
{
"epoch": 1.00185,
"grad_norm": 4.123908042907715,
"learning_rate": 4.694183673469388e-05,
"loss": 0.6141,
"step": 4000
},
{
"epoch": 1.00185,
"eval_loss": 0.9478016495704651,
"eval_runtime": 215.0961,
"eval_samples_per_second": 4.686,
"eval_steps_per_second": 0.149,
"eval_wer": 0.5331554878048781,
"step": 4000
},
{
"epoch": 1.00285,
"grad_norm": 4.490115642547607,
"learning_rate": 4.689081632653061e-05,
"loss": 0.6373,
"step": 4050
},
{
"epoch": 1.00385,
"grad_norm": 4.364250659942627,
"learning_rate": 4.683979591836735e-05,
"loss": 0.641,
"step": 4100
},
{
"epoch": 1.00485,
"grad_norm": 4.241596221923828,
"learning_rate": 4.678877551020409e-05,
"loss": 0.6284,
"step": 4150
},
{
"epoch": 1.00585,
"grad_norm": 4.362817764282227,
"learning_rate": 4.673775510204082e-05,
"loss": 0.6346,
"step": 4200
},
{
"epoch": 1.00685,
"grad_norm": 3.9235153198242188,
"learning_rate": 4.668673469387755e-05,
"loss": 0.6007,
"step": 4250
},
{
"epoch": 1.00785,
"grad_norm": 3.7977848052978516,
"learning_rate": 4.6635714285714286e-05,
"loss": 0.5891,
"step": 4300
},
{
"epoch": 1.00885,
"grad_norm": 4.0569305419921875,
"learning_rate": 4.6584693877551025e-05,
"loss": 0.6123,
"step": 4350
},
{
"epoch": 1.00985,
"grad_norm": 4.0435872077941895,
"learning_rate": 4.653367346938776e-05,
"loss": 0.5948,
"step": 4400
},
{
"epoch": 1.01085,
"grad_norm": 3.7464935779571533,
"learning_rate": 4.648265306122449e-05,
"loss": 0.5665,
"step": 4450
},
{
"epoch": 1.01185,
"grad_norm": 3.99239182472229,
"learning_rate": 4.643163265306122e-05,
"loss": 0.6124,
"step": 4500
},
{
"epoch": 1.01285,
"grad_norm": 3.6230008602142334,
"learning_rate": 4.638061224489796e-05,
"loss": 0.5632,
"step": 4550
},
{
"epoch": 1.01385,
"grad_norm": 3.5754306316375732,
"learning_rate": 4.63295918367347e-05,
"loss": 0.5425,
"step": 4600
},
{
"epoch": 1.01485,
"grad_norm": 3.7989814281463623,
"learning_rate": 4.627857142857143e-05,
"loss": 0.5388,
"step": 4650
},
{
"epoch": 1.01585,
"grad_norm": 3.778059959411621,
"learning_rate": 4.6227551020408165e-05,
"loss": 0.5297,
"step": 4700
},
{
"epoch": 1.01685,
"grad_norm": 3.987022876739502,
"learning_rate": 4.61765306122449e-05,
"loss": 0.58,
"step": 4750
},
{
"epoch": 1.01785,
"grad_norm": 3.883904218673706,
"learning_rate": 4.6125510204081636e-05,
"loss": 0.5964,
"step": 4800
},
{
"epoch": 1.01885,
"grad_norm": 4.473369598388672,
"learning_rate": 4.607448979591837e-05,
"loss": 0.5512,
"step": 4850
},
{
"epoch": 1.01985,
"grad_norm": 4.040229797363281,
"learning_rate": 4.60234693877551e-05,
"loss": 0.5292,
"step": 4900
},
{
"epoch": 1.02085,
"grad_norm": 4.538361072540283,
"learning_rate": 4.597244897959183e-05,
"loss": 0.5196,
"step": 4950
},
{
"epoch": 1.02185,
"grad_norm": 4.212509632110596,
"learning_rate": 4.592142857142858e-05,
"loss": 0.5085,
"step": 5000
},
{
"epoch": 1.02185,
"eval_loss": 0.9282792806625366,
"eval_runtime": 224.1163,
"eval_samples_per_second": 4.498,
"eval_steps_per_second": 0.143,
"eval_wer": 0.5289634146341463,
"step": 5000
},
{
"epoch": 1.02285,
"grad_norm": 3.8673441410064697,
"learning_rate": 4.587040816326531e-05,
"loss": 0.4988,
"step": 5050
},
{
"epoch": 1.02385,
"grad_norm": 3.70070743560791,
"learning_rate": 4.5819387755102044e-05,
"loss": 0.5424,
"step": 5100
},
{
"epoch": 1.02485,
"grad_norm": 4.379025459289551,
"learning_rate": 4.5768367346938776e-05,
"loss": 0.5425,
"step": 5150
},
{
"epoch": 1.02585,
"grad_norm": 4.310212135314941,
"learning_rate": 4.5717346938775515e-05,
"loss": 0.5466,
"step": 5200
},
{
"epoch": 1.02685,
"grad_norm": 4.193725109100342,
"learning_rate": 4.566632653061225e-05,
"loss": 0.5949,
"step": 5250
},
{
"epoch": 1.02785,
"grad_norm": 4.096522808074951,
"learning_rate": 4.561530612244898e-05,
"loss": 0.6003,
"step": 5300
},
{
"epoch": 1.02885,
"grad_norm": 4.387059211730957,
"learning_rate": 4.556428571428571e-05,
"loss": 0.5672,
"step": 5350
},
{
"epoch": 1.02985,
"grad_norm": 4.410869598388672,
"learning_rate": 4.551326530612245e-05,
"loss": 0.5512,
"step": 5400
},
{
"epoch": 1.03085,
"grad_norm": 4.141435623168945,
"learning_rate": 4.546224489795919e-05,
"loss": 0.5534,
"step": 5450
},
{
"epoch": 1.03185,
"grad_norm": 3.6259546279907227,
"learning_rate": 4.541122448979592e-05,
"loss": 0.4962,
"step": 5500
},
{
"epoch": 1.03285,
"grad_norm": 4.343920707702637,
"learning_rate": 4.5360204081632655e-05,
"loss": 0.5411,
"step": 5550
},
{
"epoch": 1.03385,
"grad_norm": 4.238042831420898,
"learning_rate": 4.530918367346939e-05,
"loss": 0.5217,
"step": 5600
},
{
"epoch": 1.03485,
"grad_norm": 4.237799167633057,
"learning_rate": 4.525816326530613e-05,
"loss": 0.4973,
"step": 5650
},
{
"epoch": 1.03585,
"grad_norm": 4.474476337432861,
"learning_rate": 4.520714285714286e-05,
"loss": 0.4967,
"step": 5700
},
{
"epoch": 1.03685,
"grad_norm": 3.6932973861694336,
"learning_rate": 4.515612244897959e-05,
"loss": 0.4524,
"step": 5750
},
{
"epoch": 1.03785,
"grad_norm": 4.185140132904053,
"learning_rate": 4.5105102040816324e-05,
"loss": 0.4851,
"step": 5800
},
{
"epoch": 1.03885,
"grad_norm": 4.114623069763184,
"learning_rate": 4.505408163265306e-05,
"loss": 0.5371,
"step": 5850
},
{
"epoch": 1.03985,
"grad_norm": 3.961071729660034,
"learning_rate": 4.5003061224489795e-05,
"loss": 0.5361,
"step": 5900
},
{
"epoch": 1.04085,
"grad_norm": 3.9876036643981934,
"learning_rate": 4.4952040816326534e-05,
"loss": 0.5576,
"step": 5950
},
{
"epoch": 1.04185,
"grad_norm": 4.096221446990967,
"learning_rate": 4.490102040816327e-05,
"loss": 0.5108,
"step": 6000
},
{
"epoch": 1.04185,
"eval_loss": 0.8966282606124878,
"eval_runtime": 214.6844,
"eval_samples_per_second": 4.695,
"eval_steps_per_second": 0.149,
"eval_wer": 0.4938177506775068,
"step": 6000
},
{
"epoch": 1.04285,
"grad_norm": 3.3264214992523193,
"learning_rate": 4.4850000000000006e-05,
"loss": 0.4692,
"step": 6050
},
{
"epoch": 1.04385,
"grad_norm": 4.144850730895996,
"learning_rate": 4.479897959183674e-05,
"loss": 0.4676,
"step": 6100
},
{
"epoch": 1.04485,
"grad_norm": 4.427196502685547,
"learning_rate": 4.474795918367347e-05,
"loss": 0.4796,
"step": 6150
},
{
"epoch": 1.04585,
"grad_norm": 3.8419079780578613,
"learning_rate": 4.46969387755102e-05,
"loss": 0.5092,
"step": 6200
},
{
"epoch": 1.04685,
"grad_norm": 4.1315436363220215,
"learning_rate": 4.464591836734694e-05,
"loss": 0.4992,
"step": 6250
},
{
"epoch": 1.04785,
"grad_norm": 3.6058404445648193,
"learning_rate": 4.4594897959183674e-05,
"loss": 0.4844,
"step": 6300
},
{
"epoch": 1.04885,
"grad_norm": 3.776262044906616,
"learning_rate": 4.454387755102041e-05,
"loss": 0.4886,
"step": 6350
},
{
"epoch": 1.04985,
"grad_norm": 3.4971370697021484,
"learning_rate": 4.4492857142857146e-05,
"loss": 0.4755,
"step": 6400
},
{
"epoch": 1.05085,
"grad_norm": 4.299288272857666,
"learning_rate": 4.444183673469388e-05,
"loss": 0.4826,
"step": 6450
},
{
"epoch": 1.05185,
"grad_norm": 3.4071223735809326,
"learning_rate": 4.439081632653062e-05,
"loss": 0.5001,
"step": 6500
},
{
"epoch": 1.05285,
"grad_norm": 3.8092288970947266,
"learning_rate": 4.433979591836735e-05,
"loss": 0.4797,
"step": 6550
},
{
"epoch": 1.05385,
"grad_norm": 4.502504348754883,
"learning_rate": 4.428877551020408e-05,
"loss": 0.4928,
"step": 6600
},
{
"epoch": 1.05485,
"grad_norm": 4.641124725341797,
"learning_rate": 4.4237755102040814e-05,
"loss": 0.5373,
"step": 6650
},
{
"epoch": 1.05585,
"grad_norm": 4.549639701843262,
"learning_rate": 4.4186734693877554e-05,
"loss": 0.529,
"step": 6700
},
{
"epoch": 1.05685,
"grad_norm": 3.3781280517578125,
"learning_rate": 4.4135714285714286e-05,
"loss": 0.5129,
"step": 6750
},
{
"epoch": 1.05785,
"grad_norm": 4.276547431945801,
"learning_rate": 4.408469387755102e-05,
"loss": 0.478,
"step": 6800
},
{
"epoch": 1.05885,
"grad_norm": 4.88979959487915,
"learning_rate": 4.403367346938776e-05,
"loss": 0.4405,
"step": 6850
},
{
"epoch": 1.05985,
"grad_norm": 5.3919267654418945,
"learning_rate": 4.3982653061224497e-05,
"loss": 0.4544,
"step": 6900
},
{
"epoch": 1.06085,
"grad_norm": 3.975532293319702,
"learning_rate": 4.393163265306123e-05,
"loss": 0.4517,
"step": 6950
},
{
"epoch": 1.06185,
"grad_norm": 3.94978404045105,
"learning_rate": 4.388061224489796e-05,
"loss": 0.4604,
"step": 7000
},
{
"epoch": 1.06185,
"eval_loss": 0.8832055926322937,
"eval_runtime": 217.1866,
"eval_samples_per_second": 4.641,
"eval_steps_per_second": 0.147,
"eval_wer": 0.49817920054200543,
"step": 7000
},
{
"epoch": 1.06285,
"grad_norm": 3.804979085922241,
"learning_rate": 4.3829591836734694e-05,
"loss": 0.4659,
"step": 7050
},
{
"epoch": 1.06385,
"grad_norm": 4.515665054321289,
"learning_rate": 4.377857142857143e-05,
"loss": 0.4879,
"step": 7100
},
{
"epoch": 1.06485,
"grad_norm": 4.103418827056885,
"learning_rate": 4.3727551020408165e-05,
"loss": 0.4608,
"step": 7150
},
{
"epoch": 1.06585,
"grad_norm": 3.9994966983795166,
"learning_rate": 4.36765306122449e-05,
"loss": 0.4712,
"step": 7200
},
{
"epoch": 1.06685,
"grad_norm": 3.973745107650757,
"learning_rate": 4.362551020408163e-05,
"loss": 0.4873,
"step": 7250
},
{
"epoch": 1.06785,
"grad_norm": 3.7433559894561768,
"learning_rate": 4.357448979591837e-05,
"loss": 0.4735,
"step": 7300
},
{
"epoch": 1.06885,
"grad_norm": 3.822484254837036,
"learning_rate": 4.352346938775511e-05,
"loss": 0.4712,
"step": 7350
},
{
"epoch": 1.06985,
"grad_norm": 3.4630181789398193,
"learning_rate": 4.347244897959184e-05,
"loss": 0.4583,
"step": 7400
},
{
"epoch": 1.07085,
"grad_norm": 4.550868034362793,
"learning_rate": 4.342142857142857e-05,
"loss": 0.493,
"step": 7450
},
{
"epoch": 1.07185,
"grad_norm": 4.130419731140137,
"learning_rate": 4.337142857142857e-05,
"loss": 0.4815,
"step": 7500
},
{
"epoch": 1.07285,
"grad_norm": 3.5420877933502197,
"learning_rate": 4.332040816326531e-05,
"loss": 0.4735,
"step": 7550
},
{
"epoch": 1.07385,
"grad_norm": 4.168927192687988,
"learning_rate": 4.326938775510204e-05,
"loss": 0.4919,
"step": 7600
},
{
"epoch": 1.07485,
"grad_norm": 3.9668235778808594,
"learning_rate": 4.3218367346938775e-05,
"loss": 0.5107,
"step": 7650
},
{
"epoch": 1.07585,
"grad_norm": 4.459245681762695,
"learning_rate": 4.316734693877551e-05,
"loss": 0.5011,
"step": 7700
},
{
"epoch": 1.07685,
"grad_norm": 4.179884910583496,
"learning_rate": 4.311632653061225e-05,
"loss": 0.4806,
"step": 7750
},
{
"epoch": 1.07785,
"grad_norm": 5.189551830291748,
"learning_rate": 4.3065306122448986e-05,
"loss": 0.4973,
"step": 7800
},
{
"epoch": 2.0007,
"grad_norm": 5.872171401977539,
"learning_rate": 4.301428571428572e-05,
"loss": 0.4958,
"step": 7850
},
{
"epoch": 2.0017,
"grad_norm": 3.948779582977295,
"learning_rate": 4.296326530612245e-05,
"loss": 0.4801,
"step": 7900
},
{
"epoch": 2.0027,
"grad_norm": 3.7434325218200684,
"learning_rate": 4.291224489795918e-05,
"loss": 0.5009,
"step": 7950
},
{
"epoch": 2.0037,
"grad_norm": 5.404001235961914,
"learning_rate": 4.286122448979592e-05,
"loss": 0.5144,
"step": 8000
},
{
"epoch": 2.0037,
"eval_loss": 0.8577666878700256,
"eval_runtime": 221.0367,
"eval_samples_per_second": 4.56,
"eval_steps_per_second": 0.145,
"eval_wer": 0.4876778455284553,
"step": 8000
},
{
"epoch": 2.0047,
"grad_norm": 3.783438205718994,
"learning_rate": 4.2810204081632654e-05,
"loss": 0.4987,
"step": 8050
},
{
"epoch": 2.0057,
"grad_norm": 3.9520227909088135,
"learning_rate": 4.275918367346939e-05,
"loss": 0.5081,
"step": 8100
},
{
"epoch": 2.0067,
"grad_norm": 3.9701645374298096,
"learning_rate": 4.2708163265306126e-05,
"loss": 0.4799,
"step": 8150
},
{
"epoch": 2.0077,
"grad_norm": 3.5120835304260254,
"learning_rate": 4.265714285714286e-05,
"loss": 0.4676,
"step": 8200
},
{
"epoch": 2.0087,
"grad_norm": 4.348593235015869,
"learning_rate": 4.26061224489796e-05,
"loss": 0.4853,
"step": 8250
},
{
"epoch": 2.0097,
"grad_norm": 3.6884608268737793,
"learning_rate": 4.255510204081633e-05,
"loss": 0.4803,
"step": 8300
},
{
"epoch": 2.0107,
"grad_norm": 3.584364414215088,
"learning_rate": 4.250408163265306e-05,
"loss": 0.4561,
"step": 8350
},
{
"epoch": 2.0117,
"grad_norm": 3.6865909099578857,
"learning_rate": 4.24530612244898e-05,
"loss": 0.4871,
"step": 8400
},
{
"epoch": 2.0127,
"grad_norm": 3.9009077548980713,
"learning_rate": 4.2402040816326533e-05,
"loss": 0.4599,
"step": 8450
},
{
"epoch": 2.0137,
"grad_norm": 3.513470411300659,
"learning_rate": 4.2351020408163266e-05,
"loss": 0.4316,
"step": 8500
},
{
"epoch": 2.0147,
"grad_norm": 3.8565685749053955,
"learning_rate": 4.23e-05,
"loss": 0.4379,
"step": 8550
},
{
"epoch": 2.0157,
"grad_norm": 3.2444798946380615,
"learning_rate": 4.224897959183674e-05,
"loss": 0.4287,
"step": 8600
},
{
"epoch": 2.0167,
"grad_norm": 3.9673781394958496,
"learning_rate": 4.219795918367347e-05,
"loss": 0.4613,
"step": 8650
},
{
"epoch": 2.0177,
"grad_norm": 8.936363220214844,
"learning_rate": 4.214693877551021e-05,
"loss": 0.4869,
"step": 8700
},
{
"epoch": 2.0187,
"grad_norm": 3.6102094650268555,
"learning_rate": 4.209591836734694e-05,
"loss": 0.4523,
"step": 8750
},
{
"epoch": 2.0197,
"grad_norm": 3.8444738388061523,
"learning_rate": 4.2044897959183673e-05,
"loss": 0.4334,
"step": 8800
},
{
"epoch": 2.0207,
"grad_norm": 3.0468149185180664,
"learning_rate": 4.199387755102041e-05,
"loss": 0.4219,
"step": 8850
},
{
"epoch": 2.0217,
"grad_norm": 3.563493251800537,
"learning_rate": 4.1942857142857145e-05,
"loss": 0.413,
"step": 8900
},
{
"epoch": 2.0227,
"grad_norm": 3.6925594806671143,
"learning_rate": 4.189183673469388e-05,
"loss": 0.4028,
"step": 8950
},
{
"epoch": 2.0237,
"grad_norm": 3.872044086456299,
"learning_rate": 4.184081632653061e-05,
"loss": 0.4411,
"step": 9000
},
{
"epoch": 2.0237,
"eval_loss": 0.8616846203804016,
"eval_runtime": 215.4703,
"eval_samples_per_second": 4.678,
"eval_steps_per_second": 0.149,
"eval_wer": 0.47925135501355015,
"step": 9000
},
{
"epoch": 2.0247,
"grad_norm": 3.9783222675323486,
"learning_rate": 4.178979591836735e-05,
"loss": 0.44,
"step": 9050
},
{
"epoch": 2.0257,
"grad_norm": 5.306482315063477,
"learning_rate": 4.173877551020408e-05,
"loss": 0.4384,
"step": 9100
},
{
"epoch": 2.0267,
"grad_norm": 4.326815128326416,
"learning_rate": 4.168775510204082e-05,
"loss": 0.4873,
"step": 9150
},
{
"epoch": 2.0277,
"grad_norm": 3.9018495082855225,
"learning_rate": 4.163673469387755e-05,
"loss": 0.4974,
"step": 9200
},
{
"epoch": 2.0287,
"grad_norm": 3.4675261974334717,
"learning_rate": 4.158571428571429e-05,
"loss": 0.4705,
"step": 9250
},
{
"epoch": 2.0297,
"grad_norm": 4.265820026397705,
"learning_rate": 4.1534693877551024e-05,
"loss": 0.4473,
"step": 9300
},
{
"epoch": 2.0307,
"grad_norm": 3.4740707874298096,
"learning_rate": 4.1483673469387756e-05,
"loss": 0.4679,
"step": 9350
},
{
"epoch": 2.0317,
"grad_norm": 3.339444637298584,
"learning_rate": 4.143265306122449e-05,
"loss": 0.4065,
"step": 9400
},
{
"epoch": 2.0327,
"grad_norm": 3.825657606124878,
"learning_rate": 4.138163265306123e-05,
"loss": 0.4436,
"step": 9450
},
{
"epoch": 2.0337,
"grad_norm": 3.392925262451172,
"learning_rate": 4.133061224489796e-05,
"loss": 0.4285,
"step": 9500
},
{
"epoch": 2.0347,
"grad_norm": 3.815762519836426,
"learning_rate": 4.127959183673469e-05,
"loss": 0.4123,
"step": 9550
},
{
"epoch": 2.0357,
"grad_norm": 4.090697765350342,
"learning_rate": 4.122857142857143e-05,
"loss": 0.408,
"step": 9600
},
{
"epoch": 2.0367,
"grad_norm": 3.2577061653137207,
"learning_rate": 4.1177551020408164e-05,
"loss": 0.3751,
"step": 9650
},
{
"epoch": 2.0377,
"grad_norm": 3.391096591949463,
"learning_rate": 4.11265306122449e-05,
"loss": 0.3885,
"step": 9700
},
{
"epoch": 2.0387,
"grad_norm": 3.540929079055786,
"learning_rate": 4.1075510204081636e-05,
"loss": 0.4434,
"step": 9750
},
{
"epoch": 2.0397,
"grad_norm": 4.215907096862793,
"learning_rate": 4.102448979591837e-05,
"loss": 0.436,
"step": 9800
},
{
"epoch": 2.0407,
"grad_norm": 4.207083225250244,
"learning_rate": 4.09734693877551e-05,
"loss": 0.4699,
"step": 9850
},
{
"epoch": 2.0417,
"grad_norm": 3.9734325408935547,
"learning_rate": 4.092244897959184e-05,
"loss": 0.4301,
"step": 9900
},
{
"epoch": 2.0427,
"grad_norm": 3.023761510848999,
"learning_rate": 4.087142857142857e-05,
"loss": 0.3889,
"step": 9950
},
{
"epoch": 2.0437,
"grad_norm": 3.4388654232025146,
"learning_rate": 4.0820408163265304e-05,
"loss": 0.3835,
"step": 10000
},
{
"epoch": 2.0437,
"eval_loss": 0.8521081805229187,
"eval_runtime": 215.0513,
"eval_samples_per_second": 4.687,
"eval_steps_per_second": 0.149,
"eval_wer": 0.47412771002710025,
"step": 10000
},
{
"epoch": 2.0447,
"grad_norm": 3.153373956680298,
"learning_rate": 4.076938775510204e-05,
"loss": 0.3933,
"step": 10050
},
{
"epoch": 2.0457,
"grad_norm": 4.276921272277832,
"learning_rate": 4.071836734693878e-05,
"loss": 0.42,
"step": 10100
},
{
"epoch": 2.0467,
"grad_norm": 3.5044806003570557,
"learning_rate": 4.0667346938775515e-05,
"loss": 0.4131,
"step": 10150
},
{
"epoch": 2.0477,
"grad_norm": 3.862910032272339,
"learning_rate": 4.061632653061225e-05,
"loss": 0.4076,
"step": 10200
},
{
"epoch": 2.0487,
"grad_norm": 3.593726396560669,
"learning_rate": 4.056530612244898e-05,
"loss": 0.397,
"step": 10250
},
{
"epoch": 2.0497,
"grad_norm": 3.8984551429748535,
"learning_rate": 4.051428571428572e-05,
"loss": 0.398,
"step": 10300
},
{
"epoch": 2.0507,
"grad_norm": 2.999417781829834,
"learning_rate": 4.046326530612245e-05,
"loss": 0.3986,
"step": 10350
},
{
"epoch": 2.0517,
"grad_norm": 3.8147029876708984,
"learning_rate": 4.041224489795918e-05,
"loss": 0.4151,
"step": 10400
},
{
"epoch": 2.0527,
"grad_norm": 3.7409307956695557,
"learning_rate": 4.0361224489795915e-05,
"loss": 0.4006,
"step": 10450
},
{
"epoch": 2.0537,
"grad_norm": 4.3039445877075195,
"learning_rate": 4.0310204081632655e-05,
"loss": 0.3995,
"step": 10500
},
{
"epoch": 2.0547,
"grad_norm": 3.9257168769836426,
"learning_rate": 4.0259183673469394e-05,
"loss": 0.4485,
"step": 10550
},
{
"epoch": 2.0557,
"grad_norm": 3.795719623565674,
"learning_rate": 4.0208163265306126e-05,
"loss": 0.4438,
"step": 10600
},
{
"epoch": 2.0567,
"grad_norm": 4.891661643981934,
"learning_rate": 4.015714285714286e-05,
"loss": 0.4374,
"step": 10650
},
{
"epoch": 2.0577,
"grad_norm": 4.732306003570557,
"learning_rate": 4.010612244897959e-05,
"loss": 0.4005,
"step": 10700
},
{
"epoch": 2.0587,
"grad_norm": 4.265634536743164,
"learning_rate": 4.005510204081633e-05,
"loss": 0.3726,
"step": 10750
},
{
"epoch": 2.0597,
"grad_norm": 4.142653942108154,
"learning_rate": 4.000408163265306e-05,
"loss": 0.3763,
"step": 10800
},
{
"epoch": 2.0607,
"grad_norm": 3.3085813522338867,
"learning_rate": 3.9953061224489795e-05,
"loss": 0.3716,
"step": 10850
},
{
"epoch": 2.0617,
"grad_norm": 4.671994686126709,
"learning_rate": 3.990204081632653e-05,
"loss": 0.3767,
"step": 10900
},
{
"epoch": 2.0627,
"grad_norm": 3.506270408630371,
"learning_rate": 3.985102040816327e-05,
"loss": 0.3868,
"step": 10950
},
{
"epoch": 2.0637,
"grad_norm": 3.7705044746398926,
"learning_rate": 3.9800000000000005e-05,
"loss": 0.4116,
"step": 11000
},
{
"epoch": 2.0637,
"eval_loss": 0.8389872908592224,
"eval_runtime": 222.6276,
"eval_samples_per_second": 4.528,
"eval_steps_per_second": 0.144,
"eval_wer": 0.48407859078590787,
"step": 11000
},
{
"epoch": 2.0647,
"grad_norm": 3.551223039627075,
"learning_rate": 3.974897959183674e-05,
"loss": 0.3901,
"step": 11050
},
{
"epoch": 2.0657,
"grad_norm": 4.152121543884277,
"learning_rate": 3.969795918367347e-05,
"loss": 0.3837,
"step": 11100
},
{
"epoch": 2.0667,
"grad_norm": 3.815230369567871,
"learning_rate": 3.964693877551021e-05,
"loss": 0.4107,
"step": 11150
},
{
"epoch": 2.0677,
"grad_norm": 3.5564286708831787,
"learning_rate": 3.959693877551021e-05,
"loss": 0.3975,
"step": 11200
},
{
"epoch": 2.0687,
"grad_norm": 3.2500646114349365,
"learning_rate": 3.954591836734694e-05,
"loss": 0.3993,
"step": 11250
},
{
"epoch": 2.0697,
"grad_norm": 3.974126100540161,
"learning_rate": 3.949489795918367e-05,
"loss": 0.3784,
"step": 11300
},
{
"epoch": 2.0707,
"grad_norm": 4.25160026550293,
"learning_rate": 3.944387755102041e-05,
"loss": 0.4069,
"step": 11350
},
{
"epoch": 2.0717,
"grad_norm": 3.363373279571533,
"learning_rate": 3.9392857142857144e-05,
"loss": 0.4105,
"step": 11400
},
{
"epoch": 2.0727,
"grad_norm": 3.961094379425049,
"learning_rate": 3.934183673469388e-05,
"loss": 0.3961,
"step": 11450
},
{
"epoch": 2.0737,
"grad_norm": 3.97780704498291,
"learning_rate": 3.9290816326530615e-05,
"loss": 0.4065,
"step": 11500
},
{
"epoch": 2.0747,
"grad_norm": 4.831082344055176,
"learning_rate": 3.923979591836735e-05,
"loss": 0.4255,
"step": 11550
},
{
"epoch": 2.0757,
"grad_norm": 3.660353183746338,
"learning_rate": 3.918877551020409e-05,
"loss": 0.4315,
"step": 11600
},
{
"epoch": 2.0767,
"grad_norm": 3.697075843811035,
"learning_rate": 3.913775510204082e-05,
"loss": 0.3948,
"step": 11650
},
{
"epoch": 2.0777,
"grad_norm": 5.073598384857178,
"learning_rate": 3.908673469387755e-05,
"loss": 0.4224,
"step": 11700
},
{
"epoch": 3.00055,
"grad_norm": 4.665965557098389,
"learning_rate": 3.9035714285714284e-05,
"loss": 0.4212,
"step": 11750
},
{
"epoch": 3.00155,
"grad_norm": 5.204474925994873,
"learning_rate": 3.898469387755102e-05,
"loss": 0.4048,
"step": 11800
},
{
"epoch": 3.00255,
"grad_norm": 3.746649742126465,
"learning_rate": 3.8933673469387755e-05,
"loss": 0.4175,
"step": 11850
},
{
"epoch": 3.00355,
"grad_norm": 3.8436436653137207,
"learning_rate": 3.8882653061224495e-05,
"loss": 0.4425,
"step": 11900
},
{
"epoch": 3.00455,
"grad_norm": 3.9119129180908203,
"learning_rate": 3.883163265306123e-05,
"loss": 0.4227,
"step": 11950
},
{
"epoch": 3.00555,
"grad_norm": 4.373188018798828,
"learning_rate": 3.878061224489796e-05,
"loss": 0.4312,
"step": 12000
},
{
"epoch": 3.00555,
"eval_loss": 0.8273130655288696,
"eval_runtime": 214.8756,
"eval_samples_per_second": 4.691,
"eval_steps_per_second": 0.149,
"eval_wer": 0.4557079945799458,
"step": 12000
}
],
"logging_steps": 50,
"max_steps": 50000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"total_flos": 1.8907302968866898e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}