{
"best_global_step": 17738,
"best_metric": 0.2120542292956086,
"best_model_checkpoint": "./distil-whisper/checkpoint-17738",
"epoch": 49.972101673899566,
"eval_steps": 500,
"global_step": 20150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.32,
"grad_norm": 640.8583984375,
"learning_rate": 1.7000000000000003e-05,
"loss": 1277.2005,
"step": 100
},
{
"epoch": 0.64,
"grad_norm": 504.2108459472656,
"learning_rate": 3.7e-05,
"loss": 182.726,
"step": 200
},
{
"epoch": 0.96,
"grad_norm": 340.79022216796875,
"learning_rate": 5.6999999999999996e-05,
"loss": 78.8449,
"step": 300
},
{
"epoch": 1.0,
"eval_loss": 0.18918879330158234,
"eval_runtime": 179.8257,
"eval_samples_per_second": 2.78,
"eval_steps_per_second": 0.35,
"eval_wer": 0.748305334512231,
"step": 313
},
{
"epoch": 1.2784,
"grad_norm": 275.0790710449219,
"learning_rate": 7.7e-05,
"loss": 45.2321,
"step": 400
},
{
"epoch": 1.5984,
"grad_norm": 243.9811248779297,
"learning_rate": 9.7e-05,
"loss": 29.466,
"step": 500
},
{
"epoch": 1.9184,
"grad_norm": 171.8375701904297,
"learning_rate": 9.79665071770335e-05,
"loss": 23.7046,
"step": 600
},
{
"epoch": 2.0,
"eval_loss": 0.14651787281036377,
"eval_runtime": 163.0505,
"eval_samples_per_second": 3.067,
"eval_steps_per_second": 0.386,
"eval_wer": 0.4188034188034188,
"step": 626
},
{
"epoch": 2.2368,
"grad_norm": 152.78085327148438,
"learning_rate": 9.557416267942584e-05,
"loss": 15.7528,
"step": 700
},
{
"epoch": 2.5568,
"grad_norm": 110.85580444335938,
"learning_rate": 9.318181818181818e-05,
"loss": 14.4653,
"step": 800
},
{
"epoch": 2.8768000000000002,
"grad_norm": 98.29090118408203,
"learning_rate": 9.078947368421054e-05,
"loss": 13.1378,
"step": 900
},
{
"epoch": 3.0,
"eval_loss": 0.13471166789531708,
"eval_runtime": 164.1062,
"eval_samples_per_second": 3.047,
"eval_steps_per_second": 0.384,
"eval_wer": 0.36324786324786323,
"step": 939
},
{
"epoch": 3.1952,
"grad_norm": 77.09656524658203,
"learning_rate": 8.839712918660288e-05,
"loss": 9.8286,
"step": 1000
},
{
"epoch": 3.5152,
"grad_norm": 61.489463806152344,
"learning_rate": 8.600478468899522e-05,
"loss": 8.6319,
"step": 1100
},
{
"epoch": 3.8352,
"grad_norm": 83.41131591796875,
"learning_rate": 8.361244019138757e-05,
"loss": 8.2072,
"step": 1200
},
{
"epoch": 4.0,
"eval_loss": 0.13120675086975098,
"eval_runtime": 162.2387,
"eval_samples_per_second": 3.082,
"eval_steps_per_second": 0.388,
"eval_wer": 0.32847038019451813,
"step": 1252
},
{
"epoch": 4.1536,
"grad_norm": 50.904685974121094,
"learning_rate": 8.122009569377991e-05,
"loss": 6.9122,
"step": 1300
},
{
"epoch": 4.4736,
"grad_norm": 39.57589340209961,
"learning_rate": 7.882775119617225e-05,
"loss": 5.9738,
"step": 1400
},
{
"epoch": 4.7936,
"grad_norm": 74.26155090332031,
"learning_rate": 7.643540669856459e-05,
"loss": 5.8166,
"step": 1500
},
{
"epoch": 5.0,
"eval_loss": 0.13161760568618774,
"eval_runtime": 159.5413,
"eval_samples_per_second": 3.134,
"eval_steps_per_second": 0.395,
"eval_wer": 0.293692897141173,
"step": 1565
},
{
"epoch": 5.112,
"grad_norm": 37.957008361816406,
"learning_rate": 7.404306220095693e-05,
"loss": 5.4625,
"step": 1600
},
{
"epoch": 5.432,
"grad_norm": 29.977022171020508,
"learning_rate": 7.165071770334929e-05,
"loss": 4.6891,
"step": 1700
},
{
"epoch": 5.752,
"grad_norm": 31.574848175048828,
"learning_rate": 6.925837320574164e-05,
"loss": 4.5461,
"step": 1800
},
{
"epoch": 6.0,
"eval_loss": 0.1338721662759781,
"eval_runtime": 158.7379,
"eval_samples_per_second": 3.15,
"eval_steps_per_second": 0.397,
"eval_wer": 0.29162982611258476,
"step": 1878
},
{
"epoch": 6.0704,
"grad_norm": 79.52449035644531,
"learning_rate": 6.686602870813398e-05,
"loss": 4.4673,
"step": 1900
},
{
"epoch": 6.3904,
"grad_norm": 59.68710708618164,
"learning_rate": 6.447368421052632e-05,
"loss": 3.7918,
"step": 2000
},
{
"epoch": 6.7104,
"grad_norm": 31.302486419677734,
"learning_rate": 6.208133971291866e-05,
"loss": 3.8785,
"step": 2100
},
{
"epoch": 7.0,
"eval_loss": 0.12756717205047607,
"eval_runtime": 158.3831,
"eval_samples_per_second": 3.157,
"eval_steps_per_second": 0.398,
"eval_wer": 0.2838196286472148,
"step": 2191
},
{
"epoch": 7.0288,
"grad_norm": 38.00631332397461,
"learning_rate": 5.968899521531101e-05,
"loss": 3.7932,
"step": 2200
},
{
"epoch": 7.3488,
"grad_norm": 28.287948608398438,
"learning_rate": 5.729665071770335e-05,
"loss": 3.2972,
"step": 2300
},
{
"epoch": 7.6688,
"grad_norm": 32.95219802856445,
"learning_rate": 5.490430622009569e-05,
"loss": 3.2773,
"step": 2400
},
{
"epoch": 7.9888,
"grad_norm": 44.782039642333984,
"learning_rate": 5.251196172248804e-05,
"loss": 3.1975,
"step": 2500
},
{
"epoch": 8.0,
"eval_loss": 0.12529748678207397,
"eval_runtime": 159.9548,
"eval_samples_per_second": 3.126,
"eval_steps_per_second": 0.394,
"eval_wer": 0.27615679339817273,
"step": 2504
},
{
"epoch": 8.3072,
"grad_norm": 30.53591537475586,
"learning_rate": 5.011961722488039e-05,
"loss": 2.8623,
"step": 2600
},
{
"epoch": 8.6272,
"grad_norm": 53.885047912597656,
"learning_rate": 4.772727272727273e-05,
"loss": 2.9823,
"step": 2700
},
{
"epoch": 8.9472,
"grad_norm": 31.965686798095703,
"learning_rate": 4.533492822966508e-05,
"loss": 2.8784,
"step": 2800
},
{
"epoch": 9.0,
"eval_loss": 0.12403552234172821,
"eval_runtime": 160.5568,
"eval_samples_per_second": 3.114,
"eval_steps_per_second": 0.392,
"eval_wer": 0.28809313292071914,
"step": 2817
},
{
"epoch": 9.2656,
"grad_norm": 52.549591064453125,
"learning_rate": 4.294258373205742e-05,
"loss": 2.62,
"step": 2900
},
{
"epoch": 9.5856,
"grad_norm": 27.579486846923828,
"learning_rate": 4.055023923444976e-05,
"loss": 2.625,
"step": 3000
},
{
"epoch": 9.9056,
"grad_norm": 33.55034637451172,
"learning_rate": 3.815789473684211e-05,
"loss": 2.6303,
"step": 3100
},
{
"epoch": 10.0,
"eval_loss": 0.12375625222921371,
"eval_runtime": 160.2896,
"eval_samples_per_second": 3.119,
"eval_steps_per_second": 0.393,
"eval_wer": 0.2718832891246684,
"step": 3130
},
{
"epoch": 10.224,
"grad_norm": 42.10033416748047,
"learning_rate": 3.576555023923445e-05,
"loss": 2.492,
"step": 3200
},
{
"epoch": 10.544,
"grad_norm": 43.56287384033203,
"learning_rate": 3.337320574162679e-05,
"loss": 2.4219,
"step": 3300
},
{
"epoch": 10.864,
"grad_norm": 24.93031120300293,
"learning_rate": 3.098086124401914e-05,
"loss": 2.481,
"step": 3400
},
{
"epoch": 11.0,
"eval_loss": 0.12246226519346237,
"eval_runtime": 157.3827,
"eval_samples_per_second": 3.177,
"eval_steps_per_second": 0.4,
"eval_wer": 0.26702033598585323,
"step": 3443
},
{
"epoch": 11.1824,
"grad_norm": 28.191184997558594,
"learning_rate": 2.8588516746411487e-05,
"loss": 2.3101,
"step": 3500
},
{
"epoch": 11.5024,
"grad_norm": 25.59966468811035,
"learning_rate": 2.619617224880383e-05,
"loss": 2.2907,
"step": 3600
},
{
"epoch": 11.8224,
"grad_norm": 23.049842834472656,
"learning_rate": 2.380382775119617e-05,
"loss": 2.2994,
"step": 3700
},
{
"epoch": 12.0,
"eval_loss": 0.12213879823684692,
"eval_runtime": 157.5035,
"eval_samples_per_second": 3.175,
"eval_steps_per_second": 0.4,
"eval_wer": 0.26407309165929854,
"step": 3756
},
{
"epoch": 12.1408,
"grad_norm": 18.59426498413086,
"learning_rate": 2.141148325358852e-05,
"loss": 2.2326,
"step": 3800
},
{
"epoch": 12.4608,
"grad_norm": 24.557432174682617,
"learning_rate": 1.9019138755980862e-05,
"loss": 2.1734,
"step": 3900
},
{
"epoch": 12.7808,
"grad_norm": 21.113338470458984,
"learning_rate": 1.6626794258373206e-05,
"loss": 2.0863,
"step": 4000
},
{
"epoch": 13.0,
"eval_loss": 0.12139783799648285,
"eval_runtime": 157.0039,
"eval_samples_per_second": 3.185,
"eval_steps_per_second": 0.401,
"eval_wer": 0.26716769820218095,
"step": 4069
},
{
"epoch": 13.0992,
"grad_norm": 16.859729766845703,
"learning_rate": 1.423444976076555e-05,
"loss": 2.1694,
"step": 4100
},
{
"epoch": 13.4192,
"grad_norm": 38.536155700683594,
"learning_rate": 1.1842105263157895e-05,
"loss": 1.9885,
"step": 4200
},
{
"epoch": 13.7392,
"grad_norm": 20.236295700073242,
"learning_rate": 9.449760765550239e-06,
"loss": 2.0235,
"step": 4300
},
{
"epoch": 14.0,
"eval_loss": 0.12131630629301071,
"eval_runtime": 157.8964,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 0.399,
"eval_wer": 0.2637783672266431,
"step": 4382
},
{
"epoch": 14.0576,
"grad_norm": 16.027307510375977,
"learning_rate": 7.0574162679425836e-06,
"loss": 2.0284,
"step": 4400
},
{
"epoch": 14.3776,
"grad_norm": 18.118242263793945,
"learning_rate": 4.665071770334928e-06,
"loss": 1.9218,
"step": 4500
},
{
"epoch": 14.6976,
"grad_norm": 21.5025691986084,
"learning_rate": 2.2727272727272728e-06,
"loss": 2.015,
"step": 4600
},
{
"epoch": 14.9536,
"eval_loss": 0.12134864181280136,
"eval_runtime": 154.8466,
"eval_samples_per_second": 3.229,
"eval_steps_per_second": 0.407,
"eval_wer": 0.2625994694960212,
"step": 4680
},
{
"epoch": 12.533333333333333,
"grad_norm": 114.76351165771484,
"learning_rate": 4.021428571428572e-05,
"loss": 7.7843,
"step": 4700
},
{
"epoch": 12.8,
"grad_norm": 130.2476043701172,
"learning_rate": 3.8785714285714285e-05,
"loss": 7.0386,
"step": 4800
},
{
"epoch": 13.0,
"eval_loss": 0.12093591690063477,
"eval_runtime": 155.7693,
"eval_samples_per_second": 3.21,
"eval_steps_per_second": 0.404,
"eval_wer": 0.27600943118184496,
"step": 4875
},
{
"epoch": 13.066666666666666,
"grad_norm": 124.46900177001953,
"learning_rate": 3.735714285714286e-05,
"loss": 6.4105,
"step": 4900
},
{
"epoch": 13.333333333333334,
"grad_norm": 79.74747467041016,
"learning_rate": 3.5928571428571425e-05,
"loss": 5.167,
"step": 5000
},
{
"epoch": 13.6,
"grad_norm": 85.09929656982422,
"learning_rate": 3.45e-05,
"loss": 5.3239,
"step": 5100
},
{
"epoch": 13.866666666666667,
"grad_norm": 86.92524719238281,
"learning_rate": 3.307142857142858e-05,
"loss": 5.2638,
"step": 5200
},
{
"epoch": 14.0,
"eval_loss": 0.11688227951526642,
"eval_runtime": 151.5329,
"eval_samples_per_second": 3.3,
"eval_steps_per_second": 0.416,
"eval_wer": 0.2537577365163572,
"step": 5250
},
{
"epoch": 14.133333333333333,
"grad_norm": 46.18540954589844,
"learning_rate": 3.1642857142857145e-05,
"loss": 4.4287,
"step": 5300
},
{
"epoch": 14.4,
"grad_norm": 63.5553092956543,
"learning_rate": 3.021428571428572e-05,
"loss": 3.7761,
"step": 5400
},
{
"epoch": 14.666666666666666,
"grad_norm": 41.22358322143555,
"learning_rate": 2.878571428571429e-05,
"loss": 3.9576,
"step": 5500
},
{
"epoch": 14.933333333333334,
"grad_norm": 86.58721160888672,
"learning_rate": 2.735714285714286e-05,
"loss": 3.8581,
"step": 5600
},
{
"epoch": 15.0,
"eval_loss": 0.11802562326192856,
"eval_runtime": 151.7648,
"eval_samples_per_second": 3.295,
"eval_steps_per_second": 0.415,
"eval_wer": 0.23740053050397877,
"step": 5625
},
{
"epoch": 15.2,
"grad_norm": 30.3751220703125,
"learning_rate": 2.592857142857143e-05,
"loss": 3.5142,
"step": 5700
},
{
"epoch": 15.466666666666667,
"grad_norm": 55.38408279418945,
"learning_rate": 2.45e-05,
"loss": 3.2256,
"step": 5800
},
{
"epoch": 15.733333333333333,
"grad_norm": 34.806907653808594,
"learning_rate": 2.3071428571428573e-05,
"loss": 3.1364,
"step": 5900
},
{
"epoch": 16.0,
"grad_norm": 31.722949981689453,
"learning_rate": 2.1642857142857146e-05,
"loss": 3.4661,
"step": 6000
},
{
"epoch": 16.0,
"eval_loss": 0.11758579313755035,
"eval_runtime": 151.2711,
"eval_samples_per_second": 3.305,
"eval_steps_per_second": 0.416,
"eval_wer": 0.24078986147951664,
"step": 6000
},
{
"epoch": 16.266666666666666,
"grad_norm": 38.61819076538086,
"learning_rate": 2.0214285714285716e-05,
"loss": 2.9965,
"step": 6100
},
{
"epoch": 16.533333333333335,
"grad_norm": 53.0886116027832,
"learning_rate": 1.8785714285714286e-05,
"loss": 2.7895,
"step": 6200
},
{
"epoch": 16.8,
"grad_norm": 31.704025268554688,
"learning_rate": 1.7357142857142856e-05,
"loss": 2.8903,
"step": 6300
},
{
"epoch": 17.0,
"eval_loss": 0.11670505255460739,
"eval_runtime": 146.7611,
"eval_samples_per_second": 3.407,
"eval_steps_per_second": 0.429,
"eval_wer": 0.23592690834070146,
"step": 6375
},
{
"epoch": 17.066666666666666,
"grad_norm": 25.057514190673828,
"learning_rate": 1.592857142857143e-05,
"loss": 2.8696,
"step": 6400
},
{
"epoch": 17.333333333333332,
"grad_norm": 27.10036277770996,
"learning_rate": 1.45e-05,
"loss": 2.8056,
"step": 6500
},
{
"epoch": 17.6,
"grad_norm": 39.833030700683594,
"learning_rate": 1.3071428571428574e-05,
"loss": 2.6236,
"step": 6600
},
{
"epoch": 17.866666666666667,
"grad_norm": 21.081867218017578,
"learning_rate": 1.1642857142857144e-05,
"loss": 2.6081,
"step": 6700
},
{
"epoch": 18.0,
"eval_loss": 0.11724027991294861,
"eval_runtime": 146.5669,
"eval_samples_per_second": 3.411,
"eval_steps_per_second": 0.43,
"eval_wer": 0.2357795461243737,
"step": 6750
},
{
"epoch": 18.133333333333333,
"grad_norm": 51.97953414916992,
"learning_rate": 1.0214285714285715e-05,
"loss": 2.5349,
"step": 6800
},
{
"epoch": 18.4,
"grad_norm": 29.391460418701172,
"learning_rate": 8.785714285714286e-06,
"loss": 2.4141,
"step": 6900
},
{
"epoch": 18.666666666666668,
"grad_norm": 33.09728240966797,
"learning_rate": 7.3571428571428565e-06,
"loss": 2.5216,
"step": 7000
},
{
"epoch": 18.933333333333334,
"grad_norm": 52.41236114501953,
"learning_rate": 5.928571428571429e-06,
"loss": 2.6719,
"step": 7100
},
{
"epoch": 19.0,
"eval_loss": 0.11649636179208755,
"eval_runtime": 150.9234,
"eval_samples_per_second": 3.313,
"eval_steps_per_second": 0.417,
"eval_wer": 0.24005305039787797,
"step": 7125
},
{
"epoch": 19.2,
"grad_norm": 37.658836364746094,
"learning_rate": 4.5e-06,
"loss": 2.3516,
"step": 7200
},
{
"epoch": 19.466666666666665,
"grad_norm": 36.73357009887695,
"learning_rate": 3.0714285714285715e-06,
"loss": 2.3761,
"step": 7300
},
{
"epoch": 19.733333333333334,
"grad_norm": 30.50603675842285,
"learning_rate": 1.6428571428571429e-06,
"loss": 2.4319,
"step": 7400
},
{
"epoch": 20.0,
"grad_norm": 24.163257598876953,
"learning_rate": 2.142857142857143e-07,
"loss": 2.4235,
"step": 7500
},
{
"epoch": 20.0,
"eval_loss": 0.11603204160928726,
"eval_runtime": 149.7675,
"eval_samples_per_second": 3.339,
"eval_steps_per_second": 0.421,
"eval_wer": 0.24300029472443266,
"step": 7500
},
{
"epoch": 20.266666666666666,
"grad_norm": 152.1876983642578,
"learning_rate": 5.1137931034482754e-05,
"loss": 5.1222,
"step": 7600
},
{
"epoch": 20.533333333333335,
"grad_norm": 222.44189453125,
"learning_rate": 5.044827586206897e-05,
"loss": 5.2045,
"step": 7700
},
{
"epoch": 20.8,
"grad_norm": 150.16041564941406,
"learning_rate": 4.975862068965517e-05,
"loss": 4.9497,
"step": 7800
},
{
"epoch": 21.0,
"eval_loss": 0.11334758251905441,
"eval_runtime": 151.8894,
"eval_samples_per_second": 3.292,
"eval_steps_per_second": 0.415,
"eval_wer": 0.23607427055702918,
"step": 7875
},
{
"epoch": 21.066666666666666,
"grad_norm": 46.202308654785156,
"learning_rate": 4.9068965517241386e-05,
"loss": 4.2033,
"step": 7900
},
{
"epoch": 21.333333333333332,
"grad_norm": 38.9134635925293,
"learning_rate": 4.837931034482759e-05,
"loss": 3.784,
"step": 8000
},
{
"epoch": 21.6,
"grad_norm": 53.533203125,
"learning_rate": 4.7689655172413796e-05,
"loss": 3.5497,
"step": 8100
},
{
"epoch": 21.866666666666667,
"grad_norm": 51.86565399169922,
"learning_rate": 4.7e-05,
"loss": 3.6345,
"step": 8200
},
{
"epoch": 22.0,
"eval_loss": 0.11361408233642578,
"eval_runtime": 148.5623,
"eval_samples_per_second": 3.366,
"eval_steps_per_second": 0.424,
"eval_wer": 0.22737989979369289,
"step": 8250
},
{
"epoch": 22.133333333333333,
"grad_norm": 77.92996215820312,
"learning_rate": 4.631034482758621e-05,
"loss": 3.2194,
"step": 8300
},
{
"epoch": 22.4,
"grad_norm": 46.25454330444336,
"learning_rate": 4.5620689655172414e-05,
"loss": 2.8227,
"step": 8400
},
{
"epoch": 22.666666666666668,
"grad_norm": 32.176517486572266,
"learning_rate": 4.493103448275862e-05,
"loss": 3.0837,
"step": 8500
},
{
"epoch": 22.933333333333334,
"grad_norm": 42.87643051147461,
"learning_rate": 4.4241379310344824e-05,
"loss": 3.092,
"step": 8600
},
{
"epoch": 23.0,
"eval_loss": 0.11234613507986069,
"eval_runtime": 148.2332,
"eval_samples_per_second": 3.373,
"eval_steps_per_second": 0.425,
"eval_wer": 0.2304745063365753,
"step": 8625
},
{
"epoch": 23.2,
"grad_norm": 64.79329681396484,
"learning_rate": 4.355172413793104e-05,
"loss": 2.7315,
"step": 8700
},
{
"epoch": 23.466666666666665,
"grad_norm": 37.61339569091797,
"learning_rate": 4.286206896551724e-05,
"loss": 2.713,
"step": 8800
},
{
"epoch": 23.733333333333334,
"grad_norm": 27.409000396728516,
"learning_rate": 4.217241379310345e-05,
"loss": 2.5189,
"step": 8900
},
{
"epoch": 24.0,
"grad_norm": 39.62417221069336,
"learning_rate": 4.148275862068966e-05,
"loss": 2.606,
"step": 9000
},
{
"epoch": 24.0,
"eval_loss": 0.1097910925745964,
"eval_runtime": 146.0641,
"eval_samples_per_second": 3.423,
"eval_steps_per_second": 0.431,
"eval_wer": 0.2282640730916593,
"step": 9000
},
{
"epoch": 24.266666666666666,
"grad_norm": 21.3342227935791,
"learning_rate": 4.0793103448275866e-05,
"loss": 2.3604,
"step": 9100
},
{
"epoch": 24.533333333333335,
"grad_norm": 61.0772705078125,
"learning_rate": 4.0103448275862074e-05,
"loss": 2.3015,
"step": 9200
},
{
"epoch": 24.8,
"grad_norm": 47.21693420410156,
"learning_rate": 3.9413793103448276e-05,
"loss": 2.4858,
"step": 9300
},
{
"epoch": 25.0,
"eval_loss": 0.11028488725423813,
"eval_runtime": 146.4467,
"eval_samples_per_second": 3.414,
"eval_steps_per_second": 0.43,
"eval_wer": 0.22531682876510462,
"step": 9375
},
{
"epoch": 25.066666666666666,
"grad_norm": 58.333526611328125,
"learning_rate": 3.8724137931034484e-05,
"loss": 2.215,
"step": 9400
},
{
"epoch": 25.333333333333332,
"grad_norm": 33.136444091796875,
"learning_rate": 3.803448275862069e-05,
"loss": 2.146,
"step": 9500
},
{
"epoch": 25.6,
"grad_norm": 25.031856536865234,
"learning_rate": 3.73448275862069e-05,
"loss": 2.1088,
"step": 9600
},
{
"epoch": 25.866666666666667,
"grad_norm": 27.74683952331543,
"learning_rate": 3.66551724137931e-05,
"loss": 2.1898,
"step": 9700
},
{
"epoch": 26.0,
"eval_loss": 0.11085129529237747,
"eval_runtime": 147.8522,
"eval_samples_per_second": 3.382,
"eval_steps_per_second": 0.426,
"eval_wer": 0.2326849395814913,
"step": 9750
},
{
"epoch": 26.133333333333333,
"grad_norm": 26.178314208984375,
"learning_rate": 3.596551724137931e-05,
"loss": 2.1311,
"step": 9800
},
{
"epoch": 26.4,
"grad_norm": 28.952735900878906,
"learning_rate": 3.527586206896552e-05,
"loss": 1.9449,
"step": 9900
},
{
"epoch": 26.666666666666668,
"grad_norm": 31.816133499145508,
"learning_rate": 3.458620689655173e-05,
"loss": 1.9057,
"step": 10000
},
{
"epoch": 26.933333333333334,
"grad_norm": 25.91592025756836,
"learning_rate": 3.389655172413793e-05,
"loss": 2.1861,
"step": 10100
},
{
"epoch": 27.0,
"eval_loss": 0.10877023637294769,
"eval_runtime": 146.9281,
"eval_samples_per_second": 3.403,
"eval_steps_per_second": 0.429,
"eval_wer": 0.23106395520188625,
"step": 10125
},
{
"epoch": 27.2,
"grad_norm": 24.183246612548828,
"learning_rate": 3.320689655172414e-05,
"loss": 1.9927,
"step": 10200
},
{
"epoch": 27.466666666666665,
"grad_norm": 27.9620361328125,
"learning_rate": 3.2517241379310346e-05,
"loss": 1.785,
"step": 10300
},
{
"epoch": 27.733333333333334,
"grad_norm": 41.2671012878418,
"learning_rate": 3.1827586206896554e-05,
"loss": 1.8756,
"step": 10400
},
{
"epoch": 28.0,
"grad_norm": 26.784330368041992,
"learning_rate": 3.113793103448276e-05,
"loss": 1.8994,
"step": 10500
},
{
"epoch": 28.0,
"eval_loss": 0.10841843485832214,
"eval_runtime": 143.9314,
"eval_samples_per_second": 3.474,
"eval_steps_per_second": 0.438,
"eval_wer": 0.2260536398467433,
"step": 10500
},
{
"epoch": 28.266666666666666,
"grad_norm": 31.477703094482422,
"learning_rate": 3.0448275862068964e-05,
"loss": 1.7406,
"step": 10600
},
{
"epoch": 28.533333333333335,
"grad_norm": 26.99530029296875,
"learning_rate": 2.9758620689655176e-05,
"loss": 1.7979,
"step": 10700
},
{
"epoch": 28.8,
"grad_norm": 24.11530876159668,
"learning_rate": 2.906896551724138e-05,
"loss": 1.8208,
"step": 10800
},
{
"epoch": 29.0,
"eval_loss": 0.10783620923757553,
"eval_runtime": 144.2312,
"eval_samples_per_second": 3.467,
"eval_steps_per_second": 0.437,
"eval_wer": 0.22664308871205424,
"step": 10875
},
{
"epoch": 29.066666666666666,
"grad_norm": 17.79966163635254,
"learning_rate": 2.8379310344827586e-05,
"loss": 1.7014,
"step": 10900
},
{
"epoch": 29.333333333333332,
"grad_norm": 28.017330169677734,
"learning_rate": 2.768965517241379e-05,
"loss": 1.6895,
"step": 11000
},
{
"epoch": 29.6,
"grad_norm": 24.27259635925293,
"learning_rate": 2.7000000000000002e-05,
"loss": 1.7268,
"step": 11100
},
{
"epoch": 29.866666666666667,
"grad_norm": 32.436275482177734,
"learning_rate": 2.6310344827586207e-05,
"loss": 1.706,
"step": 11200
},
{
"epoch": 30.0,
"eval_loss": 0.10766720026731491,
"eval_runtime": 143.3181,
"eval_samples_per_second": 3.489,
"eval_steps_per_second": 0.44,
"eval_wer": 0.2287061597406425,
"step": 11250
},
{
"epoch": 30.133333333333333,
"grad_norm": 40.39571762084961,
"learning_rate": 2.5620689655172416e-05,
"loss": 1.618,
"step": 11300
},
{
"epoch": 30.4,
"grad_norm": 32.211952209472656,
"learning_rate": 2.493103448275862e-05,
"loss": 1.6147,
"step": 11400
},
{
"epoch": 30.666666666666668,
"grad_norm": 24.827373504638672,
"learning_rate": 2.424137931034483e-05,
"loss": 1.6014,
"step": 11500
},
{
"epoch": 30.933333333333334,
"grad_norm": 21.28459930419922,
"learning_rate": 2.3551724137931037e-05,
"loss": 1.5895,
"step": 11600
},
{
"epoch": 31.0,
"eval_loss": 0.10668068379163742,
"eval_runtime": 145.3149,
"eval_samples_per_second": 3.441,
"eval_steps_per_second": 0.434,
"eval_wer": 0.22325375773651635,
"step": 11625
},
{
"epoch": 31.2,
"grad_norm": 18.275196075439453,
"learning_rate": 2.2862068965517242e-05,
"loss": 1.6111,
"step": 11700
},
{
"epoch": 31.466666666666665,
"grad_norm": 18.976835250854492,
"learning_rate": 2.217241379310345e-05,
"loss": 1.5058,
"step": 11800
},
{
"epoch": 31.733333333333334,
"grad_norm": 23.13480567932129,
"learning_rate": 2.1482758620689656e-05,
"loss": 1.5864,
"step": 11900
},
{
"epoch": 32.0,
"grad_norm": 18.37510871887207,
"learning_rate": 2.0793103448275864e-05,
"loss": 1.5086,
"step": 12000
},
{
"epoch": 32.0,
"eval_loss": 0.10681495070457458,
"eval_runtime": 146.0166,
"eval_samples_per_second": 3.424,
"eval_steps_per_second": 0.431,
"eval_wer": 0.22988505747126436,
"step": 12000
},
{
"epoch": 32.266666666666666,
"grad_norm": 29.316116333007812,
"learning_rate": 2.010344827586207e-05,
"loss": 1.4952,
"step": 12100
},
{
"epoch": 32.53333333333333,
"grad_norm": 14.011812210083008,
"learning_rate": 1.9413793103448277e-05,
"loss": 1.5013,
"step": 12200
},
{
"epoch": 32.8,
"grad_norm": 26.72800636291504,
"learning_rate": 1.8724137931034482e-05,
"loss": 1.4744,
"step": 12300
},
{
"epoch": 33.0,
"eval_loss": 0.10653579980134964,
"eval_runtime": 144.204,
"eval_samples_per_second": 3.467,
"eval_steps_per_second": 0.437,
"eval_wer": 0.22679045092838196,
"step": 12375
},
{
"epoch": 33.06666666666667,
"grad_norm": 15.76944637298584,
"learning_rate": 1.803448275862069e-05,
"loss": 1.4733,
"step": 12400
},
{
"epoch": 33.333333333333336,
"grad_norm": 12.833415031433105,
"learning_rate": 1.7344827586206896e-05,
"loss": 1.4352,
"step": 12500
},
{
"epoch": 33.6,
"grad_norm": 23.16147232055664,
"learning_rate": 1.6655172413793104e-05,
"loss": 1.4169,
"step": 12600
},
{
"epoch": 33.86666666666667,
"grad_norm": 23.133747100830078,
"learning_rate": 1.596551724137931e-05,
"loss": 1.4184,
"step": 12700
},
{
"epoch": 34.0,
"eval_loss": 0.10563693195581436,
"eval_runtime": 144.3614,
"eval_samples_per_second": 3.464,
"eval_steps_per_second": 0.436,
"eval_wer": 0.22664308871205424,
"step": 12750
},
{
"epoch": 34.13333333333333,
"grad_norm": 16.677919387817383,
"learning_rate": 1.5275862068965517e-05,
"loss": 1.3784,
"step": 12800
},
{
"epoch": 34.4,
"grad_norm": 14.670028686523438,
"learning_rate": 1.4586206896551724e-05,
"loss": 1.402,
"step": 12900
},
{
"epoch": 34.666666666666664,
"grad_norm": 23.33100128173828,
"learning_rate": 1.3896551724137932e-05,
"loss": 1.361,
"step": 13000
},
{
"epoch": 34.93333333333333,
"grad_norm": 15.100184440612793,
"learning_rate": 1.3206896551724137e-05,
"loss": 1.4134,
"step": 13100
},
{
"epoch": 35.0,
"eval_loss": 0.10637735575437546,
"eval_runtime": 148.319,
"eval_samples_per_second": 3.371,
"eval_steps_per_second": 0.425,
"eval_wer": 0.23312702623047452,
"step": 13125
},
{
"epoch": 35.2,
"grad_norm": 12.271963119506836,
"learning_rate": 1.2517241379310346e-05,
"loss": 1.3373,
"step": 13200
},
{
"epoch": 35.46666666666667,
"grad_norm": 20.943439483642578,
"learning_rate": 1.1827586206896552e-05,
"loss": 1.3615,
"step": 13300
},
{
"epoch": 35.733333333333334,
"grad_norm": 15.165372848510742,
"learning_rate": 1.1137931034482759e-05,
"loss": 1.3727,
"step": 13400
},
{
"epoch": 36.0,
"grad_norm": 13.487517356872559,
"learning_rate": 1.0448275862068967e-05,
"loss": 1.3246,
"step": 13500
},
{
"epoch": 36.0,
"eval_loss": 0.10543886572122574,
"eval_runtime": 145.8253,
"eval_samples_per_second": 3.429,
"eval_steps_per_second": 0.432,
"eval_wer": 0.22634836427939875,
"step": 13500
},
{
"epoch": 36.266666666666666,
"grad_norm": 19.09083366394043,
"learning_rate": 9.758620689655174e-06,
"loss": 1.2625,
"step": 13600
},
{
"epoch": 36.53333333333333,
"grad_norm": 15.21319580078125,
"learning_rate": 9.06896551724138e-06,
"loss": 1.3249,
"step": 13700
},
{
"epoch": 36.8,
"grad_norm": 15.249282836914062,
"learning_rate": 8.379310344827587e-06,
"loss": 1.3368,
"step": 13800
},
{
"epoch": 37.0,
"eval_loss": 0.10566218197345734,
"eval_runtime": 144.146,
"eval_samples_per_second": 3.469,
"eval_steps_per_second": 0.437,
"eval_wer": 0.23165340406719717,
"step": 13875
},
{
"epoch": 37.06666666666667,
"grad_norm": 15.04231071472168,
"learning_rate": 7.689655172413794e-06,
"loss": 1.3301,
"step": 13900
},
{
"epoch": 37.333333333333336,
"grad_norm": 10.739683151245117,
"learning_rate": 7.000000000000001e-06,
"loss": 1.3041,
"step": 14000
},
{
"epoch": 37.6,
"grad_norm": 14.751246452331543,
"learning_rate": 6.310344827586208e-06,
"loss": 1.2658,
"step": 14100
},
{
"epoch": 37.86666666666667,
"grad_norm": 12.384917259216309,
"learning_rate": 5.620689655172414e-06,
"loss": 1.3084,
"step": 14200
},
{
"epoch": 38.0,
"eval_loss": 0.10525722056627274,
"eval_runtime": 148.2408,
"eval_samples_per_second": 3.373,
"eval_steps_per_second": 0.425,
"eval_wer": 0.24123194812849985,
"step": 14250
},
{
"epoch": 38.13333333333333,
"grad_norm": 11.925743103027344,
"learning_rate": 4.931034482758621e-06,
"loss": 1.2151,
"step": 14300
},
{
"epoch": 38.4,
"grad_norm": 10.217673301696777,
"learning_rate": 4.241379310344827e-06,
"loss": 1.2426,
"step": 14400
},
{
"epoch": 38.666666666666664,
"grad_norm": 10.630120277404785,
"learning_rate": 3.5517241379310345e-06,
"loss": 1.2779,
"step": 14500
},
{
"epoch": 38.93333333333333,
"grad_norm": 19.7459774017334,
"learning_rate": 2.8620689655172416e-06,
"loss": 1.302,
"step": 14600
},
{
"epoch": 39.0,
"eval_loss": 0.10544682294130325,
"eval_runtime": 148.7257,
"eval_samples_per_second": 3.362,
"eval_steps_per_second": 0.424,
"eval_wer": 0.2309165929855585,
"step": 14625
},
{
"epoch": 39.2,
"grad_norm": 14.623858451843262,
"learning_rate": 2.1724137931034482e-06,
"loss": 1.2663,
"step": 14700
},
{
"epoch": 39.46666666666667,
"grad_norm": 11.66265869140625,
"learning_rate": 1.4827586206896553e-06,
"loss": 1.2647,
"step": 14800
},
{
"epoch": 39.733333333333334,
"grad_norm": 12.55521297454834,
"learning_rate": 7.931034482758622e-07,
"loss": 1.2334,
"step": 14900
},
{
"epoch": 40.0,
"grad_norm": 9.761940956115723,
"learning_rate": 1.0344827586206898e-07,
"loss": 1.2152,
"step": 15000
},
{
"epoch": 40.0,
"eval_loss": 0.1053401455283165,
"eval_runtime": 145.6503,
"eval_samples_per_second": 3.433,
"eval_steps_per_second": 0.433,
"eval_wer": 0.22973769525493665,
"step": 15000
},
{
"epoch": 37.46869187848729,
"grad_norm": 39.86048126220703,
"learning_rate": 2.577608142493639e-05,
"loss": 3.5543,
"step": 15100
},
{
"epoch": 37.716676999380034,
"grad_norm": 53.614952087402344,
"learning_rate": 2.5267175572519086e-05,
"loss": 3.9955,
"step": 15200
},
{
"epoch": 37.964662120272784,
"grad_norm": 33.56686019897461,
"learning_rate": 2.4758269720101782e-05,
"loss": 3.6933,
"step": 15300
},
{
"epoch": 37.99938003719777,
"eval_loss": 0.10439449548721313,
"eval_runtime": 152.7963,
"eval_samples_per_second": 3.272,
"eval_steps_per_second": 0.412,
"eval_wer": 0.21220159151193635,
"step": 15314
},
{
"epoch": 38.21326720396776,
"grad_norm": 44.14704132080078,
"learning_rate": 2.424936386768448e-05,
"loss": 3.1742,
"step": 15400
},
{
"epoch": 38.46125232486051,
"grad_norm": 65.31890869140625,
"learning_rate": 2.374045801526718e-05,
"loss": 2.9259,
"step": 15500
},
{
"epoch": 38.70923744575325,
"grad_norm": 38.38364028930664,
"learning_rate": 2.3231552162849872e-05,
"loss": 2.9688,
"step": 15600
},
{
"epoch": 38.957222566646,
"grad_norm": 101.05147552490234,
"learning_rate": 2.272264631043257e-05,
"loss": 2.9938,
"step": 15700
},
{
"epoch": 39.0,
"eval_loss": 0.10509275645017624,
"eval_runtime": 149.9151,
"eval_samples_per_second": 3.335,
"eval_steps_per_second": 0.42,
"eval_wer": 0.21927497789566755,
"step": 15718
},
{
"epoch": 39.203347799132054,
"grad_norm": 40.039276123046875,
"learning_rate": 2.2213740458015268e-05,
"loss": 2.3839,
"step": 15800
},
{
"epoch": 39.4513329200248,
"grad_norm": 71.81354522705078,
"learning_rate": 2.1704834605597965e-05,
"loss": 2.6085,
"step": 15900
},
{
"epoch": 39.69931804091755,
"grad_norm": 98.01214599609375,
"learning_rate": 2.1195928753180665e-05,
"loss": 2.3986,
"step": 16000
},
{
"epoch": 39.94730316181029,
"grad_norm": 54.38132858276367,
"learning_rate": 2.0687022900763358e-05,
"loss": 2.5582,
"step": 16100
},
{
"epoch": 40.0,
"eval_loss": 0.10408324003219604,
"eval_runtime": 154.5124,
"eval_samples_per_second": 3.236,
"eval_steps_per_second": 0.408,
"eval_wer": 0.22015915119363394,
"step": 16122
},
{
"epoch": 40.19342839429634,
"grad_norm": 37.06816101074219,
"learning_rate": 2.0178117048346058e-05,
"loss": 2.2341,
"step": 16200
},
{
"epoch": 40.44141351518909,
"grad_norm": 27.490217208862305,
"learning_rate": 1.9669211195928754e-05,
"loss": 2.2284,
"step": 16300
},
{
"epoch": 40.68939863608183,
"grad_norm": 32.29278564453125,
"learning_rate": 1.916030534351145e-05,
"loss": 2.3084,
"step": 16400
},
{
"epoch": 40.93738375697458,
"grad_norm": 30.68113136291504,
"learning_rate": 1.8651399491094148e-05,
"loss": 2.1949,
"step": 16500
},
{
"epoch": 41.0,
"eval_loss": 0.10319098085165024,
"eval_runtime": 146.5477,
"eval_samples_per_second": 3.412,
"eval_steps_per_second": 0.43,
"eval_wer": 0.21367521367521367,
"step": 16526
},
{
"epoch": 41.183508989460634,
"grad_norm": 31.351858139038086,
"learning_rate": 1.8142493638676847e-05,
"loss": 1.856,
"step": 16600
},
{
"epoch": 41.43149411035338,
"grad_norm": 24.235687255859375,
"learning_rate": 1.763358778625954e-05,
"loss": 1.998,
"step": 16700
},
{
"epoch": 41.67947923124613,
"grad_norm": 27.65749740600586,
"learning_rate": 1.712468193384224e-05,
"loss": 2.0332,
"step": 16800
},
{
"epoch": 41.92746435213887,
"grad_norm": 23.039844512939453,
"learning_rate": 1.6615776081424937e-05,
"loss": 2.1428,
"step": 16900
},
{
"epoch": 42.0,
"eval_loss": 0.10452839732170105,
"eval_runtime": 147.7082,
"eval_samples_per_second": 3.385,
"eval_steps_per_second": 0.427,
"eval_wer": 0.21455938697318008,
"step": 16930
},
{
"epoch": 42.17358958462492,
"grad_norm": 30.967439651489258,
"learning_rate": 1.6106870229007634e-05,
"loss": 1.9583,
"step": 17000
},
{
"epoch": 42.42157470551767,
"grad_norm": 22.957183837890625,
"learning_rate": 1.5597964376590334e-05,
"loss": 1.9154,
"step": 17100
},
{
"epoch": 42.66955982641041,
"grad_norm": 26.501638412475586,
"learning_rate": 1.5089058524173027e-05,
"loss": 1.8082,
"step": 17200
},
{
"epoch": 42.91754494730316,
"grad_norm": 21.166030883789062,
"learning_rate": 1.4580152671755725e-05,
"loss": 2.0052,
"step": 17300
},
{
"epoch": 43.0,
"eval_loss": 0.10267385095357895,
"eval_runtime": 148.3211,
"eval_samples_per_second": 3.371,
"eval_steps_per_second": 0.425,
"eval_wer": 0.21455938697318008,
"step": 17334
},
{
"epoch": 43.163670179789214,
"grad_norm": 26.231456756591797,
"learning_rate": 1.4071246819338423e-05,
"loss": 1.7369,
"step": 17400
},
{
"epoch": 43.41165530068196,
"grad_norm": 21.52410316467285,
"learning_rate": 1.356234096692112e-05,
"loss": 1.9099,
"step": 17500
},
{
"epoch": 43.65964042157471,
"grad_norm": 23.501480102539062,
"learning_rate": 1.3053435114503818e-05,
"loss": 1.7666,
"step": 17600
},
{
"epoch": 43.90762554246745,
"grad_norm": 23.56607437133789,
"learning_rate": 1.2544529262086516e-05,
"loss": 1.7204,
"step": 17700
},
{
"epoch": 44.0,
"eval_loss": 0.10307622700929642,
"eval_runtime": 145.6608,
"eval_samples_per_second": 3.433,
"eval_steps_per_second": 0.433,
"eval_wer": 0.2120542292956086,
"step": 17738
},
{
"epoch": 44.1537507749535,
"grad_norm": 26.66972541809082,
"learning_rate": 1.2035623409669211e-05,
"loss": 1.8082,
"step": 17800
},
{
"epoch": 44.40173589584625,
"grad_norm": 28.958173751831055,
"learning_rate": 1.152671755725191e-05,
"loss": 1.6666,
"step": 17900
},
{
"epoch": 44.649721016738994,
"grad_norm": 27.299480438232422,
"learning_rate": 1.1017811704834606e-05,
"loss": 1.6912,
"step": 18000
},
{
"epoch": 44.897706137631744,
"grad_norm": 23.34587860107422,
"learning_rate": 1.0508905852417303e-05,
"loss": 1.7391,
"step": 18100
},
{
"epoch": 45.0,
"eval_loss": 0.10261169075965881,
"eval_runtime": 148.8902,
"eval_samples_per_second": 3.358,
"eval_steps_per_second": 0.423,
"eval_wer": 0.2124963159445918,
"step": 18142
},
{
"epoch": 45.143831370117795,
"grad_norm": 23.987537384033203,
"learning_rate": 1e-05,
"loss": 1.6374,
"step": 18200
},
{
"epoch": 45.39181649101054,
"grad_norm": 20.715587615966797,
"learning_rate": 9.491094147582697e-06,
"loss": 1.6636,
"step": 18300
},
{
"epoch": 45.63980161190329,
"grad_norm": 16.032875061035156,
"learning_rate": 8.982188295165394e-06,
"loss": 1.6574,
"step": 18400
},
{
"epoch": 45.88778673279603,
"grad_norm": 22.555355072021484,
"learning_rate": 8.473282442748092e-06,
"loss": 1.6544,
"step": 18500
},
{
"epoch": 46.0,
"eval_loss": 0.10276778787374496,
"eval_runtime": 148.3048,
"eval_samples_per_second": 3.371,
"eval_steps_per_second": 0.425,
"eval_wer": 0.21396993810786913,
"step": 18546
},
{
"epoch": 46.13391196528208,
"grad_norm": 21.823448181152344,
"learning_rate": 7.96437659033079e-06,
"loss": 1.5898,
"step": 18600
},
{
"epoch": 46.38189708617483,
"grad_norm": 19.662864685058594,
"learning_rate": 7.455470737913486e-06,
"loss": 1.6271,
"step": 18700
},
{
"epoch": 46.629882207067574,
"grad_norm": 26.988121032714844,
"learning_rate": 6.9465648854961835e-06,
"loss": 1.5568,
"step": 18800
},
{
"epoch": 46.877867327960324,
"grad_norm": 56.02478790283203,
"learning_rate": 6.437659033078881e-06,
"loss": 1.6764,
"step": 18900
},
{
"epoch": 47.0,
"eval_loss": 0.10327400267124176,
"eval_runtime": 147.8139,
"eval_samples_per_second": 3.383,
"eval_steps_per_second": 0.426,
"eval_wer": 0.2120542292956086,
"step": 18950
},
{
"epoch": 47.123992560446375,
"grad_norm": 30.128440856933594,
"learning_rate": 5.928753180661578e-06,
"loss": 1.5139,
"step": 19000
},
{
"epoch": 47.37197768133912,
"grad_norm": 13.33277702331543,
"learning_rate": 5.419847328244275e-06,
"loss": 1.6313,
"step": 19100
},
{
"epoch": 47.61996280223187,
"grad_norm": 22.089269638061523,
"learning_rate": 4.910941475826972e-06,
"loss": 1.5804,
"step": 19200
},
{
"epoch": 47.86794792312461,
"grad_norm": 16.610151290893555,
"learning_rate": 4.402035623409669e-06,
"loss": 1.535,
"step": 19300
},
{
"epoch": 48.0,
"eval_loss": 0.1028217151761055,
"eval_runtime": 146.2041,
"eval_samples_per_second": 3.42,
"eval_steps_per_second": 0.431,
"eval_wer": 0.21220159151193635,
"step": 19354
},
{
"epoch": 48.11407315561066,
"grad_norm": 28.443222045898438,
"learning_rate": 3.893129770992367e-06,
"loss": 1.5263,
"step": 19400
},
{
"epoch": 48.36205827650341,
"grad_norm": 11.213704109191895,
"learning_rate": 3.3842239185750637e-06,
"loss": 1.5283,
"step": 19500
},
{
"epoch": 48.610043397396154,
"grad_norm": 15.313924789428711,
"learning_rate": 2.875318066157761e-06,
"loss": 1.4908,
"step": 19600
},
{
"epoch": 48.858028518288904,
"grad_norm": 17.469341278076172,
"learning_rate": 2.366412213740458e-06,
"loss": 1.5344,
"step": 19700
},
{
"epoch": 49.0,
"eval_loss": 0.1024635061621666,
"eval_runtime": 149.6385,
"eval_samples_per_second": 3.341,
"eval_steps_per_second": 0.421,
"eval_wer": 0.2163277335691129,
"step": 19758
},
{
"epoch": 49.104153750774955,
"grad_norm": 19.84515380859375,
"learning_rate": 1.8575063613231552e-06,
"loss": 1.4737,
"step": 19800
},
{
"epoch": 49.3521388716677,
"grad_norm": 14.313343048095703,
"learning_rate": 1.3486005089058526e-06,
"loss": 1.5536,
"step": 19900
},
{
"epoch": 49.60012399256045,
"grad_norm": 13.140170097351074,
"learning_rate": 8.396946564885497e-07,
"loss": 1.4557,
"step": 20000
},
{
"epoch": 49.84810911345319,
"grad_norm": 17.50598907470703,
"learning_rate": 3.3078880407124687e-07,
"loss": 1.5171,
"step": 20100
},
{
"epoch": 49.972101673899566,
"eval_loss": 0.10252001881599426,
"eval_runtime": 149.0596,
"eval_samples_per_second": 3.354,
"eval_steps_per_second": 0.423,
"eval_wer": 0.2120542292956086,
"step": 20150
}
],
"logging_steps": 100,
"max_steps": 20150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.61896007057408e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}