| { |
| "best_global_step": 17738, |
| "best_metric": 0.2120542292956086, |
| "best_model_checkpoint": "./distil-whisper/checkpoint-17738", |
| "epoch": 49.972101673899566, |
| "eval_steps": 500, |
| "global_step": 20150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.32, |
| "grad_norm": 640.8583984375, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 1277.2005, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 504.2108459472656, |
| "learning_rate": 3.7e-05, |
| "loss": 182.726, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 340.79022216796875, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 78.8449, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.18918879330158234, |
| "eval_runtime": 179.8257, |
| "eval_samples_per_second": 2.78, |
| "eval_steps_per_second": 0.35, |
| "eval_wer": 0.748305334512231, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.2784, |
| "grad_norm": 275.0790710449219, |
| "learning_rate": 7.7e-05, |
| "loss": 45.2321, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5984, |
| "grad_norm": 243.9811248779297, |
| "learning_rate": 9.7e-05, |
| "loss": 29.466, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.9184, |
| "grad_norm": 171.8375701904297, |
| "learning_rate": 9.79665071770335e-05, |
| "loss": 23.7046, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.14651787281036377, |
| "eval_runtime": 163.0505, |
| "eval_samples_per_second": 3.067, |
| "eval_steps_per_second": 0.386, |
| "eval_wer": 0.4188034188034188, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.2368, |
| "grad_norm": 152.78085327148438, |
| "learning_rate": 9.557416267942584e-05, |
| "loss": 15.7528, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.5568, |
| "grad_norm": 110.85580444335938, |
| "learning_rate": 9.318181818181818e-05, |
| "loss": 14.4653, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.8768000000000002, |
| "grad_norm": 98.29090118408203, |
| "learning_rate": 9.078947368421054e-05, |
| "loss": 13.1378, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.13471166789531708, |
| "eval_runtime": 164.1062, |
| "eval_samples_per_second": 3.047, |
| "eval_steps_per_second": 0.384, |
| "eval_wer": 0.36324786324786323, |
| "step": 939 |
| }, |
| { |
| "epoch": 3.1952, |
| "grad_norm": 77.09656524658203, |
| "learning_rate": 8.839712918660288e-05, |
| "loss": 9.8286, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.5152, |
| "grad_norm": 61.489463806152344, |
| "learning_rate": 8.600478468899522e-05, |
| "loss": 8.6319, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.8352, |
| "grad_norm": 83.41131591796875, |
| "learning_rate": 8.361244019138757e-05, |
| "loss": 8.2072, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.13120675086975098, |
| "eval_runtime": 162.2387, |
| "eval_samples_per_second": 3.082, |
| "eval_steps_per_second": 0.388, |
| "eval_wer": 0.32847038019451813, |
| "step": 1252 |
| }, |
| { |
| "epoch": 4.1536, |
| "grad_norm": 50.904685974121094, |
| "learning_rate": 8.122009569377991e-05, |
| "loss": 6.9122, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.4736, |
| "grad_norm": 39.57589340209961, |
| "learning_rate": 7.882775119617225e-05, |
| "loss": 5.9738, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.7936, |
| "grad_norm": 74.26155090332031, |
| "learning_rate": 7.643540669856459e-05, |
| "loss": 5.8166, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.13161760568618774, |
| "eval_runtime": 159.5413, |
| "eval_samples_per_second": 3.134, |
| "eval_steps_per_second": 0.395, |
| "eval_wer": 0.293692897141173, |
| "step": 1565 |
| }, |
| { |
| "epoch": 5.112, |
| "grad_norm": 37.957008361816406, |
| "learning_rate": 7.404306220095693e-05, |
| "loss": 5.4625, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.432, |
| "grad_norm": 29.977022171020508, |
| "learning_rate": 7.165071770334929e-05, |
| "loss": 4.6891, |
| "step": 1700 |
| }, |
| { |
| "epoch": 5.752, |
| "grad_norm": 31.574848175048828, |
| "learning_rate": 6.925837320574164e-05, |
| "loss": 4.5461, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.1338721662759781, |
| "eval_runtime": 158.7379, |
| "eval_samples_per_second": 3.15, |
| "eval_steps_per_second": 0.397, |
| "eval_wer": 0.29162982611258476, |
| "step": 1878 |
| }, |
| { |
| "epoch": 6.0704, |
| "grad_norm": 79.52449035644531, |
| "learning_rate": 6.686602870813398e-05, |
| "loss": 4.4673, |
| "step": 1900 |
| }, |
| { |
| "epoch": 6.3904, |
| "grad_norm": 59.68710708618164, |
| "learning_rate": 6.447368421052632e-05, |
| "loss": 3.7918, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.7104, |
| "grad_norm": 31.302486419677734, |
| "learning_rate": 6.208133971291866e-05, |
| "loss": 3.8785, |
| "step": 2100 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.12756717205047607, |
| "eval_runtime": 158.3831, |
| "eval_samples_per_second": 3.157, |
| "eval_steps_per_second": 0.398, |
| "eval_wer": 0.2838196286472148, |
| "step": 2191 |
| }, |
| { |
| "epoch": 7.0288, |
| "grad_norm": 38.00631332397461, |
| "learning_rate": 5.968899521531101e-05, |
| "loss": 3.7932, |
| "step": 2200 |
| }, |
| { |
| "epoch": 7.3488, |
| "grad_norm": 28.287948608398438, |
| "learning_rate": 5.729665071770335e-05, |
| "loss": 3.2972, |
| "step": 2300 |
| }, |
| { |
| "epoch": 7.6688, |
| "grad_norm": 32.95219802856445, |
| "learning_rate": 5.490430622009569e-05, |
| "loss": 3.2773, |
| "step": 2400 |
| }, |
| { |
| "epoch": 7.9888, |
| "grad_norm": 44.782039642333984, |
| "learning_rate": 5.251196172248804e-05, |
| "loss": 3.1975, |
| "step": 2500 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.12529748678207397, |
| "eval_runtime": 159.9548, |
| "eval_samples_per_second": 3.126, |
| "eval_steps_per_second": 0.394, |
| "eval_wer": 0.27615679339817273, |
| "step": 2504 |
| }, |
| { |
| "epoch": 8.3072, |
| "grad_norm": 30.53591537475586, |
| "learning_rate": 5.011961722488039e-05, |
| "loss": 2.8623, |
| "step": 2600 |
| }, |
| { |
| "epoch": 8.6272, |
| "grad_norm": 53.885047912597656, |
| "learning_rate": 4.772727272727273e-05, |
| "loss": 2.9823, |
| "step": 2700 |
| }, |
| { |
| "epoch": 8.9472, |
| "grad_norm": 31.965686798095703, |
| "learning_rate": 4.533492822966508e-05, |
| "loss": 2.8784, |
| "step": 2800 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.12403552234172821, |
| "eval_runtime": 160.5568, |
| "eval_samples_per_second": 3.114, |
| "eval_steps_per_second": 0.392, |
| "eval_wer": 0.28809313292071914, |
| "step": 2817 |
| }, |
| { |
| "epoch": 9.2656, |
| "grad_norm": 52.549591064453125, |
| "learning_rate": 4.294258373205742e-05, |
| "loss": 2.62, |
| "step": 2900 |
| }, |
| { |
| "epoch": 9.5856, |
| "grad_norm": 27.579486846923828, |
| "learning_rate": 4.055023923444976e-05, |
| "loss": 2.625, |
| "step": 3000 |
| }, |
| { |
| "epoch": 9.9056, |
| "grad_norm": 33.55034637451172, |
| "learning_rate": 3.815789473684211e-05, |
| "loss": 2.6303, |
| "step": 3100 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.12375625222921371, |
| "eval_runtime": 160.2896, |
| "eval_samples_per_second": 3.119, |
| "eval_steps_per_second": 0.393, |
| "eval_wer": 0.2718832891246684, |
| "step": 3130 |
| }, |
| { |
| "epoch": 10.224, |
| "grad_norm": 42.10033416748047, |
| "learning_rate": 3.576555023923445e-05, |
| "loss": 2.492, |
| "step": 3200 |
| }, |
| { |
| "epoch": 10.544, |
| "grad_norm": 43.56287384033203, |
| "learning_rate": 3.337320574162679e-05, |
| "loss": 2.4219, |
| "step": 3300 |
| }, |
| { |
| "epoch": 10.864, |
| "grad_norm": 24.93031120300293, |
| "learning_rate": 3.098086124401914e-05, |
| "loss": 2.481, |
| "step": 3400 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.12246226519346237, |
| "eval_runtime": 157.3827, |
| "eval_samples_per_second": 3.177, |
| "eval_steps_per_second": 0.4, |
| "eval_wer": 0.26702033598585323, |
| "step": 3443 |
| }, |
| { |
| "epoch": 11.1824, |
| "grad_norm": 28.191184997558594, |
| "learning_rate": 2.8588516746411487e-05, |
| "loss": 2.3101, |
| "step": 3500 |
| }, |
| { |
| "epoch": 11.5024, |
| "grad_norm": 25.59966468811035, |
| "learning_rate": 2.619617224880383e-05, |
| "loss": 2.2907, |
| "step": 3600 |
| }, |
| { |
| "epoch": 11.8224, |
| "grad_norm": 23.049842834472656, |
| "learning_rate": 2.380382775119617e-05, |
| "loss": 2.2994, |
| "step": 3700 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.12213879823684692, |
| "eval_runtime": 157.5035, |
| "eval_samples_per_second": 3.175, |
| "eval_steps_per_second": 0.4, |
| "eval_wer": 0.26407309165929854, |
| "step": 3756 |
| }, |
| { |
| "epoch": 12.1408, |
| "grad_norm": 18.59426498413086, |
| "learning_rate": 2.141148325358852e-05, |
| "loss": 2.2326, |
| "step": 3800 |
| }, |
| { |
| "epoch": 12.4608, |
| "grad_norm": 24.557432174682617, |
| "learning_rate": 1.9019138755980862e-05, |
| "loss": 2.1734, |
| "step": 3900 |
| }, |
| { |
| "epoch": 12.7808, |
| "grad_norm": 21.113338470458984, |
| "learning_rate": 1.6626794258373206e-05, |
| "loss": 2.0863, |
| "step": 4000 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.12139783799648285, |
| "eval_runtime": 157.0039, |
| "eval_samples_per_second": 3.185, |
| "eval_steps_per_second": 0.401, |
| "eval_wer": 0.26716769820218095, |
| "step": 4069 |
| }, |
| { |
| "epoch": 13.0992, |
| "grad_norm": 16.859729766845703, |
| "learning_rate": 1.423444976076555e-05, |
| "loss": 2.1694, |
| "step": 4100 |
| }, |
| { |
| "epoch": 13.4192, |
| "grad_norm": 38.536155700683594, |
| "learning_rate": 1.1842105263157895e-05, |
| "loss": 1.9885, |
| "step": 4200 |
| }, |
| { |
| "epoch": 13.7392, |
| "grad_norm": 20.236295700073242, |
| "learning_rate": 9.449760765550239e-06, |
| "loss": 2.0235, |
| "step": 4300 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.12131630629301071, |
| "eval_runtime": 157.8964, |
| "eval_samples_per_second": 3.167, |
| "eval_steps_per_second": 0.399, |
| "eval_wer": 0.2637783672266431, |
| "step": 4382 |
| }, |
| { |
| "epoch": 14.0576, |
| "grad_norm": 16.027307510375977, |
| "learning_rate": 7.0574162679425836e-06, |
| "loss": 2.0284, |
| "step": 4400 |
| }, |
| { |
| "epoch": 14.3776, |
| "grad_norm": 18.118242263793945, |
| "learning_rate": 4.665071770334928e-06, |
| "loss": 1.9218, |
| "step": 4500 |
| }, |
| { |
| "epoch": 14.6976, |
| "grad_norm": 21.5025691986084, |
| "learning_rate": 2.2727272727272728e-06, |
| "loss": 2.015, |
| "step": 4600 |
| }, |
| { |
| "epoch": 14.9536, |
| "eval_loss": 0.12134864181280136, |
| "eval_runtime": 154.8466, |
| "eval_samples_per_second": 3.229, |
| "eval_steps_per_second": 0.407, |
| "eval_wer": 0.2625994694960212, |
| "step": 4680 |
| }, |
| { |
| "epoch": 12.533333333333333, |
| "grad_norm": 114.76351165771484, |
| "learning_rate": 4.021428571428572e-05, |
| "loss": 7.7843, |
| "step": 4700 |
| }, |
| { |
| "epoch": 12.8, |
| "grad_norm": 130.2476043701172, |
| "learning_rate": 3.8785714285714285e-05, |
| "loss": 7.0386, |
| "step": 4800 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.12093591690063477, |
| "eval_runtime": 155.7693, |
| "eval_samples_per_second": 3.21, |
| "eval_steps_per_second": 0.404, |
| "eval_wer": 0.27600943118184496, |
| "step": 4875 |
| }, |
| { |
| "epoch": 13.066666666666666, |
| "grad_norm": 124.46900177001953, |
| "learning_rate": 3.735714285714286e-05, |
| "loss": 6.4105, |
| "step": 4900 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 79.74747467041016, |
| "learning_rate": 3.5928571428571425e-05, |
| "loss": 5.167, |
| "step": 5000 |
| }, |
| { |
| "epoch": 13.6, |
| "grad_norm": 85.09929656982422, |
| "learning_rate": 3.45e-05, |
| "loss": 5.3239, |
| "step": 5100 |
| }, |
| { |
| "epoch": 13.866666666666667, |
| "grad_norm": 86.92524719238281, |
| "learning_rate": 3.307142857142858e-05, |
| "loss": 5.2638, |
| "step": 5200 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.11688227951526642, |
| "eval_runtime": 151.5329, |
| "eval_samples_per_second": 3.3, |
| "eval_steps_per_second": 0.416, |
| "eval_wer": 0.2537577365163572, |
| "step": 5250 |
| }, |
| { |
| "epoch": 14.133333333333333, |
| "grad_norm": 46.18540954589844, |
| "learning_rate": 3.1642857142857145e-05, |
| "loss": 4.4287, |
| "step": 5300 |
| }, |
| { |
| "epoch": 14.4, |
| "grad_norm": 63.5553092956543, |
| "learning_rate": 3.021428571428572e-05, |
| "loss": 3.7761, |
| "step": 5400 |
| }, |
| { |
| "epoch": 14.666666666666666, |
| "grad_norm": 41.22358322143555, |
| "learning_rate": 2.878571428571429e-05, |
| "loss": 3.9576, |
| "step": 5500 |
| }, |
| { |
| "epoch": 14.933333333333334, |
| "grad_norm": 86.58721160888672, |
| "learning_rate": 2.735714285714286e-05, |
| "loss": 3.8581, |
| "step": 5600 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.11802562326192856, |
| "eval_runtime": 151.7648, |
| "eval_samples_per_second": 3.295, |
| "eval_steps_per_second": 0.415, |
| "eval_wer": 0.23740053050397877, |
| "step": 5625 |
| }, |
| { |
| "epoch": 15.2, |
| "grad_norm": 30.3751220703125, |
| "learning_rate": 2.592857142857143e-05, |
| "loss": 3.5142, |
| "step": 5700 |
| }, |
| { |
| "epoch": 15.466666666666667, |
| "grad_norm": 55.38408279418945, |
| "learning_rate": 2.45e-05, |
| "loss": 3.2256, |
| "step": 5800 |
| }, |
| { |
| "epoch": 15.733333333333333, |
| "grad_norm": 34.806907653808594, |
| "learning_rate": 2.3071428571428573e-05, |
| "loss": 3.1364, |
| "step": 5900 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 31.722949981689453, |
| "learning_rate": 2.1642857142857146e-05, |
| "loss": 3.4661, |
| "step": 6000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.11758579313755035, |
| "eval_runtime": 151.2711, |
| "eval_samples_per_second": 3.305, |
| "eval_steps_per_second": 0.416, |
| "eval_wer": 0.24078986147951664, |
| "step": 6000 |
| }, |
| { |
| "epoch": 16.266666666666666, |
| "grad_norm": 38.61819076538086, |
| "learning_rate": 2.0214285714285716e-05, |
| "loss": 2.9965, |
| "step": 6100 |
| }, |
| { |
| "epoch": 16.533333333333335, |
| "grad_norm": 53.0886116027832, |
| "learning_rate": 1.8785714285714286e-05, |
| "loss": 2.7895, |
| "step": 6200 |
| }, |
| { |
| "epoch": 16.8, |
| "grad_norm": 31.704025268554688, |
| "learning_rate": 1.7357142857142856e-05, |
| "loss": 2.8903, |
| "step": 6300 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.11670505255460739, |
| "eval_runtime": 146.7611, |
| "eval_samples_per_second": 3.407, |
| "eval_steps_per_second": 0.429, |
| "eval_wer": 0.23592690834070146, |
| "step": 6375 |
| }, |
| { |
| "epoch": 17.066666666666666, |
| "grad_norm": 25.057514190673828, |
| "learning_rate": 1.592857142857143e-05, |
| "loss": 2.8696, |
| "step": 6400 |
| }, |
| { |
| "epoch": 17.333333333333332, |
| "grad_norm": 27.10036277770996, |
| "learning_rate": 1.45e-05, |
| "loss": 2.8056, |
| "step": 6500 |
| }, |
| { |
| "epoch": 17.6, |
| "grad_norm": 39.833030700683594, |
| "learning_rate": 1.3071428571428574e-05, |
| "loss": 2.6236, |
| "step": 6600 |
| }, |
| { |
| "epoch": 17.866666666666667, |
| "grad_norm": 21.081867218017578, |
| "learning_rate": 1.1642857142857144e-05, |
| "loss": 2.6081, |
| "step": 6700 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.11724027991294861, |
| "eval_runtime": 146.5669, |
| "eval_samples_per_second": 3.411, |
| "eval_steps_per_second": 0.43, |
| "eval_wer": 0.2357795461243737, |
| "step": 6750 |
| }, |
| { |
| "epoch": 18.133333333333333, |
| "grad_norm": 51.97953414916992, |
| "learning_rate": 1.0214285714285715e-05, |
| "loss": 2.5349, |
| "step": 6800 |
| }, |
| { |
| "epoch": 18.4, |
| "grad_norm": 29.391460418701172, |
| "learning_rate": 8.785714285714286e-06, |
| "loss": 2.4141, |
| "step": 6900 |
| }, |
| { |
| "epoch": 18.666666666666668, |
| "grad_norm": 33.09728240966797, |
| "learning_rate": 7.3571428571428565e-06, |
| "loss": 2.5216, |
| "step": 7000 |
| }, |
| { |
| "epoch": 18.933333333333334, |
| "grad_norm": 52.41236114501953, |
| "learning_rate": 5.928571428571429e-06, |
| "loss": 2.6719, |
| "step": 7100 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.11649636179208755, |
| "eval_runtime": 150.9234, |
| "eval_samples_per_second": 3.313, |
| "eval_steps_per_second": 0.417, |
| "eval_wer": 0.24005305039787797, |
| "step": 7125 |
| }, |
| { |
| "epoch": 19.2, |
| "grad_norm": 37.658836364746094, |
| "learning_rate": 4.5e-06, |
| "loss": 2.3516, |
| "step": 7200 |
| }, |
| { |
| "epoch": 19.466666666666665, |
| "grad_norm": 36.73357009887695, |
| "learning_rate": 3.0714285714285715e-06, |
| "loss": 2.3761, |
| "step": 7300 |
| }, |
| { |
| "epoch": 19.733333333333334, |
| "grad_norm": 30.50603675842285, |
| "learning_rate": 1.6428571428571429e-06, |
| "loss": 2.4319, |
| "step": 7400 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 24.163257598876953, |
| "learning_rate": 2.142857142857143e-07, |
| "loss": 2.4235, |
| "step": 7500 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.11603204160928726, |
| "eval_runtime": 149.7675, |
| "eval_samples_per_second": 3.339, |
| "eval_steps_per_second": 0.421, |
| "eval_wer": 0.24300029472443266, |
| "step": 7500 |
| }, |
| { |
| "epoch": 20.266666666666666, |
| "grad_norm": 152.1876983642578, |
| "learning_rate": 5.1137931034482754e-05, |
| "loss": 5.1222, |
| "step": 7600 |
| }, |
| { |
| "epoch": 20.533333333333335, |
| "grad_norm": 222.44189453125, |
| "learning_rate": 5.044827586206897e-05, |
| "loss": 5.2045, |
| "step": 7700 |
| }, |
| { |
| "epoch": 20.8, |
| "grad_norm": 150.16041564941406, |
| "learning_rate": 4.975862068965517e-05, |
| "loss": 4.9497, |
| "step": 7800 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.11334758251905441, |
| "eval_runtime": 151.8894, |
| "eval_samples_per_second": 3.292, |
| "eval_steps_per_second": 0.415, |
| "eval_wer": 0.23607427055702918, |
| "step": 7875 |
| }, |
| { |
| "epoch": 21.066666666666666, |
| "grad_norm": 46.202308654785156, |
| "learning_rate": 4.9068965517241386e-05, |
| "loss": 4.2033, |
| "step": 7900 |
| }, |
| { |
| "epoch": 21.333333333333332, |
| "grad_norm": 38.9134635925293, |
| "learning_rate": 4.837931034482759e-05, |
| "loss": 3.784, |
| "step": 8000 |
| }, |
| { |
| "epoch": 21.6, |
| "grad_norm": 53.533203125, |
| "learning_rate": 4.7689655172413796e-05, |
| "loss": 3.5497, |
| "step": 8100 |
| }, |
| { |
| "epoch": 21.866666666666667, |
| "grad_norm": 51.86565399169922, |
| "learning_rate": 4.7e-05, |
| "loss": 3.6345, |
| "step": 8200 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.11361408233642578, |
| "eval_runtime": 148.5623, |
| "eval_samples_per_second": 3.366, |
| "eval_steps_per_second": 0.424, |
| "eval_wer": 0.22737989979369289, |
| "step": 8250 |
| }, |
| { |
| "epoch": 22.133333333333333, |
| "grad_norm": 77.92996215820312, |
| "learning_rate": 4.631034482758621e-05, |
| "loss": 3.2194, |
| "step": 8300 |
| }, |
| { |
| "epoch": 22.4, |
| "grad_norm": 46.25454330444336, |
| "learning_rate": 4.5620689655172414e-05, |
| "loss": 2.8227, |
| "step": 8400 |
| }, |
| { |
| "epoch": 22.666666666666668, |
| "grad_norm": 32.176517486572266, |
| "learning_rate": 4.493103448275862e-05, |
| "loss": 3.0837, |
| "step": 8500 |
| }, |
| { |
| "epoch": 22.933333333333334, |
| "grad_norm": 42.87643051147461, |
| "learning_rate": 4.4241379310344824e-05, |
| "loss": 3.092, |
| "step": 8600 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.11234613507986069, |
| "eval_runtime": 148.2332, |
| "eval_samples_per_second": 3.373, |
| "eval_steps_per_second": 0.425, |
| "eval_wer": 0.2304745063365753, |
| "step": 8625 |
| }, |
| { |
| "epoch": 23.2, |
| "grad_norm": 64.79329681396484, |
| "learning_rate": 4.355172413793104e-05, |
| "loss": 2.7315, |
| "step": 8700 |
| }, |
| { |
| "epoch": 23.466666666666665, |
| "grad_norm": 37.61339569091797, |
| "learning_rate": 4.286206896551724e-05, |
| "loss": 2.713, |
| "step": 8800 |
| }, |
| { |
| "epoch": 23.733333333333334, |
| "grad_norm": 27.409000396728516, |
| "learning_rate": 4.217241379310345e-05, |
| "loss": 2.5189, |
| "step": 8900 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 39.62417221069336, |
| "learning_rate": 4.148275862068966e-05, |
| "loss": 2.606, |
| "step": 9000 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.1097910925745964, |
| "eval_runtime": 146.0641, |
| "eval_samples_per_second": 3.423, |
| "eval_steps_per_second": 0.431, |
| "eval_wer": 0.2282640730916593, |
| "step": 9000 |
| }, |
| { |
| "epoch": 24.266666666666666, |
| "grad_norm": 21.3342227935791, |
| "learning_rate": 4.0793103448275866e-05, |
| "loss": 2.3604, |
| "step": 9100 |
| }, |
| { |
| "epoch": 24.533333333333335, |
| "grad_norm": 61.0772705078125, |
| "learning_rate": 4.0103448275862074e-05, |
| "loss": 2.3015, |
| "step": 9200 |
| }, |
| { |
| "epoch": 24.8, |
| "grad_norm": 47.21693420410156, |
| "learning_rate": 3.9413793103448276e-05, |
| "loss": 2.4858, |
| "step": 9300 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.11028488725423813, |
| "eval_runtime": 146.4467, |
| "eval_samples_per_second": 3.414, |
| "eval_steps_per_second": 0.43, |
| "eval_wer": 0.22531682876510462, |
| "step": 9375 |
| }, |
| { |
| "epoch": 25.066666666666666, |
| "grad_norm": 58.333526611328125, |
| "learning_rate": 3.8724137931034484e-05, |
| "loss": 2.215, |
| "step": 9400 |
| }, |
| { |
| "epoch": 25.333333333333332, |
| "grad_norm": 33.136444091796875, |
| "learning_rate": 3.803448275862069e-05, |
| "loss": 2.146, |
| "step": 9500 |
| }, |
| { |
| "epoch": 25.6, |
| "grad_norm": 25.031856536865234, |
| "learning_rate": 3.73448275862069e-05, |
| "loss": 2.1088, |
| "step": 9600 |
| }, |
| { |
| "epoch": 25.866666666666667, |
| "grad_norm": 27.74683952331543, |
| "learning_rate": 3.66551724137931e-05, |
| "loss": 2.1898, |
| "step": 9700 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.11085129529237747, |
| "eval_runtime": 147.8522, |
| "eval_samples_per_second": 3.382, |
| "eval_steps_per_second": 0.426, |
| "eval_wer": 0.2326849395814913, |
| "step": 9750 |
| }, |
| { |
| "epoch": 26.133333333333333, |
| "grad_norm": 26.178314208984375, |
| "learning_rate": 3.596551724137931e-05, |
| "loss": 2.1311, |
| "step": 9800 |
| }, |
| { |
| "epoch": 26.4, |
| "grad_norm": 28.952735900878906, |
| "learning_rate": 3.527586206896552e-05, |
| "loss": 1.9449, |
| "step": 9900 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 31.816133499145508, |
| "learning_rate": 3.458620689655173e-05, |
| "loss": 1.9057, |
| "step": 10000 |
| }, |
| { |
| "epoch": 26.933333333333334, |
| "grad_norm": 25.91592025756836, |
| "learning_rate": 3.389655172413793e-05, |
| "loss": 2.1861, |
| "step": 10100 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.10877023637294769, |
| "eval_runtime": 146.9281, |
| "eval_samples_per_second": 3.403, |
| "eval_steps_per_second": 0.429, |
| "eval_wer": 0.23106395520188625, |
| "step": 10125 |
| }, |
| { |
| "epoch": 27.2, |
| "grad_norm": 24.183246612548828, |
| "learning_rate": 3.320689655172414e-05, |
| "loss": 1.9927, |
| "step": 10200 |
| }, |
| { |
| "epoch": 27.466666666666665, |
| "grad_norm": 27.9620361328125, |
| "learning_rate": 3.2517241379310346e-05, |
| "loss": 1.785, |
| "step": 10300 |
| }, |
| { |
| "epoch": 27.733333333333334, |
| "grad_norm": 41.2671012878418, |
| "learning_rate": 3.1827586206896554e-05, |
| "loss": 1.8756, |
| "step": 10400 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 26.784330368041992, |
| "learning_rate": 3.113793103448276e-05, |
| "loss": 1.8994, |
| "step": 10500 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.10841843485832214, |
| "eval_runtime": 143.9314, |
| "eval_samples_per_second": 3.474, |
| "eval_steps_per_second": 0.438, |
| "eval_wer": 0.2260536398467433, |
| "step": 10500 |
| }, |
| { |
| "epoch": 28.266666666666666, |
| "grad_norm": 31.477703094482422, |
| "learning_rate": 3.0448275862068964e-05, |
| "loss": 1.7406, |
| "step": 10600 |
| }, |
| { |
| "epoch": 28.533333333333335, |
| "grad_norm": 26.99530029296875, |
| "learning_rate": 2.9758620689655176e-05, |
| "loss": 1.7979, |
| "step": 10700 |
| }, |
| { |
| "epoch": 28.8, |
| "grad_norm": 24.11530876159668, |
| "learning_rate": 2.906896551724138e-05, |
| "loss": 1.8208, |
| "step": 10800 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.10783620923757553, |
| "eval_runtime": 144.2312, |
| "eval_samples_per_second": 3.467, |
| "eval_steps_per_second": 0.437, |
| "eval_wer": 0.22664308871205424, |
| "step": 10875 |
| }, |
| { |
| "epoch": 29.066666666666666, |
| "grad_norm": 17.79966163635254, |
| "learning_rate": 2.8379310344827586e-05, |
| "loss": 1.7014, |
| "step": 10900 |
| }, |
| { |
| "epoch": 29.333333333333332, |
| "grad_norm": 28.017330169677734, |
| "learning_rate": 2.768965517241379e-05, |
| "loss": 1.6895, |
| "step": 11000 |
| }, |
| { |
| "epoch": 29.6, |
| "grad_norm": 24.27259635925293, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 1.7268, |
| "step": 11100 |
| }, |
| { |
| "epoch": 29.866666666666667, |
| "grad_norm": 32.436275482177734, |
| "learning_rate": 2.6310344827586207e-05, |
| "loss": 1.706, |
| "step": 11200 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.10766720026731491, |
| "eval_runtime": 143.3181, |
| "eval_samples_per_second": 3.489, |
| "eval_steps_per_second": 0.44, |
| "eval_wer": 0.2287061597406425, |
| "step": 11250 |
| }, |
| { |
| "epoch": 30.133333333333333, |
| "grad_norm": 40.39571762084961, |
| "learning_rate": 2.5620689655172416e-05, |
| "loss": 1.618, |
| "step": 11300 |
| }, |
| { |
| "epoch": 30.4, |
| "grad_norm": 32.211952209472656, |
| "learning_rate": 2.493103448275862e-05, |
| "loss": 1.6147, |
| "step": 11400 |
| }, |
| { |
| "epoch": 30.666666666666668, |
| "grad_norm": 24.827373504638672, |
| "learning_rate": 2.424137931034483e-05, |
| "loss": 1.6014, |
| "step": 11500 |
| }, |
| { |
| "epoch": 30.933333333333334, |
| "grad_norm": 21.28459930419922, |
| "learning_rate": 2.3551724137931037e-05, |
| "loss": 1.5895, |
| "step": 11600 |
| }, |
| { |
| "epoch": 31.0, |
| "eval_loss": 0.10668068379163742, |
| "eval_runtime": 145.3149, |
| "eval_samples_per_second": 3.441, |
| "eval_steps_per_second": 0.434, |
| "eval_wer": 0.22325375773651635, |
| "step": 11625 |
| }, |
| { |
| "epoch": 31.2, |
| "grad_norm": 18.275196075439453, |
| "learning_rate": 2.2862068965517242e-05, |
| "loss": 1.6111, |
| "step": 11700 |
| }, |
| { |
| "epoch": 31.466666666666665, |
| "grad_norm": 18.976835250854492, |
| "learning_rate": 2.217241379310345e-05, |
| "loss": 1.5058, |
| "step": 11800 |
| }, |
| { |
| "epoch": 31.733333333333334, |
| "grad_norm": 23.13480567932129, |
| "learning_rate": 2.1482758620689656e-05, |
| "loss": 1.5864, |
| "step": 11900 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 18.37510871887207, |
| "learning_rate": 2.0793103448275864e-05, |
| "loss": 1.5086, |
| "step": 12000 |
| }, |
| { |
| "epoch": 32.0, |
| "eval_loss": 0.10681495070457458, |
| "eval_runtime": 146.0166, |
| "eval_samples_per_second": 3.424, |
| "eval_steps_per_second": 0.431, |
| "eval_wer": 0.22988505747126436, |
| "step": 12000 |
| }, |
| { |
| "epoch": 32.266666666666666, |
| "grad_norm": 29.316116333007812, |
| "learning_rate": 2.010344827586207e-05, |
| "loss": 1.4952, |
| "step": 12100 |
| }, |
| { |
| "epoch": 32.53333333333333, |
| "grad_norm": 14.011812210083008, |
| "learning_rate": 1.9413793103448277e-05, |
| "loss": 1.5013, |
| "step": 12200 |
| }, |
| { |
| "epoch": 32.8, |
| "grad_norm": 26.72800636291504, |
| "learning_rate": 1.8724137931034482e-05, |
| "loss": 1.4744, |
| "step": 12300 |
| }, |
| { |
| "epoch": 33.0, |
| "eval_loss": 0.10653579980134964, |
| "eval_runtime": 144.204, |
| "eval_samples_per_second": 3.467, |
| "eval_steps_per_second": 0.437, |
| "eval_wer": 0.22679045092838196, |
| "step": 12375 |
| }, |
| { |
| "epoch": 33.06666666666667, |
| "grad_norm": 15.76944637298584, |
| "learning_rate": 1.803448275862069e-05, |
| "loss": 1.4733, |
| "step": 12400 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "grad_norm": 12.833415031433105, |
| "learning_rate": 1.7344827586206896e-05, |
| "loss": 1.4352, |
| "step": 12500 |
| }, |
| { |
| "epoch": 33.6, |
| "grad_norm": 23.16147232055664, |
| "learning_rate": 1.6655172413793104e-05, |
| "loss": 1.4169, |
| "step": 12600 |
| }, |
| { |
| "epoch": 33.86666666666667, |
| "grad_norm": 23.133747100830078, |
| "learning_rate": 1.596551724137931e-05, |
| "loss": 1.4184, |
| "step": 12700 |
| }, |
| { |
| "epoch": 34.0, |
| "eval_loss": 0.10563693195581436, |
| "eval_runtime": 144.3614, |
| "eval_samples_per_second": 3.464, |
| "eval_steps_per_second": 0.436, |
| "eval_wer": 0.22664308871205424, |
| "step": 12750 |
| }, |
| { |
| "epoch": 34.13333333333333, |
| "grad_norm": 16.677919387817383, |
| "learning_rate": 1.5275862068965517e-05, |
| "loss": 1.3784, |
| "step": 12800 |
| }, |
| { |
| "epoch": 34.4, |
| "grad_norm": 14.670028686523438, |
| "learning_rate": 1.4586206896551724e-05, |
| "loss": 1.402, |
| "step": 12900 |
| }, |
| { |
| "epoch": 34.666666666666664, |
| "grad_norm": 23.33100128173828, |
| "learning_rate": 1.3896551724137932e-05, |
| "loss": 1.361, |
| "step": 13000 |
| }, |
| { |
| "epoch": 34.93333333333333, |
| "grad_norm": 15.100184440612793, |
| "learning_rate": 1.3206896551724137e-05, |
| "loss": 1.4134, |
| "step": 13100 |
| }, |
| { |
| "epoch": 35.0, |
| "eval_loss": 0.10637735575437546, |
| "eval_runtime": 148.319, |
| "eval_samples_per_second": 3.371, |
| "eval_steps_per_second": 0.425, |
| "eval_wer": 0.23312702623047452, |
| "step": 13125 |
| }, |
| { |
| "epoch": 35.2, |
| "grad_norm": 12.271963119506836, |
| "learning_rate": 1.2517241379310346e-05, |
| "loss": 1.3373, |
| "step": 13200 |
| }, |
| { |
| "epoch": 35.46666666666667, |
| "grad_norm": 20.943439483642578, |
| "learning_rate": 1.1827586206896552e-05, |
| "loss": 1.3615, |
| "step": 13300 |
| }, |
| { |
| "epoch": 35.733333333333334, |
| "grad_norm": 15.165372848510742, |
| "learning_rate": 1.1137931034482759e-05, |
| "loss": 1.3727, |
| "step": 13400 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 13.487517356872559, |
| "learning_rate": 1.0448275862068967e-05, |
| "loss": 1.3246, |
| "step": 13500 |
| }, |
| { |
| "epoch": 36.0, |
| "eval_loss": 0.10543886572122574, |
| "eval_runtime": 145.8253, |
| "eval_samples_per_second": 3.429, |
| "eval_steps_per_second": 0.432, |
| "eval_wer": 0.22634836427939875, |
| "step": 13500 |
| }, |
| { |
| "epoch": 36.266666666666666, |
| "grad_norm": 19.09083366394043, |
| "learning_rate": 9.758620689655174e-06, |
| "loss": 1.2625, |
| "step": 13600 |
| }, |
| { |
| "epoch": 36.53333333333333, |
| "grad_norm": 15.21319580078125, |
| "learning_rate": 9.06896551724138e-06, |
| "loss": 1.3249, |
| "step": 13700 |
| }, |
| { |
| "epoch": 36.8, |
| "grad_norm": 15.249282836914062, |
| "learning_rate": 8.379310344827587e-06, |
| "loss": 1.3368, |
| "step": 13800 |
| }, |
| { |
| "epoch": 37.0, |
| "eval_loss": 0.10566218197345734, |
| "eval_runtime": 144.146, |
| "eval_samples_per_second": 3.469, |
| "eval_steps_per_second": 0.437, |
| "eval_wer": 0.23165340406719717, |
| "step": 13875 |
| }, |
| { |
| "epoch": 37.06666666666667, |
| "grad_norm": 15.04231071472168, |
| "learning_rate": 7.689655172413794e-06, |
| "loss": 1.3301, |
| "step": 13900 |
| }, |
| { |
| "epoch": 37.333333333333336, |
| "grad_norm": 10.739683151245117, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 1.3041, |
| "step": 14000 |
| }, |
| { |
| "epoch": 37.6, |
| "grad_norm": 14.751246452331543, |
| "learning_rate": 6.310344827586208e-06, |
| "loss": 1.2658, |
| "step": 14100 |
| }, |
| { |
| "epoch": 37.86666666666667, |
| "grad_norm": 12.384917259216309, |
| "learning_rate": 5.620689655172414e-06, |
| "loss": 1.3084, |
| "step": 14200 |
| }, |
| { |
| "epoch": 38.0, |
| "eval_loss": 0.10525722056627274, |
| "eval_runtime": 148.2408, |
| "eval_samples_per_second": 3.373, |
| "eval_steps_per_second": 0.425, |
| "eval_wer": 0.24123194812849985, |
| "step": 14250 |
| }, |
| { |
| "epoch": 38.13333333333333, |
| "grad_norm": 11.925743103027344, |
| "learning_rate": 4.931034482758621e-06, |
| "loss": 1.2151, |
| "step": 14300 |
| }, |
| { |
| "epoch": 38.4, |
| "grad_norm": 10.217673301696777, |
| "learning_rate": 4.241379310344827e-06, |
| "loss": 1.2426, |
| "step": 14400 |
| }, |
| { |
| "epoch": 38.666666666666664, |
| "grad_norm": 10.630120277404785, |
| "learning_rate": 3.5517241379310345e-06, |
| "loss": 1.2779, |
| "step": 14500 |
| }, |
| { |
| "epoch": 38.93333333333333, |
| "grad_norm": 19.7459774017334, |
| "learning_rate": 2.8620689655172416e-06, |
| "loss": 1.302, |
| "step": 14600 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 0.10544682294130325, |
| "eval_runtime": 148.7257, |
| "eval_samples_per_second": 3.362, |
| "eval_steps_per_second": 0.424, |
| "eval_wer": 0.2309165929855585, |
| "step": 14625 |
| }, |
| { |
| "epoch": 39.2, |
| "grad_norm": 14.623858451843262, |
| "learning_rate": 2.1724137931034482e-06, |
| "loss": 1.2663, |
| "step": 14700 |
| }, |
| { |
| "epoch": 39.46666666666667, |
| "grad_norm": 11.66265869140625, |
| "learning_rate": 1.4827586206896553e-06, |
| "loss": 1.2647, |
| "step": 14800 |
| }, |
| { |
| "epoch": 39.733333333333334, |
| "grad_norm": 12.55521297454834, |
| "learning_rate": 7.931034482758622e-07, |
| "loss": 1.2334, |
| "step": 14900 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 9.761940956115723, |
| "learning_rate": 1.0344827586206898e-07, |
| "loss": 1.2152, |
| "step": 15000 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 0.1053401455283165, |
| "eval_runtime": 145.6503, |
| "eval_samples_per_second": 3.433, |
| "eval_steps_per_second": 0.433, |
| "eval_wer": 0.22973769525493665, |
| "step": 15000 |
| }, |
| { |
| "epoch": 37.46869187848729, |
| "grad_norm": 39.86048126220703, |
| "learning_rate": 2.577608142493639e-05, |
| "loss": 3.5543, |
| "step": 15100 |
| }, |
| { |
| "epoch": 37.716676999380034, |
| "grad_norm": 53.614952087402344, |
| "learning_rate": 2.5267175572519086e-05, |
| "loss": 3.9955, |
| "step": 15200 |
| }, |
| { |
| "epoch": 37.964662120272784, |
| "grad_norm": 33.56686019897461, |
| "learning_rate": 2.4758269720101782e-05, |
| "loss": 3.6933, |
| "step": 15300 |
| }, |
| { |
| "epoch": 37.99938003719777, |
| "eval_loss": 0.10439449548721313, |
| "eval_runtime": 152.7963, |
| "eval_samples_per_second": 3.272, |
| "eval_steps_per_second": 0.412, |
| "eval_wer": 0.21220159151193635, |
| "step": 15314 |
| }, |
| { |
| "epoch": 38.21326720396776, |
| "grad_norm": 44.14704132080078, |
| "learning_rate": 2.424936386768448e-05, |
| "loss": 3.1742, |
| "step": 15400 |
| }, |
| { |
| "epoch": 38.46125232486051, |
| "grad_norm": 65.31890869140625, |
| "learning_rate": 2.374045801526718e-05, |
| "loss": 2.9259, |
| "step": 15500 |
| }, |
| { |
| "epoch": 38.70923744575325, |
| "grad_norm": 38.38364028930664, |
| "learning_rate": 2.3231552162849872e-05, |
| "loss": 2.9688, |
| "step": 15600 |
| }, |
| { |
| "epoch": 38.957222566646, |
| "grad_norm": 101.05147552490234, |
| "learning_rate": 2.272264631043257e-05, |
| "loss": 2.9938, |
| "step": 15700 |
| }, |
| { |
| "epoch": 39.0, |
| "eval_loss": 0.10509275645017624, |
| "eval_runtime": 149.9151, |
| "eval_samples_per_second": 3.335, |
| "eval_steps_per_second": 0.42, |
| "eval_wer": 0.21927497789566755, |
| "step": 15718 |
| }, |
| { |
| "epoch": 39.203347799132054, |
| "grad_norm": 40.039276123046875, |
| "learning_rate": 2.2213740458015268e-05, |
| "loss": 2.3839, |
| "step": 15800 |
| }, |
| { |
| "epoch": 39.4513329200248, |
| "grad_norm": 71.81354522705078, |
| "learning_rate": 2.1704834605597965e-05, |
| "loss": 2.6085, |
| "step": 15900 |
| }, |
| { |
| "epoch": 39.69931804091755, |
| "grad_norm": 98.01214599609375, |
| "learning_rate": 2.1195928753180665e-05, |
| "loss": 2.3986, |
| "step": 16000 |
| }, |
| { |
| "epoch": 39.94730316181029, |
| "grad_norm": 54.38132858276367, |
| "learning_rate": 2.0687022900763358e-05, |
| "loss": 2.5582, |
| "step": 16100 |
| }, |
| { |
| "epoch": 40.0, |
| "eval_loss": 0.10408324003219604, |
| "eval_runtime": 154.5124, |
| "eval_samples_per_second": 3.236, |
| "eval_steps_per_second": 0.408, |
| "eval_wer": 0.22015915119363394, |
| "step": 16122 |
| }, |
| { |
| "epoch": 40.19342839429634, |
| "grad_norm": 37.06816101074219, |
| "learning_rate": 2.0178117048346058e-05, |
| "loss": 2.2341, |
| "step": 16200 |
| }, |
| { |
| "epoch": 40.44141351518909, |
| "grad_norm": 27.490217208862305, |
| "learning_rate": 1.9669211195928754e-05, |
| "loss": 2.2284, |
| "step": 16300 |
| }, |
| { |
| "epoch": 40.68939863608183, |
| "grad_norm": 32.29278564453125, |
| "learning_rate": 1.916030534351145e-05, |
| "loss": 2.3084, |
| "step": 16400 |
| }, |
| { |
| "epoch": 40.93738375697458, |
| "grad_norm": 30.68113136291504, |
| "learning_rate": 1.8651399491094148e-05, |
| "loss": 2.1949, |
| "step": 16500 |
| }, |
| { |
| "epoch": 41.0, |
| "eval_loss": 0.10319098085165024, |
| "eval_runtime": 146.5477, |
| "eval_samples_per_second": 3.412, |
| "eval_steps_per_second": 0.43, |
| "eval_wer": 0.21367521367521367, |
| "step": 16526 |
| }, |
| { |
| "epoch": 41.183508989460634, |
| "grad_norm": 31.351858139038086, |
| "learning_rate": 1.8142493638676847e-05, |
| "loss": 1.856, |
| "step": 16600 |
| }, |
| { |
| "epoch": 41.43149411035338, |
| "grad_norm": 24.235687255859375, |
| "learning_rate": 1.763358778625954e-05, |
| "loss": 1.998, |
| "step": 16700 |
| }, |
| { |
| "epoch": 41.67947923124613, |
| "grad_norm": 27.65749740600586, |
| "learning_rate": 1.712468193384224e-05, |
| "loss": 2.0332, |
| "step": 16800 |
| }, |
| { |
| "epoch": 41.92746435213887, |
| "grad_norm": 23.039844512939453, |
| "learning_rate": 1.6615776081424937e-05, |
| "loss": 2.1428, |
| "step": 16900 |
| }, |
| { |
| "epoch": 42.0, |
| "eval_loss": 0.10452839732170105, |
| "eval_runtime": 147.7082, |
| "eval_samples_per_second": 3.385, |
| "eval_steps_per_second": 0.427, |
| "eval_wer": 0.21455938697318008, |
| "step": 16930 |
| }, |
| { |
| "epoch": 42.17358958462492, |
| "grad_norm": 30.967439651489258, |
| "learning_rate": 1.6106870229007634e-05, |
| "loss": 1.9583, |
| "step": 17000 |
| }, |
| { |
| "epoch": 42.42157470551767, |
| "grad_norm": 22.957183837890625, |
| "learning_rate": 1.5597964376590334e-05, |
| "loss": 1.9154, |
| "step": 17100 |
| }, |
| { |
| "epoch": 42.66955982641041, |
| "grad_norm": 26.501638412475586, |
| "learning_rate": 1.5089058524173027e-05, |
| "loss": 1.8082, |
| "step": 17200 |
| }, |
| { |
| "epoch": 42.91754494730316, |
| "grad_norm": 21.166030883789062, |
| "learning_rate": 1.4580152671755725e-05, |
| "loss": 2.0052, |
| "step": 17300 |
| }, |
| { |
| "epoch": 43.0, |
| "eval_loss": 0.10267385095357895, |
| "eval_runtime": 148.3211, |
| "eval_samples_per_second": 3.371, |
| "eval_steps_per_second": 0.425, |
| "eval_wer": 0.21455938697318008, |
| "step": 17334 |
| }, |
| { |
| "epoch": 43.163670179789214, |
| "grad_norm": 26.231456756591797, |
| "learning_rate": 1.4071246819338423e-05, |
| "loss": 1.7369, |
| "step": 17400 |
| }, |
| { |
| "epoch": 43.41165530068196, |
| "grad_norm": 21.52410316467285, |
| "learning_rate": 1.356234096692112e-05, |
| "loss": 1.9099, |
| "step": 17500 |
| }, |
| { |
| "epoch": 43.65964042157471, |
| "grad_norm": 23.501480102539062, |
| "learning_rate": 1.3053435114503818e-05, |
| "loss": 1.7666, |
| "step": 17600 |
| }, |
| { |
| "epoch": 43.90762554246745, |
| "grad_norm": 23.56607437133789, |
| "learning_rate": 1.2544529262086516e-05, |
| "loss": 1.7204, |
| "step": 17700 |
| }, |
| { |
| "epoch": 44.0, |
| "eval_loss": 0.10307622700929642, |
| "eval_runtime": 145.6608, |
| "eval_samples_per_second": 3.433, |
| "eval_steps_per_second": 0.433, |
| "eval_wer": 0.2120542292956086, |
| "step": 17738 |
| }, |
| { |
| "epoch": 44.1537507749535, |
| "grad_norm": 26.66972541809082, |
| "learning_rate": 1.2035623409669211e-05, |
| "loss": 1.8082, |
| "step": 17800 |
| }, |
| { |
| "epoch": 44.40173589584625, |
| "grad_norm": 28.958173751831055, |
| "learning_rate": 1.152671755725191e-05, |
| "loss": 1.6666, |
| "step": 17900 |
| }, |
| { |
| "epoch": 44.649721016738994, |
| "grad_norm": 27.299480438232422, |
| "learning_rate": 1.1017811704834606e-05, |
| "loss": 1.6912, |
| "step": 18000 |
| }, |
| { |
| "epoch": 44.897706137631744, |
| "grad_norm": 23.34587860107422, |
| "learning_rate": 1.0508905852417303e-05, |
| "loss": 1.7391, |
| "step": 18100 |
| }, |
| { |
| "epoch": 45.0, |
| "eval_loss": 0.10261169075965881, |
| "eval_runtime": 148.8902, |
| "eval_samples_per_second": 3.358, |
| "eval_steps_per_second": 0.423, |
| "eval_wer": 0.2124963159445918, |
| "step": 18142 |
| }, |
| { |
| "epoch": 45.143831370117795, |
| "grad_norm": 23.987537384033203, |
| "learning_rate": 1e-05, |
| "loss": 1.6374, |
| "step": 18200 |
| }, |
| { |
| "epoch": 45.39181649101054, |
| "grad_norm": 20.715587615966797, |
| "learning_rate": 9.491094147582697e-06, |
| "loss": 1.6636, |
| "step": 18300 |
| }, |
| { |
| "epoch": 45.63980161190329, |
| "grad_norm": 16.032875061035156, |
| "learning_rate": 8.982188295165394e-06, |
| "loss": 1.6574, |
| "step": 18400 |
| }, |
| { |
| "epoch": 45.88778673279603, |
| "grad_norm": 22.555355072021484, |
| "learning_rate": 8.473282442748092e-06, |
| "loss": 1.6544, |
| "step": 18500 |
| }, |
| { |
| "epoch": 46.0, |
| "eval_loss": 0.10276778787374496, |
| "eval_runtime": 148.3048, |
| "eval_samples_per_second": 3.371, |
| "eval_steps_per_second": 0.425, |
| "eval_wer": 0.21396993810786913, |
| "step": 18546 |
| }, |
| { |
| "epoch": 46.13391196528208, |
| "grad_norm": 21.823448181152344, |
| "learning_rate": 7.96437659033079e-06, |
| "loss": 1.5898, |
| "step": 18600 |
| }, |
| { |
| "epoch": 46.38189708617483, |
| "grad_norm": 19.662864685058594, |
| "learning_rate": 7.455470737913486e-06, |
| "loss": 1.6271, |
| "step": 18700 |
| }, |
| { |
| "epoch": 46.629882207067574, |
| "grad_norm": 26.988121032714844, |
| "learning_rate": 6.9465648854961835e-06, |
| "loss": 1.5568, |
| "step": 18800 |
| }, |
| { |
| "epoch": 46.877867327960324, |
| "grad_norm": 56.02478790283203, |
| "learning_rate": 6.437659033078881e-06, |
| "loss": 1.6764, |
| "step": 18900 |
| }, |
| { |
| "epoch": 47.0, |
| "eval_loss": 0.10327400267124176, |
| "eval_runtime": 147.8139, |
| "eval_samples_per_second": 3.383, |
| "eval_steps_per_second": 0.426, |
| "eval_wer": 0.2120542292956086, |
| "step": 18950 |
| }, |
| { |
| "epoch": 47.123992560446375, |
| "grad_norm": 30.128440856933594, |
| "learning_rate": 5.928753180661578e-06, |
| "loss": 1.5139, |
| "step": 19000 |
| }, |
| { |
| "epoch": 47.37197768133912, |
| "grad_norm": 13.33277702331543, |
| "learning_rate": 5.419847328244275e-06, |
| "loss": 1.6313, |
| "step": 19100 |
| }, |
| { |
| "epoch": 47.61996280223187, |
| "grad_norm": 22.089269638061523, |
| "learning_rate": 4.910941475826972e-06, |
| "loss": 1.5804, |
| "step": 19200 |
| }, |
| { |
| "epoch": 47.86794792312461, |
| "grad_norm": 16.610151290893555, |
| "learning_rate": 4.402035623409669e-06, |
| "loss": 1.535, |
| "step": 19300 |
| }, |
| { |
| "epoch": 48.0, |
| "eval_loss": 0.1028217151761055, |
| "eval_runtime": 146.2041, |
| "eval_samples_per_second": 3.42, |
| "eval_steps_per_second": 0.431, |
| "eval_wer": 0.21220159151193635, |
| "step": 19354 |
| }, |
| { |
| "epoch": 48.11407315561066, |
| "grad_norm": 28.443222045898438, |
| "learning_rate": 3.893129770992367e-06, |
| "loss": 1.5263, |
| "step": 19400 |
| }, |
| { |
| "epoch": 48.36205827650341, |
| "grad_norm": 11.213704109191895, |
| "learning_rate": 3.3842239185750637e-06, |
| "loss": 1.5283, |
| "step": 19500 |
| }, |
| { |
| "epoch": 48.610043397396154, |
| "grad_norm": 15.313924789428711, |
| "learning_rate": 2.875318066157761e-06, |
| "loss": 1.4908, |
| "step": 19600 |
| }, |
| { |
| "epoch": 48.858028518288904, |
| "grad_norm": 17.469341278076172, |
| "learning_rate": 2.366412213740458e-06, |
| "loss": 1.5344, |
| "step": 19700 |
| }, |
| { |
| "epoch": 49.0, |
| "eval_loss": 0.1024635061621666, |
| "eval_runtime": 149.6385, |
| "eval_samples_per_second": 3.341, |
| "eval_steps_per_second": 0.421, |
| "eval_wer": 0.2163277335691129, |
| "step": 19758 |
| }, |
| { |
| "epoch": 49.104153750774955, |
| "grad_norm": 19.84515380859375, |
| "learning_rate": 1.8575063613231552e-06, |
| "loss": 1.4737, |
| "step": 19800 |
| }, |
| { |
| "epoch": 49.3521388716677, |
| "grad_norm": 14.313343048095703, |
| "learning_rate": 1.3486005089058526e-06, |
| "loss": 1.5536, |
| "step": 19900 |
| }, |
| { |
| "epoch": 49.60012399256045, |
| "grad_norm": 13.140170097351074, |
| "learning_rate": 8.396946564885497e-07, |
| "loss": 1.4557, |
| "step": 20000 |
| }, |
| { |
| "epoch": 49.84810911345319, |
| "grad_norm": 17.50598907470703, |
| "learning_rate": 3.3078880407124687e-07, |
| "loss": 1.5171, |
| "step": 20100 |
| }, |
| { |
| "epoch": 49.972101673899566, |
| "eval_loss": 0.10252001881599426, |
| "eval_runtime": 149.0596, |
| "eval_samples_per_second": 3.354, |
| "eval_steps_per_second": 0.423, |
| "eval_wer": 0.2120542292956086, |
| "step": 20150 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 20150, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 50, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.61896007057408e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|