| { |
| "best_global_step": 1000, |
| "best_metric": 57.38794435857806, |
| "best_model_checkpoint": "./whisper-small-sdn-2025/asr_training_runs/cycle_3/checkpoint-1000", |
| "epoch": 8.460236886632826, |
| "eval_steps": 1000, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04230118443316413, |
| "grad_norm": 22.13687515258789, |
| "learning_rate": 9.960000000000001e-06, |
| "loss": 2.9507, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08460236886632826, |
| "grad_norm": 22.51654815673828, |
| "learning_rate": 9.91e-06, |
| "loss": 1.7271, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12690355329949238, |
| "grad_norm": 23.82666015625, |
| "learning_rate": 9.86e-06, |
| "loss": 1.5723, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1692047377326565, |
| "grad_norm": 20.517061233520508, |
| "learning_rate": 9.810000000000001e-06, |
| "loss": 1.4073, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21150592216582065, |
| "grad_norm": 20.304147720336914, |
| "learning_rate": 9.760000000000001e-06, |
| "loss": 1.3958, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.25380710659898476, |
| "grad_norm": 18.278545379638672, |
| "learning_rate": 9.71e-06, |
| "loss": 1.2917, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2961082910321489, |
| "grad_norm": 12.573765754699707, |
| "learning_rate": 9.66e-06, |
| "loss": 1.0512, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.338409475465313, |
| "grad_norm": 19.825847625732422, |
| "learning_rate": 9.610000000000001e-06, |
| "loss": 1.0473, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38071065989847713, |
| "grad_norm": 20.143924713134766, |
| "learning_rate": 9.56e-06, |
| "loss": 0.943, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4230118443316413, |
| "grad_norm": 23.05994415283203, |
| "learning_rate": 9.51e-06, |
| "loss": 0.9658, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4653130287648054, |
| "grad_norm": 19.993017196655273, |
| "learning_rate": 9.460000000000001e-06, |
| "loss": 1.0455, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5076142131979695, |
| "grad_norm": 21.776580810546875, |
| "learning_rate": 9.41e-06, |
| "loss": 0.9146, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5499153976311336, |
| "grad_norm": 23.989397048950195, |
| "learning_rate": 9.360000000000002e-06, |
| "loss": 0.8029, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5922165820642978, |
| "grad_norm": 12.74024772644043, |
| "learning_rate": 9.31e-06, |
| "loss": 0.9284, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6345177664974619, |
| "grad_norm": 15.003218650817871, |
| "learning_rate": 9.260000000000001e-06, |
| "loss": 0.8649, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.676818950930626, |
| "grad_norm": 18.662107467651367, |
| "learning_rate": 9.210000000000002e-06, |
| "loss": 0.8373, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7191201353637902, |
| "grad_norm": 17.71957015991211, |
| "learning_rate": 9.16e-06, |
| "loss": 0.8147, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7614213197969543, |
| "grad_norm": 13.40419864654541, |
| "learning_rate": 9.110000000000001e-06, |
| "loss": 0.918, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8037225042301185, |
| "grad_norm": 21.269617080688477, |
| "learning_rate": 9.060000000000001e-06, |
| "loss": 0.8491, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8460236886632826, |
| "grad_norm": 22.920703887939453, |
| "learning_rate": 9.01e-06, |
| "loss": 0.7514, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8883248730964467, |
| "grad_norm": 14.403526306152344, |
| "learning_rate": 8.96e-06, |
| "loss": 0.7853, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9306260575296108, |
| "grad_norm": 19.039899826049805, |
| "learning_rate": 8.910000000000001e-06, |
| "loss": 0.8512, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9729272419627749, |
| "grad_norm": 14.437999725341797, |
| "learning_rate": 8.860000000000002e-06, |
| "loss": 0.7155, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.015228426395939, |
| "grad_norm": 11.453521728515625, |
| "learning_rate": 8.81e-06, |
| "loss": 0.7486, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0575296108291032, |
| "grad_norm": 14.921658515930176, |
| "learning_rate": 8.76e-06, |
| "loss": 0.4925, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.0998307952622675, |
| "grad_norm": 13.061800003051758, |
| "learning_rate": 8.710000000000001e-06, |
| "loss": 0.5556, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1421319796954315, |
| "grad_norm": 19.10222816467285, |
| "learning_rate": 8.66e-06, |
| "loss": 0.6538, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.1844331641285957, |
| "grad_norm": 13.2350435256958, |
| "learning_rate": 8.61e-06, |
| "loss": 0.5601, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2267343485617597, |
| "grad_norm": 11.798190116882324, |
| "learning_rate": 8.560000000000001e-06, |
| "loss": 0.5892, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.2690355329949239, |
| "grad_norm": 9.492058753967285, |
| "learning_rate": 8.51e-06, |
| "loss": 0.5997, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.3113367174280879, |
| "grad_norm": 9.729185104370117, |
| "learning_rate": 8.46e-06, |
| "loss": 0.5505, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.353637901861252, |
| "grad_norm": 15.062725067138672, |
| "learning_rate": 8.41e-06, |
| "loss": 0.5853, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.3959390862944163, |
| "grad_norm": 15.928922653198242, |
| "learning_rate": 8.36e-06, |
| "loss": 0.5996, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.4382402707275803, |
| "grad_norm": 10.66003131866455, |
| "learning_rate": 8.31e-06, |
| "loss": 0.5575, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.4805414551607445, |
| "grad_norm": 10.623987197875977, |
| "learning_rate": 8.26e-06, |
| "loss": 0.6829, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.5228426395939088, |
| "grad_norm": 12.859821319580078, |
| "learning_rate": 8.210000000000001e-06, |
| "loss": 0.5767, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5651438240270727, |
| "grad_norm": 10.011828422546387, |
| "learning_rate": 8.16e-06, |
| "loss": 0.5729, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.6074450084602367, |
| "grad_norm": 9.297901153564453, |
| "learning_rate": 8.110000000000002e-06, |
| "loss": 0.4935, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.649746192893401, |
| "grad_norm": 12.029204368591309, |
| "learning_rate": 8.06e-06, |
| "loss": 0.6019, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.6920473773265652, |
| "grad_norm": 15.14245891571045, |
| "learning_rate": 8.010000000000001e-06, |
| "loss": 0.5357, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6920473773265652, |
| "eval_loss": 0.7350032329559326, |
| "eval_runtime": 267.1085, |
| "eval_samples_per_second": 1.965, |
| "eval_steps_per_second": 0.247, |
| "eval_wer": 57.38794435857806, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7343485617597292, |
| "grad_norm": 14.227155685424805, |
| "learning_rate": 7.960000000000002e-06, |
| "loss": 0.59, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.7766497461928934, |
| "grad_norm": 18.895673751831055, |
| "learning_rate": 7.91e-06, |
| "loss": 0.5216, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.8189509306260576, |
| "grad_norm": 9.782660484313965, |
| "learning_rate": 7.860000000000001e-06, |
| "loss": 0.5936, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.8612521150592216, |
| "grad_norm": 12.076578140258789, |
| "learning_rate": 7.810000000000001e-06, |
| "loss": 0.5369, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.9035532994923858, |
| "grad_norm": 15.570003509521484, |
| "learning_rate": 7.76e-06, |
| "loss": 0.4853, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.94585448392555, |
| "grad_norm": 12.804927825927734, |
| "learning_rate": 7.71e-06, |
| "loss": 0.5245, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.988155668358714, |
| "grad_norm": 12.23946475982666, |
| "learning_rate": 7.660000000000001e-06, |
| "loss": 0.6461, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.030456852791878, |
| "grad_norm": 9.39681625366211, |
| "learning_rate": 7.610000000000001e-06, |
| "loss": 0.448, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0727580372250425, |
| "grad_norm": 17.55742073059082, |
| "learning_rate": 7.5600000000000005e-06, |
| "loss": 0.2919, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.1150592216582065, |
| "grad_norm": 17.609060287475586, |
| "learning_rate": 7.510000000000001e-06, |
| "loss": 0.34, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.1573604060913705, |
| "grad_norm": 7.253561973571777, |
| "learning_rate": 7.4600000000000006e-06, |
| "loss": 0.3617, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.199661590524535, |
| "grad_norm": 9.8511381149292, |
| "learning_rate": 7.41e-06, |
| "loss": 0.3066, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.241962774957699, |
| "grad_norm": 6.805290222167969, |
| "learning_rate": 7.360000000000001e-06, |
| "loss": 0.3451, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.284263959390863, |
| "grad_norm": 12.156089782714844, |
| "learning_rate": 7.31e-06, |
| "loss": 0.3457, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.326565143824027, |
| "grad_norm": 7.368825912475586, |
| "learning_rate": 7.260000000000001e-06, |
| "loss": 0.3957, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.3688663282571913, |
| "grad_norm": 10.008464813232422, |
| "learning_rate": 7.2100000000000004e-06, |
| "loss": 0.4248, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.4111675126903553, |
| "grad_norm": 8.685702323913574, |
| "learning_rate": 7.16e-06, |
| "loss": 0.3268, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.4534686971235193, |
| "grad_norm": 16.325767517089844, |
| "learning_rate": 7.1100000000000005e-06, |
| "loss": 0.3777, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.495769881556684, |
| "grad_norm": 7.86500883102417, |
| "learning_rate": 7.06e-06, |
| "loss": 0.3798, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.5380710659898478, |
| "grad_norm": 7.600804328918457, |
| "learning_rate": 7.01e-06, |
| "loss": 0.3439, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.5803722504230118, |
| "grad_norm": 14.633502006530762, |
| "learning_rate": 6.96e-06, |
| "loss": 0.485, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.6226734348561758, |
| "grad_norm": 7.198423862457275, |
| "learning_rate": 6.91e-06, |
| "loss": 0.3575, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.66497461928934, |
| "grad_norm": 16.913612365722656, |
| "learning_rate": 6.860000000000001e-06, |
| "loss": 0.3753, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.707275803722504, |
| "grad_norm": 11.978515625, |
| "learning_rate": 6.810000000000001e-06, |
| "loss": 0.3621, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.749576988155668, |
| "grad_norm": 8.528421401977539, |
| "learning_rate": 6.760000000000001e-06, |
| "loss": 0.4298, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.7918781725888326, |
| "grad_norm": 13.898110389709473, |
| "learning_rate": 6.710000000000001e-06, |
| "loss": 0.3661, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.8341793570219966, |
| "grad_norm": 10.405202865600586, |
| "learning_rate": 6.660000000000001e-06, |
| "loss": 0.3456, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.8764805414551606, |
| "grad_norm": 9.385127067565918, |
| "learning_rate": 6.610000000000001e-06, |
| "loss": 0.3622, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.9187817258883246, |
| "grad_norm": 13.76083755493164, |
| "learning_rate": 6.560000000000001e-06, |
| "loss": 0.3024, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.961082910321489, |
| "grad_norm": 8.907617568969727, |
| "learning_rate": 6.51e-06, |
| "loss": 0.2823, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.003384094754653, |
| "grad_norm": 5.701330661773682, |
| "learning_rate": 6.460000000000001e-06, |
| "loss": 0.3449, |
| "step": 1775 |
| }, |
| { |
| "epoch": 3.045685279187817, |
| "grad_norm": 12.139259338378906, |
| "learning_rate": 6.4100000000000005e-06, |
| "loss": 0.2832, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.0879864636209815, |
| "grad_norm": 7.493695259094238, |
| "learning_rate": 6.360000000000001e-06, |
| "loss": 0.246, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.1302876480541455, |
| "grad_norm": 9.878129959106445, |
| "learning_rate": 6.3100000000000006e-06, |
| "loss": 0.1979, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.1725888324873095, |
| "grad_norm": 11.870550155639648, |
| "learning_rate": 6.26e-06, |
| "loss": 0.2227, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.214890016920474, |
| "grad_norm": 9.061797142028809, |
| "learning_rate": 6.210000000000001e-06, |
| "loss": 0.1731, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.257191201353638, |
| "grad_norm": 10.5235013961792, |
| "learning_rate": 6.16e-06, |
| "loss": 0.2162, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.299492385786802, |
| "grad_norm": 11.65762996673584, |
| "learning_rate": 6.110000000000001e-06, |
| "loss": 0.2505, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.3417935702199664, |
| "grad_norm": 7.416925430297852, |
| "learning_rate": 6.0600000000000004e-06, |
| "loss": 0.2204, |
| "step": 1975 |
| }, |
| { |
| "epoch": 3.3840947546531304, |
| "grad_norm": 8.655340194702148, |
| "learning_rate": 6.01e-06, |
| "loss": 0.2169, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.3840947546531304, |
| "eval_loss": 0.732398271560669, |
| "eval_runtime": 271.7409, |
| "eval_samples_per_second": 1.932, |
| "eval_steps_per_second": 0.243, |
| "eval_wer": 59.58268933539412, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.4263959390862944, |
| "grad_norm": 8.720595359802246, |
| "learning_rate": 5.9600000000000005e-06, |
| "loss": 0.2475, |
| "step": 2025 |
| }, |
| { |
| "epoch": 3.4686971235194584, |
| "grad_norm": 10.252331733703613, |
| "learning_rate": 5.91e-06, |
| "loss": 0.229, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.510998307952623, |
| "grad_norm": 7.533905982971191, |
| "learning_rate": 5.86e-06, |
| "loss": 0.1953, |
| "step": 2075 |
| }, |
| { |
| "epoch": 3.553299492385787, |
| "grad_norm": 5.134809970855713, |
| "learning_rate": 5.81e-06, |
| "loss": 0.2581, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.595600676818951, |
| "grad_norm": 8.298554420471191, |
| "learning_rate": 5.76e-06, |
| "loss": 0.242, |
| "step": 2125 |
| }, |
| { |
| "epoch": 3.6379018612521152, |
| "grad_norm": 13.200987815856934, |
| "learning_rate": 5.71e-06, |
| "loss": 0.2015, |
| "step": 2150 |
| }, |
| { |
| "epoch": 3.6802030456852792, |
| "grad_norm": 10.58892822265625, |
| "learning_rate": 5.66e-06, |
| "loss": 0.2273, |
| "step": 2175 |
| }, |
| { |
| "epoch": 3.7225042301184432, |
| "grad_norm": 8.217840194702148, |
| "learning_rate": 5.610000000000001e-06, |
| "loss": 0.1863, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.764805414551607, |
| "grad_norm": 9.060100555419922, |
| "learning_rate": 5.560000000000001e-06, |
| "loss": 0.2072, |
| "step": 2225 |
| }, |
| { |
| "epoch": 3.8071065989847717, |
| "grad_norm": 7.986787796020508, |
| "learning_rate": 5.510000000000001e-06, |
| "loss": 0.2047, |
| "step": 2250 |
| }, |
| { |
| "epoch": 3.8494077834179357, |
| "grad_norm": 4.51170539855957, |
| "learning_rate": 5.460000000000001e-06, |
| "loss": 0.2277, |
| "step": 2275 |
| }, |
| { |
| "epoch": 3.8917089678511, |
| "grad_norm": 5.082483768463135, |
| "learning_rate": 5.410000000000001e-06, |
| "loss": 0.1969, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.934010152284264, |
| "grad_norm": 9.10048770904541, |
| "learning_rate": 5.36e-06, |
| "loss": 0.2005, |
| "step": 2325 |
| }, |
| { |
| "epoch": 3.976311336717428, |
| "grad_norm": 13.18527603149414, |
| "learning_rate": 5.310000000000001e-06, |
| "loss": 0.1891, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.018612521150592, |
| "grad_norm": 3.0822834968566895, |
| "learning_rate": 5.2600000000000005e-06, |
| "loss": 0.2137, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.060913705583756, |
| "grad_norm": 4.320342063903809, |
| "learning_rate": 5.210000000000001e-06, |
| "loss": 0.1121, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.10321489001692, |
| "grad_norm": 6.486440658569336, |
| "learning_rate": 5.1600000000000006e-06, |
| "loss": 0.1079, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.145516074450085, |
| "grad_norm": 6.29085636138916, |
| "learning_rate": 5.11e-06, |
| "loss": 0.1307, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.187817258883249, |
| "grad_norm": 7.327591896057129, |
| "learning_rate": 5.060000000000001e-06, |
| "loss": 0.1085, |
| "step": 2475 |
| }, |
| { |
| "epoch": 4.230118443316413, |
| "grad_norm": 10.163105964660645, |
| "learning_rate": 5.01e-06, |
| "loss": 0.1479, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.272419627749577, |
| "grad_norm": 5.800470352172852, |
| "learning_rate": 4.960000000000001e-06, |
| "loss": 0.164, |
| "step": 2525 |
| }, |
| { |
| "epoch": 4.314720812182741, |
| "grad_norm": 5.962683200836182, |
| "learning_rate": 4.9100000000000004e-06, |
| "loss": 0.1091, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.357021996615905, |
| "grad_norm": 1.9525275230407715, |
| "learning_rate": 4.86e-06, |
| "loss": 0.1158, |
| "step": 2575 |
| }, |
| { |
| "epoch": 4.39932318104907, |
| "grad_norm": 3.1240804195404053, |
| "learning_rate": 4.8100000000000005e-06, |
| "loss": 0.1322, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.441624365482234, |
| "grad_norm": 7.414096832275391, |
| "learning_rate": 4.76e-06, |
| "loss": 0.1061, |
| "step": 2625 |
| }, |
| { |
| "epoch": 4.483925549915398, |
| "grad_norm": 3.900723695755005, |
| "learning_rate": 4.71e-06, |
| "loss": 0.1218, |
| "step": 2650 |
| }, |
| { |
| "epoch": 4.526226734348562, |
| "grad_norm": 4.022332668304443, |
| "learning_rate": 4.66e-06, |
| "loss": 0.129, |
| "step": 2675 |
| }, |
| { |
| "epoch": 4.568527918781726, |
| "grad_norm": 6.424135208129883, |
| "learning_rate": 4.610000000000001e-06, |
| "loss": 0.1413, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.61082910321489, |
| "grad_norm": 6.727682590484619, |
| "learning_rate": 4.56e-06, |
| "loss": 0.1536, |
| "step": 2725 |
| }, |
| { |
| "epoch": 4.653130287648054, |
| "grad_norm": 3.917060613632202, |
| "learning_rate": 4.510000000000001e-06, |
| "loss": 0.1514, |
| "step": 2750 |
| }, |
| { |
| "epoch": 4.695431472081218, |
| "grad_norm": 5.873462677001953, |
| "learning_rate": 4.4600000000000005e-06, |
| "loss": 0.1116, |
| "step": 2775 |
| }, |
| { |
| "epoch": 4.737732656514383, |
| "grad_norm": 6.992873191833496, |
| "learning_rate": 4.41e-06, |
| "loss": 0.1474, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.780033840947547, |
| "grad_norm": 3.4381256103515625, |
| "learning_rate": 4.360000000000001e-06, |
| "loss": 0.1335, |
| "step": 2825 |
| }, |
| { |
| "epoch": 4.822335025380711, |
| "grad_norm": 12.713067054748535, |
| "learning_rate": 4.31e-06, |
| "loss": 0.1728, |
| "step": 2850 |
| }, |
| { |
| "epoch": 4.864636209813875, |
| "grad_norm": 9.53136920928955, |
| "learning_rate": 4.26e-06, |
| "loss": 0.1547, |
| "step": 2875 |
| }, |
| { |
| "epoch": 4.906937394247039, |
| "grad_norm": 4.442587375640869, |
| "learning_rate": 4.21e-06, |
| "loss": 0.1765, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.949238578680203, |
| "grad_norm": 6.532963275909424, |
| "learning_rate": 4.16e-06, |
| "loss": 0.1153, |
| "step": 2925 |
| }, |
| { |
| "epoch": 4.991539763113368, |
| "grad_norm": 5.798689842224121, |
| "learning_rate": 4.1100000000000005e-06, |
| "loss": 0.1529, |
| "step": 2950 |
| }, |
| { |
| "epoch": 5.0338409475465316, |
| "grad_norm": 13.38257122039795, |
| "learning_rate": 4.060000000000001e-06, |
| "loss": 0.0865, |
| "step": 2975 |
| }, |
| { |
| "epoch": 5.0761421319796955, |
| "grad_norm": 4.9945220947265625, |
| "learning_rate": 4.0100000000000006e-06, |
| "loss": 0.0917, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.0761421319796955, |
| "eval_loss": 0.7850525975227356, |
| "eval_runtime": 265.9428, |
| "eval_samples_per_second": 1.974, |
| "eval_steps_per_second": 0.248, |
| "eval_wer": 67.32612055641421, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.1184433164128595, |
| "grad_norm": 6.642792701721191, |
| "learning_rate": 3.96e-06, |
| "loss": 0.0646, |
| "step": 3025 |
| }, |
| { |
| "epoch": 5.1607445008460235, |
| "grad_norm": 4.741107940673828, |
| "learning_rate": 3.910000000000001e-06, |
| "loss": 0.0566, |
| "step": 3050 |
| }, |
| { |
| "epoch": 5.2030456852791875, |
| "grad_norm": 1.2372127771377563, |
| "learning_rate": 3.86e-06, |
| "loss": 0.0681, |
| "step": 3075 |
| }, |
| { |
| "epoch": 5.2453468697123515, |
| "grad_norm": 6.571516513824463, |
| "learning_rate": 3.8100000000000004e-06, |
| "loss": 0.1036, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.287648054145516, |
| "grad_norm": 2.1475963592529297, |
| "learning_rate": 3.7600000000000004e-06, |
| "loss": 0.0645, |
| "step": 3125 |
| }, |
| { |
| "epoch": 5.32994923857868, |
| "grad_norm": 1.6281226873397827, |
| "learning_rate": 3.7100000000000005e-06, |
| "loss": 0.0592, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.372250423011844, |
| "grad_norm": 1.886271357536316, |
| "learning_rate": 3.66e-06, |
| "loss": 0.0762, |
| "step": 3175 |
| }, |
| { |
| "epoch": 5.414551607445008, |
| "grad_norm": 5.298330783843994, |
| "learning_rate": 3.61e-06, |
| "loss": 0.0731, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.456852791878172, |
| "grad_norm": 2.7591044902801514, |
| "learning_rate": 3.5600000000000002e-06, |
| "loss": 0.0642, |
| "step": 3225 |
| }, |
| { |
| "epoch": 5.499153976311336, |
| "grad_norm": 4.665078163146973, |
| "learning_rate": 3.5100000000000003e-06, |
| "loss": 0.0785, |
| "step": 3250 |
| }, |
| { |
| "epoch": 5.541455160744501, |
| "grad_norm": 1.6366431713104248, |
| "learning_rate": 3.46e-06, |
| "loss": 0.0953, |
| "step": 3275 |
| }, |
| { |
| "epoch": 5.583756345177665, |
| "grad_norm": 6.781162738800049, |
| "learning_rate": 3.4100000000000004e-06, |
| "loss": 0.0529, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.626057529610829, |
| "grad_norm": 7.282459735870361, |
| "learning_rate": 3.3600000000000004e-06, |
| "loss": 0.0741, |
| "step": 3325 |
| }, |
| { |
| "epoch": 5.668358714043993, |
| "grad_norm": 2.2602906227111816, |
| "learning_rate": 3.3100000000000005e-06, |
| "loss": 0.0962, |
| "step": 3350 |
| }, |
| { |
| "epoch": 5.710659898477157, |
| "grad_norm": 7.06923246383667, |
| "learning_rate": 3.2600000000000006e-06, |
| "loss": 0.088, |
| "step": 3375 |
| }, |
| { |
| "epoch": 5.752961082910321, |
| "grad_norm": 1.4087145328521729, |
| "learning_rate": 3.21e-06, |
| "loss": 0.0935, |
| "step": 3400 |
| }, |
| { |
| "epoch": 5.795262267343485, |
| "grad_norm": 5.96427059173584, |
| "learning_rate": 3.1600000000000002e-06, |
| "loss": 0.0465, |
| "step": 3425 |
| }, |
| { |
| "epoch": 5.837563451776649, |
| "grad_norm": 7.267573833465576, |
| "learning_rate": 3.1100000000000003e-06, |
| "loss": 0.0977, |
| "step": 3450 |
| }, |
| { |
| "epoch": 5.879864636209814, |
| "grad_norm": 4.4437384605407715, |
| "learning_rate": 3.0600000000000003e-06, |
| "loss": 0.0644, |
| "step": 3475 |
| }, |
| { |
| "epoch": 5.922165820642978, |
| "grad_norm": 7.447219371795654, |
| "learning_rate": 3.01e-06, |
| "loss": 0.0699, |
| "step": 3500 |
| }, |
| { |
| "epoch": 5.964467005076142, |
| "grad_norm": 3.2096688747406006, |
| "learning_rate": 2.96e-06, |
| "loss": 0.0878, |
| "step": 3525 |
| }, |
| { |
| "epoch": 6.006768189509306, |
| "grad_norm": 7.191064357757568, |
| "learning_rate": 2.91e-06, |
| "loss": 0.0815, |
| "step": 3550 |
| }, |
| { |
| "epoch": 6.04906937394247, |
| "grad_norm": 6.0771989822387695, |
| "learning_rate": 2.86e-06, |
| "loss": 0.0471, |
| "step": 3575 |
| }, |
| { |
| "epoch": 6.091370558375634, |
| "grad_norm": 11.324472427368164, |
| "learning_rate": 2.8100000000000006e-06, |
| "loss": 0.0509, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.133671742808799, |
| "grad_norm": 4.35711145401001, |
| "learning_rate": 2.7600000000000003e-06, |
| "loss": 0.0525, |
| "step": 3625 |
| }, |
| { |
| "epoch": 6.175972927241963, |
| "grad_norm": 5.396825790405273, |
| "learning_rate": 2.7100000000000003e-06, |
| "loss": 0.0508, |
| "step": 3650 |
| }, |
| { |
| "epoch": 6.218274111675127, |
| "grad_norm": 0.9582346081733704, |
| "learning_rate": 2.6600000000000004e-06, |
| "loss": 0.0391, |
| "step": 3675 |
| }, |
| { |
| "epoch": 6.260575296108291, |
| "grad_norm": 2.5615832805633545, |
| "learning_rate": 2.6100000000000004e-06, |
| "loss": 0.0557, |
| "step": 3700 |
| }, |
| { |
| "epoch": 6.302876480541455, |
| "grad_norm": 7.233354568481445, |
| "learning_rate": 2.56e-06, |
| "loss": 0.0255, |
| "step": 3725 |
| }, |
| { |
| "epoch": 6.345177664974619, |
| "grad_norm": 1.0129377841949463, |
| "learning_rate": 2.51e-06, |
| "loss": 0.0313, |
| "step": 3750 |
| }, |
| { |
| "epoch": 6.387478849407783, |
| "grad_norm": 3.359922170639038, |
| "learning_rate": 2.46e-06, |
| "loss": 0.037, |
| "step": 3775 |
| }, |
| { |
| "epoch": 6.429780033840948, |
| "grad_norm": 4.721961498260498, |
| "learning_rate": 2.4100000000000002e-06, |
| "loss": 0.0455, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.472081218274112, |
| "grad_norm": 5.72802734375, |
| "learning_rate": 2.3600000000000003e-06, |
| "loss": 0.0403, |
| "step": 3825 |
| }, |
| { |
| "epoch": 6.514382402707276, |
| "grad_norm": 2.1109373569488525, |
| "learning_rate": 2.3100000000000003e-06, |
| "loss": 0.0572, |
| "step": 3850 |
| }, |
| { |
| "epoch": 6.55668358714044, |
| "grad_norm": 11.605259895324707, |
| "learning_rate": 2.2600000000000004e-06, |
| "loss": 0.04, |
| "step": 3875 |
| }, |
| { |
| "epoch": 6.598984771573604, |
| "grad_norm": 1.7446305751800537, |
| "learning_rate": 2.21e-06, |
| "loss": 0.0432, |
| "step": 3900 |
| }, |
| { |
| "epoch": 6.641285956006768, |
| "grad_norm": 1.7428556680679321, |
| "learning_rate": 2.16e-06, |
| "loss": 0.0438, |
| "step": 3925 |
| }, |
| { |
| "epoch": 6.683587140439933, |
| "grad_norm": 2.7987313270568848, |
| "learning_rate": 2.11e-06, |
| "loss": 0.0379, |
| "step": 3950 |
| }, |
| { |
| "epoch": 6.725888324873097, |
| "grad_norm": 0.8822634220123291, |
| "learning_rate": 2.06e-06, |
| "loss": 0.0262, |
| "step": 3975 |
| }, |
| { |
| "epoch": 6.768189509306261, |
| "grad_norm": 1.6173559427261353, |
| "learning_rate": 2.0100000000000002e-06, |
| "loss": 0.0399, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.768189509306261, |
| "eval_loss": 0.8369417190551758, |
| "eval_runtime": 268.5703, |
| "eval_samples_per_second": 1.955, |
| "eval_steps_per_second": 0.246, |
| "eval_wer": 69.36630602782071, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.810490693739425, |
| "grad_norm": 7.732959747314453, |
| "learning_rate": 1.9600000000000003e-06, |
| "loss": 0.0537, |
| "step": 4025 |
| }, |
| { |
| "epoch": 6.852791878172589, |
| "grad_norm": 2.3094513416290283, |
| "learning_rate": 1.9100000000000003e-06, |
| "loss": 0.0409, |
| "step": 4050 |
| }, |
| { |
| "epoch": 6.895093062605753, |
| "grad_norm": 7.310888767242432, |
| "learning_rate": 1.8600000000000002e-06, |
| "loss": 0.0326, |
| "step": 4075 |
| }, |
| { |
| "epoch": 6.937394247038917, |
| "grad_norm": 5.490383625030518, |
| "learning_rate": 1.81e-06, |
| "loss": 0.0494, |
| "step": 4100 |
| }, |
| { |
| "epoch": 6.979695431472082, |
| "grad_norm": 3.2482218742370605, |
| "learning_rate": 1.76e-06, |
| "loss": 0.0552, |
| "step": 4125 |
| }, |
| { |
| "epoch": 7.021996615905246, |
| "grad_norm": 3.474656343460083, |
| "learning_rate": 1.7100000000000004e-06, |
| "loss": 0.036, |
| "step": 4150 |
| }, |
| { |
| "epoch": 7.06429780033841, |
| "grad_norm": 5.680706024169922, |
| "learning_rate": 1.6600000000000002e-06, |
| "loss": 0.0281, |
| "step": 4175 |
| }, |
| { |
| "epoch": 7.106598984771574, |
| "grad_norm": 2.636260747909546, |
| "learning_rate": 1.6100000000000003e-06, |
| "loss": 0.0191, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.148900169204738, |
| "grad_norm": 0.4349760413169861, |
| "learning_rate": 1.56e-06, |
| "loss": 0.0229, |
| "step": 4225 |
| }, |
| { |
| "epoch": 7.191201353637902, |
| "grad_norm": 1.194022297859192, |
| "learning_rate": 1.5100000000000002e-06, |
| "loss": 0.024, |
| "step": 4250 |
| }, |
| { |
| "epoch": 7.233502538071066, |
| "grad_norm": 3.210624933242798, |
| "learning_rate": 1.46e-06, |
| "loss": 0.0198, |
| "step": 4275 |
| }, |
| { |
| "epoch": 7.2758037225042305, |
| "grad_norm": 6.436493873596191, |
| "learning_rate": 1.41e-06, |
| "loss": 0.0322, |
| "step": 4300 |
| }, |
| { |
| "epoch": 7.3181049069373945, |
| "grad_norm": 2.0612590312957764, |
| "learning_rate": 1.3600000000000001e-06, |
| "loss": 0.028, |
| "step": 4325 |
| }, |
| { |
| "epoch": 7.3604060913705585, |
| "grad_norm": 0.4659048318862915, |
| "learning_rate": 1.3100000000000002e-06, |
| "loss": 0.0275, |
| "step": 4350 |
| }, |
| { |
| "epoch": 7.4027072758037225, |
| "grad_norm": 0.9324387907981873, |
| "learning_rate": 1.26e-06, |
| "loss": 0.0198, |
| "step": 4375 |
| }, |
| { |
| "epoch": 7.4450084602368864, |
| "grad_norm": 0.6756917834281921, |
| "learning_rate": 1.21e-06, |
| "loss": 0.0135, |
| "step": 4400 |
| }, |
| { |
| "epoch": 7.4873096446700504, |
| "grad_norm": 0.6344797015190125, |
| "learning_rate": 1.1600000000000001e-06, |
| "loss": 0.02, |
| "step": 4425 |
| }, |
| { |
| "epoch": 7.529610829103214, |
| "grad_norm": 2.675487756729126, |
| "learning_rate": 1.1100000000000002e-06, |
| "loss": 0.0203, |
| "step": 4450 |
| }, |
| { |
| "epoch": 7.571912013536379, |
| "grad_norm": 2.6844794750213623, |
| "learning_rate": 1.06e-06, |
| "loss": 0.0318, |
| "step": 4475 |
| }, |
| { |
| "epoch": 7.614213197969543, |
| "grad_norm": 0.493827760219574, |
| "learning_rate": 1.01e-06, |
| "loss": 0.0253, |
| "step": 4500 |
| }, |
| { |
| "epoch": 7.656514382402707, |
| "grad_norm": 1.7579808235168457, |
| "learning_rate": 9.600000000000001e-07, |
| "loss": 0.0159, |
| "step": 4525 |
| }, |
| { |
| "epoch": 7.698815566835871, |
| "grad_norm": 0.6291612982749939, |
| "learning_rate": 9.100000000000001e-07, |
| "loss": 0.0284, |
| "step": 4550 |
| }, |
| { |
| "epoch": 7.741116751269035, |
| "grad_norm": 1.4849034547805786, |
| "learning_rate": 8.6e-07, |
| "loss": 0.0173, |
| "step": 4575 |
| }, |
| { |
| "epoch": 7.783417935702199, |
| "grad_norm": 0.6458856463432312, |
| "learning_rate": 8.100000000000001e-07, |
| "loss": 0.0156, |
| "step": 4600 |
| }, |
| { |
| "epoch": 7.825719120135364, |
| "grad_norm": 0.2947687804698944, |
| "learning_rate": 7.6e-07, |
| "loss": 0.0253, |
| "step": 4625 |
| }, |
| { |
| "epoch": 7.868020304568528, |
| "grad_norm": 1.277651309967041, |
| "learning_rate": 7.1e-07, |
| "loss": 0.0167, |
| "step": 4650 |
| }, |
| { |
| "epoch": 7.910321489001692, |
| "grad_norm": 3.256167411804199, |
| "learning_rate": 6.6e-07, |
| "loss": 0.0291, |
| "step": 4675 |
| }, |
| { |
| "epoch": 7.952622673434856, |
| "grad_norm": 0.46765822172164917, |
| "learning_rate": 6.100000000000001e-07, |
| "loss": 0.0499, |
| "step": 4700 |
| }, |
| { |
| "epoch": 7.99492385786802, |
| "grad_norm": 0.4301537871360779, |
| "learning_rate": 5.6e-07, |
| "loss": 0.0468, |
| "step": 4725 |
| }, |
| { |
| "epoch": 8.037225042301184, |
| "grad_norm": 2.860602378845215, |
| "learning_rate": 5.1e-07, |
| "loss": 0.0147, |
| "step": 4750 |
| }, |
| { |
| "epoch": 8.079526226734348, |
| "grad_norm": 0.27871173620224, |
| "learning_rate": 4.6000000000000004e-07, |
| "loss": 0.0158, |
| "step": 4775 |
| }, |
| { |
| "epoch": 8.121827411167512, |
| "grad_norm": 0.5240181684494019, |
| "learning_rate": 4.1000000000000004e-07, |
| "loss": 0.0251, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.164128595600676, |
| "grad_norm": 0.24851427972316742, |
| "learning_rate": 3.6e-07, |
| "loss": 0.0121, |
| "step": 4825 |
| }, |
| { |
| "epoch": 8.20642978003384, |
| "grad_norm": 0.38774704933166504, |
| "learning_rate": 3.1000000000000005e-07, |
| "loss": 0.0183, |
| "step": 4850 |
| }, |
| { |
| "epoch": 8.248730964467006, |
| "grad_norm": 0.8031614422798157, |
| "learning_rate": 2.6e-07, |
| "loss": 0.0096, |
| "step": 4875 |
| }, |
| { |
| "epoch": 8.29103214890017, |
| "grad_norm": 1.7913848161697388, |
| "learning_rate": 2.1000000000000003e-07, |
| "loss": 0.0212, |
| "step": 4900 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.41187214851379395, |
| "learning_rate": 1.6e-07, |
| "loss": 0.0125, |
| "step": 4925 |
| }, |
| { |
| "epoch": 8.375634517766498, |
| "grad_norm": 0.9369856715202332, |
| "learning_rate": 1.1e-07, |
| "loss": 0.0103, |
| "step": 4950 |
| }, |
| { |
| "epoch": 8.417935702199662, |
| "grad_norm": 0.24833422899246216, |
| "learning_rate": 6.000000000000001e-08, |
| "loss": 0.0299, |
| "step": 4975 |
| }, |
| { |
| "epoch": 8.460236886632826, |
| "grad_norm": 0.25818368792533875, |
| "learning_rate": 1e-08, |
| "loss": 0.0164, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.460236886632826, |
| "eval_loss": 0.8701984286308289, |
| "eval_runtime": 276.906, |
| "eval_samples_per_second": 1.896, |
| "eval_steps_per_second": 0.238, |
| "eval_wer": 81.15919629057187, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.15318725967872e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|