| { | |
| "best_metric": 0.38667929292929293, | |
| "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-22k-finetuned/outputs/checkpoint-1080", | |
| "epoch": 60.0, | |
| "eval_steps": 500, | |
| "global_step": 64800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5049565434455872, | |
| "learning_rate": 3.32716049382716e-05, | |
| "loss": 0.126, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_cer": 0.1485204762139999, | |
| "eval_loss": 0.4803544282913208, | |
| "eval_runtime": 50.5555, | |
| "eval_samples_per_second": 17.604, | |
| "eval_steps_per_second": 2.215, | |
| "eval_wer": 0.38667929292929293, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.3093390464782715, | |
| "learning_rate": 6.660493827160493e-05, | |
| "loss": 0.1441, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_cer": 0.19503038087240035, | |
| "eval_loss": 0.6097356677055359, | |
| "eval_runtime": 49.9713, | |
| "eval_samples_per_second": 17.81, | |
| "eval_steps_per_second": 2.241, | |
| "eval_wer": 0.44239267676767674, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.067934513092041, | |
| "learning_rate": 9.99074074074074e-05, | |
| "loss": 0.1675, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_cer": 0.16756409623079582, | |
| "eval_loss": 0.5237330198287964, | |
| "eval_runtime": 52.1692, | |
| "eval_samples_per_second": 17.06, | |
| "eval_steps_per_second": 2.147, | |
| "eval_wer": 0.444760101010101, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 6.190335750579834, | |
| "learning_rate": 0.00013324074074074074, | |
| "loss": 0.1919, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_cer": 0.188435508570864, | |
| "eval_loss": 0.6256272196769714, | |
| "eval_runtime": 49.8649, | |
| "eval_samples_per_second": 17.848, | |
| "eval_steps_per_second": 2.246, | |
| "eval_wer": 0.484375, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 10.29676342010498, | |
| "learning_rate": 0.0001665432098765432, | |
| "loss": 0.2168, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_cer": 0.1991552635478931, | |
| "eval_loss": 0.6817235946655273, | |
| "eval_runtime": 49.9056, | |
| "eval_samples_per_second": 17.834, | |
| "eval_steps_per_second": 2.244, | |
| "eval_wer": 0.5130997474747475, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 16.29789924621582, | |
| "learning_rate": 0.00019987654320987656, | |
| "loss": 0.2411, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_cer": 0.20411994269624067, | |
| "eval_loss": 0.6815704703330994, | |
| "eval_runtime": 47.2826, | |
| "eval_samples_per_second": 18.823, | |
| "eval_steps_per_second": 2.369, | |
| "eval_wer": 0.5233585858585859, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 13.96838665008545, | |
| "learning_rate": 0.000233179012345679, | |
| "loss": 0.2493, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_cer": 0.2558662253618535, | |
| "eval_loss": 0.8295482993125916, | |
| "eval_runtime": 47.3326, | |
| "eval_samples_per_second": 18.803, | |
| "eval_steps_per_second": 2.366, | |
| "eval_wer": 0.6788194444444444, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 12.302577018737793, | |
| "learning_rate": 0.0002665123456790123, | |
| "loss": 0.2718, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_cer": 0.2669070789902682, | |
| "eval_loss": 0.8849073648452759, | |
| "eval_runtime": 47.1385, | |
| "eval_samples_per_second": 18.881, | |
| "eval_steps_per_second": 2.376, | |
| "eval_wer": 0.6756628787878788, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.21664512157440186, | |
| "learning_rate": 0.0002998148148148148, | |
| "loss": 0.2922, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_cer": 0.3401422714024601, | |
| "eval_loss": 1.0527104139328003, | |
| "eval_runtime": 49.2164, | |
| "eval_samples_per_second": 18.083, | |
| "eval_steps_per_second": 2.276, | |
| "eval_wer": 0.6721906565656566, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 7.9486260414123535, | |
| "learning_rate": 0.0003331172839506173, | |
| "loss": 0.3156, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_cer": 0.35758039816232773, | |
| "eval_loss": 1.0661259889602661, | |
| "eval_runtime": 48.5028, | |
| "eval_samples_per_second": 18.349, | |
| "eval_steps_per_second": 2.309, | |
| "eval_wer": 0.7528409090909091, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 8.1552095413208, | |
| "learning_rate": 0.0003664506172839506, | |
| "loss": 0.3273, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_cer": 0.2929654695450279, | |
| "eval_loss": 1.0082694292068481, | |
| "eval_runtime": 48.9797, | |
| "eval_samples_per_second": 18.171, | |
| "eval_steps_per_second": 2.287, | |
| "eval_wer": 0.7840909090909091, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 8.20614242553711, | |
| "learning_rate": 0.00039978395061728396, | |
| "loss": 0.3216, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_cer": 0.3153682754532431, | |
| "eval_loss": 1.130453109741211, | |
| "eval_runtime": 48.376, | |
| "eval_samples_per_second": 18.398, | |
| "eval_steps_per_second": 2.315, | |
| "eval_wer": 0.728219696969697, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 14.636846542358398, | |
| "learning_rate": 0.00043311728395061726, | |
| "loss": 0.3498, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_cer": 0.3106258953712394, | |
| "eval_loss": 1.0758916139602661, | |
| "eval_runtime": 48.0575, | |
| "eval_samples_per_second": 18.519, | |
| "eval_steps_per_second": 2.331, | |
| "eval_wer": 0.7312184343434344, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 4.806951999664307, | |
| "learning_rate": 0.0004664506172839506, | |
| "loss": 0.3553, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_cer": 0.28031912265968484, | |
| "eval_loss": 0.8731944561004639, | |
| "eval_runtime": 47.2505, | |
| "eval_samples_per_second": 18.836, | |
| "eval_steps_per_second": 2.37, | |
| "eval_wer": 0.6756628787878788, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.5450202822685242, | |
| "learning_rate": 0.0004997530864197531, | |
| "loss": 0.3582, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_cer": 0.31852986217457885, | |
| "eval_loss": 1.055077075958252, | |
| "eval_runtime": 46.8181, | |
| "eval_samples_per_second": 19.01, | |
| "eval_steps_per_second": 2.392, | |
| "eval_wer": 0.7623106060606061, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 5.1030144691467285, | |
| "learning_rate": 0.0004889814814814815, | |
| "loss": 0.3607, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_cer": 0.3101071975497703, | |
| "eval_loss": 1.0534826517105103, | |
| "eval_runtime": 47.5102, | |
| "eval_samples_per_second": 18.733, | |
| "eval_steps_per_second": 2.357, | |
| "eval_wer": 0.7482638888888888, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.22218887507915497, | |
| "learning_rate": 0.0004778703703703704, | |
| "loss": 0.3447, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_cer": 0.30813120584893544, | |
| "eval_loss": 1.064017415046692, | |
| "eval_runtime": 48.3671, | |
| "eval_samples_per_second": 18.401, | |
| "eval_steps_per_second": 2.316, | |
| "eval_wer": 0.7369002525252525, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.14536279439926147, | |
| "learning_rate": 0.00046675925925925926, | |
| "loss": 0.325, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_cer": 0.2905448797115052, | |
| "eval_loss": 1.0327048301696777, | |
| "eval_runtime": 48.9592, | |
| "eval_samples_per_second": 18.178, | |
| "eval_steps_per_second": 2.288, | |
| "eval_wer": 0.7534722222222222, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 1.5726815462112427, | |
| "learning_rate": 0.00045564814814814817, | |
| "loss": 0.3022, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_cer": 0.2886923874919725, | |
| "eval_loss": 0.9869930148124695, | |
| "eval_runtime": 49.3541, | |
| "eval_samples_per_second": 18.033, | |
| "eval_steps_per_second": 2.269, | |
| "eval_wer": 0.7231691919191919, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.41919103264808655, | |
| "learning_rate": 0.00044454732510288065, | |
| "loss": 0.2825, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_cer": 0.28056612162228917, | |
| "eval_loss": 0.9183225035667419, | |
| "eval_runtime": 49.2359, | |
| "eval_samples_per_second": 18.076, | |
| "eval_steps_per_second": 2.275, | |
| "eval_wer": 0.686395202020202, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 12.236234664916992, | |
| "learning_rate": 0.0004334362139917696, | |
| "loss": 0.2706, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_cer": 0.28604949859210593, | |
| "eval_loss": 0.9366316795349121, | |
| "eval_runtime": 49.1391, | |
| "eval_samples_per_second": 18.112, | |
| "eval_steps_per_second": 2.279, | |
| "eval_wer": 0.6811868686868687, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 4.797195911407471, | |
| "learning_rate": 0.0004223353909465021, | |
| "loss": 0.2507, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_cer": 0.2608062046139406, | |
| "eval_loss": 0.9585080146789551, | |
| "eval_runtime": 48.7093, | |
| "eval_samples_per_second": 18.272, | |
| "eval_steps_per_second": 2.299, | |
| "eval_wer": 0.6941287878787878, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 4.625443935394287, | |
| "learning_rate": 0.00041122427983539094, | |
| "loss": 0.237, | |
| "step": 24840 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_cer": 0.28024502297090353, | |
| "eval_loss": 1.010016918182373, | |
| "eval_runtime": 50.1358, | |
| "eval_samples_per_second": 17.752, | |
| "eval_steps_per_second": 2.234, | |
| "eval_wer": 0.6797664141414141, | |
| "step": 24840 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.49481087923049927, | |
| "learning_rate": 0.00040011316872427984, | |
| "loss": 0.2298, | |
| "step": 25920 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_cer": 0.24492417131848046, | |
| "eval_loss": 0.9184597730636597, | |
| "eval_runtime": 48.7455, | |
| "eval_samples_per_second": 18.258, | |
| "eval_steps_per_second": 2.298, | |
| "eval_wer": 0.6349431818181818, | |
| "step": 25920 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 1.7336276769638062, | |
| "learning_rate": 0.0003890123456790123, | |
| "loss": 0.221, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_cer": 0.27846663044015213, | |
| "eval_loss": 0.9352790713310242, | |
| "eval_runtime": 48.8906, | |
| "eval_samples_per_second": 18.204, | |
| "eval_steps_per_second": 2.291, | |
| "eval_wer": 0.6579861111111112, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.02212027832865715, | |
| "learning_rate": 0.0003779012345679013, | |
| "loss": 0.2052, | |
| "step": 28080 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_cer": 0.2507039470434224, | |
| "eval_loss": 0.8651528358459473, | |
| "eval_runtime": 49.0769, | |
| "eval_samples_per_second": 18.135, | |
| "eval_steps_per_second": 2.282, | |
| "eval_wer": 0.6493055555555556, | |
| "step": 28080 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 2.215277910232544, | |
| "learning_rate": 0.0003667901234567901, | |
| "loss": 0.1928, | |
| "step": 29160 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_cer": 0.2630785950699007, | |
| "eval_loss": 0.8858852386474609, | |
| "eval_runtime": 49.657, | |
| "eval_samples_per_second": 17.923, | |
| "eval_steps_per_second": 2.255, | |
| "eval_wer": 0.6775568181818182, | |
| "step": 29160 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.10988181829452515, | |
| "learning_rate": 0.000355679012345679, | |
| "loss": 0.1889, | |
| "step": 30240 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_cer": 0.2666353801314034, | |
| "eval_loss": 0.9239539504051208, | |
| "eval_runtime": 49.2302, | |
| "eval_samples_per_second": 18.078, | |
| "eval_steps_per_second": 2.275, | |
| "eval_wer": 0.6636679292929293, | |
| "step": 30240 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.5829525589942932, | |
| "learning_rate": 0.0003445781893004115, | |
| "loss": 0.1771, | |
| "step": 31320 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_cer": 0.24934545274909845, | |
| "eval_loss": 0.9042806625366211, | |
| "eval_runtime": 52.6225, | |
| "eval_samples_per_second": 16.913, | |
| "eval_steps_per_second": 2.128, | |
| "eval_wer": 0.6256313131313131, | |
| "step": 31320 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 3.2479238510131836, | |
| "learning_rate": 0.00033346707818930046, | |
| "loss": 0.163, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_cer": 0.26213999901200413, | |
| "eval_loss": 0.9130964875221252, | |
| "eval_runtime": 50.9345, | |
| "eval_samples_per_second": 17.473, | |
| "eval_steps_per_second": 2.199, | |
| "eval_wer": 0.6504103535353535, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "grad_norm": 2.047846555709839, | |
| "learning_rate": 0.0003223559670781893, | |
| "loss": 0.1603, | |
| "step": 33480 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_cer": 0.24055228968038334, | |
| "eval_loss": 0.8102329969406128, | |
| "eval_runtime": 50.6115, | |
| "eval_samples_per_second": 17.585, | |
| "eval_steps_per_second": 2.213, | |
| "eval_wer": 0.6319444444444444, | |
| "step": 33480 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.3893296420574188, | |
| "learning_rate": 0.0003112448559670782, | |
| "loss": 0.1447, | |
| "step": 34560 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_cer": 0.2447512720446574, | |
| "eval_loss": 0.9245155453681946, | |
| "eval_runtime": 51.908, | |
| "eval_samples_per_second": 17.146, | |
| "eval_steps_per_second": 2.158, | |
| "eval_wer": 0.6336805555555556, | |
| "step": 34560 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "grad_norm": 2.6302273273468018, | |
| "learning_rate": 0.0003001440329218107, | |
| "loss": 0.1418, | |
| "step": 35640 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_cer": 0.25300103739564295, | |
| "eval_loss": 0.9590283632278442, | |
| "eval_runtime": 52.0031, | |
| "eval_samples_per_second": 17.114, | |
| "eval_steps_per_second": 2.154, | |
| "eval_wer": 0.6235795454545454, | |
| "step": 35640 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "grad_norm": 3.61879301071167, | |
| "learning_rate": 0.0002890432098765432, | |
| "loss": 0.1415, | |
| "step": 36720 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_cer": 0.2578916168552092, | |
| "eval_loss": 0.92754727602005, | |
| "eval_runtime": 52.0318, | |
| "eval_samples_per_second": 17.105, | |
| "eval_steps_per_second": 2.153, | |
| "eval_wer": 0.634469696969697, | |
| "step": 36720 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 6.908621311187744, | |
| "learning_rate": 0.00027793209876543213, | |
| "loss": 0.1313, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_cer": 0.24981475077804674, | |
| "eval_loss": 0.8644362688064575, | |
| "eval_runtime": 53.8225, | |
| "eval_samples_per_second": 16.536, | |
| "eval_steps_per_second": 2.081, | |
| "eval_wer": 0.6279987373737373, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "grad_norm": 2.5687201023101807, | |
| "learning_rate": 0.000266820987654321, | |
| "loss": 0.1285, | |
| "step": 38880 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_cer": 0.26505458677073557, | |
| "eval_loss": 0.9070570468902588, | |
| "eval_runtime": 55.322, | |
| "eval_samples_per_second": 16.088, | |
| "eval_steps_per_second": 2.025, | |
| "eval_wer": 0.625, | |
| "step": 38880 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "grad_norm": 0.1792680323123932, | |
| "learning_rate": 0.0002557098765432099, | |
| "loss": 0.1204, | |
| "step": 39960 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_cer": 0.2386503976683298, | |
| "eval_loss": 0.8658037185668945, | |
| "eval_runtime": 54.276, | |
| "eval_samples_per_second": 16.398, | |
| "eval_steps_per_second": 2.064, | |
| "eval_wer": 0.6092171717171717, | |
| "step": 39960 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "grad_norm": 0.05945800244808197, | |
| "learning_rate": 0.0002445987654320988, | |
| "loss": 0.1116, | |
| "step": 41040 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_cer": 0.24588746727263747, | |
| "eval_loss": 0.8684060573577881, | |
| "eval_runtime": 55.9431, | |
| "eval_samples_per_second": 15.909, | |
| "eval_steps_per_second": 2.002, | |
| "eval_wer": 0.6267361111111112, | |
| "step": 41040 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "grad_norm": 2.164262056350708, | |
| "learning_rate": 0.00023349794238683127, | |
| "loss": 0.102, | |
| "step": 42120 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_cer": 0.24102158770933163, | |
| "eval_loss": 0.9792320728302002, | |
| "eval_runtime": 54.7942, | |
| "eval_samples_per_second": 16.243, | |
| "eval_steps_per_second": 2.044, | |
| "eval_wer": 0.6245265151515151, | |
| "step": 42120 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 7.841192722320557, | |
| "learning_rate": 0.00022238683127572017, | |
| "loss": 0.0966, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_cer": 0.2466037642641901, | |
| "eval_loss": 0.8880752325057983, | |
| "eval_runtime": 57.0632, | |
| "eval_samples_per_second": 15.597, | |
| "eval_steps_per_second": 1.963, | |
| "eval_wer": 0.6163194444444444, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "grad_norm": 0.5480403304100037, | |
| "learning_rate": 0.00021128600823045268, | |
| "loss": 0.0934, | |
| "step": 44280 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_cer": 0.23398211727510745, | |
| "eval_loss": 0.8669174909591675, | |
| "eval_runtime": 56.5233, | |
| "eval_samples_per_second": 15.746, | |
| "eval_steps_per_second": 1.981, | |
| "eval_wer": 0.5970643939393939, | |
| "step": 44280 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "grad_norm": 2.996035099029541, | |
| "learning_rate": 0.00020017489711934155, | |
| "loss": 0.0847, | |
| "step": 45360 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_cer": 0.2370696043076619, | |
| "eval_loss": 0.9717867970466614, | |
| "eval_runtime": 55.4728, | |
| "eval_samples_per_second": 16.044, | |
| "eval_steps_per_second": 2.019, | |
| "eval_wer": 0.6207386363636364, | |
| "step": 45360 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "grad_norm": 0.41690441966056824, | |
| "learning_rate": 0.00018907407407407406, | |
| "loss": 0.0828, | |
| "step": 46440 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_cer": 0.2392925949711011, | |
| "eval_loss": 0.957336962223053, | |
| "eval_runtime": 54.9772, | |
| "eval_samples_per_second": 16.189, | |
| "eval_steps_per_second": 2.037, | |
| "eval_wer": 0.6223169191919192, | |
| "step": 46440 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "grad_norm": 0.07533986121416092, | |
| "learning_rate": 0.0001779732510288066, | |
| "loss": 0.0727, | |
| "step": 47520 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_cer": 0.2357605098058588, | |
| "eval_loss": 0.9871988892555237, | |
| "eval_runtime": 57.6886, | |
| "eval_samples_per_second": 15.428, | |
| "eval_steps_per_second": 1.941, | |
| "eval_wer": 0.6096906565656566, | |
| "step": 47520 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "grad_norm": 0.7598063945770264, | |
| "learning_rate": 0.00016686213991769547, | |
| "loss": 0.0701, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_cer": 0.24457837277083436, | |
| "eval_loss": 0.9421331882476807, | |
| "eval_runtime": 55.063, | |
| "eval_samples_per_second": 16.163, | |
| "eval_steps_per_second": 2.034, | |
| "eval_wer": 0.6115845959595959, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "grad_norm": 0.43303415179252625, | |
| "learning_rate": 0.00015575102880658438, | |
| "loss": 0.0648, | |
| "step": 49680 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_cer": 0.24672726374549228, | |
| "eval_loss": 0.9590614438056946, | |
| "eval_runtime": 57.1789, | |
| "eval_samples_per_second": 15.565, | |
| "eval_steps_per_second": 1.959, | |
| "eval_wer": 0.6043244949494949, | |
| "step": 49680 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "grad_norm": 6.171388626098633, | |
| "learning_rate": 0.00014463991769547325, | |
| "loss": 0.0634, | |
| "step": 50760 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_cer": 0.23551351084325445, | |
| "eval_loss": 0.9990620017051697, | |
| "eval_runtime": 55.5622, | |
| "eval_samples_per_second": 16.018, | |
| "eval_steps_per_second": 2.016, | |
| "eval_wer": 0.6109532828282829, | |
| "step": 50760 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "grad_norm": 0.05001814663410187, | |
| "learning_rate": 0.0001335390946502058, | |
| "loss": 0.0573, | |
| "step": 51840 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_cer": 0.23452551499283703, | |
| "eval_loss": 0.9873119592666626, | |
| "eval_runtime": 55.0833, | |
| "eval_samples_per_second": 16.157, | |
| "eval_steps_per_second": 2.033, | |
| "eval_wer": 0.6054292929292929, | |
| "step": 51840 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "grad_norm": 3.651003360748291, | |
| "learning_rate": 0.00012242798353909466, | |
| "loss": 0.0527, | |
| "step": 52920 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_cer": 0.23247542360322088, | |
| "eval_loss": 0.9885514974594116, | |
| "eval_runtime": 52.5162, | |
| "eval_samples_per_second": 16.947, | |
| "eval_steps_per_second": 2.133, | |
| "eval_wer": 0.5935921717171717, | |
| "step": 52920 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 3.5055177211761475, | |
| "learning_rate": 0.00011131687242798354, | |
| "loss": 0.0506, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_cer": 0.22867163957911377, | |
| "eval_loss": 1.0199133157730103, | |
| "eval_runtime": 51.406, | |
| "eval_samples_per_second": 17.313, | |
| "eval_steps_per_second": 2.179, | |
| "eval_wer": 0.5940656565656566, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "grad_norm": 0.08695941418409348, | |
| "learning_rate": 0.00010020576131687243, | |
| "loss": 0.0486, | |
| "step": 55080 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_cer": 0.22634984933063282, | |
| "eval_loss": 1.0691256523132324, | |
| "eval_runtime": 54.2523, | |
| "eval_samples_per_second": 16.405, | |
| "eval_steps_per_second": 2.064, | |
| "eval_wer": 0.5880681818181818, | |
| "step": 55080 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "grad_norm": 0.4256766438484192, | |
| "learning_rate": 8.909465020576133e-05, | |
| "loss": 0.0447, | |
| "step": 56160 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_cer": 0.22963493553327077, | |
| "eval_loss": 1.0140999555587769, | |
| "eval_runtime": 58.925, | |
| "eval_samples_per_second": 15.104, | |
| "eval_steps_per_second": 1.901, | |
| "eval_wer": 0.5893308080808081, | |
| "step": 56160 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "grad_norm": 3.884925365447998, | |
| "learning_rate": 7.799382716049382e-05, | |
| "loss": 0.0419, | |
| "step": 57240 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_cer": 0.2279306426913007, | |
| "eval_loss": 1.0658098459243774, | |
| "eval_runtime": 50.8901, | |
| "eval_samples_per_second": 17.489, | |
| "eval_steps_per_second": 2.201, | |
| "eval_wer": 0.5872790404040404, | |
| "step": 57240 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "grad_norm": 0.5678676962852478, | |
| "learning_rate": 6.690329218106995e-05, | |
| "loss": 0.0376, | |
| "step": 58320 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_cer": 0.2253618534802154, | |
| "eval_loss": 1.144079327583313, | |
| "eval_runtime": 52.3564, | |
| "eval_samples_per_second": 16.999, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.5888573232323232, | |
| "step": 58320 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "grad_norm": 1.0211379528045654, | |
| "learning_rate": 5.579218106995885e-05, | |
| "loss": 0.0355, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_cer": 0.22486785555500666, | |
| "eval_loss": 1.146174430847168, | |
| "eval_runtime": 50.8316, | |
| "eval_samples_per_second": 17.509, | |
| "eval_steps_per_second": 2.203, | |
| "eval_wer": 0.5880681818181818, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "grad_norm": 0.02778603509068489, | |
| "learning_rate": 4.468106995884774e-05, | |
| "loss": 0.0335, | |
| "step": 60480 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_cer": 0.22442325742231883, | |
| "eval_loss": 1.1712491512298584, | |
| "eval_runtime": 51.7561, | |
| "eval_samples_per_second": 17.196, | |
| "eval_steps_per_second": 2.164, | |
| "eval_wer": 0.5860164141414141, | |
| "step": 60480 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "grad_norm": 0.13397055864334106, | |
| "learning_rate": 3.3569958847736626e-05, | |
| "loss": 0.0296, | |
| "step": 61560 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_cer": 0.22180506841871264, | |
| "eval_loss": 1.162169337272644, | |
| "eval_runtime": 51.0452, | |
| "eval_samples_per_second": 17.436, | |
| "eval_steps_per_second": 2.194, | |
| "eval_wer": 0.5785984848484849, | |
| "step": 61560 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "grad_norm": 0.001944132731296122, | |
| "learning_rate": 2.246913580246914e-05, | |
| "loss": 0.0301, | |
| "step": 62640 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_cer": 0.22350936126068272, | |
| "eval_loss": 1.170377492904663, | |
| "eval_runtime": 50.364, | |
| "eval_samples_per_second": 17.671, | |
| "eval_steps_per_second": 2.224, | |
| "eval_wer": 0.5839646464646465, | |
| "step": 62640 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "grad_norm": 0.18270032107830048, | |
| "learning_rate": 1.1358024691358025e-05, | |
| "loss": 0.0283, | |
| "step": 63720 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_cer": 0.22133577038976437, | |
| "eval_loss": 1.1973356008529663, | |
| "eval_runtime": 50.9914, | |
| "eval_samples_per_second": 17.454, | |
| "eval_steps_per_second": 2.196, | |
| "eval_wer": 0.5804924242424242, | |
| "step": 63720 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 0.00017149873019661754, | |
| "learning_rate": 2.469135802469136e-07, | |
| "loss": 0.0245, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_cer": 0.2198290767178778, | |
| "eval_loss": 1.1907662153244019, | |
| "eval_runtime": 51.7658, | |
| "eval_samples_per_second": 17.193, | |
| "eval_steps_per_second": 2.164, | |
| "eval_wer": 0.5762310606060606, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "step": 64800, | |
| "total_flos": 1.8440987587856836e+20, | |
| "train_loss": 0.1667554270485301, | |
| "train_runtime": 81955.137, | |
| "train_samples_per_second": 12.641, | |
| "train_steps_per_second": 0.791 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 64800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 60, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8440987587856836e+20, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |