{ "best_metric": 0.19209351050339085, "best_model_checkpoint": "./en-xlsr/checkpoint-6000", "epoch": 30.0, "eval_steps": 600, "global_step": 6450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47, "learning_rate": 0.0002983555207517619, "loss": 4.591, "step": 100 }, { "epoch": 0.93, "learning_rate": 0.0002936570086139389, "loss": 2.9761, "step": 200 }, { "epoch": 1.4, "learning_rate": 0.0002889584964761159, "loss": 2.8963, "step": 300 }, { "epoch": 1.86, "learning_rate": 0.00028425998433829287, "loss": 2.0367, "step": 400 }, { "epoch": 2.33, "learning_rate": 0.0002795614722004698, "loss": 0.9943, "step": 500 }, { "epoch": 2.79, "learning_rate": 0.0002748629600626468, "loss": 0.8037, "step": 600 }, { "epoch": 2.79, "eval_cer": 0.13958760039582999, "eval_loss": 0.5361498594284058, "eval_runtime": 23.0079, "eval_samples_per_second": 75.539, "eval_steps_per_second": 4.738, "eval_wer": 0.3517671059160831, "step": 600 }, { "epoch": 3.26, "learning_rate": 0.0002701644479248238, "loss": 0.685, "step": 700 }, { "epoch": 3.72, "learning_rate": 0.0002654659357870008, "loss": 0.6178, "step": 800 }, { "epoch": 4.19, "learning_rate": 0.00026076742364917777, "loss": 0.5696, "step": 900 }, { "epoch": 4.65, "learning_rate": 0.0002560689115113547, "loss": 0.5172, "step": 1000 }, { "epoch": 5.12, "learning_rate": 0.0002513703993735317, "loss": 0.4947, "step": 1100 }, { "epoch": 5.58, "learning_rate": 0.0002466718872357087, "loss": 0.4529, "step": 1200 }, { "epoch": 5.58, "eval_cer": 0.10734586794927853, "eval_loss": 0.4010535180568695, "eval_runtime": 22.5531, "eval_samples_per_second": 77.062, "eval_steps_per_second": 4.833, "eval_wer": 0.256657661134697, "step": 1200 }, { "epoch": 6.05, "learning_rate": 0.00024197337509788565, "loss": 0.4454, "step": 1300 }, { "epoch": 6.51, "learning_rate": 0.00023727486296006264, "loss": 0.4102, "step": 1400 }, { "epoch": 6.98, "learning_rate": 0.0002325763508222396, "loss": 0.4059, "step": 1500 }, { "epoch": 7.44, "learning_rate": 0.0002278778386844166, "loss": 0.3731, "step": 1600 }, { "epoch": 7.91, "learning_rate": 0.00022317932654659358, "loss": 0.3747, "step": 1700 }, { "epoch": 8.37, "learning_rate": 0.00021848081440877052, "loss": 0.3482, "step": 1800 }, { "epoch": 8.37, "eval_cer": 0.1060571191862472, "eval_loss": 0.4073833227157593, "eval_runtime": 22.9802, "eval_samples_per_second": 75.63, "eval_steps_per_second": 4.743, "eval_wer": 0.2514748855929867, "step": 1800 }, { "epoch": 8.84, "learning_rate": 0.0002137823022709475, "loss": 0.3388, "step": 1900 }, { "epoch": 9.3, "learning_rate": 0.00020908379013312447, "loss": 0.3296, "step": 2000 }, { "epoch": 9.77, "learning_rate": 0.00020438527799530146, "loss": 0.3247, "step": 2100 }, { "epoch": 10.23, "learning_rate": 0.00019968676585747845, "loss": 0.3057, "step": 2200 }, { "epoch": 10.7, "learning_rate": 0.00019498825371965542, "loss": 0.3022, "step": 2300 }, { "epoch": 11.16, "learning_rate": 0.0001902897415818324, "loss": 0.291, "step": 2400 }, { "epoch": 11.16, "eval_cer": 0.09982049570800636, "eval_loss": 0.3988887071609497, "eval_runtime": 22.5626, "eval_samples_per_second": 77.03, "eval_steps_per_second": 4.831, "eval_wer": 0.22886916248552683, "step": 2400 }, { "epoch": 11.63, "learning_rate": 0.0001855912294440094, "loss": 0.2773, "step": 2500 }, { "epoch": 12.09, "learning_rate": 0.00018089271730618636, "loss": 0.2759, "step": 2600 }, { "epoch": 12.56, "learning_rate": 0.00017619420516836332, "loss": 0.2551, "step": 2700 }, { "epoch": 13.02, "learning_rate": 0.00017149569303054031, "loss": 0.2587, "step": 2800 }, { "epoch": 13.49, "learning_rate": 0.00016679718089271728, "loss": 0.2426, "step": 2900 }, { "epoch": 13.95, "learning_rate": 0.00016209866875489427, "loss": 0.2469, "step": 3000 }, { "epoch": 13.95, "eval_cer": 0.09549398200354406, "eval_loss": 0.40880683064460754, "eval_runtime": 23.2843, "eval_samples_per_second": 74.642, "eval_steps_per_second": 4.681, "eval_wer": 0.21712521365165133, "step": 3000 }, { "epoch": 14.42, "learning_rate": 0.00015740015661707126, "loss": 0.2398, "step": 3100 }, { "epoch": 14.88, "learning_rate": 0.00015270164447924822, "loss": 0.2298, "step": 3200 }, { "epoch": 15.35, "learning_rate": 0.00014800313234142518, "loss": 0.2263, "step": 3300 }, { "epoch": 15.81, "learning_rate": 0.00014330462020360218, "loss": 0.2193, "step": 3400 }, { "epoch": 16.28, "learning_rate": 0.00013860610806577917, "loss": 0.2139, "step": 3500 }, { "epoch": 16.74, "learning_rate": 0.00013390759592795613, "loss": 0.2088, "step": 3600 }, { "epoch": 16.74, "eval_cer": 0.09318113824131821, "eval_loss": 0.4487144649028778, "eval_runtime": 22.9016, "eval_samples_per_second": 75.89, "eval_steps_per_second": 4.759, "eval_wer": 0.20620830346804875, "step": 3600 }, { "epoch": 17.21, "learning_rate": 0.00012920908379013312, "loss": 0.212, "step": 3700 }, { "epoch": 17.67, "learning_rate": 0.00012451057165231008, "loss": 0.1886, "step": 3800 }, { "epoch": 18.14, "learning_rate": 0.00011981205951448707, "loss": 0.1854, "step": 3900 }, { "epoch": 18.6, "learning_rate": 0.00011511354737666405, "loss": 0.1891, "step": 4000 }, { "epoch": 19.07, "learning_rate": 0.00011041503523884103, "loss": 0.1832, "step": 4100 }, { "epoch": 19.53, "learning_rate": 0.000105716523101018, "loss": 0.1828, "step": 4200 }, { "epoch": 19.53, "eval_cer": 0.09201896301751318, "eval_loss": 0.4508346617221832, "eval_runtime": 22.6393, "eval_samples_per_second": 76.769, "eval_steps_per_second": 4.815, "eval_wer": 0.20146661520648398, "step": 4200 }, { "epoch": 20.0, "learning_rate": 0.00010101801096319498, "loss": 0.1805, "step": 4300 }, { "epoch": 20.47, "learning_rate": 9.631949882537196e-05, "loss": 0.1767, "step": 4400 }, { "epoch": 20.93, "learning_rate": 9.162098668754895e-05, "loss": 0.1717, "step": 4500 }, { "epoch": 21.4, "learning_rate": 8.692247454972591e-05, "loss": 0.1592, "step": 4600 }, { "epoch": 21.86, "learning_rate": 8.222396241190289e-05, "loss": 0.1621, "step": 4700 }, { "epoch": 22.33, "learning_rate": 7.752545027407988e-05, "loss": 0.1566, "step": 4800 }, { "epoch": 22.33, "eval_cer": 0.09145513543368697, "eval_loss": 0.4802495539188385, "eval_runtime": 22.7447, "eval_samples_per_second": 76.413, "eval_steps_per_second": 4.792, "eval_wer": 0.19837900424546506, "step": 4800 }, { "epoch": 22.79, "learning_rate": 7.282693813625684e-05, "loss": 0.1531, "step": 4900 }, { "epoch": 23.26, "learning_rate": 6.812842599843383e-05, "loss": 0.1495, "step": 5000 }, { "epoch": 23.72, "learning_rate": 6.34299138606108e-05, "loss": 0.1502, "step": 5100 }, { "epoch": 24.19, "learning_rate": 5.873140172278778e-05, "loss": 0.1412, "step": 5200 }, { "epoch": 24.65, "learning_rate": 5.4032889584964754e-05, "loss": 0.1378, "step": 5300 }, { "epoch": 25.12, "learning_rate": 4.933437744714174e-05, "loss": 0.1414, "step": 5400 }, { "epoch": 25.12, "eval_cer": 0.08983269279451361, "eval_loss": 0.47997498512268066, "eval_runtime": 23.0232, "eval_samples_per_second": 75.489, "eval_steps_per_second": 4.734, "eval_wer": 0.19882009152561064, "step": 5400 }, { "epoch": 25.58, "learning_rate": 4.463586530931871e-05, "loss": 0.1365, "step": 5500 }, { "epoch": 26.05, "learning_rate": 3.993735317149569e-05, "loss": 0.1374, "step": 5600 }, { "epoch": 26.51, "learning_rate": 3.523884103367267e-05, "loss": 0.133, "step": 5700 }, { "epoch": 26.98, "learning_rate": 3.0540328895849644e-05, "loss": 0.1325, "step": 5800 }, { "epoch": 27.44, "learning_rate": 2.584181675802662e-05, "loss": 0.1324, "step": 5900 }, { "epoch": 27.91, "learning_rate": 2.1143304620203598e-05, "loss": 0.1255, "step": 6000 }, { "epoch": 27.91, "eval_cer": 0.08848641060456125, "eval_loss": 0.49347782135009766, "eval_runtime": 23.1133, "eval_samples_per_second": 75.195, "eval_steps_per_second": 4.716, "eval_wer": 0.19209351050339085, "step": 6000 }, { "epoch": 28.37, "learning_rate": 1.6444792482380578e-05, "loss": 0.1303, "step": 6100 }, { "epoch": 28.84, "learning_rate": 1.1746280344557556e-05, "loss": 0.1264, "step": 6200 }, { "epoch": 29.3, "learning_rate": 7.047768206734534e-06, "loss": 0.1276, "step": 6300 }, { "epoch": 29.77, "learning_rate": 2.349256068911511e-06, "loss": 0.1256, "step": 6400 }, { "epoch": 30.0, "step": 6450, "total_flos": 4.472815672614715e+19, "train_loss": 0.45229944088662316, "train_runtime": 8424.0917, "train_samples_per_second": 48.981, "train_steps_per_second": 0.766 } ], "logging_steps": 100, "max_steps": 6450, "num_train_epochs": 30, "save_steps": 600, "total_flos": 4.472815672614715e+19, "trial_name": null, "trial_params": null }