{ "best_metric": 0.23053792119026184, "best_model_checkpoint": "./wav2vec2-base-demo/checkpoint-12000", "epoch": 5.0, "eval_steps": 500, "global_step": 12160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20559210526315788, "grad_norm": 2.4157142639160156, "learning_rate": 9.94e-05, "loss": 6.7669, "step": 500 }, { "epoch": 0.20559210526315788, "eval_cer": 0.48300749888261685, "eval_loss": 2.1082637310028076, "eval_mer": 0.990451329257402, "eval_runtime": 68.7161, "eval_samples_per_second": 17.696, "eval_steps_per_second": 2.212, "eval_wer": 1.00347995545657, "eval_wil": 0.9999032641977176, "eval_wip": 9.673580228241418e-05, "step": 500 }, { "epoch": 0.41118421052631576, "grad_norm": 5.2127461433410645, "learning_rate": 9.573756432246999e-05, "loss": 1.6307, "step": 1000 }, { "epoch": 0.41118421052631576, "eval_cer": 0.2556407157873827, "eval_loss": 0.9759008288383484, "eval_mer": 0.6341346483117595, "eval_runtime": 68.4909, "eval_samples_per_second": 17.754, "eval_steps_per_second": 2.219, "eval_wer": 0.6444181514476615, "eval_wil": 0.8621975111555594, "eval_wip": 0.1378024888444406, "step": 1000 }, { "epoch": 0.6167763157894737, "grad_norm": 6.275217056274414, "learning_rate": 9.144939965694683e-05, "loss": 1.0963, "step": 1500 }, { "epoch": 0.6167763157894737, "eval_cer": 0.18851495638067176, "eval_loss": 0.6141384840011597, "eval_mer": 0.4867403314917127, "eval_runtime": 68.9152, "eval_samples_per_second": 17.645, "eval_steps_per_second": 2.206, "eval_wer": 0.4905345211581292, "eval_wil": 0.7295997310538076, "eval_wip": 0.27040026894619246, "step": 1500 }, { "epoch": 0.8223684210526315, "grad_norm": 2.86210036277771, "learning_rate": 8.716123499142367e-05, "loss": 0.8862, "step": 2000 }, { "epoch": 0.8223684210526315, "eval_cer": 0.15904914830571604, "eval_loss": 0.5130082368850708, "eval_mer": 0.4162177096284017, "eval_runtime": 68.4636, "eval_samples_per_second": 17.761, "eval_steps_per_second": 2.22, "eval_wer": 0.4194042316258352, "eval_wil": 0.6511190009094037, "eval_wip": 0.3488809990905964, "step": 2000 }, { "epoch": 1.0279605263157894, "grad_norm": 1.278159260749817, "learning_rate": 8.287307032590052e-05, "loss": 0.7938, "step": 2500 }, { "epoch": 1.0279605263157894, "eval_cer": 0.14429969044347696, "eval_loss": 0.47192344069480896, "eval_mer": 0.38208359154444677, "eval_runtime": 68.4897, "eval_samples_per_second": 17.754, "eval_steps_per_second": 2.219, "eval_wer": 0.3862054565701559, "eval_wil": 0.60976101750554, "eval_wip": 0.39023898249446, "step": 2500 }, { "epoch": 1.2335526315789473, "grad_norm": 1.1133716106414795, "learning_rate": 7.858490566037736e-05, "loss": 0.7144, "step": 3000 }, { "epoch": 1.2335526315789473, "eval_cer": 0.1325133672134947, "eval_loss": 0.4134939908981323, "eval_mer": 0.358600179818798, "eval_runtime": 68.9577, "eval_samples_per_second": 17.634, "eval_steps_per_second": 2.204, "eval_wer": 0.36087138084632514, "eval_wil": 0.5807818566708536, "eval_wip": 0.4192181433291464, "step": 3000 }, { "epoch": 1.4391447368421053, "grad_norm": 1.0960090160369873, "learning_rate": 7.42967409948542e-05, "loss": 0.6807, "step": 3500 }, { "epoch": 1.4391447368421053, "eval_cer": 0.1280603883527289, "eval_loss": 0.3944030702114105, "eval_mer": 0.35059816056980847, "eval_runtime": 68.4902, "eval_samples_per_second": 17.754, "eval_steps_per_second": 2.219, "eval_wer": 0.3528674832962138, "eval_wil": 0.5707987727352223, "eval_wip": 0.4292012272647777, "step": 3500 }, { "epoch": 1.6447368421052633, "grad_norm": 1.0340492725372314, "learning_rate": 7.00171526586621e-05, "loss": 0.637, "step": 4000 }, { "epoch": 1.6447368421052633, "eval_cer": 0.1168700028141502, "eval_loss": 0.3591752350330353, "eval_mer": 0.3217739707101409, "eval_runtime": 68.5342, "eval_samples_per_second": 17.743, "eval_steps_per_second": 2.218, "eval_wer": 0.32419265033407574, "eval_wil": 0.5317646875493529, "eval_wip": 0.46823531245064715, "step": 4000 }, { "epoch": 1.850328947368421, "grad_norm": 1.9663962125778198, "learning_rate": 6.572898799313894e-05, "loss": 0.6124, "step": 4500 }, { "epoch": 1.850328947368421, "eval_cer": 0.11360889933619163, "eval_loss": 0.33677050471305847, "eval_mer": 0.3150571131879543, "eval_runtime": 68.5056, "eval_samples_per_second": 17.75, "eval_steps_per_second": 2.219, "eval_wer": 0.31674554565701557, "eval_wil": 0.5224878877647927, "eval_wip": 0.4775121122352073, "step": 4500 }, { "epoch": 2.0559210526315788, "grad_norm": 1.006300449371338, "learning_rate": 6.144082332761578e-05, "loss": 0.5749, "step": 5000 }, { "epoch": 2.0559210526315788, "eval_cer": 0.1078978297935738, "eval_loss": 0.34313151240348816, "eval_mer": 0.2996542185338866, "eval_runtime": 68.9465, "eval_samples_per_second": 17.637, "eval_steps_per_second": 2.205, "eval_wer": 0.30157293986636974, "eval_wil": 0.5013751160066184, "eval_wip": 0.4986248839933815, "step": 5000 }, { "epoch": 2.2615131578947367, "grad_norm": 1.4610997438430786, "learning_rate": 5.715265866209263e-05, "loss": 0.5594, "step": 5500 }, { "epoch": 2.2615131578947367, "eval_cer": 0.10349451240709166, "eval_loss": 0.30756306648254395, "eval_mer": 0.2883830346640836, "eval_runtime": 68.5367, "eval_samples_per_second": 17.742, "eval_steps_per_second": 2.218, "eval_wer": 0.2900890868596882, "eval_wil": 0.48511930744607956, "eval_wip": 0.5148806925539204, "step": 5500 }, { "epoch": 2.4671052631578947, "grad_norm": 1.4430501461029053, "learning_rate": 5.2864493996569475e-05, "loss": 0.545, "step": 6000 }, { "epoch": 2.4671052631578947, "eval_cer": 0.0999685477329537, "eval_loss": 0.30883026123046875, "eval_mer": 0.281625881377022, "eval_runtime": 68.4999, "eval_samples_per_second": 17.752, "eval_steps_per_second": 2.219, "eval_wer": 0.2835467706013363, "eval_wil": 0.47570711452036263, "eval_wip": 0.5242928854796374, "step": 6000 }, { "epoch": 2.6726973684210527, "grad_norm": 2.016613245010376, "learning_rate": 4.857632933104632e-05, "loss": 0.5156, "step": 6500 }, { "epoch": 2.6726973684210527, "eval_cer": 0.09727027429687629, "eval_loss": 0.29233318567276, "eval_mer": 0.2730165317839109, "eval_runtime": 68.6063, "eval_samples_per_second": 17.724, "eval_steps_per_second": 2.216, "eval_wer": 0.2747076837416481, "eval_wil": 0.46347096415899913, "eval_wip": 0.5365290358410009, "step": 6500 }, { "epoch": 2.8782894736842106, "grad_norm": 1.225280523300171, "learning_rate": 4.428816466552316e-05, "loss": 0.5206, "step": 7000 }, { "epoch": 2.8782894736842106, "eval_cer": 0.09483686205697826, "eval_loss": 0.27001699805259705, "eval_mer": 0.26869806094182824, "eval_runtime": 68.6118, "eval_samples_per_second": 17.723, "eval_steps_per_second": 2.215, "eval_wer": 0.2700445434298441, "eval_wil": 0.4571035802996898, "eval_wip": 0.5428964197003102, "step": 7000 }, { "epoch": 3.0838815789473686, "grad_norm": 0.7366329431533813, "learning_rate": 4e-05, "loss": 0.5, "step": 7500 }, { "epoch": 3.0838815789473686, "eval_cer": 0.09303249515800625, "eval_loss": 0.2662801742553711, "eval_mer": 0.262010245050533, "eval_runtime": 68.6687, "eval_samples_per_second": 17.708, "eval_steps_per_second": 2.214, "eval_wer": 0.2634326280623608, "eval_wil": 0.4476734779906254, "eval_wip": 0.5523265220093746, "step": 7500 }, { "epoch": 3.2894736842105265, "grad_norm": 1.1763432025909424, "learning_rate": 3.572041166380789e-05, "loss": 0.4824, "step": 8000 }, { "epoch": 3.2894736842105265, "eval_cer": 0.09035077554669006, "eval_loss": 0.25726333260536194, "eval_mer": 0.2568203849882288, "eval_runtime": 68.6086, "eval_samples_per_second": 17.724, "eval_steps_per_second": 2.215, "eval_wer": 0.2581430957683742, "eval_wil": 0.43924823830236503, "eval_wip": 0.560751761697635, "step": 8000 }, { "epoch": 3.495065789473684, "grad_norm": 0.6791394948959351, "learning_rate": 3.1440823327615785e-05, "loss": 0.4834, "step": 8500 }, { "epoch": 3.495065789473684, "eval_cer": 0.08983760697909252, "eval_loss": 0.2671581506729126, "eval_mer": 0.2551337896701929, "eval_runtime": 68.5738, "eval_samples_per_second": 17.733, "eval_steps_per_second": 2.217, "eval_wer": 0.2568207126948775, "eval_wil": 0.43663686522583467, "eval_wip": 0.5633631347741653, "step": 8500 }, { "epoch": 3.700657894736842, "grad_norm": 1.039534330368042, "learning_rate": 2.7152658662092628e-05, "loss": 0.4821, "step": 9000 }, { "epoch": 3.700657894736842, "eval_cer": 0.08763594828585144, "eval_loss": 0.25131911039352417, "eval_mer": 0.24773309337578736, "eval_runtime": 68.6472, "eval_samples_per_second": 17.714, "eval_steps_per_second": 2.214, "eval_wer": 0.24909521158129175, "eval_wil": 0.42557549529315297, "eval_wip": 0.574424504706847, "step": 9000 }, { "epoch": 3.90625, "grad_norm": 1.0900495052337646, "learning_rate": 2.286449399656947e-05, "loss": 0.4469, "step": 9500 }, { "epoch": 3.90625, "eval_cer": 0.08664271880017878, "eval_loss": 0.24175629019737244, "eval_mer": 0.24719490234104446, "eval_runtime": 68.6419, "eval_samples_per_second": 17.715, "eval_steps_per_second": 2.214, "eval_wer": 0.24839922048997773, "eval_wil": 0.4247462420279243, "eval_wip": 0.5752537579720757, "step": 9500 }, { "epoch": 4.1118421052631575, "grad_norm": 0.9752183556556702, "learning_rate": 1.8576329331046313e-05, "loss": 0.4492, "step": 10000 }, { "epoch": 4.1118421052631575, "eval_cer": 0.08579847373735702, "eval_loss": 0.2439761906862259, "eval_mer": 0.24421985324657344, "eval_runtime": 68.593, "eval_samples_per_second": 17.728, "eval_steps_per_second": 2.216, "eval_wer": 0.2455456570155902, "eval_wil": 0.4201968413979644, "eval_wip": 0.5798031586020356, "step": 10000 }, { "epoch": 4.317434210526316, "grad_norm": 1.3445626497268677, "learning_rate": 1.4288164665523158e-05, "loss": 0.4398, "step": 10500 }, { "epoch": 4.317434210526316, "eval_cer": 0.0856329354897449, "eval_loss": 0.24096588790416718, "eval_mer": 0.24401549744015497, "eval_runtime": 68.5807, "eval_samples_per_second": 17.731, "eval_steps_per_second": 2.216, "eval_wer": 0.2454760579064588, "eval_wil": 0.4199702010687998, "eval_wip": 0.5800297989312002, "step": 10500 }, { "epoch": 4.5230263157894735, "grad_norm": 0.947695791721344, "learning_rate": 1e-05, "loss": 0.4348, "step": 11000 }, { "epoch": 4.5230263157894735, "eval_cer": 0.08404376831266865, "eval_loss": 0.23272591829299927, "eval_mer": 0.24004983733647123, "eval_runtime": 68.9296, "eval_samples_per_second": 17.641, "eval_steps_per_second": 2.205, "eval_wer": 0.241369710467706, "eval_wil": 0.41390470101381893, "eval_wip": 0.5860952989861811, "step": 11000 }, { "epoch": 4.728618421052632, "grad_norm": 4.606724739074707, "learning_rate": 5.7118353344768446e-06, "loss": 0.4397, "step": 11500 }, { "epoch": 4.728618421052632, "eval_cer": 0.0832326308993693, "eval_loss": 0.2335677444934845, "eval_mer": 0.2382106502319784, "eval_runtime": 68.6091, "eval_samples_per_second": 17.724, "eval_steps_per_second": 2.215, "eval_wer": 0.23942093541202672, "eval_wil": 0.41151236953121095, "eval_wip": 0.588487630468789, "step": 11500 }, { "epoch": 4.934210526315789, "grad_norm": 1.276310682296753, "learning_rate": 1.4236706689536879e-06, "loss": 0.4395, "step": 12000 }, { "epoch": 4.934210526315789, "eval_cer": 0.0832326308993693, "eval_loss": 0.23053792119026184, "eval_mer": 0.23857340720221606, "eval_runtime": 68.6428, "eval_samples_per_second": 17.715, "eval_steps_per_second": 2.214, "eval_wer": 0.23976893095768373, "eval_wil": 0.411989793635261, "eval_wip": 0.588010206364739, "step": 12000 } ], "logging_steps": 500, "max_steps": 12160, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6624377074768785e+19, "train_batch_size": 10, "trial_name": null, "trial_params": null }