wav2vec2-large-robust-vi / trainer_state.json
longdnk's picture
Upload 12 files
06a3695 verified
{
"best_metric": 0.23053792119026184,
"best_model_checkpoint": "./wav2vec2-base-demo/checkpoint-12000",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 12160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.20559210526315788,
"grad_norm": 2.4157142639160156,
"learning_rate": 9.94e-05,
"loss": 6.7669,
"step": 500
},
{
"epoch": 0.20559210526315788,
"eval_cer": 0.48300749888261685,
"eval_loss": 2.1082637310028076,
"eval_mer": 0.990451329257402,
"eval_runtime": 68.7161,
"eval_samples_per_second": 17.696,
"eval_steps_per_second": 2.212,
"eval_wer": 1.00347995545657,
"eval_wil": 0.9999032641977176,
"eval_wip": 9.673580228241418e-05,
"step": 500
},
{
"epoch": 0.41118421052631576,
"grad_norm": 5.2127461433410645,
"learning_rate": 9.573756432246999e-05,
"loss": 1.6307,
"step": 1000
},
{
"epoch": 0.41118421052631576,
"eval_cer": 0.2556407157873827,
"eval_loss": 0.9759008288383484,
"eval_mer": 0.6341346483117595,
"eval_runtime": 68.4909,
"eval_samples_per_second": 17.754,
"eval_steps_per_second": 2.219,
"eval_wer": 0.6444181514476615,
"eval_wil": 0.8621975111555594,
"eval_wip": 0.1378024888444406,
"step": 1000
},
{
"epoch": 0.6167763157894737,
"grad_norm": 6.275217056274414,
"learning_rate": 9.144939965694683e-05,
"loss": 1.0963,
"step": 1500
},
{
"epoch": 0.6167763157894737,
"eval_cer": 0.18851495638067176,
"eval_loss": 0.6141384840011597,
"eval_mer": 0.4867403314917127,
"eval_runtime": 68.9152,
"eval_samples_per_second": 17.645,
"eval_steps_per_second": 2.206,
"eval_wer": 0.4905345211581292,
"eval_wil": 0.7295997310538076,
"eval_wip": 0.27040026894619246,
"step": 1500
},
{
"epoch": 0.8223684210526315,
"grad_norm": 2.86210036277771,
"learning_rate": 8.716123499142367e-05,
"loss": 0.8862,
"step": 2000
},
{
"epoch": 0.8223684210526315,
"eval_cer": 0.15904914830571604,
"eval_loss": 0.5130082368850708,
"eval_mer": 0.4162177096284017,
"eval_runtime": 68.4636,
"eval_samples_per_second": 17.761,
"eval_steps_per_second": 2.22,
"eval_wer": 0.4194042316258352,
"eval_wil": 0.6511190009094037,
"eval_wip": 0.3488809990905964,
"step": 2000
},
{
"epoch": 1.0279605263157894,
"grad_norm": 1.278159260749817,
"learning_rate": 8.287307032590052e-05,
"loss": 0.7938,
"step": 2500
},
{
"epoch": 1.0279605263157894,
"eval_cer": 0.14429969044347696,
"eval_loss": 0.47192344069480896,
"eval_mer": 0.38208359154444677,
"eval_runtime": 68.4897,
"eval_samples_per_second": 17.754,
"eval_steps_per_second": 2.219,
"eval_wer": 0.3862054565701559,
"eval_wil": 0.60976101750554,
"eval_wip": 0.39023898249446,
"step": 2500
},
{
"epoch": 1.2335526315789473,
"grad_norm": 1.1133716106414795,
"learning_rate": 7.858490566037736e-05,
"loss": 0.7144,
"step": 3000
},
{
"epoch": 1.2335526315789473,
"eval_cer": 0.1325133672134947,
"eval_loss": 0.4134939908981323,
"eval_mer": 0.358600179818798,
"eval_runtime": 68.9577,
"eval_samples_per_second": 17.634,
"eval_steps_per_second": 2.204,
"eval_wer": 0.36087138084632514,
"eval_wil": 0.5807818566708536,
"eval_wip": 0.4192181433291464,
"step": 3000
},
{
"epoch": 1.4391447368421053,
"grad_norm": 1.0960090160369873,
"learning_rate": 7.42967409948542e-05,
"loss": 0.6807,
"step": 3500
},
{
"epoch": 1.4391447368421053,
"eval_cer": 0.1280603883527289,
"eval_loss": 0.3944030702114105,
"eval_mer": 0.35059816056980847,
"eval_runtime": 68.4902,
"eval_samples_per_second": 17.754,
"eval_steps_per_second": 2.219,
"eval_wer": 0.3528674832962138,
"eval_wil": 0.5707987727352223,
"eval_wip": 0.4292012272647777,
"step": 3500
},
{
"epoch": 1.6447368421052633,
"grad_norm": 1.0340492725372314,
"learning_rate": 7.00171526586621e-05,
"loss": 0.637,
"step": 4000
},
{
"epoch": 1.6447368421052633,
"eval_cer": 0.1168700028141502,
"eval_loss": 0.3591752350330353,
"eval_mer": 0.3217739707101409,
"eval_runtime": 68.5342,
"eval_samples_per_second": 17.743,
"eval_steps_per_second": 2.218,
"eval_wer": 0.32419265033407574,
"eval_wil": 0.5317646875493529,
"eval_wip": 0.46823531245064715,
"step": 4000
},
{
"epoch": 1.850328947368421,
"grad_norm": 1.9663962125778198,
"learning_rate": 6.572898799313894e-05,
"loss": 0.6124,
"step": 4500
},
{
"epoch": 1.850328947368421,
"eval_cer": 0.11360889933619163,
"eval_loss": 0.33677050471305847,
"eval_mer": 0.3150571131879543,
"eval_runtime": 68.5056,
"eval_samples_per_second": 17.75,
"eval_steps_per_second": 2.219,
"eval_wer": 0.31674554565701557,
"eval_wil": 0.5224878877647927,
"eval_wip": 0.4775121122352073,
"step": 4500
},
{
"epoch": 2.0559210526315788,
"grad_norm": 1.006300449371338,
"learning_rate": 6.144082332761578e-05,
"loss": 0.5749,
"step": 5000
},
{
"epoch": 2.0559210526315788,
"eval_cer": 0.1078978297935738,
"eval_loss": 0.34313151240348816,
"eval_mer": 0.2996542185338866,
"eval_runtime": 68.9465,
"eval_samples_per_second": 17.637,
"eval_steps_per_second": 2.205,
"eval_wer": 0.30157293986636974,
"eval_wil": 0.5013751160066184,
"eval_wip": 0.4986248839933815,
"step": 5000
},
{
"epoch": 2.2615131578947367,
"grad_norm": 1.4610997438430786,
"learning_rate": 5.715265866209263e-05,
"loss": 0.5594,
"step": 5500
},
{
"epoch": 2.2615131578947367,
"eval_cer": 0.10349451240709166,
"eval_loss": 0.30756306648254395,
"eval_mer": 0.2883830346640836,
"eval_runtime": 68.5367,
"eval_samples_per_second": 17.742,
"eval_steps_per_second": 2.218,
"eval_wer": 0.2900890868596882,
"eval_wil": 0.48511930744607956,
"eval_wip": 0.5148806925539204,
"step": 5500
},
{
"epoch": 2.4671052631578947,
"grad_norm": 1.4430501461029053,
"learning_rate": 5.2864493996569475e-05,
"loss": 0.545,
"step": 6000
},
{
"epoch": 2.4671052631578947,
"eval_cer": 0.0999685477329537,
"eval_loss": 0.30883026123046875,
"eval_mer": 0.281625881377022,
"eval_runtime": 68.4999,
"eval_samples_per_second": 17.752,
"eval_steps_per_second": 2.219,
"eval_wer": 0.2835467706013363,
"eval_wil": 0.47570711452036263,
"eval_wip": 0.5242928854796374,
"step": 6000
},
{
"epoch": 2.6726973684210527,
"grad_norm": 2.016613245010376,
"learning_rate": 4.857632933104632e-05,
"loss": 0.5156,
"step": 6500
},
{
"epoch": 2.6726973684210527,
"eval_cer": 0.09727027429687629,
"eval_loss": 0.29233318567276,
"eval_mer": 0.2730165317839109,
"eval_runtime": 68.6063,
"eval_samples_per_second": 17.724,
"eval_steps_per_second": 2.216,
"eval_wer": 0.2747076837416481,
"eval_wil": 0.46347096415899913,
"eval_wip": 0.5365290358410009,
"step": 6500
},
{
"epoch": 2.8782894736842106,
"grad_norm": 1.225280523300171,
"learning_rate": 4.428816466552316e-05,
"loss": 0.5206,
"step": 7000
},
{
"epoch": 2.8782894736842106,
"eval_cer": 0.09483686205697826,
"eval_loss": 0.27001699805259705,
"eval_mer": 0.26869806094182824,
"eval_runtime": 68.6118,
"eval_samples_per_second": 17.723,
"eval_steps_per_second": 2.215,
"eval_wer": 0.2700445434298441,
"eval_wil": 0.4571035802996898,
"eval_wip": 0.5428964197003102,
"step": 7000
},
{
"epoch": 3.0838815789473686,
"grad_norm": 0.7366329431533813,
"learning_rate": 4e-05,
"loss": 0.5,
"step": 7500
},
{
"epoch": 3.0838815789473686,
"eval_cer": 0.09303249515800625,
"eval_loss": 0.2662801742553711,
"eval_mer": 0.262010245050533,
"eval_runtime": 68.6687,
"eval_samples_per_second": 17.708,
"eval_steps_per_second": 2.214,
"eval_wer": 0.2634326280623608,
"eval_wil": 0.4476734779906254,
"eval_wip": 0.5523265220093746,
"step": 7500
},
{
"epoch": 3.2894736842105265,
"grad_norm": 1.1763432025909424,
"learning_rate": 3.572041166380789e-05,
"loss": 0.4824,
"step": 8000
},
{
"epoch": 3.2894736842105265,
"eval_cer": 0.09035077554669006,
"eval_loss": 0.25726333260536194,
"eval_mer": 0.2568203849882288,
"eval_runtime": 68.6086,
"eval_samples_per_second": 17.724,
"eval_steps_per_second": 2.215,
"eval_wer": 0.2581430957683742,
"eval_wil": 0.43924823830236503,
"eval_wip": 0.560751761697635,
"step": 8000
},
{
"epoch": 3.495065789473684,
"grad_norm": 0.6791394948959351,
"learning_rate": 3.1440823327615785e-05,
"loss": 0.4834,
"step": 8500
},
{
"epoch": 3.495065789473684,
"eval_cer": 0.08983760697909252,
"eval_loss": 0.2671581506729126,
"eval_mer": 0.2551337896701929,
"eval_runtime": 68.5738,
"eval_samples_per_second": 17.733,
"eval_steps_per_second": 2.217,
"eval_wer": 0.2568207126948775,
"eval_wil": 0.43663686522583467,
"eval_wip": 0.5633631347741653,
"step": 8500
},
{
"epoch": 3.700657894736842,
"grad_norm": 1.039534330368042,
"learning_rate": 2.7152658662092628e-05,
"loss": 0.4821,
"step": 9000
},
{
"epoch": 3.700657894736842,
"eval_cer": 0.08763594828585144,
"eval_loss": 0.25131911039352417,
"eval_mer": 0.24773309337578736,
"eval_runtime": 68.6472,
"eval_samples_per_second": 17.714,
"eval_steps_per_second": 2.214,
"eval_wer": 0.24909521158129175,
"eval_wil": 0.42557549529315297,
"eval_wip": 0.574424504706847,
"step": 9000
},
{
"epoch": 3.90625,
"grad_norm": 1.0900495052337646,
"learning_rate": 2.286449399656947e-05,
"loss": 0.4469,
"step": 9500
},
{
"epoch": 3.90625,
"eval_cer": 0.08664271880017878,
"eval_loss": 0.24175629019737244,
"eval_mer": 0.24719490234104446,
"eval_runtime": 68.6419,
"eval_samples_per_second": 17.715,
"eval_steps_per_second": 2.214,
"eval_wer": 0.24839922048997773,
"eval_wil": 0.4247462420279243,
"eval_wip": 0.5752537579720757,
"step": 9500
},
{
"epoch": 4.1118421052631575,
"grad_norm": 0.9752183556556702,
"learning_rate": 1.8576329331046313e-05,
"loss": 0.4492,
"step": 10000
},
{
"epoch": 4.1118421052631575,
"eval_cer": 0.08579847373735702,
"eval_loss": 0.2439761906862259,
"eval_mer": 0.24421985324657344,
"eval_runtime": 68.593,
"eval_samples_per_second": 17.728,
"eval_steps_per_second": 2.216,
"eval_wer": 0.2455456570155902,
"eval_wil": 0.4201968413979644,
"eval_wip": 0.5798031586020356,
"step": 10000
},
{
"epoch": 4.317434210526316,
"grad_norm": 1.3445626497268677,
"learning_rate": 1.4288164665523158e-05,
"loss": 0.4398,
"step": 10500
},
{
"epoch": 4.317434210526316,
"eval_cer": 0.0856329354897449,
"eval_loss": 0.24096588790416718,
"eval_mer": 0.24401549744015497,
"eval_runtime": 68.5807,
"eval_samples_per_second": 17.731,
"eval_steps_per_second": 2.216,
"eval_wer": 0.2454760579064588,
"eval_wil": 0.4199702010687998,
"eval_wip": 0.5800297989312002,
"step": 10500
},
{
"epoch": 4.5230263157894735,
"grad_norm": 0.947695791721344,
"learning_rate": 1e-05,
"loss": 0.4348,
"step": 11000
},
{
"epoch": 4.5230263157894735,
"eval_cer": 0.08404376831266865,
"eval_loss": 0.23272591829299927,
"eval_mer": 0.24004983733647123,
"eval_runtime": 68.9296,
"eval_samples_per_second": 17.641,
"eval_steps_per_second": 2.205,
"eval_wer": 0.241369710467706,
"eval_wil": 0.41390470101381893,
"eval_wip": 0.5860952989861811,
"step": 11000
},
{
"epoch": 4.728618421052632,
"grad_norm": 4.606724739074707,
"learning_rate": 5.7118353344768446e-06,
"loss": 0.4397,
"step": 11500
},
{
"epoch": 4.728618421052632,
"eval_cer": 0.0832326308993693,
"eval_loss": 0.2335677444934845,
"eval_mer": 0.2382106502319784,
"eval_runtime": 68.6091,
"eval_samples_per_second": 17.724,
"eval_steps_per_second": 2.215,
"eval_wer": 0.23942093541202672,
"eval_wil": 0.41151236953121095,
"eval_wip": 0.588487630468789,
"step": 11500
},
{
"epoch": 4.934210526315789,
"grad_norm": 1.276310682296753,
"learning_rate": 1.4236706689536879e-06,
"loss": 0.4395,
"step": 12000
},
{
"epoch": 4.934210526315789,
"eval_cer": 0.0832326308993693,
"eval_loss": 0.23053792119026184,
"eval_mer": 0.23857340720221606,
"eval_runtime": 68.6428,
"eval_samples_per_second": 17.715,
"eval_steps_per_second": 2.214,
"eval_wer": 0.23976893095768373,
"eval_wil": 0.411989793635261,
"eval_wip": 0.588010206364739,
"step": 12000
}
],
"logging_steps": 500,
"max_steps": 12160,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6624377074768785e+19,
"train_batch_size": 10,
"trial_name": null,
"trial_params": null
}