wav2vec2-kac / trainer_state.json
ctaguchi's picture
Model save
e9a55a7 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 100,
"global_step": 11100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2706359945872801,
"grad_norm": 1.0425561666488647,
"learning_rate": 0.00029699999999999996,
"loss": 5.107,
"step": 100
},
{
"epoch": 0.2706359945872801,
"eval_cer": 0.9907546764136744,
"eval_loss": 2.8580877780914307,
"eval_runtime": 3.2586,
"eval_samples_per_second": 16.879,
"eval_steps_per_second": 2.148,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.5412719891745602,
"grad_norm": 1.5294588804244995,
"learning_rate": 0.00029729999999999996,
"loss": 2.696,
"step": 200
},
{
"epoch": 0.5412719891745602,
"eval_cer": 0.700064502257579,
"eval_loss": 2.3650922775268555,
"eval_runtime": 3.2771,
"eval_samples_per_second": 16.783,
"eval_steps_per_second": 2.136,
"eval_wer": 0.9815465729349736,
"step": 200
},
{
"epoch": 0.8119079837618404,
"grad_norm": 4.284913539886475,
"learning_rate": 0.0002945727272727273,
"loss": 1.9594,
"step": 300
},
{
"epoch": 0.8119079837618404,
"eval_cer": 0.39991399698989466,
"eval_loss": 1.3402470350265503,
"eval_runtime": 3.3075,
"eval_samples_per_second": 16.629,
"eval_steps_per_second": 2.116,
"eval_wer": 0.8391915641476274,
"step": 300
},
{
"epoch": 1.0811907983761841,
"grad_norm": 1.6875638961791992,
"learning_rate": 0.0002918454545454545,
"loss": 1.1816,
"step": 400
},
{
"epoch": 1.0811907983761841,
"eval_cer": 0.19221672758546549,
"eval_loss": 0.5386932492256165,
"eval_runtime": 3.2713,
"eval_samples_per_second": 16.813,
"eval_steps_per_second": 2.14,
"eval_wer": 0.5404217926186292,
"step": 400
},
{
"epoch": 1.3518267929634642,
"grad_norm": 2.1928625106811523,
"learning_rate": 0.0002891181818181818,
"loss": 0.8925,
"step": 500
},
{
"epoch": 1.3518267929634642,
"eval_cer": 0.1498602451085788,
"eval_loss": 0.3904643654823303,
"eval_runtime": 3.2769,
"eval_samples_per_second": 16.784,
"eval_steps_per_second": 2.136,
"eval_wer": 0.45782073813708263,
"step": 500
},
{
"epoch": 1.6224627875507442,
"grad_norm": 1.7728203535079956,
"learning_rate": 0.0002863909090909091,
"loss": 0.7373,
"step": 600
},
{
"epoch": 1.6224627875507442,
"eval_cer": 0.11416899591485702,
"eval_loss": 0.3346728980541229,
"eval_runtime": 3.2076,
"eval_samples_per_second": 17.147,
"eval_steps_per_second": 2.182,
"eval_wer": 0.37434094903339193,
"step": 600
},
{
"epoch": 1.8930987821380243,
"grad_norm": 1.9392462968826294,
"learning_rate": 0.00028366363636363634,
"loss": 0.66,
"step": 700
},
{
"epoch": 1.8930987821380243,
"eval_cer": 0.10728875510642873,
"eval_loss": 0.29814502596855164,
"eval_runtime": 3.2663,
"eval_samples_per_second": 16.839,
"eval_steps_per_second": 2.143,
"eval_wer": 0.3242530755711775,
"step": 700
},
{
"epoch": 2.1623815967523683,
"grad_norm": 1.3830772638320923,
"learning_rate": 0.0002809363636363636,
"loss": 0.6412,
"step": 800
},
{
"epoch": 2.1623815967523683,
"eval_cer": 0.10040851429800043,
"eval_loss": 0.2573491632938385,
"eval_runtime": 3.2309,
"eval_samples_per_second": 17.023,
"eval_steps_per_second": 2.167,
"eval_wer": 0.3312829525483304,
"step": 800
},
{
"epoch": 2.4330175913396483,
"grad_norm": 1.7728252410888672,
"learning_rate": 0.0002782090909090909,
"loss": 0.5683,
"step": 900
},
{
"epoch": 2.4330175913396483,
"eval_cer": 0.10578370242958504,
"eval_loss": 0.2961094379425049,
"eval_runtime": 3.236,
"eval_samples_per_second": 16.996,
"eval_steps_per_second": 2.163,
"eval_wer": 0.35676625659050965,
"step": 900
},
{
"epoch": 2.7036535859269284,
"grad_norm": 2.22818660736084,
"learning_rate": 0.00027548181818181814,
"loss": 0.5538,
"step": 1000
},
{
"epoch": 2.7036535859269284,
"eval_cer": 0.1077187701569555,
"eval_loss": 0.26289868354797363,
"eval_runtime": 3.2491,
"eval_samples_per_second": 16.928,
"eval_steps_per_second": 2.154,
"eval_wer": 0.3453427065026362,
"step": 1000
},
{
"epoch": 2.9742895805142084,
"grad_norm": 2.420909881591797,
"learning_rate": 0.00027275454545454546,
"loss": 0.4991,
"step": 1100
},
{
"epoch": 2.9742895805142084,
"eval_cer": 0.10427864975274134,
"eval_loss": 0.2832812964916229,
"eval_runtime": 3.3057,
"eval_samples_per_second": 16.638,
"eval_steps_per_second": 2.118,
"eval_wer": 0.3312829525483304,
"step": 1100
},
{
"epoch": 3.243572395128552,
"grad_norm": 5.234218120574951,
"learning_rate": 0.00027002727272727267,
"loss": 0.4536,
"step": 1200
},
{
"epoch": 3.243572395128552,
"eval_cer": 0.08535798752956353,
"eval_loss": 0.24891051650047302,
"eval_runtime": 3.2773,
"eval_samples_per_second": 16.782,
"eval_steps_per_second": 2.136,
"eval_wer": 0.3347978910369069,
"step": 1200
},
{
"epoch": 3.514208389715832,
"grad_norm": 2.5880632400512695,
"learning_rate": 0.0002673,
"loss": 0.4621,
"step": 1300
},
{
"epoch": 3.514208389715832,
"eval_cer": 0.09460331111588906,
"eval_loss": 0.23712627589702606,
"eval_runtime": 3.2761,
"eval_samples_per_second": 16.788,
"eval_steps_per_second": 2.137,
"eval_wer": 0.3260105448154657,
"step": 1300
},
{
"epoch": 3.7848443843031125,
"grad_norm": 3.540529251098633,
"learning_rate": 0.00026457272727272726,
"loss": 0.4401,
"step": 1400
},
{
"epoch": 3.7848443843031125,
"eval_cer": 0.08600301010535369,
"eval_loss": 0.21003246307373047,
"eval_runtime": 3.3426,
"eval_samples_per_second": 16.454,
"eval_steps_per_second": 2.094,
"eval_wer": 0.28295254833040423,
"step": 1400
},
{
"epoch": 4.054127198917456,
"grad_norm": 1.8023126125335693,
"learning_rate": 0.0002618454545454545,
"loss": 0.4278,
"step": 1500
},
{
"epoch": 4.054127198917456,
"eval_cer": 0.10105353687379058,
"eval_loss": 0.2677291929721832,
"eval_runtime": 3.2361,
"eval_samples_per_second": 16.996,
"eval_steps_per_second": 2.163,
"eval_wer": 0.3321616871704745,
"step": 1500
},
{
"epoch": 4.324763193504737,
"grad_norm": 3.478238821029663,
"learning_rate": 0.0002591181818181818,
"loss": 0.386,
"step": 1600
},
{
"epoch": 4.324763193504737,
"eval_cer": 0.09030316061062137,
"eval_loss": 0.24809867143630981,
"eval_runtime": 3.2871,
"eval_samples_per_second": 16.732,
"eval_steps_per_second": 2.13,
"eval_wer": 0.29789103690685415,
"step": 1600
},
{
"epoch": 4.595399188092016,
"grad_norm": 2.037954568862915,
"learning_rate": 0.00025639090909090905,
"loss": 0.3943,
"step": 1700
},
{
"epoch": 4.595399188092016,
"eval_cer": 0.09503332616641583,
"eval_loss": 0.2835349440574646,
"eval_runtime": 3.2426,
"eval_samples_per_second": 16.962,
"eval_steps_per_second": 2.159,
"eval_wer": 0.31282952548330406,
"step": 1700
},
{
"epoch": 4.866035182679297,
"grad_norm": 2.778028964996338,
"learning_rate": 0.0002536636363636364,
"loss": 0.3986,
"step": 1800
},
{
"epoch": 4.866035182679297,
"eval_cer": 0.0707374758116534,
"eval_loss": 0.17705407738685608,
"eval_runtime": 3.3043,
"eval_samples_per_second": 16.645,
"eval_steps_per_second": 2.118,
"eval_wer": 0.24253075571177504,
"step": 1800
},
{
"epoch": 5.13531799729364,
"grad_norm": 10.435393333435059,
"learning_rate": 0.00025093636363636364,
"loss": 0.3996,
"step": 1900
},
{
"epoch": 5.13531799729364,
"eval_cer": 0.0885831004085143,
"eval_loss": 0.24632568657398224,
"eval_runtime": 3.2851,
"eval_samples_per_second": 16.742,
"eval_steps_per_second": 2.131,
"eval_wer": 0.3233743409490334,
"step": 1900
},
{
"epoch": 5.40595399188092,
"grad_norm": 8.714759826660156,
"learning_rate": 0.0002482090909090909,
"loss": 0.3468,
"step": 2000
},
{
"epoch": 5.40595399188092,
"eval_cer": 0.0870780477316706,
"eval_loss": 0.2470923364162445,
"eval_runtime": 3.3067,
"eval_samples_per_second": 16.633,
"eval_steps_per_second": 2.117,
"eval_wer": 0.2521968365553603,
"step": 2000
},
{
"epoch": 5.6765899864682,
"grad_norm": 7.187710285186768,
"learning_rate": 0.00024548181818181817,
"loss": 0.3429,
"step": 2100
},
{
"epoch": 5.6765899864682,
"eval_cer": 0.09008815308535799,
"eval_loss": 0.24462804198265076,
"eval_runtime": 3.2272,
"eval_samples_per_second": 17.043,
"eval_steps_per_second": 2.169,
"eval_wer": 0.30492091388400705,
"step": 2100
},
{
"epoch": 5.94722598105548,
"grad_norm": 7.232232570648193,
"learning_rate": 0.00024275454545454544,
"loss": 0.3362,
"step": 2200
},
{
"epoch": 5.94722598105548,
"eval_cer": 0.10234358202537089,
"eval_loss": 0.2832447290420532,
"eval_runtime": 3.2707,
"eval_samples_per_second": 16.816,
"eval_steps_per_second": 2.14,
"eval_wer": 0.3216168717047452,
"step": 2200
},
{
"epoch": 6.216508795669824,
"grad_norm": 2.5066192150115967,
"learning_rate": 0.0002400272727272727,
"loss": 0.3257,
"step": 2300
},
{
"epoch": 6.216508795669824,
"eval_cer": 0.08922812298430445,
"eval_loss": 0.2616831660270691,
"eval_runtime": 3.289,
"eval_samples_per_second": 16.722,
"eval_steps_per_second": 2.128,
"eval_wer": 0.2750439367311072,
"step": 2300
},
{
"epoch": 6.487144790257104,
"grad_norm": 1.5899525880813599,
"learning_rate": 0.0002373,
"loss": 0.3088,
"step": 2400
},
{
"epoch": 6.487144790257104,
"eval_cer": 0.08213287465061277,
"eval_loss": 0.2477259337902069,
"eval_runtime": 3.2212,
"eval_samples_per_second": 17.074,
"eval_steps_per_second": 2.173,
"eval_wer": 0.2671353251318102,
"step": 2400
},
{
"epoch": 6.7577807848443845,
"grad_norm": 1.64902925491333,
"learning_rate": 0.00023457272727272723,
"loss": 0.3092,
"step": 2500
},
{
"epoch": 6.7577807848443845,
"eval_cer": 0.07288755106428725,
"eval_loss": 0.16897092759609222,
"eval_runtime": 3.2797,
"eval_samples_per_second": 16.77,
"eval_steps_per_second": 2.134,
"eval_wer": 0.2126537785588752,
"step": 2500
},
{
"epoch": 7.027063599458728,
"grad_norm": 3.107236862182617,
"learning_rate": 0.00023184545454545453,
"loss": 0.3247,
"step": 2600
},
{
"epoch": 7.027063599458728,
"eval_cer": 0.0668673403569125,
"eval_loss": 0.16091446578502655,
"eval_runtime": 3.2437,
"eval_samples_per_second": 16.956,
"eval_steps_per_second": 2.158,
"eval_wer": 0.24428822495606328,
"step": 2600
},
{
"epoch": 7.2976995940460085,
"grad_norm": 1.4398540258407593,
"learning_rate": 0.0002291181818181818,
"loss": 0.2877,
"step": 2700
},
{
"epoch": 7.2976995940460085,
"eval_cer": 0.081702859600086,
"eval_loss": 0.21893317997455597,
"eval_runtime": 3.3058,
"eval_samples_per_second": 16.637,
"eval_steps_per_second": 2.117,
"eval_wer": 0.2697715289982425,
"step": 2700
},
{
"epoch": 7.568335588633288,
"grad_norm": 1.3356603384017944,
"learning_rate": 0.00022639090909090908,
"loss": 0.2978,
"step": 2800
},
{
"epoch": 7.568335588633288,
"eval_cer": 0.08406794237798323,
"eval_loss": 0.21929292380809784,
"eval_runtime": 3.3113,
"eval_samples_per_second": 16.61,
"eval_steps_per_second": 2.114,
"eval_wer": 0.2697715289982425,
"step": 2800
},
{
"epoch": 7.838971583220569,
"grad_norm": 1.0422672033309937,
"learning_rate": 0.00022366363636363632,
"loss": 0.3011,
"step": 2900
},
{
"epoch": 7.838971583220569,
"eval_cer": 0.09051816813588476,
"eval_loss": 0.22263768315315247,
"eval_runtime": 3.2412,
"eval_samples_per_second": 16.969,
"eval_steps_per_second": 2.16,
"eval_wer": 0.27768014059753954,
"step": 2900
},
{
"epoch": 8.108254397834912,
"grad_norm": 3.0150039196014404,
"learning_rate": 0.00022093636363636362,
"loss": 0.2792,
"step": 3000
},
{
"epoch": 8.108254397834912,
"eval_cer": 0.08772307030746077,
"eval_loss": 0.24399547278881073,
"eval_runtime": 3.2156,
"eval_samples_per_second": 17.104,
"eval_steps_per_second": 2.177,
"eval_wer": 0.2750439367311072,
"step": 3000
},
{
"epoch": 8.378890392422193,
"grad_norm": 3.422705888748169,
"learning_rate": 0.00021820909090909088,
"loss": 0.2652,
"step": 3100
},
{
"epoch": 8.378890392422193,
"eval_cer": 0.08621801763061707,
"eval_loss": 0.25809481739997864,
"eval_runtime": 3.2041,
"eval_samples_per_second": 17.166,
"eval_steps_per_second": 2.185,
"eval_wer": 0.2565905096660808,
"step": 3100
},
{
"epoch": 8.649526387009473,
"grad_norm": 7.11945104598999,
"learning_rate": 0.00021548181818181817,
"loss": 0.2847,
"step": 3200
},
{
"epoch": 8.649526387009473,
"eval_cer": 0.08385293485271984,
"eval_loss": 0.2583249807357788,
"eval_runtime": 3.2692,
"eval_samples_per_second": 16.824,
"eval_steps_per_second": 2.141,
"eval_wer": 0.29876977152899825,
"step": 3200
},
{
"epoch": 8.920162381596752,
"grad_norm": 4.319618225097656,
"learning_rate": 0.0002127545454545454,
"loss": 0.2788,
"step": 3300
},
{
"epoch": 8.920162381596752,
"eval_cer": 0.078907761771662,
"eval_loss": 0.19587865471839905,
"eval_runtime": 3.2926,
"eval_samples_per_second": 16.704,
"eval_steps_per_second": 2.126,
"eval_wer": 0.24253075571177504,
"step": 3300
},
{
"epoch": 9.189445196211096,
"grad_norm": 2.1342787742614746,
"learning_rate": 0.0002100272727272727,
"loss": 0.2589,
"step": 3400
},
{
"epoch": 9.189445196211096,
"eval_cer": 0.06923242313480972,
"eval_loss": 0.19880472123622894,
"eval_runtime": 3.2414,
"eval_samples_per_second": 16.968,
"eval_steps_per_second": 2.16,
"eval_wer": 0.24956063268892795,
"step": 3400
},
{
"epoch": 9.460081190798377,
"grad_norm": 1.656413197517395,
"learning_rate": 0.00020729999999999997,
"loss": 0.2493,
"step": 3500
},
{
"epoch": 9.460081190798377,
"eval_cer": 0.0666523328316491,
"eval_loss": 0.17990422248840332,
"eval_runtime": 3.2699,
"eval_samples_per_second": 16.82,
"eval_steps_per_second": 2.141,
"eval_wer": 0.23198594024604569,
"step": 3500
},
{
"epoch": 9.730717185385656,
"grad_norm": 9.605318069458008,
"learning_rate": 0.00020457272727272726,
"loss": 0.2473,
"step": 3600
},
{
"epoch": 9.730717185385656,
"eval_cer": 0.07009245323586326,
"eval_loss": 0.24749121069908142,
"eval_runtime": 3.2843,
"eval_samples_per_second": 16.746,
"eval_steps_per_second": 2.131,
"eval_wer": 0.2671353251318102,
"step": 3600
},
{
"epoch": 10.0,
"grad_norm": 38.97199249267578,
"learning_rate": 0.00020184545454545456,
"loss": 0.2842,
"step": 3700
},
{
"epoch": 10.0,
"eval_cer": 0.07181251343797034,
"eval_loss": 0.20998047292232513,
"eval_runtime": 3.3675,
"eval_samples_per_second": 16.333,
"eval_steps_per_second": 2.079,
"eval_wer": 0.2592267135325132,
"step": 3700
},
{
"epoch": 10.27063599458728,
"grad_norm": 10.944853782653809,
"learning_rate": 0.0001991181818181818,
"loss": 0.2312,
"step": 3800
},
{
"epoch": 10.27063599458728,
"eval_cer": 0.06106213717480112,
"eval_loss": 0.20029255747795105,
"eval_runtime": 3.2308,
"eval_samples_per_second": 17.024,
"eval_steps_per_second": 2.167,
"eval_wer": 0.24077328646748683,
"step": 3800
},
{
"epoch": 10.541271989174561,
"grad_norm": 6.15484619140625,
"learning_rate": 0.0001963909090909091,
"loss": 0.231,
"step": 3900
},
{
"epoch": 10.541271989174561,
"eval_cer": 0.06880240808428295,
"eval_loss": 0.23144930601119995,
"eval_runtime": 3.3192,
"eval_samples_per_second": 16.57,
"eval_steps_per_second": 2.109,
"eval_wer": 0.2680140597539543,
"step": 3900
},
{
"epoch": 10.81190798376184,
"grad_norm": 9.186676025390625,
"learning_rate": 0.00019366363636363635,
"loss": 0.243,
"step": 4000
},
{
"epoch": 10.81190798376184,
"eval_cer": 0.0707374758116534,
"eval_loss": 0.20184335112571716,
"eval_runtime": 3.2475,
"eval_samples_per_second": 16.936,
"eval_steps_per_second": 2.155,
"eval_wer": 0.2504393673110721,
"step": 4000
},
{
"epoch": 11.081190798376184,
"grad_norm": 3.3094773292541504,
"learning_rate": 0.00019093636363636364,
"loss": 0.2188,
"step": 4100
},
{
"epoch": 11.081190798376184,
"eval_cer": 0.06299720490217157,
"eval_loss": 0.18349941074848175,
"eval_runtime": 3.2855,
"eval_samples_per_second": 16.74,
"eval_steps_per_second": 2.131,
"eval_wer": 0.24077328646748683,
"step": 4100
},
{
"epoch": 11.351826792963465,
"grad_norm": 4.869017124176025,
"learning_rate": 0.00018820909090909088,
"loss": 0.2129,
"step": 4200
},
{
"epoch": 11.351826792963465,
"eval_cer": 0.0627821973769082,
"eval_loss": 0.19908010959625244,
"eval_runtime": 3.2946,
"eval_samples_per_second": 16.694,
"eval_steps_per_second": 2.125,
"eval_wer": 0.2460456942003515,
"step": 4200
},
{
"epoch": 11.622462787550745,
"grad_norm": 3.1255884170532227,
"learning_rate": 0.00018548181818181818,
"loss": 0.2351,
"step": 4300
},
{
"epoch": 11.622462787550745,
"eval_cer": 0.0653622876800688,
"eval_loss": 0.21975572407245636,
"eval_runtime": 3.3112,
"eval_samples_per_second": 16.61,
"eval_steps_per_second": 2.114,
"eval_wer": 0.27065026362038663,
"step": 4300
},
{
"epoch": 11.893098782138024,
"grad_norm": 2.200582265853882,
"learning_rate": 0.00018275454545454544,
"loss": 0.218,
"step": 4400
},
{
"epoch": 11.893098782138024,
"eval_cer": 0.06858740055901956,
"eval_loss": 0.26052218675613403,
"eval_runtime": 3.1812,
"eval_samples_per_second": 17.289,
"eval_steps_per_second": 2.2,
"eval_wer": 0.27943760984182775,
"step": 4400
},
{
"epoch": 12.162381596752368,
"grad_norm": 3.319242477416992,
"learning_rate": 0.0001800272727272727,
"loss": 0.2202,
"step": 4500
},
{
"epoch": 12.162381596752368,
"eval_cer": 0.058052031821113736,
"eval_loss": 0.21814635396003723,
"eval_runtime": 3.272,
"eval_samples_per_second": 16.809,
"eval_steps_per_second": 2.139,
"eval_wer": 0.23286467486818982,
"step": 4500
},
{
"epoch": 12.433017591339649,
"grad_norm": 1.2420411109924316,
"learning_rate": 0.00017729999999999997,
"loss": 0.1962,
"step": 4600
},
{
"epoch": 12.433017591339649,
"eval_cer": 0.05568694904321651,
"eval_loss": 0.1839354932308197,
"eval_runtime": 3.2938,
"eval_samples_per_second": 16.698,
"eval_steps_per_second": 2.125,
"eval_wer": 0.23022847100175747,
"step": 4600
},
{
"epoch": 12.703653585926928,
"grad_norm": 0.9907166361808777,
"learning_rate": 0.00017457272727272727,
"loss": 0.2129,
"step": 4700
},
{
"epoch": 12.703653585926928,
"eval_cer": 0.05762201677058697,
"eval_loss": 0.1895829439163208,
"eval_runtime": 3.2727,
"eval_samples_per_second": 16.806,
"eval_steps_per_second": 2.139,
"eval_wer": 0.22671353251318102,
"step": 4700
},
{
"epoch": 12.974289580514208,
"grad_norm": 2.0261332988739014,
"learning_rate": 0.00017184545454545453,
"loss": 0.2074,
"step": 4800
},
{
"epoch": 12.974289580514208,
"eval_cer": 0.06063212212427435,
"eval_loss": 0.2010865956544876,
"eval_runtime": 3.2907,
"eval_samples_per_second": 16.714,
"eval_steps_per_second": 2.127,
"eval_wer": 0.2460456942003515,
"step": 4800
},
{
"epoch": 13.243572395128552,
"grad_norm": 4.432494640350342,
"learning_rate": 0.0001691181818181818,
"loss": 0.1813,
"step": 4900
},
{
"epoch": 13.243572395128552,
"eval_cer": 0.06106213717480112,
"eval_loss": 0.181858628988266,
"eval_runtime": 3.2727,
"eval_samples_per_second": 16.806,
"eval_steps_per_second": 2.139,
"eval_wer": 0.20913884007029876,
"step": 4900
},
{
"epoch": 13.514208389715833,
"grad_norm": 2.20430850982666,
"learning_rate": 0.00016639090909090906,
"loss": 0.1872,
"step": 5000
},
{
"epoch": 13.514208389715833,
"eval_cer": 0.06557729520533219,
"eval_loss": 0.21534210443496704,
"eval_runtime": 3.2649,
"eval_samples_per_second": 16.846,
"eval_steps_per_second": 2.144,
"eval_wer": 0.2601054481546573,
"step": 5000
},
{
"epoch": 13.784844384303112,
"grad_norm": 3.0091655254364014,
"learning_rate": 0.00016366363636363635,
"loss": 0.1947,
"step": 5100
},
{
"epoch": 13.784844384303112,
"eval_cer": 0.06880240808428295,
"eval_loss": 0.2198285013437271,
"eval_runtime": 3.2966,
"eval_samples_per_second": 16.684,
"eval_steps_per_second": 2.123,
"eval_wer": 0.2671353251318102,
"step": 5100
},
{
"epoch": 14.054127198917456,
"grad_norm": 1.1179289817810059,
"learning_rate": 0.00016093636363636362,
"loss": 0.2001,
"step": 5200
},
{
"epoch": 14.054127198917456,
"eval_cer": 0.06772737045796604,
"eval_loss": 0.21502529084682465,
"eval_runtime": 3.2826,
"eval_samples_per_second": 16.755,
"eval_steps_per_second": 2.132,
"eval_wer": 0.2618629173989455,
"step": 5200
},
{
"epoch": 14.324763193504737,
"grad_norm": 0.8994645476341248,
"learning_rate": 0.00015820909090909089,
"loss": 0.1799,
"step": 5300
},
{
"epoch": 14.324763193504737,
"eval_cer": 0.05160180606321221,
"eval_loss": 0.1535176932811737,
"eval_runtime": 3.2382,
"eval_samples_per_second": 16.985,
"eval_steps_per_second": 2.162,
"eval_wer": 0.18541300527240773,
"step": 5300
},
{
"epoch": 14.595399188092017,
"grad_norm": 3.489891529083252,
"learning_rate": 0.00015548181818181815,
"loss": 0.1813,
"step": 5400
},
{
"epoch": 14.595399188092017,
"eval_cer": 0.06514728015480542,
"eval_loss": 0.21499599516391754,
"eval_runtime": 3.2165,
"eval_samples_per_second": 17.099,
"eval_steps_per_second": 2.176,
"eval_wer": 0.2565905096660808,
"step": 5400
},
{
"epoch": 14.866035182679296,
"grad_norm": 2.1778228282928467,
"learning_rate": 0.00015275454545454544,
"loss": 0.1812,
"step": 5500
},
{
"epoch": 14.866035182679296,
"eval_cer": 0.05783702429585035,
"eval_loss": 0.18473133444786072,
"eval_runtime": 3.2425,
"eval_samples_per_second": 16.962,
"eval_steps_per_second": 2.159,
"eval_wer": 0.23198594024604569,
"step": 5500
},
{
"epoch": 15.13531799729364,
"grad_norm": 9.761114120483398,
"learning_rate": 0.00015002727272727274,
"loss": 0.1932,
"step": 5600
},
{
"epoch": 15.13531799729364,
"eval_cer": 0.0518168135884756,
"eval_loss": 0.16397376358509064,
"eval_runtime": 3.2727,
"eval_samples_per_second": 16.805,
"eval_steps_per_second": 2.139,
"eval_wer": 0.20913884007029876,
"step": 5600
},
{
"epoch": 15.40595399188092,
"grad_norm": 8.160325050354004,
"learning_rate": 0.00014729999999999998,
"loss": 0.169,
"step": 5700
},
{
"epoch": 15.40595399188092,
"eval_cer": 0.06751236293270264,
"eval_loss": 0.2461504340171814,
"eval_runtime": 3.3466,
"eval_samples_per_second": 16.435,
"eval_steps_per_second": 2.092,
"eval_wer": 0.24868189806678384,
"step": 5700
},
{
"epoch": 15.6765899864682,
"grad_norm": 7.290473937988281,
"learning_rate": 0.00014457272727272727,
"loss": 0.1704,
"step": 5800
},
{
"epoch": 15.6765899864682,
"eval_cer": 0.06020210707374758,
"eval_loss": 0.1913210153579712,
"eval_runtime": 3.328,
"eval_samples_per_second": 16.526,
"eval_steps_per_second": 2.103,
"eval_wer": 0.23374340949033393,
"step": 5800
},
{
"epoch": 15.94722598105548,
"grad_norm": 9.403716087341309,
"learning_rate": 0.00014184545454545453,
"loss": 0.1692,
"step": 5900
},
{
"epoch": 15.94722598105548,
"eval_cer": 0.06579230273059557,
"eval_loss": 0.2216808795928955,
"eval_runtime": 3.2705,
"eval_samples_per_second": 16.817,
"eval_steps_per_second": 2.14,
"eval_wer": 0.2390158172231986,
"step": 5900
},
{
"epoch": 16.216508795669824,
"grad_norm": 1.2416729927062988,
"learning_rate": 0.0001391181818181818,
"loss": 0.133,
"step": 6000
},
{
"epoch": 16.216508795669824,
"eval_cer": 0.0599870995484842,
"eval_loss": 0.19244541227817535,
"eval_runtime": 3.3093,
"eval_samples_per_second": 16.62,
"eval_steps_per_second": 2.115,
"eval_wer": 0.2390158172231986,
"step": 6000
},
{
"epoch": 16.487144790257105,
"grad_norm": 2.919238567352295,
"learning_rate": 0.00013639090909090906,
"loss": 0.1529,
"step": 6100
},
{
"epoch": 16.487144790257105,
"eval_cer": 0.054826918942162975,
"eval_loss": 0.160396009683609,
"eval_runtime": 3.2653,
"eval_samples_per_second": 16.844,
"eval_steps_per_second": 2.144,
"eval_wer": 0.20738137082601055,
"step": 6100
},
{
"epoch": 16.757780784844385,
"grad_norm": 1.1939417123794556,
"learning_rate": 0.00013366363636363636,
"loss": 0.1586,
"step": 6200
},
{
"epoch": 16.757780784844385,
"eval_cer": 0.05547194151795313,
"eval_loss": 0.18521469831466675,
"eval_runtime": 3.3238,
"eval_samples_per_second": 16.547,
"eval_steps_per_second": 2.106,
"eval_wer": 0.22231985940246046,
"step": 6200
},
{
"epoch": 17.027063599458728,
"grad_norm": 0.6369737386703491,
"learning_rate": 0.00013093636363636362,
"loss": 0.1821,
"step": 6300
},
{
"epoch": 17.027063599458728,
"eval_cer": 0.05009675338636852,
"eval_loss": 0.162165105342865,
"eval_runtime": 3.3026,
"eval_samples_per_second": 16.654,
"eval_steps_per_second": 2.12,
"eval_wer": 0.20913884007029876,
"step": 6300
},
{
"epoch": 17.29769959404601,
"grad_norm": 1.01472806930542,
"learning_rate": 0.00012820909090909092,
"loss": 0.1425,
"step": 6400
},
{
"epoch": 17.29769959404601,
"eval_cer": 0.054826918942162975,
"eval_loss": 0.1956459879875183,
"eval_runtime": 3.316,
"eval_samples_per_second": 16.586,
"eval_steps_per_second": 2.111,
"eval_wer": 0.23286467486818982,
"step": 6400
},
{
"epoch": 17.56833558863329,
"grad_norm": 1.0718566179275513,
"learning_rate": 0.00012548181818181818,
"loss": 0.1538,
"step": 6500
},
{
"epoch": 17.56833558863329,
"eval_cer": 0.058052031821113736,
"eval_loss": 0.19568397104740143,
"eval_runtime": 3.2443,
"eval_samples_per_second": 16.953,
"eval_steps_per_second": 2.158,
"eval_wer": 0.24165202108963094,
"step": 6500
},
{
"epoch": 17.83897158322057,
"grad_norm": 0.5153430104255676,
"learning_rate": 0.00012275454545454545,
"loss": 0.1395,
"step": 6600
},
{
"epoch": 17.83897158322057,
"eval_cer": 0.05826703934637712,
"eval_loss": 0.19457882642745972,
"eval_runtime": 3.2924,
"eval_samples_per_second": 16.705,
"eval_steps_per_second": 2.126,
"eval_wer": 0.2398945518453427,
"step": 6600
},
{
"epoch": 18.108254397834912,
"grad_norm": 1.9209911823272705,
"learning_rate": 0.00012002727272727273,
"loss": 0.1331,
"step": 6700
},
{
"epoch": 18.108254397834912,
"eval_cer": 0.05869705439690389,
"eval_loss": 0.226872980594635,
"eval_runtime": 3.3195,
"eval_samples_per_second": 16.569,
"eval_steps_per_second": 2.109,
"eval_wer": 0.2539543057996485,
"step": 6700
},
{
"epoch": 18.378890392422193,
"grad_norm": 4.072175979614258,
"learning_rate": 0.00011729999999999999,
"loss": 0.1242,
"step": 6800
},
{
"epoch": 18.378890392422193,
"eval_cer": 0.05826703934637712,
"eval_loss": 0.2108573615550995,
"eval_runtime": 3.2958,
"eval_samples_per_second": 16.688,
"eval_steps_per_second": 2.124,
"eval_wer": 0.23637961335676624,
"step": 6800
},
{
"epoch": 18.649526387009473,
"grad_norm": 2.9033162593841553,
"learning_rate": 0.00011457272727272727,
"loss": 0.1323,
"step": 6900
},
{
"epoch": 18.649526387009473,
"eval_cer": 0.054396903891636206,
"eval_loss": 0.20044924318790436,
"eval_runtime": 3.2775,
"eval_samples_per_second": 16.781,
"eval_steps_per_second": 2.136,
"eval_wer": 0.22759226713532513,
"step": 6900
},
{
"epoch": 18.920162381596754,
"grad_norm": 11.889602661132812,
"learning_rate": 0.00011184545454545454,
"loss": 0.1384,
"step": 7000
},
{
"epoch": 18.920162381596754,
"eval_cer": 0.05697699419479682,
"eval_loss": 0.20252789556980133,
"eval_runtime": 3.2287,
"eval_samples_per_second": 17.035,
"eval_steps_per_second": 2.168,
"eval_wer": 0.2451669595782074,
"step": 7000
},
{
"epoch": 19.189445196211096,
"grad_norm": 1.1512547731399536,
"learning_rate": 0.00010911818181818182,
"loss": 0.1294,
"step": 7100
},
{
"epoch": 19.189445196211096,
"eval_cer": 0.05418189636637282,
"eval_loss": 0.21085655689239502,
"eval_runtime": 3.2121,
"eval_samples_per_second": 17.123,
"eval_steps_per_second": 2.179,
"eval_wer": 0.2390158172231986,
"step": 7100
},
{
"epoch": 19.460081190798377,
"grad_norm": 0.6903840899467468,
"learning_rate": 0.00010639090909090908,
"loss": 0.1279,
"step": 7200
},
{
"epoch": 19.460081190798377,
"eval_cer": 0.04794667813373468,
"eval_loss": 0.16093921661376953,
"eval_runtime": 3.2295,
"eval_samples_per_second": 17.031,
"eval_steps_per_second": 2.168,
"eval_wer": 0.19156414762741653,
"step": 7200
},
{
"epoch": 19.730717185385657,
"grad_norm": 1.4991930723190308,
"learning_rate": 0.00010366363636363636,
"loss": 0.122,
"step": 7300
},
{
"epoch": 19.730717185385657,
"eval_cer": 0.06063212212427435,
"eval_loss": 0.24742065370082855,
"eval_runtime": 3.295,
"eval_samples_per_second": 16.692,
"eval_steps_per_second": 2.124,
"eval_wer": 0.2539543057996485,
"step": 7300
},
{
"epoch": 20.0,
"grad_norm": 4.862514019012451,
"learning_rate": 0.00010093636363636363,
"loss": 0.1222,
"step": 7400
},
{
"epoch": 20.0,
"eval_cer": 0.050311760911631906,
"eval_loss": 0.17492927610874176,
"eval_runtime": 3.2165,
"eval_samples_per_second": 17.099,
"eval_steps_per_second": 2.176,
"eval_wer": 0.1968365553602812,
"step": 7400
},
{
"epoch": 20.27063599458728,
"grad_norm": 10.719727516174316,
"learning_rate": 9.82090909090909e-05,
"loss": 0.1121,
"step": 7500
},
{
"epoch": 20.27063599458728,
"eval_cer": 0.05869705439690389,
"eval_loss": 0.24458029866218567,
"eval_runtime": 3.2693,
"eval_samples_per_second": 16.823,
"eval_steps_per_second": 2.141,
"eval_wer": 0.2513181019332162,
"step": 7500
},
{
"epoch": 20.54127198917456,
"grad_norm": 6.036056995391846,
"learning_rate": 9.548181818181817e-05,
"loss": 0.1142,
"step": 7600
},
{
"epoch": 20.54127198917456,
"eval_cer": 0.05332186626531928,
"eval_loss": 0.19322967529296875,
"eval_runtime": 3.297,
"eval_samples_per_second": 16.682,
"eval_steps_per_second": 2.123,
"eval_wer": 0.22319859402460457,
"step": 7600
},
{
"epoch": 20.81190798376184,
"grad_norm": 5.935936450958252,
"learning_rate": 9.275454545454544e-05,
"loss": 0.1226,
"step": 7700
},
{
"epoch": 20.81190798376184,
"eval_cer": 0.05353687379058267,
"eval_loss": 0.2194850891828537,
"eval_runtime": 3.272,
"eval_samples_per_second": 16.809,
"eval_steps_per_second": 2.139,
"eval_wer": 0.23198594024604569,
"step": 7700
},
{
"epoch": 21.081190798376184,
"grad_norm": 1.9924638271331787,
"learning_rate": 9.002727272727272e-05,
"loss": 0.1052,
"step": 7800
},
{
"epoch": 21.081190798376184,
"eval_cer": 0.05095678348742206,
"eval_loss": 0.20277012884616852,
"eval_runtime": 3.2836,
"eval_samples_per_second": 16.75,
"eval_steps_per_second": 2.132,
"eval_wer": 0.22231985940246046,
"step": 7800
},
{
"epoch": 21.351826792963465,
"grad_norm": 5.319161891937256,
"learning_rate": 8.729999999999998e-05,
"loss": 0.1077,
"step": 7900
},
{
"epoch": 21.351826792963465,
"eval_cer": 0.05353687379058267,
"eval_loss": 0.18649469316005707,
"eval_runtime": 3.264,
"eval_samples_per_second": 16.85,
"eval_steps_per_second": 2.145,
"eval_wer": 0.210896309314587,
"step": 7900
},
{
"epoch": 21.622462787550745,
"grad_norm": 2.034726619720459,
"learning_rate": 8.457272727272726e-05,
"loss": 0.114,
"step": 8000
},
{
"epoch": 21.622462787550745,
"eval_cer": 0.04687164050741776,
"eval_loss": 0.17550522089004517,
"eval_runtime": 3.2743,
"eval_samples_per_second": 16.798,
"eval_steps_per_second": 2.138,
"eval_wer": 0.21704745166959577,
"step": 8000
},
{
"epoch": 21.893098782138026,
"grad_norm": 1.0931280851364136,
"learning_rate": 8.184545454545453e-05,
"loss": 0.1041,
"step": 8100
},
{
"epoch": 21.893098782138026,
"eval_cer": 0.04579660288110084,
"eval_loss": 0.1914069801568985,
"eval_runtime": 3.2452,
"eval_samples_per_second": 16.948,
"eval_steps_per_second": 2.157,
"eval_wer": 0.2117750439367311,
"step": 8100
},
{
"epoch": 22.16238159675237,
"grad_norm": 3.6217257976531982,
"learning_rate": 7.911818181818182e-05,
"loss": 0.1149,
"step": 8200
},
{
"epoch": 22.16238159675237,
"eval_cer": 0.0477316706084713,
"eval_loss": 0.18972058594226837,
"eval_runtime": 3.2691,
"eval_samples_per_second": 16.824,
"eval_steps_per_second": 2.141,
"eval_wer": 0.2179261862917399,
"step": 8200
},
{
"epoch": 22.43301759133965,
"grad_norm": 1.2710479497909546,
"learning_rate": 7.63909090909091e-05,
"loss": 0.0984,
"step": 8300
},
{
"epoch": 22.43301759133965,
"eval_cer": 0.05203182111373898,
"eval_loss": 0.21701879799365997,
"eval_runtime": 3.3005,
"eval_samples_per_second": 16.664,
"eval_steps_per_second": 2.121,
"eval_wer": 0.22934973637961337,
"step": 8300
},
{
"epoch": 22.70365358592693,
"grad_norm": 0.5214836597442627,
"learning_rate": 7.366363636363635e-05,
"loss": 0.0974,
"step": 8400
},
{
"epoch": 22.70365358592693,
"eval_cer": 0.046226617931627606,
"eval_loss": 0.1713176965713501,
"eval_runtime": 3.3717,
"eval_samples_per_second": 16.312,
"eval_steps_per_second": 2.076,
"eval_wer": 0.1968365553602812,
"step": 8400
},
{
"epoch": 22.97428958051421,
"grad_norm": 2.3021202087402344,
"learning_rate": 7.093636363636363e-05,
"loss": 0.1052,
"step": 8500
},
{
"epoch": 22.97428958051421,
"eval_cer": 0.04837669318426145,
"eval_loss": 0.17614266276359558,
"eval_runtime": 3.5916,
"eval_samples_per_second": 15.313,
"eval_steps_per_second": 1.949,
"eval_wer": 0.21441124780316345,
"step": 8500
},
{
"epoch": 23.243572395128552,
"grad_norm": 3.924398183822632,
"learning_rate": 6.82090909090909e-05,
"loss": 0.0892,
"step": 8600
},
{
"epoch": 23.243572395128552,
"eval_cer": 0.04429155020425715,
"eval_loss": 0.14997613430023193,
"eval_runtime": 3.344,
"eval_samples_per_second": 16.448,
"eval_steps_per_second": 2.093,
"eval_wer": 0.18629173989455183,
"step": 8600
},
{
"epoch": 23.514208389715833,
"grad_norm": 1.1947544813156128,
"learning_rate": 6.548181818181817e-05,
"loss": 0.0919,
"step": 8700
},
{
"epoch": 23.514208389715833,
"eval_cer": 0.04601161040636422,
"eval_loss": 0.15268389880657196,
"eval_runtime": 3.3038,
"eval_samples_per_second": 16.648,
"eval_steps_per_second": 2.119,
"eval_wer": 0.19859402460456943,
"step": 8700
},
{
"epoch": 23.784844384303113,
"grad_norm": 1.3969401121139526,
"learning_rate": 6.275454545454545e-05,
"loss": 0.0956,
"step": 8800
},
{
"epoch": 23.784844384303113,
"eval_cer": 0.0449365727800473,
"eval_loss": 0.15815618634223938,
"eval_runtime": 3.2819,
"eval_samples_per_second": 16.759,
"eval_steps_per_second": 2.133,
"eval_wer": 0.1968365553602812,
"step": 8800
},
{
"epoch": 24.054127198917456,
"grad_norm": 2.335787773132324,
"learning_rate": 6.0027272727272725e-05,
"loss": 0.1,
"step": 8900
},
{
"epoch": 24.054127198917456,
"eval_cer": 0.04536658783057407,
"eval_loss": 0.17150932550430298,
"eval_runtime": 3.246,
"eval_samples_per_second": 16.944,
"eval_steps_per_second": 2.157,
"eval_wer": 0.20123022847100175,
"step": 8900
},
{
"epoch": 24.324763193504737,
"grad_norm": 1.6086431741714478,
"learning_rate": 5.73e-05,
"loss": 0.0846,
"step": 9000
},
{
"epoch": 24.324763193504737,
"eval_cer": 0.04579660288110084,
"eval_loss": 0.18286831676959991,
"eval_runtime": 3.2566,
"eval_samples_per_second": 16.889,
"eval_steps_per_second": 2.149,
"eval_wer": 0.20298769771529,
"step": 9000
},
{
"epoch": 24.595399188092017,
"grad_norm": 1.6925404071807861,
"learning_rate": 5.457272727272727e-05,
"loss": 0.083,
"step": 9100
},
{
"epoch": 24.595399188092017,
"eval_cer": 0.04536658783057407,
"eval_loss": 0.17126092314720154,
"eval_runtime": 3.2285,
"eval_samples_per_second": 17.036,
"eval_steps_per_second": 2.168,
"eval_wer": 0.20650263620386644,
"step": 9100
},
{
"epoch": 24.866035182679298,
"grad_norm": 1.5034745931625366,
"learning_rate": 5.184545454545454e-05,
"loss": 0.0821,
"step": 9200
},
{
"epoch": 24.866035182679298,
"eval_cer": 0.04988174586110514,
"eval_loss": 0.19512753188610077,
"eval_runtime": 3.2263,
"eval_samples_per_second": 17.048,
"eval_steps_per_second": 2.17,
"eval_wer": 0.22231985940246046,
"step": 9200
},
{
"epoch": 25.13531799729364,
"grad_norm": 6.091914653778076,
"learning_rate": 4.9118181818181814e-05,
"loss": 0.0917,
"step": 9300
},
{
"epoch": 25.13531799729364,
"eval_cer": 0.04730165555794453,
"eval_loss": 0.18300001323223114,
"eval_runtime": 3.2216,
"eval_samples_per_second": 17.072,
"eval_steps_per_second": 2.173,
"eval_wer": 0.2100175746924429,
"step": 9300
},
{
"epoch": 25.40595399188092,
"grad_norm": 5.720769882202148,
"learning_rate": 4.6390909090909086e-05,
"loss": 0.0806,
"step": 9400
},
{
"epoch": 25.40595399188092,
"eval_cer": 0.0477316706084713,
"eval_loss": 0.17701777815818787,
"eval_runtime": 3.3053,
"eval_samples_per_second": 16.64,
"eval_steps_per_second": 2.118,
"eval_wer": 0.21353251318101935,
"step": 9400
},
{
"epoch": 25.6765899864682,
"grad_norm": 7.1992998123168945,
"learning_rate": 4.366363636363636e-05,
"loss": 0.0756,
"step": 9500
},
{
"epoch": 25.6765899864682,
"eval_cer": 0.04816168565899807,
"eval_loss": 0.1869199126958847,
"eval_runtime": 3.2573,
"eval_samples_per_second": 16.885,
"eval_steps_per_second": 2.149,
"eval_wer": 0.21441124780316345,
"step": 9500
},
{
"epoch": 25.94722598105548,
"grad_norm": 9.373414039611816,
"learning_rate": 4.093636363636364e-05,
"loss": 0.0807,
"step": 9600
},
{
"epoch": 25.94722598105548,
"eval_cer": 0.0490217157600516,
"eval_loss": 0.17646533250808716,
"eval_runtime": 3.2627,
"eval_samples_per_second": 16.857,
"eval_steps_per_second": 2.145,
"eval_wer": 0.20210896309314588,
"step": 9600
},
{
"epoch": 26.216508795669824,
"grad_norm": 1.0124870538711548,
"learning_rate": 3.820909090909091e-05,
"loss": 0.059,
"step": 9700
},
{
"epoch": 26.216508795669824,
"eval_cer": 0.051171791012685444,
"eval_loss": 0.20564059913158417,
"eval_runtime": 3.2967,
"eval_samples_per_second": 16.683,
"eval_steps_per_second": 2.123,
"eval_wer": 0.22407732864674867,
"step": 9700
},
{
"epoch": 26.487144790257105,
"grad_norm": 1.1935291290283203,
"learning_rate": 3.548181818181818e-05,
"loss": 0.0831,
"step": 9800
},
{
"epoch": 26.487144790257105,
"eval_cer": 0.05009675338636852,
"eval_loss": 0.1965063065290451,
"eval_runtime": 3.3021,
"eval_samples_per_second": 16.656,
"eval_steps_per_second": 2.12,
"eval_wer": 0.22056239015817222,
"step": 9800
},
{
"epoch": 26.757780784844385,
"grad_norm": 1.9065356254577637,
"learning_rate": 3.2754545454545455e-05,
"loss": 0.0647,
"step": 9900
},
{
"epoch": 26.757780784844385,
"eval_cer": 0.04751666308320791,
"eval_loss": 0.1832188367843628,
"eval_runtime": 3.2986,
"eval_samples_per_second": 16.674,
"eval_steps_per_second": 2.122,
"eval_wer": 0.20826010544815465,
"step": 9900
},
{
"epoch": 27.027063599458728,
"grad_norm": 0.8792353868484497,
"learning_rate": 3.0027272727272724e-05,
"loss": 0.0788,
"step": 10000
},
{
"epoch": 27.027063599458728,
"eval_cer": 0.046656632982154375,
"eval_loss": 0.17078326642513275,
"eval_runtime": 3.3039,
"eval_samples_per_second": 16.647,
"eval_steps_per_second": 2.119,
"eval_wer": 0.19859402460456943,
"step": 10000
},
{
"epoch": 27.29769959404601,
"grad_norm": 0.8240686655044556,
"learning_rate": 2.7299999999999996e-05,
"loss": 0.0677,
"step": 10100
},
{
"epoch": 27.29769959404601,
"eval_cer": 0.0449365727800473,
"eval_loss": 0.16915105283260345,
"eval_runtime": 3.2712,
"eval_samples_per_second": 16.813,
"eval_steps_per_second": 2.14,
"eval_wer": 0.1827768014059754,
"step": 10100
},
{
"epoch": 27.56833558863329,
"grad_norm": 0.5428586006164551,
"learning_rate": 2.457272727272727e-05,
"loss": 0.0651,
"step": 10200
},
{
"epoch": 27.56833558863329,
"eval_cer": 0.04644162545689099,
"eval_loss": 0.17613492906093597,
"eval_runtime": 3.2478,
"eval_samples_per_second": 16.935,
"eval_steps_per_second": 2.155,
"eval_wer": 0.20210896309314588,
"step": 10200
},
{
"epoch": 27.83897158322057,
"grad_norm": 0.8122203946113586,
"learning_rate": 2.1845454545454544e-05,
"loss": 0.0602,
"step": 10300
},
{
"epoch": 27.83897158322057,
"eval_cer": 0.046656632982154375,
"eval_loss": 0.16123421490192413,
"eval_runtime": 3.2919,
"eval_samples_per_second": 16.708,
"eval_steps_per_second": 2.126,
"eval_wer": 0.18804920913884007,
"step": 10300
},
{
"epoch": 28.108254397834912,
"grad_norm": 1.2256019115447998,
"learning_rate": 1.9118181818181817e-05,
"loss": 0.0586,
"step": 10400
},
{
"epoch": 28.108254397834912,
"eval_cer": 0.04579660288110084,
"eval_loss": 0.17519748210906982,
"eval_runtime": 3.2923,
"eval_samples_per_second": 16.706,
"eval_steps_per_second": 2.126,
"eval_wer": 0.19947275922671354,
"step": 10400
},
{
"epoch": 28.378890392422193,
"grad_norm": 1.0472385883331299,
"learning_rate": 1.639090909090909e-05,
"loss": 0.0602,
"step": 10500
},
{
"epoch": 28.378890392422193,
"eval_cer": 0.04536658783057407,
"eval_loss": 0.17338904738426208,
"eval_runtime": 3.2664,
"eval_samples_per_second": 16.838,
"eval_steps_per_second": 2.143,
"eval_wer": 0.19507908611599298,
"step": 10500
},
{
"epoch": 28.649526387009473,
"grad_norm": 1.4401935338974,
"learning_rate": 1.3663636363636363e-05,
"loss": 0.0636,
"step": 10600
},
{
"epoch": 28.649526387009473,
"eval_cer": 0.04579660288110084,
"eval_loss": 0.1733109951019287,
"eval_runtime": 3.3348,
"eval_samples_per_second": 16.493,
"eval_steps_per_second": 2.099,
"eval_wer": 0.20210896309314588,
"step": 10600
},
{
"epoch": 28.920162381596754,
"grad_norm": 0.8489630818367004,
"learning_rate": 1.0936363636363635e-05,
"loss": 0.0642,
"step": 10700
},
{
"epoch": 28.920162381596754,
"eval_cer": 0.047086648032681144,
"eval_loss": 0.17440861463546753,
"eval_runtime": 3.3022,
"eval_samples_per_second": 16.656,
"eval_steps_per_second": 2.12,
"eval_wer": 0.2056239015817223,
"step": 10700
},
{
"epoch": 29.189445196211096,
"grad_norm": 1.0616735219955444,
"learning_rate": 8.20909090909091e-06,
"loss": 0.0611,
"step": 10800
},
{
"epoch": 29.189445196211096,
"eval_cer": 0.04601161040636422,
"eval_loss": 0.17405055463314056,
"eval_runtime": 3.2326,
"eval_samples_per_second": 17.014,
"eval_steps_per_second": 2.165,
"eval_wer": 0.20298769771529,
"step": 10800
},
{
"epoch": 29.460081190798377,
"grad_norm": 1.259354591369629,
"learning_rate": 5.481818181818182e-06,
"loss": 0.0596,
"step": 10900
},
{
"epoch": 29.460081190798377,
"eval_cer": 0.0449365727800473,
"eval_loss": 0.17219401895999908,
"eval_runtime": 3.1719,
"eval_samples_per_second": 17.34,
"eval_steps_per_second": 2.207,
"eval_wer": 0.19507908611599298,
"step": 10900
},
{
"epoch": 29.730717185385657,
"grad_norm": 0.4698294401168823,
"learning_rate": 2.754545454545454e-06,
"loss": 0.0627,
"step": 11000
},
{
"epoch": 29.730717185385657,
"eval_cer": 0.0449365727800473,
"eval_loss": 0.17380425333976746,
"eval_runtime": 3.2854,
"eval_samples_per_second": 16.741,
"eval_steps_per_second": 2.131,
"eval_wer": 0.19859402460456943,
"step": 11000
},
{
"epoch": 30.0,
"grad_norm": 9.292856216430664,
"learning_rate": 2.727272727272727e-08,
"loss": 0.0623,
"step": 11100
},
{
"epoch": 30.0,
"eval_cer": 0.0449365727800473,
"eval_loss": 0.17295825481414795,
"eval_runtime": 3.2552,
"eval_samples_per_second": 16.896,
"eval_steps_per_second": 2.15,
"eval_wer": 0.19859402460456943,
"step": 11100
},
{
"epoch": 30.0,
"step": 11100,
"total_flos": 1.8673488922198168e+19,
"train_loss": 0.3048185482111063,
"train_runtime": 12419.9418,
"train_samples_per_second": 28.548,
"train_steps_per_second": 0.894
},
{
"epoch": 30.0,
"eval_cer": 0.04515158030531068,
"eval_loss": 0.17296089231967926,
"eval_runtime": 3.3418,
"eval_samples_per_second": 16.458,
"eval_steps_per_second": 2.095,
"eval_wer": 0.19859402460456943,
"step": 11100
}
],
"logging_steps": 100,
"max_steps": 11100,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8673488922198168e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}