{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 100, "global_step": 11100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2706359945872801, "grad_norm": 1.0425561666488647, "learning_rate": 0.00029699999999999996, "loss": 5.107, "step": 100 }, { "epoch": 0.2706359945872801, "eval_cer": 0.9907546764136744, "eval_loss": 2.8580877780914307, "eval_runtime": 3.2586, "eval_samples_per_second": 16.879, "eval_steps_per_second": 2.148, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.5412719891745602, "grad_norm": 1.5294588804244995, "learning_rate": 0.00029729999999999996, "loss": 2.696, "step": 200 }, { "epoch": 0.5412719891745602, "eval_cer": 0.700064502257579, "eval_loss": 2.3650922775268555, "eval_runtime": 3.2771, "eval_samples_per_second": 16.783, "eval_steps_per_second": 2.136, "eval_wer": 0.9815465729349736, "step": 200 }, { "epoch": 0.8119079837618404, "grad_norm": 4.284913539886475, "learning_rate": 0.0002945727272727273, "loss": 1.9594, "step": 300 }, { "epoch": 0.8119079837618404, "eval_cer": 0.39991399698989466, "eval_loss": 1.3402470350265503, "eval_runtime": 3.3075, "eval_samples_per_second": 16.629, "eval_steps_per_second": 2.116, "eval_wer": 0.8391915641476274, "step": 300 }, { "epoch": 1.0811907983761841, "grad_norm": 1.6875638961791992, "learning_rate": 0.0002918454545454545, "loss": 1.1816, "step": 400 }, { "epoch": 1.0811907983761841, "eval_cer": 0.19221672758546549, "eval_loss": 0.5386932492256165, "eval_runtime": 3.2713, "eval_samples_per_second": 16.813, "eval_steps_per_second": 2.14, "eval_wer": 0.5404217926186292, "step": 400 }, { "epoch": 1.3518267929634642, "grad_norm": 2.1928625106811523, "learning_rate": 0.0002891181818181818, "loss": 0.8925, "step": 500 }, { "epoch": 1.3518267929634642, "eval_cer": 0.1498602451085788, "eval_loss": 0.3904643654823303, "eval_runtime": 3.2769, "eval_samples_per_second": 16.784, "eval_steps_per_second": 2.136, "eval_wer": 0.45782073813708263, "step": 500 }, { "epoch": 1.6224627875507442, "grad_norm": 1.7728203535079956, "learning_rate": 0.0002863909090909091, "loss": 0.7373, "step": 600 }, { "epoch": 1.6224627875507442, "eval_cer": 0.11416899591485702, "eval_loss": 0.3346728980541229, "eval_runtime": 3.2076, "eval_samples_per_second": 17.147, "eval_steps_per_second": 2.182, "eval_wer": 0.37434094903339193, "step": 600 }, { "epoch": 1.8930987821380243, "grad_norm": 1.9392462968826294, "learning_rate": 0.00028366363636363634, "loss": 0.66, "step": 700 }, { "epoch": 1.8930987821380243, "eval_cer": 0.10728875510642873, "eval_loss": 0.29814502596855164, "eval_runtime": 3.2663, "eval_samples_per_second": 16.839, "eval_steps_per_second": 2.143, "eval_wer": 0.3242530755711775, "step": 700 }, { "epoch": 2.1623815967523683, "grad_norm": 1.3830772638320923, "learning_rate": 0.0002809363636363636, "loss": 0.6412, "step": 800 }, { "epoch": 2.1623815967523683, "eval_cer": 0.10040851429800043, "eval_loss": 0.2573491632938385, "eval_runtime": 3.2309, "eval_samples_per_second": 17.023, "eval_steps_per_second": 2.167, "eval_wer": 0.3312829525483304, "step": 800 }, { "epoch": 2.4330175913396483, "grad_norm": 1.7728252410888672, "learning_rate": 0.0002782090909090909, "loss": 0.5683, "step": 900 }, { "epoch": 2.4330175913396483, "eval_cer": 0.10578370242958504, "eval_loss": 0.2961094379425049, "eval_runtime": 3.236, "eval_samples_per_second": 16.996, "eval_steps_per_second": 2.163, "eval_wer": 0.35676625659050965, "step": 900 }, { "epoch": 2.7036535859269284, "grad_norm": 2.22818660736084, "learning_rate": 0.00027548181818181814, "loss": 0.5538, "step": 1000 }, { "epoch": 2.7036535859269284, "eval_cer": 0.1077187701569555, "eval_loss": 0.26289868354797363, "eval_runtime": 3.2491, "eval_samples_per_second": 16.928, "eval_steps_per_second": 2.154, "eval_wer": 0.3453427065026362, "step": 1000 }, { "epoch": 2.9742895805142084, "grad_norm": 2.420909881591797, "learning_rate": 0.00027275454545454546, "loss": 0.4991, "step": 1100 }, { "epoch": 2.9742895805142084, "eval_cer": 0.10427864975274134, "eval_loss": 0.2832812964916229, "eval_runtime": 3.3057, "eval_samples_per_second": 16.638, "eval_steps_per_second": 2.118, "eval_wer": 0.3312829525483304, "step": 1100 }, { "epoch": 3.243572395128552, "grad_norm": 5.234218120574951, "learning_rate": 0.00027002727272727267, "loss": 0.4536, "step": 1200 }, { "epoch": 3.243572395128552, "eval_cer": 0.08535798752956353, "eval_loss": 0.24891051650047302, "eval_runtime": 3.2773, "eval_samples_per_second": 16.782, "eval_steps_per_second": 2.136, "eval_wer": 0.3347978910369069, "step": 1200 }, { "epoch": 3.514208389715832, "grad_norm": 2.5880632400512695, "learning_rate": 0.0002673, "loss": 0.4621, "step": 1300 }, { "epoch": 3.514208389715832, "eval_cer": 0.09460331111588906, "eval_loss": 0.23712627589702606, "eval_runtime": 3.2761, "eval_samples_per_second": 16.788, "eval_steps_per_second": 2.137, "eval_wer": 0.3260105448154657, "step": 1300 }, { "epoch": 3.7848443843031125, "grad_norm": 3.540529251098633, "learning_rate": 0.00026457272727272726, "loss": 0.4401, "step": 1400 }, { "epoch": 3.7848443843031125, "eval_cer": 0.08600301010535369, "eval_loss": 0.21003246307373047, "eval_runtime": 3.3426, "eval_samples_per_second": 16.454, "eval_steps_per_second": 2.094, "eval_wer": 0.28295254833040423, "step": 1400 }, { "epoch": 4.054127198917456, "grad_norm": 1.8023126125335693, "learning_rate": 0.0002618454545454545, "loss": 0.4278, "step": 1500 }, { "epoch": 4.054127198917456, "eval_cer": 0.10105353687379058, "eval_loss": 0.2677291929721832, "eval_runtime": 3.2361, "eval_samples_per_second": 16.996, "eval_steps_per_second": 2.163, "eval_wer": 0.3321616871704745, "step": 1500 }, { "epoch": 4.324763193504737, "grad_norm": 3.478238821029663, "learning_rate": 0.0002591181818181818, "loss": 0.386, "step": 1600 }, { "epoch": 4.324763193504737, "eval_cer": 0.09030316061062137, "eval_loss": 0.24809867143630981, "eval_runtime": 3.2871, "eval_samples_per_second": 16.732, "eval_steps_per_second": 2.13, "eval_wer": 0.29789103690685415, "step": 1600 }, { "epoch": 4.595399188092016, "grad_norm": 2.037954568862915, "learning_rate": 0.00025639090909090905, "loss": 0.3943, "step": 1700 }, { "epoch": 4.595399188092016, "eval_cer": 0.09503332616641583, "eval_loss": 0.2835349440574646, "eval_runtime": 3.2426, "eval_samples_per_second": 16.962, "eval_steps_per_second": 2.159, "eval_wer": 0.31282952548330406, "step": 1700 }, { "epoch": 4.866035182679297, "grad_norm": 2.778028964996338, "learning_rate": 0.0002536636363636364, "loss": 0.3986, "step": 1800 }, { "epoch": 4.866035182679297, "eval_cer": 0.0707374758116534, "eval_loss": 0.17705407738685608, "eval_runtime": 3.3043, "eval_samples_per_second": 16.645, "eval_steps_per_second": 2.118, "eval_wer": 0.24253075571177504, "step": 1800 }, { "epoch": 5.13531799729364, "grad_norm": 10.435393333435059, "learning_rate": 0.00025093636363636364, "loss": 0.3996, "step": 1900 }, { "epoch": 5.13531799729364, "eval_cer": 0.0885831004085143, "eval_loss": 0.24632568657398224, "eval_runtime": 3.2851, "eval_samples_per_second": 16.742, "eval_steps_per_second": 2.131, "eval_wer": 0.3233743409490334, "step": 1900 }, { "epoch": 5.40595399188092, "grad_norm": 8.714759826660156, "learning_rate": 0.0002482090909090909, "loss": 0.3468, "step": 2000 }, { "epoch": 5.40595399188092, "eval_cer": 0.0870780477316706, "eval_loss": 0.2470923364162445, "eval_runtime": 3.3067, "eval_samples_per_second": 16.633, "eval_steps_per_second": 2.117, "eval_wer": 0.2521968365553603, "step": 2000 }, { "epoch": 5.6765899864682, "grad_norm": 7.187710285186768, "learning_rate": 0.00024548181818181817, "loss": 0.3429, "step": 2100 }, { "epoch": 5.6765899864682, "eval_cer": 0.09008815308535799, "eval_loss": 0.24462804198265076, "eval_runtime": 3.2272, "eval_samples_per_second": 17.043, "eval_steps_per_second": 2.169, "eval_wer": 0.30492091388400705, "step": 2100 }, { "epoch": 5.94722598105548, "grad_norm": 7.232232570648193, "learning_rate": 0.00024275454545454544, "loss": 0.3362, "step": 2200 }, { "epoch": 5.94722598105548, "eval_cer": 0.10234358202537089, "eval_loss": 0.2832447290420532, "eval_runtime": 3.2707, "eval_samples_per_second": 16.816, "eval_steps_per_second": 2.14, "eval_wer": 0.3216168717047452, "step": 2200 }, { "epoch": 6.216508795669824, "grad_norm": 2.5066192150115967, "learning_rate": 0.0002400272727272727, "loss": 0.3257, "step": 2300 }, { "epoch": 6.216508795669824, "eval_cer": 0.08922812298430445, "eval_loss": 0.2616831660270691, "eval_runtime": 3.289, "eval_samples_per_second": 16.722, "eval_steps_per_second": 2.128, "eval_wer": 0.2750439367311072, "step": 2300 }, { "epoch": 6.487144790257104, "grad_norm": 1.5899525880813599, "learning_rate": 0.0002373, "loss": 0.3088, "step": 2400 }, { "epoch": 6.487144790257104, "eval_cer": 0.08213287465061277, "eval_loss": 0.2477259337902069, "eval_runtime": 3.2212, "eval_samples_per_second": 17.074, "eval_steps_per_second": 2.173, "eval_wer": 0.2671353251318102, "step": 2400 }, { "epoch": 6.7577807848443845, "grad_norm": 1.64902925491333, "learning_rate": 0.00023457272727272723, "loss": 0.3092, "step": 2500 }, { "epoch": 6.7577807848443845, "eval_cer": 0.07288755106428725, "eval_loss": 0.16897092759609222, "eval_runtime": 3.2797, "eval_samples_per_second": 16.77, "eval_steps_per_second": 2.134, "eval_wer": 0.2126537785588752, "step": 2500 }, { "epoch": 7.027063599458728, "grad_norm": 3.107236862182617, "learning_rate": 0.00023184545454545453, "loss": 0.3247, "step": 2600 }, { "epoch": 7.027063599458728, "eval_cer": 0.0668673403569125, "eval_loss": 0.16091446578502655, "eval_runtime": 3.2437, "eval_samples_per_second": 16.956, "eval_steps_per_second": 2.158, "eval_wer": 0.24428822495606328, "step": 2600 }, { "epoch": 7.2976995940460085, "grad_norm": 1.4398540258407593, "learning_rate": 0.0002291181818181818, "loss": 0.2877, "step": 2700 }, { "epoch": 7.2976995940460085, "eval_cer": 0.081702859600086, "eval_loss": 0.21893317997455597, "eval_runtime": 3.3058, "eval_samples_per_second": 16.637, "eval_steps_per_second": 2.117, "eval_wer": 0.2697715289982425, "step": 2700 }, { "epoch": 7.568335588633288, "grad_norm": 1.3356603384017944, "learning_rate": 0.00022639090909090908, "loss": 0.2978, "step": 2800 }, { "epoch": 7.568335588633288, "eval_cer": 0.08406794237798323, "eval_loss": 0.21929292380809784, "eval_runtime": 3.3113, "eval_samples_per_second": 16.61, "eval_steps_per_second": 2.114, "eval_wer": 0.2697715289982425, "step": 2800 }, { "epoch": 7.838971583220569, "grad_norm": 1.0422672033309937, "learning_rate": 0.00022366363636363632, "loss": 0.3011, "step": 2900 }, { "epoch": 7.838971583220569, "eval_cer": 0.09051816813588476, "eval_loss": 0.22263768315315247, "eval_runtime": 3.2412, "eval_samples_per_second": 16.969, "eval_steps_per_second": 2.16, "eval_wer": 0.27768014059753954, "step": 2900 }, { "epoch": 8.108254397834912, "grad_norm": 3.0150039196014404, "learning_rate": 0.00022093636363636362, "loss": 0.2792, "step": 3000 }, { "epoch": 8.108254397834912, "eval_cer": 0.08772307030746077, "eval_loss": 0.24399547278881073, "eval_runtime": 3.2156, "eval_samples_per_second": 17.104, "eval_steps_per_second": 2.177, "eval_wer": 0.2750439367311072, "step": 3000 }, { "epoch": 8.378890392422193, "grad_norm": 3.422705888748169, "learning_rate": 0.00021820909090909088, "loss": 0.2652, "step": 3100 }, { "epoch": 8.378890392422193, "eval_cer": 0.08621801763061707, "eval_loss": 0.25809481739997864, "eval_runtime": 3.2041, "eval_samples_per_second": 17.166, "eval_steps_per_second": 2.185, "eval_wer": 0.2565905096660808, "step": 3100 }, { "epoch": 8.649526387009473, "grad_norm": 7.11945104598999, "learning_rate": 0.00021548181818181817, "loss": 0.2847, "step": 3200 }, { "epoch": 8.649526387009473, "eval_cer": 0.08385293485271984, "eval_loss": 0.2583249807357788, "eval_runtime": 3.2692, "eval_samples_per_second": 16.824, "eval_steps_per_second": 2.141, "eval_wer": 0.29876977152899825, "step": 3200 }, { "epoch": 8.920162381596752, "grad_norm": 4.319618225097656, "learning_rate": 0.0002127545454545454, "loss": 0.2788, "step": 3300 }, { "epoch": 8.920162381596752, "eval_cer": 0.078907761771662, "eval_loss": 0.19587865471839905, "eval_runtime": 3.2926, "eval_samples_per_second": 16.704, "eval_steps_per_second": 2.126, "eval_wer": 0.24253075571177504, "step": 3300 }, { "epoch": 9.189445196211096, "grad_norm": 2.1342787742614746, "learning_rate": 0.0002100272727272727, "loss": 0.2589, "step": 3400 }, { "epoch": 9.189445196211096, "eval_cer": 0.06923242313480972, "eval_loss": 0.19880472123622894, "eval_runtime": 3.2414, "eval_samples_per_second": 16.968, "eval_steps_per_second": 2.16, "eval_wer": 0.24956063268892795, "step": 3400 }, { "epoch": 9.460081190798377, "grad_norm": 1.656413197517395, "learning_rate": 0.00020729999999999997, "loss": 0.2493, "step": 3500 }, { "epoch": 9.460081190798377, "eval_cer": 0.0666523328316491, "eval_loss": 0.17990422248840332, "eval_runtime": 3.2699, "eval_samples_per_second": 16.82, "eval_steps_per_second": 2.141, "eval_wer": 0.23198594024604569, "step": 3500 }, { "epoch": 9.730717185385656, "grad_norm": 9.605318069458008, "learning_rate": 0.00020457272727272726, "loss": 0.2473, "step": 3600 }, { "epoch": 9.730717185385656, "eval_cer": 0.07009245323586326, "eval_loss": 0.24749121069908142, "eval_runtime": 3.2843, "eval_samples_per_second": 16.746, "eval_steps_per_second": 2.131, "eval_wer": 0.2671353251318102, "step": 3600 }, { "epoch": 10.0, "grad_norm": 38.97199249267578, "learning_rate": 0.00020184545454545456, "loss": 0.2842, "step": 3700 }, { "epoch": 10.0, "eval_cer": 0.07181251343797034, "eval_loss": 0.20998047292232513, "eval_runtime": 3.3675, "eval_samples_per_second": 16.333, "eval_steps_per_second": 2.079, "eval_wer": 0.2592267135325132, "step": 3700 }, { "epoch": 10.27063599458728, "grad_norm": 10.944853782653809, "learning_rate": 0.0001991181818181818, "loss": 0.2312, "step": 3800 }, { "epoch": 10.27063599458728, "eval_cer": 0.06106213717480112, "eval_loss": 0.20029255747795105, "eval_runtime": 3.2308, "eval_samples_per_second": 17.024, "eval_steps_per_second": 2.167, "eval_wer": 0.24077328646748683, "step": 3800 }, { "epoch": 10.541271989174561, "grad_norm": 6.15484619140625, "learning_rate": 0.0001963909090909091, "loss": 0.231, "step": 3900 }, { "epoch": 10.541271989174561, "eval_cer": 0.06880240808428295, "eval_loss": 0.23144930601119995, "eval_runtime": 3.3192, "eval_samples_per_second": 16.57, "eval_steps_per_second": 2.109, "eval_wer": 0.2680140597539543, "step": 3900 }, { "epoch": 10.81190798376184, "grad_norm": 9.186676025390625, "learning_rate": 0.00019366363636363635, "loss": 0.243, "step": 4000 }, { "epoch": 10.81190798376184, "eval_cer": 0.0707374758116534, "eval_loss": 0.20184335112571716, "eval_runtime": 3.2475, "eval_samples_per_second": 16.936, "eval_steps_per_second": 2.155, "eval_wer": 0.2504393673110721, "step": 4000 }, { "epoch": 11.081190798376184, "grad_norm": 3.3094773292541504, "learning_rate": 0.00019093636363636364, "loss": 0.2188, "step": 4100 }, { "epoch": 11.081190798376184, "eval_cer": 0.06299720490217157, "eval_loss": 0.18349941074848175, "eval_runtime": 3.2855, "eval_samples_per_second": 16.74, "eval_steps_per_second": 2.131, "eval_wer": 0.24077328646748683, "step": 4100 }, { "epoch": 11.351826792963465, "grad_norm": 4.869017124176025, "learning_rate": 0.00018820909090909088, "loss": 0.2129, "step": 4200 }, { "epoch": 11.351826792963465, "eval_cer": 0.0627821973769082, "eval_loss": 0.19908010959625244, "eval_runtime": 3.2946, "eval_samples_per_second": 16.694, "eval_steps_per_second": 2.125, "eval_wer": 0.2460456942003515, "step": 4200 }, { "epoch": 11.622462787550745, "grad_norm": 3.1255884170532227, "learning_rate": 0.00018548181818181818, "loss": 0.2351, "step": 4300 }, { "epoch": 11.622462787550745, "eval_cer": 0.0653622876800688, "eval_loss": 0.21975572407245636, "eval_runtime": 3.3112, "eval_samples_per_second": 16.61, "eval_steps_per_second": 2.114, "eval_wer": 0.27065026362038663, "step": 4300 }, { "epoch": 11.893098782138024, "grad_norm": 2.200582265853882, "learning_rate": 0.00018275454545454544, "loss": 0.218, "step": 4400 }, { "epoch": 11.893098782138024, "eval_cer": 0.06858740055901956, "eval_loss": 0.26052218675613403, "eval_runtime": 3.1812, "eval_samples_per_second": 17.289, "eval_steps_per_second": 2.2, "eval_wer": 0.27943760984182775, "step": 4400 }, { "epoch": 12.162381596752368, "grad_norm": 3.319242477416992, "learning_rate": 0.0001800272727272727, "loss": 0.2202, "step": 4500 }, { "epoch": 12.162381596752368, "eval_cer": 0.058052031821113736, "eval_loss": 0.21814635396003723, "eval_runtime": 3.272, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.139, "eval_wer": 0.23286467486818982, "step": 4500 }, { "epoch": 12.433017591339649, "grad_norm": 1.2420411109924316, "learning_rate": 0.00017729999999999997, "loss": 0.1962, "step": 4600 }, { "epoch": 12.433017591339649, "eval_cer": 0.05568694904321651, "eval_loss": 0.1839354932308197, "eval_runtime": 3.2938, "eval_samples_per_second": 16.698, "eval_steps_per_second": 2.125, "eval_wer": 0.23022847100175747, "step": 4600 }, { "epoch": 12.703653585926928, "grad_norm": 0.9907166361808777, "learning_rate": 0.00017457272727272727, "loss": 0.2129, "step": 4700 }, { "epoch": 12.703653585926928, "eval_cer": 0.05762201677058697, "eval_loss": 0.1895829439163208, "eval_runtime": 3.2727, "eval_samples_per_second": 16.806, "eval_steps_per_second": 2.139, "eval_wer": 0.22671353251318102, "step": 4700 }, { "epoch": 12.974289580514208, "grad_norm": 2.0261332988739014, "learning_rate": 0.00017184545454545453, "loss": 0.2074, "step": 4800 }, { "epoch": 12.974289580514208, "eval_cer": 0.06063212212427435, "eval_loss": 0.2010865956544876, "eval_runtime": 3.2907, "eval_samples_per_second": 16.714, "eval_steps_per_second": 2.127, "eval_wer": 0.2460456942003515, "step": 4800 }, { "epoch": 13.243572395128552, "grad_norm": 4.432494640350342, "learning_rate": 0.0001691181818181818, "loss": 0.1813, "step": 4900 }, { "epoch": 13.243572395128552, "eval_cer": 0.06106213717480112, "eval_loss": 0.181858628988266, "eval_runtime": 3.2727, "eval_samples_per_second": 16.806, "eval_steps_per_second": 2.139, "eval_wer": 0.20913884007029876, "step": 4900 }, { "epoch": 13.514208389715833, "grad_norm": 2.20430850982666, "learning_rate": 0.00016639090909090906, "loss": 0.1872, "step": 5000 }, { "epoch": 13.514208389715833, "eval_cer": 0.06557729520533219, "eval_loss": 0.21534210443496704, "eval_runtime": 3.2649, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.144, "eval_wer": 0.2601054481546573, "step": 5000 }, { "epoch": 13.784844384303112, "grad_norm": 3.0091655254364014, "learning_rate": 0.00016366363636363635, "loss": 0.1947, "step": 5100 }, { "epoch": 13.784844384303112, "eval_cer": 0.06880240808428295, "eval_loss": 0.2198285013437271, "eval_runtime": 3.2966, "eval_samples_per_second": 16.684, "eval_steps_per_second": 2.123, "eval_wer": 0.2671353251318102, "step": 5100 }, { "epoch": 14.054127198917456, "grad_norm": 1.1179289817810059, "learning_rate": 0.00016093636363636362, "loss": 0.2001, "step": 5200 }, { "epoch": 14.054127198917456, "eval_cer": 0.06772737045796604, "eval_loss": 0.21502529084682465, "eval_runtime": 3.2826, "eval_samples_per_second": 16.755, "eval_steps_per_second": 2.132, "eval_wer": 0.2618629173989455, "step": 5200 }, { "epoch": 14.324763193504737, "grad_norm": 0.8994645476341248, "learning_rate": 0.00015820909090909089, "loss": 0.1799, "step": 5300 }, { "epoch": 14.324763193504737, "eval_cer": 0.05160180606321221, "eval_loss": 0.1535176932811737, "eval_runtime": 3.2382, "eval_samples_per_second": 16.985, "eval_steps_per_second": 2.162, "eval_wer": 0.18541300527240773, "step": 5300 }, { "epoch": 14.595399188092017, "grad_norm": 3.489891529083252, "learning_rate": 0.00015548181818181815, "loss": 0.1813, "step": 5400 }, { "epoch": 14.595399188092017, "eval_cer": 0.06514728015480542, "eval_loss": 0.21499599516391754, "eval_runtime": 3.2165, "eval_samples_per_second": 17.099, "eval_steps_per_second": 2.176, "eval_wer": 0.2565905096660808, "step": 5400 }, { "epoch": 14.866035182679296, "grad_norm": 2.1778228282928467, "learning_rate": 0.00015275454545454544, "loss": 0.1812, "step": 5500 }, { "epoch": 14.866035182679296, "eval_cer": 0.05783702429585035, "eval_loss": 0.18473133444786072, "eval_runtime": 3.2425, "eval_samples_per_second": 16.962, "eval_steps_per_second": 2.159, "eval_wer": 0.23198594024604569, "step": 5500 }, { "epoch": 15.13531799729364, "grad_norm": 9.761114120483398, "learning_rate": 0.00015002727272727274, "loss": 0.1932, "step": 5600 }, { "epoch": 15.13531799729364, "eval_cer": 0.0518168135884756, "eval_loss": 0.16397376358509064, "eval_runtime": 3.2727, "eval_samples_per_second": 16.805, "eval_steps_per_second": 2.139, "eval_wer": 0.20913884007029876, "step": 5600 }, { "epoch": 15.40595399188092, "grad_norm": 8.160325050354004, "learning_rate": 0.00014729999999999998, "loss": 0.169, "step": 5700 }, { "epoch": 15.40595399188092, "eval_cer": 0.06751236293270264, "eval_loss": 0.2461504340171814, "eval_runtime": 3.3466, "eval_samples_per_second": 16.435, "eval_steps_per_second": 2.092, "eval_wer": 0.24868189806678384, "step": 5700 }, { "epoch": 15.6765899864682, "grad_norm": 7.290473937988281, "learning_rate": 0.00014457272727272727, "loss": 0.1704, "step": 5800 }, { "epoch": 15.6765899864682, "eval_cer": 0.06020210707374758, "eval_loss": 0.1913210153579712, "eval_runtime": 3.328, "eval_samples_per_second": 16.526, "eval_steps_per_second": 2.103, "eval_wer": 0.23374340949033393, "step": 5800 }, { "epoch": 15.94722598105548, "grad_norm": 9.403716087341309, "learning_rate": 0.00014184545454545453, "loss": 0.1692, "step": 5900 }, { "epoch": 15.94722598105548, "eval_cer": 0.06579230273059557, "eval_loss": 0.2216808795928955, "eval_runtime": 3.2705, "eval_samples_per_second": 16.817, "eval_steps_per_second": 2.14, "eval_wer": 0.2390158172231986, "step": 5900 }, { "epoch": 16.216508795669824, "grad_norm": 1.2416729927062988, "learning_rate": 0.0001391181818181818, "loss": 0.133, "step": 6000 }, { "epoch": 16.216508795669824, "eval_cer": 0.0599870995484842, "eval_loss": 0.19244541227817535, "eval_runtime": 3.3093, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.115, "eval_wer": 0.2390158172231986, "step": 6000 }, { "epoch": 16.487144790257105, "grad_norm": 2.919238567352295, "learning_rate": 0.00013639090909090906, "loss": 0.1529, "step": 6100 }, { "epoch": 16.487144790257105, "eval_cer": 0.054826918942162975, "eval_loss": 0.160396009683609, "eval_runtime": 3.2653, "eval_samples_per_second": 16.844, "eval_steps_per_second": 2.144, "eval_wer": 0.20738137082601055, "step": 6100 }, { "epoch": 16.757780784844385, "grad_norm": 1.1939417123794556, "learning_rate": 0.00013366363636363636, "loss": 0.1586, "step": 6200 }, { "epoch": 16.757780784844385, "eval_cer": 0.05547194151795313, "eval_loss": 0.18521469831466675, "eval_runtime": 3.3238, "eval_samples_per_second": 16.547, "eval_steps_per_second": 2.106, "eval_wer": 0.22231985940246046, "step": 6200 }, { "epoch": 17.027063599458728, "grad_norm": 0.6369737386703491, "learning_rate": 0.00013093636363636362, "loss": 0.1821, "step": 6300 }, { "epoch": 17.027063599458728, "eval_cer": 0.05009675338636852, "eval_loss": 0.162165105342865, "eval_runtime": 3.3026, "eval_samples_per_second": 16.654, "eval_steps_per_second": 2.12, "eval_wer": 0.20913884007029876, "step": 6300 }, { "epoch": 17.29769959404601, "grad_norm": 1.01472806930542, "learning_rate": 0.00012820909090909092, "loss": 0.1425, "step": 6400 }, { "epoch": 17.29769959404601, "eval_cer": 0.054826918942162975, "eval_loss": 0.1956459879875183, "eval_runtime": 3.316, "eval_samples_per_second": 16.586, "eval_steps_per_second": 2.111, "eval_wer": 0.23286467486818982, "step": 6400 }, { "epoch": 17.56833558863329, "grad_norm": 1.0718566179275513, "learning_rate": 0.00012548181818181818, "loss": 0.1538, "step": 6500 }, { "epoch": 17.56833558863329, "eval_cer": 0.058052031821113736, "eval_loss": 0.19568397104740143, "eval_runtime": 3.2443, "eval_samples_per_second": 16.953, "eval_steps_per_second": 2.158, "eval_wer": 0.24165202108963094, "step": 6500 }, { "epoch": 17.83897158322057, "grad_norm": 0.5153430104255676, "learning_rate": 0.00012275454545454545, "loss": 0.1395, "step": 6600 }, { "epoch": 17.83897158322057, "eval_cer": 0.05826703934637712, "eval_loss": 0.19457882642745972, "eval_runtime": 3.2924, "eval_samples_per_second": 16.705, "eval_steps_per_second": 2.126, "eval_wer": 0.2398945518453427, "step": 6600 }, { "epoch": 18.108254397834912, "grad_norm": 1.9209911823272705, "learning_rate": 0.00012002727272727273, "loss": 0.1331, "step": 6700 }, { "epoch": 18.108254397834912, "eval_cer": 0.05869705439690389, "eval_loss": 0.226872980594635, "eval_runtime": 3.3195, "eval_samples_per_second": 16.569, "eval_steps_per_second": 2.109, "eval_wer": 0.2539543057996485, "step": 6700 }, { "epoch": 18.378890392422193, "grad_norm": 4.072175979614258, "learning_rate": 0.00011729999999999999, "loss": 0.1242, "step": 6800 }, { "epoch": 18.378890392422193, "eval_cer": 0.05826703934637712, "eval_loss": 0.2108573615550995, "eval_runtime": 3.2958, "eval_samples_per_second": 16.688, "eval_steps_per_second": 2.124, "eval_wer": 0.23637961335676624, "step": 6800 }, { "epoch": 18.649526387009473, "grad_norm": 2.9033162593841553, "learning_rate": 0.00011457272727272727, "loss": 0.1323, "step": 6900 }, { "epoch": 18.649526387009473, "eval_cer": 0.054396903891636206, "eval_loss": 0.20044924318790436, "eval_runtime": 3.2775, "eval_samples_per_second": 16.781, "eval_steps_per_second": 2.136, "eval_wer": 0.22759226713532513, "step": 6900 }, { "epoch": 18.920162381596754, "grad_norm": 11.889602661132812, "learning_rate": 0.00011184545454545454, "loss": 0.1384, "step": 7000 }, { "epoch": 18.920162381596754, "eval_cer": 0.05697699419479682, "eval_loss": 0.20252789556980133, "eval_runtime": 3.2287, "eval_samples_per_second": 17.035, "eval_steps_per_second": 2.168, "eval_wer": 0.2451669595782074, "step": 7000 }, { "epoch": 19.189445196211096, "grad_norm": 1.1512547731399536, "learning_rate": 0.00010911818181818182, "loss": 0.1294, "step": 7100 }, { "epoch": 19.189445196211096, "eval_cer": 0.05418189636637282, "eval_loss": 0.21085655689239502, "eval_runtime": 3.2121, "eval_samples_per_second": 17.123, "eval_steps_per_second": 2.179, "eval_wer": 0.2390158172231986, "step": 7100 }, { "epoch": 19.460081190798377, "grad_norm": 0.6903840899467468, "learning_rate": 0.00010639090909090908, "loss": 0.1279, "step": 7200 }, { "epoch": 19.460081190798377, "eval_cer": 0.04794667813373468, "eval_loss": 0.16093921661376953, "eval_runtime": 3.2295, "eval_samples_per_second": 17.031, "eval_steps_per_second": 2.168, "eval_wer": 0.19156414762741653, "step": 7200 }, { "epoch": 19.730717185385657, "grad_norm": 1.4991930723190308, "learning_rate": 0.00010366363636363636, "loss": 0.122, "step": 7300 }, { "epoch": 19.730717185385657, "eval_cer": 0.06063212212427435, "eval_loss": 0.24742065370082855, "eval_runtime": 3.295, "eval_samples_per_second": 16.692, "eval_steps_per_second": 2.124, "eval_wer": 0.2539543057996485, "step": 7300 }, { "epoch": 20.0, "grad_norm": 4.862514019012451, "learning_rate": 0.00010093636363636363, "loss": 0.1222, "step": 7400 }, { "epoch": 20.0, "eval_cer": 0.050311760911631906, "eval_loss": 0.17492927610874176, "eval_runtime": 3.2165, "eval_samples_per_second": 17.099, "eval_steps_per_second": 2.176, "eval_wer": 0.1968365553602812, "step": 7400 }, { "epoch": 20.27063599458728, "grad_norm": 10.719727516174316, "learning_rate": 9.82090909090909e-05, "loss": 0.1121, "step": 7500 }, { "epoch": 20.27063599458728, "eval_cer": 0.05869705439690389, "eval_loss": 0.24458029866218567, "eval_runtime": 3.2693, "eval_samples_per_second": 16.823, "eval_steps_per_second": 2.141, "eval_wer": 0.2513181019332162, "step": 7500 }, { "epoch": 20.54127198917456, "grad_norm": 6.036056995391846, "learning_rate": 9.548181818181817e-05, "loss": 0.1142, "step": 7600 }, { "epoch": 20.54127198917456, "eval_cer": 0.05332186626531928, "eval_loss": 0.19322967529296875, "eval_runtime": 3.297, "eval_samples_per_second": 16.682, "eval_steps_per_second": 2.123, "eval_wer": 0.22319859402460457, "step": 7600 }, { "epoch": 20.81190798376184, "grad_norm": 5.935936450958252, "learning_rate": 9.275454545454544e-05, "loss": 0.1226, "step": 7700 }, { "epoch": 20.81190798376184, "eval_cer": 0.05353687379058267, "eval_loss": 0.2194850891828537, "eval_runtime": 3.272, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.139, "eval_wer": 0.23198594024604569, "step": 7700 }, { "epoch": 21.081190798376184, "grad_norm": 1.9924638271331787, "learning_rate": 9.002727272727272e-05, "loss": 0.1052, "step": 7800 }, { "epoch": 21.081190798376184, "eval_cer": 0.05095678348742206, "eval_loss": 0.20277012884616852, "eval_runtime": 3.2836, "eval_samples_per_second": 16.75, "eval_steps_per_second": 2.132, "eval_wer": 0.22231985940246046, "step": 7800 }, { "epoch": 21.351826792963465, "grad_norm": 5.319161891937256, "learning_rate": 8.729999999999998e-05, "loss": 0.1077, "step": 7900 }, { "epoch": 21.351826792963465, "eval_cer": 0.05353687379058267, "eval_loss": 0.18649469316005707, "eval_runtime": 3.264, "eval_samples_per_second": 16.85, "eval_steps_per_second": 2.145, "eval_wer": 0.210896309314587, "step": 7900 }, { "epoch": 21.622462787550745, "grad_norm": 2.034726619720459, "learning_rate": 8.457272727272726e-05, "loss": 0.114, "step": 8000 }, { "epoch": 21.622462787550745, "eval_cer": 0.04687164050741776, "eval_loss": 0.17550522089004517, "eval_runtime": 3.2743, "eval_samples_per_second": 16.798, "eval_steps_per_second": 2.138, "eval_wer": 0.21704745166959577, "step": 8000 }, { "epoch": 21.893098782138026, "grad_norm": 1.0931280851364136, "learning_rate": 8.184545454545453e-05, "loss": 0.1041, "step": 8100 }, { "epoch": 21.893098782138026, "eval_cer": 0.04579660288110084, "eval_loss": 0.1914069801568985, "eval_runtime": 3.2452, "eval_samples_per_second": 16.948, "eval_steps_per_second": 2.157, "eval_wer": 0.2117750439367311, "step": 8100 }, { "epoch": 22.16238159675237, "grad_norm": 3.6217257976531982, "learning_rate": 7.911818181818182e-05, "loss": 0.1149, "step": 8200 }, { "epoch": 22.16238159675237, "eval_cer": 0.0477316706084713, "eval_loss": 0.18972058594226837, "eval_runtime": 3.2691, "eval_samples_per_second": 16.824, "eval_steps_per_second": 2.141, "eval_wer": 0.2179261862917399, "step": 8200 }, { "epoch": 22.43301759133965, "grad_norm": 1.2710479497909546, "learning_rate": 7.63909090909091e-05, "loss": 0.0984, "step": 8300 }, { "epoch": 22.43301759133965, "eval_cer": 0.05203182111373898, "eval_loss": 0.21701879799365997, "eval_runtime": 3.3005, "eval_samples_per_second": 16.664, "eval_steps_per_second": 2.121, "eval_wer": 0.22934973637961337, "step": 8300 }, { "epoch": 22.70365358592693, "grad_norm": 0.5214836597442627, "learning_rate": 7.366363636363635e-05, "loss": 0.0974, "step": 8400 }, { "epoch": 22.70365358592693, "eval_cer": 0.046226617931627606, "eval_loss": 0.1713176965713501, "eval_runtime": 3.3717, "eval_samples_per_second": 16.312, "eval_steps_per_second": 2.076, "eval_wer": 0.1968365553602812, "step": 8400 }, { "epoch": 22.97428958051421, "grad_norm": 2.3021202087402344, "learning_rate": 7.093636363636363e-05, "loss": 0.1052, "step": 8500 }, { "epoch": 22.97428958051421, "eval_cer": 0.04837669318426145, "eval_loss": 0.17614266276359558, "eval_runtime": 3.5916, "eval_samples_per_second": 15.313, "eval_steps_per_second": 1.949, "eval_wer": 0.21441124780316345, "step": 8500 }, { "epoch": 23.243572395128552, "grad_norm": 3.924398183822632, "learning_rate": 6.82090909090909e-05, "loss": 0.0892, "step": 8600 }, { "epoch": 23.243572395128552, "eval_cer": 0.04429155020425715, "eval_loss": 0.14997613430023193, "eval_runtime": 3.344, "eval_samples_per_second": 16.448, "eval_steps_per_second": 2.093, "eval_wer": 0.18629173989455183, "step": 8600 }, { "epoch": 23.514208389715833, "grad_norm": 1.1947544813156128, "learning_rate": 6.548181818181817e-05, "loss": 0.0919, "step": 8700 }, { "epoch": 23.514208389715833, "eval_cer": 0.04601161040636422, "eval_loss": 0.15268389880657196, "eval_runtime": 3.3038, "eval_samples_per_second": 16.648, "eval_steps_per_second": 2.119, "eval_wer": 0.19859402460456943, "step": 8700 }, { "epoch": 23.784844384303113, "grad_norm": 1.3969401121139526, "learning_rate": 6.275454545454545e-05, "loss": 0.0956, "step": 8800 }, { "epoch": 23.784844384303113, "eval_cer": 0.0449365727800473, "eval_loss": 0.15815618634223938, "eval_runtime": 3.2819, "eval_samples_per_second": 16.759, "eval_steps_per_second": 2.133, "eval_wer": 0.1968365553602812, "step": 8800 }, { "epoch": 24.054127198917456, "grad_norm": 2.335787773132324, "learning_rate": 6.0027272727272725e-05, "loss": 0.1, "step": 8900 }, { "epoch": 24.054127198917456, "eval_cer": 0.04536658783057407, "eval_loss": 0.17150932550430298, "eval_runtime": 3.246, "eval_samples_per_second": 16.944, "eval_steps_per_second": 2.157, "eval_wer": 0.20123022847100175, "step": 8900 }, { "epoch": 24.324763193504737, "grad_norm": 1.6086431741714478, "learning_rate": 5.73e-05, "loss": 0.0846, "step": 9000 }, { "epoch": 24.324763193504737, "eval_cer": 0.04579660288110084, "eval_loss": 0.18286831676959991, "eval_runtime": 3.2566, "eval_samples_per_second": 16.889, "eval_steps_per_second": 2.149, "eval_wer": 0.20298769771529, "step": 9000 }, { "epoch": 24.595399188092017, "grad_norm": 1.6925404071807861, "learning_rate": 5.457272727272727e-05, "loss": 0.083, "step": 9100 }, { "epoch": 24.595399188092017, "eval_cer": 0.04536658783057407, "eval_loss": 0.17126092314720154, "eval_runtime": 3.2285, "eval_samples_per_second": 17.036, "eval_steps_per_second": 2.168, "eval_wer": 0.20650263620386644, "step": 9100 }, { "epoch": 24.866035182679298, "grad_norm": 1.5034745931625366, "learning_rate": 5.184545454545454e-05, "loss": 0.0821, "step": 9200 }, { "epoch": 24.866035182679298, "eval_cer": 0.04988174586110514, "eval_loss": 0.19512753188610077, "eval_runtime": 3.2263, "eval_samples_per_second": 17.048, "eval_steps_per_second": 2.17, "eval_wer": 0.22231985940246046, "step": 9200 }, { "epoch": 25.13531799729364, "grad_norm": 6.091914653778076, "learning_rate": 4.9118181818181814e-05, "loss": 0.0917, "step": 9300 }, { "epoch": 25.13531799729364, "eval_cer": 0.04730165555794453, "eval_loss": 0.18300001323223114, "eval_runtime": 3.2216, "eval_samples_per_second": 17.072, "eval_steps_per_second": 2.173, "eval_wer": 0.2100175746924429, "step": 9300 }, { "epoch": 25.40595399188092, "grad_norm": 5.720769882202148, "learning_rate": 4.6390909090909086e-05, "loss": 0.0806, "step": 9400 }, { "epoch": 25.40595399188092, "eval_cer": 0.0477316706084713, "eval_loss": 0.17701777815818787, "eval_runtime": 3.3053, "eval_samples_per_second": 16.64, "eval_steps_per_second": 2.118, "eval_wer": 0.21353251318101935, "step": 9400 }, { "epoch": 25.6765899864682, "grad_norm": 7.1992998123168945, "learning_rate": 4.366363636363636e-05, "loss": 0.0756, "step": 9500 }, { "epoch": 25.6765899864682, "eval_cer": 0.04816168565899807, "eval_loss": 0.1869199126958847, "eval_runtime": 3.2573, "eval_samples_per_second": 16.885, "eval_steps_per_second": 2.149, "eval_wer": 0.21441124780316345, "step": 9500 }, { "epoch": 25.94722598105548, "grad_norm": 9.373414039611816, "learning_rate": 4.093636363636364e-05, "loss": 0.0807, "step": 9600 }, { "epoch": 25.94722598105548, "eval_cer": 0.0490217157600516, "eval_loss": 0.17646533250808716, "eval_runtime": 3.2627, "eval_samples_per_second": 16.857, "eval_steps_per_second": 2.145, "eval_wer": 0.20210896309314588, "step": 9600 }, { "epoch": 26.216508795669824, "grad_norm": 1.0124870538711548, "learning_rate": 3.820909090909091e-05, "loss": 0.059, "step": 9700 }, { "epoch": 26.216508795669824, "eval_cer": 0.051171791012685444, "eval_loss": 0.20564059913158417, "eval_runtime": 3.2967, "eval_samples_per_second": 16.683, "eval_steps_per_second": 2.123, "eval_wer": 0.22407732864674867, "step": 9700 }, { "epoch": 26.487144790257105, "grad_norm": 1.1935291290283203, "learning_rate": 3.548181818181818e-05, "loss": 0.0831, "step": 9800 }, { "epoch": 26.487144790257105, "eval_cer": 0.05009675338636852, "eval_loss": 0.1965063065290451, "eval_runtime": 3.3021, "eval_samples_per_second": 16.656, "eval_steps_per_second": 2.12, "eval_wer": 0.22056239015817222, "step": 9800 }, { "epoch": 26.757780784844385, "grad_norm": 1.9065356254577637, "learning_rate": 3.2754545454545455e-05, "loss": 0.0647, "step": 9900 }, { "epoch": 26.757780784844385, "eval_cer": 0.04751666308320791, "eval_loss": 0.1832188367843628, "eval_runtime": 3.2986, "eval_samples_per_second": 16.674, "eval_steps_per_second": 2.122, "eval_wer": 0.20826010544815465, "step": 9900 }, { "epoch": 27.027063599458728, "grad_norm": 0.8792353868484497, "learning_rate": 3.0027272727272724e-05, "loss": 0.0788, "step": 10000 }, { "epoch": 27.027063599458728, "eval_cer": 0.046656632982154375, "eval_loss": 0.17078326642513275, "eval_runtime": 3.3039, "eval_samples_per_second": 16.647, "eval_steps_per_second": 2.119, "eval_wer": 0.19859402460456943, "step": 10000 }, { "epoch": 27.29769959404601, "grad_norm": 0.8240686655044556, "learning_rate": 2.7299999999999996e-05, "loss": 0.0677, "step": 10100 }, { "epoch": 27.29769959404601, "eval_cer": 0.0449365727800473, "eval_loss": 0.16915105283260345, "eval_runtime": 3.2712, "eval_samples_per_second": 16.813, "eval_steps_per_second": 2.14, "eval_wer": 0.1827768014059754, "step": 10100 }, { "epoch": 27.56833558863329, "grad_norm": 0.5428586006164551, "learning_rate": 2.457272727272727e-05, "loss": 0.0651, "step": 10200 }, { "epoch": 27.56833558863329, "eval_cer": 0.04644162545689099, "eval_loss": 0.17613492906093597, "eval_runtime": 3.2478, "eval_samples_per_second": 16.935, "eval_steps_per_second": 2.155, "eval_wer": 0.20210896309314588, "step": 10200 }, { "epoch": 27.83897158322057, "grad_norm": 0.8122203946113586, "learning_rate": 2.1845454545454544e-05, "loss": 0.0602, "step": 10300 }, { "epoch": 27.83897158322057, "eval_cer": 0.046656632982154375, "eval_loss": 0.16123421490192413, "eval_runtime": 3.2919, "eval_samples_per_second": 16.708, "eval_steps_per_second": 2.126, "eval_wer": 0.18804920913884007, "step": 10300 }, { "epoch": 28.108254397834912, "grad_norm": 1.2256019115447998, "learning_rate": 1.9118181818181817e-05, "loss": 0.0586, "step": 10400 }, { "epoch": 28.108254397834912, "eval_cer": 0.04579660288110084, "eval_loss": 0.17519748210906982, "eval_runtime": 3.2923, "eval_samples_per_second": 16.706, "eval_steps_per_second": 2.126, "eval_wer": 0.19947275922671354, "step": 10400 }, { "epoch": 28.378890392422193, "grad_norm": 1.0472385883331299, "learning_rate": 1.639090909090909e-05, "loss": 0.0602, "step": 10500 }, { "epoch": 28.378890392422193, "eval_cer": 0.04536658783057407, "eval_loss": 0.17338904738426208, "eval_runtime": 3.2664, "eval_samples_per_second": 16.838, "eval_steps_per_second": 2.143, "eval_wer": 0.19507908611599298, "step": 10500 }, { "epoch": 28.649526387009473, "grad_norm": 1.4401935338974, "learning_rate": 1.3663636363636363e-05, "loss": 0.0636, "step": 10600 }, { "epoch": 28.649526387009473, "eval_cer": 0.04579660288110084, "eval_loss": 0.1733109951019287, "eval_runtime": 3.3348, "eval_samples_per_second": 16.493, "eval_steps_per_second": 2.099, "eval_wer": 0.20210896309314588, "step": 10600 }, { "epoch": 28.920162381596754, "grad_norm": 0.8489630818367004, "learning_rate": 1.0936363636363635e-05, "loss": 0.0642, "step": 10700 }, { "epoch": 28.920162381596754, "eval_cer": 0.047086648032681144, "eval_loss": 0.17440861463546753, "eval_runtime": 3.3022, "eval_samples_per_second": 16.656, "eval_steps_per_second": 2.12, "eval_wer": 0.2056239015817223, "step": 10700 }, { "epoch": 29.189445196211096, "grad_norm": 1.0616735219955444, "learning_rate": 8.20909090909091e-06, "loss": 0.0611, "step": 10800 }, { "epoch": 29.189445196211096, "eval_cer": 0.04601161040636422, "eval_loss": 0.17405055463314056, "eval_runtime": 3.2326, "eval_samples_per_second": 17.014, "eval_steps_per_second": 2.165, "eval_wer": 0.20298769771529, "step": 10800 }, { "epoch": 29.460081190798377, "grad_norm": 1.259354591369629, "learning_rate": 5.481818181818182e-06, "loss": 0.0596, "step": 10900 }, { "epoch": 29.460081190798377, "eval_cer": 0.0449365727800473, "eval_loss": 0.17219401895999908, "eval_runtime": 3.1719, "eval_samples_per_second": 17.34, "eval_steps_per_second": 2.207, "eval_wer": 0.19507908611599298, "step": 10900 }, { "epoch": 29.730717185385657, "grad_norm": 0.4698294401168823, "learning_rate": 2.754545454545454e-06, "loss": 0.0627, "step": 11000 }, { "epoch": 29.730717185385657, "eval_cer": 0.0449365727800473, "eval_loss": 0.17380425333976746, "eval_runtime": 3.2854, "eval_samples_per_second": 16.741, "eval_steps_per_second": 2.131, "eval_wer": 0.19859402460456943, "step": 11000 }, { "epoch": 30.0, "grad_norm": 9.292856216430664, "learning_rate": 2.727272727272727e-08, "loss": 0.0623, "step": 11100 }, { "epoch": 30.0, "eval_cer": 0.0449365727800473, "eval_loss": 0.17295825481414795, "eval_runtime": 3.2552, "eval_samples_per_second": 16.896, "eval_steps_per_second": 2.15, "eval_wer": 0.19859402460456943, "step": 11100 }, { "epoch": 30.0, "step": 11100, "total_flos": 1.8673488922198168e+19, "train_loss": 0.3048185482111063, "train_runtime": 12419.9418, "train_samples_per_second": 28.548, "train_steps_per_second": 0.894 }, { "epoch": 30.0, "eval_cer": 0.04515158030531068, "eval_loss": 0.17296089231967926, "eval_runtime": 3.3418, "eval_samples_per_second": 16.458, "eval_steps_per_second": 2.095, "eval_wer": 0.19859402460456943, "step": 11100 } ], "logging_steps": 100, "max_steps": 11100, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8673488922198168e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }