| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 100, | |
| "global_step": 11100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2706359945872801, | |
| "grad_norm": 1.0425561666488647, | |
| "learning_rate": 0.00029699999999999996, | |
| "loss": 5.107, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2706359945872801, | |
| "eval_cer": 0.9907546764136744, | |
| "eval_loss": 2.8580877780914307, | |
| "eval_runtime": 3.2586, | |
| "eval_samples_per_second": 16.879, | |
| "eval_steps_per_second": 2.148, | |
| "eval_wer": 1.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5412719891745602, | |
| "grad_norm": 1.5294588804244995, | |
| "learning_rate": 0.00029729999999999996, | |
| "loss": 2.696, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5412719891745602, | |
| "eval_cer": 0.700064502257579, | |
| "eval_loss": 2.3650922775268555, | |
| "eval_runtime": 3.2771, | |
| "eval_samples_per_second": 16.783, | |
| "eval_steps_per_second": 2.136, | |
| "eval_wer": 0.9815465729349736, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8119079837618404, | |
| "grad_norm": 4.284913539886475, | |
| "learning_rate": 0.0002945727272727273, | |
| "loss": 1.9594, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8119079837618404, | |
| "eval_cer": 0.39991399698989466, | |
| "eval_loss": 1.3402470350265503, | |
| "eval_runtime": 3.3075, | |
| "eval_samples_per_second": 16.629, | |
| "eval_steps_per_second": 2.116, | |
| "eval_wer": 0.8391915641476274, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0811907983761841, | |
| "grad_norm": 1.6875638961791992, | |
| "learning_rate": 0.0002918454545454545, | |
| "loss": 1.1816, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0811907983761841, | |
| "eval_cer": 0.19221672758546549, | |
| "eval_loss": 0.5386932492256165, | |
| "eval_runtime": 3.2713, | |
| "eval_samples_per_second": 16.813, | |
| "eval_steps_per_second": 2.14, | |
| "eval_wer": 0.5404217926186292, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.3518267929634642, | |
| "grad_norm": 2.1928625106811523, | |
| "learning_rate": 0.0002891181818181818, | |
| "loss": 0.8925, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3518267929634642, | |
| "eval_cer": 0.1498602451085788, | |
| "eval_loss": 0.3904643654823303, | |
| "eval_runtime": 3.2769, | |
| "eval_samples_per_second": 16.784, | |
| "eval_steps_per_second": 2.136, | |
| "eval_wer": 0.45782073813708263, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6224627875507442, | |
| "grad_norm": 1.7728203535079956, | |
| "learning_rate": 0.0002863909090909091, | |
| "loss": 0.7373, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.6224627875507442, | |
| "eval_cer": 0.11416899591485702, | |
| "eval_loss": 0.3346728980541229, | |
| "eval_runtime": 3.2076, | |
| "eval_samples_per_second": 17.147, | |
| "eval_steps_per_second": 2.182, | |
| "eval_wer": 0.37434094903339193, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.8930987821380243, | |
| "grad_norm": 1.9392462968826294, | |
| "learning_rate": 0.00028366363636363634, | |
| "loss": 0.66, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.8930987821380243, | |
| "eval_cer": 0.10728875510642873, | |
| "eval_loss": 0.29814502596855164, | |
| "eval_runtime": 3.2663, | |
| "eval_samples_per_second": 16.839, | |
| "eval_steps_per_second": 2.143, | |
| "eval_wer": 0.3242530755711775, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.1623815967523683, | |
| "grad_norm": 1.3830772638320923, | |
| "learning_rate": 0.0002809363636363636, | |
| "loss": 0.6412, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.1623815967523683, | |
| "eval_cer": 0.10040851429800043, | |
| "eval_loss": 0.2573491632938385, | |
| "eval_runtime": 3.2309, | |
| "eval_samples_per_second": 17.023, | |
| "eval_steps_per_second": 2.167, | |
| "eval_wer": 0.3312829525483304, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.4330175913396483, | |
| "grad_norm": 1.7728252410888672, | |
| "learning_rate": 0.0002782090909090909, | |
| "loss": 0.5683, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.4330175913396483, | |
| "eval_cer": 0.10578370242958504, | |
| "eval_loss": 0.2961094379425049, | |
| "eval_runtime": 3.236, | |
| "eval_samples_per_second": 16.996, | |
| "eval_steps_per_second": 2.163, | |
| "eval_wer": 0.35676625659050965, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.7036535859269284, | |
| "grad_norm": 2.22818660736084, | |
| "learning_rate": 0.00027548181818181814, | |
| "loss": 0.5538, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.7036535859269284, | |
| "eval_cer": 0.1077187701569555, | |
| "eval_loss": 0.26289868354797363, | |
| "eval_runtime": 3.2491, | |
| "eval_samples_per_second": 16.928, | |
| "eval_steps_per_second": 2.154, | |
| "eval_wer": 0.3453427065026362, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.9742895805142084, | |
| "grad_norm": 2.420909881591797, | |
| "learning_rate": 0.00027275454545454546, | |
| "loss": 0.4991, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.9742895805142084, | |
| "eval_cer": 0.10427864975274134, | |
| "eval_loss": 0.2832812964916229, | |
| "eval_runtime": 3.3057, | |
| "eval_samples_per_second": 16.638, | |
| "eval_steps_per_second": 2.118, | |
| "eval_wer": 0.3312829525483304, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.243572395128552, | |
| "grad_norm": 5.234218120574951, | |
| "learning_rate": 0.00027002727272727267, | |
| "loss": 0.4536, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.243572395128552, | |
| "eval_cer": 0.08535798752956353, | |
| "eval_loss": 0.24891051650047302, | |
| "eval_runtime": 3.2773, | |
| "eval_samples_per_second": 16.782, | |
| "eval_steps_per_second": 2.136, | |
| "eval_wer": 0.3347978910369069, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.514208389715832, | |
| "grad_norm": 2.5880632400512695, | |
| "learning_rate": 0.0002673, | |
| "loss": 0.4621, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.514208389715832, | |
| "eval_cer": 0.09460331111588906, | |
| "eval_loss": 0.23712627589702606, | |
| "eval_runtime": 3.2761, | |
| "eval_samples_per_second": 16.788, | |
| "eval_steps_per_second": 2.137, | |
| "eval_wer": 0.3260105448154657, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.7848443843031125, | |
| "grad_norm": 3.540529251098633, | |
| "learning_rate": 0.00026457272727272726, | |
| "loss": 0.4401, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.7848443843031125, | |
| "eval_cer": 0.08600301010535369, | |
| "eval_loss": 0.21003246307373047, | |
| "eval_runtime": 3.3426, | |
| "eval_samples_per_second": 16.454, | |
| "eval_steps_per_second": 2.094, | |
| "eval_wer": 0.28295254833040423, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.054127198917456, | |
| "grad_norm": 1.8023126125335693, | |
| "learning_rate": 0.0002618454545454545, | |
| "loss": 0.4278, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.054127198917456, | |
| "eval_cer": 0.10105353687379058, | |
| "eval_loss": 0.2677291929721832, | |
| "eval_runtime": 3.2361, | |
| "eval_samples_per_second": 16.996, | |
| "eval_steps_per_second": 2.163, | |
| "eval_wer": 0.3321616871704745, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.324763193504737, | |
| "grad_norm": 3.478238821029663, | |
| "learning_rate": 0.0002591181818181818, | |
| "loss": 0.386, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.324763193504737, | |
| "eval_cer": 0.09030316061062137, | |
| "eval_loss": 0.24809867143630981, | |
| "eval_runtime": 3.2871, | |
| "eval_samples_per_second": 16.732, | |
| "eval_steps_per_second": 2.13, | |
| "eval_wer": 0.29789103690685415, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.595399188092016, | |
| "grad_norm": 2.037954568862915, | |
| "learning_rate": 0.00025639090909090905, | |
| "loss": 0.3943, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.595399188092016, | |
| "eval_cer": 0.09503332616641583, | |
| "eval_loss": 0.2835349440574646, | |
| "eval_runtime": 3.2426, | |
| "eval_samples_per_second": 16.962, | |
| "eval_steps_per_second": 2.159, | |
| "eval_wer": 0.31282952548330406, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.866035182679297, | |
| "grad_norm": 2.778028964996338, | |
| "learning_rate": 0.0002536636363636364, | |
| "loss": 0.3986, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.866035182679297, | |
| "eval_cer": 0.0707374758116534, | |
| "eval_loss": 0.17705407738685608, | |
| "eval_runtime": 3.3043, | |
| "eval_samples_per_second": 16.645, | |
| "eval_steps_per_second": 2.118, | |
| "eval_wer": 0.24253075571177504, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.13531799729364, | |
| "grad_norm": 10.435393333435059, | |
| "learning_rate": 0.00025093636363636364, | |
| "loss": 0.3996, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.13531799729364, | |
| "eval_cer": 0.0885831004085143, | |
| "eval_loss": 0.24632568657398224, | |
| "eval_runtime": 3.2851, | |
| "eval_samples_per_second": 16.742, | |
| "eval_steps_per_second": 2.131, | |
| "eval_wer": 0.3233743409490334, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.40595399188092, | |
| "grad_norm": 8.714759826660156, | |
| "learning_rate": 0.0002482090909090909, | |
| "loss": 0.3468, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.40595399188092, | |
| "eval_cer": 0.0870780477316706, | |
| "eval_loss": 0.2470923364162445, | |
| "eval_runtime": 3.3067, | |
| "eval_samples_per_second": 16.633, | |
| "eval_steps_per_second": 2.117, | |
| "eval_wer": 0.2521968365553603, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.6765899864682, | |
| "grad_norm": 7.187710285186768, | |
| "learning_rate": 0.00024548181818181817, | |
| "loss": 0.3429, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.6765899864682, | |
| "eval_cer": 0.09008815308535799, | |
| "eval_loss": 0.24462804198265076, | |
| "eval_runtime": 3.2272, | |
| "eval_samples_per_second": 17.043, | |
| "eval_steps_per_second": 2.169, | |
| "eval_wer": 0.30492091388400705, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.94722598105548, | |
| "grad_norm": 7.232232570648193, | |
| "learning_rate": 0.00024275454545454544, | |
| "loss": 0.3362, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.94722598105548, | |
| "eval_cer": 0.10234358202537089, | |
| "eval_loss": 0.2832447290420532, | |
| "eval_runtime": 3.2707, | |
| "eval_samples_per_second": 16.816, | |
| "eval_steps_per_second": 2.14, | |
| "eval_wer": 0.3216168717047452, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 6.216508795669824, | |
| "grad_norm": 2.5066192150115967, | |
| "learning_rate": 0.0002400272727272727, | |
| "loss": 0.3257, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.216508795669824, | |
| "eval_cer": 0.08922812298430445, | |
| "eval_loss": 0.2616831660270691, | |
| "eval_runtime": 3.289, | |
| "eval_samples_per_second": 16.722, | |
| "eval_steps_per_second": 2.128, | |
| "eval_wer": 0.2750439367311072, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.487144790257104, | |
| "grad_norm": 1.5899525880813599, | |
| "learning_rate": 0.0002373, | |
| "loss": 0.3088, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.487144790257104, | |
| "eval_cer": 0.08213287465061277, | |
| "eval_loss": 0.2477259337902069, | |
| "eval_runtime": 3.2212, | |
| "eval_samples_per_second": 17.074, | |
| "eval_steps_per_second": 2.173, | |
| "eval_wer": 0.2671353251318102, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 6.7577807848443845, | |
| "grad_norm": 1.64902925491333, | |
| "learning_rate": 0.00023457272727272723, | |
| "loss": 0.3092, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.7577807848443845, | |
| "eval_cer": 0.07288755106428725, | |
| "eval_loss": 0.16897092759609222, | |
| "eval_runtime": 3.2797, | |
| "eval_samples_per_second": 16.77, | |
| "eval_steps_per_second": 2.134, | |
| "eval_wer": 0.2126537785588752, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.027063599458728, | |
| "grad_norm": 3.107236862182617, | |
| "learning_rate": 0.00023184545454545453, | |
| "loss": 0.3247, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 7.027063599458728, | |
| "eval_cer": 0.0668673403569125, | |
| "eval_loss": 0.16091446578502655, | |
| "eval_runtime": 3.2437, | |
| "eval_samples_per_second": 16.956, | |
| "eval_steps_per_second": 2.158, | |
| "eval_wer": 0.24428822495606328, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 7.2976995940460085, | |
| "grad_norm": 1.4398540258407593, | |
| "learning_rate": 0.0002291181818181818, | |
| "loss": 0.2877, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 7.2976995940460085, | |
| "eval_cer": 0.081702859600086, | |
| "eval_loss": 0.21893317997455597, | |
| "eval_runtime": 3.3058, | |
| "eval_samples_per_second": 16.637, | |
| "eval_steps_per_second": 2.117, | |
| "eval_wer": 0.2697715289982425, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 7.568335588633288, | |
| "grad_norm": 1.3356603384017944, | |
| "learning_rate": 0.00022639090909090908, | |
| "loss": 0.2978, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.568335588633288, | |
| "eval_cer": 0.08406794237798323, | |
| "eval_loss": 0.21929292380809784, | |
| "eval_runtime": 3.3113, | |
| "eval_samples_per_second": 16.61, | |
| "eval_steps_per_second": 2.114, | |
| "eval_wer": 0.2697715289982425, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 7.838971583220569, | |
| "grad_norm": 1.0422672033309937, | |
| "learning_rate": 0.00022366363636363632, | |
| "loss": 0.3011, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 7.838971583220569, | |
| "eval_cer": 0.09051816813588476, | |
| "eval_loss": 0.22263768315315247, | |
| "eval_runtime": 3.2412, | |
| "eval_samples_per_second": 16.969, | |
| "eval_steps_per_second": 2.16, | |
| "eval_wer": 0.27768014059753954, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 8.108254397834912, | |
| "grad_norm": 3.0150039196014404, | |
| "learning_rate": 0.00022093636363636362, | |
| "loss": 0.2792, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.108254397834912, | |
| "eval_cer": 0.08772307030746077, | |
| "eval_loss": 0.24399547278881073, | |
| "eval_runtime": 3.2156, | |
| "eval_samples_per_second": 17.104, | |
| "eval_steps_per_second": 2.177, | |
| "eval_wer": 0.2750439367311072, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.378890392422193, | |
| "grad_norm": 3.422705888748169, | |
| "learning_rate": 0.00021820909090909088, | |
| "loss": 0.2652, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 8.378890392422193, | |
| "eval_cer": 0.08621801763061707, | |
| "eval_loss": 0.25809481739997864, | |
| "eval_runtime": 3.2041, | |
| "eval_samples_per_second": 17.166, | |
| "eval_steps_per_second": 2.185, | |
| "eval_wer": 0.2565905096660808, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 8.649526387009473, | |
| "grad_norm": 7.11945104598999, | |
| "learning_rate": 0.00021548181818181817, | |
| "loss": 0.2847, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.649526387009473, | |
| "eval_cer": 0.08385293485271984, | |
| "eval_loss": 0.2583249807357788, | |
| "eval_runtime": 3.2692, | |
| "eval_samples_per_second": 16.824, | |
| "eval_steps_per_second": 2.141, | |
| "eval_wer": 0.29876977152899825, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 8.920162381596752, | |
| "grad_norm": 4.319618225097656, | |
| "learning_rate": 0.0002127545454545454, | |
| "loss": 0.2788, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 8.920162381596752, | |
| "eval_cer": 0.078907761771662, | |
| "eval_loss": 0.19587865471839905, | |
| "eval_runtime": 3.2926, | |
| "eval_samples_per_second": 16.704, | |
| "eval_steps_per_second": 2.126, | |
| "eval_wer": 0.24253075571177504, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 9.189445196211096, | |
| "grad_norm": 2.1342787742614746, | |
| "learning_rate": 0.0002100272727272727, | |
| "loss": 0.2589, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 9.189445196211096, | |
| "eval_cer": 0.06923242313480972, | |
| "eval_loss": 0.19880472123622894, | |
| "eval_runtime": 3.2414, | |
| "eval_samples_per_second": 16.968, | |
| "eval_steps_per_second": 2.16, | |
| "eval_wer": 0.24956063268892795, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 9.460081190798377, | |
| "grad_norm": 1.656413197517395, | |
| "learning_rate": 0.00020729999999999997, | |
| "loss": 0.2493, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.460081190798377, | |
| "eval_cer": 0.0666523328316491, | |
| "eval_loss": 0.17990422248840332, | |
| "eval_runtime": 3.2699, | |
| "eval_samples_per_second": 16.82, | |
| "eval_steps_per_second": 2.141, | |
| "eval_wer": 0.23198594024604569, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.730717185385656, | |
| "grad_norm": 9.605318069458008, | |
| "learning_rate": 0.00020457272727272726, | |
| "loss": 0.2473, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 9.730717185385656, | |
| "eval_cer": 0.07009245323586326, | |
| "eval_loss": 0.24749121069908142, | |
| "eval_runtime": 3.2843, | |
| "eval_samples_per_second": 16.746, | |
| "eval_steps_per_second": 2.131, | |
| "eval_wer": 0.2671353251318102, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 38.97199249267578, | |
| "learning_rate": 0.00020184545454545456, | |
| "loss": 0.2842, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_cer": 0.07181251343797034, | |
| "eval_loss": 0.20998047292232513, | |
| "eval_runtime": 3.3675, | |
| "eval_samples_per_second": 16.333, | |
| "eval_steps_per_second": 2.079, | |
| "eval_wer": 0.2592267135325132, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 10.27063599458728, | |
| "grad_norm": 10.944853782653809, | |
| "learning_rate": 0.0001991181818181818, | |
| "loss": 0.2312, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 10.27063599458728, | |
| "eval_cer": 0.06106213717480112, | |
| "eval_loss": 0.20029255747795105, | |
| "eval_runtime": 3.2308, | |
| "eval_samples_per_second": 17.024, | |
| "eval_steps_per_second": 2.167, | |
| "eval_wer": 0.24077328646748683, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 10.541271989174561, | |
| "grad_norm": 6.15484619140625, | |
| "learning_rate": 0.0001963909090909091, | |
| "loss": 0.231, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 10.541271989174561, | |
| "eval_cer": 0.06880240808428295, | |
| "eval_loss": 0.23144930601119995, | |
| "eval_runtime": 3.3192, | |
| "eval_samples_per_second": 16.57, | |
| "eval_steps_per_second": 2.109, | |
| "eval_wer": 0.2680140597539543, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 10.81190798376184, | |
| "grad_norm": 9.186676025390625, | |
| "learning_rate": 0.00019366363636363635, | |
| "loss": 0.243, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.81190798376184, | |
| "eval_cer": 0.0707374758116534, | |
| "eval_loss": 0.20184335112571716, | |
| "eval_runtime": 3.2475, | |
| "eval_samples_per_second": 16.936, | |
| "eval_steps_per_second": 2.155, | |
| "eval_wer": 0.2504393673110721, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 11.081190798376184, | |
| "grad_norm": 3.3094773292541504, | |
| "learning_rate": 0.00019093636363636364, | |
| "loss": 0.2188, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 11.081190798376184, | |
| "eval_cer": 0.06299720490217157, | |
| "eval_loss": 0.18349941074848175, | |
| "eval_runtime": 3.2855, | |
| "eval_samples_per_second": 16.74, | |
| "eval_steps_per_second": 2.131, | |
| "eval_wer": 0.24077328646748683, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 11.351826792963465, | |
| "grad_norm": 4.869017124176025, | |
| "learning_rate": 0.00018820909090909088, | |
| "loss": 0.2129, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 11.351826792963465, | |
| "eval_cer": 0.0627821973769082, | |
| "eval_loss": 0.19908010959625244, | |
| "eval_runtime": 3.2946, | |
| "eval_samples_per_second": 16.694, | |
| "eval_steps_per_second": 2.125, | |
| "eval_wer": 0.2460456942003515, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 11.622462787550745, | |
| "grad_norm": 3.1255884170532227, | |
| "learning_rate": 0.00018548181818181818, | |
| "loss": 0.2351, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 11.622462787550745, | |
| "eval_cer": 0.0653622876800688, | |
| "eval_loss": 0.21975572407245636, | |
| "eval_runtime": 3.3112, | |
| "eval_samples_per_second": 16.61, | |
| "eval_steps_per_second": 2.114, | |
| "eval_wer": 0.27065026362038663, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 11.893098782138024, | |
| "grad_norm": 2.200582265853882, | |
| "learning_rate": 0.00018275454545454544, | |
| "loss": 0.218, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 11.893098782138024, | |
| "eval_cer": 0.06858740055901956, | |
| "eval_loss": 0.26052218675613403, | |
| "eval_runtime": 3.1812, | |
| "eval_samples_per_second": 17.289, | |
| "eval_steps_per_second": 2.2, | |
| "eval_wer": 0.27943760984182775, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 12.162381596752368, | |
| "grad_norm": 3.319242477416992, | |
| "learning_rate": 0.0001800272727272727, | |
| "loss": 0.2202, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 12.162381596752368, | |
| "eval_cer": 0.058052031821113736, | |
| "eval_loss": 0.21814635396003723, | |
| "eval_runtime": 3.272, | |
| "eval_samples_per_second": 16.809, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.23286467486818982, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 12.433017591339649, | |
| "grad_norm": 1.2420411109924316, | |
| "learning_rate": 0.00017729999999999997, | |
| "loss": 0.1962, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 12.433017591339649, | |
| "eval_cer": 0.05568694904321651, | |
| "eval_loss": 0.1839354932308197, | |
| "eval_runtime": 3.2938, | |
| "eval_samples_per_second": 16.698, | |
| "eval_steps_per_second": 2.125, | |
| "eval_wer": 0.23022847100175747, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 12.703653585926928, | |
| "grad_norm": 0.9907166361808777, | |
| "learning_rate": 0.00017457272727272727, | |
| "loss": 0.2129, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 12.703653585926928, | |
| "eval_cer": 0.05762201677058697, | |
| "eval_loss": 0.1895829439163208, | |
| "eval_runtime": 3.2727, | |
| "eval_samples_per_second": 16.806, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.22671353251318102, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 12.974289580514208, | |
| "grad_norm": 2.0261332988739014, | |
| "learning_rate": 0.00017184545454545453, | |
| "loss": 0.2074, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 12.974289580514208, | |
| "eval_cer": 0.06063212212427435, | |
| "eval_loss": 0.2010865956544876, | |
| "eval_runtime": 3.2907, | |
| "eval_samples_per_second": 16.714, | |
| "eval_steps_per_second": 2.127, | |
| "eval_wer": 0.2460456942003515, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 13.243572395128552, | |
| "grad_norm": 4.432494640350342, | |
| "learning_rate": 0.0001691181818181818, | |
| "loss": 0.1813, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 13.243572395128552, | |
| "eval_cer": 0.06106213717480112, | |
| "eval_loss": 0.181858628988266, | |
| "eval_runtime": 3.2727, | |
| "eval_samples_per_second": 16.806, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.20913884007029876, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 13.514208389715833, | |
| "grad_norm": 2.20430850982666, | |
| "learning_rate": 0.00016639090909090906, | |
| "loss": 0.1872, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 13.514208389715833, | |
| "eval_cer": 0.06557729520533219, | |
| "eval_loss": 0.21534210443496704, | |
| "eval_runtime": 3.2649, | |
| "eval_samples_per_second": 16.846, | |
| "eval_steps_per_second": 2.144, | |
| "eval_wer": 0.2601054481546573, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 13.784844384303112, | |
| "grad_norm": 3.0091655254364014, | |
| "learning_rate": 0.00016366363636363635, | |
| "loss": 0.1947, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 13.784844384303112, | |
| "eval_cer": 0.06880240808428295, | |
| "eval_loss": 0.2198285013437271, | |
| "eval_runtime": 3.2966, | |
| "eval_samples_per_second": 16.684, | |
| "eval_steps_per_second": 2.123, | |
| "eval_wer": 0.2671353251318102, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 14.054127198917456, | |
| "grad_norm": 1.1179289817810059, | |
| "learning_rate": 0.00016093636363636362, | |
| "loss": 0.2001, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 14.054127198917456, | |
| "eval_cer": 0.06772737045796604, | |
| "eval_loss": 0.21502529084682465, | |
| "eval_runtime": 3.2826, | |
| "eval_samples_per_second": 16.755, | |
| "eval_steps_per_second": 2.132, | |
| "eval_wer": 0.2618629173989455, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 14.324763193504737, | |
| "grad_norm": 0.8994645476341248, | |
| "learning_rate": 0.00015820909090909089, | |
| "loss": 0.1799, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 14.324763193504737, | |
| "eval_cer": 0.05160180606321221, | |
| "eval_loss": 0.1535176932811737, | |
| "eval_runtime": 3.2382, | |
| "eval_samples_per_second": 16.985, | |
| "eval_steps_per_second": 2.162, | |
| "eval_wer": 0.18541300527240773, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 14.595399188092017, | |
| "grad_norm": 3.489891529083252, | |
| "learning_rate": 0.00015548181818181815, | |
| "loss": 0.1813, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 14.595399188092017, | |
| "eval_cer": 0.06514728015480542, | |
| "eval_loss": 0.21499599516391754, | |
| "eval_runtime": 3.2165, | |
| "eval_samples_per_second": 17.099, | |
| "eval_steps_per_second": 2.176, | |
| "eval_wer": 0.2565905096660808, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 14.866035182679296, | |
| "grad_norm": 2.1778228282928467, | |
| "learning_rate": 0.00015275454545454544, | |
| "loss": 0.1812, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 14.866035182679296, | |
| "eval_cer": 0.05783702429585035, | |
| "eval_loss": 0.18473133444786072, | |
| "eval_runtime": 3.2425, | |
| "eval_samples_per_second": 16.962, | |
| "eval_steps_per_second": 2.159, | |
| "eval_wer": 0.23198594024604569, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 15.13531799729364, | |
| "grad_norm": 9.761114120483398, | |
| "learning_rate": 0.00015002727272727274, | |
| "loss": 0.1932, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 15.13531799729364, | |
| "eval_cer": 0.0518168135884756, | |
| "eval_loss": 0.16397376358509064, | |
| "eval_runtime": 3.2727, | |
| "eval_samples_per_second": 16.805, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.20913884007029876, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 15.40595399188092, | |
| "grad_norm": 8.160325050354004, | |
| "learning_rate": 0.00014729999999999998, | |
| "loss": 0.169, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 15.40595399188092, | |
| "eval_cer": 0.06751236293270264, | |
| "eval_loss": 0.2461504340171814, | |
| "eval_runtime": 3.3466, | |
| "eval_samples_per_second": 16.435, | |
| "eval_steps_per_second": 2.092, | |
| "eval_wer": 0.24868189806678384, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 15.6765899864682, | |
| "grad_norm": 7.290473937988281, | |
| "learning_rate": 0.00014457272727272727, | |
| "loss": 0.1704, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 15.6765899864682, | |
| "eval_cer": 0.06020210707374758, | |
| "eval_loss": 0.1913210153579712, | |
| "eval_runtime": 3.328, | |
| "eval_samples_per_second": 16.526, | |
| "eval_steps_per_second": 2.103, | |
| "eval_wer": 0.23374340949033393, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 15.94722598105548, | |
| "grad_norm": 9.403716087341309, | |
| "learning_rate": 0.00014184545454545453, | |
| "loss": 0.1692, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 15.94722598105548, | |
| "eval_cer": 0.06579230273059557, | |
| "eval_loss": 0.2216808795928955, | |
| "eval_runtime": 3.2705, | |
| "eval_samples_per_second": 16.817, | |
| "eval_steps_per_second": 2.14, | |
| "eval_wer": 0.2390158172231986, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 16.216508795669824, | |
| "grad_norm": 1.2416729927062988, | |
| "learning_rate": 0.0001391181818181818, | |
| "loss": 0.133, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.216508795669824, | |
| "eval_cer": 0.0599870995484842, | |
| "eval_loss": 0.19244541227817535, | |
| "eval_runtime": 3.3093, | |
| "eval_samples_per_second": 16.62, | |
| "eval_steps_per_second": 2.115, | |
| "eval_wer": 0.2390158172231986, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 16.487144790257105, | |
| "grad_norm": 2.919238567352295, | |
| "learning_rate": 0.00013639090909090906, | |
| "loss": 0.1529, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 16.487144790257105, | |
| "eval_cer": 0.054826918942162975, | |
| "eval_loss": 0.160396009683609, | |
| "eval_runtime": 3.2653, | |
| "eval_samples_per_second": 16.844, | |
| "eval_steps_per_second": 2.144, | |
| "eval_wer": 0.20738137082601055, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 16.757780784844385, | |
| "grad_norm": 1.1939417123794556, | |
| "learning_rate": 0.00013366363636363636, | |
| "loss": 0.1586, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 16.757780784844385, | |
| "eval_cer": 0.05547194151795313, | |
| "eval_loss": 0.18521469831466675, | |
| "eval_runtime": 3.3238, | |
| "eval_samples_per_second": 16.547, | |
| "eval_steps_per_second": 2.106, | |
| "eval_wer": 0.22231985940246046, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 17.027063599458728, | |
| "grad_norm": 0.6369737386703491, | |
| "learning_rate": 0.00013093636363636362, | |
| "loss": 0.1821, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 17.027063599458728, | |
| "eval_cer": 0.05009675338636852, | |
| "eval_loss": 0.162165105342865, | |
| "eval_runtime": 3.3026, | |
| "eval_samples_per_second": 16.654, | |
| "eval_steps_per_second": 2.12, | |
| "eval_wer": 0.20913884007029876, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 17.29769959404601, | |
| "grad_norm": 1.01472806930542, | |
| "learning_rate": 0.00012820909090909092, | |
| "loss": 0.1425, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 17.29769959404601, | |
| "eval_cer": 0.054826918942162975, | |
| "eval_loss": 0.1956459879875183, | |
| "eval_runtime": 3.316, | |
| "eval_samples_per_second": 16.586, | |
| "eval_steps_per_second": 2.111, | |
| "eval_wer": 0.23286467486818982, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 17.56833558863329, | |
| "grad_norm": 1.0718566179275513, | |
| "learning_rate": 0.00012548181818181818, | |
| "loss": 0.1538, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 17.56833558863329, | |
| "eval_cer": 0.058052031821113736, | |
| "eval_loss": 0.19568397104740143, | |
| "eval_runtime": 3.2443, | |
| "eval_samples_per_second": 16.953, | |
| "eval_steps_per_second": 2.158, | |
| "eval_wer": 0.24165202108963094, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 17.83897158322057, | |
| "grad_norm": 0.5153430104255676, | |
| "learning_rate": 0.00012275454545454545, | |
| "loss": 0.1395, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 17.83897158322057, | |
| "eval_cer": 0.05826703934637712, | |
| "eval_loss": 0.19457882642745972, | |
| "eval_runtime": 3.2924, | |
| "eval_samples_per_second": 16.705, | |
| "eval_steps_per_second": 2.126, | |
| "eval_wer": 0.2398945518453427, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 18.108254397834912, | |
| "grad_norm": 1.9209911823272705, | |
| "learning_rate": 0.00012002727272727273, | |
| "loss": 0.1331, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 18.108254397834912, | |
| "eval_cer": 0.05869705439690389, | |
| "eval_loss": 0.226872980594635, | |
| "eval_runtime": 3.3195, | |
| "eval_samples_per_second": 16.569, | |
| "eval_steps_per_second": 2.109, | |
| "eval_wer": 0.2539543057996485, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 18.378890392422193, | |
| "grad_norm": 4.072175979614258, | |
| "learning_rate": 0.00011729999999999999, | |
| "loss": 0.1242, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 18.378890392422193, | |
| "eval_cer": 0.05826703934637712, | |
| "eval_loss": 0.2108573615550995, | |
| "eval_runtime": 3.2958, | |
| "eval_samples_per_second": 16.688, | |
| "eval_steps_per_second": 2.124, | |
| "eval_wer": 0.23637961335676624, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 18.649526387009473, | |
| "grad_norm": 2.9033162593841553, | |
| "learning_rate": 0.00011457272727272727, | |
| "loss": 0.1323, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 18.649526387009473, | |
| "eval_cer": 0.054396903891636206, | |
| "eval_loss": 0.20044924318790436, | |
| "eval_runtime": 3.2775, | |
| "eval_samples_per_second": 16.781, | |
| "eval_steps_per_second": 2.136, | |
| "eval_wer": 0.22759226713532513, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 18.920162381596754, | |
| "grad_norm": 11.889602661132812, | |
| "learning_rate": 0.00011184545454545454, | |
| "loss": 0.1384, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 18.920162381596754, | |
| "eval_cer": 0.05697699419479682, | |
| "eval_loss": 0.20252789556980133, | |
| "eval_runtime": 3.2287, | |
| "eval_samples_per_second": 17.035, | |
| "eval_steps_per_second": 2.168, | |
| "eval_wer": 0.2451669595782074, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 19.189445196211096, | |
| "grad_norm": 1.1512547731399536, | |
| "learning_rate": 0.00010911818181818182, | |
| "loss": 0.1294, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 19.189445196211096, | |
| "eval_cer": 0.05418189636637282, | |
| "eval_loss": 0.21085655689239502, | |
| "eval_runtime": 3.2121, | |
| "eval_samples_per_second": 17.123, | |
| "eval_steps_per_second": 2.179, | |
| "eval_wer": 0.2390158172231986, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 19.460081190798377, | |
| "grad_norm": 0.6903840899467468, | |
| "learning_rate": 0.00010639090909090908, | |
| "loss": 0.1279, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 19.460081190798377, | |
| "eval_cer": 0.04794667813373468, | |
| "eval_loss": 0.16093921661376953, | |
| "eval_runtime": 3.2295, | |
| "eval_samples_per_second": 17.031, | |
| "eval_steps_per_second": 2.168, | |
| "eval_wer": 0.19156414762741653, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 19.730717185385657, | |
| "grad_norm": 1.4991930723190308, | |
| "learning_rate": 0.00010366363636363636, | |
| "loss": 0.122, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 19.730717185385657, | |
| "eval_cer": 0.06063212212427435, | |
| "eval_loss": 0.24742065370082855, | |
| "eval_runtime": 3.295, | |
| "eval_samples_per_second": 16.692, | |
| "eval_steps_per_second": 2.124, | |
| "eval_wer": 0.2539543057996485, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 4.862514019012451, | |
| "learning_rate": 0.00010093636363636363, | |
| "loss": 0.1222, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_cer": 0.050311760911631906, | |
| "eval_loss": 0.17492927610874176, | |
| "eval_runtime": 3.2165, | |
| "eval_samples_per_second": 17.099, | |
| "eval_steps_per_second": 2.176, | |
| "eval_wer": 0.1968365553602812, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 20.27063599458728, | |
| "grad_norm": 10.719727516174316, | |
| "learning_rate": 9.82090909090909e-05, | |
| "loss": 0.1121, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 20.27063599458728, | |
| "eval_cer": 0.05869705439690389, | |
| "eval_loss": 0.24458029866218567, | |
| "eval_runtime": 3.2693, | |
| "eval_samples_per_second": 16.823, | |
| "eval_steps_per_second": 2.141, | |
| "eval_wer": 0.2513181019332162, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 20.54127198917456, | |
| "grad_norm": 6.036056995391846, | |
| "learning_rate": 9.548181818181817e-05, | |
| "loss": 0.1142, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 20.54127198917456, | |
| "eval_cer": 0.05332186626531928, | |
| "eval_loss": 0.19322967529296875, | |
| "eval_runtime": 3.297, | |
| "eval_samples_per_second": 16.682, | |
| "eval_steps_per_second": 2.123, | |
| "eval_wer": 0.22319859402460457, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 20.81190798376184, | |
| "grad_norm": 5.935936450958252, | |
| "learning_rate": 9.275454545454544e-05, | |
| "loss": 0.1226, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 20.81190798376184, | |
| "eval_cer": 0.05353687379058267, | |
| "eval_loss": 0.2194850891828537, | |
| "eval_runtime": 3.272, | |
| "eval_samples_per_second": 16.809, | |
| "eval_steps_per_second": 2.139, | |
| "eval_wer": 0.23198594024604569, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 21.081190798376184, | |
| "grad_norm": 1.9924638271331787, | |
| "learning_rate": 9.002727272727272e-05, | |
| "loss": 0.1052, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 21.081190798376184, | |
| "eval_cer": 0.05095678348742206, | |
| "eval_loss": 0.20277012884616852, | |
| "eval_runtime": 3.2836, | |
| "eval_samples_per_second": 16.75, | |
| "eval_steps_per_second": 2.132, | |
| "eval_wer": 0.22231985940246046, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 21.351826792963465, | |
| "grad_norm": 5.319161891937256, | |
| "learning_rate": 8.729999999999998e-05, | |
| "loss": 0.1077, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 21.351826792963465, | |
| "eval_cer": 0.05353687379058267, | |
| "eval_loss": 0.18649469316005707, | |
| "eval_runtime": 3.264, | |
| "eval_samples_per_second": 16.85, | |
| "eval_steps_per_second": 2.145, | |
| "eval_wer": 0.210896309314587, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 21.622462787550745, | |
| "grad_norm": 2.034726619720459, | |
| "learning_rate": 8.457272727272726e-05, | |
| "loss": 0.114, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 21.622462787550745, | |
| "eval_cer": 0.04687164050741776, | |
| "eval_loss": 0.17550522089004517, | |
| "eval_runtime": 3.2743, | |
| "eval_samples_per_second": 16.798, | |
| "eval_steps_per_second": 2.138, | |
| "eval_wer": 0.21704745166959577, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 21.893098782138026, | |
| "grad_norm": 1.0931280851364136, | |
| "learning_rate": 8.184545454545453e-05, | |
| "loss": 0.1041, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 21.893098782138026, | |
| "eval_cer": 0.04579660288110084, | |
| "eval_loss": 0.1914069801568985, | |
| "eval_runtime": 3.2452, | |
| "eval_samples_per_second": 16.948, | |
| "eval_steps_per_second": 2.157, | |
| "eval_wer": 0.2117750439367311, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 22.16238159675237, | |
| "grad_norm": 3.6217257976531982, | |
| "learning_rate": 7.911818181818182e-05, | |
| "loss": 0.1149, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 22.16238159675237, | |
| "eval_cer": 0.0477316706084713, | |
| "eval_loss": 0.18972058594226837, | |
| "eval_runtime": 3.2691, | |
| "eval_samples_per_second": 16.824, | |
| "eval_steps_per_second": 2.141, | |
| "eval_wer": 0.2179261862917399, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 22.43301759133965, | |
| "grad_norm": 1.2710479497909546, | |
| "learning_rate": 7.63909090909091e-05, | |
| "loss": 0.0984, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 22.43301759133965, | |
| "eval_cer": 0.05203182111373898, | |
| "eval_loss": 0.21701879799365997, | |
| "eval_runtime": 3.3005, | |
| "eval_samples_per_second": 16.664, | |
| "eval_steps_per_second": 2.121, | |
| "eval_wer": 0.22934973637961337, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 22.70365358592693, | |
| "grad_norm": 0.5214836597442627, | |
| "learning_rate": 7.366363636363635e-05, | |
| "loss": 0.0974, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 22.70365358592693, | |
| "eval_cer": 0.046226617931627606, | |
| "eval_loss": 0.1713176965713501, | |
| "eval_runtime": 3.3717, | |
| "eval_samples_per_second": 16.312, | |
| "eval_steps_per_second": 2.076, | |
| "eval_wer": 0.1968365553602812, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 22.97428958051421, | |
| "grad_norm": 2.3021202087402344, | |
| "learning_rate": 7.093636363636363e-05, | |
| "loss": 0.1052, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 22.97428958051421, | |
| "eval_cer": 0.04837669318426145, | |
| "eval_loss": 0.17614266276359558, | |
| "eval_runtime": 3.5916, | |
| "eval_samples_per_second": 15.313, | |
| "eval_steps_per_second": 1.949, | |
| "eval_wer": 0.21441124780316345, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 23.243572395128552, | |
| "grad_norm": 3.924398183822632, | |
| "learning_rate": 6.82090909090909e-05, | |
| "loss": 0.0892, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 23.243572395128552, | |
| "eval_cer": 0.04429155020425715, | |
| "eval_loss": 0.14997613430023193, | |
| "eval_runtime": 3.344, | |
| "eval_samples_per_second": 16.448, | |
| "eval_steps_per_second": 2.093, | |
| "eval_wer": 0.18629173989455183, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 23.514208389715833, | |
| "grad_norm": 1.1947544813156128, | |
| "learning_rate": 6.548181818181817e-05, | |
| "loss": 0.0919, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 23.514208389715833, | |
| "eval_cer": 0.04601161040636422, | |
| "eval_loss": 0.15268389880657196, | |
| "eval_runtime": 3.3038, | |
| "eval_samples_per_second": 16.648, | |
| "eval_steps_per_second": 2.119, | |
| "eval_wer": 0.19859402460456943, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 23.784844384303113, | |
| "grad_norm": 1.3969401121139526, | |
| "learning_rate": 6.275454545454545e-05, | |
| "loss": 0.0956, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 23.784844384303113, | |
| "eval_cer": 0.0449365727800473, | |
| "eval_loss": 0.15815618634223938, | |
| "eval_runtime": 3.2819, | |
| "eval_samples_per_second": 16.759, | |
| "eval_steps_per_second": 2.133, | |
| "eval_wer": 0.1968365553602812, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 24.054127198917456, | |
| "grad_norm": 2.335787773132324, | |
| "learning_rate": 6.0027272727272725e-05, | |
| "loss": 0.1, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 24.054127198917456, | |
| "eval_cer": 0.04536658783057407, | |
| "eval_loss": 0.17150932550430298, | |
| "eval_runtime": 3.246, | |
| "eval_samples_per_second": 16.944, | |
| "eval_steps_per_second": 2.157, | |
| "eval_wer": 0.20123022847100175, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 24.324763193504737, | |
| "grad_norm": 1.6086431741714478, | |
| "learning_rate": 5.73e-05, | |
| "loss": 0.0846, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 24.324763193504737, | |
| "eval_cer": 0.04579660288110084, | |
| "eval_loss": 0.18286831676959991, | |
| "eval_runtime": 3.2566, | |
| "eval_samples_per_second": 16.889, | |
| "eval_steps_per_second": 2.149, | |
| "eval_wer": 0.20298769771529, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 24.595399188092017, | |
| "grad_norm": 1.6925404071807861, | |
| "learning_rate": 5.457272727272727e-05, | |
| "loss": 0.083, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 24.595399188092017, | |
| "eval_cer": 0.04536658783057407, | |
| "eval_loss": 0.17126092314720154, | |
| "eval_runtime": 3.2285, | |
| "eval_samples_per_second": 17.036, | |
| "eval_steps_per_second": 2.168, | |
| "eval_wer": 0.20650263620386644, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 24.866035182679298, | |
| "grad_norm": 1.5034745931625366, | |
| "learning_rate": 5.184545454545454e-05, | |
| "loss": 0.0821, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 24.866035182679298, | |
| "eval_cer": 0.04988174586110514, | |
| "eval_loss": 0.19512753188610077, | |
| "eval_runtime": 3.2263, | |
| "eval_samples_per_second": 17.048, | |
| "eval_steps_per_second": 2.17, | |
| "eval_wer": 0.22231985940246046, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 25.13531799729364, | |
| "grad_norm": 6.091914653778076, | |
| "learning_rate": 4.9118181818181814e-05, | |
| "loss": 0.0917, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 25.13531799729364, | |
| "eval_cer": 0.04730165555794453, | |
| "eval_loss": 0.18300001323223114, | |
| "eval_runtime": 3.2216, | |
| "eval_samples_per_second": 17.072, | |
| "eval_steps_per_second": 2.173, | |
| "eval_wer": 0.2100175746924429, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 25.40595399188092, | |
| "grad_norm": 5.720769882202148, | |
| "learning_rate": 4.6390909090909086e-05, | |
| "loss": 0.0806, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 25.40595399188092, | |
| "eval_cer": 0.0477316706084713, | |
| "eval_loss": 0.17701777815818787, | |
| "eval_runtime": 3.3053, | |
| "eval_samples_per_second": 16.64, | |
| "eval_steps_per_second": 2.118, | |
| "eval_wer": 0.21353251318101935, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 25.6765899864682, | |
| "grad_norm": 7.1992998123168945, | |
| "learning_rate": 4.366363636363636e-05, | |
| "loss": 0.0756, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 25.6765899864682, | |
| "eval_cer": 0.04816168565899807, | |
| "eval_loss": 0.1869199126958847, | |
| "eval_runtime": 3.2573, | |
| "eval_samples_per_second": 16.885, | |
| "eval_steps_per_second": 2.149, | |
| "eval_wer": 0.21441124780316345, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 25.94722598105548, | |
| "grad_norm": 9.373414039611816, | |
| "learning_rate": 4.093636363636364e-05, | |
| "loss": 0.0807, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 25.94722598105548, | |
| "eval_cer": 0.0490217157600516, | |
| "eval_loss": 0.17646533250808716, | |
| "eval_runtime": 3.2627, | |
| "eval_samples_per_second": 16.857, | |
| "eval_steps_per_second": 2.145, | |
| "eval_wer": 0.20210896309314588, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 26.216508795669824, | |
| "grad_norm": 1.0124870538711548, | |
| "learning_rate": 3.820909090909091e-05, | |
| "loss": 0.059, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 26.216508795669824, | |
| "eval_cer": 0.051171791012685444, | |
| "eval_loss": 0.20564059913158417, | |
| "eval_runtime": 3.2967, | |
| "eval_samples_per_second": 16.683, | |
| "eval_steps_per_second": 2.123, | |
| "eval_wer": 0.22407732864674867, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 26.487144790257105, | |
| "grad_norm": 1.1935291290283203, | |
| "learning_rate": 3.548181818181818e-05, | |
| "loss": 0.0831, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 26.487144790257105, | |
| "eval_cer": 0.05009675338636852, | |
| "eval_loss": 0.1965063065290451, | |
| "eval_runtime": 3.3021, | |
| "eval_samples_per_second": 16.656, | |
| "eval_steps_per_second": 2.12, | |
| "eval_wer": 0.22056239015817222, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 26.757780784844385, | |
| "grad_norm": 1.9065356254577637, | |
| "learning_rate": 3.2754545454545455e-05, | |
| "loss": 0.0647, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 26.757780784844385, | |
| "eval_cer": 0.04751666308320791, | |
| "eval_loss": 0.1832188367843628, | |
| "eval_runtime": 3.2986, | |
| "eval_samples_per_second": 16.674, | |
| "eval_steps_per_second": 2.122, | |
| "eval_wer": 0.20826010544815465, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 27.027063599458728, | |
| "grad_norm": 0.8792353868484497, | |
| "learning_rate": 3.0027272727272724e-05, | |
| "loss": 0.0788, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 27.027063599458728, | |
| "eval_cer": 0.046656632982154375, | |
| "eval_loss": 0.17078326642513275, | |
| "eval_runtime": 3.3039, | |
| "eval_samples_per_second": 16.647, | |
| "eval_steps_per_second": 2.119, | |
| "eval_wer": 0.19859402460456943, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 27.29769959404601, | |
| "grad_norm": 0.8240686655044556, | |
| "learning_rate": 2.7299999999999996e-05, | |
| "loss": 0.0677, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 27.29769959404601, | |
| "eval_cer": 0.0449365727800473, | |
| "eval_loss": 0.16915105283260345, | |
| "eval_runtime": 3.2712, | |
| "eval_samples_per_second": 16.813, | |
| "eval_steps_per_second": 2.14, | |
| "eval_wer": 0.1827768014059754, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 27.56833558863329, | |
| "grad_norm": 0.5428586006164551, | |
| "learning_rate": 2.457272727272727e-05, | |
| "loss": 0.0651, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 27.56833558863329, | |
| "eval_cer": 0.04644162545689099, | |
| "eval_loss": 0.17613492906093597, | |
| "eval_runtime": 3.2478, | |
| "eval_samples_per_second": 16.935, | |
| "eval_steps_per_second": 2.155, | |
| "eval_wer": 0.20210896309314588, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 27.83897158322057, | |
| "grad_norm": 0.8122203946113586, | |
| "learning_rate": 2.1845454545454544e-05, | |
| "loss": 0.0602, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 27.83897158322057, | |
| "eval_cer": 0.046656632982154375, | |
| "eval_loss": 0.16123421490192413, | |
| "eval_runtime": 3.2919, | |
| "eval_samples_per_second": 16.708, | |
| "eval_steps_per_second": 2.126, | |
| "eval_wer": 0.18804920913884007, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 28.108254397834912, | |
| "grad_norm": 1.2256019115447998, | |
| "learning_rate": 1.9118181818181817e-05, | |
| "loss": 0.0586, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 28.108254397834912, | |
| "eval_cer": 0.04579660288110084, | |
| "eval_loss": 0.17519748210906982, | |
| "eval_runtime": 3.2923, | |
| "eval_samples_per_second": 16.706, | |
| "eval_steps_per_second": 2.126, | |
| "eval_wer": 0.19947275922671354, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 28.378890392422193, | |
| "grad_norm": 1.0472385883331299, | |
| "learning_rate": 1.639090909090909e-05, | |
| "loss": 0.0602, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 28.378890392422193, | |
| "eval_cer": 0.04536658783057407, | |
| "eval_loss": 0.17338904738426208, | |
| "eval_runtime": 3.2664, | |
| "eval_samples_per_second": 16.838, | |
| "eval_steps_per_second": 2.143, | |
| "eval_wer": 0.19507908611599298, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 28.649526387009473, | |
| "grad_norm": 1.4401935338974, | |
| "learning_rate": 1.3663636363636363e-05, | |
| "loss": 0.0636, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 28.649526387009473, | |
| "eval_cer": 0.04579660288110084, | |
| "eval_loss": 0.1733109951019287, | |
| "eval_runtime": 3.3348, | |
| "eval_samples_per_second": 16.493, | |
| "eval_steps_per_second": 2.099, | |
| "eval_wer": 0.20210896309314588, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 28.920162381596754, | |
| "grad_norm": 0.8489630818367004, | |
| "learning_rate": 1.0936363636363635e-05, | |
| "loss": 0.0642, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 28.920162381596754, | |
| "eval_cer": 0.047086648032681144, | |
| "eval_loss": 0.17440861463546753, | |
| "eval_runtime": 3.3022, | |
| "eval_samples_per_second": 16.656, | |
| "eval_steps_per_second": 2.12, | |
| "eval_wer": 0.2056239015817223, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 29.189445196211096, | |
| "grad_norm": 1.0616735219955444, | |
| "learning_rate": 8.20909090909091e-06, | |
| "loss": 0.0611, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 29.189445196211096, | |
| "eval_cer": 0.04601161040636422, | |
| "eval_loss": 0.17405055463314056, | |
| "eval_runtime": 3.2326, | |
| "eval_samples_per_second": 17.014, | |
| "eval_steps_per_second": 2.165, | |
| "eval_wer": 0.20298769771529, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 29.460081190798377, | |
| "grad_norm": 1.259354591369629, | |
| "learning_rate": 5.481818181818182e-06, | |
| "loss": 0.0596, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 29.460081190798377, | |
| "eval_cer": 0.0449365727800473, | |
| "eval_loss": 0.17219401895999908, | |
| "eval_runtime": 3.1719, | |
| "eval_samples_per_second": 17.34, | |
| "eval_steps_per_second": 2.207, | |
| "eval_wer": 0.19507908611599298, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 29.730717185385657, | |
| "grad_norm": 0.4698294401168823, | |
| "learning_rate": 2.754545454545454e-06, | |
| "loss": 0.0627, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 29.730717185385657, | |
| "eval_cer": 0.0449365727800473, | |
| "eval_loss": 0.17380425333976746, | |
| "eval_runtime": 3.2854, | |
| "eval_samples_per_second": 16.741, | |
| "eval_steps_per_second": 2.131, | |
| "eval_wer": 0.19859402460456943, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 9.292856216430664, | |
| "learning_rate": 2.727272727272727e-08, | |
| "loss": 0.0623, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_cer": 0.0449365727800473, | |
| "eval_loss": 0.17295825481414795, | |
| "eval_runtime": 3.2552, | |
| "eval_samples_per_second": 16.896, | |
| "eval_steps_per_second": 2.15, | |
| "eval_wer": 0.19859402460456943, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 11100, | |
| "total_flos": 1.8673488922198168e+19, | |
| "train_loss": 0.3048185482111063, | |
| "train_runtime": 12419.9418, | |
| "train_samples_per_second": 28.548, | |
| "train_steps_per_second": 0.894 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_cer": 0.04515158030531068, | |
| "eval_loss": 0.17296089231967926, | |
| "eval_runtime": 3.3418, | |
| "eval_samples_per_second": 16.458, | |
| "eval_steps_per_second": 2.095, | |
| "eval_wer": 0.19859402460456943, | |
| "step": 11100 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 11100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8673488922198168e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |