{
  "best_metric": 0.1262135922330097,
  "best_model_checkpoint": "results3\\checkpoint-124000",
  "epoch": 2.002992,
  "eval_steps": 4000,
  "global_step": 124000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008,
      "grad_norm": 12.33169937133789,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 1.7644,
      "step": 100
    },
    {
      "epoch": 0.0016,
      "grad_norm": 9.583354949951172,
      "learning_rate": 3.88e-06,
      "loss": 0.929,
      "step": 200
    },
    {
      "epoch": 0.0024,
      "grad_norm": 12.06747817993164,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 0.4652,
      "step": 300
    },
    {
      "epoch": 0.0032,
      "grad_norm": 9.123275756835938,
      "learning_rate": 7.88e-06,
      "loss": 0.3351,
      "step": 400
    },
    {
      "epoch": 0.004,
      "grad_norm": 15.981216430664062,
      "learning_rate": 9.88e-06,
      "loss": 0.4079,
      "step": 500
    },
    {
      "epoch": 0.0048,
      "grad_norm": 20.674148559570312,
      "learning_rate": 9.992449799196789e-06,
      "loss": 0.3966,
      "step": 600
    },
    {
      "epoch": 0.0056,
      "grad_norm": 11.142045974731445,
      "learning_rate": 9.984417670682733e-06,
      "loss": 0.3628,
      "step": 700
    },
    {
      "epoch": 0.0064,
      "grad_norm": 6.716693878173828,
      "learning_rate": 9.976385542168675e-06,
      "loss": 0.3647,
      "step": 800
    },
    {
      "epoch": 0.0072,
      "grad_norm": 15.127047538757324,
      "learning_rate": 9.968353413654619e-06,
      "loss": 0.4007,
      "step": 900
    },
    {
      "epoch": 0.008,
      "grad_norm": 5.561180114746094,
      "learning_rate": 9.960321285140563e-06,
      "loss": 0.3748,
      "step": 1000
    },
    {
      "epoch": 0.0088,
      "grad_norm": 11.087468147277832,
      "learning_rate": 9.952289156626507e-06,
      "loss": 0.3802,
      "step": 1100
    },
    {
      "epoch": 0.0096,
      "grad_norm": 13.053077697753906,
      "learning_rate": 9.94425702811245e-06,
      "loss": 0.3555,
      "step": 1200
    },
    {
      "epoch": 0.0104,
      "grad_norm": 8.72202205657959,
      "learning_rate": 9.936224899598395e-06,
      "loss": 0.3532,
      "step": 1300
    },
    {
      "epoch": 0.0112,
      "grad_norm": 13.878268241882324,
      "learning_rate": 9.928192771084338e-06,
      "loss": 0.3571,
      "step": 1400
    },
    {
      "epoch": 0.012,
      "grad_norm": 7.951385498046875,
      "learning_rate": 9.920160642570282e-06,
      "loss": 0.3683,
      "step": 1500
    },
    {
      "epoch": 0.0128,
      "grad_norm": 7.835740566253662,
      "learning_rate": 9.912128514056226e-06,
      "loss": 0.3409,
      "step": 1600
    },
    {
      "epoch": 0.0136,
      "grad_norm": 10.594538688659668,
      "learning_rate": 9.904096385542169e-06,
      "loss": 0.3455,
      "step": 1700
    },
    {
      "epoch": 0.0144,
      "grad_norm": 8.736641883850098,
      "learning_rate": 9.896064257028112e-06,
      "loss": 0.3567,
      "step": 1800
    },
    {
      "epoch": 0.0152,
      "grad_norm": 6.575601100921631,
      "learning_rate": 9.888032128514056e-06,
      "loss": 0.3606,
      "step": 1900
    },
    {
      "epoch": 0.016,
      "grad_norm": 11.653617858886719,
      "learning_rate": 9.88e-06,
      "loss": 0.3683,
      "step": 2000
    },
    {
      "epoch": 0.0168,
      "grad_norm": 12.467767715454102,
      "learning_rate": 9.871967871485944e-06,
      "loss": 0.3723,
      "step": 2100
    },
    {
      "epoch": 0.0176,
      "grad_norm": 11.205801963806152,
      "learning_rate": 9.863935742971888e-06,
      "loss": 0.3561,
      "step": 2200
    },
    {
      "epoch": 0.0184,
      "grad_norm": 10.407919883728027,
      "learning_rate": 9.855903614457832e-06,
      "loss": 0.3414,
      "step": 2300
    },
    {
      "epoch": 0.0192,
      "grad_norm": 6.497184753417969,
      "learning_rate": 9.847871485943776e-06,
      "loss": 0.3439,
      "step": 2400
    },
    {
      "epoch": 0.02,
      "grad_norm": 10.192763328552246,
      "learning_rate": 9.83983935742972e-06,
      "loss": 0.3334,
      "step": 2500
    },
    {
      "epoch": 0.0208,
      "grad_norm": 10.233382225036621,
      "learning_rate": 9.831807228915664e-06,
      "loss": 0.3715,
      "step": 2600
    },
    {
      "epoch": 0.0216,
      "grad_norm": 10.373730659484863,
      "learning_rate": 9.823775100401608e-06,
      "loss": 0.3557,
      "step": 2700
    },
    {
      "epoch": 0.0224,
      "grad_norm": 9.90769100189209,
      "learning_rate": 9.81574297188755e-06,
      "loss": 0.345,
      "step": 2800
    },
    {
      "epoch": 0.0232,
      "grad_norm": 12.89505386352539,
      "learning_rate": 9.807710843373494e-06,
      "loss": 0.3466,
      "step": 2900
    },
    {
      "epoch": 0.024,
      "grad_norm": 11.755387306213379,
      "learning_rate": 9.799678714859438e-06,
      "loss": 0.3518,
      "step": 3000
    },
    {
      "epoch": 0.0248,
      "grad_norm": 23.758481979370117,
      "learning_rate": 9.791646586345382e-06,
      "loss": 0.3367,
      "step": 3100
    },
    {
      "epoch": 0.0256,
      "grad_norm": 8.041315078735352,
      "learning_rate": 9.783614457831326e-06,
      "loss": 0.3385,
      "step": 3200
    },
    {
      "epoch": 0.0264,
      "grad_norm": 6.949316501617432,
      "learning_rate": 9.77558232931727e-06,
      "loss": 0.3557,
      "step": 3300
    },
    {
      "epoch": 0.0272,
      "grad_norm": 20.79637908935547,
      "learning_rate": 9.767550200803213e-06,
      "loss": 0.3497,
      "step": 3400
    },
    {
      "epoch": 0.028,
      "grad_norm": 10.800033569335938,
      "learning_rate": 9.759518072289157e-06,
      "loss": 0.351,
      "step": 3500
    },
    {
      "epoch": 0.0288,
      "grad_norm": 7.772594451904297,
      "learning_rate": 9.751485943775101e-06,
      "loss": 0.3272,
      "step": 3600
    },
    {
      "epoch": 0.0296,
      "grad_norm": 7.19946813583374,
      "learning_rate": 9.743453815261045e-06,
      "loss": 0.3216,
      "step": 3700
    },
    {
      "epoch": 0.0304,
      "grad_norm": 11.165406227111816,
      "learning_rate": 9.735421686746989e-06,
      "loss": 0.3305,
      "step": 3800
    },
    {
      "epoch": 0.0312,
      "grad_norm": 12.210046768188477,
      "learning_rate": 9.727389558232933e-06,
      "loss": 0.3524,
      "step": 3900
    },
    {
      "epoch": 0.032,
      "grad_norm": 6.451345443725586,
      "learning_rate": 9.719357429718877e-06,
      "loss": 0.3394,
      "step": 4000
    },
    {
      "epoch": 0.032,
      "eval_test1_cer": 0.09286074389424154,
      "eval_test1_cer_norm": 0.07516507791677672,
      "eval_test1_loss": 0.2566915452480316,
      "eval_test1_runtime": 6436.6351,
      "eval_test1_samples_per_second": 0.388,
      "eval_test1_steps_per_second": 0.097,
      "eval_test1_wer": 0.2368582174407417,
      "eval_test1_wer_norm": 0.17320786697448787,
      "step": 4000
    },
    {
      "epoch": 0.032,
      "eval_test2_cer": 0.2131304364057192,
      "eval_test2_cer_norm": 0.18118124418964984,
      "eval_test2_loss": 0.4192444682121277,
      "eval_test2_runtime": 3419.6713,
      "eval_test2_samples_per_second": 0.731,
      "eval_test2_steps_per_second": 0.183,
      "eval_test2_wer": 0.42229343099107347,
      "eval_test2_wer_norm": 0.35425739170295667,
      "step": 4000
    },
    {
      "epoch": 0.0328,
      "grad_norm": 43.34160232543945,
      "learning_rate": 9.711325301204821e-06,
      "loss": 0.3384,
      "step": 4100
    },
    {
      "epoch": 0.0336,
      "grad_norm": 7.907329559326172,
      "learning_rate": 9.703293172690765e-06,
      "loss": 0.3674,
      "step": 4200
    },
    {
      "epoch": 0.0344,
      "grad_norm": 6.71475076675415,
      "learning_rate": 9.695261044176709e-06,
      "loss": 0.3364,
      "step": 4300
    },
    {
      "epoch": 0.0352,
      "grad_norm": 9.054850578308105,
      "learning_rate": 9.687228915662651e-06,
      "loss": 0.3417,
      "step": 4400
    },
    {
      "epoch": 0.036,
      "grad_norm": 14.793953895568848,
      "learning_rate": 9.679196787148595e-06,
      "loss": 0.3286,
      "step": 4500
    },
    {
      "epoch": 0.0368,
      "grad_norm": 5.203219413757324,
      "learning_rate": 9.671164658634539e-06,
      "loss": 0.3684,
      "step": 4600
    },
    {
      "epoch": 0.0376,
      "grad_norm": 12.02910327911377,
      "learning_rate": 9.663132530120483e-06,
      "loss": 0.3393,
      "step": 4700
    },
    {
      "epoch": 0.0384,
      "grad_norm": 10.032449722290039,
      "learning_rate": 9.655100401606427e-06,
      "loss": 0.3503,
      "step": 4800
    },
    {
      "epoch": 0.0392,
      "grad_norm": 9.57917594909668,
      "learning_rate": 9.64706827309237e-06,
      "loss": 0.3239,
      "step": 4900
    },
    {
      "epoch": 0.04,
      "grad_norm": 9.18687629699707,
      "learning_rate": 9.639036144578314e-06,
      "loss": 0.3407,
      "step": 5000
    },
    {
      "epoch": 0.0408,
      "grad_norm": 9.08846378326416,
      "learning_rate": 9.631004016064258e-06,
      "loss": 0.3478,
      "step": 5100
    },
    {
      "epoch": 0.0416,
      "grad_norm": 6.322196006774902,
      "learning_rate": 9.622971887550202e-06,
      "loss": 0.3294,
      "step": 5200
    },
    {
      "epoch": 0.0424,
      "grad_norm": 11.071808815002441,
      "learning_rate": 9.614939759036145e-06,
      "loss": 0.3533,
      "step": 5300
    },
    {
      "epoch": 0.0432,
      "grad_norm": 8.492830276489258,
      "learning_rate": 9.606907630522088e-06,
      "loss": 0.3297,
      "step": 5400
    },
    {
      "epoch": 0.044,
      "grad_norm": 13.28835678100586,
      "learning_rate": 9.598875502008032e-06,
      "loss": 0.3228,
      "step": 5500
    },
    {
      "epoch": 0.0448,
      "grad_norm": 9.030045509338379,
      "learning_rate": 9.590843373493976e-06,
      "loss": 0.3228,
      "step": 5600
    },
    {
      "epoch": 0.0456,
      "grad_norm": 12.364481925964355,
      "learning_rate": 9.58281124497992e-06,
      "loss": 0.3406,
      "step": 5700
    },
    {
      "epoch": 0.0464,
      "grad_norm": 6.908151149749756,
      "learning_rate": 9.574779116465864e-06,
      "loss": 0.3365,
      "step": 5800
    },
    {
      "epoch": 0.0472,
      "grad_norm": 10.87695598602295,
      "learning_rate": 9.566746987951808e-06,
      "loss": 0.3332,
      "step": 5900
    },
    {
      "epoch": 0.048,
      "grad_norm": 9.280797004699707,
      "learning_rate": 9.558714859437752e-06,
      "loss": 0.3414,
      "step": 6000
    },
    {
      "epoch": 0.0488,
      "grad_norm": 13.637160301208496,
      "learning_rate": 9.550682730923696e-06,
      "loss": 0.3275,
      "step": 6100
    },
    {
      "epoch": 0.0496,
      "grad_norm": 10.094959259033203,
      "learning_rate": 9.542650602409638e-06,
      "loss": 0.3152,
      "step": 6200
    },
    {
      "epoch": 0.0504,
      "grad_norm": 6.261775016784668,
      "learning_rate": 9.534618473895582e-06,
      "loss": 0.3236,
      "step": 6300
    },
    {
      "epoch": 0.0512,
      "grad_norm": 7.221704006195068,
      "learning_rate": 9.526586345381526e-06,
      "loss": 0.3461,
      "step": 6400
    },
    {
      "epoch": 0.052,
      "grad_norm": 10.923733711242676,
      "learning_rate": 9.51855421686747e-06,
      "loss": 0.3249,
      "step": 6500
    },
    {
      "epoch": 0.0528,
      "grad_norm": 5.432595729827881,
      "learning_rate": 9.510522088353414e-06,
      "loss": 0.3556,
      "step": 6600
    },
    {
      "epoch": 0.0536,
      "grad_norm": 15.021200180053711,
      "learning_rate": 9.502489959839358e-06,
      "loss": 0.3208,
      "step": 6700
    },
    {
      "epoch": 0.0544,
      "grad_norm": 6.641680717468262,
      "learning_rate": 9.494457831325302e-06,
      "loss": 0.3565,
      "step": 6800
    },
    {
      "epoch": 0.0552,
      "grad_norm": 15.715243339538574,
      "learning_rate": 9.486425702811246e-06,
      "loss": 0.3123,
      "step": 6900
    },
    {
      "epoch": 0.056,
      "grad_norm": 9.304129600524902,
      "learning_rate": 9.47839357429719e-06,
      "loss": 0.312,
      "step": 7000
    },
    {
      "epoch": 0.0568,
      "grad_norm": 10.780037879943848,
      "learning_rate": 9.470361445783133e-06,
      "loss": 0.3445,
      "step": 7100
    },
    {
      "epoch": 0.0576,
      "grad_norm": 11.77072811126709,
      "learning_rate": 9.462329317269077e-06,
      "loss": 0.3293,
      "step": 7200
    },
    {
      "epoch": 0.0584,
      "grad_norm": 11.29255485534668,
      "learning_rate": 9.454297188755021e-06,
      "loss": 0.3045,
      "step": 7300
    },
    {
      "epoch": 0.0592,
      "grad_norm": 9.00106143951416,
      "learning_rate": 9.446265060240965e-06,
      "loss": 0.3132,
      "step": 7400
    },
    {
      "epoch": 0.06,
      "grad_norm": 8.445959091186523,
      "learning_rate": 9.438232931726909e-06,
      "loss": 0.3327,
      "step": 7500
    },
    {
      "epoch": 0.0608,
      "grad_norm": 13.996682167053223,
      "learning_rate": 9.430200803212853e-06,
      "loss": 0.3496,
      "step": 7600
    },
    {
      "epoch": 0.0616,
      "grad_norm": 6.321784019470215,
      "learning_rate": 9.422168674698797e-06,
      "loss": 0.2928,
      "step": 7700
    },
    {
      "epoch": 0.0624,
      "grad_norm": 9.260029792785645,
      "learning_rate": 9.414136546184741e-06,
      "loss": 0.2978,
      "step": 7800
    },
    {
      "epoch": 0.0632,
      "grad_norm": 14.37282657623291,
      "learning_rate": 9.406104417670685e-06,
      "loss": 0.3177,
      "step": 7900
    },
    {
      "epoch": 0.064,
      "grad_norm": 13.508281707763672,
      "learning_rate": 9.398072289156627e-06,
      "loss": 0.3357,
      "step": 8000
    },
    {
      "epoch": 0.064,
      "eval_test1_cer": 0.07946822018074538,
      "eval_test1_cer_norm": 0.05569188657046126,
      "eval_test1_loss": 0.2466106414794922,
      "eval_test1_runtime": 3431.7176,
      "eval_test1_samples_per_second": 0.728,
      "eval_test1_steps_per_second": 0.182,
      "eval_test1_wer": 0.21889851016064607,
      "eval_test1_wer_norm": 0.15733948975715245,
      "step": 8000
    },
    {
      "epoch": 0.064,
      "eval_test2_cer": 0.21952346287378205,
      "eval_test2_cer_norm": 0.16622927641772545,
      "eval_test2_loss": 0.410643607378006,
      "eval_test2_runtime": 3658.9711,
      "eval_test2_samples_per_second": 0.683,
      "eval_test2_steps_per_second": 0.171,
      "eval_test2_wer": 0.39210917830167086,
      "eval_test2_wer_norm": 0.32348727939491173,
      "step": 8000
    },
    {
      "epoch": 0.0648,
      "grad_norm": 7.349660873413086,
      "learning_rate": 9.390040160642571e-06,
      "loss": 0.3311,
      "step": 8100
    },
    {
      "epoch": 0.0656,
      "grad_norm": 6.557274341583252,
      "learning_rate": 9.382008032128515e-06,
      "loss": 0.318,
      "step": 8200
    },
    {
      "epoch": 0.0664,
      "grad_norm": 20.285615921020508,
      "learning_rate": 9.373975903614459e-06,
      "loss": 0.3116,
      "step": 8300
    },
    {
      "epoch": 0.0672,
      "grad_norm": 7.684230327606201,
      "learning_rate": 9.365943775100403e-06,
      "loss": 0.3232,
      "step": 8400
    },
    {
      "epoch": 0.068,
      "grad_norm": 8.374768257141113,
      "learning_rate": 9.357911646586347e-06,
      "loss": 0.3328,
      "step": 8500
    },
    {
      "epoch": 0.0688,
      "grad_norm": 6.924304008483887,
      "learning_rate": 9.34987951807229e-06,
      "loss": 0.2923,
      "step": 8600
    },
    {
      "epoch": 0.0696,
      "grad_norm": 5.943380832672119,
      "learning_rate": 9.341847389558234e-06,
      "loss": 0.3167,
      "step": 8700
    },
    {
      "epoch": 0.0704,
      "grad_norm": 24.188907623291016,
      "learning_rate": 9.333815261044178e-06,
      "loss": 0.3521,
      "step": 8800
    },
    {
      "epoch": 0.0712,
      "grad_norm": 13.702215194702148,
      "learning_rate": 9.325783132530122e-06,
      "loss": 0.2937,
      "step": 8900
    },
    {
      "epoch": 0.072,
      "grad_norm": 10.463766098022461,
      "learning_rate": 9.317751004016065e-06,
      "loss": 0.3317,
      "step": 9000
    },
    {
      "epoch": 0.0728,
      "grad_norm": 5.035882472991943,
      "learning_rate": 9.309718875502008e-06,
      "loss": 0.3048,
      "step": 9100
    },
    {
      "epoch": 0.0736,
      "grad_norm": 11.715450286865234,
      "learning_rate": 9.301686746987952e-06,
      "loss": 0.3287,
      "step": 9200
    },
    {
      "epoch": 0.0744,
      "grad_norm": 13.449250221252441,
      "learning_rate": 9.293654618473896e-06,
      "loss": 0.3345,
      "step": 9300
    },
    {
      "epoch": 0.0752,
      "grad_norm": 8.16373348236084,
      "learning_rate": 9.28562248995984e-06,
      "loss": 0.3233,
      "step": 9400
    },
    {
      "epoch": 0.076,
      "grad_norm": 6.236880302429199,
      "learning_rate": 9.277590361445784e-06,
      "loss": 0.3131,
      "step": 9500
    },
    {
      "epoch": 0.0768,
      "grad_norm": 12.164971351623535,
      "learning_rate": 9.269558232931728e-06,
      "loss": 0.3281,
      "step": 9600
    },
    {
      "epoch": 0.0776,
      "grad_norm": 15.774728775024414,
      "learning_rate": 9.261526104417672e-06,
      "loss": 0.315,
      "step": 9700
    },
    {
      "epoch": 0.0784,
      "grad_norm": 16.81951332092285,
      "learning_rate": 9.253493975903616e-06,
      "loss": 0.3491,
      "step": 9800
    },
    {
      "epoch": 0.0792,
      "grad_norm": 35.960697174072266,
      "learning_rate": 9.245461847389558e-06,
      "loss": 0.3258,
      "step": 9900
    },
    {
      "epoch": 0.08,
      "grad_norm": 9.389575958251953,
      "learning_rate": 9.237429718875502e-06,
      "loss": 0.3082,
      "step": 10000
    },
    {
      "epoch": 0.0808,
      "grad_norm": 5.77083683013916,
      "learning_rate": 9.229477911646586e-06,
      "loss": 0.316,
      "step": 10100
    },
    {
      "epoch": 0.0816,
      "grad_norm": 7.730663776397705,
      "learning_rate": 9.22144578313253e-06,
      "loss": 0.3538,
      "step": 10200
    },
    {
      "epoch": 0.0824,
      "grad_norm": 2.672037363052368,
      "learning_rate": 9.213413654618474e-06,
      "loss": 0.3089,
      "step": 10300
    },
    {
      "epoch": 0.0832,
      "grad_norm": 10.987016677856445,
      "learning_rate": 9.205381526104418e-06,
      "loss": 0.3032,
      "step": 10400
    },
    {
      "epoch": 0.084,
      "grad_norm": 5.871443271636963,
      "learning_rate": 9.197349397590362e-06,
      "loss": 0.3423,
      "step": 10500
    },
    {
      "epoch": 0.0848,
      "grad_norm": 8.393383026123047,
      "learning_rate": 9.189317269076306e-06,
      "loss": 0.2872,
      "step": 10600
    },
    {
      "epoch": 0.0856,
      "grad_norm": 8.698946952819824,
      "learning_rate": 9.18128514056225e-06,
      "loss": 0.3028,
      "step": 10700
    },
    {
      "epoch": 0.0864,
      "grad_norm": 2.9550344944000244,
      "learning_rate": 9.173253012048194e-06,
      "loss": 0.3262,
      "step": 10800
    },
    {
      "epoch": 0.0872,
      "grad_norm": 6.455613136291504,
      "learning_rate": 9.165220883534138e-06,
      "loss": 0.3273,
      "step": 10900
    },
    {
      "epoch": 0.088,
      "grad_norm": 11.550117492675781,
      "learning_rate": 9.157269076305222e-06,
      "loss": 0.3307,
      "step": 11000
    },
    {
      "epoch": 0.0888,
      "grad_norm": 9.657747268676758,
      "learning_rate": 9.149236947791166e-06,
      "loss": 0.3227,
      "step": 11100
    },
    {
      "epoch": 0.0896,
      "grad_norm": 9.803390502929688,
      "learning_rate": 9.14120481927711e-06,
      "loss": 0.3102,
      "step": 11200
    },
    {
      "epoch": 0.0904,
      "grad_norm": 12.699715614318848,
      "learning_rate": 9.133172690763052e-06,
      "loss": 0.3353,
      "step": 11300
    },
    {
      "epoch": 0.0912,
      "grad_norm": 9.973093032836914,
      "learning_rate": 9.125140562248996e-06,
      "loss": 0.3053,
      "step": 11400
    },
    {
      "epoch": 0.092,
      "grad_norm": 12.822247505187988,
      "learning_rate": 9.11710843373494e-06,
      "loss": 0.3088,
      "step": 11500
    },
    {
      "epoch": 0.0928,
      "grad_norm": 13.785049438476562,
      "learning_rate": 9.109076305220884e-06,
      "loss": 0.3004,
      "step": 11600
    },
    {
      "epoch": 0.0936,
      "grad_norm": 11.756546020507812,
      "learning_rate": 9.101044176706828e-06,
      "loss": 0.304,
      "step": 11700
    },
    {
      "epoch": 0.0944,
      "grad_norm": 12.297760009765625,
      "learning_rate": 9.093012048192772e-06,
      "loss": 0.3029,
      "step": 11800
    },
    {
      "epoch": 0.0952,
      "grad_norm": 7.838765621185303,
      "learning_rate": 9.084979919678716e-06,
      "loss": 0.3046,
      "step": 11900
    },
    {
      "epoch": 0.096,
      "grad_norm": 6.533823013305664,
      "learning_rate": 9.07694779116466e-06,
      "loss": 0.3065,
      "step": 12000
    },
    {
      "epoch": 0.096,
      "eval_test1_cer": 0.10149656434386436,
      "eval_test1_cer_norm": 0.06880207457920139,
      "eval_test1_loss": 0.23942527174949646,
      "eval_test1_runtime": 3496.8354,
      "eval_test1_samples_per_second": 0.715,
      "eval_test1_steps_per_second": 0.179,
      "eval_test1_wer": 0.23831598588880143,
      "eval_test1_wer_norm": 0.1713375610041205,
      "step": 12000
    },
    {
      "epoch": 0.096,
      "eval_test2_cer": 0.2408444394669056,
      "eval_test2_cer_norm": 0.178324488689185,
      "eval_test2_loss": 0.3968297839164734,
      "eval_test2_runtime": 3795.2628,
      "eval_test2_samples_per_second": 0.659,
      "eval_test2_steps_per_second": 0.165,
      "eval_test2_wer": 0.4381437399862669,
      "eval_test2_wer_norm": 0.36356864542745815,
      "step": 12000
    },
    {
      "epoch": 0.0968,
      "grad_norm": 6.5714826583862305,
      "learning_rate": 9.068915662650603e-06,
      "loss": 0.3177,
      "step": 12100
    },
    {
      "epoch": 0.0976,
      "grad_norm": 10.972187995910645,
      "learning_rate": 9.060883534136546e-06,
      "loss": 0.3032,
      "step": 12200
    },
    {
      "epoch": 0.0984,
      "grad_norm": 12.001550674438477,
      "learning_rate": 9.05285140562249e-06,
      "loss": 0.3009,
      "step": 12300
    },
    {
      "epoch": 0.0992,
      "grad_norm": 10.76689338684082,
      "learning_rate": 9.044819277108433e-06,
      "loss": 0.304,
      "step": 12400
    },
    {
      "epoch": 0.1,
      "grad_norm": 6.645873069763184,
      "learning_rate": 9.036787148594377e-06,
      "loss": 0.2801,
      "step": 12500
    },
    {
      "epoch": 0.1008,
      "grad_norm": 7.068800926208496,
      "learning_rate": 9.028755020080321e-06,
      "loss": 0.3198,
      "step": 12600
    },
    {
      "epoch": 0.1016,
      "grad_norm": 7.24527645111084,
      "learning_rate": 9.020722891566265e-06,
      "loss": 0.3236,
      "step": 12700
    },
    {
      "epoch": 0.1024,
      "grad_norm": 8.540508270263672,
      "learning_rate": 9.01269076305221e-06,
      "loss": 0.3132,
      "step": 12800
    },
    {
      "epoch": 0.1032,
      "grad_norm": 10.710851669311523,
      "learning_rate": 9.004658634538153e-06,
      "loss": 0.3034,
      "step": 12900
    },
    {
      "epoch": 0.104,
      "grad_norm": 7.53853702545166,
      "learning_rate": 8.996626506024097e-06,
      "loss": 0.2957,
      "step": 13000
    },
    {
      "epoch": 0.1048,
      "grad_norm": 8.962101936340332,
      "learning_rate": 8.988594377510041e-06,
      "loss": 0.312,
      "step": 13100
    },
    {
      "epoch": 0.1056,
      "grad_norm": 7.6881561279296875,
      "learning_rate": 8.980642570281125e-06,
      "loss": 0.3304,
      "step": 13200
    },
    {
      "epoch": 0.1064,
      "grad_norm": 3.7053704261779785,
      "learning_rate": 8.97261044176707e-06,
      "loss": 0.2913,
      "step": 13300
    },
    {
      "epoch": 0.1072,
      "grad_norm": 5.796387195587158,
      "learning_rate": 8.964578313253013e-06,
      "loss": 0.3162,
      "step": 13400
    },
    {
      "epoch": 0.108,
      "grad_norm": 14.237732887268066,
      "learning_rate": 8.956546184738957e-06,
      "loss": 0.3096,
      "step": 13500
    },
    {
      "epoch": 0.1088,
      "grad_norm": 9.336588859558105,
      "learning_rate": 8.948514056224901e-06,
      "loss": 0.3018,
      "step": 13600
    },
    {
      "epoch": 0.1096,
      "grad_norm": 16.10430335998535,
      "learning_rate": 8.940481927710845e-06,
      "loss": 0.3149,
      "step": 13700
    },
    {
      "epoch": 0.1104,
      "grad_norm": 70.292236328125,
      "learning_rate": 8.932449799196787e-06,
      "loss": 0.2721,
      "step": 13800
    },
    {
      "epoch": 0.1112,
      "grad_norm": 5.2695770263671875,
      "learning_rate": 8.924417670682731e-06,
      "loss": 0.294,
      "step": 13900
    },
    {
      "epoch": 0.112,
      "grad_norm": 7.514453411102295,
      "learning_rate": 8.916385542168675e-06,
      "loss": 0.3363,
      "step": 14000
    },
    {
      "epoch": 0.1128,
      "grad_norm": 6.603350639343262,
      "learning_rate": 8.908353413654619e-06,
      "loss": 0.3214,
      "step": 14100
    },
    {
      "epoch": 0.1136,
      "grad_norm": 12.648730278015137,
      "learning_rate": 8.900321285140563e-06,
      "loss": 0.313,
      "step": 14200
    },
    {
      "epoch": 0.1144,
      "grad_norm": 19.52114486694336,
      "learning_rate": 8.892289156626507e-06,
      "loss": 0.2778,
      "step": 14300
    },
    {
      "epoch": 0.1152,
      "grad_norm": 9.065464973449707,
      "learning_rate": 8.88425702811245e-06,
      "loss": 0.3182,
      "step": 14400
    },
    {
      "epoch": 0.116,
      "grad_norm": 6.192761421203613,
      "learning_rate": 8.876224899598395e-06,
      "loss": 0.3268,
      "step": 14500
    },
    {
      "epoch": 0.1168,
      "grad_norm": 6.336472034454346,
      "learning_rate": 8.868192771084338e-06,
      "loss": 0.3125,
      "step": 14600
    },
    {
      "epoch": 0.1176,
      "grad_norm": 7.2869343757629395,
      "learning_rate": 8.86016064257028e-06,
      "loss": 0.2938,
      "step": 14700
    },
    {
      "epoch": 0.1184,
      "grad_norm": 8.914804458618164,
      "learning_rate": 8.852128514056225e-06,
      "loss": 0.2831,
      "step": 14800
    },
    {
      "epoch": 0.1192,
      "grad_norm": 8.745502471923828,
      "learning_rate": 8.844096385542168e-06,
      "loss": 0.306,
      "step": 14900
    },
    {
      "epoch": 0.12,
      "grad_norm": 8.596148490905762,
      "learning_rate": 8.836064257028112e-06,
      "loss": 0.2907,
      "step": 15000
    },
    {
      "epoch": 0.1208,
      "grad_norm": 6.889795303344727,
      "learning_rate": 8.828032128514056e-06,
      "loss": 0.2806,
      "step": 15100
    },
    {
      "epoch": 0.1216,
      "grad_norm": 9.58425521850586,
      "learning_rate": 8.82e-06,
      "loss": 0.2853,
      "step": 15200
    },
    {
      "epoch": 0.1224,
      "grad_norm": 5.967700004577637,
      "learning_rate": 8.811967871485944e-06,
      "loss": 0.3109,
      "step": 15300
    },
    {
      "epoch": 0.1232,
      "grad_norm": 10.997330665588379,
      "learning_rate": 8.80401606425703e-06,
      "loss": 0.3129,
      "step": 15400
    },
    {
      "epoch": 0.124,
      "grad_norm": 8.48721981048584,
      "learning_rate": 8.795983935742972e-06,
      "loss": 0.3227,
      "step": 15500
    },
    {
      "epoch": 0.1248,
      "grad_norm": 9.547022819519043,
      "learning_rate": 8.787951807228916e-06,
      "loss": 0.282,
      "step": 15600
    },
    {
      "epoch": 0.1256,
      "grad_norm": 12.319640159606934,
      "learning_rate": 8.77991967871486e-06,
      "loss": 0.3019,
      "step": 15700
    },
    {
      "epoch": 0.1264,
      "grad_norm": 9.99170970916748,
      "learning_rate": 8.771887550200804e-06,
      "loss": 0.2872,
      "step": 15800
    },
    {
      "epoch": 0.1272,
      "grad_norm": 9.307220458984375,
      "learning_rate": 8.763855421686748e-06,
      "loss": 0.2955,
      "step": 15900
    },
    {
      "epoch": 0.128,
      "grad_norm": 6.70652961730957,
      "learning_rate": 8.755823293172692e-06,
      "loss": 0.2813,
      "step": 16000
    },
    {
      "epoch": 0.128,
      "eval_test1_cer": 0.11614478303084622,
      "eval_test1_cer_norm": 0.08919730112613153,
      "eval_test1_loss": 0.2319698929786682,
      "eval_test1_runtime": 2494.875,
      "eval_test1_samples_per_second": 1.002,
      "eval_test1_steps_per_second": 0.251,
      "eval_test1_wer": 0.2696580075220852,
      "eval_test1_wer_norm": 0.2076331862415617,
      "step": 16000
    },
    {
      "epoch": 0.128,
      "eval_test2_cer": 0.25404113935864414,
      "eval_test2_cer_norm": 0.1936541292221878,
      "eval_test2_loss": 0.38276800513267517,
      "eval_test2_runtime": 2638.5507,
      "eval_test2_samples_per_second": 0.947,
      "eval_test2_steps_per_second": 0.237,
      "eval_test2_wer": 0.46260585946440835,
      "eval_test2_wer_norm": 0.3951409580563832,
      "step": 16000
    },
    {
      "epoch": 0.1288,
      "grad_norm": 9.956653594970703,
      "learning_rate": 8.747791164658636e-06,
      "loss": 0.2785,
      "step": 16100
    },
    {
      "epoch": 0.1296,
      "grad_norm": 10.152132987976074,
      "learning_rate": 8.73975903614458e-06,
      "loss": 0.2918,
      "step": 16200
    },
    {
      "epoch": 0.1304,
      "grad_norm": 10.379541397094727,
      "learning_rate": 8.731726907630524e-06,
      "loss": 0.2879,
      "step": 16300
    },
    {
      "epoch": 0.1312,
      "grad_norm": 5.655648708343506,
      "learning_rate": 8.723694779116466e-06,
      "loss": 0.2935,
      "step": 16400
    },
    {
      "epoch": 0.132,
      "grad_norm": 9.877130508422852,
      "learning_rate": 8.71566265060241e-06,
      "loss": 0.3331,
      "step": 16500
    },
    {
      "epoch": 0.1328,
      "grad_norm": 8.812068939208984,
      "learning_rate": 8.707630522088354e-06,
      "loss": 0.2902,
      "step": 16600
    },
    {
      "epoch": 0.1336,
      "grad_norm": 8.972885131835938,
      "learning_rate": 8.699598393574298e-06,
      "loss": 0.3017,
      "step": 16700
    },
    {
      "epoch": 0.1344,
      "grad_norm": 6.562330722808838,
      "learning_rate": 8.691566265060242e-06,
      "loss": 0.2866,
      "step": 16800
    },
    {
      "epoch": 0.1352,
      "grad_norm": 10.704404830932617,
      "learning_rate": 8.683534136546186e-06,
      "loss": 0.3078,
      "step": 16900
    },
    {
      "epoch": 0.136,
      "grad_norm": 9.468811988830566,
      "learning_rate": 8.67550200803213e-06,
      "loss": 0.2906,
      "step": 17000
    },
    {
      "epoch": 0.1368,
      "grad_norm": 5.418923854827881,
      "learning_rate": 8.667469879518073e-06,
      "loss": 0.2918,
      "step": 17100
    },
    {
      "epoch": 0.1376,
      "grad_norm": 10.071795463562012,
      "learning_rate": 8.659437751004017e-06,
      "loss": 0.3149,
      "step": 17200
    },
    {
      "epoch": 0.1384,
      "grad_norm": 10.977407455444336,
      "learning_rate": 8.65140562248996e-06,
      "loss": 0.3091,
      "step": 17300
    },
    {
      "epoch": 0.1392,
      "grad_norm": 7.745774745941162,
      "learning_rate": 8.643373493975904e-06,
      "loss": 0.2843,
      "step": 17400
    },
    {
      "epoch": 0.14,
      "grad_norm": 5.645944595336914,
      "learning_rate": 8.635341365461847e-06,
      "loss": 0.2824,
      "step": 17500
    },
    {
      "epoch": 0.1408,
      "grad_norm": 11.466683387756348,
      "learning_rate": 8.627309236947791e-06,
      "loss": 0.3228,
      "step": 17600
    },
    {
      "epoch": 0.1416,
      "grad_norm": 7.580329418182373,
      "learning_rate": 8.619277108433735e-06,
      "loss": 0.2967,
      "step": 17700
    },
    {
      "epoch": 0.1424,
      "grad_norm": 11.873896598815918,
      "learning_rate": 8.61124497991968e-06,
      "loss": 0.3073,
      "step": 17800
    },
    {
      "epoch": 0.1432,
      "grad_norm": 6.72385835647583,
      "learning_rate": 8.603212851405623e-06,
      "loss": 0.3105,
      "step": 17900
    },
    {
      "epoch": 0.144,
      "grad_norm": 11.72990608215332,
      "learning_rate": 8.595180722891567e-06,
      "loss": 0.3141,
      "step": 18000
    },
    {
      "epoch": 0.1448,
      "grad_norm": 16.776378631591797,
      "learning_rate": 8.587148594377511e-06,
      "loss": 0.2964,
      "step": 18100
    },
    {
      "epoch": 0.1456,
      "grad_norm": 9.208036422729492,
      "learning_rate": 8.579116465863455e-06,
      "loss": 0.2818,
      "step": 18200
    },
    {
      "epoch": 0.1464,
      "grad_norm": 10.971381187438965,
      "learning_rate": 8.571084337349397e-06,
      "loss": 0.2694,
      "step": 18300
    },
    {
      "epoch": 0.1472,
      "grad_norm": 10.638858795166016,
      "learning_rate": 8.563052208835341e-06,
      "loss": 0.2927,
      "step": 18400
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 6.251519203186035, | |
| "learning_rate": 8.555020080321285e-06, | |
| "loss": 0.2835, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.1488, | |
| "grad_norm": 10.34622859954834, | |
| "learning_rate": 8.546987951807229e-06, | |
| "loss": 0.298, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.1496, | |
| "grad_norm": 9.303108215332031, | |
| "learning_rate": 8.538955823293173e-06, | |
| "loss": 0.2856, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.1504, | |
| "grad_norm": 8.3678617477417, | |
| "learning_rate": 8.530923694779117e-06, | |
| "loss": 0.2782, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.1512, | |
| "grad_norm": 11.090498924255371, | |
| "learning_rate": 8.52289156626506e-06, | |
| "loss": 0.2863, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 5.836663722991943, | |
| "learning_rate": 8.514859437751005e-06, | |
| "loss": 0.2693, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.1528, | |
| "grad_norm": 6.694278717041016, | |
| "learning_rate": 8.506827309236948e-06, | |
| "loss": 0.2817, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.1536, | |
| "grad_norm": 5.725604057312012, | |
| "learning_rate": 8.498795180722892e-06, | |
| "loss": 0.265, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.1544, | |
| "grad_norm": 8.070332527160645, | |
| "learning_rate": 8.490763052208836e-06, | |
| "loss": 0.302, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.1552, | |
| "grad_norm": 7.241143226623535, | |
| "learning_rate": 8.48281124497992e-06, | |
| "loss": 0.2898, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 8.699810981750488, | |
| "learning_rate": 8.474779116465865e-06, | |
| "loss": 0.2868, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.1568, | |
| "grad_norm": 4.879644870758057, | |
| "learning_rate": 8.466746987951808e-06, | |
| "loss": 0.2917, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.1576, | |
| "grad_norm": 6.502651691436768, | |
| "learning_rate": 8.458714859437752e-06, | |
| "loss": 0.2888, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.1584, | |
| "grad_norm": 5.7493791580200195, | |
| "learning_rate": 8.450682730923695e-06, | |
| "loss": 0.2936, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.1592, | |
| "grad_norm": 10.4688720703125, | |
| "learning_rate": 8.442650602409639e-06, | |
| "loss": 0.2806, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 7.386699676513672, | |
| "learning_rate": 8.434618473895582e-06, | |
| "loss": 0.3055, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_test1_cer": 0.09210452610351781, | |
| "eval_test1_cer_norm": 0.07215885898143924, | |
| "eval_test1_loss": 0.22505834698677063, | |
| "eval_test1_runtime": 2739.9634, | |
| "eval_test1_samples_per_second": 0.912, | |
| "eval_test1_steps_per_second": 0.228, | |
| "eval_test1_wer": 0.23679990670281933, | |
| "eval_test1_wer_norm": 0.17437680820596743, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_test2_cer": 0.15537387538731473, | |
| "eval_test2_cer_norm": 0.12383792996591261, | |
| "eval_test2_loss": 0.3772575259208679, | |
| "eval_test2_runtime": 3715.4173, | |
| "eval_test2_samples_per_second": 0.673, | |
| "eval_test2_steps_per_second": 0.168, | |
| "eval_test2_wer": 0.31795033188372623, | |
| "eval_test2_wer_norm": 0.24954159981663993, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.1608, | |
| "grad_norm": 9.69093132019043, | |
| "learning_rate": 8.426586345381526e-06, | |
| "loss": 0.2777, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.1616, | |
| "grad_norm": 8.120789527893066, | |
| "learning_rate": 8.41855421686747e-06, | |
| "loss": 0.2859, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.1624, | |
| "grad_norm": 7.084224700927734, | |
| "learning_rate": 8.410522088353414e-06, | |
| "loss": 0.3035, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.1632, | |
| "grad_norm": 10.893829345703125, | |
| "learning_rate": 8.402489959839358e-06, | |
| "loss": 0.2792, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 36.892173767089844, | |
| "learning_rate": 8.394457831325302e-06, | |
| "loss": 0.2835, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.1648, | |
| "grad_norm": 8.867621421813965, | |
| "learning_rate": 8.386425702811246e-06, | |
| "loss": 0.2752, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.1656, | |
| "grad_norm": 5.385016918182373, | |
| "learning_rate": 8.378393574297188e-06, | |
| "loss": 0.2763, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.1664, | |
| "grad_norm": 6.431478023529053, | |
| "learning_rate": 8.370361445783132e-06, | |
| "loss": 0.2874, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.1672, | |
| "grad_norm": 7.890101432800293, | |
| "learning_rate": 8.362329317269076e-06, | |
| "loss": 0.2737, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 5.053267955780029, | |
| "learning_rate": 8.35429718875502e-06, | |
| "loss": 0.2885, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.1688, | |
| "grad_norm": 7.141018390655518, | |
| "learning_rate": 8.346265060240964e-06, | |
| "loss": 0.2735, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.1696, | |
| "grad_norm": 12.165709495544434, | |
| "learning_rate": 8.338232931726908e-06, | |
| "loss": 0.3086, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.1704, | |
| "grad_norm": 5.507659912109375, | |
| "learning_rate": 8.330200803212852e-06, | |
| "loss": 0.2773, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.1712, | |
| "grad_norm": 6.527307987213135, | |
| "learning_rate": 8.322248995983936e-06, | |
| "loss": 0.2584, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 8.494378089904785, | |
| "learning_rate": 8.31421686746988e-06, | |
| "loss": 0.3005, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.1728, | |
| "grad_norm": 6.213225364685059, | |
| "learning_rate": 8.306184738955824e-06, | |
| "loss": 0.2708, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.1736, | |
| "grad_norm": 10.444897651672363, | |
| "learning_rate": 8.298152610441768e-06, | |
| "loss": 0.2817, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.1744, | |
| "grad_norm": 6.7094950675964355, | |
| "learning_rate": 8.290120481927712e-06, | |
| "loss": 0.2555, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.1752, | |
| "grad_norm": 7.899540901184082, | |
| "learning_rate": 8.282088353413656e-06, | |
| "loss": 0.2902, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 7.049533367156982, | |
| "learning_rate": 8.2740562248996e-06, | |
| "loss": 0.2648, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.1768, | |
| "grad_norm": 7.662154674530029, | |
| "learning_rate": 8.266024096385543e-06, | |
| "loss": 0.2984, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.1776, | |
| "grad_norm": 5.362339019775391, | |
| "learning_rate": 8.257991967871487e-06, | |
| "loss": 0.2533, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.1784, | |
| "grad_norm": 11.844931602478027, | |
| "learning_rate": 8.249959839357431e-06, | |
| "loss": 0.2967, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.1792, | |
| "grad_norm": 8.356042861938477, | |
| "learning_rate": 8.241927710843374e-06, | |
| "loss": 0.2817, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 6.400118827819824, | |
| "learning_rate": 8.233895582329317e-06, | |
| "loss": 0.2975, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.1808, | |
| "grad_norm": 7.8221611976623535, | |
| "learning_rate": 8.225863453815261e-06, | |
| "loss": 0.2928, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.1816, | |
| "grad_norm": 9.997267723083496, | |
| "learning_rate": 8.217831325301205e-06, | |
| "loss": 0.2846, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.1824, | |
| "grad_norm": 3.741245746612549, | |
| "learning_rate": 8.20979919678715e-06, | |
| "loss": 0.2712, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.1832, | |
| "grad_norm": 6.534188270568848, | |
| "learning_rate": 8.201767068273093e-06, | |
| "loss": 0.2879, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 5.730849266052246, | |
| "learning_rate": 8.193734939759037e-06, | |
| "loss": 0.2722, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.1848, | |
| "grad_norm": 4.018589973449707, | |
| "learning_rate": 8.185702811244981e-06, | |
| "loss": 0.2979, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.1856, | |
| "grad_norm": 7.68761682510376, | |
| "learning_rate": 8.177670682730925e-06, | |
| "loss": 0.2919, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.1864, | |
| "grad_norm": 9.94321060180664, | |
| "learning_rate": 8.169638554216867e-06, | |
| "loss": 0.2731, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.1872, | |
| "grad_norm": 2.9645307064056396, | |
| "learning_rate": 8.161686746987953e-06, | |
| "loss": 0.2698, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 4.740218639373779, | |
| "learning_rate": 8.153654618473897e-06, | |
| "loss": 0.2731, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.1888, | |
| "grad_norm": 5.427436828613281, | |
| "learning_rate": 8.145622489959841e-06, | |
| "loss": 0.269, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.1896, | |
| "grad_norm": 13.593464851379395, | |
| "learning_rate": 8.137590361445785e-06, | |
| "loss": 0.263, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.1904, | |
| "grad_norm": 7.22516393661499, | |
| "learning_rate": 8.129558232931729e-06, | |
| "loss": 0.3125, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.1912, | |
| "grad_norm": 14.015786170959473, | |
| "learning_rate": 8.121526104417673e-06, | |
| "loss": 0.2832, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 8.892059326171875, | |
| "learning_rate": 8.113493975903615e-06, | |
| "loss": 0.3009, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "eval_test1_cer": 0.07456214056314886, | |
| "eval_test1_cer_norm": 0.05662352630441569, | |
| "eval_test1_loss": 0.22622939944267273, | |
| "eval_test1_runtime": 3568.0234, | |
| "eval_test1_samples_per_second": 0.701, | |
| "eval_test1_steps_per_second": 0.175, | |
| "eval_test1_wer": 0.19636140995364296, | |
| "eval_test1_wer_norm": 0.13466202986644846, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "eval_test2_cer": 0.17353100384514877, | |
| "eval_test2_cer_norm": 0.14121571893399443, | |
| "eval_test2_loss": 0.37227049469947815, | |
| "eval_test2_runtime": 3757.9926, | |
| "eval_test2_samples_per_second": 0.665, | |
| "eval_test2_steps_per_second": 0.166, | |
| "eval_test2_wer": 0.3350022888532845, | |
| "eval_test2_wer_norm": 0.2660726564290626, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.1928, | |
| "grad_norm": 6.917489051818848, | |
| "learning_rate": 8.105461847389559e-06, | |
| "loss": 0.2745, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.1936, | |
| "grad_norm": 9.797795295715332, | |
| "learning_rate": 8.097429718875503e-06, | |
| "loss": 0.2792, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.1944, | |
| "grad_norm": 7.5331315994262695, | |
| "learning_rate": 8.089397590361447e-06, | |
| "loss": 0.2631, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.1952, | |
| "grad_norm": 28.057600021362305, | |
| "learning_rate": 8.08136546184739e-06, | |
| "loss": 0.2752, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 6.589796543121338, | |
| "learning_rate": 8.073413654618475e-06, | |
| "loss": 0.2601, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.1968, | |
| "grad_norm": 7.585195541381836, | |
| "learning_rate": 8.065381526104419e-06, | |
| "loss": 0.2845, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.1976, | |
| "grad_norm": 12.819061279296875, | |
| "learning_rate": 8.057349397590361e-06, | |
| "loss": 0.2807, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.1984, | |
| "grad_norm": 6.550510883331299, | |
| "learning_rate": 8.049317269076305e-06, | |
| "loss": 0.2641, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.1992, | |
| "grad_norm": 4.5952653884887695, | |
| "learning_rate": 8.041285140562249e-06, | |
| "loss": 0.2773, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 7.8463134765625, | |
| "learning_rate": 8.033253012048193e-06, | |
| "loss": 0.299, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.2008, | |
| "grad_norm": 8.69621753692627, | |
| "learning_rate": 8.025220883534137e-06, | |
| "loss": 0.2568, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.2016, | |
| "grad_norm": 19.935726165771484, | |
| "learning_rate": 8.01718875502008e-06, | |
| "loss": 0.2875, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.2024, | |
| "grad_norm": 7.2960662841796875, | |
| "learning_rate": 8.009156626506025e-06, | |
| "loss": 0.2757, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.2032, | |
| "grad_norm": 14.16852855682373, | |
| "learning_rate": 8.001124497991969e-06, | |
| "loss": 0.2733, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 10.322851181030273, | |
| "learning_rate": 7.993092369477912e-06, | |
| "loss": 0.2732, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.2048, | |
| "grad_norm": 6.632780075073242, | |
| "learning_rate": 7.985060240963856e-06, | |
| "loss": 0.2705, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.2056, | |
| "grad_norm": 9.688406944274902, | |
| "learning_rate": 7.9770281124498e-06, | |
| "loss": 0.3111, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.2064, | |
| "grad_norm": 11.941987037658691, | |
| "learning_rate": 7.968995983935744e-06, | |
| "loss": 0.2688, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.2072, | |
| "grad_norm": 6.580658912658691, | |
| "learning_rate": 7.960963855421688e-06, | |
| "loss": 0.2842, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 9.130199432373047, | |
| "learning_rate": 7.95293172690763e-06, | |
| "loss": 0.2789, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.2088, | |
| "grad_norm": 7.189250469207764, | |
| "learning_rate": 7.944899598393574e-06, | |
| "loss": 0.2687, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.2096, | |
| "grad_norm": 6.851436614990234, | |
| "learning_rate": 7.936867469879518e-06, | |
| "loss": 0.3035, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.2104, | |
| "grad_norm": 5.916522979736328, | |
| "learning_rate": 7.928835341365462e-06, | |
| "loss": 0.2895, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.2112, | |
| "grad_norm": 9.012747764587402, | |
| "learning_rate": 7.920803212851406e-06, | |
| "loss": 0.258, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 4.732649803161621, | |
| "learning_rate": 7.91277108433735e-06, | |
| "loss": 0.2707, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.2128, | |
| "grad_norm": 7.508853912353516, | |
| "learning_rate": 7.904738955823294e-06, | |
| "loss": 0.2985, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.2136, | |
| "grad_norm": 7.832762241363525, | |
| "learning_rate": 7.896706827309238e-06, | |
| "loss": 0.2799, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.2144, | |
| "grad_norm": 11.426980972290039, | |
| "learning_rate": 7.888674698795182e-06, | |
| "loss": 0.2877, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.2152, | |
| "grad_norm": 9.61413288116455, | |
| "learning_rate": 7.880642570281126e-06, | |
| "loss": 0.2869, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 11.701996803283691, | |
| "learning_rate": 7.87261044176707e-06, | |
| "loss": 0.2607, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.2168, | |
| "grad_norm": 7.202447891235352, | |
| "learning_rate": 7.864578313253013e-06, | |
| "loss": 0.2763, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.2176, | |
| "grad_norm": 9.715481758117676, | |
| "learning_rate": 7.856546184738957e-06, | |
| "loss": 0.2871, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.2184, | |
| "grad_norm": 8.603879928588867, | |
| "learning_rate": 7.848514056224901e-06, | |
| "loss": 0.2876, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.2192, | |
| "grad_norm": 7.729073524475098, | |
| "learning_rate": 7.840481927710844e-06, | |
| "loss": 0.25, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 4.843418598175049, | |
| "learning_rate": 7.832449799196787e-06, | |
| "loss": 0.2913, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.2208, | |
| "grad_norm": 13.062045097351074, | |
| "learning_rate": 7.824417670682731e-06, | |
| "loss": 0.2676, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.2216, | |
| "grad_norm": 9.212933540344238, | |
| "learning_rate": 7.816385542168675e-06, | |
| "loss": 0.2814, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.2224, | |
| "grad_norm": 8.371397972106934, | |
| "learning_rate": 7.80835341365462e-06, | |
| "loss": 0.2556, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.2232, | |
| "grad_norm": 11.034435272216797, | |
| "learning_rate": 7.800321285140563e-06, | |
| "loss": 0.2656, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 7.741024494171143, | |
| "learning_rate": 7.792289156626507e-06, | |
| "loss": 0.2481, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "eval_test1_cer": 0.08188158189558593, | |
| "eval_test1_cer_norm": 0.06053257137368838, | |
| "eval_test1_loss": 0.21281211078166962, | |
| "eval_test1_runtime": 3606.8392, | |
| "eval_test1_samples_per_second": 0.693, | |
| "eval_test1_steps_per_second": 0.173, | |
| "eval_test1_wer": 0.20149275489081314, | |
| "eval_test1_wer_norm": 0.13837341827639615, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "eval_test2_cer": 0.22203867547691045, | |
| "eval_test2_cer_norm": 0.17609234583204214, | |
| "eval_test2_loss": 0.35916781425476074, | |
| "eval_test2_runtime": 3907.2621, | |
| "eval_test2_samples_per_second": 0.64, | |
| "eval_test2_steps_per_second": 0.16, | |
| "eval_test2_wer": 0.3924525062943465, | |
| "eval_test2_wer_norm": 0.32526358010543205, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.2248, | |
| "grad_norm": 8.185860633850098, | |
| "learning_rate": 7.784257028112451e-06, | |
| "loss": 0.2649, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.2256, | |
| "grad_norm": 9.275001525878906, | |
| "learning_rate": 7.776224899598395e-06, | |
| "loss": 0.2835, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.2264, | |
| "grad_norm": 11.974363327026367, | |
| "learning_rate": 7.768192771084337e-06, | |
| "loss": 0.279, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.2272, | |
| "grad_norm": 8.101808547973633, | |
| "learning_rate": 7.760160642570281e-06, | |
| "loss": 0.2934, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 6.571002006530762, | |
| "learning_rate": 7.752128514056225e-06, | |
| "loss": 0.2728, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.2288, | |
| "grad_norm": 5.396939277648926, | |
| "learning_rate": 7.744096385542169e-06, | |
| "loss": 0.2606, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.2296, | |
| "grad_norm": 11.369711875915527, | |
| "learning_rate": 7.736144578313253e-06, | |
| "loss": 0.2621, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.2304, | |
| "grad_norm": 11.154069900512695, | |
| "learning_rate": 7.728112449799197e-06, | |
| "loss": 0.2668, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.2312, | |
| "grad_norm": 9.739912033081055, | |
| "learning_rate": 7.720080321285141e-06, | |
| "loss": 0.2448, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 7.0749125480651855, | |
| "learning_rate": 7.712048192771085e-06, | |
| "loss": 0.2571, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.2328, | |
| "grad_norm": 11.714834213256836, | |
| "learning_rate": 7.704016064257029e-06, | |
| "loss": 0.2687, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.2336, | |
| "grad_norm": 7.279887676239014, | |
| "learning_rate": 7.695983935742973e-06, | |
| "loss": 0.2606, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.2344, | |
| "grad_norm": 13.882575035095215, | |
| "learning_rate": 7.687951807228917e-06, | |
| "loss": 0.2906, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.2352, | |
| "grad_norm": 11.140130996704102, | |
| "learning_rate": 7.67991967871486e-06, | |
| "loss": 0.2795, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 8.242719650268555, | |
| "learning_rate": 7.671887550200805e-06, | |
| "loss": 0.2531, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.2368, | |
| "grad_norm": 3.9837567806243896, | |
| "learning_rate": 7.663855421686748e-06, | |
| "loss": 0.2684, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.2376, | |
| "grad_norm": 4.7640275955200195, | |
| "learning_rate": 7.655823293172692e-06, | |
| "loss": 0.2812, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.2384, | |
| "grad_norm": 5.323102951049805, | |
| "learning_rate": 7.647791164658636e-06, | |
| "loss": 0.2742, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.2392, | |
| "grad_norm": 5.6695966720581055, | |
| "learning_rate": 7.63975903614458e-06, | |
| "loss": 0.2662, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 11.021814346313477, | |
| "learning_rate": 7.631726907630522e-06, | |
| "loss": 0.2591, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.2408, | |
| "grad_norm": 7.066900253295898, | |
| "learning_rate": 7.623694779116466e-06, | |
| "loss": 0.2799, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.2416, | |
| "grad_norm": 7.006994247436523, | |
| "learning_rate": 7.61566265060241e-06, | |
| "loss": 0.2763, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.2424, | |
| "grad_norm": 6.179232597351074, | |
| "learning_rate": 7.607630522088354e-06, | |
| "loss": 0.2607, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.2432, | |
| "grad_norm": 11.01894760131836, | |
| "learning_rate": 7.599598393574298e-06, | |
| "loss": 0.2892, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 8.360103607177734, | |
| "learning_rate": 7.591566265060242e-06, | |
| "loss": 0.2682, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.2448, | |
| "grad_norm": 12.027982711791992, | |
| "learning_rate": 7.583614457831326e-06, | |
| "loss": 0.2795, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.2456, | |
| "grad_norm": 9.705676078796387, | |
| "learning_rate": 7.5755823293172694e-06, | |
| "loss": 0.2604, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.2464, | |
| "grad_norm": 78.23721313476562, | |
| "learning_rate": 7.567550200803213e-06, | |
| "loss": 0.2744, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.2472, | |
| "grad_norm": 7.072327613830566, | |
| "learning_rate": 7.559518072289157e-06, | |
| "loss": 0.2518, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 6.551907062530518, | |
| "learning_rate": 7.551485943775101e-06, | |
| "loss": 0.252, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.2488, | |
| "grad_norm": 7.861015796661377, | |
| "learning_rate": 7.543453815261045e-06, | |
| "loss": 0.2616, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.2496, | |
| "grad_norm": 13.113914489746094, | |
| "learning_rate": 7.535421686746989e-06, | |
| "loss": 0.2929, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.2504, | |
| "grad_norm": 10.613142967224121, | |
| "learning_rate": 7.527389558232933e-06, | |
| "loss": 0.2603, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.2512, | |
| "grad_norm": 9.165702819824219, | |
| "learning_rate": 7.519357429718877e-06, | |
| "loss": 0.2633, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 5.82572078704834, | |
| "learning_rate": 7.511325301204821e-06, | |
| "loss": 0.2553, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.2528, | |
| "grad_norm": 4.581362247467041, | |
| "learning_rate": 7.503293172690763e-06, | |
| "loss": 0.2735, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.2536, | |
| "grad_norm": 6.59524393081665, | |
| "learning_rate": 7.495261044176707e-06, | |
| "loss": 0.2667, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.2544, | |
| "grad_norm": 10.484724044799805, | |
| "learning_rate": 7.487228915662651e-06, | |
| "loss": 0.2679, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.2552, | |
| "grad_norm": 8.551301002502441, | |
| "learning_rate": 7.479196787148595e-06, | |
| "loss": 0.2538, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 6.701406002044678, | |
| "learning_rate": 7.471164658634539e-06, | |
| "loss": 0.2507, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "eval_test1_cer": 0.08450033609679587, | |
| "eval_test1_cer_norm": 0.058093019905395345, | |
| "eval_test1_loss": 0.20838095247745514, | |
| "eval_test1_runtime": 3620.5796, | |
| "eval_test1_samples_per_second": 0.69, | |
| "eval_test1_steps_per_second": 0.173, | |
| "eval_test1_wer": 0.20155106562873554, | |
| "eval_test1_wer_norm": 0.13854875946111808, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "eval_test2_cer": 0.17676018217792214, | |
| "eval_test2_cer_norm": 0.13646575766966224, | |
| "eval_test2_loss": 0.35614562034606934, | |
| "eval_test2_runtime": 3787.2574, | |
| "eval_test2_samples_per_second": 0.66, | |
| "eval_test2_steps_per_second": 0.165, | |
| "eval_test2_wer": 0.3315690089265278, | |
| "eval_test2_wer_norm": 0.26286385514554206, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.2568, | |
| "grad_norm": 9.485516548156738, | |
| "learning_rate": 7.463132530120483e-06, | |
| "loss": 0.2773, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.2576, | |
| "grad_norm": 7.148492336273193, | |
| "learning_rate": 7.4551004016064265e-06, | |
| "loss": 0.2609, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.2584, | |
| "grad_norm": 7.948647499084473, | |
| "learning_rate": 7.4470682730923705e-06, | |
| "loss": 0.2901, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.2592, | |
| "grad_norm": 11.446524620056152, | |
| "learning_rate": 7.439036144578314e-06, | |
| "loss": 0.2531, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 7.954312324523926, | |
| "learning_rate": 7.4310040160642574e-06, | |
| "loss": 0.2474, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.2608, | |
| "grad_norm": 6.527101039886475, | |
| "learning_rate": 7.422971887550201e-06, | |
| "loss": 0.2713, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.2616, | |
| "grad_norm": 4.255068302154541, | |
| "learning_rate": 7.414939759036145e-06, | |
| "loss": 0.2774, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.2624, | |
| "grad_norm": 12.29005241394043, | |
| "learning_rate": 7.406907630522089e-06, | |
| "loss": 0.2869, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.2632, | |
| "grad_norm": 7.255998611450195, | |
| "learning_rate": 7.398875502008033e-06, | |
| "loss": 0.2776, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 5.739976406097412, | |
| "learning_rate": 7.390843373493977e-06, | |
| "loss": 0.2658, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.2648, | |
| "grad_norm": 9.824767112731934, | |
| "learning_rate": 7.38281124497992e-06, | |
| "loss": 0.2741, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.2656, | |
| "grad_norm": 5.075085163116455, | |
| "learning_rate": 7.374779116465864e-06, | |
| "loss": 0.2626, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.2664, | |
| "grad_norm": 11.58659839630127, | |
| "learning_rate": 7.366746987951808e-06, | |
| "loss": 0.2834, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.2672, | |
| "grad_norm": 3.7526087760925293, | |
| "learning_rate": 7.358795180722892e-06, | |
| "loss": 0.2896, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 5.333492755889893, | |
| "learning_rate": 7.350763052208836e-06, | |
| "loss": 0.2575, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.2688, | |
| "grad_norm": 8.13162612915039, | |
| "learning_rate": 7.34273092369478e-06, | |
| "loss": 0.2513, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.2696, | |
| "grad_norm": 8.09466552734375, | |
| "learning_rate": 7.334698795180723e-06, | |
| "loss": 0.3005, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.2704, | |
| "grad_norm": 4.5985331535339355, | |
| "learning_rate": 7.326666666666667e-06, | |
| "loss": 0.3217, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.2712, | |
| "grad_norm": 6.758331775665283, | |
| "learning_rate": 7.318634538152611e-06, | |
| "loss": 0.3054, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 9.720072746276855, | |
| "learning_rate": 7.310602409638555e-06, | |
| "loss": 0.2554, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.2728, | |
| "grad_norm": 10.393074989318848, | |
| "learning_rate": 7.302570281124498e-06, | |
| "loss": 0.2348, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.2736, | |
| "grad_norm": 5.730389595031738, | |
| "learning_rate": 7.294538152610442e-06, | |
| "loss": 0.264, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.2744, | |
| "grad_norm": 9.347960472106934, | |
| "learning_rate": 7.286506024096386e-06, | |
| "loss": 0.3121, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.2752, | |
| "grad_norm": 5.9382171630859375, | |
| "learning_rate": 7.27847389558233e-06, | |
| "loss": 0.256, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 13.730400085449219, | |
| "learning_rate": 7.270441767068274e-06, | |
| "loss": 0.2474, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.2768, | |
| "grad_norm": 5.6664347648620605, | |
| "learning_rate": 7.262409638554218e-06, | |
| "loss": 0.2642, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.2776, | |
| "grad_norm": 4.812320232391357, | |
| "learning_rate": 7.2543775100401615e-06, | |
| "loss": 0.2697, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.2784, | |
| "grad_norm": 6.524253845214844, | |
| "learning_rate": 7.2463453815261055e-06, | |
| "loss": 0.2511, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.2792, | |
| "grad_norm": 10.69779109954834, | |
| "learning_rate": 7.238313253012049e-06, | |
| "loss": 0.2455, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 6.162754535675049, | |
| "learning_rate": 7.230281124497992e-06, | |
| "loss": 0.3028, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.2808, | |
| "grad_norm": 10.023324966430664, | |
| "learning_rate": 7.2222489959839355e-06, | |
| "loss": 0.2618, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.2816, | |
| "grad_norm": 7.126629829406738, | |
| "learning_rate": 7.2142168674698794e-06, | |
| "loss": 0.2482, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.2824, | |
| "grad_norm": 5.309641361236572, | |
| "learning_rate": 7.206184738955823e-06, | |
| "loss": 0.2641, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.2832, | |
| "grad_norm": 8.043412208557129, | |
| "learning_rate": 7.198152610441767e-06, | |
| "loss": 0.2779, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 6.982008934020996, | |
| "learning_rate": 7.190120481927711e-06, | |
| "loss": 0.2519, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.2848, | |
| "grad_norm": 14.100213050842285, | |
| "learning_rate": 7.182088353413655e-06, | |
| "loss": 0.2621, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.2856, | |
| "grad_norm": 7.013679027557373, | |
| "learning_rate": 7.174056224899599e-06, | |
| "loss": 0.2511, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.2864, | |
| "grad_norm": 12.395895004272461, | |
| "learning_rate": 7.166024096385543e-06, | |
| "loss": 0.2574, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.2872, | |
| "grad_norm": 5.958752155303955, | |
| "learning_rate": 7.157991967871486e-06, | |
| "loss": 0.2376, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 7.98759126663208, | |
| "learning_rate": 7.14995983935743e-06, | |
| "loss": 0.2558, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "eval_test1_cer": 0.09376167002763462, | |
| "eval_test1_cer_norm": 0.06997382764664922, | |
| "eval_test1_loss": 0.2042306363582611, | |
| "eval_test1_runtime": 3524.6047, | |
| "eval_test1_samples_per_second": 0.709, | |
| "eval_test1_steps_per_second": 0.177, | |
| "eval_test1_wer": 0.2142628064958162, | |
| "eval_test1_wer_norm": 0.15251760717729917, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "eval_test2_cer": 0.18322787172882368, | |
| "eval_test2_cer_norm": 0.1433607065385807, | |
| "eval_test2_loss": 0.34622320532798767, | |
| "eval_test2_runtime": 2531.2967, | |
| "eval_test2_samples_per_second": 0.988, | |
| "eval_test2_steps_per_second": 0.247, | |
| "eval_test2_wer": 0.3355745021744106, | |
| "eval_test2_wer_norm": 0.2676197570479028, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.2888, | |
| "grad_norm": 7.333773612976074, | |
| "learning_rate": 7.141927710843374e-06, | |
| "loss": 0.2581, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.2896, | |
| "grad_norm": 6.082370758056641, | |
| "learning_rate": 7.133895582329318e-06, | |
| "loss": 0.2444, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.2904, | |
| "grad_norm": 7.414235591888428, | |
| "learning_rate": 7.125863453815262e-06, | |
| "loss": 0.2501, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.2912, | |
| "grad_norm": 7.523865699768066, | |
| "learning_rate": 7.117831325301206e-06, | |
| "loss": 0.2732, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 4.851339817047119, | |
| "learning_rate": 7.1097991967871496e-06, | |
| "loss": 0.2674, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.2928, | |
| "grad_norm": 14.28159236907959, | |
| "learning_rate": 7.1017670682730935e-06, | |
| "loss": 0.2652, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.2936, | |
| "grad_norm": 9.865965843200684, | |
| "learning_rate": 7.093734939759037e-06, | |
| "loss": 0.2586, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.2944, | |
| "grad_norm": 8.199728965759277, | |
| "learning_rate": 7.085702811244981e-06, | |
| "loss": 0.2606, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.2952, | |
| "grad_norm": 6.126343727111816, | |
| "learning_rate": 7.0776706827309235e-06, | |
| "loss": 0.2244, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 12.142101287841797, | |
| "learning_rate": 7.0696385542168675e-06, | |
| "loss": 0.2737, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.2968, | |
| "grad_norm": 18.582122802734375, | |
| "learning_rate": 7.061606425702811e-06, | |
| "loss": 0.2617, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.2976, | |
| "grad_norm": 6.676758766174316, | |
| "learning_rate": 7.053574297188755e-06, | |
| "loss": 0.2679, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.2984, | |
| "grad_norm": 11.17313003540039, | |
| "learning_rate": 7.045542168674699e-06, | |
| "loss": 0.2896, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.2992, | |
| "grad_norm": 6.7832159996032715, | |
| "learning_rate": 7.037510040160643e-06, | |
| "loss": 0.2645, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 7.81158971786499, | |
| "learning_rate": 7.029558232931728e-06, | |
| "loss": 0.2486, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.3008, | |
| "grad_norm": 5.17408561706543, | |
| "learning_rate": 7.0215261044176705e-06, | |
| "loss": 0.274, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.3016, | |
| "grad_norm": 4.426403045654297, | |
| "learning_rate": 7.0134939759036144e-06, | |
| "loss": 0.2574, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.3024, | |
| "grad_norm": 4.935957908630371, | |
| "learning_rate": 7.005461847389558e-06, | |
| "loss": 0.239, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.3032, | |
| "grad_norm": 5.82489013671875, | |
| "learning_rate": 6.997429718875502e-06, | |
| "loss": 0.2549, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 5.054333209991455, | |
| "learning_rate": 6.989397590361446e-06, | |
| "loss": 0.2675, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.3048, | |
| "grad_norm": 9.367691040039062, | |
| "learning_rate": 6.98136546184739e-06, | |
| "loss": 0.2699, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.3056, | |
| "grad_norm": 11.023392677307129, | |
| "learning_rate": 6.973333333333334e-06, | |
| "loss": 0.2483, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.3064, | |
| "grad_norm": 12.55837345123291, | |
| "learning_rate": 6.965301204819278e-06, | |
| "loss": 0.2557, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.3072, | |
| "grad_norm": 5.479390621185303, | |
| "learning_rate": 6.957269076305222e-06, | |
| "loss": 0.2625, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.308, | |
| "grad_norm": 11.671854972839355, | |
| "learning_rate": 6.949236947791165e-06, | |
| "loss": 0.2608, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.3088, | |
| "grad_norm": 5.811091423034668, | |
| "learning_rate": 6.941204819277109e-06, | |
| "loss": 0.2671, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.3096, | |
| "grad_norm": 5.157212734222412, | |
| "learning_rate": 6.933172690763053e-06, | |
| "loss": 0.2701, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.3104, | |
| "grad_norm": 6.742706775665283, | |
| "learning_rate": 6.925140562248997e-06, | |
| "loss": 0.264, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.3112, | |
| "grad_norm": 10.843968391418457, | |
| "learning_rate": 6.917108433734941e-06, | |
| "loss": 0.2476, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 5.495096206665039, | |
| "learning_rate": 6.9090763052208846e-06, | |
| "loss": 0.2674, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.3128, | |
| "grad_norm": 11.919147491455078, | |
| "learning_rate": 6.901124497991969e-06, | |
| "loss": 0.2397, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.3136, | |
| "grad_norm": 9.006637573242188, | |
| "learning_rate": 6.893092369477912e-06, | |
| "loss": 0.2502, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.3144, | |
| "grad_norm": 4.003146171569824, | |
| "learning_rate": 6.885060240963856e-06, | |
| "loss": 0.2697, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.3152, | |
| "grad_norm": 10.066797256469727, | |
| "learning_rate": 6.8770281124498e-06, | |
| "loss": 0.2295, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.316, | |
| "grad_norm": 8.48736572265625, | |
| "learning_rate": 6.868995983935744e-06, | |
| "loss": 0.2566, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.3168, | |
| "grad_norm": 6.740108966827393, | |
| "learning_rate": 6.860963855421688e-06, | |
| "loss": 0.2529, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.3176, | |
| "grad_norm": 7.446850299835205, | |
| "learning_rate": 6.8529317269076315e-06, | |
| "loss": 0.2571, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.3184, | |
| "grad_norm": 4.642506122589111, | |
| "learning_rate": 6.8448995983935755e-06, | |
| "loss": 0.3017, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.3192, | |
| "grad_norm": 5.234320163726807, | |
| "learning_rate": 6.836867469879519e-06, | |
| "loss": 0.2668, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 7.142499923706055, | |
| "learning_rate": 6.828835341365463e-06, | |
| "loss": 0.2566, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_test1_cer": 0.06914724774068265, | |
| "eval_test1_cer_norm": 0.04478113669652076, | |
| "eval_test1_loss": 0.19836583733558655, | |
| "eval_test1_runtime": 2432.2319, | |
| "eval_test1_samples_per_second": 1.028, | |
| "eval_test1_steps_per_second": 0.257, | |
| "eval_test1_wer": 0.17665238053587567, | |
| "eval_test1_wer_norm": 0.11657266430930185, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_test2_cer": 0.15002146563631613, | |
| "eval_test2_cer_norm": 0.10912806011775643, | |
| "eval_test2_loss": 0.33799564838409424, | |
| "eval_test2_runtime": 2481.3184, | |
| "eval_test2_samples_per_second": 1.008, | |
| "eval_test2_steps_per_second": 0.252, | |
| "eval_test2_wer": 0.28587777523460745, | |
| "eval_test2_wer_norm": 0.2201466880586752, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.3208, | |
| "grad_norm": 6.141145706176758, | |
| "learning_rate": 6.8208032128514055e-06, | |
| "loss": 0.2562, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.3216, | |
| "grad_norm": 5.139159679412842, | |
| "learning_rate": 6.8127710843373495e-06, | |
| "loss": 0.2703, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.3224, | |
| "grad_norm": 7.440242767333984, | |
| "learning_rate": 6.804738955823293e-06, | |
| "loss": 0.2821, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.3232, | |
| "grad_norm": 6.795882225036621, | |
| "learning_rate": 6.796706827309237e-06, | |
| "loss": 0.2452, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.324, | |
| "grad_norm": 7.417604446411133, | |
| "learning_rate": 6.788674698795181e-06, | |
| "loss": 0.2521, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.3248, | |
| "grad_norm": 6.449469089508057, | |
| "learning_rate": 6.780642570281125e-06, | |
| "loss": 0.2813, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.3256, | |
| "grad_norm": 10.74730110168457, | |
| "learning_rate": 6.772610441767069e-06, | |
| "loss": 0.2551, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.3264, | |
| "grad_norm": 3.9926106929779053, | |
| "learning_rate": 6.764578313253013e-06, | |
| "loss": 0.242, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.3272, | |
| "grad_norm": 3.6253511905670166, | |
| "learning_rate": 6.756546184738957e-06, | |
| "loss": 0.2359, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 7.051224231719971, | |
| "learning_rate": 6.7485140562249e-06, | |
| "loss": 0.2618, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.3288, | |
| "grad_norm": 9.776261329650879, | |
| "learning_rate": 6.740481927710844e-06, | |
| "loss": 0.2649, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.3296, | |
| "grad_norm": 5.289026260375977, | |
| "learning_rate": 6.732449799196788e-06, | |
| "loss": 0.2446, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.3304, | |
| "grad_norm": 9.757101058959961, | |
| "learning_rate": 6.724417670682732e-06, | |
| "loss": 0.2605, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.3312, | |
| "grad_norm": 7.856915473937988, | |
| "learning_rate": 6.716385542168675e-06, | |
| "loss": 0.2566, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.332, | |
| "grad_norm": 4.344404697418213, | |
| "learning_rate": 6.708353413654619e-06, | |
| "loss": 0.2482, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.3328, | |
| "grad_norm": 7.893800735473633, | |
| "learning_rate": 6.700321285140563e-06, | |
| "loss": 0.2555, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.3336, | |
| "grad_norm": 10.901208877563477, | |
| "learning_rate": 6.6922891566265066e-06, | |
| "loss": 0.2455, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.3344, | |
| "grad_norm": 8.60644817352295, | |
| "learning_rate": 6.6842570281124505e-06, | |
| "loss": 0.242, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.3352, | |
| "grad_norm": 10.857059478759766, | |
| "learning_rate": 6.6762248995983936e-06, | |
| "loss": 0.2611, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 4.682589054107666, | |
| "learning_rate": 6.6681927710843375e-06, | |
| "loss": 0.2365, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.3368, | |
| "grad_norm": 10.836357116699219, | |
| "learning_rate": 6.660160642570281e-06, | |
| "loss": 0.2477, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.3376, | |
| "grad_norm": 4.692202568054199, | |
| "learning_rate": 6.652128514056225e-06, | |
| "loss": 0.2494, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.3384, | |
| "grad_norm": 10.428234100341797, | |
| "learning_rate": 6.644096385542169e-06, | |
| "loss": 0.2605, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.3392, | |
| "grad_norm": 4.078035831451416, | |
| "learning_rate": 6.636064257028113e-06, | |
| "loss": 0.2358, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 7.381161689758301, | |
| "learning_rate": 6.628032128514057e-06, | |
| "loss": 0.2371, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.3408, | |
| "grad_norm": 2.6584036350250244, | |
| "learning_rate": 6.620000000000001e-06, | |
| "loss": 0.2795, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.3416, | |
| "grad_norm": 10.325010299682617, | |
| "learning_rate": 6.611967871485945e-06, | |
| "loss": 0.2476, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.3424, | |
| "grad_norm": 6.986746788024902, | |
| "learning_rate": 6.603935742971887e-06, | |
| "loss": 0.2354, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.3432, | |
| "grad_norm": 6.040923118591309, | |
| "learning_rate": 6.595903614457831e-06, | |
| "loss": 0.2748, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 4.043920993804932, | |
| "learning_rate": 6.587871485943775e-06, | |
| "loss": 0.2579, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.3448, | |
| "grad_norm": 7.066967964172363, | |
| "learning_rate": 6.57991967871486e-06, | |
| "loss": 0.2558, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.3456, | |
| "grad_norm": 7.57224702835083, | |
| "learning_rate": 6.571887550200804e-06, | |
| "loss": 0.2542, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.3464, | |
| "grad_norm": 11.219292640686035, | |
| "learning_rate": 6.563855421686748e-06, | |
| "loss": 0.2688, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.3472, | |
| "grad_norm": 9.798463821411133, | |
| "learning_rate": 6.555823293172692e-06, | |
| "loss": 0.2555, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.348, | |
| "grad_norm": 6.011776924133301, | |
| "learning_rate": 6.547791164658636e-06, | |
| "loss": 0.2517, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.3488, | |
| "grad_norm": 10.116564750671387, | |
| "learning_rate": 6.539759036144578e-06, | |
| "loss": 0.2585, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.3496, | |
| "grad_norm": 8.014334678649902, | |
| "learning_rate": 6.531807228915663e-06, | |
| "loss": 0.2386, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.3504, | |
| "grad_norm": 6.700802803039551, | |
| "learning_rate": 6.523775100401607e-06, | |
| "loss": 0.2412, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.3512, | |
| "grad_norm": 5.972689151763916, | |
| "learning_rate": 6.515742971887551e-06, | |
| "loss": 0.2461, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 6.2880167961120605, | |
| "learning_rate": 6.507710843373495e-06, | |
| "loss": 0.2535, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "eval_test1_cer": 0.07431940398834865, | |
| "eval_test1_cer_norm": 0.051710807501140536, | |
| "eval_test1_loss": 0.19776684045791626, | |
| "eval_test1_runtime": 2427.0653, | |
| "eval_test1_samples_per_second": 1.03, | |
| "eval_test1_steps_per_second": 0.258, | |
| "eval_test1_wer": 0.17638998221522492, | |
| "eval_test1_wer_norm": 0.11625120547064496, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "eval_test2_cer": 0.1173377011236794, | |
| "eval_test2_cer_norm": 0.09236519987604587, | |
| "eval_test2_loss": 0.3366641104221344, | |
| "eval_test2_runtime": 2441.4105, | |
| "eval_test2_samples_per_second": 1.024, | |
| "eval_test2_steps_per_second": 0.256, | |
| "eval_test2_wer": 0.2464522774090181, | |
| "eval_test2_wer_norm": 0.1820135228054091, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.3528, | |
| "grad_norm": 13.718138694763184, | |
| "learning_rate": 6.499678714859439e-06, | |
| "loss": 0.2509, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.3536, | |
| "grad_norm": 8.281696319580078, | |
| "learning_rate": 6.491646586345383e-06, | |
| "loss": 0.2489, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.3544, | |
| "grad_norm": 4.6960225105285645, | |
| "learning_rate": 6.483614457831325e-06, | |
| "loss": 0.2693, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.3552, | |
| "grad_norm": 6.965161323547363, | |
| "learning_rate": 6.475582329317269e-06, | |
| "loss": 0.2242, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.356, | |
| "grad_norm": 4.1582722663879395, | |
| "learning_rate": 6.467550200803213e-06, | |
| "loss": 0.2551, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.3568, | |
| "grad_norm": 6.540956020355225, | |
| "learning_rate": 6.459518072289157e-06, | |
| "loss": 0.2504, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.3576, | |
| "grad_norm": 8.521894454956055, | |
| "learning_rate": 6.451485943775101e-06, | |
| "loss": 0.2578, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.3584, | |
| "grad_norm": 9.609855651855469, | |
| "learning_rate": 6.443453815261045e-06, | |
| "loss": 0.2727, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.3592, | |
| "grad_norm": 7.131938457489014, | |
| "learning_rate": 6.4354216867469885e-06, | |
| "loss": 0.2519, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 3.9930307865142822, | |
| "learning_rate": 6.4273895582329325e-06, | |
| "loss": 0.246, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.3608, | |
| "grad_norm": 12.197285652160645, | |
| "learning_rate": 6.419357429718876e-06, | |
| "loss": 0.2592, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.3616, | |
| "grad_norm": 11.38996410369873, | |
| "learning_rate": 6.4113253012048195e-06, | |
| "loss": 0.2645, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.3624, | |
| "grad_norm": 9.129228591918945, | |
| "learning_rate": 6.403293172690763e-06, | |
| "loss": 0.254, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.3632, | |
| "grad_norm": 4.596703052520752, | |
| "learning_rate": 6.395261044176707e-06, | |
| "loss": 0.2555, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.364, | |
| "grad_norm": 4.702436923980713, | |
| "learning_rate": 6.387228915662651e-06, | |
| "loss": 0.2617, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.3648, | |
| "grad_norm": 6.283945083618164, | |
| "learning_rate": 6.379196787148595e-06, | |
| "loss": 0.2546, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.3656, | |
| "grad_norm": 8.981857299804688, | |
| "learning_rate": 6.371164658634539e-06, | |
| "loss": 0.2353, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.3664, | |
| "grad_norm": 7.928671360015869, | |
| "learning_rate": 6.363132530120483e-06, | |
| "loss": 0.2657, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.3672, | |
| "grad_norm": 7.3669114112854, | |
| "learning_rate": 6.355100401606427e-06, | |
| "loss": 0.2486, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 6.917601108551025, | |
| "learning_rate": 6.347068273092371e-06, | |
| "loss": 0.2524, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.3688, | |
| "grad_norm": 4.770781517028809, | |
| "learning_rate": 6.339036144578313e-06, | |
| "loss": 0.2434, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.3696, | |
| "grad_norm": 12.580122947692871, | |
| "learning_rate": 6.331004016064257e-06, | |
| "loss": 0.2463, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.3704, | |
| "grad_norm": 6.896516799926758, | |
| "learning_rate": 6.322971887550201e-06, | |
| "loss": 0.2365, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.3712, | |
| "grad_norm": 7.098085880279541, | |
| "learning_rate": 6.314939759036145e-06, | |
| "loss": 0.2412, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.372, | |
| "grad_norm": 5.216635704040527, | |
| "learning_rate": 6.306907630522089e-06, | |
| "loss": 0.2209, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.3728, | |
| "grad_norm": 7.694733619689941, | |
| "learning_rate": 6.298875502008033e-06, | |
| "loss": 0.2256, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.3736, | |
| "grad_norm": 37.76133346557617, | |
| "learning_rate": 6.290923694779118e-06, | |
| "loss": 0.24, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.3744, | |
| "grad_norm": 4.706324100494385, | |
| "learning_rate": 6.28289156626506e-06, | |
| "loss": 0.2224, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.3752, | |
| "grad_norm": 9.157281875610352, | |
| "learning_rate": 6.274859437751004e-06, | |
| "loss": 0.2609, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 6.885075092315674, | |
| "learning_rate": 6.266827309236948e-06, | |
| "loss": 0.2711, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.3768, | |
| "grad_norm": 5.564688205718994, | |
| "learning_rate": 6.258795180722892e-06, | |
| "loss": 0.2634, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.3776, | |
| "grad_norm": 2.6855292320251465, | |
| "learning_rate": 6.250763052208836e-06, | |
| "loss": 0.2509, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.3784, | |
| "grad_norm": 9.584918975830078, | |
| "learning_rate": 6.24273092369478e-06, | |
| "loss": 0.2586, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.3792, | |
| "grad_norm": 9.060691833496094, | |
| "learning_rate": 6.2346987951807236e-06, | |
| "loss": 0.2658, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 3.5146710872650146, | |
| "learning_rate": 6.2266666666666675e-06, | |
| "loss": 0.2441, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.3808, | |
| "grad_norm": 6.001715183258057, | |
| "learning_rate": 6.218634538152611e-06, | |
| "loss": 0.2403, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.3816, | |
| "grad_norm": 3.3255221843719482, | |
| "learning_rate": 6.2106024096385545e-06, | |
| "loss": 0.237, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.3824, | |
| "grad_norm": 9.07324504852295, | |
| "learning_rate": 6.202570281124498e-06, | |
| "loss": 0.2522, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.3832, | |
| "grad_norm": 10.312811851501465, | |
| "learning_rate": 6.194538152610442e-06, | |
| "loss": 0.2356, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 8.543242454528809, | |
| "learning_rate": 6.186506024096386e-06, | |
| "loss": 0.25, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "eval_test1_cer": 0.07607457614459631, | |
| "eval_test1_cer_norm": 0.056354599370903063, | |
| "eval_test1_loss": 0.19436757266521454, | |
| "eval_test1_runtime": 2446.1615, | |
| "eval_test1_samples_per_second": 1.022, | |
| "eval_test1_steps_per_second": 0.256, | |
| "eval_test1_wer": 0.1832123385521444, | |
| "eval_test1_wer_norm": 0.12332329992109646, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "eval_test2_cer": 0.12698323813790272, | |
| "eval_test2_cer_norm": 0.10085799504183453, | |
| "eval_test2_loss": 0.3315908908843994, | |
| "eval_test2_runtime": 2495.6856, | |
| "eval_test2_samples_per_second": 1.002, | |
| "eval_test2_steps_per_second": 0.25, | |
| "eval_test2_wer": 0.25469214923323413, | |
| "eval_test2_wer_norm": 0.1900641760256704, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.3848, | |
| "grad_norm": 7.8291215896606445, | |
| "learning_rate": 6.17847389558233e-06, | |
| "loss": 0.2452, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.3856, | |
| "grad_norm": 4.892631530761719, | |
| "learning_rate": 6.170441767068274e-06, | |
| "loss": 0.2568, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.3864, | |
| "grad_norm": 9.617656707763672, | |
| "learning_rate": 6.162409638554218e-06, | |
| "loss": 0.2197, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.3872, | |
| "grad_norm": 9.659303665161133, | |
| "learning_rate": 6.154377510040162e-06, | |
| "loss": 0.2392, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.388, | |
| "grad_norm": 6.675602912902832, | |
| "learning_rate": 6.146345381526105e-06, | |
| "loss": 0.2209, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.3888, | |
| "grad_norm": 5.2548298835754395, | |
| "learning_rate": 6.138313253012048e-06, | |
| "loss": 0.2642, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.3896, | |
| "grad_norm": 5.428624629974365, | |
| "learning_rate": 6.130281124497992e-06, | |
| "loss": 0.2139, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.3904, | |
| "grad_norm": 10.75515079498291, | |
| "learning_rate": 6.122248995983936e-06, | |
| "loss": 0.2238, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.3912, | |
| "grad_norm": 9.192462921142578, | |
| "learning_rate": 6.11421686746988e-06, | |
| "loss": 0.2422, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 5.55458927154541, | |
| "learning_rate": 6.106184738955824e-06, | |
| "loss": 0.2559, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.3928, | |
| "grad_norm": 10.406610488891602, | |
| "learning_rate": 6.098152610441768e-06, | |
| "loss": 0.2474, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.3936, | |
| "grad_norm": 6.274507999420166, | |
| "learning_rate": 6.0901204819277116e-06, | |
| "loss": 0.2361, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.3944, | |
| "grad_norm": 10.830498695373535, | |
| "learning_rate": 6.0820883534136555e-06, | |
| "loss": 0.2409, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.3952, | |
| "grad_norm": 11.912911415100098, | |
| "learning_rate": 6.074056224899599e-06, | |
| "loss": 0.2467, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.396, | |
| "grad_norm": 8.484502792358398, | |
| "learning_rate": 6.0660240963855425e-06, | |
| "loss": 0.2411, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.3968, | |
| "grad_norm": 9.443007469177246, | |
| "learning_rate": 6.057991967871486e-06, | |
| "loss": 0.2449, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.3976, | |
| "grad_norm": 10.93896770477295, | |
| "learning_rate": 6.04995983935743e-06, | |
| "loss": 0.239, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.3984, | |
| "grad_norm": 7.4916768074035645, | |
| "learning_rate": 6.041927710843373e-06, | |
| "loss": 0.2349, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.3992, | |
| "grad_norm": 7.075978755950928, | |
| "learning_rate": 6.033895582329317e-06, | |
| "loss": 0.2418, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 5.759922981262207, | |
| "learning_rate": 6.025863453815261e-06, | |
| "loss": 0.2536, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.4008, | |
| "grad_norm": 10.531188011169434, | |
| "learning_rate": 6.017831325301205e-06, | |
| "loss": 0.2162, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.4016, | |
| "grad_norm": 7.113009452819824, | |
| "learning_rate": 6.009799196787149e-06, | |
| "loss": 0.2373, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.4024, | |
| "grad_norm": 8.859579086303711, | |
| "learning_rate": 6.001767068273093e-06, | |
| "loss": 0.2327, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.4032, | |
| "grad_norm": 5.7207512855529785, | |
| "learning_rate": 5.993734939759036e-06, | |
| "loss": 0.2605, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.404, | |
| "grad_norm": 5.358285427093506, | |
| "learning_rate": 5.98570281124498e-06, | |
| "loss": 0.2442, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.4048, | |
| "grad_norm": 9.501456260681152, | |
| "learning_rate": 5.977670682730924e-06, | |
| "loss": 0.2333, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.4056, | |
| "grad_norm": 9.682687759399414, | |
| "learning_rate": 5.969638554216868e-06, | |
| "loss": 0.2404, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.4064, | |
| "grad_norm": 6.1207380294799805, | |
| "learning_rate": 5.961686746987952e-06, | |
| "loss": 0.2401, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.4072, | |
| "grad_norm": 3.812893867492676, | |
| "learning_rate": 5.953654618473896e-06, | |
| "loss": 0.2411, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 8.557660102844238, | |
| "learning_rate": 5.94562248995984e-06, | |
| "loss": 0.2486, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.4088, | |
| "grad_norm": 8.239738464355469, | |
| "learning_rate": 5.937590361445784e-06, | |
| "loss": 0.219, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.4096, | |
| "grad_norm": 8.825906753540039, | |
| "learning_rate": 5.929558232931727e-06, | |
| "loss": 0.2418, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.4104, | |
| "grad_norm": 9.038442611694336, | |
| "learning_rate": 5.921526104417671e-06, | |
| "loss": 0.2495, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.4112, | |
| "grad_norm": 10.421882629394531, | |
| "learning_rate": 5.913493975903615e-06, | |
| "loss": 0.2294, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.412, | |
| "grad_norm": 5.004858493804932, | |
| "learning_rate": 5.905461847389559e-06, | |
| "loss": 0.2553, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.4128, | |
| "grad_norm": 5.850473880767822, | |
| "learning_rate": 5.897429718875503e-06, | |
| "loss": 0.2443, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.4136, | |
| "grad_norm": 5.483931064605713, | |
| "learning_rate": 5.8893975903614466e-06, | |
| "loss": 0.2849, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.4144, | |
| "grad_norm": 9.20142650604248, | |
| "learning_rate": 5.881445783132531e-06, | |
| "loss": 0.2617, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.4152, | |
| "grad_norm": 5.675454139709473, | |
| "learning_rate": 5.873413654618474e-06, | |
| "loss": 0.2666, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 7.959702968597412, | |
| "learning_rate": 5.865381526104418e-06, | |
| "loss": 0.2286, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "eval_test1_cer": 0.06314885353648518, | |
| "eval_test1_cer_norm": 0.04443057122962038, | |
| "eval_test1_loss": 0.1907189041376114, | |
| "eval_test1_runtime": 2490.0845, | |
| "eval_test1_samples_per_second": 1.004, | |
| "eval_test1_steps_per_second": 0.251, | |
| "eval_test1_wer": 0.18128808420070555, | |
| "eval_test1_wer_norm": 0.12083929980420234, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "eval_test2_cer": 0.1296011124799343, | |
| "eval_test2_cer_norm": 0.09485396653238302, | |
| "eval_test2_loss": 0.3308376371860504, | |
| "eval_test2_runtime": 3341.4359, | |
| "eval_test2_samples_per_second": 0.748, | |
| "eval_test2_steps_per_second": 0.187, | |
| "eval_test2_wer": 0.2722304875257496, | |
| "eval_test2_wer_norm": 0.2069390327756131, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.4168, | |
| "grad_norm": 13.770788192749023, | |
| "learning_rate": 5.857349397590362e-06, | |
| "loss": 0.2467, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.4176, | |
| "grad_norm": 5.342738151550293, | |
| "learning_rate": 5.849317269076306e-06, | |
| "loss": 0.233, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.4184, | |
| "grad_norm": 6.316587448120117, | |
| "learning_rate": 5.84128514056225e-06, | |
| "loss": 0.2513, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.4192, | |
| "grad_norm": 6.252966403961182, | |
| "learning_rate": 5.8332530120481936e-06, | |
| "loss": 0.2495, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 8.304986953735352, | |
| "learning_rate": 5.8252208835341375e-06, | |
| "loss": 0.2573, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.4208, | |
| "grad_norm": 8.028290748596191, | |
| "learning_rate": 5.817188755020081e-06, | |
| "loss": 0.2393, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.4216, | |
| "grad_norm": 10.630465507507324, | |
| "learning_rate": 5.809156626506025e-06, | |
| "loss": 0.2453, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.4224, | |
| "grad_norm": 9.712656021118164, | |
| "learning_rate": 5.8011244979919675e-06, | |
| "loss": 0.2553, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.4232, | |
| "grad_norm": 9.605881690979004, | |
| "learning_rate": 5.7930923694779115e-06, | |
| "loss": 0.2393, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 10.953594207763672, | |
| "learning_rate": 5.785060240963855e-06, | |
| "loss": 0.2484, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.4248, | |
| "grad_norm": 4.043101787567139, | |
| "learning_rate": 5.777028112449799e-06, | |
| "loss": 0.2146, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.4256, | |
| "grad_norm": 8.587929725646973, | |
| "learning_rate": 5.768995983935743e-06, | |
| "loss": 0.2555, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.4264, | |
| "grad_norm": 6.47918701171875, | |
| "learning_rate": 5.760963855421687e-06, | |
| "loss": 0.2277, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.4272, | |
| "grad_norm": 6.032763957977295, | |
| "learning_rate": 5.752931726907631e-06, | |
| "loss": 0.24, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.428, | |
| "grad_norm": 4.708799362182617, | |
| "learning_rate": 5.744899598393575e-06, | |
| "loss": 0.2262, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.4288, | |
| "grad_norm": 7.626307964324951, | |
| "learning_rate": 5.736867469879519e-06, | |
| "loss": 0.2344, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.4296, | |
| "grad_norm": 5.346840858459473, | |
| "learning_rate": 5.728835341365462e-06, | |
| "loss": 0.2071, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.4304, | |
| "grad_norm": 6.508141040802002, | |
| "learning_rate": 5.720803212851406e-06, | |
| "loss": 0.2275, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.4312, | |
| "grad_norm": 7.617433071136475, | |
| "learning_rate": 5.71277108433735e-06, | |
| "loss": 0.2359, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 5.544378757476807, | |
| "learning_rate": 5.704738955823294e-06, | |
| "loss": 0.2491, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.4328, | |
| "grad_norm": 5.247679233551025, | |
| "learning_rate": 5.696706827309238e-06, | |
| "loss": 0.2353, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.4336, | |
| "grad_norm": 9.137139320373535, | |
| "learning_rate": 5.688674698795182e-06, | |
| "loss": 0.2397, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.4344, | |
| "grad_norm": 6.74301815032959, | |
| "learning_rate": 5.6806425702811255e-06, | |
| "loss": 0.2602, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.4352, | |
| "grad_norm": 6.35469913482666, | |
| "learning_rate": 5.672610441767069e-06, | |
| "loss": 0.2515, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.436, | |
| "grad_norm": 10.135574340820312, | |
| "learning_rate": 5.664578313253013e-06, | |
| "loss": 0.2219, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.4368, | |
| "grad_norm": 5.893723011016846, | |
| "learning_rate": 5.6565461847389556e-06, | |
| "loss": 0.2569, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.4376, | |
| "grad_norm": 14.050215721130371, | |
| "learning_rate": 5.6485140562248995e-06, | |
| "loss": 0.2906, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.4384, | |
| "grad_norm": 6.908878326416016, | |
| "learning_rate": 5.640481927710843e-06, | |
| "loss": 0.2444, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.4392, | |
| "grad_norm": 7.57582950592041, | |
| "learning_rate": 5.632449799196787e-06, | |
| "loss": 0.2467, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 11.190139770507812, | |
| "learning_rate": 5.624417670682731e-06, | |
| "loss": 0.2593, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.4408, | |
| "grad_norm": 12.42068099975586, | |
| "learning_rate": 5.616385542168675e-06, | |
| "loss": 0.2453, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.4416, | |
| "grad_norm": 10.345560073852539, | |
| "learning_rate": 5.608353413654619e-06, | |
| "loss": 0.2411, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.4424, | |
| "grad_norm": 7.91195011138916, | |
| "learning_rate": 5.600321285140563e-06, | |
| "loss": 0.2414, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.4432, | |
| "grad_norm": 6.820638179779053, | |
| "learning_rate": 5.592289156626507e-06, | |
| "loss": 0.2363, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.444, | |
| "grad_norm": 4.268233299255371, | |
| "learning_rate": 5.58425702811245e-06, | |
| "loss": 0.2325, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.4448, | |
| "grad_norm": 8.063896179199219, | |
| "learning_rate": 5.576224899598394e-06, | |
| "loss": 0.2607, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.4456, | |
| "grad_norm": 3.5322296619415283, | |
| "learning_rate": 5.568192771084338e-06, | |
| "loss": 0.2346, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.4464, | |
| "grad_norm": 9.244215965270996, | |
| "learning_rate": 5.560160642570282e-06, | |
| "loss": 0.2155, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.4472, | |
| "grad_norm": 10.747200965881348, | |
| "learning_rate": 5.552208835341366e-06, | |
| "loss": 0.2351, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 14.072000503540039, | |
| "learning_rate": 5.54417670682731e-06, | |
| "loss": 0.2413, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "eval_test1_cer": 0.0663977892299649, | |
| "eval_test1_cer_norm": 0.0478834009652556, | |
| "eval_test1_loss": 0.1882741004228592, | |
| "eval_test1_runtime": 3418.3969, | |
| "eval_test1_samples_per_second": 0.731, | |
| "eval_test1_steps_per_second": 0.183, | |
| "eval_test1_wer": 0.16968424735415027, | |
| "eval_test1_wer_norm": 0.1093544522049154, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "eval_test2_cer": 0.13036174263635344, | |
| "eval_test2_cer_norm": 0.09973950263402541, | |
| "eval_test2_loss": 0.3249567449092865, | |
| "eval_test2_runtime": 3504.8089, | |
| "eval_test2_samples_per_second": 0.713, | |
| "eval_test2_steps_per_second": 0.178, | |
| "eval_test2_wer": 0.25629434653238725, | |
| "eval_test2_wer_norm": 0.18966307586523035, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.4488, | |
| "grad_norm": 8.576178550720215, | |
| "learning_rate": 5.536144578313254e-06, | |
| "loss": 0.2404, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.4496, | |
| "grad_norm": 6.107906818389893, | |
| "learning_rate": 5.528112449799197e-06, | |
| "loss": 0.2217, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.4504, | |
| "grad_norm": 5.073305606842041, | |
| "learning_rate": 5.520080321285141e-06, | |
| "loss": 0.229, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.4512, | |
| "grad_norm": 5.073732376098633, | |
| "learning_rate": 5.512048192771085e-06, | |
| "loss": 0.2404, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.452, | |
| "grad_norm": 6.29484748840332, | |
| "learning_rate": 5.504016064257029e-06, | |
| "loss": 0.2283, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.4528, | |
| "grad_norm": 13.772027015686035, | |
| "learning_rate": 5.495983935742973e-06, | |
| "loss": 0.2433, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.4536, | |
| "grad_norm": 6.613615989685059, | |
| "learning_rate": 5.487951807228917e-06, | |
| "loss": 0.2634, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.4544, | |
| "grad_norm": 5.8526082038879395, | |
| "learning_rate": 5.480000000000001e-06, | |
| "loss": 0.2277, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.4552, | |
| "grad_norm": 6.082682132720947, | |
| "learning_rate": 5.471967871485945e-06, | |
| "loss": 0.2372, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 4.50082540512085, | |
| "learning_rate": 5.463935742971888e-06, | |
| "loss": 0.2578, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.4568, | |
| "grad_norm": 5.06675910949707, | |
| "learning_rate": 5.455903614457832e-06, | |
| "loss": 0.2033, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.4576, | |
| "grad_norm": 3.3439102172851562, | |
| "learning_rate": 5.447871485943776e-06, | |
| "loss": 0.2395, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.4584, | |
| "grad_norm": 12.235926628112793, | |
| "learning_rate": 5.43983935742972e-06, | |
| "loss": 0.2335, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.4592, | |
| "grad_norm": 6.169180393218994, | |
| "learning_rate": 5.431807228915663e-06, | |
| "loss": 0.2432, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 3.7379937171936035, | |
| "learning_rate": 5.423775100401607e-06, | |
| "loss": 0.2377, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.4608, | |
| "grad_norm": 6.2007060050964355, | |
| "learning_rate": 5.4157429718875506e-06, | |
| "loss": 0.2475, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.4616, | |
| "grad_norm": 11.921826362609863, | |
| "learning_rate": 5.4077108433734945e-06, | |
| "loss": 0.2359, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.4624, | |
| "grad_norm": 4.8144612312316895, | |
| "learning_rate": 5.399678714859438e-06, | |
| "loss": 0.2222, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.4632, | |
| "grad_norm": 7.309458255767822, | |
| "learning_rate": 5.3916465863453815e-06, | |
| "loss": 0.2315, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 6.312740325927734, | |
| "learning_rate": 5.383614457831325e-06, | |
| "loss": 0.2557, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.4648, | |
| "grad_norm": 7.315298557281494, | |
| "learning_rate": 5.375582329317269e-06, | |
| "loss": 0.2673, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.4656, | |
| "grad_norm": 9.65597152709961, | |
| "learning_rate": 5.367550200803213e-06, | |
| "loss": 0.2417, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.4664, | |
| "grad_norm": 5.868946552276611, | |
| "learning_rate": 5.359518072289157e-06, | |
| "loss": 0.2289, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.4672, | |
| "grad_norm": 7.116505146026611, | |
| "learning_rate": 5.351485943775101e-06, | |
| "loss": 0.2238, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.468, | |
| "grad_norm": 4.75971794128418, | |
| "learning_rate": 5.343453815261045e-06, | |
| "loss": 0.2466, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.4688, | |
| "grad_norm": 5.3648762702941895, | |
| "learning_rate": 5.335421686746989e-06, | |
| "loss": 0.2539, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.4696, | |
| "grad_norm": 2.8886609077453613, | |
| "learning_rate": 5.327389558232933e-06, | |
| "loss": 0.2386, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.4704, | |
| "grad_norm": 19.657976150512695, | |
| "learning_rate": 5.319357429718875e-06, | |
| "loss": 0.2257, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.4712, | |
| "grad_norm": 6.010655879974365, | |
| "learning_rate": 5.311325301204819e-06, | |
| "loss": 0.2402, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 11.614595413208008, | |
| "learning_rate": 5.303293172690763e-06, | |
| "loss": 0.2422, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.4728, | |
| "grad_norm": 9.773826599121094, | |
| "learning_rate": 5.295261044176707e-06, | |
| "loss": 0.2224, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.4736, | |
| "grad_norm": 3.5956804752349854, | |
| "learning_rate": 5.287228915662651e-06, | |
| "loss": 0.2287, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.4744, | |
| "grad_norm": 5.884477615356445, | |
| "learning_rate": 5.279196787148595e-06, | |
| "loss": 0.2284, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.4752, | |
| "grad_norm": 6.785783290863037, | |
| "learning_rate": 5.271164658634539e-06, | |
| "loss": 0.2486, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.476, | |
| "grad_norm": 5.1648054122924805, | |
| "learning_rate": 5.2631325301204825e-06, | |
| "loss": 0.2352, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.4768, | |
| "grad_norm": 7.0386643409729, | |
| "learning_rate": 5.255180722891566e-06, | |
| "loss": 0.2489, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.4776, | |
| "grad_norm": 8.528487205505371, | |
| "learning_rate": 5.24714859437751e-06, | |
| "loss": 0.2434, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.4784, | |
| "grad_norm": 10.057316780090332, | |
| "learning_rate": 5.239116465863454e-06, | |
| "loss": 0.232, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.4792, | |
| "grad_norm": 5.452738285064697, | |
| "learning_rate": 5.231084337349398e-06, | |
| "loss": 0.2138, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 12.630691528320312, | |
| "learning_rate": 5.223052208835342e-06, | |
| "loss": 0.2406, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_test1_cer": 0.047455000373440887, | |
| "eval_test1_cer_norm": 0.03161812375441208, | |
| "eval_test1_loss": 0.18266192078590393, | |
| "eval_test1_runtime": 3355.1591, | |
| "eval_test1_samples_per_second": 0.745, | |
| "eval_test1_steps_per_second": 0.186, | |
| "eval_test1_wer": 0.1444065424647949, | |
| "eval_test1_wer_norm": 0.08430988632046524, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_test2_cer": 0.11545245828200246, | |
| "eval_test2_cer_norm": 0.0919003718624109, | |
| "eval_test2_loss": 0.32027342915534973, | |
| "eval_test2_runtime": 3441.9039, | |
| "eval_test2_samples_per_second": 0.726, | |
| "eval_test2_steps_per_second": 0.182, | |
| "eval_test2_wer": 0.24284733348592355, | |
| "eval_test2_wer_norm": 0.17731492092596837, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.4808, | |
| "grad_norm": 1.9925609827041626, | |
| "learning_rate": 5.2150200803212856e-06, | |
| "loss": 0.2353, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.4816, | |
| "grad_norm": 5.7619476318359375, | |
| "learning_rate": 5.2069879518072295e-06, | |
| "loss": 0.2124, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.4824, | |
| "grad_norm": 6.040704727172852, | |
| "learning_rate": 5.198955823293173e-06, | |
| "loss": 0.2304, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.4832, | |
| "grad_norm": 14.028429985046387, | |
| "learning_rate": 5.1909236947791165e-06, | |
| "loss": 0.2549, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.484, | |
| "grad_norm": 14.17192268371582, | |
| "learning_rate": 5.18289156626506e-06, | |
| "loss": 0.2215, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.4848, | |
| "grad_norm": 8.68266773223877, | |
| "learning_rate": 5.174859437751004e-06, | |
| "loss": 0.2144, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.4856, | |
| "grad_norm": 7.074737548828125, | |
| "learning_rate": 5.166827309236948e-06, | |
| "loss": 0.2262, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.4864, | |
| "grad_norm": 7.095781326293945, | |
| "learning_rate": 5.158795180722892e-06, | |
| "loss": 0.2509, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.4872, | |
| "grad_norm": 14.873151779174805, | |
| "learning_rate": 5.150763052208836e-06, | |
| "loss": 0.2417, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 7.113455295562744, | |
| "learning_rate": 5.14273092369478e-06, | |
| "loss": 0.2232, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.4888, | |
| "grad_norm": 4.443511009216309, | |
| "learning_rate": 5.134698795180724e-06, | |
| "loss": 0.2333, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.4896, | |
| "grad_norm": 11.819351196289062, | |
| "learning_rate": 5.126666666666668e-06, | |
| "loss": 0.2252, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.4904, | |
| "grad_norm": 9.132615089416504, | |
| "learning_rate": 5.11863453815261e-06, | |
| "loss": 0.2469, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.4912, | |
| "grad_norm": 5.749680042266846, | |
| "learning_rate": 5.110602409638554e-06, | |
| "loss": 0.2322, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.492, | |
| "grad_norm": 5.154579162597656, | |
| "learning_rate": 5.102570281124498e-06, | |
| "loss": 0.2293, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.4928, | |
| "grad_norm": 4.018552780151367, | |
| "learning_rate": 5.094538152610442e-06, | |
| "loss": 0.2087, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.4936, | |
| "grad_norm": 12.570889472961426, | |
| "learning_rate": 5.086506024096386e-06, | |
| "loss": 0.2374, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.4944, | |
| "grad_norm": 4.278166770935059, | |
| "learning_rate": 5.07847389558233e-06, | |
| "loss": 0.2298, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.000696, | |
| "grad_norm": 4.555546760559082, | |
| "learning_rate": 5.070441767068274e-06, | |
| "loss": 0.2077, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.001496, | |
| "grad_norm": 15.487360954284668, | |
| "learning_rate": 5.0624096385542175e-06, | |
| "loss": 0.2044, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.002296, | |
| "grad_norm": 4.297917366027832, | |
| "learning_rate": 5.0543775100401614e-06, | |
| "loss": 0.1977, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.003096, | |
| "grad_norm": 8.327834129333496, | |
| "learning_rate": 5.0463453815261045e-06, | |
| "loss": 0.1749, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.003896, | |
| "grad_norm": 8.811240196228027, | |
| "learning_rate": 5.038313253012048e-06, | |
| "loss": 0.1868, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.004696, | |
| "grad_norm": 5.721954822540283, | |
| "learning_rate": 5.030281124497992e-06, | |
| "loss": 0.195, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.005496, | |
| "grad_norm": 7.047698020935059, | |
| "learning_rate": 5.022248995983936e-06, | |
| "loss": 0.1605, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.006296, | |
| "grad_norm": 6.009364128112793, | |
| "learning_rate": 5.0142971887550206e-06, | |
| "loss": 0.176, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.007096, | |
| "grad_norm": 5.495716571807861, | |
| "learning_rate": 5.0062650602409645e-06, | |
| "loss": 0.1991, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.007896, | |
| "grad_norm": 2.433659076690674, | |
| "learning_rate": 4.9982329317269076e-06, | |
| "loss": 0.1832, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.008696, | |
| "grad_norm": 6.076290607452393, | |
| "learning_rate": 4.9902008032128515e-06, | |
| "loss": 0.1807, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.009496, | |
| "grad_norm": 5.725612640380859, | |
| "learning_rate": 4.982168674698795e-06, | |
| "loss": 0.1627, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.010296, | |
| "grad_norm": 4.852512836456299, | |
| "learning_rate": 4.974136546184739e-06, | |
| "loss": 0.1692, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 1.011096, | |
| "grad_norm": 6.572232246398926, | |
| "learning_rate": 4.966104417670683e-06, | |
| "loss": 0.1616, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 1.011896, | |
| "grad_norm": 9.408519744873047, | |
| "learning_rate": 4.958072289156627e-06, | |
| "loss": 0.1566, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 1.012696, | |
| "grad_norm": 4.744565963745117, | |
| "learning_rate": 4.950040160642571e-06, | |
| "loss": 0.1519, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 1.013496, | |
| "grad_norm": 13.262666702270508, | |
| "learning_rate": 4.942008032128515e-06, | |
| "loss": 0.1804, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.014296, | |
| "grad_norm": 3.7456908226013184, | |
| "learning_rate": 4.933975903614458e-06, | |
| "loss": 0.1703, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 1.015096, | |
| "grad_norm": 6.4614152908325195, | |
| "learning_rate": 4.925943775100402e-06, | |
| "loss": 0.1796, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 1.015896, | |
| "grad_norm": 4.488025665283203, | |
| "learning_rate": 4.917911646586346e-06, | |
| "loss": 0.1678, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 1.016696, | |
| "grad_norm": 5.777276992797852, | |
| "learning_rate": 4.90987951807229e-06, | |
| "loss": 0.1839, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 1.017496, | |
| "grad_norm": 4.160243034362793, | |
| "learning_rate": 4.901847389558234e-06, | |
| "loss": 0.1627, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.017496, | |
| "eval_test1_cer": 0.07008084995145268, | |
| "eval_test1_cer_norm": 0.050106850433404564, | |
| "eval_test1_loss": 0.18447566032409668, | |
| "eval_test1_runtime": 3416.4242, | |
| "eval_test1_samples_per_second": 0.732, | |
| "eval_test1_steps_per_second": 0.183, | |
| "eval_test1_wer": 0.17367853290183388, | |
| "eval_test1_wer_norm": 0.11408866419240772, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.017496, | |
| "eval_test2_cer": 0.14694627991189757, | |
| "eval_test2_cer_norm": 0.11825515184381778, | |
| "eval_test2_loss": 0.3210515081882477, | |
| "eval_test2_runtime": 3532.7138, | |
| "eval_test2_samples_per_second": 0.708, | |
| "eval_test2_steps_per_second": 0.177, | |
| "eval_test2_wer": 0.2882810711833371, | |
| "eval_test2_wer_norm": 0.22352738941095576, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.018296, | |
| "grad_norm": 5.366184234619141, | |
| "learning_rate": 4.893815261044177e-06, | |
| "loss": 0.1663, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 1.019096, | |
| "grad_norm": 3.319096326828003, | |
| "learning_rate": 4.885783132530121e-06, | |
| "loss": 0.1597, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 1.019896, | |
| "grad_norm": 2.7266533374786377, | |
| "learning_rate": 4.877751004016065e-06, | |
| "loss": 0.1636, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 1.020696, | |
| "grad_norm": 5.649815082550049, | |
| "learning_rate": 4.869718875502009e-06, | |
| "loss": 0.1719, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 1.021496, | |
| "grad_norm": 9.476170539855957, | |
| "learning_rate": 4.861686746987952e-06, | |
| "loss": 0.1612, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.022296, | |
| "grad_norm": 6.443653106689453, | |
| "learning_rate": 4.853654618473896e-06, | |
| "loss": 0.1589, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 1.023096, | |
| "grad_norm": 4.278164863586426, | |
| "learning_rate": 4.8456224899598395e-06, | |
| "loss": 0.1575, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 1.023896, | |
| "grad_norm": 7.513009071350098, | |
| "learning_rate": 4.837590361445783e-06, | |
| "loss": 0.1678, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 1.024696, | |
| "grad_norm": 7.260671615600586, | |
| "learning_rate": 4.829558232931727e-06, | |
| "loss": 0.1605, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 1.025496, | |
| "grad_norm": 5.455900192260742, | |
| "learning_rate": 4.821526104417671e-06, | |
| "loss": 0.1555, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.026296, | |
| "grad_norm": 11.086517333984375, | |
| "learning_rate": 4.813493975903615e-06, | |
| "loss": 0.165, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 1.027096, | |
| "grad_norm": 4.682340145111084, | |
| "learning_rate": 4.805461847389558e-06, | |
| "loss": 0.1791, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 1.027896, | |
| "grad_norm": 8.538371086120605, | |
| "learning_rate": 4.797429718875502e-06, | |
| "loss": 0.1656, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 1.028696, | |
| "grad_norm": 6.110259532928467, | |
| "learning_rate": 4.789397590361446e-06, | |
| "loss": 0.1564, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 1.029496, | |
| "grad_norm": 6.267505168914795, | |
| "learning_rate": 4.78136546184739e-06, | |
| "loss": 0.1624, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.030296, | |
| "grad_norm": 7.841426372528076, | |
| "learning_rate": 4.773333333333334e-06, | |
| "loss": 0.1497, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 1.031096, | |
| "grad_norm": 3.335782051086426, | |
| "learning_rate": 4.765301204819278e-06, | |
| "loss": 0.1684, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 1.031896, | |
| "grad_norm": 6.293437480926514, | |
| "learning_rate": 4.757269076305222e-06, | |
| "loss": 0.1687, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 1.032696, | |
| "grad_norm": 9.889056205749512, | |
| "learning_rate": 4.749236947791165e-06, | |
| "loss": 0.1605, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 1.033496, | |
| "grad_norm": 7.943349838256836, | |
| "learning_rate": 4.741204819277109e-06, | |
| "loss": 0.1781, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.034296, | |
| "grad_norm": 9.100912094116211, | |
| "learning_rate": 4.733172690763053e-06, | |
| "loss": 0.1789, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 1.035096, | |
| "grad_norm": 5.020230293273926, | |
| "learning_rate": 4.725140562248997e-06, | |
| "loss": 0.1679, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 1.035896, | |
| "grad_norm": 3.7011711597442627, | |
| "learning_rate": 4.71710843373494e-06, | |
| "loss": 0.1617, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 1.036696, | |
| "grad_norm": 13.44151782989502, | |
| "learning_rate": 4.709076305220884e-06, | |
| "loss": 0.1717, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 1.037496, | |
| "grad_norm": 5.3160319328308105, | |
| "learning_rate": 4.7010441767068275e-06, | |
| "loss": 0.18, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.0382959999999999, | |
| "grad_norm": 9.991889953613281, | |
| "learning_rate": 4.693092369477912e-06, | |
| "loss": 0.1698, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 1.039096, | |
| "grad_norm": 5.639313697814941, | |
| "learning_rate": 4.685060240963856e-06, | |
| "loss": 0.1595, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 1.039896, | |
| "grad_norm": 4.916014194488525, | |
| "learning_rate": 4.6770281124498e-06, | |
| "loss": 0.1703, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 1.040696, | |
| "grad_norm": 5.738508701324463, | |
| "learning_rate": 4.668995983935744e-06, | |
| "loss": 0.1864, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 1.041496, | |
| "grad_norm": 8.484038352966309, | |
| "learning_rate": 4.6609638554216875e-06, | |
| "loss": 0.1587, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.042296, | |
| "grad_norm": 6.696768283843994, | |
| "learning_rate": 4.652931726907631e-06, | |
| "loss": 0.1601, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 1.043096, | |
| "grad_norm": 3.6157071590423584, | |
| "learning_rate": 4.6448995983935745e-06, | |
| "loss": 0.1657, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 1.043896, | |
| "grad_norm": 6.275000095367432, | |
| "learning_rate": 4.6368674698795184e-06, | |
| "loss": 0.1559, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 1.044696, | |
| "grad_norm": 11.618382453918457, | |
| "learning_rate": 4.628835341365462e-06, | |
| "loss": 0.1636, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 1.045496, | |
| "grad_norm": 3.8046157360076904, | |
| "learning_rate": 4.620803212851405e-06, | |
| "loss": 0.165, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.046296, | |
| "grad_norm": 7.296670913696289, | |
| "learning_rate": 4.612771084337349e-06, | |
| "loss": 0.1692, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 1.047096, | |
| "grad_norm": 5.143111228942871, | |
| "learning_rate": 4.604738955823293e-06, | |
| "loss": 0.1567, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 1.047896, | |
| "grad_norm": 6.818264961242676, | |
| "learning_rate": 4.596706827309237e-06, | |
| "loss": 0.1569, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 1.048696, | |
| "grad_norm": 3.8615856170654297, | |
| "learning_rate": 4.588674698795181e-06, | |
| "loss": 0.1646, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 1.049496, | |
| "grad_norm": 4.350166320800781, | |
| "learning_rate": 4.580642570281125e-06, | |
| "loss": 0.1562, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.049496, | |
| "eval_test1_cer": 0.04518167899021585, | |
| "eval_test1_cer_norm": 0.02964919441976613, | |
| "eval_test1_loss": 0.18461571633815765, | |
| "eval_test1_runtime": 3355.9439, | |
| "eval_test1_samples_per_second": 0.745, | |
| "eval_test1_steps_per_second": 0.186, | |
| "eval_test1_wer": 0.1442899209889501, | |
| "eval_test1_wer_norm": 0.08173821561121014, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.049496, | |
| "eval_test2_cer": 0.08435995072236532, | |
| "eval_test2_cer_norm": 0.06550201425472575, | |
| "eval_test2_loss": 0.3253004252910614, | |
| "eval_test2_runtime": 3365.7371, | |
| "eval_test2_samples_per_second": 0.743, | |
| "eval_test2_steps_per_second": 0.186, | |
| "eval_test2_wer": 0.19904440375371937, | |
| "eval_test2_wer_norm": 0.13302200320880128, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.050296, | |
| "grad_norm": 7.118165016174316, | |
| "learning_rate": 4.572610441767069e-06, | |
| "loss": 0.1473, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 1.051096, | |
| "grad_norm": 7.220821857452393, | |
| "learning_rate": 4.564578313253013e-06, | |
| "loss": 0.1617, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 1.051896, | |
| "grad_norm": 7.148895740509033, | |
| "learning_rate": 4.556546184738957e-06, | |
| "loss": 0.1629, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 1.052696, | |
| "grad_norm": 3.8178248405456543, | |
| "learning_rate": 4.5485140562249e-06, | |
| "loss": 0.181, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 1.053496, | |
| "grad_norm": 6.640100479125977, | |
| "learning_rate": 4.540481927710844e-06, | |
| "loss": 0.1545, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.054296, | |
| "grad_norm": 6.102357387542725, | |
| "learning_rate": 4.532530120481928e-06, | |
| "loss": 0.1716, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 1.055096, | |
| "grad_norm": 5.749415397644043, | |
| "learning_rate": 4.524497991967872e-06, | |
| "loss": 0.1419, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 1.055896, | |
| "grad_norm": 5.71680212020874, | |
| "learning_rate": 4.516465863453816e-06, | |
| "loss": 0.1487, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 1.056696, | |
| "grad_norm": 5.209336757659912, | |
| "learning_rate": 4.50843373493976e-06, | |
| "loss": 0.1722, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 1.057496, | |
| "grad_norm": 6.00562047958374, | |
| "learning_rate": 4.500401606425703e-06, | |
| "loss": 0.1659, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.058296, | |
| "grad_norm": 4.806443214416504, | |
| "learning_rate": 4.492369477911647e-06, | |
| "loss": 0.1499, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 1.059096, | |
| "grad_norm": 7.675013065338135, | |
| "learning_rate": 4.484337349397591e-06, | |
| "loss": 0.1624, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 1.059896, | |
| "grad_norm": 5.567787170410156, | |
| "learning_rate": 4.476305220883535e-06, | |
| "loss": 0.1572, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 1.060696, | |
| "grad_norm": 20.12483787536621, | |
| "learning_rate": 4.468273092369479e-06, | |
| "loss": 0.169, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 1.061496, | |
| "grad_norm": 11.066375732421875, | |
| "learning_rate": 4.4602409638554225e-06, | |
| "loss": 0.1618, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.062296, | |
| "grad_norm": 6.3260579109191895, | |
| "learning_rate": 4.452208835341366e-06, | |
| "loss": 0.1465, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 1.063096, | |
| "grad_norm": 5.906326770782471, | |
| "learning_rate": 4.4441767068273095e-06, | |
| "loss": 0.1532, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 1.063896, | |
| "grad_norm": 4.177192211151123, | |
| "learning_rate": 4.4361445783132534e-06, | |
| "loss": 0.1624, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 1.064696, | |
| "grad_norm": 4.535795211791992, | |
| "learning_rate": 4.428112449799197e-06, | |
| "loss": 0.1637, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 1.065496, | |
| "grad_norm": 8.556774139404297, | |
| "learning_rate": 4.420080321285141e-06, | |
| "loss": 0.1512, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.066296, | |
| "grad_norm": 5.175086975097656, | |
| "learning_rate": 4.412048192771084e-06, | |
| "loss": 0.1487, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 1.067096, | |
| "grad_norm": 5.598880767822266, | |
| "learning_rate": 4.404016064257028e-06, | |
| "loss": 0.1604, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 1.067896, | |
| "grad_norm": 4.312199115753174, | |
| "learning_rate": 4.395983935742972e-06, | |
| "loss": 0.1752, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 1.068696, | |
| "grad_norm": 4.8355255126953125, | |
| "learning_rate": 4.387951807228916e-06, | |
| "loss": 0.1375, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 1.069496, | |
| "grad_norm": 0.6319503784179688, | |
| "learning_rate": 4.379919678714859e-06, | |
| "loss": 0.1518, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.070296, | |
| "grad_norm": 7.824057579040527, | |
| "learning_rate": 4.371887550200803e-06, | |
| "loss": 0.171, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 1.071096, | |
| "grad_norm": 6.540349960327148, | |
| "learning_rate": 4.363855421686747e-06, | |
| "loss": 0.146, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 1.071896, | |
| "grad_norm": 11.684809684753418, | |
| "learning_rate": 4.355823293172691e-06, | |
| "loss": 0.1504, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 1.072696, | |
| "grad_norm": 9.533754348754883, | |
| "learning_rate": 4.347791164658635e-06, | |
| "loss": 0.1548, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 1.073496, | |
| "grad_norm": 6.979349136352539, | |
| "learning_rate": 4.339759036144579e-06, | |
| "loss": 0.1608, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.074296, | |
| "grad_norm": 6.119946002960205, | |
| "learning_rate": 4.331807228915663e-06, | |
| "loss": 0.1642, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 1.075096, | |
| "grad_norm": 5.234997272491455, | |
| "learning_rate": 4.323775100401606e-06, | |
| "loss": 0.159, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 1.075896, | |
| "grad_norm": 11.375402450561523, | |
| "learning_rate": 4.31574297188755e-06, | |
| "loss": 0.1598, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 1.076696, | |
| "grad_norm": 6.117208003997803, | |
| "learning_rate": 4.307710843373494e-06, | |
| "loss": 0.1581, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 1.077496, | |
| "grad_norm": 5.760785102844238, | |
| "learning_rate": 4.299678714859438e-06, | |
| "loss": 0.1581, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.078296, | |
| "grad_norm": 12.158953666687012, | |
| "learning_rate": 4.291646586345382e-06, | |
| "loss": 0.1645, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 1.079096, | |
| "grad_norm": 3.9681832790374756, | |
| "learning_rate": 4.283614457831326e-06, | |
| "loss": 0.1664, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 1.079896, | |
| "grad_norm": 4.026464939117432, | |
| "learning_rate": 4.27558232931727e-06, | |
| "loss": 0.1478, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 1.080696, | |
| "grad_norm": 9.842916488647461, | |
| "learning_rate": 4.267550200803214e-06, | |
| "loss": 0.1516, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 1.081496, | |
| "grad_norm": 6.5888776779174805, | |
| "learning_rate": 4.2595180722891575e-06, | |
| "loss": 0.1743, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.081496, | |
| "eval_test1_cer": 0.056235529165733066, | |
| "eval_test1_cer_norm": 0.03523423055682282, | |
| "eval_test1_loss": 0.18351316452026367, | |
| "eval_test1_runtime": 3389.512, | |
| "eval_test1_samples_per_second": 0.738, | |
| "eval_test1_steps_per_second": 0.184, | |
| "eval_test1_wer": 0.15283244409458002, | |
| "eval_test1_wer_norm": 0.09255092200239633, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.081496, | |
| "eval_test2_cer": 0.10279239929816703, | |
| "eval_test2_cer_norm": 0.0767353579175705, | |
| "eval_test2_loss": 0.32321926951408386, | |
| "eval_test2_runtime": 3400.9588, | |
| "eval_test2_samples_per_second": 0.735, | |
| "eval_test2_steps_per_second": 0.184, | |
| "eval_test2_wer": 0.21498054474708173, | |
| "eval_test2_wer_norm": 0.14843570937428374, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.082296, | |
| "grad_norm": 11.681933403015137, | |
| "learning_rate": 4.251485943775101e-06, | |
| "loss": 0.1564, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 1.083096, | |
| "grad_norm": 5.162091255187988, | |
| "learning_rate": 4.2434538152610445e-06, | |
| "loss": 0.1469, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 1.083896, | |
| "grad_norm": 7.147501468658447, | |
| "learning_rate": 4.2354216867469884e-06, | |
| "loss": 0.1663, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 1.084696, | |
| "grad_norm": 7.40344762802124, | |
| "learning_rate": 4.227389558232932e-06, | |
| "loss": 0.1284, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 1.085496, | |
| "grad_norm": 3.5983896255493164, | |
| "learning_rate": 4.219357429718876e-06, | |
| "loss": 0.1558, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.086296, | |
| "grad_norm": 4.397582054138184, | |
| "learning_rate": 4.211325301204819e-06, | |
| "loss": 0.1599, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 1.087096, | |
| "grad_norm": 2.2897703647613525, | |
| "learning_rate": 4.203293172690763e-06, | |
| "loss": 0.1611, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 1.087896, | |
| "grad_norm": 4.20655632019043, | |
| "learning_rate": 4.195261044176707e-06, | |
| "loss": 0.1597, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 1.088696, | |
| "grad_norm": 4.474186420440674, | |
| "learning_rate": 4.187228915662651e-06, | |
| "loss": 0.145, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 1.089496, | |
| "grad_norm": 4.5594401359558105, | |
| "learning_rate": 4.179196787148594e-06, | |
| "loss": 0.1545, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.090296, | |
| "grad_norm": 3.502408266067505, | |
| "learning_rate": 4.171164658634538e-06, | |
| "loss": 0.1648, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 1.091096, | |
| "grad_norm": 2.6792593002319336, | |
| "learning_rate": 4.163132530120482e-06, | |
| "loss": 0.1491, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 1.091896, | |
| "grad_norm": 6.028800010681152, | |
| "learning_rate": 4.155100401606426e-06, | |
| "loss": 0.1544, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 1.0926960000000001, | |
| "grad_norm": 5.256026268005371, | |
| "learning_rate": 4.14706827309237e-06, | |
| "loss": 0.1526, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 1.093496, | |
| "grad_norm": 5.323427677154541, | |
| "learning_rate": 4.139036144578314e-06, | |
| "loss": 0.1412, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.094296, | |
| "grad_norm": 3.4750475883483887, | |
| "learning_rate": 4.131004016064257e-06, | |
| "loss": 0.1372, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 1.095096, | |
| "grad_norm": 2.8323254585266113, | |
| "learning_rate": 4.122971887550201e-06, | |
| "loss": 0.156, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 1.095896, | |
| "grad_norm": 11.332598686218262, | |
| "learning_rate": 4.114939759036145e-06, | |
| "loss": 0.1443, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 1.0966960000000001, | |
| "grad_norm": 3.0766139030456543, | |
| "learning_rate": 4.106907630522089e-06, | |
| "loss": 0.1513, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 1.097496, | |
| "grad_norm": 8.13890266418457, | |
| "learning_rate": 4.0988755020080325e-06, | |
| "loss": 0.1441, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.098296, | |
| "grad_norm": 6.174729824066162, | |
| "learning_rate": 4.0908433734939765e-06, | |
| "loss": 0.1465, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 1.099096, | |
| "grad_norm": 7.782393455505371, | |
| "learning_rate": 4.08281124497992e-06, | |
| "loss": 0.1525, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 1.099896, | |
| "grad_norm": 4.48836088180542, | |
| "learning_rate": 4.074779116465864e-06, | |
| "loss": 0.1419, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 1.100696, | |
| "grad_norm": 9.527304649353027, | |
| "learning_rate": 4.066746987951807e-06, | |
| "loss": 0.1471, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 1.101496, | |
| "grad_norm": 5.195642471313477, | |
| "learning_rate": 4.058714859437751e-06, | |
| "loss": 0.1454, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.102296, | |
| "grad_norm": 4.529500961303711, | |
| "learning_rate": 4.050682730923695e-06, | |
| "loss": 0.1582, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 1.103096, | |
| "grad_norm": 4.841000080108643, | |
| "learning_rate": 4.042650602409639e-06, | |
| "loss": 0.1521, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 1.103896, | |
| "grad_norm": 4.943587303161621, | |
| "learning_rate": 4.034618473895583e-06, | |
| "loss": 0.148, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 1.104696, | |
| "grad_norm": 7.331528186798096, | |
| "learning_rate": 4.026586345381526e-06, | |
| "loss": 0.1469, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 1.105496, | |
| "grad_norm": 8.253586769104004, | |
| "learning_rate": 4.01855421686747e-06, | |
| "loss": 0.1736, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.106296, | |
| "grad_norm": 6.348284721374512, | |
| "learning_rate": 4.010602409638554e-06, | |
| "loss": 0.1398, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 1.107096, | |
| "grad_norm": 2.285827398300171, | |
| "learning_rate": 4.002570281124498e-06, | |
| "loss": 0.146, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 1.107896, | |
| "grad_norm": 7.164559364318848, | |
| "learning_rate": 3.994538152610442e-06, | |
| "loss": 0.1564, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 1.108696, | |
| "grad_norm": 2.311056137084961, | |
| "learning_rate": 3.986506024096386e-06, | |
| "loss": 0.1509, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 1.109496, | |
| "grad_norm": 3.1986663341522217, | |
| "learning_rate": 3.97847389558233e-06, | |
| "loss": 0.1521, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 1.110296, | |
| "grad_norm": 10.752972602844238, | |
| "learning_rate": 3.970441767068273e-06, | |
| "loss": 0.1321, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 1.111096, | |
| "grad_norm": 5.435462474822998, | |
| "learning_rate": 3.962409638554217e-06, | |
| "loss": 0.1413, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 1.111896, | |
| "grad_norm": 3.7016944885253906, | |
| "learning_rate": 3.954457831325301e-06, | |
| "loss": 0.1661, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 1.112696, | |
| "grad_norm": 9.687053680419922, | |
| "learning_rate": 3.946425702811245e-06, | |
| "loss": 0.1516, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 1.113496, | |
| "grad_norm": 2.699572801589966, | |
| "learning_rate": 3.938393574297189e-06, | |
| "loss": 0.153, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.113496, | |
| "eval_test1_cer": 0.07078571962058407, | |
| "eval_test1_cer_norm": 0.04619780536413187, | |
| "eval_test1_loss": 0.18133017420768738, | |
| "eval_test1_runtime": 3430.9909, | |
| "eval_test1_samples_per_second": 0.729, | |
| "eval_test1_steps_per_second": 0.182, | |
| "eval_test1_wer": 0.16875127554739205, | |
| "eval_test1_wer_norm": 0.11002659341301615, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.113496, | |
| "eval_test2_cer": 0.13182233919438532, | |
| "eval_test2_cer_norm": 0.09897931515339324, | |
| "eval_test2_loss": 0.32256972789764404, | |
| "eval_test2_runtime": 3529.0223, | |
| "eval_test2_samples_per_second": 0.708, | |
| "eval_test2_steps_per_second": 0.177, | |
| "eval_test2_wer": 0.26012817578393227, | |
| "eval_test2_wer_norm": 0.19547902819161128, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.114296, | |
| "grad_norm": 4.699991703033447, | |
| "learning_rate": 3.930361445783133e-06, | |
| "loss": 0.1306, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 1.115096, | |
| "grad_norm": 5.847876071929932, | |
| "learning_rate": 3.922329317269077e-06, | |
| "loss": 0.159, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 1.115896, | |
| "grad_norm": 14.771410942077637, | |
| "learning_rate": 3.91429718875502e-06, | |
| "loss": 0.1593, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 1.116696, | |
| "grad_norm": 3.190328598022461, | |
| "learning_rate": 3.906265060240964e-06, | |
| "loss": 0.1513, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 1.117496, | |
| "grad_norm": 3.2370731830596924, | |
| "learning_rate": 3.898232931726908e-06, | |
| "loss": 0.1524, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 1.118296, | |
| "grad_norm": 3.932393789291382, | |
| "learning_rate": 3.890200803212852e-06, | |
| "loss": 0.129, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 1.119096, | |
| "grad_norm": 3.998230457305908, | |
| "learning_rate": 3.882168674698796e-06, | |
| "loss": 0.1534, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 1.119896, | |
| "grad_norm": 5.405791282653809, | |
| "learning_rate": 3.874136546184739e-06, | |
| "loss": 0.1455, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 1.120696, | |
| "grad_norm": 5.66414737701416, | |
| "learning_rate": 3.866104417670683e-06, | |
| "loss": 0.1321, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 1.121496, | |
| "grad_norm": 2.428980827331543, | |
| "learning_rate": 3.858072289156627e-06, | |
| "loss": 0.1251, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 1.122296, | |
| "grad_norm": 8.633134841918945, | |
| "learning_rate": 3.850040160642571e-06, | |
| "loss": 0.1581, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 1.123096, | |
| "grad_norm": 6.84693717956543, | |
| "learning_rate": 3.8420080321285145e-06, | |
| "loss": 0.1516, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 1.123896, | |
| "grad_norm": 1.603908896446228, | |
| "learning_rate": 3.8339759036144584e-06, | |
| "loss": 0.1614, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 1.124696, | |
| "grad_norm": 12.588580131530762, | |
| "learning_rate": 3.8259437751004015e-06, | |
| "loss": 0.1423, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 1.125496, | |
| "grad_norm": 8.18181324005127, | |
| "learning_rate": 3.8179116465863454e-06, | |
| "loss": 0.1409, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 1.126296, | |
| "grad_norm": 4.0206217765808105, | |
| "learning_rate": 3.8098795180722898e-06, | |
| "loss": 0.1407, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 1.1270959999999999, | |
| "grad_norm": 3.093519449234009, | |
| "learning_rate": 3.8018473895582333e-06, | |
| "loss": 0.128, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 1.127896, | |
| "grad_norm": 5.9990925788879395, | |
| "learning_rate": 3.7938152610441768e-06, | |
| "loss": 0.138, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 1.128696, | |
| "grad_norm": 5.512988567352295, | |
| "learning_rate": 3.7857831325301207e-06, | |
| "loss": 0.1366, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 1.129496, | |
| "grad_norm": 2.798251152038574, | |
| "learning_rate": 3.7777510040160646e-06, | |
| "loss": 0.141, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.130296, | |
| "grad_norm": 4.432553768157959, | |
| "learning_rate": 3.769718875502008e-06, | |
| "loss": 0.1415, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 1.1310959999999999, | |
| "grad_norm": 4.363057613372803, | |
| "learning_rate": 3.761686746987952e-06, | |
| "loss": 0.1516, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 1.131896, | |
| "grad_norm": 8.875186920166016, | |
| "learning_rate": 3.753654618473896e-06, | |
| "loss": 0.1578, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 1.132696, | |
| "grad_norm": 5.948533535003662, | |
| "learning_rate": 3.74562248995984e-06, | |
| "loss": 0.1436, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 1.133496, | |
| "grad_norm": 3.6585094928741455, | |
| "learning_rate": 3.737590361445784e-06, | |
| "loss": 0.1452, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 1.134296, | |
| "grad_norm": 4.998322486877441, | |
| "learning_rate": 3.729558232931727e-06, | |
| "loss": 0.1395, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 1.1350959999999999, | |
| "grad_norm": 2.1381640434265137, | |
| "learning_rate": 3.7215261044176708e-06, | |
| "loss": 0.1386, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 1.135896, | |
| "grad_norm": 3.784778594970703, | |
| "learning_rate": 3.7134939759036147e-06, | |
| "loss": 0.1494, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 1.136696, | |
| "grad_norm": 12.97223949432373, | |
| "learning_rate": 3.7054618473895586e-06, | |
| "loss": 0.1389, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 1.137496, | |
| "grad_norm": 2.718700647354126, | |
| "learning_rate": 3.697429718875502e-06, | |
| "loss": 0.1488, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 1.138296, | |
| "grad_norm": 5.164200782775879, | |
| "learning_rate": 3.689397590361446e-06, | |
| "loss": 0.1554, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 1.1390959999999999, | |
| "grad_norm": 5.673985004425049, | |
| "learning_rate": 3.68136546184739e-06, | |
| "loss": 0.136, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 1.139896, | |
| "grad_norm": 2.9805924892425537, | |
| "learning_rate": 3.673333333333334e-06, | |
| "loss": 0.1394, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 1.140696, | |
| "grad_norm": 7.971452713012695, | |
| "learning_rate": 3.665301204819278e-06, | |
| "loss": 0.1515, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 1.141496, | |
| "grad_norm": 3.0903499126434326, | |
| "learning_rate": 3.657269076305221e-06, | |
| "loss": 0.1378, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 1.142296, | |
| "grad_norm": 4.741939067840576, | |
| "learning_rate": 3.649236947791165e-06, | |
| "loss": 0.1472, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 1.143096, | |
| "grad_norm": 5.633648872375488, | |
| "learning_rate": 3.6412048192771087e-06, | |
| "loss": 0.1573, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 1.143896, | |
| "grad_norm": 3.337674379348755, | |
| "learning_rate": 3.633253012048193e-06, | |
| "loss": 0.1531, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 1.144696, | |
| "grad_norm": 15.155500411987305, | |
| "learning_rate": 3.625220883534137e-06, | |
| "loss": 0.1438, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 1.145496, | |
| "grad_norm": 7.186131477355957, | |
| "learning_rate": 3.617188755020081e-06, | |
| "loss": 0.1315, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.145496, | |
| "eval_test1_cer": 0.050181118828889384, | |
| "eval_test1_cer_norm": 0.03300117655533412, | |
| "eval_test1_loss": 0.17834880948066711, | |
| "eval_test1_runtime": 3408.2771, | |
| "eval_test1_samples_per_second": 0.734, | |
| "eval_test1_steps_per_second": 0.183, | |
| "eval_test1_wer": 0.1458643109128546, | |
| "eval_test1_wer_norm": 0.08547882755194483, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.145496, | |
| "eval_test2_cer": 0.10706219434800462, | |
| "eval_test2_cer_norm": 0.08262802138208862, | |
| "eval_test2_loss": 0.3213089108467102, | |
| "eval_test2_runtime": 3477.8704, | |
| "eval_test2_samples_per_second": 0.719, | |
| "eval_test2_steps_per_second": 0.18, | |
| "eval_test2_wer": 0.23108834973678188, | |
| "eval_test2_wer_norm": 0.16551111620444647, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.146296, | |
| "grad_norm": 5.269469738006592, | |
| "learning_rate": 3.6091566265060248e-06, | |
| "loss": 0.1244, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 1.147096, | |
| "grad_norm": 5.666225433349609, | |
| "learning_rate": 3.6012048192771087e-06, | |
| "loss": 0.13, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 1.147896, | |
| "grad_norm": 5.9655375480651855, | |
| "learning_rate": 3.5931726907630526e-06, | |
| "loss": 0.1464, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 1.148696, | |
| "grad_norm": 2.792882204055786, | |
| "learning_rate": 3.5851405622489965e-06, | |
| "loss": 0.1482, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 1.149496, | |
| "grad_norm": 2.3720922470092773, | |
| "learning_rate": 3.57710843373494e-06, | |
| "loss": 0.1375, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 1.150296, | |
| "grad_norm": 3.8564910888671875, | |
| "learning_rate": 3.569076305220884e-06, | |
| "loss": 0.1291, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 1.151096, | |
| "grad_norm": 5.648325443267822, | |
| "learning_rate": 3.561044176706828e-06, | |
| "loss": 0.1341, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 1.151896, | |
| "grad_norm": 8.060567855834961, | |
| "learning_rate": 3.5530120481927718e-06, | |
| "loss": 0.1285, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 1.152696, | |
| "grad_norm": 2.7208454608917236, | |
| "learning_rate": 3.544979919678715e-06, | |
| "loss": 0.1413, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 1.153496, | |
| "grad_norm": 5.355711460113525, | |
| "learning_rate": 3.5369477911646588e-06, | |
| "loss": 0.1259, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 1.154296, | |
| "grad_norm": 7.103275775909424, | |
| "learning_rate": 3.5289156626506027e-06, | |
| "loss": 0.149, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 1.155096, | |
| "grad_norm": 3.506634473800659, | |
| "learning_rate": 3.5208835341365466e-06, | |
| "loss": 0.1414, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 1.155896, | |
| "grad_norm": 5.049018383026123, | |
| "learning_rate": 3.5128514056224905e-06, | |
| "loss": 0.1404, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 1.156696, | |
| "grad_norm": 4.586015701293945, | |
| "learning_rate": 3.504819277108434e-06, | |
| "loss": 0.1446, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 1.157496, | |
| "grad_norm": 4.436155796051025, | |
| "learning_rate": 3.496787148594378e-06, | |
| "loss": 0.1406, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 1.158296, | |
| "grad_norm": 2.5560381412506104, | |
| "learning_rate": 3.4887550200803214e-06, | |
| "loss": 0.1384, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 1.159096, | |
| "grad_norm": 5.3852057456970215, | |
| "learning_rate": 3.4807228915662654e-06, | |
| "loss": 0.1412, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 1.159896, | |
| "grad_norm": 5.565433502197266, | |
| "learning_rate": 3.472690763052209e-06, | |
| "loss": 0.1441, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 1.160696, | |
| "grad_norm": 10.139933586120605, | |
| "learning_rate": 3.4646586345381528e-06, | |
| "loss": 0.1387, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 1.161496, | |
| "grad_norm": 10.769366264343262, | |
| "learning_rate": 3.4566265060240967e-06, | |
| "loss": 0.1424, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.162296, | |
| "grad_norm": 7.583619594573975, | |
| "learning_rate": 3.4485943775100406e-06, | |
| "loss": 0.1476, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 1.163096, | |
| "grad_norm": 3.2200429439544678, | |
| "learning_rate": 3.4405622489959845e-06, | |
| "loss": 0.1391, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 1.163896, | |
| "grad_norm": 4.72890567779541, | |
| "learning_rate": 3.4325301204819276e-06, | |
| "loss": 0.1371, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 1.164696, | |
| "grad_norm": 3.9485466480255127, | |
| "learning_rate": 3.4244979919678715e-06, | |
| "loss": 0.1349, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 1.165496, | |
| "grad_norm": 7.35235071182251, | |
| "learning_rate": 3.4164658634538154e-06, | |
| "loss": 0.1419, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 1.166296, | |
| "grad_norm": 7.841335773468018, | |
| "learning_rate": 3.4084337349397594e-06, | |
| "loss": 0.1459, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 1.167096, | |
| "grad_norm": 3.344604015350342, | |
| "learning_rate": 3.400401606425703e-06, | |
| "loss": 0.1198, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 1.167896, | |
| "grad_norm": 4.77374792098999, | |
| "learning_rate": 3.3923694779116468e-06, | |
| "loss": 0.1531, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 1.168696, | |
| "grad_norm": 6.152327537536621, | |
| "learning_rate": 3.3843373493975907e-06, | |
| "loss": 0.1303, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 1.169496, | |
| "grad_norm": 5.056685924530029, | |
| "learning_rate": 3.3763052208835346e-06, | |
| "loss": 0.1479, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 1.170296, | |
| "grad_norm": 7.288972854614258, | |
| "learning_rate": 3.3682730923694785e-06, | |
| "loss": 0.1502, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 1.171096, | |
| "grad_norm": 6.630878448486328, | |
| "learning_rate": 3.3602409638554216e-06, | |
| "loss": 0.1435, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 1.171896, | |
| "grad_norm": 6.784937381744385, | |
| "learning_rate": 3.3522088353413655e-06, | |
| "loss": 0.1445, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 1.172696, | |
| "grad_norm": 5.813895225524902, | |
| "learning_rate": 3.3441767068273095e-06, | |
| "loss": 0.1362, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 1.173496, | |
| "grad_norm": 6.4298415184021, | |
| "learning_rate": 3.3361445783132534e-06, | |
| "loss": 0.138, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 1.174296, | |
| "grad_norm": 3.369779586791992, | |
| "learning_rate": 3.328112449799197e-06, | |
| "loss": 0.1251, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 1.175096, | |
| "grad_norm": 2.4179258346557617, | |
| "learning_rate": 3.320080321285141e-06, | |
| "loss": 0.1491, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 1.175896, | |
| "grad_norm": 6.803851127624512, | |
| "learning_rate": 3.3120481927710847e-06, | |
| "loss": 0.1209, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 1.176696, | |
| "grad_norm": 5.81317138671875, | |
| "learning_rate": 3.3040160642570286e-06, | |
| "loss": 0.1463, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 1.177496, | |
| "grad_norm": 3.342420816421509, | |
| "learning_rate": 3.2959839357429726e-06, | |
| "loss": 0.1108, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.177496, | |
| "eval_test1_cer": 0.04321177832549108, | |
| "eval_test1_cer_norm": 0.028405407352270273, | |
| "eval_test1_loss": 0.17878110706806183, | |
| "eval_test1_runtime": 3382.137, | |
| "eval_test1_samples_per_second": 0.739, | |
| "eval_test1_steps_per_second": 0.185, | |
| "eval_test1_wer": 0.13557246566955306, | |
| "eval_test1_wer_norm": 0.07484146234548059, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.177496, | |
| "eval_test2_cer": 0.10907343114197185, | |
| "eval_test2_cer_norm": 0.08453091106290672, | |
| "eval_test2_loss": 0.32212311029434204, | |
| "eval_test2_runtime": 3480.5629, | |
| "eval_test2_samples_per_second": 0.718, | |
| "eval_test2_steps_per_second": 0.18, | |
| "eval_test2_wer": 0.2316891737239643, | |
| "eval_test2_wer_norm": 0.16588356635342655, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.178296, | |
| "grad_norm": 5.4935503005981445, | |
| "learning_rate": 3.2879518072289156e-06, | |
| "loss": 0.1458, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 1.179096, | |
| "grad_norm": 7.605450630187988, | |
| "learning_rate": 3.2799196787148595e-06, | |
| "loss": 0.1377, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 1.179896, | |
| "grad_norm": 6.349878311157227, | |
| "learning_rate": 3.2719678714859443e-06, | |
| "loss": 0.1365, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 1.180696, | |
| "grad_norm": 6.173891544342041, | |
| "learning_rate": 3.2639357429718878e-06, | |
| "loss": 0.1394, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 1.181496, | |
| "grad_norm": 1.916458249092102, | |
| "learning_rate": 3.2559036144578317e-06, | |
| "loss": 0.1343, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 1.182296, | |
| "grad_norm": 4.605401039123535, | |
| "learning_rate": 3.2478714859437756e-06, | |
| "loss": 0.1438, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 1.183096, | |
| "grad_norm": 10.40539264678955, | |
| "learning_rate": 3.2398393574297195e-06, | |
| "loss": 0.1426, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 1.183896, | |
| "grad_norm": 5.316925525665283, | |
| "learning_rate": 3.2318072289156626e-06, | |
| "loss": 0.1312, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 1.184696, | |
| "grad_norm": 3.3205056190490723, | |
| "learning_rate": 3.2237751004016065e-06, | |
| "loss": 0.1339, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 1.185496, | |
| "grad_norm": 4.223670959472656, | |
| "learning_rate": 3.2157429718875504e-06, | |
| "loss": 0.1477, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 1.186296, | |
| "grad_norm": 2.418142318725586, | |
| "learning_rate": 3.2077108433734944e-06, | |
| "loss": 0.13, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 1.187096, | |
| "grad_norm": 5.908360958099365, | |
| "learning_rate": 3.1996787148594383e-06, | |
| "loss": 0.1351, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 1.187896, | |
| "grad_norm": 3.79148268699646, | |
| "learning_rate": 3.1916465863453818e-06, | |
| "loss": 0.1193, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 1.188696, | |
| "grad_norm": 4.751871109008789, | |
| "learning_rate": 3.1836144578313257e-06, | |
| "loss": 0.1357, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 1.189496, | |
| "grad_norm": 5.901039123535156, | |
| "learning_rate": 3.175582329317269e-06, | |
| "loss": 0.1214, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 1.190296, | |
| "grad_norm": 4.6331305503845215, | |
| "learning_rate": 3.167550200803213e-06, | |
| "loss": 0.1593, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 1.191096, | |
| "grad_norm": 5.742949962615967, | |
| "learning_rate": 3.1595180722891566e-06, | |
| "loss": 0.1321, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 1.191896, | |
| "grad_norm": 3.329071283340454, | |
| "learning_rate": 3.1514859437751005e-06, | |
| "loss": 0.1514, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 1.192696, | |
| "grad_norm": 6.989814758300781, | |
| "learning_rate": 3.1434538152610445e-06, | |
| "loss": 0.135, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 1.1934960000000001, | |
| "grad_norm": 4.263644218444824, | |
| "learning_rate": 3.1354216867469884e-06, | |
| "loss": 0.1364, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 1.194296, | |
| "grad_norm": 3.34621524810791, | |
| "learning_rate": 3.1273895582329323e-06, | |
| "loss": 0.1215, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 1.195096, | |
| "grad_norm": 5.672510623931885, | |
| "learning_rate": 3.1193574297188754e-06, | |
| "loss": 0.1323, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 1.195896, | |
| "grad_norm": 7.123562812805176, | |
| "learning_rate": 3.1113253012048193e-06, | |
| "loss": 0.128, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 1.196696, | |
| "grad_norm": 5.052427768707275, | |
| "learning_rate": 3.1033734939759036e-06, | |
| "loss": 0.1343, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 1.1974960000000001, | |
| "grad_norm": 9.78446102142334, | |
| "learning_rate": 3.0953413654618475e-06, | |
| "loss": 0.1354, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 1.198296, | |
| "grad_norm": 3.7782175540924072, | |
| "learning_rate": 3.0873092369477914e-06, | |
| "loss": 0.1369, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 1.199096, | |
| "grad_norm": 3.037858247756958, | |
| "learning_rate": 3.0792771084337354e-06, | |
| "loss": 0.1368, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 1.199896, | |
| "grad_norm": 6.603598117828369, | |
| "learning_rate": 3.0712449799196793e-06, | |
| "loss": 0.1313, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 1.200696, | |
| "grad_norm": 5.845118522644043, | |
| "learning_rate": 3.0632128514056224e-06, | |
| "loss": 0.1381, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 1.2014960000000001, | |
| "grad_norm": 5.331429958343506, | |
| "learning_rate": 3.0551807228915663e-06, | |
| "loss": 0.1394, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 1.202296, | |
| "grad_norm": 2.769038677215576, | |
| "learning_rate": 3.04714859437751e-06, | |
| "loss": 0.138, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 1.203096, | |
| "grad_norm": 4.4109320640563965, | |
| "learning_rate": 3.039116465863454e-06, | |
| "loss": 0.1355, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 1.203896, | |
| "grad_norm": 5.107141494750977, | |
| "learning_rate": 3.031084337349398e-06, | |
| "loss": 0.1242, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 1.204696, | |
| "grad_norm": 3.513662815093994, | |
| "learning_rate": 3.0230522088353415e-06, | |
| "loss": 0.1341, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 1.2054960000000001, | |
| "grad_norm": 5.542943000793457, | |
| "learning_rate": 3.0150200803212855e-06, | |
| "loss": 0.1582, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 1.206296, | |
| "grad_norm": 6.642317771911621, | |
| "learning_rate": 3.0069879518072294e-06, | |
| "loss": 0.1328, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 1.207096, | |
| "grad_norm": 4.840209007263184, | |
| "learning_rate": 2.9989558232931733e-06, | |
| "loss": 0.1442, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 1.207896, | |
| "grad_norm": 8.443116188049316, | |
| "learning_rate": 2.9909236947791164e-06, | |
| "loss": 0.1301, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 1.208696, | |
| "grad_norm": 5.006475925445557, | |
| "learning_rate": 2.9828915662650603e-06, | |
| "loss": 0.137, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 1.209496, | |
| "grad_norm": 16.58333969116211, | |
| "learning_rate": 2.974859437751004e-06, | |
| "loss": 0.141, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 1.209496, | |
| "eval_test1_cer": 0.05272518485323773, | |
| "eval_test1_cer_norm": 0.035959372823972914, | |
| "eval_test1_loss": 0.1771300584077835, | |
| "eval_test1_runtime": 3405.3955, | |
| "eval_test1_samples_per_second": 0.734, | |
| "eval_test1_steps_per_second": 0.184, | |
| "eval_test1_wer": 0.15084987900521882, | |
| "eval_test1_wer_norm": 0.0903883807241591, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 1.209496, | |
| "eval_test2_cer": 0.10232108858774779, | |
| "eval_test2_cer_norm": 0.08082681282925318, | |
| "eval_test2_loss": 0.31750166416168213, | |
| "eval_test2_runtime": 3462.1713, | |
| "eval_test2_samples_per_second": 0.722, | |
| "eval_test2_steps_per_second": 0.181, | |
| "eval_test2_wer": 0.2193579766536965, | |
| "eval_test2_wer_norm": 0.15279051111620445, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 1.210296, | |
| "grad_norm": 7.229742527008057, | |
| "learning_rate": 2.966827309236948e-06, | |
| "loss": 0.1565, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 1.211096, | |
| "grad_norm": 4.775784015655518, | |
| "learning_rate": 2.958795180722892e-06, | |
| "loss": 0.1191, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 1.211896, | |
| "grad_norm": 5.914107799530029, | |
| "learning_rate": 2.9507630522088355e-06, | |
| "loss": 0.1272, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 1.212696, | |
| "grad_norm": 3.42290997505188, | |
| "learning_rate": 2.9427309236947795e-06, | |
| "loss": 0.1533, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 1.213496, | |
| "grad_norm": 5.825013637542725, | |
| "learning_rate": 2.9347791164658634e-06, | |
| "loss": 0.1337, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 1.214296, | |
| "grad_norm": 2.929975748062134, | |
| "learning_rate": 2.9267469879518073e-06, | |
| "loss": 0.1363, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 1.215096, | |
| "grad_norm": 4.544336318969727, | |
| "learning_rate": 2.918714859437751e-06, | |
| "loss": 0.1416, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 1.215896, | |
| "grad_norm": 6.057349681854248, | |
| "learning_rate": 2.910682730923695e-06, | |
| "loss": 0.1298, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 1.216696, | |
| "grad_norm": 4.51986026763916, | |
| "learning_rate": 2.902650602409639e-06, | |
| "loss": 0.1333, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 1.217496, | |
| "grad_norm": 4.948803424835205, | |
| "learning_rate": 2.8946184738955825e-06, | |
| "loss": 0.134, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 1.218296, | |
| "grad_norm": 4.863702774047852, | |
| "learning_rate": 2.8865863453815264e-06, | |
| "loss": 0.1378, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 1.219096, | |
| "grad_norm": 4.049374580383301, | |
| "learning_rate": 2.8785542168674704e-06, | |
| "loss": 0.125, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 1.219896, | |
| "grad_norm": 3.5720324516296387, | |
| "learning_rate": 2.870522088353414e-06, | |
| "loss": 0.1395, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 1.220696, | |
| "grad_norm": 1.931492567062378, | |
| "learning_rate": 2.8624899598393574e-06, | |
| "loss": 0.131, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 1.221496, | |
| "grad_norm": 6.574014663696289, | |
| "learning_rate": 2.8544578313253013e-06, | |
| "loss": 0.132, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 1.222296, | |
| "grad_norm": 7.461375713348389, | |
| "learning_rate": 2.846425702811245e-06, | |
| "loss": 0.1289, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 1.223096, | |
| "grad_norm": 5.367936611175537, | |
| "learning_rate": 2.838393574297189e-06, | |
| "loss": 0.1261, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 1.223896, | |
| "grad_norm": 5.853065013885498, | |
| "learning_rate": 2.830361445783133e-06, | |
| "loss": 0.1161, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 1.224696, | |
| "grad_norm": 4.45815896987915, | |
| "learning_rate": 2.8223293172690765e-06, | |
| "loss": 0.1285, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 1.225496, | |
| "grad_norm": 5.519855499267578, | |
| "learning_rate": 2.81429718875502e-06, | |
| "loss": 0.1464, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.226296, | |
| "grad_norm": 3.830179214477539, | |
| "learning_rate": 2.806265060240964e-06, | |
| "loss": 0.1278, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 1.227096, | |
| "grad_norm": 3.485785722732544, | |
| "learning_rate": 2.798232931726908e-06, | |
| "loss": 0.1599, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 1.2278959999999999, | |
| "grad_norm": 5.015017509460449, | |
| "learning_rate": 2.7902008032128514e-06, | |
| "loss": 0.1213, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 1.228696, | |
| "grad_norm": 5.003856658935547, | |
| "learning_rate": 2.7821686746987953e-06, | |
| "loss": 0.1268, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 1.229496, | |
| "grad_norm": 8.828622817993164, | |
| "learning_rate": 2.77421686746988e-06, | |
| "loss": 0.128, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 1.230296, | |
| "grad_norm": 5.4841790199279785, | |
| "learning_rate": 2.766184738955823e-06, | |
| "loss": 0.1267, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 1.231096, | |
| "grad_norm": 1.1707990169525146, | |
| "learning_rate": 2.758152610441767e-06, | |
| "loss": 0.1173, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 1.2318959999999999, | |
| "grad_norm": 4.860354423522949, | |
| "learning_rate": 2.750120481927711e-06, | |
| "loss": 0.1248, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 1.232696, | |
| "grad_norm": 4.709814071655273, | |
| "learning_rate": 2.742088353413655e-06, | |
| "loss": 0.1288, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 1.233496, | |
| "grad_norm": 5.921780109405518, | |
| "learning_rate": 2.7340562248995988e-06, | |
| "loss": 0.1165, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 1.234296, | |
| "grad_norm": 31.39832305908203, | |
| "learning_rate": 2.7260240963855423e-06, | |
| "loss": 0.1362, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 1.235096, | |
| "grad_norm": 9.929832458496094, | |
| "learning_rate": 2.717991967871486e-06, | |
| "loss": 0.1356, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 1.2358959999999999, | |
| "grad_norm": 6.635560035705566, | |
| "learning_rate": 2.70995983935743e-06, | |
| "loss": 0.1242, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 1.236696, | |
| "grad_norm": 3.8171298503875732, | |
| "learning_rate": 2.701927710843374e-06, | |
| "loss": 0.1322, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 1.237496, | |
| "grad_norm": 5.8909430503845215, | |
| "learning_rate": 2.693895582329317e-06, | |
| "loss": 0.1404, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 1.238296, | |
| "grad_norm": 12.169204711914062, | |
| "learning_rate": 2.685863453815261e-06, | |
| "loss": 0.1312, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 1.239096, | |
| "grad_norm": 2.663769483566284, | |
| "learning_rate": 2.677831325301205e-06, | |
| "loss": 0.1266, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 1.2398959999999999, | |
| "grad_norm": 10.018877983093262, | |
| "learning_rate": 2.669799196787149e-06, | |
| "loss": 0.1279, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 1.240696, | |
| "grad_norm": 3.6418004035949707, | |
| "learning_rate": 2.661767068273093e-06, | |
| "loss": 0.1233, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 1.241496, | |
| "grad_norm": 6.836270332336426, | |
| "learning_rate": 2.6537349397590363e-06, | |
| "loss": 0.137, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 1.241496, | |
| "eval_test1_cer": 0.04675013070430951, | |
| "eval_test1_cer_norm": 0.031027444954018296, | |
| "eval_test1_loss": 0.17643441259860992, | |
| "eval_test1_runtime": 3383.8187, | |
| "eval_test1_samples_per_second": 0.739, | |
| "eval_test1_steps_per_second": 0.185, | |
| "eval_test1_wer": 0.1424531327443949, | |
| "eval_test1_wer_norm": 0.08281948625032877, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 1.241496, | |
| "eval_test2_cer": 0.10391234554074738, | |
| "eval_test2_cer_norm": 0.07987294700960644, | |
| "eval_test2_loss": 0.31574827432632446, | |
| "eval_test2_runtime": 3451.2496, | |
| "eval_test2_samples_per_second": 0.724, | |
| "eval_test2_steps_per_second": 0.181, | |
| "eval_test2_wer": 0.22485122453650722, | |
| "eval_test2_wer_norm": 0.1593513637405455, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 1.242296, | |
| "grad_norm": 6.066310405731201, | |
| "learning_rate": 2.64570281124498e-06, | |
| "loss": 0.1255, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 1.243096, | |
| "grad_norm": 4.294830799102783, | |
| "learning_rate": 2.637670682730924e-06, | |
| "loss": 0.1399, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 1.243896, | |
| "grad_norm": 6.00723934173584, | |
| "learning_rate": 2.629638554216868e-06, | |
| "loss": 0.1446, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 1.244696, | |
| "grad_norm": 10.248518943786621, | |
| "learning_rate": 2.621606425702811e-06, | |
| "loss": 0.1317, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 1.245496, | |
| "grad_norm": 2.9732255935668945, | |
| "learning_rate": 2.613654618473896e-06, | |
| "loss": 0.1329, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 1.246296, | |
| "grad_norm": 5.735791206359863, | |
| "learning_rate": 2.6056224899598398e-06, | |
| "loss": 0.1246, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 1.247096, | |
| "grad_norm": 4.58491849899292, | |
| "learning_rate": 2.5975903614457833e-06, | |
| "loss": 0.118, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 1.247896, | |
| "grad_norm": 6.321929931640625, | |
| "learning_rate": 2.589558232931727e-06, | |
| "loss": 0.1317, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 1.248696, | |
| "grad_norm": 2.8514037132263184, | |
| "learning_rate": 2.581526104417671e-06, | |
| "loss": 0.124, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 1.249496, | |
| "grad_norm": 6.127628803253174, | |
| "learning_rate": 2.573493975903615e-06, | |
| "loss": 0.1429, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 1.250296, | |
| "grad_norm": 5.147157192230225, | |
| "learning_rate": 2.565461847389558e-06, | |
| "loss": 0.1294, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 1.251096, | |
| "grad_norm": 1.7546809911727905, | |
| "learning_rate": 2.557429718875502e-06, | |
| "loss": 0.1228, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 1.251896, | |
| "grad_norm": 5.040311813354492, | |
| "learning_rate": 2.549397590361446e-06, | |
| "loss": 0.1272, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 1.252696, | |
| "grad_norm": 6.703260898590088, | |
| "learning_rate": 2.54136546184739e-06, | |
| "loss": 0.1312, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 1.253496, | |
| "grad_norm": 4.4106974601745605, | |
| "learning_rate": 2.5333333333333338e-06, | |
| "loss": 0.1209, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 1.254296, | |
| "grad_norm": 4.892759799957275, | |
| "learning_rate": 2.5253012048192773e-06, | |
| "loss": 0.1273, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 1.255096, | |
| "grad_norm": 3.4537353515625, | |
| "learning_rate": 2.517269076305221e-06, | |
| "loss": 0.1292, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 1.255896, | |
| "grad_norm": 6.59584379196167, | |
| "learning_rate": 2.5092369477911647e-06, | |
| "loss": 0.1194, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 1.256696, | |
| "grad_norm": 7.888189792633057, | |
| "learning_rate": 2.5012048192771086e-06, | |
| "loss": 0.1292, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 1.257496, | |
| "grad_norm": 3.4533464908599854, | |
| "learning_rate": 2.4931726907630525e-06, | |
| "loss": 0.1271, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 1.258296, | |
| "grad_norm": 1.2868270874023438, | |
| "learning_rate": 2.4851405622489965e-06, | |
| "loss": 0.1416, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 1.259096, | |
| "grad_norm": 4.686305999755859, | |
| "learning_rate": 2.47710843373494e-06, | |
| "loss": 0.135, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 1.259896, | |
| "grad_norm": 2.333994150161743, | |
| "learning_rate": 2.469076305220884e-06, | |
| "loss": 0.1143, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 1.260696, | |
| "grad_norm": 24.702146530151367, | |
| "learning_rate": 2.4610441767068274e-06, | |
| "loss": 0.122, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 1.261496, | |
| "grad_norm": 2.577688694000244, | |
| "learning_rate": 2.4530120481927713e-06, | |
| "loss": 0.1323, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 1.262296, | |
| "grad_norm": 4.011758804321289, | |
| "learning_rate": 2.4449799196787148e-06, | |
| "loss": 0.1375, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 1.263096, | |
| "grad_norm": 3.8181052207946777, | |
| "learning_rate": 2.4369477911646587e-06, | |
| "loss": 0.1364, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 1.263896, | |
| "grad_norm": 5.570916652679443, | |
| "learning_rate": 2.4289959839357434e-06, | |
| "loss": 0.1334, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 1.264696, | |
| "grad_norm": 5.255520820617676, | |
| "learning_rate": 2.420963855421687e-06, | |
| "loss": 0.1346, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 1.265496, | |
| "grad_norm": 6.02402400970459, | |
| "learning_rate": 2.412931726907631e-06, | |
| "loss": 0.1299, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 1.266296, | |
| "grad_norm": 5.61665153503418, | |
| "learning_rate": 2.4048995983935744e-06, | |
| "loss": 0.1332, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 1.267096, | |
| "grad_norm": 3.135876417160034, | |
| "learning_rate": 2.3968674698795183e-06, | |
| "loss": 0.1466, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 1.267896, | |
| "grad_norm": 3.9527804851531982, | |
| "learning_rate": 2.3888353413654618e-06, | |
| "loss": 0.118, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 1.268696, | |
| "grad_norm": 8.5547513961792, | |
| "learning_rate": 2.3808032128514057e-06, | |
| "loss": 0.1289, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 1.269496, | |
| "grad_norm": 4.350017070770264, | |
| "learning_rate": 2.3727710843373496e-06, | |
| "loss": 0.1485, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 1.270296, | |
| "grad_norm": 7.225109100341797, | |
| "learning_rate": 2.3647389558232935e-06, | |
| "loss": 0.1724, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 1.271096, | |
| "grad_norm": 3.152327060699463, | |
| "learning_rate": 2.356706827309237e-06, | |
| "loss": 0.151, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 1.271896, | |
| "grad_norm": 9.343049049377441, | |
| "learning_rate": 2.348674698795181e-06, | |
| "loss": 0.1219, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 1.272696, | |
| "grad_norm": 3.7528295516967773, | |
| "learning_rate": 2.340642570281125e-06, | |
| "loss": 0.1011, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 1.273496, | |
| "grad_norm": 1.7038061618804932, | |
| "learning_rate": 2.3326104417670684e-06, | |
| "loss": 0.1287, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 1.273496, | |
| "eval_test1_cer": 0.04843528269474942, | |
| "eval_test1_cer_norm": 0.03259298388839532, | |
| "eval_test1_loss": 0.17615145444869995, | |
| "eval_test1_runtime": 3390.194, | |
| "eval_test1_samples_per_second": 0.737, | |
| "eval_test1_steps_per_second": 0.184, | |
| "eval_test1_wer": 0.14195749147205458, | |
| "eval_test1_wer_norm": 0.08346240392764254, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 1.273496, | |
| "eval_test2_cer": 0.10040784709000634, | |
| "eval_test2_cer_norm": 0.07324914781530834, | |
| "eval_test2_loss": 0.31477001309394836, | |
| "eval_test2_runtime": 3429.6171, | |
| "eval_test2_samples_per_second": 0.729, | |
| "eval_test2_steps_per_second": 0.182, | |
| "eval_test2_wer": 0.21283474479285877, | |
| "eval_test2_wer_norm": 0.14789135915654367, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 1.274296, | |
| "grad_norm": 4.975541591644287, | |
| "learning_rate": 2.3245783132530123e-06, | |
| "loss": 0.1545, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 1.275096, | |
| "grad_norm": 2.7354822158813477, | |
| "learning_rate": 2.316546184738956e-06, | |
| "loss": 0.1379, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 1.275896, | |
| "grad_norm": 4.287178993225098, | |
| "learning_rate": 2.3085140562248997e-06, | |
| "loss": 0.1109, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 1.276696, | |
| "grad_norm": 7.494585990905762, | |
| "learning_rate": 2.3004819277108436e-06, | |
| "loss": 0.1361, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 1.277496, | |
| "grad_norm": 5.967809677124023, | |
| "learning_rate": 2.292449799196787e-06, | |
| "loss": 0.1282, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 1.278296, | |
| "grad_norm": 6.256405830383301, | |
| "learning_rate": 2.284417670682731e-06, | |
| "loss": 0.1254, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 1.279096, | |
| "grad_norm": 4.7275214195251465, | |
| "learning_rate": 2.276385542168675e-06, | |
| "loss": 0.1125, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 1.279896, | |
| "grad_norm": 6.460155010223389, | |
| "learning_rate": 2.2685140562248997e-06, | |
| "loss": 0.1531, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 1.280696, | |
| "grad_norm": 7.814698696136475, | |
| "learning_rate": 2.2604819277108436e-06, | |
| "loss": 0.1353, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 1.281496, | |
| "grad_norm": 2.7123336791992188, | |
| "learning_rate": 2.252449799196787e-06, | |
| "loss": 0.1223, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 1.282296, | |
| "grad_norm": 4.504587650299072, | |
| "learning_rate": 2.244417670682731e-06, | |
| "loss": 0.1358, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 1.283096, | |
| "grad_norm": 7.585097789764404, | |
| "learning_rate": 2.236385542168675e-06, | |
| "loss": 0.1378, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 1.283896, | |
| "grad_norm": 3.129997491836548, | |
| "learning_rate": 2.228353413654619e-06, | |
| "loss": 0.1234, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 1.284696, | |
| "grad_norm": 6.103450775146484, | |
| "learning_rate": 2.2203212851405628e-06, | |
| "loss": 0.1188, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 1.285496, | |
| "grad_norm": 5.75684928894043, | |
| "learning_rate": 2.2122891566265063e-06, | |
| "loss": 0.118, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 1.286296, | |
| "grad_norm": 4.268872261047363, | |
| "learning_rate": 2.20425702811245e-06, | |
| "loss": 0.1242, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 1.287096, | |
| "grad_norm": 10.057229042053223, | |
| "learning_rate": 2.1962248995983937e-06, | |
| "loss": 0.107, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 1.287896, | |
| "grad_norm": 6.790497303009033, | |
| "learning_rate": 2.1881927710843376e-06, | |
| "loss": 0.1169, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 1.288696, | |
| "grad_norm": 7.399913311004639, | |
| "learning_rate": 2.180160642570281e-06, | |
| "loss": 0.1389, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 1.289496, | |
| "grad_norm": 8.95304012298584, | |
| "learning_rate": 2.172128514056225e-06, | |
| "loss": 0.1119, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 1.290296, | |
| "grad_norm": 3.782831907272339, | |
| "learning_rate": 2.164096385542169e-06, | |
| "loss": 0.1158, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 1.291096, | |
| "grad_norm": 9.19373893737793, | |
| "learning_rate": 2.1560642570281124e-06, | |
| "loss": 0.1373, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 1.291896, | |
| "grad_norm": 8.121448516845703, | |
| "learning_rate": 2.1480321285140563e-06, | |
| "loss": 0.1379, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 1.292696, | |
| "grad_norm": 5.722080707550049, | |
| "learning_rate": 2.1400000000000003e-06, | |
| "loss": 0.1171, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 1.293496, | |
| "grad_norm": 4.1129255294799805, | |
| "learning_rate": 2.131967871485944e-06, | |
| "loss": 0.1302, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 1.2942960000000001, | |
| "grad_norm": 3.102550983428955, | |
| "learning_rate": 2.1239357429718877e-06, | |
| "loss": 0.1256, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 1.295096, | |
| "grad_norm": 5.705405235290527, | |
| "learning_rate": 2.1159036144578316e-06, | |
| "loss": 0.1103, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 1.295896, | |
| "grad_norm": 5.043685436248779, | |
| "learning_rate": 2.107871485943775e-06, | |
| "loss": 0.1207, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 1.296696, | |
| "grad_norm": 5.753846645355225, | |
| "learning_rate": 2.099839357429719e-06, | |
| "loss": 0.1195, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 1.297496, | |
| "grad_norm": 5.277960300445557, | |
| "learning_rate": 2.091807228915663e-06, | |
| "loss": 0.1326, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 1.2982960000000001, | |
| "grad_norm": 4.641193866729736, | |
| "learning_rate": 2.0837751004016064e-06, | |
| "loss": 0.1409, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 1.299096, | |
| "grad_norm": 6.307822227478027, | |
| "learning_rate": 2.0757429718875504e-06, | |
| "loss": 0.1271, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 1.299896, | |
| "grad_norm": 4.50252628326416, | |
| "learning_rate": 2.0677108433734943e-06, | |
| "loss": 0.1242, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 1.300696, | |
| "grad_norm": 6.755404949188232, | |
| "learning_rate": 2.059678714859438e-06, | |
| "loss": 0.1349, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 1.301496, | |
| "grad_norm": 4.805443286895752, | |
| "learning_rate": 2.0516465863453817e-06, | |
| "loss": 0.1227, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 1.3022960000000001, | |
| "grad_norm": 3.4150850772857666, | |
| "learning_rate": 2.0436144578313256e-06, | |
| "loss": 0.1147, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 1.303096, | |
| "grad_norm": 6.213440895080566, | |
| "learning_rate": 2.035582329317269e-06, | |
| "loss": 0.1236, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 1.303896, | |
| "grad_norm": 6.957757472991943, | |
| "learning_rate": 2.027550200803213e-06, | |
| "loss": 0.1323, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 1.304696, | |
| "grad_norm": 7.19875955581665, | |
| "learning_rate": 2.019518072289157e-06, | |
| "loss": 0.1283, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 1.305496, | |
| "grad_norm": 2.5074002742767334, | |
| "learning_rate": 2.0114859437751004e-06, | |
| "loss": 0.1128, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.305496, | |
| "eval_test1_cer": 0.0477444170587796, | |
| "eval_test1_cer_norm": 0.03308281508872188, | |
| "eval_test1_loss": 0.17668992280960083, | |
| "eval_test1_runtime": 3382.0305, | |
| "eval_test1_samples_per_second": 0.739, | |
| "eval_test1_steps_per_second": 0.185, | |
| "eval_test1_wer": 0.1387795562552844, | |
| "eval_test1_wer_norm": 0.07951722727139893, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.305496, | |
| "eval_test2_cer": 0.10429499384029567, | |
| "eval_test2_cer_norm": 0.0824295010845987, | |
| "eval_test2_loss": 0.3178161084651947, | |
| "eval_test2_runtime": 3460.99, | |
| "eval_test2_samples_per_second": 0.722, | |
| "eval_test2_steps_per_second": 0.181, | |
| "eval_test2_wer": 0.2208743419546807, | |
| "eval_test2_wer_norm": 0.15585606234242494, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.3062960000000001, | |
| "grad_norm": 4.352635383605957, | |
| "learning_rate": 2.0034538152610444e-06, | |
| "loss": 0.1269, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 1.307096, | |
| "grad_norm": 6.455359935760498, | |
| "learning_rate": 1.995421686746988e-06, | |
| "loss": 0.1228, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 1.307896, | |
| "grad_norm": 4.56587028503418, | |
| "learning_rate": 1.9873895582329318e-06, | |
| "loss": 0.1302, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 1.308696, | |
| "grad_norm": 6.275040149688721, | |
| "learning_rate": 1.9793574297188757e-06, | |
| "loss": 0.1316, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 1.309496, | |
| "grad_norm": 2.4755728244781494, | |
| "learning_rate": 1.9713253012048196e-06, | |
| "loss": 0.1324, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 1.3102960000000001, | |
| "grad_norm": 5.687671184539795, | |
| "learning_rate": 1.963293172690763e-06, | |
| "loss": 0.1296, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 1.311096, | |
| "grad_norm": 3.915098190307617, | |
| "learning_rate": 1.955261044176707e-06, | |
| "loss": 0.1198, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 1.311896, | |
| "grad_norm": 5.507267475128174, | |
| "learning_rate": 1.947228915662651e-06, | |
| "loss": 0.1308, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 1.3126959999999999, | |
| "grad_norm": 6.799343109130859, | |
| "learning_rate": 1.939277108433735e-06, | |
| "loss": 0.1106, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 1.313496, | |
| "grad_norm": 2.4747636318206787, | |
| "learning_rate": 1.9312449799196788e-06, | |
| "loss": 0.1198, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 1.3142960000000001, | |
| "grad_norm": 1.4364855289459229, | |
| "learning_rate": 1.9232128514056227e-06, | |
| "loss": 0.1251, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 1.315096, | |
| "grad_norm": 3.4942729473114014, | |
| "learning_rate": 1.9151807228915666e-06, | |
| "loss": 0.105, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 1.315896, | |
| "grad_norm": 7.960775852203369, | |
| "learning_rate": 1.9071485943775103e-06, | |
| "loss": 0.121, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 1.3166959999999999, | |
| "grad_norm": 4.901790618896484, | |
| "learning_rate": 1.899116465863454e-06, | |
| "loss": 0.1248, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 1.317496, | |
| "grad_norm": 3.4517650604248047, | |
| "learning_rate": 1.891084337349398e-06, | |
| "loss": 0.1101, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 1.3182960000000001, | |
| "grad_norm": 2.9589340686798096, | |
| "learning_rate": 1.8830522088353414e-06, | |
| "loss": 0.1569, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 1.319096, | |
| "grad_norm": 4.821226119995117, | |
| "learning_rate": 1.8750200803212854e-06, | |
| "loss": 0.1248, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 1.319896, | |
| "grad_norm": 5.956510543823242, | |
| "learning_rate": 1.866987951807229e-06, | |
| "loss": 0.129, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 1.3206959999999999, | |
| "grad_norm": 8.236087799072266, | |
| "learning_rate": 1.8589558232931728e-06, | |
| "loss": 0.1288, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 1.321496, | |
| "grad_norm": 7.885124206542969, | |
| "learning_rate": 1.8509236947791165e-06, | |
| "loss": 0.133, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 1.3222960000000001, | |
| "grad_norm": 4.618575096130371, | |
| "learning_rate": 1.8428915662650604e-06, | |
| "loss": 0.131, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 1.323096, | |
| "grad_norm": 3.3860082626342773, | |
| "learning_rate": 1.8348594377510043e-06, | |
| "loss": 0.1275, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 1.323896, | |
| "grad_norm": 0.5851134061813354, | |
| "learning_rate": 1.8268273092369478e-06, | |
| "loss": 0.1163, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 1.3246959999999999, | |
| "grad_norm": 8.403120040893555, | |
| "learning_rate": 1.8187951807228917e-06, | |
| "loss": 0.142, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 1.325496, | |
| "grad_norm": 4.019391059875488, | |
| "learning_rate": 1.8107630522088354e-06, | |
| "loss": 0.1234, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 1.326296, | |
| "grad_norm": 7.64513635635376, | |
| "learning_rate": 1.8028112449799198e-06, | |
| "loss": 0.1202, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 1.327096, | |
| "grad_norm": 4.8131489753723145, | |
| "learning_rate": 1.7947791164658637e-06, | |
| "loss": 0.1169, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 1.327896, | |
| "grad_norm": 7.043356895446777, | |
| "learning_rate": 1.7867469879518074e-06, | |
| "loss": 0.1209, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 1.3286959999999999, | |
| "grad_norm": 4.337855815887451, | |
| "learning_rate": 1.7787148594377513e-06, | |
| "loss": 0.1365, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 1.329496, | |
| "grad_norm": 6.657166481018066, | |
| "learning_rate": 1.7706827309236948e-06, | |
| "loss": 0.1256, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 1.330296, | |
| "grad_norm": 4.905508041381836, | |
| "learning_rate": 1.7626506024096387e-06, | |
| "loss": 0.1118, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 1.331096, | |
| "grad_norm": 5.122402667999268, | |
| "learning_rate": 1.7546184738955824e-06, | |
| "loss": 0.1276, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 1.331896, | |
| "grad_norm": 4.78582239151001, | |
| "learning_rate": 1.7465863453815264e-06, | |
| "loss": 0.1194, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 1.3326959999999999, | |
| "grad_norm": 5.943844318389893, | |
| "learning_rate": 1.7385542168674698e-06, | |
| "loss": 0.1206, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 1.333496, | |
| "grad_norm": 8.206818580627441, | |
| "learning_rate": 1.7305220883534138e-06, | |
| "loss": 0.1151, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 1.334296, | |
| "grad_norm": 5.556063175201416, | |
| "learning_rate": 1.7224899598393577e-06, | |
| "loss": 0.1114, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 1.335096, | |
| "grad_norm": 7.587924957275391, | |
| "learning_rate": 1.7144578313253014e-06, | |
| "loss": 0.1207, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 1.335896, | |
| "grad_norm": 7.417220592498779, | |
| "learning_rate": 1.706425702811245e-06, | |
| "loss": 0.1163, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 1.3366959999999999, | |
| "grad_norm": 7.316643238067627, | |
| "learning_rate": 1.6983935742971888e-06, | |
| "loss": 0.1157, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 1.337496, | |
| "grad_norm": 1.7829804420471191, | |
| "learning_rate": 1.6903614457831327e-06, | |
| "loss": 0.1155, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.337496, | |
| "eval_test1_cer": 0.045685824184031665, | |
| "eval_test1_cer_norm": 0.03030710495353807, | |
| "eval_test1_loss": 0.17226466536521912, | |
| "eval_test1_runtime": 3385.0143, | |
| "eval_test1_samples_per_second": 0.739, | |
| "eval_test1_steps_per_second": 0.185, | |
| "eval_test1_wer": 0.13615557304877693, | |
| "eval_test1_wer_norm": 0.07755925070867062, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.337496, | |
| "eval_test2_cer": 0.10362769253742488, | |
| "eval_test2_cer_norm": 0.08312674310505114, | |
| "eval_test2_loss": 0.31396326422691345, | |
| "eval_test2_runtime": 3448.444, | |
| "eval_test2_samples_per_second": 0.725, | |
| "eval_test2_steps_per_second": 0.181, | |
| "eval_test2_wer": 0.21904325932707713, | |
| "eval_test2_wer_norm": 0.15513981205592484, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.338296, | |
| "grad_norm": 3.8256428241729736, | |
| "learning_rate": 1.6823293172690762e-06, | |
| "loss": 0.1251, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 1.339096, | |
| "grad_norm": 4.764732837677002, | |
| "learning_rate": 1.6742971887550201e-06, | |
| "loss": 0.1183, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 1.339896, | |
| "grad_norm": 3.5565683841705322, | |
| "learning_rate": 1.666265060240964e-06, | |
| "loss": 0.1152, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 1.3406959999999999, | |
| "grad_norm": 5.896035671234131, | |
| "learning_rate": 1.6582329317269078e-06, | |
| "loss": 0.136, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 1.341496, | |
| "grad_norm": 6.5424628257751465, | |
| "learning_rate": 1.6502008032128517e-06, | |
| "loss": 0.128, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 1.342296, | |
| "grad_norm": 5.04475736618042, | |
| "learning_rate": 1.6421686746987952e-06, | |
| "loss": 0.1149, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 1.343096, | |
| "grad_norm": 5.732762813568115, | |
| "learning_rate": 1.6341365461847391e-06, | |
| "loss": 0.131, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 1.343896, | |
| "grad_norm": 5.360403060913086, | |
| "learning_rate": 1.6261044176706828e-06, | |
| "loss": 0.1283, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 1.344696, | |
| "grad_norm": 6.674147129058838, | |
| "learning_rate": 1.6180722891566267e-06, | |
| "loss": 0.1357, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 1.345496, | |
| "grad_norm": 3.1230456829071045, | |
| "learning_rate": 1.6100401606425702e-06, | |
| "loss": 0.1237, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 1.346296, | |
| "grad_norm": 6.3545756340026855, | |
| "learning_rate": 1.6020080321285142e-06, | |
| "loss": 0.1248, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 1.347096, | |
| "grad_norm": 9.923174858093262, | |
| "learning_rate": 1.593975903614458e-06, | |
| "loss": 0.1382, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 1.347896, | |
| "grad_norm": 3.6945223808288574, | |
| "learning_rate": 1.5859437751004018e-06, | |
| "loss": 0.1244, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 1.348696, | |
| "grad_norm": 8.398360252380371, | |
| "learning_rate": 1.5779116465863457e-06, | |
| "loss": 0.123, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 1.349496, | |
| "grad_norm": 1.4128965139389038, | |
| "learning_rate": 1.5698795180722892e-06, | |
| "loss": 0.1175, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 1.350296, | |
| "grad_norm": 11.629409790039062, | |
| "learning_rate": 1.5618473895582331e-06, | |
| "loss": 0.1117, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 1.351096, | |
| "grad_norm": 4.094301700592041, | |
| "learning_rate": 1.5538152610441768e-06, | |
| "loss": 0.1193, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 1.351896, | |
| "grad_norm": 4.269906044006348, | |
| "learning_rate": 1.5457831325301205e-06, | |
| "loss": 0.1256, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 1.352696, | |
| "grad_norm": 5.27727746963501, | |
| "learning_rate": 1.5377510040160642e-06, | |
| "loss": 0.1224, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 1.353496, | |
| "grad_norm": 4.353626728057861, | |
| "learning_rate": 1.5297188755020082e-06, | |
| "loss": 0.1196, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 1.354296, | |
| "grad_norm": 3.226297616958618, | |
| "learning_rate": 1.521686746987952e-06, | |
| "loss": 0.1364, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 1.355096, | |
| "grad_norm": 0.7694222331047058, | |
| "learning_rate": 1.5136546184738956e-06, | |
| "loss": 0.103, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 1.355896, | |
| "grad_norm": 3.6190733909606934, | |
| "learning_rate": 1.5056224899598395e-06, | |
| "loss": 0.1235, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 1.356696, | |
| "grad_norm": 3.509673833847046, | |
| "learning_rate": 1.4975903614457832e-06, | |
| "loss": 0.121, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 1.357496, | |
| "grad_norm": 8.607564926147461, | |
| "learning_rate": 1.4895582329317271e-06, | |
| "loss": 0.1318, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 1.358296, | |
| "grad_norm": 3.327017307281494, | |
| "learning_rate": 1.4815261044176706e-06, | |
| "loss": 0.1283, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 1.359096, | |
| "grad_norm": 8.573492050170898, | |
| "learning_rate": 1.4735742971887552e-06, | |
| "loss": 0.1222, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 1.359896, | |
| "grad_norm": 4.089103698730469, | |
| "learning_rate": 1.465542168674699e-06, | |
| "loss": 0.1257, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 1.360696, | |
| "grad_norm": 3.942911386489868, | |
| "learning_rate": 1.4575100401606426e-06, | |
| "loss": 0.1205, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 1.361496, | |
| "grad_norm": 6.010968208312988, | |
| "learning_rate": 1.4494779116465865e-06, | |
| "loss": 0.1295, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 1.362296, | |
| "grad_norm": 6.7945756912231445, | |
| "learning_rate": 1.4414457831325302e-06, | |
| "loss": 0.1246, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 1.363096, | |
| "grad_norm": 2.255643129348755, | |
| "learning_rate": 1.4334136546184741e-06, | |
| "loss": 0.1254, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 1.363896, | |
| "grad_norm": 4.047929763793945, | |
| "learning_rate": 1.4253815261044176e-06, | |
| "loss": 0.1197, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 1.364696, | |
| "grad_norm": 4.005350112915039, | |
| "learning_rate": 1.4173493975903615e-06, | |
| "loss": 0.1261, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 1.365496, | |
| "grad_norm": 5.783274173736572, | |
| "learning_rate": 1.4093172690763055e-06, | |
| "loss": 0.1157, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 1.366296, | |
| "grad_norm": 5.45566463470459, | |
| "learning_rate": 1.4012851405622492e-06, | |
| "loss": 0.125, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 1.367096, | |
| "grad_norm": 4.851735591888428, | |
| "learning_rate": 1.3932530120481929e-06, | |
| "loss": 0.1242, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 1.367896, | |
| "grad_norm": 5.266098976135254, | |
| "learning_rate": 1.3852208835341366e-06, | |
| "loss": 0.1267, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 1.368696, | |
| "grad_norm": 4.1477742195129395, | |
| "learning_rate": 1.3771887550200805e-06, | |
| "loss": 0.1158, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 1.369496, | |
| "grad_norm": 12.537534713745117, | |
| "learning_rate": 1.369156626506024e-06, | |
| "loss": 0.1114, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.369496, | |
| "eval_test1_cer": 0.041395922025543354, | |
| "eval_test1_cer_norm": 0.027075179484716785, | |
| "eval_test1_loss": 0.1729966700077057, | |
| "eval_test1_runtime": 3379.841, | |
| "eval_test1_samples_per_second": 0.74, | |
| "eval_test1_steps_per_second": 0.185, | |
| "eval_test1_wer": 0.12840024490509927, | |
| "eval_test1_wer_norm": 0.06993190917326632, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.369496, | |
| "eval_test2_cer": 0.1033523724194572, | |
| "eval_test2_cer_norm": 0.077180818097304, | |
| "eval_test2_loss": 0.3105059862136841, | |
| "eval_test2_runtime": 3449.5535, | |
| "eval_test2_samples_per_second": 0.725, | |
| "eval_test2_steps_per_second": 0.181, | |
| "eval_test2_wer": 0.2153238727397574, | |
| "eval_test2_wer_norm": 0.15038391015356406, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.370296, | |
| "grad_norm": 2.571197986602783, | |
| "learning_rate": 1.361124497991968e-06, | |
| "loss": 0.1133, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 1.371096, | |
| "grad_norm": 2.839953660964966, | |
| "learning_rate": 1.3531726907630524e-06, | |
| "loss": 0.1225, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 1.371896, | |
| "grad_norm": 2.108365535736084, | |
| "learning_rate": 1.345140562248996e-06, | |
| "loss": 0.1111, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 1.372696, | |
| "grad_norm": 4.538883686065674, | |
| "learning_rate": 1.3371084337349399e-06, | |
| "loss": 0.0988, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 1.373496, | |
| "grad_norm": 5.8930277824401855, | |
| "learning_rate": 1.3290763052208836e-06, | |
| "loss": 0.1124, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 1.374296, | |
| "grad_norm": 3.4291086196899414, | |
| "learning_rate": 1.3210441767068275e-06, | |
| "loss": 0.0998, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 1.375096, | |
| "grad_norm": 3.634300947189331, | |
| "learning_rate": 1.313012048192771e-06, | |
| "loss": 0.123, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 1.375896, | |
| "grad_norm": 10.641935348510742, | |
| "learning_rate": 1.304979919678715e-06, | |
| "loss": 0.1331, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 1.376696, | |
| "grad_norm": 3.8561360836029053, | |
| "learning_rate": 1.2969477911646588e-06, | |
| "loss": 0.1386, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 1.377496, | |
| "grad_norm": 5.578998565673828, | |
| "learning_rate": 1.2889156626506025e-06, | |
| "loss": 0.1252, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 1.378296, | |
| "grad_norm": 6.097825050354004, | |
| "learning_rate": 1.2808835341365464e-06, | |
| "loss": 0.1226, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 1.379096, | |
| "grad_norm": 7.930078983306885, | |
| "learning_rate": 1.27285140562249e-06, | |
| "loss": 0.1324, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 1.379896, | |
| "grad_norm": 4.191389083862305, | |
| "learning_rate": 1.2648192771084339e-06, | |
| "loss": 0.1265, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 1.380696, | |
| "grad_norm": 4.194836139678955, | |
| "learning_rate": 1.2567871485943776e-06, | |
| "loss": 0.1131, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 1.381496, | |
| "grad_norm": 0.9715979695320129, | |
| "learning_rate": 1.2487550200803215e-06, | |
| "loss": 0.1149, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 1.382296, | |
| "grad_norm": 2.0711517333984375, | |
| "learning_rate": 1.2407228915662652e-06, | |
| "loss": 0.1196, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 1.383096, | |
| "grad_norm": 3.178311824798584, | |
| "learning_rate": 1.232690763052209e-06, | |
| "loss": 0.1116, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 1.383896, | |
| "grad_norm": 4.997854232788086, | |
| "learning_rate": 1.2246586345381526e-06, | |
| "loss": 0.1215, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 1.384696, | |
| "grad_norm": 4.431860446929932, | |
| "learning_rate": 1.2166265060240963e-06, | |
| "loss": 0.1129, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 1.385496, | |
| "grad_norm": 4.320796966552734, | |
| "learning_rate": 1.2085943775100402e-06, | |
| "loss": 0.1375, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.386296, | |
| "grad_norm": 7.0986552238464355, | |
| "learning_rate": 1.2005622489959842e-06, | |
| "loss": 0.1051, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 1.387096, | |
| "grad_norm": 2.893299102783203, | |
| "learning_rate": 1.1925301204819279e-06, | |
| "loss": 0.1133, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 1.387896, | |
| "grad_norm": 1.8434147834777832, | |
| "learning_rate": 1.1844979919678716e-06, | |
| "loss": 0.1099, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 1.388696, | |
| "grad_norm": 3.1360116004943848, | |
| "learning_rate": 1.1764658634538153e-06, | |
| "loss": 0.1276, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 1.389496, | |
| "grad_norm": 2.8671581745147705, | |
| "learning_rate": 1.1684337349397592e-06, | |
| "loss": 0.1113, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 1.390296, | |
| "grad_norm": 8.354147911071777, | |
| "learning_rate": 1.160401606425703e-06, | |
| "loss": 0.1085, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 1.391096, | |
| "grad_norm": 14.839973449707031, | |
| "learning_rate": 1.1523694779116466e-06, | |
| "loss": 0.1096, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 1.391896, | |
| "grad_norm": 3.917928695678711, | |
| "learning_rate": 1.1443373493975903e-06, | |
| "loss": 0.1219, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 1.392696, | |
| "grad_norm": 6.117668151855469, | |
| "learning_rate": 1.1363052208835343e-06, | |
| "loss": 0.1115, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 1.393496, | |
| "grad_norm": 1.9348158836364746, | |
| "learning_rate": 1.128273092369478e-06, | |
| "loss": 0.1219, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 1.394296, | |
| "grad_norm": 5.231213569641113, | |
| "learning_rate": 1.1202409638554219e-06, | |
| "loss": 0.1178, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 1.3950960000000001, | |
| "grad_norm": 5.860185623168945, | |
| "learning_rate": 1.1122088353413656e-06, | |
| "loss": 0.1136, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 1.395896, | |
| "grad_norm": 4.386104106903076, | |
| "learning_rate": 1.10425702811245e-06, | |
| "loss": 0.1159, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 1.396696, | |
| "grad_norm": 1.6194918155670166, | |
| "learning_rate": 1.0962248995983938e-06, | |
| "loss": 0.1187, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 1.397496, | |
| "grad_norm": 5.940235137939453, | |
| "learning_rate": 1.0881927710843375e-06, | |
| "loss": 0.1157, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 1.398296, | |
| "grad_norm": 4.942705154418945, | |
| "learning_rate": 1.0801606425702812e-06, | |
| "loss": 0.1005, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 1.3990960000000001, | |
| "grad_norm": 5.09475564956665, | |
| "learning_rate": 1.072128514056225e-06, | |
| "loss": 0.1204, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 1.399896, | |
| "grad_norm": 5.27365255355835, | |
| "learning_rate": 1.0640963855421687e-06, | |
| "loss": 0.127, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 1.400696, | |
| "grad_norm": 6.454215049743652, | |
| "learning_rate": 1.0560642570281126e-06, | |
| "loss": 0.1025, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 1.401496, | |
| "grad_norm": 5.676671028137207, | |
| "learning_rate": 1.0480321285140563e-06, | |
| "loss": 0.1141, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.401496, | |
| "eval_test1_cer": 0.04145660616924341, | |
| "eval_test1_cer_norm": 0.02695992508463995, | |
| "eval_test1_loss": 0.17103232443332672, | |
| "eval_test1_runtime": 2490.4053, | |
| "eval_test1_samples_per_second": 1.004, | |
| "eval_test1_steps_per_second": 0.251, | |
| "eval_test1_wer": 0.12912912912912913, | |
| "eval_test1_wer_norm": 0.07016569741956223, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.401496, | |
| "eval_test2_cer": 0.08579254862433269, | |
| "eval_test2_cer_norm": 0.06814572358227455, | |
| "eval_test2_loss": 0.3084600865840912, | |
| "eval_test2_runtime": 2471.9908, | |
| "eval_test2_samples_per_second": 1.011, | |
| "eval_test2_steps_per_second": 0.253, | |
| "eval_test2_wer": 0.19641222247653925, | |
| "eval_test2_wer_norm": 0.13070135228054092, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.402296, | |
| "grad_norm": 2.858332633972168, | |
| "learning_rate": 1.04e-06, | |
| "loss": 0.1044, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 1.4030960000000001, | |
| "grad_norm": 5.488587379455566, | |
| "learning_rate": 1.0319678714859437e-06, | |
| "loss": 0.1366, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 1.403896, | |
| "grad_norm": 13.426318168640137, | |
| "learning_rate": 1.0239357429718876e-06, | |
| "loss": 0.1186, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 1.404696, | |
| "grad_norm": 4.668258190155029, | |
| "learning_rate": 1.0159036144578315e-06, | |
| "loss": 0.1056, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 1.405496, | |
| "grad_norm": 3.497905969619751, | |
| "learning_rate": 1.0078714859437753e-06, | |
| "loss": 0.1168, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 1.406296, | |
| "grad_norm": 5.060064315795898, | |
| "learning_rate": 9.99839357429719e-07, | |
| "loss": 0.1167, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 1.4070960000000001, | |
| "grad_norm": 1.985062837600708, | |
| "learning_rate": 9.918072289156627e-07, | |
| "loss": 0.1234, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 1.407896, | |
| "grad_norm": 3.2145867347717285, | |
| "learning_rate": 9.837751004016064e-07, | |
| "loss": 0.1231, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 1.408696, | |
| "grad_norm": 6.194746971130371, | |
| "learning_rate": 9.757429718875503e-07, | |
| "loss": 0.1038, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 1.409496, | |
| "grad_norm": 4.408815860748291, | |
| "learning_rate": 9.67710843373494e-07, | |
| "loss": 0.1157, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 1.410296, | |
| "grad_norm": 4.587121486663818, | |
| "learning_rate": 9.59678714859438e-07, | |
| "loss": 0.1163, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 1.4110960000000001, | |
| "grad_norm": 4.01561975479126, | |
| "learning_rate": 9.516465863453816e-07, | |
| "loss": 0.1077, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 1.411896, | |
| "grad_norm": 4.253355026245117, | |
| "learning_rate": 9.436144578313254e-07, | |
| "loss": 0.1209, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 1.412696, | |
| "grad_norm": 1.5479423999786377, | |
| "learning_rate": 9.355823293172692e-07, | |
| "loss": 0.1378, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 1.4134959999999999, | |
| "grad_norm": 4.244668960571289, | |
| "learning_rate": 9.27550200803213e-07, | |
| "loss": 0.1381, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 1.414296, | |
| "grad_norm": 4.061355113983154, | |
| "learning_rate": 9.195180722891567e-07, | |
| "loss": 0.1283, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 1.4150960000000001, | |
| "grad_norm": 8.999344825744629, | |
| "learning_rate": 9.114859437751005e-07, | |
| "loss": 0.1291, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 1.415896, | |
| "grad_norm": 3.263817310333252, | |
| "learning_rate": 9.034538152610442e-07, | |
| "loss": 0.119, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 1.416696, | |
| "grad_norm": 5.394161224365234, | |
| "learning_rate": 8.954216867469879e-07, | |
| "loss": 0.1218, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 1.4174959999999999, | |
| "grad_norm": 11.564488410949707, | |
| "learning_rate": 8.873895582329318e-07, | |
| "loss": 0.1222, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 1.418296, | |
| "grad_norm": 3.848464250564575, | |
| "learning_rate": 8.793574297188756e-07, | |
| "loss": 0.1175, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 1.4190960000000001, | |
| "grad_norm": 3.4740781784057617, | |
| "learning_rate": 8.713253012048194e-07, | |
| "loss": 0.1267, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 1.419896, | |
| "grad_norm": 3.503969192504883, | |
| "learning_rate": 8.632931726907632e-07, | |
| "loss": 0.1333, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 1.420696, | |
| "grad_norm": 3.658344268798828, | |
| "learning_rate": 8.552610441767069e-07, | |
| "loss": 0.1137, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 1.4214959999999999, | |
| "grad_norm": 5.1479973793029785, | |
| "learning_rate": 8.472289156626507e-07, | |
| "loss": 0.1254, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 1.422296, | |
| "grad_norm": 5.20347261428833, | |
| "learning_rate": 8.391967871485944e-07, | |
| "loss": 0.1167, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 1.4230960000000001, | |
| "grad_norm": 5.197832107543945, | |
| "learning_rate": 8.311646586345381e-07, | |
| "loss": 0.1297, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 1.423896, | |
| "grad_norm": 2.3857178688049316, | |
| "learning_rate": 8.23132530120482e-07, | |
| "loss": 0.1173, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 1.424696, | |
| "grad_norm": 5.162271976470947, | |
| "learning_rate": 8.151004016064258e-07, | |
| "loss": 0.1089, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 1.4254959999999999, | |
| "grad_norm": 4.035723686218262, | |
| "learning_rate": 8.070682730923695e-07, | |
| "loss": 0.1186, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 1.426296, | |
| "grad_norm": 7.1610283851623535, | |
| "learning_rate": 7.991164658634539e-07, | |
| "loss": 0.1196, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 1.427096, | |
| "grad_norm": 6.167757987976074, | |
| "learning_rate": 7.910843373493976e-07, | |
| "loss": 0.112, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 1.427896, | |
| "grad_norm": 8.109100341796875, | |
| "learning_rate": 7.830522088353415e-07, | |
| "loss": 0.1092, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 1.428696, | |
| "grad_norm": 2.2968735694885254, | |
| "learning_rate": 7.750200803212853e-07, | |
| "loss": 0.1058, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 1.4294959999999999, | |
| "grad_norm": 4.020580768585205, | |
| "learning_rate": 7.66987951807229e-07, | |
| "loss": 0.1089, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 1.430296, | |
| "grad_norm": 1.7853455543518066, | |
| "learning_rate": 7.589558232931728e-07, | |
| "loss": 0.1077, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 1.431096, | |
| "grad_norm": 2.6213812828063965, | |
| "learning_rate": 7.509236947791165e-07, | |
| "loss": 0.114, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 1.431896, | |
| "grad_norm": 7.169987678527832, | |
| "learning_rate": 7.428915662650602e-07, | |
| "loss": 0.1247, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 1.432696, | |
| "grad_norm": 5.280975818634033, | |
| "learning_rate": 7.348594377510041e-07, | |
| "loss": 0.1117, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 1.4334959999999999, | |
| "grad_norm": 7.396920204162598, | |
| "learning_rate": 7.268273092369478e-07, | |
| "loss": 0.1218, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.4334959999999999, | |
| "eval_test1_cer": 0.04053700799163493, | |
| "eval_test1_cer_norm": 0.02625399188416933, | |
| "eval_test1_loss": 0.16978052258491516, | |
| "eval_test1_runtime": 2459.5813, | |
| "eval_test1_samples_per_second": 1.016, | |
| "eval_test1_steps_per_second": 0.254, | |
| "eval_test1_wer": 0.12650514592262166, | |
| "eval_test1_wer_norm": 0.06905520324965662, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.4334959999999999, | |
| "eval_test2_cer": 0.0906269832381379, | |
| "eval_test2_cer_norm": 0.07077006507592191, | |
| "eval_test2_loss": 0.30657845735549927, | |
| "eval_test2_runtime": 2479.2829, | |
| "eval_test2_samples_per_second": 1.008, | |
| "eval_test2_steps_per_second": 0.252, | |
| "eval_test2_wer": 0.20224879835202564, | |
| "eval_test2_wer_norm": 0.13654595461838184, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.434296, | |
| "grad_norm": 7.435978412628174, | |
| "learning_rate": 7.187951807228916e-07, | |
| "loss": 0.1312, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 1.435096, | |
| "grad_norm": 6.736993789672852, | |
| "learning_rate": 7.107630522088355e-07, | |
| "loss": 0.1245, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 1.435896, | |
| "grad_norm": 3.6509387493133545, | |
| "learning_rate": 7.027309236947792e-07, | |
| "loss": 0.1041, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 1.436696, | |
| "grad_norm": 5.3854851722717285, | |
| "learning_rate": 6.94698795180723e-07, | |
| "loss": 0.126, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 1.4374959999999999, | |
| "grad_norm": 5.283120155334473, | |
| "learning_rate": 6.866666666666667e-07, | |
| "loss": 0.1615, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 1.438296, | |
| "grad_norm": 3.9473323822021484, | |
| "learning_rate": 6.786345381526105e-07, | |
| "loss": 0.1218, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 1.439096, | |
| "grad_norm": 11.495280265808105, | |
| "learning_rate": 6.706024096385542e-07, | |
| "loss": 0.1165, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 1.439896, | |
| "grad_norm": 6.106124401092529, | |
| "learning_rate": 6.62570281124498e-07, | |
| "loss": 0.1291, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 1.440696, | |
| "grad_norm": 3.252533435821533, | |
| "learning_rate": 6.545381526104418e-07, | |
| "loss": 0.1302, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 1.4414959999999999, | |
| "grad_norm": 4.417702674865723, | |
| "learning_rate": 6.465060240963857e-07, | |
| "loss": 0.1185, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 1.442296, | |
| "grad_norm": 7.446925163269043, | |
| "learning_rate": 6.384738955823294e-07, | |
| "loss": 0.1181, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 1.443096, | |
| "grad_norm": 4.945169448852539, | |
| "learning_rate": 6.304417670682732e-07, | |
| "loss": 0.1146, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 1.443896, | |
| "grad_norm": 4.255674362182617, | |
| "learning_rate": 6.224096385542169e-07, | |
| "loss": 0.1149, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 1.444696, | |
| "grad_norm": 7.243592739105225, | |
| "learning_rate": 6.143775100401607e-07, | |
| "loss": 0.1266, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 1.445496, | |
| "grad_norm": 4.7151265144348145, | |
| "learning_rate": 6.063453815261044e-07, | |
| "loss": 0.1228, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 1.446296, | |
| "grad_norm": 3.492438793182373, | |
| "learning_rate": 5.983132530120483e-07, | |
| "loss": 0.107, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 1.447096, | |
| "grad_norm": 5.758606910705566, | |
| "learning_rate": 5.902811244979921e-07, | |
| "loss": 0.1104, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 1.447896, | |
| "grad_norm": 5.847075462341309, | |
| "learning_rate": 5.822489959839358e-07, | |
| "loss": 0.1234, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 1.448696, | |
| "grad_norm": 4.088864803314209, | |
| "learning_rate": 5.742168674698796e-07, | |
| "loss": 0.1247, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 1.449496, | |
| "grad_norm": 2.7524282932281494, | |
| "learning_rate": 5.661847389558233e-07, | |
| "loss": 0.0971, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 1.450296, | |
| "grad_norm": 6.989465236663818, | |
| "learning_rate": 5.581526104417671e-07, | |
| "loss": 0.126, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 1.451096, | |
| "grad_norm": 9.567557334899902, | |
| "learning_rate": 5.501204819277109e-07, | |
| "loss": 0.1191, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 1.451896, | |
| "grad_norm": 10.12435531616211, | |
| "learning_rate": 5.420883534136546e-07, | |
| "loss": 0.1096, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 1.452696, | |
| "grad_norm": 1.7563780546188354, | |
| "learning_rate": 5.340562248995985e-07, | |
| "loss": 0.1197, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 1.453496, | |
| "grad_norm": 5.218570232391357, | |
| "learning_rate": 5.260240963855423e-07, | |
| "loss": 0.1207, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 1.454296, | |
| "grad_norm": 4.665302753448486, | |
| "learning_rate": 5.17991967871486e-07, | |
| "loss": 0.115, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 1.455096, | |
| "grad_norm": 5.446451663970947, | |
| "learning_rate": 5.099598393574298e-07, | |
| "loss": 0.119, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 1.455896, | |
| "grad_norm": 4.688202381134033, | |
| "learning_rate": 5.020080321285141e-07, | |
| "loss": 0.1369, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 1.456696, | |
| "grad_norm": 3.5095245838165283, | |
| "learning_rate": 4.939759036144578e-07, | |
| "loss": 0.1094, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 1.457496, | |
| "grad_norm": 3.919813394546509, | |
| "learning_rate": 4.859437751004017e-07, | |
| "loss": 0.1113, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.458296, | |
| "grad_norm": 3.1427431106567383, | |
| "learning_rate": 4.779116465863454e-07, | |
| "loss": 0.1127, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 1.459096, | |
| "grad_norm": 5.548600673675537, | |
| "learning_rate": 4.698795180722892e-07, | |
| "loss": 0.1175, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 1.459896, | |
| "grad_norm": 3.4401326179504395, | |
| "learning_rate": 4.6184738955823296e-07, | |
| "loss": 0.1118, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 1.460696, | |
| "grad_norm": 0.7513042688369751, | |
| "learning_rate": 4.538152610441767e-07, | |
| "loss": 0.1227, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 1.461496, | |
| "grad_norm": 4.836828708648682, | |
| "learning_rate": 4.4578313253012054e-07, | |
| "loss": 0.1208, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 1.462296, | |
| "grad_norm": 9.92773723602295, | |
| "learning_rate": 4.377510040160643e-07, | |
| "loss": 0.1021, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 1.463096, | |
| "grad_norm": 4.6646575927734375, | |
| "learning_rate": 4.2971887550200806e-07, | |
| "loss": 0.1142, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 1.463896, | |
| "grad_norm": 0.4946214258670807, | |
| "learning_rate": 4.216867469879518e-07, | |
| "loss": 0.1334, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 1.464696, | |
| "grad_norm": 7.3409247398376465, | |
| "learning_rate": 4.1365461847389564e-07, | |
| "loss": 0.1387, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 1.465496, | |
| "grad_norm": 2.3244385719299316, | |
| "learning_rate": 4.056224899598394e-07, | |
| "loss": 0.1216, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.465496, | |
| "eval_test1_cer": 0.04472888191799238, | |
| "eval_test1_cer_norm": 0.029865296419910196, | |
| "eval_test1_loss": 0.16931886970996857, | |
| "eval_test1_runtime": 2461.9854, | |
| "eval_test1_samples_per_second": 1.015, | |
| "eval_test1_steps_per_second": 0.254, | |
| "eval_test1_wer": 0.13493104755240679, | |
| "eval_test1_wer_norm": 0.076477980069552, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.465496, | |
| "eval_test2_cer": 0.09805129353791019, | |
| "eval_test2_cer_norm": 0.07766985590331578, | |
| "eval_test2_loss": 0.3047462999820709, | |
| "eval_test2_runtime": 2488.9087, | |
| "eval_test2_samples_per_second": 1.004, | |
| "eval_test2_steps_per_second": 0.251, | |
| "eval_test2_wer": 0.21137560082398718, | |
| "eval_test2_wer_norm": 0.1462296584918634, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.466296, | |
| "grad_norm": 5.934924602508545, | |
| "learning_rate": 3.9759036144578316e-07, | |
| "loss": 0.1166, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 1.467096, | |
| "grad_norm": 6.291118621826172, | |
| "learning_rate": 3.895582329317269e-07, | |
| "loss": 0.1144, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 1.467896, | |
| "grad_norm": 7.283012866973877, | |
| "learning_rate": 3.8152610441767073e-07, | |
| "loss": 0.1258, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 1.468696, | |
| "grad_norm": 2.082486391067505, | |
| "learning_rate": 3.734939759036145e-07, | |
| "loss": 0.1209, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 1.469496, | |
| "grad_norm": 7.8560967445373535, | |
| "learning_rate": 3.6546184738955826e-07, | |
| "loss": 0.115, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 1.470296, | |
| "grad_norm": 6.016510963439941, | |
| "learning_rate": 3.57429718875502e-07, | |
| "loss": 0.1082, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 1.471096, | |
| "grad_norm": 2.1153485774993896, | |
| "learning_rate": 3.4939759036144583e-07, | |
| "loss": 0.1286, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 1.471896, | |
| "grad_norm": 5.02449369430542, | |
| "learning_rate": 3.413654618473896e-07, | |
| "loss": 0.111, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 1.472696, | |
| "grad_norm": 2.6848514080047607, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.1161, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 1.473496, | |
| "grad_norm": 3.306321620941162, | |
| "learning_rate": 3.253012048192771e-07, | |
| "loss": 0.1164, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 1.474296, | |
| "grad_norm": 5.202853202819824, | |
| "learning_rate": 3.1726907630522093e-07, | |
| "loss": 0.1148, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 1.475096, | |
| "grad_norm": 6.273738861083984, | |
| "learning_rate": 3.092369477911647e-07, | |
| "loss": 0.1257, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 1.475896, | |
| "grad_norm": 1.6317389011383057, | |
| "learning_rate": 3.01285140562249e-07, | |
| "loss": 0.1169, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 1.476696, | |
| "grad_norm": 3.1588313579559326, | |
| "learning_rate": 2.932530120481928e-07, | |
| "loss": 0.1243, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 1.477496, | |
| "grad_norm": 3.1574482917785645, | |
| "learning_rate": 2.852208835341366e-07, | |
| "loss": 0.1254, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 1.478296, | |
| "grad_norm": 3.313225507736206, | |
| "learning_rate": 2.7718875502008034e-07, | |
| "loss": 0.1196, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 1.479096, | |
| "grad_norm": 3.645125389099121, | |
| "learning_rate": 2.691566265060241e-07, | |
| "loss": 0.0956, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 1.479896, | |
| "grad_norm": 0.8086249232292175, | |
| "learning_rate": 2.611244979919679e-07, | |
| "loss": 0.1171, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 1.480696, | |
| "grad_norm": 4.813057899475098, | |
| "learning_rate": 2.530923694779117e-07, | |
| "loss": 0.1254, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 1.481496, | |
| "grad_norm": 3.0106194019317627, | |
| "learning_rate": 2.4506024096385544e-07, | |
| "loss": 0.109, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 1.482296, | |
| "grad_norm": 3.1917545795440674, | |
| "learning_rate": 2.370281124497992e-07, | |
| "loss": 0.1082, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 1.483096, | |
| "grad_norm": 8.249537467956543, | |
| "learning_rate": 2.28995983935743e-07, | |
| "loss": 0.1334, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 1.483896, | |
| "grad_norm": 2.7427356243133545, | |
| "learning_rate": 2.2096385542168677e-07, | |
| "loss": 0.1209, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 1.484696, | |
| "grad_norm": 2.4920859336853027, | |
| "learning_rate": 2.1293172690763056e-07, | |
| "loss": 0.1041, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 1.485496, | |
| "grad_norm": 5.013920307159424, | |
| "learning_rate": 2.0489959839357432e-07, | |
| "loss": 0.1114, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 1.486296, | |
| "grad_norm": 0.5811383128166199, | |
| "learning_rate": 1.968674698795181e-07, | |
| "loss": 0.1228, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 1.487096, | |
| "grad_norm": 5.773433685302734, | |
| "learning_rate": 1.8883534136546187e-07, | |
| "loss": 0.1284, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 1.487896, | |
| "grad_norm": 3.8643128871917725, | |
| "learning_rate": 1.8080321285140566e-07, | |
| "loss": 0.1159, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 1.488696, | |
| "grad_norm": 5.357062816619873, | |
| "learning_rate": 1.7277108433734942e-07, | |
| "loss": 0.1225, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 1.489496, | |
| "grad_norm": 4.911788463592529, | |
| "learning_rate": 1.647389558232932e-07, | |
| "loss": 0.1116, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 1.490296, | |
| "grad_norm": 4.041905403137207, | |
| "learning_rate": 1.5670682730923697e-07, | |
| "loss": 0.1236, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 1.491096, | |
| "grad_norm": 5.671024799346924, | |
| "learning_rate": 1.4867469879518073e-07, | |
| "loss": 0.1168, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 1.491896, | |
| "grad_norm": 8.380229949951172, | |
| "learning_rate": 1.4064257028112452e-07, | |
| "loss": 0.1192, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 1.492696, | |
| "grad_norm": 8.371071815490723, | |
| "learning_rate": 1.3261044176706828e-07, | |
| "loss": 0.102, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 1.493496, | |
| "grad_norm": 4.045628547668457, | |
| "learning_rate": 1.2457831325301207e-07, | |
| "loss": 0.1177, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 1.494296, | |
| "grad_norm": 7.542957782745361, | |
| "learning_rate": 1.1654618473895584e-07, | |
| "loss": 0.108, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 2.000592, | |
| "grad_norm": 5.552263259887695, | |
| "learning_rate": 1.0851405622489961e-07, | |
| "loss": 0.1093, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 2.001392, | |
| "grad_norm": 4.822888374328613, | |
| "learning_rate": 1.0048192771084339e-07, | |
| "loss": 0.1042, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 2.002192, | |
| "grad_norm": 0.32907435297966003, | |
| "learning_rate": 9.244979919678716e-08, | |
| "loss": 0.1014, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 2.002992, | |
| "grad_norm": 5.526859760284424, | |
| "learning_rate": 8.441767068273094e-08, | |
| "loss": 0.0819, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 2.002992, | |
| "eval_test1_cer": 0.04037829561580402, | |
| "eval_test1_cer_norm": 0.026181957884121306, | |
| "eval_test1_loss": 0.1684691458940506, | |
| "eval_test1_runtime": 2457.6805, | |
| "eval_test1_samples_per_second": 1.017, | |
| "eval_test1_steps_per_second": 0.254, | |
| "eval_test1_wer": 0.1262135922330097, | |
| "eval_test1_wer_norm": 0.06817849732604693, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 2.002992, | |
| "eval_test2_cer": 0.09239089857020197, | |
| "eval_test2_cer_norm": 0.07192245119305857, | |
| "eval_test2_loss": 0.30393916368484497, | |
| "eval_test2_runtime": 2485.4178, | |
| "eval_test2_samples_per_second": 1.006, | |
| "eval_test2_steps_per_second": 0.251, | |
| "eval_test2_wer": 0.20482375829709315, | |
| "eval_test2_wer_norm": 0.13981205592482238, | |
| "step": 124000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 125000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 4000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.062139105004749e+20, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |