| { | |
| "best_metric": 21.95886308189698, | |
| "best_model_checkpoint": "./whisper-tiny-lv/checkpoint-91000", | |
| "epoch": 41.91616766467066, | |
| "eval_steps": 1000, | |
| "global_step": 91000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 6.921171188354492, | |
| "learning_rate": 4.9000000000000005e-06, | |
| "loss": 1.8377, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 6.424713611602783, | |
| "learning_rate": 9.9e-06, | |
| "loss": 0.7472, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 6.457923889160156, | |
| "learning_rate": 9.988688827331488e-06, | |
| "loss": 0.5694, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 5.4181013107299805, | |
| "learning_rate": 9.977146814404432e-06, | |
| "loss": 0.481, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 0.511210560798645, | |
| "eval_runtime": 658.2418, | |
| "eval_samples_per_second": 10.258, | |
| "eval_steps_per_second": 0.321, | |
| "eval_wer": 51.379495011603225, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 5.981955528259277, | |
| "learning_rate": 9.965604801477378e-06, | |
| "loss": 0.4294, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 6.043950080871582, | |
| "learning_rate": 9.954062788550324e-06, | |
| "loss": 0.3919, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 5.050214767456055, | |
| "learning_rate": 9.94252077562327e-06, | |
| "loss": 0.3599, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 5.506693363189697, | |
| "learning_rate": 9.930978762696215e-06, | |
| "loss": 0.3399, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 0.39190948009490967, | |
| "eval_runtime": 666.4927, | |
| "eval_samples_per_second": 10.131, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 42.10881250371899, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 4.966064929962158, | |
| "learning_rate": 9.91943674976916e-06, | |
| "loss": 0.3048, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 4.654673099517822, | |
| "learning_rate": 9.907894736842107e-06, | |
| "loss": 0.2753, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 4.946408271789551, | |
| "learning_rate": 9.896352723915051e-06, | |
| "loss": 0.2609, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 4.584148406982422, | |
| "learning_rate": 9.884810710987997e-06, | |
| "loss": 0.2539, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 0.33731845021247864, | |
| "eval_runtime": 657.5879, | |
| "eval_samples_per_second": 10.268, | |
| "eval_steps_per_second": 0.321, | |
| "eval_wer": 38.419580696987126, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 4.824137210845947, | |
| "learning_rate": 9.873268698060943e-06, | |
| "loss": 0.2474, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 4.751330852508545, | |
| "learning_rate": 9.861726685133887e-06, | |
| "loss": 0.2374, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 5.100882530212402, | |
| "learning_rate": 9.850184672206833e-06, | |
| "loss": 0.2293, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 4.561028480529785, | |
| "learning_rate": 9.83864265927978e-06, | |
| "loss": 0.2252, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 0.30130764842033386, | |
| "eval_runtime": 668.7558, | |
| "eval_samples_per_second": 10.096, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 35.210345716722536, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 4.661614418029785, | |
| "learning_rate": 9.827100646352725e-06, | |
| "loss": 0.2127, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 4.2613959312438965, | |
| "learning_rate": 9.81555863342567e-06, | |
| "loss": 0.1886, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 4.164435386657715, | |
| "learning_rate": 9.804016620498615e-06, | |
| "loss": 0.1725, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 4.097248077392578, | |
| "learning_rate": 9.792474607571561e-06, | |
| "loss": 0.1715, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 0.283372163772583, | |
| "eval_runtime": 661.6835, | |
| "eval_samples_per_second": 10.204, | |
| "eval_steps_per_second": 0.319, | |
| "eval_wer": 33.31415990638079, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 4.298040866851807, | |
| "learning_rate": 9.780932594644506e-06, | |
| "loss": 0.1657, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 4.593989849090576, | |
| "learning_rate": 9.769390581717453e-06, | |
| "loss": 0.1637, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 4.4265055656433105, | |
| "learning_rate": 9.757848568790398e-06, | |
| "loss": 0.1619, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 3.670001983642578, | |
| "learning_rate": 9.746306555863344e-06, | |
| "loss": 0.1562, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 0.2656751573085785, | |
| "eval_runtime": 654.0062, | |
| "eval_samples_per_second": 10.324, | |
| "eval_steps_per_second": 0.323, | |
| "eval_wer": 31.9693754090882, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 4.299009799957275, | |
| "learning_rate": 9.73476454293629e-06, | |
| "loss": 0.1547, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 4.178277492523193, | |
| "learning_rate": 9.723222530009234e-06, | |
| "loss": 0.1523, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 3.7556698322296143, | |
| "learning_rate": 9.71168051708218e-06, | |
| "loss": 0.1205, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 4.128946304321289, | |
| "learning_rate": 9.700138504155126e-06, | |
| "loss": 0.1177, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "eval_loss": 0.25489723682403564, | |
| "eval_runtime": 667.3883, | |
| "eval_samples_per_second": 10.117, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 30.705912688180575, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 3.462374448776245, | |
| "learning_rate": 9.68859649122807e-06, | |
| "loss": 0.1173, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 4.692279815673828, | |
| "learning_rate": 9.677054478301016e-06, | |
| "loss": 0.117, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "grad_norm": 3.5876481533050537, | |
| "learning_rate": 9.665512465373962e-06, | |
| "loss": 0.1147, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 4.460909843444824, | |
| "learning_rate": 9.653970452446908e-06, | |
| "loss": 0.1149, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "eval_loss": 0.24221286177635193, | |
| "eval_runtime": 671.3545, | |
| "eval_samples_per_second": 10.057, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 29.97004978479481, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 3.741210460662842, | |
| "learning_rate": 9.642428439519853e-06, | |
| "loss": 0.1124, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 4.417487144470215, | |
| "learning_rate": 9.630886426592799e-06, | |
| "loss": 0.1124, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 3.8332269191741943, | |
| "learning_rate": 9.619344413665745e-06, | |
| "loss": 0.1028, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "grad_norm": 3.3890438079833984, | |
| "learning_rate": 9.607802400738689e-06, | |
| "loss": 0.0834, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "eval_loss": 0.23551978170871735, | |
| "eval_runtime": 665.3587, | |
| "eval_samples_per_second": 10.148, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 29.347243985163736, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 3.3677661418914795, | |
| "learning_rate": 9.596260387811635e-06, | |
| "loss": 0.0835, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 3.392284631729126, | |
| "learning_rate": 9.584718374884581e-06, | |
| "loss": 0.0835, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "grad_norm": 3.0857667922973633, | |
| "learning_rate": 9.573176361957525e-06, | |
| "loss": 0.0813, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 3.726276159286499, | |
| "learning_rate": 9.561634349030471e-06, | |
| "loss": 0.0825, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "eval_loss": 0.229040265083313, | |
| "eval_runtime": 671.1646, | |
| "eval_samples_per_second": 10.06, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 28.81567725172065, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 3.28595232963562, | |
| "learning_rate": 9.550092336103417e-06, | |
| "loss": 0.0816, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 3.427420139312744, | |
| "learning_rate": 9.538550323176363e-06, | |
| "loss": 0.0814, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "grad_norm": 3.8041698932647705, | |
| "learning_rate": 9.527008310249308e-06, | |
| "loss": 0.0802, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "grad_norm": 2.9603447914123535, | |
| "learning_rate": 9.515466297322253e-06, | |
| "loss": 0.0669, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "eval_loss": 0.22645771503448486, | |
| "eval_runtime": 667.9924, | |
| "eval_samples_per_second": 10.108, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 28.53402622131424, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "grad_norm": 3.0245766639709473, | |
| "learning_rate": 9.5039242843952e-06, | |
| "loss": 0.057, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "grad_norm": 2.435096502304077, | |
| "learning_rate": 9.492382271468144e-06, | |
| "loss": 0.0588, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "grad_norm": 2.9605906009674072, | |
| "learning_rate": 9.480840258541091e-06, | |
| "loss": 0.0583, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "grad_norm": 2.877732515335083, | |
| "learning_rate": 9.469298245614036e-06, | |
| "loss": 0.0567, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "eval_loss": 0.2239210605621338, | |
| "eval_runtime": 671.9438, | |
| "eval_samples_per_second": 10.048, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 27.875518178392213, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 3.2738921642303467, | |
| "learning_rate": 9.457848568790397e-06, | |
| "loss": 0.0589, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 2.727008104324341, | |
| "learning_rate": 9.446306555863343e-06, | |
| "loss": 0.0586, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "grad_norm": 3.6808159351348877, | |
| "learning_rate": 9.43476454293629e-06, | |
| "loss": 0.0602, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "grad_norm": 3.2749545574188232, | |
| "learning_rate": 9.423222530009234e-06, | |
| "loss": 0.0589, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_loss": 0.21996423602104187, | |
| "eval_runtime": 674.0776, | |
| "eval_samples_per_second": 10.017, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 27.879485094313427, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "grad_norm": 2.8687331676483154, | |
| "learning_rate": 9.41168051708218e-06, | |
| "loss": 0.0406, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "grad_norm": 2.4594838619232178, | |
| "learning_rate": 9.400138504155126e-06, | |
| "loss": 0.0405, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "grad_norm": 2.6956489086151123, | |
| "learning_rate": 9.388596491228072e-06, | |
| "loss": 0.0398, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "grad_norm": 2.6623504161834717, | |
| "learning_rate": 9.377054478301016e-06, | |
| "loss": 0.0415, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "eval_loss": 0.22312334179878235, | |
| "eval_runtime": 673.796, | |
| "eval_samples_per_second": 10.021, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 26.97701172223655, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "grad_norm": 3.0151000022888184, | |
| "learning_rate": 9.365512465373962e-06, | |
| "loss": 0.0422, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 3.232048749923706, | |
| "learning_rate": 9.353970452446908e-06, | |
| "loss": 0.0412, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "grad_norm": 2.809514284133911, | |
| "learning_rate": 9.342428439519852e-06, | |
| "loss": 0.0411, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "grad_norm": 2.0289547443389893, | |
| "learning_rate": 9.3308864265928e-06, | |
| "loss": 0.0423, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "eval_loss": 0.2184455841779709, | |
| "eval_runtime": 688.9862, | |
| "eval_samples_per_second": 9.8, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 27.084118452109408, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 3.098123788833618, | |
| "learning_rate": 9.319344413665744e-06, | |
| "loss": 0.0375, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "grad_norm": 2.8762073516845703, | |
| "learning_rate": 9.307802400738688e-06, | |
| "loss": 0.0266, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "grad_norm": 4.211934566497803, | |
| "learning_rate": 9.296260387811636e-06, | |
| "loss": 0.0278, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "grad_norm": 2.860619306564331, | |
| "learning_rate": 9.28471837488458e-06, | |
| "loss": 0.0281, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "eval_loss": 0.22083307802677155, | |
| "eval_runtime": 665.4984, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 27.224943967312615, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "grad_norm": 2.3288867473602295, | |
| "learning_rate": 9.273176361957526e-06, | |
| "loss": 0.0278, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 2.9130966663360596, | |
| "learning_rate": 9.261634349030472e-06, | |
| "loss": 0.0281, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 1.9348437786102295, | |
| "learning_rate": 9.250092336103417e-06, | |
| "loss": 0.0294, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "grad_norm": 3.404127836227417, | |
| "learning_rate": 9.238550323176363e-06, | |
| "loss": 0.0296, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "eval_loss": 0.2222394496202469, | |
| "eval_runtime": 668.3229, | |
| "eval_samples_per_second": 10.103, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 26.802467421702996, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "grad_norm": 2.1417627334594727, | |
| "learning_rate": 9.227008310249309e-06, | |
| "loss": 0.0287, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 1.783292531967163, | |
| "learning_rate": 9.215512465373963e-06, | |
| "loss": 0.023, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "grad_norm": 1.875301480293274, | |
| "learning_rate": 9.203970452446908e-06, | |
| "loss": 0.0185, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "grad_norm": 1.9140523672103882, | |
| "learning_rate": 9.192428439519852e-06, | |
| "loss": 0.0186, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "eval_loss": 0.2229994833469391, | |
| "eval_runtime": 670.5805, | |
| "eval_samples_per_second": 10.069, | |
| "eval_steps_per_second": 0.315, | |
| "eval_wer": 26.498998353729892, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "grad_norm": 2.4175968170166016, | |
| "learning_rate": 9.1808864265928e-06, | |
| "loss": 0.0192, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "grad_norm": 2.8320376873016357, | |
| "learning_rate": 9.169344413665744e-06, | |
| "loss": 0.0198, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 2.431974172592163, | |
| "learning_rate": 9.15780240073869e-06, | |
| "loss": 0.0196, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 2.0679523944854736, | |
| "learning_rate": 9.146260387811636e-06, | |
| "loss": 0.0201, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "eval_loss": 0.22365085780620575, | |
| "eval_runtime": 657.293, | |
| "eval_samples_per_second": 10.272, | |
| "eval_steps_per_second": 0.321, | |
| "eval_wer": 26.042803022789933, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "grad_norm": 2.1873040199279785, | |
| "learning_rate": 9.13471837488458e-06, | |
| "loss": 0.0202, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 2.5239417552948, | |
| "learning_rate": 9.123222530009235e-06, | |
| "loss": 0.0201, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 9.1, | |
| "grad_norm": 2.638354778289795, | |
| "learning_rate": 9.111680517082179e-06, | |
| "loss": 0.0134, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "grad_norm": 1.6381027698516846, | |
| "learning_rate": 9.100138504155125e-06, | |
| "loss": 0.0127, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "eval_loss": 0.22688329219818115, | |
| "eval_runtime": 665.6628, | |
| "eval_samples_per_second": 10.143, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 26.134042088977928, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 1.1191093921661377, | |
| "learning_rate": 9.088596491228071e-06, | |
| "loss": 0.0133, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 2.025820016860962, | |
| "learning_rate": 9.077054478301015e-06, | |
| "loss": 0.0133, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "grad_norm": 1.148484468460083, | |
| "learning_rate": 9.065512465373963e-06, | |
| "loss": 0.0136, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "grad_norm": 2.849606513977051, | |
| "learning_rate": 9.053970452446907e-06, | |
| "loss": 0.0135, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "eval_loss": 0.22943329811096191, | |
| "eval_runtime": 665.6291, | |
| "eval_samples_per_second": 10.144, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 26.330404427078168, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "grad_norm": 2.421963691711426, | |
| "learning_rate": 9.042428439519853e-06, | |
| "loss": 0.014, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "grad_norm": 1.5668810606002808, | |
| "learning_rate": 9.0308864265928e-06, | |
| "loss": 0.0148, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 10.02, | |
| "grad_norm": 0.8170527815818787, | |
| "learning_rate": 9.019390581717452e-06, | |
| "loss": 0.0135, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "grad_norm": 1.8164241313934326, | |
| "learning_rate": 9.007848568790398e-06, | |
| "loss": 0.0086, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "eval_loss": 0.23072278499603271, | |
| "eval_runtime": 665.1317, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 26.07057143423845, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "grad_norm": 1.6295300722122192, | |
| "learning_rate": 8.996306555863344e-06, | |
| "loss": 0.0091, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 10.36, | |
| "grad_norm": 1.5732313394546509, | |
| "learning_rate": 8.984764542936288e-06, | |
| "loss": 0.0093, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "grad_norm": 1.6755157709121704, | |
| "learning_rate": 8.973222530009234e-06, | |
| "loss": 0.0095, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "grad_norm": 1.9076685905456543, | |
| "learning_rate": 8.96168051708218e-06, | |
| "loss": 0.0097, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "eval_loss": 0.2347114235162735, | |
| "eval_runtime": 674.6572, | |
| "eval_samples_per_second": 10.008, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 25.425947597040683, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 1.4558827877044678, | |
| "learning_rate": 8.950138504155126e-06, | |
| "loss": 0.0097, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "grad_norm": 2.6334474086761475, | |
| "learning_rate": 8.93859649122807e-06, | |
| "loss": 0.0103, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "grad_norm": 1.4682759046554565, | |
| "learning_rate": 8.927100646352724e-06, | |
| "loss": 0.0104, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "grad_norm": 1.105055332183838, | |
| "learning_rate": 8.915558633425671e-06, | |
| "loss": 0.0082, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "eval_loss": 0.23529361188411713, | |
| "eval_runtime": 677.9021, | |
| "eval_samples_per_second": 9.96, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 25.18594918380705, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "grad_norm": 0.9997087717056274, | |
| "learning_rate": 8.904016620498616e-06, | |
| "loss": 0.0062, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 11.29, | |
| "grad_norm": 1.274902582168579, | |
| "learning_rate": 8.89247460757156e-06, | |
| "loss": 0.0067, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "grad_norm": 1.1603277921676636, | |
| "learning_rate": 8.880932594644508e-06, | |
| "loss": 0.0068, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "grad_norm": 1.450378179550171, | |
| "learning_rate": 8.869390581717452e-06, | |
| "loss": 0.0069, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "eval_loss": 0.23996803164482117, | |
| "eval_runtime": 672.8137, | |
| "eval_samples_per_second": 10.035, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 26.251066108653827, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "grad_norm": 1.0134578943252563, | |
| "learning_rate": 8.857848568790398e-06, | |
| "loss": 0.0072, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "grad_norm": 1.8789595365524292, | |
| "learning_rate": 8.846306555863344e-06, | |
| "loss": 0.0072, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "grad_norm": 0.972827136516571, | |
| "learning_rate": 8.834764542936288e-06, | |
| "loss": 0.0076, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "grad_norm": 2.2902746200561523, | |
| "learning_rate": 8.823268698060943e-06, | |
| "loss": 0.0075, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "eval_loss": 0.23599743843078613, | |
| "eval_runtime": 667.2943, | |
| "eval_samples_per_second": 10.118, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 25.400162643552772, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "grad_norm": 1.1216572523117065, | |
| "learning_rate": 8.811726685133887e-06, | |
| "loss": 0.0054, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "grad_norm": 1.168253779411316, | |
| "learning_rate": 8.800184672206835e-06, | |
| "loss": 0.0048, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 0.6528610587120056, | |
| "learning_rate": 8.788642659279779e-06, | |
| "loss": 0.0049, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "grad_norm": 1.3029311895370483, | |
| "learning_rate": 8.777100646352723e-06, | |
| "loss": 0.0052, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "eval_loss": 0.24270391464233398, | |
| "eval_runtime": 668.33, | |
| "eval_samples_per_second": 10.103, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 24.73967114267013, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 12.55, | |
| "grad_norm": 1.0006558895111084, | |
| "learning_rate": 8.765558633425671e-06, | |
| "loss": 0.0053, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 12.67, | |
| "grad_norm": 1.4560002088546753, | |
| "learning_rate": 8.754016620498615e-06, | |
| "loss": 0.0052, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "grad_norm": 1.9307841062545776, | |
| "learning_rate": 8.742474607571561e-06, | |
| "loss": 0.0053, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "grad_norm": 1.977569580078125, | |
| "learning_rate": 8.730932594644507e-06, | |
| "loss": 0.0056, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "eval_loss": 0.24133123457431793, | |
| "eval_runtime": 682.6727, | |
| "eval_samples_per_second": 9.891, | |
| "eval_steps_per_second": 0.309, | |
| "eval_wer": 25.247436380585913, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.9914600849151611, | |
| "learning_rate": 8.719390581717452e-06, | |
| "loss": 0.0056, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 13.13, | |
| "grad_norm": 0.8464221358299255, | |
| "learning_rate": 8.707848568790398e-06, | |
| "loss": 0.0037, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 13.24, | |
| "grad_norm": 0.6425495147705078, | |
| "learning_rate": 8.69635272391505e-06, | |
| "loss": 0.0038, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "grad_norm": 2.0510671138763428, | |
| "learning_rate": 8.684810710987998e-06, | |
| "loss": 0.0041, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "eval_loss": 0.24362993240356445, | |
| "eval_runtime": 666.1767, | |
| "eval_samples_per_second": 10.135, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 25.074875538012968, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 13.47, | |
| "grad_norm": 1.125919222831726, | |
| "learning_rate": 8.673268698060943e-06, | |
| "loss": 0.0041, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "grad_norm": 1.247779369354248, | |
| "learning_rate": 8.661726685133889e-06, | |
| "loss": 0.0044, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 1.009018063545227, | |
| "learning_rate": 8.650184672206835e-06, | |
| "loss": 0.0046, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 1.848225712776184, | |
| "learning_rate": 8.638642659279779e-06, | |
| "loss": 0.0048, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "eval_loss": 0.24545399844646454, | |
| "eval_runtime": 667.6447, | |
| "eval_samples_per_second": 10.113, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 25.29900628756174, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 13.93, | |
| "grad_norm": 0.4666302502155304, | |
| "learning_rate": 8.627100646352725e-06, | |
| "loss": 0.0046, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "grad_norm": 0.6961706280708313, | |
| "learning_rate": 8.61555863342567e-06, | |
| "loss": 0.004, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "grad_norm": 0.9278767108917236, | |
| "learning_rate": 8.604016620498615e-06, | |
| "loss": 0.003, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "grad_norm": 0.9175160527229309, | |
| "learning_rate": 8.59252077562327e-06, | |
| "loss": 0.0033, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "eval_loss": 0.247360959649086, | |
| "eval_runtime": 674.4213, | |
| "eval_samples_per_second": 10.012, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 25.13041236091001, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "grad_norm": 0.6542864441871643, | |
| "learning_rate": 8.580978762696216e-06, | |
| "loss": 0.0034, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 14.51, | |
| "grad_norm": 14.155986785888672, | |
| "learning_rate": 8.56943674976916e-06, | |
| "loss": 0.0037, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 0.7710313200950623, | |
| "learning_rate": 8.557940904893815e-06, | |
| "loss": 0.0039, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "grad_norm": 1.6279186010360718, | |
| "learning_rate": 8.546398891966759e-06, | |
| "loss": 0.0038, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "eval_loss": 0.25091758370399475, | |
| "eval_runtime": 686.3757, | |
| "eval_samples_per_second": 9.837, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 25.287105539798084, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 14.85, | |
| "grad_norm": 0.883815586566925, | |
| "learning_rate": 8.534856879039707e-06, | |
| "loss": 0.0038, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "grad_norm": 0.6397805213928223, | |
| "learning_rate": 8.523314866112651e-06, | |
| "loss": 0.0037, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 15.09, | |
| "grad_norm": 0.40614956617355347, | |
| "learning_rate": 8.511772853185595e-06, | |
| "loss": 0.003, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 0.3507106602191925, | |
| "learning_rate": 8.500230840258543e-06, | |
| "loss": 0.0026, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "eval_loss": 0.2525634467601776, | |
| "eval_runtime": 668.1892, | |
| "eval_samples_per_second": 10.105, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 25.26925441815261, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 15.32, | |
| "grad_norm": 0.236125648021698, | |
| "learning_rate": 8.488688827331487e-06, | |
| "loss": 0.0028, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "grad_norm": 1.2400788068771362, | |
| "learning_rate": 8.477146814404433e-06, | |
| "loss": 0.0028, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "grad_norm": 1.4743680953979492, | |
| "learning_rate": 8.465604801477379e-06, | |
| "loss": 0.0032, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 15.66, | |
| "grad_norm": 1.9355671405792236, | |
| "learning_rate": 8.454062788550323e-06, | |
| "loss": 0.0035, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 15.66, | |
| "eval_loss": 0.2545054256916046, | |
| "eval_runtime": 681.2267, | |
| "eval_samples_per_second": 9.912, | |
| "eval_steps_per_second": 0.31, | |
| "eval_wer": 25.43784834480433, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 15.78, | |
| "grad_norm": 0.8273574709892273, | |
| "learning_rate": 8.442566943674978e-06, | |
| "loss": 0.0033, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "grad_norm": 1.760032057762146, | |
| "learning_rate": 8.431024930747922e-06, | |
| "loss": 0.0035, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "grad_norm": 1.946081280708313, | |
| "learning_rate": 8.419482917820868e-06, | |
| "loss": 0.0033, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "grad_norm": 0.564914345741272, | |
| "learning_rate": 8.407940904893814e-06, | |
| "loss": 0.0024, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "eval_loss": 0.2501762807369232, | |
| "eval_runtime": 670.7755, | |
| "eval_samples_per_second": 10.066, | |
| "eval_steps_per_second": 0.315, | |
| "eval_wer": 24.212071325148262, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 16.24, | |
| "grad_norm": 1.6005988121032715, | |
| "learning_rate": 8.396398891966759e-06, | |
| "loss": 0.0024, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 16.35, | |
| "grad_norm": 0.8374671339988708, | |
| "learning_rate": 8.384856879039706e-06, | |
| "loss": 0.0028, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 16.47, | |
| "grad_norm": 1.0652960538864136, | |
| "learning_rate": 8.37331486611265e-06, | |
| "loss": 0.003, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 16.58, | |
| "grad_norm": 1.4033843278884888, | |
| "learning_rate": 8.361772853185595e-06, | |
| "loss": 0.0027, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 16.58, | |
| "eval_loss": 0.2562379240989685, | |
| "eval_runtime": 678.952, | |
| "eval_samples_per_second": 9.945, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 24.701985441418568, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "grad_norm": 0.6179186701774597, | |
| "learning_rate": 8.350230840258543e-06, | |
| "loss": 0.0029, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 16.81, | |
| "grad_norm": 1.806768774986267, | |
| "learning_rate": 8.338688827331487e-06, | |
| "loss": 0.0031, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 16.93, | |
| "grad_norm": 0.528777003288269, | |
| "learning_rate": 8.327146814404433e-06, | |
| "loss": 0.0031, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 0.18216899037361145, | |
| "learning_rate": 8.315604801477379e-06, | |
| "loss": 0.0029, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "eval_loss": 0.25483274459838867, | |
| "eval_runtime": 673.8208, | |
| "eval_samples_per_second": 10.02, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 24.735704226748915, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 17.16, | |
| "grad_norm": 0.3349086046218872, | |
| "learning_rate": 8.304062788550323e-06, | |
| "loss": 0.0019, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 17.27, | |
| "grad_norm": 1.109415888786316, | |
| "learning_rate": 8.29252077562327e-06, | |
| "loss": 0.0023, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "grad_norm": 0.5039780735969543, | |
| "learning_rate": 8.280978762696215e-06, | |
| "loss": 0.0024, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "grad_norm": 1.2728774547576904, | |
| "learning_rate": 8.269436749769161e-06, | |
| "loss": 0.0026, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "eval_loss": 0.25514695048332214, | |
| "eval_runtime": 676.5647, | |
| "eval_samples_per_second": 9.98, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 24.102981137314792, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 17.62, | |
| "grad_norm": 0.5640347003936768, | |
| "learning_rate": 8.257894736842105e-06, | |
| "loss": 0.0027, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 17.73, | |
| "grad_norm": 0.6144846677780151, | |
| "learning_rate": 8.24639889196676e-06, | |
| "loss": 0.0027, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "grad_norm": 0.42385855317115784, | |
| "learning_rate": 8.234856879039706e-06, | |
| "loss": 0.0027, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "grad_norm": 0.5083030462265015, | |
| "learning_rate": 8.22331486611265e-06, | |
| "loss": 0.0026, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "eval_loss": 0.2563398778438568, | |
| "eval_runtime": 693.9966, | |
| "eval_samples_per_second": 9.729, | |
| "eval_steps_per_second": 0.304, | |
| "eval_wer": 24.52942459884563, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 18.08, | |
| "grad_norm": 0.7999371290206909, | |
| "learning_rate": 8.211772853185596e-06, | |
| "loss": 0.0022, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 18.19, | |
| "grad_norm": 0.47253143787384033, | |
| "learning_rate": 8.200230840258542e-06, | |
| "loss": 0.0019, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 18.31, | |
| "grad_norm": 0.9011787176132202, | |
| "learning_rate": 8.188688827331487e-06, | |
| "loss": 0.0019, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "grad_norm": 1.1234568357467651, | |
| "learning_rate": 8.177146814404433e-06, | |
| "loss": 0.002, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 18.42, | |
| "eval_loss": 0.2568005323410034, | |
| "eval_runtime": 670.3005, | |
| "eval_samples_per_second": 10.073, | |
| "eval_steps_per_second": 0.315, | |
| "eval_wer": 24.36479758811512, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 18.54, | |
| "grad_norm": 0.7889108657836914, | |
| "learning_rate": 8.165604801477379e-06, | |
| "loss": 0.002, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 18.65, | |
| "grad_norm": 1.1920995712280273, | |
| "learning_rate": 8.154062788550325e-06, | |
| "loss": 0.0021, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 18.77, | |
| "grad_norm": 0.8429755568504333, | |
| "learning_rate": 8.142520775623269e-06, | |
| "loss": 0.0022, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 18.89, | |
| "grad_norm": 3.1110446453094482, | |
| "learning_rate": 8.131024930747923e-06, | |
| "loss": 0.0028, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 18.89, | |
| "eval_loss": 0.2559947073459625, | |
| "eval_runtime": 691.8949, | |
| "eval_samples_per_second": 9.759, | |
| "eval_steps_per_second": 0.305, | |
| "eval_wer": 24.575044131939624, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.3139660656452179, | |
| "learning_rate": 8.11948291782087e-06, | |
| "loss": 0.0026, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 19.12, | |
| "grad_norm": 0.21102827787399292, | |
| "learning_rate": 8.107940904893814e-06, | |
| "loss": 0.0017, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 19.23, | |
| "grad_norm": 0.2366773635149002, | |
| "learning_rate": 8.09639889196676e-06, | |
| "loss": 0.0015, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "grad_norm": 0.5561370253562927, | |
| "learning_rate": 8.084903047091414e-06, | |
| "loss": 0.0019, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "eval_loss": 0.26248979568481445, | |
| "eval_runtime": 678.0519, | |
| "eval_samples_per_second": 9.958, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 24.596862169506316, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "grad_norm": 0.6396375894546509, | |
| "learning_rate": 8.073361034164359e-06, | |
| "loss": 0.0019, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 19.58, | |
| "grad_norm": 0.4240398406982422, | |
| "learning_rate": 8.061819021237305e-06, | |
| "loss": 0.0021, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 19.69, | |
| "grad_norm": 2.101404905319214, | |
| "learning_rate": 8.05027700831025e-06, | |
| "loss": 0.002, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "grad_norm": 0.4409444034099579, | |
| "learning_rate": 8.038734995383195e-06, | |
| "loss": 0.0021, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "eval_loss": 0.26329436898231506, | |
| "eval_runtime": 674.3068, | |
| "eval_samples_per_second": 10.013, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 24.102981137314792, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "grad_norm": 1.5778237581253052, | |
| "learning_rate": 8.027192982456141e-06, | |
| "loss": 0.0021, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 20.04, | |
| "grad_norm": 0.34445250034332275, | |
| "learning_rate": 8.015650969529087e-06, | |
| "loss": 0.002, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 20.15, | |
| "grad_norm": 0.8328190445899963, | |
| "learning_rate": 8.004108956602033e-06, | |
| "loss": 0.0015, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 20.27, | |
| "grad_norm": 1.4873714447021484, | |
| "learning_rate": 7.992566943674977e-06, | |
| "loss": 0.0015, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 20.27, | |
| "eval_loss": 0.2650669813156128, | |
| "eval_runtime": 678.5481, | |
| "eval_samples_per_second": 9.951, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 24.23983973659678, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 20.38, | |
| "grad_norm": 2.7649030685424805, | |
| "learning_rate": 7.981024930747923e-06, | |
| "loss": 0.0016, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 20.5, | |
| "grad_norm": 0.20013980567455292, | |
| "learning_rate": 7.969529085872578e-06, | |
| "loss": 0.0019, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 20.61, | |
| "grad_norm": 1.8381603956222534, | |
| "learning_rate": 7.957987072945522e-06, | |
| "loss": 0.0021, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 20.73, | |
| "grad_norm": 1.9389904737472534, | |
| "learning_rate": 7.946445060018468e-06, | |
| "loss": 0.0018, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 20.73, | |
| "eval_loss": 0.26352566480636597, | |
| "eval_runtime": 684.4278, | |
| "eval_samples_per_second": 9.865, | |
| "eval_steps_per_second": 0.308, | |
| "eval_wer": 24.100997679354187, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "grad_norm": 0.1582639217376709, | |
| "learning_rate": 7.934903047091414e-06, | |
| "loss": 0.0022, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 20.96, | |
| "grad_norm": 1.1058118343353271, | |
| "learning_rate": 7.923361034164358e-06, | |
| "loss": 0.0021, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 21.07, | |
| "grad_norm": 0.4075948894023895, | |
| "learning_rate": 7.911819021237304e-06, | |
| "loss": 0.0018, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 21.19, | |
| "grad_norm": 0.894478440284729, | |
| "learning_rate": 7.90027700831025e-06, | |
| "loss": 0.0013, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 21.19, | |
| "eval_loss": 0.2585604190826416, | |
| "eval_runtime": 674.5545, | |
| "eval_samples_per_second": 10.01, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 23.849098518356904, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 21.3, | |
| "grad_norm": 0.8086264729499817, | |
| "learning_rate": 7.888734995383196e-06, | |
| "loss": 0.0016, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "grad_norm": 0.5767725706100464, | |
| "learning_rate": 7.87719298245614e-06, | |
| "loss": 0.0016, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 21.53, | |
| "grad_norm": 1.622611403465271, | |
| "learning_rate": 7.865650969529087e-06, | |
| "loss": 0.0019, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 21.65, | |
| "grad_norm": 2.0076467990875244, | |
| "learning_rate": 7.854108956602033e-06, | |
| "loss": 0.0018, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 21.65, | |
| "eval_loss": 0.2612689435482025, | |
| "eval_runtime": 680.9382, | |
| "eval_samples_per_second": 9.916, | |
| "eval_steps_per_second": 0.31, | |
| "eval_wer": 23.940337584544896, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "grad_norm": 0.3226953446865082, | |
| "learning_rate": 7.842566943674977e-06, | |
| "loss": 0.0018, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 21.88, | |
| "grad_norm": 0.5793449878692627, | |
| "learning_rate": 7.831024930747923e-06, | |
| "loss": 0.0019, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 21.99, | |
| "grad_norm": 0.3840419352054596, | |
| "learning_rate": 7.819482917820869e-06, | |
| "loss": 0.0019, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 22.11, | |
| "grad_norm": 2.221217155456543, | |
| "learning_rate": 7.807940904893813e-06, | |
| "loss": 0.0014, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 22.11, | |
| "eval_loss": 0.26184049248695374, | |
| "eval_runtime": 679.8585, | |
| "eval_samples_per_second": 9.931, | |
| "eval_steps_per_second": 0.31, | |
| "eval_wer": 23.591248983477794, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "grad_norm": 0.1267741620540619, | |
| "learning_rate": 7.79639889196676e-06, | |
| "loss": 0.0013, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "grad_norm": 0.3871385157108307, | |
| "learning_rate": 7.784856879039705e-06, | |
| "loss": 0.0016, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 22.46, | |
| "grad_norm": 0.319933146238327, | |
| "learning_rate": 7.773361034164358e-06, | |
| "loss": 0.0016, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 22.57, | |
| "grad_norm": 0.21517297625541687, | |
| "learning_rate": 7.761819021237304e-06, | |
| "loss": 0.0017, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 22.57, | |
| "eval_loss": 0.2654561698436737, | |
| "eval_runtime": 676.7053, | |
| "eval_samples_per_second": 9.978, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 23.755875994208303, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 22.69, | |
| "grad_norm": 0.17343254387378693, | |
| "learning_rate": 7.75027700831025e-06, | |
| "loss": 0.0017, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "grad_norm": 0.31837859749794006, | |
| "learning_rate": 7.738734995383196e-06, | |
| "loss": 0.0015, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 22.92, | |
| "grad_norm": 1.0666159391403198, | |
| "learning_rate": 7.72719298245614e-06, | |
| "loss": 0.0015, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "grad_norm": 0.5933089852333069, | |
| "learning_rate": 7.715650969529086e-06, | |
| "loss": 0.0016, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "eval_loss": 0.2641240656375885, | |
| "eval_runtime": 701.7818, | |
| "eval_samples_per_second": 9.621, | |
| "eval_steps_per_second": 0.301, | |
| "eval_wer": 23.57141440387171, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 23.15, | |
| "grad_norm": 4.56223201751709, | |
| "learning_rate": 7.704108956602032e-06, | |
| "loss": 0.0012, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "grad_norm": 0.13189882040023804, | |
| "learning_rate": 7.692566943674977e-06, | |
| "loss": 0.0013, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 23.38, | |
| "grad_norm": 0.3501899242401123, | |
| "learning_rate": 7.681024930747923e-06, | |
| "loss": 0.0014, | |
| "step": 50750 | |
| }, | |
| { | |
| "epoch": 23.49, | |
| "grad_norm": 0.3943934738636017, | |
| "learning_rate": 7.669529085872577e-06, | |
| "loss": 0.0014, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 23.49, | |
| "eval_loss": 0.26596611738204956, | |
| "eval_runtime": 667.3547, | |
| "eval_samples_per_second": 10.118, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 23.60910010512327, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 23.61, | |
| "grad_norm": 0.6058038473129272, | |
| "learning_rate": 7.657987072945522e-06, | |
| "loss": 0.0014, | |
| "step": 51250 | |
| }, | |
| { | |
| "epoch": 23.72, | |
| "grad_norm": 0.18997740745544434, | |
| "learning_rate": 7.646445060018468e-06, | |
| "loss": 0.0018, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 23.84, | |
| "grad_norm": 0.4755234122276306, | |
| "learning_rate": 7.634903047091414e-06, | |
| "loss": 0.0017, | |
| "step": 51750 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "grad_norm": 0.6140190362930298, | |
| "learning_rate": 7.62336103416436e-06, | |
| "loss": 0.0018, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "eval_loss": 0.2636994421482086, | |
| "eval_runtime": 678.6943, | |
| "eval_samples_per_second": 9.949, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 23.870916555923596, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 24.07, | |
| "grad_norm": 0.6709560751914978, | |
| "learning_rate": 7.611819021237305e-06, | |
| "loss": 0.0013, | |
| "step": 52250 | |
| }, | |
| { | |
| "epoch": 24.18, | |
| "grad_norm": 0.40510040521621704, | |
| "learning_rate": 7.60027700831025e-06, | |
| "loss": 0.0011, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "grad_norm": 0.464121550321579, | |
| "learning_rate": 7.588734995383196e-06, | |
| "loss": 0.0011, | |
| "step": 52750 | |
| }, | |
| { | |
| "epoch": 24.41, | |
| "grad_norm": 0.415995329618454, | |
| "learning_rate": 7.577192982456141e-06, | |
| "loss": 0.0012, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 24.41, | |
| "eval_loss": 0.2662787139415741, | |
| "eval_runtime": 679.0891, | |
| "eval_samples_per_second": 9.943, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 23.48017533768372, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 24.53, | |
| "grad_norm": 0.6342004537582397, | |
| "learning_rate": 7.565650969529087e-06, | |
| "loss": 0.0014, | |
| "step": 53250 | |
| }, | |
| { | |
| "epoch": 24.64, | |
| "grad_norm": 0.36562052369117737, | |
| "learning_rate": 7.554155124653741e-06, | |
| "loss": 0.0014, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "grad_norm": 0.47582271695137024, | |
| "learning_rate": 7.542613111726685e-06, | |
| "loss": 0.0015, | |
| "step": 53750 | |
| }, | |
| { | |
| "epoch": 24.87, | |
| "grad_norm": 0.7419930696487427, | |
| "learning_rate": 7.531071098799632e-06, | |
| "loss": 0.0015, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 24.87, | |
| "eval_loss": 0.2703973352909088, | |
| "eval_runtime": 665.4932, | |
| "eval_samples_per_second": 10.146, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 23.75190907828709, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "grad_norm": 0.7274812459945679, | |
| "learning_rate": 7.519529085872577e-06, | |
| "loss": 0.0016, | |
| "step": 54250 | |
| }, | |
| { | |
| "epoch": 25.1, | |
| "grad_norm": 1.2647254467010498, | |
| "learning_rate": 7.507987072945521e-06, | |
| "loss": 0.0012, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 25.22, | |
| "grad_norm": 0.27594470977783203, | |
| "learning_rate": 7.496445060018468e-06, | |
| "loss": 0.0011, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "grad_norm": 0.14190466701984406, | |
| "learning_rate": 7.484903047091413e-06, | |
| "loss": 0.0012, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "eval_loss": 0.2656785249710083, | |
| "eval_runtime": 668.1719, | |
| "eval_samples_per_second": 10.105, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 24.233889362714958, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 25.45, | |
| "grad_norm": 0.559374213218689, | |
| "learning_rate": 7.473361034164359e-06, | |
| "loss": 0.0013, | |
| "step": 55250 | |
| }, | |
| { | |
| "epoch": 25.56, | |
| "grad_norm": 3.8242385387420654, | |
| "learning_rate": 7.461865189289012e-06, | |
| "loss": 0.0012, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 25.68, | |
| "grad_norm": 0.23001307249069214, | |
| "learning_rate": 7.450323176361957e-06, | |
| "loss": 0.0014, | |
| "step": 55750 | |
| }, | |
| { | |
| "epoch": 25.79, | |
| "grad_norm": 0.45375123620033264, | |
| "learning_rate": 7.438781163434904e-06, | |
| "loss": 0.0013, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 25.79, | |
| "eval_loss": 0.2668148875236511, | |
| "eval_runtime": 675.9749, | |
| "eval_samples_per_second": 9.989, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 23.287779915504693, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 25.91, | |
| "grad_norm": 0.11875366419553757, | |
| "learning_rate": 7.427239150507849e-06, | |
| "loss": 0.0014, | |
| "step": 56250 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "grad_norm": 0.21367508172988892, | |
| "learning_rate": 7.415697137580795e-06, | |
| "loss": 0.0011, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 26.14, | |
| "grad_norm": 0.2883310317993164, | |
| "learning_rate": 7.4041551246537405e-06, | |
| "loss": 0.001, | |
| "step": 56750 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "grad_norm": 0.4850456118583679, | |
| "learning_rate": 7.392613111726686e-06, | |
| "loss": 0.001, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "eval_loss": 0.26939988136291504, | |
| "eval_runtime": 668.5911, | |
| "eval_samples_per_second": 10.099, | |
| "eval_steps_per_second": 0.316, | |
| "eval_wer": 23.285796457544084, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 26.37, | |
| "grad_norm": 0.21186549961566925, | |
| "learning_rate": 7.381071098799632e-06, | |
| "loss": 0.0014, | |
| "step": 57250 | |
| }, | |
| { | |
| "epoch": 26.49, | |
| "grad_norm": 1.3765850067138672, | |
| "learning_rate": 7.369529085872577e-06, | |
| "loss": 0.0014, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 26.6, | |
| "grad_norm": 1.716868281364441, | |
| "learning_rate": 7.358033240997231e-06, | |
| "loss": 0.0012, | |
| "step": 57750 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "grad_norm": 1.3002432584762573, | |
| "learning_rate": 7.3464912280701765e-06, | |
| "loss": 0.0013, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 26.72, | |
| "eval_loss": 0.2650892734527588, | |
| "eval_runtime": 675.4132, | |
| "eval_samples_per_second": 9.997, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 23.279846083662257, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 26.83, | |
| "grad_norm": 0.3222731053829193, | |
| "learning_rate": 7.334949215143121e-06, | |
| "loss": 0.0013, | |
| "step": 58250 | |
| }, | |
| { | |
| "epoch": 26.95, | |
| "grad_norm": 0.376442015171051, | |
| "learning_rate": 7.323407202216068e-06, | |
| "loss": 0.0013, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 27.06, | |
| "grad_norm": 0.1525341123342514, | |
| "learning_rate": 7.311865189289013e-06, | |
| "loss": 0.001, | |
| "step": 58750 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "grad_norm": 0.3236662745475769, | |
| "learning_rate": 7.300323176361959e-06, | |
| "loss": 0.0009, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "eval_loss": 0.26985055208206177, | |
| "eval_runtime": 680.6209, | |
| "eval_samples_per_second": 9.92, | |
| "eval_steps_per_second": 0.31, | |
| "eval_wer": 23.258028046095564, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 27.29, | |
| "grad_norm": 0.12151502072811127, | |
| "learning_rate": 7.288827331486612e-06, | |
| "loss": 0.0012, | |
| "step": 59250 | |
| }, | |
| { | |
| "epoch": 27.41, | |
| "grad_norm": 1.5825998783111572, | |
| "learning_rate": 7.277285318559557e-06, | |
| "loss": 0.0012, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 27.52, | |
| "grad_norm": 0.1484094262123108, | |
| "learning_rate": 7.265743305632504e-06, | |
| "loss": 0.0012, | |
| "step": 59750 | |
| }, | |
| { | |
| "epoch": 27.64, | |
| "grad_norm": 0.6981366872787476, | |
| "learning_rate": 7.254201292705448e-06, | |
| "loss": 0.001, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 27.64, | |
| "eval_loss": 0.2713184356689453, | |
| "eval_runtime": 670.5479, | |
| "eval_samples_per_second": 10.069, | |
| "eval_steps_per_second": 0.315, | |
| "eval_wer": 23.236210008528868, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 27.75, | |
| "grad_norm": 0.1501929610967636, | |
| "learning_rate": 7.242659279778393e-06, | |
| "loss": 0.0011, | |
| "step": 60250 | |
| }, | |
| { | |
| "epoch": 27.87, | |
| "grad_norm": 3.85675048828125, | |
| "learning_rate": 7.23111726685134e-06, | |
| "loss": 0.0014, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 27.98, | |
| "grad_norm": 0.3616078197956085, | |
| "learning_rate": 7.219575253924285e-06, | |
| "loss": 0.0013, | |
| "step": 60750 | |
| }, | |
| { | |
| "epoch": 28.1, | |
| "grad_norm": 0.20366336405277252, | |
| "learning_rate": 7.208033240997231e-06, | |
| "loss": 0.0011, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 28.1, | |
| "eval_loss": 0.2708372175693512, | |
| "eval_runtime": 673.0742, | |
| "eval_samples_per_second": 10.032, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 23.390919729456332, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 28.21, | |
| "grad_norm": 0.40390634536743164, | |
| "learning_rate": 7.196491228070176e-06, | |
| "loss": 0.001, | |
| "step": 61250 | |
| }, | |
| { | |
| "epoch": 28.33, | |
| "grad_norm": 1.1840142011642456, | |
| "learning_rate": 7.184949215143121e-06, | |
| "loss": 0.0009, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "grad_norm": 1.5266140699386597, | |
| "learning_rate": 7.173407202216067e-06, | |
| "loss": 0.0009, | |
| "step": 61750 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "grad_norm": 0.1660241335630417, | |
| "learning_rate": 7.1618651892890125e-06, | |
| "loss": 0.0012, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "eval_loss": 0.27276286482810974, | |
| "eval_runtime": 683.6949, | |
| "eval_samples_per_second": 9.876, | |
| "eval_steps_per_second": 0.309, | |
| "eval_wer": 22.956542436083065, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "grad_norm": 1.5870364904403687, | |
| "learning_rate": 7.1503231763619585e-06, | |
| "loss": 0.0011, | |
| "step": 62250 | |
| }, | |
| { | |
| "epoch": 28.79, | |
| "grad_norm": 0.21234387159347534, | |
| "learning_rate": 7.138781163434904e-06, | |
| "loss": 0.0014, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "grad_norm": 0.25426217913627625, | |
| "learning_rate": 7.127239150507849e-06, | |
| "loss": 0.0012, | |
| "step": 62750 | |
| }, | |
| { | |
| "epoch": 29.02, | |
| "grad_norm": 0.1948922723531723, | |
| "learning_rate": 7.115697137580795e-06, | |
| "loss": 0.0013, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 29.02, | |
| "eval_loss": 0.27328047156333923, | |
| "eval_runtime": 662.6991, | |
| "eval_samples_per_second": 10.189, | |
| "eval_steps_per_second": 0.318, | |
| "eval_wer": 22.79389888331317, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 29.13, | |
| "grad_norm": 0.1855873465538025, | |
| "learning_rate": 7.10415512465374e-06, | |
| "loss": 0.0008, | |
| "step": 63250 | |
| }, | |
| { | |
| "epoch": 29.25, | |
| "grad_norm": 1.3260753154754639, | |
| "learning_rate": 7.092613111726686e-06, | |
| "loss": 0.0007, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 29.36, | |
| "grad_norm": 0.13366416096687317, | |
| "learning_rate": 7.08111726685134e-06, | |
| "loss": 0.0009, | |
| "step": 63750 | |
| }, | |
| { | |
| "epoch": 29.48, | |
| "grad_norm": 0.8121051788330078, | |
| "learning_rate": 7.069575253924285e-06, | |
| "loss": 0.0009, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 29.48, | |
| "eval_loss": 0.272777795791626, | |
| "eval_runtime": 672.0892, | |
| "eval_samples_per_second": 10.046, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 22.861336453973855, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 29.59, | |
| "grad_norm": 0.20612682402133942, | |
| "learning_rate": 7.058033240997231e-06, | |
| "loss": 0.0011, | |
| "step": 64250 | |
| }, | |
| { | |
| "epoch": 29.71, | |
| "grad_norm": 0.37823590636253357, | |
| "learning_rate": 7.046491228070176e-06, | |
| "loss": 0.0011, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 29.82, | |
| "grad_norm": 2.2586910724639893, | |
| "learning_rate": 7.034949215143122e-06, | |
| "loss": 0.0011, | |
| "step": 64750 | |
| }, | |
| { | |
| "epoch": 29.94, | |
| "grad_norm": 0.2618952989578247, | |
| "learning_rate": 7.023407202216067e-06, | |
| "loss": 0.001, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 29.94, | |
| "eval_loss": 0.27243489027023315, | |
| "eval_runtime": 683.4018, | |
| "eval_samples_per_second": 9.88, | |
| "eval_steps_per_second": 0.309, | |
| "eval_wer": 22.87522065969812, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 30.06, | |
| "grad_norm": 0.1083679348230362, | |
| "learning_rate": 7.011865189289012e-06, | |
| "loss": 0.0009, | |
| "step": 65250 | |
| }, | |
| { | |
| "epoch": 30.17, | |
| "grad_norm": 0.12795807421207428, | |
| "learning_rate": 7.000323176361958e-06, | |
| "loss": 0.0006, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 30.29, | |
| "grad_norm": 1.4113242626190186, | |
| "learning_rate": 6.988781163434903e-06, | |
| "loss": 0.0005, | |
| "step": 65750 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "grad_norm": 0.4899054765701294, | |
| "learning_rate": 6.977285318559557e-06, | |
| "loss": 0.0009, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "eval_loss": 0.2714119553565979, | |
| "eval_runtime": 679.0994, | |
| "eval_samples_per_second": 9.943, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 23.03588075450741, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 30.52, | |
| "grad_norm": 0.13397559523582458, | |
| "learning_rate": 6.965743305632503e-06, | |
| "loss": 0.0008, | |
| "step": 66250 | |
| }, | |
| { | |
| "epoch": 30.63, | |
| "grad_norm": 0.22414207458496094, | |
| "learning_rate": 6.954201292705448e-06, | |
| "loss": 0.0008, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 30.75, | |
| "grad_norm": 0.2454010248184204, | |
| "learning_rate": 6.942659279778394e-06, | |
| "loss": 0.0011, | |
| "step": 66750 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "grad_norm": 0.5344116687774658, | |
| "learning_rate": 6.9311172668513394e-06, | |
| "loss": 0.0014, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "eval_loss": 0.27881717681884766, | |
| "eval_runtime": 673.8635, | |
| "eval_samples_per_second": 10.02, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 23.321498700835036, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 30.98, | |
| "grad_norm": 1.2090712785720825, | |
| "learning_rate": 6.919575253924285e-06, | |
| "loss": 0.0012, | |
| "step": 67250 | |
| }, | |
| { | |
| "epoch": 31.09, | |
| "grad_norm": 0.17009182274341583, | |
| "learning_rate": 6.908033240997231e-06, | |
| "loss": 0.0009, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 31.21, | |
| "grad_norm": 0.47179415822029114, | |
| "learning_rate": 6.896491228070176e-06, | |
| "loss": 0.0007, | |
| "step": 67750 | |
| }, | |
| { | |
| "epoch": 31.32, | |
| "grad_norm": 0.2590140402317047, | |
| "learning_rate": 6.884949215143122e-06, | |
| "loss": 0.0007, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 31.32, | |
| "eval_loss": 0.278424471616745, | |
| "eval_runtime": 660.7801, | |
| "eval_samples_per_second": 10.218, | |
| "eval_steps_per_second": 0.319, | |
| "eval_wer": 23.246127298331913, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 31.44, | |
| "grad_norm": 0.6639719009399414, | |
| "learning_rate": 6.873407202216067e-06, | |
| "loss": 0.0009, | |
| "step": 68250 | |
| }, | |
| { | |
| "epoch": 31.55, | |
| "grad_norm": 0.8088191151618958, | |
| "learning_rate": 6.861911357340721e-06, | |
| "loss": 0.001, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 31.67, | |
| "grad_norm": 0.9694509506225586, | |
| "learning_rate": 6.850369344413667e-06, | |
| "loss": 0.001, | |
| "step": 68750 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "grad_norm": 1.2024418115615845, | |
| "learning_rate": 6.838827331486612e-06, | |
| "loss": 0.0009, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "eval_loss": 0.27510857582092285, | |
| "eval_runtime": 673.7472, | |
| "eval_samples_per_second": 10.022, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 23.137037110498444, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 31.9, | |
| "grad_norm": 0.2124684602022171, | |
| "learning_rate": 6.827285318559558e-06, | |
| "loss": 0.0012, | |
| "step": 69250 | |
| }, | |
| { | |
| "epoch": 32.01, | |
| "grad_norm": 0.1560162901878357, | |
| "learning_rate": 6.815743305632503e-06, | |
| "loss": 0.001, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 32.13, | |
| "grad_norm": 0.1794072687625885, | |
| "learning_rate": 6.804201292705448e-06, | |
| "loss": 0.0006, | |
| "step": 69750 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "grad_norm": 0.2194598764181137, | |
| "learning_rate": 6.792659279778394e-06, | |
| "loss": 0.0005, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "eval_loss": 0.27563953399658203, | |
| "eval_runtime": 687.9789, | |
| "eval_samples_per_second": 9.814, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 22.787948509431345, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 32.36, | |
| "grad_norm": 0.11972519010305405, | |
| "learning_rate": 6.781117266851339e-06, | |
| "loss": 0.0007, | |
| "step": 70250 | |
| }, | |
| { | |
| "epoch": 32.47, | |
| "grad_norm": 1.2118364572525024, | |
| "learning_rate": 6.769575253924285e-06, | |
| "loss": 0.0008, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 32.59, | |
| "grad_norm": 0.159651979804039, | |
| "learning_rate": 6.75803324099723e-06, | |
| "loss": 0.0009, | |
| "step": 70750 | |
| }, | |
| { | |
| "epoch": 32.7, | |
| "grad_norm": 1.5151838064193726, | |
| "learning_rate": 6.7464912280701755e-06, | |
| "loss": 0.0009, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 32.7, | |
| "eval_loss": 0.27915722131729126, | |
| "eval_runtime": 689.7748, | |
| "eval_samples_per_second": 9.789, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 22.797865799234383, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 32.82, | |
| "grad_norm": 0.10878114402294159, | |
| "learning_rate": 6.73499538319483e-06, | |
| "loss": 0.0008, | |
| "step": 71250 | |
| }, | |
| { | |
| "epoch": 32.93, | |
| "grad_norm": 0.13962584733963013, | |
| "learning_rate": 6.723453370267775e-06, | |
| "loss": 0.001, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 33.05, | |
| "grad_norm": 2.416551113128662, | |
| "learning_rate": 6.711911357340721e-06, | |
| "loss": 0.0009, | |
| "step": 71750 | |
| }, | |
| { | |
| "epoch": 33.16, | |
| "grad_norm": 0.14477728307247162, | |
| "learning_rate": 6.700369344413666e-06, | |
| "loss": 0.0007, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 33.16, | |
| "eval_loss": 0.2731185853481293, | |
| "eval_runtime": 673.8757, | |
| "eval_samples_per_second": 10.02, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 23.30364757918956, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 33.28, | |
| "grad_norm": 0.0625206008553505, | |
| "learning_rate": 6.6888273314866115e-06, | |
| "loss": 0.0007, | |
| "step": 72250 | |
| }, | |
| { | |
| "epoch": 33.39, | |
| "grad_norm": 0.1424214392900467, | |
| "learning_rate": 6.6772853185595575e-06, | |
| "loss": 0.0008, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 33.51, | |
| "grad_norm": 0.3345101773738861, | |
| "learning_rate": 6.665743305632503e-06, | |
| "loss": 0.0008, | |
| "step": 72750 | |
| }, | |
| { | |
| "epoch": 33.63, | |
| "grad_norm": 1.2112958431243896, | |
| "learning_rate": 6.654201292705449e-06, | |
| "loss": 0.0009, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 33.63, | |
| "eval_loss": 0.2806909680366516, | |
| "eval_runtime": 699.4048, | |
| "eval_samples_per_second": 9.654, | |
| "eval_steps_per_second": 0.302, | |
| "eval_wer": 22.60348691909475, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 33.74, | |
| "grad_norm": 0.6432926058769226, | |
| "learning_rate": 6.642659279778394e-06, | |
| "loss": 0.001, | |
| "step": 73250 | |
| }, | |
| { | |
| "epoch": 33.86, | |
| "grad_norm": 0.5472640991210938, | |
| "learning_rate": 6.6311634349030475e-06, | |
| "loss": 0.0008, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 33.97, | |
| "grad_norm": 0.05136106163263321, | |
| "learning_rate": 6.6196214219759935e-06, | |
| "loss": 0.0008, | |
| "step": 73750 | |
| }, | |
| { | |
| "epoch": 34.09, | |
| "grad_norm": 0.16334278881549835, | |
| "learning_rate": 6.608079409048939e-06, | |
| "loss": 0.0008, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 34.09, | |
| "eval_loss": 0.2772423326969147, | |
| "eval_runtime": 671.9826, | |
| "eval_samples_per_second": 10.048, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 22.41307495487633, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 34.2, | |
| "grad_norm": 0.17405687272548676, | |
| "learning_rate": 6.596537396121884e-06, | |
| "loss": 0.0008, | |
| "step": 74250 | |
| }, | |
| { | |
| "epoch": 34.32, | |
| "grad_norm": 1.0651663541793823, | |
| "learning_rate": 6.58499538319483e-06, | |
| "loss": 0.0008, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 34.43, | |
| "grad_norm": 0.22232329845428467, | |
| "learning_rate": 6.573453370267775e-06, | |
| "loss": 0.0007, | |
| "step": 74750 | |
| }, | |
| { | |
| "epoch": 34.55, | |
| "grad_norm": 0.10098864883184433, | |
| "learning_rate": 6.561911357340721e-06, | |
| "loss": 0.0007, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 34.55, | |
| "eval_loss": 0.2794438600540161, | |
| "eval_runtime": 681.119, | |
| "eval_samples_per_second": 9.913, | |
| "eval_steps_per_second": 0.31, | |
| "eval_wer": 22.53604934843406, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 34.66, | |
| "grad_norm": 0.09176724404096603, | |
| "learning_rate": 6.550369344413666e-06, | |
| "loss": 0.0008, | |
| "step": 75250 | |
| }, | |
| { | |
| "epoch": 34.78, | |
| "grad_norm": 0.3179700970649719, | |
| "learning_rate": 6.538827331486611e-06, | |
| "loss": 0.0008, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 34.89, | |
| "grad_norm": 0.38459789752960205, | |
| "learning_rate": 6.527285318559557e-06, | |
| "loss": 0.0008, | |
| "step": 75750 | |
| }, | |
| { | |
| "epoch": 35.01, | |
| "grad_norm": 0.2603273391723633, | |
| "learning_rate": 6.515743305632502e-06, | |
| "loss": 0.0008, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 35.01, | |
| "eval_loss": 0.2777673900127411, | |
| "eval_runtime": 692.7589, | |
| "eval_samples_per_second": 9.747, | |
| "eval_steps_per_second": 0.305, | |
| "eval_wer": 22.811750004958643, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 35.12, | |
| "grad_norm": 0.051916543394327164, | |
| "learning_rate": 6.504247460757157e-06, | |
| "loss": 0.0007, | |
| "step": 76250 | |
| }, | |
| { | |
| "epoch": 35.24, | |
| "grad_norm": 0.30884623527526855, | |
| "learning_rate": 6.492705447830102e-06, | |
| "loss": 0.0006, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 35.35, | |
| "grad_norm": 0.1257990300655365, | |
| "learning_rate": 6.481163434903047e-06, | |
| "loss": 0.0007, | |
| "step": 76750 | |
| }, | |
| { | |
| "epoch": 35.47, | |
| "grad_norm": 0.08370446413755417, | |
| "learning_rate": 6.469621421975993e-06, | |
| "loss": 0.0008, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 35.47, | |
| "eval_loss": 0.2764694094657898, | |
| "eval_runtime": 692.6631, | |
| "eval_samples_per_second": 9.748, | |
| "eval_steps_per_second": 0.305, | |
| "eval_wer": 22.82960112660412, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 35.58, | |
| "grad_norm": 0.16313733160495758, | |
| "learning_rate": 6.458079409048938e-06, | |
| "loss": 0.0007, | |
| "step": 77250 | |
| }, | |
| { | |
| "epoch": 35.7, | |
| "grad_norm": 1.0557291507720947, | |
| "learning_rate": 6.446537396121884e-06, | |
| "loss": 0.0007, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 35.81, | |
| "grad_norm": 0.2264009267091751, | |
| "learning_rate": 6.4349953831948295e-06, | |
| "loss": 0.0008, | |
| "step": 77750 | |
| }, | |
| { | |
| "epoch": 35.93, | |
| "grad_norm": 0.2705702781677246, | |
| "learning_rate": 6.423453370267775e-06, | |
| "loss": 0.0009, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 35.93, | |
| "eval_loss": 0.27600711584091187, | |
| "eval_runtime": 689.6167, | |
| "eval_samples_per_second": 9.791, | |
| "eval_steps_per_second": 0.306, | |
| "eval_wer": 22.551917012118928, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 36.04, | |
| "grad_norm": 0.2169518917798996, | |
| "learning_rate": 6.411911357340721e-06, | |
| "loss": 0.0006, | |
| "step": 78250 | |
| }, | |
| { | |
| "epoch": 36.16, | |
| "grad_norm": 0.19748559594154358, | |
| "learning_rate": 6.400415512465374e-06, | |
| "loss": 0.0006, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 36.27, | |
| "grad_norm": 1.7767668962478638, | |
| "learning_rate": 6.3888734995383196e-06, | |
| "loss": 0.0006, | |
| "step": 78750 | |
| }, | |
| { | |
| "epoch": 36.39, | |
| "grad_norm": 0.2516990303993225, | |
| "learning_rate": 6.3773314866112655e-06, | |
| "loss": 0.0005, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 36.39, | |
| "eval_loss": 0.2752860188484192, | |
| "eval_runtime": 664.3231, | |
| "eval_samples_per_second": 10.164, | |
| "eval_steps_per_second": 0.318, | |
| "eval_wer": 22.64315607830692, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "grad_norm": 0.05742982402443886, | |
| "learning_rate": 6.365789473684211e-06, | |
| "loss": 0.0005, | |
| "step": 79250 | |
| }, | |
| { | |
| "epoch": 36.62, | |
| "grad_norm": 5.542628765106201, | |
| "learning_rate": 6.354247460757157e-06, | |
| "loss": 0.0009, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 36.73, | |
| "grad_norm": 0.12612101435661316, | |
| "learning_rate": 6.342705447830102e-06, | |
| "loss": 0.0009, | |
| "step": 79750 | |
| }, | |
| { | |
| "epoch": 36.85, | |
| "grad_norm": 1.6482515335083008, | |
| "learning_rate": 6.331163434903047e-06, | |
| "loss": 0.0007, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 36.85, | |
| "eval_loss": 0.2798755466938019, | |
| "eval_runtime": 679.0069, | |
| "eval_samples_per_second": 9.944, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 22.450760656127894, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 36.96, | |
| "grad_norm": 0.1331368237733841, | |
| "learning_rate": 6.319621421975993e-06, | |
| "loss": 0.0007, | |
| "step": 80250 | |
| }, | |
| { | |
| "epoch": 37.08, | |
| "grad_norm": 0.097502700984478, | |
| "learning_rate": 6.308079409048938e-06, | |
| "loss": 0.0006, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 37.19, | |
| "grad_norm": 0.05282368138432503, | |
| "learning_rate": 6.296537396121884e-06, | |
| "loss": 0.0005, | |
| "step": 80750 | |
| }, | |
| { | |
| "epoch": 37.31, | |
| "grad_norm": 0.7441471815109253, | |
| "learning_rate": 6.284995383194829e-06, | |
| "loss": 0.0006, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 37.31, | |
| "eval_loss": 0.2776803970336914, | |
| "eval_runtime": 685.1247, | |
| "eval_samples_per_second": 9.855, | |
| "eval_steps_per_second": 0.308, | |
| "eval_wer": 22.208778784933653, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 37.43, | |
| "grad_norm": 0.12682919204235077, | |
| "learning_rate": 6.273499538319483e-06, | |
| "loss": 0.0006, | |
| "step": 81250 | |
| }, | |
| { | |
| "epoch": 37.54, | |
| "grad_norm": 0.14379066228866577, | |
| "learning_rate": 6.261957525392429e-06, | |
| "loss": 0.0006, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 37.66, | |
| "grad_norm": 0.23371708393096924, | |
| "learning_rate": 6.250415512465374e-06, | |
| "loss": 0.0006, | |
| "step": 81750 | |
| }, | |
| { | |
| "epoch": 37.77, | |
| "grad_norm": 0.21299830079078674, | |
| "learning_rate": 6.238919667590029e-06, | |
| "loss": 0.0008, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 37.77, | |
| "eval_loss": 0.27769771218299866, | |
| "eval_runtime": 686.8168, | |
| "eval_samples_per_second": 9.831, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 22.746295892258566, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 37.89, | |
| "grad_norm": 0.1677282303571701, | |
| "learning_rate": 6.227377654662974e-06, | |
| "loss": 0.0008, | |
| "step": 82250 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "grad_norm": 1.1451334953308105, | |
| "learning_rate": 6.215835641735919e-06, | |
| "loss": 0.0009, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 38.12, | |
| "grad_norm": 0.19697508215904236, | |
| "learning_rate": 6.204293628808865e-06, | |
| "loss": 0.0006, | |
| "step": 82750 | |
| }, | |
| { | |
| "epoch": 38.23, | |
| "grad_norm": 0.07527792453765869, | |
| "learning_rate": 6.19275161588181e-06, | |
| "loss": 0.0007, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 38.23, | |
| "eval_loss": 0.2804949879646301, | |
| "eval_runtime": 675.5771, | |
| "eval_samples_per_second": 9.994, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 22.71257710692822, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 38.35, | |
| "grad_norm": 1.9222028255462646, | |
| "learning_rate": 6.181209602954756e-06, | |
| "loss": 0.0007, | |
| "step": 83250 | |
| }, | |
| { | |
| "epoch": 38.46, | |
| "grad_norm": 0.09556487202644348, | |
| "learning_rate": 6.169667590027701e-06, | |
| "loss": 0.0007, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 38.58, | |
| "grad_norm": 0.21437525749206543, | |
| "learning_rate": 6.1581255771006465e-06, | |
| "loss": 0.0006, | |
| "step": 83750 | |
| }, | |
| { | |
| "epoch": 38.69, | |
| "grad_norm": 0.1807592660188675, | |
| "learning_rate": 6.1465835641735925e-06, | |
| "loss": 0.0007, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 38.69, | |
| "eval_loss": 0.2847980260848999, | |
| "eval_runtime": 672.9632, | |
| "eval_samples_per_second": 10.033, | |
| "eval_steps_per_second": 0.314, | |
| "eval_wer": 22.424975702639983, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 38.81, | |
| "grad_norm": 0.16911369562149048, | |
| "learning_rate": 6.135041551246538e-06, | |
| "loss": 0.0007, | |
| "step": 84250 | |
| }, | |
| { | |
| "epoch": 38.92, | |
| "grad_norm": 1.3031611442565918, | |
| "learning_rate": 6.123545706371191e-06, | |
| "loss": 0.0008, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 39.04, | |
| "grad_norm": 0.04561692103743553, | |
| "learning_rate": 6.112003693444137e-06, | |
| "loss": 0.0006, | |
| "step": 84750 | |
| }, | |
| { | |
| "epoch": 39.15, | |
| "grad_norm": 0.061062462627887726, | |
| "learning_rate": 6.1004616805170825e-06, | |
| "loss": 0.0003, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 39.15, | |
| "eval_loss": 0.2790899872779846, | |
| "eval_runtime": 674.5949, | |
| "eval_samples_per_second": 10.009, | |
| "eval_steps_per_second": 0.313, | |
| "eval_wer": 22.01439990479402, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 39.27, | |
| "grad_norm": 0.07837537676095963, | |
| "learning_rate": 6.0889196675900285e-06, | |
| "loss": 0.0004, | |
| "step": 85250 | |
| }, | |
| { | |
| "epoch": 39.38, | |
| "grad_norm": 0.05536266788840294, | |
| "learning_rate": 6.077377654662974e-06, | |
| "loss": 0.0006, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 39.5, | |
| "grad_norm": 5.097941875457764, | |
| "learning_rate": 6.065835641735919e-06, | |
| "loss": 0.0009, | |
| "step": 85750 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "grad_norm": 0.07944060117006302, | |
| "learning_rate": 6.054293628808865e-06, | |
| "loss": 0.0006, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 39.61, | |
| "eval_loss": 0.2777423858642578, | |
| "eval_runtime": 687.4077, | |
| "eval_samples_per_second": 9.822, | |
| "eval_steps_per_second": 0.307, | |
| "eval_wer": 22.262332149870083, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 39.73, | |
| "grad_norm": 1.522270917892456, | |
| "learning_rate": 6.04275161588181e-06, | |
| "loss": 0.0006, | |
| "step": 86250 | |
| }, | |
| { | |
| "epoch": 39.84, | |
| "grad_norm": 0.05595465004444122, | |
| "learning_rate": 6.031209602954756e-06, | |
| "loss": 0.0006, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 39.96, | |
| "grad_norm": 0.5327405333518982, | |
| "learning_rate": 6.019667590027701e-06, | |
| "loss": 0.0005, | |
| "step": 86750 | |
| }, | |
| { | |
| "epoch": 40.07, | |
| "grad_norm": 0.18009261786937714, | |
| "learning_rate": 6.008125577100646e-06, | |
| "loss": 0.0003, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 40.07, | |
| "eval_loss": 0.2798568308353424, | |
| "eval_runtime": 666.6019, | |
| "eval_samples_per_second": 10.129, | |
| "eval_steps_per_second": 0.317, | |
| "eval_wer": 22.0798540174941, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 40.19, | |
| "grad_norm": 0.15836778283119202, | |
| "learning_rate": 5.996583564173592e-06, | |
| "loss": 0.0005, | |
| "step": 87250 | |
| }, | |
| { | |
| "epoch": 40.3, | |
| "grad_norm": 0.06244779750704765, | |
| "learning_rate": 5.985041551246537e-06, | |
| "loss": 0.0005, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 40.42, | |
| "grad_norm": 0.47360849380493164, | |
| "learning_rate": 5.973499538319484e-06, | |
| "loss": 0.0005, | |
| "step": 87750 | |
| }, | |
| { | |
| "epoch": 40.53, | |
| "grad_norm": 0.12535277009010315, | |
| "learning_rate": 5.9619575253924285e-06, | |
| "loss": 0.0005, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 40.53, | |
| "eval_loss": 0.2800135612487793, | |
| "eval_runtime": 683.9129, | |
| "eval_samples_per_second": 9.873, | |
| "eval_steps_per_second": 0.309, | |
| "eval_wer": 22.23059682250035, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 40.65, | |
| "grad_norm": 0.06063379347324371, | |
| "learning_rate": 5.950415512465374e-06, | |
| "loss": 0.0007, | |
| "step": 88250 | |
| }, | |
| { | |
| "epoch": 40.76, | |
| "grad_norm": 0.14233584702014923, | |
| "learning_rate": 5.9388734995383205e-06, | |
| "loss": 0.0005, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 40.88, | |
| "grad_norm": 2.8523402214050293, | |
| "learning_rate": 5.927331486611266e-06, | |
| "loss": 0.0006, | |
| "step": 88750 | |
| }, | |
| { | |
| "epoch": 40.99, | |
| "grad_norm": 0.20620940625667572, | |
| "learning_rate": 5.915789473684212e-06, | |
| "loss": 0.0007, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 40.99, | |
| "eval_loss": 0.2811349332332611, | |
| "eval_runtime": 676.2894, | |
| "eval_samples_per_second": 9.984, | |
| "eval_steps_per_second": 0.312, | |
| "eval_wer": 22.298034393161036, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 41.11, | |
| "grad_norm": 0.5996530652046204, | |
| "learning_rate": 5.904247460757157e-06, | |
| "loss": 0.0004, | |
| "step": 89250 | |
| }, | |
| { | |
| "epoch": 41.23, | |
| "grad_norm": 0.19429056346416473, | |
| "learning_rate": 5.892705447830102e-06, | |
| "loss": 0.0004, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 41.34, | |
| "grad_norm": 0.07230094075202942, | |
| "learning_rate": 5.881163434903048e-06, | |
| "loss": 0.0005, | |
| "step": 89750 | |
| }, | |
| { | |
| "epoch": 41.46, | |
| "grad_norm": 1.4591439962387085, | |
| "learning_rate": 5.869621421975993e-06, | |
| "loss": 0.0004, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 41.46, | |
| "eval_loss": 0.2819642424583435, | |
| "eval_runtime": 679.526, | |
| "eval_samples_per_second": 9.936, | |
| "eval_steps_per_second": 0.311, | |
| "eval_wer": 22.71257710692822, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 41.57, | |
| "grad_norm": 0.07382282614707947, | |
| "learning_rate": 5.858079409048939e-06, | |
| "loss": 0.0005, | |
| "step": 90250 | |
| }, | |
| { | |
| "epoch": 41.69, | |
| "grad_norm": 0.9907983541488647, | |
| "learning_rate": 5.846583564173593e-06, | |
| "loss": 0.0006, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 41.8, | |
| "grad_norm": 0.11259205639362335, | |
| "learning_rate": 5.835041551246537e-06, | |
| "loss": 0.0006, | |
| "step": 90750 | |
| }, | |
| { | |
| "epoch": 41.92, | |
| "grad_norm": 0.06520923972129822, | |
| "learning_rate": 5.823499538319484e-06, | |
| "loss": 0.0006, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 41.92, | |
| "eval_loss": 0.28300294280052185, | |
| "eval_runtime": 684.3308, | |
| "eval_samples_per_second": 9.867, | |
| "eval_steps_per_second": 0.308, | |
| "eval_wer": 21.95886308189698, | |
| "step": 91000 | |
| } | |
| ], | |
| "logging_steps": 250, | |
| "max_steps": 217100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 1000, | |
| "total_flos": 1.4334909633589248e+20, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |