| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 49.99782451051487, | |
| "global_step": 17200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.7125e-06, | |
| "loss": 12.2762, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.4625e-06, | |
| "loss": 6.2128, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.1212499999999998e-05, | |
| "loss": 4.138, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.49625e-05, | |
| "loss": 3.6755, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.8712499999999997e-05, | |
| "loss": 3.3332, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 3.3030903339385986, | |
| "eval_runtime": 129.9278, | |
| "eval_samples_per_second": 35.558, | |
| "eval_steps_per_second": 4.449, | |
| "eval_wer": 1.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2462499999999997e-05, | |
| "loss": 3.1386, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 2.6212499999999997e-05, | |
| "loss": 3.0898, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.99625e-05, | |
| "loss": 3.0216, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.37125e-05, | |
| "loss": 2.9594, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.7462499999999996e-05, | |
| "loss": 2.9272, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 2.9353110790252686, | |
| "eval_runtime": 125.4064, | |
| "eval_samples_per_second": 36.84, | |
| "eval_steps_per_second": 4.609, | |
| "eval_wer": 0.9970017744600135, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 4.12125e-05, | |
| "loss": 2.914, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 4.4962499999999995e-05, | |
| "loss": 2.8467, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 4.871249999999999e-05, | |
| "loss": 2.6884, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 5.2462499999999994e-05, | |
| "loss": 2.3644, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 5.62125e-05, | |
| "loss": 2.0736, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "eval_loss": 1.1564792394638062, | |
| "eval_runtime": 127.7559, | |
| "eval_samples_per_second": 36.163, | |
| "eval_steps_per_second": 4.524, | |
| "eval_wer": 0.8714128373003732, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 5.9962499999999994e-05, | |
| "loss": 1.9481, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 6.37125e-05, | |
| "loss": 1.878, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 6.746249999999999e-05, | |
| "loss": 1.8085, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 7.121249999999999e-05, | |
| "loss": 1.7549, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 7.49625e-05, | |
| "loss": 1.7339, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "eval_loss": 0.7155577540397644, | |
| "eval_runtime": 125.3514, | |
| "eval_samples_per_second": 36.856, | |
| "eval_steps_per_second": 4.611, | |
| "eval_wer": 0.6687572661078137, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 7.451151315789474e-05, | |
| "loss": 1.696, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 7.402302631578947e-05, | |
| "loss": 1.6408, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 7.352960526315788e-05, | |
| "loss": 1.6268, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 7.304111842105262e-05, | |
| "loss": 1.5878, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 7.254769736842105e-05, | |
| "loss": 1.5989, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "eval_loss": 0.5790680646896362, | |
| "eval_runtime": 125.9295, | |
| "eval_samples_per_second": 36.687, | |
| "eval_steps_per_second": 4.59, | |
| "eval_wer": 0.5519182524628281, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 7.205427631578947e-05, | |
| "loss": 1.5733, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 7.156085526315789e-05, | |
| "loss": 1.5318, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 7.106743421052632e-05, | |
| "loss": 1.5481, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 7.057401315789473e-05, | |
| "loss": 1.5188, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 7.008059210526316e-05, | |
| "loss": 1.4916, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "eval_loss": 0.5038185119628906, | |
| "eval_runtime": 126.4527, | |
| "eval_samples_per_second": 36.535, | |
| "eval_steps_per_second": 4.571, | |
| "eval_wer": 0.5168879642660467, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 6.958717105263157e-05, | |
| "loss": 1.507, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 6.909375e-05, | |
| "loss": 1.4638, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 6.860032894736842e-05, | |
| "loss": 1.4799, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "learning_rate": 6.810690789473684e-05, | |
| "loss": 1.4729, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "learning_rate": 6.761842105263158e-05, | |
| "loss": 1.4562, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "eval_loss": 0.4860605299472809, | |
| "eval_runtime": 126.424, | |
| "eval_samples_per_second": 36.544, | |
| "eval_steps_per_second": 4.572, | |
| "eval_wer": 0.48048093985192436, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 10.46, | |
| "learning_rate": 6.712499999999999e-05, | |
| "loss": 1.4289, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 6.663157894736842e-05, | |
| "loss": 1.4292, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "learning_rate": 6.613815789473683e-05, | |
| "loss": 1.4459, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "learning_rate": 6.564473684210526e-05, | |
| "loss": 1.4098, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 6.515131578947369e-05, | |
| "loss": 1.3893, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "eval_loss": 0.45840778946876526, | |
| "eval_runtime": 124.6037, | |
| "eval_samples_per_second": 37.078, | |
| "eval_steps_per_second": 4.639, | |
| "eval_wer": 0.4761365722327602, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 6.46578947368421e-05, | |
| "loss": 1.4025, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 12.21, | |
| "learning_rate": 6.416447368421053e-05, | |
| "loss": 1.3738, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 6.367105263157894e-05, | |
| "loss": 1.3743, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 12.79, | |
| "learning_rate": 6.317763157894737e-05, | |
| "loss": 1.3751, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "learning_rate": 6.268421052631578e-05, | |
| "loss": 1.3797, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "eval_loss": 0.4297567903995514, | |
| "eval_runtime": 127.5647, | |
| "eval_samples_per_second": 36.217, | |
| "eval_steps_per_second": 4.531, | |
| "eval_wer": 0.46861041424463074, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "learning_rate": 6.21907894736842e-05, | |
| "loss": 1.3529, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 13.66, | |
| "learning_rate": 6.169736842105262e-05, | |
| "loss": 1.3587, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 6.120394736842105e-05, | |
| "loss": 1.3445, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "learning_rate": 6.0710526315789474e-05, | |
| "loss": 1.339, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "learning_rate": 6.021710526315789e-05, | |
| "loss": 1.3508, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "eval_loss": 0.4138409197330475, | |
| "eval_runtime": 126.3715, | |
| "eval_samples_per_second": 36.559, | |
| "eval_steps_per_second": 4.574, | |
| "eval_wer": 0.37438046870219666, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 14.82, | |
| "learning_rate": 5.972368421052631e-05, | |
| "loss": 1.3324, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "learning_rate": 5.9230263157894734e-05, | |
| "loss": 1.3305, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 15.41, | |
| "learning_rate": 5.873684210526315e-05, | |
| "loss": 1.323, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 5.8243421052631574e-05, | |
| "loss": 1.3168, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 5.7749999999999994e-05, | |
| "loss": 1.3165, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_loss": 0.4014705717563629, | |
| "eval_runtime": 126.7978, | |
| "eval_samples_per_second": 36.436, | |
| "eval_steps_per_second": 4.558, | |
| "eval_wer": 0.3577984458177813, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 16.28, | |
| "learning_rate": 5.725657894736842e-05, | |
| "loss": 1.3181, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "learning_rate": 5.6763157894736834e-05, | |
| "loss": 1.3059, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "learning_rate": 5.626973684210526e-05, | |
| "loss": 1.2762, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 17.15, | |
| "learning_rate": 5.577631578947368e-05, | |
| "loss": 1.3141, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 17.44, | |
| "learning_rate": 5.52828947368421e-05, | |
| "loss": 1.281, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 17.44, | |
| "eval_loss": 0.3883207142353058, | |
| "eval_runtime": 127.3552, | |
| "eval_samples_per_second": 36.276, | |
| "eval_steps_per_second": 4.538, | |
| "eval_wer": 0.3472434681515022, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 17.73, | |
| "learning_rate": 5.478947368421052e-05, | |
| "loss": 1.28, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "learning_rate": 5.429605263157895e-05, | |
| "loss": 1.2915, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 18.31, | |
| "learning_rate": 5.380263157894736e-05, | |
| "loss": 1.275, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "learning_rate": 5.330921052631578e-05, | |
| "loss": 1.2683, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 18.89, | |
| "learning_rate": 5.281578947368421e-05, | |
| "loss": 1.2682, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 18.89, | |
| "eval_loss": 0.3903850018978119, | |
| "eval_runtime": 126.1551, | |
| "eval_samples_per_second": 36.622, | |
| "eval_steps_per_second": 4.582, | |
| "eval_wer": 0.3433886067429481, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 19.19, | |
| "learning_rate": 5.232236842105262e-05, | |
| "loss": 1.2515, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 19.48, | |
| "learning_rate": 5.182894736842105e-05, | |
| "loss": 1.267, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 19.77, | |
| "learning_rate": 5.133552631578947e-05, | |
| "loss": 1.2424, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 20.06, | |
| "learning_rate": 5.084210526315789e-05, | |
| "loss": 1.2384, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 20.35, | |
| "learning_rate": 5.0353618421052625e-05, | |
| "loss": 1.2477, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 20.35, | |
| "eval_loss": 0.37260007858276367, | |
| "eval_runtime": 128.6776, | |
| "eval_samples_per_second": 35.904, | |
| "eval_steps_per_second": 4.492, | |
| "eval_wer": 0.3320687756225907, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 20.64, | |
| "learning_rate": 4.9860197368421045e-05, | |
| "loss": 1.2452, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "learning_rate": 4.936677631578947e-05, | |
| "loss": 1.2384, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 21.22, | |
| "learning_rate": 4.8873355263157886e-05, | |
| "loss": 1.244, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 21.51, | |
| "learning_rate": 4.837993421052631e-05, | |
| "loss": 1.2226, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 21.8, | |
| "learning_rate": 4.788651315789473e-05, | |
| "loss": 1.2364, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 21.8, | |
| "eval_loss": 0.368549108505249, | |
| "eval_runtime": 126.3453, | |
| "eval_samples_per_second": 36.566, | |
| "eval_steps_per_second": 4.575, | |
| "eval_wer": 0.3280915376613841, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 22.09, | |
| "learning_rate": 4.739309210526315e-05, | |
| "loss": 1.2456, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 22.38, | |
| "learning_rate": 4.689967105263157e-05, | |
| "loss": 1.2106, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "learning_rate": 4.640625e-05, | |
| "loss": 1.2171, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 22.96, | |
| "learning_rate": 4.591282894736841e-05, | |
| "loss": 1.2253, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "learning_rate": 4.541940789473684e-05, | |
| "loss": 1.2041, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "eval_loss": 0.35974061489105225, | |
| "eval_runtime": 125.2529, | |
| "eval_samples_per_second": 36.885, | |
| "eval_steps_per_second": 4.615, | |
| "eval_wer": 0.31940280242305574, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "learning_rate": 4.492598684210526e-05, | |
| "loss": 1.21, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 23.84, | |
| "learning_rate": 4.443256578947369e-05, | |
| "loss": 1.1871, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 24.13, | |
| "learning_rate": 4.39391447368421e-05, | |
| "loss": 1.2241, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 24.42, | |
| "learning_rate": 4.344572368421052e-05, | |
| "loss": 1.2148, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 24.71, | |
| "learning_rate": 4.295230263157895e-05, | |
| "loss": 1.1901, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 24.71, | |
| "eval_loss": 0.3542426526546478, | |
| "eval_runtime": 126.861, | |
| "eval_samples_per_second": 36.418, | |
| "eval_steps_per_second": 4.556, | |
| "eval_wer": 0.32029003242978643, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 4.245888157894736e-05, | |
| "loss": 1.2091, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 25.29, | |
| "learning_rate": 4.196546052631579e-05, | |
| "loss": 1.2028, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 25.58, | |
| "learning_rate": 4.147203947368421e-05, | |
| "loss": 1.1999, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 25.87, | |
| "learning_rate": 4.097861842105263e-05, | |
| "loss": 1.165, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "learning_rate": 4.048519736842105e-05, | |
| "loss": 1.1903, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "eval_loss": 0.3500427007675171, | |
| "eval_runtime": 126.1461, | |
| "eval_samples_per_second": 36.624, | |
| "eval_steps_per_second": 4.582, | |
| "eval_wer": 0.3137734810010402, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "learning_rate": 3.9991776315789475e-05, | |
| "loss": 1.1652, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 26.74, | |
| "learning_rate": 3.949835526315789e-05, | |
| "loss": 1.1751, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 27.03, | |
| "learning_rate": 3.9004934210526315e-05, | |
| "loss": 1.1955, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 27.32, | |
| "learning_rate": 3.851644736842105e-05, | |
| "loss": 1.16, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 27.61, | |
| "learning_rate": 3.802302631578947e-05, | |
| "loss": 1.1677, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 27.61, | |
| "eval_loss": 0.34578773379325867, | |
| "eval_runtime": 126.776, | |
| "eval_samples_per_second": 36.442, | |
| "eval_steps_per_second": 4.559, | |
| "eval_wer": 0.30673682922352075, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 27.91, | |
| "learning_rate": 3.752960526315789e-05, | |
| "loss": 1.142, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 28.2, | |
| "learning_rate": 3.703618421052631e-05, | |
| "loss": 1.1598, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 28.49, | |
| "learning_rate": 3.654276315789473e-05, | |
| "loss": 1.1494, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 28.78, | |
| "learning_rate": 3.604934210526316e-05, | |
| "loss": 1.1524, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "learning_rate": 3.555592105263158e-05, | |
| "loss": 1.1718, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "eval_loss": 0.3594951629638672, | |
| "eval_runtime": 125.5582, | |
| "eval_samples_per_second": 36.796, | |
| "eval_steps_per_second": 4.603, | |
| "eval_wer": 0.3111729792571743, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 29.36, | |
| "learning_rate": 3.50625e-05, | |
| "loss": 1.1492, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 29.65, | |
| "learning_rate": 3.456907894736842e-05, | |
| "loss": 1.1436, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 29.94, | |
| "learning_rate": 3.407565789473684e-05, | |
| "loss": 1.1438, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 30.23, | |
| "learning_rate": 3.358223684210526e-05, | |
| "loss": 1.1366, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 30.52, | |
| "learning_rate": 3.3093749999999995e-05, | |
| "loss": 1.1562, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 30.52, | |
| "eval_loss": 0.3433065414428711, | |
| "eval_runtime": 126.4498, | |
| "eval_samples_per_second": 36.536, | |
| "eval_steps_per_second": 4.571, | |
| "eval_wer": 0.30223949091354096, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 30.81, | |
| "learning_rate": 3.260032894736842e-05, | |
| "loss": 1.1504, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 31.1, | |
| "learning_rate": 3.210690789473684e-05, | |
| "loss": 1.1289, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 31.39, | |
| "learning_rate": 3.161348684210526e-05, | |
| "loss": 1.1363, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 31.68, | |
| "learning_rate": 3.112006578947368e-05, | |
| "loss": 1.125, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 31.97, | |
| "learning_rate": 3.06266447368421e-05, | |
| "loss": 1.1392, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 31.97, | |
| "eval_loss": 0.3440292775630951, | |
| "eval_runtime": 125.688, | |
| "eval_samples_per_second": 36.758, | |
| "eval_steps_per_second": 4.599, | |
| "eval_wer": 0.2935507556752126, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 32.27, | |
| "learning_rate": 3.0133223684210525e-05, | |
| "loss": 1.1342, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 32.56, | |
| "learning_rate": 2.9639802631578946e-05, | |
| "loss": 1.1125, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 32.85, | |
| "learning_rate": 2.9146381578947366e-05, | |
| "loss": 1.1312, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 33.14, | |
| "learning_rate": 2.865296052631579e-05, | |
| "loss": 1.1122, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 33.43, | |
| "learning_rate": 2.8159539473684206e-05, | |
| "loss": 1.1258, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 33.43, | |
| "eval_loss": 0.3396157920360565, | |
| "eval_runtime": 128.0055, | |
| "eval_samples_per_second": 36.092, | |
| "eval_steps_per_second": 4.515, | |
| "eval_wer": 0.2949580860307165, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 33.72, | |
| "learning_rate": 2.7666118421052626e-05, | |
| "loss": 1.0971, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 34.01, | |
| "learning_rate": 2.717269736842105e-05, | |
| "loss": 1.125, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 34.3, | |
| "learning_rate": 2.667927631578947e-05, | |
| "loss": 1.1112, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 34.59, | |
| "learning_rate": 2.6185855263157893e-05, | |
| "loss": 1.1201, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 34.88, | |
| "learning_rate": 2.5692434210526313e-05, | |
| "loss": 1.1067, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 34.88, | |
| "eval_loss": 0.33788973093032837, | |
| "eval_runtime": 126.1083, | |
| "eval_samples_per_second": 36.635, | |
| "eval_steps_per_second": 4.583, | |
| "eval_wer": 0.29391788533317015, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 35.17, | |
| "learning_rate": 2.5199013157894733e-05, | |
| "loss": 1.1007, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 35.46, | |
| "learning_rate": 2.4705592105263157e-05, | |
| "loss": 1.1034, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 35.75, | |
| "learning_rate": 2.4212171052631577e-05, | |
| "loss": 1.0804, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 36.05, | |
| "learning_rate": 2.3718749999999997e-05, | |
| "loss": 1.1141, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 36.34, | |
| "learning_rate": 2.322532894736842e-05, | |
| "loss": 1.0953, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 36.34, | |
| "eval_loss": 0.33700206875801086, | |
| "eval_runtime": 125.9293, | |
| "eval_samples_per_second": 36.687, | |
| "eval_steps_per_second": 4.59, | |
| "eval_wer": 0.28675885700299825, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 36.63, | |
| "learning_rate": 2.273190789473684e-05, | |
| "loss": 1.106, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "learning_rate": 2.2238486842105264e-05, | |
| "loss": 1.1017, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 37.21, | |
| "learning_rate": 2.174506578947368e-05, | |
| "loss": 1.0888, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "learning_rate": 2.12516447368421e-05, | |
| "loss": 1.0685, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 37.79, | |
| "learning_rate": 2.0758223684210525e-05, | |
| "loss": 1.0835, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 37.79, | |
| "eval_loss": 0.33165690302848816, | |
| "eval_runtime": 126.2757, | |
| "eval_samples_per_second": 36.587, | |
| "eval_steps_per_second": 4.577, | |
| "eval_wer": 0.28602459768708316, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 38.08, | |
| "learning_rate": 2.0264802631578945e-05, | |
| "loss": 1.0903, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 38.37, | |
| "learning_rate": 1.9771381578947365e-05, | |
| "loss": 1.0963, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 38.66, | |
| "learning_rate": 1.927796052631579e-05, | |
| "loss": 1.0671, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 38.95, | |
| "learning_rate": 1.878453947368421e-05, | |
| "loss": 1.086, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 39.24, | |
| "learning_rate": 1.829111842105263e-05, | |
| "loss": 1.0772, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 39.24, | |
| "eval_loss": 0.33021438121795654, | |
| "eval_runtime": 126.3484, | |
| "eval_samples_per_second": 36.566, | |
| "eval_steps_per_second": 4.575, | |
| "eval_wer": 0.28538212078565744, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 39.53, | |
| "learning_rate": 1.7797697368421052e-05, | |
| "loss": 1.0555, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 39.82, | |
| "learning_rate": 1.7304276315789472e-05, | |
| "loss": 1.0821, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 40.12, | |
| "learning_rate": 1.6810855263157892e-05, | |
| "loss": 1.0802, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 40.41, | |
| "learning_rate": 1.6317434210526316e-05, | |
| "loss": 1.0585, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 40.7, | |
| "learning_rate": 1.5824013157894736e-05, | |
| "loss": 1.0853, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 40.7, | |
| "eval_loss": 0.3264598250389099, | |
| "eval_runtime": 126.0436, | |
| "eval_samples_per_second": 36.654, | |
| "eval_steps_per_second": 4.586, | |
| "eval_wer": 0.2783148748699749, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 40.99, | |
| "learning_rate": 1.5330592105263156e-05, | |
| "loss": 1.0629, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 41.28, | |
| "learning_rate": 1.4837171052631578e-05, | |
| "loss": 1.0672, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 41.57, | |
| "learning_rate": 1.4348684210526315e-05, | |
| "loss": 1.0552, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 41.86, | |
| "learning_rate": 1.3855263157894737e-05, | |
| "loss": 1.0349, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 42.15, | |
| "learning_rate": 1.3361842105263156e-05, | |
| "loss": 1.0689, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 42.15, | |
| "eval_loss": 0.33058255910873413, | |
| "eval_runtime": 126.6193, | |
| "eval_samples_per_second": 36.487, | |
| "eval_steps_per_second": 4.565, | |
| "eval_wer": 0.2769993269289604, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 42.44, | |
| "learning_rate": 1.2868421052631577e-05, | |
| "loss": 1.0482, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 42.73, | |
| "learning_rate": 1.2375e-05, | |
| "loss": 1.0354, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 43.02, | |
| "learning_rate": 1.188157894736842e-05, | |
| "loss": 1.0682, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 43.31, | |
| "learning_rate": 1.1388157894736841e-05, | |
| "loss": 1.0402, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "learning_rate": 1.0894736842105263e-05, | |
| "loss": 1.0394, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "eval_loss": 0.32332995533943176, | |
| "eval_runtime": 129.4205, | |
| "eval_samples_per_second": 35.698, | |
| "eval_steps_per_second": 4.466, | |
| "eval_wer": 0.2756531848497828, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 43.89, | |
| "learning_rate": 1.0401315789473685e-05, | |
| "loss": 1.0715, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 44.19, | |
| "learning_rate": 9.907894736842103e-06, | |
| "loss": 1.0456, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 44.48, | |
| "learning_rate": 9.414473684210525e-06, | |
| "loss": 1.0326, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 44.77, | |
| "learning_rate": 8.921052631578947e-06, | |
| "loss": 1.0389, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 45.06, | |
| "learning_rate": 8.427631578947367e-06, | |
| "loss": 1.0581, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 45.06, | |
| "eval_loss": 0.31987854838371277, | |
| "eval_runtime": 125.2957, | |
| "eval_samples_per_second": 36.873, | |
| "eval_steps_per_second": 4.613, | |
| "eval_wer": 0.2713088172306186, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 45.35, | |
| "learning_rate": 7.934210526315789e-06, | |
| "loss": 1.0552, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 45.64, | |
| "learning_rate": 7.44078947368421e-06, | |
| "loss": 1.0408, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 45.93, | |
| "learning_rate": 6.9473684210526315e-06, | |
| "loss": 1.0416, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 46.22, | |
| "learning_rate": 6.453947368421052e-06, | |
| "loss": 1.0322, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 46.51, | |
| "learning_rate": 5.9605263157894735e-06, | |
| "loss": 1.0362, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 46.51, | |
| "eval_loss": 0.3154148459434509, | |
| "eval_runtime": 127.002, | |
| "eval_samples_per_second": 36.377, | |
| "eval_steps_per_second": 4.551, | |
| "eval_wer": 0.26827999755246895, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 46.8, | |
| "learning_rate": 5.467105263157894e-06, | |
| "loss": 1.0543, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 47.09, | |
| "learning_rate": 4.973684210526316e-06, | |
| "loss": 1.047, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 47.38, | |
| "learning_rate": 4.480263157894736e-06, | |
| "loss": 1.0112, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 47.67, | |
| "learning_rate": 3.986842105263157e-06, | |
| "loss": 1.0115, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 47.96, | |
| "learning_rate": 3.4934210526315787e-06, | |
| "loss": 1.0406, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 47.96, | |
| "eval_loss": 0.3175775110721588, | |
| "eval_runtime": 126.2727, | |
| "eval_samples_per_second": 36.587, | |
| "eval_steps_per_second": 4.577, | |
| "eval_wer": 0.2688000979012421, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 48.26, | |
| "learning_rate": 2.9999999999999997e-06, | |
| "loss": 1.0288, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 48.55, | |
| "learning_rate": 2.506578947368421e-06, | |
| "loss": 1.0301, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 48.84, | |
| "learning_rate": 2.013157894736842e-06, | |
| "loss": 1.0351, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 49.13, | |
| "learning_rate": 1.519736842105263e-06, | |
| "loss": 1.0242, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 49.42, | |
| "learning_rate": 1.0263157894736841e-06, | |
| "loss": 1.0082, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 49.42, | |
| "eval_loss": 0.3149297535419464, | |
| "eval_runtime": 125.5652, | |
| "eval_samples_per_second": 36.794, | |
| "eval_steps_per_second": 4.603, | |
| "eval_wer": 0.2679128678945114, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 49.71, | |
| "learning_rate": 5.328947368421052e-07, | |
| "loss": 1.0334, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 3.947368421052631e-08, | |
| "loss": 1.026, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "step": 17200, | |
| "total_flos": 5.074861809449127e+19, | |
| "train_loss": 1.453643293602522, | |
| "train_runtime": 28316.9848, | |
| "train_samples_per_second": 19.476, | |
| "train_steps_per_second": 0.607 | |
| } | |
| ], | |
| "max_steps": 17200, | |
| "num_train_epochs": 50, | |
| "total_flos": 5.074861809449127e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |