| { |
| "best_global_step": 12000, |
| "best_metric": 51.341187305729306, |
| "best_model_checkpoint": "./whisper-medium-ml-exp2/checkpoint-12000", |
| "epoch": 4.111466666666667, |
| "eval_steps": 500, |
| "global_step": 15000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 1.354724407196045, |
| "learning_rate": 9.940000000000001e-06, |
| "loss": 0.3119, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "eval_loss": 0.4192824959754944, |
| "eval_runtime": 3090.4729, |
| "eval_samples_per_second": 1.902, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 79.09167658006831, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 0.926690399646759, |
| "learning_rate": 9.657241379310346e-06, |
| "loss": 0.0405, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "eval_loss": 0.4335130751132965, |
| "eval_runtime": 2992.1696, |
| "eval_samples_per_second": 1.964, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 73.58302313979816, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.8192572593688965, |
| "learning_rate": 9.312413793103448e-06, |
| "loss": 0.0355, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_loss": 0.43317151069641113, |
| "eval_runtime": 3082.3837, |
| "eval_samples_per_second": 1.907, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 77.27848344142139, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 1.0174602270126343, |
| "learning_rate": 8.967586206896553e-06, |
| "loss": 0.126, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "eval_loss": 0.19669494032859802, |
| "eval_runtime": 3003.1726, |
| "eval_samples_per_second": 1.957, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 58.40208757051306, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 1.0093193054199219, |
| "learning_rate": 8.622758620689657e-06, |
| "loss": 0.0519, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "eval_loss": 0.18607404828071594, |
| "eval_runtime": 3062.2455, |
| "eval_samples_per_second": 1.92, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 58.567097739744426, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.2591651678085327, |
| "learning_rate": 8.27793103448276e-06, |
| "loss": 0.0439, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_loss": 0.19417671859264374, |
| "eval_runtime": 2997.3111, |
| "eval_samples_per_second": 1.961, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 57.42737633830922, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0112, |
| "grad_norm": 1.0831571817398071, |
| "learning_rate": 7.933103448275864e-06, |
| "loss": 0.0534, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0112, |
| "eval_loss": 0.193574920296669, |
| "eval_runtime": 2956.9818, |
| "eval_samples_per_second": 1.988, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 61.14969876050501, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.0445333333333333, |
| "grad_norm": 1.4980469942092896, |
| "learning_rate": 7.588275862068966e-06, |
| "loss": 0.0214, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0445333333333333, |
| "eval_loss": 0.22530874609947205, |
| "eval_runtime": 3078.9727, |
| "eval_samples_per_second": 1.909, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 59.78164933420316, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0778666666666668, |
| "grad_norm": 0.5621655583381653, |
| "learning_rate": 7.243448275862069e-06, |
| "loss": 0.0129, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.0778666666666668, |
| "eval_loss": 0.26299336552619934, |
| "eval_runtime": 2998.2811, |
| "eval_samples_per_second": 1.96, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 61.061437507195215, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.1112, |
| "grad_norm": 1.5936987400054932, |
| "learning_rate": 6.9e-06, |
| "loss": 0.048, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1112, |
| "eval_loss": 0.17795228958129883, |
| "eval_runtime": 2987.0925, |
| "eval_samples_per_second": 1.968, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 56.3605664069995, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.1445333333333334, |
| "grad_norm": 0.48164331912994385, |
| "learning_rate": 6.555172413793104e-06, |
| "loss": 0.047, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.1445333333333334, |
| "eval_loss": 0.16377924382686615, |
| "eval_runtime": 2984.4656, |
| "eval_samples_per_second": 1.97, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 52.99512644383898, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.1778666666666666, |
| "grad_norm": 0.45277634263038635, |
| "learning_rate": 6.2103448275862075e-06, |
| "loss": 0.0325, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.1778666666666666, |
| "eval_loss": 0.16828514635562897, |
| "eval_runtime": 3034.653, |
| "eval_samples_per_second": 1.937, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 54.55121071414867, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.2112, |
| "grad_norm": 0.6970316767692566, |
| "learning_rate": 5.865517241379311e-06, |
| "loss": 0.0293, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.2112, |
| "eval_loss": 0.16886456310749054, |
| "eval_runtime": 3048.6335, |
| "eval_samples_per_second": 1.928, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 57.24509766299551, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.0224, |
| "grad_norm": 0.521206259727478, |
| "learning_rate": 5.520689655172414e-06, |
| "loss": 0.028, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0224, |
| "eval_loss": 0.21454408764839172, |
| "eval_runtime": 2924.4034, |
| "eval_samples_per_second": 2.01, |
| "eval_steps_per_second": 0.063, |
| "eval_wer": 56.52365785333282, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.0557333333333334, |
| "grad_norm": 0.5115911364555359, |
| "learning_rate": 5.175862068965518e-06, |
| "loss": 0.009, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0557333333333334, |
| "eval_loss": 0.22271297872066498, |
| "eval_runtime": 2915.2649, |
| "eval_samples_per_second": 2.016, |
| "eval_steps_per_second": 0.063, |
| "eval_wer": 56.30684216585441, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0890666666666666, |
| "grad_norm": 0.5929153561592102, |
| "learning_rate": 4.831034482758621e-06, |
| "loss": 0.0076, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.0890666666666666, |
| "eval_loss": 0.2749842405319214, |
| "eval_runtime": 3041.7569, |
| "eval_samples_per_second": 1.932, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 66.05395448789287, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.1224, |
| "grad_norm": 0.3881845474243164, |
| "learning_rate": 4.486206896551725e-06, |
| "loss": 0.0385, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.1224, |
| "eval_loss": 0.21780993044376373, |
| "eval_runtime": 2981.8862, |
| "eval_samples_per_second": 1.971, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 54.45143712345063, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.1557333333333335, |
| "grad_norm": 0.688723623752594, |
| "learning_rate": 4.141379310344828e-06, |
| "loss": 0.0245, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.1557333333333335, |
| "eval_loss": 0.1720988005399704, |
| "eval_runtime": 3050.0463, |
| "eval_samples_per_second": 1.927, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 52.00314670555278, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.1890666666666667, |
| "grad_norm": 0.39908483624458313, |
| "learning_rate": 3.7965517241379313e-06, |
| "loss": 0.0226, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.1890666666666667, |
| "eval_loss": 0.1741122305393219, |
| "eval_runtime": 3209.6808, |
| "eval_samples_per_second": 1.831, |
| "eval_steps_per_second": 0.057, |
| "eval_wer": 53.75110326566638, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.0002666666666666, |
| "grad_norm": 2.697366237640381, |
| "learning_rate": 3.4517241379310346e-06, |
| "loss": 0.0212, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.0002666666666666, |
| "eval_loss": 0.20012931525707245, |
| "eval_runtime": 3160.8774, |
| "eval_samples_per_second": 1.86, |
| "eval_steps_per_second": 0.058, |
| "eval_wer": 56.14950688821521, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.0336, |
| "grad_norm": 0.3792371451854706, |
| "learning_rate": 3.1068965517241384e-06, |
| "loss": 0.0121, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.0336, |
| "eval_loss": 0.23216772079467773, |
| "eval_runtime": 3109.7166, |
| "eval_samples_per_second": 1.89, |
| "eval_steps_per_second": 0.059, |
| "eval_wer": 55.472197705207414, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.0669333333333335, |
| "grad_norm": 0.2577882409095764, |
| "learning_rate": 2.7620689655172417e-06, |
| "loss": 0.0042, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.0669333333333335, |
| "eval_loss": 0.24030156433582306, |
| "eval_runtime": 3127.3687, |
| "eval_samples_per_second": 1.88, |
| "eval_steps_per_second": 0.059, |
| "eval_wer": 57.6864039295445, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.1002666666666667, |
| "grad_norm": 0.24431835114955902, |
| "learning_rate": 2.4179310344827587e-06, |
| "loss": 0.0059, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.1002666666666667, |
| "eval_loss": 0.2953338325023651, |
| "eval_runtime": 3087.8251, |
| "eval_samples_per_second": 1.904, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 64.00667715568518, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.1336, |
| "grad_norm": 0.6591205596923828, |
| "learning_rate": 2.073793103448276e-06, |
| "loss": 0.0248, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.1336, |
| "eval_loss": 0.1744297742843628, |
| "eval_runtime": 2988.8074, |
| "eval_samples_per_second": 1.967, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 51.341187305729306, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.166933333333333, |
| "grad_norm": 0.45652803778648376, |
| "learning_rate": 1.7289655172413794e-06, |
| "loss": 0.0172, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.166933333333333, |
| "eval_loss": 0.18724997341632843, |
| "eval_runtime": 3015.7946, |
| "eval_samples_per_second": 1.949, |
| "eval_steps_per_second": 0.061, |
| "eval_wer": 53.53236885528992, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.200266666666667, |
| "grad_norm": 0.42330440878868103, |
| "learning_rate": 1.384137931034483e-06, |
| "loss": 0.015, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.200266666666667, |
| "eval_loss": 0.19304682314395905, |
| "eval_runtime": 3065.8492, |
| "eval_samples_per_second": 1.917, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 54.702789823093745, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.011466666666666, |
| "grad_norm": 0.7199889421463013, |
| "learning_rate": 1.0393103448275863e-06, |
| "loss": 0.0158, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.011466666666666, |
| "eval_loss": 0.21734359860420227, |
| "eval_runtime": 3066.4573, |
| "eval_samples_per_second": 1.917, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 60.96358263939522, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.0448, |
| "grad_norm": 0.42120951414108276, |
| "learning_rate": 6.944827586206897e-07, |
| "loss": 0.0028, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.0448, |
| "eval_loss": 0.23296251893043518, |
| "eval_runtime": 2966.4298, |
| "eval_samples_per_second": 1.982, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 53.49207567443109, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.078133333333334, |
| "grad_norm": 0.202627032995224, |
| "learning_rate": 3.496551724137931e-07, |
| "loss": 0.0028, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.078133333333334, |
| "eval_loss": 0.24154414236545563, |
| "eval_runtime": 2989.4533, |
| "eval_samples_per_second": 1.966, |
| "eval_steps_per_second": 0.062, |
| "eval_wer": 53.47672589124679, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.111466666666667, |
| "grad_norm": 2.03951358795166, |
| "learning_rate": 4.827586206896552e-09, |
| "loss": 0.0194, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.111466666666667, |
| "eval_loss": 0.22199244797229767, |
| "eval_runtime": 3069.065, |
| "eval_samples_per_second": 1.915, |
| "eval_steps_per_second": 0.06, |
| "eval_wer": 57.69216009823861, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.111466666666667, |
| "step": 15000, |
| "total_flos": 4.898454489936691e+20, |
| "train_loss": 0.03751766018072764, |
| "train_runtime": 129332.0882, |
| "train_samples_per_second": 3.711, |
| "train_steps_per_second": 0.116 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.898454489936691e+20, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|