{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 48.248543689320385,
  "global_step": 12400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.56,
      "learning_rate": 0.00023999999999999998,
      "loss": 6.4311,
      "step": 400
    },
    {
      "epoch": 1.56,
      "eval_loss": 2.4840803146362305,
      "eval_runtime": 62.57,
      "eval_samples_per_second": 17.692,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.0002927125506072874,
      "loss": 0.9229,
      "step": 800
    },
    {
      "epoch": 3.11,
      "eval_loss": 0.44123658537864685,
      "eval_runtime": 62.8111,
      "eval_samples_per_second": 17.624,
      "eval_wer": 0.5713327745180218,
      "step": 800
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.000282995951417004,
      "loss": 0.426,
      "step": 1200
    },
    {
      "epoch": 4.67,
      "eval_loss": 0.39316678047180176,
      "eval_runtime": 63.0988,
      "eval_samples_per_second": 17.544,
      "eval_wer": 0.4953897736797988,
      "step": 1200
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.00027327935222672063,
      "loss": 0.3148,
      "step": 1600
    },
    {
      "epoch": 6.23,
      "eval_loss": 0.37084177136421204,
      "eval_runtime": 62.6872,
      "eval_samples_per_second": 17.659,
      "eval_wer": 0.46135792120704106,
      "step": 1600
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.0002635627530364372,
      "loss": 0.2419,
      "step": 2000
    },
    {
      "epoch": 7.78,
      "eval_loss": 0.34887808561325073,
      "eval_runtime": 63.5236,
      "eval_samples_per_second": 17.427,
      "eval_wer": 0.446940486169321,
      "step": 2000
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.0002538461538461538,
      "loss": 0.1958,
      "step": 2400
    },
    {
      "epoch": 9.34,
      "eval_loss": 0.3414151668548584,
      "eval_runtime": 63.0338,
      "eval_samples_per_second": 17.562,
      "eval_wer": 0.42196144174350375,
      "step": 2400
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.00024412955465587043,
      "loss": 0.162,
      "step": 2800
    },
    {
      "epoch": 10.89,
      "eval_loss": 0.3522416353225708,
      "eval_runtime": 62.8897,
      "eval_samples_per_second": 17.602,
      "eval_wer": 0.4239731768650461,
      "step": 2800
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.00023441295546558702,
      "loss": 0.1439,
      "step": 3200
    },
    {
      "epoch": 12.45,
      "eval_loss": 0.306948184967041,
      "eval_runtime": 62.7745,
      "eval_samples_per_second": 17.635,
      "eval_wer": 0.3870913663034367,
      "step": 3200
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.00022469635627530362,
      "loss": 0.1309,
      "step": 3600
    },
    {
      "epoch": 14.01,
      "eval_loss": 0.6407384872436523,
      "eval_runtime": 63.159,
      "eval_samples_per_second": 17.527,
      "eval_wer": 0.5264040234702431,
      "step": 3600
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.00021497975708502023,
      "loss": 0.1113,
      "step": 4000
    },
    {
      "epoch": 15.56,
      "eval_loss": 0.342593252658844,
      "eval_runtime": 62.6873,
      "eval_samples_per_second": 17.659,
      "eval_wer": 0.3886001676445935,
      "step": 4000
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.00020526315789473683,
      "loss": 0.1063,
      "step": 4400
    },
    {
      "epoch": 17.12,
      "eval_loss": 0.3707052767276764,
      "eval_runtime": 64.3471,
      "eval_samples_per_second": 17.204,
      "eval_wer": 0.39765297569153396,
      "step": 4400
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.00019554655870445344,
      "loss": 0.0903,
      "step": 4800
    },
    {
      "epoch": 18.68,
      "eval_loss": 0.3410646319389343,
      "eval_runtime": 63.3922,
      "eval_samples_per_second": 17.463,
      "eval_wer": 0.3748533109807209,
      "step": 4800
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.00018582995951417,
      "loss": 0.0865,
      "step": 5200
    },
    {
      "epoch": 20.23,
      "eval_loss": 0.3687942624092102,
      "eval_runtime": 63.5277,
      "eval_samples_per_second": 17.425,
      "eval_wer": 0.38507963118189437,
      "step": 5200
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.00017611336032388663,
      "loss": 0.0763,
      "step": 5600
    },
    {
      "epoch": 21.79,
      "eval_loss": 0.37619391083717346,
      "eval_runtime": 63.8668,
      "eval_samples_per_second": 17.333,
      "eval_wer": 0.3964794635373009,
      "step": 5600
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.00016639676113360322,
      "loss": 0.0672,
      "step": 6000
    },
    {
      "epoch": 23.35,
      "eval_loss": 0.43969622254371643,
      "eval_runtime": 65.4951,
      "eval_samples_per_second": 16.902,
      "eval_wer": 0.41525565800502934,
      "step": 6000
    },
    {
      "epoch": 24.9,
      "learning_rate": 0.00015668016194331984,
      "loss": 0.0594,
      "step": 6400
    },
    {
      "epoch": 24.9,
      "eval_loss": 0.43122151494026184,
      "eval_runtime": 64.2665,
      "eval_samples_per_second": 17.225,
      "eval_wer": 0.3978206202849958,
      "step": 6400
    },
    {
      "epoch": 26.46,
      "learning_rate": 0.00014696356275303643,
      "loss": 0.055,
      "step": 6800
    },
    {
      "epoch": 26.46,
      "eval_loss": 0.35460254549980164,
      "eval_runtime": 64.2687,
      "eval_samples_per_second": 17.225,
      "eval_wer": 0.3678122380553227,
      "step": 6800
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.00013724696356275302,
      "loss": 0.0501,
      "step": 7200
    },
    {
      "epoch": 28.02,
      "eval_loss": 0.39982739090919495,
      "eval_runtime": 64.3879,
      "eval_samples_per_second": 17.193,
      "eval_wer": 0.37921207041072924,
      "step": 7200
    },
    {
      "epoch": 29.57,
      "learning_rate": 0.00012753036437246964,
      "loss": 0.0456,
      "step": 7600
    },
    {
      "epoch": 29.57,
      "eval_loss": 0.3560320734977722,
      "eval_runtime": 64.0157,
      "eval_samples_per_second": 17.293,
      "eval_wer": 0.3678122380553227,
      "step": 7600
    },
    {
      "epoch": 31.13,
      "learning_rate": 0.00011781376518218623,
      "loss": 0.0413,
      "step": 8000
    },
    {
      "epoch": 31.13,
      "eval_loss": 0.2975221872329712,
      "eval_runtime": 63.7473,
      "eval_samples_per_second": 17.365,
      "eval_wer": 0.34752724224643755,
      "step": 8000
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.00010809716599190283,
      "loss": 0.0358,
      "step": 8400
    },
    {
      "epoch": 32.68,
      "eval_loss": 0.49690723419189453,
      "eval_runtime": 63.7946,
      "eval_samples_per_second": 17.353,
      "eval_wer": 0.41223805532271585,
      "step": 8400
    },
    {
      "epoch": 34.24,
      "learning_rate": 9.838056680161942e-05,
      "loss": 0.036,
      "step": 8800
    },
    {
      "epoch": 34.24,
      "eval_loss": 0.3520979583263397,
      "eval_runtime": 64.5821,
      "eval_samples_per_second": 17.141,
      "eval_wer": 0.3577535624476111,
      "step": 8800
    },
    {
      "epoch": 35.8,
      "learning_rate": 8.866396761133603e-05,
      "loss": 0.0318,
      "step": 9200
    },
    {
      "epoch": 35.8,
      "eval_loss": 0.40838906168937683,
      "eval_runtime": 63.7392,
      "eval_samples_per_second": 17.368,
      "eval_wer": 0.381894383906119,
      "step": 9200
    },
    {
      "epoch": 37.35,
      "learning_rate": 7.894736842105262e-05,
      "loss": 0.0297,
      "step": 9600
    },
    {
      "epoch": 37.35,
      "eval_loss": 0.3980065584182739,
      "eval_runtime": 65.2097,
      "eval_samples_per_second": 16.976,
      "eval_wer": 0.36496227996647107,
      "step": 9600
    },
    {
      "epoch": 38.91,
      "learning_rate": 6.923076923076922e-05,
      "loss": 0.0282,
      "step": 10000
    },
    {
      "epoch": 38.91,
      "eval_loss": 0.4590842127799988,
      "eval_runtime": 64.5592,
      "eval_samples_per_second": 17.147,
      "eval_wer": 0.387929589270746,
      "step": 10000
    },
    {
      "epoch": 40.47,
      "learning_rate": 5.951417004048583e-05,
      "loss": 0.0239,
      "step": 10400
    },
    {
      "epoch": 40.47,
      "eval_loss": 0.3659476041793823,
      "eval_runtime": 64.4767,
      "eval_samples_per_second": 17.169,
      "eval_wer": 0.35305951383067896,
      "step": 10400
    },
    {
      "epoch": 42.02,
      "learning_rate": 4.9797570850202426e-05,
      "loss": 0.0236,
      "step": 10800
    },
    {
      "epoch": 42.02,
      "eval_loss": 0.3309062123298645,
      "eval_runtime": 64.1712,
      "eval_samples_per_second": 17.251,
      "eval_wer": 0.3317686504610226,
      "step": 10800
    },
    {
      "epoch": 43.58,
      "learning_rate": 4.008097165991903e-05,
      "loss": 0.0193,
      "step": 11200
    },
    {
      "epoch": 43.58,
      "eval_loss": 0.36551418900489807,
      "eval_runtime": 64.1896,
      "eval_samples_per_second": 17.246,
      "eval_wer": 0.34316848281642914,
      "step": 11200
    },
    {
      "epoch": 45.14,
      "learning_rate": 3.0364372469635626e-05,
      "loss": 0.0205,
      "step": 11600
    },
    {
      "epoch": 45.14,
      "eval_loss": 0.3832060694694519,
      "eval_runtime": 64.591,
      "eval_samples_per_second": 17.139,
      "eval_wer": 0.34501257334450963,
      "step": 11600
    },
    {
      "epoch": 46.69,
      "learning_rate": 2.0647773279352223e-05,
      "loss": 0.0182,
      "step": 12000
    },
    {
      "epoch": 46.69,
      "eval_loss": 0.34930258989334106,
      "eval_runtime": 63.9823,
      "eval_samples_per_second": 17.302,
      "eval_wer": 0.33461860854987424,
      "step": 12000
    },
    {
      "epoch": 48.25,
      "learning_rate": 1.0931174089068825e-05,
      "loss": 0.0163,
      "step": 12400
    },
    {
      "epoch": 48.25,
      "eval_loss": 0.32886430621147156,
      "eval_runtime": 65.5766,
      "eval_samples_per_second": 16.881,
      "eval_wer": 0.32606873428331934,
      "step": 12400
    }
  ],
  "max_steps": 12850,
  "num_train_epochs": 50,
  "total_flos": 3.1458311712632816e+19,
  "trial_name": null,
  "trial_params": null
}