| { |
| "best_metric": 0.4671205098493627, |
| "best_model_checkpoint": "audio/train/checkpoint/whisper-tiny/superb_si_42/checkpoint-43240", |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 43240, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.23126734505087881, |
| "grad_norm": 4.078686237335205, |
| "learning_rate": 1.1563367252543942e-05, |
| "loss": 7.0981, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46253469010175763, |
| "grad_norm": 7.5214362144470215, |
| "learning_rate": 2.3126734505087884e-05, |
| "loss": 6.6758, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6938020351526365, |
| "grad_norm": 9.81009292602539, |
| "learning_rate": 3.469010175763183e-05, |
| "loss": 6.2103, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9250693802035153, |
| "grad_norm": 12.731938362121582, |
| "learning_rate": 4.625346901017577e-05, |
| "loss": 5.7888, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.02607184241019699, |
| "eval_loss": 5.914073944091797, |
| "eval_runtime": 18.6704, |
| "eval_samples_per_second": 369.783, |
| "eval_steps_per_second": 23.138, |
| "step": 4324 |
| }, |
| { |
| "epoch": 1.1563367252543941, |
| "grad_norm": 12.256651878356934, |
| "learning_rate": 4.913146263747559e-05, |
| "loss": 5.3294, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.3876040703052728, |
| "grad_norm": 18.503366470336914, |
| "learning_rate": 4.78466440538596e-05, |
| "loss": 4.8925, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.6188714153561516, |
| "grad_norm": 21.304777145385742, |
| "learning_rate": 4.65618254702436e-05, |
| "loss": 4.517, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.8501387604070305, |
| "grad_norm": 24.696853637695312, |
| "learning_rate": 4.527700688662761e-05, |
| "loss": 4.1883, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.13455967555040557, |
| "eval_loss": 4.352254390716553, |
| "eval_runtime": 18.9265, |
| "eval_samples_per_second": 364.779, |
| "eval_steps_per_second": 22.825, |
| "step": 8648 |
| }, |
| { |
| "epoch": 2.0814061054579094, |
| "grad_norm": 21.23572540283203, |
| "learning_rate": 4.399218830301162e-05, |
| "loss": 3.9332, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.3126734505087883, |
| "grad_norm": 23.86716079711914, |
| "learning_rate": 4.2707369719395625e-05, |
| "loss": 3.6836, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.543940795559667, |
| "grad_norm": 28.8061466217041, |
| "learning_rate": 4.142255113577963e-05, |
| "loss": 3.501, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.7752081406105455, |
| "grad_norm": 32.60738754272461, |
| "learning_rate": 4.0137732552163636e-05, |
| "loss": 3.3463, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.21625144843568944, |
| "eval_loss": 3.680591344833374, |
| "eval_runtime": 18.8335, |
| "eval_samples_per_second": 366.581, |
| "eval_steps_per_second": 22.938, |
| "step": 12972 |
| }, |
| { |
| "epoch": 3.0064754856614244, |
| "grad_norm": 28.044601440429688, |
| "learning_rate": 3.885291396854764e-05, |
| "loss": 3.1735, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.2377428307123033, |
| "grad_norm": 29.33180809020996, |
| "learning_rate": 3.756809538493165e-05, |
| "loss": 3.0224, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.469010175763182, |
| "grad_norm": 28.656097412109375, |
| "learning_rate": 3.628327680131566e-05, |
| "loss": 2.8806, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.700277520814061, |
| "grad_norm": 31.90932846069336, |
| "learning_rate": 3.4998458217699664e-05, |
| "loss": 2.7806, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.93154486586494, |
| "grad_norm": 30.47547721862793, |
| "learning_rate": 3.371363963408367e-05, |
| "loss": 2.6753, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.28070683661645424, |
| "eval_loss": 3.263786792755127, |
| "eval_runtime": 18.4034, |
| "eval_samples_per_second": 375.148, |
| "eval_steps_per_second": 23.474, |
| "step": 17296 |
| }, |
| { |
| "epoch": 4.162812210915819, |
| "grad_norm": 31.83561134338379, |
| "learning_rate": 3.242882105046768e-05, |
| "loss": 2.5596, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.394079555966697, |
| "grad_norm": 35.408870697021484, |
| "learning_rate": 3.114400246685168e-05, |
| "loss": 2.4519, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.6253469010175765, |
| "grad_norm": 38.24852752685547, |
| "learning_rate": 2.9859183883235685e-05, |
| "loss": 2.3864, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.856614246068455, |
| "grad_norm": 31.56917381286621, |
| "learning_rate": 2.8574365299619694e-05, |
| "loss": 2.3032, |
| "step": 21000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.3609501738122827, |
| "eval_loss": 2.9113447666168213, |
| "eval_runtime": 18.7471, |
| "eval_samples_per_second": 368.27, |
| "eval_steps_per_second": 23.044, |
| "step": 21620 |
| }, |
| { |
| "epoch": 5.087881591119334, |
| "grad_norm": 30.523977279663086, |
| "learning_rate": 2.72895467160037e-05, |
| "loss": 2.2216, |
| "step": 22000 |
| }, |
| { |
| "epoch": 5.319148936170213, |
| "grad_norm": 37.3154182434082, |
| "learning_rate": 2.6004728132387708e-05, |
| "loss": 2.0986, |
| "step": 23000 |
| }, |
| { |
| "epoch": 5.550416281221091, |
| "grad_norm": 37.61198425292969, |
| "learning_rate": 2.4719909548771713e-05, |
| "loss": 2.0772, |
| "step": 24000 |
| }, |
| { |
| "epoch": 5.78168362627197, |
| "grad_norm": 29.80394172668457, |
| "learning_rate": 2.343509096515572e-05, |
| "loss": 2.0252, |
| "step": 25000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.3734067207415991, |
| "eval_loss": 2.8072962760925293, |
| "eval_runtime": 18.4609, |
| "eval_samples_per_second": 373.98, |
| "eval_steps_per_second": 23.401, |
| "step": 25944 |
| }, |
| { |
| "epoch": 6.012950971322849, |
| "grad_norm": 36.79635238647461, |
| "learning_rate": 2.2150272381539727e-05, |
| "loss": 1.9812, |
| "step": 26000 |
| }, |
| { |
| "epoch": 6.244218316373728, |
| "grad_norm": 33.27775955200195, |
| "learning_rate": 2.0865453797923736e-05, |
| "loss": 1.8742, |
| "step": 27000 |
| }, |
| { |
| "epoch": 6.475485661424607, |
| "grad_norm": 35.029197692871094, |
| "learning_rate": 1.958063521430774e-05, |
| "loss": 1.8408, |
| "step": 28000 |
| }, |
| { |
| "epoch": 6.706753006475486, |
| "grad_norm": 31.686641693115234, |
| "learning_rate": 1.8295816630691746e-05, |
| "loss": 1.8034, |
| "step": 29000 |
| }, |
| { |
| "epoch": 6.938020351526364, |
| "grad_norm": 28.687349319458008, |
| "learning_rate": 1.7010998047075755e-05, |
| "loss": 1.7579, |
| "step": 30000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.4216396292004635, |
| "eval_loss": 2.591789484024048, |
| "eval_runtime": 18.6674, |
| "eval_samples_per_second": 369.843, |
| "eval_steps_per_second": 23.142, |
| "step": 30268 |
| }, |
| { |
| "epoch": 7.169287696577244, |
| "grad_norm": 38.40964889526367, |
| "learning_rate": 1.572617946345976e-05, |
| "loss": 1.6965, |
| "step": 31000 |
| }, |
| { |
| "epoch": 7.400555041628122, |
| "grad_norm": 30.718547821044922, |
| "learning_rate": 1.4441360879843766e-05, |
| "loss": 1.6318, |
| "step": 32000 |
| }, |
| { |
| "epoch": 7.631822386679001, |
| "grad_norm": 41.18671798706055, |
| "learning_rate": 1.3156542296227773e-05, |
| "loss": 1.6352, |
| "step": 33000 |
| }, |
| { |
| "epoch": 7.86308973172988, |
| "grad_norm": 38.69245910644531, |
| "learning_rate": 1.187172371261178e-05, |
| "loss": 1.5959, |
| "step": 34000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.44148319814600234, |
| "eval_loss": 2.5087039470672607, |
| "eval_runtime": 18.5229, |
| "eval_samples_per_second": 372.729, |
| "eval_steps_per_second": 23.323, |
| "step": 34592 |
| }, |
| { |
| "epoch": 8.094357076780758, |
| "grad_norm": 36.10582733154297, |
| "learning_rate": 1.0586905128995787e-05, |
| "loss": 1.547, |
| "step": 35000 |
| }, |
| { |
| "epoch": 8.325624421831638, |
| "grad_norm": 28.88361167907715, |
| "learning_rate": 9.302086545379794e-06, |
| "loss": 1.5137, |
| "step": 36000 |
| }, |
| { |
| "epoch": 8.556891766882517, |
| "grad_norm": 40.176639556884766, |
| "learning_rate": 8.017267961763799e-06, |
| "loss": 1.4881, |
| "step": 37000 |
| }, |
| { |
| "epoch": 8.788159111933394, |
| "grad_norm": 31.49353790283203, |
| "learning_rate": 6.732449378147807e-06, |
| "loss": 1.4736, |
| "step": 38000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.44944959443800697, |
| "eval_loss": 2.4713730812072754, |
| "eval_runtime": 18.4431, |
| "eval_samples_per_second": 374.34, |
| "eval_steps_per_second": 23.423, |
| "step": 38916 |
| }, |
| { |
| "epoch": 9.019426456984274, |
| "grad_norm": 36.280303955078125, |
| "learning_rate": 5.447630794531813e-06, |
| "loss": 1.4577, |
| "step": 39000 |
| }, |
| { |
| "epoch": 9.250693802035153, |
| "grad_norm": 29.408279418945312, |
| "learning_rate": 4.162812210915819e-06, |
| "loss": 1.4087, |
| "step": 40000 |
| }, |
| { |
| "epoch": 9.481961147086032, |
| "grad_norm": 36.85555648803711, |
| "learning_rate": 2.877993627299825e-06, |
| "loss": 1.3773, |
| "step": 41000 |
| }, |
| { |
| "epoch": 9.71322849213691, |
| "grad_norm": 36.18992233276367, |
| "learning_rate": 1.5931750436838318e-06, |
| "loss": 1.3754, |
| "step": 42000 |
| }, |
| { |
| "epoch": 9.94449583718779, |
| "grad_norm": 32.56552505493164, |
| "learning_rate": 3.0835646006783846e-07, |
| "loss": 1.3673, |
| "step": 43000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.4671205098493627, |
| "eval_loss": 2.3923428058624268, |
| "eval_runtime": 18.4915, |
| "eval_samples_per_second": 373.361, |
| "eval_steps_per_second": 23.362, |
| "step": 43240 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 43240, |
| "total_flos": 1.6043690488478396e+19, |
| "train_loss": 2.797751490392694, |
| "train_runtime": 9529.7807, |
| "train_samples_per_second": 145.188, |
| "train_steps_per_second": 4.537 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 43240, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "total_flos": 1.6043690488478396e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|