{ "best_global_step": 2000, "best_metric": 0.6051455140113831, "best_model_checkpoint": "./whisper-large-v3-lt1/checkpoint-2000", "epoch": 1.179, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.42568153142929077, "learning_rate": 9.9e-06, "loss": 1.3821, "step": 100 }, { "epoch": 0.1, "grad_norm": 0.4062618613243103, "learning_rate": 9.478947368421053e-06, "loss": 1.1435, "step": 200 }, { "epoch": 0.15, "grad_norm": 0.5448814630508423, "learning_rate": 8.95263157894737e-06, "loss": 1.009, "step": 300 }, { "epoch": 0.2, "grad_norm": 0.709639310836792, "learning_rate": 8.426315789473684e-06, "loss": 0.919, "step": 400 }, { "epoch": 0.25, "grad_norm": 0.8839156627655029, "learning_rate": 7.9e-06, "loss": 0.8441, "step": 500 }, { "epoch": 0.25, "eval_loss": 0.8364643454551697, "eval_runtime": 1166.4094, "eval_samples_per_second": 2.503, "eval_steps_per_second": 0.313, "step": 500 }, { "epoch": 0.3, "grad_norm": 1.2657830715179443, "learning_rate": 7.373684210526316e-06, "loss": 0.796, "step": 600 }, { "epoch": 0.35, "grad_norm": 1.17739737033844, "learning_rate": 6.8473684210526325e-06, "loss": 0.8109, "step": 700 }, { "epoch": 0.4, "grad_norm": 1.271132230758667, "learning_rate": 6.321052631578948e-06, "loss": 0.7474, "step": 800 }, { "epoch": 0.45, "grad_norm": 1.3513257503509521, "learning_rate": 5.794736842105264e-06, "loss": 0.7134, "step": 900 }, { "epoch": 0.5, "grad_norm": 1.3304741382598877, "learning_rate": 5.268421052631579e-06, "loss": 0.6847, "step": 1000 }, { "epoch": 0.5, "eval_loss": 0.6822749972343445, "eval_runtime": 1166.2313, "eval_samples_per_second": 2.504, "eval_steps_per_second": 0.313, "step": 1000 }, { "epoch": 0.55, "grad_norm": 1.2514197826385498, "learning_rate": 4.7421052631578954e-06, "loss": 0.6689, "step": 1100 }, { "epoch": 0.6, "grad_norm": 2.271310806274414, "learning_rate": 4.215789473684211e-06, "loss": 0.6526, "step": 1200 }, { "epoch": 0.65, "grad_norm": 2.1460328102111816, "learning_rate": 3.6894736842105265e-06, "loss": 0.6299, "step": 1300 }, { "epoch": 0.7, "grad_norm": 2.67410945892334, "learning_rate": 3.1631578947368424e-06, "loss": 0.6143, "step": 1400 }, { "epoch": 0.75, "grad_norm": 1.5648741722106934, "learning_rate": 2.6368421052631584e-06, "loss": 0.6632, "step": 1500 }, { "epoch": 0.75, "eval_loss": 0.6280742287635803, "eval_runtime": 1163.7066, "eval_samples_per_second": 2.509, "eval_steps_per_second": 0.314, "step": 1500 }, { "epoch": 0.8, "grad_norm": 1.4991073608398438, "learning_rate": 2.110526315789474e-06, "loss": 0.6425, "step": 1600 }, { "epoch": 1.029, "grad_norm": 1.7371805906295776, "learning_rate": 1.5842105263157894e-06, "loss": 0.6223, "step": 1700 }, { "epoch": 1.079, "grad_norm": 3.491868495941162, "learning_rate": 1.0578947368421054e-06, "loss": 0.6013, "step": 1800 }, { "epoch": 1.129, "grad_norm": 2.1243815422058105, "learning_rate": 5.315789473684211e-07, "loss": 0.5967, "step": 1900 }, { "epoch": 1.179, "grad_norm": 2.0289883613586426, "learning_rate": 5.263157894736842e-09, "loss": 0.5986, "step": 2000 }, { "epoch": 1.179, "eval_loss": 0.6051455140113831, "eval_runtime": 1166.0635, "eval_samples_per_second": 2.504, "eval_steps_per_second": 0.313, "step": 2000 } ], "logging_steps": 100, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0985211708309504e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }