{ "best_metric": 2.911093627065303, "best_model_checkpoint": "./3data-finetune/checkpoint-200", "epoch": 2.2222222222222223, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "learning_rate": 5e-06, "loss": 0.2226, "step": 25 }, { "epoch": 0.28, "eval_loss": 0.08195594698190689, "eval_runtime": 247.5945, "eval_samples_per_second": 1.026, "eval_steps_per_second": 0.513, "eval_wer": 6.058221872541306, "step": 25 }, { "epoch": 0.56, "learning_rate": 9.800000000000001e-06, "loss": 0.079, "step": 50 }, { "epoch": 0.56, "eval_loss": 0.07928125560283661, "eval_runtime": 245.6975, "eval_samples_per_second": 1.034, "eval_steps_per_second": 0.517, "eval_wer": 5.664830841856806, "step": 50 }, { "epoch": 0.83, "learning_rate": 8.400000000000001e-06, "loss": 0.0773, "step": 75 }, { "epoch": 0.83, "eval_loss": 0.06937055289745331, "eval_runtime": 247.4688, "eval_samples_per_second": 1.026, "eval_steps_per_second": 0.513, "eval_wer": 4.248623131392605, "step": 75 }, { "epoch": 1.11, "learning_rate": 6.800000000000001e-06, "loss": 0.0624, "step": 100 }, { "epoch": 1.11, "eval_loss": 0.059788815677165985, "eval_runtime": 245.9389, "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.516, "eval_wer": 3.6191974822974036, "step": 100 }, { "epoch": 1.39, "learning_rate": 5.133333333333334e-06, "loss": 0.0426, "step": 125 }, { "epoch": 1.39, "eval_loss": 0.055591508746147156, "eval_runtime": 246.0647, "eval_samples_per_second": 1.032, "eval_steps_per_second": 0.516, "eval_wer": 2.832415420928403, "step": 125 }, { "epoch": 1.67, "learning_rate": 3.4666666666666672e-06, "loss": 0.0575, "step": 150 }, { "epoch": 1.67, "eval_loss": 0.05484224110841751, "eval_runtime": 248.469, "eval_samples_per_second": 1.022, "eval_steps_per_second": 0.511, "eval_wer": 3.6978756884343036, "step": 150 }, { "epoch": 1.94, "learning_rate": 1.8000000000000001e-06, "loss": 0.0312, "step": 175 }, { "epoch": 1.94, "eval_loss": 0.05231180042028427, "eval_runtime": 246.3108, "eval_samples_per_second": 1.031, "eval_steps_per_second": 0.516, "eval_wer": 2.911093627065303, "step": 175 }, { "epoch": 2.22, "learning_rate": 1.3333333333333336e-07, "loss": 0.0236, "step": 200 }, { "epoch": 2.22, "eval_loss": 0.052269306033849716, "eval_runtime": 248.7723, "eval_samples_per_second": 1.021, "eval_steps_per_second": 0.511, "eval_wer": 2.911093627065303, "step": 200 } ], "logging_steps": 25, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 3.265935704064e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }