{ "best_metric": 2.196078431372549, "best_model_checkpoint": "./simpandnet-finetune/checkpoint-150", "epoch": 1.662049861495845, "eval_steps": 25, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28, "learning_rate": 4.2000000000000004e-06, "loss": 1.187, "step": 25 }, { "epoch": 0.28, "eval_loss": 0.2987758219242096, "eval_runtime": 312.4773, "eval_samples_per_second": 0.816, "eval_steps_per_second": 0.41, "eval_wer": 3.7647058823529407, "step": 25 }, { "epoch": 0.55, "learning_rate": 9.200000000000002e-06, "loss": 0.1612, "step": 50 }, { "epoch": 0.55, "eval_loss": 0.11934595555067062, "eval_runtime": 260.2565, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.492, "eval_wer": 2.3529411764705883, "step": 50 }, { "epoch": 0.83, "learning_rate": 8.6e-06, "loss": 0.0931, "step": 75 }, { "epoch": 0.83, "eval_loss": 0.07363112270832062, "eval_runtime": 258.571, "eval_samples_per_second": 0.986, "eval_steps_per_second": 0.495, "eval_wer": 3.294117647058824, "step": 75 }, { "epoch": 1.11, "learning_rate": 6.9333333333333344e-06, "loss": 0.0491, "step": 100 }, { "epoch": 1.11, "eval_loss": 0.07703772187232971, "eval_runtime": 259.0088, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.494, "eval_wer": 4.313725490196078, "step": 100 }, { "epoch": 1.39, "learning_rate": 5.2666666666666665e-06, "loss": 0.0377, "step": 125 }, { "epoch": 1.39, "eval_loss": 0.07970306277275085, "eval_runtime": 276.5028, "eval_samples_per_second": 0.922, "eval_steps_per_second": 0.463, "eval_wer": 2.03921568627451, "step": 125 }, { "epoch": 1.66, "learning_rate": 3.6000000000000003e-06, "loss": 0.0472, "step": 150 }, { "epoch": 1.66, "eval_loss": 0.05988682806491852, "eval_runtime": 260.1111, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.492, "eval_wer": 2.196078431372549, "step": 150 } ], "logging_steps": 25, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 2.44843117314048e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }