{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 500, "global_step": 4650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.410810810810811, "grad_norm": 2.1526288986206055, "learning_rate": 4.987804878048781e-06, "loss": 1.5, "step": 410 }, { "epoch": 5.378378378378378, "eval_loss": 1.1479133367538452, "eval_runtime": 16.0814, "eval_samples_per_second": 20.396, "eval_steps_per_second": 2.55, "eval_wer": 0.9441779328390755, "step": 500 }, { "epoch": 8.821621621621622, "grad_norm": 1.133339524269104, "learning_rate": 4.517688679245283e-06, "loss": 1.2719, "step": 820 }, { "epoch": 10.756756756756756, "eval_loss": 1.126757025718689, "eval_runtime": 16.315, "eval_samples_per_second": 20.104, "eval_steps_per_second": 2.513, "eval_wer": 0.8634976013955517, "step": 1000 }, { "epoch": 13.227027027027027, "grad_norm": 2.221667766571045, "learning_rate": 4.034198113207547e-06, "loss": 1.1462, "step": 1230 }, { "epoch": 16.12972972972973, "eval_loss": 1.1268436908721924, "eval_runtime": 16.4501, "eval_samples_per_second": 19.939, "eval_steps_per_second": 2.492, "eval_wer": 0.8495420846053205, "step": 1500 }, { "epoch": 17.63783783783784, "grad_norm": 1.2538217306137085, "learning_rate": 3.5507075471698116e-06, "loss": 1.0711, "step": 1640 }, { "epoch": 21.508108108108107, "eval_loss": 1.1400920152664185, "eval_runtime": 16.4996, "eval_samples_per_second": 19.879, "eval_steps_per_second": 2.485, "eval_wer": 0.85041430440471, "step": 2000 }, { "epoch": 22.043243243243243, "grad_norm": 1.2463653087615967, "learning_rate": 3.067216981132076e-06, "loss": 1.0059, "step": 2050 }, { "epoch": 26.454054054054055, "grad_norm": 2.5332655906677246, "learning_rate": 2.5837264150943397e-06, "loss": 0.9435, "step": 2460 }, { "epoch": 26.886486486486486, "eval_loss": 1.1470223665237427, "eval_runtime": 16.2457, "eval_samples_per_second": 20.19, "eval_steps_per_second": 2.524, "eval_wer": 0.8438726559092892, "step": 2500 }, { "epoch": 30.864864864864863, "grad_norm": 1.5317901372909546, "learning_rate": 2.100235849056604e-06, "loss": 0.9056, "step": 2870 }, { "epoch": 32.25945945945946, "eval_loss": 1.1596176624298096, "eval_runtime": 15.9807, "eval_samples_per_second": 20.525, "eval_steps_per_second": 2.566, "eval_wer": 0.8360226777147841, "step": 3000 }, { "epoch": 35.270270270270274, "grad_norm": 1.1806330680847168, "learning_rate": 1.6167452830188683e-06, "loss": 0.8754, "step": 3280 }, { "epoch": 37.637837837837836, "eval_loss": 1.1706100702285767, "eval_runtime": 16.3502, "eval_samples_per_second": 20.061, "eval_steps_per_second": 2.508, "eval_wer": 0.840819886611426, "step": 3500 }, { "epoch": 39.68108108108108, "grad_norm": 1.8476966619491577, "learning_rate": 1.133254716981132e-06, "loss": 0.8491, "step": 3690 }, { "epoch": 43.01081081081081, "eval_loss": 1.1760590076446533, "eval_runtime": 16.1267, "eval_samples_per_second": 20.339, "eval_steps_per_second": 2.542, "eval_wer": 0.8403837767117314, "step": 4000 }, { "epoch": 44.086486486486486, "grad_norm": 1.2304221391677856, "learning_rate": 6.497641509433964e-07, "loss": 0.8404, "step": 4100 }, { "epoch": 48.38918918918919, "eval_loss": 1.1790771484375, "eval_runtime": 16.6689, "eval_samples_per_second": 19.677, "eval_steps_per_second": 2.46, "eval_wer": 0.8412559965111208, "step": 4500 }, { "epoch": 48.497297297297294, "grad_norm": 2.8257575035095215, "learning_rate": 1.6745283018867927e-07, "loss": 0.827, "step": 4510 } ], "logging_steps": 410, "max_steps": 4650, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9474157216000532e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }