{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.625, "grad_norm": 35.632633209228516, "learning_rate": 9.620253164556963e-06, "loss": 3.8394, "step": 10 }, { "epoch": 1.0, "eval_cer": 0.6162631650936944, "eval_loss": 1.9695916175842285, "eval_runtime": 53.8754, "eval_samples_per_second": 14.051, "eval_steps_per_second": 0.056, "eval_wer": 0.9849082490138913, "step": 16 }, { "epoch": 1.25, "grad_norm": 14.219266891479492, "learning_rate": 8.354430379746837e-06, "loss": 2.1751, "step": 20 }, { "epoch": 1.875, "grad_norm": 9.090901374816895, "learning_rate": 7.08860759493671e-06, "loss": 1.3507, "step": 30 }, { "epoch": 2.0, "eval_cer": 0.41071672821775407, "eval_loss": 1.0905992984771729, "eval_runtime": 54.5857, "eval_samples_per_second": 13.868, "eval_steps_per_second": 0.055, "eval_wer": 0.8955582232893158, "step": 32 }, { "epoch": 2.5, "grad_norm": 4.849871635437012, "learning_rate": 5.8227848101265824e-06, "loss": 0.9898, "step": 40 }, { "epoch": 3.0, "eval_cer": 0.2880590890439064, "eval_loss": 0.7982303500175476, "eval_runtime": 54.2612, "eval_samples_per_second": 13.951, "eval_steps_per_second": 0.055, "eval_wer": 0.7871720116618076, "step": 48 }, { "epoch": 3.125, "grad_norm": 3.600881576538086, "learning_rate": 4.556962025316456e-06, "loss": 0.8102, "step": 50 }, { "epoch": 3.75, "grad_norm": 3.1939327716827393, "learning_rate": 3.2911392405063294e-06, "loss": 0.6834, "step": 60 }, { "epoch": 4.0, "eval_cer": 0.22661058678703325, "eval_loss": 0.6941161155700684, "eval_runtime": 51.0421, "eval_samples_per_second": 14.831, "eval_steps_per_second": 0.059, "eval_wer": 0.7153146973074944, "step": 64 }, { "epoch": 4.375, "grad_norm": 2.299682378768921, "learning_rate": 2.0253164556962026e-06, "loss": 0.636, "step": 70 }, { "epoch": 5.0, "grad_norm": 2.070575714111328, "learning_rate": 7.59493670886076e-07, "loss": 0.5981, "step": 80 }, { "epoch": 5.0, "eval_cer": 0.21645465736561345, "eval_loss": 0.6611320376396179, "eval_runtime": 51.3922, "eval_samples_per_second": 14.73, "eval_steps_per_second": 0.058, "eval_wer": 0.6882181444006173, "step": 80 }, { "epoch": 5.0, "step": 80, "total_flos": 1.9924990745190072e+18, "train_loss": 1.3853384613990785, "train_runtime": 2046.8169, "train_samples_per_second": 14.442, "train_steps_per_second": 0.039 } ], "logging_steps": 10, "max_steps": 80, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9924990745190072e+18, "train_batch_size": 192, "trial_name": null, "trial_params": null }