| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 50.0, | |
| "eval_steps": 500, | |
| "global_step": 4650, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 4.410810810810811, | |
| "grad_norm": 2.1526288986206055, | |
| "learning_rate": 4.987804878048781e-06, | |
| "loss": 1.5, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 5.378378378378378, | |
| "eval_loss": 1.1479133367538452, | |
| "eval_runtime": 16.0814, | |
| "eval_samples_per_second": 20.396, | |
| "eval_steps_per_second": 2.55, | |
| "eval_wer": 0.9441779328390755, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 8.821621621621622, | |
| "grad_norm": 1.133339524269104, | |
| "learning_rate": 4.517688679245283e-06, | |
| "loss": 1.2719, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 10.756756756756756, | |
| "eval_loss": 1.126757025718689, | |
| "eval_runtime": 16.315, | |
| "eval_samples_per_second": 20.104, | |
| "eval_steps_per_second": 2.513, | |
| "eval_wer": 0.8634976013955517, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 13.227027027027027, | |
| "grad_norm": 2.221667766571045, | |
| "learning_rate": 4.034198113207547e-06, | |
| "loss": 1.1462, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 16.12972972972973, | |
| "eval_loss": 1.1268436908721924, | |
| "eval_runtime": 16.4501, | |
| "eval_samples_per_second": 19.939, | |
| "eval_steps_per_second": 2.492, | |
| "eval_wer": 0.8495420846053205, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.63783783783784, | |
| "grad_norm": 1.2538217306137085, | |
| "learning_rate": 3.5507075471698116e-06, | |
| "loss": 1.0711, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 21.508108108108107, | |
| "eval_loss": 1.1400920152664185, | |
| "eval_runtime": 16.4996, | |
| "eval_samples_per_second": 19.879, | |
| "eval_steps_per_second": 2.485, | |
| "eval_wer": 0.85041430440471, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 22.043243243243243, | |
| "grad_norm": 1.2463653087615967, | |
| "learning_rate": 3.067216981132076e-06, | |
| "loss": 1.0059, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 26.454054054054055, | |
| "grad_norm": 2.5332655906677246, | |
| "learning_rate": 2.5837264150943397e-06, | |
| "loss": 0.9435, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 26.886486486486486, | |
| "eval_loss": 1.1470223665237427, | |
| "eval_runtime": 16.2457, | |
| "eval_samples_per_second": 20.19, | |
| "eval_steps_per_second": 2.524, | |
| "eval_wer": 0.8438726559092892, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 30.864864864864863, | |
| "grad_norm": 1.5317901372909546, | |
| "learning_rate": 2.100235849056604e-06, | |
| "loss": 0.9056, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 32.25945945945946, | |
| "eval_loss": 1.1596176624298096, | |
| "eval_runtime": 15.9807, | |
| "eval_samples_per_second": 20.525, | |
| "eval_steps_per_second": 2.566, | |
| "eval_wer": 0.8360226777147841, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 35.270270270270274, | |
| "grad_norm": 1.1806330680847168, | |
| "learning_rate": 1.6167452830188683e-06, | |
| "loss": 0.8754, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 37.637837837837836, | |
| "eval_loss": 1.1706100702285767, | |
| "eval_runtime": 16.3502, | |
| "eval_samples_per_second": 20.061, | |
| "eval_steps_per_second": 2.508, | |
| "eval_wer": 0.840819886611426, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 39.68108108108108, | |
| "grad_norm": 1.8476966619491577, | |
| "learning_rate": 1.133254716981132e-06, | |
| "loss": 0.8491, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 43.01081081081081, | |
| "eval_loss": 1.1760590076446533, | |
| "eval_runtime": 16.1267, | |
| "eval_samples_per_second": 20.339, | |
| "eval_steps_per_second": 2.542, | |
| "eval_wer": 0.8403837767117314, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 44.086486486486486, | |
| "grad_norm": 1.2304221391677856, | |
| "learning_rate": 6.497641509433964e-07, | |
| "loss": 0.8404, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 48.38918918918919, | |
| "eval_loss": 1.1790771484375, | |
| "eval_runtime": 16.6689, | |
| "eval_samples_per_second": 19.677, | |
| "eval_steps_per_second": 2.46, | |
| "eval_wer": 0.8412559965111208, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 48.497297297297294, | |
| "grad_norm": 2.8257575035095215, | |
| "learning_rate": 1.6745283018867927e-07, | |
| "loss": 0.827, | |
| "step": 4510 | |
| } | |
| ], | |
| "logging_steps": 410, | |
| "max_steps": 4650, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.9474157216000532e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |