| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 48.54368932038835, |
| "global_step": 1250, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.7999999999999997e-05, |
| "loss": 16.9127, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 3.675e-05, |
| "loss": 7.1724, |
| "step": 100 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 5.5499999999999994e-05, |
| "loss": 5.1406, |
| "step": 150 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 7.424999999999999e-05, |
| "loss": 3.9404, |
| "step": 200 |
| }, |
| { |
| "epoch": 9.71, |
| "learning_rate": 7.157142857142856e-05, |
| "loss": 3.5502, |
| "step": 250 |
| }, |
| { |
| "epoch": 11.65, |
| "learning_rate": 6.799999999999999e-05, |
| "loss": 3.4399, |
| "step": 300 |
| }, |
| { |
| "epoch": 13.59, |
| "learning_rate": 6.442857142857143e-05, |
| "loss": 3.3599, |
| "step": 350 |
| }, |
| { |
| "epoch": 15.53, |
| "learning_rate": 6.0857142857142847e-05, |
| "loss": 3.3068, |
| "step": 400 |
| }, |
| { |
| "epoch": 17.48, |
| "learning_rate": 5.728571428571428e-05, |
| "loss": 3.2754, |
| "step": 450 |
| }, |
| { |
| "epoch": 19.42, |
| "learning_rate": 5.371428571428571e-05, |
| "loss": 3.2536, |
| "step": 500 |
| }, |
| { |
| "epoch": 21.36, |
| "learning_rate": 5.014285714285714e-05, |
| "loss": 3.2317, |
| "step": 550 |
| }, |
| { |
| "epoch": 23.3, |
| "learning_rate": 4.657142857142857e-05, |
| "loss": 3.1561, |
| "step": 600 |
| }, |
| { |
| "epoch": 25.24, |
| "learning_rate": 4.3e-05, |
| "loss": 3.0022, |
| "step": 650 |
| }, |
| { |
| "epoch": 27.18, |
| "learning_rate": 3.942857142857142e-05, |
| "loss": 2.7978, |
| "step": 700 |
| }, |
| { |
| "epoch": 29.13, |
| "learning_rate": 3.5857142857142856e-05, |
| "loss": 2.5915, |
| "step": 750 |
| }, |
| { |
| "epoch": 31.07, |
| "learning_rate": 3.228571428571428e-05, |
| "loss": 2.4439, |
| "step": 800 |
| }, |
| { |
| "epoch": 33.01, |
| "learning_rate": 2.871428571428571e-05, |
| "loss": 2.348, |
| "step": 850 |
| }, |
| { |
| "epoch": 34.95, |
| "learning_rate": 2.514285714285714e-05, |
| "loss": 2.2765, |
| "step": 900 |
| }, |
| { |
| "epoch": 36.89, |
| "learning_rate": 2.157142857142857e-05, |
| "loss": 2.2252, |
| "step": 950 |
| }, |
| { |
| "epoch": 38.83, |
| "learning_rate": 1.7999999999999997e-05, |
| "loss": 2.1908, |
| "step": 1000 |
| }, |
| { |
| "epoch": 38.83, |
| "eval_loss": 1.3249318599700928, |
| "eval_runtime": 12.8861, |
| "eval_samples_per_second": 17.616, |
| "eval_steps_per_second": 2.25, |
| "eval_wer": 0.9675356615838662, |
| "step": 1000 |
| }, |
| { |
| "epoch": 40.78, |
| "learning_rate": 1.4428571428571427e-05, |
| "loss": 2.1528, |
| "step": 1050 |
| }, |
| { |
| "epoch": 42.72, |
| "learning_rate": 1.0857142857142856e-05, |
| "loss": 2.121, |
| "step": 1100 |
| }, |
| { |
| "epoch": 44.66, |
| "learning_rate": 7.285714285714285e-06, |
| "loss": 2.1319, |
| "step": 1150 |
| }, |
| { |
| "epoch": 46.6, |
| "learning_rate": 3.785714285714285e-06, |
| "loss": 2.0928, |
| "step": 1200 |
| }, |
| { |
| "epoch": 48.54, |
| "learning_rate": 2.1428571428571426e-07, |
| "loss": 2.0927, |
| "step": 1250 |
| }, |
| { |
| "epoch": 48.54, |
| "step": 1250, |
| "total_flos": 6.576223071172055e+18, |
| "train_loss": 3.6082658813476565, |
| "train_runtime": 3022.5141, |
| "train_samples_per_second": 13.631, |
| "train_steps_per_second": 0.414 |
| } |
| ], |
| "max_steps": 1250, |
| "num_train_epochs": 50, |
| "total_flos": 6.576223071172055e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|