| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "global_step": 4000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.00019800000000000002, | |
| "loss": 4.9562, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.000398, | |
| "loss": 2.9459, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.000598, | |
| "loss": 2.4239, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0007980000000000001, | |
| "loss": 0.7168, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 0.000998, | |
| "loss": 0.3825, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "eval_loss": 0.4022243320941925, | |
| "eval_runtime": 139.1694, | |
| "eval_samples_per_second": 18.747, | |
| "eval_steps_per_second": 0.266, | |
| "eval_wer": 0.5059179128221445, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 0.0009980271764103532, | |
| "loss": 0.2825, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 0.000992044732251972, | |
| "loss": 0.2192, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.0009821006332271156, | |
| "loss": 0.1926, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "learning_rate": 0.0009682749433740962, | |
| "loss": 0.1798, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 0.0009506789790182364, | |
| "loss": 0.1592, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.45845186710357666, | |
| "eval_runtime": 138.4112, | |
| "eval_samples_per_second": 18.85, | |
| "eval_steps_per_second": 0.267, | |
| "eval_wer": 0.44556156538339164, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "learning_rate": 0.00092945441251827, | |
| "loss": 0.1496, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 0.0009047721316038118, | |
| "loss": 0.1421, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 32.5, | |
| "learning_rate": 0.0008768308634878388, | |
| "loss": 0.1438, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 0.0008458555748320216, | |
| "loss": 0.1268, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "learning_rate": 0.0008120956604474414, | |
| "loss": 0.1215, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "eval_loss": 0.45503726601600647, | |
| "eval_runtime": 139.0805, | |
| "eval_samples_per_second": 18.759, | |
| "eval_steps_per_second": 0.266, | |
| "eval_wer": 0.4163538020999045, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.0007758229353142152, | |
| "loss": 0.119, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 42.5, | |
| "learning_rate": 0.0007373294460870985, | |
| "loss": 0.1138, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "learning_rate": 0.0006969251197075427, | |
| "loss": 0.1082, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 47.5, | |
| "learning_rate": 0.0006549352680541975, | |
| "loss": 0.1051, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 0.000611697968722942, | |
| "loss": 0.0972, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_loss": 0.47254303097724915, | |
| "eval_runtime": 138.2935, | |
| "eval_samples_per_second": 18.866, | |
| "eval_steps_per_second": 0.268, | |
| "eval_wer": 0.40884505249761377, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 52.5, | |
| "learning_rate": 0.0005675613430248713, | |
| "loss": 0.0906, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "learning_rate": 0.0005228807531181908, | |
| "loss": 0.0862, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 57.5, | |
| "learning_rate": 0.00047801594084106763, | |
| "loss": 0.082, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 0.0004333281312818746, | |
| "loss": 0.0802, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 62.5, | |
| "learning_rate": 0.00038917712440717607, | |
| "loss": 0.0731, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 62.5, | |
| "eval_loss": 0.45677438378334045, | |
| "eval_runtime": 136.9902, | |
| "eval_samples_per_second": 19.045, | |
| "eval_steps_per_second": 0.27, | |
| "eval_wer": 0.3824053452115813, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "learning_rate": 0.00034591839816395533, | |
| "loss": 0.0671, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 67.5, | |
| "learning_rate": 0.00030390024638020374, | |
| "loss": 0.0623, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "learning_rate": 0.0002634609745078109, | |
| "loss": 0.06, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 72.5, | |
| "learning_rate": 0.00022492617578598646, | |
| "loss": 0.0562, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "learning_rate": 0.00018860610975594382, | |
| "loss": 0.0527, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_loss": 0.4711548089981079, | |
| "eval_runtime": 138.6984, | |
| "eval_samples_per_second": 18.811, | |
| "eval_steps_per_second": 0.267, | |
| "eval_wer": 0.3652879414572065, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 77.5, | |
| "learning_rate": 0.0001547932042335039, | |
| "loss": 0.0513, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 0.00012375970085226701, | |
| "loss": 0.0487, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 82.5, | |
| "learning_rate": 9.575546313405425e-05, | |
| "loss": 0.0471, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "learning_rate": 7.100596473474763e-05, | |
| "loss": 0.0439, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 87.5, | |
| "learning_rate": 4.9710474062988955e-05, | |
| "loss": 0.0428, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 87.5, | |
| "eval_loss": 0.4812512695789337, | |
| "eval_runtime": 136.666, | |
| "eval_samples_per_second": 19.09, | |
| "eval_steps_per_second": 0.271, | |
| "eval_wer": 0.35202036271078585, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "learning_rate": 3.204044988812144e-05, | |
| "loss": 0.0406, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 92.5, | |
| "learning_rate": 1.8138160854995144e-05, | |
| "loss": 0.0402, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "learning_rate": 8.115540020491363e-06, | |
| "loss": 0.0385, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 97.5, | |
| "learning_rate": 2.053283634363745e-06, | |
| "loss": 0.0393, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "learning_rate": 2.0142048445803695e-10, | |
| "loss": 0.0383, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_loss": 0.48349595069885254, | |
| "eval_runtime": 138.1231, | |
| "eval_samples_per_second": 18.889, | |
| "eval_steps_per_second": 0.268, | |
| "eval_wer": 0.3475023862551702, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "step": 4000, | |
| "total_flos": 1.2056228474452825e+20, | |
| "train_loss": 0.37067342311143875, | |
| "train_runtime": 40725.3583, | |
| "train_samples_per_second": 14.008, | |
| "train_steps_per_second": 0.098 | |
| } | |
| ], | |
| "max_steps": 4000, | |
| "num_train_epochs": 100, | |
| "total_flos": 1.2056228474452825e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |