{ "best_metric": 85.87243015287297, "best_model_checkpoint": "./Shukv4/checkpoint-500", "epoch": 5.376344086021505, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13440860215053763, "grad_norm": 349994.25, "learning_rate": 3.125e-06, "loss": 0.5555, "step": 25 }, { "epoch": 0.26881720430107525, "grad_norm": 296199.8125, "learning_rate": 6.25e-06, "loss": 0.3776, "step": 50 }, { "epoch": 0.4032258064516129, "grad_norm": 218550.265625, "learning_rate": 9.375000000000001e-06, "loss": 0.286, "step": 75 }, { "epoch": 0.5376344086021505, "grad_norm": 241839.0, "learning_rate": 1.25e-05, "loss": 0.2629, "step": 100 }, { "epoch": 0.6720430107526881, "grad_norm": 224651.953125, "learning_rate": 1.2152777777777779e-05, "loss": 0.2473, "step": 125 }, { "epoch": 0.8064516129032258, "grad_norm": 247373.3125, "learning_rate": 1.1805555555555555e-05, "loss": 0.2225, "step": 150 }, { "epoch": 0.9408602150537635, "grad_norm": 232539.5, "learning_rate": 1.1458333333333333e-05, "loss": 0.1814, "step": 175 }, { "epoch": 1.075268817204301, "grad_norm": 208122.359375, "learning_rate": 1.1111111111111112e-05, "loss": 0.1379, "step": 200 }, { "epoch": 1.2096774193548387, "grad_norm": 203154.734375, "learning_rate": 1.076388888888889e-05, "loss": 0.0926, "step": 225 }, { "epoch": 1.3440860215053765, "grad_norm": 193545.453125, "learning_rate": 1.0416666666666668e-05, "loss": 0.0877, "step": 250 }, { "epoch": 1.478494623655914, "grad_norm": 159916.4375, "learning_rate": 1.0069444444444445e-05, "loss": 0.0759, "step": 275 }, { "epoch": 1.6129032258064515, "grad_norm": 203974.796875, "learning_rate": 9.722222222222223e-06, "loss": 0.0886, "step": 300 }, { "epoch": 1.7473118279569892, "grad_norm": 176038.296875, "learning_rate": 9.375000000000001e-06, "loss": 0.0809, "step": 325 }, { "epoch": 1.881720430107527, "grad_norm": 156732.640625, "learning_rate": 9.027777777777777e-06, "loss": 0.0598, "step": 350 }, { "epoch": 2.0161290322580645, "grad_norm": 116201.7734375, "learning_rate": 8.680555555555556e-06, "loss": 0.0546, "step": 375 }, { "epoch": 2.150537634408602, "grad_norm": 81153.625, "learning_rate": 8.333333333333334e-06, "loss": 0.0281, "step": 400 }, { "epoch": 2.28494623655914, "grad_norm": 95937.9765625, "learning_rate": 7.98611111111111e-06, "loss": 0.0328, "step": 425 }, { "epoch": 2.4193548387096775, "grad_norm": 105021.359375, "learning_rate": 7.63888888888889e-06, "loss": 0.0271, "step": 450 }, { "epoch": 2.553763440860215, "grad_norm": 73798.6796875, "learning_rate": 7.2916666666666674e-06, "loss": 0.0317, "step": 475 }, { "epoch": 2.688172043010753, "grad_norm": 111273.5546875, "learning_rate": 6.944444444444445e-06, "loss": 0.0286, "step": 500 }, { "epoch": 2.688172043010753, "eval_loss": 0.032379038631916046, "eval_runtime": 903.1767, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.029, "eval_wer": 85.87243015287297, "step": 500 }, { "epoch": 2.8225806451612905, "grad_norm": 115137.6015625, "learning_rate": 6.597222222222223e-06, "loss": 0.0293, "step": 525 }, { "epoch": 2.956989247311828, "grad_norm": 148418.078125, "learning_rate": 6.25e-06, "loss": 0.0264, "step": 550 }, { "epoch": 3.0913978494623655, "grad_norm": 44248.91796875, "learning_rate": 5.902777777777778e-06, "loss": 0.0157, "step": 575 }, { "epoch": 3.225806451612903, "grad_norm": 86631.046875, "learning_rate": 5.555555555555556e-06, "loss": 0.0101, "step": 600 }, { "epoch": 3.360215053763441, "grad_norm": 69759.2421875, "learning_rate": 5.208333333333334e-06, "loss": 0.0175, "step": 625 }, { "epoch": 3.4946236559139785, "grad_norm": 79651.625, "learning_rate": 4.861111111111111e-06, "loss": 0.0121, "step": 650 }, { "epoch": 3.629032258064516, "grad_norm": 78669.6171875, "learning_rate": 4.513888888888889e-06, "loss": 0.0137, "step": 675 }, { "epoch": 3.763440860215054, "grad_norm": 104462.546875, "learning_rate": 4.166666666666667e-06, "loss": 0.0114, "step": 700 }, { "epoch": 3.8978494623655915, "grad_norm": 79927.2109375, "learning_rate": 3.819444444444445e-06, "loss": 0.0105, "step": 725 }, { "epoch": 4.032258064516129, "grad_norm": 69011.8984375, "learning_rate": 3.4722222222222224e-06, "loss": 0.0074, "step": 750 }, { "epoch": 4.166666666666667, "grad_norm": 43131.57421875, "learning_rate": 3.125e-06, "loss": 0.0036, "step": 775 }, { "epoch": 4.301075268817204, "grad_norm": 15227.64453125, "learning_rate": 2.777777777777778e-06, "loss": 0.0038, "step": 800 }, { "epoch": 4.435483870967742, "grad_norm": 27650.37890625, "learning_rate": 2.4305555555555557e-06, "loss": 0.0082, "step": 825 }, { "epoch": 4.56989247311828, "grad_norm": 146017.515625, "learning_rate": 2.0833333333333334e-06, "loss": 0.0066, "step": 850 }, { "epoch": 4.704301075268817, "grad_norm": 41058.7421875, "learning_rate": 1.7361111111111112e-06, "loss": 0.0047, "step": 875 }, { "epoch": 4.838709677419355, "grad_norm": 6614.59765625, "learning_rate": 1.388888888888889e-06, "loss": 0.0051, "step": 900 }, { "epoch": 4.973118279569892, "grad_norm": 24049.263671875, "learning_rate": 1.0416666666666667e-06, "loss": 0.0026, "step": 925 }, { "epoch": 5.10752688172043, "grad_norm": 42294.18359375, "learning_rate": 6.944444444444445e-07, "loss": 0.0047, "step": 950 }, { "epoch": 5.241935483870968, "grad_norm": 9962.9453125, "learning_rate": 3.4722222222222224e-07, "loss": 0.0013, "step": 975 }, { "epoch": 5.376344086021505, "grad_norm": 18759.73828125, "learning_rate": 0.0, "loss": 0.0047, "step": 1000 }, { "epoch": 5.376344086021505, "eval_loss": 0.0025792771484702826, "eval_runtime": 847.1283, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.031, "eval_wer": 96.36267791249341, "step": 1000 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.4543672418304e+18, "train_batch_size": 12, "trial_name": null, "trial_params": null }