| { | |
| "best_metric": 85.87243015287297, | |
| "best_model_checkpoint": "./Shukv4/checkpoint-500", | |
| "epoch": 5.376344086021505, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13440860215053763, | |
| "grad_norm": 349994.25, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.5555, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.26881720430107525, | |
| "grad_norm": 296199.8125, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.3776, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4032258064516129, | |
| "grad_norm": 218550.265625, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.286, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5376344086021505, | |
| "grad_norm": 241839.0, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.2629, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6720430107526881, | |
| "grad_norm": 224651.953125, | |
| "learning_rate": 1.2152777777777779e-05, | |
| "loss": 0.2473, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 247373.3125, | |
| "learning_rate": 1.1805555555555555e-05, | |
| "loss": 0.2225, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9408602150537635, | |
| "grad_norm": 232539.5, | |
| "learning_rate": 1.1458333333333333e-05, | |
| "loss": 0.1814, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.075268817204301, | |
| "grad_norm": 208122.359375, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.1379, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2096774193548387, | |
| "grad_norm": 203154.734375, | |
| "learning_rate": 1.076388888888889e-05, | |
| "loss": 0.0926, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.3440860215053765, | |
| "grad_norm": 193545.453125, | |
| "learning_rate": 1.0416666666666668e-05, | |
| "loss": 0.0877, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.478494623655914, | |
| "grad_norm": 159916.4375, | |
| "learning_rate": 1.0069444444444445e-05, | |
| "loss": 0.0759, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.6129032258064515, | |
| "grad_norm": 203974.796875, | |
| "learning_rate": 9.722222222222223e-06, | |
| "loss": 0.0886, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.7473118279569892, | |
| "grad_norm": 176038.296875, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.0809, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.881720430107527, | |
| "grad_norm": 156732.640625, | |
| "learning_rate": 9.027777777777777e-06, | |
| "loss": 0.0598, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.0161290322580645, | |
| "grad_norm": 116201.7734375, | |
| "learning_rate": 8.680555555555556e-06, | |
| "loss": 0.0546, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.150537634408602, | |
| "grad_norm": 81153.625, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0281, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.28494623655914, | |
| "grad_norm": 95937.9765625, | |
| "learning_rate": 7.98611111111111e-06, | |
| "loss": 0.0328, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.4193548387096775, | |
| "grad_norm": 105021.359375, | |
| "learning_rate": 7.63888888888889e-06, | |
| "loss": 0.0271, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.553763440860215, | |
| "grad_norm": 73798.6796875, | |
| "learning_rate": 7.2916666666666674e-06, | |
| "loss": 0.0317, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.688172043010753, | |
| "grad_norm": 111273.5546875, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 0.0286, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.688172043010753, | |
| "eval_loss": 0.032379038631916046, | |
| "eval_runtime": 903.1767, | |
| "eval_samples_per_second": 0.227, | |
| "eval_steps_per_second": 0.029, | |
| "eval_wer": 85.87243015287297, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.8225806451612905, | |
| "grad_norm": 115137.6015625, | |
| "learning_rate": 6.597222222222223e-06, | |
| "loss": 0.0293, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.956989247311828, | |
| "grad_norm": 148418.078125, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.0264, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.0913978494623655, | |
| "grad_norm": 44248.91796875, | |
| "learning_rate": 5.902777777777778e-06, | |
| "loss": 0.0157, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.225806451612903, | |
| "grad_norm": 86631.046875, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.0101, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.360215053763441, | |
| "grad_norm": 69759.2421875, | |
| "learning_rate": 5.208333333333334e-06, | |
| "loss": 0.0175, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.4946236559139785, | |
| "grad_norm": 79651.625, | |
| "learning_rate": 4.861111111111111e-06, | |
| "loss": 0.0121, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.629032258064516, | |
| "grad_norm": 78669.6171875, | |
| "learning_rate": 4.513888888888889e-06, | |
| "loss": 0.0137, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.763440860215054, | |
| "grad_norm": 104462.546875, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.0114, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.8978494623655915, | |
| "grad_norm": 79927.2109375, | |
| "learning_rate": 3.819444444444445e-06, | |
| "loss": 0.0105, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.032258064516129, | |
| "grad_norm": 69011.8984375, | |
| "learning_rate": 3.4722222222222224e-06, | |
| "loss": 0.0074, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.166666666666667, | |
| "grad_norm": 43131.57421875, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.0036, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.301075268817204, | |
| "grad_norm": 15227.64453125, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0038, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.435483870967742, | |
| "grad_norm": 27650.37890625, | |
| "learning_rate": 2.4305555555555557e-06, | |
| "loss": 0.0082, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.56989247311828, | |
| "grad_norm": 146017.515625, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 0.0066, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.704301075268817, | |
| "grad_norm": 41058.7421875, | |
| "learning_rate": 1.7361111111111112e-06, | |
| "loss": 0.0047, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.838709677419355, | |
| "grad_norm": 6614.59765625, | |
| "learning_rate": 1.388888888888889e-06, | |
| "loss": 0.0051, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.973118279569892, | |
| "grad_norm": 24049.263671875, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "loss": 0.0026, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.10752688172043, | |
| "grad_norm": 42294.18359375, | |
| "learning_rate": 6.944444444444445e-07, | |
| "loss": 0.0047, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.241935483870968, | |
| "grad_norm": 9962.9453125, | |
| "learning_rate": 3.4722222222222224e-07, | |
| "loss": 0.0013, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 5.376344086021505, | |
| "grad_norm": 18759.73828125, | |
| "learning_rate": 0.0, | |
| "loss": 0.0047, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.376344086021505, | |
| "eval_loss": 0.0025792771484702826, | |
| "eval_runtime": 847.1283, | |
| "eval_samples_per_second": 0.242, | |
| "eval_steps_per_second": 0.031, | |
| "eval_wer": 96.36267791249341, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 1000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.4543672418304e+18, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |