diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/config.json b/checkpoints/whisper-base/bengali/checkpoint-19000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..664b4efc2b4332772fd201173788ef4f8e439281 --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50302 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/generation_config.json b/checkpoints/whisper-base/bengali/checkpoint-19000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/model.safetensors b/checkpoints/whisper-base/bengali/checkpoint-19000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e20d9ec1e0a9d81baefb6314f243b9a43cdc0d2c --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49c92828da77648870c0be2e95ee6c6cb57b8f52008e00499cc719e0e261304a +size 290403936 diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/optimizer.pt b/checkpoints/whisper-base/bengali/checkpoint-19000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bee25e1355379354cc8d98cbc773d03698ae2008 --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318e450b793746972caf49df9deb225562065a8d93ed2a9bf1fca5c807f40392 +size 574811077 diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/preprocessor_config.json b/checkpoints/whisper-base/bengali/checkpoint-19000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/rng_state.pth b/checkpoints/whisper-base/bengali/checkpoint-19000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..773d6bcf66644290f22e6a060d174555d3c0a8cf --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4563375f0fddda67cea3325712e36f015490f0f9205343975c7de3049de36e2 +size 14575 diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/scheduler.pt b/checkpoints/whisper-base/bengali/checkpoint-19000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8887b5a33e929cc0dd1675d10db0a63fd333c3ed --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f95d154f980217769f628c073fdc2353f1b3d81f67ccce510e41575f8e4e1a +size 627 diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json b/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d4763ee819f14d37e01be3083c2dfc4fe935151 --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/trainer_state.json @@ -0,0 +1,5512 @@ +{ + "best_metric": 24.73569978295876, + "best_model_checkpoint": "results/whisper-base/bengali/checkpoint-9000", + "epoch": 10.626398210290828, + "eval_steps": 1000, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 20.479082107543945, + "learning_rate": 4.4e-07, + "loss": 2.3197, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 11.922679901123047, + "learning_rate": 9.400000000000001e-07, + "loss": 2.0798, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 7.62770938873291, + "learning_rate": 1.44e-06, + "loss": 1.7731, + "step": 75 + }, + { + "epoch": 0.06, + "grad_norm": 5.752090930938721, + "learning_rate": 1.94e-06, + "loss": 1.5748, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 4.62416934967041, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.4206, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 5.872653484344482, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.3248, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 6.6760993003845215, + "learning_rate": 3.44e-06, + "loss": 1.2546, + "step": 175 + }, + { + "epoch": 0.11, + "grad_norm": 6.7307329177856445, + "learning_rate": 3.94e-06, + "loss": 1.128, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 6.937326431274414, + "learning_rate": 4.440000000000001e-06, + "loss": 0.8661, + "step": 225 + }, + { + "epoch": 0.14, + "grad_norm": 4.662399768829346, + "learning_rate": 4.94e-06, + "loss": 0.646, + "step": 250 + }, + { + "epoch": 0.15, + "grad_norm": 4.14452600479126, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.5209, + "step": 275 + }, + { + "epoch": 0.17, + "grad_norm": 4.323141098022461, + "learning_rate": 5.94e-06, + "loss": 0.4461, + "step": 300 + }, + { + "epoch": 0.18, + "grad_norm": 4.011408805847168, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4018, + "step": 325 + }, + { + "epoch": 0.2, + "grad_norm": 5.2240705490112305, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.3637, + "step": 350 + }, + { + "epoch": 0.21, + "grad_norm": 4.614215850830078, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3387, + "step": 375 + }, + { + "epoch": 0.22, + "grad_norm": 4.833929538726807, + "learning_rate": 7.94e-06, + "loss": 0.3189, + "step": 400 + }, + { + "epoch": 0.24, + "grad_norm": 3.848154067993164, + "learning_rate": 8.44e-06, + "loss": 0.2969, + "step": 425 + }, + { + "epoch": 0.25, + "grad_norm": 5.367223262786865, + "learning_rate": 8.94e-06, + "loss": 0.2912, + "step": 450 + }, + { + "epoch": 0.27, + "grad_norm": 4.365331172943115, + "learning_rate": 9.440000000000001e-06, + "loss": 0.286, + "step": 475 + }, + { + "epoch": 0.28, + "grad_norm": 6.139851093292236, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2743, + "step": 500 + }, + { + "epoch": 0.29, + "grad_norm": 5.86342191696167, + "learning_rate": 9.997788944723618e-06, + "loss": 0.2666, + "step": 525 + }, + { + "epoch": 0.31, + "grad_norm": 3.888645887374878, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2557, + "step": 550 + }, + { + "epoch": 0.32, + "grad_norm": 4.206381797790527, + "learning_rate": 9.992763819095477e-06, + "loss": 0.2506, + "step": 575 + }, + { + "epoch": 0.34, + "grad_norm": 4.591144561767578, + "learning_rate": 9.990251256281408e-06, + "loss": 0.2393, + "step": 600 + }, + { + "epoch": 0.35, + "grad_norm": 3.7976574897766113, + "learning_rate": 9.987738693467337e-06, + "loss": 0.238, + "step": 625 + }, + { + "epoch": 0.36, + "grad_norm": 4.7065911293029785, + "learning_rate": 9.985226130653267e-06, + "loss": 0.2286, + "step": 650 + }, + { + "epoch": 0.38, + "grad_norm": 4.082373142242432, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2256, + "step": 675 + }, + { + "epoch": 0.39, + "grad_norm": 3.7245709896087646, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2217, + "step": 700 + }, + { + "epoch": 0.41, + "grad_norm": 3.2761738300323486, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2141, + "step": 725 + }, + { + "epoch": 0.42, + "grad_norm": 3.9429969787597656, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2148, + "step": 750 + }, + { + "epoch": 0.43, + "grad_norm": 2.745335340499878, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2087, + "step": 775 + }, + { + "epoch": 0.45, + "grad_norm": 3.284982442855835, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2071, + "step": 800 + }, + { + "epoch": 0.46, + "grad_norm": 3.2479090690612793, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2009, + "step": 825 + }, + { + "epoch": 0.48, + "grad_norm": 3.2984981536865234, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2012, + "step": 850 + }, + { + "epoch": 0.49, + "grad_norm": 3.311579704284668, + "learning_rate": 9.962613065326634e-06, + "loss": 0.1941, + "step": 875 + }, + { + "epoch": 0.5, + "grad_norm": 3.8732566833496094, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1977, + "step": 900 + }, + { + "epoch": 0.52, + "grad_norm": 3.0260491371154785, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1911, + "step": 925 + }, + { + "epoch": 0.53, + "grad_norm": 2.8873238563537598, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1885, + "step": 950 + }, + { + "epoch": 0.55, + "grad_norm": 3.0286946296691895, + "learning_rate": 9.952562814070353e-06, + "loss": 0.186, + "step": 975 + }, + { + "epoch": 0.56, + "grad_norm": 2.839372158050537, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1856, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 0.14572674036026, + "eval_runtime": 1294.9998, + "eval_samples_per_second": 1.158, + "eval_steps_per_second": 1.158, + "eval_wer": 40.77574739200448, + "step": 1000 + }, + { + "epoch": 0.57, + "grad_norm": 5.458705425262451, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1829, + "step": 1025 + }, + { + "epoch": 0.59, + "grad_norm": 2.867703676223755, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1831, + "step": 1050 + }, + { + "epoch": 0.6, + "grad_norm": 2.979769229888916, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1829, + "step": 1075 + }, + { + "epoch": 0.62, + "grad_norm": 3.345287561416626, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1753, + "step": 1100 + }, + { + "epoch": 0.63, + "grad_norm": 3.8183023929595947, + "learning_rate": 9.93748743718593e-06, + "loss": 0.1785, + "step": 1125 + }, + { + "epoch": 0.64, + "grad_norm": 3.1384637355804443, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1727, + "step": 1150 + }, + { + "epoch": 0.66, + "grad_norm": 2.651932716369629, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1685, + "step": 1175 + }, + { + "epoch": 0.67, + "grad_norm": 3.3064398765563965, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1713, + "step": 1200 + }, + { + "epoch": 0.69, + "grad_norm": 3.0926802158355713, + "learning_rate": 9.92743718592965e-06, + "loss": 0.167, + "step": 1225 + }, + { + "epoch": 0.7, + "grad_norm": 2.8239219188690186, + "learning_rate": 9.924924623115579e-06, + "loss": 0.166, + "step": 1250 + }, + { + "epoch": 0.71, + "grad_norm": 2.59196400642395, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1652, + "step": 1275 + }, + { + "epoch": 0.73, + "grad_norm": 2.587282419204712, + "learning_rate": 9.91989949748744e-06, + "loss": 0.162, + "step": 1300 + }, + { + "epoch": 0.74, + "grad_norm": 3.395512104034424, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1643, + "step": 1325 + }, + { + "epoch": 0.76, + "grad_norm": 3.0003013610839844, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1616, + "step": 1350 + }, + { + "epoch": 0.77, + "grad_norm": 2.4067747592926025, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1613, + "step": 1375 + }, + { + "epoch": 0.78, + "grad_norm": 3.117004632949829, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1551, + "step": 1400 + }, + { + "epoch": 0.8, + "grad_norm": 2.4046616554260254, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1563, + "step": 1425 + }, + { + "epoch": 0.81, + "grad_norm": 4.316405773162842, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1611, + "step": 1450 + }, + { + "epoch": 0.82, + "grad_norm": 3.059438943862915, + "learning_rate": 9.902311557788945e-06, + "loss": 0.152, + "step": 1475 + }, + { + "epoch": 0.84, + "grad_norm": 2.760357141494751, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1466, + "step": 1500 + }, + { + "epoch": 0.85, + "grad_norm": 3.3387234210968018, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1585, + "step": 1525 + }, + { + "epoch": 0.87, + "grad_norm": 2.262953281402588, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1531, + "step": 1550 + }, + { + "epoch": 0.88, + "grad_norm": 2.503844976425171, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1472, + "step": 1575 + }, + { + "epoch": 0.89, + "grad_norm": 2.459182024002075, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1495, + "step": 1600 + }, + { + "epoch": 0.91, + "grad_norm": 2.699077844619751, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1536, + "step": 1625 + }, + { + "epoch": 0.92, + "grad_norm": 3.1890530586242676, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1483, + "step": 1650 + }, + { + "epoch": 0.94, + "grad_norm": 3.2312211990356445, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1435, + "step": 1675 + }, + { + "epoch": 0.95, + "grad_norm": 2.5697576999664307, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1465, + "step": 1700 + }, + { + "epoch": 0.96, + "grad_norm": 3.028343439102173, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1445, + "step": 1725 + }, + { + "epoch": 0.98, + "grad_norm": 2.123739004135132, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1412, + "step": 1750 + }, + { + "epoch": 0.99, + "grad_norm": 2.920504331588745, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1408, + "step": 1775 + }, + { + "epoch": 1.01, + "grad_norm": 2.644196033477783, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1401, + "step": 1800 + }, + { + "epoch": 1.02, + "grad_norm": 2.547008991241455, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1327, + "step": 1825 + }, + { + "epoch": 1.03, + "grad_norm": 2.1837239265441895, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1301, + "step": 1850 + }, + { + "epoch": 1.05, + "grad_norm": 2.398655414581299, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1323, + "step": 1875 + }, + { + "epoch": 1.06, + "grad_norm": 2.1983821392059326, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1291, + "step": 1900 + }, + { + "epoch": 1.08, + "grad_norm": 2.395869731903076, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1247, + "step": 1925 + }, + { + "epoch": 1.09, + "grad_norm": 2.402700424194336, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1297, + "step": 1950 + }, + { + "epoch": 1.1, + "grad_norm": 2.341567277908325, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1275, + "step": 1975 + }, + { + "epoch": 1.12, + "grad_norm": 3.422062873840332, + "learning_rate": 9.849547738693467e-06, + "loss": 0.125, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 0.10941009223461151, + "eval_runtime": 1282.51, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 1.17, + "eval_wer": 32.99026815094868, + "step": 2000 + }, + { + "epoch": 1.13, + "grad_norm": 3.0541255474090576, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1222, + "step": 2025 + }, + { + "epoch": 1.15, + "grad_norm": 2.4372646808624268, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1228, + "step": 2050 + }, + { + "epoch": 1.16, + "grad_norm": 2.5140273571014404, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1277, + "step": 2075 + }, + { + "epoch": 1.17, + "grad_norm": 2.391875982284546, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1223, + "step": 2100 + }, + { + "epoch": 1.19, + "grad_norm": 2.4066147804260254, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1211, + "step": 2125 + }, + { + "epoch": 1.2, + "grad_norm": 2.2214889526367188, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1219, + "step": 2150 + }, + { + "epoch": 1.22, + "grad_norm": 2.5247437953948975, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1188, + "step": 2175 + }, + { + "epoch": 1.23, + "grad_norm": 3.2937655448913574, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1195, + "step": 2200 + }, + { + "epoch": 1.24, + "grad_norm": 2.3191704750061035, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1226, + "step": 2225 + }, + { + "epoch": 1.26, + "grad_norm": 2.650092124938965, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1218, + "step": 2250 + }, + { + "epoch": 1.27, + "grad_norm": 3.117119550704956, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1202, + "step": 2275 + }, + { + "epoch": 1.29, + "grad_norm": 2.1879711151123047, + "learning_rate": 9.819396984924624e-06, + "loss": 0.119, + "step": 2300 + }, + { + "epoch": 1.3, + "grad_norm": 3.037064552307129, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1209, + "step": 2325 + }, + { + "epoch": 1.31, + "grad_norm": 2.5333287715911865, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1157, + "step": 2350 + }, + { + "epoch": 1.33, + "grad_norm": 2.4529097080230713, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1169, + "step": 2375 + }, + { + "epoch": 1.34, + "grad_norm": 2.246457815170288, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1172, + "step": 2400 + }, + { + "epoch": 1.36, + "grad_norm": 1.8162719011306763, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1155, + "step": 2425 + }, + { + "epoch": 1.37, + "grad_norm": 2.8499510288238525, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1166, + "step": 2450 + }, + { + "epoch": 1.38, + "grad_norm": 1.823399543762207, + "learning_rate": 9.801809045226131e-06, + "loss": 0.115, + "step": 2475 + }, + { + "epoch": 1.4, + "grad_norm": 2.7357358932495117, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1108, + "step": 2500 + }, + { + "epoch": 1.41, + "grad_norm": 3.5756173133850098, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1183, + "step": 2525 + }, + { + "epoch": 1.43, + "grad_norm": 2.150749683380127, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1157, + "step": 2550 + }, + { + "epoch": 1.44, + "grad_norm": 2.2136292457580566, + "learning_rate": 9.79175879396985e-06, + "loss": 0.118, + "step": 2575 + }, + { + "epoch": 1.45, + "grad_norm": 2.220170259475708, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1165, + "step": 2600 + }, + { + "epoch": 1.47, + "grad_norm": 2.3177106380462646, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1123, + "step": 2625 + }, + { + "epoch": 1.48, + "grad_norm": 2.216575860977173, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1128, + "step": 2650 + }, + { + "epoch": 1.5, + "grad_norm": 2.057546615600586, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1097, + "step": 2675 + }, + { + "epoch": 1.51, + "grad_norm": 2.5831878185272217, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1093, + "step": 2700 + }, + { + "epoch": 1.52, + "grad_norm": 2.306478261947632, + "learning_rate": 9.776683417085428e-06, + "loss": 0.114, + "step": 2725 + }, + { + "epoch": 1.54, + "grad_norm": 2.323587417602539, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1117, + "step": 2750 + }, + { + "epoch": 1.55, + "grad_norm": 3.0451858043670654, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1142, + "step": 2775 + }, + { + "epoch": 1.57, + "grad_norm": 3.178542375564575, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1159, + "step": 2800 + }, + { + "epoch": 1.58, + "grad_norm": 2.5522382259368896, + "learning_rate": 9.766633165829147e-06, + "loss": 0.1102, + "step": 2825 + }, + { + "epoch": 1.59, + "grad_norm": 2.8957133293151855, + "learning_rate": 9.764120603015076e-06, + "loss": 0.112, + "step": 2850 + }, + { + "epoch": 1.61, + "grad_norm": 2.62931752204895, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1132, + "step": 2875 + }, + { + "epoch": 1.62, + "grad_norm": 2.26515793800354, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1091, + "step": 2900 + }, + { + "epoch": 1.64, + "grad_norm": 2.3211770057678223, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1071, + "step": 2925 + }, + { + "epoch": 1.65, + "grad_norm": 2.146005392074585, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1067, + "step": 2950 + }, + { + "epoch": 1.66, + "grad_norm": 2.2931926250457764, + "learning_rate": 9.751557788944724e-06, + "loss": 0.1101, + "step": 2975 + }, + { + "epoch": 1.68, + "grad_norm": 2.441265821456909, + "learning_rate": 9.749045226130654e-06, + "loss": 0.1093, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 0.09424228966236115, + "eval_runtime": 1257.7532, + "eval_samples_per_second": 1.193, + "eval_steps_per_second": 1.193, + "eval_wer": 28.91549394384933, + "step": 3000 + }, + { + "epoch": 1.69, + "grad_norm": 2.352782726287842, + "learning_rate": 9.746532663316583e-06, + "loss": 0.1069, + "step": 3025 + }, + { + "epoch": 1.71, + "grad_norm": 2.396038055419922, + "learning_rate": 9.744020100502514e-06, + "loss": 0.1071, + "step": 3050 + }, + { + "epoch": 1.72, + "grad_norm": 2.1817808151245117, + "learning_rate": 9.741507537688443e-06, + "loss": 0.1085, + "step": 3075 + }, + { + "epoch": 1.73, + "grad_norm": 2.5631215572357178, + "learning_rate": 9.738994974874373e-06, + "loss": 0.1068, + "step": 3100 + }, + { + "epoch": 1.75, + "grad_norm": 2.2351434230804443, + "learning_rate": 9.736482412060302e-06, + "loss": 0.105, + "step": 3125 + }, + { + "epoch": 1.76, + "grad_norm": 2.0444986820220947, + "learning_rate": 9.733969849246231e-06, + "loss": 0.107, + "step": 3150 + }, + { + "epoch": 1.78, + "grad_norm": 2.6535725593566895, + "learning_rate": 9.731457286432162e-06, + "loss": 0.1069, + "step": 3175 + }, + { + "epoch": 1.79, + "grad_norm": 2.8334600925445557, + "learning_rate": 9.728944723618092e-06, + "loss": 0.1051, + "step": 3200 + }, + { + "epoch": 1.8, + "grad_norm": 2.6148955821990967, + "learning_rate": 9.726432160804021e-06, + "loss": 0.103, + "step": 3225 + }, + { + "epoch": 1.82, + "grad_norm": 2.369356393814087, + "learning_rate": 9.72391959798995e-06, + "loss": 0.1116, + "step": 3250 + }, + { + "epoch": 1.83, + "grad_norm": 2.479933261871338, + "learning_rate": 9.721407035175881e-06, + "loss": 0.1049, + "step": 3275 + }, + { + "epoch": 1.85, + "grad_norm": 2.4552206993103027, + "learning_rate": 9.718894472361809e-06, + "loss": 0.1046, + "step": 3300 + }, + { + "epoch": 1.86, + "grad_norm": 1.9066778421401978, + "learning_rate": 9.71638190954774e-06, + "loss": 0.1013, + "step": 3325 + }, + { + "epoch": 1.87, + "grad_norm": 2.111132860183716, + "learning_rate": 9.71386934673367e-06, + "loss": 0.1003, + "step": 3350 + }, + { + "epoch": 1.89, + "grad_norm": 3.1862635612487793, + "learning_rate": 9.711356783919599e-06, + "loss": 0.1049, + "step": 3375 + }, + { + "epoch": 1.9, + "grad_norm": 1.8302373886108398, + "learning_rate": 9.70884422110553e-06, + "loss": 0.1072, + "step": 3400 + }, + { + "epoch": 1.92, + "grad_norm": 2.316040277481079, + "learning_rate": 9.706331658291457e-06, + "loss": 0.1019, + "step": 3425 + }, + { + "epoch": 1.93, + "grad_norm": 2.2230210304260254, + "learning_rate": 9.703819095477388e-06, + "loss": 0.1054, + "step": 3450 + }, + { + "epoch": 1.94, + "grad_norm": 2.367607355117798, + "learning_rate": 9.701306532663318e-06, + "loss": 0.1029, + "step": 3475 + }, + { + "epoch": 1.96, + "grad_norm": 2.147073984146118, + "learning_rate": 9.698793969849247e-06, + "loss": 0.1009, + "step": 3500 + }, + { + "epoch": 1.97, + "grad_norm": 2.4300241470336914, + "learning_rate": 9.696281407035176e-06, + "loss": 0.101, + "step": 3525 + }, + { + "epoch": 1.99, + "grad_norm": 2.174846649169922, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0978, + "step": 3550 + }, + { + "epoch": 2.0, + "grad_norm": 2.3007450103759766, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0995, + "step": 3575 + }, + { + "epoch": 2.01, + "grad_norm": 2.4721336364746094, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0887, + "step": 3600 + }, + { + "epoch": 2.03, + "grad_norm": 2.2181708812713623, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0866, + "step": 3625 + }, + { + "epoch": 2.04, + "grad_norm": 1.8976894617080688, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0862, + "step": 3650 + }, + { + "epoch": 2.06, + "grad_norm": 2.3264853954315186, + "learning_rate": 9.681206030150756e-06, + "loss": 0.088, + "step": 3675 + }, + { + "epoch": 2.07, + "grad_norm": 1.9397796392440796, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0896, + "step": 3700 + }, + { + "epoch": 2.08, + "grad_norm": 2.0393741130828857, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0854, + "step": 3725 + }, + { + "epoch": 2.1, + "grad_norm": 2.169718027114868, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0883, + "step": 3750 + }, + { + "epoch": 2.11, + "grad_norm": 2.1115963459014893, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0923, + "step": 3775 + }, + { + "epoch": 2.13, + "grad_norm": 2.055377721786499, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0859, + "step": 3800 + }, + { + "epoch": 2.14, + "grad_norm": 2.477351665496826, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0849, + "step": 3825 + }, + { + "epoch": 2.15, + "grad_norm": 2.2051820755004883, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0839, + "step": 3850 + }, + { + "epoch": 2.17, + "grad_norm": 2.350365400314331, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0871, + "step": 3875 + }, + { + "epoch": 2.18, + "grad_norm": 2.170224905014038, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0855, + "step": 3900 + }, + { + "epoch": 2.2, + "grad_norm": 1.870482325553894, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0867, + "step": 3925 + }, + { + "epoch": 2.21, + "grad_norm": 1.9789308309555054, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0878, + "step": 3950 + }, + { + "epoch": 2.22, + "grad_norm": 1.9876500368118286, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0846, + "step": 3975 + }, + { + "epoch": 2.24, + "grad_norm": 2.23836088180542, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0841, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 0.08813700079917908, + "eval_runtime": 1548.8639, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.968, + "eval_wer": 27.97031435972835, + "step": 4000 + }, + { + "epoch": 2.25, + "grad_norm": 2.6324143409729004, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0846, + "step": 4025 + }, + { + "epoch": 2.27, + "grad_norm": 1.8375723361968994, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0862, + "step": 4050 + }, + { + "epoch": 2.28, + "grad_norm": 2.220909595489502, + "learning_rate": 9.64100502512563e-06, + "loss": 0.084, + "step": 4075 + }, + { + "epoch": 2.29, + "grad_norm": 2.610175371170044, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0842, + "step": 4100 + }, + { + "epoch": 2.31, + "grad_norm": 1.811500072479248, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0823, + "step": 4125 + }, + { + "epoch": 2.32, + "grad_norm": 2.0553157329559326, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0825, + "step": 4150 + }, + { + "epoch": 2.34, + "grad_norm": 1.9910943508148193, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0854, + "step": 4175 + }, + { + "epoch": 2.35, + "grad_norm": 1.9267401695251465, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0812, + "step": 4200 + }, + { + "epoch": 2.36, + "grad_norm": 1.8411636352539062, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0848, + "step": 4225 + }, + { + "epoch": 2.38, + "grad_norm": 2.24184513092041, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0811, + "step": 4250 + }, + { + "epoch": 2.39, + "grad_norm": 2.0340802669525146, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0829, + "step": 4275 + }, + { + "epoch": 2.4, + "grad_norm": 1.898876667022705, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0846, + "step": 4300 + }, + { + "epoch": 2.42, + "grad_norm": 2.3137125968933105, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0823, + "step": 4325 + }, + { + "epoch": 2.43, + "grad_norm": 2.510221481323242, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0862, + "step": 4350 + }, + { + "epoch": 2.45, + "grad_norm": 2.1451172828674316, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0832, + "step": 4375 + }, + { + "epoch": 2.46, + "grad_norm": 1.8479361534118652, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0842, + "step": 4400 + }, + { + "epoch": 2.47, + "grad_norm": 2.3328495025634766, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0857, + "step": 4425 + }, + { + "epoch": 2.49, + "grad_norm": 1.9808458089828491, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0791, + "step": 4450 + }, + { + "epoch": 2.5, + "grad_norm": 1.7810078859329224, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0812, + "step": 4475 + }, + { + "epoch": 2.52, + "grad_norm": 2.0694406032562256, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0816, + "step": 4500 + }, + { + "epoch": 2.53, + "grad_norm": 2.125455141067505, + "learning_rate": 9.595778894472363e-06, + "loss": 0.084, + "step": 4525 + }, + { + "epoch": 2.54, + "grad_norm": 2.307854175567627, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0796, + "step": 4550 + }, + { + "epoch": 2.56, + "grad_norm": 2.1585569381713867, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0796, + "step": 4575 + }, + { + "epoch": 2.57, + "grad_norm": 2.4452669620513916, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0854, + "step": 4600 + }, + { + "epoch": 2.59, + "grad_norm": 2.0743563175201416, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0849, + "step": 4625 + }, + { + "epoch": 2.6, + "grad_norm": 1.8115513324737549, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0807, + "step": 4650 + }, + { + "epoch": 2.61, + "grad_norm": 2.037189483642578, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0846, + "step": 4675 + }, + { + "epoch": 2.63, + "grad_norm": 2.2096242904663086, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0791, + "step": 4700 + }, + { + "epoch": 2.64, + "grad_norm": 1.8818609714508057, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0846, + "step": 4725 + }, + { + "epoch": 2.66, + "grad_norm": 2.0996897220611572, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0772, + "step": 4750 + }, + { + "epoch": 2.67, + "grad_norm": 2.697157144546509, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0779, + "step": 4775 + }, + { + "epoch": 2.68, + "grad_norm": 1.9727689027786255, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0825, + "step": 4800 + }, + { + "epoch": 2.7, + "grad_norm": 2.3720059394836426, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0796, + "step": 4825 + }, + { + "epoch": 2.71, + "grad_norm": 2.233374834060669, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0811, + "step": 4850 + }, + { + "epoch": 2.73, + "grad_norm": 1.9238030910491943, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0798, + "step": 4875 + }, + { + "epoch": 2.74, + "grad_norm": 2.8772740364074707, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0772, + "step": 4900 + }, + { + "epoch": 2.75, + "grad_norm": 2.729653835296631, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0771, + "step": 4925 + }, + { + "epoch": 2.77, + "grad_norm": 2.06793212890625, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0774, + "step": 4950 + }, + { + "epoch": 2.78, + "grad_norm": 1.8402355909347534, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0804, + "step": 4975 + }, + { + "epoch": 2.8, + "grad_norm": 2.32189679145813, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0809, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 0.08343477547168732, + "eval_runtime": 1539.3544, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.974, + "eval_wer": 25.953931246936918, + "step": 5000 + }, + { + "epoch": 2.81, + "grad_norm": 2.597062587738037, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0789, + "step": 5025 + }, + { + "epoch": 2.82, + "grad_norm": 2.2318174839019775, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0804, + "step": 5050 + }, + { + "epoch": 2.84, + "grad_norm": 1.986308217048645, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0795, + "step": 5075 + }, + { + "epoch": 2.85, + "grad_norm": 1.7781836986541748, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0766, + "step": 5100 + }, + { + "epoch": 2.87, + "grad_norm": 2.045193672180176, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0779, + "step": 5125 + }, + { + "epoch": 2.88, + "grad_norm": 1.809615135192871, + "learning_rate": 9.532964824120604e-06, + "loss": 0.075, + "step": 5150 + }, + { + "epoch": 2.89, + "grad_norm": 2.1733291149139404, + "learning_rate": 9.530452261306534e-06, + "loss": 0.078, + "step": 5175 + }, + { + "epoch": 2.91, + "grad_norm": 1.6681715250015259, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0779, + "step": 5200 + }, + { + "epoch": 2.92, + "grad_norm": 2.364774703979492, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0793, + "step": 5225 + }, + { + "epoch": 2.94, + "grad_norm": 1.6892741918563843, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0757, + "step": 5250 + }, + { + "epoch": 2.95, + "grad_norm": 2.010044574737549, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0771, + "step": 5275 + }, + { + "epoch": 2.96, + "grad_norm": 1.8010215759277344, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0763, + "step": 5300 + }, + { + "epoch": 2.98, + "grad_norm": 1.8384108543395996, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0766, + "step": 5325 + }, + { + "epoch": 2.99, + "grad_norm": 1.962332010269165, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0761, + "step": 5350 + }, + { + "epoch": 3.01, + "grad_norm": 1.5537636280059814, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0743, + "step": 5375 + }, + { + "epoch": 3.02, + "grad_norm": 1.6470623016357422, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0608, + "step": 5400 + }, + { + "epoch": 3.03, + "grad_norm": 1.8470031023025513, + "learning_rate": 9.50532663316583e-06, + "loss": 0.064, + "step": 5425 + }, + { + "epoch": 3.05, + "grad_norm": 1.7956515550613403, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0665, + "step": 5450 + }, + { + "epoch": 3.06, + "grad_norm": 2.0213046073913574, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0633, + "step": 5475 + }, + { + "epoch": 3.08, + "grad_norm": 1.9063010215759277, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0634, + "step": 5500 + }, + { + "epoch": 3.09, + "grad_norm": 1.8944132328033447, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0617, + "step": 5525 + }, + { + "epoch": 3.1, + "grad_norm": 2.2454872131347656, + "learning_rate": 9.492763819095479e-06, + "loss": 0.068, + "step": 5550 + }, + { + "epoch": 3.12, + "grad_norm": 2.492413282394409, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0614, + "step": 5575 + }, + { + "epoch": 3.13, + "grad_norm": 2.1105923652648926, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0625, + "step": 5600 + }, + { + "epoch": 3.15, + "grad_norm": 2.487652540206909, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0639, + "step": 5625 + }, + { + "epoch": 3.16, + "grad_norm": 1.711843729019165, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0601, + "step": 5650 + }, + { + "epoch": 3.17, + "grad_norm": 2.1762421131134033, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0646, + "step": 5675 + }, + { + "epoch": 3.19, + "grad_norm": 2.102074146270752, + "learning_rate": 9.477688442211056e-06, + "loss": 0.065, + "step": 5700 + }, + { + "epoch": 3.2, + "grad_norm": 1.9095159769058228, + "learning_rate": 9.475175879396985e-06, + "loss": 0.061, + "step": 5725 + }, + { + "epoch": 3.22, + "grad_norm": 2.3133902549743652, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0681, + "step": 5750 + }, + { + "epoch": 3.23, + "grad_norm": 1.6935017108917236, + "learning_rate": 9.470150753768846e-06, + "loss": 0.062, + "step": 5775 + }, + { + "epoch": 3.24, + "grad_norm": 2.0041425228118896, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0638, + "step": 5800 + }, + { + "epoch": 3.26, + "grad_norm": 2.380725383758545, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0666, + "step": 5825 + }, + { + "epoch": 3.27, + "grad_norm": 1.7464348077774048, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0599, + "step": 5850 + }, + { + "epoch": 3.29, + "grad_norm": 1.8467971086502075, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0648, + "step": 5875 + }, + { + "epoch": 3.3, + "grad_norm": 2.2188971042633057, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0657, + "step": 5900 + }, + { + "epoch": 3.31, + "grad_norm": 2.2380785942077637, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0632, + "step": 5925 + }, + { + "epoch": 3.33, + "grad_norm": 1.6837469339370728, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0633, + "step": 5950 + }, + { + "epoch": 3.34, + "grad_norm": 1.9306910037994385, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0631, + "step": 5975 + }, + { + "epoch": 3.36, + "grad_norm": 2.4268481731414795, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0651, + "step": 6000 + }, + { + "epoch": 3.36, + "eval_loss": 0.08409538865089417, + "eval_runtime": 1258.0848, + "eval_samples_per_second": 1.192, + "eval_steps_per_second": 1.192, + "eval_wer": 26.093957851991878, + "step": 6000 + }, + { + "epoch": 3.37, + "grad_norm": 2.007359504699707, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0578, + "step": 6025 + }, + { + "epoch": 3.38, + "grad_norm": 1.8847688436508179, + "learning_rate": 9.442613065326634e-06, + "loss": 0.0642, + "step": 6050 + }, + { + "epoch": 3.4, + "grad_norm": 2.306269407272339, + "learning_rate": 9.440100502512563e-06, + "loss": 0.0638, + "step": 6075 + }, + { + "epoch": 3.41, + "grad_norm": 2.3040685653686523, + "learning_rate": 9.437587939698494e-06, + "loss": 0.062, + "step": 6100 + }, + { + "epoch": 3.43, + "grad_norm": 2.1980137825012207, + "learning_rate": 9.435075376884422e-06, + "loss": 0.0645, + "step": 6125 + }, + { + "epoch": 3.44, + "grad_norm": 2.4031503200531006, + "learning_rate": 9.432562814070353e-06, + "loss": 0.0653, + "step": 6150 + }, + { + "epoch": 3.45, + "grad_norm": 2.2297208309173584, + "learning_rate": 9.430050251256282e-06, + "loss": 0.0603, + "step": 6175 + }, + { + "epoch": 3.47, + "grad_norm": 1.817520260810852, + "learning_rate": 9.427537688442212e-06, + "loss": 0.0651, + "step": 6200 + }, + { + "epoch": 3.48, + "grad_norm": 2.1112923622131348, + "learning_rate": 9.425025125628141e-06, + "loss": 0.0645, + "step": 6225 + }, + { + "epoch": 3.5, + "grad_norm": 1.82675302028656, + "learning_rate": 9.422512562814072e-06, + "loss": 0.065, + "step": 6250 + }, + { + "epoch": 3.51, + "grad_norm": 1.9629912376403809, + "learning_rate": 9.42e-06, + "loss": 0.0641, + "step": 6275 + }, + { + "epoch": 3.52, + "grad_norm": 2.342519521713257, + "learning_rate": 9.41748743718593e-06, + "loss": 0.063, + "step": 6300 + }, + { + "epoch": 3.54, + "grad_norm": 2.0519747734069824, + "learning_rate": 9.41497487437186e-06, + "loss": 0.0633, + "step": 6325 + }, + { + "epoch": 3.55, + "grad_norm": 1.8285983800888062, + "learning_rate": 9.41246231155779e-06, + "loss": 0.0623, + "step": 6350 + }, + { + "epoch": 3.57, + "grad_norm": 1.7750009298324585, + "learning_rate": 9.40994974874372e-06, + "loss": 0.0611, + "step": 6375 + }, + { + "epoch": 3.58, + "grad_norm": 2.1673221588134766, + "learning_rate": 9.407437185929648e-06, + "loss": 0.0636, + "step": 6400 + }, + { + "epoch": 3.59, + "grad_norm": 2.1094229221343994, + "learning_rate": 9.404924623115579e-06, + "loss": 0.0609, + "step": 6425 + }, + { + "epoch": 3.61, + "grad_norm": 2.2001357078552246, + "learning_rate": 9.402412060301508e-06, + "loss": 0.0623, + "step": 6450 + }, + { + "epoch": 3.62, + "grad_norm": 2.0866901874542236, + "learning_rate": 9.399899497487438e-06, + "loss": 0.0637, + "step": 6475 + }, + { + "epoch": 3.64, + "grad_norm": 1.9105075597763062, + "learning_rate": 9.397386934673369e-06, + "loss": 0.0601, + "step": 6500 + }, + { + "epoch": 3.65, + "grad_norm": 1.6594455242156982, + "learning_rate": 9.394874371859298e-06, + "loss": 0.0638, + "step": 6525 + }, + { + "epoch": 3.66, + "grad_norm": 1.9969534873962402, + "learning_rate": 9.392361809045227e-06, + "loss": 0.0617, + "step": 6550 + }, + { + "epoch": 3.68, + "grad_norm": 1.8099467754364014, + "learning_rate": 9.389849246231157e-06, + "loss": 0.0628, + "step": 6575 + }, + { + "epoch": 3.69, + "grad_norm": 2.2564048767089844, + "learning_rate": 9.387336683417086e-06, + "loss": 0.0629, + "step": 6600 + }, + { + "epoch": 3.71, + "grad_norm": 1.8307623863220215, + "learning_rate": 9.384824120603015e-06, + "loss": 0.0577, + "step": 6625 + }, + { + "epoch": 3.72, + "grad_norm": 1.759268879890442, + "learning_rate": 9.382311557788946e-06, + "loss": 0.059, + "step": 6650 + }, + { + "epoch": 3.73, + "grad_norm": 2.067389726638794, + "learning_rate": 9.379798994974874e-06, + "loss": 0.0616, + "step": 6675 + }, + { + "epoch": 3.75, + "grad_norm": 1.8005328178405762, + "learning_rate": 9.377286432160805e-06, + "loss": 0.0599, + "step": 6700 + }, + { + "epoch": 3.76, + "grad_norm": 2.594447135925293, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0606, + "step": 6725 + }, + { + "epoch": 3.78, + "grad_norm": 1.6017422676086426, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0621, + "step": 6750 + }, + { + "epoch": 3.79, + "grad_norm": 1.852373719215393, + "learning_rate": 9.369748743718595e-06, + "loss": 0.0605, + "step": 6775 + }, + { + "epoch": 3.8, + "grad_norm": 1.860703706741333, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0595, + "step": 6800 + }, + { + "epoch": 3.82, + "grad_norm": 2.018710136413574, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0601, + "step": 6825 + }, + { + "epoch": 3.83, + "grad_norm": 2.154536247253418, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0613, + "step": 6850 + }, + { + "epoch": 3.85, + "grad_norm": 1.9843919277191162, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0597, + "step": 6875 + }, + { + "epoch": 3.86, + "grad_norm": 2.1597299575805664, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0608, + "step": 6900 + }, + { + "epoch": 3.87, + "grad_norm": 1.994773268699646, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0634, + "step": 6925 + }, + { + "epoch": 3.89, + "grad_norm": 1.985724687576294, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0617, + "step": 6950 + }, + { + "epoch": 3.9, + "grad_norm": 1.6854017972946167, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0597, + "step": 6975 + }, + { + "epoch": 3.91, + "grad_norm": 1.9718555212020874, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0621, + "step": 7000 + }, + { + "epoch": 3.91, + "eval_loss": 0.07950112968683243, + "eval_runtime": 1540.6934, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.974, + "eval_wer": 24.74270111321151, + "step": 7000 + }, + { + "epoch": 3.93, + "grad_norm": 2.3680989742279053, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0609, + "step": 7025 + }, + { + "epoch": 3.94, + "grad_norm": 2.2002029418945312, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0603, + "step": 7050 + }, + { + "epoch": 3.96, + "grad_norm": 2.0039074420928955, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0572, + "step": 7075 + }, + { + "epoch": 3.97, + "grad_norm": 1.7604914903640747, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0585, + "step": 7100 + }, + { + "epoch": 3.98, + "grad_norm": 2.173600912094116, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0608, + "step": 7125 + }, + { + "epoch": 4.0, + "grad_norm": 1.7853466272354126, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0626, + "step": 7150 + }, + { + "epoch": 4.01, + "grad_norm": 1.6174085140228271, + "learning_rate": 9.329547738693469e-06, + "loss": 0.048, + "step": 7175 + }, + { + "epoch": 4.03, + "grad_norm": 1.6191383600234985, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0466, + "step": 7200 + }, + { + "epoch": 4.04, + "grad_norm": 1.7680180072784424, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0466, + "step": 7225 + }, + { + "epoch": 4.05, + "grad_norm": 1.6971068382263184, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0494, + "step": 7250 + }, + { + "epoch": 4.07, + "grad_norm": 1.7964837551116943, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0448, + "step": 7275 + }, + { + "epoch": 4.08, + "grad_norm": 1.9257533550262451, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0473, + "step": 7300 + }, + { + "epoch": 4.1, + "grad_norm": 2.192664861679077, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0486, + "step": 7325 + }, + { + "epoch": 4.11, + "grad_norm": 1.9059473276138306, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0473, + "step": 7350 + }, + { + "epoch": 4.12, + "grad_norm": 2.3671586513519287, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0483, + "step": 7375 + }, + { + "epoch": 4.14, + "grad_norm": 1.9169944524765015, + "learning_rate": 9.306934673366836e-06, + "loss": 0.048, + "step": 7400 + }, + { + "epoch": 4.15, + "grad_norm": 1.6867883205413818, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0444, + "step": 7425 + }, + { + "epoch": 4.17, + "grad_norm": 2.1389479637145996, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0474, + "step": 7450 + }, + { + "epoch": 4.18, + "grad_norm": 1.9204530715942383, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0488, + "step": 7475 + }, + { + "epoch": 4.19, + "grad_norm": 1.8195775747299194, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0495, + "step": 7500 + }, + { + "epoch": 4.21, + "grad_norm": 1.8835941553115845, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0458, + "step": 7525 + }, + { + "epoch": 4.22, + "grad_norm": 2.0439579486846924, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0475, + "step": 7550 + }, + { + "epoch": 4.24, + "grad_norm": 1.8745596408843994, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0473, + "step": 7575 + }, + { + "epoch": 4.25, + "grad_norm": 1.8819526433944702, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0503, + "step": 7600 + }, + { + "epoch": 4.26, + "grad_norm": 1.7282466888427734, + "learning_rate": 9.284321608040202e-06, + "loss": 0.046, + "step": 7625 + }, + { + "epoch": 4.28, + "grad_norm": 2.1101884841918945, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0475, + "step": 7650 + }, + { + "epoch": 4.29, + "grad_norm": 1.5361087322235107, + "learning_rate": 9.279296482412062e-06, + "loss": 0.0479, + "step": 7675 + }, + { + "epoch": 4.31, + "grad_norm": 1.6633384227752686, + "learning_rate": 9.27678391959799e-06, + "loss": 0.047, + "step": 7700 + }, + { + "epoch": 4.32, + "grad_norm": 1.911967158317566, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0482, + "step": 7725 + }, + { + "epoch": 4.33, + "grad_norm": 1.876800537109375, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0475, + "step": 7750 + }, + { + "epoch": 4.35, + "grad_norm": 1.90635347366333, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0491, + "step": 7775 + }, + { + "epoch": 4.36, + "grad_norm": 1.8439699411392212, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0473, + "step": 7800 + }, + { + "epoch": 4.38, + "grad_norm": 1.8636940717697144, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0472, + "step": 7825 + }, + { + "epoch": 4.39, + "grad_norm": 2.0078325271606445, + "learning_rate": 9.261708542713569e-06, + "loss": 0.05, + "step": 7850 + }, + { + "epoch": 4.4, + "grad_norm": 1.5912322998046875, + "learning_rate": 9.259195979899498e-06, + "loss": 0.046, + "step": 7875 + }, + { + "epoch": 4.42, + "grad_norm": 2.0076215267181396, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0453, + "step": 7900 + }, + { + "epoch": 4.43, + "grad_norm": 1.8626244068145752, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0479, + "step": 7925 + }, + { + "epoch": 4.45, + "grad_norm": 1.7121516466140747, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0475, + "step": 7950 + }, + { + "epoch": 4.46, + "grad_norm": 2.361213207244873, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0478, + "step": 7975 + }, + { + "epoch": 4.47, + "grad_norm": 1.7829092741012573, + "learning_rate": 9.246633165829147e-06, + "loss": 0.048, + "step": 8000 + }, + { + "epoch": 4.47, + "eval_loss": 0.08309131115674973, + "eval_runtime": 1537.2902, + "eval_samples_per_second": 0.976, + "eval_steps_per_second": 0.976, + "eval_wer": 25.16978225862914, + "step": 8000 + }, + { + "epoch": 4.49, + "grad_norm": 1.985420823097229, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0484, + "step": 8025 + }, + { + "epoch": 4.5, + "grad_norm": 2.077108144760132, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0472, + "step": 8050 + }, + { + "epoch": 4.52, + "grad_norm": 1.9472047090530396, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0478, + "step": 8075 + }, + { + "epoch": 4.53, + "grad_norm": 2.2777373790740967, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0488, + "step": 8100 + }, + { + "epoch": 4.54, + "grad_norm": 2.1253933906555176, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0481, + "step": 8125 + }, + { + "epoch": 4.56, + "grad_norm": 1.873387098312378, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0478, + "step": 8150 + }, + { + "epoch": 4.57, + "grad_norm": 1.9682562351226807, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0471, + "step": 8175 + }, + { + "epoch": 4.59, + "grad_norm": 1.9117881059646606, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0499, + "step": 8200 + }, + { + "epoch": 4.6, + "grad_norm": 2.2858989238739014, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0466, + "step": 8225 + }, + { + "epoch": 4.61, + "grad_norm": 1.874833345413208, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0505, + "step": 8250 + }, + { + "epoch": 4.63, + "grad_norm": 1.8459376096725464, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0468, + "step": 8275 + }, + { + "epoch": 4.64, + "grad_norm": 1.8114656209945679, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0453, + "step": 8300 + }, + { + "epoch": 4.66, + "grad_norm": 1.9587324857711792, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0478, + "step": 8325 + }, + { + "epoch": 4.67, + "grad_norm": 2.2526862621307373, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0452, + "step": 8350 + }, + { + "epoch": 4.68, + "grad_norm": 2.2629716396331787, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0498, + "step": 8375 + }, + { + "epoch": 4.7, + "grad_norm": 1.932504653930664, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0451, + "step": 8400 + }, + { + "epoch": 4.71, + "grad_norm": 1.9428602457046509, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0477, + "step": 8425 + }, + { + "epoch": 4.73, + "grad_norm": 1.9576139450073242, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0471, + "step": 8450 + }, + { + "epoch": 4.74, + "grad_norm": 1.9237579107284546, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0462, + "step": 8475 + }, + { + "epoch": 4.75, + "grad_norm": 2.21309494972229, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0471, + "step": 8500 + }, + { + "epoch": 4.77, + "grad_norm": 1.881838321685791, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0464, + "step": 8525 + }, + { + "epoch": 4.78, + "grad_norm": 1.9514816999435425, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0446, + "step": 8550 + }, + { + "epoch": 4.8, + "grad_norm": 1.9100663661956787, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0476, + "step": 8575 + }, + { + "epoch": 4.81, + "grad_norm": 2.2070868015289307, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0486, + "step": 8600 + }, + { + "epoch": 4.82, + "grad_norm": 1.7795501947402954, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0463, + "step": 8625 + }, + { + "epoch": 4.84, + "grad_norm": 1.6400113105773926, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0467, + "step": 8650 + }, + { + "epoch": 4.85, + "grad_norm": 2.5019569396972656, + "learning_rate": 9.178793969849247e-06, + "loss": 0.048, + "step": 8675 + }, + { + "epoch": 4.87, + "grad_norm": 1.6988704204559326, + "learning_rate": 9.176281407035176e-06, + "loss": 0.047, + "step": 8700 + }, + { + "epoch": 4.88, + "grad_norm": 1.8742605447769165, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0465, + "step": 8725 + }, + { + "epoch": 4.89, + "grad_norm": 1.8531486988067627, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0453, + "step": 8750 + }, + { + "epoch": 4.91, + "grad_norm": 1.8690688610076904, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0458, + "step": 8775 + }, + { + "epoch": 4.92, + "grad_norm": 1.8053923845291138, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0467, + "step": 8800 + }, + { + "epoch": 4.94, + "grad_norm": 1.7939424514770508, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0486, + "step": 8825 + }, + { + "epoch": 4.95, + "grad_norm": 1.8470284938812256, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0449, + "step": 8850 + }, + { + "epoch": 4.96, + "grad_norm": 1.9548052549362183, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0469, + "step": 8875 + }, + { + "epoch": 4.98, + "grad_norm": 2.1690359115600586, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0456, + "step": 8900 + }, + { + "epoch": 4.99, + "grad_norm": 1.6433491706848145, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0454, + "step": 8925 + }, + { + "epoch": 5.01, + "grad_norm": 1.6121461391448975, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0417, + "step": 8950 + }, + { + "epoch": 5.02, + "grad_norm": 1.1955963373184204, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0339, + "step": 8975 + }, + { + "epoch": 5.03, + "grad_norm": 2.314229965209961, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0348, + "step": 9000 + }, + { + "epoch": 5.03, + "eval_loss": 0.08638431876897812, + "eval_runtime": 1547.2933, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.969, + "eval_wer": 24.73569978295876, + "step": 9000 + }, + { + "epoch": 5.05, + "grad_norm": 1.9079633951187134, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0333, + "step": 9025 + }, + { + "epoch": 5.06, + "grad_norm": 1.8292887210845947, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0357, + "step": 9050 + }, + { + "epoch": 5.08, + "grad_norm": 1.8790791034698486, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0329, + "step": 9075 + }, + { + "epoch": 5.09, + "grad_norm": 1.7024917602539062, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0353, + "step": 9100 + }, + { + "epoch": 5.1, + "grad_norm": 1.728759527206421, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0343, + "step": 9125 + }, + { + "epoch": 5.12, + "grad_norm": 1.4763672351837158, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0357, + "step": 9150 + }, + { + "epoch": 5.13, + "grad_norm": 1.541526436805725, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0333, + "step": 9175 + }, + { + "epoch": 5.15, + "grad_norm": 2.032904624938965, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0362, + "step": 9200 + }, + { + "epoch": 5.16, + "grad_norm": 1.4788271188735962, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0363, + "step": 9225 + }, + { + "epoch": 5.17, + "grad_norm": 1.5795788764953613, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0341, + "step": 9250 + }, + { + "epoch": 5.19, + "grad_norm": 2.0994884967803955, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0376, + "step": 9275 + }, + { + "epoch": 5.2, + "grad_norm": 1.8375917673110962, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0343, + "step": 9300 + }, + { + "epoch": 5.22, + "grad_norm": 1.9826256036758423, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0359, + "step": 9325 + }, + { + "epoch": 5.23, + "grad_norm": 1.5949604511260986, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0334, + "step": 9350 + }, + { + "epoch": 5.24, + "grad_norm": 2.062195301055908, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0347, + "step": 9375 + }, + { + "epoch": 5.26, + "grad_norm": 1.67949640750885, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0356, + "step": 9400 + }, + { + "epoch": 5.27, + "grad_norm": 1.844469666481018, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0355, + "step": 9425 + }, + { + "epoch": 5.29, + "grad_norm": 1.7946102619171143, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0356, + "step": 9450 + }, + { + "epoch": 5.3, + "grad_norm": 1.9098948240280151, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0367, + "step": 9475 + }, + { + "epoch": 5.31, + "grad_norm": 2.024116039276123, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0363, + "step": 9500 + }, + { + "epoch": 5.33, + "grad_norm": 1.7224719524383545, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0354, + "step": 9525 + }, + { + "epoch": 5.34, + "grad_norm": 1.597653865814209, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0355, + "step": 9550 + }, + { + "epoch": 5.36, + "grad_norm": 1.5614638328552246, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0328, + "step": 9575 + }, + { + "epoch": 5.37, + "grad_norm": 2.019812822341919, + "learning_rate": 9.085829145728644e-06, + "loss": 0.033, + "step": 9600 + }, + { + "epoch": 5.38, + "grad_norm": 1.9035489559173584, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0353, + "step": 9625 + }, + { + "epoch": 5.4, + "grad_norm": 1.9559494256973267, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0376, + "step": 9650 + }, + { + "epoch": 5.41, + "grad_norm": 1.6555471420288086, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0371, + "step": 9675 + }, + { + "epoch": 5.43, + "grad_norm": 1.8791228532791138, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0359, + "step": 9700 + }, + { + "epoch": 5.44, + "grad_norm": 2.1574184894561768, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0336, + "step": 9725 + }, + { + "epoch": 5.45, + "grad_norm": 1.6978013515472412, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0355, + "step": 9750 + }, + { + "epoch": 5.47, + "grad_norm": 1.5776162147521973, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0378, + "step": 9775 + }, + { + "epoch": 5.48, + "grad_norm": 1.9801268577575684, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0351, + "step": 9800 + }, + { + "epoch": 5.49, + "grad_norm": 2.0189411640167236, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0358, + "step": 9825 + }, + { + "epoch": 5.51, + "grad_norm": 1.6830401420593262, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0369, + "step": 9850 + }, + { + "epoch": 5.52, + "grad_norm": 1.9140214920043945, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0367, + "step": 9875 + }, + { + "epoch": 5.54, + "grad_norm": 1.8516499996185303, + "learning_rate": 9.0556783919598e-06, + "loss": 0.036, + "step": 9900 + }, + { + "epoch": 5.55, + "grad_norm": 1.8060190677642822, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0345, + "step": 9925 + }, + { + "epoch": 5.56, + "grad_norm": 1.9838569164276123, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0367, + "step": 9950 + }, + { + "epoch": 5.58, + "grad_norm": 1.842403531074524, + "learning_rate": 9.048140703517589e-06, + "loss": 0.035, + "step": 9975 + }, + { + "epoch": 5.59, + "grad_norm": 1.4839140176773071, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0354, + "step": 10000 + }, + { + "epoch": 5.59, + "eval_loss": 0.08825862407684326, + "eval_runtime": 1547.5777, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.969, + "eval_wer": 25.722887348596235, + "step": 10000 + }, + { + "epoch": 5.61, + "grad_norm": 1.8587559461593628, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0372, + "step": 10025 + }, + { + "epoch": 5.62, + "grad_norm": 2.097219705581665, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0349, + "step": 10050 + }, + { + "epoch": 5.63, + "grad_norm": 1.9071590900421143, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0361, + "step": 10075 + }, + { + "epoch": 5.65, + "grad_norm": 1.5997395515441895, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0355, + "step": 10100 + }, + { + "epoch": 5.66, + "grad_norm": 1.879860758781433, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0358, + "step": 10125 + }, + { + "epoch": 5.68, + "grad_norm": 1.8191801309585571, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0364, + "step": 10150 + }, + { + "epoch": 5.69, + "grad_norm": 1.6246849298477173, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0361, + "step": 10175 + }, + { + "epoch": 5.7, + "grad_norm": 1.9530048370361328, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0336, + "step": 10200 + }, + { + "epoch": 5.72, + "grad_norm": 1.7234457731246948, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0344, + "step": 10225 + }, + { + "epoch": 5.73, + "grad_norm": 2.0469868183135986, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0358, + "step": 10250 + }, + { + "epoch": 5.75, + "grad_norm": 1.5930458307266235, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0343, + "step": 10275 + }, + { + "epoch": 5.76, + "grad_norm": 1.6492174863815308, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0368, + "step": 10300 + }, + { + "epoch": 5.77, + "grad_norm": 1.5358837842941284, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0344, + "step": 10325 + }, + { + "epoch": 5.79, + "grad_norm": 1.8455798625946045, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0373, + "step": 10350 + }, + { + "epoch": 5.8, + "grad_norm": 2.1980483531951904, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0346, + "step": 10375 + }, + { + "epoch": 5.82, + "grad_norm": 1.9728219509124756, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0371, + "step": 10400 + }, + { + "epoch": 5.83, + "grad_norm": 1.9018830060958862, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0342, + "step": 10425 + }, + { + "epoch": 5.84, + "grad_norm": 1.5646787881851196, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0338, + "step": 10450 + }, + { + "epoch": 5.86, + "grad_norm": 1.8304989337921143, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0369, + "step": 10475 + }, + { + "epoch": 5.87, + "grad_norm": 2.231433868408203, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0351, + "step": 10500 + }, + { + "epoch": 5.89, + "grad_norm": 1.581472396850586, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0343, + "step": 10525 + }, + { + "epoch": 5.9, + "grad_norm": 1.9242390394210815, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0343, + "step": 10550 + }, + { + "epoch": 5.91, + "grad_norm": 1.6705659627914429, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0338, + "step": 10575 + }, + { + "epoch": 5.93, + "grad_norm": 1.8718876838684082, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0361, + "step": 10600 + }, + { + "epoch": 5.94, + "grad_norm": 2.0012893676757812, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0358, + "step": 10625 + }, + { + "epoch": 5.96, + "grad_norm": 1.8063054084777832, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0336, + "step": 10650 + }, + { + "epoch": 5.97, + "grad_norm": 2.0780587196350098, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0364, + "step": 10675 + }, + { + "epoch": 5.98, + "grad_norm": 1.8927561044692993, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0339, + "step": 10700 + }, + { + "epoch": 6.0, + "grad_norm": 1.6720718145370483, + "learning_rate": 8.97286432160804e-06, + "loss": 0.033, + "step": 10725 + }, + { + "epoch": 6.01, + "grad_norm": 1.5935943126678467, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0262, + "step": 10750 + }, + { + "epoch": 6.03, + "grad_norm": 1.691170573234558, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0256, + "step": 10775 + }, + { + "epoch": 6.04, + "grad_norm": 1.8816449642181396, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0255, + "step": 10800 + }, + { + "epoch": 6.05, + "grad_norm": 1.4208954572677612, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0249, + "step": 10825 + }, + { + "epoch": 6.07, + "grad_norm": 1.672662377357483, + "learning_rate": 8.960301507537689e-06, + "loss": 0.024, + "step": 10850 + }, + { + "epoch": 6.08, + "grad_norm": 1.7169475555419922, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0241, + "step": 10875 + }, + { + "epoch": 6.1, + "grad_norm": 1.526678442955017, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0256, + "step": 10900 + }, + { + "epoch": 6.11, + "grad_norm": 1.5596081018447876, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0258, + "step": 10925 + }, + { + "epoch": 6.12, + "grad_norm": 2.0523030757904053, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0255, + "step": 10950 + }, + { + "epoch": 6.14, + "grad_norm": 1.6910170316696167, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0239, + "step": 10975 + }, + { + "epoch": 6.15, + "grad_norm": 2.0352160930633545, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0248, + "step": 11000 + }, + { + "epoch": 6.15, + "eval_loss": 0.0970708429813385, + "eval_runtime": 1542.3258, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.973, + "eval_wer": 25.253798221662116, + "step": 11000 + }, + { + "epoch": 6.17, + "grad_norm": 1.617034673690796, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0258, + "step": 11025 + }, + { + "epoch": 6.18, + "grad_norm": 1.3897294998168945, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0255, + "step": 11050 + }, + { + "epoch": 6.19, + "grad_norm": 1.8258416652679443, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0262, + "step": 11075 + }, + { + "epoch": 6.21, + "grad_norm": 1.945985198020935, + "learning_rate": 8.935175879396986e-06, + "loss": 0.027, + "step": 11100 + }, + { + "epoch": 6.22, + "grad_norm": 1.790618896484375, + "learning_rate": 8.932663316582915e-06, + "loss": 0.025, + "step": 11125 + }, + { + "epoch": 6.24, + "grad_norm": 1.7258163690567017, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0252, + "step": 11150 + }, + { + "epoch": 6.25, + "grad_norm": 2.1420340538024902, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0255, + "step": 11175 + }, + { + "epoch": 6.26, + "grad_norm": 1.4598020315170288, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0249, + "step": 11200 + }, + { + "epoch": 6.28, + "grad_norm": 1.576206088066101, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0243, + "step": 11225 + }, + { + "epoch": 6.29, + "grad_norm": 2.0248894691467285, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0252, + "step": 11250 + }, + { + "epoch": 6.31, + "grad_norm": 1.5580759048461914, + "learning_rate": 8.917587939698493e-06, + "loss": 0.026, + "step": 11275 + }, + { + "epoch": 6.32, + "grad_norm": 1.6908998489379883, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0255, + "step": 11300 + }, + { + "epoch": 6.33, + "grad_norm": 1.8284715414047241, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0253, + "step": 11325 + }, + { + "epoch": 6.35, + "grad_norm": 1.3091851472854614, + "learning_rate": 8.910050251256282e-06, + "loss": 0.024, + "step": 11350 + }, + { + "epoch": 6.36, + "grad_norm": 1.5588383674621582, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0251, + "step": 11375 + }, + { + "epoch": 6.38, + "grad_norm": 1.6450953483581543, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0265, + "step": 11400 + }, + { + "epoch": 6.39, + "grad_norm": 1.5461299419403076, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0254, + "step": 11425 + }, + { + "epoch": 6.4, + "grad_norm": 1.8010703325271606, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0272, + "step": 11450 + }, + { + "epoch": 6.42, + "grad_norm": 1.8586952686309814, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0262, + "step": 11475 + }, + { + "epoch": 6.43, + "grad_norm": 1.745080590248108, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0257, + "step": 11500 + }, + { + "epoch": 6.45, + "grad_norm": 1.6464567184448242, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0266, + "step": 11525 + }, + { + "epoch": 6.46, + "grad_norm": 1.5463519096374512, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0256, + "step": 11550 + }, + { + "epoch": 6.47, + "grad_norm": 2.099302291870117, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0257, + "step": 11575 + }, + { + "epoch": 6.49, + "grad_norm": 1.8039714097976685, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0264, + "step": 11600 + }, + { + "epoch": 6.5, + "grad_norm": 1.673970103263855, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0268, + "step": 11625 + }, + { + "epoch": 6.52, + "grad_norm": 1.7410236597061157, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0244, + "step": 11650 + }, + { + "epoch": 6.53, + "grad_norm": 1.5405129194259644, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0259, + "step": 11675 + }, + { + "epoch": 6.54, + "grad_norm": 1.6754980087280273, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0248, + "step": 11700 + }, + { + "epoch": 6.56, + "grad_norm": 1.7075492143630981, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0253, + "step": 11725 + }, + { + "epoch": 6.57, + "grad_norm": 1.7080812454223633, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0256, + "step": 11750 + }, + { + "epoch": 6.59, + "grad_norm": 1.8137493133544922, + "learning_rate": 8.867336683417086e-06, + "loss": 0.027, + "step": 11775 + }, + { + "epoch": 6.6, + "grad_norm": 1.8992966413497925, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0265, + "step": 11800 + }, + { + "epoch": 6.61, + "grad_norm": 1.7295725345611572, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0257, + "step": 11825 + }, + { + "epoch": 6.63, + "grad_norm": 1.847601056098938, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0257, + "step": 11850 + }, + { + "epoch": 6.64, + "grad_norm": 1.9641332626342773, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0263, + "step": 11875 + }, + { + "epoch": 6.66, + "grad_norm": 1.715695858001709, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0246, + "step": 11900 + }, + { + "epoch": 6.67, + "grad_norm": 1.8434436321258545, + "learning_rate": 8.852261306532665e-06, + "loss": 0.027, + "step": 11925 + }, + { + "epoch": 6.68, + "grad_norm": 1.5504242181777954, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0262, + "step": 11950 + }, + { + "epoch": 6.7, + "grad_norm": 1.9810289144515991, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0265, + "step": 11975 + }, + { + "epoch": 6.71, + "grad_norm": 1.6186991930007935, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0265, + "step": 12000 + }, + { + "epoch": 6.71, + "eval_loss": 0.09767824411392212, + "eval_runtime": 1544.1555, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.971, + "eval_wer": 25.407827487222573, + "step": 12000 + }, + { + "epoch": 6.73, + "grad_norm": 1.813325047492981, + "learning_rate": 8.842211055276382e-06, + "loss": 0.027, + "step": 12025 + }, + { + "epoch": 6.74, + "grad_norm": 1.896941065788269, + "learning_rate": 8.839698492462312e-06, + "loss": 0.026, + "step": 12050 + }, + { + "epoch": 6.75, + "grad_norm": 1.9059789180755615, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0259, + "step": 12075 + }, + { + "epoch": 6.77, + "grad_norm": 1.596055269241333, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0257, + "step": 12100 + }, + { + "epoch": 6.78, + "grad_norm": 1.7050496339797974, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0271, + "step": 12125 + }, + { + "epoch": 6.8, + "grad_norm": 1.666527271270752, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0263, + "step": 12150 + }, + { + "epoch": 6.81, + "grad_norm": 1.5924322605133057, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0263, + "step": 12175 + }, + { + "epoch": 6.82, + "grad_norm": 1.512611985206604, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0243, + "step": 12200 + }, + { + "epoch": 6.84, + "grad_norm": 2.3868093490600586, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0261, + "step": 12225 + }, + { + "epoch": 6.85, + "grad_norm": 1.4095665216445923, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0268, + "step": 12250 + }, + { + "epoch": 6.87, + "grad_norm": 1.6239408254623413, + "learning_rate": 8.817085427135679e-06, + "loss": 0.025, + "step": 12275 + }, + { + "epoch": 6.88, + "grad_norm": 1.8920422792434692, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0269, + "step": 12300 + }, + { + "epoch": 6.89, + "grad_norm": 1.8745660781860352, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0264, + "step": 12325 + }, + { + "epoch": 6.91, + "grad_norm": 1.9475117921829224, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0272, + "step": 12350 + }, + { + "epoch": 6.92, + "grad_norm": 1.8490169048309326, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0258, + "step": 12375 + }, + { + "epoch": 6.94, + "grad_norm": 1.6835732460021973, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0274, + "step": 12400 + }, + { + "epoch": 6.95, + "grad_norm": 1.7207229137420654, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0269, + "step": 12425 + }, + { + "epoch": 6.96, + "grad_norm": 1.711963415145874, + "learning_rate": 8.79959798994975e-06, + "loss": 0.025, + "step": 12450 + }, + { + "epoch": 6.98, + "grad_norm": 2.1076231002807617, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0269, + "step": 12475 + }, + { + "epoch": 6.99, + "grad_norm": 1.554739236831665, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0257, + "step": 12500 + }, + { + "epoch": 7.01, + "grad_norm": 1.307066559791565, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0231, + "step": 12525 + }, + { + "epoch": 7.02, + "grad_norm": 1.4923175573349, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0184, + "step": 12550 + }, + { + "epoch": 7.03, + "grad_norm": 1.799414038658142, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0184, + "step": 12575 + }, + { + "epoch": 7.05, + "grad_norm": 1.4363048076629639, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0182, + "step": 12600 + }, + { + "epoch": 7.06, + "grad_norm": 1.7587112188339233, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0179, + "step": 12625 + }, + { + "epoch": 7.07, + "grad_norm": 1.457387924194336, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0184, + "step": 12650 + }, + { + "epoch": 7.09, + "grad_norm": 1.6355394124984741, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0186, + "step": 12675 + }, + { + "epoch": 7.1, + "grad_norm": 1.719256043434143, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0175, + "step": 12700 + }, + { + "epoch": 7.12, + "grad_norm": 1.6304877996444702, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0167, + "step": 12725 + }, + { + "epoch": 7.13, + "grad_norm": 1.9032924175262451, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0172, + "step": 12750 + }, + { + "epoch": 7.14, + "grad_norm": 1.4420280456542969, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0173, + "step": 12775 + }, + { + "epoch": 7.16, + "grad_norm": 1.5082676410675049, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0185, + "step": 12800 + }, + { + "epoch": 7.17, + "grad_norm": 1.89511239528656, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0176, + "step": 12825 + }, + { + "epoch": 7.19, + "grad_norm": 1.8311001062393188, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0182, + "step": 12850 + }, + { + "epoch": 7.2, + "grad_norm": 1.8964591026306152, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0179, + "step": 12875 + }, + { + "epoch": 7.21, + "grad_norm": 1.7208322286605835, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0182, + "step": 12900 + }, + { + "epoch": 7.23, + "grad_norm": 1.1779571771621704, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0178, + "step": 12925 + }, + { + "epoch": 7.24, + "grad_norm": 1.6696969270706177, + "learning_rate": 8.749346733668343e-06, + "loss": 0.018, + "step": 12950 + }, + { + "epoch": 7.26, + "grad_norm": 1.5363738536834717, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0174, + "step": 12975 + }, + { + "epoch": 7.27, + "grad_norm": 1.5757735967636108, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0173, + "step": 13000 + }, + { + "epoch": 7.27, + "eval_loss": 0.10644286870956421, + "eval_runtime": 1250.7357, + "eval_samples_per_second": 1.199, + "eval_steps_per_second": 1.199, + "eval_wer": 25.56185675278303, + "step": 13000 + }, + { + "epoch": 7.28, + "grad_norm": 1.2004117965698242, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0177, + "step": 13025 + }, + { + "epoch": 7.3, + "grad_norm": 1.7819678783416748, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0187, + "step": 13050 + }, + { + "epoch": 7.31, + "grad_norm": 1.9206719398498535, + "learning_rate": 8.736783919597991e-06, + "loss": 0.018, + "step": 13075 + }, + { + "epoch": 7.33, + "grad_norm": 1.931118130683899, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0185, + "step": 13100 + }, + { + "epoch": 7.34, + "grad_norm": 1.9506186246871948, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0177, + "step": 13125 + }, + { + "epoch": 7.35, + "grad_norm": 2.123161554336548, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0188, + "step": 13150 + }, + { + "epoch": 7.37, + "grad_norm": 1.6503745317459106, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0195, + "step": 13175 + }, + { + "epoch": 7.38, + "grad_norm": 1.7911384105682373, + "learning_rate": 8.72422110552764e-06, + "loss": 0.018, + "step": 13200 + }, + { + "epoch": 7.4, + "grad_norm": 1.8498948812484741, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0191, + "step": 13225 + }, + { + "epoch": 7.41, + "grad_norm": 1.825381875038147, + "learning_rate": 8.719195979899498e-06, + "loss": 0.018, + "step": 13250 + }, + { + "epoch": 7.42, + "grad_norm": 1.6786212921142578, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0186, + "step": 13275 + }, + { + "epoch": 7.44, + "grad_norm": 1.7535996437072754, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0189, + "step": 13300 + }, + { + "epoch": 7.45, + "grad_norm": 1.445857286453247, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0179, + "step": 13325 + }, + { + "epoch": 7.47, + "grad_norm": 1.8488236665725708, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0197, + "step": 13350 + }, + { + "epoch": 7.48, + "grad_norm": 1.4603967666625977, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0203, + "step": 13375 + }, + { + "epoch": 7.49, + "grad_norm": 1.525683045387268, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0183, + "step": 13400 + }, + { + "epoch": 7.51, + "grad_norm": 1.6381289958953857, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0197, + "step": 13425 + }, + { + "epoch": 7.52, + "grad_norm": 1.4716131687164307, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0196, + "step": 13450 + }, + { + "epoch": 7.54, + "grad_norm": 1.688878059387207, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0183, + "step": 13475 + }, + { + "epoch": 7.55, + "grad_norm": 1.4268814325332642, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0181, + "step": 13500 + }, + { + "epoch": 7.56, + "grad_norm": 1.8417357206344604, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0191, + "step": 13525 + }, + { + "epoch": 7.58, + "grad_norm": 1.6161320209503174, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0183, + "step": 13550 + }, + { + "epoch": 7.59, + "grad_norm": 1.398176670074463, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0197, + "step": 13575 + }, + { + "epoch": 7.61, + "grad_norm": 1.9973474740982056, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0191, + "step": 13600 + }, + { + "epoch": 7.62, + "grad_norm": 1.3835101127624512, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0181, + "step": 13625 + }, + { + "epoch": 7.63, + "grad_norm": 1.814956545829773, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0188, + "step": 13650 + }, + { + "epoch": 7.65, + "grad_norm": 1.386847734451294, + "learning_rate": 8.676482412060302e-06, + "loss": 0.019, + "step": 13675 + }, + { + "epoch": 7.66, + "grad_norm": 1.6018749475479126, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0183, + "step": 13700 + }, + { + "epoch": 7.68, + "grad_norm": 1.71701180934906, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0206, + "step": 13725 + }, + { + "epoch": 7.69, + "grad_norm": 1.40883207321167, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0192, + "step": 13750 + }, + { + "epoch": 7.7, + "grad_norm": 1.1789219379425049, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0193, + "step": 13775 + }, + { + "epoch": 7.72, + "grad_norm": 1.4395115375518799, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0192, + "step": 13800 + }, + { + "epoch": 7.73, + "grad_norm": 2.011089563369751, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0195, + "step": 13825 + }, + { + "epoch": 7.75, + "grad_norm": 1.7866398096084595, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0193, + "step": 13850 + }, + { + "epoch": 7.76, + "grad_norm": 1.6536067724227905, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0188, + "step": 13875 + }, + { + "epoch": 7.77, + "grad_norm": 1.7321090698242188, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0187, + "step": 13900 + }, + { + "epoch": 7.79, + "grad_norm": 1.5572364330291748, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0193, + "step": 13925 + }, + { + "epoch": 7.8, + "grad_norm": 1.6894944906234741, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0187, + "step": 13950 + }, + { + "epoch": 7.82, + "grad_norm": 1.5226629972457886, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0178, + "step": 13975 + }, + { + "epoch": 7.83, + "grad_norm": 1.7922390699386597, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0195, + "step": 14000 + }, + { + "epoch": 7.83, + "eval_loss": 0.107501320540905, + "eval_runtime": 1255.7901, + "eval_samples_per_second": 1.194, + "eval_steps_per_second": 1.194, + "eval_wer": 26.423020373871037, + "step": 14000 + }, + { + "epoch": 7.84, + "grad_norm": 1.9192249774932861, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0193, + "step": 14025 + }, + { + "epoch": 7.86, + "grad_norm": 1.9778767824172974, + "learning_rate": 8.638793969849247e-06, + "loss": 0.019, + "step": 14050 + }, + { + "epoch": 7.87, + "grad_norm": 1.847006916999817, + "learning_rate": 8.636281407035176e-06, + "loss": 0.019, + "step": 14075 + }, + { + "epoch": 7.89, + "grad_norm": 1.6009154319763184, + "learning_rate": 8.633768844221107e-06, + "loss": 0.019, + "step": 14100 + }, + { + "epoch": 7.9, + "grad_norm": 1.8647724390029907, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0194, + "step": 14125 + }, + { + "epoch": 7.91, + "grad_norm": 1.5168023109436035, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0205, + "step": 14150 + }, + { + "epoch": 7.93, + "grad_norm": 1.575461506843567, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0188, + "step": 14175 + }, + { + "epoch": 7.94, + "grad_norm": 1.7249157428741455, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0187, + "step": 14200 + }, + { + "epoch": 7.96, + "grad_norm": 1.7031468152999878, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0197, + "step": 14225 + }, + { + "epoch": 7.97, + "grad_norm": 1.5277540683746338, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0183, + "step": 14250 + }, + { + "epoch": 7.98, + "grad_norm": 1.483960747718811, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0183, + "step": 14275 + }, + { + "epoch": 8.0, + "grad_norm": 1.9222930669784546, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0192, + "step": 14300 + }, + { + "epoch": 8.01, + "grad_norm": 1.2624781131744385, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0131, + "step": 14325 + }, + { + "epoch": 8.03, + "grad_norm": 1.2885479927062988, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0115, + "step": 14350 + }, + { + "epoch": 8.04, + "grad_norm": 1.4258344173431396, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0126, + "step": 14375 + }, + { + "epoch": 8.05, + "grad_norm": 1.5901182889938354, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0135, + "step": 14400 + }, + { + "epoch": 8.07, + "grad_norm": 1.3778425455093384, + "learning_rate": 8.601105527638192e-06, + "loss": 0.012, + "step": 14425 + }, + { + "epoch": 8.08, + "grad_norm": 1.6377310752868652, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0121, + "step": 14450 + }, + { + "epoch": 8.1, + "grad_norm": 1.2959672212600708, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0117, + "step": 14475 + }, + { + "epoch": 8.11, + "grad_norm": 1.87339448928833, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0139, + "step": 14500 + }, + { + "epoch": 8.12, + "grad_norm": 1.5233855247497559, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0128, + "step": 14525 + }, + { + "epoch": 8.14, + "grad_norm": 1.1843684911727905, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0125, + "step": 14550 + }, + { + "epoch": 8.15, + "grad_norm": 1.488094449043274, + "learning_rate": 8.586130653266332e-06, + "loss": 0.013, + "step": 14575 + }, + { + "epoch": 8.17, + "grad_norm": 1.7795379161834717, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0126, + "step": 14600 + }, + { + "epoch": 8.18, + "grad_norm": 1.287470817565918, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0128, + "step": 14625 + }, + { + "epoch": 8.19, + "grad_norm": 1.567731261253357, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0125, + "step": 14650 + }, + { + "epoch": 8.21, + "grad_norm": 1.5656017065048218, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0129, + "step": 14675 + }, + { + "epoch": 8.22, + "grad_norm": 1.6254287958145142, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0128, + "step": 14700 + }, + { + "epoch": 8.24, + "grad_norm": 1.6085509061813354, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0133, + "step": 14725 + }, + { + "epoch": 8.25, + "grad_norm": 1.7630277872085571, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0139, + "step": 14750 + }, + { + "epoch": 8.26, + "grad_norm": 1.7692389488220215, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0131, + "step": 14775 + }, + { + "epoch": 8.28, + "grad_norm": 1.5621066093444824, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0142, + "step": 14800 + }, + { + "epoch": 8.29, + "grad_norm": 1.638834834098816, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0126, + "step": 14825 + }, + { + "epoch": 8.31, + "grad_norm": 1.44390070438385, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0137, + "step": 14850 + }, + { + "epoch": 8.32, + "grad_norm": 1.1245650053024292, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0135, + "step": 14875 + }, + { + "epoch": 8.33, + "grad_norm": 1.2431079149246216, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0133, + "step": 14900 + }, + { + "epoch": 8.35, + "grad_norm": 1.6766127347946167, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0132, + "step": 14925 + }, + { + "epoch": 8.36, + "grad_norm": 1.9637749195098877, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0137, + "step": 14950 + }, + { + "epoch": 8.38, + "grad_norm": 1.3625088930130005, + "learning_rate": 8.54603015075377e-06, + "loss": 0.0127, + "step": 14975 + }, + { + "epoch": 8.39, + "grad_norm": 1.1293421983718872, + "learning_rate": 8.5435175879397e-06, + "loss": 0.0139, + "step": 15000 + }, + { + "epoch": 8.39, + "eval_loss": 0.11623761057853699, + "eval_runtime": 1248.4909, + "eval_samples_per_second": 1.201, + "eval_steps_per_second": 1.201, + "eval_wer": 25.16978225862914, + "step": 15000 + }, + { + "epoch": 8.4, + "grad_norm": 1.4315195083618164, + "learning_rate": 8.54100502512563e-06, + "loss": 0.0139, + "step": 15025 + }, + { + "epoch": 8.42, + "grad_norm": 1.5422781705856323, + "learning_rate": 8.538492462311558e-06, + "loss": 0.0144, + "step": 15050 + }, + { + "epoch": 8.43, + "grad_norm": 1.5740288496017456, + "learning_rate": 8.535979899497489e-06, + "loss": 0.0148, + "step": 15075 + }, + { + "epoch": 8.45, + "grad_norm": 1.6422525644302368, + "learning_rate": 8.533467336683418e-06, + "loss": 0.0133, + "step": 15100 + }, + { + "epoch": 8.46, + "grad_norm": 1.862137794494629, + "learning_rate": 8.530954773869347e-06, + "loss": 0.0124, + "step": 15125 + }, + { + "epoch": 8.47, + "grad_norm": 1.6580989360809326, + "learning_rate": 8.528442211055277e-06, + "loss": 0.0135, + "step": 15150 + }, + { + "epoch": 8.49, + "grad_norm": 1.8225072622299194, + "learning_rate": 8.525929648241206e-06, + "loss": 0.0135, + "step": 15175 + }, + { + "epoch": 8.5, + "grad_norm": 1.4441477060317993, + "learning_rate": 8.523417085427135e-06, + "loss": 0.0127, + "step": 15200 + }, + { + "epoch": 8.52, + "grad_norm": 1.7101575136184692, + "learning_rate": 8.520904522613066e-06, + "loss": 0.0139, + "step": 15225 + }, + { + "epoch": 8.53, + "grad_norm": 1.7326629161834717, + "learning_rate": 8.518391959798996e-06, + "loss": 0.0132, + "step": 15250 + }, + { + "epoch": 8.54, + "grad_norm": 1.2983744144439697, + "learning_rate": 8.515879396984925e-06, + "loss": 0.0135, + "step": 15275 + }, + { + "epoch": 8.56, + "grad_norm": 1.4976806640625, + "learning_rate": 8.513366834170856e-06, + "loss": 0.0137, + "step": 15300 + }, + { + "epoch": 8.57, + "grad_norm": 1.4674962759017944, + "learning_rate": 8.510854271356784e-06, + "loss": 0.0133, + "step": 15325 + }, + { + "epoch": 8.59, + "grad_norm": 1.4622610807418823, + "learning_rate": 8.508341708542715e-06, + "loss": 0.0133, + "step": 15350 + }, + { + "epoch": 8.6, + "grad_norm": 1.3362663984298706, + "learning_rate": 8.505829145728644e-06, + "loss": 0.0136, + "step": 15375 + }, + { + "epoch": 8.61, + "grad_norm": 1.571964144706726, + "learning_rate": 8.503316582914573e-06, + "loss": 0.0137, + "step": 15400 + }, + { + "epoch": 8.63, + "grad_norm": 1.281298279762268, + "learning_rate": 8.500804020100504e-06, + "loss": 0.013, + "step": 15425 + }, + { + "epoch": 8.64, + "grad_norm": 1.6459954977035522, + "learning_rate": 8.498291457286432e-06, + "loss": 0.0139, + "step": 15450 + }, + { + "epoch": 8.65, + "grad_norm": 1.8181471824645996, + "learning_rate": 8.495778894472363e-06, + "loss": 0.0141, + "step": 15475 + }, + { + "epoch": 8.67, + "grad_norm": 1.6314300298690796, + "learning_rate": 8.493266331658292e-06, + "loss": 0.0134, + "step": 15500 + }, + { + "epoch": 8.68, + "grad_norm": 1.6022205352783203, + "learning_rate": 8.490753768844222e-06, + "loss": 0.0122, + "step": 15525 + }, + { + "epoch": 8.7, + "grad_norm": 2.033066511154175, + "learning_rate": 8.488241206030151e-06, + "loss": 0.0137, + "step": 15550 + }, + { + "epoch": 8.71, + "grad_norm": 1.8259072303771973, + "learning_rate": 8.485728643216082e-06, + "loss": 0.0134, + "step": 15575 + }, + { + "epoch": 8.72, + "grad_norm": 1.478550672531128, + "learning_rate": 8.48321608040201e-06, + "loss": 0.0144, + "step": 15600 + }, + { + "epoch": 8.74, + "grad_norm": 1.3908416032791138, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0138, + "step": 15625 + }, + { + "epoch": 8.75, + "grad_norm": 1.4211598634719849, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0144, + "step": 15650 + }, + { + "epoch": 8.77, + "grad_norm": 1.4250924587249756, + "learning_rate": 8.4756783919598e-06, + "loss": 0.0136, + "step": 15675 + }, + { + "epoch": 8.78, + "grad_norm": 1.8028937578201294, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0136, + "step": 15700 + }, + { + "epoch": 8.79, + "grad_norm": 1.6728827953338623, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0134, + "step": 15725 + }, + { + "epoch": 8.81, + "grad_norm": 1.2716115713119507, + "learning_rate": 8.468140703517589e-06, + "loss": 0.0135, + "step": 15750 + }, + { + "epoch": 8.82, + "grad_norm": 1.5330822467803955, + "learning_rate": 8.465628140703518e-06, + "loss": 0.014, + "step": 15775 + }, + { + "epoch": 8.84, + "grad_norm": 1.5610933303833008, + "learning_rate": 8.463115577889448e-06, + "loss": 0.0146, + "step": 15800 + }, + { + "epoch": 8.85, + "grad_norm": 1.4767587184906006, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0142, + "step": 15825 + }, + { + "epoch": 8.86, + "grad_norm": 1.5992600917816162, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0136, + "step": 15850 + }, + { + "epoch": 8.88, + "grad_norm": 1.625845193862915, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0135, + "step": 15875 + }, + { + "epoch": 8.89, + "grad_norm": 1.4823307991027832, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0143, + "step": 15900 + }, + { + "epoch": 8.91, + "grad_norm": 1.5383321046829224, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0139, + "step": 15925 + }, + { + "epoch": 8.92, + "grad_norm": 1.2531863451004028, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0134, + "step": 15950 + }, + { + "epoch": 8.93, + "grad_norm": 1.6139166355133057, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0139, + "step": 15975 + }, + { + "epoch": 8.95, + "grad_norm": 1.510171890258789, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0134, + "step": 16000 + }, + { + "epoch": 8.95, + "eval_loss": 0.12022976577281952, + "eval_runtime": 1263.3302, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 26.01694321921165, + "step": 16000 + }, + { + "epoch": 8.96, + "grad_norm": 2.0292775630950928, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0137, + "step": 16025 + }, + { + "epoch": 8.98, + "grad_norm": 2.210850954055786, + "learning_rate": 8.437989949748744e-06, + "loss": 0.014, + "step": 16050 + }, + { + "epoch": 8.99, + "grad_norm": 2.1284592151641846, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0138, + "step": 16075 + }, + { + "epoch": 9.0, + "grad_norm": 1.2880257368087769, + "learning_rate": 8.432964824120605e-06, + "loss": 0.012, + "step": 16100 + }, + { + "epoch": 9.02, + "grad_norm": 1.1273249387741089, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0085, + "step": 16125 + }, + { + "epoch": 9.03, + "grad_norm": 0.9629200100898743, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0087, + "step": 16150 + }, + { + "epoch": 9.05, + "grad_norm": 1.1696195602416992, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0095, + "step": 16175 + }, + { + "epoch": 9.06, + "grad_norm": 1.1937648057937622, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0088, + "step": 16200 + }, + { + "epoch": 9.07, + "grad_norm": 1.186058759689331, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0088, + "step": 16225 + }, + { + "epoch": 9.09, + "grad_norm": 1.2673548460006714, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0093, + "step": 16250 + }, + { + "epoch": 9.1, + "grad_norm": 1.0217103958129883, + "learning_rate": 8.415376884422112e-06, + "loss": 0.009, + "step": 16275 + }, + { + "epoch": 9.12, + "grad_norm": 1.6276463270187378, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0094, + "step": 16300 + }, + { + "epoch": 9.13, + "grad_norm": 0.9528993964195251, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0086, + "step": 16325 + }, + { + "epoch": 9.14, + "grad_norm": 1.4897090196609497, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0096, + "step": 16350 + }, + { + "epoch": 9.16, + "grad_norm": 1.4035155773162842, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0095, + "step": 16375 + }, + { + "epoch": 9.17, + "grad_norm": 1.117077112197876, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0084, + "step": 16400 + }, + { + "epoch": 9.19, + "grad_norm": 1.4381709098815918, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0094, + "step": 16425 + }, + { + "epoch": 9.2, + "grad_norm": 2.295844078063965, + "learning_rate": 8.397788944723619e-06, + "loss": 0.011, + "step": 16450 + }, + { + "epoch": 9.21, + "grad_norm": 1.4901695251464844, + "learning_rate": 8.395276381909548e-06, + "loss": 0.0104, + "step": 16475 + }, + { + "epoch": 9.23, + "grad_norm": 2.0876200199127197, + "learning_rate": 8.392763819095479e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 9.24, + "grad_norm": 1.1527096033096313, + "learning_rate": 8.390251256281408e-06, + "loss": 0.0089, + "step": 16525 + }, + { + "epoch": 9.26, + "grad_norm": 1.299315094947815, + "learning_rate": 8.387738693467338e-06, + "loss": 0.01, + "step": 16550 + }, + { + "epoch": 9.27, + "grad_norm": 1.179197072982788, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0084, + "step": 16575 + }, + { + "epoch": 9.28, + "grad_norm": 1.4978837966918945, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0103, + "step": 16600 + }, + { + "epoch": 9.3, + "grad_norm": 1.2782052755355835, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0092, + "step": 16625 + }, + { + "epoch": 9.31, + "grad_norm": 1.43800687789917, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0099, + "step": 16650 + }, + { + "epoch": 9.33, + "grad_norm": 1.925214171409607, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0097, + "step": 16675 + }, + { + "epoch": 9.34, + "grad_norm": 1.757615089416504, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0095, + "step": 16700 + }, + { + "epoch": 9.35, + "grad_norm": 1.2906619310379028, + "learning_rate": 8.370150753768845e-06, + "loss": 0.01, + "step": 16725 + }, + { + "epoch": 9.37, + "grad_norm": 1.38942289352417, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0097, + "step": 16750 + }, + { + "epoch": 9.38, + "grad_norm": 1.7287613153457642, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0098, + "step": 16775 + }, + { + "epoch": 9.4, + "grad_norm": 1.5018854141235352, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0097, + "step": 16800 + }, + { + "epoch": 9.41, + "grad_norm": 1.5454444885253906, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0105, + "step": 16825 + }, + { + "epoch": 9.42, + "grad_norm": 1.5580748319625854, + "learning_rate": 8.357587939698493e-06, + "loss": 0.0096, + "step": 16850 + }, + { + "epoch": 9.44, + "grad_norm": 1.2850528955459595, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0098, + "step": 16875 + }, + { + "epoch": 9.45, + "grad_norm": 1.6075043678283691, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0106, + "step": 16900 + }, + { + "epoch": 9.47, + "grad_norm": 1.6207281351089478, + "learning_rate": 8.350050251256282e-06, + "loss": 0.01, + "step": 16925 + }, + { + "epoch": 9.48, + "grad_norm": 1.1454306840896606, + "learning_rate": 8.347537688442212e-06, + "loss": 0.0097, + "step": 16950 + }, + { + "epoch": 9.49, + "grad_norm": 1.3012179136276245, + "learning_rate": 8.345025125628141e-06, + "loss": 0.01, + "step": 16975 + }, + { + "epoch": 9.51, + "grad_norm": 1.6009056568145752, + "learning_rate": 8.34251256281407e-06, + "loss": 0.01, + "step": 17000 + }, + { + "epoch": 9.51, + "eval_loss": 0.12827400863170624, + "eval_runtime": 1269.7762, + "eval_samples_per_second": 1.181, + "eval_steps_per_second": 1.181, + "eval_wer": 25.981936567947912, + "step": 17000 + }, + { + "epoch": 9.52, + "grad_norm": 1.3005088567733765, + "learning_rate": 8.34e-06, + "loss": 0.0094, + "step": 17025 + }, + { + "epoch": 9.54, + "grad_norm": 1.4232419729232788, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0099, + "step": 17050 + }, + { + "epoch": 9.55, + "grad_norm": 1.7481540441513062, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0093, + "step": 17075 + }, + { + "epoch": 9.56, + "grad_norm": 1.8427752256393433, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0106, + "step": 17100 + }, + { + "epoch": 9.58, + "grad_norm": 1.2959924936294556, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0096, + "step": 17125 + }, + { + "epoch": 9.59, + "grad_norm": 1.1869925260543823, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0105, + "step": 17150 + }, + { + "epoch": 9.61, + "grad_norm": 1.8451534509658813, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0098, + "step": 17175 + }, + { + "epoch": 9.62, + "grad_norm": 1.6278190612792969, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0095, + "step": 17200 + }, + { + "epoch": 9.63, + "grad_norm": 1.4719972610473633, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0095, + "step": 17225 + }, + { + "epoch": 9.65, + "grad_norm": 1.4860060214996338, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0095, + "step": 17250 + }, + { + "epoch": 9.66, + "grad_norm": 1.251642107963562, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0113, + "step": 17275 + }, + { + "epoch": 9.68, + "grad_norm": 1.6547269821166992, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0102, + "step": 17300 + }, + { + "epoch": 9.69, + "grad_norm": 1.5958502292633057, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0104, + "step": 17325 + }, + { + "epoch": 9.7, + "grad_norm": 1.3772475719451904, + "learning_rate": 8.307336683417086e-06, + "loss": 0.01, + "step": 17350 + }, + { + "epoch": 9.72, + "grad_norm": 1.5301170349121094, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0109, + "step": 17375 + }, + { + "epoch": 9.73, + "grad_norm": 1.4869940280914307, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0105, + "step": 17400 + }, + { + "epoch": 9.75, + "grad_norm": 1.6237093210220337, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0105, + "step": 17425 + }, + { + "epoch": 9.76, + "grad_norm": 1.461899757385254, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0102, + "step": 17450 + }, + { + "epoch": 9.77, + "grad_norm": 2.14113450050354, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0103, + "step": 17475 + }, + { + "epoch": 9.79, + "grad_norm": 1.672645926475525, + "learning_rate": 8.292261306532664e-06, + "loss": 0.01, + "step": 17500 + }, + { + "epoch": 9.8, + "grad_norm": 1.068429946899414, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0094, + "step": 17525 + }, + { + "epoch": 9.82, + "grad_norm": 1.8176536560058594, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0107, + "step": 17550 + }, + { + "epoch": 9.83, + "grad_norm": 1.489044189453125, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0102, + "step": 17575 + }, + { + "epoch": 9.84, + "grad_norm": 1.374991536140442, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0104, + "step": 17600 + }, + { + "epoch": 9.86, + "grad_norm": 1.9951503276824951, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0101, + "step": 17625 + }, + { + "epoch": 9.87, + "grad_norm": 1.7742674350738525, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0108, + "step": 17650 + }, + { + "epoch": 9.89, + "grad_norm": 1.54373300075531, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0108, + "step": 17675 + }, + { + "epoch": 9.9, + "grad_norm": 1.442535400390625, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0098, + "step": 17700 + }, + { + "epoch": 9.91, + "grad_norm": 1.58523428440094, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0104, + "step": 17725 + }, + { + "epoch": 9.93, + "grad_norm": 1.551440954208374, + "learning_rate": 8.26713567839196e-06, + "loss": 0.0101, + "step": 17750 + }, + { + "epoch": 9.94, + "grad_norm": 1.3735955953598022, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0101, + "step": 17775 + }, + { + "epoch": 9.96, + "grad_norm": 1.8585009574890137, + "learning_rate": 8.26211055276382e-06, + "loss": 0.0103, + "step": 17800 + }, + { + "epoch": 9.97, + "grad_norm": 1.6317896842956543, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0097, + "step": 17825 + }, + { + "epoch": 9.98, + "grad_norm": 1.6716476678848267, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0103, + "step": 17850 + }, + { + "epoch": 10.0, + "grad_norm": 1.3141593933105469, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0097, + "step": 17875 + }, + { + "epoch": 10.01, + "grad_norm": 1.5377665758132935, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0073, + "step": 17900 + }, + { + "epoch": 10.03, + "grad_norm": 1.3977289199829102, + "learning_rate": 8.249547738693467e-06, + "loss": 0.0064, + "step": 17925 + }, + { + "epoch": 10.04, + "grad_norm": 1.2745766639709473, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0064, + "step": 17950 + }, + { + "epoch": 10.05, + "grad_norm": 1.3880548477172852, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0067, + "step": 17975 + }, + { + "epoch": 10.07, + "grad_norm": 1.0573828220367432, + "learning_rate": 8.242010050251257e-06, + "loss": 0.007, + "step": 18000 + }, + { + "epoch": 10.07, + "eval_loss": 0.13322582840919495, + "eval_runtime": 1266.9247, + "eval_samples_per_second": 1.184, + "eval_steps_per_second": 1.184, + "eval_wer": 26.13596583350837, + "step": 18000 + }, + { + "epoch": 10.08, + "grad_norm": 0.9800087213516235, + "learning_rate": 8.239497487437186e-06, + "loss": 0.0067, + "step": 18025 + }, + { + "epoch": 10.1, + "grad_norm": 1.3136003017425537, + "learning_rate": 8.236984924623116e-06, + "loss": 0.0072, + "step": 18050 + }, + { + "epoch": 10.11, + "grad_norm": 2.4294538497924805, + "learning_rate": 8.234472361809047e-06, + "loss": 0.0071, + "step": 18075 + }, + { + "epoch": 10.12, + "grad_norm": 1.492241382598877, + "learning_rate": 8.231959798994976e-06, + "loss": 0.0067, + "step": 18100 + }, + { + "epoch": 10.14, + "grad_norm": 1.0879470109939575, + "learning_rate": 8.229447236180905e-06, + "loss": 0.0065, + "step": 18125 + }, + { + "epoch": 10.15, + "grad_norm": 0.8511327505111694, + "learning_rate": 8.226934673366835e-06, + "loss": 0.0068, + "step": 18150 + }, + { + "epoch": 10.16, + "grad_norm": 1.3865950107574463, + "learning_rate": 8.224422110552764e-06, + "loss": 0.007, + "step": 18175 + }, + { + "epoch": 10.18, + "grad_norm": 1.5761487483978271, + "learning_rate": 8.221909547738695e-06, + "loss": 0.0065, + "step": 18200 + }, + { + "epoch": 10.19, + "grad_norm": 1.1414053440093994, + "learning_rate": 8.219396984924624e-06, + "loss": 0.007, + "step": 18225 + }, + { + "epoch": 10.21, + "grad_norm": 1.3173131942749023, + "learning_rate": 8.216984924623116e-06, + "loss": 0.0066, + "step": 18250 + }, + { + "epoch": 10.22, + "grad_norm": 1.83867609500885, + "learning_rate": 8.214472361809047e-06, + "loss": 0.0074, + "step": 18275 + }, + { + "epoch": 10.23, + "grad_norm": 0.9537453651428223, + "learning_rate": 8.211959798994974e-06, + "loss": 0.006, + "step": 18300 + }, + { + "epoch": 10.25, + "grad_norm": 1.2373839616775513, + "learning_rate": 8.209447236180905e-06, + "loss": 0.0073, + "step": 18325 + }, + { + "epoch": 10.26, + "grad_norm": 1.6300586462020874, + "learning_rate": 8.206934673366835e-06, + "loss": 0.0069, + "step": 18350 + }, + { + "epoch": 10.28, + "grad_norm": 0.9293125867843628, + "learning_rate": 8.204422110552764e-06, + "loss": 0.0079, + "step": 18375 + }, + { + "epoch": 10.29, + "grad_norm": 1.9881733655929565, + "learning_rate": 8.201909547738695e-06, + "loss": 0.0071, + "step": 18400 + }, + { + "epoch": 10.3, + "grad_norm": 1.0632404088974, + "learning_rate": 8.199396984924623e-06, + "loss": 0.0068, + "step": 18425 + }, + { + "epoch": 10.32, + "grad_norm": 1.6890215873718262, + "learning_rate": 8.196884422110554e-06, + "loss": 0.007, + "step": 18450 + }, + { + "epoch": 10.33, + "grad_norm": 1.7216650247573853, + "learning_rate": 8.194371859296483e-06, + "loss": 0.0069, + "step": 18475 + }, + { + "epoch": 10.35, + "grad_norm": 1.3629086017608643, + "learning_rate": 8.191859296482412e-06, + "loss": 0.0076, + "step": 18500 + }, + { + "epoch": 10.36, + "grad_norm": 1.6645286083221436, + "learning_rate": 8.189346733668342e-06, + "loss": 0.0081, + "step": 18525 + }, + { + "epoch": 10.37, + "grad_norm": 1.1869726181030273, + "learning_rate": 8.186834170854273e-06, + "loss": 0.0078, + "step": 18550 + }, + { + "epoch": 10.39, + "grad_norm": 1.2777962684631348, + "learning_rate": 8.184321608040202e-06, + "loss": 0.0073, + "step": 18575 + }, + { + "epoch": 10.4, + "grad_norm": 1.689989686012268, + "learning_rate": 8.181809045226131e-06, + "loss": 0.0074, + "step": 18600 + }, + { + "epoch": 10.42, + "grad_norm": 1.9431265592575073, + "learning_rate": 8.17929648241206e-06, + "loss": 0.0074, + "step": 18625 + }, + { + "epoch": 10.43, + "grad_norm": 1.4865610599517822, + "learning_rate": 8.17678391959799e-06, + "loss": 0.007, + "step": 18650 + }, + { + "epoch": 10.44, + "grad_norm": 1.4271529912948608, + "learning_rate": 8.174271356783921e-06, + "loss": 0.0077, + "step": 18675 + }, + { + "epoch": 10.46, + "grad_norm": 1.168408751487732, + "learning_rate": 8.171758793969849e-06, + "loss": 0.0073, + "step": 18700 + }, + { + "epoch": 10.47, + "grad_norm": 1.581976056098938, + "learning_rate": 8.16924623115578e-06, + "loss": 0.0079, + "step": 18725 + }, + { + "epoch": 10.49, + "grad_norm": 1.1006019115447998, + "learning_rate": 8.166733668341709e-06, + "loss": 0.0071, + "step": 18750 + }, + { + "epoch": 10.5, + "grad_norm": 1.4304567575454712, + "learning_rate": 8.164221105527638e-06, + "loss": 0.0075, + "step": 18775 + }, + { + "epoch": 10.51, + "grad_norm": 1.6005663871765137, + "learning_rate": 8.16170854271357e-06, + "loss": 0.007, + "step": 18800 + }, + { + "epoch": 10.53, + "grad_norm": 1.4893625974655151, + "learning_rate": 8.159195979899499e-06, + "loss": 0.0066, + "step": 18825 + }, + { + "epoch": 10.54, + "grad_norm": 1.077237844467163, + "learning_rate": 8.156683417085428e-06, + "loss": 0.007, + "step": 18850 + }, + { + "epoch": 10.56, + "grad_norm": 1.4600884914398193, + "learning_rate": 8.154170854271357e-06, + "loss": 0.0078, + "step": 18875 + }, + { + "epoch": 10.57, + "grad_norm": 1.8610810041427612, + "learning_rate": 8.151658291457287e-06, + "loss": 0.0083, + "step": 18900 + }, + { + "epoch": 10.58, + "grad_norm": 1.4527398347854614, + "learning_rate": 8.149145728643216e-06, + "loss": 0.0079, + "step": 18925 + }, + { + "epoch": 10.6, + "grad_norm": 1.2474983930587769, + "learning_rate": 8.146633165829147e-06, + "loss": 0.0075, + "step": 18950 + }, + { + "epoch": 10.61, + "grad_norm": 1.0266172885894775, + "learning_rate": 8.144120603015076e-06, + "loss": 0.0071, + "step": 18975 + }, + { + "epoch": 10.63, + "grad_norm": 1.6183582544326782, + "learning_rate": 8.141608040201006e-06, + "loss": 0.0073, + "step": 19000 + }, + { + "epoch": 10.63, + "eval_loss": 0.13868772983551025, + "eval_runtime": 1263.8628, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 25.90492193516768, + "step": 19000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 56, + "save_steps": 1000, + "total_flos": 5.91367500988416e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/bengali/checkpoint-19000/training_args.bin b/checkpoints/whisper-base/bengali/checkpoint-19000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfa46fee211fb3f2a81deba86889b6f434f18474 --- /dev/null +++ b/checkpoints/whisper-base/bengali/checkpoint-19000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4110af4ac47c3c8bf6eeb4d8a74414ace5336a8c2554d5bc189daa3727d40c6 +size 4667 diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/config.json b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7861ebfd3dce452d730fc7657aa35befb4dcfe2d --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/generation_config.json b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/model.safetensors b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd81e0127e74735c1c9cbaa9b82413c811a794ba --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed2806ae146ea386e8ee93df18db853c1889bd2d700850ea0d3781a7d1eecee +size 290403936 diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/optimizer.pt b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe9753151052f26ebfffa5c50e9d0501946f8bc5 --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca9cd06348298b2385a9853047a059f1c85386cb9d5d617d1d459551ef7c87d +size 574811077 diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/preprocessor_config.json b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/rng_state.pth b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0225d9bd1bc6eace0f3df2c3ca48605b69d4e070 --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e266b16286716d200874100e66b9100a543314b0e018fc7764d46bc6e2f2f367 +size 14575 diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/scheduler.pt b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5039d5af98eddfd11cb2bca3ea49d715c0d52151 --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15016d00c7ecdeae8cbd8952e30aa086bcb1f3f4b8ded36b84d69bcd6be85562 +size 627 diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/trainer_state.json b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b56193829f89595df5c43efc500d66c4cb35034a --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/trainer_state.json @@ -0,0 +1,4645 @@ +{ + "best_metric": 18.901392332633986, + "best_model_checkpoint": "results/whisper-base/bhojpuri/checkpoint-6000", + "epoch": 8.060453400503778, + "eval_steps": 1000, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 25.6799373626709, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.2113, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 13.445375442504883, + "learning_rate": 9.600000000000001e-07, + "loss": 1.8948, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 8.735254287719727, + "learning_rate": 1.46e-06, + "loss": 1.4212, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 6.870646953582764, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.1262, + "step": 100 + }, + { + "epoch": 0.06, + "grad_norm": 5.709643363952637, + "learning_rate": 2.46e-06, + "loss": 0.8991, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.728764057159424, + "learning_rate": 2.96e-06, + "loss": 0.7792, + "step": 150 + }, + { + "epoch": 0.09, + "grad_norm": 4.8977789878845215, + "learning_rate": 3.46e-06, + "loss": 0.7132, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 4.852188587188721, + "learning_rate": 3.96e-06, + "loss": 0.6345, + "step": 200 + }, + { + "epoch": 0.11, + "grad_norm": 5.193722724914551, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.5963, + "step": 225 + }, + { + "epoch": 0.13, + "grad_norm": 4.424361705780029, + "learning_rate": 4.960000000000001e-06, + "loss": 0.5545, + "step": 250 + }, + { + "epoch": 0.14, + "grad_norm": 4.56205415725708, + "learning_rate": 5.460000000000001e-06, + "loss": 0.5234, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 4.768870830535889, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.4985, + "step": 300 + }, + { + "epoch": 0.16, + "grad_norm": 4.571390151977539, + "learning_rate": 6.460000000000001e-06, + "loss": 0.4866, + "step": 325 + }, + { + "epoch": 0.18, + "grad_norm": 4.5495924949646, + "learning_rate": 6.96e-06, + "loss": 0.4559, + "step": 350 + }, + { + "epoch": 0.19, + "grad_norm": 4.467849254608154, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.4482, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 4.289758205413818, + "learning_rate": 7.960000000000002e-06, + "loss": 0.4187, + "step": 400 + }, + { + "epoch": 0.21, + "grad_norm": 3.9595603942871094, + "learning_rate": 8.46e-06, + "loss": 0.4185, + "step": 425 + }, + { + "epoch": 0.23, + "grad_norm": 4.080057621002197, + "learning_rate": 8.96e-06, + "loss": 0.3968, + "step": 450 + }, + { + "epoch": 0.24, + "grad_norm": 4.245291233062744, + "learning_rate": 9.460000000000001e-06, + "loss": 0.39, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 4.128876209259033, + "learning_rate": 9.960000000000001e-06, + "loss": 0.3785, + "step": 500 + }, + { + "epoch": 0.26, + "grad_norm": 3.8182666301727295, + "learning_rate": 9.997688442211056e-06, + "loss": 0.3692, + "step": 525 + }, + { + "epoch": 0.28, + "grad_norm": 4.988592624664307, + "learning_rate": 9.995175879396986e-06, + "loss": 0.3652, + "step": 550 + }, + { + "epoch": 0.29, + "grad_norm": 4.217241287231445, + "learning_rate": 9.992663316582915e-06, + "loss": 0.3571, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 4.806396007537842, + "learning_rate": 9.990150753768844e-06, + "loss": 0.3372, + "step": 600 + }, + { + "epoch": 0.31, + "grad_norm": 4.173343181610107, + "learning_rate": 9.987638190954775e-06, + "loss": 0.3336, + "step": 625 + }, + { + "epoch": 0.33, + "grad_norm": 3.7830517292022705, + "learning_rate": 9.985125628140705e-06, + "loss": 0.3245, + "step": 650 + }, + { + "epoch": 0.34, + "grad_norm": 4.541599750518799, + "learning_rate": 9.982613065326634e-06, + "loss": 0.3283, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 3.847547769546509, + "learning_rate": 9.980100502512565e-06, + "loss": 0.3127, + "step": 700 + }, + { + "epoch": 0.37, + "grad_norm": 3.906520366668701, + "learning_rate": 9.977587939698493e-06, + "loss": 0.3228, + "step": 725 + }, + { + "epoch": 0.38, + "grad_norm": 4.33050537109375, + "learning_rate": 9.975075376884424e-06, + "loss": 0.3231, + "step": 750 + }, + { + "epoch": 0.39, + "grad_norm": 4.281782627105713, + "learning_rate": 9.972562814070353e-06, + "loss": 0.3025, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 3.4937245845794678, + "learning_rate": 9.970050251256282e-06, + "loss": 0.3034, + "step": 800 + }, + { + "epoch": 0.42, + "grad_norm": 3.5297112464904785, + "learning_rate": 9.967537688442212e-06, + "loss": 0.2987, + "step": 825 + }, + { + "epoch": 0.43, + "grad_norm": 3.548499345779419, + "learning_rate": 9.965025125628141e-06, + "loss": 0.298, + "step": 850 + }, + { + "epoch": 0.44, + "grad_norm": 3.8480348587036133, + "learning_rate": 9.96251256281407e-06, + "loss": 0.3055, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 4.239658832550049, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2922, + "step": 900 + }, + { + "epoch": 0.47, + "grad_norm": 4.1223530769348145, + "learning_rate": 9.95748743718593e-06, + "loss": 0.2916, + "step": 925 + }, + { + "epoch": 0.48, + "grad_norm": 4.06390380859375, + "learning_rate": 9.95497487437186e-06, + "loss": 0.2777, + "step": 950 + }, + { + "epoch": 0.49, + "grad_norm": 3.471312999725342, + "learning_rate": 9.952462311557791e-06, + "loss": 0.2815, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 3.8829519748687744, + "learning_rate": 9.949949748743718e-06, + "loss": 0.2738, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 0.1950405240058899, + "eval_runtime": 705.6661, + "eval_samples_per_second": 2.126, + "eval_steps_per_second": 2.126, + "eval_wer": 27.636849132176234, + "step": 1000 + }, + { + "epoch": 0.52, + "grad_norm": 3.4115381240844727, + "learning_rate": 9.94743718592965e-06, + "loss": 0.2787, + "step": 1025 + }, + { + "epoch": 0.53, + "grad_norm": 3.591128349304199, + "learning_rate": 9.944924623115579e-06, + "loss": 0.2742, + "step": 1050 + }, + { + "epoch": 0.54, + "grad_norm": 3.013019323348999, + "learning_rate": 9.942412060301508e-06, + "loss": 0.262, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 3.8131942749023438, + "learning_rate": 9.93989949748744e-06, + "loss": 0.2772, + "step": 1100 + }, + { + "epoch": 0.57, + "grad_norm": 3.045408248901367, + "learning_rate": 9.937386934673367e-06, + "loss": 0.254, + "step": 1125 + }, + { + "epoch": 0.58, + "grad_norm": 3.206812858581543, + "learning_rate": 9.934874371859298e-06, + "loss": 0.2566, + "step": 1150 + }, + { + "epoch": 0.59, + "grad_norm": 3.6248373985290527, + "learning_rate": 9.932361809045227e-06, + "loss": 0.266, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 3.1897921562194824, + "learning_rate": 9.929849246231156e-06, + "loss": 0.2535, + "step": 1200 + }, + { + "epoch": 0.62, + "grad_norm": 3.373941659927368, + "learning_rate": 9.927336683417086e-06, + "loss": 0.2514, + "step": 1225 + }, + { + "epoch": 0.63, + "grad_norm": 3.179002046585083, + "learning_rate": 9.924824120603017e-06, + "loss": 0.2547, + "step": 1250 + }, + { + "epoch": 0.64, + "grad_norm": 3.5799050331115723, + "learning_rate": 9.922311557788944e-06, + "loss": 0.2516, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 3.636662721633911, + "learning_rate": 9.919798994974875e-06, + "loss": 0.2531, + "step": 1300 + }, + { + "epoch": 0.67, + "grad_norm": 3.8128602504730225, + "learning_rate": 9.917286432160805e-06, + "loss": 0.2449, + "step": 1325 + }, + { + "epoch": 0.68, + "grad_norm": 3.5023345947265625, + "learning_rate": 9.914773869346734e-06, + "loss": 0.2473, + "step": 1350 + }, + { + "epoch": 0.69, + "grad_norm": 3.846860647201538, + "learning_rate": 9.912261306532665e-06, + "loss": 0.2438, + "step": 1375 + }, + { + "epoch": 0.71, + "grad_norm": 3.5465805530548096, + "learning_rate": 9.909748743718593e-06, + "loss": 0.2496, + "step": 1400 + }, + { + "epoch": 0.72, + "grad_norm": 3.1322274208068848, + "learning_rate": 9.907236180904524e-06, + "loss": 0.2466, + "step": 1425 + }, + { + "epoch": 0.73, + "grad_norm": 3.314667224884033, + "learning_rate": 9.904723618090453e-06, + "loss": 0.2466, + "step": 1450 + }, + { + "epoch": 0.74, + "grad_norm": 3.361319065093994, + "learning_rate": 9.902211055276382e-06, + "loss": 0.2347, + "step": 1475 + }, + { + "epoch": 0.76, + "grad_norm": 3.4589295387268066, + "learning_rate": 9.899698492462312e-06, + "loss": 0.2379, + "step": 1500 + }, + { + "epoch": 0.77, + "grad_norm": 3.114238977432251, + "learning_rate": 9.897185929648243e-06, + "loss": 0.2377, + "step": 1525 + }, + { + "epoch": 0.78, + "grad_norm": 3.52107572555542, + "learning_rate": 9.894673366834172e-06, + "loss": 0.2268, + "step": 1550 + }, + { + "epoch": 0.79, + "grad_norm": 3.0359439849853516, + "learning_rate": 9.892160804020101e-06, + "loss": 0.2327, + "step": 1575 + }, + { + "epoch": 0.81, + "grad_norm": 3.3463919162750244, + "learning_rate": 9.88964824120603e-06, + "loss": 0.226, + "step": 1600 + }, + { + "epoch": 0.82, + "grad_norm": 3.0792479515075684, + "learning_rate": 9.88713567839196e-06, + "loss": 0.2197, + "step": 1625 + }, + { + "epoch": 0.83, + "grad_norm": 3.881321430206299, + "learning_rate": 9.884623115577891e-06, + "loss": 0.2316, + "step": 1650 + }, + { + "epoch": 0.84, + "grad_norm": 3.745701313018799, + "learning_rate": 9.882110552763819e-06, + "loss": 0.2294, + "step": 1675 + }, + { + "epoch": 0.86, + "grad_norm": 3.260382890701294, + "learning_rate": 9.87959798994975e-06, + "loss": 0.2232, + "step": 1700 + }, + { + "epoch": 0.87, + "grad_norm": 3.0946555137634277, + "learning_rate": 9.877085427135679e-06, + "loss": 0.231, + "step": 1725 + }, + { + "epoch": 0.88, + "grad_norm": 4.12130880355835, + "learning_rate": 9.874572864321608e-06, + "loss": 0.2231, + "step": 1750 + }, + { + "epoch": 0.89, + "grad_norm": 3.871645212173462, + "learning_rate": 9.87206030150754e-06, + "loss": 0.2216, + "step": 1775 + }, + { + "epoch": 0.91, + "grad_norm": 3.2469520568847656, + "learning_rate": 9.869547738693469e-06, + "loss": 0.2214, + "step": 1800 + }, + { + "epoch": 0.92, + "grad_norm": 2.991210460662842, + "learning_rate": 9.867035175879398e-06, + "loss": 0.2206, + "step": 1825 + }, + { + "epoch": 0.93, + "grad_norm": 3.1080331802368164, + "learning_rate": 9.864522613065327e-06, + "loss": 0.2131, + "step": 1850 + }, + { + "epoch": 0.94, + "grad_norm": 3.048530340194702, + "learning_rate": 9.862010050251257e-06, + "loss": 0.2211, + "step": 1875 + }, + { + "epoch": 0.96, + "grad_norm": 3.0590028762817383, + "learning_rate": 9.859497487437186e-06, + "loss": 0.2093, + "step": 1900 + }, + { + "epoch": 0.97, + "grad_norm": 3.4788081645965576, + "learning_rate": 9.856984924623117e-06, + "loss": 0.2225, + "step": 1925 + }, + { + "epoch": 0.98, + "grad_norm": 3.4214766025543213, + "learning_rate": 9.854472361809046e-06, + "loss": 0.2169, + "step": 1950 + }, + { + "epoch": 0.99, + "grad_norm": 3.4709339141845703, + "learning_rate": 9.851959798994976e-06, + "loss": 0.2145, + "step": 1975 + }, + { + "epoch": 1.01, + "grad_norm": 3.310551404953003, + "learning_rate": 9.849447236180905e-06, + "loss": 0.2067, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.15650032460689545, + "eval_runtime": 682.1005, + "eval_samples_per_second": 2.199, + "eval_steps_per_second": 2.199, + "eval_wer": 22.557060207260474, + "step": 2000 + }, + { + "epoch": 1.02, + "grad_norm": 2.916468381881714, + "learning_rate": 9.846934673366834e-06, + "loss": 0.1858, + "step": 2025 + }, + { + "epoch": 1.03, + "grad_norm": 3.4199585914611816, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1882, + "step": 2050 + }, + { + "epoch": 1.05, + "grad_norm": 3.099595308303833, + "learning_rate": 9.841909547738695e-06, + "loss": 0.1857, + "step": 2075 + }, + { + "epoch": 1.06, + "grad_norm": 3.0986616611480713, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1894, + "step": 2100 + }, + { + "epoch": 1.07, + "grad_norm": 3.339907646179199, + "learning_rate": 9.836884422110553e-06, + "loss": 0.1895, + "step": 2125 + }, + { + "epoch": 1.08, + "grad_norm": 2.88399076461792, + "learning_rate": 9.834371859296483e-06, + "loss": 0.193, + "step": 2150 + }, + { + "epoch": 1.1, + "grad_norm": 2.784425973892212, + "learning_rate": 9.831859296482414e-06, + "loss": 0.18, + "step": 2175 + }, + { + "epoch": 1.11, + "grad_norm": 3.152578830718994, + "learning_rate": 9.829346733668343e-06, + "loss": 0.189, + "step": 2200 + }, + { + "epoch": 1.12, + "grad_norm": 2.8035740852355957, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1882, + "step": 2225 + }, + { + "epoch": 1.13, + "grad_norm": 2.949218988418579, + "learning_rate": 9.824321608040202e-06, + "loss": 0.1918, + "step": 2250 + }, + { + "epoch": 1.15, + "grad_norm": 3.3745362758636475, + "learning_rate": 9.821809045226131e-06, + "loss": 0.19, + "step": 2275 + }, + { + "epoch": 1.16, + "grad_norm": 3.1311423778533936, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1872, + "step": 2300 + }, + { + "epoch": 1.17, + "grad_norm": 2.693122625350952, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1901, + "step": 2325 + }, + { + "epoch": 1.18, + "grad_norm": 3.5980801582336426, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1889, + "step": 2350 + }, + { + "epoch": 1.2, + "grad_norm": 2.9633264541625977, + "learning_rate": 9.81175879396985e-06, + "loss": 0.1924, + "step": 2375 + }, + { + "epoch": 1.21, + "grad_norm": 3.360875368118286, + "learning_rate": 9.809246231155781e-06, + "loss": 0.186, + "step": 2400 + }, + { + "epoch": 1.22, + "grad_norm": 2.904611825942993, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1814, + "step": 2425 + }, + { + "epoch": 1.23, + "grad_norm": 3.2357099056243896, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1835, + "step": 2450 + }, + { + "epoch": 1.25, + "grad_norm": 3.0824134349823, + "learning_rate": 9.801708542713569e-06, + "loss": 0.1804, + "step": 2475 + }, + { + "epoch": 1.26, + "grad_norm": 2.9033923149108887, + "learning_rate": 9.799195979899498e-06, + "loss": 0.1816, + "step": 2500 + }, + { + "epoch": 1.27, + "grad_norm": 2.9479734897613525, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1799, + "step": 2525 + }, + { + "epoch": 1.28, + "grad_norm": 3.165250778198242, + "learning_rate": 9.794170854271357e-06, + "loss": 0.1828, + "step": 2550 + }, + { + "epoch": 1.3, + "grad_norm": 2.560901403427124, + "learning_rate": 9.791658291457288e-06, + "loss": 0.1733, + "step": 2575 + }, + { + "epoch": 1.31, + "grad_norm": 2.653095006942749, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1788, + "step": 2600 + }, + { + "epoch": 1.32, + "grad_norm": 3.0283360481262207, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1738, + "step": 2625 + }, + { + "epoch": 1.34, + "grad_norm": 3.1723458766937256, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1829, + "step": 2650 + }, + { + "epoch": 1.35, + "grad_norm": 3.0975966453552246, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1698, + "step": 2675 + }, + { + "epoch": 1.36, + "grad_norm": 3.258195161819458, + "learning_rate": 9.779095477386934e-06, + "loss": 0.1778, + "step": 2700 + }, + { + "epoch": 1.37, + "grad_norm": 2.9353551864624023, + "learning_rate": 9.776582914572866e-06, + "loss": 0.1733, + "step": 2725 + }, + { + "epoch": 1.39, + "grad_norm": 2.7637884616851807, + "learning_rate": 9.774070351758795e-06, + "loss": 0.1692, + "step": 2750 + }, + { + "epoch": 1.4, + "grad_norm": 2.9036009311676025, + "learning_rate": 9.771557788944724e-06, + "loss": 0.187, + "step": 2775 + }, + { + "epoch": 1.41, + "grad_norm": 2.4787402153015137, + "learning_rate": 9.769045226130655e-06, + "loss": 0.1704, + "step": 2800 + }, + { + "epoch": 1.42, + "grad_norm": 3.491973876953125, + "learning_rate": 9.766532663316583e-06, + "loss": 0.1728, + "step": 2825 + }, + { + "epoch": 1.44, + "grad_norm": 2.917079210281372, + "learning_rate": 9.764020100502514e-06, + "loss": 0.182, + "step": 2850 + }, + { + "epoch": 1.45, + "grad_norm": 2.874762535095215, + "learning_rate": 9.761507537688443e-06, + "loss": 0.1734, + "step": 2875 + }, + { + "epoch": 1.46, + "grad_norm": 3.3363747596740723, + "learning_rate": 9.758994974874372e-06, + "loss": 0.1703, + "step": 2900 + }, + { + "epoch": 1.47, + "grad_norm": 2.8660428524017334, + "learning_rate": 9.756482412060302e-06, + "loss": 0.1717, + "step": 2925 + }, + { + "epoch": 1.49, + "grad_norm": 2.9264183044433594, + "learning_rate": 9.753969849246233e-06, + "loss": 0.1741, + "step": 2950 + }, + { + "epoch": 1.5, + "grad_norm": 3.1803531646728516, + "learning_rate": 9.75145728643216e-06, + "loss": 0.1727, + "step": 2975 + }, + { + "epoch": 1.51, + "grad_norm": 2.9490115642547607, + "learning_rate": 9.748944723618091e-06, + "loss": 0.1657, + "step": 3000 + }, + { + "epoch": 1.51, + "eval_loss": 0.14186391234397888, + "eval_runtime": 821.3737, + "eval_samples_per_second": 1.826, + "eval_steps_per_second": 1.826, + "eval_wer": 20.68154364549558, + "step": 3000 + }, + { + "epoch": 1.52, + "grad_norm": 2.535720109939575, + "learning_rate": 9.74643216080402e-06, + "loss": 0.1706, + "step": 3025 + }, + { + "epoch": 1.54, + "grad_norm": 2.959380865097046, + "learning_rate": 9.74391959798995e-06, + "loss": 0.1726, + "step": 3050 + }, + { + "epoch": 1.55, + "grad_norm": 3.224212646484375, + "learning_rate": 9.741407035175881e-06, + "loss": 0.173, + "step": 3075 + }, + { + "epoch": 1.56, + "grad_norm": 3.0631802082061768, + "learning_rate": 9.738894472361809e-06, + "loss": 0.1724, + "step": 3100 + }, + { + "epoch": 1.57, + "grad_norm": 2.9977240562438965, + "learning_rate": 9.73638190954774e-06, + "loss": 0.1726, + "step": 3125 + }, + { + "epoch": 1.59, + "grad_norm": 3.5702133178710938, + "learning_rate": 9.733869346733669e-06, + "loss": 0.165, + "step": 3150 + }, + { + "epoch": 1.6, + "grad_norm": 2.502072334289551, + "learning_rate": 9.731356783919598e-06, + "loss": 0.1714, + "step": 3175 + }, + { + "epoch": 1.61, + "grad_norm": 3.091139554977417, + "learning_rate": 9.72884422110553e-06, + "loss": 0.1679, + "step": 3200 + }, + { + "epoch": 1.62, + "grad_norm": 3.483013391494751, + "learning_rate": 9.726331658291459e-06, + "loss": 0.1681, + "step": 3225 + }, + { + "epoch": 1.64, + "grad_norm": 3.1797478199005127, + "learning_rate": 9.723819095477388e-06, + "loss": 0.1694, + "step": 3250 + }, + { + "epoch": 1.65, + "grad_norm": 3.257357597351074, + "learning_rate": 9.721306532663317e-06, + "loss": 0.1748, + "step": 3275 + }, + { + "epoch": 1.66, + "grad_norm": 2.9625837802886963, + "learning_rate": 9.718793969849247e-06, + "loss": 0.1665, + "step": 3300 + }, + { + "epoch": 1.68, + "grad_norm": 3.1587677001953125, + "learning_rate": 9.716281407035176e-06, + "loss": 0.1683, + "step": 3325 + }, + { + "epoch": 1.69, + "grad_norm": 3.124415874481201, + "learning_rate": 9.713768844221107e-06, + "loss": 0.165, + "step": 3350 + }, + { + "epoch": 1.7, + "grad_norm": 3.0871171951293945, + "learning_rate": 9.711256281407035e-06, + "loss": 0.1734, + "step": 3375 + }, + { + "epoch": 1.71, + "grad_norm": 2.8239259719848633, + "learning_rate": 9.708743718592966e-06, + "loss": 0.1716, + "step": 3400 + }, + { + "epoch": 1.73, + "grad_norm": 2.9695584774017334, + "learning_rate": 9.706231155778895e-06, + "loss": 0.1699, + "step": 3425 + }, + { + "epoch": 1.74, + "grad_norm": 3.134615421295166, + "learning_rate": 9.703718592964824e-06, + "loss": 0.1664, + "step": 3450 + }, + { + "epoch": 1.75, + "grad_norm": 3.0564093589782715, + "learning_rate": 9.701206030150755e-06, + "loss": 0.1547, + "step": 3475 + }, + { + "epoch": 1.76, + "grad_norm": 2.7680304050445557, + "learning_rate": 9.698693467336685e-06, + "loss": 0.1597, + "step": 3500 + }, + { + "epoch": 1.78, + "grad_norm": 3.2189650535583496, + "learning_rate": 9.696180904522614e-06, + "loss": 0.1692, + "step": 3525 + }, + { + "epoch": 1.79, + "grad_norm": 3.0747690200805664, + "learning_rate": 9.693668341708543e-06, + "loss": 0.1678, + "step": 3550 + }, + { + "epoch": 1.8, + "grad_norm": 3.010129690170288, + "learning_rate": 9.691155778894473e-06, + "loss": 0.1644, + "step": 3575 + }, + { + "epoch": 1.81, + "grad_norm": 2.7910287380218506, + "learning_rate": 9.688643216080402e-06, + "loss": 0.163, + "step": 3600 + }, + { + "epoch": 1.83, + "grad_norm": 3.032376289367676, + "learning_rate": 9.686130653266333e-06, + "loss": 0.1621, + "step": 3625 + }, + { + "epoch": 1.84, + "grad_norm": 3.164289951324463, + "learning_rate": 9.683618090452262e-06, + "loss": 0.162, + "step": 3650 + }, + { + "epoch": 1.85, + "grad_norm": 2.870012044906616, + "learning_rate": 9.681105527638192e-06, + "loss": 0.1588, + "step": 3675 + }, + { + "epoch": 1.86, + "grad_norm": 2.7865922451019287, + "learning_rate": 9.678592964824121e-06, + "loss": 0.1671, + "step": 3700 + }, + { + "epoch": 1.88, + "grad_norm": 3.3145291805267334, + "learning_rate": 9.67608040201005e-06, + "loss": 0.161, + "step": 3725 + }, + { + "epoch": 1.89, + "grad_norm": 3.2861533164978027, + "learning_rate": 9.673567839195981e-06, + "loss": 0.1583, + "step": 3750 + }, + { + "epoch": 1.9, + "grad_norm": 3.101914644241333, + "learning_rate": 9.67105527638191e-06, + "loss": 0.1578, + "step": 3775 + }, + { + "epoch": 1.91, + "grad_norm": 2.5809364318847656, + "learning_rate": 9.66854271356784e-06, + "loss": 0.1606, + "step": 3800 + }, + { + "epoch": 1.93, + "grad_norm": 3.262985944747925, + "learning_rate": 9.666030150753771e-06, + "loss": 0.1577, + "step": 3825 + }, + { + "epoch": 1.94, + "grad_norm": 2.980534076690674, + "learning_rate": 9.663517587939699e-06, + "loss": 0.1611, + "step": 3850 + }, + { + "epoch": 1.95, + "grad_norm": 2.9390132427215576, + "learning_rate": 9.66100502512563e-06, + "loss": 0.1538, + "step": 3875 + }, + { + "epoch": 1.96, + "grad_norm": 2.8582780361175537, + "learning_rate": 9.658492462311559e-06, + "loss": 0.1632, + "step": 3900 + }, + { + "epoch": 1.98, + "grad_norm": 3.3426969051361084, + "learning_rate": 9.655979899497488e-06, + "loss": 0.1609, + "step": 3925 + }, + { + "epoch": 1.99, + "grad_norm": 3.272286891937256, + "learning_rate": 9.653467336683418e-06, + "loss": 0.1597, + "step": 3950 + }, + { + "epoch": 2.0, + "grad_norm": 2.430133819580078, + "learning_rate": 9.650954773869347e-06, + "loss": 0.1514, + "step": 3975 + }, + { + "epoch": 2.02, + "grad_norm": 3.225931167602539, + "learning_rate": 9.648442211055276e-06, + "loss": 0.1367, + "step": 4000 + }, + { + "epoch": 2.02, + "eval_loss": 0.13564527034759521, + "eval_runtime": 676.257, + "eval_samples_per_second": 2.218, + "eval_steps_per_second": 2.218, + "eval_wer": 19.893190921228303, + "step": 4000 + }, + { + "epoch": 2.03, + "grad_norm": 2.686483860015869, + "learning_rate": 9.645929648241207e-06, + "loss": 0.1284, + "step": 4025 + }, + { + "epoch": 2.04, + "grad_norm": 2.5527188777923584, + "learning_rate": 9.643417085427137e-06, + "loss": 0.1352, + "step": 4050 + }, + { + "epoch": 2.05, + "grad_norm": 2.7886154651641846, + "learning_rate": 9.640904522613066e-06, + "loss": 0.1318, + "step": 4075 + }, + { + "epoch": 2.07, + "grad_norm": 2.470370292663574, + "learning_rate": 9.638391959798997e-06, + "loss": 0.1323, + "step": 4100 + }, + { + "epoch": 2.08, + "grad_norm": 2.691927909851074, + "learning_rate": 9.635879396984925e-06, + "loss": 0.1364, + "step": 4125 + }, + { + "epoch": 2.09, + "grad_norm": 2.7322866916656494, + "learning_rate": 9.633366834170856e-06, + "loss": 0.1407, + "step": 4150 + }, + { + "epoch": 2.1, + "grad_norm": 2.8455071449279785, + "learning_rate": 9.630854271356785e-06, + "loss": 0.1325, + "step": 4175 + }, + { + "epoch": 2.12, + "grad_norm": 3.082878589630127, + "learning_rate": 9.628341708542714e-06, + "loss": 0.1317, + "step": 4200 + }, + { + "epoch": 2.13, + "grad_norm": 2.9639124870300293, + "learning_rate": 9.625829145728644e-06, + "loss": 0.137, + "step": 4225 + }, + { + "epoch": 2.14, + "grad_norm": 2.671689987182617, + "learning_rate": 9.623316582914573e-06, + "loss": 0.1362, + "step": 4250 + }, + { + "epoch": 2.15, + "grad_norm": 2.939455986022949, + "learning_rate": 9.620804020100504e-06, + "loss": 0.1288, + "step": 4275 + }, + { + "epoch": 2.17, + "grad_norm": 2.9023149013519287, + "learning_rate": 9.618291457286433e-06, + "loss": 0.1307, + "step": 4300 + }, + { + "epoch": 2.18, + "grad_norm": 2.7500696182250977, + "learning_rate": 9.615778894472363e-06, + "loss": 0.1336, + "step": 4325 + }, + { + "epoch": 2.19, + "grad_norm": 2.803208589553833, + "learning_rate": 9.613266331658292e-06, + "loss": 0.1339, + "step": 4350 + }, + { + "epoch": 2.2, + "grad_norm": 2.5373857021331787, + "learning_rate": 9.610753768844223e-06, + "loss": 0.1367, + "step": 4375 + }, + { + "epoch": 2.22, + "grad_norm": 2.9384660720825195, + "learning_rate": 9.60824120603015e-06, + "loss": 0.1305, + "step": 4400 + }, + { + "epoch": 2.23, + "grad_norm": 2.850027084350586, + "learning_rate": 9.605728643216082e-06, + "loss": 0.1318, + "step": 4425 + }, + { + "epoch": 2.24, + "grad_norm": 2.6726455688476562, + "learning_rate": 9.60321608040201e-06, + "loss": 0.1316, + "step": 4450 + }, + { + "epoch": 2.25, + "grad_norm": 2.957423210144043, + "learning_rate": 9.60070351758794e-06, + "loss": 0.1299, + "step": 4475 + }, + { + "epoch": 2.27, + "grad_norm": 2.602191925048828, + "learning_rate": 9.598190954773871e-06, + "loss": 0.1307, + "step": 4500 + }, + { + "epoch": 2.28, + "grad_norm": 2.751736640930176, + "learning_rate": 9.595678391959799e-06, + "loss": 0.1329, + "step": 4525 + }, + { + "epoch": 2.29, + "grad_norm": 3.0037434101104736, + "learning_rate": 9.59316582914573e-06, + "loss": 0.1278, + "step": 4550 + }, + { + "epoch": 2.3, + "grad_norm": 3.0463547706604004, + "learning_rate": 9.59065326633166e-06, + "loss": 0.1224, + "step": 4575 + }, + { + "epoch": 2.32, + "grad_norm": 3.2266712188720703, + "learning_rate": 9.588140703517588e-06, + "loss": 0.1255, + "step": 4600 + }, + { + "epoch": 2.33, + "grad_norm": 2.8732638359069824, + "learning_rate": 9.585628140703518e-06, + "loss": 0.1266, + "step": 4625 + }, + { + "epoch": 2.34, + "grad_norm": 2.9471144676208496, + "learning_rate": 9.583115577889449e-06, + "loss": 0.1328, + "step": 4650 + }, + { + "epoch": 2.36, + "grad_norm": 2.89511775970459, + "learning_rate": 9.580603015075378e-06, + "loss": 0.1236, + "step": 4675 + }, + { + "epoch": 2.37, + "grad_norm": 3.1739041805267334, + "learning_rate": 9.578090452261307e-06, + "loss": 0.1296, + "step": 4700 + }, + { + "epoch": 2.38, + "grad_norm": 2.7334251403808594, + "learning_rate": 9.575577889447237e-06, + "loss": 0.1351, + "step": 4725 + }, + { + "epoch": 2.39, + "grad_norm": 2.680706024169922, + "learning_rate": 9.573065326633166e-06, + "loss": 0.1309, + "step": 4750 + }, + { + "epoch": 2.41, + "grad_norm": 3.2536261081695557, + "learning_rate": 9.570552763819097e-06, + "loss": 0.1298, + "step": 4775 + }, + { + "epoch": 2.42, + "grad_norm": 2.4739296436309814, + "learning_rate": 9.568040201005025e-06, + "loss": 0.1197, + "step": 4800 + }, + { + "epoch": 2.43, + "grad_norm": 3.236776113510132, + "learning_rate": 9.565527638190956e-06, + "loss": 0.1291, + "step": 4825 + }, + { + "epoch": 2.44, + "grad_norm": 2.824042320251465, + "learning_rate": 9.563015075376885e-06, + "loss": 0.1294, + "step": 4850 + }, + { + "epoch": 2.46, + "grad_norm": 2.406785011291504, + "learning_rate": 9.560502512562814e-06, + "loss": 0.1252, + "step": 4875 + }, + { + "epoch": 2.47, + "grad_norm": 2.861377477645874, + "learning_rate": 9.557989949748745e-06, + "loss": 0.1299, + "step": 4900 + }, + { + "epoch": 2.48, + "grad_norm": 2.5021440982818604, + "learning_rate": 9.555477386934675e-06, + "loss": 0.1272, + "step": 4925 + }, + { + "epoch": 2.49, + "grad_norm": 3.0189554691314697, + "learning_rate": 9.552964824120604e-06, + "loss": 0.1292, + "step": 4950 + }, + { + "epoch": 2.51, + "grad_norm": 3.1360931396484375, + "learning_rate": 9.550452261306533e-06, + "loss": 0.129, + "step": 4975 + }, + { + "epoch": 2.52, + "grad_norm": 2.903367757797241, + "learning_rate": 9.547939698492463e-06, + "loss": 0.1271, + "step": 5000 + }, + { + "epoch": 2.52, + "eval_loss": 0.13270609080791473, + "eval_runtime": 669.2672, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 2.241, + "eval_wer": 19.810541038845443, + "step": 5000 + }, + { + "epoch": 2.53, + "grad_norm": 2.9371252059936523, + "learning_rate": 9.545427135678392e-06, + "loss": 0.1303, + "step": 5025 + }, + { + "epoch": 2.54, + "grad_norm": 3.2883806228637695, + "learning_rate": 9.542914572864323e-06, + "loss": 0.1291, + "step": 5050 + }, + { + "epoch": 2.56, + "grad_norm": 2.6903512477874756, + "learning_rate": 9.540402010050252e-06, + "loss": 0.128, + "step": 5075 + }, + { + "epoch": 2.57, + "grad_norm": 3.278709650039673, + "learning_rate": 9.537889447236182e-06, + "loss": 0.1333, + "step": 5100 + }, + { + "epoch": 2.58, + "grad_norm": 2.8289425373077393, + "learning_rate": 9.535376884422111e-06, + "loss": 0.1248, + "step": 5125 + }, + { + "epoch": 2.59, + "grad_norm": 3.4989209175109863, + "learning_rate": 9.53286432160804e-06, + "loss": 0.1281, + "step": 5150 + }, + { + "epoch": 2.61, + "grad_norm": 2.6169960498809814, + "learning_rate": 9.530351758793971e-06, + "loss": 0.126, + "step": 5175 + }, + { + "epoch": 2.62, + "grad_norm": 2.828947067260742, + "learning_rate": 9.5278391959799e-06, + "loss": 0.1305, + "step": 5200 + }, + { + "epoch": 2.63, + "grad_norm": 3.1784260272979736, + "learning_rate": 9.52532663316583e-06, + "loss": 0.1273, + "step": 5225 + }, + { + "epoch": 2.64, + "grad_norm": 3.2241265773773193, + "learning_rate": 9.52281407035176e-06, + "loss": 0.1255, + "step": 5250 + }, + { + "epoch": 2.66, + "grad_norm": 3.028698682785034, + "learning_rate": 9.520301507537689e-06, + "loss": 0.1314, + "step": 5275 + }, + { + "epoch": 2.67, + "grad_norm": 3.175008773803711, + "learning_rate": 9.51778894472362e-06, + "loss": 0.1219, + "step": 5300 + }, + { + "epoch": 2.68, + "grad_norm": 3.167264938354492, + "learning_rate": 9.515276381909549e-06, + "loss": 0.1242, + "step": 5325 + }, + { + "epoch": 2.7, + "grad_norm": 2.69851016998291, + "learning_rate": 9.512763819095478e-06, + "loss": 0.1271, + "step": 5350 + }, + { + "epoch": 2.71, + "grad_norm": 2.8417932987213135, + "learning_rate": 9.510251256281408e-06, + "loss": 0.1213, + "step": 5375 + }, + { + "epoch": 2.72, + "grad_norm": 2.545701503753662, + "learning_rate": 9.507738693467337e-06, + "loss": 0.1298, + "step": 5400 + }, + { + "epoch": 2.73, + "grad_norm": 2.5989434719085693, + "learning_rate": 9.505226130653266e-06, + "loss": 0.1242, + "step": 5425 + }, + { + "epoch": 2.75, + "grad_norm": 3.072298288345337, + "learning_rate": 9.502713567839197e-06, + "loss": 0.1233, + "step": 5450 + }, + { + "epoch": 2.76, + "grad_norm": 2.847338914871216, + "learning_rate": 9.500201005025127e-06, + "loss": 0.1252, + "step": 5475 + }, + { + "epoch": 2.77, + "grad_norm": 3.0387043952941895, + "learning_rate": 9.497688442211056e-06, + "loss": 0.125, + "step": 5500 + }, + { + "epoch": 2.78, + "grad_norm": 2.878793239593506, + "learning_rate": 9.495175879396987e-06, + "loss": 0.1215, + "step": 5525 + }, + { + "epoch": 2.8, + "grad_norm": 2.850881576538086, + "learning_rate": 9.492663316582915e-06, + "loss": 0.1224, + "step": 5550 + }, + { + "epoch": 2.81, + "grad_norm": 3.003540277481079, + "learning_rate": 9.490150753768846e-06, + "loss": 0.121, + "step": 5575 + }, + { + "epoch": 2.82, + "grad_norm": 2.9830756187438965, + "learning_rate": 9.487638190954775e-06, + "loss": 0.1221, + "step": 5600 + }, + { + "epoch": 2.83, + "grad_norm": 2.689512252807617, + "learning_rate": 9.485125628140704e-06, + "loss": 0.1196, + "step": 5625 + }, + { + "epoch": 2.85, + "grad_norm": 2.7271206378936768, + "learning_rate": 9.482613065326634e-06, + "loss": 0.1231, + "step": 5650 + }, + { + "epoch": 2.86, + "grad_norm": 2.583075761795044, + "learning_rate": 9.480100502512563e-06, + "loss": 0.1295, + "step": 5675 + }, + { + "epoch": 2.87, + "grad_norm": 2.903951644897461, + "learning_rate": 9.477587939698494e-06, + "loss": 0.1252, + "step": 5700 + }, + { + "epoch": 2.88, + "grad_norm": 2.9422061443328857, + "learning_rate": 9.475075376884423e-06, + "loss": 0.1265, + "step": 5725 + }, + { + "epoch": 2.9, + "grad_norm": 2.623379707336426, + "learning_rate": 9.472562814070353e-06, + "loss": 0.1277, + "step": 5750 + }, + { + "epoch": 2.91, + "grad_norm": 2.666494369506836, + "learning_rate": 9.470050251256282e-06, + "loss": 0.1271, + "step": 5775 + }, + { + "epoch": 2.92, + "grad_norm": 2.9853527545928955, + "learning_rate": 9.467537688442213e-06, + "loss": 0.1279, + "step": 5800 + }, + { + "epoch": 2.93, + "grad_norm": 2.9918391704559326, + "learning_rate": 9.46502512562814e-06, + "loss": 0.1195, + "step": 5825 + }, + { + "epoch": 2.95, + "grad_norm": 2.8394436836242676, + "learning_rate": 9.462512562814072e-06, + "loss": 0.1238, + "step": 5850 + }, + { + "epoch": 2.96, + "grad_norm": 2.8405983448028564, + "learning_rate": 9.460000000000001e-06, + "loss": 0.1202, + "step": 5875 + }, + { + "epoch": 2.97, + "grad_norm": 2.935657262802124, + "learning_rate": 9.45748743718593e-06, + "loss": 0.1254, + "step": 5900 + }, + { + "epoch": 2.98, + "grad_norm": 2.6138455867767334, + "learning_rate": 9.454974874371861e-06, + "loss": 0.1187, + "step": 5925 + }, + { + "epoch": 3.0, + "grad_norm": 2.673956871032715, + "learning_rate": 9.452462311557789e-06, + "loss": 0.1201, + "step": 5950 + }, + { + "epoch": 3.01, + "grad_norm": 2.4361724853515625, + "learning_rate": 9.44994974874372e-06, + "loss": 0.101, + "step": 5975 + }, + { + "epoch": 3.02, + "grad_norm": 2.3077287673950195, + "learning_rate": 9.44743718592965e-06, + "loss": 0.0983, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.13235020637512207, + "eval_runtime": 819.1584, + "eval_samples_per_second": 1.831, + "eval_steps_per_second": 1.831, + "eval_wer": 18.901392332633986, + "step": 6000 + }, + { + "epoch": 3.04, + "grad_norm": 2.5819451808929443, + "learning_rate": 9.444924623115579e-06, + "loss": 0.0973, + "step": 6025 + }, + { + "epoch": 3.05, + "grad_norm": 2.713898181915283, + "learning_rate": 9.442412060301508e-06, + "loss": 0.0971, + "step": 6050 + }, + { + "epoch": 3.06, + "grad_norm": 2.651773691177368, + "learning_rate": 9.439899497487439e-06, + "loss": 0.0997, + "step": 6075 + }, + { + "epoch": 3.07, + "grad_norm": 2.7597413063049316, + "learning_rate": 9.437386934673367e-06, + "loss": 0.0964, + "step": 6100 + }, + { + "epoch": 3.09, + "grad_norm": 2.751300096511841, + "learning_rate": 9.434874371859298e-06, + "loss": 0.1012, + "step": 6125 + }, + { + "epoch": 3.1, + "grad_norm": 3.1045525074005127, + "learning_rate": 9.432361809045227e-06, + "loss": 0.0996, + "step": 6150 + }, + { + "epoch": 3.11, + "grad_norm": 2.7806971073150635, + "learning_rate": 9.429849246231156e-06, + "loss": 0.1011, + "step": 6175 + }, + { + "epoch": 3.12, + "grad_norm": 2.8187263011932373, + "learning_rate": 9.427336683417087e-06, + "loss": 0.0976, + "step": 6200 + }, + { + "epoch": 3.14, + "grad_norm": 2.4587740898132324, + "learning_rate": 9.424824120603015e-06, + "loss": 0.0975, + "step": 6225 + }, + { + "epoch": 3.15, + "grad_norm": 2.73453950881958, + "learning_rate": 9.422311557788946e-06, + "loss": 0.0962, + "step": 6250 + }, + { + "epoch": 3.16, + "grad_norm": 3.1345460414886475, + "learning_rate": 9.419798994974875e-06, + "loss": 0.103, + "step": 6275 + }, + { + "epoch": 3.17, + "grad_norm": 2.7945988178253174, + "learning_rate": 9.417286432160804e-06, + "loss": 0.095, + "step": 6300 + }, + { + "epoch": 3.19, + "grad_norm": 2.6713311672210693, + "learning_rate": 9.414773869346736e-06, + "loss": 0.0984, + "step": 6325 + }, + { + "epoch": 3.2, + "grad_norm": 2.382458209991455, + "learning_rate": 9.412261306532665e-06, + "loss": 0.0979, + "step": 6350 + }, + { + "epoch": 3.21, + "grad_norm": 2.4509615898132324, + "learning_rate": 9.409748743718594e-06, + "loss": 0.0993, + "step": 6375 + }, + { + "epoch": 3.22, + "grad_norm": 2.3005762100219727, + "learning_rate": 9.407236180904523e-06, + "loss": 0.0979, + "step": 6400 + }, + { + "epoch": 3.24, + "grad_norm": 2.5626466274261475, + "learning_rate": 9.404723618090453e-06, + "loss": 0.1045, + "step": 6425 + }, + { + "epoch": 3.25, + "grad_norm": 2.5955326557159424, + "learning_rate": 9.402211055276382e-06, + "loss": 0.0926, + "step": 6450 + }, + { + "epoch": 3.26, + "grad_norm": 2.6772453784942627, + "learning_rate": 9.399798994974875e-06, + "loss": 0.104, + "step": 6475 + }, + { + "epoch": 3.27, + "grad_norm": 2.9193766117095947, + "learning_rate": 9.397286432160805e-06, + "loss": 0.1009, + "step": 6500 + }, + { + "epoch": 3.29, + "grad_norm": 2.7305448055267334, + "learning_rate": 9.394773869346736e-06, + "loss": 0.1016, + "step": 6525 + }, + { + "epoch": 3.3, + "grad_norm": 2.837235927581787, + "learning_rate": 9.392261306532663e-06, + "loss": 0.098, + "step": 6550 + }, + { + "epoch": 3.31, + "grad_norm": 2.727816581726074, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0935, + "step": 6575 + }, + { + "epoch": 3.32, + "grad_norm": 2.8016111850738525, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0922, + "step": 6600 + }, + { + "epoch": 3.34, + "grad_norm": 2.5953567028045654, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0989, + "step": 6625 + }, + { + "epoch": 3.35, + "grad_norm": 3.132827043533325, + "learning_rate": 9.382211055276382e-06, + "loss": 0.1004, + "step": 6650 + }, + { + "epoch": 3.36, + "grad_norm": 2.6111388206481934, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0984, + "step": 6675 + }, + { + "epoch": 3.38, + "grad_norm": 2.574298858642578, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0963, + "step": 6700 + }, + { + "epoch": 3.39, + "grad_norm": 2.732889413833618, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0967, + "step": 6725 + }, + { + "epoch": 3.4, + "grad_norm": 2.8378517627716064, + "learning_rate": 9.372160804020101e-06, + "loss": 0.1017, + "step": 6750 + }, + { + "epoch": 3.41, + "grad_norm": 2.778153419494629, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0975, + "step": 6775 + }, + { + "epoch": 3.43, + "grad_norm": 2.606316328048706, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0966, + "step": 6800 + }, + { + "epoch": 3.44, + "grad_norm": 2.891472339630127, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0904, + "step": 6825 + }, + { + "epoch": 3.45, + "grad_norm": 2.6390256881713867, + "learning_rate": 9.36211055276382e-06, + "loss": 0.1029, + "step": 6850 + }, + { + "epoch": 3.46, + "grad_norm": 3.025780439376831, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0959, + "step": 6875 + }, + { + "epoch": 3.48, + "grad_norm": 3.1740646362304688, + "learning_rate": 9.357085427135679e-06, + "loss": 0.097, + "step": 6900 + }, + { + "epoch": 3.49, + "grad_norm": 3.3724825382232666, + "learning_rate": 9.354572864321608e-06, + "loss": 0.097, + "step": 6925 + }, + { + "epoch": 3.5, + "grad_norm": 3.3055806159973145, + "learning_rate": 9.352060301507538e-06, + "loss": 0.1004, + "step": 6950 + }, + { + "epoch": 3.51, + "grad_norm": 2.405369281768799, + "learning_rate": 9.349547738693469e-06, + "loss": 0.095, + "step": 6975 + }, + { + "epoch": 3.53, + "grad_norm": 2.6018126010894775, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0976, + "step": 7000 + }, + { + "epoch": 3.53, + "eval_loss": 0.13390778005123138, + "eval_runtime": 678.7951, + "eval_samples_per_second": 2.21, + "eval_steps_per_second": 2.21, + "eval_wer": 19.492656875834445, + "step": 7000 + }, + { + "epoch": 3.54, + "grad_norm": 2.8142216205596924, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0972, + "step": 7025 + }, + { + "epoch": 3.55, + "grad_norm": 3.1229586601257324, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0985, + "step": 7050 + }, + { + "epoch": 3.56, + "grad_norm": 2.5739147663116455, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0972, + "step": 7075 + }, + { + "epoch": 3.58, + "grad_norm": 2.492269992828369, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0988, + "step": 7100 + }, + { + "epoch": 3.59, + "grad_norm": 2.7131028175354004, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0956, + "step": 7125 + }, + { + "epoch": 3.6, + "grad_norm": 3.1555612087249756, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0959, + "step": 7150 + }, + { + "epoch": 3.61, + "grad_norm": 2.846494674682617, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0971, + "step": 7175 + }, + { + "epoch": 3.63, + "grad_norm": 3.0389609336853027, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0936, + "step": 7200 + }, + { + "epoch": 3.64, + "grad_norm": 2.628812313079834, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0991, + "step": 7225 + }, + { + "epoch": 3.65, + "grad_norm": 2.828676700592041, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0974, + "step": 7250 + }, + { + "epoch": 3.66, + "grad_norm": 3.0449953079223633, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0989, + "step": 7275 + }, + { + "epoch": 3.68, + "grad_norm": 2.347057819366455, + "learning_rate": 9.316884422110553e-06, + "loss": 0.097, + "step": 7300 + }, + { + "epoch": 3.69, + "grad_norm": 2.78208065032959, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0991, + "step": 7325 + }, + { + "epoch": 3.7, + "grad_norm": 3.12455415725708, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0983, + "step": 7350 + }, + { + "epoch": 3.72, + "grad_norm": 2.5676562786102295, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0914, + "step": 7375 + }, + { + "epoch": 3.73, + "grad_norm": 2.8863365650177, + "learning_rate": 9.306834170854272e-06, + "loss": 0.1014, + "step": 7400 + }, + { + "epoch": 3.74, + "grad_norm": 2.6704916954040527, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0943, + "step": 7425 + }, + { + "epoch": 3.75, + "grad_norm": 2.758479118347168, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0986, + "step": 7450 + }, + { + "epoch": 3.77, + "grad_norm": 3.166201114654541, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0912, + "step": 7475 + }, + { + "epoch": 3.78, + "grad_norm": 2.7396621704101562, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0969, + "step": 7500 + }, + { + "epoch": 3.79, + "grad_norm": 2.869274139404297, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0952, + "step": 7525 + }, + { + "epoch": 3.8, + "grad_norm": 2.7193753719329834, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0957, + "step": 7550 + }, + { + "epoch": 3.82, + "grad_norm": 3.094287872314453, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0979, + "step": 7575 + }, + { + "epoch": 3.83, + "grad_norm": 3.0483832359313965, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0929, + "step": 7600 + }, + { + "epoch": 3.84, + "grad_norm": 2.668569326400757, + "learning_rate": 9.28422110552764e-06, + "loss": 0.097, + "step": 7625 + }, + { + "epoch": 3.85, + "grad_norm": 2.6928296089172363, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0955, + "step": 7650 + }, + { + "epoch": 3.87, + "grad_norm": 2.6545631885528564, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0977, + "step": 7675 + }, + { + "epoch": 3.88, + "grad_norm": 2.4570605754852295, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0933, + "step": 7700 + }, + { + "epoch": 3.89, + "grad_norm": 3.0421929359436035, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0966, + "step": 7725 + }, + { + "epoch": 3.9, + "grad_norm": 3.043304443359375, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0947, + "step": 7750 + }, + { + "epoch": 3.92, + "grad_norm": 2.4443531036376953, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0925, + "step": 7775 + }, + { + "epoch": 3.93, + "grad_norm": 2.9380245208740234, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0984, + "step": 7800 + }, + { + "epoch": 3.94, + "grad_norm": 2.8176586627960205, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0912, + "step": 7825 + }, + { + "epoch": 3.95, + "grad_norm": 2.462188482284546, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0922, + "step": 7850 + }, + { + "epoch": 3.97, + "grad_norm": 2.926297426223755, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0951, + "step": 7875 + }, + { + "epoch": 3.98, + "grad_norm": 3.3212361335754395, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0964, + "step": 7900 + }, + { + "epoch": 3.99, + "grad_norm": 2.6651363372802734, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0912, + "step": 7925 + }, + { + "epoch": 4.01, + "grad_norm": 2.6940066814422607, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0876, + "step": 7950 + }, + { + "epoch": 4.02, + "grad_norm": 2.6326427459716797, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0707, + "step": 7975 + }, + { + "epoch": 4.03, + "grad_norm": 2.188326597213745, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0733, + "step": 8000 + }, + { + "epoch": 4.03, + "eval_loss": 0.13569985330104828, + "eval_runtime": 814.8426, + "eval_samples_per_second": 1.841, + "eval_steps_per_second": 1.841, + "eval_wer": 19.168415029563228, + "step": 8000 + }, + { + "epoch": 4.04, + "grad_norm": 2.4395296573638916, + "learning_rate": 9.244020100502514e-06, + "loss": 0.073, + "step": 8025 + }, + { + "epoch": 4.06, + "grad_norm": 2.6178853511810303, + "learning_rate": 9.241507537688443e-06, + "loss": 0.071, + "step": 8050 + }, + { + "epoch": 4.07, + "grad_norm": 2.1570680141448975, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0708, + "step": 8075 + }, + { + "epoch": 4.08, + "grad_norm": 2.5915088653564453, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0717, + "step": 8100 + }, + { + "epoch": 4.09, + "grad_norm": 2.7510876655578613, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0719, + "step": 8125 + }, + { + "epoch": 4.11, + "grad_norm": 2.468698501586914, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0711, + "step": 8150 + }, + { + "epoch": 4.12, + "grad_norm": 2.2371208667755127, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0732, + "step": 8175 + }, + { + "epoch": 4.13, + "grad_norm": 2.407306432723999, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0711, + "step": 8200 + }, + { + "epoch": 4.14, + "grad_norm": 2.4796528816223145, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0732, + "step": 8225 + }, + { + "epoch": 4.16, + "grad_norm": 2.5129926204681396, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0715, + "step": 8250 + }, + { + "epoch": 4.17, + "grad_norm": 2.3417532444000244, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0708, + "step": 8275 + }, + { + "epoch": 4.18, + "grad_norm": 2.69663405418396, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0752, + "step": 8300 + }, + { + "epoch": 4.19, + "grad_norm": 2.4996347427368164, + "learning_rate": 9.213869346733669e-06, + "loss": 0.073, + "step": 8325 + }, + { + "epoch": 4.21, + "grad_norm": 2.6909101009368896, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0748, + "step": 8350 + }, + { + "epoch": 4.22, + "grad_norm": 2.4807589054107666, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0714, + "step": 8375 + }, + { + "epoch": 4.23, + "grad_norm": 2.641909599304199, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0735, + "step": 8400 + }, + { + "epoch": 4.24, + "grad_norm": 2.3178141117095947, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0762, + "step": 8425 + }, + { + "epoch": 4.26, + "grad_norm": 2.490973949432373, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0726, + "step": 8450 + }, + { + "epoch": 4.27, + "grad_norm": 2.6471314430236816, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0741, + "step": 8475 + }, + { + "epoch": 4.28, + "grad_norm": 3.1670546531677246, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0727, + "step": 8500 + }, + { + "epoch": 4.29, + "grad_norm": 2.927062511444092, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0683, + "step": 8525 + }, + { + "epoch": 4.31, + "grad_norm": 2.8444395065307617, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0725, + "step": 8550 + }, + { + "epoch": 4.32, + "grad_norm": 3.3665058612823486, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0715, + "step": 8575 + }, + { + "epoch": 4.33, + "grad_norm": 2.858034133911133, + "learning_rate": 9.186331658291459e-06, + "loss": 0.074, + "step": 8600 + }, + { + "epoch": 4.35, + "grad_norm": 3.0506083965301514, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0775, + "step": 8625 + }, + { + "epoch": 4.36, + "grad_norm": 2.682406425476074, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0751, + "step": 8650 + }, + { + "epoch": 4.37, + "grad_norm": 2.3102126121520996, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0707, + "step": 8675 + }, + { + "epoch": 4.38, + "grad_norm": 3.0163309574127197, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0753, + "step": 8700 + }, + { + "epoch": 4.4, + "grad_norm": 2.728445529937744, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0708, + "step": 8725 + }, + { + "epoch": 4.41, + "grad_norm": 3.0077121257781982, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0744, + "step": 8750 + }, + { + "epoch": 4.42, + "grad_norm": 2.284630537033081, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0749, + "step": 8775 + }, + { + "epoch": 4.43, + "grad_norm": 2.743715286254883, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0737, + "step": 8800 + }, + { + "epoch": 4.45, + "grad_norm": 2.8000056743621826, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0711, + "step": 8825 + }, + { + "epoch": 4.46, + "grad_norm": 2.429260730743408, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0673, + "step": 8850 + }, + { + "epoch": 4.47, + "grad_norm": 2.470041036605835, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0733, + "step": 8875 + }, + { + "epoch": 4.48, + "grad_norm": 2.6142337322235107, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0733, + "step": 8900 + }, + { + "epoch": 4.5, + "grad_norm": 2.6166369915008545, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0728, + "step": 8925 + }, + { + "epoch": 4.51, + "grad_norm": 2.7032384872436523, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0724, + "step": 8950 + }, + { + "epoch": 4.52, + "grad_norm": 2.6168084144592285, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0751, + "step": 8975 + }, + { + "epoch": 4.53, + "grad_norm": 2.84675669670105, + "learning_rate": 9.146130653266331e-06, + "loss": 0.072, + "step": 9000 + }, + { + "epoch": 4.53, + "eval_loss": 0.14217503368854523, + "eval_runtime": 824.809, + "eval_samples_per_second": 1.819, + "eval_steps_per_second": 1.819, + "eval_wer": 19.193845762604106, + "step": 9000 + }, + { + "epoch": 4.55, + "grad_norm": 2.567945957183838, + "learning_rate": 9.143618090452262e-06, + "loss": 0.074, + "step": 9025 + }, + { + "epoch": 4.56, + "grad_norm": 2.8439247608184814, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0732, + "step": 9050 + }, + { + "epoch": 4.57, + "grad_norm": 2.5398049354553223, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0705, + "step": 9075 + }, + { + "epoch": 4.58, + "grad_norm": 2.5424253940582275, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0725, + "step": 9100 + }, + { + "epoch": 4.6, + "grad_norm": 2.628404140472412, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0753, + "step": 9125 + }, + { + "epoch": 4.61, + "grad_norm": 2.516603946685791, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0705, + "step": 9150 + }, + { + "epoch": 4.62, + "grad_norm": 2.667400360107422, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0749, + "step": 9175 + }, + { + "epoch": 4.63, + "grad_norm": 2.758493661880493, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0747, + "step": 9200 + }, + { + "epoch": 4.65, + "grad_norm": 2.916896343231201, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0725, + "step": 9225 + }, + { + "epoch": 4.66, + "grad_norm": 2.598928451538086, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0736, + "step": 9250 + }, + { + "epoch": 4.67, + "grad_norm": 2.3542425632476807, + "learning_rate": 9.118492462311559e-06, + "loss": 0.066, + "step": 9275 + }, + { + "epoch": 4.69, + "grad_norm": 2.695847272872925, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0737, + "step": 9300 + }, + { + "epoch": 4.7, + "grad_norm": 3.31416392326355, + "learning_rate": 9.113467336683418e-06, + "loss": 0.075, + "step": 9325 + }, + { + "epoch": 4.71, + "grad_norm": 2.284458875656128, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0704, + "step": 9350 + }, + { + "epoch": 4.72, + "grad_norm": 2.668292999267578, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0721, + "step": 9375 + }, + { + "epoch": 4.74, + "grad_norm": 2.6385998725891113, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0707, + "step": 9400 + }, + { + "epoch": 4.75, + "grad_norm": 2.5886073112487793, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0682, + "step": 9425 + }, + { + "epoch": 4.76, + "grad_norm": 2.717463493347168, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0732, + "step": 9450 + }, + { + "epoch": 4.77, + "grad_norm": 2.928118944168091, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0748, + "step": 9475 + }, + { + "epoch": 4.79, + "grad_norm": 2.7326712608337402, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0723, + "step": 9500 + }, + { + "epoch": 4.8, + "grad_norm": 2.9435110092163086, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0739, + "step": 9525 + }, + { + "epoch": 4.81, + "grad_norm": 3.177210569381714, + "learning_rate": 9.090854271356785e-06, + "loss": 0.073, + "step": 9550 + }, + { + "epoch": 4.82, + "grad_norm": 2.23905348777771, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0697, + "step": 9575 + }, + { + "epoch": 4.84, + "grad_norm": 2.7496800422668457, + "learning_rate": 9.085829145728644e-06, + "loss": 0.068, + "step": 9600 + }, + { + "epoch": 4.85, + "grad_norm": 2.592437505722046, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0729, + "step": 9625 + }, + { + "epoch": 4.86, + "grad_norm": 2.811861991882324, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0718, + "step": 9650 + }, + { + "epoch": 4.87, + "grad_norm": 2.7160444259643555, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0679, + "step": 9675 + }, + { + "epoch": 4.89, + "grad_norm": 2.4843125343322754, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0728, + "step": 9700 + }, + { + "epoch": 4.9, + "grad_norm": 2.6941096782684326, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0687, + "step": 9725 + }, + { + "epoch": 4.91, + "grad_norm": 3.1161856651306152, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0732, + "step": 9750 + }, + { + "epoch": 4.92, + "grad_norm": 2.8130764961242676, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0719, + "step": 9775 + }, + { + "epoch": 4.94, + "grad_norm": 2.9914069175720215, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0706, + "step": 9800 + }, + { + "epoch": 4.95, + "grad_norm": 2.857271671295166, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0741, + "step": 9825 + }, + { + "epoch": 4.96, + "grad_norm": 2.784555435180664, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0709, + "step": 9850 + }, + { + "epoch": 4.97, + "grad_norm": 2.8854422569274902, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0736, + "step": 9875 + }, + { + "epoch": 4.99, + "grad_norm": 2.668797731399536, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0707, + "step": 9900 + }, + { + "epoch": 5.0, + "grad_norm": 3.1415677070617676, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0695, + "step": 9925 + }, + { + "epoch": 5.01, + "grad_norm": 2.0550312995910645, + "learning_rate": 9.05065326633166e-06, + "loss": 0.053, + "step": 9950 + }, + { + "epoch": 5.03, + "grad_norm": 2.0791637897491455, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0515, + "step": 9975 + }, + { + "epoch": 5.04, + "grad_norm": 2.6897776126861572, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0535, + "step": 10000 + }, + { + "epoch": 5.04, + "eval_loss": 0.1476612091064453, + "eval_runtime": 683.6889, + "eval_samples_per_second": 2.194, + "eval_steps_per_second": 2.194, + "eval_wer": 19.816898722105663, + "step": 10000 + }, + { + "epoch": 5.05, + "grad_norm": 2.2946739196777344, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0479, + "step": 10025 + }, + { + "epoch": 5.06, + "grad_norm": 2.4670610427856445, + "learning_rate": 9.040603015075378e-06, + "loss": 0.051, + "step": 10050 + }, + { + "epoch": 5.08, + "grad_norm": 2.167872428894043, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0497, + "step": 10075 + }, + { + "epoch": 5.09, + "grad_norm": 2.3786306381225586, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0509, + "step": 10100 + }, + { + "epoch": 5.1, + "grad_norm": 2.1302120685577393, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0532, + "step": 10125 + }, + { + "epoch": 5.11, + "grad_norm": 2.3376047611236572, + "learning_rate": 9.03065326633166e-06, + "loss": 0.051, + "step": 10150 + }, + { + "epoch": 5.13, + "grad_norm": 2.5930166244506836, + "learning_rate": 9.028140703517589e-06, + "loss": 0.051, + "step": 10175 + }, + { + "epoch": 5.14, + "grad_norm": 2.5798375606536865, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0511, + "step": 10200 + }, + { + "epoch": 5.15, + "grad_norm": 3.092294931411743, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0552, + "step": 10225 + }, + { + "epoch": 5.16, + "grad_norm": 2.2021780014038086, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0495, + "step": 10250 + }, + { + "epoch": 5.18, + "grad_norm": 2.892244338989258, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0518, + "step": 10275 + }, + { + "epoch": 5.19, + "grad_norm": 2.1757004261016846, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0517, + "step": 10300 + }, + { + "epoch": 5.2, + "grad_norm": 2.2850658893585205, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0505, + "step": 10325 + }, + { + "epoch": 5.21, + "grad_norm": 2.639526605606079, + "learning_rate": 9.010552763819096e-06, + "loss": 0.052, + "step": 10350 + }, + { + "epoch": 5.23, + "grad_norm": 2.539785861968994, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0544, + "step": 10375 + }, + { + "epoch": 5.24, + "grad_norm": 2.6035704612731934, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0548, + "step": 10400 + }, + { + "epoch": 5.25, + "grad_norm": 2.3089330196380615, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0528, + "step": 10425 + }, + { + "epoch": 5.26, + "grad_norm": 2.814896583557129, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0543, + "step": 10450 + }, + { + "epoch": 5.28, + "grad_norm": 2.8685858249664307, + "learning_rate": 8.997989949748744e-06, + "loss": 0.052, + "step": 10475 + }, + { + "epoch": 5.29, + "grad_norm": 2.0773022174835205, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0517, + "step": 10500 + }, + { + "epoch": 5.3, + "grad_norm": 2.0877130031585693, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0537, + "step": 10525 + }, + { + "epoch": 5.31, + "grad_norm": 2.3637301921844482, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0504, + "step": 10550 + }, + { + "epoch": 5.33, + "grad_norm": 2.565547227859497, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0524, + "step": 10575 + }, + { + "epoch": 5.34, + "grad_norm": 2.561403512954712, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0508, + "step": 10600 + }, + { + "epoch": 5.35, + "grad_norm": 2.737248659133911, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0562, + "step": 10625 + }, + { + "epoch": 5.37, + "grad_norm": 2.424041271209717, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0507, + "step": 10650 + }, + { + "epoch": 5.38, + "grad_norm": 2.323554754257202, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0518, + "step": 10675 + }, + { + "epoch": 5.39, + "grad_norm": 2.3861303329467773, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0518, + "step": 10700 + }, + { + "epoch": 5.4, + "grad_norm": 2.369279384613037, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0512, + "step": 10725 + }, + { + "epoch": 5.42, + "grad_norm": 2.7172603607177734, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0504, + "step": 10750 + }, + { + "epoch": 5.43, + "grad_norm": 2.343226432800293, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0537, + "step": 10775 + }, + { + "epoch": 5.44, + "grad_norm": 2.245473861694336, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0492, + "step": 10800 + }, + { + "epoch": 5.45, + "grad_norm": 2.867196559906006, + "learning_rate": 8.96281407035176e-06, + "loss": 0.053, + "step": 10825 + }, + { + "epoch": 5.47, + "grad_norm": 3.020620346069336, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0558, + "step": 10850 + }, + { + "epoch": 5.48, + "grad_norm": 2.9534358978271484, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0554, + "step": 10875 + }, + { + "epoch": 5.49, + "grad_norm": 2.195884943008423, + "learning_rate": 8.95527638190955e-06, + "loss": 0.052, + "step": 10900 + }, + { + "epoch": 5.5, + "grad_norm": 2.6542305946350098, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0528, + "step": 10925 + }, + { + "epoch": 5.52, + "grad_norm": 2.5432066917419434, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0501, + "step": 10950 + }, + { + "epoch": 5.53, + "grad_norm": 2.779487133026123, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0523, + "step": 10975 + }, + { + "epoch": 5.54, + "grad_norm": 2.913694143295288, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0539, + "step": 11000 + }, + { + "epoch": 5.54, + "eval_loss": 0.1580062061548233, + "eval_runtime": 681.2619, + "eval_samples_per_second": 2.202, + "eval_steps_per_second": 2.202, + "eval_wer": 20.484455464428763, + "step": 11000 + }, + { + "epoch": 5.55, + "grad_norm": 2.264240026473999, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0509, + "step": 11025 + }, + { + "epoch": 5.57, + "grad_norm": 2.3422155380249023, + "learning_rate": 8.940201005025127e-06, + "loss": 0.051, + "step": 11050 + }, + { + "epoch": 5.58, + "grad_norm": 2.5940051078796387, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0527, + "step": 11075 + }, + { + "epoch": 5.59, + "grad_norm": 2.9431750774383545, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0517, + "step": 11100 + }, + { + "epoch": 5.6, + "grad_norm": 2.385577917098999, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0524, + "step": 11125 + }, + { + "epoch": 5.62, + "grad_norm": 2.418839931488037, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0516, + "step": 11150 + }, + { + "epoch": 5.63, + "grad_norm": 2.1453869342803955, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0506, + "step": 11175 + }, + { + "epoch": 5.64, + "grad_norm": 2.526531934738159, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0524, + "step": 11200 + }, + { + "epoch": 5.65, + "grad_norm": 2.440185546875, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0527, + "step": 11225 + }, + { + "epoch": 5.67, + "grad_norm": 2.998537063598633, + "learning_rate": 8.920100502512563e-06, + "loss": 0.052, + "step": 11250 + }, + { + "epoch": 5.68, + "grad_norm": 2.6036269664764404, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0523, + "step": 11275 + }, + { + "epoch": 5.69, + "grad_norm": 2.5308218002319336, + "learning_rate": 8.915075376884424e-06, + "loss": 0.052, + "step": 11300 + }, + { + "epoch": 5.71, + "grad_norm": 2.1581854820251465, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0509, + "step": 11325 + }, + { + "epoch": 5.72, + "grad_norm": 2.727381467819214, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0537, + "step": 11350 + }, + { + "epoch": 5.73, + "grad_norm": 2.49672269821167, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0508, + "step": 11375 + }, + { + "epoch": 5.74, + "grad_norm": 3.1774888038635254, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0534, + "step": 11400 + }, + { + "epoch": 5.76, + "grad_norm": 2.8274941444396973, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0494, + "step": 11425 + }, + { + "epoch": 5.77, + "grad_norm": 3.0065722465515137, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0494, + "step": 11450 + }, + { + "epoch": 5.78, + "grad_norm": 2.651773452758789, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0555, + "step": 11475 + }, + { + "epoch": 5.79, + "grad_norm": 2.4494576454162598, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0505, + "step": 11500 + }, + { + "epoch": 5.81, + "grad_norm": 2.5862834453582764, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0533, + "step": 11525 + }, + { + "epoch": 5.82, + "grad_norm": 2.5324857234954834, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0509, + "step": 11550 + }, + { + "epoch": 5.83, + "grad_norm": 2.3287007808685303, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0556, + "step": 11575 + }, + { + "epoch": 5.84, + "grad_norm": 2.597256660461426, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0542, + "step": 11600 + }, + { + "epoch": 5.86, + "grad_norm": 2.6935956478118896, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0531, + "step": 11625 + }, + { + "epoch": 5.87, + "grad_norm": 2.5459225177764893, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0531, + "step": 11650 + }, + { + "epoch": 5.88, + "grad_norm": 2.544337749481201, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0531, + "step": 11675 + }, + { + "epoch": 5.89, + "grad_norm": 1.9885578155517578, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0516, + "step": 11700 + }, + { + "epoch": 5.91, + "grad_norm": 2.4947054386138916, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0552, + "step": 11725 + }, + { + "epoch": 5.92, + "grad_norm": 2.079897165298462, + "learning_rate": 8.869849246231156e-06, + "loss": 0.052, + "step": 11750 + }, + { + "epoch": 5.93, + "grad_norm": 2.4189982414245605, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0498, + "step": 11775 + }, + { + "epoch": 5.94, + "grad_norm": 2.7939512729644775, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0531, + "step": 11800 + }, + { + "epoch": 5.96, + "grad_norm": 2.382002830505371, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0508, + "step": 11825 + }, + { + "epoch": 5.97, + "grad_norm": 2.4482948780059814, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0538, + "step": 11850 + }, + { + "epoch": 5.98, + "grad_norm": 2.629868984222412, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0527, + "step": 11875 + }, + { + "epoch": 5.99, + "grad_norm": 2.8602843284606934, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0495, + "step": 11900 + }, + { + "epoch": 6.01, + "grad_norm": 1.8928202390670776, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0413, + "step": 11925 + }, + { + "epoch": 6.02, + "grad_norm": 1.7060325145721436, + "learning_rate": 8.849748743718594e-06, + "loss": 0.033, + "step": 11950 + }, + { + "epoch": 6.03, + "grad_norm": 2.1884119510650635, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0347, + "step": 11975 + }, + { + "epoch": 6.05, + "grad_norm": 1.720334768295288, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0349, + "step": 12000 + }, + { + "epoch": 6.05, + "eval_loss": 0.15791888535022736, + "eval_runtime": 684.9801, + "eval_samples_per_second": 2.19, + "eval_steps_per_second": 2.19, + "eval_wer": 19.657956640600165, + "step": 12000 + }, + { + "epoch": 6.06, + "grad_norm": 2.009782075881958, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0344, + "step": 12025 + }, + { + "epoch": 6.07, + "grad_norm": 1.7443175315856934, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0334, + "step": 12050 + }, + { + "epoch": 6.08, + "grad_norm": 2.1407268047332764, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0352, + "step": 12075 + }, + { + "epoch": 6.1, + "grad_norm": 2.0177431106567383, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0361, + "step": 12100 + }, + { + "epoch": 6.11, + "grad_norm": 2.3297183513641357, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0354, + "step": 12125 + }, + { + "epoch": 6.12, + "grad_norm": 1.9675050973892212, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0371, + "step": 12150 + }, + { + "epoch": 6.13, + "grad_norm": 2.2743990421295166, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0373, + "step": 12175 + }, + { + "epoch": 6.15, + "grad_norm": 2.1525580883026123, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0366, + "step": 12200 + }, + { + "epoch": 6.16, + "grad_norm": 2.0537710189819336, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0369, + "step": 12225 + }, + { + "epoch": 6.17, + "grad_norm": 2.3041796684265137, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0362, + "step": 12250 + }, + { + "epoch": 6.18, + "grad_norm": 2.4455666542053223, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0374, + "step": 12275 + }, + { + "epoch": 6.2, + "grad_norm": 2.269235610961914, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0369, + "step": 12300 + }, + { + "epoch": 6.21, + "grad_norm": 2.190075159072876, + "learning_rate": 8.812060301507538e-06, + "loss": 0.035, + "step": 12325 + }, + { + "epoch": 6.22, + "grad_norm": 2.4138638973236084, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0354, + "step": 12350 + }, + { + "epoch": 6.23, + "grad_norm": 2.2292511463165283, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0362, + "step": 12375 + }, + { + "epoch": 6.25, + "grad_norm": 2.1857666969299316, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0359, + "step": 12400 + }, + { + "epoch": 6.26, + "grad_norm": 2.2910640239715576, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0344, + "step": 12425 + }, + { + "epoch": 6.27, + "grad_norm": 2.9382243156433105, + "learning_rate": 8.799497487437186e-06, + "loss": 0.034, + "step": 12450 + }, + { + "epoch": 6.28, + "grad_norm": 2.316387414932251, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0356, + "step": 12475 + }, + { + "epoch": 6.3, + "grad_norm": 2.440032482147217, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0364, + "step": 12500 + }, + { + "epoch": 6.31, + "grad_norm": 2.3906569480895996, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0359, + "step": 12525 + }, + { + "epoch": 6.32, + "grad_norm": 2.378338575363159, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0367, + "step": 12550 + }, + { + "epoch": 6.34, + "grad_norm": 2.3678393363952637, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0354, + "step": 12575 + }, + { + "epoch": 6.35, + "grad_norm": 2.4715754985809326, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0346, + "step": 12600 + }, + { + "epoch": 6.36, + "grad_norm": 2.047273635864258, + "learning_rate": 8.781909547738695e-06, + "loss": 0.039, + "step": 12625 + }, + { + "epoch": 6.37, + "grad_norm": 2.018861770629883, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0369, + "step": 12650 + }, + { + "epoch": 6.39, + "grad_norm": 2.9614944458007812, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0382, + "step": 12675 + }, + { + "epoch": 6.4, + "grad_norm": 2.3012194633483887, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0358, + "step": 12700 + }, + { + "epoch": 6.41, + "grad_norm": 2.559779644012451, + "learning_rate": 8.771859296482412e-06, + "loss": 0.037, + "step": 12725 + }, + { + "epoch": 6.42, + "grad_norm": 2.9270224571228027, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0373, + "step": 12750 + }, + { + "epoch": 6.44, + "grad_norm": 2.291433334350586, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0366, + "step": 12775 + }, + { + "epoch": 6.45, + "grad_norm": 2.4711899757385254, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0398, + "step": 12800 + }, + { + "epoch": 6.46, + "grad_norm": 2.392552614212036, + "learning_rate": 8.761809045226131e-06, + "loss": 0.036, + "step": 12825 + }, + { + "epoch": 6.47, + "grad_norm": 2.3102073669433594, + "learning_rate": 8.75929648241206e-06, + "loss": 0.038, + "step": 12850 + }, + { + "epoch": 6.49, + "grad_norm": 2.3226354122161865, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0373, + "step": 12875 + }, + { + "epoch": 6.5, + "grad_norm": 1.9695569276809692, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0381, + "step": 12900 + }, + { + "epoch": 6.51, + "grad_norm": 2.413104295730591, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0365, + "step": 12925 + }, + { + "epoch": 6.52, + "grad_norm": 2.064389944076538, + "learning_rate": 8.749346733668343e-06, + "loss": 0.036, + "step": 12950 + }, + { + "epoch": 6.54, + "grad_norm": 2.6705169677734375, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0372, + "step": 12975 + }, + { + "epoch": 6.55, + "grad_norm": 2.0969362258911133, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0336, + "step": 13000 + }, + { + "epoch": 6.55, + "eval_loss": 0.16908448934555054, + "eval_runtime": 667.2343, + "eval_samples_per_second": 2.248, + "eval_steps_per_second": 2.248, + "eval_wer": 19.950410070570285, + "step": 13000 + }, + { + "epoch": 6.56, + "grad_norm": 2.136242628097534, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0357, + "step": 13025 + }, + { + "epoch": 6.57, + "grad_norm": 2.511798143386841, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0353, + "step": 13050 + }, + { + "epoch": 6.59, + "grad_norm": 2.4828379154205322, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0349, + "step": 13075 + }, + { + "epoch": 6.6, + "grad_norm": 2.624856472015381, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0403, + "step": 13100 + }, + { + "epoch": 6.61, + "grad_norm": 2.1515939235687256, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0355, + "step": 13125 + }, + { + "epoch": 6.62, + "grad_norm": 2.4161157608032227, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0346, + "step": 13150 + }, + { + "epoch": 6.64, + "grad_norm": 2.460517168045044, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0357, + "step": 13175 + }, + { + "epoch": 6.65, + "grad_norm": 2.4376771450042725, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0373, + "step": 13200 + }, + { + "epoch": 6.66, + "grad_norm": 2.0659916400909424, + "learning_rate": 8.721708542713569e-06, + "loss": 0.037, + "step": 13225 + }, + { + "epoch": 6.68, + "grad_norm": 2.146392345428467, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0353, + "step": 13250 + }, + { + "epoch": 6.69, + "grad_norm": 2.3663198947906494, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0375, + "step": 13275 + }, + { + "epoch": 6.7, + "grad_norm": 2.5607895851135254, + "learning_rate": 8.714170854271357e-06, + "loss": 0.035, + "step": 13300 + }, + { + "epoch": 6.71, + "grad_norm": 2.8335697650909424, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0374, + "step": 13325 + }, + { + "epoch": 6.73, + "grad_norm": 2.7597601413726807, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0387, + "step": 13350 + }, + { + "epoch": 6.74, + "grad_norm": 2.6189017295837402, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0364, + "step": 13375 + }, + { + "epoch": 6.75, + "grad_norm": 2.3776888847351074, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0376, + "step": 13400 + }, + { + "epoch": 6.76, + "grad_norm": 2.189185380935669, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0381, + "step": 13425 + }, + { + "epoch": 6.78, + "grad_norm": 2.7241108417510986, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0374, + "step": 13450 + }, + { + "epoch": 6.79, + "grad_norm": 4.041163444519043, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0368, + "step": 13475 + }, + { + "epoch": 6.8, + "grad_norm": 2.481370449066162, + "learning_rate": 8.694070351758795e-06, + "loss": 0.033, + "step": 13500 + }, + { + "epoch": 6.81, + "grad_norm": 3.1583364009857178, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0384, + "step": 13525 + }, + { + "epoch": 6.83, + "grad_norm": 2.2413814067840576, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0351, + "step": 13550 + }, + { + "epoch": 6.84, + "grad_norm": 2.8035519123077393, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0388, + "step": 13575 + }, + { + "epoch": 6.85, + "grad_norm": 2.9254982471466064, + "learning_rate": 8.684020100502514e-06, + "loss": 0.035, + "step": 13600 + }, + { + "epoch": 6.86, + "grad_norm": 2.2909460067749023, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0352, + "step": 13625 + }, + { + "epoch": 6.88, + "grad_norm": 2.610309362411499, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0364, + "step": 13650 + }, + { + "epoch": 6.89, + "grad_norm": 2.6387126445770264, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0379, + "step": 13675 + }, + { + "epoch": 6.9, + "grad_norm": 2.5027027130126953, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0369, + "step": 13700 + }, + { + "epoch": 6.91, + "grad_norm": 2.3496592044830322, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0366, + "step": 13725 + }, + { + "epoch": 6.93, + "grad_norm": 2.65702223777771, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0385, + "step": 13750 + }, + { + "epoch": 6.94, + "grad_norm": 2.748074531555176, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0369, + "step": 13775 + }, + { + "epoch": 6.95, + "grad_norm": 3.506335973739624, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0378, + "step": 13800 + }, + { + "epoch": 6.96, + "grad_norm": 2.694732666015625, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0354, + "step": 13825 + }, + { + "epoch": 6.98, + "grad_norm": 2.2550511360168457, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0373, + "step": 13850 + }, + { + "epoch": 6.99, + "grad_norm": 2.433151960372925, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0344, + "step": 13875 + }, + { + "epoch": 7.0, + "grad_norm": 1.508744478225708, + "learning_rate": 8.65386934673367e-06, + "loss": 0.034, + "step": 13900 + }, + { + "epoch": 7.02, + "grad_norm": 1.420714020729065, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0235, + "step": 13925 + }, + { + "epoch": 7.03, + "grad_norm": 1.869701623916626, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0225, + "step": 13950 + }, + { + "epoch": 7.04, + "grad_norm": 2.068693161010742, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0244, + "step": 13975 + }, + { + "epoch": 7.05, + "grad_norm": 1.8615219593048096, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0234, + "step": 14000 + }, + { + "epoch": 7.05, + "eval_loss": 0.18252834677696228, + "eval_runtime": 823.1964, + "eval_samples_per_second": 1.822, + "eval_steps_per_second": 1.822, + "eval_wer": 20.8150549939602, + "step": 14000 + }, + { + "epoch": 7.07, + "grad_norm": 1.8759124279022217, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0254, + "step": 14025 + }, + { + "epoch": 7.08, + "grad_norm": 2.134404182434082, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0228, + "step": 14050 + }, + { + "epoch": 7.09, + "grad_norm": 2.2461295127868652, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0224, + "step": 14075 + }, + { + "epoch": 7.1, + "grad_norm": 2.219928026199341, + "learning_rate": 8.633768844221107e-06, + "loss": 0.024, + "step": 14100 + }, + { + "epoch": 7.12, + "grad_norm": 2.1890671253204346, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0257, + "step": 14125 + }, + { + "epoch": 7.13, + "grad_norm": 2.1304495334625244, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0221, + "step": 14150 + }, + { + "epoch": 7.14, + "grad_norm": 2.0116004943847656, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0231, + "step": 14175 + }, + { + "epoch": 7.15, + "grad_norm": 1.6010977029800415, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0246, + "step": 14200 + }, + { + "epoch": 7.17, + "grad_norm": 2.0499043464660645, + "learning_rate": 8.621206030150756e-06, + "loss": 0.022, + "step": 14225 + }, + { + "epoch": 7.18, + "grad_norm": 2.107008218765259, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0238, + "step": 14250 + }, + { + "epoch": 7.19, + "grad_norm": 1.815064549446106, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0248, + "step": 14275 + }, + { + "epoch": 7.2, + "grad_norm": 2.8692736625671387, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0235, + "step": 14300 + }, + { + "epoch": 7.22, + "grad_norm": 2.3197147846221924, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0254, + "step": 14325 + }, + { + "epoch": 7.23, + "grad_norm": 2.3354978561401367, + "learning_rate": 8.608643216080402e-06, + "loss": 0.026, + "step": 14350 + }, + { + "epoch": 7.24, + "grad_norm": 2.1069259643554688, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0237, + "step": 14375 + }, + { + "epoch": 7.25, + "grad_norm": 2.3068268299102783, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0233, + "step": 14400 + }, + { + "epoch": 7.27, + "grad_norm": 1.6271668672561646, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0241, + "step": 14425 + }, + { + "epoch": 7.28, + "grad_norm": 1.9415867328643799, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0239, + "step": 14450 + }, + { + "epoch": 7.29, + "grad_norm": 1.7038017511367798, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0249, + "step": 14475 + }, + { + "epoch": 7.3, + "grad_norm": 1.6272844076156616, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0252, + "step": 14500 + }, + { + "epoch": 7.32, + "grad_norm": 2.3678505420684814, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0273, + "step": 14525 + }, + { + "epoch": 7.33, + "grad_norm": 2.458712577819824, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0238, + "step": 14550 + }, + { + "epoch": 7.34, + "grad_norm": 2.5460939407348633, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0256, + "step": 14575 + }, + { + "epoch": 7.36, + "grad_norm": 1.972413420677185, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0251, + "step": 14600 + }, + { + "epoch": 7.37, + "grad_norm": 2.010920286178589, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0261, + "step": 14625 + }, + { + "epoch": 7.38, + "grad_norm": 2.3204383850097656, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0227, + "step": 14650 + }, + { + "epoch": 7.39, + "grad_norm": 2.339850902557373, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0262, + "step": 14675 + }, + { + "epoch": 7.41, + "grad_norm": 1.6972728967666626, + "learning_rate": 8.573567839195982e-06, + "loss": 0.024, + "step": 14700 + }, + { + "epoch": 7.42, + "grad_norm": 2.018177032470703, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0228, + "step": 14725 + }, + { + "epoch": 7.43, + "grad_norm": 2.995488405227661, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0253, + "step": 14750 + }, + { + "epoch": 7.44, + "grad_norm": 2.454914093017578, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0237, + "step": 14775 + }, + { + "epoch": 7.46, + "grad_norm": 2.540642738342285, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0246, + "step": 14800 + }, + { + "epoch": 7.47, + "grad_norm": 1.939743161201477, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0252, + "step": 14825 + }, + { + "epoch": 7.48, + "grad_norm": 2.0343706607818604, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0252, + "step": 14850 + }, + { + "epoch": 7.49, + "grad_norm": 1.7984944581985474, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0257, + "step": 14875 + }, + { + "epoch": 7.51, + "grad_norm": 1.940292239189148, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0224, + "step": 14900 + }, + { + "epoch": 7.52, + "grad_norm": 1.8464412689208984, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0248, + "step": 14925 + }, + { + "epoch": 7.53, + "grad_norm": 2.073688268661499, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0258, + "step": 14950 + }, + { + "epoch": 7.54, + "grad_norm": 2.0554683208465576, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0257, + "step": 14975 + }, + { + "epoch": 7.56, + "grad_norm": 2.351391077041626, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0232, + "step": 15000 + }, + { + "epoch": 7.56, + "eval_loss": 0.19071106612682343, + "eval_runtime": 821.9053, + "eval_samples_per_second": 1.825, + "eval_steps_per_second": 1.825, + "eval_wer": 20.52260156399008, + "step": 15000 + }, + { + "epoch": 7.57, + "grad_norm": 2.5925848484039307, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0235, + "step": 15025 + }, + { + "epoch": 7.58, + "grad_norm": 1.4901732206344604, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0255, + "step": 15050 + }, + { + "epoch": 7.59, + "grad_norm": 2.5382041931152344, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0256, + "step": 15075 + }, + { + "epoch": 7.61, + "grad_norm": 2.2413501739501953, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0251, + "step": 15100 + }, + { + "epoch": 7.62, + "grad_norm": 2.122349977493286, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0231, + "step": 15125 + }, + { + "epoch": 7.63, + "grad_norm": 1.9606690406799316, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0264, + "step": 15150 + }, + { + "epoch": 7.64, + "grad_norm": 2.5316102504730225, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0261, + "step": 15175 + }, + { + "epoch": 7.66, + "grad_norm": 2.255422353744507, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0249, + "step": 15200 + }, + { + "epoch": 7.67, + "grad_norm": 2.157343626022339, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0238, + "step": 15225 + }, + { + "epoch": 7.68, + "grad_norm": 2.6522364616394043, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0254, + "step": 15250 + }, + { + "epoch": 7.7, + "grad_norm": 2.060448408126831, + "learning_rate": 8.515778894472363e-06, + "loss": 0.023, + "step": 15275 + }, + { + "epoch": 7.71, + "grad_norm": 2.227752923965454, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0267, + "step": 15300 + }, + { + "epoch": 7.72, + "grad_norm": 2.1365671157836914, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0252, + "step": 15325 + }, + { + "epoch": 7.73, + "grad_norm": 2.365065097808838, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0265, + "step": 15350 + }, + { + "epoch": 7.75, + "grad_norm": 2.0919487476348877, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0236, + "step": 15375 + }, + { + "epoch": 7.76, + "grad_norm": 2.2057507038116455, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0244, + "step": 15400 + }, + { + "epoch": 7.77, + "grad_norm": 1.8395442962646484, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0231, + "step": 15425 + }, + { + "epoch": 7.78, + "grad_norm": 2.5028109550476074, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0258, + "step": 15450 + }, + { + "epoch": 7.8, + "grad_norm": 2.344120502471924, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0264, + "step": 15475 + }, + { + "epoch": 7.81, + "grad_norm": 2.7482991218566895, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0262, + "step": 15500 + }, + { + "epoch": 7.82, + "grad_norm": 2.4089808464050293, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0266, + "step": 15525 + }, + { + "epoch": 7.83, + "grad_norm": 2.3025505542755127, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0251, + "step": 15550 + }, + { + "epoch": 7.85, + "grad_norm": 2.4090678691864014, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0247, + "step": 15575 + }, + { + "epoch": 7.86, + "grad_norm": 1.9564759731292725, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0259, + "step": 15600 + }, + { + "epoch": 7.87, + "grad_norm": 2.8987693786621094, + "learning_rate": 8.480603015075377e-06, + "loss": 0.027, + "step": 15625 + }, + { + "epoch": 7.88, + "grad_norm": 2.70403790473938, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0258, + "step": 15650 + }, + { + "epoch": 7.9, + "grad_norm": 1.7813621759414673, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0253, + "step": 15675 + }, + { + "epoch": 7.91, + "grad_norm": 2.453068494796753, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0266, + "step": 15700 + }, + { + "epoch": 7.92, + "grad_norm": 2.0693554878234863, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0241, + "step": 15725 + }, + { + "epoch": 7.93, + "grad_norm": 2.543778419494629, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0244, + "step": 15750 + }, + { + "epoch": 7.95, + "grad_norm": 1.8381553888320923, + "learning_rate": 8.465527638190956e-06, + "loss": 0.025, + "step": 15775 + }, + { + "epoch": 7.96, + "grad_norm": 2.5781338214874268, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0248, + "step": 15800 + }, + { + "epoch": 7.97, + "grad_norm": 2.3859455585479736, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0239, + "step": 15825 + }, + { + "epoch": 7.98, + "grad_norm": 2.432790994644165, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0262, + "step": 15850 + }, + { + "epoch": 8.0, + "grad_norm": 2.313905715942383, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0261, + "step": 15875 + }, + { + "epoch": 8.01, + "grad_norm": 2.110212564468384, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0183, + "step": 15900 + }, + { + "epoch": 8.02, + "grad_norm": 1.7873753309249878, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0159, + "step": 15925 + }, + { + "epoch": 8.04, + "grad_norm": 1.2131909132003784, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0171, + "step": 15950 + }, + { + "epoch": 8.05, + "grad_norm": 2.6613032817840576, + "learning_rate": 8.445427135678392e-06, + "loss": 0.014, + "step": 15975 + }, + { + "epoch": 8.06, + "grad_norm": 1.509589433670044, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0153, + "step": 16000 + }, + { + "epoch": 8.06, + "eval_loss": 0.2002580165863037, + "eval_runtime": 678.2011, + "eval_samples_per_second": 2.212, + "eval_steps_per_second": 2.212, + "eval_wer": 20.58617839659228, + "step": 16000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 51, + "save_steps": 1000, + "total_flos": 4.981247705088e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/bhojpuri/checkpoint-16000/training_args.bin b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2eb389dce5d123f5bb00be9bf30f2b9ba7345b1c --- /dev/null +++ b/checkpoints/whisper-base/bhojpuri/checkpoint-16000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a53ed401800f303f7d0ba9c90705fe962c3dc162c86f0b16d6b4a64b3df0042 +size 4667 diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/config.json b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7861ebfd3dce452d730fc7657aa35befb4dcfe2d --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/generation_config.json b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/model.safetensors b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb3ea7525dcca07bfdd19c5ec9f08c4d7f623a8d --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f43028c52e9a5fcdcf8246e63b95b58423f413a801920452ae364134e015a4 +size 290403936 diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/optimizer.pt b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d595f04f15cb55f616cde98efdc7af60743ecdae --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b96f62a0eae7646f02fc717042d53dc7d8a5a639005f9b57b1e8a943bfc763 +size 574811077 diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/preprocessor_config.json b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/rng_state.pth b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..54fc922629395b5e8e60895cc345f89055dfc545 --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c982e3cff8d825880c5f37788ac44fe541a079837bea9018b522e3a1d1c74c +size 14575 diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/scheduler.pt b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d233145708936452ec959c52a2c26620acae7fa --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346606fbfc4456475a7c8e70abef87cf9fdada01185763731429a46bbc45ed57 +size 627 diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/trainer_state.json b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fce7ae350e35c51992a517a0b06a11da35d98fbb --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/trainer_state.json @@ -0,0 +1,5223 @@ +{ + "best_metric": 13.975030205396695, + "best_model_checkpoint": "results/whisper-base/chattisgarhi/checkpoint-8000", + "epoch": 10.06711409395973, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 23.673376083374023, + "learning_rate": 4.800000000000001e-07, + "loss": 2.0983, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 11.906577110290527, + "learning_rate": 9.800000000000001e-07, + "loss": 1.7517, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 8.46860122680664, + "learning_rate": 1.48e-06, + "loss": 1.3198, + "step": 75 + }, + { + "epoch": 0.06, + "grad_norm": 5.6751861572265625, + "learning_rate": 1.98e-06, + "loss": 1.0203, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 4.756661891937256, + "learning_rate": 2.4800000000000004e-06, + "loss": 0.7827, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.377810001373291, + "learning_rate": 2.9800000000000003e-06, + "loss": 0.6378, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 4.152875900268555, + "learning_rate": 3.48e-06, + "loss": 0.5473, + "step": 175 + }, + { + "epoch": 0.11, + "grad_norm": 3.97214937210083, + "learning_rate": 3.980000000000001e-06, + "loss": 0.4895, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 3.5363082885742188, + "learning_rate": 4.48e-06, + "loss": 0.4429, + "step": 225 + }, + { + "epoch": 0.14, + "grad_norm": 3.732100009918213, + "learning_rate": 4.980000000000001e-06, + "loss": 0.4112, + "step": 250 + }, + { + "epoch": 0.15, + "grad_norm": 3.43440318107605, + "learning_rate": 5.480000000000001e-06, + "loss": 0.3871, + "step": 275 + }, + { + "epoch": 0.17, + "grad_norm": 3.6351027488708496, + "learning_rate": 5.98e-06, + "loss": 0.3688, + "step": 300 + }, + { + "epoch": 0.18, + "grad_norm": 3.485503911972046, + "learning_rate": 6.480000000000001e-06, + "loss": 0.3445, + "step": 325 + }, + { + "epoch": 0.2, + "grad_norm": 3.5935561656951904, + "learning_rate": 6.98e-06, + "loss": 0.3265, + "step": 350 + }, + { + "epoch": 0.21, + "grad_norm": 3.663973331451416, + "learning_rate": 7.48e-06, + "loss": 0.3295, + "step": 375 + }, + { + "epoch": 0.22, + "grad_norm": 3.408698320388794, + "learning_rate": 7.980000000000002e-06, + "loss": 0.3066, + "step": 400 + }, + { + "epoch": 0.24, + "grad_norm": 3.895949602127075, + "learning_rate": 8.48e-06, + "loss": 0.3057, + "step": 425 + }, + { + "epoch": 0.25, + "grad_norm": 2.8062074184417725, + "learning_rate": 8.98e-06, + "loss": 0.285, + "step": 450 + }, + { + "epoch": 0.27, + "grad_norm": 3.242084264755249, + "learning_rate": 9.48e-06, + "loss": 0.273, + "step": 475 + }, + { + "epoch": 0.28, + "grad_norm": 3.7018635272979736, + "learning_rate": 9.980000000000001e-06, + "loss": 0.2667, + "step": 500 + }, + { + "epoch": 0.29, + "grad_norm": 3.5255682468414307, + "learning_rate": 9.997587939698492e-06, + "loss": 0.2593, + "step": 525 + }, + { + "epoch": 0.31, + "grad_norm": 3.4959287643432617, + "learning_rate": 9.995075376884423e-06, + "loss": 0.2533, + "step": 550 + }, + { + "epoch": 0.32, + "grad_norm": 2.829500198364258, + "learning_rate": 9.992562814070353e-06, + "loss": 0.2547, + "step": 575 + }, + { + "epoch": 0.34, + "grad_norm": 2.652364730834961, + "learning_rate": 9.990050251256282e-06, + "loss": 0.2465, + "step": 600 + }, + { + "epoch": 0.35, + "grad_norm": 3.0150463581085205, + "learning_rate": 9.987537688442211e-06, + "loss": 0.2399, + "step": 625 + }, + { + "epoch": 0.36, + "grad_norm": 2.9498066902160645, + "learning_rate": 9.985025125628142e-06, + "loss": 0.2295, + "step": 650 + }, + { + "epoch": 0.38, + "grad_norm": 2.7754244804382324, + "learning_rate": 9.98251256281407e-06, + "loss": 0.2213, + "step": 675 + }, + { + "epoch": 0.39, + "grad_norm": 3.791512966156006, + "learning_rate": 9.980000000000001e-06, + "loss": 0.2304, + "step": 700 + }, + { + "epoch": 0.41, + "grad_norm": 2.854616403579712, + "learning_rate": 9.97748743718593e-06, + "loss": 0.2173, + "step": 725 + }, + { + "epoch": 0.42, + "grad_norm": 2.843431234359741, + "learning_rate": 9.97497487437186e-06, + "loss": 0.2109, + "step": 750 + }, + { + "epoch": 0.43, + "grad_norm": 2.9085946083068848, + "learning_rate": 9.97246231155779e-06, + "loss": 0.2114, + "step": 775 + }, + { + "epoch": 0.45, + "grad_norm": 3.4142544269561768, + "learning_rate": 9.969949748743718e-06, + "loss": 0.2131, + "step": 800 + }, + { + "epoch": 0.46, + "grad_norm": 3.7411675453186035, + "learning_rate": 9.96743718592965e-06, + "loss": 0.2186, + "step": 825 + }, + { + "epoch": 0.48, + "grad_norm": 3.2526090145111084, + "learning_rate": 9.964924623115579e-06, + "loss": 0.2058, + "step": 850 + }, + { + "epoch": 0.49, + "grad_norm": 3.02831768989563, + "learning_rate": 9.962412060301508e-06, + "loss": 0.2025, + "step": 875 + }, + { + "epoch": 0.5, + "grad_norm": 2.171527862548828, + "learning_rate": 9.959899497487437e-06, + "loss": 0.1982, + "step": 900 + }, + { + "epoch": 0.52, + "grad_norm": 2.772057294845581, + "learning_rate": 9.957386934673368e-06, + "loss": 0.1994, + "step": 925 + }, + { + "epoch": 0.53, + "grad_norm": 2.7260427474975586, + "learning_rate": 9.954874371859298e-06, + "loss": 0.1968, + "step": 950 + }, + { + "epoch": 0.55, + "grad_norm": 2.8749184608459473, + "learning_rate": 9.952361809045227e-06, + "loss": 0.1854, + "step": 975 + }, + { + "epoch": 0.56, + "grad_norm": 2.726077079772949, + "learning_rate": 9.949849246231156e-06, + "loss": 0.1941, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 0.1546681970357895, + "eval_runtime": 710.3851, + "eval_samples_per_second": 1.989, + "eval_steps_per_second": 1.989, + "eval_wer": 21.99528220470629, + "step": 1000 + }, + { + "epoch": 0.57, + "grad_norm": 2.5814173221588135, + "learning_rate": 9.947336683417086e-06, + "loss": 0.1841, + "step": 1025 + }, + { + "epoch": 0.59, + "grad_norm": 2.55696177482605, + "learning_rate": 9.944824120603017e-06, + "loss": 0.1886, + "step": 1050 + }, + { + "epoch": 0.6, + "grad_norm": 3.1567795276641846, + "learning_rate": 9.942311557788944e-06, + "loss": 0.1841, + "step": 1075 + }, + { + "epoch": 0.62, + "grad_norm": 3.326760768890381, + "learning_rate": 9.939798994974875e-06, + "loss": 0.1844, + "step": 1100 + }, + { + "epoch": 0.63, + "grad_norm": 2.647516965866089, + "learning_rate": 9.937286432160805e-06, + "loss": 0.1802, + "step": 1125 + }, + { + "epoch": 0.64, + "grad_norm": 2.631408214569092, + "learning_rate": 9.934773869346734e-06, + "loss": 0.1786, + "step": 1150 + }, + { + "epoch": 0.66, + "grad_norm": 2.9023611545562744, + "learning_rate": 9.932261306532665e-06, + "loss": 0.1758, + "step": 1175 + }, + { + "epoch": 0.67, + "grad_norm": 2.4790966510772705, + "learning_rate": 9.929748743718594e-06, + "loss": 0.1758, + "step": 1200 + }, + { + "epoch": 0.69, + "grad_norm": 2.550218105316162, + "learning_rate": 9.927236180904524e-06, + "loss": 0.1721, + "step": 1225 + }, + { + "epoch": 0.7, + "grad_norm": 2.747119426727295, + "learning_rate": 9.924723618090453e-06, + "loss": 0.173, + "step": 1250 + }, + { + "epoch": 0.71, + "grad_norm": 2.7648959159851074, + "learning_rate": 9.922211055276382e-06, + "loss": 0.1724, + "step": 1275 + }, + { + "epoch": 0.73, + "grad_norm": 2.4910073280334473, + "learning_rate": 9.919698492462312e-06, + "loss": 0.1672, + "step": 1300 + }, + { + "epoch": 0.74, + "grad_norm": 2.672913074493408, + "learning_rate": 9.917185929648243e-06, + "loss": 0.1612, + "step": 1325 + }, + { + "epoch": 0.76, + "grad_norm": 2.793551445007324, + "learning_rate": 9.914673366834172e-06, + "loss": 0.1701, + "step": 1350 + }, + { + "epoch": 0.77, + "grad_norm": 2.895176649093628, + "learning_rate": 9.912160804020101e-06, + "loss": 0.1679, + "step": 1375 + }, + { + "epoch": 0.78, + "grad_norm": 2.6390109062194824, + "learning_rate": 9.90964824120603e-06, + "loss": 0.1671, + "step": 1400 + }, + { + "epoch": 0.8, + "grad_norm": 2.698962688446045, + "learning_rate": 9.90713567839196e-06, + "loss": 0.1678, + "step": 1425 + }, + { + "epoch": 0.81, + "grad_norm": 2.418217182159424, + "learning_rate": 9.904623115577891e-06, + "loss": 0.1684, + "step": 1450 + }, + { + "epoch": 0.82, + "grad_norm": 2.463066577911377, + "learning_rate": 9.90211055276382e-06, + "loss": 0.1603, + "step": 1475 + }, + { + "epoch": 0.84, + "grad_norm": 2.4540905952453613, + "learning_rate": 9.89959798994975e-06, + "loss": 0.1618, + "step": 1500 + }, + { + "epoch": 0.85, + "grad_norm": 2.6827480792999268, + "learning_rate": 9.897085427135679e-06, + "loss": 0.1603, + "step": 1525 + }, + { + "epoch": 0.87, + "grad_norm": 2.8632402420043945, + "learning_rate": 9.894572864321608e-06, + "loss": 0.1611, + "step": 1550 + }, + { + "epoch": 0.88, + "grad_norm": 2.491076707839966, + "learning_rate": 9.89206030150754e-06, + "loss": 0.1644, + "step": 1575 + }, + { + "epoch": 0.89, + "grad_norm": 3.1432077884674072, + "learning_rate": 9.889547738693469e-06, + "loss": 0.1535, + "step": 1600 + }, + { + "epoch": 0.91, + "grad_norm": 2.3269588947296143, + "learning_rate": 9.887035175879398e-06, + "loss": 0.1583, + "step": 1625 + }, + { + "epoch": 0.92, + "grad_norm": 2.6731081008911133, + "learning_rate": 9.884522613065327e-06, + "loss": 0.1573, + "step": 1650 + }, + { + "epoch": 0.94, + "grad_norm": 3.008270025253296, + "learning_rate": 9.882010050251256e-06, + "loss": 0.1544, + "step": 1675 + }, + { + "epoch": 0.95, + "grad_norm": 2.757263660430908, + "learning_rate": 9.879497487437186e-06, + "loss": 0.1507, + "step": 1700 + }, + { + "epoch": 0.96, + "grad_norm": 2.3355700969696045, + "learning_rate": 9.876984924623117e-06, + "loss": 0.148, + "step": 1725 + }, + { + "epoch": 0.98, + "grad_norm": 2.198577642440796, + "learning_rate": 9.874472361809046e-06, + "loss": 0.1532, + "step": 1750 + }, + { + "epoch": 0.99, + "grad_norm": 2.4552009105682373, + "learning_rate": 9.871959798994975e-06, + "loss": 0.1514, + "step": 1775 + }, + { + "epoch": 1.01, + "grad_norm": 2.2388720512390137, + "learning_rate": 9.869447236180906e-06, + "loss": 0.1432, + "step": 1800 + }, + { + "epoch": 1.02, + "grad_norm": 2.719278573989868, + "learning_rate": 9.866934673366834e-06, + "loss": 0.134, + "step": 1825 + }, + { + "epoch": 1.03, + "grad_norm": 2.4748311042785645, + "learning_rate": 9.864422110552765e-06, + "loss": 0.1292, + "step": 1850 + }, + { + "epoch": 1.05, + "grad_norm": 2.209710121154785, + "learning_rate": 9.861909547738694e-06, + "loss": 0.1329, + "step": 1875 + }, + { + "epoch": 1.06, + "grad_norm": 2.387752056121826, + "learning_rate": 9.859396984924624e-06, + "loss": 0.1341, + "step": 1900 + }, + { + "epoch": 1.08, + "grad_norm": 2.3665711879730225, + "learning_rate": 9.856884422110553e-06, + "loss": 0.1305, + "step": 1925 + }, + { + "epoch": 1.09, + "grad_norm": 2.6520116329193115, + "learning_rate": 9.854371859296482e-06, + "loss": 0.1323, + "step": 1950 + }, + { + "epoch": 1.1, + "grad_norm": 2.41034197807312, + "learning_rate": 9.851859296482413e-06, + "loss": 0.1275, + "step": 1975 + }, + { + "epoch": 1.12, + "grad_norm": 2.4383058547973633, + "learning_rate": 9.849346733668343e-06, + "loss": 0.1313, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 0.12279482930898666, + "eval_runtime": 677.2263, + "eval_samples_per_second": 2.086, + "eval_steps_per_second": 2.086, + "eval_wer": 17.78954030262931, + "step": 2000 + }, + { + "epoch": 1.13, + "grad_norm": 2.2422139644622803, + "learning_rate": 9.846834170854272e-06, + "loss": 0.1296, + "step": 2025 + }, + { + "epoch": 1.15, + "grad_norm": 2.4497735500335693, + "learning_rate": 9.844321608040201e-06, + "loss": 0.13, + "step": 2050 + }, + { + "epoch": 1.16, + "grad_norm": 2.57629132270813, + "learning_rate": 9.841809045226132e-06, + "loss": 0.129, + "step": 2075 + }, + { + "epoch": 1.17, + "grad_norm": 2.748279571533203, + "learning_rate": 9.83929648241206e-06, + "loss": 0.1283, + "step": 2100 + }, + { + "epoch": 1.19, + "grad_norm": 2.642169713973999, + "learning_rate": 9.836783919597991e-06, + "loss": 0.1304, + "step": 2125 + }, + { + "epoch": 1.2, + "grad_norm": 2.311958074569702, + "learning_rate": 9.83427135678392e-06, + "loss": 0.127, + "step": 2150 + }, + { + "epoch": 1.22, + "grad_norm": 2.2447726726531982, + "learning_rate": 9.83175879396985e-06, + "loss": 0.1236, + "step": 2175 + }, + { + "epoch": 1.23, + "grad_norm": 2.2935266494750977, + "learning_rate": 9.82924623115578e-06, + "loss": 0.1267, + "step": 2200 + }, + { + "epoch": 1.24, + "grad_norm": 2.421398401260376, + "learning_rate": 9.826733668341708e-06, + "loss": 0.1267, + "step": 2225 + }, + { + "epoch": 1.26, + "grad_norm": 2.0578255653381348, + "learning_rate": 9.82422110552764e-06, + "loss": 0.1251, + "step": 2250 + }, + { + "epoch": 1.27, + "grad_norm": 2.7478525638580322, + "learning_rate": 9.821708542713569e-06, + "loss": 0.1279, + "step": 2275 + }, + { + "epoch": 1.29, + "grad_norm": 2.629814863204956, + "learning_rate": 9.819195979899498e-06, + "loss": 0.1249, + "step": 2300 + }, + { + "epoch": 1.3, + "grad_norm": 2.491145610809326, + "learning_rate": 9.816683417085427e-06, + "loss": 0.1258, + "step": 2325 + }, + { + "epoch": 1.31, + "grad_norm": 2.3992831707000732, + "learning_rate": 9.814170854271358e-06, + "loss": 0.1232, + "step": 2350 + }, + { + "epoch": 1.33, + "grad_norm": 2.2892181873321533, + "learning_rate": 9.811658291457288e-06, + "loss": 0.1246, + "step": 2375 + }, + { + "epoch": 1.34, + "grad_norm": 2.3157501220703125, + "learning_rate": 9.809145728643217e-06, + "loss": 0.119, + "step": 2400 + }, + { + "epoch": 1.36, + "grad_norm": 2.321444034576416, + "learning_rate": 9.806633165829146e-06, + "loss": 0.1226, + "step": 2425 + }, + { + "epoch": 1.37, + "grad_norm": 2.145904779434204, + "learning_rate": 9.804120603015076e-06, + "loss": 0.1208, + "step": 2450 + }, + { + "epoch": 1.38, + "grad_norm": 2.0246620178222656, + "learning_rate": 9.801608040201007e-06, + "loss": 0.1165, + "step": 2475 + }, + { + "epoch": 1.4, + "grad_norm": 2.3031256198883057, + "learning_rate": 9.799095477386934e-06, + "loss": 0.1188, + "step": 2500 + }, + { + "epoch": 1.41, + "grad_norm": 2.5238595008850098, + "learning_rate": 9.796582914572865e-06, + "loss": 0.118, + "step": 2525 + }, + { + "epoch": 1.43, + "grad_norm": 2.5228681564331055, + "learning_rate": 9.794070351758795e-06, + "loss": 0.1207, + "step": 2550 + }, + { + "epoch": 1.44, + "grad_norm": 2.4521663188934326, + "learning_rate": 9.791557788944724e-06, + "loss": 0.1244, + "step": 2575 + }, + { + "epoch": 1.45, + "grad_norm": 2.527763605117798, + "learning_rate": 9.789045226130655e-06, + "loss": 0.1193, + "step": 2600 + }, + { + "epoch": 1.47, + "grad_norm": 2.0904929637908936, + "learning_rate": 9.786532663316584e-06, + "loss": 0.1204, + "step": 2625 + }, + { + "epoch": 1.48, + "grad_norm": 2.5250778198242188, + "learning_rate": 9.784020100502514e-06, + "loss": 0.1198, + "step": 2650 + }, + { + "epoch": 1.5, + "grad_norm": 2.2415354251861572, + "learning_rate": 9.781507537688443e-06, + "loss": 0.1245, + "step": 2675 + }, + { + "epoch": 1.51, + "grad_norm": 2.567434787750244, + "learning_rate": 9.778994974874372e-06, + "loss": 0.1217, + "step": 2700 + }, + { + "epoch": 1.52, + "grad_norm": 2.0654029846191406, + "learning_rate": 9.776482412060302e-06, + "loss": 0.1189, + "step": 2725 + }, + { + "epoch": 1.54, + "grad_norm": 2.8258910179138184, + "learning_rate": 9.773969849246233e-06, + "loss": 0.1186, + "step": 2750 + }, + { + "epoch": 1.55, + "grad_norm": 2.470416307449341, + "learning_rate": 9.77145728643216e-06, + "loss": 0.1182, + "step": 2775 + }, + { + "epoch": 1.57, + "grad_norm": 2.3527629375457764, + "learning_rate": 9.768944723618091e-06, + "loss": 0.1152, + "step": 2800 + }, + { + "epoch": 1.58, + "grad_norm": 2.5689525604248047, + "learning_rate": 9.76643216080402e-06, + "loss": 0.1172, + "step": 2825 + }, + { + "epoch": 1.59, + "grad_norm": 2.185293436050415, + "learning_rate": 9.76391959798995e-06, + "loss": 0.1124, + "step": 2850 + }, + { + "epoch": 1.61, + "grad_norm": 2.228426933288574, + "learning_rate": 9.761407035175881e-06, + "loss": 0.1118, + "step": 2875 + }, + { + "epoch": 1.62, + "grad_norm": 2.419212818145752, + "learning_rate": 9.75889447236181e-06, + "loss": 0.115, + "step": 2900 + }, + { + "epoch": 1.64, + "grad_norm": 2.2327044010162354, + "learning_rate": 9.75638190954774e-06, + "loss": 0.1168, + "step": 2925 + }, + { + "epoch": 1.65, + "grad_norm": 2.500908613204956, + "learning_rate": 9.753869346733669e-06, + "loss": 0.1131, + "step": 2950 + }, + { + "epoch": 1.66, + "grad_norm": 2.5576207637786865, + "learning_rate": 9.751356783919598e-06, + "loss": 0.1125, + "step": 2975 + }, + { + "epoch": 1.68, + "grad_norm": 2.1059110164642334, + "learning_rate": 9.74884422110553e-06, + "loss": 0.1139, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 0.10822924226522446, + "eval_runtime": 678.8567, + "eval_samples_per_second": 2.081, + "eval_steps_per_second": 2.081, + "eval_wer": 15.816121051723146, + "step": 3000 + }, + { + "epoch": 1.69, + "grad_norm": 2.2231743335723877, + "learning_rate": 9.746331658291459e-06, + "loss": 0.1151, + "step": 3025 + }, + { + "epoch": 1.71, + "grad_norm": 2.169917583465576, + "learning_rate": 9.743819095477388e-06, + "loss": 0.1129, + "step": 3050 + }, + { + "epoch": 1.72, + "grad_norm": 2.295457363128662, + "learning_rate": 9.741306532663317e-06, + "loss": 0.108, + "step": 3075 + }, + { + "epoch": 1.73, + "grad_norm": 2.0443716049194336, + "learning_rate": 9.738793969849247e-06, + "loss": 0.113, + "step": 3100 + }, + { + "epoch": 1.75, + "grad_norm": 2.169991970062256, + "learning_rate": 9.736281407035176e-06, + "loss": 0.1086, + "step": 3125 + }, + { + "epoch": 1.76, + "grad_norm": 2.7031877040863037, + "learning_rate": 9.733768844221107e-06, + "loss": 0.1111, + "step": 3150 + }, + { + "epoch": 1.78, + "grad_norm": 2.107434034347534, + "learning_rate": 9.731256281407036e-06, + "loss": 0.1136, + "step": 3175 + }, + { + "epoch": 1.79, + "grad_norm": 2.2052252292633057, + "learning_rate": 9.728743718592966e-06, + "loss": 0.1076, + "step": 3200 + }, + { + "epoch": 1.8, + "grad_norm": 2.011760950088501, + "learning_rate": 9.726231155778897e-06, + "loss": 0.1072, + "step": 3225 + }, + { + "epoch": 1.82, + "grad_norm": 2.1108334064483643, + "learning_rate": 9.723718592964824e-06, + "loss": 0.1059, + "step": 3250 + }, + { + "epoch": 1.83, + "grad_norm": 2.069176435470581, + "learning_rate": 9.721206030150755e-06, + "loss": 0.1142, + "step": 3275 + }, + { + "epoch": 1.85, + "grad_norm": 2.0196709632873535, + "learning_rate": 9.718693467336685e-06, + "loss": 0.111, + "step": 3300 + }, + { + "epoch": 1.86, + "grad_norm": 2.477323055267334, + "learning_rate": 9.716180904522614e-06, + "loss": 0.1117, + "step": 3325 + }, + { + "epoch": 1.87, + "grad_norm": 2.2283053398132324, + "learning_rate": 9.713668341708543e-06, + "loss": 0.1101, + "step": 3350 + }, + { + "epoch": 1.89, + "grad_norm": 2.2493302822113037, + "learning_rate": 9.711155778894472e-06, + "loss": 0.1077, + "step": 3375 + }, + { + "epoch": 1.9, + "grad_norm": 2.4783432483673096, + "learning_rate": 9.708643216080402e-06, + "loss": 0.1097, + "step": 3400 + }, + { + "epoch": 1.92, + "grad_norm": 1.9987316131591797, + "learning_rate": 9.706130653266333e-06, + "loss": 0.1078, + "step": 3425 + }, + { + "epoch": 1.93, + "grad_norm": 2.471186876296997, + "learning_rate": 9.703618090452262e-06, + "loss": 0.1069, + "step": 3450 + }, + { + "epoch": 1.94, + "grad_norm": 2.1283512115478516, + "learning_rate": 9.701105527638191e-06, + "loss": 0.1082, + "step": 3475 + }, + { + "epoch": 1.96, + "grad_norm": 2.5182206630706787, + "learning_rate": 9.698592964824122e-06, + "loss": 0.1059, + "step": 3500 + }, + { + "epoch": 1.97, + "grad_norm": 2.2008726596832275, + "learning_rate": 9.69608040201005e-06, + "loss": 0.1076, + "step": 3525 + }, + { + "epoch": 1.99, + "grad_norm": 2.245281934738159, + "learning_rate": 9.693567839195981e-06, + "loss": 0.1024, + "step": 3550 + }, + { + "epoch": 2.0, + "grad_norm": 2.3658406734466553, + "learning_rate": 9.69105527638191e-06, + "loss": 0.1108, + "step": 3575 + }, + { + "epoch": 2.01, + "grad_norm": 2.231194257736206, + "learning_rate": 9.68854271356784e-06, + "loss": 0.0851, + "step": 3600 + }, + { + "epoch": 2.03, + "grad_norm": 2.235867977142334, + "learning_rate": 9.68603015075377e-06, + "loss": 0.0903, + "step": 3625 + }, + { + "epoch": 2.04, + "grad_norm": 2.1218183040618896, + "learning_rate": 9.683517587939698e-06, + "loss": 0.0892, + "step": 3650 + }, + { + "epoch": 2.06, + "grad_norm": 2.307832956314087, + "learning_rate": 9.68100502512563e-06, + "loss": 0.0885, + "step": 3675 + }, + { + "epoch": 2.07, + "grad_norm": 2.0942413806915283, + "learning_rate": 9.678492462311559e-06, + "loss": 0.0884, + "step": 3700 + }, + { + "epoch": 2.08, + "grad_norm": 2.0379369258880615, + "learning_rate": 9.675979899497488e-06, + "loss": 0.0861, + "step": 3725 + }, + { + "epoch": 2.1, + "grad_norm": 1.968270182609558, + "learning_rate": 9.673467336683417e-06, + "loss": 0.0871, + "step": 3750 + }, + { + "epoch": 2.11, + "grad_norm": 1.8443219661712646, + "learning_rate": 9.670954773869348e-06, + "loss": 0.0862, + "step": 3775 + }, + { + "epoch": 2.13, + "grad_norm": 2.250330686569214, + "learning_rate": 9.668442211055276e-06, + "loss": 0.0901, + "step": 3800 + }, + { + "epoch": 2.14, + "grad_norm": 1.9912415742874146, + "learning_rate": 9.665929648241207e-06, + "loss": 0.0873, + "step": 3825 + }, + { + "epoch": 2.15, + "grad_norm": 1.9895495176315308, + "learning_rate": 9.663417085427136e-06, + "loss": 0.0882, + "step": 3850 + }, + { + "epoch": 2.17, + "grad_norm": 2.3159797191619873, + "learning_rate": 9.660904522613066e-06, + "loss": 0.0892, + "step": 3875 + }, + { + "epoch": 2.18, + "grad_norm": 1.920285701751709, + "learning_rate": 9.658391959798997e-06, + "loss": 0.0838, + "step": 3900 + }, + { + "epoch": 2.2, + "grad_norm": 2.3233156204223633, + "learning_rate": 9.655879396984924e-06, + "loss": 0.085, + "step": 3925 + }, + { + "epoch": 2.21, + "grad_norm": 2.1708197593688965, + "learning_rate": 9.653366834170855e-06, + "loss": 0.0913, + "step": 3950 + }, + { + "epoch": 2.22, + "grad_norm": 2.224600315093994, + "learning_rate": 9.650854271356785e-06, + "loss": 0.0874, + "step": 3975 + }, + { + "epoch": 2.24, + "grad_norm": 2.685110092163086, + "learning_rate": 9.648341708542714e-06, + "loss": 0.0901, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 0.10448439419269562, + "eval_runtime": 676.9106, + "eval_samples_per_second": 2.087, + "eval_steps_per_second": 2.087, + "eval_wer": 15.413382429089234, + "step": 4000 + }, + { + "epoch": 2.25, + "grad_norm": 2.0230889320373535, + "learning_rate": 9.645829145728643e-06, + "loss": 0.085, + "step": 4025 + }, + { + "epoch": 2.27, + "grad_norm": 2.3776917457580566, + "learning_rate": 9.643316582914574e-06, + "loss": 0.0837, + "step": 4050 + }, + { + "epoch": 2.28, + "grad_norm": 2.0375349521636963, + "learning_rate": 9.640804020100504e-06, + "loss": 0.0893, + "step": 4075 + }, + { + "epoch": 2.29, + "grad_norm": 2.1879827976226807, + "learning_rate": 9.638291457286433e-06, + "loss": 0.0835, + "step": 4100 + }, + { + "epoch": 2.31, + "grad_norm": 2.2006475925445557, + "learning_rate": 9.635879396984925e-06, + "loss": 0.0862, + "step": 4125 + }, + { + "epoch": 2.32, + "grad_norm": 2.18501877784729, + "learning_rate": 9.633366834170856e-06, + "loss": 0.0841, + "step": 4150 + }, + { + "epoch": 2.34, + "grad_norm": 2.2227416038513184, + "learning_rate": 9.630854271356785e-06, + "loss": 0.0877, + "step": 4175 + }, + { + "epoch": 2.35, + "grad_norm": 1.9900517463684082, + "learning_rate": 9.628341708542714e-06, + "loss": 0.0851, + "step": 4200 + }, + { + "epoch": 2.36, + "grad_norm": 1.8921295404434204, + "learning_rate": 9.625829145728644e-06, + "loss": 0.0858, + "step": 4225 + }, + { + "epoch": 2.38, + "grad_norm": 1.9083319902420044, + "learning_rate": 9.623316582914573e-06, + "loss": 0.0827, + "step": 4250 + }, + { + "epoch": 2.39, + "grad_norm": 2.352965831756592, + "learning_rate": 9.620804020100504e-06, + "loss": 0.0849, + "step": 4275 + }, + { + "epoch": 2.4, + "grad_norm": 2.0110859870910645, + "learning_rate": 9.618291457286433e-06, + "loss": 0.0838, + "step": 4300 + }, + { + "epoch": 2.42, + "grad_norm": 2.159532070159912, + "learning_rate": 9.615778894472363e-06, + "loss": 0.0863, + "step": 4325 + }, + { + "epoch": 2.43, + "grad_norm": 2.185480833053589, + "learning_rate": 9.613266331658292e-06, + "loss": 0.0873, + "step": 4350 + }, + { + "epoch": 2.45, + "grad_norm": 1.7967451810836792, + "learning_rate": 9.610753768844223e-06, + "loss": 0.0844, + "step": 4375 + }, + { + "epoch": 2.46, + "grad_norm": 2.2678308486938477, + "learning_rate": 9.60824120603015e-06, + "loss": 0.0837, + "step": 4400 + }, + { + "epoch": 2.47, + "grad_norm": 2.1582045555114746, + "learning_rate": 9.605728643216082e-06, + "loss": 0.0847, + "step": 4425 + }, + { + "epoch": 2.49, + "grad_norm": 2.0805447101593018, + "learning_rate": 9.60321608040201e-06, + "loss": 0.0821, + "step": 4450 + }, + { + "epoch": 2.5, + "grad_norm": 2.0734381675720215, + "learning_rate": 9.60070351758794e-06, + "loss": 0.0816, + "step": 4475 + }, + { + "epoch": 2.52, + "grad_norm": 2.0650417804718018, + "learning_rate": 9.598190954773871e-06, + "loss": 0.0799, + "step": 4500 + }, + { + "epoch": 2.53, + "grad_norm": 2.05556321144104, + "learning_rate": 9.595678391959799e-06, + "loss": 0.0792, + "step": 4525 + }, + { + "epoch": 2.54, + "grad_norm": 1.783263921737671, + "learning_rate": 9.59316582914573e-06, + "loss": 0.0842, + "step": 4550 + }, + { + "epoch": 2.56, + "grad_norm": 2.309741258621216, + "learning_rate": 9.59065326633166e-06, + "loss": 0.0801, + "step": 4575 + }, + { + "epoch": 2.57, + "grad_norm": 2.1626064777374268, + "learning_rate": 9.588140703517588e-06, + "loss": 0.0843, + "step": 4600 + }, + { + "epoch": 2.59, + "grad_norm": 2.442471742630005, + "learning_rate": 9.585628140703518e-06, + "loss": 0.0839, + "step": 4625 + }, + { + "epoch": 2.6, + "grad_norm": 2.201251268386841, + "learning_rate": 9.583115577889449e-06, + "loss": 0.0808, + "step": 4650 + }, + { + "epoch": 2.61, + "grad_norm": 2.1837899684906006, + "learning_rate": 9.580603015075378e-06, + "loss": 0.0848, + "step": 4675 + }, + { + "epoch": 2.63, + "grad_norm": 1.8953593969345093, + "learning_rate": 9.578090452261307e-06, + "loss": 0.0831, + "step": 4700 + }, + { + "epoch": 2.64, + "grad_norm": 2.0459980964660645, + "learning_rate": 9.575577889447237e-06, + "loss": 0.0771, + "step": 4725 + }, + { + "epoch": 2.66, + "grad_norm": 2.172243356704712, + "learning_rate": 9.573065326633166e-06, + "loss": 0.0787, + "step": 4750 + }, + { + "epoch": 2.67, + "grad_norm": 2.168787956237793, + "learning_rate": 9.570552763819097e-06, + "loss": 0.0815, + "step": 4775 + }, + { + "epoch": 2.68, + "grad_norm": 2.4168004989624023, + "learning_rate": 9.568040201005025e-06, + "loss": 0.0776, + "step": 4800 + }, + { + "epoch": 2.7, + "grad_norm": 2.1056787967681885, + "learning_rate": 9.565527638190956e-06, + "loss": 0.078, + "step": 4825 + }, + { + "epoch": 2.71, + "grad_norm": 2.0550010204315186, + "learning_rate": 9.563015075376885e-06, + "loss": 0.079, + "step": 4850 + }, + { + "epoch": 2.73, + "grad_norm": 2.2736563682556152, + "learning_rate": 9.560502512562814e-06, + "loss": 0.0829, + "step": 4875 + }, + { + "epoch": 2.74, + "grad_norm": 1.7727265357971191, + "learning_rate": 9.557989949748745e-06, + "loss": 0.08, + "step": 4900 + }, + { + "epoch": 2.75, + "grad_norm": 1.983271598815918, + "learning_rate": 9.555477386934675e-06, + "loss": 0.0799, + "step": 4925 + }, + { + "epoch": 2.77, + "grad_norm": 2.030801296234131, + "learning_rate": 9.552964824120604e-06, + "loss": 0.083, + "step": 4950 + }, + { + "epoch": 2.78, + "grad_norm": 2.5478954315185547, + "learning_rate": 9.550452261306533e-06, + "loss": 0.0779, + "step": 4975 + }, + { + "epoch": 2.8, + "grad_norm": 1.9575772285461426, + "learning_rate": 9.547939698492463e-06, + "loss": 0.0823, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 0.09735996276140213, + "eval_runtime": 668.4343, + "eval_samples_per_second": 2.114, + "eval_steps_per_second": 2.114, + "eval_wer": 14.596398366031874, + "step": 5000 + }, + { + "epoch": 2.81, + "grad_norm": 1.9496409893035889, + "learning_rate": 9.545427135678392e-06, + "loss": 0.079, + "step": 5025 + }, + { + "epoch": 2.82, + "grad_norm": 2.3001370429992676, + "learning_rate": 9.542914572864323e-06, + "loss": 0.0807, + "step": 5050 + }, + { + "epoch": 2.84, + "grad_norm": 2.3679587841033936, + "learning_rate": 9.540402010050252e-06, + "loss": 0.08, + "step": 5075 + }, + { + "epoch": 2.85, + "grad_norm": 2.019852876663208, + "learning_rate": 9.537889447236182e-06, + "loss": 0.0838, + "step": 5100 + }, + { + "epoch": 2.87, + "grad_norm": 2.1242547035217285, + "learning_rate": 9.535376884422111e-06, + "loss": 0.0804, + "step": 5125 + }, + { + "epoch": 2.88, + "grad_norm": 2.547555923461914, + "learning_rate": 9.53286432160804e-06, + "loss": 0.0839, + "step": 5150 + }, + { + "epoch": 2.89, + "grad_norm": 1.7543941736221313, + "learning_rate": 9.530351758793971e-06, + "loss": 0.0779, + "step": 5175 + }, + { + "epoch": 2.91, + "grad_norm": 1.6930170059204102, + "learning_rate": 9.5278391959799e-06, + "loss": 0.0762, + "step": 5200 + }, + { + "epoch": 2.92, + "grad_norm": 2.7235805988311768, + "learning_rate": 9.52532663316583e-06, + "loss": 0.083, + "step": 5225 + }, + { + "epoch": 2.94, + "grad_norm": 2.17390513420105, + "learning_rate": 9.52281407035176e-06, + "loss": 0.0768, + "step": 5250 + }, + { + "epoch": 2.95, + "grad_norm": 2.0930418968200684, + "learning_rate": 9.520301507537689e-06, + "loss": 0.0806, + "step": 5275 + }, + { + "epoch": 2.96, + "grad_norm": 2.205819606781006, + "learning_rate": 9.51778894472362e-06, + "loss": 0.0781, + "step": 5300 + }, + { + "epoch": 2.98, + "grad_norm": 1.8624869585037231, + "learning_rate": 9.515276381909549e-06, + "loss": 0.0789, + "step": 5325 + }, + { + "epoch": 2.99, + "grad_norm": 2.392763376235962, + "learning_rate": 9.512763819095478e-06, + "loss": 0.0783, + "step": 5350 + }, + { + "epoch": 3.01, + "grad_norm": 1.6396794319152832, + "learning_rate": 9.510251256281408e-06, + "loss": 0.072, + "step": 5375 + }, + { + "epoch": 3.02, + "grad_norm": 1.9565173387527466, + "learning_rate": 9.507738693467337e-06, + "loss": 0.0601, + "step": 5400 + }, + { + "epoch": 3.03, + "grad_norm": 1.769208312034607, + "learning_rate": 9.505226130653266e-06, + "loss": 0.0605, + "step": 5425 + }, + { + "epoch": 3.05, + "grad_norm": 1.9820194244384766, + "learning_rate": 9.502713567839197e-06, + "loss": 0.0622, + "step": 5450 + }, + { + "epoch": 3.06, + "grad_norm": 1.972813367843628, + "learning_rate": 9.500201005025127e-06, + "loss": 0.0594, + "step": 5475 + }, + { + "epoch": 3.08, + "grad_norm": 2.2712409496307373, + "learning_rate": 9.497688442211056e-06, + "loss": 0.0619, + "step": 5500 + }, + { + "epoch": 3.09, + "grad_norm": 1.9396847486495972, + "learning_rate": 9.495175879396987e-06, + "loss": 0.0597, + "step": 5525 + }, + { + "epoch": 3.1, + "grad_norm": 2.372843027114868, + "learning_rate": 9.492663316582915e-06, + "loss": 0.0566, + "step": 5550 + }, + { + "epoch": 3.12, + "grad_norm": 1.6251579523086548, + "learning_rate": 9.490150753768846e-06, + "loss": 0.0607, + "step": 5575 + }, + { + "epoch": 3.13, + "grad_norm": 1.8766635656356812, + "learning_rate": 9.487638190954775e-06, + "loss": 0.0612, + "step": 5600 + }, + { + "epoch": 3.15, + "grad_norm": 2.07450008392334, + "learning_rate": 9.485125628140704e-06, + "loss": 0.0617, + "step": 5625 + }, + { + "epoch": 3.16, + "grad_norm": 2.1745102405548096, + "learning_rate": 9.482613065326634e-06, + "loss": 0.0611, + "step": 5650 + }, + { + "epoch": 3.17, + "grad_norm": 2.009330987930298, + "learning_rate": 9.480100502512563e-06, + "loss": 0.0641, + "step": 5675 + }, + { + "epoch": 3.19, + "grad_norm": 2.073385238647461, + "learning_rate": 9.477587939698494e-06, + "loss": 0.0613, + "step": 5700 + }, + { + "epoch": 3.2, + "grad_norm": 1.9672677516937256, + "learning_rate": 9.475075376884423e-06, + "loss": 0.0615, + "step": 5725 + }, + { + "epoch": 3.22, + "grad_norm": 2.165126085281372, + "learning_rate": 9.472562814070353e-06, + "loss": 0.0622, + "step": 5750 + }, + { + "epoch": 3.23, + "grad_norm": 2.0650722980499268, + "learning_rate": 9.470050251256282e-06, + "loss": 0.0602, + "step": 5775 + }, + { + "epoch": 3.24, + "grad_norm": 2.5429232120513916, + "learning_rate": 9.467537688442213e-06, + "loss": 0.0611, + "step": 5800 + }, + { + "epoch": 3.26, + "grad_norm": 2.0629403591156006, + "learning_rate": 9.46502512562814e-06, + "loss": 0.0619, + "step": 5825 + }, + { + "epoch": 3.27, + "grad_norm": 2.4412765502929688, + "learning_rate": 9.462512562814072e-06, + "loss": 0.0617, + "step": 5850 + }, + { + "epoch": 3.29, + "grad_norm": 2.0057930946350098, + "learning_rate": 9.460000000000001e-06, + "loss": 0.0589, + "step": 5875 + }, + { + "epoch": 3.3, + "grad_norm": 1.9777556657791138, + "learning_rate": 9.45748743718593e-06, + "loss": 0.0642, + "step": 5900 + }, + { + "epoch": 3.31, + "grad_norm": 2.1047022342681885, + "learning_rate": 9.454974874371861e-06, + "loss": 0.0581, + "step": 5925 + }, + { + "epoch": 3.33, + "grad_norm": 1.9299229383468628, + "learning_rate": 9.452462311557789e-06, + "loss": 0.0604, + "step": 5950 + }, + { + "epoch": 3.34, + "grad_norm": 1.7390228509902954, + "learning_rate": 9.44994974874372e-06, + "loss": 0.0629, + "step": 5975 + }, + { + "epoch": 3.36, + "grad_norm": 2.1520278453826904, + "learning_rate": 9.44743718592965e-06, + "loss": 0.0583, + "step": 6000 + }, + { + "epoch": 3.36, + "eval_loss": 0.10234396904706955, + "eval_runtime": 676.9068, + "eval_samples_per_second": 2.087, + "eval_steps_per_second": 2.087, + "eval_wer": 14.46406996145216, + "step": 6000 + }, + { + "epoch": 3.37, + "grad_norm": 1.799324870109558, + "learning_rate": 9.444924623115579e-06, + "loss": 0.0596, + "step": 6025 + }, + { + "epoch": 3.38, + "grad_norm": 1.8605114221572876, + "learning_rate": 9.442412060301508e-06, + "loss": 0.0578, + "step": 6050 + }, + { + "epoch": 3.4, + "grad_norm": 2.6501071453094482, + "learning_rate": 9.439899497487439e-06, + "loss": 0.0594, + "step": 6075 + }, + { + "epoch": 3.41, + "grad_norm": 2.2467777729034424, + "learning_rate": 9.437386934673367e-06, + "loss": 0.0581, + "step": 6100 + }, + { + "epoch": 3.43, + "grad_norm": 2.166944980621338, + "learning_rate": 9.434874371859298e-06, + "loss": 0.0628, + "step": 6125 + }, + { + "epoch": 3.44, + "grad_norm": 2.1419265270233154, + "learning_rate": 9.432361809045227e-06, + "loss": 0.0629, + "step": 6150 + }, + { + "epoch": 3.45, + "grad_norm": 2.0849955081939697, + "learning_rate": 9.429849246231156e-06, + "loss": 0.0628, + "step": 6175 + }, + { + "epoch": 3.47, + "grad_norm": 2.152421712875366, + "learning_rate": 9.427336683417087e-06, + "loss": 0.0587, + "step": 6200 + }, + { + "epoch": 3.48, + "grad_norm": 1.8509870767593384, + "learning_rate": 9.424824120603015e-06, + "loss": 0.0578, + "step": 6225 + }, + { + "epoch": 3.5, + "grad_norm": 2.2127346992492676, + "learning_rate": 9.422311557788946e-06, + "loss": 0.0592, + "step": 6250 + }, + { + "epoch": 3.51, + "grad_norm": 2.099177122116089, + "learning_rate": 9.419798994974875e-06, + "loss": 0.0584, + "step": 6275 + }, + { + "epoch": 3.52, + "grad_norm": 2.1693599224090576, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0619, + "step": 6300 + }, + { + "epoch": 3.54, + "grad_norm": 2.5007553100585938, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0589, + "step": 6325 + }, + { + "epoch": 3.55, + "grad_norm": 2.0171797275543213, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0595, + "step": 6350 + }, + { + "epoch": 3.57, + "grad_norm": 2.293124198913574, + "learning_rate": 9.409849246231156e-06, + "loss": 0.063, + "step": 6375 + }, + { + "epoch": 3.58, + "grad_norm": 2.2081234455108643, + "learning_rate": 9.407336683417086e-06, + "loss": 0.063, + "step": 6400 + }, + { + "epoch": 3.59, + "grad_norm": 2.4747345447540283, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0604, + "step": 6425 + }, + { + "epoch": 3.61, + "grad_norm": 1.977877140045166, + "learning_rate": 9.402311557788946e-06, + "loss": 0.062, + "step": 6450 + }, + { + "epoch": 3.62, + "grad_norm": 2.394362211227417, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0574, + "step": 6475 + }, + { + "epoch": 3.64, + "grad_norm": 2.2917609214782715, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0608, + "step": 6500 + }, + { + "epoch": 3.65, + "grad_norm": 1.7538307905197144, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0576, + "step": 6525 + }, + { + "epoch": 3.66, + "grad_norm": 2.057720184326172, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0567, + "step": 6550 + }, + { + "epoch": 3.68, + "grad_norm": 2.4363086223602295, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0607, + "step": 6575 + }, + { + "epoch": 3.69, + "grad_norm": 3.339693546295166, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0629, + "step": 6600 + }, + { + "epoch": 3.71, + "grad_norm": 2.443676710128784, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0614, + "step": 6625 + }, + { + "epoch": 3.72, + "grad_norm": 1.9510763883590698, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0593, + "step": 6650 + }, + { + "epoch": 3.73, + "grad_norm": 2.2457497119903564, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0597, + "step": 6675 + }, + { + "epoch": 3.75, + "grad_norm": 2.0239596366882324, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0588, + "step": 6700 + }, + { + "epoch": 3.76, + "grad_norm": 1.8981671333312988, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0565, + "step": 6725 + }, + { + "epoch": 3.78, + "grad_norm": 2.010528802871704, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0572, + "step": 6750 + }, + { + "epoch": 3.79, + "grad_norm": 1.8298327922821045, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0563, + "step": 6775 + }, + { + "epoch": 3.8, + "grad_norm": 2.0941715240478516, + "learning_rate": 9.367135678391962e-06, + "loss": 0.058, + "step": 6800 + }, + { + "epoch": 3.82, + "grad_norm": 1.9318456649780273, + "learning_rate": 9.36462311557789e-06, + "loss": 0.057, + "step": 6825 + }, + { + "epoch": 3.83, + "grad_norm": 2.243870258331299, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0595, + "step": 6850 + }, + { + "epoch": 3.85, + "grad_norm": 1.9291200637817383, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0559, + "step": 6875 + }, + { + "epoch": 3.86, + "grad_norm": 1.850081205368042, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0597, + "step": 6900 + }, + { + "epoch": 3.87, + "grad_norm": 2.0577139854431152, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0554, + "step": 6925 + }, + { + "epoch": 3.89, + "grad_norm": 2.8981473445892334, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0606, + "step": 6950 + }, + { + "epoch": 3.9, + "grad_norm": 1.9889436960220337, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0579, + "step": 6975 + }, + { + "epoch": 3.91, + "grad_norm": 2.1812872886657715, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0558, + "step": 7000 + }, + { + "epoch": 3.91, + "eval_loss": 0.09919197857379913, + "eval_runtime": 672.9701, + "eval_samples_per_second": 2.1, + "eval_steps_per_second": 2.1, + "eval_wer": 14.360508601346297, + "step": 7000 + }, + { + "epoch": 3.93, + "grad_norm": 2.1293954849243164, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0541, + "step": 7025 + }, + { + "epoch": 3.94, + "grad_norm": 1.9174184799194336, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0556, + "step": 7050 + }, + { + "epoch": 3.96, + "grad_norm": 2.169060707092285, + "learning_rate": 9.339497487437188e-06, + "loss": 0.057, + "step": 7075 + }, + { + "epoch": 3.97, + "grad_norm": 2.074878692626953, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0561, + "step": 7100 + }, + { + "epoch": 3.98, + "grad_norm": 2.2435126304626465, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0553, + "step": 7125 + }, + { + "epoch": 4.0, + "grad_norm": 2.362947463989258, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0573, + "step": 7150 + }, + { + "epoch": 4.01, + "grad_norm": 2.087655782699585, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0448, + "step": 7175 + }, + { + "epoch": 4.03, + "grad_norm": 1.8595786094665527, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0421, + "step": 7200 + }, + { + "epoch": 4.04, + "grad_norm": 1.4074618816375732, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0386, + "step": 7225 + }, + { + "epoch": 4.05, + "grad_norm": 1.706325888633728, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0396, + "step": 7250 + }, + { + "epoch": 4.07, + "grad_norm": 1.8378698825836182, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0414, + "step": 7275 + }, + { + "epoch": 4.08, + "grad_norm": 1.7179068326950073, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0411, + "step": 7300 + }, + { + "epoch": 4.1, + "grad_norm": 1.9358149766921997, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0403, + "step": 7325 + }, + { + "epoch": 4.11, + "grad_norm": 1.9292702674865723, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0432, + "step": 7350 + }, + { + "epoch": 4.12, + "grad_norm": 1.9096845388412476, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0393, + "step": 7375 + }, + { + "epoch": 4.14, + "grad_norm": 2.2434754371643066, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0415, + "step": 7400 + }, + { + "epoch": 4.15, + "grad_norm": 1.632128119468689, + "learning_rate": 9.304321608040201e-06, + "loss": 0.042, + "step": 7425 + }, + { + "epoch": 4.17, + "grad_norm": 2.089311122894287, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0401, + "step": 7450 + }, + { + "epoch": 4.18, + "grad_norm": 1.7298128604888916, + "learning_rate": 9.299296482412062e-06, + "loss": 0.04, + "step": 7475 + }, + { + "epoch": 4.19, + "grad_norm": 1.9498367309570312, + "learning_rate": 9.296783919597991e-06, + "loss": 0.041, + "step": 7500 + }, + { + "epoch": 4.21, + "grad_norm": 2.0158305168151855, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0423, + "step": 7525 + }, + { + "epoch": 4.22, + "grad_norm": 1.886702299118042, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0426, + "step": 7550 + }, + { + "epoch": 4.24, + "grad_norm": 1.8279521465301514, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0414, + "step": 7575 + }, + { + "epoch": 4.25, + "grad_norm": 1.8654464483261108, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0398, + "step": 7600 + }, + { + "epoch": 4.26, + "grad_norm": 2.0675809383392334, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0413, + "step": 7625 + }, + { + "epoch": 4.28, + "grad_norm": 1.7680038213729858, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0431, + "step": 7650 + }, + { + "epoch": 4.29, + "grad_norm": 2.2940163612365723, + "learning_rate": 9.279195979899498e-06, + "loss": 0.042, + "step": 7675 + }, + { + "epoch": 4.31, + "grad_norm": 1.7312703132629395, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0443, + "step": 7700 + }, + { + "epoch": 4.32, + "grad_norm": 1.8352696895599365, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0391, + "step": 7725 + }, + { + "epoch": 4.33, + "grad_norm": 1.9055894613265991, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0432, + "step": 7750 + }, + { + "epoch": 4.35, + "grad_norm": 2.128627061843872, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0425, + "step": 7775 + }, + { + "epoch": 4.36, + "grad_norm": 1.737990140914917, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0416, + "step": 7800 + }, + { + "epoch": 4.38, + "grad_norm": 2.123157262802124, + "learning_rate": 9.264120603015076e-06, + "loss": 0.043, + "step": 7825 + }, + { + "epoch": 4.39, + "grad_norm": 2.3434855937957764, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0399, + "step": 7850 + }, + { + "epoch": 4.4, + "grad_norm": 2.0191001892089844, + "learning_rate": 9.259095477386936e-06, + "loss": 0.042, + "step": 7875 + }, + { + "epoch": 4.42, + "grad_norm": 1.7619872093200684, + "learning_rate": 9.256582914572865e-06, + "loss": 0.04, + "step": 7900 + }, + { + "epoch": 4.43, + "grad_norm": 2.2720260620117188, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0429, + "step": 7925 + }, + { + "epoch": 4.45, + "grad_norm": 1.8458821773529053, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0396, + "step": 7950 + }, + { + "epoch": 4.46, + "grad_norm": 2.207663059234619, + "learning_rate": 9.249045226130653e-06, + "loss": 0.041, + "step": 7975 + }, + { + "epoch": 4.47, + "grad_norm": 2.076078176498413, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0385, + "step": 8000 + }, + { + "epoch": 4.47, + "eval_loss": 0.10598404705524445, + "eval_runtime": 673.09, + "eval_samples_per_second": 2.099, + "eval_steps_per_second": 2.099, + "eval_wer": 13.975030205396695, + "step": 8000 + }, + { + "epoch": 4.49, + "grad_norm": 2.0860214233398438, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0399, + "step": 8025 + }, + { + "epoch": 4.5, + "grad_norm": 1.837888240814209, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0405, + "step": 8050 + }, + { + "epoch": 4.52, + "grad_norm": 2.221224784851074, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0426, + "step": 8075 + }, + { + "epoch": 4.53, + "grad_norm": 2.0723636150360107, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0428, + "step": 8100 + }, + { + "epoch": 4.54, + "grad_norm": 2.1072254180908203, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0446, + "step": 8125 + }, + { + "epoch": 4.56, + "grad_norm": 2.0675926208496094, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0432, + "step": 8150 + }, + { + "epoch": 4.57, + "grad_norm": 1.8129034042358398, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0421, + "step": 8175 + }, + { + "epoch": 4.59, + "grad_norm": 1.7693535089492798, + "learning_rate": 9.22643216080402e-06, + "loss": 0.04, + "step": 8200 + }, + { + "epoch": 4.6, + "grad_norm": 2.395664930343628, + "learning_rate": 9.223919597989952e-06, + "loss": 0.043, + "step": 8225 + }, + { + "epoch": 4.61, + "grad_norm": 2.4626173973083496, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0445, + "step": 8250 + }, + { + "epoch": 4.63, + "grad_norm": 2.014711380004883, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0406, + "step": 8275 + }, + { + "epoch": 4.64, + "grad_norm": 2.398771286010742, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0413, + "step": 8300 + }, + { + "epoch": 4.66, + "grad_norm": 1.534219741821289, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0414, + "step": 8325 + }, + { + "epoch": 4.67, + "grad_norm": 2.169102907180786, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0414, + "step": 8350 + }, + { + "epoch": 4.68, + "grad_norm": 1.9306070804595947, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0417, + "step": 8375 + }, + { + "epoch": 4.7, + "grad_norm": 2.0716676712036133, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0414, + "step": 8400 + }, + { + "epoch": 4.71, + "grad_norm": 2.0713541507720947, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0402, + "step": 8425 + }, + { + "epoch": 4.73, + "grad_norm": 1.7349578142166138, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0433, + "step": 8450 + }, + { + "epoch": 4.74, + "grad_norm": 1.5610567331314087, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0436, + "step": 8475 + }, + { + "epoch": 4.75, + "grad_norm": 1.8884963989257812, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0423, + "step": 8500 + }, + { + "epoch": 4.77, + "grad_norm": 2.170077323913574, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0407, + "step": 8525 + }, + { + "epoch": 4.78, + "grad_norm": 2.077347993850708, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0404, + "step": 8550 + }, + { + "epoch": 4.8, + "grad_norm": 2.0732617378234863, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0417, + "step": 8575 + }, + { + "epoch": 4.81, + "grad_norm": 2.1588826179504395, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0416, + "step": 8600 + }, + { + "epoch": 4.82, + "grad_norm": 2.027784824371338, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0401, + "step": 8625 + }, + { + "epoch": 4.84, + "grad_norm": 1.9872699975967407, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0413, + "step": 8650 + }, + { + "epoch": 4.85, + "grad_norm": 1.699783444404602, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0437, + "step": 8675 + }, + { + "epoch": 4.87, + "grad_norm": 1.877113938331604, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0426, + "step": 8700 + }, + { + "epoch": 4.88, + "grad_norm": 2.81520414352417, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0439, + "step": 8725 + }, + { + "epoch": 4.89, + "grad_norm": 2.038836717605591, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0411, + "step": 8750 + }, + { + "epoch": 4.91, + "grad_norm": 2.3348782062530518, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0389, + "step": 8775 + }, + { + "epoch": 4.92, + "grad_norm": 1.626365065574646, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0383, + "step": 8800 + }, + { + "epoch": 4.94, + "grad_norm": 1.887453317642212, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0395, + "step": 8825 + }, + { + "epoch": 4.95, + "grad_norm": 2.1265499591827393, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0397, + "step": 8850 + }, + { + "epoch": 4.96, + "grad_norm": 2.506556272506714, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0405, + "step": 8875 + }, + { + "epoch": 4.98, + "grad_norm": 2.012840747833252, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0416, + "step": 8900 + }, + { + "epoch": 4.99, + "grad_norm": 1.933095932006836, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0417, + "step": 8925 + }, + { + "epoch": 5.01, + "grad_norm": 1.468355655670166, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0344, + "step": 8950 + }, + { + "epoch": 5.02, + "grad_norm": 1.2304960489273071, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0268, + "step": 8975 + }, + { + "epoch": 5.03, + "grad_norm": 1.939210057258606, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0289, + "step": 9000 + }, + { + "epoch": 5.03, + "eval_loss": 0.11269509792327881, + "eval_runtime": 668.1436, + "eval_samples_per_second": 2.115, + "eval_steps_per_second": 2.115, + "eval_wer": 14.751740406190669, + "step": 9000 + }, + { + "epoch": 5.05, + "grad_norm": 1.6851907968521118, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0275, + "step": 9025 + }, + { + "epoch": 5.06, + "grad_norm": 1.8670835494995117, + "learning_rate": 9.141105527638192e-06, + "loss": 0.026, + "step": 9050 + }, + { + "epoch": 5.08, + "grad_norm": 1.6536991596221924, + "learning_rate": 9.138693467336685e-06, + "loss": 0.0289, + "step": 9075 + }, + { + "epoch": 5.09, + "grad_norm": 1.744946837425232, + "learning_rate": 9.136180904522614e-06, + "loss": 0.0262, + "step": 9100 + }, + { + "epoch": 5.1, + "grad_norm": 1.892578363418579, + "learning_rate": 9.133668341708544e-06, + "loss": 0.027, + "step": 9125 + }, + { + "epoch": 5.12, + "grad_norm": 1.833176612854004, + "learning_rate": 9.131155778894473e-06, + "loss": 0.0298, + "step": 9150 + }, + { + "epoch": 5.13, + "grad_norm": 1.7283798456192017, + "learning_rate": 9.128643216080402e-06, + "loss": 0.0266, + "step": 9175 + }, + { + "epoch": 5.15, + "grad_norm": 1.737438678741455, + "learning_rate": 9.126130653266332e-06, + "loss": 0.0251, + "step": 9200 + }, + { + "epoch": 5.16, + "grad_norm": 1.9309884309768677, + "learning_rate": 9.123618090452263e-06, + "loss": 0.0266, + "step": 9225 + }, + { + "epoch": 5.17, + "grad_norm": 1.7976386547088623, + "learning_rate": 9.121105527638192e-06, + "loss": 0.0278, + "step": 9250 + }, + { + "epoch": 5.19, + "grad_norm": 2.064976930618286, + "learning_rate": 9.118592964824121e-06, + "loss": 0.0256, + "step": 9275 + }, + { + "epoch": 5.2, + "grad_norm": 2.112159490585327, + "learning_rate": 9.11608040201005e-06, + "loss": 0.0287, + "step": 9300 + }, + { + "epoch": 5.22, + "grad_norm": 1.832209825515747, + "learning_rate": 9.11356783919598e-06, + "loss": 0.0281, + "step": 9325 + }, + { + "epoch": 5.23, + "grad_norm": 1.8899515867233276, + "learning_rate": 9.111055276381911e-06, + "loss": 0.0279, + "step": 9350 + }, + { + "epoch": 5.24, + "grad_norm": 1.475650429725647, + "learning_rate": 9.10854271356784e-06, + "loss": 0.0254, + "step": 9375 + }, + { + "epoch": 5.26, + "grad_norm": 1.8875083923339844, + "learning_rate": 9.10603015075377e-06, + "loss": 0.0273, + "step": 9400 + }, + { + "epoch": 5.27, + "grad_norm": 2.1313300132751465, + "learning_rate": 9.1035175879397e-06, + "loss": 0.0282, + "step": 9425 + }, + { + "epoch": 5.29, + "grad_norm": 1.6776704788208008, + "learning_rate": 9.101005025125628e-06, + "loss": 0.0267, + "step": 9450 + }, + { + "epoch": 5.3, + "grad_norm": 1.878529667854309, + "learning_rate": 9.09849246231156e-06, + "loss": 0.0288, + "step": 9475 + }, + { + "epoch": 5.31, + "grad_norm": 2.2397854328155518, + "learning_rate": 9.095979899497489e-06, + "loss": 0.0282, + "step": 9500 + }, + { + "epoch": 5.33, + "grad_norm": 2.1173501014709473, + "learning_rate": 9.093467336683418e-06, + "loss": 0.0266, + "step": 9525 + }, + { + "epoch": 5.34, + "grad_norm": 1.9220620393753052, + "learning_rate": 9.090954773869347e-06, + "loss": 0.0261, + "step": 9550 + }, + { + "epoch": 5.36, + "grad_norm": 1.7095258235931396, + "learning_rate": 9.088442211055277e-06, + "loss": 0.0291, + "step": 9575 + }, + { + "epoch": 5.37, + "grad_norm": 1.740257978439331, + "learning_rate": 9.085929648241206e-06, + "loss": 0.0276, + "step": 9600 + }, + { + "epoch": 5.38, + "grad_norm": 2.070951461791992, + "learning_rate": 9.083417085427137e-06, + "loss": 0.0291, + "step": 9625 + }, + { + "epoch": 5.4, + "grad_norm": 2.3475615978240967, + "learning_rate": 9.080904522613066e-06, + "loss": 0.0304, + "step": 9650 + }, + { + "epoch": 5.41, + "grad_norm": 1.9384034872055054, + "learning_rate": 9.078391959798996e-06, + "loss": 0.0292, + "step": 9675 + }, + { + "epoch": 5.43, + "grad_norm": 2.043879270553589, + "learning_rate": 9.075879396984927e-06, + "loss": 0.027, + "step": 9700 + }, + { + "epoch": 5.44, + "grad_norm": 1.9742095470428467, + "learning_rate": 9.073366834170854e-06, + "loss": 0.0276, + "step": 9725 + }, + { + "epoch": 5.45, + "grad_norm": 2.001415967941284, + "learning_rate": 9.070854271356785e-06, + "loss": 0.0276, + "step": 9750 + }, + { + "epoch": 5.47, + "grad_norm": 1.732538104057312, + "learning_rate": 9.068341708542715e-06, + "loss": 0.0275, + "step": 9775 + }, + { + "epoch": 5.48, + "grad_norm": 1.964091181755066, + "learning_rate": 9.065829145728644e-06, + "loss": 0.0287, + "step": 9800 + }, + { + "epoch": 5.49, + "grad_norm": 1.5566002130508423, + "learning_rate": 9.063316582914573e-06, + "loss": 0.027, + "step": 9825 + }, + { + "epoch": 5.51, + "grad_norm": 1.5997827053070068, + "learning_rate": 9.060804020100502e-06, + "loss": 0.0263, + "step": 9850 + }, + { + "epoch": 5.52, + "grad_norm": 1.891287088394165, + "learning_rate": 9.058291457286433e-06, + "loss": 0.0265, + "step": 9875 + }, + { + "epoch": 5.54, + "grad_norm": 2.0822737216949463, + "learning_rate": 9.055778894472363e-06, + "loss": 0.0271, + "step": 9900 + }, + { + "epoch": 5.55, + "grad_norm": 1.3018207550048828, + "learning_rate": 9.053266331658292e-06, + "loss": 0.0266, + "step": 9925 + }, + { + "epoch": 5.56, + "grad_norm": 2.2431700229644775, + "learning_rate": 9.050753768844221e-06, + "loss": 0.0297, + "step": 9950 + }, + { + "epoch": 5.58, + "grad_norm": 2.4056737422943115, + "learning_rate": 9.048241206030152e-06, + "loss": 0.028, + "step": 9975 + }, + { + "epoch": 5.59, + "grad_norm": 1.7971937656402588, + "learning_rate": 9.04572864321608e-06, + "loss": 0.0295, + "step": 10000 + }, + { + "epoch": 5.59, + "eval_loss": 0.12024379521608353, + "eval_runtime": 665.5245, + "eval_samples_per_second": 2.123, + "eval_steps_per_second": 2.123, + "eval_wer": 14.79776767734883, + "step": 10000 + }, + { + "epoch": 5.61, + "grad_norm": 1.7843098640441895, + "learning_rate": 9.043216080402011e-06, + "loss": 0.0281, + "step": 10025 + }, + { + "epoch": 5.62, + "grad_norm": 2.1457159519195557, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0281, + "step": 10050 + }, + { + "epoch": 5.63, + "grad_norm": 1.8894964456558228, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0269, + "step": 10075 + }, + { + "epoch": 5.65, + "grad_norm": 2.022164821624756, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0283, + "step": 10100 + }, + { + "epoch": 5.66, + "grad_norm": 2.125027894973755, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0297, + "step": 10125 + }, + { + "epoch": 5.68, + "grad_norm": 1.742487907409668, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0284, + "step": 10150 + }, + { + "epoch": 5.69, + "grad_norm": 2.119999647140503, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0291, + "step": 10175 + }, + { + "epoch": 5.7, + "grad_norm": 1.4654369354248047, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0269, + "step": 10200 + }, + { + "epoch": 5.72, + "grad_norm": 1.3925327062606812, + "learning_rate": 9.023115577889447e-06, + "loss": 0.029, + "step": 10225 + }, + { + "epoch": 5.73, + "grad_norm": 1.7220244407653809, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0291, + "step": 10250 + }, + { + "epoch": 5.75, + "grad_norm": 1.9362173080444336, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0282, + "step": 10275 + }, + { + "epoch": 5.76, + "grad_norm": 1.285713791847229, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0286, + "step": 10300 + }, + { + "epoch": 5.77, + "grad_norm": 1.5760838985443115, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0286, + "step": 10325 + }, + { + "epoch": 5.79, + "grad_norm": 1.6609561443328857, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0289, + "step": 10350 + }, + { + "epoch": 5.8, + "grad_norm": 2.0139236450195312, + "learning_rate": 9.008040201005027e-06, + "loss": 0.029, + "step": 10375 + }, + { + "epoch": 5.82, + "grad_norm": 2.279127597808838, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0272, + "step": 10400 + }, + { + "epoch": 5.83, + "grad_norm": 1.6756246089935303, + "learning_rate": 9.003015075376885e-06, + "loss": 0.029, + "step": 10425 + }, + { + "epoch": 5.84, + "grad_norm": 1.296057939529419, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0279, + "step": 10450 + }, + { + "epoch": 5.86, + "grad_norm": 1.8277320861816406, + "learning_rate": 8.997989949748744e-06, + "loss": 0.028, + "step": 10475 + }, + { + "epoch": 5.87, + "grad_norm": 1.6914379596710205, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0276, + "step": 10500 + }, + { + "epoch": 5.89, + "grad_norm": 2.0595273971557617, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0286, + "step": 10525 + }, + { + "epoch": 5.9, + "grad_norm": 2.017573118209839, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0295, + "step": 10550 + }, + { + "epoch": 5.91, + "grad_norm": 2.0280215740203857, + "learning_rate": 8.987939698492463e-06, + "loss": 0.028, + "step": 10575 + }, + { + "epoch": 5.93, + "grad_norm": 1.3270870447158813, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0273, + "step": 10600 + }, + { + "epoch": 5.94, + "grad_norm": 2.011035919189453, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0266, + "step": 10625 + }, + { + "epoch": 5.96, + "grad_norm": 1.9440771341323853, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0286, + "step": 10650 + }, + { + "epoch": 5.97, + "grad_norm": 2.374587059020996, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0284, + "step": 10675 + }, + { + "epoch": 5.98, + "grad_norm": 1.806307077407837, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0263, + "step": 10700 + }, + { + "epoch": 6.0, + "grad_norm": 2.0423974990844727, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0281, + "step": 10725 + }, + { + "epoch": 6.01, + "grad_norm": 1.3818082809448242, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0199, + "step": 10750 + }, + { + "epoch": 6.03, + "grad_norm": 1.9150089025497437, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0158, + "step": 10775 + }, + { + "epoch": 6.04, + "grad_norm": 1.4514268636703491, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0178, + "step": 10800 + }, + { + "epoch": 6.05, + "grad_norm": 1.6171258687973022, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0167, + "step": 10825 + }, + { + "epoch": 6.07, + "grad_norm": 1.1259238719940186, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0172, + "step": 10850 + }, + { + "epoch": 6.08, + "grad_norm": 1.3454668521881104, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0192, + "step": 10875 + }, + { + "epoch": 6.1, + "grad_norm": 1.2800912857055664, + "learning_rate": 8.95527638190955e-06, + "loss": 0.018, + "step": 10900 + }, + { + "epoch": 6.11, + "grad_norm": 1.3140242099761963, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0192, + "step": 10925 + }, + { + "epoch": 6.12, + "grad_norm": 1.465201497077942, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0187, + "step": 10950 + }, + { + "epoch": 6.14, + "grad_norm": 1.5963821411132812, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0191, + "step": 10975 + }, + { + "epoch": 6.15, + "grad_norm": 2.0704030990600586, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0178, + "step": 11000 + }, + { + "epoch": 6.15, + "eval_loss": 0.12868213653564453, + "eval_runtime": 704.2347, + "eval_samples_per_second": 2.006, + "eval_steps_per_second": 2.006, + "eval_wer": 14.77475404176975, + "step": 11000 + }, + { + "epoch": 6.17, + "grad_norm": 1.6333519220352173, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0179, + "step": 11025 + }, + { + "epoch": 6.18, + "grad_norm": 1.4811276197433472, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0163, + "step": 11050 + }, + { + "epoch": 6.19, + "grad_norm": 1.6339362859725952, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0182, + "step": 11075 + }, + { + "epoch": 6.21, + "grad_norm": 1.7332963943481445, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0184, + "step": 11100 + }, + { + "epoch": 6.22, + "grad_norm": 1.4031999111175537, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0179, + "step": 11125 + }, + { + "epoch": 6.24, + "grad_norm": 1.7487659454345703, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0172, + "step": 11150 + }, + { + "epoch": 6.25, + "grad_norm": 1.501487135887146, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0186, + "step": 11175 + }, + { + "epoch": 6.26, + "grad_norm": 1.9125261306762695, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0191, + "step": 11200 + }, + { + "epoch": 6.28, + "grad_norm": 1.5762921571731567, + "learning_rate": 8.922713567839196e-06, + "loss": 0.018, + "step": 11225 + }, + { + "epoch": 6.29, + "grad_norm": 1.7063307762145996, + "learning_rate": 8.920201005025127e-06, + "loss": 0.0178, + "step": 11250 + }, + { + "epoch": 6.31, + "grad_norm": 1.354273796081543, + "learning_rate": 8.917688442211055e-06, + "loss": 0.0182, + "step": 11275 + }, + { + "epoch": 6.32, + "grad_norm": 1.9617756605148315, + "learning_rate": 8.915175879396986e-06, + "loss": 0.019, + "step": 11300 + }, + { + "epoch": 6.33, + "grad_norm": 1.96258544921875, + "learning_rate": 8.912663316582915e-06, + "loss": 0.018, + "step": 11325 + }, + { + "epoch": 6.35, + "grad_norm": 1.7744132280349731, + "learning_rate": 8.910150753768844e-06, + "loss": 0.0196, + "step": 11350 + }, + { + "epoch": 6.36, + "grad_norm": 1.3027286529541016, + "learning_rate": 8.907638190954775e-06, + "loss": 0.0178, + "step": 11375 + }, + { + "epoch": 6.38, + "grad_norm": 1.6519018411636353, + "learning_rate": 8.905125628140705e-06, + "loss": 0.0184, + "step": 11400 + }, + { + "epoch": 6.39, + "grad_norm": 2.334596633911133, + "learning_rate": 8.902613065326634e-06, + "loss": 0.0196, + "step": 11425 + }, + { + "epoch": 6.4, + "grad_norm": 1.196197509765625, + "learning_rate": 8.900100502512563e-06, + "loss": 0.0176, + "step": 11450 + }, + { + "epoch": 6.42, + "grad_norm": 1.834585189819336, + "learning_rate": 8.897587939698493e-06, + "loss": 0.0185, + "step": 11475 + }, + { + "epoch": 6.43, + "grad_norm": 1.4259742498397827, + "learning_rate": 8.895075376884424e-06, + "loss": 0.0177, + "step": 11500 + }, + { + "epoch": 6.45, + "grad_norm": 1.7768261432647705, + "learning_rate": 8.892562814070353e-06, + "loss": 0.0179, + "step": 11525 + }, + { + "epoch": 6.46, + "grad_norm": 1.488857388496399, + "learning_rate": 8.890050251256282e-06, + "loss": 0.0193, + "step": 11550 + }, + { + "epoch": 6.47, + "grad_norm": 1.6345274448394775, + "learning_rate": 8.887537688442212e-06, + "loss": 0.0172, + "step": 11575 + }, + { + "epoch": 6.49, + "grad_norm": 1.7173552513122559, + "learning_rate": 8.885025125628141e-06, + "loss": 0.0192, + "step": 11600 + }, + { + "epoch": 6.5, + "grad_norm": 1.6805514097213745, + "learning_rate": 8.88251256281407e-06, + "loss": 0.0182, + "step": 11625 + }, + { + "epoch": 6.52, + "grad_norm": 2.0110466480255127, + "learning_rate": 8.880000000000001e-06, + "loss": 0.0171, + "step": 11650 + }, + { + "epoch": 6.53, + "grad_norm": 1.8901313543319702, + "learning_rate": 8.87748743718593e-06, + "loss": 0.018, + "step": 11675 + }, + { + "epoch": 6.54, + "grad_norm": 1.9096134901046753, + "learning_rate": 8.87497487437186e-06, + "loss": 0.0193, + "step": 11700 + }, + { + "epoch": 6.56, + "grad_norm": 2.06321120262146, + "learning_rate": 8.87246231155779e-06, + "loss": 0.0195, + "step": 11725 + }, + { + "epoch": 6.57, + "grad_norm": 2.2982308864593506, + "learning_rate": 8.869949748743719e-06, + "loss": 0.0189, + "step": 11750 + }, + { + "epoch": 6.59, + "grad_norm": 1.319758653640747, + "learning_rate": 8.86743718592965e-06, + "loss": 0.0173, + "step": 11775 + }, + { + "epoch": 6.6, + "grad_norm": 1.8411797285079956, + "learning_rate": 8.864924623115579e-06, + "loss": 0.0191, + "step": 11800 + }, + { + "epoch": 6.61, + "grad_norm": 1.7386378049850464, + "learning_rate": 8.862412060301508e-06, + "loss": 0.0181, + "step": 11825 + }, + { + "epoch": 6.63, + "grad_norm": 1.4454030990600586, + "learning_rate": 8.859899497487438e-06, + "loss": 0.0189, + "step": 11850 + }, + { + "epoch": 6.64, + "grad_norm": 1.4748815298080444, + "learning_rate": 8.857386934673367e-06, + "loss": 0.0187, + "step": 11875 + }, + { + "epoch": 6.66, + "grad_norm": 1.5114494562149048, + "learning_rate": 8.854874371859296e-06, + "loss": 0.0188, + "step": 11900 + }, + { + "epoch": 6.67, + "grad_norm": 1.6016466617584229, + "learning_rate": 8.852361809045227e-06, + "loss": 0.0178, + "step": 11925 + }, + { + "epoch": 6.68, + "grad_norm": 1.4686626195907593, + "learning_rate": 8.849849246231157e-06, + "loss": 0.0179, + "step": 11950 + }, + { + "epoch": 6.7, + "grad_norm": 1.7126622200012207, + "learning_rate": 8.847336683417086e-06, + "loss": 0.0184, + "step": 11975 + }, + { + "epoch": 6.71, + "grad_norm": 1.4847170114517212, + "learning_rate": 8.844824120603015e-06, + "loss": 0.0186, + "step": 12000 + }, + { + "epoch": 6.71, + "eval_loss": 0.13346508145332336, + "eval_runtime": 663.3782, + "eval_samples_per_second": 2.13, + "eval_steps_per_second": 2.13, + "eval_wer": 14.76900063287498, + "step": 12000 + }, + { + "epoch": 6.73, + "grad_norm": 1.6814005374908447, + "learning_rate": 8.842311557788945e-06, + "loss": 0.0163, + "step": 12025 + }, + { + "epoch": 6.74, + "grad_norm": 1.2319942712783813, + "learning_rate": 8.839798994974876e-06, + "loss": 0.0186, + "step": 12050 + }, + { + "epoch": 6.75, + "grad_norm": 1.7550262212753296, + "learning_rate": 8.837286432160805e-06, + "loss": 0.0178, + "step": 12075 + }, + { + "epoch": 6.77, + "grad_norm": 1.8842459917068481, + "learning_rate": 8.834773869346734e-06, + "loss": 0.018, + "step": 12100 + }, + { + "epoch": 6.78, + "grad_norm": 1.9291971921920776, + "learning_rate": 8.832261306532665e-06, + "loss": 0.0182, + "step": 12125 + }, + { + "epoch": 6.8, + "grad_norm": 1.5719925165176392, + "learning_rate": 8.829748743718593e-06, + "loss": 0.0193, + "step": 12150 + }, + { + "epoch": 6.81, + "grad_norm": 1.580599308013916, + "learning_rate": 8.827236180904524e-06, + "loss": 0.02, + "step": 12175 + }, + { + "epoch": 6.82, + "grad_norm": 2.0825822353363037, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0189, + "step": 12200 + }, + { + "epoch": 6.84, + "grad_norm": 2.117241382598877, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0207, + "step": 12225 + }, + { + "epoch": 6.85, + "grad_norm": 1.551885962486267, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0189, + "step": 12250 + }, + { + "epoch": 6.87, + "grad_norm": 2.4384641647338867, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0187, + "step": 12275 + }, + { + "epoch": 6.88, + "grad_norm": 1.7111737728118896, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0189, + "step": 12300 + }, + { + "epoch": 6.89, + "grad_norm": 1.6200928688049316, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0196, + "step": 12325 + }, + { + "epoch": 6.91, + "grad_norm": 1.7117749452590942, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0185, + "step": 12350 + }, + { + "epoch": 6.92, + "grad_norm": 1.9618446826934814, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0171, + "step": 12375 + }, + { + "epoch": 6.94, + "grad_norm": 1.3066211938858032, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0184, + "step": 12400 + }, + { + "epoch": 6.95, + "grad_norm": 1.4500553607940674, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0195, + "step": 12425 + }, + { + "epoch": 6.96, + "grad_norm": 1.8370364904403687, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0174, + "step": 12450 + }, + { + "epoch": 6.98, + "grad_norm": 2.0185766220092773, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0182, + "step": 12475 + }, + { + "epoch": 6.99, + "grad_norm": 1.6966073513031006, + "learning_rate": 8.794572864321609e-06, + "loss": 0.021, + "step": 12500 + }, + { + "epoch": 7.01, + "grad_norm": 1.1646565198898315, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0166, + "step": 12525 + }, + { + "epoch": 7.02, + "grad_norm": 0.9114201664924622, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0111, + "step": 12550 + }, + { + "epoch": 7.03, + "grad_norm": 1.108992099761963, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0107, + "step": 12575 + }, + { + "epoch": 7.05, + "grad_norm": 1.3684626817703247, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0106, + "step": 12600 + }, + { + "epoch": 7.06, + "grad_norm": 1.0961270332336426, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0108, + "step": 12625 + }, + { + "epoch": 7.07, + "grad_norm": 1.5187110900878906, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0129, + "step": 12650 + }, + { + "epoch": 7.09, + "grad_norm": 1.5021947622299194, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0115, + "step": 12675 + }, + { + "epoch": 7.1, + "grad_norm": 1.335440993309021, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0109, + "step": 12700 + }, + { + "epoch": 7.12, + "grad_norm": 1.280112624168396, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0129, + "step": 12725 + }, + { + "epoch": 7.13, + "grad_norm": 2.075493097305298, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0119, + "step": 12750 + }, + { + "epoch": 7.14, + "grad_norm": 1.2106783390045166, + "learning_rate": 8.766934673366834e-06, + "loss": 0.012, + "step": 12775 + }, + { + "epoch": 7.16, + "grad_norm": 1.5772730112075806, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0112, + "step": 12800 + }, + { + "epoch": 7.17, + "grad_norm": 1.404213786125183, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0121, + "step": 12825 + }, + { + "epoch": 7.19, + "grad_norm": 1.8210283517837524, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0116, + "step": 12850 + }, + { + "epoch": 7.2, + "grad_norm": 1.0989882946014404, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0112, + "step": 12875 + }, + { + "epoch": 7.21, + "grad_norm": 1.5659524202346802, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0104, + "step": 12900 + }, + { + "epoch": 7.23, + "grad_norm": 1.4689717292785645, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0123, + "step": 12925 + }, + { + "epoch": 7.24, + "grad_norm": 1.8746931552886963, + "learning_rate": 8.749346733668343e-06, + "loss": 0.012, + "step": 12950 + }, + { + "epoch": 7.26, + "grad_norm": 1.7436597347259521, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0123, + "step": 12975 + }, + { + "epoch": 7.27, + "grad_norm": 1.497960090637207, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0112, + "step": 13000 + }, + { + "epoch": 7.27, + "eval_loss": 0.14194945991039276, + "eval_runtime": 664.5525, + "eval_samples_per_second": 2.126, + "eval_steps_per_second": 2.126, + "eval_wer": 14.889822219665152, + "step": 13000 + }, + { + "epoch": 7.28, + "grad_norm": 1.2526633739471436, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0119, + "step": 13025 + }, + { + "epoch": 7.3, + "grad_norm": 1.6523659229278564, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0116, + "step": 13050 + }, + { + "epoch": 7.31, + "grad_norm": 1.061896800994873, + "learning_rate": 8.736783919597991e-06, + "loss": 0.012, + "step": 13075 + }, + { + "epoch": 7.33, + "grad_norm": 3.0462448596954346, + "learning_rate": 8.734271356783919e-06, + "loss": 0.012, + "step": 13100 + }, + { + "epoch": 7.34, + "grad_norm": 1.7102919816970825, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0116, + "step": 13125 + }, + { + "epoch": 7.35, + "grad_norm": 1.8242716789245605, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0129, + "step": 13150 + }, + { + "epoch": 7.37, + "grad_norm": 1.769819736480713, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0133, + "step": 13175 + }, + { + "epoch": 7.38, + "grad_norm": 1.656846523284912, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0128, + "step": 13200 + }, + { + "epoch": 7.4, + "grad_norm": 1.3304965496063232, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0124, + "step": 13225 + }, + { + "epoch": 7.41, + "grad_norm": 1.6686651706695557, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0124, + "step": 13250 + }, + { + "epoch": 7.42, + "grad_norm": 1.3314317464828491, + "learning_rate": 8.716683417085428e-06, + "loss": 0.011, + "step": 13275 + }, + { + "epoch": 7.44, + "grad_norm": 1.848137617111206, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0126, + "step": 13300 + }, + { + "epoch": 7.45, + "grad_norm": 1.609485387802124, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0114, + "step": 13325 + }, + { + "epoch": 7.47, + "grad_norm": 1.572728157043457, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0116, + "step": 13350 + }, + { + "epoch": 7.48, + "grad_norm": 1.483894944190979, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0114, + "step": 13375 + }, + { + "epoch": 7.49, + "grad_norm": 1.8998823165893555, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0134, + "step": 13400 + }, + { + "epoch": 7.51, + "grad_norm": 1.0379048585891724, + "learning_rate": 8.701608040201005e-06, + "loss": 0.012, + "step": 13425 + }, + { + "epoch": 7.52, + "grad_norm": 1.3812390565872192, + "learning_rate": 8.699095477386935e-06, + "loss": 0.012, + "step": 13450 + }, + { + "epoch": 7.54, + "grad_norm": 1.185532569885254, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0122, + "step": 13475 + }, + { + "epoch": 7.55, + "grad_norm": 1.5508016347885132, + "learning_rate": 8.694170854271357e-06, + "loss": 0.0123, + "step": 13500 + }, + { + "epoch": 7.56, + "grad_norm": 1.6121983528137207, + "learning_rate": 8.691658291457287e-06, + "loss": 0.0118, + "step": 13525 + }, + { + "epoch": 7.58, + "grad_norm": 1.1354798078536987, + "learning_rate": 8.689145728643218e-06, + "loss": 0.0119, + "step": 13550 + }, + { + "epoch": 7.59, + "grad_norm": 1.305696725845337, + "learning_rate": 8.686633165829147e-06, + "loss": 0.0137, + "step": 13575 + }, + { + "epoch": 7.61, + "grad_norm": 1.2802340984344482, + "learning_rate": 8.684120603015076e-06, + "loss": 0.0122, + "step": 13600 + }, + { + "epoch": 7.62, + "grad_norm": 1.4077941179275513, + "learning_rate": 8.681608040201006e-06, + "loss": 0.0113, + "step": 13625 + }, + { + "epoch": 7.63, + "grad_norm": 1.565801739692688, + "learning_rate": 8.679095477386935e-06, + "loss": 0.0117, + "step": 13650 + }, + { + "epoch": 7.65, + "grad_norm": 1.7073423862457275, + "learning_rate": 8.676582914572866e-06, + "loss": 0.0126, + "step": 13675 + }, + { + "epoch": 7.66, + "grad_norm": 1.4420313835144043, + "learning_rate": 8.674070351758794e-06, + "loss": 0.0125, + "step": 13700 + }, + { + "epoch": 7.68, + "grad_norm": 1.351138949394226, + "learning_rate": 8.671557788944725e-06, + "loss": 0.0115, + "step": 13725 + }, + { + "epoch": 7.69, + "grad_norm": 1.2214667797088623, + "learning_rate": 8.669045226130654e-06, + "loss": 0.0119, + "step": 13750 + }, + { + "epoch": 7.7, + "grad_norm": 1.4725024700164795, + "learning_rate": 8.666532663316583e-06, + "loss": 0.0141, + "step": 13775 + }, + { + "epoch": 7.72, + "grad_norm": 1.6302193403244019, + "learning_rate": 8.664020100502514e-06, + "loss": 0.0129, + "step": 13800 + }, + { + "epoch": 7.73, + "grad_norm": 2.063391923904419, + "learning_rate": 8.661507537688444e-06, + "loss": 0.0141, + "step": 13825 + }, + { + "epoch": 7.75, + "grad_norm": 1.9950083494186401, + "learning_rate": 8.658994974874373e-06, + "loss": 0.0126, + "step": 13850 + }, + { + "epoch": 7.76, + "grad_norm": 1.2398418188095093, + "learning_rate": 8.656482412060302e-06, + "loss": 0.0123, + "step": 13875 + }, + { + "epoch": 7.77, + "grad_norm": 0.8326249718666077, + "learning_rate": 8.653969849246231e-06, + "loss": 0.0118, + "step": 13900 + }, + { + "epoch": 7.79, + "grad_norm": 1.746744155883789, + "learning_rate": 8.65145728643216e-06, + "loss": 0.0125, + "step": 13925 + }, + { + "epoch": 7.8, + "grad_norm": 1.693219780921936, + "learning_rate": 8.648944723618092e-06, + "loss": 0.0129, + "step": 13950 + }, + { + "epoch": 7.82, + "grad_norm": 1.6507676839828491, + "learning_rate": 8.64643216080402e-06, + "loss": 0.0126, + "step": 13975 + }, + { + "epoch": 7.83, + "grad_norm": 1.4563243389129639, + "learning_rate": 8.64391959798995e-06, + "loss": 0.0117, + "step": 14000 + }, + { + "epoch": 7.83, + "eval_loss": 0.14970333874225616, + "eval_runtime": 665.2848, + "eval_samples_per_second": 2.124, + "eval_steps_per_second": 2.124, + "eval_wer": 14.740233588401127, + "step": 14000 + }, + { + "epoch": 7.84, + "grad_norm": 1.6021775007247925, + "learning_rate": 8.64140703517588e-06, + "loss": 0.0128, + "step": 14025 + }, + { + "epoch": 7.86, + "grad_norm": 1.8711954355239868, + "learning_rate": 8.638894472361809e-06, + "loss": 0.0128, + "step": 14050 + }, + { + "epoch": 7.87, + "grad_norm": 1.4479572772979736, + "learning_rate": 8.63638190954774e-06, + "loss": 0.0116, + "step": 14075 + }, + { + "epoch": 7.89, + "grad_norm": 1.211432695388794, + "learning_rate": 8.63386934673367e-06, + "loss": 0.0125, + "step": 14100 + }, + { + "epoch": 7.9, + "grad_norm": 2.320456027984619, + "learning_rate": 8.631356783919599e-06, + "loss": 0.012, + "step": 14125 + }, + { + "epoch": 7.91, + "grad_norm": 4.528357982635498, + "learning_rate": 8.628844221105528e-06, + "loss": 0.0119, + "step": 14150 + }, + { + "epoch": 7.93, + "grad_norm": 1.7860376834869385, + "learning_rate": 8.626331658291457e-06, + "loss": 0.0122, + "step": 14175 + }, + { + "epoch": 7.94, + "grad_norm": 1.7701290845870972, + "learning_rate": 8.623819095477388e-06, + "loss": 0.0125, + "step": 14200 + }, + { + "epoch": 7.96, + "grad_norm": 1.682796597480774, + "learning_rate": 8.621306532663318e-06, + "loss": 0.013, + "step": 14225 + }, + { + "epoch": 7.97, + "grad_norm": 2.3187553882598877, + "learning_rate": 8.618793969849247e-06, + "loss": 0.0139, + "step": 14250 + }, + { + "epoch": 7.98, + "grad_norm": 1.2007170915603638, + "learning_rate": 8.616281407035176e-06, + "loss": 0.0117, + "step": 14275 + }, + { + "epoch": 8.0, + "grad_norm": 1.4833934307098389, + "learning_rate": 8.613768844221106e-06, + "loss": 0.0125, + "step": 14300 + }, + { + "epoch": 8.01, + "grad_norm": 0.7548562288284302, + "learning_rate": 8.611256281407035e-06, + "loss": 0.0073, + "step": 14325 + }, + { + "epoch": 8.03, + "grad_norm": 1.2205129861831665, + "learning_rate": 8.608743718592966e-06, + "loss": 0.0071, + "step": 14350 + }, + { + "epoch": 8.04, + "grad_norm": 1.4187291860580444, + "learning_rate": 8.606231155778895e-06, + "loss": 0.0073, + "step": 14375 + }, + { + "epoch": 8.05, + "grad_norm": 0.9094333052635193, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0071, + "step": 14400 + }, + { + "epoch": 8.07, + "grad_norm": 0.8845241069793701, + "learning_rate": 8.601206030150756e-06, + "loss": 0.008, + "step": 14425 + }, + { + "epoch": 8.08, + "grad_norm": 0.8561604619026184, + "learning_rate": 8.598693467336683e-06, + "loss": 0.0069, + "step": 14450 + }, + { + "epoch": 8.1, + "grad_norm": 0.847540557384491, + "learning_rate": 8.596180904522614e-06, + "loss": 0.0064, + "step": 14475 + }, + { + "epoch": 8.11, + "grad_norm": 1.117789387702942, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0087, + "step": 14500 + }, + { + "epoch": 8.12, + "grad_norm": 1.2738511562347412, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0083, + "step": 14525 + }, + { + "epoch": 8.14, + "grad_norm": 1.074594497680664, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0079, + "step": 14550 + }, + { + "epoch": 8.15, + "grad_norm": 1.515325903892517, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0072, + "step": 14575 + }, + { + "epoch": 8.17, + "grad_norm": 1.5327142477035522, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0073, + "step": 14600 + }, + { + "epoch": 8.18, + "grad_norm": 1.5838332176208496, + "learning_rate": 8.581105527638192e-06, + "loss": 0.007, + "step": 14625 + }, + { + "epoch": 8.19, + "grad_norm": 1.5385130643844604, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0074, + "step": 14650 + }, + { + "epoch": 8.21, + "grad_norm": 2.050981283187866, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0083, + "step": 14675 + }, + { + "epoch": 8.22, + "grad_norm": 1.0908812284469604, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0081, + "step": 14700 + }, + { + "epoch": 8.24, + "grad_norm": 1.4452788829803467, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0083, + "step": 14725 + }, + { + "epoch": 8.25, + "grad_norm": 0.6927215456962585, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0085, + "step": 14750 + }, + { + "epoch": 8.26, + "grad_norm": 1.544752597808838, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0073, + "step": 14775 + }, + { + "epoch": 8.28, + "grad_norm": 1.4217361211776733, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0084, + "step": 14800 + }, + { + "epoch": 8.29, + "grad_norm": 0.7164564728736877, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0076, + "step": 14825 + }, + { + "epoch": 8.31, + "grad_norm": 1.191162347793579, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0074, + "step": 14850 + }, + { + "epoch": 8.32, + "grad_norm": 1.2003357410430908, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0084, + "step": 14875 + }, + { + "epoch": 8.33, + "grad_norm": 1.1223558187484741, + "learning_rate": 8.553467336683418e-06, + "loss": 0.008, + "step": 14900 + }, + { + "epoch": 8.35, + "grad_norm": 1.4492493867874146, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0079, + "step": 14925 + }, + { + "epoch": 8.36, + "grad_norm": 1.1047662496566772, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0082, + "step": 14950 + }, + { + "epoch": 8.38, + "grad_norm": 0.9143801927566528, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0096, + "step": 14975 + }, + { + "epoch": 8.39, + "grad_norm": 1.536102294921875, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0069, + "step": 15000 + }, + { + "epoch": 8.39, + "eval_loss": 0.16292449831962585, + "eval_runtime": 669.9911, + "eval_samples_per_second": 2.109, + "eval_steps_per_second": 2.109, + "eval_wer": 15.010643806455326, + "step": 15000 + }, + { + "epoch": 8.4, + "grad_norm": 1.478403925895691, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0084, + "step": 15025 + }, + { + "epoch": 8.42, + "grad_norm": 1.383904218673706, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0078, + "step": 15050 + }, + { + "epoch": 8.43, + "grad_norm": 1.4146394729614258, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0081, + "step": 15075 + }, + { + "epoch": 8.45, + "grad_norm": 1.7921966314315796, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0076, + "step": 15100 + }, + { + "epoch": 8.46, + "grad_norm": 1.6373380422592163, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0081, + "step": 15125 + }, + { + "epoch": 8.47, + "grad_norm": 2.244276523590088, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0083, + "step": 15150 + }, + { + "epoch": 8.49, + "grad_norm": 1.8590312004089355, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0087, + "step": 15175 + }, + { + "epoch": 8.5, + "grad_norm": 1.4160947799682617, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0085, + "step": 15200 + }, + { + "epoch": 8.52, + "grad_norm": 1.334657073020935, + "learning_rate": 8.520804020100503e-06, + "loss": 0.009, + "step": 15225 + }, + { + "epoch": 8.53, + "grad_norm": 1.384863257408142, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0079, + "step": 15250 + }, + { + "epoch": 8.54, + "grad_norm": 1.8485593795776367, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0077, + "step": 15275 + }, + { + "epoch": 8.56, + "grad_norm": 1.4605650901794434, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0086, + "step": 15300 + }, + { + "epoch": 8.57, + "grad_norm": 1.5395112037658691, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0087, + "step": 15325 + }, + { + "epoch": 8.59, + "grad_norm": 1.39652419090271, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0089, + "step": 15350 + }, + { + "epoch": 8.6, + "grad_norm": 1.8926337957382202, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0084, + "step": 15375 + }, + { + "epoch": 8.61, + "grad_norm": 1.3901464939117432, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0082, + "step": 15400 + }, + { + "epoch": 8.63, + "grad_norm": 1.5044126510620117, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0088, + "step": 15425 + }, + { + "epoch": 8.64, + "grad_norm": 2.0128767490386963, + "learning_rate": 8.49819095477387e-06, + "loss": 0.008, + "step": 15450 + }, + { + "epoch": 8.65, + "grad_norm": 1.4999572038650513, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0073, + "step": 15475 + }, + { + "epoch": 8.67, + "grad_norm": 1.4779800176620483, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0092, + "step": 15500 + }, + { + "epoch": 8.68, + "grad_norm": 2.3917160034179688, + "learning_rate": 8.490753768844222e-06, + "loss": 0.009, + "step": 15525 + }, + { + "epoch": 8.7, + "grad_norm": 1.5033729076385498, + "learning_rate": 8.488241206030151e-06, + "loss": 0.008, + "step": 15550 + }, + { + "epoch": 8.71, + "grad_norm": 1.8788156509399414, + "learning_rate": 8.485728643216082e-06, + "loss": 0.0084, + "step": 15575 + }, + { + "epoch": 8.72, + "grad_norm": 1.3282579183578491, + "learning_rate": 8.48321608040201e-06, + "loss": 0.0091, + "step": 15600 + }, + { + "epoch": 8.74, + "grad_norm": 1.5350632667541504, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0084, + "step": 15625 + }, + { + "epoch": 8.75, + "grad_norm": 1.5350359678268433, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0088, + "step": 15650 + }, + { + "epoch": 8.77, + "grad_norm": 1.6284046173095703, + "learning_rate": 8.4756783919598e-06, + "loss": 0.0092, + "step": 15675 + }, + { + "epoch": 8.78, + "grad_norm": 1.3610342741012573, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0087, + "step": 15700 + }, + { + "epoch": 8.79, + "grad_norm": 2.0138654708862305, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0084, + "step": 15725 + }, + { + "epoch": 8.81, + "grad_norm": 1.628960132598877, + "learning_rate": 8.468140703517589e-06, + "loss": 0.0092, + "step": 15750 + }, + { + "epoch": 8.82, + "grad_norm": 1.5197557210922241, + "learning_rate": 8.465628140703518e-06, + "loss": 0.0081, + "step": 15775 + }, + { + "epoch": 8.84, + "grad_norm": 1.5494999885559082, + "learning_rate": 8.463115577889448e-06, + "loss": 0.0087, + "step": 15800 + }, + { + "epoch": 8.85, + "grad_norm": 1.260890245437622, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0088, + "step": 15825 + }, + { + "epoch": 8.86, + "grad_norm": 1.2190687656402588, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0084, + "step": 15850 + }, + { + "epoch": 8.88, + "grad_norm": 1.536759376525879, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0083, + "step": 15875 + }, + { + "epoch": 8.89, + "grad_norm": 1.348574161529541, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0088, + "step": 15900 + }, + { + "epoch": 8.91, + "grad_norm": 1.8172141313552856, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0077, + "step": 15925 + }, + { + "epoch": 8.92, + "grad_norm": 1.764458417892456, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0082, + "step": 15950 + }, + { + "epoch": 8.93, + "grad_norm": 1.4002294540405273, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0089, + "step": 15975 + }, + { + "epoch": 8.95, + "grad_norm": 1.458240270614624, + "learning_rate": 8.443015075376884e-06, + "loss": 0.009, + "step": 16000 + }, + { + "epoch": 8.95, + "eval_loss": 0.16181136667728424, + "eval_runtime": 658.0834, + "eval_samples_per_second": 2.147, + "eval_steps_per_second": 2.147, + "eval_wer": 14.671192681663886, + "step": 16000 + }, + { + "epoch": 8.96, + "grad_norm": 1.3792202472686768, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0089, + "step": 16025 + }, + { + "epoch": 8.98, + "grad_norm": 1.0990585088729858, + "learning_rate": 8.437989949748744e-06, + "loss": 0.0088, + "step": 16050 + }, + { + "epoch": 8.99, + "grad_norm": 1.3024276494979858, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0073, + "step": 16075 + }, + { + "epoch": 9.0, + "grad_norm": 1.2445907592773438, + "learning_rate": 8.432964824120605e-06, + "loss": 0.0072, + "step": 16100 + }, + { + "epoch": 9.02, + "grad_norm": 1.4532568454742432, + "learning_rate": 8.430452261306534e-06, + "loss": 0.005, + "step": 16125 + }, + { + "epoch": 9.03, + "grad_norm": 1.2526249885559082, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0053, + "step": 16150 + }, + { + "epoch": 9.05, + "grad_norm": 1.123509168624878, + "learning_rate": 8.425427135678393e-06, + "loss": 0.005, + "step": 16175 + }, + { + "epoch": 9.06, + "grad_norm": 0.9249866008758545, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0046, + "step": 16200 + }, + { + "epoch": 9.07, + "grad_norm": 1.201412320137024, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0047, + "step": 16225 + }, + { + "epoch": 9.09, + "grad_norm": 1.1973247528076172, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0051, + "step": 16250 + }, + { + "epoch": 9.1, + "grad_norm": 1.1320230960845947, + "learning_rate": 8.415376884422112e-06, + "loss": 0.0054, + "step": 16275 + }, + { + "epoch": 9.12, + "grad_norm": 1.6098406314849854, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0048, + "step": 16300 + }, + { + "epoch": 9.13, + "grad_norm": 1.1688694953918457, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0051, + "step": 16325 + }, + { + "epoch": 9.14, + "grad_norm": 0.721691906452179, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0047, + "step": 16350 + }, + { + "epoch": 9.16, + "grad_norm": 1.161227822303772, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0051, + "step": 16375 + }, + { + "epoch": 9.17, + "grad_norm": 1.0887811183929443, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0052, + "step": 16400 + }, + { + "epoch": 9.19, + "grad_norm": 1.3002018928527832, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0049, + "step": 16425 + }, + { + "epoch": 9.2, + "grad_norm": 1.3032176494598389, + "learning_rate": 8.397788944723619e-06, + "loss": 0.0054, + "step": 16450 + }, + { + "epoch": 9.21, + "grad_norm": 1.5191234350204468, + "learning_rate": 8.395276381909548e-06, + "loss": 0.0053, + "step": 16475 + }, + { + "epoch": 9.23, + "grad_norm": 0.7929081320762634, + "learning_rate": 8.392763819095479e-06, + "loss": 0.0053, + "step": 16500 + }, + { + "epoch": 9.24, + "grad_norm": 0.713760495185852, + "learning_rate": 8.390251256281408e-06, + "loss": 0.0053, + "step": 16525 + }, + { + "epoch": 9.26, + "grad_norm": 1.0171557664871216, + "learning_rate": 8.387738693467338e-06, + "loss": 0.0051, + "step": 16550 + }, + { + "epoch": 9.27, + "grad_norm": 1.2539921998977661, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0049, + "step": 16575 + }, + { + "epoch": 9.28, + "grad_norm": 1.2567251920700073, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0059, + "step": 16600 + }, + { + "epoch": 9.3, + "grad_norm": 1.7700473070144653, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0057, + "step": 16625 + }, + { + "epoch": 9.31, + "grad_norm": 1.418623924255371, + "learning_rate": 8.377688442211057e-06, + "loss": 0.006, + "step": 16650 + }, + { + "epoch": 9.33, + "grad_norm": 1.3245000839233398, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0052, + "step": 16675 + }, + { + "epoch": 9.34, + "grad_norm": 0.7978353500366211, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0056, + "step": 16700 + }, + { + "epoch": 9.35, + "grad_norm": 1.2670987844467163, + "learning_rate": 8.370150753768845e-06, + "loss": 0.006, + "step": 16725 + }, + { + "epoch": 9.37, + "grad_norm": 0.9033834338188171, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0052, + "step": 16750 + }, + { + "epoch": 9.38, + "grad_norm": 1.2073125839233398, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0062, + "step": 16775 + }, + { + "epoch": 9.4, + "grad_norm": 1.8725367784500122, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0061, + "step": 16800 + }, + { + "epoch": 9.41, + "grad_norm": 1.2179232835769653, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0053, + "step": 16825 + }, + { + "epoch": 9.42, + "grad_norm": 1.3296008110046387, + "learning_rate": 8.357587939698493e-06, + "loss": 0.0053, + "step": 16850 + }, + { + "epoch": 9.44, + "grad_norm": 1.3358724117279053, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0055, + "step": 16875 + }, + { + "epoch": 9.45, + "grad_norm": 1.144808053970337, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0059, + "step": 16900 + }, + { + "epoch": 9.47, + "grad_norm": 1.2998437881469727, + "learning_rate": 8.350050251256282e-06, + "loss": 0.0057, + "step": 16925 + }, + { + "epoch": 9.48, + "grad_norm": 1.3167157173156738, + "learning_rate": 8.347537688442212e-06, + "loss": 0.006, + "step": 16950 + }, + { + "epoch": 9.49, + "grad_norm": 1.3349064588546753, + "learning_rate": 8.345025125628141e-06, + "loss": 0.0066, + "step": 16975 + }, + { + "epoch": 9.51, + "grad_norm": 1.4480096101760864, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0067, + "step": 17000 + }, + { + "epoch": 9.51, + "eval_loss": 0.17122092843055725, + "eval_runtime": 663.3299, + "eval_samples_per_second": 2.13, + "eval_steps_per_second": 2.13, + "eval_wer": 15.263793797825212, + "step": 17000 + }, + { + "epoch": 9.52, + "grad_norm": 1.2224031686782837, + "learning_rate": 8.34e-06, + "loss": 0.0063, + "step": 17025 + }, + { + "epoch": 9.54, + "grad_norm": 1.4613298177719116, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0062, + "step": 17050 + }, + { + "epoch": 9.55, + "grad_norm": 0.916854739189148, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0058, + "step": 17075 + }, + { + "epoch": 9.56, + "grad_norm": 1.469382643699646, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0057, + "step": 17100 + }, + { + "epoch": 9.58, + "grad_norm": 1.6864712238311768, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0061, + "step": 17125 + }, + { + "epoch": 9.59, + "grad_norm": 1.3123747110366821, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0051, + "step": 17150 + }, + { + "epoch": 9.61, + "grad_norm": 1.027280330657959, + "learning_rate": 8.324924623115579e-06, + "loss": 0.006, + "step": 17175 + }, + { + "epoch": 9.62, + "grad_norm": 1.714094877243042, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0061, + "step": 17200 + }, + { + "epoch": 9.63, + "grad_norm": 1.61386239528656, + "learning_rate": 8.319899497487438e-06, + "loss": 0.006, + "step": 17225 + }, + { + "epoch": 9.65, + "grad_norm": 1.3451932668685913, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0057, + "step": 17250 + }, + { + "epoch": 9.66, + "grad_norm": 1.1886340379714966, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0057, + "step": 17275 + }, + { + "epoch": 9.68, + "grad_norm": 1.2680896520614624, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0055, + "step": 17300 + }, + { + "epoch": 9.69, + "grad_norm": 1.4850106239318848, + "learning_rate": 8.309849246231157e-06, + "loss": 0.006, + "step": 17325 + }, + { + "epoch": 9.7, + "grad_norm": 1.9217225313186646, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0068, + "step": 17350 + }, + { + "epoch": 9.72, + "grad_norm": 0.8586521744728088, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0062, + "step": 17375 + }, + { + "epoch": 9.73, + "grad_norm": 1.7632566690444946, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0055, + "step": 17400 + }, + { + "epoch": 9.75, + "grad_norm": 0.7378219962120056, + "learning_rate": 8.299798994974874e-06, + "loss": 0.006, + "step": 17425 + }, + { + "epoch": 9.76, + "grad_norm": 1.226503849029541, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0061, + "step": 17450 + }, + { + "epoch": 9.77, + "grad_norm": 1.8022719621658325, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0051, + "step": 17475 + }, + { + "epoch": 9.79, + "grad_norm": 1.5893287658691406, + "learning_rate": 8.292261306532664e-06, + "loss": 0.006, + "step": 17500 + }, + { + "epoch": 9.8, + "grad_norm": 1.371327519416809, + "learning_rate": 8.289849246231157e-06, + "loss": 0.0062, + "step": 17525 + }, + { + "epoch": 9.82, + "grad_norm": 1.0447958707809448, + "learning_rate": 8.287336683417086e-06, + "loss": 0.0058, + "step": 17550 + }, + { + "epoch": 9.83, + "grad_norm": 1.652135968208313, + "learning_rate": 8.284824120603016e-06, + "loss": 0.0061, + "step": 17575 + }, + { + "epoch": 9.84, + "grad_norm": 1.2331994771957397, + "learning_rate": 8.282311557788945e-06, + "loss": 0.0062, + "step": 17600 + }, + { + "epoch": 9.86, + "grad_norm": 1.4162846803665161, + "learning_rate": 8.279798994974874e-06, + "loss": 0.0058, + "step": 17625 + }, + { + "epoch": 9.87, + "grad_norm": 1.9513553380966187, + "learning_rate": 8.277286432160805e-06, + "loss": 0.006, + "step": 17650 + }, + { + "epoch": 9.89, + "grad_norm": 1.8846778869628906, + "learning_rate": 8.274773869346735e-06, + "loss": 0.0073, + "step": 17675 + }, + { + "epoch": 9.9, + "grad_norm": 1.450873851776123, + "learning_rate": 8.272261306532664e-06, + "loss": 0.006, + "step": 17700 + }, + { + "epoch": 9.91, + "grad_norm": 1.3346210718154907, + "learning_rate": 8.269748743718595e-06, + "loss": 0.006, + "step": 17725 + }, + { + "epoch": 9.93, + "grad_norm": 0.9933092594146729, + "learning_rate": 8.267236180904523e-06, + "loss": 0.0064, + "step": 17750 + }, + { + "epoch": 9.94, + "grad_norm": 1.1116559505462646, + "learning_rate": 8.264723618090454e-06, + "loss": 0.0061, + "step": 17775 + }, + { + "epoch": 9.96, + "grad_norm": 1.3309531211853027, + "learning_rate": 8.262211055276383e-06, + "loss": 0.0061, + "step": 17800 + }, + { + "epoch": 9.97, + "grad_norm": 1.6288272142410278, + "learning_rate": 8.259698492462312e-06, + "loss": 0.006, + "step": 17825 + }, + { + "epoch": 9.98, + "grad_norm": 1.0559711456298828, + "learning_rate": 8.257185929648242e-06, + "loss": 0.0056, + "step": 17850 + }, + { + "epoch": 10.0, + "grad_norm": 1.0807549953460693, + "learning_rate": 8.25467336683417e-06, + "loss": 0.0068, + "step": 17875 + }, + { + "epoch": 10.01, + "grad_norm": 0.9323234558105469, + "learning_rate": 8.2521608040201e-06, + "loss": 0.0041, + "step": 17900 + }, + { + "epoch": 10.03, + "grad_norm": 2.231278419494629, + "learning_rate": 8.249648241206031e-06, + "loss": 0.0038, + "step": 17925 + }, + { + "epoch": 10.04, + "grad_norm": 0.872959315776825, + "learning_rate": 8.24713567839196e-06, + "loss": 0.0036, + "step": 17950 + }, + { + "epoch": 10.05, + "grad_norm": 0.8332386612892151, + "learning_rate": 8.24462311557789e-06, + "loss": 0.0034, + "step": 17975 + }, + { + "epoch": 10.07, + "grad_norm": 0.8897435665130615, + "learning_rate": 8.24211055276382e-06, + "loss": 0.004, + "step": 18000 + }, + { + "epoch": 10.07, + "eval_loss": 0.1750999540090561, + "eval_runtime": 663.601, + "eval_samples_per_second": 2.129, + "eval_steps_per_second": 2.129, + "eval_wer": 14.918589264139001, + "step": 18000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 56, + "save_steps": 1000, + "total_flos": 5.60234702831616e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/training_args.bin b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3a9d4587a93764adaab43ef6b89a636bad70eb0 --- /dev/null +++ b/checkpoints/whisper-base/chattisgarhi/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0301591cd0a8dfe0ef0fd703b5da49da4a710dd3462b5a37711458976d0367f9 +size 4667 diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/config.json b/checkpoints/whisper-base/hindi/checkpoint-41000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7861ebfd3dce452d730fc7657aa35befb4dcfe2d --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/generation_config.json b/checkpoints/whisper-base/hindi/checkpoint-41000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/model.safetensors b/checkpoints/whisper-base/hindi/checkpoint-41000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..029259c7631aee812f5e98e068a9d4125fced31a --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df990837647339ad3df89471145ee5d18a585863d7c123434c78bab11ad80f07 +size 290403936 diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/optimizer.pt b/checkpoints/whisper-base/hindi/checkpoint-41000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcd5b8b719818869ddac4e3e7b95318044f74704 --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f5d6764b786e0387dfde4a2e437878e4c2681c1e648ac45e787e7a94639ce0 +size 574811077 diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/preprocessor_config.json b/checkpoints/whisper-base/hindi/checkpoint-41000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/rng_state.pth b/checkpoints/whisper-base/hindi/checkpoint-41000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..57c84de56bae33e9fcd32f73d47a5c69f8df0f7c --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068bc4fd27254673c53e46f0dba6fefbdd6c08c05816bfd7f88dd809718e09bf +size 14575 diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/scheduler.pt b/checkpoints/whisper-base/hindi/checkpoint-41000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d0c36bfd4d154bc5abfa794a0e825af3374f6f2 --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:131cb7b70b6daedfadbc8ee83fed52384e12729b176a4b5406dd5da8ac91da04 +size 627 diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/trainer_state.json b/checkpoints/whisper-base/hindi/checkpoint-41000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d56610f0e709d34a62a7f2b88b674ca03bfcd085 --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/trainer_state.json @@ -0,0 +1,11870 @@ +{ + "best_metric": 14.895979429640018, + "best_model_checkpoint": "results/whisper-base/hindi/checkpoint-31000", + "epoch": 22.930648769574944, + "eval_steps": 1000, + "global_step": 41000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 22.323068618774414, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.0695, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 12.523134231567383, + "learning_rate": 9.600000000000001e-07, + "loss": 1.7308, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 8.468318939208984, + "learning_rate": 1.46e-06, + "loss": 1.3356, + "step": 75 + }, + { + "epoch": 0.06, + "grad_norm": 6.410621643066406, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.0681, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 5.392359256744385, + "learning_rate": 2.46e-06, + "loss": 0.8695, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.659060478210449, + "learning_rate": 2.96e-06, + "loss": 0.7439, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 4.5716118812561035, + "learning_rate": 3.46e-06, + "loss": 0.6782, + "step": 175 + }, + { + "epoch": 0.11, + "grad_norm": 4.426967620849609, + "learning_rate": 3.96e-06, + "loss": 0.6095, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 4.410024642944336, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.5743, + "step": 225 + }, + { + "epoch": 0.14, + "grad_norm": 4.351156711578369, + "learning_rate": 4.960000000000001e-06, + "loss": 0.5277, + "step": 250 + }, + { + "epoch": 0.15, + "grad_norm": 4.568362712860107, + "learning_rate": 5.460000000000001e-06, + "loss": 0.5007, + "step": 275 + }, + { + "epoch": 0.17, + "grad_norm": 3.854410409927368, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.4681, + "step": 300 + }, + { + "epoch": 0.18, + "grad_norm": 4.4026031494140625, + "learning_rate": 6.460000000000001e-06, + "loss": 0.4569, + "step": 325 + }, + { + "epoch": 0.2, + "grad_norm": 4.597707271575928, + "learning_rate": 6.96e-06, + "loss": 0.4247, + "step": 350 + }, + { + "epoch": 0.21, + "grad_norm": 4.075223445892334, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.4032, + "step": 375 + }, + { + "epoch": 0.22, + "grad_norm": 4.3208417892456055, + "learning_rate": 7.960000000000002e-06, + "loss": 0.3981, + "step": 400 + }, + { + "epoch": 0.24, + "grad_norm": 3.89902400970459, + "learning_rate": 8.46e-06, + "loss": 0.3774, + "step": 425 + }, + { + "epoch": 0.25, + "grad_norm": 3.888470411300659, + "learning_rate": 8.96e-06, + "loss": 0.3621, + "step": 450 + }, + { + "epoch": 0.27, + "grad_norm": 4.14376974105835, + "learning_rate": 9.460000000000001e-06, + "loss": 0.3659, + "step": 475 + }, + { + "epoch": 0.28, + "grad_norm": 3.7522778511047363, + "learning_rate": 9.960000000000001e-06, + "loss": 0.3438, + "step": 500 + }, + { + "epoch": 0.29, + "grad_norm": 3.84590482711792, + "learning_rate": 9.997688442211056e-06, + "loss": 0.341, + "step": 525 + }, + { + "epoch": 0.31, + "grad_norm": 3.9214351177215576, + "learning_rate": 9.995175879396986e-06, + "loss": 0.3355, + "step": 550 + }, + { + "epoch": 0.32, + "grad_norm": 3.864016056060791, + "learning_rate": 9.992663316582915e-06, + "loss": 0.3244, + "step": 575 + }, + { + "epoch": 0.34, + "grad_norm": 3.4953441619873047, + "learning_rate": 9.990150753768844e-06, + "loss": 0.3129, + "step": 600 + }, + { + "epoch": 0.35, + "grad_norm": 4.370288372039795, + "learning_rate": 9.987638190954775e-06, + "loss": 0.3057, + "step": 625 + }, + { + "epoch": 0.36, + "grad_norm": 3.6735498905181885, + "learning_rate": 9.985125628140705e-06, + "loss": 0.2917, + "step": 650 + }, + { + "epoch": 0.38, + "grad_norm": 3.4687347412109375, + "learning_rate": 9.982613065326634e-06, + "loss": 0.2969, + "step": 675 + }, + { + "epoch": 0.39, + "grad_norm": 3.2111527919769287, + "learning_rate": 9.980100502512565e-06, + "loss": 0.2899, + "step": 700 + }, + { + "epoch": 0.41, + "grad_norm": 3.646392583847046, + "learning_rate": 9.977587939698493e-06, + "loss": 0.2845, + "step": 725 + }, + { + "epoch": 0.42, + "grad_norm": 3.9819488525390625, + "learning_rate": 9.975075376884424e-06, + "loss": 0.2725, + "step": 750 + }, + { + "epoch": 0.43, + "grad_norm": 3.3497848510742188, + "learning_rate": 9.972562814070353e-06, + "loss": 0.2711, + "step": 775 + }, + { + "epoch": 0.45, + "grad_norm": 3.792102813720703, + "learning_rate": 9.970050251256282e-06, + "loss": 0.2668, + "step": 800 + }, + { + "epoch": 0.46, + "grad_norm": 3.645860195159912, + "learning_rate": 9.967537688442212e-06, + "loss": 0.2594, + "step": 825 + }, + { + "epoch": 0.48, + "grad_norm": 3.270514488220215, + "learning_rate": 9.965025125628141e-06, + "loss": 0.265, + "step": 850 + }, + { + "epoch": 0.49, + "grad_norm": 3.535519599914551, + "learning_rate": 9.96251256281407e-06, + "loss": 0.2611, + "step": 875 + }, + { + "epoch": 0.5, + "grad_norm": 3.8991453647613525, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2469, + "step": 900 + }, + { + "epoch": 0.52, + "grad_norm": 3.4293551445007324, + "learning_rate": 9.95748743718593e-06, + "loss": 0.2487, + "step": 925 + }, + { + "epoch": 0.53, + "grad_norm": 3.645487070083618, + "learning_rate": 9.95497487437186e-06, + "loss": 0.2492, + "step": 950 + }, + { + "epoch": 0.55, + "grad_norm": 3.3415768146514893, + "learning_rate": 9.952462311557791e-06, + "loss": 0.2412, + "step": 975 + }, + { + "epoch": 0.56, + "grad_norm": 3.1853740215301514, + "learning_rate": 9.949949748743718e-06, + "loss": 0.243, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 0.19408917427062988, + "eval_runtime": 795.5115, + "eval_samples_per_second": 1.935, + "eval_steps_per_second": 1.935, + "eval_wer": 27.764142122487144, + "step": 1000 + }, + { + "epoch": 0.57, + "grad_norm": 3.6156532764434814, + "learning_rate": 9.94743718592965e-06, + "loss": 0.2339, + "step": 1025 + }, + { + "epoch": 0.59, + "grad_norm": 3.8019535541534424, + "learning_rate": 9.944924623115579e-06, + "loss": 0.2396, + "step": 1050 + }, + { + "epoch": 0.6, + "grad_norm": 3.9320361614227295, + "learning_rate": 9.942412060301508e-06, + "loss": 0.2298, + "step": 1075 + }, + { + "epoch": 0.62, + "grad_norm": 3.9764721393585205, + "learning_rate": 9.93989949748744e-06, + "loss": 0.2273, + "step": 1100 + }, + { + "epoch": 0.63, + "grad_norm": 3.4414422512054443, + "learning_rate": 9.937386934673367e-06, + "loss": 0.2203, + "step": 1125 + }, + { + "epoch": 0.64, + "grad_norm": 3.126206398010254, + "learning_rate": 9.934874371859298e-06, + "loss": 0.2187, + "step": 1150 + }, + { + "epoch": 0.66, + "grad_norm": 3.3051350116729736, + "learning_rate": 9.932361809045227e-06, + "loss": 0.2227, + "step": 1175 + }, + { + "epoch": 0.67, + "grad_norm": 3.189908981323242, + "learning_rate": 9.929849246231156e-06, + "loss": 0.2203, + "step": 1200 + }, + { + "epoch": 0.69, + "grad_norm": 3.0853095054626465, + "learning_rate": 9.927336683417086e-06, + "loss": 0.2116, + "step": 1225 + }, + { + "epoch": 0.7, + "grad_norm": 3.26802921295166, + "learning_rate": 9.924824120603017e-06, + "loss": 0.2136, + "step": 1250 + }, + { + "epoch": 0.71, + "grad_norm": 3.402909517288208, + "learning_rate": 9.922311557788944e-06, + "loss": 0.2171, + "step": 1275 + }, + { + "epoch": 0.73, + "grad_norm": 3.3051140308380127, + "learning_rate": 9.919798994974875e-06, + "loss": 0.2167, + "step": 1300 + }, + { + "epoch": 0.74, + "grad_norm": 3.3593668937683105, + "learning_rate": 9.917286432160805e-06, + "loss": 0.2065, + "step": 1325 + }, + { + "epoch": 0.76, + "grad_norm": 3.34014892578125, + "learning_rate": 9.914773869346734e-06, + "loss": 0.2095, + "step": 1350 + }, + { + "epoch": 0.77, + "grad_norm": 2.971251964569092, + "learning_rate": 9.912261306532665e-06, + "loss": 0.2054, + "step": 1375 + }, + { + "epoch": 0.78, + "grad_norm": 3.027353286743164, + "learning_rate": 9.909748743718593e-06, + "loss": 0.2028, + "step": 1400 + }, + { + "epoch": 0.8, + "grad_norm": 3.7483932971954346, + "learning_rate": 9.907236180904524e-06, + "loss": 0.1977, + "step": 1425 + }, + { + "epoch": 0.81, + "grad_norm": 3.134272575378418, + "learning_rate": 9.904723618090453e-06, + "loss": 0.204, + "step": 1450 + }, + { + "epoch": 0.82, + "grad_norm": 3.5834851264953613, + "learning_rate": 9.902211055276382e-06, + "loss": 0.1977, + "step": 1475 + }, + { + "epoch": 0.84, + "grad_norm": 3.184398889541626, + "learning_rate": 9.899698492462312e-06, + "loss": 0.1962, + "step": 1500 + }, + { + "epoch": 0.85, + "grad_norm": 2.9432215690612793, + "learning_rate": 9.897185929648243e-06, + "loss": 0.1921, + "step": 1525 + }, + { + "epoch": 0.87, + "grad_norm": 3.1441590785980225, + "learning_rate": 9.894673366834172e-06, + "loss": 0.1982, + "step": 1550 + }, + { + "epoch": 0.88, + "grad_norm": 3.326399803161621, + "learning_rate": 9.892160804020101e-06, + "loss": 0.1914, + "step": 1575 + }, + { + "epoch": 0.89, + "grad_norm": 2.968341112136841, + "learning_rate": 9.88964824120603e-06, + "loss": 0.189, + "step": 1600 + }, + { + "epoch": 0.91, + "grad_norm": 3.158761978149414, + "learning_rate": 9.88713567839196e-06, + "loss": 0.1836, + "step": 1625 + }, + { + "epoch": 0.92, + "grad_norm": 2.8407812118530273, + "learning_rate": 9.884623115577891e-06, + "loss": 0.1849, + "step": 1650 + }, + { + "epoch": 0.94, + "grad_norm": 3.1481268405914307, + "learning_rate": 9.882110552763819e-06, + "loss": 0.1893, + "step": 1675 + }, + { + "epoch": 0.95, + "grad_norm": 3.0672945976257324, + "learning_rate": 9.87959798994975e-06, + "loss": 0.1835, + "step": 1700 + }, + { + "epoch": 0.96, + "grad_norm": 3.3139028549194336, + "learning_rate": 9.877085427135679e-06, + "loss": 0.1922, + "step": 1725 + }, + { + "epoch": 0.98, + "grad_norm": 3.3685638904571533, + "learning_rate": 9.874572864321608e-06, + "loss": 0.1848, + "step": 1750 + }, + { + "epoch": 0.99, + "grad_norm": 3.7184336185455322, + "learning_rate": 9.87206030150754e-06, + "loss": 0.1807, + "step": 1775 + }, + { + "epoch": 1.01, + "grad_norm": 2.671935796737671, + "learning_rate": 9.869547738693469e-06, + "loss": 0.1734, + "step": 1800 + }, + { + "epoch": 1.02, + "grad_norm": 3.1680548191070557, + "learning_rate": 9.867035175879398e-06, + "loss": 0.16, + "step": 1825 + }, + { + "epoch": 1.03, + "grad_norm": 2.9266278743743896, + "learning_rate": 9.864522613065327e-06, + "loss": 0.1562, + "step": 1850 + }, + { + "epoch": 1.05, + "grad_norm": 2.9216926097869873, + "learning_rate": 9.862010050251257e-06, + "loss": 0.1557, + "step": 1875 + }, + { + "epoch": 1.06, + "grad_norm": 3.0652902126312256, + "learning_rate": 9.859497487437186e-06, + "loss": 0.1555, + "step": 1900 + }, + { + "epoch": 1.08, + "grad_norm": 3.542198657989502, + "learning_rate": 9.856984924623117e-06, + "loss": 0.1563, + "step": 1925 + }, + { + "epoch": 1.09, + "grad_norm": 3.262537717819214, + "learning_rate": 9.854472361809046e-06, + "loss": 0.1558, + "step": 1950 + }, + { + "epoch": 1.1, + "grad_norm": 2.5913093090057373, + "learning_rate": 9.851959798994976e-06, + "loss": 0.1494, + "step": 1975 + }, + { + "epoch": 1.12, + "grad_norm": 2.803159475326538, + "learning_rate": 9.849447236180905e-06, + "loss": 0.1562, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 0.1435854583978653, + "eval_runtime": 755.3288, + "eval_samples_per_second": 2.038, + "eval_steps_per_second": 2.038, + "eval_wer": 21.62809724170173, + "step": 2000 + }, + { + "epoch": 1.13, + "grad_norm": 2.4067015647888184, + "learning_rate": 9.846934673366834e-06, + "loss": 0.1497, + "step": 2025 + }, + { + "epoch": 1.15, + "grad_norm": 3.3005032539367676, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1521, + "step": 2050 + }, + { + "epoch": 1.16, + "grad_norm": 3.004237651824951, + "learning_rate": 9.841909547738695e-06, + "loss": 0.152, + "step": 2075 + }, + { + "epoch": 1.17, + "grad_norm": 3.2019922733306885, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1522, + "step": 2100 + }, + { + "epoch": 1.19, + "grad_norm": 2.914491891860962, + "learning_rate": 9.836884422110553e-06, + "loss": 0.1549, + "step": 2125 + }, + { + "epoch": 1.2, + "grad_norm": 2.960998773574829, + "learning_rate": 9.834371859296483e-06, + "loss": 0.1518, + "step": 2150 + }, + { + "epoch": 1.22, + "grad_norm": 3.2093870639801025, + "learning_rate": 9.831859296482414e-06, + "loss": 0.1469, + "step": 2175 + }, + { + "epoch": 1.23, + "grad_norm": 3.311241626739502, + "learning_rate": 9.829346733668343e-06, + "loss": 0.1522, + "step": 2200 + }, + { + "epoch": 1.24, + "grad_norm": 2.9978621006011963, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1457, + "step": 2225 + }, + { + "epoch": 1.26, + "grad_norm": 3.2164812088012695, + "learning_rate": 9.824321608040202e-06, + "loss": 0.1497, + "step": 2250 + }, + { + "epoch": 1.27, + "grad_norm": 2.8114171028137207, + "learning_rate": 9.821809045226131e-06, + "loss": 0.1413, + "step": 2275 + }, + { + "epoch": 1.29, + "grad_norm": 3.010749578475952, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1421, + "step": 2300 + }, + { + "epoch": 1.3, + "grad_norm": 2.845639705657959, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1439, + "step": 2325 + }, + { + "epoch": 1.31, + "grad_norm": 2.800644874572754, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1469, + "step": 2350 + }, + { + "epoch": 1.33, + "grad_norm": 2.814570188522339, + "learning_rate": 9.81175879396985e-06, + "loss": 0.1429, + "step": 2375 + }, + { + "epoch": 1.34, + "grad_norm": 2.6885321140289307, + "learning_rate": 9.809246231155781e-06, + "loss": 0.1382, + "step": 2400 + }, + { + "epoch": 1.36, + "grad_norm": 2.9958417415618896, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1409, + "step": 2425 + }, + { + "epoch": 1.37, + "grad_norm": 2.4826242923736572, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1424, + "step": 2450 + }, + { + "epoch": 1.38, + "grad_norm": 3.3825933933258057, + "learning_rate": 9.801708542713569e-06, + "loss": 0.1406, + "step": 2475 + }, + { + "epoch": 1.4, + "grad_norm": 2.864114284515381, + "learning_rate": 9.799195979899498e-06, + "loss": 0.1421, + "step": 2500 + }, + { + "epoch": 1.41, + "grad_norm": 2.9447243213653564, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1426, + "step": 2525 + }, + { + "epoch": 1.43, + "grad_norm": 3.1255292892456055, + "learning_rate": 9.794170854271357e-06, + "loss": 0.1397, + "step": 2550 + }, + { + "epoch": 1.44, + "grad_norm": 2.910285234451294, + "learning_rate": 9.791658291457288e-06, + "loss": 0.1425, + "step": 2575 + }, + { + "epoch": 1.45, + "grad_norm": 3.661036491394043, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1425, + "step": 2600 + }, + { + "epoch": 1.47, + "grad_norm": 2.7877285480499268, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1359, + "step": 2625 + }, + { + "epoch": 1.48, + "grad_norm": 2.870011806488037, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1367, + "step": 2650 + }, + { + "epoch": 1.5, + "grad_norm": 3.2470264434814453, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1329, + "step": 2675 + }, + { + "epoch": 1.51, + "grad_norm": 2.963841676712036, + "learning_rate": 9.779095477386934e-06, + "loss": 0.1404, + "step": 2700 + }, + { + "epoch": 1.52, + "grad_norm": 3.303471088409424, + "learning_rate": 9.776582914572866e-06, + "loss": 0.1399, + "step": 2725 + }, + { + "epoch": 1.54, + "grad_norm": 2.826647996902466, + "learning_rate": 9.774070351758795e-06, + "loss": 0.1328, + "step": 2750 + }, + { + "epoch": 1.55, + "grad_norm": 3.1963107585906982, + "learning_rate": 9.771557788944724e-06, + "loss": 0.1348, + "step": 2775 + }, + { + "epoch": 1.57, + "grad_norm": 3.0382866859436035, + "learning_rate": 9.769045226130655e-06, + "loss": 0.1308, + "step": 2800 + }, + { + "epoch": 1.58, + "grad_norm": 3.1483850479125977, + "learning_rate": 9.766532663316583e-06, + "loss": 0.1349, + "step": 2825 + }, + { + "epoch": 1.59, + "grad_norm": 2.8060142993927, + "learning_rate": 9.764020100502514e-06, + "loss": 0.1285, + "step": 2850 + }, + { + "epoch": 1.61, + "grad_norm": 2.8080978393554688, + "learning_rate": 9.761507537688443e-06, + "loss": 0.1311, + "step": 2875 + }, + { + "epoch": 1.62, + "grad_norm": 2.397549867630005, + "learning_rate": 9.758994974874372e-06, + "loss": 0.1319, + "step": 2900 + }, + { + "epoch": 1.64, + "grad_norm": 2.6907544136047363, + "learning_rate": 9.756482412060302e-06, + "loss": 0.1299, + "step": 2925 + }, + { + "epoch": 1.65, + "grad_norm": 3.1900405883789062, + "learning_rate": 9.753969849246233e-06, + "loss": 0.1318, + "step": 2950 + }, + { + "epoch": 1.66, + "grad_norm": 3.9892752170562744, + "learning_rate": 9.75145728643216e-06, + "loss": 0.1304, + "step": 2975 + }, + { + "epoch": 1.68, + "grad_norm": 3.261746883392334, + "learning_rate": 9.748944723618091e-06, + "loss": 0.1267, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 0.12474098801612854, + "eval_runtime": 756.619, + "eval_samples_per_second": 2.034, + "eval_steps_per_second": 2.034, + "eval_wer": 19.097709209911173, + "step": 3000 + }, + { + "epoch": 1.69, + "grad_norm": 2.8653976917266846, + "learning_rate": 9.74643216080402e-06, + "loss": 0.1319, + "step": 3025 + }, + { + "epoch": 1.71, + "grad_norm": 3.0463194847106934, + "learning_rate": 9.74391959798995e-06, + "loss": 0.1304, + "step": 3050 + }, + { + "epoch": 1.72, + "grad_norm": 2.6778175830841064, + "learning_rate": 9.741407035175881e-06, + "loss": 0.1226, + "step": 3075 + }, + { + "epoch": 1.73, + "grad_norm": 2.887112855911255, + "learning_rate": 9.738894472361809e-06, + "loss": 0.125, + "step": 3100 + }, + { + "epoch": 1.75, + "grad_norm": 2.8986849784851074, + "learning_rate": 9.73638190954774e-06, + "loss": 0.1276, + "step": 3125 + }, + { + "epoch": 1.76, + "grad_norm": 2.488614320755005, + "learning_rate": 9.733869346733669e-06, + "loss": 0.1273, + "step": 3150 + }, + { + "epoch": 1.78, + "grad_norm": 3.011021614074707, + "learning_rate": 9.731356783919598e-06, + "loss": 0.132, + "step": 3175 + }, + { + "epoch": 1.79, + "grad_norm": 2.8007774353027344, + "learning_rate": 9.72884422110553e-06, + "loss": 0.1265, + "step": 3200 + }, + { + "epoch": 1.8, + "grad_norm": 2.860513210296631, + "learning_rate": 9.726331658291459e-06, + "loss": 0.1232, + "step": 3225 + }, + { + "epoch": 1.82, + "grad_norm": 2.7371485233306885, + "learning_rate": 9.723819095477388e-06, + "loss": 0.1263, + "step": 3250 + }, + { + "epoch": 1.83, + "grad_norm": 2.8170742988586426, + "learning_rate": 9.721306532663317e-06, + "loss": 0.13, + "step": 3275 + }, + { + "epoch": 1.85, + "grad_norm": 2.6937077045440674, + "learning_rate": 9.718793969849247e-06, + "loss": 0.1245, + "step": 3300 + }, + { + "epoch": 1.86, + "grad_norm": 3.0572993755340576, + "learning_rate": 9.716281407035176e-06, + "loss": 0.1244, + "step": 3325 + }, + { + "epoch": 1.87, + "grad_norm": 2.82900071144104, + "learning_rate": 9.713768844221107e-06, + "loss": 0.1221, + "step": 3350 + }, + { + "epoch": 1.89, + "grad_norm": 2.8739500045776367, + "learning_rate": 9.711256281407035e-06, + "loss": 0.1243, + "step": 3375 + }, + { + "epoch": 1.9, + "grad_norm": 2.8662750720977783, + "learning_rate": 9.708743718592966e-06, + "loss": 0.126, + "step": 3400 + }, + { + "epoch": 1.92, + "grad_norm": 3.0628154277801514, + "learning_rate": 9.706231155778895e-06, + "loss": 0.1232, + "step": 3425 + }, + { + "epoch": 1.93, + "grad_norm": 2.487823247909546, + "learning_rate": 9.703718592964824e-06, + "loss": 0.1203, + "step": 3450 + }, + { + "epoch": 1.94, + "grad_norm": 2.960767984390259, + "learning_rate": 9.701206030150755e-06, + "loss": 0.1216, + "step": 3475 + }, + { + "epoch": 1.96, + "grad_norm": 2.85933518409729, + "learning_rate": 9.698693467336685e-06, + "loss": 0.1211, + "step": 3500 + }, + { + "epoch": 1.97, + "grad_norm": 3.0033442974090576, + "learning_rate": 9.696180904522614e-06, + "loss": 0.1191, + "step": 3525 + }, + { + "epoch": 1.99, + "grad_norm": 3.1155312061309814, + "learning_rate": 9.693668341708543e-06, + "loss": 0.1246, + "step": 3550 + }, + { + "epoch": 2.0, + "grad_norm": 3.1561484336853027, + "learning_rate": 9.691155778894473e-06, + "loss": 0.1184, + "step": 3575 + }, + { + "epoch": 2.01, + "grad_norm": 2.058255910873413, + "learning_rate": 9.688643216080402e-06, + "loss": 0.0988, + "step": 3600 + }, + { + "epoch": 2.03, + "grad_norm": 3.017179489135742, + "learning_rate": 9.686130653266333e-06, + "loss": 0.0955, + "step": 3625 + }, + { + "epoch": 2.04, + "grad_norm": 2.5454823970794678, + "learning_rate": 9.683618090452262e-06, + "loss": 0.0991, + "step": 3650 + }, + { + "epoch": 2.06, + "grad_norm": 2.7024450302124023, + "learning_rate": 9.681105527638192e-06, + "loss": 0.0948, + "step": 3675 + }, + { + "epoch": 2.07, + "grad_norm": 2.32110333442688, + "learning_rate": 9.678592964824121e-06, + "loss": 0.0925, + "step": 3700 + }, + { + "epoch": 2.08, + "grad_norm": 2.344771385192871, + "learning_rate": 9.67608040201005e-06, + "loss": 0.0986, + "step": 3725 + }, + { + "epoch": 2.1, + "grad_norm": 2.4634320735931396, + "learning_rate": 9.673567839195981e-06, + "loss": 0.0936, + "step": 3750 + }, + { + "epoch": 2.11, + "grad_norm": 2.9091079235076904, + "learning_rate": 9.67105527638191e-06, + "loss": 0.0972, + "step": 3775 + }, + { + "epoch": 2.13, + "grad_norm": 2.6262307167053223, + "learning_rate": 9.66854271356784e-06, + "loss": 0.0924, + "step": 3800 + }, + { + "epoch": 2.14, + "grad_norm": 2.5775580406188965, + "learning_rate": 9.666030150753771e-06, + "loss": 0.0967, + "step": 3825 + }, + { + "epoch": 2.15, + "grad_norm": 2.659714698791504, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0991, + "step": 3850 + }, + { + "epoch": 2.17, + "grad_norm": 2.4374215602874756, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0976, + "step": 3875 + }, + { + "epoch": 2.18, + "grad_norm": 2.0937116146087646, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0939, + "step": 3900 + }, + { + "epoch": 2.2, + "grad_norm": 2.6346898078918457, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0975, + "step": 3925 + }, + { + "epoch": 2.21, + "grad_norm": 2.417570114135742, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0899, + "step": 3950 + }, + { + "epoch": 2.22, + "grad_norm": 2.940917491912842, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0951, + "step": 3975 + }, + { + "epoch": 2.24, + "grad_norm": 2.552783727645874, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0954, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 0.11762142926454544, + "eval_runtime": 763.4389, + "eval_samples_per_second": 2.016, + "eval_steps_per_second": 2.016, + "eval_wer": 17.50818139317438, + "step": 4000 + }, + { + "epoch": 2.25, + "grad_norm": 2.5978775024414062, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0928, + "step": 4025 + }, + { + "epoch": 2.27, + "grad_norm": 2.615558385848999, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0949, + "step": 4050 + }, + { + "epoch": 2.28, + "grad_norm": 3.1624608039855957, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0925, + "step": 4075 + }, + { + "epoch": 2.29, + "grad_norm": 2.6607539653778076, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0896, + "step": 4100 + }, + { + "epoch": 2.31, + "grad_norm": 2.4963743686676025, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0921, + "step": 4125 + }, + { + "epoch": 2.32, + "grad_norm": 2.6439154148101807, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0949, + "step": 4150 + }, + { + "epoch": 2.34, + "grad_norm": 2.8848607540130615, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0917, + "step": 4175 + }, + { + "epoch": 2.35, + "grad_norm": 2.6331586837768555, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0898, + "step": 4200 + }, + { + "epoch": 2.36, + "grad_norm": 2.608696222305298, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0936, + "step": 4225 + }, + { + "epoch": 2.38, + "grad_norm": 2.6145801544189453, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0957, + "step": 4250 + }, + { + "epoch": 2.39, + "grad_norm": 2.7365803718566895, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0908, + "step": 4275 + }, + { + "epoch": 2.4, + "grad_norm": 2.540405750274658, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0866, + "step": 4300 + }, + { + "epoch": 2.42, + "grad_norm": 2.540832281112671, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0954, + "step": 4325 + }, + { + "epoch": 2.43, + "grad_norm": 2.989983320236206, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0914, + "step": 4350 + }, + { + "epoch": 2.45, + "grad_norm": 2.5780951976776123, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0893, + "step": 4375 + }, + { + "epoch": 2.46, + "grad_norm": 2.9423980712890625, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0875, + "step": 4400 + }, + { + "epoch": 2.47, + "grad_norm": 2.7259531021118164, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0891, + "step": 4425 + }, + { + "epoch": 2.49, + "grad_norm": 2.503217935562134, + "learning_rate": 9.603316582914573e-06, + "loss": 0.093, + "step": 4450 + }, + { + "epoch": 2.5, + "grad_norm": 2.8601889610290527, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0899, + "step": 4475 + }, + { + "epoch": 2.52, + "grad_norm": 2.5476183891296387, + "learning_rate": 9.598291457286433e-06, + "loss": 0.091, + "step": 4500 + }, + { + "epoch": 2.53, + "grad_norm": 2.5602009296417236, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0932, + "step": 4525 + }, + { + "epoch": 2.54, + "grad_norm": 2.4345078468322754, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0913, + "step": 4550 + }, + { + "epoch": 2.56, + "grad_norm": 2.8410239219665527, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0905, + "step": 4575 + }, + { + "epoch": 2.57, + "grad_norm": 2.575145959854126, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0861, + "step": 4600 + }, + { + "epoch": 2.59, + "grad_norm": 2.4720702171325684, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0912, + "step": 4625 + }, + { + "epoch": 2.6, + "grad_norm": 2.776111602783203, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0863, + "step": 4650 + }, + { + "epoch": 2.61, + "grad_norm": 2.4933269023895264, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0868, + "step": 4675 + }, + { + "epoch": 2.63, + "grad_norm": 2.7585020065307617, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0856, + "step": 4700 + }, + { + "epoch": 2.64, + "grad_norm": 2.527068853378296, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0881, + "step": 4725 + }, + { + "epoch": 2.66, + "grad_norm": 2.543527126312256, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0855, + "step": 4750 + }, + { + "epoch": 2.67, + "grad_norm": 2.94807505607605, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0899, + "step": 4775 + }, + { + "epoch": 2.68, + "grad_norm": 2.7505455017089844, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0916, + "step": 4800 + }, + { + "epoch": 2.7, + "grad_norm": 2.643261432647705, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0831, + "step": 4825 + }, + { + "epoch": 2.71, + "grad_norm": 2.9926657676696777, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0896, + "step": 4850 + }, + { + "epoch": 2.73, + "grad_norm": 2.718395233154297, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0868, + "step": 4875 + }, + { + "epoch": 2.74, + "grad_norm": 2.664257764816284, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0909, + "step": 4900 + }, + { + "epoch": 2.75, + "grad_norm": 2.9240074157714844, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0872, + "step": 4925 + }, + { + "epoch": 2.77, + "grad_norm": 2.5917422771453857, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0895, + "step": 4950 + }, + { + "epoch": 2.78, + "grad_norm": 2.4758799076080322, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0855, + "step": 4975 + }, + { + "epoch": 2.8, + "grad_norm": 2.819601535797119, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0893, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 0.11293814331293106, + "eval_runtime": 756.1396, + "eval_samples_per_second": 2.035, + "eval_steps_per_second": 2.035, + "eval_wer": 17.18092566619916, + "step": 5000 + }, + { + "epoch": 2.81, + "grad_norm": 2.6236979961395264, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0894, + "step": 5025 + }, + { + "epoch": 2.82, + "grad_norm": 2.684126138687134, + "learning_rate": 9.543015075376885e-06, + "loss": 0.086, + "step": 5050 + }, + { + "epoch": 2.84, + "grad_norm": 2.662917375564575, + "learning_rate": 9.540502512562815e-06, + "loss": 0.084, + "step": 5075 + }, + { + "epoch": 2.85, + "grad_norm": 2.825493097305298, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0861, + "step": 5100 + }, + { + "epoch": 2.87, + "grad_norm": 3.0958547592163086, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0845, + "step": 5125 + }, + { + "epoch": 2.88, + "grad_norm": 2.7122533321380615, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0842, + "step": 5150 + }, + { + "epoch": 2.89, + "grad_norm": 2.8364784717559814, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0901, + "step": 5175 + }, + { + "epoch": 2.91, + "grad_norm": 2.7396883964538574, + "learning_rate": 9.527939698492463e-06, + "loss": 0.086, + "step": 5200 + }, + { + "epoch": 2.92, + "grad_norm": 2.4766998291015625, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0836, + "step": 5225 + }, + { + "epoch": 2.94, + "grad_norm": 2.544602632522583, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0856, + "step": 5250 + }, + { + "epoch": 2.95, + "grad_norm": 3.068958044052124, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0872, + "step": 5275 + }, + { + "epoch": 2.96, + "grad_norm": 2.2795217037200928, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0846, + "step": 5300 + }, + { + "epoch": 2.98, + "grad_norm": 2.6451680660247803, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0797, + "step": 5325 + }, + { + "epoch": 2.99, + "grad_norm": 2.696707010269165, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0863, + "step": 5350 + }, + { + "epoch": 3.01, + "grad_norm": 2.5157997608184814, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0736, + "step": 5375 + }, + { + "epoch": 3.02, + "grad_norm": 2.3105037212371826, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0648, + "step": 5400 + }, + { + "epoch": 3.03, + "grad_norm": 2.149718761444092, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0657, + "step": 5425 + }, + { + "epoch": 3.05, + "grad_norm": 2.212775468826294, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0616, + "step": 5450 + }, + { + "epoch": 3.06, + "grad_norm": 2.334582805633545, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0639, + "step": 5475 + }, + { + "epoch": 3.08, + "grad_norm": 2.464796781539917, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0633, + "step": 5500 + }, + { + "epoch": 3.09, + "grad_norm": 2.184905767440796, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0594, + "step": 5525 + }, + { + "epoch": 3.1, + "grad_norm": 2.2937352657318115, + "learning_rate": 9.492763819095479e-06, + "loss": 0.062, + "step": 5550 + }, + { + "epoch": 3.12, + "grad_norm": 2.1052401065826416, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0632, + "step": 5575 + }, + { + "epoch": 3.13, + "grad_norm": 2.241842031478882, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0629, + "step": 5600 + }, + { + "epoch": 3.15, + "grad_norm": 2.6391873359680176, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0639, + "step": 5625 + }, + { + "epoch": 3.16, + "grad_norm": 2.8353567123413086, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0632, + "step": 5650 + }, + { + "epoch": 3.17, + "grad_norm": 2.1846556663513184, + "learning_rate": 9.480201005025125e-06, + "loss": 0.064, + "step": 5675 + }, + { + "epoch": 3.19, + "grad_norm": 2.6169180870056152, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0631, + "step": 5700 + }, + { + "epoch": 3.2, + "grad_norm": 2.0712201595306396, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0621, + "step": 5725 + }, + { + "epoch": 3.22, + "grad_norm": 2.634260654449463, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0615, + "step": 5750 + }, + { + "epoch": 3.23, + "grad_norm": 2.237175941467285, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0623, + "step": 5775 + }, + { + "epoch": 3.24, + "grad_norm": 2.2260894775390625, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0604, + "step": 5800 + }, + { + "epoch": 3.26, + "grad_norm": 2.3916525840759277, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0631, + "step": 5825 + }, + { + "epoch": 3.27, + "grad_norm": 2.0022995471954346, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0627, + "step": 5850 + }, + { + "epoch": 3.29, + "grad_norm": 2.283360242843628, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0635, + "step": 5875 + }, + { + "epoch": 3.3, + "grad_norm": 2.1689538955688477, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0642, + "step": 5900 + }, + { + "epoch": 3.31, + "grad_norm": 2.0184733867645264, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0601, + "step": 5925 + }, + { + "epoch": 3.33, + "grad_norm": 2.105276584625244, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0609, + "step": 5950 + }, + { + "epoch": 3.34, + "grad_norm": 2.5847296714782715, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0595, + "step": 5975 + }, + { + "epoch": 3.36, + "grad_norm": 2.2198989391326904, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0635, + "step": 6000 + }, + { + "epoch": 3.36, + "eval_loss": 0.11239945143461227, + "eval_runtime": 756.8317, + "eval_samples_per_second": 2.033, + "eval_steps_per_second": 2.033, + "eval_wer": 16.3218793828892, + "step": 6000 + }, + { + "epoch": 3.37, + "grad_norm": 2.007871150970459, + "learning_rate": 9.44502512562814e-06, + "loss": 0.063, + "step": 6025 + }, + { + "epoch": 3.38, + "grad_norm": 2.6297013759613037, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0611, + "step": 6050 + }, + { + "epoch": 3.4, + "grad_norm": 2.375972270965576, + "learning_rate": 9.440000000000001e-06, + "loss": 0.062, + "step": 6075 + }, + { + "epoch": 3.41, + "grad_norm": 2.3270552158355713, + "learning_rate": 9.43748743718593e-06, + "loss": 0.0615, + "step": 6100 + }, + { + "epoch": 3.43, + "grad_norm": 2.0002822875976562, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0616, + "step": 6125 + }, + { + "epoch": 3.44, + "grad_norm": 2.214451789855957, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0608, + "step": 6150 + }, + { + "epoch": 3.45, + "grad_norm": 2.4241418838500977, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0605, + "step": 6175 + }, + { + "epoch": 3.47, + "grad_norm": 2.477762460708618, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0654, + "step": 6200 + }, + { + "epoch": 3.48, + "grad_norm": 2.4460363388061523, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0627, + "step": 6225 + }, + { + "epoch": 3.5, + "grad_norm": 2.7717792987823486, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0645, + "step": 6250 + }, + { + "epoch": 3.51, + "grad_norm": 2.597203016281128, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0595, + "step": 6275 + }, + { + "epoch": 3.52, + "grad_norm": 2.2559094429016113, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0609, + "step": 6300 + }, + { + "epoch": 3.54, + "grad_norm": 2.840709686279297, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0632, + "step": 6325 + }, + { + "epoch": 3.55, + "grad_norm": 2.603513479232788, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0643, + "step": 6350 + }, + { + "epoch": 3.57, + "grad_norm": 2.504890203475952, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0608, + "step": 6375 + }, + { + "epoch": 3.58, + "grad_norm": 2.752936840057373, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0589, + "step": 6400 + }, + { + "epoch": 3.59, + "grad_norm": 2.5600643157958984, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0603, + "step": 6425 + }, + { + "epoch": 3.61, + "grad_norm": 2.3973841667175293, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0601, + "step": 6450 + }, + { + "epoch": 3.62, + "grad_norm": 2.2357335090637207, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0575, + "step": 6475 + }, + { + "epoch": 3.64, + "grad_norm": 2.1826131343841553, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0595, + "step": 6500 + }, + { + "epoch": 3.65, + "grad_norm": 2.857245922088623, + "learning_rate": 9.394773869346736e-06, + "loss": 0.062, + "step": 6525 + }, + { + "epoch": 3.66, + "grad_norm": 2.547675371170044, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0627, + "step": 6550 + }, + { + "epoch": 3.68, + "grad_norm": 2.414590835571289, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0618, + "step": 6575 + }, + { + "epoch": 3.69, + "grad_norm": 2.5117504596710205, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0596, + "step": 6600 + }, + { + "epoch": 3.71, + "grad_norm": 2.2186226844787598, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0648, + "step": 6625 + }, + { + "epoch": 3.72, + "grad_norm": 2.602156400680542, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0607, + "step": 6650 + }, + { + "epoch": 3.73, + "grad_norm": 2.4105050563812256, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0606, + "step": 6675 + }, + { + "epoch": 3.75, + "grad_norm": 2.1411783695220947, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0603, + "step": 6700 + }, + { + "epoch": 3.76, + "grad_norm": 2.42120099067688, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0594, + "step": 6725 + }, + { + "epoch": 3.78, + "grad_norm": 2.2645204067230225, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0612, + "step": 6750 + }, + { + "epoch": 3.79, + "grad_norm": 2.5481066703796387, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0617, + "step": 6775 + }, + { + "epoch": 3.8, + "grad_norm": 2.574767827987671, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0614, + "step": 6800 + }, + { + "epoch": 3.82, + "grad_norm": 2.415529251098633, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0583, + "step": 6825 + }, + { + "epoch": 3.83, + "grad_norm": 1.980372667312622, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0565, + "step": 6850 + }, + { + "epoch": 3.85, + "grad_norm": 1.9812791347503662, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0544, + "step": 6875 + }, + { + "epoch": 3.86, + "grad_norm": 2.602052927017212, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0574, + "step": 6900 + }, + { + "epoch": 3.87, + "grad_norm": 2.5911543369293213, + "learning_rate": 9.354572864321608e-06, + "loss": 0.063, + "step": 6925 + }, + { + "epoch": 3.89, + "grad_norm": 2.4896442890167236, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0597, + "step": 6950 + }, + { + "epoch": 3.9, + "grad_norm": 2.1949775218963623, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0584, + "step": 6975 + }, + { + "epoch": 3.91, + "grad_norm": 2.9989583492279053, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0621, + "step": 7000 + }, + { + "epoch": 3.91, + "eval_loss": 0.11221127212047577, + "eval_runtime": 756.6479, + "eval_samples_per_second": 2.034, + "eval_steps_per_second": 2.034, + "eval_wer": 16.43291257597008, + "step": 7000 + }, + { + "epoch": 3.93, + "grad_norm": 2.7108559608459473, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0601, + "step": 7025 + }, + { + "epoch": 3.94, + "grad_norm": 2.0736100673675537, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0589, + "step": 7050 + }, + { + "epoch": 3.96, + "grad_norm": 2.7566006183624268, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0605, + "step": 7075 + }, + { + "epoch": 3.97, + "grad_norm": 2.0916481018066406, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0574, + "step": 7100 + }, + { + "epoch": 3.98, + "grad_norm": 2.1417672634124756, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0582, + "step": 7125 + }, + { + "epoch": 4.0, + "grad_norm": 2.253265142440796, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0605, + "step": 7150 + }, + { + "epoch": 4.01, + "grad_norm": 2.0841262340545654, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0432, + "step": 7175 + }, + { + "epoch": 4.03, + "grad_norm": 2.0625362396240234, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0405, + "step": 7200 + }, + { + "epoch": 4.04, + "grad_norm": 2.470043897628784, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0411, + "step": 7225 + }, + { + "epoch": 4.05, + "grad_norm": 1.765326976776123, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0412, + "step": 7250 + }, + { + "epoch": 4.07, + "grad_norm": 2.3616178035736084, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0397, + "step": 7275 + }, + { + "epoch": 4.08, + "grad_norm": 1.9313087463378906, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0407, + "step": 7300 + }, + { + "epoch": 4.1, + "grad_norm": 2.1446056365966797, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0388, + "step": 7325 + }, + { + "epoch": 4.11, + "grad_norm": 2.346489191055298, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0397, + "step": 7350 + }, + { + "epoch": 4.12, + "grad_norm": 2.1581079959869385, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0418, + "step": 7375 + }, + { + "epoch": 4.14, + "grad_norm": 2.124351739883423, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0403, + "step": 7400 + }, + { + "epoch": 4.15, + "grad_norm": 1.9916259050369263, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0398, + "step": 7425 + }, + { + "epoch": 4.17, + "grad_norm": 2.398169755935669, + "learning_rate": 9.30180904522613e-06, + "loss": 0.042, + "step": 7450 + }, + { + "epoch": 4.18, + "grad_norm": 2.5444700717926025, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0443, + "step": 7475 + }, + { + "epoch": 4.19, + "grad_norm": 1.9942892789840698, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0395, + "step": 7500 + }, + { + "epoch": 4.21, + "grad_norm": 2.062899351119995, + "learning_rate": 9.29427135678392e-06, + "loss": 0.04, + "step": 7525 + }, + { + "epoch": 4.22, + "grad_norm": 2.2951488494873047, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0422, + "step": 7550 + }, + { + "epoch": 4.24, + "grad_norm": 2.5485541820526123, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0411, + "step": 7575 + }, + { + "epoch": 4.25, + "grad_norm": 2.6162617206573486, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0398, + "step": 7600 + }, + { + "epoch": 4.26, + "grad_norm": 2.514819622039795, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0395, + "step": 7625 + }, + { + "epoch": 4.28, + "grad_norm": 2.1233532428741455, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0408, + "step": 7650 + }, + { + "epoch": 4.29, + "grad_norm": 2.1466000080108643, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0417, + "step": 7675 + }, + { + "epoch": 4.31, + "grad_norm": 1.9756128787994385, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0423, + "step": 7700 + }, + { + "epoch": 4.32, + "grad_norm": 2.0592334270477295, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0389, + "step": 7725 + }, + { + "epoch": 4.33, + "grad_norm": 2.252636432647705, + "learning_rate": 9.271658291457288e-06, + "loss": 0.04, + "step": 7750 + }, + { + "epoch": 4.35, + "grad_norm": 2.4046926498413086, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0397, + "step": 7775 + }, + { + "epoch": 4.36, + "grad_norm": 2.1105189323425293, + "learning_rate": 9.266633165829146e-06, + "loss": 0.039, + "step": 7800 + }, + { + "epoch": 4.38, + "grad_norm": 2.563977003097534, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0418, + "step": 7825 + }, + { + "epoch": 4.39, + "grad_norm": 2.2563071250915527, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0389, + "step": 7850 + }, + { + "epoch": 4.4, + "grad_norm": 2.0223286151885986, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0419, + "step": 7875 + }, + { + "epoch": 4.42, + "grad_norm": 2.190652370452881, + "learning_rate": 9.256683417085428e-06, + "loss": 0.041, + "step": 7900 + }, + { + "epoch": 4.43, + "grad_norm": 2.299483060836792, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0391, + "step": 7925 + }, + { + "epoch": 4.45, + "grad_norm": 2.4138424396514893, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0377, + "step": 7950 + }, + { + "epoch": 4.46, + "grad_norm": 1.9993958473205566, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0415, + "step": 7975 + }, + { + "epoch": 4.47, + "grad_norm": 2.4869425296783447, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0411, + "step": 8000 + }, + { + "epoch": 4.47, + "eval_loss": 0.11895450204610825, + "eval_runtime": 760.5202, + "eval_samples_per_second": 2.024, + "eval_steps_per_second": 2.024, + "eval_wer": 16.327723235156615, + "step": 8000 + }, + { + "epoch": 4.49, + "grad_norm": 2.0129308700561523, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0388, + "step": 8025 + }, + { + "epoch": 4.5, + "grad_norm": 2.1154327392578125, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0427, + "step": 8050 + }, + { + "epoch": 4.52, + "grad_norm": 2.203622579574585, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0406, + "step": 8075 + }, + { + "epoch": 4.53, + "grad_norm": 1.9519188404083252, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0401, + "step": 8100 + }, + { + "epoch": 4.54, + "grad_norm": 2.1124324798583984, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0426, + "step": 8125 + }, + { + "epoch": 4.56, + "grad_norm": 2.644378423690796, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0409, + "step": 8150 + }, + { + "epoch": 4.57, + "grad_norm": 2.176788568496704, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0388, + "step": 8175 + }, + { + "epoch": 4.59, + "grad_norm": 2.009443521499634, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0396, + "step": 8200 + }, + { + "epoch": 4.6, + "grad_norm": 2.2705161571502686, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0409, + "step": 8225 + }, + { + "epoch": 4.61, + "grad_norm": 2.00069522857666, + "learning_rate": 9.221507537688443e-06, + "loss": 0.041, + "step": 8250 + }, + { + "epoch": 4.63, + "grad_norm": 2.80146861076355, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0405, + "step": 8275 + }, + { + "epoch": 4.64, + "grad_norm": 2.2466108798980713, + "learning_rate": 9.216482412060302e-06, + "loss": 0.041, + "step": 8300 + }, + { + "epoch": 4.66, + "grad_norm": 1.9566564559936523, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0402, + "step": 8325 + }, + { + "epoch": 4.67, + "grad_norm": 2.1580088138580322, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0422, + "step": 8350 + }, + { + "epoch": 4.68, + "grad_norm": 2.2507355213165283, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0398, + "step": 8375 + }, + { + "epoch": 4.7, + "grad_norm": 2.5199291706085205, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0399, + "step": 8400 + }, + { + "epoch": 4.71, + "grad_norm": 2.264404296875, + "learning_rate": 9.20391959798995e-06, + "loss": 0.042, + "step": 8425 + }, + { + "epoch": 4.73, + "grad_norm": 2.2650234699249268, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0388, + "step": 8450 + }, + { + "epoch": 4.74, + "grad_norm": 2.489088773727417, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0405, + "step": 8475 + }, + { + "epoch": 4.75, + "grad_norm": 2.594514846801758, + "learning_rate": 9.19638190954774e-06, + "loss": 0.041, + "step": 8500 + }, + { + "epoch": 4.77, + "grad_norm": 1.95991849899292, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0409, + "step": 8525 + }, + { + "epoch": 4.78, + "grad_norm": 2.17498779296875, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0431, + "step": 8550 + }, + { + "epoch": 4.8, + "grad_norm": 2.225038528442383, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0415, + "step": 8575 + }, + { + "epoch": 4.81, + "grad_norm": 2.2811858654022217, + "learning_rate": 9.186331658291459e-06, + "loss": 0.042, + "step": 8600 + }, + { + "epoch": 4.82, + "grad_norm": 2.008151054382324, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0404, + "step": 8625 + }, + { + "epoch": 4.84, + "grad_norm": 2.2357606887817383, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0386, + "step": 8650 + }, + { + "epoch": 4.85, + "grad_norm": 2.1292245388031006, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0379, + "step": 8675 + }, + { + "epoch": 4.87, + "grad_norm": 2.490473747253418, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0405, + "step": 8700 + }, + { + "epoch": 4.88, + "grad_norm": 2.4771227836608887, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0388, + "step": 8725 + }, + { + "epoch": 4.89, + "grad_norm": 2.2272984981536865, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0401, + "step": 8750 + }, + { + "epoch": 4.91, + "grad_norm": 2.2645790576934814, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0377, + "step": 8775 + }, + { + "epoch": 4.92, + "grad_norm": 1.89028000831604, + "learning_rate": 9.166231155778895e-06, + "loss": 0.038, + "step": 8800 + }, + { + "epoch": 4.94, + "grad_norm": 2.4221341609954834, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0393, + "step": 8825 + }, + { + "epoch": 4.95, + "grad_norm": 2.5047740936279297, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0397, + "step": 8850 + }, + { + "epoch": 4.96, + "grad_norm": 2.0876448154449463, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0379, + "step": 8875 + }, + { + "epoch": 4.98, + "grad_norm": 2.498077630996704, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0419, + "step": 8900 + }, + { + "epoch": 4.99, + "grad_norm": 2.1457018852233887, + "learning_rate": 9.153668341708543e-06, + "loss": 0.04, + "step": 8925 + }, + { + "epoch": 5.01, + "grad_norm": 1.668181300163269, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0337, + "step": 8950 + }, + { + "epoch": 5.02, + "grad_norm": 1.5665873289108276, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0243, + "step": 8975 + }, + { + "epoch": 5.03, + "grad_norm": 1.9349863529205322, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0254, + "step": 9000 + }, + { + "epoch": 5.03, + "eval_loss": 0.12422630190849304, + "eval_runtime": 755.6025, + "eval_samples_per_second": 2.037, + "eval_steps_per_second": 2.037, + "eval_wer": 16.298503973819543, + "step": 9000 + }, + { + "epoch": 5.05, + "grad_norm": 1.7973382472991943, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0236, + "step": 9025 + }, + { + "epoch": 5.06, + "grad_norm": 1.7267107963562012, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0251, + "step": 9050 + }, + { + "epoch": 5.08, + "grad_norm": 1.7422178983688354, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0251, + "step": 9075 + }, + { + "epoch": 5.09, + "grad_norm": 1.876273274421692, + "learning_rate": 9.136080402010052e-06, + "loss": 0.025, + "step": 9100 + }, + { + "epoch": 5.1, + "grad_norm": 1.8056306838989258, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0258, + "step": 9125 + }, + { + "epoch": 5.12, + "grad_norm": 1.685093641281128, + "learning_rate": 9.13105527638191e-06, + "loss": 0.028, + "step": 9150 + }, + { + "epoch": 5.13, + "grad_norm": 2.3393263816833496, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0257, + "step": 9175 + }, + { + "epoch": 5.15, + "grad_norm": 1.628409504890442, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0241, + "step": 9200 + }, + { + "epoch": 5.16, + "grad_norm": 2.220980405807495, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0258, + "step": 9225 + }, + { + "epoch": 5.17, + "grad_norm": 1.9749892950057983, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0246, + "step": 9250 + }, + { + "epoch": 5.19, + "grad_norm": 1.7304292917251587, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0252, + "step": 9275 + }, + { + "epoch": 5.2, + "grad_norm": 2.0432450771331787, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0245, + "step": 9300 + }, + { + "epoch": 5.22, + "grad_norm": 2.0510048866271973, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0258, + "step": 9325 + }, + { + "epoch": 5.23, + "grad_norm": 1.7150670289993286, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0273, + "step": 9350 + }, + { + "epoch": 5.24, + "grad_norm": 1.7934012413024902, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0252, + "step": 9375 + }, + { + "epoch": 5.26, + "grad_norm": 1.7437806129455566, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0244, + "step": 9400 + }, + { + "epoch": 5.27, + "grad_norm": 2.1139166355133057, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0251, + "step": 9425 + }, + { + "epoch": 5.29, + "grad_norm": 1.9236050844192505, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0274, + "step": 9450 + }, + { + "epoch": 5.3, + "grad_norm": 1.858186960220337, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0267, + "step": 9475 + }, + { + "epoch": 5.31, + "grad_norm": 1.9849894046783447, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0286, + "step": 9500 + }, + { + "epoch": 5.33, + "grad_norm": 1.7437586784362793, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0274, + "step": 9525 + }, + { + "epoch": 5.34, + "grad_norm": 2.6068198680877686, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0249, + "step": 9550 + }, + { + "epoch": 5.36, + "grad_norm": 2.1027534008026123, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0259, + "step": 9575 + }, + { + "epoch": 5.37, + "grad_norm": 2.1794934272766113, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0249, + "step": 9600 + }, + { + "epoch": 5.38, + "grad_norm": 1.8740226030349731, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0262, + "step": 9625 + }, + { + "epoch": 5.4, + "grad_norm": 1.510231375694275, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0263, + "step": 9650 + }, + { + "epoch": 5.41, + "grad_norm": 1.7715526819229126, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0255, + "step": 9675 + }, + { + "epoch": 5.43, + "grad_norm": 1.896027684211731, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0249, + "step": 9700 + }, + { + "epoch": 5.44, + "grad_norm": 1.7648839950561523, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0258, + "step": 9725 + }, + { + "epoch": 5.45, + "grad_norm": 1.8870106935501099, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0255, + "step": 9750 + }, + { + "epoch": 5.47, + "grad_norm": 1.7485008239746094, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0249, + "step": 9775 + }, + { + "epoch": 5.48, + "grad_norm": 2.439951181411743, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0263, + "step": 9800 + }, + { + "epoch": 5.49, + "grad_norm": 1.970945954322815, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0244, + "step": 9825 + }, + { + "epoch": 5.51, + "grad_norm": 2.340816020965576, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0271, + "step": 9850 + }, + { + "epoch": 5.52, + "grad_norm": 1.6422665119171143, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0247, + "step": 9875 + }, + { + "epoch": 5.54, + "grad_norm": 2.1123127937316895, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0257, + "step": 9900 + }, + { + "epoch": 5.55, + "grad_norm": 1.9259475469589233, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0248, + "step": 9925 + }, + { + "epoch": 5.56, + "grad_norm": 1.9068354368209839, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0252, + "step": 9950 + }, + { + "epoch": 5.58, + "grad_norm": 1.8748539686203003, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0254, + "step": 9975 + }, + { + "epoch": 5.59, + "grad_norm": 2.727257251739502, + "learning_rate": 9.04572864321608e-06, + "loss": 0.0266, + "step": 10000 + }, + { + "epoch": 5.59, + "eval_loss": 0.12934260070323944, + "eval_runtime": 761.2744, + "eval_samples_per_second": 2.022, + "eval_steps_per_second": 2.022, + "eval_wer": 15.994623655913978, + "step": 10000 + }, + { + "epoch": 5.61, + "grad_norm": 1.5931107997894287, + "learning_rate": 9.043216080402011e-06, + "loss": 0.0255, + "step": 10025 + }, + { + "epoch": 5.62, + "grad_norm": 2.115570545196533, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0263, + "step": 10050 + }, + { + "epoch": 5.63, + "grad_norm": 1.7367600202560425, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0246, + "step": 10075 + }, + { + "epoch": 5.65, + "grad_norm": 1.5627069473266602, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0259, + "step": 10100 + }, + { + "epoch": 5.66, + "grad_norm": 2.296149969100952, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0272, + "step": 10125 + }, + { + "epoch": 5.68, + "grad_norm": 1.9977073669433594, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0251, + "step": 10150 + }, + { + "epoch": 5.69, + "grad_norm": 2.0896854400634766, + "learning_rate": 9.028140703517589e-06, + "loss": 0.026, + "step": 10175 + }, + { + "epoch": 5.7, + "grad_norm": 2.001432418823242, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0265, + "step": 10200 + }, + { + "epoch": 5.72, + "grad_norm": 2.437760353088379, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0257, + "step": 10225 + }, + { + "epoch": 5.73, + "grad_norm": 2.039419412612915, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0264, + "step": 10250 + }, + { + "epoch": 5.75, + "grad_norm": 2.1487600803375244, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0257, + "step": 10275 + }, + { + "epoch": 5.76, + "grad_norm": 2.186326265335083, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0256, + "step": 10300 + }, + { + "epoch": 5.77, + "grad_norm": 2.6533422470092773, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0266, + "step": 10325 + }, + { + "epoch": 5.79, + "grad_norm": 2.785947561264038, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0254, + "step": 10350 + }, + { + "epoch": 5.8, + "grad_norm": 2.040907144546509, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0278, + "step": 10375 + }, + { + "epoch": 5.82, + "grad_norm": 1.9981815814971924, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0273, + "step": 10400 + }, + { + "epoch": 5.83, + "grad_norm": 2.0052943229675293, + "learning_rate": 9.003015075376885e-06, + "loss": 0.027, + "step": 10425 + }, + { + "epoch": 5.84, + "grad_norm": 1.9560832977294922, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0255, + "step": 10450 + }, + { + "epoch": 5.86, + "grad_norm": 1.6769510507583618, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0252, + "step": 10475 + }, + { + "epoch": 5.87, + "grad_norm": 1.7325701713562012, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0237, + "step": 10500 + }, + { + "epoch": 5.89, + "grad_norm": 2.2958810329437256, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0269, + "step": 10525 + }, + { + "epoch": 5.9, + "grad_norm": 1.8434412479400635, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0251, + "step": 10550 + }, + { + "epoch": 5.91, + "grad_norm": 2.119659662246704, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0247, + "step": 10575 + }, + { + "epoch": 5.93, + "grad_norm": 2.7302348613739014, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0282, + "step": 10600 + }, + { + "epoch": 5.94, + "grad_norm": 2.5924785137176514, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0245, + "step": 10625 + }, + { + "epoch": 5.96, + "grad_norm": 2.452630043029785, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0267, + "step": 10650 + }, + { + "epoch": 5.97, + "grad_norm": 2.096748113632202, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0245, + "step": 10675 + }, + { + "epoch": 5.98, + "grad_norm": 2.2449941635131836, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0247, + "step": 10700 + }, + { + "epoch": 6.0, + "grad_norm": 1.8486614227294922, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0268, + "step": 10725 + }, + { + "epoch": 6.01, + "grad_norm": 1.591162085533142, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0167, + "step": 10750 + }, + { + "epoch": 6.03, + "grad_norm": 1.8919384479522705, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0161, + "step": 10775 + }, + { + "epoch": 6.04, + "grad_norm": 1.5010147094726562, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0152, + "step": 10800 + }, + { + "epoch": 6.05, + "grad_norm": 1.6276648044586182, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0163, + "step": 10825 + }, + { + "epoch": 6.07, + "grad_norm": 1.7681570053100586, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0156, + "step": 10850 + }, + { + "epoch": 6.08, + "grad_norm": 1.94306480884552, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0159, + "step": 10875 + }, + { + "epoch": 6.1, + "grad_norm": 1.6536585092544556, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0157, + "step": 10900 + }, + { + "epoch": 6.11, + "grad_norm": 1.6869453191757202, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0167, + "step": 10925 + }, + { + "epoch": 6.12, + "grad_norm": 2.1340219974517822, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0165, + "step": 10950 + }, + { + "epoch": 6.14, + "grad_norm": 1.5351003408432007, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0153, + "step": 10975 + }, + { + "epoch": 6.15, + "grad_norm": 1.0836293697357178, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0148, + "step": 11000 + }, + { + "epoch": 6.15, + "eval_loss": 0.13720138370990753, + "eval_runtime": 756.0145, + "eval_samples_per_second": 2.036, + "eval_steps_per_second": 2.036, + "eval_wer": 16.286816269284714, + "step": 11000 + }, + { + "epoch": 6.17, + "grad_norm": 1.538231611251831, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0149, + "step": 11025 + }, + { + "epoch": 6.18, + "grad_norm": 1.6468478441238403, + "learning_rate": 8.940201005025127e-06, + "loss": 0.016, + "step": 11050 + }, + { + "epoch": 6.19, + "grad_norm": 1.752801775932312, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0149, + "step": 11075 + }, + { + "epoch": 6.21, + "grad_norm": 1.774678111076355, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0154, + "step": 11100 + }, + { + "epoch": 6.22, + "grad_norm": 1.6859246492385864, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0161, + "step": 11125 + }, + { + "epoch": 6.24, + "grad_norm": 1.5582952499389648, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0153, + "step": 11150 + }, + { + "epoch": 6.25, + "grad_norm": 1.2000463008880615, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0156, + "step": 11175 + }, + { + "epoch": 6.26, + "grad_norm": 2.015148401260376, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0156, + "step": 11200 + }, + { + "epoch": 6.28, + "grad_norm": 1.178218960762024, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0161, + "step": 11225 + }, + { + "epoch": 6.29, + "grad_norm": 2.044891834259033, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0152, + "step": 11250 + }, + { + "epoch": 6.31, + "grad_norm": 1.8650555610656738, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0161, + "step": 11275 + }, + { + "epoch": 6.32, + "grad_norm": 1.306752324104309, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0157, + "step": 11300 + }, + { + "epoch": 6.33, + "grad_norm": 1.8784193992614746, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0154, + "step": 11325 + }, + { + "epoch": 6.35, + "grad_norm": 1.8208807706832886, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0154, + "step": 11350 + }, + { + "epoch": 6.36, + "grad_norm": 1.5234003067016602, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0152, + "step": 11375 + }, + { + "epoch": 6.38, + "grad_norm": 1.9955077171325684, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0149, + "step": 11400 + }, + { + "epoch": 6.39, + "grad_norm": 1.9024913311004639, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0157, + "step": 11425 + }, + { + "epoch": 6.4, + "grad_norm": 1.743261456489563, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0182, + "step": 11450 + }, + { + "epoch": 6.42, + "grad_norm": 2.266770124435425, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0151, + "step": 11475 + }, + { + "epoch": 6.43, + "grad_norm": 1.6484559774398804, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0157, + "step": 11500 + }, + { + "epoch": 6.45, + "grad_norm": 2.035109043121338, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0148, + "step": 11525 + }, + { + "epoch": 6.46, + "grad_norm": 2.141247510910034, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0158, + "step": 11550 + }, + { + "epoch": 6.47, + "grad_norm": 1.7964688539505005, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0169, + "step": 11575 + }, + { + "epoch": 6.49, + "grad_norm": 2.240086317062378, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0172, + "step": 11600 + }, + { + "epoch": 6.5, + "grad_norm": 1.505306601524353, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0148, + "step": 11625 + }, + { + "epoch": 6.52, + "grad_norm": 1.9585260152816772, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0167, + "step": 11650 + }, + { + "epoch": 6.53, + "grad_norm": 2.0186779499053955, + "learning_rate": 8.877386934673368e-06, + "loss": 0.016, + "step": 11675 + }, + { + "epoch": 6.54, + "grad_norm": 1.8327144384384155, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0164, + "step": 11700 + }, + { + "epoch": 6.56, + "grad_norm": 1.6224167346954346, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0162, + "step": 11725 + }, + { + "epoch": 6.57, + "grad_norm": 1.2875187397003174, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0161, + "step": 11750 + }, + { + "epoch": 6.59, + "grad_norm": 1.5960376262664795, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0152, + "step": 11775 + }, + { + "epoch": 6.6, + "grad_norm": 1.90456223487854, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0161, + "step": 11800 + }, + { + "epoch": 6.61, + "grad_norm": 1.8225324153900146, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0159, + "step": 11825 + }, + { + "epoch": 6.63, + "grad_norm": 2.2913146018981934, + "learning_rate": 8.859798994974875e-06, + "loss": 0.016, + "step": 11850 + }, + { + "epoch": 6.64, + "grad_norm": 1.889854073524475, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0147, + "step": 11875 + }, + { + "epoch": 6.66, + "grad_norm": 2.2725677490234375, + "learning_rate": 8.854773869346734e-06, + "loss": 0.016, + "step": 11900 + }, + { + "epoch": 6.67, + "grad_norm": 1.7620115280151367, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0161, + "step": 11925 + }, + { + "epoch": 6.68, + "grad_norm": 1.6161775588989258, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0145, + "step": 11950 + }, + { + "epoch": 6.7, + "grad_norm": 1.7443838119506836, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0164, + "step": 11975 + }, + { + "epoch": 6.71, + "grad_norm": 1.5310142040252686, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0176, + "step": 12000 + }, + { + "epoch": 6.71, + "eval_loss": 0.14148233830928802, + "eval_runtime": 752.9161, + "eval_samples_per_second": 2.044, + "eval_steps_per_second": 2.044, + "eval_wer": 16.00631136044881, + "step": 12000 + }, + { + "epoch": 6.73, + "grad_norm": 2.0277419090270996, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0174, + "step": 12025 + }, + { + "epoch": 6.74, + "grad_norm": 1.726722240447998, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0169, + "step": 12050 + }, + { + "epoch": 6.75, + "grad_norm": 2.328922986984253, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0178, + "step": 12075 + }, + { + "epoch": 6.77, + "grad_norm": 1.7815828323364258, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0158, + "step": 12100 + }, + { + "epoch": 6.78, + "grad_norm": 2.0201869010925293, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0162, + "step": 12125 + }, + { + "epoch": 6.8, + "grad_norm": 2.1795129776000977, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0173, + "step": 12150 + }, + { + "epoch": 6.81, + "grad_norm": 1.7573039531707764, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0176, + "step": 12175 + }, + { + "epoch": 6.82, + "grad_norm": 1.6062272787094116, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0151, + "step": 12200 + }, + { + "epoch": 6.84, + "grad_norm": 1.9971317052841187, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0171, + "step": 12225 + }, + { + "epoch": 6.85, + "grad_norm": 1.1527676582336426, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0169, + "step": 12250 + }, + { + "epoch": 6.87, + "grad_norm": 1.6671463251113892, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0174, + "step": 12275 + }, + { + "epoch": 6.88, + "grad_norm": 2.1665661334991455, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0164, + "step": 12300 + }, + { + "epoch": 6.89, + "grad_norm": 1.927233099937439, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0154, + "step": 12325 + }, + { + "epoch": 6.91, + "grad_norm": 1.5298579931259155, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0169, + "step": 12350 + }, + { + "epoch": 6.92, + "grad_norm": 2.1054165363311768, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0167, + "step": 12375 + }, + { + "epoch": 6.94, + "grad_norm": 1.8180631399154663, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0161, + "step": 12400 + }, + { + "epoch": 6.95, + "grad_norm": 2.0618014335632324, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0172, + "step": 12425 + }, + { + "epoch": 6.96, + "grad_norm": 1.2794722318649292, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0166, + "step": 12450 + }, + { + "epoch": 6.98, + "grad_norm": 1.6884711980819702, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0167, + "step": 12475 + }, + { + "epoch": 6.99, + "grad_norm": 2.3004558086395264, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0165, + "step": 12500 + }, + { + "epoch": 7.01, + "grad_norm": 1.2583965063095093, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0129, + "step": 12525 + }, + { + "epoch": 7.02, + "grad_norm": 0.7343959212303162, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0095, + "step": 12550 + }, + { + "epoch": 7.03, + "grad_norm": 1.2305463552474976, + "learning_rate": 8.786934673366834e-06, + "loss": 0.009, + "step": 12575 + }, + { + "epoch": 7.05, + "grad_norm": 1.496688961982727, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0089, + "step": 12600 + }, + { + "epoch": 7.06, + "grad_norm": 1.1468839645385742, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0095, + "step": 12625 + }, + { + "epoch": 7.07, + "grad_norm": 1.3296170234680176, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0095, + "step": 12650 + }, + { + "epoch": 7.09, + "grad_norm": 1.3252195119857788, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0098, + "step": 12675 + }, + { + "epoch": 7.1, + "grad_norm": 1.8207086324691772, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0104, + "step": 12700 + }, + { + "epoch": 7.12, + "grad_norm": 1.6231937408447266, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0099, + "step": 12725 + }, + { + "epoch": 7.13, + "grad_norm": 1.935926914215088, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0102, + "step": 12750 + }, + { + "epoch": 7.14, + "grad_norm": 1.725852608680725, + "learning_rate": 8.766934673366834e-06, + "loss": 0.009, + "step": 12775 + }, + { + "epoch": 7.16, + "grad_norm": 1.370095133781433, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0091, + "step": 12800 + }, + { + "epoch": 7.17, + "grad_norm": 1.3415814638137817, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0094, + "step": 12825 + }, + { + "epoch": 7.19, + "grad_norm": 1.2145850658416748, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0094, + "step": 12850 + }, + { + "epoch": 7.2, + "grad_norm": 1.2444536685943604, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0091, + "step": 12875 + }, + { + "epoch": 7.21, + "grad_norm": 1.6578384637832642, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0102, + "step": 12900 + }, + { + "epoch": 7.23, + "grad_norm": 1.248545527458191, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0101, + "step": 12925 + }, + { + "epoch": 7.24, + "grad_norm": 1.1102337837219238, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0109, + "step": 12950 + }, + { + "epoch": 7.26, + "grad_norm": 1.2566320896148682, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0108, + "step": 12975 + }, + { + "epoch": 7.27, + "grad_norm": 1.5954285860061646, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0099, + "step": 13000 + }, + { + "epoch": 7.27, + "eval_loss": 0.14806413650512695, + "eval_runtime": 755.8705, + "eval_samples_per_second": 2.036, + "eval_steps_per_second": 2.036, + "eval_wer": 16.000467508181394, + "step": 13000 + }, + { + "epoch": 7.28, + "grad_norm": 1.7502775192260742, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0098, + "step": 13025 + }, + { + "epoch": 7.3, + "grad_norm": 1.2737886905670166, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0098, + "step": 13050 + }, + { + "epoch": 7.31, + "grad_norm": 1.617136001586914, + "learning_rate": 8.736783919597991e-06, + "loss": 0.01, + "step": 13075 + }, + { + "epoch": 7.33, + "grad_norm": 1.1968501806259155, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0104, + "step": 13100 + }, + { + "epoch": 7.34, + "grad_norm": 1.1906235218048096, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0096, + "step": 13125 + }, + { + "epoch": 7.35, + "grad_norm": 1.7133976221084595, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0108, + "step": 13150 + }, + { + "epoch": 7.37, + "grad_norm": 1.7873108386993408, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0103, + "step": 13175 + }, + { + "epoch": 7.38, + "grad_norm": 0.9776824712753296, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0107, + "step": 13200 + }, + { + "epoch": 7.4, + "grad_norm": 1.3554304838180542, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0098, + "step": 13225 + }, + { + "epoch": 7.41, + "grad_norm": 1.4258984327316284, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0091, + "step": 13250 + }, + { + "epoch": 7.42, + "grad_norm": 1.4660829305648804, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0099, + "step": 13275 + }, + { + "epoch": 7.44, + "grad_norm": 1.6164659261703491, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0092, + "step": 13300 + }, + { + "epoch": 7.45, + "grad_norm": 1.3757909536361694, + "learning_rate": 8.711658291457286e-06, + "loss": 0.01, + "step": 13325 + }, + { + "epoch": 7.47, + "grad_norm": 1.5681990385055542, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0098, + "step": 13350 + }, + { + "epoch": 7.48, + "grad_norm": 1.5184909105300903, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0092, + "step": 13375 + }, + { + "epoch": 7.49, + "grad_norm": 1.7105239629745483, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0107, + "step": 13400 + }, + { + "epoch": 7.51, + "grad_norm": 1.6654719114303589, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0092, + "step": 13425 + }, + { + "epoch": 7.52, + "grad_norm": 1.535030484199524, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0101, + "step": 13450 + }, + { + "epoch": 7.54, + "grad_norm": 1.8340117931365967, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0112, + "step": 13475 + }, + { + "epoch": 7.55, + "grad_norm": 1.3008439540863037, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0097, + "step": 13500 + }, + { + "epoch": 7.56, + "grad_norm": 1.5416207313537598, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0099, + "step": 13525 + }, + { + "epoch": 7.58, + "grad_norm": 1.7615350484848022, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0101, + "step": 13550 + }, + { + "epoch": 7.59, + "grad_norm": 1.1080527305603027, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0108, + "step": 13575 + }, + { + "epoch": 7.61, + "grad_norm": 1.5510646104812622, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0111, + "step": 13600 + }, + { + "epoch": 7.62, + "grad_norm": 1.7211899757385254, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0114, + "step": 13625 + }, + { + "epoch": 7.63, + "grad_norm": 1.5297832489013672, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0104, + "step": 13650 + }, + { + "epoch": 7.65, + "grad_norm": 2.170487880706787, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0103, + "step": 13675 + }, + { + "epoch": 7.66, + "grad_norm": 1.54282546043396, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0099, + "step": 13700 + }, + { + "epoch": 7.68, + "grad_norm": 1.6874700784683228, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0118, + "step": 13725 + }, + { + "epoch": 7.69, + "grad_norm": 1.6438724994659424, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0107, + "step": 13750 + }, + { + "epoch": 7.7, + "grad_norm": 1.3200067281723022, + "learning_rate": 8.666432160804021e-06, + "loss": 0.01, + "step": 13775 + }, + { + "epoch": 7.72, + "grad_norm": 1.8182300329208374, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0103, + "step": 13800 + }, + { + "epoch": 7.73, + "grad_norm": 1.8602378368377686, + "learning_rate": 8.661407035175881e-06, + "loss": 0.011, + "step": 13825 + }, + { + "epoch": 7.75, + "grad_norm": 1.8888895511627197, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0105, + "step": 13850 + }, + { + "epoch": 7.76, + "grad_norm": 1.7531503438949585, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0113, + "step": 13875 + }, + { + "epoch": 7.77, + "grad_norm": 1.7014554738998413, + "learning_rate": 8.65386934673367e-06, + "loss": 0.01, + "step": 13900 + }, + { + "epoch": 7.79, + "grad_norm": 1.2867510318756104, + "learning_rate": 8.651356783919599e-06, + "loss": 0.01, + "step": 13925 + }, + { + "epoch": 7.8, + "grad_norm": 1.424118161201477, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0112, + "step": 13950 + }, + { + "epoch": 7.82, + "grad_norm": 1.7731105089187622, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0109, + "step": 13975 + }, + { + "epoch": 7.83, + "grad_norm": 1.8137600421905518, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0121, + "step": 14000 + }, + { + "epoch": 7.83, + "eval_loss": 0.15425679087638855, + "eval_runtime": 757.6125, + "eval_samples_per_second": 2.031, + "eval_steps_per_second": 2.031, + "eval_wer": 16.076437587657786, + "step": 14000 + }, + { + "epoch": 7.84, + "grad_norm": 1.7519497871398926, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0102, + "step": 14025 + }, + { + "epoch": 7.86, + "grad_norm": 1.5568550825119019, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0099, + "step": 14050 + }, + { + "epoch": 7.87, + "grad_norm": 1.7601958513259888, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0104, + "step": 14075 + }, + { + "epoch": 7.89, + "grad_norm": 1.3401893377304077, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0108, + "step": 14100 + }, + { + "epoch": 7.9, + "grad_norm": 1.9004765748977661, + "learning_rate": 8.631256281407035e-06, + "loss": 0.01, + "step": 14125 + }, + { + "epoch": 7.91, + "grad_norm": 2.4924957752227783, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0103, + "step": 14150 + }, + { + "epoch": 7.93, + "grad_norm": 1.7700423002243042, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0109, + "step": 14175 + }, + { + "epoch": 7.94, + "grad_norm": 1.6704262495040894, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0109, + "step": 14200 + }, + { + "epoch": 7.96, + "grad_norm": 1.6668369770050049, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0103, + "step": 14225 + }, + { + "epoch": 7.97, + "grad_norm": 1.7921608686447144, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0114, + "step": 14250 + }, + { + "epoch": 7.98, + "grad_norm": 1.7974798679351807, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0109, + "step": 14275 + }, + { + "epoch": 8.0, + "grad_norm": 2.067434549331665, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0111, + "step": 14300 + }, + { + "epoch": 8.01, + "grad_norm": 0.6613499522209167, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0066, + "step": 14325 + }, + { + "epoch": 8.03, + "grad_norm": 1.353959560394287, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0066, + "step": 14350 + }, + { + "epoch": 8.04, + "grad_norm": 0.46767738461494446, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0064, + "step": 14375 + }, + { + "epoch": 8.05, + "grad_norm": 1.3457773923873901, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0058, + "step": 14400 + }, + { + "epoch": 8.07, + "grad_norm": 1.2310553789138794, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0066, + "step": 14425 + }, + { + "epoch": 8.08, + "grad_norm": 0.8491060137748718, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0066, + "step": 14450 + }, + { + "epoch": 8.1, + "grad_norm": 1.0222899913787842, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0056, + "step": 14475 + }, + { + "epoch": 8.11, + "grad_norm": 1.3025904893875122, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0062, + "step": 14500 + }, + { + "epoch": 8.12, + "grad_norm": 2.2065799236297607, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0064, + "step": 14525 + }, + { + "epoch": 8.14, + "grad_norm": 1.7162373065948486, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0068, + "step": 14550 + }, + { + "epoch": 8.15, + "grad_norm": 1.5832626819610596, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0072, + "step": 14575 + }, + { + "epoch": 8.17, + "grad_norm": 1.0218473672866821, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0061, + "step": 14600 + }, + { + "epoch": 8.18, + "grad_norm": 1.1279516220092773, + "learning_rate": 8.58100502512563e-06, + "loss": 0.006, + "step": 14625 + }, + { + "epoch": 8.19, + "grad_norm": 1.4568241834640503, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0066, + "step": 14650 + }, + { + "epoch": 8.21, + "grad_norm": 1.0202233791351318, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0064, + "step": 14675 + }, + { + "epoch": 8.22, + "grad_norm": 1.116339087486267, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0069, + "step": 14700 + }, + { + "epoch": 8.24, + "grad_norm": 0.904866635799408, + "learning_rate": 8.570954773869347e-06, + "loss": 0.006, + "step": 14725 + }, + { + "epoch": 8.25, + "grad_norm": 0.9386972784996033, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0067, + "step": 14750 + }, + { + "epoch": 8.26, + "grad_norm": 0.9872827529907227, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0063, + "step": 14775 + }, + { + "epoch": 8.28, + "grad_norm": 1.3515868186950684, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0066, + "step": 14800 + }, + { + "epoch": 8.29, + "grad_norm": 0.8813887238502502, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0062, + "step": 14825 + }, + { + "epoch": 8.31, + "grad_norm": 0.8552685379981995, + "learning_rate": 8.558391959798995e-06, + "loss": 0.007, + "step": 14850 + }, + { + "epoch": 8.32, + "grad_norm": 1.3299704790115356, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0072, + "step": 14875 + }, + { + "epoch": 8.33, + "grad_norm": 1.8845906257629395, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0071, + "step": 14900 + }, + { + "epoch": 8.35, + "grad_norm": 1.3729891777038574, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0068, + "step": 14925 + }, + { + "epoch": 8.36, + "grad_norm": 1.3436650037765503, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0067, + "step": 14950 + }, + { + "epoch": 8.38, + "grad_norm": 1.0894922018051147, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0069, + "step": 14975 + }, + { + "epoch": 8.39, + "grad_norm": 1.3984309434890747, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0073, + "step": 15000 + }, + { + "epoch": 8.39, + "eval_loss": 0.16124805808067322, + "eval_runtime": 752.5841, + "eval_samples_per_second": 2.045, + "eval_steps_per_second": 2.045, + "eval_wer": 15.684899485741, + "step": 15000 + }, + { + "epoch": 8.4, + "grad_norm": 1.5289641618728638, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0063, + "step": 15025 + }, + { + "epoch": 8.42, + "grad_norm": 1.3611866235733032, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0069, + "step": 15050 + }, + { + "epoch": 8.43, + "grad_norm": 1.1869916915893555, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0067, + "step": 15075 + }, + { + "epoch": 8.45, + "grad_norm": 0.8257679343223572, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0071, + "step": 15100 + }, + { + "epoch": 8.46, + "grad_norm": 1.7792719602584839, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0072, + "step": 15125 + }, + { + "epoch": 8.47, + "grad_norm": 1.1164981126785278, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0072, + "step": 15150 + }, + { + "epoch": 8.49, + "grad_norm": 1.6938334703445435, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0072, + "step": 15175 + }, + { + "epoch": 8.5, + "grad_norm": 1.674504280090332, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0071, + "step": 15200 + }, + { + "epoch": 8.52, + "grad_norm": 1.1777126789093018, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0069, + "step": 15225 + }, + { + "epoch": 8.53, + "grad_norm": 1.9340919256210327, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0075, + "step": 15250 + }, + { + "epoch": 8.54, + "grad_norm": 1.697860598564148, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0073, + "step": 15275 + }, + { + "epoch": 8.56, + "grad_norm": 1.5397309064865112, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0067, + "step": 15300 + }, + { + "epoch": 8.57, + "grad_norm": 1.5631085634231567, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0065, + "step": 15325 + }, + { + "epoch": 8.59, + "grad_norm": 1.3397555351257324, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0077, + "step": 15350 + }, + { + "epoch": 8.6, + "grad_norm": 1.899144172668457, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0076, + "step": 15375 + }, + { + "epoch": 8.61, + "grad_norm": 1.3122050762176514, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0066, + "step": 15400 + }, + { + "epoch": 8.63, + "grad_norm": 1.6981794834136963, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0068, + "step": 15425 + }, + { + "epoch": 8.64, + "grad_norm": 1.5130527019500732, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0066, + "step": 15450 + }, + { + "epoch": 8.65, + "grad_norm": 2.0017104148864746, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0071, + "step": 15475 + }, + { + "epoch": 8.67, + "grad_norm": 1.2204324007034302, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0071, + "step": 15500 + }, + { + "epoch": 8.68, + "grad_norm": 1.0175747871398926, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0082, + "step": 15525 + }, + { + "epoch": 8.7, + "grad_norm": 1.4277863502502441, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0068, + "step": 15550 + }, + { + "epoch": 8.71, + "grad_norm": 1.9543935060501099, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0068, + "step": 15575 + }, + { + "epoch": 8.72, + "grad_norm": 1.6043189764022827, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0067, + "step": 15600 + }, + { + "epoch": 8.74, + "grad_norm": 1.7654818296432495, + "learning_rate": 8.480603015075377e-06, + "loss": 0.0075, + "step": 15625 + }, + { + "epoch": 8.75, + "grad_norm": 0.9702335000038147, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0075, + "step": 15650 + }, + { + "epoch": 8.77, + "grad_norm": 1.1004133224487305, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0071, + "step": 15675 + }, + { + "epoch": 8.78, + "grad_norm": 1.510627031326294, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0068, + "step": 15700 + }, + { + "epoch": 8.79, + "grad_norm": 1.5628174543380737, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0074, + "step": 15725 + }, + { + "epoch": 8.81, + "grad_norm": 3.459683418273926, + "learning_rate": 8.468040201005025e-06, + "loss": 0.007, + "step": 15750 + }, + { + "epoch": 8.82, + "grad_norm": 1.0426031351089478, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0068, + "step": 15775 + }, + { + "epoch": 8.84, + "grad_norm": 1.3776123523712158, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0082, + "step": 15800 + }, + { + "epoch": 8.85, + "grad_norm": 1.4047850370407104, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0079, + "step": 15825 + }, + { + "epoch": 8.86, + "grad_norm": 1.205950140953064, + "learning_rate": 8.457989949748744e-06, + "loss": 0.008, + "step": 15850 + }, + { + "epoch": 8.88, + "grad_norm": 1.2326229810714722, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0078, + "step": 15875 + }, + { + "epoch": 8.89, + "grad_norm": 1.2276500463485718, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0072, + "step": 15900 + }, + { + "epoch": 8.91, + "grad_norm": 0.6853958964347839, + "learning_rate": 8.450452261306534e-06, + "loss": 0.007, + "step": 15925 + }, + { + "epoch": 8.92, + "grad_norm": 1.4219454526901245, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0076, + "step": 15950 + }, + { + "epoch": 8.93, + "grad_norm": 1.6224793195724487, + "learning_rate": 8.445427135678392e-06, + "loss": 0.008, + "step": 15975 + }, + { + "epoch": 8.95, + "grad_norm": 1.310726284980774, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0069, + "step": 16000 + }, + { + "epoch": 8.95, + "eval_loss": 0.16760963201522827, + "eval_runtime": 752.417, + "eval_samples_per_second": 2.045, + "eval_steps_per_second": 2.045, + "eval_wer": 15.918653576437588, + "step": 16000 + }, + { + "epoch": 8.96, + "grad_norm": 1.6762831211090088, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0068, + "step": 16025 + }, + { + "epoch": 8.98, + "grad_norm": 1.8669105768203735, + "learning_rate": 8.437889447236182e-06, + "loss": 0.008, + "step": 16050 + }, + { + "epoch": 8.99, + "grad_norm": 1.7251466512680054, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0079, + "step": 16075 + }, + { + "epoch": 9.0, + "grad_norm": 0.9082014560699463, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0065, + "step": 16100 + }, + { + "epoch": 9.02, + "grad_norm": 0.8214170932769775, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0047, + "step": 16125 + }, + { + "epoch": 9.03, + "grad_norm": 1.570635199546814, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0046, + "step": 16150 + }, + { + "epoch": 9.05, + "grad_norm": 1.426879644393921, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0039, + "step": 16175 + }, + { + "epoch": 9.06, + "grad_norm": 1.728606104850769, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0036, + "step": 16200 + }, + { + "epoch": 9.07, + "grad_norm": 1.310128092765808, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0047, + "step": 16225 + }, + { + "epoch": 9.09, + "grad_norm": 0.9519062638282776, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0047, + "step": 16250 + }, + { + "epoch": 9.1, + "grad_norm": 1.0541446208953857, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0041, + "step": 16275 + }, + { + "epoch": 9.12, + "grad_norm": 1.8733175992965698, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0047, + "step": 16300 + }, + { + "epoch": 9.13, + "grad_norm": 0.802599310874939, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0046, + "step": 16325 + }, + { + "epoch": 9.14, + "grad_norm": 0.5900276303291321, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0047, + "step": 16350 + }, + { + "epoch": 9.16, + "grad_norm": 1.1969093084335327, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0053, + "step": 16375 + }, + { + "epoch": 9.17, + "grad_norm": 0.5119644403457642, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0044, + "step": 16400 + }, + { + "epoch": 9.19, + "grad_norm": 1.2693040370941162, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0045, + "step": 16425 + }, + { + "epoch": 9.2, + "grad_norm": 1.1644113063812256, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0045, + "step": 16450 + }, + { + "epoch": 9.21, + "grad_norm": 1.377126693725586, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0048, + "step": 16475 + }, + { + "epoch": 9.23, + "grad_norm": 1.25846529006958, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0042, + "step": 16500 + }, + { + "epoch": 9.24, + "grad_norm": 1.5646183490753174, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0049, + "step": 16525 + }, + { + "epoch": 9.26, + "grad_norm": 0.9275146722793579, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0054, + "step": 16550 + }, + { + "epoch": 9.27, + "grad_norm": 1.0268568992614746, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0046, + "step": 16575 + }, + { + "epoch": 9.28, + "grad_norm": 1.0819637775421143, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0046, + "step": 16600 + }, + { + "epoch": 9.3, + "grad_norm": 1.4162029027938843, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0053, + "step": 16625 + }, + { + "epoch": 9.31, + "grad_norm": 1.0918031930923462, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0048, + "step": 16650 + }, + { + "epoch": 9.33, + "grad_norm": 0.8820499777793884, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0047, + "step": 16675 + }, + { + "epoch": 9.34, + "grad_norm": 0.7173017859458923, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0045, + "step": 16700 + }, + { + "epoch": 9.35, + "grad_norm": 0.6749385595321655, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0045, + "step": 16725 + }, + { + "epoch": 9.37, + "grad_norm": 1.290108323097229, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0047, + "step": 16750 + }, + { + "epoch": 9.38, + "grad_norm": 1.3779593706130981, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0047, + "step": 16775 + }, + { + "epoch": 9.4, + "grad_norm": 1.2898823022842407, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0054, + "step": 16800 + }, + { + "epoch": 9.41, + "grad_norm": 1.7998939752578735, + "learning_rate": 8.36e-06, + "loss": 0.0051, + "step": 16825 + }, + { + "epoch": 9.42, + "grad_norm": 1.1434041261672974, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0049, + "step": 16850 + }, + { + "epoch": 9.44, + "grad_norm": 1.310171127319336, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0045, + "step": 16875 + }, + { + "epoch": 9.45, + "grad_norm": 1.317851185798645, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0058, + "step": 16900 + }, + { + "epoch": 9.47, + "grad_norm": 0.996315062046051, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0048, + "step": 16925 + }, + { + "epoch": 9.48, + "grad_norm": 2.0353639125823975, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0052, + "step": 16950 + }, + { + "epoch": 9.49, + "grad_norm": 1.5747225284576416, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0051, + "step": 16975 + }, + { + "epoch": 9.51, + "grad_norm": 1.2317149639129639, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0053, + "step": 17000 + }, + { + "epoch": 9.51, + "eval_loss": 0.1691037267446518, + "eval_runtime": 753.4376, + "eval_samples_per_second": 2.043, + "eval_steps_per_second": 2.043, + "eval_wer": 15.813464235624123, + "step": 17000 + }, + { + "epoch": 9.52, + "grad_norm": NaN, + "learning_rate": 8.34e-06, + "loss": 0.0053, + "step": 17025 + }, + { + "epoch": 9.54, + "grad_norm": 1.2521945238113403, + "learning_rate": 8.33748743718593e-06, + "loss": 0.005, + "step": 17050 + }, + { + "epoch": 9.55, + "grad_norm": 1.3215126991271973, + "learning_rate": 8.33497487437186e-06, + "loss": 0.006, + "step": 17075 + }, + { + "epoch": 9.56, + "grad_norm": 1.1341578960418701, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0047, + "step": 17100 + }, + { + "epoch": 9.58, + "grad_norm": 1.1844580173492432, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0054, + "step": 17125 + }, + { + "epoch": 9.59, + "grad_norm": 1.1164480447769165, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0048, + "step": 17150 + }, + { + "epoch": 9.61, + "grad_norm": 1.2919723987579346, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0053, + "step": 17175 + }, + { + "epoch": 9.62, + "grad_norm": 1.7231905460357666, + "learning_rate": 8.322412060301508e-06, + "loss": 0.005, + "step": 17200 + }, + { + "epoch": 9.63, + "grad_norm": 1.2363483905792236, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0049, + "step": 17225 + }, + { + "epoch": 9.65, + "grad_norm": 1.4607367515563965, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0051, + "step": 17250 + }, + { + "epoch": 9.66, + "grad_norm": 1.8882145881652832, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0053, + "step": 17275 + }, + { + "epoch": 9.68, + "grad_norm": 1.3341093063354492, + "learning_rate": 8.312361809045226e-06, + "loss": 0.006, + "step": 17300 + }, + { + "epoch": 9.69, + "grad_norm": 1.3131000995635986, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0051, + "step": 17325 + }, + { + "epoch": 9.7, + "grad_norm": 0.8955929279327393, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0051, + "step": 17350 + }, + { + "epoch": 9.72, + "grad_norm": 1.0981415510177612, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0054, + "step": 17375 + }, + { + "epoch": 9.73, + "grad_norm": 1.4736478328704834, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0052, + "step": 17400 + }, + { + "epoch": 9.75, + "grad_norm": 1.2069016695022583, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0051, + "step": 17425 + }, + { + "epoch": 9.76, + "grad_norm": 1.594504714012146, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0057, + "step": 17450 + }, + { + "epoch": 9.77, + "grad_norm": 1.5609164237976074, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0054, + "step": 17475 + }, + { + "epoch": 9.79, + "grad_norm": 1.4264171123504639, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0056, + "step": 17500 + }, + { + "epoch": 9.8, + "grad_norm": 2.2810609340667725, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0059, + "step": 17525 + }, + { + "epoch": 9.82, + "grad_norm": 1.5259751081466675, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0049, + "step": 17550 + }, + { + "epoch": 9.83, + "grad_norm": 1.2636386156082153, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0051, + "step": 17575 + }, + { + "epoch": 9.84, + "grad_norm": 1.611131191253662, + "learning_rate": 8.282211055276383e-06, + "loss": 0.005, + "step": 17600 + }, + { + "epoch": 9.86, + "grad_norm": 1.3393259048461914, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0052, + "step": 17625 + }, + { + "epoch": 9.87, + "grad_norm": 1.4543604850769043, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0049, + "step": 17650 + }, + { + "epoch": 9.89, + "grad_norm": 2.471161365509033, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0059, + "step": 17675 + }, + { + "epoch": 9.9, + "grad_norm": 1.480251431465149, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0053, + "step": 17700 + }, + { + "epoch": 9.91, + "grad_norm": 0.9905931949615479, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0059, + "step": 17725 + }, + { + "epoch": 9.93, + "grad_norm": 1.4796502590179443, + "learning_rate": 8.26713567839196e-06, + "loss": 0.0057, + "step": 17750 + }, + { + "epoch": 9.94, + "grad_norm": 1.2823208570480347, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0054, + "step": 17775 + }, + { + "epoch": 9.96, + "grad_norm": 0.6789861917495728, + "learning_rate": 8.26211055276382e-06, + "loss": 0.0049, + "step": 17800 + }, + { + "epoch": 9.97, + "grad_norm": 0.8929684162139893, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0053, + "step": 17825 + }, + { + "epoch": 9.98, + "grad_norm": 1.9768999814987183, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0051, + "step": 17850 + }, + { + "epoch": 10.0, + "grad_norm": 1.8341892957687378, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0053, + "step": 17875 + }, + { + "epoch": 10.01, + "grad_norm": 0.8522334694862366, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0038, + "step": 17900 + }, + { + "epoch": 10.03, + "grad_norm": 1.197489857673645, + "learning_rate": 8.249547738693467e-06, + "loss": 0.0033, + "step": 17925 + }, + { + "epoch": 10.04, + "grad_norm": 0.49319350719451904, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0037, + "step": 17950 + }, + { + "epoch": 10.05, + "grad_norm": 1.235946536064148, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0028, + "step": 17975 + }, + { + "epoch": 10.07, + "grad_norm": 1.3508857488632202, + "learning_rate": 8.242010050251257e-06, + "loss": 0.0032, + "step": 18000 + }, + { + "epoch": 10.07, + "eval_loss": 0.17616941034793854, + "eval_runtime": 757.3797, + "eval_samples_per_second": 2.032, + "eval_steps_per_second": 2.032, + "eval_wer": 15.69658719027583, + "step": 18000 + }, + { + "epoch": 10.08, + "grad_norm": 0.9012784957885742, + "learning_rate": 8.239597989949748e-06, + "loss": 0.0032, + "step": 18025 + }, + { + "epoch": 10.1, + "grad_norm": 1.2391079664230347, + "learning_rate": 8.23708542713568e-06, + "loss": 0.0029, + "step": 18050 + }, + { + "epoch": 10.11, + "grad_norm": 2.0320775508880615, + "learning_rate": 8.234572864321609e-06, + "loss": 0.0033, + "step": 18075 + }, + { + "epoch": 10.12, + "grad_norm": 1.7037626504898071, + "learning_rate": 8.232060301507538e-06, + "loss": 0.0036, + "step": 18100 + }, + { + "epoch": 10.14, + "grad_norm": 0.781554102897644, + "learning_rate": 8.229547738693467e-06, + "loss": 0.0038, + "step": 18125 + }, + { + "epoch": 10.15, + "grad_norm": 0.7765903472900391, + "learning_rate": 8.227035175879397e-06, + "loss": 0.0035, + "step": 18150 + }, + { + "epoch": 10.16, + "grad_norm": 0.6607799530029297, + "learning_rate": 8.224522613065328e-06, + "loss": 0.0033, + "step": 18175 + }, + { + "epoch": 10.18, + "grad_norm": 0.9506460428237915, + "learning_rate": 8.222010050251257e-06, + "loss": 0.0035, + "step": 18200 + }, + { + "epoch": 10.19, + "grad_norm": 0.9609546065330505, + "learning_rate": 8.219497487437186e-06, + "loss": 0.0028, + "step": 18225 + }, + { + "epoch": 10.21, + "grad_norm": 0.9613615274429321, + "learning_rate": 8.216984924623116e-06, + "loss": 0.0032, + "step": 18250 + }, + { + "epoch": 10.22, + "grad_norm": 0.5726645588874817, + "learning_rate": 8.214472361809047e-06, + "loss": 0.0032, + "step": 18275 + }, + { + "epoch": 10.23, + "grad_norm": 0.9365046620368958, + "learning_rate": 8.211959798994974e-06, + "loss": 0.0034, + "step": 18300 + }, + { + "epoch": 10.25, + "grad_norm": 0.7060268521308899, + "learning_rate": 8.209447236180905e-06, + "loss": 0.0034, + "step": 18325 + }, + { + "epoch": 10.26, + "grad_norm": 1.037100911140442, + "learning_rate": 8.206934673366835e-06, + "loss": 0.0031, + "step": 18350 + }, + { + "epoch": 10.28, + "grad_norm": 1.105318307876587, + "learning_rate": 8.204422110552764e-06, + "loss": 0.0036, + "step": 18375 + }, + { + "epoch": 10.29, + "grad_norm": 1.0618062019348145, + "learning_rate": 8.201909547738695e-06, + "loss": 0.0034, + "step": 18400 + }, + { + "epoch": 10.3, + "grad_norm": 1.367396354675293, + "learning_rate": 8.199396984924623e-06, + "loss": 0.0038, + "step": 18425 + }, + { + "epoch": 10.32, + "grad_norm": 1.4584746360778809, + "learning_rate": 8.196884422110554e-06, + "loss": 0.0042, + "step": 18450 + }, + { + "epoch": 10.33, + "grad_norm": 1.782736897468567, + "learning_rate": 8.194371859296483e-06, + "loss": 0.0045, + "step": 18475 + }, + { + "epoch": 10.35, + "grad_norm": 0.8661909699440002, + "learning_rate": 8.191859296482412e-06, + "loss": 0.0038, + "step": 18500 + }, + { + "epoch": 10.36, + "grad_norm": 1.476789951324463, + "learning_rate": 8.189346733668342e-06, + "loss": 0.0035, + "step": 18525 + }, + { + "epoch": 10.37, + "grad_norm": 1.7728875875473022, + "learning_rate": 8.186834170854273e-06, + "loss": 0.004, + "step": 18550 + }, + { + "epoch": 10.39, + "grad_norm": 1.956113576889038, + "learning_rate": 8.184321608040202e-06, + "loss": 0.0039, + "step": 18575 + }, + { + "epoch": 10.4, + "grad_norm": 1.2195167541503906, + "learning_rate": 8.181809045226131e-06, + "loss": 0.0042, + "step": 18600 + }, + { + "epoch": 10.42, + "grad_norm": 1.838324785232544, + "learning_rate": 8.17929648241206e-06, + "loss": 0.0045, + "step": 18625 + }, + { + "epoch": 10.43, + "grad_norm": 1.2510162591934204, + "learning_rate": 8.17678391959799e-06, + "loss": 0.0042, + "step": 18650 + }, + { + "epoch": 10.44, + "grad_norm": 1.7234545946121216, + "learning_rate": 8.174271356783921e-06, + "loss": 0.0044, + "step": 18675 + }, + { + "epoch": 10.46, + "grad_norm": 1.4265068769454956, + "learning_rate": 8.171758793969849e-06, + "loss": 0.004, + "step": 18700 + }, + { + "epoch": 10.47, + "grad_norm": 1.8112068176269531, + "learning_rate": 8.16924623115578e-06, + "loss": 0.004, + "step": 18725 + }, + { + "epoch": 10.49, + "grad_norm": 1.4219517707824707, + "learning_rate": 8.166733668341709e-06, + "loss": 0.0039, + "step": 18750 + }, + { + "epoch": 10.5, + "grad_norm": 0.9819099307060242, + "learning_rate": 8.164221105527638e-06, + "loss": 0.0036, + "step": 18775 + }, + { + "epoch": 10.51, + "grad_norm": 1.0875754356384277, + "learning_rate": 8.16170854271357e-06, + "loss": 0.0036, + "step": 18800 + }, + { + "epoch": 10.53, + "grad_norm": 1.3577704429626465, + "learning_rate": 8.159195979899499e-06, + "loss": 0.0039, + "step": 18825 + }, + { + "epoch": 10.54, + "grad_norm": 0.8017152547836304, + "learning_rate": 8.156683417085428e-06, + "loss": 0.0038, + "step": 18850 + }, + { + "epoch": 10.56, + "grad_norm": 1.4583171606063843, + "learning_rate": 8.154170854271357e-06, + "loss": 0.0041, + "step": 18875 + }, + { + "epoch": 10.57, + "grad_norm": 1.4608862400054932, + "learning_rate": 8.151658291457287e-06, + "loss": 0.0037, + "step": 18900 + }, + { + "epoch": 10.58, + "grad_norm": 1.1530755758285522, + "learning_rate": 8.149145728643216e-06, + "loss": 0.0039, + "step": 18925 + }, + { + "epoch": 10.6, + "grad_norm": 1.4272164106369019, + "learning_rate": 8.146633165829147e-06, + "loss": 0.0038, + "step": 18950 + }, + { + "epoch": 10.61, + "grad_norm": 0.9212055802345276, + "learning_rate": 8.144120603015076e-06, + "loss": 0.0041, + "step": 18975 + }, + { + "epoch": 10.63, + "grad_norm": 1.2338883876800537, + "learning_rate": 8.141608040201006e-06, + "loss": 0.004, + "step": 19000 + }, + { + "epoch": 10.63, + "eval_loss": 0.18160806596279144, + "eval_runtime": 760.8706, + "eval_samples_per_second": 2.023, + "eval_steps_per_second": 2.023, + "eval_wer": 16.129032258064516, + "step": 19000 + }, + { + "epoch": 10.64, + "grad_norm": 0.6575191617012024, + "learning_rate": 8.139095477386935e-06, + "loss": 0.0041, + "step": 19025 + }, + { + "epoch": 10.65, + "grad_norm": 2.2444114685058594, + "learning_rate": 8.136582914572864e-06, + "loss": 0.0036, + "step": 19050 + }, + { + "epoch": 10.67, + "grad_norm": 1.5388089418411255, + "learning_rate": 8.134070351758795e-06, + "loss": 0.0048, + "step": 19075 + }, + { + "epoch": 10.68, + "grad_norm": 0.8047139048576355, + "learning_rate": 8.131557788944725e-06, + "loss": 0.0039, + "step": 19100 + }, + { + "epoch": 10.7, + "grad_norm": 1.5189204216003418, + "learning_rate": 8.129045226130654e-06, + "loss": 0.0044, + "step": 19125 + }, + { + "epoch": 10.71, + "grad_norm": 1.4302115440368652, + "learning_rate": 8.126532663316583e-06, + "loss": 0.004, + "step": 19150 + }, + { + "epoch": 10.72, + "grad_norm": 1.595556378364563, + "learning_rate": 8.124020100502513e-06, + "loss": 0.0041, + "step": 19175 + }, + { + "epoch": 10.74, + "grad_norm": 1.2348064184188843, + "learning_rate": 8.121507537688444e-06, + "loss": 0.0044, + "step": 19200 + }, + { + "epoch": 10.75, + "grad_norm": 1.1426401138305664, + "learning_rate": 8.118994974874373e-06, + "loss": 0.0038, + "step": 19225 + }, + { + "epoch": 10.77, + "grad_norm": 1.3892803192138672, + "learning_rate": 8.116482412060302e-06, + "loss": 0.005, + "step": 19250 + }, + { + "epoch": 10.78, + "grad_norm": 1.3394361734390259, + "learning_rate": 8.113969849246232e-06, + "loss": 0.0039, + "step": 19275 + }, + { + "epoch": 10.79, + "grad_norm": 1.8651641607284546, + "learning_rate": 8.111457286432161e-06, + "loss": 0.0041, + "step": 19300 + }, + { + "epoch": 10.81, + "grad_norm": 1.5506017208099365, + "learning_rate": 8.10894472361809e-06, + "loss": 0.0035, + "step": 19325 + }, + { + "epoch": 10.82, + "grad_norm": 1.1086325645446777, + "learning_rate": 8.106432160804021e-06, + "loss": 0.0037, + "step": 19350 + }, + { + "epoch": 10.84, + "grad_norm": 0.7974542379379272, + "learning_rate": 8.10391959798995e-06, + "loss": 0.0045, + "step": 19375 + }, + { + "epoch": 10.85, + "grad_norm": 1.5743807554244995, + "learning_rate": 8.10140703517588e-06, + "loss": 0.0044, + "step": 19400 + }, + { + "epoch": 10.86, + "grad_norm": 1.1005268096923828, + "learning_rate": 8.098894472361811e-06, + "loss": 0.0041, + "step": 19425 + }, + { + "epoch": 10.88, + "grad_norm": 1.0295090675354004, + "learning_rate": 8.096381909547739e-06, + "loss": 0.0046, + "step": 19450 + }, + { + "epoch": 10.89, + "grad_norm": 1.0392544269561768, + "learning_rate": 8.09386934673367e-06, + "loss": 0.0035, + "step": 19475 + }, + { + "epoch": 10.91, + "grad_norm": 1.4186832904815674, + "learning_rate": 8.091356783919599e-06, + "loss": 0.0045, + "step": 19500 + }, + { + "epoch": 10.92, + "grad_norm": 0.508133053779602, + "learning_rate": 8.088844221105528e-06, + "loss": 0.0041, + "step": 19525 + }, + { + "epoch": 10.93, + "grad_norm": 0.8010510802268982, + "learning_rate": 8.086331658291458e-06, + "loss": 0.0038, + "step": 19550 + }, + { + "epoch": 10.95, + "grad_norm": 1.2284573316574097, + "learning_rate": 8.083819095477387e-06, + "loss": 0.0038, + "step": 19575 + }, + { + "epoch": 10.96, + "grad_norm": 1.4716583490371704, + "learning_rate": 8.081306532663318e-06, + "loss": 0.0038, + "step": 19600 + }, + { + "epoch": 10.98, + "grad_norm": 1.9497301578521729, + "learning_rate": 8.078793969849247e-06, + "loss": 0.0041, + "step": 19625 + }, + { + "epoch": 10.99, + "grad_norm": 0.9995880722999573, + "learning_rate": 8.076281407035177e-06, + "loss": 0.0048, + "step": 19650 + }, + { + "epoch": 11.0, + "grad_norm": 0.4566727578639984, + "learning_rate": 8.073768844221106e-06, + "loss": 0.0031, + "step": 19675 + }, + { + "epoch": 11.02, + "grad_norm": 0.8414078950881958, + "learning_rate": 8.071256281407037e-06, + "loss": 0.0024, + "step": 19700 + }, + { + "epoch": 11.03, + "grad_norm": 0.4728333055973053, + "learning_rate": 8.068743718592964e-06, + "loss": 0.0024, + "step": 19725 + }, + { + "epoch": 11.05, + "grad_norm": 1.1845600605010986, + "learning_rate": 8.066231155778895e-06, + "loss": 0.0023, + "step": 19750 + }, + { + "epoch": 11.06, + "grad_norm": 0.6874567866325378, + "learning_rate": 8.063718592964825e-06, + "loss": 0.0028, + "step": 19775 + }, + { + "epoch": 11.07, + "grad_norm": 2.042022943496704, + "learning_rate": 8.061206030150754e-06, + "loss": 0.0025, + "step": 19800 + }, + { + "epoch": 11.09, + "grad_norm": 1.3198320865631104, + "learning_rate": 8.058693467336685e-06, + "loss": 0.0029, + "step": 19825 + }, + { + "epoch": 11.1, + "grad_norm": 0.7643749117851257, + "learning_rate": 8.056180904522613e-06, + "loss": 0.0026, + "step": 19850 + }, + { + "epoch": 11.12, + "grad_norm": 1.4038019180297852, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0029, + "step": 19875 + }, + { + "epoch": 11.13, + "grad_norm": 0.5852758288383484, + "learning_rate": 8.051155778894473e-06, + "loss": 0.003, + "step": 19900 + }, + { + "epoch": 11.14, + "grad_norm": 1.279355764389038, + "learning_rate": 8.048643216080402e-06, + "loss": 0.003, + "step": 19925 + }, + { + "epoch": 11.16, + "grad_norm": 0.9047188758850098, + "learning_rate": 8.046130653266332e-06, + "loss": 0.0023, + "step": 19950 + }, + { + "epoch": 11.17, + "grad_norm": 0.82230144739151, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0025, + "step": 19975 + }, + { + "epoch": 11.19, + "grad_norm": 1.7922263145446777, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0026, + "step": 20000 + }, + { + "epoch": 11.19, + "eval_loss": 0.18887905776500702, + "eval_runtime": 762.6611, + "eval_samples_per_second": 2.018, + "eval_steps_per_second": 2.018, + "eval_wer": 15.877746610565685, + "step": 20000 + }, + { + "epoch": 11.2, + "grad_norm": 0.4617973566055298, + "learning_rate": 8.038592964824121e-06, + "loss": 0.0027, + "step": 20025 + }, + { + "epoch": 11.21, + "grad_norm": 0.9600264430046082, + "learning_rate": 8.03608040201005e-06, + "loss": 0.0029, + "step": 20050 + }, + { + "epoch": 11.23, + "grad_norm": 1.7673015594482422, + "learning_rate": 8.03356783919598e-06, + "loss": 0.0029, + "step": 20075 + }, + { + "epoch": 11.24, + "grad_norm": 1.1767991781234741, + "learning_rate": 8.031055276381911e-06, + "loss": 0.0025, + "step": 20100 + }, + { + "epoch": 11.26, + "grad_norm": 0.8826992511749268, + "learning_rate": 8.028542713567839e-06, + "loss": 0.0029, + "step": 20125 + }, + { + "epoch": 11.27, + "grad_norm": 1.2247871160507202, + "learning_rate": 8.02603015075377e-06, + "loss": 0.0024, + "step": 20150 + }, + { + "epoch": 11.28, + "grad_norm": 0.6439158916473389, + "learning_rate": 8.023517587939699e-06, + "loss": 0.0028, + "step": 20175 + }, + { + "epoch": 11.3, + "grad_norm": 0.5720233917236328, + "learning_rate": 8.021005025125628e-06, + "loss": 0.0026, + "step": 20200 + }, + { + "epoch": 11.31, + "grad_norm": 0.7048913836479187, + "learning_rate": 8.01849246231156e-06, + "loss": 0.0029, + "step": 20225 + }, + { + "epoch": 11.33, + "grad_norm": 1.0097029209136963, + "learning_rate": 8.015979899497489e-06, + "loss": 0.003, + "step": 20250 + }, + { + "epoch": 11.34, + "grad_norm": 1.3910776376724243, + "learning_rate": 8.013467336683418e-06, + "loss": 0.0036, + "step": 20275 + }, + { + "epoch": 11.35, + "grad_norm": 1.124796748161316, + "learning_rate": 8.010954773869347e-06, + "loss": 0.0027, + "step": 20300 + }, + { + "epoch": 11.37, + "grad_norm": 1.201909065246582, + "learning_rate": 8.008442211055277e-06, + "loss": 0.0027, + "step": 20325 + }, + { + "epoch": 11.38, + "grad_norm": 0.7784567475318909, + "learning_rate": 8.005929648241206e-06, + "loss": 0.0034, + "step": 20350 + }, + { + "epoch": 11.4, + "grad_norm": 1.3398573398590088, + "learning_rate": 8.003417085427137e-06, + "loss": 0.0033, + "step": 20375 + }, + { + "epoch": 11.41, + "grad_norm": 1.5645928382873535, + "learning_rate": 8.000904522613065e-06, + "loss": 0.0033, + "step": 20400 + }, + { + "epoch": 11.42, + "grad_norm": 0.9480635523796082, + "learning_rate": 7.998391959798996e-06, + "loss": 0.0031, + "step": 20425 + }, + { + "epoch": 11.44, + "grad_norm": 2.4662258625030518, + "learning_rate": 7.995879396984925e-06, + "loss": 0.0031, + "step": 20450 + }, + { + "epoch": 11.45, + "grad_norm": 1.466617465019226, + "learning_rate": 7.993366834170854e-06, + "loss": 0.0029, + "step": 20475 + }, + { + "epoch": 11.47, + "grad_norm": 0.9943632483482361, + "learning_rate": 7.990854271356785e-06, + "loss": 0.0037, + "step": 20500 + }, + { + "epoch": 11.48, + "grad_norm": 0.6884747743606567, + "learning_rate": 7.988341708542715e-06, + "loss": 0.0032, + "step": 20525 + }, + { + "epoch": 11.49, + "grad_norm": 1.386391520500183, + "learning_rate": 7.985829145728644e-06, + "loss": 0.0032, + "step": 20550 + }, + { + "epoch": 11.51, + "grad_norm": 1.4206699132919312, + "learning_rate": 7.983316582914573e-06, + "loss": 0.0035, + "step": 20575 + }, + { + "epoch": 11.52, + "grad_norm": 1.2999849319458008, + "learning_rate": 7.980804020100503e-06, + "loss": 0.0039, + "step": 20600 + }, + { + "epoch": 11.54, + "grad_norm": 1.329799771308899, + "learning_rate": 7.978291457286432e-06, + "loss": 0.0037, + "step": 20625 + }, + { + "epoch": 11.55, + "grad_norm": 0.8634512424468994, + "learning_rate": 7.975778894472363e-06, + "loss": 0.0037, + "step": 20650 + }, + { + "epoch": 11.56, + "grad_norm": 1.7093886137008667, + "learning_rate": 7.973266331658292e-06, + "loss": 0.0034, + "step": 20675 + }, + { + "epoch": 11.58, + "grad_norm": 1.2981669902801514, + "learning_rate": 7.970753768844222e-06, + "loss": 0.004, + "step": 20700 + }, + { + "epoch": 11.59, + "grad_norm": 1.8105932474136353, + "learning_rate": 7.968241206030151e-06, + "loss": 0.0032, + "step": 20725 + }, + { + "epoch": 11.61, + "grad_norm": 1.5272125005722046, + "learning_rate": 7.96572864321608e-06, + "loss": 0.0031, + "step": 20750 + }, + { + "epoch": 11.62, + "grad_norm": 1.0384894609451294, + "learning_rate": 7.963216080402011e-06, + "loss": 0.0035, + "step": 20775 + }, + { + "epoch": 11.63, + "grad_norm": 0.7314445972442627, + "learning_rate": 7.96070351758794e-06, + "loss": 0.0037, + "step": 20800 + }, + { + "epoch": 11.65, + "grad_norm": 1.4100817441940308, + "learning_rate": 7.95819095477387e-06, + "loss": 0.0035, + "step": 20825 + }, + { + "epoch": 11.66, + "grad_norm": 1.519355058670044, + "learning_rate": 7.955678391959801e-06, + "loss": 0.0038, + "step": 20850 + }, + { + "epoch": 11.68, + "grad_norm": 0.8480135202407837, + "learning_rate": 7.953165829145729e-06, + "loss": 0.0036, + "step": 20875 + }, + { + "epoch": 11.69, + "grad_norm": 1.200534701347351, + "learning_rate": 7.95065326633166e-06, + "loss": 0.0033, + "step": 20900 + }, + { + "epoch": 11.7, + "grad_norm": 1.4117076396942139, + "learning_rate": 7.948140703517589e-06, + "loss": 0.0035, + "step": 20925 + }, + { + "epoch": 11.72, + "grad_norm": 2.9445955753326416, + "learning_rate": 7.945628140703518e-06, + "loss": 0.0032, + "step": 20950 + }, + { + "epoch": 11.73, + "grad_norm": 1.186203956604004, + "learning_rate": 7.943115577889448e-06, + "loss": 0.0031, + "step": 20975 + }, + { + "epoch": 11.74, + "grad_norm": 1.4824252128601074, + "learning_rate": 7.940603015075377e-06, + "loss": 0.0034, + "step": 21000 + }, + { + "epoch": 11.74, + "eval_loss": 0.19144625961780548, + "eval_runtime": 766.4003, + "eval_samples_per_second": 2.008, + "eval_steps_per_second": 2.008, + "eval_wer": 15.90696587190276, + "step": 21000 + }, + { + "epoch": 11.76, + "grad_norm": 0.679831862449646, + "learning_rate": 7.938090452261306e-06, + "loss": 0.0032, + "step": 21025 + }, + { + "epoch": 11.77, + "grad_norm": 1.7366113662719727, + "learning_rate": 7.935577889447237e-06, + "loss": 0.0038, + "step": 21050 + }, + { + "epoch": 11.79, + "grad_norm": 0.8601617217063904, + "learning_rate": 7.933065326633167e-06, + "loss": 0.0039, + "step": 21075 + }, + { + "epoch": 11.8, + "grad_norm": 1.057368278503418, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0034, + "step": 21100 + }, + { + "epoch": 11.81, + "grad_norm": 1.4458407163619995, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0037, + "step": 21125 + }, + { + "epoch": 11.83, + "grad_norm": 1.1673171520233154, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0043, + "step": 21150 + }, + { + "epoch": 11.84, + "grad_norm": 1.0042715072631836, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0033, + "step": 21175 + }, + { + "epoch": 11.86, + "grad_norm": 1.2026900053024292, + "learning_rate": 7.920502512562815e-06, + "loss": 0.004, + "step": 21200 + }, + { + "epoch": 11.87, + "grad_norm": 1.232175350189209, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0034, + "step": 21225 + }, + { + "epoch": 11.88, + "grad_norm": 0.9671189188957214, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0035, + "step": 21250 + }, + { + "epoch": 11.9, + "grad_norm": 1.0250332355499268, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0037, + "step": 21275 + }, + { + "epoch": 11.91, + "grad_norm": 0.5897387266159058, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0034, + "step": 21300 + }, + { + "epoch": 11.93, + "grad_norm": 0.9510942697525024, + "learning_rate": 7.907939698492463e-06, + "loss": 0.003, + "step": 21325 + }, + { + "epoch": 11.94, + "grad_norm": 1.1963902711868286, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0037, + "step": 21350 + }, + { + "epoch": 11.95, + "grad_norm": 0.742148220539093, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0033, + "step": 21375 + }, + { + "epoch": 11.97, + "grad_norm": 1.2807279825210571, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0039, + "step": 21400 + }, + { + "epoch": 11.98, + "grad_norm": 1.1065462827682495, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0039, + "step": 21425 + }, + { + "epoch": 12.0, + "grad_norm": 1.3884105682373047, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0038, + "step": 21450 + }, + { + "epoch": 12.01, + "grad_norm": 1.7353380918502808, + "learning_rate": 7.89286432160804e-06, + "loss": 0.003, + "step": 21475 + }, + { + "epoch": 12.02, + "grad_norm": 1.3812053203582764, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0029, + "step": 21500 + }, + { + "epoch": 12.04, + "grad_norm": 0.6461287140846252, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0025, + "step": 21525 + }, + { + "epoch": 12.05, + "grad_norm": 0.5335476994514465, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0025, + "step": 21550 + }, + { + "epoch": 12.07, + "grad_norm": 1.2660001516342163, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0022, + "step": 21575 + }, + { + "epoch": 12.08, + "grad_norm": 0.9450500011444092, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0026, + "step": 21600 + }, + { + "epoch": 12.09, + "grad_norm": 2.120697259902954, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0025, + "step": 21625 + }, + { + "epoch": 12.11, + "grad_norm": 1.1769088506698608, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0025, + "step": 21650 + }, + { + "epoch": 12.12, + "grad_norm": 1.4108384847640991, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0025, + "step": 21675 + }, + { + "epoch": 12.14, + "grad_norm": 1.0660864114761353, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0026, + "step": 21700 + }, + { + "epoch": 12.15, + "grad_norm": 1.8561534881591797, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0022, + "step": 21725 + }, + { + "epoch": 12.16, + "grad_norm": 1.188014030456543, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0023, + "step": 21750 + }, + { + "epoch": 12.18, + "grad_norm": 0.5477595329284668, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0025, + "step": 21775 + }, + { + "epoch": 12.19, + "grad_norm": 1.9913005828857422, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0028, + "step": 21800 + }, + { + "epoch": 12.21, + "grad_norm": 0.5319697856903076, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0023, + "step": 21825 + }, + { + "epoch": 12.22, + "grad_norm": 2.3672914505004883, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0029, + "step": 21850 + }, + { + "epoch": 12.23, + "grad_norm": 0.4948159158229828, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0026, + "step": 21875 + }, + { + "epoch": 12.25, + "grad_norm": 0.5486177206039429, + "learning_rate": 7.850150753768844e-06, + "loss": 0.003, + "step": 21900 + }, + { + "epoch": 12.26, + "grad_norm": 0.9666187167167664, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0028, + "step": 21925 + }, + { + "epoch": 12.28, + "grad_norm": 0.44732800126075745, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0025, + "step": 21950 + }, + { + "epoch": 12.29, + "grad_norm": 1.491809368133545, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0024, + "step": 21975 + }, + { + "epoch": 12.3, + "grad_norm": 0.41159114241600037, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0023, + "step": 22000 + }, + { + "epoch": 12.3, + "eval_loss": 0.19644691050052643, + "eval_runtime": 767.567, + "eval_samples_per_second": 2.005, + "eval_steps_per_second": 2.005, + "eval_wer": 15.684899485741, + "step": 22000 + }, + { + "epoch": 12.32, + "grad_norm": 0.8640540838241577, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0024, + "step": 22025 + }, + { + "epoch": 12.33, + "grad_norm": 1.1752383708953857, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0028, + "step": 22050 + }, + { + "epoch": 12.35, + "grad_norm": 0.6782914400100708, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0025, + "step": 22075 + }, + { + "epoch": 12.36, + "grad_norm": 1.293980598449707, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0025, + "step": 22100 + }, + { + "epoch": 12.37, + "grad_norm": 0.8322327136993408, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0027, + "step": 22125 + }, + { + "epoch": 12.39, + "grad_norm": 1.26438570022583, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0027, + "step": 22150 + }, + { + "epoch": 12.4, + "grad_norm": 1.3920725584030151, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0028, + "step": 22175 + }, + { + "epoch": 12.42, + "grad_norm": 1.488665223121643, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0023, + "step": 22200 + }, + { + "epoch": 12.43, + "grad_norm": 1.0058155059814453, + "learning_rate": 7.81748743718593e-06, + "loss": 0.0032, + "step": 22225 + }, + { + "epoch": 12.44, + "grad_norm": 1.9791488647460938, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0029, + "step": 22250 + }, + { + "epoch": 12.46, + "grad_norm": 1.377987265586853, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0026, + "step": 22275 + }, + { + "epoch": 12.47, + "grad_norm": 1.1476207971572876, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0024, + "step": 22300 + }, + { + "epoch": 12.49, + "grad_norm": 0.864319384098053, + "learning_rate": 7.80743718592965e-06, + "loss": 0.0029, + "step": 22325 + }, + { + "epoch": 12.5, + "grad_norm": 1.1502805948257446, + "learning_rate": 7.804924623115579e-06, + "loss": 0.003, + "step": 22350 + }, + { + "epoch": 12.51, + "grad_norm": 0.8402910232543945, + "learning_rate": 7.802412060301508e-06, + "loss": 0.0029, + "step": 22375 + }, + { + "epoch": 12.53, + "grad_norm": 1.5440311431884766, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0029, + "step": 22400 + }, + { + "epoch": 12.54, + "grad_norm": 2.6225740909576416, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0025, + "step": 22425 + }, + { + "epoch": 12.56, + "grad_norm": 0.8309369087219238, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0025, + "step": 22450 + }, + { + "epoch": 12.57, + "grad_norm": 1.15687096118927, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0025, + "step": 22475 + }, + { + "epoch": 12.58, + "grad_norm": 1.6842396259307861, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0026, + "step": 22500 + }, + { + "epoch": 12.6, + "grad_norm": 1.5239055156707764, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0029, + "step": 22525 + }, + { + "epoch": 12.61, + "grad_norm": 1.2024272680282593, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0026, + "step": 22550 + }, + { + "epoch": 12.63, + "grad_norm": 0.8227180242538452, + "learning_rate": 7.782311557788945e-06, + "loss": 0.0032, + "step": 22575 + }, + { + "epoch": 12.64, + "grad_norm": 1.6435647010803223, + "learning_rate": 7.779798994974876e-06, + "loss": 0.003, + "step": 22600 + }, + { + "epoch": 12.65, + "grad_norm": 0.9485065340995789, + "learning_rate": 7.777286432160805e-06, + "loss": 0.0029, + "step": 22625 + }, + { + "epoch": 12.67, + "grad_norm": 1.2093783617019653, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0023, + "step": 22650 + }, + { + "epoch": 12.68, + "grad_norm": 1.4277679920196533, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0028, + "step": 22675 + }, + { + "epoch": 12.7, + "grad_norm": 1.0801323652267456, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0028, + "step": 22700 + }, + { + "epoch": 12.71, + "grad_norm": 1.4279265403747559, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0029, + "step": 22725 + }, + { + "epoch": 12.72, + "grad_norm": 1.406038522720337, + "learning_rate": 7.764723618090453e-06, + "loss": 0.0026, + "step": 22750 + }, + { + "epoch": 12.74, + "grad_norm": 1.060463309288025, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0025, + "step": 22775 + }, + { + "epoch": 12.75, + "grad_norm": 1.0090751647949219, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0029, + "step": 22800 + }, + { + "epoch": 12.77, + "grad_norm": 0.9715700149536133, + "learning_rate": 7.757185929648243e-06, + "loss": 0.0027, + "step": 22825 + }, + { + "epoch": 12.78, + "grad_norm": 1.6400513648986816, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0025, + "step": 22850 + }, + { + "epoch": 12.79, + "grad_norm": 1.386476993560791, + "learning_rate": 7.752160804020102e-06, + "loss": 0.003, + "step": 22875 + }, + { + "epoch": 12.81, + "grad_norm": 2.3613617420196533, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0028, + "step": 22900 + }, + { + "epoch": 12.82, + "grad_norm": 1.3630965948104858, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0031, + "step": 22925 + }, + { + "epoch": 12.84, + "grad_norm": 0.7133073210716248, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0031, + "step": 22950 + }, + { + "epoch": 12.85, + "grad_norm": 2.3335700035095215, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0029, + "step": 22975 + }, + { + "epoch": 12.86, + "grad_norm": 1.5158627033233643, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0029, + "step": 23000 + }, + { + "epoch": 12.86, + "eval_loss": 0.19873502850532532, + "eval_runtime": 763.8399, + "eval_samples_per_second": 2.015, + "eval_steps_per_second": 2.015, + "eval_wer": 15.62061711079944, + "step": 23000 + }, + { + "epoch": 12.88, + "grad_norm": 1.3536944389343262, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0029, + "step": 23025 + }, + { + "epoch": 12.89, + "grad_norm": 0.2909288704395294, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0028, + "step": 23050 + }, + { + "epoch": 12.91, + "grad_norm": 1.2498669624328613, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0027, + "step": 23075 + }, + { + "epoch": 12.92, + "grad_norm": 1.0599061250686646, + "learning_rate": 7.729547738693469e-06, + "loss": 0.003, + "step": 23100 + }, + { + "epoch": 12.93, + "grad_norm": 1.4277160167694092, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0032, + "step": 23125 + }, + { + "epoch": 12.95, + "grad_norm": 1.7774913311004639, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0032, + "step": 23150 + }, + { + "epoch": 12.96, + "grad_norm": 1.3884702920913696, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0032, + "step": 23175 + }, + { + "epoch": 12.98, + "grad_norm": 0.6391613483428955, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0031, + "step": 23200 + }, + { + "epoch": 12.99, + "grad_norm": 1.357005000114441, + "learning_rate": 7.716984924623117e-06, + "loss": 0.0032, + "step": 23225 + }, + { + "epoch": 13.0, + "grad_norm": 0.8932874202728271, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0028, + "step": 23250 + }, + { + "epoch": 13.02, + "grad_norm": 0.6009535789489746, + "learning_rate": 7.711959798994976e-06, + "loss": 0.002, + "step": 23275 + }, + { + "epoch": 13.03, + "grad_norm": 0.504006028175354, + "learning_rate": 7.709447236180905e-06, + "loss": 0.0017, + "step": 23300 + }, + { + "epoch": 13.05, + "grad_norm": 0.4201720654964447, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0016, + "step": 23325 + }, + { + "epoch": 13.06, + "grad_norm": 1.1929343938827515, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0022, + "step": 23350 + }, + { + "epoch": 13.07, + "grad_norm": 1.8136353492736816, + "learning_rate": 7.701909547738695e-06, + "loss": 0.0021, + "step": 23375 + }, + { + "epoch": 13.09, + "grad_norm": 1.0713380575180054, + "learning_rate": 7.699497487437186e-06, + "loss": 0.0021, + "step": 23400 + }, + { + "epoch": 13.1, + "grad_norm": 0.8888652324676514, + "learning_rate": 7.696984924623117e-06, + "loss": 0.002, + "step": 23425 + }, + { + "epoch": 13.12, + "grad_norm": 0.8778591156005859, + "learning_rate": 7.694472361809045e-06, + "loss": 0.0023, + "step": 23450 + }, + { + "epoch": 13.13, + "grad_norm": 1.04825758934021, + "learning_rate": 7.691959798994976e-06, + "loss": 0.0018, + "step": 23475 + }, + { + "epoch": 13.14, + "grad_norm": 1.4909553527832031, + "learning_rate": 7.689447236180905e-06, + "loss": 0.0021, + "step": 23500 + }, + { + "epoch": 13.16, + "grad_norm": 0.9640198349952698, + "learning_rate": 7.686934673366835e-06, + "loss": 0.0022, + "step": 23525 + }, + { + "epoch": 13.17, + "grad_norm": 0.8568798899650574, + "learning_rate": 7.684422110552766e-06, + "loss": 0.002, + "step": 23550 + }, + { + "epoch": 13.19, + "grad_norm": 2.0633556842803955, + "learning_rate": 7.681909547738693e-06, + "loss": 0.0024, + "step": 23575 + }, + { + "epoch": 13.2, + "grad_norm": 0.36461710929870605, + "learning_rate": 7.679396984924624e-06, + "loss": 0.0017, + "step": 23600 + }, + { + "epoch": 13.21, + "grad_norm": 1.1638803482055664, + "learning_rate": 7.676884422110554e-06, + "loss": 0.0018, + "step": 23625 + }, + { + "epoch": 13.23, + "grad_norm": 0.8867812156677246, + "learning_rate": 7.674371859296483e-06, + "loss": 0.002, + "step": 23650 + }, + { + "epoch": 13.24, + "grad_norm": 0.6543762683868408, + "learning_rate": 7.671859296482412e-06, + "loss": 0.0021, + "step": 23675 + }, + { + "epoch": 13.26, + "grad_norm": 1.5225844383239746, + "learning_rate": 7.669346733668343e-06, + "loss": 0.0025, + "step": 23700 + }, + { + "epoch": 13.27, + "grad_norm": 0.7849925756454468, + "learning_rate": 7.666834170854271e-06, + "loss": 0.0021, + "step": 23725 + }, + { + "epoch": 13.28, + "grad_norm": 2.36126708984375, + "learning_rate": 7.664321608040202e-06, + "loss": 0.0025, + "step": 23750 + }, + { + "epoch": 13.3, + "grad_norm": 0.6898000240325928, + "learning_rate": 7.661809045226131e-06, + "loss": 0.0021, + "step": 23775 + }, + { + "epoch": 13.31, + "grad_norm": 0.9730959534645081, + "learning_rate": 7.65929648241206e-06, + "loss": 0.002, + "step": 23800 + }, + { + "epoch": 13.32, + "grad_norm": 0.7122257351875305, + "learning_rate": 7.656783919597992e-06, + "loss": 0.0019, + "step": 23825 + }, + { + "epoch": 13.34, + "grad_norm": 1.2148780822753906, + "learning_rate": 7.65427135678392e-06, + "loss": 0.0022, + "step": 23850 + }, + { + "epoch": 13.35, + "grad_norm": 0.5776109099388123, + "learning_rate": 7.65175879396985e-06, + "loss": 0.0022, + "step": 23875 + }, + { + "epoch": 13.37, + "grad_norm": 1.0715388059616089, + "learning_rate": 7.64924623115578e-06, + "loss": 0.0021, + "step": 23900 + }, + { + "epoch": 13.38, + "grad_norm": 1.7551685571670532, + "learning_rate": 7.646733668341709e-06, + "loss": 0.0033, + "step": 23925 + }, + { + "epoch": 13.39, + "grad_norm": 1.1267277002334595, + "learning_rate": 7.644221105527638e-06, + "loss": 0.0025, + "step": 23950 + }, + { + "epoch": 13.41, + "grad_norm": 0.742885172367096, + "learning_rate": 7.64170854271357e-06, + "loss": 0.0025, + "step": 23975 + }, + { + "epoch": 13.42, + "grad_norm": 0.895202100276947, + "learning_rate": 7.639195979899499e-06, + "loss": 0.0025, + "step": 24000 + }, + { + "epoch": 13.42, + "eval_loss": 0.19512023031711578, + "eval_runtime": 761.9734, + "eval_samples_per_second": 2.02, + "eval_steps_per_second": 2.02, + "eval_wer": 15.521271622253389, + "step": 24000 + }, + { + "epoch": 13.44, + "grad_norm": 1.3447036743164062, + "learning_rate": 7.636683417085428e-06, + "loss": 0.0025, + "step": 24025 + }, + { + "epoch": 13.45, + "grad_norm": 1.3623594045639038, + "learning_rate": 7.634170854271357e-06, + "loss": 0.0024, + "step": 24050 + }, + { + "epoch": 13.46, + "grad_norm": 1.3918298482894897, + "learning_rate": 7.631658291457287e-06, + "loss": 0.0022, + "step": 24075 + }, + { + "epoch": 13.48, + "grad_norm": 0.9132897853851318, + "learning_rate": 7.629145728643217e-06, + "loss": 0.002, + "step": 24100 + }, + { + "epoch": 13.49, + "grad_norm": 0.7921774387359619, + "learning_rate": 7.626633165829146e-06, + "loss": 0.002, + "step": 24125 + }, + { + "epoch": 13.51, + "grad_norm": 0.5542047023773193, + "learning_rate": 7.624120603015076e-06, + "loss": 0.0021, + "step": 24150 + }, + { + "epoch": 13.52, + "grad_norm": 0.6862865090370178, + "learning_rate": 7.621608040201006e-06, + "loss": 0.0028, + "step": 24175 + }, + { + "epoch": 13.53, + "grad_norm": 0.4851762056350708, + "learning_rate": 7.619095477386935e-06, + "loss": 0.0023, + "step": 24200 + }, + { + "epoch": 13.55, + "grad_norm": 1.1880477666854858, + "learning_rate": 7.616582914572865e-06, + "loss": 0.0025, + "step": 24225 + }, + { + "epoch": 13.56, + "grad_norm": 1.0346899032592773, + "learning_rate": 7.614070351758794e-06, + "loss": 0.0022, + "step": 24250 + }, + { + "epoch": 13.58, + "grad_norm": 0.9813887476921082, + "learning_rate": 7.6115577889447245e-06, + "loss": 0.0022, + "step": 24275 + }, + { + "epoch": 13.59, + "grad_norm": 0.803973376750946, + "learning_rate": 7.609045226130654e-06, + "loss": 0.0021, + "step": 24300 + }, + { + "epoch": 13.6, + "grad_norm": 1.198880672454834, + "learning_rate": 7.606532663316584e-06, + "loss": 0.0024, + "step": 24325 + }, + { + "epoch": 13.62, + "grad_norm": 1.1560088396072388, + "learning_rate": 7.6040201005025125e-06, + "loss": 0.0026, + "step": 24350 + }, + { + "epoch": 13.63, + "grad_norm": 1.8568098545074463, + "learning_rate": 7.601507537688443e-06, + "loss": 0.0025, + "step": 24375 + }, + { + "epoch": 13.65, + "grad_norm": 1.3446215391159058, + "learning_rate": 7.598994974874373e-06, + "loss": 0.0022, + "step": 24400 + }, + { + "epoch": 13.66, + "grad_norm": 1.045198917388916, + "learning_rate": 7.596482412060302e-06, + "loss": 0.0024, + "step": 24425 + }, + { + "epoch": 13.67, + "grad_norm": 0.4915551543235779, + "learning_rate": 7.593969849246232e-06, + "loss": 0.0023, + "step": 24450 + }, + { + "epoch": 13.69, + "grad_norm": 1.8346890211105347, + "learning_rate": 7.591457286432161e-06, + "loss": 0.0018, + "step": 24475 + }, + { + "epoch": 13.7, + "grad_norm": 0.9836933016777039, + "learning_rate": 7.588944723618091e-06, + "loss": 0.0023, + "step": 24500 + }, + { + "epoch": 13.72, + "grad_norm": 1.3588805198669434, + "learning_rate": 7.58643216080402e-06, + "loss": 0.003, + "step": 24525 + }, + { + "epoch": 13.73, + "grad_norm": 0.7071232795715332, + "learning_rate": 7.5839195979899505e-06, + "loss": 0.0028, + "step": 24550 + }, + { + "epoch": 13.74, + "grad_norm": 1.4838030338287354, + "learning_rate": 7.58140703517588e-06, + "loss": 0.0026, + "step": 24575 + }, + { + "epoch": 13.76, + "grad_norm": 1.407073974609375, + "learning_rate": 7.57889447236181e-06, + "loss": 0.003, + "step": 24600 + }, + { + "epoch": 13.77, + "grad_norm": 1.452406883239746, + "learning_rate": 7.57638190954774e-06, + "loss": 0.0024, + "step": 24625 + }, + { + "epoch": 13.79, + "grad_norm": 0.6675143241882324, + "learning_rate": 7.573869346733669e-06, + "loss": 0.0026, + "step": 24650 + }, + { + "epoch": 13.8, + "grad_norm": 1.5939953327178955, + "learning_rate": 7.571356783919599e-06, + "loss": 0.0028, + "step": 24675 + }, + { + "epoch": 13.81, + "grad_norm": 0.7208271622657776, + "learning_rate": 7.568844221105528e-06, + "loss": 0.0023, + "step": 24700 + }, + { + "epoch": 13.83, + "grad_norm": 0.946986198425293, + "learning_rate": 7.566331658291458e-06, + "loss": 0.0022, + "step": 24725 + }, + { + "epoch": 13.84, + "grad_norm": 1.069899320602417, + "learning_rate": 7.563819095477387e-06, + "loss": 0.0026, + "step": 24750 + }, + { + "epoch": 13.86, + "grad_norm": 1.2208495140075684, + "learning_rate": 7.561306532663317e-06, + "loss": 0.003, + "step": 24775 + }, + { + "epoch": 13.87, + "grad_norm": 1.2793633937835693, + "learning_rate": 7.558793969849247e-06, + "loss": 0.0026, + "step": 24800 + }, + { + "epoch": 13.88, + "grad_norm": 0.9618927836418152, + "learning_rate": 7.556281407035176e-06, + "loss": 0.0026, + "step": 24825 + }, + { + "epoch": 13.9, + "grad_norm": 1.441151738166809, + "learning_rate": 7.5537688442211066e-06, + "loss": 0.0029, + "step": 24850 + }, + { + "epoch": 13.91, + "grad_norm": 1.0940569639205933, + "learning_rate": 7.551256281407036e-06, + "loss": 0.0025, + "step": 24875 + }, + { + "epoch": 13.93, + "grad_norm": 0.8523790240287781, + "learning_rate": 7.548743718592966e-06, + "loss": 0.0028, + "step": 24900 + }, + { + "epoch": 13.94, + "grad_norm": 1.3309508562088013, + "learning_rate": 7.5462311557788945e-06, + "loss": 0.0028, + "step": 24925 + }, + { + "epoch": 13.95, + "grad_norm": 1.9282065629959106, + "learning_rate": 7.543718592964825e-06, + "loss": 0.0029, + "step": 24950 + }, + { + "epoch": 13.97, + "grad_norm": 0.8029336929321289, + "learning_rate": 7.541206030150754e-06, + "loss": 0.0024, + "step": 24975 + }, + { + "epoch": 13.98, + "grad_norm": 1.2224254608154297, + "learning_rate": 7.538693467336684e-06, + "loss": 0.0023, + "step": 25000 + }, + { + "epoch": 13.98, + "eval_loss": 0.2003735452890396, + "eval_runtime": 761.319, + "eval_samples_per_second": 2.021, + "eval_steps_per_second": 2.021, + "eval_wer": 15.532959326788218, + "step": 25000 + }, + { + "epoch": 14.0, + "grad_norm": 0.6900804042816162, + "learning_rate": 7.536180904522614e-06, + "loss": 0.0027, + "step": 25025 + }, + { + "epoch": 14.01, + "grad_norm": 0.5148425102233887, + "learning_rate": 7.533668341708543e-06, + "loss": 0.0019, + "step": 25050 + }, + { + "epoch": 14.02, + "grad_norm": 0.2919558584690094, + "learning_rate": 7.531155778894473e-06, + "loss": 0.0016, + "step": 25075 + }, + { + "epoch": 14.04, + "grad_norm": 1.3689295053482056, + "learning_rate": 7.528643216080402e-06, + "loss": 0.0017, + "step": 25100 + }, + { + "epoch": 14.05, + "grad_norm": 1.2881200313568115, + "learning_rate": 7.5261306532663325e-06, + "loss": 0.0019, + "step": 25125 + }, + { + "epoch": 14.07, + "grad_norm": 0.20271843671798706, + "learning_rate": 7.523618090452262e-06, + "loss": 0.0014, + "step": 25150 + }, + { + "epoch": 14.08, + "grad_norm": 0.2870531678199768, + "learning_rate": 7.521105527638192e-06, + "loss": 0.0011, + "step": 25175 + }, + { + "epoch": 14.09, + "grad_norm": 1.0732131004333496, + "learning_rate": 7.5185929648241205e-06, + "loss": 0.0016, + "step": 25200 + }, + { + "epoch": 14.11, + "grad_norm": 1.2838255167007446, + "learning_rate": 7.516080402010051e-06, + "loss": 0.0017, + "step": 25225 + }, + { + "epoch": 14.12, + "grad_norm": 1.27001953125, + "learning_rate": 7.513567839195981e-06, + "loss": 0.0017, + "step": 25250 + }, + { + "epoch": 14.14, + "grad_norm": 1.3139007091522217, + "learning_rate": 7.51105527638191e-06, + "loss": 0.0018, + "step": 25275 + }, + { + "epoch": 14.15, + "grad_norm": 0.6414282321929932, + "learning_rate": 7.50854271356784e-06, + "loss": 0.0015, + "step": 25300 + }, + { + "epoch": 14.16, + "grad_norm": 0.430123507976532, + "learning_rate": 7.506030150753769e-06, + "loss": 0.0018, + "step": 25325 + }, + { + "epoch": 14.18, + "grad_norm": 1.086958885192871, + "learning_rate": 7.503517587939699e-06, + "loss": 0.0021, + "step": 25350 + }, + { + "epoch": 14.19, + "grad_norm": 2.5306973457336426, + "learning_rate": 7.501005025125628e-06, + "loss": 0.0018, + "step": 25375 + }, + { + "epoch": 14.21, + "grad_norm": 0.7079110741615295, + "learning_rate": 7.4984924623115585e-06, + "loss": 0.0019, + "step": 25400 + }, + { + "epoch": 14.22, + "grad_norm": 1.5514994859695435, + "learning_rate": 7.495979899497488e-06, + "loss": 0.0018, + "step": 25425 + }, + { + "epoch": 14.23, + "grad_norm": 1.2856292724609375, + "learning_rate": 7.493467336683418e-06, + "loss": 0.0019, + "step": 25450 + }, + { + "epoch": 14.25, + "grad_norm": 1.886988878250122, + "learning_rate": 7.490954773869348e-06, + "loss": 0.002, + "step": 25475 + }, + { + "epoch": 14.26, + "grad_norm": 1.005176067352295, + "learning_rate": 7.488442211055277e-06, + "loss": 0.0017, + "step": 25500 + }, + { + "epoch": 14.28, + "grad_norm": 1.279975175857544, + "learning_rate": 7.485929648241207e-06, + "loss": 0.002, + "step": 25525 + }, + { + "epoch": 14.29, + "grad_norm": 1.6667780876159668, + "learning_rate": 7.483417085427136e-06, + "loss": 0.0021, + "step": 25550 + }, + { + "epoch": 14.3, + "grad_norm": 0.5154997706413269, + "learning_rate": 7.480904522613066e-06, + "loss": 0.002, + "step": 25575 + }, + { + "epoch": 14.32, + "grad_norm": 1.015742301940918, + "learning_rate": 7.478391959798995e-06, + "loss": 0.0024, + "step": 25600 + }, + { + "epoch": 14.33, + "grad_norm": 1.1054123640060425, + "learning_rate": 7.475879396984925e-06, + "loss": 0.0019, + "step": 25625 + }, + { + "epoch": 14.35, + "grad_norm": 0.5859752297401428, + "learning_rate": 7.473366834170855e-06, + "loss": 0.0022, + "step": 25650 + }, + { + "epoch": 14.36, + "grad_norm": 0.8504732251167297, + "learning_rate": 7.470854271356784e-06, + "loss": 0.0022, + "step": 25675 + }, + { + "epoch": 14.37, + "grad_norm": 1.3065191507339478, + "learning_rate": 7.4683417085427146e-06, + "loss": 0.0019, + "step": 25700 + }, + { + "epoch": 14.39, + "grad_norm": 1.4367316961288452, + "learning_rate": 7.465829145728644e-06, + "loss": 0.0019, + "step": 25725 + }, + { + "epoch": 14.4, + "grad_norm": 1.0775121450424194, + "learning_rate": 7.463316582914574e-06, + "loss": 0.0019, + "step": 25750 + }, + { + "epoch": 14.42, + "grad_norm": 1.214430332183838, + "learning_rate": 7.4609045226130665e-06, + "loss": 0.0024, + "step": 25775 + }, + { + "epoch": 14.43, + "grad_norm": 0.5668609142303467, + "learning_rate": 7.458391959798995e-06, + "loss": 0.0018, + "step": 25800 + }, + { + "epoch": 14.44, + "grad_norm": 0.5086265206336975, + "learning_rate": 7.455879396984925e-06, + "loss": 0.002, + "step": 25825 + }, + { + "epoch": 14.46, + "grad_norm": 0.8033430576324463, + "learning_rate": 7.453366834170855e-06, + "loss": 0.0021, + "step": 25850 + }, + { + "epoch": 14.47, + "grad_norm": 1.045922875404358, + "learning_rate": 7.450854271356785e-06, + "loss": 0.0027, + "step": 25875 + }, + { + "epoch": 14.49, + "grad_norm": 1.7342844009399414, + "learning_rate": 7.448341708542715e-06, + "loss": 0.0029, + "step": 25900 + }, + { + "epoch": 14.5, + "grad_norm": 0.7541054487228394, + "learning_rate": 7.445829145728643e-06, + "loss": 0.0027, + "step": 25925 + }, + { + "epoch": 14.51, + "grad_norm": 1.385183334350586, + "learning_rate": 7.443316582914573e-06, + "loss": 0.0022, + "step": 25950 + }, + { + "epoch": 14.53, + "grad_norm": 1.3125897645950317, + "learning_rate": 7.440804020100503e-06, + "loss": 0.0021, + "step": 25975 + }, + { + "epoch": 14.54, + "grad_norm": 0.45414167642593384, + "learning_rate": 7.438291457286433e-06, + "loss": 0.0022, + "step": 26000 + }, + { + "epoch": 14.54, + "eval_loss": 0.20331861078739166, + "eval_runtime": 767.1387, + "eval_samples_per_second": 2.006, + "eval_steps_per_second": 2.006, + "eval_wer": 15.480364656381488, + "step": 26000 + }, + { + "epoch": 14.56, + "grad_norm": 0.878839910030365, + "learning_rate": 7.435778894472362e-06, + "loss": 0.0017, + "step": 26025 + }, + { + "epoch": 14.57, + "grad_norm": 1.0181266069412231, + "learning_rate": 7.433266331658292e-06, + "loss": 0.0022, + "step": 26050 + }, + { + "epoch": 14.58, + "grad_norm": 0.8261253833770752, + "learning_rate": 7.4307537688442226e-06, + "loss": 0.0019, + "step": 26075 + }, + { + "epoch": 14.6, + "grad_norm": 0.7502545714378357, + "learning_rate": 7.428241206030151e-06, + "loss": 0.0018, + "step": 26100 + }, + { + "epoch": 14.61, + "grad_norm": 0.8476191759109497, + "learning_rate": 7.425728643216081e-06, + "loss": 0.0021, + "step": 26125 + }, + { + "epoch": 14.63, + "grad_norm": 1.2427834272384644, + "learning_rate": 7.4232160804020105e-06, + "loss": 0.0024, + "step": 26150 + }, + { + "epoch": 14.64, + "grad_norm": 1.2672892808914185, + "learning_rate": 7.420703517587941e-06, + "loss": 0.0021, + "step": 26175 + }, + { + "epoch": 14.65, + "grad_norm": 1.6590334177017212, + "learning_rate": 7.418190954773869e-06, + "loss": 0.0023, + "step": 26200 + }, + { + "epoch": 14.67, + "grad_norm": 2.070087432861328, + "learning_rate": 7.415678391959799e-06, + "loss": 0.0019, + "step": 26225 + }, + { + "epoch": 14.68, + "grad_norm": 0.6491419076919556, + "learning_rate": 7.4131658291457295e-06, + "loss": 0.0024, + "step": 26250 + }, + { + "epoch": 14.7, + "grad_norm": 0.9597603678703308, + "learning_rate": 7.410653266331659e-06, + "loss": 0.002, + "step": 26275 + }, + { + "epoch": 14.71, + "grad_norm": 0.9065916538238525, + "learning_rate": 7.408140703517589e-06, + "loss": 0.0024, + "step": 26300 + }, + { + "epoch": 14.72, + "grad_norm": 1.879719853401184, + "learning_rate": 7.405628140703518e-06, + "loss": 0.0026, + "step": 26325 + }, + { + "epoch": 14.74, + "grad_norm": 1.367193341255188, + "learning_rate": 7.4031155778894485e-06, + "loss": 0.002, + "step": 26350 + }, + { + "epoch": 14.75, + "grad_norm": 1.0430370569229126, + "learning_rate": 7.400603015075377e-06, + "loss": 0.0021, + "step": 26375 + }, + { + "epoch": 14.77, + "grad_norm": 1.205898642539978, + "learning_rate": 7.398090452261307e-06, + "loss": 0.0025, + "step": 26400 + }, + { + "epoch": 14.78, + "grad_norm": 0.7472314834594727, + "learning_rate": 7.3955778894472365e-06, + "loss": 0.0023, + "step": 26425 + }, + { + "epoch": 14.79, + "grad_norm": 0.7651177048683167, + "learning_rate": 7.393065326633167e-06, + "loss": 0.0021, + "step": 26450 + }, + { + "epoch": 14.81, + "grad_norm": 0.4880220293998718, + "learning_rate": 7.390552763819097e-06, + "loss": 0.0018, + "step": 26475 + }, + { + "epoch": 14.82, + "grad_norm": 0.7474664449691772, + "learning_rate": 7.388040201005025e-06, + "loss": 0.0021, + "step": 26500 + }, + { + "epoch": 14.84, + "grad_norm": 0.7403668761253357, + "learning_rate": 7.3855276381909555e-06, + "loss": 0.0022, + "step": 26525 + }, + { + "epoch": 14.85, + "grad_norm": 1.0597673654556274, + "learning_rate": 7.383015075376885e-06, + "loss": 0.0022, + "step": 26550 + }, + { + "epoch": 14.86, + "grad_norm": 0.8271737098693848, + "learning_rate": 7.380502512562815e-06, + "loss": 0.0018, + "step": 26575 + }, + { + "epoch": 14.88, + "grad_norm": 0.7283473014831543, + "learning_rate": 7.377989949748744e-06, + "loss": 0.0031, + "step": 26600 + }, + { + "epoch": 14.89, + "grad_norm": 0.668359637260437, + "learning_rate": 7.3754773869346745e-06, + "loss": 0.0026, + "step": 26625 + }, + { + "epoch": 14.9, + "grad_norm": 0.9218766093254089, + "learning_rate": 7.372964824120603e-06, + "loss": 0.0023, + "step": 26650 + }, + { + "epoch": 14.92, + "grad_norm": 0.8353140950202942, + "learning_rate": 7.370452261306533e-06, + "loss": 0.0021, + "step": 26675 + }, + { + "epoch": 14.93, + "grad_norm": 1.1086146831512451, + "learning_rate": 7.367939698492463e-06, + "loss": 0.0021, + "step": 26700 + }, + { + "epoch": 14.95, + "grad_norm": 0.9221814274787903, + "learning_rate": 7.365427135678393e-06, + "loss": 0.002, + "step": 26725 + }, + { + "epoch": 14.96, + "grad_norm": 0.894939124584198, + "learning_rate": 7.362914572864323e-06, + "loss": 0.0019, + "step": 26750 + }, + { + "epoch": 14.97, + "grad_norm": 1.5379173755645752, + "learning_rate": 7.360402010050251e-06, + "loss": 0.0024, + "step": 26775 + }, + { + "epoch": 14.99, + "grad_norm": 0.7634355425834656, + "learning_rate": 7.357889447236181e-06, + "loss": 0.0021, + "step": 26800 + }, + { + "epoch": 15.0, + "grad_norm": 0.2922452390193939, + "learning_rate": 7.355376884422111e-06, + "loss": 0.002, + "step": 26825 + }, + { + "epoch": 15.02, + "grad_norm": 0.9698469638824463, + "learning_rate": 7.352864321608041e-06, + "loss": 0.0015, + "step": 26850 + }, + { + "epoch": 15.03, + "grad_norm": 1.3203896284103394, + "learning_rate": 7.350351758793971e-06, + "loss": 0.0017, + "step": 26875 + }, + { + "epoch": 15.04, + "grad_norm": 0.64471834897995, + "learning_rate": 7.3478391959799e-06, + "loss": 0.0015, + "step": 26900 + }, + { + "epoch": 15.06, + "grad_norm": 1.1590235233306885, + "learning_rate": 7.3453266331658306e-06, + "loss": 0.0015, + "step": 26925 + }, + { + "epoch": 15.07, + "grad_norm": 0.2123766988515854, + "learning_rate": 7.342814070351759e-06, + "loss": 0.0014, + "step": 26950 + }, + { + "epoch": 15.09, + "grad_norm": 0.985645592212677, + "learning_rate": 7.340301507537689e-06, + "loss": 0.0015, + "step": 26975 + }, + { + "epoch": 15.1, + "grad_norm": 0.8787118792533875, + "learning_rate": 7.3377889447236185e-06, + "loss": 0.0018, + "step": 27000 + }, + { + "epoch": 15.1, + "eval_loss": 0.20508131384849548, + "eval_runtime": 759.3924, + "eval_samples_per_second": 2.027, + "eval_steps_per_second": 2.027, + "eval_wer": 15.275829827021973, + "step": 27000 + }, + { + "epoch": 15.11, + "grad_norm": 0.6402799487113953, + "learning_rate": 7.335276381909549e-06, + "loss": 0.0013, + "step": 27025 + }, + { + "epoch": 15.13, + "grad_norm": 1.0611077547073364, + "learning_rate": 7.332763819095477e-06, + "loss": 0.0015, + "step": 27050 + }, + { + "epoch": 15.14, + "grad_norm": 1.188818335533142, + "learning_rate": 7.330251256281407e-06, + "loss": 0.0016, + "step": 27075 + }, + { + "epoch": 15.16, + "grad_norm": 1.4070847034454346, + "learning_rate": 7.3277386934673375e-06, + "loss": 0.0017, + "step": 27100 + }, + { + "epoch": 15.17, + "grad_norm": 1.4860950708389282, + "learning_rate": 7.325226130653267e-06, + "loss": 0.0016, + "step": 27125 + }, + { + "epoch": 15.18, + "grad_norm": 1.8810749053955078, + "learning_rate": 7.322713567839197e-06, + "loss": 0.0016, + "step": 27150 + }, + { + "epoch": 15.2, + "grad_norm": 1.0632048845291138, + "learning_rate": 7.320201005025126e-06, + "loss": 0.0018, + "step": 27175 + }, + { + "epoch": 15.21, + "grad_norm": 1.061009168624878, + "learning_rate": 7.3176884422110565e-06, + "loss": 0.0016, + "step": 27200 + }, + { + "epoch": 15.23, + "grad_norm": 0.6606448292732239, + "learning_rate": 7.315175879396985e-06, + "loss": 0.0015, + "step": 27225 + }, + { + "epoch": 15.24, + "grad_norm": 0.6828747987747192, + "learning_rate": 7.312663316582915e-06, + "loss": 0.0017, + "step": 27250 + }, + { + "epoch": 15.25, + "grad_norm": 0.6235216856002808, + "learning_rate": 7.3101507537688445e-06, + "loss": 0.0014, + "step": 27275 + }, + { + "epoch": 15.27, + "grad_norm": 0.8191012144088745, + "learning_rate": 7.307638190954775e-06, + "loss": 0.0016, + "step": 27300 + }, + { + "epoch": 15.28, + "grad_norm": 2.2713983058929443, + "learning_rate": 7.305125628140705e-06, + "loss": 0.0017, + "step": 27325 + }, + { + "epoch": 15.3, + "grad_norm": 1.2317100763320923, + "learning_rate": 7.302613065326633e-06, + "loss": 0.0015, + "step": 27350 + }, + { + "epoch": 15.31, + "grad_norm": 0.3510379195213318, + "learning_rate": 7.3001005025125635e-06, + "loss": 0.0016, + "step": 27375 + }, + { + "epoch": 15.32, + "grad_norm": 1.5196717977523804, + "learning_rate": 7.297587939698493e-06, + "loss": 0.0019, + "step": 27400 + }, + { + "epoch": 15.34, + "grad_norm": 1.236497163772583, + "learning_rate": 7.295075376884423e-06, + "loss": 0.0023, + "step": 27425 + }, + { + "epoch": 15.35, + "grad_norm": 1.225793480873108, + "learning_rate": 7.292562814070352e-06, + "loss": 0.0017, + "step": 27450 + }, + { + "epoch": 15.37, + "grad_norm": 1.1143921613693237, + "learning_rate": 7.2900502512562825e-06, + "loss": 0.0016, + "step": 27475 + }, + { + "epoch": 15.38, + "grad_norm": 0.5935631990432739, + "learning_rate": 7.287537688442211e-06, + "loss": 0.0018, + "step": 27500 + }, + { + "epoch": 15.39, + "grad_norm": 0.6081658601760864, + "learning_rate": 7.285025125628141e-06, + "loss": 0.0017, + "step": 27525 + }, + { + "epoch": 15.41, + "grad_norm": 0.9420928955078125, + "learning_rate": 7.282512562814071e-06, + "loss": 0.0017, + "step": 27550 + }, + { + "epoch": 15.42, + "grad_norm": 0.9445229172706604, + "learning_rate": 7.280000000000001e-06, + "loss": 0.0017, + "step": 27575 + }, + { + "epoch": 15.44, + "grad_norm": 0.4759940505027771, + "learning_rate": 7.277487437185931e-06, + "loss": 0.0014, + "step": 27600 + }, + { + "epoch": 15.45, + "grad_norm": 0.7664267420768738, + "learning_rate": 7.274974874371859e-06, + "loss": 0.0016, + "step": 27625 + }, + { + "epoch": 15.46, + "grad_norm": 0.8280103206634521, + "learning_rate": 7.272462311557789e-06, + "loss": 0.0025, + "step": 27650 + }, + { + "epoch": 15.48, + "grad_norm": 0.8986691832542419, + "learning_rate": 7.269949748743719e-06, + "loss": 0.0018, + "step": 27675 + }, + { + "epoch": 15.49, + "grad_norm": 0.8900121450424194, + "learning_rate": 7.267437185929649e-06, + "loss": 0.0019, + "step": 27700 + }, + { + "epoch": 15.51, + "grad_norm": 0.6778602600097656, + "learning_rate": 7.264924623115579e-06, + "loss": 0.0022, + "step": 27725 + }, + { + "epoch": 15.52, + "grad_norm": 0.910729706287384, + "learning_rate": 7.262412060301508e-06, + "loss": 0.0021, + "step": 27750 + }, + { + "epoch": 15.53, + "grad_norm": 1.2103006839752197, + "learning_rate": 7.259899497487439e-06, + "loss": 0.0022, + "step": 27775 + }, + { + "epoch": 15.55, + "grad_norm": 0.7615714073181152, + "learning_rate": 7.257386934673367e-06, + "loss": 0.0024, + "step": 27800 + }, + { + "epoch": 15.56, + "grad_norm": 1.6932957172393799, + "learning_rate": 7.254874371859297e-06, + "loss": 0.0019, + "step": 27825 + }, + { + "epoch": 15.58, + "grad_norm": 0.49849367141723633, + "learning_rate": 7.2523618090452265e-06, + "loss": 0.0019, + "step": 27850 + }, + { + "epoch": 15.59, + "grad_norm": 0.2759300172328949, + "learning_rate": 7.249849246231157e-06, + "loss": 0.0019, + "step": 27875 + }, + { + "epoch": 15.6, + "grad_norm": 1.9788423776626587, + "learning_rate": 7.247336683417085e-06, + "loss": 0.0016, + "step": 27900 + }, + { + "epoch": 15.62, + "grad_norm": 0.8968381285667419, + "learning_rate": 7.244824120603015e-06, + "loss": 0.0018, + "step": 27925 + }, + { + "epoch": 15.63, + "grad_norm": 1.2179943323135376, + "learning_rate": 7.2423115577889455e-06, + "loss": 0.0022, + "step": 27950 + }, + { + "epoch": 15.65, + "grad_norm": 0.34921443462371826, + "learning_rate": 7.239798994974875e-06, + "loss": 0.002, + "step": 27975 + }, + { + "epoch": 15.66, + "grad_norm": 1.5491278171539307, + "learning_rate": 7.237286432160805e-06, + "loss": 0.0021, + "step": 28000 + }, + { + "epoch": 15.66, + "eval_loss": 0.20625917613506317, + "eval_runtime": 759.3875, + "eval_samples_per_second": 2.027, + "eval_steps_per_second": 2.027, + "eval_wer": 15.310892940626461, + "step": 28000 + }, + { + "epoch": 15.67, + "grad_norm": 1.0297913551330566, + "learning_rate": 7.234773869346734e-06, + "loss": 0.0023, + "step": 28025 + }, + { + "epoch": 15.69, + "grad_norm": 0.3987344801425934, + "learning_rate": 7.2322613065326645e-06, + "loss": 0.0019, + "step": 28050 + }, + { + "epoch": 15.7, + "grad_norm": 0.8203557133674622, + "learning_rate": 7.229748743718593e-06, + "loss": 0.002, + "step": 28075 + }, + { + "epoch": 15.72, + "grad_norm": 0.9130855798721313, + "learning_rate": 7.227236180904523e-06, + "loss": 0.002, + "step": 28100 + }, + { + "epoch": 15.73, + "grad_norm": 0.48221319913864136, + "learning_rate": 7.2247236180904525e-06, + "loss": 0.0022, + "step": 28125 + }, + { + "epoch": 15.74, + "grad_norm": 1.0055809020996094, + "learning_rate": 7.222211055276383e-06, + "loss": 0.0019, + "step": 28150 + }, + { + "epoch": 15.76, + "grad_norm": 0.9684811234474182, + "learning_rate": 7.219698492462313e-06, + "loss": 0.0019, + "step": 28175 + }, + { + "epoch": 15.77, + "grad_norm": 2.0485405921936035, + "learning_rate": 7.217185929648241e-06, + "loss": 0.0016, + "step": 28200 + }, + { + "epoch": 15.79, + "grad_norm": 0.6756620407104492, + "learning_rate": 7.2146733668341715e-06, + "loss": 0.002, + "step": 28225 + }, + { + "epoch": 15.8, + "grad_norm": 1.8306759595870972, + "learning_rate": 7.212160804020101e-06, + "loss": 0.0019, + "step": 28250 + }, + { + "epoch": 15.81, + "grad_norm": 0.6337253451347351, + "learning_rate": 7.209648241206031e-06, + "loss": 0.002, + "step": 28275 + }, + { + "epoch": 15.83, + "grad_norm": 1.221612811088562, + "learning_rate": 7.20713567839196e-06, + "loss": 0.0021, + "step": 28300 + }, + { + "epoch": 15.84, + "grad_norm": 0.7835298180580139, + "learning_rate": 7.2046231155778905e-06, + "loss": 0.0017, + "step": 28325 + }, + { + "epoch": 15.86, + "grad_norm": 0.9692056179046631, + "learning_rate": 7.20211055276382e-06, + "loss": 0.0022, + "step": 28350 + }, + { + "epoch": 15.87, + "grad_norm": 1.0694783926010132, + "learning_rate": 7.199597989949749e-06, + "loss": 0.002, + "step": 28375 + }, + { + "epoch": 15.88, + "grad_norm": 0.6357870101928711, + "learning_rate": 7.197085427135679e-06, + "loss": 0.0022, + "step": 28400 + }, + { + "epoch": 15.9, + "grad_norm": 0.9514111280441284, + "learning_rate": 7.194572864321609e-06, + "loss": 0.0019, + "step": 28425 + }, + { + "epoch": 15.91, + "grad_norm": 0.6707019209861755, + "learning_rate": 7.192060301507539e-06, + "loss": 0.0021, + "step": 28450 + }, + { + "epoch": 15.93, + "grad_norm": 1.256881833076477, + "learning_rate": 7.189547738693467e-06, + "loss": 0.0019, + "step": 28475 + }, + { + "epoch": 15.94, + "grad_norm": 0.4793357849121094, + "learning_rate": 7.187035175879397e-06, + "loss": 0.0021, + "step": 28500 + }, + { + "epoch": 15.95, + "grad_norm": 0.5421711206436157, + "learning_rate": 7.184522613065327e-06, + "loss": 0.0018, + "step": 28525 + }, + { + "epoch": 15.97, + "grad_norm": 2.475353479385376, + "learning_rate": 7.182010050251257e-06, + "loss": 0.0025, + "step": 28550 + }, + { + "epoch": 15.98, + "grad_norm": 1.4441078901290894, + "learning_rate": 7.179497487437187e-06, + "loss": 0.0019, + "step": 28575 + }, + { + "epoch": 16.0, + "grad_norm": 0.4080997407436371, + "learning_rate": 7.176984924623116e-06, + "loss": 0.0019, + "step": 28600 + }, + { + "epoch": 16.01, + "grad_norm": 0.306756854057312, + "learning_rate": 7.174472361809047e-06, + "loss": 0.0016, + "step": 28625 + }, + { + "epoch": 16.02, + "grad_norm": 0.6107569336891174, + "learning_rate": 7.171959798994975e-06, + "loss": 0.0014, + "step": 28650 + }, + { + "epoch": 16.04, + "grad_norm": 1.019136905670166, + "learning_rate": 7.169447236180905e-06, + "loss": 0.0017, + "step": 28675 + }, + { + "epoch": 16.05, + "grad_norm": 1.168971300125122, + "learning_rate": 7.1669346733668345e-06, + "loss": 0.0014, + "step": 28700 + }, + { + "epoch": 16.07, + "grad_norm": 0.5141229629516602, + "learning_rate": 7.164422110552765e-06, + "loss": 0.0013, + "step": 28725 + }, + { + "epoch": 16.08, + "grad_norm": 0.22357693314552307, + "learning_rate": 7.161909547738693e-06, + "loss": 0.0012, + "step": 28750 + }, + { + "epoch": 16.09, + "grad_norm": 1.0830832719802856, + "learning_rate": 7.159396984924623e-06, + "loss": 0.0015, + "step": 28775 + }, + { + "epoch": 16.11, + "grad_norm": 0.8508783578872681, + "learning_rate": 7.1568844221105535e-06, + "loss": 0.0013, + "step": 28800 + }, + { + "epoch": 16.12, + "grad_norm": 0.8577681183815002, + "learning_rate": 7.154472361809046e-06, + "loss": 0.0016, + "step": 28825 + }, + { + "epoch": 16.14, + "grad_norm": 1.781736969947815, + "learning_rate": 7.151959798994975e-06, + "loss": 0.0012, + "step": 28850 + }, + { + "epoch": 16.15, + "grad_norm": 0.3834931254386902, + "learning_rate": 7.149447236180905e-06, + "loss": 0.0013, + "step": 28875 + }, + { + "epoch": 16.16, + "grad_norm": 0.6491885185241699, + "learning_rate": 7.146934673366835e-06, + "loss": 0.0017, + "step": 28900 + }, + { + "epoch": 16.18, + "grad_norm": 0.5028931498527527, + "learning_rate": 7.144422110552764e-06, + "loss": 0.0017, + "step": 28925 + }, + { + "epoch": 16.19, + "grad_norm": 2.0896987915039062, + "learning_rate": 7.141909547738694e-06, + "loss": 0.0015, + "step": 28950 + }, + { + "epoch": 16.21, + "grad_norm": 0.24445843696594238, + "learning_rate": 7.1393969849246236e-06, + "loss": 0.0017, + "step": 28975 + }, + { + "epoch": 16.22, + "grad_norm": 0.4740554690361023, + "learning_rate": 7.136884422110554e-06, + "loss": 0.0013, + "step": 29000 + }, + { + "epoch": 16.22, + "eval_loss": 0.20890949666500092, + "eval_runtime": 765.1165, + "eval_samples_per_second": 2.011, + "eval_steps_per_second": 2.011, + "eval_wer": 15.299205236091632, + "step": 29000 + }, + { + "epoch": 16.23, + "grad_norm": 0.4692896902561188, + "learning_rate": 7.134371859296483e-06, + "loss": 0.0012, + "step": 29025 + }, + { + "epoch": 16.25, + "grad_norm": 1.6924082040786743, + "learning_rate": 7.131859296482413e-06, + "loss": 0.0017, + "step": 29050 + }, + { + "epoch": 16.26, + "grad_norm": 0.9935564398765564, + "learning_rate": 7.129346733668342e-06, + "loss": 0.0011, + "step": 29075 + }, + { + "epoch": 16.28, + "grad_norm": 0.3517515957355499, + "learning_rate": 7.126834170854272e-06, + "loss": 0.0011, + "step": 29100 + }, + { + "epoch": 16.29, + "grad_norm": 0.15534855425357819, + "learning_rate": 7.124422110552764e-06, + "loss": 0.0015, + "step": 29125 + }, + { + "epoch": 16.3, + "grad_norm": 0.4924086332321167, + "learning_rate": 7.1219095477386944e-06, + "loss": 0.0013, + "step": 29150 + }, + { + "epoch": 16.32, + "grad_norm": 0.6303420662879944, + "learning_rate": 7.119396984924624e-06, + "loss": 0.0014, + "step": 29175 + }, + { + "epoch": 16.33, + "grad_norm": 1.0597422122955322, + "learning_rate": 7.116884422110554e-06, + "loss": 0.0017, + "step": 29200 + }, + { + "epoch": 16.35, + "grad_norm": 1.8915430307388306, + "learning_rate": 7.114371859296482e-06, + "loss": 0.0017, + "step": 29225 + }, + { + "epoch": 16.36, + "grad_norm": 0.7259461283683777, + "learning_rate": 7.1118592964824126e-06, + "loss": 0.0018, + "step": 29250 + }, + { + "epoch": 16.37, + "grad_norm": 1.654383897781372, + "learning_rate": 7.109346733668342e-06, + "loss": 0.0015, + "step": 29275 + }, + { + "epoch": 16.39, + "grad_norm": 1.0830329656600952, + "learning_rate": 7.106834170854272e-06, + "loss": 0.0016, + "step": 29300 + }, + { + "epoch": 16.4, + "grad_norm": 0.2282690405845642, + "learning_rate": 7.104321608040201e-06, + "loss": 0.0015, + "step": 29325 + }, + { + "epoch": 16.41, + "grad_norm": 1.4532291889190674, + "learning_rate": 7.1018090452261316e-06, + "loss": 0.0017, + "step": 29350 + }, + { + "epoch": 16.43, + "grad_norm": 0.46281036734580994, + "learning_rate": 7.099296482412062e-06, + "loss": 0.0016, + "step": 29375 + }, + { + "epoch": 16.44, + "grad_norm": 1.3128682374954224, + "learning_rate": 7.09678391959799e-06, + "loss": 0.0016, + "step": 29400 + }, + { + "epoch": 16.46, + "grad_norm": 1.1160944700241089, + "learning_rate": 7.09427135678392e-06, + "loss": 0.0019, + "step": 29425 + }, + { + "epoch": 16.47, + "grad_norm": 0.2500441074371338, + "learning_rate": 7.09175879396985e-06, + "loss": 0.0016, + "step": 29450 + }, + { + "epoch": 16.48, + "grad_norm": 0.7285224199295044, + "learning_rate": 7.08924623115578e-06, + "loss": 0.0018, + "step": 29475 + }, + { + "epoch": 16.5, + "grad_norm": 0.9950671792030334, + "learning_rate": 7.086733668341708e-06, + "loss": 0.0017, + "step": 29500 + }, + { + "epoch": 16.51, + "grad_norm": 1.1263285875320435, + "learning_rate": 7.0842211055276385e-06, + "loss": 0.0017, + "step": 29525 + }, + { + "epoch": 16.53, + "grad_norm": 0.6833540201187134, + "learning_rate": 7.081708542713568e-06, + "loss": 0.0019, + "step": 29550 + }, + { + "epoch": 16.54, + "grad_norm": 0.39665961265563965, + "learning_rate": 7.079195979899498e-06, + "loss": 0.0017, + "step": 29575 + }, + { + "epoch": 16.55, + "grad_norm": 1.4684909582138062, + "learning_rate": 7.076683417085428e-06, + "loss": 0.0019, + "step": 29600 + }, + { + "epoch": 16.57, + "grad_norm": 0.9129800796508789, + "learning_rate": 7.0741708542713575e-06, + "loss": 0.0019, + "step": 29625 + }, + { + "epoch": 16.58, + "grad_norm": 0.9707451462745667, + "learning_rate": 7.071658291457288e-06, + "loss": 0.0019, + "step": 29650 + }, + { + "epoch": 16.6, + "grad_norm": 1.0399523973464966, + "learning_rate": 7.069145728643216e-06, + "loss": 0.0016, + "step": 29675 + }, + { + "epoch": 16.61, + "grad_norm": 1.3553062677383423, + "learning_rate": 7.066633165829146e-06, + "loss": 0.0017, + "step": 29700 + }, + { + "epoch": 16.62, + "grad_norm": 1.1914117336273193, + "learning_rate": 7.064120603015076e-06, + "loss": 0.0016, + "step": 29725 + }, + { + "epoch": 16.64, + "grad_norm": 0.6500517129898071, + "learning_rate": 7.061608040201006e-06, + "loss": 0.0015, + "step": 29750 + }, + { + "epoch": 16.65, + "grad_norm": 1.4816306829452515, + "learning_rate": 7.059095477386936e-06, + "loss": 0.0021, + "step": 29775 + }, + { + "epoch": 16.67, + "grad_norm": 0.8494013547897339, + "learning_rate": 7.0565829145728645e-06, + "loss": 0.0016, + "step": 29800 + }, + { + "epoch": 16.68, + "grad_norm": 1.277443289756775, + "learning_rate": 7.054070351758795e-06, + "loss": 0.0019, + "step": 29825 + }, + { + "epoch": 16.69, + "grad_norm": 1.5183736085891724, + "learning_rate": 7.051557788944724e-06, + "loss": 0.0015, + "step": 29850 + }, + { + "epoch": 16.71, + "grad_norm": 1.2969121932983398, + "learning_rate": 7.049045226130654e-06, + "loss": 0.0021, + "step": 29875 + }, + { + "epoch": 16.72, + "grad_norm": 1.6866015195846558, + "learning_rate": 7.0465326633165834e-06, + "loss": 0.0023, + "step": 29900 + }, + { + "epoch": 16.74, + "grad_norm": 1.084509015083313, + "learning_rate": 7.044020100502514e-06, + "loss": 0.0019, + "step": 29925 + }, + { + "epoch": 16.75, + "grad_norm": 0.6918638944625854, + "learning_rate": 7.041507537688442e-06, + "loss": 0.002, + "step": 29950 + }, + { + "epoch": 16.76, + "grad_norm": 1.1457772254943848, + "learning_rate": 7.038994974874372e-06, + "loss": 0.0019, + "step": 29975 + }, + { + "epoch": 16.78, + "grad_norm": 1.6438637971878052, + "learning_rate": 7.0364824120603024e-06, + "loss": 0.0019, + "step": 30000 + }, + { + "epoch": 16.78, + "eval_loss": 0.20692865550518036, + "eval_runtime": 756.0968, + "eval_samples_per_second": 2.035, + "eval_steps_per_second": 2.035, + "eval_wer": 15.41023842917251, + "step": 30000 + }, + { + "epoch": 16.79, + "grad_norm": 0.42780670523643494, + "learning_rate": 7.033969849246232e-06, + "loss": 0.0019, + "step": 30025 + }, + { + "epoch": 16.81, + "grad_norm": 0.6531612873077393, + "learning_rate": 7.031457286432162e-06, + "loss": 0.0021, + "step": 30050 + }, + { + "epoch": 16.82, + "grad_norm": 0.9711599946022034, + "learning_rate": 7.02894472361809e-06, + "loss": 0.0019, + "step": 30075 + }, + { + "epoch": 16.83, + "grad_norm": 1.1955443620681763, + "learning_rate": 7.0264321608040206e-06, + "loss": 0.0019, + "step": 30100 + }, + { + "epoch": 16.85, + "grad_norm": 1.4676185846328735, + "learning_rate": 7.02391959798995e-06, + "loss": 0.0025, + "step": 30125 + }, + { + "epoch": 16.86, + "grad_norm": 1.3602385520935059, + "learning_rate": 7.02140703517588e-06, + "loss": 0.002, + "step": 30150 + }, + { + "epoch": 16.88, + "grad_norm": 1.7014977931976318, + "learning_rate": 7.018894472361809e-06, + "loss": 0.002, + "step": 30175 + }, + { + "epoch": 16.89, + "grad_norm": 1.7635717391967773, + "learning_rate": 7.0163819095477396e-06, + "loss": 0.0016, + "step": 30200 + }, + { + "epoch": 16.9, + "grad_norm": 0.8794500827789307, + "learning_rate": 7.01386934673367e-06, + "loss": 0.0022, + "step": 30225 + }, + { + "epoch": 16.92, + "grad_norm": 0.6013706922531128, + "learning_rate": 7.011356783919598e-06, + "loss": 0.0019, + "step": 30250 + }, + { + "epoch": 16.93, + "grad_norm": 1.7869819402694702, + "learning_rate": 7.008844221105528e-06, + "loss": 0.002, + "step": 30275 + }, + { + "epoch": 16.95, + "grad_norm": 0.2550281286239624, + "learning_rate": 7.006331658291458e-06, + "loss": 0.0014, + "step": 30300 + }, + { + "epoch": 16.96, + "grad_norm": 0.4228287935256958, + "learning_rate": 7.003819095477388e-06, + "loss": 0.0018, + "step": 30325 + }, + { + "epoch": 16.97, + "grad_norm": 1.2630960941314697, + "learning_rate": 7.001306532663316e-06, + "loss": 0.0023, + "step": 30350 + }, + { + "epoch": 16.99, + "grad_norm": 0.44659557938575745, + "learning_rate": 6.9987939698492465e-06, + "loss": 0.0017, + "step": 30375 + }, + { + "epoch": 17.0, + "grad_norm": 1.215463638305664, + "learning_rate": 6.996281407035177e-06, + "loss": 0.0017, + "step": 30400 + }, + { + "epoch": 17.02, + "grad_norm": 0.6900143027305603, + "learning_rate": 6.993768844221106e-06, + "loss": 0.001, + "step": 30425 + }, + { + "epoch": 17.03, + "grad_norm": 0.665034830570221, + "learning_rate": 6.991256281407036e-06, + "loss": 0.0011, + "step": 30450 + }, + { + "epoch": 17.04, + "grad_norm": 1.1440362930297852, + "learning_rate": 6.9887437185929655e-06, + "loss": 0.0015, + "step": 30475 + }, + { + "epoch": 17.06, + "grad_norm": 0.43543970584869385, + "learning_rate": 6.986231155778896e-06, + "loss": 0.0012, + "step": 30500 + }, + { + "epoch": 17.07, + "grad_norm": 1.2201470136642456, + "learning_rate": 6.983718592964824e-06, + "loss": 0.0012, + "step": 30525 + }, + { + "epoch": 17.09, + "grad_norm": 2.4899706840515137, + "learning_rate": 6.981206030150754e-06, + "loss": 0.0015, + "step": 30550 + }, + { + "epoch": 17.1, + "grad_norm": 1.2970794439315796, + "learning_rate": 6.978693467336684e-06, + "loss": 0.0014, + "step": 30575 + }, + { + "epoch": 17.11, + "grad_norm": 0.6124092936515808, + "learning_rate": 6.976180904522614e-06, + "loss": 0.0014, + "step": 30600 + }, + { + "epoch": 17.13, + "grad_norm": 0.4075736403465271, + "learning_rate": 6.973668341708544e-06, + "loss": 0.001, + "step": 30625 + }, + { + "epoch": 17.14, + "grad_norm": 1.0653010606765747, + "learning_rate": 6.9711557788944725e-06, + "loss": 0.0011, + "step": 30650 + }, + { + "epoch": 17.16, + "grad_norm": 1.3723009824752808, + "learning_rate": 6.968643216080403e-06, + "loss": 0.0012, + "step": 30675 + }, + { + "epoch": 17.17, + "grad_norm": 0.6445235013961792, + "learning_rate": 6.966130653266332e-06, + "loss": 0.0014, + "step": 30700 + }, + { + "epoch": 17.18, + "grad_norm": 0.6434729695320129, + "learning_rate": 6.963618090452262e-06, + "loss": 0.0013, + "step": 30725 + }, + { + "epoch": 17.2, + "grad_norm": 0.9862807393074036, + "learning_rate": 6.9611055276381914e-06, + "loss": 0.0011, + "step": 30750 + }, + { + "epoch": 17.21, + "grad_norm": 2.94486403465271, + "learning_rate": 6.958592964824122e-06, + "loss": 0.0013, + "step": 30775 + }, + { + "epoch": 17.23, + "grad_norm": 0.23455284535884857, + "learning_rate": 6.95608040201005e-06, + "loss": 0.0013, + "step": 30800 + }, + { + "epoch": 17.24, + "grad_norm": 0.7880411148071289, + "learning_rate": 6.95356783919598e-06, + "loss": 0.001, + "step": 30825 + }, + { + "epoch": 17.25, + "grad_norm": 1.8447456359863281, + "learning_rate": 6.9510552763819104e-06, + "loss": 0.0011, + "step": 30850 + }, + { + "epoch": 17.27, + "grad_norm": 0.6399122476577759, + "learning_rate": 6.94854271356784e-06, + "loss": 0.0015, + "step": 30875 + }, + { + "epoch": 17.28, + "grad_norm": 0.8138588070869446, + "learning_rate": 6.94603015075377e-06, + "loss": 0.0015, + "step": 30900 + }, + { + "epoch": 17.3, + "grad_norm": 0.856282114982605, + "learning_rate": 6.943517587939698e-06, + "loss": 0.0013, + "step": 30925 + }, + { + "epoch": 17.31, + "grad_norm": 1.598760962486267, + "learning_rate": 6.9410050251256286e-06, + "loss": 0.0019, + "step": 30950 + }, + { + "epoch": 17.32, + "grad_norm": 0.6111915707588196, + "learning_rate": 6.938492462311558e-06, + "loss": 0.0016, + "step": 30975 + }, + { + "epoch": 17.34, + "grad_norm": 1.1163259744644165, + "learning_rate": 6.935979899497488e-06, + "loss": 0.0018, + "step": 31000 + }, + { + "epoch": 17.34, + "eval_loss": 0.2104346603155136, + "eval_runtime": 763.746, + "eval_samples_per_second": 2.015, + "eval_steps_per_second": 2.015, + "eval_wer": 14.895979429640018, + "step": 31000 + }, + { + "epoch": 17.35, + "grad_norm": 1.0770196914672852, + "learning_rate": 6.933467336683418e-06, + "loss": 0.0016, + "step": 31025 + }, + { + "epoch": 17.37, + "grad_norm": 0.2731950283050537, + "learning_rate": 6.9309547738693476e-06, + "loss": 0.0014, + "step": 31050 + }, + { + "epoch": 17.38, + "grad_norm": 1.8197996616363525, + "learning_rate": 6.928442211055278e-06, + "loss": 0.0016, + "step": 31075 + }, + { + "epoch": 17.39, + "grad_norm": 1.4708235263824463, + "learning_rate": 6.925929648241206e-06, + "loss": 0.0015, + "step": 31100 + }, + { + "epoch": 17.41, + "grad_norm": 0.911407470703125, + "learning_rate": 6.923417085427136e-06, + "loss": 0.0013, + "step": 31125 + }, + { + "epoch": 17.42, + "grad_norm": 0.19341106712818146, + "learning_rate": 6.920904522613066e-06, + "loss": 0.0017, + "step": 31150 + }, + { + "epoch": 17.44, + "grad_norm": 0.7731698155403137, + "learning_rate": 6.918391959798996e-06, + "loss": 0.0013, + "step": 31175 + }, + { + "epoch": 17.45, + "grad_norm": 1.1832797527313232, + "learning_rate": 6.915879396984924e-06, + "loss": 0.0017, + "step": 31200 + }, + { + "epoch": 17.46, + "grad_norm": 0.3132323622703552, + "learning_rate": 6.9133668341708545e-06, + "loss": 0.002, + "step": 31225 + }, + { + "epoch": 17.48, + "grad_norm": 1.658793330192566, + "learning_rate": 6.910854271356785e-06, + "loss": 0.0019, + "step": 31250 + }, + { + "epoch": 17.49, + "grad_norm": 0.8533968925476074, + "learning_rate": 6.908341708542714e-06, + "loss": 0.0016, + "step": 31275 + }, + { + "epoch": 17.51, + "grad_norm": 0.4159184396266937, + "learning_rate": 6.905829145728644e-06, + "loss": 0.0014, + "step": 31300 + }, + { + "epoch": 17.52, + "grad_norm": 0.4636985957622528, + "learning_rate": 6.9033165829145735e-06, + "loss": 0.0019, + "step": 31325 + }, + { + "epoch": 17.53, + "grad_norm": 1.2203309535980225, + "learning_rate": 6.900804020100504e-06, + "loss": 0.0013, + "step": 31350 + }, + { + "epoch": 17.55, + "grad_norm": 0.9346339106559753, + "learning_rate": 6.898291457286432e-06, + "loss": 0.0017, + "step": 31375 + }, + { + "epoch": 17.56, + "grad_norm": 1.803324818611145, + "learning_rate": 6.895778894472362e-06, + "loss": 0.0017, + "step": 31400 + }, + { + "epoch": 17.58, + "grad_norm": 2.9906113147735596, + "learning_rate": 6.893266331658292e-06, + "loss": 0.0013, + "step": 31425 + }, + { + "epoch": 17.59, + "grad_norm": 0.45823991298675537, + "learning_rate": 6.890753768844222e-06, + "loss": 0.0016, + "step": 31450 + }, + { + "epoch": 17.6, + "grad_norm": 0.8044922947883606, + "learning_rate": 6.888241206030152e-06, + "loss": 0.0015, + "step": 31475 + }, + { + "epoch": 17.62, + "grad_norm": 0.7113275527954102, + "learning_rate": 6.8857286432160805e-06, + "loss": 0.0015, + "step": 31500 + }, + { + "epoch": 17.63, + "grad_norm": 1.4748834371566772, + "learning_rate": 6.883216080402011e-06, + "loss": 0.0016, + "step": 31525 + }, + { + "epoch": 17.65, + "grad_norm": 0.17823927104473114, + "learning_rate": 6.88070351758794e-06, + "loss": 0.0019, + "step": 31550 + }, + { + "epoch": 17.66, + "grad_norm": 1.9138675928115845, + "learning_rate": 6.87819095477387e-06, + "loss": 0.0015, + "step": 31575 + }, + { + "epoch": 17.67, + "grad_norm": 0.398894727230072, + "learning_rate": 6.8756783919597994e-06, + "loss": 0.0014, + "step": 31600 + }, + { + "epoch": 17.69, + "grad_norm": 0.4193999767303467, + "learning_rate": 6.87316582914573e-06, + "loss": 0.0016, + "step": 31625 + }, + { + "epoch": 17.7, + "grad_norm": 0.5674384236335754, + "learning_rate": 6.870653266331658e-06, + "loss": 0.0016, + "step": 31650 + }, + { + "epoch": 17.72, + "grad_norm": 0.8240883350372314, + "learning_rate": 6.868140703517588e-06, + "loss": 0.0016, + "step": 31675 + }, + { + "epoch": 17.73, + "grad_norm": 0.648665726184845, + "learning_rate": 6.8656281407035184e-06, + "loss": 0.0014, + "step": 31700 + }, + { + "epoch": 17.74, + "grad_norm": 1.4077035188674927, + "learning_rate": 6.863115577889448e-06, + "loss": 0.0016, + "step": 31725 + }, + { + "epoch": 17.76, + "grad_norm": 0.74284827709198, + "learning_rate": 6.860603015075378e-06, + "loss": 0.0016, + "step": 31750 + }, + { + "epoch": 17.77, + "grad_norm": 0.7828480005264282, + "learning_rate": 6.858090452261306e-06, + "loss": 0.0017, + "step": 31775 + }, + { + "epoch": 17.79, + "grad_norm": 0.6374925374984741, + "learning_rate": 6.8555778894472366e-06, + "loss": 0.0017, + "step": 31800 + }, + { + "epoch": 17.8, + "grad_norm": 0.9306843876838684, + "learning_rate": 6.853065326633166e-06, + "loss": 0.0016, + "step": 31825 + }, + { + "epoch": 17.81, + "grad_norm": 0.38129517436027527, + "learning_rate": 6.850552763819096e-06, + "loss": 0.0018, + "step": 31850 + }, + { + "epoch": 17.83, + "grad_norm": 0.9314115047454834, + "learning_rate": 6.848040201005026e-06, + "loss": 0.0014, + "step": 31875 + }, + { + "epoch": 17.84, + "grad_norm": 1.0283581018447876, + "learning_rate": 6.8455276381909556e-06, + "loss": 0.0019, + "step": 31900 + }, + { + "epoch": 17.86, + "grad_norm": 0.48465949296951294, + "learning_rate": 6.843015075376886e-06, + "loss": 0.0018, + "step": 31925 + }, + { + "epoch": 17.87, + "grad_norm": 1.548902988433838, + "learning_rate": 6.840502512562814e-06, + "loss": 0.0014, + "step": 31950 + }, + { + "epoch": 17.88, + "grad_norm": 1.1933473348617554, + "learning_rate": 6.837989949748744e-06, + "loss": 0.0016, + "step": 31975 + }, + { + "epoch": 17.9, + "grad_norm": 0.6905304789543152, + "learning_rate": 6.835477386934674e-06, + "loss": 0.0014, + "step": 32000 + }, + { + "epoch": 17.9, + "eval_loss": 0.21092204749584198, + "eval_runtime": 768.4934, + "eval_samples_per_second": 2.003, + "eval_steps_per_second": 2.003, + "eval_wer": 15.427769985974754, + "step": 32000 + }, + { + "epoch": 17.91, + "grad_norm": 0.52266526222229, + "learning_rate": 6.832964824120604e-06, + "loss": 0.0016, + "step": 32025 + }, + { + "epoch": 17.93, + "grad_norm": 1.4868412017822266, + "learning_rate": 6.830452261306532e-06, + "loss": 0.0016, + "step": 32050 + }, + { + "epoch": 17.94, + "grad_norm": 0.6696006059646606, + "learning_rate": 6.8279396984924625e-06, + "loss": 0.0016, + "step": 32075 + }, + { + "epoch": 17.95, + "grad_norm": 0.3903909921646118, + "learning_rate": 6.825427135678393e-06, + "loss": 0.0013, + "step": 32100 + }, + { + "epoch": 17.97, + "grad_norm": 0.6952781677246094, + "learning_rate": 6.822914572864322e-06, + "loss": 0.0017, + "step": 32125 + }, + { + "epoch": 17.98, + "grad_norm": 0.7897183299064636, + "learning_rate": 6.820402010050252e-06, + "loss": 0.0019, + "step": 32150 + }, + { + "epoch": 17.99, + "grad_norm": 0.7587228417396545, + "learning_rate": 6.8178894472361815e-06, + "loss": 0.0016, + "step": 32175 + }, + { + "epoch": 18.01, + "grad_norm": 0.3065742254257202, + "learning_rate": 6.815376884422112e-06, + "loss": 0.0016, + "step": 32200 + }, + { + "epoch": 18.02, + "grad_norm": 0.5566627383232117, + "learning_rate": 6.81286432160804e-06, + "loss": 0.0013, + "step": 32225 + }, + { + "epoch": 18.04, + "grad_norm": 1.039034128189087, + "learning_rate": 6.81035175879397e-06, + "loss": 0.0011, + "step": 32250 + }, + { + "epoch": 18.05, + "grad_norm": 0.382648229598999, + "learning_rate": 6.8078391959799e-06, + "loss": 0.0012, + "step": 32275 + }, + { + "epoch": 18.06, + "grad_norm": 0.6021915674209595, + "learning_rate": 6.80532663316583e-06, + "loss": 0.0009, + "step": 32300 + }, + { + "epoch": 18.08, + "grad_norm": 0.36141666769981384, + "learning_rate": 6.80281407035176e-06, + "loss": 0.001, + "step": 32325 + }, + { + "epoch": 18.09, + "grad_norm": 0.282529354095459, + "learning_rate": 6.8003015075376885e-06, + "loss": 0.001, + "step": 32350 + }, + { + "epoch": 18.11, + "grad_norm": 0.24827836453914642, + "learning_rate": 6.797788944723619e-06, + "loss": 0.0011, + "step": 32375 + }, + { + "epoch": 18.12, + "grad_norm": 0.9590837359428406, + "learning_rate": 6.795276381909548e-06, + "loss": 0.001, + "step": 32400 + }, + { + "epoch": 18.13, + "grad_norm": 0.2914819121360779, + "learning_rate": 6.792763819095478e-06, + "loss": 0.0011, + "step": 32425 + }, + { + "epoch": 18.15, + "grad_norm": 0.6171585917472839, + "learning_rate": 6.7902512562814074e-06, + "loss": 0.001, + "step": 32450 + }, + { + "epoch": 18.16, + "grad_norm": 0.8532363772392273, + "learning_rate": 6.787738693467338e-06, + "loss": 0.001, + "step": 32475 + }, + { + "epoch": 18.18, + "grad_norm": 0.3601439893245697, + "learning_rate": 6.785226130653268e-06, + "loss": 0.0009, + "step": 32500 + }, + { + "epoch": 18.19, + "grad_norm": 2.0107998847961426, + "learning_rate": 6.782713567839196e-06, + "loss": 0.0011, + "step": 32525 + }, + { + "epoch": 18.2, + "grad_norm": 0.1530548334121704, + "learning_rate": 6.7802010050251264e-06, + "loss": 0.001, + "step": 32550 + }, + { + "epoch": 18.22, + "grad_norm": 0.20049265027046204, + "learning_rate": 6.777688442211056e-06, + "loss": 0.0009, + "step": 32575 + }, + { + "epoch": 18.23, + "grad_norm": 0.6103872060775757, + "learning_rate": 6.775175879396986e-06, + "loss": 0.001, + "step": 32600 + }, + { + "epoch": 18.25, + "grad_norm": 0.3714231848716736, + "learning_rate": 6.772663316582914e-06, + "loss": 0.0009, + "step": 32625 + }, + { + "epoch": 18.26, + "grad_norm": 2.503746271133423, + "learning_rate": 6.7701507537688446e-06, + "loss": 0.0012, + "step": 32650 + }, + { + "epoch": 18.27, + "grad_norm": 0.8006837964057922, + "learning_rate": 6.767638190954774e-06, + "loss": 0.0011, + "step": 32675 + }, + { + "epoch": 18.29, + "grad_norm": 1.1390272378921509, + "learning_rate": 6.765125628140704e-06, + "loss": 0.0011, + "step": 32700 + }, + { + "epoch": 18.3, + "grad_norm": 0.5239310264587402, + "learning_rate": 6.762613065326634e-06, + "loss": 0.0013, + "step": 32725 + }, + { + "epoch": 18.32, + "grad_norm": 0.2470119148492813, + "learning_rate": 6.7601005025125636e-06, + "loss": 0.0011, + "step": 32750 + }, + { + "epoch": 18.33, + "grad_norm": 0.6358858346939087, + "learning_rate": 6.757587939698494e-06, + "loss": 0.0016, + "step": 32775 + }, + { + "epoch": 18.34, + "grad_norm": 0.34877467155456543, + "learning_rate": 6.755075376884422e-06, + "loss": 0.0015, + "step": 32800 + }, + { + "epoch": 18.36, + "grad_norm": 1.144821286201477, + "learning_rate": 6.752562814070352e-06, + "loss": 0.0018, + "step": 32825 + }, + { + "epoch": 18.37, + "grad_norm": 0.3631689250469208, + "learning_rate": 6.750050251256282e-06, + "loss": 0.0012, + "step": 32850 + }, + { + "epoch": 18.39, + "grad_norm": 0.8698270320892334, + "learning_rate": 6.747537688442212e-06, + "loss": 0.0012, + "step": 32875 + }, + { + "epoch": 18.4, + "grad_norm": 0.7015060782432556, + "learning_rate": 6.74502512562814e-06, + "loss": 0.0014, + "step": 32900 + }, + { + "epoch": 18.41, + "grad_norm": 0.2923341989517212, + "learning_rate": 6.7425125628140705e-06, + "loss": 0.0012, + "step": 32925 + }, + { + "epoch": 18.43, + "grad_norm": 1.4739975929260254, + "learning_rate": 6.740000000000001e-06, + "loss": 0.0015, + "step": 32950 + }, + { + "epoch": 18.44, + "grad_norm": 0.7718896269798279, + "learning_rate": 6.73748743718593e-06, + "loss": 0.0014, + "step": 32975 + }, + { + "epoch": 18.46, + "grad_norm": 0.38582682609558105, + "learning_rate": 6.73497487437186e-06, + "loss": 0.0011, + "step": 33000 + }, + { + "epoch": 18.46, + "eval_loss": 0.20911477506160736, + "eval_runtime": 751.0214, + "eval_samples_per_second": 2.049, + "eval_steps_per_second": 2.049, + "eval_wer": 15.100514258999532, + "step": 33000 + }, + { + "epoch": 18.47, + "grad_norm": 1.6240116357803345, + "learning_rate": 6.7324623115577895e-06, + "loss": 0.0018, + "step": 33025 + }, + { + "epoch": 18.48, + "grad_norm": 1.9460439682006836, + "learning_rate": 6.72994974874372e-06, + "loss": 0.0013, + "step": 33050 + }, + { + "epoch": 18.5, + "grad_norm": 0.827022910118103, + "learning_rate": 6.727437185929648e-06, + "loss": 0.0018, + "step": 33075 + }, + { + "epoch": 18.51, + "grad_norm": 0.5244318842887878, + "learning_rate": 6.724924623115578e-06, + "loss": 0.0014, + "step": 33100 + }, + { + "epoch": 18.53, + "grad_norm": 0.9605728983879089, + "learning_rate": 6.7224120603015085e-06, + "loss": 0.0017, + "step": 33125 + }, + { + "epoch": 18.54, + "grad_norm": 0.6337628364562988, + "learning_rate": 6.719899497487438e-06, + "loss": 0.0014, + "step": 33150 + }, + { + "epoch": 18.55, + "grad_norm": 0.3719654083251953, + "learning_rate": 6.717386934673368e-06, + "loss": 0.0021, + "step": 33175 + }, + { + "epoch": 18.57, + "grad_norm": 0.391492635011673, + "learning_rate": 6.7148743718592965e-06, + "loss": 0.0019, + "step": 33200 + }, + { + "epoch": 18.58, + "grad_norm": 0.4877717196941376, + "learning_rate": 6.712361809045227e-06, + "loss": 0.0017, + "step": 33225 + }, + { + "epoch": 18.6, + "grad_norm": 0.7119495868682861, + "learning_rate": 6.709849246231156e-06, + "loss": 0.0011, + "step": 33250 + }, + { + "epoch": 18.61, + "grad_norm": 1.007393479347229, + "learning_rate": 6.707336683417086e-06, + "loss": 0.0012, + "step": 33275 + }, + { + "epoch": 18.62, + "grad_norm": 1.2025009393692017, + "learning_rate": 6.7048241206030155e-06, + "loss": 0.0013, + "step": 33300 + }, + { + "epoch": 18.64, + "grad_norm": 0.8436883091926575, + "learning_rate": 6.702311557788946e-06, + "loss": 0.0014, + "step": 33325 + }, + { + "epoch": 18.65, + "grad_norm": 0.6343744993209839, + "learning_rate": 6.699798994974876e-06, + "loss": 0.0014, + "step": 33350 + }, + { + "epoch": 18.67, + "grad_norm": 0.7845780849456787, + "learning_rate": 6.697286432160804e-06, + "loss": 0.0015, + "step": 33375 + }, + { + "epoch": 18.68, + "grad_norm": 1.030934453010559, + "learning_rate": 6.6947738693467344e-06, + "loss": 0.0017, + "step": 33400 + }, + { + "epoch": 18.69, + "grad_norm": 1.6531257629394531, + "learning_rate": 6.692261306532664e-06, + "loss": 0.0014, + "step": 33425 + }, + { + "epoch": 18.71, + "grad_norm": 0.5432845950126648, + "learning_rate": 6.689849246231156e-06, + "loss": 0.0014, + "step": 33450 + }, + { + "epoch": 18.72, + "grad_norm": 0.8898112177848816, + "learning_rate": 6.687336683417086e-06, + "loss": 0.0016, + "step": 33475 + }, + { + "epoch": 18.74, + "grad_norm": 1.0100805759429932, + "learning_rate": 6.684824120603015e-06, + "loss": 0.0018, + "step": 33500 + }, + { + "epoch": 18.75, + "grad_norm": 1.7442337274551392, + "learning_rate": 6.682412060301509e-06, + "loss": 0.0016, + "step": 33525 + }, + { + "epoch": 18.76, + "grad_norm": 1.042807936668396, + "learning_rate": 6.679899497487437e-06, + "loss": 0.0013, + "step": 33550 + }, + { + "epoch": 18.78, + "grad_norm": 0.7412787079811096, + "learning_rate": 6.6773869346733675e-06, + "loss": 0.0015, + "step": 33575 + }, + { + "epoch": 18.79, + "grad_norm": 1.528171181678772, + "learning_rate": 6.674874371859297e-06, + "loss": 0.0013, + "step": 33600 + }, + { + "epoch": 18.81, + "grad_norm": 0.7422769069671631, + "learning_rate": 6.672361809045227e-06, + "loss": 0.0012, + "step": 33625 + }, + { + "epoch": 18.82, + "grad_norm": 0.9560335278511047, + "learning_rate": 6.669849246231156e-06, + "loss": 0.0018, + "step": 33650 + }, + { + "epoch": 18.83, + "grad_norm": 0.584435760974884, + "learning_rate": 6.667336683417086e-06, + "loss": 0.0014, + "step": 33675 + }, + { + "epoch": 18.85, + "grad_norm": 0.7915756702423096, + "learning_rate": 6.664824120603015e-06, + "loss": 0.0014, + "step": 33700 + }, + { + "epoch": 18.86, + "grad_norm": 0.5873784422874451, + "learning_rate": 6.662311557788945e-06, + "loss": 0.0013, + "step": 33725 + }, + { + "epoch": 18.88, + "grad_norm": 2.2877142429351807, + "learning_rate": 6.659798994974875e-06, + "loss": 0.0015, + "step": 33750 + }, + { + "epoch": 18.89, + "grad_norm": 0.29497790336608887, + "learning_rate": 6.657286432160805e-06, + "loss": 0.0016, + "step": 33775 + }, + { + "epoch": 18.9, + "grad_norm": 0.5995815992355347, + "learning_rate": 6.654773869346735e-06, + "loss": 0.0011, + "step": 33800 + }, + { + "epoch": 18.92, + "grad_norm": 0.27464285492897034, + "learning_rate": 6.652261306532663e-06, + "loss": 0.0012, + "step": 33825 + }, + { + "epoch": 18.93, + "grad_norm": 1.154937744140625, + "learning_rate": 6.6497487437185935e-06, + "loss": 0.0016, + "step": 33850 + }, + { + "epoch": 18.95, + "grad_norm": 1.1207845211029053, + "learning_rate": 6.647236180904523e-06, + "loss": 0.0016, + "step": 33875 + }, + { + "epoch": 18.96, + "grad_norm": 0.7670401334762573, + "learning_rate": 6.644723618090453e-06, + "loss": 0.0013, + "step": 33900 + }, + { + "epoch": 18.97, + "grad_norm": 0.25169479846954346, + "learning_rate": 6.642211055276383e-06, + "loss": 0.0013, + "step": 33925 + }, + { + "epoch": 18.99, + "grad_norm": 0.8069249987602234, + "learning_rate": 6.639698492462312e-06, + "loss": 0.0014, + "step": 33950 + }, + { + "epoch": 19.0, + "grad_norm": 0.6290206909179688, + "learning_rate": 6.637185929648242e-06, + "loss": 0.0016, + "step": 33975 + }, + { + "epoch": 19.02, + "grad_norm": 2.107786178588867, + "learning_rate": 6.634673366834171e-06, + "loss": 0.001, + "step": 34000 + }, + { + "epoch": 19.02, + "eval_loss": 0.2131330668926239, + "eval_runtime": 757.5814, + "eval_samples_per_second": 2.031, + "eval_steps_per_second": 2.031, + "eval_wer": 15.11220196353436, + "step": 34000 + }, + { + "epoch": 19.03, + "grad_norm": 0.9138082265853882, + "learning_rate": 6.632160804020101e-06, + "loss": 0.0011, + "step": 34025 + }, + { + "epoch": 19.04, + "grad_norm": 0.2753586173057556, + "learning_rate": 6.629648241206031e-06, + "loss": 0.0008, + "step": 34050 + }, + { + "epoch": 19.06, + "grad_norm": 0.23910900950431824, + "learning_rate": 6.627135678391961e-06, + "loss": 0.001, + "step": 34075 + }, + { + "epoch": 19.07, + "grad_norm": 0.24647869169712067, + "learning_rate": 6.624623115577889e-06, + "loss": 0.001, + "step": 34100 + }, + { + "epoch": 19.09, + "grad_norm": 2.2527575492858887, + "learning_rate": 6.622110552763819e-06, + "loss": 0.0012, + "step": 34125 + }, + { + "epoch": 19.1, + "grad_norm": 0.7607452273368835, + "learning_rate": 6.61959798994975e-06, + "loss": 0.0012, + "step": 34150 + }, + { + "epoch": 19.11, + "grad_norm": 0.7652255296707153, + "learning_rate": 6.617085427135679e-06, + "loss": 0.0012, + "step": 34175 + }, + { + "epoch": 19.13, + "grad_norm": 0.33822470903396606, + "learning_rate": 6.614572864321609e-06, + "loss": 0.001, + "step": 34200 + }, + { + "epoch": 19.14, + "grad_norm": 0.920847475528717, + "learning_rate": 6.6120603015075376e-06, + "loss": 0.0013, + "step": 34225 + }, + { + "epoch": 19.16, + "grad_norm": 1.153990626335144, + "learning_rate": 6.609547738693468e-06, + "loss": 0.0016, + "step": 34250 + }, + { + "epoch": 19.17, + "grad_norm": 0.6900044679641724, + "learning_rate": 6.607035175879397e-06, + "loss": 0.0011, + "step": 34275 + }, + { + "epoch": 19.18, + "grad_norm": 0.909891664981842, + "learning_rate": 6.604522613065327e-06, + "loss": 0.001, + "step": 34300 + }, + { + "epoch": 19.2, + "grad_norm": 0.828757107257843, + "learning_rate": 6.6020100502512565e-06, + "loss": 0.0011, + "step": 34325 + }, + { + "epoch": 19.21, + "grad_norm": 0.982976496219635, + "learning_rate": 6.599497487437187e-06, + "loss": 0.0011, + "step": 34350 + }, + { + "epoch": 19.23, + "grad_norm": 2.126403570175171, + "learning_rate": 6.596984924623117e-06, + "loss": 0.001, + "step": 34375 + }, + { + "epoch": 19.24, + "grad_norm": 1.1327265501022339, + "learning_rate": 6.594472361809045e-06, + "loss": 0.0007, + "step": 34400 + }, + { + "epoch": 19.25, + "grad_norm": 0.8209764957427979, + "learning_rate": 6.5919597989949755e-06, + "loss": 0.001, + "step": 34425 + }, + { + "epoch": 19.27, + "grad_norm": 0.3624749183654785, + "learning_rate": 6.589447236180905e-06, + "loss": 0.001, + "step": 34450 + }, + { + "epoch": 19.28, + "grad_norm": 0.8429078459739685, + "learning_rate": 6.586934673366835e-06, + "loss": 0.0013, + "step": 34475 + }, + { + "epoch": 19.3, + "grad_norm": 0.8391066193580627, + "learning_rate": 6.584422110552764e-06, + "loss": 0.001, + "step": 34500 + }, + { + "epoch": 19.31, + "grad_norm": 0.3341628909111023, + "learning_rate": 6.581909547738694e-06, + "loss": 0.0013, + "step": 34525 + }, + { + "epoch": 19.32, + "grad_norm": 1.6523183584213257, + "learning_rate": 6.579396984924624e-06, + "loss": 0.0012, + "step": 34550 + }, + { + "epoch": 19.34, + "grad_norm": 0.7974796891212463, + "learning_rate": 6.576884422110553e-06, + "loss": 0.001, + "step": 34575 + }, + { + "epoch": 19.35, + "grad_norm": 1.0605530738830566, + "learning_rate": 6.574371859296483e-06, + "loss": 0.0012, + "step": 34600 + }, + { + "epoch": 19.37, + "grad_norm": 0.8969795107841492, + "learning_rate": 6.571859296482413e-06, + "loss": 0.0012, + "step": 34625 + }, + { + "epoch": 19.38, + "grad_norm": 0.6363099217414856, + "learning_rate": 6.569346733668343e-06, + "loss": 0.0013, + "step": 34650 + }, + { + "epoch": 19.39, + "grad_norm": 1.105879545211792, + "learning_rate": 6.566834170854271e-06, + "loss": 0.0015, + "step": 34675 + }, + { + "epoch": 19.41, + "grad_norm": 0.5895460844039917, + "learning_rate": 6.5643216080402015e-06, + "loss": 0.0012, + "step": 34700 + }, + { + "epoch": 19.42, + "grad_norm": 1.0729901790618896, + "learning_rate": 6.561809045226131e-06, + "loss": 0.0009, + "step": 34725 + }, + { + "epoch": 19.44, + "grad_norm": 0.16200564801692963, + "learning_rate": 6.559296482412061e-06, + "loss": 0.0013, + "step": 34750 + }, + { + "epoch": 19.45, + "grad_norm": 0.7450486421585083, + "learning_rate": 6.556783919597991e-06, + "loss": 0.0012, + "step": 34775 + }, + { + "epoch": 19.46, + "grad_norm": 0.7242839336395264, + "learning_rate": 6.55427135678392e-06, + "loss": 0.001, + "step": 34800 + }, + { + "epoch": 19.48, + "grad_norm": 1.0801267623901367, + "learning_rate": 6.55175879396985e-06, + "loss": 0.0011, + "step": 34825 + }, + { + "epoch": 19.49, + "grad_norm": 0.3062281310558319, + "learning_rate": 6.549246231155779e-06, + "loss": 0.001, + "step": 34850 + }, + { + "epoch": 19.51, + "grad_norm": 0.4413515031337738, + "learning_rate": 6.546733668341709e-06, + "loss": 0.0011, + "step": 34875 + }, + { + "epoch": 19.52, + "grad_norm": 1.537662386894226, + "learning_rate": 6.544221105527639e-06, + "loss": 0.0011, + "step": 34900 + }, + { + "epoch": 19.53, + "grad_norm": 0.4062853157520294, + "learning_rate": 6.541708542713569e-06, + "loss": 0.0015, + "step": 34925 + }, + { + "epoch": 19.55, + "grad_norm": 0.9977118968963623, + "learning_rate": 6.539195979899497e-06, + "loss": 0.0019, + "step": 34950 + }, + { + "epoch": 19.56, + "grad_norm": 0.211568683385849, + "learning_rate": 6.536683417085427e-06, + "loss": 0.0015, + "step": 34975 + }, + { + "epoch": 19.57, + "grad_norm": 0.81590735912323, + "learning_rate": 6.534170854271358e-06, + "loss": 0.0013, + "step": 35000 + }, + { + "epoch": 19.57, + "eval_loss": 0.21847817301750183, + "eval_runtime": 757.3896, + "eval_samples_per_second": 2.032, + "eval_steps_per_second": 2.032, + "eval_wer": 14.977793361383823, + "step": 35000 + }, + { + "epoch": 19.59, + "grad_norm": 1.6194974184036255, + "learning_rate": 6.531658291457287e-06, + "loss": 0.0012, + "step": 35025 + }, + { + "epoch": 19.6, + "grad_norm": 1.2685402631759644, + "learning_rate": 6.529145728643217e-06, + "loss": 0.0013, + "step": 35050 + }, + { + "epoch": 19.62, + "grad_norm": 0.20931394398212433, + "learning_rate": 6.5266331658291456e-06, + "loss": 0.0012, + "step": 35075 + }, + { + "epoch": 19.63, + "grad_norm": 1.2172448635101318, + "learning_rate": 6.524120603015076e-06, + "loss": 0.0014, + "step": 35100 + }, + { + "epoch": 19.64, + "grad_norm": 2.016782522201538, + "learning_rate": 6.521608040201005e-06, + "loss": 0.0015, + "step": 35125 + }, + { + "epoch": 19.66, + "grad_norm": 1.5451335906982422, + "learning_rate": 6.519095477386935e-06, + "loss": 0.0016, + "step": 35150 + }, + { + "epoch": 19.67, + "grad_norm": 0.6774836182594299, + "learning_rate": 6.516582914572865e-06, + "loss": 0.0016, + "step": 35175 + }, + { + "epoch": 19.69, + "grad_norm": 0.32344770431518555, + "learning_rate": 6.514070351758795e-06, + "loss": 0.0017, + "step": 35200 + }, + { + "epoch": 19.7, + "grad_norm": 0.292594313621521, + "learning_rate": 6.511557788944725e-06, + "loss": 0.0013, + "step": 35225 + }, + { + "epoch": 19.71, + "grad_norm": 0.3792760372161865, + "learning_rate": 6.509045226130653e-06, + "loss": 0.0017, + "step": 35250 + }, + { + "epoch": 19.73, + "grad_norm": 1.5811372995376587, + "learning_rate": 6.5065326633165835e-06, + "loss": 0.0015, + "step": 35275 + }, + { + "epoch": 19.74, + "grad_norm": 1.0945768356323242, + "learning_rate": 6.504020100502513e-06, + "loss": 0.0015, + "step": 35300 + }, + { + "epoch": 19.76, + "grad_norm": 1.1204854249954224, + "learning_rate": 6.501507537688443e-06, + "loss": 0.0018, + "step": 35325 + }, + { + "epoch": 19.77, + "grad_norm": 0.4627445638179779, + "learning_rate": 6.4989949748743715e-06, + "loss": 0.0014, + "step": 35350 + }, + { + "epoch": 19.78, + "grad_norm": 0.38483551144599915, + "learning_rate": 6.496482412060302e-06, + "loss": 0.0015, + "step": 35375 + }, + { + "epoch": 19.8, + "grad_norm": 0.21439413726329803, + "learning_rate": 6.493969849246232e-06, + "loss": 0.0016, + "step": 35400 + }, + { + "epoch": 19.81, + "grad_norm": 0.7108317017555237, + "learning_rate": 6.491457286432161e-06, + "loss": 0.0015, + "step": 35425 + }, + { + "epoch": 19.83, + "grad_norm": 1.0045725107192993, + "learning_rate": 6.488944723618091e-06, + "loss": 0.0016, + "step": 35450 + }, + { + "epoch": 19.84, + "grad_norm": 0.7934654951095581, + "learning_rate": 6.486432160804021e-06, + "loss": 0.0011, + "step": 35475 + }, + { + "epoch": 19.85, + "grad_norm": 0.39619210362434387, + "learning_rate": 6.483919597989951e-06, + "loss": 0.0011, + "step": 35500 + }, + { + "epoch": 19.87, + "grad_norm": 0.98689204454422, + "learning_rate": 6.481407035175879e-06, + "loss": 0.0009, + "step": 35525 + }, + { + "epoch": 19.88, + "grad_norm": 0.7024122476577759, + "learning_rate": 6.4788944723618095e-06, + "loss": 0.0012, + "step": 35550 + }, + { + "epoch": 19.9, + "grad_norm": 0.9824861884117126, + "learning_rate": 6.476381909547739e-06, + "loss": 0.0016, + "step": 35575 + }, + { + "epoch": 19.91, + "grad_norm": 0.8765001893043518, + "learning_rate": 6.473869346733669e-06, + "loss": 0.0012, + "step": 35600 + }, + { + "epoch": 19.92, + "grad_norm": 0.8700053095817566, + "learning_rate": 6.471356783919599e-06, + "loss": 0.0016, + "step": 35625 + }, + { + "epoch": 19.94, + "grad_norm": 1.113398551940918, + "learning_rate": 6.468844221105528e-06, + "loss": 0.0013, + "step": 35650 + }, + { + "epoch": 19.95, + "grad_norm": 2.339182138442993, + "learning_rate": 6.466331658291458e-06, + "loss": 0.0012, + "step": 35675 + }, + { + "epoch": 19.97, + "grad_norm": 0.40886229276657104, + "learning_rate": 6.463819095477387e-06, + "loss": 0.0012, + "step": 35700 + }, + { + "epoch": 19.98, + "grad_norm": 0.17570988833904266, + "learning_rate": 6.461306532663317e-06, + "loss": 0.0014, + "step": 35725 + }, + { + "epoch": 19.99, + "grad_norm": 0.6729360222816467, + "learning_rate": 6.458793969849247e-06, + "loss": 0.0016, + "step": 35750 + }, + { + "epoch": 20.01, + "grad_norm": 0.8948715329170227, + "learning_rate": 6.456281407035177e-06, + "loss": 0.0012, + "step": 35775 + }, + { + "epoch": 20.02, + "grad_norm": 0.4410959482192993, + "learning_rate": 6.453768844221107e-06, + "loss": 0.0012, + "step": 35800 + }, + { + "epoch": 20.04, + "grad_norm": 0.878842294216156, + "learning_rate": 6.451256281407035e-06, + "loss": 0.001, + "step": 35825 + }, + { + "epoch": 20.05, + "grad_norm": 1.9205482006072998, + "learning_rate": 6.448743718592966e-06, + "loss": 0.0011, + "step": 35850 + }, + { + "epoch": 20.06, + "grad_norm": 0.08941115438938141, + "learning_rate": 6.446231155778895e-06, + "loss": 0.0009, + "step": 35875 + }, + { + "epoch": 20.08, + "grad_norm": 0.6014299392700195, + "learning_rate": 6.443718592964825e-06, + "loss": 0.0008, + "step": 35900 + }, + { + "epoch": 20.09, + "grad_norm": 1.1500434875488281, + "learning_rate": 6.4412060301507536e-06, + "loss": 0.0008, + "step": 35925 + }, + { + "epoch": 20.11, + "grad_norm": 0.5007115006446838, + "learning_rate": 6.438693467336684e-06, + "loss": 0.0009, + "step": 35950 + }, + { + "epoch": 20.12, + "grad_norm": 0.38675054907798767, + "learning_rate": 6.436180904522613e-06, + "loss": 0.0008, + "step": 35975 + }, + { + "epoch": 20.13, + "grad_norm": 0.33463147282600403, + "learning_rate": 6.433668341708543e-06, + "loss": 0.0009, + "step": 36000 + }, + { + "epoch": 20.13, + "eval_loss": 0.2137802690267563, + "eval_runtime": 749.1081, + "eval_samples_per_second": 2.054, + "eval_steps_per_second": 2.054, + "eval_wer": 15.176484338475923, + "step": 36000 + }, + { + "epoch": 20.15, + "grad_norm": 1.1036505699157715, + "learning_rate": 6.431155778894473e-06, + "loss": 0.001, + "step": 36025 + }, + { + "epoch": 20.16, + "grad_norm": 0.2192777693271637, + "learning_rate": 6.428643216080403e-06, + "loss": 0.0009, + "step": 36050 + }, + { + "epoch": 20.18, + "grad_norm": 0.430225670337677, + "learning_rate": 6.426130653266333e-06, + "loss": 0.0009, + "step": 36075 + }, + { + "epoch": 20.19, + "grad_norm": 0.47257208824157715, + "learning_rate": 6.423618090452261e-06, + "loss": 0.0008, + "step": 36100 + }, + { + "epoch": 20.2, + "grad_norm": 0.08323296904563904, + "learning_rate": 6.4211055276381915e-06, + "loss": 0.0006, + "step": 36125 + }, + { + "epoch": 20.22, + "grad_norm": 0.8010158538818359, + "learning_rate": 6.418592964824121e-06, + "loss": 0.0009, + "step": 36150 + }, + { + "epoch": 20.23, + "grad_norm": 0.23060384392738342, + "learning_rate": 6.416080402010051e-06, + "loss": 0.0007, + "step": 36175 + }, + { + "epoch": 20.25, + "grad_norm": 0.3528291881084442, + "learning_rate": 6.4135678391959795e-06, + "loss": 0.001, + "step": 36200 + }, + { + "epoch": 20.26, + "grad_norm": 0.4060511291027069, + "learning_rate": 6.41105527638191e-06, + "loss": 0.0008, + "step": 36225 + }, + { + "epoch": 20.27, + "grad_norm": 0.3226715624332428, + "learning_rate": 6.40854271356784e-06, + "loss": 0.001, + "step": 36250 + }, + { + "epoch": 20.29, + "grad_norm": 0.3287621736526489, + "learning_rate": 6.406030150753769e-06, + "loss": 0.0007, + "step": 36275 + }, + { + "epoch": 20.3, + "grad_norm": 0.5851756930351257, + "learning_rate": 6.403517587939699e-06, + "loss": 0.0008, + "step": 36300 + }, + { + "epoch": 20.32, + "grad_norm": 0.28493577241897583, + "learning_rate": 6.401005025125629e-06, + "loss": 0.0009, + "step": 36325 + }, + { + "epoch": 20.33, + "grad_norm": 0.2753860652446747, + "learning_rate": 6.398492462311559e-06, + "loss": 0.0008, + "step": 36350 + }, + { + "epoch": 20.34, + "grad_norm": 0.6044716835021973, + "learning_rate": 6.395979899497487e-06, + "loss": 0.0012, + "step": 36375 + }, + { + "epoch": 20.36, + "grad_norm": 0.9337707161903381, + "learning_rate": 6.3934673366834175e-06, + "loss": 0.0009, + "step": 36400 + }, + { + "epoch": 20.37, + "grad_norm": 0.28234073519706726, + "learning_rate": 6.390954773869347e-06, + "loss": 0.0012, + "step": 36425 + }, + { + "epoch": 20.39, + "grad_norm": 0.4068894684314728, + "learning_rate": 6.388442211055277e-06, + "loss": 0.0011, + "step": 36450 + }, + { + "epoch": 20.4, + "grad_norm": 0.6446384787559509, + "learning_rate": 6.385929648241207e-06, + "loss": 0.0008, + "step": 36475 + }, + { + "epoch": 20.41, + "grad_norm": 0.6603647470474243, + "learning_rate": 6.383417085427136e-06, + "loss": 0.0009, + "step": 36500 + }, + { + "epoch": 20.43, + "grad_norm": 0.4054325520992279, + "learning_rate": 6.380904522613066e-06, + "loss": 0.001, + "step": 36525 + }, + { + "epoch": 20.44, + "grad_norm": 0.3088557720184326, + "learning_rate": 6.378391959798995e-06, + "loss": 0.0012, + "step": 36550 + }, + { + "epoch": 20.46, + "grad_norm": 0.7073306441307068, + "learning_rate": 6.375879396984925e-06, + "loss": 0.0015, + "step": 36575 + }, + { + "epoch": 20.47, + "grad_norm": 0.9050595164299011, + "learning_rate": 6.373366834170855e-06, + "loss": 0.0015, + "step": 36600 + }, + { + "epoch": 20.48, + "grad_norm": 0.8496512770652771, + "learning_rate": 6.370854271356785e-06, + "loss": 0.0012, + "step": 36625 + }, + { + "epoch": 20.5, + "grad_norm": 0.5716794729232788, + "learning_rate": 6.368341708542715e-06, + "loss": 0.0013, + "step": 36650 + }, + { + "epoch": 20.51, + "grad_norm": 0.509078323841095, + "learning_rate": 6.365829145728643e-06, + "loss": 0.0011, + "step": 36675 + }, + { + "epoch": 20.53, + "grad_norm": 0.29505252838134766, + "learning_rate": 6.363316582914574e-06, + "loss": 0.0009, + "step": 36700 + }, + { + "epoch": 20.54, + "grad_norm": 0.6264859437942505, + "learning_rate": 6.360804020100503e-06, + "loss": 0.0012, + "step": 36725 + }, + { + "epoch": 20.55, + "grad_norm": 0.9035800695419312, + "learning_rate": 6.358291457286433e-06, + "loss": 0.0014, + "step": 36750 + }, + { + "epoch": 20.57, + "grad_norm": 0.5693862438201904, + "learning_rate": 6.3557788944723616e-06, + "loss": 0.0013, + "step": 36775 + }, + { + "epoch": 20.58, + "grad_norm": 1.1559367179870605, + "learning_rate": 6.353266331658292e-06, + "loss": 0.001, + "step": 36800 + }, + { + "epoch": 20.6, + "grad_norm": 1.1151930093765259, + "learning_rate": 6.350753768844221e-06, + "loss": 0.0012, + "step": 36825 + }, + { + "epoch": 20.61, + "grad_norm": 1.119670033454895, + "learning_rate": 6.348241206030151e-06, + "loss": 0.0012, + "step": 36850 + }, + { + "epoch": 20.62, + "grad_norm": 0.38768938183784485, + "learning_rate": 6.345728643216081e-06, + "loss": 0.0011, + "step": 36875 + }, + { + "epoch": 20.64, + "grad_norm": 0.3257204294204712, + "learning_rate": 6.343216080402011e-06, + "loss": 0.001, + "step": 36900 + }, + { + "epoch": 20.65, + "grad_norm": 0.6941291093826294, + "learning_rate": 6.340703517587941e-06, + "loss": 0.0012, + "step": 36925 + }, + { + "epoch": 20.67, + "grad_norm": 2.5032589435577393, + "learning_rate": 6.338190954773869e-06, + "loss": 0.0015, + "step": 36950 + }, + { + "epoch": 20.68, + "grad_norm": 0.2462470531463623, + "learning_rate": 6.3356783919597995e-06, + "loss": 0.0011, + "step": 36975 + }, + { + "epoch": 20.69, + "grad_norm": 0.7149875164031982, + "learning_rate": 6.333165829145729e-06, + "loss": 0.0014, + "step": 37000 + }, + { + "epoch": 20.69, + "eval_loss": 0.21927934885025024, + "eval_runtime": 755.0363, + "eval_samples_per_second": 2.038, + "eval_steps_per_second": 2.038, + "eval_wer": 15.194015895278168, + "step": 37000 + }, + { + "epoch": 20.71, + "grad_norm": 0.6852760314941406, + "learning_rate": 6.330653266331659e-06, + "loss": 0.0012, + "step": 37025 + }, + { + "epoch": 20.72, + "grad_norm": 1.5134037733078003, + "learning_rate": 6.3281407035175875e-06, + "loss": 0.0013, + "step": 37050 + }, + { + "epoch": 20.74, + "grad_norm": 2.1262474060058594, + "learning_rate": 6.325628140703518e-06, + "loss": 0.0016, + "step": 37075 + }, + { + "epoch": 20.75, + "grad_norm": 1.0946928262710571, + "learning_rate": 6.323115577889448e-06, + "loss": 0.0015, + "step": 37100 + }, + { + "epoch": 20.76, + "grad_norm": 0.3361787497997284, + "learning_rate": 6.320603015075377e-06, + "loss": 0.0015, + "step": 37125 + }, + { + "epoch": 20.78, + "grad_norm": 0.5213021636009216, + "learning_rate": 6.318090452261307e-06, + "loss": 0.0014, + "step": 37150 + }, + { + "epoch": 20.79, + "grad_norm": 0.2807977795600891, + "learning_rate": 6.315577889447237e-06, + "loss": 0.0018, + "step": 37175 + }, + { + "epoch": 20.81, + "grad_norm": 1.0842852592468262, + "learning_rate": 6.313065326633167e-06, + "loss": 0.0013, + "step": 37200 + }, + { + "epoch": 20.82, + "grad_norm": 1.5291721820831299, + "learning_rate": 6.310552763819095e-06, + "loss": 0.0015, + "step": 37225 + }, + { + "epoch": 20.83, + "grad_norm": 0.6190590858459473, + "learning_rate": 6.3080402010050255e-06, + "loss": 0.0012, + "step": 37250 + }, + { + "epoch": 20.85, + "grad_norm": 0.4261666536331177, + "learning_rate": 6.305527638190956e-06, + "loss": 0.0012, + "step": 37275 + }, + { + "epoch": 20.86, + "grad_norm": 0.7875332236289978, + "learning_rate": 6.303015075376885e-06, + "loss": 0.0012, + "step": 37300 + }, + { + "epoch": 20.88, + "grad_norm": 1.1459020376205444, + "learning_rate": 6.300502512562815e-06, + "loss": 0.0013, + "step": 37325 + }, + { + "epoch": 20.89, + "grad_norm": 0.9221647381782532, + "learning_rate": 6.297989949748744e-06, + "loss": 0.0013, + "step": 37350 + }, + { + "epoch": 20.9, + "grad_norm": 1.0424515008926392, + "learning_rate": 6.295477386934674e-06, + "loss": 0.0014, + "step": 37375 + }, + { + "epoch": 20.92, + "grad_norm": 1.292209267616272, + "learning_rate": 6.292964824120603e-06, + "loss": 0.0015, + "step": 37400 + }, + { + "epoch": 20.93, + "grad_norm": 0.9286717772483826, + "learning_rate": 6.290452261306533e-06, + "loss": 0.0014, + "step": 37425 + }, + { + "epoch": 20.95, + "grad_norm": 1.8094725608825684, + "learning_rate": 6.287939698492463e-06, + "loss": 0.0015, + "step": 37450 + }, + { + "epoch": 20.96, + "grad_norm": 0.7906381487846375, + "learning_rate": 6.285427135678393e-06, + "loss": 0.0012, + "step": 37475 + }, + { + "epoch": 20.97, + "grad_norm": 1.0112146139144897, + "learning_rate": 6.282914572864323e-06, + "loss": 0.0011, + "step": 37500 + }, + { + "epoch": 20.99, + "grad_norm": 0.3802068531513214, + "learning_rate": 6.280402010050251e-06, + "loss": 0.0009, + "step": 37525 + }, + { + "epoch": 21.0, + "grad_norm": 1.0410456657409668, + "learning_rate": 6.277889447236182e-06, + "loss": 0.0012, + "step": 37550 + }, + { + "epoch": 21.02, + "grad_norm": 0.53364098072052, + "learning_rate": 6.275477386934674e-06, + "loss": 0.001, + "step": 37575 + }, + { + "epoch": 21.03, + "grad_norm": 0.3212553858757019, + "learning_rate": 6.272964824120603e-06, + "loss": 0.0007, + "step": 37600 + }, + { + "epoch": 21.04, + "grad_norm": 0.5032297372817993, + "learning_rate": 6.2704522613065335e-06, + "loss": 0.0007, + "step": 37625 + }, + { + "epoch": 21.06, + "grad_norm": 0.20742608606815338, + "learning_rate": 6.267939698492462e-06, + "loss": 0.0007, + "step": 37650 + }, + { + "epoch": 21.07, + "grad_norm": 0.34225302934646606, + "learning_rate": 6.265427135678392e-06, + "loss": 0.0006, + "step": 37675 + }, + { + "epoch": 21.09, + "grad_norm": 0.5598475337028503, + "learning_rate": 6.262914572864322e-06, + "loss": 0.0007, + "step": 37700 + }, + { + "epoch": 21.1, + "grad_norm": 0.13691556453704834, + "learning_rate": 6.260402010050252e-06, + "loss": 0.0007, + "step": 37725 + }, + { + "epoch": 21.11, + "grad_norm": 0.6689016222953796, + "learning_rate": 6.257889447236182e-06, + "loss": 0.0006, + "step": 37750 + }, + { + "epoch": 21.13, + "grad_norm": 1.2237187623977661, + "learning_rate": 6.255376884422111e-06, + "loss": 0.0007, + "step": 37775 + }, + { + "epoch": 21.14, + "grad_norm": 0.4034348726272583, + "learning_rate": 6.252864321608041e-06, + "loss": 0.0011, + "step": 37800 + }, + { + "epoch": 21.15, + "grad_norm": 0.3181818425655365, + "learning_rate": 6.25035175879397e-06, + "loss": 0.0009, + "step": 37825 + }, + { + "epoch": 21.17, + "grad_norm": 0.7661883234977722, + "learning_rate": 6.2478391959799e-06, + "loss": 0.0008, + "step": 37850 + }, + { + "epoch": 21.18, + "grad_norm": 0.11268677562475204, + "learning_rate": 6.24532663316583e-06, + "loss": 0.0011, + "step": 37875 + }, + { + "epoch": 21.2, + "grad_norm": 0.8971688151359558, + "learning_rate": 6.242814070351759e-06, + "loss": 0.0011, + "step": 37900 + }, + { + "epoch": 21.21, + "grad_norm": 0.589178204536438, + "learning_rate": 6.24030150753769e-06, + "loss": 0.0008, + "step": 37925 + }, + { + "epoch": 21.22, + "grad_norm": 1.8504348993301392, + "learning_rate": 6.237788944723618e-06, + "loss": 0.0011, + "step": 37950 + }, + { + "epoch": 21.24, + "grad_norm": 0.2898845076560974, + "learning_rate": 6.235276381909548e-06, + "loss": 0.001, + "step": 37975 + }, + { + "epoch": 21.25, + "grad_norm": 0.6730803847312927, + "learning_rate": 6.2327638190954776e-06, + "loss": 0.0007, + "step": 38000 + }, + { + "epoch": 21.25, + "eval_loss": 0.22080941498279572, + "eval_runtime": 748.2697, + "eval_samples_per_second": 2.057, + "eval_steps_per_second": 2.057, + "eval_wer": 15.234922861150071, + "step": 38000 + }, + { + "epoch": 21.27, + "grad_norm": 0.8860182762145996, + "learning_rate": 6.230251256281408e-06, + "loss": 0.0009, + "step": 38025 + }, + { + "epoch": 21.28, + "grad_norm": 0.4529080092906952, + "learning_rate": 6.227738693467337e-06, + "loss": 0.0009, + "step": 38050 + }, + { + "epoch": 21.29, + "grad_norm": 0.6693102717399597, + "learning_rate": 6.225226130653267e-06, + "loss": 0.001, + "step": 38075 + }, + { + "epoch": 21.31, + "grad_norm": 1.7307766675949097, + "learning_rate": 6.222713567839197e-06, + "loss": 0.0008, + "step": 38100 + }, + { + "epoch": 21.32, + "grad_norm": 0.4575481712818146, + "learning_rate": 6.220201005025126e-06, + "loss": 0.001, + "step": 38125 + }, + { + "epoch": 21.34, + "grad_norm": 0.16125130653381348, + "learning_rate": 6.217688442211056e-06, + "loss": 0.0008, + "step": 38150 + }, + { + "epoch": 21.35, + "grad_norm": 1.795700192451477, + "learning_rate": 6.215175879396985e-06, + "loss": 0.0008, + "step": 38175 + }, + { + "epoch": 21.36, + "grad_norm": 0.5810615420341492, + "learning_rate": 6.2126633165829155e-06, + "loss": 0.0011, + "step": 38200 + }, + { + "epoch": 21.38, + "grad_norm": 0.6461288928985596, + "learning_rate": 6.210150753768844e-06, + "loss": 0.0016, + "step": 38225 + }, + { + "epoch": 21.39, + "grad_norm": 0.8849642276763916, + "learning_rate": 6.207638190954774e-06, + "loss": 0.0011, + "step": 38250 + }, + { + "epoch": 21.41, + "grad_norm": 1.4438166618347168, + "learning_rate": 6.2051256281407035e-06, + "loss": 0.0012, + "step": 38275 + }, + { + "epoch": 21.42, + "grad_norm": 0.30240166187286377, + "learning_rate": 6.202613065326634e-06, + "loss": 0.0013, + "step": 38300 + }, + { + "epoch": 21.43, + "grad_norm": 0.2977248430252075, + "learning_rate": 6.200100502512564e-06, + "loss": 0.0013, + "step": 38325 + }, + { + "epoch": 21.45, + "grad_norm": 0.975920557975769, + "learning_rate": 6.197587939698493e-06, + "loss": 0.0011, + "step": 38350 + }, + { + "epoch": 21.46, + "grad_norm": 1.2568304538726807, + "learning_rate": 6.195075376884423e-06, + "loss": 0.0012, + "step": 38375 + }, + { + "epoch": 21.48, + "grad_norm": 0.06168259307742119, + "learning_rate": 6.192562814070352e-06, + "loss": 0.0011, + "step": 38400 + }, + { + "epoch": 21.49, + "grad_norm": 0.40123751759529114, + "learning_rate": 6.190050251256282e-06, + "loss": 0.0011, + "step": 38425 + }, + { + "epoch": 21.5, + "grad_norm": 0.7271655201911926, + "learning_rate": 6.187537688442211e-06, + "loss": 0.0011, + "step": 38450 + }, + { + "epoch": 21.52, + "grad_norm": 0.3118450939655304, + "learning_rate": 6.1850251256281415e-06, + "loss": 0.0012, + "step": 38475 + }, + { + "epoch": 21.53, + "grad_norm": 0.4779285192489624, + "learning_rate": 6.18251256281407e-06, + "loss": 0.001, + "step": 38500 + }, + { + "epoch": 21.55, + "grad_norm": 0.6722844839096069, + "learning_rate": 6.18e-06, + "loss": 0.0013, + "step": 38525 + }, + { + "epoch": 21.56, + "grad_norm": 0.45952871441841125, + "learning_rate": 6.17748743718593e-06, + "loss": 0.0011, + "step": 38550 + }, + { + "epoch": 21.57, + "grad_norm": 0.17184565961360931, + "learning_rate": 6.17497487437186e-06, + "loss": 0.0011, + "step": 38575 + }, + { + "epoch": 21.59, + "grad_norm": 1.4723483324050903, + "learning_rate": 6.17246231155779e-06, + "loss": 0.001, + "step": 38600 + }, + { + "epoch": 21.6, + "grad_norm": 0.30593568086624146, + "learning_rate": 6.169949748743719e-06, + "loss": 0.0009, + "step": 38625 + }, + { + "epoch": 21.62, + "grad_norm": 0.2659848928451538, + "learning_rate": 6.167437185929649e-06, + "loss": 0.0007, + "step": 38650 + }, + { + "epoch": 21.63, + "grad_norm": 0.17490684986114502, + "learning_rate": 6.164924623115578e-06, + "loss": 0.0007, + "step": 38675 + }, + { + "epoch": 21.64, + "grad_norm": 0.40411970019340515, + "learning_rate": 6.162412060301508e-06, + "loss": 0.0008, + "step": 38700 + }, + { + "epoch": 21.66, + "grad_norm": 0.537494421005249, + "learning_rate": 6.159899497487438e-06, + "loss": 0.001, + "step": 38725 + }, + { + "epoch": 21.67, + "grad_norm": 1.4303185939788818, + "learning_rate": 6.157386934673367e-06, + "loss": 0.001, + "step": 38750 + }, + { + "epoch": 21.69, + "grad_norm": 0.5504719018936157, + "learning_rate": 6.154874371859298e-06, + "loss": 0.001, + "step": 38775 + }, + { + "epoch": 21.7, + "grad_norm": 0.6407922506332397, + "learning_rate": 6.152361809045226e-06, + "loss": 0.0011, + "step": 38800 + }, + { + "epoch": 21.71, + "grad_norm": 0.6178415417671204, + "learning_rate": 6.149849246231156e-06, + "loss": 0.001, + "step": 38825 + }, + { + "epoch": 21.73, + "grad_norm": 0.7516167759895325, + "learning_rate": 6.1473366834170856e-06, + "loss": 0.0013, + "step": 38850 + }, + { + "epoch": 21.74, + "grad_norm": 0.6634389758110046, + "learning_rate": 6.144824120603016e-06, + "loss": 0.0009, + "step": 38875 + }, + { + "epoch": 21.76, + "grad_norm": 0.5878238081932068, + "learning_rate": 6.142311557788945e-06, + "loss": 0.0011, + "step": 38900 + }, + { + "epoch": 21.77, + "grad_norm": 0.44687119126319885, + "learning_rate": 6.139798994974875e-06, + "loss": 0.0012, + "step": 38925 + }, + { + "epoch": 21.78, + "grad_norm": 0.4877602159976959, + "learning_rate": 6.137286432160805e-06, + "loss": 0.0012, + "step": 38950 + }, + { + "epoch": 21.8, + "grad_norm": 0.9401794075965881, + "learning_rate": 6.134773869346734e-06, + "loss": 0.0013, + "step": 38975 + }, + { + "epoch": 21.81, + "grad_norm": 0.8687130212783813, + "learning_rate": 6.132261306532664e-06, + "loss": 0.0015, + "step": 39000 + }, + { + "epoch": 21.81, + "eval_loss": 0.22277826070785522, + "eval_runtime": 755.1346, + "eval_samples_per_second": 2.038, + "eval_steps_per_second": 2.038, + "eval_wer": 15.462833099579242, + "step": 39000 + }, + { + "epoch": 21.83, + "grad_norm": 1.4917749166488647, + "learning_rate": 6.129748743718593e-06, + "loss": 0.0015, + "step": 39025 + }, + { + "epoch": 21.84, + "grad_norm": 0.41239655017852783, + "learning_rate": 6.1272361809045235e-06, + "loss": 0.001, + "step": 39050 + }, + { + "epoch": 21.85, + "grad_norm": 0.7275034785270691, + "learning_rate": 6.124723618090452e-06, + "loss": 0.0013, + "step": 39075 + }, + { + "epoch": 21.87, + "grad_norm": 1.5411518812179565, + "learning_rate": 6.122211055276382e-06, + "loss": 0.0016, + "step": 39100 + }, + { + "epoch": 21.88, + "grad_norm": 0.35557830333709717, + "learning_rate": 6.1196984924623115e-06, + "loss": 0.0009, + "step": 39125 + }, + { + "epoch": 21.9, + "grad_norm": 0.5089533925056458, + "learning_rate": 6.117185929648242e-06, + "loss": 0.0013, + "step": 39150 + }, + { + "epoch": 21.91, + "grad_norm": 0.12480711191892624, + "learning_rate": 6.114673366834172e-06, + "loss": 0.0011, + "step": 39175 + }, + { + "epoch": 21.92, + "grad_norm": 0.8617958426475525, + "learning_rate": 6.112160804020101e-06, + "loss": 0.0009, + "step": 39200 + }, + { + "epoch": 21.94, + "grad_norm": 1.6151044368743896, + "learning_rate": 6.109648241206031e-06, + "loss": 0.0014, + "step": 39225 + }, + { + "epoch": 21.95, + "grad_norm": 0.7569000720977783, + "learning_rate": 6.10713567839196e-06, + "loss": 0.0012, + "step": 39250 + }, + { + "epoch": 21.97, + "grad_norm": 0.6402112245559692, + "learning_rate": 6.10462311557789e-06, + "loss": 0.0011, + "step": 39275 + }, + { + "epoch": 21.98, + "grad_norm": 2.0237255096435547, + "learning_rate": 6.102110552763819e-06, + "loss": 0.0012, + "step": 39300 + }, + { + "epoch": 21.99, + "grad_norm": 1.0305166244506836, + "learning_rate": 6.0995979899497495e-06, + "loss": 0.0012, + "step": 39325 + }, + { + "epoch": 22.01, + "grad_norm": 0.3617115318775177, + "learning_rate": 6.09708542713568e-06, + "loss": 0.001, + "step": 39350 + }, + { + "epoch": 22.02, + "grad_norm": 0.603792130947113, + "learning_rate": 6.094572864321608e-06, + "loss": 0.0007, + "step": 39375 + }, + { + "epoch": 22.04, + "grad_norm": 1.3871476650238037, + "learning_rate": 6.092060301507538e-06, + "loss": 0.0011, + "step": 39400 + }, + { + "epoch": 22.05, + "grad_norm": 0.8533299565315247, + "learning_rate": 6.089547738693468e-06, + "loss": 0.0007, + "step": 39425 + }, + { + "epoch": 22.06, + "grad_norm": 0.27363818883895874, + "learning_rate": 6.087035175879398e-06, + "loss": 0.0007, + "step": 39450 + }, + { + "epoch": 22.08, + "grad_norm": 0.7629588842391968, + "learning_rate": 6.084522613065327e-06, + "loss": 0.0007, + "step": 39475 + }, + { + "epoch": 22.09, + "grad_norm": 0.16893883049488068, + "learning_rate": 6.082010050251257e-06, + "loss": 0.0007, + "step": 39500 + }, + { + "epoch": 22.11, + "grad_norm": 0.2457587718963623, + "learning_rate": 6.079497487437186e-06, + "loss": 0.0008, + "step": 39525 + }, + { + "epoch": 22.12, + "grad_norm": 0.3908367156982422, + "learning_rate": 6.076984924623116e-06, + "loss": 0.0006, + "step": 39550 + }, + { + "epoch": 22.13, + "grad_norm": 1.2105603218078613, + "learning_rate": 6.074472361809046e-06, + "loss": 0.0005, + "step": 39575 + }, + { + "epoch": 22.15, + "grad_norm": 0.46513453125953674, + "learning_rate": 6.071959798994975e-06, + "loss": 0.0007, + "step": 39600 + }, + { + "epoch": 22.16, + "grad_norm": 0.5781392455101013, + "learning_rate": 6.069447236180906e-06, + "loss": 0.0008, + "step": 39625 + }, + { + "epoch": 22.18, + "grad_norm": 0.21944007277488708, + "learning_rate": 6.066934673366834e-06, + "loss": 0.0006, + "step": 39650 + }, + { + "epoch": 22.19, + "grad_norm": 0.7348284721374512, + "learning_rate": 6.064422110552764e-06, + "loss": 0.0006, + "step": 39675 + }, + { + "epoch": 22.2, + "grad_norm": 2.7276599407196045, + "learning_rate": 6.0619095477386936e-06, + "loss": 0.0007, + "step": 39700 + }, + { + "epoch": 22.22, + "grad_norm": 0.30620241165161133, + "learning_rate": 6.059396984924624e-06, + "loss": 0.0008, + "step": 39725 + }, + { + "epoch": 22.23, + "grad_norm": 0.3266680836677551, + "learning_rate": 6.056884422110553e-06, + "loss": 0.0009, + "step": 39750 + }, + { + "epoch": 22.25, + "grad_norm": 0.43737247586250305, + "learning_rate": 6.054371859296483e-06, + "loss": 0.0005, + "step": 39775 + }, + { + "epoch": 22.26, + "grad_norm": 2.450934410095215, + "learning_rate": 6.051859296482413e-06, + "loss": 0.0007, + "step": 39800 + }, + { + "epoch": 22.27, + "grad_norm": 0.5663560032844543, + "learning_rate": 6.049346733668342e-06, + "loss": 0.001, + "step": 39825 + }, + { + "epoch": 22.29, + "grad_norm": 0.12583227455615997, + "learning_rate": 6.046934673366834e-06, + "loss": 0.0009, + "step": 39850 + }, + { + "epoch": 22.3, + "grad_norm": 0.17088620364665985, + "learning_rate": 6.0444221105527644e-06, + "loss": 0.0006, + "step": 39875 + }, + { + "epoch": 22.32, + "grad_norm": 1.4772939682006836, + "learning_rate": 6.041909547738694e-06, + "loss": 0.0007, + "step": 39900 + }, + { + "epoch": 22.33, + "grad_norm": 0.9859763979911804, + "learning_rate": 6.039396984924624e-06, + "loss": 0.0006, + "step": 39925 + }, + { + "epoch": 22.34, + "grad_norm": 0.8843104243278503, + "learning_rate": 6.036884422110554e-06, + "loss": 0.0007, + "step": 39950 + }, + { + "epoch": 22.36, + "grad_norm": 0.9143196940422058, + "learning_rate": 6.0343718592964826e-06, + "loss": 0.0006, + "step": 39975 + }, + { + "epoch": 22.37, + "grad_norm": 0.2987785041332245, + "learning_rate": 6.031859296482413e-06, + "loss": 0.0008, + "step": 40000 + }, + { + "epoch": 22.37, + "eval_loss": 0.2264074683189392, + "eval_runtime": 756.3354, + "eval_samples_per_second": 2.035, + "eval_steps_per_second": 2.035, + "eval_wer": 15.404394576905094, + "step": 40000 + }, + { + "epoch": 22.39, + "grad_norm": 0.36540213227272034, + "learning_rate": 6.029346733668342e-06, + "loss": 0.0008, + "step": 40025 + }, + { + "epoch": 22.4, + "grad_norm": 0.9223921895027161, + "learning_rate": 6.026834170854272e-06, + "loss": 0.0009, + "step": 40050 + }, + { + "epoch": 22.41, + "grad_norm": 1.0681039094924927, + "learning_rate": 6.0243216080402016e-06, + "loss": 0.0009, + "step": 40075 + }, + { + "epoch": 22.43, + "grad_norm": 0.4613714814186096, + "learning_rate": 6.021809045226131e-06, + "loss": 0.0008, + "step": 40100 + }, + { + "epoch": 22.44, + "grad_norm": 1.7767751216888428, + "learning_rate": 6.01929648241206e-06, + "loss": 0.001, + "step": 40125 + }, + { + "epoch": 22.46, + "grad_norm": 1.2436317205429077, + "learning_rate": 6.01678391959799e-06, + "loss": 0.0012, + "step": 40150 + }, + { + "epoch": 22.47, + "grad_norm": 0.7894191741943359, + "learning_rate": 6.0142713567839205e-06, + "loss": 0.0009, + "step": 40175 + }, + { + "epoch": 22.48, + "grad_norm": 1.0412967205047607, + "learning_rate": 6.01175879396985e-06, + "loss": 0.0008, + "step": 40200 + }, + { + "epoch": 22.5, + "grad_norm": 0.6890994906425476, + "learning_rate": 6.00924623115578e-06, + "loss": 0.0008, + "step": 40225 + }, + { + "epoch": 22.51, + "grad_norm": 0.9242078065872192, + "learning_rate": 6.0067336683417085e-06, + "loss": 0.0009, + "step": 40250 + }, + { + "epoch": 22.53, + "grad_norm": 0.36296001076698303, + "learning_rate": 6.004221105527639e-06, + "loss": 0.0015, + "step": 40275 + }, + { + "epoch": 22.54, + "grad_norm": 0.4141708016395569, + "learning_rate": 6.001708542713568e-06, + "loss": 0.0009, + "step": 40300 + }, + { + "epoch": 22.55, + "grad_norm": 2.082895040512085, + "learning_rate": 5.999195979899498e-06, + "loss": 0.0009, + "step": 40325 + }, + { + "epoch": 22.57, + "grad_norm": 0.11350537836551666, + "learning_rate": 5.9966834170854275e-06, + "loss": 0.0008, + "step": 40350 + }, + { + "epoch": 22.58, + "grad_norm": 0.21093903481960297, + "learning_rate": 5.994170854271358e-06, + "loss": 0.0007, + "step": 40375 + }, + { + "epoch": 22.6, + "grad_norm": 0.27272096276283264, + "learning_rate": 5.991658291457287e-06, + "loss": 0.0008, + "step": 40400 + }, + { + "epoch": 22.61, + "grad_norm": 0.39380961656570435, + "learning_rate": 5.989145728643216e-06, + "loss": 0.0006, + "step": 40425 + }, + { + "epoch": 22.62, + "grad_norm": 0.5488741993904114, + "learning_rate": 5.9866331658291465e-06, + "loss": 0.0008, + "step": 40450 + }, + { + "epoch": 22.64, + "grad_norm": 0.2330881655216217, + "learning_rate": 5.984120603015076e-06, + "loss": 0.0007, + "step": 40475 + }, + { + "epoch": 22.65, + "grad_norm": 0.5600420236587524, + "learning_rate": 5.981608040201006e-06, + "loss": 0.0008, + "step": 40500 + }, + { + "epoch": 22.66, + "grad_norm": 0.8227638006210327, + "learning_rate": 5.9790954773869345e-06, + "loss": 0.0009, + "step": 40525 + }, + { + "epoch": 22.68, + "grad_norm": 0.655738115310669, + "learning_rate": 5.976582914572865e-06, + "loss": 0.0008, + "step": 40550 + }, + { + "epoch": 22.69, + "grad_norm": 0.3333284258842468, + "learning_rate": 5.974070351758794e-06, + "loss": 0.0009, + "step": 40575 + }, + { + "epoch": 22.71, + "grad_norm": 0.3375628888607025, + "learning_rate": 5.971557788944724e-06, + "loss": 0.001, + "step": 40600 + }, + { + "epoch": 22.72, + "grad_norm": 0.1302882879972458, + "learning_rate": 5.969045226130654e-06, + "loss": 0.0012, + "step": 40625 + }, + { + "epoch": 22.73, + "grad_norm": 0.3120960593223572, + "learning_rate": 5.966532663316584e-06, + "loss": 0.0008, + "step": 40650 + }, + { + "epoch": 22.75, + "grad_norm": 2.2685253620147705, + "learning_rate": 5.964020100502513e-06, + "loss": 0.0009, + "step": 40675 + }, + { + "epoch": 22.76, + "grad_norm": 1.3423000574111938, + "learning_rate": 5.961507537688442e-06, + "loss": 0.0009, + "step": 40700 + }, + { + "epoch": 22.78, + "grad_norm": 0.2089194804430008, + "learning_rate": 5.9589949748743724e-06, + "loss": 0.0008, + "step": 40725 + }, + { + "epoch": 22.79, + "grad_norm": 0.6747369170188904, + "learning_rate": 5.956482412060302e-06, + "loss": 0.0012, + "step": 40750 + }, + { + "epoch": 22.8, + "grad_norm": 0.39017269015312195, + "learning_rate": 5.953969849246232e-06, + "loss": 0.0009, + "step": 40775 + }, + { + "epoch": 22.82, + "grad_norm": 0.654545247554779, + "learning_rate": 5.951457286432162e-06, + "loss": 0.0011, + "step": 40800 + }, + { + "epoch": 22.83, + "grad_norm": 0.26760074496269226, + "learning_rate": 5.9489447236180906e-06, + "loss": 0.0011, + "step": 40825 + }, + { + "epoch": 22.85, + "grad_norm": 0.2864469587802887, + "learning_rate": 5.946432160804021e-06, + "loss": 0.0014, + "step": 40850 + }, + { + "epoch": 22.86, + "grad_norm": 1.4990299940109253, + "learning_rate": 5.94391959798995e-06, + "loss": 0.0012, + "step": 40875 + }, + { + "epoch": 22.87, + "grad_norm": 1.298228144645691, + "learning_rate": 5.94140703517588e-06, + "loss": 0.0015, + "step": 40900 + }, + { + "epoch": 22.89, + "grad_norm": 0.3374519646167755, + "learning_rate": 5.9388944723618096e-06, + "loss": 0.001, + "step": 40925 + }, + { + "epoch": 22.9, + "grad_norm": 2.2666993141174316, + "learning_rate": 5.936381909547739e-06, + "loss": 0.0011, + "step": 40950 + }, + { + "epoch": 22.92, + "grad_norm": 0.9021838307380676, + "learning_rate": 5.933869346733668e-06, + "loss": 0.0009, + "step": 40975 + }, + { + "epoch": 22.93, + "grad_norm": 0.28160324692726135, + "learning_rate": 5.931356783919598e-06, + "loss": 0.0009, + "step": 41000 + }, + { + "epoch": 22.93, + "eval_loss": 0.22463606297969818, + "eval_runtime": 748.7206, + "eval_samples_per_second": 2.056, + "eval_steps_per_second": 2.056, + "eval_wer": 15.275829827021973, + "step": 41000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 56, + "save_steps": 1000, + "total_flos": 1.2761022636490752e+20, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/hindi/checkpoint-41000/training_args.bin b/checkpoints/whisper-base/hindi/checkpoint-41000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c1753187382808f996bea5006a2af01a4b3f053 --- /dev/null +++ b/checkpoints/whisper-base/hindi/checkpoint-41000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b470d76f106856ccfd4e75fb02d16d0b697b8e4cbac171ee27e6dd8674d4d8d +size 4667 diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/config.json b/checkpoints/whisper-base/kannada/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e7f0c041ed3d808e6d244ec3a48421b1ae19a6f --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50306 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/generation_config.json b/checkpoints/whisper-base/kannada/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/model.safetensors b/checkpoints/whisper-base/kannada/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..575d43b26b3bb0c68eac50f2883d339cf4e3f89e --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742861390dd59cd05e5d0a9f43201d7934cef3ad2c30c9c857f8cb992242a34a +size 290403936 diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/optimizer.pt b/checkpoints/whisper-base/kannada/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46f04d799535445d5353beb744cd41f7e16b57d8 --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ff7d3904010480c684101ab347a72de1833396c7602d1ada77a925318dc015 +size 574811077 diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/preprocessor_config.json b/checkpoints/whisper-base/kannada/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/rng_state.pth b/checkpoints/whisper-base/kannada/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..54fc922629395b5e8e60895cc345f89055dfc545 --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c982e3cff8d825880c5f37788ac44fe541a079837bea9018b522e3a1d1c74c +size 14575 diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/scheduler.pt b/checkpoints/whisper-base/kannada/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..39fe57fc5d1682e8d31ca40c1f9ad80c692eebab --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07626faf2167f8b69184e4840f7ce6472de774208d2323527b5327bc9fb39736 +size 627 diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/trainer_state.json b/checkpoints/whisper-base/kannada/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d7c7fad4cbf79559e5d5fc3728257e19dfac364f --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/trainer_state.json @@ -0,0 +1,5223 @@ +{ + "best_metric": 31.430015159171298, + "best_model_checkpoint": "results/whisper-base/kannada/checkpoint-8000", + "epoch": 10.06711409395973, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 15.371527671813965, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.2975, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 6.702200889587402, + "learning_rate": 9.600000000000001e-07, + "loss": 2.0939, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 5.697695255279541, + "learning_rate": 1.46e-06, + "loss": 1.8783, + "step": 75 + }, + { + "epoch": 0.06, + "grad_norm": 5.038358688354492, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.7504, + "step": 100 + }, + { + "epoch": 0.07, + "grad_norm": 4.391455173492432, + "learning_rate": 2.46e-06, + "loss": 1.6433, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 5.171632766723633, + "learning_rate": 2.96e-06, + "loss": 1.5492, + "step": 150 + }, + { + "epoch": 0.1, + "grad_norm": 7.691624164581299, + "learning_rate": 3.46e-06, + "loss": 1.3703, + "step": 175 + }, + { + "epoch": 0.11, + "grad_norm": 6.353484630584717, + "learning_rate": 3.96e-06, + "loss": 1.0018, + "step": 200 + }, + { + "epoch": 0.13, + "grad_norm": 6.241015434265137, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.7036, + "step": 225 + }, + { + "epoch": 0.14, + "grad_norm": 5.290311813354492, + "learning_rate": 4.960000000000001e-06, + "loss": 0.5412, + "step": 250 + }, + { + "epoch": 0.15, + "grad_norm": 4.513641834259033, + "learning_rate": 5.460000000000001e-06, + "loss": 0.4575, + "step": 275 + }, + { + "epoch": 0.17, + "grad_norm": 3.4067599773406982, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.4076, + "step": 300 + }, + { + "epoch": 0.18, + "grad_norm": 5.2033796310424805, + "learning_rate": 6.460000000000001e-06, + "loss": 0.3665, + "step": 325 + }, + { + "epoch": 0.2, + "grad_norm": 4.876632213592529, + "learning_rate": 6.96e-06, + "loss": 0.3542, + "step": 350 + }, + { + "epoch": 0.21, + "grad_norm": 4.570387840270996, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.3351, + "step": 375 + }, + { + "epoch": 0.22, + "grad_norm": 4.75396728515625, + "learning_rate": 7.960000000000002e-06, + "loss": 0.3152, + "step": 400 + }, + { + "epoch": 0.24, + "grad_norm": 3.448577880859375, + "learning_rate": 8.46e-06, + "loss": 0.3098, + "step": 425 + }, + { + "epoch": 0.25, + "grad_norm": 3.031611442565918, + "learning_rate": 8.96e-06, + "loss": 0.3016, + "step": 450 + }, + { + "epoch": 0.27, + "grad_norm": 3.0785105228424072, + "learning_rate": 9.460000000000001e-06, + "loss": 0.291, + "step": 475 + }, + { + "epoch": 0.28, + "grad_norm": 4.834298133850098, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2833, + "step": 500 + }, + { + "epoch": 0.29, + "grad_norm": 3.968628168106079, + "learning_rate": 9.997688442211056e-06, + "loss": 0.2661, + "step": 525 + }, + { + "epoch": 0.31, + "grad_norm": 3.733210563659668, + "learning_rate": 9.995175879396986e-06, + "loss": 0.2679, + "step": 550 + }, + { + "epoch": 0.32, + "grad_norm": 3.722085475921631, + "learning_rate": 9.992663316582915e-06, + "loss": 0.264, + "step": 575 + }, + { + "epoch": 0.34, + "grad_norm": 3.314404249191284, + "learning_rate": 9.990150753768844e-06, + "loss": 0.2571, + "step": 600 + }, + { + "epoch": 0.35, + "grad_norm": 3.4890575408935547, + "learning_rate": 9.987638190954775e-06, + "loss": 0.2475, + "step": 625 + }, + { + "epoch": 0.36, + "grad_norm": 3.6552934646606445, + "learning_rate": 9.985125628140705e-06, + "loss": 0.2413, + "step": 650 + }, + { + "epoch": 0.38, + "grad_norm": 3.529229164123535, + "learning_rate": 9.982613065326634e-06, + "loss": 0.236, + "step": 675 + }, + { + "epoch": 0.39, + "grad_norm": 3.1094226837158203, + "learning_rate": 9.980100502512565e-06, + "loss": 0.2393, + "step": 700 + }, + { + "epoch": 0.41, + "grad_norm": 3.1240782737731934, + "learning_rate": 9.977587939698493e-06, + "loss": 0.2383, + "step": 725 + }, + { + "epoch": 0.42, + "grad_norm": 3.1622979640960693, + "learning_rate": 9.975075376884424e-06, + "loss": 0.2341, + "step": 750 + }, + { + "epoch": 0.43, + "grad_norm": 3.208216428756714, + "learning_rate": 9.972562814070353e-06, + "loss": 0.2264, + "step": 775 + }, + { + "epoch": 0.45, + "grad_norm": 3.8562748432159424, + "learning_rate": 9.970050251256282e-06, + "loss": 0.2205, + "step": 800 + }, + { + "epoch": 0.46, + "grad_norm": 2.792146921157837, + "learning_rate": 9.967537688442212e-06, + "loss": 0.219, + "step": 825 + }, + { + "epoch": 0.48, + "grad_norm": 3.3259260654449463, + "learning_rate": 9.965025125628141e-06, + "loss": 0.2167, + "step": 850 + }, + { + "epoch": 0.49, + "grad_norm": 2.7845840454101562, + "learning_rate": 9.96251256281407e-06, + "loss": 0.2102, + "step": 875 + }, + { + "epoch": 0.5, + "grad_norm": 2.9967143535614014, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2103, + "step": 900 + }, + { + "epoch": 0.52, + "grad_norm": 3.5690603256225586, + "learning_rate": 9.95748743718593e-06, + "loss": 0.2114, + "step": 925 + }, + { + "epoch": 0.53, + "grad_norm": 2.893219232559204, + "learning_rate": 9.95497487437186e-06, + "loss": 0.2091, + "step": 950 + }, + { + "epoch": 0.55, + "grad_norm": 3.0092265605926514, + "learning_rate": 9.952462311557791e-06, + "loss": 0.2087, + "step": 975 + }, + { + "epoch": 0.56, + "grad_norm": 3.9722728729248047, + "learning_rate": 9.949949748743718e-06, + "loss": 0.2067, + "step": 1000 + }, + { + "epoch": 0.56, + "eval_loss": 0.14277268946170807, + "eval_runtime": 1158.4212, + "eval_samples_per_second": 1.234, + "eval_steps_per_second": 1.234, + "eval_wer": 48.14721239683342, + "step": 1000 + }, + { + "epoch": 0.57, + "grad_norm": 2.6526119709014893, + "learning_rate": 9.94743718592965e-06, + "loss": 0.2006, + "step": 1025 + }, + { + "epoch": 0.59, + "grad_norm": 2.687270402908325, + "learning_rate": 9.944924623115579e-06, + "loss": 0.1929, + "step": 1050 + }, + { + "epoch": 0.6, + "grad_norm": 2.5070507526397705, + "learning_rate": 9.942412060301508e-06, + "loss": 0.1956, + "step": 1075 + }, + { + "epoch": 0.62, + "grad_norm": 2.791297674179077, + "learning_rate": 9.93989949748744e-06, + "loss": 0.1971, + "step": 1100 + }, + { + "epoch": 0.63, + "grad_norm": 2.4629204273223877, + "learning_rate": 9.937386934673367e-06, + "loss": 0.1953, + "step": 1125 + }, + { + "epoch": 0.64, + "grad_norm": 2.765550374984741, + "learning_rate": 9.934874371859298e-06, + "loss": 0.1876, + "step": 1150 + }, + { + "epoch": 0.66, + "grad_norm": 2.9944865703582764, + "learning_rate": 9.932361809045227e-06, + "loss": 0.1872, + "step": 1175 + }, + { + "epoch": 0.67, + "grad_norm": 2.8354108333587646, + "learning_rate": 9.929849246231156e-06, + "loss": 0.1958, + "step": 1200 + }, + { + "epoch": 0.69, + "grad_norm": 3.5469353199005127, + "learning_rate": 9.927336683417086e-06, + "loss": 0.1851, + "step": 1225 + }, + { + "epoch": 0.7, + "grad_norm": 2.7006568908691406, + "learning_rate": 9.924824120603017e-06, + "loss": 0.1872, + "step": 1250 + }, + { + "epoch": 0.71, + "grad_norm": 2.5851032733917236, + "learning_rate": 9.922311557788944e-06, + "loss": 0.1848, + "step": 1275 + }, + { + "epoch": 0.73, + "grad_norm": 3.0025763511657715, + "learning_rate": 9.919798994974875e-06, + "loss": 0.1798, + "step": 1300 + }, + { + "epoch": 0.74, + "grad_norm": 2.510704278945923, + "learning_rate": 9.917286432160805e-06, + "loss": 0.1853, + "step": 1325 + }, + { + "epoch": 0.76, + "grad_norm": 2.3565640449523926, + "learning_rate": 9.914773869346734e-06, + "loss": 0.1815, + "step": 1350 + }, + { + "epoch": 0.77, + "grad_norm": 3.1379666328430176, + "learning_rate": 9.912261306532665e-06, + "loss": 0.179, + "step": 1375 + }, + { + "epoch": 0.78, + "grad_norm": 3.08866810798645, + "learning_rate": 9.909748743718593e-06, + "loss": 0.1842, + "step": 1400 + }, + { + "epoch": 0.8, + "grad_norm": 3.172569990158081, + "learning_rate": 9.907236180904524e-06, + "loss": 0.1793, + "step": 1425 + }, + { + "epoch": 0.81, + "grad_norm": 2.465576171875, + "learning_rate": 9.904723618090453e-06, + "loss": 0.179, + "step": 1450 + }, + { + "epoch": 0.82, + "grad_norm": 2.950554370880127, + "learning_rate": 9.902211055276382e-06, + "loss": 0.1737, + "step": 1475 + }, + { + "epoch": 0.84, + "grad_norm": 2.908867359161377, + "learning_rate": 9.899698492462312e-06, + "loss": 0.1724, + "step": 1500 + }, + { + "epoch": 0.85, + "grad_norm": 2.837392807006836, + "learning_rate": 9.897185929648243e-06, + "loss": 0.1698, + "step": 1525 + }, + { + "epoch": 0.87, + "grad_norm": 2.753249168395996, + "learning_rate": 9.894673366834172e-06, + "loss": 0.174, + "step": 1550 + }, + { + "epoch": 0.88, + "grad_norm": 3.205561637878418, + "learning_rate": 9.892160804020101e-06, + "loss": 0.172, + "step": 1575 + }, + { + "epoch": 0.89, + "grad_norm": 2.212885856628418, + "learning_rate": 9.88964824120603e-06, + "loss": 0.1635, + "step": 1600 + }, + { + "epoch": 0.91, + "grad_norm": 2.668921709060669, + "learning_rate": 9.88713567839196e-06, + "loss": 0.1636, + "step": 1625 + }, + { + "epoch": 0.92, + "grad_norm": 2.5961480140686035, + "learning_rate": 9.884623115577891e-06, + "loss": 0.1728, + "step": 1650 + }, + { + "epoch": 0.94, + "grad_norm": 3.0131990909576416, + "learning_rate": 9.882110552763819e-06, + "loss": 0.173, + "step": 1675 + }, + { + "epoch": 0.95, + "grad_norm": 3.0836806297302246, + "learning_rate": 9.87959798994975e-06, + "loss": 0.1645, + "step": 1700 + }, + { + "epoch": 0.96, + "grad_norm": 3.579216241836548, + "learning_rate": 9.877085427135679e-06, + "loss": 0.1666, + "step": 1725 + }, + { + "epoch": 0.98, + "grad_norm": 2.8720693588256836, + "learning_rate": 9.874572864321608e-06, + "loss": 0.1671, + "step": 1750 + }, + { + "epoch": 0.99, + "grad_norm": 2.4276199340820312, + "learning_rate": 9.87206030150754e-06, + "loss": 0.1692, + "step": 1775 + }, + { + "epoch": 1.01, + "grad_norm": 2.299262762069702, + "learning_rate": 9.869547738693469e-06, + "loss": 0.1522, + "step": 1800 + }, + { + "epoch": 1.02, + "grad_norm": 3.208414077758789, + "learning_rate": 9.867035175879398e-06, + "loss": 0.1466, + "step": 1825 + }, + { + "epoch": 1.03, + "grad_norm": 2.92405366897583, + "learning_rate": 9.864522613065327e-06, + "loss": 0.1488, + "step": 1850 + }, + { + "epoch": 1.05, + "grad_norm": 3.464637279510498, + "learning_rate": 9.862010050251257e-06, + "loss": 0.1539, + "step": 1875 + }, + { + "epoch": 1.06, + "grad_norm": 2.5286684036254883, + "learning_rate": 9.859497487437186e-06, + "loss": 0.1509, + "step": 1900 + }, + { + "epoch": 1.08, + "grad_norm": 2.243924617767334, + "learning_rate": 9.856984924623117e-06, + "loss": 0.1488, + "step": 1925 + }, + { + "epoch": 1.09, + "grad_norm": 2.430565118789673, + "learning_rate": 9.854472361809046e-06, + "loss": 0.1471, + "step": 1950 + }, + { + "epoch": 1.1, + "grad_norm": 3.0661568641662598, + "learning_rate": 9.851959798994976e-06, + "loss": 0.1444, + "step": 1975 + }, + { + "epoch": 1.12, + "grad_norm": 2.508648633956909, + "learning_rate": 9.849447236180905e-06, + "loss": 0.1434, + "step": 2000 + }, + { + "epoch": 1.12, + "eval_loss": 0.1081596314907074, + "eval_runtime": 1133.8862, + "eval_samples_per_second": 1.261, + "eval_steps_per_second": 1.261, + "eval_wer": 38.15058110156645, + "step": 2000 + }, + { + "epoch": 1.13, + "grad_norm": 2.9095020294189453, + "learning_rate": 9.846934673366834e-06, + "loss": 0.1458, + "step": 2025 + }, + { + "epoch": 1.15, + "grad_norm": 2.2184669971466064, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1488, + "step": 2050 + }, + { + "epoch": 1.16, + "grad_norm": 2.3408446311950684, + "learning_rate": 9.841909547738695e-06, + "loss": 0.1488, + "step": 2075 + }, + { + "epoch": 1.17, + "grad_norm": 1.8990191221237183, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1478, + "step": 2100 + }, + { + "epoch": 1.19, + "grad_norm": 2.786724805831909, + "learning_rate": 9.836884422110553e-06, + "loss": 0.144, + "step": 2125 + }, + { + "epoch": 1.2, + "grad_norm": 2.310656785964966, + "learning_rate": 9.834371859296483e-06, + "loss": 0.1446, + "step": 2150 + }, + { + "epoch": 1.22, + "grad_norm": 2.823796033859253, + "learning_rate": 9.831859296482414e-06, + "loss": 0.1464, + "step": 2175 + }, + { + "epoch": 1.23, + "grad_norm": 2.256270408630371, + "learning_rate": 9.829346733668343e-06, + "loss": 0.1419, + "step": 2200 + }, + { + "epoch": 1.24, + "grad_norm": 2.3745439052581787, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1447, + "step": 2225 + }, + { + "epoch": 1.26, + "grad_norm": 2.4730031490325928, + "learning_rate": 9.824321608040202e-06, + "loss": 0.1373, + "step": 2250 + }, + { + "epoch": 1.27, + "grad_norm": 2.135254144668579, + "learning_rate": 9.821809045226131e-06, + "loss": 0.1368, + "step": 2275 + }, + { + "epoch": 1.29, + "grad_norm": 2.1918561458587646, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1381, + "step": 2300 + }, + { + "epoch": 1.3, + "grad_norm": 2.4906232357025146, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1405, + "step": 2325 + }, + { + "epoch": 1.31, + "grad_norm": 2.484963893890381, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1368, + "step": 2350 + }, + { + "epoch": 1.33, + "grad_norm": 2.0800821781158447, + "learning_rate": 9.81175879396985e-06, + "loss": 0.1437, + "step": 2375 + }, + { + "epoch": 1.34, + "grad_norm": 2.035243272781372, + "learning_rate": 9.809246231155781e-06, + "loss": 0.1352, + "step": 2400 + }, + { + "epoch": 1.36, + "grad_norm": 2.6583197116851807, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1411, + "step": 2425 + }, + { + "epoch": 1.37, + "grad_norm": 2.5662262439727783, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1419, + "step": 2450 + }, + { + "epoch": 1.38, + "grad_norm": 2.4251856803894043, + "learning_rate": 9.801708542713569e-06, + "loss": 0.1378, + "step": 2475 + }, + { + "epoch": 1.4, + "grad_norm": 2.3742973804473877, + "learning_rate": 9.799195979899498e-06, + "loss": 0.1336, + "step": 2500 + }, + { + "epoch": 1.41, + "grad_norm": 2.394425630569458, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1388, + "step": 2525 + }, + { + "epoch": 1.43, + "grad_norm": 2.3727734088897705, + "learning_rate": 9.794170854271357e-06, + "loss": 0.139, + "step": 2550 + }, + { + "epoch": 1.44, + "grad_norm": 2.920910120010376, + "learning_rate": 9.791658291457288e-06, + "loss": 0.136, + "step": 2575 + }, + { + "epoch": 1.45, + "grad_norm": 2.017719030380249, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1395, + "step": 2600 + }, + { + "epoch": 1.47, + "grad_norm": 2.2757158279418945, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1323, + "step": 2625 + }, + { + "epoch": 1.48, + "grad_norm": 2.0749268531799316, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1314, + "step": 2650 + }, + { + "epoch": 1.5, + "grad_norm": 2.273589611053467, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1337, + "step": 2675 + }, + { + "epoch": 1.51, + "grad_norm": 2.454434394836426, + "learning_rate": 9.779095477386934e-06, + "loss": 0.1318, + "step": 2700 + }, + { + "epoch": 1.52, + "grad_norm": 2.35109543800354, + "learning_rate": 9.776582914572866e-06, + "loss": 0.1303, + "step": 2725 + }, + { + "epoch": 1.54, + "grad_norm": 2.2759006023406982, + "learning_rate": 9.774070351758795e-06, + "loss": 0.1266, + "step": 2750 + }, + { + "epoch": 1.55, + "grad_norm": 2.3673529624938965, + "learning_rate": 9.771557788944724e-06, + "loss": 0.1354, + "step": 2775 + }, + { + "epoch": 1.57, + "grad_norm": 2.671313762664795, + "learning_rate": 9.769045226130655e-06, + "loss": 0.1314, + "step": 2800 + }, + { + "epoch": 1.58, + "grad_norm": 2.2889761924743652, + "learning_rate": 9.766532663316583e-06, + "loss": 0.1316, + "step": 2825 + }, + { + "epoch": 1.59, + "grad_norm": 2.183966875076294, + "learning_rate": 9.764020100502514e-06, + "loss": 0.1305, + "step": 2850 + }, + { + "epoch": 1.61, + "grad_norm": 2.1337826251983643, + "learning_rate": 9.761507537688443e-06, + "loss": 0.1325, + "step": 2875 + }, + { + "epoch": 1.62, + "grad_norm": 2.5112051963806152, + "learning_rate": 9.758994974874372e-06, + "loss": 0.131, + "step": 2900 + }, + { + "epoch": 1.64, + "grad_norm": 2.130584955215454, + "learning_rate": 9.756482412060302e-06, + "loss": 0.1402, + "step": 2925 + }, + { + "epoch": 1.65, + "grad_norm": 2.3534483909606934, + "learning_rate": 9.753969849246233e-06, + "loss": 0.1325, + "step": 2950 + }, + { + "epoch": 1.66, + "grad_norm": 1.9876480102539062, + "learning_rate": 9.75145728643216e-06, + "loss": 0.1306, + "step": 2975 + }, + { + "epoch": 1.68, + "grad_norm": 3.348470687866211, + "learning_rate": 9.748944723618091e-06, + "loss": 0.1321, + "step": 3000 + }, + { + "epoch": 1.68, + "eval_loss": 0.09756702184677124, + "eval_runtime": 1126.8535, + "eval_samples_per_second": 1.269, + "eval_steps_per_second": 1.269, + "eval_wer": 36.323058783897594, + "step": 3000 + }, + { + "epoch": 1.69, + "grad_norm": 2.399486541748047, + "learning_rate": 9.74643216080402e-06, + "loss": 0.125, + "step": 3025 + }, + { + "epoch": 1.71, + "grad_norm": 2.3203823566436768, + "learning_rate": 9.74391959798995e-06, + "loss": 0.129, + "step": 3050 + }, + { + "epoch": 1.72, + "grad_norm": 1.9343129396438599, + "learning_rate": 9.741407035175881e-06, + "loss": 0.1287, + "step": 3075 + }, + { + "epoch": 1.73, + "grad_norm": 2.511561870574951, + "learning_rate": 9.738894472361809e-06, + "loss": 0.1269, + "step": 3100 + }, + { + "epoch": 1.75, + "grad_norm": 2.600206136703491, + "learning_rate": 9.73638190954774e-06, + "loss": 0.1274, + "step": 3125 + }, + { + "epoch": 1.76, + "grad_norm": 2.2494468688964844, + "learning_rate": 9.733869346733669e-06, + "loss": 0.128, + "step": 3150 + }, + { + "epoch": 1.78, + "grad_norm": 2.304333209991455, + "learning_rate": 9.731356783919598e-06, + "loss": 0.1293, + "step": 3175 + }, + { + "epoch": 1.79, + "grad_norm": 2.3565030097961426, + "learning_rate": 9.72884422110553e-06, + "loss": 0.1258, + "step": 3200 + }, + { + "epoch": 1.8, + "grad_norm": 2.362494707107544, + "learning_rate": 9.726331658291459e-06, + "loss": 0.1266, + "step": 3225 + }, + { + "epoch": 1.82, + "grad_norm": 2.3757336139678955, + "learning_rate": 9.723819095477388e-06, + "loss": 0.1242, + "step": 3250 + }, + { + "epoch": 1.83, + "grad_norm": 2.344635248184204, + "learning_rate": 9.721306532663317e-06, + "loss": 0.1232, + "step": 3275 + }, + { + "epoch": 1.85, + "grad_norm": 1.9005662202835083, + "learning_rate": 9.718793969849247e-06, + "loss": 0.1218, + "step": 3300 + }, + { + "epoch": 1.86, + "grad_norm": 2.1534547805786133, + "learning_rate": 9.716281407035176e-06, + "loss": 0.1245, + "step": 3325 + }, + { + "epoch": 1.87, + "grad_norm": 2.475545883178711, + "learning_rate": 9.713768844221107e-06, + "loss": 0.1263, + "step": 3350 + }, + { + "epoch": 1.89, + "grad_norm": 2.2025973796844482, + "learning_rate": 9.711256281407035e-06, + "loss": 0.122, + "step": 3375 + }, + { + "epoch": 1.9, + "grad_norm": 2.586442708969116, + "learning_rate": 9.708743718592966e-06, + "loss": 0.1266, + "step": 3400 + }, + { + "epoch": 1.92, + "grad_norm": 3.0265519618988037, + "learning_rate": 9.706231155778895e-06, + "loss": 0.1273, + "step": 3425 + }, + { + "epoch": 1.93, + "grad_norm": 2.687541961669922, + "learning_rate": 9.703718592964824e-06, + "loss": 0.1246, + "step": 3450 + }, + { + "epoch": 1.94, + "grad_norm": 2.177675485610962, + "learning_rate": 9.701206030150755e-06, + "loss": 0.1224, + "step": 3475 + }, + { + "epoch": 1.96, + "grad_norm": 2.2825403213500977, + "learning_rate": 9.698693467336685e-06, + "loss": 0.1231, + "step": 3500 + }, + { + "epoch": 1.97, + "grad_norm": 2.365032196044922, + "learning_rate": 9.696180904522614e-06, + "loss": 0.1199, + "step": 3525 + }, + { + "epoch": 1.99, + "grad_norm": 2.207110643386841, + "learning_rate": 9.693668341708543e-06, + "loss": 0.1258, + "step": 3550 + }, + { + "epoch": 2.0, + "grad_norm": 2.3551571369171143, + "learning_rate": 9.691155778894473e-06, + "loss": 0.127, + "step": 3575 + }, + { + "epoch": 2.01, + "grad_norm": 2.107489585876465, + "learning_rate": 9.688643216080402e-06, + "loss": 0.1118, + "step": 3600 + }, + { + "epoch": 2.03, + "grad_norm": 2.0215773582458496, + "learning_rate": 9.686130653266333e-06, + "loss": 0.1072, + "step": 3625 + }, + { + "epoch": 2.04, + "grad_norm": 1.8396859169006348, + "learning_rate": 9.683618090452262e-06, + "loss": 0.1093, + "step": 3650 + }, + { + "epoch": 2.06, + "grad_norm": 1.9967023134231567, + "learning_rate": 9.681105527638192e-06, + "loss": 0.1069, + "step": 3675 + }, + { + "epoch": 2.07, + "grad_norm": 2.3010103702545166, + "learning_rate": 9.678592964824121e-06, + "loss": 0.1077, + "step": 3700 + }, + { + "epoch": 2.08, + "grad_norm": 2.1592774391174316, + "learning_rate": 9.67608040201005e-06, + "loss": 0.1043, + "step": 3725 + }, + { + "epoch": 2.1, + "grad_norm": 1.8097106218338013, + "learning_rate": 9.673567839195981e-06, + "loss": 0.1081, + "step": 3750 + }, + { + "epoch": 2.11, + "grad_norm": 1.6795625686645508, + "learning_rate": 9.67105527638191e-06, + "loss": 0.1073, + "step": 3775 + }, + { + "epoch": 2.13, + "grad_norm": 1.9867186546325684, + "learning_rate": 9.66854271356784e-06, + "loss": 0.1073, + "step": 3800 + }, + { + "epoch": 2.14, + "grad_norm": 2.4452531337738037, + "learning_rate": 9.666030150753771e-06, + "loss": 0.106, + "step": 3825 + }, + { + "epoch": 2.15, + "grad_norm": 2.0397822856903076, + "learning_rate": 9.663517587939699e-06, + "loss": 0.1076, + "step": 3850 + }, + { + "epoch": 2.17, + "grad_norm": 2.0880374908447266, + "learning_rate": 9.66100502512563e-06, + "loss": 0.1071, + "step": 3875 + }, + { + "epoch": 2.18, + "grad_norm": 2.205049514770508, + "learning_rate": 9.658492462311559e-06, + "loss": 0.1062, + "step": 3900 + }, + { + "epoch": 2.2, + "grad_norm": 1.6995338201522827, + "learning_rate": 9.655979899497488e-06, + "loss": 0.1069, + "step": 3925 + }, + { + "epoch": 2.21, + "grad_norm": 2.0157582759857178, + "learning_rate": 9.653467336683418e-06, + "loss": 0.1022, + "step": 3950 + }, + { + "epoch": 2.22, + "grad_norm": 2.6505632400512695, + "learning_rate": 9.650954773869347e-06, + "loss": 0.1075, + "step": 3975 + }, + { + "epoch": 2.24, + "grad_norm": 2.7166402339935303, + "learning_rate": 9.648442211055276e-06, + "loss": 0.1065, + "step": 4000 + }, + { + "epoch": 2.24, + "eval_loss": 0.08993615955114365, + "eval_runtime": 1123.1229, + "eval_samples_per_second": 1.273, + "eval_steps_per_second": 1.273, + "eval_wer": 33.49334680815226, + "step": 4000 + }, + { + "epoch": 2.25, + "grad_norm": 2.390237331390381, + "learning_rate": 9.64603015075377e-06, + "loss": 0.1077, + "step": 4025 + }, + { + "epoch": 2.27, + "grad_norm": 2.1558032035827637, + "learning_rate": 9.643517587939699e-06, + "loss": 0.1043, + "step": 4050 + }, + { + "epoch": 2.28, + "grad_norm": 2.2904441356658936, + "learning_rate": 9.64100502512563e-06, + "loss": 0.1067, + "step": 4075 + }, + { + "epoch": 2.29, + "grad_norm": 2.376784324645996, + "learning_rate": 9.638492462311559e-06, + "loss": 0.1105, + "step": 4100 + }, + { + "epoch": 2.31, + "grad_norm": 2.3897860050201416, + "learning_rate": 9.635979899497488e-06, + "loss": 0.106, + "step": 4125 + }, + { + "epoch": 2.32, + "grad_norm": 2.18989634513855, + "learning_rate": 9.633467336683418e-06, + "loss": 0.1053, + "step": 4150 + }, + { + "epoch": 2.34, + "grad_norm": 2.0560553073883057, + "learning_rate": 9.630954773869347e-06, + "loss": 0.1037, + "step": 4175 + }, + { + "epoch": 2.35, + "grad_norm": 2.103466749191284, + "learning_rate": 9.628442211055276e-06, + "loss": 0.1029, + "step": 4200 + }, + { + "epoch": 2.36, + "grad_norm": 2.302995204925537, + "learning_rate": 9.625929648241207e-06, + "loss": 0.1035, + "step": 4225 + }, + { + "epoch": 2.38, + "grad_norm": 2.207853317260742, + "learning_rate": 9.623417085427137e-06, + "loss": 0.103, + "step": 4250 + }, + { + "epoch": 2.39, + "grad_norm": 2.108558177947998, + "learning_rate": 9.620904522613066e-06, + "loss": 0.1063, + "step": 4275 + }, + { + "epoch": 2.4, + "grad_norm": 2.44773530960083, + "learning_rate": 9.618391959798995e-06, + "loss": 0.1048, + "step": 4300 + }, + { + "epoch": 2.42, + "grad_norm": 1.9453063011169434, + "learning_rate": 9.615879396984925e-06, + "loss": 0.1037, + "step": 4325 + }, + { + "epoch": 2.43, + "grad_norm": 2.135312080383301, + "learning_rate": 9.613366834170856e-06, + "loss": 0.1051, + "step": 4350 + }, + { + "epoch": 2.45, + "grad_norm": 2.2999866008758545, + "learning_rate": 9.610854271356785e-06, + "loss": 0.1065, + "step": 4375 + }, + { + "epoch": 2.46, + "grad_norm": 3.002105712890625, + "learning_rate": 9.608341708542714e-06, + "loss": 0.1025, + "step": 4400 + }, + { + "epoch": 2.47, + "grad_norm": 2.1419241428375244, + "learning_rate": 9.605829145728644e-06, + "loss": 0.109, + "step": 4425 + }, + { + "epoch": 2.49, + "grad_norm": 2.3228838443756104, + "learning_rate": 9.603316582914573e-06, + "loss": 0.1049, + "step": 4450 + }, + { + "epoch": 2.5, + "grad_norm": 2.0737078189849854, + "learning_rate": 9.600804020100504e-06, + "loss": 0.1018, + "step": 4475 + }, + { + "epoch": 2.52, + "grad_norm": 2.46830677986145, + "learning_rate": 9.598291457286433e-06, + "loss": 0.1014, + "step": 4500 + }, + { + "epoch": 2.53, + "grad_norm": 2.135310649871826, + "learning_rate": 9.595778894472363e-06, + "loss": 0.101, + "step": 4525 + }, + { + "epoch": 2.54, + "grad_norm": 2.1868910789489746, + "learning_rate": 9.593266331658292e-06, + "loss": 0.1011, + "step": 4550 + }, + { + "epoch": 2.56, + "grad_norm": 2.222989082336426, + "learning_rate": 9.590753768844221e-06, + "loss": 0.1002, + "step": 4575 + }, + { + "epoch": 2.57, + "grad_norm": 2.176161766052246, + "learning_rate": 9.58824120603015e-06, + "loss": 0.1026, + "step": 4600 + }, + { + "epoch": 2.59, + "grad_norm": 2.6243674755096436, + "learning_rate": 9.585728643216082e-06, + "loss": 0.1055, + "step": 4625 + }, + { + "epoch": 2.6, + "grad_norm": 2.2601819038391113, + "learning_rate": 9.583216080402011e-06, + "loss": 0.099, + "step": 4650 + }, + { + "epoch": 2.61, + "grad_norm": 1.782845377922058, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0978, + "step": 4675 + }, + { + "epoch": 2.63, + "grad_norm": 2.278578042984009, + "learning_rate": 9.57819095477387e-06, + "loss": 0.1003, + "step": 4700 + }, + { + "epoch": 2.64, + "grad_norm": 1.9820176362991333, + "learning_rate": 9.575678391959799e-06, + "loss": 0.1011, + "step": 4725 + }, + { + "epoch": 2.66, + "grad_norm": 2.1944828033447266, + "learning_rate": 9.57316582914573e-06, + "loss": 0.1018, + "step": 4750 + }, + { + "epoch": 2.67, + "grad_norm": 2.1688055992126465, + "learning_rate": 9.57065326633166e-06, + "loss": 0.1048, + "step": 4775 + }, + { + "epoch": 2.68, + "grad_norm": 2.270236015319824, + "learning_rate": 9.568140703517589e-06, + "loss": 0.1014, + "step": 4800 + }, + { + "epoch": 2.7, + "grad_norm": 1.8286148309707642, + "learning_rate": 9.565628140703518e-06, + "loss": 0.1026, + "step": 4825 + }, + { + "epoch": 2.71, + "grad_norm": 2.193108081817627, + "learning_rate": 9.563115577889447e-06, + "loss": 0.1007, + "step": 4850 + }, + { + "epoch": 2.73, + "grad_norm": 2.8026459217071533, + "learning_rate": 9.560603015075378e-06, + "loss": 0.1019, + "step": 4875 + }, + { + "epoch": 2.74, + "grad_norm": 2.093276262283325, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0991, + "step": 4900 + }, + { + "epoch": 2.75, + "grad_norm": 1.8048577308654785, + "learning_rate": 9.555577889447237e-06, + "loss": 0.1008, + "step": 4925 + }, + { + "epoch": 2.77, + "grad_norm": 2.331813097000122, + "learning_rate": 9.553065326633166e-06, + "loss": 0.1016, + "step": 4950 + }, + { + "epoch": 2.78, + "grad_norm": 2.3709557056427, + "learning_rate": 9.550552763819096e-06, + "loss": 0.1018, + "step": 4975 + }, + { + "epoch": 2.8, + "grad_norm": 2.0339698791503906, + "learning_rate": 9.548040201005025e-06, + "loss": 0.1025, + "step": 5000 + }, + { + "epoch": 2.8, + "eval_loss": 0.08567250519990921, + "eval_runtime": 1122.2509, + "eval_samples_per_second": 1.274, + "eval_steps_per_second": 1.274, + "eval_wer": 32.289034866093985, + "step": 5000 + }, + { + "epoch": 2.81, + "grad_norm": 1.7360154390335083, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0971, + "step": 5025 + }, + { + "epoch": 2.82, + "grad_norm": 2.060800313949585, + "learning_rate": 9.543015075376885e-06, + "loss": 0.1039, + "step": 5050 + }, + { + "epoch": 2.84, + "grad_norm": 2.3192574977874756, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0995, + "step": 5075 + }, + { + "epoch": 2.85, + "grad_norm": 2.3179783821105957, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0974, + "step": 5100 + }, + { + "epoch": 2.87, + "grad_norm": 2.43072772026062, + "learning_rate": 9.535477386934673e-06, + "loss": 0.093, + "step": 5125 + }, + { + "epoch": 2.88, + "grad_norm": 2.25108003616333, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0961, + "step": 5150 + }, + { + "epoch": 2.89, + "grad_norm": 2.0208256244659424, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0976, + "step": 5175 + }, + { + "epoch": 2.91, + "grad_norm": 2.101325750350952, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0988, + "step": 5200 + }, + { + "epoch": 2.92, + "grad_norm": 2.294297456741333, + "learning_rate": 9.525427135678392e-06, + "loss": 0.097, + "step": 5225 + }, + { + "epoch": 2.94, + "grad_norm": 1.8510397672653198, + "learning_rate": 9.522914572864322e-06, + "loss": 0.1034, + "step": 5250 + }, + { + "epoch": 2.95, + "grad_norm": 2.397599220275879, + "learning_rate": 9.520402010050253e-06, + "loss": 0.1016, + "step": 5275 + }, + { + "epoch": 2.96, + "grad_norm": 2.099172830581665, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0989, + "step": 5300 + }, + { + "epoch": 2.98, + "grad_norm": 2.079193592071533, + "learning_rate": 9.515376884422111e-06, + "loss": 0.1021, + "step": 5325 + }, + { + "epoch": 2.99, + "grad_norm": 2.3001720905303955, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0963, + "step": 5350 + }, + { + "epoch": 3.01, + "grad_norm": 2.308499574661255, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0987, + "step": 5375 + }, + { + "epoch": 3.02, + "grad_norm": 1.9268041849136353, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0815, + "step": 5400 + }, + { + "epoch": 3.03, + "grad_norm": 1.79337477684021, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0836, + "step": 5425 + }, + { + "epoch": 3.05, + "grad_norm": 2.0377745628356934, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0825, + "step": 5450 + }, + { + "epoch": 3.06, + "grad_norm": 1.9441152811050415, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0834, + "step": 5475 + }, + { + "epoch": 3.08, + "grad_norm": 2.329594373703003, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0863, + "step": 5500 + }, + { + "epoch": 3.09, + "grad_norm": 1.9910224676132202, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0825, + "step": 5525 + }, + { + "epoch": 3.1, + "grad_norm": 2.2351512908935547, + "learning_rate": 9.492763819095479e-06, + "loss": 0.083, + "step": 5550 + }, + { + "epoch": 3.12, + "grad_norm": 2.2850327491760254, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0842, + "step": 5575 + }, + { + "epoch": 3.13, + "grad_norm": 2.328458786010742, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0841, + "step": 5600 + }, + { + "epoch": 3.15, + "grad_norm": 1.9934041500091553, + "learning_rate": 9.485226130653267e-06, + "loss": 0.084, + "step": 5625 + }, + { + "epoch": 3.16, + "grad_norm": 2.288125991821289, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0851, + "step": 5650 + }, + { + "epoch": 3.17, + "grad_norm": 1.883324384689331, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0806, + "step": 5675 + }, + { + "epoch": 3.19, + "grad_norm": 1.739647388458252, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0795, + "step": 5700 + }, + { + "epoch": 3.2, + "grad_norm": 1.9885121583938599, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0823, + "step": 5725 + }, + { + "epoch": 3.22, + "grad_norm": 1.9250913858413696, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0817, + "step": 5750 + }, + { + "epoch": 3.23, + "grad_norm": 2.2095437049865723, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0825, + "step": 5775 + }, + { + "epoch": 3.24, + "grad_norm": 2.1711695194244385, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0797, + "step": 5800 + }, + { + "epoch": 3.26, + "grad_norm": 2.022733449935913, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0847, + "step": 5825 + }, + { + "epoch": 3.27, + "grad_norm": 2.079735517501831, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0832, + "step": 5850 + }, + { + "epoch": 3.29, + "grad_norm": 2.360428810119629, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0812, + "step": 5875 + }, + { + "epoch": 3.3, + "grad_norm": 2.0452306270599365, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0855, + "step": 5900 + }, + { + "epoch": 3.31, + "grad_norm": 1.9197665452957153, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0833, + "step": 5925 + }, + { + "epoch": 3.33, + "grad_norm": 1.7063461542129517, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0816, + "step": 5950 + }, + { + "epoch": 3.34, + "grad_norm": 2.1676430702209473, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0841, + "step": 5975 + }, + { + "epoch": 3.36, + "grad_norm": 1.8548740148544312, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0836, + "step": 6000 + }, + { + "epoch": 3.36, + "eval_loss": 0.0868639200925827, + "eval_runtime": 1125.4621, + "eval_samples_per_second": 1.271, + "eval_steps_per_second": 1.271, + "eval_wer": 32.17113020043793, + "step": 6000 + }, + { + "epoch": 3.37, + "grad_norm": 1.7713325023651123, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0806, + "step": 6025 + }, + { + "epoch": 3.38, + "grad_norm": 2.245654821395874, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0827, + "step": 6050 + }, + { + "epoch": 3.4, + "grad_norm": 1.8112534284591675, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0802, + "step": 6075 + }, + { + "epoch": 3.41, + "grad_norm": 2.1099753379821777, + "learning_rate": 9.43748743718593e-06, + "loss": 0.0797, + "step": 6100 + }, + { + "epoch": 3.43, + "grad_norm": 1.8546332120895386, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0817, + "step": 6125 + }, + { + "epoch": 3.44, + "grad_norm": 2.587749481201172, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0819, + "step": 6150 + }, + { + "epoch": 3.45, + "grad_norm": 1.9097250699996948, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0814, + "step": 6175 + }, + { + "epoch": 3.47, + "grad_norm": 2.1678810119628906, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0876, + "step": 6200 + }, + { + "epoch": 3.48, + "grad_norm": 2.1287403106689453, + "learning_rate": 9.424924623115579e-06, + "loss": 0.083, + "step": 6225 + }, + { + "epoch": 3.5, + "grad_norm": 1.9582960605621338, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0842, + "step": 6250 + }, + { + "epoch": 3.51, + "grad_norm": 2.1427805423736572, + "learning_rate": 9.419899497487437e-06, + "loss": 0.082, + "step": 6275 + }, + { + "epoch": 3.52, + "grad_norm": 2.124584436416626, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0843, + "step": 6300 + }, + { + "epoch": 3.54, + "grad_norm": 1.8547803163528442, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0807, + "step": 6325 + }, + { + "epoch": 3.55, + "grad_norm": 2.2380597591400146, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0813, + "step": 6350 + }, + { + "epoch": 3.57, + "grad_norm": 2.100323438644409, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0808, + "step": 6375 + }, + { + "epoch": 3.58, + "grad_norm": 2.2011923789978027, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0831, + "step": 6400 + }, + { + "epoch": 3.59, + "grad_norm": 2.087977409362793, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0843, + "step": 6425 + }, + { + "epoch": 3.61, + "grad_norm": 2.1033151149749756, + "learning_rate": 9.402412060301508e-06, + "loss": 0.0818, + "step": 6450 + }, + { + "epoch": 3.62, + "grad_norm": 2.1980388164520264, + "learning_rate": 9.399899497487438e-06, + "loss": 0.0826, + "step": 6475 + }, + { + "epoch": 3.64, + "grad_norm": 2.0598039627075195, + "learning_rate": 9.397386934673369e-06, + "loss": 0.0815, + "step": 6500 + }, + { + "epoch": 3.65, + "grad_norm": 2.0134546756744385, + "learning_rate": 9.394874371859298e-06, + "loss": 0.0821, + "step": 6525 + }, + { + "epoch": 3.66, + "grad_norm": 2.011451244354248, + "learning_rate": 9.392361809045227e-06, + "loss": 0.0817, + "step": 6550 + }, + { + "epoch": 3.68, + "grad_norm": 2.0850038528442383, + "learning_rate": 9.389849246231157e-06, + "loss": 0.081, + "step": 6575 + }, + { + "epoch": 3.69, + "grad_norm": 2.222620964050293, + "learning_rate": 9.387336683417086e-06, + "loss": 0.0786, + "step": 6600 + }, + { + "epoch": 3.71, + "grad_norm": 2.0406014919281006, + "learning_rate": 9.384824120603015e-06, + "loss": 0.0829, + "step": 6625 + }, + { + "epoch": 3.72, + "grad_norm": 2.1892542839050293, + "learning_rate": 9.382311557788946e-06, + "loss": 0.0816, + "step": 6650 + }, + { + "epoch": 3.73, + "grad_norm": 1.9825727939605713, + "learning_rate": 9.379798994974874e-06, + "loss": 0.0809, + "step": 6675 + }, + { + "epoch": 3.75, + "grad_norm": 2.036494731903076, + "learning_rate": 9.377286432160805e-06, + "loss": 0.0821, + "step": 6700 + }, + { + "epoch": 3.76, + "grad_norm": 1.9626446962356567, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0807, + "step": 6725 + }, + { + "epoch": 3.78, + "grad_norm": 1.7763574123382568, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0832, + "step": 6750 + }, + { + "epoch": 3.79, + "grad_norm": 1.8493731021881104, + "learning_rate": 9.369748743718595e-06, + "loss": 0.0785, + "step": 6775 + }, + { + "epoch": 3.8, + "grad_norm": 1.9705915451049805, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0834, + "step": 6800 + }, + { + "epoch": 3.82, + "grad_norm": 2.231788396835327, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0809, + "step": 6825 + }, + { + "epoch": 3.83, + "grad_norm": 2.028977870941162, + "learning_rate": 9.362211055276383e-06, + "loss": 0.08, + "step": 6850 + }, + { + "epoch": 3.85, + "grad_norm": 1.9474574327468872, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0791, + "step": 6875 + }, + { + "epoch": 3.86, + "grad_norm": 1.7085590362548828, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0831, + "step": 6900 + }, + { + "epoch": 3.87, + "grad_norm": 2.645578145980835, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0818, + "step": 6925 + }, + { + "epoch": 3.89, + "grad_norm": 1.732824683189392, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0803, + "step": 6950 + }, + { + "epoch": 3.9, + "grad_norm": 2.6652276515960693, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0818, + "step": 6975 + }, + { + "epoch": 3.91, + "grad_norm": 1.9401618242263794, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0778, + "step": 7000 + }, + { + "epoch": 3.91, + "eval_loss": 0.08661001175642014, + "eval_runtime": 1126.6157, + "eval_samples_per_second": 1.269, + "eval_steps_per_second": 1.269, + "eval_wer": 31.952164392790973, + "step": 7000 + }, + { + "epoch": 3.93, + "grad_norm": 2.131165027618408, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0809, + "step": 7025 + }, + { + "epoch": 3.94, + "grad_norm": 2.3842930793762207, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0776, + "step": 7050 + }, + { + "epoch": 3.96, + "grad_norm": 1.8666373491287231, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0819, + "step": 7075 + }, + { + "epoch": 3.97, + "grad_norm": 2.0492358207702637, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0807, + "step": 7100 + }, + { + "epoch": 3.98, + "grad_norm": 1.978629469871521, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0794, + "step": 7125 + }, + { + "epoch": 4.0, + "grad_norm": 1.7910689115524292, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0808, + "step": 7150 + }, + { + "epoch": 4.01, + "grad_norm": 2.016683340072632, + "learning_rate": 9.329547738693469e-06, + "loss": 0.0651, + "step": 7175 + }, + { + "epoch": 4.03, + "grad_norm": 1.6141430139541626, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0654, + "step": 7200 + }, + { + "epoch": 4.04, + "grad_norm": 1.8333170413970947, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0652, + "step": 7225 + }, + { + "epoch": 4.05, + "grad_norm": 1.9165650606155396, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0651, + "step": 7250 + }, + { + "epoch": 4.07, + "grad_norm": 1.7872289419174194, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0669, + "step": 7275 + }, + { + "epoch": 4.08, + "grad_norm": 1.98605215549469, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0643, + "step": 7300 + }, + { + "epoch": 4.1, + "grad_norm": 1.9935442209243774, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0659, + "step": 7325 + }, + { + "epoch": 4.11, + "grad_norm": 2.6521196365356445, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0692, + "step": 7350 + }, + { + "epoch": 4.12, + "grad_norm": 1.9968782663345337, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0664, + "step": 7375 + }, + { + "epoch": 4.14, + "grad_norm": 2.339772939682007, + "learning_rate": 9.306934673366836e-06, + "loss": 0.0669, + "step": 7400 + }, + { + "epoch": 4.15, + "grad_norm": 1.9993939399719238, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0684, + "step": 7425 + }, + { + "epoch": 4.17, + "grad_norm": 2.043445110321045, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0682, + "step": 7450 + }, + { + "epoch": 4.18, + "grad_norm": 1.8886429071426392, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0645, + "step": 7475 + }, + { + "epoch": 4.19, + "grad_norm": 2.184920310974121, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0636, + "step": 7500 + }, + { + "epoch": 4.21, + "grad_norm": 1.965453863143921, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0642, + "step": 7525 + }, + { + "epoch": 4.22, + "grad_norm": 1.9355676174163818, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0642, + "step": 7550 + }, + { + "epoch": 4.24, + "grad_norm": 1.7883771657943726, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0644, + "step": 7575 + }, + { + "epoch": 4.25, + "grad_norm": 1.9759441614151, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0658, + "step": 7600 + }, + { + "epoch": 4.26, + "grad_norm": 1.9749606847763062, + "learning_rate": 9.284321608040202e-06, + "loss": 0.067, + "step": 7625 + }, + { + "epoch": 4.28, + "grad_norm": 2.1564793586730957, + "learning_rate": 9.281809045226131e-06, + "loss": 0.065, + "step": 7650 + }, + { + "epoch": 4.29, + "grad_norm": 2.174618721008301, + "learning_rate": 9.279296482412062e-06, + "loss": 0.067, + "step": 7675 + }, + { + "epoch": 4.31, + "grad_norm": 2.0062124729156494, + "learning_rate": 9.27678391959799e-06, + "loss": 0.0659, + "step": 7700 + }, + { + "epoch": 4.32, + "grad_norm": 2.0640249252319336, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0649, + "step": 7725 + }, + { + "epoch": 4.33, + "grad_norm": 2.3740134239196777, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0655, + "step": 7750 + }, + { + "epoch": 4.35, + "grad_norm": 2.027810573577881, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0655, + "step": 7775 + }, + { + "epoch": 4.36, + "grad_norm": 1.911761999130249, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0662, + "step": 7800 + }, + { + "epoch": 4.38, + "grad_norm": 1.8511099815368652, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0642, + "step": 7825 + }, + { + "epoch": 4.39, + "grad_norm": 1.992234230041504, + "learning_rate": 9.261708542713569e-06, + "loss": 0.0661, + "step": 7850 + }, + { + "epoch": 4.4, + "grad_norm": 2.1079294681549072, + "learning_rate": 9.259195979899498e-06, + "loss": 0.0647, + "step": 7875 + }, + { + "epoch": 4.42, + "grad_norm": 2.1714980602264404, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0675, + "step": 7900 + }, + { + "epoch": 4.43, + "grad_norm": 2.2146270275115967, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0629, + "step": 7925 + }, + { + "epoch": 4.45, + "grad_norm": 1.544264316558838, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0689, + "step": 7950 + }, + { + "epoch": 4.46, + "grad_norm": 2.0538835525512695, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0645, + "step": 7975 + }, + { + "epoch": 4.47, + "grad_norm": 1.9014445543289185, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0634, + "step": 8000 + }, + { + "epoch": 4.47, + "eval_loss": 0.0857253149151802, + "eval_runtime": 1127.359, + "eval_samples_per_second": 1.268, + "eval_steps_per_second": 1.268, + "eval_wer": 31.430015159171298, + "step": 8000 + }, + { + "epoch": 4.49, + "grad_norm": 2.2251081466674805, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0651, + "step": 8025 + }, + { + "epoch": 4.5, + "grad_norm": 2.01047420501709, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0644, + "step": 8050 + }, + { + "epoch": 4.52, + "grad_norm": 1.9254204034805298, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0642, + "step": 8075 + }, + { + "epoch": 4.53, + "grad_norm": 1.9501245021820068, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0659, + "step": 8100 + }, + { + "epoch": 4.54, + "grad_norm": 2.1747324466705322, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0611, + "step": 8125 + }, + { + "epoch": 4.56, + "grad_norm": 2.0258383750915527, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0619, + "step": 8150 + }, + { + "epoch": 4.57, + "grad_norm": 1.9337571859359741, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0653, + "step": 8175 + }, + { + "epoch": 4.59, + "grad_norm": 2.270827054977417, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0647, + "step": 8200 + }, + { + "epoch": 4.6, + "grad_norm": 2.0705583095550537, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0655, + "step": 8225 + }, + { + "epoch": 4.61, + "grad_norm": 2.44061017036438, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0627, + "step": 8250 + }, + { + "epoch": 4.63, + "grad_norm": 2.017228603363037, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0643, + "step": 8275 + }, + { + "epoch": 4.64, + "grad_norm": 2.039165496826172, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0639, + "step": 8300 + }, + { + "epoch": 4.66, + "grad_norm": 2.0717718601226807, + "learning_rate": 9.213969849246231e-06, + "loss": 0.064, + "step": 8325 + }, + { + "epoch": 4.67, + "grad_norm": 1.729555606842041, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0632, + "step": 8350 + }, + { + "epoch": 4.68, + "grad_norm": 1.8726036548614502, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0684, + "step": 8375 + }, + { + "epoch": 4.7, + "grad_norm": 1.8846001625061035, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0636, + "step": 8400 + }, + { + "epoch": 4.71, + "grad_norm": 2.030989170074463, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0644, + "step": 8425 + }, + { + "epoch": 4.73, + "grad_norm": 1.71998929977417, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0638, + "step": 8450 + }, + { + "epoch": 4.74, + "grad_norm": 2.258084535598755, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0634, + "step": 8475 + }, + { + "epoch": 4.75, + "grad_norm": 2.0711519718170166, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0635, + "step": 8500 + }, + { + "epoch": 4.77, + "grad_norm": 2.7386820316314697, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0641, + "step": 8525 + }, + { + "epoch": 4.78, + "grad_norm": 1.8931379318237305, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0639, + "step": 8550 + }, + { + "epoch": 4.8, + "grad_norm": 1.967337727546692, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0641, + "step": 8575 + }, + { + "epoch": 4.81, + "grad_norm": 2.3691563606262207, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0634, + "step": 8600 + }, + { + "epoch": 4.82, + "grad_norm": 2.4058635234832764, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0655, + "step": 8625 + }, + { + "epoch": 4.84, + "grad_norm": 1.8672115802764893, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0643, + "step": 8650 + }, + { + "epoch": 4.85, + "grad_norm": 1.7965874671936035, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0618, + "step": 8675 + }, + { + "epoch": 4.87, + "grad_norm": 2.775364637374878, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0637, + "step": 8700 + }, + { + "epoch": 4.88, + "grad_norm": 1.9921495914459229, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0649, + "step": 8725 + }, + { + "epoch": 4.89, + "grad_norm": 2.3203635215759277, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0642, + "step": 8750 + }, + { + "epoch": 4.91, + "grad_norm": 2.132582426071167, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0636, + "step": 8775 + }, + { + "epoch": 4.92, + "grad_norm": 2.331496477127075, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0644, + "step": 8800 + }, + { + "epoch": 4.94, + "grad_norm": 2.003134250640869, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0668, + "step": 8825 + }, + { + "epoch": 4.95, + "grad_norm": 2.0844907760620117, + "learning_rate": 9.161306532663318e-06, + "loss": 0.0585, + "step": 8850 + }, + { + "epoch": 4.96, + "grad_norm": 2.182340383529663, + "learning_rate": 9.158793969849247e-06, + "loss": 0.0624, + "step": 8875 + }, + { + "epoch": 4.98, + "grad_norm": 2.1356654167175293, + "learning_rate": 9.156281407035176e-06, + "loss": 0.0633, + "step": 8900 + }, + { + "epoch": 4.99, + "grad_norm": 2.294193744659424, + "learning_rate": 9.153768844221106e-06, + "loss": 0.0661, + "step": 8925 + }, + { + "epoch": 5.01, + "grad_norm": 1.8841824531555176, + "learning_rate": 9.151256281407037e-06, + "loss": 0.059, + "step": 8950 + }, + { + "epoch": 5.02, + "grad_norm": 1.822686791419983, + "learning_rate": 9.148743718592964e-06, + "loss": 0.0483, + "step": 8975 + }, + { + "epoch": 5.03, + "grad_norm": 2.030924081802368, + "learning_rate": 9.146231155778895e-06, + "loss": 0.0505, + "step": 9000 + }, + { + "epoch": 5.03, + "eval_loss": 0.09121539443731308, + "eval_runtime": 1123.9956, + "eval_samples_per_second": 1.272, + "eval_steps_per_second": 1.272, + "eval_wer": 32.0448037729493, + "step": 9000 + }, + { + "epoch": 5.05, + "grad_norm": 2.1822900772094727, + "learning_rate": 9.143718592964825e-06, + "loss": 0.0509, + "step": 9025 + }, + { + "epoch": 5.06, + "grad_norm": 2.2777459621429443, + "learning_rate": 9.141206030150754e-06, + "loss": 0.049, + "step": 9050 + }, + { + "epoch": 5.08, + "grad_norm": 2.088038444519043, + "learning_rate": 9.138693467336685e-06, + "loss": 0.051, + "step": 9075 + }, + { + "epoch": 5.09, + "grad_norm": 2.0333406925201416, + "learning_rate": 9.136180904522614e-06, + "loss": 0.0492, + "step": 9100 + }, + { + "epoch": 5.1, + "grad_norm": 2.1352896690368652, + "learning_rate": 9.133668341708544e-06, + "loss": 0.0473, + "step": 9125 + }, + { + "epoch": 5.12, + "grad_norm": 2.0653650760650635, + "learning_rate": 9.131155778894473e-06, + "loss": 0.0489, + "step": 9150 + }, + { + "epoch": 5.13, + "grad_norm": 2.1148762702941895, + "learning_rate": 9.128643216080402e-06, + "loss": 0.0501, + "step": 9175 + }, + { + "epoch": 5.15, + "grad_norm": 1.9316242933273315, + "learning_rate": 9.126130653266332e-06, + "loss": 0.0499, + "step": 9200 + }, + { + "epoch": 5.16, + "grad_norm": 2.023138999938965, + "learning_rate": 9.123618090452263e-06, + "loss": 0.0484, + "step": 9225 + }, + { + "epoch": 5.17, + "grad_norm": 1.7025102376937866, + "learning_rate": 9.121105527638192e-06, + "loss": 0.0511, + "step": 9250 + }, + { + "epoch": 5.19, + "grad_norm": 1.8935761451721191, + "learning_rate": 9.118592964824121e-06, + "loss": 0.051, + "step": 9275 + }, + { + "epoch": 5.2, + "grad_norm": 2.1580758094787598, + "learning_rate": 9.11608040201005e-06, + "loss": 0.0489, + "step": 9300 + }, + { + "epoch": 5.22, + "grad_norm": 2.0983216762542725, + "learning_rate": 9.11356783919598e-06, + "loss": 0.0484, + "step": 9325 + }, + { + "epoch": 5.23, + "grad_norm": 2.1321070194244385, + "learning_rate": 9.111055276381911e-06, + "loss": 0.0505, + "step": 9350 + }, + { + "epoch": 5.24, + "grad_norm": 1.9594415426254272, + "learning_rate": 9.10854271356784e-06, + "loss": 0.0495, + "step": 9375 + }, + { + "epoch": 5.26, + "grad_norm": 1.8090366125106812, + "learning_rate": 9.10603015075377e-06, + "loss": 0.0486, + "step": 9400 + }, + { + "epoch": 5.27, + "grad_norm": 1.7660349607467651, + "learning_rate": 9.1035175879397e-06, + "loss": 0.0511, + "step": 9425 + }, + { + "epoch": 5.29, + "grad_norm": 2.162297487258911, + "learning_rate": 9.101005025125628e-06, + "loss": 0.0496, + "step": 9450 + }, + { + "epoch": 5.3, + "grad_norm": 2.38175106048584, + "learning_rate": 9.09849246231156e-06, + "loss": 0.0532, + "step": 9475 + }, + { + "epoch": 5.31, + "grad_norm": 2.036396026611328, + "learning_rate": 9.095979899497489e-06, + "loss": 0.0505, + "step": 9500 + }, + { + "epoch": 5.33, + "grad_norm": 2.5879578590393066, + "learning_rate": 9.093467336683418e-06, + "loss": 0.0502, + "step": 9525 + }, + { + "epoch": 5.34, + "grad_norm": 1.927018404006958, + "learning_rate": 9.090954773869347e-06, + "loss": 0.0504, + "step": 9550 + }, + { + "epoch": 5.36, + "grad_norm": 1.7385034561157227, + "learning_rate": 9.088442211055277e-06, + "loss": 0.0509, + "step": 9575 + }, + { + "epoch": 5.37, + "grad_norm": 2.1973049640655518, + "learning_rate": 9.085929648241206e-06, + "loss": 0.051, + "step": 9600 + }, + { + "epoch": 5.38, + "grad_norm": 2.07732892036438, + "learning_rate": 9.083417085427137e-06, + "loss": 0.0491, + "step": 9625 + }, + { + "epoch": 5.4, + "grad_norm": 1.86457097530365, + "learning_rate": 9.080904522613066e-06, + "loss": 0.0489, + "step": 9650 + }, + { + "epoch": 5.41, + "grad_norm": 2.1250147819519043, + "learning_rate": 9.078391959798996e-06, + "loss": 0.0522, + "step": 9675 + }, + { + "epoch": 5.43, + "grad_norm": 2.152125597000122, + "learning_rate": 9.075879396984927e-06, + "loss": 0.0495, + "step": 9700 + }, + { + "epoch": 5.44, + "grad_norm": 2.210969924926758, + "learning_rate": 9.073366834170854e-06, + "loss": 0.0521, + "step": 9725 + }, + { + "epoch": 5.45, + "grad_norm": 2.3422908782958984, + "learning_rate": 9.070854271356785e-06, + "loss": 0.0526, + "step": 9750 + }, + { + "epoch": 5.47, + "grad_norm": 2.218177080154419, + "learning_rate": 9.068341708542715e-06, + "loss": 0.0496, + "step": 9775 + }, + { + "epoch": 5.48, + "grad_norm": 2.2847001552581787, + "learning_rate": 9.065829145728644e-06, + "loss": 0.0509, + "step": 9800 + }, + { + "epoch": 5.49, + "grad_norm": 1.9726907014846802, + "learning_rate": 9.063316582914573e-06, + "loss": 0.0474, + "step": 9825 + }, + { + "epoch": 5.51, + "grad_norm": 1.885382890701294, + "learning_rate": 9.060804020100502e-06, + "loss": 0.051, + "step": 9850 + }, + { + "epoch": 5.52, + "grad_norm": 1.929391860961914, + "learning_rate": 9.058291457286433e-06, + "loss": 0.0508, + "step": 9875 + }, + { + "epoch": 5.54, + "grad_norm": 1.971108078956604, + "learning_rate": 9.055778894472363e-06, + "loss": 0.0496, + "step": 9900 + }, + { + "epoch": 5.55, + "grad_norm": 2.0183298587799072, + "learning_rate": 9.053266331658292e-06, + "loss": 0.0486, + "step": 9925 + }, + { + "epoch": 5.56, + "grad_norm": 1.991962194442749, + "learning_rate": 9.050753768844221e-06, + "loss": 0.0481, + "step": 9950 + }, + { + "epoch": 5.58, + "grad_norm": 1.6029181480407715, + "learning_rate": 9.048241206030152e-06, + "loss": 0.0497, + "step": 9975 + }, + { + "epoch": 5.59, + "grad_norm": 2.157927989959717, + "learning_rate": 9.04572864321608e-06, + "loss": 0.0493, + "step": 10000 + }, + { + "epoch": 5.59, + "eval_loss": 0.09408088028430939, + "eval_runtime": 1112.5567, + "eval_samples_per_second": 1.285, + "eval_steps_per_second": 1.285, + "eval_wer": 31.99427320195385, + "step": 10000 + }, + { + "epoch": 5.61, + "grad_norm": 1.791551113128662, + "learning_rate": 9.043216080402011e-06, + "loss": 0.0499, + "step": 10025 + }, + { + "epoch": 5.62, + "grad_norm": 2.0241196155548096, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0497, + "step": 10050 + }, + { + "epoch": 5.63, + "grad_norm": 1.9032353162765503, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0483, + "step": 10075 + }, + { + "epoch": 5.65, + "grad_norm": 1.8380191326141357, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0504, + "step": 10100 + }, + { + "epoch": 5.66, + "grad_norm": 1.9578734636306763, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0498, + "step": 10125 + }, + { + "epoch": 5.68, + "grad_norm": 1.9855990409851074, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0489, + "step": 10150 + }, + { + "epoch": 5.69, + "grad_norm": 1.9638714790344238, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0499, + "step": 10175 + }, + { + "epoch": 5.7, + "grad_norm": 2.2299182415008545, + "learning_rate": 9.025628140703518e-06, + "loss": 0.05, + "step": 10200 + }, + { + "epoch": 5.72, + "grad_norm": 2.2847042083740234, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0492, + "step": 10225 + }, + { + "epoch": 5.73, + "grad_norm": 2.1564724445343018, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0497, + "step": 10250 + }, + { + "epoch": 5.75, + "grad_norm": 2.049359083175659, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0495, + "step": 10275 + }, + { + "epoch": 5.76, + "grad_norm": 2.1565001010894775, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0498, + "step": 10300 + }, + { + "epoch": 5.77, + "grad_norm": 2.1797873973846436, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0489, + "step": 10325 + }, + { + "epoch": 5.79, + "grad_norm": 1.9211273193359375, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0465, + "step": 10350 + }, + { + "epoch": 5.8, + "grad_norm": 2.1134414672851562, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0492, + "step": 10375 + }, + { + "epoch": 5.82, + "grad_norm": 2.213566780090332, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0494, + "step": 10400 + }, + { + "epoch": 5.83, + "grad_norm": 2.2658944129943848, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0513, + "step": 10425 + }, + { + "epoch": 5.84, + "grad_norm": 2.15393328666687, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0506, + "step": 10450 + }, + { + "epoch": 5.86, + "grad_norm": 2.0125491619110107, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0504, + "step": 10475 + }, + { + "epoch": 5.87, + "grad_norm": 2.021787166595459, + "learning_rate": 8.995477386934675e-06, + "loss": 0.05, + "step": 10500 + }, + { + "epoch": 5.89, + "grad_norm": 1.8181731700897217, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0513, + "step": 10525 + }, + { + "epoch": 5.9, + "grad_norm": 2.1621928215026855, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0529, + "step": 10550 + }, + { + "epoch": 5.91, + "grad_norm": 1.633863091468811, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0505, + "step": 10575 + }, + { + "epoch": 5.93, + "grad_norm": 2.091038465499878, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0471, + "step": 10600 + }, + { + "epoch": 5.94, + "grad_norm": 2.222620725631714, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0508, + "step": 10625 + }, + { + "epoch": 5.96, + "grad_norm": 2.4221885204315186, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0504, + "step": 10650 + }, + { + "epoch": 5.97, + "grad_norm": 2.513897657394409, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0478, + "step": 10675 + }, + { + "epoch": 5.98, + "grad_norm": 2.311898946762085, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0485, + "step": 10700 + }, + { + "epoch": 6.0, + "grad_norm": 1.8122689723968506, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0478, + "step": 10725 + }, + { + "epoch": 6.01, + "grad_norm": 2.0776751041412354, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0371, + "step": 10750 + }, + { + "epoch": 6.03, + "grad_norm": 1.674000859260559, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0351, + "step": 10775 + }, + { + "epoch": 6.04, + "grad_norm": 1.9208186864852905, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0348, + "step": 10800 + }, + { + "epoch": 6.05, + "grad_norm": 1.7870476245880127, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0374, + "step": 10825 + }, + { + "epoch": 6.07, + "grad_norm": 1.9227241277694702, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0367, + "step": 10850 + }, + { + "epoch": 6.08, + "grad_norm": 1.810810923576355, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0342, + "step": 10875 + }, + { + "epoch": 6.1, + "grad_norm": 2.014636754989624, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0358, + "step": 10900 + }, + { + "epoch": 6.11, + "grad_norm": 1.6799869537353516, + "learning_rate": 8.952763819095479e-06, + "loss": 0.036, + "step": 10925 + }, + { + "epoch": 6.12, + "grad_norm": 2.291813850402832, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0368, + "step": 10950 + }, + { + "epoch": 6.14, + "grad_norm": 2.054922103881836, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0372, + "step": 10975 + }, + { + "epoch": 6.15, + "grad_norm": 1.837943434715271, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0355, + "step": 11000 + }, + { + "epoch": 6.15, + "eval_loss": 0.10295047610998154, + "eval_runtime": 1363.8075, + "eval_samples_per_second": 1.049, + "eval_steps_per_second": 1.049, + "eval_wer": 32.86171467070911, + "step": 11000 + }, + { + "epoch": 6.17, + "grad_norm": 2.1512575149536133, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0383, + "step": 11025 + }, + { + "epoch": 6.18, + "grad_norm": 1.9821395874023438, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0362, + "step": 11050 + }, + { + "epoch": 6.19, + "grad_norm": 2.0132622718811035, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0366, + "step": 11075 + }, + { + "epoch": 6.21, + "grad_norm": 1.6676757335662842, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0365, + "step": 11100 + }, + { + "epoch": 6.22, + "grad_norm": 1.8551216125488281, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0358, + "step": 11125 + }, + { + "epoch": 6.24, + "grad_norm": 2.1621651649475098, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0377, + "step": 11150 + }, + { + "epoch": 6.25, + "grad_norm": 1.8369964361190796, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0382, + "step": 11175 + }, + { + "epoch": 6.26, + "grad_norm": 2.036353588104248, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0365, + "step": 11200 + }, + { + "epoch": 6.28, + "grad_norm": 1.6832373142242432, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0353, + "step": 11225 + }, + { + "epoch": 6.29, + "grad_norm": 2.129995822906494, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0386, + "step": 11250 + }, + { + "epoch": 6.31, + "grad_norm": 2.2060372829437256, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0361, + "step": 11275 + }, + { + "epoch": 6.32, + "grad_norm": 1.7014271020889282, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0364, + "step": 11300 + }, + { + "epoch": 6.33, + "grad_norm": 2.2171339988708496, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0376, + "step": 11325 + }, + { + "epoch": 6.35, + "grad_norm": 1.716725468635559, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0364, + "step": 11350 + }, + { + "epoch": 6.36, + "grad_norm": 1.7247710227966309, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0363, + "step": 11375 + }, + { + "epoch": 6.38, + "grad_norm": 2.0066280364990234, + "learning_rate": 8.905025125628143e-06, + "loss": 0.037, + "step": 11400 + }, + { + "epoch": 6.39, + "grad_norm": 2.020876407623291, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0372, + "step": 11425 + }, + { + "epoch": 6.4, + "grad_norm": 1.8083990812301636, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0378, + "step": 11450 + }, + { + "epoch": 6.42, + "grad_norm": 1.7945133447647095, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0354, + "step": 11475 + }, + { + "epoch": 6.43, + "grad_norm": 2.2276597023010254, + "learning_rate": 8.89497487437186e-06, + "loss": 0.037, + "step": 11500 + }, + { + "epoch": 6.45, + "grad_norm": 2.187774181365967, + "learning_rate": 8.892462311557791e-06, + "loss": 0.038, + "step": 11525 + }, + { + "epoch": 6.46, + "grad_norm": 2.2305829524993896, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0393, + "step": 11550 + }, + { + "epoch": 6.47, + "grad_norm": 2.2807681560516357, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0359, + "step": 11575 + }, + { + "epoch": 6.49, + "grad_norm": 1.8010451793670654, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0367, + "step": 11600 + }, + { + "epoch": 6.5, + "grad_norm": 2.455719232559204, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0374, + "step": 11625 + }, + { + "epoch": 6.52, + "grad_norm": 2.0416178703308105, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0378, + "step": 11650 + }, + { + "epoch": 6.53, + "grad_norm": 2.0058817863464355, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0364, + "step": 11675 + }, + { + "epoch": 6.54, + "grad_norm": 2.2373459339141846, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0359, + "step": 11700 + }, + { + "epoch": 6.56, + "grad_norm": 1.8764866590499878, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0379, + "step": 11725 + }, + { + "epoch": 6.57, + "grad_norm": 1.7421679496765137, + "learning_rate": 8.869849246231156e-06, + "loss": 0.037, + "step": 11750 + }, + { + "epoch": 6.59, + "grad_norm": 2.182452440261841, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0377, + "step": 11775 + }, + { + "epoch": 6.6, + "grad_norm": 1.945114254951477, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0375, + "step": 11800 + }, + { + "epoch": 6.61, + "grad_norm": 2.1583731174468994, + "learning_rate": 8.862311557788944e-06, + "loss": 0.036, + "step": 11825 + }, + { + "epoch": 6.63, + "grad_norm": 1.9917817115783691, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0372, + "step": 11850 + }, + { + "epoch": 6.64, + "grad_norm": 2.047527313232422, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0376, + "step": 11875 + }, + { + "epoch": 6.66, + "grad_norm": 2.1988823413848877, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0382, + "step": 11900 + }, + { + "epoch": 6.67, + "grad_norm": 2.0210208892822266, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0358, + "step": 11925 + }, + { + "epoch": 6.68, + "grad_norm": 1.884233832359314, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0357, + "step": 11950 + }, + { + "epoch": 6.7, + "grad_norm": 2.1466429233551025, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0373, + "step": 11975 + }, + { + "epoch": 6.71, + "grad_norm": 1.7139405012130737, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0368, + "step": 12000 + }, + { + "epoch": 6.71, + "eval_loss": 0.10729934275150299, + "eval_runtime": 1125.1233, + "eval_samples_per_second": 1.271, + "eval_steps_per_second": 1.271, + "eval_wer": 33.451237998989384, + "step": 12000 + }, + { + "epoch": 6.73, + "grad_norm": 2.092406749725342, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0372, + "step": 12025 + }, + { + "epoch": 6.74, + "grad_norm": 2.298687696456909, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0389, + "step": 12050 + }, + { + "epoch": 6.75, + "grad_norm": 2.1710333824157715, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0371, + "step": 12075 + }, + { + "epoch": 6.77, + "grad_norm": 1.8482507467269897, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0386, + "step": 12100 + }, + { + "epoch": 6.78, + "grad_norm": 2.053034543991089, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0381, + "step": 12125 + }, + { + "epoch": 6.8, + "grad_norm": 2.142703056335449, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0366, + "step": 12150 + }, + { + "epoch": 6.81, + "grad_norm": 1.896445631980896, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0371, + "step": 12175 + }, + { + "epoch": 6.82, + "grad_norm": 1.9503456354141235, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0376, + "step": 12200 + }, + { + "epoch": 6.84, + "grad_norm": 2.107482671737671, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0382, + "step": 12225 + }, + { + "epoch": 6.85, + "grad_norm": 2.262901782989502, + "learning_rate": 8.81959798994975e-06, + "loss": 0.039, + "step": 12250 + }, + { + "epoch": 6.87, + "grad_norm": 2.264549970626831, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0369, + "step": 12275 + }, + { + "epoch": 6.88, + "grad_norm": 2.191319227218628, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0362, + "step": 12300 + }, + { + "epoch": 6.89, + "grad_norm": 2.148512125015259, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0366, + "step": 12325 + }, + { + "epoch": 6.91, + "grad_norm": 1.8618416786193848, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0352, + "step": 12350 + }, + { + "epoch": 6.92, + "grad_norm": 1.870177149772644, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0366, + "step": 12375 + }, + { + "epoch": 6.94, + "grad_norm": 2.1114089488983154, + "learning_rate": 8.804522613065327e-06, + "loss": 0.038, + "step": 12400 + }, + { + "epoch": 6.95, + "grad_norm": NaN, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0377, + "step": 12425 + }, + { + "epoch": 6.96, + "grad_norm": 1.613968849182129, + "learning_rate": 8.79959798994975e-06, + "loss": 0.036, + "step": 12450 + }, + { + "epoch": 6.98, + "grad_norm": 2.0975472927093506, + "learning_rate": 8.79708542713568e-06, + "loss": 0.036, + "step": 12475 + }, + { + "epoch": 6.99, + "grad_norm": 1.967268705368042, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0375, + "step": 12500 + }, + { + "epoch": 7.01, + "grad_norm": 1.8776395320892334, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0328, + "step": 12525 + }, + { + "epoch": 7.02, + "grad_norm": 1.5957058668136597, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0251, + "step": 12550 + }, + { + "epoch": 7.03, + "grad_norm": 1.6244521141052246, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0248, + "step": 12575 + }, + { + "epoch": 7.05, + "grad_norm": 2.0203723907470703, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0258, + "step": 12600 + }, + { + "epoch": 7.06, + "grad_norm": 1.8605165481567383, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0266, + "step": 12625 + }, + { + "epoch": 7.07, + "grad_norm": 2.012575387954712, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0267, + "step": 12650 + }, + { + "epoch": 7.09, + "grad_norm": 1.9168787002563477, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0239, + "step": 12675 + }, + { + "epoch": 7.1, + "grad_norm": 2.0559630393981934, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0248, + "step": 12700 + }, + { + "epoch": 7.12, + "grad_norm": 1.7209094762802124, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0256, + "step": 12725 + }, + { + "epoch": 7.13, + "grad_norm": 1.962271809577942, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0272, + "step": 12750 + }, + { + "epoch": 7.14, + "grad_norm": 1.950584888458252, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0256, + "step": 12775 + }, + { + "epoch": 7.16, + "grad_norm": 1.7162928581237793, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0255, + "step": 12800 + }, + { + "epoch": 7.17, + "grad_norm": 1.9905593395233154, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0262, + "step": 12825 + }, + { + "epoch": 7.19, + "grad_norm": 2.1522715091705322, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0261, + "step": 12850 + }, + { + "epoch": 7.2, + "grad_norm": 2.026097297668457, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0253, + "step": 12875 + }, + { + "epoch": 7.21, + "grad_norm": 1.768907904624939, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0252, + "step": 12900 + }, + { + "epoch": 7.23, + "grad_norm": 1.9881784915924072, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0251, + "step": 12925 + }, + { + "epoch": 7.24, + "grad_norm": 2.424466133117676, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0256, + "step": 12950 + }, + { + "epoch": 7.26, + "grad_norm": 2.123943567276001, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0255, + "step": 12975 + }, + { + "epoch": 7.27, + "grad_norm": 1.7357263565063477, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0262, + "step": 13000 + }, + { + "epoch": 7.27, + "eval_loss": 0.11884187161922455, + "eval_runtime": 1116.1974, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 1.281, + "eval_wer": 33.2743810005053, + "step": 13000 + }, + { + "epoch": 7.28, + "grad_norm": 2.173398017883301, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0269, + "step": 13025 + }, + { + "epoch": 7.3, + "grad_norm": 1.7893646955490112, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0253, + "step": 13050 + }, + { + "epoch": 7.31, + "grad_norm": 1.9690555334091187, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0262, + "step": 13075 + }, + { + "epoch": 7.33, + "grad_norm": 1.6328657865524292, + "learning_rate": 8.734271356783919e-06, + "loss": 0.025, + "step": 13100 + }, + { + "epoch": 7.34, + "grad_norm": 2.1064035892486572, + "learning_rate": 8.73175879396985e-06, + "loss": 0.027, + "step": 13125 + }, + { + "epoch": 7.35, + "grad_norm": 1.949397325515747, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0271, + "step": 13150 + }, + { + "epoch": 7.37, + "grad_norm": 1.9304531812667847, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0279, + "step": 13175 + }, + { + "epoch": 7.38, + "grad_norm": 1.9707785844802856, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0247, + "step": 13200 + }, + { + "epoch": 7.4, + "grad_norm": 1.685416340827942, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0265, + "step": 13225 + }, + { + "epoch": 7.41, + "grad_norm": 2.2971854209899902, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0266, + "step": 13250 + }, + { + "epoch": 7.42, + "grad_norm": 1.9062155485153198, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0265, + "step": 13275 + }, + { + "epoch": 7.44, + "grad_norm": 1.991284728050232, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0261, + "step": 13300 + }, + { + "epoch": 7.45, + "grad_norm": 1.9703929424285889, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0255, + "step": 13325 + }, + { + "epoch": 7.47, + "grad_norm": 1.8658965826034546, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0257, + "step": 13350 + }, + { + "epoch": 7.48, + "grad_norm": 1.669040560722351, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0274, + "step": 13375 + }, + { + "epoch": 7.49, + "grad_norm": 1.7893307209014893, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0259, + "step": 13400 + }, + { + "epoch": 7.51, + "grad_norm": 1.9402819871902466, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0275, + "step": 13425 + }, + { + "epoch": 7.52, + "grad_norm": 2.2205240726470947, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0274, + "step": 13450 + }, + { + "epoch": 7.54, + "grad_norm": 1.8321993350982666, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0273, + "step": 13475 + }, + { + "epoch": 7.55, + "grad_norm": 1.8441137075424194, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0257, + "step": 13500 + }, + { + "epoch": 7.56, + "grad_norm": 2.2048592567443848, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0274, + "step": 13525 + }, + { + "epoch": 7.58, + "grad_norm": 1.970394492149353, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0257, + "step": 13550 + }, + { + "epoch": 7.59, + "grad_norm": 1.954429268836975, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0264, + "step": 13575 + }, + { + "epoch": 7.61, + "grad_norm": 2.1119158267974854, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0271, + "step": 13600 + }, + { + "epoch": 7.62, + "grad_norm": 1.9283645153045654, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0273, + "step": 13625 + }, + { + "epoch": 7.63, + "grad_norm": 1.7391477823257446, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0263, + "step": 13650 + }, + { + "epoch": 7.65, + "grad_norm": 2.0596108436584473, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0278, + "step": 13675 + }, + { + "epoch": 7.66, + "grad_norm": 1.5383646488189697, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0267, + "step": 13700 + }, + { + "epoch": 7.68, + "grad_norm": 2.116941452026367, + "learning_rate": 8.67145728643216e-06, + "loss": 0.028, + "step": 13725 + }, + { + "epoch": 7.69, + "grad_norm": 1.7421655654907227, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0288, + "step": 13750 + }, + { + "epoch": 7.7, + "grad_norm": 1.6251894235610962, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0277, + "step": 13775 + }, + { + "epoch": 7.72, + "grad_norm": 2.0601091384887695, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0266, + "step": 13800 + }, + { + "epoch": 7.73, + "grad_norm": 1.7614444494247437, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0277, + "step": 13825 + }, + { + "epoch": 7.75, + "grad_norm": 1.9296365976333618, + "learning_rate": 8.658894472361809e-06, + "loss": 0.026, + "step": 13850 + }, + { + "epoch": 7.76, + "grad_norm": 2.399764060974121, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0268, + "step": 13875 + }, + { + "epoch": 7.77, + "grad_norm": 1.9063934087753296, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0256, + "step": 13900 + }, + { + "epoch": 7.79, + "grad_norm": 1.9456448554992676, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0274, + "step": 13925 + }, + { + "epoch": 7.8, + "grad_norm": 2.0229716300964355, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0252, + "step": 13950 + }, + { + "epoch": 7.82, + "grad_norm": 2.07765793800354, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0262, + "step": 13975 + }, + { + "epoch": 7.83, + "grad_norm": 2.0000078678131104, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0274, + "step": 14000 + }, + { + "epoch": 7.83, + "eval_loss": 0.11842904984951019, + "eval_runtime": 1109.0006, + "eval_samples_per_second": 1.289, + "eval_steps_per_second": 1.289, + "eval_wer": 33.29964628600303, + "step": 14000 + }, + { + "epoch": 7.84, + "grad_norm": 2.079373598098755, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0258, + "step": 14025 + }, + { + "epoch": 7.86, + "grad_norm": 1.9359194040298462, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0273, + "step": 14050 + }, + { + "epoch": 7.87, + "grad_norm": 1.7530440092086792, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0286, + "step": 14075 + }, + { + "epoch": 7.89, + "grad_norm": 2.0229170322418213, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0279, + "step": 14100 + }, + { + "epoch": 7.9, + "grad_norm": 1.9933878183364868, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0279, + "step": 14125 + }, + { + "epoch": 7.91, + "grad_norm": 2.288254737854004, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0277, + "step": 14150 + }, + { + "epoch": 7.93, + "grad_norm": 1.5023448467254639, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0254, + "step": 14175 + }, + { + "epoch": 7.94, + "grad_norm": 2.456772565841675, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0271, + "step": 14200 + }, + { + "epoch": 7.96, + "grad_norm": 2.039050579071045, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0271, + "step": 14225 + }, + { + "epoch": 7.97, + "grad_norm": 1.9126863479614258, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0248, + "step": 14250 + }, + { + "epoch": 7.98, + "grad_norm": 2.2252838611602783, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0277, + "step": 14275 + }, + { + "epoch": 8.0, + "grad_norm": 2.050999879837036, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0272, + "step": 14300 + }, + { + "epoch": 8.01, + "grad_norm": 1.3068825006484985, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0191, + "step": 14325 + }, + { + "epoch": 8.03, + "grad_norm": 1.9300107955932617, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0171, + "step": 14350 + }, + { + "epoch": 8.04, + "grad_norm": 1.750525951385498, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0168, + "step": 14375 + }, + { + "epoch": 8.05, + "grad_norm": 1.658878207206726, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0174, + "step": 14400 + }, + { + "epoch": 8.07, + "grad_norm": 1.6386585235595703, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0175, + "step": 14425 + }, + { + "epoch": 8.08, + "grad_norm": 1.9891026020050049, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0179, + "step": 14450 + }, + { + "epoch": 8.1, + "grad_norm": 1.371500849723816, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0175, + "step": 14475 + }, + { + "epoch": 8.11, + "grad_norm": 1.8828202486038208, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0178, + "step": 14500 + }, + { + "epoch": 8.12, + "grad_norm": 1.7653783559799194, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0176, + "step": 14525 + }, + { + "epoch": 8.14, + "grad_norm": 1.6308467388153076, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0175, + "step": 14550 + }, + { + "epoch": 8.15, + "grad_norm": 1.5783458948135376, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0167, + "step": 14575 + }, + { + "epoch": 8.17, + "grad_norm": 1.8351773023605347, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0179, + "step": 14600 + }, + { + "epoch": 8.18, + "grad_norm": 1.5631933212280273, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0176, + "step": 14625 + }, + { + "epoch": 8.19, + "grad_norm": 2.031529664993286, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0183, + "step": 14650 + }, + { + "epoch": 8.21, + "grad_norm": 1.5908170938491821, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0174, + "step": 14675 + }, + { + "epoch": 8.22, + "grad_norm": 2.0777623653411865, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0174, + "step": 14700 + }, + { + "epoch": 8.24, + "grad_norm": 1.9531618356704712, + "learning_rate": 8.570954773869347e-06, + "loss": 0.017, + "step": 14725 + }, + { + "epoch": 8.25, + "grad_norm": 1.9662383794784546, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0185, + "step": 14750 + }, + { + "epoch": 8.26, + "grad_norm": 1.8461543321609497, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0187, + "step": 14775 + }, + { + "epoch": 8.28, + "grad_norm": 2.146507740020752, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0186, + "step": 14800 + }, + { + "epoch": 8.29, + "grad_norm": 1.7800989151000977, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0179, + "step": 14825 + }, + { + "epoch": 8.31, + "grad_norm": 1.4540683031082153, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0184, + "step": 14850 + }, + { + "epoch": 8.32, + "grad_norm": 1.9251949787139893, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0192, + "step": 14875 + }, + { + "epoch": 8.33, + "grad_norm": 2.200953960418701, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0185, + "step": 14900 + }, + { + "epoch": 8.35, + "grad_norm": 1.9251872301101685, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0176, + "step": 14925 + }, + { + "epoch": 8.36, + "grad_norm": 1.8213937282562256, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0192, + "step": 14950 + }, + { + "epoch": 8.38, + "grad_norm": 2.124493360519409, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0188, + "step": 14975 + }, + { + "epoch": 8.39, + "grad_norm": 1.6381850242614746, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0188, + "step": 15000 + }, + { + "epoch": 8.39, + "eval_loss": 0.13210591673851013, + "eval_runtime": 1110.5601, + "eval_samples_per_second": 1.288, + "eval_steps_per_second": 1.288, + "eval_wer": 33.61125147380832, + "step": 15000 + }, + { + "epoch": 8.4, + "grad_norm": 2.0150794982910156, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0184, + "step": 15025 + }, + { + "epoch": 8.42, + "grad_norm": 2.196256637573242, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0192, + "step": 15050 + }, + { + "epoch": 8.43, + "grad_norm": 1.749823808670044, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0185, + "step": 15075 + }, + { + "epoch": 8.45, + "grad_norm": 1.4908981323242188, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0189, + "step": 15100 + }, + { + "epoch": 8.46, + "grad_norm": 1.8969824314117432, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0185, + "step": 15125 + }, + { + "epoch": 8.47, + "grad_norm": 1.757111668586731, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0183, + "step": 15150 + }, + { + "epoch": 8.49, + "grad_norm": 1.8340396881103516, + "learning_rate": 8.525728643216082e-06, + "loss": 0.019, + "step": 15175 + }, + { + "epoch": 8.5, + "grad_norm": 1.6712031364440918, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0186, + "step": 15200 + }, + { + "epoch": 8.52, + "grad_norm": 1.835571527481079, + "learning_rate": 8.52070351758794e-06, + "loss": 0.019, + "step": 15225 + }, + { + "epoch": 8.53, + "grad_norm": 1.9817243814468384, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0185, + "step": 15250 + }, + { + "epoch": 8.54, + "grad_norm": 2.108416795730591, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0201, + "step": 15275 + }, + { + "epoch": 8.56, + "grad_norm": 2.099987030029297, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0188, + "step": 15300 + }, + { + "epoch": 8.57, + "grad_norm": 2.0511202812194824, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0192, + "step": 15325 + }, + { + "epoch": 8.59, + "grad_norm": 1.6903257369995117, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0184, + "step": 15350 + }, + { + "epoch": 8.6, + "grad_norm": 1.9979782104492188, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0201, + "step": 15375 + }, + { + "epoch": 8.61, + "grad_norm": 2.2117624282836914, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0208, + "step": 15400 + }, + { + "epoch": 8.63, + "grad_norm": 1.9261033535003662, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0183, + "step": 15425 + }, + { + "epoch": 8.64, + "grad_norm": 2.0639586448669434, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0199, + "step": 15450 + }, + { + "epoch": 8.65, + "grad_norm": 1.8180594444274902, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0182, + "step": 15475 + }, + { + "epoch": 8.67, + "grad_norm": 1.9696592092514038, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0193, + "step": 15500 + }, + { + "epoch": 8.68, + "grad_norm": 1.8539423942565918, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0192, + "step": 15525 + }, + { + "epoch": 8.7, + "grad_norm": 2.1110453605651855, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0192, + "step": 15550 + }, + { + "epoch": 8.71, + "grad_norm": 2.117213487625122, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0196, + "step": 15575 + }, + { + "epoch": 8.72, + "grad_norm": 2.087003707885742, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0199, + "step": 15600 + }, + { + "epoch": 8.74, + "grad_norm": 1.9678318500518799, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0181, + "step": 15625 + }, + { + "epoch": 8.75, + "grad_norm": 1.9911158084869385, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0189, + "step": 15650 + }, + { + "epoch": 8.77, + "grad_norm": 1.8968164920806885, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0199, + "step": 15675 + }, + { + "epoch": 8.78, + "grad_norm": 1.6177029609680176, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0189, + "step": 15700 + }, + { + "epoch": 8.79, + "grad_norm": 2.1239004135131836, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0183, + "step": 15725 + }, + { + "epoch": 8.81, + "grad_norm": 2.28218674659729, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0201, + "step": 15750 + }, + { + "epoch": 8.82, + "grad_norm": 2.065746784210205, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0203, + "step": 15775 + }, + { + "epoch": 8.84, + "grad_norm": 2.133787155151367, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0194, + "step": 15800 + }, + { + "epoch": 8.85, + "grad_norm": 2.1499948501586914, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0183, + "step": 15825 + }, + { + "epoch": 8.86, + "grad_norm": 1.9663341045379639, + "learning_rate": 8.457889447236182e-06, + "loss": 0.019, + "step": 15850 + }, + { + "epoch": 8.88, + "grad_norm": 1.9844950437545776, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0195, + "step": 15875 + }, + { + "epoch": 8.89, + "grad_norm": 2.070413112640381, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0188, + "step": 15900 + }, + { + "epoch": 8.91, + "grad_norm": 1.83562171459198, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0192, + "step": 15925 + }, + { + "epoch": 8.92, + "grad_norm": 2.4204366207122803, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0193, + "step": 15950 + }, + { + "epoch": 8.93, + "grad_norm": 2.3948545455932617, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0192, + "step": 15975 + }, + { + "epoch": 8.95, + "grad_norm": 1.92209792137146, + "learning_rate": 8.44281407035176e-06, + "loss": 0.019, + "step": 16000 + }, + { + "epoch": 8.95, + "eval_loss": 0.13486050069332123, + "eval_runtime": 1123.1786, + "eval_samples_per_second": 1.273, + "eval_steps_per_second": 1.273, + "eval_wer": 33.964965470776484, + "step": 16000 + }, + { + "epoch": 8.96, + "grad_norm": 2.0860226154327393, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0184, + "step": 16025 + }, + { + "epoch": 8.98, + "grad_norm": 2.078385591506958, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0192, + "step": 16050 + }, + { + "epoch": 8.99, + "grad_norm": 1.8864285945892334, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0188, + "step": 16075 + }, + { + "epoch": 9.0, + "grad_norm": 1.8263746500015259, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0167, + "step": 16100 + }, + { + "epoch": 9.02, + "grad_norm": 1.704453945159912, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0122, + "step": 16125 + }, + { + "epoch": 9.03, + "grad_norm": 1.3977042436599731, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0117, + "step": 16150 + }, + { + "epoch": 9.05, + "grad_norm": 1.8829319477081299, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0119, + "step": 16175 + }, + { + "epoch": 9.06, + "grad_norm": 1.3460557460784912, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0122, + "step": 16200 + }, + { + "epoch": 9.07, + "grad_norm": 1.5699985027313232, + "learning_rate": 8.420201005025125e-06, + "loss": 0.0119, + "step": 16225 + }, + { + "epoch": 9.09, + "grad_norm": 1.3774198293685913, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0116, + "step": 16250 + }, + { + "epoch": 9.1, + "grad_norm": 1.6538969278335571, + "learning_rate": 8.415175879396985e-06, + "loss": 0.0121, + "step": 16275 + }, + { + "epoch": 9.12, + "grad_norm": 1.463186264038086, + "learning_rate": 8.412663316582915e-06, + "loss": 0.0128, + "step": 16300 + }, + { + "epoch": 9.13, + "grad_norm": 1.433985948562622, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0124, + "step": 16325 + }, + { + "epoch": 9.14, + "grad_norm": 1.8047584295272827, + "learning_rate": 8.407638190954775e-06, + "loss": 0.0128, + "step": 16350 + }, + { + "epoch": 9.16, + "grad_norm": 1.8436475992202759, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0131, + "step": 16375 + }, + { + "epoch": 9.17, + "grad_norm": 1.5934550762176514, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0123, + "step": 16400 + }, + { + "epoch": 9.19, + "grad_norm": 1.6266775131225586, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0119, + "step": 16425 + }, + { + "epoch": 9.2, + "grad_norm": 1.8237180709838867, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0125, + "step": 16450 + }, + { + "epoch": 9.21, + "grad_norm": 1.9706188440322876, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0133, + "step": 16475 + }, + { + "epoch": 9.23, + "grad_norm": 1.7060139179229736, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0128, + "step": 16500 + }, + { + "epoch": 9.24, + "grad_norm": 2.0143470764160156, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0125, + "step": 16525 + }, + { + "epoch": 9.26, + "grad_norm": 1.661948800086975, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0128, + "step": 16550 + }, + { + "epoch": 9.27, + "grad_norm": 1.7219727039337158, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0127, + "step": 16575 + }, + { + "epoch": 9.28, + "grad_norm": 2.237853527069092, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0132, + "step": 16600 + }, + { + "epoch": 9.3, + "grad_norm": 1.7033536434173584, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0123, + "step": 16625 + }, + { + "epoch": 9.31, + "grad_norm": 1.6009854078292847, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0125, + "step": 16650 + }, + { + "epoch": 9.33, + "grad_norm": 1.6521884202957153, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0139, + "step": 16675 + }, + { + "epoch": 9.34, + "grad_norm": 1.3785544633865356, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0137, + "step": 16700 + }, + { + "epoch": 9.35, + "grad_norm": 1.5283645391464233, + "learning_rate": 8.370150753768845e-06, + "loss": 0.0129, + "step": 16725 + }, + { + "epoch": 9.37, + "grad_norm": 2.058617115020752, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0134, + "step": 16750 + }, + { + "epoch": 9.38, + "grad_norm": 1.714177131652832, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0132, + "step": 16775 + }, + { + "epoch": 9.4, + "grad_norm": 1.332634687423706, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0118, + "step": 16800 + }, + { + "epoch": 9.41, + "grad_norm": 1.8287341594696045, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0129, + "step": 16825 + }, + { + "epoch": 9.42, + "grad_norm": 1.5074189901351929, + "learning_rate": 8.357587939698493e-06, + "loss": 0.0131, + "step": 16850 + }, + { + "epoch": 9.44, + "grad_norm": 2.0107340812683105, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0142, + "step": 16875 + }, + { + "epoch": 9.45, + "grad_norm": 1.8936904668807983, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0134, + "step": 16900 + }, + { + "epoch": 9.47, + "grad_norm": 1.4762846231460571, + "learning_rate": 8.350050251256282e-06, + "loss": 0.0122, + "step": 16925 + }, + { + "epoch": 9.48, + "grad_norm": 1.7671955823898315, + "learning_rate": 8.347537688442212e-06, + "loss": 0.0132, + "step": 16950 + }, + { + "epoch": 9.49, + "grad_norm": 1.7466484308242798, + "learning_rate": 8.345025125628141e-06, + "loss": 0.0124, + "step": 16975 + }, + { + "epoch": 9.51, + "grad_norm": 2.381716251373291, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0127, + "step": 17000 + }, + { + "epoch": 9.51, + "eval_loss": 0.14586423337459564, + "eval_runtime": 1116.9432, + "eval_samples_per_second": 1.28, + "eval_steps_per_second": 1.28, + "eval_wer": 34.41131884790298, + "step": 17000 + }, + { + "epoch": 9.52, + "grad_norm": 1.9521162509918213, + "learning_rate": 8.34e-06, + "loss": 0.0132, + "step": 17025 + }, + { + "epoch": 9.54, + "grad_norm": 1.8651758432388306, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0137, + "step": 17050 + }, + { + "epoch": 9.55, + "grad_norm": 1.7266855239868164, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0131, + "step": 17075 + }, + { + "epoch": 9.56, + "grad_norm": 1.9833261966705322, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0132, + "step": 17100 + }, + { + "epoch": 9.58, + "grad_norm": 1.8291089534759521, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0126, + "step": 17125 + }, + { + "epoch": 9.59, + "grad_norm": 1.3913580179214478, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0133, + "step": 17150 + }, + { + "epoch": 9.61, + "grad_norm": 2.294111490249634, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0137, + "step": 17175 + }, + { + "epoch": 9.62, + "grad_norm": 2.223754405975342, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0131, + "step": 17200 + }, + { + "epoch": 9.63, + "grad_norm": 1.987259030342102, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0133, + "step": 17225 + }, + { + "epoch": 9.65, + "grad_norm": 1.8146421909332275, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0137, + "step": 17250 + }, + { + "epoch": 9.66, + "grad_norm": 1.9060008525848389, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0135, + "step": 17275 + }, + { + "epoch": 9.68, + "grad_norm": 1.7321094274520874, + "learning_rate": 8.312361809045226e-06, + "loss": 0.013, + "step": 17300 + }, + { + "epoch": 9.69, + "grad_norm": 1.716107726097107, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0136, + "step": 17325 + }, + { + "epoch": 9.7, + "grad_norm": 1.7512887716293335, + "learning_rate": 8.307336683417086e-06, + "loss": 0.013, + "step": 17350 + }, + { + "epoch": 9.72, + "grad_norm": 1.792899250984192, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0136, + "step": 17375 + }, + { + "epoch": 9.73, + "grad_norm": 1.2933809757232666, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0129, + "step": 17400 + }, + { + "epoch": 9.75, + "grad_norm": 2.244049310684204, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0127, + "step": 17425 + }, + { + "epoch": 9.76, + "grad_norm": 2.0611095428466797, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0145, + "step": 17450 + }, + { + "epoch": 9.77, + "grad_norm": 2.0197601318359375, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0139, + "step": 17475 + }, + { + "epoch": 9.79, + "grad_norm": 1.9071155786514282, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0135, + "step": 17500 + }, + { + "epoch": 9.8, + "grad_norm": 1.6899077892303467, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0136, + "step": 17525 + }, + { + "epoch": 9.82, + "grad_norm": 2.269104480743408, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0133, + "step": 17550 + }, + { + "epoch": 9.83, + "grad_norm": 1.8881139755249023, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0138, + "step": 17575 + }, + { + "epoch": 9.84, + "grad_norm": 1.9432883262634277, + "learning_rate": 8.282211055276383e-06, + "loss": 0.015, + "step": 17600 + }, + { + "epoch": 9.86, + "grad_norm": 1.52784264087677, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0126, + "step": 17625 + }, + { + "epoch": 9.87, + "grad_norm": 1.9927188158035278, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0141, + "step": 17650 + }, + { + "epoch": 9.89, + "grad_norm": 2.025327205657959, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0129, + "step": 17675 + }, + { + "epoch": 9.9, + "grad_norm": 1.8177061080932617, + "learning_rate": 8.2721608040201e-06, + "loss": 0.014, + "step": 17700 + }, + { + "epoch": 9.91, + "grad_norm": 1.8043619394302368, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0139, + "step": 17725 + }, + { + "epoch": 9.93, + "grad_norm": 1.4666588306427002, + "learning_rate": 8.26713567839196e-06, + "loss": 0.0133, + "step": 17750 + }, + { + "epoch": 9.94, + "grad_norm": 1.905358076095581, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0134, + "step": 17775 + }, + { + "epoch": 9.96, + "grad_norm": 1.9146682024002075, + "learning_rate": 8.26211055276382e-06, + "loss": 0.0143, + "step": 17800 + }, + { + "epoch": 9.97, + "grad_norm": 2.3354909420013428, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0148, + "step": 17825 + }, + { + "epoch": 9.98, + "grad_norm": 2.019625425338745, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0134, + "step": 17850 + }, + { + "epoch": 10.0, + "grad_norm": 1.7942144870758057, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0138, + "step": 17875 + }, + { + "epoch": 10.01, + "grad_norm": 1.6707584857940674, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0098, + "step": 17900 + }, + { + "epoch": 10.03, + "grad_norm": 0.9876168966293335, + "learning_rate": 8.249547738693467e-06, + "loss": 0.008, + "step": 17925 + }, + { + "epoch": 10.04, + "grad_norm": 1.1493449211120605, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0083, + "step": 17950 + }, + { + "epoch": 10.05, + "grad_norm": 1.6167488098144531, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0086, + "step": 17975 + }, + { + "epoch": 10.07, + "grad_norm": 1.2718091011047363, + "learning_rate": 8.242010050251257e-06, + "loss": 0.0084, + "step": 18000 + }, + { + "epoch": 10.07, + "eval_loss": 0.1541416049003601, + "eval_runtime": 1116.5599, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 1.281, + "eval_wer": 33.87232609061816, + "step": 18000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 56, + "save_steps": 1000, + "total_flos": 5.60234702831616e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/kannada/checkpoint-18000/training_args.bin b/checkpoints/whisper-base/kannada/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ca5b444a13c5ea446c0b119b779d14c382eac71 --- /dev/null +++ b/checkpoints/whisper-base/kannada/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfa4042151f1409b8a5d1ea67e0201d961572da0ace5d5193e5844cc9193c72 +size 4667 diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/config.json b/checkpoints/whisper-base/magahi/checkpoint-17000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7861ebfd3dce452d730fc7657aa35befb4dcfe2d --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/generation_config.json b/checkpoints/whisper-base/magahi/checkpoint-17000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/model.safetensors b/checkpoints/whisper-base/magahi/checkpoint-17000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c5114ea5d93f56231a0e0085a9044a00c5da68f --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:316505167fee84dcdd3fff6f1a3d6acc50fa3612fd9f68db6da4a9c659b23e3b +size 290403936 diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/optimizer.pt b/checkpoints/whisper-base/magahi/checkpoint-17000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..653103fa36cc91922620b2cf4c103ac85b1e74c1 --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef33b9808694415b984f75f22c87e636bbff4fdbb128a7b62965c74a6082538 +size 574811077 diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/preprocessor_config.json b/checkpoints/whisper-base/magahi/checkpoint-17000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/rng_state.pth b/checkpoints/whisper-base/magahi/checkpoint-17000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8843dc5107f707c9e31c09e544c5c22f50e106a0 --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cca79eb7a944fb613a93dab9487133524c4146e9d4394db72817c12733ae9b +size 14575 diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/scheduler.pt b/checkpoints/whisper-base/magahi/checkpoint-17000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0dd58cd958c3e3b3d71627db6892ad2b3eb3f96 --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f44eae1f6407b9bff5ebbf6886b3aa7d18d0de3e7e4a4880574d9506385429 +size 627 diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/trainer_state.json b/checkpoints/whisper-base/magahi/checkpoint-17000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..775bf4087311307a688f402ac83208ed1673da4b --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/trainer_state.json @@ -0,0 +1,4934 @@ +{ + "best_metric": 25.431499460625673, + "best_model_checkpoint": "results/whisper-base/magahi/checkpoint-7000", + "epoch": 8.564231738035264, + "eval_steps": 1000, + "global_step": 17000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 23.93043327331543, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.2722, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 13.230968475341797, + "learning_rate": 9.600000000000001e-07, + "loss": 1.9352, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 8.388914108276367, + "learning_rate": 1.46e-06, + "loss": 1.4853, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 6.454675674438477, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.1908, + "step": 100 + }, + { + "epoch": 0.06, + "grad_norm": 5.16117000579834, + "learning_rate": 2.46e-06, + "loss": 0.9842, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.8475213050842285, + "learning_rate": 2.96e-06, + "loss": 0.8563, + "step": 150 + }, + { + "epoch": 0.09, + "grad_norm": 4.574538707733154, + "learning_rate": 3.46e-06, + "loss": 0.7801, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 4.637448787689209, + "learning_rate": 3.96e-06, + "loss": 0.7162, + "step": 200 + }, + { + "epoch": 0.11, + "grad_norm": 4.411474704742432, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.6716, + "step": 225 + }, + { + "epoch": 0.13, + "grad_norm": 4.7494096755981445, + "learning_rate": 4.960000000000001e-06, + "loss": 0.6494, + "step": 250 + }, + { + "epoch": 0.14, + "grad_norm": 4.384847640991211, + "learning_rate": 5.460000000000001e-06, + "loss": 0.6, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 4.231034278869629, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.5828, + "step": 300 + }, + { + "epoch": 0.16, + "grad_norm": 4.0297651290893555, + "learning_rate": 6.460000000000001e-06, + "loss": 0.5419, + "step": 325 + }, + { + "epoch": 0.18, + "grad_norm": 4.855077266693115, + "learning_rate": 6.96e-06, + "loss": 0.5342, + "step": 350 + }, + { + "epoch": 0.19, + "grad_norm": 4.083898544311523, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.5329, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 4.743200778961182, + "learning_rate": 7.960000000000002e-06, + "loss": 0.497, + "step": 400 + }, + { + "epoch": 0.21, + "grad_norm": 4.951360702514648, + "learning_rate": 8.46e-06, + "loss": 0.4966, + "step": 425 + }, + { + "epoch": 0.23, + "grad_norm": 4.463785171508789, + "learning_rate": 8.96e-06, + "loss": 0.4855, + "step": 450 + }, + { + "epoch": 0.24, + "grad_norm": 4.492912769317627, + "learning_rate": 9.460000000000001e-06, + "loss": 0.4628, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 4.218761444091797, + "learning_rate": 9.960000000000001e-06, + "loss": 0.4515, + "step": 500 + }, + { + "epoch": 0.26, + "grad_norm": 4.644235134124756, + "learning_rate": 9.997688442211056e-06, + "loss": 0.434, + "step": 525 + }, + { + "epoch": 0.28, + "grad_norm": 3.7479326725006104, + "learning_rate": 9.995175879396986e-06, + "loss": 0.4275, + "step": 550 + }, + { + "epoch": 0.29, + "grad_norm": 4.490478038787842, + "learning_rate": 9.992663316582915e-06, + "loss": 0.4283, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 3.6259822845458984, + "learning_rate": 9.990150753768844e-06, + "loss": 0.4201, + "step": 600 + }, + { + "epoch": 0.31, + "grad_norm": 3.9655518531799316, + "learning_rate": 9.987638190954775e-06, + "loss": 0.4146, + "step": 625 + }, + { + "epoch": 0.33, + "grad_norm": 4.631214618682861, + "learning_rate": 9.985125628140705e-06, + "loss": 0.4073, + "step": 650 + }, + { + "epoch": 0.34, + "grad_norm": 4.231256484985352, + "learning_rate": 9.982613065326634e-06, + "loss": 0.4024, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 3.9977126121520996, + "learning_rate": 9.980100502512565e-06, + "loss": 0.3885, + "step": 700 + }, + { + "epoch": 0.37, + "grad_norm": 4.1103434562683105, + "learning_rate": 9.977587939698493e-06, + "loss": 0.3708, + "step": 725 + }, + { + "epoch": 0.38, + "grad_norm": 3.9829013347625732, + "learning_rate": 9.975075376884424e-06, + "loss": 0.38, + "step": 750 + }, + { + "epoch": 0.39, + "grad_norm": 3.9997475147247314, + "learning_rate": 9.972562814070353e-06, + "loss": 0.3775, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 3.8803422451019287, + "learning_rate": 9.970050251256282e-06, + "loss": 0.3658, + "step": 800 + }, + { + "epoch": 0.42, + "grad_norm": 4.075962066650391, + "learning_rate": 9.967537688442212e-06, + "loss": 0.3747, + "step": 825 + }, + { + "epoch": 0.43, + "grad_norm": 4.341451168060303, + "learning_rate": 9.965025125628141e-06, + "loss": 0.3715, + "step": 850 + }, + { + "epoch": 0.44, + "grad_norm": 3.6851956844329834, + "learning_rate": 9.96251256281407e-06, + "loss": 0.3641, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 3.727428436279297, + "learning_rate": 9.960000000000001e-06, + "loss": 0.3512, + "step": 900 + }, + { + "epoch": 0.47, + "grad_norm": 3.6382839679718018, + "learning_rate": 9.95748743718593e-06, + "loss": 0.3429, + "step": 925 + }, + { + "epoch": 0.48, + "grad_norm": 3.4660427570343018, + "learning_rate": 9.95497487437186e-06, + "loss": 0.357, + "step": 950 + }, + { + "epoch": 0.49, + "grad_norm": 4.407652378082275, + "learning_rate": 9.952462311557791e-06, + "loss": 0.3446, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 3.7394986152648926, + "learning_rate": 9.949949748743718e-06, + "loss": 0.3442, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 0.25089502334594727, + "eval_runtime": 649.2241, + "eval_samples_per_second": 2.204, + "eval_steps_per_second": 2.204, + "eval_wer": 37.27076591154261, + "step": 1000 + }, + { + "epoch": 0.52, + "grad_norm": 3.865875244140625, + "learning_rate": 9.94743718592965e-06, + "loss": 0.3491, + "step": 1025 + }, + { + "epoch": 0.53, + "grad_norm": 4.8148393630981445, + "learning_rate": 9.944924623115579e-06, + "loss": 0.3289, + "step": 1050 + }, + { + "epoch": 0.54, + "grad_norm": 3.47527813911438, + "learning_rate": 9.942412060301508e-06, + "loss": 0.3264, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 3.9304556846618652, + "learning_rate": 9.93989949748744e-06, + "loss": 0.334, + "step": 1100 + }, + { + "epoch": 0.57, + "grad_norm": 3.4828851222991943, + "learning_rate": 9.937386934673367e-06, + "loss": 0.3307, + "step": 1125 + }, + { + "epoch": 0.58, + "grad_norm": 3.2522785663604736, + "learning_rate": 9.934874371859298e-06, + "loss": 0.3342, + "step": 1150 + }, + { + "epoch": 0.59, + "grad_norm": 3.6895081996917725, + "learning_rate": 9.932361809045227e-06, + "loss": 0.328, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 3.3385732173919678, + "learning_rate": 9.929849246231156e-06, + "loss": 0.3201, + "step": 1200 + }, + { + "epoch": 0.62, + "grad_norm": 4.071571350097656, + "learning_rate": 9.927336683417086e-06, + "loss": 0.3181, + "step": 1225 + }, + { + "epoch": 0.63, + "grad_norm": 3.6905808448791504, + "learning_rate": 9.924824120603017e-06, + "loss": 0.3111, + "step": 1250 + }, + { + "epoch": 0.64, + "grad_norm": 3.639202356338501, + "learning_rate": 9.922311557788944e-06, + "loss": 0.3103, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 3.3078248500823975, + "learning_rate": 9.919798994974875e-06, + "loss": 0.3039, + "step": 1300 + }, + { + "epoch": 0.67, + "grad_norm": 3.5267553329467773, + "learning_rate": 9.917286432160805e-06, + "loss": 0.3128, + "step": 1325 + }, + { + "epoch": 0.68, + "grad_norm": 3.4609274864196777, + "learning_rate": 9.914773869346734e-06, + "loss": 0.3109, + "step": 1350 + }, + { + "epoch": 0.69, + "grad_norm": 3.844273328781128, + "learning_rate": 9.912261306532665e-06, + "loss": 0.2998, + "step": 1375 + }, + { + "epoch": 0.71, + "grad_norm": 3.677699327468872, + "learning_rate": 9.909748743718593e-06, + "loss": 0.299, + "step": 1400 + }, + { + "epoch": 0.72, + "grad_norm": 3.8274316787719727, + "learning_rate": 9.907236180904524e-06, + "loss": 0.3025, + "step": 1425 + }, + { + "epoch": 0.73, + "grad_norm": 3.5496952533721924, + "learning_rate": 9.904723618090453e-06, + "loss": 0.2992, + "step": 1450 + }, + { + "epoch": 0.74, + "grad_norm": 3.6155552864074707, + "learning_rate": 9.902211055276382e-06, + "loss": 0.2987, + "step": 1475 + }, + { + "epoch": 0.76, + "grad_norm": 3.6053521633148193, + "learning_rate": 9.899698492462312e-06, + "loss": 0.3061, + "step": 1500 + }, + { + "epoch": 0.77, + "grad_norm": 3.570275068283081, + "learning_rate": 9.897185929648243e-06, + "loss": 0.3021, + "step": 1525 + }, + { + "epoch": 0.78, + "grad_norm": 3.0928289890289307, + "learning_rate": 9.894673366834172e-06, + "loss": 0.2916, + "step": 1550 + }, + { + "epoch": 0.79, + "grad_norm": 4.001222133636475, + "learning_rate": 9.892160804020101e-06, + "loss": 0.2915, + "step": 1575 + }, + { + "epoch": 0.81, + "grad_norm": 3.923720121383667, + "learning_rate": 9.88964824120603e-06, + "loss": 0.2912, + "step": 1600 + }, + { + "epoch": 0.82, + "grad_norm": 3.6034436225891113, + "learning_rate": 9.88713567839196e-06, + "loss": 0.2907, + "step": 1625 + }, + { + "epoch": 0.83, + "grad_norm": 3.5392444133758545, + "learning_rate": 9.884623115577891e-06, + "loss": 0.29, + "step": 1650 + }, + { + "epoch": 0.84, + "grad_norm": 3.473675012588501, + "learning_rate": 9.882110552763819e-06, + "loss": 0.2877, + "step": 1675 + }, + { + "epoch": 0.86, + "grad_norm": 4.562033176422119, + "learning_rate": 9.87959798994975e-06, + "loss": 0.2855, + "step": 1700 + }, + { + "epoch": 0.87, + "grad_norm": 3.6619389057159424, + "learning_rate": 9.877085427135679e-06, + "loss": 0.2823, + "step": 1725 + }, + { + "epoch": 0.88, + "grad_norm": 3.4154460430145264, + "learning_rate": 9.874572864321608e-06, + "loss": 0.2852, + "step": 1750 + }, + { + "epoch": 0.89, + "grad_norm": 3.561830759048462, + "learning_rate": 9.87206030150754e-06, + "loss": 0.2784, + "step": 1775 + }, + { + "epoch": 0.91, + "grad_norm": 3.7505078315734863, + "learning_rate": 9.869547738693469e-06, + "loss": 0.2747, + "step": 1800 + }, + { + "epoch": 0.92, + "grad_norm": 2.8509621620178223, + "learning_rate": 9.867035175879398e-06, + "loss": 0.2689, + "step": 1825 + }, + { + "epoch": 0.93, + "grad_norm": 3.5754363536834717, + "learning_rate": 9.864522613065327e-06, + "loss": 0.2736, + "step": 1850 + }, + { + "epoch": 0.94, + "grad_norm": 3.4208121299743652, + "learning_rate": 9.862010050251257e-06, + "loss": 0.283, + "step": 1875 + }, + { + "epoch": 0.96, + "grad_norm": 3.750174045562744, + "learning_rate": 9.859497487437186e-06, + "loss": 0.2622, + "step": 1900 + }, + { + "epoch": 0.97, + "grad_norm": 3.9755547046661377, + "learning_rate": 9.856984924623117e-06, + "loss": 0.2695, + "step": 1925 + }, + { + "epoch": 0.98, + "grad_norm": 3.5628674030303955, + "learning_rate": 9.854472361809046e-06, + "loss": 0.2716, + "step": 1950 + }, + { + "epoch": 0.99, + "grad_norm": 3.1685283184051514, + "learning_rate": 9.851959798994976e-06, + "loss": 0.2709, + "step": 1975 + }, + { + "epoch": 1.01, + "grad_norm": 3.691366195678711, + "learning_rate": 9.849447236180905e-06, + "loss": 0.2572, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.20135310292243958, + "eval_runtime": 645.9284, + "eval_samples_per_second": 2.215, + "eval_steps_per_second": 2.215, + "eval_wer": 30.218446601941746, + "step": 2000 + }, + { + "epoch": 1.02, + "grad_norm": 3.2129886150360107, + "learning_rate": 9.846934673366834e-06, + "loss": 0.2432, + "step": 2025 + }, + { + "epoch": 1.03, + "grad_norm": 3.053058385848999, + "learning_rate": 9.844422110552765e-06, + "loss": 0.2421, + "step": 2050 + }, + { + "epoch": 1.05, + "grad_norm": 2.9329400062561035, + "learning_rate": 9.841909547738695e-06, + "loss": 0.2458, + "step": 2075 + }, + { + "epoch": 1.06, + "grad_norm": 3.673103094100952, + "learning_rate": 9.839396984924624e-06, + "loss": 0.2459, + "step": 2100 + }, + { + "epoch": 1.07, + "grad_norm": 3.641197681427002, + "learning_rate": 9.836884422110553e-06, + "loss": 0.2521, + "step": 2125 + }, + { + "epoch": 1.08, + "grad_norm": 3.4105143547058105, + "learning_rate": 9.834371859296483e-06, + "loss": 0.2428, + "step": 2150 + }, + { + "epoch": 1.1, + "grad_norm": 3.7379119396209717, + "learning_rate": 9.831859296482414e-06, + "loss": 0.2448, + "step": 2175 + }, + { + "epoch": 1.11, + "grad_norm": 3.5225021839141846, + "learning_rate": 9.829346733668343e-06, + "loss": 0.24, + "step": 2200 + }, + { + "epoch": 1.12, + "grad_norm": 3.1047000885009766, + "learning_rate": 9.826834170854272e-06, + "loss": 0.2433, + "step": 2225 + }, + { + "epoch": 1.13, + "grad_norm": 2.8932137489318848, + "learning_rate": 9.824321608040202e-06, + "loss": 0.2481, + "step": 2250 + }, + { + "epoch": 1.15, + "grad_norm": 3.218092679977417, + "learning_rate": 9.821809045226131e-06, + "loss": 0.2387, + "step": 2275 + }, + { + "epoch": 1.16, + "grad_norm": 3.691563606262207, + "learning_rate": 9.81929648241206e-06, + "loss": 0.2396, + "step": 2300 + }, + { + "epoch": 1.17, + "grad_norm": 3.1356592178344727, + "learning_rate": 9.816783919597991e-06, + "loss": 0.2367, + "step": 2325 + }, + { + "epoch": 1.18, + "grad_norm": 3.2843363285064697, + "learning_rate": 9.81427135678392e-06, + "loss": 0.2281, + "step": 2350 + }, + { + "epoch": 1.2, + "grad_norm": 3.2618348598480225, + "learning_rate": 9.81175879396985e-06, + "loss": 0.2456, + "step": 2375 + }, + { + "epoch": 1.21, + "grad_norm": 3.1043753623962402, + "learning_rate": 9.809246231155781e-06, + "loss": 0.2305, + "step": 2400 + }, + { + "epoch": 1.22, + "grad_norm": 3.3077290058135986, + "learning_rate": 9.806733668341709e-06, + "loss": 0.2281, + "step": 2425 + }, + { + "epoch": 1.23, + "grad_norm": 3.1290342807769775, + "learning_rate": 9.80422110552764e-06, + "loss": 0.2365, + "step": 2450 + }, + { + "epoch": 1.25, + "grad_norm": 3.6781229972839355, + "learning_rate": 9.801708542713569e-06, + "loss": 0.2324, + "step": 2475 + }, + { + "epoch": 1.26, + "grad_norm": 3.1686601638793945, + "learning_rate": 9.799195979899498e-06, + "loss": 0.2393, + "step": 2500 + }, + { + "epoch": 1.27, + "grad_norm": 3.0842082500457764, + "learning_rate": 9.796683417085428e-06, + "loss": 0.233, + "step": 2525 + }, + { + "epoch": 1.28, + "grad_norm": 3.0532703399658203, + "learning_rate": 9.794170854271357e-06, + "loss": 0.2261, + "step": 2550 + }, + { + "epoch": 1.3, + "grad_norm": 3.2483017444610596, + "learning_rate": 9.791658291457288e-06, + "loss": 0.2287, + "step": 2575 + }, + { + "epoch": 1.31, + "grad_norm": 3.6076836585998535, + "learning_rate": 9.789145728643217e-06, + "loss": 0.2346, + "step": 2600 + }, + { + "epoch": 1.32, + "grad_norm": 3.3149044513702393, + "learning_rate": 9.786633165829147e-06, + "loss": 0.2272, + "step": 2625 + }, + { + "epoch": 1.34, + "grad_norm": 3.732696056365967, + "learning_rate": 9.784120603015076e-06, + "loss": 0.2287, + "step": 2650 + }, + { + "epoch": 1.35, + "grad_norm": 3.115788698196411, + "learning_rate": 9.781608040201007e-06, + "loss": 0.2376, + "step": 2675 + }, + { + "epoch": 1.36, + "grad_norm": 3.86326265335083, + "learning_rate": 9.779095477386934e-06, + "loss": 0.2296, + "step": 2700 + }, + { + "epoch": 1.37, + "grad_norm": 3.796238899230957, + "learning_rate": 9.776582914572866e-06, + "loss": 0.2221, + "step": 2725 + }, + { + "epoch": 1.39, + "grad_norm": 3.3138833045959473, + "learning_rate": 9.774070351758795e-06, + "loss": 0.2297, + "step": 2750 + }, + { + "epoch": 1.4, + "grad_norm": 3.0888984203338623, + "learning_rate": 9.771557788944724e-06, + "loss": 0.2248, + "step": 2775 + }, + { + "epoch": 1.41, + "grad_norm": 3.3492276668548584, + "learning_rate": 9.769045226130655e-06, + "loss": 0.2329, + "step": 2800 + }, + { + "epoch": 1.42, + "grad_norm": 3.301872968673706, + "learning_rate": 9.766532663316583e-06, + "loss": 0.2294, + "step": 2825 + }, + { + "epoch": 1.44, + "grad_norm": 3.3724021911621094, + "learning_rate": 9.764020100502514e-06, + "loss": 0.2239, + "step": 2850 + }, + { + "epoch": 1.45, + "grad_norm": 2.8966658115386963, + "learning_rate": 9.761507537688443e-06, + "loss": 0.2202, + "step": 2875 + }, + { + "epoch": 1.46, + "grad_norm": 3.436643362045288, + "learning_rate": 9.758994974874372e-06, + "loss": 0.2215, + "step": 2900 + }, + { + "epoch": 1.47, + "grad_norm": 3.310248374938965, + "learning_rate": 9.756482412060302e-06, + "loss": 0.2212, + "step": 2925 + }, + { + "epoch": 1.49, + "grad_norm": 3.329249143600464, + "learning_rate": 9.753969849246233e-06, + "loss": 0.2161, + "step": 2950 + }, + { + "epoch": 1.5, + "grad_norm": 3.455294132232666, + "learning_rate": 9.75145728643216e-06, + "loss": 0.2179, + "step": 2975 + }, + { + "epoch": 1.51, + "grad_norm": 2.8488094806671143, + "learning_rate": 9.748944723618091e-06, + "loss": 0.2215, + "step": 3000 + }, + { + "epoch": 1.51, + "eval_loss": 0.18193194270133972, + "eval_runtime": 647.6842, + "eval_samples_per_second": 2.209, + "eval_steps_per_second": 2.209, + "eval_wer": 28.398058252427184, + "step": 3000 + }, + { + "epoch": 1.52, + "grad_norm": 3.6192870140075684, + "learning_rate": 9.74643216080402e-06, + "loss": 0.2138, + "step": 3025 + }, + { + "epoch": 1.54, + "grad_norm": 3.118206739425659, + "learning_rate": 9.74391959798995e-06, + "loss": 0.2196, + "step": 3050 + }, + { + "epoch": 1.55, + "grad_norm": 3.1838414669036865, + "learning_rate": 9.741407035175881e-06, + "loss": 0.2205, + "step": 3075 + }, + { + "epoch": 1.56, + "grad_norm": 3.3293378353118896, + "learning_rate": 9.738894472361809e-06, + "loss": 0.223, + "step": 3100 + }, + { + "epoch": 1.57, + "grad_norm": 3.3730127811431885, + "learning_rate": 9.73638190954774e-06, + "loss": 0.2102, + "step": 3125 + }, + { + "epoch": 1.59, + "grad_norm": 3.2894821166992188, + "learning_rate": 9.733869346733669e-06, + "loss": 0.2172, + "step": 3150 + }, + { + "epoch": 1.6, + "grad_norm": 4.162414073944092, + "learning_rate": 9.731356783919598e-06, + "loss": 0.2144, + "step": 3175 + }, + { + "epoch": 1.61, + "grad_norm": 3.0499982833862305, + "learning_rate": 9.72884422110553e-06, + "loss": 0.219, + "step": 3200 + }, + { + "epoch": 1.62, + "grad_norm": 3.087831735610962, + "learning_rate": 9.726331658291459e-06, + "loss": 0.2161, + "step": 3225 + }, + { + "epoch": 1.64, + "grad_norm": 3.3241844177246094, + "learning_rate": 9.723819095477388e-06, + "loss": 0.2186, + "step": 3250 + }, + { + "epoch": 1.65, + "grad_norm": 3.1080734729766846, + "learning_rate": 9.721306532663317e-06, + "loss": 0.2114, + "step": 3275 + }, + { + "epoch": 1.66, + "grad_norm": 3.1843862533569336, + "learning_rate": 9.718793969849247e-06, + "loss": 0.2179, + "step": 3300 + }, + { + "epoch": 1.68, + "grad_norm": 3.8087072372436523, + "learning_rate": 9.716281407035176e-06, + "loss": 0.2064, + "step": 3325 + }, + { + "epoch": 1.69, + "grad_norm": 3.0165863037109375, + "learning_rate": 9.713768844221107e-06, + "loss": 0.2158, + "step": 3350 + }, + { + "epoch": 1.7, + "grad_norm": 3.2621240615844727, + "learning_rate": 9.711256281407035e-06, + "loss": 0.2076, + "step": 3375 + }, + { + "epoch": 1.71, + "grad_norm": 3.336860418319702, + "learning_rate": 9.708743718592966e-06, + "loss": 0.2124, + "step": 3400 + }, + { + "epoch": 1.73, + "grad_norm": 3.2854115962982178, + "learning_rate": 9.706231155778895e-06, + "loss": 0.215, + "step": 3425 + }, + { + "epoch": 1.74, + "grad_norm": 3.2942354679107666, + "learning_rate": 9.703718592964824e-06, + "loss": 0.2106, + "step": 3450 + }, + { + "epoch": 1.75, + "grad_norm": 3.563640594482422, + "learning_rate": 9.701206030150755e-06, + "loss": 0.2088, + "step": 3475 + }, + { + "epoch": 1.76, + "grad_norm": 3.1726765632629395, + "learning_rate": 9.698693467336685e-06, + "loss": 0.2129, + "step": 3500 + }, + { + "epoch": 1.78, + "grad_norm": 3.0317459106445312, + "learning_rate": 9.696180904522614e-06, + "loss": 0.2128, + "step": 3525 + }, + { + "epoch": 1.79, + "grad_norm": 3.0901999473571777, + "learning_rate": 9.693668341708543e-06, + "loss": 0.2095, + "step": 3550 + }, + { + "epoch": 1.8, + "grad_norm": 3.3199517726898193, + "learning_rate": 9.691155778894473e-06, + "loss": 0.22, + "step": 3575 + }, + { + "epoch": 1.81, + "grad_norm": 3.839920997619629, + "learning_rate": 9.688643216080402e-06, + "loss": 0.2096, + "step": 3600 + }, + { + "epoch": 1.83, + "grad_norm": 3.180243730545044, + "learning_rate": 9.686130653266333e-06, + "loss": 0.2117, + "step": 3625 + }, + { + "epoch": 1.84, + "grad_norm": 3.3647642135620117, + "learning_rate": 9.683618090452262e-06, + "loss": 0.2118, + "step": 3650 + }, + { + "epoch": 1.85, + "grad_norm": 3.502943992614746, + "learning_rate": 9.681105527638192e-06, + "loss": 0.2031, + "step": 3675 + }, + { + "epoch": 1.86, + "grad_norm": 3.2605371475219727, + "learning_rate": 9.678592964824121e-06, + "loss": 0.2032, + "step": 3700 + }, + { + "epoch": 1.88, + "grad_norm": 3.3966424465179443, + "learning_rate": 9.67608040201005e-06, + "loss": 0.2082, + "step": 3725 + }, + { + "epoch": 1.89, + "grad_norm": 3.329594850540161, + "learning_rate": 9.673567839195981e-06, + "loss": 0.2084, + "step": 3750 + }, + { + "epoch": 1.9, + "grad_norm": 3.8776133060455322, + "learning_rate": 9.67105527638191e-06, + "loss": 0.2021, + "step": 3775 + }, + { + "epoch": 1.91, + "grad_norm": 3.222127914428711, + "learning_rate": 9.66854271356784e-06, + "loss": 0.2064, + "step": 3800 + }, + { + "epoch": 1.93, + "grad_norm": 3.0964362621307373, + "learning_rate": 9.666030150753771e-06, + "loss": 0.1983, + "step": 3825 + }, + { + "epoch": 1.94, + "grad_norm": 2.8416354656219482, + "learning_rate": 9.663517587939699e-06, + "loss": 0.2012, + "step": 3850 + }, + { + "epoch": 1.95, + "grad_norm": 3.381728410720825, + "learning_rate": 9.66100502512563e-06, + "loss": 0.1949, + "step": 3875 + }, + { + "epoch": 1.96, + "grad_norm": 3.2973520755767822, + "learning_rate": 9.658492462311559e-06, + "loss": 0.2017, + "step": 3900 + }, + { + "epoch": 1.98, + "grad_norm": 3.0813581943511963, + "learning_rate": 9.655979899497488e-06, + "loss": 0.1973, + "step": 3925 + }, + { + "epoch": 1.99, + "grad_norm": 3.1948111057281494, + "learning_rate": 9.653467336683418e-06, + "loss": 0.2026, + "step": 3950 + }, + { + "epoch": 2.0, + "grad_norm": 2.9330861568450928, + "learning_rate": 9.650954773869347e-06, + "loss": 0.2048, + "step": 3975 + }, + { + "epoch": 2.02, + "grad_norm": 2.610488176345825, + "learning_rate": 9.648442211055276e-06, + "loss": 0.1742, + "step": 4000 + }, + { + "epoch": 2.02, + "eval_loss": 0.1707114577293396, + "eval_runtime": 641.1798, + "eval_samples_per_second": 2.232, + "eval_steps_per_second": 2.232, + "eval_wer": 26.786677454153185, + "step": 4000 + }, + { + "epoch": 2.03, + "grad_norm": 2.789903402328491, + "learning_rate": 9.645929648241207e-06, + "loss": 0.1738, + "step": 4025 + }, + { + "epoch": 2.04, + "grad_norm": 3.0232911109924316, + "learning_rate": 9.643417085427137e-06, + "loss": 0.1784, + "step": 4050 + }, + { + "epoch": 2.05, + "grad_norm": 2.9220082759857178, + "learning_rate": 9.640904522613066e-06, + "loss": 0.1683, + "step": 4075 + }, + { + "epoch": 2.07, + "grad_norm": 3.11247181892395, + "learning_rate": 9.638391959798997e-06, + "loss": 0.1817, + "step": 4100 + }, + { + "epoch": 2.08, + "grad_norm": 3.3239734172821045, + "learning_rate": 9.635879396984925e-06, + "loss": 0.1744, + "step": 4125 + }, + { + "epoch": 2.09, + "grad_norm": 3.141796350479126, + "learning_rate": 9.633366834170856e-06, + "loss": 0.1772, + "step": 4150 + }, + { + "epoch": 2.1, + "grad_norm": 3.1875391006469727, + "learning_rate": 9.630854271356785e-06, + "loss": 0.1741, + "step": 4175 + }, + { + "epoch": 2.12, + "grad_norm": 3.2703638076782227, + "learning_rate": 9.628341708542714e-06, + "loss": 0.179, + "step": 4200 + }, + { + "epoch": 2.13, + "grad_norm": 3.089282989501953, + "learning_rate": 9.625829145728644e-06, + "loss": 0.1748, + "step": 4225 + }, + { + "epoch": 2.14, + "grad_norm": 3.392026901245117, + "learning_rate": 9.623316582914573e-06, + "loss": 0.1725, + "step": 4250 + }, + { + "epoch": 2.15, + "grad_norm": 3.30961012840271, + "learning_rate": 9.620804020100504e-06, + "loss": 0.1784, + "step": 4275 + }, + { + "epoch": 2.17, + "grad_norm": 3.252200126647949, + "learning_rate": 9.618291457286433e-06, + "loss": 0.1786, + "step": 4300 + }, + { + "epoch": 2.18, + "grad_norm": 3.3455686569213867, + "learning_rate": 9.615778894472363e-06, + "loss": 0.177, + "step": 4325 + }, + { + "epoch": 2.19, + "grad_norm": 3.1732468605041504, + "learning_rate": 9.613266331658292e-06, + "loss": 0.1679, + "step": 4350 + }, + { + "epoch": 2.2, + "grad_norm": 3.1775176525115967, + "learning_rate": 9.610753768844223e-06, + "loss": 0.1734, + "step": 4375 + }, + { + "epoch": 2.22, + "grad_norm": 3.1159160137176514, + "learning_rate": 9.60824120603015e-06, + "loss": 0.1784, + "step": 4400 + }, + { + "epoch": 2.23, + "grad_norm": 3.3726646900177, + "learning_rate": 9.605728643216082e-06, + "loss": 0.169, + "step": 4425 + }, + { + "epoch": 2.24, + "grad_norm": 3.5800869464874268, + "learning_rate": 9.60321608040201e-06, + "loss": 0.1756, + "step": 4450 + }, + { + "epoch": 2.25, + "grad_norm": 2.9235026836395264, + "learning_rate": 9.60070351758794e-06, + "loss": 0.1776, + "step": 4475 + }, + { + "epoch": 2.27, + "grad_norm": 3.007122278213501, + "learning_rate": 9.598190954773871e-06, + "loss": 0.1753, + "step": 4500 + }, + { + "epoch": 2.28, + "grad_norm": 3.430283784866333, + "learning_rate": 9.595678391959799e-06, + "loss": 0.171, + "step": 4525 + }, + { + "epoch": 2.29, + "grad_norm": 3.40035343170166, + "learning_rate": 9.59316582914573e-06, + "loss": 0.1615, + "step": 4550 + }, + { + "epoch": 2.3, + "grad_norm": 3.3558509349823, + "learning_rate": 9.59065326633166e-06, + "loss": 0.1723, + "step": 4575 + }, + { + "epoch": 2.32, + "grad_norm": 3.077122926712036, + "learning_rate": 9.588140703517588e-06, + "loss": 0.1701, + "step": 4600 + }, + { + "epoch": 2.33, + "grad_norm": 2.906874656677246, + "learning_rate": 9.585628140703518e-06, + "loss": 0.1744, + "step": 4625 + }, + { + "epoch": 2.34, + "grad_norm": 2.717848300933838, + "learning_rate": 9.583115577889449e-06, + "loss": 0.1678, + "step": 4650 + }, + { + "epoch": 2.36, + "grad_norm": 3.2456955909729004, + "learning_rate": 9.580603015075378e-06, + "loss": 0.1672, + "step": 4675 + }, + { + "epoch": 2.37, + "grad_norm": 3.5863399505615234, + "learning_rate": 9.578090452261307e-06, + "loss": 0.1743, + "step": 4700 + }, + { + "epoch": 2.38, + "grad_norm": 3.393237352371216, + "learning_rate": 9.575577889447237e-06, + "loss": 0.1738, + "step": 4725 + }, + { + "epoch": 2.39, + "grad_norm": 3.576611280441284, + "learning_rate": 9.573065326633166e-06, + "loss": 0.1714, + "step": 4750 + }, + { + "epoch": 2.41, + "grad_norm": 2.9657905101776123, + "learning_rate": 9.570552763819097e-06, + "loss": 0.1665, + "step": 4775 + }, + { + "epoch": 2.42, + "grad_norm": 3.1622745990753174, + "learning_rate": 9.568040201005025e-06, + "loss": 0.1727, + "step": 4800 + }, + { + "epoch": 2.43, + "grad_norm": 3.5474541187286377, + "learning_rate": 9.565527638190956e-06, + "loss": 0.1667, + "step": 4825 + }, + { + "epoch": 2.44, + "grad_norm": 2.940755844116211, + "learning_rate": 9.563015075376885e-06, + "loss": 0.1656, + "step": 4850 + }, + { + "epoch": 2.46, + "grad_norm": 3.325941324234009, + "learning_rate": 9.560502512562814e-06, + "loss": 0.1726, + "step": 4875 + }, + { + "epoch": 2.47, + "grad_norm": 3.2569503784179688, + "learning_rate": 9.557989949748745e-06, + "loss": 0.1668, + "step": 4900 + }, + { + "epoch": 2.48, + "grad_norm": 3.4932727813720703, + "learning_rate": 9.555477386934675e-06, + "loss": 0.1647, + "step": 4925 + }, + { + "epoch": 2.49, + "grad_norm": 3.0983216762542725, + "learning_rate": 9.552964824120604e-06, + "loss": 0.1722, + "step": 4950 + }, + { + "epoch": 2.51, + "grad_norm": 3.3976876735687256, + "learning_rate": 9.550452261306533e-06, + "loss": 0.167, + "step": 4975 + }, + { + "epoch": 2.52, + "grad_norm": 3.5759806632995605, + "learning_rate": 9.547939698492463e-06, + "loss": 0.1711, + "step": 5000 + }, + { + "epoch": 2.52, + "eval_loss": 0.16534781455993652, + "eval_runtime": 640.9436, + "eval_samples_per_second": 2.233, + "eval_steps_per_second": 2.233, + "eval_wer": 25.957389428263216, + "step": 5000 + }, + { + "epoch": 2.53, + "grad_norm": 3.3311638832092285, + "learning_rate": 9.545427135678392e-06, + "loss": 0.1698, + "step": 5025 + }, + { + "epoch": 2.54, + "grad_norm": 3.27502179145813, + "learning_rate": 9.542914572864323e-06, + "loss": 0.1728, + "step": 5050 + }, + { + "epoch": 2.56, + "grad_norm": 3.318530559539795, + "learning_rate": 9.540402010050252e-06, + "loss": 0.1722, + "step": 5075 + }, + { + "epoch": 2.57, + "grad_norm": 3.1254446506500244, + "learning_rate": 9.537889447236182e-06, + "loss": 0.1668, + "step": 5100 + }, + { + "epoch": 2.58, + "grad_norm": 3.226919174194336, + "learning_rate": 9.535376884422111e-06, + "loss": 0.1705, + "step": 5125 + }, + { + "epoch": 2.59, + "grad_norm": 3.8104517459869385, + "learning_rate": 9.53286432160804e-06, + "loss": 0.1692, + "step": 5150 + }, + { + "epoch": 2.61, + "grad_norm": 3.2810254096984863, + "learning_rate": 9.530351758793971e-06, + "loss": 0.1662, + "step": 5175 + }, + { + "epoch": 2.62, + "grad_norm": 3.153223991394043, + "learning_rate": 9.5278391959799e-06, + "loss": 0.1661, + "step": 5200 + }, + { + "epoch": 2.63, + "grad_norm": 2.9779469966888428, + "learning_rate": 9.52532663316583e-06, + "loss": 0.1662, + "step": 5225 + }, + { + "epoch": 2.64, + "grad_norm": 3.2319440841674805, + "learning_rate": 9.52281407035176e-06, + "loss": 0.1674, + "step": 5250 + }, + { + "epoch": 2.66, + "grad_norm": 3.009509801864624, + "learning_rate": 9.520301507537689e-06, + "loss": 0.1675, + "step": 5275 + }, + { + "epoch": 2.67, + "grad_norm": 3.2088656425476074, + "learning_rate": 9.51778894472362e-06, + "loss": 0.1691, + "step": 5300 + }, + { + "epoch": 2.68, + "grad_norm": 2.947505474090576, + "learning_rate": 9.515276381909549e-06, + "loss": 0.1688, + "step": 5325 + }, + { + "epoch": 2.7, + "grad_norm": 3.309274196624756, + "learning_rate": 9.512763819095478e-06, + "loss": 0.1607, + "step": 5350 + }, + { + "epoch": 2.71, + "grad_norm": 3.2126705646514893, + "learning_rate": 9.510251256281408e-06, + "loss": 0.1692, + "step": 5375 + }, + { + "epoch": 2.72, + "grad_norm": 3.2418758869171143, + "learning_rate": 9.507738693467337e-06, + "loss": 0.1655, + "step": 5400 + }, + { + "epoch": 2.73, + "grad_norm": 3.7290778160095215, + "learning_rate": 9.505226130653266e-06, + "loss": 0.1652, + "step": 5425 + }, + { + "epoch": 2.75, + "grad_norm": 3.0041186809539795, + "learning_rate": 9.502713567839197e-06, + "loss": 0.1616, + "step": 5450 + }, + { + "epoch": 2.76, + "grad_norm": 3.080031394958496, + "learning_rate": 9.500201005025127e-06, + "loss": 0.1622, + "step": 5475 + }, + { + "epoch": 2.77, + "grad_norm": 3.051947593688965, + "learning_rate": 9.497688442211056e-06, + "loss": 0.1666, + "step": 5500 + }, + { + "epoch": 2.78, + "grad_norm": 2.831784963607788, + "learning_rate": 9.495175879396987e-06, + "loss": 0.1615, + "step": 5525 + }, + { + "epoch": 2.8, + "grad_norm": 3.337547779083252, + "learning_rate": 9.492663316582915e-06, + "loss": 0.1623, + "step": 5550 + }, + { + "epoch": 2.81, + "grad_norm": 2.887444257736206, + "learning_rate": 9.490150753768846e-06, + "loss": 0.1603, + "step": 5575 + }, + { + "epoch": 2.82, + "grad_norm": 3.4559426307678223, + "learning_rate": 9.487638190954775e-06, + "loss": 0.1556, + "step": 5600 + }, + { + "epoch": 2.83, + "grad_norm": 3.687380790710449, + "learning_rate": 9.485125628140704e-06, + "loss": 0.1674, + "step": 5625 + }, + { + "epoch": 2.85, + "grad_norm": 3.254998207092285, + "learning_rate": 9.482613065326634e-06, + "loss": 0.1588, + "step": 5650 + }, + { + "epoch": 2.86, + "grad_norm": 3.1799871921539307, + "learning_rate": 9.480100502512563e-06, + "loss": 0.1569, + "step": 5675 + }, + { + "epoch": 2.87, + "grad_norm": 3.085669755935669, + "learning_rate": 9.477587939698494e-06, + "loss": 0.1521, + "step": 5700 + }, + { + "epoch": 2.88, + "grad_norm": 6.66331148147583, + "learning_rate": 9.475075376884423e-06, + "loss": 0.1601, + "step": 5725 + }, + { + "epoch": 2.9, + "grad_norm": 2.997605323791504, + "learning_rate": 9.472562814070353e-06, + "loss": 0.1682, + "step": 5750 + }, + { + "epoch": 2.91, + "grad_norm": 3.2419841289520264, + "learning_rate": 9.470050251256282e-06, + "loss": 0.1596, + "step": 5775 + }, + { + "epoch": 2.92, + "grad_norm": 3.0139105319976807, + "learning_rate": 9.467537688442213e-06, + "loss": 0.1613, + "step": 5800 + }, + { + "epoch": 2.93, + "grad_norm": 3.2966501712799072, + "learning_rate": 9.46502512562814e-06, + "loss": 0.1592, + "step": 5825 + }, + { + "epoch": 2.95, + "grad_norm": 3.321432113647461, + "learning_rate": 9.462512562814072e-06, + "loss": 0.1655, + "step": 5850 + }, + { + "epoch": 2.96, + "grad_norm": 3.232642650604248, + "learning_rate": 9.460000000000001e-06, + "loss": 0.1591, + "step": 5875 + }, + { + "epoch": 2.97, + "grad_norm": 3.370011806488037, + "learning_rate": 9.45748743718593e-06, + "loss": 0.1661, + "step": 5900 + }, + { + "epoch": 2.98, + "grad_norm": 3.0988075733184814, + "learning_rate": 9.454974874371861e-06, + "loss": 0.1579, + "step": 5925 + }, + { + "epoch": 3.0, + "grad_norm": 3.4116175174713135, + "learning_rate": 9.452462311557789e-06, + "loss": 0.1645, + "step": 5950 + }, + { + "epoch": 3.01, + "grad_norm": 2.821533441543579, + "learning_rate": 9.44994974874372e-06, + "loss": 0.1402, + "step": 5975 + }, + { + "epoch": 3.02, + "grad_norm": 2.989851951599121, + "learning_rate": 9.44743718592965e-06, + "loss": 0.1373, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.16814002394676208, + "eval_runtime": 644.3048, + "eval_samples_per_second": 2.221, + "eval_steps_per_second": 2.221, + "eval_wer": 25.977615965480044, + "step": 6000 + }, + { + "epoch": 3.04, + "grad_norm": 2.3335049152374268, + "learning_rate": 9.444924623115579e-06, + "loss": 0.1296, + "step": 6025 + }, + { + "epoch": 3.05, + "grad_norm": 3.0255050659179688, + "learning_rate": 9.442412060301508e-06, + "loss": 0.1292, + "step": 6050 + }, + { + "epoch": 3.06, + "grad_norm": 3.270336627960205, + "learning_rate": 9.439899497487439e-06, + "loss": 0.1323, + "step": 6075 + }, + { + "epoch": 3.07, + "grad_norm": 2.6645612716674805, + "learning_rate": 9.43748743718593e-06, + "loss": 0.1314, + "step": 6100 + }, + { + "epoch": 3.09, + "grad_norm": 3.163541316986084, + "learning_rate": 9.43497487437186e-06, + "loss": 0.1319, + "step": 6125 + }, + { + "epoch": 3.1, + "grad_norm": 3.179093837738037, + "learning_rate": 9.432462311557789e-06, + "loss": 0.1319, + "step": 6150 + }, + { + "epoch": 3.11, + "grad_norm": 3.2071480751037598, + "learning_rate": 9.42994974874372e-06, + "loss": 0.1323, + "step": 6175 + }, + { + "epoch": 3.12, + "grad_norm": 3.2225699424743652, + "learning_rate": 9.42743718592965e-06, + "loss": 0.1329, + "step": 6200 + }, + { + "epoch": 3.14, + "grad_norm": 3.2494139671325684, + "learning_rate": 9.424924623115579e-06, + "loss": 0.135, + "step": 6225 + }, + { + "epoch": 3.15, + "grad_norm": 3.1642544269561768, + "learning_rate": 9.422412060301508e-06, + "loss": 0.1316, + "step": 6250 + }, + { + "epoch": 3.16, + "grad_norm": 2.78086519241333, + "learning_rate": 9.419899497487437e-06, + "loss": 0.1357, + "step": 6275 + }, + { + "epoch": 3.17, + "grad_norm": 3.117426633834839, + "learning_rate": 9.417386934673367e-06, + "loss": 0.1401, + "step": 6300 + }, + { + "epoch": 3.19, + "grad_norm": 3.0528464317321777, + "learning_rate": 9.414874371859298e-06, + "loss": 0.1337, + "step": 6325 + }, + { + "epoch": 3.2, + "grad_norm": 2.7742674350738525, + "learning_rate": 9.412361809045227e-06, + "loss": 0.1294, + "step": 6350 + }, + { + "epoch": 3.21, + "grad_norm": 3.284179210662842, + "learning_rate": 9.409849246231156e-06, + "loss": 0.131, + "step": 6375 + }, + { + "epoch": 3.22, + "grad_norm": 3.261300563812256, + "learning_rate": 9.407336683417086e-06, + "loss": 0.132, + "step": 6400 + }, + { + "epoch": 3.24, + "grad_norm": 3.436267375946045, + "learning_rate": 9.404824120603015e-06, + "loss": 0.1306, + "step": 6425 + }, + { + "epoch": 3.25, + "grad_norm": 3.1308603286743164, + "learning_rate": 9.402311557788946e-06, + "loss": 0.1316, + "step": 6450 + }, + { + "epoch": 3.26, + "grad_norm": 2.6901774406433105, + "learning_rate": 9.399798994974875e-06, + "loss": 0.1352, + "step": 6475 + }, + { + "epoch": 3.27, + "grad_norm": 2.8536081314086914, + "learning_rate": 9.397286432160805e-06, + "loss": 0.1294, + "step": 6500 + }, + { + "epoch": 3.29, + "grad_norm": 3.0733909606933594, + "learning_rate": 9.394773869346736e-06, + "loss": 0.1336, + "step": 6525 + }, + { + "epoch": 3.3, + "grad_norm": 2.5114903450012207, + "learning_rate": 9.392261306532663e-06, + "loss": 0.1336, + "step": 6550 + }, + { + "epoch": 3.31, + "grad_norm": 3.215423822402954, + "learning_rate": 9.389748743718594e-06, + "loss": 0.139, + "step": 6575 + }, + { + "epoch": 3.32, + "grad_norm": 3.2459371089935303, + "learning_rate": 9.387236180904524e-06, + "loss": 0.1326, + "step": 6600 + }, + { + "epoch": 3.34, + "grad_norm": 3.6960456371307373, + "learning_rate": 9.384723618090453e-06, + "loss": 0.1303, + "step": 6625 + }, + { + "epoch": 3.35, + "grad_norm": 3.082793951034546, + "learning_rate": 9.382211055276382e-06, + "loss": 0.1323, + "step": 6650 + }, + { + "epoch": 3.36, + "grad_norm": 3.442915439605713, + "learning_rate": 9.379698492462312e-06, + "loss": 0.1344, + "step": 6675 + }, + { + "epoch": 3.38, + "grad_norm": 3.179396867752075, + "learning_rate": 9.377185929648241e-06, + "loss": 0.1367, + "step": 6700 + }, + { + "epoch": 3.39, + "grad_norm": 3.6881515979766846, + "learning_rate": 9.374673366834172e-06, + "loss": 0.1288, + "step": 6725 + }, + { + "epoch": 3.4, + "grad_norm": 3.6729483604431152, + "learning_rate": 9.372160804020101e-06, + "loss": 0.1342, + "step": 6750 + }, + { + "epoch": 3.41, + "grad_norm": 3.5134401321411133, + "learning_rate": 9.36964824120603e-06, + "loss": 0.1329, + "step": 6775 + }, + { + "epoch": 3.43, + "grad_norm": 3.2673442363739014, + "learning_rate": 9.367135678391962e-06, + "loss": 0.1331, + "step": 6800 + }, + { + "epoch": 3.44, + "grad_norm": 3.155186891555786, + "learning_rate": 9.36462311557789e-06, + "loss": 0.1268, + "step": 6825 + }, + { + "epoch": 3.45, + "grad_norm": 3.012155771255493, + "learning_rate": 9.36211055276382e-06, + "loss": 0.1295, + "step": 6850 + }, + { + "epoch": 3.46, + "grad_norm": 3.19975209236145, + "learning_rate": 9.35959798994975e-06, + "loss": 0.1309, + "step": 6875 + }, + { + "epoch": 3.48, + "grad_norm": 3.403193473815918, + "learning_rate": 9.357085427135679e-06, + "loss": 0.1292, + "step": 6900 + }, + { + "epoch": 3.49, + "grad_norm": 3.423659086227417, + "learning_rate": 9.354572864321608e-06, + "loss": 0.1303, + "step": 6925 + }, + { + "epoch": 3.5, + "grad_norm": 2.8954126834869385, + "learning_rate": 9.352060301507538e-06, + "loss": 0.1306, + "step": 6950 + }, + { + "epoch": 3.51, + "grad_norm": 3.165268898010254, + "learning_rate": 9.349547738693469e-06, + "loss": 0.1335, + "step": 6975 + }, + { + "epoch": 3.53, + "grad_norm": 3.148109197616577, + "learning_rate": 9.347035175879398e-06, + "loss": 0.1359, + "step": 7000 + }, + { + "epoch": 3.53, + "eval_loss": 0.16573382914066315, + "eval_runtime": 640.1142, + "eval_samples_per_second": 2.236, + "eval_steps_per_second": 2.236, + "eval_wer": 25.431499460625673, + "step": 7000 + }, + { + "epoch": 3.54, + "grad_norm": 3.1869614124298096, + "learning_rate": 9.344522613065327e-06, + "loss": 0.1288, + "step": 7025 + }, + { + "epoch": 3.55, + "grad_norm": 3.3055672645568848, + "learning_rate": 9.342010050251257e-06, + "loss": 0.1355, + "step": 7050 + }, + { + "epoch": 3.56, + "grad_norm": 3.1968953609466553, + "learning_rate": 9.339497487437188e-06, + "loss": 0.13, + "step": 7075 + }, + { + "epoch": 3.58, + "grad_norm": 3.080793857574463, + "learning_rate": 9.336984924623115e-06, + "loss": 0.1286, + "step": 7100 + }, + { + "epoch": 3.59, + "grad_norm": 2.8871359825134277, + "learning_rate": 9.334472361809046e-06, + "loss": 0.1305, + "step": 7125 + }, + { + "epoch": 3.6, + "grad_norm": 2.922416925430298, + "learning_rate": 9.331959798994976e-06, + "loss": 0.1285, + "step": 7150 + }, + { + "epoch": 3.61, + "grad_norm": 3.3787424564361572, + "learning_rate": 9.329447236180905e-06, + "loss": 0.1282, + "step": 7175 + }, + { + "epoch": 3.63, + "grad_norm": 3.2346718311309814, + "learning_rate": 9.326934673366836e-06, + "loss": 0.131, + "step": 7200 + }, + { + "epoch": 3.64, + "grad_norm": 3.152421712875366, + "learning_rate": 9.324422110552764e-06, + "loss": 0.1276, + "step": 7225 + }, + { + "epoch": 3.65, + "grad_norm": 2.8158936500549316, + "learning_rate": 9.321909547738695e-06, + "loss": 0.1279, + "step": 7250 + }, + { + "epoch": 3.66, + "grad_norm": 3.505182981491089, + "learning_rate": 9.319396984924624e-06, + "loss": 0.1295, + "step": 7275 + }, + { + "epoch": 3.68, + "grad_norm": 3.118612051010132, + "learning_rate": 9.316884422110553e-06, + "loss": 0.1277, + "step": 7300 + }, + { + "epoch": 3.69, + "grad_norm": 3.1707253456115723, + "learning_rate": 9.314371859296483e-06, + "loss": 0.1317, + "step": 7325 + }, + { + "epoch": 3.7, + "grad_norm": 3.3863115310668945, + "learning_rate": 9.311859296482414e-06, + "loss": 0.1283, + "step": 7350 + }, + { + "epoch": 3.72, + "grad_norm": 3.9100446701049805, + "learning_rate": 9.309346733668343e-06, + "loss": 0.1337, + "step": 7375 + }, + { + "epoch": 3.73, + "grad_norm": 3.1977946758270264, + "learning_rate": 9.306834170854272e-06, + "loss": 0.1267, + "step": 7400 + }, + { + "epoch": 3.74, + "grad_norm": 4.162247657775879, + "learning_rate": 9.304321608040201e-06, + "loss": 0.1367, + "step": 7425 + }, + { + "epoch": 3.75, + "grad_norm": 3.2267277240753174, + "learning_rate": 9.30180904522613e-06, + "loss": 0.125, + "step": 7450 + }, + { + "epoch": 3.77, + "grad_norm": 3.0316526889801025, + "learning_rate": 9.299296482412062e-06, + "loss": 0.1264, + "step": 7475 + }, + { + "epoch": 3.78, + "grad_norm": 3.3379135131835938, + "learning_rate": 9.296783919597991e-06, + "loss": 0.1324, + "step": 7500 + }, + { + "epoch": 3.79, + "grad_norm": 3.4995505809783936, + "learning_rate": 9.29427135678392e-06, + "loss": 0.1307, + "step": 7525 + }, + { + "epoch": 3.8, + "grad_norm": 2.8635482788085938, + "learning_rate": 9.29175879396985e-06, + "loss": 0.1267, + "step": 7550 + }, + { + "epoch": 3.82, + "grad_norm": 3.3978378772735596, + "learning_rate": 9.289246231155779e-06, + "loss": 0.127, + "step": 7575 + }, + { + "epoch": 3.83, + "grad_norm": 3.2807343006134033, + "learning_rate": 9.28673366834171e-06, + "loss": 0.1266, + "step": 7600 + }, + { + "epoch": 3.84, + "grad_norm": 3.115029811859131, + "learning_rate": 9.28422110552764e-06, + "loss": 0.1266, + "step": 7625 + }, + { + "epoch": 3.85, + "grad_norm": 3.094233512878418, + "learning_rate": 9.281708542713569e-06, + "loss": 0.1319, + "step": 7650 + }, + { + "epoch": 3.87, + "grad_norm": 3.2299365997314453, + "learning_rate": 9.279195979899498e-06, + "loss": 0.1322, + "step": 7675 + }, + { + "epoch": 3.88, + "grad_norm": 3.39212703704834, + "learning_rate": 9.276683417085427e-06, + "loss": 0.1305, + "step": 7700 + }, + { + "epoch": 3.89, + "grad_norm": 3.241276741027832, + "learning_rate": 9.274170854271357e-06, + "loss": 0.1257, + "step": 7725 + }, + { + "epoch": 3.9, + "grad_norm": 3.012622117996216, + "learning_rate": 9.271658291457288e-06, + "loss": 0.1286, + "step": 7750 + }, + { + "epoch": 3.92, + "grad_norm": 3.312514066696167, + "learning_rate": 9.269145728643217e-06, + "loss": 0.1317, + "step": 7775 + }, + { + "epoch": 3.93, + "grad_norm": 3.4923620223999023, + "learning_rate": 9.266633165829146e-06, + "loss": 0.1357, + "step": 7800 + }, + { + "epoch": 3.94, + "grad_norm": 3.0651772022247314, + "learning_rate": 9.264120603015076e-06, + "loss": 0.1192, + "step": 7825 + }, + { + "epoch": 3.95, + "grad_norm": 3.142446517944336, + "learning_rate": 9.261608040201005e-06, + "loss": 0.1238, + "step": 7850 + }, + { + "epoch": 3.97, + "grad_norm": 2.9347586631774902, + "learning_rate": 9.259095477386936e-06, + "loss": 0.1258, + "step": 7875 + }, + { + "epoch": 3.98, + "grad_norm": 3.4585201740264893, + "learning_rate": 9.256582914572865e-06, + "loss": 0.1281, + "step": 7900 + }, + { + "epoch": 3.99, + "grad_norm": 3.0866055488586426, + "learning_rate": 9.254070351758795e-06, + "loss": 0.127, + "step": 7925 + }, + { + "epoch": 4.01, + "grad_norm": 2.5864970684051514, + "learning_rate": 9.251557788944724e-06, + "loss": 0.1142, + "step": 7950 + }, + { + "epoch": 4.02, + "grad_norm": 3.044384002685547, + "learning_rate": 9.249045226130653e-06, + "loss": 0.1016, + "step": 7975 + }, + { + "epoch": 4.03, + "grad_norm": 2.921030282974243, + "learning_rate": 9.246532663316584e-06, + "loss": 0.1018, + "step": 8000 + }, + { + "epoch": 4.03, + "eval_loss": 0.16997864842414856, + "eval_runtime": 643.7459, + "eval_samples_per_second": 2.223, + "eval_steps_per_second": 2.223, + "eval_wer": 26.321467098166128, + "step": 8000 + }, + { + "epoch": 4.04, + "grad_norm": 2.8127450942993164, + "learning_rate": 9.244020100502514e-06, + "loss": 0.1026, + "step": 8025 + }, + { + "epoch": 4.06, + "grad_norm": 2.687281370162964, + "learning_rate": 9.241507537688443e-06, + "loss": 0.1025, + "step": 8050 + }, + { + "epoch": 4.07, + "grad_norm": 2.703577756881714, + "learning_rate": 9.238994974874372e-06, + "loss": 0.1027, + "step": 8075 + }, + { + "epoch": 4.08, + "grad_norm": 3.099841833114624, + "learning_rate": 9.236482412060302e-06, + "loss": 0.1028, + "step": 8100 + }, + { + "epoch": 4.09, + "grad_norm": 2.8887107372283936, + "learning_rate": 9.233969849246231e-06, + "loss": 0.1046, + "step": 8125 + }, + { + "epoch": 4.11, + "grad_norm": 3.350189685821533, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0981, + "step": 8150 + }, + { + "epoch": 4.12, + "grad_norm": 3.085238456726074, + "learning_rate": 9.228944723618091e-06, + "loss": 0.1026, + "step": 8175 + }, + { + "epoch": 4.13, + "grad_norm": 2.9334661960601807, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0995, + "step": 8200 + }, + { + "epoch": 4.14, + "grad_norm": 2.9753615856170654, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0987, + "step": 8225 + }, + { + "epoch": 4.16, + "grad_norm": 3.2063608169555664, + "learning_rate": 9.221507537688443e-06, + "loss": 0.1053, + "step": 8250 + }, + { + "epoch": 4.17, + "grad_norm": 3.486284017562866, + "learning_rate": 9.218994974874373e-06, + "loss": 0.1019, + "step": 8275 + }, + { + "epoch": 4.18, + "grad_norm": 3.0759146213531494, + "learning_rate": 9.216482412060302e-06, + "loss": 0.1025, + "step": 8300 + }, + { + "epoch": 4.19, + "grad_norm": 3.127159595489502, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0991, + "step": 8325 + }, + { + "epoch": 4.21, + "grad_norm": 3.2545323371887207, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0994, + "step": 8350 + }, + { + "epoch": 4.22, + "grad_norm": 2.993659496307373, + "learning_rate": 9.20894472361809e-06, + "loss": 0.1018, + "step": 8375 + }, + { + "epoch": 4.23, + "grad_norm": 2.970054864883423, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0987, + "step": 8400 + }, + { + "epoch": 4.24, + "grad_norm": 2.8245937824249268, + "learning_rate": 9.20391959798995e-06, + "loss": 0.1006, + "step": 8425 + }, + { + "epoch": 4.26, + "grad_norm": 3.300325632095337, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0995, + "step": 8450 + }, + { + "epoch": 4.27, + "grad_norm": 3.1504898071289062, + "learning_rate": 9.19889447236181e-06, + "loss": 0.1011, + "step": 8475 + }, + { + "epoch": 4.28, + "grad_norm": 3.2930846214294434, + "learning_rate": 9.19638190954774e-06, + "loss": 0.1035, + "step": 8500 + }, + { + "epoch": 4.29, + "grad_norm": 3.108095645904541, + "learning_rate": 9.19386934673367e-06, + "loss": 0.1022, + "step": 8525 + }, + { + "epoch": 4.31, + "grad_norm": 3.2980425357818604, + "learning_rate": 9.191356783919599e-06, + "loss": 0.105, + "step": 8550 + }, + { + "epoch": 4.32, + "grad_norm": 2.9646871089935303, + "learning_rate": 9.188844221105528e-06, + "loss": 0.1005, + "step": 8575 + }, + { + "epoch": 4.33, + "grad_norm": 3.2865025997161865, + "learning_rate": 9.186331658291459e-06, + "loss": 0.1015, + "step": 8600 + }, + { + "epoch": 4.35, + "grad_norm": 3.148343801498413, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0959, + "step": 8625 + }, + { + "epoch": 4.36, + "grad_norm": 3.2818965911865234, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0992, + "step": 8650 + }, + { + "epoch": 4.37, + "grad_norm": 3.32326078414917, + "learning_rate": 9.178793969849247e-06, + "loss": 0.1005, + "step": 8675 + }, + { + "epoch": 4.38, + "grad_norm": 3.0383994579315186, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0985, + "step": 8700 + }, + { + "epoch": 4.4, + "grad_norm": 2.9868807792663574, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0977, + "step": 8725 + }, + { + "epoch": 4.41, + "grad_norm": 3.3646769523620605, + "learning_rate": 9.171256281407036e-06, + "loss": 0.1023, + "step": 8750 + }, + { + "epoch": 4.42, + "grad_norm": 3.090210199356079, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0992, + "step": 8775 + }, + { + "epoch": 4.43, + "grad_norm": 2.9281513690948486, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0992, + "step": 8800 + }, + { + "epoch": 4.45, + "grad_norm": 3.3010168075561523, + "learning_rate": 9.163718592964826e-06, + "loss": 0.1023, + "step": 8825 + }, + { + "epoch": 4.46, + "grad_norm": 3.2135019302368164, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0976, + "step": 8850 + }, + { + "epoch": 4.47, + "grad_norm": 3.102108955383301, + "learning_rate": 9.158693467336685e-06, + "loss": 0.1022, + "step": 8875 + }, + { + "epoch": 4.48, + "grad_norm": 3.3704023361206055, + "learning_rate": 9.156180904522614e-06, + "loss": 0.1033, + "step": 8900 + }, + { + "epoch": 4.5, + "grad_norm": 3.3164472579956055, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0997, + "step": 8925 + }, + { + "epoch": 4.51, + "grad_norm": 3.2379391193389893, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0984, + "step": 8950 + }, + { + "epoch": 4.52, + "grad_norm": 2.903475046157837, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0967, + "step": 8975 + }, + { + "epoch": 4.53, + "grad_norm": 3.0086185932159424, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0981, + "step": 9000 + }, + { + "epoch": 4.53, + "eval_loss": 0.17512790858745575, + "eval_runtime": 640.7771, + "eval_samples_per_second": 2.233, + "eval_steps_per_second": 2.233, + "eval_wer": 25.937162891046384, + "step": 9000 + }, + { + "epoch": 4.55, + "grad_norm": 2.8969457149505615, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0985, + "step": 9025 + }, + { + "epoch": 4.56, + "grad_norm": 2.912540912628174, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0993, + "step": 9050 + }, + { + "epoch": 4.57, + "grad_norm": 3.0564053058624268, + "learning_rate": 9.138592964824121e-06, + "loss": 0.1032, + "step": 9075 + }, + { + "epoch": 4.58, + "grad_norm": 3.089611291885376, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0986, + "step": 9100 + }, + { + "epoch": 4.6, + "grad_norm": 3.1568117141723633, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0988, + "step": 9125 + }, + { + "epoch": 4.61, + "grad_norm": 3.6259915828704834, + "learning_rate": 9.13105527638191e-06, + "loss": 0.099, + "step": 9150 + }, + { + "epoch": 4.62, + "grad_norm": 3.1273672580718994, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0941, + "step": 9175 + }, + { + "epoch": 4.63, + "grad_norm": 3.0625510215759277, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0997, + "step": 9200 + }, + { + "epoch": 4.65, + "grad_norm": 3.5971662998199463, + "learning_rate": 9.1235175879397e-06, + "loss": 0.1002, + "step": 9225 + }, + { + "epoch": 4.66, + "grad_norm": 3.189336061477661, + "learning_rate": 9.121005025125628e-06, + "loss": 0.1022, + "step": 9250 + }, + { + "epoch": 4.67, + "grad_norm": 3.147265911102295, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0982, + "step": 9275 + }, + { + "epoch": 4.69, + "grad_norm": 3.1727993488311768, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0975, + "step": 9300 + }, + { + "epoch": 4.7, + "grad_norm": 3.496676445007324, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0987, + "step": 9325 + }, + { + "epoch": 4.71, + "grad_norm": 3.1849424839019775, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0946, + "step": 9350 + }, + { + "epoch": 4.72, + "grad_norm": 3.139408826828003, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0987, + "step": 9375 + }, + { + "epoch": 4.74, + "grad_norm": 3.476555824279785, + "learning_rate": 9.105929648241206e-06, + "loss": 0.1004, + "step": 9400 + }, + { + "epoch": 4.75, + "grad_norm": 3.7997958660125732, + "learning_rate": 9.103417085427137e-06, + "loss": 0.1013, + "step": 9425 + }, + { + "epoch": 4.76, + "grad_norm": 3.7546234130859375, + "learning_rate": 9.100904522613066e-06, + "loss": 0.1025, + "step": 9450 + }, + { + "epoch": 4.77, + "grad_norm": 4.383953094482422, + "learning_rate": 9.09849246231156e-06, + "loss": 0.1024, + "step": 9475 + }, + { + "epoch": 4.79, + "grad_norm": 3.1972999572753906, + "learning_rate": 9.095979899497489e-06, + "loss": 0.0963, + "step": 9500 + }, + { + "epoch": 4.8, + "grad_norm": 3.2800564765930176, + "learning_rate": 9.093467336683418e-06, + "loss": 0.1015, + "step": 9525 + }, + { + "epoch": 4.81, + "grad_norm": 2.84741473197937, + "learning_rate": 9.090954773869347e-06, + "loss": 0.1003, + "step": 9550 + }, + { + "epoch": 4.82, + "grad_norm": 2.9392895698547363, + "learning_rate": 9.088442211055277e-06, + "loss": 0.0947, + "step": 9575 + }, + { + "epoch": 4.84, + "grad_norm": 3.1422994136810303, + "learning_rate": 9.085929648241206e-06, + "loss": 0.0997, + "step": 9600 + }, + { + "epoch": 4.85, + "grad_norm": 3.0518736839294434, + "learning_rate": 9.083417085427137e-06, + "loss": 0.1011, + "step": 9625 + }, + { + "epoch": 4.86, + "grad_norm": 3.137798309326172, + "learning_rate": 9.080904522613066e-06, + "loss": 0.0991, + "step": 9650 + }, + { + "epoch": 4.87, + "grad_norm": 3.0191142559051514, + "learning_rate": 9.078391959798996e-06, + "loss": 0.1048, + "step": 9675 + }, + { + "epoch": 4.89, + "grad_norm": 3.258415699005127, + "learning_rate": 9.075879396984927e-06, + "loss": 0.1053, + "step": 9700 + }, + { + "epoch": 4.9, + "grad_norm": 3.040956735610962, + "learning_rate": 9.073366834170854e-06, + "loss": 0.097, + "step": 9725 + }, + { + "epoch": 4.91, + "grad_norm": 3.433220148086548, + "learning_rate": 9.070854271356785e-06, + "loss": 0.0973, + "step": 9750 + }, + { + "epoch": 4.92, + "grad_norm": 2.707573175430298, + "learning_rate": 9.068341708542715e-06, + "loss": 0.0968, + "step": 9775 + }, + { + "epoch": 4.94, + "grad_norm": 2.8816564083099365, + "learning_rate": 9.065829145728644e-06, + "loss": 0.0979, + "step": 9800 + }, + { + "epoch": 4.95, + "grad_norm": 3.2339932918548584, + "learning_rate": 9.063316582914573e-06, + "loss": 0.0977, + "step": 9825 + }, + { + "epoch": 4.96, + "grad_norm": 2.817073345184326, + "learning_rate": 9.060804020100502e-06, + "loss": 0.0957, + "step": 9850 + }, + { + "epoch": 4.97, + "grad_norm": 3.0118443965911865, + "learning_rate": 9.058291457286433e-06, + "loss": 0.0954, + "step": 9875 + }, + { + "epoch": 4.99, + "grad_norm": 3.1879994869232178, + "learning_rate": 9.055778894472363e-06, + "loss": 0.0984, + "step": 9900 + }, + { + "epoch": 5.0, + "grad_norm": 3.434926986694336, + "learning_rate": 9.053266331658292e-06, + "loss": 0.0993, + "step": 9925 + }, + { + "epoch": 5.01, + "grad_norm": 2.875670909881592, + "learning_rate": 9.050753768844221e-06, + "loss": 0.0744, + "step": 9950 + }, + { + "epoch": 5.03, + "grad_norm": 2.6668901443481445, + "learning_rate": 9.048241206030152e-06, + "loss": 0.071, + "step": 9975 + }, + { + "epoch": 5.04, + "grad_norm": 2.8233089447021484, + "learning_rate": 9.04572864321608e-06, + "loss": 0.0722, + "step": 10000 + }, + { + "epoch": 5.04, + "eval_loss": 0.17968392372131348, + "eval_runtime": 641.8374, + "eval_samples_per_second": 2.23, + "eval_steps_per_second": 2.23, + "eval_wer": 26.254045307443363, + "step": 10000 + }, + { + "epoch": 5.05, + "grad_norm": 2.8495895862579346, + "learning_rate": 9.043216080402011e-06, + "loss": 0.0736, + "step": 10025 + }, + { + "epoch": 5.06, + "grad_norm": 3.0333521366119385, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0735, + "step": 10050 + }, + { + "epoch": 5.08, + "grad_norm": 2.9026684761047363, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0712, + "step": 10075 + }, + { + "epoch": 5.09, + "grad_norm": 2.817342519760132, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0724, + "step": 10100 + }, + { + "epoch": 5.1, + "grad_norm": 2.7992191314697266, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0727, + "step": 10125 + }, + { + "epoch": 5.11, + "grad_norm": 2.887803792953491, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0763, + "step": 10150 + }, + { + "epoch": 5.13, + "grad_norm": 3.253915309906006, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0747, + "step": 10175 + }, + { + "epoch": 5.14, + "grad_norm": 3.234462261199951, + "learning_rate": 9.025628140703518e-06, + "loss": 0.075, + "step": 10200 + }, + { + "epoch": 5.15, + "grad_norm": 3.0502281188964844, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0723, + "step": 10225 + }, + { + "epoch": 5.16, + "grad_norm": 3.2875289916992188, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0756, + "step": 10250 + }, + { + "epoch": 5.18, + "grad_norm": 2.9132416248321533, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0748, + "step": 10275 + }, + { + "epoch": 5.19, + "grad_norm": 3.255037546157837, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0722, + "step": 10300 + }, + { + "epoch": 5.2, + "grad_norm": 2.8713300228118896, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0763, + "step": 10325 + }, + { + "epoch": 5.21, + "grad_norm": 3.148291826248169, + "learning_rate": 9.010552763819096e-06, + "loss": 0.071, + "step": 10350 + }, + { + "epoch": 5.23, + "grad_norm": 2.7413222789764404, + "learning_rate": 9.008040201005027e-06, + "loss": 0.076, + "step": 10375 + }, + { + "epoch": 5.24, + "grad_norm": 3.072864055633545, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0786, + "step": 10400 + }, + { + "epoch": 5.25, + "grad_norm": 3.1105775833129883, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0737, + "step": 10425 + }, + { + "epoch": 5.26, + "grad_norm": 2.9133994579315186, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0715, + "step": 10450 + }, + { + "epoch": 5.28, + "grad_norm": 2.7990071773529053, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0745, + "step": 10475 + }, + { + "epoch": 5.29, + "grad_norm": 3.0488502979278564, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0752, + "step": 10500 + }, + { + "epoch": 5.3, + "grad_norm": 3.2010629177093506, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0724, + "step": 10525 + }, + { + "epoch": 5.31, + "grad_norm": 2.969665050506592, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0773, + "step": 10550 + }, + { + "epoch": 5.33, + "grad_norm": 2.7600491046905518, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0736, + "step": 10575 + }, + { + "epoch": 5.34, + "grad_norm": 2.964404582977295, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0778, + "step": 10600 + }, + { + "epoch": 5.35, + "grad_norm": 3.034454107284546, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0733, + "step": 10625 + }, + { + "epoch": 5.37, + "grad_norm": 2.961419105529785, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0738, + "step": 10650 + }, + { + "epoch": 5.38, + "grad_norm": 2.8201451301574707, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0747, + "step": 10675 + }, + { + "epoch": 5.39, + "grad_norm": 2.3992698192596436, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0805, + "step": 10700 + }, + { + "epoch": 5.4, + "grad_norm": 3.14664626121521, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0797, + "step": 10725 + }, + { + "epoch": 5.42, + "grad_norm": 3.208887815475464, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0744, + "step": 10750 + }, + { + "epoch": 5.43, + "grad_norm": 3.393000841140747, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0782, + "step": 10775 + }, + { + "epoch": 5.44, + "grad_norm": 3.2120070457458496, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0729, + "step": 10800 + }, + { + "epoch": 5.45, + "grad_norm": 3.261009931564331, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0746, + "step": 10825 + }, + { + "epoch": 5.47, + "grad_norm": 3.327291250228882, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0765, + "step": 10850 + }, + { + "epoch": 5.48, + "grad_norm": 3.124267101287842, + "learning_rate": 8.957788944723618e-06, + "loss": 0.077, + "step": 10875 + }, + { + "epoch": 5.49, + "grad_norm": 3.3869893550872803, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0736, + "step": 10900 + }, + { + "epoch": 5.5, + "grad_norm": 2.997990131378174, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0743, + "step": 10925 + }, + { + "epoch": 5.52, + "grad_norm": 2.768388509750366, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0757, + "step": 10950 + }, + { + "epoch": 5.53, + "grad_norm": 2.7903389930725098, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0712, + "step": 10975 + }, + { + "epoch": 5.54, + "grad_norm": 3.2805747985839844, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0759, + "step": 11000 + }, + { + "epoch": 5.54, + "eval_loss": 0.19008655846118927, + "eval_runtime": 638.9856, + "eval_samples_per_second": 2.239, + "eval_steps_per_second": 2.239, + "eval_wer": 26.699029126213592, + "step": 11000 + }, + { + "epoch": 5.55, + "grad_norm": 3.2012717723846436, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0724, + "step": 11025 + }, + { + "epoch": 5.57, + "grad_norm": 3.0376741886138916, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0782, + "step": 11050 + }, + { + "epoch": 5.58, + "grad_norm": 3.2168312072753906, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0763, + "step": 11075 + }, + { + "epoch": 5.59, + "grad_norm": 3.0400373935699463, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0726, + "step": 11100 + }, + { + "epoch": 5.6, + "grad_norm": 2.767463207244873, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0719, + "step": 11125 + }, + { + "epoch": 5.62, + "grad_norm": 3.149066686630249, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0768, + "step": 11150 + }, + { + "epoch": 5.63, + "grad_norm": 2.871328592300415, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0757, + "step": 11175 + }, + { + "epoch": 5.64, + "grad_norm": 3.0108718872070312, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0746, + "step": 11200 + }, + { + "epoch": 5.65, + "grad_norm": 3.104356050491333, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0752, + "step": 11225 + }, + { + "epoch": 5.67, + "grad_norm": 3.511143684387207, + "learning_rate": 8.920100502512563e-06, + "loss": 0.076, + "step": 11250 + }, + { + "epoch": 5.68, + "grad_norm": 3.0380494594573975, + "learning_rate": 8.917587939698493e-06, + "loss": 0.07, + "step": 11275 + }, + { + "epoch": 5.69, + "grad_norm": 3.4459969997406006, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0746, + "step": 11300 + }, + { + "epoch": 5.71, + "grad_norm": 3.1613292694091797, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0717, + "step": 11325 + }, + { + "epoch": 5.72, + "grad_norm": 3.4601426124572754, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0732, + "step": 11350 + }, + { + "epoch": 5.73, + "grad_norm": 3.4558703899383545, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0728, + "step": 11375 + }, + { + "epoch": 5.74, + "grad_norm": 2.6091301441192627, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0704, + "step": 11400 + }, + { + "epoch": 5.76, + "grad_norm": 3.289591073989868, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0731, + "step": 11425 + }, + { + "epoch": 5.77, + "grad_norm": 3.642345905303955, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0745, + "step": 11450 + }, + { + "epoch": 5.78, + "grad_norm": 3.1483981609344482, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0745, + "step": 11475 + }, + { + "epoch": 5.79, + "grad_norm": 2.815776824951172, + "learning_rate": 8.89497487437186e-06, + "loss": 0.074, + "step": 11500 + }, + { + "epoch": 5.81, + "grad_norm": 3.5915329456329346, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0711, + "step": 11525 + }, + { + "epoch": 5.82, + "grad_norm": 3.433957576751709, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0717, + "step": 11550 + }, + { + "epoch": 5.83, + "grad_norm": 3.0351011753082275, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0746, + "step": 11575 + }, + { + "epoch": 5.84, + "grad_norm": 3.106444835662842, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0717, + "step": 11600 + }, + { + "epoch": 5.86, + "grad_norm": 3.4326331615448, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0727, + "step": 11625 + }, + { + "epoch": 5.87, + "grad_norm": 3.1930453777313232, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0752, + "step": 11650 + }, + { + "epoch": 5.88, + "grad_norm": 3.3129940032958984, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0732, + "step": 11675 + }, + { + "epoch": 5.89, + "grad_norm": 3.2507965564727783, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0739, + "step": 11700 + }, + { + "epoch": 5.91, + "grad_norm": 3.4068336486816406, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0739, + "step": 11725 + }, + { + "epoch": 5.92, + "grad_norm": 2.890373706817627, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0709, + "step": 11750 + }, + { + "epoch": 5.93, + "grad_norm": 3.252119779586792, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0706, + "step": 11775 + }, + { + "epoch": 5.94, + "grad_norm": 3.001891851425171, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0782, + "step": 11800 + }, + { + "epoch": 5.96, + "grad_norm": 3.4063515663146973, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0738, + "step": 11825 + }, + { + "epoch": 5.97, + "grad_norm": 3.021040678024292, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0704, + "step": 11850 + }, + { + "epoch": 5.98, + "grad_norm": 2.491487503051758, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0713, + "step": 11875 + }, + { + "epoch": 5.99, + "grad_norm": 3.7813737392425537, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0723, + "step": 11900 + }, + { + "epoch": 6.01, + "grad_norm": 2.638810396194458, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0606, + "step": 11925 + }, + { + "epoch": 6.02, + "grad_norm": 2.8524045944213867, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0529, + "step": 11950 + }, + { + "epoch": 6.03, + "grad_norm": 2.748837471008301, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0543, + "step": 11975 + }, + { + "epoch": 6.05, + "grad_norm": 2.79878830909729, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0517, + "step": 12000 + }, + { + "epoch": 6.05, + "eval_loss": 0.19410496950149536, + "eval_runtime": 646.3472, + "eval_samples_per_second": 2.214, + "eval_steps_per_second": 2.214, + "eval_wer": 26.820388349514563, + "step": 12000 + }, + { + "epoch": 6.06, + "grad_norm": 2.8254170417785645, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0533, + "step": 12025 + }, + { + "epoch": 6.07, + "grad_norm": 3.093252182006836, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0547, + "step": 12050 + }, + { + "epoch": 6.08, + "grad_norm": 2.651522159576416, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0507, + "step": 12075 + }, + { + "epoch": 6.1, + "grad_norm": 2.742237091064453, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0522, + "step": 12100 + }, + { + "epoch": 6.11, + "grad_norm": 3.0948822498321533, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0521, + "step": 12125 + }, + { + "epoch": 6.12, + "grad_norm": 2.8140323162078857, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0497, + "step": 12150 + }, + { + "epoch": 6.13, + "grad_norm": 2.9247100353240967, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0495, + "step": 12175 + }, + { + "epoch": 6.15, + "grad_norm": 3.213573932647705, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0514, + "step": 12200 + }, + { + "epoch": 6.16, + "grad_norm": 2.4465277194976807, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0532, + "step": 12225 + }, + { + "epoch": 6.17, + "grad_norm": 2.3753342628479004, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0492, + "step": 12250 + }, + { + "epoch": 6.18, + "grad_norm": 2.9053711891174316, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0532, + "step": 12275 + }, + { + "epoch": 6.2, + "grad_norm": 2.476351737976074, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0525, + "step": 12300 + }, + { + "epoch": 6.21, + "grad_norm": 4.093915939331055, + "learning_rate": 8.812160804020102e-06, + "loss": 0.053, + "step": 12325 + }, + { + "epoch": 6.22, + "grad_norm": 2.5626816749572754, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0498, + "step": 12350 + }, + { + "epoch": 6.23, + "grad_norm": 3.1961519718170166, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0524, + "step": 12375 + }, + { + "epoch": 6.25, + "grad_norm": 2.968162775039673, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0551, + "step": 12400 + }, + { + "epoch": 6.26, + "grad_norm": 3.0323755741119385, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0552, + "step": 12425 + }, + { + "epoch": 6.27, + "grad_norm": 3.0983328819274902, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0526, + "step": 12450 + }, + { + "epoch": 6.28, + "grad_norm": 2.855795383453369, + "learning_rate": 8.79708542713568e-06, + "loss": 0.053, + "step": 12475 + }, + { + "epoch": 6.3, + "grad_norm": 3.2037718296051025, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0488, + "step": 12500 + }, + { + "epoch": 6.31, + "grad_norm": 2.260732650756836, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0502, + "step": 12525 + }, + { + "epoch": 6.32, + "grad_norm": 3.156822443008423, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0539, + "step": 12550 + }, + { + "epoch": 6.34, + "grad_norm": 3.198518991470337, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0533, + "step": 12575 + }, + { + "epoch": 6.35, + "grad_norm": 3.390371084213257, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0509, + "step": 12600 + }, + { + "epoch": 6.36, + "grad_norm": 2.5450425148010254, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0559, + "step": 12625 + }, + { + "epoch": 6.37, + "grad_norm": 2.738161325454712, + "learning_rate": 8.779497487437186e-06, + "loss": 0.051, + "step": 12650 + }, + { + "epoch": 6.39, + "grad_norm": 3.246572494506836, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0552, + "step": 12675 + }, + { + "epoch": 6.4, + "grad_norm": 2.963050365447998, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0535, + "step": 12700 + }, + { + "epoch": 6.41, + "grad_norm": 3.087432384490967, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0564, + "step": 12725 + }, + { + "epoch": 6.42, + "grad_norm": 3.698723793029785, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0523, + "step": 12750 + }, + { + "epoch": 6.44, + "grad_norm": 3.110818386077881, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0547, + "step": 12775 + }, + { + "epoch": 6.45, + "grad_norm": 3.007974624633789, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0539, + "step": 12800 + }, + { + "epoch": 6.46, + "grad_norm": 2.696997880935669, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0509, + "step": 12825 + }, + { + "epoch": 6.47, + "grad_norm": 3.05181622505188, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0526, + "step": 12850 + }, + { + "epoch": 6.49, + "grad_norm": 3.369204521179199, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0542, + "step": 12875 + }, + { + "epoch": 6.5, + "grad_norm": 3.575322389602661, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0523, + "step": 12900 + }, + { + "epoch": 6.51, + "grad_norm": 2.971766710281372, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0536, + "step": 12925 + }, + { + "epoch": 6.52, + "grad_norm": 3.1936326026916504, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0523, + "step": 12950 + }, + { + "epoch": 6.54, + "grad_norm": 3.110833168029785, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0531, + "step": 12975 + }, + { + "epoch": 6.55, + "grad_norm": 2.9373364448547363, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0575, + "step": 13000 + }, + { + "epoch": 6.55, + "eval_loss": 0.20703713595867157, + "eval_runtime": 638.6733, + "eval_samples_per_second": 2.241, + "eval_steps_per_second": 2.241, + "eval_wer": 26.854099244875947, + "step": 13000 + }, + { + "epoch": 6.56, + "grad_norm": 2.5592892169952393, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0528, + "step": 13025 + }, + { + "epoch": 6.57, + "grad_norm": 2.6712217330932617, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0498, + "step": 13050 + }, + { + "epoch": 6.59, + "grad_norm": 3.346395969390869, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0553, + "step": 13075 + }, + { + "epoch": 6.6, + "grad_norm": 2.8349058628082275, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0561, + "step": 13100 + }, + { + "epoch": 6.61, + "grad_norm": 3.2247519493103027, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0545, + "step": 13125 + }, + { + "epoch": 6.62, + "grad_norm": 3.241006374359131, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0554, + "step": 13150 + }, + { + "epoch": 6.64, + "grad_norm": 2.9004268646240234, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0538, + "step": 13175 + }, + { + "epoch": 6.65, + "grad_norm": 3.1002416610717773, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0539, + "step": 13200 + }, + { + "epoch": 6.66, + "grad_norm": 3.230339527130127, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0518, + "step": 13225 + }, + { + "epoch": 6.68, + "grad_norm": 2.587028980255127, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0528, + "step": 13250 + }, + { + "epoch": 6.69, + "grad_norm": 3.6443424224853516, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0529, + "step": 13275 + }, + { + "epoch": 6.7, + "grad_norm": 3.202755928039551, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0499, + "step": 13300 + }, + { + "epoch": 6.71, + "grad_norm": 3.081509590148926, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0551, + "step": 13325 + }, + { + "epoch": 6.73, + "grad_norm": 2.6462221145629883, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0559, + "step": 13350 + }, + { + "epoch": 6.74, + "grad_norm": 3.7177746295928955, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0545, + "step": 13375 + }, + { + "epoch": 6.75, + "grad_norm": 3.0639798641204834, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0535, + "step": 13400 + }, + { + "epoch": 6.76, + "grad_norm": 3.338195562362671, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0547, + "step": 13425 + }, + { + "epoch": 6.78, + "grad_norm": 2.962996244430542, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0538, + "step": 13450 + }, + { + "epoch": 6.79, + "grad_norm": 3.1073009967803955, + "learning_rate": 8.696582914572866e-06, + "loss": 0.054, + "step": 13475 + }, + { + "epoch": 6.8, + "grad_norm": 2.9525363445281982, + "learning_rate": 8.694070351758795e-06, + "loss": 0.054, + "step": 13500 + }, + { + "epoch": 6.81, + "grad_norm": 2.863752603530884, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0514, + "step": 13525 + }, + { + "epoch": 6.83, + "grad_norm": 2.745175838470459, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0522, + "step": 13550 + }, + { + "epoch": 6.84, + "grad_norm": 2.8566792011260986, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0515, + "step": 13575 + }, + { + "epoch": 6.85, + "grad_norm": 3.254241704940796, + "learning_rate": 8.684020100502514e-06, + "loss": 0.053, + "step": 13600 + }, + { + "epoch": 6.86, + "grad_norm": 2.774897336959839, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0533, + "step": 13625 + }, + { + "epoch": 6.88, + "grad_norm": 4.372440814971924, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0514, + "step": 13650 + }, + { + "epoch": 6.89, + "grad_norm": 3.0720090866088867, + "learning_rate": 8.676482412060302e-06, + "loss": 0.054, + "step": 13675 + }, + { + "epoch": 6.9, + "grad_norm": 3.1626296043395996, + "learning_rate": 8.673969849246231e-06, + "loss": 0.057, + "step": 13700 + }, + { + "epoch": 6.91, + "grad_norm": 3.649174451828003, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0529, + "step": 13725 + }, + { + "epoch": 6.93, + "grad_norm": 2.9002509117126465, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0527, + "step": 13750 + }, + { + "epoch": 6.94, + "grad_norm": 3.623666286468506, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0532, + "step": 13775 + }, + { + "epoch": 6.95, + "grad_norm": 2.810264825820923, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0507, + "step": 13800 + }, + { + "epoch": 6.96, + "grad_norm": 3.1401755809783936, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0494, + "step": 13825 + }, + { + "epoch": 6.98, + "grad_norm": 3.836899757385254, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0579, + "step": 13850 + }, + { + "epoch": 6.99, + "grad_norm": 2.801562547683716, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0538, + "step": 13875 + }, + { + "epoch": 7.0, + "grad_norm": 2.895209550857544, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0476, + "step": 13900 + }, + { + "epoch": 7.02, + "grad_norm": 2.6360881328582764, + "learning_rate": 8.651356783919599e-06, + "loss": 0.035, + "step": 13925 + }, + { + "epoch": 7.03, + "grad_norm": 2.4468023777008057, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0339, + "step": 13950 + }, + { + "epoch": 7.04, + "grad_norm": 2.511629819869995, + "learning_rate": 8.646331658291457e-06, + "loss": 0.034, + "step": 13975 + }, + { + "epoch": 7.05, + "grad_norm": 2.343121290206909, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0363, + "step": 14000 + }, + { + "epoch": 7.05, + "eval_loss": 0.21771244704723358, + "eval_runtime": 649.3851, + "eval_samples_per_second": 2.204, + "eval_steps_per_second": 2.204, + "eval_wer": 27.009169363538295, + "step": 14000 + }, + { + "epoch": 7.07, + "grad_norm": 2.413473129272461, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0344, + "step": 14025 + }, + { + "epoch": 7.08, + "grad_norm": 2.5712168216705322, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0325, + "step": 14050 + }, + { + "epoch": 7.09, + "grad_norm": 2.519373893737793, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0362, + "step": 14075 + }, + { + "epoch": 7.1, + "grad_norm": 2.6694931983947754, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0347, + "step": 14100 + }, + { + "epoch": 7.12, + "grad_norm": 2.466127395629883, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0343, + "step": 14125 + }, + { + "epoch": 7.13, + "grad_norm": 2.9331612586975098, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0345, + "step": 14150 + }, + { + "epoch": 7.14, + "grad_norm": 2.6077215671539307, + "learning_rate": 8.626231155778895e-06, + "loss": 0.036, + "step": 14175 + }, + { + "epoch": 7.15, + "grad_norm": 2.2470898628234863, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0358, + "step": 14200 + }, + { + "epoch": 7.17, + "grad_norm": 3.0216410160064697, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0355, + "step": 14225 + }, + { + "epoch": 7.18, + "grad_norm": 2.5506224632263184, + "learning_rate": 8.618693467336683e-06, + "loss": 0.036, + "step": 14250 + }, + { + "epoch": 7.19, + "grad_norm": 2.964569330215454, + "learning_rate": 8.616180904522614e-06, + "loss": 0.037, + "step": 14275 + }, + { + "epoch": 7.2, + "grad_norm": 2.7908012866973877, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0362, + "step": 14300 + }, + { + "epoch": 7.22, + "grad_norm": 2.3793702125549316, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0359, + "step": 14325 + }, + { + "epoch": 7.23, + "grad_norm": 2.5525882244110107, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0369, + "step": 14350 + }, + { + "epoch": 7.24, + "grad_norm": 2.7319822311401367, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0361, + "step": 14375 + }, + { + "epoch": 7.25, + "grad_norm": 2.3261148929595947, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0341, + "step": 14400 + }, + { + "epoch": 7.27, + "grad_norm": 3.016164541244507, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0385, + "step": 14425 + }, + { + "epoch": 7.28, + "grad_norm": 2.4649343490600586, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0341, + "step": 14450 + }, + { + "epoch": 7.29, + "grad_norm": 2.3269355297088623, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0362, + "step": 14475 + }, + { + "epoch": 7.3, + "grad_norm": 2.815135955810547, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0371, + "step": 14500 + }, + { + "epoch": 7.32, + "grad_norm": 2.6473183631896973, + "learning_rate": 8.591055276381909e-06, + "loss": 0.037, + "step": 14525 + }, + { + "epoch": 7.33, + "grad_norm": 2.728820323944092, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0383, + "step": 14550 + }, + { + "epoch": 7.34, + "grad_norm": 2.755659341812134, + "learning_rate": 8.58603015075377e-06, + "loss": 0.038, + "step": 14575 + }, + { + "epoch": 7.36, + "grad_norm": 3.1346089839935303, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0364, + "step": 14600 + }, + { + "epoch": 7.37, + "grad_norm": 2.9444706439971924, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0376, + "step": 14625 + }, + { + "epoch": 7.38, + "grad_norm": 2.7211592197418213, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0345, + "step": 14650 + }, + { + "epoch": 7.39, + "grad_norm": 2.668515682220459, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0363, + "step": 14675 + }, + { + "epoch": 7.41, + "grad_norm": 3.070080041885376, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0399, + "step": 14700 + }, + { + "epoch": 7.42, + "grad_norm": 2.3741774559020996, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0387, + "step": 14725 + }, + { + "epoch": 7.43, + "grad_norm": 2.966240406036377, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0373, + "step": 14750 + }, + { + "epoch": 7.44, + "grad_norm": 2.5987908840179443, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0354, + "step": 14775 + }, + { + "epoch": 7.46, + "grad_norm": 2.6801822185516357, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0365, + "step": 14800 + }, + { + "epoch": 7.47, + "grad_norm": 2.9004406929016113, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0384, + "step": 14825 + }, + { + "epoch": 7.48, + "grad_norm": 2.8115575313568115, + "learning_rate": 8.558391959798995e-06, + "loss": 0.037, + "step": 14850 + }, + { + "epoch": 7.49, + "grad_norm": 2.4969582557678223, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0362, + "step": 14875 + }, + { + "epoch": 7.51, + "grad_norm": 2.944546699523926, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0373, + "step": 14900 + }, + { + "epoch": 7.52, + "grad_norm": 2.6705429553985596, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0358, + "step": 14925 + }, + { + "epoch": 7.53, + "grad_norm": 2.5035767555236816, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0372, + "step": 14950 + }, + { + "epoch": 7.54, + "grad_norm": 2.560549020767212, + "learning_rate": 8.545929648241208e-06, + "loss": 0.035, + "step": 14975 + }, + { + "epoch": 7.56, + "grad_norm": 2.4358303546905518, + "learning_rate": 8.543417085427135e-06, + "loss": 0.037, + "step": 15000 + }, + { + "epoch": 7.56, + "eval_loss": 0.22938773036003113, + "eval_runtime": 647.0226, + "eval_samples_per_second": 2.212, + "eval_steps_per_second": 2.212, + "eval_wer": 27.494606256742177, + "step": 15000 + }, + { + "epoch": 7.57, + "grad_norm": 2.569387435913086, + "learning_rate": 8.540904522613066e-06, + "loss": 0.038, + "step": 15025 + }, + { + "epoch": 7.58, + "grad_norm": 3.2271666526794434, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0389, + "step": 15050 + }, + { + "epoch": 7.59, + "grad_norm": 2.4217026233673096, + "learning_rate": 8.535879396984925e-06, + "loss": 0.036, + "step": 15075 + }, + { + "epoch": 7.61, + "grad_norm": 2.854887008666992, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0345, + "step": 15100 + }, + { + "epoch": 7.62, + "grad_norm": 3.163228988647461, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0368, + "step": 15125 + }, + { + "epoch": 7.63, + "grad_norm": 2.9980268478393555, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0348, + "step": 15150 + }, + { + "epoch": 7.64, + "grad_norm": 3.311469078063965, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0403, + "step": 15175 + }, + { + "epoch": 7.66, + "grad_norm": 3.9748189449310303, + "learning_rate": 8.523316582914573e-06, + "loss": 0.035, + "step": 15200 + }, + { + "epoch": 7.67, + "grad_norm": 2.8011021614074707, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0377, + "step": 15225 + }, + { + "epoch": 7.68, + "grad_norm": 2.337939977645874, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0371, + "step": 15250 + }, + { + "epoch": 7.7, + "grad_norm": 3.128544569015503, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0362, + "step": 15275 + }, + { + "epoch": 7.71, + "grad_norm": 3.0999114513397217, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0398, + "step": 15300 + }, + { + "epoch": 7.72, + "grad_norm": 3.0560312271118164, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0357, + "step": 15325 + }, + { + "epoch": 7.73, + "grad_norm": 2.7128045558929443, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0368, + "step": 15350 + }, + { + "epoch": 7.75, + "grad_norm": 3.410581111907959, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0375, + "step": 15375 + }, + { + "epoch": 7.76, + "grad_norm": 2.8622584342956543, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0387, + "step": 15400 + }, + { + "epoch": 7.77, + "grad_norm": 2.9065823554992676, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0358, + "step": 15425 + }, + { + "epoch": 7.78, + "grad_norm": 2.5962724685668945, + "learning_rate": 8.49819095477387e-06, + "loss": 0.039, + "step": 15450 + }, + { + "epoch": 7.8, + "grad_norm": 2.453361988067627, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0393, + "step": 15475 + }, + { + "epoch": 7.81, + "grad_norm": 3.051579475402832, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0381, + "step": 15500 + }, + { + "epoch": 7.82, + "grad_norm": 4.121551036834717, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0392, + "step": 15525 + }, + { + "epoch": 7.83, + "grad_norm": 2.6381332874298096, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0397, + "step": 15550 + }, + { + "epoch": 7.85, + "grad_norm": 2.958284616470337, + "learning_rate": 8.485628140703518e-06, + "loss": 0.039, + "step": 15575 + }, + { + "epoch": 7.86, + "grad_norm": 2.7606234550476074, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0387, + "step": 15600 + }, + { + "epoch": 7.87, + "grad_norm": 2.755718231201172, + "learning_rate": 8.480603015075377e-06, + "loss": 0.038, + "step": 15625 + }, + { + "epoch": 7.88, + "grad_norm": 2.7270545959472656, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0378, + "step": 15650 + }, + { + "epoch": 7.9, + "grad_norm": 3.053008556365967, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0372, + "step": 15675 + }, + { + "epoch": 7.91, + "grad_norm": 2.3077919483184814, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0378, + "step": 15700 + }, + { + "epoch": 7.92, + "grad_norm": 2.8426690101623535, + "learning_rate": 8.470552763819096e-06, + "loss": 0.037, + "step": 15725 + }, + { + "epoch": 7.93, + "grad_norm": 2.693056344985962, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0366, + "step": 15750 + }, + { + "epoch": 7.95, + "grad_norm": 3.4918949604034424, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0362, + "step": 15775 + }, + { + "epoch": 7.96, + "grad_norm": 2.753361463546753, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0363, + "step": 15800 + }, + { + "epoch": 7.97, + "grad_norm": 3.151332139968872, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0392, + "step": 15825 + }, + { + "epoch": 7.98, + "grad_norm": 3.1316654682159424, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0365, + "step": 15850 + }, + { + "epoch": 8.0, + "grad_norm": 2.8634965419769287, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0354, + "step": 15875 + }, + { + "epoch": 8.01, + "grad_norm": 2.3533437252044678, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0283, + "step": 15900 + }, + { + "epoch": 8.02, + "grad_norm": 1.9110933542251587, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0245, + "step": 15925 + }, + { + "epoch": 8.04, + "grad_norm": 1.973682165145874, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0231, + "step": 15950 + }, + { + "epoch": 8.05, + "grad_norm": 2.531682014465332, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0246, + "step": 15975 + }, + { + "epoch": 8.06, + "grad_norm": 2.282473087310791, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0219, + "step": 16000 + }, + { + "epoch": 8.06, + "eval_loss": 0.24251039326190948, + "eval_runtime": 645.2227, + "eval_samples_per_second": 2.218, + "eval_steps_per_second": 2.218, + "eval_wer": 27.353020496224378, + "step": 16000 + }, + { + "epoch": 8.07, + "grad_norm": 1.797715663909912, + "learning_rate": 8.440402010050251e-06, + "loss": 0.024, + "step": 16025 + }, + { + "epoch": 8.09, + "grad_norm": 2.7163150310516357, + "learning_rate": 8.437889447236182e-06, + "loss": 0.024, + "step": 16050 + }, + { + "epoch": 8.1, + "grad_norm": 2.643120765686035, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0242, + "step": 16075 + }, + { + "epoch": 8.11, + "grad_norm": 2.520521879196167, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0239, + "step": 16100 + }, + { + "epoch": 8.12, + "grad_norm": 2.5036051273345947, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0234, + "step": 16125 + }, + { + "epoch": 8.14, + "grad_norm": 2.0528650283813477, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0228, + "step": 16150 + }, + { + "epoch": 8.15, + "grad_norm": 2.3187363147735596, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0234, + "step": 16175 + }, + { + "epoch": 8.16, + "grad_norm": 1.8649437427520752, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0262, + "step": 16200 + }, + { + "epoch": 8.17, + "grad_norm": 3.5686943531036377, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0252, + "step": 16225 + }, + { + "epoch": 8.19, + "grad_norm": 2.162992000579834, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0253, + "step": 16250 + }, + { + "epoch": 8.2, + "grad_norm": 2.2129812240600586, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0234, + "step": 16275 + }, + { + "epoch": 8.21, + "grad_norm": 2.2268874645233154, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0237, + "step": 16300 + }, + { + "epoch": 8.22, + "grad_norm": 2.157003402709961, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0235, + "step": 16325 + }, + { + "epoch": 8.24, + "grad_norm": 2.988657236099243, + "learning_rate": 8.407738693467337e-06, + "loss": 0.026, + "step": 16350 + }, + { + "epoch": 8.25, + "grad_norm": 2.333310127258301, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0261, + "step": 16375 + }, + { + "epoch": 8.26, + "grad_norm": 2.463899850845337, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0244, + "step": 16400 + }, + { + "epoch": 8.27, + "grad_norm": 3.018486499786377, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0252, + "step": 16425 + }, + { + "epoch": 8.29, + "grad_norm": 2.668846607208252, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0236, + "step": 16450 + }, + { + "epoch": 8.3, + "grad_norm": 2.6295289993286133, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0251, + "step": 16475 + }, + { + "epoch": 8.31, + "grad_norm": 2.7233710289001465, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0255, + "step": 16500 + }, + { + "epoch": 8.32, + "grad_norm": 2.1756927967071533, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0233, + "step": 16525 + }, + { + "epoch": 8.34, + "grad_norm": 1.994356393814087, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0259, + "step": 16550 + }, + { + "epoch": 8.35, + "grad_norm": 2.5735764503479004, + "learning_rate": 8.385125628140705e-06, + "loss": 0.025, + "step": 16575 + }, + { + "epoch": 8.36, + "grad_norm": 2.204200029373169, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0262, + "step": 16600 + }, + { + "epoch": 8.38, + "grad_norm": 2.8326077461242676, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0263, + "step": 16625 + }, + { + "epoch": 8.39, + "grad_norm": 2.6359524726867676, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0246, + "step": 16650 + }, + { + "epoch": 8.4, + "grad_norm": 2.6582038402557373, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0251, + "step": 16675 + }, + { + "epoch": 8.41, + "grad_norm": 2.6335105895996094, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0263, + "step": 16700 + }, + { + "epoch": 8.43, + "grad_norm": 2.17193341255188, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0243, + "step": 16725 + }, + { + "epoch": 8.44, + "grad_norm": 2.738447904586792, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0264, + "step": 16750 + }, + { + "epoch": 8.45, + "grad_norm": 2.6468822956085205, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0242, + "step": 16775 + }, + { + "epoch": 8.46, + "grad_norm": 2.8824996948242188, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0257, + "step": 16800 + }, + { + "epoch": 8.48, + "grad_norm": 1.907247543334961, + "learning_rate": 8.36e-06, + "loss": 0.0255, + "step": 16825 + }, + { + "epoch": 8.49, + "grad_norm": 2.0344223976135254, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0259, + "step": 16850 + }, + { + "epoch": 8.5, + "grad_norm": 3.155583143234253, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0259, + "step": 16875 + }, + { + "epoch": 8.51, + "grad_norm": 2.2232720851898193, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0245, + "step": 16900 + }, + { + "epoch": 8.53, + "grad_norm": 2.3047924041748047, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0251, + "step": 16925 + }, + { + "epoch": 8.54, + "grad_norm": 2.391136884689331, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0264, + "step": 16950 + }, + { + "epoch": 8.55, + "grad_norm": 2.657806634902954, + "learning_rate": 8.345025125628141e-06, + "loss": 0.0266, + "step": 16975 + }, + { + "epoch": 8.56, + "grad_norm": 2.5439977645874023, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0251, + "step": 17000 + }, + { + "epoch": 8.56, + "eval_loss": 0.25923389196395874, + "eval_runtime": 785.426, + "eval_samples_per_second": 1.822, + "eval_steps_per_second": 1.822, + "eval_wer": 27.818230852211435, + "step": 17000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 51, + "save_steps": 1000, + "total_flos": 5.292575686656e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/magahi/checkpoint-17000/training_args.bin b/checkpoints/whisper-base/magahi/checkpoint-17000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..16e1f6546d179375472993981ee1609868a40cba --- /dev/null +++ b/checkpoints/whisper-base/magahi/checkpoint-17000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c3d2035ae0d1a10d899f2ea951c4037c5ca1de0d6879ebaf11869f89963f6f +size 4667 diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/config.json b/checkpoints/whisper-base/maithili/checkpoint-66000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7861ebfd3dce452d730fc7657aa35befb4dcfe2d --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/generation_config.json b/checkpoints/whisper-base/maithili/checkpoint-66000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/model.safetensors b/checkpoints/whisper-base/maithili/checkpoint-66000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56502f17d801427add534565af25ae88cc316b3f --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb6210be6f9da404a6abb6ef8636cb28404b059f5d98f51934c0b1302fb2ced +size 290403936 diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/optimizer.pt b/checkpoints/whisper-base/maithili/checkpoint-66000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..be367f1f5a5b434179f174c7feb7d344805d7aa8 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ca9adb127577e3cdad1aa6316d41251639663489c5acbe46c7bf8795f880d6 +size 574811077 diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/preprocessor_config.json b/checkpoints/whisper-base/maithili/checkpoint-66000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/rng_state.pth b/checkpoints/whisper-base/maithili/checkpoint-66000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e2abfa6173f5153fdcfeeaa33e8d9837f07cff7 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a0de2aa8ad8a2b6ccb04c4e61686b55e74571109380c815392fafc3e31ae65 +size 14575 diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/scheduler.pt b/checkpoints/whisper-base/maithili/checkpoint-66000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..223a205bd47e1ed02d42273ac86ecb7fd192d193 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7a5b95927fd27411ed88e9b7d9a07f38fed1e62e786ad8d5aeab8b1e2d8b5f +size 627 diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/trainer_state.json b/checkpoints/whisper-base/maithili/checkpoint-66000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..43b3d04219e88a086a08d5716b486551811ef8b3 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/trainer_state.json @@ -0,0 +1,19095 @@ +{ + "best_metric": 22.172258734002074, + "best_model_checkpoint": "results/whisper-base/maithili/checkpoint-56000", + "epoch": 33.249370277078086, + "eval_steps": 1000, + "global_step": 66000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 22.37806510925293, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.3423, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 14.617953300476074, + "learning_rate": 9.600000000000001e-07, + "loss": 2.0051, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 8.992124557495117, + "learning_rate": 1.46e-06, + "loss": 1.4891, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 6.06942081451416, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.1894, + "step": 100 + }, + { + "epoch": 0.06, + "grad_norm": 5.8592963218688965, + "learning_rate": 2.46e-06, + "loss": 0.9395, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.838193893432617, + "learning_rate": 2.96e-06, + "loss": 0.8116, + "step": 150 + }, + { + "epoch": 0.09, + "grad_norm": 4.628206729888916, + "learning_rate": 3.46e-06, + "loss": 0.7077, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 4.1971611976623535, + "learning_rate": 3.96e-06, + "loss": 0.654, + "step": 200 + }, + { + "epoch": 0.11, + "grad_norm": 4.9267659187316895, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.5982, + "step": 225 + }, + { + "epoch": 0.13, + "grad_norm": 4.413062572479248, + "learning_rate": 4.960000000000001e-06, + "loss": 0.5504, + "step": 250 + }, + { + "epoch": 0.14, + "grad_norm": 4.560253620147705, + "learning_rate": 5.460000000000001e-06, + "loss": 0.5413, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 3.9538028240203857, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.5037, + "step": 300 + }, + { + "epoch": 0.16, + "grad_norm": 3.8549587726593018, + "learning_rate": 6.460000000000001e-06, + "loss": 0.4715, + "step": 325 + }, + { + "epoch": 0.18, + "grad_norm": 3.935253620147705, + "learning_rate": 6.96e-06, + "loss": 0.4453, + "step": 350 + }, + { + "epoch": 0.19, + "grad_norm": 4.225427627563477, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.4322, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 4.021173477172852, + "learning_rate": 7.960000000000002e-06, + "loss": 0.4302, + "step": 400 + }, + { + "epoch": 0.21, + "grad_norm": 4.08791446685791, + "learning_rate": 8.46e-06, + "loss": 0.4109, + "step": 425 + }, + { + "epoch": 0.23, + "grad_norm": 4.559580326080322, + "learning_rate": 8.96e-06, + "loss": 0.391, + "step": 450 + }, + { + "epoch": 0.24, + "grad_norm": 4.0354790687561035, + "learning_rate": 9.460000000000001e-06, + "loss": 0.3832, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 4.030752182006836, + "learning_rate": 9.960000000000001e-06, + "loss": 0.373, + "step": 500 + }, + { + "epoch": 0.26, + "grad_norm": 4.1658034324646, + "learning_rate": 9.997688442211056e-06, + "loss": 0.3686, + "step": 525 + }, + { + "epoch": 0.28, + "grad_norm": 4.24349308013916, + "learning_rate": 9.995175879396986e-06, + "loss": 0.3444, + "step": 550 + }, + { + "epoch": 0.29, + "grad_norm": 4.5939555168151855, + "learning_rate": 9.992663316582915e-06, + "loss": 0.3418, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 3.8061537742614746, + "learning_rate": 9.990150753768844e-06, + "loss": 0.3393, + "step": 600 + }, + { + "epoch": 0.31, + "grad_norm": 4.4001922607421875, + "learning_rate": 9.987638190954775e-06, + "loss": 0.3248, + "step": 625 + }, + { + "epoch": 0.33, + "grad_norm": 3.4623498916625977, + "learning_rate": 9.985125628140705e-06, + "loss": 0.3191, + "step": 650 + }, + { + "epoch": 0.34, + "grad_norm": 3.7342793941497803, + "learning_rate": 9.982613065326634e-06, + "loss": 0.3123, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 4.150409698486328, + "learning_rate": 9.980100502512565e-06, + "loss": 0.3102, + "step": 700 + }, + { + "epoch": 0.37, + "grad_norm": 3.992783308029175, + "learning_rate": 9.977587939698493e-06, + "loss": 0.3089, + "step": 725 + }, + { + "epoch": 0.38, + "grad_norm": 3.3655505180358887, + "learning_rate": 9.975075376884424e-06, + "loss": 0.3045, + "step": 750 + }, + { + "epoch": 0.39, + "grad_norm": 3.5667428970336914, + "learning_rate": 9.972562814070353e-06, + "loss": 0.2959, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 4.166114807128906, + "learning_rate": 9.970050251256282e-06, + "loss": 0.2918, + "step": 800 + }, + { + "epoch": 0.42, + "grad_norm": 3.6910595893859863, + "learning_rate": 9.967537688442212e-06, + "loss": 0.2931, + "step": 825 + }, + { + "epoch": 0.43, + "grad_norm": 3.8466129302978516, + "learning_rate": 9.965025125628141e-06, + "loss": 0.2741, + "step": 850 + }, + { + "epoch": 0.44, + "grad_norm": 3.8253138065338135, + "learning_rate": 9.96251256281407e-06, + "loss": 0.2733, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 3.8832590579986572, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2754, + "step": 900 + }, + { + "epoch": 0.47, + "grad_norm": 3.8620283603668213, + "learning_rate": 9.95748743718593e-06, + "loss": 0.2747, + "step": 925 + }, + { + "epoch": 0.48, + "grad_norm": 3.543933629989624, + "learning_rate": 9.95497487437186e-06, + "loss": 0.2651, + "step": 950 + }, + { + "epoch": 0.49, + "grad_norm": 3.5218117237091064, + "learning_rate": 9.952462311557791e-06, + "loss": 0.2769, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 3.711573600769043, + "learning_rate": 9.949949748743718e-06, + "loss": 0.2647, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 0.23160116374492645, + "eval_runtime": 649.8053, + "eval_samples_per_second": 2.168, + "eval_steps_per_second": 2.168, + "eval_wer": 35.17813905223106, + "step": 1000 + }, + { + "epoch": 0.52, + "grad_norm": 3.524477005004883, + "learning_rate": 9.94743718592965e-06, + "loss": 0.2592, + "step": 1025 + }, + { + "epoch": 0.53, + "grad_norm": 3.2609434127807617, + "learning_rate": 9.944924623115579e-06, + "loss": 0.2532, + "step": 1050 + }, + { + "epoch": 0.54, + "grad_norm": 3.6249492168426514, + "learning_rate": 9.942412060301508e-06, + "loss": 0.2562, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 3.7930243015289307, + "learning_rate": 9.93989949748744e-06, + "loss": 0.2592, + "step": 1100 + }, + { + "epoch": 0.57, + "grad_norm": 4.007348537445068, + "learning_rate": 9.937386934673367e-06, + "loss": 0.2517, + "step": 1125 + }, + { + "epoch": 0.58, + "grad_norm": 3.8038876056671143, + "learning_rate": 9.934874371859298e-06, + "loss": 0.2409, + "step": 1150 + }, + { + "epoch": 0.59, + "grad_norm": 3.6129648685455322, + "learning_rate": 9.932361809045227e-06, + "loss": 0.2468, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 3.1642048358917236, + "learning_rate": 9.929849246231156e-06, + "loss": 0.2483, + "step": 1200 + }, + { + "epoch": 0.62, + "grad_norm": 3.557328939437866, + "learning_rate": 9.927336683417086e-06, + "loss": 0.2388, + "step": 1225 + }, + { + "epoch": 0.63, + "grad_norm": 3.287649154663086, + "learning_rate": 9.924824120603017e-06, + "loss": 0.2472, + "step": 1250 + }, + { + "epoch": 0.64, + "grad_norm": 2.858637809753418, + "learning_rate": 9.922311557788944e-06, + "loss": 0.2377, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 3.016263484954834, + "learning_rate": 9.919798994974875e-06, + "loss": 0.2376, + "step": 1300 + }, + { + "epoch": 0.67, + "grad_norm": 3.3102822303771973, + "learning_rate": 9.917286432160805e-06, + "loss": 0.2345, + "step": 1325 + }, + { + "epoch": 0.68, + "grad_norm": 3.1757044792175293, + "learning_rate": 9.914773869346734e-06, + "loss": 0.2315, + "step": 1350 + }, + { + "epoch": 0.69, + "grad_norm": 3.720780849456787, + "learning_rate": 9.912261306532665e-06, + "loss": 0.2362, + "step": 1375 + }, + { + "epoch": 0.71, + "grad_norm": 3.6007885932922363, + "learning_rate": 9.909748743718593e-06, + "loss": 0.2306, + "step": 1400 + }, + { + "epoch": 0.72, + "grad_norm": 3.1506540775299072, + "learning_rate": 9.907236180904524e-06, + "loss": 0.2266, + "step": 1425 + }, + { + "epoch": 0.73, + "grad_norm": 3.562501907348633, + "learning_rate": 9.904723618090453e-06, + "loss": 0.2244, + "step": 1450 + }, + { + "epoch": 0.74, + "grad_norm": 3.256913900375366, + "learning_rate": 9.902211055276382e-06, + "loss": 0.2313, + "step": 1475 + }, + { + "epoch": 0.76, + "grad_norm": 3.431098222732544, + "learning_rate": 9.899698492462312e-06, + "loss": 0.2194, + "step": 1500 + }, + { + "epoch": 0.77, + "grad_norm": 3.5509750843048096, + "learning_rate": 9.897185929648243e-06, + "loss": 0.2253, + "step": 1525 + }, + { + "epoch": 0.78, + "grad_norm": 3.666264295578003, + "learning_rate": 9.894673366834172e-06, + "loss": 0.2214, + "step": 1550 + }, + { + "epoch": 0.79, + "grad_norm": 3.291407585144043, + "learning_rate": 9.892160804020101e-06, + "loss": 0.2139, + "step": 1575 + }, + { + "epoch": 0.81, + "grad_norm": 3.3444149494171143, + "learning_rate": 9.88964824120603e-06, + "loss": 0.2172, + "step": 1600 + }, + { + "epoch": 0.82, + "grad_norm": 3.133206367492676, + "learning_rate": 9.88713567839196e-06, + "loss": 0.2199, + "step": 1625 + }, + { + "epoch": 0.83, + "grad_norm": 3.0405352115631104, + "learning_rate": 9.884623115577891e-06, + "loss": 0.2103, + "step": 1650 + }, + { + "epoch": 0.84, + "grad_norm": 3.157695770263672, + "learning_rate": 9.882110552763819e-06, + "loss": 0.2155, + "step": 1675 + }, + { + "epoch": 0.86, + "grad_norm": 3.13808274269104, + "learning_rate": 9.87959798994975e-06, + "loss": 0.215, + "step": 1700 + }, + { + "epoch": 0.87, + "grad_norm": 3.28182315826416, + "learning_rate": 9.877085427135679e-06, + "loss": 0.2139, + "step": 1725 + }, + { + "epoch": 0.88, + "grad_norm": 3.1201610565185547, + "learning_rate": 9.874572864321608e-06, + "loss": 0.2018, + "step": 1750 + }, + { + "epoch": 0.89, + "grad_norm": 3.426795721054077, + "learning_rate": 9.87206030150754e-06, + "loss": 0.2125, + "step": 1775 + }, + { + "epoch": 0.91, + "grad_norm": 3.0987160205841064, + "learning_rate": 9.869547738693469e-06, + "loss": 0.2079, + "step": 1800 + }, + { + "epoch": 0.92, + "grad_norm": 3.6653778553009033, + "learning_rate": 9.867035175879398e-06, + "loss": 0.1991, + "step": 1825 + }, + { + "epoch": 0.93, + "grad_norm": 3.522376537322998, + "learning_rate": 9.864522613065327e-06, + "loss": 0.1966, + "step": 1850 + }, + { + "epoch": 0.94, + "grad_norm": 3.2122714519500732, + "learning_rate": 9.862010050251257e-06, + "loss": 0.1962, + "step": 1875 + }, + { + "epoch": 0.96, + "grad_norm": 3.3362936973571777, + "learning_rate": 9.859497487437186e-06, + "loss": 0.2056, + "step": 1900 + }, + { + "epoch": 0.97, + "grad_norm": 2.8921637535095215, + "learning_rate": 9.856984924623117e-06, + "loss": 0.192, + "step": 1925 + }, + { + "epoch": 0.98, + "grad_norm": 3.2778878211975098, + "learning_rate": 9.854472361809046e-06, + "loss": 0.1953, + "step": 1950 + }, + { + "epoch": 0.99, + "grad_norm": 3.4213788509368896, + "learning_rate": 9.851959798994976e-06, + "loss": 0.199, + "step": 1975 + }, + { + "epoch": 1.01, + "grad_norm": 2.996750593185425, + "learning_rate": 9.849447236180905e-06, + "loss": 0.1877, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.18496885895729065, + "eval_runtime": 642.3954, + "eval_samples_per_second": 2.193, + "eval_steps_per_second": 2.193, + "eval_wer": 29.546869595295743, + "step": 2000 + }, + { + "epoch": 1.02, + "grad_norm": 2.9950320720672607, + "learning_rate": 9.846934673366834e-06, + "loss": 0.1852, + "step": 2025 + }, + { + "epoch": 1.03, + "grad_norm": 3.2103137969970703, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1808, + "step": 2050 + }, + { + "epoch": 1.05, + "grad_norm": 2.737065076828003, + "learning_rate": 9.841909547738695e-06, + "loss": 0.1712, + "step": 2075 + }, + { + "epoch": 1.06, + "grad_norm": 3.0664756298065186, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1744, + "step": 2100 + }, + { + "epoch": 1.07, + "grad_norm": 3.143113374710083, + "learning_rate": 9.836884422110553e-06, + "loss": 0.1828, + "step": 2125 + }, + { + "epoch": 1.08, + "grad_norm": 3.1511785984039307, + "learning_rate": 9.834371859296483e-06, + "loss": 0.1724, + "step": 2150 + }, + { + "epoch": 1.1, + "grad_norm": 3.206463575363159, + "learning_rate": 9.831859296482414e-06, + "loss": 0.1742, + "step": 2175 + }, + { + "epoch": 1.11, + "grad_norm": 3.2733755111694336, + "learning_rate": 9.829346733668343e-06, + "loss": 0.178, + "step": 2200 + }, + { + "epoch": 1.12, + "grad_norm": 2.9530861377716064, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1705, + "step": 2225 + }, + { + "epoch": 1.13, + "grad_norm": 3.1909892559051514, + "learning_rate": 9.824321608040202e-06, + "loss": 0.1759, + "step": 2250 + }, + { + "epoch": 1.15, + "grad_norm": 2.792212724685669, + "learning_rate": 9.821809045226131e-06, + "loss": 0.1641, + "step": 2275 + }, + { + "epoch": 1.16, + "grad_norm": 3.1717071533203125, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1709, + "step": 2300 + }, + { + "epoch": 1.17, + "grad_norm": 2.979113817214966, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1655, + "step": 2325 + }, + { + "epoch": 1.18, + "grad_norm": 2.8337669372558594, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1691, + "step": 2350 + }, + { + "epoch": 1.2, + "grad_norm": 2.9283900260925293, + "learning_rate": 9.81175879396985e-06, + "loss": 0.171, + "step": 2375 + }, + { + "epoch": 1.21, + "grad_norm": 3.450836420059204, + "learning_rate": 9.809246231155781e-06, + "loss": 0.1695, + "step": 2400 + }, + { + "epoch": 1.22, + "grad_norm": 2.9222114086151123, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1696, + "step": 2425 + }, + { + "epoch": 1.23, + "grad_norm": 2.5073487758636475, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1672, + "step": 2450 + }, + { + "epoch": 1.25, + "grad_norm": 3.157297372817993, + "learning_rate": 9.801708542713569e-06, + "loss": 0.165, + "step": 2475 + }, + { + "epoch": 1.26, + "grad_norm": 3.0700457096099854, + "learning_rate": 9.799195979899498e-06, + "loss": 0.169, + "step": 2500 + }, + { + "epoch": 1.27, + "grad_norm": 3.2579360008239746, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1607, + "step": 2525 + }, + { + "epoch": 1.28, + "grad_norm": 2.8195056915283203, + "learning_rate": 9.794170854271357e-06, + "loss": 0.16, + "step": 2550 + }, + { + "epoch": 1.3, + "grad_norm": 2.526740312576294, + "learning_rate": 9.791658291457288e-06, + "loss": 0.1678, + "step": 2575 + }, + { + "epoch": 1.31, + "grad_norm": 3.2605721950531006, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1597, + "step": 2600 + }, + { + "epoch": 1.32, + "grad_norm": 2.9491026401519775, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1643, + "step": 2625 + }, + { + "epoch": 1.34, + "grad_norm": 3.419473886489868, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1631, + "step": 2650 + }, + { + "epoch": 1.35, + "grad_norm": 3.4714505672454834, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1637, + "step": 2675 + }, + { + "epoch": 1.36, + "grad_norm": 2.919762372970581, + "learning_rate": 9.779095477386934e-06, + "loss": 0.1614, + "step": 2700 + }, + { + "epoch": 1.37, + "grad_norm": 2.6376657485961914, + "learning_rate": 9.776582914572866e-06, + "loss": 0.1617, + "step": 2725 + }, + { + "epoch": 1.39, + "grad_norm": 2.929567575454712, + "learning_rate": 9.774070351758795e-06, + "loss": 0.1595, + "step": 2750 + }, + { + "epoch": 1.4, + "grad_norm": 2.939025402069092, + "learning_rate": 9.771557788944724e-06, + "loss": 0.1589, + "step": 2775 + }, + { + "epoch": 1.41, + "grad_norm": 3.043203115463257, + "learning_rate": 9.769045226130655e-06, + "loss": 0.1623, + "step": 2800 + }, + { + "epoch": 1.42, + "grad_norm": 2.872321605682373, + "learning_rate": 9.766532663316583e-06, + "loss": 0.1527, + "step": 2825 + }, + { + "epoch": 1.44, + "grad_norm": 2.8521175384521484, + "learning_rate": 9.764020100502514e-06, + "loss": 0.1582, + "step": 2850 + }, + { + "epoch": 1.45, + "grad_norm": 2.8888206481933594, + "learning_rate": 9.761507537688443e-06, + "loss": 0.162, + "step": 2875 + }, + { + "epoch": 1.46, + "grad_norm": 2.583472490310669, + "learning_rate": 9.758994974874372e-06, + "loss": 0.1559, + "step": 2900 + }, + { + "epoch": 1.47, + "grad_norm": 3.0001070499420166, + "learning_rate": 9.756482412060302e-06, + "loss": 0.1557, + "step": 2925 + }, + { + "epoch": 1.49, + "grad_norm": 3.3756625652313232, + "learning_rate": 9.753969849246233e-06, + "loss": 0.1603, + "step": 2950 + }, + { + "epoch": 1.5, + "grad_norm": 2.995574951171875, + "learning_rate": 9.75145728643216e-06, + "loss": 0.1539, + "step": 2975 + }, + { + "epoch": 1.51, + "grad_norm": 2.927722692489624, + "learning_rate": 9.748944723618091e-06, + "loss": 0.1579, + "step": 3000 + }, + { + "epoch": 1.51, + "eval_loss": 0.16459061205387115, + "eval_runtime": 645.71, + "eval_samples_per_second": 2.182, + "eval_steps_per_second": 2.182, + "eval_wer": 26.675890695261156, + "step": 3000 + }, + { + "epoch": 1.52, + "grad_norm": 3.3339335918426514, + "learning_rate": 9.74643216080402e-06, + "loss": 0.1559, + "step": 3025 + }, + { + "epoch": 1.54, + "grad_norm": 3.0809624195098877, + "learning_rate": 9.74391959798995e-06, + "loss": 0.1561, + "step": 3050 + }, + { + "epoch": 1.55, + "grad_norm": 2.9823570251464844, + "learning_rate": 9.741407035175881e-06, + "loss": 0.1599, + "step": 3075 + }, + { + "epoch": 1.56, + "grad_norm": 2.7149410247802734, + "learning_rate": 9.738894472361809e-06, + "loss": 0.1469, + "step": 3100 + }, + { + "epoch": 1.57, + "grad_norm": 3.8664979934692383, + "learning_rate": 9.73638190954774e-06, + "loss": 0.1545, + "step": 3125 + }, + { + "epoch": 1.59, + "grad_norm": 2.9406516551971436, + "learning_rate": 9.733869346733669e-06, + "loss": 0.154, + "step": 3150 + }, + { + "epoch": 1.6, + "grad_norm": 3.1379194259643555, + "learning_rate": 9.731356783919598e-06, + "loss": 0.1545, + "step": 3175 + }, + { + "epoch": 1.61, + "grad_norm": 3.072883129119873, + "learning_rate": 9.72884422110553e-06, + "loss": 0.1545, + "step": 3200 + }, + { + "epoch": 1.62, + "grad_norm": 3.329160213470459, + "learning_rate": 9.726331658291459e-06, + "loss": 0.151, + "step": 3225 + }, + { + "epoch": 1.64, + "grad_norm": 2.8267323970794678, + "learning_rate": 9.723819095477388e-06, + "loss": 0.1471, + "step": 3250 + }, + { + "epoch": 1.65, + "grad_norm": 3.380469799041748, + "learning_rate": 9.721306532663317e-06, + "loss": 0.1563, + "step": 3275 + }, + { + "epoch": 1.66, + "grad_norm": 2.6737258434295654, + "learning_rate": 9.718793969849247e-06, + "loss": 0.1505, + "step": 3300 + }, + { + "epoch": 1.68, + "grad_norm": 2.920942783355713, + "learning_rate": 9.716281407035176e-06, + "loss": 0.1474, + "step": 3325 + }, + { + "epoch": 1.69, + "grad_norm": 2.8349497318267822, + "learning_rate": 9.713768844221107e-06, + "loss": 0.1499, + "step": 3350 + }, + { + "epoch": 1.7, + "grad_norm": 2.9991960525512695, + "learning_rate": 9.711256281407035e-06, + "loss": 0.1457, + "step": 3375 + }, + { + "epoch": 1.71, + "grad_norm": 3.211735486984253, + "learning_rate": 9.708743718592966e-06, + "loss": 0.1488, + "step": 3400 + }, + { + "epoch": 1.73, + "grad_norm": 2.8068394660949707, + "learning_rate": 9.706231155778895e-06, + "loss": 0.144, + "step": 3425 + }, + { + "epoch": 1.74, + "grad_norm": 3.149648904800415, + "learning_rate": 9.703718592964824e-06, + "loss": 0.1476, + "step": 3450 + }, + { + "epoch": 1.75, + "grad_norm": 2.5642662048339844, + "learning_rate": 9.701206030150755e-06, + "loss": 0.1452, + "step": 3475 + }, + { + "epoch": 1.76, + "grad_norm": 3.3796427249908447, + "learning_rate": 9.698693467336685e-06, + "loss": 0.1498, + "step": 3500 + }, + { + "epoch": 1.78, + "grad_norm": 2.480526924133301, + "learning_rate": 9.696180904522614e-06, + "loss": 0.1438, + "step": 3525 + }, + { + "epoch": 1.79, + "grad_norm": 2.742117166519165, + "learning_rate": 9.693668341708543e-06, + "loss": 0.1454, + "step": 3550 + }, + { + "epoch": 1.8, + "grad_norm": 3.248408555984497, + "learning_rate": 9.691155778894473e-06, + "loss": 0.1449, + "step": 3575 + }, + { + "epoch": 1.81, + "grad_norm": 2.808727264404297, + "learning_rate": 9.688643216080402e-06, + "loss": 0.1417, + "step": 3600 + }, + { + "epoch": 1.83, + "grad_norm": 2.612992525100708, + "learning_rate": 9.686130653266333e-06, + "loss": 0.1491, + "step": 3625 + }, + { + "epoch": 1.84, + "grad_norm": 2.978003978729248, + "learning_rate": 9.683618090452262e-06, + "loss": 0.1454, + "step": 3650 + }, + { + "epoch": 1.85, + "grad_norm": 2.9820547103881836, + "learning_rate": 9.681105527638192e-06, + "loss": 0.1381, + "step": 3675 + }, + { + "epoch": 1.86, + "grad_norm": 2.7569639682769775, + "learning_rate": 9.678592964824121e-06, + "loss": 0.1471, + "step": 3700 + }, + { + "epoch": 1.88, + "grad_norm": 3.0688931941986084, + "learning_rate": 9.67608040201005e-06, + "loss": 0.1425, + "step": 3725 + }, + { + "epoch": 1.89, + "grad_norm": 2.921603202819824, + "learning_rate": 9.673567839195981e-06, + "loss": 0.1472, + "step": 3750 + }, + { + "epoch": 1.9, + "grad_norm": 2.9307756423950195, + "learning_rate": 9.67105527638191e-06, + "loss": 0.1438, + "step": 3775 + }, + { + "epoch": 1.91, + "grad_norm": 3.2151060104370117, + "learning_rate": 9.66854271356784e-06, + "loss": 0.1428, + "step": 3800 + }, + { + "epoch": 1.93, + "grad_norm": 2.9010095596313477, + "learning_rate": 9.666030150753771e-06, + "loss": 0.1465, + "step": 3825 + }, + { + "epoch": 1.94, + "grad_norm": 2.832845687866211, + "learning_rate": 9.663517587939699e-06, + "loss": 0.1429, + "step": 3850 + }, + { + "epoch": 1.95, + "grad_norm": 2.6402933597564697, + "learning_rate": 9.66100502512563e-06, + "loss": 0.1342, + "step": 3875 + }, + { + "epoch": 1.96, + "grad_norm": 2.6498653888702393, + "learning_rate": 9.658492462311559e-06, + "loss": 0.1402, + "step": 3900 + }, + { + "epoch": 1.98, + "grad_norm": 2.972980260848999, + "learning_rate": 9.655979899497488e-06, + "loss": 0.1387, + "step": 3925 + }, + { + "epoch": 1.99, + "grad_norm": 3.229097843170166, + "learning_rate": 9.653467336683418e-06, + "loss": 0.1432, + "step": 3950 + }, + { + "epoch": 2.0, + "grad_norm": 2.3604576587677, + "learning_rate": 9.650954773869347e-06, + "loss": 0.1356, + "step": 3975 + }, + { + "epoch": 2.02, + "grad_norm": 2.725226402282715, + "learning_rate": 9.648442211055276e-06, + "loss": 0.1179, + "step": 4000 + }, + { + "epoch": 2.02, + "eval_loss": 0.15386536717414856, + "eval_runtime": 646.9871, + "eval_samples_per_second": 2.178, + "eval_steps_per_second": 2.178, + "eval_wer": 25.105499827049467, + "step": 4000 + }, + { + "epoch": 2.03, + "grad_norm": 3.045943260192871, + "learning_rate": 9.645929648241207e-06, + "loss": 0.1157, + "step": 4025 + }, + { + "epoch": 2.04, + "grad_norm": 2.593027353286743, + "learning_rate": 9.643417085427137e-06, + "loss": 0.116, + "step": 4050 + }, + { + "epoch": 2.05, + "grad_norm": 2.6555795669555664, + "learning_rate": 9.640904522613066e-06, + "loss": 0.1187, + "step": 4075 + }, + { + "epoch": 2.07, + "grad_norm": 2.776094913482666, + "learning_rate": 9.638391959798997e-06, + "loss": 0.1155, + "step": 4100 + }, + { + "epoch": 2.08, + "grad_norm": 3.2772326469421387, + "learning_rate": 9.635879396984925e-06, + "loss": 0.1152, + "step": 4125 + }, + { + "epoch": 2.09, + "grad_norm": 3.0243771076202393, + "learning_rate": 9.633366834170856e-06, + "loss": 0.1199, + "step": 4150 + }, + { + "epoch": 2.1, + "grad_norm": 2.6247713565826416, + "learning_rate": 9.630854271356785e-06, + "loss": 0.117, + "step": 4175 + }, + { + "epoch": 2.12, + "grad_norm": 2.80692720413208, + "learning_rate": 9.628341708542714e-06, + "loss": 0.1115, + "step": 4200 + }, + { + "epoch": 2.13, + "grad_norm": 2.899824380874634, + "learning_rate": 9.625829145728644e-06, + "loss": 0.1155, + "step": 4225 + }, + { + "epoch": 2.14, + "grad_norm": 2.8152291774749756, + "learning_rate": 9.623316582914573e-06, + "loss": 0.1129, + "step": 4250 + }, + { + "epoch": 2.15, + "grad_norm": 2.659745931625366, + "learning_rate": 9.620804020100504e-06, + "loss": 0.1143, + "step": 4275 + }, + { + "epoch": 2.17, + "grad_norm": 2.8360331058502197, + "learning_rate": 9.618291457286433e-06, + "loss": 0.1207, + "step": 4300 + }, + { + "epoch": 2.18, + "grad_norm": 3.03417706489563, + "learning_rate": 9.615778894472363e-06, + "loss": 0.1245, + "step": 4325 + }, + { + "epoch": 2.19, + "grad_norm": 2.5418951511383057, + "learning_rate": 9.613266331658292e-06, + "loss": 0.1174, + "step": 4350 + }, + { + "epoch": 2.2, + "grad_norm": 2.902958631515503, + "learning_rate": 9.610753768844223e-06, + "loss": 0.1158, + "step": 4375 + }, + { + "epoch": 2.22, + "grad_norm": 3.026547431945801, + "learning_rate": 9.60824120603015e-06, + "loss": 0.1104, + "step": 4400 + }, + { + "epoch": 2.23, + "grad_norm": 2.8732357025146484, + "learning_rate": 9.605728643216082e-06, + "loss": 0.1163, + "step": 4425 + }, + { + "epoch": 2.24, + "grad_norm": 2.730586528778076, + "learning_rate": 9.60321608040201e-06, + "loss": 0.1128, + "step": 4450 + }, + { + "epoch": 2.25, + "grad_norm": 2.59441876411438, + "learning_rate": 9.60070351758794e-06, + "loss": 0.1205, + "step": 4475 + }, + { + "epoch": 2.27, + "grad_norm": 2.5764012336730957, + "learning_rate": 9.598190954773871e-06, + "loss": 0.1198, + "step": 4500 + }, + { + "epoch": 2.28, + "grad_norm": 2.6175150871276855, + "learning_rate": 9.595678391959799e-06, + "loss": 0.117, + "step": 4525 + }, + { + "epoch": 2.29, + "grad_norm": 2.7798826694488525, + "learning_rate": 9.59316582914573e-06, + "loss": 0.1101, + "step": 4550 + }, + { + "epoch": 2.3, + "grad_norm": 2.900200605392456, + "learning_rate": 9.59065326633166e-06, + "loss": 0.1102, + "step": 4575 + }, + { + "epoch": 2.32, + "grad_norm": 2.6330630779266357, + "learning_rate": 9.588140703517588e-06, + "loss": 0.1164, + "step": 4600 + }, + { + "epoch": 2.33, + "grad_norm": 3.1691510677337646, + "learning_rate": 9.585628140703518e-06, + "loss": 0.1127, + "step": 4625 + }, + { + "epoch": 2.34, + "grad_norm": 2.9986257553100586, + "learning_rate": 9.583115577889449e-06, + "loss": 0.1118, + "step": 4650 + }, + { + "epoch": 2.36, + "grad_norm": 2.9816648960113525, + "learning_rate": 9.580603015075378e-06, + "loss": 0.1124, + "step": 4675 + }, + { + "epoch": 2.37, + "grad_norm": 2.7380642890930176, + "learning_rate": 9.578090452261307e-06, + "loss": 0.1101, + "step": 4700 + }, + { + "epoch": 2.38, + "grad_norm": 2.506922721862793, + "learning_rate": 9.575577889447237e-06, + "loss": 0.1101, + "step": 4725 + }, + { + "epoch": 2.39, + "grad_norm": 2.7849462032318115, + "learning_rate": 9.573065326633166e-06, + "loss": 0.1128, + "step": 4750 + }, + { + "epoch": 2.41, + "grad_norm": 2.457066774368286, + "learning_rate": 9.570552763819097e-06, + "loss": 0.1139, + "step": 4775 + }, + { + "epoch": 2.42, + "grad_norm": 2.9274463653564453, + "learning_rate": 9.568040201005025e-06, + "loss": 0.1099, + "step": 4800 + }, + { + "epoch": 2.43, + "grad_norm": 2.54990291595459, + "learning_rate": 9.565527638190956e-06, + "loss": 0.1099, + "step": 4825 + }, + { + "epoch": 2.44, + "grad_norm": 3.0029959678649902, + "learning_rate": 9.563015075376885e-06, + "loss": 0.1132, + "step": 4850 + }, + { + "epoch": 2.46, + "grad_norm": 2.9913110733032227, + "learning_rate": 9.560502512562814e-06, + "loss": 0.1134, + "step": 4875 + }, + { + "epoch": 2.47, + "grad_norm": 2.846057653427124, + "learning_rate": 9.557989949748745e-06, + "loss": 0.112, + "step": 4900 + }, + { + "epoch": 2.48, + "grad_norm": 2.566232681274414, + "learning_rate": 9.555477386934675e-06, + "loss": 0.1106, + "step": 4925 + }, + { + "epoch": 2.49, + "grad_norm": 2.8931262493133545, + "learning_rate": 9.552964824120604e-06, + "loss": 0.1099, + "step": 4950 + }, + { + "epoch": 2.51, + "grad_norm": 2.9756979942321777, + "learning_rate": 9.550552763819096e-06, + "loss": 0.1125, + "step": 4975 + }, + { + "epoch": 2.52, + "grad_norm": 2.837172508239746, + "learning_rate": 9.548040201005025e-06, + "loss": 0.1083, + "step": 5000 + }, + { + "epoch": 2.52, + "eval_loss": 0.1495467722415924, + "eval_runtime": 644.1684, + "eval_samples_per_second": 2.187, + "eval_steps_per_second": 2.187, + "eval_wer": 24.628156347284676, + "step": 5000 + }, + { + "epoch": 2.53, + "grad_norm": 2.499260902404785, + "learning_rate": 9.545527638190956e-06, + "loss": 0.1115, + "step": 5025 + }, + { + "epoch": 2.54, + "grad_norm": 2.9325485229492188, + "learning_rate": 9.543015075376885e-06, + "loss": 0.1087, + "step": 5050 + }, + { + "epoch": 2.56, + "grad_norm": 2.4854938983917236, + "learning_rate": 9.540502512562815e-06, + "loss": 0.1046, + "step": 5075 + }, + { + "epoch": 2.57, + "grad_norm": 3.0302836894989014, + "learning_rate": 9.537989949748746e-06, + "loss": 0.1105, + "step": 5100 + }, + { + "epoch": 2.58, + "grad_norm": 2.6300787925720215, + "learning_rate": 9.535477386934673e-06, + "loss": 0.1093, + "step": 5125 + }, + { + "epoch": 2.59, + "grad_norm": 3.281339168548584, + "learning_rate": 9.532964824120604e-06, + "loss": 0.1133, + "step": 5150 + }, + { + "epoch": 2.61, + "grad_norm": 2.9684898853302, + "learning_rate": 9.530452261306534e-06, + "loss": 0.1072, + "step": 5175 + }, + { + "epoch": 2.62, + "grad_norm": 2.7068192958831787, + "learning_rate": 9.527939698492463e-06, + "loss": 0.1108, + "step": 5200 + }, + { + "epoch": 2.63, + "grad_norm": 2.7589058876037598, + "learning_rate": 9.525427135678392e-06, + "loss": 0.1094, + "step": 5225 + }, + { + "epoch": 2.64, + "grad_norm": 2.988163709640503, + "learning_rate": 9.522914572864322e-06, + "loss": 0.1055, + "step": 5250 + }, + { + "epoch": 2.66, + "grad_norm": 2.748220443725586, + "learning_rate": 9.520402010050253e-06, + "loss": 0.1028, + "step": 5275 + }, + { + "epoch": 2.67, + "grad_norm": 2.696606397628784, + "learning_rate": 9.517889447236182e-06, + "loss": 0.1013, + "step": 5300 + }, + { + "epoch": 2.68, + "grad_norm": 2.4986040592193604, + "learning_rate": 9.515376884422111e-06, + "loss": 0.1094, + "step": 5325 + }, + { + "epoch": 2.7, + "grad_norm": 2.414533853530884, + "learning_rate": 9.51286432160804e-06, + "loss": 0.1085, + "step": 5350 + }, + { + "epoch": 2.71, + "grad_norm": 2.5464062690734863, + "learning_rate": 9.510351758793972e-06, + "loss": 0.1029, + "step": 5375 + }, + { + "epoch": 2.72, + "grad_norm": 3.065047264099121, + "learning_rate": 9.5078391959799e-06, + "loss": 0.1013, + "step": 5400 + }, + { + "epoch": 2.73, + "grad_norm": 3.1086618900299072, + "learning_rate": 9.50532663316583e-06, + "loss": 0.1066, + "step": 5425 + }, + { + "epoch": 2.75, + "grad_norm": 3.5446290969848633, + "learning_rate": 9.50281407035176e-06, + "loss": 0.1098, + "step": 5450 + }, + { + "epoch": 2.76, + "grad_norm": 2.7708733081817627, + "learning_rate": 9.500301507537689e-06, + "loss": 0.109, + "step": 5475 + }, + { + "epoch": 2.77, + "grad_norm": 2.786681652069092, + "learning_rate": 9.49778894472362e-06, + "loss": 0.1062, + "step": 5500 + }, + { + "epoch": 2.78, + "grad_norm": 2.927002429962158, + "learning_rate": 9.49527638190955e-06, + "loss": 0.1053, + "step": 5525 + }, + { + "epoch": 2.8, + "grad_norm": 2.6356334686279297, + "learning_rate": 9.492763819095479e-06, + "loss": 0.1098, + "step": 5550 + }, + { + "epoch": 2.81, + "grad_norm": 2.5846285820007324, + "learning_rate": 9.490251256281408e-06, + "loss": 0.1026, + "step": 5575 + }, + { + "epoch": 2.82, + "grad_norm": 3.0148589611053467, + "learning_rate": 9.487738693467337e-06, + "loss": 0.108, + "step": 5600 + }, + { + "epoch": 2.83, + "grad_norm": 2.6467926502227783, + "learning_rate": 9.485226130653267e-06, + "loss": 0.1031, + "step": 5625 + }, + { + "epoch": 2.85, + "grad_norm": 2.8061394691467285, + "learning_rate": 9.482713567839198e-06, + "loss": 0.1045, + "step": 5650 + }, + { + "epoch": 2.86, + "grad_norm": 2.6581783294677734, + "learning_rate": 9.480201005025125e-06, + "loss": 0.105, + "step": 5675 + }, + { + "epoch": 2.87, + "grad_norm": 2.814573049545288, + "learning_rate": 9.477688442211056e-06, + "loss": 0.107, + "step": 5700 + }, + { + "epoch": 2.88, + "grad_norm": 2.7229998111724854, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0983, + "step": 5725 + }, + { + "epoch": 2.9, + "grad_norm": 2.648622989654541, + "learning_rate": 9.472663316582915e-06, + "loss": 0.108, + "step": 5750 + }, + { + "epoch": 2.91, + "grad_norm": 2.546680212020874, + "learning_rate": 9.470150753768846e-06, + "loss": 0.106, + "step": 5775 + }, + { + "epoch": 2.92, + "grad_norm": 2.91450834274292, + "learning_rate": 9.467638190954775e-06, + "loss": 0.1057, + "step": 5800 + }, + { + "epoch": 2.93, + "grad_norm": 2.5046870708465576, + "learning_rate": 9.465125628140704e-06, + "loss": 0.1045, + "step": 5825 + }, + { + "epoch": 2.95, + "grad_norm": 2.452519178390503, + "learning_rate": 9.462613065326634e-06, + "loss": 0.1026, + "step": 5850 + }, + { + "epoch": 2.96, + "grad_norm": 2.6275572776794434, + "learning_rate": 9.460100502512563e-06, + "loss": 0.1016, + "step": 5875 + }, + { + "epoch": 2.97, + "grad_norm": 2.612506628036499, + "learning_rate": 9.457587939698494e-06, + "loss": 0.1037, + "step": 5900 + }, + { + "epoch": 2.98, + "grad_norm": 2.721682548522949, + "learning_rate": 9.455075376884423e-06, + "loss": 0.1022, + "step": 5925 + }, + { + "epoch": 3.0, + "grad_norm": 2.6826882362365723, + "learning_rate": 9.452562814070353e-06, + "loss": 0.1005, + "step": 5950 + }, + { + "epoch": 3.01, + "grad_norm": 2.7688844203948975, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0843, + "step": 5975 + }, + { + "epoch": 3.02, + "grad_norm": 2.4665892124176025, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0804, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.1464279592037201, + "eval_runtime": 645.9237, + "eval_samples_per_second": 2.181, + "eval_steps_per_second": 2.181, + "eval_wer": 24.03320650294016, + "step": 6000 + }, + { + "epoch": 3.04, + "grad_norm": 1.9517433643341064, + "learning_rate": 9.44502512562814e-06, + "loss": 0.081, + "step": 6025 + }, + { + "epoch": 3.05, + "grad_norm": 2.6357505321502686, + "learning_rate": 9.442512562814072e-06, + "loss": 0.078, + "step": 6050 + }, + { + "epoch": 3.06, + "grad_norm": 2.9394261837005615, + "learning_rate": 9.440000000000001e-06, + "loss": 0.083, + "step": 6075 + }, + { + "epoch": 3.07, + "grad_norm": 2.944277048110962, + "learning_rate": 9.43748743718593e-06, + "loss": 0.079, + "step": 6100 + }, + { + "epoch": 3.09, + "grad_norm": 2.566026210784912, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0831, + "step": 6125 + }, + { + "epoch": 3.1, + "grad_norm": 2.319978713989258, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0795, + "step": 6150 + }, + { + "epoch": 3.11, + "grad_norm": 2.8877954483032227, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0785, + "step": 6175 + }, + { + "epoch": 3.12, + "grad_norm": 2.5460472106933594, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0801, + "step": 6200 + }, + { + "epoch": 3.14, + "grad_norm": 2.396923303604126, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0826, + "step": 6225 + }, + { + "epoch": 3.15, + "grad_norm": 2.4849960803985596, + "learning_rate": 9.422412060301508e-06, + "loss": 0.081, + "step": 6250 + }, + { + "epoch": 3.16, + "grad_norm": 2.4838786125183105, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0838, + "step": 6275 + }, + { + "epoch": 3.17, + "grad_norm": 2.7214527130126953, + "learning_rate": 9.417386934673367e-06, + "loss": 0.082, + "step": 6300 + }, + { + "epoch": 3.19, + "grad_norm": 2.787931203842163, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0817, + "step": 6325 + }, + { + "epoch": 3.2, + "grad_norm": 2.625025987625122, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0805, + "step": 6350 + }, + { + "epoch": 3.21, + "grad_norm": 2.490147113800049, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0796, + "step": 6375 + }, + { + "epoch": 3.22, + "grad_norm": 2.4763355255126953, + "learning_rate": 9.407336683417086e-06, + "loss": 0.083, + "step": 6400 + }, + { + "epoch": 3.24, + "grad_norm": 3.0030245780944824, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0818, + "step": 6425 + }, + { + "epoch": 3.25, + "grad_norm": 2.714149236679077, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0819, + "step": 6450 + }, + { + "epoch": 3.26, + "grad_norm": 2.4112088680267334, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0825, + "step": 6475 + }, + { + "epoch": 3.27, + "grad_norm": 2.633383274078369, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0804, + "step": 6500 + }, + { + "epoch": 3.29, + "grad_norm": 2.265183687210083, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0799, + "step": 6525 + }, + { + "epoch": 3.3, + "grad_norm": 2.4261343479156494, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0763, + "step": 6550 + }, + { + "epoch": 3.31, + "grad_norm": 2.6179676055908203, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0796, + "step": 6575 + }, + { + "epoch": 3.32, + "grad_norm": 2.648509979248047, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0823, + "step": 6600 + }, + { + "epoch": 3.34, + "grad_norm": 2.4044175148010254, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0791, + "step": 6625 + }, + { + "epoch": 3.35, + "grad_norm": 2.3800647258758545, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0788, + "step": 6650 + }, + { + "epoch": 3.36, + "grad_norm": 3.1068170070648193, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0811, + "step": 6675 + }, + { + "epoch": 3.38, + "grad_norm": 2.5507326126098633, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0792, + "step": 6700 + }, + { + "epoch": 3.39, + "grad_norm": 2.522341728210449, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0784, + "step": 6725 + }, + { + "epoch": 3.4, + "grad_norm": 2.739595890045166, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0791, + "step": 6750 + }, + { + "epoch": 3.41, + "grad_norm": 2.401925802230835, + "learning_rate": 9.36964824120603e-06, + "loss": 0.082, + "step": 6775 + }, + { + "epoch": 3.43, + "grad_norm": 3.219940662384033, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0817, + "step": 6800 + }, + { + "epoch": 3.44, + "grad_norm": 3.2623674869537354, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0791, + "step": 6825 + }, + { + "epoch": 3.45, + "grad_norm": 2.358572244644165, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0755, + "step": 6850 + }, + { + "epoch": 3.46, + "grad_norm": 3.0506913661956787, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0792, + "step": 6875 + }, + { + "epoch": 3.48, + "grad_norm": 2.2486371994018555, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0807, + "step": 6900 + }, + { + "epoch": 3.49, + "grad_norm": 2.8625311851501465, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0787, + "step": 6925 + }, + { + "epoch": 3.5, + "grad_norm": 2.4400510787963867, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0804, + "step": 6950 + }, + { + "epoch": 3.51, + "grad_norm": 2.5003409385681152, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0789, + "step": 6975 + }, + { + "epoch": 3.53, + "grad_norm": 2.5204198360443115, + "learning_rate": 9.347035175879398e-06, + "loss": 0.077, + "step": 7000 + }, + { + "epoch": 3.53, + "eval_loss": 0.15057513117790222, + "eval_runtime": 646.2962, + "eval_samples_per_second": 2.18, + "eval_steps_per_second": 2.18, + "eval_wer": 24.240747146316153, + "step": 7000 + }, + { + "epoch": 3.54, + "grad_norm": 2.7666544914245605, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0779, + "step": 7025 + }, + { + "epoch": 3.55, + "grad_norm": 3.256955146789551, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0799, + "step": 7050 + }, + { + "epoch": 3.56, + "grad_norm": 2.829012155532837, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0811, + "step": 7075 + }, + { + "epoch": 3.58, + "grad_norm": 2.6960537433624268, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0787, + "step": 7100 + }, + { + "epoch": 3.59, + "grad_norm": 2.7486023902893066, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0828, + "step": 7125 + }, + { + "epoch": 3.6, + "grad_norm": 2.8527791500091553, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0813, + "step": 7150 + }, + { + "epoch": 3.61, + "grad_norm": 2.6692473888397217, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0768, + "step": 7175 + }, + { + "epoch": 3.63, + "grad_norm": 2.2904937267303467, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0788, + "step": 7200 + }, + { + "epoch": 3.64, + "grad_norm": 3.499237060546875, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0764, + "step": 7225 + }, + { + "epoch": 3.65, + "grad_norm": 2.6315267086029053, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0746, + "step": 7250 + }, + { + "epoch": 3.66, + "grad_norm": 3.006561040878296, + "learning_rate": 9.319396984924624e-06, + "loss": 0.076, + "step": 7275 + }, + { + "epoch": 3.68, + "grad_norm": 2.663254976272583, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0784, + "step": 7300 + }, + { + "epoch": 3.69, + "grad_norm": 2.6093807220458984, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0768, + "step": 7325 + }, + { + "epoch": 3.7, + "grad_norm": 2.7296223640441895, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0761, + "step": 7350 + }, + { + "epoch": 3.72, + "grad_norm": 2.5256307125091553, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0771, + "step": 7375 + }, + { + "epoch": 3.73, + "grad_norm": 2.707585573196411, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0753, + "step": 7400 + }, + { + "epoch": 3.74, + "grad_norm": 3.1183390617370605, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0794, + "step": 7425 + }, + { + "epoch": 3.75, + "grad_norm": 2.302847385406494, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0772, + "step": 7450 + }, + { + "epoch": 3.77, + "grad_norm": 2.5927348136901855, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0749, + "step": 7475 + }, + { + "epoch": 3.78, + "grad_norm": 2.6165075302124023, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0788, + "step": 7500 + }, + { + "epoch": 3.79, + "grad_norm": 2.674424171447754, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0768, + "step": 7525 + }, + { + "epoch": 3.8, + "grad_norm": 2.872770309448242, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0811, + "step": 7550 + }, + { + "epoch": 3.82, + "grad_norm": 2.9125661849975586, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0739, + "step": 7575 + }, + { + "epoch": 3.83, + "grad_norm": 2.61698317527771, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0771, + "step": 7600 + }, + { + "epoch": 3.84, + "grad_norm": 3.0637826919555664, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0791, + "step": 7625 + }, + { + "epoch": 3.85, + "grad_norm": 2.3239142894744873, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0773, + "step": 7650 + }, + { + "epoch": 3.87, + "grad_norm": 2.6094796657562256, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0755, + "step": 7675 + }, + { + "epoch": 3.88, + "grad_norm": 2.530613422393799, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0749, + "step": 7700 + }, + { + "epoch": 3.89, + "grad_norm": 3.653653621673584, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0743, + "step": 7725 + }, + { + "epoch": 3.9, + "grad_norm": 2.6792755126953125, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0763, + "step": 7750 + }, + { + "epoch": 3.92, + "grad_norm": 2.9613704681396484, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0723, + "step": 7775 + }, + { + "epoch": 3.93, + "grad_norm": 2.2027602195739746, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0789, + "step": 7800 + }, + { + "epoch": 3.94, + "grad_norm": 2.569223165512085, + "learning_rate": 9.264120603015076e-06, + "loss": 0.072, + "step": 7825 + }, + { + "epoch": 3.95, + "grad_norm": 2.3976686000823975, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0737, + "step": 7850 + }, + { + "epoch": 3.97, + "grad_norm": 2.5629305839538574, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0762, + "step": 7875 + }, + { + "epoch": 3.98, + "grad_norm": 2.397019147872925, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0758, + "step": 7900 + }, + { + "epoch": 3.99, + "grad_norm": 2.764029026031494, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0723, + "step": 7925 + }, + { + "epoch": 4.01, + "grad_norm": 2.1665878295898438, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0654, + "step": 7950 + }, + { + "epoch": 4.02, + "grad_norm": 2.1100118160247803, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0557, + "step": 7975 + }, + { + "epoch": 4.03, + "grad_norm": 2.4302258491516113, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0539, + "step": 8000 + }, + { + "epoch": 4.03, + "eval_loss": 0.15238162875175476, + "eval_runtime": 644.4842, + "eval_samples_per_second": 2.186, + "eval_steps_per_second": 2.186, + "eval_wer": 23.85333794534763, + "step": 8000 + }, + { + "epoch": 4.04, + "grad_norm": 2.350156545639038, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0541, + "step": 8025 + }, + { + "epoch": 4.06, + "grad_norm": 2.4464669227600098, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0538, + "step": 8050 + }, + { + "epoch": 4.07, + "grad_norm": 2.123314619064331, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0545, + "step": 8075 + }, + { + "epoch": 4.08, + "grad_norm": 2.584456443786621, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0563, + "step": 8100 + }, + { + "epoch": 4.09, + "grad_norm": 2.46744704246521, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0548, + "step": 8125 + }, + { + "epoch": 4.11, + "grad_norm": 2.7734973430633545, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0593, + "step": 8150 + }, + { + "epoch": 4.12, + "grad_norm": 2.5305910110473633, + "learning_rate": 9.228944723618091e-06, + "loss": 0.058, + "step": 8175 + }, + { + "epoch": 4.13, + "grad_norm": 2.668431043624878, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0559, + "step": 8200 + }, + { + "epoch": 4.14, + "grad_norm": 2.23030161857605, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0553, + "step": 8225 + }, + { + "epoch": 4.16, + "grad_norm": 2.2469186782836914, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0546, + "step": 8250 + }, + { + "epoch": 4.17, + "grad_norm": 2.3184828758239746, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0551, + "step": 8275 + }, + { + "epoch": 4.18, + "grad_norm": 2.3341612815856934, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0567, + "step": 8300 + }, + { + "epoch": 4.19, + "grad_norm": 2.4817066192626953, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0531, + "step": 8325 + }, + { + "epoch": 4.21, + "grad_norm": 2.299858808517456, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0545, + "step": 8350 + }, + { + "epoch": 4.22, + "grad_norm": 2.6612911224365234, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0546, + "step": 8375 + }, + { + "epoch": 4.23, + "grad_norm": 2.7073473930358887, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0551, + "step": 8400 + }, + { + "epoch": 4.24, + "grad_norm": 2.435814142227173, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0538, + "step": 8425 + }, + { + "epoch": 4.26, + "grad_norm": 2.920555353164673, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0581, + "step": 8450 + }, + { + "epoch": 4.27, + "grad_norm": 2.4426980018615723, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0527, + "step": 8475 + }, + { + "epoch": 4.28, + "grad_norm": 2.282799243927002, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0539, + "step": 8500 + }, + { + "epoch": 4.29, + "grad_norm": 2.3802311420440674, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0544, + "step": 8525 + }, + { + "epoch": 4.31, + "grad_norm": 2.6062004566192627, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0539, + "step": 8550 + }, + { + "epoch": 4.32, + "grad_norm": 2.3153014183044434, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0576, + "step": 8575 + }, + { + "epoch": 4.33, + "grad_norm": 2.6936705112457275, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0518, + "step": 8600 + }, + { + "epoch": 4.35, + "grad_norm": 2.5648863315582275, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0585, + "step": 8625 + }, + { + "epoch": 4.36, + "grad_norm": 2.5685312747955322, + "learning_rate": 9.181206030150754e-06, + "loss": 0.057, + "step": 8650 + }, + { + "epoch": 4.37, + "grad_norm": 2.8490381240844727, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0543, + "step": 8675 + }, + { + "epoch": 4.38, + "grad_norm": 2.2152018547058105, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0563, + "step": 8700 + }, + { + "epoch": 4.4, + "grad_norm": 2.69919490814209, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0553, + "step": 8725 + }, + { + "epoch": 4.41, + "grad_norm": 2.7225608825683594, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0575, + "step": 8750 + }, + { + "epoch": 4.42, + "grad_norm": 2.559675455093384, + "learning_rate": 9.168643216080404e-06, + "loss": 0.0553, + "step": 8775 + }, + { + "epoch": 4.43, + "grad_norm": 2.781768798828125, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0578, + "step": 8800 + }, + { + "epoch": 4.45, + "grad_norm": 2.8981781005859375, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0542, + "step": 8825 + }, + { + "epoch": 4.46, + "grad_norm": 2.6946628093719482, + "learning_rate": 9.161105527638192e-06, + "loss": 0.059, + "step": 8850 + }, + { + "epoch": 4.47, + "grad_norm": 2.129403591156006, + "learning_rate": 9.158592964824121e-06, + "loss": 0.0509, + "step": 8875 + }, + { + "epoch": 4.48, + "grad_norm": 3.163231372833252, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0549, + "step": 8900 + }, + { + "epoch": 4.5, + "grad_norm": 2.8405816555023193, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0563, + "step": 8925 + }, + { + "epoch": 4.51, + "grad_norm": 2.1925594806671143, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0553, + "step": 8950 + }, + { + "epoch": 4.52, + "grad_norm": 2.4375970363616943, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0584, + "step": 8975 + }, + { + "epoch": 4.53, + "grad_norm": 2.4587666988372803, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0553, + "step": 9000 + }, + { + "epoch": 4.53, + "eval_loss": 0.1637195497751236, + "eval_runtime": 651.0456, + "eval_samples_per_second": 2.164, + "eval_steps_per_second": 2.164, + "eval_wer": 24.344517468004153, + "step": 9000 + }, + { + "epoch": 4.55, + "grad_norm": 2.439188003540039, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0542, + "step": 9025 + }, + { + "epoch": 4.56, + "grad_norm": NaN, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0547, + "step": 9050 + }, + { + "epoch": 4.57, + "grad_norm": 2.4117727279663086, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0526, + "step": 9075 + }, + { + "epoch": 4.58, + "grad_norm": 2.603896379470825, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0527, + "step": 9100 + }, + { + "epoch": 4.6, + "grad_norm": 2.533517360687256, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0566, + "step": 9125 + }, + { + "epoch": 4.61, + "grad_norm": 2.7762629985809326, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0547, + "step": 9150 + }, + { + "epoch": 4.62, + "grad_norm": 2.8527615070343018, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0558, + "step": 9175 + }, + { + "epoch": 4.63, + "grad_norm": 2.600090503692627, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0503, + "step": 9200 + }, + { + "epoch": 4.65, + "grad_norm": 2.3100574016571045, + "learning_rate": 9.123618090452263e-06, + "loss": 0.0534, + "step": 9225 + }, + { + "epoch": 4.66, + "grad_norm": 2.8445324897766113, + "learning_rate": 9.121105527638192e-06, + "loss": 0.0538, + "step": 9250 + }, + { + "epoch": 4.67, + "grad_norm": 3.1814417839050293, + "learning_rate": 9.118592964824121e-06, + "loss": 0.0546, + "step": 9275 + }, + { + "epoch": 4.69, + "grad_norm": 2.835566759109497, + "learning_rate": 9.11608040201005e-06, + "loss": 0.0539, + "step": 9300 + }, + { + "epoch": 4.7, + "grad_norm": 2.6004786491394043, + "learning_rate": 9.11356783919598e-06, + "loss": 0.0497, + "step": 9325 + }, + { + "epoch": 4.71, + "grad_norm": 2.2775909900665283, + "learning_rate": 9.111055276381911e-06, + "loss": 0.053, + "step": 9350 + }, + { + "epoch": 4.72, + "grad_norm": 2.242342233657837, + "learning_rate": 9.10854271356784e-06, + "loss": 0.0492, + "step": 9375 + }, + { + "epoch": 4.74, + "grad_norm": 2.781096935272217, + "learning_rate": 9.10603015075377e-06, + "loss": 0.0559, + "step": 9400 + }, + { + "epoch": 4.75, + "grad_norm": 2.561607837677002, + "learning_rate": 9.1035175879397e-06, + "loss": 0.0537, + "step": 9425 + }, + { + "epoch": 4.76, + "grad_norm": 3.002260208129883, + "learning_rate": 9.101005025125628e-06, + "loss": 0.0564, + "step": 9450 + }, + { + "epoch": 4.77, + "grad_norm": 2.7912750244140625, + "learning_rate": 9.09849246231156e-06, + "loss": 0.0542, + "step": 9475 + }, + { + "epoch": 4.79, + "grad_norm": 2.549391508102417, + "learning_rate": 9.095979899497489e-06, + "loss": 0.0529, + "step": 9500 + }, + { + "epoch": 4.8, + "grad_norm": 2.707965612411499, + "learning_rate": 9.093467336683418e-06, + "loss": 0.0526, + "step": 9525 + }, + { + "epoch": 4.81, + "grad_norm": 2.5057213306427, + "learning_rate": 9.090954773869347e-06, + "loss": 0.0513, + "step": 9550 + }, + { + "epoch": 4.82, + "grad_norm": 3.1057217121124268, + "learning_rate": 9.088442211055277e-06, + "loss": 0.0515, + "step": 9575 + }, + { + "epoch": 4.84, + "grad_norm": 2.3811659812927246, + "learning_rate": 9.085929648241206e-06, + "loss": 0.0518, + "step": 9600 + }, + { + "epoch": 4.85, + "grad_norm": 2.412745714187622, + "learning_rate": 9.083417085427137e-06, + "loss": 0.053, + "step": 9625 + }, + { + "epoch": 4.86, + "grad_norm": 3.048144578933716, + "learning_rate": 9.080904522613066e-06, + "loss": 0.053, + "step": 9650 + }, + { + "epoch": 4.87, + "grad_norm": 2.3766226768493652, + "learning_rate": 9.078391959798996e-06, + "loss": 0.0526, + "step": 9675 + }, + { + "epoch": 4.89, + "grad_norm": 2.6831417083740234, + "learning_rate": 9.075879396984927e-06, + "loss": 0.0525, + "step": 9700 + }, + { + "epoch": 4.9, + "grad_norm": 2.7392430305480957, + "learning_rate": 9.073366834170854e-06, + "loss": 0.0548, + "step": 9725 + }, + { + "epoch": 4.91, + "grad_norm": 2.7497315406799316, + "learning_rate": 9.070854271356785e-06, + "loss": 0.0528, + "step": 9750 + }, + { + "epoch": 4.92, + "grad_norm": 2.9299416542053223, + "learning_rate": 9.068341708542715e-06, + "loss": 0.0542, + "step": 9775 + }, + { + "epoch": 4.94, + "grad_norm": 2.889383316040039, + "learning_rate": 9.065829145728644e-06, + "loss": 0.0527, + "step": 9800 + }, + { + "epoch": 4.95, + "grad_norm": 2.648606777191162, + "learning_rate": 9.063316582914573e-06, + "loss": 0.0513, + "step": 9825 + }, + { + "epoch": 4.96, + "grad_norm": 2.225612163543701, + "learning_rate": 9.060804020100502e-06, + "loss": 0.0526, + "step": 9850 + }, + { + "epoch": 4.97, + "grad_norm": 2.932143211364746, + "learning_rate": 9.058291457286433e-06, + "loss": 0.051, + "step": 9875 + }, + { + "epoch": 4.99, + "grad_norm": 2.607672691345215, + "learning_rate": 9.055778894472363e-06, + "loss": 0.053, + "step": 9900 + }, + { + "epoch": 5.0, + "grad_norm": 2.8472115993499756, + "learning_rate": 9.053266331658292e-06, + "loss": 0.0557, + "step": 9925 + }, + { + "epoch": 5.01, + "grad_norm": 2.171074151992798, + "learning_rate": 9.050753768844221e-06, + "loss": 0.0356, + "step": 9950 + }, + { + "epoch": 5.03, + "grad_norm": 1.950042486190796, + "learning_rate": 9.048241206030152e-06, + "loss": 0.0381, + "step": 9975 + }, + { + "epoch": 5.04, + "grad_norm": 1.7547545433044434, + "learning_rate": 9.04572864321608e-06, + "loss": 0.036, + "step": 10000 + }, + { + "epoch": 5.04, + "eval_loss": 0.16767631471157074, + "eval_runtime": 646.7265, + "eval_samples_per_second": 2.179, + "eval_steps_per_second": 2.179, + "eval_wer": 23.65963334486337, + "step": 10000 + }, + { + "epoch": 5.05, + "grad_norm": 1.8145438432693481, + "learning_rate": 9.043216080402011e-06, + "loss": 0.0339, + "step": 10025 + }, + { + "epoch": 5.06, + "grad_norm": 2.891932725906372, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0351, + "step": 10050 + }, + { + "epoch": 5.08, + "grad_norm": 1.8098477125167847, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0362, + "step": 10075 + }, + { + "epoch": 5.09, + "grad_norm": 2.386594772338867, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0373, + "step": 10100 + }, + { + "epoch": 5.1, + "grad_norm": 2.028424024581909, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0367, + "step": 10125 + }, + { + "epoch": 5.11, + "grad_norm": 2.1175694465637207, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0386, + "step": 10150 + }, + { + "epoch": 5.13, + "grad_norm": 1.9647341966629028, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0347, + "step": 10175 + }, + { + "epoch": 5.14, + "grad_norm": 2.252744197845459, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0354, + "step": 10200 + }, + { + "epoch": 5.15, + "grad_norm": 2.485556125640869, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0358, + "step": 10225 + }, + { + "epoch": 5.16, + "grad_norm": 2.0512161254882812, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0365, + "step": 10250 + }, + { + "epoch": 5.18, + "grad_norm": 2.402486801147461, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0337, + "step": 10275 + }, + { + "epoch": 5.19, + "grad_norm": 2.146170139312744, + "learning_rate": 9.015577889447237e-06, + "loss": 0.035, + "step": 10300 + }, + { + "epoch": 5.2, + "grad_norm": 2.5019354820251465, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0378, + "step": 10325 + }, + { + "epoch": 5.21, + "grad_norm": 2.4359068870544434, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0366, + "step": 10350 + }, + { + "epoch": 5.23, + "grad_norm": 2.1449923515319824, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0364, + "step": 10375 + }, + { + "epoch": 5.24, + "grad_norm": 2.202234983444214, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0378, + "step": 10400 + }, + { + "epoch": 5.25, + "grad_norm": 2.267660140991211, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0358, + "step": 10425 + }, + { + "epoch": 5.26, + "grad_norm": 2.0055835247039795, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0368, + "step": 10450 + }, + { + "epoch": 5.28, + "grad_norm": 1.9618968963623047, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0351, + "step": 10475 + }, + { + "epoch": 5.29, + "grad_norm": 2.122114896774292, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0356, + "step": 10500 + }, + { + "epoch": 5.3, + "grad_norm": 2.236201047897339, + "learning_rate": 8.992964824120604e-06, + "loss": 0.037, + "step": 10525 + }, + { + "epoch": 5.31, + "grad_norm": 2.286752939224243, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0353, + "step": 10550 + }, + { + "epoch": 5.33, + "grad_norm": 2.0843496322631836, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0343, + "step": 10575 + }, + { + "epoch": 5.34, + "grad_norm": 3.129362106323242, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0376, + "step": 10600 + }, + { + "epoch": 5.35, + "grad_norm": 2.6233270168304443, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0338, + "step": 10625 + }, + { + "epoch": 5.37, + "grad_norm": 2.1038076877593994, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0367, + "step": 10650 + }, + { + "epoch": 5.38, + "grad_norm": 2.270951509475708, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0371, + "step": 10675 + }, + { + "epoch": 5.39, + "grad_norm": 2.929248094558716, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0375, + "step": 10700 + }, + { + "epoch": 5.4, + "grad_norm": 1.823087453842163, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0357, + "step": 10725 + }, + { + "epoch": 5.42, + "grad_norm": 2.542175054550171, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0355, + "step": 10750 + }, + { + "epoch": 5.43, + "grad_norm": 2.396777391433716, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0369, + "step": 10775 + }, + { + "epoch": 5.44, + "grad_norm": 2.5692787170410156, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0369, + "step": 10800 + }, + { + "epoch": 5.45, + "grad_norm": 2.5763325691223145, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0356, + "step": 10825 + }, + { + "epoch": 5.47, + "grad_norm": 1.9143195152282715, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0361, + "step": 10850 + }, + { + "epoch": 5.48, + "grad_norm": 2.463517904281616, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0347, + "step": 10875 + }, + { + "epoch": 5.49, + "grad_norm": 2.0476324558258057, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0389, + "step": 10900 + }, + { + "epoch": 5.5, + "grad_norm": 3.286231517791748, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0353, + "step": 10925 + }, + { + "epoch": 5.52, + "grad_norm": 2.1984260082244873, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0366, + "step": 10950 + }, + { + "epoch": 5.53, + "grad_norm": 2.0697944164276123, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0356, + "step": 10975 + }, + { + "epoch": 5.54, + "grad_norm": 2.1701056957244873, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0349, + "step": 11000 + }, + { + "epoch": 5.54, + "eval_loss": 0.17827929556369781, + "eval_runtime": 649.3211, + "eval_samples_per_second": 2.17, + "eval_steps_per_second": 2.17, + "eval_wer": 24.047042545831893, + "step": 11000 + }, + { + "epoch": 5.55, + "grad_norm": 2.3994946479797363, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0345, + "step": 11025 + }, + { + "epoch": 5.57, + "grad_norm": 2.7759196758270264, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0357, + "step": 11050 + }, + { + "epoch": 5.58, + "grad_norm": 2.57523775100708, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0327, + "step": 11075 + }, + { + "epoch": 5.59, + "grad_norm": 2.1448755264282227, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0372, + "step": 11100 + }, + { + "epoch": 5.6, + "grad_norm": 2.378547191619873, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0357, + "step": 11125 + }, + { + "epoch": 5.62, + "grad_norm": 2.524625539779663, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0366, + "step": 11150 + }, + { + "epoch": 5.63, + "grad_norm": 2.485322952270508, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0347, + "step": 11175 + }, + { + "epoch": 5.64, + "grad_norm": 2.4604809284210205, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0356, + "step": 11200 + }, + { + "epoch": 5.65, + "grad_norm": 2.805788516998291, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0378, + "step": 11225 + }, + { + "epoch": 5.67, + "grad_norm": 2.620722770690918, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0352, + "step": 11250 + }, + { + "epoch": 5.68, + "grad_norm": 2.9701807498931885, + "learning_rate": 8.917587939698493e-06, + "loss": 0.036, + "step": 11275 + }, + { + "epoch": 5.69, + "grad_norm": 2.5234711170196533, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0392, + "step": 11300 + }, + { + "epoch": 5.71, + "grad_norm": 2.4073734283447266, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0373, + "step": 11325 + }, + { + "epoch": 5.72, + "grad_norm": 2.699392318725586, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0377, + "step": 11350 + }, + { + "epoch": 5.73, + "grad_norm": 2.1058201789855957, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0353, + "step": 11375 + }, + { + "epoch": 5.74, + "grad_norm": 2.494295597076416, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0365, + "step": 11400 + }, + { + "epoch": 5.76, + "grad_norm": 2.452155828475952, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0356, + "step": 11425 + }, + { + "epoch": 5.77, + "grad_norm": 2.7919886112213135, + "learning_rate": 8.900000000000001e-06, + "loss": 0.035, + "step": 11450 + }, + { + "epoch": 5.78, + "grad_norm": 2.2973413467407227, + "learning_rate": 8.89748743718593e-06, + "loss": 0.034, + "step": 11475 + }, + { + "epoch": 5.79, + "grad_norm": 2.4735491275787354, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0346, + "step": 11500 + }, + { + "epoch": 5.81, + "grad_norm": 2.2433793544769287, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0357, + "step": 11525 + }, + { + "epoch": 5.82, + "grad_norm": 2.586491107940674, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0373, + "step": 11550 + }, + { + "epoch": 5.83, + "grad_norm": 2.7574408054351807, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0368, + "step": 11575 + }, + { + "epoch": 5.84, + "grad_norm": 2.4347455501556396, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0377, + "step": 11600 + }, + { + "epoch": 5.86, + "grad_norm": 2.858201503753662, + "learning_rate": 8.882412060301508e-06, + "loss": 0.036, + "step": 11625 + }, + { + "epoch": 5.87, + "grad_norm": 2.416962146759033, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0356, + "step": 11650 + }, + { + "epoch": 5.88, + "grad_norm": 2.5054562091827393, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0356, + "step": 11675 + }, + { + "epoch": 5.89, + "grad_norm": 2.83569073677063, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0354, + "step": 11700 + }, + { + "epoch": 5.91, + "grad_norm": 3.158905029296875, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0348, + "step": 11725 + }, + { + "epoch": 5.92, + "grad_norm": 2.396923780441284, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0339, + "step": 11750 + }, + { + "epoch": 5.93, + "grad_norm": 2.8449628353118896, + "learning_rate": 8.867336683417086e-06, + "loss": 0.037, + "step": 11775 + }, + { + "epoch": 5.94, + "grad_norm": 2.5769689083099365, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0354, + "step": 11800 + }, + { + "epoch": 5.96, + "grad_norm": 2.1106691360473633, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0343, + "step": 11825 + }, + { + "epoch": 5.97, + "grad_norm": 2.392437696456909, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0353, + "step": 11850 + }, + { + "epoch": 5.98, + "grad_norm": 2.2878997325897217, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0354, + "step": 11875 + }, + { + "epoch": 5.99, + "grad_norm": 2.6576852798461914, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0363, + "step": 11900 + }, + { + "epoch": 6.01, + "grad_norm": 1.76813542842865, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0284, + "step": 11925 + }, + { + "epoch": 6.02, + "grad_norm": 1.7315902709960938, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0213, + "step": 11950 + }, + { + "epoch": 6.03, + "grad_norm": 2.2705891132354736, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0219, + "step": 11975 + }, + { + "epoch": 6.05, + "grad_norm": 1.689342737197876, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0231, + "step": 12000 + }, + { + "epoch": 6.05, + "eval_loss": 0.18743818998336792, + "eval_runtime": 649.1371, + "eval_samples_per_second": 2.171, + "eval_steps_per_second": 2.171, + "eval_wer": 23.784157730888968, + "step": 12000 + }, + { + "epoch": 6.06, + "grad_norm": 1.7385754585266113, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0216, + "step": 12025 + }, + { + "epoch": 6.07, + "grad_norm": 1.7763196229934692, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0225, + "step": 12050 + }, + { + "epoch": 6.08, + "grad_norm": 1.9993950128555298, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0232, + "step": 12075 + }, + { + "epoch": 6.1, + "grad_norm": 1.7563095092773438, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0224, + "step": 12100 + }, + { + "epoch": 6.11, + "grad_norm": 1.6103582382202148, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0225, + "step": 12125 + }, + { + "epoch": 6.12, + "grad_norm": 1.9436490535736084, + "learning_rate": 8.82964824120603e-06, + "loss": 0.021, + "step": 12150 + }, + { + "epoch": 6.13, + "grad_norm": 2.0505597591400146, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0221, + "step": 12175 + }, + { + "epoch": 6.15, + "grad_norm": 1.8634746074676514, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0238, + "step": 12200 + }, + { + "epoch": 6.16, + "grad_norm": 2.337662696838379, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0227, + "step": 12225 + }, + { + "epoch": 6.17, + "grad_norm": 2.1642353534698486, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0224, + "step": 12250 + }, + { + "epoch": 6.18, + "grad_norm": 1.816988468170166, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0238, + "step": 12275 + }, + { + "epoch": 6.2, + "grad_norm": 1.9968360662460327, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0234, + "step": 12300 + }, + { + "epoch": 6.21, + "grad_norm": 2.105729579925537, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0231, + "step": 12325 + }, + { + "epoch": 6.22, + "grad_norm": 2.2226688861846924, + "learning_rate": 8.809547738693469e-06, + "loss": 0.023, + "step": 12350 + }, + { + "epoch": 6.23, + "grad_norm": 1.8855944871902466, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0215, + "step": 12375 + }, + { + "epoch": 6.25, + "grad_norm": 1.9994747638702393, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0242, + "step": 12400 + }, + { + "epoch": 6.26, + "grad_norm": 1.9194012880325317, + "learning_rate": 8.802010050251257e-06, + "loss": 0.022, + "step": 12425 + }, + { + "epoch": 6.27, + "grad_norm": 2.565969944000244, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0235, + "step": 12450 + }, + { + "epoch": 6.28, + "grad_norm": 2.2084851264953613, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0223, + "step": 12475 + }, + { + "epoch": 6.3, + "grad_norm": 2.1252388954162598, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0241, + "step": 12500 + }, + { + "epoch": 6.31, + "grad_norm": 2.299900531768799, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0238, + "step": 12525 + }, + { + "epoch": 6.32, + "grad_norm": 2.038466215133667, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0217, + "step": 12550 + }, + { + "epoch": 6.34, + "grad_norm": 2.136720657348633, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0232, + "step": 12575 + }, + { + "epoch": 6.35, + "grad_norm": 2.5748748779296875, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0238, + "step": 12600 + }, + { + "epoch": 6.36, + "grad_norm": 3.1436619758605957, + "learning_rate": 8.781909547738695e-06, + "loss": 0.025, + "step": 12625 + }, + { + "epoch": 6.37, + "grad_norm": 1.8555638790130615, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0237, + "step": 12650 + }, + { + "epoch": 6.39, + "grad_norm": 1.8384400606155396, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0239, + "step": 12675 + }, + { + "epoch": 6.4, + "grad_norm": 2.0759224891662598, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0215, + "step": 12700 + }, + { + "epoch": 6.41, + "grad_norm": 2.09233021736145, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0246, + "step": 12725 + }, + { + "epoch": 6.42, + "grad_norm": 2.437635660171509, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0229, + "step": 12750 + }, + { + "epoch": 6.44, + "grad_norm": 2.3498973846435547, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0237, + "step": 12775 + }, + { + "epoch": 6.45, + "grad_norm": 2.1013572216033936, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0233, + "step": 12800 + }, + { + "epoch": 6.46, + "grad_norm": 2.2095558643341064, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0236, + "step": 12825 + }, + { + "epoch": 6.47, + "grad_norm": 3.4572582244873047, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0219, + "step": 12850 + }, + { + "epoch": 6.49, + "grad_norm": 2.6476633548736572, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0231, + "step": 12875 + }, + { + "epoch": 6.5, + "grad_norm": 2.358466863632202, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0227, + "step": 12900 + }, + { + "epoch": 6.51, + "grad_norm": 2.037827253341675, + "learning_rate": 8.75175879396985e-06, + "loss": 0.024, + "step": 12925 + }, + { + "epoch": 6.52, + "grad_norm": 2.3307857513427734, + "learning_rate": 8.74924623115578e-06, + "loss": 0.022, + "step": 12950 + }, + { + "epoch": 6.54, + "grad_norm": 2.1784884929656982, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0218, + "step": 12975 + }, + { + "epoch": 6.55, + "grad_norm": 2.4882514476776123, + "learning_rate": 8.74422110552764e-06, + "loss": 0.023, + "step": 13000 + }, + { + "epoch": 6.55, + "eval_loss": 0.20282986760139465, + "eval_runtime": 651.7625, + "eval_samples_per_second": 2.162, + "eval_steps_per_second": 2.162, + "eval_wer": 24.57973019716361, + "step": 13000 + }, + { + "epoch": 6.56, + "grad_norm": 3.0175468921661377, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0246, + "step": 13025 + }, + { + "epoch": 6.57, + "grad_norm": 2.1591644287109375, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0238, + "step": 13050 + }, + { + "epoch": 6.59, + "grad_norm": 2.0232603549957275, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0231, + "step": 13075 + }, + { + "epoch": 6.6, + "grad_norm": 1.9856449365615845, + "learning_rate": 8.734271356783919e-06, + "loss": 0.023, + "step": 13100 + }, + { + "epoch": 6.61, + "grad_norm": 1.9815651178359985, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0237, + "step": 13125 + }, + { + "epoch": 6.62, + "grad_norm": 1.602400302886963, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0255, + "step": 13150 + }, + { + "epoch": 6.64, + "grad_norm": 2.4619295597076416, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0226, + "step": 13175 + }, + { + "epoch": 6.65, + "grad_norm": 2.190075397491455, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0245, + "step": 13200 + }, + { + "epoch": 6.66, + "grad_norm": 1.8968470096588135, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0221, + "step": 13225 + }, + { + "epoch": 6.68, + "grad_norm": 2.0752451419830322, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0227, + "step": 13250 + }, + { + "epoch": 6.69, + "grad_norm": 1.8338621854782104, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0231, + "step": 13275 + }, + { + "epoch": 6.7, + "grad_norm": 2.2742509841918945, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0247, + "step": 13300 + }, + { + "epoch": 6.71, + "grad_norm": 2.536423921585083, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0234, + "step": 13325 + }, + { + "epoch": 6.73, + "grad_norm": 1.7209787368774414, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0237, + "step": 13350 + }, + { + "epoch": 6.74, + "grad_norm": 2.257042646408081, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0229, + "step": 13375 + }, + { + "epoch": 6.75, + "grad_norm": 1.924156665802002, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0212, + "step": 13400 + }, + { + "epoch": 6.76, + "grad_norm": 2.343059778213501, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0236, + "step": 13425 + }, + { + "epoch": 6.78, + "grad_norm": 2.157313108444214, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0214, + "step": 13450 + }, + { + "epoch": 6.79, + "grad_norm": 2.0653934478759766, + "learning_rate": 8.696582914572866e-06, + "loss": 0.024, + "step": 13475 + }, + { + "epoch": 6.8, + "grad_norm": 2.4266180992126465, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0239, + "step": 13500 + }, + { + "epoch": 6.81, + "grad_norm": 2.243062973022461, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0224, + "step": 13525 + }, + { + "epoch": 6.83, + "grad_norm": 2.309316873550415, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0238, + "step": 13550 + }, + { + "epoch": 6.84, + "grad_norm": 1.8502180576324463, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0214, + "step": 13575 + }, + { + "epoch": 6.85, + "grad_norm": 2.0743794441223145, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0239, + "step": 13600 + }, + { + "epoch": 6.86, + "grad_norm": 2.225356101989746, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0212, + "step": 13625 + }, + { + "epoch": 6.88, + "grad_norm": 2.3651390075683594, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0246, + "step": 13650 + }, + { + "epoch": 6.89, + "grad_norm": 2.4614925384521484, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0237, + "step": 13675 + }, + { + "epoch": 6.9, + "grad_norm": 2.4582881927490234, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0232, + "step": 13700 + }, + { + "epoch": 6.91, + "grad_norm": 2.3366026878356934, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0236, + "step": 13725 + }, + { + "epoch": 6.93, + "grad_norm": 2.092181444168091, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0237, + "step": 13750 + }, + { + "epoch": 6.94, + "grad_norm": 2.092453956604004, + "learning_rate": 8.666432160804021e-06, + "loss": 0.022, + "step": 13775 + }, + { + "epoch": 6.95, + "grad_norm": 2.475292682647705, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0229, + "step": 13800 + }, + { + "epoch": 6.96, + "grad_norm": 2.873953104019165, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0231, + "step": 13825 + }, + { + "epoch": 6.98, + "grad_norm": 2.619523763656616, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0252, + "step": 13850 + }, + { + "epoch": 6.99, + "grad_norm": 2.5735602378845215, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0231, + "step": 13875 + }, + { + "epoch": 7.0, + "grad_norm": 1.6024060249328613, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0213, + "step": 13900 + }, + { + "epoch": 7.02, + "grad_norm": 1.5747658014297485, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0136, + "step": 13925 + }, + { + "epoch": 7.03, + "grad_norm": 1.8268576860427856, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0146, + "step": 13950 + }, + { + "epoch": 7.04, + "grad_norm": 1.4051076173782349, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0145, + "step": 13975 + }, + { + "epoch": 7.05, + "grad_norm": 1.6383821964263916, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0145, + "step": 14000 + }, + { + "epoch": 7.05, + "eval_loss": 0.20823825895786285, + "eval_runtime": 650.647, + "eval_samples_per_second": 2.166, + "eval_steps_per_second": 2.166, + "eval_wer": 24.088550674507093, + "step": 14000 + }, + { + "epoch": 7.07, + "grad_norm": 1.911106824874878, + "learning_rate": 8.641306532663318e-06, + "loss": 0.014, + "step": 14025 + }, + { + "epoch": 7.08, + "grad_norm": 1.6214771270751953, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0141, + "step": 14050 + }, + { + "epoch": 7.09, + "grad_norm": 1.3229256868362427, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0147, + "step": 14075 + }, + { + "epoch": 7.1, + "grad_norm": 1.4375226497650146, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0137, + "step": 14100 + }, + { + "epoch": 7.12, + "grad_norm": 1.7731012105941772, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0138, + "step": 14125 + }, + { + "epoch": 7.13, + "grad_norm": 1.825411319732666, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0141, + "step": 14150 + }, + { + "epoch": 7.14, + "grad_norm": 2.0631155967712402, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0128, + "step": 14175 + }, + { + "epoch": 7.15, + "grad_norm": 1.8785593509674072, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0135, + "step": 14200 + }, + { + "epoch": 7.17, + "grad_norm": 1.9748233556747437, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0135, + "step": 14225 + }, + { + "epoch": 7.18, + "grad_norm": 1.8081029653549194, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0148, + "step": 14250 + }, + { + "epoch": 7.19, + "grad_norm": 1.8967790603637695, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0152, + "step": 14275 + }, + { + "epoch": 7.2, + "grad_norm": 1.9241995811462402, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0144, + "step": 14300 + }, + { + "epoch": 7.22, + "grad_norm": 1.5693718194961548, + "learning_rate": 8.611155778894473e-06, + "loss": 0.015, + "step": 14325 + }, + { + "epoch": 7.23, + "grad_norm": 1.7336968183517456, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0146, + "step": 14350 + }, + { + "epoch": 7.24, + "grad_norm": 1.6103991270065308, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0143, + "step": 14375 + }, + { + "epoch": 7.25, + "grad_norm": 2.3941450119018555, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0159, + "step": 14400 + }, + { + "epoch": 7.27, + "grad_norm": 1.8727612495422363, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0136, + "step": 14425 + }, + { + "epoch": 7.28, + "grad_norm": 1.4570417404174805, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0149, + "step": 14450 + }, + { + "epoch": 7.29, + "grad_norm": 2.069019317626953, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0139, + "step": 14475 + }, + { + "epoch": 7.3, + "grad_norm": 2.3598926067352295, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0148, + "step": 14500 + }, + { + "epoch": 7.32, + "grad_norm": 1.342768907546997, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0139, + "step": 14525 + }, + { + "epoch": 7.33, + "grad_norm": 1.2774055004119873, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0153, + "step": 14550 + }, + { + "epoch": 7.34, + "grad_norm": 1.7923460006713867, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0137, + "step": 14575 + }, + { + "epoch": 7.36, + "grad_norm": 1.9054781198501587, + "learning_rate": 8.583618090452261e-06, + "loss": 0.015, + "step": 14600 + }, + { + "epoch": 7.37, + "grad_norm": 1.9677022695541382, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0151, + "step": 14625 + }, + { + "epoch": 7.38, + "grad_norm": 1.8171250820159912, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0145, + "step": 14650 + }, + { + "epoch": 7.39, + "grad_norm": 1.4638991355895996, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0138, + "step": 14675 + }, + { + "epoch": 7.41, + "grad_norm": 2.3554527759552, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0148, + "step": 14700 + }, + { + "epoch": 7.42, + "grad_norm": 1.273007869720459, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0145, + "step": 14725 + }, + { + "epoch": 7.43, + "grad_norm": 2.289254665374756, + "learning_rate": 8.56854271356784e-06, + "loss": 0.015, + "step": 14750 + }, + { + "epoch": 7.44, + "grad_norm": 1.6287271976470947, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0153, + "step": 14775 + }, + { + "epoch": 7.46, + "grad_norm": 2.6135382652282715, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0151, + "step": 14800 + }, + { + "epoch": 7.47, + "grad_norm": 3.0830838680267334, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0161, + "step": 14825 + }, + { + "epoch": 7.48, + "grad_norm": 2.3788559436798096, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0149, + "step": 14850 + }, + { + "epoch": 7.49, + "grad_norm": 1.2800523042678833, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0146, + "step": 14875 + }, + { + "epoch": 7.51, + "grad_norm": 2.244983434677124, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0151, + "step": 14900 + }, + { + "epoch": 7.52, + "grad_norm": 2.2439663410186768, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0154, + "step": 14925 + }, + { + "epoch": 7.53, + "grad_norm": 1.619199514389038, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0149, + "step": 14950 + }, + { + "epoch": 7.54, + "grad_norm": 2.012608289718628, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0161, + "step": 14975 + }, + { + "epoch": 7.56, + "grad_norm": 1.9494653940200806, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0151, + "step": 15000 + }, + { + "epoch": 7.56, + "eval_loss": 0.21707138419151306, + "eval_runtime": 647.0406, + "eval_samples_per_second": 2.178, + "eval_steps_per_second": 2.178, + "eval_wer": 24.047042545831893, + "step": 15000 + }, + { + "epoch": 7.57, + "grad_norm": 1.4061529636383057, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0143, + "step": 15025 + }, + { + "epoch": 7.58, + "grad_norm": 1.8309212923049927, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0149, + "step": 15050 + }, + { + "epoch": 7.59, + "grad_norm": 2.8870351314544678, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0148, + "step": 15075 + }, + { + "epoch": 7.61, + "grad_norm": 2.2940802574157715, + "learning_rate": 8.533366834170856e-06, + "loss": 0.015, + "step": 15100 + }, + { + "epoch": 7.62, + "grad_norm": 2.183642625808716, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0144, + "step": 15125 + }, + { + "epoch": 7.63, + "grad_norm": 1.9402830600738525, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0147, + "step": 15150 + }, + { + "epoch": 7.64, + "grad_norm": 2.082012414932251, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0161, + "step": 15175 + }, + { + "epoch": 7.66, + "grad_norm": 1.9440219402313232, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0143, + "step": 15200 + }, + { + "epoch": 7.67, + "grad_norm": 1.9995568990707397, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0157, + "step": 15225 + }, + { + "epoch": 7.68, + "grad_norm": 2.117058515548706, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0145, + "step": 15250 + }, + { + "epoch": 7.7, + "grad_norm": 1.6028704643249512, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0146, + "step": 15275 + }, + { + "epoch": 7.71, + "grad_norm": 2.337704658508301, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0161, + "step": 15300 + }, + { + "epoch": 7.72, + "grad_norm": 2.1473734378814697, + "learning_rate": 8.510753768844222e-06, + "loss": 0.015, + "step": 15325 + }, + { + "epoch": 7.73, + "grad_norm": 2.084581136703491, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0169, + "step": 15350 + }, + { + "epoch": 7.75, + "grad_norm": 1.7122375965118408, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0158, + "step": 15375 + }, + { + "epoch": 7.76, + "grad_norm": 1.8660895824432373, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0159, + "step": 15400 + }, + { + "epoch": 7.77, + "grad_norm": 1.6958028078079224, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0157, + "step": 15425 + }, + { + "epoch": 7.78, + "grad_norm": 1.7803666591644287, + "learning_rate": 8.49819095477387e-06, + "loss": 0.016, + "step": 15450 + }, + { + "epoch": 7.8, + "grad_norm": 2.085075855255127, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0153, + "step": 15475 + }, + { + "epoch": 7.81, + "grad_norm": 1.9727402925491333, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0142, + "step": 15500 + }, + { + "epoch": 7.82, + "grad_norm": 2.111431837081909, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0151, + "step": 15525 + }, + { + "epoch": 7.83, + "grad_norm": 1.8781590461730957, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0149, + "step": 15550 + }, + { + "epoch": 7.85, + "grad_norm": 1.771287441253662, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0147, + "step": 15575 + }, + { + "epoch": 7.86, + "grad_norm": 2.1958467960357666, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0157, + "step": 15600 + }, + { + "epoch": 7.87, + "grad_norm": 2.4910318851470947, + "learning_rate": 8.480603015075377e-06, + "loss": 0.0154, + "step": 15625 + }, + { + "epoch": 7.88, + "grad_norm": 2.2781054973602295, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0155, + "step": 15650 + }, + { + "epoch": 7.9, + "grad_norm": 2.349332332611084, + "learning_rate": 8.475577889447237e-06, + "loss": 0.015, + "step": 15675 + }, + { + "epoch": 7.91, + "grad_norm": 1.707576870918274, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0159, + "step": 15700 + }, + { + "epoch": 7.92, + "grad_norm": 2.0872557163238525, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0158, + "step": 15725 + }, + { + "epoch": 7.93, + "grad_norm": 2.1867074966430664, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0157, + "step": 15750 + }, + { + "epoch": 7.95, + "grad_norm": 1.594016671180725, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0153, + "step": 15775 + }, + { + "epoch": 7.96, + "grad_norm": 2.078763246536255, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0155, + "step": 15800 + }, + { + "epoch": 7.97, + "grad_norm": 1.8778270483016968, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0161, + "step": 15825 + }, + { + "epoch": 7.98, + "grad_norm": 1.7115347385406494, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0155, + "step": 15850 + }, + { + "epoch": 8.0, + "grad_norm": 1.6807602643966675, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0144, + "step": 15875 + }, + { + "epoch": 8.01, + "grad_norm": 1.332097053527832, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0097, + "step": 15900 + }, + { + "epoch": 8.02, + "grad_norm": 1.1560015678405762, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0079, + "step": 15925 + }, + { + "epoch": 8.04, + "grad_norm": 1.238347053527832, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0095, + "step": 15950 + }, + { + "epoch": 8.05, + "grad_norm": 1.5310404300689697, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0092, + "step": 15975 + }, + { + "epoch": 8.06, + "grad_norm": 1.309462547302246, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0086, + "step": 16000 + }, + { + "epoch": 8.06, + "eval_loss": 0.22892026603221893, + "eval_runtime": 642.6728, + "eval_samples_per_second": 2.192, + "eval_steps_per_second": 2.192, + "eval_wer": 24.524386025596677, + "step": 16000 + }, + { + "epoch": 8.07, + "grad_norm": 2.2282941341400146, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0084, + "step": 16025 + }, + { + "epoch": 8.09, + "grad_norm": 1.7921063899993896, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0096, + "step": 16050 + }, + { + "epoch": 8.1, + "grad_norm": 1.3115910291671753, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0088, + "step": 16075 + }, + { + "epoch": 8.11, + "grad_norm": 1.0947102308273315, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0087, + "step": 16100 + }, + { + "epoch": 8.12, + "grad_norm": 1.5556052923202515, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0096, + "step": 16125 + }, + { + "epoch": 8.14, + "grad_norm": 1.2743710279464722, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0085, + "step": 16150 + }, + { + "epoch": 8.15, + "grad_norm": 1.6002846956253052, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0084, + "step": 16175 + }, + { + "epoch": 8.16, + "grad_norm": 1.713494896888733, + "learning_rate": 8.42281407035176e-06, + "loss": 0.008, + "step": 16200 + }, + { + "epoch": 8.17, + "grad_norm": 1.761599063873291, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0095, + "step": 16225 + }, + { + "epoch": 8.19, + "grad_norm": 2.072874069213867, + "learning_rate": 8.417788944723618e-06, + "loss": 0.01, + "step": 16250 + }, + { + "epoch": 8.2, + "grad_norm": 1.140694499015808, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0098, + "step": 16275 + }, + { + "epoch": 8.21, + "grad_norm": 1.6233490705490112, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0099, + "step": 16300 + }, + { + "epoch": 8.22, + "grad_norm": 1.6382660865783691, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0093, + "step": 16325 + }, + { + "epoch": 8.24, + "grad_norm": 1.4330692291259766, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0093, + "step": 16350 + }, + { + "epoch": 8.25, + "grad_norm": 1.7515724897384644, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0087, + "step": 16375 + }, + { + "epoch": 8.26, + "grad_norm": 2.1178219318389893, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0097, + "step": 16400 + }, + { + "epoch": 8.27, + "grad_norm": 2.0358147621154785, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0099, + "step": 16425 + }, + { + "epoch": 8.29, + "grad_norm": 1.4251277446746826, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0097, + "step": 16450 + }, + { + "epoch": 8.3, + "grad_norm": 1.8948729038238525, + "learning_rate": 8.395175879396986e-06, + "loss": 0.01, + "step": 16475 + }, + { + "epoch": 8.31, + "grad_norm": 2.1116690635681152, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0097, + "step": 16500 + }, + { + "epoch": 8.32, + "grad_norm": 2.2370381355285645, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0096, + "step": 16525 + }, + { + "epoch": 8.34, + "grad_norm": 1.6887876987457275, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0097, + "step": 16550 + }, + { + "epoch": 8.35, + "grad_norm": 1.1379011869430542, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0099, + "step": 16575 + }, + { + "epoch": 8.36, + "grad_norm": 1.4453860521316528, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0103, + "step": 16600 + }, + { + "epoch": 8.38, + "grad_norm": 1.388378620147705, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0106, + "step": 16625 + }, + { + "epoch": 8.39, + "grad_norm": 2.015660285949707, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0098, + "step": 16650 + }, + { + "epoch": 8.4, + "grad_norm": 1.5107704401016235, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0101, + "step": 16675 + }, + { + "epoch": 8.41, + "grad_norm": 1.5902012586593628, + "learning_rate": 8.372562814070353e-06, + "loss": 0.01, + "step": 16700 + }, + { + "epoch": 8.43, + "grad_norm": 1.8684821128845215, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0102, + "step": 16725 + }, + { + "epoch": 8.44, + "grad_norm": 1.8811595439910889, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0097, + "step": 16750 + }, + { + "epoch": 8.45, + "grad_norm": 2.1212289333343506, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0109, + "step": 16775 + }, + { + "epoch": 8.46, + "grad_norm": 1.8294142484664917, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0105, + "step": 16800 + }, + { + "epoch": 8.48, + "grad_norm": 1.940006971359253, + "learning_rate": 8.36e-06, + "loss": 0.0097, + "step": 16825 + }, + { + "epoch": 8.49, + "grad_norm": 1.9175909757614136, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0111, + "step": 16850 + }, + { + "epoch": 8.5, + "grad_norm": 1.57523775100708, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0095, + "step": 16875 + }, + { + "epoch": 8.51, + "grad_norm": 1.825614094734192, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0106, + "step": 16900 + }, + { + "epoch": 8.53, + "grad_norm": 2.1004061698913574, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0114, + "step": 16925 + }, + { + "epoch": 8.54, + "grad_norm": 2.540674924850464, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0099, + "step": 16950 + }, + { + "epoch": 8.55, + "grad_norm": 1.962494134902954, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0102, + "step": 16975 + }, + { + "epoch": 8.56, + "grad_norm": 1.7412070035934448, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0093, + "step": 17000 + }, + { + "epoch": 8.56, + "eval_loss": 0.238552525639534, + "eval_runtime": 647.4251, + "eval_samples_per_second": 2.176, + "eval_steps_per_second": 2.176, + "eval_wer": 24.828778969214802, + "step": 17000 + }, + { + "epoch": 8.58, + "grad_norm": 2.0897905826568604, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0105, + "step": 17025 + }, + { + "epoch": 8.59, + "grad_norm": 2.1660778522491455, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0103, + "step": 17050 + }, + { + "epoch": 8.6, + "grad_norm": 1.5732314586639404, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0089, + "step": 17075 + }, + { + "epoch": 8.61, + "grad_norm": 1.8174026012420654, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0115, + "step": 17100 + }, + { + "epoch": 8.63, + "grad_norm": 2.1241679191589355, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0098, + "step": 17125 + }, + { + "epoch": 8.64, + "grad_norm": 2.3191757202148438, + "learning_rate": 8.327336683417086e-06, + "loss": 0.01, + "step": 17150 + }, + { + "epoch": 8.65, + "grad_norm": 1.6752322912216187, + "learning_rate": 8.324824120603015e-06, + "loss": 0.01, + "step": 17175 + }, + { + "epoch": 8.66, + "grad_norm": 2.2477939128875732, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0093, + "step": 17200 + }, + { + "epoch": 8.68, + "grad_norm": 1.754935622215271, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0104, + "step": 17225 + }, + { + "epoch": 8.69, + "grad_norm": 1.7442086935043335, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0096, + "step": 17250 + }, + { + "epoch": 8.7, + "grad_norm": 1.5157816410064697, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0099, + "step": 17275 + }, + { + "epoch": 8.72, + "grad_norm": 1.630161166191101, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0095, + "step": 17300 + }, + { + "epoch": 8.73, + "grad_norm": 1.7793052196502686, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0118, + "step": 17325 + }, + { + "epoch": 8.74, + "grad_norm": 2.110165596008301, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0099, + "step": 17350 + }, + { + "epoch": 8.75, + "grad_norm": 2.5670361518859863, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0111, + "step": 17375 + }, + { + "epoch": 8.77, + "grad_norm": 1.9518669843673706, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0107, + "step": 17400 + }, + { + "epoch": 8.78, + "grad_norm": 1.6892119646072388, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0107, + "step": 17425 + }, + { + "epoch": 8.79, + "grad_norm": 1.523242473602295, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0108, + "step": 17450 + }, + { + "epoch": 8.8, + "grad_norm": 1.384037733078003, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0109, + "step": 17475 + }, + { + "epoch": 8.82, + "grad_norm": 1.202498435974121, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0107, + "step": 17500 + }, + { + "epoch": 8.83, + "grad_norm": 1.504539966583252, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0105, + "step": 17525 + }, + { + "epoch": 8.84, + "grad_norm": 1.6238044500350952, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0102, + "step": 17550 + }, + { + "epoch": 8.85, + "grad_norm": 2.15169358253479, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0096, + "step": 17575 + }, + { + "epoch": 8.87, + "grad_norm": 1.6884123086929321, + "learning_rate": 8.28211055276382e-06, + "loss": 0.011, + "step": 17600 + }, + { + "epoch": 8.88, + "grad_norm": 1.7390291690826416, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0097, + "step": 17625 + }, + { + "epoch": 8.89, + "grad_norm": 2.6695148944854736, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0103, + "step": 17650 + }, + { + "epoch": 8.9, + "grad_norm": 1.2569199800491333, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0101, + "step": 17675 + }, + { + "epoch": 8.92, + "grad_norm": 1.746862769126892, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0114, + "step": 17700 + }, + { + "epoch": 8.93, + "grad_norm": 1.8386530876159668, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0097, + "step": 17725 + }, + { + "epoch": 8.94, + "grad_norm": 1.7652249336242676, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0094, + "step": 17750 + }, + { + "epoch": 8.95, + "grad_norm": 1.5770463943481445, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0106, + "step": 17775 + }, + { + "epoch": 8.97, + "grad_norm": 2.018402338027954, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0104, + "step": 17800 + }, + { + "epoch": 8.98, + "grad_norm": 1.1194132566452026, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0119, + "step": 17825 + }, + { + "epoch": 8.99, + "grad_norm": 1.8884706497192383, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0115, + "step": 17850 + }, + { + "epoch": 9.01, + "grad_norm": 1.9271539449691772, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0096, + "step": 17875 + }, + { + "epoch": 9.02, + "grad_norm": 1.3741806745529175, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0058, + "step": 17900 + }, + { + "epoch": 9.03, + "grad_norm": 1.7070800065994263, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0056, + "step": 17925 + }, + { + "epoch": 9.04, + "grad_norm": 1.0155062675476074, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0057, + "step": 17950 + }, + { + "epoch": 9.06, + "grad_norm": 1.206286907196045, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0059, + "step": 17975 + }, + { + "epoch": 9.07, + "grad_norm": 1.463138461112976, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0064, + "step": 18000 + }, + { + "epoch": 9.07, + "eval_loss": 0.2465205192565918, + "eval_runtime": 640.5205, + "eval_samples_per_second": 2.2, + "eval_steps_per_second": 2.2, + "eval_wer": 23.770321687997235, + "step": 18000 + }, + { + "epoch": 9.08, + "grad_norm": 1.703255295753479, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0073, + "step": 18025 + }, + { + "epoch": 9.09, + "grad_norm": 2.1183061599731445, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0068, + "step": 18050 + }, + { + "epoch": 9.11, + "grad_norm": 1.6237412691116333, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0068, + "step": 18075 + }, + { + "epoch": 9.12, + "grad_norm": 1.5408103466033936, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0066, + "step": 18100 + }, + { + "epoch": 9.13, + "grad_norm": 1.5907872915267944, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0062, + "step": 18125 + }, + { + "epoch": 9.14, + "grad_norm": 1.9507079124450684, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0059, + "step": 18150 + }, + { + "epoch": 9.16, + "grad_norm": 1.63676118850708, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0061, + "step": 18175 + }, + { + "epoch": 9.17, + "grad_norm": 1.1874332427978516, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0068, + "step": 18200 + }, + { + "epoch": 9.18, + "grad_norm": 1.4674066305160522, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0069, + "step": 18225 + }, + { + "epoch": 9.19, + "grad_norm": 1.2431071996688843, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0069, + "step": 18250 + }, + { + "epoch": 9.21, + "grad_norm": 1.8287932872772217, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0072, + "step": 18275 + }, + { + "epoch": 9.22, + "grad_norm": 1.375955581665039, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0064, + "step": 18300 + }, + { + "epoch": 9.23, + "grad_norm": 1.492646336555481, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0065, + "step": 18325 + }, + { + "epoch": 9.24, + "grad_norm": 2.215989828109741, + "learning_rate": 8.206733668341709e-06, + "loss": 0.006, + "step": 18350 + }, + { + "epoch": 9.26, + "grad_norm": 1.6772810220718384, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0069, + "step": 18375 + }, + { + "epoch": 9.27, + "grad_norm": 1.0609341859817505, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0067, + "step": 18400 + }, + { + "epoch": 9.28, + "grad_norm": 1.8097535371780396, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0072, + "step": 18425 + }, + { + "epoch": 9.29, + "grad_norm": 1.3503798246383667, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0066, + "step": 18450 + }, + { + "epoch": 9.31, + "grad_norm": 1.5325782299041748, + "learning_rate": 8.194170854271357e-06, + "loss": 0.0069, + "step": 18475 + }, + { + "epoch": 9.32, + "grad_norm": 1.3074427843093872, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0069, + "step": 18500 + }, + { + "epoch": 9.33, + "grad_norm": 1.6791765689849854, + "learning_rate": 8.189145728643216e-06, + "loss": 0.008, + "step": 18525 + }, + { + "epoch": 9.35, + "grad_norm": 1.9651422500610352, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0073, + "step": 18550 + }, + { + "epoch": 9.36, + "grad_norm": 1.8639694452285767, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0071, + "step": 18575 + }, + { + "epoch": 9.37, + "grad_norm": 1.2917574644088745, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0065, + "step": 18600 + }, + { + "epoch": 9.38, + "grad_norm": 1.3170853853225708, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0063, + "step": 18625 + }, + { + "epoch": 9.4, + "grad_norm": 2.0459671020507812, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0064, + "step": 18650 + }, + { + "epoch": 9.41, + "grad_norm": 2.978271245956421, + "learning_rate": 8.174070351758795e-06, + "loss": 0.007, + "step": 18675 + }, + { + "epoch": 9.42, + "grad_norm": 1.3300830125808716, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0067, + "step": 18700 + }, + { + "epoch": 9.43, + "grad_norm": 2.000188112258911, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0072, + "step": 18725 + }, + { + "epoch": 9.45, + "grad_norm": 2.4457991123199463, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0078, + "step": 18750 + }, + { + "epoch": 9.46, + "grad_norm": 0.9397627711296082, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0071, + "step": 18775 + }, + { + "epoch": 9.47, + "grad_norm": 2.0449130535125732, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0067, + "step": 18800 + }, + { + "epoch": 9.48, + "grad_norm": 1.4966685771942139, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0074, + "step": 18825 + }, + { + "epoch": 9.5, + "grad_norm": 1.2508461475372314, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0061, + "step": 18850 + }, + { + "epoch": 9.51, + "grad_norm": 1.4124109745025635, + "learning_rate": 8.154070351758795e-06, + "loss": 0.0071, + "step": 18875 + }, + { + "epoch": 9.52, + "grad_norm": 1.9999688863754272, + "learning_rate": 8.151557788944724e-06, + "loss": 0.0075, + "step": 18900 + }, + { + "epoch": 9.53, + "grad_norm": 2.0439536571502686, + "learning_rate": 8.149045226130654e-06, + "loss": 0.0067, + "step": 18925 + }, + { + "epoch": 9.55, + "grad_norm": 2.8810691833496094, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0069, + "step": 18950 + }, + { + "epoch": 9.56, + "grad_norm": 1.9366052150726318, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0076, + "step": 18975 + }, + { + "epoch": 9.57, + "grad_norm": 1.7127236127853394, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0071, + "step": 19000 + }, + { + "epoch": 9.57, + "eval_loss": 0.2544167637825012, + "eval_runtime": 643.4016, + "eval_samples_per_second": 2.19, + "eval_steps_per_second": 2.19, + "eval_wer": 24.081632653061224, + "step": 19000 + }, + { + "epoch": 9.58, + "grad_norm": 2.526472806930542, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0084, + "step": 19025 + }, + { + "epoch": 9.6, + "grad_norm": 1.3105931282043457, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0071, + "step": 19050 + }, + { + "epoch": 9.61, + "grad_norm": 1.9968011379241943, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0075, + "step": 19075 + }, + { + "epoch": 9.62, + "grad_norm": 1.2742115259170532, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0074, + "step": 19100 + }, + { + "epoch": 9.63, + "grad_norm": 1.062193512916565, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0078, + "step": 19125 + }, + { + "epoch": 9.65, + "grad_norm": 1.0094131231307983, + "learning_rate": 8.126432160804021e-06, + "loss": 0.0069, + "step": 19150 + }, + { + "epoch": 9.66, + "grad_norm": 1.675285816192627, + "learning_rate": 8.12391959798995e-06, + "loss": 0.0082, + "step": 19175 + }, + { + "epoch": 9.67, + "grad_norm": 1.96036696434021, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0071, + "step": 19200 + }, + { + "epoch": 9.69, + "grad_norm": 1.8763169050216675, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0075, + "step": 19225 + }, + { + "epoch": 9.7, + "grad_norm": 2.053239107131958, + "learning_rate": 8.11638190954774e-06, + "loss": 0.008, + "step": 19250 + }, + { + "epoch": 9.71, + "grad_norm": 1.5086833238601685, + "learning_rate": 8.11386934673367e-06, + "loss": 0.0072, + "step": 19275 + }, + { + "epoch": 9.72, + "grad_norm": 1.4810270071029663, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0079, + "step": 19300 + }, + { + "epoch": 9.74, + "grad_norm": 1.87246835231781, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0082, + "step": 19325 + }, + { + "epoch": 9.75, + "grad_norm": 2.0673673152923584, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0075, + "step": 19350 + }, + { + "epoch": 9.76, + "grad_norm": 1.2569661140441895, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0067, + "step": 19375 + }, + { + "epoch": 9.77, + "grad_norm": 1.6111642122268677, + "learning_rate": 8.101306532663318e-06, + "loss": 0.0075, + "step": 19400 + }, + { + "epoch": 9.79, + "grad_norm": 2.3493645191192627, + "learning_rate": 8.098793969849247e-06, + "loss": 0.0077, + "step": 19425 + }, + { + "epoch": 9.8, + "grad_norm": 1.9058961868286133, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0074, + "step": 19450 + }, + { + "epoch": 9.81, + "grad_norm": 1.802046537399292, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0073, + "step": 19475 + }, + { + "epoch": 9.82, + "grad_norm": 1.5245673656463623, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0067, + "step": 19500 + }, + { + "epoch": 9.84, + "grad_norm": 1.501242756843567, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0069, + "step": 19525 + }, + { + "epoch": 9.85, + "grad_norm": 1.430893898010254, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0083, + "step": 19550 + }, + { + "epoch": 9.86, + "grad_norm": 1.6655915975570679, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0067, + "step": 19575 + }, + { + "epoch": 9.87, + "grad_norm": 1.6842190027236938, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0068, + "step": 19600 + }, + { + "epoch": 9.89, + "grad_norm": 2.076958179473877, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0073, + "step": 19625 + }, + { + "epoch": 9.9, + "grad_norm": 2.023754119873047, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0081, + "step": 19650 + }, + { + "epoch": 9.91, + "grad_norm": 1.3943392038345337, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0078, + "step": 19675 + }, + { + "epoch": 9.92, + "grad_norm": 2.7814877033233643, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0079, + "step": 19700 + }, + { + "epoch": 9.94, + "grad_norm": 1.6121410131454468, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0077, + "step": 19725 + }, + { + "epoch": 9.95, + "grad_norm": 1.514812707901001, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0068, + "step": 19750 + }, + { + "epoch": 9.96, + "grad_norm": 1.439288854598999, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0076, + "step": 19775 + }, + { + "epoch": 9.97, + "grad_norm": 0.9317819476127625, + "learning_rate": 8.061105527638192e-06, + "loss": 0.0069, + "step": 19800 + }, + { + "epoch": 9.99, + "grad_norm": 1.976050615310669, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0078, + "step": 19825 + }, + { + "epoch": 10.0, + "grad_norm": 1.0201984643936157, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0072, + "step": 19850 + }, + { + "epoch": 10.01, + "grad_norm": 0.900181233882904, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0047, + "step": 19875 + }, + { + "epoch": 10.03, + "grad_norm": 0.8394168019294739, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0049, + "step": 19900 + }, + { + "epoch": 10.04, + "grad_norm": 0.7329122424125671, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0042, + "step": 19925 + }, + { + "epoch": 10.05, + "grad_norm": 1.6395137310028076, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0045, + "step": 19950 + }, + { + "epoch": 10.06, + "grad_norm": 1.4006826877593994, + "learning_rate": 8.043517587939699e-06, + "loss": 0.0045, + "step": 19975 + }, + { + "epoch": 10.08, + "grad_norm": 1.6509326696395874, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0045, + "step": 20000 + }, + { + "epoch": 10.08, + "eval_loss": 0.25544053316116333, + "eval_runtime": 780.2624, + "eval_samples_per_second": 1.806, + "eval_steps_per_second": 1.806, + "eval_wer": 23.528190937391905, + "step": 20000 + }, + { + "epoch": 10.09, + "grad_norm": 1.6562331914901733, + "learning_rate": 8.03849246231156e-06, + "loss": 0.0046, + "step": 20025 + }, + { + "epoch": 10.1, + "grad_norm": 1.7131171226501465, + "learning_rate": 8.035979899497489e-06, + "loss": 0.0051, + "step": 20050 + }, + { + "epoch": 10.11, + "grad_norm": 1.3539303541183472, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0041, + "step": 20075 + }, + { + "epoch": 10.13, + "grad_norm": 1.5204474925994873, + "learning_rate": 8.030954773869347e-06, + "loss": 0.005, + "step": 20100 + }, + { + "epoch": 10.14, + "grad_norm": 1.5538609027862549, + "learning_rate": 8.028442211055277e-06, + "loss": 0.005, + "step": 20125 + }, + { + "epoch": 10.15, + "grad_norm": 1.281728744506836, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0043, + "step": 20150 + }, + { + "epoch": 10.16, + "grad_norm": 2.3059980869293213, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0046, + "step": 20175 + }, + { + "epoch": 10.18, + "grad_norm": 0.8961646556854248, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0048, + "step": 20200 + }, + { + "epoch": 10.19, + "grad_norm": 1.1319265365600586, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0043, + "step": 20225 + }, + { + "epoch": 10.2, + "grad_norm": 1.8678749799728394, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0052, + "step": 20250 + }, + { + "epoch": 10.21, + "grad_norm": 1.574100375175476, + "learning_rate": 8.013366834170854e-06, + "loss": 0.0048, + "step": 20275 + }, + { + "epoch": 10.23, + "grad_norm": 1.676328182220459, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0049, + "step": 20300 + }, + { + "epoch": 10.24, + "grad_norm": 1.3840720653533936, + "learning_rate": 8.008341708542714e-06, + "loss": 0.005, + "step": 20325 + }, + { + "epoch": 10.25, + "grad_norm": 1.3118586540222168, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0054, + "step": 20350 + }, + { + "epoch": 10.26, + "grad_norm": 1.8612523078918457, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0049, + "step": 20375 + }, + { + "epoch": 10.28, + "grad_norm": 2.0608303546905518, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0053, + "step": 20400 + }, + { + "epoch": 10.29, + "grad_norm": 1.2124433517456055, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0053, + "step": 20425 + }, + { + "epoch": 10.3, + "grad_norm": 1.3948249816894531, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0047, + "step": 20450 + }, + { + "epoch": 10.31, + "grad_norm": 1.4954756498336792, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0051, + "step": 20475 + }, + { + "epoch": 10.33, + "grad_norm": 1.2343194484710693, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0056, + "step": 20500 + }, + { + "epoch": 10.34, + "grad_norm": 2.3053460121154785, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0048, + "step": 20525 + }, + { + "epoch": 10.35, + "grad_norm": 1.0803661346435547, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0051, + "step": 20550 + }, + { + "epoch": 10.37, + "grad_norm": 1.7742104530334473, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0055, + "step": 20575 + }, + { + "epoch": 10.38, + "grad_norm": 1.462449073791504, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0049, + "step": 20600 + }, + { + "epoch": 10.39, + "grad_norm": 1.1726417541503906, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0054, + "step": 20625 + }, + { + "epoch": 10.4, + "grad_norm": 1.3419547080993652, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0054, + "step": 20650 + }, + { + "epoch": 10.42, + "grad_norm": 1.3675485849380493, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0052, + "step": 20675 + }, + { + "epoch": 10.43, + "grad_norm": 2.0673506259918213, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0057, + "step": 20700 + }, + { + "epoch": 10.44, + "grad_norm": 1.158771276473999, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0052, + "step": 20725 + }, + { + "epoch": 10.45, + "grad_norm": 0.8775469660758972, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0051, + "step": 20750 + }, + { + "epoch": 10.47, + "grad_norm": 2.4425106048583984, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0056, + "step": 20775 + }, + { + "epoch": 10.48, + "grad_norm": 1.8277249336242676, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0055, + "step": 20800 + }, + { + "epoch": 10.49, + "grad_norm": 1.406618595123291, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0057, + "step": 20825 + }, + { + "epoch": 10.5, + "grad_norm": 1.2207611799240112, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0055, + "step": 20850 + }, + { + "epoch": 10.52, + "grad_norm": 1.2478914260864258, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0051, + "step": 20875 + }, + { + "epoch": 10.53, + "grad_norm": 1.7296686172485352, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0055, + "step": 20900 + }, + { + "epoch": 10.54, + "grad_norm": 1.705731987953186, + "learning_rate": 7.948040201005027e-06, + "loss": 0.005, + "step": 20925 + }, + { + "epoch": 10.55, + "grad_norm": 0.8736703991889954, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0057, + "step": 20950 + }, + { + "epoch": 10.57, + "grad_norm": 1.4896725416183472, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0052, + "step": 20975 + }, + { + "epoch": 10.58, + "grad_norm": 1.935691237449646, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0055, + "step": 21000 + }, + { + "epoch": 10.58, + "eval_loss": 0.2658803462982178, + "eval_runtime": 641.6657, + "eval_samples_per_second": 2.196, + "eval_steps_per_second": 2.196, + "eval_wer": 24.116222760290558, + "step": 21000 + }, + { + "epoch": 10.59, + "grad_norm": 1.7740029096603394, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0049, + "step": 21025 + }, + { + "epoch": 10.6, + "grad_norm": 1.8998734951019287, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0065, + "step": 21050 + }, + { + "epoch": 10.62, + "grad_norm": 1.3813358545303345, + "learning_rate": 7.932964824120604e-06, + "loss": 0.006, + "step": 21075 + }, + { + "epoch": 10.63, + "grad_norm": 1.8003945350646973, + "learning_rate": 7.930452261306534e-06, + "loss": 0.0055, + "step": 21100 + }, + { + "epoch": 10.64, + "grad_norm": 1.7029547691345215, + "learning_rate": 7.927939698492463e-06, + "loss": 0.0044, + "step": 21125 + }, + { + "epoch": 10.65, + "grad_norm": 2.208817720413208, + "learning_rate": 7.925427135678392e-06, + "loss": 0.0049, + "step": 21150 + }, + { + "epoch": 10.67, + "grad_norm": 1.768268346786499, + "learning_rate": 7.922914572864322e-06, + "loss": 0.0057, + "step": 21175 + }, + { + "epoch": 10.68, + "grad_norm": 1.4503201246261597, + "learning_rate": 7.920402010050253e-06, + "loss": 0.0054, + "step": 21200 + }, + { + "epoch": 10.69, + "grad_norm": 1.497975468635559, + "learning_rate": 7.91788944723618e-06, + "loss": 0.0058, + "step": 21225 + }, + { + "epoch": 10.71, + "grad_norm": 1.7116060256958008, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0051, + "step": 21250 + }, + { + "epoch": 10.72, + "grad_norm": 2.685547113418579, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0062, + "step": 21275 + }, + { + "epoch": 10.73, + "grad_norm": 1.4006340503692627, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0052, + "step": 21300 + }, + { + "epoch": 10.74, + "grad_norm": 1.3967355489730835, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0053, + "step": 21325 + }, + { + "epoch": 10.76, + "grad_norm": 2.3471438884735107, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0064, + "step": 21350 + }, + { + "epoch": 10.77, + "grad_norm": 1.9880576133728027, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0049, + "step": 21375 + }, + { + "epoch": 10.78, + "grad_norm": 1.6665552854537964, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0055, + "step": 21400 + }, + { + "epoch": 10.79, + "grad_norm": 1.3990721702575684, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0061, + "step": 21425 + }, + { + "epoch": 10.81, + "grad_norm": 1.7920498847961426, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0053, + "step": 21450 + }, + { + "epoch": 10.82, + "grad_norm": 2.0328104496002197, + "learning_rate": 7.89286432160804e-06, + "loss": 0.0055, + "step": 21475 + }, + { + "epoch": 10.83, + "grad_norm": 1.8484914302825928, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0057, + "step": 21500 + }, + { + "epoch": 10.84, + "grad_norm": 1.1959257125854492, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0053, + "step": 21525 + }, + { + "epoch": 10.86, + "grad_norm": 1.821405291557312, + "learning_rate": 7.885326633165829e-06, + "loss": 0.005, + "step": 21550 + }, + { + "epoch": 10.87, + "grad_norm": 1.9156001806259155, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0053, + "step": 21575 + }, + { + "epoch": 10.88, + "grad_norm": 2.608036518096924, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0058, + "step": 21600 + }, + { + "epoch": 10.89, + "grad_norm": 1.9512995481491089, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0055, + "step": 21625 + }, + { + "epoch": 10.91, + "grad_norm": 2.440284252166748, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0056, + "step": 21650 + }, + { + "epoch": 10.92, + "grad_norm": 1.5289582014083862, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0063, + "step": 21675 + }, + { + "epoch": 10.93, + "grad_norm": 1.6737768650054932, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0051, + "step": 21700 + }, + { + "epoch": 10.94, + "grad_norm": 1.3897589445114136, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0057, + "step": 21725 + }, + { + "epoch": 10.96, + "grad_norm": 2.573732852935791, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0063, + "step": 21750 + }, + { + "epoch": 10.97, + "grad_norm": 1.5241488218307495, + "learning_rate": 7.862713567839196e-06, + "loss": 0.006, + "step": 21775 + }, + { + "epoch": 10.98, + "grad_norm": 1.3380444049835205, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0049, + "step": 21800 + }, + { + "epoch": 10.99, + "grad_norm": 1.0626407861709595, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0052, + "step": 21825 + }, + { + "epoch": 11.01, + "grad_norm": 1.4017066955566406, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0037, + "step": 21850 + }, + { + "epoch": 11.02, + "grad_norm": 0.9792363047599792, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0035, + "step": 21875 + }, + { + "epoch": 11.03, + "grad_norm": 0.9123956561088562, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0037, + "step": 21900 + }, + { + "epoch": 11.05, + "grad_norm": 1.5364477634429932, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0041, + "step": 21925 + }, + { + "epoch": 11.06, + "grad_norm": 0.9540588855743408, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0042, + "step": 21950 + }, + { + "epoch": 11.07, + "grad_norm": 1.256990909576416, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0037, + "step": 21975 + }, + { + "epoch": 11.08, + "grad_norm": 1.1927049160003662, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0034, + "step": 22000 + }, + { + "epoch": 11.08, + "eval_loss": 0.27246958017349243, + "eval_runtime": 638.6614, + "eval_samples_per_second": 2.206, + "eval_steps_per_second": 2.206, + "eval_wer": 24.24766516776202, + "step": 22000 + }, + { + "epoch": 11.1, + "grad_norm": 0.5147941708564758, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0033, + "step": 22025 + }, + { + "epoch": 11.11, + "grad_norm": 0.9543977379798889, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0038, + "step": 22050 + }, + { + "epoch": 11.12, + "grad_norm": 2.2430100440979004, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0041, + "step": 22075 + }, + { + "epoch": 11.13, + "grad_norm": 2.2202541828155518, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0041, + "step": 22100 + }, + { + "epoch": 11.15, + "grad_norm": 1.715667486190796, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0039, + "step": 22125 + }, + { + "epoch": 11.16, + "grad_norm": 1.9119547605514526, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0041, + "step": 22150 + }, + { + "epoch": 11.17, + "grad_norm": 1.6011847257614136, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0042, + "step": 22175 + }, + { + "epoch": 11.18, + "grad_norm": 1.1330533027648926, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0037, + "step": 22200 + }, + { + "epoch": 11.2, + "grad_norm": 1.442896842956543, + "learning_rate": 7.81748743718593e-06, + "loss": 0.004, + "step": 22225 + }, + { + "epoch": 11.21, + "grad_norm": 1.1626893281936646, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0041, + "step": 22250 + }, + { + "epoch": 11.22, + "grad_norm": 1.245456576347351, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0039, + "step": 22275 + }, + { + "epoch": 11.23, + "grad_norm": 1.619498372077942, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0037, + "step": 22300 + }, + { + "epoch": 11.25, + "grad_norm": 1.3442145586013794, + "learning_rate": 7.80743718592965e-06, + "loss": 0.004, + "step": 22325 + }, + { + "epoch": 11.26, + "grad_norm": 1.6045056581497192, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0039, + "step": 22350 + }, + { + "epoch": 11.27, + "grad_norm": 1.1371634006500244, + "learning_rate": 7.802412060301508e-06, + "loss": 0.0041, + "step": 22375 + }, + { + "epoch": 11.28, + "grad_norm": 2.101292848587036, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0039, + "step": 22400 + }, + { + "epoch": 11.3, + "grad_norm": 1.0439993143081665, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0041, + "step": 22425 + }, + { + "epoch": 11.31, + "grad_norm": 1.2077866792678833, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0042, + "step": 22450 + }, + { + "epoch": 11.32, + "grad_norm": 1.229236364364624, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0041, + "step": 22475 + }, + { + "epoch": 11.34, + "grad_norm": 1.1711903810501099, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0046, + "step": 22500 + }, + { + "epoch": 11.35, + "grad_norm": 1.5802088975906372, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0044, + "step": 22525 + }, + { + "epoch": 11.36, + "grad_norm": 1.4556586742401123, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0042, + "step": 22550 + }, + { + "epoch": 11.37, + "grad_norm": 2.2199254035949707, + "learning_rate": 7.782311557788945e-06, + "loss": 0.004, + "step": 22575 + }, + { + "epoch": 11.39, + "grad_norm": 1.7703883647918701, + "learning_rate": 7.779798994974876e-06, + "loss": 0.0039, + "step": 22600 + }, + { + "epoch": 11.4, + "grad_norm": 3.0892791748046875, + "learning_rate": 7.777286432160805e-06, + "loss": 0.004, + "step": 22625 + }, + { + "epoch": 11.41, + "grad_norm": 2.2085580825805664, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0039, + "step": 22650 + }, + { + "epoch": 11.42, + "grad_norm": 1.9577640295028687, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0043, + "step": 22675 + }, + { + "epoch": 11.44, + "grad_norm": 2.051767349243164, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0044, + "step": 22700 + }, + { + "epoch": 11.45, + "grad_norm": 0.8601694703102112, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0041, + "step": 22725 + }, + { + "epoch": 11.46, + "grad_norm": 1.3375483751296997, + "learning_rate": 7.764723618090453e-06, + "loss": 0.0044, + "step": 22750 + }, + { + "epoch": 11.47, + "grad_norm": 1.3985751867294312, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0042, + "step": 22775 + }, + { + "epoch": 11.49, + "grad_norm": 0.8720707297325134, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0047, + "step": 22800 + }, + { + "epoch": 11.5, + "grad_norm": 0.9330138564109802, + "learning_rate": 7.757185929648243e-06, + "loss": 0.0039, + "step": 22825 + }, + { + "epoch": 11.51, + "grad_norm": 1.2324626445770264, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0041, + "step": 22850 + }, + { + "epoch": 11.52, + "grad_norm": 1.7098289728164673, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0043, + "step": 22875 + }, + { + "epoch": 11.54, + "grad_norm": 1.247007131576538, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0041, + "step": 22900 + }, + { + "epoch": 11.55, + "grad_norm": 1.9044662714004517, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0046, + "step": 22925 + }, + { + "epoch": 11.56, + "grad_norm": 2.0409295558929443, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0043, + "step": 22950 + }, + { + "epoch": 11.57, + "grad_norm": 2.2986319065093994, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0045, + "step": 22975 + }, + { + "epoch": 11.59, + "grad_norm": 2.2678630352020264, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0042, + "step": 23000 + }, + { + "epoch": 11.59, + "eval_loss": 0.28508278727531433, + "eval_runtime": 644.32, + "eval_samples_per_second": 2.187, + "eval_steps_per_second": 2.187, + "eval_wer": 24.05396056727776, + "step": 23000 + }, + { + "epoch": 11.6, + "grad_norm": 1.907650113105774, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0041, + "step": 23025 + }, + { + "epoch": 11.61, + "grad_norm": 0.9967373013496399, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0044, + "step": 23050 + }, + { + "epoch": 11.62, + "grad_norm": 1.4243005514144897, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0041, + "step": 23075 + }, + { + "epoch": 11.64, + "grad_norm": 1.480535864830017, + "learning_rate": 7.729547738693469e-06, + "loss": 0.0041, + "step": 23100 + }, + { + "epoch": 11.65, + "grad_norm": 1.5228785276412964, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0048, + "step": 23125 + }, + { + "epoch": 11.66, + "grad_norm": 1.2098692655563354, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0041, + "step": 23150 + }, + { + "epoch": 11.68, + "grad_norm": 1.1572978496551514, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0044, + "step": 23175 + }, + { + "epoch": 11.69, + "grad_norm": 1.1368253231048584, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0038, + "step": 23200 + }, + { + "epoch": 11.7, + "grad_norm": 1.6968629360198975, + "learning_rate": 7.716984924623117e-06, + "loss": 0.004, + "step": 23225 + }, + { + "epoch": 11.71, + "grad_norm": 1.8138012886047363, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0041, + "step": 23250 + }, + { + "epoch": 11.73, + "grad_norm": 1.3682103157043457, + "learning_rate": 7.711959798994976e-06, + "loss": 0.0048, + "step": 23275 + }, + { + "epoch": 11.74, + "grad_norm": 2.3074638843536377, + "learning_rate": 7.709447236180905e-06, + "loss": 0.0043, + "step": 23300 + }, + { + "epoch": 11.75, + "grad_norm": 1.544771432876587, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0048, + "step": 23325 + }, + { + "epoch": 11.76, + "grad_norm": 1.589612603187561, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0043, + "step": 23350 + }, + { + "epoch": 11.78, + "grad_norm": 1.983675241470337, + "learning_rate": 7.701909547738695e-06, + "loss": 0.0042, + "step": 23375 + }, + { + "epoch": 11.79, + "grad_norm": 1.3556252717971802, + "learning_rate": 7.699396984924624e-06, + "loss": 0.0043, + "step": 23400 + }, + { + "epoch": 11.8, + "grad_norm": 1.4705439805984497, + "learning_rate": 7.696884422110553e-06, + "loss": 0.0048, + "step": 23425 + }, + { + "epoch": 11.81, + "grad_norm": 1.4444918632507324, + "learning_rate": 7.694371859296483e-06, + "loss": 0.0047, + "step": 23450 + }, + { + "epoch": 11.83, + "grad_norm": 1.6264967918395996, + "learning_rate": 7.691859296482412e-06, + "loss": 0.0046, + "step": 23475 + }, + { + "epoch": 11.84, + "grad_norm": 1.275970697402954, + "learning_rate": 7.689346733668343e-06, + "loss": 0.0044, + "step": 23500 + }, + { + "epoch": 11.85, + "grad_norm": 1.5057625770568848, + "learning_rate": 7.68683417085427e-06, + "loss": 0.0042, + "step": 23525 + }, + { + "epoch": 11.86, + "grad_norm": 1.7427964210510254, + "learning_rate": 7.684321608040202e-06, + "loss": 0.0047, + "step": 23550 + }, + { + "epoch": 11.88, + "grad_norm": NaN, + "learning_rate": 7.681909547738693e-06, + "loss": 0.0047, + "step": 23575 + }, + { + "epoch": 11.89, + "grad_norm": 2.0153872966766357, + "learning_rate": 7.679396984924624e-06, + "loss": 0.0045, + "step": 23600 + }, + { + "epoch": 11.9, + "grad_norm": 1.4703736305236816, + "learning_rate": 7.676884422110554e-06, + "loss": 0.005, + "step": 23625 + }, + { + "epoch": 11.91, + "grad_norm": 2.2874040603637695, + "learning_rate": 7.674371859296483e-06, + "loss": 0.0048, + "step": 23650 + }, + { + "epoch": 11.93, + "grad_norm": 2.741128921508789, + "learning_rate": 7.671859296482412e-06, + "loss": 0.0043, + "step": 23675 + }, + { + "epoch": 11.94, + "grad_norm": 1.3430378437042236, + "learning_rate": 7.669346733668343e-06, + "loss": 0.0042, + "step": 23700 + }, + { + "epoch": 11.95, + "grad_norm": 1.0754338502883911, + "learning_rate": 7.666834170854271e-06, + "loss": 0.0049, + "step": 23725 + }, + { + "epoch": 11.96, + "grad_norm": 1.6235313415527344, + "learning_rate": 7.664321608040202e-06, + "loss": 0.0046, + "step": 23750 + }, + { + "epoch": 11.98, + "grad_norm": 1.4375765323638916, + "learning_rate": 7.661809045226131e-06, + "loss": 0.0044, + "step": 23775 + }, + { + "epoch": 11.99, + "grad_norm": 1.9673402309417725, + "learning_rate": 7.65929648241206e-06, + "loss": 0.0047, + "step": 23800 + }, + { + "epoch": 12.0, + "grad_norm": 2.2287938594818115, + "learning_rate": 7.656783919597992e-06, + "loss": 0.004, + "step": 23825 + }, + { + "epoch": 12.02, + "grad_norm": 1.1054763793945312, + "learning_rate": 7.65427135678392e-06, + "loss": 0.0037, + "step": 23850 + }, + { + "epoch": 12.03, + "grad_norm": 0.7484707236289978, + "learning_rate": 7.65175879396985e-06, + "loss": 0.0033, + "step": 23875 + }, + { + "epoch": 12.04, + "grad_norm": 1.4415768384933472, + "learning_rate": 7.64924623115578e-06, + "loss": 0.0028, + "step": 23900 + }, + { + "epoch": 12.05, + "grad_norm": 0.6510108709335327, + "learning_rate": 7.646733668341709e-06, + "loss": 0.0029, + "step": 23925 + }, + { + "epoch": 12.07, + "grad_norm": 0.967505156993866, + "learning_rate": 7.644221105527638e-06, + "loss": 0.0028, + "step": 23950 + }, + { + "epoch": 12.08, + "grad_norm": 0.8767016530036926, + "learning_rate": 7.64170854271357e-06, + "loss": 0.0027, + "step": 23975 + }, + { + "epoch": 12.09, + "grad_norm": 1.4679666757583618, + "learning_rate": 7.639195979899499e-06, + "loss": 0.0031, + "step": 24000 + }, + { + "epoch": 12.09, + "eval_loss": 0.28861406445503235, + "eval_runtime": 647.0257, + "eval_samples_per_second": 2.178, + "eval_steps_per_second": 2.178, + "eval_wer": 23.825665859564165, + "step": 24000 + }, + { + "epoch": 12.1, + "grad_norm": 3.264605760574341, + "learning_rate": 7.636683417085428e-06, + "loss": 0.0035, + "step": 24025 + }, + { + "epoch": 12.12, + "grad_norm": 1.3971178531646729, + "learning_rate": 7.634170854271357e-06, + "loss": 0.0033, + "step": 24050 + }, + { + "epoch": 12.13, + "grad_norm": 1.240206003189087, + "learning_rate": 7.631658291457287e-06, + "loss": 0.003, + "step": 24075 + }, + { + "epoch": 12.14, + "grad_norm": 1.3925122022628784, + "learning_rate": 7.629145728643217e-06, + "loss": 0.003, + "step": 24100 + }, + { + "epoch": 12.15, + "grad_norm": 0.912455677986145, + "learning_rate": 7.626633165829146e-06, + "loss": 0.0029, + "step": 24125 + }, + { + "epoch": 12.17, + "grad_norm": 1.508727788925171, + "learning_rate": 7.624120603015076e-06, + "loss": 0.0029, + "step": 24150 + }, + { + "epoch": 12.18, + "grad_norm": 1.45113205909729, + "learning_rate": 7.621608040201006e-06, + "loss": 0.0035, + "step": 24175 + }, + { + "epoch": 12.19, + "grad_norm": 1.3699947595596313, + "learning_rate": 7.619095477386935e-06, + "loss": 0.0029, + "step": 24200 + }, + { + "epoch": 12.2, + "grad_norm": 1.319270133972168, + "learning_rate": 7.616582914572865e-06, + "loss": 0.0034, + "step": 24225 + }, + { + "epoch": 12.22, + "grad_norm": 1.116363286972046, + "learning_rate": 7.614070351758794e-06, + "loss": 0.0034, + "step": 24250 + }, + { + "epoch": 12.23, + "grad_norm": 1.1260015964508057, + "learning_rate": 7.6115577889447245e-06, + "loss": 0.0034, + "step": 24275 + }, + { + "epoch": 12.24, + "grad_norm": 1.9116740226745605, + "learning_rate": 7.609045226130654e-06, + "loss": 0.004, + "step": 24300 + }, + { + "epoch": 12.25, + "grad_norm": 2.0611562728881836, + "learning_rate": 7.606532663316584e-06, + "loss": 0.003, + "step": 24325 + }, + { + "epoch": 12.27, + "grad_norm": 1.186975359916687, + "learning_rate": 7.6040201005025125e-06, + "loss": 0.0033, + "step": 24350 + }, + { + "epoch": 12.28, + "grad_norm": 1.219438076019287, + "learning_rate": 7.601507537688443e-06, + "loss": 0.0034, + "step": 24375 + }, + { + "epoch": 12.29, + "grad_norm": 1.2265121936798096, + "learning_rate": 7.598994974874373e-06, + "loss": 0.0032, + "step": 24400 + }, + { + "epoch": 12.3, + "grad_norm": 1.6346076726913452, + "learning_rate": 7.596482412060302e-06, + "loss": 0.0034, + "step": 24425 + }, + { + "epoch": 12.32, + "grad_norm": 0.8696354031562805, + "learning_rate": 7.593969849246232e-06, + "loss": 0.0031, + "step": 24450 + }, + { + "epoch": 12.33, + "grad_norm": 1.4290732145309448, + "learning_rate": 7.591457286432161e-06, + "loss": 0.0037, + "step": 24475 + }, + { + "epoch": 12.34, + "grad_norm": 2.6827902793884277, + "learning_rate": 7.588944723618091e-06, + "loss": 0.0038, + "step": 24500 + }, + { + "epoch": 12.36, + "grad_norm": 1.816667079925537, + "learning_rate": 7.58643216080402e-06, + "loss": 0.0036, + "step": 24525 + }, + { + "epoch": 12.37, + "grad_norm": 0.5981873869895935, + "learning_rate": 7.5839195979899505e-06, + "loss": 0.0027, + "step": 24550 + }, + { + "epoch": 12.38, + "grad_norm": 2.2105541229248047, + "learning_rate": 7.58140703517588e-06, + "loss": 0.0035, + "step": 24575 + }, + { + "epoch": 12.39, + "grad_norm": 1.0556033849716187, + "learning_rate": 7.57889447236181e-06, + "loss": 0.0032, + "step": 24600 + }, + { + "epoch": 12.41, + "grad_norm": 1.0780327320098877, + "learning_rate": 7.57638190954774e-06, + "loss": 0.0035, + "step": 24625 + }, + { + "epoch": 12.42, + "grad_norm": 1.9094908237457275, + "learning_rate": 7.573869346733669e-06, + "loss": 0.0036, + "step": 24650 + }, + { + "epoch": 12.43, + "grad_norm": 1.2956584692001343, + "learning_rate": 7.571356783919599e-06, + "loss": 0.0044, + "step": 24675 + }, + { + "epoch": 12.44, + "grad_norm": 1.5782350301742554, + "learning_rate": 7.568844221105528e-06, + "loss": 0.0031, + "step": 24700 + }, + { + "epoch": 12.46, + "grad_norm": 1.1655892133712769, + "learning_rate": 7.566331658291458e-06, + "loss": 0.0033, + "step": 24725 + }, + { + "epoch": 12.47, + "grad_norm": 1.4031734466552734, + "learning_rate": 7.563819095477387e-06, + "loss": 0.0034, + "step": 24750 + }, + { + "epoch": 12.48, + "grad_norm": 1.4916791915893555, + "learning_rate": 7.561306532663317e-06, + "loss": 0.0035, + "step": 24775 + }, + { + "epoch": 12.49, + "grad_norm": 0.40038520097732544, + "learning_rate": 7.558793969849247e-06, + "loss": 0.0035, + "step": 24800 + }, + { + "epoch": 12.51, + "grad_norm": 1.3992714881896973, + "learning_rate": 7.556281407035176e-06, + "loss": 0.0033, + "step": 24825 + }, + { + "epoch": 12.52, + "grad_norm": 0.48557183146476746, + "learning_rate": 7.5537688442211066e-06, + "loss": 0.004, + "step": 24850 + }, + { + "epoch": 12.53, + "grad_norm": 1.111877679824829, + "learning_rate": 7.551256281407036e-06, + "loss": 0.0033, + "step": 24875 + }, + { + "epoch": 12.54, + "grad_norm": 1.1091711521148682, + "learning_rate": 7.548743718592966e-06, + "loss": 0.0034, + "step": 24900 + }, + { + "epoch": 12.56, + "grad_norm": 1.899695634841919, + "learning_rate": 7.5462311557788945e-06, + "loss": 0.0039, + "step": 24925 + }, + { + "epoch": 12.57, + "grad_norm": 1.4109854698181152, + "learning_rate": 7.543718592964825e-06, + "loss": 0.0033, + "step": 24950 + }, + { + "epoch": 12.58, + "grad_norm": 1.7029036283493042, + "learning_rate": 7.541206030150754e-06, + "loss": 0.0032, + "step": 24975 + }, + { + "epoch": 12.59, + "grad_norm": 1.6713786125183105, + "learning_rate": 7.538693467336684e-06, + "loss": 0.0035, + "step": 25000 + }, + { + "epoch": 12.59, + "eval_loss": 0.29140138626098633, + "eval_runtime": 645.0337, + "eval_samples_per_second": 2.184, + "eval_steps_per_second": 2.184, + "eval_wer": 24.399861639571082, + "step": 25000 + }, + { + "epoch": 12.61, + "grad_norm": 2.3478965759277344, + "learning_rate": 7.536180904522614e-06, + "loss": 0.0034, + "step": 25025 + }, + { + "epoch": 12.62, + "grad_norm": 0.9575018286705017, + "learning_rate": 7.533668341708543e-06, + "loss": 0.0032, + "step": 25050 + }, + { + "epoch": 12.63, + "grad_norm": 0.94124835729599, + "learning_rate": 7.531155778894473e-06, + "loss": 0.0034, + "step": 25075 + }, + { + "epoch": 12.64, + "grad_norm": 1.8110922574996948, + "learning_rate": 7.528643216080402e-06, + "loss": 0.0034, + "step": 25100 + }, + { + "epoch": 12.66, + "grad_norm": 1.0863940715789795, + "learning_rate": 7.5261306532663325e-06, + "loss": 0.0034, + "step": 25125 + }, + { + "epoch": 12.67, + "grad_norm": 1.8619202375411987, + "learning_rate": 7.523618090452262e-06, + "loss": 0.0037, + "step": 25150 + }, + { + "epoch": 12.68, + "grad_norm": 1.9385497570037842, + "learning_rate": 7.521105527638192e-06, + "loss": 0.0034, + "step": 25175 + }, + { + "epoch": 12.7, + "grad_norm": 1.8523273468017578, + "learning_rate": 7.5185929648241205e-06, + "loss": 0.0035, + "step": 25200 + }, + { + "epoch": 12.71, + "grad_norm": 1.9459110498428345, + "learning_rate": 7.516080402010051e-06, + "loss": 0.0034, + "step": 25225 + }, + { + "epoch": 12.72, + "grad_norm": 1.6162514686584473, + "learning_rate": 7.513567839195981e-06, + "loss": 0.0041, + "step": 25250 + }, + { + "epoch": 12.73, + "grad_norm": 1.7794193029403687, + "learning_rate": 7.51105527638191e-06, + "loss": 0.0037, + "step": 25275 + }, + { + "epoch": 12.75, + "grad_norm": 1.8779551982879639, + "learning_rate": 7.50854271356784e-06, + "loss": 0.0038, + "step": 25300 + }, + { + "epoch": 12.76, + "grad_norm": 1.2835701704025269, + "learning_rate": 7.506030150753769e-06, + "loss": 0.0035, + "step": 25325 + }, + { + "epoch": 12.77, + "grad_norm": 1.1003650426864624, + "learning_rate": 7.503517587939699e-06, + "loss": 0.0037, + "step": 25350 + }, + { + "epoch": 12.78, + "grad_norm": 1.345335602760315, + "learning_rate": 7.501005025125628e-06, + "loss": 0.0036, + "step": 25375 + }, + { + "epoch": 12.8, + "grad_norm": 0.9257469177246094, + "learning_rate": 7.4984924623115585e-06, + "loss": 0.0032, + "step": 25400 + }, + { + "epoch": 12.81, + "grad_norm": 1.3247835636138916, + "learning_rate": 7.495979899497488e-06, + "loss": 0.0042, + "step": 25425 + }, + { + "epoch": 12.82, + "grad_norm": 1.658199429512024, + "learning_rate": 7.493467336683418e-06, + "loss": 0.0038, + "step": 25450 + }, + { + "epoch": 12.83, + "grad_norm": 1.2152585983276367, + "learning_rate": 7.490954773869348e-06, + "loss": 0.0035, + "step": 25475 + }, + { + "epoch": 12.85, + "grad_norm": 1.6380772590637207, + "learning_rate": 7.488442211055277e-06, + "loss": 0.0041, + "step": 25500 + }, + { + "epoch": 12.86, + "grad_norm": 1.4688934087753296, + "learning_rate": 7.485929648241207e-06, + "loss": 0.0035, + "step": 25525 + }, + { + "epoch": 12.87, + "grad_norm": 2.3192384243011475, + "learning_rate": 7.483417085427136e-06, + "loss": 0.004, + "step": 25550 + }, + { + "epoch": 12.88, + "grad_norm": 1.0542503595352173, + "learning_rate": 7.480904522613066e-06, + "loss": 0.0038, + "step": 25575 + }, + { + "epoch": 12.9, + "grad_norm": 1.6845927238464355, + "learning_rate": 7.478391959798995e-06, + "loss": 0.0034, + "step": 25600 + }, + { + "epoch": 12.91, + "grad_norm": 1.3695615530014038, + "learning_rate": 7.475879396984925e-06, + "loss": 0.0034, + "step": 25625 + }, + { + "epoch": 12.92, + "grad_norm": 2.099348306655884, + "learning_rate": 7.473366834170855e-06, + "loss": 0.0041, + "step": 25650 + }, + { + "epoch": 12.93, + "grad_norm": 1.4898931980133057, + "learning_rate": 7.470854271356784e-06, + "loss": 0.0037, + "step": 25675 + }, + { + "epoch": 12.95, + "grad_norm": 1.583161473274231, + "learning_rate": 7.4683417085427146e-06, + "loss": 0.0037, + "step": 25700 + }, + { + "epoch": 12.96, + "grad_norm": 0.7393112778663635, + "learning_rate": 7.465829145728644e-06, + "loss": 0.0042, + "step": 25725 + }, + { + "epoch": 12.97, + "grad_norm": 1.3594324588775635, + "learning_rate": 7.463316582914574e-06, + "loss": 0.0034, + "step": 25750 + }, + { + "epoch": 12.98, + "grad_norm": 1.8096977472305298, + "learning_rate": 7.4608040201005025e-06, + "loss": 0.0041, + "step": 25775 + }, + { + "epoch": 13.0, + "grad_norm": 1.482646107673645, + "learning_rate": 7.458291457286433e-06, + "loss": 0.0036, + "step": 25800 + }, + { + "epoch": 13.01, + "grad_norm": 0.5999165177345276, + "learning_rate": 7.455778894472362e-06, + "loss": 0.0026, + "step": 25825 + }, + { + "epoch": 13.02, + "grad_norm": 0.5373625159263611, + "learning_rate": 7.453266331658292e-06, + "loss": 0.0026, + "step": 25850 + }, + { + "epoch": 13.04, + "grad_norm": 1.5365161895751953, + "learning_rate": 7.450753768844222e-06, + "loss": 0.0025, + "step": 25875 + }, + { + "epoch": 13.05, + "grad_norm": 0.9741165637969971, + "learning_rate": 7.448241206030151e-06, + "loss": 0.0025, + "step": 25900 + }, + { + "epoch": 13.06, + "grad_norm": 1.167653203010559, + "learning_rate": 7.445728643216081e-06, + "loss": 0.0027, + "step": 25925 + }, + { + "epoch": 13.07, + "grad_norm": 0.48862722516059875, + "learning_rate": 7.44321608040201e-06, + "loss": 0.0024, + "step": 25950 + }, + { + "epoch": 13.09, + "grad_norm": 0.701203465461731, + "learning_rate": 7.4407035175879405e-06, + "loss": 0.0024, + "step": 25975 + }, + { + "epoch": 13.1, + "grad_norm": 0.8814527988433838, + "learning_rate": 7.43819095477387e-06, + "loss": 0.0022, + "step": 26000 + }, + { + "epoch": 13.1, + "eval_loss": 0.29991698265075684, + "eval_runtime": 640.3865, + "eval_samples_per_second": 2.2, + "eval_steps_per_second": 2.2, + "eval_wer": 23.86717398823936, + "step": 26000 + }, + { + "epoch": 13.11, + "grad_norm": 1.1581878662109375, + "learning_rate": 7.4356783919598e-06, + "loss": 0.0021, + "step": 26025 + }, + { + "epoch": 13.12, + "grad_norm": 0.3536165654659271, + "learning_rate": 7.4331658291457285e-06, + "loss": 0.0025, + "step": 26050 + }, + { + "epoch": 13.14, + "grad_norm": 2.3617360591888428, + "learning_rate": 7.430653266331659e-06, + "loss": 0.0028, + "step": 26075 + }, + { + "epoch": 13.15, + "grad_norm": 1.100469708442688, + "learning_rate": 7.428140703517589e-06, + "loss": 0.0029, + "step": 26100 + }, + { + "epoch": 13.16, + "grad_norm": 1.3083537817001343, + "learning_rate": 7.425628140703518e-06, + "loss": 0.0033, + "step": 26125 + }, + { + "epoch": 13.17, + "grad_norm": 1.1170566082000732, + "learning_rate": 7.423115577889448e-06, + "loss": 0.0028, + "step": 26150 + }, + { + "epoch": 13.19, + "grad_norm": 1.4462560415267944, + "learning_rate": 7.420603015075377e-06, + "loss": 0.0027, + "step": 26175 + }, + { + "epoch": 13.2, + "grad_norm": 0.9630836844444275, + "learning_rate": 7.418090452261307e-06, + "loss": 0.0033, + "step": 26200 + }, + { + "epoch": 13.21, + "grad_norm": 0.7968631386756897, + "learning_rate": 7.415577889447236e-06, + "loss": 0.0025, + "step": 26225 + }, + { + "epoch": 13.22, + "grad_norm": 2.100764751434326, + "learning_rate": 7.4130653266331665e-06, + "loss": 0.0026, + "step": 26250 + }, + { + "epoch": 13.24, + "grad_norm": 1.6591538190841675, + "learning_rate": 7.410552763819097e-06, + "loss": 0.0031, + "step": 26275 + }, + { + "epoch": 13.25, + "grad_norm": 0.645767867565155, + "learning_rate": 7.408040201005026e-06, + "loss": 0.003, + "step": 26300 + }, + { + "epoch": 13.26, + "grad_norm": 1.4978662729263306, + "learning_rate": 7.405527638190956e-06, + "loss": 0.0028, + "step": 26325 + }, + { + "epoch": 13.27, + "grad_norm": 1.357334852218628, + "learning_rate": 7.403015075376885e-06, + "loss": 0.0026, + "step": 26350 + }, + { + "epoch": 13.29, + "grad_norm": 1.751214861869812, + "learning_rate": 7.400502512562815e-06, + "loss": 0.0027, + "step": 26375 + }, + { + "epoch": 13.3, + "grad_norm": 0.9337909817695618, + "learning_rate": 7.397989949748744e-06, + "loss": 0.0026, + "step": 26400 + }, + { + "epoch": 13.31, + "grad_norm": 1.3542364835739136, + "learning_rate": 7.395477386934674e-06, + "loss": 0.0031, + "step": 26425 + }, + { + "epoch": 13.32, + "grad_norm": 0.9499005675315857, + "learning_rate": 7.392964824120603e-06, + "loss": 0.0031, + "step": 26450 + }, + { + "epoch": 13.34, + "grad_norm": 1.5429147481918335, + "learning_rate": 7.390452261306533e-06, + "loss": 0.0029, + "step": 26475 + }, + { + "epoch": 13.35, + "grad_norm": 0.8379466533660889, + "learning_rate": 7.387939698492463e-06, + "loss": 0.0024, + "step": 26500 + }, + { + "epoch": 13.36, + "grad_norm": 1.3563404083251953, + "learning_rate": 7.385427135678392e-06, + "loss": 0.0029, + "step": 26525 + }, + { + "epoch": 13.38, + "grad_norm": 2.4341177940368652, + "learning_rate": 7.382914572864323e-06, + "loss": 0.0028, + "step": 26550 + }, + { + "epoch": 13.39, + "grad_norm": 0.8350504636764526, + "learning_rate": 7.380402010050252e-06, + "loss": 0.0027, + "step": 26575 + }, + { + "epoch": 13.4, + "grad_norm": 1.5221184492111206, + "learning_rate": 7.377889447236182e-06, + "loss": 0.0027, + "step": 26600 + }, + { + "epoch": 13.41, + "grad_norm": 2.1058216094970703, + "learning_rate": 7.3753768844221105e-06, + "loss": 0.0027, + "step": 26625 + }, + { + "epoch": 13.43, + "grad_norm": 1.1398578882217407, + "learning_rate": 7.372864321608041e-06, + "loss": 0.0029, + "step": 26650 + }, + { + "epoch": 13.44, + "grad_norm": 2.2516255378723145, + "learning_rate": 7.37035175879397e-06, + "loss": 0.0029, + "step": 26675 + }, + { + "epoch": 13.45, + "grad_norm": 1.5439566373825073, + "learning_rate": 7.3678391959799e-06, + "loss": 0.0028, + "step": 26700 + }, + { + "epoch": 13.46, + "grad_norm": 0.6818922758102417, + "learning_rate": 7.36532663316583e-06, + "loss": 0.0033, + "step": 26725 + }, + { + "epoch": 13.48, + "grad_norm": 1.0167973041534424, + "learning_rate": 7.362814070351759e-06, + "loss": 0.0027, + "step": 26750 + }, + { + "epoch": 13.49, + "grad_norm": 0.8457038998603821, + "learning_rate": 7.360301507537689e-06, + "loss": 0.0027, + "step": 26775 + }, + { + "epoch": 13.5, + "grad_norm": 1.8538858890533447, + "learning_rate": 7.357788944723618e-06, + "loss": 0.0029, + "step": 26800 + }, + { + "epoch": 13.51, + "grad_norm": 1.6280864477157593, + "learning_rate": 7.3552763819095485e-06, + "loss": 0.0029, + "step": 26825 + }, + { + "epoch": 13.53, + "grad_norm": 1.5593993663787842, + "learning_rate": 7.352763819095478e-06, + "loss": 0.0031, + "step": 26850 + }, + { + "epoch": 13.54, + "grad_norm": 2.2892441749572754, + "learning_rate": 7.350251256281408e-06, + "loss": 0.0032, + "step": 26875 + }, + { + "epoch": 13.55, + "grad_norm": 1.0337854623794556, + "learning_rate": 7.347738693467338e-06, + "loss": 0.0032, + "step": 26900 + }, + { + "epoch": 13.56, + "grad_norm": 1.764012336730957, + "learning_rate": 7.345226130653267e-06, + "loss": 0.003, + "step": 26925 + }, + { + "epoch": 13.58, + "grad_norm": 1.3078733682632446, + "learning_rate": 7.342713567839197e-06, + "loss": 0.003, + "step": 26950 + }, + { + "epoch": 13.59, + "grad_norm": 1.0039664506912231, + "learning_rate": 7.340201005025126e-06, + "loss": 0.0033, + "step": 26975 + }, + { + "epoch": 13.6, + "grad_norm": 1.68669593334198, + "learning_rate": 7.337688442211056e-06, + "loss": 0.0032, + "step": 27000 + }, + { + "epoch": 13.6, + "eval_loss": 0.3001398742198944, + "eval_runtime": 654.1931, + "eval_samples_per_second": 2.154, + "eval_steps_per_second": 2.154, + "eval_wer": 24.538222068488412, + "step": 27000 + }, + { + "epoch": 13.61, + "grad_norm": 1.84382963180542, + "learning_rate": 7.335175879396985e-06, + "loss": 0.0029, + "step": 27025 + }, + { + "epoch": 13.63, + "grad_norm": 0.4542626440525055, + "learning_rate": 7.332663316582915e-06, + "loss": 0.0025, + "step": 27050 + }, + { + "epoch": 13.64, + "grad_norm": 1.7250819206237793, + "learning_rate": 7.330150753768844e-06, + "loss": 0.0029, + "step": 27075 + }, + { + "epoch": 13.65, + "grad_norm": 2.1424949169158936, + "learning_rate": 7.3276381909547745e-06, + "loss": 0.003, + "step": 27100 + }, + { + "epoch": 13.66, + "grad_norm": 1.7538329362869263, + "learning_rate": 7.325125628140705e-06, + "loss": 0.0031, + "step": 27125 + }, + { + "epoch": 13.68, + "grad_norm": 1.9615614414215088, + "learning_rate": 7.322613065326634e-06, + "loss": 0.0034, + "step": 27150 + }, + { + "epoch": 13.69, + "grad_norm": 0.4876037836074829, + "learning_rate": 7.320100502512564e-06, + "loss": 0.0026, + "step": 27175 + }, + { + "epoch": 13.7, + "grad_norm": 2.511152744293213, + "learning_rate": 7.317587939698493e-06, + "loss": 0.0036, + "step": 27200 + }, + { + "epoch": 13.72, + "grad_norm": 1.6201894283294678, + "learning_rate": 7.315075376884423e-06, + "loss": 0.003, + "step": 27225 + }, + { + "epoch": 13.73, + "grad_norm": 2.252923011779785, + "learning_rate": 7.312562814070352e-06, + "loss": 0.0036, + "step": 27250 + }, + { + "epoch": 13.74, + "grad_norm": 1.1069494485855103, + "learning_rate": 7.310050251256282e-06, + "loss": 0.0026, + "step": 27275 + }, + { + "epoch": 13.75, + "grad_norm": 0.6992159485816956, + "learning_rate": 7.307537688442211e-06, + "loss": 0.0031, + "step": 27300 + }, + { + "epoch": 13.77, + "grad_norm": 1.252886414527893, + "learning_rate": 7.305025125628141e-06, + "loss": 0.0027, + "step": 27325 + }, + { + "epoch": 13.78, + "grad_norm": 1.4874393939971924, + "learning_rate": 7.302512562814071e-06, + "loss": 0.0026, + "step": 27350 + }, + { + "epoch": 13.79, + "grad_norm": 0.7340197563171387, + "learning_rate": 7.3e-06, + "loss": 0.0026, + "step": 27375 + }, + { + "epoch": 13.8, + "grad_norm": 1.225475788116455, + "learning_rate": 7.297587939698493e-06, + "loss": 0.0032, + "step": 27400 + }, + { + "epoch": 13.82, + "grad_norm": 1.1409143209457397, + "learning_rate": 7.295075376884423e-06, + "loss": 0.0032, + "step": 27425 + }, + { + "epoch": 13.83, + "grad_norm": 1.395493507385254, + "learning_rate": 7.292562814070352e-06, + "loss": 0.003, + "step": 27450 + }, + { + "epoch": 13.84, + "grad_norm": 1.5672920942306519, + "learning_rate": 7.2900502512562825e-06, + "loss": 0.0034, + "step": 27475 + }, + { + "epoch": 13.85, + "grad_norm": 1.7837570905685425, + "learning_rate": 7.287537688442211e-06, + "loss": 0.0034, + "step": 27500 + }, + { + "epoch": 13.87, + "grad_norm": 2.0357208251953125, + "learning_rate": 7.285025125628141e-06, + "loss": 0.0035, + "step": 27525 + }, + { + "epoch": 13.88, + "grad_norm": 1.0619276762008667, + "learning_rate": 7.282512562814071e-06, + "loss": 0.0028, + "step": 27550 + }, + { + "epoch": 13.89, + "grad_norm": 1.7723731994628906, + "learning_rate": 7.280000000000001e-06, + "loss": 0.0035, + "step": 27575 + }, + { + "epoch": 13.9, + "grad_norm": 2.082310438156128, + "learning_rate": 7.277487437185931e-06, + "loss": 0.0036, + "step": 27600 + }, + { + "epoch": 13.92, + "grad_norm": 1.348310112953186, + "learning_rate": 7.274974874371859e-06, + "loss": 0.0032, + "step": 27625 + }, + { + "epoch": 13.93, + "grad_norm": 1.0664507150650024, + "learning_rate": 7.272462311557789e-06, + "loss": 0.003, + "step": 27650 + }, + { + "epoch": 13.94, + "grad_norm": 1.124415397644043, + "learning_rate": 7.269949748743719e-06, + "loss": 0.0032, + "step": 27675 + }, + { + "epoch": 13.95, + "grad_norm": 1.4353559017181396, + "learning_rate": 7.267437185929649e-06, + "loss": 0.0033, + "step": 27700 + }, + { + "epoch": 13.97, + "grad_norm": 1.658118724822998, + "learning_rate": 7.264924623115579e-06, + "loss": 0.0038, + "step": 27725 + }, + { + "epoch": 13.98, + "grad_norm": 1.0329655408859253, + "learning_rate": 7.262412060301508e-06, + "loss": 0.0032, + "step": 27750 + }, + { + "epoch": 13.99, + "grad_norm": 1.345921516418457, + "learning_rate": 7.259899497487439e-06, + "loss": 0.0025, + "step": 27775 + }, + { + "epoch": 14.01, + "grad_norm": 1.0595952272415161, + "learning_rate": 7.257386934673367e-06, + "loss": 0.0027, + "step": 27800 + }, + { + "epoch": 14.02, + "grad_norm": 1.766713261604309, + "learning_rate": 7.254874371859297e-06, + "loss": 0.0025, + "step": 27825 + }, + { + "epoch": 14.03, + "grad_norm": 1.3049800395965576, + "learning_rate": 7.2523618090452265e-06, + "loss": 0.0018, + "step": 27850 + }, + { + "epoch": 14.04, + "grad_norm": 2.0778799057006836, + "learning_rate": 7.249849246231157e-06, + "loss": 0.0025, + "step": 27875 + }, + { + "epoch": 14.06, + "grad_norm": 0.90594881772995, + "learning_rate": 7.247336683417085e-06, + "loss": 0.0022, + "step": 27900 + }, + { + "epoch": 14.07, + "grad_norm": 0.35941508412361145, + "learning_rate": 7.244824120603015e-06, + "loss": 0.0022, + "step": 27925 + }, + { + "epoch": 14.08, + "grad_norm": 0.7269408106803894, + "learning_rate": 7.2423115577889455e-06, + "loss": 0.0021, + "step": 27950 + }, + { + "epoch": 14.09, + "grad_norm": 1.4327499866485596, + "learning_rate": 7.239798994974875e-06, + "loss": 0.0025, + "step": 27975 + }, + { + "epoch": 14.11, + "grad_norm": 0.6035653948783875, + "learning_rate": 7.237286432160805e-06, + "loss": 0.0024, + "step": 28000 + }, + { + "epoch": 14.11, + "eval_loss": 0.3001614212989807, + "eval_runtime": 784.2944, + "eval_samples_per_second": 1.797, + "eval_steps_per_second": 1.797, + "eval_wer": 23.20304392943618, + "step": 28000 + }, + { + "epoch": 14.12, + "grad_norm": 1.209418535232544, + "learning_rate": 7.234773869346734e-06, + "loss": 0.0022, + "step": 28025 + }, + { + "epoch": 14.13, + "grad_norm": 1.2968195676803589, + "learning_rate": 7.2322613065326645e-06, + "loss": 0.0024, + "step": 28050 + }, + { + "epoch": 14.14, + "grad_norm": 1.5654141902923584, + "learning_rate": 7.229748743718593e-06, + "loss": 0.0027, + "step": 28075 + }, + { + "epoch": 14.16, + "grad_norm": 1.4532732963562012, + "learning_rate": 7.227236180904523e-06, + "loss": 0.0024, + "step": 28100 + }, + { + "epoch": 14.17, + "grad_norm": 1.7169549465179443, + "learning_rate": 7.2247236180904525e-06, + "loss": 0.0022, + "step": 28125 + }, + { + "epoch": 14.18, + "grad_norm": 1.32295560836792, + "learning_rate": 7.222211055276383e-06, + "loss": 0.0022, + "step": 28150 + }, + { + "epoch": 14.19, + "grad_norm": 0.8905125856399536, + "learning_rate": 7.219698492462313e-06, + "loss": 0.0022, + "step": 28175 + }, + { + "epoch": 14.21, + "grad_norm": 0.8778842687606812, + "learning_rate": 7.217185929648241e-06, + "loss": 0.0021, + "step": 28200 + }, + { + "epoch": 14.22, + "grad_norm": 1.4678562879562378, + "learning_rate": 7.2146733668341715e-06, + "loss": 0.0025, + "step": 28225 + }, + { + "epoch": 14.23, + "grad_norm": 0.7229999899864197, + "learning_rate": 7.212160804020101e-06, + "loss": 0.0022, + "step": 28250 + }, + { + "epoch": 14.24, + "grad_norm": 2.1713428497314453, + "learning_rate": 7.209648241206031e-06, + "loss": 0.0023, + "step": 28275 + }, + { + "epoch": 14.26, + "grad_norm": 2.5556602478027344, + "learning_rate": 7.20713567839196e-06, + "loss": 0.0022, + "step": 28300 + }, + { + "epoch": 14.27, + "grad_norm": 0.7470478415489197, + "learning_rate": 7.2046231155778905e-06, + "loss": 0.0022, + "step": 28325 + }, + { + "epoch": 14.28, + "grad_norm": 1.2497297525405884, + "learning_rate": 7.20211055276382e-06, + "loss": 0.0022, + "step": 28350 + }, + { + "epoch": 14.29, + "grad_norm": 0.5248861908912659, + "learning_rate": 7.199597989949749e-06, + "loss": 0.0024, + "step": 28375 + }, + { + "epoch": 14.31, + "grad_norm": 1.0900764465332031, + "learning_rate": 7.197085427135679e-06, + "loss": 0.0025, + "step": 28400 + }, + { + "epoch": 14.32, + "grad_norm": 1.5685707330703735, + "learning_rate": 7.194572864321609e-06, + "loss": 0.0028, + "step": 28425 + }, + { + "epoch": 14.33, + "grad_norm": 1.298081874847412, + "learning_rate": 7.192060301507539e-06, + "loss": 0.0026, + "step": 28450 + }, + { + "epoch": 14.35, + "grad_norm": 1.188835620880127, + "learning_rate": 7.189547738693467e-06, + "loss": 0.0024, + "step": 28475 + }, + { + "epoch": 14.36, + "grad_norm": 2.094358205795288, + "learning_rate": 7.187035175879397e-06, + "loss": 0.0023, + "step": 28500 + }, + { + "epoch": 14.37, + "grad_norm": 1.2566583156585693, + "learning_rate": 7.184522613065327e-06, + "loss": 0.0028, + "step": 28525 + }, + { + "epoch": 14.38, + "grad_norm": 1.1933472156524658, + "learning_rate": 7.182010050251257e-06, + "loss": 0.0025, + "step": 28550 + }, + { + "epoch": 14.4, + "grad_norm": 1.451371669769287, + "learning_rate": 7.179497487437187e-06, + "loss": 0.0028, + "step": 28575 + }, + { + "epoch": 14.41, + "grad_norm": 1.804445743560791, + "learning_rate": 7.176984924623116e-06, + "loss": 0.0026, + "step": 28600 + }, + { + "epoch": 14.42, + "grad_norm": 0.8600190877914429, + "learning_rate": 7.174472361809047e-06, + "loss": 0.0025, + "step": 28625 + }, + { + "epoch": 14.43, + "grad_norm": 0.6841452121734619, + "learning_rate": 7.171959798994975e-06, + "loss": 0.0027, + "step": 28650 + }, + { + "epoch": 14.45, + "grad_norm": 0.7692683339118958, + "learning_rate": 7.169447236180905e-06, + "loss": 0.0023, + "step": 28675 + }, + { + "epoch": 14.46, + "grad_norm": 1.5418920516967773, + "learning_rate": 7.1669346733668345e-06, + "loss": 0.0026, + "step": 28700 + }, + { + "epoch": 14.47, + "grad_norm": 1.3701914548873901, + "learning_rate": 7.164422110552765e-06, + "loss": 0.0023, + "step": 28725 + }, + { + "epoch": 14.48, + "grad_norm": 1.2819687128067017, + "learning_rate": 7.161909547738693e-06, + "loss": 0.0031, + "step": 28750 + }, + { + "epoch": 14.5, + "grad_norm": 0.9504879117012024, + "learning_rate": 7.159396984924623e-06, + "loss": 0.0023, + "step": 28775 + }, + { + "epoch": 14.51, + "grad_norm": 1.3846092224121094, + "learning_rate": 7.1568844221105535e-06, + "loss": 0.0028, + "step": 28800 + }, + { + "epoch": 14.52, + "grad_norm": 0.8752845525741577, + "learning_rate": 7.154371859296483e-06, + "loss": 0.0027, + "step": 28825 + }, + { + "epoch": 14.53, + "grad_norm": 2.0625505447387695, + "learning_rate": 7.151859296482413e-06, + "loss": 0.0028, + "step": 28850 + }, + { + "epoch": 14.55, + "grad_norm": 1.281076192855835, + "learning_rate": 7.149346733668342e-06, + "loss": 0.0026, + "step": 28875 + }, + { + "epoch": 14.56, + "grad_norm": 1.1781672239303589, + "learning_rate": 7.1468341708542725e-06, + "loss": 0.0031, + "step": 28900 + }, + { + "epoch": 14.57, + "grad_norm": 1.7508938312530518, + "learning_rate": 7.144321608040201e-06, + "loss": 0.0027, + "step": 28925 + }, + { + "epoch": 14.58, + "grad_norm": 1.2148828506469727, + "learning_rate": 7.141809045226131e-06, + "loss": 0.003, + "step": 28950 + }, + { + "epoch": 14.6, + "grad_norm": 1.6405340433120728, + "learning_rate": 7.139296482412061e-06, + "loss": 0.0025, + "step": 28975 + }, + { + "epoch": 14.61, + "grad_norm": 0.7206986546516418, + "learning_rate": 7.136783919597991e-06, + "loss": 0.0025, + "step": 29000 + }, + { + "epoch": 14.61, + "eval_loss": 0.3025396168231964, + "eval_runtime": 646.2169, + "eval_samples_per_second": 2.18, + "eval_steps_per_second": 2.18, + "eval_wer": 23.79107575233483, + "step": 29000 + }, + { + "epoch": 14.62, + "grad_norm": 1.8807600736618042, + "learning_rate": 7.134271356783921e-06, + "loss": 0.0028, + "step": 29025 + }, + { + "epoch": 14.63, + "grad_norm": 0.9913462996482849, + "learning_rate": 7.131758793969849e-06, + "loss": 0.0026, + "step": 29050 + }, + { + "epoch": 14.65, + "grad_norm": 0.6450251340866089, + "learning_rate": 7.1292462311557795e-06, + "loss": 0.002, + "step": 29075 + }, + { + "epoch": 14.66, + "grad_norm": 1.563607096672058, + "learning_rate": 7.126733668341709e-06, + "loss": 0.0024, + "step": 29100 + }, + { + "epoch": 14.67, + "grad_norm": 1.7868529558181763, + "learning_rate": 7.124221105527639e-06, + "loss": 0.0023, + "step": 29125 + }, + { + "epoch": 14.69, + "grad_norm": 1.2877388000488281, + "learning_rate": 7.121708542713568e-06, + "loss": 0.0023, + "step": 29150 + }, + { + "epoch": 14.7, + "grad_norm": 1.0285090208053589, + "learning_rate": 7.1191959798994985e-06, + "loss": 0.0029, + "step": 29175 + }, + { + "epoch": 14.71, + "grad_norm": 0.9814359545707703, + "learning_rate": 7.116683417085428e-06, + "loss": 0.0024, + "step": 29200 + }, + { + "epoch": 14.72, + "grad_norm": 3.390266180038452, + "learning_rate": 7.114170854271357e-06, + "loss": 0.0031, + "step": 29225 + }, + { + "epoch": 14.74, + "grad_norm": 0.9892065525054932, + "learning_rate": 7.111658291457287e-06, + "loss": 0.0026, + "step": 29250 + }, + { + "epoch": 14.75, + "grad_norm": 1.4779770374298096, + "learning_rate": 7.109145728643217e-06, + "loss": 0.0027, + "step": 29275 + }, + { + "epoch": 14.76, + "grad_norm": 1.935717225074768, + "learning_rate": 7.106633165829147e-06, + "loss": 0.0034, + "step": 29300 + }, + { + "epoch": 14.77, + "grad_norm": 0.9933359622955322, + "learning_rate": 7.104120603015075e-06, + "loss": 0.0026, + "step": 29325 + }, + { + "epoch": 14.79, + "grad_norm": 1.2649095058441162, + "learning_rate": 7.1016080402010054e-06, + "loss": 0.0025, + "step": 29350 + }, + { + "epoch": 14.8, + "grad_norm": 0.9197986125946045, + "learning_rate": 7.099095477386935e-06, + "loss": 0.0025, + "step": 29375 + }, + { + "epoch": 14.81, + "grad_norm": 0.7807173132896423, + "learning_rate": 7.096582914572865e-06, + "loss": 0.0029, + "step": 29400 + }, + { + "epoch": 14.82, + "grad_norm": 1.1959360837936401, + "learning_rate": 7.094070351758795e-06, + "loss": 0.0029, + "step": 29425 + }, + { + "epoch": 14.84, + "grad_norm": 1.7630362510681152, + "learning_rate": 7.091557788944724e-06, + "loss": 0.0024, + "step": 29450 + }, + { + "epoch": 14.85, + "grad_norm": 1.1034917831420898, + "learning_rate": 7.089045226130654e-06, + "loss": 0.0027, + "step": 29475 + }, + { + "epoch": 14.86, + "grad_norm": 1.185198426246643, + "learning_rate": 7.086532663316583e-06, + "loss": 0.0022, + "step": 29500 + }, + { + "epoch": 14.87, + "grad_norm": 0.9745866656303406, + "learning_rate": 7.084020100502513e-06, + "loss": 0.0029, + "step": 29525 + }, + { + "epoch": 14.89, + "grad_norm": 1.3043872117996216, + "learning_rate": 7.0815075376884426e-06, + "loss": 0.0022, + "step": 29550 + }, + { + "epoch": 14.9, + "grad_norm": 1.9529894590377808, + "learning_rate": 7.078994974874373e-06, + "loss": 0.0026, + "step": 29575 + }, + { + "epoch": 14.91, + "grad_norm": 1.3992432355880737, + "learning_rate": 7.076482412060303e-06, + "loss": 0.0028, + "step": 29600 + }, + { + "epoch": 14.92, + "grad_norm": 1.66525399684906, + "learning_rate": 7.073969849246231e-06, + "loss": 0.0026, + "step": 29625 + }, + { + "epoch": 14.94, + "grad_norm": 1.0909807682037354, + "learning_rate": 7.0714572864321615e-06, + "loss": 0.0024, + "step": 29650 + }, + { + "epoch": 14.95, + "grad_norm": 1.1937345266342163, + "learning_rate": 7.068944723618091e-06, + "loss": 0.0028, + "step": 29675 + }, + { + "epoch": 14.96, + "grad_norm": 1.9024869203567505, + "learning_rate": 7.066432160804021e-06, + "loss": 0.0029, + "step": 29700 + }, + { + "epoch": 14.97, + "grad_norm": 1.720049500465393, + "learning_rate": 7.06391959798995e-06, + "loss": 0.0026, + "step": 29725 + }, + { + "epoch": 14.99, + "grad_norm": 1.847103476524353, + "learning_rate": 7.0614070351758805e-06, + "loss": 0.0028, + "step": 29750 + }, + { + "epoch": 15.0, + "grad_norm": 2.2860000133514404, + "learning_rate": 7.058894472361809e-06, + "loss": 0.0031, + "step": 29775 + }, + { + "epoch": 15.01, + "grad_norm": 0.5796921849250793, + "learning_rate": 7.056381909547739e-06, + "loss": 0.0018, + "step": 29800 + }, + { + "epoch": 15.03, + "grad_norm": 0.9784811735153198, + "learning_rate": 7.053869346733669e-06, + "loss": 0.0017, + "step": 29825 + }, + { + "epoch": 15.04, + "grad_norm": 0.5142715573310852, + "learning_rate": 7.051356783919599e-06, + "loss": 0.0016, + "step": 29850 + }, + { + "epoch": 15.05, + "grad_norm": 0.4503660202026367, + "learning_rate": 7.048844221105529e-06, + "loss": 0.0016, + "step": 29875 + }, + { + "epoch": 15.06, + "grad_norm": 1.5839601755142212, + "learning_rate": 7.046331658291457e-06, + "loss": 0.0017, + "step": 29900 + }, + { + "epoch": 15.08, + "grad_norm": 1.3505232334136963, + "learning_rate": 7.0438190954773875e-06, + "loss": 0.0017, + "step": 29925 + }, + { + "epoch": 15.09, + "grad_norm": 0.8439552783966064, + "learning_rate": 7.041306532663317e-06, + "loss": 0.0021, + "step": 29950 + }, + { + "epoch": 15.1, + "grad_norm": 0.5884461402893066, + "learning_rate": 7.038793969849247e-06, + "loss": 0.0019, + "step": 29975 + }, + { + "epoch": 15.11, + "grad_norm": 1.5458250045776367, + "learning_rate": 7.036281407035176e-06, + "loss": 0.0016, + "step": 30000 + }, + { + "epoch": 15.11, + "eval_loss": 0.3082928955554962, + "eval_runtime": 644.0315, + "eval_samples_per_second": 2.188, + "eval_steps_per_second": 2.188, + "eval_wer": 23.12694569353165, + "step": 30000 + }, + { + "epoch": 15.13, + "grad_norm": 0.5485078692436218, + "learning_rate": 7.0337688442211065e-06, + "loss": 0.002, + "step": 30025 + }, + { + "epoch": 15.14, + "grad_norm": 0.7897219657897949, + "learning_rate": 7.031256281407036e-06, + "loss": 0.002, + "step": 30050 + }, + { + "epoch": 15.15, + "grad_norm": 1.791757345199585, + "learning_rate": 7.028743718592965e-06, + "loss": 0.002, + "step": 30075 + }, + { + "epoch": 15.16, + "grad_norm": 1.5494959354400635, + "learning_rate": 7.026231155778895e-06, + "loss": 0.0017, + "step": 30100 + }, + { + "epoch": 15.18, + "grad_norm": 1.4792935848236084, + "learning_rate": 7.023718592964825e-06, + "loss": 0.0021, + "step": 30125 + }, + { + "epoch": 15.19, + "grad_norm": 0.9007993340492249, + "learning_rate": 7.021306532663317e-06, + "loss": 0.0014, + "step": 30150 + }, + { + "epoch": 15.2, + "grad_norm": 1.4334176778793335, + "learning_rate": 7.018793969849247e-06, + "loss": 0.0022, + "step": 30175 + }, + { + "epoch": 15.21, + "grad_norm": 0.3024737238883972, + "learning_rate": 7.016281407035176e-06, + "loss": 0.0018, + "step": 30200 + }, + { + "epoch": 15.23, + "grad_norm": 0.6104531288146973, + "learning_rate": 7.013768844221106e-06, + "loss": 0.0018, + "step": 30225 + }, + { + "epoch": 15.24, + "grad_norm": 0.5760412216186523, + "learning_rate": 7.011256281407036e-06, + "loss": 0.0022, + "step": 30250 + }, + { + "epoch": 15.25, + "grad_norm": 1.2721421718597412, + "learning_rate": 7.008743718592965e-06, + "loss": 0.0023, + "step": 30275 + }, + { + "epoch": 15.26, + "grad_norm": 1.3227627277374268, + "learning_rate": 7.0062311557788955e-06, + "loss": 0.002, + "step": 30300 + }, + { + "epoch": 15.28, + "grad_norm": 0.7991645932197571, + "learning_rate": 7.003718592964824e-06, + "loss": 0.0022, + "step": 30325 + }, + { + "epoch": 15.29, + "grad_norm": 0.947595477104187, + "learning_rate": 7.001206030150754e-06, + "loss": 0.0019, + "step": 30350 + }, + { + "epoch": 15.3, + "grad_norm": 0.308912456035614, + "learning_rate": 6.9986934673366834e-06, + "loss": 0.0022, + "step": 30375 + }, + { + "epoch": 15.31, + "grad_norm": 0.7527008056640625, + "learning_rate": 6.996180904522614e-06, + "loss": 0.0021, + "step": 30400 + }, + { + "epoch": 15.33, + "grad_norm": 1.4596527814865112, + "learning_rate": 6.993668341708544e-06, + "loss": 0.0022, + "step": 30425 + }, + { + "epoch": 15.34, + "grad_norm": 0.6579309701919556, + "learning_rate": 6.991155778894473e-06, + "loss": 0.0022, + "step": 30450 + }, + { + "epoch": 15.35, + "grad_norm": 1.1227294206619263, + "learning_rate": 6.988643216080403e-06, + "loss": 0.0025, + "step": 30475 + }, + { + "epoch": 15.37, + "grad_norm": 1.243522047996521, + "learning_rate": 6.986130653266332e-06, + "loss": 0.0024, + "step": 30500 + }, + { + "epoch": 15.38, + "grad_norm": 1.4679538011550903, + "learning_rate": 6.983618090452262e-06, + "loss": 0.0025, + "step": 30525 + }, + { + "epoch": 15.39, + "grad_norm": 0.4298401176929474, + "learning_rate": 6.981105527638191e-06, + "loss": 0.0025, + "step": 30550 + }, + { + "epoch": 15.4, + "grad_norm": 1.8829938173294067, + "learning_rate": 6.978592964824121e-06, + "loss": 0.0029, + "step": 30575 + }, + { + "epoch": 15.42, + "grad_norm": 0.4301297068595886, + "learning_rate": 6.976080402010051e-06, + "loss": 0.0022, + "step": 30600 + }, + { + "epoch": 15.43, + "grad_norm": 0.4856531322002411, + "learning_rate": 6.97356783919598e-06, + "loss": 0.0022, + "step": 30625 + }, + { + "epoch": 15.44, + "grad_norm": 1.4196797609329224, + "learning_rate": 6.97105527638191e-06, + "loss": 0.0024, + "step": 30650 + }, + { + "epoch": 15.45, + "grad_norm": 0.9456383585929871, + "learning_rate": 6.9685427135678396e-06, + "loss": 0.0026, + "step": 30675 + }, + { + "epoch": 15.47, + "grad_norm": 0.5812883973121643, + "learning_rate": 6.96603015075377e-06, + "loss": 0.0023, + "step": 30700 + }, + { + "epoch": 15.48, + "grad_norm": 1.5798356533050537, + "learning_rate": 6.963517587939699e-06, + "loss": 0.0026, + "step": 30725 + }, + { + "epoch": 15.49, + "grad_norm": 1.7559192180633545, + "learning_rate": 6.961005025125629e-06, + "loss": 0.0023, + "step": 30750 + }, + { + "epoch": 15.5, + "grad_norm": 0.9900276064872742, + "learning_rate": 6.958492462311558e-06, + "loss": 0.0023, + "step": 30775 + }, + { + "epoch": 15.52, + "grad_norm": 1.1745249032974243, + "learning_rate": 6.955979899497488e-06, + "loss": 0.0028, + "step": 30800 + }, + { + "epoch": 15.53, + "grad_norm": 1.3300117254257202, + "learning_rate": 6.953467336683417e-06, + "loss": 0.0021, + "step": 30825 + }, + { + "epoch": 15.54, + "grad_norm": 1.642714023590088, + "learning_rate": 6.950954773869347e-06, + "loss": 0.0022, + "step": 30850 + }, + { + "epoch": 15.55, + "grad_norm": 1.159639835357666, + "learning_rate": 6.9484422110552775e-06, + "loss": 0.0024, + "step": 30875 + }, + { + "epoch": 15.57, + "grad_norm": 1.291977882385254, + "learning_rate": 6.945929648241206e-06, + "loss": 0.0021, + "step": 30900 + }, + { + "epoch": 15.58, + "grad_norm": 2.5278995037078857, + "learning_rate": 6.943417085427136e-06, + "loss": 0.0027, + "step": 30925 + }, + { + "epoch": 15.59, + "grad_norm": 1.345812439918518, + "learning_rate": 6.9409045226130655e-06, + "loss": 0.0023, + "step": 30950 + }, + { + "epoch": 15.6, + "grad_norm": 0.608741044998169, + "learning_rate": 6.938391959798996e-06, + "loss": 0.0027, + "step": 30975 + }, + { + "epoch": 15.62, + "grad_norm": 1.752109169960022, + "learning_rate": 6.935879396984925e-06, + "loss": 0.0025, + "step": 31000 + }, + { + "epoch": 15.62, + "eval_loss": 0.31733256578445435, + "eval_runtime": 645.4398, + "eval_samples_per_second": 2.183, + "eval_steps_per_second": 2.183, + "eval_wer": 23.590453130404704, + "step": 31000 + }, + { + "epoch": 15.63, + "grad_norm": 0.8238040804862976, + "learning_rate": 6.933366834170855e-06, + "loss": 0.0022, + "step": 31025 + }, + { + "epoch": 15.64, + "grad_norm": 0.9175546169281006, + "learning_rate": 6.930854271356785e-06, + "loss": 0.0023, + "step": 31050 + }, + { + "epoch": 15.65, + "grad_norm": 1.2227699756622314, + "learning_rate": 6.928341708542714e-06, + "loss": 0.0022, + "step": 31075 + }, + { + "epoch": 15.67, + "grad_norm": 1.981120228767395, + "learning_rate": 6.925829145728644e-06, + "loss": 0.0022, + "step": 31100 + }, + { + "epoch": 15.68, + "grad_norm": 1.4415370225906372, + "learning_rate": 6.923316582914573e-06, + "loss": 0.0025, + "step": 31125 + }, + { + "epoch": 15.69, + "grad_norm": 1.6438077688217163, + "learning_rate": 6.9208040201005035e-06, + "loss": 0.0024, + "step": 31150 + }, + { + "epoch": 15.71, + "grad_norm": 2.1774020195007324, + "learning_rate": 6.918291457286432e-06, + "loss": 0.0024, + "step": 31175 + }, + { + "epoch": 15.72, + "grad_norm": 0.5905573964118958, + "learning_rate": 6.915778894472362e-06, + "loss": 0.0025, + "step": 31200 + }, + { + "epoch": 15.73, + "grad_norm": 1.7300103902816772, + "learning_rate": 6.9132663316582915e-06, + "loss": 0.0026, + "step": 31225 + }, + { + "epoch": 15.74, + "grad_norm": 1.1542717218399048, + "learning_rate": 6.910753768844222e-06, + "loss": 0.0023, + "step": 31250 + }, + { + "epoch": 15.76, + "grad_norm": 1.279700756072998, + "learning_rate": 6.908241206030152e-06, + "loss": 0.0024, + "step": 31275 + }, + { + "epoch": 15.77, + "grad_norm": 0.8788714408874512, + "learning_rate": 6.905728643216081e-06, + "loss": 0.0026, + "step": 31300 + }, + { + "epoch": 15.78, + "grad_norm": 0.8979476094245911, + "learning_rate": 6.903216080402011e-06, + "loss": 0.0023, + "step": 31325 + }, + { + "epoch": 15.79, + "grad_norm": 0.406665176153183, + "learning_rate": 6.90070351758794e-06, + "loss": 0.0021, + "step": 31350 + }, + { + "epoch": 15.81, + "grad_norm": 0.7604673504829407, + "learning_rate": 6.89819095477387e-06, + "loss": 0.0023, + "step": 31375 + }, + { + "epoch": 15.82, + "grad_norm": 1.2543871402740479, + "learning_rate": 6.895678391959799e-06, + "loss": 0.0026, + "step": 31400 + }, + { + "epoch": 15.83, + "grad_norm": 0.8910918235778809, + "learning_rate": 6.8931658291457294e-06, + "loss": 0.002, + "step": 31425 + }, + { + "epoch": 15.84, + "grad_norm": 0.4898677468299866, + "learning_rate": 6.890653266331658e-06, + "loss": 0.0025, + "step": 31450 + }, + { + "epoch": 15.86, + "grad_norm": 1.3053447008132935, + "learning_rate": 6.888140703517588e-06, + "loss": 0.003, + "step": 31475 + }, + { + "epoch": 15.87, + "grad_norm": 1.2385072708129883, + "learning_rate": 6.885628140703518e-06, + "loss": 0.0023, + "step": 31500 + }, + { + "epoch": 15.88, + "grad_norm": 1.81439208984375, + "learning_rate": 6.8831155778894476e-06, + "loss": 0.0022, + "step": 31525 + }, + { + "epoch": 15.89, + "grad_norm": 2.093777894973755, + "learning_rate": 6.880603015075378e-06, + "loss": 0.0025, + "step": 31550 + }, + { + "epoch": 15.91, + "grad_norm": 0.8900623321533203, + "learning_rate": 6.878090452261307e-06, + "loss": 0.0027, + "step": 31575 + }, + { + "epoch": 15.92, + "grad_norm": 1.2843748331069946, + "learning_rate": 6.875577889447237e-06, + "loss": 0.0021, + "step": 31600 + }, + { + "epoch": 15.93, + "grad_norm": 0.9298327565193176, + "learning_rate": 6.873065326633166e-06, + "loss": 0.0019, + "step": 31625 + }, + { + "epoch": 15.94, + "grad_norm": 0.8842711448669434, + "learning_rate": 6.870552763819096e-06, + "loss": 0.0021, + "step": 31650 + }, + { + "epoch": 15.96, + "grad_norm": 0.9891393184661865, + "learning_rate": 6.868040201005026e-06, + "loss": 0.0024, + "step": 31675 + }, + { + "epoch": 15.97, + "grad_norm": 1.2643588781356812, + "learning_rate": 6.865527638190955e-06, + "loss": 0.0023, + "step": 31700 + }, + { + "epoch": 15.98, + "grad_norm": 0.7340218424797058, + "learning_rate": 6.8630150753768855e-06, + "loss": 0.0024, + "step": 31725 + }, + { + "epoch": 15.99, + "grad_norm": 1.090314507484436, + "learning_rate": 6.860502512562814e-06, + "loss": 0.0025, + "step": 31750 + }, + { + "epoch": 16.01, + "grad_norm": 2.1509249210357666, + "learning_rate": 6.857989949748744e-06, + "loss": 0.0023, + "step": 31775 + }, + { + "epoch": 16.02, + "grad_norm": 1.119979977607727, + "learning_rate": 6.8554773869346735e-06, + "loss": 0.0015, + "step": 31800 + }, + { + "epoch": 16.03, + "grad_norm": 0.9909570217132568, + "learning_rate": 6.852964824120604e-06, + "loss": 0.002, + "step": 31825 + }, + { + "epoch": 16.05, + "grad_norm": 1.8232905864715576, + "learning_rate": 6.850452261306533e-06, + "loss": 0.0016, + "step": 31850 + }, + { + "epoch": 16.06, + "grad_norm": 1.7875540256500244, + "learning_rate": 6.847939698492463e-06, + "loss": 0.0015, + "step": 31875 + }, + { + "epoch": 16.07, + "grad_norm": 1.3473516702651978, + "learning_rate": 6.845427135678393e-06, + "loss": 0.0019, + "step": 31900 + }, + { + "epoch": 16.08, + "grad_norm": 0.521841824054718, + "learning_rate": 6.842914572864322e-06, + "loss": 0.0017, + "step": 31925 + }, + { + "epoch": 16.1, + "grad_norm": 1.0813759565353394, + "learning_rate": 6.840402010050252e-06, + "loss": 0.0016, + "step": 31950 + }, + { + "epoch": 16.11, + "grad_norm": 0.8133834004402161, + "learning_rate": 6.837889447236181e-06, + "loss": 0.0013, + "step": 31975 + }, + { + "epoch": 16.12, + "grad_norm": 0.6576656103134155, + "learning_rate": 6.8353768844221115e-06, + "loss": 0.0016, + "step": 32000 + }, + { + "epoch": 16.12, + "eval_loss": 0.3177714943885803, + "eval_runtime": 643.1803, + "eval_samples_per_second": 2.191, + "eval_steps_per_second": 2.191, + "eval_wer": 23.95710826703563, + "step": 32000 + }, + { + "epoch": 16.13, + "grad_norm": 1.695088267326355, + "learning_rate": 6.83286432160804e-06, + "loss": 0.0019, + "step": 32025 + }, + { + "epoch": 16.15, + "grad_norm": 0.49403443932533264, + "learning_rate": 6.83035175879397e-06, + "loss": 0.0017, + "step": 32050 + }, + { + "epoch": 16.16, + "grad_norm": 1.2288966178894043, + "learning_rate": 6.8278391959798995e-06, + "loss": 0.0018, + "step": 32075 + }, + { + "epoch": 16.17, + "grad_norm": 1.711982011795044, + "learning_rate": 6.82532663316583e-06, + "loss": 0.0018, + "step": 32100 + }, + { + "epoch": 16.18, + "grad_norm": 0.9123796820640564, + "learning_rate": 6.82281407035176e-06, + "loss": 0.0022, + "step": 32125 + }, + { + "epoch": 16.2, + "grad_norm": 0.8187395930290222, + "learning_rate": 6.820301507537689e-06, + "loss": 0.0018, + "step": 32150 + }, + { + "epoch": 16.21, + "grad_norm": 0.676909327507019, + "learning_rate": 6.817788944723619e-06, + "loss": 0.0019, + "step": 32175 + }, + { + "epoch": 16.22, + "grad_norm": 0.28308579325675964, + "learning_rate": 6.815276381909548e-06, + "loss": 0.0017, + "step": 32200 + }, + { + "epoch": 16.23, + "grad_norm": 0.8627307415008545, + "learning_rate": 6.812763819095478e-06, + "loss": 0.0017, + "step": 32225 + }, + { + "epoch": 16.25, + "grad_norm": 0.5035674571990967, + "learning_rate": 6.810251256281407e-06, + "loss": 0.0021, + "step": 32250 + }, + { + "epoch": 16.26, + "grad_norm": 0.611066997051239, + "learning_rate": 6.8077386934673374e-06, + "loss": 0.0018, + "step": 32275 + }, + { + "epoch": 16.27, + "grad_norm": 1.7766281366348267, + "learning_rate": 6.805226130653268e-06, + "loss": 0.002, + "step": 32300 + }, + { + "epoch": 16.28, + "grad_norm": 0.9835132956504822, + "learning_rate": 6.802713567839196e-06, + "loss": 0.0018, + "step": 32325 + }, + { + "epoch": 16.3, + "grad_norm": 1.363574504852295, + "learning_rate": 6.800201005025126e-06, + "loss": 0.002, + "step": 32350 + }, + { + "epoch": 16.31, + "grad_norm": 1.0854887962341309, + "learning_rate": 6.7976884422110556e-06, + "loss": 0.0019, + "step": 32375 + }, + { + "epoch": 16.32, + "grad_norm": 2.8377525806427, + "learning_rate": 6.795175879396986e-06, + "loss": 0.0023, + "step": 32400 + }, + { + "epoch": 16.34, + "grad_norm": 2.0450568199157715, + "learning_rate": 6.792663316582915e-06, + "loss": 0.0019, + "step": 32425 + }, + { + "epoch": 16.35, + "grad_norm": 1.6299299001693726, + "learning_rate": 6.790150753768845e-06, + "loss": 0.002, + "step": 32450 + }, + { + "epoch": 16.36, + "grad_norm": 1.7007014751434326, + "learning_rate": 6.787638190954774e-06, + "loss": 0.0022, + "step": 32475 + }, + { + "epoch": 16.37, + "grad_norm": 1.5185723304748535, + "learning_rate": 6.785125628140704e-06, + "loss": 0.002, + "step": 32500 + }, + { + "epoch": 16.39, + "grad_norm": 1.15962815284729, + "learning_rate": 6.782613065326634e-06, + "loss": 0.002, + "step": 32525 + }, + { + "epoch": 16.4, + "grad_norm": 0.9685630798339844, + "learning_rate": 6.780100502512563e-06, + "loss": 0.0023, + "step": 32550 + }, + { + "epoch": 16.41, + "grad_norm": 0.7952429056167603, + "learning_rate": 6.7775879396984935e-06, + "loss": 0.0024, + "step": 32575 + }, + { + "epoch": 16.42, + "grad_norm": 1.4336612224578857, + "learning_rate": 6.775075376884422e-06, + "loss": 0.0019, + "step": 32600 + }, + { + "epoch": 16.44, + "grad_norm": 1.155975580215454, + "learning_rate": 6.772562814070352e-06, + "loss": 0.0023, + "step": 32625 + }, + { + "epoch": 16.45, + "grad_norm": 0.660798966884613, + "learning_rate": 6.7700502512562815e-06, + "loss": 0.0024, + "step": 32650 + }, + { + "epoch": 16.46, + "grad_norm": 1.6323788166046143, + "learning_rate": 6.767537688442212e-06, + "loss": 0.002, + "step": 32675 + }, + { + "epoch": 16.47, + "grad_norm": 0.47817263007164, + "learning_rate": 6.765125628140704e-06, + "loss": 0.0023, + "step": 32700 + }, + { + "epoch": 16.49, + "grad_norm": 1.6108895540237427, + "learning_rate": 6.762613065326634e-06, + "loss": 0.0024, + "step": 32725 + }, + { + "epoch": 16.5, + "grad_norm": 1.8570855855941772, + "learning_rate": 6.7601005025125636e-06, + "loss": 0.0025, + "step": 32750 + }, + { + "epoch": 16.51, + "grad_norm": 1.3422707319259644, + "learning_rate": 6.757587939698494e-06, + "loss": 0.002, + "step": 32775 + }, + { + "epoch": 16.52, + "grad_norm": 0.9396295547485352, + "learning_rate": 6.755075376884422e-06, + "loss": 0.0018, + "step": 32800 + }, + { + "epoch": 16.54, + "grad_norm": 0.6690593361854553, + "learning_rate": 6.752562814070352e-06, + "loss": 0.0024, + "step": 32825 + }, + { + "epoch": 16.55, + "grad_norm": 0.6064794659614563, + "learning_rate": 6.750050251256282e-06, + "loss": 0.0019, + "step": 32850 + }, + { + "epoch": 16.56, + "grad_norm": 0.6732133626937866, + "learning_rate": 6.747537688442212e-06, + "loss": 0.0019, + "step": 32875 + }, + { + "epoch": 16.57, + "grad_norm": 1.029380202293396, + "learning_rate": 6.74502512562814e-06, + "loss": 0.0023, + "step": 32900 + }, + { + "epoch": 16.59, + "grad_norm": 0.659989058971405, + "learning_rate": 6.7425125628140705e-06, + "loss": 0.0021, + "step": 32925 + }, + { + "epoch": 16.6, + "grad_norm": 0.6724833846092224, + "learning_rate": 6.740000000000001e-06, + "loss": 0.002, + "step": 32950 + }, + { + "epoch": 16.61, + "grad_norm": 1.073951005935669, + "learning_rate": 6.73748743718593e-06, + "loss": 0.0018, + "step": 32975 + }, + { + "epoch": 16.62, + "grad_norm": 0.2644835412502289, + "learning_rate": 6.73497487437186e-06, + "loss": 0.0015, + "step": 33000 + }, + { + "epoch": 16.62, + "eval_loss": 0.31893399357795715, + "eval_runtime": 638.1335, + "eval_samples_per_second": 2.208, + "eval_steps_per_second": 2.208, + "eval_wer": 23.625043237634035, + "step": 33000 + }, + { + "epoch": 16.64, + "grad_norm": 0.4319547712802887, + "learning_rate": 6.7324623115577895e-06, + "loss": 0.0021, + "step": 33025 + }, + { + "epoch": 16.65, + "grad_norm": 0.5117477178573608, + "learning_rate": 6.72994974874372e-06, + "loss": 0.0017, + "step": 33050 + }, + { + "epoch": 16.66, + "grad_norm": 0.599642276763916, + "learning_rate": 6.727437185929648e-06, + "loss": 0.0022, + "step": 33075 + }, + { + "epoch": 16.68, + "grad_norm": 2.386610984802246, + "learning_rate": 6.724924623115578e-06, + "loss": 0.0024, + "step": 33100 + }, + { + "epoch": 16.69, + "grad_norm": 0.7631763219833374, + "learning_rate": 6.7224120603015085e-06, + "loss": 0.0019, + "step": 33125 + }, + { + "epoch": 16.7, + "grad_norm": 1.534925937652588, + "learning_rate": 6.719899497487438e-06, + "loss": 0.0022, + "step": 33150 + }, + { + "epoch": 16.71, + "grad_norm": 1.815709114074707, + "learning_rate": 6.717386934673368e-06, + "loss": 0.0022, + "step": 33175 + }, + { + "epoch": 16.73, + "grad_norm": 0.47216132283210754, + "learning_rate": 6.7148743718592965e-06, + "loss": 0.002, + "step": 33200 + }, + { + "epoch": 16.74, + "grad_norm": 1.2903132438659668, + "learning_rate": 6.712361809045227e-06, + "loss": 0.0024, + "step": 33225 + }, + { + "epoch": 16.75, + "grad_norm": 1.4618940353393555, + "learning_rate": 6.709849246231156e-06, + "loss": 0.0021, + "step": 33250 + }, + { + "epoch": 16.76, + "grad_norm": 2.112274169921875, + "learning_rate": 6.707336683417086e-06, + "loss": 0.0021, + "step": 33275 + }, + { + "epoch": 16.78, + "grad_norm": 1.849009394645691, + "learning_rate": 6.7048241206030155e-06, + "loss": 0.0023, + "step": 33300 + }, + { + "epoch": 16.79, + "grad_norm": 1.0181339979171753, + "learning_rate": 6.702311557788946e-06, + "loss": 0.002, + "step": 33325 + }, + { + "epoch": 16.8, + "grad_norm": 1.6199973821640015, + "learning_rate": 6.699798994974876e-06, + "loss": 0.0024, + "step": 33350 + }, + { + "epoch": 16.81, + "grad_norm": 0.8824648261070251, + "learning_rate": 6.697286432160804e-06, + "loss": 0.0021, + "step": 33375 + }, + { + "epoch": 16.83, + "grad_norm": 1.2682048082351685, + "learning_rate": 6.6947738693467344e-06, + "loss": 0.0021, + "step": 33400 + }, + { + "epoch": 16.84, + "grad_norm": 0.9669592976570129, + "learning_rate": 6.692261306532664e-06, + "loss": 0.0019, + "step": 33425 + }, + { + "epoch": 16.85, + "grad_norm": 2.088453769683838, + "learning_rate": 6.689748743718594e-06, + "loss": 0.0018, + "step": 33450 + }, + { + "epoch": 16.86, + "grad_norm": 1.75133216381073, + "learning_rate": 6.687236180904522e-06, + "loss": 0.0023, + "step": 33475 + }, + { + "epoch": 16.88, + "grad_norm": 2.224334239959717, + "learning_rate": 6.684723618090453e-06, + "loss": 0.0019, + "step": 33500 + }, + { + "epoch": 16.89, + "grad_norm": 0.6259239315986633, + "learning_rate": 6.682211055276382e-06, + "loss": 0.0026, + "step": 33525 + }, + { + "epoch": 16.9, + "grad_norm": 1.3590632677078247, + "learning_rate": 6.679698492462312e-06, + "loss": 0.0022, + "step": 33550 + }, + { + "epoch": 16.91, + "grad_norm": 1.8630064725875854, + "learning_rate": 6.677185929648242e-06, + "loss": 0.0025, + "step": 33575 + }, + { + "epoch": 16.93, + "grad_norm": 1.7753084897994995, + "learning_rate": 6.6746733668341716e-06, + "loss": 0.0022, + "step": 33600 + }, + { + "epoch": 16.94, + "grad_norm": 0.30768975615501404, + "learning_rate": 6.672160804020102e-06, + "loss": 0.0021, + "step": 33625 + }, + { + "epoch": 16.95, + "grad_norm": 1.4861748218536377, + "learning_rate": 6.66964824120603e-06, + "loss": 0.002, + "step": 33650 + }, + { + "epoch": 16.96, + "grad_norm": 1.5577760934829712, + "learning_rate": 6.66713567839196e-06, + "loss": 0.002, + "step": 33675 + }, + { + "epoch": 16.98, + "grad_norm": 0.8203927278518677, + "learning_rate": 6.66462311557789e-06, + "loss": 0.0019, + "step": 33700 + }, + { + "epoch": 16.99, + "grad_norm": 1.1603564023971558, + "learning_rate": 6.66211055276382e-06, + "loss": 0.0021, + "step": 33725 + }, + { + "epoch": 17.0, + "grad_norm": 0.41753071546554565, + "learning_rate": 6.65959798994975e-06, + "loss": 0.0019, + "step": 33750 + }, + { + "epoch": 17.02, + "grad_norm": 0.49397504329681396, + "learning_rate": 6.6570854271356785e-06, + "loss": 0.0016, + "step": 33775 + }, + { + "epoch": 17.03, + "grad_norm": 1.637376070022583, + "learning_rate": 6.654572864321609e-06, + "loss": 0.0017, + "step": 33800 + }, + { + "epoch": 17.04, + "grad_norm": 0.45649200677871704, + "learning_rate": 6.652060301507538e-06, + "loss": 0.0017, + "step": 33825 + }, + { + "epoch": 17.05, + "grad_norm": 1.1269056797027588, + "learning_rate": 6.649547738693468e-06, + "loss": 0.0021, + "step": 33850 + }, + { + "epoch": 17.07, + "grad_norm": 1.279366374015808, + "learning_rate": 6.6470351758793975e-06, + "loss": 0.0016, + "step": 33875 + }, + { + "epoch": 17.08, + "grad_norm": 1.3553489446640015, + "learning_rate": 6.644522613065328e-06, + "loss": 0.0014, + "step": 33900 + }, + { + "epoch": 17.09, + "grad_norm": 0.3694764971733093, + "learning_rate": 6.642010050251256e-06, + "loss": 0.0015, + "step": 33925 + }, + { + "epoch": 17.1, + "grad_norm": 0.7455251812934875, + "learning_rate": 6.639497487437186e-06, + "loss": 0.0015, + "step": 33950 + }, + { + "epoch": 17.12, + "grad_norm": 1.4663114547729492, + "learning_rate": 6.6369849246231165e-06, + "loss": 0.0014, + "step": 33975 + }, + { + "epoch": 17.13, + "grad_norm": 0.41065290570259094, + "learning_rate": 6.634472361809046e-06, + "loss": 0.0012, + "step": 34000 + }, + { + "epoch": 17.13, + "eval_loss": 0.31717780232429504, + "eval_runtime": 640.7627, + "eval_samples_per_second": 2.199, + "eval_steps_per_second": 2.199, + "eval_wer": 22.94707713593912, + "step": 34000 + }, + { + "epoch": 17.14, + "grad_norm": 0.921389102935791, + "learning_rate": 6.631959798994976e-06, + "loss": 0.0013, + "step": 34025 + }, + { + "epoch": 17.15, + "grad_norm": 1.2974750995635986, + "learning_rate": 6.6294472361809045e-06, + "loss": 0.0015, + "step": 34050 + }, + { + "epoch": 17.17, + "grad_norm": 0.946368932723999, + "learning_rate": 6.626934673366835e-06, + "loss": 0.0015, + "step": 34075 + }, + { + "epoch": 17.18, + "grad_norm": 1.1156178712844849, + "learning_rate": 6.624422110552764e-06, + "loss": 0.0015, + "step": 34100 + }, + { + "epoch": 17.19, + "grad_norm": 0.447689026594162, + "learning_rate": 6.621909547738694e-06, + "loss": 0.0016, + "step": 34125 + }, + { + "epoch": 17.2, + "grad_norm": 0.7558609247207642, + "learning_rate": 6.6193969849246235e-06, + "loss": 0.0016, + "step": 34150 + }, + { + "epoch": 17.22, + "grad_norm": 0.8499734997749329, + "learning_rate": 6.616884422110554e-06, + "loss": 0.0017, + "step": 34175 + }, + { + "epoch": 17.23, + "grad_norm": 0.40783509612083435, + "learning_rate": 6.614371859296484e-06, + "loss": 0.002, + "step": 34200 + }, + { + "epoch": 17.24, + "grad_norm": 1.5999126434326172, + "learning_rate": 6.611859296482412e-06, + "loss": 0.002, + "step": 34225 + }, + { + "epoch": 17.25, + "grad_norm": 0.852052628993988, + "learning_rate": 6.6093467336683424e-06, + "loss": 0.0019, + "step": 34250 + }, + { + "epoch": 17.27, + "grad_norm": 0.36311525106430054, + "learning_rate": 6.606834170854272e-06, + "loss": 0.0017, + "step": 34275 + }, + { + "epoch": 17.28, + "grad_norm": 2.138871669769287, + "learning_rate": 6.604321608040202e-06, + "loss": 0.0016, + "step": 34300 + }, + { + "epoch": 17.29, + "grad_norm": 1.056746482849121, + "learning_rate": 6.60180904522613e-06, + "loss": 0.0015, + "step": 34325 + }, + { + "epoch": 17.3, + "grad_norm": 0.8301752805709839, + "learning_rate": 6.599296482412061e-06, + "loss": 0.002, + "step": 34350 + }, + { + "epoch": 17.32, + "grad_norm": 1.781783938407898, + "learning_rate": 6.596783919597991e-06, + "loss": 0.0017, + "step": 34375 + }, + { + "epoch": 17.33, + "grad_norm": 1.2563107013702393, + "learning_rate": 6.59427135678392e-06, + "loss": 0.0017, + "step": 34400 + }, + { + "epoch": 17.34, + "grad_norm": 2.3324105739593506, + "learning_rate": 6.59175879396985e-06, + "loss": 0.002, + "step": 34425 + }, + { + "epoch": 17.36, + "grad_norm": 1.0351413488388062, + "learning_rate": 6.5892462311557796e-06, + "loss": 0.002, + "step": 34450 + }, + { + "epoch": 17.37, + "grad_norm": 1.9729125499725342, + "learning_rate": 6.58673366834171e-06, + "loss": 0.0019, + "step": 34475 + }, + { + "epoch": 17.38, + "grad_norm": 0.7360727787017822, + "learning_rate": 6.584221105527638e-06, + "loss": 0.0016, + "step": 34500 + }, + { + "epoch": 17.39, + "grad_norm": 1.5218764543533325, + "learning_rate": 6.581708542713568e-06, + "loss": 0.0017, + "step": 34525 + }, + { + "epoch": 17.41, + "grad_norm": 0.9143256545066833, + "learning_rate": 6.579195979899498e-06, + "loss": 0.002, + "step": 34550 + }, + { + "epoch": 17.42, + "grad_norm": 1.5911108255386353, + "learning_rate": 6.576683417085428e-06, + "loss": 0.0021, + "step": 34575 + }, + { + "epoch": 17.43, + "grad_norm": 1.1945171356201172, + "learning_rate": 6.574170854271358e-06, + "loss": 0.0021, + "step": 34600 + }, + { + "epoch": 17.44, + "grad_norm": 0.6065207719802856, + "learning_rate": 6.5716582914572865e-06, + "loss": 0.0015, + "step": 34625 + }, + { + "epoch": 17.46, + "grad_norm": 1.3287162780761719, + "learning_rate": 6.569145728643217e-06, + "loss": 0.0015, + "step": 34650 + }, + { + "epoch": 17.47, + "grad_norm": 1.1048755645751953, + "learning_rate": 6.566633165829146e-06, + "loss": 0.0016, + "step": 34675 + }, + { + "epoch": 17.48, + "grad_norm": 1.8290241956710815, + "learning_rate": 6.564120603015076e-06, + "loss": 0.0017, + "step": 34700 + }, + { + "epoch": 17.49, + "grad_norm": 0.7153010964393616, + "learning_rate": 6.5616080402010055e-06, + "loss": 0.0017, + "step": 34725 + }, + { + "epoch": 17.51, + "grad_norm": 1.1753082275390625, + "learning_rate": 6.559095477386936e-06, + "loss": 0.0019, + "step": 34750 + }, + { + "epoch": 17.52, + "grad_norm": 0.8656060695648193, + "learning_rate": 6.556582914572864e-06, + "loss": 0.0018, + "step": 34775 + }, + { + "epoch": 17.53, + "grad_norm": 0.9170093536376953, + "learning_rate": 6.554070351758794e-06, + "loss": 0.0017, + "step": 34800 + }, + { + "epoch": 17.54, + "grad_norm": 0.8003792762756348, + "learning_rate": 6.5515577889447245e-06, + "loss": 0.0016, + "step": 34825 + }, + { + "epoch": 17.56, + "grad_norm": 0.9868853092193604, + "learning_rate": 6.549145728643217e-06, + "loss": 0.0015, + "step": 34850 + }, + { + "epoch": 17.57, + "grad_norm": 1.0430176258087158, + "learning_rate": 6.546633165829146e-06, + "loss": 0.0018, + "step": 34875 + }, + { + "epoch": 17.58, + "grad_norm": 0.36971691250801086, + "learning_rate": 6.544120603015076e-06, + "loss": 0.0018, + "step": 34900 + }, + { + "epoch": 17.59, + "grad_norm": 0.87406325340271, + "learning_rate": 6.541608040201005e-06, + "loss": 0.002, + "step": 34925 + }, + { + "epoch": 17.61, + "grad_norm": 2.4083333015441895, + "learning_rate": 6.539095477386935e-06, + "loss": 0.002, + "step": 34950 + }, + { + "epoch": 17.62, + "grad_norm": 0.49866533279418945, + "learning_rate": 6.536582914572864e-06, + "loss": 0.0021, + "step": 34975 + }, + { + "epoch": 17.63, + "grad_norm": 0.744525134563446, + "learning_rate": 6.5340703517587945e-06, + "loss": 0.0021, + "step": 35000 + }, + { + "epoch": 17.63, + "eval_loss": 0.3279932141304016, + "eval_runtime": 640.0538, + "eval_samples_per_second": 2.201, + "eval_steps_per_second": 2.201, + "eval_wer": 23.251470079557247, + "step": 35000 + }, + { + "epoch": 17.64, + "grad_norm": 1.2228732109069824, + "learning_rate": 6.531557788944725e-06, + "loss": 0.0018, + "step": 35025 + }, + { + "epoch": 17.66, + "grad_norm": 2.64949631690979, + "learning_rate": 6.529045226130654e-06, + "loss": 0.0019, + "step": 35050 + }, + { + "epoch": 17.67, + "grad_norm": 0.8812341094017029, + "learning_rate": 6.526532663316583e-06, + "loss": 0.0016, + "step": 35075 + }, + { + "epoch": 17.68, + "grad_norm": 1.3396104574203491, + "learning_rate": 6.524020100502513e-06, + "loss": 0.0017, + "step": 35100 + }, + { + "epoch": 17.7, + "grad_norm": 0.6547167897224426, + "learning_rate": 6.521507537688443e-06, + "loss": 0.0019, + "step": 35125 + }, + { + "epoch": 17.71, + "grad_norm": 1.9075217247009277, + "learning_rate": 6.518994974874372e-06, + "loss": 0.0017, + "step": 35150 + }, + { + "epoch": 17.72, + "grad_norm": 1.7751950025558472, + "learning_rate": 6.516482412060302e-06, + "loss": 0.0021, + "step": 35175 + }, + { + "epoch": 17.73, + "grad_norm": 1.1410751342773438, + "learning_rate": 6.514070351758795e-06, + "loss": 0.0021, + "step": 35200 + }, + { + "epoch": 17.75, + "grad_norm": 0.8662394285202026, + "learning_rate": 6.511557788944725e-06, + "loss": 0.0018, + "step": 35225 + }, + { + "epoch": 17.76, + "grad_norm": 1.586671233177185, + "learning_rate": 6.509045226130653e-06, + "loss": 0.0025, + "step": 35250 + }, + { + "epoch": 17.77, + "grad_norm": 1.0892744064331055, + "learning_rate": 6.5065326633165835e-06, + "loss": 0.0024, + "step": 35275 + }, + { + "epoch": 17.78, + "grad_norm": 0.37060225009918213, + "learning_rate": 6.504020100502513e-06, + "loss": 0.0021, + "step": 35300 + }, + { + "epoch": 17.8, + "grad_norm": 0.7521613240242004, + "learning_rate": 6.501507537688443e-06, + "loss": 0.0021, + "step": 35325 + }, + { + "epoch": 17.81, + "grad_norm": 2.0023598670959473, + "learning_rate": 6.4989949748743715e-06, + "loss": 0.0017, + "step": 35350 + }, + { + "epoch": 17.82, + "grad_norm": 1.1638795137405396, + "learning_rate": 6.496482412060302e-06, + "loss": 0.0019, + "step": 35375 + }, + { + "epoch": 17.83, + "grad_norm": 1.654910922050476, + "learning_rate": 6.493969849246232e-06, + "loss": 0.002, + "step": 35400 + }, + { + "epoch": 17.85, + "grad_norm": 1.0789589881896973, + "learning_rate": 6.491457286432161e-06, + "loss": 0.0022, + "step": 35425 + }, + { + "epoch": 17.86, + "grad_norm": 1.5805654525756836, + "learning_rate": 6.488944723618091e-06, + "loss": 0.002, + "step": 35450 + }, + { + "epoch": 17.87, + "grad_norm": 1.393417239189148, + "learning_rate": 6.486432160804021e-06, + "loss": 0.0019, + "step": 35475 + }, + { + "epoch": 17.88, + "grad_norm": 1.4259059429168701, + "learning_rate": 6.483919597989951e-06, + "loss": 0.0018, + "step": 35500 + }, + { + "epoch": 17.9, + "grad_norm": 1.371140956878662, + "learning_rate": 6.481407035175879e-06, + "loss": 0.0019, + "step": 35525 + }, + { + "epoch": 17.91, + "grad_norm": 1.099618673324585, + "learning_rate": 6.4788944723618095e-06, + "loss": 0.0019, + "step": 35550 + }, + { + "epoch": 17.92, + "grad_norm": 0.654387354850769, + "learning_rate": 6.476381909547739e-06, + "loss": 0.0019, + "step": 35575 + }, + { + "epoch": 17.93, + "grad_norm": 1.3511158227920532, + "learning_rate": 6.473869346733669e-06, + "loss": 0.0017, + "step": 35600 + }, + { + "epoch": 17.95, + "grad_norm": 1.1146668195724487, + "learning_rate": 6.471356783919599e-06, + "loss": 0.0019, + "step": 35625 + }, + { + "epoch": 17.96, + "grad_norm": 1.2822802066802979, + "learning_rate": 6.468844221105528e-06, + "loss": 0.0019, + "step": 35650 + }, + { + "epoch": 17.97, + "grad_norm": 0.6118746399879456, + "learning_rate": 6.466331658291458e-06, + "loss": 0.0021, + "step": 35675 + }, + { + "epoch": 17.98, + "grad_norm": 1.6765689849853516, + "learning_rate": 6.463819095477387e-06, + "loss": 0.002, + "step": 35700 + }, + { + "epoch": 18.0, + "grad_norm": 0.8929309844970703, + "learning_rate": 6.461306532663317e-06, + "loss": 0.002, + "step": 35725 + }, + { + "epoch": 18.01, + "grad_norm": 0.8236270546913147, + "learning_rate": 6.458793969849247e-06, + "loss": 0.0014, + "step": 35750 + }, + { + "epoch": 18.02, + "grad_norm": 0.3765973150730133, + "learning_rate": 6.456281407035177e-06, + "loss": 0.001, + "step": 35775 + }, + { + "epoch": 18.04, + "grad_norm": 0.7577652931213379, + "learning_rate": 6.453768844221107e-06, + "loss": 0.0009, + "step": 35800 + }, + { + "epoch": 18.05, + "grad_norm": 1.3990800380706787, + "learning_rate": 6.451256281407035e-06, + "loss": 0.0012, + "step": 35825 + }, + { + "epoch": 18.06, + "grad_norm": 0.6104835271835327, + "learning_rate": 6.448743718592966e-06, + "loss": 0.0017, + "step": 35850 + }, + { + "epoch": 18.07, + "grad_norm": 0.8785896301269531, + "learning_rate": 6.446231155778895e-06, + "loss": 0.0014, + "step": 35875 + }, + { + "epoch": 18.09, + "grad_norm": 0.3868306875228882, + "learning_rate": 6.443718592964825e-06, + "loss": 0.0014, + "step": 35900 + }, + { + "epoch": 18.1, + "grad_norm": 1.184727430343628, + "learning_rate": 6.4412060301507536e-06, + "loss": 0.0012, + "step": 35925 + }, + { + "epoch": 18.11, + "grad_norm": 1.4710326194763184, + "learning_rate": 6.438693467336684e-06, + "loss": 0.0015, + "step": 35950 + }, + { + "epoch": 18.12, + "grad_norm": 0.4654022753238678, + "learning_rate": 6.436180904522613e-06, + "loss": 0.0011, + "step": 35975 + }, + { + "epoch": 18.14, + "grad_norm": 0.30787691473960876, + "learning_rate": 6.433668341708543e-06, + "loss": 0.0017, + "step": 36000 + }, + { + "epoch": 18.14, + "eval_loss": 0.3324070870876312, + "eval_runtime": 779.64, + "eval_samples_per_second": 1.807, + "eval_steps_per_second": 1.807, + "eval_wer": 23.583535108958838, + "step": 36000 + }, + { + "epoch": 18.15, + "grad_norm": 2.8175275325775146, + "learning_rate": 6.431155778894473e-06, + "loss": 0.0015, + "step": 36025 + }, + { + "epoch": 18.16, + "grad_norm": 1.644282579421997, + "learning_rate": 6.428643216080403e-06, + "loss": 0.0017, + "step": 36050 + }, + { + "epoch": 18.17, + "grad_norm": 1.2739876508712769, + "learning_rate": 6.426130653266333e-06, + "loss": 0.0016, + "step": 36075 + }, + { + "epoch": 18.19, + "grad_norm": 1.1111208200454712, + "learning_rate": 6.423618090452261e-06, + "loss": 0.0011, + "step": 36100 + }, + { + "epoch": 18.2, + "grad_norm": 0.3262185752391815, + "learning_rate": 6.4211055276381915e-06, + "loss": 0.0012, + "step": 36125 + }, + { + "epoch": 18.21, + "grad_norm": 1.090649962425232, + "learning_rate": 6.418592964824121e-06, + "loss": 0.0019, + "step": 36150 + }, + { + "epoch": 18.22, + "grad_norm": 0.6180118322372437, + "learning_rate": 6.416080402010051e-06, + "loss": 0.0016, + "step": 36175 + }, + { + "epoch": 18.24, + "grad_norm": 1.0317612886428833, + "learning_rate": 6.4135678391959795e-06, + "loss": 0.0019, + "step": 36200 + }, + { + "epoch": 18.25, + "grad_norm": 0.2697906494140625, + "learning_rate": 6.41105527638191e-06, + "loss": 0.0015, + "step": 36225 + }, + { + "epoch": 18.26, + "grad_norm": 1.2288458347320557, + "learning_rate": 6.40854271356784e-06, + "loss": 0.0016, + "step": 36250 + }, + { + "epoch": 18.27, + "grad_norm": 1.6531765460968018, + "learning_rate": 6.406030150753769e-06, + "loss": 0.0013, + "step": 36275 + }, + { + "epoch": 18.29, + "grad_norm": 0.38945141434669495, + "learning_rate": 6.403517587939699e-06, + "loss": 0.0014, + "step": 36300 + }, + { + "epoch": 18.3, + "grad_norm": 0.8794446587562561, + "learning_rate": 6.401005025125629e-06, + "loss": 0.0013, + "step": 36325 + }, + { + "epoch": 18.31, + "grad_norm": 0.6100822687149048, + "learning_rate": 6.398492462311559e-06, + "loss": 0.0016, + "step": 36350 + }, + { + "epoch": 18.32, + "grad_norm": 1.371356725692749, + "learning_rate": 6.395979899497487e-06, + "loss": 0.0017, + "step": 36375 + }, + { + "epoch": 18.34, + "grad_norm": 0.791754424571991, + "learning_rate": 6.3934673366834175e-06, + "loss": 0.0014, + "step": 36400 + }, + { + "epoch": 18.35, + "grad_norm": 1.5653916597366333, + "learning_rate": 6.390954773869347e-06, + "loss": 0.0017, + "step": 36425 + }, + { + "epoch": 18.36, + "grad_norm": 0.2556110620498657, + "learning_rate": 6.388442211055277e-06, + "loss": 0.0017, + "step": 36450 + }, + { + "epoch": 18.38, + "grad_norm": 0.7103545665740967, + "learning_rate": 6.385929648241207e-06, + "loss": 0.0016, + "step": 36475 + }, + { + "epoch": 18.39, + "grad_norm": 1.2815943956375122, + "learning_rate": 6.383417085427136e-06, + "loss": 0.0019, + "step": 36500 + }, + { + "epoch": 18.4, + "grad_norm": 0.8965465426445007, + "learning_rate": 6.380904522613066e-06, + "loss": 0.0019, + "step": 36525 + }, + { + "epoch": 18.41, + "grad_norm": 1.0410230159759521, + "learning_rate": 6.378391959798995e-06, + "loss": 0.0015, + "step": 36550 + }, + { + "epoch": 18.43, + "grad_norm": 1.2315019369125366, + "learning_rate": 6.375879396984925e-06, + "loss": 0.0015, + "step": 36575 + }, + { + "epoch": 18.44, + "grad_norm": 0.7894676327705383, + "learning_rate": 6.373366834170855e-06, + "loss": 0.0012, + "step": 36600 + }, + { + "epoch": 18.45, + "grad_norm": 1.8946139812469482, + "learning_rate": 6.370854271356785e-06, + "loss": 0.0016, + "step": 36625 + }, + { + "epoch": 18.46, + "grad_norm": 0.4600735306739807, + "learning_rate": 6.368341708542715e-06, + "loss": 0.002, + "step": 36650 + }, + { + "epoch": 18.48, + "grad_norm": 1.5674549341201782, + "learning_rate": 6.365829145728643e-06, + "loss": 0.0018, + "step": 36675 + }, + { + "epoch": 18.49, + "grad_norm": 0.8131008744239807, + "learning_rate": 6.363316582914574e-06, + "loss": 0.0018, + "step": 36700 + }, + { + "epoch": 18.5, + "grad_norm": 0.6329362392425537, + "learning_rate": 6.360804020100503e-06, + "loss": 0.0019, + "step": 36725 + }, + { + "epoch": 18.51, + "grad_norm": 0.8405249714851379, + "learning_rate": 6.358291457286433e-06, + "loss": 0.0017, + "step": 36750 + }, + { + "epoch": 18.53, + "grad_norm": 1.6821061372756958, + "learning_rate": 6.3557788944723616e-06, + "loss": 0.0014, + "step": 36775 + }, + { + "epoch": 18.54, + "grad_norm": 0.9368191361427307, + "learning_rate": 6.353266331658292e-06, + "loss": 0.0014, + "step": 36800 + }, + { + "epoch": 18.55, + "grad_norm": 1.3366914987564087, + "learning_rate": 6.350753768844221e-06, + "loss": 0.0014, + "step": 36825 + }, + { + "epoch": 18.56, + "grad_norm": 1.3093204498291016, + "learning_rate": 6.348241206030151e-06, + "loss": 0.0018, + "step": 36850 + }, + { + "epoch": 18.58, + "grad_norm": 1.0798649787902832, + "learning_rate": 6.345728643216081e-06, + "loss": 0.0016, + "step": 36875 + }, + { + "epoch": 18.59, + "grad_norm": 0.4596012234687805, + "learning_rate": 6.343216080402011e-06, + "loss": 0.0016, + "step": 36900 + }, + { + "epoch": 18.6, + "grad_norm": 1.961551547050476, + "learning_rate": 6.340703517587941e-06, + "loss": 0.0017, + "step": 36925 + }, + { + "epoch": 18.61, + "grad_norm": 1.54167640209198, + "learning_rate": 6.338190954773869e-06, + "loss": 0.0018, + "step": 36950 + }, + { + "epoch": 18.63, + "grad_norm": 1.0647872686386108, + "learning_rate": 6.3356783919597995e-06, + "loss": 0.0015, + "step": 36975 + }, + { + "epoch": 18.64, + "grad_norm": 0.15062101185321808, + "learning_rate": 6.333165829145729e-06, + "loss": 0.0013, + "step": 37000 + }, + { + "epoch": 18.64, + "eval_loss": 0.33555132150650024, + "eval_runtime": 644.609, + "eval_samples_per_second": 2.186, + "eval_steps_per_second": 2.186, + "eval_wer": 23.445174680041507, + "step": 37000 + }, + { + "epoch": 18.65, + "grad_norm": 0.6003009080886841, + "learning_rate": 6.330653266331659e-06, + "loss": 0.0015, + "step": 37025 + }, + { + "epoch": 18.66, + "grad_norm": 0.7250798344612122, + "learning_rate": 6.3281407035175875e-06, + "loss": 0.0016, + "step": 37050 + }, + { + "epoch": 18.68, + "grad_norm": 0.7910952568054199, + "learning_rate": 6.325628140703518e-06, + "loss": 0.0016, + "step": 37075 + }, + { + "epoch": 18.69, + "grad_norm": 1.4833486080169678, + "learning_rate": 6.323115577889448e-06, + "loss": 0.002, + "step": 37100 + }, + { + "epoch": 18.7, + "grad_norm": 0.8942164182662964, + "learning_rate": 6.320603015075377e-06, + "loss": 0.0018, + "step": 37125 + }, + { + "epoch": 18.72, + "grad_norm": 0.8438106179237366, + "learning_rate": 6.318090452261307e-06, + "loss": 0.002, + "step": 37150 + }, + { + "epoch": 18.73, + "grad_norm": 1.0023553371429443, + "learning_rate": 6.315577889447237e-06, + "loss": 0.0013, + "step": 37175 + }, + { + "epoch": 18.74, + "grad_norm": 0.8116686940193176, + "learning_rate": 6.313065326633167e-06, + "loss": 0.0015, + "step": 37200 + }, + { + "epoch": 18.75, + "grad_norm": 1.0903185606002808, + "learning_rate": 6.310552763819095e-06, + "loss": 0.0016, + "step": 37225 + }, + { + "epoch": 18.77, + "grad_norm": 1.1223067045211792, + "learning_rate": 6.3080402010050255e-06, + "loss": 0.0018, + "step": 37250 + }, + { + "epoch": 18.78, + "grad_norm": 1.5012390613555908, + "learning_rate": 6.305527638190956e-06, + "loss": 0.0018, + "step": 37275 + }, + { + "epoch": 18.79, + "grad_norm": 1.3460817337036133, + "learning_rate": 6.303015075376885e-06, + "loss": 0.0019, + "step": 37300 + }, + { + "epoch": 18.8, + "grad_norm": 1.7468082904815674, + "learning_rate": 6.300502512562815e-06, + "loss": 0.0019, + "step": 37325 + }, + { + "epoch": 18.82, + "grad_norm": 0.5250969529151917, + "learning_rate": 6.297989949748744e-06, + "loss": 0.0017, + "step": 37350 + }, + { + "epoch": 18.83, + "grad_norm": 0.2302069365978241, + "learning_rate": 6.295477386934674e-06, + "loss": 0.0013, + "step": 37375 + }, + { + "epoch": 18.84, + "grad_norm": 1.5310719013214111, + "learning_rate": 6.292964824120603e-06, + "loss": 0.0015, + "step": 37400 + }, + { + "epoch": 18.85, + "grad_norm": 1.8044565916061401, + "learning_rate": 6.290452261306533e-06, + "loss": 0.0014, + "step": 37425 + }, + { + "epoch": 18.87, + "grad_norm": 0.8181155920028687, + "learning_rate": 6.287939698492463e-06, + "loss": 0.0014, + "step": 37450 + }, + { + "epoch": 18.88, + "grad_norm": 0.8504043817520142, + "learning_rate": 6.285427135678393e-06, + "loss": 0.0018, + "step": 37475 + }, + { + "epoch": 18.89, + "grad_norm": 0.27127495408058167, + "learning_rate": 6.282914572864323e-06, + "loss": 0.0016, + "step": 37500 + }, + { + "epoch": 18.9, + "grad_norm": 1.2492486238479614, + "learning_rate": 6.280402010050251e-06, + "loss": 0.0015, + "step": 37525 + }, + { + "epoch": 18.92, + "grad_norm": 1.6787339448928833, + "learning_rate": 6.277889447236182e-06, + "loss": 0.0017, + "step": 37550 + }, + { + "epoch": 18.93, + "grad_norm": 2.014810800552368, + "learning_rate": 6.275376884422111e-06, + "loss": 0.002, + "step": 37575 + }, + { + "epoch": 18.94, + "grad_norm": 0.9987440705299377, + "learning_rate": 6.272864321608041e-06, + "loss": 0.002, + "step": 37600 + }, + { + "epoch": 18.95, + "grad_norm": 0.6803994178771973, + "learning_rate": 6.2703517587939696e-06, + "loss": 0.0021, + "step": 37625 + }, + { + "epoch": 18.97, + "grad_norm": 0.8169840574264526, + "learning_rate": 6.2678391959799e-06, + "loss": 0.0022, + "step": 37650 + }, + { + "epoch": 18.98, + "grad_norm": 1.3978486061096191, + "learning_rate": 6.265326633165829e-06, + "loss": 0.0018, + "step": 37675 + }, + { + "epoch": 18.99, + "grad_norm": 1.5592775344848633, + "learning_rate": 6.262814070351759e-06, + "loss": 0.0019, + "step": 37700 + }, + { + "epoch": 19.01, + "grad_norm": 1.0616681575775146, + "learning_rate": 6.260301507537689e-06, + "loss": 0.0017, + "step": 37725 + }, + { + "epoch": 19.02, + "grad_norm": 0.9332436919212341, + "learning_rate": 6.257788944723619e-06, + "loss": 0.0014, + "step": 37750 + }, + { + "epoch": 19.03, + "grad_norm": 1.051811933517456, + "learning_rate": 6.255276381909549e-06, + "loss": 0.0013, + "step": 37775 + }, + { + "epoch": 19.04, + "grad_norm": 1.1693936586380005, + "learning_rate": 6.252763819095477e-06, + "loss": 0.0014, + "step": 37800 + }, + { + "epoch": 19.06, + "grad_norm": 1.9111791849136353, + "learning_rate": 6.2502512562814075e-06, + "loss": 0.0014, + "step": 37825 + }, + { + "epoch": 19.07, + "grad_norm": 0.4390527904033661, + "learning_rate": 6.247738693467337e-06, + "loss": 0.0012, + "step": 37850 + }, + { + "epoch": 19.08, + "grad_norm": 2.4373393058776855, + "learning_rate": 6.245226130653267e-06, + "loss": 0.0013, + "step": 37875 + }, + { + "epoch": 19.09, + "grad_norm": 0.6409306526184082, + "learning_rate": 6.242713567839197e-06, + "loss": 0.0014, + "step": 37900 + }, + { + "epoch": 19.11, + "grad_norm": 0.4275980293750763, + "learning_rate": 6.240201005025126e-06, + "loss": 0.0011, + "step": 37925 + }, + { + "epoch": 19.12, + "grad_norm": 0.36015447974205017, + "learning_rate": 6.237688442211056e-06, + "loss": 0.0011, + "step": 37950 + }, + { + "epoch": 19.13, + "grad_norm": 0.4539172351360321, + "learning_rate": 6.235175879396985e-06, + "loss": 0.0012, + "step": 37975 + }, + { + "epoch": 19.14, + "grad_norm": 0.5768988132476807, + "learning_rate": 6.232663316582915e-06, + "loss": 0.001, + "step": 38000 + }, + { + "epoch": 19.14, + "eval_loss": 0.3325794041156769, + "eval_runtime": 648.1138, + "eval_samples_per_second": 2.174, + "eval_steps_per_second": 2.174, + "eval_wer": 23.12694569353165, + "step": 38000 + }, + { + "epoch": 19.16, + "grad_norm": 1.9239482879638672, + "learning_rate": 6.230150753768845e-06, + "loss": 0.0013, + "step": 38025 + }, + { + "epoch": 19.17, + "grad_norm": 0.30082157254219055, + "learning_rate": 6.227638190954775e-06, + "loss": 0.001, + "step": 38050 + }, + { + "epoch": 19.18, + "grad_norm": 0.20353496074676514, + "learning_rate": 6.225125628140703e-06, + "loss": 0.0011, + "step": 38075 + }, + { + "epoch": 19.19, + "grad_norm": 0.6615707278251648, + "learning_rate": 6.2226130653266335e-06, + "loss": 0.0012, + "step": 38100 + }, + { + "epoch": 19.21, + "grad_norm": 1.4539945125579834, + "learning_rate": 6.220100502512564e-06, + "loss": 0.0012, + "step": 38125 + }, + { + "epoch": 19.22, + "grad_norm": 0.6011916399002075, + "learning_rate": 6.217587939698493e-06, + "loss": 0.0012, + "step": 38150 + }, + { + "epoch": 19.23, + "grad_norm": 0.29558372497558594, + "learning_rate": 6.215075376884423e-06, + "loss": 0.0014, + "step": 38175 + }, + { + "epoch": 19.24, + "grad_norm": 1.2541766166687012, + "learning_rate": 6.212562814070352e-06, + "loss": 0.0011, + "step": 38200 + }, + { + "epoch": 19.26, + "grad_norm": 2.3364927768707275, + "learning_rate": 6.210050251256282e-06, + "loss": 0.0017, + "step": 38225 + }, + { + "epoch": 19.27, + "grad_norm": 0.5079712867736816, + "learning_rate": 6.207537688442211e-06, + "loss": 0.0013, + "step": 38250 + }, + { + "epoch": 19.28, + "grad_norm": 0.9235591292381287, + "learning_rate": 6.205025125628141e-06, + "loss": 0.0014, + "step": 38275 + }, + { + "epoch": 19.29, + "grad_norm": 0.31673333048820496, + "learning_rate": 6.202512562814071e-06, + "loss": 0.0013, + "step": 38300 + }, + { + "epoch": 19.31, + "grad_norm": 1.0981833934783936, + "learning_rate": 6.200000000000001e-06, + "loss": 0.0014, + "step": 38325 + }, + { + "epoch": 19.32, + "grad_norm": 0.6616347432136536, + "learning_rate": 6.197487437185931e-06, + "loss": 0.0014, + "step": 38350 + }, + { + "epoch": 19.33, + "grad_norm": 0.5207319259643555, + "learning_rate": 6.1949748743718594e-06, + "loss": 0.0013, + "step": 38375 + }, + { + "epoch": 19.35, + "grad_norm": 1.1700994968414307, + "learning_rate": 6.19246231155779e-06, + "loss": 0.0015, + "step": 38400 + }, + { + "epoch": 19.36, + "grad_norm": 1.4399977922439575, + "learning_rate": 6.189949748743719e-06, + "loss": 0.0014, + "step": 38425 + }, + { + "epoch": 19.37, + "grad_norm": 0.9737831354141235, + "learning_rate": 6.187437185929649e-06, + "loss": 0.0013, + "step": 38450 + }, + { + "epoch": 19.38, + "grad_norm": 0.8050452470779419, + "learning_rate": 6.1849246231155776e-06, + "loss": 0.0011, + "step": 38475 + }, + { + "epoch": 19.4, + "grad_norm": 0.5391014218330383, + "learning_rate": 6.182412060301508e-06, + "loss": 0.0012, + "step": 38500 + }, + { + "epoch": 19.41, + "grad_norm": 1.5151572227478027, + "learning_rate": 6.179899497487438e-06, + "loss": 0.0013, + "step": 38525 + }, + { + "epoch": 19.42, + "grad_norm": 0.6566374897956848, + "learning_rate": 6.177386934673367e-06, + "loss": 0.0012, + "step": 38550 + }, + { + "epoch": 19.43, + "grad_norm": 0.5387280583381653, + "learning_rate": 6.174874371859297e-06, + "loss": 0.0014, + "step": 38575 + }, + { + "epoch": 19.45, + "grad_norm": 2.2199933528900146, + "learning_rate": 6.172361809045227e-06, + "loss": 0.0015, + "step": 38600 + }, + { + "epoch": 19.46, + "grad_norm": 0.5629024505615234, + "learning_rate": 6.169849246231157e-06, + "loss": 0.0014, + "step": 38625 + }, + { + "epoch": 19.47, + "grad_norm": 1.4785996675491333, + "learning_rate": 6.167336683417085e-06, + "loss": 0.0019, + "step": 38650 + }, + { + "epoch": 19.48, + "grad_norm": 1.0027951002120972, + "learning_rate": 6.1648241206030155e-06, + "loss": 0.0016, + "step": 38675 + }, + { + "epoch": 19.5, + "grad_norm": 0.8754851222038269, + "learning_rate": 6.162311557788945e-06, + "loss": 0.0013, + "step": 38700 + }, + { + "epoch": 19.51, + "grad_norm": 1.2813969850540161, + "learning_rate": 6.159798994974875e-06, + "loss": 0.0012, + "step": 38725 + }, + { + "epoch": 19.52, + "grad_norm": 0.9958238005638123, + "learning_rate": 6.157286432160805e-06, + "loss": 0.0012, + "step": 38750 + }, + { + "epoch": 19.53, + "grad_norm": 1.3986196517944336, + "learning_rate": 6.154773869346734e-06, + "loss": 0.0013, + "step": 38775 + }, + { + "epoch": 19.55, + "grad_norm": 0.44907984137535095, + "learning_rate": 6.152261306532664e-06, + "loss": 0.0013, + "step": 38800 + }, + { + "epoch": 19.56, + "grad_norm": 1.5967319011688232, + "learning_rate": 6.149748743718593e-06, + "loss": 0.0017, + "step": 38825 + }, + { + "epoch": 19.57, + "grad_norm": 1.011804223060608, + "learning_rate": 6.147236180904523e-06, + "loss": 0.0013, + "step": 38850 + }, + { + "epoch": 19.58, + "grad_norm": 0.6981809139251709, + "learning_rate": 6.144723618090453e-06, + "loss": 0.0016, + "step": 38875 + }, + { + "epoch": 19.6, + "grad_norm": 1.2294851541519165, + "learning_rate": 6.142211055276383e-06, + "loss": 0.0017, + "step": 38900 + }, + { + "epoch": 19.61, + "grad_norm": 0.9731518030166626, + "learning_rate": 6.139698492462311e-06, + "loss": 0.0016, + "step": 38925 + }, + { + "epoch": 19.62, + "grad_norm": 1.7019362449645996, + "learning_rate": 6.1371859296482415e-06, + "loss": 0.0016, + "step": 38950 + }, + { + "epoch": 19.63, + "grad_norm": 0.35968101024627686, + "learning_rate": 6.134673366834172e-06, + "loss": 0.0016, + "step": 38975 + }, + { + "epoch": 19.65, + "grad_norm": 1.0781711339950562, + "learning_rate": 6.132160804020101e-06, + "loss": 0.0016, + "step": 39000 + }, + { + "epoch": 19.65, + "eval_loss": 0.34026119112968445, + "eval_runtime": 642.6075, + "eval_samples_per_second": 2.193, + "eval_steps_per_second": 2.193, + "eval_wer": 23.777239709443098, + "step": 39000 + }, + { + "epoch": 19.66, + "grad_norm": 0.6743261218070984, + "learning_rate": 6.129648241206031e-06, + "loss": 0.0017, + "step": 39025 + }, + { + "epoch": 19.67, + "grad_norm": 1.3148083686828613, + "learning_rate": 6.12713567839196e-06, + "loss": 0.0014, + "step": 39050 + }, + { + "epoch": 19.69, + "grad_norm": 1.3721927404403687, + "learning_rate": 6.12462311557789e-06, + "loss": 0.0014, + "step": 39075 + }, + { + "epoch": 19.7, + "grad_norm": 0.8227803707122803, + "learning_rate": 6.122211055276382e-06, + "loss": 0.0017, + "step": 39100 + }, + { + "epoch": 19.71, + "grad_norm": 1.0405676364898682, + "learning_rate": 6.1196984924623115e-06, + "loss": 0.0016, + "step": 39125 + }, + { + "epoch": 19.72, + "grad_norm": 0.7169470191001892, + "learning_rate": 6.117185929648242e-06, + "loss": 0.0017, + "step": 39150 + }, + { + "epoch": 19.74, + "grad_norm": 0.5327123999595642, + "learning_rate": 6.114673366834172e-06, + "loss": 0.0014, + "step": 39175 + }, + { + "epoch": 19.75, + "grad_norm": 1.0876247882843018, + "learning_rate": 6.112160804020101e-06, + "loss": 0.0015, + "step": 39200 + }, + { + "epoch": 19.76, + "grad_norm": 0.7583673596382141, + "learning_rate": 6.109648241206031e-06, + "loss": 0.0011, + "step": 39225 + }, + { + "epoch": 19.77, + "grad_norm": 0.7003112435340881, + "learning_rate": 6.10713567839196e-06, + "loss": 0.0013, + "step": 39250 + }, + { + "epoch": 19.79, + "grad_norm": 0.8923580646514893, + "learning_rate": 6.10462311557789e-06, + "loss": 0.0018, + "step": 39275 + }, + { + "epoch": 19.8, + "grad_norm": 1.0716352462768555, + "learning_rate": 6.102110552763819e-06, + "loss": 0.0012, + "step": 39300 + }, + { + "epoch": 19.81, + "grad_norm": 1.9225443601608276, + "learning_rate": 6.0995979899497495e-06, + "loss": 0.0016, + "step": 39325 + }, + { + "epoch": 19.82, + "grad_norm": 0.9054650664329529, + "learning_rate": 6.09708542713568e-06, + "loss": 0.0016, + "step": 39350 + }, + { + "epoch": 19.84, + "grad_norm": 0.6190009117126465, + "learning_rate": 6.094572864321608e-06, + "loss": 0.0012, + "step": 39375 + }, + { + "epoch": 19.85, + "grad_norm": 1.1662238836288452, + "learning_rate": 6.092060301507538e-06, + "loss": 0.0013, + "step": 39400 + }, + { + "epoch": 19.86, + "grad_norm": 1.0806312561035156, + "learning_rate": 6.089547738693468e-06, + "loss": 0.0019, + "step": 39425 + }, + { + "epoch": 19.87, + "grad_norm": 0.7853173017501831, + "learning_rate": 6.087035175879398e-06, + "loss": 0.0015, + "step": 39450 + }, + { + "epoch": 19.89, + "grad_norm": 0.9636842012405396, + "learning_rate": 6.084522613065327e-06, + "loss": 0.0014, + "step": 39475 + }, + { + "epoch": 19.9, + "grad_norm": 1.7559266090393066, + "learning_rate": 6.082010050251257e-06, + "loss": 0.0014, + "step": 39500 + }, + { + "epoch": 19.91, + "grad_norm": 0.9466189742088318, + "learning_rate": 6.079497487437186e-06, + "loss": 0.0014, + "step": 39525 + }, + { + "epoch": 19.92, + "grad_norm": 1.7009310722351074, + "learning_rate": 6.076984924623116e-06, + "loss": 0.0015, + "step": 39550 + }, + { + "epoch": 19.94, + "grad_norm": 1.0113627910614014, + "learning_rate": 6.074472361809046e-06, + "loss": 0.0013, + "step": 39575 + }, + { + "epoch": 19.95, + "grad_norm": 0.990430474281311, + "learning_rate": 6.071959798994975e-06, + "loss": 0.0016, + "step": 39600 + }, + { + "epoch": 19.96, + "grad_norm": 1.0662728548049927, + "learning_rate": 6.069447236180906e-06, + "loss": 0.0017, + "step": 39625 + }, + { + "epoch": 19.97, + "grad_norm": 1.152665138244629, + "learning_rate": 6.066934673366834e-06, + "loss": 0.0019, + "step": 39650 + }, + { + "epoch": 19.99, + "grad_norm": 2.0811469554901123, + "learning_rate": 6.064422110552764e-06, + "loss": 0.0018, + "step": 39675 + }, + { + "epoch": 20.0, + "grad_norm": 1.3459393978118896, + "learning_rate": 6.0619095477386936e-06, + "loss": 0.0021, + "step": 39700 + }, + { + "epoch": 20.01, + "grad_norm": 1.1473982334136963, + "learning_rate": 6.059396984924624e-06, + "loss": 0.0013, + "step": 39725 + }, + { + "epoch": 20.03, + "grad_norm": 1.0483380556106567, + "learning_rate": 6.056884422110553e-06, + "loss": 0.0012, + "step": 39750 + }, + { + "epoch": 20.04, + "grad_norm": 0.5056473016738892, + "learning_rate": 6.054371859296483e-06, + "loss": 0.0015, + "step": 39775 + }, + { + "epoch": 20.05, + "grad_norm": 0.9125507473945618, + "learning_rate": 6.051859296482413e-06, + "loss": 0.0013, + "step": 39800 + }, + { + "epoch": 20.06, + "grad_norm": 0.17925478518009186, + "learning_rate": 6.049346733668342e-06, + "loss": 0.0013, + "step": 39825 + }, + { + "epoch": 20.08, + "grad_norm": 0.492924302816391, + "learning_rate": 6.046834170854272e-06, + "loss": 0.0013, + "step": 39850 + }, + { + "epoch": 20.09, + "grad_norm": 1.4264193773269653, + "learning_rate": 6.044321608040201e-06, + "loss": 0.0014, + "step": 39875 + }, + { + "epoch": 20.1, + "grad_norm": 0.6481070518493652, + "learning_rate": 6.0418090452261315e-06, + "loss": 0.0014, + "step": 39900 + }, + { + "epoch": 20.11, + "grad_norm": 0.9014895558357239, + "learning_rate": 6.03929648241206e-06, + "loss": 0.0014, + "step": 39925 + }, + { + "epoch": 20.13, + "grad_norm": 1.6231021881103516, + "learning_rate": 6.03678391959799e-06, + "loss": 0.0014, + "step": 39950 + }, + { + "epoch": 20.14, + "grad_norm": 0.2752940058708191, + "learning_rate": 6.03427135678392e-06, + "loss": 0.0009, + "step": 39975 + }, + { + "epoch": 20.15, + "grad_norm": 0.902050256729126, + "learning_rate": 6.03175879396985e-06, + "loss": 0.0009, + "step": 40000 + }, + { + "epoch": 20.15, + "eval_loss": 0.3369000256061554, + "eval_runtime": 648.606, + "eval_samples_per_second": 2.172, + "eval_steps_per_second": 2.172, + "eval_wer": 23.24455205811138, + "step": 40000 + }, + { + "epoch": 20.16, + "grad_norm": 0.18782569468021393, + "learning_rate": 6.02924623115578e-06, + "loss": 0.0007, + "step": 40025 + }, + { + "epoch": 20.18, + "grad_norm": 0.6005980372428894, + "learning_rate": 6.026733668341709e-06, + "loss": 0.0008, + "step": 40050 + }, + { + "epoch": 20.19, + "grad_norm": 0.7301942110061646, + "learning_rate": 6.024221105527639e-06, + "loss": 0.0011, + "step": 40075 + }, + { + "epoch": 20.2, + "grad_norm": 0.4620230793952942, + "learning_rate": 6.021708542713568e-06, + "loss": 0.0012, + "step": 40100 + }, + { + "epoch": 20.21, + "grad_norm": 0.23639623820781708, + "learning_rate": 6.019195979899498e-06, + "loss": 0.001, + "step": 40125 + }, + { + "epoch": 20.23, + "grad_norm": 1.1007659435272217, + "learning_rate": 6.016683417085427e-06, + "loss": 0.0011, + "step": 40150 + }, + { + "epoch": 20.24, + "grad_norm": 0.7579511404037476, + "learning_rate": 6.0141708542713575e-06, + "loss": 0.0013, + "step": 40175 + }, + { + "epoch": 20.25, + "grad_norm": 0.17022021114826202, + "learning_rate": 6.011658291457288e-06, + "loss": 0.001, + "step": 40200 + }, + { + "epoch": 20.26, + "grad_norm": 0.919007420539856, + "learning_rate": 6.009145728643216e-06, + "loss": 0.0013, + "step": 40225 + }, + { + "epoch": 20.28, + "grad_norm": 0.8233655691146851, + "learning_rate": 6.006633165829146e-06, + "loss": 0.0011, + "step": 40250 + }, + { + "epoch": 20.29, + "grad_norm": 0.6930840611457825, + "learning_rate": 6.004120603015076e-06, + "loss": 0.0012, + "step": 40275 + }, + { + "epoch": 20.3, + "grad_norm": 0.4709855616092682, + "learning_rate": 6.001608040201006e-06, + "loss": 0.0018, + "step": 40300 + }, + { + "epoch": 20.31, + "grad_norm": 0.2110186368227005, + "learning_rate": 5.999095477386935e-06, + "loss": 0.0012, + "step": 40325 + }, + { + "epoch": 20.33, + "grad_norm": 0.48267343640327454, + "learning_rate": 5.996582914572865e-06, + "loss": 0.0012, + "step": 40350 + }, + { + "epoch": 20.34, + "grad_norm": 0.6853476762771606, + "learning_rate": 5.994070351758794e-06, + "loss": 0.0012, + "step": 40375 + }, + { + "epoch": 20.35, + "grad_norm": 0.9809117317199707, + "learning_rate": 5.991557788944724e-06, + "loss": 0.0012, + "step": 40400 + }, + { + "epoch": 20.37, + "grad_norm": 1.3809919357299805, + "learning_rate": 5.989045226130654e-06, + "loss": 0.0015, + "step": 40425 + }, + { + "epoch": 20.38, + "grad_norm": 1.5639605522155762, + "learning_rate": 5.9865326633165834e-06, + "loss": 0.002, + "step": 40450 + }, + { + "epoch": 20.39, + "grad_norm": 1.0514106750488281, + "learning_rate": 5.984020100502514e-06, + "loss": 0.0015, + "step": 40475 + }, + { + "epoch": 20.4, + "grad_norm": 0.9717534780502319, + "learning_rate": 5.981507537688442e-06, + "loss": 0.0017, + "step": 40500 + }, + { + "epoch": 20.42, + "grad_norm": 1.0924715995788574, + "learning_rate": 5.978994974874372e-06, + "loss": 0.0012, + "step": 40525 + }, + { + "epoch": 20.43, + "grad_norm": 1.3465650081634521, + "learning_rate": 5.9764824120603016e-06, + "loss": 0.0011, + "step": 40550 + }, + { + "epoch": 20.44, + "grad_norm": 0.6348648071289062, + "learning_rate": 5.973969849246232e-06, + "loss": 0.0016, + "step": 40575 + }, + { + "epoch": 20.45, + "grad_norm": 1.0228688716888428, + "learning_rate": 5.971457286432162e-06, + "loss": 0.0011, + "step": 40600 + }, + { + "epoch": 20.47, + "grad_norm": 1.5299664735794067, + "learning_rate": 5.968944723618091e-06, + "loss": 0.0012, + "step": 40625 + }, + { + "epoch": 20.48, + "grad_norm": 1.601320743560791, + "learning_rate": 5.966432160804021e-06, + "loss": 0.0014, + "step": 40650 + }, + { + "epoch": 20.49, + "grad_norm": 0.6638547778129578, + "learning_rate": 5.96391959798995e-06, + "loss": 0.0014, + "step": 40675 + }, + { + "epoch": 20.5, + "grad_norm": 2.6972315311431885, + "learning_rate": 5.96140703517588e-06, + "loss": 0.0016, + "step": 40700 + }, + { + "epoch": 20.52, + "grad_norm": 0.6832017302513123, + "learning_rate": 5.958894472361809e-06, + "loss": 0.0016, + "step": 40725 + }, + { + "epoch": 20.53, + "grad_norm": 0.46338987350463867, + "learning_rate": 5.9563819095477395e-06, + "loss": 0.0012, + "step": 40750 + }, + { + "epoch": 20.54, + "grad_norm": 0.3584813177585602, + "learning_rate": 5.953869346733668e-06, + "loss": 0.0015, + "step": 40775 + }, + { + "epoch": 20.55, + "grad_norm": 1.5687421560287476, + "learning_rate": 5.951356783919598e-06, + "loss": 0.0017, + "step": 40800 + }, + { + "epoch": 20.57, + "grad_norm": 0.5602162480354309, + "learning_rate": 5.948844221105528e-06, + "loss": 0.0012, + "step": 40825 + }, + { + "epoch": 20.58, + "grad_norm": 0.16096442937850952, + "learning_rate": 5.946331658291458e-06, + "loss": 0.0012, + "step": 40850 + }, + { + "epoch": 20.59, + "grad_norm": 0.5620841979980469, + "learning_rate": 5.943819095477388e-06, + "loss": 0.0014, + "step": 40875 + }, + { + "epoch": 20.6, + "grad_norm": 0.5683684349060059, + "learning_rate": 5.941306532663317e-06, + "loss": 0.0017, + "step": 40900 + }, + { + "epoch": 20.62, + "grad_norm": 1.4936867952346802, + "learning_rate": 5.938793969849247e-06, + "loss": 0.0015, + "step": 40925 + }, + { + "epoch": 20.63, + "grad_norm": 0.45212438702583313, + "learning_rate": 5.936281407035176e-06, + "loss": 0.0013, + "step": 40950 + }, + { + "epoch": 20.64, + "grad_norm": 1.8357038497924805, + "learning_rate": 5.933768844221106e-06, + "loss": 0.0011, + "step": 40975 + }, + { + "epoch": 20.65, + "grad_norm": 0.1379358172416687, + "learning_rate": 5.931256281407035e-06, + "loss": 0.0015, + "step": 41000 + }, + { + "epoch": 20.65, + "eval_loss": 0.3424818515777588, + "eval_runtime": 650.8582, + "eval_samples_per_second": 2.165, + "eval_steps_per_second": 2.165, + "eval_wer": 23.36215842269111, + "step": 41000 + }, + { + "epoch": 20.67, + "grad_norm": 0.342557817697525, + "learning_rate": 5.9287437185929655e-06, + "loss": 0.001, + "step": 41025 + }, + { + "epoch": 20.68, + "grad_norm": 0.7056984901428223, + "learning_rate": 5.926231155778896e-06, + "loss": 0.0013, + "step": 41050 + }, + { + "epoch": 20.69, + "grad_norm": 1.0098013877868652, + "learning_rate": 5.923718592964824e-06, + "loss": 0.0015, + "step": 41075 + }, + { + "epoch": 20.71, + "grad_norm": 0.6967382431030273, + "learning_rate": 5.921206030150754e-06, + "loss": 0.0014, + "step": 41100 + }, + { + "epoch": 20.72, + "grad_norm": 0.544989287853241, + "learning_rate": 5.918693467336684e-06, + "loss": 0.0016, + "step": 41125 + }, + { + "epoch": 20.73, + "grad_norm": 1.2400965690612793, + "learning_rate": 5.916180904522614e-06, + "loss": 0.0015, + "step": 41150 + }, + { + "epoch": 20.74, + "grad_norm": 0.926023006439209, + "learning_rate": 5.913668341708543e-06, + "loss": 0.0016, + "step": 41175 + }, + { + "epoch": 20.76, + "grad_norm": 1.1986762285232544, + "learning_rate": 5.911155778894473e-06, + "loss": 0.0016, + "step": 41200 + }, + { + "epoch": 20.77, + "grad_norm": 0.6431388854980469, + "learning_rate": 5.908643216080403e-06, + "loss": 0.0014, + "step": 41225 + }, + { + "epoch": 20.78, + "grad_norm": 1.1368434429168701, + "learning_rate": 5.906130653266332e-06, + "loss": 0.0013, + "step": 41250 + }, + { + "epoch": 20.79, + "grad_norm": 0.7638266086578369, + "learning_rate": 5.903618090452262e-06, + "loss": 0.0014, + "step": 41275 + }, + { + "epoch": 20.81, + "grad_norm": 1.2455073595046997, + "learning_rate": 5.9011055276381914e-06, + "loss": 0.0013, + "step": 41300 + }, + { + "epoch": 20.82, + "grad_norm": 0.9213681817054749, + "learning_rate": 5.898592964824122e-06, + "loss": 0.0015, + "step": 41325 + }, + { + "epoch": 20.83, + "grad_norm": 0.5151415467262268, + "learning_rate": 5.89608040201005e-06, + "loss": 0.0016, + "step": 41350 + }, + { + "epoch": 20.84, + "grad_norm": 0.7288360595703125, + "learning_rate": 5.89356783919598e-06, + "loss": 0.0017, + "step": 41375 + }, + { + "epoch": 20.86, + "grad_norm": 0.4819887578487396, + "learning_rate": 5.8910552763819096e-06, + "loss": 0.0013, + "step": 41400 + }, + { + "epoch": 20.87, + "grad_norm": 1.1284375190734863, + "learning_rate": 5.88854271356784e-06, + "loss": 0.0015, + "step": 41425 + }, + { + "epoch": 20.88, + "grad_norm": 0.31427863240242004, + "learning_rate": 5.88603015075377e-06, + "loss": 0.0014, + "step": 41450 + }, + { + "epoch": 20.89, + "grad_norm": 0.9035623669624329, + "learning_rate": 5.883517587939699e-06, + "loss": 0.0013, + "step": 41475 + }, + { + "epoch": 20.91, + "grad_norm": 1.357260823249817, + "learning_rate": 5.881005025125629e-06, + "loss": 0.0011, + "step": 41500 + }, + { + "epoch": 20.92, + "grad_norm": 0.8989688158035278, + "learning_rate": 5.878492462311558e-06, + "loss": 0.0016, + "step": 41525 + }, + { + "epoch": 20.93, + "grad_norm": 1.1103880405426025, + "learning_rate": 5.875979899497488e-06, + "loss": 0.0016, + "step": 41550 + }, + { + "epoch": 20.94, + "grad_norm": 0.8313987851142883, + "learning_rate": 5.873467336683417e-06, + "loss": 0.0012, + "step": 41575 + }, + { + "epoch": 20.96, + "grad_norm": 1.2921781539916992, + "learning_rate": 5.8709547738693475e-06, + "loss": 0.0013, + "step": 41600 + }, + { + "epoch": 20.97, + "grad_norm": 0.9626322984695435, + "learning_rate": 5.868442211055276e-06, + "loss": 0.0014, + "step": 41625 + }, + { + "epoch": 20.98, + "grad_norm": 1.2628862857818604, + "learning_rate": 5.865929648241206e-06, + "loss": 0.0012, + "step": 41650 + }, + { + "epoch": 20.99, + "grad_norm": 0.6756789088249207, + "learning_rate": 5.863417085427136e-06, + "loss": 0.0011, + "step": 41675 + }, + { + "epoch": 21.01, + "grad_norm": 0.42541012167930603, + "learning_rate": 5.860904522613066e-06, + "loss": 0.0012, + "step": 41700 + }, + { + "epoch": 21.02, + "grad_norm": 1.8883837461471558, + "learning_rate": 5.858391959798996e-06, + "loss": 0.0012, + "step": 41725 + }, + { + "epoch": 21.03, + "grad_norm": 0.9031746983528137, + "learning_rate": 5.855879396984925e-06, + "loss": 0.001, + "step": 41750 + }, + { + "epoch": 21.05, + "grad_norm": 0.30492404103279114, + "learning_rate": 5.853366834170855e-06, + "loss": 0.001, + "step": 41775 + }, + { + "epoch": 21.06, + "grad_norm": 0.8610237836837769, + "learning_rate": 5.850854271356784e-06, + "loss": 0.0008, + "step": 41800 + }, + { + "epoch": 21.07, + "grad_norm": 0.36193329095840454, + "learning_rate": 5.848341708542714e-06, + "loss": 0.001, + "step": 41825 + }, + { + "epoch": 21.08, + "grad_norm": 0.7772315740585327, + "learning_rate": 5.845829145728644e-06, + "loss": 0.0012, + "step": 41850 + }, + { + "epoch": 21.1, + "grad_norm": 0.4699445068836212, + "learning_rate": 5.8433165829145735e-06, + "loss": 0.0008, + "step": 41875 + }, + { + "epoch": 21.11, + "grad_norm": 1.3260185718536377, + "learning_rate": 5.840804020100504e-06, + "loss": 0.0009, + "step": 41900 + }, + { + "epoch": 21.12, + "grad_norm": 0.33898288011550903, + "learning_rate": 5.838291457286432e-06, + "loss": 0.0008, + "step": 41925 + }, + { + "epoch": 21.13, + "grad_norm": 1.4685719013214111, + "learning_rate": 5.835778894472362e-06, + "loss": 0.0011, + "step": 41950 + }, + { + "epoch": 21.15, + "grad_norm": 2.672056198120117, + "learning_rate": 5.833266331658292e-06, + "loss": 0.0011, + "step": 41975 + }, + { + "epoch": 21.16, + "grad_norm": 1.9097732305526733, + "learning_rate": 5.830753768844222e-06, + "loss": 0.0012, + "step": 42000 + }, + { + "epoch": 21.16, + "eval_loss": 0.33881473541259766, + "eval_runtime": 651.7527, + "eval_samples_per_second": 2.162, + "eval_steps_per_second": 2.162, + "eval_wer": 22.815634728467657, + "step": 42000 + }, + { + "epoch": 21.17, + "grad_norm": 1.823331356048584, + "learning_rate": 5.828241206030151e-06, + "loss": 0.0012, + "step": 42025 + }, + { + "epoch": 21.18, + "grad_norm": 0.6585187911987305, + "learning_rate": 5.825728643216081e-06, + "loss": 0.0007, + "step": 42050 + }, + { + "epoch": 21.2, + "grad_norm": 0.8023566603660583, + "learning_rate": 5.823216080402011e-06, + "loss": 0.0014, + "step": 42075 + }, + { + "epoch": 21.21, + "grad_norm": 0.2983376979827881, + "learning_rate": 5.82070351758794e-06, + "loss": 0.0011, + "step": 42100 + }, + { + "epoch": 21.22, + "grad_norm": 0.860016942024231, + "learning_rate": 5.81819095477387e-06, + "loss": 0.0014, + "step": 42125 + }, + { + "epoch": 21.23, + "grad_norm": 0.5695117115974426, + "learning_rate": 5.8156783919597994e-06, + "loss": 0.0011, + "step": 42150 + }, + { + "epoch": 21.25, + "grad_norm": 1.583242416381836, + "learning_rate": 5.81316582914573e-06, + "loss": 0.0009, + "step": 42175 + }, + { + "epoch": 21.26, + "grad_norm": 0.7320623397827148, + "learning_rate": 5.810653266331658e-06, + "loss": 0.0011, + "step": 42200 + }, + { + "epoch": 21.27, + "grad_norm": 0.40352964401245117, + "learning_rate": 5.808140703517588e-06, + "loss": 0.0011, + "step": 42225 + }, + { + "epoch": 21.28, + "grad_norm": 1.1269155740737915, + "learning_rate": 5.8056281407035176e-06, + "loss": 0.0012, + "step": 42250 + }, + { + "epoch": 21.3, + "grad_norm": 0.5032210350036621, + "learning_rate": 5.803216080402011e-06, + "loss": 0.0011, + "step": 42275 + }, + { + "epoch": 21.31, + "grad_norm": 0.9120995998382568, + "learning_rate": 5.80070351758794e-06, + "loss": 0.0009, + "step": 42300 + }, + { + "epoch": 21.32, + "grad_norm": 1.0807931423187256, + "learning_rate": 5.79819095477387e-06, + "loss": 0.001, + "step": 42325 + }, + { + "epoch": 21.34, + "grad_norm": 0.5654991269111633, + "learning_rate": 5.7956783919598e-06, + "loss": 0.0012, + "step": 42350 + }, + { + "epoch": 21.35, + "grad_norm": 0.4672817885875702, + "learning_rate": 5.793165829145729e-06, + "loss": 0.0013, + "step": 42375 + }, + { + "epoch": 21.36, + "grad_norm": 0.4968101978302002, + "learning_rate": 5.790653266331658e-06, + "loss": 0.0011, + "step": 42400 + }, + { + "epoch": 21.37, + "grad_norm": 1.5632412433624268, + "learning_rate": 5.7881407035175884e-06, + "loss": 0.0009, + "step": 42425 + }, + { + "epoch": 21.39, + "grad_norm": 1.279451847076416, + "learning_rate": 5.785628140703518e-06, + "loss": 0.0012, + "step": 42450 + }, + { + "epoch": 21.4, + "grad_norm": 0.2353668063879013, + "learning_rate": 5.783115577889448e-06, + "loss": 0.0012, + "step": 42475 + }, + { + "epoch": 21.41, + "grad_norm": 1.097109079360962, + "learning_rate": 5.780603015075378e-06, + "loss": 0.0013, + "step": 42500 + }, + { + "epoch": 21.42, + "grad_norm": 0.5137972235679626, + "learning_rate": 5.778090452261307e-06, + "loss": 0.0014, + "step": 42525 + }, + { + "epoch": 21.44, + "grad_norm": 0.7982520461082458, + "learning_rate": 5.775577889447237e-06, + "loss": 0.001, + "step": 42550 + }, + { + "epoch": 21.45, + "grad_norm": 4.075470924377441, + "learning_rate": 5.773065326633166e-06, + "loss": 0.0013, + "step": 42575 + }, + { + "epoch": 21.46, + "grad_norm": 0.4397733211517334, + "learning_rate": 5.770552763819096e-06, + "loss": 0.0012, + "step": 42600 + }, + { + "epoch": 21.47, + "grad_norm": 0.650394082069397, + "learning_rate": 5.7680402010050256e-06, + "loss": 0.0011, + "step": 42625 + }, + { + "epoch": 21.49, + "grad_norm": 1.209378957748413, + "learning_rate": 5.765527638190955e-06, + "loss": 0.0011, + "step": 42650 + }, + { + "epoch": 21.5, + "grad_norm": 0.47554540634155273, + "learning_rate": 5.763015075376885e-06, + "loss": 0.0012, + "step": 42675 + }, + { + "epoch": 21.51, + "grad_norm": 1.1790157556533813, + "learning_rate": 5.760502512562814e-06, + "loss": 0.0012, + "step": 42700 + }, + { + "epoch": 21.52, + "grad_norm": 1.1118874549865723, + "learning_rate": 5.7579899497487446e-06, + "loss": 0.0012, + "step": 42725 + }, + { + "epoch": 21.54, + "grad_norm": 0.956529974937439, + "learning_rate": 5.755477386934674e-06, + "loss": 0.0012, + "step": 42750 + }, + { + "epoch": 21.55, + "grad_norm": 0.8777883052825928, + "learning_rate": 5.752964824120604e-06, + "loss": 0.0014, + "step": 42775 + }, + { + "epoch": 21.56, + "grad_norm": 1.8465042114257812, + "learning_rate": 5.7504522613065325e-06, + "loss": 0.0013, + "step": 42800 + }, + { + "epoch": 21.57, + "grad_norm": 1.209672212600708, + "learning_rate": 5.747939698492463e-06, + "loss": 0.0014, + "step": 42825 + }, + { + "epoch": 21.59, + "grad_norm": 1.0998687744140625, + "learning_rate": 5.745427135678392e-06, + "loss": 0.0015, + "step": 42850 + }, + { + "epoch": 21.6, + "grad_norm": 1.5435659885406494, + "learning_rate": 5.742914572864322e-06, + "loss": 0.0016, + "step": 42875 + }, + { + "epoch": 21.61, + "grad_norm": 1.4470527172088623, + "learning_rate": 5.740402010050252e-06, + "loss": 0.0013, + "step": 42900 + }, + { + "epoch": 21.62, + "grad_norm": 0.8701749444007874, + "learning_rate": 5.737889447236181e-06, + "loss": 0.0013, + "step": 42925 + }, + { + "epoch": 21.64, + "grad_norm": 1.1655815839767456, + "learning_rate": 5.735376884422111e-06, + "loss": 0.001, + "step": 42950 + }, + { + "epoch": 21.65, + "grad_norm": 0.8317478895187378, + "learning_rate": 5.73286432160804e-06, + "loss": 0.0012, + "step": 42975 + }, + { + "epoch": 21.66, + "grad_norm": 0.2232298105955124, + "learning_rate": 5.7303517587939705e-06, + "loss": 0.0009, + "step": 43000 + }, + { + "epoch": 21.66, + "eval_loss": 0.34524887800216675, + "eval_runtime": 653.6286, + "eval_samples_per_second": 2.156, + "eval_steps_per_second": 2.156, + "eval_wer": 23.13386371497752, + "step": 43000 + }, + { + "epoch": 21.68, + "grad_norm": 1.433415412902832, + "learning_rate": 5.7278391959799e-06, + "loss": 0.0012, + "step": 43025 + }, + { + "epoch": 21.69, + "grad_norm": 1.2699315547943115, + "learning_rate": 5.72532663316583e-06, + "loss": 0.0013, + "step": 43050 + }, + { + "epoch": 21.7, + "grad_norm": 1.10042405128479, + "learning_rate": 5.7228140703517585e-06, + "loss": 0.0015, + "step": 43075 + }, + { + "epoch": 21.71, + "grad_norm": 1.3270542621612549, + "learning_rate": 5.720301507537689e-06, + "loss": 0.0012, + "step": 43100 + }, + { + "epoch": 21.73, + "grad_norm": 1.0051465034484863, + "learning_rate": 5.717788944723619e-06, + "loss": 0.0013, + "step": 43125 + }, + { + "epoch": 21.74, + "grad_norm": 2.064424514770508, + "learning_rate": 5.715276381909548e-06, + "loss": 0.0013, + "step": 43150 + }, + { + "epoch": 21.75, + "grad_norm": 1.45639967918396, + "learning_rate": 5.712763819095478e-06, + "loss": 0.0012, + "step": 43175 + }, + { + "epoch": 21.76, + "grad_norm": 0.627719521522522, + "learning_rate": 5.710251256281407e-06, + "loss": 0.0014, + "step": 43200 + }, + { + "epoch": 21.78, + "grad_norm": 0.1227678582072258, + "learning_rate": 5.707738693467337e-06, + "loss": 0.001, + "step": 43225 + }, + { + "epoch": 21.79, + "grad_norm": 0.5857130885124207, + "learning_rate": 5.705226130653266e-06, + "loss": 0.0011, + "step": 43250 + }, + { + "epoch": 21.8, + "grad_norm": 0.859379231929779, + "learning_rate": 5.7027135678391964e-06, + "loss": 0.0009, + "step": 43275 + }, + { + "epoch": 21.81, + "grad_norm": 0.30906084179878235, + "learning_rate": 5.700201005025127e-06, + "loss": 0.0012, + "step": 43300 + }, + { + "epoch": 21.83, + "grad_norm": 0.7923578023910522, + "learning_rate": 5.697688442211056e-06, + "loss": 0.0013, + "step": 43325 + }, + { + "epoch": 21.84, + "grad_norm": 0.5775353908538818, + "learning_rate": 5.695175879396986e-06, + "loss": 0.0012, + "step": 43350 + }, + { + "epoch": 21.85, + "grad_norm": 3.3586642742156982, + "learning_rate": 5.692663316582915e-06, + "loss": 0.0012, + "step": 43375 + }, + { + "epoch": 21.86, + "grad_norm": 1.088348388671875, + "learning_rate": 5.690150753768845e-06, + "loss": 0.0014, + "step": 43400 + }, + { + "epoch": 21.88, + "grad_norm": 1.9004027843475342, + "learning_rate": 5.687638190954774e-06, + "loss": 0.0015, + "step": 43425 + }, + { + "epoch": 21.89, + "grad_norm": 1.9646518230438232, + "learning_rate": 5.685125628140704e-06, + "loss": 0.0017, + "step": 43450 + }, + { + "epoch": 21.9, + "grad_norm": 2.539280891418457, + "learning_rate": 5.6826130653266336e-06, + "loss": 0.0013, + "step": 43475 + }, + { + "epoch": 21.91, + "grad_norm": 0.3101285398006439, + "learning_rate": 5.680100502512563e-06, + "loss": 0.0012, + "step": 43500 + }, + { + "epoch": 21.93, + "grad_norm": 2.127978563308716, + "learning_rate": 5.677587939698493e-06, + "loss": 0.0011, + "step": 43525 + }, + { + "epoch": 21.94, + "grad_norm": 0.44460466504096985, + "learning_rate": 5.675075376884422e-06, + "loss": 0.0016, + "step": 43550 + }, + { + "epoch": 21.95, + "grad_norm": 1.158146858215332, + "learning_rate": 5.6725628140703526e-06, + "loss": 0.0015, + "step": 43575 + }, + { + "epoch": 21.96, + "grad_norm": 1.6001086235046387, + "learning_rate": 5.670050251256282e-06, + "loss": 0.0015, + "step": 43600 + }, + { + "epoch": 21.98, + "grad_norm": 0.6981241703033447, + "learning_rate": 5.667537688442212e-06, + "loss": 0.0013, + "step": 43625 + }, + { + "epoch": 21.99, + "grad_norm": 0.45709845423698425, + "learning_rate": 5.6650251256281405e-06, + "loss": 0.0015, + "step": 43650 + }, + { + "epoch": 22.0, + "grad_norm": 1.2674909830093384, + "learning_rate": 5.662512562814071e-06, + "loss": 0.0015, + "step": 43675 + }, + { + "epoch": 22.02, + "grad_norm": 0.591598629951477, + "learning_rate": 5.66e-06, + "loss": 0.0008, + "step": 43700 + }, + { + "epoch": 22.03, + "grad_norm": 0.76810222864151, + "learning_rate": 5.65748743718593e-06, + "loss": 0.0009, + "step": 43725 + }, + { + "epoch": 22.04, + "grad_norm": 1.0374010801315308, + "learning_rate": 5.65497487437186e-06, + "loss": 0.0008, + "step": 43750 + }, + { + "epoch": 22.05, + "grad_norm": 1.3293951749801636, + "learning_rate": 5.652462311557789e-06, + "loss": 0.001, + "step": 43775 + }, + { + "epoch": 22.07, + "grad_norm": 1.8345214128494263, + "learning_rate": 5.649949748743719e-06, + "loss": 0.0007, + "step": 43800 + }, + { + "epoch": 22.08, + "grad_norm": 0.9170948266983032, + "learning_rate": 5.647437185929648e-06, + "loss": 0.0009, + "step": 43825 + }, + { + "epoch": 22.09, + "grad_norm": 0.6133562326431274, + "learning_rate": 5.6449246231155785e-06, + "loss": 0.0009, + "step": 43850 + }, + { + "epoch": 22.1, + "grad_norm": 0.9368806481361389, + "learning_rate": 5.642412060301508e-06, + "loss": 0.0011, + "step": 43875 + }, + { + "epoch": 22.12, + "grad_norm": 0.38559451699256897, + "learning_rate": 5.639899497487438e-06, + "loss": 0.0012, + "step": 43900 + }, + { + "epoch": 22.13, + "grad_norm": 0.1805734932422638, + "learning_rate": 5.637386934673368e-06, + "loss": 0.0008, + "step": 43925 + }, + { + "epoch": 22.14, + "grad_norm": 3.672781467437744, + "learning_rate": 5.634874371859297e-06, + "loss": 0.0007, + "step": 43950 + }, + { + "epoch": 22.15, + "grad_norm": 0.09901037812232971, + "learning_rate": 5.632361809045227e-06, + "loss": 0.0007, + "step": 43975 + }, + { + "epoch": 22.17, + "grad_norm": 0.24132972955703735, + "learning_rate": 5.629849246231156e-06, + "loss": 0.0007, + "step": 44000 + }, + { + "epoch": 22.17, + "eval_loss": 0.3424950838088989, + "eval_runtime": 653.4662, + "eval_samples_per_second": 2.156, + "eval_steps_per_second": 2.156, + "eval_wer": 22.725700449671393, + "step": 44000 + }, + { + "epoch": 22.18, + "grad_norm": 1.1592354774475098, + "learning_rate": 5.627336683417086e-06, + "loss": 0.001, + "step": 44025 + }, + { + "epoch": 22.19, + "grad_norm": 0.19992341101169586, + "learning_rate": 5.624824120603015e-06, + "loss": 0.0009, + "step": 44050 + }, + { + "epoch": 22.2, + "grad_norm": 0.8353447914123535, + "learning_rate": 5.622311557788945e-06, + "loss": 0.0009, + "step": 44075 + }, + { + "epoch": 22.22, + "grad_norm": 0.8957573771476746, + "learning_rate": 5.619798994974874e-06, + "loss": 0.0007, + "step": 44100 + }, + { + "epoch": 22.23, + "grad_norm": 0.22726494073867798, + "learning_rate": 5.6172864321608044e-06, + "loss": 0.0008, + "step": 44125 + }, + { + "epoch": 22.24, + "grad_norm": 0.44598016142845154, + "learning_rate": 5.614773869346735e-06, + "loss": 0.0011, + "step": 44150 + }, + { + "epoch": 22.25, + "grad_norm": 1.2066985368728638, + "learning_rate": 5.612261306532664e-06, + "loss": 0.0007, + "step": 44175 + }, + { + "epoch": 22.27, + "grad_norm": 0.41478270292282104, + "learning_rate": 5.609748743718594e-06, + "loss": 0.0008, + "step": 44200 + }, + { + "epoch": 22.28, + "grad_norm": 0.7415564656257629, + "learning_rate": 5.607236180904523e-06, + "loss": 0.0007, + "step": 44225 + }, + { + "epoch": 22.29, + "grad_norm": 0.26239436864852905, + "learning_rate": 5.604723618090453e-06, + "loss": 0.0007, + "step": 44250 + }, + { + "epoch": 22.3, + "grad_norm": 0.6609335541725159, + "learning_rate": 5.602211055276382e-06, + "loss": 0.0007, + "step": 44275 + }, + { + "epoch": 22.32, + "grad_norm": 0.35660025477409363, + "learning_rate": 5.599698492462312e-06, + "loss": 0.0008, + "step": 44300 + }, + { + "epoch": 22.33, + "grad_norm": 0.5038982629776001, + "learning_rate": 5.597185929648241e-06, + "loss": 0.0008, + "step": 44325 + }, + { + "epoch": 22.34, + "grad_norm": 1.157596468925476, + "learning_rate": 5.594673366834171e-06, + "loss": 0.0009, + "step": 44350 + }, + { + "epoch": 22.36, + "grad_norm": 1.0030407905578613, + "learning_rate": 5.592160804020101e-06, + "loss": 0.001, + "step": 44375 + }, + { + "epoch": 22.37, + "grad_norm": 0.47111421823501587, + "learning_rate": 5.58964824120603e-06, + "loss": 0.0008, + "step": 44400 + }, + { + "epoch": 22.38, + "grad_norm": 0.3429202139377594, + "learning_rate": 5.5871356783919606e-06, + "loss": 0.0014, + "step": 44425 + }, + { + "epoch": 22.39, + "grad_norm": 1.4424147605895996, + "learning_rate": 5.58462311557789e-06, + "loss": 0.0011, + "step": 44450 + }, + { + "epoch": 22.41, + "grad_norm": 0.48875829577445984, + "learning_rate": 5.58211055276382e-06, + "loss": 0.0008, + "step": 44475 + }, + { + "epoch": 22.42, + "grad_norm": 0.23549383878707886, + "learning_rate": 5.5795979899497485e-06, + "loss": 0.0007, + "step": 44500 + }, + { + "epoch": 22.43, + "grad_norm": 1.24130117893219, + "learning_rate": 5.577085427135679e-06, + "loss": 0.0014, + "step": 44525 + }, + { + "epoch": 22.44, + "grad_norm": 0.129581019282341, + "learning_rate": 5.574572864321609e-06, + "loss": 0.001, + "step": 44550 + }, + { + "epoch": 22.46, + "grad_norm": 1.358135461807251, + "learning_rate": 5.572060301507538e-06, + "loss": 0.0012, + "step": 44575 + }, + { + "epoch": 22.47, + "grad_norm": 0.5511311292648315, + "learning_rate": 5.569547738693468e-06, + "loss": 0.0011, + "step": 44600 + }, + { + "epoch": 22.48, + "grad_norm": 0.2945619225502014, + "learning_rate": 5.567035175879397e-06, + "loss": 0.0009, + "step": 44625 + }, + { + "epoch": 22.49, + "grad_norm": 1.2442690134048462, + "learning_rate": 5.564522613065327e-06, + "loss": 0.0011, + "step": 44650 + }, + { + "epoch": 22.51, + "grad_norm": 1.0728257894515991, + "learning_rate": 5.562010050251256e-06, + "loss": 0.0009, + "step": 44675 + }, + { + "epoch": 22.52, + "grad_norm": 0.51957768201828, + "learning_rate": 5.5594974874371865e-06, + "loss": 0.0012, + "step": 44700 + }, + { + "epoch": 22.53, + "grad_norm": 1.0889358520507812, + "learning_rate": 5.556984924623116e-06, + "loss": 0.0008, + "step": 44725 + }, + { + "epoch": 22.54, + "grad_norm": 1.2384743690490723, + "learning_rate": 5.554472361809046e-06, + "loss": 0.0009, + "step": 44750 + }, + { + "epoch": 22.56, + "grad_norm": 1.6399548053741455, + "learning_rate": 5.551959798994976e-06, + "loss": 0.0012, + "step": 44775 + }, + { + "epoch": 22.57, + "grad_norm": 1.122065544128418, + "learning_rate": 5.549447236180905e-06, + "loss": 0.0013, + "step": 44800 + }, + { + "epoch": 22.58, + "grad_norm": 1.5687880516052246, + "learning_rate": 5.546934673366835e-06, + "loss": 0.0011, + "step": 44825 + }, + { + "epoch": 22.59, + "grad_norm": 0.26494044065475464, + "learning_rate": 5.544422110552764e-06, + "loss": 0.0013, + "step": 44850 + }, + { + "epoch": 22.61, + "grad_norm": 2.1011672019958496, + "learning_rate": 5.541909547738694e-06, + "loss": 0.0012, + "step": 44875 + }, + { + "epoch": 22.62, + "grad_norm": 0.8654801249504089, + "learning_rate": 5.539396984924623e-06, + "loss": 0.0011, + "step": 44900 + }, + { + "epoch": 22.63, + "grad_norm": 0.3072070777416229, + "learning_rate": 5.536884422110553e-06, + "loss": 0.0009, + "step": 44925 + }, + { + "epoch": 22.64, + "grad_norm": 0.3950670659542084, + "learning_rate": 5.534371859296482e-06, + "loss": 0.0009, + "step": 44950 + }, + { + "epoch": 22.66, + "grad_norm": 0.8394802212715149, + "learning_rate": 5.5318592964824124e-06, + "loss": 0.0007, + "step": 44975 + }, + { + "epoch": 22.67, + "grad_norm": 0.3768616020679474, + "learning_rate": 5.529346733668343e-06, + "loss": 0.001, + "step": 45000 + }, + { + "epoch": 22.67, + "eval_loss": 0.34873273968696594, + "eval_runtime": 648.3792, + "eval_samples_per_second": 2.173, + "eval_steps_per_second": 2.173, + "eval_wer": 22.79488066413006, + "step": 45000 + }, + { + "epoch": 22.68, + "grad_norm": 0.8950992226600647, + "learning_rate": 5.526834170854272e-06, + "loss": 0.0012, + "step": 45025 + }, + { + "epoch": 22.7, + "grad_norm": 0.6401100754737854, + "learning_rate": 5.524321608040202e-06, + "loss": 0.0013, + "step": 45050 + }, + { + "epoch": 22.71, + "grad_norm": 1.1628910303115845, + "learning_rate": 5.521809045226131e-06, + "loss": 0.0012, + "step": 45075 + }, + { + "epoch": 22.72, + "grad_norm": 1.316792607307434, + "learning_rate": 5.519296482412061e-06, + "loss": 0.001, + "step": 45100 + }, + { + "epoch": 22.73, + "grad_norm": 1.3205770254135132, + "learning_rate": 5.51678391959799e-06, + "loss": 0.0013, + "step": 45125 + }, + { + "epoch": 22.75, + "grad_norm": 0.2712342441082001, + "learning_rate": 5.51427135678392e-06, + "loss": 0.0011, + "step": 45150 + }, + { + "epoch": 22.76, + "grad_norm": 0.47601330280303955, + "learning_rate": 5.51175879396985e-06, + "loss": 0.0011, + "step": 45175 + }, + { + "epoch": 22.77, + "grad_norm": 0.9388231039047241, + "learning_rate": 5.509246231155779e-06, + "loss": 0.0012, + "step": 45200 + }, + { + "epoch": 22.78, + "grad_norm": 1.183489203453064, + "learning_rate": 5.506834170854271e-06, + "loss": 0.0013, + "step": 45225 + }, + { + "epoch": 22.8, + "grad_norm": 1.0215598344802856, + "learning_rate": 5.5043216080402015e-06, + "loss": 0.0013, + "step": 45250 + }, + { + "epoch": 22.81, + "grad_norm": 0.5754547119140625, + "learning_rate": 5.501809045226131e-06, + "loss": 0.0012, + "step": 45275 + }, + { + "epoch": 22.82, + "grad_norm": 1.5252500772476196, + "learning_rate": 5.499296482412061e-06, + "loss": 0.0014, + "step": 45300 + }, + { + "epoch": 22.83, + "grad_norm": 0.5785127282142639, + "learning_rate": 5.49678391959799e-06, + "loss": 0.0011, + "step": 45325 + }, + { + "epoch": 22.85, + "grad_norm": 1.1003527641296387, + "learning_rate": 5.4942713567839204e-06, + "loss": 0.001, + "step": 45350 + }, + { + "epoch": 22.86, + "grad_norm": 1.1432653665542603, + "learning_rate": 5.491758793969851e-06, + "loss": 0.0012, + "step": 45375 + }, + { + "epoch": 22.87, + "grad_norm": 0.7556006908416748, + "learning_rate": 5.489246231155779e-06, + "loss": 0.0011, + "step": 45400 + }, + { + "epoch": 22.88, + "grad_norm": 0.6173690557479858, + "learning_rate": 5.486733668341709e-06, + "loss": 0.001, + "step": 45425 + }, + { + "epoch": 22.9, + "grad_norm": 0.3593469262123108, + "learning_rate": 5.484221105527639e-06, + "loss": 0.0014, + "step": 45450 + }, + { + "epoch": 22.91, + "grad_norm": 1.9169950485229492, + "learning_rate": 5.481708542713569e-06, + "loss": 0.0011, + "step": 45475 + }, + { + "epoch": 22.92, + "grad_norm": 0.8808764815330505, + "learning_rate": 5.479195979899497e-06, + "loss": 0.0014, + "step": 45500 + }, + { + "epoch": 22.93, + "grad_norm": 0.18010124564170837, + "learning_rate": 5.476683417085427e-06, + "loss": 0.0012, + "step": 45525 + }, + { + "epoch": 22.95, + "grad_norm": 0.6205843091011047, + "learning_rate": 5.474170854271357e-06, + "loss": 0.001, + "step": 45550 + }, + { + "epoch": 22.96, + "grad_norm": 1.8465744256973267, + "learning_rate": 5.471658291457287e-06, + "loss": 0.001, + "step": 45575 + }, + { + "epoch": 22.97, + "grad_norm": 0.47944799065589905, + "learning_rate": 5.469145728643217e-06, + "loss": 0.0012, + "step": 45600 + }, + { + "epoch": 22.98, + "grad_norm": 1.0778087377548218, + "learning_rate": 5.466633165829146e-06, + "loss": 0.0012, + "step": 45625 + }, + { + "epoch": 23.0, + "grad_norm": 1.2320517301559448, + "learning_rate": 5.4641206030150766e-06, + "loss": 0.001, + "step": 45650 + }, + { + "epoch": 23.01, + "grad_norm": 0.2702305316925049, + "learning_rate": 5.461608040201005e-06, + "loss": 0.0009, + "step": 45675 + }, + { + "epoch": 23.02, + "grad_norm": 0.4143355190753937, + "learning_rate": 5.459095477386935e-06, + "loss": 0.0007, + "step": 45700 + }, + { + "epoch": 23.04, + "grad_norm": 1.7724355459213257, + "learning_rate": 5.4565829145728645e-06, + "loss": 0.0007, + "step": 45725 + }, + { + "epoch": 23.05, + "grad_norm": 0.8267619609832764, + "learning_rate": 5.454070351758795e-06, + "loss": 0.0007, + "step": 45750 + }, + { + "epoch": 23.06, + "grad_norm": 0.7586312890052795, + "learning_rate": 5.451557788944723e-06, + "loss": 0.0007, + "step": 45775 + }, + { + "epoch": 23.07, + "grad_norm": 0.6827680468559265, + "learning_rate": 5.449045226130653e-06, + "loss": 0.001, + "step": 45800 + }, + { + "epoch": 23.09, + "grad_norm": 1.9296995401382446, + "learning_rate": 5.4465326633165835e-06, + "loss": 0.0011, + "step": 45825 + }, + { + "epoch": 23.1, + "grad_norm": 0.6591385006904602, + "learning_rate": 5.444020100502513e-06, + "loss": 0.001, + "step": 45850 + }, + { + "epoch": 23.11, + "grad_norm": 0.3265586793422699, + "learning_rate": 5.441507537688443e-06, + "loss": 0.0009, + "step": 45875 + }, + { + "epoch": 23.12, + "grad_norm": 1.2838494777679443, + "learning_rate": 5.438994974874372e-06, + "loss": 0.0008, + "step": 45900 + }, + { + "epoch": 23.14, + "grad_norm": 2.554136037826538, + "learning_rate": 5.4364824120603025e-06, + "loss": 0.0012, + "step": 45925 + }, + { + "epoch": 23.15, + "grad_norm": 1.0224181413650513, + "learning_rate": 5.433969849246231e-06, + "loss": 0.001, + "step": 45950 + }, + { + "epoch": 23.16, + "grad_norm": 1.341222882270813, + "learning_rate": 5.431457286432161e-06, + "loss": 0.0009, + "step": 45975 + }, + { + "epoch": 23.17, + "grad_norm": 0.49937868118286133, + "learning_rate": 5.428944723618091e-06, + "loss": 0.0007, + "step": 46000 + }, + { + "epoch": 23.17, + "eval_loss": 0.346920371055603, + "eval_runtime": 644.2541, + "eval_samples_per_second": 2.187, + "eval_steps_per_second": 2.187, + "eval_wer": 22.656520235212728, + "step": 46000 + }, + { + "epoch": 23.19, + "grad_norm": 0.7111514806747437, + "learning_rate": 5.426432160804021e-06, + "loss": 0.0008, + "step": 46025 + }, + { + "epoch": 23.2, + "grad_norm": 0.8799687623977661, + "learning_rate": 5.423919597989951e-06, + "loss": 0.0008, + "step": 46050 + }, + { + "epoch": 23.21, + "grad_norm": 0.3264644742012024, + "learning_rate": 5.421407035175879e-06, + "loss": 0.0012, + "step": 46075 + }, + { + "epoch": 23.22, + "grad_norm": 1.2488361597061157, + "learning_rate": 5.4188944723618095e-06, + "loss": 0.0011, + "step": 46100 + }, + { + "epoch": 23.24, + "grad_norm": 0.21208049356937408, + "learning_rate": 5.416381909547739e-06, + "loss": 0.001, + "step": 46125 + }, + { + "epoch": 23.25, + "grad_norm": 0.593122661113739, + "learning_rate": 5.413869346733669e-06, + "loss": 0.0008, + "step": 46150 + }, + { + "epoch": 23.26, + "grad_norm": 0.8040767312049866, + "learning_rate": 5.411356783919598e-06, + "loss": 0.0008, + "step": 46175 + }, + { + "epoch": 23.27, + "grad_norm": 0.21935276687145233, + "learning_rate": 5.4088442211055284e-06, + "loss": 0.0009, + "step": 46200 + }, + { + "epoch": 23.29, + "grad_norm": 0.16591764986515045, + "learning_rate": 5.406331658291459e-06, + "loss": 0.0007, + "step": 46225 + }, + { + "epoch": 23.3, + "grad_norm": 0.7102475762367249, + "learning_rate": 5.403819095477387e-06, + "loss": 0.0011, + "step": 46250 + }, + { + "epoch": 23.31, + "grad_norm": 2.245885133743286, + "learning_rate": 5.401306532663317e-06, + "loss": 0.0011, + "step": 46275 + }, + { + "epoch": 23.32, + "grad_norm": 0.5632694363594055, + "learning_rate": 5.398793969849247e-06, + "loss": 0.0008, + "step": 46300 + }, + { + "epoch": 23.34, + "grad_norm": 0.4642152190208435, + "learning_rate": 5.396281407035177e-06, + "loss": 0.0008, + "step": 46325 + }, + { + "epoch": 23.35, + "grad_norm": 1.0977600812911987, + "learning_rate": 5.393768844221105e-06, + "loss": 0.0012, + "step": 46350 + }, + { + "epoch": 23.36, + "grad_norm": 1.1424881219863892, + "learning_rate": 5.391256281407035e-06, + "loss": 0.0011, + "step": 46375 + }, + { + "epoch": 23.38, + "grad_norm": 1.7661696672439575, + "learning_rate": 5.388743718592965e-06, + "loss": 0.0009, + "step": 46400 + }, + { + "epoch": 23.39, + "grad_norm": 0.5764384269714355, + "learning_rate": 5.386231155778895e-06, + "loss": 0.0011, + "step": 46425 + }, + { + "epoch": 23.4, + "grad_norm": 0.5465607047080994, + "learning_rate": 5.383718592964825e-06, + "loss": 0.0015, + "step": 46450 + }, + { + "epoch": 23.41, + "grad_norm": 0.4862133264541626, + "learning_rate": 5.381206030150754e-06, + "loss": 0.0009, + "step": 46475 + }, + { + "epoch": 23.43, + "grad_norm": 0.23647759854793549, + "learning_rate": 5.3786934673366846e-06, + "loss": 0.0007, + "step": 46500 + }, + { + "epoch": 23.44, + "grad_norm": 0.3536996841430664, + "learning_rate": 5.376180904522613e-06, + "loss": 0.0007, + "step": 46525 + }, + { + "epoch": 23.45, + "grad_norm": 1.0129157304763794, + "learning_rate": 5.373668341708543e-06, + "loss": 0.0008, + "step": 46550 + }, + { + "epoch": 23.46, + "grad_norm": 0.41435906291007996, + "learning_rate": 5.3711557788944725e-06, + "loss": 0.0011, + "step": 46575 + }, + { + "epoch": 23.48, + "grad_norm": 0.33409273624420166, + "learning_rate": 5.368643216080403e-06, + "loss": 0.001, + "step": 46600 + }, + { + "epoch": 23.49, + "grad_norm": 0.2821144759654999, + "learning_rate": 5.366130653266333e-06, + "loss": 0.001, + "step": 46625 + }, + { + "epoch": 23.5, + "grad_norm": 1.0986402034759521, + "learning_rate": 5.363618090452261e-06, + "loss": 0.001, + "step": 46650 + }, + { + "epoch": 23.51, + "grad_norm": 1.016719102859497, + "learning_rate": 5.3611055276381915e-06, + "loss": 0.001, + "step": 46675 + }, + { + "epoch": 23.53, + "grad_norm": 0.6379337310791016, + "learning_rate": 5.358592964824121e-06, + "loss": 0.001, + "step": 46700 + }, + { + "epoch": 23.54, + "grad_norm": 0.8883301019668579, + "learning_rate": 5.356080402010051e-06, + "loss": 0.0008, + "step": 46725 + }, + { + "epoch": 23.55, + "grad_norm": 0.7816546559333801, + "learning_rate": 5.35356783919598e-06, + "loss": 0.0009, + "step": 46750 + }, + { + "epoch": 23.56, + "grad_norm": 0.9444398283958435, + "learning_rate": 5.3510552763819105e-06, + "loss": 0.0012, + "step": 46775 + }, + { + "epoch": 23.58, + "grad_norm": 0.7157164812088013, + "learning_rate": 5.348542713567839e-06, + "loss": 0.001, + "step": 46800 + }, + { + "epoch": 23.59, + "grad_norm": 0.31857380270957947, + "learning_rate": 5.346030150753769e-06, + "loss": 0.0011, + "step": 46825 + }, + { + "epoch": 23.6, + "grad_norm": 1.1218419075012207, + "learning_rate": 5.343517587939699e-06, + "loss": 0.0014, + "step": 46850 + }, + { + "epoch": 23.61, + "grad_norm": 1.2285215854644775, + "learning_rate": 5.341005025125629e-06, + "loss": 0.001, + "step": 46875 + }, + { + "epoch": 23.63, + "grad_norm": 1.1362957954406738, + "learning_rate": 5.338492462311559e-06, + "loss": 0.001, + "step": 46900 + }, + { + "epoch": 23.64, + "grad_norm": 1.0595365762710571, + "learning_rate": 5.335979899497487e-06, + "loss": 0.001, + "step": 46925 + }, + { + "epoch": 23.65, + "grad_norm": 0.9272093176841736, + "learning_rate": 5.3334673366834175e-06, + "loss": 0.0012, + "step": 46950 + }, + { + "epoch": 23.66, + "grad_norm": 1.4149200916290283, + "learning_rate": 5.330954773869347e-06, + "loss": 0.001, + "step": 46975 + }, + { + "epoch": 23.68, + "grad_norm": 0.7414202094078064, + "learning_rate": 5.328442211055277e-06, + "loss": 0.0015, + "step": 47000 + }, + { + "epoch": 23.68, + "eval_loss": 0.3519901931285858, + "eval_runtime": 650.2703, + "eval_samples_per_second": 2.167, + "eval_steps_per_second": 2.167, + "eval_wer": 22.905569007263924, + "step": 47000 + }, + { + "epoch": 23.69, + "grad_norm": 1.5265312194824219, + "learning_rate": 5.325929648241206e-06, + "loss": 0.0012, + "step": 47025 + }, + { + "epoch": 23.7, + "grad_norm": 0.3424956500530243, + "learning_rate": 5.3234170854271364e-06, + "loss": 0.001, + "step": 47050 + }, + { + "epoch": 23.72, + "grad_norm": 1.250054121017456, + "learning_rate": 5.320904522613067e-06, + "loss": 0.0012, + "step": 47075 + }, + { + "epoch": 23.73, + "grad_norm": 0.7167928218841553, + "learning_rate": 5.318391959798995e-06, + "loss": 0.0011, + "step": 47100 + }, + { + "epoch": 23.74, + "grad_norm": 1.2113206386566162, + "learning_rate": 5.315879396984925e-06, + "loss": 0.001, + "step": 47125 + }, + { + "epoch": 23.75, + "grad_norm": 0.683556079864502, + "learning_rate": 5.313366834170855e-06, + "loss": 0.0011, + "step": 47150 + }, + { + "epoch": 23.77, + "grad_norm": 0.2526809871196747, + "learning_rate": 5.310854271356785e-06, + "loss": 0.001, + "step": 47175 + }, + { + "epoch": 23.78, + "grad_norm": 1.4190630912780762, + "learning_rate": 5.308341708542713e-06, + "loss": 0.0012, + "step": 47200 + }, + { + "epoch": 23.79, + "grad_norm": 1.7319457530975342, + "learning_rate": 5.305829145728643e-06, + "loss": 0.0014, + "step": 47225 + }, + { + "epoch": 23.8, + "grad_norm": 0.9196786284446716, + "learning_rate": 5.3033165829145736e-06, + "loss": 0.0009, + "step": 47250 + }, + { + "epoch": 23.82, + "grad_norm": 0.6173463463783264, + "learning_rate": 5.300804020100503e-06, + "loss": 0.001, + "step": 47275 + }, + { + "epoch": 23.83, + "grad_norm": 0.6350324153900146, + "learning_rate": 5.298291457286433e-06, + "loss": 0.0012, + "step": 47300 + }, + { + "epoch": 23.84, + "grad_norm": 0.24510569870471954, + "learning_rate": 5.295778894472362e-06, + "loss": 0.0007, + "step": 47325 + }, + { + "epoch": 23.85, + "grad_norm": 0.6556370258331299, + "learning_rate": 5.2932663316582926e-06, + "loss": 0.0012, + "step": 47350 + }, + { + "epoch": 23.87, + "grad_norm": 0.13942670822143555, + "learning_rate": 5.290753768844221e-06, + "loss": 0.001, + "step": 47375 + }, + { + "epoch": 23.88, + "grad_norm": 0.5470798015594482, + "learning_rate": 5.288241206030151e-06, + "loss": 0.001, + "step": 47400 + }, + { + "epoch": 23.89, + "grad_norm": 0.18008272349834442, + "learning_rate": 5.2857286432160805e-06, + "loss": 0.0009, + "step": 47425 + }, + { + "epoch": 23.9, + "grad_norm": 0.6318380832672119, + "learning_rate": 5.283216080402011e-06, + "loss": 0.0011, + "step": 47450 + }, + { + "epoch": 23.92, + "grad_norm": 0.3588716387748718, + "learning_rate": 5.280703517587941e-06, + "loss": 0.0013, + "step": 47475 + }, + { + "epoch": 23.93, + "grad_norm": 0.29687148332595825, + "learning_rate": 5.278190954773869e-06, + "loss": 0.001, + "step": 47500 + }, + { + "epoch": 23.94, + "grad_norm": 0.864915132522583, + "learning_rate": 5.2756783919597995e-06, + "loss": 0.0012, + "step": 47525 + }, + { + "epoch": 23.95, + "grad_norm": 1.526944637298584, + "learning_rate": 5.273165829145729e-06, + "loss": 0.0011, + "step": 47550 + }, + { + "epoch": 23.97, + "grad_norm": 1.1919291019439697, + "learning_rate": 5.270653266331659e-06, + "loss": 0.0012, + "step": 47575 + }, + { + "epoch": 23.98, + "grad_norm": 0.46352332830429077, + "learning_rate": 5.268140703517588e-06, + "loss": 0.001, + "step": 47600 + }, + { + "epoch": 23.99, + "grad_norm": 1.840276837348938, + "learning_rate": 5.2656281407035185e-06, + "loss": 0.0011, + "step": 47625 + }, + { + "epoch": 24.01, + "grad_norm": 0.4287075996398926, + "learning_rate": 5.263115577889447e-06, + "loss": 0.001, + "step": 47650 + }, + { + "epoch": 24.02, + "grad_norm": 0.6221798062324524, + "learning_rate": 5.260603015075377e-06, + "loss": 0.0008, + "step": 47675 + }, + { + "epoch": 24.03, + "grad_norm": 0.7368704676628113, + "learning_rate": 5.258090452261307e-06, + "loss": 0.0009, + "step": 47700 + }, + { + "epoch": 24.04, + "grad_norm": 0.7908537983894348, + "learning_rate": 5.255577889447237e-06, + "loss": 0.0007, + "step": 47725 + }, + { + "epoch": 24.06, + "grad_norm": 0.32871556282043457, + "learning_rate": 5.253065326633167e-06, + "loss": 0.0006, + "step": 47750 + }, + { + "epoch": 24.07, + "grad_norm": 0.5463153719902039, + "learning_rate": 5.250552763819095e-06, + "loss": 0.0005, + "step": 47775 + }, + { + "epoch": 24.08, + "grad_norm": 1.1261115074157715, + "learning_rate": 5.2480402010050255e-06, + "loss": 0.0008, + "step": 47800 + }, + { + "epoch": 24.09, + "grad_norm": 0.19659703969955444, + "learning_rate": 5.245527638190955e-06, + "loss": 0.0007, + "step": 47825 + }, + { + "epoch": 24.11, + "grad_norm": 1.187845230102539, + "learning_rate": 5.243015075376885e-06, + "loss": 0.0005, + "step": 47850 + }, + { + "epoch": 24.12, + "grad_norm": 0.7721708416938782, + "learning_rate": 5.240502512562814e-06, + "loss": 0.0006, + "step": 47875 + }, + { + "epoch": 24.13, + "grad_norm": 1.14128577709198, + "learning_rate": 5.2379899497487444e-06, + "loss": 0.0007, + "step": 47900 + }, + { + "epoch": 24.14, + "grad_norm": 0.2921935021877289, + "learning_rate": 5.235477386934675e-06, + "loss": 0.0006, + "step": 47925 + }, + { + "epoch": 24.16, + "grad_norm": 0.542747437953949, + "learning_rate": 5.232964824120603e-06, + "loss": 0.0007, + "step": 47950 + }, + { + "epoch": 24.17, + "grad_norm": 0.1501588523387909, + "learning_rate": 5.230452261306533e-06, + "loss": 0.0006, + "step": 47975 + }, + { + "epoch": 24.18, + "grad_norm": 1.1452776193618774, + "learning_rate": 5.227939698492463e-06, + "loss": 0.0006, + "step": 48000 + }, + { + "epoch": 24.18, + "eval_loss": 0.3501649498939514, + "eval_runtime": 653.8098, + "eval_samples_per_second": 2.155, + "eval_steps_per_second": 2.155, + "eval_wer": 22.365963334486334, + "step": 48000 + }, + { + "epoch": 24.19, + "grad_norm": 0.4397837817668915, + "learning_rate": 5.225427135678393e-06, + "loss": 0.0008, + "step": 48025 + }, + { + "epoch": 24.21, + "grad_norm": 0.2055756002664566, + "learning_rate": 5.222914572864321e-06, + "loss": 0.0007, + "step": 48050 + }, + { + "epoch": 24.22, + "grad_norm": 0.5878810882568359, + "learning_rate": 5.220402010050251e-06, + "loss": 0.0006, + "step": 48075 + }, + { + "epoch": 24.23, + "grad_norm": 0.17719118297100067, + "learning_rate": 5.217889447236182e-06, + "loss": 0.0006, + "step": 48100 + }, + { + "epoch": 24.24, + "grad_norm": 0.6639860272407532, + "learning_rate": 5.215376884422111e-06, + "loss": 0.0007, + "step": 48125 + }, + { + "epoch": 24.26, + "grad_norm": 0.47352033853530884, + "learning_rate": 5.212864321608041e-06, + "loss": 0.0007, + "step": 48150 + }, + { + "epoch": 24.27, + "grad_norm": 0.5616294145584106, + "learning_rate": 5.21035175879397e-06, + "loss": 0.0007, + "step": 48175 + }, + { + "epoch": 24.28, + "grad_norm": 0.23458968102931976, + "learning_rate": 5.2078391959799006e-06, + "loss": 0.0005, + "step": 48200 + }, + { + "epoch": 24.29, + "grad_norm": 1.2819747924804688, + "learning_rate": 5.205326633165829e-06, + "loss": 0.0009, + "step": 48225 + }, + { + "epoch": 24.31, + "grad_norm": 0.9145079255104065, + "learning_rate": 5.202814070351759e-06, + "loss": 0.001, + "step": 48250 + }, + { + "epoch": 24.32, + "grad_norm": 0.3194675147533417, + "learning_rate": 5.2003015075376885e-06, + "loss": 0.0007, + "step": 48275 + }, + { + "epoch": 24.33, + "grad_norm": 0.7687875032424927, + "learning_rate": 5.197788944723619e-06, + "loss": 0.0007, + "step": 48300 + }, + { + "epoch": 24.35, + "grad_norm": 0.46240946650505066, + "learning_rate": 5.195276381909549e-06, + "loss": 0.0007, + "step": 48325 + }, + { + "epoch": 24.36, + "grad_norm": 0.9186506271362305, + "learning_rate": 5.192763819095477e-06, + "loss": 0.0007, + "step": 48350 + }, + { + "epoch": 24.37, + "grad_norm": 0.2463284581899643, + "learning_rate": 5.1902512562814075e-06, + "loss": 0.0008, + "step": 48375 + }, + { + "epoch": 24.38, + "grad_norm": 0.2767968773841858, + "learning_rate": 5.187738693467337e-06, + "loss": 0.001, + "step": 48400 + }, + { + "epoch": 24.4, + "grad_norm": 0.46523067355155945, + "learning_rate": 5.185226130653267e-06, + "loss": 0.0009, + "step": 48425 + }, + { + "epoch": 24.41, + "grad_norm": 0.23646961152553558, + "learning_rate": 5.182713567839196e-06, + "loss": 0.0009, + "step": 48450 + }, + { + "epoch": 24.42, + "grad_norm": 0.6883164048194885, + "learning_rate": 5.1802010050251265e-06, + "loss": 0.0007, + "step": 48475 + }, + { + "epoch": 24.43, + "grad_norm": 0.33344462513923645, + "learning_rate": 5.177688442211055e-06, + "loss": 0.0009, + "step": 48500 + }, + { + "epoch": 24.45, + "grad_norm": 0.1479116529226303, + "learning_rate": 5.175175879396985e-06, + "loss": 0.0007, + "step": 48525 + }, + { + "epoch": 24.46, + "grad_norm": 1.2230092287063599, + "learning_rate": 5.172663316582915e-06, + "loss": 0.0008, + "step": 48550 + }, + { + "epoch": 24.47, + "grad_norm": 0.17954891920089722, + "learning_rate": 5.170150753768845e-06, + "loss": 0.0009, + "step": 48575 + }, + { + "epoch": 24.48, + "grad_norm": 0.7569608092308044, + "learning_rate": 5.167638190954775e-06, + "loss": 0.0007, + "step": 48600 + }, + { + "epoch": 24.5, + "grad_norm": 0.9554746747016907, + "learning_rate": 5.165125628140703e-06, + "loss": 0.0009, + "step": 48625 + }, + { + "epoch": 24.51, + "grad_norm": 0.6516941785812378, + "learning_rate": 5.1626130653266335e-06, + "loss": 0.0009, + "step": 48650 + }, + { + "epoch": 24.52, + "grad_norm": 0.6526229381561279, + "learning_rate": 5.160100502512563e-06, + "loss": 0.001, + "step": 48675 + }, + { + "epoch": 24.53, + "grad_norm": 0.17622053623199463, + "learning_rate": 5.157587939698493e-06, + "loss": 0.0007, + "step": 48700 + }, + { + "epoch": 24.55, + "grad_norm": 0.8113358616828918, + "learning_rate": 5.155075376884423e-06, + "loss": 0.0009, + "step": 48725 + }, + { + "epoch": 24.56, + "grad_norm": 0.7531803846359253, + "learning_rate": 5.1525628140703525e-06, + "loss": 0.0008, + "step": 48750 + }, + { + "epoch": 24.57, + "grad_norm": 0.82627934217453, + "learning_rate": 5.150050251256283e-06, + "loss": 0.0007, + "step": 48775 + }, + { + "epoch": 24.58, + "grad_norm": 1.1042143106460571, + "learning_rate": 5.147537688442211e-06, + "loss": 0.0007, + "step": 48800 + }, + { + "epoch": 24.6, + "grad_norm": 0.34287524223327637, + "learning_rate": 5.145025125628141e-06, + "loss": 0.0006, + "step": 48825 + }, + { + "epoch": 24.61, + "grad_norm": 1.404876470565796, + "learning_rate": 5.142512562814071e-06, + "loss": 0.001, + "step": 48850 + }, + { + "epoch": 24.62, + "grad_norm": 0.3671759068965912, + "learning_rate": 5.140000000000001e-06, + "loss": 0.0009, + "step": 48875 + }, + { + "epoch": 24.63, + "grad_norm": 0.7027815580368042, + "learning_rate": 5.137487437185929e-06, + "loss": 0.0009, + "step": 48900 + }, + { + "epoch": 24.65, + "grad_norm": 0.8009297847747803, + "learning_rate": 5.134974874371859e-06, + "loss": 0.001, + "step": 48925 + }, + { + "epoch": 24.66, + "grad_norm": 1.0700089931488037, + "learning_rate": 5.13246231155779e-06, + "loss": 0.001, + "step": 48950 + }, + { + "epoch": 24.67, + "grad_norm": 0.4704926609992981, + "learning_rate": 5.129949748743719e-06, + "loss": 0.0014, + "step": 48975 + }, + { + "epoch": 24.69, + "grad_norm": 0.2424398958683014, + "learning_rate": 5.127437185929649e-06, + "loss": 0.0012, + "step": 49000 + }, + { + "epoch": 24.69, + "eval_loss": 0.3495071828365326, + "eval_runtime": 650.7727, + "eval_samples_per_second": 2.165, + "eval_steps_per_second": 2.165, + "eval_wer": 22.87097890003459, + "step": 49000 + }, + { + "epoch": 24.7, + "grad_norm": 0.5802572965621948, + "learning_rate": 5.124924623115578e-06, + "loss": 0.001, + "step": 49025 + }, + { + "epoch": 24.71, + "grad_norm": 0.9147945046424866, + "learning_rate": 5.1224120603015086e-06, + "loss": 0.001, + "step": 49050 + }, + { + "epoch": 24.72, + "grad_norm": 0.9649335145950317, + "learning_rate": 5.119899497487437e-06, + "loss": 0.0014, + "step": 49075 + }, + { + "epoch": 24.74, + "grad_norm": 0.6083235144615173, + "learning_rate": 5.117386934673367e-06, + "loss": 0.0009, + "step": 49100 + }, + { + "epoch": 24.75, + "grad_norm": 0.29622915387153625, + "learning_rate": 5.1148743718592965e-06, + "loss": 0.0015, + "step": 49125 + }, + { + "epoch": 24.76, + "grad_norm": 0.1976325958967209, + "learning_rate": 5.112361809045227e-06, + "loss": 0.0012, + "step": 49150 + }, + { + "epoch": 24.77, + "grad_norm": 0.6826661825180054, + "learning_rate": 5.109849246231157e-06, + "loss": 0.0012, + "step": 49175 + }, + { + "epoch": 24.79, + "grad_norm": 1.7492895126342773, + "learning_rate": 5.107336683417085e-06, + "loss": 0.0011, + "step": 49200 + }, + { + "epoch": 24.8, + "grad_norm": 1.210092306137085, + "learning_rate": 5.1048241206030155e-06, + "loss": 0.0009, + "step": 49225 + }, + { + "epoch": 24.81, + "grad_norm": 0.6438060998916626, + "learning_rate": 5.102311557788945e-06, + "loss": 0.0007, + "step": 49250 + }, + { + "epoch": 24.82, + "grad_norm": 1.0905343294143677, + "learning_rate": 5.099798994974875e-06, + "loss": 0.0009, + "step": 49275 + }, + { + "epoch": 24.84, + "grad_norm": 0.9838513135910034, + "learning_rate": 5.097286432160804e-06, + "loss": 0.0008, + "step": 49300 + }, + { + "epoch": 24.85, + "grad_norm": 0.906428337097168, + "learning_rate": 5.0947738693467345e-06, + "loss": 0.0006, + "step": 49325 + }, + { + "epoch": 24.86, + "grad_norm": 0.5822015404701233, + "learning_rate": 5.092261306532665e-06, + "loss": 0.001, + "step": 49350 + }, + { + "epoch": 24.87, + "grad_norm": 0.6224560141563416, + "learning_rate": 5.089748743718593e-06, + "loss": 0.0008, + "step": 49375 + }, + { + "epoch": 24.89, + "grad_norm": 0.12286537140607834, + "learning_rate": 5.087236180904523e-06, + "loss": 0.0008, + "step": 49400 + }, + { + "epoch": 24.9, + "grad_norm": 0.958941638469696, + "learning_rate": 5.084723618090453e-06, + "loss": 0.0008, + "step": 49425 + }, + { + "epoch": 24.91, + "grad_norm": 0.3192073106765747, + "learning_rate": 5.082211055276383e-06, + "loss": 0.0009, + "step": 49450 + }, + { + "epoch": 24.92, + "grad_norm": NaN, + "learning_rate": 5.079798994974875e-06, + "loss": 0.0009, + "step": 49475 + }, + { + "epoch": 24.94, + "grad_norm": 0.13195385038852692, + "learning_rate": 5.077286432160804e-06, + "loss": 0.0008, + "step": 49500 + }, + { + "epoch": 24.95, + "grad_norm": 1.2665826082229614, + "learning_rate": 5.074773869346734e-06, + "loss": 0.0009, + "step": 49525 + }, + { + "epoch": 24.96, + "grad_norm": 0.7236266732215881, + "learning_rate": 5.072261306532664e-06, + "loss": 0.0009, + "step": 49550 + }, + { + "epoch": 24.97, + "grad_norm": 1.4165126085281372, + "learning_rate": 5.069748743718593e-06, + "loss": 0.0011, + "step": 49575 + }, + { + "epoch": 24.99, + "grad_norm": 0.6968191266059875, + "learning_rate": 5.0672361809045235e-06, + "loss": 0.0011, + "step": 49600 + }, + { + "epoch": 25.0, + "grad_norm": 0.3853646218776703, + "learning_rate": 5.064723618090453e-06, + "loss": 0.001, + "step": 49625 + }, + { + "epoch": 25.01, + "grad_norm": 1.0228655338287354, + "learning_rate": 5.062211055276382e-06, + "loss": 0.0006, + "step": 49650 + }, + { + "epoch": 25.03, + "grad_norm": 0.07551419734954834, + "learning_rate": 5.0596984924623115e-06, + "loss": 0.0009, + "step": 49675 + }, + { + "epoch": 25.04, + "grad_norm": 0.9682655334472656, + "learning_rate": 5.057185929648242e-06, + "loss": 0.0009, + "step": 49700 + }, + { + "epoch": 25.05, + "grad_norm": 0.4476355016231537, + "learning_rate": 5.054673366834171e-06, + "loss": 0.0008, + "step": 49725 + }, + { + "epoch": 25.06, + "grad_norm": 0.44491493701934814, + "learning_rate": 5.052160804020101e-06, + "loss": 0.0009, + "step": 49750 + }, + { + "epoch": 25.08, + "grad_norm": 1.1377488374710083, + "learning_rate": 5.049648241206031e-06, + "loss": 0.0007, + "step": 49775 + }, + { + "epoch": 25.09, + "grad_norm": 0.2252766638994217, + "learning_rate": 5.04713567839196e-06, + "loss": 0.0006, + "step": 49800 + }, + { + "epoch": 25.1, + "grad_norm": 0.3592166006565094, + "learning_rate": 5.04462311557789e-06, + "loss": 0.0008, + "step": 49825 + }, + { + "epoch": 25.11, + "grad_norm": 0.2291008085012436, + "learning_rate": 5.042110552763819e-06, + "loss": 0.0005, + "step": 49850 + }, + { + "epoch": 25.13, + "grad_norm": 0.36598822474479675, + "learning_rate": 5.0395979899497495e-06, + "loss": 0.0004, + "step": 49875 + }, + { + "epoch": 25.14, + "grad_norm": 0.13307011127471924, + "learning_rate": 5.037085427135679e-06, + "loss": 0.0006, + "step": 49900 + }, + { + "epoch": 25.15, + "grad_norm": 0.5685657858848572, + "learning_rate": 5.034572864321608e-06, + "loss": 0.0007, + "step": 49925 + }, + { + "epoch": 25.16, + "grad_norm": 0.836059033870697, + "learning_rate": 5.0320603015075374e-06, + "loss": 0.0008, + "step": 49950 + }, + { + "epoch": 25.18, + "grad_norm": 0.8621478080749512, + "learning_rate": 5.029547738693468e-06, + "loss": 0.0007, + "step": 49975 + }, + { + "epoch": 25.19, + "grad_norm": 0.750243067741394, + "learning_rate": 5.027035175879398e-06, + "loss": 0.0007, + "step": 50000 + }, + { + "epoch": 25.19, + "eval_loss": 0.3517380654811859, + "eval_runtime": 645.2524, + "eval_samples_per_second": 2.184, + "eval_steps_per_second": 2.184, + "eval_wer": 23.189207886544448, + "step": 50000 + }, + { + "epoch": 25.2, + "grad_norm": 0.194803848862648, + "learning_rate": 5.024522613065327e-06, + "loss": 0.0007, + "step": 50025 + }, + { + "epoch": 25.21, + "grad_norm": 0.9492383599281311, + "learning_rate": 5.022010050251257e-06, + "loss": 0.0008, + "step": 50050 + }, + { + "epoch": 25.23, + "grad_norm": 0.3268399238586426, + "learning_rate": 5.019497487437186e-06, + "loss": 0.0005, + "step": 50075 + }, + { + "epoch": 25.24, + "grad_norm": 0.23588715493679047, + "learning_rate": 5.016984924623116e-06, + "loss": 0.0005, + "step": 50100 + }, + { + "epoch": 25.25, + "grad_norm": 0.3195103704929352, + "learning_rate": 5.014472361809045e-06, + "loss": 0.0006, + "step": 50125 + }, + { + "epoch": 25.26, + "grad_norm": 0.5224353075027466, + "learning_rate": 5.011959798994975e-06, + "loss": 0.0005, + "step": 50150 + }, + { + "epoch": 25.28, + "grad_norm": 0.3808083236217499, + "learning_rate": 5.009447236180906e-06, + "loss": 0.0006, + "step": 50175 + }, + { + "epoch": 25.29, + "grad_norm": 0.8282648921012878, + "learning_rate": 5.006934673366834e-06, + "loss": 0.0005, + "step": 50200 + }, + { + "epoch": 25.3, + "grad_norm": 1.0018339157104492, + "learning_rate": 5.004422110552764e-06, + "loss": 0.0005, + "step": 50225 + }, + { + "epoch": 25.31, + "grad_norm": 0.6774661540985107, + "learning_rate": 5.0019095477386935e-06, + "loss": 0.0007, + "step": 50250 + }, + { + "epoch": 25.33, + "grad_norm": 0.9828543663024902, + "learning_rate": 4.999396984924624e-06, + "loss": 0.0008, + "step": 50275 + }, + { + "epoch": 25.34, + "grad_norm": 0.5815138220787048, + "learning_rate": 4.996884422110553e-06, + "loss": 0.0007, + "step": 50300 + }, + { + "epoch": 25.35, + "grad_norm": 0.20348763465881348, + "learning_rate": 4.994371859296483e-06, + "loss": 0.001, + "step": 50325 + }, + { + "epoch": 25.37, + "grad_norm": 0.4329879879951477, + "learning_rate": 4.9918592964824125e-06, + "loss": 0.0013, + "step": 50350 + }, + { + "epoch": 25.38, + "grad_norm": 0.5430723428726196, + "learning_rate": 4.989346733668342e-06, + "loss": 0.0008, + "step": 50375 + }, + { + "epoch": 25.39, + "grad_norm": 1.5767520666122437, + "learning_rate": 4.986834170854272e-06, + "loss": 0.001, + "step": 50400 + }, + { + "epoch": 25.4, + "grad_norm": 0.2442205548286438, + "learning_rate": 4.984321608040201e-06, + "loss": 0.0005, + "step": 50425 + }, + { + "epoch": 25.42, + "grad_norm": 0.20690348744392395, + "learning_rate": 4.981809045226131e-06, + "loss": 0.0009, + "step": 50450 + }, + { + "epoch": 25.43, + "grad_norm": 0.6699907183647156, + "learning_rate": 4.979296482412061e-06, + "loss": 0.0007, + "step": 50475 + }, + { + "epoch": 25.44, + "grad_norm": 1.3233988285064697, + "learning_rate": 4.97678391959799e-06, + "loss": 0.0008, + "step": 50500 + }, + { + "epoch": 25.45, + "grad_norm": 0.44620636105537415, + "learning_rate": 4.97427135678392e-06, + "loss": 0.0006, + "step": 50525 + }, + { + "epoch": 25.47, + "grad_norm": 0.28157714009284973, + "learning_rate": 4.97175879396985e-06, + "loss": 0.0007, + "step": 50550 + }, + { + "epoch": 25.48, + "grad_norm": 0.821960985660553, + "learning_rate": 4.969246231155779e-06, + "loss": 0.0006, + "step": 50575 + }, + { + "epoch": 25.49, + "grad_norm": 0.5584134459495544, + "learning_rate": 4.966733668341709e-06, + "loss": 0.0007, + "step": 50600 + }, + { + "epoch": 25.5, + "grad_norm": 0.801690399646759, + "learning_rate": 4.9642211055276385e-06, + "loss": 0.0006, + "step": 50625 + }, + { + "epoch": 25.52, + "grad_norm": 1.2869949340820312, + "learning_rate": 4.961708542713568e-06, + "loss": 0.0008, + "step": 50650 + }, + { + "epoch": 25.53, + "grad_norm": 0.13358135521411896, + "learning_rate": 4.959195979899498e-06, + "loss": 0.0006, + "step": 50675 + }, + { + "epoch": 25.54, + "grad_norm": 0.5842620134353638, + "learning_rate": 4.956683417085428e-06, + "loss": 0.0008, + "step": 50700 + }, + { + "epoch": 25.55, + "grad_norm": 0.3312392234802246, + "learning_rate": 4.9541708542713575e-06, + "loss": 0.0007, + "step": 50725 + }, + { + "epoch": 25.57, + "grad_norm": 0.9442441463470459, + "learning_rate": 4.951658291457287e-06, + "loss": 0.0007, + "step": 50750 + }, + { + "epoch": 25.58, + "grad_norm": 0.8669309616088867, + "learning_rate": 4.949145728643216e-06, + "loss": 0.0009, + "step": 50775 + }, + { + "epoch": 25.59, + "grad_norm": 1.1537563800811768, + "learning_rate": 4.946633165829146e-06, + "loss": 0.0007, + "step": 50800 + }, + { + "epoch": 25.6, + "grad_norm": 0.9006689190864563, + "learning_rate": 4.944120603015076e-06, + "loss": 0.0006, + "step": 50825 + }, + { + "epoch": 25.62, + "grad_norm": 0.2254357486963272, + "learning_rate": 4.941608040201005e-06, + "loss": 0.0008, + "step": 50850 + }, + { + "epoch": 25.63, + "grad_norm": 0.6781788468360901, + "learning_rate": 4.939095477386935e-06, + "loss": 0.0009, + "step": 50875 + }, + { + "epoch": 25.64, + "grad_norm": 0.49428144097328186, + "learning_rate": 4.936582914572865e-06, + "loss": 0.0007, + "step": 50900 + }, + { + "epoch": 25.65, + "grad_norm": 0.15863998234272003, + "learning_rate": 4.934070351758795e-06, + "loss": 0.0007, + "step": 50925 + }, + { + "epoch": 25.67, + "grad_norm": 1.1742552518844604, + "learning_rate": 4.931557788944724e-06, + "loss": 0.001, + "step": 50950 + }, + { + "epoch": 25.68, + "grad_norm": 0.7124210596084595, + "learning_rate": 4.929045226130654e-06, + "loss": 0.0007, + "step": 50975 + }, + { + "epoch": 25.69, + "grad_norm": 0.11829496920108795, + "learning_rate": 4.926532663316583e-06, + "loss": 0.0007, + "step": 51000 + }, + { + "epoch": 25.69, + "eval_loss": 0.35887035727500916, + "eval_runtime": 648.5707, + "eval_samples_per_second": 2.172, + "eval_steps_per_second": 2.172, + "eval_wer": 22.656520235212728, + "step": 51000 + }, + { + "epoch": 25.71, + "grad_norm": 1.2925130128860474, + "learning_rate": 4.924020100502513e-06, + "loss": 0.0009, + "step": 51025 + }, + { + "epoch": 25.72, + "grad_norm": 0.9622329473495483, + "learning_rate": 4.921507537688442e-06, + "loss": 0.001, + "step": 51050 + }, + { + "epoch": 25.73, + "grad_norm": 0.15050731599330902, + "learning_rate": 4.918994974874372e-06, + "loss": 0.0009, + "step": 51075 + }, + { + "epoch": 25.74, + "grad_norm": 0.6646810173988342, + "learning_rate": 4.9164824120603015e-06, + "loss": 0.0008, + "step": 51100 + }, + { + "epoch": 25.76, + "grad_norm": 1.8066281080245972, + "learning_rate": 4.913969849246232e-06, + "loss": 0.0008, + "step": 51125 + }, + { + "epoch": 25.77, + "grad_norm": 1.1246289014816284, + "learning_rate": 4.911457286432161e-06, + "loss": 0.0007, + "step": 51150 + }, + { + "epoch": 25.78, + "grad_norm": 0.20243453979492188, + "learning_rate": 4.908944723618091e-06, + "loss": 0.0009, + "step": 51175 + }, + { + "epoch": 25.79, + "grad_norm": 1.6287262439727783, + "learning_rate": 4.9064321608040205e-06, + "loss": 0.0006, + "step": 51200 + }, + { + "epoch": 25.81, + "grad_norm": 0.7835111618041992, + "learning_rate": 4.90391959798995e-06, + "loss": 0.0005, + "step": 51225 + }, + { + "epoch": 25.82, + "grad_norm": 0.7937703132629395, + "learning_rate": 4.90140703517588e-06, + "loss": 0.0008, + "step": 51250 + }, + { + "epoch": 25.83, + "grad_norm": 0.4282771944999695, + "learning_rate": 4.898894472361809e-06, + "loss": 0.0009, + "step": 51275 + }, + { + "epoch": 25.84, + "grad_norm": 0.9795933365821838, + "learning_rate": 4.896381909547739e-06, + "loss": 0.0006, + "step": 51300 + }, + { + "epoch": 25.86, + "grad_norm": 0.2915053963661194, + "learning_rate": 4.893869346733669e-06, + "loss": 0.0007, + "step": 51325 + }, + { + "epoch": 25.87, + "grad_norm": 0.899832010269165, + "learning_rate": 4.891356783919598e-06, + "loss": 0.0007, + "step": 51350 + }, + { + "epoch": 25.88, + "grad_norm": 0.8909317851066589, + "learning_rate": 4.888844221105528e-06, + "loss": 0.0008, + "step": 51375 + }, + { + "epoch": 25.89, + "grad_norm": 0.46953698992729187, + "learning_rate": 4.886331658291458e-06, + "loss": 0.0009, + "step": 51400 + }, + { + "epoch": 25.91, + "grad_norm": 0.1676747351884842, + "learning_rate": 4.883819095477387e-06, + "loss": 0.0007, + "step": 51425 + }, + { + "epoch": 25.92, + "grad_norm": 1.4294859170913696, + "learning_rate": 4.881306532663317e-06, + "loss": 0.0007, + "step": 51450 + }, + { + "epoch": 25.93, + "grad_norm": 1.2342486381530762, + "learning_rate": 4.8787939698492465e-06, + "loss": 0.0006, + "step": 51475 + }, + { + "epoch": 25.94, + "grad_norm": 2.795978307723999, + "learning_rate": 4.876281407035176e-06, + "loss": 0.0009, + "step": 51500 + }, + { + "epoch": 25.96, + "grad_norm": 0.16276022791862488, + "learning_rate": 4.873768844221106e-06, + "loss": 0.0005, + "step": 51525 + }, + { + "epoch": 25.97, + "grad_norm": 0.4440419673919678, + "learning_rate": 4.871256281407036e-06, + "loss": 0.0007, + "step": 51550 + }, + { + "epoch": 25.98, + "grad_norm": 0.5978400707244873, + "learning_rate": 4.8687437185929655e-06, + "loss": 0.0007, + "step": 51575 + }, + { + "epoch": 25.99, + "grad_norm": 0.41632863879203796, + "learning_rate": 4.866231155778895e-06, + "loss": 0.0008, + "step": 51600 + }, + { + "epoch": 26.01, + "grad_norm": 0.6219983100891113, + "learning_rate": 4.863718592964824e-06, + "loss": 0.0005, + "step": 51625 + }, + { + "epoch": 26.02, + "grad_norm": 1.240051507949829, + "learning_rate": 4.861206030150754e-06, + "loss": 0.0009, + "step": 51650 + }, + { + "epoch": 26.03, + "grad_norm": 0.7890909314155579, + "learning_rate": 4.858793969849247e-06, + "loss": 0.0006, + "step": 51675 + }, + { + "epoch": 26.05, + "grad_norm": 0.1678403913974762, + "learning_rate": 4.856281407035176e-06, + "loss": 0.0006, + "step": 51700 + }, + { + "epoch": 26.06, + "grad_norm": 0.5384219884872437, + "learning_rate": 4.853768844221106e-06, + "loss": 0.0006, + "step": 51725 + }, + { + "epoch": 26.07, + "grad_norm": 0.4800323247909546, + "learning_rate": 4.8512562814070355e-06, + "loss": 0.0008, + "step": 51750 + }, + { + "epoch": 26.08, + "grad_norm": 0.088472880423069, + "learning_rate": 4.848743718592966e-06, + "loss": 0.0007, + "step": 51775 + }, + { + "epoch": 26.1, + "grad_norm": 0.28733333945274353, + "learning_rate": 4.846231155778895e-06, + "loss": 0.0006, + "step": 51800 + }, + { + "epoch": 26.11, + "grad_norm": 3.326415777206421, + "learning_rate": 4.843718592964824e-06, + "loss": 0.0004, + "step": 51825 + }, + { + "epoch": 26.12, + "grad_norm": 0.8401397466659546, + "learning_rate": 4.8412060301507545e-06, + "loss": 0.0004, + "step": 51850 + }, + { + "epoch": 26.13, + "grad_norm": 1.742140531539917, + "learning_rate": 4.838693467336684e-06, + "loss": 0.0005, + "step": 51875 + }, + { + "epoch": 26.15, + "grad_norm": 1.9404064416885376, + "learning_rate": 4.836180904522613e-06, + "loss": 0.0005, + "step": 51900 + }, + { + "epoch": 26.16, + "grad_norm": 0.9949504733085632, + "learning_rate": 4.833668341708543e-06, + "loss": 0.0005, + "step": 51925 + }, + { + "epoch": 26.17, + "grad_norm": 0.13971653580665588, + "learning_rate": 4.831155778894473e-06, + "loss": 0.0006, + "step": 51950 + }, + { + "epoch": 26.18, + "grad_norm": 0.31128737330436707, + "learning_rate": 4.828643216080403e-06, + "loss": 0.0005, + "step": 51975 + }, + { + "epoch": 26.2, + "grad_norm": 0.8576771020889282, + "learning_rate": 4.826130653266332e-06, + "loss": 0.0006, + "step": 52000 + }, + { + "epoch": 26.2, + "eval_loss": 0.354295015335083, + "eval_runtime": 645.4254, + "eval_samples_per_second": 2.183, + "eval_steps_per_second": 2.183, + "eval_wer": 22.960913178830854, + "step": 52000 + }, + { + "epoch": 26.21, + "grad_norm": 0.8186060786247253, + "learning_rate": 4.8236180904522614e-06, + "loss": 0.0006, + "step": 52025 + }, + { + "epoch": 26.22, + "grad_norm": 0.21395763754844666, + "learning_rate": 4.821105527638192e-06, + "loss": 0.0009, + "step": 52050 + }, + { + "epoch": 26.23, + "grad_norm": 0.28404101729393005, + "learning_rate": 4.818592964824121e-06, + "loss": 0.0005, + "step": 52075 + }, + { + "epoch": 26.25, + "grad_norm": 0.2926516830921173, + "learning_rate": 4.81608040201005e-06, + "loss": 0.0005, + "step": 52100 + }, + { + "epoch": 26.26, + "grad_norm": 0.8246799111366272, + "learning_rate": 4.81356783919598e-06, + "loss": 0.0005, + "step": 52125 + }, + { + "epoch": 26.27, + "grad_norm": 0.19814717769622803, + "learning_rate": 4.81105527638191e-06, + "loss": 0.0007, + "step": 52150 + }, + { + "epoch": 26.28, + "grad_norm": 0.2942245602607727, + "learning_rate": 4.80854271356784e-06, + "loss": 0.0006, + "step": 52175 + }, + { + "epoch": 26.3, + "grad_norm": 1.1679245233535767, + "learning_rate": 4.806030150753769e-06, + "loss": 0.0007, + "step": 52200 + }, + { + "epoch": 26.31, + "grad_norm": 0.19946590065956116, + "learning_rate": 4.8035175879396986e-06, + "loss": 0.0007, + "step": 52225 + }, + { + "epoch": 26.32, + "grad_norm": 0.5419365763664246, + "learning_rate": 4.801005025125629e-06, + "loss": 0.0004, + "step": 52250 + }, + { + "epoch": 26.34, + "grad_norm": 1.0101172924041748, + "learning_rate": 4.798492462311558e-06, + "loss": 0.0008, + "step": 52275 + }, + { + "epoch": 26.35, + "grad_norm": 0.7043997049331665, + "learning_rate": 4.795979899497487e-06, + "loss": 0.0005, + "step": 52300 + }, + { + "epoch": 26.36, + "grad_norm": 1.9109712839126587, + "learning_rate": 4.7934673366834175e-06, + "loss": 0.0005, + "step": 52325 + }, + { + "epoch": 26.37, + "grad_norm": 1.100644826889038, + "learning_rate": 4.790954773869348e-06, + "loss": 0.0009, + "step": 52350 + }, + { + "epoch": 26.39, + "grad_norm": 0.7146306037902832, + "learning_rate": 4.788442211055277e-06, + "loss": 0.0007, + "step": 52375 + }, + { + "epoch": 26.4, + "grad_norm": 0.21567130088806152, + "learning_rate": 4.785929648241206e-06, + "loss": 0.0007, + "step": 52400 + }, + { + "epoch": 26.41, + "grad_norm": 1.087694764137268, + "learning_rate": 4.7834170854271365e-06, + "loss": 0.001, + "step": 52425 + }, + { + "epoch": 26.42, + "grad_norm": 0.6008167862892151, + "learning_rate": 4.780904522613066e-06, + "loss": 0.001, + "step": 52450 + }, + { + "epoch": 26.44, + "grad_norm": 0.45899203419685364, + "learning_rate": 4.778391959798995e-06, + "loss": 0.0006, + "step": 52475 + }, + { + "epoch": 26.45, + "grad_norm": 0.5328181982040405, + "learning_rate": 4.7758793969849245e-06, + "loss": 0.0009, + "step": 52500 + }, + { + "epoch": 26.46, + "grad_norm": 0.1914442628622055, + "learning_rate": 4.773366834170855e-06, + "loss": 0.001, + "step": 52525 + }, + { + "epoch": 26.47, + "grad_norm": 0.6430690884590149, + "learning_rate": 4.770854271356785e-06, + "loss": 0.0009, + "step": 52550 + }, + { + "epoch": 26.49, + "grad_norm": 0.5766837000846863, + "learning_rate": 4.768341708542714e-06, + "loss": 0.0009, + "step": 52575 + }, + { + "epoch": 26.5, + "grad_norm": 0.6997876763343811, + "learning_rate": 4.7658291457286435e-06, + "loss": 0.0007, + "step": 52600 + }, + { + "epoch": 26.51, + "grad_norm": 1.365090250968933, + "learning_rate": 4.763316582914574e-06, + "loss": 0.001, + "step": 52625 + }, + { + "epoch": 26.52, + "grad_norm": 1.0649492740631104, + "learning_rate": 4.760804020100503e-06, + "loss": 0.0009, + "step": 52650 + }, + { + "epoch": 26.54, + "grad_norm": 1.1800321340560913, + "learning_rate": 4.758291457286432e-06, + "loss": 0.001, + "step": 52675 + }, + { + "epoch": 26.55, + "grad_norm": 0.5055447220802307, + "learning_rate": 4.7557788944723625e-06, + "loss": 0.0008, + "step": 52700 + }, + { + "epoch": 26.56, + "grad_norm": 0.6218178272247314, + "learning_rate": 4.753266331658292e-06, + "loss": 0.001, + "step": 52725 + }, + { + "epoch": 26.57, + "grad_norm": 0.49634939432144165, + "learning_rate": 4.750753768844221e-06, + "loss": 0.0007, + "step": 52750 + }, + { + "epoch": 26.59, + "grad_norm": 0.4345056116580963, + "learning_rate": 4.748241206030151e-06, + "loss": 0.0006, + "step": 52775 + }, + { + "epoch": 26.6, + "grad_norm": 0.8264731168746948, + "learning_rate": 4.745728643216081e-06, + "loss": 0.0008, + "step": 52800 + }, + { + "epoch": 26.61, + "grad_norm": 0.7606098651885986, + "learning_rate": 4.743216080402011e-06, + "loss": 0.0008, + "step": 52825 + }, + { + "epoch": 26.62, + "grad_norm": 0.13701969385147095, + "learning_rate": 4.74070351758794e-06, + "loss": 0.0008, + "step": 52850 + }, + { + "epoch": 26.64, + "grad_norm": 1.5023452043533325, + "learning_rate": 4.7381909547738694e-06, + "loss": 0.0008, + "step": 52875 + }, + { + "epoch": 26.65, + "grad_norm": 0.7520101070404053, + "learning_rate": 4.7356783919598e-06, + "loss": 0.0009, + "step": 52900 + }, + { + "epoch": 26.66, + "grad_norm": 0.5014305114746094, + "learning_rate": 4.733165829145729e-06, + "loss": 0.0007, + "step": 52925 + }, + { + "epoch": 26.68, + "grad_norm": 0.6294064521789551, + "learning_rate": 4.730653266331658e-06, + "loss": 0.0009, + "step": 52950 + }, + { + "epoch": 26.69, + "grad_norm": 0.1294821947813034, + "learning_rate": 4.728140703517588e-06, + "loss": 0.0007, + "step": 52975 + }, + { + "epoch": 26.7, + "grad_norm": 0.0864739790558815, + "learning_rate": 4.725628140703518e-06, + "loss": 0.0009, + "step": 53000 + }, + { + "epoch": 26.7, + "eval_loss": 0.3559441566467285, + "eval_runtime": 647.2062, + "eval_samples_per_second": 2.177, + "eval_steps_per_second": 2.177, + "eval_wer": 22.760290556900724, + "step": 53000 + }, + { + "epoch": 26.71, + "grad_norm": 0.9976471662521362, + "learning_rate": 4.723115577889448e-06, + "loss": 0.0007, + "step": 53025 + }, + { + "epoch": 26.73, + "grad_norm": 0.1680731177330017, + "learning_rate": 4.720603015075377e-06, + "loss": 0.0008, + "step": 53050 + }, + { + "epoch": 26.74, + "grad_norm": 0.7187339663505554, + "learning_rate": 4.7180904522613066e-06, + "loss": 0.0007, + "step": 53075 + }, + { + "epoch": 26.75, + "grad_norm": 2.314380645751953, + "learning_rate": 4.715577889447237e-06, + "loss": 0.001, + "step": 53100 + }, + { + "epoch": 26.76, + "grad_norm": 0.23373447358608246, + "learning_rate": 4.713065326633166e-06, + "loss": 0.0007, + "step": 53125 + }, + { + "epoch": 26.78, + "grad_norm": 0.07850030809640884, + "learning_rate": 4.710552763819095e-06, + "loss": 0.0008, + "step": 53150 + }, + { + "epoch": 26.79, + "grad_norm": 0.7005709409713745, + "learning_rate": 4.7080402010050256e-06, + "loss": 0.001, + "step": 53175 + }, + { + "epoch": 26.8, + "grad_norm": 1.0930556058883667, + "learning_rate": 4.705527638190956e-06, + "loss": 0.0006, + "step": 53200 + }, + { + "epoch": 26.81, + "grad_norm": 0.5338262319564819, + "learning_rate": 4.703015075376885e-06, + "loss": 0.0005, + "step": 53225 + }, + { + "epoch": 26.83, + "grad_norm": 0.08169445395469666, + "learning_rate": 4.700502512562814e-06, + "loss": 0.0005, + "step": 53250 + }, + { + "epoch": 26.84, + "grad_norm": 2.5466127395629883, + "learning_rate": 4.6979899497487445e-06, + "loss": 0.0007, + "step": 53275 + }, + { + "epoch": 26.85, + "grad_norm": 1.4362409114837646, + "learning_rate": 4.695477386934674e-06, + "loss": 0.0005, + "step": 53300 + }, + { + "epoch": 26.86, + "grad_norm": 1.0999122858047485, + "learning_rate": 4.692964824120603e-06, + "loss": 0.0008, + "step": 53325 + }, + { + "epoch": 26.88, + "grad_norm": 0.7979241609573364, + "learning_rate": 4.6904522613065325e-06, + "loss": 0.0009, + "step": 53350 + }, + { + "epoch": 26.89, + "grad_norm": 0.5806974172592163, + "learning_rate": 4.687939698492463e-06, + "loss": 0.0008, + "step": 53375 + }, + { + "epoch": 26.9, + "grad_norm": 1.0148537158966064, + "learning_rate": 4.685427135678393e-06, + "loss": 0.0008, + "step": 53400 + }, + { + "epoch": 26.91, + "grad_norm": 1.2113052606582642, + "learning_rate": 4.682914572864322e-06, + "loss": 0.0013, + "step": 53425 + }, + { + "epoch": 26.93, + "grad_norm": 0.730830729007721, + "learning_rate": 4.6804020100502515e-06, + "loss": 0.0006, + "step": 53450 + }, + { + "epoch": 26.94, + "grad_norm": 0.5694770216941833, + "learning_rate": 4.677889447236182e-06, + "loss": 0.0007, + "step": 53475 + }, + { + "epoch": 26.95, + "grad_norm": 0.7788098454475403, + "learning_rate": 4.675376884422111e-06, + "loss": 0.0006, + "step": 53500 + }, + { + "epoch": 26.96, + "grad_norm": 0.47340822219848633, + "learning_rate": 4.67286432160804e-06, + "loss": 0.0008, + "step": 53525 + }, + { + "epoch": 26.98, + "grad_norm": 0.4103614389896393, + "learning_rate": 4.6703517587939705e-06, + "loss": 0.0007, + "step": 53550 + }, + { + "epoch": 26.99, + "grad_norm": 1.1811082363128662, + "learning_rate": 4.6678391959799e-06, + "loss": 0.0006, + "step": 53575 + }, + { + "epoch": 27.0, + "grad_norm": 0.34475991129875183, + "learning_rate": 4.66532663316583e-06, + "loss": 0.0004, + "step": 53600 + }, + { + "epoch": 27.02, + "grad_norm": 0.060179028660058975, + "learning_rate": 4.662814070351759e-06, + "loss": 0.0004, + "step": 53625 + }, + { + "epoch": 27.03, + "grad_norm": 0.4929451048374176, + "learning_rate": 4.660301507537689e-06, + "loss": 0.0004, + "step": 53650 + }, + { + "epoch": 27.04, + "grad_norm": 0.3733210265636444, + "learning_rate": 4.657788944723619e-06, + "loss": 0.0005, + "step": 53675 + }, + { + "epoch": 27.05, + "grad_norm": 1.3154228925704956, + "learning_rate": 4.655276381909548e-06, + "loss": 0.0005, + "step": 53700 + }, + { + "epoch": 27.07, + "grad_norm": 0.241769477725029, + "learning_rate": 4.6527638190954774e-06, + "loss": 0.0004, + "step": 53725 + }, + { + "epoch": 27.08, + "grad_norm": 0.19261914491653442, + "learning_rate": 4.650251256281408e-06, + "loss": 0.0003, + "step": 53750 + }, + { + "epoch": 27.09, + "grad_norm": 0.19151557981967926, + "learning_rate": 4.647738693467337e-06, + "loss": 0.0002, + "step": 53775 + }, + { + "epoch": 27.1, + "grad_norm": 0.2657971680164337, + "learning_rate": 4.645226130653266e-06, + "loss": 0.0004, + "step": 53800 + }, + { + "epoch": 27.12, + "grad_norm": 0.09917689114809036, + "learning_rate": 4.6427135678391964e-06, + "loss": 0.0003, + "step": 53825 + }, + { + "epoch": 27.13, + "grad_norm": 0.08713795244693756, + "learning_rate": 4.640201005025126e-06, + "loss": 0.0002, + "step": 53850 + }, + { + "epoch": 27.14, + "grad_norm": 0.15994809567928314, + "learning_rate": 4.637688442211056e-06, + "loss": 0.0004, + "step": 53875 + }, + { + "epoch": 27.15, + "grad_norm": 0.1758558303117752, + "learning_rate": 4.635175879396985e-06, + "loss": 0.0004, + "step": 53900 + }, + { + "epoch": 27.17, + "grad_norm": 0.04318870231509209, + "learning_rate": 4.6326633165829146e-06, + "loss": 0.0004, + "step": 53925 + }, + { + "epoch": 27.18, + "grad_norm": 0.07124695926904678, + "learning_rate": 4.630150753768845e-06, + "loss": 0.0003, + "step": 53950 + }, + { + "epoch": 27.19, + "grad_norm": 0.6460732221603394, + "learning_rate": 4.627638190954774e-06, + "loss": 0.0005, + "step": 53975 + }, + { + "epoch": 27.2, + "grad_norm": 0.9753907322883606, + "learning_rate": 4.625125628140703e-06, + "loss": 0.0004, + "step": 54000 + }, + { + "epoch": 27.2, + "eval_loss": 0.3613799214363098, + "eval_runtime": 644.7858, + "eval_samples_per_second": 2.185, + "eval_steps_per_second": 2.185, + "eval_wer": 22.407471463161535, + "step": 54000 + }, + { + "epoch": 27.22, + "grad_norm": 0.14066103100776672, + "learning_rate": 4.6226130653266336e-06, + "loss": 0.0005, + "step": 54025 + }, + { + "epoch": 27.23, + "grad_norm": 0.20447216928005219, + "learning_rate": 4.620100502512564e-06, + "loss": 0.0003, + "step": 54050 + }, + { + "epoch": 27.24, + "grad_norm": 0.13957104086875916, + "learning_rate": 4.617587939698493e-06, + "loss": 0.0002, + "step": 54075 + }, + { + "epoch": 27.25, + "grad_norm": 0.8711459040641785, + "learning_rate": 4.615075376884422e-06, + "loss": 0.0003, + "step": 54100 + }, + { + "epoch": 27.27, + "grad_norm": 0.19493000209331512, + "learning_rate": 4.612562814070352e-06, + "loss": 0.0003, + "step": 54125 + }, + { + "epoch": 27.28, + "grad_norm": 0.16989558935165405, + "learning_rate": 4.610050251256282e-06, + "loss": 0.0004, + "step": 54150 + }, + { + "epoch": 27.29, + "grad_norm": 0.29881447553634644, + "learning_rate": 4.607537688442211e-06, + "loss": 0.0006, + "step": 54175 + }, + { + "epoch": 27.3, + "grad_norm": 1.1802809238433838, + "learning_rate": 4.6050251256281405e-06, + "loss": 0.0006, + "step": 54200 + }, + { + "epoch": 27.32, + "grad_norm": 0.17507942020893097, + "learning_rate": 4.602512562814071e-06, + "loss": 0.0005, + "step": 54225 + }, + { + "epoch": 27.33, + "grad_norm": 0.2276214212179184, + "learning_rate": 4.600000000000001e-06, + "loss": 0.0004, + "step": 54250 + }, + { + "epoch": 27.34, + "grad_norm": 0.4339704215526581, + "learning_rate": 4.59748743718593e-06, + "loss": 0.0006, + "step": 54275 + }, + { + "epoch": 27.36, + "grad_norm": 1.0907328128814697, + "learning_rate": 4.5949748743718595e-06, + "loss": 0.0006, + "step": 54300 + }, + { + "epoch": 27.37, + "grad_norm": 0.9420053362846375, + "learning_rate": 4.59246231155779e-06, + "loss": 0.0004, + "step": 54325 + }, + { + "epoch": 27.38, + "grad_norm": 0.5085152387619019, + "learning_rate": 4.589949748743719e-06, + "loss": 0.0006, + "step": 54350 + }, + { + "epoch": 27.39, + "grad_norm": 0.7022001147270203, + "learning_rate": 4.587437185929648e-06, + "loss": 0.0006, + "step": 54375 + }, + { + "epoch": 27.41, + "grad_norm": 0.4072805941104889, + "learning_rate": 4.5849246231155785e-06, + "loss": 0.0004, + "step": 54400 + }, + { + "epoch": 27.42, + "grad_norm": 0.6905921101570129, + "learning_rate": 4.582412060301508e-06, + "loss": 0.0007, + "step": 54425 + }, + { + "epoch": 27.43, + "grad_norm": 0.5085986852645874, + "learning_rate": 4.579899497487438e-06, + "loss": 0.0006, + "step": 54450 + }, + { + "epoch": 27.44, + "grad_norm": 0.7813529968261719, + "learning_rate": 4.577386934673367e-06, + "loss": 0.0006, + "step": 54475 + }, + { + "epoch": 27.46, + "grad_norm": 0.7074769139289856, + "learning_rate": 4.574874371859297e-06, + "loss": 0.0009, + "step": 54500 + }, + { + "epoch": 27.47, + "grad_norm": 1.1880906820297241, + "learning_rate": 4.572361809045227e-06, + "loss": 0.0008, + "step": 54525 + }, + { + "epoch": 27.48, + "grad_norm": 1.0316381454467773, + "learning_rate": 4.569849246231156e-06, + "loss": 0.0009, + "step": 54550 + }, + { + "epoch": 27.49, + "grad_norm": 1.5573595762252808, + "learning_rate": 4.5673366834170854e-06, + "loss": 0.0005, + "step": 54575 + }, + { + "epoch": 27.51, + "grad_norm": 3.481981039047241, + "learning_rate": 4.564824120603016e-06, + "loss": 0.001, + "step": 54600 + }, + { + "epoch": 27.52, + "grad_norm": 0.9076539874076843, + "learning_rate": 4.562311557788945e-06, + "loss": 0.0007, + "step": 54625 + }, + { + "epoch": 27.53, + "grad_norm": 2.5089993476867676, + "learning_rate": 4.559798994974875e-06, + "loss": 0.0007, + "step": 54650 + }, + { + "epoch": 27.54, + "grad_norm": 0.755713701248169, + "learning_rate": 4.5572864321608044e-06, + "loss": 0.001, + "step": 54675 + }, + { + "epoch": 27.56, + "grad_norm": 0.1939065009355545, + "learning_rate": 4.554773869346734e-06, + "loss": 0.0007, + "step": 54700 + }, + { + "epoch": 27.57, + "grad_norm": 0.29305967688560486, + "learning_rate": 4.552261306532664e-06, + "loss": 0.0008, + "step": 54725 + }, + { + "epoch": 27.58, + "grad_norm": 1.5190849304199219, + "learning_rate": 4.549748743718593e-06, + "loss": 0.0009, + "step": 54750 + }, + { + "epoch": 27.59, + "grad_norm": 0.15516288578510284, + "learning_rate": 4.5472361809045226e-06, + "loss": 0.0007, + "step": 54775 + }, + { + "epoch": 27.61, + "grad_norm": 0.9610015749931335, + "learning_rate": 4.544723618090453e-06, + "loss": 0.0007, + "step": 54800 + }, + { + "epoch": 27.62, + "grad_norm": 0.04598340019583702, + "learning_rate": 4.542211055276382e-06, + "loss": 0.0005, + "step": 54825 + }, + { + "epoch": 27.63, + "grad_norm": 0.8410586714744568, + "learning_rate": 4.539698492462312e-06, + "loss": 0.0006, + "step": 54850 + }, + { + "epoch": 27.64, + "grad_norm": 0.09928705543279648, + "learning_rate": 4.5371859296482416e-06, + "loss": 0.0008, + "step": 54875 + }, + { + "epoch": 27.66, + "grad_norm": 1.5359119176864624, + "learning_rate": 4.534673366834172e-06, + "loss": 0.0007, + "step": 54900 + }, + { + "epoch": 27.67, + "grad_norm": 0.13479715585708618, + "learning_rate": 4.532160804020101e-06, + "loss": 0.0006, + "step": 54925 + }, + { + "epoch": 27.68, + "grad_norm": 0.5345453023910522, + "learning_rate": 4.52964824120603e-06, + "loss": 0.0007, + "step": 54950 + }, + { + "epoch": 27.7, + "grad_norm": 0.29486083984375, + "learning_rate": 4.52713567839196e-06, + "loss": 0.0012, + "step": 54975 + }, + { + "epoch": 27.71, + "grad_norm": 1.310102939605713, + "learning_rate": 4.52462311557789e-06, + "loss": 0.0008, + "step": 55000 + }, + { + "epoch": 27.71, + "eval_loss": 0.3659009635448456, + "eval_runtime": 646.1523, + "eval_samples_per_second": 2.181, + "eval_steps_per_second": 2.181, + "eval_wer": 22.98858526461432, + "step": 55000 + }, + { + "epoch": 27.72, + "grad_norm": 0.18702644109725952, + "learning_rate": 4.522110552763819e-06, + "loss": 0.0009, + "step": 55025 + }, + { + "epoch": 27.73, + "grad_norm": 0.17531992495059967, + "learning_rate": 4.5195979899497485e-06, + "loss": 0.0006, + "step": 55050 + }, + { + "epoch": 27.75, + "grad_norm": 0.844012439250946, + "learning_rate": 4.517085427135679e-06, + "loss": 0.0005, + "step": 55075 + }, + { + "epoch": 27.76, + "grad_norm": 0.20820151269435883, + "learning_rate": 4.514572864321609e-06, + "loss": 0.0008, + "step": 55100 + }, + { + "epoch": 27.77, + "grad_norm": 2.1439287662506104, + "learning_rate": 4.512060301507538e-06, + "loss": 0.0009, + "step": 55125 + }, + { + "epoch": 27.78, + "grad_norm": 0.31553953886032104, + "learning_rate": 4.5095477386934675e-06, + "loss": 0.0007, + "step": 55150 + }, + { + "epoch": 27.8, + "grad_norm": 0.08655881881713867, + "learning_rate": 4.507035175879398e-06, + "loss": 0.0005, + "step": 55175 + }, + { + "epoch": 27.81, + "grad_norm": 0.0824466124176979, + "learning_rate": 4.504522613065327e-06, + "loss": 0.0005, + "step": 55200 + }, + { + "epoch": 27.82, + "grad_norm": 0.7539893984794617, + "learning_rate": 4.502010050251256e-06, + "loss": 0.0006, + "step": 55225 + }, + { + "epoch": 27.83, + "grad_norm": 0.1819518655538559, + "learning_rate": 4.499497487437186e-06, + "loss": 0.0011, + "step": 55250 + }, + { + "epoch": 27.85, + "grad_norm": 1.464959740638733, + "learning_rate": 4.496984924623116e-06, + "loss": 0.0009, + "step": 55275 + }, + { + "epoch": 27.86, + "grad_norm": 0.3855617344379425, + "learning_rate": 4.494472361809046e-06, + "loss": 0.0008, + "step": 55300 + }, + { + "epoch": 27.87, + "grad_norm": 1.3555275201797485, + "learning_rate": 4.491959798994975e-06, + "loss": 0.0012, + "step": 55325 + }, + { + "epoch": 27.88, + "grad_norm": 0.7826224565505981, + "learning_rate": 4.489447236180905e-06, + "loss": 0.001, + "step": 55350 + }, + { + "epoch": 27.9, + "grad_norm": 0.3365747630596161, + "learning_rate": 4.486934673366835e-06, + "loss": 0.0005, + "step": 55375 + }, + { + "epoch": 27.91, + "grad_norm": 0.28341349959373474, + "learning_rate": 4.484522613065327e-06, + "loss": 0.0008, + "step": 55400 + }, + { + "epoch": 27.92, + "grad_norm": 0.6870297789573669, + "learning_rate": 4.4820100502512565e-06, + "loss": 0.0006, + "step": 55425 + }, + { + "epoch": 27.93, + "grad_norm": 0.2579886019229889, + "learning_rate": 4.479497487437186e-06, + "loss": 0.0008, + "step": 55450 + }, + { + "epoch": 27.95, + "grad_norm": 1.2634528875350952, + "learning_rate": 4.476984924623116e-06, + "loss": 0.0008, + "step": 55475 + }, + { + "epoch": 27.96, + "grad_norm": 0.11361195892095566, + "learning_rate": 4.474472361809046e-06, + "loss": 0.0006, + "step": 55500 + }, + { + "epoch": 27.97, + "grad_norm": 0.15430136024951935, + "learning_rate": 4.4719597989949755e-06, + "loss": 0.0007, + "step": 55525 + }, + { + "epoch": 27.98, + "grad_norm": 0.3706219494342804, + "learning_rate": 4.469447236180905e-06, + "loss": 0.0007, + "step": 55550 + }, + { + "epoch": 28.0, + "grad_norm": 0.853947103023529, + "learning_rate": 4.466934673366834e-06, + "loss": 0.0006, + "step": 55575 + }, + { + "epoch": 28.01, + "grad_norm": 0.9473939538002014, + "learning_rate": 4.464422110552764e-06, + "loss": 0.0004, + "step": 55600 + }, + { + "epoch": 28.02, + "grad_norm": 1.0833967924118042, + "learning_rate": 4.461909547738694e-06, + "loss": 0.0006, + "step": 55625 + }, + { + "epoch": 28.04, + "grad_norm": 0.2835502028465271, + "learning_rate": 4.459396984924623e-06, + "loss": 0.0005, + "step": 55650 + }, + { + "epoch": 28.05, + "grad_norm": 0.5609690546989441, + "learning_rate": 4.456884422110553e-06, + "loss": 0.0005, + "step": 55675 + }, + { + "epoch": 28.06, + "grad_norm": 0.07128031551837921, + "learning_rate": 4.454371859296483e-06, + "loss": 0.0004, + "step": 55700 + }, + { + "epoch": 28.07, + "grad_norm": 0.7161921858787537, + "learning_rate": 4.451859296482413e-06, + "loss": 0.0003, + "step": 55725 + }, + { + "epoch": 28.09, + "grad_norm": 0.13013307750225067, + "learning_rate": 4.449346733668342e-06, + "loss": 0.0002, + "step": 55750 + }, + { + "epoch": 28.1, + "grad_norm": 0.13601085543632507, + "learning_rate": 4.446834170854272e-06, + "loss": 0.0004, + "step": 55775 + }, + { + "epoch": 28.11, + "grad_norm": 0.6023567318916321, + "learning_rate": 4.4443216080402014e-06, + "loss": 0.0003, + "step": 55800 + }, + { + "epoch": 28.12, + "grad_norm": 0.09424587339162827, + "learning_rate": 4.441809045226131e-06, + "loss": 0.0003, + "step": 55825 + }, + { + "epoch": 28.14, + "grad_norm": 0.04044128209352493, + "learning_rate": 4.43929648241206e-06, + "loss": 0.0003, + "step": 55850 + }, + { + "epoch": 28.15, + "grad_norm": 0.16100598871707916, + "learning_rate": 4.43678391959799e-06, + "loss": 0.0002, + "step": 55875 + }, + { + "epoch": 28.16, + "grad_norm": 0.3516765832901001, + "learning_rate": 4.4342713567839204e-06, + "loss": 0.0002, + "step": 55900 + }, + { + "epoch": 28.17, + "grad_norm": 0.2532273828983307, + "learning_rate": 4.43175879396985e-06, + "loss": 0.0002, + "step": 55925 + }, + { + "epoch": 28.19, + "grad_norm": 0.19065579771995544, + "learning_rate": 4.429246231155779e-06, + "loss": 0.0006, + "step": 55950 + }, + { + "epoch": 28.2, + "grad_norm": 0.30931228399276733, + "learning_rate": 4.426733668341709e-06, + "loss": 0.0006, + "step": 55975 + }, + { + "epoch": 28.21, + "grad_norm": 0.30858734250068665, + "learning_rate": 4.4242211055276386e-06, + "loss": 0.0003, + "step": 56000 + }, + { + "epoch": 28.21, + "eval_loss": 0.3624221980571747, + "eval_runtime": 781.7858, + "eval_samples_per_second": 1.802, + "eval_steps_per_second": 1.802, + "eval_wer": 22.172258734002074, + "step": 56000 + }, + { + "epoch": 28.22, + "grad_norm": 0.08408491313457489, + "learning_rate": 4.421708542713568e-06, + "loss": 0.0002, + "step": 56025 + }, + { + "epoch": 28.24, + "grad_norm": 0.06299348175525665, + "learning_rate": 4.419195979899498e-06, + "loss": 0.0005, + "step": 56050 + }, + { + "epoch": 28.25, + "grad_norm": 0.07827432453632355, + "learning_rate": 4.416683417085427e-06, + "loss": 0.0004, + "step": 56075 + }, + { + "epoch": 28.26, + "grad_norm": 0.8975947499275208, + "learning_rate": 4.4141708542713576e-06, + "loss": 0.0005, + "step": 56100 + }, + { + "epoch": 28.27, + "grad_norm": 1.2385715246200562, + "learning_rate": 4.411658291457287e-06, + "loss": 0.0005, + "step": 56125 + }, + { + "epoch": 28.29, + "grad_norm": 0.19437459111213684, + "learning_rate": 4.409145728643216e-06, + "loss": 0.0006, + "step": 56150 + }, + { + "epoch": 28.3, + "grad_norm": 1.1243386268615723, + "learning_rate": 4.406633165829146e-06, + "loss": 0.0006, + "step": 56175 + }, + { + "epoch": 28.31, + "grad_norm": 0.16806860268115997, + "learning_rate": 4.404120603015076e-06, + "loss": 0.0005, + "step": 56200 + }, + { + "epoch": 28.32, + "grad_norm": 0.5347501635551453, + "learning_rate": 4.401608040201005e-06, + "loss": 0.0003, + "step": 56225 + }, + { + "epoch": 28.34, + "grad_norm": 0.3039199113845825, + "learning_rate": 4.399095477386935e-06, + "loss": 0.0004, + "step": 56250 + }, + { + "epoch": 28.35, + "grad_norm": 0.7065151929855347, + "learning_rate": 4.3965829145728645e-06, + "loss": 0.0006, + "step": 56275 + }, + { + "epoch": 28.36, + "grad_norm": 0.7291182279586792, + "learning_rate": 4.394070351758795e-06, + "loss": 0.0006, + "step": 56300 + }, + { + "epoch": 28.38, + "grad_norm": 0.11983204632997513, + "learning_rate": 4.391557788944724e-06, + "loss": 0.0005, + "step": 56325 + }, + { + "epoch": 28.39, + "grad_norm": 0.32822325825691223, + "learning_rate": 4.389045226130654e-06, + "loss": 0.0011, + "step": 56350 + }, + { + "epoch": 28.4, + "grad_norm": 0.3108604848384857, + "learning_rate": 4.3865326633165835e-06, + "loss": 0.0005, + "step": 56375 + }, + { + "epoch": 28.41, + "grad_norm": 0.2673742175102234, + "learning_rate": 4.384020100502513e-06, + "loss": 0.0004, + "step": 56400 + }, + { + "epoch": 28.43, + "grad_norm": 0.8592258095741272, + "learning_rate": 4.381507537688442e-06, + "loss": 0.0004, + "step": 56425 + }, + { + "epoch": 28.44, + "grad_norm": 0.1229337826371193, + "learning_rate": 4.378994974874372e-06, + "loss": 0.0004, + "step": 56450 + }, + { + "epoch": 28.45, + "grad_norm": 0.470473051071167, + "learning_rate": 4.376482412060302e-06, + "loss": 0.0004, + "step": 56475 + }, + { + "epoch": 28.46, + "grad_norm": 0.09908214956521988, + "learning_rate": 4.373969849246231e-06, + "loss": 0.0004, + "step": 56500 + }, + { + "epoch": 28.48, + "grad_norm": 0.11872223764657974, + "learning_rate": 4.371457286432161e-06, + "loss": 0.0008, + "step": 56525 + }, + { + "epoch": 28.49, + "grad_norm": 1.120409607887268, + "learning_rate": 4.368944723618091e-06, + "loss": 0.0006, + "step": 56550 + }, + { + "epoch": 28.5, + "grad_norm": 0.6582888960838318, + "learning_rate": 4.366432160804021e-06, + "loss": 0.0008, + "step": 56575 + }, + { + "epoch": 28.51, + "grad_norm": 0.6543013453483582, + "learning_rate": 4.36391959798995e-06, + "loss": 0.0007, + "step": 56600 + }, + { + "epoch": 28.53, + "grad_norm": 0.4543153941631317, + "learning_rate": 4.36140703517588e-06, + "loss": 0.0011, + "step": 56625 + }, + { + "epoch": 28.54, + "grad_norm": 0.8596717119216919, + "learning_rate": 4.3588944723618094e-06, + "loss": 0.0007, + "step": 56650 + }, + { + "epoch": 28.55, + "grad_norm": 1.2596262693405151, + "learning_rate": 4.356381909547739e-06, + "loss": 0.0008, + "step": 56675 + }, + { + "epoch": 28.56, + "grad_norm": 0.8046451807022095, + "learning_rate": 4.353869346733668e-06, + "loss": 0.0009, + "step": 56700 + }, + { + "epoch": 28.58, + "grad_norm": 0.7610066533088684, + "learning_rate": 4.351356783919598e-06, + "loss": 0.0014, + "step": 56725 + }, + { + "epoch": 28.59, + "grad_norm": 0.3740193247795105, + "learning_rate": 4.3488442211055284e-06, + "loss": 0.0008, + "step": 56750 + }, + { + "epoch": 28.6, + "grad_norm": 0.3946913480758667, + "learning_rate": 4.346331658291458e-06, + "loss": 0.0013, + "step": 56775 + }, + { + "epoch": 28.61, + "grad_norm": 1.1675328016281128, + "learning_rate": 4.343819095477387e-06, + "loss": 0.0011, + "step": 56800 + }, + { + "epoch": 28.63, + "grad_norm": 0.23593860864639282, + "learning_rate": 4.341306532663317e-06, + "loss": 0.001, + "step": 56825 + }, + { + "epoch": 28.64, + "grad_norm": 0.12008998543024063, + "learning_rate": 4.3387939698492466e-06, + "loss": 0.0008, + "step": 56850 + }, + { + "epoch": 28.65, + "grad_norm": 0.1624538004398346, + "learning_rate": 4.336281407035176e-06, + "loss": 0.0007, + "step": 56875 + }, + { + "epoch": 28.66, + "grad_norm": 0.2177920937538147, + "learning_rate": 4.333768844221106e-06, + "loss": 0.0007, + "step": 56900 + }, + { + "epoch": 28.68, + "grad_norm": 0.23931287229061127, + "learning_rate": 4.331256281407035e-06, + "loss": 0.0005, + "step": 56925 + }, + { + "epoch": 28.69, + "grad_norm": 0.3201751410961151, + "learning_rate": 4.3287437185929656e-06, + "loss": 0.0009, + "step": 56950 + }, + { + "epoch": 28.7, + "grad_norm": 0.13656805455684662, + "learning_rate": 4.326231155778895e-06, + "loss": 0.0008, + "step": 56975 + }, + { + "epoch": 28.72, + "grad_norm": 0.4347066581249237, + "learning_rate": 4.323718592964824e-06, + "loss": 0.0006, + "step": 57000 + }, + { + "epoch": 28.72, + "eval_loss": 0.3676753342151642, + "eval_runtime": 647.1207, + "eval_samples_per_second": 2.177, + "eval_steps_per_second": 2.177, + "eval_wer": 22.73953649256313, + "step": 57000 + }, + { + "epoch": 28.73, + "grad_norm": 0.6821175217628479, + "learning_rate": 4.321206030150754e-06, + "loss": 0.0008, + "step": 57025 + }, + { + "epoch": 28.74, + "grad_norm": 0.8675858974456787, + "learning_rate": 4.318693467336684e-06, + "loss": 0.0007, + "step": 57050 + }, + { + "epoch": 28.75, + "grad_norm": 0.7905115485191345, + "learning_rate": 4.316180904522613e-06, + "loss": 0.0009, + "step": 57075 + }, + { + "epoch": 28.77, + "grad_norm": 0.3380034565925598, + "learning_rate": 4.313668341708543e-06, + "loss": 0.0008, + "step": 57100 + }, + { + "epoch": 28.78, + "grad_norm": 0.5678602457046509, + "learning_rate": 4.3111557788944725e-06, + "loss": 0.0007, + "step": 57125 + }, + { + "epoch": 28.79, + "grad_norm": 0.4038754105567932, + "learning_rate": 4.308643216080403e-06, + "loss": 0.0006, + "step": 57150 + }, + { + "epoch": 28.8, + "grad_norm": 0.8682851195335388, + "learning_rate": 4.306130653266332e-06, + "loss": 0.0005, + "step": 57175 + }, + { + "epoch": 28.82, + "grad_norm": 0.2736469507217407, + "learning_rate": 4.303618090452262e-06, + "loss": 0.0008, + "step": 57200 + }, + { + "epoch": 28.83, + "grad_norm": 0.056282587349414825, + "learning_rate": 4.3011055276381915e-06, + "loss": 0.0005, + "step": 57225 + }, + { + "epoch": 28.84, + "grad_norm": 0.7956998944282532, + "learning_rate": 4.298592964824121e-06, + "loss": 0.0006, + "step": 57250 + }, + { + "epoch": 28.85, + "grad_norm": 0.39681777358055115, + "learning_rate": 4.29608040201005e-06, + "loss": 0.0005, + "step": 57275 + }, + { + "epoch": 28.87, + "grad_norm": 1.7592027187347412, + "learning_rate": 4.29356783919598e-06, + "loss": 0.0005, + "step": 57300 + }, + { + "epoch": 28.88, + "grad_norm": 0.40772297978401184, + "learning_rate": 4.29105527638191e-06, + "loss": 0.0003, + "step": 57325 + }, + { + "epoch": 28.89, + "grad_norm": 0.3130989074707031, + "learning_rate": 4.28854271356784e-06, + "loss": 0.0003, + "step": 57350 + }, + { + "epoch": 28.9, + "grad_norm": 0.22975189983844757, + "learning_rate": 4.286030150753769e-06, + "loss": 0.0004, + "step": 57375 + }, + { + "epoch": 28.92, + "grad_norm": 0.8328010439872742, + "learning_rate": 4.283517587939699e-06, + "loss": 0.0006, + "step": 57400 + }, + { + "epoch": 28.93, + "grad_norm": 0.41630086302757263, + "learning_rate": 4.281005025125629e-06, + "loss": 0.0005, + "step": 57425 + }, + { + "epoch": 28.94, + "grad_norm": 0.48607903718948364, + "learning_rate": 4.278492462311558e-06, + "loss": 0.0005, + "step": 57450 + }, + { + "epoch": 28.95, + "grad_norm": 0.3274035155773163, + "learning_rate": 4.275979899497488e-06, + "loss": 0.0005, + "step": 57475 + }, + { + "epoch": 28.97, + "grad_norm": 0.2861725687980652, + "learning_rate": 4.2734673366834174e-06, + "loss": 0.0006, + "step": 57500 + }, + { + "epoch": 28.98, + "grad_norm": 0.6594395041465759, + "learning_rate": 4.270954773869347e-06, + "loss": 0.0005, + "step": 57525 + }, + { + "epoch": 28.99, + "grad_norm": 0.2788364887237549, + "learning_rate": 4.268442211055277e-06, + "loss": 0.0003, + "step": 57550 + }, + { + "epoch": 29.01, + "grad_norm": 0.4915536344051361, + "learning_rate": 4.265929648241206e-06, + "loss": 0.0004, + "step": 57575 + }, + { + "epoch": 29.02, + "grad_norm": 0.14363212883472443, + "learning_rate": 4.2634170854271364e-06, + "loss": 0.0003, + "step": 57600 + }, + { + "epoch": 29.03, + "grad_norm": 0.7467771172523499, + "learning_rate": 4.260904522613066e-06, + "loss": 0.0005, + "step": 57625 + }, + { + "epoch": 29.04, + "grad_norm": 0.7711644172668457, + "learning_rate": 4.258391959798995e-06, + "loss": 0.0005, + "step": 57650 + }, + { + "epoch": 29.06, + "grad_norm": 0.3670036494731903, + "learning_rate": 4.255879396984925e-06, + "loss": 0.0004, + "step": 57675 + }, + { + "epoch": 29.07, + "grad_norm": 0.2002963125705719, + "learning_rate": 4.2533668341708546e-06, + "loss": 0.0005, + "step": 57700 + }, + { + "epoch": 29.08, + "grad_norm": 0.13585589826107025, + "learning_rate": 4.250854271356784e-06, + "loss": 0.0003, + "step": 57725 + }, + { + "epoch": 29.09, + "grad_norm": 0.4625987112522125, + "learning_rate": 4.248341708542714e-06, + "loss": 0.0003, + "step": 57750 + }, + { + "epoch": 29.11, + "grad_norm": 0.43475794792175293, + "learning_rate": 4.245829145728643e-06, + "loss": 0.0003, + "step": 57775 + }, + { + "epoch": 29.12, + "grad_norm": 0.46216335892677307, + "learning_rate": 4.2433165829145736e-06, + "loss": 0.0004, + "step": 57800 + }, + { + "epoch": 29.13, + "grad_norm": 0.10933476686477661, + "learning_rate": 4.240804020100503e-06, + "loss": 0.0003, + "step": 57825 + }, + { + "epoch": 29.14, + "grad_norm": 0.3376076817512512, + "learning_rate": 4.238291457286432e-06, + "loss": 0.0005, + "step": 57850 + }, + { + "epoch": 29.16, + "grad_norm": 0.09248408675193787, + "learning_rate": 4.235778894472362e-06, + "loss": 0.0004, + "step": 57875 + }, + { + "epoch": 29.17, + "grad_norm": 0.165025994181633, + "learning_rate": 4.233266331658292e-06, + "loss": 0.0004, + "step": 57900 + }, + { + "epoch": 29.18, + "grad_norm": 0.20500481128692627, + "learning_rate": 4.230753768844221e-06, + "loss": 0.0002, + "step": 57925 + }, + { + "epoch": 29.19, + "grad_norm": 1.7176584005355835, + "learning_rate": 4.228241206030151e-06, + "loss": 0.0003, + "step": 57950 + }, + { + "epoch": 29.21, + "grad_norm": 0.04315100982785225, + "learning_rate": 4.225829145728644e-06, + "loss": 0.0004, + "step": 57975 + }, + { + "epoch": 29.22, + "grad_norm": 0.7562754154205322, + "learning_rate": 4.223316582914574e-06, + "loss": 0.0005, + "step": 58000 + }, + { + "epoch": 29.22, + "eval_loss": 0.36634162068367004, + "eval_runtime": 646.3446, + "eval_samples_per_second": 2.18, + "eval_steps_per_second": 2.18, + "eval_wer": 22.877896921480456, + "step": 58000 + }, + { + "epoch": 29.23, + "grad_norm": 0.18568870425224304, + "learning_rate": 4.220804020100503e-06, + "loss": 0.0003, + "step": 58025 + }, + { + "epoch": 29.24, + "grad_norm": 0.02903875522315502, + "learning_rate": 4.218291457286432e-06, + "loss": 0.0002, + "step": 58050 + }, + { + "epoch": 29.26, + "grad_norm": 0.5034275054931641, + "learning_rate": 4.215778894472362e-06, + "loss": 0.0003, + "step": 58075 + }, + { + "epoch": 29.27, + "grad_norm": 0.24400202929973602, + "learning_rate": 4.213266331658292e-06, + "loss": 0.0004, + "step": 58100 + }, + { + "epoch": 29.28, + "grad_norm": 0.43526986241340637, + "learning_rate": 4.210753768844221e-06, + "loss": 0.0004, + "step": 58125 + }, + { + "epoch": 29.29, + "grad_norm": 0.8774734735488892, + "learning_rate": 4.2082412060301505e-06, + "loss": 0.0003, + "step": 58150 + }, + { + "epoch": 29.31, + "grad_norm": 0.2758621275424957, + "learning_rate": 4.205728643216081e-06, + "loss": 0.0004, + "step": 58175 + }, + { + "epoch": 29.32, + "grad_norm": 0.09988962113857269, + "learning_rate": 4.203216080402011e-06, + "loss": 0.0002, + "step": 58200 + }, + { + "epoch": 29.33, + "grad_norm": 0.04445599764585495, + "learning_rate": 4.20070351758794e-06, + "loss": 0.0002, + "step": 58225 + }, + { + "epoch": 29.35, + "grad_norm": 0.6248559355735779, + "learning_rate": 4.1981909547738695e-06, + "loss": 0.0002, + "step": 58250 + }, + { + "epoch": 29.36, + "grad_norm": 0.5372090339660645, + "learning_rate": 4.1956783919598e-06, + "loss": 0.0003, + "step": 58275 + }, + { + "epoch": 29.37, + "grad_norm": 0.05950794741511345, + "learning_rate": 4.193165829145729e-06, + "loss": 0.0003, + "step": 58300 + }, + { + "epoch": 29.38, + "grad_norm": 0.2952395975589752, + "learning_rate": 4.190653266331658e-06, + "loss": 0.0005, + "step": 58325 + }, + { + "epoch": 29.4, + "grad_norm": 1.0344479084014893, + "learning_rate": 4.188140703517588e-06, + "loss": 0.0005, + "step": 58350 + }, + { + "epoch": 29.41, + "grad_norm": 0.07533106207847595, + "learning_rate": 4.185628140703518e-06, + "loss": 0.0008, + "step": 58375 + }, + { + "epoch": 29.42, + "grad_norm": 0.19977255165576935, + "learning_rate": 4.183115577889448e-06, + "loss": 0.0005, + "step": 58400 + }, + { + "epoch": 29.43, + "grad_norm": 0.21329541504383087, + "learning_rate": 4.180603015075377e-06, + "loss": 0.0003, + "step": 58425 + }, + { + "epoch": 29.45, + "grad_norm": 0.2345517873764038, + "learning_rate": 4.178090452261307e-06, + "loss": 0.0006, + "step": 58450 + }, + { + "epoch": 29.46, + "grad_norm": 0.028459738940000534, + "learning_rate": 4.175577889447237e-06, + "loss": 0.0004, + "step": 58475 + }, + { + "epoch": 29.47, + "grad_norm": 0.8179320693016052, + "learning_rate": 4.173065326633166e-06, + "loss": 0.0005, + "step": 58500 + }, + { + "epoch": 29.48, + "grad_norm": 0.8002499341964722, + "learning_rate": 4.1705527638190955e-06, + "loss": 0.0004, + "step": 58525 + }, + { + "epoch": 29.5, + "grad_norm": 0.18765227496623993, + "learning_rate": 4.168040201005026e-06, + "loss": 0.0004, + "step": 58550 + }, + { + "epoch": 29.51, + "grad_norm": 0.20206257700920105, + "learning_rate": 4.165527638190955e-06, + "loss": 0.0008, + "step": 58575 + }, + { + "epoch": 29.52, + "grad_norm": 0.7327660322189331, + "learning_rate": 4.163015075376885e-06, + "loss": 0.0005, + "step": 58600 + }, + { + "epoch": 29.53, + "grad_norm": 1.6903936862945557, + "learning_rate": 4.1605025125628145e-06, + "loss": 0.0007, + "step": 58625 + }, + { + "epoch": 29.55, + "grad_norm": 0.523650050163269, + "learning_rate": 4.157989949748744e-06, + "loss": 0.0004, + "step": 58650 + }, + { + "epoch": 29.56, + "grad_norm": 0.5737274289131165, + "learning_rate": 4.155477386934674e-06, + "loss": 0.0006, + "step": 58675 + }, + { + "epoch": 29.57, + "grad_norm": 0.43866389989852905, + "learning_rate": 4.152964824120603e-06, + "loss": 0.0005, + "step": 58700 + }, + { + "epoch": 29.58, + "grad_norm": 0.3836389183998108, + "learning_rate": 4.150452261306533e-06, + "loss": 0.0006, + "step": 58725 + }, + { + "epoch": 29.6, + "grad_norm": 0.2658005654811859, + "learning_rate": 4.147939698492463e-06, + "loss": 0.0005, + "step": 58750 + }, + { + "epoch": 29.61, + "grad_norm": 0.9957432150840759, + "learning_rate": 4.145427135678392e-06, + "loss": 0.0006, + "step": 58775 + }, + { + "epoch": 29.62, + "grad_norm": 0.4388526380062103, + "learning_rate": 4.142914572864322e-06, + "loss": 0.0005, + "step": 58800 + }, + { + "epoch": 29.63, + "grad_norm": 0.48335981369018555, + "learning_rate": 4.140402010050252e-06, + "loss": 0.0008, + "step": 58825 + }, + { + "epoch": 29.65, + "grad_norm": 0.6199666261672974, + "learning_rate": 4.137889447236182e-06, + "loss": 0.0006, + "step": 58850 + }, + { + "epoch": 29.66, + "grad_norm": 0.406076043844223, + "learning_rate": 4.135376884422111e-06, + "loss": 0.0006, + "step": 58875 + }, + { + "epoch": 29.67, + "grad_norm": 0.1077524796128273, + "learning_rate": 4.13286432160804e-06, + "loss": 0.0005, + "step": 58900 + }, + { + "epoch": 29.69, + "grad_norm": 0.3811541795730591, + "learning_rate": 4.13035175879397e-06, + "loss": 0.0007, + "step": 58925 + }, + { + "epoch": 29.7, + "grad_norm": 1.440758228302002, + "learning_rate": 4.1278391959799e-06, + "loss": 0.0006, + "step": 58950 + }, + { + "epoch": 29.71, + "grad_norm": 0.47515958547592163, + "learning_rate": 4.125326633165829e-06, + "loss": 0.0004, + "step": 58975 + }, + { + "epoch": 29.72, + "grad_norm": 0.956475019454956, + "learning_rate": 4.122814070351759e-06, + "loss": 0.0006, + "step": 59000 + }, + { + "epoch": 29.72, + "eval_loss": 0.37018460035324097, + "eval_runtime": 650.5122, + "eval_samples_per_second": 2.166, + "eval_steps_per_second": 2.166, + "eval_wer": 23.348322379799377, + "step": 59000 + }, + { + "epoch": 29.74, + "grad_norm": 1.6364458799362183, + "learning_rate": 4.120301507537689e-06, + "loss": 0.0008, + "step": 59025 + }, + { + "epoch": 29.75, + "grad_norm": 0.5471516251564026, + "learning_rate": 4.117788944723619e-06, + "loss": 0.0007, + "step": 59050 + }, + { + "epoch": 29.76, + "grad_norm": 0.9236280918121338, + "learning_rate": 4.115276381909548e-06, + "loss": 0.0007, + "step": 59075 + }, + { + "epoch": 29.77, + "grad_norm": 0.1981869339942932, + "learning_rate": 4.1127638190954775e-06, + "loss": 0.0007, + "step": 59100 + }, + { + "epoch": 29.79, + "grad_norm": 0.7604771852493286, + "learning_rate": 4.110251256281408e-06, + "loss": 0.0004, + "step": 59125 + }, + { + "epoch": 29.8, + "grad_norm": 0.5981962084770203, + "learning_rate": 4.107738693467337e-06, + "loss": 0.0007, + "step": 59150 + }, + { + "epoch": 29.81, + "grad_norm": 0.9889633655548096, + "learning_rate": 4.105226130653266e-06, + "loss": 0.0004, + "step": 59175 + }, + { + "epoch": 29.82, + "grad_norm": 0.33064815402030945, + "learning_rate": 4.102713567839196e-06, + "loss": 0.0007, + "step": 59200 + }, + { + "epoch": 29.84, + "grad_norm": 0.3253353238105774, + "learning_rate": 4.100201005025126e-06, + "loss": 0.0005, + "step": 59225 + }, + { + "epoch": 29.85, + "grad_norm": 1.7824808359146118, + "learning_rate": 4.097688442211056e-06, + "loss": 0.0006, + "step": 59250 + }, + { + "epoch": 29.86, + "grad_norm": 0.1777506172657013, + "learning_rate": 4.095175879396985e-06, + "loss": 0.0004, + "step": 59275 + }, + { + "epoch": 29.87, + "grad_norm": 0.09130828827619553, + "learning_rate": 4.092663316582915e-06, + "loss": 0.0005, + "step": 59300 + }, + { + "epoch": 29.89, + "grad_norm": 0.26124000549316406, + "learning_rate": 4.090150753768845e-06, + "loss": 0.0004, + "step": 59325 + }, + { + "epoch": 29.9, + "grad_norm": 0.2676754295825958, + "learning_rate": 4.087638190954774e-06, + "loss": 0.0004, + "step": 59350 + }, + { + "epoch": 29.91, + "grad_norm": 0.7923325896263123, + "learning_rate": 4.0851256281407035e-06, + "loss": 0.0004, + "step": 59375 + }, + { + "epoch": 29.92, + "grad_norm": 0.06875083595514297, + "learning_rate": 4.082613065326634e-06, + "loss": 0.0007, + "step": 59400 + }, + { + "epoch": 29.94, + "grad_norm": 1.1524797677993774, + "learning_rate": 4.080100502512564e-06, + "loss": 0.0006, + "step": 59425 + }, + { + "epoch": 29.95, + "grad_norm": 1.1805469989776611, + "learning_rate": 4.077587939698493e-06, + "loss": 0.0006, + "step": 59450 + }, + { + "epoch": 29.96, + "grad_norm": 0.523131251335144, + "learning_rate": 4.0750753768844225e-06, + "loss": 0.0005, + "step": 59475 + }, + { + "epoch": 29.97, + "grad_norm": 0.9448018074035645, + "learning_rate": 4.072562814070352e-06, + "loss": 0.0004, + "step": 59500 + }, + { + "epoch": 29.99, + "grad_norm": 1.3744333982467651, + "learning_rate": 4.070050251256282e-06, + "loss": 0.0007, + "step": 59525 + }, + { + "epoch": 30.0, + "grad_norm": 0.2589998245239258, + "learning_rate": 4.067537688442211e-06, + "loss": 0.0006, + "step": 59550 + }, + { + "epoch": 30.01, + "grad_norm": 0.44579869508743286, + "learning_rate": 4.065025125628141e-06, + "loss": 0.0004, + "step": 59575 + }, + { + "epoch": 30.03, + "grad_norm": 0.35231125354766846, + "learning_rate": 4.062512562814071e-06, + "loss": 0.0003, + "step": 59600 + }, + { + "epoch": 30.04, + "grad_norm": 1.131148099899292, + "learning_rate": 4.060000000000001e-06, + "loss": 0.0005, + "step": 59625 + }, + { + "epoch": 30.05, + "grad_norm": 0.154410719871521, + "learning_rate": 4.05748743718593e-06, + "loss": 0.0005, + "step": 59650 + }, + { + "epoch": 30.06, + "grad_norm": 0.5527713894844055, + "learning_rate": 4.05497487437186e-06, + "loss": 0.0006, + "step": 59675 + }, + { + "epoch": 30.08, + "grad_norm": 0.12832514941692352, + "learning_rate": 4.05246231155779e-06, + "loss": 0.0003, + "step": 59700 + }, + { + "epoch": 30.09, + "grad_norm": 0.059667546302080154, + "learning_rate": 4.049949748743719e-06, + "loss": 0.0006, + "step": 59725 + }, + { + "epoch": 30.1, + "grad_norm": 0.9049032330513, + "learning_rate": 4.047437185929648e-06, + "loss": 0.0004, + "step": 59750 + }, + { + "epoch": 30.11, + "grad_norm": 0.42407527565956116, + "learning_rate": 4.044924623115578e-06, + "loss": 0.0005, + "step": 59775 + }, + { + "epoch": 30.13, + "grad_norm": 0.2845335304737091, + "learning_rate": 4.042412060301508e-06, + "loss": 0.0002, + "step": 59800 + }, + { + "epoch": 30.14, + "grad_norm": 0.2934589087963104, + "learning_rate": 4.039899497487437e-06, + "loss": 0.0003, + "step": 59825 + }, + { + "epoch": 30.15, + "grad_norm": 0.12243688106536865, + "learning_rate": 4.037386934673367e-06, + "loss": 0.0002, + "step": 59850 + }, + { + "epoch": 30.16, + "grad_norm": 0.03510167449712753, + "learning_rate": 4.034874371859297e-06, + "loss": 0.0002, + "step": 59875 + }, + { + "epoch": 30.18, + "grad_norm": 0.10497930645942688, + "learning_rate": 4.032361809045227e-06, + "loss": 0.0004, + "step": 59900 + }, + { + "epoch": 30.19, + "grad_norm": 1.603574514389038, + "learning_rate": 4.029849246231156e-06, + "loss": 0.0003, + "step": 59925 + }, + { + "epoch": 30.2, + "grad_norm": 0.07143130153417587, + "learning_rate": 4.0273366834170855e-06, + "loss": 0.0003, + "step": 59950 + }, + { + "epoch": 30.21, + "grad_norm": 0.9120334386825562, + "learning_rate": 4.024824120603016e-06, + "loss": 0.0004, + "step": 59975 + }, + { + "epoch": 30.23, + "grad_norm": 0.6783850193023682, + "learning_rate": 4.022311557788945e-06, + "loss": 0.0003, + "step": 60000 + }, + { + "epoch": 30.23, + "eval_loss": 0.3732685446739197, + "eval_runtime": 646.3241, + "eval_samples_per_second": 2.18, + "eval_steps_per_second": 2.18, + "eval_wer": 22.490487720511933, + "step": 60000 + }, + { + "epoch": 30.24, + "grad_norm": 0.0949537456035614, + "learning_rate": 4.019798994974874e-06, + "loss": 0.0003, + "step": 60025 + }, + { + "epoch": 30.25, + "grad_norm": 0.08624821156263351, + "learning_rate": 4.0172864321608045e-06, + "loss": 0.0003, + "step": 60050 + }, + { + "epoch": 30.26, + "grad_norm": 0.613463819026947, + "learning_rate": 4.014874371859297e-06, + "loss": 0.0004, + "step": 60075 + }, + { + "epoch": 30.28, + "grad_norm": 1.7669380903244019, + "learning_rate": 4.012361809045226e-06, + "loss": 0.0004, + "step": 60100 + }, + { + "epoch": 30.29, + "grad_norm": 0.22005651891231537, + "learning_rate": 4.009849246231156e-06, + "loss": 0.0005, + "step": 60125 + }, + { + "epoch": 30.3, + "grad_norm": 0.448355108499527, + "learning_rate": 4.007336683417086e-06, + "loss": 0.0004, + "step": 60150 + }, + { + "epoch": 30.31, + "grad_norm": 0.3999320864677429, + "learning_rate": 4.004824120603015e-06, + "loss": 0.0004, + "step": 60175 + }, + { + "epoch": 30.33, + "grad_norm": 0.5650457143783569, + "learning_rate": 4.002311557788945e-06, + "loss": 0.0003, + "step": 60200 + }, + { + "epoch": 30.34, + "grad_norm": 0.3573535084724426, + "learning_rate": 3.9997989949748745e-06, + "loss": 0.0003, + "step": 60225 + }, + { + "epoch": 30.35, + "grad_norm": 0.04291848465800285, + "learning_rate": 3.997286432160805e-06, + "loss": 0.0004, + "step": 60250 + }, + { + "epoch": 30.37, + "grad_norm": 0.6211608052253723, + "learning_rate": 3.994773869346734e-06, + "loss": 0.0003, + "step": 60275 + }, + { + "epoch": 30.38, + "grad_norm": 0.09989487379789352, + "learning_rate": 3.992261306532663e-06, + "loss": 0.0003, + "step": 60300 + }, + { + "epoch": 30.39, + "grad_norm": 0.6828473210334778, + "learning_rate": 3.9897487437185935e-06, + "loss": 0.0005, + "step": 60325 + }, + { + "epoch": 30.4, + "grad_norm": 0.4750407338142395, + "learning_rate": 3.987236180904523e-06, + "loss": 0.0008, + "step": 60350 + }, + { + "epoch": 30.42, + "grad_norm": 0.12395069003105164, + "learning_rate": 3.984723618090452e-06, + "loss": 0.0006, + "step": 60375 + }, + { + "epoch": 30.43, + "grad_norm": 0.19850021600723267, + "learning_rate": 3.982211055276382e-06, + "loss": 0.0007, + "step": 60400 + }, + { + "epoch": 30.44, + "grad_norm": 1.2987253665924072, + "learning_rate": 3.979698492462312e-06, + "loss": 0.0005, + "step": 60425 + }, + { + "epoch": 30.45, + "grad_norm": 0.12956413626670837, + "learning_rate": 3.977185929648242e-06, + "loss": 0.0005, + "step": 60450 + }, + { + "epoch": 30.47, + "grad_norm": 0.8285446166992188, + "learning_rate": 3.974673366834171e-06, + "loss": 0.0004, + "step": 60475 + }, + { + "epoch": 30.48, + "grad_norm": 0.9099289774894714, + "learning_rate": 3.972160804020101e-06, + "loss": 0.0003, + "step": 60500 + }, + { + "epoch": 30.49, + "grad_norm": 1.001538634300232, + "learning_rate": 3.969648241206031e-06, + "loss": 0.0004, + "step": 60525 + }, + { + "epoch": 30.5, + "grad_norm": 0.4828273057937622, + "learning_rate": 3.96713567839196e-06, + "loss": 0.0003, + "step": 60550 + }, + { + "epoch": 30.52, + "grad_norm": 1.4622454643249512, + "learning_rate": 3.964623115577889e-06, + "loss": 0.0005, + "step": 60575 + }, + { + "epoch": 30.53, + "grad_norm": 0.2010238915681839, + "learning_rate": 3.9621105527638195e-06, + "loss": 0.0004, + "step": 60600 + }, + { + "epoch": 30.54, + "grad_norm": 0.11599469929933548, + "learning_rate": 3.959597989949749e-06, + "loss": 0.0007, + "step": 60625 + }, + { + "epoch": 30.55, + "grad_norm": 0.18616123497486115, + "learning_rate": 3.957085427135678e-06, + "loss": 0.0004, + "step": 60650 + }, + { + "epoch": 30.57, + "grad_norm": 0.4725811779499054, + "learning_rate": 3.954572864321608e-06, + "loss": 0.0005, + "step": 60675 + }, + { + "epoch": 30.58, + "grad_norm": 1.0674340724945068, + "learning_rate": 3.9520603015075385e-06, + "loss": 0.0006, + "step": 60700 + }, + { + "epoch": 30.59, + "grad_norm": 0.5848013758659363, + "learning_rate": 3.949547738693468e-06, + "loss": 0.0004, + "step": 60725 + }, + { + "epoch": 30.6, + "grad_norm": 0.26734229922294617, + "learning_rate": 3.947035175879397e-06, + "loss": 0.0006, + "step": 60750 + }, + { + "epoch": 30.62, + "grad_norm": 0.08038675040006638, + "learning_rate": 3.944522613065327e-06, + "loss": 0.0006, + "step": 60775 + }, + { + "epoch": 30.63, + "grad_norm": 0.4599511921405792, + "learning_rate": 3.942010050251257e-06, + "loss": 0.0005, + "step": 60800 + }, + { + "epoch": 30.64, + "grad_norm": 1.278779149055481, + "learning_rate": 3.939497487437186e-06, + "loss": 0.0005, + "step": 60825 + }, + { + "epoch": 30.65, + "grad_norm": 2.023319959640503, + "learning_rate": 3.936984924623115e-06, + "loss": 0.0006, + "step": 60850 + }, + { + "epoch": 30.67, + "grad_norm": 0.48264050483703613, + "learning_rate": 3.934472361809045e-06, + "loss": 0.0004, + "step": 60875 + }, + { + "epoch": 30.68, + "grad_norm": 0.313340961933136, + "learning_rate": 3.931959798994976e-06, + "loss": 0.0006, + "step": 60900 + }, + { + "epoch": 30.69, + "grad_norm": 0.23155102133750916, + "learning_rate": 3.929447236180905e-06, + "loss": 0.0005, + "step": 60925 + }, + { + "epoch": 30.71, + "grad_norm": 0.7064458727836609, + "learning_rate": 3.926934673366834e-06, + "loss": 0.0006, + "step": 60950 + }, + { + "epoch": 30.72, + "grad_norm": 0.0884753167629242, + "learning_rate": 3.924422110552764e-06, + "loss": 0.0004, + "step": 60975 + }, + { + "epoch": 30.73, + "grad_norm": 0.06730210036039352, + "learning_rate": 3.921909547738694e-06, + "loss": 0.0005, + "step": 61000 + }, + { + "epoch": 30.73, + "eval_loss": 0.37581801414489746, + "eval_runtime": 645.7901, + "eval_samples_per_second": 2.182, + "eval_steps_per_second": 2.182, + "eval_wer": 22.80871670702179, + "step": 61000 + }, + { + "epoch": 30.74, + "grad_norm": 0.38679373264312744, + "learning_rate": 3.919396984924623e-06, + "loss": 0.0006, + "step": 61025 + }, + { + "epoch": 30.76, + "grad_norm": 1.362016201019287, + "learning_rate": 3.916884422110553e-06, + "loss": 0.0005, + "step": 61050 + }, + { + "epoch": 30.77, + "grad_norm": 0.26858440041542053, + "learning_rate": 3.914371859296483e-06, + "loss": 0.0005, + "step": 61075 + }, + { + "epoch": 30.78, + "grad_norm": 0.19505997002124786, + "learning_rate": 3.911859296482413e-06, + "loss": 0.0003, + "step": 61100 + }, + { + "epoch": 30.79, + "grad_norm": 0.0629916712641716, + "learning_rate": 3.909346733668342e-06, + "loss": 0.0006, + "step": 61125 + }, + { + "epoch": 30.81, + "grad_norm": 0.16181036829948425, + "learning_rate": 3.906834170854271e-06, + "loss": 0.0004, + "step": 61150 + }, + { + "epoch": 30.82, + "grad_norm": 0.8519290685653687, + "learning_rate": 3.9043216080402015e-06, + "loss": 0.0004, + "step": 61175 + }, + { + "epoch": 30.83, + "grad_norm": 0.3916480541229248, + "learning_rate": 3.901809045226131e-06, + "loss": 0.0003, + "step": 61200 + }, + { + "epoch": 30.84, + "grad_norm": 0.1823578029870987, + "learning_rate": 3.89929648241206e-06, + "loss": 0.0005, + "step": 61225 + }, + { + "epoch": 30.86, + "grad_norm": 0.525839626789093, + "learning_rate": 3.89678391959799e-06, + "loss": 0.0006, + "step": 61250 + }, + { + "epoch": 30.87, + "grad_norm": 0.6914676427841187, + "learning_rate": 3.89427135678392e-06, + "loss": 0.0003, + "step": 61275 + }, + { + "epoch": 30.88, + "grad_norm": 1.1129640340805054, + "learning_rate": 3.89175879396985e-06, + "loss": 0.0005, + "step": 61300 + }, + { + "epoch": 30.89, + "grad_norm": 0.04879957437515259, + "learning_rate": 3.889246231155779e-06, + "loss": 0.0006, + "step": 61325 + }, + { + "epoch": 30.91, + "grad_norm": 1.1155563592910767, + "learning_rate": 3.886733668341709e-06, + "loss": 0.0006, + "step": 61350 + }, + { + "epoch": 30.92, + "grad_norm": 0.35383549332618713, + "learning_rate": 3.884221105527639e-06, + "loss": 0.0004, + "step": 61375 + }, + { + "epoch": 30.93, + "grad_norm": 0.38548916578292847, + "learning_rate": 3.881708542713568e-06, + "loss": 0.0004, + "step": 61400 + }, + { + "epoch": 30.94, + "grad_norm": 0.1265828013420105, + "learning_rate": 3.879195979899497e-06, + "loss": 0.0004, + "step": 61425 + }, + { + "epoch": 30.96, + "grad_norm": 0.2077447772026062, + "learning_rate": 3.8766834170854275e-06, + "loss": 0.0004, + "step": 61450 + }, + { + "epoch": 30.97, + "grad_norm": 0.43719515204429626, + "learning_rate": 3.874170854271357e-06, + "loss": 0.0004, + "step": 61475 + }, + { + "epoch": 30.98, + "grad_norm": 0.25596338510513306, + "learning_rate": 3.871658291457287e-06, + "loss": 0.0004, + "step": 61500 + }, + { + "epoch": 30.99, + "grad_norm": 0.24157127737998962, + "learning_rate": 3.869145728643216e-06, + "loss": 0.0005, + "step": 61525 + }, + { + "epoch": 31.01, + "grad_norm": 0.2741214334964752, + "learning_rate": 3.8666331658291465e-06, + "loss": 0.0006, + "step": 61550 + }, + { + "epoch": 31.02, + "grad_norm": 0.15417811274528503, + "learning_rate": 3.864120603015076e-06, + "loss": 0.0004, + "step": 61575 + }, + { + "epoch": 31.03, + "grad_norm": 0.18006564676761627, + "learning_rate": 3.861608040201005e-06, + "loss": 0.0004, + "step": 61600 + }, + { + "epoch": 31.05, + "grad_norm": 0.6540391445159912, + "learning_rate": 3.859095477386935e-06, + "loss": 0.0004, + "step": 61625 + }, + { + "epoch": 31.06, + "grad_norm": 0.21219852566719055, + "learning_rate": 3.856683417085428e-06, + "loss": 0.0004, + "step": 61650 + }, + { + "epoch": 31.07, + "grad_norm": 0.12796539068222046, + "learning_rate": 3.854170854271357e-06, + "loss": 0.0002, + "step": 61675 + }, + { + "epoch": 31.08, + "grad_norm": 0.03786884620785713, + "learning_rate": 3.851658291457287e-06, + "loss": 0.0002, + "step": 61700 + }, + { + "epoch": 31.1, + "grad_norm": 0.10505225509405136, + "learning_rate": 3.8491457286432165e-06, + "loss": 0.0002, + "step": 61725 + }, + { + "epoch": 31.11, + "grad_norm": 0.2574862837791443, + "learning_rate": 3.846633165829146e-06, + "loss": 0.0002, + "step": 61750 + }, + { + "epoch": 31.12, + "grad_norm": 0.09054882079362869, + "learning_rate": 3.844120603015076e-06, + "loss": 0.0002, + "step": 61775 + }, + { + "epoch": 31.13, + "grad_norm": 0.25638747215270996, + "learning_rate": 3.841608040201005e-06, + "loss": 0.0004, + "step": 61800 + }, + { + "epoch": 31.15, + "grad_norm": 0.5020123720169067, + "learning_rate": 3.839095477386935e-06, + "loss": 0.0004, + "step": 61825 + }, + { + "epoch": 31.16, + "grad_norm": 0.1703236848115921, + "learning_rate": 3.836582914572865e-06, + "loss": 0.0003, + "step": 61850 + }, + { + "epoch": 31.17, + "grad_norm": 0.22640874981880188, + "learning_rate": 3.834070351758794e-06, + "loss": 0.0003, + "step": 61875 + }, + { + "epoch": 31.18, + "grad_norm": 0.21768644452095032, + "learning_rate": 3.831557788944724e-06, + "loss": 0.0002, + "step": 61900 + }, + { + "epoch": 31.2, + "grad_norm": 0.32308635115623474, + "learning_rate": 3.829045226130654e-06, + "loss": 0.0003, + "step": 61925 + }, + { + "epoch": 31.21, + "grad_norm": 0.20229199528694153, + "learning_rate": 3.826532663316583e-06, + "loss": 0.0004, + "step": 61950 + }, + { + "epoch": 31.22, + "grad_norm": 0.10681883990764618, + "learning_rate": 3.824020100502513e-06, + "loss": 0.0005, + "step": 61975 + }, + { + "epoch": 31.23, + "grad_norm": 0.28597140312194824, + "learning_rate": 3.8215075376884424e-06, + "loss": 0.0002, + "step": 62000 + }, + { + "epoch": 31.23, + "eval_loss": 0.37976065278053284, + "eval_runtime": 653.558, + "eval_samples_per_second": 2.156, + "eval_steps_per_second": 2.156, + "eval_wer": 23.037011414735385, + "step": 62000 + }, + { + "epoch": 31.25, + "grad_norm": 0.4050130248069763, + "learning_rate": 3.818994974874372e-06, + "loss": 0.0002, + "step": 62025 + }, + { + "epoch": 31.26, + "grad_norm": 0.29295334219932556, + "learning_rate": 3.816482412060302e-06, + "loss": 0.0002, + "step": 62050 + }, + { + "epoch": 31.27, + "grad_norm": 0.05594494193792343, + "learning_rate": 3.8139698492462312e-06, + "loss": 0.0002, + "step": 62075 + }, + { + "epoch": 31.28, + "grad_norm": 0.06369101256132126, + "learning_rate": 3.811457286432161e-06, + "loss": 0.0002, + "step": 62100 + }, + { + "epoch": 31.3, + "grad_norm": 0.04477281868457794, + "learning_rate": 3.808944723618091e-06, + "loss": 0.0001, + "step": 62125 + }, + { + "epoch": 31.31, + "grad_norm": 0.09269160777330399, + "learning_rate": 3.8064321608040205e-06, + "loss": 0.0002, + "step": 62150 + }, + { + "epoch": 31.32, + "grad_norm": 0.5558028817176819, + "learning_rate": 3.8039195979899502e-06, + "loss": 0.0002, + "step": 62175 + }, + { + "epoch": 31.34, + "grad_norm": 0.03976639732718468, + "learning_rate": 3.8014070351758796e-06, + "loss": 0.0002, + "step": 62200 + }, + { + "epoch": 31.35, + "grad_norm": 0.10835079848766327, + "learning_rate": 3.7988944723618093e-06, + "loss": 0.0002, + "step": 62225 + }, + { + "epoch": 31.36, + "grad_norm": 1.0740280151367188, + "learning_rate": 3.796381909547739e-06, + "loss": 0.0002, + "step": 62250 + }, + { + "epoch": 31.37, + "grad_norm": 0.20788809657096863, + "learning_rate": 3.7938693467336684e-06, + "loss": 0.0003, + "step": 62275 + }, + { + "epoch": 31.39, + "grad_norm": 0.331663578748703, + "learning_rate": 3.791356783919598e-06, + "loss": 0.0005, + "step": 62300 + }, + { + "epoch": 31.4, + "grad_norm": 0.09669731557369232, + "learning_rate": 3.7888442211055283e-06, + "loss": 0.0002, + "step": 62325 + }, + { + "epoch": 31.41, + "grad_norm": 0.12564200162887573, + "learning_rate": 3.7863316582914576e-06, + "loss": 0.0002, + "step": 62350 + }, + { + "epoch": 31.42, + "grad_norm": 0.16016307473182678, + "learning_rate": 3.7838190954773874e-06, + "loss": 0.0003, + "step": 62375 + }, + { + "epoch": 31.44, + "grad_norm": 0.09345220774412155, + "learning_rate": 3.781306532663317e-06, + "loss": 0.0003, + "step": 62400 + }, + { + "epoch": 31.45, + "grad_norm": 1.2475889921188354, + "learning_rate": 3.7787939698492464e-06, + "loss": 0.0002, + "step": 62425 + }, + { + "epoch": 31.46, + "grad_norm": 0.4350406229496002, + "learning_rate": 3.776281407035176e-06, + "loss": 0.0003, + "step": 62450 + }, + { + "epoch": 31.47, + "grad_norm": 0.13535748422145844, + "learning_rate": 3.7737688442211055e-06, + "loss": 0.0002, + "step": 62475 + }, + { + "epoch": 31.49, + "grad_norm": 0.34315505623817444, + "learning_rate": 3.7712562814070352e-06, + "loss": 0.0004, + "step": 62500 + }, + { + "epoch": 31.5, + "grad_norm": 1.264566421508789, + "learning_rate": 3.7687437185929654e-06, + "loss": 0.0004, + "step": 62525 + }, + { + "epoch": 31.51, + "grad_norm": 0.1144946962594986, + "learning_rate": 3.766231155778895e-06, + "loss": 0.0004, + "step": 62550 + }, + { + "epoch": 31.52, + "grad_norm": 0.1550832986831665, + "learning_rate": 3.7637185929648245e-06, + "loss": 0.0007, + "step": 62575 + }, + { + "epoch": 31.54, + "grad_norm": 1.2980326414108276, + "learning_rate": 3.7612060301507542e-06, + "loss": 0.0006, + "step": 62600 + }, + { + "epoch": 31.55, + "grad_norm": 0.5655810832977295, + "learning_rate": 3.7586934673366836e-06, + "loss": 0.0004, + "step": 62625 + }, + { + "epoch": 31.56, + "grad_norm": 1.1469320058822632, + "learning_rate": 3.7561809045226133e-06, + "loss": 0.0003, + "step": 62650 + }, + { + "epoch": 31.57, + "grad_norm": 0.10017550736665726, + "learning_rate": 3.753668341708543e-06, + "loss": 0.0003, + "step": 62675 + }, + { + "epoch": 31.59, + "grad_norm": 0.0341104120016098, + "learning_rate": 3.7511557788944724e-06, + "loss": 0.0005, + "step": 62700 + }, + { + "epoch": 31.6, + "grad_norm": 0.1871260702610016, + "learning_rate": 3.748643216080402e-06, + "loss": 0.0004, + "step": 62725 + }, + { + "epoch": 31.61, + "grad_norm": 0.21561290323734283, + "learning_rate": 3.7461306532663323e-06, + "loss": 0.0003, + "step": 62750 + }, + { + "epoch": 31.62, + "grad_norm": 0.1019524484872818, + "learning_rate": 3.7436180904522616e-06, + "loss": 0.0004, + "step": 62775 + }, + { + "epoch": 31.64, + "grad_norm": 0.6602054834365845, + "learning_rate": 3.7411055276381914e-06, + "loss": 0.0003, + "step": 62800 + }, + { + "epoch": 31.65, + "grad_norm": 0.06545541435480118, + "learning_rate": 3.738592964824121e-06, + "loss": 0.0006, + "step": 62825 + }, + { + "epoch": 31.66, + "grad_norm": 0.6719912886619568, + "learning_rate": 3.7360804020100504e-06, + "loss": 0.0005, + "step": 62850 + }, + { + "epoch": 31.68, + "grad_norm": 0.23920761048793793, + "learning_rate": 3.73356783919598e-06, + "loss": 0.0004, + "step": 62875 + }, + { + "epoch": 31.69, + "grad_norm": 0.36367443203926086, + "learning_rate": 3.7310552763819095e-06, + "loss": 0.0004, + "step": 62900 + }, + { + "epoch": 31.7, + "grad_norm": 0.1650412529706955, + "learning_rate": 3.7285427135678392e-06, + "loss": 0.0004, + "step": 62925 + }, + { + "epoch": 31.71, + "grad_norm": 0.22094900906085968, + "learning_rate": 3.7260301507537694e-06, + "loss": 0.0003, + "step": 62950 + }, + { + "epoch": 31.73, + "grad_norm": 0.06936586648225784, + "learning_rate": 3.723517587939699e-06, + "loss": 0.0003, + "step": 62975 + }, + { + "epoch": 31.74, + "grad_norm": 0.3549976348876953, + "learning_rate": 3.7210050251256285e-06, + "loss": 0.0003, + "step": 63000 + }, + { + "epoch": 31.74, + "eval_loss": 0.375165730714798, + "eval_runtime": 654.2645, + "eval_samples_per_second": 2.154, + "eval_steps_per_second": 2.154, + "eval_wer": 22.912487028709787, + "step": 63000 + }, + { + "epoch": 31.75, + "grad_norm": 0.06242278590798378, + "learning_rate": 3.7184924623115582e-06, + "loss": 0.0003, + "step": 63025 + }, + { + "epoch": 31.76, + "grad_norm": 0.029787451028823853, + "learning_rate": 3.7159798994974876e-06, + "loss": 0.0005, + "step": 63050 + }, + { + "epoch": 31.78, + "grad_norm": 0.12795278429985046, + "learning_rate": 3.7134673366834173e-06, + "loss": 0.0004, + "step": 63075 + }, + { + "epoch": 31.79, + "grad_norm": 0.10121666640043259, + "learning_rate": 3.710954773869347e-06, + "loss": 0.0006, + "step": 63100 + }, + { + "epoch": 31.8, + "grad_norm": 0.10796695947647095, + "learning_rate": 3.7084422110552764e-06, + "loss": 0.0004, + "step": 63125 + }, + { + "epoch": 31.81, + "grad_norm": 0.06431049853563309, + "learning_rate": 3.7059296482412065e-06, + "loss": 0.0006, + "step": 63150 + }, + { + "epoch": 31.83, + "grad_norm": 0.07762473076581955, + "learning_rate": 3.7034170854271363e-06, + "loss": 0.0003, + "step": 63175 + }, + { + "epoch": 31.84, + "grad_norm": 0.19045744836330414, + "learning_rate": 3.7009045226130656e-06, + "loss": 0.0003, + "step": 63200 + }, + { + "epoch": 31.85, + "grad_norm": 0.495317667722702, + "learning_rate": 3.6983919597989954e-06, + "loss": 0.0008, + "step": 63225 + }, + { + "epoch": 31.86, + "grad_norm": 0.44762441515922546, + "learning_rate": 3.695879396984925e-06, + "loss": 0.0003, + "step": 63250 + }, + { + "epoch": 31.88, + "grad_norm": 0.7618858218193054, + "learning_rate": 3.6933668341708544e-06, + "loss": 0.0007, + "step": 63275 + }, + { + "epoch": 31.89, + "grad_norm": 0.24009497463703156, + "learning_rate": 3.690854271356784e-06, + "loss": 0.0003, + "step": 63300 + }, + { + "epoch": 31.9, + "grad_norm": 0.22943466901779175, + "learning_rate": 3.6883417085427135e-06, + "loss": 0.0003, + "step": 63325 + }, + { + "epoch": 31.91, + "grad_norm": 0.06694609671831131, + "learning_rate": 3.6858291457286432e-06, + "loss": 0.0005, + "step": 63350 + }, + { + "epoch": 31.93, + "grad_norm": 0.21812428534030914, + "learning_rate": 3.6833165829145734e-06, + "loss": 0.0003, + "step": 63375 + }, + { + "epoch": 31.94, + "grad_norm": 0.28397077322006226, + "learning_rate": 3.680804020100503e-06, + "loss": 0.0002, + "step": 63400 + }, + { + "epoch": 31.95, + "grad_norm": 1.7889050245285034, + "learning_rate": 3.6782914572864325e-06, + "loss": 0.0005, + "step": 63425 + }, + { + "epoch": 31.96, + "grad_norm": 0.6206020712852478, + "learning_rate": 3.6757788944723622e-06, + "loss": 0.0004, + "step": 63450 + }, + { + "epoch": 31.98, + "grad_norm": 0.1217232197523117, + "learning_rate": 3.6732663316582916e-06, + "loss": 0.0004, + "step": 63475 + }, + { + "epoch": 31.99, + "grad_norm": 0.545870840549469, + "learning_rate": 3.6707537688442213e-06, + "loss": 0.0003, + "step": 63500 + }, + { + "epoch": 32.0, + "grad_norm": 0.3442104160785675, + "learning_rate": 3.668241206030151e-06, + "loss": 0.0004, + "step": 63525 + }, + { + "epoch": 32.02, + "grad_norm": 0.10596601665019989, + "learning_rate": 3.6657286432160804e-06, + "loss": 0.0002, + "step": 63550 + }, + { + "epoch": 32.03, + "grad_norm": 0.12127941101789474, + "learning_rate": 3.6632160804020105e-06, + "loss": 0.0002, + "step": 63575 + }, + { + "epoch": 32.04, + "grad_norm": 0.05940447375178337, + "learning_rate": 3.6607035175879403e-06, + "loss": 0.0004, + "step": 63600 + }, + { + "epoch": 32.05, + "grad_norm": 0.21149925887584686, + "learning_rate": 3.6581909547738696e-06, + "loss": 0.0001, + "step": 63625 + }, + { + "epoch": 32.07, + "grad_norm": 1.6386170387268066, + "learning_rate": 3.6556783919597994e-06, + "loss": 0.0002, + "step": 63650 + }, + { + "epoch": 32.08, + "grad_norm": 1.5819129943847656, + "learning_rate": 3.653165829145729e-06, + "loss": 0.0004, + "step": 63675 + }, + { + "epoch": 32.09, + "grad_norm": 0.5431501865386963, + "learning_rate": 3.6506532663316584e-06, + "loss": 0.0002, + "step": 63700 + }, + { + "epoch": 32.1, + "grad_norm": 0.059780336916446686, + "learning_rate": 3.648140703517588e-06, + "loss": 0.0004, + "step": 63725 + }, + { + "epoch": 32.12, + "grad_norm": 0.3245247006416321, + "learning_rate": 3.6456281407035175e-06, + "loss": 0.0002, + "step": 63750 + }, + { + "epoch": 32.13, + "grad_norm": 0.27430394291877747, + "learning_rate": 3.6431155778894477e-06, + "loss": 0.0002, + "step": 63775 + }, + { + "epoch": 32.14, + "grad_norm": 0.3374156653881073, + "learning_rate": 3.6406030150753774e-06, + "loss": 0.0003, + "step": 63800 + }, + { + "epoch": 32.15, + "grad_norm": 0.5428460836410522, + "learning_rate": 3.638090452261307e-06, + "loss": 0.0002, + "step": 63825 + }, + { + "epoch": 32.17, + "grad_norm": 0.02909483201801777, + "learning_rate": 3.6355778894472365e-06, + "loss": 0.0003, + "step": 63850 + }, + { + "epoch": 32.18, + "grad_norm": 0.05827973410487175, + "learning_rate": 3.6330653266331662e-06, + "loss": 0.0003, + "step": 63875 + }, + { + "epoch": 32.19, + "grad_norm": 0.5720663666725159, + "learning_rate": 3.6305527638190956e-06, + "loss": 0.0003, + "step": 63900 + }, + { + "epoch": 32.2, + "grad_norm": 0.09780346602201462, + "learning_rate": 3.6280402010050253e-06, + "loss": 0.0002, + "step": 63925 + }, + { + "epoch": 32.22, + "grad_norm": 0.061296120285987854, + "learning_rate": 3.625527638190955e-06, + "loss": 0.0002, + "step": 63950 + }, + { + "epoch": 32.23, + "grad_norm": 0.14354734122753143, + "learning_rate": 3.6230150753768844e-06, + "loss": 0.0003, + "step": 63975 + }, + { + "epoch": 32.24, + "grad_norm": 0.1332835853099823, + "learning_rate": 3.6205025125628145e-06, + "loss": 0.0003, + "step": 64000 + }, + { + "epoch": 32.24, + "eval_loss": 0.37142670154571533, + "eval_runtime": 650.391, + "eval_samples_per_second": 2.166, + "eval_steps_per_second": 2.166, + "eval_wer": 22.359045313040472, + "step": 64000 + }, + { + "epoch": 32.25, + "grad_norm": 0.06854286044836044, + "learning_rate": 3.6179899497487443e-06, + "loss": 0.0002, + "step": 64025 + }, + { + "epoch": 32.27, + "grad_norm": 1.202950358390808, + "learning_rate": 3.6154773869346736e-06, + "loss": 0.0002, + "step": 64050 + }, + { + "epoch": 32.28, + "grad_norm": 0.23010912537574768, + "learning_rate": 3.6129648241206034e-06, + "loss": 0.0002, + "step": 64075 + }, + { + "epoch": 32.29, + "grad_norm": 0.044724371284246445, + "learning_rate": 3.610452261306533e-06, + "loss": 0.0002, + "step": 64100 + }, + { + "epoch": 32.3, + "grad_norm": 0.8325422406196594, + "learning_rate": 3.6079396984924624e-06, + "loss": 0.0003, + "step": 64125 + }, + { + "epoch": 32.32, + "grad_norm": 0.28481706976890564, + "learning_rate": 3.605427135678392e-06, + "loss": 0.0002, + "step": 64150 + }, + { + "epoch": 32.33, + "grad_norm": 0.5033039450645447, + "learning_rate": 3.6029145728643215e-06, + "loss": 0.0002, + "step": 64175 + }, + { + "epoch": 32.34, + "grad_norm": 0.07772762328386307, + "learning_rate": 3.6004020100502517e-06, + "loss": 0.0003, + "step": 64200 + }, + { + "epoch": 32.36, + "grad_norm": 0.13087120652198792, + "learning_rate": 3.5978894472361814e-06, + "loss": 0.0003, + "step": 64225 + }, + { + "epoch": 32.37, + "grad_norm": 0.05218727886676788, + "learning_rate": 3.595376884422111e-06, + "loss": 0.0002, + "step": 64250 + }, + { + "epoch": 32.38, + "grad_norm": 0.09140007197856903, + "learning_rate": 3.5928643216080405e-06, + "loss": 0.0002, + "step": 64275 + }, + { + "epoch": 32.39, + "grad_norm": 0.2148062288761139, + "learning_rate": 3.5903517587939702e-06, + "loss": 0.0002, + "step": 64300 + }, + { + "epoch": 32.41, + "grad_norm": 0.3405974805355072, + "learning_rate": 3.5878391959798996e-06, + "loss": 0.0003, + "step": 64325 + }, + { + "epoch": 32.42, + "grad_norm": 0.6667714715003967, + "learning_rate": 3.5853266331658293e-06, + "loss": 0.0006, + "step": 64350 + }, + { + "epoch": 32.43, + "grad_norm": 0.07142732292413712, + "learning_rate": 3.582814070351759e-06, + "loss": 0.0005, + "step": 64375 + }, + { + "epoch": 32.44, + "grad_norm": 0.34170547127723694, + "learning_rate": 3.5803015075376884e-06, + "loss": 0.0003, + "step": 64400 + }, + { + "epoch": 32.46, + "grad_norm": 0.051813945174217224, + "learning_rate": 3.5777889447236185e-06, + "loss": 0.0002, + "step": 64425 + }, + { + "epoch": 32.47, + "grad_norm": 0.035720087587833405, + "learning_rate": 3.5752763819095483e-06, + "loss": 0.0003, + "step": 64450 + }, + { + "epoch": 32.48, + "grad_norm": 0.26840466260910034, + "learning_rate": 3.5727638190954776e-06, + "loss": 0.0002, + "step": 64475 + }, + { + "epoch": 32.49, + "grad_norm": 0.23383192718029022, + "learning_rate": 3.5702512562814074e-06, + "loss": 0.0006, + "step": 64500 + }, + { + "epoch": 32.51, + "grad_norm": 0.6334074139595032, + "learning_rate": 3.567738693467337e-06, + "loss": 0.0003, + "step": 64525 + }, + { + "epoch": 32.52, + "grad_norm": 0.14389067888259888, + "learning_rate": 3.5652261306532664e-06, + "loss": 0.0003, + "step": 64550 + }, + { + "epoch": 32.53, + "grad_norm": 0.1418575793504715, + "learning_rate": 3.562713567839196e-06, + "loss": 0.0005, + "step": 64575 + }, + { + "epoch": 32.54, + "grad_norm": 1.003585934638977, + "learning_rate": 3.5602010050251255e-06, + "loss": 0.0006, + "step": 64600 + }, + { + "epoch": 32.56, + "grad_norm": 0.30060967803001404, + "learning_rate": 3.5576884422110557e-06, + "loss": 0.0004, + "step": 64625 + }, + { + "epoch": 32.57, + "grad_norm": 0.2555244266986847, + "learning_rate": 3.5551758793969854e-06, + "loss": 0.0005, + "step": 64650 + }, + { + "epoch": 32.58, + "grad_norm": 0.5695326328277588, + "learning_rate": 3.552663316582915e-06, + "loss": 0.0005, + "step": 64675 + }, + { + "epoch": 32.59, + "grad_norm": 1.1138029098510742, + "learning_rate": 3.5501507537688445e-06, + "loss": 0.0005, + "step": 64700 + }, + { + "epoch": 32.61, + "grad_norm": 0.999645471572876, + "learning_rate": 3.5476381909547742e-06, + "loss": 0.0002, + "step": 64725 + }, + { + "epoch": 32.62, + "grad_norm": 0.08828813582658768, + "learning_rate": 3.5451256281407036e-06, + "loss": 0.0005, + "step": 64750 + }, + { + "epoch": 32.63, + "grad_norm": 0.41186901926994324, + "learning_rate": 3.5426130653266333e-06, + "loss": 0.0004, + "step": 64775 + }, + { + "epoch": 32.64, + "grad_norm": 0.2822958528995514, + "learning_rate": 3.540100502512563e-06, + "loss": 0.0004, + "step": 64800 + }, + { + "epoch": 32.66, + "grad_norm": 0.0504324808716774, + "learning_rate": 3.5375879396984932e-06, + "loss": 0.0003, + "step": 64825 + }, + { + "epoch": 32.67, + "grad_norm": 0.19442777335643768, + "learning_rate": 3.5350753768844225e-06, + "loss": 0.0003, + "step": 64850 + }, + { + "epoch": 32.68, + "grad_norm": 0.16309334337711334, + "learning_rate": 3.5325628140703523e-06, + "loss": 0.0005, + "step": 64875 + }, + { + "epoch": 32.7, + "grad_norm": 0.21436728537082672, + "learning_rate": 3.5300502512562816e-06, + "loss": 0.0005, + "step": 64900 + }, + { + "epoch": 32.71, + "grad_norm": 0.6013718843460083, + "learning_rate": 3.5275376884422114e-06, + "loss": 0.0005, + "step": 64925 + }, + { + "epoch": 32.72, + "grad_norm": 0.22422859072685242, + "learning_rate": 3.525025125628141e-06, + "loss": 0.0003, + "step": 64950 + }, + { + "epoch": 32.73, + "grad_norm": 0.1287311613559723, + "learning_rate": 3.5225125628140704e-06, + "loss": 0.0003, + "step": 64975 + }, + { + "epoch": 32.75, + "grad_norm": 0.11317762732505798, + "learning_rate": 3.52e-06, + "loss": 0.0006, + "step": 65000 + }, + { + "epoch": 32.75, + "eval_loss": 0.37727558612823486, + "eval_runtime": 645.9956, + "eval_samples_per_second": 2.181, + "eval_steps_per_second": 2.181, + "eval_wer": 22.953995157384988, + "step": 65000 + }, + { + "epoch": 32.76, + "grad_norm": 0.30392730236053467, + "learning_rate": 3.5174874371859295e-06, + "loss": 0.0003, + "step": 65025 + }, + { + "epoch": 32.77, + "grad_norm": 0.1481235772371292, + "learning_rate": 3.5149748743718597e-06, + "loss": 0.0006, + "step": 65050 + }, + { + "epoch": 32.78, + "grad_norm": 0.7122224569320679, + "learning_rate": 3.5124623115577894e-06, + "loss": 0.0005, + "step": 65075 + }, + { + "epoch": 32.8, + "grad_norm": 0.9053061604499817, + "learning_rate": 3.509949748743719e-06, + "loss": 0.0004, + "step": 65100 + }, + { + "epoch": 32.81, + "grad_norm": 1.0469900369644165, + "learning_rate": 3.5074371859296485e-06, + "loss": 0.0004, + "step": 65125 + }, + { + "epoch": 32.82, + "grad_norm": 1.669203519821167, + "learning_rate": 3.5049246231155782e-06, + "loss": 0.0004, + "step": 65150 + }, + { + "epoch": 32.83, + "grad_norm": 1.147189974784851, + "learning_rate": 3.5024120603015076e-06, + "loss": 0.0006, + "step": 65175 + }, + { + "epoch": 32.85, + "grad_norm": 0.0693323016166687, + "learning_rate": 3.4998994974874373e-06, + "loss": 0.0004, + "step": 65200 + }, + { + "epoch": 32.86, + "grad_norm": 0.32327863574028015, + "learning_rate": 3.497386934673367e-06, + "loss": 0.0006, + "step": 65225 + }, + { + "epoch": 32.87, + "grad_norm": 0.20139850676059723, + "learning_rate": 3.494874371859297e-06, + "loss": 0.0005, + "step": 65250 + }, + { + "epoch": 32.88, + "grad_norm": 0.3941897749900818, + "learning_rate": 3.4923618090452265e-06, + "loss": 0.0003, + "step": 65275 + }, + { + "epoch": 32.9, + "grad_norm": 0.036489930003881454, + "learning_rate": 3.4898492462311563e-06, + "loss": 0.0006, + "step": 65300 + }, + { + "epoch": 32.91, + "grad_norm": 0.09961450099945068, + "learning_rate": 3.4873366834170856e-06, + "loss": 0.0004, + "step": 65325 + }, + { + "epoch": 32.92, + "grad_norm": 0.10360651463270187, + "learning_rate": 3.4848241206030154e-06, + "loss": 0.0004, + "step": 65350 + }, + { + "epoch": 32.93, + "grad_norm": 1.0170046091079712, + "learning_rate": 3.482311557788945e-06, + "loss": 0.0005, + "step": 65375 + }, + { + "epoch": 32.95, + "grad_norm": 0.1749623417854309, + "learning_rate": 3.4797989949748744e-06, + "loss": 0.0003, + "step": 65400 + }, + { + "epoch": 32.96, + "grad_norm": 1.7965832948684692, + "learning_rate": 3.477286432160804e-06, + "loss": 0.0004, + "step": 65425 + }, + { + "epoch": 32.97, + "grad_norm": 0.4475008547306061, + "learning_rate": 3.4747738693467344e-06, + "loss": 0.0004, + "step": 65450 + }, + { + "epoch": 32.98, + "grad_norm": 0.29991576075553894, + "learning_rate": 3.4722613065326637e-06, + "loss": 0.0004, + "step": 65475 + }, + { + "epoch": 33.0, + "grad_norm": 0.0659874826669693, + "learning_rate": 3.4697487437185934e-06, + "loss": 0.0005, + "step": 65500 + }, + { + "epoch": 33.01, + "grad_norm": 0.15273946523666382, + "learning_rate": 3.467336683417086e-06, + "loss": 0.0004, + "step": 65525 + }, + { + "epoch": 33.02, + "grad_norm": 0.09499111026525497, + "learning_rate": 3.464824120603015e-06, + "loss": 0.0005, + "step": 65550 + }, + { + "epoch": 33.04, + "grad_norm": 0.052725620567798615, + "learning_rate": 3.462311557788945e-06, + "loss": 0.0002, + "step": 65575 + }, + { + "epoch": 33.05, + "grad_norm": 0.30198460817337036, + "learning_rate": 3.4597989949748746e-06, + "loss": 0.0002, + "step": 65600 + }, + { + "epoch": 33.06, + "grad_norm": 0.12597815692424774, + "learning_rate": 3.457286432160804e-06, + "loss": 0.0003, + "step": 65625 + }, + { + "epoch": 33.07, + "grad_norm": 0.7220773100852966, + "learning_rate": 3.454773869346734e-06, + "loss": 0.0002, + "step": 65650 + }, + { + "epoch": 33.09, + "grad_norm": 0.14168275892734528, + "learning_rate": 3.452261306532664e-06, + "loss": 0.0002, + "step": 65675 + }, + { + "epoch": 33.1, + "grad_norm": 0.06495074182748795, + "learning_rate": 3.449748743718593e-06, + "loss": 0.0002, + "step": 65700 + }, + { + "epoch": 33.11, + "grad_norm": 0.06904838979244232, + "learning_rate": 3.447236180904523e-06, + "loss": 0.0001, + "step": 65725 + }, + { + "epoch": 33.12, + "grad_norm": 0.08821985125541687, + "learning_rate": 3.4447236180904527e-06, + "loss": 0.0001, + "step": 65750 + }, + { + "epoch": 33.14, + "grad_norm": 0.08516985177993774, + "learning_rate": 3.442211055276382e-06, + "loss": 0.0002, + "step": 65775 + }, + { + "epoch": 33.15, + "grad_norm": 0.05491633340716362, + "learning_rate": 3.4396984924623118e-06, + "loss": 0.0002, + "step": 65800 + }, + { + "epoch": 33.16, + "grad_norm": 0.09583797305822372, + "learning_rate": 3.437185929648241e-06, + "loss": 0.0001, + "step": 65825 + }, + { + "epoch": 33.17, + "grad_norm": 0.046035125851631165, + "learning_rate": 3.4346733668341712e-06, + "loss": 0.0002, + "step": 65850 + }, + { + "epoch": 33.19, + "grad_norm": 0.04913106560707092, + "learning_rate": 3.432160804020101e-06, + "loss": 0.0002, + "step": 65875 + }, + { + "epoch": 33.2, + "grad_norm": 0.0908324271440506, + "learning_rate": 3.4296482412060307e-06, + "loss": 0.0002, + "step": 65900 + }, + { + "epoch": 33.21, + "grad_norm": 0.1352541148662567, + "learning_rate": 3.42713567839196e-06, + "loss": 0.0001, + "step": 65925 + }, + { + "epoch": 33.22, + "grad_norm": 0.12912270426750183, + "learning_rate": 3.42462311557789e-06, + "loss": 0.0002, + "step": 65950 + }, + { + "epoch": 33.24, + "grad_norm": 0.19946007430553436, + "learning_rate": 3.422110552763819e-06, + "loss": 0.0001, + "step": 65975 + }, + { + "epoch": 33.25, + "grad_norm": 0.15442493557929993, + "learning_rate": 3.419597989949749e-06, + "loss": 0.0001, + "step": 66000 + }, + { + "epoch": 33.25, + "eval_loss": 0.37603169679641724, + "eval_runtime": 780.8153, + "eval_samples_per_second": 1.805, + "eval_steps_per_second": 1.805, + "eval_wer": 22.234520927014874, + "step": 66000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 51, + "save_steps": 1000, + "total_flos": 2.0547646783488e+20, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/maithili/checkpoint-66000/training_args.bin b/checkpoints/whisper-base/maithili/checkpoint-66000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a67ae7873c02b91aa3048ac79ba72abfa386b691 --- /dev/null +++ b/checkpoints/whisper-base/maithili/checkpoint-66000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272032a7c884c0d116afaec204a6853471aa52d2d64091c5bc5dbef7616ee016 +size 4667 diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/config.json b/checkpoints/whisper-base/marathi/checkpoint-19000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8589ece18ad2bf0607466f77951d19324866c734 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50320 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/generation_config.json b/checkpoints/whisper-base/marathi/checkpoint-19000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/model.safetensors b/checkpoints/whisper-base/marathi/checkpoint-19000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85dccd8c28d046b8d459a0608f88792896d35e49 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbfd78e6f624b2b3465b43e49f88be31515b2b55c69a3952b12b5ab946fd4e0f +size 290403936 diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/optimizer.pt b/checkpoints/whisper-base/marathi/checkpoint-19000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d8a126f62d7c1f5acd91787bf7b3b2991ea948b --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5b423e3f82a87f0fdfd05494270d6f9792fb5c9008e3b3169cdd9d330db8cb +size 574811077 diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/preprocessor_config.json b/checkpoints/whisper-base/marathi/checkpoint-19000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/rng_state.pth b/checkpoints/whisper-base/marathi/checkpoint-19000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..032f07a7a7831b7587a4732982c7b6d80aeea117 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db1f80ff0467186b1efe074d629e15a625c6f7ce3ae8ddd551ab605b40d8230c +size 14575 diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/scheduler.pt b/checkpoints/whisper-base/marathi/checkpoint-19000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..43d56000d1ac84b8e5f534ffe0df6c322fb18232 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83fed1705b42d1f883190363472a00d3be8e9e6d5405c281ea94a9cf26f475d3 +size 627 diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/trainer_state.json b/checkpoints/whisper-base/marathi/checkpoint-19000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ff209ad1fe1a8c34ec75f9147ad56c75ee19ac0 --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/trainer_state.json @@ -0,0 +1,5512 @@ +{ + "best_metric": 21.066572488080524, + "best_model_checkpoint": "results/whisper-base/marathi/checkpoint-9000", + "epoch": 9.571788413098236, + "eval_steps": 1000, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 27.45026206970215, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.392, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 14.487224578857422, + "learning_rate": 9.600000000000001e-07, + "loss": 2.0193, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 9.061480522155762, + "learning_rate": 1.46e-06, + "loss": 1.5277, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 6.201880931854248, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.2013, + "step": 100 + }, + { + "epoch": 0.06, + "grad_norm": 4.9469828605651855, + "learning_rate": 2.46e-06, + "loss": 0.9824, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 4.583366870880127, + "learning_rate": 2.96e-06, + "loss": 0.85, + "step": 150 + }, + { + "epoch": 0.09, + "grad_norm": 4.343874931335449, + "learning_rate": 3.46e-06, + "loss": 0.7533, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 4.21941614151001, + "learning_rate": 3.96e-06, + "loss": 0.6814, + "step": 200 + }, + { + "epoch": 0.11, + "grad_norm": 4.494288444519043, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.6345, + "step": 225 + }, + { + "epoch": 0.13, + "grad_norm": 4.814334869384766, + "learning_rate": 4.960000000000001e-06, + "loss": 0.5919, + "step": 250 + }, + { + "epoch": 0.14, + "grad_norm": 3.8210599422454834, + "learning_rate": 5.460000000000001e-06, + "loss": 0.5562, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 4.545140743255615, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.5353, + "step": 300 + }, + { + "epoch": 0.16, + "grad_norm": 4.103487968444824, + "learning_rate": 6.460000000000001e-06, + "loss": 0.4884, + "step": 325 + }, + { + "epoch": 0.18, + "grad_norm": 3.9907121658325195, + "learning_rate": 6.96e-06, + "loss": 0.4828, + "step": 350 + }, + { + "epoch": 0.19, + "grad_norm": 4.520524024963379, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.4605, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 4.6123456954956055, + "learning_rate": 7.960000000000002e-06, + "loss": 0.4624, + "step": 400 + }, + { + "epoch": 0.21, + "grad_norm": 4.493947982788086, + "learning_rate": 8.46e-06, + "loss": 0.4226, + "step": 425 + }, + { + "epoch": 0.23, + "grad_norm": 4.640024185180664, + "learning_rate": 8.96e-06, + "loss": 0.4175, + "step": 450 + }, + { + "epoch": 0.24, + "grad_norm": 3.806760787963867, + "learning_rate": 9.460000000000001e-06, + "loss": 0.3957, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 4.0242414474487305, + "learning_rate": 9.960000000000001e-06, + "loss": 0.3848, + "step": 500 + }, + { + "epoch": 0.26, + "grad_norm": 4.135085105895996, + "learning_rate": 9.997688442211056e-06, + "loss": 0.3649, + "step": 525 + }, + { + "epoch": 0.28, + "grad_norm": 3.7966971397399902, + "learning_rate": 9.995175879396986e-06, + "loss": 0.3712, + "step": 550 + }, + { + "epoch": 0.29, + "grad_norm": 4.147885799407959, + "learning_rate": 9.992663316582915e-06, + "loss": 0.3625, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 4.364108562469482, + "learning_rate": 9.990150753768844e-06, + "loss": 0.3617, + "step": 600 + }, + { + "epoch": 0.31, + "grad_norm": 3.5569663047790527, + "learning_rate": 9.987638190954775e-06, + "loss": 0.348, + "step": 625 + }, + { + "epoch": 0.33, + "grad_norm": 3.975032329559326, + "learning_rate": 9.985125628140705e-06, + "loss": 0.3373, + "step": 650 + }, + { + "epoch": 0.34, + "grad_norm": 3.59627103805542, + "learning_rate": 9.982613065326634e-06, + "loss": 0.3267, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 3.887047052383423, + "learning_rate": 9.980100502512565e-06, + "loss": 0.3272, + "step": 700 + }, + { + "epoch": 0.37, + "grad_norm": 3.7019553184509277, + "learning_rate": 9.977587939698493e-06, + "loss": 0.3176, + "step": 725 + }, + { + "epoch": 0.38, + "grad_norm": 3.656590223312378, + "learning_rate": 9.975075376884424e-06, + "loss": 0.3064, + "step": 750 + }, + { + "epoch": 0.39, + "grad_norm": 3.574428081512451, + "learning_rate": 9.972562814070353e-06, + "loss": 0.3042, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 3.6685123443603516, + "learning_rate": 9.970050251256282e-06, + "loss": 0.2964, + "step": 800 + }, + { + "epoch": 0.42, + "grad_norm": 3.850036859512329, + "learning_rate": 9.967537688442212e-06, + "loss": 0.298, + "step": 825 + }, + { + "epoch": 0.43, + "grad_norm": 4.104265213012695, + "learning_rate": 9.965025125628141e-06, + "loss": 0.2937, + "step": 850 + }, + { + "epoch": 0.44, + "grad_norm": 3.6571717262268066, + "learning_rate": 9.96251256281407e-06, + "loss": 0.2943, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 3.770433187484741, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2819, + "step": 900 + }, + { + "epoch": 0.47, + "grad_norm": 3.559394359588623, + "learning_rate": 9.95748743718593e-06, + "loss": 0.2814, + "step": 925 + }, + { + "epoch": 0.48, + "grad_norm": 3.674076795578003, + "learning_rate": 9.95497487437186e-06, + "loss": 0.2729, + "step": 950 + }, + { + "epoch": 0.49, + "grad_norm": 3.6425929069519043, + "learning_rate": 9.952462311557791e-06, + "loss": 0.2805, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 3.135524272918701, + "learning_rate": 9.949949748743718e-06, + "loss": 0.2712, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 0.17855150997638702, + "eval_runtime": 680.9509, + "eval_samples_per_second": 2.035, + "eval_steps_per_second": 2.035, + "eval_wer": 37.46247571958326, + "step": 1000 + }, + { + "epoch": 0.52, + "grad_norm": 3.5001885890960693, + "learning_rate": 9.94743718592965e-06, + "loss": 0.2696, + "step": 1025 + }, + { + "epoch": 0.53, + "grad_norm": 3.610868453979492, + "learning_rate": 9.944924623115579e-06, + "loss": 0.2623, + "step": 1050 + }, + { + "epoch": 0.54, + "grad_norm": 3.7076351642608643, + "learning_rate": 9.942412060301508e-06, + "loss": 0.2676, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 3.7802183628082275, + "learning_rate": 9.93989949748744e-06, + "loss": 0.2566, + "step": 1100 + }, + { + "epoch": 0.57, + "grad_norm": 3.6095058917999268, + "learning_rate": 9.937386934673367e-06, + "loss": 0.2453, + "step": 1125 + }, + { + "epoch": 0.58, + "grad_norm": 3.620966672897339, + "learning_rate": 9.934874371859298e-06, + "loss": 0.2466, + "step": 1150 + }, + { + "epoch": 0.59, + "grad_norm": 3.828181743621826, + "learning_rate": 9.932361809045227e-06, + "loss": 0.2522, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 3.211886405944824, + "learning_rate": 9.929849246231156e-06, + "loss": 0.2415, + "step": 1200 + }, + { + "epoch": 0.62, + "grad_norm": 3.7555296421051025, + "learning_rate": 9.927336683417086e-06, + "loss": 0.2441, + "step": 1225 + }, + { + "epoch": 0.63, + "grad_norm": 3.3063595294952393, + "learning_rate": 9.924824120603017e-06, + "loss": 0.2476, + "step": 1250 + }, + { + "epoch": 0.64, + "grad_norm": 4.333239555358887, + "learning_rate": 9.922311557788944e-06, + "loss": 0.2404, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 3.6734941005706787, + "learning_rate": 9.919798994974875e-06, + "loss": 0.2433, + "step": 1300 + }, + { + "epoch": 0.67, + "grad_norm": 3.4325389862060547, + "learning_rate": 9.917286432160805e-06, + "loss": 0.2389, + "step": 1325 + }, + { + "epoch": 0.68, + "grad_norm": 3.4099912643432617, + "learning_rate": 9.914773869346734e-06, + "loss": 0.2377, + "step": 1350 + }, + { + "epoch": 0.69, + "grad_norm": 3.6876649856567383, + "learning_rate": 9.912261306532665e-06, + "loss": 0.2337, + "step": 1375 + }, + { + "epoch": 0.71, + "grad_norm": 3.756155490875244, + "learning_rate": 9.909748743718593e-06, + "loss": 0.2294, + "step": 1400 + }, + { + "epoch": 0.72, + "grad_norm": 3.12317156791687, + "learning_rate": 9.907236180904524e-06, + "loss": 0.2302, + "step": 1425 + }, + { + "epoch": 0.73, + "grad_norm": 3.2286078929901123, + "learning_rate": 9.904723618090453e-06, + "loss": 0.2261, + "step": 1450 + }, + { + "epoch": 0.74, + "grad_norm": 3.188397169113159, + "learning_rate": 9.902211055276382e-06, + "loss": 0.2244, + "step": 1475 + }, + { + "epoch": 0.76, + "grad_norm": 3.2237186431884766, + "learning_rate": 9.899698492462312e-06, + "loss": 0.2291, + "step": 1500 + }, + { + "epoch": 0.77, + "grad_norm": 3.088027238845825, + "learning_rate": 9.897185929648243e-06, + "loss": 0.2344, + "step": 1525 + }, + { + "epoch": 0.78, + "grad_norm": 3.318286895751953, + "learning_rate": 9.894673366834172e-06, + "loss": 0.2162, + "step": 1550 + }, + { + "epoch": 0.79, + "grad_norm": 3.080158233642578, + "learning_rate": 9.892160804020101e-06, + "loss": 0.2142, + "step": 1575 + }, + { + "epoch": 0.81, + "grad_norm": 3.544671058654785, + "learning_rate": 9.88964824120603e-06, + "loss": 0.2217, + "step": 1600 + }, + { + "epoch": 0.82, + "grad_norm": 3.2301149368286133, + "learning_rate": 9.88713567839196e-06, + "loss": 0.2274, + "step": 1625 + }, + { + "epoch": 0.83, + "grad_norm": 3.344430923461914, + "learning_rate": 9.884623115577891e-06, + "loss": 0.2153, + "step": 1650 + }, + { + "epoch": 0.84, + "grad_norm": 3.3081424236297607, + "learning_rate": 9.882110552763819e-06, + "loss": 0.2094, + "step": 1675 + }, + { + "epoch": 0.86, + "grad_norm": 3.4240942001342773, + "learning_rate": 9.87959798994975e-06, + "loss": 0.2155, + "step": 1700 + }, + { + "epoch": 0.87, + "grad_norm": 3.0736613273620605, + "learning_rate": 9.877085427135679e-06, + "loss": 0.213, + "step": 1725 + }, + { + "epoch": 0.88, + "grad_norm": 3.1081628799438477, + "learning_rate": 9.874572864321608e-06, + "loss": 0.21, + "step": 1750 + }, + { + "epoch": 0.89, + "grad_norm": 3.522641658782959, + "learning_rate": 9.87206030150754e-06, + "loss": 0.2114, + "step": 1775 + }, + { + "epoch": 0.91, + "grad_norm": 3.2231297492980957, + "learning_rate": 9.869547738693469e-06, + "loss": 0.2111, + "step": 1800 + }, + { + "epoch": 0.92, + "grad_norm": 2.9056851863861084, + "learning_rate": 9.867035175879398e-06, + "loss": 0.2065, + "step": 1825 + }, + { + "epoch": 0.93, + "grad_norm": 3.1861538887023926, + "learning_rate": 9.864522613065327e-06, + "loss": 0.2035, + "step": 1850 + }, + { + "epoch": 0.94, + "grad_norm": 2.6379261016845703, + "learning_rate": 9.862010050251257e-06, + "loss": 0.197, + "step": 1875 + }, + { + "epoch": 0.96, + "grad_norm": 3.7076098918914795, + "learning_rate": 9.859497487437186e-06, + "loss": 0.2049, + "step": 1900 + }, + { + "epoch": 0.97, + "grad_norm": 3.373297691345215, + "learning_rate": 9.856984924623117e-06, + "loss": 0.2045, + "step": 1925 + }, + { + "epoch": 0.98, + "grad_norm": 2.997976303100586, + "learning_rate": 9.854472361809046e-06, + "loss": 0.1994, + "step": 1950 + }, + { + "epoch": 0.99, + "grad_norm": 3.2397022247314453, + "learning_rate": 9.851959798994976e-06, + "loss": 0.2026, + "step": 1975 + }, + { + "epoch": 1.01, + "grad_norm": 3.2261569499969482, + "learning_rate": 9.849447236180905e-06, + "loss": 0.1858, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.13345558941364288, + "eval_runtime": 831.0847, + "eval_samples_per_second": 1.668, + "eval_steps_per_second": 1.668, + "eval_wer": 28.65089175348755, + "step": 2000 + }, + { + "epoch": 1.02, + "grad_norm": 3.0637896060943604, + "learning_rate": 9.846934673366834e-06, + "loss": 0.185, + "step": 2025 + }, + { + "epoch": 1.03, + "grad_norm": 3.2531063556671143, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1776, + "step": 2050 + }, + { + "epoch": 1.05, + "grad_norm": 3.188401699066162, + "learning_rate": 9.841909547738695e-06, + "loss": 0.1759, + "step": 2075 + }, + { + "epoch": 1.06, + "grad_norm": 3.404390335083008, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1807, + "step": 2100 + }, + { + "epoch": 1.07, + "grad_norm": 3.146895170211792, + "learning_rate": 9.836884422110553e-06, + "loss": 0.1753, + "step": 2125 + }, + { + "epoch": 1.08, + "grad_norm": 3.1752853393554688, + "learning_rate": 9.834371859296483e-06, + "loss": 0.1696, + "step": 2150 + }, + { + "epoch": 1.1, + "grad_norm": 3.372060537338257, + "learning_rate": 9.831859296482414e-06, + "loss": 0.1729, + "step": 2175 + }, + { + "epoch": 1.11, + "grad_norm": 2.982743501663208, + "learning_rate": 9.829346733668343e-06, + "loss": 0.1652, + "step": 2200 + }, + { + "epoch": 1.12, + "grad_norm": 3.1472949981689453, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1737, + "step": 2225 + }, + { + "epoch": 1.13, + "grad_norm": 2.841750383377075, + "learning_rate": 9.824321608040202e-06, + "loss": 0.18, + "step": 2250 + }, + { + "epoch": 1.15, + "grad_norm": 2.7576141357421875, + "learning_rate": 9.821809045226131e-06, + "loss": 0.1738, + "step": 2275 + }, + { + "epoch": 1.16, + "grad_norm": 2.5741350650787354, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1733, + "step": 2300 + }, + { + "epoch": 1.17, + "grad_norm": 3.1259639263153076, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1711, + "step": 2325 + }, + { + "epoch": 1.18, + "grad_norm": 2.9985835552215576, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1732, + "step": 2350 + }, + { + "epoch": 1.2, + "grad_norm": 2.800180196762085, + "learning_rate": 9.81175879396985e-06, + "loss": 0.1702, + "step": 2375 + }, + { + "epoch": 1.21, + "grad_norm": 3.3550591468811035, + "learning_rate": 9.809246231155781e-06, + "loss": 0.1684, + "step": 2400 + }, + { + "epoch": 1.22, + "grad_norm": 3.0946404933929443, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1711, + "step": 2425 + }, + { + "epoch": 1.23, + "grad_norm": 2.5824248790740967, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1674, + "step": 2450 + }, + { + "epoch": 1.25, + "grad_norm": 3.285874843597412, + "learning_rate": 9.801708542713569e-06, + "loss": 0.1664, + "step": 2475 + }, + { + "epoch": 1.26, + "grad_norm": 2.6884765625, + "learning_rate": 9.799195979899498e-06, + "loss": 0.1639, + "step": 2500 + }, + { + "epoch": 1.27, + "grad_norm": 3.2607779502868652, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1666, + "step": 2525 + }, + { + "epoch": 1.28, + "grad_norm": 3.109363079071045, + "learning_rate": 9.794170854271357e-06, + "loss": 0.1661, + "step": 2550 + }, + { + "epoch": 1.3, + "grad_norm": 2.6563594341278076, + "learning_rate": 9.791658291457288e-06, + "loss": 0.1612, + "step": 2575 + }, + { + "epoch": 1.31, + "grad_norm": 3.085439920425415, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1658, + "step": 2600 + }, + { + "epoch": 1.32, + "grad_norm": 2.561068534851074, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1664, + "step": 2625 + }, + { + "epoch": 1.34, + "grad_norm": 3.1549007892608643, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1629, + "step": 2650 + }, + { + "epoch": 1.35, + "grad_norm": 3.237643003463745, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1687, + "step": 2675 + }, + { + "epoch": 1.36, + "grad_norm": 2.8987040519714355, + "learning_rate": 9.779095477386934e-06, + "loss": 0.1675, + "step": 2700 + }, + { + "epoch": 1.37, + "grad_norm": 3.138195037841797, + "learning_rate": 9.776582914572866e-06, + "loss": 0.1623, + "step": 2725 + }, + { + "epoch": 1.39, + "grad_norm": 2.895622968673706, + "learning_rate": 9.774070351758795e-06, + "loss": 0.1648, + "step": 2750 + }, + { + "epoch": 1.4, + "grad_norm": 2.9179725646972656, + "learning_rate": 9.771557788944724e-06, + "loss": 0.1628, + "step": 2775 + }, + { + "epoch": 1.41, + "grad_norm": 2.7177937030792236, + "learning_rate": 9.769045226130655e-06, + "loss": 0.1556, + "step": 2800 + }, + { + "epoch": 1.42, + "grad_norm": 2.789480209350586, + "learning_rate": 9.766532663316583e-06, + "loss": 0.1603, + "step": 2825 + }, + { + "epoch": 1.44, + "grad_norm": 3.071665048599243, + "learning_rate": 9.764020100502514e-06, + "loss": 0.1566, + "step": 2850 + }, + { + "epoch": 1.45, + "grad_norm": 3.047726631164551, + "learning_rate": 9.761507537688443e-06, + "loss": 0.1588, + "step": 2875 + }, + { + "epoch": 1.46, + "grad_norm": 3.0661559104919434, + "learning_rate": 9.758994974874372e-06, + "loss": 0.1567, + "step": 2900 + }, + { + "epoch": 1.47, + "grad_norm": 3.447265863418579, + "learning_rate": 9.756482412060302e-06, + "loss": 0.1564, + "step": 2925 + }, + { + "epoch": 1.49, + "grad_norm": 2.684826374053955, + "learning_rate": 9.753969849246233e-06, + "loss": 0.1532, + "step": 2950 + }, + { + "epoch": 1.5, + "grad_norm": 2.6312644481658936, + "learning_rate": 9.75145728643216e-06, + "loss": 0.1551, + "step": 2975 + }, + { + "epoch": 1.51, + "grad_norm": 2.590045213699341, + "learning_rate": 9.748944723618091e-06, + "loss": 0.1575, + "step": 3000 + }, + { + "epoch": 1.51, + "eval_loss": 0.11542285233736038, + "eval_runtime": 823.5205, + "eval_samples_per_second": 1.683, + "eval_steps_per_second": 1.683, + "eval_wer": 24.783683559950557, + "step": 3000 + }, + { + "epoch": 1.52, + "grad_norm": 2.4911344051361084, + "learning_rate": 9.74643216080402e-06, + "loss": 0.1538, + "step": 3025 + }, + { + "epoch": 1.54, + "grad_norm": 3.283472776412964, + "learning_rate": 9.74391959798995e-06, + "loss": 0.153, + "step": 3050 + }, + { + "epoch": 1.55, + "grad_norm": 2.721425771713257, + "learning_rate": 9.741407035175881e-06, + "loss": 0.1584, + "step": 3075 + }, + { + "epoch": 1.56, + "grad_norm": 3.2605483531951904, + "learning_rate": 9.738894472361809e-06, + "loss": 0.1528, + "step": 3100 + }, + { + "epoch": 1.57, + "grad_norm": 2.660555362701416, + "learning_rate": 9.73638190954774e-06, + "loss": 0.1533, + "step": 3125 + }, + { + "epoch": 1.59, + "grad_norm": 3.2440178394317627, + "learning_rate": 9.733869346733669e-06, + "loss": 0.1481, + "step": 3150 + }, + { + "epoch": 1.6, + "grad_norm": 3.1019392013549805, + "learning_rate": 9.731356783919598e-06, + "loss": 0.1522, + "step": 3175 + }, + { + "epoch": 1.61, + "grad_norm": 2.9011878967285156, + "learning_rate": 9.72884422110553e-06, + "loss": 0.1533, + "step": 3200 + }, + { + "epoch": 1.62, + "grad_norm": 2.7694664001464844, + "learning_rate": 9.726331658291459e-06, + "loss": 0.1509, + "step": 3225 + }, + { + "epoch": 1.64, + "grad_norm": 2.4036076068878174, + "learning_rate": 9.723819095477388e-06, + "loss": 0.1515, + "step": 3250 + }, + { + "epoch": 1.65, + "grad_norm": 2.97019362449646, + "learning_rate": 9.721306532663317e-06, + "loss": 0.1543, + "step": 3275 + }, + { + "epoch": 1.66, + "grad_norm": 3.6240720748901367, + "learning_rate": 9.718793969849247e-06, + "loss": 0.1569, + "step": 3300 + }, + { + "epoch": 1.68, + "grad_norm": 3.1889963150024414, + "learning_rate": 9.716281407035176e-06, + "loss": 0.1527, + "step": 3325 + }, + { + "epoch": 1.69, + "grad_norm": 3.439154863357544, + "learning_rate": 9.713768844221107e-06, + "loss": 0.1493, + "step": 3350 + }, + { + "epoch": 1.7, + "grad_norm": 2.845076084136963, + "learning_rate": 9.711256281407035e-06, + "loss": 0.153, + "step": 3375 + }, + { + "epoch": 1.71, + "grad_norm": 2.901420831680298, + "learning_rate": 9.708743718592966e-06, + "loss": 0.1483, + "step": 3400 + }, + { + "epoch": 1.73, + "grad_norm": 3.0620715618133545, + "learning_rate": 9.706231155778895e-06, + "loss": 0.1482, + "step": 3425 + }, + { + "epoch": 1.74, + "grad_norm": 2.7020177841186523, + "learning_rate": 9.703718592964824e-06, + "loss": 0.1505, + "step": 3450 + }, + { + "epoch": 1.75, + "grad_norm": 2.7704052925109863, + "learning_rate": 9.701206030150755e-06, + "loss": 0.1448, + "step": 3475 + }, + { + "epoch": 1.76, + "grad_norm": 2.8301570415496826, + "learning_rate": 9.698693467336685e-06, + "loss": 0.1391, + "step": 3500 + }, + { + "epoch": 1.78, + "grad_norm": 3.325826644897461, + "learning_rate": 9.696180904522614e-06, + "loss": 0.1437, + "step": 3525 + }, + { + "epoch": 1.79, + "grad_norm": 2.8134355545043945, + "learning_rate": 9.693668341708543e-06, + "loss": 0.1443, + "step": 3550 + }, + { + "epoch": 1.8, + "grad_norm": 2.9018640518188477, + "learning_rate": 9.691155778894473e-06, + "loss": 0.1473, + "step": 3575 + }, + { + "epoch": 1.81, + "grad_norm": 2.7262465953826904, + "learning_rate": 9.688643216080402e-06, + "loss": 0.1495, + "step": 3600 + }, + { + "epoch": 1.83, + "grad_norm": 2.6015288829803467, + "learning_rate": 9.686130653266333e-06, + "loss": 0.1416, + "step": 3625 + }, + { + "epoch": 1.84, + "grad_norm": 2.819683074951172, + "learning_rate": 9.683618090452262e-06, + "loss": 0.1463, + "step": 3650 + }, + { + "epoch": 1.85, + "grad_norm": 2.581345796585083, + "learning_rate": 9.681105527638192e-06, + "loss": 0.1377, + "step": 3675 + }, + { + "epoch": 1.86, + "grad_norm": 3.0245673656463623, + "learning_rate": 9.678592964824121e-06, + "loss": 0.1459, + "step": 3700 + }, + { + "epoch": 1.88, + "grad_norm": 2.959941864013672, + "learning_rate": 9.67608040201005e-06, + "loss": 0.1418, + "step": 3725 + }, + { + "epoch": 1.89, + "grad_norm": 2.983257293701172, + "learning_rate": 9.673567839195981e-06, + "loss": 0.1392, + "step": 3750 + }, + { + "epoch": 1.9, + "grad_norm": 2.7178707122802734, + "learning_rate": 9.67105527638191e-06, + "loss": 0.136, + "step": 3775 + }, + { + "epoch": 1.91, + "grad_norm": 3.262977361679077, + "learning_rate": 9.66854271356784e-06, + "loss": 0.1461, + "step": 3800 + }, + { + "epoch": 1.93, + "grad_norm": 3.0053024291992188, + "learning_rate": 9.666030150753771e-06, + "loss": 0.1401, + "step": 3825 + }, + { + "epoch": 1.94, + "grad_norm": 2.8263909816741943, + "learning_rate": 9.663517587939699e-06, + "loss": 0.1463, + "step": 3850 + }, + { + "epoch": 1.95, + "grad_norm": 3.164536952972412, + "learning_rate": 9.66100502512563e-06, + "loss": 0.1487, + "step": 3875 + }, + { + "epoch": 1.96, + "grad_norm": 2.9771502017974854, + "learning_rate": 9.658492462311559e-06, + "loss": 0.1413, + "step": 3900 + }, + { + "epoch": 1.98, + "grad_norm": 2.8471906185150146, + "learning_rate": 9.655979899497488e-06, + "loss": 0.1377, + "step": 3925 + }, + { + "epoch": 1.99, + "grad_norm": 2.9240942001342773, + "learning_rate": 9.653467336683418e-06, + "loss": 0.1354, + "step": 3950 + }, + { + "epoch": 2.0, + "grad_norm": 2.4545745849609375, + "learning_rate": 9.650954773869347e-06, + "loss": 0.1332, + "step": 3975 + }, + { + "epoch": 2.02, + "grad_norm": 2.752307653427124, + "learning_rate": 9.648442211055276e-06, + "loss": 0.1193, + "step": 4000 + }, + { + "epoch": 2.02, + "eval_loss": 0.10661883652210236, + "eval_runtime": 830.7807, + "eval_samples_per_second": 1.668, + "eval_steps_per_second": 1.668, + "eval_wer": 23.38866325269292, + "step": 4000 + }, + { + "epoch": 2.03, + "grad_norm": 2.6383631229400635, + "learning_rate": 9.645929648241207e-06, + "loss": 0.1187, + "step": 4025 + }, + { + "epoch": 2.04, + "grad_norm": 2.845022201538086, + "learning_rate": 9.643417085427137e-06, + "loss": 0.1163, + "step": 4050 + }, + { + "epoch": 2.05, + "grad_norm": 2.8194046020507812, + "learning_rate": 9.640904522613066e-06, + "loss": 0.117, + "step": 4075 + }, + { + "epoch": 2.07, + "grad_norm": 2.6809606552124023, + "learning_rate": 9.638391959798997e-06, + "loss": 0.1099, + "step": 4100 + }, + { + "epoch": 2.08, + "grad_norm": 2.5562992095947266, + "learning_rate": 9.635879396984925e-06, + "loss": 0.1169, + "step": 4125 + }, + { + "epoch": 2.09, + "grad_norm": 2.6408324241638184, + "learning_rate": 9.633366834170856e-06, + "loss": 0.1179, + "step": 4150 + }, + { + "epoch": 2.1, + "grad_norm": 2.5334956645965576, + "learning_rate": 9.630854271356785e-06, + "loss": 0.1093, + "step": 4175 + }, + { + "epoch": 2.12, + "grad_norm": 2.9643609523773193, + "learning_rate": 9.628341708542714e-06, + "loss": 0.1172, + "step": 4200 + }, + { + "epoch": 2.13, + "grad_norm": 2.4382715225219727, + "learning_rate": 9.625829145728644e-06, + "loss": 0.1163, + "step": 4225 + }, + { + "epoch": 2.14, + "grad_norm": 2.6148650646209717, + "learning_rate": 9.623316582914573e-06, + "loss": 0.1155, + "step": 4250 + }, + { + "epoch": 2.15, + "grad_norm": 2.858917713165283, + "learning_rate": 9.620804020100504e-06, + "loss": 0.1164, + "step": 4275 + }, + { + "epoch": 2.17, + "grad_norm": 2.849001169204712, + "learning_rate": 9.618291457286433e-06, + "loss": 0.1164, + "step": 4300 + }, + { + "epoch": 2.18, + "grad_norm": 3.064525842666626, + "learning_rate": 9.615778894472363e-06, + "loss": 0.1172, + "step": 4325 + }, + { + "epoch": 2.19, + "grad_norm": 2.692760467529297, + "learning_rate": 9.613266331658292e-06, + "loss": 0.1152, + "step": 4350 + }, + { + "epoch": 2.2, + "grad_norm": 3.196589469909668, + "learning_rate": 9.610753768844223e-06, + "loss": 0.1217, + "step": 4375 + }, + { + "epoch": 2.22, + "grad_norm": 2.650524377822876, + "learning_rate": 9.60824120603015e-06, + "loss": 0.1121, + "step": 4400 + }, + { + "epoch": 2.23, + "grad_norm": 2.8220608234405518, + "learning_rate": 9.605728643216082e-06, + "loss": 0.117, + "step": 4425 + }, + { + "epoch": 2.24, + "grad_norm": 2.7866902351379395, + "learning_rate": 9.60321608040201e-06, + "loss": 0.1152, + "step": 4450 + }, + { + "epoch": 2.25, + "grad_norm": 2.7710187435150146, + "learning_rate": 9.60070351758794e-06, + "loss": 0.1138, + "step": 4475 + }, + { + "epoch": 2.27, + "grad_norm": 2.7889788150787354, + "learning_rate": 9.598190954773871e-06, + "loss": 0.1153, + "step": 4500 + }, + { + "epoch": 2.28, + "grad_norm": 2.7566347122192383, + "learning_rate": 9.595678391959799e-06, + "loss": 0.1131, + "step": 4525 + }, + { + "epoch": 2.29, + "grad_norm": 2.254423141479492, + "learning_rate": 9.59316582914573e-06, + "loss": 0.1167, + "step": 4550 + }, + { + "epoch": 2.3, + "grad_norm": 2.9766008853912354, + "learning_rate": 9.59065326633166e-06, + "loss": 0.1129, + "step": 4575 + }, + { + "epoch": 2.32, + "grad_norm": 2.869935989379883, + "learning_rate": 9.588140703517588e-06, + "loss": 0.1153, + "step": 4600 + }, + { + "epoch": 2.33, + "grad_norm": 3.0799245834350586, + "learning_rate": 9.585628140703518e-06, + "loss": 0.115, + "step": 4625 + }, + { + "epoch": 2.34, + "grad_norm": 2.8410706520080566, + "learning_rate": 9.583115577889449e-06, + "loss": 0.1143, + "step": 4650 + }, + { + "epoch": 2.36, + "grad_norm": 3.1906578540802, + "learning_rate": 9.580603015075378e-06, + "loss": 0.1169, + "step": 4675 + }, + { + "epoch": 2.37, + "grad_norm": 2.7719221115112305, + "learning_rate": 9.578090452261307e-06, + "loss": 0.116, + "step": 4700 + }, + { + "epoch": 2.38, + "grad_norm": 2.6311252117156982, + "learning_rate": 9.575577889447237e-06, + "loss": 0.1141, + "step": 4725 + }, + { + "epoch": 2.39, + "grad_norm": 2.827535390853882, + "learning_rate": 9.573065326633166e-06, + "loss": 0.1108, + "step": 4750 + }, + { + "epoch": 2.41, + "grad_norm": 3.1737442016601562, + "learning_rate": 9.570552763819097e-06, + "loss": 0.1108, + "step": 4775 + }, + { + "epoch": 2.42, + "grad_norm": 2.572115659713745, + "learning_rate": 9.568040201005025e-06, + "loss": 0.1092, + "step": 4800 + }, + { + "epoch": 2.43, + "grad_norm": 2.936357021331787, + "learning_rate": 9.565527638190956e-06, + "loss": 0.1104, + "step": 4825 + }, + { + "epoch": 2.44, + "grad_norm": 2.8647677898406982, + "learning_rate": 9.563015075376885e-06, + "loss": 0.1102, + "step": 4850 + }, + { + "epoch": 2.46, + "grad_norm": 2.1833436489105225, + "learning_rate": 9.560502512562814e-06, + "loss": 0.1101, + "step": 4875 + }, + { + "epoch": 2.47, + "grad_norm": 2.927780866622925, + "learning_rate": 9.557989949748745e-06, + "loss": 0.111, + "step": 4900 + }, + { + "epoch": 2.48, + "grad_norm": 2.7894015312194824, + "learning_rate": 9.555477386934675e-06, + "loss": 0.112, + "step": 4925 + }, + { + "epoch": 2.49, + "grad_norm": 2.5226616859436035, + "learning_rate": 9.552964824120604e-06, + "loss": 0.1104, + "step": 4950 + }, + { + "epoch": 2.51, + "grad_norm": 2.9857852458953857, + "learning_rate": 9.550452261306533e-06, + "loss": 0.1082, + "step": 4975 + }, + { + "epoch": 2.52, + "grad_norm": 3.424072504043579, + "learning_rate": 9.547939698492463e-06, + "loss": 0.1162, + "step": 5000 + }, + { + "epoch": 2.52, + "eval_loss": 0.10395942628383636, + "eval_runtime": 684.5513, + "eval_samples_per_second": 2.025, + "eval_steps_per_second": 2.025, + "eval_wer": 22.090764612396256, + "step": 5000 + }, + { + "epoch": 2.53, + "grad_norm": 2.9294180870056152, + "learning_rate": 9.545427135678392e-06, + "loss": 0.1176, + "step": 5025 + }, + { + "epoch": 2.54, + "grad_norm": 2.3632888793945312, + "learning_rate": 9.542914572864323e-06, + "loss": 0.1081, + "step": 5050 + }, + { + "epoch": 2.56, + "grad_norm": 2.7226736545562744, + "learning_rate": 9.540402010050252e-06, + "loss": 0.1119, + "step": 5075 + }, + { + "epoch": 2.57, + "grad_norm": 2.382338523864746, + "learning_rate": 9.537889447236182e-06, + "loss": 0.1086, + "step": 5100 + }, + { + "epoch": 2.58, + "grad_norm": 2.155691385269165, + "learning_rate": 9.535376884422111e-06, + "loss": 0.1081, + "step": 5125 + }, + { + "epoch": 2.59, + "grad_norm": 3.0607807636260986, + "learning_rate": 9.53286432160804e-06, + "loss": 0.1083, + "step": 5150 + }, + { + "epoch": 2.61, + "grad_norm": 2.922677755355835, + "learning_rate": 9.530351758793971e-06, + "loss": 0.1095, + "step": 5175 + }, + { + "epoch": 2.62, + "grad_norm": 2.5065267086029053, + "learning_rate": 9.5278391959799e-06, + "loss": 0.1062, + "step": 5200 + }, + { + "epoch": 2.63, + "grad_norm": 2.9763593673706055, + "learning_rate": 9.52532663316583e-06, + "loss": 0.1048, + "step": 5225 + }, + { + "epoch": 2.64, + "grad_norm": 3.1285924911499023, + "learning_rate": 9.52281407035176e-06, + "loss": 0.112, + "step": 5250 + }, + { + "epoch": 2.66, + "grad_norm": 2.183452844619751, + "learning_rate": 9.520301507537689e-06, + "loss": 0.1095, + "step": 5275 + }, + { + "epoch": 2.67, + "grad_norm": 2.8173439502716064, + "learning_rate": 9.51778894472362e-06, + "loss": 0.108, + "step": 5300 + }, + { + "epoch": 2.68, + "grad_norm": 3.2343697547912598, + "learning_rate": 9.515276381909549e-06, + "loss": 0.1112, + "step": 5325 + }, + { + "epoch": 2.7, + "grad_norm": 2.542264699935913, + "learning_rate": 9.512763819095478e-06, + "loss": 0.1039, + "step": 5350 + }, + { + "epoch": 2.71, + "grad_norm": 2.498128652572632, + "learning_rate": 9.510251256281408e-06, + "loss": 0.1059, + "step": 5375 + }, + { + "epoch": 2.72, + "grad_norm": 2.389651298522949, + "learning_rate": 9.507738693467337e-06, + "loss": 0.1075, + "step": 5400 + }, + { + "epoch": 2.73, + "grad_norm": 2.627925157546997, + "learning_rate": 9.505226130653266e-06, + "loss": 0.1089, + "step": 5425 + }, + { + "epoch": 2.75, + "grad_norm": 2.7367215156555176, + "learning_rate": 9.502713567839197e-06, + "loss": 0.1145, + "step": 5450 + }, + { + "epoch": 2.76, + "grad_norm": 2.7536168098449707, + "learning_rate": 9.500201005025127e-06, + "loss": 0.1072, + "step": 5475 + }, + { + "epoch": 2.77, + "grad_norm": 2.6179287433624268, + "learning_rate": 9.497688442211056e-06, + "loss": 0.1093, + "step": 5500 + }, + { + "epoch": 2.78, + "grad_norm": 2.66959810256958, + "learning_rate": 9.495175879396987e-06, + "loss": 0.1112, + "step": 5525 + }, + { + "epoch": 2.8, + "grad_norm": 2.562532663345337, + "learning_rate": 9.492663316582915e-06, + "loss": 0.1001, + "step": 5550 + }, + { + "epoch": 2.81, + "grad_norm": 2.526099443435669, + "learning_rate": 9.490150753768846e-06, + "loss": 0.1077, + "step": 5575 + }, + { + "epoch": 2.82, + "grad_norm": 2.9809153079986572, + "learning_rate": 9.487638190954775e-06, + "loss": 0.1102, + "step": 5600 + }, + { + "epoch": 2.83, + "grad_norm": 2.830005645751953, + "learning_rate": 9.485125628140704e-06, + "loss": 0.1052, + "step": 5625 + }, + { + "epoch": 2.85, + "grad_norm": 2.7690727710723877, + "learning_rate": 9.482613065326634e-06, + "loss": 0.1076, + "step": 5650 + }, + { + "epoch": 2.86, + "grad_norm": 2.552865505218506, + "learning_rate": 9.480100502512563e-06, + "loss": 0.1057, + "step": 5675 + }, + { + "epoch": 2.87, + "grad_norm": 2.65742826461792, + "learning_rate": 9.477587939698494e-06, + "loss": 0.1021, + "step": 5700 + }, + { + "epoch": 2.88, + "grad_norm": 2.8993513584136963, + "learning_rate": 9.475075376884423e-06, + "loss": 0.1053, + "step": 5725 + }, + { + "epoch": 2.9, + "grad_norm": 2.5606019496917725, + "learning_rate": 9.472562814070353e-06, + "loss": 0.1086, + "step": 5750 + }, + { + "epoch": 2.91, + "grad_norm": 2.7209970951080322, + "learning_rate": 9.470050251256282e-06, + "loss": 0.1096, + "step": 5775 + }, + { + "epoch": 2.92, + "grad_norm": 2.7295970916748047, + "learning_rate": 9.467537688442213e-06, + "loss": 0.1025, + "step": 5800 + }, + { + "epoch": 2.93, + "grad_norm": 2.570082426071167, + "learning_rate": 9.46502512562814e-06, + "loss": 0.1009, + "step": 5825 + }, + { + "epoch": 2.95, + "grad_norm": 2.7234127521514893, + "learning_rate": 9.462512562814072e-06, + "loss": 0.1018, + "step": 5850 + }, + { + "epoch": 2.96, + "grad_norm": 2.6980459690093994, + "learning_rate": 9.460000000000001e-06, + "loss": 0.1083, + "step": 5875 + }, + { + "epoch": 2.97, + "grad_norm": 2.5525951385498047, + "learning_rate": 9.45748743718593e-06, + "loss": 0.1059, + "step": 5900 + }, + { + "epoch": 2.98, + "grad_norm": 2.615046739578247, + "learning_rate": 9.454974874371861e-06, + "loss": 0.1038, + "step": 5925 + }, + { + "epoch": 3.0, + "grad_norm": 2.7675647735595703, + "learning_rate": 9.452462311557789e-06, + "loss": 0.1058, + "step": 5950 + }, + { + "epoch": 3.01, + "grad_norm": 2.4065475463867188, + "learning_rate": 9.44994974874372e-06, + "loss": 0.0885, + "step": 5975 + }, + { + "epoch": 3.02, + "grad_norm": 2.8514456748962402, + "learning_rate": 9.44743718592965e-06, + "loss": 0.0812, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.10044781863689423, + "eval_runtime": 688.3044, + "eval_samples_per_second": 2.014, + "eval_steps_per_second": 2.014, + "eval_wer": 21.57866855023839, + "step": 6000 + }, + { + "epoch": 3.04, + "grad_norm": 2.9714245796203613, + "learning_rate": 9.444924623115579e-06, + "loss": 0.0804, + "step": 6025 + }, + { + "epoch": 3.05, + "grad_norm": 2.43624210357666, + "learning_rate": 9.442412060301508e-06, + "loss": 0.0826, + "step": 6050 + }, + { + "epoch": 3.06, + "grad_norm": 2.297102928161621, + "learning_rate": 9.439899497487439e-06, + "loss": 0.0826, + "step": 6075 + }, + { + "epoch": 3.07, + "grad_norm": 2.279273509979248, + "learning_rate": 9.437386934673367e-06, + "loss": 0.0874, + "step": 6100 + }, + { + "epoch": 3.09, + "grad_norm": 2.602400064468384, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0855, + "step": 6125 + }, + { + "epoch": 3.1, + "grad_norm": 2.684255599975586, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0813, + "step": 6150 + }, + { + "epoch": 3.11, + "grad_norm": 2.5507473945617676, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0832, + "step": 6175 + }, + { + "epoch": 3.12, + "grad_norm": 2.6240627765655518, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0806, + "step": 6200 + }, + { + "epoch": 3.14, + "grad_norm": 2.8727405071258545, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0831, + "step": 6225 + }, + { + "epoch": 3.15, + "grad_norm": 2.359065294265747, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0829, + "step": 6250 + }, + { + "epoch": 3.16, + "grad_norm": 2.7856605052948, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0809, + "step": 6275 + }, + { + "epoch": 3.17, + "grad_norm": 2.551359176635742, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0832, + "step": 6300 + }, + { + "epoch": 3.19, + "grad_norm": 2.4178707599639893, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0835, + "step": 6325 + }, + { + "epoch": 3.2, + "grad_norm": 2.3194596767425537, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0815, + "step": 6350 + }, + { + "epoch": 3.21, + "grad_norm": 2.5335512161254883, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0855, + "step": 6375 + }, + { + "epoch": 3.22, + "grad_norm": 2.448418140411377, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0799, + "step": 6400 + }, + { + "epoch": 3.24, + "grad_norm": 2.961803436279297, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0813, + "step": 6425 + }, + { + "epoch": 3.25, + "grad_norm": 2.424445152282715, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0837, + "step": 6450 + }, + { + "epoch": 3.26, + "grad_norm": 2.404649257659912, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0797, + "step": 6475 + }, + { + "epoch": 3.27, + "grad_norm": 2.8638429641723633, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0814, + "step": 6500 + }, + { + "epoch": 3.29, + "grad_norm": 2.423931121826172, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0837, + "step": 6525 + }, + { + "epoch": 3.3, + "grad_norm": 2.6237282752990723, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0817, + "step": 6550 + }, + { + "epoch": 3.31, + "grad_norm": 2.2962746620178223, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0833, + "step": 6575 + }, + { + "epoch": 3.32, + "grad_norm": 2.5188238620758057, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0796, + "step": 6600 + }, + { + "epoch": 3.34, + "grad_norm": 2.524373769760132, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0792, + "step": 6625 + }, + { + "epoch": 3.35, + "grad_norm": 3.000775098800659, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0871, + "step": 6650 + }, + { + "epoch": 3.36, + "grad_norm": 2.506762981414795, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0825, + "step": 6675 + }, + { + "epoch": 3.38, + "grad_norm": 2.430424928665161, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0858, + "step": 6700 + }, + { + "epoch": 3.39, + "grad_norm": 2.7606923580169678, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0837, + "step": 6725 + }, + { + "epoch": 3.4, + "grad_norm": 2.593744993209839, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0817, + "step": 6750 + }, + { + "epoch": 3.41, + "grad_norm": 3.017228364944458, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0829, + "step": 6775 + }, + { + "epoch": 3.43, + "grad_norm": 2.575165033340454, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0908, + "step": 6800 + }, + { + "epoch": 3.44, + "grad_norm": 2.704267740249634, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0851, + "step": 6825 + }, + { + "epoch": 3.45, + "grad_norm": 2.861065149307251, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0827, + "step": 6850 + }, + { + "epoch": 3.46, + "grad_norm": 2.481764554977417, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0792, + "step": 6875 + }, + { + "epoch": 3.48, + "grad_norm": 2.334287643432617, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0823, + "step": 6900 + }, + { + "epoch": 3.49, + "grad_norm": 2.8507022857666016, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0806, + "step": 6925 + }, + { + "epoch": 3.5, + "grad_norm": 2.3402962684631348, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0805, + "step": 6950 + }, + { + "epoch": 3.51, + "grad_norm": 2.6283440589904785, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0805, + "step": 6975 + }, + { + "epoch": 3.53, + "grad_norm": 2.3327245712280273, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0795, + "step": 7000 + }, + { + "epoch": 3.53, + "eval_loss": 0.1032654270529747, + "eval_runtime": 689.3121, + "eval_samples_per_second": 2.011, + "eval_steps_per_second": 2.011, + "eval_wer": 22.126081582200246, + "step": 7000 + }, + { + "epoch": 3.54, + "grad_norm": 2.56040620803833, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0771, + "step": 7025 + }, + { + "epoch": 3.55, + "grad_norm": 2.429685354232788, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0829, + "step": 7050 + }, + { + "epoch": 3.56, + "grad_norm": 2.4736459255218506, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0809, + "step": 7075 + }, + { + "epoch": 3.58, + "grad_norm": 2.6769251823425293, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0811, + "step": 7100 + }, + { + "epoch": 3.59, + "grad_norm": 2.2839248180389404, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0815, + "step": 7125 + }, + { + "epoch": 3.6, + "grad_norm": 2.581639528274536, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0826, + "step": 7150 + }, + { + "epoch": 3.61, + "grad_norm": 2.6889891624450684, + "learning_rate": 9.329547738693469e-06, + "loss": 0.0817, + "step": 7175 + }, + { + "epoch": 3.63, + "grad_norm": 2.287019968032837, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0788, + "step": 7200 + }, + { + "epoch": 3.64, + "grad_norm": 2.5703585147857666, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0828, + "step": 7225 + }, + { + "epoch": 3.65, + "grad_norm": 2.6228976249694824, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0805, + "step": 7250 + }, + { + "epoch": 3.66, + "grad_norm": 2.5100107192993164, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0773, + "step": 7275 + }, + { + "epoch": 3.68, + "grad_norm": 2.550609588623047, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0814, + "step": 7300 + }, + { + "epoch": 3.69, + "grad_norm": 2.935396671295166, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0817, + "step": 7325 + }, + { + "epoch": 3.7, + "grad_norm": 3.0559613704681396, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0835, + "step": 7350 + }, + { + "epoch": 3.72, + "grad_norm": 2.5695858001708984, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0765, + "step": 7375 + }, + { + "epoch": 3.73, + "grad_norm": 2.3461496829986572, + "learning_rate": 9.306934673366836e-06, + "loss": 0.0755, + "step": 7400 + }, + { + "epoch": 3.74, + "grad_norm": 2.714787244796753, + "learning_rate": 9.304422110552764e-06, + "loss": 0.081, + "step": 7425 + }, + { + "epoch": 3.75, + "grad_norm": 2.6449363231658936, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0779, + "step": 7450 + }, + { + "epoch": 3.77, + "grad_norm": 2.645785093307495, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0815, + "step": 7475 + }, + { + "epoch": 3.78, + "grad_norm": 2.7624340057373047, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0789, + "step": 7500 + }, + { + "epoch": 3.79, + "grad_norm": 2.475884199142456, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0796, + "step": 7525 + }, + { + "epoch": 3.8, + "grad_norm": 2.9795217514038086, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0802, + "step": 7550 + }, + { + "epoch": 3.82, + "grad_norm": 2.5900163650512695, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0877, + "step": 7575 + }, + { + "epoch": 3.83, + "grad_norm": 2.870521068572998, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0777, + "step": 7600 + }, + { + "epoch": 3.84, + "grad_norm": 2.9215402603149414, + "learning_rate": 9.284321608040202e-06, + "loss": 0.0775, + "step": 7625 + }, + { + "epoch": 3.85, + "grad_norm": 2.807645797729492, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0846, + "step": 7650 + }, + { + "epoch": 3.87, + "grad_norm": 2.421593427658081, + "learning_rate": 9.279296482412062e-06, + "loss": 0.078, + "step": 7675 + }, + { + "epoch": 3.88, + "grad_norm": 2.331895589828491, + "learning_rate": 9.27678391959799e-06, + "loss": 0.0773, + "step": 7700 + }, + { + "epoch": 3.89, + "grad_norm": 2.512113094329834, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0746, + "step": 7725 + }, + { + "epoch": 3.9, + "grad_norm": 2.3983047008514404, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0765, + "step": 7750 + }, + { + "epoch": 3.92, + "grad_norm": 2.6165409088134766, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0808, + "step": 7775 + }, + { + "epoch": 3.93, + "grad_norm": 2.5119307041168213, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0763, + "step": 7800 + }, + { + "epoch": 3.94, + "grad_norm": 2.3341987133026123, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0745, + "step": 7825 + }, + { + "epoch": 3.95, + "grad_norm": 2.8182175159454346, + "learning_rate": 9.261708542713569e-06, + "loss": 0.0808, + "step": 7850 + }, + { + "epoch": 3.97, + "grad_norm": 2.702430248260498, + "learning_rate": 9.259195979899498e-06, + "loss": 0.0808, + "step": 7875 + }, + { + "epoch": 3.98, + "grad_norm": 2.8926761150360107, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0725, + "step": 7900 + }, + { + "epoch": 3.99, + "grad_norm": 2.6951711177825928, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0798, + "step": 7925 + }, + { + "epoch": 4.01, + "grad_norm": 2.3534343242645264, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0687, + "step": 7950 + }, + { + "epoch": 4.02, + "grad_norm": 2.36779522895813, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0566, + "step": 7975 + }, + { + "epoch": 4.03, + "grad_norm": 2.28019642829895, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0579, + "step": 8000 + }, + { + "epoch": 4.03, + "eval_loss": 0.10348565876483917, + "eval_runtime": 682.4716, + "eval_samples_per_second": 2.031, + "eval_steps_per_second": 2.031, + "eval_wer": 21.26964506445347, + "step": 8000 + }, + { + "epoch": 4.04, + "grad_norm": 1.8966443538665771, + "learning_rate": 9.244120603015076e-06, + "loss": 0.057, + "step": 8025 + }, + { + "epoch": 4.06, + "grad_norm": 2.654658317565918, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0587, + "step": 8050 + }, + { + "epoch": 4.07, + "grad_norm": 2.6569743156433105, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0611, + "step": 8075 + }, + { + "epoch": 4.08, + "grad_norm": 2.1448495388031006, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0577, + "step": 8100 + }, + { + "epoch": 4.09, + "grad_norm": 2.5572445392608643, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0539, + "step": 8125 + }, + { + "epoch": 4.11, + "grad_norm": 2.4587111473083496, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0559, + "step": 8150 + }, + { + "epoch": 4.12, + "grad_norm": 1.9537981748580933, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0554, + "step": 8175 + }, + { + "epoch": 4.13, + "grad_norm": 2.7970876693725586, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0589, + "step": 8200 + }, + { + "epoch": 4.14, + "grad_norm": 2.512350559234619, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0556, + "step": 8225 + }, + { + "epoch": 4.16, + "grad_norm": 2.613807439804077, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0594, + "step": 8250 + }, + { + "epoch": 4.17, + "grad_norm": 2.4113881587982178, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0572, + "step": 8275 + }, + { + "epoch": 4.18, + "grad_norm": 2.4259517192840576, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0544, + "step": 8300 + }, + { + "epoch": 4.19, + "grad_norm": 2.5770509243011475, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0645, + "step": 8325 + }, + { + "epoch": 4.21, + "grad_norm": 2.418510675430298, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0617, + "step": 8350 + }, + { + "epoch": 4.22, + "grad_norm": 2.4126944541931152, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0566, + "step": 8375 + }, + { + "epoch": 4.23, + "grad_norm": 2.5099265575408936, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0616, + "step": 8400 + }, + { + "epoch": 4.24, + "grad_norm": 2.698033571243286, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0596, + "step": 8425 + }, + { + "epoch": 4.26, + "grad_norm": 2.6324944496154785, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0601, + "step": 8450 + }, + { + "epoch": 4.27, + "grad_norm": 2.5753655433654785, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0577, + "step": 8475 + }, + { + "epoch": 4.28, + "grad_norm": 2.1048152446746826, + "learning_rate": 9.19638190954774e-06, + "loss": 0.057, + "step": 8500 + }, + { + "epoch": 4.29, + "grad_norm": 2.4474809169769287, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0591, + "step": 8525 + }, + { + "epoch": 4.31, + "grad_norm": 2.498405694961548, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0626, + "step": 8550 + }, + { + "epoch": 4.32, + "grad_norm": 2.4673190116882324, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0574, + "step": 8575 + }, + { + "epoch": 4.33, + "grad_norm": 2.720036506652832, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0574, + "step": 8600 + }, + { + "epoch": 4.35, + "grad_norm": 2.5103397369384766, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0597, + "step": 8625 + }, + { + "epoch": 4.36, + "grad_norm": 2.4917893409729004, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0569, + "step": 8650 + }, + { + "epoch": 4.37, + "grad_norm": 2.702958345413208, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0595, + "step": 8675 + }, + { + "epoch": 4.38, + "grad_norm": 3.0826056003570557, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0578, + "step": 8700 + }, + { + "epoch": 4.4, + "grad_norm": 2.49990177154541, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0614, + "step": 8725 + }, + { + "epoch": 4.41, + "grad_norm": 2.6642534732818604, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0595, + "step": 8750 + }, + { + "epoch": 4.42, + "grad_norm": 2.6862027645111084, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0589, + "step": 8775 + }, + { + "epoch": 4.43, + "grad_norm": 2.0674421787261963, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0596, + "step": 8800 + }, + { + "epoch": 4.45, + "grad_norm": 2.3206048011779785, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0596, + "step": 8825 + }, + { + "epoch": 4.46, + "grad_norm": 2.4712817668914795, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0617, + "step": 8850 + }, + { + "epoch": 4.47, + "grad_norm": 2.7893097400665283, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0585, + "step": 8875 + }, + { + "epoch": 4.48, + "grad_norm": 3.075997829437256, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0606, + "step": 8900 + }, + { + "epoch": 4.5, + "grad_norm": 2.303222894668579, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0585, + "step": 8925 + }, + { + "epoch": 4.51, + "grad_norm": 2.209336996078491, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0595, + "step": 8950 + }, + { + "epoch": 4.52, + "grad_norm": 2.351933479309082, + "learning_rate": 9.148643216080402e-06, + "loss": 0.059, + "step": 8975 + }, + { + "epoch": 4.53, + "grad_norm": 2.36325740814209, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0579, + "step": 9000 + }, + { + "epoch": 4.53, + "eval_loss": 0.10895385593175888, + "eval_runtime": 682.3588, + "eval_samples_per_second": 2.031, + "eval_steps_per_second": 2.031, + "eval_wer": 21.066572488080524, + "step": 9000 + }, + { + "epoch": 4.55, + "grad_norm": 2.4499053955078125, + "learning_rate": 9.143618090452262e-06, + "loss": 0.058, + "step": 9025 + }, + { + "epoch": 4.56, + "grad_norm": 2.6125757694244385, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0615, + "step": 9050 + }, + { + "epoch": 4.57, + "grad_norm": 2.384432077407837, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0604, + "step": 9075 + }, + { + "epoch": 4.58, + "grad_norm": 2.026665210723877, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0558, + "step": 9100 + }, + { + "epoch": 4.6, + "grad_norm": 2.405344009399414, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0618, + "step": 9125 + }, + { + "epoch": 4.61, + "grad_norm": 2.589684247970581, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0585, + "step": 9150 + }, + { + "epoch": 4.62, + "grad_norm": 2.6550981998443604, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0626, + "step": 9175 + }, + { + "epoch": 4.63, + "grad_norm": 2.6266894340515137, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0551, + "step": 9200 + }, + { + "epoch": 4.65, + "grad_norm": 2.422456741333008, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0559, + "step": 9225 + }, + { + "epoch": 4.66, + "grad_norm": 2.3022892475128174, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0591, + "step": 9250 + }, + { + "epoch": 4.67, + "grad_norm": 2.3445470333099365, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0572, + "step": 9275 + }, + { + "epoch": 4.69, + "grad_norm": 2.532125949859619, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0592, + "step": 9300 + }, + { + "epoch": 4.7, + "grad_norm": 2.816389322280884, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0537, + "step": 9325 + }, + { + "epoch": 4.71, + "grad_norm": 2.7893483638763428, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0575, + "step": 9350 + }, + { + "epoch": 4.72, + "grad_norm": 2.116706609725952, + "learning_rate": 9.108442211055278e-06, + "loss": 0.057, + "step": 9375 + }, + { + "epoch": 4.74, + "grad_norm": 2.599848747253418, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0572, + "step": 9400 + }, + { + "epoch": 4.75, + "grad_norm": 2.7252182960510254, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0608, + "step": 9425 + }, + { + "epoch": 4.76, + "grad_norm": 2.507817029953003, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0573, + "step": 9450 + }, + { + "epoch": 4.77, + "grad_norm": 2.3740479946136475, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0573, + "step": 9475 + }, + { + "epoch": 4.79, + "grad_norm": 2.669379234313965, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0615, + "step": 9500 + }, + { + "epoch": 4.8, + "grad_norm": 2.681913375854492, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0593, + "step": 9525 + }, + { + "epoch": 4.81, + "grad_norm": 2.2882165908813477, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0563, + "step": 9550 + }, + { + "epoch": 4.82, + "grad_norm": 2.815711259841919, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0583, + "step": 9575 + }, + { + "epoch": 4.84, + "grad_norm": 2.585646867752075, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0567, + "step": 9600 + }, + { + "epoch": 4.85, + "grad_norm": 2.5605852603912354, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0592, + "step": 9625 + }, + { + "epoch": 4.86, + "grad_norm": 2.3944308757781982, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0566, + "step": 9650 + }, + { + "epoch": 4.87, + "grad_norm": 2.6934664249420166, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0588, + "step": 9675 + }, + { + "epoch": 4.89, + "grad_norm": 2.5759880542755127, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0592, + "step": 9700 + }, + { + "epoch": 4.9, + "grad_norm": 2.52605938911438, + "learning_rate": 9.073266331658292e-06, + "loss": 0.06, + "step": 9725 + }, + { + "epoch": 4.91, + "grad_norm": 2.3384010791778564, + "learning_rate": 9.070753768844221e-06, + "loss": 0.056, + "step": 9750 + }, + { + "epoch": 4.92, + "grad_norm": 2.5652034282684326, + "learning_rate": 9.068241206030152e-06, + "loss": 0.058, + "step": 9775 + }, + { + "epoch": 4.94, + "grad_norm": 2.557934522628784, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0557, + "step": 9800 + }, + { + "epoch": 4.95, + "grad_norm": 2.7856483459472656, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0594, + "step": 9825 + }, + { + "epoch": 4.96, + "grad_norm": 2.6743314266204834, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0579, + "step": 9850 + }, + { + "epoch": 4.97, + "grad_norm": 2.5735857486724854, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0563, + "step": 9875 + }, + { + "epoch": 4.99, + "grad_norm": 2.69311785697937, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0558, + "step": 9900 + }, + { + "epoch": 5.0, + "grad_norm": 2.5358142852783203, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0564, + "step": 9925 + }, + { + "epoch": 5.01, + "grad_norm": 2.6684141159057617, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0413, + "step": 9950 + }, + { + "epoch": 5.03, + "grad_norm": 2.2356104850769043, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0399, + "step": 9975 + }, + { + "epoch": 5.04, + "grad_norm": 1.8729658126831055, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0367, + "step": 10000 + }, + { + "epoch": 5.04, + "eval_loss": 0.10888118296861649, + "eval_runtime": 682.0378, + "eval_samples_per_second": 2.032, + "eval_steps_per_second": 2.032, + "eval_wer": 21.19018188239449, + "step": 10000 + }, + { + "epoch": 5.05, + "grad_norm": 1.822731614112854, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0419, + "step": 10025 + }, + { + "epoch": 5.06, + "grad_norm": 1.8527107238769531, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0393, + "step": 10050 + }, + { + "epoch": 5.08, + "grad_norm": 2.0046181678771973, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0416, + "step": 10075 + }, + { + "epoch": 5.09, + "grad_norm": 2.0824451446533203, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0403, + "step": 10100 + }, + { + "epoch": 5.1, + "grad_norm": 2.072101354598999, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0396, + "step": 10125 + }, + { + "epoch": 5.11, + "grad_norm": 2.1716184616088867, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0396, + "step": 10150 + }, + { + "epoch": 5.13, + "grad_norm": 2.185249090194702, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0431, + "step": 10175 + }, + { + "epoch": 5.14, + "grad_norm": 2.2256271839141846, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0405, + "step": 10200 + }, + { + "epoch": 5.15, + "grad_norm": 2.185067653656006, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0403, + "step": 10225 + }, + { + "epoch": 5.16, + "grad_norm": 2.307008743286133, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0409, + "step": 10250 + }, + { + "epoch": 5.18, + "grad_norm": 2.2430601119995117, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0387, + "step": 10275 + }, + { + "epoch": 5.19, + "grad_norm": 2.705752372741699, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0416, + "step": 10300 + }, + { + "epoch": 5.2, + "grad_norm": 2.16209077835083, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0412, + "step": 10325 + }, + { + "epoch": 5.21, + "grad_norm": 2.3688838481903076, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0406, + "step": 10350 + }, + { + "epoch": 5.23, + "grad_norm": 2.389176368713379, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0404, + "step": 10375 + }, + { + "epoch": 5.24, + "grad_norm": 2.1386139392852783, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0403, + "step": 10400 + }, + { + "epoch": 5.25, + "grad_norm": 2.6484930515289307, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0403, + "step": 10425 + }, + { + "epoch": 5.26, + "grad_norm": 2.0014092922210693, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0386, + "step": 10450 + }, + { + "epoch": 5.28, + "grad_norm": 2.141289710998535, + "learning_rate": 8.997989949748744e-06, + "loss": 0.039, + "step": 10475 + }, + { + "epoch": 5.29, + "grad_norm": 2.1630661487579346, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0398, + "step": 10500 + }, + { + "epoch": 5.3, + "grad_norm": 2.3761839866638184, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0414, + "step": 10525 + }, + { + "epoch": 5.31, + "grad_norm": 2.495687961578369, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0415, + "step": 10550 + }, + { + "epoch": 5.33, + "grad_norm": 2.2686784267425537, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0405, + "step": 10575 + }, + { + "epoch": 5.34, + "grad_norm": 2.302217721939087, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0455, + "step": 10600 + }, + { + "epoch": 5.35, + "grad_norm": 2.271423816680908, + "learning_rate": 8.982914572864322e-06, + "loss": 0.041, + "step": 10625 + }, + { + "epoch": 5.37, + "grad_norm": 1.9629069566726685, + "learning_rate": 8.980402010050253e-06, + "loss": 0.037, + "step": 10650 + }, + { + "epoch": 5.38, + "grad_norm": 2.7626171112060547, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0418, + "step": 10675 + }, + { + "epoch": 5.39, + "grad_norm": 2.276355266571045, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0389, + "step": 10700 + }, + { + "epoch": 5.4, + "grad_norm": 1.9179961681365967, + "learning_rate": 8.97286432160804e-06, + "loss": 0.042, + "step": 10725 + }, + { + "epoch": 5.42, + "grad_norm": 2.494594097137451, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0394, + "step": 10750 + }, + { + "epoch": 5.43, + "grad_norm": 2.4041616916656494, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0413, + "step": 10775 + }, + { + "epoch": 5.44, + "grad_norm": 2.0518686771392822, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0403, + "step": 10800 + }, + { + "epoch": 5.45, + "grad_norm": 2.142915725708008, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0375, + "step": 10825 + }, + { + "epoch": 5.47, + "grad_norm": 2.2804417610168457, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0408, + "step": 10850 + }, + { + "epoch": 5.48, + "grad_norm": 2.2041561603546143, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0407, + "step": 10875 + }, + { + "epoch": 5.49, + "grad_norm": 2.6075692176818848, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0406, + "step": 10900 + }, + { + "epoch": 5.5, + "grad_norm": 2.644960641860962, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0382, + "step": 10925 + }, + { + "epoch": 5.52, + "grad_norm": 2.2249841690063477, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0415, + "step": 10950 + }, + { + "epoch": 5.53, + "grad_norm": 2.529858350753784, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0384, + "step": 10975 + }, + { + "epoch": 5.54, + "grad_norm": 2.315721035003662, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0418, + "step": 11000 + }, + { + "epoch": 5.54, + "eval_loss": 0.11790613830089569, + "eval_runtime": 684.8634, + "eval_samples_per_second": 2.024, + "eval_steps_per_second": 2.024, + "eval_wer": 21.861204308670317, + "step": 11000 + }, + { + "epoch": 5.55, + "grad_norm": 2.47187876701355, + "learning_rate": 8.942713567839196e-06, + "loss": 0.04, + "step": 11025 + }, + { + "epoch": 5.57, + "grad_norm": 2.6977782249450684, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0414, + "step": 11050 + }, + { + "epoch": 5.58, + "grad_norm": 2.253974199295044, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0416, + "step": 11075 + }, + { + "epoch": 5.59, + "grad_norm": 2.2278666496276855, + "learning_rate": 8.935175879396986e-06, + "loss": 0.039, + "step": 11100 + }, + { + "epoch": 5.6, + "grad_norm": 2.1646761894226074, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0406, + "step": 11125 + }, + { + "epoch": 5.62, + "grad_norm": 2.7077107429504395, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0435, + "step": 11150 + }, + { + "epoch": 5.63, + "grad_norm": 2.409734010696411, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0411, + "step": 11175 + }, + { + "epoch": 5.64, + "grad_norm": 2.387725830078125, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0412, + "step": 11200 + }, + { + "epoch": 5.65, + "grad_norm": 2.6777408123016357, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0407, + "step": 11225 + }, + { + "epoch": 5.67, + "grad_norm": 2.643521547317505, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0416, + "step": 11250 + }, + { + "epoch": 5.68, + "grad_norm": 1.9884231090545654, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0393, + "step": 11275 + }, + { + "epoch": 5.69, + "grad_norm": 2.3139212131500244, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0416, + "step": 11300 + }, + { + "epoch": 5.71, + "grad_norm": 2.495694637298584, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0408, + "step": 11325 + }, + { + "epoch": 5.72, + "grad_norm": 2.485853910446167, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0398, + "step": 11350 + }, + { + "epoch": 5.73, + "grad_norm": 2.0036816596984863, + "learning_rate": 8.907537688442212e-06, + "loss": 0.042, + "step": 11375 + }, + { + "epoch": 5.74, + "grad_norm": 2.6024739742279053, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0401, + "step": 11400 + }, + { + "epoch": 5.76, + "grad_norm": 2.6003618240356445, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0394, + "step": 11425 + }, + { + "epoch": 5.77, + "grad_norm": 3.1666805744171143, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0424, + "step": 11450 + }, + { + "epoch": 5.78, + "grad_norm": 2.3643226623535156, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0394, + "step": 11475 + }, + { + "epoch": 5.79, + "grad_norm": 2.229434013366699, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0408, + "step": 11500 + }, + { + "epoch": 5.81, + "grad_norm": 2.846489906311035, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0411, + "step": 11525 + }, + { + "epoch": 5.82, + "grad_norm": 2.188871383666992, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0444, + "step": 11550 + }, + { + "epoch": 5.83, + "grad_norm": 2.1809377670288086, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0409, + "step": 11575 + }, + { + "epoch": 5.84, + "grad_norm": 2.315812587738037, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0431, + "step": 11600 + }, + { + "epoch": 5.86, + "grad_norm": 2.387016773223877, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0396, + "step": 11625 + }, + { + "epoch": 5.87, + "grad_norm": 2.207397222518921, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0436, + "step": 11650 + }, + { + "epoch": 5.88, + "grad_norm": 2.3260140419006348, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0415, + "step": 11675 + }, + { + "epoch": 5.89, + "grad_norm": 2.1954805850982666, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0396, + "step": 11700 + }, + { + "epoch": 5.91, + "grad_norm": 2.604947328567505, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0398, + "step": 11725 + }, + { + "epoch": 5.92, + "grad_norm": 3.1118898391723633, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0418, + "step": 11750 + }, + { + "epoch": 5.93, + "grad_norm": 2.3423421382904053, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0404, + "step": 11775 + }, + { + "epoch": 5.94, + "grad_norm": 2.1267640590667725, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0404, + "step": 11800 + }, + { + "epoch": 5.96, + "grad_norm": 2.5646700859069824, + "learning_rate": 8.862311557788944e-06, + "loss": 0.042, + "step": 11825 + }, + { + "epoch": 5.97, + "grad_norm": 2.5446715354919434, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0407, + "step": 11850 + }, + { + "epoch": 5.98, + "grad_norm": 2.5902607440948486, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0417, + "step": 11875 + }, + { + "epoch": 5.99, + "grad_norm": 2.8114237785339355, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0408, + "step": 11900 + }, + { + "epoch": 6.01, + "grad_norm": 2.047079086303711, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0341, + "step": 11925 + }, + { + "epoch": 6.02, + "grad_norm": 2.2079648971557617, + "learning_rate": 8.849748743718594e-06, + "loss": 0.027, + "step": 11950 + }, + { + "epoch": 6.03, + "grad_norm": 2.1891496181488037, + "learning_rate": 8.847236180904524e-06, + "loss": 0.026, + "step": 11975 + }, + { + "epoch": 6.05, + "grad_norm": 1.988735556602478, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0262, + "step": 12000 + }, + { + "epoch": 6.05, + "eval_loss": 0.11980433017015457, + "eval_runtime": 687.4611, + "eval_samples_per_second": 2.016, + "eval_steps_per_second": 2.016, + "eval_wer": 22.16139855200424, + "step": 12000 + }, + { + "epoch": 6.06, + "grad_norm": 2.108938217163086, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0259, + "step": 12025 + }, + { + "epoch": 6.07, + "grad_norm": 1.8009157180786133, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0264, + "step": 12050 + }, + { + "epoch": 6.08, + "grad_norm": 1.6953669786453247, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0279, + "step": 12075 + }, + { + "epoch": 6.1, + "grad_norm": 1.9572482109069824, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0275, + "step": 12100 + }, + { + "epoch": 6.11, + "grad_norm": 2.2209486961364746, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0263, + "step": 12125 + }, + { + "epoch": 6.12, + "grad_norm": 2.3143117427825928, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0276, + "step": 12150 + }, + { + "epoch": 6.13, + "grad_norm": 1.9758480787277222, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0273, + "step": 12175 + }, + { + "epoch": 6.15, + "grad_norm": 1.8506464958190918, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0289, + "step": 12200 + }, + { + "epoch": 6.16, + "grad_norm": 1.9196500778198242, + "learning_rate": 8.82211055276382e-06, + "loss": 0.027, + "step": 12225 + }, + { + "epoch": 6.17, + "grad_norm": 2.3553080558776855, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0281, + "step": 12250 + }, + { + "epoch": 6.18, + "grad_norm": 1.7679775953292847, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0263, + "step": 12275 + }, + { + "epoch": 6.2, + "grad_norm": 1.8708771467208862, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0263, + "step": 12300 + }, + { + "epoch": 6.21, + "grad_norm": 2.276172161102295, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0273, + "step": 12325 + }, + { + "epoch": 6.22, + "grad_norm": 2.381699562072754, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0263, + "step": 12350 + }, + { + "epoch": 6.23, + "grad_norm": 2.407863140106201, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0244, + "step": 12375 + }, + { + "epoch": 6.25, + "grad_norm": 1.9538224935531616, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0281, + "step": 12400 + }, + { + "epoch": 6.26, + "grad_norm": 2.2681281566619873, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0271, + "step": 12425 + }, + { + "epoch": 6.27, + "grad_norm": 2.2532448768615723, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0266, + "step": 12450 + }, + { + "epoch": 6.28, + "grad_norm": 2.548567771911621, + "learning_rate": 8.796984924623117e-06, + "loss": 0.027, + "step": 12475 + }, + { + "epoch": 6.3, + "grad_norm": 1.9196661710739136, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0282, + "step": 12500 + }, + { + "epoch": 6.31, + "grad_norm": 1.9314779043197632, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0274, + "step": 12525 + }, + { + "epoch": 6.32, + "grad_norm": 2.1478655338287354, + "learning_rate": 8.789447236180905e-06, + "loss": 0.026, + "step": 12550 + }, + { + "epoch": 6.34, + "grad_norm": 2.146735906600952, + "learning_rate": 8.786934673366834e-06, + "loss": 0.027, + "step": 12575 + }, + { + "epoch": 6.35, + "grad_norm": 2.010021448135376, + "learning_rate": 8.784422110552765e-06, + "loss": 0.029, + "step": 12600 + }, + { + "epoch": 6.36, + "grad_norm": 1.9679367542266846, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0261, + "step": 12625 + }, + { + "epoch": 6.37, + "grad_norm": 2.0475425720214844, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0282, + "step": 12650 + }, + { + "epoch": 6.39, + "grad_norm": 2.1458871364593506, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0276, + "step": 12675 + }, + { + "epoch": 6.4, + "grad_norm": 2.3427932262420654, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0267, + "step": 12700 + }, + { + "epoch": 6.41, + "grad_norm": 1.50918710231781, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0258, + "step": 12725 + }, + { + "epoch": 6.42, + "grad_norm": 2.064639091491699, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0278, + "step": 12750 + }, + { + "epoch": 6.44, + "grad_norm": 2.1061792373657227, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0282, + "step": 12775 + }, + { + "epoch": 6.45, + "grad_norm": 2.0918331146240234, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0259, + "step": 12800 + }, + { + "epoch": 6.46, + "grad_norm": 2.2508010864257812, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0289, + "step": 12825 + }, + { + "epoch": 6.47, + "grad_norm": 1.9812549352645874, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0276, + "step": 12850 + }, + { + "epoch": 6.49, + "grad_norm": 1.9125194549560547, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0269, + "step": 12875 + }, + { + "epoch": 6.5, + "grad_norm": 1.8923293352127075, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0286, + "step": 12900 + }, + { + "epoch": 6.51, + "grad_norm": 2.0202274322509766, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0274, + "step": 12925 + }, + { + "epoch": 6.52, + "grad_norm": 2.2985732555389404, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0267, + "step": 12950 + }, + { + "epoch": 6.54, + "grad_norm": 2.264228105545044, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0289, + "step": 12975 + }, + { + "epoch": 6.55, + "grad_norm": 2.0592288970947266, + "learning_rate": 8.74422110552764e-06, + "loss": 0.027, + "step": 13000 + }, + { + "epoch": 6.55, + "eval_loss": 0.12919044494628906, + "eval_runtime": 685.2054, + "eval_samples_per_second": 2.023, + "eval_steps_per_second": 2.023, + "eval_wer": 22.285007946318206, + "step": 13000 + }, + { + "epoch": 6.56, + "grad_norm": 2.146627187728882, + "learning_rate": 8.741708542713569e-06, + "loss": 0.027, + "step": 13025 + }, + { + "epoch": 6.57, + "grad_norm": 2.497652292251587, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0293, + "step": 13050 + }, + { + "epoch": 6.59, + "grad_norm": 2.0765607357025146, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0266, + "step": 13075 + }, + { + "epoch": 6.6, + "grad_norm": 2.277017593383789, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0281, + "step": 13100 + }, + { + "epoch": 6.61, + "grad_norm": 1.8031073808670044, + "learning_rate": 8.731658291457286e-06, + "loss": 0.026, + "step": 13125 + }, + { + "epoch": 6.62, + "grad_norm": 2.0985844135284424, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0271, + "step": 13150 + }, + { + "epoch": 6.64, + "grad_norm": 2.9153175354003906, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0284, + "step": 13175 + }, + { + "epoch": 6.65, + "grad_norm": 3.378220796585083, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0281, + "step": 13200 + }, + { + "epoch": 6.66, + "grad_norm": 2.2111120223999023, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0268, + "step": 13225 + }, + { + "epoch": 6.68, + "grad_norm": 2.010902166366577, + "learning_rate": 8.719095477386934e-06, + "loss": 0.0292, + "step": 13250 + }, + { + "epoch": 6.69, + "grad_norm": 2.048523426055908, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0259, + "step": 13275 + }, + { + "epoch": 6.7, + "grad_norm": 2.187990188598633, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0285, + "step": 13300 + }, + { + "epoch": 6.71, + "grad_norm": 2.296088695526123, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0279, + "step": 13325 + }, + { + "epoch": 6.73, + "grad_norm": 2.1088368892669678, + "learning_rate": 8.709045226130653e-06, + "loss": 0.028, + "step": 13350 + }, + { + "epoch": 6.74, + "grad_norm": 2.2216553688049316, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0274, + "step": 13375 + }, + { + "epoch": 6.75, + "grad_norm": 2.3757803440093994, + "learning_rate": 8.704020100502514e-06, + "loss": 0.029, + "step": 13400 + }, + { + "epoch": 6.76, + "grad_norm": 2.1950957775115967, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0277, + "step": 13425 + }, + { + "epoch": 6.78, + "grad_norm": 2.207559823989868, + "learning_rate": 8.698994974874372e-06, + "loss": 0.027, + "step": 13450 + }, + { + "epoch": 6.79, + "grad_norm": 2.1864428520202637, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0295, + "step": 13475 + }, + { + "epoch": 6.8, + "grad_norm": 2.292668342590332, + "learning_rate": 8.693969849246233e-06, + "loss": 0.028, + "step": 13500 + }, + { + "epoch": 6.81, + "grad_norm": 2.494725465774536, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0271, + "step": 13525 + }, + { + "epoch": 6.83, + "grad_norm": 2.605257272720337, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0283, + "step": 13550 + }, + { + "epoch": 6.84, + "grad_norm": 2.3537776470184326, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0284, + "step": 13575 + }, + { + "epoch": 6.85, + "grad_norm": 2.605717658996582, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0296, + "step": 13600 + }, + { + "epoch": 6.86, + "grad_norm": 2.16481351852417, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0273, + "step": 13625 + }, + { + "epoch": 6.88, + "grad_norm": 2.529783248901367, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0295, + "step": 13650 + }, + { + "epoch": 6.89, + "grad_norm": 1.8574634790420532, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0276, + "step": 13675 + }, + { + "epoch": 6.9, + "grad_norm": 2.314863443374634, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0285, + "step": 13700 + }, + { + "epoch": 6.91, + "grad_norm": 2.790963888168335, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0288, + "step": 13725 + }, + { + "epoch": 6.93, + "grad_norm": 2.0229947566986084, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0294, + "step": 13750 + }, + { + "epoch": 6.94, + "grad_norm": 2.5222136974334717, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0273, + "step": 13775 + }, + { + "epoch": 6.95, + "grad_norm": 2.157283067703247, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0295, + "step": 13800 + }, + { + "epoch": 6.96, + "grad_norm": 2.2281863689422607, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0282, + "step": 13825 + }, + { + "epoch": 6.98, + "grad_norm": 2.198316812515259, + "learning_rate": 8.658793969849247e-06, + "loss": 0.027, + "step": 13850 + }, + { + "epoch": 6.99, + "grad_norm": 2.2997336387634277, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0296, + "step": 13875 + }, + { + "epoch": 7.0, + "grad_norm": 1.6744725704193115, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0272, + "step": 13900 + }, + { + "epoch": 7.02, + "grad_norm": 1.8121318817138672, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0168, + "step": 13925 + }, + { + "epoch": 7.03, + "grad_norm": 1.8642957210540771, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0157, + "step": 13950 + }, + { + "epoch": 7.04, + "grad_norm": 1.6997615098953247, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0161, + "step": 13975 + }, + { + "epoch": 7.05, + "grad_norm": 1.482770323753357, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0172, + "step": 14000 + }, + { + "epoch": 7.05, + "eval_loss": 0.13476818799972534, + "eval_runtime": 683.5228, + "eval_samples_per_second": 2.028, + "eval_steps_per_second": 2.028, + "eval_wer": 21.940667490729297, + "step": 14000 + }, + { + "epoch": 7.07, + "grad_norm": 2.090759038925171, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0158, + "step": 14025 + }, + { + "epoch": 7.08, + "grad_norm": 1.695804476737976, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0168, + "step": 14050 + }, + { + "epoch": 7.09, + "grad_norm": 1.9545384645462036, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0182, + "step": 14075 + }, + { + "epoch": 7.1, + "grad_norm": 1.9294627904891968, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0168, + "step": 14100 + }, + { + "epoch": 7.12, + "grad_norm": 1.628083348274231, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0163, + "step": 14125 + }, + { + "epoch": 7.13, + "grad_norm": 1.748562216758728, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0183, + "step": 14150 + }, + { + "epoch": 7.14, + "grad_norm": 2.0441226959228516, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0192, + "step": 14175 + }, + { + "epoch": 7.15, + "grad_norm": 1.5546677112579346, + "learning_rate": 8.623618090452262e-06, + "loss": 0.0181, + "step": 14200 + }, + { + "epoch": 7.17, + "grad_norm": 1.4045592546463013, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0181, + "step": 14225 + }, + { + "epoch": 7.18, + "grad_norm": 1.764906406402588, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0179, + "step": 14250 + }, + { + "epoch": 7.19, + "grad_norm": 1.443061351776123, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0174, + "step": 14275 + }, + { + "epoch": 7.2, + "grad_norm": 1.4834390878677368, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0178, + "step": 14300 + }, + { + "epoch": 7.22, + "grad_norm": 1.8098642826080322, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0185, + "step": 14325 + }, + { + "epoch": 7.23, + "grad_norm": 1.3767249584197998, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0181, + "step": 14350 + }, + { + "epoch": 7.24, + "grad_norm": 1.7370936870574951, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0185, + "step": 14375 + }, + { + "epoch": 7.25, + "grad_norm": 2.194735527038574, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0184, + "step": 14400 + }, + { + "epoch": 7.27, + "grad_norm": 1.8937729597091675, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0166, + "step": 14425 + }, + { + "epoch": 7.28, + "grad_norm": 1.909907579421997, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0176, + "step": 14450 + }, + { + "epoch": 7.29, + "grad_norm": 2.064690589904785, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0167, + "step": 14475 + }, + { + "epoch": 7.3, + "grad_norm": 1.727542757987976, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0194, + "step": 14500 + }, + { + "epoch": 7.32, + "grad_norm": 1.5077255964279175, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0169, + "step": 14525 + }, + { + "epoch": 7.33, + "grad_norm": 2.0274789333343506, + "learning_rate": 8.58854271356784e-06, + "loss": 0.018, + "step": 14550 + }, + { + "epoch": 7.34, + "grad_norm": 1.509049892425537, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0191, + "step": 14575 + }, + { + "epoch": 7.36, + "grad_norm": 2.3095312118530273, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0174, + "step": 14600 + }, + { + "epoch": 7.37, + "grad_norm": 1.9731371402740479, + "learning_rate": 8.58100502512563e-06, + "loss": 0.018, + "step": 14625 + }, + { + "epoch": 7.38, + "grad_norm": 2.087909698486328, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0203, + "step": 14650 + }, + { + "epoch": 7.39, + "grad_norm": 2.070875883102417, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0181, + "step": 14675 + }, + { + "epoch": 7.41, + "grad_norm": 2.3846261501312256, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0199, + "step": 14700 + }, + { + "epoch": 7.42, + "grad_norm": 2.822178602218628, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0185, + "step": 14725 + }, + { + "epoch": 7.43, + "grad_norm": 2.13089656829834, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0197, + "step": 14750 + }, + { + "epoch": 7.44, + "grad_norm": 1.9057590961456299, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0182, + "step": 14775 + }, + { + "epoch": 7.46, + "grad_norm": 2.1908466815948486, + "learning_rate": 8.563417085427135e-06, + "loss": 0.02, + "step": 14800 + }, + { + "epoch": 7.47, + "grad_norm": 1.6239697933197021, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0172, + "step": 14825 + }, + { + "epoch": 7.48, + "grad_norm": 2.3834826946258545, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0185, + "step": 14850 + }, + { + "epoch": 7.49, + "grad_norm": 1.6830062866210938, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0193, + "step": 14875 + }, + { + "epoch": 7.51, + "grad_norm": 2.089116334915161, + "learning_rate": 8.553366834170856e-06, + "loss": 0.02, + "step": 14900 + }, + { + "epoch": 7.52, + "grad_norm": 1.944875717163086, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0187, + "step": 14925 + }, + { + "epoch": 7.53, + "grad_norm": 2.723844051361084, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0176, + "step": 14950 + }, + { + "epoch": 7.54, + "grad_norm": 2.24113130569458, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0211, + "step": 14975 + }, + { + "epoch": 7.56, + "grad_norm": 1.9414299726486206, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0195, + "step": 15000 + }, + { + "epoch": 7.56, + "eval_loss": 0.14382417500019073, + "eval_runtime": 682.3305, + "eval_samples_per_second": 2.031, + "eval_steps_per_second": 2.031, + "eval_wer": 22.205544764259226, + "step": 15000 + }, + { + "epoch": 7.57, + "grad_norm": 2.293396472930908, + "learning_rate": 8.540804020100502e-06, + "loss": 0.019, + "step": 15025 + }, + { + "epoch": 7.58, + "grad_norm": 1.806918740272522, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0166, + "step": 15050 + }, + { + "epoch": 7.59, + "grad_norm": 2.0897481441497803, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0226, + "step": 15075 + }, + { + "epoch": 7.61, + "grad_norm": 2.1969363689422607, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0199, + "step": 15100 + }, + { + "epoch": 7.62, + "grad_norm": 1.9305884838104248, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0196, + "step": 15125 + }, + { + "epoch": 7.63, + "grad_norm": 2.255046844482422, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0187, + "step": 15150 + }, + { + "epoch": 7.64, + "grad_norm": 2.4118850231170654, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0185, + "step": 15175 + }, + { + "epoch": 7.66, + "grad_norm": 1.7440098524093628, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0167, + "step": 15200 + }, + { + "epoch": 7.67, + "grad_norm": 2.088980197906494, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0195, + "step": 15225 + }, + { + "epoch": 7.68, + "grad_norm": 2.2280168533325195, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0174, + "step": 15250 + }, + { + "epoch": 7.7, + "grad_norm": 1.9718493223190308, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0175, + "step": 15275 + }, + { + "epoch": 7.71, + "grad_norm": 1.8726997375488281, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0185, + "step": 15300 + }, + { + "epoch": 7.72, + "grad_norm": 1.6222255229949951, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0178, + "step": 15325 + }, + { + "epoch": 7.73, + "grad_norm": 1.9714434146881104, + "learning_rate": 8.508140703517589e-06, + "loss": 0.019, + "step": 15350 + }, + { + "epoch": 7.75, + "grad_norm": 2.765275716781616, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0192, + "step": 15375 + }, + { + "epoch": 7.76, + "grad_norm": 1.8899182081222534, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0164, + "step": 15400 + }, + { + "epoch": 7.77, + "grad_norm": 1.739270806312561, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0161, + "step": 15425 + }, + { + "epoch": 7.78, + "grad_norm": 1.877384901046753, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0181, + "step": 15450 + }, + { + "epoch": 7.8, + "grad_norm": 1.7370879650115967, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0186, + "step": 15475 + }, + { + "epoch": 7.81, + "grad_norm": 2.162912368774414, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0202, + "step": 15500 + }, + { + "epoch": 7.82, + "grad_norm": 1.857187271118164, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0182, + "step": 15525 + }, + { + "epoch": 7.83, + "grad_norm": 1.929121494293213, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0182, + "step": 15550 + }, + { + "epoch": 7.85, + "grad_norm": 2.038764715194702, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0171, + "step": 15575 + }, + { + "epoch": 7.86, + "grad_norm": 1.9241057634353638, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0193, + "step": 15600 + }, + { + "epoch": 7.87, + "grad_norm": 1.7794278860092163, + "learning_rate": 8.480603015075377e-06, + "loss": 0.019, + "step": 15625 + }, + { + "epoch": 7.88, + "grad_norm": 2.1196956634521484, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0195, + "step": 15650 + }, + { + "epoch": 7.9, + "grad_norm": 1.9600090980529785, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0172, + "step": 15675 + }, + { + "epoch": 7.91, + "grad_norm": 1.8310644626617432, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0194, + "step": 15700 + }, + { + "epoch": 7.92, + "grad_norm": 2.5749664306640625, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0203, + "step": 15725 + }, + { + "epoch": 7.93, + "grad_norm": 2.157122850418091, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0184, + "step": 15750 + }, + { + "epoch": 7.95, + "grad_norm": 1.9399750232696533, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0185, + "step": 15775 + }, + { + "epoch": 7.96, + "grad_norm": 2.4133381843566895, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0187, + "step": 15800 + }, + { + "epoch": 7.97, + "grad_norm": 1.9787875413894653, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0211, + "step": 15825 + }, + { + "epoch": 7.98, + "grad_norm": 2.32405161857605, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0169, + "step": 15850 + }, + { + "epoch": 8.0, + "grad_norm": 1.9858806133270264, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0178, + "step": 15875 + }, + { + "epoch": 8.01, + "grad_norm": 1.2623140811920166, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0111, + "step": 15900 + }, + { + "epoch": 8.02, + "grad_norm": 1.3176425695419312, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0091, + "step": 15925 + }, + { + "epoch": 8.04, + "grad_norm": 1.4442695379257202, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0102, + "step": 15950 + }, + { + "epoch": 8.05, + "grad_norm": 1.3516316413879395, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0133, + "step": 15975 + }, + { + "epoch": 8.06, + "grad_norm": 1.636526107788086, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0119, + "step": 16000 + }, + { + "epoch": 8.06, + "eval_loss": 0.15012772381305695, + "eval_runtime": 684.4267, + "eval_samples_per_second": 2.025, + "eval_steps_per_second": 2.025, + "eval_wer": 22.42627582553417, + "step": 16000 + }, + { + "epoch": 8.07, + "grad_norm": 1.9799280166625977, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0117, + "step": 16025 + }, + { + "epoch": 8.09, + "grad_norm": 2.0195772647857666, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0096, + "step": 16050 + }, + { + "epoch": 8.1, + "grad_norm": 1.437741994857788, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0109, + "step": 16075 + }, + { + "epoch": 8.11, + "grad_norm": 1.7432310581207275, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0125, + "step": 16100 + }, + { + "epoch": 8.12, + "grad_norm": 1.5175271034240723, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0129, + "step": 16125 + }, + { + "epoch": 8.14, + "grad_norm": 2.0003697872161865, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0112, + "step": 16150 + }, + { + "epoch": 8.15, + "grad_norm": 1.8804725408554077, + "learning_rate": 8.42532663316583e-06, + "loss": 0.011, + "step": 16175 + }, + { + "epoch": 8.16, + "grad_norm": 1.5051791667938232, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0124, + "step": 16200 + }, + { + "epoch": 8.17, + "grad_norm": 1.6766929626464844, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0096, + "step": 16225 + }, + { + "epoch": 8.19, + "grad_norm": 1.5148377418518066, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0119, + "step": 16250 + }, + { + "epoch": 8.2, + "grad_norm": 1.2489933967590332, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0122, + "step": 16275 + }, + { + "epoch": 8.21, + "grad_norm": 1.851216197013855, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0118, + "step": 16300 + }, + { + "epoch": 8.22, + "grad_norm": 1.6621123552322388, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0118, + "step": 16325 + }, + { + "epoch": 8.24, + "grad_norm": 1.4307650327682495, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0125, + "step": 16350 + }, + { + "epoch": 8.25, + "grad_norm": 1.8581053018569946, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0115, + "step": 16375 + }, + { + "epoch": 8.26, + "grad_norm": 1.6031956672668457, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0122, + "step": 16400 + }, + { + "epoch": 8.27, + "grad_norm": 1.7064120769500732, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0113, + "step": 16425 + }, + { + "epoch": 8.29, + "grad_norm": 1.7176445722579956, + "learning_rate": 8.397688442211056e-06, + "loss": 0.012, + "step": 16450 + }, + { + "epoch": 8.3, + "grad_norm": 1.4885127544403076, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0126, + "step": 16475 + }, + { + "epoch": 8.31, + "grad_norm": 1.8216053247451782, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0131, + "step": 16500 + }, + { + "epoch": 8.32, + "grad_norm": 1.3706022500991821, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0112, + "step": 16525 + }, + { + "epoch": 8.34, + "grad_norm": 1.3573179244995117, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0119, + "step": 16550 + }, + { + "epoch": 8.35, + "grad_norm": 1.3211928606033325, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0127, + "step": 16575 + }, + { + "epoch": 8.36, + "grad_norm": 1.2396631240844727, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0117, + "step": 16600 + }, + { + "epoch": 8.38, + "grad_norm": 1.8558140993118286, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0118, + "step": 16625 + }, + { + "epoch": 8.39, + "grad_norm": 1.743930459022522, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0112, + "step": 16650 + }, + { + "epoch": 8.4, + "grad_norm": 1.9935976266860962, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0122, + "step": 16675 + }, + { + "epoch": 8.41, + "grad_norm": 1.5850768089294434, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0126, + "step": 16700 + }, + { + "epoch": 8.43, + "grad_norm": 2.4433388710021973, + "learning_rate": 8.370050251256282e-06, + "loss": 0.011, + "step": 16725 + }, + { + "epoch": 8.44, + "grad_norm": 1.5024391412734985, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0128, + "step": 16750 + }, + { + "epoch": 8.45, + "grad_norm": 1.3797205686569214, + "learning_rate": 8.365025125628141e-06, + "loss": 0.013, + "step": 16775 + }, + { + "epoch": 8.46, + "grad_norm": 1.7880699634552002, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0128, + "step": 16800 + }, + { + "epoch": 8.48, + "grad_norm": 1.6733183860778809, + "learning_rate": 8.36e-06, + "loss": 0.012, + "step": 16825 + }, + { + "epoch": 8.49, + "grad_norm": 1.9680142402648926, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0117, + "step": 16850 + }, + { + "epoch": 8.5, + "grad_norm": 1.5728721618652344, + "learning_rate": 8.35497487437186e-06, + "loss": 0.012, + "step": 16875 + }, + { + "epoch": 8.51, + "grad_norm": 1.7440322637557983, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0142, + "step": 16900 + }, + { + "epoch": 8.53, + "grad_norm": 1.4092472791671753, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0131, + "step": 16925 + }, + { + "epoch": 8.54, + "grad_norm": 1.3555556535720825, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0127, + "step": 16950 + }, + { + "epoch": 8.55, + "grad_norm": 1.9369627237319946, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0119, + "step": 16975 + }, + { + "epoch": 8.56, + "grad_norm": 2.0047218799591064, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0121, + "step": 17000 + }, + { + "epoch": 8.56, + "eval_loss": 0.15533125400543213, + "eval_runtime": 678.4481, + "eval_samples_per_second": 2.043, + "eval_steps_per_second": 2.043, + "eval_wer": 22.61168991700512, + "step": 17000 + }, + { + "epoch": 8.58, + "grad_norm": 1.6859157085418701, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0117, + "step": 17025 + }, + { + "epoch": 8.59, + "grad_norm": 2.0076897144317627, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0125, + "step": 17050 + }, + { + "epoch": 8.6, + "grad_norm": 1.5884569883346558, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0118, + "step": 17075 + }, + { + "epoch": 8.61, + "grad_norm": 1.7873048782348633, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0115, + "step": 17100 + }, + { + "epoch": 8.63, + "grad_norm": 2.042132616043091, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0149, + "step": 17125 + }, + { + "epoch": 8.64, + "grad_norm": 1.9313766956329346, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0134, + "step": 17150 + }, + { + "epoch": 8.65, + "grad_norm": 2.2513680458068848, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0123, + "step": 17175 + }, + { + "epoch": 8.66, + "grad_norm": 1.3925609588623047, + "learning_rate": 8.322311557788946e-06, + "loss": 0.012, + "step": 17200 + }, + { + "epoch": 8.68, + "grad_norm": 1.567494511604309, + "learning_rate": 8.319798994974876e-06, + "loss": 0.012, + "step": 17225 + }, + { + "epoch": 8.69, + "grad_norm": 2.1530673503875732, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0126, + "step": 17250 + }, + { + "epoch": 8.7, + "grad_norm": 2.3671629428863525, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0118, + "step": 17275 + }, + { + "epoch": 8.72, + "grad_norm": 1.7874821424484253, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0131, + "step": 17300 + }, + { + "epoch": 8.73, + "grad_norm": 1.90727698802948, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0127, + "step": 17325 + }, + { + "epoch": 8.74, + "grad_norm": 1.7863373756408691, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0124, + "step": 17350 + }, + { + "epoch": 8.75, + "grad_norm": 1.4252421855926514, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0118, + "step": 17375 + }, + { + "epoch": 8.77, + "grad_norm": 1.6922516822814941, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0131, + "step": 17400 + }, + { + "epoch": 8.78, + "grad_norm": 2.4107935428619385, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0113, + "step": 17425 + }, + { + "epoch": 8.79, + "grad_norm": 1.967113971710205, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0118, + "step": 17450 + }, + { + "epoch": 8.8, + "grad_norm": 2.036146402359009, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0127, + "step": 17475 + }, + { + "epoch": 8.82, + "grad_norm": 1.7822296619415283, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0127, + "step": 17500 + }, + { + "epoch": 8.83, + "grad_norm": 1.6727118492126465, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0155, + "step": 17525 + }, + { + "epoch": 8.84, + "grad_norm": 1.797520399093628, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0131, + "step": 17550 + }, + { + "epoch": 8.85, + "grad_norm": 1.868937373161316, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0115, + "step": 17575 + }, + { + "epoch": 8.87, + "grad_norm": 1.9952425956726074, + "learning_rate": 8.28211055276382e-06, + "loss": 0.013, + "step": 17600 + }, + { + "epoch": 8.88, + "grad_norm": 1.518673062324524, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0128, + "step": 17625 + }, + { + "epoch": 8.89, + "grad_norm": 1.8499948978424072, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0123, + "step": 17650 + }, + { + "epoch": 8.9, + "grad_norm": 1.3737576007843018, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0128, + "step": 17675 + }, + { + "epoch": 8.92, + "grad_norm": 1.9520583152770996, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0139, + "step": 17700 + }, + { + "epoch": 8.93, + "grad_norm": 2.0861010551452637, + "learning_rate": 8.269547738693467e-06, + "loss": 0.012, + "step": 17725 + }, + { + "epoch": 8.94, + "grad_norm": 1.6822078227996826, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0122, + "step": 17750 + }, + { + "epoch": 8.95, + "grad_norm": 1.8324058055877686, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0137, + "step": 17775 + }, + { + "epoch": 8.97, + "grad_norm": 2.3645708560943604, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0132, + "step": 17800 + }, + { + "epoch": 8.98, + "grad_norm": 1.782631754875183, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0138, + "step": 17825 + }, + { + "epoch": 8.99, + "grad_norm": NaN, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0139, + "step": 17850 + }, + { + "epoch": 9.01, + "grad_norm": 1.1217823028564453, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0109, + "step": 17875 + }, + { + "epoch": 9.02, + "grad_norm": 1.0531671047210693, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0069, + "step": 17900 + }, + { + "epoch": 9.03, + "grad_norm": 1.4888278245925903, + "learning_rate": 8.249547738693467e-06, + "loss": 0.0071, + "step": 17925 + }, + { + "epoch": 9.04, + "grad_norm": 1.1788146495819092, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0084, + "step": 17950 + }, + { + "epoch": 9.06, + "grad_norm": 1.8617186546325684, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0093, + "step": 17975 + }, + { + "epoch": 9.07, + "grad_norm": 1.6521105766296387, + "learning_rate": 8.242010050251257e-06, + "loss": 0.007, + "step": 18000 + }, + { + "epoch": 9.07, + "eval_loss": 0.16167142987251282, + "eval_runtime": 678.9847, + "eval_samples_per_second": 2.041, + "eval_steps_per_second": 2.041, + "eval_wer": 22.285007946318206, + "step": 18000 + }, + { + "epoch": 9.08, + "grad_norm": 1.9960633516311646, + "learning_rate": 8.239497487437186e-06, + "loss": 0.0079, + "step": 18025 + }, + { + "epoch": 9.09, + "grad_norm": 1.9460841417312622, + "learning_rate": 8.236984924623116e-06, + "loss": 0.0075, + "step": 18050 + }, + { + "epoch": 9.11, + "grad_norm": 1.6646573543548584, + "learning_rate": 8.234472361809047e-06, + "loss": 0.0074, + "step": 18075 + }, + { + "epoch": 9.12, + "grad_norm": 1.9850512742996216, + "learning_rate": 8.231959798994976e-06, + "loss": 0.0077, + "step": 18100 + }, + { + "epoch": 9.13, + "grad_norm": 0.8765420317649841, + "learning_rate": 8.229447236180905e-06, + "loss": 0.0076, + "step": 18125 + }, + { + "epoch": 9.14, + "grad_norm": 1.7315075397491455, + "learning_rate": 8.226934673366835e-06, + "loss": 0.0084, + "step": 18150 + }, + { + "epoch": 9.16, + "grad_norm": 1.7411330938339233, + "learning_rate": 8.224422110552764e-06, + "loss": 0.0069, + "step": 18175 + }, + { + "epoch": 9.17, + "grad_norm": 1.2183438539505005, + "learning_rate": 8.221909547738695e-06, + "loss": 0.0081, + "step": 18200 + }, + { + "epoch": 9.18, + "grad_norm": 1.7440475225448608, + "learning_rate": 8.219396984924624e-06, + "loss": 0.0089, + "step": 18225 + }, + { + "epoch": 9.19, + "grad_norm": 1.1629337072372437, + "learning_rate": 8.216884422110554e-06, + "loss": 0.0075, + "step": 18250 + }, + { + "epoch": 9.21, + "grad_norm": 1.1175649166107178, + "learning_rate": 8.214371859296483e-06, + "loss": 0.0074, + "step": 18275 + }, + { + "epoch": 9.22, + "grad_norm": 1.3962807655334473, + "learning_rate": 8.211859296482412e-06, + "loss": 0.0081, + "step": 18300 + }, + { + "epoch": 9.23, + "grad_norm": 1.1457552909851074, + "learning_rate": 8.209346733668342e-06, + "loss": 0.0085, + "step": 18325 + }, + { + "epoch": 9.24, + "grad_norm": 1.730384111404419, + "learning_rate": 8.206834170854273e-06, + "loss": 0.008, + "step": 18350 + }, + { + "epoch": 9.26, + "grad_norm": 1.4565281867980957, + "learning_rate": 8.204321608040202e-06, + "loss": 0.0084, + "step": 18375 + }, + { + "epoch": 9.27, + "grad_norm": 1.8538554906845093, + "learning_rate": 8.201809045226131e-06, + "loss": 0.0083, + "step": 18400 + }, + { + "epoch": 9.28, + "grad_norm": 1.8056268692016602, + "learning_rate": 8.19929648241206e-06, + "loss": 0.0074, + "step": 18425 + }, + { + "epoch": 9.29, + "grad_norm": 1.5125994682312012, + "learning_rate": 8.19678391959799e-06, + "loss": 0.0086, + "step": 18450 + }, + { + "epoch": 9.31, + "grad_norm": 1.7354553937911987, + "learning_rate": 8.194271356783921e-06, + "loss": 0.0089, + "step": 18475 + }, + { + "epoch": 9.32, + "grad_norm": 1.3382068872451782, + "learning_rate": 8.19175879396985e-06, + "loss": 0.0084, + "step": 18500 + }, + { + "epoch": 9.33, + "grad_norm": 1.3492069244384766, + "learning_rate": 8.18924623115578e-06, + "loss": 0.0074, + "step": 18525 + }, + { + "epoch": 9.35, + "grad_norm": 1.4362231492996216, + "learning_rate": 8.186733668341709e-06, + "loss": 0.0076, + "step": 18550 + }, + { + "epoch": 9.36, + "grad_norm": 1.6045366525650024, + "learning_rate": 8.184221105527638e-06, + "loss": 0.0082, + "step": 18575 + }, + { + "epoch": 9.37, + "grad_norm": 1.9008662700653076, + "learning_rate": 8.18170854271357e-06, + "loss": 0.0079, + "step": 18600 + }, + { + "epoch": 9.38, + "grad_norm": 1.4533487558364868, + "learning_rate": 8.179195979899498e-06, + "loss": 0.0078, + "step": 18625 + }, + { + "epoch": 9.4, + "grad_norm": 2.0346322059631348, + "learning_rate": 8.176683417085428e-06, + "loss": 0.0102, + "step": 18650 + }, + { + "epoch": 9.41, + "grad_norm": 1.469119668006897, + "learning_rate": 8.174170854271357e-06, + "loss": 0.0083, + "step": 18675 + }, + { + "epoch": 9.42, + "grad_norm": 1.5073950290679932, + "learning_rate": 8.171658291457286e-06, + "loss": 0.009, + "step": 18700 + }, + { + "epoch": 9.43, + "grad_norm": 1.6956956386566162, + "learning_rate": 8.169145728643216e-06, + "loss": 0.0084, + "step": 18725 + }, + { + "epoch": 9.45, + "grad_norm": 1.1238776445388794, + "learning_rate": 8.166633165829147e-06, + "loss": 0.0097, + "step": 18750 + }, + { + "epoch": 9.46, + "grad_norm": 1.440139889717102, + "learning_rate": 8.164120603015076e-06, + "loss": 0.0084, + "step": 18775 + }, + { + "epoch": 9.47, + "grad_norm": 1.5242620706558228, + "learning_rate": 8.161608040201005e-06, + "loss": 0.0088, + "step": 18800 + }, + { + "epoch": 9.48, + "grad_norm": 1.151222586631775, + "learning_rate": 8.159095477386936e-06, + "loss": 0.0076, + "step": 18825 + }, + { + "epoch": 9.5, + "grad_norm": 2.4563887119293213, + "learning_rate": 8.156582914572864e-06, + "loss": 0.0082, + "step": 18850 + }, + { + "epoch": 9.51, + "grad_norm": 1.9098182916641235, + "learning_rate": 8.154070351758795e-06, + "loss": 0.0095, + "step": 18875 + }, + { + "epoch": 9.52, + "grad_norm": 1.2939962148666382, + "learning_rate": 8.151557788944724e-06, + "loss": 0.0087, + "step": 18900 + }, + { + "epoch": 9.53, + "grad_norm": 1.7203131914138794, + "learning_rate": 8.149045226130654e-06, + "loss": 0.0083, + "step": 18925 + }, + { + "epoch": 9.55, + "grad_norm": 1.6244869232177734, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0075, + "step": 18950 + }, + { + "epoch": 9.56, + "grad_norm": 1.9188730716705322, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0088, + "step": 18975 + }, + { + "epoch": 9.57, + "grad_norm": 1.2826545238494873, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0093, + "step": 19000 + }, + { + "epoch": 9.57, + "eval_loss": 0.1707531213760376, + "eval_runtime": 676.3601, + "eval_samples_per_second": 2.049, + "eval_steps_per_second": 2.049, + "eval_wer": 22.84125022073106, + "step": 19000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 51, + "save_steps": 1000, + "total_flos": 5.915231649792e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/marathi/checkpoint-19000/training_args.bin b/checkpoints/whisper-base/marathi/checkpoint-19000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..913ddd7b4c6b6b3952062a6222069ba3872ed6ba --- /dev/null +++ b/checkpoints/whisper-base/marathi/checkpoint-19000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab703215b8c593a5e58a42aa59efb7764bdebdc73c59368eaa9b771f3d997b3 +size 4667 diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/config.json b/checkpoints/whisper-base/telugu/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ef96a42f7d5b56f0729aaeb95882150334bf3fd --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-base", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50299 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 6, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/generation_config.json b/checkpoints/whisper-base/telugu/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a12c5b82ac1e48f22fa79bdad1595064164bc2ab --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/generation_config.json @@ -0,0 +1,256 @@ +{ + "alignment_heads": [ + [ + 3, + 1 + ], + [ + 4, + 2 + ], + [ + 4, + 3 + ], + [ + 4, + 7 + ], + [ + 5, + 1 + ], + [ + 5, + 2 + ], + [ + 5, + 4 + ], + [ + 5, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/model.safetensors b/checkpoints/whisper-base/telugu/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f8196fcec1cb53367d42f3ae7439d6ee554b0b9 --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556ee7deeb0629251459d50a7b51d40104f5b5c2ea06dd40ec54600c447d947d +size 290403936 diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/optimizer.pt b/checkpoints/whisper-base/telugu/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76f2f130e956631c3690f4da79c3c67155d90768 --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd92fde6e1d893ef42c7ea412635d988b2e732ab1dcab2cc1932a53a8d6bf3ac +size 574811077 diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/preprocessor_config.json b/checkpoints/whisper-base/telugu/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/rng_state.pth b/checkpoints/whisper-base/telugu/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ddf4924fbdf202f077dc7b53616a190f4fd32df --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bb307bb24e07df50b59910636a329e535d036b8c444eeedc0f950ef12f080e +size 14575 diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/scheduler.pt b/checkpoints/whisper-base/telugu/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d233145708936452ec959c52a2c26620acae7fa --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346606fbfc4456475a7c8e70abef87cf9fdada01185763731429a46bbc45ed57 +size 627 diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/trainer_state.json b/checkpoints/whisper-base/telugu/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d7af54ab5d3f6799cecb0831e04a326e671e945e --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/trainer_state.json @@ -0,0 +1,5223 @@ +{ + "best_metric": 27.0760336370007, + "best_model_checkpoint": "results/whisper-base/telugu/checkpoint-8000", + "epoch": 9.06801007556675, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 12.702515602111816, + "learning_rate": 4.800000000000001e-07, + "loss": 2.0476, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 7.611940383911133, + "learning_rate": 9.800000000000001e-07, + "loss": 1.8646, + "step": 50 + }, + { + "epoch": 0.04, + "grad_norm": 5.025814533233643, + "learning_rate": 1.48e-06, + "loss": 1.6637, + "step": 75 + }, + { + "epoch": 0.05, + "grad_norm": 4.979084491729736, + "learning_rate": 1.98e-06, + "loss": 1.5203, + "step": 100 + }, + { + "epoch": 0.06, + "grad_norm": 5.982580184936523, + "learning_rate": 2.4800000000000004e-06, + "loss": 1.4228, + "step": 125 + }, + { + "epoch": 0.08, + "grad_norm": 5.703189849853516, + "learning_rate": 2.9800000000000003e-06, + "loss": 1.3382, + "step": 150 + }, + { + "epoch": 0.09, + "grad_norm": 6.367642879486084, + "learning_rate": 3.48e-06, + "loss": 1.1531, + "step": 175 + }, + { + "epoch": 0.1, + "grad_norm": 5.832076072692871, + "learning_rate": 3.980000000000001e-06, + "loss": 0.8292, + "step": 200 + }, + { + "epoch": 0.11, + "grad_norm": 5.064639091491699, + "learning_rate": 4.48e-06, + "loss": 0.6093, + "step": 225 + }, + { + "epoch": 0.13, + "grad_norm": 3.851181745529175, + "learning_rate": 4.980000000000001e-06, + "loss": 0.4935, + "step": 250 + }, + { + "epoch": 0.14, + "grad_norm": 5.065381050109863, + "learning_rate": 5.480000000000001e-06, + "loss": 0.4316, + "step": 275 + }, + { + "epoch": 0.15, + "grad_norm": 5.137683868408203, + "learning_rate": 5.98e-06, + "loss": 0.3866, + "step": 300 + }, + { + "epoch": 0.16, + "grad_norm": 4.36338996887207, + "learning_rate": 6.480000000000001e-06, + "loss": 0.3597, + "step": 325 + }, + { + "epoch": 0.18, + "grad_norm": 4.078024387359619, + "learning_rate": 6.98e-06, + "loss": 0.3379, + "step": 350 + }, + { + "epoch": 0.19, + "grad_norm": 3.528249502182007, + "learning_rate": 7.48e-06, + "loss": 0.3192, + "step": 375 + }, + { + "epoch": 0.2, + "grad_norm": 3.491966724395752, + "learning_rate": 7.980000000000002e-06, + "loss": 0.3035, + "step": 400 + }, + { + "epoch": 0.21, + "grad_norm": 4.611189365386963, + "learning_rate": 8.48e-06, + "loss": 0.288, + "step": 425 + }, + { + "epoch": 0.23, + "grad_norm": 4.840874195098877, + "learning_rate": 8.98e-06, + "loss": 0.2803, + "step": 450 + }, + { + "epoch": 0.24, + "grad_norm": 4.43154239654541, + "learning_rate": 9.48e-06, + "loss": 0.2668, + "step": 475 + }, + { + "epoch": 0.25, + "grad_norm": 3.9943807125091553, + "learning_rate": 9.980000000000001e-06, + "loss": 0.2605, + "step": 500 + }, + { + "epoch": 0.26, + "grad_norm": 4.532464981079102, + "learning_rate": 9.997587939698492e-06, + "loss": 0.2597, + "step": 525 + }, + { + "epoch": 0.28, + "grad_norm": 3.5199360847473145, + "learning_rate": 9.995075376884423e-06, + "loss": 0.2519, + "step": 550 + }, + { + "epoch": 0.29, + "grad_norm": 3.353322744369507, + "learning_rate": 9.992562814070353e-06, + "loss": 0.2491, + "step": 575 + }, + { + "epoch": 0.3, + "grad_norm": 3.9937353134155273, + "learning_rate": 9.990050251256282e-06, + "loss": 0.2353, + "step": 600 + }, + { + "epoch": 0.31, + "grad_norm": 2.9040470123291016, + "learning_rate": 9.987537688442211e-06, + "loss": 0.237, + "step": 625 + }, + { + "epoch": 0.33, + "grad_norm": 2.9498767852783203, + "learning_rate": 9.985025125628142e-06, + "loss": 0.2274, + "step": 650 + }, + { + "epoch": 0.34, + "grad_norm": 3.0441360473632812, + "learning_rate": 9.98251256281407e-06, + "loss": 0.2233, + "step": 675 + }, + { + "epoch": 0.35, + "grad_norm": 3.1170895099639893, + "learning_rate": 9.980000000000001e-06, + "loss": 0.2215, + "step": 700 + }, + { + "epoch": 0.37, + "grad_norm": 3.9260220527648926, + "learning_rate": 9.97748743718593e-06, + "loss": 0.2084, + "step": 725 + }, + { + "epoch": 0.38, + "grad_norm": 3.2684454917907715, + "learning_rate": 9.97497487437186e-06, + "loss": 0.2177, + "step": 750 + }, + { + "epoch": 0.39, + "grad_norm": 2.700383186340332, + "learning_rate": 9.97246231155779e-06, + "loss": 0.2056, + "step": 775 + }, + { + "epoch": 0.4, + "grad_norm": 3.3448195457458496, + "learning_rate": 9.969949748743718e-06, + "loss": 0.2069, + "step": 800 + }, + { + "epoch": 0.42, + "grad_norm": 2.9168343544006348, + "learning_rate": 9.96743718592965e-06, + "loss": 0.2092, + "step": 825 + }, + { + "epoch": 0.43, + "grad_norm": 2.9108660221099854, + "learning_rate": 9.964924623115579e-06, + "loss": 0.1996, + "step": 850 + }, + { + "epoch": 0.44, + "grad_norm": 2.9813027381896973, + "learning_rate": 9.962412060301508e-06, + "loss": 0.201, + "step": 875 + }, + { + "epoch": 0.45, + "grad_norm": 2.779301166534424, + "learning_rate": 9.959899497487437e-06, + "loss": 0.197, + "step": 900 + }, + { + "epoch": 0.47, + "grad_norm": 2.4988577365875244, + "learning_rate": 9.957386934673368e-06, + "loss": 0.1977, + "step": 925 + }, + { + "epoch": 0.48, + "grad_norm": 2.7132952213287354, + "learning_rate": 9.954874371859298e-06, + "loss": 0.1915, + "step": 950 + }, + { + "epoch": 0.49, + "grad_norm": 2.8792788982391357, + "learning_rate": 9.952361809045227e-06, + "loss": 0.1856, + "step": 975 + }, + { + "epoch": 0.5, + "grad_norm": 2.981616973876953, + "learning_rate": 9.949849246231156e-06, + "loss": 0.1841, + "step": 1000 + }, + { + "epoch": 0.5, + "eval_loss": 0.1132877841591835, + "eval_runtime": 1463.4105, + "eval_samples_per_second": 0.983, + "eval_steps_per_second": 0.983, + "eval_wer": 46.329712683952344, + "step": 1000 + }, + { + "epoch": 0.52, + "grad_norm": 2.684494972229004, + "learning_rate": 9.947336683417086e-06, + "loss": 0.1871, + "step": 1025 + }, + { + "epoch": 0.53, + "grad_norm": 3.1530518531799316, + "learning_rate": 9.944824120603017e-06, + "loss": 0.1855, + "step": 1050 + }, + { + "epoch": 0.54, + "grad_norm": 2.869884729385376, + "learning_rate": 9.942311557788944e-06, + "loss": 0.1821, + "step": 1075 + }, + { + "epoch": 0.55, + "grad_norm": 3.2552778720855713, + "learning_rate": 9.939798994974875e-06, + "loss": 0.1812, + "step": 1100 + }, + { + "epoch": 0.57, + "grad_norm": 2.9390270709991455, + "learning_rate": 9.937286432160805e-06, + "loss": 0.1743, + "step": 1125 + }, + { + "epoch": 0.58, + "grad_norm": 2.3790814876556396, + "learning_rate": 9.934773869346734e-06, + "loss": 0.1717, + "step": 1150 + }, + { + "epoch": 0.59, + "grad_norm": 3.1541974544525146, + "learning_rate": 9.932261306532665e-06, + "loss": 0.1817, + "step": 1175 + }, + { + "epoch": 0.6, + "grad_norm": 2.353652238845825, + "learning_rate": 9.929748743718594e-06, + "loss": 0.175, + "step": 1200 + }, + { + "epoch": 0.62, + "grad_norm": 2.7457966804504395, + "learning_rate": 9.927236180904524e-06, + "loss": 0.1699, + "step": 1225 + }, + { + "epoch": 0.63, + "grad_norm": 2.5585060119628906, + "learning_rate": 9.924723618090453e-06, + "loss": 0.1742, + "step": 1250 + }, + { + "epoch": 0.64, + "grad_norm": 3.5861992835998535, + "learning_rate": 9.922211055276382e-06, + "loss": 0.1752, + "step": 1275 + }, + { + "epoch": 0.65, + "grad_norm": 2.6173129081726074, + "learning_rate": 9.919698492462312e-06, + "loss": 0.1656, + "step": 1300 + }, + { + "epoch": 0.67, + "grad_norm": 2.3428843021392822, + "learning_rate": 9.917185929648243e-06, + "loss": 0.1671, + "step": 1325 + }, + { + "epoch": 0.68, + "grad_norm": 2.9579219818115234, + "learning_rate": 9.914673366834172e-06, + "loss": 0.1696, + "step": 1350 + }, + { + "epoch": 0.69, + "grad_norm": 2.4652836322784424, + "learning_rate": 9.912160804020101e-06, + "loss": 0.1642, + "step": 1375 + }, + { + "epoch": 0.71, + "grad_norm": 2.6490142345428467, + "learning_rate": 9.90964824120603e-06, + "loss": 0.166, + "step": 1400 + }, + { + "epoch": 0.72, + "grad_norm": 2.7419941425323486, + "learning_rate": 9.90713567839196e-06, + "loss": 0.163, + "step": 1425 + }, + { + "epoch": 0.73, + "grad_norm": 2.951556444168091, + "learning_rate": 9.904623115577891e-06, + "loss": 0.1584, + "step": 1450 + }, + { + "epoch": 0.74, + "grad_norm": 2.3820886611938477, + "learning_rate": 9.90211055276382e-06, + "loss": 0.1578, + "step": 1475 + }, + { + "epoch": 0.76, + "grad_norm": 2.412790536880493, + "learning_rate": 9.89959798994975e-06, + "loss": 0.1623, + "step": 1500 + }, + { + "epoch": 0.77, + "grad_norm": 2.4267303943634033, + "learning_rate": 9.897085427135679e-06, + "loss": 0.1616, + "step": 1525 + }, + { + "epoch": 0.78, + "grad_norm": 2.4404618740081787, + "learning_rate": 9.894572864321608e-06, + "loss": 0.1563, + "step": 1550 + }, + { + "epoch": 0.79, + "grad_norm": 2.1398348808288574, + "learning_rate": 9.89206030150754e-06, + "loss": 0.1582, + "step": 1575 + }, + { + "epoch": 0.81, + "grad_norm": 2.2282469272613525, + "learning_rate": 9.889547738693469e-06, + "loss": 0.1558, + "step": 1600 + }, + { + "epoch": 0.82, + "grad_norm": 3.062638998031616, + "learning_rate": 9.887035175879398e-06, + "loss": 0.1554, + "step": 1625 + }, + { + "epoch": 0.83, + "grad_norm": 2.3317370414733887, + "learning_rate": 9.884522613065327e-06, + "loss": 0.1557, + "step": 1650 + }, + { + "epoch": 0.84, + "grad_norm": 2.257711410522461, + "learning_rate": 9.882010050251256e-06, + "loss": 0.1499, + "step": 1675 + }, + { + "epoch": 0.86, + "grad_norm": 2.282040596008301, + "learning_rate": 9.879497487437186e-06, + "loss": 0.1501, + "step": 1700 + }, + { + "epoch": 0.87, + "grad_norm": 2.383516550064087, + "learning_rate": 9.876984924623117e-06, + "loss": 0.1471, + "step": 1725 + }, + { + "epoch": 0.88, + "grad_norm": 2.779592752456665, + "learning_rate": 9.874472361809046e-06, + "loss": 0.1478, + "step": 1750 + }, + { + "epoch": 0.89, + "grad_norm": 2.644237995147705, + "learning_rate": 9.871959798994975e-06, + "loss": 0.1469, + "step": 1775 + }, + { + "epoch": 0.91, + "grad_norm": 2.391300678253174, + "learning_rate": 9.869447236180906e-06, + "loss": 0.1506, + "step": 1800 + }, + { + "epoch": 0.92, + "grad_norm": 2.8232030868530273, + "learning_rate": 9.866934673366834e-06, + "loss": 0.1526, + "step": 1825 + }, + { + "epoch": 0.93, + "grad_norm": 2.7611305713653564, + "learning_rate": 9.864422110552765e-06, + "loss": 0.1457, + "step": 1850 + }, + { + "epoch": 0.94, + "grad_norm": 2.7151498794555664, + "learning_rate": 9.861909547738694e-06, + "loss": 0.1482, + "step": 1875 + }, + { + "epoch": 0.96, + "grad_norm": 2.3955771923065186, + "learning_rate": 9.859396984924624e-06, + "loss": 0.1459, + "step": 1900 + }, + { + "epoch": 0.97, + "grad_norm": 2.276702880859375, + "learning_rate": 9.856884422110553e-06, + "loss": 0.1489, + "step": 1925 + }, + { + "epoch": 0.98, + "grad_norm": 2.40101957321167, + "learning_rate": 9.854371859296482e-06, + "loss": 0.1432, + "step": 1950 + }, + { + "epoch": 0.99, + "grad_norm": 2.29134464263916, + "learning_rate": 9.851859296482413e-06, + "loss": 0.1477, + "step": 1975 + }, + { + "epoch": 1.01, + "grad_norm": 2.223019599914551, + "learning_rate": 9.849346733668343e-06, + "loss": 0.1369, + "step": 2000 + }, + { + "epoch": 1.01, + "eval_loss": 0.08287883549928665, + "eval_runtime": 1212.8629, + "eval_samples_per_second": 1.186, + "eval_steps_per_second": 1.186, + "eval_wer": 34.10126138752628, + "step": 2000 + }, + { + "epoch": 1.02, + "grad_norm": 2.1359541416168213, + "learning_rate": 9.846834170854272e-06, + "loss": 0.1318, + "step": 2025 + }, + { + "epoch": 1.03, + "grad_norm": 2.398839235305786, + "learning_rate": 9.844321608040201e-06, + "loss": 0.1294, + "step": 2050 + }, + { + "epoch": 1.05, + "grad_norm": 2.3866488933563232, + "learning_rate": 9.841809045226132e-06, + "loss": 0.1346, + "step": 2075 + }, + { + "epoch": 1.06, + "grad_norm": 1.8926136493682861, + "learning_rate": 9.83929648241206e-06, + "loss": 0.1306, + "step": 2100 + }, + { + "epoch": 1.07, + "grad_norm": 2.599918842315674, + "learning_rate": 9.836783919597991e-06, + "loss": 0.1298, + "step": 2125 + }, + { + "epoch": 1.08, + "grad_norm": 2.2867398262023926, + "learning_rate": 9.83427135678392e-06, + "loss": 0.1271, + "step": 2150 + }, + { + "epoch": 1.1, + "grad_norm": 2.969252109527588, + "learning_rate": 9.83175879396985e-06, + "loss": 0.1288, + "step": 2175 + }, + { + "epoch": 1.11, + "grad_norm": 2.023502826690674, + "learning_rate": 9.82924623115578e-06, + "loss": 0.1296, + "step": 2200 + }, + { + "epoch": 1.12, + "grad_norm": 2.263474702835083, + "learning_rate": 9.826733668341708e-06, + "loss": 0.1281, + "step": 2225 + }, + { + "epoch": 1.13, + "grad_norm": 2.545093297958374, + "learning_rate": 9.82422110552764e-06, + "loss": 0.1286, + "step": 2250 + }, + { + "epoch": 1.15, + "grad_norm": 2.1411678791046143, + "learning_rate": 9.821708542713569e-06, + "loss": 0.1259, + "step": 2275 + }, + { + "epoch": 1.16, + "grad_norm": 2.099698305130005, + "learning_rate": 9.819195979899498e-06, + "loss": 0.1247, + "step": 2300 + }, + { + "epoch": 1.17, + "grad_norm": 2.27581524848938, + "learning_rate": 9.816683417085427e-06, + "loss": 0.1285, + "step": 2325 + }, + { + "epoch": 1.18, + "grad_norm": 2.4960713386535645, + "learning_rate": 9.814170854271358e-06, + "loss": 0.1271, + "step": 2350 + }, + { + "epoch": 1.2, + "grad_norm": 2.856011152267456, + "learning_rate": 9.811658291457288e-06, + "loss": 0.1276, + "step": 2375 + }, + { + "epoch": 1.21, + "grad_norm": 2.4386849403381348, + "learning_rate": 9.809145728643217e-06, + "loss": 0.1292, + "step": 2400 + }, + { + "epoch": 1.22, + "grad_norm": 2.506118059158325, + "learning_rate": 9.806633165829146e-06, + "loss": 0.1256, + "step": 2425 + }, + { + "epoch": 1.23, + "grad_norm": 2.3423237800598145, + "learning_rate": 9.804120603015076e-06, + "loss": 0.126, + "step": 2450 + }, + { + "epoch": 1.25, + "grad_norm": 2.4900598526000977, + "learning_rate": 9.801608040201007e-06, + "loss": 0.1232, + "step": 2475 + }, + { + "epoch": 1.26, + "grad_norm": 3.063587188720703, + "learning_rate": 9.799095477386934e-06, + "loss": 0.1202, + "step": 2500 + }, + { + "epoch": 1.27, + "grad_norm": 1.970401406288147, + "learning_rate": 9.796582914572865e-06, + "loss": 0.1198, + "step": 2525 + }, + { + "epoch": 1.28, + "grad_norm": 2.1684956550598145, + "learning_rate": 9.794070351758795e-06, + "loss": 0.1213, + "step": 2550 + }, + { + "epoch": 1.3, + "grad_norm": 2.1976563930511475, + "learning_rate": 9.791557788944724e-06, + "loss": 0.1236, + "step": 2575 + }, + { + "epoch": 1.31, + "grad_norm": 2.191643714904785, + "learning_rate": 9.789045226130655e-06, + "loss": 0.1197, + "step": 2600 + }, + { + "epoch": 1.32, + "grad_norm": 2.449549674987793, + "learning_rate": 9.786532663316584e-06, + "loss": 0.1219, + "step": 2625 + }, + { + "epoch": 1.34, + "grad_norm": 2.202345609664917, + "learning_rate": 9.784020100502514e-06, + "loss": 0.1214, + "step": 2650 + }, + { + "epoch": 1.35, + "grad_norm": 2.2722055912017822, + "learning_rate": 9.781507537688443e-06, + "loss": 0.1194, + "step": 2675 + }, + { + "epoch": 1.36, + "grad_norm": 2.258183479309082, + "learning_rate": 9.778994974874372e-06, + "loss": 0.1182, + "step": 2700 + }, + { + "epoch": 1.37, + "grad_norm": 2.007762908935547, + "learning_rate": 9.776482412060302e-06, + "loss": 0.1218, + "step": 2725 + }, + { + "epoch": 1.39, + "grad_norm": 2.2510693073272705, + "learning_rate": 9.773969849246233e-06, + "loss": 0.1169, + "step": 2750 + }, + { + "epoch": 1.4, + "grad_norm": 2.3634448051452637, + "learning_rate": 9.77145728643216e-06, + "loss": 0.1218, + "step": 2775 + }, + { + "epoch": 1.41, + "grad_norm": 1.966257929801941, + "learning_rate": 9.768944723618091e-06, + "loss": 0.1187, + "step": 2800 + }, + { + "epoch": 1.42, + "grad_norm": 2.051515817642212, + "learning_rate": 9.76643216080402e-06, + "loss": 0.1223, + "step": 2825 + }, + { + "epoch": 1.44, + "grad_norm": 2.1640067100524902, + "learning_rate": 9.76391959798995e-06, + "loss": 0.1224, + "step": 2850 + }, + { + "epoch": 1.45, + "grad_norm": 2.2217137813568115, + "learning_rate": 9.761407035175881e-06, + "loss": 0.1186, + "step": 2875 + }, + { + "epoch": 1.46, + "grad_norm": 2.293384313583374, + "learning_rate": 9.75889447236181e-06, + "loss": 0.1162, + "step": 2900 + }, + { + "epoch": 1.47, + "grad_norm": 2.0020222663879395, + "learning_rate": 9.75638190954774e-06, + "loss": 0.1152, + "step": 2925 + }, + { + "epoch": 1.49, + "grad_norm": 2.0081183910369873, + "learning_rate": 9.753869346733669e-06, + "loss": 0.119, + "step": 2950 + }, + { + "epoch": 1.5, + "grad_norm": 2.0339622497558594, + "learning_rate": 9.751356783919598e-06, + "loss": 0.1191, + "step": 2975 + }, + { + "epoch": 1.51, + "grad_norm": 2.0091047286987305, + "learning_rate": 9.74884422110553e-06, + "loss": 0.121, + "step": 3000 + }, + { + "epoch": 1.51, + "eval_loss": 0.07331898808479309, + "eval_runtime": 1469.0598, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.979, + "eval_wer": 31.026629292221443, + "step": 3000 + }, + { + "epoch": 1.52, + "grad_norm": 2.43293833732605, + "learning_rate": 9.746331658291459e-06, + "loss": 0.1176, + "step": 3025 + }, + { + "epoch": 1.54, + "grad_norm": 1.989294171333313, + "learning_rate": 9.743819095477388e-06, + "loss": 0.1174, + "step": 3050 + }, + { + "epoch": 1.55, + "grad_norm": 2.4088878631591797, + "learning_rate": 9.741306532663317e-06, + "loss": 0.1161, + "step": 3075 + }, + { + "epoch": 1.56, + "grad_norm": 2.0442941188812256, + "learning_rate": 9.738793969849247e-06, + "loss": 0.1159, + "step": 3100 + }, + { + "epoch": 1.57, + "grad_norm": 2.2759485244750977, + "learning_rate": 9.736281407035176e-06, + "loss": 0.1167, + "step": 3125 + }, + { + "epoch": 1.59, + "grad_norm": 2.138298511505127, + "learning_rate": 9.733768844221107e-06, + "loss": 0.1167, + "step": 3150 + }, + { + "epoch": 1.6, + "grad_norm": 2.080583333969116, + "learning_rate": 9.731256281407036e-06, + "loss": 0.1171, + "step": 3175 + }, + { + "epoch": 1.61, + "grad_norm": 2.295144557952881, + "learning_rate": 9.728743718592966e-06, + "loss": 0.1133, + "step": 3200 + }, + { + "epoch": 1.62, + "grad_norm": 1.9776804447174072, + "learning_rate": 9.726231155778897e-06, + "loss": 0.1163, + "step": 3225 + }, + { + "epoch": 1.64, + "grad_norm": 2.2790141105651855, + "learning_rate": 9.723718592964824e-06, + "loss": 0.1145, + "step": 3250 + }, + { + "epoch": 1.65, + "grad_norm": 2.377984046936035, + "learning_rate": 9.721206030150755e-06, + "loss": 0.1132, + "step": 3275 + }, + { + "epoch": 1.66, + "grad_norm": 2.402237892150879, + "learning_rate": 9.718693467336685e-06, + "loss": 0.1155, + "step": 3300 + }, + { + "epoch": 1.68, + "grad_norm": 2.0475170612335205, + "learning_rate": 9.716180904522614e-06, + "loss": 0.1106, + "step": 3325 + }, + { + "epoch": 1.69, + "grad_norm": 2.2506606578826904, + "learning_rate": 9.713668341708543e-06, + "loss": 0.1132, + "step": 3350 + }, + { + "epoch": 1.7, + "grad_norm": 1.8544366359710693, + "learning_rate": 9.711155778894472e-06, + "loss": 0.109, + "step": 3375 + }, + { + "epoch": 1.71, + "grad_norm": 1.8897806406021118, + "learning_rate": 9.708643216080402e-06, + "loss": 0.1117, + "step": 3400 + }, + { + "epoch": 1.73, + "grad_norm": 3.08508563041687, + "learning_rate": 9.706130653266333e-06, + "loss": 0.1112, + "step": 3425 + }, + { + "epoch": 1.74, + "grad_norm": 2.2754952907562256, + "learning_rate": 9.703618090452262e-06, + "loss": 0.1146, + "step": 3450 + }, + { + "epoch": 1.75, + "grad_norm": 1.8761200904846191, + "learning_rate": 9.701105527638191e-06, + "loss": 0.1114, + "step": 3475 + }, + { + "epoch": 1.76, + "grad_norm": 2.327530860900879, + "learning_rate": 9.698592964824122e-06, + "loss": 0.1093, + "step": 3500 + }, + { + "epoch": 1.78, + "grad_norm": 2.14005446434021, + "learning_rate": 9.69608040201005e-06, + "loss": 0.1115, + "step": 3525 + }, + { + "epoch": 1.79, + "grad_norm": 2.1868393421173096, + "learning_rate": 9.693567839195981e-06, + "loss": 0.1126, + "step": 3550 + }, + { + "epoch": 1.8, + "grad_norm": 1.6043177843093872, + "learning_rate": 9.69105527638191e-06, + "loss": 0.1089, + "step": 3575 + }, + { + "epoch": 1.81, + "grad_norm": 1.9660890102386475, + "learning_rate": 9.68854271356784e-06, + "loss": 0.1112, + "step": 3600 + }, + { + "epoch": 1.83, + "grad_norm": 1.877603530883789, + "learning_rate": 9.68603015075377e-06, + "loss": 0.1072, + "step": 3625 + }, + { + "epoch": 1.84, + "grad_norm": 2.0820486545562744, + "learning_rate": 9.683517587939698e-06, + "loss": 0.1082, + "step": 3650 + }, + { + "epoch": 1.85, + "grad_norm": 2.079158067703247, + "learning_rate": 9.68100502512563e-06, + "loss": 0.11, + "step": 3675 + }, + { + "epoch": 1.86, + "grad_norm": 1.625675082206726, + "learning_rate": 9.678492462311559e-06, + "loss": 0.1103, + "step": 3700 + }, + { + "epoch": 1.88, + "grad_norm": 2.325962781906128, + "learning_rate": 9.675979899497488e-06, + "loss": 0.1095, + "step": 3725 + }, + { + "epoch": 1.89, + "grad_norm": 1.9123986959457397, + "learning_rate": 9.673467336683417e-06, + "loss": 0.1085, + "step": 3750 + }, + { + "epoch": 1.9, + "grad_norm": 2.2163822650909424, + "learning_rate": 9.670954773869348e-06, + "loss": 0.1109, + "step": 3775 + }, + { + "epoch": 1.91, + "grad_norm": 2.0482916831970215, + "learning_rate": 9.668442211055276e-06, + "loss": 0.1105, + "step": 3800 + }, + { + "epoch": 1.93, + "grad_norm": 2.0795373916625977, + "learning_rate": 9.665929648241207e-06, + "loss": 0.1047, + "step": 3825 + }, + { + "epoch": 1.94, + "grad_norm": 1.8629438877105713, + "learning_rate": 9.663417085427136e-06, + "loss": 0.1069, + "step": 3850 + }, + { + "epoch": 1.95, + "grad_norm": 2.1041812896728516, + "learning_rate": 9.660904522613066e-06, + "loss": 0.1077, + "step": 3875 + }, + { + "epoch": 1.96, + "grad_norm": 2.074047327041626, + "learning_rate": 9.658391959798997e-06, + "loss": 0.1053, + "step": 3900 + }, + { + "epoch": 1.98, + "grad_norm": 2.0436389446258545, + "learning_rate": 9.655879396984924e-06, + "loss": 0.1084, + "step": 3925 + }, + { + "epoch": 1.99, + "grad_norm": 2.159497022628784, + "learning_rate": 9.653366834170855e-06, + "loss": 0.1047, + "step": 3950 + }, + { + "epoch": 2.0, + "grad_norm": 1.5994527339935303, + "learning_rate": 9.650854271356785e-06, + "loss": 0.1058, + "step": 3975 + }, + { + "epoch": 2.02, + "grad_norm": 2.1416232585906982, + "learning_rate": 9.648341708542714e-06, + "loss": 0.0907, + "step": 4000 + }, + { + "epoch": 2.02, + "eval_loss": 0.0677235797047615, + "eval_runtime": 1207.5236, + "eval_samples_per_second": 1.191, + "eval_steps_per_second": 1.191, + "eval_wer": 29.598808689558513, + "step": 4000 + }, + { + "epoch": 2.03, + "grad_norm": 1.6429836750030518, + "learning_rate": 9.645829145728643e-06, + "loss": 0.0945, + "step": 4025 + }, + { + "epoch": 2.04, + "grad_norm": 1.8807092905044556, + "learning_rate": 9.643417085427137e-06, + "loss": 0.0942, + "step": 4050 + }, + { + "epoch": 2.05, + "grad_norm": 1.8152906894683838, + "learning_rate": 9.640904522613066e-06, + "loss": 0.0946, + "step": 4075 + }, + { + "epoch": 2.07, + "grad_norm": 1.9201328754425049, + "learning_rate": 9.638391959798997e-06, + "loss": 0.0949, + "step": 4100 + }, + { + "epoch": 2.08, + "grad_norm": 1.817543625831604, + "learning_rate": 9.635879396984925e-06, + "loss": 0.0936, + "step": 4125 + }, + { + "epoch": 2.09, + "grad_norm": 2.114928722381592, + "learning_rate": 9.633366834170856e-06, + "loss": 0.0932, + "step": 4150 + }, + { + "epoch": 2.1, + "grad_norm": 2.240248680114746, + "learning_rate": 9.630854271356785e-06, + "loss": 0.0949, + "step": 4175 + }, + { + "epoch": 2.12, + "grad_norm": 1.7213176488876343, + "learning_rate": 9.628341708542714e-06, + "loss": 0.0906, + "step": 4200 + }, + { + "epoch": 2.13, + "grad_norm": 1.9887688159942627, + "learning_rate": 9.625829145728644e-06, + "loss": 0.0927, + "step": 4225 + }, + { + "epoch": 2.14, + "grad_norm": 1.9689302444458008, + "learning_rate": 9.623316582914573e-06, + "loss": 0.0941, + "step": 4250 + }, + { + "epoch": 2.15, + "grad_norm": 2.086944103240967, + "learning_rate": 9.620804020100504e-06, + "loss": 0.0946, + "step": 4275 + }, + { + "epoch": 2.17, + "grad_norm": 1.7573634386062622, + "learning_rate": 9.618291457286433e-06, + "loss": 0.0926, + "step": 4300 + }, + { + "epoch": 2.18, + "grad_norm": 2.47251033782959, + "learning_rate": 9.615778894472363e-06, + "loss": 0.0949, + "step": 4325 + }, + { + "epoch": 2.19, + "grad_norm": 1.8021899461746216, + "learning_rate": 9.613266331658292e-06, + "loss": 0.0929, + "step": 4350 + }, + { + "epoch": 2.2, + "grad_norm": 2.137491226196289, + "learning_rate": 9.610753768844223e-06, + "loss": 0.0938, + "step": 4375 + }, + { + "epoch": 2.22, + "grad_norm": 1.865257978439331, + "learning_rate": 9.60824120603015e-06, + "loss": 0.092, + "step": 4400 + }, + { + "epoch": 2.23, + "grad_norm": 2.0239343643188477, + "learning_rate": 9.605728643216082e-06, + "loss": 0.0923, + "step": 4425 + }, + { + "epoch": 2.24, + "grad_norm": 1.816542387008667, + "learning_rate": 9.60321608040201e-06, + "loss": 0.0931, + "step": 4450 + }, + { + "epoch": 2.25, + "grad_norm": 2.2828361988067627, + "learning_rate": 9.60070351758794e-06, + "loss": 0.0948, + "step": 4475 + }, + { + "epoch": 2.27, + "grad_norm": 1.955202579498291, + "learning_rate": 9.598190954773871e-06, + "loss": 0.0909, + "step": 4500 + }, + { + "epoch": 2.28, + "grad_norm": 1.6349833011627197, + "learning_rate": 9.595678391959799e-06, + "loss": 0.0915, + "step": 4525 + }, + { + "epoch": 2.29, + "grad_norm": 1.9101808071136475, + "learning_rate": 9.59316582914573e-06, + "loss": 0.0913, + "step": 4550 + }, + { + "epoch": 2.3, + "grad_norm": 2.068251371383667, + "learning_rate": 9.59065326633166e-06, + "loss": 0.0886, + "step": 4575 + }, + { + "epoch": 2.32, + "grad_norm": 1.9696294069290161, + "learning_rate": 9.588140703517588e-06, + "loss": 0.0923, + "step": 4600 + }, + { + "epoch": 2.33, + "grad_norm": 2.0129804611206055, + "learning_rate": 9.585628140703518e-06, + "loss": 0.091, + "step": 4625 + }, + { + "epoch": 2.34, + "grad_norm": 2.074808120727539, + "learning_rate": 9.583115577889449e-06, + "loss": 0.0923, + "step": 4650 + }, + { + "epoch": 2.36, + "grad_norm": 2.103775978088379, + "learning_rate": 9.580603015075378e-06, + "loss": 0.0912, + "step": 4675 + }, + { + "epoch": 2.37, + "grad_norm": 1.9787614345550537, + "learning_rate": 9.578090452261307e-06, + "loss": 0.0882, + "step": 4700 + }, + { + "epoch": 2.38, + "grad_norm": 1.884751319885254, + "learning_rate": 9.575577889447237e-06, + "loss": 0.0909, + "step": 4725 + }, + { + "epoch": 2.39, + "grad_norm": 2.057746410369873, + "learning_rate": 9.573065326633166e-06, + "loss": 0.0894, + "step": 4750 + }, + { + "epoch": 2.41, + "grad_norm": 1.8027749061584473, + "learning_rate": 9.570552763819097e-06, + "loss": 0.0921, + "step": 4775 + }, + { + "epoch": 2.42, + "grad_norm": 1.736527681350708, + "learning_rate": 9.568040201005025e-06, + "loss": 0.0892, + "step": 4800 + }, + { + "epoch": 2.43, + "grad_norm": 2.2050817012786865, + "learning_rate": 9.565527638190956e-06, + "loss": 0.0913, + "step": 4825 + }, + { + "epoch": 2.44, + "grad_norm": 1.9272867441177368, + "learning_rate": 9.563015075376885e-06, + "loss": 0.0912, + "step": 4850 + }, + { + "epoch": 2.46, + "grad_norm": 1.8158856630325317, + "learning_rate": 9.560502512562814e-06, + "loss": 0.0894, + "step": 4875 + }, + { + "epoch": 2.47, + "grad_norm": 1.7627719640731812, + "learning_rate": 9.557989949748745e-06, + "loss": 0.0863, + "step": 4900 + }, + { + "epoch": 2.48, + "grad_norm": 1.9478931427001953, + "learning_rate": 9.555477386934675e-06, + "loss": 0.0919, + "step": 4925 + }, + { + "epoch": 2.49, + "grad_norm": 1.8724310398101807, + "learning_rate": 9.552964824120604e-06, + "loss": 0.0873, + "step": 4950 + }, + { + "epoch": 2.51, + "grad_norm": 1.832992434501648, + "learning_rate": 9.550452261306533e-06, + "loss": 0.0877, + "step": 4975 + }, + { + "epoch": 2.52, + "grad_norm": 2.0178463459014893, + "learning_rate": 9.547939698492463e-06, + "loss": 0.0895, + "step": 5000 + }, + { + "epoch": 2.52, + "eval_loss": 0.0641447901725769, + "eval_runtime": 1201.1253, + "eval_samples_per_second": 1.197, + "eval_steps_per_second": 1.197, + "eval_wer": 28.32866152768045, + "step": 5000 + }, + { + "epoch": 2.53, + "grad_norm": 1.9620293378829956, + "learning_rate": 9.545427135678392e-06, + "loss": 0.0875, + "step": 5025 + }, + { + "epoch": 2.54, + "grad_norm": 1.908280372619629, + "learning_rate": 9.542914572864323e-06, + "loss": 0.0866, + "step": 5050 + }, + { + "epoch": 2.56, + "grad_norm": 2.1362922191619873, + "learning_rate": 9.540402010050252e-06, + "loss": 0.0914, + "step": 5075 + }, + { + "epoch": 2.57, + "grad_norm": 2.112835645675659, + "learning_rate": 9.537889447236182e-06, + "loss": 0.0875, + "step": 5100 + }, + { + "epoch": 2.58, + "grad_norm": 1.9028964042663574, + "learning_rate": 9.535376884422111e-06, + "loss": 0.0901, + "step": 5125 + }, + { + "epoch": 2.59, + "grad_norm": 1.8761686086654663, + "learning_rate": 9.53286432160804e-06, + "loss": 0.0857, + "step": 5150 + }, + { + "epoch": 2.61, + "grad_norm": 1.9913798570632935, + "learning_rate": 9.530351758793971e-06, + "loss": 0.0881, + "step": 5175 + }, + { + "epoch": 2.62, + "grad_norm": 2.4062094688415527, + "learning_rate": 9.5278391959799e-06, + "loss": 0.088, + "step": 5200 + }, + { + "epoch": 2.63, + "grad_norm": 1.7080777883529663, + "learning_rate": 9.52532663316583e-06, + "loss": 0.0873, + "step": 5225 + }, + { + "epoch": 2.64, + "grad_norm": 1.881579875946045, + "learning_rate": 9.52281407035176e-06, + "loss": 0.0878, + "step": 5250 + }, + { + "epoch": 2.66, + "grad_norm": 2.2955923080444336, + "learning_rate": 9.520301507537689e-06, + "loss": 0.087, + "step": 5275 + }, + { + "epoch": 2.67, + "grad_norm": 1.9166394472122192, + "learning_rate": 9.51778894472362e-06, + "loss": 0.0878, + "step": 5300 + }, + { + "epoch": 2.68, + "grad_norm": 2.095931053161621, + "learning_rate": 9.515276381909549e-06, + "loss": 0.087, + "step": 5325 + }, + { + "epoch": 2.7, + "grad_norm": 1.7723512649536133, + "learning_rate": 9.512763819095478e-06, + "loss": 0.0898, + "step": 5350 + }, + { + "epoch": 2.71, + "grad_norm": 1.7826398611068726, + "learning_rate": 9.510251256281408e-06, + "loss": 0.0846, + "step": 5375 + }, + { + "epoch": 2.72, + "grad_norm": 2.0131995677948, + "learning_rate": 9.507738693467337e-06, + "loss": 0.0869, + "step": 5400 + }, + { + "epoch": 2.73, + "grad_norm": 1.8363655805587769, + "learning_rate": 9.505226130653266e-06, + "loss": 0.0873, + "step": 5425 + }, + { + "epoch": 2.75, + "grad_norm": 2.036046028137207, + "learning_rate": 9.502713567839197e-06, + "loss": 0.0902, + "step": 5450 + }, + { + "epoch": 2.76, + "grad_norm": 2.271407127380371, + "learning_rate": 9.500201005025127e-06, + "loss": 0.0861, + "step": 5475 + }, + { + "epoch": 2.77, + "grad_norm": 2.0567522048950195, + "learning_rate": 9.497688442211056e-06, + "loss": 0.0923, + "step": 5500 + }, + { + "epoch": 2.78, + "grad_norm": 2.19276762008667, + "learning_rate": 9.495175879396987e-06, + "loss": 0.086, + "step": 5525 + }, + { + "epoch": 2.8, + "grad_norm": 2.4844324588775635, + "learning_rate": 9.492663316582915e-06, + "loss": 0.0884, + "step": 5550 + }, + { + "epoch": 2.81, + "grad_norm": 1.9846481084823608, + "learning_rate": 9.490150753768846e-06, + "loss": 0.0853, + "step": 5575 + }, + { + "epoch": 2.82, + "grad_norm": 1.8354753255844116, + "learning_rate": 9.487638190954775e-06, + "loss": 0.0874, + "step": 5600 + }, + { + "epoch": 2.83, + "grad_norm": 1.985903024673462, + "learning_rate": 9.485125628140704e-06, + "loss": 0.0887, + "step": 5625 + }, + { + "epoch": 2.85, + "grad_norm": 2.361621856689453, + "learning_rate": 9.482613065326634e-06, + "loss": 0.0881, + "step": 5650 + }, + { + "epoch": 2.86, + "grad_norm": 1.931437611579895, + "learning_rate": 9.480100502512563e-06, + "loss": 0.0881, + "step": 5675 + }, + { + "epoch": 2.87, + "grad_norm": 2.267496109008789, + "learning_rate": 9.477587939698494e-06, + "loss": 0.0871, + "step": 5700 + }, + { + "epoch": 2.88, + "grad_norm": 1.748360514640808, + "learning_rate": 9.475075376884423e-06, + "loss": 0.0848, + "step": 5725 + }, + { + "epoch": 2.9, + "grad_norm": 1.7653058767318726, + "learning_rate": 9.472562814070353e-06, + "loss": 0.0852, + "step": 5750 + }, + { + "epoch": 2.91, + "grad_norm": 1.9031683206558228, + "learning_rate": 9.470050251256282e-06, + "loss": 0.0868, + "step": 5775 + }, + { + "epoch": 2.92, + "grad_norm": 2.01723575592041, + "learning_rate": 9.467537688442213e-06, + "loss": 0.0875, + "step": 5800 + }, + { + "epoch": 2.93, + "grad_norm": 1.802963376045227, + "learning_rate": 9.46502512562814e-06, + "loss": 0.0839, + "step": 5825 + }, + { + "epoch": 2.95, + "grad_norm": 1.73123037815094, + "learning_rate": 9.462512562814072e-06, + "loss": 0.086, + "step": 5850 + }, + { + "epoch": 2.96, + "grad_norm": 2.067504644393921, + "learning_rate": 9.460000000000001e-06, + "loss": 0.0845, + "step": 5875 + }, + { + "epoch": 2.97, + "grad_norm": 1.7869842052459717, + "learning_rate": 9.45748743718593e-06, + "loss": 0.0862, + "step": 5900 + }, + { + "epoch": 2.98, + "grad_norm": 1.6336287260055542, + "learning_rate": 9.454974874371861e-06, + "loss": 0.0851, + "step": 5925 + }, + { + "epoch": 3.0, + "grad_norm": 1.8279975652694702, + "learning_rate": 9.452462311557789e-06, + "loss": 0.0849, + "step": 5950 + }, + { + "epoch": 3.01, + "grad_norm": 1.6764721870422363, + "learning_rate": 9.44994974874372e-06, + "loss": 0.0755, + "step": 5975 + }, + { + "epoch": 3.02, + "grad_norm": 1.6145353317260742, + "learning_rate": 9.44743718592965e-06, + "loss": 0.0729, + "step": 6000 + }, + { + "epoch": 3.02, + "eval_loss": 0.06323881447315216, + "eval_runtime": 1257.48, + "eval_samples_per_second": 1.144, + "eval_steps_per_second": 1.144, + "eval_wer": 27.741765942536787, + "step": 6000 + }, + { + "epoch": 3.04, + "grad_norm": 1.934796929359436, + "learning_rate": 9.444924623115579e-06, + "loss": 0.0733, + "step": 6025 + }, + { + "epoch": 3.05, + "grad_norm": 1.7537850141525269, + "learning_rate": 9.442412060301508e-06, + "loss": 0.0733, + "step": 6050 + }, + { + "epoch": 3.06, + "grad_norm": 1.6423039436340332, + "learning_rate": 9.439899497487439e-06, + "loss": 0.072, + "step": 6075 + }, + { + "epoch": 3.07, + "grad_norm": 1.7110779285430908, + "learning_rate": 9.437386934673367e-06, + "loss": 0.073, + "step": 6100 + }, + { + "epoch": 3.09, + "grad_norm": 1.7468650341033936, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0739, + "step": 6125 + }, + { + "epoch": 3.1, + "grad_norm": 1.5944850444793701, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0735, + "step": 6150 + }, + { + "epoch": 3.11, + "grad_norm": 1.7844575643539429, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0706, + "step": 6175 + }, + { + "epoch": 3.12, + "grad_norm": 1.8277512788772583, + "learning_rate": 9.42743718592965e-06, + "loss": 0.071, + "step": 6200 + }, + { + "epoch": 3.14, + "grad_norm": 1.8363456726074219, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0709, + "step": 6225 + }, + { + "epoch": 3.15, + "grad_norm": 1.765159249305725, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0735, + "step": 6250 + }, + { + "epoch": 3.16, + "grad_norm": 1.6007137298583984, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0717, + "step": 6275 + }, + { + "epoch": 3.17, + "grad_norm": 1.6842042207717896, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0714, + "step": 6300 + }, + { + "epoch": 3.19, + "grad_norm": 1.9334796667099, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0718, + "step": 6325 + }, + { + "epoch": 3.2, + "grad_norm": 1.8853099346160889, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0701, + "step": 6350 + }, + { + "epoch": 3.21, + "grad_norm": 1.9158176183700562, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0727, + "step": 6375 + }, + { + "epoch": 3.22, + "grad_norm": 1.6503363847732544, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0734, + "step": 6400 + }, + { + "epoch": 3.24, + "grad_norm": 1.941066861152649, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0746, + "step": 6425 + }, + { + "epoch": 3.25, + "grad_norm": 1.801092267036438, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0718, + "step": 6450 + }, + { + "epoch": 3.26, + "grad_norm": 2.016005039215088, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0733, + "step": 6475 + }, + { + "epoch": 3.27, + "grad_norm": 1.8908193111419678, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0703, + "step": 6500 + }, + { + "epoch": 3.29, + "grad_norm": 1.853793978691101, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0699, + "step": 6525 + }, + { + "epoch": 3.3, + "grad_norm": 1.915716528892517, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0689, + "step": 6550 + }, + { + "epoch": 3.31, + "grad_norm": 1.8041150569915771, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0734, + "step": 6575 + }, + { + "epoch": 3.32, + "grad_norm": 1.8417267799377441, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0718, + "step": 6600 + }, + { + "epoch": 3.34, + "grad_norm": 1.6847732067108154, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0683, + "step": 6625 + }, + { + "epoch": 3.35, + "grad_norm": 1.7263054847717285, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0745, + "step": 6650 + }, + { + "epoch": 3.36, + "grad_norm": 1.6185723543167114, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0737, + "step": 6675 + }, + { + "epoch": 3.38, + "grad_norm": 2.0743038654327393, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0708, + "step": 6700 + }, + { + "epoch": 3.39, + "grad_norm": 1.9976160526275635, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0694, + "step": 6725 + }, + { + "epoch": 3.4, + "grad_norm": 1.7928348779678345, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0703, + "step": 6750 + }, + { + "epoch": 3.41, + "grad_norm": 2.041745901107788, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0714, + "step": 6775 + }, + { + "epoch": 3.43, + "grad_norm": 1.9990183115005493, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0719, + "step": 6800 + }, + { + "epoch": 3.44, + "grad_norm": 1.9959697723388672, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0694, + "step": 6825 + }, + { + "epoch": 3.45, + "grad_norm": 1.7818009853363037, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0715, + "step": 6850 + }, + { + "epoch": 3.46, + "grad_norm": 1.8054332733154297, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0705, + "step": 6875 + }, + { + "epoch": 3.48, + "grad_norm": 1.93730628490448, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0709, + "step": 6900 + }, + { + "epoch": 3.49, + "grad_norm": 1.7002273797988892, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0736, + "step": 6925 + }, + { + "epoch": 3.5, + "grad_norm": 1.824280023574829, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0718, + "step": 6950 + }, + { + "epoch": 3.51, + "grad_norm": 2.2500030994415283, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0697, + "step": 6975 + }, + { + "epoch": 3.53, + "grad_norm": 1.672634243965149, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0707, + "step": 7000 + }, + { + "epoch": 3.53, + "eval_loss": 0.06361080706119537, + "eval_runtime": 1200.4426, + "eval_samples_per_second": 1.198, + "eval_steps_per_second": 1.198, + "eval_wer": 27.57533286615277, + "step": 7000 + }, + { + "epoch": 3.54, + "grad_norm": 1.9189599752426147, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0717, + "step": 7025 + }, + { + "epoch": 3.55, + "grad_norm": 1.5589381456375122, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0701, + "step": 7050 + }, + { + "epoch": 3.56, + "grad_norm": 1.8173940181732178, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0715, + "step": 7075 + }, + { + "epoch": 3.58, + "grad_norm": 1.9242130517959595, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0733, + "step": 7100 + }, + { + "epoch": 3.59, + "grad_norm": 1.6270307302474976, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0691, + "step": 7125 + }, + { + "epoch": 3.6, + "grad_norm": 1.916929841041565, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0734, + "step": 7150 + }, + { + "epoch": 3.61, + "grad_norm": 1.842126727104187, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0687, + "step": 7175 + }, + { + "epoch": 3.63, + "grad_norm": 1.85586416721344, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0696, + "step": 7200 + }, + { + "epoch": 3.64, + "grad_norm": 1.7271820306777954, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0714, + "step": 7225 + }, + { + "epoch": 3.65, + "grad_norm": 1.7081453800201416, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0702, + "step": 7250 + }, + { + "epoch": 3.66, + "grad_norm": 1.9261188507080078, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0703, + "step": 7275 + }, + { + "epoch": 3.68, + "grad_norm": 1.906442403793335, + "learning_rate": 9.316884422110553e-06, + "loss": 0.071, + "step": 7300 + }, + { + "epoch": 3.69, + "grad_norm": 1.8779722452163696, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0706, + "step": 7325 + }, + { + "epoch": 3.7, + "grad_norm": 1.9097789525985718, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0691, + "step": 7350 + }, + { + "epoch": 3.72, + "grad_norm": 1.9000424146652222, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0723, + "step": 7375 + }, + { + "epoch": 3.73, + "grad_norm": 1.9254841804504395, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0688, + "step": 7400 + }, + { + "epoch": 3.74, + "grad_norm": 1.6146787405014038, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0714, + "step": 7425 + }, + { + "epoch": 3.75, + "grad_norm": 1.8870267868041992, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0711, + "step": 7450 + }, + { + "epoch": 3.77, + "grad_norm": 2.058087110519409, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0711, + "step": 7475 + }, + { + "epoch": 3.78, + "grad_norm": 1.984512209892273, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0686, + "step": 7500 + }, + { + "epoch": 3.79, + "grad_norm": 2.231684923171997, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0736, + "step": 7525 + }, + { + "epoch": 3.8, + "grad_norm": 1.6928215026855469, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0696, + "step": 7550 + }, + { + "epoch": 3.82, + "grad_norm": 1.9177638292312622, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0683, + "step": 7575 + }, + { + "epoch": 3.83, + "grad_norm": 2.279709815979004, + "learning_rate": 9.28673366834171e-06, + "loss": 0.071, + "step": 7600 + }, + { + "epoch": 3.84, + "grad_norm": 1.9360569715499878, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0681, + "step": 7625 + }, + { + "epoch": 3.85, + "grad_norm": 1.7775554656982422, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0679, + "step": 7650 + }, + { + "epoch": 3.87, + "grad_norm": 1.5504074096679688, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0703, + "step": 7675 + }, + { + "epoch": 3.88, + "grad_norm": 1.9117960929870605, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0702, + "step": 7700 + }, + { + "epoch": 3.89, + "grad_norm": 1.596816062927246, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0691, + "step": 7725 + }, + { + "epoch": 3.9, + "grad_norm": 1.6213423013687134, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0701, + "step": 7750 + }, + { + "epoch": 3.92, + "grad_norm": 1.880812168121338, + "learning_rate": 9.269145728643217e-06, + "loss": 0.069, + "step": 7775 + }, + { + "epoch": 3.93, + "grad_norm": 1.9358309507369995, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0672, + "step": 7800 + }, + { + "epoch": 3.94, + "grad_norm": 1.8960965871810913, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0718, + "step": 7825 + }, + { + "epoch": 3.95, + "grad_norm": 1.7037166357040405, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0711, + "step": 7850 + }, + { + "epoch": 3.97, + "grad_norm": 1.725762128829956, + "learning_rate": 9.259095477386936e-06, + "loss": 0.069, + "step": 7875 + }, + { + "epoch": 3.98, + "grad_norm": 1.812739610671997, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0685, + "step": 7900 + }, + { + "epoch": 3.99, + "grad_norm": 1.851357340812683, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0682, + "step": 7925 + }, + { + "epoch": 4.01, + "grad_norm": 1.604232668876648, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0651, + "step": 7950 + }, + { + "epoch": 4.02, + "grad_norm": 1.6635524034500122, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0556, + "step": 7975 + }, + { + "epoch": 4.03, + "grad_norm": 1.5962978601455688, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0573, + "step": 8000 + }, + { + "epoch": 4.03, + "eval_loss": 0.06466211378574371, + "eval_runtime": 1202.4866, + "eval_samples_per_second": 1.196, + "eval_steps_per_second": 1.196, + "eval_wer": 27.0760336370007, + "step": 8000 + }, + { + "epoch": 4.04, + "grad_norm": 1.9178351163864136, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0584, + "step": 8025 + }, + { + "epoch": 4.06, + "grad_norm": 1.4233107566833496, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0538, + "step": 8050 + }, + { + "epoch": 4.07, + "grad_norm": 1.7512229681015015, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0553, + "step": 8075 + }, + { + "epoch": 4.08, + "grad_norm": 1.7430758476257324, + "learning_rate": 9.236482412060302e-06, + "loss": 0.054, + "step": 8100 + }, + { + "epoch": 4.09, + "grad_norm": 1.8794506788253784, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0558, + "step": 8125 + }, + { + "epoch": 4.11, + "grad_norm": 1.8328619003295898, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0557, + "step": 8150 + }, + { + "epoch": 4.12, + "grad_norm": 1.9322847127914429, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0561, + "step": 8175 + }, + { + "epoch": 4.13, + "grad_norm": 1.9019103050231934, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0561, + "step": 8200 + }, + { + "epoch": 4.14, + "grad_norm": 1.7293845415115356, + "learning_rate": 9.223919597989952e-06, + "loss": 0.057, + "step": 8225 + }, + { + "epoch": 4.16, + "grad_norm": 1.5923023223876953, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0552, + "step": 8250 + }, + { + "epoch": 4.17, + "grad_norm": 1.744339942932129, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0577, + "step": 8275 + }, + { + "epoch": 4.18, + "grad_norm": 1.762031078338623, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0572, + "step": 8300 + }, + { + "epoch": 4.19, + "grad_norm": 2.1111905574798584, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0579, + "step": 8325 + }, + { + "epoch": 4.21, + "grad_norm": 1.7039161920547485, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0539, + "step": 8350 + }, + { + "epoch": 4.22, + "grad_norm": 1.5904098749160767, + "learning_rate": 9.208844221105528e-06, + "loss": 0.055, + "step": 8375 + }, + { + "epoch": 4.23, + "grad_norm": 1.8418642282485962, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0535, + "step": 8400 + }, + { + "epoch": 4.24, + "grad_norm": 1.8058820962905884, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0545, + "step": 8425 + }, + { + "epoch": 4.26, + "grad_norm": 2.075692653656006, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0596, + "step": 8450 + }, + { + "epoch": 4.27, + "grad_norm": 1.578643560409546, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0578, + "step": 8475 + }, + { + "epoch": 4.28, + "grad_norm": 1.9056496620178223, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0581, + "step": 8500 + }, + { + "epoch": 4.29, + "grad_norm": 1.6518995761871338, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0559, + "step": 8525 + }, + { + "epoch": 4.31, + "grad_norm": 1.8175824880599976, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0571, + "step": 8550 + }, + { + "epoch": 4.32, + "grad_norm": 1.6456143856048584, + "learning_rate": 9.188844221105528e-06, + "loss": 0.057, + "step": 8575 + }, + { + "epoch": 4.33, + "grad_norm": 2.185619354248047, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0551, + "step": 8600 + }, + { + "epoch": 4.35, + "grad_norm": 1.5809305906295776, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0549, + "step": 8625 + }, + { + "epoch": 4.36, + "grad_norm": 1.754451036453247, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0557, + "step": 8650 + }, + { + "epoch": 4.37, + "grad_norm": 1.9845269918441772, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0553, + "step": 8675 + }, + { + "epoch": 4.38, + "grad_norm": 1.5123939514160156, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0583, + "step": 8700 + }, + { + "epoch": 4.4, + "grad_norm": 1.8359405994415283, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0578, + "step": 8725 + }, + { + "epoch": 4.41, + "grad_norm": 1.81514573097229, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0559, + "step": 8750 + }, + { + "epoch": 4.42, + "grad_norm": 1.836904764175415, + "learning_rate": 9.168743718592966e-06, + "loss": 0.056, + "step": 8775 + }, + { + "epoch": 4.43, + "grad_norm": 1.668399691581726, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0562, + "step": 8800 + }, + { + "epoch": 4.45, + "grad_norm": 1.891341209411621, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0571, + "step": 8825 + }, + { + "epoch": 4.46, + "grad_norm": 1.5672218799591064, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0563, + "step": 8850 + }, + { + "epoch": 4.47, + "grad_norm": 2.148014545440674, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0569, + "step": 8875 + }, + { + "epoch": 4.48, + "grad_norm": 1.7797579765319824, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0558, + "step": 8900 + }, + { + "epoch": 4.5, + "grad_norm": 2.187398910522461, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0591, + "step": 8925 + }, + { + "epoch": 4.51, + "grad_norm": 1.8188409805297852, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0559, + "step": 8950 + }, + { + "epoch": 4.52, + "grad_norm": 1.688591718673706, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0548, + "step": 8975 + }, + { + "epoch": 4.53, + "grad_norm": 1.9121421575546265, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0565, + "step": 9000 + }, + { + "epoch": 4.53, + "eval_loss": 0.06801605969667435, + "eval_runtime": 1199.2677, + "eval_samples_per_second": 1.199, + "eval_steps_per_second": 1.199, + "eval_wer": 27.838121934127543, + "step": 9000 + }, + { + "epoch": 4.55, + "grad_norm": 1.6285473108291626, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0572, + "step": 9025 + }, + { + "epoch": 4.56, + "grad_norm": 1.636821985244751, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0547, + "step": 9050 + }, + { + "epoch": 4.57, + "grad_norm": 2.2137198448181152, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0574, + "step": 9075 + }, + { + "epoch": 4.58, + "grad_norm": 1.7288395166397095, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0578, + "step": 9100 + }, + { + "epoch": 4.6, + "grad_norm": 1.7437584400177002, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0575, + "step": 9125 + }, + { + "epoch": 4.61, + "grad_norm": 1.8756051063537598, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0563, + "step": 9150 + }, + { + "epoch": 4.62, + "grad_norm": 1.8051542043685913, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0552, + "step": 9175 + }, + { + "epoch": 4.63, + "grad_norm": 1.9158624410629272, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0547, + "step": 9200 + }, + { + "epoch": 4.65, + "grad_norm": 1.767440915107727, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0538, + "step": 9225 + }, + { + "epoch": 4.66, + "grad_norm": 1.6975661516189575, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0568, + "step": 9250 + }, + { + "epoch": 4.67, + "grad_norm": 1.7115809917449951, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0554, + "step": 9275 + }, + { + "epoch": 4.69, + "grad_norm": 1.9780707359313965, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0569, + "step": 9300 + }, + { + "epoch": 4.7, + "grad_norm": 1.9622031450271606, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0579, + "step": 9325 + }, + { + "epoch": 4.71, + "grad_norm": 2.0471725463867188, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0577, + "step": 9350 + }, + { + "epoch": 4.72, + "grad_norm": 2.0643115043640137, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0578, + "step": 9375 + }, + { + "epoch": 4.74, + "grad_norm": 1.960888147354126, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0555, + "step": 9400 + }, + { + "epoch": 4.75, + "grad_norm": 1.8931657075881958, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0577, + "step": 9425 + }, + { + "epoch": 4.76, + "grad_norm": 1.8803842067718506, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0553, + "step": 9450 + }, + { + "epoch": 4.77, + "grad_norm": 1.645965337753296, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0552, + "step": 9475 + }, + { + "epoch": 4.79, + "grad_norm": 1.923417568206787, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0532, + "step": 9500 + }, + { + "epoch": 4.8, + "grad_norm": 1.7184605598449707, + "learning_rate": 9.093366834170854e-06, + "loss": 0.057, + "step": 9525 + }, + { + "epoch": 4.81, + "grad_norm": 2.031071424484253, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0534, + "step": 9550 + }, + { + "epoch": 4.82, + "grad_norm": 1.6099382638931274, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0546, + "step": 9575 + }, + { + "epoch": 4.84, + "grad_norm": 1.876230239868164, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0549, + "step": 9600 + }, + { + "epoch": 4.85, + "grad_norm": 1.8508236408233643, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0541, + "step": 9625 + }, + { + "epoch": 4.86, + "grad_norm": 1.9273607730865479, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0573, + "step": 9650 + }, + { + "epoch": 4.87, + "grad_norm": 1.7420032024383545, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0538, + "step": 9675 + }, + { + "epoch": 4.89, + "grad_norm": 1.9599754810333252, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0547, + "step": 9700 + }, + { + "epoch": 4.9, + "grad_norm": 1.9415557384490967, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0559, + "step": 9725 + }, + { + "epoch": 4.91, + "grad_norm": 1.9598019123077393, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0558, + "step": 9750 + }, + { + "epoch": 4.92, + "grad_norm": 2.083695888519287, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0549, + "step": 9775 + }, + { + "epoch": 4.94, + "grad_norm": 1.8142132759094238, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0578, + "step": 9800 + }, + { + "epoch": 4.95, + "grad_norm": 1.8891414403915405, + "learning_rate": 9.063216080402011e-06, + "loss": 0.057, + "step": 9825 + }, + { + "epoch": 4.96, + "grad_norm": 1.8039603233337402, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0581, + "step": 9850 + }, + { + "epoch": 4.97, + "grad_norm": 2.0118677616119385, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0547, + "step": 9875 + }, + { + "epoch": 4.99, + "grad_norm": 1.7107410430908203, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0558, + "step": 9900 + }, + { + "epoch": 5.0, + "grad_norm": 1.7881505489349365, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0547, + "step": 9925 + }, + { + "epoch": 5.01, + "grad_norm": 1.6295499801635742, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0426, + "step": 9950 + }, + { + "epoch": 5.03, + "grad_norm": 1.6531370878219604, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0427, + "step": 9975 + }, + { + "epoch": 5.04, + "grad_norm": 1.2813820838928223, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0432, + "step": 10000 + }, + { + "epoch": 5.04, + "eval_loss": 0.06725659221410751, + "eval_runtime": 1199.6004, + "eval_samples_per_second": 1.199, + "eval_steps_per_second": 1.199, + "eval_wer": 27.25122634898388, + "step": 10000 + }, + { + "epoch": 5.05, + "grad_norm": 2.1055963039398193, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0428, + "step": 10025 + }, + { + "epoch": 5.06, + "grad_norm": 1.8277230262756348, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0432, + "step": 10050 + }, + { + "epoch": 5.08, + "grad_norm": 1.7564107179641724, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0432, + "step": 10075 + }, + { + "epoch": 5.09, + "grad_norm": 1.856735110282898, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0425, + "step": 10100 + }, + { + "epoch": 5.1, + "grad_norm": 1.770043969154358, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0434, + "step": 10125 + }, + { + "epoch": 5.11, + "grad_norm": 2.109482526779175, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0448, + "step": 10150 + }, + { + "epoch": 5.13, + "grad_norm": 1.769639253616333, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0443, + "step": 10175 + }, + { + "epoch": 5.14, + "grad_norm": 1.9761313199996948, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0436, + "step": 10200 + }, + { + "epoch": 5.15, + "grad_norm": 1.7238720655441284, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0422, + "step": 10225 + }, + { + "epoch": 5.16, + "grad_norm": 2.0109140872955322, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0431, + "step": 10250 + }, + { + "epoch": 5.18, + "grad_norm": 1.659127116203308, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0435, + "step": 10275 + }, + { + "epoch": 5.19, + "grad_norm": 1.7897344827651978, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0442, + "step": 10300 + }, + { + "epoch": 5.2, + "grad_norm": 1.7567737102508545, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0442, + "step": 10325 + }, + { + "epoch": 5.21, + "grad_norm": 1.7255431413650513, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0444, + "step": 10350 + }, + { + "epoch": 5.23, + "grad_norm": 1.9510167837142944, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0435, + "step": 10375 + }, + { + "epoch": 5.24, + "grad_norm": 1.9249686002731323, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0435, + "step": 10400 + }, + { + "epoch": 5.25, + "grad_norm": 1.7436363697052002, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0418, + "step": 10425 + }, + { + "epoch": 5.26, + "grad_norm": 2.0228302478790283, + "learning_rate": 9.000402010050252e-06, + "loss": 0.0448, + "step": 10450 + }, + { + "epoch": 5.28, + "grad_norm": 1.5448530912399292, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0416, + "step": 10475 + }, + { + "epoch": 5.29, + "grad_norm": 1.4480316638946533, + "learning_rate": 8.995376884422111e-06, + "loss": 0.0435, + "step": 10500 + }, + { + "epoch": 5.3, + "grad_norm": 1.958336353302002, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0439, + "step": 10525 + }, + { + "epoch": 5.31, + "grad_norm": 1.7703675031661987, + "learning_rate": 8.990452261306534e-06, + "loss": 0.043, + "step": 10550 + }, + { + "epoch": 5.33, + "grad_norm": 1.6196929216384888, + "learning_rate": 8.987939698492463e-06, + "loss": 0.044, + "step": 10575 + }, + { + "epoch": 5.34, + "grad_norm": 2.2365105152130127, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0434, + "step": 10600 + }, + { + "epoch": 5.35, + "grad_norm": 1.8133589029312134, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0438, + "step": 10625 + }, + { + "epoch": 5.37, + "grad_norm": 1.7513179779052734, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0434, + "step": 10650 + }, + { + "epoch": 5.38, + "grad_norm": 1.8569616079330444, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0424, + "step": 10675 + }, + { + "epoch": 5.39, + "grad_norm": 1.772046446800232, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0435, + "step": 10700 + }, + { + "epoch": 5.4, + "grad_norm": 1.7474825382232666, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0415, + "step": 10725 + }, + { + "epoch": 5.42, + "grad_norm": 1.8432515859603882, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0437, + "step": 10750 + }, + { + "epoch": 5.43, + "grad_norm": 1.6579755544662476, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0429, + "step": 10775 + }, + { + "epoch": 5.44, + "grad_norm": 2.0786705017089844, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0439, + "step": 10800 + }, + { + "epoch": 5.45, + "grad_norm": 1.6731257438659668, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0438, + "step": 10825 + }, + { + "epoch": 5.47, + "grad_norm": 1.733165979385376, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0446, + "step": 10850 + }, + { + "epoch": 5.48, + "grad_norm": 1.8881007432937622, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0445, + "step": 10875 + }, + { + "epoch": 5.49, + "grad_norm": 1.9717257022857666, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0442, + "step": 10900 + }, + { + "epoch": 5.5, + "grad_norm": 1.909995675086975, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0437, + "step": 10925 + }, + { + "epoch": 5.52, + "grad_norm": 1.9340059757232666, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0445, + "step": 10950 + }, + { + "epoch": 5.53, + "grad_norm": 2.2826428413391113, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0437, + "step": 10975 + }, + { + "epoch": 5.54, + "grad_norm": 1.7354209423065186, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0452, + "step": 11000 + }, + { + "epoch": 5.54, + "eval_loss": 0.06997237354516983, + "eval_runtime": 1192.5459, + "eval_samples_per_second": 1.206, + "eval_steps_per_second": 1.206, + "eval_wer": 28.01331464611072, + "step": 11000 + }, + { + "epoch": 5.55, + "grad_norm": 1.8370596170425415, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0442, + "step": 11025 + }, + { + "epoch": 5.57, + "grad_norm": 1.6998053789138794, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0435, + "step": 11050 + }, + { + "epoch": 5.58, + "grad_norm": 2.0462050437927246, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0436, + "step": 11075 + }, + { + "epoch": 5.59, + "grad_norm": 1.7438793182373047, + "learning_rate": 8.935175879396986e-06, + "loss": 0.044, + "step": 11100 + }, + { + "epoch": 5.6, + "grad_norm": 1.7278673648834229, + "learning_rate": 8.932663316582915e-06, + "loss": 0.044, + "step": 11125 + }, + { + "epoch": 5.62, + "grad_norm": 1.8218096494674683, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0439, + "step": 11150 + }, + { + "epoch": 5.63, + "grad_norm": 1.6633522510528564, + "learning_rate": 8.927638190954775e-06, + "loss": 0.043, + "step": 11175 + }, + { + "epoch": 5.64, + "grad_norm": 1.8987059593200684, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0445, + "step": 11200 + }, + { + "epoch": 5.65, + "grad_norm": 1.634253978729248, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0432, + "step": 11225 + }, + { + "epoch": 5.67, + "grad_norm": 1.709099292755127, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0433, + "step": 11250 + }, + { + "epoch": 5.68, + "grad_norm": 2.2129623889923096, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0443, + "step": 11275 + }, + { + "epoch": 5.69, + "grad_norm": 1.7424851655960083, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0452, + "step": 11300 + }, + { + "epoch": 5.71, + "grad_norm": 1.9226475954055786, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0428, + "step": 11325 + }, + { + "epoch": 5.72, + "grad_norm": 1.7708911895751953, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0426, + "step": 11350 + }, + { + "epoch": 5.73, + "grad_norm": 1.7315361499786377, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0456, + "step": 11375 + }, + { + "epoch": 5.74, + "grad_norm": 1.8674744367599487, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0438, + "step": 11400 + }, + { + "epoch": 5.76, + "grad_norm": 1.7499444484710693, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0412, + "step": 11425 + }, + { + "epoch": 5.77, + "grad_norm": 2.062084436416626, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0425, + "step": 11450 + }, + { + "epoch": 5.78, + "grad_norm": 1.8608508110046387, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0433, + "step": 11475 + }, + { + "epoch": 5.79, + "grad_norm": 1.6516690254211426, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0432, + "step": 11500 + }, + { + "epoch": 5.81, + "grad_norm": 1.9903138875961304, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0442, + "step": 11525 + }, + { + "epoch": 5.82, + "grad_norm": 1.770604133605957, + "learning_rate": 8.889949748743718e-06, + "loss": 0.044, + "step": 11550 + }, + { + "epoch": 5.83, + "grad_norm": 1.5155885219573975, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0434, + "step": 11575 + }, + { + "epoch": 5.84, + "grad_norm": 1.9590301513671875, + "learning_rate": 8.884924623115579e-06, + "loss": 0.043, + "step": 11600 + }, + { + "epoch": 5.86, + "grad_norm": 2.0237135887145996, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0434, + "step": 11625 + }, + { + "epoch": 5.87, + "grad_norm": 1.8854788541793823, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0431, + "step": 11650 + }, + { + "epoch": 5.88, + "grad_norm": 2.121731758117676, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0441, + "step": 11675 + }, + { + "epoch": 5.89, + "grad_norm": 1.866882562637329, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0413, + "step": 11700 + }, + { + "epoch": 5.91, + "grad_norm": 1.6187278032302856, + "learning_rate": 8.872361809045227e-06, + "loss": 0.042, + "step": 11725 + }, + { + "epoch": 5.92, + "grad_norm": 1.5865144729614258, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0421, + "step": 11750 + }, + { + "epoch": 5.93, + "grad_norm": 1.6032301187515259, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0431, + "step": 11775 + }, + { + "epoch": 5.94, + "grad_norm": 1.7998621463775635, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0449, + "step": 11800 + }, + { + "epoch": 5.96, + "grad_norm": 1.7616817951202393, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0411, + "step": 11825 + }, + { + "epoch": 5.97, + "grad_norm": 1.875646710395813, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0447, + "step": 11850 + }, + { + "epoch": 5.98, + "grad_norm": 1.6682502031326294, + "learning_rate": 8.857286432160805e-06, + "loss": 0.045, + "step": 11875 + }, + { + "epoch": 5.99, + "grad_norm": 1.723772644996643, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0437, + "step": 11900 + }, + { + "epoch": 6.01, + "grad_norm": 1.4237196445465088, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0352, + "step": 11925 + }, + { + "epoch": 6.02, + "grad_norm": 1.600555419921875, + "learning_rate": 8.849748743718594e-06, + "loss": 0.031, + "step": 11950 + }, + { + "epoch": 6.03, + "grad_norm": 1.674511432647705, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0307, + "step": 11975 + }, + { + "epoch": 6.05, + "grad_norm": 1.8128514289855957, + "learning_rate": 8.844723618090453e-06, + "loss": 0.033, + "step": 12000 + }, + { + "epoch": 6.05, + "eval_loss": 0.07410162687301636, + "eval_runtime": 1196.2137, + "eval_samples_per_second": 1.202, + "eval_steps_per_second": 1.202, + "eval_wer": 28.258584442887173, + "step": 12000 + }, + { + "epoch": 6.06, + "grad_norm": 1.8300641775131226, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0309, + "step": 12025 + }, + { + "epoch": 6.07, + "grad_norm": 1.3258792161941528, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0319, + "step": 12050 + }, + { + "epoch": 6.08, + "grad_norm": 1.5426017045974731, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0309, + "step": 12075 + }, + { + "epoch": 6.1, + "grad_norm": 1.6829867362976074, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0316, + "step": 12100 + }, + { + "epoch": 6.11, + "grad_norm": 1.77582585811615, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0317, + "step": 12125 + }, + { + "epoch": 6.12, + "grad_norm": 1.57148015499115, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0318, + "step": 12150 + }, + { + "epoch": 6.13, + "grad_norm": 1.7856422662734985, + "learning_rate": 8.82713567839196e-06, + "loss": 0.032, + "step": 12175 + }, + { + "epoch": 6.15, + "grad_norm": 1.6071913242340088, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0322, + "step": 12200 + }, + { + "epoch": 6.16, + "grad_norm": 1.7472176551818848, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0318, + "step": 12225 + }, + { + "epoch": 6.17, + "grad_norm": 1.8545920848846436, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0323, + "step": 12250 + }, + { + "epoch": 6.18, + "grad_norm": 1.4463839530944824, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0325, + "step": 12275 + }, + { + "epoch": 6.2, + "grad_norm": 1.6108479499816895, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0332, + "step": 12300 + }, + { + "epoch": 6.21, + "grad_norm": 1.7655978202819824, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0317, + "step": 12325 + }, + { + "epoch": 6.22, + "grad_norm": 1.678285002708435, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0322, + "step": 12350 + }, + { + "epoch": 6.23, + "grad_norm": 1.4292271137237549, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0327, + "step": 12375 + }, + { + "epoch": 6.25, + "grad_norm": 1.5927743911743164, + "learning_rate": 8.804522613065327e-06, + "loss": 0.033, + "step": 12400 + }, + { + "epoch": 6.26, + "grad_norm": 1.4815298318862915, + "learning_rate": 8.802010050251257e-06, + "loss": 0.032, + "step": 12425 + }, + { + "epoch": 6.27, + "grad_norm": 1.8486990928649902, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0332, + "step": 12450 + }, + { + "epoch": 6.28, + "grad_norm": 1.7962981462478638, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0325, + "step": 12475 + }, + { + "epoch": 6.3, + "grad_norm": 1.7656302452087402, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0331, + "step": 12500 + }, + { + "epoch": 6.31, + "grad_norm": 1.4246591329574585, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0312, + "step": 12525 + }, + { + "epoch": 6.32, + "grad_norm": 2.132295608520508, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0335, + "step": 12550 + }, + { + "epoch": 6.34, + "grad_norm": 2.011249303817749, + "learning_rate": 8.786934673366834e-06, + "loss": 0.032, + "step": 12575 + }, + { + "epoch": 6.35, + "grad_norm": 1.9529807567596436, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0337, + "step": 12600 + }, + { + "epoch": 6.36, + "grad_norm": 1.7300220727920532, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0332, + "step": 12625 + }, + { + "epoch": 6.37, + "grad_norm": 1.782228708267212, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0339, + "step": 12650 + }, + { + "epoch": 6.39, + "grad_norm": 1.529548168182373, + "learning_rate": 8.776884422110553e-06, + "loss": 0.033, + "step": 12675 + }, + { + "epoch": 6.4, + "grad_norm": 2.0244641304016113, + "learning_rate": 8.774472361809045e-06, + "loss": 0.033, + "step": 12700 + }, + { + "epoch": 6.41, + "grad_norm": 2.0537354946136475, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0322, + "step": 12725 + }, + { + "epoch": 6.42, + "grad_norm": 1.9863638877868652, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0332, + "step": 12750 + }, + { + "epoch": 6.44, + "grad_norm": 1.812774896621704, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0348, + "step": 12775 + }, + { + "epoch": 6.45, + "grad_norm": 2.0098791122436523, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0336, + "step": 12800 + }, + { + "epoch": 6.46, + "grad_norm": 1.8118650913238525, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0319, + "step": 12825 + }, + { + "epoch": 6.47, + "grad_norm": 1.8542656898498535, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0325, + "step": 12850 + }, + { + "epoch": 6.49, + "grad_norm": 1.871066689491272, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0337, + "step": 12875 + }, + { + "epoch": 6.5, + "grad_norm": 1.7253698110580444, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0332, + "step": 12900 + }, + { + "epoch": 6.51, + "grad_norm": 1.613588571548462, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0311, + "step": 12925 + }, + { + "epoch": 6.52, + "grad_norm": 1.8773123025894165, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0344, + "step": 12950 + }, + { + "epoch": 6.54, + "grad_norm": 2.016659736633301, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0346, + "step": 12975 + }, + { + "epoch": 6.55, + "grad_norm": 1.6099720001220703, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0326, + "step": 13000 + }, + { + "epoch": 6.55, + "eval_loss": 0.07584689557552338, + "eval_runtime": 1214.702, + "eval_samples_per_second": 1.184, + "eval_steps_per_second": 1.184, + "eval_wer": 28.004555010511563, + "step": 13000 + }, + { + "epoch": 6.56, + "grad_norm": 1.5399038791656494, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0322, + "step": 13025 + }, + { + "epoch": 6.57, + "grad_norm": 1.7464622259140015, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0339, + "step": 13050 + }, + { + "epoch": 6.59, + "grad_norm": 1.5856624841690063, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0322, + "step": 13075 + }, + { + "epoch": 6.6, + "grad_norm": 1.5305110216140747, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0331, + "step": 13100 + }, + { + "epoch": 6.61, + "grad_norm": 1.9105631113052368, + "learning_rate": 8.731859296482412e-06, + "loss": 0.0321, + "step": 13125 + }, + { + "epoch": 6.62, + "grad_norm": 2.122291088104248, + "learning_rate": 8.729346733668342e-06, + "loss": 0.0332, + "step": 13150 + }, + { + "epoch": 6.64, + "grad_norm": 2.075965166091919, + "learning_rate": 8.726834170854273e-06, + "loss": 0.0327, + "step": 13175 + }, + { + "epoch": 6.65, + "grad_norm": 1.9258822202682495, + "learning_rate": 8.724321608040202e-06, + "loss": 0.0329, + "step": 13200 + }, + { + "epoch": 6.66, + "grad_norm": 2.0716440677642822, + "learning_rate": 8.721809045226131e-06, + "loss": 0.0327, + "step": 13225 + }, + { + "epoch": 6.68, + "grad_norm": 2.123682737350464, + "learning_rate": 8.71929648241206e-06, + "loss": 0.0339, + "step": 13250 + }, + { + "epoch": 6.69, + "grad_norm": 1.9673889875411987, + "learning_rate": 8.716783919597992e-06, + "loss": 0.0331, + "step": 13275 + }, + { + "epoch": 6.7, + "grad_norm": 1.8077858686447144, + "learning_rate": 8.71427135678392e-06, + "loss": 0.0334, + "step": 13300 + }, + { + "epoch": 6.71, + "grad_norm": 1.4252288341522217, + "learning_rate": 8.71175879396985e-06, + "loss": 0.0342, + "step": 13325 + }, + { + "epoch": 6.73, + "grad_norm": 1.6776965856552124, + "learning_rate": 8.70924623115578e-06, + "loss": 0.0324, + "step": 13350 + }, + { + "epoch": 6.74, + "grad_norm": 1.7198550701141357, + "learning_rate": 8.706733668341709e-06, + "loss": 0.0322, + "step": 13375 + }, + { + "epoch": 6.75, + "grad_norm": 1.6895558834075928, + "learning_rate": 8.70422110552764e-06, + "loss": 0.0325, + "step": 13400 + }, + { + "epoch": 6.76, + "grad_norm": 1.734336495399475, + "learning_rate": 8.701708542713568e-06, + "loss": 0.0324, + "step": 13425 + }, + { + "epoch": 6.78, + "grad_norm": 1.563240885734558, + "learning_rate": 8.699195979899499e-06, + "loss": 0.0325, + "step": 13450 + }, + { + "epoch": 6.79, + "grad_norm": 1.8251124620437622, + "learning_rate": 8.696683417085428e-06, + "loss": 0.0319, + "step": 13475 + }, + { + "epoch": 6.8, + "grad_norm": 2.081357479095459, + "learning_rate": 8.694170854271357e-06, + "loss": 0.0325, + "step": 13500 + }, + { + "epoch": 6.81, + "grad_norm": 2.0616848468780518, + "learning_rate": 8.691658291457287e-06, + "loss": 0.0331, + "step": 13525 + }, + { + "epoch": 6.83, + "grad_norm": 1.982967734336853, + "learning_rate": 8.689145728643218e-06, + "loss": 0.033, + "step": 13550 + }, + { + "epoch": 6.84, + "grad_norm": 1.5742464065551758, + "learning_rate": 8.686633165829147e-06, + "loss": 0.0344, + "step": 13575 + }, + { + "epoch": 6.85, + "grad_norm": 1.9220224618911743, + "learning_rate": 8.684120603015076e-06, + "loss": 0.0334, + "step": 13600 + }, + { + "epoch": 6.86, + "grad_norm": 1.8322572708129883, + "learning_rate": 8.681608040201006e-06, + "loss": 0.0331, + "step": 13625 + }, + { + "epoch": 6.88, + "grad_norm": 2.132880687713623, + "learning_rate": 8.679095477386935e-06, + "loss": 0.0323, + "step": 13650 + }, + { + "epoch": 6.89, + "grad_norm": 1.9286514520645142, + "learning_rate": 8.676582914572866e-06, + "loss": 0.0332, + "step": 13675 + }, + { + "epoch": 6.9, + "grad_norm": 2.148689031600952, + "learning_rate": 8.674070351758794e-06, + "loss": 0.0333, + "step": 13700 + }, + { + "epoch": 6.91, + "grad_norm": 1.8529285192489624, + "learning_rate": 8.671557788944725e-06, + "loss": 0.0335, + "step": 13725 + }, + { + "epoch": 6.93, + "grad_norm": 1.971163034439087, + "learning_rate": 8.669045226130654e-06, + "loss": 0.0338, + "step": 13750 + }, + { + "epoch": 6.94, + "grad_norm": 1.6017379760742188, + "learning_rate": 8.666532663316583e-06, + "loss": 0.0346, + "step": 13775 + }, + { + "epoch": 6.95, + "grad_norm": 1.7782551050186157, + "learning_rate": 8.664020100502514e-06, + "loss": 0.0329, + "step": 13800 + }, + { + "epoch": 6.96, + "grad_norm": 1.9955463409423828, + "learning_rate": 8.661507537688444e-06, + "loss": 0.0329, + "step": 13825 + }, + { + "epoch": 6.98, + "grad_norm": 1.7116156816482544, + "learning_rate": 8.658994974874373e-06, + "loss": 0.0336, + "step": 13850 + }, + { + "epoch": 6.99, + "grad_norm": 1.6123133897781372, + "learning_rate": 8.656482412060302e-06, + "loss": 0.0339, + "step": 13875 + }, + { + "epoch": 7.0, + "grad_norm": 1.4586131572723389, + "learning_rate": 8.653969849246231e-06, + "loss": 0.032, + "step": 13900 + }, + { + "epoch": 7.02, + "grad_norm": 1.4720200300216675, + "learning_rate": 8.65145728643216e-06, + "loss": 0.0235, + "step": 13925 + }, + { + "epoch": 7.03, + "grad_norm": 1.402979850769043, + "learning_rate": 8.648944723618092e-06, + "loss": 0.0221, + "step": 13950 + }, + { + "epoch": 7.04, + "grad_norm": 1.3352878093719482, + "learning_rate": 8.64643216080402e-06, + "loss": 0.0233, + "step": 13975 + }, + { + "epoch": 7.05, + "grad_norm": 1.4785394668579102, + "learning_rate": 8.64391959798995e-06, + "loss": 0.0231, + "step": 14000 + }, + { + "epoch": 7.05, + "eval_loss": 0.08159680664539337, + "eval_runtime": 1207.4623, + "eval_samples_per_second": 1.191, + "eval_steps_per_second": 1.191, + "eval_wer": 28.6177295024527, + "step": 14000 + }, + { + "epoch": 7.07, + "grad_norm": 1.5042158365249634, + "learning_rate": 8.64140703517588e-06, + "loss": 0.0228, + "step": 14025 + }, + { + "epoch": 7.08, + "grad_norm": 1.9230432510375977, + "learning_rate": 8.638894472361809e-06, + "loss": 0.0224, + "step": 14050 + }, + { + "epoch": 7.09, + "grad_norm": 1.7666635513305664, + "learning_rate": 8.63638190954774e-06, + "loss": 0.0232, + "step": 14075 + }, + { + "epoch": 7.1, + "grad_norm": 1.8460893630981445, + "learning_rate": 8.63386934673367e-06, + "loss": 0.0224, + "step": 14100 + }, + { + "epoch": 7.12, + "grad_norm": 1.4872196912765503, + "learning_rate": 8.631356783919599e-06, + "loss": 0.0236, + "step": 14125 + }, + { + "epoch": 7.13, + "grad_norm": 1.3978068828582764, + "learning_rate": 8.628844221105528e-06, + "loss": 0.0236, + "step": 14150 + }, + { + "epoch": 7.14, + "grad_norm": 1.8196702003479004, + "learning_rate": 8.626331658291457e-06, + "loss": 0.0227, + "step": 14175 + }, + { + "epoch": 7.15, + "grad_norm": 1.6016894578933716, + "learning_rate": 8.623819095477388e-06, + "loss": 0.0237, + "step": 14200 + }, + { + "epoch": 7.17, + "grad_norm": 1.49489426612854, + "learning_rate": 8.621306532663318e-06, + "loss": 0.023, + "step": 14225 + }, + { + "epoch": 7.18, + "grad_norm": 1.681240439414978, + "learning_rate": 8.618793969849247e-06, + "loss": 0.0242, + "step": 14250 + }, + { + "epoch": 7.19, + "grad_norm": 1.9929225444793701, + "learning_rate": 8.616281407035176e-06, + "loss": 0.0241, + "step": 14275 + }, + { + "epoch": 7.2, + "grad_norm": 1.7956491708755493, + "learning_rate": 8.613768844221106e-06, + "loss": 0.0239, + "step": 14300 + }, + { + "epoch": 7.22, + "grad_norm": 1.5692269802093506, + "learning_rate": 8.611256281407035e-06, + "loss": 0.0229, + "step": 14325 + }, + { + "epoch": 7.23, + "grad_norm": 1.5116734504699707, + "learning_rate": 8.608743718592966e-06, + "loss": 0.025, + "step": 14350 + }, + { + "epoch": 7.24, + "grad_norm": 2.0631189346313477, + "learning_rate": 8.606231155778895e-06, + "loss": 0.0242, + "step": 14375 + }, + { + "epoch": 7.25, + "grad_norm": 1.4671801328659058, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0227, + "step": 14400 + }, + { + "epoch": 7.27, + "grad_norm": 1.5378831624984741, + "learning_rate": 8.601206030150756e-06, + "loss": 0.0248, + "step": 14425 + }, + { + "epoch": 7.28, + "grad_norm": 1.6136387586593628, + "learning_rate": 8.598693467336683e-06, + "loss": 0.022, + "step": 14450 + }, + { + "epoch": 7.29, + "grad_norm": 1.281409740447998, + "learning_rate": 8.596180904522614e-06, + "loss": 0.0237, + "step": 14475 + }, + { + "epoch": 7.3, + "grad_norm": 1.794678807258606, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0243, + "step": 14500 + }, + { + "epoch": 7.32, + "grad_norm": 1.621233344078064, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0245, + "step": 14525 + }, + { + "epoch": 7.33, + "grad_norm": 1.6823129653930664, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0243, + "step": 14550 + }, + { + "epoch": 7.34, + "grad_norm": 1.74722421169281, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0235, + "step": 14575 + }, + { + "epoch": 7.36, + "grad_norm": 1.7419253587722778, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0248, + "step": 14600 + }, + { + "epoch": 7.37, + "grad_norm": 1.629596471786499, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0254, + "step": 14625 + }, + { + "epoch": 7.38, + "grad_norm": 1.6490020751953125, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0238, + "step": 14650 + }, + { + "epoch": 7.39, + "grad_norm": 1.8724379539489746, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0244, + "step": 14675 + }, + { + "epoch": 7.41, + "grad_norm": 1.7706691026687622, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0236, + "step": 14700 + }, + { + "epoch": 7.42, + "grad_norm": 1.673621416091919, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0238, + "step": 14725 + }, + { + "epoch": 7.43, + "grad_norm": 1.762155532836914, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0236, + "step": 14750 + }, + { + "epoch": 7.44, + "grad_norm": 2.047837972640991, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0238, + "step": 14775 + }, + { + "epoch": 7.46, + "grad_norm": 1.6189721822738647, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0243, + "step": 14800 + }, + { + "epoch": 7.47, + "grad_norm": 2.2522635459899902, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0241, + "step": 14825 + }, + { + "epoch": 7.48, + "grad_norm": 1.8222557306289673, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0235, + "step": 14850 + }, + { + "epoch": 7.49, + "grad_norm": 1.6307533979415894, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0243, + "step": 14875 + }, + { + "epoch": 7.51, + "grad_norm": 1.7096924781799316, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0253, + "step": 14900 + }, + { + "epoch": 7.52, + "grad_norm": 1.831829309463501, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0235, + "step": 14925 + }, + { + "epoch": 7.53, + "grad_norm": 1.8073431253433228, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0247, + "step": 14950 + }, + { + "epoch": 7.54, + "grad_norm": 1.9819813966751099, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0258, + "step": 14975 + }, + { + "epoch": 7.56, + "grad_norm": 1.7819302082061768, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0241, + "step": 15000 + }, + { + "epoch": 7.56, + "eval_loss": 0.0857674852013588, + "eval_runtime": 1206.8367, + "eval_samples_per_second": 1.192, + "eval_steps_per_second": 1.192, + "eval_wer": 29.25718290119131, + "step": 15000 + }, + { + "epoch": 7.57, + "grad_norm": 1.696399450302124, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0247, + "step": 15025 + }, + { + "epoch": 7.58, + "grad_norm": 1.5385408401489258, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0242, + "step": 15050 + }, + { + "epoch": 7.59, + "grad_norm": 1.780907392501831, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0237, + "step": 15075 + }, + { + "epoch": 7.61, + "grad_norm": 1.6110178232192993, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0245, + "step": 15100 + }, + { + "epoch": 7.62, + "grad_norm": 1.853152871131897, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0251, + "step": 15125 + }, + { + "epoch": 7.63, + "grad_norm": 1.7407482862472534, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0228, + "step": 15150 + }, + { + "epoch": 7.64, + "grad_norm": 1.794573187828064, + "learning_rate": 8.525829145728644e-06, + "loss": 0.025, + "step": 15175 + }, + { + "epoch": 7.66, + "grad_norm": 1.8514268398284912, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0238, + "step": 15200 + }, + { + "epoch": 7.67, + "grad_norm": 1.906569242477417, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0253, + "step": 15225 + }, + { + "epoch": 7.68, + "grad_norm": 1.5437448024749756, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0234, + "step": 15250 + }, + { + "epoch": 7.7, + "grad_norm": 1.9541072845458984, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0241, + "step": 15275 + }, + { + "epoch": 7.71, + "grad_norm": 1.7434765100479126, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0233, + "step": 15300 + }, + { + "epoch": 7.72, + "grad_norm": 1.5526138544082642, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0239, + "step": 15325 + }, + { + "epoch": 7.73, + "grad_norm": 1.8143752813339233, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0249, + "step": 15350 + }, + { + "epoch": 7.75, + "grad_norm": 1.7671911716461182, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0245, + "step": 15375 + }, + { + "epoch": 7.76, + "grad_norm": 1.6827150583267212, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0254, + "step": 15400 + }, + { + "epoch": 7.77, + "grad_norm": 1.6813195943832397, + "learning_rate": 8.50070351758794e-06, + "loss": 0.025, + "step": 15425 + }, + { + "epoch": 7.78, + "grad_norm": 1.863885521888733, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0254, + "step": 15450 + }, + { + "epoch": 7.8, + "grad_norm": 1.7295267581939697, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0248, + "step": 15475 + }, + { + "epoch": 7.81, + "grad_norm": 1.4283534288406372, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0245, + "step": 15500 + }, + { + "epoch": 7.82, + "grad_norm": 1.6985344886779785, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0252, + "step": 15525 + }, + { + "epoch": 7.83, + "grad_norm": 1.6986379623413086, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0251, + "step": 15550 + }, + { + "epoch": 7.85, + "grad_norm": 1.7748067378997803, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0252, + "step": 15575 + }, + { + "epoch": 7.86, + "grad_norm": 1.5803635120391846, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0249, + "step": 15600 + }, + { + "epoch": 7.87, + "grad_norm": 1.4853476285934448, + "learning_rate": 8.480603015075377e-06, + "loss": 0.0233, + "step": 15625 + }, + { + "epoch": 7.88, + "grad_norm": 1.99225914478302, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0252, + "step": 15650 + }, + { + "epoch": 7.9, + "grad_norm": 1.680383563041687, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0251, + "step": 15675 + }, + { + "epoch": 7.91, + "grad_norm": 1.6750810146331787, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0257, + "step": 15700 + }, + { + "epoch": 7.92, + "grad_norm": 1.656472086906433, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0266, + "step": 15725 + }, + { + "epoch": 7.93, + "grad_norm": 1.8951327800750732, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0253, + "step": 15750 + }, + { + "epoch": 7.95, + "grad_norm": 1.742968201637268, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0236, + "step": 15775 + }, + { + "epoch": 7.96, + "grad_norm": 1.8059569597244263, + "learning_rate": 8.463015075376885e-06, + "loss": 0.0245, + "step": 15800 + }, + { + "epoch": 7.97, + "grad_norm": 1.6708135604858398, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0235, + "step": 15825 + }, + { + "epoch": 7.98, + "grad_norm": 1.7452640533447266, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0252, + "step": 15850 + }, + { + "epoch": 8.0, + "grad_norm": 1.5547782182693481, + "learning_rate": 8.455477386934673e-06, + "loss": 0.025, + "step": 15875 + }, + { + "epoch": 8.01, + "grad_norm": 1.2804126739501953, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0184, + "step": 15900 + }, + { + "epoch": 8.02, + "grad_norm": 1.3429714441299438, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0168, + "step": 15925 + }, + { + "epoch": 8.04, + "grad_norm": 1.215593695640564, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0168, + "step": 15950 + }, + { + "epoch": 8.05, + "grad_norm": 1.6081595420837402, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0161, + "step": 15975 + }, + { + "epoch": 8.06, + "grad_norm": 1.3912684917449951, + "learning_rate": 8.442914572864322e-06, + "loss": 0.016, + "step": 16000 + }, + { + "epoch": 8.06, + "eval_loss": 0.09193716198205948, + "eval_runtime": 1206.8791, + "eval_samples_per_second": 1.192, + "eval_steps_per_second": 1.192, + "eval_wer": 28.652768044849335, + "step": 16000 + }, + { + "epoch": 8.07, + "grad_norm": 1.7438273429870605, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0177, + "step": 16025 + }, + { + "epoch": 8.09, + "grad_norm": 1.4394763708114624, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0168, + "step": 16050 + }, + { + "epoch": 8.1, + "grad_norm": 1.3874465227127075, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0156, + "step": 16075 + }, + { + "epoch": 8.11, + "grad_norm": 1.4761991500854492, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0178, + "step": 16100 + }, + { + "epoch": 8.12, + "grad_norm": 1.3524432182312012, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0164, + "step": 16125 + }, + { + "epoch": 8.14, + "grad_norm": 1.8824248313903809, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0167, + "step": 16150 + }, + { + "epoch": 8.15, + "grad_norm": 1.4899888038635254, + "learning_rate": 8.42532663316583e-06, + "loss": 0.017, + "step": 16175 + }, + { + "epoch": 8.16, + "grad_norm": 1.456059455871582, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0173, + "step": 16200 + }, + { + "epoch": 8.17, + "grad_norm": 1.489370584487915, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0174, + "step": 16225 + }, + { + "epoch": 8.19, + "grad_norm": 1.4678382873535156, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0171, + "step": 16250 + }, + { + "epoch": 8.2, + "grad_norm": 1.7417044639587402, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0164, + "step": 16275 + }, + { + "epoch": 8.21, + "grad_norm": 1.7125444412231445, + "learning_rate": 8.412763819095479e-06, + "loss": 0.018, + "step": 16300 + }, + { + "epoch": 8.22, + "grad_norm": 1.4648323059082031, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0163, + "step": 16325 + }, + { + "epoch": 8.24, + "grad_norm": 1.2059556245803833, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0172, + "step": 16350 + }, + { + "epoch": 8.25, + "grad_norm": 1.4995545148849487, + "learning_rate": 8.405226130653267e-06, + "loss": 0.016, + "step": 16375 + }, + { + "epoch": 8.26, + "grad_norm": 1.1712977886199951, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0171, + "step": 16400 + }, + { + "epoch": 8.27, + "grad_norm": 1.6777487993240356, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0169, + "step": 16425 + }, + { + "epoch": 8.29, + "grad_norm": 1.3354642391204834, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0169, + "step": 16450 + }, + { + "epoch": 8.3, + "grad_norm": 1.3658968210220337, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0169, + "step": 16475 + }, + { + "epoch": 8.31, + "grad_norm": 1.630730152130127, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0167, + "step": 16500 + }, + { + "epoch": 8.32, + "grad_norm": 1.844902753829956, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0175, + "step": 16525 + }, + { + "epoch": 8.34, + "grad_norm": 1.497649073600769, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0167, + "step": 16550 + }, + { + "epoch": 8.35, + "grad_norm": 1.6085478067398071, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0171, + "step": 16575 + }, + { + "epoch": 8.36, + "grad_norm": 1.4073646068572998, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0172, + "step": 16600 + }, + { + "epoch": 8.38, + "grad_norm": 1.7068212032318115, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0183, + "step": 16625 + }, + { + "epoch": 8.39, + "grad_norm": 1.4214227199554443, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0169, + "step": 16650 + }, + { + "epoch": 8.4, + "grad_norm": 1.3248395919799805, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0168, + "step": 16675 + }, + { + "epoch": 8.41, + "grad_norm": 1.825480580329895, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0184, + "step": 16700 + }, + { + "epoch": 8.43, + "grad_norm": 1.814050555229187, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0165, + "step": 16725 + }, + { + "epoch": 8.44, + "grad_norm": 1.3944083452224731, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0172, + "step": 16750 + }, + { + "epoch": 8.45, + "grad_norm": 1.5155799388885498, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0173, + "step": 16775 + }, + { + "epoch": 8.46, + "grad_norm": 1.3832693099975586, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0174, + "step": 16800 + }, + { + "epoch": 8.48, + "grad_norm": 1.9296929836273193, + "learning_rate": 8.36e-06, + "loss": 0.0166, + "step": 16825 + }, + { + "epoch": 8.49, + "grad_norm": 1.7059978246688843, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0182, + "step": 16850 + }, + { + "epoch": 8.5, + "grad_norm": 1.5897443294525146, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0174, + "step": 16875 + }, + { + "epoch": 8.51, + "grad_norm": 1.6101067066192627, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0179, + "step": 16900 + }, + { + "epoch": 8.53, + "grad_norm": 1.669481635093689, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0188, + "step": 16925 + }, + { + "epoch": 8.54, + "grad_norm": 1.6964446306228638, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0184, + "step": 16950 + }, + { + "epoch": 8.55, + "grad_norm": 1.4007657766342163, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0176, + "step": 16975 + }, + { + "epoch": 8.56, + "grad_norm": 1.6609703302383423, + "learning_rate": 8.342412060301508e-06, + "loss": 0.018, + "step": 17000 + }, + { + "epoch": 8.56, + "eval_loss": 0.09488032758235931, + "eval_runtime": 1209.82, + "eval_samples_per_second": 1.189, + "eval_steps_per_second": 1.189, + "eval_wer": 29.274702172389627, + "step": 17000 + }, + { + "epoch": 8.58, + "grad_norm": 1.6576311588287354, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0179, + "step": 17025 + }, + { + "epoch": 8.59, + "grad_norm": 1.9696978330612183, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0183, + "step": 17050 + }, + { + "epoch": 8.6, + "grad_norm": 1.4279985427856445, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0171, + "step": 17075 + }, + { + "epoch": 8.61, + "grad_norm": 1.8639880418777466, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0179, + "step": 17100 + }, + { + "epoch": 8.63, + "grad_norm": 1.7033897638320923, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0181, + "step": 17125 + }, + { + "epoch": 8.64, + "grad_norm": 1.543739914894104, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0182, + "step": 17150 + }, + { + "epoch": 8.65, + "grad_norm": 1.7123262882232666, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0181, + "step": 17175 + }, + { + "epoch": 8.66, + "grad_norm": 1.8813700675964355, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0177, + "step": 17200 + }, + { + "epoch": 8.68, + "grad_norm": 1.5735827684402466, + "learning_rate": 8.319899497487438e-06, + "loss": 0.017, + "step": 17225 + }, + { + "epoch": 8.69, + "grad_norm": 1.5541824102401733, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0174, + "step": 17250 + }, + { + "epoch": 8.7, + "grad_norm": 1.5821361541748047, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0174, + "step": 17275 + }, + { + "epoch": 8.72, + "grad_norm": 1.6786561012268066, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0169, + "step": 17300 + }, + { + "epoch": 8.73, + "grad_norm": 1.7321412563323975, + "learning_rate": 8.309849246231157e-06, + "loss": 0.017, + "step": 17325 + }, + { + "epoch": 8.74, + "grad_norm": 2.061835289001465, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0177, + "step": 17350 + }, + { + "epoch": 8.75, + "grad_norm": 1.5133565664291382, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0184, + "step": 17375 + }, + { + "epoch": 8.77, + "grad_norm": 1.6465957164764404, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0185, + "step": 17400 + }, + { + "epoch": 8.78, + "grad_norm": 1.5401883125305176, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0186, + "step": 17425 + }, + { + "epoch": 8.79, + "grad_norm": 1.6405322551727295, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0181, + "step": 17450 + }, + { + "epoch": 8.8, + "grad_norm": 2.0827629566192627, + "learning_rate": 8.294773869346734e-06, + "loss": 0.019, + "step": 17475 + }, + { + "epoch": 8.82, + "grad_norm": 1.6564961671829224, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0181, + "step": 17500 + }, + { + "epoch": 8.83, + "grad_norm": 2.093075752258301, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0166, + "step": 17525 + }, + { + "epoch": 8.84, + "grad_norm": 1.772351861000061, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0186, + "step": 17550 + }, + { + "epoch": 8.85, + "grad_norm": 1.454347014427185, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0176, + "step": 17575 + }, + { + "epoch": 8.87, + "grad_norm": 1.5992451906204224, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0173, + "step": 17600 + }, + { + "epoch": 8.88, + "grad_norm": 1.9028196334838867, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0177, + "step": 17625 + }, + { + "epoch": 8.89, + "grad_norm": 1.634925365447998, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0187, + "step": 17650 + }, + { + "epoch": 8.9, + "grad_norm": 2.2251663208007812, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0179, + "step": 17675 + }, + { + "epoch": 8.92, + "grad_norm": 1.5682474374771118, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0177, + "step": 17700 + }, + { + "epoch": 8.93, + "grad_norm": 1.909035086631775, + "learning_rate": 8.269648241206031e-06, + "loss": 0.018, + "step": 17725 + }, + { + "epoch": 8.94, + "grad_norm": 1.832359790802002, + "learning_rate": 8.26713567839196e-06, + "loss": 0.0178, + "step": 17750 + }, + { + "epoch": 8.95, + "grad_norm": 1.8671619892120361, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0187, + "step": 17775 + }, + { + "epoch": 8.97, + "grad_norm": 1.5063493251800537, + "learning_rate": 8.26211055276382e-06, + "loss": 0.018, + "step": 17800 + }, + { + "epoch": 8.98, + "grad_norm": 1.4578840732574463, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0178, + "step": 17825 + }, + { + "epoch": 8.99, + "grad_norm": 1.4467227458953857, + "learning_rate": 8.257185929648242e-06, + "loss": 0.0171, + "step": 17850 + }, + { + "epoch": 9.01, + "grad_norm": 1.1524723768234253, + "learning_rate": 8.25467336683417e-06, + "loss": 0.0158, + "step": 17875 + }, + { + "epoch": 9.02, + "grad_norm": 1.4715425968170166, + "learning_rate": 8.2521608040201e-06, + "loss": 0.0117, + "step": 17900 + }, + { + "epoch": 9.03, + "grad_norm": 1.2467890977859497, + "learning_rate": 8.249648241206031e-06, + "loss": 0.0114, + "step": 17925 + }, + { + "epoch": 9.04, + "grad_norm": 1.3281400203704834, + "learning_rate": 8.24713567839196e-06, + "loss": 0.0114, + "step": 17950 + }, + { + "epoch": 9.06, + "grad_norm": 1.1145596504211426, + "learning_rate": 8.24462311557789e-06, + "loss": 0.011, + "step": 17975 + }, + { + "epoch": 9.07, + "grad_norm": 1.309786319732666, + "learning_rate": 8.24211055276382e-06, + "loss": 0.0111, + "step": 18000 + }, + { + "epoch": 9.07, + "eval_loss": 0.10074544697999954, + "eval_runtime": 1200.1306, + "eval_samples_per_second": 1.198, + "eval_steps_per_second": 1.198, + "eval_wer": 28.950595655220745, + "step": 18000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 51, + "save_steps": 1000, + "total_flos": 5.603903668224e+19, + "train_batch_size": 48, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-base/telugu/checkpoint-18000/training_args.bin b/checkpoints/whisper-base/telugu/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fc9646a8a71ffbe2789ee4bd18622e82c1f9a91 --- /dev/null +++ b/checkpoints/whisper-base/telugu/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aca0b649a3494f0701f3e51430279fabc7cb526fd945eb724c45cca6c748b59f +size 4667 diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/config.json b/checkpoints/whisper-small/bengali/checkpoint-29000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..119d25ed9751c0132194051072c1e099ac77cfd2 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50302 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/generation_config.json b/checkpoints/whisper-small/bengali/checkpoint-29000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/model.safetensors b/checkpoints/whisper-small/bengali/checkpoint-29000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a136df7d751aff94c23ad507607c55b30ccc3b4 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95eec4c31d465a15c49fab03fdcdd06ab432719210313c8770efe5905a167778 +size 966995080 diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/optimizer.pt b/checkpoints/whisper-small/bengali/checkpoint-29000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28582053b37cb6cde48cc29c82be004751cacb0d --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f85ea4eb1895f322be52ce5112528b769a313ce4351e1d6a69d8c5183d25e3 +size 1925063607 diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/preprocessor_config.json b/checkpoints/whisper-small/bengali/checkpoint-29000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/rng_state.pth b/checkpoints/whisper-small/bengali/checkpoint-29000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..417671d2bbf7ebef41c91c05c0dbf1269fa28ea1 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4758d8fbdc6ac8ea09b7a5f61c293a3ba862f8837496320bc324ffa8e270c090 +size 14575 diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/scheduler.pt b/checkpoints/whisper-small/bengali/checkpoint-29000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01156bb5e47846d93b4446993209cc9085dea425 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dec8e5c60a03b921aab0c26ff0c351ce3febd632d1f90cb09cb7c42b4d785a1 +size 627 diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/trainer_state.json b/checkpoints/whisper-small/bengali/checkpoint-29000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..56842378eafb3a6047c437ace63d4955bad44ad9 --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/trainer_state.json @@ -0,0 +1,8402 @@ +{ + "best_metric": 19.953791220331862, + "best_model_checkpoint": "results/whisper-small/bengali/checkpoint-19000", + "epoch": 10.812826249067859, + "eval_steps": 1000, + "global_step": 29000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 29.68082046508789, + "learning_rate": 4.2000000000000006e-07, + "loss": 2.354, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 10.84911823272705, + "learning_rate": 9.200000000000001e-07, + "loss": 2.0154, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 7.318769454956055, + "learning_rate": 1.42e-06, + "loss": 1.6157, + "step": 75 + }, + { + "epoch": 0.04, + "grad_norm": 7.689539909362793, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.2721, + "step": 100 + }, + { + "epoch": 0.05, + "grad_norm": 7.812243461608887, + "learning_rate": 2.42e-06, + "loss": 0.9215, + "step": 125 + }, + { + "epoch": 0.06, + "grad_norm": 7.1417694091796875, + "learning_rate": 2.92e-06, + "loss": 0.7077, + "step": 150 + }, + { + "epoch": 0.07, + "grad_norm": 4.889854431152344, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.5553, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.62460994720459, + "learning_rate": 3.920000000000001e-06, + "loss": 0.4743, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 5.324848175048828, + "learning_rate": 4.42e-06, + "loss": 0.4235, + "step": 225 + }, + { + "epoch": 0.09, + "grad_norm": 4.074978828430176, + "learning_rate": 4.92e-06, + "loss": 0.39, + "step": 250 + }, + { + "epoch": 0.1, + "grad_norm": 5.585488796234131, + "learning_rate": 5.420000000000001e-06, + "loss": 0.3579, + "step": 275 + }, + { + "epoch": 0.11, + "grad_norm": 4.808184623718262, + "learning_rate": 5.92e-06, + "loss": 0.3285, + "step": 300 + }, + { + "epoch": 0.12, + "grad_norm": 4.539876937866211, + "learning_rate": 6.42e-06, + "loss": 0.3007, + "step": 325 + }, + { + "epoch": 0.13, + "grad_norm": 3.813603639602661, + "learning_rate": 6.92e-06, + "loss": 0.2848, + "step": 350 + }, + { + "epoch": 0.14, + "grad_norm": 5.4585981369018555, + "learning_rate": 7.420000000000001e-06, + "loss": 0.2453, + "step": 375 + }, + { + "epoch": 0.15, + "grad_norm": 4.614983081817627, + "learning_rate": 7.92e-06, + "loss": 0.2247, + "step": 400 + }, + { + "epoch": 0.16, + "grad_norm": 2.9942564964294434, + "learning_rate": 8.42e-06, + "loss": 0.211, + "step": 425 + }, + { + "epoch": 0.17, + "grad_norm": 4.1876220703125, + "learning_rate": 8.920000000000001e-06, + "loss": 0.2037, + "step": 450 + }, + { + "epoch": 0.18, + "grad_norm": 4.063242435455322, + "learning_rate": 9.42e-06, + "loss": 0.2075, + "step": 475 + }, + { + "epoch": 0.19, + "grad_norm": 4.509723663330078, + "learning_rate": 9.920000000000002e-06, + "loss": 0.1889, + "step": 500 + }, + { + "epoch": 0.2, + "grad_norm": 5.793299198150635, + "learning_rate": 9.997889447236182e-06, + "loss": 0.1854, + "step": 525 + }, + { + "epoch": 0.21, + "grad_norm": 3.8586199283599854, + "learning_rate": 9.995376884422112e-06, + "loss": 0.1772, + "step": 550 + }, + { + "epoch": 0.21, + "grad_norm": 4.540040969848633, + "learning_rate": 9.992864321608041e-06, + "loss": 0.1763, + "step": 575 + }, + { + "epoch": 0.22, + "grad_norm": 3.8078994750976562, + "learning_rate": 9.99035175879397e-06, + "loss": 0.1711, + "step": 600 + }, + { + "epoch": 0.23, + "grad_norm": 3.1277852058410645, + "learning_rate": 9.9878391959799e-06, + "loss": 0.1598, + "step": 625 + }, + { + "epoch": 0.24, + "grad_norm": 2.791945457458496, + "learning_rate": 9.98532663316583e-06, + "loss": 0.155, + "step": 650 + }, + { + "epoch": 0.25, + "grad_norm": 2.7729337215423584, + "learning_rate": 9.98281407035176e-06, + "loss": 0.1613, + "step": 675 + }, + { + "epoch": 0.26, + "grad_norm": 4.288908004760742, + "learning_rate": 9.98030150753769e-06, + "loss": 0.1586, + "step": 700 + }, + { + "epoch": 0.27, + "grad_norm": 2.746523380279541, + "learning_rate": 9.977788944723619e-06, + "loss": 0.1533, + "step": 725 + }, + { + "epoch": 0.28, + "grad_norm": 3.014827251434326, + "learning_rate": 9.975276381909548e-06, + "loss": 0.1498, + "step": 750 + }, + { + "epoch": 0.29, + "grad_norm": 2.123540163040161, + "learning_rate": 9.972763819095477e-06, + "loss": 0.1531, + "step": 775 + }, + { + "epoch": 0.3, + "grad_norm": 2.3613760471343994, + "learning_rate": 9.970251256281408e-06, + "loss": 0.1497, + "step": 800 + }, + { + "epoch": 0.31, + "grad_norm": 3.694166660308838, + "learning_rate": 9.967738693467338e-06, + "loss": 0.1444, + "step": 825 + }, + { + "epoch": 0.32, + "grad_norm": 2.472181558609009, + "learning_rate": 9.965226130653267e-06, + "loss": 0.1403, + "step": 850 + }, + { + "epoch": 0.33, + "grad_norm": 2.189764976501465, + "learning_rate": 9.962713567839198e-06, + "loss": 0.1409, + "step": 875 + }, + { + "epoch": 0.34, + "grad_norm": 3.6756112575531006, + "learning_rate": 9.960201005025126e-06, + "loss": 0.1362, + "step": 900 + }, + { + "epoch": 0.34, + "grad_norm": 2.4440977573394775, + "learning_rate": 9.957688442211057e-06, + "loss": 0.14, + "step": 925 + }, + { + "epoch": 0.35, + "grad_norm": 2.7009763717651367, + "learning_rate": 9.955175879396986e-06, + "loss": 0.1292, + "step": 950 + }, + { + "epoch": 0.36, + "grad_norm": 2.5733249187469482, + "learning_rate": 9.952663316582915e-06, + "loss": 0.1283, + "step": 975 + }, + { + "epoch": 0.37, + "grad_norm": 2.0757789611816406, + "learning_rate": 9.950150753768845e-06, + "loss": 0.1331, + "step": 1000 + }, + { + "epoch": 0.37, + "eval_loss": 0.10383214056491852, + "eval_runtime": 2259.832, + "eval_samples_per_second": 0.664, + "eval_steps_per_second": 0.664, + "eval_wer": 30.22474270111321, + "step": 1000 + }, + { + "epoch": 0.38, + "grad_norm": 2.975520133972168, + "learning_rate": 9.947638190954774e-06, + "loss": 0.1326, + "step": 1025 + }, + { + "epoch": 0.39, + "grad_norm": 3.2451345920562744, + "learning_rate": 9.945125628140703e-06, + "loss": 0.128, + "step": 1050 + }, + { + "epoch": 0.4, + "grad_norm": 2.2271206378936768, + "learning_rate": 9.942613065326634e-06, + "loss": 0.1233, + "step": 1075 + }, + { + "epoch": 0.41, + "grad_norm": 2.4491353034973145, + "learning_rate": 9.940100502512564e-06, + "loss": 0.124, + "step": 1100 + }, + { + "epoch": 0.42, + "grad_norm": 2.460125207901001, + "learning_rate": 9.937587939698493e-06, + "loss": 0.1265, + "step": 1125 + }, + { + "epoch": 0.43, + "grad_norm": 2.324075937271118, + "learning_rate": 9.935075376884424e-06, + "loss": 0.1218, + "step": 1150 + }, + { + "epoch": 0.44, + "grad_norm": 2.542706251144409, + "learning_rate": 9.932562814070352e-06, + "loss": 0.1232, + "step": 1175 + }, + { + "epoch": 0.45, + "grad_norm": 2.2721738815307617, + "learning_rate": 9.930050251256283e-06, + "loss": 0.118, + "step": 1200 + }, + { + "epoch": 0.46, + "grad_norm": 2.4723408222198486, + "learning_rate": 9.927537688442212e-06, + "loss": 0.1176, + "step": 1225 + }, + { + "epoch": 0.47, + "grad_norm": 1.8479586839675903, + "learning_rate": 9.925025125628141e-06, + "loss": 0.122, + "step": 1250 + }, + { + "epoch": 0.48, + "grad_norm": 2.357590675354004, + "learning_rate": 9.922512562814072e-06, + "loss": 0.1179, + "step": 1275 + }, + { + "epoch": 0.48, + "grad_norm": 2.1642332077026367, + "learning_rate": 9.920000000000002e-06, + "loss": 0.1158, + "step": 1300 + }, + { + "epoch": 0.49, + "grad_norm": 1.9894746541976929, + "learning_rate": 9.917487437185931e-06, + "loss": 0.1146, + "step": 1325 + }, + { + "epoch": 0.5, + "grad_norm": 2.185697555541992, + "learning_rate": 9.91497487437186e-06, + "loss": 0.118, + "step": 1350 + }, + { + "epoch": 0.51, + "grad_norm": 2.0995688438415527, + "learning_rate": 9.91246231155779e-06, + "loss": 0.1162, + "step": 1375 + }, + { + "epoch": 0.52, + "grad_norm": 2.0085673332214355, + "learning_rate": 9.909949748743719e-06, + "loss": 0.1092, + "step": 1400 + }, + { + "epoch": 0.53, + "grad_norm": 2.3055663108825684, + "learning_rate": 9.90743718592965e-06, + "loss": 0.1135, + "step": 1425 + }, + { + "epoch": 0.54, + "grad_norm": 3.1966633796691895, + "learning_rate": 9.904924623115578e-06, + "loss": 0.1096, + "step": 1450 + }, + { + "epoch": 0.55, + "grad_norm": 2.0577762126922607, + "learning_rate": 9.902412060301509e-06, + "loss": 0.1133, + "step": 1475 + }, + { + "epoch": 0.56, + "grad_norm": 2.4432296752929688, + "learning_rate": 9.899899497487438e-06, + "loss": 0.1066, + "step": 1500 + }, + { + "epoch": 0.57, + "grad_norm": 2.640073299407959, + "learning_rate": 9.897386934673367e-06, + "loss": 0.1088, + "step": 1525 + }, + { + "epoch": 0.58, + "grad_norm": 1.6365364789962769, + "learning_rate": 9.894874371859298e-06, + "loss": 0.111, + "step": 1550 + }, + { + "epoch": 0.59, + "grad_norm": 2.7300353050231934, + "learning_rate": 9.892361809045228e-06, + "loss": 0.1067, + "step": 1575 + }, + { + "epoch": 0.6, + "grad_norm": 2.282029867172241, + "learning_rate": 9.889849246231157e-06, + "loss": 0.1078, + "step": 1600 + }, + { + "epoch": 0.61, + "grad_norm": 1.7577533721923828, + "learning_rate": 9.887336683417086e-06, + "loss": 0.1108, + "step": 1625 + }, + { + "epoch": 0.62, + "grad_norm": 1.8237860202789307, + "learning_rate": 9.884824120603015e-06, + "loss": 0.101, + "step": 1650 + }, + { + "epoch": 0.62, + "grad_norm": 2.4958231449127197, + "learning_rate": 9.882311557788945e-06, + "loss": 0.1096, + "step": 1675 + }, + { + "epoch": 0.63, + "grad_norm": 2.0631487369537354, + "learning_rate": 9.879798994974876e-06, + "loss": 0.0993, + "step": 1700 + }, + { + "epoch": 0.64, + "grad_norm": 1.8554866313934326, + "learning_rate": 9.877286432160805e-06, + "loss": 0.1068, + "step": 1725 + }, + { + "epoch": 0.65, + "grad_norm": 1.77009916305542, + "learning_rate": 9.874773869346734e-06, + "loss": 0.1007, + "step": 1750 + }, + { + "epoch": 0.66, + "grad_norm": 2.1527929306030273, + "learning_rate": 9.872261306532664e-06, + "loss": 0.0996, + "step": 1775 + }, + { + "epoch": 0.67, + "grad_norm": 1.788855791091919, + "learning_rate": 9.869748743718593e-06, + "loss": 0.1017, + "step": 1800 + }, + { + "epoch": 0.68, + "grad_norm": 3.000561237335205, + "learning_rate": 9.867236180904524e-06, + "loss": 0.1014, + "step": 1825 + }, + { + "epoch": 0.69, + "grad_norm": 2.034292697906494, + "learning_rate": 9.864723618090453e-06, + "loss": 0.097, + "step": 1850 + }, + { + "epoch": 0.7, + "grad_norm": 2.392223358154297, + "learning_rate": 9.862211055276383e-06, + "loss": 0.0985, + "step": 1875 + }, + { + "epoch": 0.71, + "grad_norm": 2.4244911670684814, + "learning_rate": 9.859698492462312e-06, + "loss": 0.0999, + "step": 1900 + }, + { + "epoch": 0.72, + "grad_norm": 1.7819151878356934, + "learning_rate": 9.857185929648241e-06, + "loss": 0.0961, + "step": 1925 + }, + { + "epoch": 0.73, + "grad_norm": 1.7472807168960571, + "learning_rate": 9.854673366834172e-06, + "loss": 0.0954, + "step": 1950 + }, + { + "epoch": 0.74, + "grad_norm": 1.7027556896209717, + "learning_rate": 9.852160804020102e-06, + "loss": 0.0996, + "step": 1975 + }, + { + "epoch": 0.75, + "grad_norm": 2.4057886600494385, + "learning_rate": 9.849648241206031e-06, + "loss": 0.0982, + "step": 2000 + }, + { + "epoch": 0.75, + "eval_loss": 0.07871720939874649, + "eval_runtime": 2242.6134, + "eval_samples_per_second": 0.669, + "eval_steps_per_second": 0.669, + "eval_wer": 24.539662535881817, + "step": 2000 + }, + { + "epoch": 0.76, + "grad_norm": 1.9487968683242798, + "learning_rate": 9.84713567839196e-06, + "loss": 0.0939, + "step": 2025 + }, + { + "epoch": 0.76, + "grad_norm": 1.8165415525436401, + "learning_rate": 9.84462311557789e-06, + "loss": 0.0973, + "step": 2050 + }, + { + "epoch": 0.77, + "grad_norm": 2.6980443000793457, + "learning_rate": 9.842110552763819e-06, + "loss": 0.0949, + "step": 2075 + }, + { + "epoch": 0.78, + "grad_norm": 1.728190541267395, + "learning_rate": 9.83959798994975e-06, + "loss": 0.0883, + "step": 2100 + }, + { + "epoch": 0.79, + "grad_norm": 2.1086370944976807, + "learning_rate": 9.83708542713568e-06, + "loss": 0.0956, + "step": 2125 + }, + { + "epoch": 0.8, + "grad_norm": 1.8707637786865234, + "learning_rate": 9.834572864321609e-06, + "loss": 0.0936, + "step": 2150 + }, + { + "epoch": 0.81, + "grad_norm": 1.826344609260559, + "learning_rate": 9.832060301507538e-06, + "loss": 0.0946, + "step": 2175 + }, + { + "epoch": 0.82, + "grad_norm": 1.671091079711914, + "learning_rate": 9.829547738693467e-06, + "loss": 0.0902, + "step": 2200 + }, + { + "epoch": 0.83, + "grad_norm": 2.394937515258789, + "learning_rate": 9.827035175879398e-06, + "loss": 0.0877, + "step": 2225 + }, + { + "epoch": 0.84, + "grad_norm": 1.7420426607131958, + "learning_rate": 9.824522613065328e-06, + "loss": 0.0888, + "step": 2250 + }, + { + "epoch": 0.85, + "grad_norm": 2.1163082122802734, + "learning_rate": 9.822010050251257e-06, + "loss": 0.0969, + "step": 2275 + }, + { + "epoch": 0.86, + "grad_norm": 1.5306419134140015, + "learning_rate": 9.819497487437186e-06, + "loss": 0.0915, + "step": 2300 + }, + { + "epoch": 0.87, + "grad_norm": 1.746291995048523, + "learning_rate": 9.816984924623116e-06, + "loss": 0.0918, + "step": 2325 + }, + { + "epoch": 0.88, + "grad_norm": 1.730825424194336, + "learning_rate": 9.814472361809047e-06, + "loss": 0.0874, + "step": 2350 + }, + { + "epoch": 0.89, + "grad_norm": 1.3350924253463745, + "learning_rate": 9.811959798994976e-06, + "loss": 0.0866, + "step": 2375 + }, + { + "epoch": 0.89, + "grad_norm": 1.6973576545715332, + "learning_rate": 9.809447236180905e-06, + "loss": 0.0872, + "step": 2400 + }, + { + "epoch": 0.9, + "grad_norm": 2.076714515686035, + "learning_rate": 9.806934673366835e-06, + "loss": 0.0925, + "step": 2425 + }, + { + "epoch": 0.91, + "grad_norm": 2.2232306003570557, + "learning_rate": 9.804422110552764e-06, + "loss": 0.0867, + "step": 2450 + }, + { + "epoch": 0.92, + "grad_norm": 2.4923629760742188, + "learning_rate": 9.801909547738693e-06, + "loss": 0.0896, + "step": 2475 + }, + { + "epoch": 0.93, + "grad_norm": 2.05359148979187, + "learning_rate": 9.799396984924624e-06, + "loss": 0.0844, + "step": 2500 + }, + { + "epoch": 0.94, + "grad_norm": 1.98757803440094, + "learning_rate": 9.796884422110554e-06, + "loss": 0.0873, + "step": 2525 + }, + { + "epoch": 0.95, + "grad_norm": 1.6392455101013184, + "learning_rate": 9.794371859296483e-06, + "loss": 0.0862, + "step": 2550 + }, + { + "epoch": 0.96, + "grad_norm": 2.1512820720672607, + "learning_rate": 9.791859296482414e-06, + "loss": 0.0847, + "step": 2575 + }, + { + "epoch": 0.97, + "grad_norm": 1.7951105833053589, + "learning_rate": 9.789346733668342e-06, + "loss": 0.0864, + "step": 2600 + }, + { + "epoch": 0.98, + "grad_norm": 2.0735137462615967, + "learning_rate": 9.786834170854273e-06, + "loss": 0.0835, + "step": 2625 + }, + { + "epoch": 0.99, + "grad_norm": 1.6265958547592163, + "learning_rate": 9.784321608040202e-06, + "loss": 0.0844, + "step": 2650 + }, + { + "epoch": 1.0, + "grad_norm": 2.0009169578552246, + "learning_rate": 9.781809045226131e-06, + "loss": 0.0817, + "step": 2675 + }, + { + "epoch": 1.01, + "grad_norm": 1.7544523477554321, + "learning_rate": 9.77929648241206e-06, + "loss": 0.0742, + "step": 2700 + }, + { + "epoch": 1.02, + "grad_norm": 1.4134057760238647, + "learning_rate": 9.77678391959799e-06, + "loss": 0.0709, + "step": 2725 + }, + { + "epoch": 1.03, + "grad_norm": 1.7542369365692139, + "learning_rate": 9.774271356783921e-06, + "loss": 0.0693, + "step": 2750 + }, + { + "epoch": 1.03, + "grad_norm": 1.5171037912368774, + "learning_rate": 9.77175879396985e-06, + "loss": 0.07, + "step": 2775 + }, + { + "epoch": 1.04, + "grad_norm": 1.3078001737594604, + "learning_rate": 9.76924623115578e-06, + "loss": 0.0692, + "step": 2800 + }, + { + "epoch": 1.05, + "grad_norm": 1.4517347812652588, + "learning_rate": 9.766733668341709e-06, + "loss": 0.0702, + "step": 2825 + }, + { + "epoch": 1.06, + "grad_norm": 1.2909682989120483, + "learning_rate": 9.76422110552764e-06, + "loss": 0.0678, + "step": 2850 + }, + { + "epoch": 1.07, + "grad_norm": 1.7864623069763184, + "learning_rate": 9.761708542713568e-06, + "loss": 0.0655, + "step": 2875 + }, + { + "epoch": 1.08, + "grad_norm": 1.5588215589523315, + "learning_rate": 9.759195979899499e-06, + "loss": 0.0681, + "step": 2900 + }, + { + "epoch": 1.09, + "grad_norm": 1.7213218212127686, + "learning_rate": 9.756683417085428e-06, + "loss": 0.0677, + "step": 2925 + }, + { + "epoch": 1.1, + "grad_norm": 1.80104660987854, + "learning_rate": 9.754170854271357e-06, + "loss": 0.0637, + "step": 2950 + }, + { + "epoch": 1.11, + "grad_norm": 1.380069613456726, + "learning_rate": 9.751658291457288e-06, + "loss": 0.0688, + "step": 2975 + }, + { + "epoch": 1.12, + "grad_norm": 1.5853376388549805, + "learning_rate": 9.749145728643216e-06, + "loss": 0.0673, + "step": 3000 + }, + { + "epoch": 1.12, + "eval_loss": 0.06934670358896255, + "eval_runtime": 2236.9422, + "eval_samples_per_second": 0.671, + "eval_steps_per_second": 0.671, + "eval_wer": 21.921165021354057, + "step": 3000 + }, + { + "epoch": 1.13, + "grad_norm": 1.310860514640808, + "learning_rate": 9.746633165829147e-06, + "loss": 0.0661, + "step": 3025 + }, + { + "epoch": 1.14, + "grad_norm": 1.6232872009277344, + "learning_rate": 9.744120603015076e-06, + "loss": 0.061, + "step": 3050 + }, + { + "epoch": 1.15, + "grad_norm": 1.574886441230774, + "learning_rate": 9.741608040201006e-06, + "loss": 0.0675, + "step": 3075 + }, + { + "epoch": 1.16, + "grad_norm": 1.493812084197998, + "learning_rate": 9.739095477386935e-06, + "loss": 0.0687, + "step": 3100 + }, + { + "epoch": 1.17, + "grad_norm": 1.80805242061615, + "learning_rate": 9.736582914572866e-06, + "loss": 0.0681, + "step": 3125 + }, + { + "epoch": 1.17, + "grad_norm": 1.510184645652771, + "learning_rate": 9.734070351758794e-06, + "loss": 0.0668, + "step": 3150 + }, + { + "epoch": 1.18, + "grad_norm": 1.7584396600723267, + "learning_rate": 9.731557788944725e-06, + "loss": 0.0652, + "step": 3175 + }, + { + "epoch": 1.19, + "grad_norm": 1.8080196380615234, + "learning_rate": 9.729045226130654e-06, + "loss": 0.0626, + "step": 3200 + }, + { + "epoch": 1.2, + "grad_norm": 1.425897479057312, + "learning_rate": 9.726532663316583e-06, + "loss": 0.0668, + "step": 3225 + }, + { + "epoch": 1.21, + "grad_norm": 1.9712064266204834, + "learning_rate": 9.724020100502514e-06, + "loss": 0.0636, + "step": 3250 + }, + { + "epoch": 1.22, + "grad_norm": 1.4300082921981812, + "learning_rate": 9.721507537688444e-06, + "loss": 0.0613, + "step": 3275 + }, + { + "epoch": 1.23, + "grad_norm": 1.8500912189483643, + "learning_rate": 9.718994974874373e-06, + "loss": 0.064, + "step": 3300 + }, + { + "epoch": 1.24, + "grad_norm": 1.7373535633087158, + "learning_rate": 9.716482412060302e-06, + "loss": 0.064, + "step": 3325 + }, + { + "epoch": 1.25, + "grad_norm": 2.2244908809661865, + "learning_rate": 9.713969849246232e-06, + "loss": 0.0687, + "step": 3350 + }, + { + "epoch": 1.26, + "grad_norm": 1.6268609762191772, + "learning_rate": 9.711457286432163e-06, + "loss": 0.0653, + "step": 3375 + }, + { + "epoch": 1.27, + "grad_norm": 1.662995457649231, + "learning_rate": 9.708944723618092e-06, + "loss": 0.0627, + "step": 3400 + }, + { + "epoch": 1.28, + "grad_norm": 1.7642053365707397, + "learning_rate": 9.706432160804021e-06, + "loss": 0.0646, + "step": 3425 + }, + { + "epoch": 1.29, + "grad_norm": 1.6439318656921387, + "learning_rate": 9.70391959798995e-06, + "loss": 0.0633, + "step": 3450 + }, + { + "epoch": 1.3, + "grad_norm": 1.4607093334197998, + "learning_rate": 9.70140703517588e-06, + "loss": 0.0639, + "step": 3475 + }, + { + "epoch": 1.3, + "grad_norm": 2.177962064743042, + "learning_rate": 9.698894472361809e-06, + "loss": 0.0665, + "step": 3500 + }, + { + "epoch": 1.31, + "grad_norm": 1.2632800340652466, + "learning_rate": 9.69638190954774e-06, + "loss": 0.0594, + "step": 3525 + }, + { + "epoch": 1.32, + "grad_norm": 1.6567246913909912, + "learning_rate": 9.69386934673367e-06, + "loss": 0.061, + "step": 3550 + }, + { + "epoch": 1.33, + "grad_norm": 1.509310007095337, + "learning_rate": 9.691356783919599e-06, + "loss": 0.0618, + "step": 3575 + }, + { + "epoch": 1.34, + "grad_norm": 1.7150044441223145, + "learning_rate": 9.688844221105528e-06, + "loss": 0.0623, + "step": 3600 + }, + { + "epoch": 1.35, + "grad_norm": 1.6633011102676392, + "learning_rate": 9.686331658291457e-06, + "loss": 0.061, + "step": 3625 + }, + { + "epoch": 1.36, + "grad_norm": 1.5182716846466064, + "learning_rate": 9.683819095477388e-06, + "loss": 0.0611, + "step": 3650 + }, + { + "epoch": 1.37, + "grad_norm": 1.4435360431671143, + "learning_rate": 9.681306532663318e-06, + "loss": 0.0635, + "step": 3675 + }, + { + "epoch": 1.38, + "grad_norm": 1.765788197517395, + "learning_rate": 9.678793969849247e-06, + "loss": 0.0615, + "step": 3700 + }, + { + "epoch": 1.39, + "grad_norm": 2.060460329055786, + "learning_rate": 9.676281407035176e-06, + "loss": 0.0601, + "step": 3725 + }, + { + "epoch": 1.4, + "grad_norm": 1.5626099109649658, + "learning_rate": 9.673768844221106e-06, + "loss": 0.057, + "step": 3750 + }, + { + "epoch": 1.41, + "grad_norm": 1.668258547782898, + "learning_rate": 9.671256281407035e-06, + "loss": 0.0638, + "step": 3775 + }, + { + "epoch": 1.42, + "grad_norm": 1.33029305934906, + "learning_rate": 9.668743718592966e-06, + "loss": 0.0582, + "step": 3800 + }, + { + "epoch": 1.43, + "grad_norm": 1.4946404695510864, + "learning_rate": 9.666231155778895e-06, + "loss": 0.0621, + "step": 3825 + }, + { + "epoch": 1.44, + "grad_norm": 1.615717887878418, + "learning_rate": 9.663718592964825e-06, + "loss": 0.064, + "step": 3850 + }, + { + "epoch": 1.44, + "grad_norm": 1.4921592473983765, + "learning_rate": 9.661206030150754e-06, + "loss": 0.0626, + "step": 3875 + }, + { + "epoch": 1.45, + "grad_norm": 1.504957675933838, + "learning_rate": 9.658693467336683e-06, + "loss": 0.0615, + "step": 3900 + }, + { + "epoch": 1.46, + "grad_norm": 1.9894205331802368, + "learning_rate": 9.656180904522614e-06, + "loss": 0.0624, + "step": 3925 + }, + { + "epoch": 1.47, + "grad_norm": 1.508943796157837, + "learning_rate": 9.653668341708544e-06, + "loss": 0.0554, + "step": 3950 + }, + { + "epoch": 1.48, + "grad_norm": 2.001286745071411, + "learning_rate": 9.651155778894473e-06, + "loss": 0.0614, + "step": 3975 + }, + { + "epoch": 1.49, + "grad_norm": 1.762336015701294, + "learning_rate": 9.648643216080404e-06, + "loss": 0.0591, + "step": 4000 + }, + { + "epoch": 1.49, + "eval_loss": 0.06640864163637161, + "eval_runtime": 2227.4821, + "eval_samples_per_second": 0.673, + "eval_steps_per_second": 0.673, + "eval_wer": 21.816145067562836, + "step": 4000 + }, + { + "epoch": 1.5, + "grad_norm": 1.2514408826828003, + "learning_rate": 9.646130653266332e-06, + "loss": 0.0598, + "step": 4025 + }, + { + "epoch": 1.51, + "grad_norm": 1.247432827949524, + "learning_rate": 9.643618090452263e-06, + "loss": 0.0581, + "step": 4050 + }, + { + "epoch": 1.52, + "grad_norm": 1.4645581245422363, + "learning_rate": 9.641105527638192e-06, + "loss": 0.0604, + "step": 4075 + }, + { + "epoch": 1.53, + "grad_norm": 1.4691734313964844, + "learning_rate": 9.638592964824121e-06, + "loss": 0.0632, + "step": 4100 + }, + { + "epoch": 1.54, + "grad_norm": 1.670544147491455, + "learning_rate": 9.63608040201005e-06, + "loss": 0.0589, + "step": 4125 + }, + { + "epoch": 1.55, + "grad_norm": 1.5909396409988403, + "learning_rate": 9.63356783919598e-06, + "loss": 0.0623, + "step": 4150 + }, + { + "epoch": 1.56, + "grad_norm": 1.797534465789795, + "learning_rate": 9.63105527638191e-06, + "loss": 0.061, + "step": 4175 + }, + { + "epoch": 1.57, + "grad_norm": 1.4318104982376099, + "learning_rate": 9.62854271356784e-06, + "loss": 0.0608, + "step": 4200 + }, + { + "epoch": 1.58, + "grad_norm": 1.4532891511917114, + "learning_rate": 9.62603015075377e-06, + "loss": 0.057, + "step": 4225 + }, + { + "epoch": 1.58, + "grad_norm": 1.2360248565673828, + "learning_rate": 9.623517587939699e-06, + "loss": 0.0583, + "step": 4250 + }, + { + "epoch": 1.59, + "grad_norm": 1.6231979131698608, + "learning_rate": 9.62100502512563e-06, + "loss": 0.0632, + "step": 4275 + }, + { + "epoch": 1.6, + "grad_norm": 1.7850878238677979, + "learning_rate": 9.618492462311558e-06, + "loss": 0.061, + "step": 4300 + }, + { + "epoch": 1.61, + "grad_norm": 1.3940763473510742, + "learning_rate": 9.615979899497489e-06, + "loss": 0.0623, + "step": 4325 + }, + { + "epoch": 1.62, + "grad_norm": 1.4973020553588867, + "learning_rate": 9.613467336683418e-06, + "loss": 0.0565, + "step": 4350 + }, + { + "epoch": 1.63, + "grad_norm": 1.359133005142212, + "learning_rate": 9.610954773869347e-06, + "loss": 0.0577, + "step": 4375 + }, + { + "epoch": 1.64, + "grad_norm": 1.5089136362075806, + "learning_rate": 9.608442211055277e-06, + "loss": 0.0537, + "step": 4400 + }, + { + "epoch": 1.65, + "grad_norm": 1.5037823915481567, + "learning_rate": 9.605929648241206e-06, + "loss": 0.0566, + "step": 4425 + }, + { + "epoch": 1.66, + "grad_norm": 2.4112510681152344, + "learning_rate": 9.603417085427137e-06, + "loss": 0.0596, + "step": 4450 + }, + { + "epoch": 1.67, + "grad_norm": 1.4333964586257935, + "learning_rate": 9.600904522613066e-06, + "loss": 0.0587, + "step": 4475 + }, + { + "epoch": 1.68, + "grad_norm": 1.5937707424163818, + "learning_rate": 9.598391959798996e-06, + "loss": 0.055, + "step": 4500 + }, + { + "epoch": 1.69, + "grad_norm": 1.1385657787322998, + "learning_rate": 9.595879396984925e-06, + "loss": 0.0537, + "step": 4525 + }, + { + "epoch": 1.7, + "grad_norm": 1.7023475170135498, + "learning_rate": 9.593366834170856e-06, + "loss": 0.0584, + "step": 4550 + }, + { + "epoch": 1.71, + "grad_norm": 1.4307479858398438, + "learning_rate": 9.590854271356784e-06, + "loss": 0.0559, + "step": 4575 + }, + { + "epoch": 1.72, + "grad_norm": 2.2201151847839355, + "learning_rate": 9.588341708542715e-06, + "loss": 0.0563, + "step": 4600 + }, + { + "epoch": 1.72, + "grad_norm": 1.4959324598312378, + "learning_rate": 9.585829145728644e-06, + "loss": 0.0613, + "step": 4625 + }, + { + "epoch": 1.73, + "grad_norm": 1.7607614994049072, + "learning_rate": 9.583316582914573e-06, + "loss": 0.0532, + "step": 4650 + }, + { + "epoch": 1.74, + "grad_norm": 1.5020750761032104, + "learning_rate": 9.580804020100504e-06, + "loss": 0.0558, + "step": 4675 + }, + { + "epoch": 1.75, + "grad_norm": 1.5090382099151611, + "learning_rate": 9.578291457286432e-06, + "loss": 0.0534, + "step": 4700 + }, + { + "epoch": 1.76, + "grad_norm": 1.3799227476119995, + "learning_rate": 9.575778894472363e-06, + "loss": 0.0586, + "step": 4725 + }, + { + "epoch": 1.77, + "grad_norm": 1.396539330482483, + "learning_rate": 9.573266331658292e-06, + "loss": 0.0558, + "step": 4750 + }, + { + "epoch": 1.78, + "grad_norm": 1.4134207963943481, + "learning_rate": 9.570753768844222e-06, + "loss": 0.0553, + "step": 4775 + }, + { + "epoch": 1.79, + "grad_norm": 1.6647472381591797, + "learning_rate": 9.568241206030151e-06, + "loss": 0.056, + "step": 4800 + }, + { + "epoch": 1.8, + "grad_norm": 1.938499927520752, + "learning_rate": 9.565728643216082e-06, + "loss": 0.0554, + "step": 4825 + }, + { + "epoch": 1.81, + "grad_norm": 1.520825982093811, + "learning_rate": 9.563216080402011e-06, + "loss": 0.0566, + "step": 4850 + }, + { + "epoch": 1.82, + "grad_norm": 1.4681768417358398, + "learning_rate": 9.56070351758794e-06, + "loss": 0.0619, + "step": 4875 + }, + { + "epoch": 1.83, + "grad_norm": 1.7713901996612549, + "learning_rate": 9.55819095477387e-06, + "loss": 0.0544, + "step": 4900 + }, + { + "epoch": 1.84, + "grad_norm": 1.5401997566223145, + "learning_rate": 9.5556783919598e-06, + "loss": 0.0561, + "step": 4925 + }, + { + "epoch": 1.85, + "grad_norm": 1.4646130800247192, + "learning_rate": 9.55316582914573e-06, + "loss": 0.0546, + "step": 4950 + }, + { + "epoch": 1.85, + "grad_norm": 1.4006428718566895, + "learning_rate": 9.550653266331658e-06, + "loss": 0.0529, + "step": 4975 + }, + { + "epoch": 1.86, + "grad_norm": 1.3693753480911255, + "learning_rate": 9.548140703517589e-06, + "loss": 0.0507, + "step": 5000 + }, + { + "epoch": 1.86, + "eval_loss": 0.06456360965967178, + "eval_runtime": 2490.307, + "eval_samples_per_second": 0.602, + "eval_steps_per_second": 0.602, + "eval_wer": 21.578099838969404, + "step": 5000 + }, + { + "epoch": 1.87, + "grad_norm": 1.0426770448684692, + "learning_rate": 9.545628140703518e-06, + "loss": 0.056, + "step": 5025 + }, + { + "epoch": 1.88, + "grad_norm": 1.7432862520217896, + "learning_rate": 9.543115577889448e-06, + "loss": 0.0555, + "step": 5050 + }, + { + "epoch": 1.89, + "grad_norm": 1.4107834100723267, + "learning_rate": 9.540603015075379e-06, + "loss": 0.0563, + "step": 5075 + }, + { + "epoch": 1.9, + "grad_norm": 1.6217355728149414, + "learning_rate": 9.538090452261308e-06, + "loss": 0.0573, + "step": 5100 + }, + { + "epoch": 1.91, + "grad_norm": 1.5388691425323486, + "learning_rate": 9.535577889447237e-06, + "loss": 0.0547, + "step": 5125 + }, + { + "epoch": 1.92, + "grad_norm": 1.6691325902938843, + "learning_rate": 9.533065326633166e-06, + "loss": 0.0544, + "step": 5150 + }, + { + "epoch": 1.93, + "grad_norm": 1.5918898582458496, + "learning_rate": 9.530552763819096e-06, + "loss": 0.0555, + "step": 5175 + }, + { + "epoch": 1.94, + "grad_norm": 1.2522741556167603, + "learning_rate": 9.528040201005025e-06, + "loss": 0.0571, + "step": 5200 + }, + { + "epoch": 1.95, + "grad_norm": 1.325069546699524, + "learning_rate": 9.525527638190956e-06, + "loss": 0.0495, + "step": 5225 + }, + { + "epoch": 1.96, + "grad_norm": 1.659081220626831, + "learning_rate": 9.523015075376885e-06, + "loss": 0.0532, + "step": 5250 + }, + { + "epoch": 1.97, + "grad_norm": 1.368884563446045, + "learning_rate": 9.520502512562815e-06, + "loss": 0.0539, + "step": 5275 + }, + { + "epoch": 1.98, + "grad_norm": 1.2012147903442383, + "learning_rate": 9.517989949748744e-06, + "loss": 0.0526, + "step": 5300 + }, + { + "epoch": 1.99, + "grad_norm": 1.8066338300704956, + "learning_rate": 9.515477386934673e-06, + "loss": 0.0515, + "step": 5325 + }, + { + "epoch": 1.99, + "grad_norm": 1.5293304920196533, + "learning_rate": 9.512964824120604e-06, + "loss": 0.0519, + "step": 5350 + }, + { + "epoch": 2.0, + "grad_norm": 1.6484113931655884, + "learning_rate": 9.510452261306534e-06, + "loss": 0.0457, + "step": 5375 + }, + { + "epoch": 2.01, + "grad_norm": 1.034690022468567, + "learning_rate": 9.507939698492463e-06, + "loss": 0.0379, + "step": 5400 + }, + { + "epoch": 2.02, + "grad_norm": 1.2947874069213867, + "learning_rate": 9.505427135678392e-06, + "loss": 0.0358, + "step": 5425 + }, + { + "epoch": 2.03, + "grad_norm": 1.1398056745529175, + "learning_rate": 9.502914572864322e-06, + "loss": 0.0352, + "step": 5450 + }, + { + "epoch": 2.04, + "grad_norm": 1.2401403188705444, + "learning_rate": 9.500402010050253e-06, + "loss": 0.0381, + "step": 5475 + }, + { + "epoch": 2.05, + "grad_norm": 1.0696524381637573, + "learning_rate": 9.497889447236182e-06, + "loss": 0.0357, + "step": 5500 + }, + { + "epoch": 2.06, + "grad_norm": 1.1557295322418213, + "learning_rate": 9.495376884422111e-06, + "loss": 0.0395, + "step": 5525 + }, + { + "epoch": 2.07, + "grad_norm": 1.0979804992675781, + "learning_rate": 9.49286432160804e-06, + "loss": 0.0357, + "step": 5550 + }, + { + "epoch": 2.08, + "grad_norm": 1.4647314548492432, + "learning_rate": 9.49035175879397e-06, + "loss": 0.0349, + "step": 5575 + }, + { + "epoch": 2.09, + "grad_norm": 1.3784420490264893, + "learning_rate": 9.4878391959799e-06, + "loss": 0.0359, + "step": 5600 + }, + { + "epoch": 2.1, + "grad_norm": 1.1351981163024902, + "learning_rate": 9.48532663316583e-06, + "loss": 0.0371, + "step": 5625 + }, + { + "epoch": 2.11, + "grad_norm": 1.3296585083007812, + "learning_rate": 9.48281407035176e-06, + "loss": 0.0416, + "step": 5650 + }, + { + "epoch": 2.12, + "grad_norm": 1.1919134855270386, + "learning_rate": 9.480301507537689e-06, + "loss": 0.0355, + "step": 5675 + }, + { + "epoch": 2.13, + "grad_norm": 1.286449909210205, + "learning_rate": 9.47778894472362e-06, + "loss": 0.036, + "step": 5700 + }, + { + "epoch": 2.13, + "grad_norm": 1.2854489088058472, + "learning_rate": 9.475276381909548e-06, + "loss": 0.0347, + "step": 5725 + }, + { + "epoch": 2.14, + "grad_norm": 1.3352183103561401, + "learning_rate": 9.472763819095479e-06, + "loss": 0.0355, + "step": 5750 + }, + { + "epoch": 2.15, + "grad_norm": 1.5323823690414429, + "learning_rate": 9.470251256281408e-06, + "loss": 0.0351, + "step": 5775 + }, + { + "epoch": 2.16, + "grad_norm": 1.5249899625778198, + "learning_rate": 9.467738693467337e-06, + "loss": 0.0363, + "step": 5800 + }, + { + "epoch": 2.17, + "grad_norm": 1.213335633277893, + "learning_rate": 9.465226130653267e-06, + "loss": 0.0372, + "step": 5825 + }, + { + "epoch": 2.18, + "grad_norm": 1.2419792413711548, + "learning_rate": 9.462713567839196e-06, + "loss": 0.0346, + "step": 5850 + }, + { + "epoch": 2.19, + "grad_norm": 1.1097544431686401, + "learning_rate": 9.460201005025127e-06, + "loss": 0.0372, + "step": 5875 + }, + { + "epoch": 2.2, + "grad_norm": 0.9694799184799194, + "learning_rate": 9.457688442211056e-06, + "loss": 0.0353, + "step": 5900 + }, + { + "epoch": 2.21, + "grad_norm": 1.139106273651123, + "learning_rate": 9.455175879396986e-06, + "loss": 0.0394, + "step": 5925 + }, + { + "epoch": 2.22, + "grad_norm": 1.4645527601242065, + "learning_rate": 9.452663316582915e-06, + "loss": 0.0369, + "step": 5950 + }, + { + "epoch": 2.23, + "grad_norm": 1.127549648284912, + "learning_rate": 9.450150753768846e-06, + "loss": 0.0334, + "step": 5975 + }, + { + "epoch": 2.24, + "grad_norm": 1.4078933000564575, + "learning_rate": 9.447638190954774e-06, + "loss": 0.0366, + "step": 6000 + }, + { + "epoch": 2.24, + "eval_loss": 0.06788910180330276, + "eval_runtime": 2273.3683, + "eval_samples_per_second": 0.66, + "eval_steps_per_second": 0.66, + "eval_wer": 21.56409717846391, + "step": 6000 + }, + { + "epoch": 2.25, + "grad_norm": 1.0318453311920166, + "learning_rate": 9.445125628140705e-06, + "loss": 0.0348, + "step": 6025 + }, + { + "epoch": 2.26, + "grad_norm": 1.2691484689712524, + "learning_rate": 9.442613065326634e-06, + "loss": 0.0351, + "step": 6050 + }, + { + "epoch": 2.27, + "grad_norm": 1.408375859260559, + "learning_rate": 9.440100502512563e-06, + "loss": 0.0368, + "step": 6075 + }, + { + "epoch": 2.27, + "grad_norm": 1.3626328706741333, + "learning_rate": 9.437587939698494e-06, + "loss": 0.0342, + "step": 6100 + }, + { + "epoch": 2.28, + "grad_norm": 1.1270478963851929, + "learning_rate": 9.435075376884422e-06, + "loss": 0.0351, + "step": 6125 + }, + { + "epoch": 2.29, + "grad_norm": 1.1099445819854736, + "learning_rate": 9.432562814070353e-06, + "loss": 0.0352, + "step": 6150 + }, + { + "epoch": 2.3, + "grad_norm": 1.3389893770217896, + "learning_rate": 9.430050251256282e-06, + "loss": 0.0336, + "step": 6175 + }, + { + "epoch": 2.31, + "grad_norm": 1.3984408378601074, + "learning_rate": 9.427537688442212e-06, + "loss": 0.0348, + "step": 6200 + }, + { + "epoch": 2.32, + "grad_norm": 1.1349256038665771, + "learning_rate": 9.425025125628141e-06, + "loss": 0.0342, + "step": 6225 + }, + { + "epoch": 2.33, + "grad_norm": 1.330540657043457, + "learning_rate": 9.422512562814072e-06, + "loss": 0.037, + "step": 6250 + }, + { + "epoch": 2.34, + "grad_norm": 1.164422631263733, + "learning_rate": 9.42e-06, + "loss": 0.0346, + "step": 6275 + }, + { + "epoch": 2.35, + "grad_norm": 1.359889030456543, + "learning_rate": 9.41748743718593e-06, + "loss": 0.0362, + "step": 6300 + }, + { + "epoch": 2.36, + "grad_norm": 1.5384395122528076, + "learning_rate": 9.41497487437186e-06, + "loss": 0.0372, + "step": 6325 + }, + { + "epoch": 2.37, + "grad_norm": 1.5105488300323486, + "learning_rate": 9.41246231155779e-06, + "loss": 0.0355, + "step": 6350 + }, + { + "epoch": 2.38, + "grad_norm": 1.0129766464233398, + "learning_rate": 9.40994974874372e-06, + "loss": 0.0328, + "step": 6375 + }, + { + "epoch": 2.39, + "grad_norm": 1.4524303674697876, + "learning_rate": 9.407437185929648e-06, + "loss": 0.0336, + "step": 6400 + }, + { + "epoch": 2.4, + "grad_norm": 1.5792168378829956, + "learning_rate": 9.404924623115579e-06, + "loss": 0.0343, + "step": 6425 + }, + { + "epoch": 2.4, + "grad_norm": 1.205230712890625, + "learning_rate": 9.402412060301508e-06, + "loss": 0.0351, + "step": 6450 + }, + { + "epoch": 2.41, + "grad_norm": 1.2349803447723389, + "learning_rate": 9.399899497487438e-06, + "loss": 0.035, + "step": 6475 + }, + { + "epoch": 2.42, + "grad_norm": 1.246424913406372, + "learning_rate": 9.397386934673369e-06, + "loss": 0.0362, + "step": 6500 + }, + { + "epoch": 2.43, + "grad_norm": 1.2710341215133667, + "learning_rate": 9.394874371859298e-06, + "loss": 0.0359, + "step": 6525 + }, + { + "epoch": 2.44, + "grad_norm": 1.4082512855529785, + "learning_rate": 9.392361809045227e-06, + "loss": 0.0346, + "step": 6550 + }, + { + "epoch": 2.45, + "grad_norm": 1.184995412826538, + "learning_rate": 9.389849246231157e-06, + "loss": 0.0336, + "step": 6575 + }, + { + "epoch": 2.46, + "grad_norm": 1.1788533926010132, + "learning_rate": 9.387336683417086e-06, + "loss": 0.0349, + "step": 6600 + }, + { + "epoch": 2.47, + "grad_norm": 1.1376091241836548, + "learning_rate": 9.384824120603015e-06, + "loss": 0.0359, + "step": 6625 + }, + { + "epoch": 2.48, + "grad_norm": 1.0417834520339966, + "learning_rate": 9.382311557788946e-06, + "loss": 0.0332, + "step": 6650 + }, + { + "epoch": 2.49, + "grad_norm": 0.9300472140312195, + "learning_rate": 9.379798994974874e-06, + "loss": 0.034, + "step": 6675 + }, + { + "epoch": 2.5, + "grad_norm": 1.0279123783111572, + "learning_rate": 9.377286432160805e-06, + "loss": 0.0334, + "step": 6700 + }, + { + "epoch": 2.51, + "grad_norm": 1.1813184022903442, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0344, + "step": 6725 + }, + { + "epoch": 2.52, + "grad_norm": 1.0494869947433472, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0319, + "step": 6750 + }, + { + "epoch": 2.53, + "grad_norm": 1.355104923248291, + "learning_rate": 9.369748743718595e-06, + "loss": 0.0359, + "step": 6775 + }, + { + "epoch": 2.54, + "grad_norm": 1.0900795459747314, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0323, + "step": 6800 + }, + { + "epoch": 2.54, + "grad_norm": 1.3175445795059204, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0342, + "step": 6825 + }, + { + "epoch": 2.55, + "grad_norm": 1.5386576652526855, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0333, + "step": 6850 + }, + { + "epoch": 2.56, + "grad_norm": 1.603754997253418, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0348, + "step": 6875 + }, + { + "epoch": 2.57, + "grad_norm": 1.3384333848953247, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0359, + "step": 6900 + }, + { + "epoch": 2.58, + "grad_norm": 1.3015717267990112, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0348, + "step": 6925 + }, + { + "epoch": 2.59, + "grad_norm": 1.3948748111724854, + "learning_rate": 9.352160804020101e-06, + "loss": 0.036, + "step": 6950 + }, + { + "epoch": 2.6, + "grad_norm": 1.1874706745147705, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0329, + "step": 6975 + }, + { + "epoch": 2.61, + "grad_norm": 1.3981599807739258, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0338, + "step": 7000 + }, + { + "epoch": 2.61, + "eval_loss": 0.06913278251886368, + "eval_runtime": 2296.4069, + "eval_samples_per_second": 0.653, + "eval_steps_per_second": 0.653, + "eval_wer": 20.737940208639642, + "step": 7000 + }, + { + "epoch": 2.62, + "grad_norm": 1.461983561515808, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0358, + "step": 7025 + }, + { + "epoch": 2.63, + "grad_norm": 1.5573300123214722, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0328, + "step": 7050 + }, + { + "epoch": 2.64, + "grad_norm": 1.4316810369491577, + "learning_rate": 9.33959798994975e-06, + "loss": 0.036, + "step": 7075 + }, + { + "epoch": 2.65, + "grad_norm": 1.2934224605560303, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0329, + "step": 7100 + }, + { + "epoch": 2.66, + "grad_norm": 1.132033109664917, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0309, + "step": 7125 + }, + { + "epoch": 2.67, + "grad_norm": 1.1742990016937256, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0343, + "step": 7150 + }, + { + "epoch": 2.68, + "grad_norm": 1.2794122695922852, + "learning_rate": 9.329547738693469e-06, + "loss": 0.0328, + "step": 7175 + }, + { + "epoch": 2.68, + "grad_norm": 1.195777177810669, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0352, + "step": 7200 + }, + { + "epoch": 2.69, + "grad_norm": 1.2220577001571655, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0332, + "step": 7225 + }, + { + "epoch": 2.7, + "grad_norm": 0.9668141603469849, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0319, + "step": 7250 + }, + { + "epoch": 2.71, + "grad_norm": 1.3455289602279663, + "learning_rate": 9.319497487437186e-06, + "loss": 0.034, + "step": 7275 + }, + { + "epoch": 2.72, + "grad_norm": 1.315292239189148, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0332, + "step": 7300 + }, + { + "epoch": 2.73, + "grad_norm": 1.3745348453521729, + "learning_rate": 9.314472361809046e-06, + "loss": 0.034, + "step": 7325 + }, + { + "epoch": 2.74, + "grad_norm": 1.467222809791565, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0315, + "step": 7350 + }, + { + "epoch": 2.75, + "grad_norm": 1.1673460006713867, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0324, + "step": 7375 + }, + { + "epoch": 2.76, + "grad_norm": 1.4068024158477783, + "learning_rate": 9.306934673366836e-06, + "loss": 0.0327, + "step": 7400 + }, + { + "epoch": 2.77, + "grad_norm": 1.301135778427124, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0299, + "step": 7425 + }, + { + "epoch": 2.78, + "grad_norm": 1.3898648023605347, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0332, + "step": 7450 + }, + { + "epoch": 2.79, + "grad_norm": 1.4496641159057617, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0342, + "step": 7475 + }, + { + "epoch": 2.8, + "grad_norm": 1.2946183681488037, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0345, + "step": 7500 + }, + { + "epoch": 2.81, + "grad_norm": 1.3121036291122437, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0323, + "step": 7525 + }, + { + "epoch": 2.82, + "grad_norm": 1.4947439432144165, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0357, + "step": 7550 + }, + { + "epoch": 2.82, + "grad_norm": 1.4542853832244873, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0306, + "step": 7575 + }, + { + "epoch": 2.83, + "grad_norm": 1.4533597230911255, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0337, + "step": 7600 + }, + { + "epoch": 2.84, + "grad_norm": 1.1914777755737305, + "learning_rate": 9.284321608040202e-06, + "loss": 0.0334, + "step": 7625 + }, + { + "epoch": 2.85, + "grad_norm": 1.0799322128295898, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0336, + "step": 7650 + }, + { + "epoch": 2.86, + "grad_norm": 1.2178218364715576, + "learning_rate": 9.279296482412062e-06, + "loss": 0.0332, + "step": 7675 + }, + { + "epoch": 2.87, + "grad_norm": 1.2499667406082153, + "learning_rate": 9.27678391959799e-06, + "loss": 0.0308, + "step": 7700 + }, + { + "epoch": 2.88, + "grad_norm": 1.1616662740707397, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0319, + "step": 7725 + }, + { + "epoch": 2.89, + "grad_norm": 1.478765606880188, + "learning_rate": 9.27175879396985e-06, + "loss": 0.031, + "step": 7750 + }, + { + "epoch": 2.9, + "grad_norm": 1.2567527294158936, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0336, + "step": 7775 + }, + { + "epoch": 2.91, + "grad_norm": 1.185059666633606, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0321, + "step": 7800 + }, + { + "epoch": 2.92, + "grad_norm": 1.3632742166519165, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0313, + "step": 7825 + }, + { + "epoch": 2.93, + "grad_norm": 1.5476229190826416, + "learning_rate": 9.261708542713569e-06, + "loss": 0.0324, + "step": 7850 + }, + { + "epoch": 2.94, + "grad_norm": 1.2684345245361328, + "learning_rate": 9.259195979899498e-06, + "loss": 0.0324, + "step": 7875 + }, + { + "epoch": 2.95, + "grad_norm": 0.9792506098747253, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0324, + "step": 7900 + }, + { + "epoch": 2.95, + "grad_norm": 1.3645673990249634, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0303, + "step": 7925 + }, + { + "epoch": 2.96, + "grad_norm": 1.2746405601501465, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0322, + "step": 7950 + }, + { + "epoch": 2.97, + "grad_norm": 1.353850245475769, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0324, + "step": 7975 + }, + { + "epoch": 2.98, + "grad_norm": 1.4102460145950317, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0316, + "step": 8000 + }, + { + "epoch": 2.98, + "eval_loss": 0.06896308809518814, + "eval_runtime": 2265.0339, + "eval_samples_per_second": 0.662, + "eval_steps_per_second": 0.662, + "eval_wer": 21.291045298606733, + "step": 8000 + }, + { + "epoch": 2.99, + "grad_norm": 1.450993299484253, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0316, + "step": 8025 + }, + { + "epoch": 3.0, + "grad_norm": 0.8826387524604797, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0305, + "step": 8050 + }, + { + "epoch": 3.01, + "grad_norm": 0.6101108193397522, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0191, + "step": 8075 + }, + { + "epoch": 3.02, + "grad_norm": 0.8705685138702393, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0178, + "step": 8100 + }, + { + "epoch": 3.03, + "grad_norm": 0.9695071578025818, + "learning_rate": 9.234070351758795e-06, + "loss": 0.018, + "step": 8125 + }, + { + "epoch": 3.04, + "grad_norm": 1.1801904439926147, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0195, + "step": 8150 + }, + { + "epoch": 3.05, + "grad_norm": 0.9887425899505615, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0198, + "step": 8175 + }, + { + "epoch": 3.06, + "grad_norm": 0.8782283663749695, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0179, + "step": 8200 + }, + { + "epoch": 3.07, + "grad_norm": 1.4336392879486084, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0184, + "step": 8225 + }, + { + "epoch": 3.08, + "grad_norm": 1.047086477279663, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0177, + "step": 8250 + }, + { + "epoch": 3.09, + "grad_norm": 1.2881505489349365, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0184, + "step": 8275 + }, + { + "epoch": 3.09, + "grad_norm": 1.1941720247268677, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0204, + "step": 8300 + }, + { + "epoch": 3.1, + "grad_norm": 1.1765064001083374, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0198, + "step": 8325 + }, + { + "epoch": 3.11, + "grad_norm": 1.0088229179382324, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0185, + "step": 8350 + }, + { + "epoch": 3.12, + "grad_norm": 1.0748648643493652, + "learning_rate": 9.20894472361809e-06, + "loss": 0.018, + "step": 8375 + }, + { + "epoch": 3.13, + "grad_norm": 0.9797239899635315, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0179, + "step": 8400 + }, + { + "epoch": 3.14, + "grad_norm": 1.220316767692566, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0183, + "step": 8425 + }, + { + "epoch": 3.15, + "grad_norm": 1.1836146116256714, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0183, + "step": 8450 + }, + { + "epoch": 3.16, + "grad_norm": 1.185672640800476, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0183, + "step": 8475 + }, + { + "epoch": 3.17, + "grad_norm": 1.2399791479110718, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0191, + "step": 8500 + }, + { + "epoch": 3.18, + "grad_norm": 1.1062277555465698, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0188, + "step": 8525 + }, + { + "epoch": 3.19, + "grad_norm": 1.5900242328643799, + "learning_rate": 9.191356783919599e-06, + "loss": 0.021, + "step": 8550 + }, + { + "epoch": 3.2, + "grad_norm": 1.0431650876998901, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0179, + "step": 8575 + }, + { + "epoch": 3.21, + "grad_norm": 1.3932257890701294, + "learning_rate": 9.186331658291459e-06, + "loss": 0.018, + "step": 8600 + }, + { + "epoch": 3.22, + "grad_norm": 1.3570228815078735, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0218, + "step": 8625 + }, + { + "epoch": 3.23, + "grad_norm": 1.2214909791946411, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0182, + "step": 8650 + }, + { + "epoch": 3.23, + "grad_norm": 0.9871219396591187, + "learning_rate": 9.178793969849247e-06, + "loss": 0.019, + "step": 8675 + }, + { + "epoch": 3.24, + "grad_norm": 1.422410488128662, + "learning_rate": 9.176281407035176e-06, + "loss": 0.019, + "step": 8700 + }, + { + "epoch": 3.25, + "grad_norm": 1.237478494644165, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0211, + "step": 8725 + }, + { + "epoch": 3.26, + "grad_norm": 1.23516047000885, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0177, + "step": 8750 + }, + { + "epoch": 3.27, + "grad_norm": 1.1393786668777466, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0181, + "step": 8775 + }, + { + "epoch": 3.28, + "grad_norm": 1.2143114805221558, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0203, + "step": 8800 + }, + { + "epoch": 3.29, + "grad_norm": 1.3092873096466064, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0201, + "step": 8825 + }, + { + "epoch": 3.3, + "grad_norm": 1.2897233963012695, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0204, + "step": 8850 + }, + { + "epoch": 3.31, + "grad_norm": 0.8430890440940857, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0192, + "step": 8875 + }, + { + "epoch": 3.32, + "grad_norm": 1.2527220249176025, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0189, + "step": 8900 + }, + { + "epoch": 3.33, + "grad_norm": 1.0102312564849854, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0192, + "step": 8925 + }, + { + "epoch": 3.34, + "grad_norm": 1.4019834995269775, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0188, + "step": 8950 + }, + { + "epoch": 3.35, + "grad_norm": 1.1616439819335938, + "learning_rate": 9.148643216080402e-06, + "loss": 0.019, + "step": 8975 + }, + { + "epoch": 3.36, + "grad_norm": 1.6693623065948486, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0216, + "step": 9000 + }, + { + "epoch": 3.36, + "eval_loss": 0.07881014049053192, + "eval_runtime": 2249.2596, + "eval_samples_per_second": 0.667, + "eval_steps_per_second": 0.667, + "eval_wer": 22.306238185255197, + "step": 9000 + }, + { + "epoch": 3.37, + "grad_norm": 0.902431845664978, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0169, + "step": 9025 + }, + { + "epoch": 3.37, + "grad_norm": 1.0685014724731445, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0199, + "step": 9050 + }, + { + "epoch": 3.38, + "grad_norm": 1.0157417058944702, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0194, + "step": 9075 + }, + { + "epoch": 3.39, + "grad_norm": 1.1761798858642578, + "learning_rate": 9.136080402010052e-06, + "loss": 0.02, + "step": 9100 + }, + { + "epoch": 3.4, + "grad_norm": 1.233697533607483, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0195, + "step": 9125 + }, + { + "epoch": 3.41, + "grad_norm": 0.879963219165802, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0177, + "step": 9150 + }, + { + "epoch": 3.42, + "grad_norm": 1.1067571640014648, + "learning_rate": 9.12854271356784e-06, + "loss": 0.018, + "step": 9175 + }, + { + "epoch": 3.43, + "grad_norm": 1.1815648078918457, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0198, + "step": 9200 + }, + { + "epoch": 3.44, + "grad_norm": 1.2922234535217285, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0201, + "step": 9225 + }, + { + "epoch": 3.45, + "grad_norm": 1.1947216987609863, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0186, + "step": 9250 + }, + { + "epoch": 3.46, + "grad_norm": 1.0907317399978638, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0179, + "step": 9275 + }, + { + "epoch": 3.47, + "grad_norm": 1.428252935409546, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0208, + "step": 9300 + }, + { + "epoch": 3.48, + "grad_norm": 1.289925217628479, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0208, + "step": 9325 + }, + { + "epoch": 3.49, + "grad_norm": 1.2157981395721436, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0195, + "step": 9350 + }, + { + "epoch": 3.5, + "grad_norm": 1.0517023801803589, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0201, + "step": 9375 + }, + { + "epoch": 3.5, + "grad_norm": 1.2014561891555786, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0195, + "step": 9400 + }, + { + "epoch": 3.51, + "grad_norm": 1.2882165908813477, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0193, + "step": 9425 + }, + { + "epoch": 3.52, + "grad_norm": 1.2931851148605347, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0183, + "step": 9450 + }, + { + "epoch": 3.53, + "grad_norm": 0.9528850317001343, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0185, + "step": 9475 + }, + { + "epoch": 3.54, + "grad_norm": 0.9467001557350159, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0193, + "step": 9500 + }, + { + "epoch": 3.55, + "grad_norm": 1.000643014907837, + "learning_rate": 9.093366834170854e-06, + "loss": 0.019, + "step": 9525 + }, + { + "epoch": 3.56, + "grad_norm": 1.1276652812957764, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0179, + "step": 9550 + }, + { + "epoch": 3.57, + "grad_norm": 1.1449360847473145, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0196, + "step": 9575 + }, + { + "epoch": 3.58, + "grad_norm": 0.9223894476890564, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0192, + "step": 9600 + }, + { + "epoch": 3.59, + "grad_norm": 1.1184065341949463, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0184, + "step": 9625 + }, + { + "epoch": 3.6, + "grad_norm": 1.1901638507843018, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0181, + "step": 9650 + }, + { + "epoch": 3.61, + "grad_norm": 1.408731460571289, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0172, + "step": 9675 + }, + { + "epoch": 3.62, + "grad_norm": 1.0347180366516113, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0198, + "step": 9700 + }, + { + "epoch": 3.63, + "grad_norm": 0.9249098300933838, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0177, + "step": 9725 + }, + { + "epoch": 3.64, + "grad_norm": 1.2446262836456299, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0186, + "step": 9750 + }, + { + "epoch": 3.64, + "grad_norm": 0.9974263906478882, + "learning_rate": 9.068241206030152e-06, + "loss": 0.019, + "step": 9775 + }, + { + "epoch": 3.65, + "grad_norm": 1.3222471475601196, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0187, + "step": 9800 + }, + { + "epoch": 3.66, + "grad_norm": 1.1556633710861206, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0193, + "step": 9825 + }, + { + "epoch": 3.67, + "grad_norm": 1.5632894039154053, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0194, + "step": 9850 + }, + { + "epoch": 3.68, + "grad_norm": 1.054802417755127, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0171, + "step": 9875 + }, + { + "epoch": 3.69, + "grad_norm": 1.2855322360992432, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0201, + "step": 9900 + }, + { + "epoch": 3.7, + "grad_norm": 1.2807906866073608, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0174, + "step": 9925 + }, + { + "epoch": 3.71, + "grad_norm": 1.4979244470596313, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0175, + "step": 9950 + }, + { + "epoch": 3.72, + "grad_norm": 1.2225204706192017, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0179, + "step": 9975 + }, + { + "epoch": 3.73, + "grad_norm": 1.0470653772354126, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0189, + "step": 10000 + }, + { + "epoch": 3.73, + "eval_loss": 0.07924513518810272, + "eval_runtime": 2242.9353, + "eval_samples_per_second": 0.669, + "eval_steps_per_second": 0.669, + "eval_wer": 22.453266120562905, + "step": 10000 + }, + { + "epoch": 3.74, + "grad_norm": 1.3967472314834595, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0182, + "step": 10025 + }, + { + "epoch": 3.75, + "grad_norm": 0.995073139667511, + "learning_rate": 9.040603015075378e-06, + "loss": 0.019, + "step": 10050 + }, + { + "epoch": 3.76, + "grad_norm": 1.1261428594589233, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0194, + "step": 10075 + }, + { + "epoch": 3.77, + "grad_norm": 1.2833434343338013, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0183, + "step": 10100 + }, + { + "epoch": 3.78, + "grad_norm": 1.4117071628570557, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0186, + "step": 10125 + }, + { + "epoch": 3.78, + "grad_norm": 0.9993538856506348, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0186, + "step": 10150 + }, + { + "epoch": 3.79, + "grad_norm": 1.641935110092163, + "learning_rate": 9.028040201005027e-06, + "loss": 0.018, + "step": 10175 + }, + { + "epoch": 3.8, + "grad_norm": 1.118219256401062, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0168, + "step": 10200 + }, + { + "epoch": 3.81, + "grad_norm": 1.3346420526504517, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0176, + "step": 10225 + }, + { + "epoch": 3.82, + "grad_norm": 1.3512012958526611, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0169, + "step": 10250 + }, + { + "epoch": 3.83, + "grad_norm": 1.0588525533676147, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0181, + "step": 10275 + }, + { + "epoch": 3.84, + "grad_norm": 1.0156222581863403, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0184, + "step": 10300 + }, + { + "epoch": 3.85, + "grad_norm": 1.191192626953125, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0185, + "step": 10325 + }, + { + "epoch": 3.86, + "grad_norm": 1.4096293449401855, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0191, + "step": 10350 + }, + { + "epoch": 3.87, + "grad_norm": 1.2778398990631104, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0185, + "step": 10375 + }, + { + "epoch": 3.88, + "grad_norm": 1.3192169666290283, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0189, + "step": 10400 + }, + { + "epoch": 3.89, + "grad_norm": 1.0247093439102173, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0186, + "step": 10425 + }, + { + "epoch": 3.9, + "grad_norm": 0.7680560350418091, + "learning_rate": 9.000402010050252e-06, + "loss": 0.0177, + "step": 10450 + }, + { + "epoch": 3.91, + "grad_norm": 1.1394760608673096, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0201, + "step": 10475 + }, + { + "epoch": 3.91, + "grad_norm": 1.0631847381591797, + "learning_rate": 8.995376884422111e-06, + "loss": 0.0188, + "step": 10500 + }, + { + "epoch": 3.92, + "grad_norm": 1.8103065490722656, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0186, + "step": 10525 + }, + { + "epoch": 3.93, + "grad_norm": 0.9321714043617249, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0182, + "step": 10550 + }, + { + "epoch": 3.94, + "grad_norm": 1.402673602104187, + "learning_rate": 8.9878391959799e-06, + "loss": 0.0182, + "step": 10575 + }, + { + "epoch": 3.95, + "grad_norm": 1.2429150342941284, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0172, + "step": 10600 + }, + { + "epoch": 3.96, + "grad_norm": 1.2009354829788208, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0184, + "step": 10625 + }, + { + "epoch": 3.97, + "grad_norm": 1.2849290370941162, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0167, + "step": 10650 + }, + { + "epoch": 3.98, + "grad_norm": 0.8760449886322021, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0185, + "step": 10675 + }, + { + "epoch": 3.99, + "grad_norm": 1.2248800992965698, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0202, + "step": 10700 + }, + { + "epoch": 4.0, + "grad_norm": 0.9887397289276123, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0187, + "step": 10725 + }, + { + "epoch": 4.01, + "grad_norm": 0.8907557725906372, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0114, + "step": 10750 + }, + { + "epoch": 4.02, + "grad_norm": 0.9812185764312744, + "learning_rate": 8.967738693467337e-06, + "loss": 0.0098, + "step": 10775 + }, + { + "epoch": 4.03, + "grad_norm": 1.049310326576233, + "learning_rate": 8.965226130653268e-06, + "loss": 0.0091, + "step": 10800 + }, + { + "epoch": 4.04, + "grad_norm": 0.7838900685310364, + "learning_rate": 8.962713567839196e-06, + "loss": 0.009, + "step": 10825 + }, + { + "epoch": 4.05, + "grad_norm": 1.006823182106018, + "learning_rate": 8.960201005025127e-06, + "loss": 0.0107, + "step": 10850 + }, + { + "epoch": 4.05, + "grad_norm": 0.7871661186218262, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0101, + "step": 10875 + }, + { + "epoch": 4.06, + "grad_norm": 0.8435758948326111, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0091, + "step": 10900 + }, + { + "epoch": 4.07, + "grad_norm": 0.7912623286247253, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0086, + "step": 10925 + }, + { + "epoch": 4.08, + "grad_norm": 1.006279706954956, + "learning_rate": 8.950150753768844e-06, + "loss": 0.0109, + "step": 10950 + }, + { + "epoch": 4.09, + "grad_norm": 0.8999176621437073, + "learning_rate": 8.947638190954775e-06, + "loss": 0.0102, + "step": 10975 + }, + { + "epoch": 4.1, + "grad_norm": 1.0647088289260864, + "learning_rate": 8.945125628140704e-06, + "loss": 0.01, + "step": 11000 + }, + { + "epoch": 4.1, + "eval_loss": 0.08686651289463043, + "eval_runtime": 2251.8784, + "eval_samples_per_second": 0.666, + "eval_steps_per_second": 0.666, + "eval_wer": 20.653924245606664, + "step": 11000 + }, + { + "epoch": 4.11, + "grad_norm": 0.930008590221405, + "learning_rate": 8.942613065326634e-06, + "loss": 0.0106, + "step": 11025 + }, + { + "epoch": 4.12, + "grad_norm": 1.0203288793563843, + "learning_rate": 8.940100502512563e-06, + "loss": 0.0101, + "step": 11050 + }, + { + "epoch": 4.13, + "grad_norm": 1.1082353591918945, + "learning_rate": 8.937587939698494e-06, + "loss": 0.0099, + "step": 11075 + }, + { + "epoch": 4.14, + "grad_norm": 1.1812822818756104, + "learning_rate": 8.935075376884423e-06, + "loss": 0.0102, + "step": 11100 + }, + { + "epoch": 4.15, + "grad_norm": 1.004303216934204, + "learning_rate": 8.932562814070353e-06, + "loss": 0.0087, + "step": 11125 + }, + { + "epoch": 4.16, + "grad_norm": 0.6333739161491394, + "learning_rate": 8.930050251256282e-06, + "loss": 0.0091, + "step": 11150 + }, + { + "epoch": 4.17, + "grad_norm": 1.0522359609603882, + "learning_rate": 8.927537688442211e-06, + "loss": 0.0094, + "step": 11175 + }, + { + "epoch": 4.18, + "grad_norm": 0.800334095954895, + "learning_rate": 8.925025125628142e-06, + "loss": 0.0104, + "step": 11200 + }, + { + "epoch": 4.19, + "grad_norm": 1.0384297370910645, + "learning_rate": 8.92251256281407e-06, + "loss": 0.0106, + "step": 11225 + }, + { + "epoch": 4.19, + "grad_norm": 1.0753575563430786, + "learning_rate": 8.920000000000001e-06, + "loss": 0.011, + "step": 11250 + }, + { + "epoch": 4.2, + "grad_norm": 0.7628156542778015, + "learning_rate": 8.91748743718593e-06, + "loss": 0.0096, + "step": 11275 + }, + { + "epoch": 4.21, + "grad_norm": 0.9354447722434998, + "learning_rate": 8.91497487437186e-06, + "loss": 0.0102, + "step": 11300 + }, + { + "epoch": 4.22, + "grad_norm": 1.1708073616027832, + "learning_rate": 8.91246231155779e-06, + "loss": 0.0101, + "step": 11325 + }, + { + "epoch": 4.23, + "grad_norm": 1.3981791734695435, + "learning_rate": 8.90994974874372e-06, + "loss": 0.01, + "step": 11350 + }, + { + "epoch": 4.24, + "grad_norm": 0.6953191161155701, + "learning_rate": 8.90743718592965e-06, + "loss": 0.0115, + "step": 11375 + }, + { + "epoch": 4.25, + "grad_norm": 0.8920406103134155, + "learning_rate": 8.904924623115579e-06, + "loss": 0.0107, + "step": 11400 + }, + { + "epoch": 4.26, + "grad_norm": 0.8896722197532654, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0107, + "step": 11425 + }, + { + "epoch": 4.27, + "grad_norm": 1.0321544408798218, + "learning_rate": 8.899899497487437e-06, + "loss": 0.011, + "step": 11450 + }, + { + "epoch": 4.28, + "grad_norm": 1.073574185371399, + "learning_rate": 8.897386934673368e-06, + "loss": 0.0101, + "step": 11475 + }, + { + "epoch": 4.29, + "grad_norm": 0.9501975774765015, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0109, + "step": 11500 + }, + { + "epoch": 4.3, + "grad_norm": 0.9807615280151367, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0099, + "step": 11525 + }, + { + "epoch": 4.31, + "grad_norm": 0.9472816586494446, + "learning_rate": 8.889849246231156e-06, + "loss": 0.0103, + "step": 11550 + }, + { + "epoch": 4.32, + "grad_norm": 0.7753023505210876, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0106, + "step": 11575 + }, + { + "epoch": 4.33, + "grad_norm": 1.081858515739441, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0113, + "step": 11600 + }, + { + "epoch": 4.33, + "grad_norm": 0.8008473515510559, + "learning_rate": 8.882311557788946e-06, + "loss": 0.0095, + "step": 11625 + }, + { + "epoch": 4.34, + "grad_norm": 0.972687304019928, + "learning_rate": 8.879798994974875e-06, + "loss": 0.0114, + "step": 11650 + }, + { + "epoch": 4.35, + "grad_norm": 0.9917134642601013, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0107, + "step": 11675 + }, + { + "epoch": 4.36, + "grad_norm": 1.1898962259292603, + "learning_rate": 8.874773869346734e-06, + "loss": 0.0103, + "step": 11700 + }, + { + "epoch": 4.37, + "grad_norm": 0.8604434132575989, + "learning_rate": 8.872261306532665e-06, + "loss": 0.0098, + "step": 11725 + }, + { + "epoch": 4.38, + "grad_norm": 0.9942852854728699, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0108, + "step": 11750 + }, + { + "epoch": 4.39, + "grad_norm": 1.1918721199035645, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0099, + "step": 11775 + }, + { + "epoch": 4.4, + "grad_norm": 1.2590224742889404, + "learning_rate": 8.864723618090453e-06, + "loss": 0.011, + "step": 11800 + }, + { + "epoch": 4.41, + "grad_norm": 0.8995240926742554, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0102, + "step": 11825 + }, + { + "epoch": 4.42, + "grad_norm": 0.9035536646842957, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0093, + "step": 11850 + }, + { + "epoch": 4.43, + "grad_norm": 0.87698894739151, + "learning_rate": 8.857185929648243e-06, + "loss": 0.0111, + "step": 11875 + }, + { + "epoch": 4.44, + "grad_norm": 0.9641381502151489, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0106, + "step": 11900 + }, + { + "epoch": 4.45, + "grad_norm": 1.0863053798675537, + "learning_rate": 8.852160804020101e-06, + "loss": 0.0108, + "step": 11925 + }, + { + "epoch": 4.46, + "grad_norm": 1.1828778982162476, + "learning_rate": 8.849648241206032e-06, + "loss": 0.0099, + "step": 11950 + }, + { + "epoch": 4.46, + "grad_norm": 1.395397424697876, + "learning_rate": 8.84713567839196e-06, + "loss": 0.0114, + "step": 11975 + }, + { + "epoch": 4.47, + "grad_norm": 1.041142225265503, + "learning_rate": 8.844623115577891e-06, + "loss": 0.0101, + "step": 12000 + }, + { + "epoch": 4.47, + "eval_loss": 0.09026249498128891, + "eval_runtime": 2229.4031, + "eval_samples_per_second": 0.673, + "eval_steps_per_second": 0.673, + "eval_wer": 22.117202268431, + "step": 12000 + }, + { + "epoch": 4.48, + "grad_norm": 1.1141619682312012, + "learning_rate": 8.84211055276382e-06, + "loss": 0.0111, + "step": 12025 + }, + { + "epoch": 4.49, + "grad_norm": 1.177196741104126, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0096, + "step": 12050 + }, + { + "epoch": 4.5, + "grad_norm": 0.9316490292549133, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0114, + "step": 12075 + }, + { + "epoch": 4.51, + "grad_norm": 1.024509310722351, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0111, + "step": 12100 + }, + { + "epoch": 4.52, + "grad_norm": 0.8987429738044739, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0106, + "step": 12125 + }, + { + "epoch": 4.53, + "grad_norm": 1.220070719718933, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0109, + "step": 12150 + }, + { + "epoch": 4.54, + "grad_norm": 1.012709379196167, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0112, + "step": 12175 + }, + { + "epoch": 4.55, + "grad_norm": 1.3486119508743286, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0106, + "step": 12200 + }, + { + "epoch": 4.56, + "grad_norm": 1.1107289791107178, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0107, + "step": 12225 + }, + { + "epoch": 4.57, + "grad_norm": 0.9246770143508911, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0108, + "step": 12250 + }, + { + "epoch": 4.58, + "grad_norm": 0.9198755621910095, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0109, + "step": 12275 + }, + { + "epoch": 4.59, + "grad_norm": 0.9172239303588867, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0121, + "step": 12300 + }, + { + "epoch": 4.6, + "grad_norm": 1.4927749633789062, + "learning_rate": 8.812060301507538e-06, + "loss": 0.01, + "step": 12325 + }, + { + "epoch": 4.6, + "grad_norm": 1.0123810768127441, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0113, + "step": 12350 + }, + { + "epoch": 4.61, + "grad_norm": 0.8682227730751038, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0117, + "step": 12375 + }, + { + "epoch": 4.62, + "grad_norm": 1.235405683517456, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0105, + "step": 12400 + }, + { + "epoch": 4.63, + "grad_norm": 0.8223084211349487, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0096, + "step": 12425 + }, + { + "epoch": 4.64, + "grad_norm": 1.0910371541976929, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0096, + "step": 12450 + }, + { + "epoch": 4.65, + "grad_norm": 1.1073914766311646, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0115, + "step": 12475 + }, + { + "epoch": 4.66, + "grad_norm": 1.1131432056427002, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0101, + "step": 12500 + }, + { + "epoch": 4.67, + "grad_norm": 1.0300774574279785, + "learning_rate": 8.791959798994976e-06, + "loss": 0.011, + "step": 12525 + }, + { + "epoch": 4.68, + "grad_norm": 1.3151012659072876, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0121, + "step": 12550 + }, + { + "epoch": 4.69, + "grad_norm": 1.2374647855758667, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0106, + "step": 12575 + }, + { + "epoch": 4.7, + "grad_norm": 1.092190146446228, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0104, + "step": 12600 + }, + { + "epoch": 4.71, + "grad_norm": 0.8595845103263855, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0106, + "step": 12625 + }, + { + "epoch": 4.72, + "grad_norm": 1.3056796789169312, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0111, + "step": 12650 + }, + { + "epoch": 4.73, + "grad_norm": 1.2010694742202759, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0108, + "step": 12675 + }, + { + "epoch": 4.74, + "grad_norm": 1.1994473934173584, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0112, + "step": 12700 + }, + { + "epoch": 4.74, + "grad_norm": 1.4329885244369507, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0096, + "step": 12725 + }, + { + "epoch": 4.75, + "grad_norm": 0.8921823501586914, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0101, + "step": 12750 + }, + { + "epoch": 4.76, + "grad_norm": 1.1851333379745483, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0111, + "step": 12775 + }, + { + "epoch": 4.77, + "grad_norm": 0.9671804308891296, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0102, + "step": 12800 + }, + { + "epoch": 4.78, + "grad_norm": 1.231214165687561, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0115, + "step": 12825 + }, + { + "epoch": 4.79, + "grad_norm": 0.934362530708313, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0115, + "step": 12850 + }, + { + "epoch": 4.8, + "grad_norm": 1.0479446649551392, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0123, + "step": 12875 + }, + { + "epoch": 4.81, + "grad_norm": 1.2102491855621338, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0118, + "step": 12900 + }, + { + "epoch": 4.82, + "grad_norm": 1.3048062324523926, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0102, + "step": 12925 + }, + { + "epoch": 4.83, + "grad_norm": 1.1670870780944824, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0105, + "step": 12950 + }, + { + "epoch": 4.84, + "grad_norm": 0.788321316242218, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0101, + "step": 12975 + }, + { + "epoch": 4.85, + "grad_norm": 1.1214163303375244, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0112, + "step": 13000 + }, + { + "epoch": 4.85, + "eval_loss": 0.08956984430551529, + "eval_runtime": 2247.5649, + "eval_samples_per_second": 0.667, + "eval_steps_per_second": 0.667, + "eval_wer": 20.5279003010572, + "step": 13000 + }, + { + "epoch": 4.86, + "grad_norm": 1.1893272399902344, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0115, + "step": 13025 + }, + { + "epoch": 4.87, + "grad_norm": 0.8776862621307373, + "learning_rate": 8.739195979899498e-06, + "loss": 0.01, + "step": 13050 + }, + { + "epoch": 4.88, + "grad_norm": 0.8226960301399231, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0105, + "step": 13075 + }, + { + "epoch": 4.88, + "grad_norm": 0.9684033393859863, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0111, + "step": 13100 + }, + { + "epoch": 4.89, + "grad_norm": 0.5087772607803345, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0095, + "step": 13125 + }, + { + "epoch": 4.9, + "grad_norm": 1.243202805519104, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0105, + "step": 13150 + }, + { + "epoch": 4.91, + "grad_norm": 0.9347864389419556, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0101, + "step": 13175 + }, + { + "epoch": 4.92, + "grad_norm": 0.9326280355453491, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0107, + "step": 13200 + }, + { + "epoch": 4.93, + "grad_norm": 1.2724134922027588, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0119, + "step": 13225 + }, + { + "epoch": 4.94, + "grad_norm": 1.2051266431808472, + "learning_rate": 8.719095477386934e-06, + "loss": 0.0115, + "step": 13250 + }, + { + "epoch": 4.95, + "grad_norm": 0.8368582129478455, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0096, + "step": 13275 + }, + { + "epoch": 4.96, + "grad_norm": 1.262902855873108, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0114, + "step": 13300 + }, + { + "epoch": 4.97, + "grad_norm": 1.3418068885803223, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0098, + "step": 13325 + }, + { + "epoch": 4.98, + "grad_norm": 1.0681864023208618, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0106, + "step": 13350 + }, + { + "epoch": 4.99, + "grad_norm": 1.3879448175430298, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0106, + "step": 13375 + }, + { + "epoch": 5.0, + "grad_norm": 1.3497010469436646, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0113, + "step": 13400 + }, + { + "epoch": 5.01, + "grad_norm": 0.6780180335044861, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0084, + "step": 13425 + }, + { + "epoch": 5.01, + "grad_norm": 0.6623857617378235, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0054, + "step": 13450 + }, + { + "epoch": 5.02, + "grad_norm": 0.7492135763168335, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0062, + "step": 13475 + }, + { + "epoch": 5.03, + "grad_norm": 1.3665269613265991, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0055, + "step": 13500 + }, + { + "epoch": 5.04, + "grad_norm": 0.8366569876670837, + "learning_rate": 8.69145728643216e-06, + "loss": 0.005, + "step": 13525 + }, + { + "epoch": 5.05, + "grad_norm": 0.7672480344772339, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0061, + "step": 13550 + }, + { + "epoch": 5.06, + "grad_norm": 0.8875272274017334, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0049, + "step": 13575 + }, + { + "epoch": 5.07, + "grad_norm": 0.7921592593193054, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0061, + "step": 13600 + }, + { + "epoch": 5.08, + "grad_norm": 0.975547194480896, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0058, + "step": 13625 + }, + { + "epoch": 5.09, + "grad_norm": 1.0434356927871704, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0057, + "step": 13650 + }, + { + "epoch": 5.1, + "grad_norm": 0.5633987784385681, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0057, + "step": 13675 + }, + { + "epoch": 5.11, + "grad_norm": 0.7834965586662292, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0066, + "step": 13700 + }, + { + "epoch": 5.12, + "grad_norm": 0.6757418513298035, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0057, + "step": 13725 + }, + { + "epoch": 5.13, + "grad_norm": 0.5851700901985168, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0055, + "step": 13750 + }, + { + "epoch": 5.14, + "grad_norm": 0.6827601194381714, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0054, + "step": 13775 + }, + { + "epoch": 5.15, + "grad_norm": 0.9042696356773376, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0063, + "step": 13800 + }, + { + "epoch": 5.15, + "grad_norm": 0.8920664191246033, + "learning_rate": 8.661306532663317e-06, + "loss": 0.006, + "step": 13825 + }, + { + "epoch": 5.16, + "grad_norm": 0.794365406036377, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0056, + "step": 13850 + }, + { + "epoch": 5.17, + "grad_norm": 0.7295551300048828, + "learning_rate": 8.656281407035176e-06, + "loss": 0.006, + "step": 13875 + }, + { + "epoch": 5.18, + "grad_norm": 1.012995719909668, + "learning_rate": 8.653768844221107e-06, + "loss": 0.007, + "step": 13900 + }, + { + "epoch": 5.19, + "grad_norm": 0.6714737415313721, + "learning_rate": 8.651256281407036e-06, + "loss": 0.006, + "step": 13925 + }, + { + "epoch": 5.2, + "grad_norm": 0.9238806962966919, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0064, + "step": 13950 + }, + { + "epoch": 5.21, + "grad_norm": 1.066407322883606, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0066, + "step": 13975 + }, + { + "epoch": 5.22, + "grad_norm": 1.0410763025283813, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0061, + "step": 14000 + }, + { + "epoch": 5.22, + "eval_loss": 0.09774301946163177, + "eval_runtime": 2612.2978, + "eval_samples_per_second": 0.574, + "eval_steps_per_second": 0.574, + "eval_wer": 21.62010782048589, + "step": 14000 + }, + { + "epoch": 5.23, + "grad_norm": 0.9920790195465088, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0056, + "step": 14025 + }, + { + "epoch": 5.24, + "grad_norm": 1.043940544128418, + "learning_rate": 8.638693467336685e-06, + "loss": 0.007, + "step": 14050 + }, + { + "epoch": 5.25, + "grad_norm": 0.8365585207939148, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0064, + "step": 14075 + }, + { + "epoch": 5.26, + "grad_norm": 0.7762661576271057, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0067, + "step": 14100 + }, + { + "epoch": 5.27, + "grad_norm": 0.7610981464385986, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0068, + "step": 14125 + }, + { + "epoch": 5.28, + "grad_norm": 0.9563359022140503, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0064, + "step": 14150 + }, + { + "epoch": 5.29, + "grad_norm": 1.2927440404891968, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0069, + "step": 14175 + }, + { + "epoch": 5.29, + "grad_norm": 0.8169412612915039, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0069, + "step": 14200 + }, + { + "epoch": 5.3, + "grad_norm": 0.9048207998275757, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0073, + "step": 14225 + }, + { + "epoch": 5.31, + "grad_norm": 0.80600506067276, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0069, + "step": 14250 + }, + { + "epoch": 5.32, + "grad_norm": 0.5368210673332214, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0067, + "step": 14275 + }, + { + "epoch": 5.33, + "grad_norm": 1.1072783470153809, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0064, + "step": 14300 + }, + { + "epoch": 5.34, + "grad_norm": 0.6840161085128784, + "learning_rate": 8.611155778894473e-06, + "loss": 0.007, + "step": 14325 + }, + { + "epoch": 5.35, + "grad_norm": 0.9342491030693054, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0069, + "step": 14350 + }, + { + "epoch": 5.36, + "grad_norm": 0.9371657967567444, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0058, + "step": 14375 + }, + { + "epoch": 5.37, + "grad_norm": 0.853681206703186, + "learning_rate": 8.60361809045226e-06, + "loss": 0.006, + "step": 14400 + }, + { + "epoch": 5.38, + "grad_norm": 1.3315080404281616, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0068, + "step": 14425 + }, + { + "epoch": 5.39, + "grad_norm": 0.5982239246368408, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0061, + "step": 14450 + }, + { + "epoch": 5.4, + "grad_norm": 1.1555101871490479, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0078, + "step": 14475 + }, + { + "epoch": 5.41, + "grad_norm": 0.6723020076751709, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0064, + "step": 14500 + }, + { + "epoch": 5.42, + "grad_norm": 1.1656550168991089, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0073, + "step": 14525 + }, + { + "epoch": 5.43, + "grad_norm": 0.7531421184539795, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0074, + "step": 14550 + }, + { + "epoch": 5.43, + "grad_norm": 1.4016915559768677, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0068, + "step": 14575 + }, + { + "epoch": 5.44, + "grad_norm": 0.9058694243431091, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0066, + "step": 14600 + }, + { + "epoch": 5.45, + "grad_norm": 0.7191187739372253, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0068, + "step": 14625 + }, + { + "epoch": 5.46, + "grad_norm": 0.6670116782188416, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0067, + "step": 14650 + }, + { + "epoch": 5.47, + "grad_norm": 0.573491096496582, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0062, + "step": 14675 + }, + { + "epoch": 5.48, + "grad_norm": 0.6331093907356262, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0062, + "step": 14700 + }, + { + "epoch": 5.49, + "grad_norm": 0.8307035565376282, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0064, + "step": 14725 + }, + { + "epoch": 5.5, + "grad_norm": 1.046892523765564, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0066, + "step": 14750 + }, + { + "epoch": 5.51, + "grad_norm": 0.8250159621238708, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0069, + "step": 14775 + }, + { + "epoch": 5.52, + "grad_norm": 0.6004950404167175, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0065, + "step": 14800 + }, + { + "epoch": 5.53, + "grad_norm": 0.8049018979072571, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0057, + "step": 14825 + }, + { + "epoch": 5.54, + "grad_norm": 0.9586244821548462, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0078, + "step": 14850 + }, + { + "epoch": 5.55, + "grad_norm": 1.0678731203079224, + "learning_rate": 8.555879396984925e-06, + "loss": 0.007, + "step": 14875 + }, + { + "epoch": 5.56, + "grad_norm": 0.8579398989677429, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0068, + "step": 14900 + }, + { + "epoch": 5.56, + "grad_norm": 0.6770957708358765, + "learning_rate": 8.550854271356785e-06, + "loss": 0.007, + "step": 14925 + }, + { + "epoch": 5.57, + "grad_norm": 0.8836690187454224, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0073, + "step": 14950 + }, + { + "epoch": 5.58, + "grad_norm": 0.7474973797798157, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0064, + "step": 14975 + }, + { + "epoch": 5.59, + "grad_norm": 0.7702472805976868, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0064, + "step": 15000 + }, + { + "epoch": 5.59, + "eval_loss": 0.10448779165744781, + "eval_runtime": 2459.6499, + "eval_samples_per_second": 0.61, + "eval_steps_per_second": 0.61, + "eval_wer": 21.221031996079255, + "step": 15000 + }, + { + "epoch": 5.6, + "grad_norm": 0.8197509050369263, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0071, + "step": 15025 + }, + { + "epoch": 5.61, + "grad_norm": 1.0859959125518799, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0065, + "step": 15050 + }, + { + "epoch": 5.62, + "grad_norm": 1.171198844909668, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0067, + "step": 15075 + }, + { + "epoch": 5.63, + "grad_norm": 0.7009487152099609, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0072, + "step": 15100 + }, + { + "epoch": 5.64, + "grad_norm": 0.8731284737586975, + "learning_rate": 8.530753768844221e-06, + "loss": 0.007, + "step": 15125 + }, + { + "epoch": 5.65, + "grad_norm": 0.35863974690437317, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0065, + "step": 15150 + }, + { + "epoch": 5.66, + "grad_norm": 0.6365551948547363, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0068, + "step": 15175 + }, + { + "epoch": 5.67, + "grad_norm": 1.2321420907974243, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0066, + "step": 15200 + }, + { + "epoch": 5.68, + "grad_norm": 0.8155936002731323, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0069, + "step": 15225 + }, + { + "epoch": 5.69, + "grad_norm": 1.0119582414627075, + "learning_rate": 8.518190954773871e-06, + "loss": 0.007, + "step": 15250 + }, + { + "epoch": 5.7, + "grad_norm": 1.1386066675186157, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0071, + "step": 15275 + }, + { + "epoch": 5.7, + "grad_norm": 1.0384410619735718, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0059, + "step": 15300 + }, + { + "epoch": 5.71, + "grad_norm": 1.0309228897094727, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0076, + "step": 15325 + }, + { + "epoch": 5.72, + "grad_norm": 0.9991047382354736, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0059, + "step": 15350 + }, + { + "epoch": 5.73, + "grad_norm": 0.761254608631134, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0072, + "step": 15375 + }, + { + "epoch": 5.74, + "grad_norm": 1.0932788848876953, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0063, + "step": 15400 + }, + { + "epoch": 5.75, + "grad_norm": 1.0290889739990234, + "learning_rate": 8.500603015075377e-06, + "loss": 0.007, + "step": 15425 + }, + { + "epoch": 5.76, + "grad_norm": 0.8201502561569214, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0073, + "step": 15450 + }, + { + "epoch": 5.77, + "grad_norm": 1.2674179077148438, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0067, + "step": 15475 + }, + { + "epoch": 5.78, + "grad_norm": 0.9667775630950928, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0072, + "step": 15500 + }, + { + "epoch": 5.79, + "grad_norm": 1.1093286275863647, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0075, + "step": 15525 + }, + { + "epoch": 5.8, + "grad_norm": 0.9390110373497009, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0074, + "step": 15550 + }, + { + "epoch": 5.81, + "grad_norm": 0.7439872026443481, + "learning_rate": 8.485527638190956e-06, + "loss": 0.007, + "step": 15575 + }, + { + "epoch": 5.82, + "grad_norm": 0.9710354208946228, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0069, + "step": 15600 + }, + { + "epoch": 5.83, + "grad_norm": 1.4938459396362305, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0075, + "step": 15625 + }, + { + "epoch": 5.84, + "grad_norm": 0.6854323148727417, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0064, + "step": 15650 + }, + { + "epoch": 5.84, + "grad_norm": 0.5185632705688477, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0063, + "step": 15675 + }, + { + "epoch": 5.85, + "grad_norm": 1.1037718057632446, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0067, + "step": 15700 + }, + { + "epoch": 5.86, + "grad_norm": 1.2220135927200317, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0068, + "step": 15725 + }, + { + "epoch": 5.87, + "grad_norm": 1.088472843170166, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0065, + "step": 15750 + }, + { + "epoch": 5.88, + "grad_norm": 1.1179323196411133, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0071, + "step": 15775 + }, + { + "epoch": 5.89, + "grad_norm": 1.0063631534576416, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0073, + "step": 15800 + }, + { + "epoch": 5.9, + "grad_norm": 0.9123952984809875, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0058, + "step": 15825 + }, + { + "epoch": 5.91, + "grad_norm": 0.8819885849952698, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0065, + "step": 15850 + }, + { + "epoch": 5.92, + "grad_norm": 0.5204604268074036, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0071, + "step": 15875 + }, + { + "epoch": 5.93, + "grad_norm": 1.012110710144043, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0068, + "step": 15900 + }, + { + "epoch": 5.94, + "grad_norm": 1.481178641319275, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0067, + "step": 15925 + }, + { + "epoch": 5.95, + "grad_norm": 0.864529550075531, + "learning_rate": 8.4478391959799e-06, + "loss": 0.007, + "step": 15950 + }, + { + "epoch": 5.96, + "grad_norm": 0.9231351017951965, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0071, + "step": 15975 + }, + { + "epoch": 5.97, + "grad_norm": 0.6476284861564636, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0067, + "step": 16000 + }, + { + "epoch": 5.97, + "eval_loss": 0.1004272997379303, + "eval_runtime": 2442.8935, + "eval_samples_per_second": 0.614, + "eval_steps_per_second": 0.614, + "eval_wer": 21.48708254568368, + "step": 16000 + }, + { + "epoch": 5.98, + "grad_norm": 0.9911161661148071, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0063, + "step": 16025 + }, + { + "epoch": 5.98, + "grad_norm": 1.0486165285110474, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0063, + "step": 16050 + }, + { + "epoch": 5.99, + "grad_norm": 1.1189517974853516, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0064, + "step": 16075 + }, + { + "epoch": 6.0, + "grad_norm": 0.8747252821922302, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0058, + "step": 16100 + }, + { + "epoch": 6.01, + "grad_norm": 0.7392064929008484, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0042, + "step": 16125 + }, + { + "epoch": 6.02, + "grad_norm": 0.6404821872711182, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0044, + "step": 16150 + }, + { + "epoch": 6.03, + "grad_norm": 0.8560150265693665, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0041, + "step": 16175 + }, + { + "epoch": 6.04, + "grad_norm": 0.8960902690887451, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0038, + "step": 16200 + }, + { + "epoch": 6.05, + "grad_norm": 0.9707881808280945, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0039, + "step": 16225 + }, + { + "epoch": 6.06, + "grad_norm": 0.7721638679504395, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0036, + "step": 16250 + }, + { + "epoch": 6.07, + "grad_norm": 0.8403162360191345, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0035, + "step": 16275 + }, + { + "epoch": 6.08, + "grad_norm": 0.8735793828964233, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0041, + "step": 16300 + }, + { + "epoch": 6.09, + "grad_norm": 0.8523971438407898, + "learning_rate": 8.410251256281408e-06, + "loss": 0.004, + "step": 16325 + }, + { + "epoch": 6.1, + "grad_norm": 1.0166010856628418, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0045, + "step": 16350 + }, + { + "epoch": 6.11, + "grad_norm": 0.46614933013916016, + "learning_rate": 8.405226130653267e-06, + "loss": 0.005, + "step": 16375 + }, + { + "epoch": 6.11, + "grad_norm": 0.7276281714439392, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0042, + "step": 16400 + }, + { + "epoch": 6.12, + "grad_norm": 0.6389389634132385, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0041, + "step": 16425 + }, + { + "epoch": 6.13, + "grad_norm": 0.4248146712779999, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0034, + "step": 16450 + }, + { + "epoch": 6.14, + "grad_norm": 0.5150933265686035, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0034, + "step": 16475 + }, + { + "epoch": 6.15, + "grad_norm": 0.8916193842887878, + "learning_rate": 8.392663316582915e-06, + "loss": 0.003, + "step": 16500 + }, + { + "epoch": 6.16, + "grad_norm": 1.1272698640823364, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0038, + "step": 16525 + }, + { + "epoch": 6.17, + "grad_norm": 0.6488180756568909, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0044, + "step": 16550 + }, + { + "epoch": 6.18, + "grad_norm": 1.3851958513259888, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0039, + "step": 16575 + }, + { + "epoch": 6.19, + "grad_norm": 1.0638848543167114, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0045, + "step": 16600 + }, + { + "epoch": 6.2, + "grad_norm": 0.8646655678749084, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0041, + "step": 16625 + }, + { + "epoch": 6.21, + "grad_norm": 0.9093518853187561, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0044, + "step": 16650 + }, + { + "epoch": 6.22, + "grad_norm": 1.3703974485397339, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0041, + "step": 16675 + }, + { + "epoch": 6.23, + "grad_norm": 0.5281303524971008, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0044, + "step": 16700 + }, + { + "epoch": 6.24, + "grad_norm": 1.0478041172027588, + "learning_rate": 8.370050251256282e-06, + "loss": 0.004, + "step": 16725 + }, + { + "epoch": 6.25, + "grad_norm": 0.9260602593421936, + "learning_rate": 8.367537688442212e-06, + "loss": 0.004, + "step": 16750 + }, + { + "epoch": 6.25, + "grad_norm": 1.010270118713379, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0045, + "step": 16775 + }, + { + "epoch": 6.26, + "grad_norm": 0.515651285648346, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0041, + "step": 16800 + }, + { + "epoch": 6.27, + "grad_norm": 0.8165681958198547, + "learning_rate": 8.36e-06, + "loss": 0.0038, + "step": 16825 + }, + { + "epoch": 6.28, + "grad_norm": 0.7381798624992371, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0051, + "step": 16850 + }, + { + "epoch": 6.29, + "grad_norm": 0.6414347887039185, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0043, + "step": 16875 + }, + { + "epoch": 6.3, + "grad_norm": 1.4425163269042969, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0045, + "step": 16900 + }, + { + "epoch": 6.31, + "grad_norm": 0.6824920177459717, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0043, + "step": 16925 + }, + { + "epoch": 6.32, + "grad_norm": 0.9367761015892029, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0041, + "step": 16950 + }, + { + "epoch": 6.33, + "grad_norm": 0.5670114159584045, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0046, + "step": 16975 + }, + { + "epoch": 6.34, + "grad_norm": 0.7849371433258057, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0056, + "step": 17000 + }, + { + "epoch": 6.34, + "eval_loss": 0.10946433246135712, + "eval_runtime": 2358.3839, + "eval_samples_per_second": 0.636, + "eval_steps_per_second": 0.636, + "eval_wer": 20.688930896870406, + "step": 17000 + }, + { + "epoch": 6.35, + "grad_norm": 0.8708887100219727, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0045, + "step": 17025 + }, + { + "epoch": 6.36, + "grad_norm": 0.8711768984794617, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0048, + "step": 17050 + }, + { + "epoch": 6.37, + "grad_norm": 0.860245943069458, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0052, + "step": 17075 + }, + { + "epoch": 6.38, + "grad_norm": 0.9138718843460083, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0043, + "step": 17100 + }, + { + "epoch": 6.39, + "grad_norm": 1.0963783264160156, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0048, + "step": 17125 + }, + { + "epoch": 6.39, + "grad_norm": 0.8362494707107544, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0049, + "step": 17150 + }, + { + "epoch": 6.4, + "grad_norm": 0.7999160289764404, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0045, + "step": 17175 + }, + { + "epoch": 6.41, + "grad_norm": 1.2116003036499023, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0051, + "step": 17200 + }, + { + "epoch": 6.42, + "grad_norm": 0.7496313452720642, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0048, + "step": 17225 + }, + { + "epoch": 6.43, + "grad_norm": 1.0259166955947876, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0052, + "step": 17250 + }, + { + "epoch": 6.44, + "grad_norm": 0.969132661819458, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0045, + "step": 17275 + }, + { + "epoch": 6.45, + "grad_norm": 1.2644497156143188, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0047, + "step": 17300 + }, + { + "epoch": 6.46, + "grad_norm": 1.406724452972412, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0048, + "step": 17325 + }, + { + "epoch": 6.47, + "grad_norm": 0.9352630376815796, + "learning_rate": 8.307236180904524e-06, + "loss": 0.005, + "step": 17350 + }, + { + "epoch": 6.48, + "grad_norm": 1.0746114253997803, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0048, + "step": 17375 + }, + { + "epoch": 6.49, + "grad_norm": 0.7143023610115051, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0049, + "step": 17400 + }, + { + "epoch": 6.5, + "grad_norm": 0.6786660552024841, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0048, + "step": 17425 + }, + { + "epoch": 6.51, + "grad_norm": 0.6959403157234192, + "learning_rate": 8.297185929648241e-06, + "loss": 0.005, + "step": 17450 + }, + { + "epoch": 6.52, + "grad_norm": 1.1008723974227905, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0046, + "step": 17475 + }, + { + "epoch": 6.52, + "grad_norm": 0.9203583002090454, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0054, + "step": 17500 + }, + { + "epoch": 6.53, + "grad_norm": 0.9769070744514465, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0045, + "step": 17525 + }, + { + "epoch": 6.54, + "grad_norm": 0.4409109354019165, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0044, + "step": 17550 + }, + { + "epoch": 6.55, + "grad_norm": 0.8446891903877258, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0045, + "step": 17575 + }, + { + "epoch": 6.56, + "grad_norm": 1.257330298423767, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0049, + "step": 17600 + }, + { + "epoch": 6.57, + "grad_norm": 0.9121269583702087, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0045, + "step": 17625 + }, + { + "epoch": 6.58, + "grad_norm": 0.9846858978271484, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0045, + "step": 17650 + }, + { + "epoch": 6.59, + "grad_norm": 0.7057098150253296, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0044, + "step": 17675 + }, + { + "epoch": 6.6, + "grad_norm": 0.8779595494270325, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0059, + "step": 17700 + }, + { + "epoch": 6.61, + "grad_norm": 1.0422426462173462, + "learning_rate": 8.269547738693467e-06, + "loss": 0.005, + "step": 17725 + }, + { + "epoch": 6.62, + "grad_norm": 0.8073680400848389, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0045, + "step": 17750 + }, + { + "epoch": 6.63, + "grad_norm": 0.8581899404525757, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0051, + "step": 17775 + }, + { + "epoch": 6.64, + "grad_norm": 1.0273178815841675, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0055, + "step": 17800 + }, + { + "epoch": 6.65, + "grad_norm": 1.0193665027618408, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0045, + "step": 17825 + }, + { + "epoch": 6.66, + "grad_norm": 0.6763816475868225, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0052, + "step": 17850 + }, + { + "epoch": 6.66, + "grad_norm": 0.8810611367225647, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0049, + "step": 17875 + }, + { + "epoch": 6.67, + "grad_norm": 0.8419938683509827, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0048, + "step": 17900 + }, + { + "epoch": 6.68, + "grad_norm": 1.4374581575393677, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0048, + "step": 17925 + }, + { + "epoch": 6.69, + "grad_norm": 0.4161253869533539, + "learning_rate": 8.246934673366836e-06, + "loss": 0.004, + "step": 17950 + }, + { + "epoch": 6.7, + "grad_norm": 1.0084340572357178, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0058, + "step": 17975 + }, + { + "epoch": 6.71, + "grad_norm": 1.2393544912338257, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0056, + "step": 18000 + }, + { + "epoch": 6.71, + "eval_loss": 0.1063193753361702, + "eval_runtime": 2303.0888, + "eval_samples_per_second": 0.651, + "eval_steps_per_second": 0.651, + "eval_wer": 20.660925575859412, + "step": 18000 + }, + { + "epoch": 6.72, + "grad_norm": 0.7703242301940918, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0051, + "step": 18025 + }, + { + "epoch": 6.73, + "grad_norm": 1.362753987312317, + "learning_rate": 8.236884422110553e-06, + "loss": 0.005, + "step": 18050 + }, + { + "epoch": 6.74, + "grad_norm": 0.7990913391113281, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0058, + "step": 18075 + }, + { + "epoch": 6.75, + "grad_norm": 0.8527544736862183, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0051, + "step": 18100 + }, + { + "epoch": 6.76, + "grad_norm": 0.44885873794555664, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0055, + "step": 18125 + }, + { + "epoch": 6.77, + "grad_norm": 0.7499191761016846, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0044, + "step": 18150 + }, + { + "epoch": 6.78, + "grad_norm": 0.7297422289848328, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0044, + "step": 18175 + }, + { + "epoch": 6.79, + "grad_norm": 1.0432955026626587, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0053, + "step": 18200 + }, + { + "epoch": 6.8, + "grad_norm": 1.0367790460586548, + "learning_rate": 8.219296482412062e-06, + "loss": 0.005, + "step": 18225 + }, + { + "epoch": 6.8, + "grad_norm": 0.8273251056671143, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0055, + "step": 18250 + }, + { + "epoch": 6.81, + "grad_norm": 0.7859262824058533, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0043, + "step": 18275 + }, + { + "epoch": 6.82, + "grad_norm": 0.5762407183647156, + "learning_rate": 8.21175879396985e-06, + "loss": 0.005, + "step": 18300 + }, + { + "epoch": 6.83, + "grad_norm": 0.7186219692230225, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0054, + "step": 18325 + }, + { + "epoch": 6.84, + "grad_norm": 1.0423905849456787, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0058, + "step": 18350 + }, + { + "epoch": 6.85, + "grad_norm": 0.7775648832321167, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0053, + "step": 18375 + }, + { + "epoch": 6.86, + "grad_norm": 1.0116063356399536, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0056, + "step": 18400 + }, + { + "epoch": 6.87, + "grad_norm": 0.7143779993057251, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0057, + "step": 18425 + }, + { + "epoch": 6.88, + "grad_norm": 0.7786298394203186, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0049, + "step": 18450 + }, + { + "epoch": 6.89, + "grad_norm": 1.0309168100357056, + "learning_rate": 8.194170854271357e-06, + "loss": 0.0051, + "step": 18475 + }, + { + "epoch": 6.9, + "grad_norm": 0.6550295948982239, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0047, + "step": 18500 + }, + { + "epoch": 6.91, + "grad_norm": 0.8786160945892334, + "learning_rate": 8.189145728643216e-06, + "loss": 0.0054, + "step": 18525 + }, + { + "epoch": 6.92, + "grad_norm": 0.8871691823005676, + "learning_rate": 8.186633165829147e-06, + "loss": 0.005, + "step": 18550 + }, + { + "epoch": 6.93, + "grad_norm": 0.9651385545730591, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0047, + "step": 18575 + }, + { + "epoch": 6.94, + "grad_norm": 0.9228749871253967, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0051, + "step": 18600 + }, + { + "epoch": 6.94, + "grad_norm": 0.9352297186851501, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0052, + "step": 18625 + }, + { + "epoch": 6.95, + "grad_norm": 0.9588571786880493, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0048, + "step": 18650 + }, + { + "epoch": 6.96, + "grad_norm": 1.1392850875854492, + "learning_rate": 8.174070351758795e-06, + "loss": 0.0049, + "step": 18675 + }, + { + "epoch": 6.97, + "grad_norm": 0.8982906341552734, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0051, + "step": 18700 + }, + { + "epoch": 6.98, + "grad_norm": 0.8606371879577637, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0047, + "step": 18725 + }, + { + "epoch": 6.99, + "grad_norm": 0.8528621196746826, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0046, + "step": 18750 + }, + { + "epoch": 7.0, + "grad_norm": 0.5060096979141235, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0051, + "step": 18775 + }, + { + "epoch": 7.01, + "grad_norm": 0.7054618000984192, + "learning_rate": 8.161507537688443e-06, + "loss": 0.003, + "step": 18800 + }, + { + "epoch": 7.02, + "grad_norm": 0.7383275032043457, + "learning_rate": 8.158994974874373e-06, + "loss": 0.003, + "step": 18825 + }, + { + "epoch": 7.03, + "grad_norm": 0.554303765296936, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0031, + "step": 18850 + }, + { + "epoch": 7.04, + "grad_norm": 0.6616830229759216, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0029, + "step": 18875 + }, + { + "epoch": 7.05, + "grad_norm": 0.6225329041481018, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0032, + "step": 18900 + }, + { + "epoch": 7.06, + "grad_norm": 0.6179950833320618, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0031, + "step": 18925 + }, + { + "epoch": 7.07, + "grad_norm": 0.3907131254673004, + "learning_rate": 8.146432160804021e-06, + "loss": 0.003, + "step": 18950 + }, + { + "epoch": 7.07, + "grad_norm": 1.0759258270263672, + "learning_rate": 8.14391959798995e-06, + "loss": 0.0032, + "step": 18975 + }, + { + "epoch": 7.08, + "grad_norm": 0.8064579963684082, + "learning_rate": 8.14140703517588e-06, + "loss": 0.0036, + "step": 19000 + }, + { + "epoch": 7.08, + "eval_loss": 0.11219295114278793, + "eval_runtime": 2311.2685, + "eval_samples_per_second": 0.649, + "eval_steps_per_second": 0.649, + "eval_wer": 19.953791220331862, + "step": 19000 + }, + { + "epoch": 7.09, + "grad_norm": 1.443750262260437, + "learning_rate": 8.13889447236181e-06, + "loss": 0.003, + "step": 19025 + }, + { + "epoch": 7.1, + "grad_norm": 0.7455021739006042, + "learning_rate": 8.13638190954774e-06, + "loss": 0.0031, + "step": 19050 + }, + { + "epoch": 7.11, + "grad_norm": 0.7175447344779968, + "learning_rate": 8.13386934673367e-06, + "loss": 0.0027, + "step": 19075 + }, + { + "epoch": 7.12, + "grad_norm": 0.9356100559234619, + "learning_rate": 8.131356783919598e-06, + "loss": 0.0029, + "step": 19100 + }, + { + "epoch": 7.13, + "grad_norm": 0.782476544380188, + "learning_rate": 8.128844221105528e-06, + "loss": 0.0025, + "step": 19125 + }, + { + "epoch": 7.14, + "grad_norm": 0.4530758559703827, + "learning_rate": 8.126331658291457e-06, + "loss": 0.0034, + "step": 19150 + }, + { + "epoch": 7.15, + "grad_norm": 0.7667471766471863, + "learning_rate": 8.123819095477388e-06, + "loss": 0.003, + "step": 19175 + }, + { + "epoch": 7.16, + "grad_norm": 1.1156351566314697, + "learning_rate": 8.121306532663317e-06, + "loss": 0.0028, + "step": 19200 + }, + { + "epoch": 7.17, + "grad_norm": 0.7230924963951111, + "learning_rate": 8.118793969849247e-06, + "loss": 0.0032, + "step": 19225 + }, + { + "epoch": 7.18, + "grad_norm": 0.7898219227790833, + "learning_rate": 8.116281407035178e-06, + "loss": 0.0033, + "step": 19250 + }, + { + "epoch": 7.19, + "grad_norm": 0.404952734708786, + "learning_rate": 8.113768844221105e-06, + "loss": 0.0038, + "step": 19275 + }, + { + "epoch": 7.2, + "grad_norm": 1.0400466918945312, + "learning_rate": 8.111256281407036e-06, + "loss": 0.0032, + "step": 19300 + }, + { + "epoch": 7.21, + "grad_norm": 0.7444201707839966, + "learning_rate": 8.108743718592966e-06, + "loss": 0.0033, + "step": 19325 + }, + { + "epoch": 7.21, + "grad_norm": 1.4148002862930298, + "learning_rate": 8.106231155778895e-06, + "loss": 0.0032, + "step": 19350 + }, + { + "epoch": 7.22, + "grad_norm": 1.0600794553756714, + "learning_rate": 8.103718592964824e-06, + "loss": 0.0035, + "step": 19375 + }, + { + "epoch": 7.23, + "grad_norm": 0.8777855634689331, + "learning_rate": 8.101206030150754e-06, + "loss": 0.0035, + "step": 19400 + }, + { + "epoch": 7.24, + "grad_norm": 0.7739002108573914, + "learning_rate": 8.098693467336685e-06, + "loss": 0.0035, + "step": 19425 + }, + { + "epoch": 7.25, + "grad_norm": 1.1467006206512451, + "learning_rate": 8.096180904522614e-06, + "loss": 0.0039, + "step": 19450 + }, + { + "epoch": 7.26, + "grad_norm": 0.8238193988800049, + "learning_rate": 8.093668341708543e-06, + "loss": 0.0032, + "step": 19475 + }, + { + "epoch": 7.27, + "grad_norm": 0.5104302167892456, + "learning_rate": 8.091155778894473e-06, + "loss": 0.0032, + "step": 19500 + }, + { + "epoch": 7.28, + "grad_norm": 0.703056275844574, + "learning_rate": 8.088643216080404e-06, + "loss": 0.0043, + "step": 19525 + }, + { + "epoch": 7.29, + "grad_norm": 0.9131667017936707, + "learning_rate": 8.086130653266331e-06, + "loss": 0.0037, + "step": 19550 + }, + { + "epoch": 7.3, + "grad_norm": 1.048483967781067, + "learning_rate": 8.083618090452262e-06, + "loss": 0.0035, + "step": 19575 + }, + { + "epoch": 7.31, + "grad_norm": 1.099968433380127, + "learning_rate": 8.081105527638192e-06, + "loss": 0.0035, + "step": 19600 + }, + { + "epoch": 7.32, + "grad_norm": 0.9601157307624817, + "learning_rate": 8.078592964824121e-06, + "loss": 0.0037, + "step": 19625 + }, + { + "epoch": 7.33, + "grad_norm": 1.477525234222412, + "learning_rate": 8.076080402010052e-06, + "loss": 0.0044, + "step": 19650 + }, + { + "epoch": 7.34, + "grad_norm": 0.8411869406700134, + "learning_rate": 8.07356783919598e-06, + "loss": 0.0041, + "step": 19675 + }, + { + "epoch": 7.35, + "grad_norm": 0.6640581488609314, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0037, + "step": 19700 + }, + { + "epoch": 7.35, + "grad_norm": 0.5342215299606323, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0036, + "step": 19725 + }, + { + "epoch": 7.36, + "grad_norm": 1.0184751749038696, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0044, + "step": 19750 + }, + { + "epoch": 7.37, + "grad_norm": 0.8682355284690857, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0042, + "step": 19775 + }, + { + "epoch": 7.38, + "grad_norm": 0.5461545586585999, + "learning_rate": 8.061105527638192e-06, + "loss": 0.004, + "step": 19800 + }, + { + "epoch": 7.39, + "grad_norm": 0.5802323818206787, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0034, + "step": 19825 + }, + { + "epoch": 7.4, + "grad_norm": 0.5620261430740356, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0037, + "step": 19850 + }, + { + "epoch": 7.41, + "grad_norm": 0.6866189241409302, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0039, + "step": 19875 + }, + { + "epoch": 7.42, + "grad_norm": 0.7501659989356995, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0035, + "step": 19900 + }, + { + "epoch": 7.43, + "grad_norm": 0.9695099592208862, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0031, + "step": 19925 + }, + { + "epoch": 7.44, + "grad_norm": 0.35447439551353455, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0032, + "step": 19950 + }, + { + "epoch": 7.45, + "grad_norm": 0.3252246677875519, + "learning_rate": 8.043517587939699e-06, + "loss": 0.0032, + "step": 19975 + }, + { + "epoch": 7.46, + "grad_norm": 1.2554246187210083, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0035, + "step": 20000 + }, + { + "epoch": 7.46, + "eval_loss": 0.11424605548381805, + "eval_runtime": 2250.5807, + "eval_samples_per_second": 0.666, + "eval_steps_per_second": 0.666, + "eval_wer": 20.366869705243996, + "step": 20000 + }, + { + "epoch": 7.47, + "grad_norm": 0.9622787833213806, + "learning_rate": 8.03849246231156e-06, + "loss": 0.0039, + "step": 20025 + }, + { + "epoch": 7.48, + "grad_norm": 0.7185646891593933, + "learning_rate": 8.035979899497489e-06, + "loss": 0.0039, + "step": 20050 + }, + { + "epoch": 7.49, + "grad_norm": 1.0995391607284546, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0035, + "step": 20075 + }, + { + "epoch": 7.49, + "grad_norm": 0.5398934483528137, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0039, + "step": 20100 + }, + { + "epoch": 7.5, + "grad_norm": 0.9111922383308411, + "learning_rate": 8.028442211055277e-06, + "loss": 0.0034, + "step": 20125 + }, + { + "epoch": 7.51, + "grad_norm": 0.9583398699760437, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0046, + "step": 20150 + }, + { + "epoch": 7.52, + "grad_norm": 0.9034853577613831, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0037, + "step": 20175 + }, + { + "epoch": 7.53, + "grad_norm": 0.5930396318435669, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0035, + "step": 20200 + }, + { + "epoch": 7.54, + "grad_norm": 0.6869273781776428, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0034, + "step": 20225 + }, + { + "epoch": 7.55, + "grad_norm": 0.2264496386051178, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0033, + "step": 20250 + }, + { + "epoch": 7.56, + "grad_norm": 0.7419082522392273, + "learning_rate": 8.013366834170854e-06, + "loss": 0.003, + "step": 20275 + }, + { + "epoch": 7.57, + "grad_norm": 0.58115154504776, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0039, + "step": 20300 + }, + { + "epoch": 7.58, + "grad_norm": 0.7609200477600098, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0034, + "step": 20325 + }, + { + "epoch": 7.59, + "grad_norm": 1.1072801351547241, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0038, + "step": 20350 + }, + { + "epoch": 7.6, + "grad_norm": 1.3812626600265503, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0037, + "step": 20375 + }, + { + "epoch": 7.61, + "grad_norm": 0.9944515228271484, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0043, + "step": 20400 + }, + { + "epoch": 7.62, + "grad_norm": 0.942705512046814, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0039, + "step": 20425 + }, + { + "epoch": 7.62, + "grad_norm": 1.0177104473114014, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0034, + "step": 20450 + }, + { + "epoch": 7.63, + "grad_norm": 0.6853436231613159, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0035, + "step": 20475 + }, + { + "epoch": 7.64, + "grad_norm": 1.3809473514556885, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0035, + "step": 20500 + }, + { + "epoch": 7.65, + "grad_norm": 1.09543776512146, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0043, + "step": 20525 + }, + { + "epoch": 7.66, + "grad_norm": 1.022188425064087, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0048, + "step": 20550 + }, + { + "epoch": 7.67, + "grad_norm": 0.9681338667869568, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0047, + "step": 20575 + }, + { + "epoch": 7.68, + "grad_norm": 1.2067780494689941, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0037, + "step": 20600 + }, + { + "epoch": 7.69, + "grad_norm": 0.49820420145988464, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0035, + "step": 20625 + }, + { + "epoch": 7.7, + "grad_norm": 0.5052514672279358, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0041, + "step": 20650 + }, + { + "epoch": 7.71, + "grad_norm": 1.0826096534729004, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0028, + "step": 20675 + }, + { + "epoch": 7.72, + "grad_norm": 0.5142925381660461, + "learning_rate": 7.97065326633166e-06, + "loss": 0.003, + "step": 20700 + }, + { + "epoch": 7.73, + "grad_norm": 1.1938055753707886, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0038, + "step": 20725 + }, + { + "epoch": 7.74, + "grad_norm": 0.907762885093689, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0039, + "step": 20750 + }, + { + "epoch": 7.75, + "grad_norm": 1.2881709337234497, + "learning_rate": 7.963115577889447e-06, + "loss": 0.004, + "step": 20775 + }, + { + "epoch": 7.76, + "grad_norm": 1.1500924825668335, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0039, + "step": 20800 + }, + { + "epoch": 7.76, + "grad_norm": 0.7658544778823853, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0031, + "step": 20825 + }, + { + "epoch": 7.77, + "grad_norm": 0.7022196054458618, + "learning_rate": 7.955577889447237e-06, + "loss": 0.004, + "step": 20850 + }, + { + "epoch": 7.78, + "grad_norm": 0.7589901089668274, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0042, + "step": 20875 + }, + { + "epoch": 7.79, + "grad_norm": 0.7022444009780884, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0032, + "step": 20900 + }, + { + "epoch": 7.8, + "grad_norm": 1.0723148584365845, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0039, + "step": 20925 + }, + { + "epoch": 7.81, + "grad_norm": 0.5118756890296936, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0037, + "step": 20950 + }, + { + "epoch": 7.82, + "grad_norm": 0.6073394417762756, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0037, + "step": 20975 + }, + { + "epoch": 7.83, + "grad_norm": 1.1849571466445923, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0042, + "step": 21000 + }, + { + "epoch": 7.83, + "eval_loss": 0.1132931336760521, + "eval_runtime": 2237.5638, + "eval_samples_per_second": 0.67, + "eval_steps_per_second": 0.67, + "eval_wer": 20.933977455716587, + "step": 21000 + }, + { + "epoch": 7.84, + "grad_norm": 0.7310337424278259, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0036, + "step": 21025 + }, + { + "epoch": 7.85, + "grad_norm": 0.9675549268722534, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0042, + "step": 21050 + }, + { + "epoch": 7.86, + "grad_norm": 0.7477439641952515, + "learning_rate": 7.933065326633167e-06, + "loss": 0.004, + "step": 21075 + }, + { + "epoch": 7.87, + "grad_norm": 0.751980721950531, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0041, + "step": 21100 + }, + { + "epoch": 7.88, + "grad_norm": 0.9874993562698364, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0036, + "step": 21125 + }, + { + "epoch": 7.89, + "grad_norm": 0.24822227656841278, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0039, + "step": 21150 + }, + { + "epoch": 7.9, + "grad_norm": 0.8088057041168213, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0039, + "step": 21175 + }, + { + "epoch": 7.9, + "grad_norm": 0.7441015243530273, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0039, + "step": 21200 + }, + { + "epoch": 7.91, + "grad_norm": 0.548628568649292, + "learning_rate": 7.917989949748744e-06, + "loss": 0.004, + "step": 21225 + }, + { + "epoch": 7.92, + "grad_norm": 0.6077334880828857, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0039, + "step": 21250 + }, + { + "epoch": 7.93, + "grad_norm": 0.9279152750968933, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0044, + "step": 21275 + }, + { + "epoch": 7.94, + "grad_norm": 1.196356177330017, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0043, + "step": 21300 + }, + { + "epoch": 7.95, + "grad_norm": 0.8726195096969604, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0046, + "step": 21325 + }, + { + "epoch": 7.96, + "grad_norm": 0.7140462398529053, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0036, + "step": 21350 + }, + { + "epoch": 7.97, + "grad_norm": 0.8200076818466187, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0042, + "step": 21375 + }, + { + "epoch": 7.98, + "grad_norm": 1.0330411195755005, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0034, + "step": 21400 + }, + { + "epoch": 7.99, + "grad_norm": 0.8223716616630554, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0034, + "step": 21425 + }, + { + "epoch": 8.0, + "grad_norm": 0.5562478303909302, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0037, + "step": 21450 + }, + { + "epoch": 8.01, + "grad_norm": 0.7021026015281677, + "learning_rate": 7.89286432160804e-06, + "loss": 0.0029, + "step": 21475 + }, + { + "epoch": 8.02, + "grad_norm": 0.8235811591148376, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0022, + "step": 21500 + }, + { + "epoch": 8.03, + "grad_norm": 0.7300966382026672, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0023, + "step": 21525 + }, + { + "epoch": 8.04, + "grad_norm": 0.469328910112381, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0023, + "step": 21550 + }, + { + "epoch": 8.04, + "grad_norm": 1.2920448780059814, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0026, + "step": 21575 + }, + { + "epoch": 8.05, + "grad_norm": 0.6624780893325806, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0026, + "step": 21600 + }, + { + "epoch": 8.06, + "grad_norm": 0.544865608215332, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0025, + "step": 21625 + }, + { + "epoch": 8.07, + "grad_norm": 0.38802388310432434, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0019, + "step": 21650 + }, + { + "epoch": 8.08, + "grad_norm": 1.013079285621643, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0021, + "step": 21675 + }, + { + "epoch": 8.09, + "grad_norm": 0.7242569327354431, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0023, + "step": 21700 + }, + { + "epoch": 8.1, + "grad_norm": 0.6507025957107544, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0021, + "step": 21725 + }, + { + "epoch": 8.11, + "grad_norm": 0.4160756468772888, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0028, + "step": 21750 + }, + { + "epoch": 8.12, + "grad_norm": 0.3500078022480011, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0028, + "step": 21775 + }, + { + "epoch": 8.13, + "grad_norm": 0.7190316319465637, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0029, + "step": 21800 + }, + { + "epoch": 8.14, + "grad_norm": 0.6250056028366089, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0028, + "step": 21825 + }, + { + "epoch": 8.15, + "grad_norm": 0.27939143776893616, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0028, + "step": 21850 + }, + { + "epoch": 8.16, + "grad_norm": 1.0892853736877441, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0029, + "step": 21875 + }, + { + "epoch": 8.17, + "grad_norm": 0.9106313586235046, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0032, + "step": 21900 + }, + { + "epoch": 8.17, + "grad_norm": 0.7758311033248901, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0026, + "step": 21925 + }, + { + "epoch": 8.18, + "grad_norm": 0.23125924170017242, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0027, + "step": 21950 + }, + { + "epoch": 8.19, + "grad_norm": 0.7388342618942261, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0031, + "step": 21975 + }, + { + "epoch": 8.2, + "grad_norm": 0.5707415342330933, + "learning_rate": 7.840100502512563e-06, + "loss": 0.003, + "step": 22000 + }, + { + "epoch": 8.2, + "eval_loss": 0.11463221907615662, + "eval_runtime": 2225.8174, + "eval_samples_per_second": 0.674, + "eval_steps_per_second": 0.674, + "eval_wer": 21.17202268431002, + "step": 22000 + }, + { + "epoch": 8.21, + "grad_norm": 0.5890433192253113, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0025, + "step": 22025 + }, + { + "epoch": 8.22, + "grad_norm": 0.6282503008842468, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0028, + "step": 22050 + }, + { + "epoch": 8.23, + "grad_norm": 0.970411479473114, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0029, + "step": 22075 + }, + { + "epoch": 8.24, + "grad_norm": 0.3961445391178131, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0026, + "step": 22100 + }, + { + "epoch": 8.25, + "grad_norm": 0.6173611879348755, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0027, + "step": 22125 + }, + { + "epoch": 8.26, + "grad_norm": 0.4521360397338867, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0023, + "step": 22150 + }, + { + "epoch": 8.27, + "grad_norm": 0.5012291669845581, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0026, + "step": 22175 + }, + { + "epoch": 8.28, + "grad_norm": 0.688684344291687, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0036, + "step": 22200 + }, + { + "epoch": 8.29, + "grad_norm": 0.643620491027832, + "learning_rate": 7.81748743718593e-06, + "loss": 0.002, + "step": 22225 + }, + { + "epoch": 8.3, + "grad_norm": 1.2066946029663086, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0024, + "step": 22250 + }, + { + "epoch": 8.31, + "grad_norm": 0.5416052937507629, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0026, + "step": 22275 + }, + { + "epoch": 8.31, + "grad_norm": 0.6430708765983582, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0025, + "step": 22300 + }, + { + "epoch": 8.32, + "grad_norm": 0.6785486936569214, + "learning_rate": 7.80743718592965e-06, + "loss": 0.0031, + "step": 22325 + }, + { + "epoch": 8.33, + "grad_norm": 1.0664644241333008, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0026, + "step": 22350 + }, + { + "epoch": 8.34, + "grad_norm": 0.5900282859802246, + "learning_rate": 7.802412060301508e-06, + "loss": 0.002, + "step": 22375 + }, + { + "epoch": 8.35, + "grad_norm": 0.998041570186615, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0026, + "step": 22400 + }, + { + "epoch": 8.36, + "grad_norm": 0.6270501017570496, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0029, + "step": 22425 + }, + { + "epoch": 8.37, + "grad_norm": 0.7496184706687927, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0034, + "step": 22450 + }, + { + "epoch": 8.38, + "grad_norm": 0.6309234499931335, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0031, + "step": 22475 + }, + { + "epoch": 8.39, + "grad_norm": 0.5981239080429077, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0033, + "step": 22500 + }, + { + "epoch": 8.4, + "grad_norm": 0.7531147003173828, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0028, + "step": 22525 + }, + { + "epoch": 8.41, + "grad_norm": 0.46384957432746887, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0026, + "step": 22550 + }, + { + "epoch": 8.42, + "grad_norm": 0.8862837553024292, + "learning_rate": 7.782311557788945e-06, + "loss": 0.0037, + "step": 22575 + }, + { + "epoch": 8.43, + "grad_norm": 0.7283981442451477, + "learning_rate": 7.779798994974876e-06, + "loss": 0.0032, + "step": 22600 + }, + { + "epoch": 8.44, + "grad_norm": 0.7909378409385681, + "learning_rate": 7.777286432160805e-06, + "loss": 0.0033, + "step": 22625 + }, + { + "epoch": 8.45, + "grad_norm": 0.44968992471694946, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0034, + "step": 22650 + }, + { + "epoch": 8.45, + "grad_norm": 0.9901082515716553, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0026, + "step": 22675 + }, + { + "epoch": 8.46, + "grad_norm": 0.6165784001350403, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0025, + "step": 22700 + }, + { + "epoch": 8.47, + "grad_norm": 0.46937233209609985, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0031, + "step": 22725 + }, + { + "epoch": 8.48, + "grad_norm": 0.1812770515680313, + "learning_rate": 7.764723618090453e-06, + "loss": 0.0026, + "step": 22750 + }, + { + "epoch": 8.49, + "grad_norm": 0.6221500039100647, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0036, + "step": 22775 + }, + { + "epoch": 8.5, + "grad_norm": 0.7196748852729797, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0027, + "step": 22800 + }, + { + "epoch": 8.51, + "grad_norm": 0.7320850491523743, + "learning_rate": 7.757185929648243e-06, + "loss": 0.0033, + "step": 22825 + }, + { + "epoch": 8.52, + "grad_norm": 0.8708633780479431, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0033, + "step": 22850 + }, + { + "epoch": 8.53, + "grad_norm": 0.7438067197799683, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0032, + "step": 22875 + }, + { + "epoch": 8.54, + "grad_norm": 1.1094307899475098, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0033, + "step": 22900 + }, + { + "epoch": 8.55, + "grad_norm": 0.8764252066612244, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0033, + "step": 22925 + }, + { + "epoch": 8.56, + "grad_norm": 0.7047215104103088, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0036, + "step": 22950 + }, + { + "epoch": 8.57, + "grad_norm": 0.507269024848938, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0028, + "step": 22975 + }, + { + "epoch": 8.58, + "grad_norm": 0.8872129321098328, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0031, + "step": 23000 + }, + { + "epoch": 8.58, + "eval_loss": 0.12000907212495804, + "eval_runtime": 2228.1818, + "eval_samples_per_second": 0.673, + "eval_steps_per_second": 0.673, + "eval_wer": 20.933977455716587, + "step": 23000 + }, + { + "epoch": 8.59, + "grad_norm": 1.0686720609664917, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0036, + "step": 23025 + }, + { + "epoch": 8.59, + "grad_norm": 0.35889431834220886, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0028, + "step": 23050 + }, + { + "epoch": 8.6, + "grad_norm": 0.35856014490127563, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0029, + "step": 23075 + }, + { + "epoch": 8.61, + "grad_norm": 0.8146241307258606, + "learning_rate": 7.729547738693469e-06, + "loss": 0.0029, + "step": 23100 + }, + { + "epoch": 8.62, + "grad_norm": 0.6128034591674805, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0029, + "step": 23125 + }, + { + "epoch": 8.63, + "grad_norm": 0.6472421288490295, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0032, + "step": 23150 + }, + { + "epoch": 8.64, + "grad_norm": 0.852741539478302, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0033, + "step": 23175 + }, + { + "epoch": 8.65, + "grad_norm": 0.756095290184021, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0027, + "step": 23200 + }, + { + "epoch": 8.66, + "grad_norm": 0.7446980476379395, + "learning_rate": 7.716984924623117e-06, + "loss": 0.0029, + "step": 23225 + }, + { + "epoch": 8.67, + "grad_norm": 0.35505443811416626, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0028, + "step": 23250 + }, + { + "epoch": 8.68, + "grad_norm": 0.6885910630226135, + "learning_rate": 7.711959798994976e-06, + "loss": 0.0027, + "step": 23275 + }, + { + "epoch": 8.69, + "grad_norm": 1.1037429571151733, + "learning_rate": 7.709447236180905e-06, + "loss": 0.0034, + "step": 23300 + }, + { + "epoch": 8.7, + "grad_norm": 0.6643605828285217, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0028, + "step": 23325 + }, + { + "epoch": 8.71, + "grad_norm": 0.8734392523765564, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0032, + "step": 23350 + }, + { + "epoch": 8.72, + "grad_norm": 0.6964989900588989, + "learning_rate": 7.701909547738695e-06, + "loss": 0.0033, + "step": 23375 + }, + { + "epoch": 8.72, + "grad_norm": 0.8545100688934326, + "learning_rate": 7.699396984924624e-06, + "loss": 0.0032, + "step": 23400 + }, + { + "epoch": 8.73, + "grad_norm": 0.3230343759059906, + "learning_rate": 7.696884422110553e-06, + "loss": 0.0035, + "step": 23425 + }, + { + "epoch": 8.74, + "grad_norm": 0.2485278844833374, + "learning_rate": 7.694371859296483e-06, + "loss": 0.0032, + "step": 23450 + }, + { + "epoch": 8.75, + "grad_norm": 0.8558734655380249, + "learning_rate": 7.691859296482412e-06, + "loss": 0.0032, + "step": 23475 + }, + { + "epoch": 8.76, + "grad_norm": 0.687673807144165, + "learning_rate": 7.689346733668343e-06, + "loss": 0.0031, + "step": 23500 + }, + { + "epoch": 8.77, + "grad_norm": 1.0531641244888306, + "learning_rate": 7.68683417085427e-06, + "loss": 0.0029, + "step": 23525 + }, + { + "epoch": 8.78, + "grad_norm": 0.8428241610527039, + "learning_rate": 7.684321608040202e-06, + "loss": 0.0033, + "step": 23550 + }, + { + "epoch": 8.79, + "grad_norm": 0.7222681045532227, + "learning_rate": 7.681809045226131e-06, + "loss": 0.003, + "step": 23575 + }, + { + "epoch": 8.8, + "grad_norm": 0.7146342396736145, + "learning_rate": 7.67929648241206e-06, + "loss": 0.0031, + "step": 23600 + }, + { + "epoch": 8.81, + "grad_norm": 0.576969563961029, + "learning_rate": 7.676783919597991e-06, + "loss": 0.0032, + "step": 23625 + }, + { + "epoch": 8.82, + "grad_norm": 0.6906344890594482, + "learning_rate": 7.67427135678392e-06, + "loss": 0.0032, + "step": 23650 + }, + { + "epoch": 8.83, + "grad_norm": 0.7604743242263794, + "learning_rate": 7.671859296482412e-06, + "loss": 0.0036, + "step": 23675 + }, + { + "epoch": 8.84, + "grad_norm": 0.4674755930900574, + "learning_rate": 7.669346733668343e-06, + "loss": 0.0034, + "step": 23700 + }, + { + "epoch": 8.85, + "grad_norm": 1.027595043182373, + "learning_rate": 7.666834170854271e-06, + "loss": 0.003, + "step": 23725 + }, + { + "epoch": 8.86, + "grad_norm": 0.34596532583236694, + "learning_rate": 7.664321608040202e-06, + "loss": 0.0029, + "step": 23750 + }, + { + "epoch": 8.86, + "grad_norm": 0.6402170658111572, + "learning_rate": 7.661809045226131e-06, + "loss": 0.0033, + "step": 23775 + }, + { + "epoch": 8.87, + "grad_norm": 0.3813222050666809, + "learning_rate": 7.65929648241206e-06, + "loss": 0.0031, + "step": 23800 + }, + { + "epoch": 8.88, + "grad_norm": 1.1988670825958252, + "learning_rate": 7.656783919597992e-06, + "loss": 0.003, + "step": 23825 + }, + { + "epoch": 8.89, + "grad_norm": 0.37532904744148254, + "learning_rate": 7.65427135678392e-06, + "loss": 0.0033, + "step": 23850 + }, + { + "epoch": 8.9, + "grad_norm": 0.8171999454498291, + "learning_rate": 7.65175879396985e-06, + "loss": 0.0034, + "step": 23875 + }, + { + "epoch": 8.91, + "grad_norm": 0.8571725487709045, + "learning_rate": 7.64924623115578e-06, + "loss": 0.0032, + "step": 23900 + }, + { + "epoch": 8.92, + "grad_norm": 0.39908072352409363, + "learning_rate": 7.646733668341709e-06, + "loss": 0.0031, + "step": 23925 + }, + { + "epoch": 8.93, + "grad_norm": 0.522006094455719, + "learning_rate": 7.644221105527638e-06, + "loss": 0.0029, + "step": 23950 + }, + { + "epoch": 8.94, + "grad_norm": 0.6860712766647339, + "learning_rate": 7.64170854271357e-06, + "loss": 0.0029, + "step": 23975 + }, + { + "epoch": 8.95, + "grad_norm": 0.7376927137374878, + "learning_rate": 7.639195979899499e-06, + "loss": 0.0036, + "step": 24000 + }, + { + "epoch": 8.95, + "eval_loss": 0.11920194327831268, + "eval_runtime": 2597.976, + "eval_samples_per_second": 0.577, + "eval_steps_per_second": 0.577, + "eval_wer": 20.85696282293636, + "step": 24000 + }, + { + "epoch": 8.96, + "grad_norm": 1.2151786088943481, + "learning_rate": 7.636683417085428e-06, + "loss": 0.0031, + "step": 24025 + }, + { + "epoch": 8.97, + "grad_norm": 0.5957273840904236, + "learning_rate": 7.634170854271357e-06, + "loss": 0.0032, + "step": 24050 + }, + { + "epoch": 8.98, + "grad_norm": 0.9559849500656128, + "learning_rate": 7.631658291457287e-06, + "loss": 0.0029, + "step": 24075 + }, + { + "epoch": 8.99, + "grad_norm": 0.6935826539993286, + "learning_rate": 7.629145728643217e-06, + "loss": 0.0034, + "step": 24100 + }, + { + "epoch": 9.0, + "grad_norm": 0.4264591634273529, + "learning_rate": 7.626633165829146e-06, + "loss": 0.0029, + "step": 24125 + }, + { + "epoch": 9.0, + "grad_norm": 0.36418071389198303, + "learning_rate": 7.624120603015076e-06, + "loss": 0.0025, + "step": 24150 + }, + { + "epoch": 9.01, + "grad_norm": 0.6766936182975769, + "learning_rate": 7.621608040201006e-06, + "loss": 0.0022, + "step": 24175 + }, + { + "epoch": 9.02, + "grad_norm": 0.6072465777397156, + "learning_rate": 7.619095477386935e-06, + "loss": 0.0018, + "step": 24200 + }, + { + "epoch": 9.03, + "grad_norm": 0.27691781520843506, + "learning_rate": 7.616582914572865e-06, + "loss": 0.002, + "step": 24225 + }, + { + "epoch": 9.04, + "grad_norm": 0.8497575521469116, + "learning_rate": 7.614070351758794e-06, + "loss": 0.002, + "step": 24250 + }, + { + "epoch": 9.05, + "grad_norm": 0.6827080845832825, + "learning_rate": 7.6115577889447245e-06, + "loss": 0.0017, + "step": 24275 + }, + { + "epoch": 9.06, + "grad_norm": 0.6206353306770325, + "learning_rate": 7.609045226130654e-06, + "loss": 0.0021, + "step": 24300 + }, + { + "epoch": 9.07, + "grad_norm": 0.9590671062469482, + "learning_rate": 7.606532663316584e-06, + "loss": 0.0017, + "step": 24325 + }, + { + "epoch": 9.08, + "grad_norm": 1.1309138536453247, + "learning_rate": 7.6040201005025125e-06, + "loss": 0.0025, + "step": 24350 + }, + { + "epoch": 9.09, + "grad_norm": 0.7747287750244141, + "learning_rate": 7.601507537688443e-06, + "loss": 0.0019, + "step": 24375 + }, + { + "epoch": 9.1, + "grad_norm": 0.872003972530365, + "learning_rate": 7.598994974874373e-06, + "loss": 0.0022, + "step": 24400 + }, + { + "epoch": 9.11, + "grad_norm": 0.3777676522731781, + "learning_rate": 7.596482412060302e-06, + "loss": 0.002, + "step": 24425 + }, + { + "epoch": 9.12, + "grad_norm": 0.4657047390937805, + "learning_rate": 7.593969849246232e-06, + "loss": 0.002, + "step": 24450 + }, + { + "epoch": 9.13, + "grad_norm": 0.7994202971458435, + "learning_rate": 7.591457286432161e-06, + "loss": 0.0021, + "step": 24475 + }, + { + "epoch": 9.13, + "grad_norm": 0.81644606590271, + "learning_rate": 7.588944723618091e-06, + "loss": 0.0016, + "step": 24500 + }, + { + "epoch": 9.14, + "grad_norm": 0.13512270152568817, + "learning_rate": 7.58643216080402e-06, + "loss": 0.002, + "step": 24525 + }, + { + "epoch": 9.15, + "grad_norm": 0.4120093882083893, + "learning_rate": 7.5839195979899505e-06, + "loss": 0.0027, + "step": 24550 + }, + { + "epoch": 9.16, + "grad_norm": 0.3551156520843506, + "learning_rate": 7.58140703517588e-06, + "loss": 0.0018, + "step": 24575 + }, + { + "epoch": 9.17, + "grad_norm": 1.3830488920211792, + "learning_rate": 7.57889447236181e-06, + "loss": 0.0025, + "step": 24600 + }, + { + "epoch": 9.18, + "grad_norm": 0.7109416723251343, + "learning_rate": 7.57638190954774e-06, + "loss": 0.0024, + "step": 24625 + }, + { + "epoch": 9.19, + "grad_norm": 0.6290833353996277, + "learning_rate": 7.573869346733669e-06, + "loss": 0.002, + "step": 24650 + }, + { + "epoch": 9.2, + "grad_norm": 0.8846685886383057, + "learning_rate": 7.571356783919599e-06, + "loss": 0.0024, + "step": 24675 + }, + { + "epoch": 9.21, + "grad_norm": 0.6995250582695007, + "learning_rate": 7.568844221105528e-06, + "loss": 0.0023, + "step": 24700 + }, + { + "epoch": 9.22, + "grad_norm": 0.47895458340644836, + "learning_rate": 7.566331658291458e-06, + "loss": 0.0022, + "step": 24725 + }, + { + "epoch": 9.23, + "grad_norm": 0.4684296250343323, + "learning_rate": 7.563819095477387e-06, + "loss": 0.0021, + "step": 24750 + }, + { + "epoch": 9.24, + "grad_norm": 0.8366491794586182, + "learning_rate": 7.561306532663317e-06, + "loss": 0.0026, + "step": 24775 + }, + { + "epoch": 9.25, + "grad_norm": 0.6313892006874084, + "learning_rate": 7.558793969849247e-06, + "loss": 0.0021, + "step": 24800 + }, + { + "epoch": 9.26, + "grad_norm": 0.7972798347473145, + "learning_rate": 7.556281407035176e-06, + "loss": 0.002, + "step": 24825 + }, + { + "epoch": 9.27, + "grad_norm": 0.6378545165061951, + "learning_rate": 7.5537688442211066e-06, + "loss": 0.0018, + "step": 24850 + }, + { + "epoch": 9.27, + "grad_norm": 0.5369585156440735, + "learning_rate": 7.551256281407036e-06, + "loss": 0.0026, + "step": 24875 + }, + { + "epoch": 9.28, + "grad_norm": 0.3946349620819092, + "learning_rate": 7.548743718592966e-06, + "loss": 0.0021, + "step": 24900 + }, + { + "epoch": 9.29, + "grad_norm": 0.7439101338386536, + "learning_rate": 7.5462311557788945e-06, + "loss": 0.0024, + "step": 24925 + }, + { + "epoch": 9.3, + "grad_norm": 1.0787619352340698, + "learning_rate": 7.543718592964825e-06, + "loss": 0.0027, + "step": 24950 + }, + { + "epoch": 9.31, + "grad_norm": 0.44602033495903015, + "learning_rate": 7.541206030150754e-06, + "loss": 0.0022, + "step": 24975 + }, + { + "epoch": 9.32, + "grad_norm": 0.46719348430633545, + "learning_rate": 7.538693467336684e-06, + "loss": 0.0026, + "step": 25000 + }, + { + "epoch": 9.32, + "eval_loss": 0.1235472708940506, + "eval_runtime": 2240.7244, + "eval_samples_per_second": 0.669, + "eval_steps_per_second": 0.669, + "eval_wer": 20.28985507246377, + "step": 25000 + }, + { + "epoch": 9.33, + "grad_norm": 0.4324175715446472, + "learning_rate": 7.536180904522614e-06, + "loss": 0.003, + "step": 25025 + }, + { + "epoch": 9.34, + "grad_norm": 0.9558855891227722, + "learning_rate": 7.533668341708543e-06, + "loss": 0.0026, + "step": 25050 + }, + { + "epoch": 9.35, + "grad_norm": 0.51041579246521, + "learning_rate": 7.531155778894473e-06, + "loss": 0.0026, + "step": 25075 + }, + { + "epoch": 9.36, + "grad_norm": 0.6973673701286316, + "learning_rate": 7.528643216080402e-06, + "loss": 0.0025, + "step": 25100 + }, + { + "epoch": 9.37, + "grad_norm": 0.5147315859794617, + "learning_rate": 7.5261306532663325e-06, + "loss": 0.0028, + "step": 25125 + }, + { + "epoch": 9.38, + "grad_norm": 0.9491252303123474, + "learning_rate": 7.523618090452262e-06, + "loss": 0.0023, + "step": 25150 + }, + { + "epoch": 9.39, + "grad_norm": 0.6976878046989441, + "learning_rate": 7.521105527638192e-06, + "loss": 0.0025, + "step": 25175 + }, + { + "epoch": 9.4, + "grad_norm": 0.7121328711509705, + "learning_rate": 7.5185929648241205e-06, + "loss": 0.0032, + "step": 25200 + }, + { + "epoch": 9.41, + "grad_norm": 0.8452693819999695, + "learning_rate": 7.516080402010051e-06, + "loss": 0.0027, + "step": 25225 + }, + { + "epoch": 9.41, + "grad_norm": 0.7852888703346252, + "learning_rate": 7.513567839195981e-06, + "loss": 0.0022, + "step": 25250 + }, + { + "epoch": 9.42, + "grad_norm": 0.6372689604759216, + "learning_rate": 7.51105527638191e-06, + "loss": 0.003, + "step": 25275 + }, + { + "epoch": 9.43, + "grad_norm": 0.698472797870636, + "learning_rate": 7.50854271356784e-06, + "loss": 0.0027, + "step": 25300 + }, + { + "epoch": 9.44, + "grad_norm": 1.0388001203536987, + "learning_rate": 7.506030150753769e-06, + "loss": 0.0027, + "step": 25325 + }, + { + "epoch": 9.45, + "grad_norm": 0.8022056818008423, + "learning_rate": 7.503517587939699e-06, + "loss": 0.0026, + "step": 25350 + }, + { + "epoch": 9.46, + "grad_norm": 0.6779054403305054, + "learning_rate": 7.501005025125628e-06, + "loss": 0.003, + "step": 25375 + }, + { + "epoch": 9.47, + "grad_norm": 0.5311080813407898, + "learning_rate": 7.4984924623115585e-06, + "loss": 0.0024, + "step": 25400 + }, + { + "epoch": 9.48, + "grad_norm": 0.47882595658302307, + "learning_rate": 7.495979899497488e-06, + "loss": 0.0027, + "step": 25425 + }, + { + "epoch": 9.49, + "grad_norm": 0.7868289351463318, + "learning_rate": 7.493467336683418e-06, + "loss": 0.002, + "step": 25450 + }, + { + "epoch": 9.5, + "grad_norm": 0.6130481958389282, + "learning_rate": 7.490954773869348e-06, + "loss": 0.0027, + "step": 25475 + }, + { + "epoch": 9.51, + "grad_norm": 0.9351800680160522, + "learning_rate": 7.488442211055277e-06, + "loss": 0.0031, + "step": 25500 + }, + { + "epoch": 9.52, + "grad_norm": 0.642665445804596, + "learning_rate": 7.485929648241207e-06, + "loss": 0.0023, + "step": 25525 + }, + { + "epoch": 9.53, + "grad_norm": 0.6717450618743896, + "learning_rate": 7.483417085427136e-06, + "loss": 0.0027, + "step": 25550 + }, + { + "epoch": 9.54, + "grad_norm": 0.34949925541877747, + "learning_rate": 7.480904522613066e-06, + "loss": 0.0024, + "step": 25575 + }, + { + "epoch": 9.55, + "grad_norm": 0.37889760732650757, + "learning_rate": 7.478391959798995e-06, + "loss": 0.0025, + "step": 25600 + }, + { + "epoch": 9.55, + "grad_norm": 0.7171522974967957, + "learning_rate": 7.475879396984925e-06, + "loss": 0.003, + "step": 25625 + }, + { + "epoch": 9.56, + "grad_norm": 0.5108080506324768, + "learning_rate": 7.473366834170855e-06, + "loss": 0.0031, + "step": 25650 + }, + { + "epoch": 9.57, + "grad_norm": 0.6153137683868408, + "learning_rate": 7.470854271356784e-06, + "loss": 0.0023, + "step": 25675 + }, + { + "epoch": 9.58, + "grad_norm": 0.7465835809707642, + "learning_rate": 7.4683417085427146e-06, + "loss": 0.0031, + "step": 25700 + }, + { + "epoch": 9.59, + "grad_norm": 0.4158353805541992, + "learning_rate": 7.465829145728644e-06, + "loss": 0.0028, + "step": 25725 + }, + { + "epoch": 9.6, + "grad_norm": 0.15867747366428375, + "learning_rate": 7.463316582914574e-06, + "loss": 0.0026, + "step": 25750 + }, + { + "epoch": 9.61, + "grad_norm": 0.9674032330513, + "learning_rate": 7.4608040201005025e-06, + "loss": 0.0027, + "step": 25775 + }, + { + "epoch": 9.62, + "grad_norm": 0.24024401605129242, + "learning_rate": 7.458291457286433e-06, + "loss": 0.0027, + "step": 25800 + }, + { + "epoch": 9.63, + "grad_norm": 0.9952046275138855, + "learning_rate": 7.455778894472362e-06, + "loss": 0.0027, + "step": 25825 + }, + { + "epoch": 9.64, + "grad_norm": 0.8376314640045166, + "learning_rate": 7.453266331658292e-06, + "loss": 0.0022, + "step": 25850 + }, + { + "epoch": 9.65, + "grad_norm": 0.6554194092750549, + "learning_rate": 7.450753768844222e-06, + "loss": 0.0025, + "step": 25875 + }, + { + "epoch": 9.66, + "grad_norm": 0.57167649269104, + "learning_rate": 7.448241206030151e-06, + "loss": 0.0026, + "step": 25900 + }, + { + "epoch": 9.67, + "grad_norm": 0.9849819540977478, + "learning_rate": 7.445728643216081e-06, + "loss": 0.0023, + "step": 25925 + }, + { + "epoch": 9.68, + "grad_norm": 0.580420196056366, + "learning_rate": 7.44321608040201e-06, + "loss": 0.0026, + "step": 25950 + }, + { + "epoch": 9.68, + "grad_norm": 1.1368515491485596, + "learning_rate": 7.4407035175879405e-06, + "loss": 0.0024, + "step": 25975 + }, + { + "epoch": 9.69, + "grad_norm": 0.6638866662979126, + "learning_rate": 7.43819095477387e-06, + "loss": 0.0026, + "step": 26000 + }, + { + "epoch": 9.69, + "eval_loss": 0.12459346652030945, + "eval_runtime": 2250.9129, + "eval_samples_per_second": 0.666, + "eval_steps_per_second": 0.666, + "eval_wer": 20.331863053980257, + "step": 26000 + }, + { + "epoch": 9.7, + "grad_norm": 0.5731566548347473, + "learning_rate": 7.4356783919598e-06, + "loss": 0.0025, + "step": 26025 + }, + { + "epoch": 9.71, + "grad_norm": 0.6382424831390381, + "learning_rate": 7.4331658291457285e-06, + "loss": 0.0023, + "step": 26050 + }, + { + "epoch": 9.72, + "grad_norm": 0.6052413582801819, + "learning_rate": 7.430653266331659e-06, + "loss": 0.0033, + "step": 26075 + }, + { + "epoch": 9.73, + "grad_norm": 1.007561445236206, + "learning_rate": 7.428140703517589e-06, + "loss": 0.003, + "step": 26100 + }, + { + "epoch": 9.74, + "grad_norm": 0.7845065593719482, + "learning_rate": 7.425628140703518e-06, + "loss": 0.0037, + "step": 26125 + }, + { + "epoch": 9.75, + "grad_norm": 0.6885499358177185, + "learning_rate": 7.423115577889448e-06, + "loss": 0.0028, + "step": 26150 + }, + { + "epoch": 9.76, + "grad_norm": 0.47259077429771423, + "learning_rate": 7.420603015075377e-06, + "loss": 0.0026, + "step": 26175 + }, + { + "epoch": 9.77, + "grad_norm": 0.5333340167999268, + "learning_rate": 7.418090452261307e-06, + "loss": 0.0027, + "step": 26200 + }, + { + "epoch": 9.78, + "grad_norm": 0.6758116483688354, + "learning_rate": 7.415678391959799e-06, + "loss": 0.003, + "step": 26225 + }, + { + "epoch": 9.79, + "grad_norm": 0.4132218062877655, + "learning_rate": 7.4131658291457295e-06, + "loss": 0.0026, + "step": 26250 + }, + { + "epoch": 9.8, + "grad_norm": 0.9175272583961487, + "learning_rate": 7.410653266331659e-06, + "loss": 0.0031, + "step": 26275 + }, + { + "epoch": 9.81, + "grad_norm": 0.84358811378479, + "learning_rate": 7.408140703517589e-06, + "loss": 0.0026, + "step": 26300 + }, + { + "epoch": 9.82, + "grad_norm": 0.7115461826324463, + "learning_rate": 7.405628140703518e-06, + "loss": 0.0035, + "step": 26325 + }, + { + "epoch": 9.82, + "grad_norm": 0.8064160346984863, + "learning_rate": 7.4031155778894485e-06, + "loss": 0.0031, + "step": 26350 + }, + { + "epoch": 9.83, + "grad_norm": 0.563237726688385, + "learning_rate": 7.400603015075377e-06, + "loss": 0.0026, + "step": 26375 + }, + { + "epoch": 9.84, + "grad_norm": 1.4972032308578491, + "learning_rate": 7.398090452261307e-06, + "loss": 0.0031, + "step": 26400 + }, + { + "epoch": 9.85, + "grad_norm": 0.8694472312927246, + "learning_rate": 7.3955778894472365e-06, + "loss": 0.0027, + "step": 26425 + }, + { + "epoch": 9.86, + "grad_norm": 0.8762236833572388, + "learning_rate": 7.393065326633167e-06, + "loss": 0.0029, + "step": 26450 + }, + { + "epoch": 9.87, + "grad_norm": 0.7527874112129211, + "learning_rate": 7.390552763819097e-06, + "loss": 0.0028, + "step": 26475 + }, + { + "epoch": 9.88, + "grad_norm": 0.745526134967804, + "learning_rate": 7.388040201005025e-06, + "loss": 0.0027, + "step": 26500 + }, + { + "epoch": 9.89, + "grad_norm": 0.2051459103822708, + "learning_rate": 7.3855276381909555e-06, + "loss": 0.0023, + "step": 26525 + }, + { + "epoch": 9.9, + "grad_norm": 0.6855773329734802, + "learning_rate": 7.383015075376885e-06, + "loss": 0.0036, + "step": 26550 + }, + { + "epoch": 9.91, + "grad_norm": 0.3529352843761444, + "learning_rate": 7.380502512562815e-06, + "loss": 0.0029, + "step": 26575 + }, + { + "epoch": 9.92, + "grad_norm": 0.7308042645454407, + "learning_rate": 7.377989949748744e-06, + "loss": 0.0029, + "step": 26600 + }, + { + "epoch": 9.93, + "grad_norm": 1.082743763923645, + "learning_rate": 7.3754773869346745e-06, + "loss": 0.0033, + "step": 26625 + }, + { + "epoch": 9.94, + "grad_norm": 1.1484291553497314, + "learning_rate": 7.372964824120603e-06, + "loss": 0.003, + "step": 26650 + }, + { + "epoch": 9.95, + "grad_norm": 0.35545462369918823, + "learning_rate": 7.370452261306533e-06, + "loss": 0.0026, + "step": 26675 + }, + { + "epoch": 9.96, + "grad_norm": 0.710952639579773, + "learning_rate": 7.367939698492463e-06, + "loss": 0.0029, + "step": 26700 + }, + { + "epoch": 9.96, + "grad_norm": 1.0012743473052979, + "learning_rate": 7.365427135678393e-06, + "loss": 0.0027, + "step": 26725 + }, + { + "epoch": 9.97, + "grad_norm": 0.8366944789886475, + "learning_rate": 7.362914572864323e-06, + "loss": 0.0027, + "step": 26750 + }, + { + "epoch": 9.98, + "grad_norm": 0.7722690105438232, + "learning_rate": 7.360402010050251e-06, + "loss": 0.003, + "step": 26775 + }, + { + "epoch": 9.99, + "grad_norm": 0.2665681540966034, + "learning_rate": 7.357889447236181e-06, + "loss": 0.0027, + "step": 26800 + }, + { + "epoch": 10.0, + "grad_norm": 0.9175248146057129, + "learning_rate": 7.355376884422111e-06, + "loss": 0.0028, + "step": 26825 + }, + { + "epoch": 10.01, + "grad_norm": 0.15538986027240753, + "learning_rate": 7.352864321608041e-06, + "loss": 0.0019, + "step": 26850 + }, + { + "epoch": 10.02, + "grad_norm": 0.46411919593811035, + "learning_rate": 7.350351758793971e-06, + "loss": 0.0018, + "step": 26875 + }, + { + "epoch": 10.03, + "grad_norm": 0.5720204710960388, + "learning_rate": 7.3478391959799e-06, + "loss": 0.0017, + "step": 26900 + }, + { + "epoch": 10.04, + "grad_norm": 0.6277053356170654, + "learning_rate": 7.3453266331658306e-06, + "loss": 0.0017, + "step": 26925 + }, + { + "epoch": 10.05, + "grad_norm": 0.749801754951477, + "learning_rate": 7.342814070351759e-06, + "loss": 0.0021, + "step": 26950 + }, + { + "epoch": 10.06, + "grad_norm": 1.4902451038360596, + "learning_rate": 7.340301507537689e-06, + "loss": 0.0021, + "step": 26975 + }, + { + "epoch": 10.07, + "grad_norm": 0.6082587838172913, + "learning_rate": 7.3377889447236185e-06, + "loss": 0.002, + "step": 27000 + }, + { + "epoch": 10.07, + "eval_loss": 0.11978112906217575, + "eval_runtime": 2259.2276, + "eval_samples_per_second": 0.664, + "eval_steps_per_second": 0.664, + "eval_wer": 20.471889659035217, + "step": 27000 + }, + { + "epoch": 10.08, + "grad_norm": 0.832643985748291, + "learning_rate": 7.335276381909549e-06, + "loss": 0.0016, + "step": 27025 + }, + { + "epoch": 10.09, + "grad_norm": 0.2771880626678467, + "learning_rate": 7.332763819095477e-06, + "loss": 0.0018, + "step": 27050 + }, + { + "epoch": 10.1, + "grad_norm": 0.6638143658638, + "learning_rate": 7.330251256281407e-06, + "loss": 0.0019, + "step": 27075 + }, + { + "epoch": 10.1, + "grad_norm": 0.1923985481262207, + "learning_rate": 7.3277386934673375e-06, + "loss": 0.0016, + "step": 27100 + }, + { + "epoch": 10.11, + "grad_norm": 0.2749788165092468, + "learning_rate": 7.325226130653267e-06, + "loss": 0.0019, + "step": 27125 + }, + { + "epoch": 10.12, + "grad_norm": 0.7522522807121277, + "learning_rate": 7.322713567839197e-06, + "loss": 0.0018, + "step": 27150 + }, + { + "epoch": 10.13, + "grad_norm": 0.6832616329193115, + "learning_rate": 7.320201005025126e-06, + "loss": 0.0018, + "step": 27175 + }, + { + "epoch": 10.14, + "grad_norm": 0.7304291129112244, + "learning_rate": 7.3176884422110565e-06, + "loss": 0.002, + "step": 27200 + }, + { + "epoch": 10.15, + "grad_norm": 1.1205469369888306, + "learning_rate": 7.315175879396985e-06, + "loss": 0.0021, + "step": 27225 + }, + { + "epoch": 10.16, + "grad_norm": 0.6146823167800903, + "learning_rate": 7.312663316582915e-06, + "loss": 0.0019, + "step": 27250 + }, + { + "epoch": 10.17, + "grad_norm": 0.535991370677948, + "learning_rate": 7.3101507537688445e-06, + "loss": 0.0025, + "step": 27275 + }, + { + "epoch": 10.18, + "grad_norm": 0.47560831904411316, + "learning_rate": 7.307638190954775e-06, + "loss": 0.0021, + "step": 27300 + }, + { + "epoch": 10.19, + "grad_norm": 0.7862025499343872, + "learning_rate": 7.305125628140705e-06, + "loss": 0.0021, + "step": 27325 + }, + { + "epoch": 10.2, + "grad_norm": 0.7947404980659485, + "learning_rate": 7.302613065326633e-06, + "loss": 0.002, + "step": 27350 + }, + { + "epoch": 10.21, + "grad_norm": 0.8369277119636536, + "learning_rate": 7.3001005025125635e-06, + "loss": 0.0021, + "step": 27375 + }, + { + "epoch": 10.22, + "grad_norm": 0.8190271258354187, + "learning_rate": 7.297587939698493e-06, + "loss": 0.0019, + "step": 27400 + }, + { + "epoch": 10.23, + "grad_norm": 0.23659798502922058, + "learning_rate": 7.295075376884423e-06, + "loss": 0.0018, + "step": 27425 + }, + { + "epoch": 10.23, + "grad_norm": 0.551953911781311, + "learning_rate": 7.292562814070352e-06, + "loss": 0.0017, + "step": 27450 + }, + { + "epoch": 10.24, + "grad_norm": 1.1152583360671997, + "learning_rate": 7.2900502512562825e-06, + "loss": 0.0019, + "step": 27475 + }, + { + "epoch": 10.25, + "grad_norm": 1.0111933946609497, + "learning_rate": 7.287537688442211e-06, + "loss": 0.0022, + "step": 27500 + }, + { + "epoch": 10.26, + "grad_norm": 0.49843910336494446, + "learning_rate": 7.285025125628141e-06, + "loss": 0.0023, + "step": 27525 + }, + { + "epoch": 10.27, + "grad_norm": 0.45180103182792664, + "learning_rate": 7.282512562814071e-06, + "loss": 0.0022, + "step": 27550 + }, + { + "epoch": 10.28, + "grad_norm": 0.5013245940208435, + "learning_rate": 7.280000000000001e-06, + "loss": 0.0022, + "step": 27575 + }, + { + "epoch": 10.29, + "grad_norm": 0.7200151681900024, + "learning_rate": 7.277487437185931e-06, + "loss": 0.0021, + "step": 27600 + }, + { + "epoch": 10.3, + "grad_norm": 0.8902664184570312, + "learning_rate": 7.274974874371859e-06, + "loss": 0.0029, + "step": 27625 + }, + { + "epoch": 10.31, + "grad_norm": 0.9237521290779114, + "learning_rate": 7.272462311557789e-06, + "loss": 0.0023, + "step": 27650 + }, + { + "epoch": 10.32, + "grad_norm": 0.67653888463974, + "learning_rate": 7.269949748743719e-06, + "loss": 0.0021, + "step": 27675 + }, + { + "epoch": 10.33, + "grad_norm": 0.5429190397262573, + "learning_rate": 7.267437185929649e-06, + "loss": 0.0018, + "step": 27700 + }, + { + "epoch": 10.34, + "grad_norm": 0.46214979887008667, + "learning_rate": 7.264924623115579e-06, + "loss": 0.0018, + "step": 27725 + }, + { + "epoch": 10.35, + "grad_norm": 0.7237080931663513, + "learning_rate": 7.262412060301508e-06, + "loss": 0.002, + "step": 27750 + }, + { + "epoch": 10.36, + "grad_norm": 0.2808217704296112, + "learning_rate": 7.259899497487439e-06, + "loss": 0.002, + "step": 27775 + }, + { + "epoch": 10.37, + "grad_norm": 0.3009781837463379, + "learning_rate": 7.257386934673367e-06, + "loss": 0.0025, + "step": 27800 + }, + { + "epoch": 10.37, + "grad_norm": 0.4462699294090271, + "learning_rate": 7.254874371859297e-06, + "loss": 0.0024, + "step": 27825 + }, + { + "epoch": 10.38, + "grad_norm": 0.7285372018814087, + "learning_rate": 7.2523618090452265e-06, + "loss": 0.0024, + "step": 27850 + }, + { + "epoch": 10.39, + "grad_norm": 0.8641657829284668, + "learning_rate": 7.249849246231157e-06, + "loss": 0.0025, + "step": 27875 + }, + { + "epoch": 10.4, + "grad_norm": 0.534547746181488, + "learning_rate": 7.247336683417085e-06, + "loss": 0.002, + "step": 27900 + }, + { + "epoch": 10.41, + "grad_norm": 0.2854542136192322, + "learning_rate": 7.244824120603015e-06, + "loss": 0.0025, + "step": 27925 + }, + { + "epoch": 10.42, + "grad_norm": 0.4530239403247833, + "learning_rate": 7.2423115577889455e-06, + "loss": 0.0023, + "step": 27950 + }, + { + "epoch": 10.43, + "grad_norm": 0.2864063084125519, + "learning_rate": 7.239798994974875e-06, + "loss": 0.0027, + "step": 27975 + }, + { + "epoch": 10.44, + "grad_norm": 0.6051667928695679, + "learning_rate": 7.237286432160805e-06, + "loss": 0.0029, + "step": 28000 + }, + { + "epoch": 10.44, + "eval_loss": 0.12297185510396957, + "eval_runtime": 2247.1739, + "eval_samples_per_second": 0.668, + "eval_steps_per_second": 0.668, + "eval_wer": 20.485892319540714, + "step": 28000 + }, + { + "epoch": 10.45, + "grad_norm": 0.4759089946746826, + "learning_rate": 7.234773869346734e-06, + "loss": 0.002, + "step": 28025 + }, + { + "epoch": 10.46, + "grad_norm": 0.40071722865104675, + "learning_rate": 7.2322613065326645e-06, + "loss": 0.0027, + "step": 28050 + }, + { + "epoch": 10.47, + "grad_norm": 0.7381801605224609, + "learning_rate": 7.229748743718593e-06, + "loss": 0.0024, + "step": 28075 + }, + { + "epoch": 10.48, + "grad_norm": 0.7796308398246765, + "learning_rate": 7.227236180904523e-06, + "loss": 0.0017, + "step": 28100 + }, + { + "epoch": 10.49, + "grad_norm": 0.27503687143325806, + "learning_rate": 7.2247236180904525e-06, + "loss": 0.0017, + "step": 28125 + }, + { + "epoch": 10.5, + "grad_norm": 0.5961235165596008, + "learning_rate": 7.222211055276383e-06, + "loss": 0.0022, + "step": 28150 + }, + { + "epoch": 10.51, + "grad_norm": 0.4509584903717041, + "learning_rate": 7.219698492462313e-06, + "loss": 0.0026, + "step": 28175 + }, + { + "epoch": 10.51, + "grad_norm": 0.6932278871536255, + "learning_rate": 7.217185929648241e-06, + "loss": 0.0021, + "step": 28200 + }, + { + "epoch": 10.52, + "grad_norm": 0.4963659644126892, + "learning_rate": 7.2146733668341715e-06, + "loss": 0.0022, + "step": 28225 + }, + { + "epoch": 10.53, + "grad_norm": 0.3931860327720642, + "learning_rate": 7.212160804020101e-06, + "loss": 0.0023, + "step": 28250 + }, + { + "epoch": 10.54, + "grad_norm": 0.6099101305007935, + "learning_rate": 7.209648241206031e-06, + "loss": 0.002, + "step": 28275 + }, + { + "epoch": 10.55, + "grad_norm": 0.5055291056632996, + "learning_rate": 7.20713567839196e-06, + "loss": 0.0021, + "step": 28300 + }, + { + "epoch": 10.56, + "grad_norm": 0.4469512403011322, + "learning_rate": 7.2046231155778905e-06, + "loss": 0.0025, + "step": 28325 + }, + { + "epoch": 10.57, + "grad_norm": 0.6653869152069092, + "learning_rate": 7.20211055276382e-06, + "loss": 0.0021, + "step": 28350 + }, + { + "epoch": 10.58, + "grad_norm": 0.6149924993515015, + "learning_rate": 7.199597989949749e-06, + "loss": 0.0018, + "step": 28375 + }, + { + "epoch": 10.59, + "grad_norm": 0.7183948159217834, + "learning_rate": 7.197085427135679e-06, + "loss": 0.0022, + "step": 28400 + }, + { + "epoch": 10.6, + "grad_norm": 0.5853865742683411, + "learning_rate": 7.194572864321609e-06, + "loss": 0.0022, + "step": 28425 + }, + { + "epoch": 10.61, + "grad_norm": 0.4621337354183197, + "learning_rate": 7.192060301507539e-06, + "loss": 0.0028, + "step": 28450 + }, + { + "epoch": 10.62, + "grad_norm": 0.7207924127578735, + "learning_rate": 7.189547738693467e-06, + "loss": 0.0022, + "step": 28475 + }, + { + "epoch": 10.63, + "grad_norm": 0.5073688626289368, + "learning_rate": 7.187035175879397e-06, + "loss": 0.0024, + "step": 28500 + }, + { + "epoch": 10.64, + "grad_norm": 0.9469834566116333, + "learning_rate": 7.184522613065327e-06, + "loss": 0.0019, + "step": 28525 + }, + { + "epoch": 10.65, + "grad_norm": 0.6493642926216125, + "learning_rate": 7.182010050251257e-06, + "loss": 0.002, + "step": 28550 + }, + { + "epoch": 10.65, + "grad_norm": 0.22303488850593567, + "learning_rate": 7.179497487437187e-06, + "loss": 0.0026, + "step": 28575 + }, + { + "epoch": 10.66, + "grad_norm": 0.4241914749145508, + "learning_rate": 7.176984924623116e-06, + "loss": 0.0022, + "step": 28600 + }, + { + "epoch": 10.67, + "grad_norm": 0.5274967551231384, + "learning_rate": 7.174472361809047e-06, + "loss": 0.0023, + "step": 28625 + }, + { + "epoch": 10.68, + "grad_norm": 0.9440633654594421, + "learning_rate": 7.171959798994975e-06, + "loss": 0.0023, + "step": 28650 + }, + { + "epoch": 10.69, + "grad_norm": 0.9090691208839417, + "learning_rate": 7.169447236180905e-06, + "loss": 0.0025, + "step": 28675 + }, + { + "epoch": 10.7, + "grad_norm": 0.48036059737205505, + "learning_rate": 7.1669346733668345e-06, + "loss": 0.0021, + "step": 28700 + }, + { + "epoch": 10.71, + "grad_norm": 0.7912079095840454, + "learning_rate": 7.164422110552765e-06, + "loss": 0.0023, + "step": 28725 + }, + { + "epoch": 10.72, + "grad_norm": 0.6011233925819397, + "learning_rate": 7.161909547738693e-06, + "loss": 0.0025, + "step": 28750 + }, + { + "epoch": 10.73, + "grad_norm": 0.648358166217804, + "learning_rate": 7.159396984924623e-06, + "loss": 0.0022, + "step": 28775 + }, + { + "epoch": 10.74, + "grad_norm": 0.7206164002418518, + "learning_rate": 7.1568844221105535e-06, + "loss": 0.0024, + "step": 28800 + }, + { + "epoch": 10.75, + "grad_norm": 0.795949399471283, + "learning_rate": 7.154371859296483e-06, + "loss": 0.0027, + "step": 28825 + }, + { + "epoch": 10.76, + "grad_norm": 0.9254144430160522, + "learning_rate": 7.151859296482413e-06, + "loss": 0.002, + "step": 28850 + }, + { + "epoch": 10.77, + "grad_norm": 0.6881835460662842, + "learning_rate": 7.149346733668342e-06, + "loss": 0.0023, + "step": 28875 + }, + { + "epoch": 10.78, + "grad_norm": 0.7478961944580078, + "learning_rate": 7.1468341708542725e-06, + "loss": 0.0021, + "step": 28900 + }, + { + "epoch": 10.78, + "grad_norm": 0.4348626732826233, + "learning_rate": 7.144321608040201e-06, + "loss": 0.0022, + "step": 28925 + }, + { + "epoch": 10.79, + "grad_norm": 0.6980159282684326, + "learning_rate": 7.141809045226131e-06, + "loss": 0.0021, + "step": 28950 + }, + { + "epoch": 10.8, + "grad_norm": 0.5783941149711609, + "learning_rate": 7.139296482412061e-06, + "loss": 0.0026, + "step": 28975 + }, + { + "epoch": 10.81, + "grad_norm": 0.7216029763221741, + "learning_rate": 7.136783919597991e-06, + "loss": 0.0025, + "step": 29000 + }, + { + "epoch": 10.81, + "eval_loss": 0.1270843744277954, + "eval_runtime": 2534.1238, + "eval_samples_per_second": 0.592, + "eval_steps_per_second": 0.592, + "eval_wer": 20.198837779178046, + "step": 29000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 38, + "save_steps": 1000, + "total_flos": 2.677379910008832e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/bengali/checkpoint-29000/training_args.bin b/checkpoints/whisper-small/bengali/checkpoint-29000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..58b83ffc0897297ac67d2305d1de8c635e9546ed --- /dev/null +++ b/checkpoints/whisper-small/bengali/checkpoint-29000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6c4925891dda1e839d45f886691f1be33ea385ab1126415d18695ccf3d707c +size 4667 diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/config.json b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb289bfc120ad77c5505b0ef210c56bf35075f5 --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/generation_config.json b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/model.safetensors b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb61f41bbd5df9dd4e52c3a387c146a69c3ec4a4 --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b273d415a9b49d0b0e1e319189de80d5e8f231062e7f1a73f7bbffc8e4619baa +size 966995080 diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/optimizer.pt b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f3f4ae8a3777176421ace9d60b657f8ea0e91cc --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f41e8dc188063f2ba83f75d455b3b39be2edd2607b4270670a1b901a3979689 +size 1925063607 diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/preprocessor_config.json b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/rng_state.pth b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb7356d83149fd32b69a5d86ecad8fd66d70c1b5 --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c924e12824025246823df07c6f8add501f4f31a15b652d5b500da4f8ce7a900 +size 14575 diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/scheduler.pt b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb5755619f4b8239fb006dac73a22880aac55ec --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90abd31e81f687a6b8310cf494b1144560ebd09929c26c01cd8c17d7d89a1b0 +size 627 diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/trainer_state.json b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..64b3d52313557c5a54826b274564870b0068f37e --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/trainer_state.json @@ -0,0 +1,4067 @@ +{ + "best_metric": 16.212092313560937, + "best_model_checkpoint": "results/whisper-small/bhojpuri/checkpoint-4000", + "epoch": 4.701141705842847, + "eval_steps": 1000, + "global_step": 14000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 38.52085876464844, + "learning_rate": 4.4e-07, + "loss": 2.0864, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 14.146047592163086, + "learning_rate": 9.400000000000001e-07, + "loss": 1.6787, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 7.140896320343018, + "learning_rate": 1.44e-06, + "loss": 1.157, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 6.294301986694336, + "learning_rate": 1.94e-06, + "loss": 0.8862, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 5.385293960571289, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.7589, + "step": 125 + }, + { + "epoch": 0.05, + "grad_norm": 6.019792556762695, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.6783, + "step": 150 + }, + { + "epoch": 0.06, + "grad_norm": 5.567103862762451, + "learning_rate": 3.44e-06, + "loss": 0.6251, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.826569080352783, + "learning_rate": 3.94e-06, + "loss": 0.5913, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 5.927578926086426, + "learning_rate": 4.440000000000001e-06, + "loss": 0.5308, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 5.561408996582031, + "learning_rate": 4.94e-06, + "loss": 0.5101, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 5.24352502822876, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4584, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 5.5875139236450195, + "learning_rate": 5.94e-06, + "loss": 0.4001, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 4.692562103271484, + "learning_rate": 6.440000000000001e-06, + "loss": 0.3525, + "step": 325 + }, + { + "epoch": 0.12, + "grad_norm": 4.438690662384033, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.3175, + "step": 350 + }, + { + "epoch": 0.13, + "grad_norm": 4.259439945220947, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3018, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 3.5590274333953857, + "learning_rate": 7.94e-06, + "loss": 0.2985, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 3.6609902381896973, + "learning_rate": 8.44e-06, + "loss": 0.2848, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 3.56465220451355, + "learning_rate": 8.94e-06, + "loss": 0.2795, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 3.655308485031128, + "learning_rate": 9.440000000000001e-06, + "loss": 0.2807, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 4.50081205368042, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2636, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 3.905694007873535, + "learning_rate": 9.997788944723618e-06, + "loss": 0.2654, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 3.305668354034424, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2554, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 3.679680824279785, + "learning_rate": 9.992763819095477e-06, + "loss": 0.2505, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 3.7736847400665283, + "learning_rate": 9.990251256281408e-06, + "loss": 0.2366, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 3.487788677215576, + "learning_rate": 9.987738693467337e-06, + "loss": 0.2446, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 3.658851146697998, + "learning_rate": 9.985226130653267e-06, + "loss": 0.2432, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 3.0070464611053467, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2241, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 3.4317736625671387, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2282, + "step": 700 + }, + { + "epoch": 0.24, + "grad_norm": 3.389284610748291, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2232, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 4.095077991485596, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2174, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 3.4840753078460693, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2207, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 3.5679471492767334, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2098, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 3.352626323699951, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2125, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 3.4174857139587402, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2092, + "step": 850 + }, + { + "epoch": 0.29, + "grad_norm": 2.9747250080108643, + "learning_rate": 9.962613065326634e-06, + "loss": 0.2069, + "step": 875 + }, + { + "epoch": 0.3, + "grad_norm": 3.6843669414520264, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1968, + "step": 900 + }, + { + "epoch": 0.31, + "grad_norm": 2.9592769145965576, + "learning_rate": 9.957587939698493e-06, + "loss": 0.2069, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 2.8881349563598633, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1973, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 2.9347739219665527, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1861, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 3.145840644836426, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1931, + "step": 1000 + }, + { + "epoch": 0.34, + "eval_loss": 0.14424866437911987, + "eval_runtime": 1239.7062, + "eval_samples_per_second": 1.21, + "eval_steps_per_second": 1.21, + "eval_wer": 20.60525144637294, + "step": 1000 + }, + { + "epoch": 0.34, + "grad_norm": 3.016601085662842, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1973, + "step": 1025 + }, + { + "epoch": 0.35, + "grad_norm": 2.5092906951904297, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1916, + "step": 1050 + }, + { + "epoch": 0.36, + "grad_norm": 2.4269344806671143, + "learning_rate": 9.94251256281407e-06, + "loss": 0.2004, + "step": 1075 + }, + { + "epoch": 0.37, + "grad_norm": 2.499117851257324, + "learning_rate": 9.940000000000001e-06, + "loss": 0.194, + "step": 1100 + }, + { + "epoch": 0.38, + "grad_norm": 2.917263984680176, + "learning_rate": 9.93748743718593e-06, + "loss": 0.192, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 2.6278860569000244, + "learning_rate": 9.93497487437186e-06, + "loss": 0.181, + "step": 1150 + }, + { + "epoch": 0.39, + "grad_norm": 2.7981085777282715, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1798, + "step": 1175 + }, + { + "epoch": 0.4, + "grad_norm": 2.7456862926483154, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1872, + "step": 1200 + }, + { + "epoch": 0.41, + "grad_norm": 3.088850736618042, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1853, + "step": 1225 + }, + { + "epoch": 0.42, + "grad_norm": 2.890773057937622, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1806, + "step": 1250 + }, + { + "epoch": 0.43, + "grad_norm": 2.7347309589385986, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1779, + "step": 1275 + }, + { + "epoch": 0.44, + "grad_norm": 3.300636053085327, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1891, + "step": 1300 + }, + { + "epoch": 0.44, + "grad_norm": 2.157642364501953, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1821, + "step": 1325 + }, + { + "epoch": 0.45, + "grad_norm": 3.1858136653900146, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1767, + "step": 1350 + }, + { + "epoch": 0.46, + "grad_norm": 2.603768825531006, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1728, + "step": 1375 + }, + { + "epoch": 0.47, + "grad_norm": 2.8543457984924316, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1766, + "step": 1400 + }, + { + "epoch": 0.48, + "grad_norm": 3.005479574203491, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1707, + "step": 1425 + }, + { + "epoch": 0.49, + "grad_norm": 2.75410795211792, + "learning_rate": 9.904824120603015e-06, + "loss": 0.171, + "step": 1450 + }, + { + "epoch": 0.5, + "grad_norm": 3.1915152072906494, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1762, + "step": 1475 + }, + { + "epoch": 0.5, + "grad_norm": 2.781294345855713, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1711, + "step": 1500 + }, + { + "epoch": 0.51, + "grad_norm": 2.6292238235473633, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1698, + "step": 1525 + }, + { + "epoch": 0.52, + "grad_norm": 2.779613733291626, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1788, + "step": 1550 + }, + { + "epoch": 0.53, + "grad_norm": 2.761446952819824, + "learning_rate": 9.892261306532665e-06, + "loss": 0.169, + "step": 1575 + }, + { + "epoch": 0.54, + "grad_norm": 3.148118257522583, + "learning_rate": 9.889748743718593e-06, + "loss": 0.16, + "step": 1600 + }, + { + "epoch": 0.55, + "grad_norm": 2.50022554397583, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1593, + "step": 1625 + }, + { + "epoch": 0.55, + "grad_norm": 3.156440258026123, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1749, + "step": 1650 + }, + { + "epoch": 0.56, + "grad_norm": 2.396620273590088, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1532, + "step": 1675 + }, + { + "epoch": 0.57, + "grad_norm": 2.9509918689727783, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1607, + "step": 1700 + }, + { + "epoch": 0.58, + "grad_norm": 2.490330934524536, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1563, + "step": 1725 + }, + { + "epoch": 0.59, + "grad_norm": 2.764500617980957, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1653, + "step": 1750 + }, + { + "epoch": 0.6, + "grad_norm": 2.679232358932495, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1642, + "step": 1775 + }, + { + "epoch": 0.6, + "grad_norm": 2.5432581901550293, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1532, + "step": 1800 + }, + { + "epoch": 0.61, + "grad_norm": 2.749798059463501, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1597, + "step": 1825 + }, + { + "epoch": 0.62, + "grad_norm": 2.4122934341430664, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1559, + "step": 1850 + }, + { + "epoch": 0.63, + "grad_norm": 2.518965721130371, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1581, + "step": 1875 + }, + { + "epoch": 0.64, + "grad_norm": 2.848419427871704, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1589, + "step": 1900 + }, + { + "epoch": 0.65, + "grad_norm": 2.7931413650512695, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1557, + "step": 1925 + }, + { + "epoch": 0.65, + "grad_norm": 2.6249282360076904, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1552, + "step": 1950 + }, + { + "epoch": 0.66, + "grad_norm": 3.054999351501465, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1515, + "step": 1975 + }, + { + "epoch": 0.67, + "grad_norm": 3.0409722328186035, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1547, + "step": 2000 + }, + { + "epoch": 0.67, + "eval_loss": 0.12614324688911438, + "eval_runtime": 1209.1148, + "eval_samples_per_second": 1.241, + "eval_steps_per_second": 1.241, + "eval_wer": 18.32920083921419, + "step": 2000 + }, + { + "epoch": 0.68, + "grad_norm": 2.980689287185669, + "learning_rate": 9.847035175879398e-06, + "loss": 0.151, + "step": 2025 + }, + { + "epoch": 0.69, + "grad_norm": 2.4195737838745117, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1552, + "step": 2050 + }, + { + "epoch": 0.7, + "grad_norm": 2.7788686752319336, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1526, + "step": 2075 + }, + { + "epoch": 0.71, + "grad_norm": 2.528235912322998, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1538, + "step": 2100 + }, + { + "epoch": 0.71, + "grad_norm": 2.8485114574432373, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1513, + "step": 2125 + }, + { + "epoch": 0.72, + "grad_norm": 2.726245880126953, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1522, + "step": 2150 + }, + { + "epoch": 0.73, + "grad_norm": 2.5462193489074707, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1643, + "step": 2175 + }, + { + "epoch": 0.74, + "grad_norm": 2.344536066055298, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1489, + "step": 2200 + }, + { + "epoch": 0.75, + "grad_norm": 2.3175227642059326, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1438, + "step": 2225 + }, + { + "epoch": 0.76, + "grad_norm": 2.5934078693389893, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1445, + "step": 2250 + }, + { + "epoch": 0.76, + "grad_norm": 2.4497594833374023, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1537, + "step": 2275 + }, + { + "epoch": 0.77, + "grad_norm": 2.2248127460479736, + "learning_rate": 9.819396984924624e-06, + "loss": 0.1431, + "step": 2300 + }, + { + "epoch": 0.78, + "grad_norm": 2.724398136138916, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1375, + "step": 2325 + }, + { + "epoch": 0.79, + "grad_norm": 2.9911482334136963, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1461, + "step": 2350 + }, + { + "epoch": 0.8, + "grad_norm": 2.5092427730560303, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1379, + "step": 2375 + }, + { + "epoch": 0.81, + "grad_norm": 1.8499549627304077, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1478, + "step": 2400 + }, + { + "epoch": 0.81, + "grad_norm": 2.646444797515869, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1357, + "step": 2425 + }, + { + "epoch": 0.82, + "grad_norm": 2.5554916858673096, + "learning_rate": 9.804321608040202e-06, + "loss": 0.142, + "step": 2450 + }, + { + "epoch": 0.83, + "grad_norm": 3.00333833694458, + "learning_rate": 9.801809045226131e-06, + "loss": 0.1447, + "step": 2475 + }, + { + "epoch": 0.84, + "grad_norm": 2.4438586235046387, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1422, + "step": 2500 + }, + { + "epoch": 0.85, + "grad_norm": 2.054206371307373, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1413, + "step": 2525 + }, + { + "epoch": 0.86, + "grad_norm": 2.2142333984375, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1331, + "step": 2550 + }, + { + "epoch": 0.86, + "grad_norm": 3.076545000076294, + "learning_rate": 9.79175879396985e-06, + "loss": 0.1466, + "step": 2575 + }, + { + "epoch": 0.87, + "grad_norm": 2.522453546524048, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1381, + "step": 2600 + }, + { + "epoch": 0.88, + "grad_norm": 2.927096128463745, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1422, + "step": 2625 + }, + { + "epoch": 0.89, + "grad_norm": 2.5991053581237793, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1417, + "step": 2650 + }, + { + "epoch": 0.9, + "grad_norm": 2.5746214389801025, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1341, + "step": 2675 + }, + { + "epoch": 0.91, + "grad_norm": 2.3016889095306396, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1466, + "step": 2700 + }, + { + "epoch": 0.92, + "grad_norm": 2.497462034225464, + "learning_rate": 9.776683417085428e-06, + "loss": 0.1389, + "step": 2725 + }, + { + "epoch": 0.92, + "grad_norm": 2.80090594291687, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1364, + "step": 2750 + }, + { + "epoch": 0.93, + "grad_norm": 1.9670957326889038, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1333, + "step": 2775 + }, + { + "epoch": 0.94, + "grad_norm": 2.1514508724212646, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1354, + "step": 2800 + }, + { + "epoch": 0.95, + "grad_norm": 2.4486563205718994, + "learning_rate": 9.766633165829147e-06, + "loss": 0.138, + "step": 2825 + }, + { + "epoch": 0.96, + "grad_norm": 2.57344388961792, + "learning_rate": 9.764120603015076e-06, + "loss": 0.1314, + "step": 2850 + }, + { + "epoch": 0.97, + "grad_norm": 2.3406975269317627, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1386, + "step": 2875 + }, + { + "epoch": 0.97, + "grad_norm": 2.5869216918945312, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1461, + "step": 2900 + }, + { + "epoch": 0.98, + "grad_norm": 2.7753987312316895, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1353, + "step": 2925 + }, + { + "epoch": 0.99, + "grad_norm": 2.578148365020752, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1384, + "step": 2950 + }, + { + "epoch": 1.0, + "grad_norm": 3.007187843322754, + "learning_rate": 9.751557788944724e-06, + "loss": 0.1329, + "step": 2975 + }, + { + "epoch": 1.01, + "grad_norm": 2.3490207195281982, + "learning_rate": 9.749045226130654e-06, + "loss": 0.1107, + "step": 3000 + }, + { + "epoch": 1.01, + "eval_loss": 0.11443744599819183, + "eval_runtime": 1200.5531, + "eval_samples_per_second": 1.249, + "eval_steps_per_second": 1.249, + "eval_wer": 16.930510521965797, + "step": 3000 + }, + { + "epoch": 1.02, + "grad_norm": 1.9672869443893433, + "learning_rate": 9.746532663316583e-06, + "loss": 0.1021, + "step": 3025 + }, + { + "epoch": 1.02, + "grad_norm": 2.2786271572113037, + "learning_rate": 9.744020100502514e-06, + "loss": 0.1038, + "step": 3050 + }, + { + "epoch": 1.03, + "grad_norm": 2.115065813064575, + "learning_rate": 9.741507537688443e-06, + "loss": 0.1016, + "step": 3075 + }, + { + "epoch": 1.04, + "grad_norm": 2.0789096355438232, + "learning_rate": 9.738994974874373e-06, + "loss": 0.1057, + "step": 3100 + }, + { + "epoch": 1.05, + "grad_norm": 2.430433750152588, + "learning_rate": 9.736482412060302e-06, + "loss": 0.1002, + "step": 3125 + }, + { + "epoch": 1.06, + "grad_norm": 2.0710041522979736, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0993, + "step": 3150 + }, + { + "epoch": 1.07, + "grad_norm": 2.312781810760498, + "learning_rate": 9.731457286432162e-06, + "loss": 0.1068, + "step": 3175 + }, + { + "epoch": 1.07, + "grad_norm": 2.0690701007843018, + "learning_rate": 9.728944723618092e-06, + "loss": 0.1089, + "step": 3200 + }, + { + "epoch": 1.08, + "grad_norm": 1.7267590761184692, + "learning_rate": 9.726432160804021e-06, + "loss": 0.0996, + "step": 3225 + }, + { + "epoch": 1.09, + "grad_norm": 1.9900727272033691, + "learning_rate": 9.72391959798995e-06, + "loss": 0.1037, + "step": 3250 + }, + { + "epoch": 1.1, + "grad_norm": 1.9031728506088257, + "learning_rate": 9.721407035175881e-06, + "loss": 0.1041, + "step": 3275 + }, + { + "epoch": 1.11, + "grad_norm": 2.0556740760803223, + "learning_rate": 9.718894472361809e-06, + "loss": 0.1082, + "step": 3300 + }, + { + "epoch": 1.12, + "grad_norm": 2.350287675857544, + "learning_rate": 9.71638190954774e-06, + "loss": 0.1071, + "step": 3325 + }, + { + "epoch": 1.12, + "grad_norm": 2.0579347610473633, + "learning_rate": 9.71386934673367e-06, + "loss": 0.1091, + "step": 3350 + }, + { + "epoch": 1.13, + "grad_norm": 2.152581214904785, + "learning_rate": 9.711356783919599e-06, + "loss": 0.1025, + "step": 3375 + }, + { + "epoch": 1.14, + "grad_norm": 2.1055026054382324, + "learning_rate": 9.70884422110553e-06, + "loss": 0.1045, + "step": 3400 + }, + { + "epoch": 1.15, + "grad_norm": 2.3664803504943848, + "learning_rate": 9.706331658291457e-06, + "loss": 0.1037, + "step": 3425 + }, + { + "epoch": 1.16, + "grad_norm": 2.2709498405456543, + "learning_rate": 9.703819095477388e-06, + "loss": 0.1035, + "step": 3450 + }, + { + "epoch": 1.17, + "grad_norm": 2.126365900039673, + "learning_rate": 9.701306532663318e-06, + "loss": 0.1078, + "step": 3475 + }, + { + "epoch": 1.18, + "grad_norm": 2.2287137508392334, + "learning_rate": 9.698793969849247e-06, + "loss": 0.1007, + "step": 3500 + }, + { + "epoch": 1.18, + "grad_norm": 2.4989378452301025, + "learning_rate": 9.696281407035176e-06, + "loss": 0.1072, + "step": 3525 + }, + { + "epoch": 1.19, + "grad_norm": 2.3748016357421875, + "learning_rate": 9.693768844221107e-06, + "loss": 0.1088, + "step": 3550 + }, + { + "epoch": 1.2, + "grad_norm": 1.9572185277938843, + "learning_rate": 9.691256281407035e-06, + "loss": 0.1062, + "step": 3575 + }, + { + "epoch": 1.21, + "grad_norm": 2.591372489929199, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0993, + "step": 3600 + }, + { + "epoch": 1.22, + "grad_norm": 1.9727762937545776, + "learning_rate": 9.686231155778895e-06, + "loss": 0.1034, + "step": 3625 + }, + { + "epoch": 1.23, + "grad_norm": 1.9524997472763062, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0981, + "step": 3650 + }, + { + "epoch": 1.23, + "grad_norm": 2.125340223312378, + "learning_rate": 9.681206030150756e-06, + "loss": 0.1038, + "step": 3675 + }, + { + "epoch": 1.24, + "grad_norm": 2.0692999362945557, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0967, + "step": 3700 + }, + { + "epoch": 1.25, + "grad_norm": 2.5828864574432373, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0984, + "step": 3725 + }, + { + "epoch": 1.26, + "grad_norm": 2.1964194774627686, + "learning_rate": 9.673668341708544e-06, + "loss": 0.1044, + "step": 3750 + }, + { + "epoch": 1.27, + "grad_norm": 1.9648118019104004, + "learning_rate": 9.671155778894473e-06, + "loss": 0.1032, + "step": 3775 + }, + { + "epoch": 1.28, + "grad_norm": 2.444783926010132, + "learning_rate": 9.668643216080404e-06, + "loss": 0.1009, + "step": 3800 + }, + { + "epoch": 1.28, + "grad_norm": 2.378009080886841, + "learning_rate": 9.666130653266333e-06, + "loss": 0.1029, + "step": 3825 + }, + { + "epoch": 1.29, + "grad_norm": 1.816193699836731, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0986, + "step": 3850 + }, + { + "epoch": 1.3, + "grad_norm": 2.2548534870147705, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0974, + "step": 3875 + }, + { + "epoch": 1.31, + "grad_norm": 1.793755292892456, + "learning_rate": 9.658592964824121e-06, + "loss": 0.1009, + "step": 3900 + }, + { + "epoch": 1.32, + "grad_norm": 2.2103044986724854, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0938, + "step": 3925 + }, + { + "epoch": 1.33, + "grad_norm": 2.2393457889556885, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0983, + "step": 3950 + }, + { + "epoch": 1.33, + "grad_norm": 2.3167622089385986, + "learning_rate": 9.651055276381909e-06, + "loss": 0.1009, + "step": 3975 + }, + { + "epoch": 1.34, + "grad_norm": 1.7563724517822266, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0935, + "step": 4000 + }, + { + "epoch": 1.34, + "eval_loss": 0.11205999553203583, + "eval_runtime": 1203.958, + "eval_samples_per_second": 1.246, + "eval_steps_per_second": 1.246, + "eval_wer": 16.212092313560937, + "step": 4000 + }, + { + "epoch": 1.35, + "grad_norm": 2.0732035636901855, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0942, + "step": 4025 + }, + { + "epoch": 1.36, + "grad_norm": 2.404982089996338, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0994, + "step": 4050 + }, + { + "epoch": 1.37, + "grad_norm": 2.1737546920776367, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0994, + "step": 4075 + }, + { + "epoch": 1.38, + "grad_norm": 2.2618534564971924, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0944, + "step": 4100 + }, + { + "epoch": 1.39, + "grad_norm": 1.758551836013794, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0928, + "step": 4125 + }, + { + "epoch": 1.39, + "grad_norm": 2.2565081119537354, + "learning_rate": 9.633467336683418e-06, + "loss": 0.1035, + "step": 4150 + }, + { + "epoch": 1.4, + "grad_norm": 2.13118052482605, + "learning_rate": 9.630954773869347e-06, + "loss": 0.1049, + "step": 4175 + }, + { + "epoch": 1.41, + "grad_norm": 1.8587472438812256, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0979, + "step": 4200 + }, + { + "epoch": 1.42, + "grad_norm": 2.18764328956604, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0907, + "step": 4225 + }, + { + "epoch": 1.43, + "grad_norm": 2.452057123184204, + "learning_rate": 9.623417085427137e-06, + "loss": 0.1028, + "step": 4250 + }, + { + "epoch": 1.44, + "grad_norm": 1.5477485656738281, + "learning_rate": 9.620904522613066e-06, + "loss": 0.101, + "step": 4275 + }, + { + "epoch": 1.44, + "grad_norm": 2.007200002670288, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0974, + "step": 4300 + }, + { + "epoch": 1.45, + "grad_norm": 2.107224464416504, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0955, + "step": 4325 + }, + { + "epoch": 1.46, + "grad_norm": 2.578660249710083, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0975, + "step": 4350 + }, + { + "epoch": 1.47, + "grad_norm": 2.279788017272949, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0979, + "step": 4375 + }, + { + "epoch": 1.48, + "grad_norm": 2.5049729347229004, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0957, + "step": 4400 + }, + { + "epoch": 1.49, + "grad_norm": 1.9433900117874146, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0919, + "step": 4425 + }, + { + "epoch": 1.49, + "grad_norm": 2.3982107639312744, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0937, + "step": 4450 + }, + { + "epoch": 1.5, + "grad_norm": 2.1906967163085938, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0928, + "step": 4475 + }, + { + "epoch": 1.51, + "grad_norm": 2.0660579204559326, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0937, + "step": 4500 + }, + { + "epoch": 1.52, + "grad_norm": 2.1935133934020996, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0993, + "step": 4525 + }, + { + "epoch": 1.53, + "grad_norm": 2.017202854156494, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0936, + "step": 4550 + }, + { + "epoch": 1.54, + "grad_norm": 2.659418821334839, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0957, + "step": 4575 + }, + { + "epoch": 1.54, + "grad_norm": 2.131288766860962, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0957, + "step": 4600 + }, + { + "epoch": 1.55, + "grad_norm": 2.165925979614258, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0944, + "step": 4625 + }, + { + "epoch": 1.56, + "grad_norm": 2.2247161865234375, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0955, + "step": 4650 + }, + { + "epoch": 1.57, + "grad_norm": 2.370002508163452, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0953, + "step": 4675 + }, + { + "epoch": 1.58, + "grad_norm": 2.50720477104187, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0904, + "step": 4700 + }, + { + "epoch": 1.59, + "grad_norm": 2.377450466156006, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0905, + "step": 4725 + }, + { + "epoch": 1.6, + "grad_norm": 2.114288330078125, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0926, + "step": 4750 + }, + { + "epoch": 1.6, + "grad_norm": 2.515768527984619, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0911, + "step": 4775 + }, + { + "epoch": 1.61, + "grad_norm": 2.3126296997070312, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0932, + "step": 4800 + }, + { + "epoch": 1.62, + "grad_norm": 2.142967462539673, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0875, + "step": 4825 + }, + { + "epoch": 1.63, + "grad_norm": 1.8645638227462769, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0958, + "step": 4850 + }, + { + "epoch": 1.64, + "grad_norm": 2.11171817779541, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0934, + "step": 4875 + }, + { + "epoch": 1.65, + "grad_norm": 2.1277499198913574, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0977, + "step": 4900 + }, + { + "epoch": 1.65, + "grad_norm": 1.9110116958618164, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0961, + "step": 4925 + }, + { + "epoch": 1.66, + "grad_norm": 2.294362783432007, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0908, + "step": 4950 + }, + { + "epoch": 1.67, + "grad_norm": 1.882030963897705, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0954, + "step": 4975 + }, + { + "epoch": 1.68, + "grad_norm": 2.1335039138793945, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0922, + "step": 5000 + }, + { + "epoch": 1.68, + "eval_loss": 0.11593595147132874, + "eval_runtime": 1483.6182, + "eval_samples_per_second": 1.011, + "eval_steps_per_second": 1.011, + "eval_wer": 16.644414775255896, + "step": 5000 + }, + { + "epoch": 1.69, + "grad_norm": 2.190211772918701, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0886, + "step": 5025 + }, + { + "epoch": 1.7, + "grad_norm": 2.289309024810791, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0967, + "step": 5050 + }, + { + "epoch": 1.7, + "grad_norm": 1.9779537916183472, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0917, + "step": 5075 + }, + { + "epoch": 1.71, + "grad_norm": 2.303128719329834, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0931, + "step": 5100 + }, + { + "epoch": 1.72, + "grad_norm": 2.274324655532837, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0938, + "step": 5125 + }, + { + "epoch": 1.73, + "grad_norm": 2.0302109718322754, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0842, + "step": 5150 + }, + { + "epoch": 1.74, + "grad_norm": 2.228283166885376, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0925, + "step": 5175 + }, + { + "epoch": 1.75, + "grad_norm": 1.6405620574951172, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0802, + "step": 5200 + }, + { + "epoch": 1.75, + "grad_norm": 2.4471089839935303, + "learning_rate": 9.525427135678392e-06, + "loss": 0.09, + "step": 5225 + }, + { + "epoch": 1.76, + "grad_norm": 2.227238178253174, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0854, + "step": 5250 + }, + { + "epoch": 1.77, + "grad_norm": 2.260730504989624, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0922, + "step": 5275 + }, + { + "epoch": 1.78, + "grad_norm": 2.2782115936279297, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0939, + "step": 5300 + }, + { + "epoch": 1.79, + "grad_norm": 2.3438827991485596, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0931, + "step": 5325 + }, + { + "epoch": 1.8, + "grad_norm": 2.6374075412750244, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0892, + "step": 5350 + }, + { + "epoch": 1.8, + "grad_norm": 2.2760393619537354, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0888, + "step": 5375 + }, + { + "epoch": 1.81, + "grad_norm": 2.222639560699463, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0868, + "step": 5400 + }, + { + "epoch": 1.82, + "grad_norm": 2.4721813201904297, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0847, + "step": 5425 + }, + { + "epoch": 1.83, + "grad_norm": 2.0724403858184814, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0886, + "step": 5450 + }, + { + "epoch": 1.84, + "grad_norm": 2.392240285873413, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0894, + "step": 5475 + }, + { + "epoch": 1.85, + "grad_norm": 2.244722366333008, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0853, + "step": 5500 + }, + { + "epoch": 1.86, + "grad_norm": 2.297687530517578, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0837, + "step": 5525 + }, + { + "epoch": 1.86, + "grad_norm": 2.2951018810272217, + "learning_rate": 9.492763819095479e-06, + "loss": 0.0929, + "step": 5550 + }, + { + "epoch": 1.87, + "grad_norm": 1.9825172424316406, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0867, + "step": 5575 + }, + { + "epoch": 1.88, + "grad_norm": 1.8870315551757812, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0895, + "step": 5600 + }, + { + "epoch": 1.89, + "grad_norm": 2.1595964431762695, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0841, + "step": 5625 + }, + { + "epoch": 1.9, + "grad_norm": 1.8686363697052002, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0879, + "step": 5650 + }, + { + "epoch": 1.91, + "grad_norm": 2.127387046813965, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0891, + "step": 5675 + }, + { + "epoch": 1.91, + "grad_norm": 1.3881630897521973, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0845, + "step": 5700 + }, + { + "epoch": 1.92, + "grad_norm": 2.1334125995635986, + "learning_rate": 9.475175879396985e-06, + "loss": 0.085, + "step": 5725 + }, + { + "epoch": 1.93, + "grad_norm": 2.470689058303833, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0848, + "step": 5750 + }, + { + "epoch": 1.94, + "grad_norm": 1.901123285293579, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0871, + "step": 5775 + }, + { + "epoch": 1.95, + "grad_norm": 2.25144624710083, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0802, + "step": 5800 + }, + { + "epoch": 1.96, + "grad_norm": 2.154329299926758, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0865, + "step": 5825 + }, + { + "epoch": 1.96, + "grad_norm": 2.1356594562530518, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0893, + "step": 5850 + }, + { + "epoch": 1.97, + "grad_norm": 2.561053991317749, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0887, + "step": 5875 + }, + { + "epoch": 1.98, + "grad_norm": 2.1917669773101807, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0893, + "step": 5900 + }, + { + "epoch": 1.99, + "grad_norm": 2.443690776824951, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0868, + "step": 5925 + }, + { + "epoch": 2.0, + "grad_norm": 2.1945700645446777, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0833, + "step": 5950 + }, + { + "epoch": 2.01, + "grad_norm": 1.7843177318572998, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0648, + "step": 5975 + }, + { + "epoch": 2.01, + "grad_norm": 2.0493338108062744, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0567, + "step": 6000 + }, + { + "epoch": 2.01, + "eval_loss": 0.11751247197389603, + "eval_runtime": 1204.8499, + "eval_samples_per_second": 1.245, + "eval_steps_per_second": 1.245, + "eval_wer": 17.044948820649754, + "step": 6000 + }, + { + "epoch": 2.02, + "grad_norm": 1.7244980335235596, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0544, + "step": 6025 + }, + { + "epoch": 2.03, + "grad_norm": 1.6859246492385864, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0569, + "step": 6050 + }, + { + "epoch": 2.04, + "grad_norm": 1.8256285190582275, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0555, + "step": 6075 + }, + { + "epoch": 2.05, + "grad_norm": 1.8538511991500854, + "learning_rate": 9.43748743718593e-06, + "loss": 0.056, + "step": 6100 + }, + { + "epoch": 2.06, + "grad_norm": 1.9213556051254272, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0545, + "step": 6125 + }, + { + "epoch": 2.07, + "grad_norm": 1.509080171585083, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0529, + "step": 6150 + }, + { + "epoch": 2.07, + "grad_norm": 2.0828611850738525, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0569, + "step": 6175 + }, + { + "epoch": 2.08, + "grad_norm": 2.4042632579803467, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0548, + "step": 6200 + }, + { + "epoch": 2.09, + "grad_norm": 2.2747738361358643, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0579, + "step": 6225 + }, + { + "epoch": 2.1, + "grad_norm": 2.0173397064208984, + "learning_rate": 9.422412060301508e-06, + "loss": 0.053, + "step": 6250 + }, + { + "epoch": 2.11, + "grad_norm": 2.1345813274383545, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0543, + "step": 6275 + }, + { + "epoch": 2.12, + "grad_norm": 1.885754108428955, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0565, + "step": 6300 + }, + { + "epoch": 2.12, + "grad_norm": 1.9867273569107056, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0556, + "step": 6325 + }, + { + "epoch": 2.13, + "grad_norm": 1.931341290473938, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0573, + "step": 6350 + }, + { + "epoch": 2.14, + "grad_norm": 2.4369380474090576, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0566, + "step": 6375 + }, + { + "epoch": 2.15, + "grad_norm": 2.072962999343872, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0529, + "step": 6400 + }, + { + "epoch": 2.16, + "grad_norm": 2.0641324520111084, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0511, + "step": 6425 + }, + { + "epoch": 2.17, + "grad_norm": 1.9909549951553345, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0547, + "step": 6450 + }, + { + "epoch": 2.17, + "grad_norm": 1.5756068229675293, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0542, + "step": 6475 + }, + { + "epoch": 2.18, + "grad_norm": 1.9870116710662842, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0575, + "step": 6500 + }, + { + "epoch": 2.19, + "grad_norm": 1.7489418983459473, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0526, + "step": 6525 + }, + { + "epoch": 2.2, + "grad_norm": 2.0680105686187744, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0584, + "step": 6550 + }, + { + "epoch": 2.21, + "grad_norm": 1.8634275197982788, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0544, + "step": 6575 + }, + { + "epoch": 2.22, + "grad_norm": 1.9182085990905762, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0552, + "step": 6600 + }, + { + "epoch": 2.22, + "grad_norm": 1.8247032165527344, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0513, + "step": 6625 + }, + { + "epoch": 2.23, + "grad_norm": 1.880985140800476, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0583, + "step": 6650 + }, + { + "epoch": 2.24, + "grad_norm": 1.7646541595458984, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0551, + "step": 6675 + }, + { + "epoch": 2.25, + "grad_norm": 1.6189504861831665, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0545, + "step": 6700 + }, + { + "epoch": 2.26, + "grad_norm": 1.6895216703414917, + "learning_rate": 9.374673366834172e-06, + "loss": 0.052, + "step": 6725 + }, + { + "epoch": 2.27, + "grad_norm": 1.923359990119934, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0524, + "step": 6750 + }, + { + "epoch": 2.28, + "grad_norm": 1.8009233474731445, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0533, + "step": 6775 + }, + { + "epoch": 2.28, + "grad_norm": 1.68576979637146, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0539, + "step": 6800 + }, + { + "epoch": 2.29, + "grad_norm": 2.0319864749908447, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0511, + "step": 6825 + }, + { + "epoch": 2.3, + "grad_norm": 1.9988038539886475, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0511, + "step": 6850 + }, + { + "epoch": 2.31, + "grad_norm": 2.098829746246338, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0473, + "step": 6875 + }, + { + "epoch": 2.32, + "grad_norm": 2.042389392852783, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0504, + "step": 6900 + }, + { + "epoch": 2.33, + "grad_norm": 1.7377086877822876, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0488, + "step": 6925 + }, + { + "epoch": 2.33, + "grad_norm": 1.8250439167022705, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0576, + "step": 6950 + }, + { + "epoch": 2.34, + "grad_norm": 2.2422337532043457, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0513, + "step": 6975 + }, + { + "epoch": 2.35, + "grad_norm": 1.5250093936920166, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0493, + "step": 7000 + }, + { + "epoch": 2.35, + "eval_loss": 0.12509962916374207, + "eval_runtime": 1229.8903, + "eval_samples_per_second": 1.22, + "eval_steps_per_second": 1.22, + "eval_wer": 18.48178523745947, + "step": 7000 + }, + { + "epoch": 2.36, + "grad_norm": 2.134922981262207, + "learning_rate": 9.344522613065327e-06, + "loss": 0.045, + "step": 7025 + }, + { + "epoch": 2.37, + "grad_norm": 1.6122771501541138, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0541, + "step": 7050 + }, + { + "epoch": 2.38, + "grad_norm": 1.9345308542251587, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0562, + "step": 7075 + }, + { + "epoch": 2.38, + "grad_norm": 2.141618490219116, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0545, + "step": 7100 + }, + { + "epoch": 2.39, + "grad_norm": 1.7193912267684937, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0532, + "step": 7125 + }, + { + "epoch": 2.4, + "grad_norm": 2.0618209838867188, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0517, + "step": 7150 + }, + { + "epoch": 2.41, + "grad_norm": 1.8230485916137695, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0506, + "step": 7175 + }, + { + "epoch": 2.42, + "grad_norm": 1.9542351961135864, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0475, + "step": 7200 + }, + { + "epoch": 2.43, + "grad_norm": 1.5875585079193115, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0518, + "step": 7225 + }, + { + "epoch": 2.43, + "grad_norm": 1.6866785287857056, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0519, + "step": 7250 + }, + { + "epoch": 2.44, + "grad_norm": 1.6547410488128662, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0517, + "step": 7275 + }, + { + "epoch": 2.45, + "grad_norm": 1.8891346454620361, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0495, + "step": 7300 + }, + { + "epoch": 2.46, + "grad_norm": 2.1349895000457764, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0544, + "step": 7325 + }, + { + "epoch": 2.47, + "grad_norm": 1.6203229427337646, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0511, + "step": 7350 + }, + { + "epoch": 2.48, + "grad_norm": 2.1466031074523926, + "learning_rate": 9.309346733668343e-06, + "loss": 0.051, + "step": 7375 + }, + { + "epoch": 2.48, + "grad_norm": 1.7905449867248535, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0526, + "step": 7400 + }, + { + "epoch": 2.49, + "grad_norm": 1.8931719064712524, + "learning_rate": 9.304321608040201e-06, + "loss": 0.05, + "step": 7425 + }, + { + "epoch": 2.5, + "grad_norm": 1.751273274421692, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0519, + "step": 7450 + }, + { + "epoch": 2.51, + "grad_norm": 2.2132203578948975, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0473, + "step": 7475 + }, + { + "epoch": 2.52, + "grad_norm": 2.187291383743286, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0532, + "step": 7500 + }, + { + "epoch": 2.53, + "grad_norm": 1.8353278636932373, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0517, + "step": 7525 + }, + { + "epoch": 2.54, + "grad_norm": 1.8582594394683838, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0533, + "step": 7550 + }, + { + "epoch": 2.54, + "grad_norm": 2.1756386756896973, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0484, + "step": 7575 + }, + { + "epoch": 2.55, + "grad_norm": 1.943585753440857, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0536, + "step": 7600 + }, + { + "epoch": 2.56, + "grad_norm": 2.170017957687378, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0521, + "step": 7625 + }, + { + "epoch": 2.57, + "grad_norm": 2.048041820526123, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0527, + "step": 7650 + }, + { + "epoch": 2.58, + "grad_norm": 1.9333562850952148, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0508, + "step": 7675 + }, + { + "epoch": 2.59, + "grad_norm": 1.8871523141860962, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0512, + "step": 7700 + }, + { + "epoch": 2.59, + "grad_norm": 2.0176618099212646, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0518, + "step": 7725 + }, + { + "epoch": 2.6, + "grad_norm": 1.9599506855010986, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0509, + "step": 7750 + }, + { + "epoch": 2.61, + "grad_norm": 2.2722082138061523, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0501, + "step": 7775 + }, + { + "epoch": 2.62, + "grad_norm": 1.924984335899353, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0517, + "step": 7800 + }, + { + "epoch": 2.63, + "grad_norm": 2.0645079612731934, + "learning_rate": 9.264120603015076e-06, + "loss": 0.048, + "step": 7825 + }, + { + "epoch": 2.64, + "grad_norm": 2.254093647003174, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0537, + "step": 7850 + }, + { + "epoch": 2.64, + "grad_norm": 1.7244329452514648, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0502, + "step": 7875 + }, + { + "epoch": 2.65, + "grad_norm": 2.4816091060638428, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0531, + "step": 7900 + }, + { + "epoch": 2.66, + "grad_norm": 1.848834753036499, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0488, + "step": 7925 + }, + { + "epoch": 2.67, + "grad_norm": 2.4960601329803467, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0481, + "step": 7950 + }, + { + "epoch": 2.68, + "grad_norm": 1.609526515007019, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0472, + "step": 7975 + }, + { + "epoch": 2.69, + "grad_norm": 1.4848350286483765, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0479, + "step": 8000 + }, + { + "epoch": 2.69, + "eval_loss": 0.13280199468135834, + "eval_runtime": 1216.1012, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 1.233, + "eval_wer": 18.67887341852629, + "step": 8000 + }, + { + "epoch": 2.69, + "grad_norm": 2.1080193519592285, + "learning_rate": 9.244020100502514e-06, + "loss": 0.053, + "step": 8025 + }, + { + "epoch": 2.7, + "grad_norm": 1.6446177959442139, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0492, + "step": 8050 + }, + { + "epoch": 2.71, + "grad_norm": 2.129936456680298, + "learning_rate": 9.238994974874372e-06, + "loss": 0.049, + "step": 8075 + }, + { + "epoch": 2.72, + "grad_norm": 1.4696322679519653, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0542, + "step": 8100 + }, + { + "epoch": 2.73, + "grad_norm": 1.6145769357681274, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0481, + "step": 8125 + }, + { + "epoch": 2.74, + "grad_norm": 1.8056437969207764, + "learning_rate": 9.231457286432162e-06, + "loss": 0.05, + "step": 8150 + }, + { + "epoch": 2.75, + "grad_norm": 1.7840901613235474, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0487, + "step": 8175 + }, + { + "epoch": 2.75, + "grad_norm": 1.992786169052124, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0493, + "step": 8200 + }, + { + "epoch": 2.76, + "grad_norm": 1.9195204973220825, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0482, + "step": 8225 + }, + { + "epoch": 2.77, + "grad_norm": 2.0642037391662598, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0486, + "step": 8250 + }, + { + "epoch": 2.78, + "grad_norm": 2.1523349285125732, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0455, + "step": 8275 + }, + { + "epoch": 2.79, + "grad_norm": 1.719683289527893, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0477, + "step": 8300 + }, + { + "epoch": 2.8, + "grad_norm": 1.8985258340835571, + "learning_rate": 9.213869346733669e-06, + "loss": 0.047, + "step": 8325 + }, + { + "epoch": 2.8, + "grad_norm": 1.533259630203247, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0471, + "step": 8350 + }, + { + "epoch": 2.81, + "grad_norm": 2.1528825759887695, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0472, + "step": 8375 + }, + { + "epoch": 2.82, + "grad_norm": 1.6441786289215088, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0463, + "step": 8400 + }, + { + "epoch": 2.83, + "grad_norm": 2.01904559135437, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0501, + "step": 8425 + }, + { + "epoch": 2.84, + "grad_norm": 2.2062911987304688, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0467, + "step": 8450 + }, + { + "epoch": 2.85, + "grad_norm": 2.189694404602051, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0482, + "step": 8475 + }, + { + "epoch": 2.85, + "grad_norm": 1.945075511932373, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0502, + "step": 8500 + }, + { + "epoch": 2.86, + "grad_norm": 2.050605058670044, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0507, + "step": 8525 + }, + { + "epoch": 2.87, + "grad_norm": 1.5125831365585327, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0472, + "step": 8550 + }, + { + "epoch": 2.88, + "grad_norm": 1.734936237335205, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0475, + "step": 8575 + }, + { + "epoch": 2.89, + "grad_norm": 2.0330302715301514, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0528, + "step": 8600 + }, + { + "epoch": 2.9, + "grad_norm": 1.7558561563491821, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0483, + "step": 8625 + }, + { + "epoch": 2.9, + "grad_norm": 1.9873623847961426, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0508, + "step": 8650 + }, + { + "epoch": 2.91, + "grad_norm": 1.7399756908416748, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0486, + "step": 8675 + }, + { + "epoch": 2.92, + "grad_norm": 1.4687803983688354, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0488, + "step": 8700 + }, + { + "epoch": 2.93, + "grad_norm": 2.166177272796631, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0459, + "step": 8725 + }, + { + "epoch": 2.94, + "grad_norm": 1.8612784147262573, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0473, + "step": 8750 + }, + { + "epoch": 2.95, + "grad_norm": 1.7479339838027954, + "learning_rate": 9.168643216080404e-06, + "loss": 0.0483, + "step": 8775 + }, + { + "epoch": 2.96, + "grad_norm": 2.421147108078003, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0465, + "step": 8800 + }, + { + "epoch": 2.96, + "grad_norm": 1.9080179929733276, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0459, + "step": 8825 + }, + { + "epoch": 2.97, + "grad_norm": 1.8646163940429688, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0496, + "step": 8850 + }, + { + "epoch": 2.98, + "grad_norm": 1.774420142173767, + "learning_rate": 9.158592964824121e-06, + "loss": 0.0447, + "step": 8875 + }, + { + "epoch": 2.99, + "grad_norm": 1.8640999794006348, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0443, + "step": 8900 + }, + { + "epoch": 3.0, + "grad_norm": 1.5443248748779297, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0481, + "step": 8925 + }, + { + "epoch": 3.01, + "grad_norm": 1.6135895252227783, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0299, + "step": 8950 + }, + { + "epoch": 3.01, + "grad_norm": 1.4784936904907227, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0257, + "step": 8975 + }, + { + "epoch": 3.02, + "grad_norm": 1.4821192026138306, + "learning_rate": 9.14603015075377e-06, + "loss": 0.026, + "step": 9000 + }, + { + "epoch": 3.02, + "eval_loss": 0.14111444354057312, + "eval_runtime": 1227.2224, + "eval_samples_per_second": 1.222, + "eval_steps_per_second": 1.222, + "eval_wer": 20.395447898785683, + "step": 9000 + }, + { + "epoch": 3.03, + "grad_norm": 1.7849085330963135, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0249, + "step": 9025 + }, + { + "epoch": 3.04, + "grad_norm": 1.6293665170669556, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0267, + "step": 9050 + }, + { + "epoch": 3.05, + "grad_norm": 1.5044128894805908, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0236, + "step": 9075 + }, + { + "epoch": 3.06, + "grad_norm": 1.2493306398391724, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0257, + "step": 9100 + }, + { + "epoch": 3.06, + "grad_norm": 1.608604073524475, + "learning_rate": 9.133467336683417e-06, + "loss": 0.0246, + "step": 9125 + }, + { + "epoch": 3.07, + "grad_norm": 1.4655194282531738, + "learning_rate": 9.130954773869347e-06, + "loss": 0.0246, + "step": 9150 + }, + { + "epoch": 3.08, + "grad_norm": 1.2053927183151245, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0264, + "step": 9175 + }, + { + "epoch": 3.09, + "grad_norm": 1.8335455656051636, + "learning_rate": 9.125929648241205e-06, + "loss": 0.0251, + "step": 9200 + }, + { + "epoch": 3.1, + "grad_norm": 1.8344948291778564, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0248, + "step": 9225 + }, + { + "epoch": 3.11, + "grad_norm": 1.6041722297668457, + "learning_rate": 9.120904522613066e-06, + "loss": 0.0268, + "step": 9250 + }, + { + "epoch": 3.11, + "grad_norm": 1.2757436037063599, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0235, + "step": 9275 + }, + { + "epoch": 3.12, + "grad_norm": 1.965162754058838, + "learning_rate": 9.115879396984926e-06, + "loss": 0.0253, + "step": 9300 + }, + { + "epoch": 3.13, + "grad_norm": 1.536705732345581, + "learning_rate": 9.113366834170855e-06, + "loss": 0.026, + "step": 9325 + }, + { + "epoch": 3.14, + "grad_norm": 1.2444121837615967, + "learning_rate": 9.110854271356785e-06, + "loss": 0.0228, + "step": 9350 + }, + { + "epoch": 3.15, + "grad_norm": 1.5485920906066895, + "learning_rate": 9.108341708542714e-06, + "loss": 0.026, + "step": 9375 + }, + { + "epoch": 3.16, + "grad_norm": 1.8653204441070557, + "learning_rate": 9.105829145728643e-06, + "loss": 0.0287, + "step": 9400 + }, + { + "epoch": 3.16, + "grad_norm": 2.1442389488220215, + "learning_rate": 9.103316582914573e-06, + "loss": 0.0244, + "step": 9425 + }, + { + "epoch": 3.17, + "grad_norm": 1.9132002592086792, + "learning_rate": 9.100804020100504e-06, + "loss": 0.0263, + "step": 9450 + }, + { + "epoch": 3.18, + "grad_norm": 1.4968801736831665, + "learning_rate": 9.098291457286433e-06, + "loss": 0.025, + "step": 9475 + }, + { + "epoch": 3.19, + "grad_norm": 2.06813383102417, + "learning_rate": 9.095778894472362e-06, + "loss": 0.0262, + "step": 9500 + }, + { + "epoch": 3.2, + "grad_norm": 1.6704442501068115, + "learning_rate": 9.093266331658292e-06, + "loss": 0.0261, + "step": 9525 + }, + { + "epoch": 3.21, + "grad_norm": 1.139930248260498, + "learning_rate": 9.090753768844221e-06, + "loss": 0.0251, + "step": 9550 + }, + { + "epoch": 3.22, + "grad_norm": 1.5410466194152832, + "learning_rate": 9.088241206030152e-06, + "loss": 0.0239, + "step": 9575 + }, + { + "epoch": 3.22, + "grad_norm": 1.7888745069503784, + "learning_rate": 9.085728643216081e-06, + "loss": 0.0252, + "step": 9600 + }, + { + "epoch": 3.23, + "grad_norm": 2.0720157623291016, + "learning_rate": 9.08321608040201e-06, + "loss": 0.0261, + "step": 9625 + }, + { + "epoch": 3.24, + "grad_norm": 1.7985529899597168, + "learning_rate": 9.08070351758794e-06, + "loss": 0.0229, + "step": 9650 + }, + { + "epoch": 3.25, + "grad_norm": 1.487899661064148, + "learning_rate": 9.07819095477387e-06, + "loss": 0.0269, + "step": 9675 + }, + { + "epoch": 3.26, + "grad_norm": 1.234816312789917, + "learning_rate": 9.0756783919598e-06, + "loss": 0.0295, + "step": 9700 + }, + { + "epoch": 3.27, + "grad_norm": 1.8575265407562256, + "learning_rate": 9.07316582914573e-06, + "loss": 0.0253, + "step": 9725 + }, + { + "epoch": 3.27, + "grad_norm": 2.015505313873291, + "learning_rate": 9.070653266331659e-06, + "loss": 0.0272, + "step": 9750 + }, + { + "epoch": 3.28, + "grad_norm": 1.859294056892395, + "learning_rate": 9.068140703517588e-06, + "loss": 0.0265, + "step": 9775 + }, + { + "epoch": 3.29, + "grad_norm": 1.5879920721054077, + "learning_rate": 9.065628140703518e-06, + "loss": 0.0257, + "step": 9800 + }, + { + "epoch": 3.3, + "grad_norm": 1.7013201713562012, + "learning_rate": 9.063115577889447e-06, + "loss": 0.0239, + "step": 9825 + }, + { + "epoch": 3.31, + "grad_norm": 1.7041957378387451, + "learning_rate": 9.060603015075378e-06, + "loss": 0.0243, + "step": 9850 + }, + { + "epoch": 3.32, + "grad_norm": 2.0027599334716797, + "learning_rate": 9.058090452261307e-06, + "loss": 0.0236, + "step": 9875 + }, + { + "epoch": 3.32, + "grad_norm": 2.0531978607177734, + "learning_rate": 9.055577889447237e-06, + "loss": 0.0243, + "step": 9900 + }, + { + "epoch": 3.33, + "grad_norm": 1.3977818489074707, + "learning_rate": 9.053065326633168e-06, + "loss": 0.0274, + "step": 9925 + }, + { + "epoch": 3.34, + "grad_norm": 1.5756187438964844, + "learning_rate": 9.050552763819095e-06, + "loss": 0.0233, + "step": 9950 + }, + { + "epoch": 3.35, + "grad_norm": 1.6979589462280273, + "learning_rate": 9.048040201005026e-06, + "loss": 0.0262, + "step": 9975 + }, + { + "epoch": 3.36, + "grad_norm": 1.413805365562439, + "learning_rate": 9.045527638190956e-06, + "loss": 0.0246, + "step": 10000 + }, + { + "epoch": 3.36, + "eval_loss": 0.15126359462738037, + "eval_runtime": 1210.2348, + "eval_samples_per_second": 1.239, + "eval_steps_per_second": 1.239, + "eval_wer": 18.093966558586054, + "step": 10000 + }, + { + "epoch": 3.37, + "grad_norm": 1.5662438869476318, + "learning_rate": 9.043015075376885e-06, + "loss": 0.0244, + "step": 10025 + }, + { + "epoch": 3.37, + "grad_norm": 1.2593994140625, + "learning_rate": 9.040502512562814e-06, + "loss": 0.0246, + "step": 10050 + }, + { + "epoch": 3.38, + "grad_norm": 1.9165928363800049, + "learning_rate": 9.037989949748744e-06, + "loss": 0.0233, + "step": 10075 + }, + { + "epoch": 3.39, + "grad_norm": 2.0844316482543945, + "learning_rate": 9.035477386934675e-06, + "loss": 0.0287, + "step": 10100 + }, + { + "epoch": 3.4, + "grad_norm": 1.405656099319458, + "learning_rate": 9.032964824120604e-06, + "loss": 0.027, + "step": 10125 + }, + { + "epoch": 3.41, + "grad_norm": 1.3327072858810425, + "learning_rate": 9.030452261306533e-06, + "loss": 0.0254, + "step": 10150 + }, + { + "epoch": 3.42, + "grad_norm": 1.827623724937439, + "learning_rate": 9.027939698492463e-06, + "loss": 0.0238, + "step": 10175 + }, + { + "epoch": 3.43, + "grad_norm": 1.2745589017868042, + "learning_rate": 9.025427135678394e-06, + "loss": 0.0252, + "step": 10200 + }, + { + "epoch": 3.43, + "grad_norm": 1.9257359504699707, + "learning_rate": 9.022914572864321e-06, + "loss": 0.0237, + "step": 10225 + }, + { + "epoch": 3.44, + "grad_norm": 1.653174638748169, + "learning_rate": 9.020402010050252e-06, + "loss": 0.0263, + "step": 10250 + }, + { + "epoch": 3.45, + "grad_norm": 1.3280843496322632, + "learning_rate": 9.017889447236182e-06, + "loss": 0.0279, + "step": 10275 + }, + { + "epoch": 3.46, + "grad_norm": 1.7868784666061401, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0257, + "step": 10300 + }, + { + "epoch": 3.47, + "grad_norm": 1.5542120933532715, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0236, + "step": 10325 + }, + { + "epoch": 3.48, + "grad_norm": 1.4639595746994019, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0245, + "step": 10350 + }, + { + "epoch": 3.48, + "grad_norm": 1.4016127586364746, + "learning_rate": 9.007939698492463e-06, + "loss": 0.025, + "step": 10375 + }, + { + "epoch": 3.49, + "grad_norm": 1.5482991933822632, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0241, + "step": 10400 + }, + { + "epoch": 3.5, + "grad_norm": 1.7437814474105835, + "learning_rate": 9.002914572864321e-06, + "loss": 0.024, + "step": 10425 + }, + { + "epoch": 3.51, + "grad_norm": 1.9907480478286743, + "learning_rate": 9.000402010050252e-06, + "loss": 0.027, + "step": 10450 + }, + { + "epoch": 3.52, + "grad_norm": 1.7762255668640137, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0234, + "step": 10475 + }, + { + "epoch": 3.53, + "grad_norm": 2.0566980838775635, + "learning_rate": 8.995376884422111e-06, + "loss": 0.026, + "step": 10500 + }, + { + "epoch": 3.53, + "grad_norm": 2.1568756103515625, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0236, + "step": 10525 + }, + { + "epoch": 3.54, + "grad_norm": 1.3896421194076538, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0266, + "step": 10550 + }, + { + "epoch": 3.55, + "grad_norm": 1.638390302658081, + "learning_rate": 8.9878391959799e-06, + "loss": 0.026, + "step": 10575 + }, + { + "epoch": 3.56, + "grad_norm": 1.4810292720794678, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0254, + "step": 10600 + }, + { + "epoch": 3.57, + "grad_norm": 1.557291865348816, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0244, + "step": 10625 + }, + { + "epoch": 3.58, + "grad_norm": 1.6239739656448364, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0284, + "step": 10650 + }, + { + "epoch": 3.58, + "grad_norm": 1.821597933769226, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0261, + "step": 10675 + }, + { + "epoch": 3.59, + "grad_norm": 1.4038645029067993, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0253, + "step": 10700 + }, + { + "epoch": 3.6, + "grad_norm": 1.6850488185882568, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0258, + "step": 10725 + }, + { + "epoch": 3.61, + "grad_norm": 1.3878096342086792, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0232, + "step": 10750 + }, + { + "epoch": 3.62, + "grad_norm": 1.1536675691604614, + "learning_rate": 8.967738693467337e-06, + "loss": 0.0254, + "step": 10775 + }, + { + "epoch": 3.63, + "grad_norm": 1.2513306140899658, + "learning_rate": 8.965226130653268e-06, + "loss": 0.0227, + "step": 10800 + }, + { + "epoch": 3.63, + "grad_norm": 1.830490231513977, + "learning_rate": 8.962713567839196e-06, + "loss": 0.0261, + "step": 10825 + }, + { + "epoch": 3.64, + "grad_norm": 1.3813432455062866, + "learning_rate": 8.960201005025127e-06, + "loss": 0.0254, + "step": 10850 + }, + { + "epoch": 3.65, + "grad_norm": 1.691918134689331, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0258, + "step": 10875 + }, + { + "epoch": 3.66, + "grad_norm": 1.3107486963272095, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0265, + "step": 10900 + }, + { + "epoch": 3.67, + "grad_norm": 2.380476236343384, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0255, + "step": 10925 + }, + { + "epoch": 3.68, + "grad_norm": 1.925578236579895, + "learning_rate": 8.950150753768844e-06, + "loss": 0.0263, + "step": 10950 + }, + { + "epoch": 3.69, + "grad_norm": 2.031912088394165, + "learning_rate": 8.947638190954775e-06, + "loss": 0.0257, + "step": 10975 + }, + { + "epoch": 3.69, + "grad_norm": 1.6951614618301392, + "learning_rate": 8.945125628140704e-06, + "loss": 0.0265, + "step": 11000 + }, + { + "epoch": 3.69, + "eval_loss": 0.1609112173318863, + "eval_runtime": 1217.1686, + "eval_samples_per_second": 1.232, + "eval_steps_per_second": 1.232, + "eval_wer": 18.55807743658211, + "step": 11000 + }, + { + "epoch": 3.7, + "grad_norm": 1.927398443222046, + "learning_rate": 8.942613065326634e-06, + "loss": 0.027, + "step": 11025 + }, + { + "epoch": 3.71, + "grad_norm": 1.8383222818374634, + "learning_rate": 8.940100502512563e-06, + "loss": 0.0231, + "step": 11050 + }, + { + "epoch": 3.72, + "grad_norm": 1.6643056869506836, + "learning_rate": 8.937587939698494e-06, + "loss": 0.0225, + "step": 11075 + }, + { + "epoch": 3.73, + "grad_norm": 1.926044225692749, + "learning_rate": 8.935075376884423e-06, + "loss": 0.0284, + "step": 11100 + }, + { + "epoch": 3.74, + "grad_norm": 1.4520562887191772, + "learning_rate": 8.932562814070353e-06, + "loss": 0.0246, + "step": 11125 + }, + { + "epoch": 3.74, + "grad_norm": 2.16593599319458, + "learning_rate": 8.930050251256282e-06, + "loss": 0.0257, + "step": 11150 + }, + { + "epoch": 3.75, + "grad_norm": 2.0475943088531494, + "learning_rate": 8.927537688442211e-06, + "loss": 0.0255, + "step": 11175 + }, + { + "epoch": 3.76, + "grad_norm": 1.6812808513641357, + "learning_rate": 8.925025125628142e-06, + "loss": 0.0221, + "step": 11200 + }, + { + "epoch": 3.77, + "grad_norm": 1.4607030153274536, + "learning_rate": 8.92251256281407e-06, + "loss": 0.0259, + "step": 11225 + }, + { + "epoch": 3.78, + "grad_norm": 1.668396234512329, + "learning_rate": 8.920000000000001e-06, + "loss": 0.0255, + "step": 11250 + }, + { + "epoch": 3.79, + "grad_norm": 1.4738633632659912, + "learning_rate": 8.91748743718593e-06, + "loss": 0.0254, + "step": 11275 + }, + { + "epoch": 3.79, + "grad_norm": 1.7113475799560547, + "learning_rate": 8.91497487437186e-06, + "loss": 0.024, + "step": 11300 + }, + { + "epoch": 3.8, + "grad_norm": 1.317020297050476, + "learning_rate": 8.91246231155779e-06, + "loss": 0.0234, + "step": 11325 + }, + { + "epoch": 3.81, + "grad_norm": 1.4190282821655273, + "learning_rate": 8.90994974874372e-06, + "loss": 0.0231, + "step": 11350 + }, + { + "epoch": 3.82, + "grad_norm": 2.5236496925354004, + "learning_rate": 8.90743718592965e-06, + "loss": 0.0271, + "step": 11375 + }, + { + "epoch": 3.83, + "grad_norm": 1.3071861267089844, + "learning_rate": 8.904924623115579e-06, + "loss": 0.0236, + "step": 11400 + }, + { + "epoch": 3.84, + "grad_norm": 1.817969560623169, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0262, + "step": 11425 + }, + { + "epoch": 3.84, + "grad_norm": 2.126443862915039, + "learning_rate": 8.899899497487437e-06, + "loss": 0.0253, + "step": 11450 + }, + { + "epoch": 3.85, + "grad_norm": 1.4541178941726685, + "learning_rate": 8.897386934673368e-06, + "loss": 0.0239, + "step": 11475 + }, + { + "epoch": 3.86, + "grad_norm": 2.0034496784210205, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0259, + "step": 11500 + }, + { + "epoch": 3.87, + "grad_norm": 1.5730000734329224, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0254, + "step": 11525 + }, + { + "epoch": 3.88, + "grad_norm": 2.014230966567993, + "learning_rate": 8.889849246231156e-06, + "loss": 0.025, + "step": 11550 + }, + { + "epoch": 3.89, + "grad_norm": 1.0437594652175903, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0252, + "step": 11575 + }, + { + "epoch": 3.9, + "grad_norm": 1.4632067680358887, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0251, + "step": 11600 + }, + { + "epoch": 3.9, + "grad_norm": 1.6928845643997192, + "learning_rate": 8.882311557788946e-06, + "loss": 0.0247, + "step": 11625 + }, + { + "epoch": 3.91, + "grad_norm": 1.75115168094635, + "learning_rate": 8.879798994974875e-06, + "loss": 0.0226, + "step": 11650 + }, + { + "epoch": 3.92, + "grad_norm": 1.9505513906478882, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0229, + "step": 11675 + }, + { + "epoch": 3.93, + "grad_norm": 1.622040033340454, + "learning_rate": 8.874773869346734e-06, + "loss": 0.0236, + "step": 11700 + }, + { + "epoch": 3.94, + "grad_norm": 1.624475359916687, + "learning_rate": 8.872261306532665e-06, + "loss": 0.0248, + "step": 11725 + }, + { + "epoch": 3.95, + "grad_norm": 1.5120714902877808, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0237, + "step": 11750 + }, + { + "epoch": 3.95, + "grad_norm": 1.9245622158050537, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0243, + "step": 11775 + }, + { + "epoch": 3.96, + "grad_norm": 2.173711061477661, + "learning_rate": 8.864723618090453e-06, + "loss": 0.0244, + "step": 11800 + }, + { + "epoch": 3.97, + "grad_norm": 1.2363667488098145, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0251, + "step": 11825 + }, + { + "epoch": 3.98, + "grad_norm": 2.472435235977173, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0254, + "step": 11850 + }, + { + "epoch": 3.99, + "grad_norm": 1.4050121307373047, + "learning_rate": 8.857185929648243e-06, + "loss": 0.025, + "step": 11875 + }, + { + "epoch": 4.0, + "grad_norm": 1.6363821029663086, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0229, + "step": 11900 + }, + { + "epoch": 4.0, + "grad_norm": 0.9594578146934509, + "learning_rate": 8.852160804020101e-06, + "loss": 0.0202, + "step": 11925 + }, + { + "epoch": 4.01, + "grad_norm": 1.8297860622406006, + "learning_rate": 8.849648241206032e-06, + "loss": 0.013, + "step": 11950 + }, + { + "epoch": 4.02, + "grad_norm": 1.962995171546936, + "learning_rate": 8.84713567839196e-06, + "loss": 0.0124, + "step": 11975 + }, + { + "epoch": 4.03, + "grad_norm": 1.0236871242523193, + "learning_rate": 8.844623115577891e-06, + "loss": 0.0125, + "step": 12000 + }, + { + "epoch": 4.03, + "eval_loss": 0.16607043147087097, + "eval_runtime": 1223.2505, + "eval_samples_per_second": 1.226, + "eval_steps_per_second": 1.226, + "eval_wer": 18.284697056392652, + "step": 12000 + }, + { + "epoch": 4.04, + "grad_norm": 1.2196553945541382, + "learning_rate": 8.84211055276382e-06, + "loss": 0.0117, + "step": 12025 + }, + { + "epoch": 4.05, + "grad_norm": 1.3422659635543823, + "learning_rate": 8.83959798994975e-06, + "loss": 0.012, + "step": 12050 + }, + { + "epoch": 4.05, + "grad_norm": 0.8497050404548645, + "learning_rate": 8.837085427135679e-06, + "loss": 0.0115, + "step": 12075 + }, + { + "epoch": 4.06, + "grad_norm": 1.3210487365722656, + "learning_rate": 8.834572864321608e-06, + "loss": 0.0113, + "step": 12100 + }, + { + "epoch": 4.07, + "grad_norm": 1.1300355195999146, + "learning_rate": 8.832060301507537e-06, + "loss": 0.0121, + "step": 12125 + }, + { + "epoch": 4.08, + "grad_norm": 1.5882965326309204, + "learning_rate": 8.829547738693468e-06, + "loss": 0.0109, + "step": 12150 + }, + { + "epoch": 4.09, + "grad_norm": 1.2394572496414185, + "learning_rate": 8.827035175879398e-06, + "loss": 0.0115, + "step": 12175 + }, + { + "epoch": 4.1, + "grad_norm": 1.2953606843948364, + "learning_rate": 8.824522613065327e-06, + "loss": 0.0118, + "step": 12200 + }, + { + "epoch": 4.11, + "grad_norm": 1.279596209526062, + "learning_rate": 8.822010050251258e-06, + "loss": 0.0119, + "step": 12225 + }, + { + "epoch": 4.11, + "grad_norm": 1.1359317302703857, + "learning_rate": 8.819497487437186e-06, + "loss": 0.0133, + "step": 12250 + }, + { + "epoch": 4.12, + "grad_norm": 1.474710464477539, + "learning_rate": 8.816984924623117e-06, + "loss": 0.0126, + "step": 12275 + }, + { + "epoch": 4.13, + "grad_norm": 1.2166309356689453, + "learning_rate": 8.814472361809046e-06, + "loss": 0.0113, + "step": 12300 + }, + { + "epoch": 4.14, + "grad_norm": 1.2664378881454468, + "learning_rate": 8.811959798994975e-06, + "loss": 0.0129, + "step": 12325 + }, + { + "epoch": 4.15, + "grad_norm": 0.9362413883209229, + "learning_rate": 8.809447236180905e-06, + "loss": 0.012, + "step": 12350 + }, + { + "epoch": 4.16, + "grad_norm": 1.0413384437561035, + "learning_rate": 8.806934673366834e-06, + "loss": 0.0127, + "step": 12375 + }, + { + "epoch": 4.16, + "grad_norm": 1.438740611076355, + "learning_rate": 8.804422110552765e-06, + "loss": 0.0111, + "step": 12400 + }, + { + "epoch": 4.17, + "grad_norm": 1.1021223068237305, + "learning_rate": 8.801909547738694e-06, + "loss": 0.0134, + "step": 12425 + }, + { + "epoch": 4.18, + "grad_norm": 1.8371206521987915, + "learning_rate": 8.799396984924624e-06, + "loss": 0.0138, + "step": 12450 + }, + { + "epoch": 4.19, + "grad_norm": 1.1712536811828613, + "learning_rate": 8.796884422110553e-06, + "loss": 0.0129, + "step": 12475 + }, + { + "epoch": 4.2, + "grad_norm": 1.6232008934020996, + "learning_rate": 8.794371859296484e-06, + "loss": 0.0131, + "step": 12500 + }, + { + "epoch": 4.21, + "grad_norm": 1.2582530975341797, + "learning_rate": 8.791859296482412e-06, + "loss": 0.0133, + "step": 12525 + }, + { + "epoch": 4.21, + "grad_norm": 1.3944593667984009, + "learning_rate": 8.789346733668343e-06, + "loss": 0.0136, + "step": 12550 + }, + { + "epoch": 4.22, + "grad_norm": 1.4063080549240112, + "learning_rate": 8.786834170854272e-06, + "loss": 0.0122, + "step": 12575 + }, + { + "epoch": 4.23, + "grad_norm": 1.5515761375427246, + "learning_rate": 8.784321608040201e-06, + "loss": 0.0136, + "step": 12600 + }, + { + "epoch": 4.24, + "grad_norm": 1.2066869735717773, + "learning_rate": 8.781809045226132e-06, + "loss": 0.0126, + "step": 12625 + }, + { + "epoch": 4.25, + "grad_norm": 1.0400049686431885, + "learning_rate": 8.77929648241206e-06, + "loss": 0.013, + "step": 12650 + }, + { + "epoch": 4.26, + "grad_norm": 0.8062268495559692, + "learning_rate": 8.776783919597991e-06, + "loss": 0.0129, + "step": 12675 + }, + { + "epoch": 4.26, + "grad_norm": 1.3153908252716064, + "learning_rate": 8.77427135678392e-06, + "loss": 0.0118, + "step": 12700 + }, + { + "epoch": 4.27, + "grad_norm": 1.9787485599517822, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0125, + "step": 12725 + }, + { + "epoch": 4.28, + "grad_norm": 0.9623187184333801, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0127, + "step": 12750 + }, + { + "epoch": 4.29, + "grad_norm": 1.2571808099746704, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0114, + "step": 12775 + }, + { + "epoch": 4.3, + "grad_norm": 1.086337924003601, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0123, + "step": 12800 + }, + { + "epoch": 4.31, + "grad_norm": 1.4462535381317139, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0124, + "step": 12825 + }, + { + "epoch": 4.31, + "grad_norm": 1.9632208347320557, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0124, + "step": 12850 + }, + { + "epoch": 4.32, + "grad_norm": 2.053765296936035, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0133, + "step": 12875 + }, + { + "epoch": 4.33, + "grad_norm": 1.3781039714813232, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0131, + "step": 12900 + }, + { + "epoch": 4.34, + "grad_norm": 1.0171982049942017, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0149, + "step": 12925 + }, + { + "epoch": 4.35, + "grad_norm": 1.336815595626831, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0148, + "step": 12950 + }, + { + "epoch": 4.36, + "grad_norm": 1.1811809539794922, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0143, + "step": 12975 + }, + { + "epoch": 4.37, + "grad_norm": 1.3389530181884766, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0123, + "step": 13000 + }, + { + "epoch": 4.37, + "eval_loss": 0.1779339462518692, + "eval_runtime": 1213.7536, + "eval_samples_per_second": 1.236, + "eval_steps_per_second": 1.236, + "eval_wer": 17.375548350181194, + "step": 13000 + }, + { + "epoch": 4.37, + "grad_norm": 1.530860185623169, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0129, + "step": 13025 + }, + { + "epoch": 4.38, + "grad_norm": 2.4870622158050537, + "learning_rate": 8.739195979899498e-06, + "loss": 0.014, + "step": 13050 + }, + { + "epoch": 4.39, + "grad_norm": 2.117098331451416, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0129, + "step": 13075 + }, + { + "epoch": 4.4, + "grad_norm": 1.2488681077957153, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0115, + "step": 13100 + }, + { + "epoch": 4.41, + "grad_norm": 1.82502019405365, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0118, + "step": 13125 + }, + { + "epoch": 4.42, + "grad_norm": 1.9896068572998047, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0134, + "step": 13150 + }, + { + "epoch": 4.42, + "grad_norm": 1.7069737911224365, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0134, + "step": 13175 + }, + { + "epoch": 4.43, + "grad_norm": 1.8949508666992188, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0142, + "step": 13200 + }, + { + "epoch": 4.44, + "grad_norm": 1.12700355052948, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0137, + "step": 13225 + }, + { + "epoch": 4.45, + "grad_norm": 1.2305530309677124, + "learning_rate": 8.719095477386934e-06, + "loss": 0.012, + "step": 13250 + }, + { + "epoch": 4.46, + "grad_norm": 1.118619680404663, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0127, + "step": 13275 + }, + { + "epoch": 4.47, + "grad_norm": 1.2148356437683105, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0144, + "step": 13300 + }, + { + "epoch": 4.47, + "grad_norm": 1.1302140951156616, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0129, + "step": 13325 + }, + { + "epoch": 4.48, + "grad_norm": 1.2748587131500244, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0126, + "step": 13350 + }, + { + "epoch": 4.49, + "grad_norm": 1.5371756553649902, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0133, + "step": 13375 + }, + { + "epoch": 4.5, + "grad_norm": 1.7364754676818848, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0139, + "step": 13400 + }, + { + "epoch": 4.51, + "grad_norm": 1.1583504676818848, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0139, + "step": 13425 + }, + { + "epoch": 4.52, + "grad_norm": 1.4039942026138306, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0135, + "step": 13450 + }, + { + "epoch": 4.52, + "grad_norm": 1.5972754955291748, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0137, + "step": 13475 + }, + { + "epoch": 4.53, + "grad_norm": 1.2807786464691162, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0114, + "step": 13500 + }, + { + "epoch": 4.54, + "grad_norm": 0.8826859593391418, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0134, + "step": 13525 + }, + { + "epoch": 4.55, + "grad_norm": 1.621842622756958, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0115, + "step": 13550 + }, + { + "epoch": 4.56, + "grad_norm": 1.4587492942810059, + "learning_rate": 8.68643216080402e-06, + "loss": 0.015, + "step": 13575 + }, + { + "epoch": 4.57, + "grad_norm": 1.24857759475708, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0113, + "step": 13600 + }, + { + "epoch": 4.58, + "grad_norm": 1.0602033138275146, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0146, + "step": 13625 + }, + { + "epoch": 4.58, + "grad_norm": 1.8521127700805664, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0136, + "step": 13650 + }, + { + "epoch": 4.59, + "grad_norm": 1.207698106765747, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0123, + "step": 13675 + }, + { + "epoch": 4.6, + "grad_norm": 1.7286370992660522, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0124, + "step": 13700 + }, + { + "epoch": 4.61, + "grad_norm": 1.7518972158432007, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0129, + "step": 13725 + }, + { + "epoch": 4.62, + "grad_norm": 1.3220545053482056, + "learning_rate": 8.668844221105528e-06, + "loss": 0.013, + "step": 13750 + }, + { + "epoch": 4.63, + "grad_norm": 1.5152209997177124, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0139, + "step": 13775 + }, + { + "epoch": 4.63, + "grad_norm": 1.1890395879745483, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0148, + "step": 13800 + }, + { + "epoch": 4.64, + "grad_norm": 1.7012666463851929, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0128, + "step": 13825 + }, + { + "epoch": 4.65, + "grad_norm": 1.5626181364059448, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0131, + "step": 13850 + }, + { + "epoch": 4.66, + "grad_norm": 1.1755900382995605, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0136, + "step": 13875 + }, + { + "epoch": 4.67, + "grad_norm": 1.2013492584228516, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0129, + "step": 13900 + }, + { + "epoch": 4.68, + "grad_norm": 2.473351001739502, + "learning_rate": 8.651256281407036e-06, + "loss": 0.014, + "step": 13925 + }, + { + "epoch": 4.68, + "grad_norm": 1.392483115196228, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0134, + "step": 13950 + }, + { + "epoch": 4.69, + "grad_norm": 1.7546114921569824, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0146, + "step": 13975 + }, + { + "epoch": 4.7, + "grad_norm": 1.5298802852630615, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0135, + "step": 14000 + }, + { + "epoch": 4.7, + "eval_loss": 0.17776234447956085, + "eval_runtime": 1560.5073, + "eval_samples_per_second": 0.961, + "eval_steps_per_second": 0.961, + "eval_wer": 17.553563481467354, + "step": 14000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 1000, + "total_flos": 1.2926778987773952e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/bhojpuri/checkpoint-14000/training_args.bin b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4c2d938c5456d6ce9da72d77c51cf6981fa348f --- /dev/null +++ b/checkpoints/whisper-small/bhojpuri/checkpoint-14000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6d2fd2f0203c7e1baeb70c95aecb4cce31bbfbe8c15159deef16f53cde30a2 +size 4667 diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/config.json b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb289bfc120ad77c5505b0ef210c56bf35075f5 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/generation_config.json b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/model.safetensors b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34ce96f5f4999db7bc1e95c239083777631b8fd6 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad562c1496b9e831905959017439fc3377ec70b37ffb852673c188a45e97b3e +size 966995080 diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/optimizer.pt b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e112c3694c80effec14ed2c072e67a0ed70eeb68 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc0e0e54ff95d23658ca6f9182c87389346e1f3177caf579998e21cfa202d07 +size 1925063607 diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/preprocessor_config.json b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/rng_state.pth b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..43090fb2c5835c9362eff517d3009b5e52b8c0a5 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc46688a93c47db7885b8f72f2302612cbef732fb0495e6653f797be0c427b4 +size 14575 diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/scheduler.pt b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c007d6b656c568b48d454490efdc5acf48891ba --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa50bd94cc9c43deec233a2d252d90c2a5316c1e538abcf6c71c17a495ac536f +size 627 diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/trainer_state.json b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dd19287cc95fac990ea4dbbfb10b32ac4d5811b6 --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/trainer_state.json @@ -0,0 +1,11581 @@ +{ + "best_metric": 11.253667798170417, + "best_model_checkpoint": "results/whisper-small/chattisgarhi/checkpoint-30000", + "epoch": 14.914243102162565, + "eval_steps": 1000, + "global_step": 40000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 29.867267608642578, + "learning_rate": 4.4e-07, + "loss": 1.8713, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 11.792779922485352, + "learning_rate": 9.400000000000001e-07, + "loss": 1.4815, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 5.747982501983643, + "learning_rate": 1.44e-06, + "loss": 0.9754, + "step": 75 + }, + { + "epoch": 0.04, + "grad_norm": 5.469888687133789, + "learning_rate": 1.94e-06, + "loss": 0.7033, + "step": 100 + }, + { + "epoch": 0.05, + "grad_norm": 4.985886096954346, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.5983, + "step": 125 + }, + { + "epoch": 0.06, + "grad_norm": 4.677158832550049, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.5396, + "step": 150 + }, + { + "epoch": 0.07, + "grad_norm": 4.741590976715088, + "learning_rate": 3.44e-06, + "loss": 0.4766, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 4.4780049324035645, + "learning_rate": 3.94e-06, + "loss": 0.4424, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 4.632696151733398, + "learning_rate": 4.440000000000001e-06, + "loss": 0.3995, + "step": 225 + }, + { + "epoch": 0.09, + "grad_norm": 4.548949718475342, + "learning_rate": 4.94e-06, + "loss": 0.3678, + "step": 250 + }, + { + "epoch": 0.1, + "grad_norm": 4.280113220214844, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.3289, + "step": 275 + }, + { + "epoch": 0.11, + "grad_norm": 4.342599868774414, + "learning_rate": 5.94e-06, + "loss": 0.3009, + "step": 300 + }, + { + "epoch": 0.12, + "grad_norm": 3.1357805728912354, + "learning_rate": 6.440000000000001e-06, + "loss": 0.2491, + "step": 325 + }, + { + "epoch": 0.13, + "grad_norm": 2.8807430267333984, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.2264, + "step": 350 + }, + { + "epoch": 0.14, + "grad_norm": 3.366670608520508, + "learning_rate": 7.440000000000001e-06, + "loss": 0.219, + "step": 375 + }, + { + "epoch": 0.15, + "grad_norm": 2.921708583831787, + "learning_rate": 7.94e-06, + "loss": 0.216, + "step": 400 + }, + { + "epoch": 0.16, + "grad_norm": 3.259023666381836, + "learning_rate": 8.44e-06, + "loss": 0.2026, + "step": 425 + }, + { + "epoch": 0.17, + "grad_norm": 3.651909589767456, + "learning_rate": 8.94e-06, + "loss": 0.1975, + "step": 450 + }, + { + "epoch": 0.18, + "grad_norm": 3.0628418922424316, + "learning_rate": 9.440000000000001e-06, + "loss": 0.197, + "step": 475 + }, + { + "epoch": 0.19, + "grad_norm": 2.526923418045044, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1768, + "step": 500 + }, + { + "epoch": 0.2, + "grad_norm": 2.7128798961639404, + "learning_rate": 9.997788944723618e-06, + "loss": 0.1854, + "step": 525 + }, + { + "epoch": 0.21, + "grad_norm": 3.0416858196258545, + "learning_rate": 9.99527638190955e-06, + "loss": 0.1872, + "step": 550 + }, + { + "epoch": 0.21, + "grad_norm": 2.914285659790039, + "learning_rate": 9.992763819095477e-06, + "loss": 0.1766, + "step": 575 + }, + { + "epoch": 0.22, + "grad_norm": 3.1836862564086914, + "learning_rate": 9.990251256281408e-06, + "loss": 0.1667, + "step": 600 + }, + { + "epoch": 0.23, + "grad_norm": 2.986923933029175, + "learning_rate": 9.987738693467337e-06, + "loss": 0.1707, + "step": 625 + }, + { + "epoch": 0.24, + "grad_norm": 2.357895612716675, + "learning_rate": 9.985226130653267e-06, + "loss": 0.1731, + "step": 650 + }, + { + "epoch": 0.25, + "grad_norm": 2.406376838684082, + "learning_rate": 9.982713567839198e-06, + "loss": 0.1621, + "step": 675 + }, + { + "epoch": 0.26, + "grad_norm": 2.287628412246704, + "learning_rate": 9.980201005025127e-06, + "loss": 0.157, + "step": 700 + }, + { + "epoch": 0.27, + "grad_norm": 2.620927095413208, + "learning_rate": 9.977688442211056e-06, + "loss": 0.1552, + "step": 725 + }, + { + "epoch": 0.28, + "grad_norm": 2.593569755554199, + "learning_rate": 9.975175879396986e-06, + "loss": 0.1499, + "step": 750 + }, + { + "epoch": 0.29, + "grad_norm": 2.5778236389160156, + "learning_rate": 9.972663316582915e-06, + "loss": 0.148, + "step": 775 + }, + { + "epoch": 0.3, + "grad_norm": 2.575634002685547, + "learning_rate": 9.970150753768844e-06, + "loss": 0.1477, + "step": 800 + }, + { + "epoch": 0.31, + "grad_norm": 2.444276809692383, + "learning_rate": 9.967638190954775e-06, + "loss": 0.1491, + "step": 825 + }, + { + "epoch": 0.32, + "grad_norm": 2.2763702869415283, + "learning_rate": 9.965125628140703e-06, + "loss": 0.1457, + "step": 850 + }, + { + "epoch": 0.33, + "grad_norm": 2.4443540573120117, + "learning_rate": 9.962613065326634e-06, + "loss": 0.1456, + "step": 875 + }, + { + "epoch": 0.34, + "grad_norm": 2.275012969970703, + "learning_rate": 9.960100502512563e-06, + "loss": 0.144, + "step": 900 + }, + { + "epoch": 0.34, + "grad_norm": 2.4789230823516846, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1426, + "step": 925 + }, + { + "epoch": 0.35, + "grad_norm": 2.470465660095215, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1377, + "step": 950 + }, + { + "epoch": 0.36, + "grad_norm": 2.1992318630218506, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1337, + "step": 975 + }, + { + "epoch": 0.37, + "grad_norm": 2.2264459133148193, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1347, + "step": 1000 + }, + { + "epoch": 0.37, + "eval_loss": 0.1150127574801445, + "eval_runtime": 1211.9745, + "eval_samples_per_second": 1.166, + "eval_steps_per_second": 1.166, + "eval_wer": 16.167078994304127, + "step": 1000 + }, + { + "epoch": 0.38, + "grad_norm": 2.2867746353149414, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1286, + "step": 1025 + }, + { + "epoch": 0.39, + "grad_norm": 2.5446059703826904, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1366, + "step": 1050 + }, + { + "epoch": 0.4, + "grad_norm": 2.3198916912078857, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1287, + "step": 1075 + }, + { + "epoch": 0.41, + "grad_norm": 2.601541519165039, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1314, + "step": 1100 + }, + { + "epoch": 0.42, + "grad_norm": 2.16617488861084, + "learning_rate": 9.93748743718593e-06, + "loss": 0.123, + "step": 1125 + }, + { + "epoch": 0.43, + "grad_norm": 2.2360217571258545, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1298, + "step": 1150 + }, + { + "epoch": 0.44, + "grad_norm": 2.3415329456329346, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1198, + "step": 1175 + }, + { + "epoch": 0.45, + "grad_norm": 2.277651071548462, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1318, + "step": 1200 + }, + { + "epoch": 0.46, + "grad_norm": 2.3392574787139893, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1291, + "step": 1225 + }, + { + "epoch": 0.47, + "grad_norm": 2.3850834369659424, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1319, + "step": 1250 + }, + { + "epoch": 0.48, + "grad_norm": 2.419739007949829, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1205, + "step": 1275 + }, + { + "epoch": 0.48, + "grad_norm": 1.876503825187683, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1203, + "step": 1300 + }, + { + "epoch": 0.49, + "grad_norm": 1.940808892250061, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1206, + "step": 1325 + }, + { + "epoch": 0.5, + "grad_norm": 2.0517351627349854, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1207, + "step": 1350 + }, + { + "epoch": 0.51, + "grad_norm": 2.0331389904022217, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1206, + "step": 1375 + }, + { + "epoch": 0.52, + "grad_norm": 2.3498997688293457, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1166, + "step": 1400 + }, + { + "epoch": 0.53, + "grad_norm": 2.2645528316497803, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1187, + "step": 1425 + }, + { + "epoch": 0.54, + "grad_norm": 2.4206032752990723, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1106, + "step": 1450 + }, + { + "epoch": 0.55, + "grad_norm": 2.1145567893981934, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1162, + "step": 1475 + }, + { + "epoch": 0.56, + "grad_norm": 2.3337152004241943, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1137, + "step": 1500 + }, + { + "epoch": 0.57, + "grad_norm": 1.8306084871292114, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1093, + "step": 1525 + }, + { + "epoch": 0.58, + "grad_norm": 2.080470561981201, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1164, + "step": 1550 + }, + { + "epoch": 0.59, + "grad_norm": 1.9600608348846436, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1136, + "step": 1575 + }, + { + "epoch": 0.6, + "grad_norm": 1.8445073366165161, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1103, + "step": 1600 + }, + { + "epoch": 0.61, + "grad_norm": 1.865199089050293, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1121, + "step": 1625 + }, + { + "epoch": 0.62, + "grad_norm": 2.2303030490875244, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1161, + "step": 1650 + }, + { + "epoch": 0.62, + "grad_norm": 2.2664577960968018, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1061, + "step": 1675 + }, + { + "epoch": 0.63, + "grad_norm": 1.9744682312011719, + "learning_rate": 9.879698492462312e-06, + "loss": 0.104, + "step": 1700 + }, + { + "epoch": 0.64, + "grad_norm": 2.127598524093628, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1097, + "step": 1725 + }, + { + "epoch": 0.65, + "grad_norm": 1.872571349143982, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1042, + "step": 1750 + }, + { + "epoch": 0.66, + "grad_norm": 1.8354071378707886, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1042, + "step": 1775 + }, + { + "epoch": 0.67, + "grad_norm": 1.808335542678833, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1073, + "step": 1800 + }, + { + "epoch": 0.68, + "grad_norm": 2.1076080799102783, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1044, + "step": 1825 + }, + { + "epoch": 0.69, + "grad_norm": 1.936588168144226, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1007, + "step": 1850 + }, + { + "epoch": 0.7, + "grad_norm": 1.9783082008361816, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1047, + "step": 1875 + }, + { + "epoch": 0.71, + "grad_norm": 1.8980565071105957, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1034, + "step": 1900 + }, + { + "epoch": 0.72, + "grad_norm": 2.1652116775512695, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1036, + "step": 1925 + }, + { + "epoch": 0.73, + "grad_norm": 2.120558977127075, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1019, + "step": 1950 + }, + { + "epoch": 0.74, + "grad_norm": 1.9562137126922607, + "learning_rate": 9.85206030150754e-06, + "loss": 0.0952, + "step": 1975 + }, + { + "epoch": 0.75, + "grad_norm": 1.9723517894744873, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1021, + "step": 2000 + }, + { + "epoch": 0.75, + "eval_loss": 0.08948463946580887, + "eval_runtime": 1176.6742, + "eval_samples_per_second": 1.201, + "eval_steps_per_second": 1.201, + "eval_wer": 13.36516886255106, + "step": 2000 + }, + { + "epoch": 0.76, + "grad_norm": 2.0531466007232666, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1001, + "step": 2025 + }, + { + "epoch": 0.76, + "grad_norm": 1.9982943534851074, + "learning_rate": 9.844522613065328e-06, + "loss": 0.0998, + "step": 2050 + }, + { + "epoch": 0.77, + "grad_norm": 1.817077875137329, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1053, + "step": 2075 + }, + { + "epoch": 0.78, + "grad_norm": 2.0366580486297607, + "learning_rate": 9.839497487437186e-06, + "loss": 0.0947, + "step": 2100 + }, + { + "epoch": 0.79, + "grad_norm": 1.8738093376159668, + "learning_rate": 9.836984924623117e-06, + "loss": 0.0977, + "step": 2125 + }, + { + "epoch": 0.8, + "grad_norm": 1.6907607316970825, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1041, + "step": 2150 + }, + { + "epoch": 0.81, + "grad_norm": 1.742984414100647, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1017, + "step": 2175 + }, + { + "epoch": 0.82, + "grad_norm": 1.9395456314086914, + "learning_rate": 9.829447236180905e-06, + "loss": 0.0946, + "step": 2200 + }, + { + "epoch": 0.83, + "grad_norm": 2.4414660930633545, + "learning_rate": 9.826934673366834e-06, + "loss": 0.0997, + "step": 2225 + }, + { + "epoch": 0.84, + "grad_norm": 1.7961848974227905, + "learning_rate": 9.824422110552766e-06, + "loss": 0.0969, + "step": 2250 + }, + { + "epoch": 0.85, + "grad_norm": 2.0520858764648438, + "learning_rate": 9.821909547738693e-06, + "loss": 0.0975, + "step": 2275 + }, + { + "epoch": 0.86, + "grad_norm": 2.136613130569458, + "learning_rate": 9.819396984924624e-06, + "loss": 0.0989, + "step": 2300 + }, + { + "epoch": 0.87, + "grad_norm": 2.3308963775634766, + "learning_rate": 9.816884422110553e-06, + "loss": 0.0978, + "step": 2325 + }, + { + "epoch": 0.88, + "grad_norm": 2.2686049938201904, + "learning_rate": 9.814371859296483e-06, + "loss": 0.0996, + "step": 2350 + }, + { + "epoch": 0.89, + "grad_norm": 2.1487512588500977, + "learning_rate": 9.811859296482414e-06, + "loss": 0.0966, + "step": 2375 + }, + { + "epoch": 0.89, + "grad_norm": 2.4512054920196533, + "learning_rate": 9.809346733668343e-06, + "loss": 0.0918, + "step": 2400 + }, + { + "epoch": 0.9, + "grad_norm": 1.9804916381835938, + "learning_rate": 9.806834170854272e-06, + "loss": 0.0988, + "step": 2425 + }, + { + "epoch": 0.91, + "grad_norm": 2.1110246181488037, + "learning_rate": 9.804321608040202e-06, + "loss": 0.0952, + "step": 2450 + }, + { + "epoch": 0.92, + "grad_norm": 1.9800487756729126, + "learning_rate": 9.801809045226131e-06, + "loss": 0.0933, + "step": 2475 + }, + { + "epoch": 0.93, + "grad_norm": 1.8126370906829834, + "learning_rate": 9.79929648241206e-06, + "loss": 0.0907, + "step": 2500 + }, + { + "epoch": 0.94, + "grad_norm": 2.254657030105591, + "learning_rate": 9.796783919597991e-06, + "loss": 0.0861, + "step": 2525 + }, + { + "epoch": 0.95, + "grad_norm": 2.068006753921509, + "learning_rate": 9.79427135678392e-06, + "loss": 0.0953, + "step": 2550 + }, + { + "epoch": 0.96, + "grad_norm": 2.031114101409912, + "learning_rate": 9.79175879396985e-06, + "loss": 0.0879, + "step": 2575 + }, + { + "epoch": 0.97, + "grad_norm": 1.8973454236984253, + "learning_rate": 9.78924623115578e-06, + "loss": 0.0982, + "step": 2600 + }, + { + "epoch": 0.98, + "grad_norm": 1.7601672410964966, + "learning_rate": 9.786733668341709e-06, + "loss": 0.0878, + "step": 2625 + }, + { + "epoch": 0.99, + "grad_norm": 1.4346294403076172, + "learning_rate": 9.78422110552764e-06, + "loss": 0.0924, + "step": 2650 + }, + { + "epoch": 1.0, + "grad_norm": 1.7578094005584717, + "learning_rate": 9.781708542713569e-06, + "loss": 0.0876, + "step": 2675 + }, + { + "epoch": 1.01, + "grad_norm": 1.4593627452850342, + "learning_rate": 9.779195979899498e-06, + "loss": 0.0747, + "step": 2700 + }, + { + "epoch": 1.02, + "grad_norm": 1.8549084663391113, + "learning_rate": 9.776683417085428e-06, + "loss": 0.0667, + "step": 2725 + }, + { + "epoch": 1.03, + "grad_norm": 1.4419506788253784, + "learning_rate": 9.774170854271357e-06, + "loss": 0.0727, + "step": 2750 + }, + { + "epoch": 1.03, + "grad_norm": 1.6193095445632935, + "learning_rate": 9.771658291457288e-06, + "loss": 0.0631, + "step": 2775 + }, + { + "epoch": 1.04, + "grad_norm": 1.7114448547363281, + "learning_rate": 9.769145728643217e-06, + "loss": 0.069, + "step": 2800 + }, + { + "epoch": 1.05, + "grad_norm": 1.8174458742141724, + "learning_rate": 9.766633165829147e-06, + "loss": 0.0686, + "step": 2825 + }, + { + "epoch": 1.06, + "grad_norm": 1.555214285850525, + "learning_rate": 9.764120603015076e-06, + "loss": 0.0695, + "step": 2850 + }, + { + "epoch": 1.07, + "grad_norm": 1.3829988241195679, + "learning_rate": 9.761608040201005e-06, + "loss": 0.0674, + "step": 2875 + }, + { + "epoch": 1.08, + "grad_norm": 1.4402384757995605, + "learning_rate": 9.759095477386935e-06, + "loss": 0.0678, + "step": 2900 + }, + { + "epoch": 1.09, + "grad_norm": 1.4730218648910522, + "learning_rate": 9.756582914572866e-06, + "loss": 0.069, + "step": 2925 + }, + { + "epoch": 1.1, + "grad_norm": 1.8429930210113525, + "learning_rate": 9.754070351758795e-06, + "loss": 0.0675, + "step": 2950 + }, + { + "epoch": 1.11, + "grad_norm": 1.5672721862792969, + "learning_rate": 9.751557788944724e-06, + "loss": 0.0662, + "step": 2975 + }, + { + "epoch": 1.12, + "grad_norm": 1.5485836267471313, + "learning_rate": 9.749045226130654e-06, + "loss": 0.0694, + "step": 3000 + }, + { + "epoch": 1.12, + "eval_loss": 0.08470375090837479, + "eval_runtime": 1183.0561, + "eval_samples_per_second": 1.194, + "eval_steps_per_second": 1.194, + "eval_wer": 13.278867729129509, + "step": 3000 + }, + { + "epoch": 1.13, + "grad_norm": 2.078941583633423, + "learning_rate": 9.746532663316583e-06, + "loss": 0.0707, + "step": 3025 + }, + { + "epoch": 1.14, + "grad_norm": 1.9515862464904785, + "learning_rate": 9.744020100502514e-06, + "loss": 0.0658, + "step": 3050 + }, + { + "epoch": 1.15, + "grad_norm": 1.7054415941238403, + "learning_rate": 9.741507537688443e-06, + "loss": 0.0684, + "step": 3075 + }, + { + "epoch": 1.16, + "grad_norm": 1.8182621002197266, + "learning_rate": 9.738994974874373e-06, + "loss": 0.0667, + "step": 3100 + }, + { + "epoch": 1.17, + "grad_norm": 1.6176235675811768, + "learning_rate": 9.736482412060302e-06, + "loss": 0.0674, + "step": 3125 + }, + { + "epoch": 1.17, + "grad_norm": 1.7387104034423828, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0655, + "step": 3150 + }, + { + "epoch": 1.18, + "grad_norm": 1.6148914098739624, + "learning_rate": 9.731457286432162e-06, + "loss": 0.0689, + "step": 3175 + }, + { + "epoch": 1.19, + "grad_norm": 1.6195796728134155, + "learning_rate": 9.728944723618092e-06, + "loss": 0.0664, + "step": 3200 + }, + { + "epoch": 1.2, + "grad_norm": 1.7952879667282104, + "learning_rate": 9.726432160804021e-06, + "loss": 0.066, + "step": 3225 + }, + { + "epoch": 1.21, + "grad_norm": 1.4540854692459106, + "learning_rate": 9.72391959798995e-06, + "loss": 0.0641, + "step": 3250 + }, + { + "epoch": 1.22, + "grad_norm": 1.5699576139450073, + "learning_rate": 9.721407035175881e-06, + "loss": 0.0661, + "step": 3275 + }, + { + "epoch": 1.23, + "grad_norm": 1.6809080839157104, + "learning_rate": 9.718894472361809e-06, + "loss": 0.0676, + "step": 3300 + }, + { + "epoch": 1.24, + "grad_norm": 1.9932605028152466, + "learning_rate": 9.71638190954774e-06, + "loss": 0.0641, + "step": 3325 + }, + { + "epoch": 1.25, + "grad_norm": 1.6121385097503662, + "learning_rate": 9.71386934673367e-06, + "loss": 0.0629, + "step": 3350 + }, + { + "epoch": 1.26, + "grad_norm": 1.554274559020996, + "learning_rate": 9.711356783919599e-06, + "loss": 0.0666, + "step": 3375 + }, + { + "epoch": 1.27, + "grad_norm": 1.8960884809494019, + "learning_rate": 9.70884422110553e-06, + "loss": 0.0637, + "step": 3400 + }, + { + "epoch": 1.28, + "grad_norm": 1.7959742546081543, + "learning_rate": 9.706331658291457e-06, + "loss": 0.0646, + "step": 3425 + }, + { + "epoch": 1.29, + "grad_norm": 1.7671433687210083, + "learning_rate": 9.703819095477388e-06, + "loss": 0.0659, + "step": 3450 + }, + { + "epoch": 1.3, + "grad_norm": 1.9180034399032593, + "learning_rate": 9.701306532663318e-06, + "loss": 0.0628, + "step": 3475 + }, + { + "epoch": 1.3, + "grad_norm": 1.9972532987594604, + "learning_rate": 9.698793969849247e-06, + "loss": 0.0647, + "step": 3500 + }, + { + "epoch": 1.31, + "grad_norm": 2.001946210861206, + "learning_rate": 9.696281407035176e-06, + "loss": 0.0655, + "step": 3525 + }, + { + "epoch": 1.32, + "grad_norm": 1.7249242067337036, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0656, + "step": 3550 + }, + { + "epoch": 1.33, + "grad_norm": 1.5508204698562622, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0654, + "step": 3575 + }, + { + "epoch": 1.34, + "grad_norm": 1.5721642971038818, + "learning_rate": 9.688743718592966e-06, + "loss": 0.063, + "step": 3600 + }, + { + "epoch": 1.35, + "grad_norm": 1.9839472770690918, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0639, + "step": 3625 + }, + { + "epoch": 1.36, + "grad_norm": 1.6074389219284058, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0618, + "step": 3650 + }, + { + "epoch": 1.37, + "grad_norm": 1.2145272493362427, + "learning_rate": 9.681206030150756e-06, + "loss": 0.0615, + "step": 3675 + }, + { + "epoch": 1.38, + "grad_norm": 1.8397221565246582, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0603, + "step": 3700 + }, + { + "epoch": 1.39, + "grad_norm": 1.5587263107299805, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0607, + "step": 3725 + }, + { + "epoch": 1.4, + "grad_norm": 1.448466181755066, + "learning_rate": 9.673668341708544e-06, + "loss": 0.062, + "step": 3750 + }, + { + "epoch": 1.41, + "grad_norm": 1.5480372905731201, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0622, + "step": 3775 + }, + { + "epoch": 1.42, + "grad_norm": 2.0672342777252197, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0614, + "step": 3800 + }, + { + "epoch": 1.43, + "grad_norm": 1.6079210042953491, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0623, + "step": 3825 + }, + { + "epoch": 1.44, + "grad_norm": 1.6762934923171997, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0656, + "step": 3850 + }, + { + "epoch": 1.44, + "grad_norm": 1.6482523679733276, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0611, + "step": 3875 + }, + { + "epoch": 1.45, + "grad_norm": 1.5271735191345215, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0591, + "step": 3900 + }, + { + "epoch": 1.46, + "grad_norm": 1.5635854005813599, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0633, + "step": 3925 + }, + { + "epoch": 1.47, + "grad_norm": 1.6505290269851685, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0593, + "step": 3950 + }, + { + "epoch": 1.48, + "grad_norm": 1.6435421705245972, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0619, + "step": 3975 + }, + { + "epoch": 1.49, + "grad_norm": 1.5520799160003662, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0644, + "step": 4000 + }, + { + "epoch": 1.49, + "eval_loss": 0.08286147564649582, + "eval_runtime": 1179.36, + "eval_samples_per_second": 1.198, + "eval_steps_per_second": 1.198, + "eval_wer": 12.525171163914619, + "step": 4000 + }, + { + "epoch": 1.5, + "grad_norm": 1.597989559173584, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0633, + "step": 4025 + }, + { + "epoch": 1.51, + "grad_norm": 1.8812222480773926, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0637, + "step": 4050 + }, + { + "epoch": 1.52, + "grad_norm": 1.6954314708709717, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0637, + "step": 4075 + }, + { + "epoch": 1.53, + "grad_norm": 1.7041165828704834, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0553, + "step": 4100 + }, + { + "epoch": 1.54, + "grad_norm": 2.0794553756713867, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0639, + "step": 4125 + }, + { + "epoch": 1.55, + "grad_norm": 1.3547425270080566, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0591, + "step": 4150 + }, + { + "epoch": 1.56, + "grad_norm": 1.6858813762664795, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0604, + "step": 4175 + }, + { + "epoch": 1.57, + "grad_norm": 1.7081996202468872, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0609, + "step": 4200 + }, + { + "epoch": 1.58, + "grad_norm": 1.4670168161392212, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0577, + "step": 4225 + }, + { + "epoch": 1.58, + "grad_norm": 1.8474513292312622, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0596, + "step": 4250 + }, + { + "epoch": 1.59, + "grad_norm": 1.7979737520217896, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0574, + "step": 4275 + }, + { + "epoch": 1.6, + "grad_norm": 1.5447591543197632, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0557, + "step": 4300 + }, + { + "epoch": 1.61, + "grad_norm": 1.766141414642334, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0633, + "step": 4325 + }, + { + "epoch": 1.62, + "grad_norm": 1.5890923738479614, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0553, + "step": 4350 + }, + { + "epoch": 1.63, + "grad_norm": 1.620233416557312, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0583, + "step": 4375 + }, + { + "epoch": 1.64, + "grad_norm": 1.8590590953826904, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0559, + "step": 4400 + }, + { + "epoch": 1.65, + "grad_norm": 1.7164863348007202, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0575, + "step": 4425 + }, + { + "epoch": 1.66, + "grad_norm": 1.6905485391616821, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0587, + "step": 4450 + }, + { + "epoch": 1.67, + "grad_norm": 1.7918930053710938, + "learning_rate": 9.600804020100504e-06, + "loss": 0.054, + "step": 4475 + }, + { + "epoch": 1.68, + "grad_norm": 1.408998966217041, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0575, + "step": 4500 + }, + { + "epoch": 1.69, + "grad_norm": 1.7751456499099731, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0573, + "step": 4525 + }, + { + "epoch": 1.7, + "grad_norm": 1.3494666814804077, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0564, + "step": 4550 + }, + { + "epoch": 1.71, + "grad_norm": 1.4382890462875366, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0563, + "step": 4575 + }, + { + "epoch": 1.72, + "grad_norm": 1.2565054893493652, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0538, + "step": 4600 + }, + { + "epoch": 1.72, + "grad_norm": 1.5751371383666992, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0537, + "step": 4625 + }, + { + "epoch": 1.73, + "grad_norm": 1.5151078701019287, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0593, + "step": 4650 + }, + { + "epoch": 1.74, + "grad_norm": 1.633866786956787, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0526, + "step": 4675 + }, + { + "epoch": 1.75, + "grad_norm": 1.6772267818450928, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0561, + "step": 4700 + }, + { + "epoch": 1.76, + "grad_norm": 2.041039228439331, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0565, + "step": 4725 + }, + { + "epoch": 1.77, + "grad_norm": 1.6063660383224487, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0594, + "step": 4750 + }, + { + "epoch": 1.78, + "grad_norm": 1.5024564266204834, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0569, + "step": 4775 + }, + { + "epoch": 1.79, + "grad_norm": 1.7944880723953247, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0534, + "step": 4800 + }, + { + "epoch": 1.8, + "grad_norm": 1.4228626489639282, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0547, + "step": 4825 + }, + { + "epoch": 1.81, + "grad_norm": 1.7011035680770874, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0517, + "step": 4850 + }, + { + "epoch": 1.82, + "grad_norm": 1.618829607963562, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0531, + "step": 4875 + }, + { + "epoch": 1.83, + "grad_norm": 1.5317025184631348, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0574, + "step": 4900 + }, + { + "epoch": 1.84, + "grad_norm": 1.9741162061691284, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0576, + "step": 4925 + }, + { + "epoch": 1.85, + "grad_norm": 1.5694513320922852, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0558, + "step": 4950 + }, + { + "epoch": 1.85, + "grad_norm": 1.3951575756072998, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0545, + "step": 4975 + }, + { + "epoch": 1.86, + "grad_norm": 1.7064149379730225, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0522, + "step": 5000 + }, + { + "epoch": 1.86, + "eval_loss": 0.08269327133893967, + "eval_runtime": 1173.3188, + "eval_samples_per_second": 1.204, + "eval_steps_per_second": 1.204, + "eval_wer": 13.871468845290835, + "step": 5000 + }, + { + "epoch": 1.87, + "grad_norm": 1.566739797592163, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0562, + "step": 5025 + }, + { + "epoch": 1.88, + "grad_norm": 1.4498807191848755, + "learning_rate": 9.543015075376885e-06, + "loss": 0.051, + "step": 5050 + }, + { + "epoch": 1.89, + "grad_norm": 1.31383216381073, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0527, + "step": 5075 + }, + { + "epoch": 1.9, + "grad_norm": 1.9847214221954346, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0539, + "step": 5100 + }, + { + "epoch": 1.91, + "grad_norm": 1.351511836051941, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0507, + "step": 5125 + }, + { + "epoch": 1.92, + "grad_norm": 1.414528727531433, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0527, + "step": 5150 + }, + { + "epoch": 1.93, + "grad_norm": 1.6517146825790405, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0525, + "step": 5175 + }, + { + "epoch": 1.94, + "grad_norm": 2.218883514404297, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0566, + "step": 5200 + }, + { + "epoch": 1.95, + "grad_norm": 1.366365671157837, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0505, + "step": 5225 + }, + { + "epoch": 1.96, + "grad_norm": 1.3672804832458496, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0501, + "step": 5250 + }, + { + "epoch": 1.97, + "grad_norm": 1.6399633884429932, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0522, + "step": 5275 + }, + { + "epoch": 1.98, + "grad_norm": 1.4472440481185913, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0496, + "step": 5300 + }, + { + "epoch": 1.99, + "grad_norm": 1.3367663621902466, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0491, + "step": 5325 + }, + { + "epoch": 1.99, + "grad_norm": 1.4397846460342407, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0545, + "step": 5350 + }, + { + "epoch": 2.0, + "grad_norm": 1.2568572759628296, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0433, + "step": 5375 + }, + { + "epoch": 2.01, + "grad_norm": 1.2722961902618408, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0293, + "step": 5400 + }, + { + "epoch": 2.02, + "grad_norm": 1.2798501253128052, + "learning_rate": 9.50532663316583e-06, + "loss": 0.036, + "step": 5425 + }, + { + "epoch": 2.03, + "grad_norm": 1.2123713493347168, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0325, + "step": 5450 + }, + { + "epoch": 2.04, + "grad_norm": 1.3735365867614746, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0318, + "step": 5475 + }, + { + "epoch": 2.05, + "grad_norm": 1.3610255718231201, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0334, + "step": 5500 + }, + { + "epoch": 2.06, + "grad_norm": 1.1126761436462402, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0322, + "step": 5525 + }, + { + "epoch": 2.07, + "grad_norm": 1.4259611368179321, + "learning_rate": 9.492763819095479e-06, + "loss": 0.033, + "step": 5550 + }, + { + "epoch": 2.08, + "grad_norm": 1.4938602447509766, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0313, + "step": 5575 + }, + { + "epoch": 2.09, + "grad_norm": 1.333979606628418, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0323, + "step": 5600 + }, + { + "epoch": 2.1, + "grad_norm": 1.2366199493408203, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0315, + "step": 5625 + }, + { + "epoch": 2.11, + "grad_norm": 1.1534584760665894, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0306, + "step": 5650 + }, + { + "epoch": 2.12, + "grad_norm": 1.2650431394577026, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0325, + "step": 5675 + }, + { + "epoch": 2.13, + "grad_norm": 1.254272699356079, + "learning_rate": 9.477688442211056e-06, + "loss": 0.033, + "step": 5700 + }, + { + "epoch": 2.13, + "grad_norm": 1.180070400238037, + "learning_rate": 9.475175879396985e-06, + "loss": 0.03, + "step": 5725 + }, + { + "epoch": 2.14, + "grad_norm": 1.7713221311569214, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0329, + "step": 5750 + }, + { + "epoch": 2.15, + "grad_norm": 1.4151265621185303, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0333, + "step": 5775 + }, + { + "epoch": 2.16, + "grad_norm": 1.4459096193313599, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0312, + "step": 5800 + }, + { + "epoch": 2.17, + "grad_norm": 1.4060746431350708, + "learning_rate": 9.465125628140704e-06, + "loss": 0.033, + "step": 5825 + }, + { + "epoch": 2.18, + "grad_norm": 1.429726481437683, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0297, + "step": 5850 + }, + { + "epoch": 2.19, + "grad_norm": 1.44221031665802, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0293, + "step": 5875 + }, + { + "epoch": 2.2, + "grad_norm": 1.3243744373321533, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0284, + "step": 5900 + }, + { + "epoch": 2.21, + "grad_norm": 1.473487138748169, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0355, + "step": 5925 + }, + { + "epoch": 2.22, + "grad_norm": 1.149131417274475, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0316, + "step": 5950 + }, + { + "epoch": 2.23, + "grad_norm": 1.4993467330932617, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0302, + "step": 5975 + }, + { + "epoch": 2.24, + "grad_norm": 1.9292453527450562, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0333, + "step": 6000 + }, + { + "epoch": 2.24, + "eval_loss": 0.09249301254749298, + "eval_runtime": 1170.2684, + "eval_samples_per_second": 1.207, + "eval_steps_per_second": 1.207, + "eval_wer": 12.57119843507278, + "step": 6000 + }, + { + "epoch": 2.25, + "grad_norm": 1.453111171722412, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0301, + "step": 6025 + }, + { + "epoch": 2.26, + "grad_norm": 1.062298059463501, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0296, + "step": 6050 + }, + { + "epoch": 2.27, + "grad_norm": 1.8951318264007568, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0301, + "step": 6075 + }, + { + "epoch": 2.27, + "grad_norm": 1.3311182260513306, + "learning_rate": 9.43748743718593e-06, + "loss": 0.033, + "step": 6100 + }, + { + "epoch": 2.28, + "grad_norm": 1.1902508735656738, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0301, + "step": 6125 + }, + { + "epoch": 2.29, + "grad_norm": 1.347910761833191, + "learning_rate": 9.432462311557789e-06, + "loss": 0.029, + "step": 6150 + }, + { + "epoch": 2.3, + "grad_norm": 1.316100835800171, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0309, + "step": 6175 + }, + { + "epoch": 2.31, + "grad_norm": 1.6105176210403442, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0279, + "step": 6200 + }, + { + "epoch": 2.32, + "grad_norm": 1.4175101518630981, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0289, + "step": 6225 + }, + { + "epoch": 2.33, + "grad_norm": 1.428814172744751, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0293, + "step": 6250 + }, + { + "epoch": 2.34, + "grad_norm": 1.3879013061523438, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0313, + "step": 6275 + }, + { + "epoch": 2.35, + "grad_norm": 1.769910454750061, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0308, + "step": 6300 + }, + { + "epoch": 2.36, + "grad_norm": 2.276278018951416, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0315, + "step": 6325 + }, + { + "epoch": 2.37, + "grad_norm": 1.5268139839172363, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0309, + "step": 6350 + }, + { + "epoch": 2.38, + "grad_norm": 1.1948000192642212, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0275, + "step": 6375 + }, + { + "epoch": 2.39, + "grad_norm": 1.625555396080017, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0298, + "step": 6400 + }, + { + "epoch": 2.4, + "grad_norm": 1.1815710067749023, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0284, + "step": 6425 + }, + { + "epoch": 2.4, + "grad_norm": 1.322705864906311, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0292, + "step": 6450 + }, + { + "epoch": 2.41, + "grad_norm": 1.2966747283935547, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0304, + "step": 6475 + }, + { + "epoch": 2.42, + "grad_norm": 1.1954747438430786, + "learning_rate": 9.397286432160805e-06, + "loss": 0.031, + "step": 6500 + }, + { + "epoch": 2.43, + "grad_norm": 1.5523953437805176, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0311, + "step": 6525 + }, + { + "epoch": 2.44, + "grad_norm": 1.5993328094482422, + "learning_rate": 9.392261306532663e-06, + "loss": 0.031, + "step": 6550 + }, + { + "epoch": 2.45, + "grad_norm": 1.2586721181869507, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0272, + "step": 6575 + }, + { + "epoch": 2.46, + "grad_norm": 1.4716228246688843, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0319, + "step": 6600 + }, + { + "epoch": 2.47, + "grad_norm": 1.5680776834487915, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0286, + "step": 6625 + }, + { + "epoch": 2.48, + "grad_norm": 1.5674717426300049, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0302, + "step": 6650 + }, + { + "epoch": 2.49, + "grad_norm": 1.0742074251174927, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0278, + "step": 6675 + }, + { + "epoch": 2.5, + "grad_norm": 1.5309933423995972, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0268, + "step": 6700 + }, + { + "epoch": 2.51, + "grad_norm": 1.2441209554672241, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0261, + "step": 6725 + }, + { + "epoch": 2.52, + "grad_norm": 1.3913319110870361, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0289, + "step": 6750 + }, + { + "epoch": 2.53, + "grad_norm": 1.2194600105285645, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0279, + "step": 6775 + }, + { + "epoch": 2.54, + "grad_norm": 1.3486984968185425, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0268, + "step": 6800 + }, + { + "epoch": 2.54, + "grad_norm": 1.065149188041687, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0293, + "step": 6825 + }, + { + "epoch": 2.55, + "grad_norm": 1.1525932550430298, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0263, + "step": 6850 + }, + { + "epoch": 2.56, + "grad_norm": 0.9991635680198669, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0299, + "step": 6875 + }, + { + "epoch": 2.57, + "grad_norm": 1.3157044649124146, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0294, + "step": 6900 + }, + { + "epoch": 2.58, + "grad_norm": 1.164072036743164, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0292, + "step": 6925 + }, + { + "epoch": 2.59, + "grad_norm": 1.1912261247634888, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0252, + "step": 6950 + }, + { + "epoch": 2.6, + "grad_norm": 1.6097770929336548, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0276, + "step": 6975 + }, + { + "epoch": 2.61, + "grad_norm": 1.3521944284439087, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0268, + "step": 7000 + }, + { + "epoch": 2.61, + "eval_loss": 0.09273683279752731, + "eval_runtime": 1174.4625, + "eval_samples_per_second": 1.203, + "eval_steps_per_second": 1.203, + "eval_wer": 14.39502905471492, + "step": 7000 + }, + { + "epoch": 2.62, + "grad_norm": 1.4225417375564575, + "learning_rate": 9.344522613065327e-06, + "loss": 0.028, + "step": 7025 + }, + { + "epoch": 2.63, + "grad_norm": 1.178984522819519, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0305, + "step": 7050 + }, + { + "epoch": 2.64, + "grad_norm": 1.3164453506469727, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0257, + "step": 7075 + }, + { + "epoch": 2.65, + "grad_norm": 1.8469127416610718, + "learning_rate": 9.336984924623115e-06, + "loss": 0.03, + "step": 7100 + }, + { + "epoch": 2.66, + "grad_norm": 1.2523382902145386, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0276, + "step": 7125 + }, + { + "epoch": 2.67, + "grad_norm": 1.7086418867111206, + "learning_rate": 9.331959798994976e-06, + "loss": 0.028, + "step": 7150 + }, + { + "epoch": 2.68, + "grad_norm": 1.5325403213500977, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0275, + "step": 7175 + }, + { + "epoch": 2.68, + "grad_norm": 1.8411564826965332, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0268, + "step": 7200 + }, + { + "epoch": 2.69, + "grad_norm": 1.2798216342926025, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0272, + "step": 7225 + }, + { + "epoch": 2.7, + "grad_norm": 1.5173702239990234, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0246, + "step": 7250 + }, + { + "epoch": 2.71, + "grad_norm": 1.240264654159546, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0282, + "step": 7275 + }, + { + "epoch": 2.72, + "grad_norm": 1.0784670114517212, + "learning_rate": 9.316884422110553e-06, + "loss": 0.029, + "step": 7300 + }, + { + "epoch": 2.73, + "grad_norm": 1.6074179410934448, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0283, + "step": 7325 + }, + { + "epoch": 2.74, + "grad_norm": 1.0036736726760864, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0256, + "step": 7350 + }, + { + "epoch": 2.75, + "grad_norm": 1.6763125658035278, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0266, + "step": 7375 + }, + { + "epoch": 2.76, + "grad_norm": 1.2827513217926025, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0273, + "step": 7400 + }, + { + "epoch": 2.77, + "grad_norm": 1.1760488748550415, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0301, + "step": 7425 + }, + { + "epoch": 2.78, + "grad_norm": 1.419548749923706, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0266, + "step": 7450 + }, + { + "epoch": 2.79, + "grad_norm": 1.0396534204483032, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0266, + "step": 7475 + }, + { + "epoch": 2.8, + "grad_norm": 1.456385850906372, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0277, + "step": 7500 + }, + { + "epoch": 2.81, + "grad_norm": 1.1357905864715576, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0277, + "step": 7525 + }, + { + "epoch": 2.82, + "grad_norm": 1.3910702466964722, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0278, + "step": 7550 + }, + { + "epoch": 2.82, + "grad_norm": 1.4589263200759888, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0262, + "step": 7575 + }, + { + "epoch": 2.83, + "grad_norm": 1.3214805126190186, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0265, + "step": 7600 + }, + { + "epoch": 2.84, + "grad_norm": 1.337793231010437, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0278, + "step": 7625 + }, + { + "epoch": 2.85, + "grad_norm": 1.4365116357803345, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0281, + "step": 7650 + }, + { + "epoch": 2.86, + "grad_norm": 1.3216900825500488, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0266, + "step": 7675 + }, + { + "epoch": 2.87, + "grad_norm": 1.2244486808776855, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0272, + "step": 7700 + }, + { + "epoch": 2.88, + "grad_norm": 1.3275609016418457, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0285, + "step": 7725 + }, + { + "epoch": 2.89, + "grad_norm": 1.274181604385376, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0265, + "step": 7750 + }, + { + "epoch": 2.9, + "grad_norm": 1.1313958168029785, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0252, + "step": 7775 + }, + { + "epoch": 2.91, + "grad_norm": 0.9833647608757019, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0268, + "step": 7800 + }, + { + "epoch": 2.92, + "grad_norm": 1.7907352447509766, + "learning_rate": 9.264120603015076e-06, + "loss": 0.03, + "step": 7825 + }, + { + "epoch": 2.93, + "grad_norm": 1.3479431867599487, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0263, + "step": 7850 + }, + { + "epoch": 2.94, + "grad_norm": 1.4436614513397217, + "learning_rate": 9.259095477386936e-06, + "loss": 0.027, + "step": 7875 + }, + { + "epoch": 2.95, + "grad_norm": 0.9086975455284119, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0278, + "step": 7900 + }, + { + "epoch": 2.95, + "grad_norm": 1.4098609685897827, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0284, + "step": 7925 + }, + { + "epoch": 2.96, + "grad_norm": 1.5908530950546265, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0259, + "step": 7950 + }, + { + "epoch": 2.97, + "grad_norm": 1.1229333877563477, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0263, + "step": 7975 + }, + { + "epoch": 2.98, + "grad_norm": 1.2393673658370972, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0264, + "step": 8000 + }, + { + "epoch": 2.98, + "eval_loss": 0.09835761785507202, + "eval_runtime": 1181.6064, + "eval_samples_per_second": 1.196, + "eval_steps_per_second": 1.196, + "eval_wer": 12.996950693285772, + "step": 8000 + }, + { + "epoch": 2.99, + "grad_norm": 1.546132206916809, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0255, + "step": 8025 + }, + { + "epoch": 3.0, + "grad_norm": 0.9526233077049255, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0254, + "step": 8050 + }, + { + "epoch": 3.01, + "grad_norm": 0.6263015270233154, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0135, + "step": 8075 + }, + { + "epoch": 3.02, + "grad_norm": 0.822076678276062, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0143, + "step": 8100 + }, + { + "epoch": 3.03, + "grad_norm": 0.596155047416687, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0134, + "step": 8125 + }, + { + "epoch": 3.04, + "grad_norm": 0.9801079034805298, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0125, + "step": 8150 + }, + { + "epoch": 3.05, + "grad_norm": 0.8703805208206177, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0121, + "step": 8175 + }, + { + "epoch": 3.06, + "grad_norm": 0.8045217990875244, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0134, + "step": 8200 + }, + { + "epoch": 3.07, + "grad_norm": 1.1099711656570435, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0125, + "step": 8225 + }, + { + "epoch": 3.08, + "grad_norm": 0.9043372273445129, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0122, + "step": 8250 + }, + { + "epoch": 3.09, + "grad_norm": 1.0510129928588867, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0128, + "step": 8275 + }, + { + "epoch": 3.09, + "grad_norm": 0.9354584813117981, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0108, + "step": 8300 + }, + { + "epoch": 3.1, + "grad_norm": 0.9032003283500671, + "learning_rate": 9.213869346733669e-06, + "loss": 0.013, + "step": 8325 + }, + { + "epoch": 3.11, + "grad_norm": 1.3349744081497192, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0142, + "step": 8350 + }, + { + "epoch": 3.12, + "grad_norm": 1.2691020965576172, + "learning_rate": 9.208844221105528e-06, + "loss": 0.014, + "step": 8375 + }, + { + "epoch": 3.13, + "grad_norm": 0.8961933851242065, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0135, + "step": 8400 + }, + { + "epoch": 3.14, + "grad_norm": 1.3278096914291382, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0137, + "step": 8425 + }, + { + "epoch": 3.15, + "grad_norm": 1.0133564472198486, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0126, + "step": 8450 + }, + { + "epoch": 3.16, + "grad_norm": 1.1428608894348145, + "learning_rate": 9.198793969849247e-06, + "loss": 0.013, + "step": 8475 + }, + { + "epoch": 3.17, + "grad_norm": 1.0068527460098267, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0138, + "step": 8500 + }, + { + "epoch": 3.18, + "grad_norm": 2.149351119995117, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0133, + "step": 8525 + }, + { + "epoch": 3.19, + "grad_norm": 1.089708685874939, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0145, + "step": 8550 + }, + { + "epoch": 3.2, + "grad_norm": 0.9412585496902466, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0133, + "step": 8575 + }, + { + "epoch": 3.21, + "grad_norm": 1.1263689994812012, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0131, + "step": 8600 + }, + { + "epoch": 3.22, + "grad_norm": 1.278019905090332, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0146, + "step": 8625 + }, + { + "epoch": 3.23, + "grad_norm": 0.968937873840332, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0147, + "step": 8650 + }, + { + "epoch": 3.23, + "grad_norm": 1.4465516805648804, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0132, + "step": 8675 + }, + { + "epoch": 3.24, + "grad_norm": 1.586835503578186, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0148, + "step": 8700 + }, + { + "epoch": 3.25, + "grad_norm": 1.5610789060592651, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0133, + "step": 8725 + }, + { + "epoch": 3.26, + "grad_norm": 1.5766916275024414, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0149, + "step": 8750 + }, + { + "epoch": 3.27, + "grad_norm": 1.1484746932983398, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0148, + "step": 8775 + }, + { + "epoch": 3.28, + "grad_norm": 1.193565845489502, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0132, + "step": 8800 + }, + { + "epoch": 3.29, + "grad_norm": 1.6039817333221436, + "learning_rate": 9.163718592964826e-06, + "loss": 0.014, + "step": 8825 + }, + { + "epoch": 3.3, + "grad_norm": 1.0447310209274292, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0139, + "step": 8850 + }, + { + "epoch": 3.31, + "grad_norm": 1.1180191040039062, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0137, + "step": 8875 + }, + { + "epoch": 3.32, + "grad_norm": 0.9979492425918579, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0137, + "step": 8900 + }, + { + "epoch": 3.33, + "grad_norm": 0.9189267754554749, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0147, + "step": 8925 + }, + { + "epoch": 3.34, + "grad_norm": 0.935592770576477, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0148, + "step": 8950 + }, + { + "epoch": 3.35, + "grad_norm": 1.166102409362793, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0142, + "step": 8975 + }, + { + "epoch": 3.36, + "grad_norm": 1.0297759771347046, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0121, + "step": 9000 + }, + { + "epoch": 3.36, + "eval_loss": 0.11247532814741135, + "eval_runtime": 1196.161, + "eval_samples_per_second": 1.181, + "eval_steps_per_second": 1.181, + "eval_wer": 13.399689315919684, + "step": 9000 + }, + { + "epoch": 3.37, + "grad_norm": 1.2145353555679321, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0137, + "step": 9025 + }, + { + "epoch": 3.37, + "grad_norm": 1.0998971462249756, + "learning_rate": 9.141105527638192e-06, + "loss": 0.015, + "step": 9050 + }, + { + "epoch": 3.38, + "grad_norm": 1.33904230594635, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0131, + "step": 9075 + }, + { + "epoch": 3.39, + "grad_norm": 1.1221343278884888, + "learning_rate": 9.136080402010052e-06, + "loss": 0.013, + "step": 9100 + }, + { + "epoch": 3.4, + "grad_norm": 0.6647489666938782, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0128, + "step": 9125 + }, + { + "epoch": 3.41, + "grad_norm": 1.2413157224655151, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0142, + "step": 9150 + }, + { + "epoch": 3.42, + "grad_norm": 1.1597723960876465, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0144, + "step": 9175 + }, + { + "epoch": 3.43, + "grad_norm": 1.3676612377166748, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0139, + "step": 9200 + }, + { + "epoch": 3.44, + "grad_norm": 1.3868343830108643, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0133, + "step": 9225 + }, + { + "epoch": 3.45, + "grad_norm": 0.9649651050567627, + "learning_rate": 9.121005025125628e-06, + "loss": 0.015, + "step": 9250 + }, + { + "epoch": 3.46, + "grad_norm": 1.371127963066101, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0135, + "step": 9275 + }, + { + "epoch": 3.47, + "grad_norm": 1.4016374349594116, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0136, + "step": 9300 + }, + { + "epoch": 3.48, + "grad_norm": 1.100788950920105, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0137, + "step": 9325 + }, + { + "epoch": 3.49, + "grad_norm": 1.7673946619033813, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0147, + "step": 9350 + }, + { + "epoch": 3.5, + "grad_norm": 1.4324219226837158, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0134, + "step": 9375 + }, + { + "epoch": 3.5, + "grad_norm": 1.5088505744934082, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0136, + "step": 9400 + }, + { + "epoch": 3.51, + "grad_norm": 1.2630841732025146, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0142, + "step": 9425 + }, + { + "epoch": 3.52, + "grad_norm": 1.4919805526733398, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0135, + "step": 9450 + }, + { + "epoch": 3.53, + "grad_norm": 0.903571367263794, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0136, + "step": 9475 + }, + { + "epoch": 3.54, + "grad_norm": 0.8802200555801392, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0143, + "step": 9500 + }, + { + "epoch": 3.55, + "grad_norm": 0.8713164925575256, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0145, + "step": 9525 + }, + { + "epoch": 3.56, + "grad_norm": 1.0385081768035889, + "learning_rate": 9.090854271356785e-06, + "loss": 0.014, + "step": 9550 + }, + { + "epoch": 3.57, + "grad_norm": 0.6580889225006104, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0143, + "step": 9575 + }, + { + "epoch": 3.58, + "grad_norm": 1.3997721672058105, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0153, + "step": 9600 + }, + { + "epoch": 3.59, + "grad_norm": 0.936346173286438, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0148, + "step": 9625 + }, + { + "epoch": 3.6, + "grad_norm": 1.1294862031936646, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0146, + "step": 9650 + }, + { + "epoch": 3.61, + "grad_norm": 1.1208100318908691, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0144, + "step": 9675 + }, + { + "epoch": 3.62, + "grad_norm": 1.2666101455688477, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0144, + "step": 9700 + }, + { + "epoch": 3.63, + "grad_norm": 1.0184346437454224, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0131, + "step": 9725 + }, + { + "epoch": 3.64, + "grad_norm": 1.259779691696167, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0146, + "step": 9750 + }, + { + "epoch": 3.64, + "grad_norm": 0.7908162474632263, + "learning_rate": 9.068241206030152e-06, + "loss": 0.012, + "step": 9775 + }, + { + "epoch": 3.65, + "grad_norm": 1.0079224109649658, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0134, + "step": 9800 + }, + { + "epoch": 3.66, + "grad_norm": 1.4057514667510986, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0117, + "step": 9825 + }, + { + "epoch": 3.67, + "grad_norm": 0.8843640685081482, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0143, + "step": 9850 + }, + { + "epoch": 3.68, + "grad_norm": 1.4644583463668823, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0136, + "step": 9875 + }, + { + "epoch": 3.69, + "grad_norm": 1.4285386800765991, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0151, + "step": 9900 + }, + { + "epoch": 3.7, + "grad_norm": 1.2003490924835205, + "learning_rate": 9.05316582914573e-06, + "loss": 0.014, + "step": 9925 + }, + { + "epoch": 3.71, + "grad_norm": 0.949251115322113, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0129, + "step": 9950 + }, + { + "epoch": 3.72, + "grad_norm": 0.6194056272506714, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0125, + "step": 9975 + }, + { + "epoch": 3.73, + "grad_norm": 1.2650582790374756, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0128, + "step": 10000 + }, + { + "epoch": 3.73, + "eval_loss": 0.11630302667617798, + "eval_runtime": 1181.4189, + "eval_samples_per_second": 1.196, + "eval_steps_per_second": 1.196, + "eval_wer": 12.807088199758358, + "step": 10000 + }, + { + "epoch": 3.74, + "grad_norm": 1.0901192426681519, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0137, + "step": 10025 + }, + { + "epoch": 3.75, + "grad_norm": 1.1944717168807983, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0138, + "step": 10050 + }, + { + "epoch": 3.76, + "grad_norm": 2.5932974815368652, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0135, + "step": 10075 + }, + { + "epoch": 3.77, + "grad_norm": 1.1380863189697266, + "learning_rate": 9.035577889447237e-06, + "loss": 0.013, + "step": 10100 + }, + { + "epoch": 3.78, + "grad_norm": 1.3565905094146729, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0141, + "step": 10125 + }, + { + "epoch": 3.78, + "grad_norm": 2.065535545349121, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0132, + "step": 10150 + }, + { + "epoch": 3.79, + "grad_norm": 1.328614592552185, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0119, + "step": 10175 + }, + { + "epoch": 3.8, + "grad_norm": 1.6466686725616455, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0146, + "step": 10200 + }, + { + "epoch": 3.81, + "grad_norm": 1.2255367040634155, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0135, + "step": 10225 + }, + { + "epoch": 3.82, + "grad_norm": 1.4685856103897095, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0144, + "step": 10250 + }, + { + "epoch": 3.83, + "grad_norm": 1.6728172302246094, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0143, + "step": 10275 + }, + { + "epoch": 3.84, + "grad_norm": 1.6052027940750122, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0143, + "step": 10300 + }, + { + "epoch": 3.85, + "grad_norm": 1.3819653987884521, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0132, + "step": 10325 + }, + { + "epoch": 3.86, + "grad_norm": 1.6082425117492676, + "learning_rate": 9.010552763819096e-06, + "loss": 0.013, + "step": 10350 + }, + { + "epoch": 3.87, + "grad_norm": 0.7308992147445679, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0132, + "step": 10375 + }, + { + "epoch": 3.88, + "grad_norm": 1.5222536325454712, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0159, + "step": 10400 + }, + { + "epoch": 3.89, + "grad_norm": 1.4115451574325562, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0131, + "step": 10425 + }, + { + "epoch": 3.9, + "grad_norm": 1.0214425325393677, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0134, + "step": 10450 + }, + { + "epoch": 3.91, + "grad_norm": 1.0680630207061768, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0117, + "step": 10475 + }, + { + "epoch": 3.91, + "grad_norm": 1.2871536016464233, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0121, + "step": 10500 + }, + { + "epoch": 3.92, + "grad_norm": 1.1821720600128174, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0125, + "step": 10525 + }, + { + "epoch": 3.93, + "grad_norm": 1.4214861392974854, + "learning_rate": 8.990452261306534e-06, + "loss": 0.014, + "step": 10550 + }, + { + "epoch": 3.94, + "grad_norm": 1.0426212549209595, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0123, + "step": 10575 + }, + { + "epoch": 3.95, + "grad_norm": 1.2392157316207886, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0135, + "step": 10600 + }, + { + "epoch": 3.96, + "grad_norm": 1.5941933393478394, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0128, + "step": 10625 + }, + { + "epoch": 3.97, + "grad_norm": 1.3431544303894043, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0127, + "step": 10650 + }, + { + "epoch": 3.98, + "grad_norm": 1.0883890390396118, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0146, + "step": 10675 + }, + { + "epoch": 3.99, + "grad_norm": 1.0997198820114136, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0123, + "step": 10700 + }, + { + "epoch": 4.0, + "grad_norm": 1.3705791234970093, + "learning_rate": 8.97286432160804e-06, + "loss": 0.013, + "step": 10725 + }, + { + "epoch": 4.01, + "grad_norm": 0.8928425312042236, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0076, + "step": 10750 + }, + { + "epoch": 4.02, + "grad_norm": 0.7046710252761841, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0066, + "step": 10775 + }, + { + "epoch": 4.03, + "grad_norm": 0.9207999110221863, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0059, + "step": 10800 + }, + { + "epoch": 4.04, + "grad_norm": 0.9753714203834534, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0058, + "step": 10825 + }, + { + "epoch": 4.05, + "grad_norm": 0.8684583306312561, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0059, + "step": 10850 + }, + { + "epoch": 4.05, + "grad_norm": 0.8826712965965271, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0049, + "step": 10875 + }, + { + "epoch": 4.06, + "grad_norm": 0.7380805015563965, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0068, + "step": 10900 + }, + { + "epoch": 4.07, + "grad_norm": 0.9850341081619263, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0063, + "step": 10925 + }, + { + "epoch": 4.08, + "grad_norm": 1.135459065437317, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0058, + "step": 10950 + }, + { + "epoch": 4.09, + "grad_norm": 0.7869279384613037, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0055, + "step": 10975 + }, + { + "epoch": 4.1, + "grad_norm": 0.7898014783859253, + "learning_rate": 8.945226130653267e-06, + "loss": 0.007, + "step": 11000 + }, + { + "epoch": 4.1, + "eval_loss": 0.1266581416130066, + "eval_runtime": 1180.8249, + "eval_samples_per_second": 1.197, + "eval_steps_per_second": 1.197, + "eval_wer": 12.92790978654853, + "step": 11000 + }, + { + "epoch": 4.11, + "grad_norm": 1.6041311025619507, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0066, + "step": 11025 + }, + { + "epoch": 4.12, + "grad_norm": 0.6932623982429504, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0071, + "step": 11050 + }, + { + "epoch": 4.13, + "grad_norm": 0.8861172199249268, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0058, + "step": 11075 + }, + { + "epoch": 4.14, + "grad_norm": 1.0110853910446167, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0063, + "step": 11100 + }, + { + "epoch": 4.15, + "grad_norm": 1.0658797025680542, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0071, + "step": 11125 + }, + { + "epoch": 4.16, + "grad_norm": 1.0285788774490356, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0066, + "step": 11150 + }, + { + "epoch": 4.17, + "grad_norm": 0.9512462019920349, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0069, + "step": 11175 + }, + { + "epoch": 4.18, + "grad_norm": 0.9749946594238281, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0065, + "step": 11200 + }, + { + "epoch": 4.19, + "grad_norm": 0.7914917469024658, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0062, + "step": 11225 + }, + { + "epoch": 4.19, + "grad_norm": 0.918881893157959, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0066, + "step": 11250 + }, + { + "epoch": 4.2, + "grad_norm": 0.9715802669525146, + "learning_rate": 8.917587939698493e-06, + "loss": 0.007, + "step": 11275 + }, + { + "epoch": 4.21, + "grad_norm": 0.95139479637146, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0078, + "step": 11300 + }, + { + "epoch": 4.22, + "grad_norm": 0.9076486825942993, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0065, + "step": 11325 + }, + { + "epoch": 4.23, + "grad_norm": 0.6604435443878174, + "learning_rate": 8.910050251256282e-06, + "loss": 0.007, + "step": 11350 + }, + { + "epoch": 4.24, + "grad_norm": 0.7129841446876526, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0069, + "step": 11375 + }, + { + "epoch": 4.25, + "grad_norm": 1.1522680521011353, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0073, + "step": 11400 + }, + { + "epoch": 4.26, + "grad_norm": 1.9393774271011353, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0067, + "step": 11425 + }, + { + "epoch": 4.27, + "grad_norm": 1.324690580368042, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0066, + "step": 11450 + }, + { + "epoch": 4.28, + "grad_norm": 1.2429922819137573, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0078, + "step": 11475 + }, + { + "epoch": 4.29, + "grad_norm": 0.9183736443519592, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0072, + "step": 11500 + }, + { + "epoch": 4.3, + "grad_norm": 1.0813913345336914, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0074, + "step": 11525 + }, + { + "epoch": 4.31, + "grad_norm": 0.8189405798912048, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0074, + "step": 11550 + }, + { + "epoch": 4.32, + "grad_norm": 1.2116942405700684, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0064, + "step": 11575 + }, + { + "epoch": 4.33, + "grad_norm": 1.1405147314071655, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0072, + "step": 11600 + }, + { + "epoch": 4.33, + "grad_norm": 0.9885891675949097, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0073, + "step": 11625 + }, + { + "epoch": 4.34, + "grad_norm": 0.8411078453063965, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0074, + "step": 11650 + }, + { + "epoch": 4.35, + "grad_norm": 1.0447450876235962, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0068, + "step": 11675 + }, + { + "epoch": 4.36, + "grad_norm": 1.1312044858932495, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0074, + "step": 11700 + }, + { + "epoch": 4.37, + "grad_norm": 1.0591412782669067, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0084, + "step": 11725 + }, + { + "epoch": 4.38, + "grad_norm": 1.1448895931243896, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0074, + "step": 11750 + }, + { + "epoch": 4.39, + "grad_norm": 1.2338271141052246, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0066, + "step": 11775 + }, + { + "epoch": 4.4, + "grad_norm": 0.7907199859619141, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0069, + "step": 11800 + }, + { + "epoch": 4.41, + "grad_norm": 1.3088089227676392, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0071, + "step": 11825 + }, + { + "epoch": 4.42, + "grad_norm": 0.8548771739006042, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0078, + "step": 11850 + }, + { + "epoch": 4.43, + "grad_norm": 1.2286207675933838, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0068, + "step": 11875 + }, + { + "epoch": 4.44, + "grad_norm": 0.833550214767456, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0071, + "step": 11900 + }, + { + "epoch": 4.45, + "grad_norm": 0.9001217484474182, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0077, + "step": 11925 + }, + { + "epoch": 4.46, + "grad_norm": 1.0075615644454956, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0066, + "step": 11950 + }, + { + "epoch": 4.46, + "grad_norm": 0.8722937107086182, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0079, + "step": 11975 + }, + { + "epoch": 4.47, + "grad_norm": 0.8565125465393066, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0067, + "step": 12000 + }, + { + "epoch": 4.47, + "eval_loss": 0.129161536693573, + "eval_runtime": 1190.2412, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 12.54243139059893, + "step": 12000 + }, + { + "epoch": 4.48, + "grad_norm": 0.9348293542861938, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0068, + "step": 12025 + }, + { + "epoch": 4.49, + "grad_norm": 1.134962558746338, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0074, + "step": 12050 + }, + { + "epoch": 4.5, + "grad_norm": 0.4225083887577057, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0083, + "step": 12075 + }, + { + "epoch": 4.51, + "grad_norm": 1.1217619180679321, + "learning_rate": 8.83467336683417e-06, + "loss": 0.008, + "step": 12100 + }, + { + "epoch": 4.52, + "grad_norm": 1.0287448167800903, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0085, + "step": 12125 + }, + { + "epoch": 4.53, + "grad_norm": 0.9720180034637451, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0079, + "step": 12150 + }, + { + "epoch": 4.54, + "grad_norm": 0.9195544719696045, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0079, + "step": 12175 + }, + { + "epoch": 4.55, + "grad_norm": 1.044551968574524, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0082, + "step": 12200 + }, + { + "epoch": 4.56, + "grad_norm": 0.8579127788543701, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0081, + "step": 12225 + }, + { + "epoch": 4.57, + "grad_norm": 0.5324201583862305, + "learning_rate": 8.81959798994975e-06, + "loss": 0.007, + "step": 12250 + }, + { + "epoch": 4.58, + "grad_norm": 0.9890484809875488, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0071, + "step": 12275 + }, + { + "epoch": 4.59, + "grad_norm": 1.459826111793518, + "learning_rate": 8.814572864321608e-06, + "loss": 0.008, + "step": 12300 + }, + { + "epoch": 4.6, + "grad_norm": 1.1874847412109375, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0077, + "step": 12325 + }, + { + "epoch": 4.6, + "grad_norm": 1.705692172050476, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0078, + "step": 12350 + }, + { + "epoch": 4.61, + "grad_norm": 0.7064186930656433, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0086, + "step": 12375 + }, + { + "epoch": 4.62, + "grad_norm": 0.8892580270767212, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0067, + "step": 12400 + }, + { + "epoch": 4.63, + "grad_norm": 1.0439404249191284, + "learning_rate": 8.802010050251257e-06, + "loss": 0.007, + "step": 12425 + }, + { + "epoch": 4.64, + "grad_norm": 0.778453528881073, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0083, + "step": 12450 + }, + { + "epoch": 4.65, + "grad_norm": 1.1843183040618896, + "learning_rate": 8.796984924623117e-06, + "loss": 0.008, + "step": 12475 + }, + { + "epoch": 4.66, + "grad_norm": 1.4592788219451904, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0079, + "step": 12500 + }, + { + "epoch": 4.67, + "grad_norm": 1.238295078277588, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0086, + "step": 12525 + }, + { + "epoch": 4.68, + "grad_norm": 1.2225946187973022, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0076, + "step": 12550 + }, + { + "epoch": 4.69, + "grad_norm": 1.202648401260376, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0081, + "step": 12575 + }, + { + "epoch": 4.7, + "grad_norm": 1.1007366180419922, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0074, + "step": 12600 + }, + { + "epoch": 4.71, + "grad_norm": 0.9371539354324341, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0079, + "step": 12625 + }, + { + "epoch": 4.72, + "grad_norm": 0.8588752150535583, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0076, + "step": 12650 + }, + { + "epoch": 4.73, + "grad_norm": 1.176652431488037, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0089, + "step": 12675 + }, + { + "epoch": 4.74, + "grad_norm": 1.0860973596572876, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0084, + "step": 12700 + }, + { + "epoch": 4.74, + "grad_norm": 1.4944326877593994, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0081, + "step": 12725 + }, + { + "epoch": 4.75, + "grad_norm": 1.0701215267181396, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0073, + "step": 12750 + }, + { + "epoch": 4.76, + "grad_norm": 1.1535862684249878, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0078, + "step": 12775 + }, + { + "epoch": 4.77, + "grad_norm": 1.3056565523147583, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0074, + "step": 12800 + }, + { + "epoch": 4.78, + "grad_norm": 0.673252522945404, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0075, + "step": 12825 + }, + { + "epoch": 4.79, + "grad_norm": 1.3279640674591064, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0078, + "step": 12850 + }, + { + "epoch": 4.8, + "grad_norm": 1.2661949396133423, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0074, + "step": 12875 + }, + { + "epoch": 4.81, + "grad_norm": 1.1717098951339722, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0077, + "step": 12900 + }, + { + "epoch": 4.82, + "grad_norm": 0.7967177033424377, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0065, + "step": 12925 + }, + { + "epoch": 4.83, + "grad_norm": 1.308081865310669, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0086, + "step": 12950 + }, + { + "epoch": 4.84, + "grad_norm": 1.0919630527496338, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0072, + "step": 12975 + }, + { + "epoch": 4.85, + "grad_norm": 1.7233628034591675, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0093, + "step": 13000 + }, + { + "epoch": 4.85, + "eval_loss": 0.13369779288768768, + "eval_runtime": 1263.3436, + "eval_samples_per_second": 1.118, + "eval_steps_per_second": 1.118, + "eval_wer": 12.853115470916517, + "step": 13000 + }, + { + "epoch": 4.86, + "grad_norm": 1.373559832572937, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0089, + "step": 13025 + }, + { + "epoch": 4.87, + "grad_norm": 1.3601781129837036, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0081, + "step": 13050 + }, + { + "epoch": 4.88, + "grad_norm": 1.4455060958862305, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0086, + "step": 13075 + }, + { + "epoch": 4.88, + "grad_norm": 1.0117135047912598, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0085, + "step": 13100 + }, + { + "epoch": 4.89, + "grad_norm": 1.1456060409545898, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0079, + "step": 13125 + }, + { + "epoch": 4.9, + "grad_norm": 0.9606508612632751, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0064, + "step": 13150 + }, + { + "epoch": 4.91, + "grad_norm": 1.1147844791412354, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0069, + "step": 13175 + }, + { + "epoch": 4.92, + "grad_norm": 0.7555640935897827, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0076, + "step": 13200 + }, + { + "epoch": 4.93, + "grad_norm": 0.7762534022331238, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0071, + "step": 13225 + }, + { + "epoch": 4.94, + "grad_norm": 1.0324796438217163, + "learning_rate": 8.719095477386934e-06, + "loss": 0.0075, + "step": 13250 + }, + { + "epoch": 4.95, + "grad_norm": 1.050089955329895, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0078, + "step": 13275 + }, + { + "epoch": 4.96, + "grad_norm": 1.3924318552017212, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0076, + "step": 13300 + }, + { + "epoch": 4.97, + "grad_norm": 1.143563151359558, + "learning_rate": 8.711557788944724e-06, + "loss": 0.008, + "step": 13325 + }, + { + "epoch": 4.98, + "grad_norm": 0.9360879063606262, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0079, + "step": 13350 + }, + { + "epoch": 4.99, + "grad_norm": 1.7375690937042236, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0079, + "step": 13375 + }, + { + "epoch": 5.0, + "grad_norm": 0.9562373161315918, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0083, + "step": 13400 + }, + { + "epoch": 5.01, + "grad_norm": 0.9804300665855408, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0056, + "step": 13425 + }, + { + "epoch": 5.01, + "grad_norm": 0.6706047058105469, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0039, + "step": 13450 + }, + { + "epoch": 5.02, + "grad_norm": 0.8503850698471069, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0044, + "step": 13475 + }, + { + "epoch": 5.03, + "grad_norm": 0.5858597755432129, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0038, + "step": 13500 + }, + { + "epoch": 5.04, + "grad_norm": 0.8867294192314148, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0035, + "step": 13525 + }, + { + "epoch": 5.05, + "grad_norm": 1.2354111671447754, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0046, + "step": 13550 + }, + { + "epoch": 5.06, + "grad_norm": 0.5521581768989563, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0037, + "step": 13575 + }, + { + "epoch": 5.07, + "grad_norm": 0.7791966795921326, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0042, + "step": 13600 + }, + { + "epoch": 5.08, + "grad_norm": 0.3394426703453064, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0034, + "step": 13625 + }, + { + "epoch": 5.09, + "grad_norm": 1.2427767515182495, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0041, + "step": 13650 + }, + { + "epoch": 5.1, + "grad_norm": 0.818544864654541, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0042, + "step": 13675 + }, + { + "epoch": 5.11, + "grad_norm": 0.72127366065979, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0043, + "step": 13700 + }, + { + "epoch": 5.12, + "grad_norm": 0.6904676556587219, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0039, + "step": 13725 + }, + { + "epoch": 5.13, + "grad_norm": 0.5878466963768005, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0038, + "step": 13750 + }, + { + "epoch": 5.14, + "grad_norm": 1.1879057884216309, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0038, + "step": 13775 + }, + { + "epoch": 5.15, + "grad_norm": 1.023725986480713, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0038, + "step": 13800 + }, + { + "epoch": 5.15, + "grad_norm": 1.003685712814331, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0035, + "step": 13825 + }, + { + "epoch": 5.16, + "grad_norm": 1.9508416652679443, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0044, + "step": 13850 + }, + { + "epoch": 5.17, + "grad_norm": 0.4348049759864807, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0042, + "step": 13875 + }, + { + "epoch": 5.18, + "grad_norm": 1.0514025688171387, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0042, + "step": 13900 + }, + { + "epoch": 5.19, + "grad_norm": 1.4452292919158936, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0035, + "step": 13925 + }, + { + "epoch": 5.2, + "grad_norm": 0.6654188632965088, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0039, + "step": 13950 + }, + { + "epoch": 5.21, + "grad_norm": 1.2663432359695435, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0041, + "step": 13975 + }, + { + "epoch": 5.22, + "grad_norm": 1.1343486309051514, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0041, + "step": 14000 + }, + { + "epoch": 5.22, + "eval_loss": 0.1350521594285965, + "eval_runtime": 1172.142, + "eval_samples_per_second": 1.205, + "eval_steps_per_second": 1.205, + "eval_wer": 12.11092572349117, + "step": 14000 + }, + { + "epoch": 5.23, + "grad_norm": 0.8997664451599121, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0039, + "step": 14025 + }, + { + "epoch": 5.24, + "grad_norm": 1.2130979299545288, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0041, + "step": 14050 + }, + { + "epoch": 5.25, + "grad_norm": 0.7974370121955872, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0041, + "step": 14075 + }, + { + "epoch": 5.26, + "grad_norm": 0.6741376519203186, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0041, + "step": 14100 + }, + { + "epoch": 5.27, + "grad_norm": 0.6719247102737427, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0044, + "step": 14125 + }, + { + "epoch": 5.28, + "grad_norm": 1.40018892288208, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0039, + "step": 14150 + }, + { + "epoch": 5.29, + "grad_norm": 0.91385817527771, + "learning_rate": 8.626130653266333e-06, + "loss": 0.004, + "step": 14175 + }, + { + "epoch": 5.29, + "grad_norm": 0.5976531505584717, + "learning_rate": 8.623618090452262e-06, + "loss": 0.0041, + "step": 14200 + }, + { + "epoch": 5.3, + "grad_norm": 0.9062372446060181, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0048, + "step": 14225 + }, + { + "epoch": 5.31, + "grad_norm": 0.6214289665222168, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0039, + "step": 14250 + }, + { + "epoch": 5.32, + "grad_norm": 2.3512585163116455, + "learning_rate": 8.61608040201005e-06, + "loss": 0.005, + "step": 14275 + }, + { + "epoch": 5.33, + "grad_norm": 1.6710751056671143, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0041, + "step": 14300 + }, + { + "epoch": 5.34, + "grad_norm": 0.9921113848686218, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0042, + "step": 14325 + }, + { + "epoch": 5.35, + "grad_norm": 0.7136538028717041, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0053, + "step": 14350 + }, + { + "epoch": 5.36, + "grad_norm": 1.003262996673584, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0047, + "step": 14375 + }, + { + "epoch": 5.37, + "grad_norm": 0.8921865820884705, + "learning_rate": 8.603517587939699e-06, + "loss": 0.004, + "step": 14400 + }, + { + "epoch": 5.38, + "grad_norm": 1.7925187349319458, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0056, + "step": 14425 + }, + { + "epoch": 5.39, + "grad_norm": 1.2751425504684448, + "learning_rate": 8.598492462311559e-06, + "loss": 0.005, + "step": 14450 + }, + { + "epoch": 5.4, + "grad_norm": 1.2539774179458618, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0061, + "step": 14475 + }, + { + "epoch": 5.41, + "grad_norm": 0.9144731163978577, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0046, + "step": 14500 + }, + { + "epoch": 5.42, + "grad_norm": 0.9000234603881836, + "learning_rate": 8.590954773869347e-06, + "loss": 0.0053, + "step": 14525 + }, + { + "epoch": 5.43, + "grad_norm": 0.5587400197982788, + "learning_rate": 8.588442211055276e-06, + "loss": 0.0036, + "step": 14550 + }, + { + "epoch": 5.43, + "grad_norm": 0.738121747970581, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0052, + "step": 14575 + }, + { + "epoch": 5.44, + "grad_norm": 0.9222437143325806, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0052, + "step": 14600 + }, + { + "epoch": 5.45, + "grad_norm": 1.3740534782409668, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0049, + "step": 14625 + }, + { + "epoch": 5.46, + "grad_norm": 1.112322211265564, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0047, + "step": 14650 + }, + { + "epoch": 5.47, + "grad_norm": 0.6434328556060791, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0048, + "step": 14675 + }, + { + "epoch": 5.48, + "grad_norm": 1.3476415872573853, + "learning_rate": 8.573467336683418e-06, + "loss": 0.005, + "step": 14700 + }, + { + "epoch": 5.49, + "grad_norm": 0.9998874664306641, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0057, + "step": 14725 + }, + { + "epoch": 5.5, + "grad_norm": 0.5047935843467712, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0053, + "step": 14750 + }, + { + "epoch": 5.51, + "grad_norm": 0.8994642496109009, + "learning_rate": 8.565929648241207e-06, + "loss": 0.005, + "step": 14775 + }, + { + "epoch": 5.52, + "grad_norm": 1.421217918395996, + "learning_rate": 8.563417085427135e-06, + "loss": 0.005, + "step": 14800 + }, + { + "epoch": 5.53, + "grad_norm": 0.7813400030136108, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0055, + "step": 14825 + }, + { + "epoch": 5.54, + "grad_norm": 0.866107165813446, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0044, + "step": 14850 + }, + { + "epoch": 5.55, + "grad_norm": 0.8704444169998169, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0046, + "step": 14875 + }, + { + "epoch": 5.56, + "grad_norm": 1.567191243171692, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0048, + "step": 14900 + }, + { + "epoch": 5.56, + "grad_norm": 1.4437520503997803, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0052, + "step": 14925 + }, + { + "epoch": 5.57, + "grad_norm": 1.1087746620178223, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0048, + "step": 14950 + }, + { + "epoch": 5.58, + "grad_norm": 1.324389100074768, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0053, + "step": 14975 + }, + { + "epoch": 5.59, + "grad_norm": 1.2155736684799194, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0053, + "step": 15000 + }, + { + "epoch": 5.59, + "eval_loss": 0.14199891686439514, + "eval_runtime": 1174.7278, + "eval_samples_per_second": 1.203, + "eval_steps_per_second": 1.203, + "eval_wer": 12.54243139059893, + "step": 15000 + }, + { + "epoch": 5.6, + "grad_norm": 0.6708987951278687, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0057, + "step": 15025 + }, + { + "epoch": 5.61, + "grad_norm": 1.3157862424850464, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0045, + "step": 15050 + }, + { + "epoch": 5.62, + "grad_norm": 1.3966444730758667, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0057, + "step": 15075 + }, + { + "epoch": 5.63, + "grad_norm": 1.1613357067108154, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0056, + "step": 15100 + }, + { + "epoch": 5.64, + "grad_norm": 1.493208646774292, + "learning_rate": 8.530753768844221e-06, + "loss": 0.005, + "step": 15125 + }, + { + "epoch": 5.65, + "grad_norm": 0.8131772875785828, + "learning_rate": 8.52824120603015e-06, + "loss": 0.005, + "step": 15150 + }, + { + "epoch": 5.66, + "grad_norm": 0.6082425117492676, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0051, + "step": 15175 + }, + { + "epoch": 5.67, + "grad_norm": 1.0472604036331177, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0054, + "step": 15200 + }, + { + "epoch": 5.68, + "grad_norm": 1.2453606128692627, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0051, + "step": 15225 + }, + { + "epoch": 5.69, + "grad_norm": 1.1250741481781006, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0059, + "step": 15250 + }, + { + "epoch": 5.7, + "grad_norm": 1.0060341358184814, + "learning_rate": 8.515678391959799e-06, + "loss": 0.005, + "step": 15275 + }, + { + "epoch": 5.7, + "grad_norm": 0.9378223419189453, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0046, + "step": 15300 + }, + { + "epoch": 5.71, + "grad_norm": 1.1564043760299683, + "learning_rate": 8.51065326633166e-06, + "loss": 0.005, + "step": 15325 + }, + { + "epoch": 5.72, + "grad_norm": 0.9421935081481934, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0049, + "step": 15350 + }, + { + "epoch": 5.73, + "grad_norm": 1.34913170337677, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0054, + "step": 15375 + }, + { + "epoch": 5.74, + "grad_norm": 0.8817560076713562, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0047, + "step": 15400 + }, + { + "epoch": 5.75, + "grad_norm": 1.432364821434021, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0048, + "step": 15425 + }, + { + "epoch": 5.76, + "grad_norm": 0.576222836971283, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0049, + "step": 15450 + }, + { + "epoch": 5.77, + "grad_norm": 1.9083431959152222, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0059, + "step": 15475 + }, + { + "epoch": 5.78, + "grad_norm": 1.3128249645233154, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0057, + "step": 15500 + }, + { + "epoch": 5.79, + "grad_norm": 1.3442699909210205, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0055, + "step": 15525 + }, + { + "epoch": 5.8, + "grad_norm": 0.9286993145942688, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0043, + "step": 15550 + }, + { + "epoch": 5.81, + "grad_norm": 1.1399116516113281, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0049, + "step": 15575 + }, + { + "epoch": 5.82, + "grad_norm": 1.3293429613113403, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0048, + "step": 15600 + }, + { + "epoch": 5.83, + "grad_norm": 0.8660935759544373, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0057, + "step": 15625 + }, + { + "epoch": 5.84, + "grad_norm": 0.8582723140716553, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0053, + "step": 15650 + }, + { + "epoch": 5.84, + "grad_norm": 1.086997151374817, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0047, + "step": 15675 + }, + { + "epoch": 5.85, + "grad_norm": 1.7419377565383911, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0047, + "step": 15700 + }, + { + "epoch": 5.86, + "grad_norm": 1.2005741596221924, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0055, + "step": 15725 + }, + { + "epoch": 5.87, + "grad_norm": 1.1195741891860962, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0052, + "step": 15750 + }, + { + "epoch": 5.88, + "grad_norm": 1.0887726545333862, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0053, + "step": 15775 + }, + { + "epoch": 5.89, + "grad_norm": 1.1764453649520874, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0054, + "step": 15800 + }, + { + "epoch": 5.9, + "grad_norm": 0.7761917114257812, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0053, + "step": 15825 + }, + { + "epoch": 5.91, + "grad_norm": 1.6336493492126465, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0057, + "step": 15850 + }, + { + "epoch": 5.92, + "grad_norm": 1.264352798461914, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0069, + "step": 15875 + }, + { + "epoch": 5.93, + "grad_norm": 1.3722792863845825, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0055, + "step": 15900 + }, + { + "epoch": 5.94, + "grad_norm": 1.4113227128982544, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0058, + "step": 15925 + }, + { + "epoch": 5.95, + "grad_norm": 0.9782323241233826, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0049, + "step": 15950 + }, + { + "epoch": 5.96, + "grad_norm": 0.718396782875061, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0051, + "step": 15975 + }, + { + "epoch": 5.97, + "grad_norm": 1.2568669319152832, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0051, + "step": 16000 + }, + { + "epoch": 5.97, + "eval_loss": 0.1442727893590927, + "eval_runtime": 1203.3476, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 1.174, + "eval_wer": 12.20298026580749, + "step": 16000 + }, + { + "epoch": 5.98, + "grad_norm": 0.8230323195457458, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0044, + "step": 16025 + }, + { + "epoch": 5.98, + "grad_norm": 0.6400941014289856, + "learning_rate": 8.437788944723618e-06, + "loss": 0.005, + "step": 16050 + }, + { + "epoch": 5.99, + "grad_norm": 0.7799792289733887, + "learning_rate": 8.43527638190955e-06, + "loss": 0.005, + "step": 16075 + }, + { + "epoch": 6.0, + "grad_norm": 0.48810166120529175, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0059, + "step": 16100 + }, + { + "epoch": 6.01, + "grad_norm": 0.8253574371337891, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0039, + "step": 16125 + }, + { + "epoch": 6.02, + "grad_norm": 0.5456401705741882, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0028, + "step": 16150 + }, + { + "epoch": 6.03, + "grad_norm": 0.8694278597831726, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0032, + "step": 16175 + }, + { + "epoch": 6.04, + "grad_norm": 0.756402850151062, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0028, + "step": 16200 + }, + { + "epoch": 6.05, + "grad_norm": 0.5762577056884766, + "learning_rate": 8.420201005025125e-06, + "loss": 0.003, + "step": 16225 + }, + { + "epoch": 6.06, + "grad_norm": 0.907863438129425, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0027, + "step": 16250 + }, + { + "epoch": 6.07, + "grad_norm": 0.8144922852516174, + "learning_rate": 8.415175879396985e-06, + "loss": 0.003, + "step": 16275 + }, + { + "epoch": 6.08, + "grad_norm": 0.7962759137153625, + "learning_rate": 8.412663316582915e-06, + "loss": 0.003, + "step": 16300 + }, + { + "epoch": 6.09, + "grad_norm": 0.7962504029273987, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0036, + "step": 16325 + }, + { + "epoch": 6.1, + "grad_norm": 0.518212080001831, + "learning_rate": 8.407638190954775e-06, + "loss": 0.004, + "step": 16350 + }, + { + "epoch": 6.11, + "grad_norm": 0.6916844248771667, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0034, + "step": 16375 + }, + { + "epoch": 6.11, + "grad_norm": 1.0035253763198853, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0036, + "step": 16400 + }, + { + "epoch": 6.12, + "grad_norm": 0.6233318448066711, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0029, + "step": 16425 + }, + { + "epoch": 6.13, + "grad_norm": 0.3249460458755493, + "learning_rate": 8.397587939698492e-06, + "loss": 0.0031, + "step": 16450 + }, + { + "epoch": 6.14, + "grad_norm": 0.5242918133735657, + "learning_rate": 8.395075376884423e-06, + "loss": 0.0027, + "step": 16475 + }, + { + "epoch": 6.15, + "grad_norm": 0.34313616156578064, + "learning_rate": 8.392562814070351e-06, + "loss": 0.0031, + "step": 16500 + }, + { + "epoch": 6.16, + "grad_norm": 0.877734899520874, + "learning_rate": 8.390050251256282e-06, + "loss": 0.0025, + "step": 16525 + }, + { + "epoch": 6.17, + "grad_norm": 0.7292552590370178, + "learning_rate": 8.387537688442211e-06, + "loss": 0.0029, + "step": 16550 + }, + { + "epoch": 6.18, + "grad_norm": 0.43353238701820374, + "learning_rate": 8.38502512562814e-06, + "loss": 0.0027, + "step": 16575 + }, + { + "epoch": 6.19, + "grad_norm": 0.6906455755233765, + "learning_rate": 8.382512562814072e-06, + "loss": 0.0027, + "step": 16600 + }, + { + "epoch": 6.2, + "grad_norm": 0.5680913925170898, + "learning_rate": 8.380000000000001e-06, + "loss": 0.0029, + "step": 16625 + }, + { + "epoch": 6.21, + "grad_norm": 0.6154554486274719, + "learning_rate": 8.37748743718593e-06, + "loss": 0.0033, + "step": 16650 + }, + { + "epoch": 6.22, + "grad_norm": 0.47933229804039, + "learning_rate": 8.37497487437186e-06, + "loss": 0.0029, + "step": 16675 + }, + { + "epoch": 6.23, + "grad_norm": 1.4505053758621216, + "learning_rate": 8.372462311557789e-06, + "loss": 0.0025, + "step": 16700 + }, + { + "epoch": 6.24, + "grad_norm": 1.1999586820602417, + "learning_rate": 8.36994974874372e-06, + "loss": 0.003, + "step": 16725 + }, + { + "epoch": 6.25, + "grad_norm": 0.6804665923118591, + "learning_rate": 8.36743718592965e-06, + "loss": 0.0022, + "step": 16750 + }, + { + "epoch": 6.25, + "grad_norm": 0.3529142439365387, + "learning_rate": 8.364924623115579e-06, + "loss": 0.003, + "step": 16775 + }, + { + "epoch": 6.26, + "grad_norm": 0.842597246170044, + "learning_rate": 8.362412060301508e-06, + "loss": 0.0027, + "step": 16800 + }, + { + "epoch": 6.27, + "grad_norm": 0.9860122203826904, + "learning_rate": 8.359899497487437e-06, + "loss": 0.0034, + "step": 16825 + }, + { + "epoch": 6.28, + "grad_norm": 0.6472060680389404, + "learning_rate": 8.357386934673367e-06, + "loss": 0.0036, + "step": 16850 + }, + { + "epoch": 6.29, + "grad_norm": 0.7503908276557922, + "learning_rate": 8.354874371859298e-06, + "loss": 0.0033, + "step": 16875 + }, + { + "epoch": 6.3, + "grad_norm": 0.7145525217056274, + "learning_rate": 8.352361809045227e-06, + "loss": 0.0029, + "step": 16900 + }, + { + "epoch": 6.31, + "grad_norm": 0.5079861879348755, + "learning_rate": 8.349849246231156e-06, + "loss": 0.0035, + "step": 16925 + }, + { + "epoch": 6.32, + "grad_norm": 1.142829179763794, + "learning_rate": 8.347336683417087e-06, + "loss": 0.0032, + "step": 16950 + }, + { + "epoch": 6.33, + "grad_norm": 0.6998821496963501, + "learning_rate": 8.344824120603015e-06, + "loss": 0.0041, + "step": 16975 + }, + { + "epoch": 6.34, + "grad_norm": 0.8406864404678345, + "learning_rate": 8.342311557788946e-06, + "loss": 0.0038, + "step": 17000 + }, + { + "epoch": 6.34, + "eval_loss": 0.1456492394208908, + "eval_runtime": 1207.9017, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 1.17, + "eval_wer": 11.903803003279444, + "step": 17000 + }, + { + "epoch": 6.35, + "grad_norm": 0.9259448647499084, + "learning_rate": 8.339798994974875e-06, + "loss": 0.0031, + "step": 17025 + }, + { + "epoch": 6.36, + "grad_norm": 0.801064670085907, + "learning_rate": 8.337286432160805e-06, + "loss": 0.0034, + "step": 17050 + }, + { + "epoch": 6.37, + "grad_norm": 1.301962971687317, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0036, + "step": 17075 + }, + { + "epoch": 6.38, + "grad_norm": 0.38119998574256897, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0034, + "step": 17100 + }, + { + "epoch": 6.39, + "grad_norm": 0.6546849012374878, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0033, + "step": 17125 + }, + { + "epoch": 6.39, + "grad_norm": 1.0658259391784668, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0037, + "step": 17150 + }, + { + "epoch": 6.4, + "grad_norm": 0.9057626128196716, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0034, + "step": 17175 + }, + { + "epoch": 6.41, + "grad_norm": 0.9853999018669128, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0039, + "step": 17200 + }, + { + "epoch": 6.42, + "grad_norm": 0.6675621271133423, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0032, + "step": 17225 + }, + { + "epoch": 6.43, + "grad_norm": 0.9039015769958496, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0039, + "step": 17250 + }, + { + "epoch": 6.44, + "grad_norm": 0.66859370470047, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0029, + "step": 17275 + }, + { + "epoch": 6.45, + "grad_norm": 1.0721858739852905, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0044, + "step": 17300 + }, + { + "epoch": 6.46, + "grad_norm": 0.9672116637229919, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0043, + "step": 17325 + }, + { + "epoch": 6.47, + "grad_norm": 0.9127078056335449, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0035, + "step": 17350 + }, + { + "epoch": 6.48, + "grad_norm": 1.5191999673843384, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0035, + "step": 17375 + }, + { + "epoch": 6.49, + "grad_norm": 0.8795561194419861, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0036, + "step": 17400 + }, + { + "epoch": 6.5, + "grad_norm": 0.8869641423225403, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0042, + "step": 17425 + }, + { + "epoch": 6.51, + "grad_norm": 0.9021925926208496, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0048, + "step": 17450 + }, + { + "epoch": 6.52, + "grad_norm": 0.6960669159889221, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0041, + "step": 17475 + }, + { + "epoch": 6.52, + "grad_norm": 0.745922327041626, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0029, + "step": 17500 + }, + { + "epoch": 6.53, + "grad_norm": 1.005938172340393, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0038, + "step": 17525 + }, + { + "epoch": 6.54, + "grad_norm": 1.1654744148254395, + "learning_rate": 8.287135678391962e-06, + "loss": 0.004, + "step": 17550 + }, + { + "epoch": 6.55, + "grad_norm": 1.1658011674880981, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0038, + "step": 17575 + }, + { + "epoch": 6.56, + "grad_norm": 0.9024285078048706, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0039, + "step": 17600 + }, + { + "epoch": 6.57, + "grad_norm": 1.745468020439148, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0043, + "step": 17625 + }, + { + "epoch": 6.58, + "grad_norm": 1.1376612186431885, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0037, + "step": 17650 + }, + { + "epoch": 6.59, + "grad_norm": 1.380776047706604, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0041, + "step": 17675 + }, + { + "epoch": 6.6, + "grad_norm": 1.0260534286499023, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0037, + "step": 17700 + }, + { + "epoch": 6.61, + "grad_norm": 1.846548318862915, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0029, + "step": 17725 + }, + { + "epoch": 6.62, + "grad_norm": 0.7444543838500977, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0037, + "step": 17750 + }, + { + "epoch": 6.63, + "grad_norm": 1.0161399841308594, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0039, + "step": 17775 + }, + { + "epoch": 6.64, + "grad_norm": 0.6186051368713379, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0043, + "step": 17800 + }, + { + "epoch": 6.65, + "grad_norm": 0.6954403519630432, + "learning_rate": 8.259497487437188e-06, + "loss": 0.004, + "step": 17825 + }, + { + "epoch": 6.66, + "grad_norm": 1.060530424118042, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0043, + "step": 17850 + }, + { + "epoch": 6.66, + "grad_norm": 0.4287080764770508, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0035, + "step": 17875 + }, + { + "epoch": 6.67, + "grad_norm": 0.4548826515674591, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0043, + "step": 17900 + }, + { + "epoch": 6.68, + "grad_norm": 0.9160103797912598, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0036, + "step": 17925 + }, + { + "epoch": 6.69, + "grad_norm": 0.5377803444862366, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0038, + "step": 17950 + }, + { + "epoch": 6.7, + "grad_norm": 0.9143711924552917, + "learning_rate": 8.244422110552764e-06, + "loss": 0.004, + "step": 17975 + }, + { + "epoch": 6.71, + "grad_norm": 1.030299186706543, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0037, + "step": 18000 + }, + { + "epoch": 6.71, + "eval_loss": 0.15315306186676025, + "eval_runtime": 1489.8957, + "eval_samples_per_second": 0.948, + "eval_steps_per_second": 0.948, + "eval_wer": 12.56544502617801, + "step": 18000 + }, + { + "epoch": 6.72, + "grad_norm": 1.3255141973495483, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0037, + "step": 18025 + }, + { + "epoch": 6.73, + "grad_norm": 1.808417558670044, + "learning_rate": 8.236884422110553e-06, + "loss": 0.004, + "step": 18050 + }, + { + "epoch": 6.74, + "grad_norm": 0.5294639468193054, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0038, + "step": 18075 + }, + { + "epoch": 6.75, + "grad_norm": 0.9570890069007874, + "learning_rate": 8.231859296482414e-06, + "loss": 0.004, + "step": 18100 + }, + { + "epoch": 6.76, + "grad_norm": 0.6822317838668823, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0029, + "step": 18125 + }, + { + "epoch": 6.77, + "grad_norm": 0.9352298378944397, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0043, + "step": 18150 + }, + { + "epoch": 6.78, + "grad_norm": 0.9919680953025818, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0039, + "step": 18175 + }, + { + "epoch": 6.79, + "grad_norm": 1.1988261938095093, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0036, + "step": 18200 + }, + { + "epoch": 6.8, + "grad_norm": 1.2278050184249878, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0038, + "step": 18225 + }, + { + "epoch": 6.8, + "grad_norm": 0.8980326652526855, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0051, + "step": 18250 + }, + { + "epoch": 6.81, + "grad_norm": 0.9148417711257935, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0045, + "step": 18275 + }, + { + "epoch": 6.82, + "grad_norm": 1.2236504554748535, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0042, + "step": 18300 + }, + { + "epoch": 6.83, + "grad_norm": 1.4134790897369385, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0044, + "step": 18325 + }, + { + "epoch": 6.84, + "grad_norm": 0.934983491897583, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0038, + "step": 18350 + }, + { + "epoch": 6.85, + "grad_norm": 1.104872465133667, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0033, + "step": 18375 + }, + { + "epoch": 6.86, + "grad_norm": 0.6088386178016663, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0043, + "step": 18400 + }, + { + "epoch": 6.87, + "grad_norm": 0.6338125467300415, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0041, + "step": 18425 + }, + { + "epoch": 6.88, + "grad_norm": 0.7397853136062622, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0038, + "step": 18450 + }, + { + "epoch": 6.89, + "grad_norm": 0.6360189318656921, + "learning_rate": 8.194170854271357e-06, + "loss": 0.005, + "step": 18475 + }, + { + "epoch": 6.9, + "grad_norm": 1.0252039432525635, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0035, + "step": 18500 + }, + { + "epoch": 6.91, + "grad_norm": 0.6118385195732117, + "learning_rate": 8.189145728643216e-06, + "loss": 0.0036, + "step": 18525 + }, + { + "epoch": 6.92, + "grad_norm": 1.1586934328079224, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0034, + "step": 18550 + }, + { + "epoch": 6.93, + "grad_norm": 1.2839738130569458, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0033, + "step": 18575 + }, + { + "epoch": 6.94, + "grad_norm": 0.70010906457901, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0043, + "step": 18600 + }, + { + "epoch": 6.94, + "grad_norm": 0.5367883443832397, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0035, + "step": 18625 + }, + { + "epoch": 6.95, + "grad_norm": 0.641524076461792, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0038, + "step": 18650 + }, + { + "epoch": 6.96, + "grad_norm": 0.6704615950584412, + "learning_rate": 8.174070351758795e-06, + "loss": 0.003, + "step": 18675 + }, + { + "epoch": 6.97, + "grad_norm": 0.9170188903808594, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0034, + "step": 18700 + }, + { + "epoch": 6.98, + "grad_norm": 0.49597394466400146, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0039, + "step": 18725 + }, + { + "epoch": 6.99, + "grad_norm": 0.5179845690727234, + "learning_rate": 8.166532663316583e-06, + "loss": 0.004, + "step": 18750 + }, + { + "epoch": 7.0, + "grad_norm": 0.2318136841058731, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0038, + "step": 18775 + }, + { + "epoch": 7.01, + "grad_norm": 1.2135159969329834, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0021, + "step": 18800 + }, + { + "epoch": 7.02, + "grad_norm": 0.46845996379852295, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0025, + "step": 18825 + }, + { + "epoch": 7.03, + "grad_norm": 0.9924358129501343, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0026, + "step": 18850 + }, + { + "epoch": 7.04, + "grad_norm": 0.7978739738464355, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0029, + "step": 18875 + }, + { + "epoch": 7.05, + "grad_norm": 1.0953792333602905, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0028, + "step": 18900 + }, + { + "epoch": 7.06, + "grad_norm": 0.8321471810340881, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0026, + "step": 18925 + }, + { + "epoch": 7.07, + "grad_norm": 1.1186492443084717, + "learning_rate": 8.146432160804021e-06, + "loss": 0.0023, + "step": 18950 + }, + { + "epoch": 7.07, + "grad_norm": 0.8087504506111145, + "learning_rate": 8.14391959798995e-06, + "loss": 0.0023, + "step": 18975 + }, + { + "epoch": 7.08, + "grad_norm": 0.7073812484741211, + "learning_rate": 8.14140703517588e-06, + "loss": 0.0026, + "step": 19000 + }, + { + "epoch": 7.08, + "eval_loss": 0.14880883693695068, + "eval_runtime": 1491.1685, + "eval_samples_per_second": 0.948, + "eval_steps_per_second": 0.948, + "eval_wer": 11.81750186985789, + "step": 19000 + }, + { + "epoch": 7.09, + "grad_norm": 0.5086621046066284, + "learning_rate": 8.13889447236181e-06, + "loss": 0.002, + "step": 19025 + }, + { + "epoch": 7.1, + "grad_norm": 0.34143704175949097, + "learning_rate": 8.13638190954774e-06, + "loss": 0.0028, + "step": 19050 + }, + { + "epoch": 7.11, + "grad_norm": 0.6577348709106445, + "learning_rate": 8.13386934673367e-06, + "loss": 0.0029, + "step": 19075 + }, + { + "epoch": 7.12, + "grad_norm": 0.6750233769416809, + "learning_rate": 8.131356783919598e-06, + "loss": 0.0022, + "step": 19100 + }, + { + "epoch": 7.13, + "grad_norm": 0.7913119792938232, + "learning_rate": 8.128844221105528e-06, + "loss": 0.0022, + "step": 19125 + }, + { + "epoch": 7.14, + "grad_norm": 0.4945704936981201, + "learning_rate": 8.126331658291457e-06, + "loss": 0.0025, + "step": 19150 + }, + { + "epoch": 7.15, + "grad_norm": 0.49505919218063354, + "learning_rate": 8.123819095477388e-06, + "loss": 0.0026, + "step": 19175 + }, + { + "epoch": 7.16, + "grad_norm": 0.49595072865486145, + "learning_rate": 8.121306532663317e-06, + "loss": 0.0029, + "step": 19200 + }, + { + "epoch": 7.17, + "grad_norm": 1.3882575035095215, + "learning_rate": 8.118793969849247e-06, + "loss": 0.0026, + "step": 19225 + }, + { + "epoch": 7.18, + "grad_norm": 1.0370197296142578, + "learning_rate": 8.116281407035178e-06, + "loss": 0.003, + "step": 19250 + }, + { + "epoch": 7.19, + "grad_norm": 0.9196892976760864, + "learning_rate": 8.113768844221105e-06, + "loss": 0.0022, + "step": 19275 + }, + { + "epoch": 7.2, + "grad_norm": 0.368744820356369, + "learning_rate": 8.111256281407036e-06, + "loss": 0.0023, + "step": 19300 + }, + { + "epoch": 7.21, + "grad_norm": 0.6015002131462097, + "learning_rate": 8.108743718592966e-06, + "loss": 0.0022, + "step": 19325 + }, + { + "epoch": 7.21, + "grad_norm": 0.4250849783420563, + "learning_rate": 8.106231155778895e-06, + "loss": 0.0025, + "step": 19350 + }, + { + "epoch": 7.22, + "grad_norm": 1.067760705947876, + "learning_rate": 8.103718592964824e-06, + "loss": 0.0023, + "step": 19375 + }, + { + "epoch": 7.23, + "grad_norm": 1.087808609008789, + "learning_rate": 8.101206030150754e-06, + "loss": 0.0024, + "step": 19400 + }, + { + "epoch": 7.24, + "grad_norm": 0.4904612898826599, + "learning_rate": 8.098693467336685e-06, + "loss": 0.0031, + "step": 19425 + }, + { + "epoch": 7.25, + "grad_norm": 1.2440577745437622, + "learning_rate": 8.096180904522614e-06, + "loss": 0.0027, + "step": 19450 + }, + { + "epoch": 7.26, + "grad_norm": 0.9554286003112793, + "learning_rate": 8.093668341708543e-06, + "loss": 0.0021, + "step": 19475 + }, + { + "epoch": 7.27, + "grad_norm": 0.7498973608016968, + "learning_rate": 8.091155778894473e-06, + "loss": 0.0026, + "step": 19500 + }, + { + "epoch": 7.28, + "grad_norm": 0.6185832619667053, + "learning_rate": 8.088643216080404e-06, + "loss": 0.0025, + "step": 19525 + }, + { + "epoch": 7.29, + "grad_norm": 1.113226294517517, + "learning_rate": 8.086130653266331e-06, + "loss": 0.0026, + "step": 19550 + }, + { + "epoch": 7.3, + "grad_norm": 0.5915699005126953, + "learning_rate": 8.083618090452262e-06, + "loss": 0.0033, + "step": 19575 + }, + { + "epoch": 7.31, + "grad_norm": 0.5080116987228394, + "learning_rate": 8.081105527638192e-06, + "loss": 0.0029, + "step": 19600 + }, + { + "epoch": 7.32, + "grad_norm": 0.776193380355835, + "learning_rate": 8.078592964824121e-06, + "loss": 0.0029, + "step": 19625 + }, + { + "epoch": 7.33, + "grad_norm": 0.6700436472892761, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0028, + "step": 19650 + }, + { + "epoch": 7.34, + "grad_norm": 0.23881955444812775, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0029, + "step": 19675 + }, + { + "epoch": 7.35, + "grad_norm": 0.6887729167938232, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0026, + "step": 19700 + }, + { + "epoch": 7.35, + "grad_norm": 1.652170181274414, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0035, + "step": 19725 + }, + { + "epoch": 7.36, + "grad_norm": 0.7992013096809387, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0034, + "step": 19750 + }, + { + "epoch": 7.37, + "grad_norm": 0.3672984838485718, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0036, + "step": 19775 + }, + { + "epoch": 7.38, + "grad_norm": 1.066258430480957, + "learning_rate": 8.061105527638192e-06, + "loss": 0.003, + "step": 19800 + }, + { + "epoch": 7.39, + "grad_norm": 0.359713077545166, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0029, + "step": 19825 + }, + { + "epoch": 7.4, + "grad_norm": 1.2625848054885864, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0037, + "step": 19850 + }, + { + "epoch": 7.41, + "grad_norm": 1.0092577934265137, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0035, + "step": 19875 + }, + { + "epoch": 7.42, + "grad_norm": 0.9425181150436401, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0026, + "step": 19900 + }, + { + "epoch": 7.43, + "grad_norm": 0.7810484766960144, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0024, + "step": 19925 + }, + { + "epoch": 7.44, + "grad_norm": 0.46279433369636536, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0032, + "step": 19950 + }, + { + "epoch": 7.45, + "grad_norm": 0.8938005566596985, + "learning_rate": 8.043517587939699e-06, + "loss": 0.0029, + "step": 19975 + }, + { + "epoch": 7.46, + "grad_norm": 0.7018052339553833, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0027, + "step": 20000 + }, + { + "epoch": 7.46, + "eval_loss": 0.15223734080791473, + "eval_runtime": 1215.2672, + "eval_samples_per_second": 1.163, + "eval_steps_per_second": 1.163, + "eval_wer": 11.978597318911454, + "step": 20000 + }, + { + "epoch": 7.47, + "grad_norm": 1.673030972480774, + "learning_rate": 8.03849246231156e-06, + "loss": 0.0037, + "step": 20025 + }, + { + "epoch": 7.48, + "grad_norm": 0.5767794847488403, + "learning_rate": 8.035979899497489e-06, + "loss": 0.003, + "step": 20050 + }, + { + "epoch": 7.49, + "grad_norm": 0.7382027506828308, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0034, + "step": 20075 + }, + { + "epoch": 7.49, + "grad_norm": 0.5028126239776611, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0031, + "step": 20100 + }, + { + "epoch": 7.5, + "grad_norm": 0.4465911388397217, + "learning_rate": 8.028442211055277e-06, + "loss": 0.0035, + "step": 20125 + }, + { + "epoch": 7.51, + "grad_norm": 0.7459107637405396, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0031, + "step": 20150 + }, + { + "epoch": 7.52, + "grad_norm": 0.9949608445167542, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0027, + "step": 20175 + }, + { + "epoch": 7.53, + "grad_norm": 1.0608025789260864, + "learning_rate": 8.020904522613066e-06, + "loss": 0.003, + "step": 20200 + }, + { + "epoch": 7.54, + "grad_norm": 1.3074733018875122, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0036, + "step": 20225 + }, + { + "epoch": 7.55, + "grad_norm": 0.6239655017852783, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0028, + "step": 20250 + }, + { + "epoch": 7.56, + "grad_norm": 0.6720955967903137, + "learning_rate": 8.013366834170854e-06, + "loss": 0.003, + "step": 20275 + }, + { + "epoch": 7.57, + "grad_norm": 1.3788305521011353, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0036, + "step": 20300 + }, + { + "epoch": 7.58, + "grad_norm": 0.5315885543823242, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0033, + "step": 20325 + }, + { + "epoch": 7.59, + "grad_norm": 0.392787367105484, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0032, + "step": 20350 + }, + { + "epoch": 7.6, + "grad_norm": 0.8250320553779602, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0039, + "step": 20375 + }, + { + "epoch": 7.61, + "grad_norm": 1.5900248289108276, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0032, + "step": 20400 + }, + { + "epoch": 7.62, + "grad_norm": 0.9821562170982361, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0034, + "step": 20425 + }, + { + "epoch": 7.62, + "grad_norm": 0.9219966530799866, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0036, + "step": 20450 + }, + { + "epoch": 7.63, + "grad_norm": 0.9538794755935669, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0031, + "step": 20475 + }, + { + "epoch": 7.64, + "grad_norm": 0.27444174885749817, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0032, + "step": 20500 + }, + { + "epoch": 7.65, + "grad_norm": 1.3289494514465332, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0043, + "step": 20525 + }, + { + "epoch": 7.66, + "grad_norm": 0.47693783044815063, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0032, + "step": 20550 + }, + { + "epoch": 7.67, + "grad_norm": 1.0195008516311646, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0026, + "step": 20575 + }, + { + "epoch": 7.68, + "grad_norm": 0.9297405481338501, + "learning_rate": 7.98070351758794e-06, + "loss": 0.003, + "step": 20600 + }, + { + "epoch": 7.69, + "grad_norm": 1.3922216892242432, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0032, + "step": 20625 + }, + { + "epoch": 7.7, + "grad_norm": 0.4437565207481384, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0026, + "step": 20650 + }, + { + "epoch": 7.71, + "grad_norm": 0.7683192491531372, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0036, + "step": 20675 + }, + { + "epoch": 7.72, + "grad_norm": 0.39573270082473755, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0031, + "step": 20700 + }, + { + "epoch": 7.73, + "grad_norm": 0.4444448947906494, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0026, + "step": 20725 + }, + { + "epoch": 7.74, + "grad_norm": 1.2292596101760864, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0038, + "step": 20750 + }, + { + "epoch": 7.75, + "grad_norm": 0.9848675727844238, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0027, + "step": 20775 + }, + { + "epoch": 7.76, + "grad_norm": 0.9838907718658447, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0036, + "step": 20800 + }, + { + "epoch": 7.76, + "grad_norm": 0.727644681930542, + "learning_rate": 7.958090452261306e-06, + "loss": 0.003, + "step": 20825 + }, + { + "epoch": 7.77, + "grad_norm": 0.5261633992195129, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0032, + "step": 20850 + }, + { + "epoch": 7.78, + "grad_norm": 0.6768296957015991, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0034, + "step": 20875 + }, + { + "epoch": 7.79, + "grad_norm": 0.8573777079582214, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0024, + "step": 20900 + }, + { + "epoch": 7.8, + "grad_norm": 0.6300660967826843, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0028, + "step": 20925 + }, + { + "epoch": 7.81, + "grad_norm": 1.7176119089126587, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0026, + "step": 20950 + }, + { + "epoch": 7.82, + "grad_norm": 0.7420898675918579, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0027, + "step": 20975 + }, + { + "epoch": 7.83, + "grad_norm": 0.6216511130332947, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0029, + "step": 21000 + }, + { + "epoch": 7.83, + "eval_loss": 0.15361721813678741, + "eval_runtime": 1197.4007, + "eval_samples_per_second": 1.18, + "eval_steps_per_second": 1.18, + "eval_wer": 12.024624590069616, + "step": 21000 + }, + { + "epoch": 7.84, + "grad_norm": 1.3609764575958252, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0033, + "step": 21025 + }, + { + "epoch": 7.85, + "grad_norm": 0.3877183198928833, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0029, + "step": 21050 + }, + { + "epoch": 7.86, + "grad_norm": 0.9360409379005432, + "learning_rate": 7.933065326633167e-06, + "loss": 0.0028, + "step": 21075 + }, + { + "epoch": 7.87, + "grad_norm": 1.0260673761367798, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0032, + "step": 21100 + }, + { + "epoch": 7.88, + "grad_norm": 0.9289771318435669, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0036, + "step": 21125 + }, + { + "epoch": 7.89, + "grad_norm": 0.5237746238708496, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0031, + "step": 21150 + }, + { + "epoch": 7.9, + "grad_norm": 0.8169993758201599, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0033, + "step": 21175 + }, + { + "epoch": 7.9, + "grad_norm": 0.7323612570762634, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0034, + "step": 21200 + }, + { + "epoch": 7.91, + "grad_norm": 0.776853084564209, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0027, + "step": 21225 + }, + { + "epoch": 7.92, + "grad_norm": 2.519822597503662, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0036, + "step": 21250 + }, + { + "epoch": 7.93, + "grad_norm": 1.0454492568969727, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0026, + "step": 21275 + }, + { + "epoch": 7.94, + "grad_norm": 0.6331371665000916, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0032, + "step": 21300 + }, + { + "epoch": 7.95, + "grad_norm": 1.1118364334106445, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0039, + "step": 21325 + }, + { + "epoch": 7.96, + "grad_norm": 0.7063566446304321, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0034, + "step": 21350 + }, + { + "epoch": 7.97, + "grad_norm": 0.9176387786865234, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0034, + "step": 21375 + }, + { + "epoch": 7.98, + "grad_norm": 0.9189884066581726, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0033, + "step": 21400 + }, + { + "epoch": 7.99, + "grad_norm": 0.6950334310531616, + "learning_rate": 7.89788944723618e-06, + "loss": 0.004, + "step": 21425 + }, + { + "epoch": 8.0, + "grad_norm": 1.3244831562042236, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0037, + "step": 21450 + }, + { + "epoch": 8.01, + "grad_norm": 0.9946715235710144, + "learning_rate": 7.89286432160804e-06, + "loss": 0.0032, + "step": 21475 + }, + { + "epoch": 8.02, + "grad_norm": 1.016094446182251, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0024, + "step": 21500 + }, + { + "epoch": 8.03, + "grad_norm": 0.3761056959629059, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0018, + "step": 21525 + }, + { + "epoch": 8.04, + "grad_norm": 0.21905067563056946, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0017, + "step": 21550 + }, + { + "epoch": 8.04, + "grad_norm": 0.9174709916114807, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0018, + "step": 21575 + }, + { + "epoch": 8.05, + "grad_norm": 0.1346503496170044, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0018, + "step": 21600 + }, + { + "epoch": 8.06, + "grad_norm": 0.8248207569122314, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0015, + "step": 21625 + }, + { + "epoch": 8.07, + "grad_norm": 0.4554181694984436, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0022, + "step": 21650 + }, + { + "epoch": 8.08, + "grad_norm": 0.35069286823272705, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0016, + "step": 21675 + }, + { + "epoch": 8.09, + "grad_norm": 1.0459482669830322, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0025, + "step": 21700 + }, + { + "epoch": 8.1, + "grad_norm": 0.807822585105896, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0016, + "step": 21725 + }, + { + "epoch": 8.11, + "grad_norm": 0.5877746939659119, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0017, + "step": 21750 + }, + { + "epoch": 8.12, + "grad_norm": 0.5680932402610779, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0018, + "step": 21775 + }, + { + "epoch": 8.13, + "grad_norm": 0.24931855499744415, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0022, + "step": 21800 + }, + { + "epoch": 8.14, + "grad_norm": 0.5789446830749512, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0021, + "step": 21825 + }, + { + "epoch": 8.15, + "grad_norm": 0.8412415981292725, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0021, + "step": 21850 + }, + { + "epoch": 8.16, + "grad_norm": 0.48601341247558594, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0019, + "step": 21875 + }, + { + "epoch": 8.17, + "grad_norm": 0.43259197473526, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0016, + "step": 21900 + }, + { + "epoch": 8.17, + "grad_norm": 0.7380715012550354, + "learning_rate": 7.847638190954775e-06, + "loss": 0.002, + "step": 21925 + }, + { + "epoch": 8.18, + "grad_norm": 0.6287863254547119, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0019, + "step": 21950 + }, + { + "epoch": 8.19, + "grad_norm": 0.7070374488830566, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0022, + "step": 21975 + }, + { + "epoch": 8.2, + "grad_norm": 0.6739636659622192, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0025, + "step": 22000 + }, + { + "epoch": 8.2, + "eval_loss": 0.15638460218906403, + "eval_runtime": 1295.6577, + "eval_samples_per_second": 1.091, + "eval_steps_per_second": 1.091, + "eval_wer": 11.984350727806225, + "step": 22000 + }, + { + "epoch": 8.21, + "grad_norm": 0.1914980113506317, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0023, + "step": 22025 + }, + { + "epoch": 8.22, + "grad_norm": 0.855567455291748, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0022, + "step": 22050 + }, + { + "epoch": 8.23, + "grad_norm": 1.4587599039077759, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0028, + "step": 22075 + }, + { + "epoch": 8.24, + "grad_norm": 1.2152149677276611, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0021, + "step": 22100 + }, + { + "epoch": 8.25, + "grad_norm": 1.3158323764801025, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0027, + "step": 22125 + }, + { + "epoch": 8.26, + "grad_norm": 0.8614988923072815, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0023, + "step": 22150 + }, + { + "epoch": 8.27, + "grad_norm": 0.6189305782318115, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0021, + "step": 22175 + }, + { + "epoch": 8.28, + "grad_norm": 1.157116174697876, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0023, + "step": 22200 + }, + { + "epoch": 8.29, + "grad_norm": 0.8300706744194031, + "learning_rate": 7.81748743718593e-06, + "loss": 0.0025, + "step": 22225 + }, + { + "epoch": 8.3, + "grad_norm": 0.5130866765975952, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0031, + "step": 22250 + }, + { + "epoch": 8.31, + "grad_norm": 1.1057721376419067, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0029, + "step": 22275 + }, + { + "epoch": 8.31, + "grad_norm": 0.7881373167037964, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0031, + "step": 22300 + }, + { + "epoch": 8.32, + "grad_norm": 0.4938255846500397, + "learning_rate": 7.80743718592965e-06, + "loss": 0.0022, + "step": 22325 + }, + { + "epoch": 8.33, + "grad_norm": 1.0976392030715942, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0029, + "step": 22350 + }, + { + "epoch": 8.34, + "grad_norm": 0.8612115383148193, + "learning_rate": 7.802412060301508e-06, + "loss": 0.0033, + "step": 22375 + }, + { + "epoch": 8.35, + "grad_norm": 0.8556031584739685, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0024, + "step": 22400 + }, + { + "epoch": 8.36, + "grad_norm": 0.9187542796134949, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0027, + "step": 22425 + }, + { + "epoch": 8.37, + "grad_norm": 0.3562270700931549, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0022, + "step": 22450 + }, + { + "epoch": 8.38, + "grad_norm": 0.8488346934318542, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0024, + "step": 22475 + }, + { + "epoch": 8.39, + "grad_norm": 1.208843469619751, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0028, + "step": 22500 + }, + { + "epoch": 8.4, + "grad_norm": 0.34106552600860596, + "learning_rate": 7.787336683417086e-06, + "loss": 0.002, + "step": 22525 + }, + { + "epoch": 8.41, + "grad_norm": 0.5932399034500122, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0021, + "step": 22550 + }, + { + "epoch": 8.42, + "grad_norm": 0.2946527600288391, + "learning_rate": 7.782311557788945e-06, + "loss": 0.0026, + "step": 22575 + }, + { + "epoch": 8.43, + "grad_norm": 0.17027659714221954, + "learning_rate": 7.779798994974876e-06, + "loss": 0.002, + "step": 22600 + }, + { + "epoch": 8.44, + "grad_norm": 0.45884260535240173, + "learning_rate": 7.777286432160805e-06, + "loss": 0.002, + "step": 22625 + }, + { + "epoch": 8.45, + "grad_norm": 0.5448809266090393, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0027, + "step": 22650 + }, + { + "epoch": 8.45, + "grad_norm": 0.854160726070404, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0023, + "step": 22675 + }, + { + "epoch": 8.46, + "grad_norm": 0.8051543235778809, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0026, + "step": 22700 + }, + { + "epoch": 8.47, + "grad_norm": 0.805406928062439, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0024, + "step": 22725 + }, + { + "epoch": 8.48, + "grad_norm": 0.9297693371772766, + "learning_rate": 7.764723618090453e-06, + "loss": 0.0021, + "step": 22750 + }, + { + "epoch": 8.49, + "grad_norm": 1.1585173606872559, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0029, + "step": 22775 + }, + { + "epoch": 8.5, + "grad_norm": 0.5646154284477234, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0028, + "step": 22800 + }, + { + "epoch": 8.51, + "grad_norm": 0.5431241989135742, + "learning_rate": 7.757185929648243e-06, + "loss": 0.003, + "step": 22825 + }, + { + "epoch": 8.52, + "grad_norm": 0.65935218334198, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0022, + "step": 22850 + }, + { + "epoch": 8.53, + "grad_norm": 1.577895164489746, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0023, + "step": 22875 + }, + { + "epoch": 8.54, + "grad_norm": 0.47105926275253296, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0025, + "step": 22900 + }, + { + "epoch": 8.55, + "grad_norm": 0.729434609413147, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0029, + "step": 22925 + }, + { + "epoch": 8.56, + "grad_norm": 0.35437673330307007, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0026, + "step": 22950 + }, + { + "epoch": 8.57, + "grad_norm": 0.39888742566108704, + "learning_rate": 7.742110552763819e-06, + "loss": 0.003, + "step": 22975 + }, + { + "epoch": 8.58, + "grad_norm": 0.8537907600402832, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0028, + "step": 23000 + }, + { + "epoch": 8.58, + "eval_loss": 0.15827353298664093, + "eval_runtime": 1208.9223, + "eval_samples_per_second": 1.169, + "eval_steps_per_second": 1.169, + "eval_wer": 11.81174846096312, + "step": 23000 + }, + { + "epoch": 8.59, + "grad_norm": 1.4949895143508911, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0026, + "step": 23025 + }, + { + "epoch": 8.59, + "grad_norm": 0.5800555348396301, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0022, + "step": 23050 + }, + { + "epoch": 8.6, + "grad_norm": 1.25839364528656, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0027, + "step": 23075 + }, + { + "epoch": 8.61, + "grad_norm": 0.5918219685554504, + "learning_rate": 7.729547738693469e-06, + "loss": 0.0028, + "step": 23100 + }, + { + "epoch": 8.62, + "grad_norm": 1.4078030586242676, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0027, + "step": 23125 + }, + { + "epoch": 8.63, + "grad_norm": 1.0836372375488281, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0031, + "step": 23150 + }, + { + "epoch": 8.64, + "grad_norm": 1.7132476568222046, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0028, + "step": 23175 + }, + { + "epoch": 8.65, + "grad_norm": 1.075817584991455, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0032, + "step": 23200 + }, + { + "epoch": 8.66, + "grad_norm": 1.431477665901184, + "learning_rate": 7.716984924623117e-06, + "loss": 0.003, + "step": 23225 + }, + { + "epoch": 8.67, + "grad_norm": 1.1809197664260864, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0023, + "step": 23250 + }, + { + "epoch": 8.68, + "grad_norm": 0.401027649641037, + "learning_rate": 7.711959798994976e-06, + "loss": 0.0024, + "step": 23275 + }, + { + "epoch": 8.69, + "grad_norm": 1.0456233024597168, + "learning_rate": 7.709447236180905e-06, + "loss": 0.0021, + "step": 23300 + }, + { + "epoch": 8.7, + "grad_norm": 1.181854486465454, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0024, + "step": 23325 + }, + { + "epoch": 8.71, + "grad_norm": 0.9962965846061707, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0025, + "step": 23350 + }, + { + "epoch": 8.72, + "grad_norm": 1.5270806550979614, + "learning_rate": 7.701909547738695e-06, + "loss": 0.0031, + "step": 23375 + }, + { + "epoch": 8.72, + "grad_norm": 0.618634819984436, + "learning_rate": 7.699396984924624e-06, + "loss": 0.0029, + "step": 23400 + }, + { + "epoch": 8.73, + "grad_norm": 0.8564863801002502, + "learning_rate": 7.696884422110553e-06, + "loss": 0.0024, + "step": 23425 + }, + { + "epoch": 8.74, + "grad_norm": 0.9405599236488342, + "learning_rate": 7.694371859296483e-06, + "loss": 0.0031, + "step": 23450 + }, + { + "epoch": 8.75, + "grad_norm": 0.5836784839630127, + "learning_rate": 7.691859296482412e-06, + "loss": 0.0026, + "step": 23475 + }, + { + "epoch": 8.76, + "grad_norm": 0.6809453368186951, + "learning_rate": 7.689346733668343e-06, + "loss": 0.004, + "step": 23500 + }, + { + "epoch": 8.77, + "grad_norm": 0.36484068632125854, + "learning_rate": 7.68683417085427e-06, + "loss": 0.0027, + "step": 23525 + }, + { + "epoch": 8.78, + "grad_norm": 1.1994209289550781, + "learning_rate": 7.684321608040202e-06, + "loss": 0.0029, + "step": 23550 + }, + { + "epoch": 8.79, + "grad_norm": 0.274213969707489, + "learning_rate": 7.681809045226131e-06, + "loss": 0.0027, + "step": 23575 + }, + { + "epoch": 8.8, + "grad_norm": 0.724822998046875, + "learning_rate": 7.67929648241206e-06, + "loss": 0.0029, + "step": 23600 + }, + { + "epoch": 8.81, + "grad_norm": 0.4708680510520935, + "learning_rate": 7.676783919597991e-06, + "loss": 0.0026, + "step": 23625 + }, + { + "epoch": 8.82, + "grad_norm": 1.080647587776184, + "learning_rate": 7.67427135678392e-06, + "loss": 0.0025, + "step": 23650 + }, + { + "epoch": 8.83, + "grad_norm": 1.1523445844650269, + "learning_rate": 7.67175879396985e-06, + "loss": 0.0026, + "step": 23675 + }, + { + "epoch": 8.84, + "grad_norm": 0.5054668188095093, + "learning_rate": 7.66924623115578e-06, + "loss": 0.0028, + "step": 23700 + }, + { + "epoch": 8.85, + "grad_norm": 0.7604905366897583, + "learning_rate": 7.666733668341709e-06, + "loss": 0.003, + "step": 23725 + }, + { + "epoch": 8.86, + "grad_norm": 0.5578906536102295, + "learning_rate": 7.664221105527638e-06, + "loss": 0.0028, + "step": 23750 + }, + { + "epoch": 8.86, + "grad_norm": 1.1622586250305176, + "learning_rate": 7.661708542713569e-06, + "loss": 0.0028, + "step": 23775 + }, + { + "epoch": 8.87, + "grad_norm": 0.6902370452880859, + "learning_rate": 7.659195979899498e-06, + "loss": 0.0028, + "step": 23800 + }, + { + "epoch": 8.88, + "grad_norm": 0.4763517677783966, + "learning_rate": 7.656683417085428e-06, + "loss": 0.0026, + "step": 23825 + }, + { + "epoch": 8.89, + "grad_norm": 0.8834311962127686, + "learning_rate": 7.654170854271357e-06, + "loss": 0.0025, + "step": 23850 + }, + { + "epoch": 8.9, + "grad_norm": 0.449410080909729, + "learning_rate": 7.651658291457286e-06, + "loss": 0.0022, + "step": 23875 + }, + { + "epoch": 8.91, + "grad_norm": 0.7794365882873535, + "learning_rate": 7.649145728643217e-06, + "loss": 0.0026, + "step": 23900 + }, + { + "epoch": 8.92, + "grad_norm": 0.2633087933063507, + "learning_rate": 7.646633165829147e-06, + "loss": 0.0026, + "step": 23925 + }, + { + "epoch": 8.93, + "grad_norm": 1.467690110206604, + "learning_rate": 7.644120603015076e-06, + "loss": 0.0023, + "step": 23950 + }, + { + "epoch": 8.94, + "grad_norm": 1.4814653396606445, + "learning_rate": 7.641608040201005e-06, + "loss": 0.0022, + "step": 23975 + }, + { + "epoch": 8.95, + "grad_norm": 1.321621060371399, + "learning_rate": 7.639095477386935e-06, + "loss": 0.0025, + "step": 24000 + }, + { + "epoch": 8.95, + "eval_loss": 0.16205289959907532, + "eval_runtime": 1195.0402, + "eval_samples_per_second": 1.182, + "eval_steps_per_second": 1.182, + "eval_wer": 12.20873367470226, + "step": 24000 + }, + { + "epoch": 8.96, + "grad_norm": 0.8230905532836914, + "learning_rate": 7.636582914572866e-06, + "loss": 0.0024, + "step": 24025 + }, + { + "epoch": 8.97, + "grad_norm": 0.2987496554851532, + "learning_rate": 7.634070351758795e-06, + "loss": 0.0028, + "step": 24050 + }, + { + "epoch": 8.98, + "grad_norm": 1.0158071517944336, + "learning_rate": 7.631557788944724e-06, + "loss": 0.0033, + "step": 24075 + }, + { + "epoch": 8.99, + "grad_norm": 0.5084171295166016, + "learning_rate": 7.629045226130654e-06, + "loss": 0.0026, + "step": 24100 + }, + { + "epoch": 9.0, + "grad_norm": 0.7177903056144714, + "learning_rate": 7.626532663316584e-06, + "loss": 0.0031, + "step": 24125 + }, + { + "epoch": 9.0, + "grad_norm": 0.4768019914627075, + "learning_rate": 7.624020100502513e-06, + "loss": 0.0022, + "step": 24150 + }, + { + "epoch": 9.01, + "grad_norm": 0.5992189645767212, + "learning_rate": 7.6215075376884425e-06, + "loss": 0.0014, + "step": 24175 + }, + { + "epoch": 9.02, + "grad_norm": 0.30309563875198364, + "learning_rate": 7.618994974874373e-06, + "loss": 0.0014, + "step": 24200 + }, + { + "epoch": 9.03, + "grad_norm": 0.971168577671051, + "learning_rate": 7.616482412060302e-06, + "loss": 0.0015, + "step": 24225 + }, + { + "epoch": 9.04, + "grad_norm": 0.45704254508018494, + "learning_rate": 7.613969849246232e-06, + "loss": 0.0014, + "step": 24250 + }, + { + "epoch": 9.05, + "grad_norm": 0.28214672207832336, + "learning_rate": 7.6114572864321615e-06, + "loss": 0.0015, + "step": 24275 + }, + { + "epoch": 9.06, + "grad_norm": 0.5859740972518921, + "learning_rate": 7.608944723618092e-06, + "loss": 0.0018, + "step": 24300 + }, + { + "epoch": 9.07, + "grad_norm": 0.6066518425941467, + "learning_rate": 7.60643216080402e-06, + "loss": 0.0014, + "step": 24325 + }, + { + "epoch": 9.08, + "grad_norm": 0.5189049243927002, + "learning_rate": 7.60391959798995e-06, + "loss": 0.0017, + "step": 24350 + }, + { + "epoch": 9.09, + "grad_norm": 0.1726868748664856, + "learning_rate": 7.60140703517588e-06, + "loss": 0.0017, + "step": 24375 + }, + { + "epoch": 9.1, + "grad_norm": 0.36445239186286926, + "learning_rate": 7.59889447236181e-06, + "loss": 0.0017, + "step": 24400 + }, + { + "epoch": 9.11, + "grad_norm": 0.24085190892219543, + "learning_rate": 7.59638190954774e-06, + "loss": 0.0015, + "step": 24425 + }, + { + "epoch": 9.12, + "grad_norm": 0.6300321817398071, + "learning_rate": 7.593869346733668e-06, + "loss": 0.002, + "step": 24450 + }, + { + "epoch": 9.13, + "grad_norm": 0.8815165162086487, + "learning_rate": 7.591356783919599e-06, + "loss": 0.0016, + "step": 24475 + }, + { + "epoch": 9.13, + "grad_norm": 0.7230604290962219, + "learning_rate": 7.588844221105528e-06, + "loss": 0.0017, + "step": 24500 + }, + { + "epoch": 9.14, + "grad_norm": 0.7461475133895874, + "learning_rate": 7.586331658291458e-06, + "loss": 0.0022, + "step": 24525 + }, + { + "epoch": 9.15, + "grad_norm": 1.0013806819915771, + "learning_rate": 7.583819095477387e-06, + "loss": 0.0017, + "step": 24550 + }, + { + "epoch": 9.16, + "grad_norm": 0.09143200516700745, + "learning_rate": 7.5813065326633176e-06, + "loss": 0.002, + "step": 24575 + }, + { + "epoch": 9.17, + "grad_norm": 0.7841842770576477, + "learning_rate": 7.578793969849246e-06, + "loss": 0.0019, + "step": 24600 + }, + { + "epoch": 9.18, + "grad_norm": 0.2752855122089386, + "learning_rate": 7.576281407035176e-06, + "loss": 0.0016, + "step": 24625 + }, + { + "epoch": 9.19, + "grad_norm": 0.8698446154594421, + "learning_rate": 7.573768844221106e-06, + "loss": 0.0022, + "step": 24650 + }, + { + "epoch": 9.2, + "grad_norm": 0.3407110571861267, + "learning_rate": 7.571256281407036e-06, + "loss": 0.0024, + "step": 24675 + }, + { + "epoch": 9.21, + "grad_norm": 0.21751521527767181, + "learning_rate": 7.568743718592966e-06, + "loss": 0.0029, + "step": 24700 + }, + { + "epoch": 9.22, + "grad_norm": 1.6548364162445068, + "learning_rate": 7.566231155778895e-06, + "loss": 0.002, + "step": 24725 + }, + { + "epoch": 9.23, + "grad_norm": 0.52081698179245, + "learning_rate": 7.5637185929648245e-06, + "loss": 0.0022, + "step": 24750 + }, + { + "epoch": 9.24, + "grad_norm": 0.5885445475578308, + "learning_rate": 7.561206030150754e-06, + "loss": 0.002, + "step": 24775 + }, + { + "epoch": 9.25, + "grad_norm": 0.3084820806980133, + "learning_rate": 7.558693467336684e-06, + "loss": 0.0018, + "step": 24800 + }, + { + "epoch": 9.26, + "grad_norm": 0.5031408071517944, + "learning_rate": 7.556180904522614e-06, + "loss": 0.0019, + "step": 24825 + }, + { + "epoch": 9.27, + "grad_norm": 0.6160692572593689, + "learning_rate": 7.5536683417085435e-06, + "loss": 0.0021, + "step": 24850 + }, + { + "epoch": 9.27, + "grad_norm": 0.43084803223609924, + "learning_rate": 7.551155778894474e-06, + "loss": 0.0017, + "step": 24875 + }, + { + "epoch": 9.28, + "grad_norm": 0.7658066153526306, + "learning_rate": 7.548643216080402e-06, + "loss": 0.0024, + "step": 24900 + }, + { + "epoch": 9.29, + "grad_norm": 0.7070887684822083, + "learning_rate": 7.546130653266332e-06, + "loss": 0.0026, + "step": 24925 + }, + { + "epoch": 9.3, + "grad_norm": 0.3388858437538147, + "learning_rate": 7.543618090452262e-06, + "loss": 0.0017, + "step": 24950 + }, + { + "epoch": 9.31, + "grad_norm": 0.2915879487991333, + "learning_rate": 7.541105527638192e-06, + "loss": 0.0025, + "step": 24975 + }, + { + "epoch": 9.32, + "grad_norm": 1.0261073112487793, + "learning_rate": 7.538592964824121e-06, + "loss": 0.0022, + "step": 25000 + }, + { + "epoch": 9.32, + "eval_loss": 0.1605997085571289, + "eval_runtime": 1203.739, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 1.174, + "eval_wer": 12.082158679017317, + "step": 25000 + }, + { + "epoch": 9.33, + "grad_norm": 0.6575380563735962, + "learning_rate": 7.5360804020100505e-06, + "loss": 0.004, + "step": 25025 + }, + { + "epoch": 9.34, + "grad_norm": 0.5002018809318542, + "learning_rate": 7.533567839195981e-06, + "loss": 0.0024, + "step": 25050 + }, + { + "epoch": 9.35, + "grad_norm": 0.5523674488067627, + "learning_rate": 7.53105527638191e-06, + "loss": 0.0018, + "step": 25075 + }, + { + "epoch": 9.36, + "grad_norm": 0.9832343459129333, + "learning_rate": 7.52854271356784e-06, + "loss": 0.0016, + "step": 25100 + }, + { + "epoch": 9.37, + "grad_norm": 0.5187830328941345, + "learning_rate": 7.5260301507537695e-06, + "loss": 0.0017, + "step": 25125 + }, + { + "epoch": 9.38, + "grad_norm": 0.5425010919570923, + "learning_rate": 7.5235175879397e-06, + "loss": 0.0027, + "step": 25150 + }, + { + "epoch": 9.39, + "grad_norm": 0.6857979893684387, + "learning_rate": 7.521005025125628e-06, + "loss": 0.0021, + "step": 25175 + }, + { + "epoch": 9.4, + "grad_norm": 0.6824321746826172, + "learning_rate": 7.518492462311558e-06, + "loss": 0.0021, + "step": 25200 + }, + { + "epoch": 9.41, + "grad_norm": 1.1488239765167236, + "learning_rate": 7.515979899497488e-06, + "loss": 0.002, + "step": 25225 + }, + { + "epoch": 9.41, + "grad_norm": 0.7961787581443787, + "learning_rate": 7.513467336683418e-06, + "loss": 0.0019, + "step": 25250 + }, + { + "epoch": 9.42, + "grad_norm": 0.9047830104827881, + "learning_rate": 7.510954773869348e-06, + "loss": 0.0026, + "step": 25275 + }, + { + "epoch": 9.43, + "grad_norm": 0.9834209680557251, + "learning_rate": 7.508442211055276e-06, + "loss": 0.0022, + "step": 25300 + }, + { + "epoch": 9.44, + "grad_norm": 1.158664345741272, + "learning_rate": 7.505929648241207e-06, + "loss": 0.003, + "step": 25325 + }, + { + "epoch": 9.45, + "grad_norm": 0.8231046199798584, + "learning_rate": 7.503417085427136e-06, + "loss": 0.0025, + "step": 25350 + }, + { + "epoch": 9.46, + "grad_norm": 0.31176692247390747, + "learning_rate": 7.500904522613066e-06, + "loss": 0.0025, + "step": 25375 + }, + { + "epoch": 9.47, + "grad_norm": 0.6942262053489685, + "learning_rate": 7.498391959798995e-06, + "loss": 0.0028, + "step": 25400 + }, + { + "epoch": 9.48, + "grad_norm": 0.8983235359191895, + "learning_rate": 7.4958793969849256e-06, + "loss": 0.0027, + "step": 25425 + }, + { + "epoch": 9.49, + "grad_norm": 1.0925562381744385, + "learning_rate": 7.493366834170856e-06, + "loss": 0.0029, + "step": 25450 + }, + { + "epoch": 9.5, + "grad_norm": 0.8901735544204712, + "learning_rate": 7.490854271356784e-06, + "loss": 0.0024, + "step": 25475 + }, + { + "epoch": 9.51, + "grad_norm": 0.392694890499115, + "learning_rate": 7.488341708542714e-06, + "loss": 0.0024, + "step": 25500 + }, + { + "epoch": 9.52, + "grad_norm": 0.46536752581596375, + "learning_rate": 7.485829145728644e-06, + "loss": 0.0027, + "step": 25525 + }, + { + "epoch": 9.53, + "grad_norm": 1.4696451425552368, + "learning_rate": 7.483316582914574e-06, + "loss": 0.0024, + "step": 25550 + }, + { + "epoch": 9.54, + "grad_norm": 0.9704533815383911, + "learning_rate": 7.480804020100502e-06, + "loss": 0.0024, + "step": 25575 + }, + { + "epoch": 9.55, + "grad_norm": 0.9742372035980225, + "learning_rate": 7.4782914572864325e-06, + "loss": 0.0022, + "step": 25600 + }, + { + "epoch": 9.55, + "grad_norm": 1.004258155822754, + "learning_rate": 7.475778894472362e-06, + "loss": 0.0026, + "step": 25625 + }, + { + "epoch": 9.56, + "grad_norm": 0.7668594717979431, + "learning_rate": 7.473266331658292e-06, + "loss": 0.0021, + "step": 25650 + }, + { + "epoch": 9.57, + "grad_norm": 0.27932825684547424, + "learning_rate": 7.470753768844222e-06, + "loss": 0.0026, + "step": 25675 + }, + { + "epoch": 9.58, + "grad_norm": 0.9912621378898621, + "learning_rate": 7.4682412060301515e-06, + "loss": 0.0023, + "step": 25700 + }, + { + "epoch": 9.59, + "grad_norm": 0.9113653302192688, + "learning_rate": 7.465728643216082e-06, + "loss": 0.002, + "step": 25725 + }, + { + "epoch": 9.6, + "grad_norm": 0.2737920880317688, + "learning_rate": 7.46321608040201e-06, + "loss": 0.0018, + "step": 25750 + }, + { + "epoch": 9.61, + "grad_norm": 0.37901610136032104, + "learning_rate": 7.46070351758794e-06, + "loss": 0.0024, + "step": 25775 + }, + { + "epoch": 9.62, + "grad_norm": 0.37115776538848877, + "learning_rate": 7.45819095477387e-06, + "loss": 0.0025, + "step": 25800 + }, + { + "epoch": 9.63, + "grad_norm": 0.8304823040962219, + "learning_rate": 7.4556783919598e-06, + "loss": 0.0024, + "step": 25825 + }, + { + "epoch": 9.64, + "grad_norm": 1.2853976488113403, + "learning_rate": 7.453165829145729e-06, + "loss": 0.0021, + "step": 25850 + }, + { + "epoch": 9.65, + "grad_norm": 0.5573686361312866, + "learning_rate": 7.4506532663316585e-06, + "loss": 0.0021, + "step": 25875 + }, + { + "epoch": 9.66, + "grad_norm": 0.899629533290863, + "learning_rate": 7.448241206030151e-06, + "loss": 0.0025, + "step": 25900 + }, + { + "epoch": 9.67, + "grad_norm": 0.19213269650936127, + "learning_rate": 7.445728643216081e-06, + "loss": 0.0023, + "step": 25925 + }, + { + "epoch": 9.68, + "grad_norm": 0.20868626236915588, + "learning_rate": 7.44321608040201e-06, + "loss": 0.002, + "step": 25950 + }, + { + "epoch": 9.68, + "grad_norm": 0.918928861618042, + "learning_rate": 7.4407035175879405e-06, + "loss": 0.0019, + "step": 25975 + }, + { + "epoch": 9.69, + "grad_norm": 0.7060292959213257, + "learning_rate": 7.43819095477387e-06, + "loss": 0.0026, + "step": 26000 + }, + { + "epoch": 9.69, + "eval_loss": 0.15555742383003235, + "eval_runtime": 1198.1368, + "eval_samples_per_second": 1.179, + "eval_steps_per_second": 1.179, + "eval_wer": 11.7772280075945, + "step": 26000 + }, + { + "epoch": 9.7, + "grad_norm": 0.5442734360694885, + "learning_rate": 7.4356783919598e-06, + "loss": 0.002, + "step": 26025 + }, + { + "epoch": 9.71, + "grad_norm": 0.8008816242218018, + "learning_rate": 7.4331658291457285e-06, + "loss": 0.0018, + "step": 26050 + }, + { + "epoch": 9.72, + "grad_norm": 0.7982268929481506, + "learning_rate": 7.430653266331659e-06, + "loss": 0.002, + "step": 26075 + }, + { + "epoch": 9.73, + "grad_norm": 0.9123344421386719, + "learning_rate": 7.428140703517589e-06, + "loss": 0.0026, + "step": 26100 + }, + { + "epoch": 9.74, + "grad_norm": 0.6558927893638611, + "learning_rate": 7.425628140703518e-06, + "loss": 0.0023, + "step": 26125 + }, + { + "epoch": 9.75, + "grad_norm": 1.0989559888839722, + "learning_rate": 7.423115577889448e-06, + "loss": 0.0025, + "step": 26150 + }, + { + "epoch": 9.76, + "grad_norm": 0.6431317925453186, + "learning_rate": 7.420603015075377e-06, + "loss": 0.0028, + "step": 26175 + }, + { + "epoch": 9.77, + "grad_norm": 0.9317420125007629, + "learning_rate": 7.418090452261307e-06, + "loss": 0.002, + "step": 26200 + }, + { + "epoch": 9.78, + "grad_norm": 0.5087776780128479, + "learning_rate": 7.415577889447236e-06, + "loss": 0.0023, + "step": 26225 + }, + { + "epoch": 9.79, + "grad_norm": 0.9517159461975098, + "learning_rate": 7.4130653266331665e-06, + "loss": 0.0024, + "step": 26250 + }, + { + "epoch": 9.8, + "grad_norm": 0.42135387659072876, + "learning_rate": 7.410552763819097e-06, + "loss": 0.0023, + "step": 26275 + }, + { + "epoch": 9.81, + "grad_norm": 0.4365774095058441, + "learning_rate": 7.408040201005026e-06, + "loss": 0.0023, + "step": 26300 + }, + { + "epoch": 9.82, + "grad_norm": 0.580245852470398, + "learning_rate": 7.405527638190956e-06, + "loss": 0.0024, + "step": 26325 + }, + { + "epoch": 9.82, + "grad_norm": 0.6004770398139954, + "learning_rate": 7.403015075376885e-06, + "loss": 0.0025, + "step": 26350 + }, + { + "epoch": 9.83, + "grad_norm": 0.9545499682426453, + "learning_rate": 7.400502512562815e-06, + "loss": 0.0021, + "step": 26375 + }, + { + "epoch": 9.84, + "grad_norm": 0.5324497818946838, + "learning_rate": 7.397989949748744e-06, + "loss": 0.0021, + "step": 26400 + }, + { + "epoch": 9.85, + "grad_norm": 0.8661658763885498, + "learning_rate": 7.395477386934674e-06, + "loss": 0.0021, + "step": 26425 + }, + { + "epoch": 9.86, + "grad_norm": 0.6219410300254822, + "learning_rate": 7.392964824120603e-06, + "loss": 0.0016, + "step": 26450 + }, + { + "epoch": 9.87, + "grad_norm": 0.3257713317871094, + "learning_rate": 7.390452261306533e-06, + "loss": 0.0017, + "step": 26475 + }, + { + "epoch": 9.88, + "grad_norm": 0.9151502847671509, + "learning_rate": 7.387939698492463e-06, + "loss": 0.0022, + "step": 26500 + }, + { + "epoch": 9.89, + "grad_norm": 0.9244911670684814, + "learning_rate": 7.385427135678392e-06, + "loss": 0.002, + "step": 26525 + }, + { + "epoch": 9.9, + "grad_norm": 0.5015227794647217, + "learning_rate": 7.382914572864323e-06, + "loss": 0.0019, + "step": 26550 + }, + { + "epoch": 9.91, + "grad_norm": 0.4457055628299713, + "learning_rate": 7.380402010050252e-06, + "loss": 0.0024, + "step": 26575 + }, + { + "epoch": 9.92, + "grad_norm": 0.557113766670227, + "learning_rate": 7.377889447236182e-06, + "loss": 0.0017, + "step": 26600 + }, + { + "epoch": 9.93, + "grad_norm": 1.6261543035507202, + "learning_rate": 7.3753768844221105e-06, + "loss": 0.0022, + "step": 26625 + }, + { + "epoch": 9.94, + "grad_norm": 1.373177409172058, + "learning_rate": 7.372864321608041e-06, + "loss": 0.0024, + "step": 26650 + }, + { + "epoch": 9.95, + "grad_norm": 0.5716370344161987, + "learning_rate": 7.37035175879397e-06, + "loss": 0.0022, + "step": 26675 + }, + { + "epoch": 9.96, + "grad_norm": 0.8158998489379883, + "learning_rate": 7.3678391959799e-06, + "loss": 0.0021, + "step": 26700 + }, + { + "epoch": 9.96, + "grad_norm": 1.119882345199585, + "learning_rate": 7.36532663316583e-06, + "loss": 0.0024, + "step": 26725 + }, + { + "epoch": 9.97, + "grad_norm": 0.8210596442222595, + "learning_rate": 7.362814070351759e-06, + "loss": 0.0024, + "step": 26750 + }, + { + "epoch": 9.98, + "grad_norm": 0.47753021121025085, + "learning_rate": 7.360301507537689e-06, + "loss": 0.0023, + "step": 26775 + }, + { + "epoch": 9.99, + "grad_norm": 0.9976381063461304, + "learning_rate": 7.357788944723618e-06, + "loss": 0.0024, + "step": 26800 + }, + { + "epoch": 10.0, + "grad_norm": 0.37448805570602417, + "learning_rate": 7.3552763819095485e-06, + "loss": 0.0022, + "step": 26825 + }, + { + "epoch": 10.01, + "grad_norm": 0.4639699161052704, + "learning_rate": 7.352763819095478e-06, + "loss": 0.0024, + "step": 26850 + }, + { + "epoch": 10.02, + "grad_norm": 1.6538101434707642, + "learning_rate": 7.350251256281408e-06, + "loss": 0.0021, + "step": 26875 + }, + { + "epoch": 10.03, + "grad_norm": 0.08220924437046051, + "learning_rate": 7.347738693467338e-06, + "loss": 0.0019, + "step": 26900 + }, + { + "epoch": 10.04, + "grad_norm": 0.4740258455276489, + "learning_rate": 7.345226130653267e-06, + "loss": 0.0017, + "step": 26925 + }, + { + "epoch": 10.05, + "grad_norm": 0.4934789538383484, + "learning_rate": 7.342713567839197e-06, + "loss": 0.0015, + "step": 26950 + }, + { + "epoch": 10.06, + "grad_norm": 0.8804222345352173, + "learning_rate": 7.340201005025126e-06, + "loss": 0.0027, + "step": 26975 + }, + { + "epoch": 10.07, + "grad_norm": 0.8001572489738464, + "learning_rate": 7.337688442211056e-06, + "loss": 0.002, + "step": 27000 + }, + { + "epoch": 10.07, + "eval_loss": 0.15944212675094604, + "eval_runtime": 1196.998, + "eval_samples_per_second": 1.18, + "eval_steps_per_second": 1.18, + "eval_wer": 11.938323456648064, + "step": 27000 + }, + { + "epoch": 10.08, + "grad_norm": 0.28370368480682373, + "learning_rate": 7.335175879396985e-06, + "loss": 0.0017, + "step": 27025 + }, + { + "epoch": 10.09, + "grad_norm": 0.6169744729995728, + "learning_rate": 7.332663316582915e-06, + "loss": 0.0021, + "step": 27050 + }, + { + "epoch": 10.1, + "grad_norm": 1.3125170469284058, + "learning_rate": 7.330150753768844e-06, + "loss": 0.0019, + "step": 27075 + }, + { + "epoch": 10.1, + "grad_norm": 0.7638176679611206, + "learning_rate": 7.3276381909547745e-06, + "loss": 0.0015, + "step": 27100 + }, + { + "epoch": 10.11, + "grad_norm": 0.13667939603328705, + "learning_rate": 7.325125628140705e-06, + "loss": 0.0015, + "step": 27125 + }, + { + "epoch": 10.12, + "grad_norm": 1.1781333684921265, + "learning_rate": 7.322613065326634e-06, + "loss": 0.002, + "step": 27150 + }, + { + "epoch": 10.13, + "grad_norm": 0.2891380488872528, + "learning_rate": 7.320100502512564e-06, + "loss": 0.0018, + "step": 27175 + }, + { + "epoch": 10.14, + "grad_norm": 0.7880147099494934, + "learning_rate": 7.317587939698493e-06, + "loss": 0.002, + "step": 27200 + }, + { + "epoch": 10.15, + "grad_norm": 0.5468487739562988, + "learning_rate": 7.315075376884423e-06, + "loss": 0.0013, + "step": 27225 + }, + { + "epoch": 10.16, + "grad_norm": 0.7237880825996399, + "learning_rate": 7.312562814070352e-06, + "loss": 0.0015, + "step": 27250 + }, + { + "epoch": 10.17, + "grad_norm": 1.2654218673706055, + "learning_rate": 7.310050251256282e-06, + "loss": 0.0015, + "step": 27275 + }, + { + "epoch": 10.18, + "grad_norm": 0.8597329258918762, + "learning_rate": 7.307537688442211e-06, + "loss": 0.0017, + "step": 27300 + }, + { + "epoch": 10.19, + "grad_norm": 0.09852171689271927, + "learning_rate": 7.305025125628141e-06, + "loss": 0.0014, + "step": 27325 + }, + { + "epoch": 10.2, + "grad_norm": 0.25657331943511963, + "learning_rate": 7.302512562814071e-06, + "loss": 0.0014, + "step": 27350 + }, + { + "epoch": 10.21, + "grad_norm": 0.3590128421783447, + "learning_rate": 7.3e-06, + "loss": 0.0012, + "step": 27375 + }, + { + "epoch": 10.22, + "grad_norm": 0.6345021724700928, + "learning_rate": 7.297487437185931e-06, + "loss": 0.0017, + "step": 27400 + }, + { + "epoch": 10.23, + "grad_norm": 0.11524657905101776, + "learning_rate": 7.29497487437186e-06, + "loss": 0.001, + "step": 27425 + }, + { + "epoch": 10.23, + "grad_norm": 0.6993491649627686, + "learning_rate": 7.29246231155779e-06, + "loss": 0.0013, + "step": 27450 + }, + { + "epoch": 10.24, + "grad_norm": 1.1797758340835571, + "learning_rate": 7.2899497487437186e-06, + "loss": 0.0021, + "step": 27475 + }, + { + "epoch": 10.25, + "grad_norm": 0.8718804121017456, + "learning_rate": 7.287437185929649e-06, + "loss": 0.0017, + "step": 27500 + }, + { + "epoch": 10.26, + "grad_norm": 0.4268168807029724, + "learning_rate": 7.284924623115579e-06, + "loss": 0.0014, + "step": 27525 + }, + { + "epoch": 10.27, + "grad_norm": 0.65080726146698, + "learning_rate": 7.282412060301508e-06, + "loss": 0.0019, + "step": 27550 + }, + { + "epoch": 10.28, + "grad_norm": 0.9239036440849304, + "learning_rate": 7.279899497487438e-06, + "loss": 0.0017, + "step": 27575 + }, + { + "epoch": 10.29, + "grad_norm": 1.1439005136489868, + "learning_rate": 7.277386934673367e-06, + "loss": 0.0012, + "step": 27600 + }, + { + "epoch": 10.3, + "grad_norm": 0.8001730442047119, + "learning_rate": 7.274874371859297e-06, + "loss": 0.0017, + "step": 27625 + }, + { + "epoch": 10.31, + "grad_norm": 0.3007516860961914, + "learning_rate": 7.272361809045226e-06, + "loss": 0.0015, + "step": 27650 + }, + { + "epoch": 10.32, + "grad_norm": 0.6022579073905945, + "learning_rate": 7.2698492462311565e-06, + "loss": 0.0022, + "step": 27675 + }, + { + "epoch": 10.33, + "grad_norm": 0.876095175743103, + "learning_rate": 7.267336683417086e-06, + "loss": 0.0024, + "step": 27700 + }, + { + "epoch": 10.34, + "grad_norm": 0.7891772985458374, + "learning_rate": 7.264824120603016e-06, + "loss": 0.002, + "step": 27725 + }, + { + "epoch": 10.35, + "grad_norm": 0.542544960975647, + "learning_rate": 7.262311557788946e-06, + "loss": 0.0017, + "step": 27750 + }, + { + "epoch": 10.36, + "grad_norm": 1.3415969610214233, + "learning_rate": 7.259798994974875e-06, + "loss": 0.0018, + "step": 27775 + }, + { + "epoch": 10.37, + "grad_norm": 0.7220607399940491, + "learning_rate": 7.257286432160805e-06, + "loss": 0.0015, + "step": 27800 + }, + { + "epoch": 10.37, + "grad_norm": 0.5728715658187866, + "learning_rate": 7.254773869346734e-06, + "loss": 0.0014, + "step": 27825 + }, + { + "epoch": 10.38, + "grad_norm": 0.5352957844734192, + "learning_rate": 7.252261306532664e-06, + "loss": 0.0022, + "step": 27850 + }, + { + "epoch": 10.39, + "grad_norm": 0.24208775162696838, + "learning_rate": 7.249748743718593e-06, + "loss": 0.0026, + "step": 27875 + }, + { + "epoch": 10.4, + "grad_norm": 0.295696884393692, + "learning_rate": 7.247236180904523e-06, + "loss": 0.002, + "step": 27900 + }, + { + "epoch": 10.41, + "grad_norm": 1.11568021774292, + "learning_rate": 7.244723618090452e-06, + "loss": 0.0019, + "step": 27925 + }, + { + "epoch": 10.42, + "grad_norm": 0.7673165202140808, + "learning_rate": 7.2422110552763825e-06, + "loss": 0.0016, + "step": 27950 + }, + { + "epoch": 10.43, + "grad_norm": 0.7200753688812256, + "learning_rate": 7.239698492462313e-06, + "loss": 0.0015, + "step": 27975 + }, + { + "epoch": 10.44, + "grad_norm": 0.5427148342132568, + "learning_rate": 7.237185929648242e-06, + "loss": 0.0022, + "step": 28000 + }, + { + "epoch": 10.44, + "eval_loss": 0.1616099327802658, + "eval_runtime": 1192.2046, + "eval_samples_per_second": 1.185, + "eval_steps_per_second": 1.185, + "eval_wer": 11.731200736436339, + "step": 28000 + }, + { + "epoch": 10.45, + "grad_norm": 0.8471754193305969, + "learning_rate": 7.234673366834172e-06, + "loss": 0.0024, + "step": 28025 + }, + { + "epoch": 10.46, + "grad_norm": 0.2679501175880432, + "learning_rate": 7.232160804020101e-06, + "loss": 0.0022, + "step": 28050 + }, + { + "epoch": 10.47, + "grad_norm": 0.6746098399162292, + "learning_rate": 7.229648241206031e-06, + "loss": 0.0019, + "step": 28075 + }, + { + "epoch": 10.48, + "grad_norm": 0.7426679730415344, + "learning_rate": 7.22713567839196e-06, + "loss": 0.0023, + "step": 28100 + }, + { + "epoch": 10.49, + "grad_norm": 0.8457304239273071, + "learning_rate": 7.22462311557789e-06, + "loss": 0.0021, + "step": 28125 + }, + { + "epoch": 10.5, + "grad_norm": 0.5770073533058167, + "learning_rate": 7.2221105527638204e-06, + "loss": 0.0023, + "step": 28150 + }, + { + "epoch": 10.51, + "grad_norm": 0.7002407908439636, + "learning_rate": 7.219597989949749e-06, + "loss": 0.0024, + "step": 28175 + }, + { + "epoch": 10.51, + "grad_norm": 1.3684515953063965, + "learning_rate": 7.217085427135679e-06, + "loss": 0.0023, + "step": 28200 + }, + { + "epoch": 10.52, + "grad_norm": 0.8156173825263977, + "learning_rate": 7.214572864321608e-06, + "loss": 0.0026, + "step": 28225 + }, + { + "epoch": 10.53, + "grad_norm": 0.7433893084526062, + "learning_rate": 7.212060301507539e-06, + "loss": 0.0027, + "step": 28250 + }, + { + "epoch": 10.54, + "grad_norm": 0.3708101212978363, + "learning_rate": 7.209547738693468e-06, + "loss": 0.0018, + "step": 28275 + }, + { + "epoch": 10.55, + "grad_norm": 0.937624454498291, + "learning_rate": 7.207035175879398e-06, + "loss": 0.0022, + "step": 28300 + }, + { + "epoch": 10.56, + "grad_norm": 0.8503632545471191, + "learning_rate": 7.2045226130653266e-06, + "loss": 0.002, + "step": 28325 + }, + { + "epoch": 10.57, + "grad_norm": 0.6903231739997864, + "learning_rate": 7.202010050251257e-06, + "loss": 0.0018, + "step": 28350 + }, + { + "epoch": 10.58, + "grad_norm": 0.11478512734174728, + "learning_rate": 7.199497487437187e-06, + "loss": 0.0023, + "step": 28375 + }, + { + "epoch": 10.59, + "grad_norm": 0.7456556558609009, + "learning_rate": 7.196984924623116e-06, + "loss": 0.0018, + "step": 28400 + }, + { + "epoch": 10.6, + "grad_norm": 0.5295183658599854, + "learning_rate": 7.194472361809046e-06, + "loss": 0.0015, + "step": 28425 + }, + { + "epoch": 10.61, + "grad_norm": 1.297411322593689, + "learning_rate": 7.191959798994975e-06, + "loss": 0.002, + "step": 28450 + }, + { + "epoch": 10.62, + "grad_norm": 0.2821589410305023, + "learning_rate": 7.189447236180905e-06, + "loss": 0.002, + "step": 28475 + }, + { + "epoch": 10.63, + "grad_norm": 0.5842932462692261, + "learning_rate": 7.186934673366834e-06, + "loss": 0.0022, + "step": 28500 + }, + { + "epoch": 10.64, + "grad_norm": 1.079644799232483, + "learning_rate": 7.1844221105527645e-06, + "loss": 0.0025, + "step": 28525 + }, + { + "epoch": 10.65, + "grad_norm": 0.15925787389278412, + "learning_rate": 7.181909547738694e-06, + "loss": 0.0021, + "step": 28550 + }, + { + "epoch": 10.65, + "grad_norm": 0.8842344284057617, + "learning_rate": 7.179396984924624e-06, + "loss": 0.0024, + "step": 28575 + }, + { + "epoch": 10.66, + "grad_norm": 0.1410672664642334, + "learning_rate": 7.176884422110554e-06, + "loss": 0.0025, + "step": 28600 + }, + { + "epoch": 10.67, + "grad_norm": 0.7764862179756165, + "learning_rate": 7.174371859296483e-06, + "loss": 0.0022, + "step": 28625 + }, + { + "epoch": 10.68, + "grad_norm": 0.5060930252075195, + "learning_rate": 7.171859296482413e-06, + "loss": 0.0026, + "step": 28650 + }, + { + "epoch": 10.69, + "grad_norm": 1.773107647895813, + "learning_rate": 7.169346733668342e-06, + "loss": 0.0022, + "step": 28675 + }, + { + "epoch": 10.7, + "grad_norm": 0.9644919037818909, + "learning_rate": 7.166834170854272e-06, + "loss": 0.0017, + "step": 28700 + }, + { + "epoch": 10.71, + "grad_norm": 0.8103851079940796, + "learning_rate": 7.164321608040201e-06, + "loss": 0.0021, + "step": 28725 + }, + { + "epoch": 10.72, + "grad_norm": 0.5618832111358643, + "learning_rate": 7.161809045226131e-06, + "loss": 0.0018, + "step": 28750 + }, + { + "epoch": 10.73, + "grad_norm": 0.7643022537231445, + "learning_rate": 7.159296482412061e-06, + "loss": 0.0023, + "step": 28775 + }, + { + "epoch": 10.74, + "grad_norm": 0.6324716210365295, + "learning_rate": 7.1568844221105535e-06, + "loss": 0.002, + "step": 28800 + }, + { + "epoch": 10.75, + "grad_norm": 0.43490034341812134, + "learning_rate": 7.154371859296483e-06, + "loss": 0.002, + "step": 28825 + }, + { + "epoch": 10.76, + "grad_norm": 0.948773205280304, + "learning_rate": 7.151859296482413e-06, + "loss": 0.002, + "step": 28850 + }, + { + "epoch": 10.77, + "grad_norm": 0.7126588225364685, + "learning_rate": 7.149346733668342e-06, + "loss": 0.0023, + "step": 28875 + }, + { + "epoch": 10.78, + "grad_norm": 0.5905551314353943, + "learning_rate": 7.1468341708542725e-06, + "loss": 0.0021, + "step": 28900 + }, + { + "epoch": 10.78, + "grad_norm": 0.8351116180419922, + "learning_rate": 7.144321608040201e-06, + "loss": 0.0025, + "step": 28925 + }, + { + "epoch": 10.79, + "grad_norm": 0.5461848378181458, + "learning_rate": 7.141809045226131e-06, + "loss": 0.0023, + "step": 28950 + }, + { + "epoch": 10.8, + "grad_norm": 0.993982195854187, + "learning_rate": 7.139296482412061e-06, + "loss": 0.0021, + "step": 28975 + }, + { + "epoch": 10.81, + "grad_norm": 0.3459186255931854, + "learning_rate": 7.136783919597991e-06, + "loss": 0.002, + "step": 29000 + }, + { + "epoch": 10.81, + "eval_loss": 0.1665998250246048, + "eval_runtime": 1236.525, + "eval_samples_per_second": 1.143, + "eval_steps_per_second": 1.143, + "eval_wer": 11.541338242908923, + "step": 29000 + }, + { + "epoch": 10.82, + "grad_norm": 0.9631291627883911, + "learning_rate": 7.134271356783921e-06, + "loss": 0.0031, + "step": 29025 + }, + { + "epoch": 10.83, + "grad_norm": 0.7827233076095581, + "learning_rate": 7.131758793969849e-06, + "loss": 0.0022, + "step": 29050 + }, + { + "epoch": 10.84, + "grad_norm": 0.7316336631774902, + "learning_rate": 7.1292462311557795e-06, + "loss": 0.0016, + "step": 29075 + }, + { + "epoch": 10.85, + "grad_norm": 0.6392035484313965, + "learning_rate": 7.126733668341709e-06, + "loss": 0.0019, + "step": 29100 + }, + { + "epoch": 10.86, + "grad_norm": 0.7878302335739136, + "learning_rate": 7.124221105527639e-06, + "loss": 0.0017, + "step": 29125 + }, + { + "epoch": 10.87, + "grad_norm": 1.00039803981781, + "learning_rate": 7.121708542713568e-06, + "loss": 0.0017, + "step": 29150 + }, + { + "epoch": 10.88, + "grad_norm": 0.7358724474906921, + "learning_rate": 7.1191959798994985e-06, + "loss": 0.0017, + "step": 29175 + }, + { + "epoch": 10.89, + "grad_norm": 0.7320355772972107, + "learning_rate": 7.116683417085428e-06, + "loss": 0.002, + "step": 29200 + }, + { + "epoch": 10.9, + "grad_norm": 1.1476093530654907, + "learning_rate": 7.114170854271357e-06, + "loss": 0.002, + "step": 29225 + }, + { + "epoch": 10.91, + "grad_norm": 1.0275044441223145, + "learning_rate": 7.111658291457287e-06, + "loss": 0.0019, + "step": 29250 + }, + { + "epoch": 10.92, + "grad_norm": 0.7019745707511902, + "learning_rate": 7.109145728643217e-06, + "loss": 0.0016, + "step": 29275 + }, + { + "epoch": 10.92, + "grad_norm": 0.21695835888385773, + "learning_rate": 7.106633165829147e-06, + "loss": 0.0017, + "step": 29300 + }, + { + "epoch": 10.93, + "grad_norm": 2.7439684867858887, + "learning_rate": 7.104120603015075e-06, + "loss": 0.0024, + "step": 29325 + }, + { + "epoch": 10.94, + "grad_norm": 0.1143287718296051, + "learning_rate": 7.1016080402010054e-06, + "loss": 0.0021, + "step": 29350 + }, + { + "epoch": 10.95, + "grad_norm": 0.7645537257194519, + "learning_rate": 7.099095477386935e-06, + "loss": 0.0016, + "step": 29375 + }, + { + "epoch": 10.96, + "grad_norm": 0.7899655699729919, + "learning_rate": 7.096582914572865e-06, + "loss": 0.0019, + "step": 29400 + }, + { + "epoch": 10.97, + "grad_norm": 0.40438488125801086, + "learning_rate": 7.094070351758795e-06, + "loss": 0.0013, + "step": 29425 + }, + { + "epoch": 10.98, + "grad_norm": 0.5722082853317261, + "learning_rate": 7.091557788944724e-06, + "loss": 0.0015, + "step": 29450 + }, + { + "epoch": 10.99, + "grad_norm": 0.5217285752296448, + "learning_rate": 7.089045226130654e-06, + "loss": 0.0017, + "step": 29475 + }, + { + "epoch": 11.0, + "grad_norm": 0.385098934173584, + "learning_rate": 7.086532663316583e-06, + "loss": 0.0019, + "step": 29500 + }, + { + "epoch": 11.01, + "grad_norm": 0.43775343894958496, + "learning_rate": 7.084020100502513e-06, + "loss": 0.0018, + "step": 29525 + }, + { + "epoch": 11.02, + "grad_norm": 0.7949855327606201, + "learning_rate": 7.0815075376884426e-06, + "loss": 0.0015, + "step": 29550 + }, + { + "epoch": 11.03, + "grad_norm": 1.370039701461792, + "learning_rate": 7.078994974874373e-06, + "loss": 0.0011, + "step": 29575 + }, + { + "epoch": 11.04, + "grad_norm": 0.21901629865169525, + "learning_rate": 7.076582914572865e-06, + "loss": 0.0011, + "step": 29600 + }, + { + "epoch": 11.05, + "grad_norm": 0.1289396733045578, + "learning_rate": 7.074070351758795e-06, + "loss": 0.0014, + "step": 29625 + }, + { + "epoch": 11.06, + "grad_norm": 0.8111205697059631, + "learning_rate": 7.071557788944724e-06, + "loss": 0.0015, + "step": 29650 + }, + { + "epoch": 11.06, + "grad_norm": 0.3337787687778473, + "learning_rate": 7.069045226130654e-06, + "loss": 0.0012, + "step": 29675 + }, + { + "epoch": 11.07, + "grad_norm": 0.4662812352180481, + "learning_rate": 7.066532663316583e-06, + "loss": 0.0012, + "step": 29700 + }, + { + "epoch": 11.08, + "grad_norm": 0.7198769450187683, + "learning_rate": 7.064020100502513e-06, + "loss": 0.0018, + "step": 29725 + }, + { + "epoch": 11.09, + "grad_norm": 0.11490745097398758, + "learning_rate": 7.061507537688443e-06, + "loss": 0.0014, + "step": 29750 + }, + { + "epoch": 11.1, + "grad_norm": 0.2138548493385315, + "learning_rate": 7.058994974874372e-06, + "loss": 0.0015, + "step": 29775 + }, + { + "epoch": 11.11, + "grad_norm": 0.3914559781551361, + "learning_rate": 7.056482412060302e-06, + "loss": 0.0013, + "step": 29800 + }, + { + "epoch": 11.12, + "grad_norm": 0.8908401131629944, + "learning_rate": 7.0539698492462316e-06, + "loss": 0.0013, + "step": 29825 + }, + { + "epoch": 11.13, + "grad_norm": 0.25572383403778076, + "learning_rate": 7.051457286432162e-06, + "loss": 0.0015, + "step": 29850 + }, + { + "epoch": 11.14, + "grad_norm": 0.5163958072662354, + "learning_rate": 7.048944723618091e-06, + "loss": 0.0018, + "step": 29875 + }, + { + "epoch": 11.15, + "grad_norm": 0.1961149126291275, + "learning_rate": 7.046432160804021e-06, + "loss": 0.0012, + "step": 29900 + }, + { + "epoch": 11.16, + "grad_norm": 0.6480693221092224, + "learning_rate": 7.04391959798995e-06, + "loss": 0.0012, + "step": 29925 + }, + { + "epoch": 11.17, + "grad_norm": 0.16404402256011963, + "learning_rate": 7.04140703517588e-06, + "loss": 0.0012, + "step": 29950 + }, + { + "epoch": 11.18, + "grad_norm": 0.14302678406238556, + "learning_rate": 7.038894472361809e-06, + "loss": 0.0011, + "step": 29975 + }, + { + "epoch": 11.19, + "grad_norm": 0.27899500727653503, + "learning_rate": 7.036381909547739e-06, + "loss": 0.0016, + "step": 30000 + }, + { + "epoch": 11.19, + "eval_loss": 0.16515497863292694, + "eval_runtime": 1195.6116, + "eval_samples_per_second": 1.182, + "eval_steps_per_second": 1.182, + "eval_wer": 11.253667798170417, + "step": 30000 + }, + { + "epoch": 11.2, + "grad_norm": 1.3920704126358032, + "learning_rate": 7.0338693467336695e-06, + "loss": 0.0018, + "step": 30025 + }, + { + "epoch": 11.2, + "grad_norm": 0.1808546483516693, + "learning_rate": 7.031356783919598e-06, + "loss": 0.0011, + "step": 30050 + }, + { + "epoch": 11.21, + "grad_norm": 0.08965901285409927, + "learning_rate": 7.028844221105528e-06, + "loss": 0.0013, + "step": 30075 + }, + { + "epoch": 11.22, + "grad_norm": 0.14479298889636993, + "learning_rate": 7.0263316582914575e-06, + "loss": 0.0011, + "step": 30100 + }, + { + "epoch": 11.23, + "grad_norm": 0.6388674974441528, + "learning_rate": 7.023819095477388e-06, + "loss": 0.0014, + "step": 30125 + }, + { + "epoch": 11.24, + "grad_norm": 1.1684021949768066, + "learning_rate": 7.021306532663317e-06, + "loss": 0.002, + "step": 30150 + }, + { + "epoch": 11.25, + "grad_norm": 0.16625525057315826, + "learning_rate": 7.018793969849247e-06, + "loss": 0.0018, + "step": 30175 + }, + { + "epoch": 11.26, + "grad_norm": 0.965813934803009, + "learning_rate": 7.016281407035176e-06, + "loss": 0.0014, + "step": 30200 + }, + { + "epoch": 11.27, + "grad_norm": 1.2366468906402588, + "learning_rate": 7.013768844221106e-06, + "loss": 0.0012, + "step": 30225 + }, + { + "epoch": 11.28, + "grad_norm": 0.1777760535478592, + "learning_rate": 7.011256281407036e-06, + "loss": 0.0015, + "step": 30250 + }, + { + "epoch": 11.29, + "grad_norm": 0.5443910360336304, + "learning_rate": 7.008743718592965e-06, + "loss": 0.0016, + "step": 30275 + }, + { + "epoch": 11.3, + "grad_norm": 0.5980404615402222, + "learning_rate": 7.0062311557788955e-06, + "loss": 0.0018, + "step": 30300 + }, + { + "epoch": 11.31, + "grad_norm": 0.3459984064102173, + "learning_rate": 7.003718592964824e-06, + "loss": 0.0011, + "step": 30325 + }, + { + "epoch": 11.32, + "grad_norm": 0.23356947302818298, + "learning_rate": 7.001206030150754e-06, + "loss": 0.0011, + "step": 30350 + }, + { + "epoch": 11.33, + "grad_norm": 0.9677523970603943, + "learning_rate": 6.9986934673366834e-06, + "loss": 0.0012, + "step": 30375 + }, + { + "epoch": 11.33, + "grad_norm": 0.45927730202674866, + "learning_rate": 6.996180904522614e-06, + "loss": 0.002, + "step": 30400 + }, + { + "epoch": 11.34, + "grad_norm": 0.5095682144165039, + "learning_rate": 6.993668341708544e-06, + "loss": 0.0017, + "step": 30425 + }, + { + "epoch": 11.35, + "grad_norm": 0.39149999618530273, + "learning_rate": 6.991155778894473e-06, + "loss": 0.0016, + "step": 30450 + }, + { + "epoch": 11.36, + "grad_norm": 1.122555136680603, + "learning_rate": 6.988643216080403e-06, + "loss": 0.0019, + "step": 30475 + }, + { + "epoch": 11.37, + "grad_norm": 0.4082407057285309, + "learning_rate": 6.986130653266332e-06, + "loss": 0.0013, + "step": 30500 + }, + { + "epoch": 11.38, + "grad_norm": 0.824947714805603, + "learning_rate": 6.983618090452262e-06, + "loss": 0.0013, + "step": 30525 + }, + { + "epoch": 11.39, + "grad_norm": 0.9668367505073547, + "learning_rate": 6.981105527638191e-06, + "loss": 0.0022, + "step": 30550 + }, + { + "epoch": 11.4, + "grad_norm": 0.7918215990066528, + "learning_rate": 6.978592964824121e-06, + "loss": 0.0019, + "step": 30575 + }, + { + "epoch": 11.41, + "grad_norm": 0.6458659172058105, + "learning_rate": 6.976080402010051e-06, + "loss": 0.0015, + "step": 30600 + }, + { + "epoch": 11.42, + "grad_norm": 0.9010142683982849, + "learning_rate": 6.97356783919598e-06, + "loss": 0.0016, + "step": 30625 + }, + { + "epoch": 11.43, + "grad_norm": 0.4138459265232086, + "learning_rate": 6.97105527638191e-06, + "loss": 0.0021, + "step": 30650 + }, + { + "epoch": 11.44, + "grad_norm": 1.004984974861145, + "learning_rate": 6.9685427135678396e-06, + "loss": 0.0025, + "step": 30675 + }, + { + "epoch": 11.45, + "grad_norm": 0.5545822381973267, + "learning_rate": 6.96603015075377e-06, + "loss": 0.002, + "step": 30700 + }, + { + "epoch": 11.46, + "grad_norm": 0.4332880675792694, + "learning_rate": 6.963517587939699e-06, + "loss": 0.0021, + "step": 30725 + }, + { + "epoch": 11.47, + "grad_norm": 0.8049305081367493, + "learning_rate": 6.961005025125629e-06, + "loss": 0.0019, + "step": 30750 + }, + { + "epoch": 11.47, + "grad_norm": 0.17181991040706635, + "learning_rate": 6.958492462311558e-06, + "loss": 0.0018, + "step": 30775 + }, + { + "epoch": 11.48, + "grad_norm": 0.7950259447097778, + "learning_rate": 6.955979899497488e-06, + "loss": 0.0019, + "step": 30800 + }, + { + "epoch": 11.49, + "grad_norm": 0.7356134653091431, + "learning_rate": 6.953467336683417e-06, + "loss": 0.0024, + "step": 30825 + }, + { + "epoch": 11.5, + "grad_norm": 0.5160488486289978, + "learning_rate": 6.950954773869347e-06, + "loss": 0.0021, + "step": 30850 + }, + { + "epoch": 11.51, + "grad_norm": 0.11655743420124054, + "learning_rate": 6.9484422110552775e-06, + "loss": 0.0018, + "step": 30875 + }, + { + "epoch": 11.52, + "grad_norm": 0.6080338954925537, + "learning_rate": 6.945929648241206e-06, + "loss": 0.0013, + "step": 30900 + }, + { + "epoch": 11.53, + "grad_norm": 0.7907400131225586, + "learning_rate": 6.943417085427136e-06, + "loss": 0.0023, + "step": 30925 + }, + { + "epoch": 11.54, + "grad_norm": 0.6886609792709351, + "learning_rate": 6.9409045226130655e-06, + "loss": 0.0022, + "step": 30950 + }, + { + "epoch": 11.55, + "grad_norm": 1.2306044101715088, + "learning_rate": 6.938391959798996e-06, + "loss": 0.002, + "step": 30975 + }, + { + "epoch": 11.56, + "grad_norm": 0.4632844626903534, + "learning_rate": 6.935879396984925e-06, + "loss": 0.002, + "step": 31000 + }, + { + "epoch": 11.56, + "eval_loss": 0.16760937869548798, + "eval_runtime": 1193.5505, + "eval_samples_per_second": 1.184, + "eval_steps_per_second": 1.184, + "eval_wer": 11.995857545595765, + "step": 31000 + }, + { + "epoch": 11.57, + "grad_norm": 0.6228979229927063, + "learning_rate": 6.933366834170855e-06, + "loss": 0.0019, + "step": 31025 + }, + { + "epoch": 11.58, + "grad_norm": 0.7881886959075928, + "learning_rate": 6.930854271356785e-06, + "loss": 0.0018, + "step": 31050 + }, + { + "epoch": 11.59, + "grad_norm": 1.2799240350723267, + "learning_rate": 6.928341708542714e-06, + "loss": 0.0015, + "step": 31075 + }, + { + "epoch": 11.6, + "grad_norm": 0.5027578473091125, + "learning_rate": 6.925829145728644e-06, + "loss": 0.0016, + "step": 31100 + }, + { + "epoch": 11.61, + "grad_norm": 0.677718997001648, + "learning_rate": 6.923316582914573e-06, + "loss": 0.0013, + "step": 31125 + }, + { + "epoch": 11.61, + "grad_norm": 1.4926074743270874, + "learning_rate": 6.9208040201005035e-06, + "loss": 0.0012, + "step": 31150 + }, + { + "epoch": 11.62, + "grad_norm": 0.469292551279068, + "learning_rate": 6.918291457286432e-06, + "loss": 0.0017, + "step": 31175 + }, + { + "epoch": 11.63, + "grad_norm": 0.8309260606765747, + "learning_rate": 6.915778894472362e-06, + "loss": 0.0026, + "step": 31200 + }, + { + "epoch": 11.64, + "grad_norm": 0.45423808693885803, + "learning_rate": 6.9132663316582915e-06, + "loss": 0.0024, + "step": 31225 + }, + { + "epoch": 11.65, + "grad_norm": 0.21375156939029694, + "learning_rate": 6.910753768844222e-06, + "loss": 0.0019, + "step": 31250 + }, + { + "epoch": 11.66, + "grad_norm": 0.18665219843387604, + "learning_rate": 6.908241206030152e-06, + "loss": 0.0015, + "step": 31275 + }, + { + "epoch": 11.67, + "grad_norm": 0.23652435839176178, + "learning_rate": 6.905728643216081e-06, + "loss": 0.0019, + "step": 31300 + }, + { + "epoch": 11.68, + "grad_norm": 0.0909101590514183, + "learning_rate": 6.903216080402011e-06, + "loss": 0.002, + "step": 31325 + }, + { + "epoch": 11.69, + "grad_norm": 0.6744236350059509, + "learning_rate": 6.90070351758794e-06, + "loss": 0.0023, + "step": 31350 + }, + { + "epoch": 11.7, + "grad_norm": 0.750092625617981, + "learning_rate": 6.89819095477387e-06, + "loss": 0.0018, + "step": 31375 + }, + { + "epoch": 11.71, + "grad_norm": 0.801764965057373, + "learning_rate": 6.895678391959799e-06, + "loss": 0.0016, + "step": 31400 + }, + { + "epoch": 11.72, + "grad_norm": 0.6437927484512329, + "learning_rate": 6.8931658291457294e-06, + "loss": 0.0017, + "step": 31425 + }, + { + "epoch": 11.73, + "grad_norm": 0.5649538636207581, + "learning_rate": 6.890653266331658e-06, + "loss": 0.0016, + "step": 31450 + }, + { + "epoch": 11.74, + "grad_norm": 0.3197011351585388, + "learning_rate": 6.888140703517588e-06, + "loss": 0.002, + "step": 31475 + }, + { + "epoch": 11.74, + "grad_norm": 0.4196454584598541, + "learning_rate": 6.885628140703518e-06, + "loss": 0.0018, + "step": 31500 + }, + { + "epoch": 11.75, + "grad_norm": 0.16530758142471313, + "learning_rate": 6.8831155778894476e-06, + "loss": 0.0018, + "step": 31525 + }, + { + "epoch": 11.76, + "grad_norm": 0.16285721957683563, + "learning_rate": 6.880603015075378e-06, + "loss": 0.0013, + "step": 31550 + }, + { + "epoch": 11.77, + "grad_norm": 0.16170068085193634, + "learning_rate": 6.878090452261307e-06, + "loss": 0.0014, + "step": 31575 + }, + { + "epoch": 11.78, + "grad_norm": 1.2623178958892822, + "learning_rate": 6.875577889447237e-06, + "loss": 0.0014, + "step": 31600 + }, + { + "epoch": 11.79, + "grad_norm": 0.22028405964374542, + "learning_rate": 6.873065326633166e-06, + "loss": 0.0012, + "step": 31625 + }, + { + "epoch": 11.8, + "grad_norm": 0.2556901276111603, + "learning_rate": 6.870552763819096e-06, + "loss": 0.0017, + "step": 31650 + }, + { + "epoch": 11.81, + "grad_norm": 1.5282926559448242, + "learning_rate": 6.868040201005026e-06, + "loss": 0.0021, + "step": 31675 + }, + { + "epoch": 11.82, + "grad_norm": 0.4261831045150757, + "learning_rate": 6.865527638190955e-06, + "loss": 0.002, + "step": 31700 + }, + { + "epoch": 11.83, + "grad_norm": 0.641252338886261, + "learning_rate": 6.8630150753768855e-06, + "loss": 0.0019, + "step": 31725 + }, + { + "epoch": 11.84, + "grad_norm": 0.47793760895729065, + "learning_rate": 6.860502512562814e-06, + "loss": 0.0017, + "step": 31750 + }, + { + "epoch": 11.85, + "grad_norm": 0.5625618100166321, + "learning_rate": 6.857989949748744e-06, + "loss": 0.0017, + "step": 31775 + }, + { + "epoch": 11.86, + "grad_norm": 0.17436985671520233, + "learning_rate": 6.8554773869346735e-06, + "loss": 0.0019, + "step": 31800 + }, + { + "epoch": 11.87, + "grad_norm": 0.7221023440361023, + "learning_rate": 6.852964824120604e-06, + "loss": 0.0018, + "step": 31825 + }, + { + "epoch": 11.88, + "grad_norm": 0.17745402455329895, + "learning_rate": 6.850452261306533e-06, + "loss": 0.0017, + "step": 31850 + }, + { + "epoch": 11.88, + "grad_norm": 0.4452929198741913, + "learning_rate": 6.847939698492463e-06, + "loss": 0.0016, + "step": 31875 + }, + { + "epoch": 11.89, + "grad_norm": 0.5426244139671326, + "learning_rate": 6.845427135678393e-06, + "loss": 0.0016, + "step": 31900 + }, + { + "epoch": 11.9, + "grad_norm": 0.8728684782981873, + "learning_rate": 6.842914572864322e-06, + "loss": 0.0016, + "step": 31925 + }, + { + "epoch": 11.91, + "grad_norm": 0.6092917323112488, + "learning_rate": 6.840402010050252e-06, + "loss": 0.0017, + "step": 31950 + }, + { + "epoch": 11.92, + "grad_norm": 0.4408324658870697, + "learning_rate": 6.837889447236181e-06, + "loss": 0.0018, + "step": 31975 + }, + { + "epoch": 11.93, + "grad_norm": 0.6317141652107239, + "learning_rate": 6.8353768844221115e-06, + "loss": 0.002, + "step": 32000 + }, + { + "epoch": 11.93, + "eval_loss": 0.16544145345687866, + "eval_runtime": 1202.5516, + "eval_samples_per_second": 1.175, + "eval_steps_per_second": 1.175, + "eval_wer": 11.82900868764743, + "step": 32000 + }, + { + "epoch": 11.94, + "grad_norm": 0.46832558512687683, + "learning_rate": 6.83286432160804e-06, + "loss": 0.0014, + "step": 32025 + }, + { + "epoch": 11.95, + "grad_norm": 0.29301321506500244, + "learning_rate": 6.83035175879397e-06, + "loss": 0.0014, + "step": 32050 + }, + { + "epoch": 11.96, + "grad_norm": 1.0885530710220337, + "learning_rate": 6.8278391959798995e-06, + "loss": 0.0022, + "step": 32075 + }, + { + "epoch": 11.97, + "grad_norm": 0.24359260499477386, + "learning_rate": 6.82532663316583e-06, + "loss": 0.0018, + "step": 32100 + }, + { + "epoch": 11.98, + "grad_norm": 0.7350440621376038, + "learning_rate": 6.82281407035176e-06, + "loss": 0.0024, + "step": 32125 + }, + { + "epoch": 11.99, + "grad_norm": 0.834542453289032, + "learning_rate": 6.820301507537689e-06, + "loss": 0.0019, + "step": 32150 + }, + { + "epoch": 12.0, + "grad_norm": 1.1633702516555786, + "learning_rate": 6.817788944723619e-06, + "loss": 0.0016, + "step": 32175 + }, + { + "epoch": 12.01, + "grad_norm": 0.9268914461135864, + "learning_rate": 6.815276381909548e-06, + "loss": 0.0021, + "step": 32200 + }, + { + "epoch": 12.02, + "grad_norm": 1.1445002555847168, + "learning_rate": 6.812763819095478e-06, + "loss": 0.0016, + "step": 32225 + }, + { + "epoch": 12.02, + "grad_norm": 0.5903928875923157, + "learning_rate": 6.810251256281407e-06, + "loss": 0.0012, + "step": 32250 + }, + { + "epoch": 12.03, + "grad_norm": 1.298057198524475, + "learning_rate": 6.8077386934673374e-06, + "loss": 0.0017, + "step": 32275 + }, + { + "epoch": 12.04, + "grad_norm": 0.08739069104194641, + "learning_rate": 6.805226130653268e-06, + "loss": 0.0017, + "step": 32300 + }, + { + "epoch": 12.05, + "grad_norm": 1.3428621292114258, + "learning_rate": 6.802713567839196e-06, + "loss": 0.001, + "step": 32325 + }, + { + "epoch": 12.06, + "grad_norm": 0.5801416039466858, + "learning_rate": 6.800201005025126e-06, + "loss": 0.0011, + "step": 32350 + }, + { + "epoch": 12.07, + "grad_norm": 0.13903754949569702, + "learning_rate": 6.7976884422110556e-06, + "loss": 0.0012, + "step": 32375 + }, + { + "epoch": 12.08, + "grad_norm": 0.2634657025337219, + "learning_rate": 6.795175879396986e-06, + "loss": 0.0013, + "step": 32400 + }, + { + "epoch": 12.09, + "grad_norm": 0.11662042140960693, + "learning_rate": 6.792663316582915e-06, + "loss": 0.0011, + "step": 32425 + }, + { + "epoch": 12.1, + "grad_norm": 0.40534257888793945, + "learning_rate": 6.790150753768845e-06, + "loss": 0.0012, + "step": 32450 + }, + { + "epoch": 12.11, + "grad_norm": 0.24187898635864258, + "learning_rate": 6.787638190954774e-06, + "loss": 0.0012, + "step": 32475 + }, + { + "epoch": 12.12, + "grad_norm": 0.451885461807251, + "learning_rate": 6.785125628140704e-06, + "loss": 0.0011, + "step": 32500 + }, + { + "epoch": 12.13, + "grad_norm": 0.3062966763973236, + "learning_rate": 6.782613065326634e-06, + "loss": 0.0014, + "step": 32525 + }, + { + "epoch": 12.14, + "grad_norm": 1.2683846950531006, + "learning_rate": 6.780100502512563e-06, + "loss": 0.0016, + "step": 32550 + }, + { + "epoch": 12.15, + "grad_norm": 0.18792547285556793, + "learning_rate": 6.7775879396984935e-06, + "loss": 0.0013, + "step": 32575 + }, + { + "epoch": 12.16, + "grad_norm": 0.21958674490451813, + "learning_rate": 6.775075376884422e-06, + "loss": 0.001, + "step": 32600 + }, + { + "epoch": 12.16, + "grad_norm": 0.6611072421073914, + "learning_rate": 6.772562814070352e-06, + "loss": 0.0017, + "step": 32625 + }, + { + "epoch": 12.17, + "grad_norm": 0.5440460443496704, + "learning_rate": 6.7700502512562815e-06, + "loss": 0.001, + "step": 32650 + }, + { + "epoch": 12.18, + "grad_norm": 0.27300208806991577, + "learning_rate": 6.767537688442212e-06, + "loss": 0.0012, + "step": 32675 + }, + { + "epoch": 12.19, + "grad_norm": 0.3658203184604645, + "learning_rate": 6.765025125628141e-06, + "loss": 0.0013, + "step": 32700 + }, + { + "epoch": 12.2, + "grad_norm": 1.064744234085083, + "learning_rate": 6.762512562814071e-06, + "loss": 0.0011, + "step": 32725 + }, + { + "epoch": 12.21, + "grad_norm": 0.5413478016853333, + "learning_rate": 6.760000000000001e-06, + "loss": 0.0018, + "step": 32750 + }, + { + "epoch": 12.22, + "grad_norm": 0.21153147518634796, + "learning_rate": 6.75748743718593e-06, + "loss": 0.0012, + "step": 32775 + }, + { + "epoch": 12.23, + "grad_norm": 1.1338598728179932, + "learning_rate": 6.75497487437186e-06, + "loss": 0.0015, + "step": 32800 + }, + { + "epoch": 12.24, + "grad_norm": 0.2784353792667389, + "learning_rate": 6.752462311557789e-06, + "loss": 0.0011, + "step": 32825 + }, + { + "epoch": 12.25, + "grad_norm": 1.5147318840026855, + "learning_rate": 6.7499497487437195e-06, + "loss": 0.0018, + "step": 32850 + }, + { + "epoch": 12.26, + "grad_norm": 0.7832407355308533, + "learning_rate": 6.747437185929648e-06, + "loss": 0.0011, + "step": 32875 + }, + { + "epoch": 12.27, + "grad_norm": 0.5203331112861633, + "learning_rate": 6.744924623115578e-06, + "loss": 0.0021, + "step": 32900 + }, + { + "epoch": 12.28, + "grad_norm": 0.9559676051139832, + "learning_rate": 6.742412060301508e-06, + "loss": 0.0025, + "step": 32925 + }, + { + "epoch": 12.29, + "grad_norm": 0.7760629653930664, + "learning_rate": 6.739899497487438e-06, + "loss": 0.0018, + "step": 32950 + }, + { + "epoch": 12.29, + "grad_norm": 0.06986644119024277, + "learning_rate": 6.737386934673368e-06, + "loss": 0.0014, + "step": 32975 + }, + { + "epoch": 12.3, + "grad_norm": 0.06728184223175049, + "learning_rate": 6.734874371859297e-06, + "loss": 0.0017, + "step": 33000 + }, + { + "epoch": 12.3, + "eval_loss": 0.1745312362909317, + "eval_runtime": 1184.5214, + "eval_samples_per_second": 1.193, + "eval_steps_per_second": 1.193, + "eval_wer": 11.725447327541568, + "step": 33000 + }, + { + "epoch": 12.31, + "grad_norm": 0.5184839963912964, + "learning_rate": 6.732361809045227e-06, + "loss": 0.0018, + "step": 33025 + }, + { + "epoch": 12.32, + "grad_norm": 1.022531270980835, + "learning_rate": 6.729849246231156e-06, + "loss": 0.0014, + "step": 33050 + }, + { + "epoch": 12.33, + "grad_norm": 0.7171478867530823, + "learning_rate": 6.727336683417086e-06, + "loss": 0.0013, + "step": 33075 + }, + { + "epoch": 12.34, + "grad_norm": 0.8448019027709961, + "learning_rate": 6.724824120603015e-06, + "loss": 0.0015, + "step": 33100 + }, + { + "epoch": 12.35, + "grad_norm": 0.6848008036613464, + "learning_rate": 6.7223115577889454e-06, + "loss": 0.0015, + "step": 33125 + }, + { + "epoch": 12.36, + "grad_norm": 0.19665639102458954, + "learning_rate": 6.719798994974876e-06, + "loss": 0.0017, + "step": 33150 + }, + { + "epoch": 12.37, + "grad_norm": 1.5463953018188477, + "learning_rate": 6.717286432160804e-06, + "loss": 0.0015, + "step": 33175 + }, + { + "epoch": 12.38, + "grad_norm": 0.6252156496047974, + "learning_rate": 6.714773869346734e-06, + "loss": 0.0013, + "step": 33200 + }, + { + "epoch": 12.39, + "grad_norm": 0.37764787673950195, + "learning_rate": 6.7122613065326636e-06, + "loss": 0.0012, + "step": 33225 + }, + { + "epoch": 12.4, + "grad_norm": 0.40312016010284424, + "learning_rate": 6.709748743718594e-06, + "loss": 0.0014, + "step": 33250 + }, + { + "epoch": 12.41, + "grad_norm": 0.14096969366073608, + "learning_rate": 6.707236180904523e-06, + "loss": 0.0015, + "step": 33275 + }, + { + "epoch": 12.42, + "grad_norm": 0.2967342138290405, + "learning_rate": 6.704723618090453e-06, + "loss": 0.0011, + "step": 33300 + }, + { + "epoch": 12.43, + "grad_norm": 0.8194160461425781, + "learning_rate": 6.702211055276382e-06, + "loss": 0.0009, + "step": 33325 + }, + { + "epoch": 12.43, + "grad_norm": 0.266925185918808, + "learning_rate": 6.699698492462312e-06, + "loss": 0.0011, + "step": 33350 + }, + { + "epoch": 12.44, + "grad_norm": 0.44890767335891724, + "learning_rate": 6.697185929648242e-06, + "loss": 0.0011, + "step": 33375 + }, + { + "epoch": 12.45, + "grad_norm": 0.3097435534000397, + "learning_rate": 6.694673366834171e-06, + "loss": 0.0011, + "step": 33400 + }, + { + "epoch": 12.46, + "grad_norm": 0.45708009600639343, + "learning_rate": 6.6921608040201015e-06, + "loss": 0.0018, + "step": 33425 + }, + { + "epoch": 12.47, + "grad_norm": 1.0183743238449097, + "learning_rate": 6.68964824120603e-06, + "loss": 0.0012, + "step": 33450 + }, + { + "epoch": 12.48, + "grad_norm": 0.35620322823524475, + "learning_rate": 6.68713567839196e-06, + "loss": 0.0018, + "step": 33475 + }, + { + "epoch": 12.49, + "grad_norm": 0.6413122415542603, + "learning_rate": 6.6846231155778895e-06, + "loss": 0.0014, + "step": 33500 + }, + { + "epoch": 12.5, + "grad_norm": 0.5790813565254211, + "learning_rate": 6.68211055276382e-06, + "loss": 0.0011, + "step": 33525 + }, + { + "epoch": 12.51, + "grad_norm": 1.2664169073104858, + "learning_rate": 6.67959798994975e-06, + "loss": 0.0017, + "step": 33550 + }, + { + "epoch": 12.52, + "grad_norm": 0.39975398778915405, + "learning_rate": 6.677085427135679e-06, + "loss": 0.0014, + "step": 33575 + }, + { + "epoch": 12.53, + "grad_norm": 0.14527741074562073, + "learning_rate": 6.674572864321609e-06, + "loss": 0.0015, + "step": 33600 + }, + { + "epoch": 12.54, + "grad_norm": 0.49433064460754395, + "learning_rate": 6.672060301507538e-06, + "loss": 0.0016, + "step": 33625 + }, + { + "epoch": 12.55, + "grad_norm": 0.7464792132377625, + "learning_rate": 6.669547738693468e-06, + "loss": 0.002, + "step": 33650 + }, + { + "epoch": 12.56, + "grad_norm": 1.2230840921401978, + "learning_rate": 6.667035175879397e-06, + "loss": 0.0012, + "step": 33675 + }, + { + "epoch": 12.57, + "grad_norm": 0.6085410714149475, + "learning_rate": 6.6645226130653275e-06, + "loss": 0.0016, + "step": 33700 + }, + { + "epoch": 12.57, + "grad_norm": 0.6311704516410828, + "learning_rate": 6.662010050251256e-06, + "loss": 0.0016, + "step": 33725 + }, + { + "epoch": 12.58, + "grad_norm": 0.2663697302341461, + "learning_rate": 6.659497487437186e-06, + "loss": 0.0018, + "step": 33750 + }, + { + "epoch": 12.59, + "grad_norm": 0.9540956616401672, + "learning_rate": 6.656984924623116e-06, + "loss": 0.0017, + "step": 33775 + }, + { + "epoch": 12.6, + "grad_norm": 0.5491771697998047, + "learning_rate": 6.654472361809046e-06, + "loss": 0.0012, + "step": 33800 + }, + { + "epoch": 12.61, + "grad_norm": 0.6041386127471924, + "learning_rate": 6.651959798994976e-06, + "loss": 0.0014, + "step": 33825 + }, + { + "epoch": 12.62, + "grad_norm": 1.1351639032363892, + "learning_rate": 6.649447236180905e-06, + "loss": 0.0016, + "step": 33850 + }, + { + "epoch": 12.63, + "grad_norm": 0.4861287474632263, + "learning_rate": 6.646934673366835e-06, + "loss": 0.0016, + "step": 33875 + }, + { + "epoch": 12.64, + "grad_norm": 0.6033377051353455, + "learning_rate": 6.644422110552764e-06, + "loss": 0.0012, + "step": 33900 + }, + { + "epoch": 12.65, + "grad_norm": 0.4860702455043793, + "learning_rate": 6.641909547738694e-06, + "loss": 0.0013, + "step": 33925 + }, + { + "epoch": 12.66, + "grad_norm": 0.6022335290908813, + "learning_rate": 6.639396984924623e-06, + "loss": 0.0019, + "step": 33950 + }, + { + "epoch": 12.67, + "grad_norm": 0.652543842792511, + "learning_rate": 6.6368844221105534e-06, + "loss": 0.0014, + "step": 33975 + }, + { + "epoch": 12.68, + "grad_norm": 0.21340611577033997, + "learning_rate": 6.634371859296484e-06, + "loss": 0.0011, + "step": 34000 + }, + { + "epoch": 12.68, + "eval_loss": 0.17100512981414795, + "eval_runtime": 1203.7061, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 1.174, + "eval_wer": 11.679420056383409, + "step": 34000 + }, + { + "epoch": 12.69, + "grad_norm": 1.2491278648376465, + "learning_rate": 6.631859296482412e-06, + "loss": 0.0014, + "step": 34025 + }, + { + "epoch": 12.7, + "grad_norm": 0.7832294702529907, + "learning_rate": 6.629346733668342e-06, + "loss": 0.0017, + "step": 34050 + }, + { + "epoch": 12.71, + "grad_norm": 0.20314635336399078, + "learning_rate": 6.6268341708542716e-06, + "loss": 0.0019, + "step": 34075 + }, + { + "epoch": 12.71, + "grad_norm": 0.467129647731781, + "learning_rate": 6.624321608040202e-06, + "loss": 0.0012, + "step": 34100 + }, + { + "epoch": 12.72, + "grad_norm": 0.5026792883872986, + "learning_rate": 6.621909547738694e-06, + "loss": 0.0015, + "step": 34125 + }, + { + "epoch": 12.73, + "grad_norm": 0.2657577097415924, + "learning_rate": 6.6193969849246235e-06, + "loss": 0.0014, + "step": 34150 + }, + { + "epoch": 12.74, + "grad_norm": 0.14765813946723938, + "learning_rate": 6.616884422110554e-06, + "loss": 0.0011, + "step": 34175 + }, + { + "epoch": 12.75, + "grad_norm": 0.8260803818702698, + "learning_rate": 6.614371859296484e-06, + "loss": 0.0012, + "step": 34200 + }, + { + "epoch": 12.76, + "grad_norm": 0.7948583960533142, + "learning_rate": 6.611859296482412e-06, + "loss": 0.001, + "step": 34225 + }, + { + "epoch": 12.77, + "grad_norm": 1.1949807405471802, + "learning_rate": 6.6093467336683424e-06, + "loss": 0.0011, + "step": 34250 + }, + { + "epoch": 12.78, + "grad_norm": 0.8149120211601257, + "learning_rate": 6.606834170854272e-06, + "loss": 0.0018, + "step": 34275 + }, + { + "epoch": 12.79, + "grad_norm": 0.8906325697898865, + "learning_rate": 6.604321608040202e-06, + "loss": 0.0017, + "step": 34300 + }, + { + "epoch": 12.8, + "grad_norm": 0.2634568214416504, + "learning_rate": 6.60180904522613e-06, + "loss": 0.0023, + "step": 34325 + }, + { + "epoch": 12.81, + "grad_norm": 0.4221538305282593, + "learning_rate": 6.599296482412061e-06, + "loss": 0.0017, + "step": 34350 + }, + { + "epoch": 12.82, + "grad_norm": 0.7720416188240051, + "learning_rate": 6.596783919597991e-06, + "loss": 0.0019, + "step": 34375 + }, + { + "epoch": 12.83, + "grad_norm": 1.1318109035491943, + "learning_rate": 6.59427135678392e-06, + "loss": 0.0015, + "step": 34400 + }, + { + "epoch": 12.84, + "grad_norm": 0.635210394859314, + "learning_rate": 6.59175879396985e-06, + "loss": 0.0016, + "step": 34425 + }, + { + "epoch": 12.84, + "grad_norm": 0.8578208088874817, + "learning_rate": 6.5892462311557796e-06, + "loss": 0.002, + "step": 34450 + }, + { + "epoch": 12.85, + "grad_norm": 0.4534885585308075, + "learning_rate": 6.58673366834171e-06, + "loss": 0.0013, + "step": 34475 + }, + { + "epoch": 12.86, + "grad_norm": 0.17543089389801025, + "learning_rate": 6.584221105527638e-06, + "loss": 0.0016, + "step": 34500 + }, + { + "epoch": 12.87, + "grad_norm": 0.3007034957408905, + "learning_rate": 6.581708542713568e-06, + "loss": 0.0016, + "step": 34525 + }, + { + "epoch": 12.88, + "grad_norm": 0.3321305513381958, + "learning_rate": 6.579195979899498e-06, + "loss": 0.0013, + "step": 34550 + }, + { + "epoch": 12.89, + "grad_norm": 0.6195049285888672, + "learning_rate": 6.576683417085428e-06, + "loss": 0.0016, + "step": 34575 + }, + { + "epoch": 12.9, + "grad_norm": 0.8931993246078491, + "learning_rate": 6.574170854271358e-06, + "loss": 0.0019, + "step": 34600 + }, + { + "epoch": 12.91, + "grad_norm": 0.05149543657898903, + "learning_rate": 6.5716582914572865e-06, + "loss": 0.0018, + "step": 34625 + }, + { + "epoch": 12.92, + "grad_norm": 0.8216347098350525, + "learning_rate": 6.569145728643217e-06, + "loss": 0.0017, + "step": 34650 + }, + { + "epoch": 12.93, + "grad_norm": 0.5874665975570679, + "learning_rate": 6.566633165829146e-06, + "loss": 0.0014, + "step": 34675 + }, + { + "epoch": 12.94, + "grad_norm": 0.16500453650951385, + "learning_rate": 6.564120603015076e-06, + "loss": 0.0021, + "step": 34700 + }, + { + "epoch": 12.95, + "grad_norm": 0.586856484413147, + "learning_rate": 6.5616080402010055e-06, + "loss": 0.0016, + "step": 34725 + }, + { + "epoch": 12.96, + "grad_norm": 0.284855455160141, + "learning_rate": 6.559095477386936e-06, + "loss": 0.0012, + "step": 34750 + }, + { + "epoch": 12.97, + "grad_norm": 0.10117733478546143, + "learning_rate": 6.556582914572864e-06, + "loss": 0.0015, + "step": 34775 + }, + { + "epoch": 12.98, + "grad_norm": 1.010190725326538, + "learning_rate": 6.554070351758794e-06, + "loss": 0.0019, + "step": 34800 + }, + { + "epoch": 12.98, + "grad_norm": 0.20682497322559357, + "learning_rate": 6.5515577889447245e-06, + "loss": 0.0016, + "step": 34825 + }, + { + "epoch": 12.99, + "grad_norm": 1.0183665752410889, + "learning_rate": 6.549045226130654e-06, + "loss": 0.0021, + "step": 34850 + }, + { + "epoch": 13.0, + "grad_norm": 1.778483510017395, + "learning_rate": 6.546532663316584e-06, + "loss": 0.001, + "step": 34875 + }, + { + "epoch": 13.01, + "grad_norm": 0.4376201629638672, + "learning_rate": 6.5440201005025125e-06, + "loss": 0.0011, + "step": 34900 + }, + { + "epoch": 13.02, + "grad_norm": 0.4403265118598938, + "learning_rate": 6.541507537688443e-06, + "loss": 0.0013, + "step": 34925 + }, + { + "epoch": 13.03, + "grad_norm": 0.1373542845249176, + "learning_rate": 6.538994974874372e-06, + "loss": 0.001, + "step": 34950 + }, + { + "epoch": 13.04, + "grad_norm": 0.5296120643615723, + "learning_rate": 6.536482412060302e-06, + "loss": 0.0012, + "step": 34975 + }, + { + "epoch": 13.05, + "grad_norm": 0.38567090034484863, + "learning_rate": 6.533969849246232e-06, + "loss": 0.001, + "step": 35000 + }, + { + "epoch": 13.05, + "eval_loss": 0.1734369397163391, + "eval_runtime": 1202.5631, + "eval_samples_per_second": 1.175, + "eval_steps_per_second": 1.175, + "eval_wer": 11.713940509752028, + "step": 35000 + }, + { + "epoch": 13.06, + "grad_norm": 0.6175810098648071, + "learning_rate": 6.531457286432162e-06, + "loss": 0.0014, + "step": 35025 + }, + { + "epoch": 13.07, + "grad_norm": 0.6121529340744019, + "learning_rate": 6.528944723618092e-06, + "loss": 0.0009, + "step": 35050 + }, + { + "epoch": 13.08, + "grad_norm": 0.3716742694377899, + "learning_rate": 6.52643216080402e-06, + "loss": 0.0008, + "step": 35075 + }, + { + "epoch": 13.09, + "grad_norm": 0.3526933789253235, + "learning_rate": 6.5239195979899504e-06, + "loss": 0.0008, + "step": 35100 + }, + { + "epoch": 13.1, + "grad_norm": 0.9637678265571594, + "learning_rate": 6.52140703517588e-06, + "loss": 0.0006, + "step": 35125 + }, + { + "epoch": 13.11, + "grad_norm": 0.7074443101882935, + "learning_rate": 6.51889447236181e-06, + "loss": 0.0008, + "step": 35150 + }, + { + "epoch": 13.12, + "grad_norm": 0.18861229717731476, + "learning_rate": 6.516381909547738e-06, + "loss": 0.0006, + "step": 35175 + }, + { + "epoch": 13.12, + "grad_norm": 0.25994637608528137, + "learning_rate": 6.513869346733669e-06, + "loss": 0.001, + "step": 35200 + }, + { + "epoch": 13.13, + "grad_norm": 0.21246716380119324, + "learning_rate": 6.511356783919599e-06, + "loss": 0.0006, + "step": 35225 + }, + { + "epoch": 13.14, + "grad_norm": 0.10341060161590576, + "learning_rate": 6.508844221105528e-06, + "loss": 0.0006, + "step": 35250 + }, + { + "epoch": 13.15, + "grad_norm": 0.12540987133979797, + "learning_rate": 6.506331658291458e-06, + "loss": 0.0009, + "step": 35275 + }, + { + "epoch": 13.16, + "grad_norm": 0.5089366436004639, + "learning_rate": 6.5038190954773876e-06, + "loss": 0.0008, + "step": 35300 + }, + { + "epoch": 13.17, + "grad_norm": 0.24602873623371124, + "learning_rate": 6.501306532663318e-06, + "loss": 0.0012, + "step": 35325 + }, + { + "epoch": 13.18, + "grad_norm": 0.09930529445409775, + "learning_rate": 6.498793969849246e-06, + "loss": 0.0011, + "step": 35350 + }, + { + "epoch": 13.19, + "grad_norm": 0.6352788805961609, + "learning_rate": 6.496281407035176e-06, + "loss": 0.0011, + "step": 35375 + }, + { + "epoch": 13.2, + "grad_norm": 0.1404682844877243, + "learning_rate": 6.493768844221106e-06, + "loss": 0.0009, + "step": 35400 + }, + { + "epoch": 13.21, + "grad_norm": 0.20068921148777008, + "learning_rate": 6.491256281407036e-06, + "loss": 0.0016, + "step": 35425 + }, + { + "epoch": 13.22, + "grad_norm": 0.40016403794288635, + "learning_rate": 6.488743718592966e-06, + "loss": 0.0012, + "step": 35450 + }, + { + "epoch": 13.23, + "grad_norm": 0.7893569469451904, + "learning_rate": 6.4862311557788945e-06, + "loss": 0.0013, + "step": 35475 + }, + { + "epoch": 13.24, + "grad_norm": 0.2302406132221222, + "learning_rate": 6.483718592964825e-06, + "loss": 0.0008, + "step": 35500 + }, + { + "epoch": 13.25, + "grad_norm": 0.4055044651031494, + "learning_rate": 6.481206030150754e-06, + "loss": 0.001, + "step": 35525 + }, + { + "epoch": 13.26, + "grad_norm": 0.28635749220848083, + "learning_rate": 6.478693467336684e-06, + "loss": 0.0012, + "step": 35550 + }, + { + "epoch": 13.26, + "grad_norm": 0.2158677875995636, + "learning_rate": 6.4761809045226135e-06, + "loss": 0.0009, + "step": 35575 + }, + { + "epoch": 13.27, + "grad_norm": 0.14932021498680115, + "learning_rate": 6.473668341708544e-06, + "loss": 0.0011, + "step": 35600 + }, + { + "epoch": 13.28, + "grad_norm": 0.6396030783653259, + "learning_rate": 6.471155778894474e-06, + "loss": 0.0008, + "step": 35625 + }, + { + "epoch": 13.29, + "grad_norm": 0.6853278875350952, + "learning_rate": 6.468643216080402e-06, + "loss": 0.001, + "step": 35650 + }, + { + "epoch": 13.3, + "grad_norm": 0.8591411113739014, + "learning_rate": 6.4661306532663325e-06, + "loss": 0.0014, + "step": 35675 + }, + { + "epoch": 13.31, + "grad_norm": 0.31352707743644714, + "learning_rate": 6.463618090452262e-06, + "loss": 0.0011, + "step": 35700 + }, + { + "epoch": 13.32, + "grad_norm": 0.18712899088859558, + "learning_rate": 6.461105527638192e-06, + "loss": 0.0013, + "step": 35725 + }, + { + "epoch": 13.33, + "grad_norm": 0.08204478025436401, + "learning_rate": 6.4585929648241205e-06, + "loss": 0.0011, + "step": 35750 + }, + { + "epoch": 13.34, + "grad_norm": 0.2320220023393631, + "learning_rate": 6.456080402010051e-06, + "loss": 0.0013, + "step": 35775 + }, + { + "epoch": 13.35, + "grad_norm": 0.5585671067237854, + "learning_rate": 6.45356783919598e-06, + "loss": 0.0014, + "step": 35800 + }, + { + "epoch": 13.36, + "grad_norm": 0.48822319507598877, + "learning_rate": 6.45105527638191e-06, + "loss": 0.0015, + "step": 35825 + }, + { + "epoch": 13.37, + "grad_norm": 1.76108980178833, + "learning_rate": 6.44854271356784e-06, + "loss": 0.0017, + "step": 35850 + }, + { + "epoch": 13.38, + "grad_norm": 0.4784335196018219, + "learning_rate": 6.44603015075377e-06, + "loss": 0.0013, + "step": 35875 + }, + { + "epoch": 13.39, + "grad_norm": 0.6539181470870972, + "learning_rate": 6.4435175879397e-06, + "loss": 0.0017, + "step": 35900 + }, + { + "epoch": 13.39, + "grad_norm": 0.5955649614334106, + "learning_rate": 6.441005025125628e-06, + "loss": 0.0014, + "step": 35925 + }, + { + "epoch": 13.4, + "grad_norm": 0.07756952196359634, + "learning_rate": 6.4384924623115584e-06, + "loss": 0.0011, + "step": 35950 + }, + { + "epoch": 13.41, + "grad_norm": 0.9986905455589294, + "learning_rate": 6.435979899497488e-06, + "loss": 0.0011, + "step": 35975 + }, + { + "epoch": 13.42, + "grad_norm": 1.0876482725143433, + "learning_rate": 6.433467336683418e-06, + "loss": 0.001, + "step": 36000 + }, + { + "epoch": 13.42, + "eval_loss": 0.1782969981431961, + "eval_runtime": 1203.9996, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 1.174, + "eval_wer": 11.604625740751395, + "step": 36000 + }, + { + "epoch": 13.43, + "grad_norm": 0.4122927784919739, + "learning_rate": 6.430954773869346e-06, + "loss": 0.0011, + "step": 36025 + }, + { + "epoch": 13.44, + "grad_norm": 0.6763662695884705, + "learning_rate": 6.428442211055277e-06, + "loss": 0.0009, + "step": 36050 + }, + { + "epoch": 13.45, + "grad_norm": 0.8228371143341064, + "learning_rate": 6.425929648241207e-06, + "loss": 0.0008, + "step": 36075 + }, + { + "epoch": 13.46, + "grad_norm": 0.6568008661270142, + "learning_rate": 6.423417085427136e-06, + "loss": 0.001, + "step": 36100 + }, + { + "epoch": 13.47, + "grad_norm": 0.8863239884376526, + "learning_rate": 6.420904522613066e-06, + "loss": 0.0011, + "step": 36125 + }, + { + "epoch": 13.48, + "grad_norm": 0.41321441531181335, + "learning_rate": 6.4183919597989956e-06, + "loss": 0.0014, + "step": 36150 + }, + { + "epoch": 13.49, + "grad_norm": 0.9094031453132629, + "learning_rate": 6.415879396984926e-06, + "loss": 0.0014, + "step": 36175 + }, + { + "epoch": 13.5, + "grad_norm": 0.4131356179714203, + "learning_rate": 6.413366834170854e-06, + "loss": 0.0012, + "step": 36200 + }, + { + "epoch": 13.51, + "grad_norm": 0.6198714375495911, + "learning_rate": 6.410954773869347e-06, + "loss": 0.0022, + "step": 36225 + }, + { + "epoch": 13.52, + "grad_norm": 0.26316049695014954, + "learning_rate": 6.408442211055277e-06, + "loss": 0.0011, + "step": 36250 + }, + { + "epoch": 13.53, + "grad_norm": 0.6077039241790771, + "learning_rate": 6.405929648241207e-06, + "loss": 0.0009, + "step": 36275 + }, + { + "epoch": 13.53, + "grad_norm": 1.0651239156723022, + "learning_rate": 6.403417085427136e-06, + "loss": 0.0014, + "step": 36300 + }, + { + "epoch": 13.54, + "grad_norm": 0.4114861786365509, + "learning_rate": 6.4009045226130664e-06, + "loss": 0.0013, + "step": 36325 + }, + { + "epoch": 13.55, + "grad_norm": 0.8641828298568726, + "learning_rate": 6.398391959798995e-06, + "loss": 0.0017, + "step": 36350 + }, + { + "epoch": 13.56, + "grad_norm": 0.9550549983978271, + "learning_rate": 6.395879396984925e-06, + "loss": 0.0009, + "step": 36375 + }, + { + "epoch": 13.57, + "grad_norm": 0.9393150210380554, + "learning_rate": 6.393366834170854e-06, + "loss": 0.0013, + "step": 36400 + }, + { + "epoch": 13.58, + "grad_norm": 1.1625839471817017, + "learning_rate": 6.390854271356785e-06, + "loss": 0.0011, + "step": 36425 + }, + { + "epoch": 13.59, + "grad_norm": 0.6878630518913269, + "learning_rate": 6.388341708542715e-06, + "loss": 0.0012, + "step": 36450 + }, + { + "epoch": 13.6, + "grad_norm": 0.8978285193443298, + "learning_rate": 6.385829145728644e-06, + "loss": 0.0011, + "step": 36475 + }, + { + "epoch": 13.61, + "grad_norm": 0.7464192509651184, + "learning_rate": 6.383316582914573e-06, + "loss": 0.0019, + "step": 36500 + }, + { + "epoch": 13.62, + "grad_norm": 0.7901182174682617, + "learning_rate": 6.380804020100503e-06, + "loss": 0.001, + "step": 36525 + }, + { + "epoch": 13.63, + "grad_norm": 0.25635117292404175, + "learning_rate": 6.378291457286433e-06, + "loss": 0.0008, + "step": 36550 + }, + { + "epoch": 13.64, + "grad_norm": 0.4737764000892639, + "learning_rate": 6.375778894472362e-06, + "loss": 0.001, + "step": 36575 + }, + { + "epoch": 13.65, + "grad_norm": 0.05818548426032066, + "learning_rate": 6.373266331658292e-06, + "loss": 0.0012, + "step": 36600 + }, + { + "epoch": 13.66, + "grad_norm": 0.5568153858184814, + "learning_rate": 6.370753768844221e-06, + "loss": 0.0013, + "step": 36625 + }, + { + "epoch": 13.67, + "grad_norm": 0.8913341164588928, + "learning_rate": 6.368241206030151e-06, + "loss": 0.0013, + "step": 36650 + }, + { + "epoch": 13.67, + "grad_norm": 0.1981651484966278, + "learning_rate": 6.365728643216081e-06, + "loss": 0.0014, + "step": 36675 + }, + { + "epoch": 13.68, + "grad_norm": 0.6624981164932251, + "learning_rate": 6.3632160804020105e-06, + "loss": 0.0012, + "step": 36700 + }, + { + "epoch": 13.69, + "grad_norm": 0.24827371537685394, + "learning_rate": 6.360703517587941e-06, + "loss": 0.0013, + "step": 36725 + }, + { + "epoch": 13.7, + "grad_norm": 0.07665925472974777, + "learning_rate": 6.35819095477387e-06, + "loss": 0.0012, + "step": 36750 + }, + { + "epoch": 13.71, + "grad_norm": 0.9796579480171204, + "learning_rate": 6.355678391959799e-06, + "loss": 0.0014, + "step": 36775 + }, + { + "epoch": 13.72, + "grad_norm": 0.6202231645584106, + "learning_rate": 6.353165829145729e-06, + "loss": 0.001, + "step": 36800 + }, + { + "epoch": 13.73, + "grad_norm": 0.744452714920044, + "learning_rate": 6.350653266331659e-06, + "loss": 0.0013, + "step": 36825 + }, + { + "epoch": 13.74, + "grad_norm": 0.6491627097129822, + "learning_rate": 6.348140703517588e-06, + "loss": 0.0016, + "step": 36850 + }, + { + "epoch": 13.75, + "grad_norm": 0.03990064561367035, + "learning_rate": 6.345628140703518e-06, + "loss": 0.0014, + "step": 36875 + }, + { + "epoch": 13.76, + "grad_norm": 0.09105639159679413, + "learning_rate": 6.3431155778894485e-06, + "loss": 0.001, + "step": 36900 + }, + { + "epoch": 13.77, + "grad_norm": 0.1807670295238495, + "learning_rate": 6.340603015075377e-06, + "loss": 0.0006, + "step": 36925 + }, + { + "epoch": 13.78, + "grad_norm": 0.8046282529830933, + "learning_rate": 6.338090452261307e-06, + "loss": 0.0009, + "step": 36950 + }, + { + "epoch": 13.79, + "grad_norm": 0.6118570566177368, + "learning_rate": 6.3355778894472365e-06, + "loss": 0.0014, + "step": 36975 + }, + { + "epoch": 13.8, + "grad_norm": 0.18531079590320587, + "learning_rate": 6.333065326633167e-06, + "loss": 0.0015, + "step": 37000 + }, + { + "epoch": 13.8, + "eval_loss": 0.17097821831703186, + "eval_runtime": 1190.5356, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 11.581612105172315, + "step": 37000 + }, + { + "epoch": 13.8, + "grad_norm": 1.6004773378372192, + "learning_rate": 6.330552763819096e-06, + "loss": 0.0012, + "step": 37025 + }, + { + "epoch": 13.81, + "grad_norm": 0.3555765450000763, + "learning_rate": 6.328040201005025e-06, + "loss": 0.0016, + "step": 37050 + }, + { + "epoch": 13.82, + "grad_norm": 0.34806108474731445, + "learning_rate": 6.3255276381909555e-06, + "loss": 0.0013, + "step": 37075 + }, + { + "epoch": 13.83, + "grad_norm": 0.5066863298416138, + "learning_rate": 6.323015075376885e-06, + "loss": 0.0018, + "step": 37100 + }, + { + "epoch": 13.84, + "grad_norm": 0.10312606394290924, + "learning_rate": 6.320502512562815e-06, + "loss": 0.0022, + "step": 37125 + }, + { + "epoch": 13.85, + "grad_norm": 0.5354475975036621, + "learning_rate": 6.317989949748744e-06, + "loss": 0.0018, + "step": 37150 + }, + { + "epoch": 13.86, + "grad_norm": 0.970860481262207, + "learning_rate": 6.3154773869346744e-06, + "loss": 0.0017, + "step": 37175 + }, + { + "epoch": 13.87, + "grad_norm": 0.6313734650611877, + "learning_rate": 6.312964824120603e-06, + "loss": 0.0021, + "step": 37200 + }, + { + "epoch": 13.88, + "grad_norm": 0.3161217272281647, + "learning_rate": 6.310452261306533e-06, + "loss": 0.0016, + "step": 37225 + }, + { + "epoch": 13.89, + "grad_norm": 0.5601320266723633, + "learning_rate": 6.307939698492462e-06, + "loss": 0.0024, + "step": 37250 + }, + { + "epoch": 13.9, + "grad_norm": 0.7407007217407227, + "learning_rate": 6.305427135678393e-06, + "loss": 0.0019, + "step": 37275 + }, + { + "epoch": 13.91, + "grad_norm": 0.32824578881263733, + "learning_rate": 6.302914572864323e-06, + "loss": 0.0011, + "step": 37300 + }, + { + "epoch": 13.92, + "grad_norm": 0.8153936266899109, + "learning_rate": 6.300402010050251e-06, + "loss": 0.0015, + "step": 37325 + }, + { + "epoch": 13.93, + "grad_norm": 0.5731088519096375, + "learning_rate": 6.297889447236181e-06, + "loss": 0.0018, + "step": 37350 + }, + { + "epoch": 13.94, + "grad_norm": 0.21145498752593994, + "learning_rate": 6.295376884422111e-06, + "loss": 0.0012, + "step": 37375 + }, + { + "epoch": 13.94, + "grad_norm": 0.5084758996963501, + "learning_rate": 6.292864321608041e-06, + "loss": 0.001, + "step": 37400 + }, + { + "epoch": 13.95, + "grad_norm": 0.22225376963615417, + "learning_rate": 6.29035175879397e-06, + "loss": 0.001, + "step": 37425 + }, + { + "epoch": 13.96, + "grad_norm": 0.10228783637285233, + "learning_rate": 6.2878391959799e-06, + "loss": 0.0012, + "step": 37450 + }, + { + "epoch": 13.97, + "grad_norm": 0.27593836188316345, + "learning_rate": 6.285326633165829e-06, + "loss": 0.0018, + "step": 37475 + }, + { + "epoch": 13.98, + "grad_norm": 0.34096312522888184, + "learning_rate": 6.282814070351759e-06, + "loss": 0.0013, + "step": 37500 + }, + { + "epoch": 13.99, + "grad_norm": 0.24173103272914886, + "learning_rate": 6.280301507537689e-06, + "loss": 0.0014, + "step": 37525 + }, + { + "epoch": 14.0, + "grad_norm": 0.9781358242034912, + "learning_rate": 6.2777889447236185e-06, + "loss": 0.0017, + "step": 37550 + }, + { + "epoch": 14.01, + "grad_norm": 0.11558689922094345, + "learning_rate": 6.275276381909549e-06, + "loss": 0.0013, + "step": 37575 + }, + { + "epoch": 14.02, + "grad_norm": 0.14351317286491394, + "learning_rate": 6.272763819095478e-06, + "loss": 0.0007, + "step": 37600 + }, + { + "epoch": 14.03, + "grad_norm": 0.10039152204990387, + "learning_rate": 6.270251256281407e-06, + "loss": 0.001, + "step": 37625 + }, + { + "epoch": 14.04, + "grad_norm": 0.2775709629058838, + "learning_rate": 6.267738693467337e-06, + "loss": 0.0007, + "step": 37650 + }, + { + "epoch": 14.05, + "grad_norm": 0.2972264289855957, + "learning_rate": 6.265226130653267e-06, + "loss": 0.0011, + "step": 37675 + }, + { + "epoch": 14.06, + "grad_norm": 0.7654500603675842, + "learning_rate": 6.262713567839197e-06, + "loss": 0.0015, + "step": 37700 + }, + { + "epoch": 14.07, + "grad_norm": 0.2722413241863251, + "learning_rate": 6.260201005025126e-06, + "loss": 0.0011, + "step": 37725 + }, + { + "epoch": 14.08, + "grad_norm": 0.1063547283411026, + "learning_rate": 6.2576884422110565e-06, + "loss": 0.0012, + "step": 37750 + }, + { + "epoch": 14.08, + "grad_norm": 0.7396189570426941, + "learning_rate": 6.255175879396985e-06, + "loss": 0.0012, + "step": 37775 + }, + { + "epoch": 14.09, + "grad_norm": 0.1573249250650406, + "learning_rate": 6.252663316582915e-06, + "loss": 0.0009, + "step": 37800 + }, + { + "epoch": 14.1, + "grad_norm": 0.7803904414176941, + "learning_rate": 6.2501507537688445e-06, + "loss": 0.001, + "step": 37825 + }, + { + "epoch": 14.11, + "grad_norm": 0.4246111214160919, + "learning_rate": 6.247638190954775e-06, + "loss": 0.001, + "step": 37850 + }, + { + "epoch": 14.12, + "grad_norm": 0.07195685058832169, + "learning_rate": 6.245125628140704e-06, + "loss": 0.0012, + "step": 37875 + }, + { + "epoch": 14.13, + "grad_norm": 0.11486300826072693, + "learning_rate": 6.242613065326633e-06, + "loss": 0.0007, + "step": 37900 + }, + { + "epoch": 14.14, + "grad_norm": 0.11316592246294022, + "learning_rate": 6.2401005025125635e-06, + "loss": 0.0011, + "step": 37925 + }, + { + "epoch": 14.15, + "grad_norm": 0.2502623200416565, + "learning_rate": 6.237587939698493e-06, + "loss": 0.0011, + "step": 37950 + }, + { + "epoch": 14.16, + "grad_norm": 0.3339388072490692, + "learning_rate": 6.235075376884423e-06, + "loss": 0.0008, + "step": 37975 + }, + { + "epoch": 14.17, + "grad_norm": 0.05428297817707062, + "learning_rate": 6.232562814070352e-06, + "loss": 0.0013, + "step": 38000 + }, + { + "epoch": 14.17, + "eval_loss": 0.1706358790397644, + "eval_runtime": 1190.2393, + "eval_samples_per_second": 1.187, + "eval_steps_per_second": 1.187, + "eval_wer": 11.518324607329843, + "step": 38000 + }, + { + "epoch": 14.18, + "grad_norm": 0.9048689603805542, + "learning_rate": 6.2300502512562824e-06, + "loss": 0.0008, + "step": 38025 + }, + { + "epoch": 14.19, + "grad_norm": 0.41266894340515137, + "learning_rate": 6.227537688442211e-06, + "loss": 0.001, + "step": 38050 + }, + { + "epoch": 14.2, + "grad_norm": 0.377180814743042, + "learning_rate": 6.225025125628141e-06, + "loss": 0.0011, + "step": 38075 + }, + { + "epoch": 14.21, + "grad_norm": 0.028144003823399544, + "learning_rate": 6.22251256281407e-06, + "loss": 0.0006, + "step": 38100 + }, + { + "epoch": 14.22, + "grad_norm": 1.0385684967041016, + "learning_rate": 6.220000000000001e-06, + "loss": 0.0008, + "step": 38125 + }, + { + "epoch": 14.22, + "grad_norm": 0.2878379821777344, + "learning_rate": 6.217487437185931e-06, + "loss": 0.0007, + "step": 38150 + }, + { + "epoch": 14.23, + "grad_norm": 0.6393439769744873, + "learning_rate": 6.214974874371859e-06, + "loss": 0.0009, + "step": 38175 + }, + { + "epoch": 14.24, + "grad_norm": 0.1318587064743042, + "learning_rate": 6.212462311557789e-06, + "loss": 0.0009, + "step": 38200 + }, + { + "epoch": 14.25, + "grad_norm": 0.49657195806503296, + "learning_rate": 6.209949748743719e-06, + "loss": 0.0007, + "step": 38225 + }, + { + "epoch": 14.26, + "grad_norm": 0.5079365372657776, + "learning_rate": 6.207437185929649e-06, + "loss": 0.001, + "step": 38250 + }, + { + "epoch": 14.27, + "grad_norm": 0.07314042001962662, + "learning_rate": 6.204924623115578e-06, + "loss": 0.0016, + "step": 38275 + }, + { + "epoch": 14.28, + "grad_norm": 0.16204428672790527, + "learning_rate": 6.202412060301508e-06, + "loss": 0.0008, + "step": 38300 + }, + { + "epoch": 14.29, + "grad_norm": 0.0775744691491127, + "learning_rate": 6.1998994974874386e-06, + "loss": 0.0006, + "step": 38325 + }, + { + "epoch": 14.3, + "grad_norm": 0.3019218146800995, + "learning_rate": 6.197386934673367e-06, + "loss": 0.0006, + "step": 38350 + }, + { + "epoch": 14.31, + "grad_norm": 0.2738451361656189, + "learning_rate": 6.194874371859297e-06, + "loss": 0.001, + "step": 38375 + }, + { + "epoch": 14.32, + "grad_norm": 0.17580489814281464, + "learning_rate": 6.1923618090452265e-06, + "loss": 0.0009, + "step": 38400 + }, + { + "epoch": 14.33, + "grad_norm": 0.5389268398284912, + "learning_rate": 6.189849246231157e-06, + "loss": 0.0006, + "step": 38425 + }, + { + "epoch": 14.34, + "grad_norm": 0.23791654407978058, + "learning_rate": 6.187336683417085e-06, + "loss": 0.001, + "step": 38450 + }, + { + "epoch": 14.35, + "grad_norm": 0.7023106813430786, + "learning_rate": 6.184824120603015e-06, + "loss": 0.0009, + "step": 38475 + }, + { + "epoch": 14.35, + "grad_norm": 0.05425882712006569, + "learning_rate": 6.182311557788945e-06, + "loss": 0.0008, + "step": 38500 + }, + { + "epoch": 14.36, + "grad_norm": 0.12073127925395966, + "learning_rate": 6.179798994974875e-06, + "loss": 0.0011, + "step": 38525 + }, + { + "epoch": 14.37, + "grad_norm": 0.2712104320526123, + "learning_rate": 6.177286432160805e-06, + "loss": 0.0012, + "step": 38550 + }, + { + "epoch": 14.38, + "grad_norm": 0.8051292300224304, + "learning_rate": 6.174773869346734e-06, + "loss": 0.0008, + "step": 38575 + }, + { + "epoch": 14.39, + "grad_norm": 0.3506217896938324, + "learning_rate": 6.1722613065326645e-06, + "loss": 0.0007, + "step": 38600 + }, + { + "epoch": 14.4, + "grad_norm": 0.18524500727653503, + "learning_rate": 6.169748743718593e-06, + "loss": 0.0013, + "step": 38625 + }, + { + "epoch": 14.41, + "grad_norm": 0.46906226873397827, + "learning_rate": 6.167236180904523e-06, + "loss": 0.0016, + "step": 38650 + }, + { + "epoch": 14.42, + "grad_norm": 0.7011106014251709, + "learning_rate": 6.1647236180904525e-06, + "loss": 0.0013, + "step": 38675 + }, + { + "epoch": 14.43, + "grad_norm": 0.5965808629989624, + "learning_rate": 6.162211055276383e-06, + "loss": 0.0015, + "step": 38700 + }, + { + "epoch": 14.44, + "grad_norm": 0.8887078762054443, + "learning_rate": 6.159698492462312e-06, + "loss": 0.0012, + "step": 38725 + }, + { + "epoch": 14.45, + "grad_norm": 0.07892042398452759, + "learning_rate": 6.157185929648241e-06, + "loss": 0.0015, + "step": 38750 + }, + { + "epoch": 14.46, + "grad_norm": 0.37976914644241333, + "learning_rate": 6.1546733668341715e-06, + "loss": 0.0012, + "step": 38775 + }, + { + "epoch": 14.47, + "grad_norm": 0.8067272901535034, + "learning_rate": 6.152160804020101e-06, + "loss": 0.0012, + "step": 38800 + }, + { + "epoch": 14.48, + "grad_norm": 0.5488851070404053, + "learning_rate": 6.149648241206031e-06, + "loss": 0.0016, + "step": 38825 + }, + { + "epoch": 14.49, + "grad_norm": 0.4933125078678131, + "learning_rate": 6.14713567839196e-06, + "loss": 0.0012, + "step": 38850 + }, + { + "epoch": 14.49, + "grad_norm": 0.5089290738105774, + "learning_rate": 6.1446231155778904e-06, + "loss": 0.0014, + "step": 38875 + }, + { + "epoch": 14.5, + "grad_norm": 1.1275123357772827, + "learning_rate": 6.142110552763819e-06, + "loss": 0.0014, + "step": 38900 + }, + { + "epoch": 14.51, + "grad_norm": 0.30623140931129456, + "learning_rate": 6.139597989949749e-06, + "loss": 0.0013, + "step": 38925 + }, + { + "epoch": 14.52, + "grad_norm": 0.6667945981025696, + "learning_rate": 6.137085427135678e-06, + "loss": 0.0016, + "step": 38950 + }, + { + "epoch": 14.53, + "grad_norm": 0.46617773175239563, + "learning_rate": 6.134572864321609e-06, + "loss": 0.0013, + "step": 38975 + }, + { + "epoch": 14.54, + "grad_norm": 0.12305791676044464, + "learning_rate": 6.132060301507539e-06, + "loss": 0.0014, + "step": 39000 + }, + { + "epoch": 14.54, + "eval_loss": 0.1737467497587204, + "eval_runtime": 1188.0675, + "eval_samples_per_second": 1.189, + "eval_steps_per_second": 1.189, + "eval_wer": 11.478050745066453, + "step": 39000 + }, + { + "epoch": 14.55, + "grad_norm": 0.9932394027709961, + "learning_rate": 6.129547738693467e-06, + "loss": 0.0013, + "step": 39025 + }, + { + "epoch": 14.56, + "grad_norm": 0.9461562037467957, + "learning_rate": 6.127035175879397e-06, + "loss": 0.0012, + "step": 39050 + }, + { + "epoch": 14.57, + "grad_norm": 0.6811251044273376, + "learning_rate": 6.124522613065327e-06, + "loss": 0.0011, + "step": 39075 + }, + { + "epoch": 14.58, + "grad_norm": 0.9879390001296997, + "learning_rate": 6.122010050251257e-06, + "loss": 0.0009, + "step": 39100 + }, + { + "epoch": 14.59, + "grad_norm": 1.0414375066757202, + "learning_rate": 6.119497487437186e-06, + "loss": 0.0009, + "step": 39125 + }, + { + "epoch": 14.6, + "grad_norm": 1.1732712984085083, + "learning_rate": 6.116984924623116e-06, + "loss": 0.0013, + "step": 39150 + }, + { + "epoch": 14.61, + "grad_norm": 1.3034459352493286, + "learning_rate": 6.1144723618090466e-06, + "loss": 0.0014, + "step": 39175 + }, + { + "epoch": 14.62, + "grad_norm": 0.5251243710517883, + "learning_rate": 6.111959798994975e-06, + "loss": 0.0017, + "step": 39200 + }, + { + "epoch": 14.63, + "grad_norm": 0.521616518497467, + "learning_rate": 6.109447236180905e-06, + "loss": 0.0014, + "step": 39225 + }, + { + "epoch": 14.63, + "grad_norm": 0.42824500799179077, + "learning_rate": 6.1069346733668345e-06, + "loss": 0.001, + "step": 39250 + }, + { + "epoch": 14.64, + "grad_norm": 0.2933714985847473, + "learning_rate": 6.104422110552765e-06, + "loss": 0.001, + "step": 39275 + }, + { + "epoch": 14.65, + "grad_norm": 0.9360739588737488, + "learning_rate": 6.101909547738693e-06, + "loss": 0.0014, + "step": 39300 + }, + { + "epoch": 14.66, + "grad_norm": 0.41315758228302, + "learning_rate": 6.099396984924623e-06, + "loss": 0.0017, + "step": 39325 + }, + { + "epoch": 14.67, + "grad_norm": 0.2345711886882782, + "learning_rate": 6.096884422110553e-06, + "loss": 0.0011, + "step": 39350 + }, + { + "epoch": 14.68, + "grad_norm": 0.9606124758720398, + "learning_rate": 6.094371859296483e-06, + "loss": 0.0013, + "step": 39375 + }, + { + "epoch": 14.69, + "grad_norm": 0.6566662192344666, + "learning_rate": 6.091859296482413e-06, + "loss": 0.0016, + "step": 39400 + }, + { + "epoch": 14.7, + "grad_norm": 0.48041006922721863, + "learning_rate": 6.089346733668342e-06, + "loss": 0.0015, + "step": 39425 + }, + { + "epoch": 14.71, + "grad_norm": 0.813255786895752, + "learning_rate": 6.0868341708542725e-06, + "loss": 0.0016, + "step": 39450 + }, + { + "epoch": 14.72, + "grad_norm": 1.092101812362671, + "learning_rate": 6.084321608040201e-06, + "loss": 0.0014, + "step": 39475 + }, + { + "epoch": 14.73, + "grad_norm": 0.2466084361076355, + "learning_rate": 6.081809045226131e-06, + "loss": 0.0013, + "step": 39500 + }, + { + "epoch": 14.74, + "grad_norm": 0.08917827159166336, + "learning_rate": 6.0792964824120605e-06, + "loss": 0.0008, + "step": 39525 + }, + { + "epoch": 14.75, + "grad_norm": 0.0429447703063488, + "learning_rate": 6.076783919597991e-06, + "loss": 0.0015, + "step": 39550 + }, + { + "epoch": 14.76, + "grad_norm": 0.5480964183807373, + "learning_rate": 6.07427135678392e-06, + "loss": 0.0013, + "step": 39575 + }, + { + "epoch": 14.77, + "grad_norm": 0.8283253312110901, + "learning_rate": 6.071758793969849e-06, + "loss": 0.0009, + "step": 39600 + }, + { + "epoch": 14.77, + "grad_norm": 0.11995526403188705, + "learning_rate": 6.0692462311557795e-06, + "loss": 0.0008, + "step": 39625 + }, + { + "epoch": 14.78, + "grad_norm": 0.41361692547798157, + "learning_rate": 6.066733668341709e-06, + "loss": 0.0012, + "step": 39650 + }, + { + "epoch": 14.79, + "grad_norm": 0.3913096487522125, + "learning_rate": 6.064221105527639e-06, + "loss": 0.0012, + "step": 39675 + }, + { + "epoch": 14.8, + "grad_norm": 0.6406510472297668, + "learning_rate": 6.061708542713568e-06, + "loss": 0.0013, + "step": 39700 + }, + { + "epoch": 14.81, + "grad_norm": 1.1124719381332397, + "learning_rate": 6.0591959798994985e-06, + "loss": 0.0012, + "step": 39725 + }, + { + "epoch": 14.82, + "grad_norm": 0.2417456954717636, + "learning_rate": 6.056683417085427e-06, + "loss": 0.0015, + "step": 39750 + }, + { + "epoch": 14.83, + "grad_norm": 0.6809617280960083, + "learning_rate": 6.054170854271357e-06, + "loss": 0.0012, + "step": 39775 + }, + { + "epoch": 14.84, + "grad_norm": 0.7579650282859802, + "learning_rate": 6.051658291457287e-06, + "loss": 0.0008, + "step": 39800 + }, + { + "epoch": 14.85, + "grad_norm": 0.19817301630973816, + "learning_rate": 6.049145728643217e-06, + "loss": 0.0011, + "step": 39825 + }, + { + "epoch": 14.86, + "grad_norm": 0.612936794757843, + "learning_rate": 6.046633165829147e-06, + "loss": 0.0011, + "step": 39850 + }, + { + "epoch": 14.87, + "grad_norm": 1.1427934169769287, + "learning_rate": 6.044120603015075e-06, + "loss": 0.0012, + "step": 39875 + }, + { + "epoch": 14.88, + "grad_norm": 0.6669180989265442, + "learning_rate": 6.041608040201005e-06, + "loss": 0.0017, + "step": 39900 + }, + { + "epoch": 14.89, + "grad_norm": 0.9421991109848022, + "learning_rate": 6.039095477386935e-06, + "loss": 0.0011, + "step": 39925 + }, + { + "epoch": 14.9, + "grad_norm": 0.5347155928611755, + "learning_rate": 6.036582914572865e-06, + "loss": 0.0011, + "step": 39950 + }, + { + "epoch": 14.9, + "grad_norm": 1.3131002187728882, + "learning_rate": 6.034070351758794e-06, + "loss": 0.0018, + "step": 39975 + }, + { + "epoch": 14.91, + "grad_norm": 0.06937739998102188, + "learning_rate": 6.031557788944724e-06, + "loss": 0.0016, + "step": 40000 + }, + { + "epoch": 14.91, + "eval_loss": 0.1745300441980362, + "eval_runtime": 1199.2375, + "eval_samples_per_second": 1.178, + "eval_steps_per_second": 1.178, + "eval_wer": 11.581612105172315, + "step": 40000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 38, + "save_steps": 1000, + "total_flos": 3.692923477150925e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/training_args.bin b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc4c8920f96adf9219daae9ac7e09dc034a6778e --- /dev/null +++ b/checkpoints/whisper-small/chattisgarhi/checkpoint-40000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ccecf69ab414b25f2edaabbb4cb92699a814d521b3e48f3c0ea1559e9eaf2e +size 4667 diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/config.json b/checkpoints/whisper-small/hindi/checkpoint-15000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb289bfc120ad77c5505b0ef210c56bf35075f5 --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/generation_config.json b/checkpoints/whisper-small/hindi/checkpoint-15000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/model.safetensors b/checkpoints/whisper-small/hindi/checkpoint-15000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad3cd8326aa8bf10d11ba3e9f6b7294d4bea729a --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05cce46c771dc8abb7f588590342524d547dcd3ae057a6c71f3067de6023c019 +size 966995080 diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/optimizer.pt b/checkpoints/whisper-small/hindi/checkpoint-15000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a29642e7dabec22bd836c78039359403f9ba6a9d --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f317e9ddd6da67b80ce82f3e3589c5de3966b22a48e913fab6bd031544b4bd +size 1925063607 diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/preprocessor_config.json b/checkpoints/whisper-small/hindi/checkpoint-15000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/rng_state.pth b/checkpoints/whisper-small/hindi/checkpoint-15000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6302a84b7b0b5ae431bbdfa6977bb2a22fe4beef --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d54ee2319da41ddef4044e5701f20f5505a127b57f1e6116855202c33fdeaf +size 14575 diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/scheduler.pt b/checkpoints/whisper-small/hindi/checkpoint-15000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d37c3ac63af5080ec4146ece6e0c07b0544e114 --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed43d14cb2aef9875751e24a05d0321e5f21c7115fee27e01e99805122eb363 +size 627 diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/trainer_state.json b/checkpoints/whisper-small/hindi/checkpoint-15000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5a61baf49d2a55ba6f032b4fc325246c9f7cd47c --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/trainer_state.json @@ -0,0 +1,4356 @@ +{ + "best_metric": 11.442262739597943, + "best_model_checkpoint": "results/whisper-small/hindi/checkpoint-5000", + "epoch": 5.592841163310962, + "eval_steps": 1000, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 42.36537551879883, + "learning_rate": 4.4e-07, + "loss": 1.9326, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 11.799786567687988, + "learning_rate": 9.400000000000001e-07, + "loss": 1.5414, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 7.8190083503723145, + "learning_rate": 1.44e-06, + "loss": 1.079, + "step": 75 + }, + { + "epoch": 0.04, + "grad_norm": 5.357276916503906, + "learning_rate": 1.94e-06, + "loss": 0.7971, + "step": 100 + }, + { + "epoch": 0.05, + "grad_norm": 5.096272945404053, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.6774, + "step": 125 + }, + { + "epoch": 0.06, + "grad_norm": 5.176544189453125, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.6132, + "step": 150 + }, + { + "epoch": 0.07, + "grad_norm": 4.91416072845459, + "learning_rate": 3.44e-06, + "loss": 0.5613, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.431929111480713, + "learning_rate": 3.94e-06, + "loss": 0.5254, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 4.973642349243164, + "learning_rate": 4.440000000000001e-06, + "loss": 0.4676, + "step": 225 + }, + { + "epoch": 0.09, + "grad_norm": 5.493452072143555, + "learning_rate": 4.94e-06, + "loss": 0.4497, + "step": 250 + }, + { + "epoch": 0.1, + "grad_norm": 4.688079357147217, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.3946, + "step": 275 + }, + { + "epoch": 0.11, + "grad_norm": 5.4174628257751465, + "learning_rate": 5.94e-06, + "loss": 0.3586, + "step": 300 + }, + { + "epoch": 0.12, + "grad_norm": 4.307477951049805, + "learning_rate": 6.440000000000001e-06, + "loss": 0.3177, + "step": 325 + }, + { + "epoch": 0.13, + "grad_norm": 3.377976655960083, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.2832, + "step": 350 + }, + { + "epoch": 0.14, + "grad_norm": 3.681701898574829, + "learning_rate": 7.440000000000001e-06, + "loss": 0.2599, + "step": 375 + }, + { + "epoch": 0.15, + "grad_norm": 3.25416898727417, + "learning_rate": 7.94e-06, + "loss": 0.2528, + "step": 400 + }, + { + "epoch": 0.16, + "grad_norm": 3.107332706451416, + "learning_rate": 8.44e-06, + "loss": 0.2429, + "step": 425 + }, + { + "epoch": 0.17, + "grad_norm": 3.4654228687286377, + "learning_rate": 8.94e-06, + "loss": 0.2311, + "step": 450 + }, + { + "epoch": 0.18, + "grad_norm": 3.624549150466919, + "learning_rate": 9.440000000000001e-06, + "loss": 0.2317, + "step": 475 + }, + { + "epoch": 0.19, + "grad_norm": 3.4001588821411133, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2191, + "step": 500 + }, + { + "epoch": 0.2, + "grad_norm": 3.214857339859009, + "learning_rate": 9.997788944723618e-06, + "loss": 0.2116, + "step": 525 + }, + { + "epoch": 0.21, + "grad_norm": 3.4731991291046143, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2003, + "step": 550 + }, + { + "epoch": 0.21, + "grad_norm": 3.6737163066864014, + "learning_rate": 9.992763819095477e-06, + "loss": 0.1979, + "step": 575 + }, + { + "epoch": 0.22, + "grad_norm": 2.9863927364349365, + "learning_rate": 9.990251256281408e-06, + "loss": 0.1992, + "step": 600 + }, + { + "epoch": 0.23, + "grad_norm": 3.910396099090576, + "learning_rate": 9.987738693467337e-06, + "loss": 0.1904, + "step": 625 + }, + { + "epoch": 0.24, + "grad_norm": 3.3716039657592773, + "learning_rate": 9.985226130653267e-06, + "loss": 0.1863, + "step": 650 + }, + { + "epoch": 0.25, + "grad_norm": 2.951724052429199, + "learning_rate": 9.982713567839198e-06, + "loss": 0.1786, + "step": 675 + }, + { + "epoch": 0.26, + "grad_norm": 3.85339617729187, + "learning_rate": 9.980201005025127e-06, + "loss": 0.1866, + "step": 700 + }, + { + "epoch": 0.27, + "grad_norm": 2.7808704376220703, + "learning_rate": 9.977688442211056e-06, + "loss": 0.1787, + "step": 725 + }, + { + "epoch": 0.28, + "grad_norm": 2.9116945266723633, + "learning_rate": 9.975175879396986e-06, + "loss": 0.1693, + "step": 750 + }, + { + "epoch": 0.29, + "grad_norm": 3.1178367137908936, + "learning_rate": 9.972663316582915e-06, + "loss": 0.1753, + "step": 775 + }, + { + "epoch": 0.3, + "grad_norm": 2.8967998027801514, + "learning_rate": 9.970150753768844e-06, + "loss": 0.1678, + "step": 800 + }, + { + "epoch": 0.31, + "grad_norm": 3.3396878242492676, + "learning_rate": 9.967638190954775e-06, + "loss": 0.1641, + "step": 825 + }, + { + "epoch": 0.32, + "grad_norm": 2.955113172531128, + "learning_rate": 9.965125628140703e-06, + "loss": 0.1608, + "step": 850 + }, + { + "epoch": 0.33, + "grad_norm": 2.6730570793151855, + "learning_rate": 9.962613065326634e-06, + "loss": 0.1669, + "step": 875 + }, + { + "epoch": 0.34, + "grad_norm": 2.6133692264556885, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1558, + "step": 900 + }, + { + "epoch": 0.34, + "grad_norm": 3.199889898300171, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1535, + "step": 925 + }, + { + "epoch": 0.35, + "grad_norm": 2.7858588695526123, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1481, + "step": 950 + }, + { + "epoch": 0.36, + "grad_norm": 2.809863805770874, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1515, + "step": 975 + }, + { + "epoch": 0.37, + "grad_norm": 3.139817237854004, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1541, + "step": 1000 + }, + { + "epoch": 0.37, + "eval_loss": 0.12340465933084488, + "eval_runtime": 1627.5048, + "eval_samples_per_second": 0.946, + "eval_steps_per_second": 0.946, + "eval_wer": 17.6367461430575, + "step": 1000 + }, + { + "epoch": 0.38, + "grad_norm": 3.355046272277832, + "learning_rate": 9.947537688442212e-06, + "loss": 0.151, + "step": 1025 + }, + { + "epoch": 0.39, + "grad_norm": 2.616912841796875, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1459, + "step": 1050 + }, + { + "epoch": 0.4, + "grad_norm": 3.1603734493255615, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1397, + "step": 1075 + }, + { + "epoch": 0.41, + "grad_norm": 2.615542411804199, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1421, + "step": 1100 + }, + { + "epoch": 0.42, + "grad_norm": 2.8536531925201416, + "learning_rate": 9.93748743718593e-06, + "loss": 0.1365, + "step": 1125 + }, + { + "epoch": 0.43, + "grad_norm": 2.297004222869873, + "learning_rate": 9.93497487437186e-06, + "loss": 0.138, + "step": 1150 + }, + { + "epoch": 0.44, + "grad_norm": 2.5383195877075195, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1307, + "step": 1175 + }, + { + "epoch": 0.45, + "grad_norm": 2.4351696968078613, + "learning_rate": 9.929949748743719e-06, + "loss": 0.135, + "step": 1200 + }, + { + "epoch": 0.46, + "grad_norm": 2.8478033542633057, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1347, + "step": 1225 + }, + { + "epoch": 0.47, + "grad_norm": 2.3951833248138428, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1335, + "step": 1250 + }, + { + "epoch": 0.48, + "grad_norm": 2.3995583057403564, + "learning_rate": 9.922412060301508e-06, + "loss": 0.134, + "step": 1275 + }, + { + "epoch": 0.48, + "grad_norm": 2.850933074951172, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1333, + "step": 1300 + }, + { + "epoch": 0.49, + "grad_norm": 2.3719377517700195, + "learning_rate": 9.917386934673367e-06, + "loss": 0.124, + "step": 1325 + }, + { + "epoch": 0.5, + "grad_norm": 2.802485227584839, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1256, + "step": 1350 + }, + { + "epoch": 0.51, + "grad_norm": 3.1516923904418945, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1268, + "step": 1375 + }, + { + "epoch": 0.52, + "grad_norm": 2.585190773010254, + "learning_rate": 9.909849246231157e-06, + "loss": 0.128, + "step": 1400 + }, + { + "epoch": 0.53, + "grad_norm": 2.4954349994659424, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1231, + "step": 1425 + }, + { + "epoch": 0.54, + "grad_norm": 2.8973469734191895, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1193, + "step": 1450 + }, + { + "epoch": 0.55, + "grad_norm": 2.7043771743774414, + "learning_rate": 9.902311557788945e-06, + "loss": 0.12, + "step": 1475 + }, + { + "epoch": 0.56, + "grad_norm": 2.3871688842773438, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1185, + "step": 1500 + }, + { + "epoch": 0.57, + "grad_norm": 2.6781187057495117, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1132, + "step": 1525 + }, + { + "epoch": 0.58, + "grad_norm": 2.2694790363311768, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1174, + "step": 1550 + }, + { + "epoch": 0.59, + "grad_norm": 2.536362648010254, + "learning_rate": 9.892261306532665e-06, + "loss": 0.119, + "step": 1575 + }, + { + "epoch": 0.6, + "grad_norm": 2.4464328289031982, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1201, + "step": 1600 + }, + { + "epoch": 0.61, + "grad_norm": 2.339357614517212, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1148, + "step": 1625 + }, + { + "epoch": 0.62, + "grad_norm": 2.885040760040283, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1135, + "step": 1650 + }, + { + "epoch": 0.62, + "grad_norm": 2.481039047241211, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1122, + "step": 1675 + }, + { + "epoch": 0.63, + "grad_norm": 2.4352877140045166, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1134, + "step": 1700 + }, + { + "epoch": 0.64, + "grad_norm": 2.7708117961883545, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1096, + "step": 1725 + }, + { + "epoch": 0.65, + "grad_norm": 2.27917218208313, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1092, + "step": 1750 + }, + { + "epoch": 0.66, + "grad_norm": 2.4441990852355957, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1116, + "step": 1775 + }, + { + "epoch": 0.67, + "grad_norm": 2.4655399322509766, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1084, + "step": 1800 + }, + { + "epoch": 0.68, + "grad_norm": 2.45590877532959, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1079, + "step": 1825 + }, + { + "epoch": 0.69, + "grad_norm": 2.7584080696105957, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1043, + "step": 1850 + }, + { + "epoch": 0.7, + "grad_norm": 2.7407121658325195, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1041, + "step": 1875 + }, + { + "epoch": 0.71, + "grad_norm": 2.306201696395874, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1096, + "step": 1900 + }, + { + "epoch": 0.72, + "grad_norm": 2.5924527645111084, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1088, + "step": 1925 + }, + { + "epoch": 0.73, + "grad_norm": 2.1319239139556885, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1041, + "step": 1950 + }, + { + "epoch": 0.74, + "grad_norm": 2.9542977809906006, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1012, + "step": 1975 + }, + { + "epoch": 0.75, + "grad_norm": 2.4561843872070312, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1038, + "step": 2000 + }, + { + "epoch": 0.75, + "eval_loss": 0.0937967598438263, + "eval_runtime": 1341.7486, + "eval_samples_per_second": 1.147, + "eval_steps_per_second": 1.147, + "eval_wer": 14.177185600748013, + "step": 2000 + }, + { + "epoch": 0.76, + "grad_norm": 2.2162787914276123, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1064, + "step": 2025 + }, + { + "epoch": 0.76, + "grad_norm": 2.2295587062835693, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1057, + "step": 2050 + }, + { + "epoch": 0.77, + "grad_norm": 2.446345567703247, + "learning_rate": 9.842010050251257e-06, + "loss": 0.0987, + "step": 2075 + }, + { + "epoch": 0.78, + "grad_norm": 2.5574097633361816, + "learning_rate": 9.839497487437186e-06, + "loss": 0.0999, + "step": 2100 + }, + { + "epoch": 0.79, + "grad_norm": 2.475861072540283, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1003, + "step": 2125 + }, + { + "epoch": 0.8, + "grad_norm": 2.487347364425659, + "learning_rate": 9.834472361809047e-06, + "loss": 0.0985, + "step": 2150 + }, + { + "epoch": 0.81, + "grad_norm": 2.2822072505950928, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1021, + "step": 2175 + }, + { + "epoch": 0.82, + "grad_norm": 2.432718276977539, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1013, + "step": 2200 + }, + { + "epoch": 0.83, + "grad_norm": 2.2421412467956543, + "learning_rate": 9.826934673366834e-06, + "loss": 0.0987, + "step": 2225 + }, + { + "epoch": 0.84, + "grad_norm": 2.462313175201416, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1004, + "step": 2250 + }, + { + "epoch": 0.85, + "grad_norm": 2.2158570289611816, + "learning_rate": 9.821909547738693e-06, + "loss": 0.0971, + "step": 2275 + }, + { + "epoch": 0.86, + "grad_norm": 2.201918125152588, + "learning_rate": 9.819396984924624e-06, + "loss": 0.0965, + "step": 2300 + }, + { + "epoch": 0.87, + "grad_norm": 2.2607669830322266, + "learning_rate": 9.816884422110553e-06, + "loss": 0.0985, + "step": 2325 + }, + { + "epoch": 0.88, + "grad_norm": 2.4958341121673584, + "learning_rate": 9.814371859296483e-06, + "loss": 0.0941, + "step": 2350 + }, + { + "epoch": 0.89, + "grad_norm": 2.724621295928955, + "learning_rate": 9.811859296482414e-06, + "loss": 0.0981, + "step": 2375 + }, + { + "epoch": 0.89, + "grad_norm": 1.9915828704833984, + "learning_rate": 9.809346733668343e-06, + "loss": 0.0941, + "step": 2400 + }, + { + "epoch": 0.9, + "grad_norm": 2.2708346843719482, + "learning_rate": 9.806834170854272e-06, + "loss": 0.0867, + "step": 2425 + }, + { + "epoch": 0.91, + "grad_norm": 2.277890205383301, + "learning_rate": 9.804321608040202e-06, + "loss": 0.0946, + "step": 2450 + }, + { + "epoch": 0.92, + "grad_norm": 2.1180121898651123, + "learning_rate": 9.801809045226131e-06, + "loss": 0.0905, + "step": 2475 + }, + { + "epoch": 0.93, + "grad_norm": 2.430920362472534, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1008, + "step": 2500 + }, + { + "epoch": 0.94, + "grad_norm": 2.246851682662964, + "learning_rate": 9.796783919597991e-06, + "loss": 0.0905, + "step": 2525 + }, + { + "epoch": 0.95, + "grad_norm": 2.147915840148926, + "learning_rate": 9.79427135678392e-06, + "loss": 0.0948, + "step": 2550 + }, + { + "epoch": 0.96, + "grad_norm": 2.084587335586548, + "learning_rate": 9.79175879396985e-06, + "loss": 0.0981, + "step": 2575 + }, + { + "epoch": 0.97, + "grad_norm": 2.3861725330352783, + "learning_rate": 9.78924623115578e-06, + "loss": 0.0916, + "step": 2600 + }, + { + "epoch": 0.98, + "grad_norm": 2.7367637157440186, + "learning_rate": 9.786733668341709e-06, + "loss": 0.0959, + "step": 2625 + }, + { + "epoch": 0.99, + "grad_norm": 1.803769588470459, + "learning_rate": 9.78422110552764e-06, + "loss": 0.0861, + "step": 2650 + }, + { + "epoch": 1.0, + "grad_norm": 2.1459879875183105, + "learning_rate": 9.781708542713569e-06, + "loss": 0.0912, + "step": 2675 + }, + { + "epoch": 1.01, + "grad_norm": 1.5662623643875122, + "learning_rate": 9.779195979899498e-06, + "loss": 0.0731, + "step": 2700 + }, + { + "epoch": 1.02, + "grad_norm": 1.8727729320526123, + "learning_rate": 9.776683417085428e-06, + "loss": 0.0639, + "step": 2725 + }, + { + "epoch": 1.03, + "grad_norm": 1.4986780881881714, + "learning_rate": 9.774170854271357e-06, + "loss": 0.0641, + "step": 2750 + }, + { + "epoch": 1.03, + "grad_norm": 1.8644243478775024, + "learning_rate": 9.771658291457288e-06, + "loss": 0.0623, + "step": 2775 + }, + { + "epoch": 1.04, + "grad_norm": 1.840723991394043, + "learning_rate": 9.769145728643217e-06, + "loss": 0.0631, + "step": 2800 + }, + { + "epoch": 1.05, + "grad_norm": 2.1584184169769287, + "learning_rate": 9.766633165829147e-06, + "loss": 0.0628, + "step": 2825 + }, + { + "epoch": 1.06, + "grad_norm": 1.919477939605713, + "learning_rate": 9.764120603015076e-06, + "loss": 0.0639, + "step": 2850 + }, + { + "epoch": 1.07, + "grad_norm": 1.8495159149169922, + "learning_rate": 9.761608040201005e-06, + "loss": 0.0666, + "step": 2875 + }, + { + "epoch": 1.08, + "grad_norm": 1.8795503377914429, + "learning_rate": 9.759095477386935e-06, + "loss": 0.0608, + "step": 2900 + }, + { + "epoch": 1.09, + "grad_norm": 2.77213454246521, + "learning_rate": 9.756582914572866e-06, + "loss": 0.0634, + "step": 2925 + }, + { + "epoch": 1.1, + "grad_norm": 1.7321393489837646, + "learning_rate": 9.754070351758795e-06, + "loss": 0.059, + "step": 2950 + }, + { + "epoch": 1.11, + "grad_norm": 1.782431721687317, + "learning_rate": 9.751557788944724e-06, + "loss": 0.0595, + "step": 2975 + }, + { + "epoch": 1.12, + "grad_norm": 1.6734726428985596, + "learning_rate": 9.749045226130654e-06, + "loss": 0.0647, + "step": 3000 + }, + { + "epoch": 1.12, + "eval_loss": 0.08574168384075165, + "eval_runtime": 1335.588, + "eval_samples_per_second": 1.152, + "eval_steps_per_second": 1.152, + "eval_wer": 12.616877045348293, + "step": 3000 + }, + { + "epoch": 1.13, + "grad_norm": 1.8858898878097534, + "learning_rate": 9.746532663316583e-06, + "loss": 0.0604, + "step": 3025 + }, + { + "epoch": 1.14, + "grad_norm": 2.0081357955932617, + "learning_rate": 9.744020100502514e-06, + "loss": 0.0546, + "step": 3050 + }, + { + "epoch": 1.15, + "grad_norm": 2.1094696521759033, + "learning_rate": 9.741507537688443e-06, + "loss": 0.0615, + "step": 3075 + }, + { + "epoch": 1.16, + "grad_norm": 1.7623571157455444, + "learning_rate": 9.738994974874373e-06, + "loss": 0.0623, + "step": 3100 + }, + { + "epoch": 1.17, + "grad_norm": 1.5311427116394043, + "learning_rate": 9.736482412060302e-06, + "loss": 0.0592, + "step": 3125 + }, + { + "epoch": 1.17, + "grad_norm": 1.8093671798706055, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0623, + "step": 3150 + }, + { + "epoch": 1.18, + "grad_norm": 2.207470655441284, + "learning_rate": 9.731457286432162e-06, + "loss": 0.066, + "step": 3175 + }, + { + "epoch": 1.19, + "grad_norm": 2.0668675899505615, + "learning_rate": 9.728944723618092e-06, + "loss": 0.0592, + "step": 3200 + }, + { + "epoch": 1.2, + "grad_norm": 1.8952662944793701, + "learning_rate": 9.726432160804021e-06, + "loss": 0.064, + "step": 3225 + }, + { + "epoch": 1.21, + "grad_norm": 2.031898021697998, + "learning_rate": 9.72391959798995e-06, + "loss": 0.0623, + "step": 3250 + }, + { + "epoch": 1.22, + "grad_norm": 1.7648451328277588, + "learning_rate": 9.721407035175881e-06, + "loss": 0.0634, + "step": 3275 + }, + { + "epoch": 1.23, + "grad_norm": 2.5269434452056885, + "learning_rate": 9.718894472361809e-06, + "loss": 0.0639, + "step": 3300 + }, + { + "epoch": 1.24, + "grad_norm": 2.2838757038116455, + "learning_rate": 9.71638190954774e-06, + "loss": 0.0584, + "step": 3325 + }, + { + "epoch": 1.25, + "grad_norm": 1.8754956722259521, + "learning_rate": 9.71386934673367e-06, + "loss": 0.063, + "step": 3350 + }, + { + "epoch": 1.26, + "grad_norm": 1.8852887153625488, + "learning_rate": 9.711356783919599e-06, + "loss": 0.0598, + "step": 3375 + }, + { + "epoch": 1.27, + "grad_norm": 1.7666773796081543, + "learning_rate": 9.70884422110553e-06, + "loss": 0.0567, + "step": 3400 + }, + { + "epoch": 1.28, + "grad_norm": 1.6645004749298096, + "learning_rate": 9.706331658291457e-06, + "loss": 0.0578, + "step": 3425 + }, + { + "epoch": 1.29, + "grad_norm": 1.5022914409637451, + "learning_rate": 9.703819095477388e-06, + "loss": 0.057, + "step": 3450 + }, + { + "epoch": 1.3, + "grad_norm": 1.6727757453918457, + "learning_rate": 9.701306532663318e-06, + "loss": 0.0601, + "step": 3475 + }, + { + "epoch": 1.3, + "grad_norm": 2.136777877807617, + "learning_rate": 9.698793969849247e-06, + "loss": 0.061, + "step": 3500 + }, + { + "epoch": 1.31, + "grad_norm": 2.248758316040039, + "learning_rate": 9.696281407035176e-06, + "loss": 0.0611, + "step": 3525 + }, + { + "epoch": 1.32, + "grad_norm": 1.8014044761657715, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0588, + "step": 3550 + }, + { + "epoch": 1.33, + "grad_norm": 1.6692901849746704, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0548, + "step": 3575 + }, + { + "epoch": 1.34, + "grad_norm": 1.7921254634857178, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0552, + "step": 3600 + }, + { + "epoch": 1.35, + "grad_norm": 1.8720943927764893, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0547, + "step": 3625 + }, + { + "epoch": 1.36, + "grad_norm": 2.383504867553711, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0595, + "step": 3650 + }, + { + "epoch": 1.37, + "grad_norm": 1.4674872159957886, + "learning_rate": 9.681206030150756e-06, + "loss": 0.0538, + "step": 3675 + }, + { + "epoch": 1.38, + "grad_norm": 2.11032772064209, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0578, + "step": 3700 + }, + { + "epoch": 1.39, + "grad_norm": 1.5534149408340454, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0586, + "step": 3725 + }, + { + "epoch": 1.4, + "grad_norm": 2.2164382934570312, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0609, + "step": 3750 + }, + { + "epoch": 1.41, + "grad_norm": 1.359474539756775, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0557, + "step": 3775 + }, + { + "epoch": 1.42, + "grad_norm": 1.8293066024780273, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0594, + "step": 3800 + }, + { + "epoch": 1.43, + "grad_norm": 2.477381706237793, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0573, + "step": 3825 + }, + { + "epoch": 1.44, + "grad_norm": 2.0538620948791504, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0606, + "step": 3850 + }, + { + "epoch": 1.44, + "grad_norm": 2.131457567214966, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0558, + "step": 3875 + }, + { + "epoch": 1.45, + "grad_norm": 2.1076457500457764, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0596, + "step": 3900 + }, + { + "epoch": 1.46, + "grad_norm": 1.7258329391479492, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0574, + "step": 3925 + }, + { + "epoch": 1.47, + "grad_norm": 1.6699299812316895, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0578, + "step": 3950 + }, + { + "epoch": 1.48, + "grad_norm": 1.855513095855713, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0564, + "step": 3975 + }, + { + "epoch": 1.49, + "grad_norm": 2.4874002933502197, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0554, + "step": 4000 + }, + { + "epoch": 1.49, + "eval_loss": 0.08447124063968658, + "eval_runtime": 1761.7443, + "eval_samples_per_second": 0.874, + "eval_steps_per_second": 0.874, + "eval_wer": 12.40065451145395, + "step": 4000 + }, + { + "epoch": 1.5, + "grad_norm": 1.5181629657745361, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0547, + "step": 4025 + }, + { + "epoch": 1.51, + "grad_norm": 2.127600908279419, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0594, + "step": 4050 + }, + { + "epoch": 1.52, + "grad_norm": 1.996532917022705, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0565, + "step": 4075 + }, + { + "epoch": 1.53, + "grad_norm": 1.3940479755401611, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0539, + "step": 4100 + }, + { + "epoch": 1.54, + "grad_norm": 1.7169098854064941, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0564, + "step": 4125 + }, + { + "epoch": 1.55, + "grad_norm": 1.6934572458267212, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0567, + "step": 4150 + }, + { + "epoch": 1.56, + "grad_norm": 1.8401086330413818, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0518, + "step": 4175 + }, + { + "epoch": 1.57, + "grad_norm": 1.794960618019104, + "learning_rate": 9.628442211055276e-06, + "loss": 0.054, + "step": 4200 + }, + { + "epoch": 1.58, + "grad_norm": 1.6625791788101196, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0535, + "step": 4225 + }, + { + "epoch": 1.58, + "grad_norm": 1.652037262916565, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0537, + "step": 4250 + }, + { + "epoch": 1.59, + "grad_norm": 1.8379573822021484, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0525, + "step": 4275 + }, + { + "epoch": 1.6, + "grad_norm": 1.7845590114593506, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0554, + "step": 4300 + }, + { + "epoch": 1.61, + "grad_norm": 2.1383659839630127, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0526, + "step": 4325 + }, + { + "epoch": 1.62, + "grad_norm": 1.7804546356201172, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0544, + "step": 4350 + }, + { + "epoch": 1.63, + "grad_norm": 2.0541396141052246, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0545, + "step": 4375 + }, + { + "epoch": 1.64, + "grad_norm": 1.7501418590545654, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0542, + "step": 4400 + }, + { + "epoch": 1.65, + "grad_norm": 1.7225345373153687, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0526, + "step": 4425 + }, + { + "epoch": 1.66, + "grad_norm": 1.866979718208313, + "learning_rate": 9.603316582914573e-06, + "loss": 0.052, + "step": 4450 + }, + { + "epoch": 1.67, + "grad_norm": 1.465701699256897, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0531, + "step": 4475 + }, + { + "epoch": 1.68, + "grad_norm": 2.063372850418091, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0496, + "step": 4500 + }, + { + "epoch": 1.69, + "grad_norm": 1.7311581373214722, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0542, + "step": 4525 + }, + { + "epoch": 1.7, + "grad_norm": 1.5659544467926025, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0531, + "step": 4550 + }, + { + "epoch": 1.71, + "grad_norm": 1.9148707389831543, + "learning_rate": 9.590753768844221e-06, + "loss": 0.055, + "step": 4575 + }, + { + "epoch": 1.72, + "grad_norm": 1.5600001811981201, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0471, + "step": 4600 + }, + { + "epoch": 1.72, + "grad_norm": 1.795706868171692, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0512, + "step": 4625 + }, + { + "epoch": 1.73, + "grad_norm": 1.784351110458374, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0502, + "step": 4650 + }, + { + "epoch": 1.74, + "grad_norm": 1.73702073097229, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0538, + "step": 4675 + }, + { + "epoch": 1.75, + "grad_norm": 1.6473242044448853, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0495, + "step": 4700 + }, + { + "epoch": 1.76, + "grad_norm": 1.6917632818222046, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0513, + "step": 4725 + }, + { + "epoch": 1.77, + "grad_norm": 2.1214990615844727, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0535, + "step": 4750 + }, + { + "epoch": 1.78, + "grad_norm": 2.116617202758789, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0537, + "step": 4775 + }, + { + "epoch": 1.79, + "grad_norm": 2.0225181579589844, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0517, + "step": 4800 + }, + { + "epoch": 1.8, + "grad_norm": 1.902759075164795, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0464, + "step": 4825 + }, + { + "epoch": 1.81, + "grad_norm": 2.3589694499969482, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0526, + "step": 4850 + }, + { + "epoch": 1.82, + "grad_norm": 1.6248234510421753, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0518, + "step": 4875 + }, + { + "epoch": 1.83, + "grad_norm": 2.02026104927063, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0524, + "step": 4900 + }, + { + "epoch": 1.84, + "grad_norm": 1.5857410430908203, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0547, + "step": 4925 + }, + { + "epoch": 1.85, + "grad_norm": 1.7978618144989014, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0464, + "step": 4950 + }, + { + "epoch": 1.85, + "grad_norm": 1.8926589488983154, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0496, + "step": 4975 + }, + { + "epoch": 1.86, + "grad_norm": 2.0343477725982666, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0488, + "step": 5000 + }, + { + "epoch": 1.86, + "eval_loss": 0.08067873865365982, + "eval_runtime": 1355.6558, + "eval_samples_per_second": 1.135, + "eval_steps_per_second": 1.135, + "eval_wer": 11.442262739597943, + "step": 5000 + }, + { + "epoch": 1.87, + "grad_norm": 2.0140702724456787, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0483, + "step": 5025 + }, + { + "epoch": 1.88, + "grad_norm": 1.8517276048660278, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0523, + "step": 5050 + }, + { + "epoch": 1.89, + "grad_norm": 1.7463667392730713, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0501, + "step": 5075 + }, + { + "epoch": 1.9, + "grad_norm": 1.9998018741607666, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0482, + "step": 5100 + }, + { + "epoch": 1.91, + "grad_norm": 1.8619129657745361, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0511, + "step": 5125 + }, + { + "epoch": 1.92, + "grad_norm": 1.8974577188491821, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0484, + "step": 5150 + }, + { + "epoch": 1.93, + "grad_norm": 1.7518725395202637, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0469, + "step": 5175 + }, + { + "epoch": 1.94, + "grad_norm": 1.999036192893982, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0474, + "step": 5200 + }, + { + "epoch": 1.95, + "grad_norm": 1.842881202697754, + "learning_rate": 9.525427135678392e-06, + "loss": 0.05, + "step": 5225 + }, + { + "epoch": 1.96, + "grad_norm": 1.7021516561508179, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0499, + "step": 5250 + }, + { + "epoch": 1.97, + "grad_norm": 1.891011357307434, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0497, + "step": 5275 + }, + { + "epoch": 1.98, + "grad_norm": 1.7404553890228271, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0468, + "step": 5300 + }, + { + "epoch": 1.99, + "grad_norm": 1.9893351793289185, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0512, + "step": 5325 + }, + { + "epoch": 1.99, + "grad_norm": 2.1288208961486816, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0454, + "step": 5350 + }, + { + "epoch": 2.0, + "grad_norm": 1.3322736024856567, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0406, + "step": 5375 + }, + { + "epoch": 2.01, + "grad_norm": 1.2802850008010864, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0276, + "step": 5400 + }, + { + "epoch": 2.02, + "grad_norm": 1.2592177391052246, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0264, + "step": 5425 + }, + { + "epoch": 2.03, + "grad_norm": 1.0537230968475342, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0272, + "step": 5450 + }, + { + "epoch": 2.04, + "grad_norm": 1.7407728433609009, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0277, + "step": 5475 + }, + { + "epoch": 2.05, + "grad_norm": 1.7214492559432983, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0248, + "step": 5500 + }, + { + "epoch": 2.06, + "grad_norm": 1.4147610664367676, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0267, + "step": 5525 + }, + { + "epoch": 2.07, + "grad_norm": 1.3702739477157593, + "learning_rate": 9.492763819095479e-06, + "loss": 0.0258, + "step": 5550 + }, + { + "epoch": 2.08, + "grad_norm": 1.6547328233718872, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0263, + "step": 5575 + }, + { + "epoch": 2.09, + "grad_norm": 1.3563820123672485, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0266, + "step": 5600 + }, + { + "epoch": 2.1, + "grad_norm": 1.3001940250396729, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0252, + "step": 5625 + }, + { + "epoch": 2.11, + "grad_norm": 1.6878540515899658, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0286, + "step": 5650 + }, + { + "epoch": 2.12, + "grad_norm": 1.6218816041946411, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0272, + "step": 5675 + }, + { + "epoch": 2.13, + "grad_norm": 1.257480263710022, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0258, + "step": 5700 + }, + { + "epoch": 2.13, + "grad_norm": 1.4039446115493774, + "learning_rate": 9.475175879396985e-06, + "loss": 0.027, + "step": 5725 + }, + { + "epoch": 2.14, + "grad_norm": 1.1557291746139526, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0265, + "step": 5750 + }, + { + "epoch": 2.15, + "grad_norm": 1.446360468864441, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0289, + "step": 5775 + }, + { + "epoch": 2.16, + "grad_norm": 1.4463669061660767, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0278, + "step": 5800 + }, + { + "epoch": 2.17, + "grad_norm": 1.2906705141067505, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0264, + "step": 5825 + }, + { + "epoch": 2.18, + "grad_norm": 1.7094852924346924, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0252, + "step": 5850 + }, + { + "epoch": 2.19, + "grad_norm": 2.1449317932128906, + "learning_rate": 9.460100502512563e-06, + "loss": 0.025, + "step": 5875 + }, + { + "epoch": 2.2, + "grad_norm": 1.7814464569091797, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0265, + "step": 5900 + }, + { + "epoch": 2.21, + "grad_norm": 1.1942181587219238, + "learning_rate": 9.455075376884423e-06, + "loss": 0.025, + "step": 5925 + }, + { + "epoch": 2.22, + "grad_norm": 1.6804550886154175, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0252, + "step": 5950 + }, + { + "epoch": 2.23, + "grad_norm": 1.6111584901809692, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0259, + "step": 5975 + }, + { + "epoch": 2.24, + "grad_norm": 1.395193338394165, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0249, + "step": 6000 + }, + { + "epoch": 2.24, + "eval_loss": 0.09078923612833023, + "eval_runtime": 1355.1443, + "eval_samples_per_second": 1.136, + "eval_steps_per_second": 1.136, + "eval_wer": 12.073398784478728, + "step": 6000 + }, + { + "epoch": 2.25, + "grad_norm": 1.538610816001892, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0277, + "step": 6025 + }, + { + "epoch": 2.26, + "grad_norm": 1.4144881963729858, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0278, + "step": 6050 + }, + { + "epoch": 2.27, + "grad_norm": 1.336661696434021, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0238, + "step": 6075 + }, + { + "epoch": 2.27, + "grad_norm": 1.2063597440719604, + "learning_rate": 9.43748743718593e-06, + "loss": 0.0272, + "step": 6100 + }, + { + "epoch": 2.28, + "grad_norm": 1.381706714630127, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0241, + "step": 6125 + }, + { + "epoch": 2.29, + "grad_norm": 1.7141902446746826, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0246, + "step": 6150 + }, + { + "epoch": 2.3, + "grad_norm": 1.6166282892227173, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0254, + "step": 6175 + }, + { + "epoch": 2.31, + "grad_norm": 1.4941551685333252, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0253, + "step": 6200 + }, + { + "epoch": 2.32, + "grad_norm": 1.3123071193695068, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0231, + "step": 6225 + }, + { + "epoch": 2.33, + "grad_norm": 1.487336277961731, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0252, + "step": 6250 + }, + { + "epoch": 2.34, + "grad_norm": 2.501349687576294, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0261, + "step": 6275 + }, + { + "epoch": 2.35, + "grad_norm": 1.5969834327697754, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0262, + "step": 6300 + }, + { + "epoch": 2.36, + "grad_norm": 1.7855936288833618, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0248, + "step": 6325 + }, + { + "epoch": 2.37, + "grad_norm": 1.3986784219741821, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0276, + "step": 6350 + }, + { + "epoch": 2.38, + "grad_norm": 1.4814369678497314, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0267, + "step": 6375 + }, + { + "epoch": 2.39, + "grad_norm": 1.7178635597229004, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0239, + "step": 6400 + }, + { + "epoch": 2.4, + "grad_norm": 1.6143213510513306, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0261, + "step": 6425 + }, + { + "epoch": 2.4, + "grad_norm": 1.8462485074996948, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0237, + "step": 6450 + }, + { + "epoch": 2.41, + "grad_norm": 1.60640549659729, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0247, + "step": 6475 + }, + { + "epoch": 2.42, + "grad_norm": 1.8515684604644775, + "learning_rate": 9.397286432160805e-06, + "loss": 0.026, + "step": 6500 + }, + { + "epoch": 2.43, + "grad_norm": 2.1577906608581543, + "learning_rate": 9.394773869346736e-06, + "loss": 0.027, + "step": 6525 + }, + { + "epoch": 2.44, + "grad_norm": 1.4974435567855835, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0237, + "step": 6550 + }, + { + "epoch": 2.45, + "grad_norm": 1.3728352785110474, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0221, + "step": 6575 + }, + { + "epoch": 2.46, + "grad_norm": 1.2850501537322998, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0259, + "step": 6600 + }, + { + "epoch": 2.47, + "grad_norm": 1.4264410734176636, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0246, + "step": 6625 + }, + { + "epoch": 2.48, + "grad_norm": 1.3455443382263184, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0236, + "step": 6650 + }, + { + "epoch": 2.49, + "grad_norm": 1.1940556764602661, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0246, + "step": 6675 + }, + { + "epoch": 2.5, + "grad_norm": 1.2464253902435303, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0253, + "step": 6700 + }, + { + "epoch": 2.51, + "grad_norm": 2.1854212284088135, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0254, + "step": 6725 + }, + { + "epoch": 2.52, + "grad_norm": 1.9597482681274414, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0242, + "step": 6750 + }, + { + "epoch": 2.53, + "grad_norm": 1.4772197008132935, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0265, + "step": 6775 + }, + { + "epoch": 2.54, + "grad_norm": 1.5959612131118774, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0262, + "step": 6800 + }, + { + "epoch": 2.54, + "grad_norm": 1.3742398023605347, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0243, + "step": 6825 + }, + { + "epoch": 2.55, + "grad_norm": 1.2986515760421753, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0246, + "step": 6850 + }, + { + "epoch": 2.56, + "grad_norm": 1.3551253080368042, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0244, + "step": 6875 + }, + { + "epoch": 2.57, + "grad_norm": 1.5336631536483765, + "learning_rate": 9.357085427135679e-06, + "loss": 0.025, + "step": 6900 + }, + { + "epoch": 2.58, + "grad_norm": 1.4160338640213013, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0256, + "step": 6925 + }, + { + "epoch": 2.59, + "grad_norm": 1.4917676448822021, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0237, + "step": 6950 + }, + { + "epoch": 2.6, + "grad_norm": 1.2010515928268433, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0231, + "step": 6975 + }, + { + "epoch": 2.61, + "grad_norm": 1.373472809791565, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0246, + "step": 7000 + }, + { + "epoch": 2.61, + "eval_loss": 0.09180256724357605, + "eval_runtime": 1346.8834, + "eval_samples_per_second": 1.143, + "eval_steps_per_second": 1.143, + "eval_wer": 11.845488546049555, + "step": 7000 + }, + { + "epoch": 2.62, + "grad_norm": 1.645215630531311, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0262, + "step": 7025 + }, + { + "epoch": 2.63, + "grad_norm": 1.2551765441894531, + "learning_rate": 9.342010050251257e-06, + "loss": 0.023, + "step": 7050 + }, + { + "epoch": 2.64, + "grad_norm": 1.2499644756317139, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0262, + "step": 7075 + }, + { + "epoch": 2.65, + "grad_norm": 1.8427801132202148, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0246, + "step": 7100 + }, + { + "epoch": 2.66, + "grad_norm": 1.2263388633728027, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0223, + "step": 7125 + }, + { + "epoch": 2.67, + "grad_norm": 1.5520656108856201, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0229, + "step": 7150 + }, + { + "epoch": 2.68, + "grad_norm": 1.6962997913360596, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0256, + "step": 7175 + }, + { + "epoch": 2.68, + "grad_norm": 1.421910047531128, + "learning_rate": 9.326934673366836e-06, + "loss": 0.025, + "step": 7200 + }, + { + "epoch": 2.69, + "grad_norm": 1.2547729015350342, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0255, + "step": 7225 + }, + { + "epoch": 2.7, + "grad_norm": 1.793110728263855, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0254, + "step": 7250 + }, + { + "epoch": 2.71, + "grad_norm": 1.7255946397781372, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0235, + "step": 7275 + }, + { + "epoch": 2.72, + "grad_norm": 1.2067201137542725, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0224, + "step": 7300 + }, + { + "epoch": 2.73, + "grad_norm": 1.4098964929580688, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0254, + "step": 7325 + }, + { + "epoch": 2.74, + "grad_norm": 1.0775898694992065, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0278, + "step": 7350 + }, + { + "epoch": 2.75, + "grad_norm": 1.5975059270858765, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0241, + "step": 7375 + }, + { + "epoch": 2.76, + "grad_norm": 1.7272921800613403, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0255, + "step": 7400 + }, + { + "epoch": 2.77, + "grad_norm": 1.746527075767517, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0228, + "step": 7425 + }, + { + "epoch": 2.78, + "grad_norm": 1.6307902336120605, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0224, + "step": 7450 + }, + { + "epoch": 2.79, + "grad_norm": 1.4680006504058838, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0238, + "step": 7475 + }, + { + "epoch": 2.8, + "grad_norm": 1.1557899713516235, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0236, + "step": 7500 + }, + { + "epoch": 2.81, + "grad_norm": 1.8319536447525024, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0263, + "step": 7525 + }, + { + "epoch": 2.82, + "grad_norm": 1.736344337463379, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0244, + "step": 7550 + }, + { + "epoch": 2.82, + "grad_norm": 1.5658966302871704, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0217, + "step": 7575 + }, + { + "epoch": 2.83, + "grad_norm": 1.2981235980987549, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0212, + "step": 7600 + }, + { + "epoch": 2.84, + "grad_norm": 1.5218373537063599, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0212, + "step": 7625 + }, + { + "epoch": 2.85, + "grad_norm": 1.2633391618728638, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0227, + "step": 7650 + }, + { + "epoch": 2.86, + "grad_norm": 1.3998918533325195, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0226, + "step": 7675 + }, + { + "epoch": 2.87, + "grad_norm": 1.5886342525482178, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0258, + "step": 7700 + }, + { + "epoch": 2.88, + "grad_norm": 1.5842448472976685, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0239, + "step": 7725 + }, + { + "epoch": 2.89, + "grad_norm": 1.1926193237304688, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0234, + "step": 7750 + }, + { + "epoch": 2.9, + "grad_norm": 1.1781537532806396, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0243, + "step": 7775 + }, + { + "epoch": 2.91, + "grad_norm": 1.7634261846542358, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0233, + "step": 7800 + }, + { + "epoch": 2.92, + "grad_norm": 1.4530330896377563, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0214, + "step": 7825 + }, + { + "epoch": 2.93, + "grad_norm": 1.665136694908142, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0232, + "step": 7850 + }, + { + "epoch": 2.94, + "grad_norm": 1.6826722621917725, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0231, + "step": 7875 + }, + { + "epoch": 2.95, + "grad_norm": 1.9273606538772583, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0217, + "step": 7900 + }, + { + "epoch": 2.95, + "grad_norm": 1.243353247642517, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0257, + "step": 7925 + }, + { + "epoch": 2.96, + "grad_norm": 1.5765725374221802, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0248, + "step": 7950 + }, + { + "epoch": 2.97, + "grad_norm": 1.725707769393921, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0214, + "step": 7975 + }, + { + "epoch": 2.98, + "grad_norm": 1.5565807819366455, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0234, + "step": 8000 + }, + { + "epoch": 2.98, + "eval_loss": 0.09421704709529877, + "eval_runtime": 1342.5083, + "eval_samples_per_second": 1.146, + "eval_steps_per_second": 1.146, + "eval_wer": 11.658485273492285, + "step": 8000 + }, + { + "epoch": 2.99, + "grad_norm": 1.548851728439331, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0235, + "step": 8025 + }, + { + "epoch": 3.0, + "grad_norm": 0.9938942790031433, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0188, + "step": 8050 + }, + { + "epoch": 3.01, + "grad_norm": 1.0653603076934814, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0134, + "step": 8075 + }, + { + "epoch": 3.02, + "grad_norm": 1.2165602445602417, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0105, + "step": 8100 + }, + { + "epoch": 3.03, + "grad_norm": 1.3267370462417603, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0121, + "step": 8125 + }, + { + "epoch": 3.04, + "grad_norm": 0.923784613609314, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0106, + "step": 8150 + }, + { + "epoch": 3.05, + "grad_norm": 1.5998246669769287, + "learning_rate": 9.228944723618091e-06, + "loss": 0.01, + "step": 8175 + }, + { + "epoch": 3.06, + "grad_norm": 0.9393593072891235, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0101, + "step": 8200 + }, + { + "epoch": 3.07, + "grad_norm": 1.8202104568481445, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0104, + "step": 8225 + }, + { + "epoch": 3.08, + "grad_norm": 1.4404969215393066, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0106, + "step": 8250 + }, + { + "epoch": 3.09, + "grad_norm": 1.178905725479126, + "learning_rate": 9.21889447236181e-06, + "loss": 0.01, + "step": 8275 + }, + { + "epoch": 3.09, + "grad_norm": 1.2568556070327759, + "learning_rate": 9.21638190954774e-06, + "loss": 0.011, + "step": 8300 + }, + { + "epoch": 3.1, + "grad_norm": 1.3396077156066895, + "learning_rate": 9.213869346733669e-06, + "loss": 0.012, + "step": 8325 + }, + { + "epoch": 3.11, + "grad_norm": 1.1287044286727905, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0119, + "step": 8350 + }, + { + "epoch": 3.12, + "grad_norm": 0.88282710313797, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0115, + "step": 8375 + }, + { + "epoch": 3.13, + "grad_norm": 0.929076611995697, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0121, + "step": 8400 + }, + { + "epoch": 3.14, + "grad_norm": 0.8082321882247925, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0112, + "step": 8425 + }, + { + "epoch": 3.15, + "grad_norm": 1.2249951362609863, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0119, + "step": 8450 + }, + { + "epoch": 3.16, + "grad_norm": 1.204393982887268, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0119, + "step": 8475 + }, + { + "epoch": 3.17, + "grad_norm": 1.069985032081604, + "learning_rate": 9.196281407035178e-06, + "loss": 0.011, + "step": 8500 + }, + { + "epoch": 3.18, + "grad_norm": 0.7777710556983948, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0117, + "step": 8525 + }, + { + "epoch": 3.19, + "grad_norm": 1.2583168745040894, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0118, + "step": 8550 + }, + { + "epoch": 3.2, + "grad_norm": 1.312868595123291, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0115, + "step": 8575 + }, + { + "epoch": 3.21, + "grad_norm": 1.1423325538635254, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0113, + "step": 8600 + }, + { + "epoch": 3.22, + "grad_norm": 1.4350777864456177, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0111, + "step": 8625 + }, + { + "epoch": 3.23, + "grad_norm": 0.823022723197937, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0109, + "step": 8650 + }, + { + "epoch": 3.23, + "grad_norm": 1.0628478527069092, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0106, + "step": 8675 + }, + { + "epoch": 3.24, + "grad_norm": 1.1002813577651978, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0111, + "step": 8700 + }, + { + "epoch": 3.25, + "grad_norm": 1.1403617858886719, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0105, + "step": 8725 + }, + { + "epoch": 3.26, + "grad_norm": 1.2129608392715454, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0107, + "step": 8750 + }, + { + "epoch": 3.27, + "grad_norm": 0.8481287956237793, + "learning_rate": 9.168643216080404e-06, + "loss": 0.0116, + "step": 8775 + }, + { + "epoch": 3.28, + "grad_norm": 0.7288660407066345, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0107, + "step": 8800 + }, + { + "epoch": 3.29, + "grad_norm": 1.5864964723587036, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0122, + "step": 8825 + }, + { + "epoch": 3.3, + "grad_norm": 0.976284921169281, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0127, + "step": 8850 + }, + { + "epoch": 3.31, + "grad_norm": 1.0508469343185425, + "learning_rate": 9.158592964824121e-06, + "loss": 0.0112, + "step": 8875 + }, + { + "epoch": 3.32, + "grad_norm": 0.6463491320610046, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0117, + "step": 8900 + }, + { + "epoch": 3.33, + "grad_norm": 1.1070150136947632, + "learning_rate": 9.15356783919598e-06, + "loss": 0.01, + "step": 8925 + }, + { + "epoch": 3.34, + "grad_norm": 0.7133483290672302, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0107, + "step": 8950 + }, + { + "epoch": 3.35, + "grad_norm": 0.8966423273086548, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0099, + "step": 8975 + }, + { + "epoch": 3.36, + "grad_norm": 1.401126742362976, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0128, + "step": 9000 + }, + { + "epoch": 3.36, + "eval_loss": 0.10322480648756027, + "eval_runtime": 1703.6617, + "eval_samples_per_second": 0.903, + "eval_steps_per_second": 0.903, + "eval_wer": 11.974053295932679, + "step": 9000 + }, + { + "epoch": 3.37, + "grad_norm": 1.164815068244934, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0111, + "step": 9025 + }, + { + "epoch": 3.37, + "grad_norm": 1.4556667804718018, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0108, + "step": 9050 + }, + { + "epoch": 3.38, + "grad_norm": 1.23093843460083, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0125, + "step": 9075 + }, + { + "epoch": 3.39, + "grad_norm": 1.0123471021652222, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0109, + "step": 9100 + }, + { + "epoch": 3.4, + "grad_norm": 1.4089877605438232, + "learning_rate": 9.133467336683417e-06, + "loss": 0.0116, + "step": 9125 + }, + { + "epoch": 3.41, + "grad_norm": 0.9169012904167175, + "learning_rate": 9.130954773869347e-06, + "loss": 0.0118, + "step": 9150 + }, + { + "epoch": 3.42, + "grad_norm": 1.5742087364196777, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0124, + "step": 9175 + }, + { + "epoch": 3.43, + "grad_norm": 1.3126877546310425, + "learning_rate": 9.125929648241205e-06, + "loss": 0.0116, + "step": 9200 + }, + { + "epoch": 3.44, + "grad_norm": 1.4019488096237183, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0106, + "step": 9225 + }, + { + "epoch": 3.45, + "grad_norm": 1.4090949296951294, + "learning_rate": 9.120904522613066e-06, + "loss": 0.0111, + "step": 9250 + }, + { + "epoch": 3.46, + "grad_norm": 1.0924110412597656, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0115, + "step": 9275 + }, + { + "epoch": 3.47, + "grad_norm": 1.5053499937057495, + "learning_rate": 9.115879396984926e-06, + "loss": 0.0105, + "step": 9300 + }, + { + "epoch": 3.48, + "grad_norm": 1.1245368719100952, + "learning_rate": 9.113366834170855e-06, + "loss": 0.0126, + "step": 9325 + }, + { + "epoch": 3.49, + "grad_norm": 1.0686638355255127, + "learning_rate": 9.110854271356785e-06, + "loss": 0.0126, + "step": 9350 + }, + { + "epoch": 3.5, + "grad_norm": 1.4731690883636475, + "learning_rate": 9.108341708542714e-06, + "loss": 0.0124, + "step": 9375 + }, + { + "epoch": 3.5, + "grad_norm": 1.167031168937683, + "learning_rate": 9.105829145728643e-06, + "loss": 0.012, + "step": 9400 + }, + { + "epoch": 3.51, + "grad_norm": 0.8757056593894958, + "learning_rate": 9.103316582914573e-06, + "loss": 0.0115, + "step": 9425 + }, + { + "epoch": 3.52, + "grad_norm": 1.5996369123458862, + "learning_rate": 9.100804020100504e-06, + "loss": 0.0114, + "step": 9450 + }, + { + "epoch": 3.53, + "grad_norm": 1.2265523672103882, + "learning_rate": 9.098291457286433e-06, + "loss": 0.0116, + "step": 9475 + }, + { + "epoch": 3.54, + "grad_norm": 1.2360514402389526, + "learning_rate": 9.095778894472362e-06, + "loss": 0.013, + "step": 9500 + }, + { + "epoch": 3.55, + "grad_norm": 1.0130717754364014, + "learning_rate": 9.093266331658292e-06, + "loss": 0.0117, + "step": 9525 + }, + { + "epoch": 3.56, + "grad_norm": 1.1693044900894165, + "learning_rate": 9.090753768844221e-06, + "loss": 0.0129, + "step": 9550 + }, + { + "epoch": 3.57, + "grad_norm": 1.7469942569732666, + "learning_rate": 9.088241206030152e-06, + "loss": 0.0118, + "step": 9575 + }, + { + "epoch": 3.58, + "grad_norm": 1.6423958539962769, + "learning_rate": 9.085728643216081e-06, + "loss": 0.0134, + "step": 9600 + }, + { + "epoch": 3.59, + "grad_norm": 1.6349661350250244, + "learning_rate": 9.08321608040201e-06, + "loss": 0.0122, + "step": 9625 + }, + { + "epoch": 3.6, + "grad_norm": 0.9867326021194458, + "learning_rate": 9.08070351758794e-06, + "loss": 0.0115, + "step": 9650 + }, + { + "epoch": 3.61, + "grad_norm": 1.2597832679748535, + "learning_rate": 9.07819095477387e-06, + "loss": 0.011, + "step": 9675 + }, + { + "epoch": 3.62, + "grad_norm": 1.1682568788528442, + "learning_rate": 9.0756783919598e-06, + "loss": 0.011, + "step": 9700 + }, + { + "epoch": 3.63, + "grad_norm": 1.4560456275939941, + "learning_rate": 9.07316582914573e-06, + "loss": 0.0125, + "step": 9725 + }, + { + "epoch": 3.64, + "grad_norm": 1.5717931985855103, + "learning_rate": 9.070653266331659e-06, + "loss": 0.0107, + "step": 9750 + }, + { + "epoch": 3.64, + "grad_norm": 1.4575409889221191, + "learning_rate": 9.068140703517588e-06, + "loss": 0.011, + "step": 9775 + }, + { + "epoch": 3.65, + "grad_norm": 1.221413016319275, + "learning_rate": 9.065628140703518e-06, + "loss": 0.0115, + "step": 9800 + }, + { + "epoch": 3.66, + "grad_norm": 1.2630122900009155, + "learning_rate": 9.063115577889447e-06, + "loss": 0.0122, + "step": 9825 + }, + { + "epoch": 3.67, + "grad_norm": 1.1305288076400757, + "learning_rate": 9.060603015075378e-06, + "loss": 0.012, + "step": 9850 + }, + { + "epoch": 3.68, + "grad_norm": 0.9360828995704651, + "learning_rate": 9.058090452261307e-06, + "loss": 0.0116, + "step": 9875 + }, + { + "epoch": 3.69, + "grad_norm": 1.012657642364502, + "learning_rate": 9.055577889447237e-06, + "loss": 0.0098, + "step": 9900 + }, + { + "epoch": 3.7, + "grad_norm": 1.2711548805236816, + "learning_rate": 9.053065326633168e-06, + "loss": 0.0122, + "step": 9925 + }, + { + "epoch": 3.71, + "grad_norm": 3.114434242248535, + "learning_rate": 9.050552763819095e-06, + "loss": 0.0129, + "step": 9950 + }, + { + "epoch": 3.72, + "grad_norm": 1.0616588592529297, + "learning_rate": 9.048040201005026e-06, + "loss": 0.0129, + "step": 9975 + }, + { + "epoch": 3.73, + "grad_norm": 1.4607118368148804, + "learning_rate": 9.045527638190956e-06, + "loss": 0.013, + "step": 10000 + }, + { + "epoch": 3.73, + "eval_loss": 0.10749949514865875, + "eval_runtime": 1383.0076, + "eval_samples_per_second": 1.113, + "eval_steps_per_second": 1.113, + "eval_wer": 11.927302477793361, + "step": 10000 + }, + { + "epoch": 3.74, + "grad_norm": 1.3405953645706177, + "learning_rate": 9.043015075376885e-06, + "loss": 0.0109, + "step": 10025 + }, + { + "epoch": 3.75, + "grad_norm": 1.0837912559509277, + "learning_rate": 9.040502512562814e-06, + "loss": 0.0136, + "step": 10050 + }, + { + "epoch": 3.76, + "grad_norm": 1.6078901290893555, + "learning_rate": 9.037989949748744e-06, + "loss": 0.0102, + "step": 10075 + }, + { + "epoch": 3.77, + "grad_norm": 1.4270018339157104, + "learning_rate": 9.035477386934675e-06, + "loss": 0.0116, + "step": 10100 + }, + { + "epoch": 3.78, + "grad_norm": 0.9668207764625549, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0128, + "step": 10125 + }, + { + "epoch": 3.78, + "grad_norm": 1.3851810693740845, + "learning_rate": 9.030552763819096e-06, + "loss": 0.011, + "step": 10150 + }, + { + "epoch": 3.79, + "grad_norm": 0.8297833204269409, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0118, + "step": 10175 + }, + { + "epoch": 3.8, + "grad_norm": 1.5634695291519165, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0117, + "step": 10200 + }, + { + "epoch": 3.81, + "grad_norm": 1.0718178749084473, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0113, + "step": 10225 + }, + { + "epoch": 3.82, + "grad_norm": 0.9375820159912109, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0104, + "step": 10250 + }, + { + "epoch": 3.83, + "grad_norm": 1.1648296117782593, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0112, + "step": 10275 + }, + { + "epoch": 3.84, + "grad_norm": 1.1030336618423462, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0101, + "step": 10300 + }, + { + "epoch": 3.85, + "grad_norm": 1.4810899496078491, + "learning_rate": 9.012964824120604e-06, + "loss": 0.011, + "step": 10325 + }, + { + "epoch": 3.86, + "grad_norm": 1.3186051845550537, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0122, + "step": 10350 + }, + { + "epoch": 3.87, + "grad_norm": 1.151374101638794, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0117, + "step": 10375 + }, + { + "epoch": 3.88, + "grad_norm": 1.8594458103179932, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0127, + "step": 10400 + }, + { + "epoch": 3.89, + "grad_norm": 1.4096499681472778, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0122, + "step": 10425 + }, + { + "epoch": 3.9, + "grad_norm": 0.8398318290710449, + "learning_rate": 9.000402010050252e-06, + "loss": 0.0126, + "step": 10450 + }, + { + "epoch": 3.91, + "grad_norm": 1.0874656438827515, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0123, + "step": 10475 + }, + { + "epoch": 3.91, + "grad_norm": 1.4017316102981567, + "learning_rate": 8.995376884422111e-06, + "loss": 0.0118, + "step": 10500 + }, + { + "epoch": 3.92, + "grad_norm": 1.6179299354553223, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0115, + "step": 10525 + }, + { + "epoch": 3.93, + "grad_norm": 1.3014582395553589, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0127, + "step": 10550 + }, + { + "epoch": 3.94, + "grad_norm": 1.3122581243515015, + "learning_rate": 8.9878391959799e-06, + "loss": 0.0123, + "step": 10575 + }, + { + "epoch": 3.95, + "grad_norm": 1.5960350036621094, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0118, + "step": 10600 + }, + { + "epoch": 3.96, + "grad_norm": 0.9008790850639343, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0123, + "step": 10625 + }, + { + "epoch": 3.97, + "grad_norm": 1.3239306211471558, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0113, + "step": 10650 + }, + { + "epoch": 3.98, + "grad_norm": 0.9531524181365967, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0122, + "step": 10675 + }, + { + "epoch": 3.99, + "grad_norm": 1.2649215459823608, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0099, + "step": 10700 + }, + { + "epoch": 4.0, + "grad_norm": 1.0959484577178955, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0118, + "step": 10725 + }, + { + "epoch": 4.01, + "grad_norm": 0.4551525115966797, + "learning_rate": 8.97035175879397e-06, + "loss": 0.007, + "step": 10750 + }, + { + "epoch": 4.02, + "grad_norm": 0.7382945418357849, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0065, + "step": 10775 + }, + { + "epoch": 4.03, + "grad_norm": 0.7998570799827576, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0065, + "step": 10800 + }, + { + "epoch": 4.04, + "grad_norm": 0.6924498081207275, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0052, + "step": 10825 + }, + { + "epoch": 4.05, + "grad_norm": 1.1552438735961914, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0061, + "step": 10850 + }, + { + "epoch": 4.05, + "grad_norm": 0.8845757842063904, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0058, + "step": 10875 + }, + { + "epoch": 4.06, + "grad_norm": 0.9015403985977173, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0052, + "step": 10900 + }, + { + "epoch": 4.07, + "grad_norm": 1.3177391290664673, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0056, + "step": 10925 + }, + { + "epoch": 4.08, + "grad_norm": 1.4660435914993286, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0059, + "step": 10950 + }, + { + "epoch": 4.09, + "grad_norm": 1.4967607259750366, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0058, + "step": 10975 + }, + { + "epoch": 4.1, + "grad_norm": 0.5830510258674622, + "learning_rate": 8.945226130653267e-06, + "loss": 0.005, + "step": 11000 + }, + { + "epoch": 4.1, + "eval_loss": 0.1122257262468338, + "eval_runtime": 1384.8568, + "eval_samples_per_second": 1.111, + "eval_steps_per_second": 1.111, + "eval_wer": 11.746143057503506, + "step": 11000 + }, + { + "epoch": 4.11, + "grad_norm": 0.8599172234535217, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0057, + "step": 11025 + }, + { + "epoch": 4.12, + "grad_norm": 0.930284321308136, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0065, + "step": 11050 + }, + { + "epoch": 4.13, + "grad_norm": 0.5343501567840576, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0055, + "step": 11075 + }, + { + "epoch": 4.14, + "grad_norm": 1.0837323665618896, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0058, + "step": 11100 + }, + { + "epoch": 4.15, + "grad_norm": 1.0465070009231567, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0059, + "step": 11125 + }, + { + "epoch": 4.16, + "grad_norm": 0.8390216827392578, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0055, + "step": 11150 + }, + { + "epoch": 4.17, + "grad_norm": 0.7231023907661438, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0063, + "step": 11175 + }, + { + "epoch": 4.18, + "grad_norm": 0.7358904480934143, + "learning_rate": 8.925125628140705e-06, + "loss": 0.007, + "step": 11200 + }, + { + "epoch": 4.19, + "grad_norm": 0.8032443523406982, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0062, + "step": 11225 + }, + { + "epoch": 4.19, + "grad_norm": 0.5706096887588501, + "learning_rate": 8.920100502512563e-06, + "loss": 0.006, + "step": 11250 + }, + { + "epoch": 4.2, + "grad_norm": 1.7987509965896606, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0053, + "step": 11275 + }, + { + "epoch": 4.21, + "grad_norm": 0.7131609916687012, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0056, + "step": 11300 + }, + { + "epoch": 4.22, + "grad_norm": 1.078340768814087, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0059, + "step": 11325 + }, + { + "epoch": 4.23, + "grad_norm": 1.1634280681610107, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0067, + "step": 11350 + }, + { + "epoch": 4.24, + "grad_norm": 1.3319824934005737, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0056, + "step": 11375 + }, + { + "epoch": 4.25, + "grad_norm": 1.1389780044555664, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0063, + "step": 11400 + }, + { + "epoch": 4.26, + "grad_norm": 1.0715914964675903, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0057, + "step": 11425 + }, + { + "epoch": 4.27, + "grad_norm": 1.425121784210205, + "learning_rate": 8.900000000000001e-06, + "loss": 0.006, + "step": 11450 + }, + { + "epoch": 4.28, + "grad_norm": 0.7106032967567444, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0066, + "step": 11475 + }, + { + "epoch": 4.29, + "grad_norm": 1.1608814001083374, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0073, + "step": 11500 + }, + { + "epoch": 4.3, + "grad_norm": 0.7312257289886475, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0057, + "step": 11525 + }, + { + "epoch": 4.31, + "grad_norm": 2.0339419841766357, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0061, + "step": 11550 + }, + { + "epoch": 4.32, + "grad_norm": 0.8926384449005127, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0062, + "step": 11575 + }, + { + "epoch": 4.33, + "grad_norm": 1.247017741203308, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0065, + "step": 11600 + }, + { + "epoch": 4.33, + "grad_norm": 1.608703374862671, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0058, + "step": 11625 + }, + { + "epoch": 4.34, + "grad_norm": 0.9165087342262268, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0065, + "step": 11650 + }, + { + "epoch": 4.35, + "grad_norm": 1.0708649158477783, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0056, + "step": 11675 + }, + { + "epoch": 4.36, + "grad_norm": 0.8924406170845032, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0075, + "step": 11700 + }, + { + "epoch": 4.37, + "grad_norm": 1.0663999319076538, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0067, + "step": 11725 + }, + { + "epoch": 4.38, + "grad_norm": 1.2272683382034302, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0066, + "step": 11750 + }, + { + "epoch": 4.39, + "grad_norm": 1.5852762460708618, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0068, + "step": 11775 + }, + { + "epoch": 4.4, + "grad_norm": 0.6523461937904358, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0065, + "step": 11800 + }, + { + "epoch": 4.41, + "grad_norm": 1.5423625707626343, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0076, + "step": 11825 + }, + { + "epoch": 4.42, + "grad_norm": 1.118537425994873, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0064, + "step": 11850 + }, + { + "epoch": 4.43, + "grad_norm": 1.0647450685501099, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0061, + "step": 11875 + }, + { + "epoch": 4.44, + "grad_norm": 1.1086052656173706, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0067, + "step": 11900 + }, + { + "epoch": 4.45, + "grad_norm": 1.6664087772369385, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0079, + "step": 11925 + }, + { + "epoch": 4.46, + "grad_norm": 1.6590911149978638, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0078, + "step": 11950 + }, + { + "epoch": 4.46, + "grad_norm": 0.5454104542732239, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0072, + "step": 11975 + }, + { + "epoch": 4.47, + "grad_norm": 0.586004376411438, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0076, + "step": 12000 + }, + { + "epoch": 4.47, + "eval_loss": 0.11764564365148544, + "eval_runtime": 1390.4019, + "eval_samples_per_second": 1.107, + "eval_steps_per_second": 1.107, + "eval_wer": 12.114305750350631, + "step": 12000 + }, + { + "epoch": 4.48, + "grad_norm": 1.3217562437057495, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0065, + "step": 12025 + }, + { + "epoch": 4.49, + "grad_norm": 0.7202808260917664, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0063, + "step": 12050 + }, + { + "epoch": 4.5, + "grad_norm": 1.0311901569366455, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0069, + "step": 12075 + }, + { + "epoch": 4.51, + "grad_norm": 0.9058762192726135, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0066, + "step": 12100 + }, + { + "epoch": 4.52, + "grad_norm": 1.036055088043213, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0069, + "step": 12125 + }, + { + "epoch": 4.53, + "grad_norm": 0.9981549382209778, + "learning_rate": 8.82964824120603e-06, + "loss": 0.007, + "step": 12150 + }, + { + "epoch": 4.54, + "grad_norm": 1.0264970064163208, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0064, + "step": 12175 + }, + { + "epoch": 4.55, + "grad_norm": 1.4338140487670898, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0071, + "step": 12200 + }, + { + "epoch": 4.56, + "grad_norm": 1.1356769800186157, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0059, + "step": 12225 + }, + { + "epoch": 4.57, + "grad_norm": 1.246682047843933, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0062, + "step": 12250 + }, + { + "epoch": 4.58, + "grad_norm": 1.1155672073364258, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0062, + "step": 12275 + }, + { + "epoch": 4.59, + "grad_norm": 1.3337323665618896, + "learning_rate": 8.814572864321608e-06, + "loss": 0.007, + "step": 12300 + }, + { + "epoch": 4.6, + "grad_norm": 1.0217392444610596, + "learning_rate": 8.812060301507538e-06, + "loss": 0.007, + "step": 12325 + }, + { + "epoch": 4.6, + "grad_norm": 0.8679639101028442, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0072, + "step": 12350 + }, + { + "epoch": 4.61, + "grad_norm": 0.8417842984199524, + "learning_rate": 8.807035175879398e-06, + "loss": 0.007, + "step": 12375 + }, + { + "epoch": 4.62, + "grad_norm": 1.3473575115203857, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0064, + "step": 12400 + }, + { + "epoch": 4.63, + "grad_norm": 1.4292433261871338, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0068, + "step": 12425 + }, + { + "epoch": 4.64, + "grad_norm": 1.3612487316131592, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0063, + "step": 12450 + }, + { + "epoch": 4.65, + "grad_norm": 0.6271512508392334, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0059, + "step": 12475 + }, + { + "epoch": 4.66, + "grad_norm": 1.135820746421814, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0071, + "step": 12500 + }, + { + "epoch": 4.67, + "grad_norm": 0.8408219814300537, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0057, + "step": 12525 + }, + { + "epoch": 4.68, + "grad_norm": 1.6371828317642212, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0065, + "step": 12550 + }, + { + "epoch": 4.69, + "grad_norm": 0.5912973880767822, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0068, + "step": 12575 + }, + { + "epoch": 4.7, + "grad_norm": 1.5609745979309082, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0074, + "step": 12600 + }, + { + "epoch": 4.71, + "grad_norm": 1.5562773942947388, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0069, + "step": 12625 + }, + { + "epoch": 4.72, + "grad_norm": 0.8265652656555176, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0063, + "step": 12650 + }, + { + "epoch": 4.73, + "grad_norm": 1.3204679489135742, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0064, + "step": 12675 + }, + { + "epoch": 4.74, + "grad_norm": 1.5161316394805908, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0063, + "step": 12700 + }, + { + "epoch": 4.74, + "grad_norm": 0.9623692035675049, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0067, + "step": 12725 + }, + { + "epoch": 4.75, + "grad_norm": 0.7185370326042175, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0061, + "step": 12750 + }, + { + "epoch": 4.76, + "grad_norm": 0.9054931998252869, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0077, + "step": 12775 + }, + { + "epoch": 4.77, + "grad_norm": 2.253526449203491, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0066, + "step": 12800 + }, + { + "epoch": 4.78, + "grad_norm": 0.6722708940505981, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0069, + "step": 12825 + }, + { + "epoch": 4.79, + "grad_norm": 1.226464033126831, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0073, + "step": 12850 + }, + { + "epoch": 4.8, + "grad_norm": 1.2252705097198486, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0081, + "step": 12875 + }, + { + "epoch": 4.81, + "grad_norm": 0.7237691879272461, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0072, + "step": 12900 + }, + { + "epoch": 4.82, + "grad_norm": 0.4789365231990814, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0079, + "step": 12925 + }, + { + "epoch": 4.83, + "grad_norm": 1.2914701700210571, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0056, + "step": 12950 + }, + { + "epoch": 4.84, + "grad_norm": 1.6777392625808716, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0064, + "step": 12975 + }, + { + "epoch": 4.85, + "grad_norm": 0.9470864534378052, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0072, + "step": 13000 + }, + { + "epoch": 4.85, + "eval_loss": 0.11686770617961884, + "eval_runtime": 1383.7887, + "eval_samples_per_second": 1.112, + "eval_steps_per_second": 1.112, + "eval_wer": 11.535764375876578, + "step": 13000 + }, + { + "epoch": 4.86, + "grad_norm": 0.9994572997093201, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0076, + "step": 13025 + }, + { + "epoch": 4.87, + "grad_norm": 1.1132986545562744, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0082, + "step": 13050 + }, + { + "epoch": 4.88, + "grad_norm": 1.0640430450439453, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0061, + "step": 13075 + }, + { + "epoch": 4.88, + "grad_norm": 1.5341507196426392, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0076, + "step": 13100 + }, + { + "epoch": 4.89, + "grad_norm": 0.9299920201301575, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0061, + "step": 13125 + }, + { + "epoch": 4.9, + "grad_norm": 1.2792538404464722, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0062, + "step": 13150 + }, + { + "epoch": 4.91, + "grad_norm": 0.7286988496780396, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0077, + "step": 13175 + }, + { + "epoch": 4.92, + "grad_norm": 1.207006812095642, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0083, + "step": 13200 + }, + { + "epoch": 4.93, + "grad_norm": 1.3205249309539795, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0072, + "step": 13225 + }, + { + "epoch": 4.94, + "grad_norm": 1.924622893333435, + "learning_rate": 8.719095477386934e-06, + "loss": 0.008, + "step": 13250 + }, + { + "epoch": 4.95, + "grad_norm": 1.5854564905166626, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0075, + "step": 13275 + }, + { + "epoch": 4.96, + "grad_norm": 0.9026398062705994, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0058, + "step": 13300 + }, + { + "epoch": 4.97, + "grad_norm": 1.19556725025177, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0065, + "step": 13325 + }, + { + "epoch": 4.98, + "grad_norm": 0.7392479777336121, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0067, + "step": 13350 + }, + { + "epoch": 4.99, + "grad_norm": 0.7667269110679626, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0079, + "step": 13375 + }, + { + "epoch": 5.0, + "grad_norm": 1.5071501731872559, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0071, + "step": 13400 + }, + { + "epoch": 5.01, + "grad_norm": 1.168750524520874, + "learning_rate": 8.701507537688443e-06, + "loss": 0.005, + "step": 13425 + }, + { + "epoch": 5.01, + "grad_norm": 1.172910213470459, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0036, + "step": 13450 + }, + { + "epoch": 5.02, + "grad_norm": 0.5257753133773804, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0032, + "step": 13475 + }, + { + "epoch": 5.03, + "grad_norm": 1.3346056938171387, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0036, + "step": 13500 + }, + { + "epoch": 5.04, + "grad_norm": 0.8845632672309875, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0025, + "step": 13525 + }, + { + "epoch": 5.05, + "grad_norm": 1.1631428003311157, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0026, + "step": 13550 + }, + { + "epoch": 5.06, + "grad_norm": 1.0743290185928345, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0036, + "step": 13575 + }, + { + "epoch": 5.07, + "grad_norm": 1.3920220136642456, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0038, + "step": 13600 + }, + { + "epoch": 5.08, + "grad_norm": 0.4156602919101715, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0033, + "step": 13625 + }, + { + "epoch": 5.09, + "grad_norm": 0.5446797609329224, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0034, + "step": 13650 + }, + { + "epoch": 5.1, + "grad_norm": 1.2357569932937622, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0038, + "step": 13675 + }, + { + "epoch": 5.11, + "grad_norm": 0.9178337454795837, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0037, + "step": 13700 + }, + { + "epoch": 5.12, + "grad_norm": 1.1388508081436157, + "learning_rate": 8.671356783919598e-06, + "loss": 0.004, + "step": 13725 + }, + { + "epoch": 5.13, + "grad_norm": 0.48430079221725464, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0035, + "step": 13750 + }, + { + "epoch": 5.14, + "grad_norm": 0.8630781173706055, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0043, + "step": 13775 + }, + { + "epoch": 5.15, + "grad_norm": 0.8276799917221069, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0038, + "step": 13800 + }, + { + "epoch": 5.15, + "grad_norm": 1.2825649976730347, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0045, + "step": 13825 + }, + { + "epoch": 5.16, + "grad_norm": 1.0889519453048706, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0045, + "step": 13850 + }, + { + "epoch": 5.17, + "grad_norm": 0.5203460454940796, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0036, + "step": 13875 + }, + { + "epoch": 5.18, + "grad_norm": 1.3109155893325806, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0043, + "step": 13900 + }, + { + "epoch": 5.19, + "grad_norm": 1.0895193815231323, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0055, + "step": 13925 + }, + { + "epoch": 5.2, + "grad_norm": 0.8247916102409363, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0036, + "step": 13950 + }, + { + "epoch": 5.21, + "grad_norm": 1.3961807489395142, + "learning_rate": 8.646231155778895e-06, + "loss": 0.004, + "step": 13975 + }, + { + "epoch": 5.22, + "grad_norm": 0.7943711876869202, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0049, + "step": 14000 + }, + { + "epoch": 5.22, + "eval_loss": 0.12259133160114288, + "eval_runtime": 1388.6621, + "eval_samples_per_second": 1.108, + "eval_steps_per_second": 1.108, + "eval_wer": 11.734455352968677, + "step": 14000 + }, + { + "epoch": 5.23, + "grad_norm": 0.3929145336151123, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0041, + "step": 14025 + }, + { + "epoch": 5.24, + "grad_norm": 0.775174081325531, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0042, + "step": 14050 + }, + { + "epoch": 5.25, + "grad_norm": 0.8072852492332458, + "learning_rate": 8.636180904522614e-06, + "loss": 0.004, + "step": 14075 + }, + { + "epoch": 5.26, + "grad_norm": 1.128271222114563, + "learning_rate": 8.633668341708543e-06, + "loss": 0.004, + "step": 14100 + }, + { + "epoch": 5.27, + "grad_norm": 0.6456555128097534, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0034, + "step": 14125 + }, + { + "epoch": 5.28, + "grad_norm": 0.715814471244812, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0044, + "step": 14150 + }, + { + "epoch": 5.29, + "grad_norm": 0.7880674004554749, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0043, + "step": 14175 + }, + { + "epoch": 5.29, + "grad_norm": 0.5693988800048828, + "learning_rate": 8.623618090452262e-06, + "loss": 0.004, + "step": 14200 + }, + { + "epoch": 5.3, + "grad_norm": 0.7927157282829285, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0046, + "step": 14225 + }, + { + "epoch": 5.31, + "grad_norm": 0.4134327471256256, + "learning_rate": 8.618592964824121e-06, + "loss": 0.005, + "step": 14250 + }, + { + "epoch": 5.32, + "grad_norm": 0.6669222116470337, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0039, + "step": 14275 + }, + { + "epoch": 5.33, + "grad_norm": 1.0080457925796509, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0042, + "step": 14300 + }, + { + "epoch": 5.34, + "grad_norm": 1.133906364440918, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0039, + "step": 14325 + }, + { + "epoch": 5.35, + "grad_norm": 2.293273448944092, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0046, + "step": 14350 + }, + { + "epoch": 5.36, + "grad_norm": 0.8966982364654541, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0044, + "step": 14375 + }, + { + "epoch": 5.37, + "grad_norm": 0.7878703474998474, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0033, + "step": 14400 + }, + { + "epoch": 5.38, + "grad_norm": 0.8511309027671814, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0039, + "step": 14425 + }, + { + "epoch": 5.39, + "grad_norm": 2.262268543243408, + "learning_rate": 8.598492462311559e-06, + "loss": 0.0047, + "step": 14450 + }, + { + "epoch": 5.4, + "grad_norm": 0.7046410441398621, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0047, + "step": 14475 + }, + { + "epoch": 5.41, + "grad_norm": 0.9158058166503906, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0043, + "step": 14500 + }, + { + "epoch": 5.42, + "grad_norm": 0.991746723651886, + "learning_rate": 8.590954773869347e-06, + "loss": 0.0044, + "step": 14525 + }, + { + "epoch": 5.43, + "grad_norm": 0.9608747959136963, + "learning_rate": 8.588442211055276e-06, + "loss": 0.0046, + "step": 14550 + }, + { + "epoch": 5.43, + "grad_norm": 0.5906420350074768, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0044, + "step": 14575 + }, + { + "epoch": 5.44, + "grad_norm": 0.747791051864624, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0037, + "step": 14600 + }, + { + "epoch": 5.45, + "grad_norm": 1.2632840871810913, + "learning_rate": 8.580904522613066e-06, + "loss": 0.0048, + "step": 14625 + }, + { + "epoch": 5.46, + "grad_norm": 0.6208354234695435, + "learning_rate": 8.578391959798997e-06, + "loss": 0.0045, + "step": 14650 + }, + { + "epoch": 5.47, + "grad_norm": 0.7109549641609192, + "learning_rate": 8.575879396984925e-06, + "loss": 0.0041, + "step": 14675 + }, + { + "epoch": 5.48, + "grad_norm": 1.7869445085525513, + "learning_rate": 8.573366834170856e-06, + "loss": 0.0044, + "step": 14700 + }, + { + "epoch": 5.49, + "grad_norm": 1.3007566928863525, + "learning_rate": 8.570854271356785e-06, + "loss": 0.0038, + "step": 14725 + }, + { + "epoch": 5.5, + "grad_norm": 2.078573226928711, + "learning_rate": 8.568341708542714e-06, + "loss": 0.0047, + "step": 14750 + }, + { + "epoch": 5.51, + "grad_norm": 0.5940185189247131, + "learning_rate": 8.565829145728644e-06, + "loss": 0.0051, + "step": 14775 + }, + { + "epoch": 5.52, + "grad_norm": 1.274936318397522, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0044, + "step": 14800 + }, + { + "epoch": 5.53, + "grad_norm": 0.6416276097297668, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0044, + "step": 14825 + }, + { + "epoch": 5.54, + "grad_norm": 1.1560136079788208, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0043, + "step": 14850 + }, + { + "epoch": 5.55, + "grad_norm": 1.0217888355255127, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0052, + "step": 14875 + }, + { + "epoch": 5.56, + "grad_norm": 0.8242034316062927, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0046, + "step": 14900 + }, + { + "epoch": 5.56, + "grad_norm": 1.5154880285263062, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0048, + "step": 14925 + }, + { + "epoch": 5.57, + "grad_norm": 0.8583939671516418, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0049, + "step": 14950 + }, + { + "epoch": 5.58, + "grad_norm": 0.8030581474304199, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0046, + "step": 14975 + }, + { + "epoch": 5.59, + "grad_norm": 1.1248334646224976, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0058, + "step": 15000 + }, + { + "epoch": 5.59, + "eval_loss": 0.13044269382953644, + "eval_runtime": 1341.4969, + "eval_samples_per_second": 1.147, + "eval_steps_per_second": 1.147, + "eval_wer": 11.483169705469845, + "step": 15000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 38, + "save_steps": 1000, + "total_flos": 1.384863619055616e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/hindi/checkpoint-15000/training_args.bin b/checkpoints/whisper-small/hindi/checkpoint-15000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3e3c89339087a419fb593c1350d33618bfc097d --- /dev/null +++ b/checkpoints/whisper-small/hindi/checkpoint-15000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01247db72c5c0ba93822c208b9e020b3a48498f68c5093adf816fbc1df2fd9c0 +size 4667 diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/config.json b/checkpoints/whisper-small/kannada/checkpoint-16000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..443d7e310bde8de5c4d242eb7fe45148bc60e671 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50306 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/generation_config.json b/checkpoints/whisper-small/kannada/checkpoint-16000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/model.safetensors b/checkpoints/whisper-small/kannada/checkpoint-16000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c50e32a30b294aef3083a7867891476f7e214ca4 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de48a02cbbe0c1f954853285b95e792ac651c9b4b9dca17b27e11faa683192c +size 966995080 diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/optimizer.pt b/checkpoints/whisper-small/kannada/checkpoint-16000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d052dd1ced0e3f44e124980dcc73ba4790d4ab2 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6559f39940752069b2898952b920c59939976b9d4b590b762ca7faa30b226889 +size 1925063607 diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/preprocessor_config.json b/checkpoints/whisper-small/kannada/checkpoint-16000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/rng_state.pth b/checkpoints/whisper-small/kannada/checkpoint-16000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec34873b56f538bb5f1d33d7e0c7c2ac9867fde3 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2520cde6baab815dbdd43548b073cef8dff411d971b0bbed0184396c22eeb7d7 +size 14575 diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/scheduler.pt b/checkpoints/whisper-small/kannada/checkpoint-16000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b85689f26af4d2482594d11b6f9861451acc028c --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727c44cc2dfca6bcc704367add7e1b518ee46ed6858801c1ed3468b1905332f2 +size 627 diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/trainer_state.json b/checkpoints/whisper-small/kannada/checkpoint-16000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07376557d8c1570f74a83bef9e4cd1cefa297ae6 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/trainer_state.json @@ -0,0 +1,4645 @@ +{ + "best_metric": 26.814889674919996, + "best_model_checkpoint": "results/whisper-small/kannada/checkpoint-6000", + "epoch": 5.965697240865026, + "eval_steps": 1000, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 19.54661750793457, + "learning_rate": 4.6000000000000004e-07, + "loss": 2.2719, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 8.432901382446289, + "learning_rate": 9.600000000000001e-07, + "loss": 2.0086, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 6.714489936828613, + "learning_rate": 1.46e-06, + "loss": 1.7534, + "step": 75 + }, + { + "epoch": 0.04, + "grad_norm": 7.9497175216674805, + "learning_rate": 1.9600000000000003e-06, + "loss": 1.4195, + "step": 100 + }, + { + "epoch": 0.05, + "grad_norm": 6.485223770141602, + "learning_rate": 2.46e-06, + "loss": 0.94, + "step": 125 + }, + { + "epoch": 0.06, + "grad_norm": 5.046996116638184, + "learning_rate": 2.96e-06, + "loss": 0.6742, + "step": 150 + }, + { + "epoch": 0.07, + "grad_norm": 4.568698406219482, + "learning_rate": 3.46e-06, + "loss": 0.5456, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 4.546382904052734, + "learning_rate": 3.96e-06, + "loss": 0.4758, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 4.394645690917969, + "learning_rate": 4.4600000000000005e-06, + "loss": 0.425, + "step": 225 + }, + { + "epoch": 0.09, + "grad_norm": 4.370824337005615, + "learning_rate": 4.960000000000001e-06, + "loss": 0.3924, + "step": 250 + }, + { + "epoch": 0.1, + "grad_norm": 5.9032392501831055, + "learning_rate": 5.460000000000001e-06, + "loss": 0.369, + "step": 275 + }, + { + "epoch": 0.11, + "grad_norm": 4.0830607414245605, + "learning_rate": 5.9600000000000005e-06, + "loss": 0.3595, + "step": 300 + }, + { + "epoch": 0.12, + "grad_norm": 3.7972252368927, + "learning_rate": 6.460000000000001e-06, + "loss": 0.3322, + "step": 325 + }, + { + "epoch": 0.13, + "grad_norm": 4.245180130004883, + "learning_rate": 6.96e-06, + "loss": 0.3171, + "step": 350 + }, + { + "epoch": 0.14, + "grad_norm": 4.390026092529297, + "learning_rate": 7.4600000000000006e-06, + "loss": 0.2875, + "step": 375 + }, + { + "epoch": 0.15, + "grad_norm": 4.736093044281006, + "learning_rate": 7.960000000000002e-06, + "loss": 0.2391, + "step": 400 + }, + { + "epoch": 0.16, + "grad_norm": 3.4611575603485107, + "learning_rate": 8.46e-06, + "loss": 0.2196, + "step": 425 + }, + { + "epoch": 0.17, + "grad_norm": 3.1301281452178955, + "learning_rate": 8.96e-06, + "loss": 0.2144, + "step": 450 + }, + { + "epoch": 0.18, + "grad_norm": 2.9947493076324463, + "learning_rate": 9.460000000000001e-06, + "loss": 0.203, + "step": 475 + }, + { + "epoch": 0.19, + "grad_norm": 3.4344539642333984, + "learning_rate": 9.960000000000001e-06, + "loss": 0.2101, + "step": 500 + }, + { + "epoch": 0.2, + "grad_norm": 2.3570396900177, + "learning_rate": 9.997688442211056e-06, + "loss": 0.1969, + "step": 525 + }, + { + "epoch": 0.21, + "grad_norm": 3.5369765758514404, + "learning_rate": 9.995175879396986e-06, + "loss": 0.1988, + "step": 550 + }, + { + "epoch": 0.21, + "grad_norm": 3.8397271633148193, + "learning_rate": 9.992663316582915e-06, + "loss": 0.1852, + "step": 575 + }, + { + "epoch": 0.22, + "grad_norm": 3.602774143218994, + "learning_rate": 9.990150753768844e-06, + "loss": 0.1808, + "step": 600 + }, + { + "epoch": 0.23, + "grad_norm": 2.9827609062194824, + "learning_rate": 9.987638190954775e-06, + "loss": 0.179, + "step": 625 + }, + { + "epoch": 0.24, + "grad_norm": 2.8767776489257812, + "learning_rate": 9.985125628140705e-06, + "loss": 0.1862, + "step": 650 + }, + { + "epoch": 0.25, + "grad_norm": 2.4987285137176514, + "learning_rate": 9.982613065326634e-06, + "loss": 0.1803, + "step": 675 + }, + { + "epoch": 0.26, + "grad_norm": 3.1002371311187744, + "learning_rate": 9.980100502512565e-06, + "loss": 0.1787, + "step": 700 + }, + { + "epoch": 0.27, + "grad_norm": 3.162116050720215, + "learning_rate": 9.977587939698493e-06, + "loss": 0.1732, + "step": 725 + }, + { + "epoch": 0.28, + "grad_norm": 2.616926908493042, + "learning_rate": 9.975075376884424e-06, + "loss": 0.1658, + "step": 750 + }, + { + "epoch": 0.29, + "grad_norm": 2.8701069355010986, + "learning_rate": 9.972562814070353e-06, + "loss": 0.1599, + "step": 775 + }, + { + "epoch": 0.3, + "grad_norm": 2.451019287109375, + "learning_rate": 9.970050251256282e-06, + "loss": 0.1621, + "step": 800 + }, + { + "epoch": 0.31, + "grad_norm": 2.7227072715759277, + "learning_rate": 9.967537688442212e-06, + "loss": 0.1625, + "step": 825 + }, + { + "epoch": 0.32, + "grad_norm": 2.3797459602355957, + "learning_rate": 9.965025125628141e-06, + "loss": 0.1578, + "step": 850 + }, + { + "epoch": 0.33, + "grad_norm": 2.4450623989105225, + "learning_rate": 9.96251256281407e-06, + "loss": 0.1624, + "step": 875 + }, + { + "epoch": 0.34, + "grad_norm": 1.7471354007720947, + "learning_rate": 9.960000000000001e-06, + "loss": 0.1583, + "step": 900 + }, + { + "epoch": 0.34, + "grad_norm": 2.4757730960845947, + "learning_rate": 9.95748743718593e-06, + "loss": 0.1524, + "step": 925 + }, + { + "epoch": 0.35, + "grad_norm": 2.065958023071289, + "learning_rate": 9.95497487437186e-06, + "loss": 0.1477, + "step": 950 + }, + { + "epoch": 0.36, + "grad_norm": 2.376991033554077, + "learning_rate": 9.952462311557791e-06, + "loss": 0.1481, + "step": 975 + }, + { + "epoch": 0.37, + "grad_norm": 2.0526649951934814, + "learning_rate": 9.949949748743718e-06, + "loss": 0.148, + "step": 1000 + }, + { + "epoch": 0.37, + "eval_loss": 0.10108648240566254, + "eval_runtime": 2207.6331, + "eval_samples_per_second": 0.648, + "eval_steps_per_second": 0.648, + "eval_wer": 35.32086912582112, + "step": 1000 + }, + { + "epoch": 0.38, + "grad_norm": 2.077599287033081, + "learning_rate": 9.94743718592965e-06, + "loss": 0.1465, + "step": 1025 + }, + { + "epoch": 0.39, + "grad_norm": 2.1805808544158936, + "learning_rate": 9.944924623115579e-06, + "loss": 0.1487, + "step": 1050 + }, + { + "epoch": 0.4, + "grad_norm": 2.461416721343994, + "learning_rate": 9.942412060301508e-06, + "loss": 0.1513, + "step": 1075 + }, + { + "epoch": 0.41, + "grad_norm": 2.7310924530029297, + "learning_rate": 9.93989949748744e-06, + "loss": 0.1477, + "step": 1100 + }, + { + "epoch": 0.42, + "grad_norm": 2.1721439361572266, + "learning_rate": 9.937386934673367e-06, + "loss": 0.1427, + "step": 1125 + }, + { + "epoch": 0.43, + "grad_norm": 1.9372196197509766, + "learning_rate": 9.934874371859298e-06, + "loss": 0.1393, + "step": 1150 + }, + { + "epoch": 0.44, + "grad_norm": 2.3955681324005127, + "learning_rate": 9.932361809045227e-06, + "loss": 0.1417, + "step": 1175 + }, + { + "epoch": 0.45, + "grad_norm": 2.336836338043213, + "learning_rate": 9.929849246231156e-06, + "loss": 0.1387, + "step": 1200 + }, + { + "epoch": 0.46, + "grad_norm": 1.8596028089523315, + "learning_rate": 9.927336683417086e-06, + "loss": 0.137, + "step": 1225 + }, + { + "epoch": 0.47, + "grad_norm": 2.0576162338256836, + "learning_rate": 9.924824120603017e-06, + "loss": 0.1341, + "step": 1250 + }, + { + "epoch": 0.48, + "grad_norm": 1.6474709510803223, + "learning_rate": 9.922311557788944e-06, + "loss": 0.1361, + "step": 1275 + }, + { + "epoch": 0.48, + "grad_norm": 2.3481051921844482, + "learning_rate": 9.919798994974875e-06, + "loss": 0.1291, + "step": 1300 + }, + { + "epoch": 0.49, + "grad_norm": 2.3562281131744385, + "learning_rate": 9.917286432160805e-06, + "loss": 0.1348, + "step": 1325 + }, + { + "epoch": 0.5, + "grad_norm": 2.0347254276275635, + "learning_rate": 9.914773869346734e-06, + "loss": 0.1301, + "step": 1350 + }, + { + "epoch": 0.51, + "grad_norm": 1.6649969816207886, + "learning_rate": 9.912261306532665e-06, + "loss": 0.1345, + "step": 1375 + }, + { + "epoch": 0.52, + "grad_norm": 1.5177819728851318, + "learning_rate": 9.909748743718593e-06, + "loss": 0.1264, + "step": 1400 + }, + { + "epoch": 0.53, + "grad_norm": 1.9148095846176147, + "learning_rate": 9.907236180904524e-06, + "loss": 0.1307, + "step": 1425 + }, + { + "epoch": 0.54, + "grad_norm": 2.0510241985321045, + "learning_rate": 9.904723618090453e-06, + "loss": 0.1345, + "step": 1450 + }, + { + "epoch": 0.55, + "grad_norm": 1.8996473550796509, + "learning_rate": 9.902211055276382e-06, + "loss": 0.1231, + "step": 1475 + }, + { + "epoch": 0.56, + "grad_norm": 1.995151400566101, + "learning_rate": 9.899698492462312e-06, + "loss": 0.1335, + "step": 1500 + }, + { + "epoch": 0.57, + "grad_norm": 2.5189664363861084, + "learning_rate": 9.897185929648243e-06, + "loss": 0.1269, + "step": 1525 + }, + { + "epoch": 0.58, + "grad_norm": 1.9099814891815186, + "learning_rate": 9.894673366834172e-06, + "loss": 0.1267, + "step": 1550 + }, + { + "epoch": 0.59, + "grad_norm": 2.036682605743408, + "learning_rate": 9.892160804020101e-06, + "loss": 0.1182, + "step": 1575 + }, + { + "epoch": 0.6, + "grad_norm": 1.6200529336929321, + "learning_rate": 9.88964824120603e-06, + "loss": 0.1249, + "step": 1600 + }, + { + "epoch": 0.61, + "grad_norm": 1.5812511444091797, + "learning_rate": 9.88713567839196e-06, + "loss": 0.1186, + "step": 1625 + }, + { + "epoch": 0.62, + "grad_norm": 1.9737355709075928, + "learning_rate": 9.884623115577891e-06, + "loss": 0.1294, + "step": 1650 + }, + { + "epoch": 0.62, + "grad_norm": 2.052192449569702, + "learning_rate": 9.882110552763819e-06, + "loss": 0.1269, + "step": 1675 + }, + { + "epoch": 0.63, + "grad_norm": 1.9128429889678955, + "learning_rate": 9.87959798994975e-06, + "loss": 0.1185, + "step": 1700 + }, + { + "epoch": 0.64, + "grad_norm": 1.6939224004745483, + "learning_rate": 9.877085427135679e-06, + "loss": 0.1173, + "step": 1725 + }, + { + "epoch": 0.65, + "grad_norm": 1.7194849252700806, + "learning_rate": 9.874572864321608e-06, + "loss": 0.1148, + "step": 1750 + }, + { + "epoch": 0.66, + "grad_norm": 1.8419278860092163, + "learning_rate": 9.87206030150754e-06, + "loss": 0.1219, + "step": 1775 + }, + { + "epoch": 0.67, + "grad_norm": 2.1548714637756348, + "learning_rate": 9.869547738693469e-06, + "loss": 0.1203, + "step": 1800 + }, + { + "epoch": 0.68, + "grad_norm": 1.3571678400039673, + "learning_rate": 9.867035175879398e-06, + "loss": 0.1206, + "step": 1825 + }, + { + "epoch": 0.69, + "grad_norm": 2.4942431449890137, + "learning_rate": 9.864522613065327e-06, + "loss": 0.1187, + "step": 1850 + }, + { + "epoch": 0.7, + "grad_norm": 2.081808567047119, + "learning_rate": 9.862010050251257e-06, + "loss": 0.1156, + "step": 1875 + }, + { + "epoch": 0.71, + "grad_norm": 1.7759159803390503, + "learning_rate": 9.859497487437186e-06, + "loss": 0.1187, + "step": 1900 + }, + { + "epoch": 0.72, + "grad_norm": 1.7987964153289795, + "learning_rate": 9.856984924623117e-06, + "loss": 0.1142, + "step": 1925 + }, + { + "epoch": 0.73, + "grad_norm": 2.140822410583496, + "learning_rate": 9.854472361809046e-06, + "loss": 0.1119, + "step": 1950 + }, + { + "epoch": 0.74, + "grad_norm": 1.8687655925750732, + "learning_rate": 9.851959798994976e-06, + "loss": 0.1161, + "step": 1975 + }, + { + "epoch": 0.75, + "grad_norm": 1.6021441221237183, + "learning_rate": 9.849447236180905e-06, + "loss": 0.1177, + "step": 2000 + }, + { + "epoch": 0.75, + "eval_loss": 0.08234985172748566, + "eval_runtime": 2080.4705, + "eval_samples_per_second": 0.687, + "eval_steps_per_second": 0.687, + "eval_wer": 29.231935320869123, + "step": 2000 + }, + { + "epoch": 0.76, + "grad_norm": 2.1560757160186768, + "learning_rate": 9.846934673366834e-06, + "loss": 0.1122, + "step": 2025 + }, + { + "epoch": 0.76, + "grad_norm": 1.7129706144332886, + "learning_rate": 9.844422110552765e-06, + "loss": 0.1126, + "step": 2050 + }, + { + "epoch": 0.77, + "grad_norm": 1.7894198894500732, + "learning_rate": 9.841909547738695e-06, + "loss": 0.1151, + "step": 2075 + }, + { + "epoch": 0.78, + "grad_norm": 1.6729037761688232, + "learning_rate": 9.839396984924624e-06, + "loss": 0.1164, + "step": 2100 + }, + { + "epoch": 0.79, + "grad_norm": 2.048466920852661, + "learning_rate": 9.836884422110553e-06, + "loss": 0.1156, + "step": 2125 + }, + { + "epoch": 0.8, + "grad_norm": 2.4437124729156494, + "learning_rate": 9.834371859296483e-06, + "loss": 0.1123, + "step": 2150 + }, + { + "epoch": 0.81, + "grad_norm": 1.8519604206085205, + "learning_rate": 9.831859296482414e-06, + "loss": 0.1128, + "step": 2175 + }, + { + "epoch": 0.82, + "grad_norm": 1.8417565822601318, + "learning_rate": 9.829346733668343e-06, + "loss": 0.107, + "step": 2200 + }, + { + "epoch": 0.83, + "grad_norm": 1.7907986640930176, + "learning_rate": 9.826834170854272e-06, + "loss": 0.1127, + "step": 2225 + }, + { + "epoch": 0.84, + "grad_norm": 1.9644109010696411, + "learning_rate": 9.824321608040202e-06, + "loss": 0.1091, + "step": 2250 + }, + { + "epoch": 0.85, + "grad_norm": 1.648077368736267, + "learning_rate": 9.821809045226131e-06, + "loss": 0.1079, + "step": 2275 + }, + { + "epoch": 0.86, + "grad_norm": 1.745536208152771, + "learning_rate": 9.81929648241206e-06, + "loss": 0.1079, + "step": 2300 + }, + { + "epoch": 0.87, + "grad_norm": 1.887178659439087, + "learning_rate": 9.816783919597991e-06, + "loss": 0.1132, + "step": 2325 + }, + { + "epoch": 0.88, + "grad_norm": 1.9075114727020264, + "learning_rate": 9.81427135678392e-06, + "loss": 0.1079, + "step": 2350 + }, + { + "epoch": 0.89, + "grad_norm": 2.1346499919891357, + "learning_rate": 9.81175879396985e-06, + "loss": 0.1048, + "step": 2375 + }, + { + "epoch": 0.89, + "grad_norm": 1.8460966348648071, + "learning_rate": 9.809246231155781e-06, + "loss": 0.1057, + "step": 2400 + }, + { + "epoch": 0.9, + "grad_norm": 2.111588478088379, + "learning_rate": 9.806733668341709e-06, + "loss": 0.1029, + "step": 2425 + }, + { + "epoch": 0.91, + "grad_norm": 1.6844435930252075, + "learning_rate": 9.80422110552764e-06, + "loss": 0.1084, + "step": 2450 + }, + { + "epoch": 0.92, + "grad_norm": 1.4896743297576904, + "learning_rate": 9.801708542713569e-06, + "loss": 0.1086, + "step": 2475 + }, + { + "epoch": 0.93, + "grad_norm": 2.183370351791382, + "learning_rate": 9.799195979899498e-06, + "loss": 0.112, + "step": 2500 + }, + { + "epoch": 0.94, + "grad_norm": 1.516802191734314, + "learning_rate": 9.796683417085428e-06, + "loss": 0.1013, + "step": 2525 + }, + { + "epoch": 0.95, + "grad_norm": 2.0089340209960938, + "learning_rate": 9.794170854271357e-06, + "loss": 0.1075, + "step": 2550 + }, + { + "epoch": 0.96, + "grad_norm": 1.6168724298477173, + "learning_rate": 9.791658291457288e-06, + "loss": 0.1047, + "step": 2575 + }, + { + "epoch": 0.97, + "grad_norm": 1.8499126434326172, + "learning_rate": 9.789145728643217e-06, + "loss": 0.1054, + "step": 2600 + }, + { + "epoch": 0.98, + "grad_norm": 1.8129630088806152, + "learning_rate": 9.786633165829147e-06, + "loss": 0.1073, + "step": 2625 + }, + { + "epoch": 0.99, + "grad_norm": 1.6130260229110718, + "learning_rate": 9.784120603015076e-06, + "loss": 0.1053, + "step": 2650 + }, + { + "epoch": 1.0, + "grad_norm": 1.805712342262268, + "learning_rate": 9.781608040201007e-06, + "loss": 0.1016, + "step": 2675 + }, + { + "epoch": 1.01, + "grad_norm": 1.7830971479415894, + "learning_rate": 9.779095477386934e-06, + "loss": 0.0903, + "step": 2700 + }, + { + "epoch": 1.02, + "grad_norm": 1.607303500175476, + "learning_rate": 9.776582914572866e-06, + "loss": 0.084, + "step": 2725 + }, + { + "epoch": 1.03, + "grad_norm": 1.420632243156433, + "learning_rate": 9.774070351758795e-06, + "loss": 0.0823, + "step": 2750 + }, + { + "epoch": 1.03, + "grad_norm": 1.438372254371643, + "learning_rate": 9.771557788944724e-06, + "loss": 0.087, + "step": 2775 + }, + { + "epoch": 1.04, + "grad_norm": 2.064958095550537, + "learning_rate": 9.769045226130655e-06, + "loss": 0.0875, + "step": 2800 + }, + { + "epoch": 1.05, + "grad_norm": 1.52982759475708, + "learning_rate": 9.766532663316583e-06, + "loss": 0.0881, + "step": 2825 + }, + { + "epoch": 1.06, + "grad_norm": 1.8631987571716309, + "learning_rate": 9.764020100502514e-06, + "loss": 0.0879, + "step": 2850 + }, + { + "epoch": 1.07, + "grad_norm": 1.5333583354949951, + "learning_rate": 9.761507537688443e-06, + "loss": 0.0846, + "step": 2875 + }, + { + "epoch": 1.08, + "grad_norm": 1.4840537309646606, + "learning_rate": 9.758994974874372e-06, + "loss": 0.087, + "step": 2900 + }, + { + "epoch": 1.09, + "grad_norm": 1.5765411853790283, + "learning_rate": 9.756482412060302e-06, + "loss": 0.0825, + "step": 2925 + }, + { + "epoch": 1.1, + "grad_norm": 2.056797981262207, + "learning_rate": 9.753969849246233e-06, + "loss": 0.0819, + "step": 2950 + }, + { + "epoch": 1.11, + "grad_norm": 1.4179672002792358, + "learning_rate": 9.75145728643216e-06, + "loss": 0.0832, + "step": 2975 + }, + { + "epoch": 1.12, + "grad_norm": 1.7067224979400635, + "learning_rate": 9.748944723618091e-06, + "loss": 0.0819, + "step": 3000 + }, + { + "epoch": 1.12, + "eval_loss": 0.07525772601366043, + "eval_runtime": 2548.8313, + "eval_samples_per_second": 0.561, + "eval_steps_per_second": 0.561, + "eval_wer": 27.985514569647968, + "step": 3000 + }, + { + "epoch": 1.13, + "grad_norm": 2.2775192260742188, + "learning_rate": 9.74643216080402e-06, + "loss": 0.0827, + "step": 3025 + }, + { + "epoch": 1.14, + "grad_norm": 1.7249153852462769, + "learning_rate": 9.74391959798995e-06, + "loss": 0.0834, + "step": 3050 + }, + { + "epoch": 1.15, + "grad_norm": 1.5962520837783813, + "learning_rate": 9.741407035175881e-06, + "loss": 0.0849, + "step": 3075 + }, + { + "epoch": 1.16, + "grad_norm": 1.49488365650177, + "learning_rate": 9.738894472361809e-06, + "loss": 0.0858, + "step": 3100 + }, + { + "epoch": 1.17, + "grad_norm": 1.618238925933838, + "learning_rate": 9.73638190954774e-06, + "loss": 0.0866, + "step": 3125 + }, + { + "epoch": 1.17, + "grad_norm": 1.5865789651870728, + "learning_rate": 9.733869346733669e-06, + "loss": 0.0844, + "step": 3150 + }, + { + "epoch": 1.18, + "grad_norm": 1.9913901090621948, + "learning_rate": 9.731356783919598e-06, + "loss": 0.0826, + "step": 3175 + }, + { + "epoch": 1.19, + "grad_norm": 1.913456916809082, + "learning_rate": 9.72884422110553e-06, + "loss": 0.0843, + "step": 3200 + }, + { + "epoch": 1.2, + "grad_norm": 1.6997488737106323, + "learning_rate": 9.726331658291459e-06, + "loss": 0.0821, + "step": 3225 + }, + { + "epoch": 1.21, + "grad_norm": 2.0870800018310547, + "learning_rate": 9.723819095477388e-06, + "loss": 0.0838, + "step": 3250 + }, + { + "epoch": 1.22, + "grad_norm": 2.2650699615478516, + "learning_rate": 9.721306532663317e-06, + "loss": 0.0845, + "step": 3275 + }, + { + "epoch": 1.23, + "grad_norm": 1.4418113231658936, + "learning_rate": 9.718793969849247e-06, + "loss": 0.0821, + "step": 3300 + }, + { + "epoch": 1.24, + "grad_norm": 1.711054801940918, + "learning_rate": 9.716281407035176e-06, + "loss": 0.0856, + "step": 3325 + }, + { + "epoch": 1.25, + "grad_norm": 1.4169772863388062, + "learning_rate": 9.713768844221107e-06, + "loss": 0.0769, + "step": 3350 + }, + { + "epoch": 1.26, + "grad_norm": 1.403168797492981, + "learning_rate": 9.711256281407035e-06, + "loss": 0.0794, + "step": 3375 + }, + { + "epoch": 1.27, + "grad_norm": 1.6217478513717651, + "learning_rate": 9.708743718592966e-06, + "loss": 0.0791, + "step": 3400 + }, + { + "epoch": 1.28, + "grad_norm": 1.4793338775634766, + "learning_rate": 9.706231155778895e-06, + "loss": 0.0771, + "step": 3425 + }, + { + "epoch": 1.29, + "grad_norm": 1.977168083190918, + "learning_rate": 9.703718592964824e-06, + "loss": 0.0805, + "step": 3450 + }, + { + "epoch": 1.3, + "grad_norm": 1.5506858825683594, + "learning_rate": 9.701206030150755e-06, + "loss": 0.0831, + "step": 3475 + }, + { + "epoch": 1.3, + "grad_norm": 1.2950645685195923, + "learning_rate": 9.698693467336685e-06, + "loss": 0.0798, + "step": 3500 + }, + { + "epoch": 1.31, + "grad_norm": 1.6253548860549927, + "learning_rate": 9.696180904522614e-06, + "loss": 0.0777, + "step": 3525 + }, + { + "epoch": 1.32, + "grad_norm": 1.7112475633621216, + "learning_rate": 9.693668341708543e-06, + "loss": 0.0856, + "step": 3550 + }, + { + "epoch": 1.33, + "grad_norm": 1.2717041969299316, + "learning_rate": 9.691155778894473e-06, + "loss": 0.0819, + "step": 3575 + }, + { + "epoch": 1.34, + "grad_norm": 1.477927803993225, + "learning_rate": 9.688643216080402e-06, + "loss": 0.0794, + "step": 3600 + }, + { + "epoch": 1.35, + "grad_norm": 1.5506901741027832, + "learning_rate": 9.686130653266333e-06, + "loss": 0.0831, + "step": 3625 + }, + { + "epoch": 1.36, + "grad_norm": 1.7045730352401733, + "learning_rate": 9.683618090452262e-06, + "loss": 0.0795, + "step": 3650 + }, + { + "epoch": 1.37, + "grad_norm": 1.914917230606079, + "learning_rate": 9.681105527638192e-06, + "loss": 0.0837, + "step": 3675 + }, + { + "epoch": 1.38, + "grad_norm": 1.6002886295318604, + "learning_rate": 9.678592964824121e-06, + "loss": 0.0769, + "step": 3700 + }, + { + "epoch": 1.39, + "grad_norm": 1.6318720579147339, + "learning_rate": 9.67608040201005e-06, + "loss": 0.0807, + "step": 3725 + }, + { + "epoch": 1.4, + "grad_norm": 1.5081806182861328, + "learning_rate": 9.673567839195981e-06, + "loss": 0.0779, + "step": 3750 + }, + { + "epoch": 1.41, + "grad_norm": 1.889849066734314, + "learning_rate": 9.67105527638191e-06, + "loss": 0.0825, + "step": 3775 + }, + { + "epoch": 1.42, + "grad_norm": 1.6444274187088013, + "learning_rate": 9.66854271356784e-06, + "loss": 0.0801, + "step": 3800 + }, + { + "epoch": 1.43, + "grad_norm": 1.6678863763809204, + "learning_rate": 9.666030150753771e-06, + "loss": 0.0782, + "step": 3825 + }, + { + "epoch": 1.44, + "grad_norm": 1.569734811782837, + "learning_rate": 9.663517587939699e-06, + "loss": 0.0774, + "step": 3850 + }, + { + "epoch": 1.44, + "grad_norm": 1.6846463680267334, + "learning_rate": 9.66100502512563e-06, + "loss": 0.0781, + "step": 3875 + }, + { + "epoch": 1.45, + "grad_norm": 1.6751489639282227, + "learning_rate": 9.658492462311559e-06, + "loss": 0.0812, + "step": 3900 + }, + { + "epoch": 1.46, + "grad_norm": 1.1891255378723145, + "learning_rate": 9.655979899497488e-06, + "loss": 0.0789, + "step": 3925 + }, + { + "epoch": 1.47, + "grad_norm": 1.2462875843048096, + "learning_rate": 9.653467336683418e-06, + "loss": 0.0758, + "step": 3950 + }, + { + "epoch": 1.48, + "grad_norm": 1.4290119409561157, + "learning_rate": 9.650954773869347e-06, + "loss": 0.0742, + "step": 3975 + }, + { + "epoch": 1.49, + "grad_norm": 1.4148045778274536, + "learning_rate": 9.648442211055276e-06, + "loss": 0.075, + "step": 4000 + }, + { + "epoch": 1.49, + "eval_loss": 0.07192394882440567, + "eval_runtime": 2538.4971, + "eval_samples_per_second": 0.563, + "eval_steps_per_second": 0.563, + "eval_wer": 27.101229577227553, + "step": 4000 + }, + { + "epoch": 1.5, + "grad_norm": 1.1804524660110474, + "learning_rate": 9.645929648241207e-06, + "loss": 0.0773, + "step": 4025 + }, + { + "epoch": 1.51, + "grad_norm": 1.6045362949371338, + "learning_rate": 9.643417085427137e-06, + "loss": 0.0779, + "step": 4050 + }, + { + "epoch": 1.52, + "grad_norm": 1.5315287113189697, + "learning_rate": 9.640904522613066e-06, + "loss": 0.0742, + "step": 4075 + }, + { + "epoch": 1.53, + "grad_norm": 1.2856786251068115, + "learning_rate": 9.638391959798997e-06, + "loss": 0.0747, + "step": 4100 + }, + { + "epoch": 1.54, + "grad_norm": 1.7166868448257446, + "learning_rate": 9.635879396984925e-06, + "loss": 0.0715, + "step": 4125 + }, + { + "epoch": 1.55, + "grad_norm": 1.4262725114822388, + "learning_rate": 9.633366834170856e-06, + "loss": 0.0765, + "step": 4150 + }, + { + "epoch": 1.56, + "grad_norm": 1.4135369062423706, + "learning_rate": 9.630854271356785e-06, + "loss": 0.0791, + "step": 4175 + }, + { + "epoch": 1.57, + "grad_norm": 1.4433858394622803, + "learning_rate": 9.628341708542714e-06, + "loss": 0.0737, + "step": 4200 + }, + { + "epoch": 1.58, + "grad_norm": 1.856028437614441, + "learning_rate": 9.625829145728644e-06, + "loss": 0.0764, + "step": 4225 + }, + { + "epoch": 1.58, + "grad_norm": 1.7072802782058716, + "learning_rate": 9.623316582914573e-06, + "loss": 0.0742, + "step": 4250 + }, + { + "epoch": 1.59, + "grad_norm": 1.7220603227615356, + "learning_rate": 9.620804020100504e-06, + "loss": 0.0738, + "step": 4275 + }, + { + "epoch": 1.6, + "grad_norm": 1.5442018508911133, + "learning_rate": 9.618291457286433e-06, + "loss": 0.0766, + "step": 4300 + }, + { + "epoch": 1.61, + "grad_norm": 1.3857547044754028, + "learning_rate": 9.615778894472363e-06, + "loss": 0.0763, + "step": 4325 + }, + { + "epoch": 1.62, + "grad_norm": 1.442107081413269, + "learning_rate": 9.613266331658292e-06, + "loss": 0.0748, + "step": 4350 + }, + { + "epoch": 1.63, + "grad_norm": 1.5021737813949585, + "learning_rate": 9.610753768844223e-06, + "loss": 0.0806, + "step": 4375 + }, + { + "epoch": 1.64, + "grad_norm": 1.7725205421447754, + "learning_rate": 9.60824120603015e-06, + "loss": 0.0783, + "step": 4400 + }, + { + "epoch": 1.65, + "grad_norm": 1.302195429801941, + "learning_rate": 9.605728643216082e-06, + "loss": 0.0772, + "step": 4425 + }, + { + "epoch": 1.66, + "grad_norm": 1.4408564567565918, + "learning_rate": 9.60321608040201e-06, + "loss": 0.0717, + "step": 4450 + }, + { + "epoch": 1.67, + "grad_norm": 1.348891019821167, + "learning_rate": 9.60070351758794e-06, + "loss": 0.0763, + "step": 4475 + }, + { + "epoch": 1.68, + "grad_norm": 1.7258626222610474, + "learning_rate": 9.598190954773871e-06, + "loss": 0.0763, + "step": 4500 + }, + { + "epoch": 1.69, + "grad_norm": 1.4240843057632446, + "learning_rate": 9.595678391959799e-06, + "loss": 0.0714, + "step": 4525 + }, + { + "epoch": 1.7, + "grad_norm": 1.5008916854858398, + "learning_rate": 9.59316582914573e-06, + "loss": 0.0748, + "step": 4550 + }, + { + "epoch": 1.71, + "grad_norm": 1.4647960662841797, + "learning_rate": 9.59065326633166e-06, + "loss": 0.0699, + "step": 4575 + }, + { + "epoch": 1.72, + "grad_norm": 1.4205151796340942, + "learning_rate": 9.588140703517588e-06, + "loss": 0.076, + "step": 4600 + }, + { + "epoch": 1.72, + "grad_norm": 2.113665819168091, + "learning_rate": 9.585628140703518e-06, + "loss": 0.0704, + "step": 4625 + }, + { + "epoch": 1.73, + "grad_norm": 1.5110183954238892, + "learning_rate": 9.583115577889449e-06, + "loss": 0.0706, + "step": 4650 + }, + { + "epoch": 1.74, + "grad_norm": 1.6676175594329834, + "learning_rate": 9.580603015075378e-06, + "loss": 0.0739, + "step": 4675 + }, + { + "epoch": 1.75, + "grad_norm": 1.5836423635482788, + "learning_rate": 9.578090452261307e-06, + "loss": 0.0664, + "step": 4700 + }, + { + "epoch": 1.76, + "grad_norm": 1.5296155214309692, + "learning_rate": 9.575577889447237e-06, + "loss": 0.0746, + "step": 4725 + }, + { + "epoch": 1.77, + "grad_norm": 1.4183201789855957, + "learning_rate": 9.573065326633166e-06, + "loss": 0.0707, + "step": 4750 + }, + { + "epoch": 1.78, + "grad_norm": 2.2198328971862793, + "learning_rate": 9.570552763819097e-06, + "loss": 0.0742, + "step": 4775 + }, + { + "epoch": 1.79, + "grad_norm": 1.6951853036880493, + "learning_rate": 9.568040201005025e-06, + "loss": 0.0693, + "step": 4800 + }, + { + "epoch": 1.8, + "grad_norm": 1.451962947845459, + "learning_rate": 9.565527638190956e-06, + "loss": 0.0733, + "step": 4825 + }, + { + "epoch": 1.81, + "grad_norm": 1.2215192317962646, + "learning_rate": 9.563015075376885e-06, + "loss": 0.0727, + "step": 4850 + }, + { + "epoch": 1.82, + "grad_norm": 1.4376420974731445, + "learning_rate": 9.560502512562814e-06, + "loss": 0.0705, + "step": 4875 + }, + { + "epoch": 1.83, + "grad_norm": 1.842991590499878, + "learning_rate": 9.557989949748745e-06, + "loss": 0.0662, + "step": 4900 + }, + { + "epoch": 1.84, + "grad_norm": 1.3064733743667603, + "learning_rate": 9.555477386934675e-06, + "loss": 0.0719, + "step": 4925 + }, + { + "epoch": 1.85, + "grad_norm": 1.3885329961776733, + "learning_rate": 9.552964824120604e-06, + "loss": 0.0681, + "step": 4950 + }, + { + "epoch": 1.85, + "grad_norm": 1.4466100931167603, + "learning_rate": 9.550452261306533e-06, + "loss": 0.0712, + "step": 4975 + }, + { + "epoch": 1.86, + "grad_norm": 1.4837807416915894, + "learning_rate": 9.547939698492463e-06, + "loss": 0.0708, + "step": 5000 + }, + { + "epoch": 1.86, + "eval_loss": 0.07066036760807037, + "eval_runtime": 2053.122, + "eval_samples_per_second": 0.697, + "eval_steps_per_second": 0.697, + "eval_wer": 26.974903149738928, + "step": 5000 + }, + { + "epoch": 1.87, + "grad_norm": 1.3302252292633057, + "learning_rate": 9.545427135678392e-06, + "loss": 0.07, + "step": 5025 + }, + { + "epoch": 1.88, + "grad_norm": 1.3409128189086914, + "learning_rate": 9.542914572864323e-06, + "loss": 0.0662, + "step": 5050 + }, + { + "epoch": 1.89, + "grad_norm": 1.5259170532226562, + "learning_rate": 9.540402010050252e-06, + "loss": 0.072, + "step": 5075 + }, + { + "epoch": 1.9, + "grad_norm": 1.7194525003433228, + "learning_rate": 9.537889447236182e-06, + "loss": 0.0736, + "step": 5100 + }, + { + "epoch": 1.91, + "grad_norm": 1.5715749263763428, + "learning_rate": 9.535376884422111e-06, + "loss": 0.072, + "step": 5125 + }, + { + "epoch": 1.92, + "grad_norm": 1.4237885475158691, + "learning_rate": 9.53286432160804e-06, + "loss": 0.0698, + "step": 5150 + }, + { + "epoch": 1.93, + "grad_norm": 1.629292607307434, + "learning_rate": 9.530351758793971e-06, + "loss": 0.0695, + "step": 5175 + }, + { + "epoch": 1.94, + "grad_norm": 1.6069539785385132, + "learning_rate": 9.5278391959799e-06, + "loss": 0.0682, + "step": 5200 + }, + { + "epoch": 1.95, + "grad_norm": 1.3231395483016968, + "learning_rate": 9.52532663316583e-06, + "loss": 0.0692, + "step": 5225 + }, + { + "epoch": 1.96, + "grad_norm": 1.516494631767273, + "learning_rate": 9.52281407035176e-06, + "loss": 0.071, + "step": 5250 + }, + { + "epoch": 1.97, + "grad_norm": 1.3763819932937622, + "learning_rate": 9.520301507537689e-06, + "loss": 0.0694, + "step": 5275 + }, + { + "epoch": 1.98, + "grad_norm": 1.843558430671692, + "learning_rate": 9.51778894472362e-06, + "loss": 0.0685, + "step": 5300 + }, + { + "epoch": 1.99, + "grad_norm": 1.553704023361206, + "learning_rate": 9.515276381909549e-06, + "loss": 0.0718, + "step": 5325 + }, + { + "epoch": 1.99, + "grad_norm": 1.6482255458831787, + "learning_rate": 9.512763819095478e-06, + "loss": 0.0745, + "step": 5350 + }, + { + "epoch": 2.0, + "grad_norm": 1.3200596570968628, + "learning_rate": 9.510251256281408e-06, + "loss": 0.0602, + "step": 5375 + }, + { + "epoch": 2.01, + "grad_norm": 1.1243267059326172, + "learning_rate": 9.507738693467337e-06, + "loss": 0.0506, + "step": 5400 + }, + { + "epoch": 2.02, + "grad_norm": 1.385810375213623, + "learning_rate": 9.505226130653266e-06, + "loss": 0.0498, + "step": 5425 + }, + { + "epoch": 2.03, + "grad_norm": 1.539893627166748, + "learning_rate": 9.502713567839197e-06, + "loss": 0.0453, + "step": 5450 + }, + { + "epoch": 2.04, + "grad_norm": 1.29123854637146, + "learning_rate": 9.500201005025127e-06, + "loss": 0.0508, + "step": 5475 + }, + { + "epoch": 2.05, + "grad_norm": 1.6902750730514526, + "learning_rate": 9.497688442211056e-06, + "loss": 0.0474, + "step": 5500 + }, + { + "epoch": 2.06, + "grad_norm": 1.492300033569336, + "learning_rate": 9.495175879396987e-06, + "loss": 0.0474, + "step": 5525 + }, + { + "epoch": 2.07, + "grad_norm": 1.174280047416687, + "learning_rate": 9.492663316582915e-06, + "loss": 0.0495, + "step": 5550 + }, + { + "epoch": 2.08, + "grad_norm": 1.428352952003479, + "learning_rate": 9.490150753768846e-06, + "loss": 0.0466, + "step": 5575 + }, + { + "epoch": 2.09, + "grad_norm": 1.340778112411499, + "learning_rate": 9.487638190954775e-06, + "loss": 0.0477, + "step": 5600 + }, + { + "epoch": 2.1, + "grad_norm": 1.2431505918502808, + "learning_rate": 9.485125628140704e-06, + "loss": 0.0466, + "step": 5625 + }, + { + "epoch": 2.11, + "grad_norm": 1.3293657302856445, + "learning_rate": 9.482613065326634e-06, + "loss": 0.046, + "step": 5650 + }, + { + "epoch": 2.12, + "grad_norm": 1.2925738096237183, + "learning_rate": 9.480100502512563e-06, + "loss": 0.0483, + "step": 5675 + }, + { + "epoch": 2.13, + "grad_norm": 1.3222073316574097, + "learning_rate": 9.477587939698494e-06, + "loss": 0.0489, + "step": 5700 + }, + { + "epoch": 2.13, + "grad_norm": 1.2681841850280762, + "learning_rate": 9.475075376884423e-06, + "loss": 0.0474, + "step": 5725 + }, + { + "epoch": 2.14, + "grad_norm": 1.3339241743087769, + "learning_rate": 9.472562814070353e-06, + "loss": 0.0457, + "step": 5750 + }, + { + "epoch": 2.15, + "grad_norm": 1.3247716426849365, + "learning_rate": 9.470050251256282e-06, + "loss": 0.0491, + "step": 5775 + }, + { + "epoch": 2.16, + "grad_norm": 1.3950749635696411, + "learning_rate": 9.467537688442213e-06, + "loss": 0.0486, + "step": 5800 + }, + { + "epoch": 2.17, + "grad_norm": 1.4458516836166382, + "learning_rate": 9.46502512562814e-06, + "loss": 0.0453, + "step": 5825 + }, + { + "epoch": 2.18, + "grad_norm": 1.3284378051757812, + "learning_rate": 9.462512562814072e-06, + "loss": 0.0487, + "step": 5850 + }, + { + "epoch": 2.19, + "grad_norm": 1.4833680391311646, + "learning_rate": 9.460000000000001e-06, + "loss": 0.046, + "step": 5875 + }, + { + "epoch": 2.2, + "grad_norm": 1.2249044179916382, + "learning_rate": 9.45748743718593e-06, + "loss": 0.0481, + "step": 5900 + }, + { + "epoch": 2.21, + "grad_norm": 1.4438598155975342, + "learning_rate": 9.454974874371861e-06, + "loss": 0.0442, + "step": 5925 + }, + { + "epoch": 2.22, + "grad_norm": 1.2253786325454712, + "learning_rate": 9.452462311557789e-06, + "loss": 0.0468, + "step": 5950 + }, + { + "epoch": 2.23, + "grad_norm": 1.2840815782546997, + "learning_rate": 9.44994974874372e-06, + "loss": 0.0479, + "step": 5975 + }, + { + "epoch": 2.24, + "grad_norm": 1.3700655698776245, + "learning_rate": 9.44743718592965e-06, + "loss": 0.0481, + "step": 6000 + }, + { + "epoch": 2.24, + "eval_loss": 0.07441301643848419, + "eval_runtime": 2558.0511, + "eval_samples_per_second": 0.559, + "eval_steps_per_second": 0.559, + "eval_wer": 26.814889674919996, + "step": 6000 + }, + { + "epoch": 2.25, + "grad_norm": 1.4929529428482056, + "learning_rate": 9.444924623115579e-06, + "loss": 0.0477, + "step": 6025 + }, + { + "epoch": 2.26, + "grad_norm": 1.4263715744018555, + "learning_rate": 9.442412060301508e-06, + "loss": 0.0488, + "step": 6050 + }, + { + "epoch": 2.27, + "grad_norm": 1.387528419494629, + "learning_rate": 9.439899497487439e-06, + "loss": 0.044, + "step": 6075 + }, + { + "epoch": 2.27, + "grad_norm": 1.5023152828216553, + "learning_rate": 9.437386934673367e-06, + "loss": 0.048, + "step": 6100 + }, + { + "epoch": 2.28, + "grad_norm": 1.3187355995178223, + "learning_rate": 9.434874371859298e-06, + "loss": 0.0488, + "step": 6125 + }, + { + "epoch": 2.29, + "grad_norm": 1.6799228191375732, + "learning_rate": 9.432361809045227e-06, + "loss": 0.0487, + "step": 6150 + }, + { + "epoch": 2.3, + "grad_norm": 1.1507903337478638, + "learning_rate": 9.429849246231156e-06, + "loss": 0.0458, + "step": 6175 + }, + { + "epoch": 2.31, + "grad_norm": 1.8209435939788818, + "learning_rate": 9.427336683417087e-06, + "loss": 0.0473, + "step": 6200 + }, + { + "epoch": 2.32, + "grad_norm": 1.503057837486267, + "learning_rate": 9.424824120603015e-06, + "loss": 0.0441, + "step": 6225 + }, + { + "epoch": 2.33, + "grad_norm": 1.2354737520217896, + "learning_rate": 9.422311557788946e-06, + "loss": 0.0445, + "step": 6250 + }, + { + "epoch": 2.34, + "grad_norm": 1.677775502204895, + "learning_rate": 9.419798994974875e-06, + "loss": 0.0475, + "step": 6275 + }, + { + "epoch": 2.35, + "grad_norm": 1.3887990713119507, + "learning_rate": 9.417286432160804e-06, + "loss": 0.0429, + "step": 6300 + }, + { + "epoch": 2.36, + "grad_norm": 1.3618272542953491, + "learning_rate": 9.414773869346736e-06, + "loss": 0.0467, + "step": 6325 + }, + { + "epoch": 2.37, + "grad_norm": 1.3968923091888428, + "learning_rate": 9.412261306532665e-06, + "loss": 0.0466, + "step": 6350 + }, + { + "epoch": 2.38, + "grad_norm": 1.424878478050232, + "learning_rate": 9.409748743718594e-06, + "loss": 0.044, + "step": 6375 + }, + { + "epoch": 2.39, + "grad_norm": 1.4087820053100586, + "learning_rate": 9.407236180904523e-06, + "loss": 0.047, + "step": 6400 + }, + { + "epoch": 2.4, + "grad_norm": 1.6095998287200928, + "learning_rate": 9.404723618090453e-06, + "loss": 0.0473, + "step": 6425 + }, + { + "epoch": 2.4, + "grad_norm": 1.641311764717102, + "learning_rate": 9.402211055276382e-06, + "loss": 0.0455, + "step": 6450 + }, + { + "epoch": 2.41, + "grad_norm": 1.6309386491775513, + "learning_rate": 9.399698492462313e-06, + "loss": 0.0453, + "step": 6475 + }, + { + "epoch": 2.42, + "grad_norm": 1.3663755655288696, + "learning_rate": 9.39718592964824e-06, + "loss": 0.0439, + "step": 6500 + }, + { + "epoch": 2.43, + "grad_norm": 1.2939667701721191, + "learning_rate": 9.394673366834172e-06, + "loss": 0.0445, + "step": 6525 + }, + { + "epoch": 2.44, + "grad_norm": 1.5285240411758423, + "learning_rate": 9.392160804020101e-06, + "loss": 0.0465, + "step": 6550 + }, + { + "epoch": 2.45, + "grad_norm": 1.1799960136413574, + "learning_rate": 9.38964824120603e-06, + "loss": 0.0467, + "step": 6575 + }, + { + "epoch": 2.46, + "grad_norm": 1.8286042213439941, + "learning_rate": 9.387135678391961e-06, + "loss": 0.0453, + "step": 6600 + }, + { + "epoch": 2.47, + "grad_norm": 1.4312978982925415, + "learning_rate": 9.38462311557789e-06, + "loss": 0.0466, + "step": 6625 + }, + { + "epoch": 2.48, + "grad_norm": 1.3106894493103027, + "learning_rate": 9.38211055276382e-06, + "loss": 0.045, + "step": 6650 + }, + { + "epoch": 2.49, + "grad_norm": 1.3154587745666504, + "learning_rate": 9.37959798994975e-06, + "loss": 0.0451, + "step": 6675 + }, + { + "epoch": 2.5, + "grad_norm": 1.7067325115203857, + "learning_rate": 9.377085427135679e-06, + "loss": 0.0451, + "step": 6700 + }, + { + "epoch": 2.51, + "grad_norm": 1.834667444229126, + "learning_rate": 9.374572864321608e-06, + "loss": 0.0415, + "step": 6725 + }, + { + "epoch": 2.52, + "grad_norm": 1.4745657444000244, + "learning_rate": 9.372060301507539e-06, + "loss": 0.0448, + "step": 6750 + }, + { + "epoch": 2.53, + "grad_norm": 1.3801542520523071, + "learning_rate": 9.369547738693468e-06, + "loss": 0.0445, + "step": 6775 + }, + { + "epoch": 2.54, + "grad_norm": 1.36532461643219, + "learning_rate": 9.367035175879398e-06, + "loss": 0.0409, + "step": 6800 + }, + { + "epoch": 2.54, + "grad_norm": 1.5043935775756836, + "learning_rate": 9.364522613065327e-06, + "loss": 0.0445, + "step": 6825 + }, + { + "epoch": 2.55, + "grad_norm": 1.465977430343628, + "learning_rate": 9.362010050251256e-06, + "loss": 0.0421, + "step": 6850 + }, + { + "epoch": 2.56, + "grad_norm": 1.352036476135254, + "learning_rate": 9.359497487437187e-06, + "loss": 0.0451, + "step": 6875 + }, + { + "epoch": 2.57, + "grad_norm": 1.3503711223602295, + "learning_rate": 9.356984924623117e-06, + "loss": 0.0453, + "step": 6900 + }, + { + "epoch": 2.58, + "grad_norm": 1.5069199800491333, + "learning_rate": 9.354472361809046e-06, + "loss": 0.0448, + "step": 6925 + }, + { + "epoch": 2.59, + "grad_norm": 1.4854568243026733, + "learning_rate": 9.351959798994975e-06, + "loss": 0.0457, + "step": 6950 + }, + { + "epoch": 2.6, + "grad_norm": 1.2841435670852661, + "learning_rate": 9.349447236180905e-06, + "loss": 0.0409, + "step": 6975 + }, + { + "epoch": 2.61, + "grad_norm": 1.3051304817199707, + "learning_rate": 9.346934673366836e-06, + "loss": 0.041, + "step": 7000 + }, + { + "epoch": 2.61, + "eval_loss": 0.0759524330496788, + "eval_runtime": 2261.4528, + "eval_samples_per_second": 0.632, + "eval_steps_per_second": 0.632, + "eval_wer": 27.825501094829036, + "step": 7000 + }, + { + "epoch": 2.62, + "grad_norm": 1.6449871063232422, + "learning_rate": 9.344422110552765e-06, + "loss": 0.0408, + "step": 7025 + }, + { + "epoch": 2.63, + "grad_norm": 1.274962067604065, + "learning_rate": 9.341909547738694e-06, + "loss": 0.0434, + "step": 7050 + }, + { + "epoch": 2.64, + "grad_norm": 1.6334000825881958, + "learning_rate": 9.339396984924624e-06, + "loss": 0.0449, + "step": 7075 + }, + { + "epoch": 2.65, + "grad_norm": 1.4305092096328735, + "learning_rate": 9.336884422110553e-06, + "loss": 0.0449, + "step": 7100 + }, + { + "epoch": 2.66, + "grad_norm": 1.6582748889923096, + "learning_rate": 9.334371859296482e-06, + "loss": 0.043, + "step": 7125 + }, + { + "epoch": 2.67, + "grad_norm": 1.4577999114990234, + "learning_rate": 9.331859296482413e-06, + "loss": 0.0451, + "step": 7150 + }, + { + "epoch": 2.68, + "grad_norm": 1.34367036819458, + "learning_rate": 9.329346733668343e-06, + "loss": 0.0431, + "step": 7175 + }, + { + "epoch": 2.68, + "grad_norm": 1.554431676864624, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0436, + "step": 7200 + }, + { + "epoch": 2.69, + "grad_norm": 1.451438069343567, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0433, + "step": 7225 + }, + { + "epoch": 2.7, + "grad_norm": 1.483153223991394, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0406, + "step": 7250 + }, + { + "epoch": 2.71, + "grad_norm": 1.5430688858032227, + "learning_rate": 9.319396984924624e-06, + "loss": 0.044, + "step": 7275 + }, + { + "epoch": 2.72, + "grad_norm": 1.3673375844955444, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0423, + "step": 7300 + }, + { + "epoch": 2.73, + "grad_norm": 1.2490190267562866, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0445, + "step": 7325 + }, + { + "epoch": 2.74, + "grad_norm": 1.334100365638733, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0394, + "step": 7350 + }, + { + "epoch": 2.75, + "grad_norm": 1.4591741561889648, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0424, + "step": 7375 + }, + { + "epoch": 2.76, + "grad_norm": 1.3707964420318604, + "learning_rate": 9.306834170854272e-06, + "loss": 0.042, + "step": 7400 + }, + { + "epoch": 2.77, + "grad_norm": 1.410624384880066, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0439, + "step": 7425 + }, + { + "epoch": 2.78, + "grad_norm": 1.4336150884628296, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0414, + "step": 7450 + }, + { + "epoch": 2.79, + "grad_norm": 1.4089946746826172, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0418, + "step": 7475 + }, + { + "epoch": 2.8, + "grad_norm": 1.5229631662368774, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0445, + "step": 7500 + }, + { + "epoch": 2.81, + "grad_norm": 1.6150914430618286, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0405, + "step": 7525 + }, + { + "epoch": 2.82, + "grad_norm": 1.3024122714996338, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0434, + "step": 7550 + }, + { + "epoch": 2.82, + "grad_norm": 1.8231717348098755, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0435, + "step": 7575 + }, + { + "epoch": 2.83, + "grad_norm": 1.4020862579345703, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0422, + "step": 7600 + }, + { + "epoch": 2.84, + "grad_norm": 1.8010534048080444, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0408, + "step": 7625 + }, + { + "epoch": 2.85, + "grad_norm": 1.3733175992965698, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0424, + "step": 7650 + }, + { + "epoch": 2.86, + "grad_norm": 1.4369171857833862, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0372, + "step": 7675 + }, + { + "epoch": 2.87, + "grad_norm": 1.156508207321167, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0405, + "step": 7700 + }, + { + "epoch": 2.88, + "grad_norm": 1.4055359363555908, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0393, + "step": 7725 + }, + { + "epoch": 2.89, + "grad_norm": 1.3161166906356812, + "learning_rate": 9.271658291457288e-06, + "loss": 0.04, + "step": 7750 + }, + { + "epoch": 2.9, + "grad_norm": 0.8929275274276733, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0379, + "step": 7775 + }, + { + "epoch": 2.91, + "grad_norm": 1.257416844367981, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0429, + "step": 7800 + }, + { + "epoch": 2.92, + "grad_norm": 1.3220840692520142, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0406, + "step": 7825 + }, + { + "epoch": 2.93, + "grad_norm": 1.566659688949585, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0425, + "step": 7850 + }, + { + "epoch": 2.94, + "grad_norm": 1.373203158378601, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0412, + "step": 7875 + }, + { + "epoch": 2.95, + "grad_norm": 1.3783193826675415, + "learning_rate": 9.256582914572865e-06, + "loss": 0.042, + "step": 7900 + }, + { + "epoch": 2.95, + "grad_norm": 1.5386418104171753, + "learning_rate": 9.254070351758795e-06, + "loss": 0.043, + "step": 7925 + }, + { + "epoch": 2.96, + "grad_norm": 1.2207667827606201, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0406, + "step": 7950 + }, + { + "epoch": 2.97, + "grad_norm": 1.4816629886627197, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0411, + "step": 7975 + }, + { + "epoch": 2.98, + "grad_norm": 1.4936460256576538, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0399, + "step": 8000 + }, + { + "epoch": 2.98, + "eval_loss": 0.08076569437980652, + "eval_runtime": 2070.2137, + "eval_samples_per_second": 0.691, + "eval_steps_per_second": 0.691, + "eval_wer": 28.347650328448708, + "step": 8000 + }, + { + "epoch": 2.99, + "grad_norm": 1.5378543138504028, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0388, + "step": 8025 + }, + { + "epoch": 3.0, + "grad_norm": 1.2581841945648193, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0394, + "step": 8050 + }, + { + "epoch": 3.01, + "grad_norm": 1.1343461275100708, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0224, + "step": 8075 + }, + { + "epoch": 3.02, + "grad_norm": 1.1591546535491943, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0219, + "step": 8100 + }, + { + "epoch": 3.03, + "grad_norm": 1.0029828548431396, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0222, + "step": 8125 + }, + { + "epoch": 3.04, + "grad_norm": 1.1032546758651733, + "learning_rate": 9.231457286432162e-06, + "loss": 0.023, + "step": 8150 + }, + { + "epoch": 3.05, + "grad_norm": 1.2624322175979614, + "learning_rate": 9.228944723618091e-06, + "loss": 0.022, + "step": 8175 + }, + { + "epoch": 3.06, + "grad_norm": 1.1514827013015747, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0222, + "step": 8200 + }, + { + "epoch": 3.07, + "grad_norm": 1.4524542093276978, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0231, + "step": 8225 + }, + { + "epoch": 3.08, + "grad_norm": 1.2228906154632568, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0244, + "step": 8250 + }, + { + "epoch": 3.09, + "grad_norm": 1.0957272052764893, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0231, + "step": 8275 + }, + { + "epoch": 3.09, + "grad_norm": 1.1255600452423096, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0224, + "step": 8300 + }, + { + "epoch": 3.1, + "grad_norm": 1.1788476705551147, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0219, + "step": 8325 + }, + { + "epoch": 3.11, + "grad_norm": 1.8348101377487183, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0233, + "step": 8350 + }, + { + "epoch": 3.12, + "grad_norm": 1.285946011543274, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0229, + "step": 8375 + }, + { + "epoch": 3.13, + "grad_norm": 1.570494294166565, + "learning_rate": 9.206331658291459e-06, + "loss": 0.022, + "step": 8400 + }, + { + "epoch": 3.14, + "grad_norm": 1.2288076877593994, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0246, + "step": 8425 + }, + { + "epoch": 3.15, + "grad_norm": 1.2688758373260498, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0219, + "step": 8450 + }, + { + "epoch": 3.16, + "grad_norm": 1.3383485078811646, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0235, + "step": 8475 + }, + { + "epoch": 3.17, + "grad_norm": 1.1670931577682495, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0231, + "step": 8500 + }, + { + "epoch": 3.18, + "grad_norm": 1.1317856311798096, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0214, + "step": 8525 + }, + { + "epoch": 3.19, + "grad_norm": 0.990886926651001, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0215, + "step": 8550 + }, + { + "epoch": 3.2, + "grad_norm": 1.1847631931304932, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0232, + "step": 8575 + }, + { + "epoch": 3.21, + "grad_norm": 1.1338376998901367, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0225, + "step": 8600 + }, + { + "epoch": 3.22, + "grad_norm": 1.567566156387329, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0231, + "step": 8625 + }, + { + "epoch": 3.23, + "grad_norm": 1.4684127569198608, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0217, + "step": 8650 + }, + { + "epoch": 3.23, + "grad_norm": 1.3630014657974243, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0223, + "step": 8675 + }, + { + "epoch": 3.24, + "grad_norm": 1.2702186107635498, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0209, + "step": 8700 + }, + { + "epoch": 3.25, + "grad_norm": 0.9680288434028625, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0221, + "step": 8725 + }, + { + "epoch": 3.26, + "grad_norm": 1.273041009902954, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0221, + "step": 8750 + }, + { + "epoch": 3.27, + "grad_norm": 1.4807661771774292, + "learning_rate": 9.168643216080404e-06, + "loss": 0.0233, + "step": 8775 + }, + { + "epoch": 3.28, + "grad_norm": 1.3645988702774048, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0215, + "step": 8800 + }, + { + "epoch": 3.29, + "grad_norm": 1.3120235204696655, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0213, + "step": 8825 + }, + { + "epoch": 3.3, + "grad_norm": 1.4748727083206177, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0231, + "step": 8850 + }, + { + "epoch": 3.31, + "grad_norm": 1.5657958984375, + "learning_rate": 9.158592964824121e-06, + "loss": 0.0233, + "step": 8875 + }, + { + "epoch": 3.32, + "grad_norm": 0.909569263458252, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0219, + "step": 8900 + }, + { + "epoch": 3.33, + "grad_norm": 1.479006290435791, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0226, + "step": 8925 + }, + { + "epoch": 3.34, + "grad_norm": 1.2991622686386108, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0233, + "step": 8950 + }, + { + "epoch": 3.35, + "grad_norm": 1.171926498413086, + "learning_rate": 9.14854271356784e-06, + "loss": 0.023, + "step": 8975 + }, + { + "epoch": 3.36, + "grad_norm": 1.4084528684616089, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0235, + "step": 9000 + }, + { + "epoch": 3.36, + "eval_loss": 0.0916091725230217, + "eval_runtime": 2059.6095, + "eval_samples_per_second": 0.694, + "eval_steps_per_second": 0.694, + "eval_wer": 29.28246589186458, + "step": 9000 + }, + { + "epoch": 3.37, + "grad_norm": 1.6503973007202148, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0225, + "step": 9025 + }, + { + "epoch": 3.37, + "grad_norm": 2.2475039958953857, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0216, + "step": 9050 + }, + { + "epoch": 3.38, + "grad_norm": 1.525094985961914, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0238, + "step": 9075 + }, + { + "epoch": 3.39, + "grad_norm": 1.1858001947402954, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0215, + "step": 9100 + }, + { + "epoch": 3.4, + "grad_norm": 1.2602155208587646, + "learning_rate": 9.133467336683417e-06, + "loss": 0.0207, + "step": 9125 + }, + { + "epoch": 3.41, + "grad_norm": 1.455419659614563, + "learning_rate": 9.130954773869347e-06, + "loss": 0.0213, + "step": 9150 + }, + { + "epoch": 3.42, + "grad_norm": 1.2125282287597656, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0214, + "step": 9175 + }, + { + "epoch": 3.43, + "grad_norm": 1.2888023853302002, + "learning_rate": 9.125929648241205e-06, + "loss": 0.0246, + "step": 9200 + }, + { + "epoch": 3.44, + "grad_norm": 1.296097993850708, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0222, + "step": 9225 + }, + { + "epoch": 3.45, + "grad_norm": 1.334660291671753, + "learning_rate": 9.120904522613066e-06, + "loss": 0.023, + "step": 9250 + }, + { + "epoch": 3.46, + "grad_norm": 1.369049072265625, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0225, + "step": 9275 + }, + { + "epoch": 3.47, + "grad_norm": 1.5956454277038574, + "learning_rate": 9.115879396984926e-06, + "loss": 0.025, + "step": 9300 + }, + { + "epoch": 3.48, + "grad_norm": 1.2478264570236206, + "learning_rate": 9.113366834170855e-06, + "loss": 0.0229, + "step": 9325 + }, + { + "epoch": 3.49, + "grad_norm": 1.3444815874099731, + "learning_rate": 9.110854271356785e-06, + "loss": 0.0238, + "step": 9350 + }, + { + "epoch": 3.5, + "grad_norm": 1.1363239288330078, + "learning_rate": 9.108341708542714e-06, + "loss": 0.0231, + "step": 9375 + }, + { + "epoch": 3.5, + "grad_norm": 1.3815995454788208, + "learning_rate": 9.105829145728643e-06, + "loss": 0.0219, + "step": 9400 + }, + { + "epoch": 3.51, + "grad_norm": 1.3486477136611938, + "learning_rate": 9.103316582914573e-06, + "loss": 0.0226, + "step": 9425 + }, + { + "epoch": 3.52, + "grad_norm": 1.5094380378723145, + "learning_rate": 9.100804020100504e-06, + "loss": 0.0222, + "step": 9450 + }, + { + "epoch": 3.53, + "grad_norm": 1.4953993558883667, + "learning_rate": 9.098291457286433e-06, + "loss": 0.0215, + "step": 9475 + }, + { + "epoch": 3.54, + "grad_norm": 1.314180612564087, + "learning_rate": 9.095778894472362e-06, + "loss": 0.0228, + "step": 9500 + }, + { + "epoch": 3.55, + "grad_norm": 1.254289984703064, + "learning_rate": 9.093266331658292e-06, + "loss": 0.022, + "step": 9525 + }, + { + "epoch": 3.56, + "grad_norm": 1.3754682540893555, + "learning_rate": 9.090753768844221e-06, + "loss": 0.0221, + "step": 9550 + }, + { + "epoch": 3.57, + "grad_norm": 1.8688709735870361, + "learning_rate": 9.088241206030152e-06, + "loss": 0.0231, + "step": 9575 + }, + { + "epoch": 3.58, + "grad_norm": 1.2806031703948975, + "learning_rate": 9.085728643216081e-06, + "loss": 0.0229, + "step": 9600 + }, + { + "epoch": 3.59, + "grad_norm": 1.5163094997406006, + "learning_rate": 9.08321608040201e-06, + "loss": 0.023, + "step": 9625 + }, + { + "epoch": 3.6, + "grad_norm": 1.0854531526565552, + "learning_rate": 9.08070351758794e-06, + "loss": 0.0227, + "step": 9650 + }, + { + "epoch": 3.61, + "grad_norm": 1.3915247917175293, + "learning_rate": 9.07819095477387e-06, + "loss": 0.0225, + "step": 9675 + }, + { + "epoch": 3.62, + "grad_norm": 1.3098303079605103, + "learning_rate": 9.0756783919598e-06, + "loss": 0.0224, + "step": 9700 + }, + { + "epoch": 3.63, + "grad_norm": 1.368969440460205, + "learning_rate": 9.07316582914573e-06, + "loss": 0.0223, + "step": 9725 + }, + { + "epoch": 3.64, + "grad_norm": 1.0863829851150513, + "learning_rate": 9.070653266331659e-06, + "loss": 0.0215, + "step": 9750 + }, + { + "epoch": 3.64, + "grad_norm": 1.222556471824646, + "learning_rate": 9.068140703517588e-06, + "loss": 0.0213, + "step": 9775 + }, + { + "epoch": 3.65, + "grad_norm": 1.591573715209961, + "learning_rate": 9.065628140703518e-06, + "loss": 0.0206, + "step": 9800 + }, + { + "epoch": 3.66, + "grad_norm": 1.8961296081542969, + "learning_rate": 9.063115577889447e-06, + "loss": 0.0236, + "step": 9825 + }, + { + "epoch": 3.67, + "grad_norm": 1.2452623844146729, + "learning_rate": 9.060603015075378e-06, + "loss": 0.0223, + "step": 9850 + }, + { + "epoch": 3.68, + "grad_norm": 1.3407517671585083, + "learning_rate": 9.058090452261307e-06, + "loss": 0.0211, + "step": 9875 + }, + { + "epoch": 3.69, + "grad_norm": 1.4327746629714966, + "learning_rate": 9.055577889447237e-06, + "loss": 0.0224, + "step": 9900 + }, + { + "epoch": 3.7, + "grad_norm": 1.6977804899215698, + "learning_rate": 9.053065326633168e-06, + "loss": 0.0229, + "step": 9925 + }, + { + "epoch": 3.71, + "grad_norm": 1.4939321279525757, + "learning_rate": 9.050552763819095e-06, + "loss": 0.0224, + "step": 9950 + }, + { + "epoch": 3.72, + "grad_norm": 1.162123680114746, + "learning_rate": 9.048040201005026e-06, + "loss": 0.0225, + "step": 9975 + }, + { + "epoch": 3.73, + "grad_norm": 1.370092511177063, + "learning_rate": 9.045527638190956e-06, + "loss": 0.0215, + "step": 10000 + }, + { + "epoch": 3.73, + "eval_loss": 0.09312493354082108, + "eval_runtime": 2062.2409, + "eval_samples_per_second": 0.693, + "eval_steps_per_second": 0.693, + "eval_wer": 27.89287518948964, + "step": 10000 + }, + { + "epoch": 3.74, + "grad_norm": 1.731217861175537, + "learning_rate": 9.043015075376885e-06, + "loss": 0.0226, + "step": 10025 + }, + { + "epoch": 3.75, + "grad_norm": 0.9824772477149963, + "learning_rate": 9.040502512562814e-06, + "loss": 0.0213, + "step": 10050 + }, + { + "epoch": 3.76, + "grad_norm": 1.4142457246780396, + "learning_rate": 9.037989949748744e-06, + "loss": 0.0221, + "step": 10075 + }, + { + "epoch": 3.77, + "grad_norm": 1.3302693367004395, + "learning_rate": 9.035477386934675e-06, + "loss": 0.0223, + "step": 10100 + }, + { + "epoch": 3.78, + "grad_norm": 1.2113432884216309, + "learning_rate": 9.032964824120604e-06, + "loss": 0.0214, + "step": 10125 + }, + { + "epoch": 3.78, + "grad_norm": 1.1918665170669556, + "learning_rate": 9.030452261306533e-06, + "loss": 0.0209, + "step": 10150 + }, + { + "epoch": 3.79, + "grad_norm": 1.5560879707336426, + "learning_rate": 9.027939698492463e-06, + "loss": 0.022, + "step": 10175 + }, + { + "epoch": 3.8, + "grad_norm": 1.1221671104431152, + "learning_rate": 9.025427135678394e-06, + "loss": 0.0224, + "step": 10200 + }, + { + "epoch": 3.81, + "grad_norm": 1.6139975786209106, + "learning_rate": 9.022914572864321e-06, + "loss": 0.0212, + "step": 10225 + }, + { + "epoch": 3.82, + "grad_norm": 1.7258100509643555, + "learning_rate": 9.020402010050252e-06, + "loss": 0.0209, + "step": 10250 + }, + { + "epoch": 3.83, + "grad_norm": 1.365613579750061, + "learning_rate": 9.017889447236182e-06, + "loss": 0.0205, + "step": 10275 + }, + { + "epoch": 3.84, + "grad_norm": 1.0940942764282227, + "learning_rate": 9.015376884422111e-06, + "loss": 0.0207, + "step": 10300 + }, + { + "epoch": 3.85, + "grad_norm": 1.6622213125228882, + "learning_rate": 9.012864321608042e-06, + "loss": 0.0217, + "step": 10325 + }, + { + "epoch": 3.86, + "grad_norm": 1.1353604793548584, + "learning_rate": 9.01035175879397e-06, + "loss": 0.0222, + "step": 10350 + }, + { + "epoch": 3.87, + "grad_norm": 1.1212279796600342, + "learning_rate": 9.0078391959799e-06, + "loss": 0.0199, + "step": 10375 + }, + { + "epoch": 3.88, + "grad_norm": 1.0758684873580933, + "learning_rate": 9.00532663316583e-06, + "loss": 0.0211, + "step": 10400 + }, + { + "epoch": 3.89, + "grad_norm": 0.9904615879058838, + "learning_rate": 9.00281407035176e-06, + "loss": 0.021, + "step": 10425 + }, + { + "epoch": 3.9, + "grad_norm": 1.3735851049423218, + "learning_rate": 9.000301507537689e-06, + "loss": 0.0218, + "step": 10450 + }, + { + "epoch": 3.91, + "grad_norm": 1.4021342992782593, + "learning_rate": 8.99778894472362e-06, + "loss": 0.0208, + "step": 10475 + }, + { + "epoch": 3.91, + "grad_norm": 1.2619400024414062, + "learning_rate": 8.995276381909549e-06, + "loss": 0.0211, + "step": 10500 + }, + { + "epoch": 3.92, + "grad_norm": 1.6856536865234375, + "learning_rate": 8.992763819095478e-06, + "loss": 0.023, + "step": 10525 + }, + { + "epoch": 3.93, + "grad_norm": 1.2466765642166138, + "learning_rate": 8.990251256281408e-06, + "loss": 0.0235, + "step": 10550 + }, + { + "epoch": 3.94, + "grad_norm": 1.3491064310073853, + "learning_rate": 8.987738693467337e-06, + "loss": 0.0206, + "step": 10575 + }, + { + "epoch": 3.95, + "grad_norm": 1.325589656829834, + "learning_rate": 8.985226130653268e-06, + "loss": 0.0214, + "step": 10600 + }, + { + "epoch": 3.96, + "grad_norm": 1.654358148574829, + "learning_rate": 8.982713567839196e-06, + "loss": 0.0212, + "step": 10625 + }, + { + "epoch": 3.97, + "grad_norm": 1.3201229572296143, + "learning_rate": 8.980201005025127e-06, + "loss": 0.0201, + "step": 10650 + }, + { + "epoch": 3.98, + "grad_norm": 1.1882002353668213, + "learning_rate": 8.977688442211056e-06, + "loss": 0.0206, + "step": 10675 + }, + { + "epoch": 3.99, + "grad_norm": 1.4994813203811646, + "learning_rate": 8.975175879396985e-06, + "loss": 0.0218, + "step": 10700 + }, + { + "epoch": 4.0, + "grad_norm": 1.1313855648040771, + "learning_rate": 8.972663316582916e-06, + "loss": 0.0215, + "step": 10725 + }, + { + "epoch": 4.01, + "grad_norm": 1.0412638187408447, + "learning_rate": 8.970150753768846e-06, + "loss": 0.0119, + "step": 10750 + }, + { + "epoch": 4.02, + "grad_norm": 0.9016923308372498, + "learning_rate": 8.967638190954775e-06, + "loss": 0.0105, + "step": 10775 + }, + { + "epoch": 4.03, + "grad_norm": 0.8955936431884766, + "learning_rate": 8.965125628140704e-06, + "loss": 0.01, + "step": 10800 + }, + { + "epoch": 4.04, + "grad_norm": 0.8133251070976257, + "learning_rate": 8.962613065326634e-06, + "loss": 0.011, + "step": 10825 + }, + { + "epoch": 4.05, + "grad_norm": 0.9683551788330078, + "learning_rate": 8.960100502512563e-06, + "loss": 0.0095, + "step": 10850 + }, + { + "epoch": 4.05, + "grad_norm": 1.3145838975906372, + "learning_rate": 8.957587939698494e-06, + "loss": 0.0101, + "step": 10875 + }, + { + "epoch": 4.06, + "grad_norm": 0.9112092852592468, + "learning_rate": 8.955075376884421e-06, + "loss": 0.0105, + "step": 10900 + }, + { + "epoch": 4.07, + "grad_norm": 0.7387055158615112, + "learning_rate": 8.952562814070352e-06, + "loss": 0.0108, + "step": 10925 + }, + { + "epoch": 4.08, + "grad_norm": 0.9458868503570557, + "learning_rate": 8.950050251256282e-06, + "loss": 0.0096, + "step": 10950 + }, + { + "epoch": 4.09, + "grad_norm": 1.0740615129470825, + "learning_rate": 8.947537688442211e-06, + "loss": 0.0106, + "step": 10975 + }, + { + "epoch": 4.1, + "grad_norm": 1.0569562911987305, + "learning_rate": 8.945025125628142e-06, + "loss": 0.0107, + "step": 11000 + }, + { + "epoch": 4.1, + "eval_loss": 0.10462144762277603, + "eval_runtime": 2570.1318, + "eval_samples_per_second": 0.556, + "eval_steps_per_second": 0.556, + "eval_wer": 27.042277244399525, + "step": 11000 + }, + { + "epoch": 4.11, + "grad_norm": 1.6794267892837524, + "learning_rate": 8.942512562814071e-06, + "loss": 0.0117, + "step": 11025 + }, + { + "epoch": 4.12, + "grad_norm": 1.3400967121124268, + "learning_rate": 8.94e-06, + "loss": 0.0102, + "step": 11050 + }, + { + "epoch": 4.13, + "grad_norm": 1.1266950368881226, + "learning_rate": 8.93748743718593e-06, + "loss": 0.0114, + "step": 11075 + }, + { + "epoch": 4.14, + "grad_norm": 1.2375482320785522, + "learning_rate": 8.93497487437186e-06, + "loss": 0.011, + "step": 11100 + }, + { + "epoch": 4.15, + "grad_norm": 0.8881027698516846, + "learning_rate": 8.93246231155779e-06, + "loss": 0.0111, + "step": 11125 + }, + { + "epoch": 4.16, + "grad_norm": 1.1559758186340332, + "learning_rate": 8.92994974874372e-06, + "loss": 0.0101, + "step": 11150 + }, + { + "epoch": 4.17, + "grad_norm": 0.9337453842163086, + "learning_rate": 8.927437185929649e-06, + "loss": 0.0119, + "step": 11175 + }, + { + "epoch": 4.18, + "grad_norm": 1.3291007280349731, + "learning_rate": 8.924924623115578e-06, + "loss": 0.011, + "step": 11200 + }, + { + "epoch": 4.19, + "grad_norm": 1.021558165550232, + "learning_rate": 8.922412060301508e-06, + "loss": 0.0111, + "step": 11225 + }, + { + "epoch": 4.19, + "grad_norm": 1.432157039642334, + "learning_rate": 8.919899497487437e-06, + "loss": 0.0108, + "step": 11250 + }, + { + "epoch": 4.2, + "grad_norm": 1.3071788549423218, + "learning_rate": 8.917386934673368e-06, + "loss": 0.0108, + "step": 11275 + }, + { + "epoch": 4.21, + "grad_norm": 1.5548590421676636, + "learning_rate": 8.914874371859297e-06, + "loss": 0.0121, + "step": 11300 + }, + { + "epoch": 4.22, + "grad_norm": 1.1843417882919312, + "learning_rate": 8.912361809045227e-06, + "loss": 0.0104, + "step": 11325 + }, + { + "epoch": 4.23, + "grad_norm": 0.9825242757797241, + "learning_rate": 8.909849246231158e-06, + "loss": 0.0111, + "step": 11350 + }, + { + "epoch": 4.24, + "grad_norm": 1.344247579574585, + "learning_rate": 8.907336683417085e-06, + "loss": 0.0117, + "step": 11375 + }, + { + "epoch": 4.25, + "grad_norm": 1.1454766988754272, + "learning_rate": 8.904824120603016e-06, + "loss": 0.0111, + "step": 11400 + }, + { + "epoch": 4.26, + "grad_norm": 0.9100978374481201, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0117, + "step": 11425 + }, + { + "epoch": 4.27, + "grad_norm": 1.235771894454956, + "learning_rate": 8.899899497487437e-06, + "loss": 0.0109, + "step": 11450 + }, + { + "epoch": 4.28, + "grad_norm": 1.5570495128631592, + "learning_rate": 8.897386934673368e-06, + "loss": 0.011, + "step": 11475 + }, + { + "epoch": 4.29, + "grad_norm": 0.9546844363212585, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0118, + "step": 11500 + }, + { + "epoch": 4.3, + "grad_norm": 1.4748715162277222, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0104, + "step": 11525 + }, + { + "epoch": 4.31, + "grad_norm": 1.2033954858779907, + "learning_rate": 8.889849246231156e-06, + "loss": 0.0113, + "step": 11550 + }, + { + "epoch": 4.32, + "grad_norm": 1.2569265365600586, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0112, + "step": 11575 + }, + { + "epoch": 4.33, + "grad_norm": 1.050119400024414, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0103, + "step": 11600 + }, + { + "epoch": 4.33, + "grad_norm": 1.4159622192382812, + "learning_rate": 8.882311557788946e-06, + "loss": 0.0121, + "step": 11625 + }, + { + "epoch": 4.34, + "grad_norm": 1.118747591972351, + "learning_rate": 8.879798994974875e-06, + "loss": 0.0114, + "step": 11650 + }, + { + "epoch": 4.35, + "grad_norm": 1.1599119901657104, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0122, + "step": 11675 + }, + { + "epoch": 4.36, + "grad_norm": 1.498646855354309, + "learning_rate": 8.874773869346734e-06, + "loss": 0.0118, + "step": 11700 + }, + { + "epoch": 4.37, + "grad_norm": 1.4249510765075684, + "learning_rate": 8.872261306532665e-06, + "loss": 0.011, + "step": 11725 + }, + { + "epoch": 4.38, + "grad_norm": 1.2759448289871216, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0112, + "step": 11750 + }, + { + "epoch": 4.39, + "grad_norm": 1.153402328491211, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0119, + "step": 11775 + }, + { + "epoch": 4.4, + "grad_norm": 1.4031355381011963, + "learning_rate": 8.864723618090453e-06, + "loss": 0.0109, + "step": 11800 + }, + { + "epoch": 4.41, + "grad_norm": 1.214074730873108, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0109, + "step": 11825 + }, + { + "epoch": 4.42, + "grad_norm": 1.482516884803772, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0121, + "step": 11850 + }, + { + "epoch": 4.43, + "grad_norm": 1.637488603591919, + "learning_rate": 8.857185929648243e-06, + "loss": 0.0113, + "step": 11875 + }, + { + "epoch": 4.44, + "grad_norm": 1.1511622667312622, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0122, + "step": 11900 + }, + { + "epoch": 4.45, + "grad_norm": 0.867973268032074, + "learning_rate": 8.852160804020101e-06, + "loss": 0.0113, + "step": 11925 + }, + { + "epoch": 4.46, + "grad_norm": 1.062778353691101, + "learning_rate": 8.849648241206032e-06, + "loss": 0.0112, + "step": 11950 + }, + { + "epoch": 4.46, + "grad_norm": 0.9882897734642029, + "learning_rate": 8.84713567839196e-06, + "loss": 0.0117, + "step": 11975 + }, + { + "epoch": 4.47, + "grad_norm": 1.3055555820465088, + "learning_rate": 8.844623115577891e-06, + "loss": 0.0112, + "step": 12000 + }, + { + "epoch": 4.47, + "eval_loss": 0.10874040424823761, + "eval_runtime": 2566.5719, + "eval_samples_per_second": 0.557, + "eval_steps_per_second": 0.557, + "eval_wer": 28.044466902476, + "step": 12000 + }, + { + "epoch": 4.48, + "grad_norm": 1.3959226608276367, + "learning_rate": 8.84211055276382e-06, + "loss": 0.0108, + "step": 12025 + }, + { + "epoch": 4.49, + "grad_norm": 1.2117880582809448, + "learning_rate": 8.83959798994975e-06, + "loss": 0.0109, + "step": 12050 + }, + { + "epoch": 4.5, + "grad_norm": 1.3157877922058105, + "learning_rate": 8.837085427135679e-06, + "loss": 0.0101, + "step": 12075 + }, + { + "epoch": 4.51, + "grad_norm": 1.5360324382781982, + "learning_rate": 8.834572864321608e-06, + "loss": 0.0117, + "step": 12100 + }, + { + "epoch": 4.52, + "grad_norm": 1.192575216293335, + "learning_rate": 8.832060301507537e-06, + "loss": 0.0125, + "step": 12125 + }, + { + "epoch": 4.53, + "grad_norm": 1.0945703983306885, + "learning_rate": 8.829547738693468e-06, + "loss": 0.0109, + "step": 12150 + }, + { + "epoch": 4.54, + "grad_norm": 1.2559436559677124, + "learning_rate": 8.827035175879398e-06, + "loss": 0.0105, + "step": 12175 + }, + { + "epoch": 4.55, + "grad_norm": 0.9956606030464172, + "learning_rate": 8.824522613065327e-06, + "loss": 0.0105, + "step": 12200 + }, + { + "epoch": 4.56, + "grad_norm": 1.2873191833496094, + "learning_rate": 8.822010050251258e-06, + "loss": 0.011, + "step": 12225 + }, + { + "epoch": 4.57, + "grad_norm": 1.1171514987945557, + "learning_rate": 8.819497487437186e-06, + "loss": 0.0104, + "step": 12250 + }, + { + "epoch": 4.58, + "grad_norm": 1.206233263015747, + "learning_rate": 8.816984924623117e-06, + "loss": 0.0105, + "step": 12275 + }, + { + "epoch": 4.59, + "grad_norm": 1.81877863407135, + "learning_rate": 8.814472361809046e-06, + "loss": 0.0119, + "step": 12300 + }, + { + "epoch": 4.6, + "grad_norm": 1.2874281406402588, + "learning_rate": 8.811959798994975e-06, + "loss": 0.0116, + "step": 12325 + }, + { + "epoch": 4.6, + "grad_norm": 1.074051856994629, + "learning_rate": 8.809447236180905e-06, + "loss": 0.0122, + "step": 12350 + }, + { + "epoch": 4.61, + "grad_norm": 1.2170064449310303, + "learning_rate": 8.806934673366834e-06, + "loss": 0.0112, + "step": 12375 + }, + { + "epoch": 4.62, + "grad_norm": 1.2714022397994995, + "learning_rate": 8.804422110552765e-06, + "loss": 0.0121, + "step": 12400 + }, + { + "epoch": 4.63, + "grad_norm": 1.2775418758392334, + "learning_rate": 8.801909547738694e-06, + "loss": 0.0112, + "step": 12425 + }, + { + "epoch": 4.64, + "grad_norm": 1.063226580619812, + "learning_rate": 8.799396984924624e-06, + "loss": 0.0108, + "step": 12450 + }, + { + "epoch": 4.65, + "grad_norm": 1.0954666137695312, + "learning_rate": 8.796884422110553e-06, + "loss": 0.0124, + "step": 12475 + }, + { + "epoch": 4.66, + "grad_norm": 0.9798392057418823, + "learning_rate": 8.794371859296484e-06, + "loss": 0.011, + "step": 12500 + }, + { + "epoch": 4.67, + "grad_norm": 1.3391318321228027, + "learning_rate": 8.791859296482412e-06, + "loss": 0.0121, + "step": 12525 + }, + { + "epoch": 4.68, + "grad_norm": 1.2697949409484863, + "learning_rate": 8.789346733668343e-06, + "loss": 0.0129, + "step": 12550 + }, + { + "epoch": 4.69, + "grad_norm": 1.0142897367477417, + "learning_rate": 8.786834170854272e-06, + "loss": 0.0117, + "step": 12575 + }, + { + "epoch": 4.7, + "grad_norm": 1.136461615562439, + "learning_rate": 8.784321608040201e-06, + "loss": 0.0113, + "step": 12600 + }, + { + "epoch": 4.71, + "grad_norm": 1.1551543474197388, + "learning_rate": 8.781809045226132e-06, + "loss": 0.0121, + "step": 12625 + }, + { + "epoch": 4.72, + "grad_norm": 1.2630085945129395, + "learning_rate": 8.77929648241206e-06, + "loss": 0.0104, + "step": 12650 + }, + { + "epoch": 4.73, + "grad_norm": 1.0296798944473267, + "learning_rate": 8.776783919597991e-06, + "loss": 0.0117, + "step": 12675 + }, + { + "epoch": 4.74, + "grad_norm": 1.458978295326233, + "learning_rate": 8.77427135678392e-06, + "loss": 0.0116, + "step": 12700 + }, + { + "epoch": 4.74, + "grad_norm": 1.4575459957122803, + "learning_rate": 8.77175879396985e-06, + "loss": 0.0106, + "step": 12725 + }, + { + "epoch": 4.75, + "grad_norm": 0.853424608707428, + "learning_rate": 8.769246231155779e-06, + "loss": 0.0119, + "step": 12750 + }, + { + "epoch": 4.76, + "grad_norm": 1.091800332069397, + "learning_rate": 8.76673366834171e-06, + "loss": 0.0115, + "step": 12775 + }, + { + "epoch": 4.77, + "grad_norm": 1.2517153024673462, + "learning_rate": 8.76422110552764e-06, + "loss": 0.0118, + "step": 12800 + }, + { + "epoch": 4.78, + "grad_norm": 1.2899531126022339, + "learning_rate": 8.761708542713569e-06, + "loss": 0.0116, + "step": 12825 + }, + { + "epoch": 4.79, + "grad_norm": 1.052538514137268, + "learning_rate": 8.759195979899498e-06, + "loss": 0.013, + "step": 12850 + }, + { + "epoch": 4.8, + "grad_norm": 1.2071914672851562, + "learning_rate": 8.756683417085427e-06, + "loss": 0.011, + "step": 12875 + }, + { + "epoch": 4.81, + "grad_norm": 1.8557744026184082, + "learning_rate": 8.754170854271358e-06, + "loss": 0.0116, + "step": 12900 + }, + { + "epoch": 4.82, + "grad_norm": 1.353334665298462, + "learning_rate": 8.751658291457286e-06, + "loss": 0.0111, + "step": 12925 + }, + { + "epoch": 4.83, + "grad_norm": 1.1100677251815796, + "learning_rate": 8.749145728643217e-06, + "loss": 0.0126, + "step": 12950 + }, + { + "epoch": 4.84, + "grad_norm": 1.0451818704605103, + "learning_rate": 8.746633165829146e-06, + "loss": 0.0114, + "step": 12975 + }, + { + "epoch": 4.85, + "grad_norm": 0.9931843876838684, + "learning_rate": 8.744120603015076e-06, + "loss": 0.0121, + "step": 13000 + }, + { + "epoch": 4.85, + "eval_loss": 0.11524338275194168, + "eval_runtime": 2158.3842, + "eval_samples_per_second": 0.663, + "eval_steps_per_second": 0.663, + "eval_wer": 28.768738420077476, + "step": 13000 + }, + { + "epoch": 4.86, + "grad_norm": 1.5944328308105469, + "learning_rate": 8.741608040201007e-06, + "loss": 0.0114, + "step": 13025 + }, + { + "epoch": 4.87, + "grad_norm": 1.505807876586914, + "learning_rate": 8.739095477386936e-06, + "loss": 0.0119, + "step": 13050 + }, + { + "epoch": 4.88, + "grad_norm": 1.3619110584259033, + "learning_rate": 8.736582914572865e-06, + "loss": 0.011, + "step": 13075 + }, + { + "epoch": 4.88, + "grad_norm": 1.2681009769439697, + "learning_rate": 8.734070351758795e-06, + "loss": 0.0117, + "step": 13100 + }, + { + "epoch": 4.89, + "grad_norm": 1.348471760749817, + "learning_rate": 8.731557788944724e-06, + "loss": 0.0108, + "step": 13125 + }, + { + "epoch": 4.9, + "grad_norm": 0.9727448225021362, + "learning_rate": 8.729045226130653e-06, + "loss": 0.0118, + "step": 13150 + }, + { + "epoch": 4.91, + "grad_norm": 1.00346040725708, + "learning_rate": 8.726532663316584e-06, + "loss": 0.0116, + "step": 13175 + }, + { + "epoch": 4.92, + "grad_norm": 1.531886100769043, + "learning_rate": 8.724020100502514e-06, + "loss": 0.0114, + "step": 13200 + }, + { + "epoch": 4.93, + "grad_norm": 0.9477930665016174, + "learning_rate": 8.721507537688443e-06, + "loss": 0.0129, + "step": 13225 + }, + { + "epoch": 4.94, + "grad_norm": 0.9415209889411926, + "learning_rate": 8.718994974874372e-06, + "loss": 0.0108, + "step": 13250 + }, + { + "epoch": 4.95, + "grad_norm": 1.0455098152160645, + "learning_rate": 8.716482412060302e-06, + "loss": 0.0096, + "step": 13275 + }, + { + "epoch": 4.96, + "grad_norm": 0.9145622253417969, + "learning_rate": 8.713969849246233e-06, + "loss": 0.0117, + "step": 13300 + }, + { + "epoch": 4.97, + "grad_norm": 1.1689749956130981, + "learning_rate": 8.711457286432162e-06, + "loss": 0.0112, + "step": 13325 + }, + { + "epoch": 4.98, + "grad_norm": 1.604429841041565, + "learning_rate": 8.708944723618091e-06, + "loss": 0.0114, + "step": 13350 + }, + { + "epoch": 4.99, + "grad_norm": 1.2108241319656372, + "learning_rate": 8.70643216080402e-06, + "loss": 0.0116, + "step": 13375 + }, + { + "epoch": 5.0, + "grad_norm": 0.9991540312767029, + "learning_rate": 8.70391959798995e-06, + "loss": 0.0114, + "step": 13400 + }, + { + "epoch": 5.01, + "grad_norm": 0.9700791835784912, + "learning_rate": 8.701407035175881e-06, + "loss": 0.0078, + "step": 13425 + }, + { + "epoch": 5.01, + "grad_norm": 0.6362177729606628, + "learning_rate": 8.69889447236181e-06, + "loss": 0.0055, + "step": 13450 + }, + { + "epoch": 5.02, + "grad_norm": 0.8142854571342468, + "learning_rate": 8.69638190954774e-06, + "loss": 0.0059, + "step": 13475 + }, + { + "epoch": 5.03, + "grad_norm": 1.0601310729980469, + "learning_rate": 8.693869346733669e-06, + "loss": 0.006, + "step": 13500 + }, + { + "epoch": 5.04, + "grad_norm": 0.6800270080566406, + "learning_rate": 8.691356783919598e-06, + "loss": 0.0061, + "step": 13525 + }, + { + "epoch": 5.05, + "grad_norm": 1.1405975818634033, + "learning_rate": 8.688844221105528e-06, + "loss": 0.006, + "step": 13550 + }, + { + "epoch": 5.06, + "grad_norm": 1.004633903503418, + "learning_rate": 8.686331658291459e-06, + "loss": 0.0064, + "step": 13575 + }, + { + "epoch": 5.07, + "grad_norm": 0.7572785019874573, + "learning_rate": 8.683819095477388e-06, + "loss": 0.0063, + "step": 13600 + }, + { + "epoch": 5.08, + "grad_norm": 0.6280177235603333, + "learning_rate": 8.681306532663317e-06, + "loss": 0.0061, + "step": 13625 + }, + { + "epoch": 5.09, + "grad_norm": 1.0561246871948242, + "learning_rate": 8.678793969849248e-06, + "loss": 0.0056, + "step": 13650 + }, + { + "epoch": 5.1, + "grad_norm": 1.4883924722671509, + "learning_rate": 8.676281407035176e-06, + "loss": 0.0058, + "step": 13675 + }, + { + "epoch": 5.11, + "grad_norm": 1.0619280338287354, + "learning_rate": 8.673768844221107e-06, + "loss": 0.0065, + "step": 13700 + }, + { + "epoch": 5.12, + "grad_norm": 1.1832932233810425, + "learning_rate": 8.671256281407036e-06, + "loss": 0.0055, + "step": 13725 + }, + { + "epoch": 5.13, + "grad_norm": 0.8864865303039551, + "learning_rate": 8.668743718592966e-06, + "loss": 0.0064, + "step": 13750 + }, + { + "epoch": 5.14, + "grad_norm": 1.2815179824829102, + "learning_rate": 8.666231155778895e-06, + "loss": 0.0069, + "step": 13775 + }, + { + "epoch": 5.15, + "grad_norm": 0.7940617203712463, + "learning_rate": 8.663718592964824e-06, + "loss": 0.0059, + "step": 13800 + }, + { + "epoch": 5.15, + "grad_norm": 0.8112738728523254, + "learning_rate": 8.661206030150755e-06, + "loss": 0.0059, + "step": 13825 + }, + { + "epoch": 5.16, + "grad_norm": 0.9151795506477356, + "learning_rate": 8.658693467336684e-06, + "loss": 0.0068, + "step": 13850 + }, + { + "epoch": 5.17, + "grad_norm": 0.801234781742096, + "learning_rate": 8.656180904522614e-06, + "loss": 0.0061, + "step": 13875 + }, + { + "epoch": 5.18, + "grad_norm": 1.079856038093567, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0066, + "step": 13900 + }, + { + "epoch": 5.19, + "grad_norm": 0.6894454956054688, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0055, + "step": 13925 + }, + { + "epoch": 5.2, + "grad_norm": 1.0342192649841309, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0065, + "step": 13950 + }, + { + "epoch": 5.21, + "grad_norm": 1.0424178838729858, + "learning_rate": 8.646231155778895e-06, + "loss": 0.007, + "step": 13975 + }, + { + "epoch": 5.22, + "grad_norm": 1.096771478652954, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0064, + "step": 14000 + }, + { + "epoch": 5.22, + "eval_loss": 0.1236206591129303, + "eval_runtime": 2045.2805, + "eval_samples_per_second": 0.699, + "eval_steps_per_second": 0.699, + "eval_wer": 27.446521812363144, + "step": 14000 + }, + { + "epoch": 5.23, + "grad_norm": 0.8363531231880188, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0065, + "step": 14025 + }, + { + "epoch": 5.24, + "grad_norm": 0.9518529176712036, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0064, + "step": 14050 + }, + { + "epoch": 5.25, + "grad_norm": 1.2325410842895508, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0066, + "step": 14075 + }, + { + "epoch": 5.26, + "grad_norm": 0.921227753162384, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0066, + "step": 14100 + }, + { + "epoch": 5.27, + "grad_norm": 1.0446927547454834, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0063, + "step": 14125 + }, + { + "epoch": 5.28, + "grad_norm": 1.3974566459655762, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0066, + "step": 14150 + }, + { + "epoch": 5.29, + "grad_norm": 1.2447640895843506, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0062, + "step": 14175 + }, + { + "epoch": 5.29, + "grad_norm": 1.0701993703842163, + "learning_rate": 8.623618090452262e-06, + "loss": 0.007, + "step": 14200 + }, + { + "epoch": 5.3, + "grad_norm": 1.0960869789123535, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0065, + "step": 14225 + }, + { + "epoch": 5.31, + "grad_norm": 1.0952740907669067, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0067, + "step": 14250 + }, + { + "epoch": 5.32, + "grad_norm": 1.176464319229126, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0058, + "step": 14275 + }, + { + "epoch": 5.33, + "grad_norm": 1.0034589767456055, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0073, + "step": 14300 + }, + { + "epoch": 5.34, + "grad_norm": 1.0626839399337769, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0065, + "step": 14325 + }, + { + "epoch": 5.35, + "grad_norm": 0.7016169428825378, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0072, + "step": 14350 + }, + { + "epoch": 5.36, + "grad_norm": 1.035622000694275, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0072, + "step": 14375 + }, + { + "epoch": 5.37, + "grad_norm": 1.241018295288086, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0068, + "step": 14400 + }, + { + "epoch": 5.38, + "grad_norm": 0.9622945189476013, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0067, + "step": 14425 + }, + { + "epoch": 5.39, + "grad_norm": 0.6068817377090454, + "learning_rate": 8.598492462311559e-06, + "loss": 0.0067, + "step": 14450 + }, + { + "epoch": 5.4, + "grad_norm": 1.0171995162963867, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0076, + "step": 14475 + }, + { + "epoch": 5.41, + "grad_norm": 1.1998450756072998, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0073, + "step": 14500 + }, + { + "epoch": 5.42, + "grad_norm": 1.4210116863250732, + "learning_rate": 8.590954773869347e-06, + "loss": 0.0067, + "step": 14525 + }, + { + "epoch": 5.43, + "grad_norm": 1.256176471710205, + "learning_rate": 8.588442211055276e-06, + "loss": 0.0075, + "step": 14550 + }, + { + "epoch": 5.43, + "grad_norm": 1.3829436302185059, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0077, + "step": 14575 + }, + { + "epoch": 5.44, + "grad_norm": 0.8739194869995117, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0069, + "step": 14600 + }, + { + "epoch": 5.45, + "grad_norm": 1.3086152076721191, + "learning_rate": 8.580904522613066e-06, + "loss": 0.0078, + "step": 14625 + }, + { + "epoch": 5.46, + "grad_norm": 1.245772123336792, + "learning_rate": 8.578391959798997e-06, + "loss": 0.007, + "step": 14650 + }, + { + "epoch": 5.47, + "grad_norm": 1.1863294839859009, + "learning_rate": 8.575879396984925e-06, + "loss": 0.0071, + "step": 14675 + }, + { + "epoch": 5.48, + "grad_norm": 1.032676339149475, + "learning_rate": 8.573366834170856e-06, + "loss": 0.0072, + "step": 14700 + }, + { + "epoch": 5.49, + "grad_norm": 1.1402857303619385, + "learning_rate": 8.570854271356785e-06, + "loss": 0.0074, + "step": 14725 + }, + { + "epoch": 5.5, + "grad_norm": 1.088283896446228, + "learning_rate": 8.568341708542714e-06, + "loss": 0.0074, + "step": 14750 + }, + { + "epoch": 5.51, + "grad_norm": 1.1104509830474854, + "learning_rate": 8.565829145728644e-06, + "loss": 0.0075, + "step": 14775 + }, + { + "epoch": 5.52, + "grad_norm": 1.1338770389556885, + "learning_rate": 8.563316582914573e-06, + "loss": 0.0071, + "step": 14800 + }, + { + "epoch": 5.53, + "grad_norm": 1.280633807182312, + "learning_rate": 8.560804020100502e-06, + "loss": 0.0073, + "step": 14825 + }, + { + "epoch": 5.54, + "grad_norm": 1.213628888130188, + "learning_rate": 8.558291457286433e-06, + "loss": 0.0072, + "step": 14850 + }, + { + "epoch": 5.55, + "grad_norm": 1.1627049446105957, + "learning_rate": 8.555778894472363e-06, + "loss": 0.0068, + "step": 14875 + }, + { + "epoch": 5.56, + "grad_norm": 1.1929972171783447, + "learning_rate": 8.553266331658292e-06, + "loss": 0.0067, + "step": 14900 + }, + { + "epoch": 5.56, + "grad_norm": 0.9693466424942017, + "learning_rate": 8.550753768844223e-06, + "loss": 0.0066, + "step": 14925 + }, + { + "epoch": 5.57, + "grad_norm": 1.1447923183441162, + "learning_rate": 8.54824120603015e-06, + "loss": 0.0065, + "step": 14950 + }, + { + "epoch": 5.58, + "grad_norm": 0.6487345099449158, + "learning_rate": 8.545728643216082e-06, + "loss": 0.0066, + "step": 14975 + }, + { + "epoch": 5.59, + "grad_norm": 0.9641698598861694, + "learning_rate": 8.54321608040201e-06, + "loss": 0.0073, + "step": 15000 + }, + { + "epoch": 5.59, + "eval_loss": 0.1262369006872177, + "eval_runtime": 2061.1973, + "eval_samples_per_second": 0.694, + "eval_steps_per_second": 0.694, + "eval_wer": 27.800235809331316, + "step": 15000 + }, + { + "epoch": 5.6, + "grad_norm": 0.5662176012992859, + "learning_rate": 8.54070351758794e-06, + "loss": 0.0069, + "step": 15025 + }, + { + "epoch": 5.61, + "grad_norm": 1.1098682880401611, + "learning_rate": 8.53819095477387e-06, + "loss": 0.0074, + "step": 15050 + }, + { + "epoch": 5.62, + "grad_norm": 1.0387194156646729, + "learning_rate": 8.535678391959799e-06, + "loss": 0.0072, + "step": 15075 + }, + { + "epoch": 5.63, + "grad_norm": 0.9752185344696045, + "learning_rate": 8.53316582914573e-06, + "loss": 0.0068, + "step": 15100 + }, + { + "epoch": 5.64, + "grad_norm": 1.153000831604004, + "learning_rate": 8.530653266331659e-06, + "loss": 0.0069, + "step": 15125 + }, + { + "epoch": 5.65, + "grad_norm": 0.7557522654533386, + "learning_rate": 8.528140703517588e-06, + "loss": 0.0071, + "step": 15150 + }, + { + "epoch": 5.66, + "grad_norm": 0.9823975563049316, + "learning_rate": 8.525628140703518e-06, + "loss": 0.0074, + "step": 15175 + }, + { + "epoch": 5.67, + "grad_norm": 1.383996844291687, + "learning_rate": 8.523115577889449e-06, + "loss": 0.0064, + "step": 15200 + }, + { + "epoch": 5.68, + "grad_norm": 0.5517428517341614, + "learning_rate": 8.520603015075376e-06, + "loss": 0.007, + "step": 15225 + }, + { + "epoch": 5.69, + "grad_norm": 0.8920483589172363, + "learning_rate": 8.518090452261307e-06, + "loss": 0.0073, + "step": 15250 + }, + { + "epoch": 5.7, + "grad_norm": 1.2948217391967773, + "learning_rate": 8.515577889447237e-06, + "loss": 0.0073, + "step": 15275 + }, + { + "epoch": 5.7, + "grad_norm": 1.1646603345870972, + "learning_rate": 8.513065326633166e-06, + "loss": 0.0075, + "step": 15300 + }, + { + "epoch": 5.71, + "grad_norm": 1.123123288154602, + "learning_rate": 8.510552763819097e-06, + "loss": 0.0071, + "step": 15325 + }, + { + "epoch": 5.72, + "grad_norm": 1.1207715272903442, + "learning_rate": 8.508040201005025e-06, + "loss": 0.0065, + "step": 15350 + }, + { + "epoch": 5.73, + "grad_norm": 1.1988489627838135, + "learning_rate": 8.505527638190956e-06, + "loss": 0.0068, + "step": 15375 + }, + { + "epoch": 5.74, + "grad_norm": 0.9853920936584473, + "learning_rate": 8.503015075376885e-06, + "loss": 0.007, + "step": 15400 + }, + { + "epoch": 5.75, + "grad_norm": 1.0888432264328003, + "learning_rate": 8.500502512562814e-06, + "loss": 0.0075, + "step": 15425 + }, + { + "epoch": 5.76, + "grad_norm": 1.3345853090286255, + "learning_rate": 8.497989949748744e-06, + "loss": 0.0067, + "step": 15450 + }, + { + "epoch": 5.77, + "grad_norm": 0.7219342589378357, + "learning_rate": 8.495477386934675e-06, + "loss": 0.0071, + "step": 15475 + }, + { + "epoch": 5.78, + "grad_norm": 1.024172067642212, + "learning_rate": 8.492964824120604e-06, + "loss": 0.007, + "step": 15500 + }, + { + "epoch": 5.79, + "grad_norm": 1.3627147674560547, + "learning_rate": 8.490452261306533e-06, + "loss": 0.0073, + "step": 15525 + }, + { + "epoch": 5.8, + "grad_norm": 1.2411257028579712, + "learning_rate": 8.487939698492463e-06, + "loss": 0.0073, + "step": 15550 + }, + { + "epoch": 5.81, + "grad_norm": 1.0183546543121338, + "learning_rate": 8.485427135678392e-06, + "loss": 0.0079, + "step": 15575 + }, + { + "epoch": 5.82, + "grad_norm": 1.0401129722595215, + "learning_rate": 8.482914572864323e-06, + "loss": 0.0075, + "step": 15600 + }, + { + "epoch": 5.83, + "grad_norm": 0.8255440592765808, + "learning_rate": 8.480402010050252e-06, + "loss": 0.0077, + "step": 15625 + }, + { + "epoch": 5.84, + "grad_norm": 0.8791080117225647, + "learning_rate": 8.477889447236182e-06, + "loss": 0.0076, + "step": 15650 + }, + { + "epoch": 5.84, + "grad_norm": 1.3627198934555054, + "learning_rate": 8.475376884422111e-06, + "loss": 0.0071, + "step": 15675 + }, + { + "epoch": 5.85, + "grad_norm": 0.7942774295806885, + "learning_rate": 8.47286432160804e-06, + "loss": 0.0069, + "step": 15700 + }, + { + "epoch": 5.86, + "grad_norm": 1.1304864883422852, + "learning_rate": 8.470351758793971e-06, + "loss": 0.0073, + "step": 15725 + }, + { + "epoch": 5.87, + "grad_norm": 1.1838277578353882, + "learning_rate": 8.4678391959799e-06, + "loss": 0.0074, + "step": 15750 + }, + { + "epoch": 5.88, + "grad_norm": 1.0582891702651978, + "learning_rate": 8.46532663316583e-06, + "loss": 0.0078, + "step": 15775 + }, + { + "epoch": 5.89, + "grad_norm": 1.187500238418579, + "learning_rate": 8.46281407035176e-06, + "loss": 0.0077, + "step": 15800 + }, + { + "epoch": 5.9, + "grad_norm": 0.9973338842391968, + "learning_rate": 8.460301507537689e-06, + "loss": 0.0077, + "step": 15825 + }, + { + "epoch": 5.91, + "grad_norm": 0.8751574754714966, + "learning_rate": 8.457788944723618e-06, + "loss": 0.0076, + "step": 15850 + }, + { + "epoch": 5.92, + "grad_norm": 1.2862788438796997, + "learning_rate": 8.455276381909549e-06, + "loss": 0.0068, + "step": 15875 + }, + { + "epoch": 5.93, + "grad_norm": 1.0356941223144531, + "learning_rate": 8.452763819095478e-06, + "loss": 0.0072, + "step": 15900 + }, + { + "epoch": 5.94, + "grad_norm": 1.052286982536316, + "learning_rate": 8.450251256281408e-06, + "loss": 0.0069, + "step": 15925 + }, + { + "epoch": 5.95, + "grad_norm": 0.7516260147094727, + "learning_rate": 8.447738693467337e-06, + "loss": 0.0069, + "step": 15950 + }, + { + "epoch": 5.96, + "grad_norm": 0.9705252647399902, + "learning_rate": 8.445226130653266e-06, + "loss": 0.0078, + "step": 15975 + }, + { + "epoch": 5.97, + "grad_norm": 1.2846840620040894, + "learning_rate": 8.442713567839197e-06, + "loss": 0.0069, + "step": 16000 + }, + { + "epoch": 5.97, + "eval_loss": 0.12774182856082916, + "eval_runtime": 2053.9998, + "eval_samples_per_second": 0.696, + "eval_steps_per_second": 0.696, + "eval_wer": 28.339228566616136, + "step": 16000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 38, + "save_steps": 1000, + "total_flos": 1.477210947158016e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/kannada/checkpoint-16000/training_args.bin b/checkpoints/whisper-small/kannada/checkpoint-16000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..43cc5ea534819fc917b386a201982e2ad117e2a9 --- /dev/null +++ b/checkpoints/whisper-small/kannada/checkpoint-16000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:820ae6ad1dd2f9b222bd11de202257d55500f3ec0b91b4fd5962b9e5c2940218 +size 4667 diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/config.json b/checkpoints/whisper-small/magahi/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb289bfc120ad77c5505b0ef210c56bf35075f5 --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/generation_config.json b/checkpoints/whisper-small/magahi/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/model.safetensors b/checkpoints/whisper-small/magahi/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c4976b7ed81e8bb599e5cbe64fa84631ed4df4b --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0018d3493a460a4cf52ad04f823098c4029e0021f6671af36f371d19e5a01c51 +size 966995080 diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/optimizer.pt b/checkpoints/whisper-small/magahi/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf2e11cdebd0b4bebe1ed0200791cf97630dd83d --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f89aba9e382418814d8900d5085a3df74d7951723f640acafad33e1ab482e3a5 +size 1925063607 diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/preprocessor_config.json b/checkpoints/whisper-small/magahi/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/rng_state.pth b/checkpoints/whisper-small/magahi/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2627ab4de1eb88a9940f9e121fec5127e6733f5f --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b69a27641aa88b6d5ff41645170a697874fc583e4b2cab75db605ef90a0e5053 +size 14575 diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/scheduler.pt b/checkpoints/whisper-small/magahi/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..de9c429e9999f66f2a1deb8667a2dbd659fdf3df --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d376bb44853fda280719e130a08f368d538fd19cb27ef67e8eef3394f13c1673 +size 627 diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/trainer_state.json b/checkpoints/whisper-small/magahi/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef1d3ccf6c073c250737c8ed5ad5929bef52ee54 --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/trainer_state.json @@ -0,0 +1,5223 @@ +{ + "best_metric": 21.008629989212512, + "best_model_checkpoint": "results/whisper-small/magahi/checkpoint-8000", + "epoch": 6.044325050369375, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 39.72129821777344, + "learning_rate": 4.4e-07, + "loss": 2.145, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 13.177146911621094, + "learning_rate": 9.400000000000001e-07, + "loss": 1.7341, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 6.820940971374512, + "learning_rate": 1.44e-06, + "loss": 1.2007, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 6.103167533874512, + "learning_rate": 1.94e-06, + "loss": 0.9461, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 5.999841213226318, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.8268, + "step": 125 + }, + { + "epoch": 0.05, + "grad_norm": 5.943469047546387, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.7609, + "step": 150 + }, + { + "epoch": 0.06, + "grad_norm": 5.298623085021973, + "learning_rate": 3.44e-06, + "loss": 0.6925, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.349728107452393, + "learning_rate": 3.94e-06, + "loss": 0.6551, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 5.519735813140869, + "learning_rate": 4.440000000000001e-06, + "loss": 0.5966, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 5.527182579040527, + "learning_rate": 4.94e-06, + "loss": 0.5711, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 5.563872814178467, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.5219, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 5.318006992340088, + "learning_rate": 5.94e-06, + "loss": 0.487, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 5.711111068725586, + "learning_rate": 6.440000000000001e-06, + "loss": 0.4227, + "step": 325 + }, + { + "epoch": 0.12, + "grad_norm": 4.308486461639404, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.3994, + "step": 350 + }, + { + "epoch": 0.13, + "grad_norm": 4.903069019317627, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3877, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 4.160550594329834, + "learning_rate": 7.94e-06, + "loss": 0.3572, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 4.42545223236084, + "learning_rate": 8.44e-06, + "loss": 0.3579, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 4.183320045471191, + "learning_rate": 8.94e-06, + "loss": 0.3471, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 3.7942774295806885, + "learning_rate": 9.440000000000001e-06, + "loss": 0.3231, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 4.382524490356445, + "learning_rate": 9.940000000000001e-06, + "loss": 0.331, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 4.386434078216553, + "learning_rate": 9.997788944723618e-06, + "loss": 0.3255, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 3.838149070739746, + "learning_rate": 9.99527638190955e-06, + "loss": 0.3324, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 4.193848609924316, + "learning_rate": 9.992763819095477e-06, + "loss": 0.3156, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 4.455413818359375, + "learning_rate": 9.990251256281408e-06, + "loss": 0.3028, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 3.4735782146453857, + "learning_rate": 9.987738693467337e-06, + "loss": 0.3006, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 3.6454074382781982, + "learning_rate": 9.985226130653267e-06, + "loss": 0.3035, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 4.208563327789307, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2961, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 3.822723388671875, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2915, + "step": 700 + }, + { + "epoch": 0.24, + "grad_norm": 3.3925743103027344, + "learning_rate": 9.977688442211056e-06, + "loss": 0.277, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 3.1764349937438965, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2738, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 3.313093662261963, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2656, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 3.511521577835083, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2683, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 4.0056867599487305, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2625, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 3.4474174976348877, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2619, + "step": 850 + }, + { + "epoch": 0.29, + "grad_norm": 3.7113592624664307, + "learning_rate": 9.962613065326634e-06, + "loss": 0.272, + "step": 875 + }, + { + "epoch": 0.3, + "grad_norm": 2.93918776512146, + "learning_rate": 9.960100502512563e-06, + "loss": 0.2549, + "step": 900 + }, + { + "epoch": 0.31, + "grad_norm": 3.437969446182251, + "learning_rate": 9.957587939698493e-06, + "loss": 0.2595, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 3.2106266021728516, + "learning_rate": 9.955075376884424e-06, + "loss": 0.2523, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 3.5032262802124023, + "learning_rate": 9.952562814070353e-06, + "loss": 0.259, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 3.6168978214263916, + "learning_rate": 9.950050251256282e-06, + "loss": 0.259, + "step": 1000 + }, + { + "epoch": 0.34, + "eval_loss": 0.1873607337474823, + "eval_runtime": 1201.5382, + "eval_samples_per_second": 1.191, + "eval_steps_per_second": 1.191, + "eval_wer": 27.467637540453076, + "step": 1000 + }, + { + "epoch": 0.34, + "grad_norm": 3.2681338787078857, + "learning_rate": 9.947537688442212e-06, + "loss": 0.2494, + "step": 1025 + }, + { + "epoch": 0.35, + "grad_norm": 2.803635835647583, + "learning_rate": 9.945025125628141e-06, + "loss": 0.2496, + "step": 1050 + }, + { + "epoch": 0.36, + "grad_norm": 3.473599672317505, + "learning_rate": 9.94251256281407e-06, + "loss": 0.2332, + "step": 1075 + }, + { + "epoch": 0.37, + "grad_norm": 2.8453943729400635, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2376, + "step": 1100 + }, + { + "epoch": 0.38, + "grad_norm": 3.031691312789917, + "learning_rate": 9.93748743718593e-06, + "loss": 0.2448, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 3.272526979446411, + "learning_rate": 9.93497487437186e-06, + "loss": 0.2438, + "step": 1150 + }, + { + "epoch": 0.39, + "grad_norm": 2.9693005084991455, + "learning_rate": 9.93246231155779e-06, + "loss": 0.228, + "step": 1175 + }, + { + "epoch": 0.4, + "grad_norm": 3.198035478591919, + "learning_rate": 9.929949748743719e-06, + "loss": 0.2329, + "step": 1200 + }, + { + "epoch": 0.41, + "grad_norm": 3.4829468727111816, + "learning_rate": 9.92743718592965e-06, + "loss": 0.2419, + "step": 1225 + }, + { + "epoch": 0.42, + "grad_norm": 2.673646926879883, + "learning_rate": 9.924924623115579e-06, + "loss": 0.2324, + "step": 1250 + }, + { + "epoch": 0.43, + "grad_norm": 3.2711358070373535, + "learning_rate": 9.922412060301508e-06, + "loss": 0.2395, + "step": 1275 + }, + { + "epoch": 0.44, + "grad_norm": 3.1074557304382324, + "learning_rate": 9.91989949748744e-06, + "loss": 0.2328, + "step": 1300 + }, + { + "epoch": 0.44, + "grad_norm": 3.7197329998016357, + "learning_rate": 9.917386934673367e-06, + "loss": 0.2299, + "step": 1325 + }, + { + "epoch": 0.45, + "grad_norm": 3.25182843208313, + "learning_rate": 9.914874371859298e-06, + "loss": 0.2199, + "step": 1350 + }, + { + "epoch": 0.46, + "grad_norm": 3.1656246185302734, + "learning_rate": 9.912361809045227e-06, + "loss": 0.2213, + "step": 1375 + }, + { + "epoch": 0.47, + "grad_norm": 3.196014165878296, + "learning_rate": 9.909849246231157e-06, + "loss": 0.2296, + "step": 1400 + }, + { + "epoch": 0.48, + "grad_norm": 2.7442800998687744, + "learning_rate": 9.907336683417086e-06, + "loss": 0.228, + "step": 1425 + }, + { + "epoch": 0.49, + "grad_norm": 3.120338201522827, + "learning_rate": 9.904824120603015e-06, + "loss": 0.2152, + "step": 1450 + }, + { + "epoch": 0.5, + "grad_norm": 2.885051965713501, + "learning_rate": 9.902311557788945e-06, + "loss": 0.2121, + "step": 1475 + }, + { + "epoch": 0.5, + "grad_norm": 2.547811269760132, + "learning_rate": 9.899798994974876e-06, + "loss": 0.2207, + "step": 1500 + }, + { + "epoch": 0.51, + "grad_norm": 3.5329601764678955, + "learning_rate": 9.897286432160805e-06, + "loss": 0.2258, + "step": 1525 + }, + { + "epoch": 0.52, + "grad_norm": 3.5064892768859863, + "learning_rate": 9.894773869346734e-06, + "loss": 0.22, + "step": 1550 + }, + { + "epoch": 0.53, + "grad_norm": 3.5325844287872314, + "learning_rate": 9.892261306532665e-06, + "loss": 0.2066, + "step": 1575 + }, + { + "epoch": 0.54, + "grad_norm": 2.5220048427581787, + "learning_rate": 9.889748743718593e-06, + "loss": 0.2114, + "step": 1600 + }, + { + "epoch": 0.55, + "grad_norm": 2.9812257289886475, + "learning_rate": 9.887236180904524e-06, + "loss": 0.214, + "step": 1625 + }, + { + "epoch": 0.55, + "grad_norm": 3.280869483947754, + "learning_rate": 9.884723618090453e-06, + "loss": 0.2134, + "step": 1650 + }, + { + "epoch": 0.56, + "grad_norm": 2.7900075912475586, + "learning_rate": 9.882211055276383e-06, + "loss": 0.2136, + "step": 1675 + }, + { + "epoch": 0.57, + "grad_norm": 2.5155417919158936, + "learning_rate": 9.879698492462312e-06, + "loss": 0.2139, + "step": 1700 + }, + { + "epoch": 0.58, + "grad_norm": 2.686643362045288, + "learning_rate": 9.877185929648241e-06, + "loss": 0.2148, + "step": 1725 + }, + { + "epoch": 0.59, + "grad_norm": 2.580108165740967, + "learning_rate": 9.874673366834172e-06, + "loss": 0.2171, + "step": 1750 + }, + { + "epoch": 0.6, + "grad_norm": 2.4279048442840576, + "learning_rate": 9.872160804020102e-06, + "loss": 0.2009, + "step": 1775 + }, + { + "epoch": 0.6, + "grad_norm": 2.398742198944092, + "learning_rate": 9.869648241206031e-06, + "loss": 0.206, + "step": 1800 + }, + { + "epoch": 0.61, + "grad_norm": 2.8015499114990234, + "learning_rate": 9.86713567839196e-06, + "loss": 0.2012, + "step": 1825 + }, + { + "epoch": 0.62, + "grad_norm": 2.9622669219970703, + "learning_rate": 9.864623115577891e-06, + "loss": 0.2089, + "step": 1850 + }, + { + "epoch": 0.63, + "grad_norm": 3.0445189476013184, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1968, + "step": 1875 + }, + { + "epoch": 0.64, + "grad_norm": 2.9392101764678955, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1987, + "step": 1900 + }, + { + "epoch": 0.65, + "grad_norm": 3.27531099319458, + "learning_rate": 9.85708542713568e-06, + "loss": 0.2016, + "step": 1925 + }, + { + "epoch": 0.65, + "grad_norm": 2.4386723041534424, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1955, + "step": 1950 + }, + { + "epoch": 0.66, + "grad_norm": 3.147075653076172, + "learning_rate": 9.85206030150754e-06, + "loss": 0.2023, + "step": 1975 + }, + { + "epoch": 0.67, + "grad_norm": 2.635172128677368, + "learning_rate": 9.849547738693467e-06, + "loss": 0.199, + "step": 2000 + }, + { + "epoch": 0.67, + "eval_loss": 0.151046022772789, + "eval_runtime": 1167.8275, + "eval_samples_per_second": 1.225, + "eval_steps_per_second": 1.225, + "eval_wer": 23.361650485436893, + "step": 2000 + }, + { + "epoch": 0.68, + "grad_norm": 2.468536376953125, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1998, + "step": 2025 + }, + { + "epoch": 0.69, + "grad_norm": 2.5597379207611084, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1927, + "step": 2050 + }, + { + "epoch": 0.7, + "grad_norm": 3.1645500659942627, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1905, + "step": 2075 + }, + { + "epoch": 0.71, + "grad_norm": 2.961488723754883, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1883, + "step": 2100 + }, + { + "epoch": 0.71, + "grad_norm": 3.1177005767822266, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1927, + "step": 2125 + }, + { + "epoch": 0.72, + "grad_norm": 2.724562406539917, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1969, + "step": 2150 + }, + { + "epoch": 0.73, + "grad_norm": 2.5879526138305664, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1971, + "step": 2175 + }, + { + "epoch": 0.74, + "grad_norm": 3.7805097103118896, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1914, + "step": 2200 + }, + { + "epoch": 0.75, + "grad_norm": 2.707535743713379, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1939, + "step": 2225 + }, + { + "epoch": 0.76, + "grad_norm": 2.698673725128174, + "learning_rate": 9.824422110552766e-06, + "loss": 0.2016, + "step": 2250 + }, + { + "epoch": 0.76, + "grad_norm": 2.9778943061828613, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1994, + "step": 2275 + }, + { + "epoch": 0.77, + "grad_norm": 3.004328727722168, + "learning_rate": 9.819396984924624e-06, + "loss": 0.1871, + "step": 2300 + }, + { + "epoch": 0.78, + "grad_norm": 2.208750009536743, + "learning_rate": 9.816884422110553e-06, + "loss": 0.189, + "step": 2325 + }, + { + "epoch": 0.79, + "grad_norm": 2.8214900493621826, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1839, + "step": 2350 + }, + { + "epoch": 0.8, + "grad_norm": 3.2557690143585205, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1944, + "step": 2375 + }, + { + "epoch": 0.81, + "grad_norm": 2.8927676677703857, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1889, + "step": 2400 + }, + { + "epoch": 0.81, + "grad_norm": 3.0161094665527344, + "learning_rate": 9.806834170854272e-06, + "loss": 0.191, + "step": 2425 + }, + { + "epoch": 0.82, + "grad_norm": 2.7951087951660156, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1857, + "step": 2450 + }, + { + "epoch": 0.83, + "grad_norm": 2.7918081283569336, + "learning_rate": 9.801809045226131e-06, + "loss": 0.1892, + "step": 2475 + }, + { + "epoch": 0.84, + "grad_norm": 2.923698902130127, + "learning_rate": 9.79929648241206e-06, + "loss": 0.182, + "step": 2500 + }, + { + "epoch": 0.85, + "grad_norm": 2.4802989959716797, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1798, + "step": 2525 + }, + { + "epoch": 0.86, + "grad_norm": 3.703587055206299, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1927, + "step": 2550 + }, + { + "epoch": 0.86, + "grad_norm": 3.0629055500030518, + "learning_rate": 9.79175879396985e-06, + "loss": 0.1825, + "step": 2575 + }, + { + "epoch": 0.87, + "grad_norm": 2.7950501441955566, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1866, + "step": 2600 + }, + { + "epoch": 0.88, + "grad_norm": 2.4640932083129883, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1785, + "step": 2625 + }, + { + "epoch": 0.89, + "grad_norm": 2.7699997425079346, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1826, + "step": 2650 + }, + { + "epoch": 0.9, + "grad_norm": 2.592036485671997, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1726, + "step": 2675 + }, + { + "epoch": 0.91, + "grad_norm": 2.5791454315185547, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1771, + "step": 2700 + }, + { + "epoch": 0.92, + "grad_norm": 2.283203601837158, + "learning_rate": 9.776683417085428e-06, + "loss": 0.1708, + "step": 2725 + }, + { + "epoch": 0.92, + "grad_norm": 2.4471659660339355, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1749, + "step": 2750 + }, + { + "epoch": 0.93, + "grad_norm": 2.5702602863311768, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1709, + "step": 2775 + }, + { + "epoch": 0.94, + "grad_norm": 2.9458553791046143, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1798, + "step": 2800 + }, + { + "epoch": 0.95, + "grad_norm": 2.439492702484131, + "learning_rate": 9.766633165829147e-06, + "loss": 0.1738, + "step": 2825 + }, + { + "epoch": 0.96, + "grad_norm": 2.6137142181396484, + "learning_rate": 9.764120603015076e-06, + "loss": 0.1722, + "step": 2850 + }, + { + "epoch": 0.97, + "grad_norm": 2.6502785682678223, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1708, + "step": 2875 + }, + { + "epoch": 0.97, + "grad_norm": 2.2714056968688965, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1741, + "step": 2900 + }, + { + "epoch": 0.98, + "grad_norm": 2.8880789279937744, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1737, + "step": 2925 + }, + { + "epoch": 0.99, + "grad_norm": 2.721179485321045, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1761, + "step": 2950 + }, + { + "epoch": 1.0, + "grad_norm": 3.0108931064605713, + "learning_rate": 9.751557788944724e-06, + "loss": 0.1689, + "step": 2975 + }, + { + "epoch": 1.01, + "grad_norm": 2.459514617919922, + "learning_rate": 9.749045226130654e-06, + "loss": 0.1482, + "step": 3000 + }, + { + "epoch": 1.01, + "eval_loss": 0.14289654791355133, + "eval_runtime": 1227.1882, + "eval_samples_per_second": 1.166, + "eval_steps_per_second": 1.166, + "eval_wer": 22.06715210355987, + "step": 3000 + }, + { + "epoch": 1.02, + "grad_norm": 2.221174716949463, + "learning_rate": 9.746532663316583e-06, + "loss": 0.1391, + "step": 3025 + }, + { + "epoch": 1.02, + "grad_norm": 2.442368268966675, + "learning_rate": 9.744020100502514e-06, + "loss": 0.1372, + "step": 3050 + }, + { + "epoch": 1.03, + "grad_norm": 2.1104307174682617, + "learning_rate": 9.741507537688443e-06, + "loss": 0.136, + "step": 3075 + }, + { + "epoch": 1.04, + "grad_norm": 2.358562469482422, + "learning_rate": 9.738994974874373e-06, + "loss": 0.1391, + "step": 3100 + }, + { + "epoch": 1.05, + "grad_norm": 2.236595392227173, + "learning_rate": 9.736482412060302e-06, + "loss": 0.1416, + "step": 3125 + }, + { + "epoch": 1.06, + "grad_norm": 2.8690948486328125, + "learning_rate": 9.733969849246231e-06, + "loss": 0.1418, + "step": 3150 + }, + { + "epoch": 1.07, + "grad_norm": 2.6488163471221924, + "learning_rate": 9.731457286432162e-06, + "loss": 0.1475, + "step": 3175 + }, + { + "epoch": 1.07, + "grad_norm": 2.4958205223083496, + "learning_rate": 9.728944723618092e-06, + "loss": 0.1366, + "step": 3200 + }, + { + "epoch": 1.08, + "grad_norm": 2.585240364074707, + "learning_rate": 9.726432160804021e-06, + "loss": 0.1328, + "step": 3225 + }, + { + "epoch": 1.09, + "grad_norm": 2.27949857711792, + "learning_rate": 9.72391959798995e-06, + "loss": 0.1349, + "step": 3250 + }, + { + "epoch": 1.1, + "grad_norm": 2.327016592025757, + "learning_rate": 9.721407035175881e-06, + "loss": 0.1408, + "step": 3275 + }, + { + "epoch": 1.11, + "grad_norm": 2.265430450439453, + "learning_rate": 9.718894472361809e-06, + "loss": 0.1364, + "step": 3300 + }, + { + "epoch": 1.12, + "grad_norm": 2.482043504714966, + "learning_rate": 9.71638190954774e-06, + "loss": 0.1371, + "step": 3325 + }, + { + "epoch": 1.12, + "grad_norm": 2.1749870777130127, + "learning_rate": 9.71386934673367e-06, + "loss": 0.141, + "step": 3350 + }, + { + "epoch": 1.13, + "grad_norm": 2.319316864013672, + "learning_rate": 9.711356783919599e-06, + "loss": 0.1454, + "step": 3375 + }, + { + "epoch": 1.14, + "grad_norm": 2.275709867477417, + "learning_rate": 9.70884422110553e-06, + "loss": 0.1334, + "step": 3400 + }, + { + "epoch": 1.15, + "grad_norm": 2.788618564605713, + "learning_rate": 9.706331658291457e-06, + "loss": 0.1342, + "step": 3425 + }, + { + "epoch": 1.16, + "grad_norm": 2.423830032348633, + "learning_rate": 9.703819095477388e-06, + "loss": 0.1336, + "step": 3450 + }, + { + "epoch": 1.17, + "grad_norm": 2.8827359676361084, + "learning_rate": 9.701306532663318e-06, + "loss": 0.1384, + "step": 3475 + }, + { + "epoch": 1.18, + "grad_norm": 2.0542421340942383, + "learning_rate": 9.698793969849247e-06, + "loss": 0.1305, + "step": 3500 + }, + { + "epoch": 1.18, + "grad_norm": 2.383535385131836, + "learning_rate": 9.696281407035176e-06, + "loss": 0.1298, + "step": 3525 + }, + { + "epoch": 1.19, + "grad_norm": 2.556319236755371, + "learning_rate": 9.693768844221107e-06, + "loss": 0.14, + "step": 3550 + }, + { + "epoch": 1.2, + "grad_norm": 2.6829705238342285, + "learning_rate": 9.691256281407035e-06, + "loss": 0.1402, + "step": 3575 + }, + { + "epoch": 1.21, + "grad_norm": 2.322615385055542, + "learning_rate": 9.688743718592966e-06, + "loss": 0.1293, + "step": 3600 + }, + { + "epoch": 1.22, + "grad_norm": 2.2521719932556152, + "learning_rate": 9.686231155778895e-06, + "loss": 0.1306, + "step": 3625 + }, + { + "epoch": 1.23, + "grad_norm": 3.193779468536377, + "learning_rate": 9.683718592964825e-06, + "loss": 0.1387, + "step": 3650 + }, + { + "epoch": 1.23, + "grad_norm": 2.440735340118408, + "learning_rate": 9.681206030150756e-06, + "loss": 0.1329, + "step": 3675 + }, + { + "epoch": 1.24, + "grad_norm": 2.5718159675598145, + "learning_rate": 9.678693467336683e-06, + "loss": 0.1297, + "step": 3700 + }, + { + "epoch": 1.25, + "grad_norm": 2.4102578163146973, + "learning_rate": 9.676180904522614e-06, + "loss": 0.1331, + "step": 3725 + }, + { + "epoch": 1.26, + "grad_norm": 2.4205849170684814, + "learning_rate": 9.673668341708544e-06, + "loss": 0.136, + "step": 3750 + }, + { + "epoch": 1.27, + "grad_norm": 2.6619298458099365, + "learning_rate": 9.671155778894473e-06, + "loss": 0.1249, + "step": 3775 + }, + { + "epoch": 1.28, + "grad_norm": 2.537912607192993, + "learning_rate": 9.668643216080404e-06, + "loss": 0.1345, + "step": 3800 + }, + { + "epoch": 1.28, + "grad_norm": 2.5684969425201416, + "learning_rate": 9.666130653266333e-06, + "loss": 0.1278, + "step": 3825 + }, + { + "epoch": 1.29, + "grad_norm": 2.6710333824157715, + "learning_rate": 9.663618090452263e-06, + "loss": 0.1273, + "step": 3850 + }, + { + "epoch": 1.3, + "grad_norm": 2.2383315563201904, + "learning_rate": 9.661105527638192e-06, + "loss": 0.1316, + "step": 3875 + }, + { + "epoch": 1.31, + "grad_norm": 2.774614095687866, + "learning_rate": 9.658592964824121e-06, + "loss": 0.1407, + "step": 3900 + }, + { + "epoch": 1.32, + "grad_norm": 2.545363664627075, + "learning_rate": 9.65608040201005e-06, + "loss": 0.1294, + "step": 3925 + }, + { + "epoch": 1.33, + "grad_norm": 2.440474510192871, + "learning_rate": 9.653567839195982e-06, + "loss": 0.1254, + "step": 3950 + }, + { + "epoch": 1.33, + "grad_norm": 2.898801565170288, + "learning_rate": 9.651055276381909e-06, + "loss": 0.1269, + "step": 3975 + }, + { + "epoch": 1.34, + "grad_norm": 2.3400776386260986, + "learning_rate": 9.64854271356784e-06, + "loss": 0.1362, + "step": 4000 + }, + { + "epoch": 1.34, + "eval_loss": 0.13717922568321228, + "eval_runtime": 1174.7815, + "eval_samples_per_second": 1.218, + "eval_steps_per_second": 1.218, + "eval_wer": 21.062567421790725, + "step": 4000 + }, + { + "epoch": 1.35, + "grad_norm": 2.4273762702941895, + "learning_rate": 9.64603015075377e-06, + "loss": 0.133, + "step": 4025 + }, + { + "epoch": 1.36, + "grad_norm": 2.504239797592163, + "learning_rate": 9.643517587939699e-06, + "loss": 0.126, + "step": 4050 + }, + { + "epoch": 1.37, + "grad_norm": 2.410475254058838, + "learning_rate": 9.64100502512563e-06, + "loss": 0.1292, + "step": 4075 + }, + { + "epoch": 1.38, + "grad_norm": 2.9249308109283447, + "learning_rate": 9.638492462311559e-06, + "loss": 0.1264, + "step": 4100 + }, + { + "epoch": 1.39, + "grad_norm": 2.323030471801758, + "learning_rate": 9.635979899497488e-06, + "loss": 0.1335, + "step": 4125 + }, + { + "epoch": 1.39, + "grad_norm": 2.552671432495117, + "learning_rate": 9.633467336683418e-06, + "loss": 0.1309, + "step": 4150 + }, + { + "epoch": 1.4, + "grad_norm": 2.60703182220459, + "learning_rate": 9.630954773869347e-06, + "loss": 0.1197, + "step": 4175 + }, + { + "epoch": 1.41, + "grad_norm": 2.547861099243164, + "learning_rate": 9.628442211055276e-06, + "loss": 0.1362, + "step": 4200 + }, + { + "epoch": 1.42, + "grad_norm": 2.373659133911133, + "learning_rate": 9.625929648241207e-06, + "loss": 0.1286, + "step": 4225 + }, + { + "epoch": 1.43, + "grad_norm": 2.3864593505859375, + "learning_rate": 9.623417085427137e-06, + "loss": 0.1235, + "step": 4250 + }, + { + "epoch": 1.44, + "grad_norm": 2.7396862506866455, + "learning_rate": 9.620904522613066e-06, + "loss": 0.1259, + "step": 4275 + }, + { + "epoch": 1.44, + "grad_norm": 2.421220541000366, + "learning_rate": 9.618391959798995e-06, + "loss": 0.1232, + "step": 4300 + }, + { + "epoch": 1.45, + "grad_norm": 2.4374351501464844, + "learning_rate": 9.615879396984925e-06, + "loss": 0.1261, + "step": 4325 + }, + { + "epoch": 1.46, + "grad_norm": 2.3585216999053955, + "learning_rate": 9.613366834170856e-06, + "loss": 0.1207, + "step": 4350 + }, + { + "epoch": 1.47, + "grad_norm": 2.4970037937164307, + "learning_rate": 9.610854271356785e-06, + "loss": 0.1237, + "step": 4375 + }, + { + "epoch": 1.48, + "grad_norm": 2.2579543590545654, + "learning_rate": 9.608341708542714e-06, + "loss": 0.1193, + "step": 4400 + }, + { + "epoch": 1.49, + "grad_norm": 2.5506229400634766, + "learning_rate": 9.605829145728644e-06, + "loss": 0.1184, + "step": 4425 + }, + { + "epoch": 1.49, + "grad_norm": 2.4709718227386475, + "learning_rate": 9.603316582914573e-06, + "loss": 0.1209, + "step": 4450 + }, + { + "epoch": 1.5, + "grad_norm": 2.628450870513916, + "learning_rate": 9.600804020100504e-06, + "loss": 0.1274, + "step": 4475 + }, + { + "epoch": 1.51, + "grad_norm": 2.216569185256958, + "learning_rate": 9.598291457286433e-06, + "loss": 0.124, + "step": 4500 + }, + { + "epoch": 1.52, + "grad_norm": 2.750291347503662, + "learning_rate": 9.595778894472363e-06, + "loss": 0.1221, + "step": 4525 + }, + { + "epoch": 1.53, + "grad_norm": 2.2653887271881104, + "learning_rate": 9.593266331658292e-06, + "loss": 0.1204, + "step": 4550 + }, + { + "epoch": 1.54, + "grad_norm": 2.8464314937591553, + "learning_rate": 9.590753768844221e-06, + "loss": 0.1256, + "step": 4575 + }, + { + "epoch": 1.54, + "grad_norm": 2.5501961708068848, + "learning_rate": 9.58824120603015e-06, + "loss": 0.1203, + "step": 4600 + }, + { + "epoch": 1.55, + "grad_norm": 2.6528685092926025, + "learning_rate": 9.585728643216082e-06, + "loss": 0.1274, + "step": 4625 + }, + { + "epoch": 1.56, + "grad_norm": 2.4446773529052734, + "learning_rate": 9.583216080402011e-06, + "loss": 0.1248, + "step": 4650 + }, + { + "epoch": 1.57, + "grad_norm": 2.576296091079712, + "learning_rate": 9.58070351758794e-06, + "loss": 0.1199, + "step": 4675 + }, + { + "epoch": 1.58, + "grad_norm": 2.827054738998413, + "learning_rate": 9.57819095477387e-06, + "loss": 0.1157, + "step": 4700 + }, + { + "epoch": 1.59, + "grad_norm": 2.611633777618408, + "learning_rate": 9.575678391959799e-06, + "loss": 0.1249, + "step": 4725 + }, + { + "epoch": 1.6, + "grad_norm": 2.2765488624572754, + "learning_rate": 9.57316582914573e-06, + "loss": 0.1184, + "step": 4750 + }, + { + "epoch": 1.6, + "grad_norm": 2.6610405445098877, + "learning_rate": 9.57065326633166e-06, + "loss": 0.1187, + "step": 4775 + }, + { + "epoch": 1.61, + "grad_norm": 2.481962203979492, + "learning_rate": 9.568140703517589e-06, + "loss": 0.1234, + "step": 4800 + }, + { + "epoch": 1.62, + "grad_norm": 2.317012071609497, + "learning_rate": 9.565628140703518e-06, + "loss": 0.1155, + "step": 4825 + }, + { + "epoch": 1.63, + "grad_norm": 2.381338596343994, + "learning_rate": 9.563115577889447e-06, + "loss": 0.1178, + "step": 4850 + }, + { + "epoch": 1.64, + "grad_norm": 2.443439483642578, + "learning_rate": 9.560603015075378e-06, + "loss": 0.1224, + "step": 4875 + }, + { + "epoch": 1.65, + "grad_norm": 2.2828879356384277, + "learning_rate": 9.558090452261308e-06, + "loss": 0.1181, + "step": 4900 + }, + { + "epoch": 1.65, + "grad_norm": 2.5565879344940186, + "learning_rate": 9.555577889447237e-06, + "loss": 0.1161, + "step": 4925 + }, + { + "epoch": 1.66, + "grad_norm": 2.2376959323883057, + "learning_rate": 9.553065326633166e-06, + "loss": 0.1216, + "step": 4950 + }, + { + "epoch": 1.67, + "grad_norm": 2.560154438018799, + "learning_rate": 9.550552763819096e-06, + "loss": 0.1161, + "step": 4975 + }, + { + "epoch": 1.68, + "grad_norm": 2.5663681030273438, + "learning_rate": 9.548040201005025e-06, + "loss": 0.1199, + "step": 5000 + }, + { + "epoch": 1.68, + "eval_loss": 0.13724832236766815, + "eval_runtime": 1170.5114, + "eval_samples_per_second": 1.223, + "eval_steps_per_second": 1.223, + "eval_wer": 21.419902912621357, + "step": 5000 + }, + { + "epoch": 1.69, + "grad_norm": 2.176666498184204, + "learning_rate": 9.545527638190956e-06, + "loss": 0.1176, + "step": 5025 + }, + { + "epoch": 1.7, + "grad_norm": 2.5469439029693604, + "learning_rate": 9.543015075376885e-06, + "loss": 0.1122, + "step": 5050 + }, + { + "epoch": 1.7, + "grad_norm": 2.704345703125, + "learning_rate": 9.540502512562815e-06, + "loss": 0.1178, + "step": 5075 + }, + { + "epoch": 1.71, + "grad_norm": 2.304443359375, + "learning_rate": 9.537989949748746e-06, + "loss": 0.1156, + "step": 5100 + }, + { + "epoch": 1.72, + "grad_norm": 2.933821678161621, + "learning_rate": 9.535477386934673e-06, + "loss": 0.121, + "step": 5125 + }, + { + "epoch": 1.73, + "grad_norm": 2.4044809341430664, + "learning_rate": 9.532964824120604e-06, + "loss": 0.1111, + "step": 5150 + }, + { + "epoch": 1.74, + "grad_norm": 2.482370138168335, + "learning_rate": 9.530452261306534e-06, + "loss": 0.1171, + "step": 5175 + }, + { + "epoch": 1.75, + "grad_norm": 2.282125949859619, + "learning_rate": 9.527939698492463e-06, + "loss": 0.1157, + "step": 5200 + }, + { + "epoch": 1.75, + "grad_norm": 2.149691104888916, + "learning_rate": 9.525427135678392e-06, + "loss": 0.113, + "step": 5225 + }, + { + "epoch": 1.76, + "grad_norm": 2.284886121749878, + "learning_rate": 9.522914572864322e-06, + "loss": 0.1199, + "step": 5250 + }, + { + "epoch": 1.77, + "grad_norm": 2.6984002590179443, + "learning_rate": 9.520402010050253e-06, + "loss": 0.1158, + "step": 5275 + }, + { + "epoch": 1.78, + "grad_norm": 2.3796732425689697, + "learning_rate": 9.517889447236182e-06, + "loss": 0.1154, + "step": 5300 + }, + { + "epoch": 1.79, + "grad_norm": 2.3246266841888428, + "learning_rate": 9.515376884422111e-06, + "loss": 0.1137, + "step": 5325 + }, + { + "epoch": 1.8, + "grad_norm": 2.189887046813965, + "learning_rate": 9.51286432160804e-06, + "loss": 0.1231, + "step": 5350 + }, + { + "epoch": 1.8, + "grad_norm": 2.841104745864868, + "learning_rate": 9.510351758793972e-06, + "loss": 0.1176, + "step": 5375 + }, + { + "epoch": 1.81, + "grad_norm": 2.5323596000671387, + "learning_rate": 9.5078391959799e-06, + "loss": 0.1105, + "step": 5400 + }, + { + "epoch": 1.82, + "grad_norm": 2.476283311843872, + "learning_rate": 9.50532663316583e-06, + "loss": 0.1157, + "step": 5425 + }, + { + "epoch": 1.83, + "grad_norm": 2.3976869583129883, + "learning_rate": 9.50281407035176e-06, + "loss": 0.1164, + "step": 5450 + }, + { + "epoch": 1.84, + "grad_norm": 2.546203136444092, + "learning_rate": 9.500301507537689e-06, + "loss": 0.1126, + "step": 5475 + }, + { + "epoch": 1.85, + "grad_norm": 2.3656904697418213, + "learning_rate": 9.49778894472362e-06, + "loss": 0.1068, + "step": 5500 + }, + { + "epoch": 1.86, + "grad_norm": 2.4013137817382812, + "learning_rate": 9.49527638190955e-06, + "loss": 0.1101, + "step": 5525 + }, + { + "epoch": 1.86, + "grad_norm": 2.3575029373168945, + "learning_rate": 9.492763819095479e-06, + "loss": 0.1122, + "step": 5550 + }, + { + "epoch": 1.87, + "grad_norm": 2.934225082397461, + "learning_rate": 9.490251256281408e-06, + "loss": 0.115, + "step": 5575 + }, + { + "epoch": 1.88, + "grad_norm": 2.4692862033843994, + "learning_rate": 9.487738693467337e-06, + "loss": 0.1145, + "step": 5600 + }, + { + "epoch": 1.89, + "grad_norm": 2.4839847087860107, + "learning_rate": 9.485226130653267e-06, + "loss": 0.1129, + "step": 5625 + }, + { + "epoch": 1.9, + "grad_norm": 2.724759340286255, + "learning_rate": 9.482713567839198e-06, + "loss": 0.1109, + "step": 5650 + }, + { + "epoch": 1.91, + "grad_norm": 2.7479400634765625, + "learning_rate": 9.480201005025125e-06, + "loss": 0.1139, + "step": 5675 + }, + { + "epoch": 1.91, + "grad_norm": 2.3324952125549316, + "learning_rate": 9.477688442211056e-06, + "loss": 0.1088, + "step": 5700 + }, + { + "epoch": 1.92, + "grad_norm": 2.6855146884918213, + "learning_rate": 9.475175879396985e-06, + "loss": 0.1053, + "step": 5725 + }, + { + "epoch": 1.93, + "grad_norm": 2.812222957611084, + "learning_rate": 9.472663316582915e-06, + "loss": 0.1055, + "step": 5750 + }, + { + "epoch": 1.94, + "grad_norm": 2.0930349826812744, + "learning_rate": 9.470150753768846e-06, + "loss": 0.1096, + "step": 5775 + }, + { + "epoch": 1.95, + "grad_norm": 2.530984401702881, + "learning_rate": 9.467638190954775e-06, + "loss": 0.1048, + "step": 5800 + }, + { + "epoch": 1.96, + "grad_norm": 2.376852512359619, + "learning_rate": 9.465125628140704e-06, + "loss": 0.108, + "step": 5825 + }, + { + "epoch": 1.96, + "grad_norm": 2.802478313446045, + "learning_rate": 9.462613065326634e-06, + "loss": 0.1053, + "step": 5850 + }, + { + "epoch": 1.97, + "grad_norm": 2.7700185775756836, + "learning_rate": 9.460100502512563e-06, + "loss": 0.1083, + "step": 5875 + }, + { + "epoch": 1.98, + "grad_norm": 2.488320827484131, + "learning_rate": 9.457587939698494e-06, + "loss": 0.1065, + "step": 5900 + }, + { + "epoch": 1.99, + "grad_norm": 2.453340768814087, + "learning_rate": 9.455075376884423e-06, + "loss": 0.1086, + "step": 5925 + }, + { + "epoch": 2.0, + "grad_norm": 2.5484070777893066, + "learning_rate": 9.452562814070353e-06, + "loss": 0.1128, + "step": 5950 + }, + { + "epoch": 2.01, + "grad_norm": 1.8283330202102661, + "learning_rate": 9.450050251256282e-06, + "loss": 0.08, + "step": 5975 + }, + { + "epoch": 2.01, + "grad_norm": 2.0970919132232666, + "learning_rate": 9.447537688442211e-06, + "loss": 0.074, + "step": 6000 + }, + { + "epoch": 2.01, + "eval_loss": 0.14364008605480194, + "eval_runtime": 1165.6292, + "eval_samples_per_second": 1.228, + "eval_steps_per_second": 1.228, + "eval_wer": 21.291801510248114, + "step": 6000 + }, + { + "epoch": 2.02, + "grad_norm": 1.8447577953338623, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0694, + "step": 6025 + }, + { + "epoch": 2.03, + "grad_norm": 2.342254161834717, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0741, + "step": 6050 + }, + { + "epoch": 2.04, + "grad_norm": 2.114255428314209, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0745, + "step": 6075 + }, + { + "epoch": 2.05, + "grad_norm": 2.4025626182556152, + "learning_rate": 9.43748743718593e-06, + "loss": 0.068, + "step": 6100 + }, + { + "epoch": 2.06, + "grad_norm": 2.389948606491089, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0669, + "step": 6125 + }, + { + "epoch": 2.07, + "grad_norm": 2.3741841316223145, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0765, + "step": 6150 + }, + { + "epoch": 2.07, + "grad_norm": 2.056225061416626, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0704, + "step": 6175 + }, + { + "epoch": 2.08, + "grad_norm": 2.3612024784088135, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0673, + "step": 6200 + }, + { + "epoch": 2.09, + "grad_norm": 2.1008825302124023, + "learning_rate": 9.424924623115579e-06, + "loss": 0.077, + "step": 6225 + }, + { + "epoch": 2.1, + "grad_norm": 2.0549325942993164, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0684, + "step": 6250 + }, + { + "epoch": 2.11, + "grad_norm": 1.9537848234176636, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0713, + "step": 6275 + }, + { + "epoch": 2.12, + "grad_norm": 2.2059621810913086, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0757, + "step": 6300 + }, + { + "epoch": 2.12, + "grad_norm": 2.234764337539673, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0694, + "step": 6325 + }, + { + "epoch": 2.13, + "grad_norm": 1.97952401638031, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0726, + "step": 6350 + }, + { + "epoch": 2.14, + "grad_norm": 2.240694522857666, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0685, + "step": 6375 + }, + { + "epoch": 2.15, + "grad_norm": 1.9172332286834717, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0753, + "step": 6400 + }, + { + "epoch": 2.16, + "grad_norm": 2.6712541580200195, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0708, + "step": 6425 + }, + { + "epoch": 2.17, + "grad_norm": 2.2933807373046875, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0754, + "step": 6450 + }, + { + "epoch": 2.17, + "grad_norm": 2.6441142559051514, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0729, + "step": 6475 + }, + { + "epoch": 2.18, + "grad_norm": 2.5856730937957764, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0648, + "step": 6500 + }, + { + "epoch": 2.19, + "grad_norm": 2.601001739501953, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0685, + "step": 6525 + }, + { + "epoch": 2.2, + "grad_norm": 2.273202657699585, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0689, + "step": 6550 + }, + { + "epoch": 2.21, + "grad_norm": 2.585813283920288, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0729, + "step": 6575 + }, + { + "epoch": 2.22, + "grad_norm": 2.3242039680480957, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0682, + "step": 6600 + }, + { + "epoch": 2.22, + "grad_norm": 2.071826934814453, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0679, + "step": 6625 + }, + { + "epoch": 2.23, + "grad_norm": 2.253406286239624, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0716, + "step": 6650 + }, + { + "epoch": 2.24, + "grad_norm": 2.4425997734069824, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0697, + "step": 6675 + }, + { + "epoch": 2.25, + "grad_norm": 2.0006203651428223, + "learning_rate": 9.377185929648241e-06, + "loss": 0.076, + "step": 6700 + }, + { + "epoch": 2.26, + "grad_norm": 2.226357936859131, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0729, + "step": 6725 + }, + { + "epoch": 2.27, + "grad_norm": 2.242588520050049, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0693, + "step": 6750 + }, + { + "epoch": 2.28, + "grad_norm": 2.6291513442993164, + "learning_rate": 9.36964824120603e-06, + "loss": 0.069, + "step": 6775 + }, + { + "epoch": 2.28, + "grad_norm": 2.3540093898773193, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0681, + "step": 6800 + }, + { + "epoch": 2.29, + "grad_norm": 2.225234031677246, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0602, + "step": 6825 + }, + { + "epoch": 2.3, + "grad_norm": 2.5975372791290283, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0679, + "step": 6850 + }, + { + "epoch": 2.31, + "grad_norm": 2.1644558906555176, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0655, + "step": 6875 + }, + { + "epoch": 2.32, + "grad_norm": 2.3095717430114746, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0677, + "step": 6900 + }, + { + "epoch": 2.33, + "grad_norm": 1.897213339805603, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0691, + "step": 6925 + }, + { + "epoch": 2.33, + "grad_norm": 1.9625407457351685, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0681, + "step": 6950 + }, + { + "epoch": 2.34, + "grad_norm": 2.3655757904052734, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0682, + "step": 6975 + }, + { + "epoch": 2.35, + "grad_norm": 2.3345818519592285, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0636, + "step": 7000 + }, + { + "epoch": 2.35, + "eval_loss": 0.15349695086479187, + "eval_runtime": 1167.3961, + "eval_samples_per_second": 1.226, + "eval_steps_per_second": 1.226, + "eval_wer": 21.24460625674218, + "step": 7000 + }, + { + "epoch": 2.36, + "grad_norm": 2.286100387573242, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0685, + "step": 7025 + }, + { + "epoch": 2.37, + "grad_norm": 2.8543434143066406, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0699, + "step": 7050 + }, + { + "epoch": 2.38, + "grad_norm": 2.3908469676971436, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0696, + "step": 7075 + }, + { + "epoch": 2.38, + "grad_norm": 2.284416437149048, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0683, + "step": 7100 + }, + { + "epoch": 2.39, + "grad_norm": 2.841937780380249, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0711, + "step": 7125 + }, + { + "epoch": 2.4, + "grad_norm": 1.9832344055175781, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0658, + "step": 7150 + }, + { + "epoch": 2.41, + "grad_norm": 2.4458470344543457, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0644, + "step": 7175 + }, + { + "epoch": 2.42, + "grad_norm": 2.4556922912597656, + "learning_rate": 9.326934673366836e-06, + "loss": 0.069, + "step": 7200 + }, + { + "epoch": 2.43, + "grad_norm": 2.0189619064331055, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0657, + "step": 7225 + }, + { + "epoch": 2.43, + "grad_norm": 1.662239909172058, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0675, + "step": 7250 + }, + { + "epoch": 2.44, + "grad_norm": 2.4998669624328613, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0656, + "step": 7275 + }, + { + "epoch": 2.45, + "grad_norm": 2.4205777645111084, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0691, + "step": 7300 + }, + { + "epoch": 2.46, + "grad_norm": 2.3498284816741943, + "learning_rate": 9.314371859296483e-06, + "loss": 0.069, + "step": 7325 + }, + { + "epoch": 2.47, + "grad_norm": 2.7720181941986084, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0625, + "step": 7350 + }, + { + "epoch": 2.48, + "grad_norm": 2.488546371459961, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0617, + "step": 7375 + }, + { + "epoch": 2.48, + "grad_norm": 2.362468719482422, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0725, + "step": 7400 + }, + { + "epoch": 2.49, + "grad_norm": 2.5624284744262695, + "learning_rate": 9.304321608040201e-06, + "loss": 0.065, + "step": 7425 + }, + { + "epoch": 2.5, + "grad_norm": 2.0342113971710205, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0658, + "step": 7450 + }, + { + "epoch": 2.51, + "grad_norm": 2.1466732025146484, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0671, + "step": 7475 + }, + { + "epoch": 2.52, + "grad_norm": 2.955054521560669, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0644, + "step": 7500 + }, + { + "epoch": 2.53, + "grad_norm": 2.078531265258789, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0634, + "step": 7525 + }, + { + "epoch": 2.54, + "grad_norm": 2.080968141555786, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0683, + "step": 7550 + }, + { + "epoch": 2.54, + "grad_norm": 2.057974100112915, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0695, + "step": 7575 + }, + { + "epoch": 2.55, + "grad_norm": 2.6082162857055664, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0669, + "step": 7600 + }, + { + "epoch": 2.56, + "grad_norm": 2.4610090255737305, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0675, + "step": 7625 + }, + { + "epoch": 2.57, + "grad_norm": 2.265695810317993, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0641, + "step": 7650 + }, + { + "epoch": 2.58, + "grad_norm": 2.125345468521118, + "learning_rate": 9.279195979899498e-06, + "loss": 0.066, + "step": 7675 + }, + { + "epoch": 2.59, + "grad_norm": 2.7315406799316406, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0672, + "step": 7700 + }, + { + "epoch": 2.59, + "grad_norm": 2.691891670227051, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0643, + "step": 7725 + }, + { + "epoch": 2.6, + "grad_norm": 2.516859769821167, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0631, + "step": 7750 + }, + { + "epoch": 2.61, + "grad_norm": 2.192343235015869, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0684, + "step": 7775 + }, + { + "epoch": 2.62, + "grad_norm": 2.995460033416748, + "learning_rate": 9.266633165829146e-06, + "loss": 0.067, + "step": 7800 + }, + { + "epoch": 2.63, + "grad_norm": 2.3527371883392334, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0615, + "step": 7825 + }, + { + "epoch": 2.64, + "grad_norm": 2.3677992820739746, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0652, + "step": 7850 + }, + { + "epoch": 2.64, + "grad_norm": 2.290177583694458, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0656, + "step": 7875 + }, + { + "epoch": 2.65, + "grad_norm": 2.375612497329712, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0614, + "step": 7900 + }, + { + "epoch": 2.66, + "grad_norm": 2.614301919937134, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0664, + "step": 7925 + }, + { + "epoch": 2.67, + "grad_norm": 2.3204245567321777, + "learning_rate": 9.251557788944724e-06, + "loss": 0.063, + "step": 7950 + }, + { + "epoch": 2.68, + "grad_norm": 2.324167251586914, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0677, + "step": 7975 + }, + { + "epoch": 2.69, + "grad_norm": 2.2451322078704834, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0644, + "step": 8000 + }, + { + "epoch": 2.69, + "eval_loss": 0.15533991158008575, + "eval_runtime": 1170.412, + "eval_samples_per_second": 1.223, + "eval_steps_per_second": 1.223, + "eval_wer": 21.008629989212512, + "step": 8000 + }, + { + "epoch": 2.69, + "grad_norm": 2.1731984615325928, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0601, + "step": 8025 + }, + { + "epoch": 2.7, + "grad_norm": 2.4491796493530273, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0634, + "step": 8050 + }, + { + "epoch": 2.71, + "grad_norm": 2.1626639366149902, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0623, + "step": 8075 + }, + { + "epoch": 2.72, + "grad_norm": 2.6674344539642334, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0675, + "step": 8100 + }, + { + "epoch": 2.73, + "grad_norm": 2.7441794872283936, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0624, + "step": 8125 + }, + { + "epoch": 2.74, + "grad_norm": 2.2594010829925537, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0635, + "step": 8150 + }, + { + "epoch": 2.75, + "grad_norm": 2.347475051879883, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0643, + "step": 8175 + }, + { + "epoch": 2.75, + "grad_norm": 2.758185625076294, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0607, + "step": 8200 + }, + { + "epoch": 2.76, + "grad_norm": 2.1400153636932373, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0658, + "step": 8225 + }, + { + "epoch": 2.77, + "grad_norm": 2.571418046951294, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0626, + "step": 8250 + }, + { + "epoch": 2.78, + "grad_norm": 2.232682228088379, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0629, + "step": 8275 + }, + { + "epoch": 2.79, + "grad_norm": 2.4848692417144775, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0631, + "step": 8300 + }, + { + "epoch": 2.8, + "grad_norm": 1.9396302700042725, + "learning_rate": 9.213869346733669e-06, + "loss": 0.061, + "step": 8325 + }, + { + "epoch": 2.8, + "grad_norm": 2.587855339050293, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0624, + "step": 8350 + }, + { + "epoch": 2.81, + "grad_norm": 2.3923330307006836, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0544, + "step": 8375 + }, + { + "epoch": 2.82, + "grad_norm": 2.8366191387176514, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0558, + "step": 8400 + }, + { + "epoch": 2.83, + "grad_norm": 2.1988399028778076, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0641, + "step": 8425 + }, + { + "epoch": 2.84, + "grad_norm": 2.073111057281494, + "learning_rate": 9.201306532663317e-06, + "loss": 0.063, + "step": 8450 + }, + { + "epoch": 2.85, + "grad_norm": 2.5373809337615967, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0587, + "step": 8475 + }, + { + "epoch": 2.85, + "grad_norm": 2.5859715938568115, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0602, + "step": 8500 + }, + { + "epoch": 2.86, + "grad_norm": 2.5359930992126465, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0564, + "step": 8525 + }, + { + "epoch": 2.87, + "grad_norm": 2.078687906265259, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0594, + "step": 8550 + }, + { + "epoch": 2.88, + "grad_norm": 2.7826240062713623, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0604, + "step": 8575 + }, + { + "epoch": 2.89, + "grad_norm": 2.3181347846984863, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0631, + "step": 8600 + }, + { + "epoch": 2.9, + "grad_norm": 2.6481542587280273, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0642, + "step": 8625 + }, + { + "epoch": 2.9, + "grad_norm": 2.3537025451660156, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0557, + "step": 8650 + }, + { + "epoch": 2.91, + "grad_norm": 1.9336975812911987, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0601, + "step": 8675 + }, + { + "epoch": 2.92, + "grad_norm": 2.6612730026245117, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0594, + "step": 8700 + }, + { + "epoch": 2.93, + "grad_norm": 2.078516960144043, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0562, + "step": 8725 + }, + { + "epoch": 2.94, + "grad_norm": 2.288940191268921, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0588, + "step": 8750 + }, + { + "epoch": 2.95, + "grad_norm": 2.0204780101776123, + "learning_rate": 9.168643216080404e-06, + "loss": 0.063, + "step": 8775 + }, + { + "epoch": 2.96, + "grad_norm": 2.958662748336792, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0605, + "step": 8800 + }, + { + "epoch": 2.96, + "grad_norm": 2.3344972133636475, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0607, + "step": 8825 + }, + { + "epoch": 2.97, + "grad_norm": 2.310370922088623, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0609, + "step": 8850 + }, + { + "epoch": 2.98, + "grad_norm": 2.226121664047241, + "learning_rate": 9.158592964824121e-06, + "loss": 0.0572, + "step": 8875 + }, + { + "epoch": 2.99, + "grad_norm": 2.631788730621338, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0622, + "step": 8900 + }, + { + "epoch": 3.0, + "grad_norm": 2.1354424953460693, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0596, + "step": 8925 + }, + { + "epoch": 3.01, + "grad_norm": 1.4407596588134766, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0418, + "step": 8950 + }, + { + "epoch": 3.01, + "grad_norm": 2.0394985675811768, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0335, + "step": 8975 + }, + { + "epoch": 3.02, + "grad_norm": 1.7346376180648804, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0352, + "step": 9000 + }, + { + "epoch": 3.02, + "eval_loss": 0.17252571880817413, + "eval_runtime": 1228.8997, + "eval_samples_per_second": 1.164, + "eval_steps_per_second": 1.164, + "eval_wer": 22.04692556634304, + "step": 9000 + }, + { + "epoch": 3.03, + "grad_norm": 1.6283141374588013, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0325, + "step": 9025 + }, + { + "epoch": 3.04, + "grad_norm": 1.6144049167633057, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0308, + "step": 9050 + }, + { + "epoch": 3.05, + "grad_norm": 1.5018326044082642, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0304, + "step": 9075 + }, + { + "epoch": 3.06, + "grad_norm": 1.7110459804534912, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0322, + "step": 9100 + }, + { + "epoch": 3.06, + "grad_norm": 1.8574200868606567, + "learning_rate": 9.133467336683417e-06, + "loss": 0.0327, + "step": 9125 + }, + { + "epoch": 3.07, + "grad_norm": 1.7182424068450928, + "learning_rate": 9.130954773869347e-06, + "loss": 0.0308, + "step": 9150 + }, + { + "epoch": 3.08, + "grad_norm": 2.0133166313171387, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0302, + "step": 9175 + }, + { + "epoch": 3.09, + "grad_norm": 1.585935354232788, + "learning_rate": 9.125929648241205e-06, + "loss": 0.033, + "step": 9200 + }, + { + "epoch": 3.1, + "grad_norm": 1.8639785051345825, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0307, + "step": 9225 + }, + { + "epoch": 3.11, + "grad_norm": 1.8666003942489624, + "learning_rate": 9.120904522613066e-06, + "loss": 0.0319, + "step": 9250 + }, + { + "epoch": 3.11, + "grad_norm": 1.9627740383148193, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0303, + "step": 9275 + }, + { + "epoch": 3.12, + "grad_norm": 1.8668369054794312, + "learning_rate": 9.115879396984926e-06, + "loss": 0.031, + "step": 9300 + }, + { + "epoch": 3.13, + "grad_norm": 1.8304191827774048, + "learning_rate": 9.113366834170855e-06, + "loss": 0.0312, + "step": 9325 + }, + { + "epoch": 3.14, + "grad_norm": 1.7791739702224731, + "learning_rate": 9.110854271356785e-06, + "loss": 0.0319, + "step": 9350 + }, + { + "epoch": 3.15, + "grad_norm": 1.655661940574646, + "learning_rate": 9.108341708542714e-06, + "loss": 0.0326, + "step": 9375 + }, + { + "epoch": 3.16, + "grad_norm": 2.107893943786621, + "learning_rate": 9.105829145728643e-06, + "loss": 0.0317, + "step": 9400 + }, + { + "epoch": 3.16, + "grad_norm": 1.5082181692123413, + "learning_rate": 9.103316582914573e-06, + "loss": 0.0337, + "step": 9425 + }, + { + "epoch": 3.17, + "grad_norm": 2.034398078918457, + "learning_rate": 9.100804020100504e-06, + "loss": 0.033, + "step": 9450 + }, + { + "epoch": 3.18, + "grad_norm": 1.460350751876831, + "learning_rate": 9.098291457286433e-06, + "loss": 0.0313, + "step": 9475 + }, + { + "epoch": 3.19, + "grad_norm": 1.1869457960128784, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0278, + "step": 9500 + }, + { + "epoch": 3.2, + "grad_norm": 1.8277338743209839, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0327, + "step": 9525 + }, + { + "epoch": 3.21, + "grad_norm": 2.2573649883270264, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0323, + "step": 9550 + }, + { + "epoch": 3.22, + "grad_norm": 1.4396376609802246, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0317, + "step": 9575 + }, + { + "epoch": 3.22, + "grad_norm": 1.3070216178894043, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0303, + "step": 9600 + }, + { + "epoch": 3.23, + "grad_norm": 1.8946232795715332, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0319, + "step": 9625 + }, + { + "epoch": 3.24, + "grad_norm": 1.6618894338607788, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0348, + "step": 9650 + }, + { + "epoch": 3.25, + "grad_norm": 1.5836551189422607, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0317, + "step": 9675 + }, + { + "epoch": 3.26, + "grad_norm": 2.0778791904449463, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0323, + "step": 9700 + }, + { + "epoch": 3.27, + "grad_norm": 2.2287397384643555, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0321, + "step": 9725 + }, + { + "epoch": 3.27, + "grad_norm": 1.1347562074661255, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0308, + "step": 9750 + }, + { + "epoch": 3.28, + "grad_norm": 1.7544713020324707, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0322, + "step": 9775 + }, + { + "epoch": 3.29, + "grad_norm": 1.623368501663208, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0311, + "step": 9800 + }, + { + "epoch": 3.3, + "grad_norm": 1.7735966444015503, + "learning_rate": 9.063216080402011e-06, + "loss": 0.033, + "step": 9825 + }, + { + "epoch": 3.31, + "grad_norm": 1.7006261348724365, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0337, + "step": 9850 + }, + { + "epoch": 3.32, + "grad_norm": 2.117072105407715, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0336, + "step": 9875 + }, + { + "epoch": 3.32, + "grad_norm": 1.9019418954849243, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0288, + "step": 9900 + }, + { + "epoch": 3.33, + "grad_norm": 1.8735451698303223, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0289, + "step": 9925 + }, + { + "epoch": 3.34, + "grad_norm": 2.3279552459716797, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0317, + "step": 9950 + }, + { + "epoch": 3.35, + "grad_norm": 2.024847984313965, + "learning_rate": 9.048140703517589e-06, + "loss": 0.032, + "step": 9975 + }, + { + "epoch": 3.36, + "grad_norm": 1.8242645263671875, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0328, + "step": 10000 + }, + { + "epoch": 3.36, + "eval_loss": 0.18398000299930573, + "eval_runtime": 1213.1677, + "eval_samples_per_second": 1.18, + "eval_steps_per_second": 1.18, + "eval_wer": 21.905339805825243, + "step": 10000 + }, + { + "epoch": 3.37, + "grad_norm": 2.0562586784362793, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0317, + "step": 10025 + }, + { + "epoch": 3.37, + "grad_norm": 2.2208852767944336, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0333, + "step": 10050 + }, + { + "epoch": 3.38, + "grad_norm": 1.973963975906372, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0318, + "step": 10075 + }, + { + "epoch": 3.39, + "grad_norm": 2.5031886100769043, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0308, + "step": 10100 + }, + { + "epoch": 3.4, + "grad_norm": 1.7364188432693481, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0316, + "step": 10125 + }, + { + "epoch": 3.41, + "grad_norm": 1.4862676858901978, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0305, + "step": 10150 + }, + { + "epoch": 3.42, + "grad_norm": 2.1286962032318115, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0338, + "step": 10175 + }, + { + "epoch": 3.43, + "grad_norm": 2.2715303897857666, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0328, + "step": 10200 + }, + { + "epoch": 3.43, + "grad_norm": 2.114025592803955, + "learning_rate": 9.023015075376885e-06, + "loss": 0.031, + "step": 10225 + }, + { + "epoch": 3.44, + "grad_norm": 2.1440210342407227, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0329, + "step": 10250 + }, + { + "epoch": 3.45, + "grad_norm": 2.435289144515991, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0296, + "step": 10275 + }, + { + "epoch": 3.46, + "grad_norm": 2.004673957824707, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0313, + "step": 10300 + }, + { + "epoch": 3.47, + "grad_norm": 1.5920778512954712, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0286, + "step": 10325 + }, + { + "epoch": 3.48, + "grad_norm": 2.2107348442077637, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0337, + "step": 10350 + }, + { + "epoch": 3.48, + "grad_norm": 1.422105312347412, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0286, + "step": 10375 + }, + { + "epoch": 3.49, + "grad_norm": 1.6970330476760864, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0294, + "step": 10400 + }, + { + "epoch": 3.5, + "grad_norm": 2.2645931243896484, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0314, + "step": 10425 + }, + { + "epoch": 3.51, + "grad_norm": 2.3905222415924072, + "learning_rate": 9.000402010050252e-06, + "loss": 0.0334, + "step": 10450 + }, + { + "epoch": 3.52, + "grad_norm": 1.5324386358261108, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0336, + "step": 10475 + }, + { + "epoch": 3.53, + "grad_norm": 2.076995849609375, + "learning_rate": 8.995376884422111e-06, + "loss": 0.0317, + "step": 10500 + }, + { + "epoch": 3.53, + "grad_norm": 2.1357431411743164, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0275, + "step": 10525 + }, + { + "epoch": 3.54, + "grad_norm": 2.4065749645233154, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0368, + "step": 10550 + }, + { + "epoch": 3.55, + "grad_norm": 1.972467303276062, + "learning_rate": 8.9878391959799e-06, + "loss": 0.0337, + "step": 10575 + }, + { + "epoch": 3.56, + "grad_norm": 2.157633066177368, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0325, + "step": 10600 + }, + { + "epoch": 3.57, + "grad_norm": 1.7335753440856934, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0295, + "step": 10625 + }, + { + "epoch": 3.58, + "grad_norm": 2.220749855041504, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0308, + "step": 10650 + }, + { + "epoch": 3.58, + "grad_norm": 2.1866307258605957, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0321, + "step": 10675 + }, + { + "epoch": 3.59, + "grad_norm": 1.6002860069274902, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0331, + "step": 10700 + }, + { + "epoch": 3.6, + "grad_norm": 2.461554527282715, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0307, + "step": 10725 + }, + { + "epoch": 3.61, + "grad_norm": 1.7671587467193604, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0297, + "step": 10750 + }, + { + "epoch": 3.62, + "grad_norm": 1.8277167081832886, + "learning_rate": 8.967738693467337e-06, + "loss": 0.0302, + "step": 10775 + }, + { + "epoch": 3.63, + "grad_norm": 1.7027074098587036, + "learning_rate": 8.965226130653268e-06, + "loss": 0.0312, + "step": 10800 + }, + { + "epoch": 3.63, + "grad_norm": 1.9117529392242432, + "learning_rate": 8.962713567839196e-06, + "loss": 0.0305, + "step": 10825 + }, + { + "epoch": 3.64, + "grad_norm": 1.987209439277649, + "learning_rate": 8.960201005025127e-06, + "loss": 0.0287, + "step": 10850 + }, + { + "epoch": 3.65, + "grad_norm": 2.245042324066162, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0308, + "step": 10875 + }, + { + "epoch": 3.66, + "grad_norm": 1.8201255798339844, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0324, + "step": 10900 + }, + { + "epoch": 3.67, + "grad_norm": 1.6048423051834106, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0313, + "step": 10925 + }, + { + "epoch": 3.68, + "grad_norm": 2.373978614807129, + "learning_rate": 8.950150753768844e-06, + "loss": 0.0321, + "step": 10950 + }, + { + "epoch": 3.69, + "grad_norm": 1.9072449207305908, + "learning_rate": 8.947638190954775e-06, + "loss": 0.0301, + "step": 10975 + }, + { + "epoch": 3.69, + "grad_norm": 1.9899463653564453, + "learning_rate": 8.945125628140704e-06, + "loss": 0.0323, + "step": 11000 + }, + { + "epoch": 3.69, + "eval_loss": 0.18821263313293457, + "eval_runtime": 1192.9944, + "eval_samples_per_second": 1.2, + "eval_steps_per_second": 1.2, + "eval_wer": 22.215480043149945, + "step": 11000 + }, + { + "epoch": 3.7, + "grad_norm": 2.485443592071533, + "learning_rate": 8.942613065326634e-06, + "loss": 0.03, + "step": 11025 + }, + { + "epoch": 3.71, + "grad_norm": 2.0685226917266846, + "learning_rate": 8.940100502512563e-06, + "loss": 0.034, + "step": 11050 + }, + { + "epoch": 3.72, + "grad_norm": 2.1792824268341064, + "learning_rate": 8.937587939698494e-06, + "loss": 0.0293, + "step": 11075 + }, + { + "epoch": 3.73, + "grad_norm": 1.8061559200286865, + "learning_rate": 8.935075376884423e-06, + "loss": 0.0288, + "step": 11100 + }, + { + "epoch": 3.74, + "grad_norm": 1.608508586883545, + "learning_rate": 8.932562814070353e-06, + "loss": 0.0339, + "step": 11125 + }, + { + "epoch": 3.74, + "grad_norm": 2.188870906829834, + "learning_rate": 8.930050251256282e-06, + "loss": 0.0321, + "step": 11150 + }, + { + "epoch": 3.75, + "grad_norm": 1.7846341133117676, + "learning_rate": 8.927537688442211e-06, + "loss": 0.0293, + "step": 11175 + }, + { + "epoch": 3.76, + "grad_norm": 2.1922011375427246, + "learning_rate": 8.925025125628142e-06, + "loss": 0.0317, + "step": 11200 + }, + { + "epoch": 3.77, + "grad_norm": 1.792603850364685, + "learning_rate": 8.92251256281407e-06, + "loss": 0.0309, + "step": 11225 + }, + { + "epoch": 3.78, + "grad_norm": 2.243236541748047, + "learning_rate": 8.920000000000001e-06, + "loss": 0.0307, + "step": 11250 + }, + { + "epoch": 3.79, + "grad_norm": 2.5722618103027344, + "learning_rate": 8.91748743718593e-06, + "loss": 0.0346, + "step": 11275 + }, + { + "epoch": 3.79, + "grad_norm": 1.6505095958709717, + "learning_rate": 8.91497487437186e-06, + "loss": 0.0291, + "step": 11300 + }, + { + "epoch": 3.8, + "grad_norm": 1.8172647953033447, + "learning_rate": 8.91246231155779e-06, + "loss": 0.0307, + "step": 11325 + }, + { + "epoch": 3.81, + "grad_norm": 2.18789005279541, + "learning_rate": 8.90994974874372e-06, + "loss": 0.0292, + "step": 11350 + }, + { + "epoch": 3.82, + "grad_norm": 2.0088040828704834, + "learning_rate": 8.90743718592965e-06, + "loss": 0.0318, + "step": 11375 + }, + { + "epoch": 3.83, + "grad_norm": 2.486415147781372, + "learning_rate": 8.904924623115579e-06, + "loss": 0.0282, + "step": 11400 + }, + { + "epoch": 3.84, + "grad_norm": 2.3458974361419678, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0285, + "step": 11425 + }, + { + "epoch": 3.84, + "grad_norm": 1.9003041982650757, + "learning_rate": 8.899899497487437e-06, + "loss": 0.0328, + "step": 11450 + }, + { + "epoch": 3.85, + "grad_norm": 2.1546921730041504, + "learning_rate": 8.897386934673368e-06, + "loss": 0.0303, + "step": 11475 + }, + { + "epoch": 3.86, + "grad_norm": 2.2788491249084473, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0326, + "step": 11500 + }, + { + "epoch": 3.87, + "grad_norm": 2.1286659240722656, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0311, + "step": 11525 + }, + { + "epoch": 3.88, + "grad_norm": 1.9993877410888672, + "learning_rate": 8.889849246231156e-06, + "loss": 0.0323, + "step": 11550 + }, + { + "epoch": 3.89, + "grad_norm": 2.0409862995147705, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0325, + "step": 11575 + }, + { + "epoch": 3.9, + "grad_norm": 1.9293345212936401, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0283, + "step": 11600 + }, + { + "epoch": 3.9, + "grad_norm": 1.9005767107009888, + "learning_rate": 8.882311557788946e-06, + "loss": 0.0316, + "step": 11625 + }, + { + "epoch": 3.91, + "grad_norm": 2.3250808715820312, + "learning_rate": 8.879798994974875e-06, + "loss": 0.032, + "step": 11650 + }, + { + "epoch": 3.92, + "grad_norm": 1.7766000032424927, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0324, + "step": 11675 + }, + { + "epoch": 3.93, + "grad_norm": 2.183643102645874, + "learning_rate": 8.874773869346734e-06, + "loss": 0.0319, + "step": 11700 + }, + { + "epoch": 3.94, + "grad_norm": 1.82970130443573, + "learning_rate": 8.872261306532665e-06, + "loss": 0.0252, + "step": 11725 + }, + { + "epoch": 3.95, + "grad_norm": 1.9144885540008545, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0299, + "step": 11750 + }, + { + "epoch": 3.95, + "grad_norm": 2.4158880710601807, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0316, + "step": 11775 + }, + { + "epoch": 3.96, + "grad_norm": 2.048321008682251, + "learning_rate": 8.864723618090453e-06, + "loss": 0.0271, + "step": 11800 + }, + { + "epoch": 3.97, + "grad_norm": 2.0783748626708984, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0302, + "step": 11825 + }, + { + "epoch": 3.98, + "grad_norm": 1.8487720489501953, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0295, + "step": 11850 + }, + { + "epoch": 3.99, + "grad_norm": 1.2774975299835205, + "learning_rate": 8.857185929648243e-06, + "loss": 0.0284, + "step": 11875 + }, + { + "epoch": 4.0, + "grad_norm": 2.4841320514678955, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0292, + "step": 11900 + }, + { + "epoch": 4.0, + "grad_norm": 1.1434420347213745, + "learning_rate": 8.852160804020101e-06, + "loss": 0.0212, + "step": 11925 + }, + { + "epoch": 4.01, + "grad_norm": 2.0010673999786377, + "learning_rate": 8.849648241206032e-06, + "loss": 0.0155, + "step": 11950 + }, + { + "epoch": 4.02, + "grad_norm": 1.2914950847625732, + "learning_rate": 8.84713567839196e-06, + "loss": 0.0138, + "step": 11975 + }, + { + "epoch": 4.03, + "grad_norm": 1.5573234558105469, + "learning_rate": 8.844623115577891e-06, + "loss": 0.0152, + "step": 12000 + }, + { + "epoch": 4.03, + "eval_loss": 0.2059727907180786, + "eval_runtime": 1197.925, + "eval_samples_per_second": 1.195, + "eval_steps_per_second": 1.195, + "eval_wer": 22.053667745415318, + "step": 12000 + }, + { + "epoch": 4.04, + "grad_norm": 1.0142135620117188, + "learning_rate": 8.84211055276382e-06, + "loss": 0.0149, + "step": 12025 + }, + { + "epoch": 4.05, + "grad_norm": 1.4854488372802734, + "learning_rate": 8.83959798994975e-06, + "loss": 0.016, + "step": 12050 + }, + { + "epoch": 4.05, + "grad_norm": 1.5629256963729858, + "learning_rate": 8.837085427135679e-06, + "loss": 0.0146, + "step": 12075 + }, + { + "epoch": 4.06, + "grad_norm": 1.623558521270752, + "learning_rate": 8.834572864321608e-06, + "loss": 0.0167, + "step": 12100 + }, + { + "epoch": 4.07, + "grad_norm": 1.624794602394104, + "learning_rate": 8.832060301507537e-06, + "loss": 0.0155, + "step": 12125 + }, + { + "epoch": 4.08, + "grad_norm": 1.517223834991455, + "learning_rate": 8.829547738693468e-06, + "loss": 0.0153, + "step": 12150 + }, + { + "epoch": 4.09, + "grad_norm": 1.9318969249725342, + "learning_rate": 8.827035175879398e-06, + "loss": 0.0187, + "step": 12175 + }, + { + "epoch": 4.1, + "grad_norm": 1.870975375175476, + "learning_rate": 8.824522613065327e-06, + "loss": 0.0155, + "step": 12200 + }, + { + "epoch": 4.11, + "grad_norm": 1.5423206090927124, + "learning_rate": 8.822010050251258e-06, + "loss": 0.014, + "step": 12225 + }, + { + "epoch": 4.11, + "grad_norm": 1.4466434717178345, + "learning_rate": 8.819497487437186e-06, + "loss": 0.0156, + "step": 12250 + }, + { + "epoch": 4.12, + "grad_norm": 1.4133529663085938, + "learning_rate": 8.816984924623117e-06, + "loss": 0.0172, + "step": 12275 + }, + { + "epoch": 4.13, + "grad_norm": 2.117969274520874, + "learning_rate": 8.814472361809046e-06, + "loss": 0.0136, + "step": 12300 + }, + { + "epoch": 4.14, + "grad_norm": 1.2700644731521606, + "learning_rate": 8.811959798994975e-06, + "loss": 0.0143, + "step": 12325 + }, + { + "epoch": 4.15, + "grad_norm": 1.3233404159545898, + "learning_rate": 8.809447236180905e-06, + "loss": 0.0153, + "step": 12350 + }, + { + "epoch": 4.16, + "grad_norm": 1.4087698459625244, + "learning_rate": 8.806934673366834e-06, + "loss": 0.0165, + "step": 12375 + }, + { + "epoch": 4.16, + "grad_norm": 1.9031394720077515, + "learning_rate": 8.804422110552765e-06, + "loss": 0.0155, + "step": 12400 + }, + { + "epoch": 4.17, + "grad_norm": 1.8332024812698364, + "learning_rate": 8.801909547738694e-06, + "loss": 0.016, + "step": 12425 + }, + { + "epoch": 4.18, + "grad_norm": 1.466334342956543, + "learning_rate": 8.799396984924624e-06, + "loss": 0.0161, + "step": 12450 + }, + { + "epoch": 4.19, + "grad_norm": 1.563001275062561, + "learning_rate": 8.796884422110553e-06, + "loss": 0.0149, + "step": 12475 + }, + { + "epoch": 4.2, + "grad_norm": 1.5624135732650757, + "learning_rate": 8.794371859296484e-06, + "loss": 0.0144, + "step": 12500 + }, + { + "epoch": 4.21, + "grad_norm": 2.0249340534210205, + "learning_rate": 8.791859296482412e-06, + "loss": 0.0154, + "step": 12525 + }, + { + "epoch": 4.21, + "grad_norm": 1.7392789125442505, + "learning_rate": 8.789346733668343e-06, + "loss": 0.0152, + "step": 12550 + }, + { + "epoch": 4.22, + "grad_norm": 1.6178935766220093, + "learning_rate": 8.786834170854272e-06, + "loss": 0.0153, + "step": 12575 + }, + { + "epoch": 4.23, + "grad_norm": 1.6920667886734009, + "learning_rate": 8.784321608040201e-06, + "loss": 0.0131, + "step": 12600 + }, + { + "epoch": 4.24, + "grad_norm": 2.357971668243408, + "learning_rate": 8.781809045226132e-06, + "loss": 0.0159, + "step": 12625 + }, + { + "epoch": 4.25, + "grad_norm": 1.7053987979888916, + "learning_rate": 8.77929648241206e-06, + "loss": 0.0155, + "step": 12650 + }, + { + "epoch": 4.26, + "grad_norm": 1.608075499534607, + "learning_rate": 8.776783919597991e-06, + "loss": 0.0151, + "step": 12675 + }, + { + "epoch": 4.26, + "grad_norm": 1.928497076034546, + "learning_rate": 8.77427135678392e-06, + "loss": 0.0171, + "step": 12700 + }, + { + "epoch": 4.27, + "grad_norm": 1.8793927431106567, + "learning_rate": 8.77175879396985e-06, + "loss": 0.0157, + "step": 12725 + }, + { + "epoch": 4.28, + "grad_norm": 1.9218591451644897, + "learning_rate": 8.769246231155779e-06, + "loss": 0.0159, + "step": 12750 + }, + { + "epoch": 4.29, + "grad_norm": 2.084500312805176, + "learning_rate": 8.76673366834171e-06, + "loss": 0.0153, + "step": 12775 + }, + { + "epoch": 4.3, + "grad_norm": 1.7483826875686646, + "learning_rate": 8.76422110552764e-06, + "loss": 0.0164, + "step": 12800 + }, + { + "epoch": 4.31, + "grad_norm": 1.5623873472213745, + "learning_rate": 8.761708542713569e-06, + "loss": 0.016, + "step": 12825 + }, + { + "epoch": 4.31, + "grad_norm": 1.2766278982162476, + "learning_rate": 8.759195979899498e-06, + "loss": 0.0176, + "step": 12850 + }, + { + "epoch": 4.32, + "grad_norm": 1.3006055355072021, + "learning_rate": 8.756683417085427e-06, + "loss": 0.0161, + "step": 12875 + }, + { + "epoch": 4.33, + "grad_norm": 1.075271725654602, + "learning_rate": 8.754170854271358e-06, + "loss": 0.0141, + "step": 12900 + }, + { + "epoch": 4.34, + "grad_norm": 1.0574754476547241, + "learning_rate": 8.751658291457286e-06, + "loss": 0.0145, + "step": 12925 + }, + { + "epoch": 4.35, + "grad_norm": 1.054284930229187, + "learning_rate": 8.749145728643217e-06, + "loss": 0.0136, + "step": 12950 + }, + { + "epoch": 4.36, + "grad_norm": 2.29689621925354, + "learning_rate": 8.746633165829146e-06, + "loss": 0.0155, + "step": 12975 + }, + { + "epoch": 4.37, + "grad_norm": 1.872876524925232, + "learning_rate": 8.744120603015076e-06, + "loss": 0.0154, + "step": 13000 + }, + { + "epoch": 4.37, + "eval_loss": 0.2130448818206787, + "eval_runtime": 1202.0777, + "eval_samples_per_second": 1.19, + "eval_steps_per_second": 1.19, + "eval_wer": 21.669363538295578, + "step": 13000 + }, + { + "epoch": 4.37, + "grad_norm": 1.7077223062515259, + "learning_rate": 8.741608040201007e-06, + "loss": 0.0169, + "step": 13025 + }, + { + "epoch": 4.38, + "grad_norm": 1.8535040616989136, + "learning_rate": 8.739095477386936e-06, + "loss": 0.015, + "step": 13050 + }, + { + "epoch": 4.39, + "grad_norm": 1.2556935548782349, + "learning_rate": 8.736582914572865e-06, + "loss": 0.0136, + "step": 13075 + }, + { + "epoch": 4.4, + "grad_norm": 2.2203209400177, + "learning_rate": 8.734070351758795e-06, + "loss": 0.0159, + "step": 13100 + }, + { + "epoch": 4.41, + "grad_norm": 2.353564500808716, + "learning_rate": 8.731557788944724e-06, + "loss": 0.0173, + "step": 13125 + }, + { + "epoch": 4.42, + "grad_norm": 1.4246625900268555, + "learning_rate": 8.729045226130653e-06, + "loss": 0.0156, + "step": 13150 + }, + { + "epoch": 4.42, + "grad_norm": 1.8444842100143433, + "learning_rate": 8.726532663316584e-06, + "loss": 0.0167, + "step": 13175 + }, + { + "epoch": 4.43, + "grad_norm": 2.3171043395996094, + "learning_rate": 8.724020100502514e-06, + "loss": 0.0143, + "step": 13200 + }, + { + "epoch": 4.44, + "grad_norm": 1.4160494804382324, + "learning_rate": 8.721507537688443e-06, + "loss": 0.0144, + "step": 13225 + }, + { + "epoch": 4.45, + "grad_norm": 1.9673594236373901, + "learning_rate": 8.718994974874372e-06, + "loss": 0.0156, + "step": 13250 + }, + { + "epoch": 4.46, + "grad_norm": 2.0636637210845947, + "learning_rate": 8.716482412060302e-06, + "loss": 0.0152, + "step": 13275 + }, + { + "epoch": 4.47, + "grad_norm": 1.5010149478912354, + "learning_rate": 8.713969849246233e-06, + "loss": 0.0172, + "step": 13300 + }, + { + "epoch": 4.47, + "grad_norm": 2.3201329708099365, + "learning_rate": 8.711457286432162e-06, + "loss": 0.0183, + "step": 13325 + }, + { + "epoch": 4.48, + "grad_norm": 1.6143250465393066, + "learning_rate": 8.708944723618091e-06, + "loss": 0.0158, + "step": 13350 + }, + { + "epoch": 4.49, + "grad_norm": 1.8575571775436401, + "learning_rate": 8.70643216080402e-06, + "loss": 0.017, + "step": 13375 + }, + { + "epoch": 4.5, + "grad_norm": 1.9903631210327148, + "learning_rate": 8.70391959798995e-06, + "loss": 0.0167, + "step": 13400 + }, + { + "epoch": 4.51, + "grad_norm": 1.7888177633285522, + "learning_rate": 8.701407035175881e-06, + "loss": 0.0161, + "step": 13425 + }, + { + "epoch": 4.52, + "grad_norm": 2.0395116806030273, + "learning_rate": 8.69889447236181e-06, + "loss": 0.0162, + "step": 13450 + }, + { + "epoch": 4.52, + "grad_norm": 1.3136216402053833, + "learning_rate": 8.69638190954774e-06, + "loss": 0.0165, + "step": 13475 + }, + { + "epoch": 4.53, + "grad_norm": 2.354081869125366, + "learning_rate": 8.693869346733669e-06, + "loss": 0.0155, + "step": 13500 + }, + { + "epoch": 4.54, + "grad_norm": 1.6151707172393799, + "learning_rate": 8.691356783919598e-06, + "loss": 0.0166, + "step": 13525 + }, + { + "epoch": 4.55, + "grad_norm": 1.6143455505371094, + "learning_rate": 8.688844221105528e-06, + "loss": 0.014, + "step": 13550 + }, + { + "epoch": 4.56, + "grad_norm": 2.086638927459717, + "learning_rate": 8.686331658291459e-06, + "loss": 0.0172, + "step": 13575 + }, + { + "epoch": 4.57, + "grad_norm": 2.2153995037078857, + "learning_rate": 8.683819095477388e-06, + "loss": 0.0189, + "step": 13600 + }, + { + "epoch": 4.58, + "grad_norm": 1.5306668281555176, + "learning_rate": 8.681306532663317e-06, + "loss": 0.0162, + "step": 13625 + }, + { + "epoch": 4.58, + "grad_norm": 1.593883752822876, + "learning_rate": 8.678793969849248e-06, + "loss": 0.0138, + "step": 13650 + }, + { + "epoch": 4.59, + "grad_norm": 2.366638422012329, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0155, + "step": 13675 + }, + { + "epoch": 4.6, + "grad_norm": 1.474041223526001, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0154, + "step": 13700 + }, + { + "epoch": 4.61, + "grad_norm": 2.152134418487549, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0176, + "step": 13725 + }, + { + "epoch": 4.62, + "grad_norm": 1.4458969831466675, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0149, + "step": 13750 + }, + { + "epoch": 4.63, + "grad_norm": 1.7053142786026, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0161, + "step": 13775 + }, + { + "epoch": 4.63, + "grad_norm": 1.8481920957565308, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0179, + "step": 13800 + }, + { + "epoch": 4.64, + "grad_norm": 1.9341260194778442, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0174, + "step": 13825 + }, + { + "epoch": 4.65, + "grad_norm": 1.7287882566452026, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0162, + "step": 13850 + }, + { + "epoch": 4.66, + "grad_norm": 1.9068552255630493, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0179, + "step": 13875 + }, + { + "epoch": 4.67, + "grad_norm": 2.0747196674346924, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0161, + "step": 13900 + }, + { + "epoch": 4.68, + "grad_norm": 1.4294641017913818, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0166, + "step": 13925 + }, + { + "epoch": 4.68, + "grad_norm": 1.8812909126281738, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0168, + "step": 13950 + }, + { + "epoch": 4.69, + "grad_norm": 1.803090214729309, + "learning_rate": 8.646231155778895e-06, + "loss": 0.015, + "step": 13975 + }, + { + "epoch": 4.7, + "grad_norm": 1.4690502882003784, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0181, + "step": 14000 + }, + { + "epoch": 4.7, + "eval_loss": 0.21771018207073212, + "eval_runtime": 1213.4681, + "eval_samples_per_second": 1.179, + "eval_steps_per_second": 1.179, + "eval_wer": 22.39077669902913, + "step": 14000 + }, + { + "epoch": 4.71, + "grad_norm": 1.1451553106307983, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0152, + "step": 14025 + }, + { + "epoch": 4.72, + "grad_norm": 1.8692649602890015, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0157, + "step": 14050 + }, + { + "epoch": 4.73, + "grad_norm": 2.1651480197906494, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0173, + "step": 14075 + }, + { + "epoch": 4.73, + "grad_norm": 2.4039623737335205, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0159, + "step": 14100 + }, + { + "epoch": 4.74, + "grad_norm": 2.1604299545288086, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0173, + "step": 14125 + }, + { + "epoch": 4.75, + "grad_norm": 2.4227333068847656, + "learning_rate": 8.628643216080402e-06, + "loss": 0.017, + "step": 14150 + }, + { + "epoch": 4.76, + "grad_norm": 1.7914457321166992, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0172, + "step": 14175 + }, + { + "epoch": 4.77, + "grad_norm": 1.6655550003051758, + "learning_rate": 8.623618090452262e-06, + "loss": 0.0166, + "step": 14200 + }, + { + "epoch": 4.78, + "grad_norm": 1.7403589487075806, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0197, + "step": 14225 + }, + { + "epoch": 4.79, + "grad_norm": 2.1573069095611572, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0143, + "step": 14250 + }, + { + "epoch": 4.79, + "grad_norm": 1.9980624914169312, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0159, + "step": 14275 + }, + { + "epoch": 4.8, + "grad_norm": 1.5816279649734497, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0181, + "step": 14300 + }, + { + "epoch": 4.81, + "grad_norm": 2.171492576599121, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0162, + "step": 14325 + }, + { + "epoch": 4.82, + "grad_norm": 1.9162753820419312, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0149, + "step": 14350 + }, + { + "epoch": 4.83, + "grad_norm": 1.741087555885315, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0164, + "step": 14375 + }, + { + "epoch": 4.84, + "grad_norm": 2.9275858402252197, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0166, + "step": 14400 + }, + { + "epoch": 4.84, + "grad_norm": 2.1988613605499268, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0169, + "step": 14425 + }, + { + "epoch": 4.85, + "grad_norm": 2.5306332111358643, + "learning_rate": 8.598492462311559e-06, + "loss": 0.0178, + "step": 14450 + }, + { + "epoch": 4.86, + "grad_norm": 2.151973247528076, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0173, + "step": 14475 + }, + { + "epoch": 4.87, + "grad_norm": 1.4058828353881836, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0182, + "step": 14500 + }, + { + "epoch": 4.88, + "grad_norm": 2.706510305404663, + "learning_rate": 8.590954773869347e-06, + "loss": 0.0195, + "step": 14525 + }, + { + "epoch": 4.89, + "grad_norm": 2.0286056995391846, + "learning_rate": 8.588442211055276e-06, + "loss": 0.018, + "step": 14550 + }, + { + "epoch": 4.89, + "grad_norm": 1.7664135694503784, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0166, + "step": 14575 + }, + { + "epoch": 4.9, + "grad_norm": 1.5562934875488281, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0156, + "step": 14600 + }, + { + "epoch": 4.91, + "grad_norm": 2.3522746562957764, + "learning_rate": 8.580904522613066e-06, + "loss": 0.016, + "step": 14625 + }, + { + "epoch": 4.92, + "grad_norm": 1.2259352207183838, + "learning_rate": 8.578391959798997e-06, + "loss": 0.0155, + "step": 14650 + }, + { + "epoch": 4.93, + "grad_norm": 2.1873855590820312, + "learning_rate": 8.575879396984925e-06, + "loss": 0.0157, + "step": 14675 + }, + { + "epoch": 4.94, + "grad_norm": 1.7226554155349731, + "learning_rate": 8.573366834170856e-06, + "loss": 0.0182, + "step": 14700 + }, + { + "epoch": 4.94, + "grad_norm": 1.4701647758483887, + "learning_rate": 8.570854271356785e-06, + "loss": 0.0157, + "step": 14725 + }, + { + "epoch": 4.95, + "grad_norm": 1.5648847818374634, + "learning_rate": 8.568341708542714e-06, + "loss": 0.0151, + "step": 14750 + }, + { + "epoch": 4.96, + "grad_norm": 1.6337006092071533, + "learning_rate": 8.565829145728644e-06, + "loss": 0.0154, + "step": 14775 + }, + { + "epoch": 4.97, + "grad_norm": 1.5339856147766113, + "learning_rate": 8.563316582914573e-06, + "loss": 0.0169, + "step": 14800 + }, + { + "epoch": 4.98, + "grad_norm": 1.6300182342529297, + "learning_rate": 8.560804020100502e-06, + "loss": 0.0173, + "step": 14825 + }, + { + "epoch": 4.99, + "grad_norm": 3.04838490486145, + "learning_rate": 8.558291457286433e-06, + "loss": 0.016, + "step": 14850 + }, + { + "epoch": 4.99, + "grad_norm": 1.9024686813354492, + "learning_rate": 8.555778894472363e-06, + "loss": 0.0157, + "step": 14875 + }, + { + "epoch": 5.0, + "grad_norm": 0.9240014553070068, + "learning_rate": 8.553266331658292e-06, + "loss": 0.0145, + "step": 14900 + }, + { + "epoch": 5.01, + "grad_norm": 2.04496693611145, + "learning_rate": 8.550753768844223e-06, + "loss": 0.0083, + "step": 14925 + }, + { + "epoch": 5.02, + "grad_norm": 1.2840524911880493, + "learning_rate": 8.54824120603015e-06, + "loss": 0.0073, + "step": 14950 + }, + { + "epoch": 5.03, + "grad_norm": 1.4686466455459595, + "learning_rate": 8.545728643216082e-06, + "loss": 0.0089, + "step": 14975 + }, + { + "epoch": 5.04, + "grad_norm": 1.1511826515197754, + "learning_rate": 8.54321608040201e-06, + "loss": 0.0089, + "step": 15000 + }, + { + "epoch": 5.04, + "eval_loss": 0.23282939195632935, + "eval_runtime": 1395.2586, + "eval_samples_per_second": 1.026, + "eval_steps_per_second": 1.026, + "eval_wer": 22.957119741100325, + "step": 15000 + }, + { + "epoch": 5.05, + "grad_norm": 1.0920934677124023, + "learning_rate": 8.54070351758794e-06, + "loss": 0.0081, + "step": 15025 + }, + { + "epoch": 5.05, + "grad_norm": 1.2407475709915161, + "learning_rate": 8.53819095477387e-06, + "loss": 0.0083, + "step": 15050 + }, + { + "epoch": 5.06, + "grad_norm": 0.9825206995010376, + "learning_rate": 8.535678391959799e-06, + "loss": 0.0078, + "step": 15075 + }, + { + "epoch": 5.07, + "grad_norm": 1.5308531522750854, + "learning_rate": 8.53316582914573e-06, + "loss": 0.0084, + "step": 15100 + }, + { + "epoch": 5.08, + "grad_norm": 1.1251449584960938, + "learning_rate": 8.530653266331659e-06, + "loss": 0.0085, + "step": 15125 + }, + { + "epoch": 5.09, + "grad_norm": 1.4674361944198608, + "learning_rate": 8.528140703517588e-06, + "loss": 0.009, + "step": 15150 + }, + { + "epoch": 5.1, + "grad_norm": 1.6712734699249268, + "learning_rate": 8.525628140703518e-06, + "loss": 0.008, + "step": 15175 + }, + { + "epoch": 5.1, + "grad_norm": 1.3005361557006836, + "learning_rate": 8.523115577889449e-06, + "loss": 0.0099, + "step": 15200 + }, + { + "epoch": 5.11, + "grad_norm": 1.4076645374298096, + "learning_rate": 8.520603015075376e-06, + "loss": 0.0091, + "step": 15225 + }, + { + "epoch": 5.12, + "grad_norm": 1.6017881631851196, + "learning_rate": 8.518090452261307e-06, + "loss": 0.0092, + "step": 15250 + }, + { + "epoch": 5.13, + "grad_norm": 1.4405258893966675, + "learning_rate": 8.515577889447237e-06, + "loss": 0.0076, + "step": 15275 + }, + { + "epoch": 5.14, + "grad_norm": 1.9285825490951538, + "learning_rate": 8.513065326633166e-06, + "loss": 0.009, + "step": 15300 + }, + { + "epoch": 5.15, + "grad_norm": 1.2953617572784424, + "learning_rate": 8.510552763819097e-06, + "loss": 0.0085, + "step": 15325 + }, + { + "epoch": 5.15, + "grad_norm": 1.7505429983139038, + "learning_rate": 8.508040201005025e-06, + "loss": 0.0109, + "step": 15350 + }, + { + "epoch": 5.16, + "grad_norm": 2.122034788131714, + "learning_rate": 8.505527638190956e-06, + "loss": 0.009, + "step": 15375 + }, + { + "epoch": 5.17, + "grad_norm": 1.507896900177002, + "learning_rate": 8.503015075376885e-06, + "loss": 0.0091, + "step": 15400 + }, + { + "epoch": 5.18, + "grad_norm": 1.5246798992156982, + "learning_rate": 8.500502512562814e-06, + "loss": 0.009, + "step": 15425 + }, + { + "epoch": 5.19, + "grad_norm": 1.1891722679138184, + "learning_rate": 8.497989949748744e-06, + "loss": 0.0076, + "step": 15450 + }, + { + "epoch": 5.2, + "grad_norm": 1.9574344158172607, + "learning_rate": 8.495477386934675e-06, + "loss": 0.0092, + "step": 15475 + }, + { + "epoch": 5.2, + "grad_norm": 1.59243643283844, + "learning_rate": 8.492964824120604e-06, + "loss": 0.0104, + "step": 15500 + }, + { + "epoch": 5.21, + "grad_norm": 1.8593735694885254, + "learning_rate": 8.490452261306533e-06, + "loss": 0.0091, + "step": 15525 + }, + { + "epoch": 5.22, + "grad_norm": 1.5269137620925903, + "learning_rate": 8.487939698492463e-06, + "loss": 0.0099, + "step": 15550 + }, + { + "epoch": 5.23, + "grad_norm": 1.0559868812561035, + "learning_rate": 8.485427135678392e-06, + "loss": 0.0095, + "step": 15575 + }, + { + "epoch": 5.24, + "grad_norm": 1.3573254346847534, + "learning_rate": 8.482914572864323e-06, + "loss": 0.009, + "step": 15600 + }, + { + "epoch": 5.25, + "grad_norm": 1.2926121950149536, + "learning_rate": 8.480402010050252e-06, + "loss": 0.0091, + "step": 15625 + }, + { + "epoch": 5.26, + "grad_norm": 1.4079946279525757, + "learning_rate": 8.477889447236182e-06, + "loss": 0.0102, + "step": 15650 + }, + { + "epoch": 5.26, + "grad_norm": 1.8209011554718018, + "learning_rate": 8.475376884422111e-06, + "loss": 0.0094, + "step": 15675 + }, + { + "epoch": 5.27, + "grad_norm": 2.0887951850891113, + "learning_rate": 8.47286432160804e-06, + "loss": 0.0095, + "step": 15700 + }, + { + "epoch": 5.28, + "grad_norm": 1.5334664583206177, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0102, + "step": 15725 + }, + { + "epoch": 5.29, + "grad_norm": 1.5743765830993652, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0108, + "step": 15750 + }, + { + "epoch": 5.3, + "grad_norm": 1.870396614074707, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0093, + "step": 15775 + }, + { + "epoch": 5.31, + "grad_norm": 1.4454959630966187, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0104, + "step": 15800 + }, + { + "epoch": 5.31, + "grad_norm": 1.3657257556915283, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0093, + "step": 15825 + }, + { + "epoch": 5.32, + "grad_norm": 1.7964661121368408, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0092, + "step": 15850 + }, + { + "epoch": 5.33, + "grad_norm": 1.6284058094024658, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0099, + "step": 15875 + }, + { + "epoch": 5.34, + "grad_norm": 1.2714693546295166, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0108, + "step": 15900 + }, + { + "epoch": 5.35, + "grad_norm": 1.7909234762191772, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0097, + "step": 15925 + }, + { + "epoch": 5.36, + "grad_norm": 1.3508201837539673, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0102, + "step": 15950 + }, + { + "epoch": 5.36, + "grad_norm": 1.5359046459197998, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0095, + "step": 15975 + }, + { + "epoch": 5.37, + "grad_norm": 1.6034541130065918, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0095, + "step": 16000 + }, + { + "epoch": 5.37, + "eval_loss": 0.24346992373466492, + "eval_runtime": 1162.8815, + "eval_samples_per_second": 1.231, + "eval_steps_per_second": 1.231, + "eval_wer": 22.141316073354908, + "step": 16000 + }, + { + "epoch": 5.38, + "grad_norm": 2.031728506088257, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0099, + "step": 16025 + }, + { + "epoch": 5.39, + "grad_norm": 1.5829148292541504, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0106, + "step": 16050 + }, + { + "epoch": 5.4, + "grad_norm": 1.5966347455978394, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0104, + "step": 16075 + }, + { + "epoch": 5.41, + "grad_norm": 1.5900301933288574, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0099, + "step": 16100 + }, + { + "epoch": 5.41, + "grad_norm": 2.288588523864746, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0096, + "step": 16125 + }, + { + "epoch": 5.42, + "grad_norm": 1.4461873769760132, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0095, + "step": 16150 + }, + { + "epoch": 5.43, + "grad_norm": 1.5728780031204224, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0094, + "step": 16175 + }, + { + "epoch": 5.44, + "grad_norm": 1.2544329166412354, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0099, + "step": 16200 + }, + { + "epoch": 5.45, + "grad_norm": 0.6544979214668274, + "learning_rate": 8.420201005025125e-06, + "loss": 0.0098, + "step": 16225 + }, + { + "epoch": 5.46, + "grad_norm": 1.2845455408096313, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0103, + "step": 16250 + }, + { + "epoch": 5.47, + "grad_norm": 1.0437331199645996, + "learning_rate": 8.415175879396985e-06, + "loss": 0.0092, + "step": 16275 + }, + { + "epoch": 5.47, + "grad_norm": 1.7763166427612305, + "learning_rate": 8.412663316582915e-06, + "loss": 0.0098, + "step": 16300 + }, + { + "epoch": 5.48, + "grad_norm": 1.9162542819976807, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0095, + "step": 16325 + }, + { + "epoch": 5.49, + "grad_norm": 1.8343557119369507, + "learning_rate": 8.407638190954775e-06, + "loss": 0.0093, + "step": 16350 + }, + { + "epoch": 5.5, + "grad_norm": 2.2764599323272705, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0101, + "step": 16375 + }, + { + "epoch": 5.51, + "grad_norm": 1.4925143718719482, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0112, + "step": 16400 + }, + { + "epoch": 5.52, + "grad_norm": 1.8595383167266846, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0102, + "step": 16425 + }, + { + "epoch": 5.52, + "grad_norm": 1.3778181076049805, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0091, + "step": 16450 + }, + { + "epoch": 5.53, + "grad_norm": 1.9314804077148438, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0098, + "step": 16475 + }, + { + "epoch": 5.54, + "grad_norm": 1.2433537244796753, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0095, + "step": 16500 + }, + { + "epoch": 5.55, + "grad_norm": 1.4811224937438965, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0095, + "step": 16525 + }, + { + "epoch": 5.56, + "grad_norm": 1.4142324924468994, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0096, + "step": 16550 + }, + { + "epoch": 5.57, + "grad_norm": 1.3471238613128662, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0095, + "step": 16575 + }, + { + "epoch": 5.57, + "grad_norm": 2.064746618270874, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0085, + "step": 16600 + }, + { + "epoch": 5.58, + "grad_norm": 1.988454818725586, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0097, + "step": 16625 + }, + { + "epoch": 5.59, + "grad_norm": 1.3841418027877808, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0097, + "step": 16650 + }, + { + "epoch": 5.6, + "grad_norm": 1.286037802696228, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0095, + "step": 16675 + }, + { + "epoch": 5.61, + "grad_norm": 1.8321795463562012, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0102, + "step": 16700 + }, + { + "epoch": 5.62, + "grad_norm": 1.3074469566345215, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0104, + "step": 16725 + }, + { + "epoch": 5.62, + "grad_norm": 1.7363487482070923, + "learning_rate": 8.367537688442212e-06, + "loss": 0.01, + "step": 16750 + }, + { + "epoch": 5.63, + "grad_norm": 2.446805953979492, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0117, + "step": 16775 + }, + { + "epoch": 5.64, + "grad_norm": 1.3895010948181152, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0106, + "step": 16800 + }, + { + "epoch": 5.65, + "grad_norm": 2.303978443145752, + "learning_rate": 8.36e-06, + "loss": 0.0093, + "step": 16825 + }, + { + "epoch": 5.66, + "grad_norm": 1.615275263786316, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0104, + "step": 16850 + }, + { + "epoch": 5.67, + "grad_norm": 1.887229323387146, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0116, + "step": 16875 + }, + { + "epoch": 5.67, + "grad_norm": 1.9556479454040527, + "learning_rate": 8.35246231155779e-06, + "loss": 0.01, + "step": 16900 + }, + { + "epoch": 5.68, + "grad_norm": 1.2290297746658325, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0093, + "step": 16925 + }, + { + "epoch": 5.69, + "grad_norm": 0.8272302746772766, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0096, + "step": 16950 + }, + { + "epoch": 5.7, + "grad_norm": 1.9584299325942993, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0098, + "step": 16975 + }, + { + "epoch": 5.71, + "grad_norm": 0.9767048358917236, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0096, + "step": 17000 + }, + { + "epoch": 5.71, + "eval_loss": 0.2490449696779251, + "eval_runtime": 1163.5407, + "eval_samples_per_second": 1.23, + "eval_steps_per_second": 1.23, + "eval_wer": 22.86272923408846, + "step": 17000 + }, + { + "epoch": 5.72, + "grad_norm": 2.144178867340088, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0095, + "step": 17025 + }, + { + "epoch": 5.73, + "grad_norm": 1.332624912261963, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0111, + "step": 17050 + }, + { + "epoch": 5.73, + "grad_norm": 1.2533376216888428, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0098, + "step": 17075 + }, + { + "epoch": 5.74, + "grad_norm": 1.3574457168579102, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0104, + "step": 17100 + }, + { + "epoch": 5.75, + "grad_norm": 2.0363218784332275, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0102, + "step": 17125 + }, + { + "epoch": 5.76, + "grad_norm": 2.148798942565918, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0111, + "step": 17150 + }, + { + "epoch": 5.77, + "grad_norm": 1.7296149730682373, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0094, + "step": 17175 + }, + { + "epoch": 5.78, + "grad_norm": 1.4388765096664429, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0114, + "step": 17200 + }, + { + "epoch": 5.78, + "grad_norm": 1.191123604774475, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0091, + "step": 17225 + }, + { + "epoch": 5.79, + "grad_norm": 1.878985047340393, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0115, + "step": 17250 + }, + { + "epoch": 5.8, + "grad_norm": 1.9197694063186646, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0102, + "step": 17275 + }, + { + "epoch": 5.81, + "grad_norm": 1.7159335613250732, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0101, + "step": 17300 + }, + { + "epoch": 5.82, + "grad_norm": 1.1487634181976318, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0098, + "step": 17325 + }, + { + "epoch": 5.83, + "grad_norm": 1.5059070587158203, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0095, + "step": 17350 + }, + { + "epoch": 5.83, + "grad_norm": 1.0859476327896118, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0102, + "step": 17375 + }, + { + "epoch": 5.84, + "grad_norm": 1.6882295608520508, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0098, + "step": 17400 + }, + { + "epoch": 5.85, + "grad_norm": 1.2606046199798584, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0095, + "step": 17425 + }, + { + "epoch": 5.86, + "grad_norm": 1.4468713998794556, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0095, + "step": 17450 + }, + { + "epoch": 5.87, + "grad_norm": 1.4419797658920288, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0105, + "step": 17475 + }, + { + "epoch": 5.88, + "grad_norm": 1.4575175046920776, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0104, + "step": 17500 + }, + { + "epoch": 5.88, + "grad_norm": 1.8338183164596558, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0103, + "step": 17525 + }, + { + "epoch": 5.89, + "grad_norm": 1.2326171398162842, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0096, + "step": 17550 + }, + { + "epoch": 5.9, + "grad_norm": 1.3379076719284058, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0112, + "step": 17575 + }, + { + "epoch": 5.91, + "grad_norm": 2.0886213779449463, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0088, + "step": 17600 + }, + { + "epoch": 5.92, + "grad_norm": 1.3104745149612427, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0096, + "step": 17625 + }, + { + "epoch": 5.93, + "grad_norm": 1.0289757251739502, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0109, + "step": 17650 + }, + { + "epoch": 5.94, + "grad_norm": 1.9728070497512817, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0096, + "step": 17675 + }, + { + "epoch": 5.94, + "grad_norm": 1.6403915882110596, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0106, + "step": 17700 + }, + { + "epoch": 5.95, + "grad_norm": 1.690259337425232, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0094, + "step": 17725 + }, + { + "epoch": 5.96, + "grad_norm": 2.1658172607421875, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0101, + "step": 17750 + }, + { + "epoch": 5.97, + "grad_norm": 1.4383872747421265, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0104, + "step": 17775 + }, + { + "epoch": 5.98, + "grad_norm": 0.9239019155502319, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0097, + "step": 17800 + }, + { + "epoch": 5.99, + "grad_norm": 1.193589448928833, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0099, + "step": 17825 + }, + { + "epoch": 5.99, + "grad_norm": 1.991866946220398, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0099, + "step": 17850 + }, + { + "epoch": 6.0, + "grad_norm": 0.9639148116111755, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0091, + "step": 17875 + }, + { + "epoch": 6.01, + "grad_norm": 0.8314787745475769, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0056, + "step": 17900 + }, + { + "epoch": 6.02, + "grad_norm": 0.6857895851135254, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0058, + "step": 17925 + }, + { + "epoch": 6.03, + "grad_norm": 1.2608420848846436, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0057, + "step": 17950 + }, + { + "epoch": 6.04, + "grad_norm": 1.0544570684432983, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0057, + "step": 17975 + }, + { + "epoch": 6.04, + "grad_norm": 1.4456110000610352, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0052, + "step": 18000 + }, + { + "epoch": 6.04, + "eval_loss": 0.2437412291765213, + "eval_runtime": 1426.6437, + "eval_samples_per_second": 1.003, + "eval_steps_per_second": 1.003, + "eval_wer": 21.069309600862997, + "step": 18000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 1000, + "total_flos": 1.6619748638588928e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/magahi/checkpoint-18000/training_args.bin b/checkpoints/whisper-small/magahi/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..270b892563368b7e4ebb27b06920704a61e3789a --- /dev/null +++ b/checkpoints/whisper-small/magahi/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d009372cf4c2970fdc43e9ca31d0693d81c2de494c2b288225380901e0cb814 +size 4667 diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/config.json b/checkpoints/whisper-small/maithili/checkpoint-47000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cb289bfc120ad77c5505b0ef210c56bf35075f5 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/generation_config.json b/checkpoints/whisper-small/maithili/checkpoint-47000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/model.safetensors b/checkpoints/whisper-small/maithili/checkpoint-47000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ebc12078230883e892706328a333af565a4ec4d --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829da7ee8fa71c9c04e7d2c78d97865f0f7a9262ce7a42310ac22f29688b67b6 +size 966995080 diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/optimizer.pt b/checkpoints/whisper-small/maithili/checkpoint-47000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d8f514d1c0588720b117882e2c5fe63352588b5 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b012b1391c43e9ce05621f269a94e62ace8d0aec943f404707895208d1e1db9a +size 1925063607 diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/preprocessor_config.json b/checkpoints/whisper-small/maithili/checkpoint-47000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/rng_state.pth b/checkpoints/whisper-small/maithili/checkpoint-47000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1a20393cbb5a6dbc4003ebf73123eb4a4aee452 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f2a6c537aee530cb01ca4196abe615367d65c29457f7394bf8a82352cf7223 +size 14575 diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/scheduler.pt b/checkpoints/whisper-small/maithili/checkpoint-47000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e45ebcb778fe3fc32ec8fc00fd52fc77f6c7199 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e89c1805bb349e11209419ed17b01fe2848b5da029f6d7d0cfa17ef7a26ac233 +size 627 diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/trainer_state.json b/checkpoints/whisper-small/maithili/checkpoint-47000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33f95473a9ef1d8c05d8b0667a1cd125f0a7476a --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/trainer_state.json @@ -0,0 +1,13604 @@ +{ + "best_metric": 18.7409200968523, + "best_model_checkpoint": "results/whisper-small/maithili/checkpoint-37000", + "epoch": 15.782404298186702, + "eval_steps": 1000, + "global_step": 47000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 39.73728561401367, + "learning_rate": 4.4e-07, + "loss": 2.173, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 12.378250122070312, + "learning_rate": 9.400000000000001e-07, + "loss": 1.7046, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 7.275376319885254, + "learning_rate": 1.44e-06, + "loss": 1.1677, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 5.86676025390625, + "learning_rate": 1.94e-06, + "loss": 0.8734, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 6.0481648445129395, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.7826, + "step": 125 + }, + { + "epoch": 0.05, + "grad_norm": 5.926331520080566, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.7014, + "step": 150 + }, + { + "epoch": 0.06, + "grad_norm": 5.258420944213867, + "learning_rate": 3.44e-06, + "loss": 0.619, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.1574387550354, + "learning_rate": 3.94e-06, + "loss": 0.5734, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 6.101303577423096, + "learning_rate": 4.440000000000001e-06, + "loss": 0.5415, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 4.921416282653809, + "learning_rate": 4.94e-06, + "loss": 0.4952, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 5.394608974456787, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4611, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 5.153737545013428, + "learning_rate": 5.94e-06, + "loss": 0.4073, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 4.0041961669921875, + "learning_rate": 6.440000000000001e-06, + "loss": 0.3481, + "step": 325 + }, + { + "epoch": 0.12, + "grad_norm": 3.877138376235962, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.325, + "step": 350 + }, + { + "epoch": 0.13, + "grad_norm": 3.7406184673309326, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3004, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 4.0310378074646, + "learning_rate": 7.94e-06, + "loss": 0.3063, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 3.819737672805786, + "learning_rate": 8.44e-06, + "loss": 0.2921, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 3.841536521911621, + "learning_rate": 8.94e-06, + "loss": 0.2824, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 3.840888738632202, + "learning_rate": 9.440000000000001e-06, + "loss": 0.2658, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 3.957975149154663, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2579, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 2.91758131980896, + "learning_rate": 9.997788944723618e-06, + "loss": 0.25, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 3.779358148574829, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2495, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 3.6938350200653076, + "learning_rate": 9.992763819095477e-06, + "loss": 0.2463, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 3.2886269092559814, + "learning_rate": 9.990251256281408e-06, + "loss": 0.2318, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 3.177528142929077, + "learning_rate": 9.987738693467337e-06, + "loss": 0.2313, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 2.8503806591033936, + "learning_rate": 9.985226130653267e-06, + "loss": 0.2346, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 2.749896764755249, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2234, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 3.356577157974243, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2207, + "step": 700 + }, + { + "epoch": 0.24, + "grad_norm": 3.2268624305725098, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2227, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 3.688149929046631, + "learning_rate": 9.975175879396986e-06, + "loss": 0.21, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 3.3108460903167725, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2061, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 3.251323938369751, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2071, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 3.0427768230438232, + "learning_rate": 9.967638190954775e-06, + "loss": 0.1968, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 3.4174671173095703, + "learning_rate": 9.965125628140703e-06, + "loss": 0.1933, + "step": 850 + }, + { + "epoch": 0.29, + "grad_norm": 3.2404325008392334, + "learning_rate": 9.962613065326634e-06, + "loss": 0.2026, + "step": 875 + }, + { + "epoch": 0.3, + "grad_norm": 3.457878828048706, + "learning_rate": 9.960100502512563e-06, + "loss": 0.2041, + "step": 900 + }, + { + "epoch": 0.31, + "grad_norm": 3.118767023086548, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1942, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 3.290300130844116, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1921, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 2.682091236114502, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1859, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 2.9445226192474365, + "learning_rate": 9.950050251256282e-06, + "loss": 0.183, + "step": 1000 + }, + { + "epoch": 0.34, + "eval_loss": 0.17240363359451294, + "eval_runtime": 1159.7286, + "eval_samples_per_second": 1.215, + "eval_steps_per_second": 1.215, + "eval_wer": 27.063299896229676, + "step": 1000 + }, + { + "epoch": 0.34, + "grad_norm": 2.873839855194092, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1833, + "step": 1025 + }, + { + "epoch": 0.35, + "grad_norm": 3.2827227115631104, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1814, + "step": 1050 + }, + { + "epoch": 0.36, + "grad_norm": 2.5443227291107178, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1822, + "step": 1075 + }, + { + "epoch": 0.37, + "grad_norm": 2.54311203956604, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1857, + "step": 1100 + }, + { + "epoch": 0.38, + "grad_norm": 2.750683546066284, + "learning_rate": 9.93748743718593e-06, + "loss": 0.1819, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 3.255824327468872, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1795, + "step": 1150 + }, + { + "epoch": 0.39, + "grad_norm": 2.7917044162750244, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1713, + "step": 1175 + }, + { + "epoch": 0.4, + "grad_norm": 2.924205780029297, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1701, + "step": 1200 + }, + { + "epoch": 0.41, + "grad_norm": 3.649719476699829, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1775, + "step": 1225 + }, + { + "epoch": 0.42, + "grad_norm": 3.1156651973724365, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1722, + "step": 1250 + }, + { + "epoch": 0.43, + "grad_norm": 2.7341501712799072, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1591, + "step": 1275 + }, + { + "epoch": 0.44, + "grad_norm": 2.2841298580169678, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1651, + "step": 1300 + }, + { + "epoch": 0.44, + "grad_norm": 2.838805913925171, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1627, + "step": 1325 + }, + { + "epoch": 0.45, + "grad_norm": 2.5481374263763428, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1671, + "step": 1350 + }, + { + "epoch": 0.46, + "grad_norm": 2.5288920402526855, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1623, + "step": 1375 + }, + { + "epoch": 0.47, + "grad_norm": 2.9147531986236572, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1659, + "step": 1400 + }, + { + "epoch": 0.48, + "grad_norm": 2.9429116249084473, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1559, + "step": 1425 + }, + { + "epoch": 0.49, + "grad_norm": 2.872370958328247, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1692, + "step": 1450 + }, + { + "epoch": 0.5, + "grad_norm": 2.7327332496643066, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1635, + "step": 1475 + }, + { + "epoch": 0.5, + "grad_norm": 2.7440950870513916, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1559, + "step": 1500 + }, + { + "epoch": 0.51, + "grad_norm": 2.6842617988586426, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1551, + "step": 1525 + }, + { + "epoch": 0.52, + "grad_norm": 3.0940253734588623, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1513, + "step": 1550 + }, + { + "epoch": 0.53, + "grad_norm": 2.6800715923309326, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1544, + "step": 1575 + }, + { + "epoch": 0.54, + "grad_norm": 2.7565371990203857, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1521, + "step": 1600 + }, + { + "epoch": 0.55, + "grad_norm": 2.308212995529175, + "learning_rate": 9.887236180904524e-06, + "loss": 0.156, + "step": 1625 + }, + { + "epoch": 0.55, + "grad_norm": 2.8361761569976807, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1565, + "step": 1650 + }, + { + "epoch": 0.56, + "grad_norm": 3.1111385822296143, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1518, + "step": 1675 + }, + { + "epoch": 0.57, + "grad_norm": 2.5291364192962646, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1479, + "step": 1700 + }, + { + "epoch": 0.58, + "grad_norm": 2.814749240875244, + "learning_rate": 9.877185929648241e-06, + "loss": 0.141, + "step": 1725 + }, + { + "epoch": 0.59, + "grad_norm": 2.4505727291107178, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1487, + "step": 1750 + }, + { + "epoch": 0.6, + "grad_norm": 2.551849842071533, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1458, + "step": 1775 + }, + { + "epoch": 0.6, + "grad_norm": 2.5314488410949707, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1503, + "step": 1800 + }, + { + "epoch": 0.61, + "grad_norm": 2.8743624687194824, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1417, + "step": 1825 + }, + { + "epoch": 0.62, + "grad_norm": 2.543147563934326, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1441, + "step": 1850 + }, + { + "epoch": 0.63, + "grad_norm": 2.8325717449188232, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1483, + "step": 1875 + }, + { + "epoch": 0.64, + "grad_norm": 2.821229934692383, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1421, + "step": 1900 + }, + { + "epoch": 0.65, + "grad_norm": 2.813852310180664, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1464, + "step": 1925 + }, + { + "epoch": 0.65, + "grad_norm": 2.6710205078125, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1404, + "step": 1950 + }, + { + "epoch": 0.66, + "grad_norm": 2.504669666290283, + "learning_rate": 9.85206030150754e-06, + "loss": 0.142, + "step": 1975 + }, + { + "epoch": 0.67, + "grad_norm": 2.6456685066223145, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1385, + "step": 2000 + }, + { + "epoch": 0.67, + "eval_loss": 0.14223554730415344, + "eval_runtime": 1133.6945, + "eval_samples_per_second": 1.243, + "eval_steps_per_second": 1.243, + "eval_wer": 23.037011414735385, + "step": 2000 + }, + { + "epoch": 0.68, + "grad_norm": 2.337949514389038, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1355, + "step": 2025 + }, + { + "epoch": 0.69, + "grad_norm": 2.315049171447754, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1371, + "step": 2050 + }, + { + "epoch": 0.7, + "grad_norm": 2.985461473464966, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1424, + "step": 2075 + }, + { + "epoch": 0.71, + "grad_norm": 2.7826623916625977, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1346, + "step": 2100 + }, + { + "epoch": 0.71, + "grad_norm": 2.7374391555786133, + "learning_rate": 9.836984924623117e-06, + "loss": 0.138, + "step": 2125 + }, + { + "epoch": 0.72, + "grad_norm": 2.5535800457000732, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1336, + "step": 2150 + }, + { + "epoch": 0.73, + "grad_norm": 2.709087371826172, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1337, + "step": 2175 + }, + { + "epoch": 0.74, + "grad_norm": 2.2993767261505127, + "learning_rate": 9.829447236180905e-06, + "loss": 0.1413, + "step": 2200 + }, + { + "epoch": 0.75, + "grad_norm": 2.410872220993042, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1323, + "step": 2225 + }, + { + "epoch": 0.76, + "grad_norm": 2.5389323234558105, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1314, + "step": 2250 + }, + { + "epoch": 0.76, + "grad_norm": 2.6514055728912354, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1354, + "step": 2275 + }, + { + "epoch": 0.77, + "grad_norm": 2.604208469390869, + "learning_rate": 9.819396984924624e-06, + "loss": 0.1296, + "step": 2300 + }, + { + "epoch": 0.78, + "grad_norm": 2.539126396179199, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1362, + "step": 2325 + }, + { + "epoch": 0.79, + "grad_norm": 2.744215726852417, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1291, + "step": 2350 + }, + { + "epoch": 0.8, + "grad_norm": 2.2987353801727295, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1274, + "step": 2375 + }, + { + "epoch": 0.81, + "grad_norm": 2.9888525009155273, + "learning_rate": 9.809346733668343e-06, + "loss": 0.131, + "step": 2400 + }, + { + "epoch": 0.81, + "grad_norm": 2.4542999267578125, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1336, + "step": 2425 + }, + { + "epoch": 0.82, + "grad_norm": 2.657578706741333, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1252, + "step": 2450 + }, + { + "epoch": 0.83, + "grad_norm": 2.250540256500244, + "learning_rate": 9.801809045226131e-06, + "loss": 0.1288, + "step": 2475 + }, + { + "epoch": 0.84, + "grad_norm": 2.9292681217193604, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1277, + "step": 2500 + }, + { + "epoch": 0.85, + "grad_norm": 2.657921552658081, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1248, + "step": 2525 + }, + { + "epoch": 0.86, + "grad_norm": 2.5667836666107178, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1305, + "step": 2550 + }, + { + "epoch": 0.86, + "grad_norm": 2.048830270767212, + "learning_rate": 9.79175879396985e-06, + "loss": 0.1296, + "step": 2575 + }, + { + "epoch": 0.87, + "grad_norm": 2.6684229373931885, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1279, + "step": 2600 + }, + { + "epoch": 0.88, + "grad_norm": 2.2407984733581543, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1165, + "step": 2625 + }, + { + "epoch": 0.89, + "grad_norm": 2.3654708862304688, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1233, + "step": 2650 + }, + { + "epoch": 0.9, + "grad_norm": 2.657275438308716, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1278, + "step": 2675 + }, + { + "epoch": 0.91, + "grad_norm": 2.3946664333343506, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1215, + "step": 2700 + }, + { + "epoch": 0.92, + "grad_norm": 2.002594232559204, + "learning_rate": 9.776683417085428e-06, + "loss": 0.1169, + "step": 2725 + }, + { + "epoch": 0.92, + "grad_norm": 2.3366785049438477, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1149, + "step": 2750 + }, + { + "epoch": 0.93, + "grad_norm": 2.594217538833618, + "learning_rate": 9.771658291457288e-06, + "loss": 0.115, + "step": 2775 + }, + { + "epoch": 0.94, + "grad_norm": 2.208904504776001, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1156, + "step": 2800 + }, + { + "epoch": 0.95, + "grad_norm": 2.3672680854797363, + "learning_rate": 9.766633165829147e-06, + "loss": 0.1217, + "step": 2825 + }, + { + "epoch": 0.96, + "grad_norm": 2.3146979808807373, + "learning_rate": 9.764120603015076e-06, + "loss": 0.1247, + "step": 2850 + }, + { + "epoch": 0.97, + "grad_norm": 2.3579561710357666, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1171, + "step": 2875 + }, + { + "epoch": 0.97, + "grad_norm": 2.3683693408966064, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1102, + "step": 2900 + }, + { + "epoch": 0.98, + "grad_norm": 1.865391731262207, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1115, + "step": 2925 + }, + { + "epoch": 0.99, + "grad_norm": 2.1046814918518066, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1186, + "step": 2950 + }, + { + "epoch": 1.0, + "grad_norm": 2.148693323135376, + "learning_rate": 9.751557788944724e-06, + "loss": 0.114, + "step": 2975 + }, + { + "epoch": 1.01, + "grad_norm": 2.259674072265625, + "learning_rate": 9.749045226130654e-06, + "loss": 0.0981, + "step": 3000 + }, + { + "epoch": 1.01, + "eval_loss": 0.129165917634964, + "eval_runtime": 1139.4698, + "eval_samples_per_second": 1.237, + "eval_steps_per_second": 1.237, + "eval_wer": 21.798685575925287, + "step": 3000 + }, + { + "epoch": 1.02, + "grad_norm": 2.2977211475372314, + "learning_rate": 9.746532663316583e-06, + "loss": 0.0996, + "step": 3025 + }, + { + "epoch": 1.02, + "grad_norm": 2.0338549613952637, + "learning_rate": 9.744020100502514e-06, + "loss": 0.0895, + "step": 3050 + }, + { + "epoch": 1.03, + "grad_norm": 1.9413831233978271, + "learning_rate": 9.741507537688443e-06, + "loss": 0.0909, + "step": 3075 + }, + { + "epoch": 1.04, + "grad_norm": 1.9627686738967896, + "learning_rate": 9.738994974874373e-06, + "loss": 0.089, + "step": 3100 + }, + { + "epoch": 1.05, + "grad_norm": 1.7762622833251953, + "learning_rate": 9.736482412060302e-06, + "loss": 0.0862, + "step": 3125 + }, + { + "epoch": 1.06, + "grad_norm": 2.2398643493652344, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0891, + "step": 3150 + }, + { + "epoch": 1.07, + "grad_norm": 2.176713466644287, + "learning_rate": 9.731457286432162e-06, + "loss": 0.0966, + "step": 3175 + }, + { + "epoch": 1.07, + "grad_norm": 2.0719587802886963, + "learning_rate": 9.728944723618092e-06, + "loss": 0.0898, + "step": 3200 + }, + { + "epoch": 1.08, + "grad_norm": 2.1981639862060547, + "learning_rate": 9.726432160804021e-06, + "loss": 0.0913, + "step": 3225 + }, + { + "epoch": 1.09, + "grad_norm": 2.131726026535034, + "learning_rate": 9.72391959798995e-06, + "loss": 0.0873, + "step": 3250 + }, + { + "epoch": 1.1, + "grad_norm": 2.262434959411621, + "learning_rate": 9.721407035175881e-06, + "loss": 0.0939, + "step": 3275 + }, + { + "epoch": 1.11, + "grad_norm": 2.2906582355499268, + "learning_rate": 9.718894472361809e-06, + "loss": 0.0922, + "step": 3300 + }, + { + "epoch": 1.12, + "grad_norm": 2.1660315990448, + "learning_rate": 9.71638190954774e-06, + "loss": 0.0825, + "step": 3325 + }, + { + "epoch": 1.12, + "grad_norm": 2.021197557449341, + "learning_rate": 9.71386934673367e-06, + "loss": 0.0899, + "step": 3350 + }, + { + "epoch": 1.13, + "grad_norm": 2.1120781898498535, + "learning_rate": 9.711356783919599e-06, + "loss": 0.0909, + "step": 3375 + }, + { + "epoch": 1.14, + "grad_norm": 2.031515598297119, + "learning_rate": 9.70884422110553e-06, + "loss": 0.0827, + "step": 3400 + }, + { + "epoch": 1.15, + "grad_norm": 2.274888277053833, + "learning_rate": 9.706331658291457e-06, + "loss": 0.0865, + "step": 3425 + }, + { + "epoch": 1.16, + "grad_norm": 2.2474935054779053, + "learning_rate": 9.703819095477388e-06, + "loss": 0.087, + "step": 3450 + }, + { + "epoch": 1.17, + "grad_norm": 2.2877719402313232, + "learning_rate": 9.701306532663318e-06, + "loss": 0.088, + "step": 3475 + }, + { + "epoch": 1.18, + "grad_norm": 1.9595692157745361, + "learning_rate": 9.698793969849247e-06, + "loss": 0.0835, + "step": 3500 + }, + { + "epoch": 1.18, + "grad_norm": 2.163682222366333, + "learning_rate": 9.696281407035176e-06, + "loss": 0.0841, + "step": 3525 + }, + { + "epoch": 1.19, + "grad_norm": 2.4079339504241943, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0925, + "step": 3550 + }, + { + "epoch": 1.2, + "grad_norm": 1.8654513359069824, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0834, + "step": 3575 + }, + { + "epoch": 1.21, + "grad_norm": 2.1059200763702393, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0848, + "step": 3600 + }, + { + "epoch": 1.22, + "grad_norm": 1.7615976333618164, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0842, + "step": 3625 + }, + { + "epoch": 1.23, + "grad_norm": 1.8055098056793213, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0868, + "step": 3650 + }, + { + "epoch": 1.23, + "grad_norm": 1.9537371397018433, + "learning_rate": 9.681206030150756e-06, + "loss": 0.0868, + "step": 3675 + }, + { + "epoch": 1.24, + "grad_norm": 2.2797625064849854, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0822, + "step": 3700 + }, + { + "epoch": 1.25, + "grad_norm": 2.3578338623046875, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0832, + "step": 3725 + }, + { + "epoch": 1.26, + "grad_norm": 2.4285616874694824, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0867, + "step": 3750 + }, + { + "epoch": 1.27, + "grad_norm": 2.110700845718384, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0794, + "step": 3775 + }, + { + "epoch": 1.28, + "grad_norm": 2.6813302040100098, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0833, + "step": 3800 + }, + { + "epoch": 1.28, + "grad_norm": 2.1010050773620605, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0803, + "step": 3825 + }, + { + "epoch": 1.29, + "grad_norm": 1.9233365058898926, + "learning_rate": 9.663618090452263e-06, + "loss": 0.085, + "step": 3850 + }, + { + "epoch": 1.3, + "grad_norm": 2.1443631649017334, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0795, + "step": 3875 + }, + { + "epoch": 1.31, + "grad_norm": 2.643442392349243, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0842, + "step": 3900 + }, + { + "epoch": 1.32, + "grad_norm": 1.9736140966415405, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0862, + "step": 3925 + }, + { + "epoch": 1.33, + "grad_norm": 1.7952789068222046, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0776, + "step": 3950 + }, + { + "epoch": 1.33, + "grad_norm": 2.705756664276123, + "learning_rate": 9.651055276381909e-06, + "loss": 0.083, + "step": 3975 + }, + { + "epoch": 1.34, + "grad_norm": 2.0052833557128906, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0794, + "step": 4000 + }, + { + "epoch": 1.34, + "eval_loss": 0.12286534905433655, + "eval_runtime": 1140.4632, + "eval_samples_per_second": 1.235, + "eval_steps_per_second": 1.235, + "eval_wer": 22.158422691110342, + "step": 4000 + }, + { + "epoch": 1.35, + "grad_norm": 2.3845431804656982, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0857, + "step": 4025 + }, + { + "epoch": 1.36, + "grad_norm": 1.749596357345581, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0834, + "step": 4050 + }, + { + "epoch": 1.37, + "grad_norm": 2.1161949634552, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0815, + "step": 4075 + }, + { + "epoch": 1.38, + "grad_norm": 2.0484824180603027, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0811, + "step": 4100 + }, + { + "epoch": 1.39, + "grad_norm": 2.1350505352020264, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0822, + "step": 4125 + }, + { + "epoch": 1.39, + "grad_norm": 2.062763214111328, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0786, + "step": 4150 + }, + { + "epoch": 1.4, + "grad_norm": 2.1323740482330322, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0831, + "step": 4175 + }, + { + "epoch": 1.41, + "grad_norm": 2.4143643379211426, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0812, + "step": 4200 + }, + { + "epoch": 1.42, + "grad_norm": 2.039340019226074, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0764, + "step": 4225 + }, + { + "epoch": 1.43, + "grad_norm": 2.4119646549224854, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0795, + "step": 4250 + }, + { + "epoch": 1.44, + "grad_norm": 2.029726266860962, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0784, + "step": 4275 + }, + { + "epoch": 1.44, + "grad_norm": 2.0107481479644775, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0806, + "step": 4300 + }, + { + "epoch": 1.45, + "grad_norm": 1.9550453424453735, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0805, + "step": 4325 + }, + { + "epoch": 1.46, + "grad_norm": 2.27432918548584, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0775, + "step": 4350 + }, + { + "epoch": 1.47, + "grad_norm": 2.272207021713257, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0795, + "step": 4375 + }, + { + "epoch": 1.48, + "grad_norm": 1.999552607536316, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0802, + "step": 4400 + }, + { + "epoch": 1.49, + "grad_norm": 2.4127838611602783, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0798, + "step": 4425 + }, + { + "epoch": 1.49, + "grad_norm": 2.055750608444214, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0745, + "step": 4450 + }, + { + "epoch": 1.5, + "grad_norm": 2.6142053604125977, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0765, + "step": 4475 + }, + { + "epoch": 1.51, + "grad_norm": 2.169769763946533, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0794, + "step": 4500 + }, + { + "epoch": 1.52, + "grad_norm": 2.814781904220581, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0765, + "step": 4525 + }, + { + "epoch": 1.53, + "grad_norm": 1.9292739629745483, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0808, + "step": 4550 + }, + { + "epoch": 1.54, + "grad_norm": 2.4609463214874268, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0797, + "step": 4575 + }, + { + "epoch": 1.54, + "grad_norm": 2.239750385284424, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0775, + "step": 4600 + }, + { + "epoch": 1.55, + "grad_norm": 1.8403880596160889, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0782, + "step": 4625 + }, + { + "epoch": 1.56, + "grad_norm": 1.947909951210022, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0692, + "step": 4650 + }, + { + "epoch": 1.57, + "grad_norm": 2.1656014919281006, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0743, + "step": 4675 + }, + { + "epoch": 1.58, + "grad_norm": 1.8881808519363403, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0735, + "step": 4700 + }, + { + "epoch": 1.59, + "grad_norm": 2.051849365234375, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0763, + "step": 4725 + }, + { + "epoch": 1.6, + "grad_norm": 1.9382680654525757, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0796, + "step": 4750 + }, + { + "epoch": 1.6, + "grad_norm": 2.5144362449645996, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0746, + "step": 4775 + }, + { + "epoch": 1.61, + "grad_norm": 1.8668988943099976, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0741, + "step": 4800 + }, + { + "epoch": 1.62, + "grad_norm": 2.1542413234710693, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0761, + "step": 4825 + }, + { + "epoch": 1.63, + "grad_norm": 1.8769080638885498, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0721, + "step": 4850 + }, + { + "epoch": 1.64, + "grad_norm": 1.8785624504089355, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0733, + "step": 4875 + }, + { + "epoch": 1.65, + "grad_norm": 1.9905091524124146, + "learning_rate": 9.558090452261308e-06, + "loss": 0.076, + "step": 4900 + }, + { + "epoch": 1.65, + "grad_norm": 1.6048266887664795, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0771, + "step": 4925 + }, + { + "epoch": 1.66, + "grad_norm": 2.2200305461883545, + "learning_rate": 9.553065326633166e-06, + "loss": 0.074, + "step": 4950 + }, + { + "epoch": 1.67, + "grad_norm": 2.3125860691070557, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0722, + "step": 4975 + }, + { + "epoch": 1.68, + "grad_norm": 2.1221413612365723, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0756, + "step": 5000 + }, + { + "epoch": 1.68, + "eval_loss": 0.12283406406641006, + "eval_runtime": 1156.6482, + "eval_samples_per_second": 1.218, + "eval_steps_per_second": 1.218, + "eval_wer": 20.747146316153582, + "step": 5000 + }, + { + "epoch": 1.69, + "grad_norm": 2.2898526191711426, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0696, + "step": 5025 + }, + { + "epoch": 1.7, + "grad_norm": 1.766842007637024, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0715, + "step": 5050 + }, + { + "epoch": 1.7, + "grad_norm": 2.225700616836548, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0702, + "step": 5075 + }, + { + "epoch": 1.71, + "grad_norm": 2.2052948474884033, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0708, + "step": 5100 + }, + { + "epoch": 1.72, + "grad_norm": 1.7056403160095215, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0665, + "step": 5125 + }, + { + "epoch": 1.73, + "grad_norm": 2.8999364376068115, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0696, + "step": 5150 + }, + { + "epoch": 1.74, + "grad_norm": 2.419360399246216, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0714, + "step": 5175 + }, + { + "epoch": 1.75, + "grad_norm": 1.9290015697479248, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0715, + "step": 5200 + }, + { + "epoch": 1.75, + "grad_norm": 2.1120193004608154, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0689, + "step": 5225 + }, + { + "epoch": 1.76, + "grad_norm": 2.5146524906158447, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0709, + "step": 5250 + }, + { + "epoch": 1.77, + "grad_norm": 2.12321138381958, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0685, + "step": 5275 + }, + { + "epoch": 1.78, + "grad_norm": 1.86492121219635, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0695, + "step": 5300 + }, + { + "epoch": 1.79, + "grad_norm": 1.9847769737243652, + "learning_rate": 9.515376884422111e-06, + "loss": 0.071, + "step": 5325 + }, + { + "epoch": 1.8, + "grad_norm": 1.7826789617538452, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0674, + "step": 5350 + }, + { + "epoch": 1.8, + "grad_norm": 2.098600149154663, + "learning_rate": 9.510351758793972e-06, + "loss": 0.069, + "step": 5375 + }, + { + "epoch": 1.81, + "grad_norm": 2.2986836433410645, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0662, + "step": 5400 + }, + { + "epoch": 1.82, + "grad_norm": 2.072977066040039, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0675, + "step": 5425 + }, + { + "epoch": 1.83, + "grad_norm": 2.501917600631714, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0734, + "step": 5450 + }, + { + "epoch": 1.84, + "grad_norm": 2.4345171451568604, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0698, + "step": 5475 + }, + { + "epoch": 1.85, + "grad_norm": 2.157480239868164, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0675, + "step": 5500 + }, + { + "epoch": 1.86, + "grad_norm": 2.3825862407684326, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0695, + "step": 5525 + }, + { + "epoch": 1.86, + "grad_norm": 2.4216952323913574, + "learning_rate": 9.492763819095479e-06, + "loss": 0.0678, + "step": 5550 + }, + { + "epoch": 1.87, + "grad_norm": 1.85026216506958, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0631, + "step": 5575 + }, + { + "epoch": 1.88, + "grad_norm": 2.0813212394714355, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0691, + "step": 5600 + }, + { + "epoch": 1.89, + "grad_norm": 2.152451753616333, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0708, + "step": 5625 + }, + { + "epoch": 1.9, + "grad_norm": 2.0022130012512207, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0705, + "step": 5650 + }, + { + "epoch": 1.91, + "grad_norm": 2.2159368991851807, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0643, + "step": 5675 + }, + { + "epoch": 1.91, + "grad_norm": 1.8199883699417114, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0664, + "step": 5700 + }, + { + "epoch": 1.92, + "grad_norm": 1.8099710941314697, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0673, + "step": 5725 + }, + { + "epoch": 1.93, + "grad_norm": 2.4796226024627686, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0685, + "step": 5750 + }, + { + "epoch": 1.94, + "grad_norm": 2.085402250289917, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0676, + "step": 5775 + }, + { + "epoch": 1.95, + "grad_norm": 2.3015823364257812, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0624, + "step": 5800 + }, + { + "epoch": 1.96, + "grad_norm": 1.5518728494644165, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0638, + "step": 5825 + }, + { + "epoch": 1.96, + "grad_norm": 2.30011248588562, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0675, + "step": 5850 + }, + { + "epoch": 1.97, + "grad_norm": 1.7813180685043335, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0617, + "step": 5875 + }, + { + "epoch": 1.98, + "grad_norm": 2.010237216949463, + "learning_rate": 9.457587939698494e-06, + "loss": 0.065, + "step": 5900 + }, + { + "epoch": 1.99, + "grad_norm": 2.0169990062713623, + "learning_rate": 9.455075376884423e-06, + "loss": 0.065, + "step": 5925 + }, + { + "epoch": 2.0, + "grad_norm": 1.9987738132476807, + "learning_rate": 9.452562814070353e-06, + "loss": 0.064, + "step": 5950 + }, + { + "epoch": 2.01, + "grad_norm": 1.7468911409378052, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0443, + "step": 5975 + }, + { + "epoch": 2.01, + "grad_norm": 1.3861793279647827, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0419, + "step": 6000 + }, + { + "epoch": 2.01, + "eval_loss": 0.12894819676876068, + "eval_runtime": 1144.5765, + "eval_samples_per_second": 1.231, + "eval_steps_per_second": 1.231, + "eval_wer": 20.809408509166378, + "step": 6000 + }, + { + "epoch": 2.02, + "grad_norm": 1.886191964149475, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0358, + "step": 6025 + }, + { + "epoch": 2.03, + "grad_norm": 1.757878303527832, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0383, + "step": 6050 + }, + { + "epoch": 2.04, + "grad_norm": 1.679871916770935, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0383, + "step": 6075 + }, + { + "epoch": 2.05, + "grad_norm": 1.9209476709365845, + "learning_rate": 9.43748743718593e-06, + "loss": 0.0391, + "step": 6100 + }, + { + "epoch": 2.06, + "grad_norm": 1.8202593326568604, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0412, + "step": 6125 + }, + { + "epoch": 2.07, + "grad_norm": 1.593234658241272, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0376, + "step": 6150 + }, + { + "epoch": 2.07, + "grad_norm": 1.4269791841506958, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0361, + "step": 6175 + }, + { + "epoch": 2.08, + "grad_norm": 1.8575375080108643, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0374, + "step": 6200 + }, + { + "epoch": 2.09, + "grad_norm": 1.4942901134490967, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0399, + "step": 6225 + }, + { + "epoch": 2.1, + "grad_norm": 1.8410648107528687, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0392, + "step": 6250 + }, + { + "epoch": 2.11, + "grad_norm": 1.687562108039856, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0386, + "step": 6275 + }, + { + "epoch": 2.12, + "grad_norm": 1.599827527999878, + "learning_rate": 9.417386934673367e-06, + "loss": 0.035, + "step": 6300 + }, + { + "epoch": 2.12, + "grad_norm": 1.4508821964263916, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0394, + "step": 6325 + }, + { + "epoch": 2.13, + "grad_norm": 2.0084352493286133, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0381, + "step": 6350 + }, + { + "epoch": 2.14, + "grad_norm": 1.8546168804168701, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0373, + "step": 6375 + }, + { + "epoch": 2.15, + "grad_norm": 1.789834976196289, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0385, + "step": 6400 + }, + { + "epoch": 2.16, + "grad_norm": 1.9399482011795044, + "learning_rate": 9.404824120603015e-06, + "loss": 0.04, + "step": 6425 + }, + { + "epoch": 2.17, + "grad_norm": 1.54327392578125, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0414, + "step": 6450 + }, + { + "epoch": 2.17, + "grad_norm": 1.7804811000823975, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0405, + "step": 6475 + }, + { + "epoch": 2.18, + "grad_norm": 1.6186919212341309, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0391, + "step": 6500 + }, + { + "epoch": 2.19, + "grad_norm": 2.084867477416992, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0382, + "step": 6525 + }, + { + "epoch": 2.2, + "grad_norm": 1.6806780099868774, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0371, + "step": 6550 + }, + { + "epoch": 2.21, + "grad_norm": 2.0451955795288086, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0367, + "step": 6575 + }, + { + "epoch": 2.22, + "grad_norm": 2.277158737182617, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0355, + "step": 6600 + }, + { + "epoch": 2.22, + "grad_norm": 1.4519309997558594, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0368, + "step": 6625 + }, + { + "epoch": 2.23, + "grad_norm": 1.9681559801101685, + "learning_rate": 9.382211055276382e-06, + "loss": 0.037, + "step": 6650 + }, + { + "epoch": 2.24, + "grad_norm": 1.582107663154602, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0368, + "step": 6675 + }, + { + "epoch": 2.25, + "grad_norm": 1.7207388877868652, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0392, + "step": 6700 + }, + { + "epoch": 2.26, + "grad_norm": 1.8714826107025146, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0384, + "step": 6725 + }, + { + "epoch": 2.27, + "grad_norm": 1.485115885734558, + "learning_rate": 9.372160804020101e-06, + "loss": 0.039, + "step": 6750 + }, + { + "epoch": 2.28, + "grad_norm": 1.7778412103652954, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0359, + "step": 6775 + }, + { + "epoch": 2.28, + "grad_norm": 1.6911503076553345, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0355, + "step": 6800 + }, + { + "epoch": 2.29, + "grad_norm": 1.1767462491989136, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0377, + "step": 6825 + }, + { + "epoch": 2.3, + "grad_norm": 1.8133790493011475, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0352, + "step": 6850 + }, + { + "epoch": 2.31, + "grad_norm": 1.820476770401001, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0357, + "step": 6875 + }, + { + "epoch": 2.32, + "grad_norm": 1.828348159790039, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0358, + "step": 6900 + }, + { + "epoch": 2.33, + "grad_norm": 2.406266450881958, + "learning_rate": 9.354572864321608e-06, + "loss": 0.038, + "step": 6925 + }, + { + "epoch": 2.33, + "grad_norm": 1.8843538761138916, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0372, + "step": 6950 + }, + { + "epoch": 2.34, + "grad_norm": 2.1403121948242188, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0358, + "step": 6975 + }, + { + "epoch": 2.35, + "grad_norm": 1.3958196640014648, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0347, + "step": 7000 + }, + { + "epoch": 2.35, + "eval_loss": 0.13649247586727142, + "eval_runtime": 1131.2458, + "eval_samples_per_second": 1.246, + "eval_steps_per_second": 1.246, + "eval_wer": 21.01694915254237, + "step": 7000 + }, + { + "epoch": 2.36, + "grad_norm": 1.5389511585235596, + "learning_rate": 9.344522613065327e-06, + "loss": 0.037, + "step": 7025 + }, + { + "epoch": 2.37, + "grad_norm": 1.703018307685852, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0341, + "step": 7050 + }, + { + "epoch": 2.38, + "grad_norm": 1.9468741416931152, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0357, + "step": 7075 + }, + { + "epoch": 2.38, + "grad_norm": 2.2654669284820557, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0355, + "step": 7100 + }, + { + "epoch": 2.39, + "grad_norm": 2.465904712677002, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0344, + "step": 7125 + }, + { + "epoch": 2.4, + "grad_norm": 2.071824550628662, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0366, + "step": 7150 + }, + { + "epoch": 2.41, + "grad_norm": 1.538020133972168, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0354, + "step": 7175 + }, + { + "epoch": 2.42, + "grad_norm": 1.8215054273605347, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0333, + "step": 7200 + }, + { + "epoch": 2.43, + "grad_norm": 1.3622437715530396, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0361, + "step": 7225 + }, + { + "epoch": 2.43, + "grad_norm": 1.4142067432403564, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0341, + "step": 7250 + }, + { + "epoch": 2.44, + "grad_norm": 2.4575562477111816, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0366, + "step": 7275 + }, + { + "epoch": 2.45, + "grad_norm": 1.409419298171997, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0338, + "step": 7300 + }, + { + "epoch": 2.46, + "grad_norm": 1.89165198802948, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0343, + "step": 7325 + }, + { + "epoch": 2.47, + "grad_norm": 1.7426328659057617, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0374, + "step": 7350 + }, + { + "epoch": 2.48, + "grad_norm": 2.032276153564453, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0336, + "step": 7375 + }, + { + "epoch": 2.48, + "grad_norm": 1.5048723220825195, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0349, + "step": 7400 + }, + { + "epoch": 2.49, + "grad_norm": 2.052701473236084, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0344, + "step": 7425 + }, + { + "epoch": 2.5, + "grad_norm": 1.4750709533691406, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0341, + "step": 7450 + }, + { + "epoch": 2.51, + "grad_norm": 1.86838698387146, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0371, + "step": 7475 + }, + { + "epoch": 2.52, + "grad_norm": 1.7041974067687988, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0332, + "step": 7500 + }, + { + "epoch": 2.53, + "grad_norm": 1.7302571535110474, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0331, + "step": 7525 + }, + { + "epoch": 2.54, + "grad_norm": 1.622873306274414, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0349, + "step": 7550 + }, + { + "epoch": 2.54, + "grad_norm": 1.652262806892395, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0353, + "step": 7575 + }, + { + "epoch": 2.55, + "grad_norm": 1.6170547008514404, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0316, + "step": 7600 + }, + { + "epoch": 2.56, + "grad_norm": 1.7558672428131104, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0349, + "step": 7625 + }, + { + "epoch": 2.57, + "grad_norm": 1.9710546731948853, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0352, + "step": 7650 + }, + { + "epoch": 2.58, + "grad_norm": 1.3506332635879517, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0366, + "step": 7675 + }, + { + "epoch": 2.59, + "grad_norm": 2.082754611968994, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0321, + "step": 7700 + }, + { + "epoch": 2.59, + "grad_norm": 2.243713855743408, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0368, + "step": 7725 + }, + { + "epoch": 2.6, + "grad_norm": 1.964044451713562, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0341, + "step": 7750 + }, + { + "epoch": 2.61, + "grad_norm": 2.244699239730835, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0331, + "step": 7775 + }, + { + "epoch": 2.62, + "grad_norm": 1.94173264503479, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0325, + "step": 7800 + }, + { + "epoch": 2.63, + "grad_norm": 1.9841606616973877, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0349, + "step": 7825 + }, + { + "epoch": 2.64, + "grad_norm": 2.179622173309326, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0334, + "step": 7850 + }, + { + "epoch": 2.64, + "grad_norm": 1.6984986066818237, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0306, + "step": 7875 + }, + { + "epoch": 2.65, + "grad_norm": 1.4788258075714111, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0297, + "step": 7900 + }, + { + "epoch": 2.66, + "grad_norm": 1.864365816116333, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0335, + "step": 7925 + }, + { + "epoch": 2.67, + "grad_norm": 1.7515934705734253, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0295, + "step": 7950 + }, + { + "epoch": 2.68, + "grad_norm": 2.0559329986572266, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0315, + "step": 7975 + }, + { + "epoch": 2.69, + "grad_norm": 1.5548522472381592, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0333, + "step": 8000 + }, + { + "epoch": 2.69, + "eval_loss": 0.14287370443344116, + "eval_runtime": 1148.7612, + "eval_samples_per_second": 1.227, + "eval_steps_per_second": 1.227, + "eval_wer": 20.283638879280524, + "step": 8000 + }, + { + "epoch": 2.69, + "grad_norm": 1.7783691883087158, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0337, + "step": 8025 + }, + { + "epoch": 2.7, + "grad_norm": 2.074395179748535, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0326, + "step": 8050 + }, + { + "epoch": 2.71, + "grad_norm": 1.316449761390686, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0312, + "step": 8075 + }, + { + "epoch": 2.72, + "grad_norm": 1.9802296161651611, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0302, + "step": 8100 + }, + { + "epoch": 2.73, + "grad_norm": 1.5073142051696777, + "learning_rate": 9.233969849246231e-06, + "loss": 0.032, + "step": 8125 + }, + { + "epoch": 2.74, + "grad_norm": 1.6512932777404785, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0318, + "step": 8150 + }, + { + "epoch": 2.75, + "grad_norm": 2.0232787132263184, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0333, + "step": 8175 + }, + { + "epoch": 2.75, + "grad_norm": 1.8028162717819214, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0332, + "step": 8200 + }, + { + "epoch": 2.76, + "grad_norm": 1.8167335987091064, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0318, + "step": 8225 + }, + { + "epoch": 2.77, + "grad_norm": 1.9492465257644653, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0312, + "step": 8250 + }, + { + "epoch": 2.78, + "grad_norm": 1.5155192613601685, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0321, + "step": 8275 + }, + { + "epoch": 2.79, + "grad_norm": 1.7698118686676025, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0333, + "step": 8300 + }, + { + "epoch": 2.8, + "grad_norm": 1.4418883323669434, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0337, + "step": 8325 + }, + { + "epoch": 2.8, + "grad_norm": 1.9272536039352417, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0314, + "step": 8350 + }, + { + "epoch": 2.81, + "grad_norm": 1.6724305152893066, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0311, + "step": 8375 + }, + { + "epoch": 2.82, + "grad_norm": 1.9506512880325317, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0311, + "step": 8400 + }, + { + "epoch": 2.83, + "grad_norm": 1.7949283123016357, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0306, + "step": 8425 + }, + { + "epoch": 2.84, + "grad_norm": 1.6787230968475342, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0322, + "step": 8450 + }, + { + "epoch": 2.85, + "grad_norm": 1.795274257659912, + "learning_rate": 9.198793969849247e-06, + "loss": 0.029, + "step": 8475 + }, + { + "epoch": 2.85, + "grad_norm": 1.7604349851608276, + "learning_rate": 9.196281407035178e-06, + "loss": 0.029, + "step": 8500 + }, + { + "epoch": 2.86, + "grad_norm": 1.5884255170822144, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0307, + "step": 8525 + }, + { + "epoch": 2.87, + "grad_norm": 2.6448774337768555, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0331, + "step": 8550 + }, + { + "epoch": 2.88, + "grad_norm": 1.5177228450775146, + "learning_rate": 9.188743718592966e-06, + "loss": 0.029, + "step": 8575 + }, + { + "epoch": 2.89, + "grad_norm": 1.7545777559280396, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0287, + "step": 8600 + }, + { + "epoch": 2.9, + "grad_norm": 1.4506813287734985, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0313, + "step": 8625 + }, + { + "epoch": 2.9, + "grad_norm": 1.9890118837356567, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0314, + "step": 8650 + }, + { + "epoch": 2.91, + "grad_norm": 1.7269412279129028, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0319, + "step": 8675 + }, + { + "epoch": 2.92, + "grad_norm": 2.024498224258423, + "learning_rate": 9.176180904522614e-06, + "loss": 0.032, + "step": 8700 + }, + { + "epoch": 2.93, + "grad_norm": 1.9361039400100708, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0295, + "step": 8725 + }, + { + "epoch": 2.94, + "grad_norm": 1.7754948139190674, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0291, + "step": 8750 + }, + { + "epoch": 2.95, + "grad_norm": 1.6525825262069702, + "learning_rate": 9.168643216080404e-06, + "loss": 0.0304, + "step": 8775 + }, + { + "epoch": 2.96, + "grad_norm": 2.3476269245147705, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0318, + "step": 8800 + }, + { + "epoch": 2.96, + "grad_norm": 1.694828987121582, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0286, + "step": 8825 + }, + { + "epoch": 2.97, + "grad_norm": 1.9927901029586792, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0323, + "step": 8850 + }, + { + "epoch": 2.98, + "grad_norm": 1.9842555522918701, + "learning_rate": 9.158592964824121e-06, + "loss": 0.03, + "step": 8875 + }, + { + "epoch": 2.99, + "grad_norm": 2.5348129272460938, + "learning_rate": 9.156080402010052e-06, + "loss": 0.0317, + "step": 8900 + }, + { + "epoch": 3.0, + "grad_norm": 1.5902265310287476, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0302, + "step": 8925 + }, + { + "epoch": 3.01, + "grad_norm": 1.307062029838562, + "learning_rate": 9.15105527638191e-06, + "loss": 0.021, + "step": 8950 + }, + { + "epoch": 3.01, + "grad_norm": 1.4134482145309448, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0164, + "step": 8975 + }, + { + "epoch": 3.02, + "grad_norm": 1.1575281620025635, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0149, + "step": 9000 + }, + { + "epoch": 3.02, + "eval_loss": 0.14784948527812958, + "eval_runtime": 1174.0603, + "eval_samples_per_second": 1.2, + "eval_steps_per_second": 1.2, + "eval_wer": 21.667243168453822, + "step": 9000 + }, + { + "epoch": 3.03, + "grad_norm": 1.6445032358169556, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0148, + "step": 9025 + }, + { + "epoch": 3.04, + "grad_norm": 1.3716453313827515, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0155, + "step": 9050 + }, + { + "epoch": 3.05, + "grad_norm": 0.9623987078666687, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0138, + "step": 9075 + }, + { + "epoch": 3.06, + "grad_norm": 1.3288583755493164, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0147, + "step": 9100 + }, + { + "epoch": 3.06, + "grad_norm": 1.3147351741790771, + "learning_rate": 9.133467336683417e-06, + "loss": 0.017, + "step": 9125 + }, + { + "epoch": 3.07, + "grad_norm": 1.5939329862594604, + "learning_rate": 9.130954773869347e-06, + "loss": 0.0155, + "step": 9150 + }, + { + "epoch": 3.08, + "grad_norm": 1.328973650932312, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0163, + "step": 9175 + }, + { + "epoch": 3.09, + "grad_norm": 1.122519850730896, + "learning_rate": 9.125929648241205e-06, + "loss": 0.0147, + "step": 9200 + }, + { + "epoch": 3.1, + "grad_norm": 1.4043760299682617, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0149, + "step": 9225 + }, + { + "epoch": 3.11, + "grad_norm": 1.5257760286331177, + "learning_rate": 9.120904522613066e-06, + "loss": 0.0159, + "step": 9250 + }, + { + "epoch": 3.11, + "grad_norm": 1.9292327165603638, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0158, + "step": 9275 + }, + { + "epoch": 3.12, + "grad_norm": 0.8486630320549011, + "learning_rate": 9.115879396984926e-06, + "loss": 0.0168, + "step": 9300 + }, + { + "epoch": 3.13, + "grad_norm": 1.5127559900283813, + "learning_rate": 9.113467336683418e-06, + "loss": 0.015, + "step": 9325 + }, + { + "epoch": 3.14, + "grad_norm": 1.367360234260559, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0162, + "step": 9350 + }, + { + "epoch": 3.15, + "grad_norm": 1.8258417844772339, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0142, + "step": 9375 + }, + { + "epoch": 3.16, + "grad_norm": 1.5560498237609863, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0151, + "step": 9400 + }, + { + "epoch": 3.16, + "grad_norm": 1.1532343626022339, + "learning_rate": 9.103417085427137e-06, + "loss": 0.014, + "step": 9425 + }, + { + "epoch": 3.17, + "grad_norm": 1.3081587553024292, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0165, + "step": 9450 + }, + { + "epoch": 3.18, + "grad_norm": 1.2694733142852783, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0149, + "step": 9475 + }, + { + "epoch": 3.19, + "grad_norm": 2.1223630905151367, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0163, + "step": 9500 + }, + { + "epoch": 3.2, + "grad_norm": 1.4513421058654785, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0152, + "step": 9525 + }, + { + "epoch": 3.21, + "grad_norm": 1.3429062366485596, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0147, + "step": 9550 + }, + { + "epoch": 3.22, + "grad_norm": 2.071218967437744, + "learning_rate": 9.088341708542714e-06, + "loss": 0.017, + "step": 9575 + }, + { + "epoch": 3.22, + "grad_norm": 1.2968500852584839, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0157, + "step": 9600 + }, + { + "epoch": 3.23, + "grad_norm": 1.6869624853134155, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0151, + "step": 9625 + }, + { + "epoch": 3.24, + "grad_norm": 1.294914722442627, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0149, + "step": 9650 + }, + { + "epoch": 3.25, + "grad_norm": 1.2467395067214966, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0143, + "step": 9675 + }, + { + "epoch": 3.26, + "grad_norm": 1.2003506422042847, + "learning_rate": 9.075778894472363e-06, + "loss": 0.016, + "step": 9700 + }, + { + "epoch": 3.27, + "grad_norm": 1.455265998840332, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0153, + "step": 9725 + }, + { + "epoch": 3.27, + "grad_norm": 1.3461107015609741, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0159, + "step": 9750 + }, + { + "epoch": 3.28, + "grad_norm": 1.5192843675613403, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0155, + "step": 9775 + }, + { + "epoch": 3.29, + "grad_norm": 1.446125864982605, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0148, + "step": 9800 + }, + { + "epoch": 3.3, + "grad_norm": 1.5783380270004272, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0149, + "step": 9825 + }, + { + "epoch": 3.31, + "grad_norm": 1.9398548603057861, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0178, + "step": 9850 + }, + { + "epoch": 3.32, + "grad_norm": 1.7308599948883057, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0154, + "step": 9875 + }, + { + "epoch": 3.32, + "grad_norm": 2.027097225189209, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0158, + "step": 9900 + }, + { + "epoch": 3.33, + "grad_norm": 1.572900414466858, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0156, + "step": 9925 + }, + { + "epoch": 3.34, + "grad_norm": 1.6647284030914307, + "learning_rate": 9.05065326633166e-06, + "loss": 0.016, + "step": 9950 + }, + { + "epoch": 3.35, + "grad_norm": 2.017935276031494, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0163, + "step": 9975 + }, + { + "epoch": 3.36, + "grad_norm": 1.9541265964508057, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0155, + "step": 10000 + }, + { + "epoch": 3.36, + "eval_loss": 0.16353896260261536, + "eval_runtime": 1172.0723, + "eval_samples_per_second": 1.202, + "eval_steps_per_second": 1.202, + "eval_wer": 22.455897613282602, + "step": 10000 + }, + { + "epoch": 3.37, + "grad_norm": 1.482588291168213, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0152, + "step": 10025 + }, + { + "epoch": 3.37, + "grad_norm": 1.3735016584396362, + "learning_rate": 9.040603015075378e-06, + "loss": 0.017, + "step": 10050 + }, + { + "epoch": 3.38, + "grad_norm": 1.5062556266784668, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0158, + "step": 10075 + }, + { + "epoch": 3.39, + "grad_norm": 1.2790143489837646, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0171, + "step": 10100 + }, + { + "epoch": 3.4, + "grad_norm": 1.3697443008422852, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0151, + "step": 10125 + }, + { + "epoch": 3.41, + "grad_norm": 1.3963229656219482, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0155, + "step": 10150 + }, + { + "epoch": 3.42, + "grad_norm": 1.4214519262313843, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0168, + "step": 10175 + }, + { + "epoch": 3.43, + "grad_norm": 2.1044397354125977, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0159, + "step": 10200 + }, + { + "epoch": 3.43, + "grad_norm": 1.0504701137542725, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0144, + "step": 10225 + }, + { + "epoch": 3.44, + "grad_norm": 1.0660099983215332, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0167, + "step": 10250 + }, + { + "epoch": 3.45, + "grad_norm": 1.667517900466919, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0155, + "step": 10275 + }, + { + "epoch": 3.46, + "grad_norm": 1.8125948905944824, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0162, + "step": 10300 + }, + { + "epoch": 3.47, + "grad_norm": 1.234658122062683, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0156, + "step": 10325 + }, + { + "epoch": 3.48, + "grad_norm": 1.7538673877716064, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0164, + "step": 10350 + }, + { + "epoch": 3.48, + "grad_norm": 1.3441877365112305, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0156, + "step": 10375 + }, + { + "epoch": 3.49, + "grad_norm": 1.6516789197921753, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0151, + "step": 10400 + }, + { + "epoch": 3.5, + "grad_norm": 1.4577858448028564, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0157, + "step": 10425 + }, + { + "epoch": 3.51, + "grad_norm": 1.4146549701690674, + "learning_rate": 9.000402010050252e-06, + "loss": 0.0145, + "step": 10450 + }, + { + "epoch": 3.52, + "grad_norm": 1.9326874017715454, + "learning_rate": 8.997889447236182e-06, + "loss": 0.017, + "step": 10475 + }, + { + "epoch": 3.53, + "grad_norm": 1.2907094955444336, + "learning_rate": 8.995376884422111e-06, + "loss": 0.0162, + "step": 10500 + }, + { + "epoch": 3.53, + "grad_norm": 1.9718196392059326, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0152, + "step": 10525 + }, + { + "epoch": 3.54, + "grad_norm": 1.6341532468795776, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0158, + "step": 10550 + }, + { + "epoch": 3.55, + "grad_norm": 1.8656972646713257, + "learning_rate": 8.9878391959799e-06, + "loss": 0.0147, + "step": 10575 + }, + { + "epoch": 3.56, + "grad_norm": 0.7883699536323547, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0168, + "step": 10600 + }, + { + "epoch": 3.57, + "grad_norm": 1.5029000043869019, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0153, + "step": 10625 + }, + { + "epoch": 3.58, + "grad_norm": 1.3907344341278076, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0158, + "step": 10650 + }, + { + "epoch": 3.58, + "grad_norm": 1.550833821296692, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0162, + "step": 10675 + }, + { + "epoch": 3.59, + "grad_norm": 1.560996413230896, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0161, + "step": 10700 + }, + { + "epoch": 3.6, + "grad_norm": 2.6945698261260986, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0152, + "step": 10725 + }, + { + "epoch": 3.61, + "grad_norm": 1.2239935398101807, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0145, + "step": 10750 + }, + { + "epoch": 3.62, + "grad_norm": 1.352990984916687, + "learning_rate": 8.967738693467337e-06, + "loss": 0.0144, + "step": 10775 + }, + { + "epoch": 3.63, + "grad_norm": 1.1862568855285645, + "learning_rate": 8.965226130653268e-06, + "loss": 0.0145, + "step": 10800 + }, + { + "epoch": 3.63, + "grad_norm": 1.3756226301193237, + "learning_rate": 8.962713567839196e-06, + "loss": 0.0164, + "step": 10825 + }, + { + "epoch": 3.64, + "grad_norm": 1.5663750171661377, + "learning_rate": 8.960201005025127e-06, + "loss": 0.016, + "step": 10850 + }, + { + "epoch": 3.65, + "grad_norm": 1.0998464822769165, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0156, + "step": 10875 + }, + { + "epoch": 3.66, + "grad_norm": 1.6103864908218384, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0149, + "step": 10900 + }, + { + "epoch": 3.67, + "grad_norm": 1.1600780487060547, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0166, + "step": 10925 + }, + { + "epoch": 3.68, + "grad_norm": 1.215840220451355, + "learning_rate": 8.950150753768844e-06, + "loss": 0.014, + "step": 10950 + }, + { + "epoch": 3.69, + "grad_norm": 1.9106181859970093, + "learning_rate": 8.947638190954775e-06, + "loss": 0.0155, + "step": 10975 + }, + { + "epoch": 3.69, + "grad_norm": 1.4332627058029175, + "learning_rate": 8.945125628140704e-06, + "loss": 0.0159, + "step": 11000 + }, + { + "epoch": 3.69, + "eval_loss": 0.16833148896694183, + "eval_runtime": 1177.9362, + "eval_samples_per_second": 1.196, + "eval_steps_per_second": 1.196, + "eval_wer": 21.404358353510897, + "step": 11000 + }, + { + "epoch": 3.7, + "grad_norm": 1.6779950857162476, + "learning_rate": 8.942613065326634e-06, + "loss": 0.0152, + "step": 11025 + }, + { + "epoch": 3.71, + "grad_norm": 1.1543523073196411, + "learning_rate": 8.940100502512563e-06, + "loss": 0.0161, + "step": 11050 + }, + { + "epoch": 3.72, + "grad_norm": 1.0819700956344604, + "learning_rate": 8.937587939698494e-06, + "loss": 0.0148, + "step": 11075 + }, + { + "epoch": 3.73, + "grad_norm": 1.5612729787826538, + "learning_rate": 8.935075376884423e-06, + "loss": 0.0149, + "step": 11100 + }, + { + "epoch": 3.74, + "grad_norm": 1.9480060338974, + "learning_rate": 8.932562814070353e-06, + "loss": 0.0152, + "step": 11125 + }, + { + "epoch": 3.74, + "grad_norm": 1.4087862968444824, + "learning_rate": 8.930050251256282e-06, + "loss": 0.0153, + "step": 11150 + }, + { + "epoch": 3.75, + "grad_norm": 1.7054636478424072, + "learning_rate": 8.927537688442211e-06, + "loss": 0.0141, + "step": 11175 + }, + { + "epoch": 3.76, + "grad_norm": 1.112928032875061, + "learning_rate": 8.925025125628142e-06, + "loss": 0.0147, + "step": 11200 + }, + { + "epoch": 3.77, + "grad_norm": 1.1360586881637573, + "learning_rate": 8.92251256281407e-06, + "loss": 0.0133, + "step": 11225 + }, + { + "epoch": 3.78, + "grad_norm": 1.1549252271652222, + "learning_rate": 8.920000000000001e-06, + "loss": 0.0158, + "step": 11250 + }, + { + "epoch": 3.79, + "grad_norm": 1.2874222993850708, + "learning_rate": 8.91748743718593e-06, + "loss": 0.0155, + "step": 11275 + }, + { + "epoch": 3.79, + "grad_norm": 2.1797983646392822, + "learning_rate": 8.91497487437186e-06, + "loss": 0.0147, + "step": 11300 + }, + { + "epoch": 3.8, + "grad_norm": 1.5171695947647095, + "learning_rate": 8.91246231155779e-06, + "loss": 0.0153, + "step": 11325 + }, + { + "epoch": 3.81, + "grad_norm": 1.231627106666565, + "learning_rate": 8.90994974874372e-06, + "loss": 0.013, + "step": 11350 + }, + { + "epoch": 3.82, + "grad_norm": 2.021658420562744, + "learning_rate": 8.90743718592965e-06, + "loss": 0.0162, + "step": 11375 + }, + { + "epoch": 3.83, + "grad_norm": 1.6405866146087646, + "learning_rate": 8.904924623115579e-06, + "loss": 0.0166, + "step": 11400 + }, + { + "epoch": 3.84, + "grad_norm": 3.639925956726074, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0164, + "step": 11425 + }, + { + "epoch": 3.84, + "grad_norm": 1.5596216917037964, + "learning_rate": 8.899899497487437e-06, + "loss": 0.0158, + "step": 11450 + }, + { + "epoch": 3.85, + "grad_norm": 1.4825770854949951, + "learning_rate": 8.897386934673368e-06, + "loss": 0.0156, + "step": 11475 + }, + { + "epoch": 3.86, + "grad_norm": 1.3950417041778564, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0149, + "step": 11500 + }, + { + "epoch": 3.87, + "grad_norm": 1.1231906414031982, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0144, + "step": 11525 + }, + { + "epoch": 3.88, + "grad_norm": 1.5393916368484497, + "learning_rate": 8.889849246231156e-06, + "loss": 0.015, + "step": 11550 + }, + { + "epoch": 3.89, + "grad_norm": 1.1754271984100342, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0137, + "step": 11575 + }, + { + "epoch": 3.9, + "grad_norm": 1.2998003959655762, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0168, + "step": 11600 + }, + { + "epoch": 3.9, + "grad_norm": 1.5406385660171509, + "learning_rate": 8.882311557788946e-06, + "loss": 0.0159, + "step": 11625 + }, + { + "epoch": 3.91, + "grad_norm": 1.8749077320098877, + "learning_rate": 8.879798994974875e-06, + "loss": 0.0142, + "step": 11650 + }, + { + "epoch": 3.92, + "grad_norm": 1.4809610843658447, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0152, + "step": 11675 + }, + { + "epoch": 3.93, + "grad_norm": 1.3040724992752075, + "learning_rate": 8.874773869346734e-06, + "loss": 0.015, + "step": 11700 + }, + { + "epoch": 3.94, + "grad_norm": 1.2648557424545288, + "learning_rate": 8.872261306532665e-06, + "loss": 0.0141, + "step": 11725 + }, + { + "epoch": 3.95, + "grad_norm": 2.0717904567718506, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0147, + "step": 11750 + }, + { + "epoch": 3.95, + "grad_norm": 1.4995003938674927, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0155, + "step": 11775 + }, + { + "epoch": 3.96, + "grad_norm": 1.7686614990234375, + "learning_rate": 8.864723618090453e-06, + "loss": 0.0154, + "step": 11800 + }, + { + "epoch": 3.97, + "grad_norm": 1.2384917736053467, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0169, + "step": 11825 + }, + { + "epoch": 3.98, + "grad_norm": 1.683022379875183, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0137, + "step": 11850 + }, + { + "epoch": 3.99, + "grad_norm": 1.6258783340454102, + "learning_rate": 8.857185929648243e-06, + "loss": 0.0145, + "step": 11875 + }, + { + "epoch": 4.0, + "grad_norm": 1.887223720550537, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0149, + "step": 11900 + }, + { + "epoch": 4.0, + "grad_norm": 2.4872851371765137, + "learning_rate": 8.852361809045227e-06, + "loss": 0.0108, + "step": 11925 + }, + { + "epoch": 4.01, + "grad_norm": 0.8641329407691956, + "learning_rate": 8.849849246231157e-06, + "loss": 0.008, + "step": 11950 + }, + { + "epoch": 4.02, + "grad_norm": 1.2309380769729614, + "learning_rate": 8.847336683417086e-06, + "loss": 0.008, + "step": 11975 + }, + { + "epoch": 4.03, + "grad_norm": 0.9507299661636353, + "learning_rate": 8.844824120603015e-06, + "loss": 0.006, + "step": 12000 + }, + { + "epoch": 4.03, + "eval_loss": 0.17884328961372375, + "eval_runtime": 1177.823, + "eval_samples_per_second": 1.196, + "eval_steps_per_second": 1.196, + "eval_wer": 21.743341404358354, + "step": 12000 + }, + { + "epoch": 4.04, + "grad_norm": 0.8718534111976624, + "learning_rate": 8.842311557788945e-06, + "loss": 0.0065, + "step": 12025 + }, + { + "epoch": 4.05, + "grad_norm": 0.8362686634063721, + "learning_rate": 8.839798994974876e-06, + "loss": 0.0075, + "step": 12050 + }, + { + "epoch": 4.05, + "grad_norm": 0.743304967880249, + "learning_rate": 8.837286432160805e-06, + "loss": 0.0069, + "step": 12075 + }, + { + "epoch": 4.06, + "grad_norm": 1.7320317029953003, + "learning_rate": 8.834773869346734e-06, + "loss": 0.0082, + "step": 12100 + }, + { + "epoch": 4.07, + "grad_norm": 1.2012836933135986, + "learning_rate": 8.832261306532665e-06, + "loss": 0.007, + "step": 12125 + }, + { + "epoch": 4.08, + "grad_norm": 0.9258669018745422, + "learning_rate": 8.829748743718593e-06, + "loss": 0.0078, + "step": 12150 + }, + { + "epoch": 4.09, + "grad_norm": 1.442779541015625, + "learning_rate": 8.827236180904524e-06, + "loss": 0.0072, + "step": 12175 + }, + { + "epoch": 4.1, + "grad_norm": 1.3170878887176514, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0079, + "step": 12200 + }, + { + "epoch": 4.11, + "grad_norm": 1.357541561126709, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0089, + "step": 12225 + }, + { + "epoch": 4.11, + "grad_norm": 1.0645322799682617, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0081, + "step": 12250 + }, + { + "epoch": 4.12, + "grad_norm": 0.9434393644332886, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0075, + "step": 12275 + }, + { + "epoch": 4.13, + "grad_norm": 1.649266242980957, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0077, + "step": 12300 + }, + { + "epoch": 4.14, + "grad_norm": 1.1792644262313843, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0083, + "step": 12325 + }, + { + "epoch": 4.15, + "grad_norm": 0.8059808015823364, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0075, + "step": 12350 + }, + { + "epoch": 4.16, + "grad_norm": 0.8899474143981934, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0078, + "step": 12375 + }, + { + "epoch": 4.16, + "grad_norm": 1.3673797845840454, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0078, + "step": 12400 + }, + { + "epoch": 4.17, + "grad_norm": 1.74778413772583, + "learning_rate": 8.802110552763819e-06, + "loss": 0.008, + "step": 12425 + }, + { + "epoch": 4.18, + "grad_norm": 1.7424367666244507, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0086, + "step": 12450 + }, + { + "epoch": 4.19, + "grad_norm": 1.2718677520751953, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0062, + "step": 12475 + }, + { + "epoch": 4.2, + "grad_norm": 1.4235409498214722, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0087, + "step": 12500 + }, + { + "epoch": 4.21, + "grad_norm": 1.011344313621521, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0082, + "step": 12525 + }, + { + "epoch": 4.21, + "grad_norm": 1.2923675775527954, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0081, + "step": 12550 + }, + { + "epoch": 4.22, + "grad_norm": 1.0401259660720825, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0075, + "step": 12575 + }, + { + "epoch": 4.23, + "grad_norm": 1.3539245128631592, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0082, + "step": 12600 + }, + { + "epoch": 4.24, + "grad_norm": 1.2022430896759033, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0085, + "step": 12625 + }, + { + "epoch": 4.25, + "grad_norm": 1.6091457605361938, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0083, + "step": 12650 + }, + { + "epoch": 4.26, + "grad_norm": 0.8161540031433105, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0078, + "step": 12675 + }, + { + "epoch": 4.26, + "grad_norm": 1.3511227369308472, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0082, + "step": 12700 + }, + { + "epoch": 4.27, + "grad_norm": 1.1058855056762695, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0092, + "step": 12725 + }, + { + "epoch": 4.28, + "grad_norm": 0.6631322503089905, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0077, + "step": 12750 + }, + { + "epoch": 4.29, + "grad_norm": 1.72735595703125, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0092, + "step": 12775 + }, + { + "epoch": 4.3, + "grad_norm": 0.9632154703140259, + "learning_rate": 8.764422110552765e-06, + "loss": 0.008, + "step": 12800 + }, + { + "epoch": 4.31, + "grad_norm": 1.2338560819625854, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0086, + "step": 12825 + }, + { + "epoch": 4.31, + "grad_norm": 1.2995047569274902, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0089, + "step": 12850 + }, + { + "epoch": 4.32, + "grad_norm": 1.831378698348999, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0076, + "step": 12875 + }, + { + "epoch": 4.33, + "grad_norm": 0.9021074175834656, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0068, + "step": 12900 + }, + { + "epoch": 4.34, + "grad_norm": 1.6024397611618042, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0091, + "step": 12925 + }, + { + "epoch": 4.35, + "grad_norm": 1.4566843509674072, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0093, + "step": 12950 + }, + { + "epoch": 4.36, + "grad_norm": 1.2139644622802734, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0091, + "step": 12975 + }, + { + "epoch": 4.37, + "grad_norm": 1.7454314231872559, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0088, + "step": 13000 + }, + { + "epoch": 4.37, + "eval_loss": 0.18665467202663422, + "eval_runtime": 1142.0665, + "eval_samples_per_second": 1.234, + "eval_steps_per_second": 1.234, + "eval_wer": 20.15911449325493, + "step": 13000 + }, + { + "epoch": 4.37, + "grad_norm": 1.0341402292251587, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0077, + "step": 13025 + }, + { + "epoch": 4.38, + "grad_norm": 1.9587894678115845, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0078, + "step": 13050 + }, + { + "epoch": 4.39, + "grad_norm": 1.4976274967193604, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0085, + "step": 13075 + }, + { + "epoch": 4.4, + "grad_norm": 2.1455583572387695, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0084, + "step": 13100 + }, + { + "epoch": 4.41, + "grad_norm": 1.5965981483459473, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0104, + "step": 13125 + }, + { + "epoch": 4.42, + "grad_norm": 1.1849967241287231, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0088, + "step": 13150 + }, + { + "epoch": 4.42, + "grad_norm": 1.1066514253616333, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0082, + "step": 13175 + }, + { + "epoch": 4.43, + "grad_norm": 1.325682520866394, + "learning_rate": 8.72422110552764e-06, + "loss": 0.01, + "step": 13200 + }, + { + "epoch": 4.44, + "grad_norm": 1.6839710474014282, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0085, + "step": 13225 + }, + { + "epoch": 4.45, + "grad_norm": 1.288825511932373, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0083, + "step": 13250 + }, + { + "epoch": 4.46, + "grad_norm": 1.429227352142334, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0091, + "step": 13275 + }, + { + "epoch": 4.47, + "grad_norm": 0.9173932671546936, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0078, + "step": 13300 + }, + { + "epoch": 4.47, + "grad_norm": 1.6002618074417114, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0081, + "step": 13325 + }, + { + "epoch": 4.48, + "grad_norm": 0.8578880429267883, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0084, + "step": 13350 + }, + { + "epoch": 4.49, + "grad_norm": 1.4286712408065796, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0079, + "step": 13375 + }, + { + "epoch": 4.5, + "grad_norm": 1.6728731393814087, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0076, + "step": 13400 + }, + { + "epoch": 4.51, + "grad_norm": 0.5604600310325623, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0087, + "step": 13425 + }, + { + "epoch": 4.52, + "grad_norm": 1.2406845092773438, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0102, + "step": 13450 + }, + { + "epoch": 4.52, + "grad_norm": 1.5522938966751099, + "learning_rate": 8.696582914572866e-06, + "loss": 0.01, + "step": 13475 + }, + { + "epoch": 4.53, + "grad_norm": 1.0700762271881104, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0097, + "step": 13500 + }, + { + "epoch": 4.54, + "grad_norm": 1.5748265981674194, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0087, + "step": 13525 + }, + { + "epoch": 4.55, + "grad_norm": 1.4244635105133057, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0093, + "step": 13550 + }, + { + "epoch": 4.56, + "grad_norm": 0.9729836583137512, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0091, + "step": 13575 + }, + { + "epoch": 4.57, + "grad_norm": 2.009826898574829, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0094, + "step": 13600 + }, + { + "epoch": 4.58, + "grad_norm": 1.3668720722198486, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0086, + "step": 13625 + }, + { + "epoch": 4.58, + "grad_norm": 1.144881010055542, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0091, + "step": 13650 + }, + { + "epoch": 4.59, + "grad_norm": 2.2346184253692627, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0089, + "step": 13675 + }, + { + "epoch": 4.6, + "grad_norm": 2.122943162918091, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0087, + "step": 13700 + }, + { + "epoch": 4.61, + "grad_norm": 1.4628700017929077, + "learning_rate": 8.67145728643216e-06, + "loss": 0.008, + "step": 13725 + }, + { + "epoch": 4.62, + "grad_norm": 1.320510983467102, + "learning_rate": 8.668944723618092e-06, + "loss": 0.009, + "step": 13750 + }, + { + "epoch": 4.63, + "grad_norm": 1.0465457439422607, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0074, + "step": 13775 + }, + { + "epoch": 4.63, + "grad_norm": 0.951018750667572, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0086, + "step": 13800 + }, + { + "epoch": 4.64, + "grad_norm": 1.3639943599700928, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0092, + "step": 13825 + }, + { + "epoch": 4.65, + "grad_norm": 1.0459792613983154, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0091, + "step": 13850 + }, + { + "epoch": 4.66, + "grad_norm": 1.1339634656906128, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0091, + "step": 13875 + }, + { + "epoch": 4.67, + "grad_norm": 1.3917441368103027, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0088, + "step": 13900 + }, + { + "epoch": 4.68, + "grad_norm": 1.7123651504516602, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0092, + "step": 13925 + }, + { + "epoch": 4.68, + "grad_norm": 1.4685548543930054, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0088, + "step": 13950 + }, + { + "epoch": 4.69, + "grad_norm": 1.6241487264633179, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0089, + "step": 13975 + }, + { + "epoch": 4.7, + "grad_norm": 1.7665762901306152, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0077, + "step": 14000 + }, + { + "epoch": 4.7, + "eval_loss": 0.19219698011875153, + "eval_runtime": 1136.5442, + "eval_samples_per_second": 1.24, + "eval_steps_per_second": 1.24, + "eval_wer": 19.965409892770666, + "step": 14000 + }, + { + "epoch": 4.71, + "grad_norm": 1.324368953704834, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0081, + "step": 14025 + }, + { + "epoch": 4.72, + "grad_norm": 1.3064219951629639, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0073, + "step": 14050 + }, + { + "epoch": 4.73, + "grad_norm": 1.2301180362701416, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0093, + "step": 14075 + }, + { + "epoch": 4.73, + "grad_norm": 1.2756959199905396, + "learning_rate": 8.633768844221107e-06, + "loss": 0.01, + "step": 14100 + }, + { + "epoch": 4.74, + "grad_norm": 1.3681153059005737, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0093, + "step": 14125 + }, + { + "epoch": 4.75, + "grad_norm": 1.6471937894821167, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0089, + "step": 14150 + }, + { + "epoch": 4.76, + "grad_norm": 1.767603874206543, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0084, + "step": 14175 + }, + { + "epoch": 4.77, + "grad_norm": 1.3735233545303345, + "learning_rate": 8.623718592964825e-06, + "loss": 0.01, + "step": 14200 + }, + { + "epoch": 4.78, + "grad_norm": 1.1155452728271484, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0084, + "step": 14225 + }, + { + "epoch": 4.79, + "grad_norm": 1.4901820421218872, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0078, + "step": 14250 + }, + { + "epoch": 4.79, + "grad_norm": 1.1895307302474976, + "learning_rate": 8.616180904522614e-06, + "loss": 0.008, + "step": 14275 + }, + { + "epoch": 4.8, + "grad_norm": 1.3474719524383545, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0095, + "step": 14300 + }, + { + "epoch": 4.81, + "grad_norm": 0.9168118834495544, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0086, + "step": 14325 + }, + { + "epoch": 4.82, + "grad_norm": 1.2263232469558716, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0088, + "step": 14350 + }, + { + "epoch": 4.83, + "grad_norm": 1.534096360206604, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0077, + "step": 14375 + }, + { + "epoch": 4.84, + "grad_norm": 1.1762771606445312, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0086, + "step": 14400 + }, + { + "epoch": 4.84, + "grad_norm": 1.559659719467163, + "learning_rate": 8.601105527638192e-06, + "loss": 0.008, + "step": 14425 + }, + { + "epoch": 4.85, + "grad_norm": 0.8290969133377075, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0082, + "step": 14450 + }, + { + "epoch": 4.86, + "grad_norm": 1.5971109867095947, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0085, + "step": 14475 + }, + { + "epoch": 4.87, + "grad_norm": 1.2989814281463623, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0084, + "step": 14500 + }, + { + "epoch": 4.88, + "grad_norm": 1.1127673387527466, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0093, + "step": 14525 + }, + { + "epoch": 4.89, + "grad_norm": 1.1839593648910522, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0083, + "step": 14550 + }, + { + "epoch": 4.89, + "grad_norm": 1.9112977981567383, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0087, + "step": 14575 + }, + { + "epoch": 4.9, + "grad_norm": 1.3866177797317505, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0081, + "step": 14600 + }, + { + "epoch": 4.91, + "grad_norm": 1.5093895196914673, + "learning_rate": 8.58100502512563e-06, + "loss": 0.009, + "step": 14625 + }, + { + "epoch": 4.92, + "grad_norm": 1.9570719003677368, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0085, + "step": 14650 + }, + { + "epoch": 4.93, + "grad_norm": 1.9499284029006958, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0093, + "step": 14675 + }, + { + "epoch": 4.94, + "grad_norm": 1.0813087224960327, + "learning_rate": 8.573467336683418e-06, + "loss": 0.008, + "step": 14700 + }, + { + "epoch": 4.94, + "grad_norm": 1.3226896524429321, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0091, + "step": 14725 + }, + { + "epoch": 4.95, + "grad_norm": 0.9363839030265808, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0071, + "step": 14750 + }, + { + "epoch": 4.96, + "grad_norm": 1.1874040365219116, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0082, + "step": 14775 + }, + { + "epoch": 4.97, + "grad_norm": 1.3074381351470947, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0089, + "step": 14800 + }, + { + "epoch": 4.98, + "grad_norm": 1.4175150394439697, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0101, + "step": 14825 + }, + { + "epoch": 4.99, + "grad_norm": 1.2041213512420654, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0082, + "step": 14850 + }, + { + "epoch": 4.99, + "grad_norm": 0.8847649097442627, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0085, + "step": 14875 + }, + { + "epoch": 5.0, + "grad_norm": 1.6313598155975342, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0085, + "step": 14900 + }, + { + "epoch": 5.01, + "grad_norm": 1.5751765966415405, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0048, + "step": 14925 + }, + { + "epoch": 5.02, + "grad_norm": 1.6055039167404175, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0052, + "step": 14950 + }, + { + "epoch": 5.03, + "grad_norm": 0.6424621939659119, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0045, + "step": 14975 + }, + { + "epoch": 5.04, + "grad_norm": 1.156960368156433, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0048, + "step": 15000 + }, + { + "epoch": 5.04, + "eval_loss": 0.19765983521938324, + "eval_runtime": 1136.5989, + "eval_samples_per_second": 1.24, + "eval_steps_per_second": 1.24, + "eval_wer": 19.674852992044276, + "step": 15000 + }, + { + "epoch": 5.05, + "grad_norm": 1.0967705249786377, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0047, + "step": 15025 + }, + { + "epoch": 5.05, + "grad_norm": 0.8752351403236389, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0047, + "step": 15050 + }, + { + "epoch": 5.06, + "grad_norm": 1.3648464679718018, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0049, + "step": 15075 + }, + { + "epoch": 5.07, + "grad_norm": 1.3227735757827759, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0049, + "step": 15100 + }, + { + "epoch": 5.08, + "grad_norm": 0.9258247017860413, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0049, + "step": 15125 + }, + { + "epoch": 5.09, + "grad_norm": 1.752871036529541, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0063, + "step": 15150 + }, + { + "epoch": 5.1, + "grad_norm": 0.8259284496307373, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0052, + "step": 15175 + }, + { + "epoch": 5.1, + "grad_norm": 0.5580319762229919, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0049, + "step": 15200 + }, + { + "epoch": 5.11, + "grad_norm": 1.1873167753219604, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0054, + "step": 15225 + }, + { + "epoch": 5.12, + "grad_norm": 1.077828049659729, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0056, + "step": 15250 + }, + { + "epoch": 5.13, + "grad_norm": 1.5488917827606201, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0041, + "step": 15275 + }, + { + "epoch": 5.14, + "grad_norm": 0.6617769002914429, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0047, + "step": 15300 + }, + { + "epoch": 5.15, + "grad_norm": 0.8626261949539185, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0048, + "step": 15325 + }, + { + "epoch": 5.15, + "grad_norm": 0.6809443235397339, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0058, + "step": 15350 + }, + { + "epoch": 5.16, + "grad_norm": 0.8307203054428101, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0045, + "step": 15375 + }, + { + "epoch": 5.17, + "grad_norm": 0.8473632335662842, + "learning_rate": 8.503115577889447e-06, + "loss": 0.004, + "step": 15400 + }, + { + "epoch": 5.18, + "grad_norm": 1.200290322303772, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0048, + "step": 15425 + }, + { + "epoch": 5.19, + "grad_norm": 0.6473767161369324, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0045, + "step": 15450 + }, + { + "epoch": 5.2, + "grad_norm": 1.6799564361572266, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0063, + "step": 15475 + }, + { + "epoch": 5.2, + "grad_norm": 0.8493944406509399, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0058, + "step": 15500 + }, + { + "epoch": 5.21, + "grad_norm": 1.5808863639831543, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0058, + "step": 15525 + }, + { + "epoch": 5.22, + "grad_norm": 1.0716179609298706, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0051, + "step": 15550 + }, + { + "epoch": 5.23, + "grad_norm": 0.8646402359008789, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0056, + "step": 15575 + }, + { + "epoch": 5.24, + "grad_norm": 1.3220386505126953, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0056, + "step": 15600 + }, + { + "epoch": 5.25, + "grad_norm": 0.5538511276245117, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0055, + "step": 15625 + }, + { + "epoch": 5.26, + "grad_norm": 1.3050535917282104, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0049, + "step": 15650 + }, + { + "epoch": 5.26, + "grad_norm": 1.2690178155899048, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0059, + "step": 15675 + }, + { + "epoch": 5.27, + "grad_norm": 1.3259929418563843, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0069, + "step": 15700 + }, + { + "epoch": 5.28, + "grad_norm": 0.5073191523551941, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0049, + "step": 15725 + }, + { + "epoch": 5.29, + "grad_norm": 0.890322208404541, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0051, + "step": 15750 + }, + { + "epoch": 5.3, + "grad_norm": 0.8370198607444763, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0056, + "step": 15775 + }, + { + "epoch": 5.31, + "grad_norm": 1.3678605556488037, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0059, + "step": 15800 + }, + { + "epoch": 5.31, + "grad_norm": 1.4868431091308594, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0055, + "step": 15825 + }, + { + "epoch": 5.32, + "grad_norm": 1.2407320737838745, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0055, + "step": 15850 + }, + { + "epoch": 5.33, + "grad_norm": 1.0031400918960571, + "learning_rate": 8.455376884422111e-06, + "loss": 0.005, + "step": 15875 + }, + { + "epoch": 5.34, + "grad_norm": 0.7211542725563049, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0055, + "step": 15900 + }, + { + "epoch": 5.35, + "grad_norm": 1.0149610042572021, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0048, + "step": 15925 + }, + { + "epoch": 5.36, + "grad_norm": 1.0901191234588623, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0054, + "step": 15950 + }, + { + "epoch": 5.36, + "grad_norm": 0.9167507886886597, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0062, + "step": 15975 + }, + { + "epoch": 5.37, + "grad_norm": 1.4308139085769653, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0064, + "step": 16000 + }, + { + "epoch": 5.37, + "eval_loss": 0.21626679599285126, + "eval_runtime": 1140.5043, + "eval_samples_per_second": 1.235, + "eval_steps_per_second": 1.235, + "eval_wer": 20.33206502940159, + "step": 16000 + }, + { + "epoch": 5.38, + "grad_norm": 1.031640887260437, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0067, + "step": 16025 + }, + { + "epoch": 5.39, + "grad_norm": 1.782353401184082, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0061, + "step": 16050 + }, + { + "epoch": 5.4, + "grad_norm": 0.8117456436157227, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0057, + "step": 16075 + }, + { + "epoch": 5.41, + "grad_norm": 1.0795190334320068, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0054, + "step": 16100 + }, + { + "epoch": 5.41, + "grad_norm": 1.6058735847473145, + "learning_rate": 8.430251256281408e-06, + "loss": 0.006, + "step": 16125 + }, + { + "epoch": 5.42, + "grad_norm": 1.4797030687332153, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0057, + "step": 16150 + }, + { + "epoch": 5.43, + "grad_norm": 1.3480744361877441, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0053, + "step": 16175 + }, + { + "epoch": 5.44, + "grad_norm": 1.2944945096969604, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0061, + "step": 16200 + }, + { + "epoch": 5.45, + "grad_norm": 1.7134745121002197, + "learning_rate": 8.420201005025125e-06, + "loss": 0.0056, + "step": 16225 + }, + { + "epoch": 5.46, + "grad_norm": 1.0810943841934204, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0062, + "step": 16250 + }, + { + "epoch": 5.47, + "grad_norm": 0.7196752429008484, + "learning_rate": 8.415175879396985e-06, + "loss": 0.0057, + "step": 16275 + }, + { + "epoch": 5.47, + "grad_norm": 1.319303274154663, + "learning_rate": 8.412663316582915e-06, + "loss": 0.0057, + "step": 16300 + }, + { + "epoch": 5.48, + "grad_norm": 0.8662716746330261, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0063, + "step": 16325 + }, + { + "epoch": 5.49, + "grad_norm": 1.1055669784545898, + "learning_rate": 8.407638190954775e-06, + "loss": 0.0067, + "step": 16350 + }, + { + "epoch": 5.5, + "grad_norm": 0.5754470825195312, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0052, + "step": 16375 + }, + { + "epoch": 5.51, + "grad_norm": 2.025892734527588, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0058, + "step": 16400 + }, + { + "epoch": 5.52, + "grad_norm": 1.4449704885482788, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0058, + "step": 16425 + }, + { + "epoch": 5.52, + "grad_norm": 0.9058473706245422, + "learning_rate": 8.397587939698492e-06, + "loss": 0.0051, + "step": 16450 + }, + { + "epoch": 5.53, + "grad_norm": 1.4664002656936646, + "learning_rate": 8.395075376884423e-06, + "loss": 0.0043, + "step": 16475 + }, + { + "epoch": 5.54, + "grad_norm": 1.2150840759277344, + "learning_rate": 8.392562814070351e-06, + "loss": 0.0065, + "step": 16500 + }, + { + "epoch": 5.55, + "grad_norm": 1.5144007205963135, + "learning_rate": 8.390050251256282e-06, + "loss": 0.0058, + "step": 16525 + }, + { + "epoch": 5.56, + "grad_norm": 1.0534707307815552, + "learning_rate": 8.387537688442211e-06, + "loss": 0.0056, + "step": 16550 + }, + { + "epoch": 5.57, + "grad_norm": 1.4486539363861084, + "learning_rate": 8.38502512562814e-06, + "loss": 0.0059, + "step": 16575 + }, + { + "epoch": 5.57, + "grad_norm": 0.8479118347167969, + "learning_rate": 8.382512562814072e-06, + "loss": 0.005, + "step": 16600 + }, + { + "epoch": 5.58, + "grad_norm": 1.4241670370101929, + "learning_rate": 8.380000000000001e-06, + "loss": 0.0055, + "step": 16625 + }, + { + "epoch": 5.59, + "grad_norm": 1.0508249998092651, + "learning_rate": 8.37748743718593e-06, + "loss": 0.006, + "step": 16650 + }, + { + "epoch": 5.6, + "grad_norm": 1.1433639526367188, + "learning_rate": 8.37497487437186e-06, + "loss": 0.0061, + "step": 16675 + }, + { + "epoch": 5.61, + "grad_norm": 1.5427868366241455, + "learning_rate": 8.372462311557789e-06, + "loss": 0.0055, + "step": 16700 + }, + { + "epoch": 5.62, + "grad_norm": 1.5735931396484375, + "learning_rate": 8.36994974874372e-06, + "loss": 0.0066, + "step": 16725 + }, + { + "epoch": 5.62, + "grad_norm": 1.667438268661499, + "learning_rate": 8.36743718592965e-06, + "loss": 0.0062, + "step": 16750 + }, + { + "epoch": 5.63, + "grad_norm": 1.1762360334396362, + "learning_rate": 8.364924623115579e-06, + "loss": 0.0061, + "step": 16775 + }, + { + "epoch": 5.64, + "grad_norm": 1.3685826063156128, + "learning_rate": 8.362412060301508e-06, + "loss": 0.0063, + "step": 16800 + }, + { + "epoch": 5.65, + "grad_norm": 1.0297006368637085, + "learning_rate": 8.359899497487437e-06, + "loss": 0.0067, + "step": 16825 + }, + { + "epoch": 5.66, + "grad_norm": 1.0801314115524292, + "learning_rate": 8.357386934673367e-06, + "loss": 0.0059, + "step": 16850 + }, + { + "epoch": 5.67, + "grad_norm": 1.1853493452072144, + "learning_rate": 8.354874371859298e-06, + "loss": 0.0062, + "step": 16875 + }, + { + "epoch": 5.67, + "grad_norm": 1.5508214235305786, + "learning_rate": 8.352361809045227e-06, + "loss": 0.0059, + "step": 16900 + }, + { + "epoch": 5.68, + "grad_norm": 1.4781749248504639, + "learning_rate": 8.349849246231156e-06, + "loss": 0.0062, + "step": 16925 + }, + { + "epoch": 5.69, + "grad_norm": 1.9500316381454468, + "learning_rate": 8.347336683417087e-06, + "loss": 0.0062, + "step": 16950 + }, + { + "epoch": 5.7, + "grad_norm": 1.1584771871566772, + "learning_rate": 8.344824120603015e-06, + "loss": 0.0063, + "step": 16975 + }, + { + "epoch": 5.71, + "grad_norm": 1.6125706434249878, + "learning_rate": 8.342311557788946e-06, + "loss": 0.0061, + "step": 17000 + }, + { + "epoch": 5.71, + "eval_loss": 0.20213934779167175, + "eval_runtime": 1142.7007, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 1.233, + "eval_wer": 19.315115876859217, + "step": 17000 + }, + { + "epoch": 5.72, + "grad_norm": 1.7958917617797852, + "learning_rate": 8.339798994974875e-06, + "loss": 0.0057, + "step": 17025 + }, + { + "epoch": 5.73, + "grad_norm": 1.5658222436904907, + "learning_rate": 8.337286432160805e-06, + "loss": 0.006, + "step": 17050 + }, + { + "epoch": 5.73, + "grad_norm": 1.2584779262542725, + "learning_rate": 8.334773869346734e-06, + "loss": 0.0064, + "step": 17075 + }, + { + "epoch": 5.74, + "grad_norm": 0.751603364944458, + "learning_rate": 8.332261306532663e-06, + "loss": 0.0068, + "step": 17100 + }, + { + "epoch": 5.75, + "grad_norm": 2.127077579498291, + "learning_rate": 8.329748743718593e-06, + "loss": 0.0071, + "step": 17125 + }, + { + "epoch": 5.76, + "grad_norm": 1.1249884366989136, + "learning_rate": 8.327236180904524e-06, + "loss": 0.0056, + "step": 17150 + }, + { + "epoch": 5.77, + "grad_norm": 0.49796977639198303, + "learning_rate": 8.324723618090453e-06, + "loss": 0.0052, + "step": 17175 + }, + { + "epoch": 5.78, + "grad_norm": 1.0872794389724731, + "learning_rate": 8.322211055276382e-06, + "loss": 0.0053, + "step": 17200 + }, + { + "epoch": 5.78, + "grad_norm": 1.2205846309661865, + "learning_rate": 8.319698492462313e-06, + "loss": 0.0053, + "step": 17225 + }, + { + "epoch": 5.79, + "grad_norm": 0.8647512197494507, + "learning_rate": 8.317185929648241e-06, + "loss": 0.0063, + "step": 17250 + }, + { + "epoch": 5.8, + "grad_norm": 1.3274365663528442, + "learning_rate": 8.314673366834172e-06, + "loss": 0.0067, + "step": 17275 + }, + { + "epoch": 5.81, + "grad_norm": 0.9262849688529968, + "learning_rate": 8.312160804020101e-06, + "loss": 0.0062, + "step": 17300 + }, + { + "epoch": 5.82, + "grad_norm": 1.2801549434661865, + "learning_rate": 8.30964824120603e-06, + "loss": 0.0075, + "step": 17325 + }, + { + "epoch": 5.83, + "grad_norm": 1.201541543006897, + "learning_rate": 8.307135678391962e-06, + "loss": 0.0058, + "step": 17350 + }, + { + "epoch": 5.83, + "grad_norm": 0.5877547264099121, + "learning_rate": 8.30462311557789e-06, + "loss": 0.0057, + "step": 17375 + }, + { + "epoch": 5.84, + "grad_norm": 0.8262943625450134, + "learning_rate": 8.30211055276382e-06, + "loss": 0.0056, + "step": 17400 + }, + { + "epoch": 5.85, + "grad_norm": 1.6069191694259644, + "learning_rate": 8.29959798994975e-06, + "loss": 0.0051, + "step": 17425 + }, + { + "epoch": 5.86, + "grad_norm": 0.9255255460739136, + "learning_rate": 8.297085427135679e-06, + "loss": 0.0051, + "step": 17450 + }, + { + "epoch": 5.87, + "grad_norm": 1.2819695472717285, + "learning_rate": 8.294572864321608e-06, + "loss": 0.0059, + "step": 17475 + }, + { + "epoch": 5.88, + "grad_norm": 0.8711588978767395, + "learning_rate": 8.29206030150754e-06, + "loss": 0.0052, + "step": 17500 + }, + { + "epoch": 5.88, + "grad_norm": 1.4883580207824707, + "learning_rate": 8.289547738693467e-06, + "loss": 0.0063, + "step": 17525 + }, + { + "epoch": 5.89, + "grad_norm": 1.1265593767166138, + "learning_rate": 8.287035175879398e-06, + "loss": 0.0056, + "step": 17550 + }, + { + "epoch": 5.9, + "grad_norm": 1.7278519868850708, + "learning_rate": 8.284522613065327e-06, + "loss": 0.0068, + "step": 17575 + }, + { + "epoch": 5.91, + "grad_norm": 1.4937666654586792, + "learning_rate": 8.282010050251257e-06, + "loss": 0.0058, + "step": 17600 + }, + { + "epoch": 5.92, + "grad_norm": 1.1914496421813965, + "learning_rate": 8.279497487437188e-06, + "loss": 0.0053, + "step": 17625 + }, + { + "epoch": 5.93, + "grad_norm": 1.0224494934082031, + "learning_rate": 8.276984924623115e-06, + "loss": 0.0065, + "step": 17650 + }, + { + "epoch": 5.94, + "grad_norm": 1.2841668128967285, + "learning_rate": 8.274472361809046e-06, + "loss": 0.0064, + "step": 17675 + }, + { + "epoch": 5.94, + "grad_norm": 0.7552938461303711, + "learning_rate": 8.271959798994976e-06, + "loss": 0.0061, + "step": 17700 + }, + { + "epoch": 5.95, + "grad_norm": 1.3661631345748901, + "learning_rate": 8.269447236180905e-06, + "loss": 0.0052, + "step": 17725 + }, + { + "epoch": 5.96, + "grad_norm": 1.42498779296875, + "learning_rate": 8.266934673366834e-06, + "loss": 0.0058, + "step": 17750 + }, + { + "epoch": 5.97, + "grad_norm": 1.2563202381134033, + "learning_rate": 8.264422110552765e-06, + "loss": 0.0071, + "step": 17775 + }, + { + "epoch": 5.98, + "grad_norm": 1.2082328796386719, + "learning_rate": 8.261909547738695e-06, + "loss": 0.0057, + "step": 17800 + }, + { + "epoch": 5.99, + "grad_norm": 0.7292001247406006, + "learning_rate": 8.259396984924624e-06, + "loss": 0.0057, + "step": 17825 + }, + { + "epoch": 5.99, + "grad_norm": 1.3593088388442993, + "learning_rate": 8.256884422110553e-06, + "loss": 0.0053, + "step": 17850 + }, + { + "epoch": 6.0, + "grad_norm": 0.8307278752326965, + "learning_rate": 8.254371859296482e-06, + "loss": 0.0054, + "step": 17875 + }, + { + "epoch": 6.01, + "grad_norm": 1.1477514505386353, + "learning_rate": 8.251859296482414e-06, + "loss": 0.0041, + "step": 17900 + }, + { + "epoch": 6.02, + "grad_norm": 0.5901256203651428, + "learning_rate": 8.249346733668341e-06, + "loss": 0.0036, + "step": 17925 + }, + { + "epoch": 6.03, + "grad_norm": 0.5596385598182678, + "learning_rate": 8.246834170854272e-06, + "loss": 0.0038, + "step": 17950 + }, + { + "epoch": 6.04, + "grad_norm": 0.6528720259666443, + "learning_rate": 8.244321608040201e-06, + "loss": 0.0034, + "step": 17975 + }, + { + "epoch": 6.04, + "grad_norm": 0.5483542084693909, + "learning_rate": 8.24180904522613e-06, + "loss": 0.0035, + "step": 18000 + }, + { + "epoch": 6.04, + "eval_loss": 0.2091396450996399, + "eval_runtime": 1132.7562, + "eval_samples_per_second": 1.244, + "eval_steps_per_second": 1.244, + "eval_wer": 19.494984434451748, + "step": 18000 + }, + { + "epoch": 6.05, + "grad_norm": 0.9501282572746277, + "learning_rate": 8.239296482412062e-06, + "loss": 0.0035, + "step": 18025 + }, + { + "epoch": 6.06, + "grad_norm": 1.168597936630249, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0045, + "step": 18050 + }, + { + "epoch": 6.07, + "grad_norm": 1.1545159816741943, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0034, + "step": 18075 + }, + { + "epoch": 6.08, + "grad_norm": 1.315651297569275, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0042, + "step": 18100 + }, + { + "epoch": 6.09, + "grad_norm": 0.5124503970146179, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0037, + "step": 18125 + }, + { + "epoch": 6.09, + "grad_norm": 0.3464219570159912, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0032, + "step": 18150 + }, + { + "epoch": 6.1, + "grad_norm": 1.3346083164215088, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0039, + "step": 18175 + }, + { + "epoch": 6.11, + "grad_norm": 1.0005745887756348, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0035, + "step": 18200 + }, + { + "epoch": 6.12, + "grad_norm": 1.2348576784133911, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0041, + "step": 18225 + }, + { + "epoch": 6.13, + "grad_norm": 0.9986111521720886, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0036, + "step": 18250 + }, + { + "epoch": 6.14, + "grad_norm": 2.7926268577575684, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0039, + "step": 18275 + }, + { + "epoch": 6.15, + "grad_norm": 1.0568386316299438, + "learning_rate": 8.21175879396985e-06, + "loss": 0.004, + "step": 18300 + }, + { + "epoch": 6.15, + "grad_norm": 1.2773184776306152, + "learning_rate": 8.20924623115578e-06, + "loss": 0.004, + "step": 18325 + }, + { + "epoch": 6.16, + "grad_norm": 0.5578204989433289, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0037, + "step": 18350 + }, + { + "epoch": 6.17, + "grad_norm": 0.9124101996421814, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0036, + "step": 18375 + }, + { + "epoch": 6.18, + "grad_norm": 0.7174603343009949, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0044, + "step": 18400 + }, + { + "epoch": 6.19, + "grad_norm": 1.390791654586792, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0039, + "step": 18425 + }, + { + "epoch": 6.2, + "grad_norm": 1.0443471670150757, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0037, + "step": 18450 + }, + { + "epoch": 6.2, + "grad_norm": 1.0924633741378784, + "learning_rate": 8.194170854271357e-06, + "loss": 0.004, + "step": 18475 + }, + { + "epoch": 6.21, + "grad_norm": 2.3563449382781982, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0041, + "step": 18500 + }, + { + "epoch": 6.22, + "grad_norm": 0.9040802717208862, + "learning_rate": 8.189145728643216e-06, + "loss": 0.004, + "step": 18525 + }, + { + "epoch": 6.23, + "grad_norm": 1.0325512886047363, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0042, + "step": 18550 + }, + { + "epoch": 6.24, + "grad_norm": 0.7814849615097046, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0033, + "step": 18575 + }, + { + "epoch": 6.25, + "grad_norm": 1.7087482213974, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0043, + "step": 18600 + }, + { + "epoch": 6.25, + "grad_norm": 0.509449303150177, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0047, + "step": 18625 + }, + { + "epoch": 6.26, + "grad_norm": 1.178519368171692, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0039, + "step": 18650 + }, + { + "epoch": 6.27, + "grad_norm": 0.8573196530342102, + "learning_rate": 8.174070351758795e-06, + "loss": 0.0039, + "step": 18675 + }, + { + "epoch": 6.28, + "grad_norm": 0.6404018998146057, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0038, + "step": 18700 + }, + { + "epoch": 6.29, + "grad_norm": 0.6280046105384827, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0042, + "step": 18725 + }, + { + "epoch": 6.3, + "grad_norm": 1.5814555883407593, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0039, + "step": 18750 + }, + { + "epoch": 6.3, + "grad_norm": 1.5362895727157593, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0041, + "step": 18775 + }, + { + "epoch": 6.31, + "grad_norm": 1.0262995958328247, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0044, + "step": 18800 + }, + { + "epoch": 6.32, + "grad_norm": 0.5640472769737244, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0037, + "step": 18825 + }, + { + "epoch": 6.33, + "grad_norm": 1.0055278539657593, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0051, + "step": 18850 + }, + { + "epoch": 6.34, + "grad_norm": 1.6860655546188354, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0046, + "step": 18875 + }, + { + "epoch": 6.35, + "grad_norm": 1.573367714881897, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0049, + "step": 18900 + }, + { + "epoch": 6.35, + "grad_norm": 0.8385373950004578, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0045, + "step": 18925 + }, + { + "epoch": 6.36, + "grad_norm": 1.2663919925689697, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0048, + "step": 18950 + }, + { + "epoch": 6.37, + "grad_norm": 1.1477230787277222, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0051, + "step": 18975 + }, + { + "epoch": 6.38, + "grad_norm": 0.7359607815742493, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0039, + "step": 19000 + }, + { + "epoch": 6.38, + "eval_loss": 0.21638032793998718, + "eval_runtime": 1142.4333, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 1.233, + "eval_wer": 19.785541335178138, + "step": 19000 + }, + { + "epoch": 6.39, + "grad_norm": 1.0653220415115356, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0044, + "step": 19025 + }, + { + "epoch": 6.4, + "grad_norm": 1.3658396005630493, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0051, + "step": 19050 + }, + { + "epoch": 6.41, + "grad_norm": 1.113297462463379, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0045, + "step": 19075 + }, + { + "epoch": 6.41, + "grad_norm": 0.9167206883430481, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0037, + "step": 19100 + }, + { + "epoch": 6.42, + "grad_norm": 0.4488808214664459, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0038, + "step": 19125 + }, + { + "epoch": 6.43, + "grad_norm": 1.3861228227615356, + "learning_rate": 8.126432160804021e-06, + "loss": 0.0048, + "step": 19150 + }, + { + "epoch": 6.44, + "grad_norm": 1.306933045387268, + "learning_rate": 8.12391959798995e-06, + "loss": 0.0047, + "step": 19175 + }, + { + "epoch": 6.45, + "grad_norm": 0.8613525629043579, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0045, + "step": 19200 + }, + { + "epoch": 6.46, + "grad_norm": 0.9457516074180603, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0053, + "step": 19225 + }, + { + "epoch": 6.46, + "grad_norm": 0.7847390174865723, + "learning_rate": 8.11638190954774e-06, + "loss": 0.0048, + "step": 19250 + }, + { + "epoch": 6.47, + "grad_norm": 1.1853164434432983, + "learning_rate": 8.11386934673367e-06, + "loss": 0.0038, + "step": 19275 + }, + { + "epoch": 6.48, + "grad_norm": 0.613201379776001, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0042, + "step": 19300 + }, + { + "epoch": 6.49, + "grad_norm": 1.7954857349395752, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0053, + "step": 19325 + }, + { + "epoch": 6.5, + "grad_norm": 0.9282506704330444, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0047, + "step": 19350 + }, + { + "epoch": 6.51, + "grad_norm": 2.2625465393066406, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0047, + "step": 19375 + }, + { + "epoch": 6.51, + "grad_norm": 0.9260226488113403, + "learning_rate": 8.101306532663318e-06, + "loss": 0.0048, + "step": 19400 + }, + { + "epoch": 6.52, + "grad_norm": 0.865955114364624, + "learning_rate": 8.098793969849247e-06, + "loss": 0.0049, + "step": 19425 + }, + { + "epoch": 6.53, + "grad_norm": 1.231589674949646, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0045, + "step": 19450 + }, + { + "epoch": 6.54, + "grad_norm": 0.9643070697784424, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0043, + "step": 19475 + }, + { + "epoch": 6.55, + "grad_norm": 1.302992343902588, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0035, + "step": 19500 + }, + { + "epoch": 6.56, + "grad_norm": 1.5927190780639648, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0058, + "step": 19525 + }, + { + "epoch": 6.56, + "grad_norm": 1.38823401927948, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0042, + "step": 19550 + }, + { + "epoch": 6.57, + "grad_norm": 0.7761903405189514, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0051, + "step": 19575 + }, + { + "epoch": 6.58, + "grad_norm": 1.2647398710250854, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0047, + "step": 19600 + }, + { + "epoch": 6.59, + "grad_norm": 1.5912060737609863, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0046, + "step": 19625 + }, + { + "epoch": 6.6, + "grad_norm": 1.0181852579116821, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0047, + "step": 19650 + }, + { + "epoch": 6.61, + "grad_norm": 1.081905722618103, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0052, + "step": 19675 + }, + { + "epoch": 6.62, + "grad_norm": 0.9470379948616028, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0039, + "step": 19700 + }, + { + "epoch": 6.62, + "grad_norm": 0.9424958229064941, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0043, + "step": 19725 + }, + { + "epoch": 6.63, + "grad_norm": 0.7546243071556091, + "learning_rate": 8.066130653266332e-06, + "loss": 0.004, + "step": 19750 + }, + { + "epoch": 6.64, + "grad_norm": 2.176284074783325, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0046, + "step": 19775 + }, + { + "epoch": 6.65, + "grad_norm": 1.416082501411438, + "learning_rate": 8.061105527638192e-06, + "loss": 0.004, + "step": 19800 + }, + { + "epoch": 6.66, + "grad_norm": 1.1266497373580933, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0048, + "step": 19825 + }, + { + "epoch": 6.67, + "grad_norm": 0.4912588596343994, + "learning_rate": 8.05608040201005e-06, + "loss": 0.005, + "step": 19850 + }, + { + "epoch": 6.67, + "grad_norm": 1.6279491186141968, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0057, + "step": 19875 + }, + { + "epoch": 6.68, + "grad_norm": 0.48726755380630493, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0043, + "step": 19900 + }, + { + "epoch": 6.69, + "grad_norm": 1.0227928161621094, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0047, + "step": 19925 + }, + { + "epoch": 6.7, + "grad_norm": 1.0988606214523315, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0055, + "step": 19950 + }, + { + "epoch": 6.71, + "grad_norm": 0.4092867374420166, + "learning_rate": 8.043517587939699e-06, + "loss": 0.005, + "step": 19975 + }, + { + "epoch": 6.72, + "grad_norm": 1.853768229484558, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0052, + "step": 20000 + }, + { + "epoch": 6.72, + "eval_loss": 0.21625065803527832, + "eval_runtime": 1134.2162, + "eval_samples_per_second": 1.242, + "eval_steps_per_second": 1.242, + "eval_wer": 19.833967485299205, + "step": 20000 + }, + { + "epoch": 6.72, + "grad_norm": 1.8680448532104492, + "learning_rate": 8.03849246231156e-06, + "loss": 0.004, + "step": 20025 + }, + { + "epoch": 6.73, + "grad_norm": 1.3317962884902954, + "learning_rate": 8.035979899497489e-06, + "loss": 0.0046, + "step": 20050 + }, + { + "epoch": 6.74, + "grad_norm": 0.4756714403629303, + "learning_rate": 8.033467336683418e-06, + "loss": 0.004, + "step": 20075 + }, + { + "epoch": 6.75, + "grad_norm": 0.921876847743988, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0044, + "step": 20100 + }, + { + "epoch": 6.76, + "grad_norm": 1.0129247903823853, + "learning_rate": 8.028442211055277e-06, + "loss": 0.0041, + "step": 20125 + }, + { + "epoch": 6.77, + "grad_norm": 1.4895037412643433, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0039, + "step": 20150 + }, + { + "epoch": 6.77, + "grad_norm": 0.7363907098770142, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0042, + "step": 20175 + }, + { + "epoch": 6.78, + "grad_norm": 0.7118470072746277, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0037, + "step": 20200 + }, + { + "epoch": 6.79, + "grad_norm": 1.0945580005645752, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0048, + "step": 20225 + }, + { + "epoch": 6.8, + "grad_norm": 1.0533359050750732, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0052, + "step": 20250 + }, + { + "epoch": 6.81, + "grad_norm": 1.2242259979248047, + "learning_rate": 8.013366834170854e-06, + "loss": 0.0042, + "step": 20275 + }, + { + "epoch": 6.82, + "grad_norm": 1.247016429901123, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0044, + "step": 20300 + }, + { + "epoch": 6.83, + "grad_norm": 0.8392201066017151, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0045, + "step": 20325 + }, + { + "epoch": 6.83, + "grad_norm": 1.8123444318771362, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0045, + "step": 20350 + }, + { + "epoch": 6.84, + "grad_norm": 0.5449398159980774, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0045, + "step": 20375 + }, + { + "epoch": 6.85, + "grad_norm": 1.2952990531921387, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0051, + "step": 20400 + }, + { + "epoch": 6.86, + "grad_norm": 1.5595630407333374, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0045, + "step": 20425 + }, + { + "epoch": 6.87, + "grad_norm": 1.369549036026001, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0046, + "step": 20450 + }, + { + "epoch": 6.88, + "grad_norm": 1.4814302921295166, + "learning_rate": 7.993266331658292e-06, + "loss": 0.005, + "step": 20475 + }, + { + "epoch": 6.88, + "grad_norm": 1.102768063545227, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0044, + "step": 20500 + }, + { + "epoch": 6.89, + "grad_norm": 1.531099796295166, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0049, + "step": 20525 + }, + { + "epoch": 6.9, + "grad_norm": 1.2672648429870605, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0047, + "step": 20550 + }, + { + "epoch": 6.91, + "grad_norm": 1.3831889629364014, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0052, + "step": 20575 + }, + { + "epoch": 6.92, + "grad_norm": 1.4892693758010864, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0057, + "step": 20600 + }, + { + "epoch": 6.93, + "grad_norm": 0.8420122265815735, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0043, + "step": 20625 + }, + { + "epoch": 6.93, + "grad_norm": 1.1232908964157104, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0047, + "step": 20650 + }, + { + "epoch": 6.94, + "grad_norm": 0.5324325561523438, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0036, + "step": 20675 + }, + { + "epoch": 6.95, + "grad_norm": 1.3700491189956665, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0045, + "step": 20700 + }, + { + "epoch": 6.96, + "grad_norm": 0.8309997320175171, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0037, + "step": 20725 + }, + { + "epoch": 6.97, + "grad_norm": 0.881514310836792, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0049, + "step": 20750 + }, + { + "epoch": 6.98, + "grad_norm": 1.1853270530700684, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0044, + "step": 20775 + }, + { + "epoch": 6.98, + "grad_norm": 1.07291579246521, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0052, + "step": 20800 + }, + { + "epoch": 6.99, + "grad_norm": 1.352036714553833, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0041, + "step": 20825 + }, + { + "epoch": 7.0, + "grad_norm": 0.9473835229873657, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0049, + "step": 20850 + }, + { + "epoch": 7.01, + "grad_norm": 0.9891611933708191, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0034, + "step": 20875 + }, + { + "epoch": 7.02, + "grad_norm": 1.1521122455596924, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0033, + "step": 20900 + }, + { + "epoch": 7.03, + "grad_norm": 0.8888075947761536, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0035, + "step": 20925 + }, + { + "epoch": 7.03, + "grad_norm": 0.5583088397979736, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0025, + "step": 20950 + }, + { + "epoch": 7.04, + "grad_norm": 0.5203286409378052, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0027, + "step": 20975 + }, + { + "epoch": 7.05, + "grad_norm": 0.8802359700202942, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0032, + "step": 21000 + }, + { + "epoch": 7.05, + "eval_loss": 0.22171403467655182, + "eval_runtime": 1133.5447, + "eval_samples_per_second": 1.243, + "eval_steps_per_second": 1.243, + "eval_wer": 19.239017640954685, + "step": 21000 + }, + { + "epoch": 7.06, + "grad_norm": 0.5751068592071533, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0022, + "step": 21025 + }, + { + "epoch": 7.07, + "grad_norm": 0.5913766026496887, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0028, + "step": 21050 + }, + { + "epoch": 7.08, + "grad_norm": 0.48097094893455505, + "learning_rate": 7.932964824120604e-06, + "loss": 0.0025, + "step": 21075 + }, + { + "epoch": 7.09, + "grad_norm": 1.1281042098999023, + "learning_rate": 7.930452261306534e-06, + "loss": 0.0027, + "step": 21100 + }, + { + "epoch": 7.09, + "grad_norm": 0.7849037647247314, + "learning_rate": 7.927939698492463e-06, + "loss": 0.0027, + "step": 21125 + }, + { + "epoch": 7.1, + "grad_norm": 1.0072838068008423, + "learning_rate": 7.925427135678392e-06, + "loss": 0.0031, + "step": 21150 + }, + { + "epoch": 7.11, + "grad_norm": 1.0067269802093506, + "learning_rate": 7.922914572864322e-06, + "loss": 0.0026, + "step": 21175 + }, + { + "epoch": 7.12, + "grad_norm": 1.1733664274215698, + "learning_rate": 7.920402010050253e-06, + "loss": 0.0028, + "step": 21200 + }, + { + "epoch": 7.13, + "grad_norm": 1.2997864484786987, + "learning_rate": 7.91788944723618e-06, + "loss": 0.0032, + "step": 21225 + }, + { + "epoch": 7.14, + "grad_norm": 0.45230573415756226, + "learning_rate": 7.915376884422111e-06, + "loss": 0.0032, + "step": 21250 + }, + { + "epoch": 7.14, + "grad_norm": 0.6211918592453003, + "learning_rate": 7.91286432160804e-06, + "loss": 0.0034, + "step": 21275 + }, + { + "epoch": 7.15, + "grad_norm": 0.7041113376617432, + "learning_rate": 7.91035175879397e-06, + "loss": 0.003, + "step": 21300 + }, + { + "epoch": 7.16, + "grad_norm": 1.014319896697998, + "learning_rate": 7.907839195979901e-06, + "loss": 0.0027, + "step": 21325 + }, + { + "epoch": 7.17, + "grad_norm": 1.203870177268982, + "learning_rate": 7.90532663316583e-06, + "loss": 0.0029, + "step": 21350 + }, + { + "epoch": 7.18, + "grad_norm": 0.7178922295570374, + "learning_rate": 7.90281407035176e-06, + "loss": 0.0035, + "step": 21375 + }, + { + "epoch": 7.19, + "grad_norm": 1.5415914058685303, + "learning_rate": 7.900301507537689e-06, + "loss": 0.0034, + "step": 21400 + }, + { + "epoch": 7.19, + "grad_norm": 1.7796553373336792, + "learning_rate": 7.897788944723618e-06, + "loss": 0.0037, + "step": 21425 + }, + { + "epoch": 7.2, + "grad_norm": 0.5549774765968323, + "learning_rate": 7.895276381909548e-06, + "loss": 0.0031, + "step": 21450 + }, + { + "epoch": 7.21, + "grad_norm": 0.8039197325706482, + "learning_rate": 7.892763819095479e-06, + "loss": 0.0034, + "step": 21475 + }, + { + "epoch": 7.22, + "grad_norm": 1.164984107017517, + "learning_rate": 7.890251256281408e-06, + "loss": 0.0032, + "step": 21500 + }, + { + "epoch": 7.23, + "grad_norm": 1.2070322036743164, + "learning_rate": 7.887738693467337e-06, + "loss": 0.0027, + "step": 21525 + }, + { + "epoch": 7.24, + "grad_norm": 0.9367673397064209, + "learning_rate": 7.885226130653267e-06, + "loss": 0.0033, + "step": 21550 + }, + { + "epoch": 7.24, + "grad_norm": NaN, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0035, + "step": 21575 + }, + { + "epoch": 7.25, + "grad_norm": 2.351975440979004, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0042, + "step": 21600 + }, + { + "epoch": 7.26, + "grad_norm": 0.4294515550136566, + "learning_rate": 7.877788944723618e-06, + "loss": 0.003, + "step": 21625 + }, + { + "epoch": 7.27, + "grad_norm": 0.7105314135551453, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0031, + "step": 21650 + }, + { + "epoch": 7.28, + "grad_norm": 1.2415978908538818, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0031, + "step": 21675 + }, + { + "epoch": 7.29, + "grad_norm": 2.5769107341766357, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0044, + "step": 21700 + }, + { + "epoch": 7.3, + "grad_norm": 1.028355360031128, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0031, + "step": 21725 + }, + { + "epoch": 7.3, + "grad_norm": 1.0116665363311768, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0035, + "step": 21750 + }, + { + "epoch": 7.31, + "grad_norm": 0.8269513249397278, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0035, + "step": 21775 + }, + { + "epoch": 7.32, + "grad_norm": 0.7678688168525696, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0029, + "step": 21800 + }, + { + "epoch": 7.33, + "grad_norm": 0.9065297842025757, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0027, + "step": 21825 + }, + { + "epoch": 7.34, + "grad_norm": 0.7604646682739258, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0041, + "step": 21850 + }, + { + "epoch": 7.35, + "grad_norm": 1.2727622985839844, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0031, + "step": 21875 + }, + { + "epoch": 7.35, + "grad_norm": 1.452862024307251, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0036, + "step": 21900 + }, + { + "epoch": 7.36, + "grad_norm": 0.771116316318512, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0039, + "step": 21925 + }, + { + "epoch": 7.37, + "grad_norm": 0.8122366070747375, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0038, + "step": 21950 + }, + { + "epoch": 7.38, + "grad_norm": 0.4498709738254547, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0038, + "step": 21975 + }, + { + "epoch": 7.39, + "grad_norm": 1.0268696546554565, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0034, + "step": 22000 + }, + { + "epoch": 7.39, + "eval_loss": 0.21882756054401398, + "eval_runtime": 1135.4398, + "eval_samples_per_second": 1.241, + "eval_steps_per_second": 1.241, + "eval_wer": 19.32895191975095, + "step": 22000 + }, + { + "epoch": 7.4, + "grad_norm": 1.3854974508285522, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0038, + "step": 22025 + }, + { + "epoch": 7.4, + "grad_norm": 0.9560887813568115, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0037, + "step": 22050 + }, + { + "epoch": 7.41, + "grad_norm": 0.596481442451477, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0036, + "step": 22075 + }, + { + "epoch": 7.42, + "grad_norm": 0.49616700410842896, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0033, + "step": 22100 + }, + { + "epoch": 7.43, + "grad_norm": 0.6972934007644653, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0033, + "step": 22125 + }, + { + "epoch": 7.44, + "grad_norm": 1.401375412940979, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0033, + "step": 22150 + }, + { + "epoch": 7.45, + "grad_norm": 1.8831095695495605, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0046, + "step": 22175 + }, + { + "epoch": 7.45, + "grad_norm": 1.2625112533569336, + "learning_rate": 7.820000000000001e-06, + "loss": 0.004, + "step": 22200 + }, + { + "epoch": 7.46, + "grad_norm": 0.45980942249298096, + "learning_rate": 7.81748743718593e-06, + "loss": 0.0035, + "step": 22225 + }, + { + "epoch": 7.47, + "grad_norm": 1.117140531539917, + "learning_rate": 7.81497487437186e-06, + "loss": 0.004, + "step": 22250 + }, + { + "epoch": 7.48, + "grad_norm": 0.7067911624908447, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0034, + "step": 22275 + }, + { + "epoch": 7.49, + "grad_norm": 1.3817204236984253, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0036, + "step": 22300 + }, + { + "epoch": 7.5, + "grad_norm": 0.8797362446784973, + "learning_rate": 7.80743718592965e-06, + "loss": 0.003, + "step": 22325 + }, + { + "epoch": 7.51, + "grad_norm": 0.4352262318134308, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0035, + "step": 22350 + }, + { + "epoch": 7.51, + "grad_norm": 1.1777235269546509, + "learning_rate": 7.802412060301508e-06, + "loss": 0.004, + "step": 22375 + }, + { + "epoch": 7.52, + "grad_norm": 0.6568429470062256, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0037, + "step": 22400 + }, + { + "epoch": 7.53, + "grad_norm": 0.7451076507568359, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0043, + "step": 22425 + }, + { + "epoch": 7.54, + "grad_norm": 0.7880164384841919, + "learning_rate": 7.794874371859296e-06, + "loss": 0.004, + "step": 22450 + }, + { + "epoch": 7.55, + "grad_norm": 0.5575277209281921, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0031, + "step": 22475 + }, + { + "epoch": 7.56, + "grad_norm": 1.4756628274917603, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0041, + "step": 22500 + }, + { + "epoch": 7.56, + "grad_norm": 1.7986561059951782, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0037, + "step": 22525 + }, + { + "epoch": 7.57, + "grad_norm": 0.5861666798591614, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0035, + "step": 22550 + }, + { + "epoch": 7.58, + "grad_norm": 1.2560958862304688, + "learning_rate": 7.782311557788945e-06, + "loss": 0.004, + "step": 22575 + }, + { + "epoch": 7.59, + "grad_norm": 0.37111377716064453, + "learning_rate": 7.779798994974876e-06, + "loss": 0.0038, + "step": 22600 + }, + { + "epoch": 7.6, + "grad_norm": 0.5457039475440979, + "learning_rate": 7.777286432160805e-06, + "loss": 0.0031, + "step": 22625 + }, + { + "epoch": 7.61, + "grad_norm": 1.3897887468338013, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0044, + "step": 22650 + }, + { + "epoch": 7.61, + "grad_norm": 1.063726544380188, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0034, + "step": 22675 + }, + { + "epoch": 7.62, + "grad_norm": 1.112552285194397, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0039, + "step": 22700 + }, + { + "epoch": 7.63, + "grad_norm": 0.9013391733169556, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0033, + "step": 22725 + }, + { + "epoch": 7.64, + "grad_norm": 1.4491323232650757, + "learning_rate": 7.764723618090453e-06, + "loss": 0.003, + "step": 22750 + }, + { + "epoch": 7.65, + "grad_norm": 0.7179682850837708, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0037, + "step": 22775 + }, + { + "epoch": 7.66, + "grad_norm": 1.2890406847000122, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0036, + "step": 22800 + }, + { + "epoch": 7.66, + "grad_norm": 2.0040292739868164, + "learning_rate": 7.757185929648243e-06, + "loss": 0.004, + "step": 22825 + }, + { + "epoch": 7.67, + "grad_norm": 1.3585739135742188, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0038, + "step": 22850 + }, + { + "epoch": 7.68, + "grad_norm": 0.6756640076637268, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0037, + "step": 22875 + }, + { + "epoch": 7.69, + "grad_norm": 1.2041339874267578, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0047, + "step": 22900 + }, + { + "epoch": 7.7, + "grad_norm": 0.9716305136680603, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0041, + "step": 22925 + }, + { + "epoch": 7.71, + "grad_norm": 1.3930187225341797, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0039, + "step": 22950 + }, + { + "epoch": 7.71, + "grad_norm": 0.8271166682243347, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0039, + "step": 22975 + }, + { + "epoch": 7.72, + "grad_norm": 1.1029390096664429, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0042, + "step": 23000 + }, + { + "epoch": 7.72, + "eval_loss": 0.22809052467346191, + "eval_runtime": 1134.821, + "eval_samples_per_second": 1.242, + "eval_steps_per_second": 1.242, + "eval_wer": 19.01072293324109, + "step": 23000 + }, + { + "epoch": 7.73, + "grad_norm": 1.685168743133545, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0038, + "step": 23025 + }, + { + "epoch": 7.74, + "grad_norm": 0.8908345103263855, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0038, + "step": 23050 + }, + { + "epoch": 7.75, + "grad_norm": 1.481060266494751, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0041, + "step": 23075 + }, + { + "epoch": 7.76, + "grad_norm": 0.7862600088119507, + "learning_rate": 7.729547738693469e-06, + "loss": 0.0036, + "step": 23100 + }, + { + "epoch": 7.77, + "grad_norm": 0.6775239109992981, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0036, + "step": 23125 + }, + { + "epoch": 7.77, + "grad_norm": 1.1475903987884521, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0041, + "step": 23150 + }, + { + "epoch": 7.78, + "grad_norm": 1.0898088216781616, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0029, + "step": 23175 + }, + { + "epoch": 7.79, + "grad_norm": 1.3829331398010254, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0045, + "step": 23200 + }, + { + "epoch": 7.8, + "grad_norm": 0.7977848649024963, + "learning_rate": 7.716984924623117e-06, + "loss": 0.0034, + "step": 23225 + }, + { + "epoch": 7.81, + "grad_norm": 1.0220732688903809, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0043, + "step": 23250 + }, + { + "epoch": 7.82, + "grad_norm": 1.0392690896987915, + "learning_rate": 7.711959798994976e-06, + "loss": 0.0036, + "step": 23275 + }, + { + "epoch": 7.82, + "grad_norm": 0.6904581189155579, + "learning_rate": 7.709447236180905e-06, + "loss": 0.004, + "step": 23300 + }, + { + "epoch": 7.83, + "grad_norm": 1.6081840991973877, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0033, + "step": 23325 + }, + { + "epoch": 7.84, + "grad_norm": 0.27222031354904175, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0045, + "step": 23350 + }, + { + "epoch": 7.85, + "grad_norm": 1.5829079151153564, + "learning_rate": 7.701909547738695e-06, + "loss": 0.0042, + "step": 23375 + }, + { + "epoch": 7.86, + "grad_norm": 1.0002962350845337, + "learning_rate": 7.699396984924624e-06, + "loss": 0.0046, + "step": 23400 + }, + { + "epoch": 7.87, + "grad_norm": 1.2926607131958008, + "learning_rate": 7.696884422110553e-06, + "loss": 0.004, + "step": 23425 + }, + { + "epoch": 7.87, + "grad_norm": 1.2218358516693115, + "learning_rate": 7.694371859296483e-06, + "loss": 0.0035, + "step": 23450 + }, + { + "epoch": 7.88, + "grad_norm": 0.8419932723045349, + "learning_rate": 7.691859296482412e-06, + "loss": 0.0029, + "step": 23475 + }, + { + "epoch": 7.89, + "grad_norm": 0.2974546253681183, + "learning_rate": 7.689346733668343e-06, + "loss": 0.0042, + "step": 23500 + }, + { + "epoch": 7.9, + "grad_norm": 1.0124133825302124, + "learning_rate": 7.68683417085427e-06, + "loss": 0.0042, + "step": 23525 + }, + { + "epoch": 7.91, + "grad_norm": 0.9606423377990723, + "learning_rate": 7.684321608040202e-06, + "loss": 0.004, + "step": 23550 + }, + { + "epoch": 7.92, + "grad_norm": 0.3798360228538513, + "learning_rate": 7.681809045226131e-06, + "loss": 0.0043, + "step": 23575 + }, + { + "epoch": 7.92, + "grad_norm": 1.3667808771133423, + "learning_rate": 7.67929648241206e-06, + "loss": 0.0039, + "step": 23600 + }, + { + "epoch": 7.93, + "grad_norm": 0.9404433369636536, + "learning_rate": 7.676783919597991e-06, + "loss": 0.0035, + "step": 23625 + }, + { + "epoch": 7.94, + "grad_norm": 0.6253132224082947, + "learning_rate": 7.67427135678392e-06, + "loss": 0.0037, + "step": 23650 + }, + { + "epoch": 7.95, + "grad_norm": 1.069404125213623, + "learning_rate": 7.67175879396985e-06, + "loss": 0.0032, + "step": 23675 + }, + { + "epoch": 7.96, + "grad_norm": 0.47420430183410645, + "learning_rate": 7.66924623115578e-06, + "loss": 0.0029, + "step": 23700 + }, + { + "epoch": 7.97, + "grad_norm": 0.6767124533653259, + "learning_rate": 7.666733668341709e-06, + "loss": 0.0033, + "step": 23725 + }, + { + "epoch": 7.98, + "grad_norm": 1.3642899990081787, + "learning_rate": 7.664221105527638e-06, + "loss": 0.0034, + "step": 23750 + }, + { + "epoch": 7.98, + "grad_norm": 0.3773951828479767, + "learning_rate": 7.661708542713569e-06, + "loss": 0.0036, + "step": 23775 + }, + { + "epoch": 7.99, + "grad_norm": 1.2624307870864868, + "learning_rate": 7.659195979899498e-06, + "loss": 0.0046, + "step": 23800 + }, + { + "epoch": 8.0, + "grad_norm": 0.4458150267601013, + "learning_rate": 7.656683417085428e-06, + "loss": 0.0034, + "step": 23825 + }, + { + "epoch": 8.01, + "grad_norm": 1.1295353174209595, + "learning_rate": 7.654170854271357e-06, + "loss": 0.0035, + "step": 23850 + }, + { + "epoch": 8.02, + "grad_norm": 1.0554399490356445, + "learning_rate": 7.651658291457286e-06, + "loss": 0.0022, + "step": 23875 + }, + { + "epoch": 8.03, + "grad_norm": 0.7403746843338013, + "learning_rate": 7.649145728643217e-06, + "loss": 0.0024, + "step": 23900 + }, + { + "epoch": 8.03, + "grad_norm": 0.6169100999832153, + "learning_rate": 7.646633165829147e-06, + "loss": 0.0027, + "step": 23925 + }, + { + "epoch": 8.04, + "grad_norm": 0.9679386019706726, + "learning_rate": 7.644120603015076e-06, + "loss": 0.0028, + "step": 23950 + }, + { + "epoch": 8.05, + "grad_norm": 0.6035418510437012, + "learning_rate": 7.641608040201005e-06, + "loss": 0.0026, + "step": 23975 + }, + { + "epoch": 8.06, + "grad_norm": 0.6911060810089111, + "learning_rate": 7.639095477386935e-06, + "loss": 0.0028, + "step": 24000 + }, + { + "epoch": 8.06, + "eval_loss": 0.23089872300624847, + "eval_runtime": 1146.1624, + "eval_samples_per_second": 1.229, + "eval_steps_per_second": 1.229, + "eval_wer": 19.19750951227949, + "step": 24000 + }, + { + "epoch": 8.07, + "grad_norm": 0.20233117043972015, + "learning_rate": 7.636582914572866e-06, + "loss": 0.002, + "step": 24025 + }, + { + "epoch": 8.08, + "grad_norm": 1.3792070150375366, + "learning_rate": 7.634070351758795e-06, + "loss": 0.0021, + "step": 24050 + }, + { + "epoch": 8.08, + "grad_norm": 0.33055415749549866, + "learning_rate": 7.631557788944724e-06, + "loss": 0.0024, + "step": 24075 + }, + { + "epoch": 8.09, + "grad_norm": 0.12022224813699722, + "learning_rate": 7.629045226130654e-06, + "loss": 0.0027, + "step": 24100 + }, + { + "epoch": 8.1, + "grad_norm": 1.153826117515564, + "learning_rate": 7.626532663316584e-06, + "loss": 0.0021, + "step": 24125 + }, + { + "epoch": 8.11, + "grad_norm": 0.759722888469696, + "learning_rate": 7.624020100502513e-06, + "loss": 0.0021, + "step": 24150 + }, + { + "epoch": 8.12, + "grad_norm": 0.863078773021698, + "learning_rate": 7.6215075376884425e-06, + "loss": 0.0023, + "step": 24175 + }, + { + "epoch": 8.13, + "grad_norm": 1.1714695692062378, + "learning_rate": 7.618994974874373e-06, + "loss": 0.0029, + "step": 24200 + }, + { + "epoch": 8.13, + "grad_norm": 0.42314743995666504, + "learning_rate": 7.616482412060302e-06, + "loss": 0.0022, + "step": 24225 + }, + { + "epoch": 8.14, + "grad_norm": 0.3463688790798187, + "learning_rate": 7.613969849246232e-06, + "loss": 0.0037, + "step": 24250 + }, + { + "epoch": 8.15, + "grad_norm": 0.7753547430038452, + "learning_rate": 7.6114572864321615e-06, + "loss": 0.0027, + "step": 24275 + }, + { + "epoch": 8.16, + "grad_norm": 1.3231377601623535, + "learning_rate": 7.608944723618092e-06, + "loss": 0.0025, + "step": 24300 + }, + { + "epoch": 8.17, + "grad_norm": 0.6858195662498474, + "learning_rate": 7.60643216080402e-06, + "loss": 0.0027, + "step": 24325 + }, + { + "epoch": 8.18, + "grad_norm": 0.18892864882946014, + "learning_rate": 7.60391959798995e-06, + "loss": 0.0028, + "step": 24350 + }, + { + "epoch": 8.19, + "grad_norm": 0.7225956320762634, + "learning_rate": 7.60140703517588e-06, + "loss": 0.0025, + "step": 24375 + }, + { + "epoch": 8.19, + "grad_norm": 1.3571077585220337, + "learning_rate": 7.59889447236181e-06, + "loss": 0.0032, + "step": 24400 + }, + { + "epoch": 8.2, + "grad_norm": 0.44963952898979187, + "learning_rate": 7.59638190954774e-06, + "loss": 0.0029, + "step": 24425 + }, + { + "epoch": 8.21, + "grad_norm": 0.9708372950553894, + "learning_rate": 7.593869346733668e-06, + "loss": 0.0024, + "step": 24450 + }, + { + "epoch": 8.22, + "grad_norm": 0.3011151850223541, + "learning_rate": 7.591356783919599e-06, + "loss": 0.0027, + "step": 24475 + }, + { + "epoch": 8.23, + "grad_norm": 0.6526892781257629, + "learning_rate": 7.588844221105528e-06, + "loss": 0.0024, + "step": 24500 + }, + { + "epoch": 8.24, + "grad_norm": 0.7894423604011536, + "learning_rate": 7.586331658291458e-06, + "loss": 0.0026, + "step": 24525 + }, + { + "epoch": 8.24, + "grad_norm": 0.9269857406616211, + "learning_rate": 7.583819095477387e-06, + "loss": 0.0035, + "step": 24550 + }, + { + "epoch": 8.25, + "grad_norm": 0.6088006496429443, + "learning_rate": 7.5813065326633176e-06, + "loss": 0.003, + "step": 24575 + }, + { + "epoch": 8.26, + "grad_norm": 0.9723014831542969, + "learning_rate": 7.578793969849246e-06, + "loss": 0.0028, + "step": 24600 + }, + { + "epoch": 8.27, + "grad_norm": 0.7305650115013123, + "learning_rate": 7.57638190954774e-06, + "loss": 0.0028, + "step": 24625 + }, + { + "epoch": 8.28, + "grad_norm": 1.096291184425354, + "learning_rate": 7.573869346733669e-06, + "loss": 0.0033, + "step": 24650 + }, + { + "epoch": 8.29, + "grad_norm": 0.3499845862388611, + "learning_rate": 7.571356783919599e-06, + "loss": 0.0028, + "step": 24675 + }, + { + "epoch": 8.29, + "grad_norm": 0.7494030594825745, + "learning_rate": 7.568844221105528e-06, + "loss": 0.0028, + "step": 24700 + }, + { + "epoch": 8.3, + "grad_norm": 0.9328449368476868, + "learning_rate": 7.566331658291458e-06, + "loss": 0.0021, + "step": 24725 + }, + { + "epoch": 8.31, + "grad_norm": 1.1542141437530518, + "learning_rate": 7.563819095477387e-06, + "loss": 0.003, + "step": 24750 + }, + { + "epoch": 8.32, + "grad_norm": 1.0172189474105835, + "learning_rate": 7.561306532663317e-06, + "loss": 0.0026, + "step": 24775 + }, + { + "epoch": 8.33, + "grad_norm": 0.611617386341095, + "learning_rate": 7.558793969849247e-06, + "loss": 0.0028, + "step": 24800 + }, + { + "epoch": 8.34, + "grad_norm": 1.064241886138916, + "learning_rate": 7.556281407035176e-06, + "loss": 0.0027, + "step": 24825 + }, + { + "epoch": 8.34, + "grad_norm": 1.0542187690734863, + "learning_rate": 7.5537688442211066e-06, + "loss": 0.003, + "step": 24850 + }, + { + "epoch": 8.35, + "grad_norm": 1.1126364469528198, + "learning_rate": 7.551256281407036e-06, + "loss": 0.0035, + "step": 24875 + }, + { + "epoch": 8.36, + "grad_norm": 1.5171892642974854, + "learning_rate": 7.548743718592966e-06, + "loss": 0.0029, + "step": 24900 + }, + { + "epoch": 8.37, + "grad_norm": 0.8418084383010864, + "learning_rate": 7.5462311557788945e-06, + "loss": 0.0037, + "step": 24925 + }, + { + "epoch": 8.38, + "grad_norm": 1.6541335582733154, + "learning_rate": 7.543718592964825e-06, + "loss": 0.0031, + "step": 24950 + }, + { + "epoch": 8.39, + "grad_norm": 1.2167327404022217, + "learning_rate": 7.541206030150754e-06, + "loss": 0.0025, + "step": 24975 + }, + { + "epoch": 8.39, + "grad_norm": 1.5647732019424438, + "learning_rate": 7.538693467336684e-06, + "loss": 0.003, + "step": 25000 + }, + { + "epoch": 8.39, + "eval_loss": 0.23907245695590973, + "eval_runtime": 1165.1229, + "eval_samples_per_second": 1.209, + "eval_steps_per_second": 1.209, + "eval_wer": 19.80629539951574, + "step": 25000 + }, + { + "epoch": 8.4, + "grad_norm": 1.0820497274398804, + "learning_rate": 7.536180904522614e-06, + "loss": 0.0027, + "step": 25025 + }, + { + "epoch": 8.41, + "grad_norm": 1.1128334999084473, + "learning_rate": 7.533668341708543e-06, + "loss": 0.003, + "step": 25050 + }, + { + "epoch": 8.42, + "grad_norm": 0.3314770758152008, + "learning_rate": 7.531155778894473e-06, + "loss": 0.0029, + "step": 25075 + }, + { + "epoch": 8.43, + "grad_norm": 0.8979112505912781, + "learning_rate": 7.528643216080402e-06, + "loss": 0.003, + "step": 25100 + }, + { + "epoch": 8.44, + "grad_norm": 0.5131418704986572, + "learning_rate": 7.5261306532663325e-06, + "loss": 0.003, + "step": 25125 + }, + { + "epoch": 8.45, + "grad_norm": 1.7427021265029907, + "learning_rate": 7.523618090452262e-06, + "loss": 0.0025, + "step": 25150 + }, + { + "epoch": 8.45, + "grad_norm": 1.709036111831665, + "learning_rate": 7.521105527638192e-06, + "loss": 0.0029, + "step": 25175 + }, + { + "epoch": 8.46, + "grad_norm": 0.916296124458313, + "learning_rate": 7.5185929648241205e-06, + "loss": 0.0034, + "step": 25200 + }, + { + "epoch": 8.47, + "grad_norm": 2.136697769165039, + "learning_rate": 7.516080402010051e-06, + "loss": 0.0037, + "step": 25225 + }, + { + "epoch": 8.48, + "grad_norm": 0.7835884094238281, + "learning_rate": 7.513567839195981e-06, + "loss": 0.004, + "step": 25250 + }, + { + "epoch": 8.49, + "grad_norm": 0.9431770443916321, + "learning_rate": 7.51105527638191e-06, + "loss": 0.0033, + "step": 25275 + }, + { + "epoch": 8.5, + "grad_norm": 1.2670321464538574, + "learning_rate": 7.50854271356784e-06, + "loss": 0.0037, + "step": 25300 + }, + { + "epoch": 8.5, + "grad_norm": 0.8385361433029175, + "learning_rate": 7.506030150753769e-06, + "loss": 0.0031, + "step": 25325 + }, + { + "epoch": 8.51, + "grad_norm": 1.2313780784606934, + "learning_rate": 7.503517587939699e-06, + "loss": 0.004, + "step": 25350 + }, + { + "epoch": 8.52, + "grad_norm": 0.9829389452934265, + "learning_rate": 7.501005025125628e-06, + "loss": 0.0037, + "step": 25375 + }, + { + "epoch": 8.53, + "grad_norm": 0.8213529586791992, + "learning_rate": 7.4984924623115585e-06, + "loss": 0.0028, + "step": 25400 + }, + { + "epoch": 8.54, + "grad_norm": 0.927603542804718, + "learning_rate": 7.495979899497488e-06, + "loss": 0.0032, + "step": 25425 + }, + { + "epoch": 8.55, + "grad_norm": 1.1416429281234741, + "learning_rate": 7.493467336683418e-06, + "loss": 0.0023, + "step": 25450 + }, + { + "epoch": 8.55, + "grad_norm": 1.8299716711044312, + "learning_rate": 7.490954773869348e-06, + "loss": 0.0027, + "step": 25475 + }, + { + "epoch": 8.56, + "grad_norm": 1.3916014432907104, + "learning_rate": 7.488442211055277e-06, + "loss": 0.0025, + "step": 25500 + }, + { + "epoch": 8.57, + "grad_norm": 1.0202393531799316, + "learning_rate": 7.485929648241207e-06, + "loss": 0.0027, + "step": 25525 + }, + { + "epoch": 8.58, + "grad_norm": 1.3210216760635376, + "learning_rate": 7.483417085427136e-06, + "loss": 0.0027, + "step": 25550 + }, + { + "epoch": 8.59, + "grad_norm": 1.6474629640579224, + "learning_rate": 7.480904522613066e-06, + "loss": 0.0038, + "step": 25575 + }, + { + "epoch": 8.6, + "grad_norm": 1.4217314720153809, + "learning_rate": 7.478391959798995e-06, + "loss": 0.0029, + "step": 25600 + }, + { + "epoch": 8.6, + "grad_norm": 1.4871481657028198, + "learning_rate": 7.475879396984925e-06, + "loss": 0.0035, + "step": 25625 + }, + { + "epoch": 8.61, + "grad_norm": 1.0962460041046143, + "learning_rate": 7.473366834170855e-06, + "loss": 0.0045, + "step": 25650 + }, + { + "epoch": 8.62, + "grad_norm": 0.5831522941589355, + "learning_rate": 7.470854271356784e-06, + "loss": 0.0032, + "step": 25675 + }, + { + "epoch": 8.63, + "grad_norm": 0.5289164185523987, + "learning_rate": 7.4683417085427146e-06, + "loss": 0.0032, + "step": 25700 + }, + { + "epoch": 8.64, + "grad_norm": 1.8914228677749634, + "learning_rate": 7.465829145728644e-06, + "loss": 0.0028, + "step": 25725 + }, + { + "epoch": 8.65, + "grad_norm": 0.8113170862197876, + "learning_rate": 7.463316582914574e-06, + "loss": 0.0033, + "step": 25750 + }, + { + "epoch": 8.66, + "grad_norm": 1.257564663887024, + "learning_rate": 7.4608040201005025e-06, + "loss": 0.0036, + "step": 25775 + }, + { + "epoch": 8.66, + "grad_norm": 0.9220947623252869, + "learning_rate": 7.458291457286433e-06, + "loss": 0.0033, + "step": 25800 + }, + { + "epoch": 8.67, + "grad_norm": 0.9442367553710938, + "learning_rate": 7.455778894472362e-06, + "loss": 0.0035, + "step": 25825 + }, + { + "epoch": 8.68, + "grad_norm": 1.0244179964065552, + "learning_rate": 7.453266331658292e-06, + "loss": 0.0033, + "step": 25850 + }, + { + "epoch": 8.69, + "grad_norm": 1.0032771825790405, + "learning_rate": 7.450753768844222e-06, + "loss": 0.0032, + "step": 25875 + }, + { + "epoch": 8.7, + "grad_norm": 0.5517337918281555, + "learning_rate": 7.448241206030151e-06, + "loss": 0.003, + "step": 25900 + }, + { + "epoch": 8.71, + "grad_norm": 0.12826769053936005, + "learning_rate": 7.445728643216081e-06, + "loss": 0.003, + "step": 25925 + }, + { + "epoch": 8.71, + "grad_norm": 0.8813045620918274, + "learning_rate": 7.44321608040201e-06, + "loss": 0.0031, + "step": 25950 + }, + { + "epoch": 8.72, + "grad_norm": 1.0569912195205688, + "learning_rate": 7.4407035175879405e-06, + "loss": 0.0032, + "step": 25975 + }, + { + "epoch": 8.73, + "grad_norm": 0.9567935466766357, + "learning_rate": 7.43819095477387e-06, + "loss": 0.0033, + "step": 26000 + }, + { + "epoch": 8.73, + "eval_loss": 0.23041118681430817, + "eval_runtime": 1167.6763, + "eval_samples_per_second": 1.207, + "eval_steps_per_second": 1.207, + "eval_wer": 19.584918713248012, + "step": 26000 + }, + { + "epoch": 8.74, + "grad_norm": 0.8944895267486572, + "learning_rate": 7.4356783919598e-06, + "loss": 0.0028, + "step": 26025 + }, + { + "epoch": 8.75, + "grad_norm": 1.4520360231399536, + "learning_rate": 7.4331658291457285e-06, + "loss": 0.0038, + "step": 26050 + }, + { + "epoch": 8.76, + "grad_norm": 1.022255778312683, + "learning_rate": 7.430653266331659e-06, + "loss": 0.0033, + "step": 26075 + }, + { + "epoch": 8.76, + "grad_norm": 0.6342647671699524, + "learning_rate": 7.428140703517589e-06, + "loss": 0.0036, + "step": 26100 + }, + { + "epoch": 8.77, + "grad_norm": 1.061218500137329, + "learning_rate": 7.425628140703518e-06, + "loss": 0.0033, + "step": 26125 + }, + { + "epoch": 8.78, + "grad_norm": 0.2652261555194855, + "learning_rate": 7.423115577889448e-06, + "loss": 0.0034, + "step": 26150 + }, + { + "epoch": 8.79, + "grad_norm": 1.2033612728118896, + "learning_rate": 7.420603015075377e-06, + "loss": 0.0032, + "step": 26175 + }, + { + "epoch": 8.8, + "grad_norm": 0.9555964469909668, + "learning_rate": 7.418090452261307e-06, + "loss": 0.0032, + "step": 26200 + }, + { + "epoch": 8.81, + "grad_norm": 3.9597322940826416, + "learning_rate": 7.415577889447236e-06, + "loss": 0.0038, + "step": 26225 + }, + { + "epoch": 8.81, + "grad_norm": 1.7834392786026, + "learning_rate": 7.4130653266331665e-06, + "loss": 0.0031, + "step": 26250 + }, + { + "epoch": 8.82, + "grad_norm": 1.0024807453155518, + "learning_rate": 7.410552763819097e-06, + "loss": 0.0038, + "step": 26275 + }, + { + "epoch": 8.83, + "grad_norm": 0.9197081923484802, + "learning_rate": 7.408040201005026e-06, + "loss": 0.0035, + "step": 26300 + }, + { + "epoch": 8.84, + "grad_norm": 0.7029849886894226, + "learning_rate": 7.405527638190956e-06, + "loss": 0.0026, + "step": 26325 + }, + { + "epoch": 8.85, + "grad_norm": 1.1914668083190918, + "learning_rate": 7.403015075376885e-06, + "loss": 0.0028, + "step": 26350 + }, + { + "epoch": 8.86, + "grad_norm": 0.6103140115737915, + "learning_rate": 7.400502512562815e-06, + "loss": 0.003, + "step": 26375 + }, + { + "epoch": 8.87, + "grad_norm": 0.5546879172325134, + "learning_rate": 7.397989949748744e-06, + "loss": 0.0028, + "step": 26400 + }, + { + "epoch": 8.87, + "grad_norm": 0.6913254261016846, + "learning_rate": 7.395477386934674e-06, + "loss": 0.003, + "step": 26425 + }, + { + "epoch": 8.88, + "grad_norm": 1.2254917621612549, + "learning_rate": 7.392964824120603e-06, + "loss": 0.0038, + "step": 26450 + }, + { + "epoch": 8.89, + "grad_norm": 1.299473524093628, + "learning_rate": 7.390452261306533e-06, + "loss": 0.0034, + "step": 26475 + }, + { + "epoch": 8.9, + "grad_norm": 0.8463153839111328, + "learning_rate": 7.387939698492463e-06, + "loss": 0.0029, + "step": 26500 + }, + { + "epoch": 8.91, + "grad_norm": 1.0341131687164307, + "learning_rate": 7.385427135678392e-06, + "loss": 0.0034, + "step": 26525 + }, + { + "epoch": 8.92, + "grad_norm": 1.3592936992645264, + "learning_rate": 7.382914572864323e-06, + "loss": 0.0037, + "step": 26550 + }, + { + "epoch": 8.92, + "grad_norm": 0.8993094563484192, + "learning_rate": 7.380402010050252e-06, + "loss": 0.0036, + "step": 26575 + }, + { + "epoch": 8.93, + "grad_norm": 0.8611413836479187, + "learning_rate": 7.377889447236182e-06, + "loss": 0.0029, + "step": 26600 + }, + { + "epoch": 8.94, + "grad_norm": 0.818349301815033, + "learning_rate": 7.3753768844221105e-06, + "loss": 0.0031, + "step": 26625 + }, + { + "epoch": 8.95, + "grad_norm": 1.6943047046661377, + "learning_rate": 7.372864321608041e-06, + "loss": 0.004, + "step": 26650 + }, + { + "epoch": 8.96, + "grad_norm": 1.330994963645935, + "learning_rate": 7.37035175879397e-06, + "loss": 0.0035, + "step": 26675 + }, + { + "epoch": 8.97, + "grad_norm": 0.9049479365348816, + "learning_rate": 7.3678391959799e-06, + "loss": 0.0037, + "step": 26700 + }, + { + "epoch": 8.97, + "grad_norm": 1.3824459314346313, + "learning_rate": 7.36532663316583e-06, + "loss": 0.003, + "step": 26725 + }, + { + "epoch": 8.98, + "grad_norm": 0.9364242553710938, + "learning_rate": 7.362814070351759e-06, + "loss": 0.0033, + "step": 26750 + }, + { + "epoch": 8.99, + "grad_norm": 1.133497714996338, + "learning_rate": 7.360301507537689e-06, + "loss": 0.0028, + "step": 26775 + }, + { + "epoch": 9.0, + "grad_norm": 0.5863677263259888, + "learning_rate": 7.357788944723618e-06, + "loss": 0.0025, + "step": 26800 + }, + { + "epoch": 9.01, + "grad_norm": 0.5361488461494446, + "learning_rate": 7.3552763819095485e-06, + "loss": 0.0025, + "step": 26825 + }, + { + "epoch": 9.02, + "grad_norm": 0.3354671895503998, + "learning_rate": 7.352763819095478e-06, + "loss": 0.0018, + "step": 26850 + }, + { + "epoch": 9.02, + "grad_norm": 1.1034018993377686, + "learning_rate": 7.350251256281408e-06, + "loss": 0.0027, + "step": 26875 + }, + { + "epoch": 9.03, + "grad_norm": 0.5471905469894409, + "learning_rate": 7.347738693467338e-06, + "loss": 0.0016, + "step": 26900 + }, + { + "epoch": 9.04, + "grad_norm": 1.0119125843048096, + "learning_rate": 7.345226130653267e-06, + "loss": 0.0016, + "step": 26925 + }, + { + "epoch": 9.05, + "grad_norm": 0.4191970229148865, + "learning_rate": 7.342713567839197e-06, + "loss": 0.0021, + "step": 26950 + }, + { + "epoch": 9.06, + "grad_norm": 0.5145137906074524, + "learning_rate": 7.340201005025126e-06, + "loss": 0.0015, + "step": 26975 + }, + { + "epoch": 9.07, + "grad_norm": 0.41932475566864014, + "learning_rate": 7.337688442211056e-06, + "loss": 0.0014, + "step": 27000 + }, + { + "epoch": 9.07, + "eval_loss": 0.2344958633184433, + "eval_runtime": 1169.2193, + "eval_samples_per_second": 1.205, + "eval_steps_per_second": 1.205, + "eval_wer": 19.128329297820823, + "step": 27000 + }, + { + "epoch": 9.07, + "grad_norm": 1.3588162660598755, + "learning_rate": 7.335175879396985e-06, + "loss": 0.0015, + "step": 27025 + }, + { + "epoch": 9.08, + "grad_norm": 0.9418744444847107, + "learning_rate": 7.332663316582915e-06, + "loss": 0.0019, + "step": 27050 + }, + { + "epoch": 9.09, + "grad_norm": 0.9857268929481506, + "learning_rate": 7.330150753768844e-06, + "loss": 0.002, + "step": 27075 + }, + { + "epoch": 9.1, + "grad_norm": 1.0877827405929565, + "learning_rate": 7.3277386934673375e-06, + "loss": 0.0018, + "step": 27100 + }, + { + "epoch": 9.11, + "grad_norm": 0.45444586873054504, + "learning_rate": 7.325226130653267e-06, + "loss": 0.0016, + "step": 27125 + }, + { + "epoch": 9.12, + "grad_norm": 0.5933060646057129, + "learning_rate": 7.322713567839197e-06, + "loss": 0.002, + "step": 27150 + }, + { + "epoch": 9.13, + "grad_norm": 0.3800506591796875, + "learning_rate": 7.320201005025126e-06, + "loss": 0.0018, + "step": 27175 + }, + { + "epoch": 9.13, + "grad_norm": 1.0094574689865112, + "learning_rate": 7.3176884422110565e-06, + "loss": 0.0016, + "step": 27200 + }, + { + "epoch": 9.14, + "grad_norm": 0.6337624192237854, + "learning_rate": 7.315175879396985e-06, + "loss": 0.0017, + "step": 27225 + }, + { + "epoch": 9.15, + "grad_norm": 0.702015221118927, + "learning_rate": 7.312663316582915e-06, + "loss": 0.0022, + "step": 27250 + }, + { + "epoch": 9.16, + "grad_norm": 0.3081364333629608, + "learning_rate": 7.3101507537688445e-06, + "loss": 0.0023, + "step": 27275 + }, + { + "epoch": 9.17, + "grad_norm": 0.9434763789176941, + "learning_rate": 7.307638190954775e-06, + "loss": 0.0022, + "step": 27300 + }, + { + "epoch": 9.18, + "grad_norm": 0.8367679119110107, + "learning_rate": 7.305125628140705e-06, + "loss": 0.0023, + "step": 27325 + }, + { + "epoch": 9.18, + "grad_norm": 1.0480945110321045, + "learning_rate": 7.302613065326633e-06, + "loss": 0.002, + "step": 27350 + }, + { + "epoch": 9.19, + "grad_norm": 0.9110416173934937, + "learning_rate": 7.3001005025125635e-06, + "loss": 0.0021, + "step": 27375 + }, + { + "epoch": 9.2, + "grad_norm": 0.4593145549297333, + "learning_rate": 7.297587939698493e-06, + "loss": 0.0022, + "step": 27400 + }, + { + "epoch": 9.21, + "grad_norm": 0.38496044278144836, + "learning_rate": 7.295075376884423e-06, + "loss": 0.0021, + "step": 27425 + }, + { + "epoch": 9.22, + "grad_norm": 0.41791558265686035, + "learning_rate": 7.292562814070352e-06, + "loss": 0.002, + "step": 27450 + }, + { + "epoch": 9.23, + "grad_norm": 1.3201723098754883, + "learning_rate": 7.2900502512562825e-06, + "loss": 0.0029, + "step": 27475 + }, + { + "epoch": 9.23, + "grad_norm": 1.3307435512542725, + "learning_rate": 7.287537688442211e-06, + "loss": 0.0025, + "step": 27500 + }, + { + "epoch": 9.24, + "grad_norm": 2.097564220428467, + "learning_rate": 7.285025125628141e-06, + "loss": 0.0033, + "step": 27525 + }, + { + "epoch": 9.25, + "grad_norm": 0.6265026330947876, + "learning_rate": 7.282512562814071e-06, + "loss": 0.0023, + "step": 27550 + }, + { + "epoch": 9.26, + "grad_norm": 0.9419044852256775, + "learning_rate": 7.280000000000001e-06, + "loss": 0.0029, + "step": 27575 + }, + { + "epoch": 9.27, + "grad_norm": 0.46975478529930115, + "learning_rate": 7.277487437185931e-06, + "loss": 0.002, + "step": 27600 + }, + { + "epoch": 9.28, + "grad_norm": 1.3989331722259521, + "learning_rate": 7.274974874371859e-06, + "loss": 0.0027, + "step": 27625 + }, + { + "epoch": 9.28, + "grad_norm": 0.8892830610275269, + "learning_rate": 7.272462311557789e-06, + "loss": 0.0027, + "step": 27650 + }, + { + "epoch": 9.29, + "grad_norm": 0.9639279246330261, + "learning_rate": 7.269949748743719e-06, + "loss": 0.0024, + "step": 27675 + }, + { + "epoch": 9.3, + "grad_norm": 0.619758665561676, + "learning_rate": 7.267437185929649e-06, + "loss": 0.0021, + "step": 27700 + }, + { + "epoch": 9.31, + "grad_norm": 0.8998656868934631, + "learning_rate": 7.264924623115579e-06, + "loss": 0.0024, + "step": 27725 + }, + { + "epoch": 9.32, + "grad_norm": 0.177900031208992, + "learning_rate": 7.262412060301508e-06, + "loss": 0.0026, + "step": 27750 + }, + { + "epoch": 9.33, + "grad_norm": 1.3987410068511963, + "learning_rate": 7.259899497487439e-06, + "loss": 0.0031, + "step": 27775 + }, + { + "epoch": 9.34, + "grad_norm": 0.6877896785736084, + "learning_rate": 7.257386934673367e-06, + "loss": 0.0025, + "step": 27800 + }, + { + "epoch": 9.34, + "grad_norm": 1.1209075450897217, + "learning_rate": 7.254874371859297e-06, + "loss": 0.0033, + "step": 27825 + }, + { + "epoch": 9.35, + "grad_norm": 0.7882634401321411, + "learning_rate": 7.2523618090452265e-06, + "loss": 0.0028, + "step": 27850 + }, + { + "epoch": 9.36, + "grad_norm": 0.8098593354225159, + "learning_rate": 7.249849246231157e-06, + "loss": 0.0023, + "step": 27875 + }, + { + "epoch": 9.37, + "grad_norm": 0.813771665096283, + "learning_rate": 7.247336683417085e-06, + "loss": 0.0021, + "step": 27900 + }, + { + "epoch": 9.38, + "grad_norm": 0.8249033093452454, + "learning_rate": 7.244824120603015e-06, + "loss": 0.0024, + "step": 27925 + }, + { + "epoch": 9.39, + "grad_norm": 0.6500429511070251, + "learning_rate": 7.2423115577889455e-06, + "loss": 0.0024, + "step": 27950 + }, + { + "epoch": 9.39, + "grad_norm": 1.303624153137207, + "learning_rate": 7.239798994974875e-06, + "loss": 0.0031, + "step": 27975 + }, + { + "epoch": 9.4, + "grad_norm": 1.118630051612854, + "learning_rate": 7.237286432160805e-06, + "loss": 0.0033, + "step": 28000 + }, + { + "epoch": 9.4, + "eval_loss": 0.2395189106464386, + "eval_runtime": 1150.7901, + "eval_samples_per_second": 1.224, + "eval_steps_per_second": 1.224, + "eval_wer": 19.46731234866828, + "step": 28000 + }, + { + "epoch": 9.41, + "grad_norm": 0.7862541675567627, + "learning_rate": 7.234773869346734e-06, + "loss": 0.0026, + "step": 28025 + }, + { + "epoch": 9.42, + "grad_norm": 0.7369349002838135, + "learning_rate": 7.2322613065326645e-06, + "loss": 0.0024, + "step": 28050 + }, + { + "epoch": 9.43, + "grad_norm": 1.5228699445724487, + "learning_rate": 7.229748743718593e-06, + "loss": 0.0024, + "step": 28075 + }, + { + "epoch": 9.44, + "grad_norm": 0.4970576763153076, + "learning_rate": 7.227236180904523e-06, + "loss": 0.0024, + "step": 28100 + }, + { + "epoch": 9.44, + "grad_norm": 0.6479400396347046, + "learning_rate": 7.2247236180904525e-06, + "loss": 0.0027, + "step": 28125 + }, + { + "epoch": 9.45, + "grad_norm": 0.8323472738265991, + "learning_rate": 7.222211055276383e-06, + "loss": 0.0025, + "step": 28150 + }, + { + "epoch": 9.46, + "grad_norm": 1.6049880981445312, + "learning_rate": 7.219698492462313e-06, + "loss": 0.0031, + "step": 28175 + }, + { + "epoch": 9.47, + "grad_norm": 0.5006821155548096, + "learning_rate": 7.217185929648241e-06, + "loss": 0.0025, + "step": 28200 + }, + { + "epoch": 9.48, + "grad_norm": 1.2726538181304932, + "learning_rate": 7.2146733668341715e-06, + "loss": 0.0027, + "step": 28225 + }, + { + "epoch": 9.49, + "grad_norm": 0.7906463146209717, + "learning_rate": 7.212160804020101e-06, + "loss": 0.0036, + "step": 28250 + }, + { + "epoch": 9.49, + "grad_norm": 0.6896291971206665, + "learning_rate": 7.209648241206031e-06, + "loss": 0.0023, + "step": 28275 + }, + { + "epoch": 9.5, + "grad_norm": 1.8330118656158447, + "learning_rate": 7.20713567839196e-06, + "loss": 0.0027, + "step": 28300 + }, + { + "epoch": 9.51, + "grad_norm": 0.35804665088653564, + "learning_rate": 7.2046231155778905e-06, + "loss": 0.002, + "step": 28325 + }, + { + "epoch": 9.52, + "grad_norm": 1.7927500009536743, + "learning_rate": 7.20211055276382e-06, + "loss": 0.0029, + "step": 28350 + }, + { + "epoch": 9.53, + "grad_norm": 1.2823092937469482, + "learning_rate": 7.199597989949749e-06, + "loss": 0.0032, + "step": 28375 + }, + { + "epoch": 9.54, + "grad_norm": 0.7372425198554993, + "learning_rate": 7.197085427135679e-06, + "loss": 0.0036, + "step": 28400 + }, + { + "epoch": 9.54, + "grad_norm": 0.8190767168998718, + "learning_rate": 7.194572864321609e-06, + "loss": 0.0026, + "step": 28425 + }, + { + "epoch": 9.55, + "grad_norm": 0.44716012477874756, + "learning_rate": 7.192060301507539e-06, + "loss": 0.0027, + "step": 28450 + }, + { + "epoch": 9.56, + "grad_norm": 1.507477045059204, + "learning_rate": 7.189547738693467e-06, + "loss": 0.003, + "step": 28475 + }, + { + "epoch": 9.57, + "grad_norm": 1.2912808656692505, + "learning_rate": 7.187035175879397e-06, + "loss": 0.0027, + "step": 28500 + }, + { + "epoch": 9.58, + "grad_norm": 0.6646180748939514, + "learning_rate": 7.184522613065327e-06, + "loss": 0.0036, + "step": 28525 + }, + { + "epoch": 9.59, + "grad_norm": 0.20453223586082458, + "learning_rate": 7.182010050251257e-06, + "loss": 0.0026, + "step": 28550 + }, + { + "epoch": 9.6, + "grad_norm": 0.4956296682357788, + "learning_rate": 7.179497487437187e-06, + "loss": 0.0029, + "step": 28575 + }, + { + "epoch": 9.6, + "grad_norm": 1.528809666633606, + "learning_rate": 7.176984924623116e-06, + "loss": 0.0031, + "step": 28600 + }, + { + "epoch": 9.61, + "grad_norm": 0.7272359132766724, + "learning_rate": 7.174472361809047e-06, + "loss": 0.0024, + "step": 28625 + }, + { + "epoch": 9.62, + "grad_norm": 1.7348214387893677, + "learning_rate": 7.171959798994975e-06, + "loss": 0.0023, + "step": 28650 + }, + { + "epoch": 9.63, + "grad_norm": 0.6340247392654419, + "learning_rate": 7.169447236180905e-06, + "loss": 0.0022, + "step": 28675 + }, + { + "epoch": 9.64, + "grad_norm": 0.7998942732810974, + "learning_rate": 7.1669346733668345e-06, + "loss": 0.0029, + "step": 28700 + }, + { + "epoch": 9.65, + "grad_norm": 0.9592726230621338, + "learning_rate": 7.164422110552765e-06, + "loss": 0.0029, + "step": 28725 + }, + { + "epoch": 9.65, + "grad_norm": 0.8831053972244263, + "learning_rate": 7.161909547738693e-06, + "loss": 0.0023, + "step": 28750 + }, + { + "epoch": 9.66, + "grad_norm": 0.35640767216682434, + "learning_rate": 7.159396984924623e-06, + "loss": 0.0028, + "step": 28775 + }, + { + "epoch": 9.67, + "grad_norm": 1.0943571329116821, + "learning_rate": 7.1568844221105535e-06, + "loss": 0.0029, + "step": 28800 + }, + { + "epoch": 9.68, + "grad_norm": 0.7068827748298645, + "learning_rate": 7.154371859296483e-06, + "loss": 0.0037, + "step": 28825 + }, + { + "epoch": 9.69, + "grad_norm": 1.2181477546691895, + "learning_rate": 7.151859296482413e-06, + "loss": 0.0032, + "step": 28850 + }, + { + "epoch": 9.7, + "grad_norm": 1.41114342212677, + "learning_rate": 7.149346733668342e-06, + "loss": 0.0026, + "step": 28875 + }, + { + "epoch": 9.7, + "grad_norm": 1.2762014865875244, + "learning_rate": 7.1468341708542725e-06, + "loss": 0.0024, + "step": 28900 + }, + { + "epoch": 9.71, + "grad_norm": 1.4293628931045532, + "learning_rate": 7.144321608040201e-06, + "loss": 0.0023, + "step": 28925 + }, + { + "epoch": 9.72, + "grad_norm": 0.6978052258491516, + "learning_rate": 7.141809045226131e-06, + "loss": 0.0024, + "step": 28950 + }, + { + "epoch": 9.73, + "grad_norm": 0.10047564655542374, + "learning_rate": 7.139296482412061e-06, + "loss": 0.0025, + "step": 28975 + }, + { + "epoch": 9.74, + "grad_norm": 0.7088422179222107, + "learning_rate": 7.136783919597991e-06, + "loss": 0.0033, + "step": 29000 + }, + { + "epoch": 9.74, + "eval_loss": 0.23278680443763733, + "eval_runtime": 1150.1634, + "eval_samples_per_second": 1.225, + "eval_steps_per_second": 1.225, + "eval_wer": 19.315115876859217, + "step": 29000 + }, + { + "epoch": 9.75, + "grad_norm": 0.6070300936698914, + "learning_rate": 7.134271356783921e-06, + "loss": 0.0025, + "step": 29025 + }, + { + "epoch": 9.75, + "grad_norm": 0.6514598727226257, + "learning_rate": 7.131758793969849e-06, + "loss": 0.0023, + "step": 29050 + }, + { + "epoch": 9.76, + "grad_norm": 0.844368577003479, + "learning_rate": 7.1292462311557795e-06, + "loss": 0.0028, + "step": 29075 + }, + { + "epoch": 9.77, + "grad_norm": 0.8267163038253784, + "learning_rate": 7.126733668341709e-06, + "loss": 0.0031, + "step": 29100 + }, + { + "epoch": 9.78, + "grad_norm": 1.5775738954544067, + "learning_rate": 7.124221105527639e-06, + "loss": 0.0029, + "step": 29125 + }, + { + "epoch": 9.79, + "grad_norm": 0.7115822434425354, + "learning_rate": 7.121708542713568e-06, + "loss": 0.0028, + "step": 29150 + }, + { + "epoch": 9.8, + "grad_norm": 1.2840808629989624, + "learning_rate": 7.1191959798994985e-06, + "loss": 0.0026, + "step": 29175 + }, + { + "epoch": 9.81, + "grad_norm": 0.7586176991462708, + "learning_rate": 7.116683417085428e-06, + "loss": 0.0029, + "step": 29200 + }, + { + "epoch": 9.81, + "grad_norm": 0.9922399520874023, + "learning_rate": 7.114170854271357e-06, + "loss": 0.0032, + "step": 29225 + }, + { + "epoch": 9.82, + "grad_norm": 1.1848280429840088, + "learning_rate": 7.111658291457287e-06, + "loss": 0.0032, + "step": 29250 + }, + { + "epoch": 9.83, + "grad_norm": 0.936791181564331, + "learning_rate": 7.109145728643217e-06, + "loss": 0.0022, + "step": 29275 + }, + { + "epoch": 9.84, + "grad_norm": 0.5760902762413025, + "learning_rate": 7.106633165829147e-06, + "loss": 0.003, + "step": 29300 + }, + { + "epoch": 9.85, + "grad_norm": 1.1719589233398438, + "learning_rate": 7.104120603015075e-06, + "loss": 0.0034, + "step": 29325 + }, + { + "epoch": 9.86, + "grad_norm": 0.34876397252082825, + "learning_rate": 7.1016080402010054e-06, + "loss": 0.0034, + "step": 29350 + }, + { + "epoch": 9.86, + "grad_norm": 0.7336764931678772, + "learning_rate": 7.099095477386935e-06, + "loss": 0.0028, + "step": 29375 + }, + { + "epoch": 9.87, + "grad_norm": 1.2421153783798218, + "learning_rate": 7.096582914572865e-06, + "loss": 0.0026, + "step": 29400 + }, + { + "epoch": 9.88, + "grad_norm": 1.903676152229309, + "learning_rate": 7.094070351758795e-06, + "loss": 0.0032, + "step": 29425 + }, + { + "epoch": 9.89, + "grad_norm": 1.26242196559906, + "learning_rate": 7.091557788944724e-06, + "loss": 0.003, + "step": 29450 + }, + { + "epoch": 9.9, + "grad_norm": 0.906496524810791, + "learning_rate": 7.089045226130654e-06, + "loss": 0.0022, + "step": 29475 + }, + { + "epoch": 9.91, + "grad_norm": 0.5974650382995605, + "learning_rate": 7.086532663316583e-06, + "loss": 0.0029, + "step": 29500 + }, + { + "epoch": 9.91, + "grad_norm": 0.4112184941768646, + "learning_rate": 7.084020100502513e-06, + "loss": 0.0024, + "step": 29525 + }, + { + "epoch": 9.92, + "grad_norm": 0.8164620995521545, + "learning_rate": 7.0815075376884426e-06, + "loss": 0.0029, + "step": 29550 + }, + { + "epoch": 9.93, + "grad_norm": 0.7692451477050781, + "learning_rate": 7.078994974874373e-06, + "loss": 0.0019, + "step": 29575 + }, + { + "epoch": 9.94, + "grad_norm": 1.1109453439712524, + "learning_rate": 7.076482412060303e-06, + "loss": 0.0026, + "step": 29600 + }, + { + "epoch": 9.95, + "grad_norm": 0.6734873652458191, + "learning_rate": 7.073969849246231e-06, + "loss": 0.0013, + "step": 29625 + }, + { + "epoch": 9.96, + "grad_norm": 1.082666277885437, + "learning_rate": 7.0714572864321615e-06, + "loss": 0.0017, + "step": 29650 + }, + { + "epoch": 9.96, + "grad_norm": 0.6312141418457031, + "learning_rate": 7.068944723618091e-06, + "loss": 0.0028, + "step": 29675 + }, + { + "epoch": 9.97, + "grad_norm": 0.511552095413208, + "learning_rate": 7.066432160804021e-06, + "loss": 0.0021, + "step": 29700 + }, + { + "epoch": 9.98, + "grad_norm": 0.6476980447769165, + "learning_rate": 7.06391959798995e-06, + "loss": 0.002, + "step": 29725 + }, + { + "epoch": 9.99, + "grad_norm": 0.5089647769927979, + "learning_rate": 7.0614070351758805e-06, + "loss": 0.0033, + "step": 29750 + }, + { + "epoch": 10.0, + "grad_norm": 1.6847554445266724, + "learning_rate": 7.058894472361809e-06, + "loss": 0.0026, + "step": 29775 + }, + { + "epoch": 10.01, + "grad_norm": 0.5910786390304565, + "learning_rate": 7.056381909547739e-06, + "loss": 0.0019, + "step": 29800 + }, + { + "epoch": 10.02, + "grad_norm": 0.40282145142555237, + "learning_rate": 7.053869346733669e-06, + "loss": 0.0015, + "step": 29825 + }, + { + "epoch": 10.02, + "grad_norm": 0.8705132007598877, + "learning_rate": 7.051356783919599e-06, + "loss": 0.0018, + "step": 29850 + }, + { + "epoch": 10.03, + "grad_norm": 0.18961696326732635, + "learning_rate": 7.048844221105529e-06, + "loss": 0.0013, + "step": 29875 + }, + { + "epoch": 10.04, + "grad_norm": 0.33756712079048157, + "learning_rate": 7.046331658291457e-06, + "loss": 0.0016, + "step": 29900 + }, + { + "epoch": 10.05, + "grad_norm": 0.9828222393989563, + "learning_rate": 7.0438190954773875e-06, + "loss": 0.0021, + "step": 29925 + }, + { + "epoch": 10.06, + "grad_norm": 0.7343199849128723, + "learning_rate": 7.041306532663317e-06, + "loss": 0.0014, + "step": 29950 + }, + { + "epoch": 10.07, + "grad_norm": 1.0286571979522705, + "learning_rate": 7.038793969849247e-06, + "loss": 0.0014, + "step": 29975 + }, + { + "epoch": 10.07, + "grad_norm": 0.518287181854248, + "learning_rate": 7.036281407035176e-06, + "loss": 0.002, + "step": 30000 + }, + { + "epoch": 10.07, + "eval_loss": 0.2479378879070282, + "eval_runtime": 1175.5033, + "eval_samples_per_second": 1.199, + "eval_steps_per_second": 1.199, + "eval_wer": 19.10065721203736, + "step": 30000 + }, + { + "epoch": 10.08, + "grad_norm": 0.4254533052444458, + "learning_rate": 7.0337688442211065e-06, + "loss": 0.0019, + "step": 30025 + }, + { + "epoch": 10.09, + "grad_norm": 1.0164378881454468, + "learning_rate": 7.031256281407036e-06, + "loss": 0.0016, + "step": 30050 + }, + { + "epoch": 10.1, + "grad_norm": 0.7982966303825378, + "learning_rate": 7.028743718592965e-06, + "loss": 0.0013, + "step": 30075 + }, + { + "epoch": 10.11, + "grad_norm": 1.5639647245407104, + "learning_rate": 7.026231155778895e-06, + "loss": 0.0014, + "step": 30100 + }, + { + "epoch": 10.12, + "grad_norm": 0.2855882942676544, + "learning_rate": 7.023718592964825e-06, + "loss": 0.0013, + "step": 30125 + }, + { + "epoch": 10.12, + "grad_norm": 1.1018420457839966, + "learning_rate": 7.021306532663317e-06, + "loss": 0.0019, + "step": 30150 + }, + { + "epoch": 10.13, + "grad_norm": 0.2073344886302948, + "learning_rate": 7.018793969849247e-06, + "loss": 0.002, + "step": 30175 + }, + { + "epoch": 10.14, + "grad_norm": 1.1245734691619873, + "learning_rate": 7.016281407035176e-06, + "loss": 0.0019, + "step": 30200 + }, + { + "epoch": 10.15, + "grad_norm": 0.694485068321228, + "learning_rate": 7.013768844221106e-06, + "loss": 0.0015, + "step": 30225 + }, + { + "epoch": 10.16, + "grad_norm": 0.035505905747413635, + "learning_rate": 7.011256281407036e-06, + "loss": 0.0018, + "step": 30250 + }, + { + "epoch": 10.17, + "grad_norm": 0.36209836602211, + "learning_rate": 7.008743718592965e-06, + "loss": 0.0013, + "step": 30275 + }, + { + "epoch": 10.17, + "grad_norm": 0.7434169054031372, + "learning_rate": 7.0062311557788955e-06, + "loss": 0.0018, + "step": 30300 + }, + { + "epoch": 10.18, + "grad_norm": 0.12397071719169617, + "learning_rate": 7.003718592964824e-06, + "loss": 0.0026, + "step": 30325 + }, + { + "epoch": 10.19, + "grad_norm": 0.4074834883213043, + "learning_rate": 7.001206030150754e-06, + "loss": 0.0021, + "step": 30350 + }, + { + "epoch": 10.2, + "grad_norm": 0.8125656247138977, + "learning_rate": 6.9986934673366834e-06, + "loss": 0.0022, + "step": 30375 + }, + { + "epoch": 10.21, + "grad_norm": 1.0662682056427002, + "learning_rate": 6.996180904522614e-06, + "loss": 0.0025, + "step": 30400 + }, + { + "epoch": 10.22, + "grad_norm": 0.9037520885467529, + "learning_rate": 6.993668341708544e-06, + "loss": 0.0026, + "step": 30425 + }, + { + "epoch": 10.22, + "grad_norm": 0.6139481663703918, + "learning_rate": 6.991155778894473e-06, + "loss": 0.0027, + "step": 30450 + }, + { + "epoch": 10.23, + "grad_norm": 1.5295249223709106, + "learning_rate": 6.988643216080403e-06, + "loss": 0.0021, + "step": 30475 + }, + { + "epoch": 10.24, + "grad_norm": 1.6004770994186401, + "learning_rate": 6.986130653266332e-06, + "loss": 0.0022, + "step": 30500 + }, + { + "epoch": 10.25, + "grad_norm": 1.2199455499649048, + "learning_rate": 6.983618090452262e-06, + "loss": 0.0018, + "step": 30525 + }, + { + "epoch": 10.26, + "grad_norm": 0.8074867129325867, + "learning_rate": 6.981105527638191e-06, + "loss": 0.0019, + "step": 30550 + }, + { + "epoch": 10.27, + "grad_norm": 1.235579252243042, + "learning_rate": 6.978592964824121e-06, + "loss": 0.0026, + "step": 30575 + }, + { + "epoch": 10.28, + "grad_norm": 1.0096278190612793, + "learning_rate": 6.976080402010051e-06, + "loss": 0.002, + "step": 30600 + }, + { + "epoch": 10.28, + "grad_norm": 0.8041825890541077, + "learning_rate": 6.97356783919598e-06, + "loss": 0.0025, + "step": 30625 + }, + { + "epoch": 10.29, + "grad_norm": 0.14406386017799377, + "learning_rate": 6.97105527638191e-06, + "loss": 0.0015, + "step": 30650 + }, + { + "epoch": 10.3, + "grad_norm": 0.8297044634819031, + "learning_rate": 6.9685427135678396e-06, + "loss": 0.0018, + "step": 30675 + }, + { + "epoch": 10.31, + "grad_norm": 0.2588374614715576, + "learning_rate": 6.96603015075377e-06, + "loss": 0.0025, + "step": 30700 + }, + { + "epoch": 10.32, + "grad_norm": 0.5624496936798096, + "learning_rate": 6.963517587939699e-06, + "loss": 0.002, + "step": 30725 + }, + { + "epoch": 10.33, + "grad_norm": 0.8605515956878662, + "learning_rate": 6.961005025125629e-06, + "loss": 0.0023, + "step": 30750 + }, + { + "epoch": 10.33, + "grad_norm": 0.9707594513893127, + "learning_rate": 6.958492462311558e-06, + "loss": 0.0021, + "step": 30775 + }, + { + "epoch": 10.34, + "grad_norm": 0.7225198149681091, + "learning_rate": 6.955979899497488e-06, + "loss": 0.0021, + "step": 30800 + }, + { + "epoch": 10.35, + "grad_norm": 0.8352168798446655, + "learning_rate": 6.953467336683417e-06, + "loss": 0.0023, + "step": 30825 + }, + { + "epoch": 10.36, + "grad_norm": 1.499873161315918, + "learning_rate": 6.950954773869347e-06, + "loss": 0.0018, + "step": 30850 + }, + { + "epoch": 10.37, + "grad_norm": 0.7369962930679321, + "learning_rate": 6.9484422110552775e-06, + "loss": 0.0022, + "step": 30875 + }, + { + "epoch": 10.38, + "grad_norm": 0.4829622507095337, + "learning_rate": 6.945929648241206e-06, + "loss": 0.0016, + "step": 30900 + }, + { + "epoch": 10.38, + "grad_norm": 1.0433244705200195, + "learning_rate": 6.943417085427136e-06, + "loss": 0.0025, + "step": 30925 + }, + { + "epoch": 10.39, + "grad_norm": 0.42215368151664734, + "learning_rate": 6.9409045226130655e-06, + "loss": 0.0022, + "step": 30950 + }, + { + "epoch": 10.4, + "grad_norm": 0.6348364949226379, + "learning_rate": 6.938391959798996e-06, + "loss": 0.0027, + "step": 30975 + }, + { + "epoch": 10.41, + "grad_norm": 1.542001485824585, + "learning_rate": 6.935879396984925e-06, + "loss": 0.0019, + "step": 31000 + }, + { + "epoch": 10.41, + "eval_loss": 0.24769435822963715, + "eval_runtime": 1168.0841, + "eval_samples_per_second": 1.206, + "eval_steps_per_second": 1.206, + "eval_wer": 18.851608439986165, + "step": 31000 + }, + { + "epoch": 10.42, + "grad_norm": 1.2974648475646973, + "learning_rate": 6.933366834170855e-06, + "loss": 0.0022, + "step": 31025 + }, + { + "epoch": 10.43, + "grad_norm": 0.3424508273601532, + "learning_rate": 6.930854271356785e-06, + "loss": 0.0018, + "step": 31050 + }, + { + "epoch": 10.43, + "grad_norm": 1.4966243505477905, + "learning_rate": 6.928341708542714e-06, + "loss": 0.0026, + "step": 31075 + }, + { + "epoch": 10.44, + "grad_norm": 0.9041695594787598, + "learning_rate": 6.925829145728644e-06, + "loss": 0.0022, + "step": 31100 + }, + { + "epoch": 10.45, + "grad_norm": 0.457237184047699, + "learning_rate": 6.923316582914573e-06, + "loss": 0.002, + "step": 31125 + }, + { + "epoch": 10.46, + "grad_norm": 0.38130852580070496, + "learning_rate": 6.9208040201005035e-06, + "loss": 0.0017, + "step": 31150 + }, + { + "epoch": 10.47, + "grad_norm": 1.135223627090454, + "learning_rate": 6.918291457286432e-06, + "loss": 0.0021, + "step": 31175 + }, + { + "epoch": 10.48, + "grad_norm": 1.015674352645874, + "learning_rate": 6.915778894472362e-06, + "loss": 0.0023, + "step": 31200 + }, + { + "epoch": 10.49, + "grad_norm": 1.0870518684387207, + "learning_rate": 6.9132663316582915e-06, + "loss": 0.0021, + "step": 31225 + }, + { + "epoch": 10.49, + "grad_norm": 1.0007672309875488, + "learning_rate": 6.910753768844222e-06, + "loss": 0.0024, + "step": 31250 + }, + { + "epoch": 10.5, + "grad_norm": 0.43743759393692017, + "learning_rate": 6.908241206030152e-06, + "loss": 0.0028, + "step": 31275 + }, + { + "epoch": 10.51, + "grad_norm": 1.5357941389083862, + "learning_rate": 6.905728643216081e-06, + "loss": 0.0021, + "step": 31300 + }, + { + "epoch": 10.52, + "grad_norm": 0.7542394995689392, + "learning_rate": 6.903216080402011e-06, + "loss": 0.0022, + "step": 31325 + }, + { + "epoch": 10.53, + "grad_norm": 0.4761495888233185, + "learning_rate": 6.90070351758794e-06, + "loss": 0.0025, + "step": 31350 + }, + { + "epoch": 10.54, + "grad_norm": 0.5561754107475281, + "learning_rate": 6.89819095477387e-06, + "loss": 0.0019, + "step": 31375 + }, + { + "epoch": 10.54, + "grad_norm": 1.1723294258117676, + "learning_rate": 6.895678391959799e-06, + "loss": 0.0023, + "step": 31400 + }, + { + "epoch": 10.55, + "grad_norm": 0.7220836281776428, + "learning_rate": 6.8931658291457294e-06, + "loss": 0.0028, + "step": 31425 + }, + { + "epoch": 10.56, + "grad_norm": 0.15899284183979034, + "learning_rate": 6.890653266331658e-06, + "loss": 0.0015, + "step": 31450 + }, + { + "epoch": 10.57, + "grad_norm": 1.0979615449905396, + "learning_rate": 6.888140703517588e-06, + "loss": 0.0018, + "step": 31475 + }, + { + "epoch": 10.58, + "grad_norm": 1.9237209558486938, + "learning_rate": 6.885628140703518e-06, + "loss": 0.0027, + "step": 31500 + }, + { + "epoch": 10.59, + "grad_norm": 1.4298456907272339, + "learning_rate": 6.8831155778894476e-06, + "loss": 0.0021, + "step": 31525 + }, + { + "epoch": 10.59, + "grad_norm": 0.69566810131073, + "learning_rate": 6.880603015075378e-06, + "loss": 0.0021, + "step": 31550 + }, + { + "epoch": 10.6, + "grad_norm": 0.6619472503662109, + "learning_rate": 6.878090452261307e-06, + "loss": 0.0024, + "step": 31575 + }, + { + "epoch": 10.61, + "grad_norm": 1.240385890007019, + "learning_rate": 6.875577889447237e-06, + "loss": 0.0016, + "step": 31600 + }, + { + "epoch": 10.62, + "grad_norm": 0.4001914858818054, + "learning_rate": 6.873065326633166e-06, + "loss": 0.002, + "step": 31625 + }, + { + "epoch": 10.63, + "grad_norm": 0.5349460244178772, + "learning_rate": 6.870552763819096e-06, + "loss": 0.0021, + "step": 31650 + }, + { + "epoch": 10.64, + "grad_norm": 1.4519171714782715, + "learning_rate": 6.868040201005026e-06, + "loss": 0.0035, + "step": 31675 + }, + { + "epoch": 10.64, + "grad_norm": 0.2717239260673523, + "learning_rate": 6.865527638190955e-06, + "loss": 0.0027, + "step": 31700 + }, + { + "epoch": 10.65, + "grad_norm": 0.4394519627094269, + "learning_rate": 6.8630150753768855e-06, + "loss": 0.0023, + "step": 31725 + }, + { + "epoch": 10.66, + "grad_norm": 1.5142409801483154, + "learning_rate": 6.860502512562814e-06, + "loss": 0.0021, + "step": 31750 + }, + { + "epoch": 10.67, + "grad_norm": 0.749886691570282, + "learning_rate": 6.857989949748744e-06, + "loss": 0.002, + "step": 31775 + }, + { + "epoch": 10.68, + "grad_norm": 0.7697947025299072, + "learning_rate": 6.8554773869346735e-06, + "loss": 0.0017, + "step": 31800 + }, + { + "epoch": 10.69, + "grad_norm": 1.3914836645126343, + "learning_rate": 6.852964824120604e-06, + "loss": 0.0024, + "step": 31825 + }, + { + "epoch": 10.7, + "grad_norm": 0.6113418340682983, + "learning_rate": 6.850452261306533e-06, + "loss": 0.0028, + "step": 31850 + }, + { + "epoch": 10.7, + "grad_norm": 0.5193037986755371, + "learning_rate": 6.847939698492463e-06, + "loss": 0.0029, + "step": 31875 + }, + { + "epoch": 10.71, + "grad_norm": 0.9664058089256287, + "learning_rate": 6.845427135678393e-06, + "loss": 0.0026, + "step": 31900 + }, + { + "epoch": 10.72, + "grad_norm": 2.086005926132202, + "learning_rate": 6.842914572864322e-06, + "loss": 0.002, + "step": 31925 + }, + { + "epoch": 10.73, + "grad_norm": 0.4560588002204895, + "learning_rate": 6.840402010050252e-06, + "loss": 0.0019, + "step": 31950 + }, + { + "epoch": 10.74, + "grad_norm": 0.22951118648052216, + "learning_rate": 6.837889447236181e-06, + "loss": 0.0026, + "step": 31975 + }, + { + "epoch": 10.75, + "grad_norm": 1.112345576286316, + "learning_rate": 6.8353768844221115e-06, + "loss": 0.0023, + "step": 32000 + }, + { + "epoch": 10.75, + "eval_loss": 0.2456785887479782, + "eval_runtime": 1167.3706, + "eval_samples_per_second": 1.207, + "eval_steps_per_second": 1.207, + "eval_wer": 19.501902455897614, + "step": 32000 + }, + { + "epoch": 10.75, + "grad_norm": 2.1152195930480957, + "learning_rate": 6.83286432160804e-06, + "loss": 0.0027, + "step": 32025 + }, + { + "epoch": 10.76, + "grad_norm": 0.8529070615768433, + "learning_rate": 6.83035175879397e-06, + "loss": 0.0022, + "step": 32050 + }, + { + "epoch": 10.77, + "grad_norm": 1.6170414686203003, + "learning_rate": 6.8278391959798995e-06, + "loss": 0.0029, + "step": 32075 + }, + { + "epoch": 10.78, + "grad_norm": 0.6770104169845581, + "learning_rate": 6.82532663316583e-06, + "loss": 0.0021, + "step": 32100 + }, + { + "epoch": 10.79, + "grad_norm": 0.9939437508583069, + "learning_rate": 6.82281407035176e-06, + "loss": 0.0028, + "step": 32125 + }, + { + "epoch": 10.8, + "grad_norm": 1.097970962524414, + "learning_rate": 6.820301507537689e-06, + "loss": 0.0026, + "step": 32150 + }, + { + "epoch": 10.8, + "grad_norm": 1.0503737926483154, + "learning_rate": 6.817788944723619e-06, + "loss": 0.0022, + "step": 32175 + }, + { + "epoch": 10.81, + "grad_norm": 0.1565825641155243, + "learning_rate": 6.815276381909548e-06, + "loss": 0.0024, + "step": 32200 + }, + { + "epoch": 10.82, + "grad_norm": 1.320650577545166, + "learning_rate": 6.812763819095478e-06, + "loss": 0.0023, + "step": 32225 + }, + { + "epoch": 10.83, + "grad_norm": 0.9462971091270447, + "learning_rate": 6.810251256281407e-06, + "loss": 0.0026, + "step": 32250 + }, + { + "epoch": 10.84, + "grad_norm": 0.6280996203422546, + "learning_rate": 6.8078391959799e-06, + "loss": 0.003, + "step": 32275 + }, + { + "epoch": 10.85, + "grad_norm": 1.0932303667068481, + "learning_rate": 6.80532663316583e-06, + "loss": 0.0026, + "step": 32300 + }, + { + "epoch": 10.85, + "grad_norm": 0.8741762042045593, + "learning_rate": 6.80281407035176e-06, + "loss": 0.0017, + "step": 32325 + }, + { + "epoch": 10.86, + "grad_norm": 0.43325525522232056, + "learning_rate": 6.8003015075376885e-06, + "loss": 0.0015, + "step": 32350 + }, + { + "epoch": 10.87, + "grad_norm": 1.437196969985962, + "learning_rate": 6.797788944723619e-06, + "loss": 0.0028, + "step": 32375 + }, + { + "epoch": 10.88, + "grad_norm": 1.0322321653366089, + "learning_rate": 6.795276381909548e-06, + "loss": 0.0016, + "step": 32400 + }, + { + "epoch": 10.89, + "grad_norm": 0.5999236106872559, + "learning_rate": 6.792763819095478e-06, + "loss": 0.002, + "step": 32425 + }, + { + "epoch": 10.9, + "grad_norm": 0.7355241775512695, + "learning_rate": 6.7902512562814074e-06, + "loss": 0.0023, + "step": 32450 + }, + { + "epoch": 10.9, + "grad_norm": 0.8690030574798584, + "learning_rate": 6.787738693467338e-06, + "loss": 0.0027, + "step": 32475 + }, + { + "epoch": 10.91, + "grad_norm": 0.8388867974281311, + "learning_rate": 6.785226130653268e-06, + "loss": 0.0021, + "step": 32500 + }, + { + "epoch": 10.92, + "grad_norm": 1.197745442390442, + "learning_rate": 6.782713567839196e-06, + "loss": 0.0023, + "step": 32525 + }, + { + "epoch": 10.93, + "grad_norm": 0.95402991771698, + "learning_rate": 6.7802010050251264e-06, + "loss": 0.0019, + "step": 32550 + }, + { + "epoch": 10.94, + "grad_norm": 0.23213282227516174, + "learning_rate": 6.777688442211056e-06, + "loss": 0.0017, + "step": 32575 + }, + { + "epoch": 10.95, + "grad_norm": 0.1814933866262436, + "learning_rate": 6.775175879396986e-06, + "loss": 0.0019, + "step": 32600 + }, + { + "epoch": 10.96, + "grad_norm": 0.3899034559726715, + "learning_rate": 6.772663316582914e-06, + "loss": 0.0023, + "step": 32625 + }, + { + "epoch": 10.96, + "grad_norm": 0.570696234703064, + "learning_rate": 6.7701507537688446e-06, + "loss": 0.0023, + "step": 32650 + }, + { + "epoch": 10.97, + "grad_norm": 0.7496944665908813, + "learning_rate": 6.767638190954774e-06, + "loss": 0.002, + "step": 32675 + }, + { + "epoch": 10.98, + "grad_norm": 0.5882190465927124, + "learning_rate": 6.765125628140704e-06, + "loss": 0.0032, + "step": 32700 + }, + { + "epoch": 10.99, + "grad_norm": 1.8866983652114868, + "learning_rate": 6.762613065326634e-06, + "loss": 0.0029, + "step": 32725 + }, + { + "epoch": 11.0, + "grad_norm": 1.1194075345993042, + "learning_rate": 6.7601005025125636e-06, + "loss": 0.0016, + "step": 32750 + }, + { + "epoch": 11.01, + "grad_norm": 0.6840092539787292, + "learning_rate": 6.757587939698494e-06, + "loss": 0.0018, + "step": 32775 + }, + { + "epoch": 11.01, + "grad_norm": 0.42216795682907104, + "learning_rate": 6.755075376884422e-06, + "loss": 0.0011, + "step": 32800 + }, + { + "epoch": 11.02, + "grad_norm": 0.22165241837501526, + "learning_rate": 6.752562814070352e-06, + "loss": 0.0013, + "step": 32825 + }, + { + "epoch": 11.03, + "grad_norm": 0.27133262157440186, + "learning_rate": 6.750050251256282e-06, + "loss": 0.0014, + "step": 32850 + }, + { + "epoch": 11.04, + "grad_norm": 0.2668532133102417, + "learning_rate": 6.747537688442212e-06, + "loss": 0.0017, + "step": 32875 + }, + { + "epoch": 11.05, + "grad_norm": 0.9007813334465027, + "learning_rate": 6.74502512562814e-06, + "loss": 0.001, + "step": 32900 + }, + { + "epoch": 11.06, + "grad_norm": 0.7606000304222107, + "learning_rate": 6.7425125628140705e-06, + "loss": 0.0012, + "step": 32925 + }, + { + "epoch": 11.06, + "grad_norm": 0.13741832971572876, + "learning_rate": 6.740000000000001e-06, + "loss": 0.0012, + "step": 32950 + }, + { + "epoch": 11.07, + "grad_norm": 0.25934314727783203, + "learning_rate": 6.73748743718593e-06, + "loss": 0.0013, + "step": 32975 + }, + { + "epoch": 11.08, + "grad_norm": 0.5373954772949219, + "learning_rate": 6.73497487437186e-06, + "loss": 0.0016, + "step": 33000 + }, + { + "epoch": 11.08, + "eval_loss": 0.24234539270401, + "eval_runtime": 1167.612, + "eval_samples_per_second": 1.207, + "eval_steps_per_second": 1.207, + "eval_wer": 18.865444482877898, + "step": 33000 + }, + { + "epoch": 11.09, + "grad_norm": 1.108511209487915, + "learning_rate": 6.7324623115577895e-06, + "loss": 0.0016, + "step": 33025 + }, + { + "epoch": 11.1, + "grad_norm": 1.9591296911239624, + "learning_rate": 6.72994974874372e-06, + "loss": 0.0018, + "step": 33050 + }, + { + "epoch": 11.11, + "grad_norm": 1.0572986602783203, + "learning_rate": 6.727437185929648e-06, + "loss": 0.0013, + "step": 33075 + }, + { + "epoch": 11.11, + "grad_norm": 0.7267471551895142, + "learning_rate": 6.724924623115578e-06, + "loss": 0.0016, + "step": 33100 + }, + { + "epoch": 11.12, + "grad_norm": 0.23198021948337555, + "learning_rate": 6.7224120603015085e-06, + "loss": 0.0012, + "step": 33125 + }, + { + "epoch": 11.13, + "grad_norm": 1.3598743677139282, + "learning_rate": 6.719899497487438e-06, + "loss": 0.0014, + "step": 33150 + }, + { + "epoch": 11.14, + "grad_norm": 0.14323030412197113, + "learning_rate": 6.717386934673368e-06, + "loss": 0.0011, + "step": 33175 + }, + { + "epoch": 11.15, + "grad_norm": 0.5138562321662903, + "learning_rate": 6.7148743718592965e-06, + "loss": 0.002, + "step": 33200 + }, + { + "epoch": 11.16, + "grad_norm": 0.836828887462616, + "learning_rate": 6.712361809045227e-06, + "loss": 0.0018, + "step": 33225 + }, + { + "epoch": 11.17, + "grad_norm": 0.7540555000305176, + "learning_rate": 6.709849246231156e-06, + "loss": 0.002, + "step": 33250 + }, + { + "epoch": 11.17, + "grad_norm": 0.9265022277832031, + "learning_rate": 6.707336683417086e-06, + "loss": 0.0018, + "step": 33275 + }, + { + "epoch": 11.18, + "grad_norm": 0.18388992547988892, + "learning_rate": 6.7048241206030155e-06, + "loss": 0.0015, + "step": 33300 + }, + { + "epoch": 11.19, + "grad_norm": 0.959537923336029, + "learning_rate": 6.702311557788946e-06, + "loss": 0.002, + "step": 33325 + }, + { + "epoch": 11.2, + "grad_norm": 0.8531920313835144, + "learning_rate": 6.699798994974876e-06, + "loss": 0.0019, + "step": 33350 + }, + { + "epoch": 11.21, + "grad_norm": 0.8115883469581604, + "learning_rate": 6.697286432160804e-06, + "loss": 0.0025, + "step": 33375 + }, + { + "epoch": 11.22, + "grad_norm": 0.8204888105392456, + "learning_rate": 6.6947738693467344e-06, + "loss": 0.0014, + "step": 33400 + }, + { + "epoch": 11.22, + "grad_norm": 0.31209179759025574, + "learning_rate": 6.692261306532664e-06, + "loss": 0.0015, + "step": 33425 + }, + { + "epoch": 11.23, + "grad_norm": 1.3184469938278198, + "learning_rate": 6.689748743718594e-06, + "loss": 0.0014, + "step": 33450 + }, + { + "epoch": 11.24, + "grad_norm": 0.8174030184745789, + "learning_rate": 6.687236180904522e-06, + "loss": 0.0021, + "step": 33475 + }, + { + "epoch": 11.25, + "grad_norm": 0.7501024603843689, + "learning_rate": 6.684723618090453e-06, + "loss": 0.002, + "step": 33500 + }, + { + "epoch": 11.26, + "grad_norm": 0.9184140563011169, + "learning_rate": 6.682211055276382e-06, + "loss": 0.0019, + "step": 33525 + }, + { + "epoch": 11.27, + "grad_norm": 0.16426929831504822, + "learning_rate": 6.679698492462312e-06, + "loss": 0.0015, + "step": 33550 + }, + { + "epoch": 11.27, + "grad_norm": 0.16186100244522095, + "learning_rate": 6.677185929648242e-06, + "loss": 0.0015, + "step": 33575 + }, + { + "epoch": 11.28, + "grad_norm": 1.0120738744735718, + "learning_rate": 6.6746733668341716e-06, + "loss": 0.0016, + "step": 33600 + }, + { + "epoch": 11.29, + "grad_norm": 0.33094656467437744, + "learning_rate": 6.672160804020102e-06, + "loss": 0.0018, + "step": 33625 + }, + { + "epoch": 11.3, + "grad_norm": 1.1612293720245361, + "learning_rate": 6.66964824120603e-06, + "loss": 0.0013, + "step": 33650 + }, + { + "epoch": 11.31, + "grad_norm": 0.45450013875961304, + "learning_rate": 6.66713567839196e-06, + "loss": 0.0012, + "step": 33675 + }, + { + "epoch": 11.32, + "grad_norm": 0.3503556251525879, + "learning_rate": 6.66462311557789e-06, + "loss": 0.0018, + "step": 33700 + }, + { + "epoch": 11.32, + "grad_norm": 0.5025960803031921, + "learning_rate": 6.66211055276382e-06, + "loss": 0.0017, + "step": 33725 + }, + { + "epoch": 11.33, + "grad_norm": 0.8199736475944519, + "learning_rate": 6.65959798994975e-06, + "loss": 0.0018, + "step": 33750 + }, + { + "epoch": 11.34, + "grad_norm": 0.49033477902412415, + "learning_rate": 6.6570854271356785e-06, + "loss": 0.0019, + "step": 33775 + }, + { + "epoch": 11.35, + "grad_norm": 1.1336920261383057, + "learning_rate": 6.654572864321609e-06, + "loss": 0.0016, + "step": 33800 + }, + { + "epoch": 11.36, + "grad_norm": 0.35600781440734863, + "learning_rate": 6.652060301507538e-06, + "loss": 0.0017, + "step": 33825 + }, + { + "epoch": 11.37, + "grad_norm": 0.5744491815567017, + "learning_rate": 6.649547738693468e-06, + "loss": 0.0024, + "step": 33850 + }, + { + "epoch": 11.38, + "grad_norm": 0.9103775024414062, + "learning_rate": 6.6470351758793975e-06, + "loss": 0.0028, + "step": 33875 + }, + { + "epoch": 11.38, + "grad_norm": 0.7461689710617065, + "learning_rate": 6.644522613065328e-06, + "loss": 0.0025, + "step": 33900 + }, + { + "epoch": 11.39, + "grad_norm": 0.8179909586906433, + "learning_rate": 6.642010050251256e-06, + "loss": 0.0022, + "step": 33925 + }, + { + "epoch": 11.4, + "grad_norm": 1.125429630279541, + "learning_rate": 6.639497487437186e-06, + "loss": 0.0022, + "step": 33950 + }, + { + "epoch": 11.41, + "grad_norm": 0.563933789730072, + "learning_rate": 6.6369849246231165e-06, + "loss": 0.0025, + "step": 33975 + }, + { + "epoch": 11.42, + "grad_norm": 0.18651027977466583, + "learning_rate": 6.634472361809046e-06, + "loss": 0.0016, + "step": 34000 + }, + { + "epoch": 11.42, + "eval_loss": 0.2484196424484253, + "eval_runtime": 1132.0504, + "eval_samples_per_second": 1.245, + "eval_steps_per_second": 1.245, + "eval_wer": 19.612590799031477, + "step": 34000 + }, + { + "epoch": 11.43, + "grad_norm": 1.0618479251861572, + "learning_rate": 6.631959798994976e-06, + "loss": 0.0019, + "step": 34025 + }, + { + "epoch": 11.43, + "grad_norm": 0.5403735041618347, + "learning_rate": 6.6294472361809045e-06, + "loss": 0.0017, + "step": 34050 + }, + { + "epoch": 11.44, + "grad_norm": 0.27478355169296265, + "learning_rate": 6.626934673366835e-06, + "loss": 0.0018, + "step": 34075 + }, + { + "epoch": 11.45, + "grad_norm": 1.1219574213027954, + "learning_rate": 6.624422110552764e-06, + "loss": 0.0017, + "step": 34100 + }, + { + "epoch": 11.46, + "grad_norm": 0.5096073150634766, + "learning_rate": 6.621909547738694e-06, + "loss": 0.0018, + "step": 34125 + }, + { + "epoch": 11.47, + "grad_norm": 0.3355829417705536, + "learning_rate": 6.6193969849246235e-06, + "loss": 0.0016, + "step": 34150 + }, + { + "epoch": 11.48, + "grad_norm": 1.3589503765106201, + "learning_rate": 6.616884422110554e-06, + "loss": 0.0022, + "step": 34175 + }, + { + "epoch": 11.48, + "grad_norm": 0.32955533266067505, + "learning_rate": 6.614371859296484e-06, + "loss": 0.0018, + "step": 34200 + }, + { + "epoch": 11.49, + "grad_norm": 0.18016181886196136, + "learning_rate": 6.611859296482412e-06, + "loss": 0.0017, + "step": 34225 + }, + { + "epoch": 11.5, + "grad_norm": 1.3165570497512817, + "learning_rate": 6.6093467336683424e-06, + "loss": 0.0018, + "step": 34250 + }, + { + "epoch": 11.51, + "grad_norm": 0.15402089059352875, + "learning_rate": 6.606834170854272e-06, + "loss": 0.0019, + "step": 34275 + }, + { + "epoch": 11.52, + "grad_norm": 0.6390677094459534, + "learning_rate": 6.604321608040202e-06, + "loss": 0.002, + "step": 34300 + }, + { + "epoch": 11.53, + "grad_norm": 0.37741905450820923, + "learning_rate": 6.60180904522613e-06, + "loss": 0.0022, + "step": 34325 + }, + { + "epoch": 11.53, + "grad_norm": 0.6571829915046692, + "learning_rate": 6.599296482412061e-06, + "loss": 0.0013, + "step": 34350 + }, + { + "epoch": 11.54, + "grad_norm": 1.0481172800064087, + "learning_rate": 6.596783919597991e-06, + "loss": 0.0021, + "step": 34375 + }, + { + "epoch": 11.55, + "grad_norm": 0.8072773218154907, + "learning_rate": 6.59427135678392e-06, + "loss": 0.0023, + "step": 34400 + }, + { + "epoch": 11.56, + "grad_norm": 0.9526398777961731, + "learning_rate": 6.59175879396985e-06, + "loss": 0.0023, + "step": 34425 + }, + { + "epoch": 11.57, + "grad_norm": 0.9781013131141663, + "learning_rate": 6.5892462311557796e-06, + "loss": 0.0019, + "step": 34450 + }, + { + "epoch": 11.58, + "grad_norm": 0.6457613706588745, + "learning_rate": 6.58673366834171e-06, + "loss": 0.0016, + "step": 34475 + }, + { + "epoch": 11.58, + "grad_norm": 1.1452596187591553, + "learning_rate": 6.584221105527638e-06, + "loss": 0.0027, + "step": 34500 + }, + { + "epoch": 11.59, + "grad_norm": 0.7925880551338196, + "learning_rate": 6.581708542713568e-06, + "loss": 0.0019, + "step": 34525 + }, + { + "epoch": 11.6, + "grad_norm": 0.6951744556427002, + "learning_rate": 6.579195979899498e-06, + "loss": 0.0019, + "step": 34550 + }, + { + "epoch": 11.61, + "grad_norm": 0.12126651406288147, + "learning_rate": 6.576683417085428e-06, + "loss": 0.0018, + "step": 34575 + }, + { + "epoch": 11.62, + "grad_norm": 0.24097226560115814, + "learning_rate": 6.574170854271358e-06, + "loss": 0.0018, + "step": 34600 + }, + { + "epoch": 11.63, + "grad_norm": 0.5861985683441162, + "learning_rate": 6.5716582914572865e-06, + "loss": 0.0015, + "step": 34625 + }, + { + "epoch": 11.64, + "grad_norm": 1.2098243236541748, + "learning_rate": 6.569145728643217e-06, + "loss": 0.0017, + "step": 34650 + }, + { + "epoch": 11.64, + "grad_norm": 0.9485503435134888, + "learning_rate": 6.566633165829146e-06, + "loss": 0.0018, + "step": 34675 + }, + { + "epoch": 11.65, + "grad_norm": 0.4863835871219635, + "learning_rate": 6.564120603015076e-06, + "loss": 0.0021, + "step": 34700 + }, + { + "epoch": 11.66, + "grad_norm": 0.6300594806671143, + "learning_rate": 6.5616080402010055e-06, + "loss": 0.002, + "step": 34725 + }, + { + "epoch": 11.67, + "grad_norm": 0.6393245458602905, + "learning_rate": 6.559095477386936e-06, + "loss": 0.0021, + "step": 34750 + }, + { + "epoch": 11.68, + "grad_norm": 1.0828789472579956, + "learning_rate": 6.556683417085428e-06, + "loss": 0.0024, + "step": 34775 + }, + { + "epoch": 11.69, + "grad_norm": 0.6080409288406372, + "learning_rate": 6.554170854271357e-06, + "loss": 0.0015, + "step": 34800 + }, + { + "epoch": 11.69, + "grad_norm": 0.4709225296974182, + "learning_rate": 6.551658291457287e-06, + "loss": 0.0017, + "step": 34825 + }, + { + "epoch": 11.7, + "grad_norm": 1.0536298751831055, + "learning_rate": 6.549145728643217e-06, + "loss": 0.0023, + "step": 34850 + }, + { + "epoch": 11.71, + "grad_norm": 0.24995240569114685, + "learning_rate": 6.546633165829146e-06, + "loss": 0.0015, + "step": 34875 + }, + { + "epoch": 11.72, + "grad_norm": 0.61530601978302, + "learning_rate": 6.544120603015076e-06, + "loss": 0.0022, + "step": 34900 + }, + { + "epoch": 11.73, + "grad_norm": 0.9885349869728088, + "learning_rate": 6.541608040201005e-06, + "loss": 0.0021, + "step": 34925 + }, + { + "epoch": 11.74, + "grad_norm": 0.14273986220359802, + "learning_rate": 6.539095477386935e-06, + "loss": 0.0025, + "step": 34950 + }, + { + "epoch": 11.74, + "grad_norm": 0.6179981827735901, + "learning_rate": 6.536582914572864e-06, + "loss": 0.0025, + "step": 34975 + }, + { + "epoch": 11.75, + "grad_norm": 0.500377357006073, + "learning_rate": 6.5340703517587945e-06, + "loss": 0.0018, + "step": 35000 + }, + { + "epoch": 11.75, + "eval_loss": 0.2530178725719452, + "eval_runtime": 1135.6317, + "eval_samples_per_second": 1.241, + "eval_steps_per_second": 1.241, + "eval_wer": 19.045313040470425, + "step": 35000 + }, + { + "epoch": 11.76, + "grad_norm": 1.029817819595337, + "learning_rate": 6.531557788944725e-06, + "loss": 0.0017, + "step": 35025 + }, + { + "epoch": 11.77, + "grad_norm": 0.7394125461578369, + "learning_rate": 6.529045226130654e-06, + "loss": 0.0018, + "step": 35050 + }, + { + "epoch": 11.78, + "grad_norm": 0.4833681285381317, + "learning_rate": 6.526532663316583e-06, + "loss": 0.0022, + "step": 35075 + }, + { + "epoch": 11.79, + "grad_norm": 0.8174492120742798, + "learning_rate": 6.524020100502513e-06, + "loss": 0.0022, + "step": 35100 + }, + { + "epoch": 11.79, + "grad_norm": 0.5029131174087524, + "learning_rate": 6.521507537688443e-06, + "loss": 0.0017, + "step": 35125 + }, + { + "epoch": 11.8, + "grad_norm": 0.8391981720924377, + "learning_rate": 6.518994974874372e-06, + "loss": 0.0019, + "step": 35150 + }, + { + "epoch": 11.81, + "grad_norm": 1.0692981481552124, + "learning_rate": 6.516482412060302e-06, + "loss": 0.0019, + "step": 35175 + }, + { + "epoch": 11.82, + "grad_norm": 1.5914671421051025, + "learning_rate": 6.5139698492462325e-06, + "loss": 0.0027, + "step": 35200 + }, + { + "epoch": 11.83, + "grad_norm": 0.37589961290359497, + "learning_rate": 6.511457286432161e-06, + "loss": 0.0024, + "step": 35225 + }, + { + "epoch": 11.84, + "grad_norm": 0.4075678288936615, + "learning_rate": 6.508944723618091e-06, + "loss": 0.0027, + "step": 35250 + }, + { + "epoch": 11.85, + "grad_norm": 1.1088883876800537, + "learning_rate": 6.5064321608040205e-06, + "loss": 0.0026, + "step": 35275 + }, + { + "epoch": 11.85, + "grad_norm": 1.5964425802230835, + "learning_rate": 6.503919597989951e-06, + "loss": 0.0021, + "step": 35300 + }, + { + "epoch": 11.86, + "grad_norm": 0.7802677750587463, + "learning_rate": 6.50140703517588e-06, + "loss": 0.002, + "step": 35325 + }, + { + "epoch": 11.87, + "grad_norm": 0.3625680208206177, + "learning_rate": 6.498894472361809e-06, + "loss": 0.0011, + "step": 35350 + }, + { + "epoch": 11.88, + "grad_norm": 0.8808245062828064, + "learning_rate": 6.496381909547739e-06, + "loss": 0.0018, + "step": 35375 + }, + { + "epoch": 11.89, + "grad_norm": 0.5885035991668701, + "learning_rate": 6.493869346733669e-06, + "loss": 0.0019, + "step": 35400 + }, + { + "epoch": 11.9, + "grad_norm": 1.0593116283416748, + "learning_rate": 6.491356783919599e-06, + "loss": 0.0017, + "step": 35425 + }, + { + "epoch": 11.9, + "grad_norm": 0.6890765428543091, + "learning_rate": 6.488844221105528e-06, + "loss": 0.0013, + "step": 35450 + }, + { + "epoch": 11.91, + "grad_norm": 0.749198853969574, + "learning_rate": 6.4863316582914584e-06, + "loss": 0.0019, + "step": 35475 + }, + { + "epoch": 11.92, + "grad_norm": 0.2506113350391388, + "learning_rate": 6.483819095477387e-06, + "loss": 0.0022, + "step": 35500 + }, + { + "epoch": 11.93, + "grad_norm": 0.9690123796463013, + "learning_rate": 6.481306532663317e-06, + "loss": 0.0028, + "step": 35525 + }, + { + "epoch": 11.94, + "grad_norm": 0.812484860420227, + "learning_rate": 6.478793969849246e-06, + "loss": 0.0023, + "step": 35550 + }, + { + "epoch": 11.95, + "grad_norm": 0.9528934359550476, + "learning_rate": 6.476281407035177e-06, + "loss": 0.0022, + "step": 35575 + }, + { + "epoch": 11.95, + "grad_norm": 0.5397972464561462, + "learning_rate": 6.473768844221106e-06, + "loss": 0.0021, + "step": 35600 + }, + { + "epoch": 11.96, + "grad_norm": 0.8131850361824036, + "learning_rate": 6.471256281407036e-06, + "loss": 0.0021, + "step": 35625 + }, + { + "epoch": 11.97, + "grad_norm": 0.2996496260166168, + "learning_rate": 6.468743718592965e-06, + "loss": 0.002, + "step": 35650 + }, + { + "epoch": 11.98, + "grad_norm": 0.6951238512992859, + "learning_rate": 6.466231155778895e-06, + "loss": 0.0019, + "step": 35675 + }, + { + "epoch": 11.99, + "grad_norm": 0.3747531473636627, + "learning_rate": 6.463718592964825e-06, + "loss": 0.0023, + "step": 35700 + }, + { + "epoch": 12.0, + "grad_norm": 0.16589127480983734, + "learning_rate": 6.461206030150754e-06, + "loss": 0.0019, + "step": 35725 + }, + { + "epoch": 12.0, + "grad_norm": 0.21695904433727264, + "learning_rate": 6.458693467336684e-06, + "loss": 0.0017, + "step": 35750 + }, + { + "epoch": 12.01, + "grad_norm": 0.9151232838630676, + "learning_rate": 6.456180904522613e-06, + "loss": 0.0012, + "step": 35775 + }, + { + "epoch": 12.02, + "grad_norm": 0.2827753722667694, + "learning_rate": 6.453668341708543e-06, + "loss": 0.0016, + "step": 35800 + }, + { + "epoch": 12.03, + "grad_norm": 0.8096039295196533, + "learning_rate": 6.451155778894473e-06, + "loss": 0.0016, + "step": 35825 + }, + { + "epoch": 12.04, + "grad_norm": 0.3802209794521332, + "learning_rate": 6.4486432160804025e-06, + "loss": 0.0011, + "step": 35850 + }, + { + "epoch": 12.05, + "grad_norm": 0.1768757402896881, + "learning_rate": 6.446130653266333e-06, + "loss": 0.001, + "step": 35875 + }, + { + "epoch": 12.06, + "grad_norm": 0.49120259284973145, + "learning_rate": 6.443618090452262e-06, + "loss": 0.0013, + "step": 35900 + }, + { + "epoch": 12.06, + "grad_norm": 0.3009275496006012, + "learning_rate": 6.441105527638191e-06, + "loss": 0.0016, + "step": 35925 + }, + { + "epoch": 12.07, + "grad_norm": 0.782677173614502, + "learning_rate": 6.438592964824121e-06, + "loss": 0.0015, + "step": 35950 + }, + { + "epoch": 12.08, + "grad_norm": 1.8881429433822632, + "learning_rate": 6.436080402010051e-06, + "loss": 0.0018, + "step": 35975 + }, + { + "epoch": 12.09, + "grad_norm": 0.07092029601335526, + "learning_rate": 6.43356783919598e-06, + "loss": 0.0009, + "step": 36000 + }, + { + "epoch": 12.09, + "eval_loss": 0.25471925735473633, + "eval_runtime": 1155.166, + "eval_samples_per_second": 1.22, + "eval_steps_per_second": 1.22, + "eval_wer": 18.830854375648563, + "step": 36000 + }, + { + "epoch": 12.1, + "grad_norm": 0.3697056770324707, + "learning_rate": 6.43105527638191e-06, + "loss": 0.001, + "step": 36025 + }, + { + "epoch": 12.11, + "grad_norm": 0.310007780790329, + "learning_rate": 6.4285427135678405e-06, + "loss": 0.0018, + "step": 36050 + }, + { + "epoch": 12.11, + "grad_norm": 0.21309152245521545, + "learning_rate": 6.426030150753769e-06, + "loss": 0.0012, + "step": 36075 + }, + { + "epoch": 12.12, + "grad_norm": 0.49402427673339844, + "learning_rate": 6.423517587939699e-06, + "loss": 0.0017, + "step": 36100 + }, + { + "epoch": 12.13, + "grad_norm": 0.6617681980133057, + "learning_rate": 6.4210050251256285e-06, + "loss": 0.0014, + "step": 36125 + }, + { + "epoch": 12.14, + "grad_norm": 0.6788381934165955, + "learning_rate": 6.418492462311559e-06, + "loss": 0.001, + "step": 36150 + }, + { + "epoch": 12.15, + "grad_norm": 0.34545156359672546, + "learning_rate": 6.415979899497488e-06, + "loss": 0.0014, + "step": 36175 + }, + { + "epoch": 12.16, + "grad_norm": 0.7100117206573486, + "learning_rate": 6.413467336683417e-06, + "loss": 0.0013, + "step": 36200 + }, + { + "epoch": 12.16, + "grad_norm": 0.12973542511463165, + "learning_rate": 6.410954773869347e-06, + "loss": 0.0014, + "step": 36225 + }, + { + "epoch": 12.17, + "grad_norm": 0.22144758701324463, + "learning_rate": 6.408442211055277e-06, + "loss": 0.0013, + "step": 36250 + }, + { + "epoch": 12.18, + "grad_norm": 0.7628818154335022, + "learning_rate": 6.405929648241207e-06, + "loss": 0.0016, + "step": 36275 + }, + { + "epoch": 12.19, + "grad_norm": 0.4911034405231476, + "learning_rate": 6.403417085427136e-06, + "loss": 0.0016, + "step": 36300 + }, + { + "epoch": 12.2, + "grad_norm": 1.023901343345642, + "learning_rate": 6.4009045226130664e-06, + "loss": 0.0013, + "step": 36325 + }, + { + "epoch": 12.21, + "grad_norm": 1.0526654720306396, + "learning_rate": 6.398391959798995e-06, + "loss": 0.0011, + "step": 36350 + }, + { + "epoch": 12.21, + "grad_norm": 0.7969370484352112, + "learning_rate": 6.395879396984925e-06, + "loss": 0.0013, + "step": 36375 + }, + { + "epoch": 12.22, + "grad_norm": 0.2445056140422821, + "learning_rate": 6.393366834170854e-06, + "loss": 0.0011, + "step": 36400 + }, + { + "epoch": 12.23, + "grad_norm": 0.46808141469955444, + "learning_rate": 6.390854271356785e-06, + "loss": 0.0014, + "step": 36425 + }, + { + "epoch": 12.24, + "grad_norm": 0.48099666833877563, + "learning_rate": 6.388341708542715e-06, + "loss": 0.0011, + "step": 36450 + }, + { + "epoch": 12.25, + "grad_norm": 1.6462708711624146, + "learning_rate": 6.385829145728644e-06, + "loss": 0.0016, + "step": 36475 + }, + { + "epoch": 12.26, + "grad_norm": 0.8384543061256409, + "learning_rate": 6.383316582914573e-06, + "loss": 0.0017, + "step": 36500 + }, + { + "epoch": 12.26, + "grad_norm": 0.5767937302589417, + "learning_rate": 6.380804020100503e-06, + "loss": 0.0016, + "step": 36525 + }, + { + "epoch": 12.27, + "grad_norm": 0.6230292320251465, + "learning_rate": 6.378291457286433e-06, + "loss": 0.001, + "step": 36550 + }, + { + "epoch": 12.28, + "grad_norm": 1.345990538597107, + "learning_rate": 6.375778894472362e-06, + "loss": 0.001, + "step": 36575 + }, + { + "epoch": 12.29, + "grad_norm": 1.0651533603668213, + "learning_rate": 6.373266331658292e-06, + "loss": 0.0018, + "step": 36600 + }, + { + "epoch": 12.3, + "grad_norm": 0.6757230758666992, + "learning_rate": 6.370753768844221e-06, + "loss": 0.0015, + "step": 36625 + }, + { + "epoch": 12.31, + "grad_norm": 0.27358171343803406, + "learning_rate": 6.368241206030151e-06, + "loss": 0.0019, + "step": 36650 + }, + { + "epoch": 12.32, + "grad_norm": 0.3637424111366272, + "learning_rate": 6.365728643216081e-06, + "loss": 0.0018, + "step": 36675 + }, + { + "epoch": 12.32, + "grad_norm": 0.7091617584228516, + "learning_rate": 6.3632160804020105e-06, + "loss": 0.0021, + "step": 36700 + }, + { + "epoch": 12.33, + "grad_norm": 1.2485692501068115, + "learning_rate": 6.360703517587941e-06, + "loss": 0.0017, + "step": 36725 + }, + { + "epoch": 12.34, + "grad_norm": 0.3660537898540497, + "learning_rate": 6.35819095477387e-06, + "loss": 0.0019, + "step": 36750 + }, + { + "epoch": 12.35, + "grad_norm": 1.1992560625076294, + "learning_rate": 6.355678391959799e-06, + "loss": 0.0016, + "step": 36775 + }, + { + "epoch": 12.36, + "grad_norm": 0.6606042385101318, + "learning_rate": 6.353165829145729e-06, + "loss": 0.002, + "step": 36800 + }, + { + "epoch": 12.37, + "grad_norm": 0.9240416884422302, + "learning_rate": 6.350653266331659e-06, + "loss": 0.0014, + "step": 36825 + }, + { + "epoch": 12.37, + "grad_norm": 0.9122438430786133, + "learning_rate": 6.348140703517588e-06, + "loss": 0.0019, + "step": 36850 + }, + { + "epoch": 12.38, + "grad_norm": 0.21742796897888184, + "learning_rate": 6.345628140703518e-06, + "loss": 0.0016, + "step": 36875 + }, + { + "epoch": 12.39, + "grad_norm": 0.11225738376379013, + "learning_rate": 6.343216080402011e-06, + "loss": 0.0017, + "step": 36900 + }, + { + "epoch": 12.4, + "grad_norm": 0.2686758041381836, + "learning_rate": 6.340703517587941e-06, + "loss": 0.001, + "step": 36925 + }, + { + "epoch": 12.41, + "grad_norm": 0.36088281869888306, + "learning_rate": 6.338190954773869e-06, + "loss": 0.0013, + "step": 36950 + }, + { + "epoch": 12.42, + "grad_norm": 0.1312825083732605, + "learning_rate": 6.3356783919597995e-06, + "loss": 0.0008, + "step": 36975 + }, + { + "epoch": 12.42, + "grad_norm": 0.44308337569236755, + "learning_rate": 6.333165829145729e-06, + "loss": 0.0012, + "step": 37000 + }, + { + "epoch": 12.42, + "eval_loss": 0.2529711425304413, + "eval_runtime": 1150.7985, + "eval_samples_per_second": 1.224, + "eval_steps_per_second": 1.224, + "eval_wer": 18.7409200968523, + "step": 37000 + }, + { + "epoch": 12.43, + "grad_norm": 0.10189375281333923, + "learning_rate": 6.330653266331659e-06, + "loss": 0.001, + "step": 37025 + }, + { + "epoch": 12.44, + "grad_norm": 0.43479302525520325, + "learning_rate": 6.3281407035175875e-06, + "loss": 0.0009, + "step": 37050 + }, + { + "epoch": 12.45, + "grad_norm": 0.08887296915054321, + "learning_rate": 6.325628140703518e-06, + "loss": 0.001, + "step": 37075 + }, + { + "epoch": 12.46, + "grad_norm": 0.09414894878864288, + "learning_rate": 6.323115577889448e-06, + "loss": 0.0015, + "step": 37100 + }, + { + "epoch": 12.47, + "grad_norm": 0.10403262078762054, + "learning_rate": 6.320603015075377e-06, + "loss": 0.0007, + "step": 37125 + }, + { + "epoch": 12.47, + "grad_norm": 0.6799802184104919, + "learning_rate": 6.318090452261307e-06, + "loss": 0.0014, + "step": 37150 + }, + { + "epoch": 12.48, + "grad_norm": 0.5933218002319336, + "learning_rate": 6.315577889447237e-06, + "loss": 0.0014, + "step": 37175 + }, + { + "epoch": 12.49, + "grad_norm": 0.9599027633666992, + "learning_rate": 6.313065326633167e-06, + "loss": 0.001, + "step": 37200 + }, + { + "epoch": 12.5, + "grad_norm": 0.23961205780506134, + "learning_rate": 6.310552763819095e-06, + "loss": 0.0016, + "step": 37225 + }, + { + "epoch": 12.51, + "grad_norm": 0.4147547483444214, + "learning_rate": 6.3080402010050255e-06, + "loss": 0.0019, + "step": 37250 + }, + { + "epoch": 12.52, + "grad_norm": 0.41044384241104126, + "learning_rate": 6.305527638190956e-06, + "loss": 0.0015, + "step": 37275 + }, + { + "epoch": 12.53, + "grad_norm": 0.21577022969722748, + "learning_rate": 6.303015075376885e-06, + "loss": 0.001, + "step": 37300 + }, + { + "epoch": 12.53, + "grad_norm": 1.1710948944091797, + "learning_rate": 6.300502512562815e-06, + "loss": 0.0015, + "step": 37325 + }, + { + "epoch": 12.54, + "grad_norm": 1.9177687168121338, + "learning_rate": 6.297989949748744e-06, + "loss": 0.0018, + "step": 37350 + }, + { + "epoch": 12.55, + "grad_norm": 0.1851605474948883, + "learning_rate": 6.295477386934674e-06, + "loss": 0.0015, + "step": 37375 + }, + { + "epoch": 12.56, + "grad_norm": 0.3106563687324524, + "learning_rate": 6.292964824120603e-06, + "loss": 0.0016, + "step": 37400 + }, + { + "epoch": 12.57, + "grad_norm": 0.6073814034461975, + "learning_rate": 6.290452261306533e-06, + "loss": 0.0019, + "step": 37425 + }, + { + "epoch": 12.58, + "grad_norm": 1.0178412199020386, + "learning_rate": 6.287939698492463e-06, + "loss": 0.0028, + "step": 37450 + }, + { + "epoch": 12.58, + "grad_norm": 0.3523646891117096, + "learning_rate": 6.285427135678393e-06, + "loss": 0.0013, + "step": 37475 + }, + { + "epoch": 12.59, + "grad_norm": 0.6647047996520996, + "learning_rate": 6.282914572864323e-06, + "loss": 0.0021, + "step": 37500 + }, + { + "epoch": 12.6, + "grad_norm": 0.3417402505874634, + "learning_rate": 6.280402010050251e-06, + "loss": 0.002, + "step": 37525 + }, + { + "epoch": 12.61, + "grad_norm": 0.995600700378418, + "learning_rate": 6.277889447236182e-06, + "loss": 0.0016, + "step": 37550 + }, + { + "epoch": 12.62, + "grad_norm": 0.22635191679000854, + "learning_rate": 6.275376884422111e-06, + "loss": 0.0017, + "step": 37575 + }, + { + "epoch": 12.63, + "grad_norm": 0.5624616146087646, + "learning_rate": 6.272864321608041e-06, + "loss": 0.0021, + "step": 37600 + }, + { + "epoch": 12.63, + "grad_norm": 0.8629630208015442, + "learning_rate": 6.2703517587939696e-06, + "loss": 0.0015, + "step": 37625 + }, + { + "epoch": 12.64, + "grad_norm": 0.31206464767456055, + "learning_rate": 6.2678391959799e-06, + "loss": 0.0027, + "step": 37650 + }, + { + "epoch": 12.65, + "grad_norm": 1.4867948293685913, + "learning_rate": 6.265326633165829e-06, + "loss": 0.0017, + "step": 37675 + }, + { + "epoch": 12.66, + "grad_norm": 1.1645559072494507, + "learning_rate": 6.262814070351759e-06, + "loss": 0.0026, + "step": 37700 + }, + { + "epoch": 12.67, + "grad_norm": 0.765744686126709, + "learning_rate": 6.260301507537689e-06, + "loss": 0.0015, + "step": 37725 + }, + { + "epoch": 12.68, + "grad_norm": 1.6351370811462402, + "learning_rate": 6.257788944723619e-06, + "loss": 0.0016, + "step": 37750 + }, + { + "epoch": 12.68, + "grad_norm": 1.2652709484100342, + "learning_rate": 6.255276381909549e-06, + "loss": 0.0015, + "step": 37775 + }, + { + "epoch": 12.69, + "grad_norm": 0.2899409234523773, + "learning_rate": 6.252763819095477e-06, + "loss": 0.0014, + "step": 37800 + }, + { + "epoch": 12.7, + "grad_norm": 1.149211049079895, + "learning_rate": 6.2502512562814075e-06, + "loss": 0.0014, + "step": 37825 + }, + { + "epoch": 12.71, + "grad_norm": 0.19880491495132446, + "learning_rate": 6.247738693467337e-06, + "loss": 0.0018, + "step": 37850 + }, + { + "epoch": 12.72, + "grad_norm": 0.7025607228279114, + "learning_rate": 6.245226130653267e-06, + "loss": 0.0018, + "step": 37875 + }, + { + "epoch": 12.73, + "grad_norm": 0.5735000967979431, + "learning_rate": 6.242713567839197e-06, + "loss": 0.0016, + "step": 37900 + }, + { + "epoch": 12.74, + "grad_norm": 2.293802261352539, + "learning_rate": 6.240201005025126e-06, + "loss": 0.0018, + "step": 37925 + }, + { + "epoch": 12.74, + "grad_norm": 0.8874403238296509, + "learning_rate": 6.237688442211056e-06, + "loss": 0.0014, + "step": 37950 + }, + { + "epoch": 12.75, + "grad_norm": 0.19391514360904694, + "learning_rate": 6.235175879396985e-06, + "loss": 0.0024, + "step": 37975 + }, + { + "epoch": 12.76, + "grad_norm": 1.2764421701431274, + "learning_rate": 6.232663316582915e-06, + "loss": 0.0019, + "step": 38000 + }, + { + "epoch": 12.76, + "eval_loss": 0.2590956687927246, + "eval_runtime": 1159.9283, + "eval_samples_per_second": 1.215, + "eval_steps_per_second": 1.215, + "eval_wer": 19.031476997578693, + "step": 38000 + }, + { + "epoch": 12.77, + "grad_norm": 1.4413776397705078, + "learning_rate": 6.230150753768845e-06, + "loss": 0.0022, + "step": 38025 + }, + { + "epoch": 12.78, + "grad_norm": 0.3140653073787689, + "learning_rate": 6.227638190954775e-06, + "loss": 0.0018, + "step": 38050 + }, + { + "epoch": 12.79, + "grad_norm": 0.6811867952346802, + "learning_rate": 6.225125628140703e-06, + "loss": 0.0018, + "step": 38075 + }, + { + "epoch": 12.79, + "grad_norm": 1.4947307109832764, + "learning_rate": 6.2226130653266335e-06, + "loss": 0.0016, + "step": 38100 + }, + { + "epoch": 12.8, + "grad_norm": 1.3698049783706665, + "learning_rate": 6.220100502512564e-06, + "loss": 0.0016, + "step": 38125 + }, + { + "epoch": 12.81, + "grad_norm": 0.6601163148880005, + "learning_rate": 6.217587939698493e-06, + "loss": 0.0016, + "step": 38150 + }, + { + "epoch": 12.82, + "grad_norm": 0.29854071140289307, + "learning_rate": 6.215075376884423e-06, + "loss": 0.0017, + "step": 38175 + }, + { + "epoch": 12.83, + "grad_norm": 0.333444744348526, + "learning_rate": 6.212562814070352e-06, + "loss": 0.0017, + "step": 38200 + }, + { + "epoch": 12.84, + "grad_norm": 0.7837780117988586, + "learning_rate": 6.210050251256282e-06, + "loss": 0.0014, + "step": 38225 + }, + { + "epoch": 12.84, + "grad_norm": 0.6200121641159058, + "learning_rate": 6.207537688442211e-06, + "loss": 0.002, + "step": 38250 + }, + { + "epoch": 12.85, + "grad_norm": 0.4954937696456909, + "learning_rate": 6.205025125628141e-06, + "loss": 0.0013, + "step": 38275 + }, + { + "epoch": 12.86, + "grad_norm": 0.08349131047725677, + "learning_rate": 6.202512562814071e-06, + "loss": 0.0016, + "step": 38300 + }, + { + "epoch": 12.87, + "grad_norm": 0.6063538193702698, + "learning_rate": 6.200000000000001e-06, + "loss": 0.0017, + "step": 38325 + }, + { + "epoch": 12.88, + "grad_norm": 0.6830250024795532, + "learning_rate": 6.197487437185931e-06, + "loss": 0.0014, + "step": 38350 + }, + { + "epoch": 12.89, + "grad_norm": 1.0236159563064575, + "learning_rate": 6.1949748743718594e-06, + "loss": 0.0015, + "step": 38375 + }, + { + "epoch": 12.89, + "grad_norm": 0.4944305121898651, + "learning_rate": 6.19246231155779e-06, + "loss": 0.0014, + "step": 38400 + }, + { + "epoch": 12.9, + "grad_norm": 0.7056916952133179, + "learning_rate": 6.189949748743719e-06, + "loss": 0.0018, + "step": 38425 + }, + { + "epoch": 12.91, + "grad_norm": 0.477714866399765, + "learning_rate": 6.187437185929649e-06, + "loss": 0.0013, + "step": 38450 + }, + { + "epoch": 12.92, + "grad_norm": 0.39366820454597473, + "learning_rate": 6.1849246231155776e-06, + "loss": 0.0014, + "step": 38475 + }, + { + "epoch": 12.93, + "grad_norm": 0.1957242339849472, + "learning_rate": 6.182412060301508e-06, + "loss": 0.002, + "step": 38500 + }, + { + "epoch": 12.94, + "grad_norm": 0.8882048726081848, + "learning_rate": 6.179899497487438e-06, + "loss": 0.002, + "step": 38525 + }, + { + "epoch": 12.94, + "grad_norm": 0.16187544167041779, + "learning_rate": 6.177386934673367e-06, + "loss": 0.0015, + "step": 38550 + }, + { + "epoch": 12.95, + "grad_norm": 0.7847512364387512, + "learning_rate": 6.174874371859297e-06, + "loss": 0.0024, + "step": 38575 + }, + { + "epoch": 12.96, + "grad_norm": 0.7238224744796753, + "learning_rate": 6.172361809045227e-06, + "loss": 0.0018, + "step": 38600 + }, + { + "epoch": 12.97, + "grad_norm": 1.2138075828552246, + "learning_rate": 6.169849246231157e-06, + "loss": 0.0018, + "step": 38625 + }, + { + "epoch": 12.98, + "grad_norm": 0.8547137975692749, + "learning_rate": 6.167336683417085e-06, + "loss": 0.002, + "step": 38650 + }, + { + "epoch": 12.99, + "grad_norm": 0.4376111626625061, + "learning_rate": 6.1648241206030155e-06, + "loss": 0.0024, + "step": 38675 + }, + { + "epoch": 13.0, + "grad_norm": 1.2384926080703735, + "learning_rate": 6.162311557788945e-06, + "loss": 0.0021, + "step": 38700 + }, + { + "epoch": 13.0, + "grad_norm": 0.14090225100517273, + "learning_rate": 6.159798994974875e-06, + "loss": 0.0012, + "step": 38725 + }, + { + "epoch": 13.01, + "grad_norm": 0.6098170280456543, + "learning_rate": 6.157286432160805e-06, + "loss": 0.001, + "step": 38750 + }, + { + "epoch": 13.02, + "grad_norm": 0.4469490647315979, + "learning_rate": 6.154773869346734e-06, + "loss": 0.0008, + "step": 38775 + }, + { + "epoch": 13.03, + "grad_norm": 1.09054696559906, + "learning_rate": 6.152261306532664e-06, + "loss": 0.0007, + "step": 38800 + }, + { + "epoch": 13.04, + "grad_norm": 0.8087511658668518, + "learning_rate": 6.149748743718593e-06, + "loss": 0.001, + "step": 38825 + }, + { + "epoch": 13.05, + "grad_norm": 0.05882269889116287, + "learning_rate": 6.147236180904523e-06, + "loss": 0.001, + "step": 38850 + }, + { + "epoch": 13.05, + "grad_norm": 0.5103879570960999, + "learning_rate": 6.144723618090453e-06, + "loss": 0.0011, + "step": 38875 + }, + { + "epoch": 13.06, + "grad_norm": 2.09248685836792, + "learning_rate": 6.142211055276383e-06, + "loss": 0.0013, + "step": 38900 + }, + { + "epoch": 13.07, + "grad_norm": 1.219306230545044, + "learning_rate": 6.139698492462311e-06, + "loss": 0.0019, + "step": 38925 + }, + { + "epoch": 13.08, + "grad_norm": 1.2841269969940186, + "learning_rate": 6.1371859296482415e-06, + "loss": 0.0012, + "step": 38950 + }, + { + "epoch": 13.09, + "grad_norm": 0.058904197067022324, + "learning_rate": 6.134673366834172e-06, + "loss": 0.0009, + "step": 38975 + }, + { + "epoch": 13.1, + "grad_norm": 0.1482638269662857, + "learning_rate": 6.132160804020101e-06, + "loss": 0.001, + "step": 39000 + }, + { + "epoch": 13.1, + "eval_loss": 0.2508487403392792, + "eval_runtime": 1156.9541, + "eval_samples_per_second": 1.218, + "eval_steps_per_second": 1.218, + "eval_wer": 19.093739190591492, + "step": 39000 + }, + { + "epoch": 13.1, + "grad_norm": 1.083548903465271, + "learning_rate": 6.129648241206031e-06, + "loss": 0.0009, + "step": 39025 + }, + { + "epoch": 13.11, + "grad_norm": 0.22930461168289185, + "learning_rate": 6.12713567839196e-06, + "loss": 0.0017, + "step": 39050 + }, + { + "epoch": 13.12, + "grad_norm": 0.9381701350212097, + "learning_rate": 6.12462311557789e-06, + "loss": 0.0015, + "step": 39075 + }, + { + "epoch": 13.13, + "grad_norm": 0.4138481318950653, + "learning_rate": 6.122110552763819e-06, + "loss": 0.0014, + "step": 39100 + }, + { + "epoch": 13.14, + "grad_norm": 0.08874215930700302, + "learning_rate": 6.119597989949749e-06, + "loss": 0.0011, + "step": 39125 + }, + { + "epoch": 13.15, + "grad_norm": 0.65035080909729, + "learning_rate": 6.1170854271356795e-06, + "loss": 0.0012, + "step": 39150 + }, + { + "epoch": 13.15, + "grad_norm": 1.1208595037460327, + "learning_rate": 6.114572864321609e-06, + "loss": 0.0019, + "step": 39175 + }, + { + "epoch": 13.16, + "grad_norm": 1.0692840814590454, + "learning_rate": 6.112060301507539e-06, + "loss": 0.0017, + "step": 39200 + }, + { + "epoch": 13.17, + "grad_norm": 2.0083467960357666, + "learning_rate": 6.1095477386934674e-06, + "loss": 0.0012, + "step": 39225 + }, + { + "epoch": 13.18, + "grad_norm": 0.0621347650885582, + "learning_rate": 6.107035175879398e-06, + "loss": 0.0014, + "step": 39250 + }, + { + "epoch": 13.19, + "grad_norm": 0.621574878692627, + "learning_rate": 6.104522613065327e-06, + "loss": 0.0015, + "step": 39275 + }, + { + "epoch": 13.2, + "grad_norm": 0.7055924534797668, + "learning_rate": 6.102010050251257e-06, + "loss": 0.0008, + "step": 39300 + }, + { + "epoch": 13.21, + "grad_norm": 0.7267388701438904, + "learning_rate": 6.0994974874371856e-06, + "loss": 0.0017, + "step": 39325 + }, + { + "epoch": 13.21, + "grad_norm": 1.2473030090332031, + "learning_rate": 6.096984924623116e-06, + "loss": 0.0016, + "step": 39350 + }, + { + "epoch": 13.22, + "grad_norm": 0.9252501726150513, + "learning_rate": 6.094472361809046e-06, + "loss": 0.0014, + "step": 39375 + }, + { + "epoch": 13.23, + "grad_norm": 1.1190935373306274, + "learning_rate": 6.091959798994975e-06, + "loss": 0.0013, + "step": 39400 + }, + { + "epoch": 13.24, + "grad_norm": 1.1021645069122314, + "learning_rate": 6.089447236180905e-06, + "loss": 0.0013, + "step": 39425 + }, + { + "epoch": 13.25, + "grad_norm": 0.38343313336372375, + "learning_rate": 6.086934673366835e-06, + "loss": 0.0012, + "step": 39450 + }, + { + "epoch": 13.26, + "grad_norm": 0.16145436465740204, + "learning_rate": 6.084422110552765e-06, + "loss": 0.0009, + "step": 39475 + }, + { + "epoch": 13.26, + "grad_norm": 0.2185475081205368, + "learning_rate": 6.081909547738693e-06, + "loss": 0.0012, + "step": 39500 + }, + { + "epoch": 13.27, + "grad_norm": 0.12538164854049683, + "learning_rate": 6.0793969849246235e-06, + "loss": 0.0012, + "step": 39525 + }, + { + "epoch": 13.28, + "grad_norm": 0.4014580249786377, + "learning_rate": 6.076884422110553e-06, + "loss": 0.0012, + "step": 39550 + }, + { + "epoch": 13.29, + "grad_norm": 0.7935028076171875, + "learning_rate": 6.074371859296483e-06, + "loss": 0.0018, + "step": 39575 + }, + { + "epoch": 13.3, + "grad_norm": 0.6906373500823975, + "learning_rate": 6.071859296482413e-06, + "loss": 0.0016, + "step": 39600 + }, + { + "epoch": 13.31, + "grad_norm": 0.8890399932861328, + "learning_rate": 6.069346733668342e-06, + "loss": 0.0014, + "step": 39625 + }, + { + "epoch": 13.31, + "grad_norm": 0.49261996150016785, + "learning_rate": 6.066834170854272e-06, + "loss": 0.0015, + "step": 39650 + }, + { + "epoch": 13.32, + "grad_norm": 0.5787969827651978, + "learning_rate": 6.064321608040201e-06, + "loss": 0.001, + "step": 39675 + }, + { + "epoch": 13.33, + "grad_norm": 1.0418930053710938, + "learning_rate": 6.061809045226131e-06, + "loss": 0.001, + "step": 39700 + }, + { + "epoch": 13.34, + "grad_norm": 0.5551608800888062, + "learning_rate": 6.059296482412061e-06, + "loss": 0.0011, + "step": 39725 + }, + { + "epoch": 13.35, + "grad_norm": 0.6483166217803955, + "learning_rate": 6.056783919597991e-06, + "loss": 0.0016, + "step": 39750 + }, + { + "epoch": 13.36, + "grad_norm": 1.0185385942459106, + "learning_rate": 6.054271356783921e-06, + "loss": 0.0018, + "step": 39775 + }, + { + "epoch": 13.36, + "grad_norm": 0.591634213924408, + "learning_rate": 6.0517587939698495e-06, + "loss": 0.0017, + "step": 39800 + }, + { + "epoch": 13.37, + "grad_norm": 1.8336039781570435, + "learning_rate": 6.04924623115578e-06, + "loss": 0.0016, + "step": 39825 + }, + { + "epoch": 13.38, + "grad_norm": 0.7766602635383606, + "learning_rate": 6.046733668341709e-06, + "loss": 0.0015, + "step": 39850 + }, + { + "epoch": 13.39, + "grad_norm": 0.9195897579193115, + "learning_rate": 6.044221105527639e-06, + "loss": 0.0015, + "step": 39875 + }, + { + "epoch": 13.4, + "grad_norm": 0.3276890516281128, + "learning_rate": 6.041708542713568e-06, + "loss": 0.0012, + "step": 39900 + }, + { + "epoch": 13.41, + "grad_norm": 1.4782788753509521, + "learning_rate": 6.039195979899498e-06, + "loss": 0.0019, + "step": 39925 + }, + { + "epoch": 13.42, + "grad_norm": 1.3895055055618286, + "learning_rate": 6.036683417085427e-06, + "loss": 0.0012, + "step": 39950 + }, + { + "epoch": 13.42, + "grad_norm": 0.9377069473266602, + "learning_rate": 6.034170854271357e-06, + "loss": 0.0013, + "step": 39975 + }, + { + "epoch": 13.43, + "grad_norm": 0.42760491371154785, + "learning_rate": 6.0316582914572875e-06, + "loss": 0.0012, + "step": 40000 + }, + { + "epoch": 13.43, + "eval_loss": 0.2572503685951233, + "eval_runtime": 1161.8589, + "eval_samples_per_second": 1.213, + "eval_steps_per_second": 1.213, + "eval_wer": 19.107575233483225, + "step": 40000 + }, + { + "epoch": 13.44, + "grad_norm": 0.5923931002616882, + "learning_rate": 6.029145728643217e-06, + "loss": 0.0018, + "step": 40025 + }, + { + "epoch": 13.45, + "grad_norm": 1.0503114461898804, + "learning_rate": 6.026633165829147e-06, + "loss": 0.0016, + "step": 40050 + }, + { + "epoch": 13.46, + "grad_norm": 0.46090346574783325, + "learning_rate": 6.0241206030150754e-06, + "loss": 0.0016, + "step": 40075 + }, + { + "epoch": 13.47, + "grad_norm": 0.4889087677001953, + "learning_rate": 6.021608040201006e-06, + "loss": 0.0012, + "step": 40100 + }, + { + "epoch": 13.47, + "grad_norm": 0.3197808265686035, + "learning_rate": 6.019095477386935e-06, + "loss": 0.0014, + "step": 40125 + }, + { + "epoch": 13.48, + "grad_norm": 0.1967504471540451, + "learning_rate": 6.016582914572865e-06, + "loss": 0.0014, + "step": 40150 + }, + { + "epoch": 13.49, + "grad_norm": 0.7475374937057495, + "learning_rate": 6.0140703517587936e-06, + "loss": 0.0011, + "step": 40175 + }, + { + "epoch": 13.5, + "grad_norm": 0.18247902393341064, + "learning_rate": 6.011557788944724e-06, + "loss": 0.0016, + "step": 40200 + }, + { + "epoch": 13.51, + "grad_norm": 0.40298062562942505, + "learning_rate": 6.009045226130654e-06, + "loss": 0.0015, + "step": 40225 + }, + { + "epoch": 13.52, + "grad_norm": 0.9599213004112244, + "learning_rate": 6.006532663316583e-06, + "loss": 0.002, + "step": 40250 + }, + { + "epoch": 13.52, + "grad_norm": 0.43046364188194275, + "learning_rate": 6.004020100502513e-06, + "loss": 0.0017, + "step": 40275 + }, + { + "epoch": 13.53, + "grad_norm": 0.11050378531217575, + "learning_rate": 6.001507537688443e-06, + "loss": 0.0022, + "step": 40300 + }, + { + "epoch": 13.54, + "grad_norm": 1.1679822206497192, + "learning_rate": 5.998994974874373e-06, + "loss": 0.0011, + "step": 40325 + }, + { + "epoch": 13.55, + "grad_norm": 0.7015272378921509, + "learning_rate": 5.996482412060301e-06, + "loss": 0.0016, + "step": 40350 + }, + { + "epoch": 13.56, + "grad_norm": 0.7837432622909546, + "learning_rate": 5.9939698492462315e-06, + "loss": 0.0015, + "step": 40375 + }, + { + "epoch": 13.57, + "grad_norm": 0.18917316198349, + "learning_rate": 5.991457286432162e-06, + "loss": 0.0014, + "step": 40400 + }, + { + "epoch": 13.57, + "grad_norm": 0.19746346771717072, + "learning_rate": 5.988944723618091e-06, + "loss": 0.0015, + "step": 40425 + }, + { + "epoch": 13.58, + "grad_norm": 0.5986410975456238, + "learning_rate": 5.986432160804021e-06, + "loss": 0.002, + "step": 40450 + }, + { + "epoch": 13.59, + "grad_norm": 0.6012015342712402, + "learning_rate": 5.98391959798995e-06, + "loss": 0.0019, + "step": 40475 + }, + { + "epoch": 13.6, + "grad_norm": 0.5986531376838684, + "learning_rate": 5.98140703517588e-06, + "loss": 0.0021, + "step": 40500 + }, + { + "epoch": 13.61, + "grad_norm": 1.1143161058425903, + "learning_rate": 5.978894472361809e-06, + "loss": 0.0014, + "step": 40525 + }, + { + "epoch": 13.62, + "grad_norm": 1.4039273262023926, + "learning_rate": 5.976381909547739e-06, + "loss": 0.0018, + "step": 40550 + }, + { + "epoch": 13.62, + "grad_norm": 0.8931780457496643, + "learning_rate": 5.973869346733669e-06, + "loss": 0.0013, + "step": 40575 + }, + { + "epoch": 13.63, + "grad_norm": 0.3512198030948639, + "learning_rate": 5.971356783919599e-06, + "loss": 0.0016, + "step": 40600 + }, + { + "epoch": 13.64, + "grad_norm": 0.39568156003952026, + "learning_rate": 5.968844221105529e-06, + "loss": 0.0017, + "step": 40625 + }, + { + "epoch": 13.65, + "grad_norm": 1.064471960067749, + "learning_rate": 5.9663316582914575e-06, + "loss": 0.0014, + "step": 40650 + }, + { + "epoch": 13.66, + "grad_norm": 0.2831660211086273, + "learning_rate": 5.963819095477388e-06, + "loss": 0.0012, + "step": 40675 + }, + { + "epoch": 13.67, + "grad_norm": 0.17729076743125916, + "learning_rate": 5.961306532663317e-06, + "loss": 0.0015, + "step": 40700 + }, + { + "epoch": 13.68, + "grad_norm": 0.22697359323501587, + "learning_rate": 5.958793969849247e-06, + "loss": 0.0016, + "step": 40725 + }, + { + "epoch": 13.68, + "grad_norm": 1.0629873275756836, + "learning_rate": 5.956281407035176e-06, + "loss": 0.0014, + "step": 40750 + }, + { + "epoch": 13.69, + "grad_norm": 0.32973712682724, + "learning_rate": 5.953768844221106e-06, + "loss": 0.0014, + "step": 40775 + }, + { + "epoch": 13.7, + "grad_norm": 0.042080968618392944, + "learning_rate": 5.951256281407035e-06, + "loss": 0.0015, + "step": 40800 + }, + { + "epoch": 13.71, + "grad_norm": 0.7598047852516174, + "learning_rate": 5.948743718592965e-06, + "loss": 0.0018, + "step": 40825 + }, + { + "epoch": 13.72, + "grad_norm": 1.012855887413025, + "learning_rate": 5.9462311557788955e-06, + "loss": 0.0017, + "step": 40850 + }, + { + "epoch": 13.73, + "grad_norm": 0.9808406233787537, + "learning_rate": 5.943718592964825e-06, + "loss": 0.0014, + "step": 40875 + }, + { + "epoch": 13.73, + "grad_norm": 0.1286841630935669, + "learning_rate": 5.941206030150755e-06, + "loss": 0.0009, + "step": 40900 + }, + { + "epoch": 13.74, + "grad_norm": 0.24371092021465302, + "learning_rate": 5.9386934673366834e-06, + "loss": 0.0014, + "step": 40925 + }, + { + "epoch": 13.75, + "grad_norm": 0.24188636243343353, + "learning_rate": 5.936180904522614e-06, + "loss": 0.001, + "step": 40950 + }, + { + "epoch": 13.76, + "grad_norm": 0.7869747281074524, + "learning_rate": 5.933668341708543e-06, + "loss": 0.0018, + "step": 40975 + }, + { + "epoch": 13.77, + "grad_norm": 0.2793295979499817, + "learning_rate": 5.931155778894473e-06, + "loss": 0.0016, + "step": 41000 + }, + { + "epoch": 13.77, + "eval_loss": 0.2596408724784851, + "eval_runtime": 1166.8493, + "eval_samples_per_second": 1.208, + "eval_steps_per_second": 1.208, + "eval_wer": 19.280525769629886, + "step": 41000 + }, + { + "epoch": 13.78, + "grad_norm": 0.21328850090503693, + "learning_rate": 5.9286432160804016e-06, + "loss": 0.0013, + "step": 41025 + }, + { + "epoch": 13.78, + "grad_norm": 0.5808143615722656, + "learning_rate": 5.926130653266332e-06, + "loss": 0.0017, + "step": 41050 + }, + { + "epoch": 13.79, + "grad_norm": 0.44151127338409424, + "learning_rate": 5.923618090452262e-06, + "loss": 0.0019, + "step": 41075 + }, + { + "epoch": 13.8, + "grad_norm": 0.5460540652275085, + "learning_rate": 5.921105527638191e-06, + "loss": 0.0012, + "step": 41100 + }, + { + "epoch": 13.81, + "grad_norm": 0.7787694334983826, + "learning_rate": 5.918592964824121e-06, + "loss": 0.0014, + "step": 41125 + }, + { + "epoch": 13.82, + "grad_norm": 1.2834769487380981, + "learning_rate": 5.916080402010051e-06, + "loss": 0.0017, + "step": 41150 + }, + { + "epoch": 13.83, + "grad_norm": 0.8843485116958618, + "learning_rate": 5.913567839195981e-06, + "loss": 0.0015, + "step": 41175 + }, + { + "epoch": 13.83, + "grad_norm": 0.28491199016571045, + "learning_rate": 5.911055276381909e-06, + "loss": 0.0015, + "step": 41200 + }, + { + "epoch": 13.84, + "grad_norm": 1.7666897773742676, + "learning_rate": 5.9085427135678395e-06, + "loss": 0.0019, + "step": 41225 + }, + { + "epoch": 13.85, + "grad_norm": 0.40713760256767273, + "learning_rate": 5.90603015075377e-06, + "loss": 0.0017, + "step": 41250 + }, + { + "epoch": 13.86, + "grad_norm": 1.3919709920883179, + "learning_rate": 5.903517587939699e-06, + "loss": 0.0017, + "step": 41275 + }, + { + "epoch": 13.87, + "grad_norm": 1.508120059967041, + "learning_rate": 5.901005025125629e-06, + "loss": 0.0018, + "step": 41300 + }, + { + "epoch": 13.88, + "grad_norm": 0.4422523081302643, + "learning_rate": 5.898492462311558e-06, + "loss": 0.0018, + "step": 41325 + }, + { + "epoch": 13.89, + "grad_norm": 1.4669214487075806, + "learning_rate": 5.895979899497488e-06, + "loss": 0.0023, + "step": 41350 + }, + { + "epoch": 13.89, + "grad_norm": 0.5543438196182251, + "learning_rate": 5.893467336683417e-06, + "loss": 0.0018, + "step": 41375 + }, + { + "epoch": 13.9, + "grad_norm": 0.4752007722854614, + "learning_rate": 5.890954773869347e-06, + "loss": 0.0016, + "step": 41400 + }, + { + "epoch": 13.91, + "grad_norm": 0.29779595136642456, + "learning_rate": 5.888442211055277e-06, + "loss": 0.0016, + "step": 41425 + }, + { + "epoch": 13.92, + "grad_norm": 1.4396846294403076, + "learning_rate": 5.885929648241207e-06, + "loss": 0.0012, + "step": 41450 + }, + { + "epoch": 13.93, + "grad_norm": 0.2424653321504593, + "learning_rate": 5.883417085427137e-06, + "loss": 0.0013, + "step": 41475 + }, + { + "epoch": 13.94, + "grad_norm": 0.9351645708084106, + "learning_rate": 5.881005025125629e-06, + "loss": 0.0017, + "step": 41500 + }, + { + "epoch": 13.94, + "grad_norm": 0.31049320101737976, + "learning_rate": 5.878492462311558e-06, + "loss": 0.0014, + "step": 41525 + }, + { + "epoch": 13.95, + "grad_norm": 0.8638837337493896, + "learning_rate": 5.875979899497488e-06, + "loss": 0.0015, + "step": 41550 + }, + { + "epoch": 13.96, + "grad_norm": 0.6051950454711914, + "learning_rate": 5.873467336683417e-06, + "loss": 0.002, + "step": 41575 + }, + { + "epoch": 13.97, + "grad_norm": 0.42730197310447693, + "learning_rate": 5.8709547738693475e-06, + "loss": 0.0017, + "step": 41600 + }, + { + "epoch": 13.98, + "grad_norm": 1.2609761953353882, + "learning_rate": 5.868442211055276e-06, + "loss": 0.002, + "step": 41625 + }, + { + "epoch": 13.99, + "grad_norm": 0.64671790599823, + "learning_rate": 5.865929648241206e-06, + "loss": 0.0014, + "step": 41650 + }, + { + "epoch": 13.99, + "grad_norm": 0.2689552903175354, + "learning_rate": 5.863417085427136e-06, + "loss": 0.001, + "step": 41675 + }, + { + "epoch": 14.0, + "grad_norm": 0.6900052428245544, + "learning_rate": 5.860904522613066e-06, + "loss": 0.0013, + "step": 41700 + }, + { + "epoch": 14.01, + "grad_norm": 0.5760495662689209, + "learning_rate": 5.858391959798996e-06, + "loss": 0.0011, + "step": 41725 + }, + { + "epoch": 14.02, + "grad_norm": 0.764080822467804, + "learning_rate": 5.855879396984925e-06, + "loss": 0.0014, + "step": 41750 + }, + { + "epoch": 14.03, + "grad_norm": 0.13676817715168, + "learning_rate": 5.853366834170855e-06, + "loss": 0.0009, + "step": 41775 + }, + { + "epoch": 14.04, + "grad_norm": 0.27136245369911194, + "learning_rate": 5.850854271356784e-06, + "loss": 0.0009, + "step": 41800 + }, + { + "epoch": 14.04, + "grad_norm": 0.31248903274536133, + "learning_rate": 5.848341708542714e-06, + "loss": 0.0008, + "step": 41825 + }, + { + "epoch": 14.05, + "grad_norm": 0.056231625378131866, + "learning_rate": 5.845829145728644e-06, + "loss": 0.0008, + "step": 41850 + }, + { + "epoch": 14.06, + "grad_norm": 0.14273740351200104, + "learning_rate": 5.8433165829145735e-06, + "loss": 0.0012, + "step": 41875 + }, + { + "epoch": 14.07, + "grad_norm": 0.24090011417865753, + "learning_rate": 5.840804020100504e-06, + "loss": 0.001, + "step": 41900 + }, + { + "epoch": 14.08, + "grad_norm": 0.33006253838539124, + "learning_rate": 5.838291457286432e-06, + "loss": 0.0012, + "step": 41925 + }, + { + "epoch": 14.09, + "grad_norm": 0.2741028666496277, + "learning_rate": 5.835778894472362e-06, + "loss": 0.0004, + "step": 41950 + }, + { + "epoch": 14.1, + "grad_norm": 0.8874049782752991, + "learning_rate": 5.833266331658292e-06, + "loss": 0.0008, + "step": 41975 + }, + { + "epoch": 14.1, + "grad_norm": 0.2518490254878998, + "learning_rate": 5.830753768844222e-06, + "loss": 0.0012, + "step": 42000 + }, + { + "epoch": 14.1, + "eval_loss": 0.2560809254646301, + "eval_runtime": 1161.432, + "eval_samples_per_second": 1.213, + "eval_steps_per_second": 1.213, + "eval_wer": 19.13524731926669, + "step": 42000 + }, + { + "epoch": 14.11, + "grad_norm": 0.03873791545629501, + "learning_rate": 5.828241206030151e-06, + "loss": 0.0012, + "step": 42025 + }, + { + "epoch": 14.12, + "grad_norm": 0.907171368598938, + "learning_rate": 5.825728643216081e-06, + "loss": 0.001, + "step": 42050 + }, + { + "epoch": 14.13, + "grad_norm": 0.7698354125022888, + "learning_rate": 5.823216080402011e-06, + "loss": 0.0008, + "step": 42075 + }, + { + "epoch": 14.14, + "grad_norm": 0.10847024619579315, + "learning_rate": 5.82070351758794e-06, + "loss": 0.0008, + "step": 42100 + }, + { + "epoch": 14.15, + "grad_norm": 0.2588258683681488, + "learning_rate": 5.81819095477387e-06, + "loss": 0.0009, + "step": 42125 + }, + { + "epoch": 14.15, + "grad_norm": 0.37868186831474304, + "learning_rate": 5.8156783919597994e-06, + "loss": 0.0009, + "step": 42150 + }, + { + "epoch": 14.16, + "grad_norm": 0.3429957628250122, + "learning_rate": 5.81316582914573e-06, + "loss": 0.001, + "step": 42175 + }, + { + "epoch": 14.17, + "grad_norm": 0.08993576467037201, + "learning_rate": 5.810653266331658e-06, + "loss": 0.0009, + "step": 42200 + }, + { + "epoch": 14.18, + "grad_norm": 0.6597422361373901, + "learning_rate": 5.808140703517588e-06, + "loss": 0.001, + "step": 42225 + }, + { + "epoch": 14.19, + "grad_norm": 0.18693377077579498, + "learning_rate": 5.8056281407035176e-06, + "loss": 0.0008, + "step": 42250 + }, + { + "epoch": 14.2, + "grad_norm": 0.25261393189430237, + "learning_rate": 5.803115577889448e-06, + "loss": 0.0014, + "step": 42275 + }, + { + "epoch": 14.2, + "grad_norm": 0.6918069124221802, + "learning_rate": 5.800603015075378e-06, + "loss": 0.0008, + "step": 42300 + }, + { + "epoch": 14.21, + "grad_norm": 0.31569036841392517, + "learning_rate": 5.798090452261307e-06, + "loss": 0.001, + "step": 42325 + }, + { + "epoch": 14.22, + "grad_norm": 0.40848106145858765, + "learning_rate": 5.795577889447237e-06, + "loss": 0.0007, + "step": 42350 + }, + { + "epoch": 14.23, + "grad_norm": 0.02483958937227726, + "learning_rate": 5.793065326633166e-06, + "loss": 0.0011, + "step": 42375 + }, + { + "epoch": 14.24, + "grad_norm": 0.81584233045578, + "learning_rate": 5.790552763819096e-06, + "loss": 0.0013, + "step": 42400 + }, + { + "epoch": 14.25, + "grad_norm": 0.11518280953168869, + "learning_rate": 5.788040201005025e-06, + "loss": 0.0009, + "step": 42425 + }, + { + "epoch": 14.25, + "grad_norm": 0.1501922607421875, + "learning_rate": 5.7855276381909555e-06, + "loss": 0.0007, + "step": 42450 + }, + { + "epoch": 14.26, + "grad_norm": 0.5037822127342224, + "learning_rate": 5.783015075376886e-06, + "loss": 0.0007, + "step": 42475 + }, + { + "epoch": 14.27, + "grad_norm": 0.34381383657455444, + "learning_rate": 5.780502512562814e-06, + "loss": 0.0008, + "step": 42500 + }, + { + "epoch": 14.28, + "grad_norm": 0.3097711205482483, + "learning_rate": 5.777989949748744e-06, + "loss": 0.001, + "step": 42525 + }, + { + "epoch": 14.29, + "grad_norm": 0.6727478504180908, + "learning_rate": 5.775477386934674e-06, + "loss": 0.0007, + "step": 42550 + }, + { + "epoch": 14.3, + "grad_norm": 0.12777842581272125, + "learning_rate": 5.772964824120604e-06, + "loss": 0.0011, + "step": 42575 + }, + { + "epoch": 14.3, + "grad_norm": 0.263676255941391, + "learning_rate": 5.770452261306533e-06, + "loss": 0.0015, + "step": 42600 + }, + { + "epoch": 14.31, + "grad_norm": 0.2247818261384964, + "learning_rate": 5.767939698492463e-06, + "loss": 0.0011, + "step": 42625 + }, + { + "epoch": 14.32, + "grad_norm": 0.19879934191703796, + "learning_rate": 5.765427135678392e-06, + "loss": 0.0012, + "step": 42650 + }, + { + "epoch": 14.33, + "grad_norm": 1.2253483533859253, + "learning_rate": 5.762914572864322e-06, + "loss": 0.0014, + "step": 42675 + }, + { + "epoch": 14.34, + "grad_norm": 0.4955553710460663, + "learning_rate": 5.760402010050252e-06, + "loss": 0.0011, + "step": 42700 + }, + { + "epoch": 14.35, + "grad_norm": 0.6222733855247498, + "learning_rate": 5.7578894472361815e-06, + "loss": 0.0013, + "step": 42725 + }, + { + "epoch": 14.36, + "grad_norm": 0.7207738757133484, + "learning_rate": 5.755376884422112e-06, + "loss": 0.0012, + "step": 42750 + }, + { + "epoch": 14.36, + "grad_norm": 0.6752997040748596, + "learning_rate": 5.75286432160804e-06, + "loss": 0.001, + "step": 42775 + }, + { + "epoch": 14.37, + "grad_norm": 0.34977149963378906, + "learning_rate": 5.75035175879397e-06, + "loss": 0.0014, + "step": 42800 + }, + { + "epoch": 14.38, + "grad_norm": 1.5013421773910522, + "learning_rate": 5.7478391959799e-06, + "loss": 0.0021, + "step": 42825 + }, + { + "epoch": 14.39, + "grad_norm": 0.7281990647315979, + "learning_rate": 5.74532663316583e-06, + "loss": 0.0013, + "step": 42850 + }, + { + "epoch": 14.4, + "grad_norm": 1.4403456449508667, + "learning_rate": 5.742814070351759e-06, + "loss": 0.0013, + "step": 42875 + }, + { + "epoch": 14.41, + "grad_norm": 1.3536635637283325, + "learning_rate": 5.740301507537689e-06, + "loss": 0.0014, + "step": 42900 + }, + { + "epoch": 14.41, + "grad_norm": 1.002593755722046, + "learning_rate": 5.737788944723619e-06, + "loss": 0.0011, + "step": 42925 + }, + { + "epoch": 14.42, + "grad_norm": 0.8772359490394592, + "learning_rate": 5.735276381909548e-06, + "loss": 0.0011, + "step": 42950 + }, + { + "epoch": 14.43, + "grad_norm": 1.1332440376281738, + "learning_rate": 5.732763819095478e-06, + "loss": 0.0014, + "step": 42975 + }, + { + "epoch": 14.44, + "grad_norm": 0.7701210975646973, + "learning_rate": 5.7302512562814074e-06, + "loss": 0.0012, + "step": 43000 + }, + { + "epoch": 14.44, + "eval_loss": 0.2592407763004303, + "eval_runtime": 1158.3408, + "eval_samples_per_second": 1.216, + "eval_steps_per_second": 1.216, + "eval_wer": 18.913870632998965, + "step": 43000 + }, + { + "epoch": 14.45, + "grad_norm": 0.9310274124145508, + "learning_rate": 5.727738693467338e-06, + "loss": 0.0019, + "step": 43025 + }, + { + "epoch": 14.46, + "grad_norm": 0.3086060881614685, + "learning_rate": 5.725226130653266e-06, + "loss": 0.0011, + "step": 43050 + }, + { + "epoch": 14.46, + "grad_norm": 0.4486881196498871, + "learning_rate": 5.722713567839196e-06, + "loss": 0.0015, + "step": 43075 + }, + { + "epoch": 14.47, + "grad_norm": 0.2454104870557785, + "learning_rate": 5.7202010050251256e-06, + "loss": 0.0013, + "step": 43100 + }, + { + "epoch": 14.48, + "grad_norm": 0.5272873044013977, + "learning_rate": 5.717688442211056e-06, + "loss": 0.0013, + "step": 43125 + }, + { + "epoch": 14.49, + "grad_norm": 0.04413667693734169, + "learning_rate": 5.715175879396986e-06, + "loss": 0.0015, + "step": 43150 + }, + { + "epoch": 14.5, + "grad_norm": 0.14794039726257324, + "learning_rate": 5.712663316582915e-06, + "loss": 0.002, + "step": 43175 + }, + { + "epoch": 14.51, + "grad_norm": 0.677588701248169, + "learning_rate": 5.7101507537688446e-06, + "loss": 0.0027, + "step": 43200 + }, + { + "epoch": 14.51, + "grad_norm": 0.7181364297866821, + "learning_rate": 5.707638190954774e-06, + "loss": 0.0015, + "step": 43225 + }, + { + "epoch": 14.52, + "grad_norm": 0.998053252696991, + "learning_rate": 5.705125628140704e-06, + "loss": 0.0012, + "step": 43250 + }, + { + "epoch": 14.53, + "grad_norm": 0.7012277841567993, + "learning_rate": 5.702613065326633e-06, + "loss": 0.0016, + "step": 43275 + }, + { + "epoch": 14.54, + "grad_norm": 0.990146815776825, + "learning_rate": 5.7001005025125635e-06, + "loss": 0.0015, + "step": 43300 + }, + { + "epoch": 14.55, + "grad_norm": 1.2369849681854248, + "learning_rate": 5.697587939698494e-06, + "loss": 0.0016, + "step": 43325 + }, + { + "epoch": 14.56, + "grad_norm": 0.4904821813106537, + "learning_rate": 5.695075376884422e-06, + "loss": 0.0019, + "step": 43350 + }, + { + "epoch": 14.57, + "grad_norm": 0.4473670721054077, + "learning_rate": 5.692562814070352e-06, + "loss": 0.0014, + "step": 43375 + }, + { + "epoch": 14.57, + "grad_norm": 0.5323807001113892, + "learning_rate": 5.690050251256282e-06, + "loss": 0.0013, + "step": 43400 + }, + { + "epoch": 14.58, + "grad_norm": 1.0064618587493896, + "learning_rate": 5.687537688442212e-06, + "loss": 0.0015, + "step": 43425 + }, + { + "epoch": 14.59, + "grad_norm": 0.6155166625976562, + "learning_rate": 5.685025125628141e-06, + "loss": 0.0016, + "step": 43450 + }, + { + "epoch": 14.6, + "grad_norm": 0.7152019143104553, + "learning_rate": 5.682512562814071e-06, + "loss": 0.0012, + "step": 43475 + }, + { + "epoch": 14.61, + "grad_norm": 0.8592880964279175, + "learning_rate": 5.68e-06, + "loss": 0.0015, + "step": 43500 + }, + { + "epoch": 14.62, + "grad_norm": 0.4414939284324646, + "learning_rate": 5.67748743718593e-06, + "loss": 0.0013, + "step": 43525 + }, + { + "epoch": 14.62, + "grad_norm": 0.1515030562877655, + "learning_rate": 5.67497487437186e-06, + "loss": 0.0014, + "step": 43550 + }, + { + "epoch": 14.63, + "grad_norm": 0.26357215642929077, + "learning_rate": 5.6724623115577895e-06, + "loss": 0.0011, + "step": 43575 + }, + { + "epoch": 14.64, + "grad_norm": 1.149932622909546, + "learning_rate": 5.66994974874372e-06, + "loss": 0.0019, + "step": 43600 + }, + { + "epoch": 14.65, + "grad_norm": 0.15162578225135803, + "learning_rate": 5.667437185929648e-06, + "loss": 0.0017, + "step": 43625 + }, + { + "epoch": 14.66, + "grad_norm": 0.6022353172302246, + "learning_rate": 5.664924623115578e-06, + "loss": 0.0016, + "step": 43650 + }, + { + "epoch": 14.67, + "grad_norm": 0.31174349784851074, + "learning_rate": 5.662412060301508e-06, + "loss": 0.0017, + "step": 43675 + }, + { + "epoch": 14.67, + "grad_norm": 0.3649553060531616, + "learning_rate": 5.66e-06, + "loss": 0.0016, + "step": 43700 + }, + { + "epoch": 14.68, + "grad_norm": 0.44043630361557007, + "learning_rate": 5.65748743718593e-06, + "loss": 0.0018, + "step": 43725 + }, + { + "epoch": 14.69, + "grad_norm": 0.25661978125572205, + "learning_rate": 5.65497487437186e-06, + "loss": 0.0018, + "step": 43750 + }, + { + "epoch": 14.7, + "grad_norm": 0.18040047585964203, + "learning_rate": 5.652462311557789e-06, + "loss": 0.0013, + "step": 43775 + }, + { + "epoch": 14.71, + "grad_norm": 0.5250380635261536, + "learning_rate": 5.649949748743719e-06, + "loss": 0.0015, + "step": 43800 + }, + { + "epoch": 14.72, + "grad_norm": 0.38725194334983826, + "learning_rate": 5.647437185929648e-06, + "loss": 0.0014, + "step": 43825 + }, + { + "epoch": 14.72, + "grad_norm": 0.754550576210022, + "learning_rate": 5.6449246231155785e-06, + "loss": 0.0012, + "step": 43850 + }, + { + "epoch": 14.73, + "grad_norm": 0.660594642162323, + "learning_rate": 5.642412060301508e-06, + "loss": 0.0016, + "step": 43875 + }, + { + "epoch": 14.74, + "grad_norm": 0.1631971299648285, + "learning_rate": 5.639899497487438e-06, + "loss": 0.0016, + "step": 43900 + }, + { + "epoch": 14.75, + "grad_norm": 0.14780732989311218, + "learning_rate": 5.637386934673368e-06, + "loss": 0.0012, + "step": 43925 + }, + { + "epoch": 14.76, + "grad_norm": 1.1590347290039062, + "learning_rate": 5.634874371859297e-06, + "loss": 0.0011, + "step": 43950 + }, + { + "epoch": 14.77, + "grad_norm": 0.06167951598763466, + "learning_rate": 5.632361809045227e-06, + "loss": 0.0013, + "step": 43975 + }, + { + "epoch": 14.78, + "grad_norm": 0.9754897952079773, + "learning_rate": 5.629849246231156e-06, + "loss": 0.0009, + "step": 44000 + }, + { + "epoch": 14.78, + "eval_loss": 0.25302672386169434, + "eval_runtime": 1165.8735, + "eval_samples_per_second": 1.209, + "eval_steps_per_second": 1.209, + "eval_wer": 19.024558976132827, + "step": 44000 + }, + { + "epoch": 14.78, + "grad_norm": 0.0651739239692688, + "learning_rate": 5.627336683417086e-06, + "loss": 0.0008, + "step": 44025 + }, + { + "epoch": 14.79, + "grad_norm": 0.13579769432544708, + "learning_rate": 5.624824120603015e-06, + "loss": 0.0016, + "step": 44050 + }, + { + "epoch": 14.8, + "grad_norm": 0.04283386841416359, + "learning_rate": 5.622311557788945e-06, + "loss": 0.0016, + "step": 44075 + }, + { + "epoch": 14.81, + "grad_norm": 0.7215760946273804, + "learning_rate": 5.619798994974874e-06, + "loss": 0.0013, + "step": 44100 + }, + { + "epoch": 14.82, + "grad_norm": 0.18281732499599457, + "learning_rate": 5.6172864321608044e-06, + "loss": 0.0008, + "step": 44125 + }, + { + "epoch": 14.83, + "grad_norm": 0.11862493306398392, + "learning_rate": 5.614773869346735e-06, + "loss": 0.0012, + "step": 44150 + }, + { + "epoch": 14.83, + "grad_norm": 0.6536908149719238, + "learning_rate": 5.612261306532664e-06, + "loss": 0.0012, + "step": 44175 + }, + { + "epoch": 14.84, + "grad_norm": 0.24267266690731049, + "learning_rate": 5.609748743718594e-06, + "loss": 0.001, + "step": 44200 + }, + { + "epoch": 14.85, + "grad_norm": 0.8674318194389343, + "learning_rate": 5.607236180904523e-06, + "loss": 0.0015, + "step": 44225 + }, + { + "epoch": 14.86, + "grad_norm": 1.0413219928741455, + "learning_rate": 5.604723618090453e-06, + "loss": 0.0018, + "step": 44250 + }, + { + "epoch": 14.87, + "grad_norm": 0.3875278830528259, + "learning_rate": 5.602211055276382e-06, + "loss": 0.0014, + "step": 44275 + }, + { + "epoch": 14.88, + "grad_norm": 0.898213803768158, + "learning_rate": 5.599698492462312e-06, + "loss": 0.001, + "step": 44300 + }, + { + "epoch": 14.88, + "grad_norm": 1.5082194805145264, + "learning_rate": 5.597185929648241e-06, + "loss": 0.0011, + "step": 44325 + }, + { + "epoch": 14.89, + "grad_norm": 0.700634777545929, + "learning_rate": 5.594673366834171e-06, + "loss": 0.001, + "step": 44350 + }, + { + "epoch": 14.9, + "grad_norm": 0.5329068303108215, + "learning_rate": 5.592160804020101e-06, + "loss": 0.0022, + "step": 44375 + }, + { + "epoch": 14.91, + "grad_norm": 0.48474615812301636, + "learning_rate": 5.58964824120603e-06, + "loss": 0.0015, + "step": 44400 + }, + { + "epoch": 14.92, + "grad_norm": 0.16658170521259308, + "learning_rate": 5.5871356783919606e-06, + "loss": 0.0012, + "step": 44425 + }, + { + "epoch": 14.93, + "grad_norm": 0.8300848603248596, + "learning_rate": 5.58462311557789e-06, + "loss": 0.0013, + "step": 44450 + }, + { + "epoch": 14.93, + "grad_norm": 0.45337405800819397, + "learning_rate": 5.58211055276382e-06, + "loss": 0.0014, + "step": 44475 + }, + { + "epoch": 14.94, + "grad_norm": 0.9672030210494995, + "learning_rate": 5.5795979899497485e-06, + "loss": 0.0017, + "step": 44500 + }, + { + "epoch": 14.95, + "grad_norm": 1.3718664646148682, + "learning_rate": 5.577085427135679e-06, + "loss": 0.0012, + "step": 44525 + }, + { + "epoch": 14.96, + "grad_norm": 0.450096994638443, + "learning_rate": 5.574572864321609e-06, + "loss": 0.0013, + "step": 44550 + }, + { + "epoch": 14.97, + "grad_norm": 1.0601868629455566, + "learning_rate": 5.572060301507538e-06, + "loss": 0.0011, + "step": 44575 + }, + { + "epoch": 14.98, + "grad_norm": 0.12035061419010162, + "learning_rate": 5.569547738693468e-06, + "loss": 0.0013, + "step": 44600 + }, + { + "epoch": 14.98, + "grad_norm": 0.24827928841114044, + "learning_rate": 5.567035175879397e-06, + "loss": 0.0026, + "step": 44625 + }, + { + "epoch": 14.99, + "grad_norm": 0.48974061012268066, + "learning_rate": 5.564522613065327e-06, + "loss": 0.001, + "step": 44650 + }, + { + "epoch": 15.0, + "grad_norm": 0.116686150431633, + "learning_rate": 5.562010050251256e-06, + "loss": 0.0018, + "step": 44675 + }, + { + "epoch": 15.01, + "grad_norm": 0.6925620436668396, + "learning_rate": 5.5594974874371865e-06, + "loss": 0.0014, + "step": 44700 + }, + { + "epoch": 15.02, + "grad_norm": 0.5164862275123596, + "learning_rate": 5.556984924623116e-06, + "loss": 0.001, + "step": 44725 + }, + { + "epoch": 15.03, + "grad_norm": 0.02969621866941452, + "learning_rate": 5.554472361809046e-06, + "loss": 0.0006, + "step": 44750 + }, + { + "epoch": 15.04, + "grad_norm": 0.33094948530197144, + "learning_rate": 5.551959798994976e-06, + "loss": 0.0007, + "step": 44775 + }, + { + "epoch": 15.04, + "grad_norm": 0.14733915030956268, + "learning_rate": 5.549447236180905e-06, + "loss": 0.0007, + "step": 44800 + }, + { + "epoch": 15.05, + "grad_norm": 0.0323140025138855, + "learning_rate": 5.546934673366835e-06, + "loss": 0.0006, + "step": 44825 + }, + { + "epoch": 15.06, + "grad_norm": 1.877301573753357, + "learning_rate": 5.544422110552764e-06, + "loss": 0.0008, + "step": 44850 + }, + { + "epoch": 15.07, + "grad_norm": 0.16313853859901428, + "learning_rate": 5.541909547738694e-06, + "loss": 0.0005, + "step": 44875 + }, + { + "epoch": 15.08, + "grad_norm": 0.9675766825675964, + "learning_rate": 5.539396984924623e-06, + "loss": 0.0005, + "step": 44900 + }, + { + "epoch": 15.09, + "grad_norm": 0.03807753324508667, + "learning_rate": 5.536884422110553e-06, + "loss": 0.001, + "step": 44925 + }, + { + "epoch": 15.09, + "grad_norm": 0.4907311201095581, + "learning_rate": 5.534371859296482e-06, + "loss": 0.0006, + "step": 44950 + }, + { + "epoch": 15.1, + "grad_norm": 0.4150041341781616, + "learning_rate": 5.5318592964824124e-06, + "loss": 0.0005, + "step": 44975 + }, + { + "epoch": 15.11, + "grad_norm": 0.11915123462677002, + "learning_rate": 5.529346733668343e-06, + "loss": 0.0008, + "step": 45000 + }, + { + "epoch": 15.11, + "eval_loss": 0.25997817516326904, + "eval_runtime": 1159.0382, + "eval_samples_per_second": 1.216, + "eval_steps_per_second": 1.216, + "eval_wer": 19.245935662400555, + "step": 45000 + }, + { + "epoch": 15.12, + "grad_norm": 0.06512346863746643, + "learning_rate": 5.526834170854272e-06, + "loss": 0.0006, + "step": 45025 + }, + { + "epoch": 15.13, + "grad_norm": 0.04392273351550102, + "learning_rate": 5.524321608040202e-06, + "loss": 0.0008, + "step": 45050 + }, + { + "epoch": 15.14, + "grad_norm": 0.06722340732812881, + "learning_rate": 5.521809045226131e-06, + "loss": 0.0008, + "step": 45075 + }, + { + "epoch": 15.14, + "grad_norm": 0.2564724385738373, + "learning_rate": 5.519296482412061e-06, + "loss": 0.0009, + "step": 45100 + }, + { + "epoch": 15.15, + "grad_norm": 0.5248950719833374, + "learning_rate": 5.51678391959799e-06, + "loss": 0.0007, + "step": 45125 + }, + { + "epoch": 15.16, + "grad_norm": 0.10055634379386902, + "learning_rate": 5.51427135678392e-06, + "loss": 0.001, + "step": 45150 + }, + { + "epoch": 15.17, + "grad_norm": 0.21809829771518707, + "learning_rate": 5.51175879396985e-06, + "loss": 0.0006, + "step": 45175 + }, + { + "epoch": 15.18, + "grad_norm": 0.23687508702278137, + "learning_rate": 5.509246231155779e-06, + "loss": 0.0006, + "step": 45200 + }, + { + "epoch": 15.19, + "grad_norm": 0.0417819544672966, + "learning_rate": 5.506733668341709e-06, + "loss": 0.0004, + "step": 45225 + }, + { + "epoch": 15.19, + "grad_norm": 0.5304688811302185, + "learning_rate": 5.504221105527638e-06, + "loss": 0.0007, + "step": 45250 + }, + { + "epoch": 15.2, + "grad_norm": 1.3457707166671753, + "learning_rate": 5.5017085427135686e-06, + "loss": 0.0006, + "step": 45275 + }, + { + "epoch": 15.21, + "grad_norm": 0.41790375113487244, + "learning_rate": 5.499195979899498e-06, + "loss": 0.0004, + "step": 45300 + }, + { + "epoch": 15.22, + "grad_norm": 0.09887022525072098, + "learning_rate": 5.496683417085428e-06, + "loss": 0.0009, + "step": 45325 + }, + { + "epoch": 15.23, + "grad_norm": 1.404980182647705, + "learning_rate": 5.4941708542713565e-06, + "loss": 0.0009, + "step": 45350 + }, + { + "epoch": 15.24, + "grad_norm": 0.8648458123207092, + "learning_rate": 5.491658291457287e-06, + "loss": 0.0012, + "step": 45375 + }, + { + "epoch": 15.25, + "grad_norm": 0.2609533965587616, + "learning_rate": 5.489145728643217e-06, + "loss": 0.001, + "step": 45400 + }, + { + "epoch": 15.25, + "grad_norm": 0.24903236329555511, + "learning_rate": 5.486633165829146e-06, + "loss": 0.0006, + "step": 45425 + }, + { + "epoch": 15.26, + "grad_norm": 0.40059176087379456, + "learning_rate": 5.484120603015076e-06, + "loss": 0.0013, + "step": 45450 + }, + { + "epoch": 15.27, + "grad_norm": 0.073471799492836, + "learning_rate": 5.481608040201005e-06, + "loss": 0.001, + "step": 45475 + }, + { + "epoch": 15.28, + "grad_norm": 0.09258909523487091, + "learning_rate": 5.479095477386935e-06, + "loss": 0.0012, + "step": 45500 + }, + { + "epoch": 15.29, + "grad_norm": 0.22275954484939575, + "learning_rate": 5.476582914572864e-06, + "loss": 0.0006, + "step": 45525 + }, + { + "epoch": 15.3, + "grad_norm": 0.16128885746002197, + "learning_rate": 5.4740703517587945e-06, + "loss": 0.0011, + "step": 45550 + }, + { + "epoch": 15.3, + "grad_norm": 1.0559899806976318, + "learning_rate": 5.471557788944724e-06, + "loss": 0.0011, + "step": 45575 + }, + { + "epoch": 15.31, + "grad_norm": 1.2226771116256714, + "learning_rate": 5.469045226130654e-06, + "loss": 0.0009, + "step": 45600 + }, + { + "epoch": 15.32, + "grad_norm": 0.2521926462650299, + "learning_rate": 5.466532663316584e-06, + "loss": 0.001, + "step": 45625 + }, + { + "epoch": 15.33, + "grad_norm": 1.1741470098495483, + "learning_rate": 5.464020100502513e-06, + "loss": 0.001, + "step": 45650 + }, + { + "epoch": 15.34, + "grad_norm": 0.2982543408870697, + "learning_rate": 5.461507537688443e-06, + "loss": 0.0009, + "step": 45675 + }, + { + "epoch": 15.35, + "grad_norm": 0.27279847860336304, + "learning_rate": 5.458994974874372e-06, + "loss": 0.0008, + "step": 45700 + }, + { + "epoch": 15.35, + "grad_norm": 0.16742606461048126, + "learning_rate": 5.456482412060302e-06, + "loss": 0.001, + "step": 45725 + }, + { + "epoch": 15.36, + "grad_norm": 0.5723238587379456, + "learning_rate": 5.453969849246231e-06, + "loss": 0.001, + "step": 45750 + }, + { + "epoch": 15.37, + "grad_norm": 0.2030552476644516, + "learning_rate": 5.451457286432161e-06, + "loss": 0.0011, + "step": 45775 + }, + { + "epoch": 15.38, + "grad_norm": 0.168288454413414, + "learning_rate": 5.44894472361809e-06, + "loss": 0.0007, + "step": 45800 + }, + { + "epoch": 15.39, + "grad_norm": 0.46041226387023926, + "learning_rate": 5.4464321608040204e-06, + "loss": 0.0009, + "step": 45825 + }, + { + "epoch": 15.4, + "grad_norm": 0.5023003220558167, + "learning_rate": 5.443919597989951e-06, + "loss": 0.0008, + "step": 45850 + }, + { + "epoch": 15.4, + "grad_norm": 1.0514823198318481, + "learning_rate": 5.44140703517588e-06, + "loss": 0.0014, + "step": 45875 + }, + { + "epoch": 15.41, + "grad_norm": 0.4136808514595032, + "learning_rate": 5.43889447236181e-06, + "loss": 0.0009, + "step": 45900 + }, + { + "epoch": 15.42, + "grad_norm": 0.19774511456489563, + "learning_rate": 5.436381909547739e-06, + "loss": 0.0007, + "step": 45925 + }, + { + "epoch": 15.43, + "grad_norm": 0.9684527516365051, + "learning_rate": 5.433869346733669e-06, + "loss": 0.0009, + "step": 45950 + }, + { + "epoch": 15.44, + "grad_norm": 0.09493457525968552, + "learning_rate": 5.431356783919598e-06, + "loss": 0.0008, + "step": 45975 + }, + { + "epoch": 15.45, + "grad_norm": 0.504849374294281, + "learning_rate": 5.428844221105528e-06, + "loss": 0.0009, + "step": 46000 + }, + { + "epoch": 15.45, + "eval_loss": 0.2668718695640564, + "eval_runtime": 1154.8452, + "eval_samples_per_second": 1.22, + "eval_steps_per_second": 1.22, + "eval_wer": 19.05223106191629, + "step": 46000 + }, + { + "epoch": 15.46, + "grad_norm": 0.6717623472213745, + "learning_rate": 5.426331658291458e-06, + "loss": 0.0012, + "step": 46025 + }, + { + "epoch": 15.46, + "grad_norm": 0.9771360158920288, + "learning_rate": 5.423819095477387e-06, + "loss": 0.0014, + "step": 46050 + }, + { + "epoch": 15.47, + "grad_norm": 1.1335792541503906, + "learning_rate": 5.421306532663317e-06, + "loss": 0.001, + "step": 46075 + }, + { + "epoch": 15.48, + "grad_norm": 0.17994563281536102, + "learning_rate": 5.418793969849246e-06, + "loss": 0.0008, + "step": 46100 + }, + { + "epoch": 15.49, + "grad_norm": 0.15336956083774567, + "learning_rate": 5.4162814070351766e-06, + "loss": 0.0006, + "step": 46125 + }, + { + "epoch": 15.5, + "grad_norm": 0.18147267401218414, + "learning_rate": 5.413768844221106e-06, + "loss": 0.0008, + "step": 46150 + }, + { + "epoch": 15.51, + "grad_norm": 1.572707176208496, + "learning_rate": 5.411256281407036e-06, + "loss": 0.0016, + "step": 46175 + }, + { + "epoch": 15.51, + "grad_norm": 0.04582465440034866, + "learning_rate": 5.4087437185929645e-06, + "loss": 0.0008, + "step": 46200 + }, + { + "epoch": 15.52, + "grad_norm": 1.3432215452194214, + "learning_rate": 5.406231155778895e-06, + "loss": 0.0013, + "step": 46225 + }, + { + "epoch": 15.53, + "grad_norm": 0.11735803633928299, + "learning_rate": 5.403718592964825e-06, + "loss": 0.0012, + "step": 46250 + }, + { + "epoch": 15.54, + "grad_norm": 0.06766407936811447, + "learning_rate": 5.401206030150754e-06, + "loss": 0.0011, + "step": 46275 + }, + { + "epoch": 15.55, + "grad_norm": 0.6037635803222656, + "learning_rate": 5.398693467336684e-06, + "loss": 0.0011, + "step": 46300 + }, + { + "epoch": 15.56, + "grad_norm": 0.026819320395588875, + "learning_rate": 5.396180904522613e-06, + "loss": 0.0012, + "step": 46325 + }, + { + "epoch": 15.56, + "grad_norm": 0.4241013526916504, + "learning_rate": 5.393668341708543e-06, + "loss": 0.0017, + "step": 46350 + }, + { + "epoch": 15.57, + "grad_norm": 0.6957968473434448, + "learning_rate": 5.391155778894472e-06, + "loss": 0.0016, + "step": 46375 + }, + { + "epoch": 15.58, + "grad_norm": 0.6102727055549622, + "learning_rate": 5.3886432160804025e-06, + "loss": 0.0015, + "step": 46400 + }, + { + "epoch": 15.59, + "grad_norm": 0.35109901428222656, + "learning_rate": 5.386130653266332e-06, + "loss": 0.0015, + "step": 46425 + }, + { + "epoch": 15.6, + "grad_norm": 0.6426429152488708, + "learning_rate": 5.383618090452262e-06, + "loss": 0.0015, + "step": 46450 + }, + { + "epoch": 15.61, + "grad_norm": 0.2735039293766022, + "learning_rate": 5.381105527638192e-06, + "loss": 0.0014, + "step": 46475 + }, + { + "epoch": 15.61, + "grad_norm": 0.11153319478034973, + "learning_rate": 5.378592964824121e-06, + "loss": 0.0014, + "step": 46500 + }, + { + "epoch": 15.62, + "grad_norm": 0.7899156808853149, + "learning_rate": 5.376080402010051e-06, + "loss": 0.0011, + "step": 46525 + }, + { + "epoch": 15.63, + "grad_norm": 0.5147865414619446, + "learning_rate": 5.37356783919598e-06, + "loss": 0.0008, + "step": 46550 + }, + { + "epoch": 15.64, + "grad_norm": 0.29838889837265015, + "learning_rate": 5.37105527638191e-06, + "loss": 0.0009, + "step": 46575 + }, + { + "epoch": 15.65, + "grad_norm": 0.45745232701301575, + "learning_rate": 5.368542713567839e-06, + "loss": 0.0011, + "step": 46600 + }, + { + "epoch": 15.66, + "grad_norm": 0.3206990659236908, + "learning_rate": 5.366030150753769e-06, + "loss": 0.0013, + "step": 46625 + }, + { + "epoch": 15.66, + "grad_norm": 0.24014590680599213, + "learning_rate": 5.363517587939699e-06, + "loss": 0.0014, + "step": 46650 + }, + { + "epoch": 15.67, + "grad_norm": 0.45811983942985535, + "learning_rate": 5.3611055276381915e-06, + "loss": 0.0016, + "step": 46675 + }, + { + "epoch": 15.68, + "grad_norm": 0.05055844783782959, + "learning_rate": 5.358592964824121e-06, + "loss": 0.0009, + "step": 46700 + }, + { + "epoch": 15.69, + "grad_norm": 0.7755090594291687, + "learning_rate": 5.356080402010051e-06, + "loss": 0.0016, + "step": 46725 + }, + { + "epoch": 15.7, + "grad_norm": 1.5003502368927002, + "learning_rate": 5.35356783919598e-06, + "loss": 0.0015, + "step": 46750 + }, + { + "epoch": 15.71, + "grad_norm": 0.10321252793073654, + "learning_rate": 5.3510552763819105e-06, + "loss": 0.0009, + "step": 46775 + }, + { + "epoch": 15.72, + "grad_norm": 0.6728261113166809, + "learning_rate": 5.348643216080403e-06, + "loss": 0.0013, + "step": 46800 + }, + { + "epoch": 15.72, + "grad_norm": 0.4052286744117737, + "learning_rate": 5.346130653266332e-06, + "loss": 0.001, + "step": 46825 + }, + { + "epoch": 15.73, + "grad_norm": 0.9077565670013428, + "learning_rate": 5.3436180904522615e-06, + "loss": 0.001, + "step": 46850 + }, + { + "epoch": 15.74, + "grad_norm": 0.18722866475582123, + "learning_rate": 5.341105527638192e-06, + "loss": 0.001, + "step": 46875 + }, + { + "epoch": 15.75, + "grad_norm": 0.2173491269350052, + "learning_rate": 5.338592964824121e-06, + "loss": 0.0009, + "step": 46900 + }, + { + "epoch": 15.76, + "grad_norm": 0.25214883685112, + "learning_rate": 5.336080402010051e-06, + "loss": 0.0008, + "step": 46925 + }, + { + "epoch": 15.77, + "grad_norm": 0.09612807631492615, + "learning_rate": 5.33356783919598e-06, + "loss": 0.001, + "step": 46950 + }, + { + "epoch": 15.77, + "grad_norm": 0.47880762815475464, + "learning_rate": 5.33105527638191e-06, + "loss": 0.001, + "step": 46975 + }, + { + "epoch": 15.78, + "grad_norm": 1.3311128616333008, + "learning_rate": 5.328542713567839e-06, + "loss": 0.0013, + "step": 47000 + }, + { + "epoch": 15.78, + "eval_loss": 0.2613765299320221, + "eval_runtime": 1160.8889, + "eval_samples_per_second": 1.214, + "eval_steps_per_second": 1.214, + "eval_wer": 18.93462469733656, + "step": 47000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 1000, + "total_flos": 4.339631815852032e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/maithili/checkpoint-47000/training_args.bin b/checkpoints/whisper-small/maithili/checkpoint-47000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4089dd648c79bd2ca3ea6ddb53106e16e93d0239 --- /dev/null +++ b/checkpoints/whisper-small/maithili/checkpoint-47000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6192564c579d774b07d82c73dccc65d141f9bbb09e747ec430f650e7e8a5ca3 +size 4667 diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/config.json b/checkpoints/whisper-small/marathi/checkpoint-27000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2028b4620af874d35b873d854b5b82dff46f3006 --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50320 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/generation_config.json b/checkpoints/whisper-small/marathi/checkpoint-27000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/model.safetensors b/checkpoints/whisper-small/marathi/checkpoint-27000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9066344da53ebbd30fbb0a7b560a9642d912621b --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8179b916f3212bebb1179751d2b7406160c97192e1b76370514320cce7ddc38f +size 966995080 diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/optimizer.pt b/checkpoints/whisper-small/marathi/checkpoint-27000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..182eb5d49fd7449d84f06fa18a08fba9f310faf7 --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8d465328e9e979f063fd72491f7f17f06f60d77d13bcff87f9ce460ce52bf98 +size 1925063607 diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/preprocessor_config.json b/checkpoints/whisper-small/marathi/checkpoint-27000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/rng_state.pth b/checkpoints/whisper-small/marathi/checkpoint-27000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..67da8c77ac5321d108b04d5dcd6bfeeac0c4d7af --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091562ac765606b2d1e5959ec5265fddefc5a1ac27e21c4d3f5b67725759b611 +size 14575 diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/scheduler.pt b/checkpoints/whisper-small/marathi/checkpoint-27000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f369d4eeca459635fd073d40e108eae9733f146f --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c62d069baa78bbf12b0f29e6b1a2904ae9169e82a8881965c9bd71cb078ecd88 +size 627 diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/trainer_state.json b/checkpoints/whisper-small/marathi/checkpoint-27000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8b2e833899b8c9e61991232710470e70c848e33b --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/trainer_state.json @@ -0,0 +1,7824 @@ +{ + "best_metric": 16.422390958855733, + "best_model_checkpoint": "results/whisper-small/marathi/checkpoint-17000", + "epoch": 9.066487575554063, + "eval_steps": 1000, + "global_step": 27000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 41.549041748046875, + "learning_rate": 4.4e-07, + "loss": 2.145, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 14.274330139160156, + "learning_rate": 9.400000000000001e-07, + "loss": 1.6731, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 6.5371623039245605, + "learning_rate": 1.44e-06, + "loss": 1.1597, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 6.048511505126953, + "learning_rate": 1.94e-06, + "loss": 0.9, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 5.208028793334961, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.8052, + "step": 125 + }, + { + "epoch": 0.05, + "grad_norm": 5.455846786499023, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.6974, + "step": 150 + }, + { + "epoch": 0.06, + "grad_norm": 5.493101596832275, + "learning_rate": 3.44e-06, + "loss": 0.6509, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 4.952133655548096, + "learning_rate": 3.94e-06, + "loss": 0.5753, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 4.658987998962402, + "learning_rate": 4.440000000000001e-06, + "loss": 0.5567, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 4.80238151550293, + "learning_rate": 4.94e-06, + "loss": 0.5158, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 5.003881454467773, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.4673, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 4.965816020965576, + "learning_rate": 5.94e-06, + "loss": 0.4411, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 4.522903919219971, + "learning_rate": 6.440000000000001e-06, + "loss": 0.3962, + "step": 325 + }, + { + "epoch": 0.12, + "grad_norm": 4.218821048736572, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.3385, + "step": 350 + }, + { + "epoch": 0.13, + "grad_norm": 4.2727952003479, + "learning_rate": 7.440000000000001e-06, + "loss": 0.3114, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 3.8129806518554688, + "learning_rate": 7.94e-06, + "loss": 0.2962, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 4.224565505981445, + "learning_rate": 8.44e-06, + "loss": 0.287, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 4.096001625061035, + "learning_rate": 8.94e-06, + "loss": 0.2834, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 3.4027295112609863, + "learning_rate": 9.440000000000001e-06, + "loss": 0.2549, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 3.7686753273010254, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2675, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 3.5863723754882812, + "learning_rate": 9.997788944723618e-06, + "loss": 0.2491, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 3.8767905235290527, + "learning_rate": 9.99527638190955e-06, + "loss": 0.2441, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 3.476703405380249, + "learning_rate": 9.992763819095477e-06, + "loss": 0.2442, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 3.102367401123047, + "learning_rate": 9.990251256281408e-06, + "loss": 0.2507, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 3.154153823852539, + "learning_rate": 9.987738693467337e-06, + "loss": 0.2233, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 3.499101161956787, + "learning_rate": 9.985226130653267e-06, + "loss": 0.2209, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 3.4435086250305176, + "learning_rate": 9.982713567839198e-06, + "loss": 0.2255, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 2.431523561477661, + "learning_rate": 9.980201005025127e-06, + "loss": 0.2103, + "step": 700 + }, + { + "epoch": 0.24, + "grad_norm": 3.6072354316711426, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2058, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 3.2165238857269287, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2112, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 3.0812106132507324, + "learning_rate": 9.972663316582915e-06, + "loss": 0.1984, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 2.8091812133789062, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2023, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 2.8220300674438477, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2037, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 2.826014995574951, + "learning_rate": 9.965125628140703e-06, + "loss": 0.1966, + "step": 850 + }, + { + "epoch": 0.29, + "grad_norm": 3.0548243522644043, + "learning_rate": 9.962613065326634e-06, + "loss": 0.198, + "step": 875 + }, + { + "epoch": 0.3, + "grad_norm": 3.264997720718384, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1947, + "step": 900 + }, + { + "epoch": 0.31, + "grad_norm": 3.695375442504883, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1969, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 2.866607427597046, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1757, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 2.9225738048553467, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1827, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 2.7022693157196045, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1796, + "step": 1000 + }, + { + "epoch": 0.34, + "eval_loss": 0.11966908723115921, + "eval_runtime": 1241.8147, + "eval_samples_per_second": 1.116, + "eval_steps_per_second": 1.116, + "eval_wer": 25.30460886455942, + "step": 1000 + }, + { + "epoch": 0.34, + "grad_norm": 3.097360134124756, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1775, + "step": 1025 + }, + { + "epoch": 0.35, + "grad_norm": 3.068483591079712, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1824, + "step": 1050 + }, + { + "epoch": 0.36, + "grad_norm": 2.5846781730651855, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1658, + "step": 1075 + }, + { + "epoch": 0.37, + "grad_norm": 2.3406286239624023, + "learning_rate": 9.940000000000001e-06, + "loss": 0.174, + "step": 1100 + }, + { + "epoch": 0.38, + "grad_norm": 2.944446086883545, + "learning_rate": 9.93748743718593e-06, + "loss": 0.1675, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 2.748361587524414, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1694, + "step": 1150 + }, + { + "epoch": 0.39, + "grad_norm": 2.679593086242676, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1598, + "step": 1175 + }, + { + "epoch": 0.4, + "grad_norm": 2.694540500640869, + "learning_rate": 9.929949748743719e-06, + "loss": 0.163, + "step": 1200 + }, + { + "epoch": 0.41, + "grad_norm": 2.7351558208465576, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1632, + "step": 1225 + }, + { + "epoch": 0.42, + "grad_norm": 3.2272815704345703, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1685, + "step": 1250 + }, + { + "epoch": 0.43, + "grad_norm": 2.9162490367889404, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1614, + "step": 1275 + }, + { + "epoch": 0.44, + "grad_norm": 2.3532989025115967, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1586, + "step": 1300 + }, + { + "epoch": 0.44, + "grad_norm": 2.7829198837280273, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1599, + "step": 1325 + }, + { + "epoch": 0.45, + "grad_norm": 2.8058085441589355, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1515, + "step": 1350 + }, + { + "epoch": 0.46, + "grad_norm": 2.5609302520751953, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1532, + "step": 1375 + }, + { + "epoch": 0.47, + "grad_norm": 2.3817877769470215, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1559, + "step": 1400 + }, + { + "epoch": 0.48, + "grad_norm": 2.5465707778930664, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1499, + "step": 1425 + }, + { + "epoch": 0.49, + "grad_norm": 3.1504642963409424, + "learning_rate": 9.904824120603015e-06, + "loss": 0.16, + "step": 1450 + }, + { + "epoch": 0.5, + "grad_norm": 2.907113552093506, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1521, + "step": 1475 + }, + { + "epoch": 0.5, + "grad_norm": 2.053865432739258, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1504, + "step": 1500 + }, + { + "epoch": 0.51, + "grad_norm": 2.680102586746216, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1504, + "step": 1525 + }, + { + "epoch": 0.52, + "grad_norm": 2.428081750869751, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1465, + "step": 1550 + }, + { + "epoch": 0.53, + "grad_norm": 2.538038969039917, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1452, + "step": 1575 + }, + { + "epoch": 0.54, + "grad_norm": 3.0585122108459473, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1479, + "step": 1600 + }, + { + "epoch": 0.55, + "grad_norm": 2.7058053016662598, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1545, + "step": 1625 + }, + { + "epoch": 0.55, + "grad_norm": 2.7370285987854004, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1352, + "step": 1650 + }, + { + "epoch": 0.56, + "grad_norm": 3.2852165699005127, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1404, + "step": 1675 + }, + { + "epoch": 0.57, + "grad_norm": 2.3595335483551025, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1328, + "step": 1700 + }, + { + "epoch": 0.58, + "grad_norm": 2.5697829723358154, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1371, + "step": 1725 + }, + { + "epoch": 0.59, + "grad_norm": 2.8670291900634766, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1411, + "step": 1750 + }, + { + "epoch": 0.6, + "grad_norm": 2.652900218963623, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1383, + "step": 1775 + }, + { + "epoch": 0.6, + "grad_norm": 2.3750619888305664, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1322, + "step": 1800 + }, + { + "epoch": 0.61, + "grad_norm": 2.5834813117980957, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1346, + "step": 1825 + }, + { + "epoch": 0.62, + "grad_norm": 2.2527308464050293, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1385, + "step": 1850 + }, + { + "epoch": 0.63, + "grad_norm": 2.785240411758423, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1355, + "step": 1875 + }, + { + "epoch": 0.64, + "grad_norm": 1.9619439840316772, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1327, + "step": 1900 + }, + { + "epoch": 0.65, + "grad_norm": 2.1625936031341553, + "learning_rate": 9.85708542713568e-06, + "loss": 0.134, + "step": 1925 + }, + { + "epoch": 0.65, + "grad_norm": 2.4821691513061523, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1377, + "step": 1950 + }, + { + "epoch": 0.66, + "grad_norm": 2.7250044345855713, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1355, + "step": 1975 + }, + { + "epoch": 0.67, + "grad_norm": 2.5310420989990234, + "learning_rate": 9.849547738693467e-06, + "loss": 0.135, + "step": 2000 + }, + { + "epoch": 0.67, + "eval_loss": 0.0919841080904007, + "eval_runtime": 1222.8507, + "eval_samples_per_second": 1.133, + "eval_steps_per_second": 1.133, + "eval_wer": 20.554476425922655, + "step": 2000 + }, + { + "epoch": 0.68, + "grad_norm": 2.251912832260132, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1303, + "step": 2025 + }, + { + "epoch": 0.69, + "grad_norm": 2.1909947395324707, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1292, + "step": 2050 + }, + { + "epoch": 0.7, + "grad_norm": 2.83449649810791, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1252, + "step": 2075 + }, + { + "epoch": 0.71, + "grad_norm": 2.993854284286499, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1273, + "step": 2100 + }, + { + "epoch": 0.71, + "grad_norm": 2.281494140625, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1341, + "step": 2125 + }, + { + "epoch": 0.72, + "grad_norm": 2.747234344482422, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1293, + "step": 2150 + }, + { + "epoch": 0.73, + "grad_norm": 2.4248714447021484, + "learning_rate": 9.831959798994976e-06, + "loss": 0.122, + "step": 2175 + }, + { + "epoch": 0.74, + "grad_norm": 2.446444511413574, + "learning_rate": 9.829447236180905e-06, + "loss": 0.13, + "step": 2200 + }, + { + "epoch": 0.75, + "grad_norm": 2.1780622005462646, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1272, + "step": 2225 + }, + { + "epoch": 0.76, + "grad_norm": 2.8826751708984375, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1235, + "step": 2250 + }, + { + "epoch": 0.76, + "grad_norm": 2.162856340408325, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1375, + "step": 2275 + }, + { + "epoch": 0.77, + "grad_norm": 2.3841776847839355, + "learning_rate": 9.819396984924624e-06, + "loss": 0.1253, + "step": 2300 + }, + { + "epoch": 0.78, + "grad_norm": 2.577899217605591, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1171, + "step": 2325 + }, + { + "epoch": 0.79, + "grad_norm": 2.0833990573883057, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1186, + "step": 2350 + }, + { + "epoch": 0.8, + "grad_norm": 2.3612773418426514, + "learning_rate": 9.811859296482414e-06, + "loss": 0.122, + "step": 2375 + }, + { + "epoch": 0.81, + "grad_norm": 2.7865421772003174, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1186, + "step": 2400 + }, + { + "epoch": 0.81, + "grad_norm": 2.1974422931671143, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1311, + "step": 2425 + }, + { + "epoch": 0.82, + "grad_norm": 2.08581280708313, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1199, + "step": 2450 + }, + { + "epoch": 0.83, + "grad_norm": 2.3689684867858887, + "learning_rate": 9.801809045226131e-06, + "loss": 0.1182, + "step": 2475 + }, + { + "epoch": 0.84, + "grad_norm": 2.4708657264709473, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1139, + "step": 2500 + }, + { + "epoch": 0.85, + "grad_norm": 2.3578500747680664, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1203, + "step": 2525 + }, + { + "epoch": 0.86, + "grad_norm": 2.4924559593200684, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1196, + "step": 2550 + }, + { + "epoch": 0.86, + "grad_norm": 2.2446844577789307, + "learning_rate": 9.79175879396985e-06, + "loss": 0.1243, + "step": 2575 + }, + { + "epoch": 0.87, + "grad_norm": 1.8431588411331177, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1183, + "step": 2600 + }, + { + "epoch": 0.88, + "grad_norm": 2.0669355392456055, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1176, + "step": 2625 + }, + { + "epoch": 0.89, + "grad_norm": 2.244729518890381, + "learning_rate": 9.78422110552764e-06, + "loss": 0.1188, + "step": 2650 + }, + { + "epoch": 0.9, + "grad_norm": 2.064343214035034, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1197, + "step": 2675 + }, + { + "epoch": 0.91, + "grad_norm": 2.2781174182891846, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1176, + "step": 2700 + }, + { + "epoch": 0.92, + "grad_norm": 2.1597466468811035, + "learning_rate": 9.776683417085428e-06, + "loss": 0.1154, + "step": 2725 + }, + { + "epoch": 0.92, + "grad_norm": 2.389716148376465, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1128, + "step": 2750 + }, + { + "epoch": 0.93, + "grad_norm": 2.0880024433135986, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1135, + "step": 2775 + }, + { + "epoch": 0.94, + "grad_norm": 2.4547035694122314, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1082, + "step": 2800 + }, + { + "epoch": 0.95, + "grad_norm": 2.4442145824432373, + "learning_rate": 9.766633165829147e-06, + "loss": 0.1118, + "step": 2825 + }, + { + "epoch": 0.96, + "grad_norm": 2.5627267360687256, + "learning_rate": 9.764120603015076e-06, + "loss": 0.112, + "step": 2850 + }, + { + "epoch": 0.97, + "grad_norm": 2.37778377532959, + "learning_rate": 9.761608040201005e-06, + "loss": 0.1123, + "step": 2875 + }, + { + "epoch": 0.97, + "grad_norm": 1.9279662370681763, + "learning_rate": 9.759095477386935e-06, + "loss": 0.1152, + "step": 2900 + }, + { + "epoch": 0.98, + "grad_norm": 2.322328567504883, + "learning_rate": 9.756582914572866e-06, + "loss": 0.1098, + "step": 2925 + }, + { + "epoch": 0.99, + "grad_norm": 2.3049869537353516, + "learning_rate": 9.754070351758795e-06, + "loss": 0.1126, + "step": 2950 + }, + { + "epoch": 1.0, + "grad_norm": 2.0780746936798096, + "learning_rate": 9.751557788944724e-06, + "loss": 0.1095, + "step": 2975 + }, + { + "epoch": 1.01, + "grad_norm": 2.193786859512329, + "learning_rate": 9.749045226130654e-06, + "loss": 0.0892, + "step": 3000 + }, + { + "epoch": 1.01, + "eval_loss": 0.08323577046394348, + "eval_runtime": 1221.7695, + "eval_samples_per_second": 1.134, + "eval_steps_per_second": 1.134, + "eval_wer": 18.37365354052622, + "step": 3000 + }, + { + "epoch": 1.02, + "grad_norm": 1.6854056119918823, + "learning_rate": 9.746532663316583e-06, + "loss": 0.0851, + "step": 3025 + }, + { + "epoch": 1.02, + "grad_norm": 1.9452706575393677, + "learning_rate": 9.744020100502514e-06, + "loss": 0.0814, + "step": 3050 + }, + { + "epoch": 1.03, + "grad_norm": 2.0260298252105713, + "learning_rate": 9.741507537688443e-06, + "loss": 0.086, + "step": 3075 + }, + { + "epoch": 1.04, + "grad_norm": 2.049137830734253, + "learning_rate": 9.738994974874373e-06, + "loss": 0.0798, + "step": 3100 + }, + { + "epoch": 1.05, + "grad_norm": 1.832176923751831, + "learning_rate": 9.736482412060302e-06, + "loss": 0.0871, + "step": 3125 + }, + { + "epoch": 1.06, + "grad_norm": 2.0564773082733154, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0846, + "step": 3150 + }, + { + "epoch": 1.07, + "grad_norm": 2.1609017848968506, + "learning_rate": 9.731457286432162e-06, + "loss": 0.0807, + "step": 3175 + }, + { + "epoch": 1.07, + "grad_norm": 1.7849456071853638, + "learning_rate": 9.728944723618092e-06, + "loss": 0.0863, + "step": 3200 + }, + { + "epoch": 1.08, + "grad_norm": 1.9563733339309692, + "learning_rate": 9.726432160804021e-06, + "loss": 0.0783, + "step": 3225 + }, + { + "epoch": 1.09, + "grad_norm": 1.7156349420547485, + "learning_rate": 9.72391959798995e-06, + "loss": 0.0837, + "step": 3250 + }, + { + "epoch": 1.1, + "grad_norm": 2.1490707397460938, + "learning_rate": 9.721407035175881e-06, + "loss": 0.0818, + "step": 3275 + }, + { + "epoch": 1.11, + "grad_norm": 1.6266313791275024, + "learning_rate": 9.718894472361809e-06, + "loss": 0.0746, + "step": 3300 + }, + { + "epoch": 1.12, + "grad_norm": 1.9222297668457031, + "learning_rate": 9.71638190954774e-06, + "loss": 0.0816, + "step": 3325 + }, + { + "epoch": 1.12, + "grad_norm": 1.7396388053894043, + "learning_rate": 9.71386934673367e-06, + "loss": 0.0846, + "step": 3350 + }, + { + "epoch": 1.13, + "grad_norm": 1.715120792388916, + "learning_rate": 9.711356783919599e-06, + "loss": 0.084, + "step": 3375 + }, + { + "epoch": 1.14, + "grad_norm": 2.528108596801758, + "learning_rate": 9.70884422110553e-06, + "loss": 0.0881, + "step": 3400 + }, + { + "epoch": 1.15, + "grad_norm": 2.5393166542053223, + "learning_rate": 9.706331658291457e-06, + "loss": 0.0795, + "step": 3425 + }, + { + "epoch": 1.16, + "grad_norm": 1.653799295425415, + "learning_rate": 9.703819095477388e-06, + "loss": 0.0835, + "step": 3450 + }, + { + "epoch": 1.17, + "grad_norm": 1.8596760034561157, + "learning_rate": 9.701306532663318e-06, + "loss": 0.0785, + "step": 3475 + }, + { + "epoch": 1.18, + "grad_norm": 1.8667290210723877, + "learning_rate": 9.698793969849247e-06, + "loss": 0.0812, + "step": 3500 + }, + { + "epoch": 1.18, + "grad_norm": 1.8936610221862793, + "learning_rate": 9.696281407035176e-06, + "loss": 0.0842, + "step": 3525 + }, + { + "epoch": 1.19, + "grad_norm": 1.804520606994629, + "learning_rate": 9.693768844221107e-06, + "loss": 0.0808, + "step": 3550 + }, + { + "epoch": 1.2, + "grad_norm": 2.023256540298462, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0809, + "step": 3575 + }, + { + "epoch": 1.21, + "grad_norm": 2.1787447929382324, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0816, + "step": 3600 + }, + { + "epoch": 1.22, + "grad_norm": 2.094620943069458, + "learning_rate": 9.686231155778895e-06, + "loss": 0.079, + "step": 3625 + }, + { + "epoch": 1.23, + "grad_norm": 2.283487558364868, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0855, + "step": 3650 + }, + { + "epoch": 1.23, + "grad_norm": 1.86136794090271, + "learning_rate": 9.681206030150756e-06, + "loss": 0.0772, + "step": 3675 + }, + { + "epoch": 1.24, + "grad_norm": 2.4620625972747803, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0804, + "step": 3700 + }, + { + "epoch": 1.25, + "grad_norm": 1.708632230758667, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0789, + "step": 3725 + }, + { + "epoch": 1.26, + "grad_norm": 1.6484177112579346, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0762, + "step": 3750 + }, + { + "epoch": 1.27, + "grad_norm": 1.8386435508728027, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0787, + "step": 3775 + }, + { + "epoch": 1.28, + "grad_norm": 1.7124305963516235, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0804, + "step": 3800 + }, + { + "epoch": 1.28, + "grad_norm": 1.7885063886642456, + "learning_rate": 9.666130653266333e-06, + "loss": 0.0771, + "step": 3825 + }, + { + "epoch": 1.29, + "grad_norm": 2.2022058963775635, + "learning_rate": 9.663618090452263e-06, + "loss": 0.075, + "step": 3850 + }, + { + "epoch": 1.3, + "grad_norm": 1.7463427782058716, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0759, + "step": 3875 + }, + { + "epoch": 1.31, + "grad_norm": 2.068225860595703, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0802, + "step": 3900 + }, + { + "epoch": 1.32, + "grad_norm": 2.3084919452667236, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0822, + "step": 3925 + }, + { + "epoch": 1.33, + "grad_norm": 2.022242546081543, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0775, + "step": 3950 + }, + { + "epoch": 1.33, + "grad_norm": 2.266552209854126, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0788, + "step": 3975 + }, + { + "epoch": 1.34, + "grad_norm": 2.2885866165161133, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0813, + "step": 4000 + }, + { + "epoch": 1.34, + "eval_loss": 0.08090142905712128, + "eval_runtime": 1223.2771, + "eval_samples_per_second": 1.133, + "eval_steps_per_second": 1.133, + "eval_wer": 17.437753840720465, + "step": 4000 + }, + { + "epoch": 1.35, + "grad_norm": 1.9813694953918457, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0748, + "step": 4025 + }, + { + "epoch": 1.36, + "grad_norm": 2.207836866378784, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0816, + "step": 4050 + }, + { + "epoch": 1.37, + "grad_norm": 2.056307315826416, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0739, + "step": 4075 + }, + { + "epoch": 1.38, + "grad_norm": 2.115478277206421, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0773, + "step": 4100 + }, + { + "epoch": 1.39, + "grad_norm": 2.251664400100708, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0822, + "step": 4125 + }, + { + "epoch": 1.39, + "grad_norm": 1.6594916582107544, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0735, + "step": 4150 + }, + { + "epoch": 1.4, + "grad_norm": 2.0214011669158936, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0782, + "step": 4175 + }, + { + "epoch": 1.41, + "grad_norm": 2.0953197479248047, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0742, + "step": 4200 + }, + { + "epoch": 1.42, + "grad_norm": 1.6488879919052124, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0763, + "step": 4225 + }, + { + "epoch": 1.43, + "grad_norm": 2.0073978900909424, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0736, + "step": 4250 + }, + { + "epoch": 1.44, + "grad_norm": 2.135430097579956, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0753, + "step": 4275 + }, + { + "epoch": 1.44, + "grad_norm": 2.2100822925567627, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0774, + "step": 4300 + }, + { + "epoch": 1.45, + "grad_norm": 1.6964852809906006, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0737, + "step": 4325 + }, + { + "epoch": 1.46, + "grad_norm": 1.945204257965088, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0738, + "step": 4350 + }, + { + "epoch": 1.47, + "grad_norm": 1.8037047386169434, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0749, + "step": 4375 + }, + { + "epoch": 1.48, + "grad_norm": 1.8553427457809448, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0756, + "step": 4400 + }, + { + "epoch": 1.49, + "grad_norm": 1.9079605340957642, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0787, + "step": 4425 + }, + { + "epoch": 1.49, + "grad_norm": 2.1437880992889404, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0738, + "step": 4450 + }, + { + "epoch": 1.5, + "grad_norm": 1.8772354125976562, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0767, + "step": 4475 + }, + { + "epoch": 1.51, + "grad_norm": 1.851017713546753, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0756, + "step": 4500 + }, + { + "epoch": 1.52, + "grad_norm": 1.9189295768737793, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0742, + "step": 4525 + }, + { + "epoch": 1.53, + "grad_norm": 2.267245054244995, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0737, + "step": 4550 + }, + { + "epoch": 1.54, + "grad_norm": 1.8712221384048462, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0726, + "step": 4575 + }, + { + "epoch": 1.54, + "grad_norm": 1.7565487623214722, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0789, + "step": 4600 + }, + { + "epoch": 1.55, + "grad_norm": 2.036895275115967, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0722, + "step": 4625 + }, + { + "epoch": 1.56, + "grad_norm": 2.4377431869506836, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0739, + "step": 4650 + }, + { + "epoch": 1.57, + "grad_norm": 1.9701563119888306, + "learning_rate": 9.58070351758794e-06, + "loss": 0.073, + "step": 4675 + }, + { + "epoch": 1.58, + "grad_norm": 1.9451273679733276, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0703, + "step": 4700 + }, + { + "epoch": 1.59, + "grad_norm": 2.4862875938415527, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0668, + "step": 4725 + }, + { + "epoch": 1.6, + "grad_norm": 1.8485660552978516, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0747, + "step": 4750 + }, + { + "epoch": 1.6, + "grad_norm": 2.181363344192505, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0688, + "step": 4775 + }, + { + "epoch": 1.61, + "grad_norm": 2.0164616107940674, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0715, + "step": 4800 + }, + { + "epoch": 1.62, + "grad_norm": 1.8742849826812744, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0726, + "step": 4825 + }, + { + "epoch": 1.63, + "grad_norm": 2.4254283905029297, + "learning_rate": 9.563115577889447e-06, + "loss": 0.072, + "step": 4850 + }, + { + "epoch": 1.64, + "grad_norm": 1.6741983890533447, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0748, + "step": 4875 + }, + { + "epoch": 1.65, + "grad_norm": 2.0665688514709473, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0718, + "step": 4900 + }, + { + "epoch": 1.65, + "grad_norm": 1.8600083589553833, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0782, + "step": 4925 + }, + { + "epoch": 1.66, + "grad_norm": 2.3301470279693604, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0722, + "step": 4950 + }, + { + "epoch": 1.67, + "grad_norm": 2.079394578933716, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0718, + "step": 4975 + }, + { + "epoch": 1.68, + "grad_norm": 2.194088935852051, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0736, + "step": 5000 + }, + { + "epoch": 1.68, + "eval_loss": 0.08014192432165146, + "eval_runtime": 1219.1772, + "eval_samples_per_second": 1.137, + "eval_steps_per_second": 1.137, + "eval_wer": 17.45541232562246, + "step": 5000 + }, + { + "epoch": 1.69, + "grad_norm": 1.943081021308899, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0698, + "step": 5025 + }, + { + "epoch": 1.7, + "grad_norm": 2.306774377822876, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0714, + "step": 5050 + }, + { + "epoch": 1.7, + "grad_norm": 1.9444692134857178, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0725, + "step": 5075 + }, + { + "epoch": 1.71, + "grad_norm": 2.2193167209625244, + "learning_rate": 9.537989949748746e-06, + "loss": 0.072, + "step": 5100 + }, + { + "epoch": 1.72, + "grad_norm": 1.9501636028289795, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0699, + "step": 5125 + }, + { + "epoch": 1.73, + "grad_norm": 1.5049554109573364, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0651, + "step": 5150 + }, + { + "epoch": 1.74, + "grad_norm": 2.0886285305023193, + "learning_rate": 9.530452261306534e-06, + "loss": 0.075, + "step": 5175 + }, + { + "epoch": 1.75, + "grad_norm": 2.0082366466522217, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0669, + "step": 5200 + }, + { + "epoch": 1.75, + "grad_norm": 2.1055715084075928, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0679, + "step": 5225 + }, + { + "epoch": 1.76, + "grad_norm": 1.856635332107544, + "learning_rate": 9.522914572864322e-06, + "loss": 0.062, + "step": 5250 + }, + { + "epoch": 1.77, + "grad_norm": 1.8389759063720703, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0672, + "step": 5275 + }, + { + "epoch": 1.78, + "grad_norm": 2.2727222442626953, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0702, + "step": 5300 + }, + { + "epoch": 1.79, + "grad_norm": 1.668586015701294, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0635, + "step": 5325 + }, + { + "epoch": 1.8, + "grad_norm": 1.77827787399292, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0654, + "step": 5350 + }, + { + "epoch": 1.8, + "grad_norm": 2.057682991027832, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0722, + "step": 5375 + }, + { + "epoch": 1.81, + "grad_norm": 1.3326081037521362, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0699, + "step": 5400 + }, + { + "epoch": 1.82, + "grad_norm": 1.9826327562332153, + "learning_rate": 9.50532663316583e-06, + "loss": 0.0662, + "step": 5425 + }, + { + "epoch": 1.83, + "grad_norm": 2.1651649475097656, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0671, + "step": 5450 + }, + { + "epoch": 1.84, + "grad_norm": 1.8695662021636963, + "learning_rate": 9.500301507537689e-06, + "loss": 0.0715, + "step": 5475 + }, + { + "epoch": 1.85, + "grad_norm": 1.8919188976287842, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0623, + "step": 5500 + }, + { + "epoch": 1.86, + "grad_norm": 1.6708858013153076, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0686, + "step": 5525 + }, + { + "epoch": 1.86, + "grad_norm": 1.7214843034744263, + "learning_rate": 9.492763819095479e-06, + "loss": 0.064, + "step": 5550 + }, + { + "epoch": 1.87, + "grad_norm": 1.8973783254623413, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0652, + "step": 5575 + }, + { + "epoch": 1.88, + "grad_norm": 1.8864316940307617, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0655, + "step": 5600 + }, + { + "epoch": 1.89, + "grad_norm": 1.7592251300811768, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0652, + "step": 5625 + }, + { + "epoch": 1.9, + "grad_norm": 1.7695480585098267, + "learning_rate": 9.482713567839198e-06, + "loss": 0.0634, + "step": 5650 + }, + { + "epoch": 1.91, + "grad_norm": 1.930935025215149, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0641, + "step": 5675 + }, + { + "epoch": 1.91, + "grad_norm": 2.164590358734131, + "learning_rate": 9.477688442211056e-06, + "loss": 0.0699, + "step": 5700 + }, + { + "epoch": 1.92, + "grad_norm": 1.3509349822998047, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0644, + "step": 5725 + }, + { + "epoch": 1.93, + "grad_norm": 1.7218430042266846, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0679, + "step": 5750 + }, + { + "epoch": 1.94, + "grad_norm": 1.8750897645950317, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0675, + "step": 5775 + }, + { + "epoch": 1.95, + "grad_norm": 2.2228214740753174, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0664, + "step": 5800 + }, + { + "epoch": 1.96, + "grad_norm": 1.9837381839752197, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0652, + "step": 5825 + }, + { + "epoch": 1.96, + "grad_norm": 2.264638900756836, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0659, + "step": 5850 + }, + { + "epoch": 1.97, + "grad_norm": 1.6855463981628418, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0651, + "step": 5875 + }, + { + "epoch": 1.98, + "grad_norm": 1.9479776620864868, + "learning_rate": 9.457587939698494e-06, + "loss": 0.065, + "step": 5900 + }, + { + "epoch": 1.99, + "grad_norm": 1.9869670867919922, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0601, + "step": 5925 + }, + { + "epoch": 2.0, + "grad_norm": 1.8373864889144897, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0664, + "step": 5950 + }, + { + "epoch": 2.01, + "grad_norm": 1.3462257385253906, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0438, + "step": 5975 + }, + { + "epoch": 2.01, + "grad_norm": 1.6361795663833618, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0417, + "step": 6000 + }, + { + "epoch": 2.01, + "eval_loss": 0.08171622455120087, + "eval_runtime": 1237.9757, + "eval_samples_per_second": 1.12, + "eval_steps_per_second": 1.12, + "eval_wer": 16.943316263464595, + "step": 6000 + }, + { + "epoch": 2.02, + "grad_norm": 1.5196746587753296, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0395, + "step": 6025 + }, + { + "epoch": 2.03, + "grad_norm": 1.500688076019287, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0367, + "step": 6050 + }, + { + "epoch": 2.04, + "grad_norm": 1.6962965726852417, + "learning_rate": 9.440000000000001e-06, + "loss": 0.039, + "step": 6075 + }, + { + "epoch": 2.05, + "grad_norm": 1.439050555229187, + "learning_rate": 9.43748743718593e-06, + "loss": 0.04, + "step": 6100 + }, + { + "epoch": 2.06, + "grad_norm": 1.5257843732833862, + "learning_rate": 9.43497487437186e-06, + "loss": 0.038, + "step": 6125 + }, + { + "epoch": 2.07, + "grad_norm": 1.868520975112915, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0357, + "step": 6150 + }, + { + "epoch": 2.07, + "grad_norm": 1.506712555885315, + "learning_rate": 9.42994974874372e-06, + "loss": 0.039, + "step": 6175 + }, + { + "epoch": 2.08, + "grad_norm": 1.7489590644836426, + "learning_rate": 9.42743718592965e-06, + "loss": 0.04, + "step": 6200 + }, + { + "epoch": 2.09, + "grad_norm": 1.3327454328536987, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0395, + "step": 6225 + }, + { + "epoch": 2.1, + "grad_norm": 1.4141664505004883, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0348, + "step": 6250 + }, + { + "epoch": 2.11, + "grad_norm": 1.5759655237197876, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0373, + "step": 6275 + }, + { + "epoch": 2.12, + "grad_norm": 1.5537060499191284, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0404, + "step": 6300 + }, + { + "epoch": 2.12, + "grad_norm": 1.4839963912963867, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0429, + "step": 6325 + }, + { + "epoch": 2.13, + "grad_norm": 1.7436524629592896, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0389, + "step": 6350 + }, + { + "epoch": 2.14, + "grad_norm": 1.4596096277236938, + "learning_rate": 9.409849246231156e-06, + "loss": 0.0365, + "step": 6375 + }, + { + "epoch": 2.15, + "grad_norm": 1.931618094444275, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0412, + "step": 6400 + }, + { + "epoch": 2.16, + "grad_norm": 1.5418370962142944, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0407, + "step": 6425 + }, + { + "epoch": 2.17, + "grad_norm": 1.6493052244186401, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0389, + "step": 6450 + }, + { + "epoch": 2.17, + "grad_norm": 1.8735350370407104, + "learning_rate": 9.399798994974875e-06, + "loss": 0.0436, + "step": 6475 + }, + { + "epoch": 2.18, + "grad_norm": 1.6055018901824951, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0376, + "step": 6500 + }, + { + "epoch": 2.19, + "grad_norm": 1.6827505826950073, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0388, + "step": 6525 + }, + { + "epoch": 2.2, + "grad_norm": 2.0560061931610107, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0401, + "step": 6550 + }, + { + "epoch": 2.21, + "grad_norm": 1.6090270280838013, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0375, + "step": 6575 + }, + { + "epoch": 2.22, + "grad_norm": 1.93809974193573, + "learning_rate": 9.387236180904524e-06, + "loss": 0.0374, + "step": 6600 + }, + { + "epoch": 2.22, + "grad_norm": 1.5843024253845215, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0398, + "step": 6625 + }, + { + "epoch": 2.23, + "grad_norm": 1.6570930480957031, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0409, + "step": 6650 + }, + { + "epoch": 2.24, + "grad_norm": 1.3183377981185913, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0385, + "step": 6675 + }, + { + "epoch": 2.25, + "grad_norm": 1.5377205610275269, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0364, + "step": 6700 + }, + { + "epoch": 2.26, + "grad_norm": 1.461680293083191, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0384, + "step": 6725 + }, + { + "epoch": 2.27, + "grad_norm": 1.7837620973587036, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0407, + "step": 6750 + }, + { + "epoch": 2.28, + "grad_norm": 1.3506888151168823, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0376, + "step": 6775 + }, + { + "epoch": 2.28, + "grad_norm": 1.571994662284851, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0362, + "step": 6800 + }, + { + "epoch": 2.29, + "grad_norm": 1.5411869287490845, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0404, + "step": 6825 + }, + { + "epoch": 2.3, + "grad_norm": 1.7785247564315796, + "learning_rate": 9.36211055276382e-06, + "loss": 0.039, + "step": 6850 + }, + { + "epoch": 2.31, + "grad_norm": 1.951112985610962, + "learning_rate": 9.35959798994975e-06, + "loss": 0.038, + "step": 6875 + }, + { + "epoch": 2.32, + "grad_norm": 1.4598815441131592, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0362, + "step": 6900 + }, + { + "epoch": 2.33, + "grad_norm": 1.6336860656738281, + "learning_rate": 9.354572864321608e-06, + "loss": 0.0369, + "step": 6925 + }, + { + "epoch": 2.33, + "grad_norm": 1.8212099075317383, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0411, + "step": 6950 + }, + { + "epoch": 2.34, + "grad_norm": 2.0050110816955566, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0389, + "step": 6975 + }, + { + "epoch": 2.35, + "grad_norm": 1.7982875108718872, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0376, + "step": 7000 + }, + { + "epoch": 2.35, + "eval_loss": 0.08886675536632538, + "eval_runtime": 1217.6825, + "eval_samples_per_second": 1.138, + "eval_steps_per_second": 1.138, + "eval_wer": 17.967508387780327, + "step": 7000 + }, + { + "epoch": 2.36, + "grad_norm": 1.475287675857544, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0413, + "step": 7025 + }, + { + "epoch": 2.37, + "grad_norm": 1.7483357191085815, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0381, + "step": 7050 + }, + { + "epoch": 2.38, + "grad_norm": 1.8892991542816162, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0394, + "step": 7075 + }, + { + "epoch": 2.38, + "grad_norm": 1.6506677865982056, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0359, + "step": 7100 + }, + { + "epoch": 2.39, + "grad_norm": 1.702620267868042, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0389, + "step": 7125 + }, + { + "epoch": 2.4, + "grad_norm": 1.4655669927597046, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0378, + "step": 7150 + }, + { + "epoch": 2.41, + "grad_norm": 1.70259690284729, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0367, + "step": 7175 + }, + { + "epoch": 2.42, + "grad_norm": 1.4199962615966797, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0377, + "step": 7200 + }, + { + "epoch": 2.43, + "grad_norm": 1.7344260215759277, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0405, + "step": 7225 + }, + { + "epoch": 2.43, + "grad_norm": 1.4277095794677734, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0364, + "step": 7250 + }, + { + "epoch": 2.44, + "grad_norm": 1.6871662139892578, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0377, + "step": 7275 + }, + { + "epoch": 2.45, + "grad_norm": 1.5578590631484985, + "learning_rate": 9.316884422110553e-06, + "loss": 0.037, + "step": 7300 + }, + { + "epoch": 2.46, + "grad_norm": 1.849335789680481, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0365, + "step": 7325 + }, + { + "epoch": 2.47, + "grad_norm": 1.8265397548675537, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0335, + "step": 7350 + }, + { + "epoch": 2.48, + "grad_norm": 1.5853670835494995, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0366, + "step": 7375 + }, + { + "epoch": 2.48, + "grad_norm": 1.9164550304412842, + "learning_rate": 9.306834170854272e-06, + "loss": 0.0366, + "step": 7400 + }, + { + "epoch": 2.49, + "grad_norm": 1.4100239276885986, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0363, + "step": 7425 + }, + { + "epoch": 2.5, + "grad_norm": 1.3203496932983398, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0369, + "step": 7450 + }, + { + "epoch": 2.51, + "grad_norm": 1.39760422706604, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0374, + "step": 7475 + }, + { + "epoch": 2.52, + "grad_norm": 2.0733561515808105, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0392, + "step": 7500 + }, + { + "epoch": 2.53, + "grad_norm": 1.8401012420654297, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0409, + "step": 7525 + }, + { + "epoch": 2.54, + "grad_norm": 2.0598928928375244, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0391, + "step": 7550 + }, + { + "epoch": 2.54, + "grad_norm": 1.7229574918746948, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0352, + "step": 7575 + }, + { + "epoch": 2.55, + "grad_norm": 1.8677914142608643, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0369, + "step": 7600 + }, + { + "epoch": 2.56, + "grad_norm": 1.9619063138961792, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0357, + "step": 7625 + }, + { + "epoch": 2.57, + "grad_norm": 1.7991969585418701, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0373, + "step": 7650 + }, + { + "epoch": 2.58, + "grad_norm": 2.0016562938690186, + "learning_rate": 9.279195979899498e-06, + "loss": 0.037, + "step": 7675 + }, + { + "epoch": 2.59, + "grad_norm": 2.1665875911712646, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0319, + "step": 7700 + }, + { + "epoch": 2.59, + "grad_norm": 2.198800563812256, + "learning_rate": 9.274170854271357e-06, + "loss": 0.038, + "step": 7725 + }, + { + "epoch": 2.6, + "grad_norm": 1.4412776231765747, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0387, + "step": 7750 + }, + { + "epoch": 2.61, + "grad_norm": 1.5660271644592285, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0308, + "step": 7775 + }, + { + "epoch": 2.62, + "grad_norm": 1.699459195137024, + "learning_rate": 9.266633165829146e-06, + "loss": 0.0359, + "step": 7800 + }, + { + "epoch": 2.63, + "grad_norm": 1.3810155391693115, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0352, + "step": 7825 + }, + { + "epoch": 2.64, + "grad_norm": 1.773938536643982, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0343, + "step": 7850 + }, + { + "epoch": 2.64, + "grad_norm": 1.2856162786483765, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0365, + "step": 7875 + }, + { + "epoch": 2.65, + "grad_norm": 1.6593718528747559, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0366, + "step": 7900 + }, + { + "epoch": 2.66, + "grad_norm": 1.409193992614746, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0338, + "step": 7925 + }, + { + "epoch": 2.67, + "grad_norm": 1.9222149848937988, + "learning_rate": 9.251557788944724e-06, + "loss": 0.0375, + "step": 7950 + }, + { + "epoch": 2.68, + "grad_norm": 1.7340257167816162, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0357, + "step": 7975 + }, + { + "epoch": 2.69, + "grad_norm": 1.5344984531402588, + "learning_rate": 9.246532663316584e-06, + "loss": 0.036, + "step": 8000 + }, + { + "epoch": 2.69, + "eval_loss": 0.09019232541322708, + "eval_runtime": 1227.9814, + "eval_samples_per_second": 1.129, + "eval_steps_per_second": 1.129, + "eval_wer": 18.055800812290308, + "step": 8000 + }, + { + "epoch": 2.69, + "grad_norm": 2.366217613220215, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0352, + "step": 8025 + }, + { + "epoch": 2.7, + "grad_norm": 2.3861887454986572, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0339, + "step": 8050 + }, + { + "epoch": 2.71, + "grad_norm": 1.544856309890747, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0316, + "step": 8075 + }, + { + "epoch": 2.72, + "grad_norm": 1.6370819807052612, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0364, + "step": 8100 + }, + { + "epoch": 2.73, + "grad_norm": 1.2956650257110596, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0345, + "step": 8125 + }, + { + "epoch": 2.74, + "grad_norm": 1.513643503189087, + "learning_rate": 9.231457286432162e-06, + "loss": 0.04, + "step": 8150 + }, + { + "epoch": 2.75, + "grad_norm": 1.9474529027938843, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0376, + "step": 8175 + }, + { + "epoch": 2.75, + "grad_norm": 1.8262574672698975, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0343, + "step": 8200 + }, + { + "epoch": 2.76, + "grad_norm": 1.9003005027770996, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0347, + "step": 8225 + }, + { + "epoch": 2.77, + "grad_norm": 1.4719185829162598, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0392, + "step": 8250 + }, + { + "epoch": 2.78, + "grad_norm": 1.702439308166504, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0386, + "step": 8275 + }, + { + "epoch": 2.79, + "grad_norm": 1.4902780055999756, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0352, + "step": 8300 + }, + { + "epoch": 2.8, + "grad_norm": 1.4435367584228516, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0317, + "step": 8325 + }, + { + "epoch": 2.8, + "grad_norm": 1.5381484031677246, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0328, + "step": 8350 + }, + { + "epoch": 2.81, + "grad_norm": 1.9940435886383057, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0375, + "step": 8375 + }, + { + "epoch": 2.82, + "grad_norm": 1.8520588874816895, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0354, + "step": 8400 + }, + { + "epoch": 2.83, + "grad_norm": 1.6623708009719849, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0324, + "step": 8425 + }, + { + "epoch": 2.84, + "grad_norm": 2.138735294342041, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0352, + "step": 8450 + }, + { + "epoch": 2.85, + "grad_norm": 1.7829039096832275, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0379, + "step": 8475 + }, + { + "epoch": 2.85, + "grad_norm": 1.5767414569854736, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0344, + "step": 8500 + }, + { + "epoch": 2.86, + "grad_norm": 1.3995085954666138, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0316, + "step": 8525 + }, + { + "epoch": 2.87, + "grad_norm": 1.8053728342056274, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0325, + "step": 8550 + }, + { + "epoch": 2.88, + "grad_norm": 1.44207763671875, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0338, + "step": 8575 + }, + { + "epoch": 2.89, + "grad_norm": 1.9735956192016602, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0351, + "step": 8600 + }, + { + "epoch": 2.9, + "grad_norm": 1.6435621976852417, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0323, + "step": 8625 + }, + { + "epoch": 2.9, + "grad_norm": 1.8617935180664062, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0363, + "step": 8650 + }, + { + "epoch": 2.91, + "grad_norm": 1.5676214694976807, + "learning_rate": 9.178793969849247e-06, + "loss": 0.0314, + "step": 8675 + }, + { + "epoch": 2.92, + "grad_norm": 1.7310137748718262, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0337, + "step": 8700 + }, + { + "epoch": 2.93, + "grad_norm": 1.7133023738861084, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0307, + "step": 8725 + }, + { + "epoch": 2.94, + "grad_norm": 1.4701021909713745, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0321, + "step": 8750 + }, + { + "epoch": 2.95, + "grad_norm": 1.6799181699752808, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0344, + "step": 8775 + }, + { + "epoch": 2.96, + "grad_norm": 2.066694736480713, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0344, + "step": 8800 + }, + { + "epoch": 2.96, + "grad_norm": 1.3533351421356201, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0356, + "step": 8825 + }, + { + "epoch": 2.97, + "grad_norm": 2.062370777130127, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0329, + "step": 8850 + }, + { + "epoch": 2.98, + "grad_norm": 1.7933417558670044, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0334, + "step": 8875 + }, + { + "epoch": 2.99, + "grad_norm": 1.265754222869873, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0366, + "step": 8900 + }, + { + "epoch": 3.0, + "grad_norm": 1.7003251314163208, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0337, + "step": 8925 + }, + { + "epoch": 3.01, + "grad_norm": 1.2837584018707275, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0272, + "step": 8950 + }, + { + "epoch": 3.01, + "grad_norm": 1.313975214958191, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0189, + "step": 8975 + }, + { + "epoch": 3.02, + "grad_norm": 1.4120932817459106, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0159, + "step": 9000 + }, + { + "epoch": 3.02, + "eval_loss": 0.09644545614719391, + "eval_runtime": 1223.7027, + "eval_samples_per_second": 1.133, + "eval_steps_per_second": 1.133, + "eval_wer": 16.952145505915592, + "step": 9000 + }, + { + "epoch": 3.03, + "grad_norm": 1.0550419092178345, + "learning_rate": 9.143618090452262e-06, + "loss": 0.017, + "step": 9025 + }, + { + "epoch": 3.04, + "grad_norm": 1.2225825786590576, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0181, + "step": 9050 + }, + { + "epoch": 3.05, + "grad_norm": 0.9521111845970154, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0165, + "step": 9075 + }, + { + "epoch": 3.06, + "grad_norm": 1.4750945568084717, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0165, + "step": 9100 + }, + { + "epoch": 3.06, + "grad_norm": 1.3045191764831543, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0192, + "step": 9125 + }, + { + "epoch": 3.07, + "grad_norm": 1.6955196857452393, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0191, + "step": 9150 + }, + { + "epoch": 3.08, + "grad_norm": 1.7448921203613281, + "learning_rate": 9.12854271356784e-06, + "loss": 0.018, + "step": 9175 + }, + { + "epoch": 3.09, + "grad_norm": 1.2588932514190674, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0185, + "step": 9200 + }, + { + "epoch": 3.1, + "grad_norm": 1.091917872428894, + "learning_rate": 9.1235175879397e-06, + "loss": 0.017, + "step": 9225 + }, + { + "epoch": 3.11, + "grad_norm": 0.9950605630874634, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0173, + "step": 9250 + }, + { + "epoch": 3.11, + "grad_norm": 0.83653724193573, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0154, + "step": 9275 + }, + { + "epoch": 3.12, + "grad_norm": 1.6638997793197632, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0198, + "step": 9300 + }, + { + "epoch": 3.13, + "grad_norm": 1.5297540426254272, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0172, + "step": 9325 + }, + { + "epoch": 3.14, + "grad_norm": 1.2249935865402222, + "learning_rate": 9.110954773869347e-06, + "loss": 0.018, + "step": 9350 + }, + { + "epoch": 3.15, + "grad_norm": 1.761459231376648, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0193, + "step": 9375 + }, + { + "epoch": 3.16, + "grad_norm": 1.0996549129486084, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0192, + "step": 9400 + }, + { + "epoch": 3.16, + "grad_norm": 1.5097150802612305, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0171, + "step": 9425 + }, + { + "epoch": 3.17, + "grad_norm": 1.1290212869644165, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0165, + "step": 9450 + }, + { + "epoch": 3.18, + "grad_norm": 1.1305938959121704, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0181, + "step": 9475 + }, + { + "epoch": 3.19, + "grad_norm": 1.5552388429641724, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0171, + "step": 9500 + }, + { + "epoch": 3.2, + "grad_norm": 1.223320484161377, + "learning_rate": 9.093366834170854e-06, + "loss": 0.019, + "step": 9525 + }, + { + "epoch": 3.21, + "grad_norm": 0.9795104265213013, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0196, + "step": 9550 + }, + { + "epoch": 3.22, + "grad_norm": 1.1193124055862427, + "learning_rate": 9.088341708542714e-06, + "loss": 0.018, + "step": 9575 + }, + { + "epoch": 3.22, + "grad_norm": 1.2560914754867554, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0162, + "step": 9600 + }, + { + "epoch": 3.23, + "grad_norm": 1.1668236255645752, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0185, + "step": 9625 + }, + { + "epoch": 3.24, + "grad_norm": 1.0570001602172852, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0161, + "step": 9650 + }, + { + "epoch": 3.25, + "grad_norm": 1.1503427028656006, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0177, + "step": 9675 + }, + { + "epoch": 3.26, + "grad_norm": 1.6310176849365234, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0165, + "step": 9700 + }, + { + "epoch": 3.27, + "grad_norm": 1.4443089962005615, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0173, + "step": 9725 + }, + { + "epoch": 3.27, + "grad_norm": 1.148603081703186, + "learning_rate": 9.070753768844221e-06, + "loss": 0.015, + "step": 9750 + }, + { + "epoch": 3.28, + "grad_norm": 1.17886221408844, + "learning_rate": 9.068241206030152e-06, + "loss": 0.02, + "step": 9775 + }, + { + "epoch": 3.29, + "grad_norm": 1.5388753414154053, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0178, + "step": 9800 + }, + { + "epoch": 3.3, + "grad_norm": 1.6313765048980713, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0186, + "step": 9825 + }, + { + "epoch": 3.31, + "grad_norm": 1.4232983589172363, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0179, + "step": 9850 + }, + { + "epoch": 3.32, + "grad_norm": 1.557934045791626, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0188, + "step": 9875 + }, + { + "epoch": 3.32, + "grad_norm": 1.2740799188613892, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0178, + "step": 9900 + }, + { + "epoch": 3.33, + "grad_norm": 1.3133853673934937, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0177, + "step": 9925 + }, + { + "epoch": 3.34, + "grad_norm": 1.692417025566101, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0192, + "step": 9950 + }, + { + "epoch": 3.35, + "grad_norm": 1.2050764560699463, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0188, + "step": 9975 + }, + { + "epoch": 3.36, + "grad_norm": 1.6175525188446045, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0185, + "step": 10000 + }, + { + "epoch": 3.36, + "eval_loss": 0.09874902665615082, + "eval_runtime": 1214.2688, + "eval_samples_per_second": 1.141, + "eval_steps_per_second": 1.141, + "eval_wer": 17.614338689740418, + "step": 10000 + }, + { + "epoch": 3.37, + "grad_norm": 1.7626513242721558, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0183, + "step": 10025 + }, + { + "epoch": 3.37, + "grad_norm": 1.2578681707382202, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0212, + "step": 10050 + }, + { + "epoch": 3.38, + "grad_norm": 1.274777889251709, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0177, + "step": 10075 + }, + { + "epoch": 3.39, + "grad_norm": 2.1128435134887695, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0187, + "step": 10100 + }, + { + "epoch": 3.4, + "grad_norm": 1.4603064060211182, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0175, + "step": 10125 + }, + { + "epoch": 3.41, + "grad_norm": 1.3411498069763184, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0194, + "step": 10150 + }, + { + "epoch": 3.42, + "grad_norm": 1.1267813444137573, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0191, + "step": 10175 + }, + { + "epoch": 3.43, + "grad_norm": 1.5770483016967773, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0196, + "step": 10200 + }, + { + "epoch": 3.43, + "grad_norm": 1.372748613357544, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0184, + "step": 10225 + }, + { + "epoch": 3.44, + "grad_norm": 1.4407280683517456, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0217, + "step": 10250 + }, + { + "epoch": 3.45, + "grad_norm": 1.3116562366485596, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0165, + "step": 10275 + }, + { + "epoch": 3.46, + "grad_norm": 1.472308874130249, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0168, + "step": 10300 + }, + { + "epoch": 3.47, + "grad_norm": 1.7614247798919678, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0189, + "step": 10325 + }, + { + "epoch": 3.48, + "grad_norm": 1.281989336013794, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0167, + "step": 10350 + }, + { + "epoch": 3.48, + "grad_norm": 1.0861455202102661, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0169, + "step": 10375 + }, + { + "epoch": 3.49, + "grad_norm": 1.5814651250839233, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0183, + "step": 10400 + }, + { + "epoch": 3.5, + "grad_norm": 1.2197859287261963, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0177, + "step": 10425 + }, + { + "epoch": 3.51, + "grad_norm": 1.2773247957229614, + "learning_rate": 9.000402010050252e-06, + "loss": 0.017, + "step": 10450 + }, + { + "epoch": 3.52, + "grad_norm": 1.4277900457382202, + "learning_rate": 8.997889447236182e-06, + "loss": 0.0191, + "step": 10475 + }, + { + "epoch": 3.53, + "grad_norm": 1.7514662742614746, + "learning_rate": 8.995376884422111e-06, + "loss": 0.017, + "step": 10500 + }, + { + "epoch": 3.53, + "grad_norm": 1.8297747373580933, + "learning_rate": 8.992864321608042e-06, + "loss": 0.0163, + "step": 10525 + }, + { + "epoch": 3.54, + "grad_norm": 1.7265169620513916, + "learning_rate": 8.99035175879397e-06, + "loss": 0.0171, + "step": 10550 + }, + { + "epoch": 3.55, + "grad_norm": 1.4066264629364014, + "learning_rate": 8.9878391959799e-06, + "loss": 0.0176, + "step": 10575 + }, + { + "epoch": 3.56, + "grad_norm": 1.4531276226043701, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0184, + "step": 10600 + }, + { + "epoch": 3.57, + "grad_norm": 1.1889498233795166, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0166, + "step": 10625 + }, + { + "epoch": 3.58, + "grad_norm": 1.3078460693359375, + "learning_rate": 8.980301507537689e-06, + "loss": 0.017, + "step": 10650 + }, + { + "epoch": 3.58, + "grad_norm": 1.6299117803573608, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0178, + "step": 10675 + }, + { + "epoch": 3.59, + "grad_norm": 1.263308048248291, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0193, + "step": 10700 + }, + { + "epoch": 3.6, + "grad_norm": 1.951819896697998, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0193, + "step": 10725 + }, + { + "epoch": 3.61, + "grad_norm": 1.4302527904510498, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0157, + "step": 10750 + }, + { + "epoch": 3.62, + "grad_norm": 1.5280959606170654, + "learning_rate": 8.967738693467337e-06, + "loss": 0.0185, + "step": 10775 + }, + { + "epoch": 3.63, + "grad_norm": 1.6734501123428345, + "learning_rate": 8.965226130653268e-06, + "loss": 0.018, + "step": 10800 + }, + { + "epoch": 3.63, + "grad_norm": 1.4680553674697876, + "learning_rate": 8.962713567839196e-06, + "loss": 0.0185, + "step": 10825 + }, + { + "epoch": 3.64, + "grad_norm": 1.2785632610321045, + "learning_rate": 8.960201005025127e-06, + "loss": 0.0184, + "step": 10850 + }, + { + "epoch": 3.65, + "grad_norm": 1.4811291694641113, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0153, + "step": 10875 + }, + { + "epoch": 3.66, + "grad_norm": 1.5703322887420654, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0156, + "step": 10900 + }, + { + "epoch": 3.67, + "grad_norm": 1.048566222190857, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0182, + "step": 10925 + }, + { + "epoch": 3.68, + "grad_norm": 1.6426318883895874, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0181, + "step": 10950 + }, + { + "epoch": 3.69, + "grad_norm": 1.105263113975525, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0175, + "step": 10975 + }, + { + "epoch": 3.69, + "grad_norm": 1.543891429901123, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0183, + "step": 11000 + }, + { + "epoch": 3.69, + "eval_loss": 0.10644808411598206, + "eval_runtime": 1218.2804, + "eval_samples_per_second": 1.138, + "eval_steps_per_second": 1.138, + "eval_wer": 17.269998234151508, + "step": 11000 + }, + { + "epoch": 3.7, + "grad_norm": 1.1473368406295776, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0178, + "step": 11025 + }, + { + "epoch": 3.71, + "grad_norm": 1.475974440574646, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0182, + "step": 11050 + }, + { + "epoch": 3.72, + "grad_norm": 1.5397669076919556, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0167, + "step": 11075 + }, + { + "epoch": 3.73, + "grad_norm": 1.0959078073501587, + "learning_rate": 8.935175879396986e-06, + "loss": 0.015, + "step": 11100 + }, + { + "epoch": 3.74, + "grad_norm": 2.048746109008789, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0186, + "step": 11125 + }, + { + "epoch": 3.74, + "grad_norm": 1.5749069452285767, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0174, + "step": 11150 + }, + { + "epoch": 3.75, + "grad_norm": 1.3588178157806396, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0161, + "step": 11175 + }, + { + "epoch": 3.76, + "grad_norm": 2.375582695007324, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0188, + "step": 11200 + }, + { + "epoch": 3.77, + "grad_norm": 1.4740511178970337, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0186, + "step": 11225 + }, + { + "epoch": 3.78, + "grad_norm": 1.3088107109069824, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0188, + "step": 11250 + }, + { + "epoch": 3.79, + "grad_norm": 2.1584980487823486, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0196, + "step": 11275 + }, + { + "epoch": 3.79, + "grad_norm": 1.4890257120132446, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0171, + "step": 11300 + }, + { + "epoch": 3.8, + "grad_norm": 1.3736598491668701, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0177, + "step": 11325 + }, + { + "epoch": 3.81, + "grad_norm": 1.3275282382965088, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0233, + "step": 11350 + }, + { + "epoch": 3.82, + "grad_norm": 1.7816600799560547, + "learning_rate": 8.907537688442212e-06, + "loss": 0.017, + "step": 11375 + }, + { + "epoch": 3.83, + "grad_norm": 1.2577104568481445, + "learning_rate": 8.905025125628143e-06, + "loss": 0.018, + "step": 11400 + }, + { + "epoch": 3.84, + "grad_norm": 1.8558993339538574, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0164, + "step": 11425 + }, + { + "epoch": 3.84, + "grad_norm": 1.663554310798645, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0192, + "step": 11450 + }, + { + "epoch": 3.85, + "grad_norm": 1.9370921850204468, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0192, + "step": 11475 + }, + { + "epoch": 3.86, + "grad_norm": 1.4039660692214966, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0189, + "step": 11500 + }, + { + "epoch": 3.87, + "grad_norm": 1.6285878419876099, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0175, + "step": 11525 + }, + { + "epoch": 3.88, + "grad_norm": 1.3806816339492798, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0157, + "step": 11550 + }, + { + "epoch": 3.89, + "grad_norm": 1.2733079195022583, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0161, + "step": 11575 + }, + { + "epoch": 3.9, + "grad_norm": 1.6549055576324463, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0167, + "step": 11600 + }, + { + "epoch": 3.9, + "grad_norm": 1.663223147392273, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0162, + "step": 11625 + }, + { + "epoch": 3.91, + "grad_norm": 1.6521025896072388, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0169, + "step": 11650 + }, + { + "epoch": 3.92, + "grad_norm": 1.7297382354736328, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0198, + "step": 11675 + }, + { + "epoch": 3.93, + "grad_norm": 1.4555562734603882, + "learning_rate": 8.874874371859296e-06, + "loss": 0.017, + "step": 11700 + }, + { + "epoch": 3.94, + "grad_norm": 1.1925603151321411, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0165, + "step": 11725 + }, + { + "epoch": 3.95, + "grad_norm": 1.3676649332046509, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0162, + "step": 11750 + }, + { + "epoch": 3.95, + "grad_norm": 1.7233623266220093, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0167, + "step": 11775 + }, + { + "epoch": 3.96, + "grad_norm": 1.6979261636734009, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0169, + "step": 11800 + }, + { + "epoch": 3.97, + "grad_norm": 1.0576210021972656, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0145, + "step": 11825 + }, + { + "epoch": 3.98, + "grad_norm": 1.2051951885223389, + "learning_rate": 8.859798994974875e-06, + "loss": 0.016, + "step": 11850 + }, + { + "epoch": 3.99, + "grad_norm": 1.689719557762146, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0181, + "step": 11875 + }, + { + "epoch": 4.0, + "grad_norm": 1.7545164823532104, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0191, + "step": 11900 + }, + { + "epoch": 4.0, + "grad_norm": 1.4458829164505005, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0142, + "step": 11925 + }, + { + "epoch": 4.01, + "grad_norm": 1.1005758047103882, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0086, + "step": 11950 + }, + { + "epoch": 4.02, + "grad_norm": 0.7758035063743591, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0085, + "step": 11975 + }, + { + "epoch": 4.03, + "grad_norm": 1.0270209312438965, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0091, + "step": 12000 + }, + { + "epoch": 4.03, + "eval_loss": 0.1053709015250206, + "eval_runtime": 1213.4142, + "eval_samples_per_second": 1.142, + "eval_steps_per_second": 1.142, + "eval_wer": 16.89034080875861, + "step": 12000 + }, + { + "epoch": 4.04, + "grad_norm": 1.3312368392944336, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0092, + "step": 12025 + }, + { + "epoch": 4.05, + "grad_norm": 1.718754529953003, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0086, + "step": 12050 + }, + { + "epoch": 4.05, + "grad_norm": 0.9969571232795715, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0082, + "step": 12075 + }, + { + "epoch": 4.06, + "grad_norm": 0.7550638318061829, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0086, + "step": 12100 + }, + { + "epoch": 4.07, + "grad_norm": 1.5295040607452393, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0089, + "step": 12125 + }, + { + "epoch": 4.08, + "grad_norm": 0.9218119382858276, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0086, + "step": 12150 + }, + { + "epoch": 4.09, + "grad_norm": 0.892400324344635, + "learning_rate": 8.82713567839196e-06, + "loss": 0.008, + "step": 12175 + }, + { + "epoch": 4.1, + "grad_norm": 0.9287202954292297, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0085, + "step": 12200 + }, + { + "epoch": 4.11, + "grad_norm": 0.8496940732002258, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0083, + "step": 12225 + }, + { + "epoch": 4.11, + "grad_norm": 1.3032245635986328, + "learning_rate": 8.81959798994975e-06, + "loss": 0.009, + "step": 12250 + }, + { + "epoch": 4.12, + "grad_norm": 0.9694374203681946, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0075, + "step": 12275 + }, + { + "epoch": 4.13, + "grad_norm": 1.3418890237808228, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0086, + "step": 12300 + }, + { + "epoch": 4.14, + "grad_norm": 1.4376976490020752, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0082, + "step": 12325 + }, + { + "epoch": 4.15, + "grad_norm": 1.7107688188552856, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0087, + "step": 12350 + }, + { + "epoch": 4.16, + "grad_norm": 1.146453857421875, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0093, + "step": 12375 + }, + { + "epoch": 4.16, + "grad_norm": 1.3553314208984375, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0086, + "step": 12400 + }, + { + "epoch": 4.17, + "grad_norm": 1.2053191661834717, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0097, + "step": 12425 + }, + { + "epoch": 4.18, + "grad_norm": 1.8913241624832153, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0079, + "step": 12450 + }, + { + "epoch": 4.19, + "grad_norm": 1.2542129755020142, + "learning_rate": 8.796984924623117e-06, + "loss": 0.009, + "step": 12475 + }, + { + "epoch": 4.2, + "grad_norm": 1.353072166442871, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0113, + "step": 12500 + }, + { + "epoch": 4.21, + "grad_norm": 1.098225712776184, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0102, + "step": 12525 + }, + { + "epoch": 4.21, + "grad_norm": 1.3385556936264038, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0083, + "step": 12550 + }, + { + "epoch": 4.22, + "grad_norm": 1.6041862964630127, + "learning_rate": 8.786934673366834e-06, + "loss": 0.01, + "step": 12575 + }, + { + "epoch": 4.23, + "grad_norm": 1.2044506072998047, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0086, + "step": 12600 + }, + { + "epoch": 4.24, + "grad_norm": 0.9524179697036743, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0088, + "step": 12625 + }, + { + "epoch": 4.25, + "grad_norm": 1.1507515907287598, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0117, + "step": 12650 + }, + { + "epoch": 4.26, + "grad_norm": 1.0932849645614624, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0097, + "step": 12675 + }, + { + "epoch": 4.26, + "grad_norm": 1.0530253648757935, + "learning_rate": 8.774371859296483e-06, + "loss": 0.009, + "step": 12700 + }, + { + "epoch": 4.27, + "grad_norm": 1.2916374206542969, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0096, + "step": 12725 + }, + { + "epoch": 4.28, + "grad_norm": 0.8565784096717834, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0081, + "step": 12750 + }, + { + "epoch": 4.29, + "grad_norm": 1.0703548192977905, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0092, + "step": 12775 + }, + { + "epoch": 4.3, + "grad_norm": 1.082779884338379, + "learning_rate": 8.764321608040202e-06, + "loss": 0.009, + "step": 12800 + }, + { + "epoch": 4.31, + "grad_norm": 1.2387831211090088, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0107, + "step": 12825 + }, + { + "epoch": 4.31, + "grad_norm": 1.3851344585418701, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0115, + "step": 12850 + }, + { + "epoch": 4.32, + "grad_norm": 0.9613246321678162, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0083, + "step": 12875 + }, + { + "epoch": 4.33, + "grad_norm": 1.1457337141036987, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0097, + "step": 12900 + }, + { + "epoch": 4.34, + "grad_norm": 1.6947778463363647, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0096, + "step": 12925 + }, + { + "epoch": 4.35, + "grad_norm": 1.1812468767166138, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0086, + "step": 12950 + }, + { + "epoch": 4.36, + "grad_norm": 1.173264980316162, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0098, + "step": 12975 + }, + { + "epoch": 4.37, + "grad_norm": 1.4958430528640747, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0096, + "step": 13000 + }, + { + "epoch": 4.37, + "eval_loss": 0.11601292341947556, + "eval_runtime": 1210.8606, + "eval_samples_per_second": 1.145, + "eval_steps_per_second": 1.145, + "eval_wer": 17.075754900229562, + "step": 13000 + }, + { + "epoch": 4.37, + "grad_norm": 1.6800485849380493, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0108, + "step": 13025 + }, + { + "epoch": 4.38, + "grad_norm": 1.0755212306976318, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0089, + "step": 13050 + }, + { + "epoch": 4.39, + "grad_norm": 1.0578029155731201, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0136, + "step": 13075 + }, + { + "epoch": 4.4, + "grad_norm": 0.8931235074996948, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0092, + "step": 13100 + }, + { + "epoch": 4.41, + "grad_norm": 1.305550217628479, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0088, + "step": 13125 + }, + { + "epoch": 4.42, + "grad_norm": 1.6387649774551392, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0096, + "step": 13150 + }, + { + "epoch": 4.42, + "grad_norm": 1.4161145687103271, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0096, + "step": 13175 + }, + { + "epoch": 4.43, + "grad_norm": 1.6912627220153809, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0099, + "step": 13200 + }, + { + "epoch": 4.44, + "grad_norm": 0.9292682409286499, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0109, + "step": 13225 + }, + { + "epoch": 4.45, + "grad_norm": 1.7090272903442383, + "learning_rate": 8.719095477386934e-06, + "loss": 0.0094, + "step": 13250 + }, + { + "epoch": 4.46, + "grad_norm": 1.4396581649780273, + "learning_rate": 8.716582914572866e-06, + "loss": 0.013, + "step": 13275 + }, + { + "epoch": 4.47, + "grad_norm": 1.4984877109527588, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0095, + "step": 13300 + }, + { + "epoch": 4.47, + "grad_norm": 1.4311236143112183, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0103, + "step": 13325 + }, + { + "epoch": 4.48, + "grad_norm": 1.0900646448135376, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0102, + "step": 13350 + }, + { + "epoch": 4.49, + "grad_norm": 1.1889293193817139, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0097, + "step": 13375 + }, + { + "epoch": 4.5, + "grad_norm": 1.3314672708511353, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0114, + "step": 13400 + }, + { + "epoch": 4.51, + "grad_norm": 1.433939814567566, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0098, + "step": 13425 + }, + { + "epoch": 4.52, + "grad_norm": 1.367415189743042, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0104, + "step": 13450 + }, + { + "epoch": 4.52, + "grad_norm": 1.0066732168197632, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0101, + "step": 13475 + }, + { + "epoch": 4.53, + "grad_norm": 1.5748649835586548, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0114, + "step": 13500 + }, + { + "epoch": 4.54, + "grad_norm": 1.697103500366211, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0106, + "step": 13525 + }, + { + "epoch": 4.55, + "grad_norm": 1.1022950410842896, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0099, + "step": 13550 + }, + { + "epoch": 4.56, + "grad_norm": 1.6443973779678345, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0099, + "step": 13575 + }, + { + "epoch": 4.57, + "grad_norm": 1.4115012884140015, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0096, + "step": 13600 + }, + { + "epoch": 4.58, + "grad_norm": 1.5429692268371582, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0098, + "step": 13625 + }, + { + "epoch": 4.58, + "grad_norm": 1.7618130445480347, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0095, + "step": 13650 + }, + { + "epoch": 4.59, + "grad_norm": 1.3618040084838867, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0123, + "step": 13675 + }, + { + "epoch": 4.6, + "grad_norm": 1.8731156587600708, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0095, + "step": 13700 + }, + { + "epoch": 4.61, + "grad_norm": 0.9101090431213379, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0095, + "step": 13725 + }, + { + "epoch": 4.62, + "grad_norm": 0.993123471736908, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0121, + "step": 13750 + }, + { + "epoch": 4.63, + "grad_norm": 1.2065584659576416, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0093, + "step": 13775 + }, + { + "epoch": 4.63, + "grad_norm": 1.4158481359481812, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0093, + "step": 13800 + }, + { + "epoch": 4.64, + "grad_norm": 1.120752215385437, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0082, + "step": 13825 + }, + { + "epoch": 4.65, + "grad_norm": 1.2931292057037354, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0106, + "step": 13850 + }, + { + "epoch": 4.66, + "grad_norm": 1.6148487329483032, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0106, + "step": 13875 + }, + { + "epoch": 4.67, + "grad_norm": 1.0475926399230957, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0087, + "step": 13900 + }, + { + "epoch": 4.68, + "grad_norm": 1.8488844633102417, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0111, + "step": 13925 + }, + { + "epoch": 4.68, + "grad_norm": 1.0069080591201782, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0095, + "step": 13950 + }, + { + "epoch": 4.69, + "grad_norm": 1.4261776208877563, + "learning_rate": 8.646231155778895e-06, + "loss": 0.009, + "step": 13975 + }, + { + "epoch": 4.7, + "grad_norm": 1.3766064643859863, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0084, + "step": 14000 + }, + { + "epoch": 4.7, + "eval_loss": 0.1198548898100853, + "eval_runtime": 1219.3279, + "eval_samples_per_second": 1.137, + "eval_steps_per_second": 1.137, + "eval_wer": 17.27882747660251, + "step": 14000 + }, + { + "epoch": 4.71, + "grad_norm": 1.130373477935791, + "learning_rate": 8.641206030150755e-06, + "loss": 0.01, + "step": 14025 + }, + { + "epoch": 4.72, + "grad_norm": 1.066453218460083, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0092, + "step": 14050 + }, + { + "epoch": 4.73, + "grad_norm": 1.2010557651519775, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0096, + "step": 14075 + }, + { + "epoch": 4.73, + "grad_norm": 1.196480631828308, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0098, + "step": 14100 + }, + { + "epoch": 4.74, + "grad_norm": 1.0107922554016113, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0105, + "step": 14125 + }, + { + "epoch": 4.75, + "grad_norm": 1.2658953666687012, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0089, + "step": 14150 + }, + { + "epoch": 4.76, + "grad_norm": 1.3953138589859009, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0115, + "step": 14175 + }, + { + "epoch": 4.77, + "grad_norm": 1.243082046508789, + "learning_rate": 8.623618090452262e-06, + "loss": 0.01, + "step": 14200 + }, + { + "epoch": 4.78, + "grad_norm": 1.260443091392517, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0094, + "step": 14225 + }, + { + "epoch": 4.79, + "grad_norm": 1.4856200218200684, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0115, + "step": 14250 + }, + { + "epoch": 4.79, + "grad_norm": 0.9096400141716003, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0096, + "step": 14275 + }, + { + "epoch": 4.8, + "grad_norm": 0.8690270781517029, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0104, + "step": 14300 + }, + { + "epoch": 4.81, + "grad_norm": 1.695749282836914, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0109, + "step": 14325 + }, + { + "epoch": 4.82, + "grad_norm": 1.866538166999817, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0106, + "step": 14350 + }, + { + "epoch": 4.83, + "grad_norm": 1.5150864124298096, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0109, + "step": 14375 + }, + { + "epoch": 4.84, + "grad_norm": 1.589615821838379, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0103, + "step": 14400 + }, + { + "epoch": 4.84, + "grad_norm": 1.182092547416687, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0105, + "step": 14425 + }, + { + "epoch": 4.85, + "grad_norm": 0.9403494000434875, + "learning_rate": 8.598492462311559e-06, + "loss": 0.0123, + "step": 14450 + }, + { + "epoch": 4.86, + "grad_norm": 0.747241735458374, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0095, + "step": 14475 + }, + { + "epoch": 4.87, + "grad_norm": 1.2985069751739502, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0095, + "step": 14500 + }, + { + "epoch": 4.88, + "grad_norm": 1.0741736888885498, + "learning_rate": 8.590954773869347e-06, + "loss": 0.0098, + "step": 14525 + }, + { + "epoch": 4.89, + "grad_norm": 1.5043480396270752, + "learning_rate": 8.588442211055276e-06, + "loss": 0.0109, + "step": 14550 + }, + { + "epoch": 4.89, + "grad_norm": 1.5984162092208862, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0107, + "step": 14575 + }, + { + "epoch": 4.9, + "grad_norm": 1.6896255016326904, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0113, + "step": 14600 + }, + { + "epoch": 4.91, + "grad_norm": 1.4255273342132568, + "learning_rate": 8.580904522613066e-06, + "loss": 0.01, + "step": 14625 + }, + { + "epoch": 4.92, + "grad_norm": 1.412419080734253, + "learning_rate": 8.578391959798997e-06, + "loss": 0.0104, + "step": 14650 + }, + { + "epoch": 4.93, + "grad_norm": 1.3407171964645386, + "learning_rate": 8.575879396984925e-06, + "loss": 0.0102, + "step": 14675 + }, + { + "epoch": 4.94, + "grad_norm": 1.4571243524551392, + "learning_rate": 8.573366834170856e-06, + "loss": 0.0109, + "step": 14700 + }, + { + "epoch": 4.94, + "grad_norm": 0.5709521174430847, + "learning_rate": 8.570854271356785e-06, + "loss": 0.0089, + "step": 14725 + }, + { + "epoch": 4.95, + "grad_norm": 1.4444150924682617, + "learning_rate": 8.568341708542714e-06, + "loss": 0.01, + "step": 14750 + }, + { + "epoch": 4.96, + "grad_norm": 0.9555776715278625, + "learning_rate": 8.565829145728644e-06, + "loss": 0.0088, + "step": 14775 + }, + { + "epoch": 4.97, + "grad_norm": 1.0951452255249023, + "learning_rate": 8.563316582914573e-06, + "loss": 0.0109, + "step": 14800 + }, + { + "epoch": 4.98, + "grad_norm": 0.8551018834114075, + "learning_rate": 8.560804020100502e-06, + "loss": 0.0097, + "step": 14825 + }, + { + "epoch": 4.99, + "grad_norm": 1.3151156902313232, + "learning_rate": 8.558291457286433e-06, + "loss": 0.0092, + "step": 14850 + }, + { + "epoch": 4.99, + "grad_norm": 1.1482994556427002, + "learning_rate": 8.555778894472363e-06, + "loss": 0.0096, + "step": 14875 + }, + { + "epoch": 5.0, + "grad_norm": 0.7510390281677246, + "learning_rate": 8.553266331658292e-06, + "loss": 0.0085, + "step": 14900 + }, + { + "epoch": 5.01, + "grad_norm": 0.8392704725265503, + "learning_rate": 8.550753768844223e-06, + "loss": 0.0055, + "step": 14925 + }, + { + "epoch": 5.02, + "grad_norm": 1.0651525259017944, + "learning_rate": 8.54824120603015e-06, + "loss": 0.0042, + "step": 14950 + }, + { + "epoch": 5.03, + "grad_norm": 0.3889058828353882, + "learning_rate": 8.545728643216082e-06, + "loss": 0.0058, + "step": 14975 + }, + { + "epoch": 5.04, + "grad_norm": 0.5206564664840698, + "learning_rate": 8.54321608040201e-06, + "loss": 0.004, + "step": 15000 + }, + { + "epoch": 5.04, + "eval_loss": 0.12624329328536987, + "eval_runtime": 1215.8966, + "eval_samples_per_second": 1.14, + "eval_steps_per_second": 1.14, + "eval_wer": 17.22585202189652, + "step": 15000 + }, + { + "epoch": 5.05, + "grad_norm": 1.0367577075958252, + "learning_rate": 8.54070351758794e-06, + "loss": 0.0054, + "step": 15025 + }, + { + "epoch": 5.05, + "grad_norm": 1.0904309749603271, + "learning_rate": 8.53819095477387e-06, + "loss": 0.0057, + "step": 15050 + }, + { + "epoch": 5.06, + "grad_norm": 1.5024206638336182, + "learning_rate": 8.535678391959799e-06, + "loss": 0.0061, + "step": 15075 + }, + { + "epoch": 5.07, + "grad_norm": 1.370377540588379, + "learning_rate": 8.53316582914573e-06, + "loss": 0.0052, + "step": 15100 + }, + { + "epoch": 5.08, + "grad_norm": 1.0550916194915771, + "learning_rate": 8.530653266331659e-06, + "loss": 0.0051, + "step": 15125 + }, + { + "epoch": 5.09, + "grad_norm": 0.5810117721557617, + "learning_rate": 8.528140703517588e-06, + "loss": 0.0046, + "step": 15150 + }, + { + "epoch": 5.1, + "grad_norm": 1.0238065719604492, + "learning_rate": 8.525628140703518e-06, + "loss": 0.0049, + "step": 15175 + }, + { + "epoch": 5.1, + "grad_norm": 1.5375535488128662, + "learning_rate": 8.523115577889449e-06, + "loss": 0.0057, + "step": 15200 + }, + { + "epoch": 5.11, + "grad_norm": 1.1557241678237915, + "learning_rate": 8.520603015075376e-06, + "loss": 0.0056, + "step": 15225 + }, + { + "epoch": 5.12, + "grad_norm": 1.640147089958191, + "learning_rate": 8.518090452261307e-06, + "loss": 0.0058, + "step": 15250 + }, + { + "epoch": 5.13, + "grad_norm": 0.9477593898773193, + "learning_rate": 8.515577889447237e-06, + "loss": 0.0061, + "step": 15275 + }, + { + "epoch": 5.14, + "grad_norm": 1.4299824237823486, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0058, + "step": 15300 + }, + { + "epoch": 5.15, + "grad_norm": 0.6578424572944641, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0051, + "step": 15325 + }, + { + "epoch": 5.15, + "grad_norm": 1.0026063919067383, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0051, + "step": 15350 + }, + { + "epoch": 5.16, + "grad_norm": 0.9299719333648682, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0053, + "step": 15375 + }, + { + "epoch": 5.17, + "grad_norm": 1.6236618757247925, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0056, + "step": 15400 + }, + { + "epoch": 5.18, + "grad_norm": 0.8792663812637329, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0054, + "step": 15425 + }, + { + "epoch": 5.19, + "grad_norm": 0.8390566110610962, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0068, + "step": 15450 + }, + { + "epoch": 5.2, + "grad_norm": 1.5235528945922852, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0068, + "step": 15475 + }, + { + "epoch": 5.2, + "grad_norm": 0.8700294494628906, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0063, + "step": 15500 + }, + { + "epoch": 5.21, + "grad_norm": 0.7127369046211243, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0054, + "step": 15525 + }, + { + "epoch": 5.22, + "grad_norm": 2.712425947189331, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0056, + "step": 15550 + }, + { + "epoch": 5.23, + "grad_norm": 1.2807471752166748, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0056, + "step": 15575 + }, + { + "epoch": 5.24, + "grad_norm": 1.2333037853240967, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0059, + "step": 15600 + }, + { + "epoch": 5.25, + "grad_norm": 0.6642898321151733, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0059, + "step": 15625 + }, + { + "epoch": 5.26, + "grad_norm": 0.6553583145141602, + "learning_rate": 8.477989949748744e-06, + "loss": 0.005, + "step": 15650 + }, + { + "epoch": 5.26, + "grad_norm": 0.9028275012969971, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0061, + "step": 15675 + }, + { + "epoch": 5.27, + "grad_norm": 1.860561490058899, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0055, + "step": 15700 + }, + { + "epoch": 5.28, + "grad_norm": 1.1844035387039185, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0055, + "step": 15725 + }, + { + "epoch": 5.29, + "grad_norm": 1.0906175374984741, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0064, + "step": 15750 + }, + { + "epoch": 5.3, + "grad_norm": 0.9445136189460754, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0068, + "step": 15775 + }, + { + "epoch": 5.31, + "grad_norm": 0.7756630182266235, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0056, + "step": 15800 + }, + { + "epoch": 5.31, + "grad_norm": 1.9760853052139282, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0068, + "step": 15825 + }, + { + "epoch": 5.32, + "grad_norm": 1.144864797592163, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0061, + "step": 15850 + }, + { + "epoch": 5.33, + "grad_norm": 1.529433012008667, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0069, + "step": 15875 + }, + { + "epoch": 5.34, + "grad_norm": 0.8753407597541809, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0084, + "step": 15900 + }, + { + "epoch": 5.35, + "grad_norm": 1.126513123512268, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0063, + "step": 15925 + }, + { + "epoch": 5.36, + "grad_norm": 1.0150917768478394, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0056, + "step": 15950 + }, + { + "epoch": 5.36, + "grad_norm": 1.1704034805297852, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0063, + "step": 15975 + }, + { + "epoch": 5.37, + "grad_norm": 0.665397584438324, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0058, + "step": 16000 + }, + { + "epoch": 5.37, + "eval_loss": 0.13040673732757568, + "eval_runtime": 1220.8311, + "eval_samples_per_second": 1.135, + "eval_steps_per_second": 1.135, + "eval_wer": 17.35829065866149, + "step": 16000 + }, + { + "epoch": 5.38, + "grad_norm": 2.3762154579162598, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0065, + "step": 16025 + }, + { + "epoch": 5.39, + "grad_norm": 1.1395248174667358, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0059, + "step": 16050 + }, + { + "epoch": 5.4, + "grad_norm": 1.049620270729065, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0066, + "step": 16075 + }, + { + "epoch": 5.41, + "grad_norm": 1.377968668937683, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0066, + "step": 16100 + }, + { + "epoch": 5.41, + "grad_norm": 1.0629204511642456, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0058, + "step": 16125 + }, + { + "epoch": 5.42, + "grad_norm": 0.98250412940979, + "learning_rate": 8.4278391959799e-06, + "loss": 0.007, + "step": 16150 + }, + { + "epoch": 5.43, + "grad_norm": 1.1195024251937866, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0065, + "step": 16175 + }, + { + "epoch": 5.44, + "grad_norm": 1.307854413986206, + "learning_rate": 8.42281407035176e-06, + "loss": 0.006, + "step": 16200 + }, + { + "epoch": 5.45, + "grad_norm": 1.235283613204956, + "learning_rate": 8.420301507537689e-06, + "loss": 0.006, + "step": 16225 + }, + { + "epoch": 5.46, + "grad_norm": 1.2871832847595215, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0063, + "step": 16250 + }, + { + "epoch": 5.47, + "grad_norm": 2.0028076171875, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0067, + "step": 16275 + }, + { + "epoch": 5.47, + "grad_norm": 1.1824889183044434, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0068, + "step": 16300 + }, + { + "epoch": 5.48, + "grad_norm": 0.9897297024726868, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0071, + "step": 16325 + }, + { + "epoch": 5.49, + "grad_norm": 0.870624303817749, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0063, + "step": 16350 + }, + { + "epoch": 5.5, + "grad_norm": 1.4575295448303223, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0065, + "step": 16375 + }, + { + "epoch": 5.51, + "grad_norm": 1.0074716806411743, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0057, + "step": 16400 + }, + { + "epoch": 5.52, + "grad_norm": 0.6549272537231445, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0066, + "step": 16425 + }, + { + "epoch": 5.52, + "grad_norm": 0.966979444026947, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0051, + "step": 16450 + }, + { + "epoch": 5.53, + "grad_norm": 1.1880178451538086, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0066, + "step": 16475 + }, + { + "epoch": 5.54, + "grad_norm": 1.6947048902511597, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0069, + "step": 16500 + }, + { + "epoch": 5.55, + "grad_norm": 1.8184447288513184, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0066, + "step": 16525 + }, + { + "epoch": 5.56, + "grad_norm": 1.0880745649337769, + "learning_rate": 8.387638190954774e-06, + "loss": 0.006, + "step": 16550 + }, + { + "epoch": 5.57, + "grad_norm": 1.1165281534194946, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0064, + "step": 16575 + }, + { + "epoch": 5.57, + "grad_norm": 0.47200649976730347, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0062, + "step": 16600 + }, + { + "epoch": 5.58, + "grad_norm": 1.070189118385315, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0064, + "step": 16625 + }, + { + "epoch": 5.59, + "grad_norm": 1.1195865869522095, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0076, + "step": 16650 + }, + { + "epoch": 5.6, + "grad_norm": 1.43657386302948, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0066, + "step": 16675 + }, + { + "epoch": 5.61, + "grad_norm": 0.9581249356269836, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0065, + "step": 16700 + }, + { + "epoch": 5.62, + "grad_norm": 1.2223182916641235, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0077, + "step": 16725 + }, + { + "epoch": 5.62, + "grad_norm": 1.3056018352508545, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0075, + "step": 16750 + }, + { + "epoch": 5.63, + "grad_norm": 1.2805579900741577, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0061, + "step": 16775 + }, + { + "epoch": 5.64, + "grad_norm": 0.8457928895950317, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0059, + "step": 16800 + }, + { + "epoch": 5.65, + "grad_norm": 0.5992647409439087, + "learning_rate": 8.36e-06, + "loss": 0.0068, + "step": 16825 + }, + { + "epoch": 5.66, + "grad_norm": 1.612676978111267, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0063, + "step": 16850 + }, + { + "epoch": 5.67, + "grad_norm": 1.2854987382888794, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0064, + "step": 16875 + }, + { + "epoch": 5.67, + "grad_norm": 1.0457987785339355, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0066, + "step": 16900 + }, + { + "epoch": 5.68, + "grad_norm": 1.6373506784439087, + "learning_rate": 8.34994974874372e-06, + "loss": 0.007, + "step": 16925 + }, + { + "epoch": 5.69, + "grad_norm": 1.3122998476028442, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0059, + "step": 16950 + }, + { + "epoch": 5.7, + "grad_norm": 1.0710303783416748, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0058, + "step": 16975 + }, + { + "epoch": 5.71, + "grad_norm": 1.4472990036010742, + "learning_rate": 8.342412060301508e-06, + "loss": 0.007, + "step": 17000 + }, + { + "epoch": 5.71, + "eval_loss": 0.1318867951631546, + "eval_runtime": 1222.7002, + "eval_samples_per_second": 1.134, + "eval_steps_per_second": 1.134, + "eval_wer": 16.422390958855733, + "step": 17000 + }, + { + "epoch": 5.72, + "grad_norm": 1.2172038555145264, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0076, + "step": 17025 + }, + { + "epoch": 5.73, + "grad_norm": 1.0388529300689697, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0071, + "step": 17050 + }, + { + "epoch": 5.73, + "grad_norm": 1.059638261795044, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0058, + "step": 17075 + }, + { + "epoch": 5.74, + "grad_norm": 1.357620358467102, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0068, + "step": 17100 + }, + { + "epoch": 5.75, + "grad_norm": 1.1725753545761108, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0057, + "step": 17125 + }, + { + "epoch": 5.76, + "grad_norm": 1.2212777137756348, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0068, + "step": 17150 + }, + { + "epoch": 5.77, + "grad_norm": 1.659661054611206, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0066, + "step": 17175 + }, + { + "epoch": 5.78, + "grad_norm": 0.9219920635223389, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0086, + "step": 17200 + }, + { + "epoch": 5.78, + "grad_norm": 0.7135714888572693, + "learning_rate": 8.319798994974876e-06, + "loss": 0.006, + "step": 17225 + }, + { + "epoch": 5.79, + "grad_norm": 1.2897686958312988, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0074, + "step": 17250 + }, + { + "epoch": 5.8, + "grad_norm": 0.8991630673408508, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0061, + "step": 17275 + }, + { + "epoch": 5.81, + "grad_norm": 1.0983035564422607, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0069, + "step": 17300 + }, + { + "epoch": 5.82, + "grad_norm": 1.2060651779174805, + "learning_rate": 8.309748743718595e-06, + "loss": 0.007, + "step": 17325 + }, + { + "epoch": 5.83, + "grad_norm": 1.2288886308670044, + "learning_rate": 8.307236180904524e-06, + "loss": 0.007, + "step": 17350 + }, + { + "epoch": 5.83, + "grad_norm": 0.6468270421028137, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0069, + "step": 17375 + }, + { + "epoch": 5.84, + "grad_norm": 0.2994486093521118, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0084, + "step": 17400 + }, + { + "epoch": 5.85, + "grad_norm": 1.6546576023101807, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0071, + "step": 17425 + }, + { + "epoch": 5.86, + "grad_norm": 0.9513535499572754, + "learning_rate": 8.297185929648241e-06, + "loss": 0.007, + "step": 17450 + }, + { + "epoch": 5.87, + "grad_norm": 0.820580005645752, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0066, + "step": 17475 + }, + { + "epoch": 5.88, + "grad_norm": 1.577744483947754, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0064, + "step": 17500 + }, + { + "epoch": 5.88, + "grad_norm": 1.0302276611328125, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0062, + "step": 17525 + }, + { + "epoch": 5.89, + "grad_norm": 0.9173238277435303, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0065, + "step": 17550 + }, + { + "epoch": 5.9, + "grad_norm": 1.0657161474227905, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0061, + "step": 17575 + }, + { + "epoch": 5.91, + "grad_norm": 1.6949005126953125, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0057, + "step": 17600 + }, + { + "epoch": 5.92, + "grad_norm": 0.5395543575286865, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0067, + "step": 17625 + }, + { + "epoch": 5.93, + "grad_norm": 1.3431799411773682, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0069, + "step": 17650 + }, + { + "epoch": 5.94, + "grad_norm": 1.2795543670654297, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0066, + "step": 17675 + }, + { + "epoch": 5.94, + "grad_norm": 0.5239821076393127, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0067, + "step": 17700 + }, + { + "epoch": 5.95, + "grad_norm": 0.7477108836174011, + "learning_rate": 8.269547738693467e-06, + "loss": 0.007, + "step": 17725 + }, + { + "epoch": 5.96, + "grad_norm": 0.9321283102035522, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0066, + "step": 17750 + }, + { + "epoch": 5.97, + "grad_norm": 1.6927801370620728, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0055, + "step": 17775 + }, + { + "epoch": 5.98, + "grad_norm": 1.2552069425582886, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0075, + "step": 17800 + }, + { + "epoch": 5.99, + "grad_norm": 1.3818002939224243, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0063, + "step": 17825 + }, + { + "epoch": 5.99, + "grad_norm": 1.2107528448104858, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0068, + "step": 17850 + }, + { + "epoch": 6.0, + "grad_norm": 0.36283719539642334, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0081, + "step": 17875 + }, + { + "epoch": 6.01, + "grad_norm": 0.5087275505065918, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0041, + "step": 17900 + }, + { + "epoch": 6.02, + "grad_norm": 0.6631815433502197, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0036, + "step": 17925 + }, + { + "epoch": 6.03, + "grad_norm": 1.233224630355835, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0032, + "step": 17950 + }, + { + "epoch": 6.04, + "grad_norm": 1.3717970848083496, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0036, + "step": 17975 + }, + { + "epoch": 6.04, + "grad_norm": 0.5834024548530579, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0038, + "step": 18000 + }, + { + "epoch": 6.04, + "eval_loss": 0.13147088885307312, + "eval_runtime": 1217.9277, + "eval_samples_per_second": 1.138, + "eval_steps_per_second": 1.138, + "eval_wer": 17.3053152039555, + "step": 18000 + }, + { + "epoch": 6.05, + "grad_norm": 1.0247628688812256, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0042, + "step": 18025 + }, + { + "epoch": 6.06, + "grad_norm": 1.1065958738327026, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0038, + "step": 18050 + }, + { + "epoch": 6.07, + "grad_norm": 0.9071959853172302, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0041, + "step": 18075 + }, + { + "epoch": 6.08, + "grad_norm": 1.4560977220535278, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0057, + "step": 18100 + }, + { + "epoch": 6.09, + "grad_norm": 1.2624982595443726, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0039, + "step": 18125 + }, + { + "epoch": 6.09, + "grad_norm": 0.6906869411468506, + "learning_rate": 8.226834170854272e-06, + "loss": 0.004, + "step": 18150 + }, + { + "epoch": 6.1, + "grad_norm": 0.8660603165626526, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0039, + "step": 18175 + }, + { + "epoch": 6.11, + "grad_norm": 1.06183922290802, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0038, + "step": 18200 + }, + { + "epoch": 6.12, + "grad_norm": 1.0678050518035889, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0046, + "step": 18225 + }, + { + "epoch": 6.13, + "grad_norm": 0.803714394569397, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0043, + "step": 18250 + }, + { + "epoch": 6.14, + "grad_norm": 0.8352630138397217, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0042, + "step": 18275 + }, + { + "epoch": 6.15, + "grad_norm": 0.5487251877784729, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0045, + "step": 18300 + }, + { + "epoch": 6.15, + "grad_norm": 0.5100066065788269, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0048, + "step": 18325 + }, + { + "epoch": 6.16, + "grad_norm": 0.8503884673118591, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0038, + "step": 18350 + }, + { + "epoch": 6.17, + "grad_norm": 0.7612094283103943, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0042, + "step": 18375 + }, + { + "epoch": 6.18, + "grad_norm": 0.7139018177986145, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0034, + "step": 18400 + }, + { + "epoch": 6.19, + "grad_norm": 1.2460196018218994, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0038, + "step": 18425 + }, + { + "epoch": 6.2, + "grad_norm": 1.2828315496444702, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0039, + "step": 18450 + }, + { + "epoch": 6.2, + "grad_norm": 1.2708101272583008, + "learning_rate": 8.194170854271357e-06, + "loss": 0.0044, + "step": 18475 + }, + { + "epoch": 6.21, + "grad_norm": 0.8804450631141663, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0042, + "step": 18500 + }, + { + "epoch": 6.22, + "grad_norm": 1.1122368574142456, + "learning_rate": 8.189145728643216e-06, + "loss": 0.0039, + "step": 18525 + }, + { + "epoch": 6.23, + "grad_norm": 1.0757254362106323, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0032, + "step": 18550 + }, + { + "epoch": 6.24, + "grad_norm": 0.9536485075950623, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0037, + "step": 18575 + }, + { + "epoch": 6.25, + "grad_norm": 0.35071513056755066, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0054, + "step": 18600 + }, + { + "epoch": 6.25, + "grad_norm": 1.1988258361816406, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0038, + "step": 18625 + }, + { + "epoch": 6.26, + "grad_norm": 0.8112755417823792, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0042, + "step": 18650 + }, + { + "epoch": 6.27, + "grad_norm": 0.727368950843811, + "learning_rate": 8.174070351758795e-06, + "loss": 0.0034, + "step": 18675 + }, + { + "epoch": 6.28, + "grad_norm": 0.8512629270553589, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0034, + "step": 18700 + }, + { + "epoch": 6.29, + "grad_norm": 0.7930422425270081, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0053, + "step": 18725 + }, + { + "epoch": 6.3, + "grad_norm": 1.1358853578567505, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0044, + "step": 18750 + }, + { + "epoch": 6.3, + "grad_norm": 0.7192019820213318, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0039, + "step": 18775 + }, + { + "epoch": 6.31, + "grad_norm": 0.7389631271362305, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0044, + "step": 18800 + }, + { + "epoch": 6.32, + "grad_norm": 0.9794608950614929, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0041, + "step": 18825 + }, + { + "epoch": 6.33, + "grad_norm": 0.8795326948165894, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0046, + "step": 18850 + }, + { + "epoch": 6.34, + "grad_norm": 0.7421190738677979, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0045, + "step": 18875 + }, + { + "epoch": 6.35, + "grad_norm": 1.0794202089309692, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0059, + "step": 18900 + }, + { + "epoch": 6.35, + "grad_norm": 1.1628130674362183, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0056, + "step": 18925 + }, + { + "epoch": 6.36, + "grad_norm": 0.8464459776878357, + "learning_rate": 8.146432160804021e-06, + "loss": 0.0041, + "step": 18950 + }, + { + "epoch": 6.37, + "grad_norm": 1.1157279014587402, + "learning_rate": 8.14391959798995e-06, + "loss": 0.005, + "step": 18975 + }, + { + "epoch": 6.38, + "grad_norm": 0.9261577129364014, + "learning_rate": 8.14140703517588e-06, + "loss": 0.0041, + "step": 19000 + }, + { + "epoch": 6.38, + "eval_loss": 0.1382196992635727, + "eval_runtime": 1271.999, + "eval_samples_per_second": 1.09, + "eval_steps_per_second": 1.09, + "eval_wer": 16.519512625816706, + "step": 19000 + }, + { + "epoch": 6.39, + "grad_norm": 0.4045741558074951, + "learning_rate": 8.13889447236181e-06, + "loss": 0.0048, + "step": 19025 + }, + { + "epoch": 6.4, + "grad_norm": 1.5411362648010254, + "learning_rate": 8.13638190954774e-06, + "loss": 0.0041, + "step": 19050 + }, + { + "epoch": 6.41, + "grad_norm": 0.9811854362487793, + "learning_rate": 8.13386934673367e-06, + "loss": 0.0047, + "step": 19075 + }, + { + "epoch": 6.41, + "grad_norm": 1.2694803476333618, + "learning_rate": 8.131356783919598e-06, + "loss": 0.0046, + "step": 19100 + }, + { + "epoch": 6.42, + "grad_norm": 0.8116133809089661, + "learning_rate": 8.128844221105528e-06, + "loss": 0.0041, + "step": 19125 + }, + { + "epoch": 6.43, + "grad_norm": 0.9315871596336365, + "learning_rate": 8.126331658291457e-06, + "loss": 0.0045, + "step": 19150 + }, + { + "epoch": 6.44, + "grad_norm": 0.7914777994155884, + "learning_rate": 8.123819095477388e-06, + "loss": 0.0052, + "step": 19175 + }, + { + "epoch": 6.45, + "grad_norm": 1.344946026802063, + "learning_rate": 8.121306532663317e-06, + "loss": 0.0049, + "step": 19200 + }, + { + "epoch": 6.46, + "grad_norm": 1.9953036308288574, + "learning_rate": 8.118793969849247e-06, + "loss": 0.0048, + "step": 19225 + }, + { + "epoch": 6.46, + "grad_norm": 0.9706830978393555, + "learning_rate": 8.116281407035178e-06, + "loss": 0.0054, + "step": 19250 + }, + { + "epoch": 6.47, + "grad_norm": 1.2996464967727661, + "learning_rate": 8.113768844221105e-06, + "loss": 0.0052, + "step": 19275 + }, + { + "epoch": 6.48, + "grad_norm": 0.6263596415519714, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0043, + "step": 19300 + }, + { + "epoch": 6.49, + "grad_norm": 0.8302187323570251, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0048, + "step": 19325 + }, + { + "epoch": 6.5, + "grad_norm": 1.2249444723129272, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0056, + "step": 19350 + }, + { + "epoch": 6.51, + "grad_norm": 1.5222480297088623, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0052, + "step": 19375 + }, + { + "epoch": 6.51, + "grad_norm": 0.8501996397972107, + "learning_rate": 8.101306532663318e-06, + "loss": 0.0051, + "step": 19400 + }, + { + "epoch": 6.52, + "grad_norm": 1.1221652030944824, + "learning_rate": 8.098793969849247e-06, + "loss": 0.0044, + "step": 19425 + }, + { + "epoch": 6.53, + "grad_norm": 1.4059791564941406, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0055, + "step": 19450 + }, + { + "epoch": 6.54, + "grad_norm": 1.1946256160736084, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0045, + "step": 19475 + }, + { + "epoch": 6.55, + "grad_norm": 1.4187369346618652, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0045, + "step": 19500 + }, + { + "epoch": 6.56, + "grad_norm": 1.309688925743103, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0049, + "step": 19525 + }, + { + "epoch": 6.56, + "grad_norm": 1.3272817134857178, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0056, + "step": 19550 + }, + { + "epoch": 6.57, + "grad_norm": 0.8634605407714844, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0052, + "step": 19575 + }, + { + "epoch": 6.58, + "grad_norm": 0.6873275637626648, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0041, + "step": 19600 + }, + { + "epoch": 6.59, + "grad_norm": 1.2435672283172607, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0055, + "step": 19625 + }, + { + "epoch": 6.6, + "grad_norm": 0.8391593098640442, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0052, + "step": 19650 + }, + { + "epoch": 6.61, + "grad_norm": 0.7029734253883362, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0036, + "step": 19675 + }, + { + "epoch": 6.62, + "grad_norm": 1.28307044506073, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0045, + "step": 19700 + }, + { + "epoch": 6.62, + "grad_norm": 1.5007773637771606, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0046, + "step": 19725 + }, + { + "epoch": 6.63, + "grad_norm": 1.3657234907150269, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0054, + "step": 19750 + }, + { + "epoch": 6.64, + "grad_norm": 1.3915965557098389, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0053, + "step": 19775 + }, + { + "epoch": 6.65, + "grad_norm": 0.5053869485855103, + "learning_rate": 8.061105527638192e-06, + "loss": 0.005, + "step": 19800 + }, + { + "epoch": 6.66, + "grad_norm": 1.4511886835098267, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0057, + "step": 19825 + }, + { + "epoch": 6.67, + "grad_norm": 1.0388401746749878, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0049, + "step": 19850 + }, + { + "epoch": 6.67, + "grad_norm": 0.4926684498786926, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0052, + "step": 19875 + }, + { + "epoch": 6.68, + "grad_norm": 1.7179065942764282, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0055, + "step": 19900 + }, + { + "epoch": 6.69, + "grad_norm": 1.3244882822036743, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0044, + "step": 19925 + }, + { + "epoch": 6.7, + "grad_norm": 0.9854592680931091, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0042, + "step": 19950 + }, + { + "epoch": 6.71, + "grad_norm": 1.3770145177841187, + "learning_rate": 8.043517587939699e-06, + "loss": 0.0049, + "step": 19975 + }, + { + "epoch": 6.72, + "grad_norm": 0.9621798396110535, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0055, + "step": 20000 + }, + { + "epoch": 6.72, + "eval_loss": 0.1406363546848297, + "eval_runtime": 1258.0014, + "eval_samples_per_second": 1.102, + "eval_steps_per_second": 1.102, + "eval_wer": 16.57248808052269, + "step": 20000 + }, + { + "epoch": 6.72, + "grad_norm": 1.568862795829773, + "learning_rate": 8.03849246231156e-06, + "loss": 0.0052, + "step": 20025 + }, + { + "epoch": 6.73, + "grad_norm": 1.4103065729141235, + "learning_rate": 8.035979899497489e-06, + "loss": 0.0045, + "step": 20050 + }, + { + "epoch": 6.74, + "grad_norm": 1.0747122764587402, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0057, + "step": 20075 + }, + { + "epoch": 6.75, + "grad_norm": 1.3271406888961792, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0057, + "step": 20100 + }, + { + "epoch": 6.76, + "grad_norm": 0.7828665375709534, + "learning_rate": 8.028442211055277e-06, + "loss": 0.005, + "step": 20125 + }, + { + "epoch": 6.77, + "grad_norm": 1.747380256652832, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0047, + "step": 20150 + }, + { + "epoch": 6.77, + "grad_norm": 1.1636756658554077, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0048, + "step": 20175 + }, + { + "epoch": 6.78, + "grad_norm": 1.028845191001892, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0052, + "step": 20200 + }, + { + "epoch": 6.79, + "grad_norm": 0.8782851696014404, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0045, + "step": 20225 + }, + { + "epoch": 6.8, + "grad_norm": 1.4748945236206055, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0045, + "step": 20250 + }, + { + "epoch": 6.81, + "grad_norm": 0.8660835027694702, + "learning_rate": 8.013366834170854e-06, + "loss": 0.004, + "step": 20275 + }, + { + "epoch": 6.82, + "grad_norm": 1.0635696649551392, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0051, + "step": 20300 + }, + { + "epoch": 6.83, + "grad_norm": 1.0971332788467407, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0048, + "step": 20325 + }, + { + "epoch": 6.83, + "grad_norm": 1.1037646532058716, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0053, + "step": 20350 + }, + { + "epoch": 6.84, + "grad_norm": 1.4992175102233887, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0059, + "step": 20375 + }, + { + "epoch": 6.85, + "grad_norm": 0.9717215895652771, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0045, + "step": 20400 + }, + { + "epoch": 6.86, + "grad_norm": 0.8256064653396606, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0048, + "step": 20425 + }, + { + "epoch": 6.87, + "grad_norm": 0.8844844698905945, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0051, + "step": 20450 + }, + { + "epoch": 6.88, + "grad_norm": 0.9884155988693237, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0053, + "step": 20475 + }, + { + "epoch": 6.88, + "grad_norm": 0.9467431306838989, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0045, + "step": 20500 + }, + { + "epoch": 6.89, + "grad_norm": 0.6817843914031982, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0052, + "step": 20525 + }, + { + "epoch": 6.9, + "grad_norm": 1.2929531335830688, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0048, + "step": 20550 + }, + { + "epoch": 6.91, + "grad_norm": 1.4893114566802979, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0045, + "step": 20575 + }, + { + "epoch": 6.92, + "grad_norm": 1.5255932807922363, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0059, + "step": 20600 + }, + { + "epoch": 6.93, + "grad_norm": 1.9872088432312012, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0058, + "step": 20625 + }, + { + "epoch": 6.93, + "grad_norm": 2.448641061782837, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0058, + "step": 20650 + }, + { + "epoch": 6.94, + "grad_norm": 1.0486491918563843, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0059, + "step": 20675 + }, + { + "epoch": 6.95, + "grad_norm": 1.1047635078430176, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0052, + "step": 20700 + }, + { + "epoch": 6.96, + "grad_norm": 0.8545504808425903, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0063, + "step": 20725 + }, + { + "epoch": 6.97, + "grad_norm": 1.2570091485977173, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0055, + "step": 20750 + }, + { + "epoch": 6.98, + "grad_norm": 0.5869073867797852, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0047, + "step": 20775 + }, + { + "epoch": 6.98, + "grad_norm": 1.3176310062408447, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0048, + "step": 20800 + }, + { + "epoch": 6.99, + "grad_norm": 1.83822500705719, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0049, + "step": 20825 + }, + { + "epoch": 7.0, + "grad_norm": 1.317239761352539, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0051, + "step": 20850 + }, + { + "epoch": 7.01, + "grad_norm": 0.48859530687332153, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0032, + "step": 20875 + }, + { + "epoch": 7.02, + "grad_norm": 0.9015602469444275, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0028, + "step": 20900 + }, + { + "epoch": 7.03, + "grad_norm": 0.5388469099998474, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0027, + "step": 20925 + }, + { + "epoch": 7.03, + "grad_norm": 0.7052870988845825, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0033, + "step": 20950 + }, + { + "epoch": 7.04, + "grad_norm": 0.5997990369796753, + "learning_rate": 7.943015075376885e-06, + "loss": 0.003, + "step": 20975 + }, + { + "epoch": 7.05, + "grad_norm": 1.3913544416427612, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0026, + "step": 21000 + }, + { + "epoch": 7.05, + "eval_loss": 0.14443615078926086, + "eval_runtime": 1247.1814, + "eval_samples_per_second": 1.111, + "eval_steps_per_second": 1.111, + "eval_wer": 17.18170580964153, + "step": 21000 + }, + { + "epoch": 7.06, + "grad_norm": 0.8821436762809753, + "learning_rate": 7.937989949748744e-06, + "loss": 0.003, + "step": 21025 + }, + { + "epoch": 7.07, + "grad_norm": 0.8566175699234009, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0027, + "step": 21050 + }, + { + "epoch": 7.08, + "grad_norm": 1.187877893447876, + "learning_rate": 7.932964824120604e-06, + "loss": 0.0041, + "step": 21075 + }, + { + "epoch": 7.09, + "grad_norm": 0.8279963731765747, + "learning_rate": 7.930452261306534e-06, + "loss": 0.0036, + "step": 21100 + }, + { + "epoch": 7.09, + "grad_norm": 1.1592994928359985, + "learning_rate": 7.927939698492463e-06, + "loss": 0.0035, + "step": 21125 + }, + { + "epoch": 7.1, + "grad_norm": 1.148371934890747, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0028, + "step": 21150 + }, + { + "epoch": 7.11, + "grad_norm": 0.9421920776367188, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0027, + "step": 21175 + }, + { + "epoch": 7.12, + "grad_norm": 1.0658944845199585, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0026, + "step": 21200 + }, + { + "epoch": 7.13, + "grad_norm": 0.9727050065994263, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0038, + "step": 21225 + }, + { + "epoch": 7.14, + "grad_norm": 0.7338525652885437, + "learning_rate": 7.915477386934674e-06, + "loss": 0.003, + "step": 21250 + }, + { + "epoch": 7.14, + "grad_norm": 0.3680053949356079, + "learning_rate": 7.912964824120603e-06, + "loss": 0.003, + "step": 21275 + }, + { + "epoch": 7.15, + "grad_norm": 1.0262119770050049, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0035, + "step": 21300 + }, + { + "epoch": 7.16, + "grad_norm": 1.3404074907302856, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0036, + "step": 21325 + }, + { + "epoch": 7.17, + "grad_norm": 0.40326249599456787, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0031, + "step": 21350 + }, + { + "epoch": 7.18, + "grad_norm": 1.1811436414718628, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0032, + "step": 21375 + }, + { + "epoch": 7.19, + "grad_norm": 0.5947225689888, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0032, + "step": 21400 + }, + { + "epoch": 7.19, + "grad_norm": 1.2388511896133423, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0032, + "step": 21425 + }, + { + "epoch": 7.2, + "grad_norm": 1.096849799156189, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0036, + "step": 21450 + }, + { + "epoch": 7.21, + "grad_norm": 0.43637222051620483, + "learning_rate": 7.89286432160804e-06, + "loss": 0.003, + "step": 21475 + }, + { + "epoch": 7.22, + "grad_norm": 1.4117275476455688, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0034, + "step": 21500 + }, + { + "epoch": 7.23, + "grad_norm": 1.1322073936462402, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0038, + "step": 21525 + }, + { + "epoch": 7.24, + "grad_norm": 1.4066225290298462, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0029, + "step": 21550 + }, + { + "epoch": 7.24, + "grad_norm": 0.9453209042549133, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0039, + "step": 21575 + }, + { + "epoch": 7.25, + "grad_norm": 0.659117579460144, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0041, + "step": 21600 + }, + { + "epoch": 7.26, + "grad_norm": 0.6344634294509888, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0038, + "step": 21625 + }, + { + "epoch": 7.27, + "grad_norm": 1.1060161590576172, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0044, + "step": 21650 + }, + { + "epoch": 7.28, + "grad_norm": 0.9809552431106567, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0032, + "step": 21675 + }, + { + "epoch": 7.29, + "grad_norm": 0.9834388494491577, + "learning_rate": 7.870251256281408e-06, + "loss": 0.004, + "step": 21700 + }, + { + "epoch": 7.3, + "grad_norm": 0.8758970499038696, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0032, + "step": 21725 + }, + { + "epoch": 7.3, + "grad_norm": 1.1081793308258057, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0037, + "step": 21750 + }, + { + "epoch": 7.31, + "grad_norm": 0.6024441123008728, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0033, + "step": 21775 + }, + { + "epoch": 7.32, + "grad_norm": 1.2287644147872925, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0042, + "step": 21800 + }, + { + "epoch": 7.33, + "grad_norm": 0.7304167747497559, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0035, + "step": 21825 + }, + { + "epoch": 7.34, + "grad_norm": 0.8437144160270691, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0035, + "step": 21850 + }, + { + "epoch": 7.35, + "grad_norm": 0.7386027574539185, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0034, + "step": 21875 + }, + { + "epoch": 7.35, + "grad_norm": 0.45231881737709045, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0031, + "step": 21900 + }, + { + "epoch": 7.36, + "grad_norm": 1.0200552940368652, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0044, + "step": 21925 + }, + { + "epoch": 7.37, + "grad_norm": 1.8359675407409668, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0032, + "step": 21950 + }, + { + "epoch": 7.38, + "grad_norm": 0.6993480324745178, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0039, + "step": 21975 + }, + { + "epoch": 7.39, + "grad_norm": 1.5828216075897217, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0042, + "step": 22000 + }, + { + "epoch": 7.39, + "eval_loss": 0.1452910304069519, + "eval_runtime": 1251.4589, + "eval_samples_per_second": 1.108, + "eval_steps_per_second": 1.108, + "eval_wer": 17.04926717287657, + "step": 22000 + }, + { + "epoch": 7.4, + "grad_norm": 0.6540045142173767, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0041, + "step": 22025 + }, + { + "epoch": 7.4, + "grad_norm": 1.2413445711135864, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0038, + "step": 22050 + }, + { + "epoch": 7.41, + "grad_norm": 1.149349331855774, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0035, + "step": 22075 + }, + { + "epoch": 7.42, + "grad_norm": 1.0378621816635132, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0036, + "step": 22100 + }, + { + "epoch": 7.43, + "grad_norm": 1.6311428546905518, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0043, + "step": 22125 + }, + { + "epoch": 7.44, + "grad_norm": 0.681912899017334, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0038, + "step": 22150 + }, + { + "epoch": 7.45, + "grad_norm": 1.1675896644592285, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0043, + "step": 22175 + }, + { + "epoch": 7.45, + "grad_norm": 1.080066442489624, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0045, + "step": 22200 + }, + { + "epoch": 7.46, + "grad_norm": 1.0546478033065796, + "learning_rate": 7.81748743718593e-06, + "loss": 0.0035, + "step": 22225 + }, + { + "epoch": 7.47, + "grad_norm": 0.8899776935577393, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0035, + "step": 22250 + }, + { + "epoch": 7.48, + "grad_norm": 1.575193166732788, + "learning_rate": 7.81246231155779e-06, + "loss": 0.004, + "step": 22275 + }, + { + "epoch": 7.49, + "grad_norm": 1.2534974813461304, + "learning_rate": 7.809949748743719e-06, + "loss": 0.0036, + "step": 22300 + }, + { + "epoch": 7.5, + "grad_norm": 1.5573606491088867, + "learning_rate": 7.80743718592965e-06, + "loss": 0.004, + "step": 22325 + }, + { + "epoch": 7.51, + "grad_norm": 0.940830409526825, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0037, + "step": 22350 + }, + { + "epoch": 7.51, + "grad_norm": 0.7923431396484375, + "learning_rate": 7.802412060301508e-06, + "loss": 0.0037, + "step": 22375 + }, + { + "epoch": 7.52, + "grad_norm": 1.4266488552093506, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0041, + "step": 22400 + }, + { + "epoch": 7.53, + "grad_norm": 1.4400182962417603, + "learning_rate": 7.797386934673367e-06, + "loss": 0.0036, + "step": 22425 + }, + { + "epoch": 7.54, + "grad_norm": 1.180802345275879, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0044, + "step": 22450 + }, + { + "epoch": 7.55, + "grad_norm": 1.069970965385437, + "learning_rate": 7.792361809045227e-06, + "loss": 0.004, + "step": 22475 + }, + { + "epoch": 7.56, + "grad_norm": 0.869617760181427, + "learning_rate": 7.789849246231157e-06, + "loss": 0.0044, + "step": 22500 + }, + { + "epoch": 7.56, + "grad_norm": 1.1012663841247559, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0038, + "step": 22525 + }, + { + "epoch": 7.57, + "grad_norm": 0.8968724012374878, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0047, + "step": 22550 + }, + { + "epoch": 7.58, + "grad_norm": 0.8427612781524658, + "learning_rate": 7.782311557788945e-06, + "loss": 0.0041, + "step": 22575 + }, + { + "epoch": 7.59, + "grad_norm": 1.2295962572097778, + "learning_rate": 7.779798994974876e-06, + "loss": 0.0044, + "step": 22600 + }, + { + "epoch": 7.6, + "grad_norm": 0.7206667065620422, + "learning_rate": 7.777286432160805e-06, + "loss": 0.0049, + "step": 22625 + }, + { + "epoch": 7.61, + "grad_norm": 1.3633325099945068, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0036, + "step": 22650 + }, + { + "epoch": 7.61, + "grad_norm": 1.4048559665679932, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0047, + "step": 22675 + }, + { + "epoch": 7.62, + "grad_norm": 1.9601320028305054, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0056, + "step": 22700 + }, + { + "epoch": 7.63, + "grad_norm": 1.58735191822052, + "learning_rate": 7.767236180904522e-06, + "loss": 0.0035, + "step": 22725 + }, + { + "epoch": 7.64, + "grad_norm": 1.4717185497283936, + "learning_rate": 7.764723618090453e-06, + "loss": 0.004, + "step": 22750 + }, + { + "epoch": 7.65, + "grad_norm": 0.6318027973175049, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0027, + "step": 22775 + }, + { + "epoch": 7.66, + "grad_norm": 1.1870336532592773, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0042, + "step": 22800 + }, + { + "epoch": 7.66, + "grad_norm": 0.5323120355606079, + "learning_rate": 7.757185929648243e-06, + "loss": 0.0034, + "step": 22825 + }, + { + "epoch": 7.67, + "grad_norm": 0.9398724436759949, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0039, + "step": 22850 + }, + { + "epoch": 7.68, + "grad_norm": 0.7132120728492737, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0036, + "step": 22875 + }, + { + "epoch": 7.69, + "grad_norm": 0.36196961998939514, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0032, + "step": 22900 + }, + { + "epoch": 7.7, + "grad_norm": 1.0564132928848267, + "learning_rate": 7.74713567839196e-06, + "loss": 0.0038, + "step": 22925 + }, + { + "epoch": 7.71, + "grad_norm": 0.754163920879364, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0038, + "step": 22950 + }, + { + "epoch": 7.71, + "grad_norm": 0.5302484035491943, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0036, + "step": 22975 + }, + { + "epoch": 7.72, + "grad_norm": 1.1171857118606567, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0038, + "step": 23000 + }, + { + "epoch": 7.72, + "eval_loss": 0.1452731043100357, + "eval_runtime": 1235.6895, + "eval_samples_per_second": 1.122, + "eval_steps_per_second": 1.122, + "eval_wer": 17.11990111248455, + "step": 23000 + }, + { + "epoch": 7.73, + "grad_norm": 1.1636919975280762, + "learning_rate": 7.73708542713568e-06, + "loss": 0.0035, + "step": 23025 + }, + { + "epoch": 7.74, + "grad_norm": 0.8591735363006592, + "learning_rate": 7.734572864321609e-06, + "loss": 0.0037, + "step": 23050 + }, + { + "epoch": 7.75, + "grad_norm": 1.977515697479248, + "learning_rate": 7.732060301507538e-06, + "loss": 0.0038, + "step": 23075 + }, + { + "epoch": 7.76, + "grad_norm": 1.2409968376159668, + "learning_rate": 7.729547738693469e-06, + "loss": 0.004, + "step": 23100 + }, + { + "epoch": 7.77, + "grad_norm": 0.8031742572784424, + "learning_rate": 7.727035175879396e-06, + "loss": 0.0035, + "step": 23125 + }, + { + "epoch": 7.77, + "grad_norm": 1.029489517211914, + "learning_rate": 7.724522613065328e-06, + "loss": 0.0042, + "step": 23150 + }, + { + "epoch": 7.78, + "grad_norm": 0.9670629501342773, + "learning_rate": 7.722010050251257e-06, + "loss": 0.0041, + "step": 23175 + }, + { + "epoch": 7.79, + "grad_norm": 0.9877786040306091, + "learning_rate": 7.719497487437186e-06, + "loss": 0.0044, + "step": 23200 + }, + { + "epoch": 7.8, + "grad_norm": 1.7212706804275513, + "learning_rate": 7.716984924623117e-06, + "loss": 0.0031, + "step": 23225 + }, + { + "epoch": 7.81, + "grad_norm": 1.2006925344467163, + "learning_rate": 7.714472361809045e-06, + "loss": 0.0037, + "step": 23250 + }, + { + "epoch": 7.82, + "grad_norm": 0.9345006942749023, + "learning_rate": 7.711959798994976e-06, + "loss": 0.0041, + "step": 23275 + }, + { + "epoch": 7.82, + "grad_norm": 0.3879813253879547, + "learning_rate": 7.709447236180905e-06, + "loss": 0.004, + "step": 23300 + }, + { + "epoch": 7.83, + "grad_norm": 1.2130013704299927, + "learning_rate": 7.706934673366834e-06, + "loss": 0.0043, + "step": 23325 + }, + { + "epoch": 7.84, + "grad_norm": 0.836334764957428, + "learning_rate": 7.704422110552764e-06, + "loss": 0.0044, + "step": 23350 + }, + { + "epoch": 7.85, + "grad_norm": 1.1955255270004272, + "learning_rate": 7.701909547738695e-06, + "loss": 0.004, + "step": 23375 + }, + { + "epoch": 7.86, + "grad_norm": 1.5131502151489258, + "learning_rate": 7.699396984924624e-06, + "loss": 0.0046, + "step": 23400 + }, + { + "epoch": 7.87, + "grad_norm": 1.6400395631790161, + "learning_rate": 7.696884422110553e-06, + "loss": 0.0037, + "step": 23425 + }, + { + "epoch": 7.87, + "grad_norm": 1.097846269607544, + "learning_rate": 7.694371859296483e-06, + "loss": 0.0045, + "step": 23450 + }, + { + "epoch": 7.88, + "grad_norm": 0.9199663996696472, + "learning_rate": 7.691859296482412e-06, + "loss": 0.004, + "step": 23475 + }, + { + "epoch": 7.89, + "grad_norm": 0.5069578886032104, + "learning_rate": 7.689346733668343e-06, + "loss": 0.0038, + "step": 23500 + }, + { + "epoch": 7.9, + "grad_norm": 0.5371918082237244, + "learning_rate": 7.68683417085427e-06, + "loss": 0.0028, + "step": 23525 + }, + { + "epoch": 7.91, + "grad_norm": 0.9709948301315308, + "learning_rate": 7.684321608040202e-06, + "loss": 0.0031, + "step": 23550 + }, + { + "epoch": 7.92, + "grad_norm": 0.4764575660228729, + "learning_rate": 7.681809045226131e-06, + "loss": 0.0029, + "step": 23575 + }, + { + "epoch": 7.92, + "grad_norm": 1.0294822454452515, + "learning_rate": 7.67929648241206e-06, + "loss": 0.0039, + "step": 23600 + }, + { + "epoch": 7.93, + "grad_norm": 1.5437979698181152, + "learning_rate": 7.676783919597991e-06, + "loss": 0.0042, + "step": 23625 + }, + { + "epoch": 7.94, + "grad_norm": 0.3096916377544403, + "learning_rate": 7.67427135678392e-06, + "loss": 0.0032, + "step": 23650 + }, + { + "epoch": 7.95, + "grad_norm": 1.1700612306594849, + "learning_rate": 7.67175879396985e-06, + "loss": 0.0036, + "step": 23675 + }, + { + "epoch": 7.96, + "grad_norm": 1.180234670639038, + "learning_rate": 7.66924623115578e-06, + "loss": 0.0045, + "step": 23700 + }, + { + "epoch": 7.97, + "grad_norm": 1.2827788591384888, + "learning_rate": 7.666733668341709e-06, + "loss": 0.0045, + "step": 23725 + }, + { + "epoch": 7.98, + "grad_norm": 1.2147058248519897, + "learning_rate": 7.664221105527638e-06, + "loss": 0.0042, + "step": 23750 + }, + { + "epoch": 7.98, + "grad_norm": 0.8624280095100403, + "learning_rate": 7.661708542713569e-06, + "loss": 0.0042, + "step": 23775 + }, + { + "epoch": 7.99, + "grad_norm": 1.502445101737976, + "learning_rate": 7.659195979899498e-06, + "loss": 0.005, + "step": 23800 + }, + { + "epoch": 8.0, + "grad_norm": 0.9702975749969482, + "learning_rate": 7.656683417085428e-06, + "loss": 0.004, + "step": 23825 + }, + { + "epoch": 8.01, + "grad_norm": 0.9540894627571106, + "learning_rate": 7.654170854271357e-06, + "loss": 0.002, + "step": 23850 + }, + { + "epoch": 8.02, + "grad_norm": 0.851203203201294, + "learning_rate": 7.651658291457286e-06, + "loss": 0.0025, + "step": 23875 + }, + { + "epoch": 8.03, + "grad_norm": 0.9900088906288147, + "learning_rate": 7.649145728643217e-06, + "loss": 0.0025, + "step": 23900 + }, + { + "epoch": 8.03, + "grad_norm": 0.22598521411418915, + "learning_rate": 7.646633165829147e-06, + "loss": 0.0022, + "step": 23925 + }, + { + "epoch": 8.04, + "grad_norm": 0.6535062193870544, + "learning_rate": 7.644120603015076e-06, + "loss": 0.0024, + "step": 23950 + }, + { + "epoch": 8.05, + "grad_norm": 0.3541930913925171, + "learning_rate": 7.641608040201005e-06, + "loss": 0.0026, + "step": 23975 + }, + { + "epoch": 8.06, + "grad_norm": 0.4876807928085327, + "learning_rate": 7.639095477386935e-06, + "loss": 0.0026, + "step": 24000 + }, + { + "epoch": 8.06, + "eval_loss": 0.14492394030094147, + "eval_runtime": 1240.2314, + "eval_samples_per_second": 1.118, + "eval_steps_per_second": 1.118, + "eval_wer": 16.678438989934662, + "step": 24000 + }, + { + "epoch": 8.07, + "grad_norm": 0.8399091362953186, + "learning_rate": 7.636582914572866e-06, + "loss": 0.0023, + "step": 24025 + }, + { + "epoch": 8.08, + "grad_norm": 1.0705106258392334, + "learning_rate": 7.634070351758795e-06, + "loss": 0.0021, + "step": 24050 + }, + { + "epoch": 8.08, + "grad_norm": 1.1488925218582153, + "learning_rate": 7.631557788944724e-06, + "loss": 0.0023, + "step": 24075 + }, + { + "epoch": 8.09, + "grad_norm": 0.5587943196296692, + "learning_rate": 7.629045226130654e-06, + "loss": 0.002, + "step": 24100 + }, + { + "epoch": 8.1, + "grad_norm": 0.2684304118156433, + "learning_rate": 7.626532663316584e-06, + "loss": 0.0024, + "step": 24125 + }, + { + "epoch": 8.11, + "grad_norm": 0.719109833240509, + "learning_rate": 7.624020100502513e-06, + "loss": 0.0027, + "step": 24150 + }, + { + "epoch": 8.12, + "grad_norm": 0.3437405526638031, + "learning_rate": 7.6215075376884425e-06, + "loss": 0.0023, + "step": 24175 + }, + { + "epoch": 8.13, + "grad_norm": 0.5939251780509949, + "learning_rate": 7.618994974874373e-06, + "loss": 0.0028, + "step": 24200 + }, + { + "epoch": 8.13, + "grad_norm": 1.4179428815841675, + "learning_rate": 7.616482412060302e-06, + "loss": 0.0027, + "step": 24225 + }, + { + "epoch": 8.14, + "grad_norm": 1.6048904657363892, + "learning_rate": 7.613969849246232e-06, + "loss": 0.003, + "step": 24250 + }, + { + "epoch": 8.15, + "grad_norm": 0.5035224556922913, + "learning_rate": 7.6114572864321615e-06, + "loss": 0.0021, + "step": 24275 + }, + { + "epoch": 8.16, + "grad_norm": 0.7912376523017883, + "learning_rate": 7.608944723618092e-06, + "loss": 0.0025, + "step": 24300 + }, + { + "epoch": 8.17, + "grad_norm": 0.7618343234062195, + "learning_rate": 7.60643216080402e-06, + "loss": 0.0024, + "step": 24325 + }, + { + "epoch": 8.18, + "grad_norm": 0.7294089794158936, + "learning_rate": 7.60391959798995e-06, + "loss": 0.0031, + "step": 24350 + }, + { + "epoch": 8.19, + "grad_norm": 0.8557175397872925, + "learning_rate": 7.60140703517588e-06, + "loss": 0.003, + "step": 24375 + }, + { + "epoch": 8.19, + "grad_norm": 1.634049415588379, + "learning_rate": 7.59889447236181e-06, + "loss": 0.0035, + "step": 24400 + }, + { + "epoch": 8.2, + "grad_norm": 0.8753783702850342, + "learning_rate": 7.59638190954774e-06, + "loss": 0.0031, + "step": 24425 + }, + { + "epoch": 8.21, + "grad_norm": 0.5376533269882202, + "learning_rate": 7.593869346733668e-06, + "loss": 0.0025, + "step": 24450 + }, + { + "epoch": 8.22, + "grad_norm": 0.8154192566871643, + "learning_rate": 7.591356783919599e-06, + "loss": 0.0027, + "step": 24475 + }, + { + "epoch": 8.23, + "grad_norm": 0.5541477799415588, + "learning_rate": 7.588844221105528e-06, + "loss": 0.0037, + "step": 24500 + }, + { + "epoch": 8.24, + "grad_norm": 0.940403938293457, + "learning_rate": 7.586331658291458e-06, + "loss": 0.0033, + "step": 24525 + }, + { + "epoch": 8.24, + "grad_norm": 0.9532537460327148, + "learning_rate": 7.583819095477387e-06, + "loss": 0.0033, + "step": 24550 + }, + { + "epoch": 8.25, + "grad_norm": 0.9647436141967773, + "learning_rate": 7.5813065326633176e-06, + "loss": 0.0034, + "step": 24575 + }, + { + "epoch": 8.26, + "grad_norm": 0.3858321011066437, + "learning_rate": 7.578793969849246e-06, + "loss": 0.003, + "step": 24600 + }, + { + "epoch": 8.27, + "grad_norm": 0.7824556827545166, + "learning_rate": 7.576281407035176e-06, + "loss": 0.0024, + "step": 24625 + }, + { + "epoch": 8.28, + "grad_norm": 0.9664915204048157, + "learning_rate": 7.573768844221106e-06, + "loss": 0.0031, + "step": 24650 + }, + { + "epoch": 8.29, + "grad_norm": 0.6527593731880188, + "learning_rate": 7.571256281407036e-06, + "loss": 0.0028, + "step": 24675 + }, + { + "epoch": 8.29, + "grad_norm": 0.548693060874939, + "learning_rate": 7.568743718592966e-06, + "loss": 0.0022, + "step": 24700 + }, + { + "epoch": 8.3, + "grad_norm": 1.194994568824768, + "learning_rate": 7.566231155778895e-06, + "loss": 0.0027, + "step": 24725 + }, + { + "epoch": 8.31, + "grad_norm": 0.548291802406311, + "learning_rate": 7.5637185929648245e-06, + "loss": 0.0029, + "step": 24750 + }, + { + "epoch": 8.32, + "grad_norm": 1.1913295984268188, + "learning_rate": 7.561206030150754e-06, + "loss": 0.003, + "step": 24775 + }, + { + "epoch": 8.33, + "grad_norm": 1.198673129081726, + "learning_rate": 7.558693467336684e-06, + "loss": 0.0031, + "step": 24800 + }, + { + "epoch": 8.34, + "grad_norm": 0.8129249811172485, + "learning_rate": 7.556180904522614e-06, + "loss": 0.0032, + "step": 24825 + }, + { + "epoch": 8.34, + "grad_norm": 0.5922027230262756, + "learning_rate": 7.5536683417085435e-06, + "loss": 0.0028, + "step": 24850 + }, + { + "epoch": 8.35, + "grad_norm": 1.4008985757827759, + "learning_rate": 7.551155778894474e-06, + "loss": 0.0028, + "step": 24875 + }, + { + "epoch": 8.36, + "grad_norm": 0.8061394691467285, + "learning_rate": 7.548643216080402e-06, + "loss": 0.0034, + "step": 24900 + }, + { + "epoch": 8.37, + "grad_norm": 1.0583748817443848, + "learning_rate": 7.546130653266332e-06, + "loss": 0.0032, + "step": 24925 + }, + { + "epoch": 8.38, + "grad_norm": 0.8661954998970032, + "learning_rate": 7.543618090452262e-06, + "loss": 0.0029, + "step": 24950 + }, + { + "epoch": 8.39, + "grad_norm": 1.3357280492782593, + "learning_rate": 7.541105527638192e-06, + "loss": 0.0033, + "step": 24975 + }, + { + "epoch": 8.39, + "grad_norm": 1.119999647140503, + "learning_rate": 7.538592964824121e-06, + "loss": 0.0026, + "step": 25000 + }, + { + "epoch": 8.39, + "eval_loss": 0.1473708599805832, + "eval_runtime": 1213.8769, + "eval_samples_per_second": 1.142, + "eval_steps_per_second": 1.142, + "eval_wer": 16.58131732297369, + "step": 25000 + }, + { + "epoch": 8.4, + "grad_norm": 1.4539777040481567, + "learning_rate": 7.5360804020100505e-06, + "loss": 0.0035, + "step": 25025 + }, + { + "epoch": 8.41, + "grad_norm": 1.3422738313674927, + "learning_rate": 7.533567839195981e-06, + "loss": 0.0034, + "step": 25050 + }, + { + "epoch": 8.42, + "grad_norm": 0.8910520076751709, + "learning_rate": 7.53105527638191e-06, + "loss": 0.0032, + "step": 25075 + }, + { + "epoch": 8.43, + "grad_norm": 0.8484017848968506, + "learning_rate": 7.52854271356784e-06, + "loss": 0.0029, + "step": 25100 + }, + { + "epoch": 8.44, + "grad_norm": 1.8000329732894897, + "learning_rate": 7.5260301507537695e-06, + "loss": 0.0033, + "step": 25125 + }, + { + "epoch": 8.45, + "grad_norm": 0.46434566378593445, + "learning_rate": 7.5235175879397e-06, + "loss": 0.0033, + "step": 25150 + }, + { + "epoch": 8.45, + "grad_norm": 0.8976455330848694, + "learning_rate": 7.521005025125628e-06, + "loss": 0.0031, + "step": 25175 + }, + { + "epoch": 8.46, + "grad_norm": 0.5379835367202759, + "learning_rate": 7.518492462311558e-06, + "loss": 0.003, + "step": 25200 + }, + { + "epoch": 8.47, + "grad_norm": 1.199825644493103, + "learning_rate": 7.515979899497488e-06, + "loss": 0.0034, + "step": 25225 + }, + { + "epoch": 8.48, + "grad_norm": 1.4240609407424927, + "learning_rate": 7.513467336683418e-06, + "loss": 0.0018, + "step": 25250 + }, + { + "epoch": 8.49, + "grad_norm": 1.0422276258468628, + "learning_rate": 7.510954773869348e-06, + "loss": 0.0032, + "step": 25275 + }, + { + "epoch": 8.5, + "grad_norm": 1.8243926763534546, + "learning_rate": 7.508442211055276e-06, + "loss": 0.0039, + "step": 25300 + }, + { + "epoch": 8.5, + "grad_norm": 0.997635006904602, + "learning_rate": 7.505929648241207e-06, + "loss": 0.0041, + "step": 25325 + }, + { + "epoch": 8.51, + "grad_norm": 1.3524260520935059, + "learning_rate": 7.503417085427136e-06, + "loss": 0.0035, + "step": 25350 + }, + { + "epoch": 8.52, + "grad_norm": 1.3868261575698853, + "learning_rate": 7.500904522613066e-06, + "loss": 0.0031, + "step": 25375 + }, + { + "epoch": 8.53, + "grad_norm": 0.4519669711589813, + "learning_rate": 7.498391959798995e-06, + "loss": 0.0032, + "step": 25400 + }, + { + "epoch": 8.54, + "grad_norm": 0.502500593662262, + "learning_rate": 7.4958793969849256e-06, + "loss": 0.0031, + "step": 25425 + }, + { + "epoch": 8.55, + "grad_norm": 0.5736572742462158, + "learning_rate": 7.493366834170856e-06, + "loss": 0.003, + "step": 25450 + }, + { + "epoch": 8.55, + "grad_norm": 0.6688116192817688, + "learning_rate": 7.490854271356784e-06, + "loss": 0.0028, + "step": 25475 + }, + { + "epoch": 8.56, + "grad_norm": 0.4909062385559082, + "learning_rate": 7.488341708542714e-06, + "loss": 0.0035, + "step": 25500 + }, + { + "epoch": 8.57, + "grad_norm": 0.47991904616355896, + "learning_rate": 7.485829145728644e-06, + "loss": 0.0024, + "step": 25525 + }, + { + "epoch": 8.58, + "grad_norm": 1.171148657798767, + "learning_rate": 7.483316582914574e-06, + "loss": 0.003, + "step": 25550 + }, + { + "epoch": 8.59, + "grad_norm": 0.966607928276062, + "learning_rate": 7.480804020100502e-06, + "loss": 0.0028, + "step": 25575 + }, + { + "epoch": 8.6, + "grad_norm": 1.0861948728561401, + "learning_rate": 7.4782914572864325e-06, + "loss": 0.0029, + "step": 25600 + }, + { + "epoch": 8.6, + "grad_norm": 1.2554888725280762, + "learning_rate": 7.475778894472362e-06, + "loss": 0.0033, + "step": 25625 + }, + { + "epoch": 8.61, + "grad_norm": 0.8138603568077087, + "learning_rate": 7.473266331658292e-06, + "loss": 0.0031, + "step": 25650 + }, + { + "epoch": 8.62, + "grad_norm": 0.4648093283176422, + "learning_rate": 7.470753768844222e-06, + "loss": 0.0034, + "step": 25675 + }, + { + "epoch": 8.63, + "grad_norm": 1.3727223873138428, + "learning_rate": 7.4682412060301515e-06, + "loss": 0.004, + "step": 25700 + }, + { + "epoch": 8.64, + "grad_norm": 0.6261406540870667, + "learning_rate": 7.465728643216082e-06, + "loss": 0.0026, + "step": 25725 + }, + { + "epoch": 8.65, + "grad_norm": 0.8047516942024231, + "learning_rate": 7.46321608040201e-06, + "loss": 0.003, + "step": 25750 + }, + { + "epoch": 8.66, + "grad_norm": 0.9954025149345398, + "learning_rate": 7.46070351758794e-06, + "loss": 0.0028, + "step": 25775 + }, + { + "epoch": 8.66, + "grad_norm": 0.788108766078949, + "learning_rate": 7.45819095477387e-06, + "loss": 0.0033, + "step": 25800 + }, + { + "epoch": 8.67, + "grad_norm": 0.958503246307373, + "learning_rate": 7.4556783919598e-06, + "loss": 0.003, + "step": 25825 + }, + { + "epoch": 8.68, + "grad_norm": 0.8485898971557617, + "learning_rate": 7.453165829145729e-06, + "loss": 0.0034, + "step": 25850 + }, + { + "epoch": 8.69, + "grad_norm": 1.0816404819488525, + "learning_rate": 7.4506532663316585e-06, + "loss": 0.0035, + "step": 25875 + }, + { + "epoch": 8.7, + "grad_norm": 1.1007174253463745, + "learning_rate": 7.448140703517589e-06, + "loss": 0.0034, + "step": 25900 + }, + { + "epoch": 8.71, + "grad_norm": 1.0120466947555542, + "learning_rate": 7.445628140703518e-06, + "loss": 0.0037, + "step": 25925 + }, + { + "epoch": 8.71, + "grad_norm": 0.3762897253036499, + "learning_rate": 7.443115577889448e-06, + "loss": 0.0025, + "step": 25950 + }, + { + "epoch": 8.72, + "grad_norm": 0.9834430813789368, + "learning_rate": 7.4406030150753775e-06, + "loss": 0.0035, + "step": 25975 + }, + { + "epoch": 8.73, + "grad_norm": 0.9426266551017761, + "learning_rate": 7.438090452261308e-06, + "loss": 0.0044, + "step": 26000 + }, + { + "epoch": 8.73, + "eval_loss": 0.1469849795103073, + "eval_runtime": 1217.4266, + "eval_samples_per_second": 1.138, + "eval_steps_per_second": 1.138, + "eval_wer": 16.987462475719582, + "step": 26000 + }, + { + "epoch": 8.74, + "grad_norm": 1.4924899339675903, + "learning_rate": 7.435577889447236e-06, + "loss": 0.0032, + "step": 26025 + }, + { + "epoch": 8.75, + "grad_norm": 1.1420689821243286, + "learning_rate": 7.433065326633166e-06, + "loss": 0.0038, + "step": 26050 + }, + { + "epoch": 8.76, + "grad_norm": 1.1251734495162964, + "learning_rate": 7.4305527638190964e-06, + "loss": 0.0031, + "step": 26075 + }, + { + "epoch": 8.76, + "grad_norm": 1.5867334604263306, + "learning_rate": 7.428040201005026e-06, + "loss": 0.0037, + "step": 26100 + }, + { + "epoch": 8.77, + "grad_norm": 0.3228246867656708, + "learning_rate": 7.425527638190956e-06, + "loss": 0.0027, + "step": 26125 + }, + { + "epoch": 8.78, + "grad_norm": 1.4515109062194824, + "learning_rate": 7.423015075376884e-06, + "loss": 0.0033, + "step": 26150 + }, + { + "epoch": 8.79, + "grad_norm": 1.7462551593780518, + "learning_rate": 7.420502512562815e-06, + "loss": 0.0034, + "step": 26175 + }, + { + "epoch": 8.8, + "grad_norm": 0.7006013989448547, + "learning_rate": 7.417989949748744e-06, + "loss": 0.0032, + "step": 26200 + }, + { + "epoch": 8.81, + "grad_norm": 0.8237917423248291, + "learning_rate": 7.415477386934674e-06, + "loss": 0.0036, + "step": 26225 + }, + { + "epoch": 8.81, + "grad_norm": 1.0685662031173706, + "learning_rate": 7.412964824120603e-06, + "loss": 0.0031, + "step": 26250 + }, + { + "epoch": 8.82, + "grad_norm": 1.713134527206421, + "learning_rate": 7.4104522613065336e-06, + "loss": 0.0045, + "step": 26275 + }, + { + "epoch": 8.83, + "grad_norm": 1.1465978622436523, + "learning_rate": 7.407939698492464e-06, + "loss": 0.0037, + "step": 26300 + }, + { + "epoch": 8.84, + "grad_norm": 0.950587272644043, + "learning_rate": 7.405427135678392e-06, + "loss": 0.0028, + "step": 26325 + }, + { + "epoch": 8.85, + "grad_norm": 0.767049252986908, + "learning_rate": 7.402914572864322e-06, + "loss": 0.0031, + "step": 26350 + }, + { + "epoch": 8.86, + "grad_norm": 1.0947566032409668, + "learning_rate": 7.400402010050252e-06, + "loss": 0.0033, + "step": 26375 + }, + { + "epoch": 8.87, + "grad_norm": 0.3729996979236603, + "learning_rate": 7.397889447236182e-06, + "loss": 0.0031, + "step": 26400 + }, + { + "epoch": 8.87, + "grad_norm": 0.5393033623695374, + "learning_rate": 7.39537688442211e-06, + "loss": 0.0036, + "step": 26425 + }, + { + "epoch": 8.88, + "grad_norm": 1.278348445892334, + "learning_rate": 7.3928643216080405e-06, + "loss": 0.0033, + "step": 26450 + }, + { + "epoch": 8.89, + "grad_norm": 3.021594762802124, + "learning_rate": 7.390452261306533e-06, + "loss": 0.0037, + "step": 26475 + }, + { + "epoch": 8.9, + "grad_norm": 1.9808621406555176, + "learning_rate": 7.387939698492463e-06, + "loss": 0.0032, + "step": 26500 + }, + { + "epoch": 8.91, + "grad_norm": 1.0880686044692993, + "learning_rate": 7.385427135678392e-06, + "loss": 0.0029, + "step": 26525 + }, + { + "epoch": 8.92, + "grad_norm": 0.9103013873100281, + "learning_rate": 7.382914572864323e-06, + "loss": 0.0036, + "step": 26550 + }, + { + "epoch": 8.92, + "grad_norm": 0.9491569995880127, + "learning_rate": 7.380402010050252e-06, + "loss": 0.0026, + "step": 26575 + }, + { + "epoch": 8.93, + "grad_norm": 0.6740662455558777, + "learning_rate": 7.377889447236182e-06, + "loss": 0.003, + "step": 26600 + }, + { + "epoch": 8.94, + "grad_norm": 0.46928125619888306, + "learning_rate": 7.3753768844221105e-06, + "loss": 0.0031, + "step": 26625 + }, + { + "epoch": 8.95, + "grad_norm": 1.0837490558624268, + "learning_rate": 7.372864321608041e-06, + "loss": 0.0037, + "step": 26650 + }, + { + "epoch": 8.96, + "grad_norm": 1.3564307689666748, + "learning_rate": 7.37035175879397e-06, + "loss": 0.0038, + "step": 26675 + }, + { + "epoch": 8.97, + "grad_norm": 0.7133674025535583, + "learning_rate": 7.3678391959799e-06, + "loss": 0.0032, + "step": 26700 + }, + { + "epoch": 8.97, + "grad_norm": 1.2626358270645142, + "learning_rate": 7.36532663316583e-06, + "loss": 0.0035, + "step": 26725 + }, + { + "epoch": 8.98, + "grad_norm": 1.3427034616470337, + "learning_rate": 7.362814070351759e-06, + "loss": 0.0029, + "step": 26750 + }, + { + "epoch": 8.99, + "grad_norm": 1.5758247375488281, + "learning_rate": 7.360301507537689e-06, + "loss": 0.0034, + "step": 26775 + }, + { + "epoch": 9.0, + "grad_norm": 1.1591298580169678, + "learning_rate": 7.357788944723618e-06, + "loss": 0.0028, + "step": 26800 + }, + { + "epoch": 9.01, + "grad_norm": 0.7788950204849243, + "learning_rate": 7.3552763819095485e-06, + "loss": 0.0022, + "step": 26825 + }, + { + "epoch": 9.02, + "grad_norm": 0.43883341550827026, + "learning_rate": 7.352763819095478e-06, + "loss": 0.0014, + "step": 26850 + }, + { + "epoch": 9.02, + "grad_norm": 1.1321437358856201, + "learning_rate": 7.350251256281408e-06, + "loss": 0.0021, + "step": 26875 + }, + { + "epoch": 9.03, + "grad_norm": 0.6570883989334106, + "learning_rate": 7.347738693467338e-06, + "loss": 0.0018, + "step": 26900 + }, + { + "epoch": 9.04, + "grad_norm": 0.4177149534225464, + "learning_rate": 7.345226130653267e-06, + "loss": 0.0019, + "step": 26925 + }, + { + "epoch": 9.05, + "grad_norm": 1.0608900785446167, + "learning_rate": 7.342713567839197e-06, + "loss": 0.0026, + "step": 26950 + }, + { + "epoch": 9.06, + "grad_norm": 1.3245147466659546, + "learning_rate": 7.340201005025126e-06, + "loss": 0.0022, + "step": 26975 + }, + { + "epoch": 9.07, + "grad_norm": 1.6233601570129395, + "learning_rate": 7.337688442211056e-06, + "loss": 0.0022, + "step": 27000 + }, + { + "epoch": 9.07, + "eval_loss": 0.1511046439409256, + "eval_runtime": 1219.6447, + "eval_samples_per_second": 1.136, + "eval_steps_per_second": 1.136, + "eval_wer": 16.96980399081759, + "step": 27000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 1000, + "total_flos": 2.4929622957883392e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/marathi/checkpoint-27000/training_args.bin b/checkpoints/whisper-small/marathi/checkpoint-27000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8b70dbc145f38b571ef80590648085f56ec7ab1 --- /dev/null +++ b/checkpoints/whisper-small/marathi/checkpoint-27000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:530123a9eead9bf66bfd2200307d18ea3260c5085e079a36b9431e817d1660c0 +size 4667 diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/config.json b/checkpoints/whisper-small/telugu/checkpoint-15000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99279ebbf867e45e76fed66a7e74542c3680297f --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/config.json @@ -0,0 +1,152 @@ +{ + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50299 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/generation_config.json b/checkpoints/whisper-small/telugu/checkpoint-15000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9b1a3e3b5fb8d88730860d2b25f6cd310962c7 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 5, + 3 + ], + [ + 5, + 9 + ], + [ + 8, + 0 + ], + [ + 8, + 4 + ], + [ + 8, + 7 + ], + [ + 8, + 8 + ], + [ + 9, + 0 + ], + [ + 9, + 7 + ], + [ + 9, + 9 + ], + [ + 10, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/model.safetensors b/checkpoints/whisper-small/telugu/checkpoint-15000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..183558c98ba63a13b450c241a0445514d3ae1c94 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c1cc81c5e9ef4aa20ec6f53719182c09fbbff836f0f8d5d864e71323bbb770 +size 966995080 diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/optimizer.pt b/checkpoints/whisper-small/telugu/checkpoint-15000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c10984bd4c0fa5b55860f712fb4a6f806e07275d --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2929310510a837022902c410501995ed3fc81a69634144b0423ba8b42b38521f +size 1925063607 diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/preprocessor_config.json b/checkpoints/whisper-small/telugu/checkpoint-15000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/rng_state.pth b/checkpoints/whisper-small/telugu/checkpoint-15000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6302a84b7b0b5ae431bbdfa6977bb2a22fe4beef --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9d54ee2319da41ddef4044e5701f20f5505a127b57f1e6116855202c33fdeaf +size 14575 diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/scheduler.pt b/checkpoints/whisper-small/telugu/checkpoint-15000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d37c3ac63af5080ec4146ece6e0c07b0544e114 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed43d14cb2aef9875751e24a05d0321e5f21c7115fee27e01e99805122eb363 +size 627 diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/trainer_state.json b/checkpoints/whisper-small/telugu/checkpoint-15000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a2618d01a89b0395704c1b8f2da7d1a0c5b2d253 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/trainer_state.json @@ -0,0 +1,4356 @@ +{ + "best_metric": 24.842326559215135, + "best_model_checkpoint": "results/whisper-small/telugu/checkpoint-5000", + "epoch": 5.0369375419744795, + "eval_steps": 1000, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 19.275182723999023, + "learning_rate": 4.4e-07, + "loss": 2.1442, + "step": 25 + }, + { + "epoch": 0.02, + "grad_norm": 7.7145867347717285, + "learning_rate": 9.400000000000001e-07, + "loss": 1.8764, + "step": 50 + }, + { + "epoch": 0.03, + "grad_norm": 7.178260326385498, + "learning_rate": 1.44e-06, + "loss": 1.6194, + "step": 75 + }, + { + "epoch": 0.03, + "grad_norm": 7.505893707275391, + "learning_rate": 1.94e-06, + "loss": 1.413, + "step": 100 + }, + { + "epoch": 0.04, + "grad_norm": 6.6697678565979, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.0252, + "step": 125 + }, + { + "epoch": 0.05, + "grad_norm": 5.555347442626953, + "learning_rate": 2.9400000000000002e-06, + "loss": 0.6851, + "step": 150 + }, + { + "epoch": 0.06, + "grad_norm": 5.515409469604492, + "learning_rate": 3.44e-06, + "loss": 0.5317, + "step": 175 + }, + { + "epoch": 0.07, + "grad_norm": 5.558961868286133, + "learning_rate": 3.94e-06, + "loss": 0.4497, + "step": 200 + }, + { + "epoch": 0.08, + "grad_norm": 4.354099273681641, + "learning_rate": 4.440000000000001e-06, + "loss": 0.4085, + "step": 225 + }, + { + "epoch": 0.08, + "grad_norm": 4.59345817565918, + "learning_rate": 4.94e-06, + "loss": 0.3757, + "step": 250 + }, + { + "epoch": 0.09, + "grad_norm": 4.814145088195801, + "learning_rate": 5.4400000000000004e-06, + "loss": 0.3475, + "step": 275 + }, + { + "epoch": 0.1, + "grad_norm": 5.045070171356201, + "learning_rate": 5.94e-06, + "loss": 0.3183, + "step": 300 + }, + { + "epoch": 0.11, + "grad_norm": 4.037272930145264, + "learning_rate": 6.440000000000001e-06, + "loss": 0.3031, + "step": 325 + }, + { + "epoch": 0.12, + "grad_norm": 4.159480094909668, + "learning_rate": 6.9400000000000005e-06, + "loss": 0.2687, + "step": 350 + }, + { + "epoch": 0.13, + "grad_norm": 3.1533331871032715, + "learning_rate": 7.440000000000001e-06, + "loss": 0.2212, + "step": 375 + }, + { + "epoch": 0.13, + "grad_norm": 4.2195940017700195, + "learning_rate": 7.94e-06, + "loss": 0.219, + "step": 400 + }, + { + "epoch": 0.14, + "grad_norm": 3.113003730773926, + "learning_rate": 8.44e-06, + "loss": 0.207, + "step": 425 + }, + { + "epoch": 0.15, + "grad_norm": 3.3837645053863525, + "learning_rate": 8.94e-06, + "loss": 0.1986, + "step": 450 + }, + { + "epoch": 0.16, + "grad_norm": 3.3013455867767334, + "learning_rate": 9.440000000000001e-06, + "loss": 0.1938, + "step": 475 + }, + { + "epoch": 0.17, + "grad_norm": 3.8643412590026855, + "learning_rate": 9.940000000000001e-06, + "loss": 0.1817, + "step": 500 + }, + { + "epoch": 0.18, + "grad_norm": 4.444339752197266, + "learning_rate": 9.997788944723618e-06, + "loss": 0.1893, + "step": 525 + }, + { + "epoch": 0.18, + "grad_norm": 3.334296464920044, + "learning_rate": 9.99527638190955e-06, + "loss": 0.18, + "step": 550 + }, + { + "epoch": 0.19, + "grad_norm": 2.859130859375, + "learning_rate": 9.992763819095477e-06, + "loss": 0.1685, + "step": 575 + }, + { + "epoch": 0.2, + "grad_norm": 2.8547170162200928, + "learning_rate": 9.990251256281408e-06, + "loss": 0.1717, + "step": 600 + }, + { + "epoch": 0.21, + "grad_norm": 2.7878305912017822, + "learning_rate": 9.987738693467337e-06, + "loss": 0.1622, + "step": 625 + }, + { + "epoch": 0.22, + "grad_norm": 3.1621363162994385, + "learning_rate": 9.985226130653267e-06, + "loss": 0.1622, + "step": 650 + }, + { + "epoch": 0.23, + "grad_norm": 2.8213343620300293, + "learning_rate": 9.982713567839198e-06, + "loss": 0.1582, + "step": 675 + }, + { + "epoch": 0.24, + "grad_norm": 2.776296615600586, + "learning_rate": 9.980201005025127e-06, + "loss": 0.1518, + "step": 700 + }, + { + "epoch": 0.24, + "grad_norm": 2.0700817108154297, + "learning_rate": 9.977688442211056e-06, + "loss": 0.158, + "step": 725 + }, + { + "epoch": 0.25, + "grad_norm": 2.3245046138763428, + "learning_rate": 9.975175879396986e-06, + "loss": 0.1491, + "step": 750 + }, + { + "epoch": 0.26, + "grad_norm": 2.2235426902770996, + "learning_rate": 9.972663316582915e-06, + "loss": 0.1469, + "step": 775 + }, + { + "epoch": 0.27, + "grad_norm": 1.877974033355713, + "learning_rate": 9.970150753768844e-06, + "loss": 0.1496, + "step": 800 + }, + { + "epoch": 0.28, + "grad_norm": 2.1602160930633545, + "learning_rate": 9.967638190954775e-06, + "loss": 0.1408, + "step": 825 + }, + { + "epoch": 0.29, + "grad_norm": 2.3887486457824707, + "learning_rate": 9.965125628140703e-06, + "loss": 0.1468, + "step": 850 + }, + { + "epoch": 0.29, + "grad_norm": 2.392728328704834, + "learning_rate": 9.962613065326634e-06, + "loss": 0.1386, + "step": 875 + }, + { + "epoch": 0.3, + "grad_norm": 2.7344298362731934, + "learning_rate": 9.960100502512563e-06, + "loss": 0.1376, + "step": 900 + }, + { + "epoch": 0.31, + "grad_norm": 2.3548548221588135, + "learning_rate": 9.957587939698493e-06, + "loss": 0.1396, + "step": 925 + }, + { + "epoch": 0.32, + "grad_norm": 2.409946918487549, + "learning_rate": 9.955075376884424e-06, + "loss": 0.1391, + "step": 950 + }, + { + "epoch": 0.33, + "grad_norm": 2.83879017829895, + "learning_rate": 9.952562814070353e-06, + "loss": 0.1303, + "step": 975 + }, + { + "epoch": 0.34, + "grad_norm": 2.249248504638672, + "learning_rate": 9.950050251256282e-06, + "loss": 0.1321, + "step": 1000 + }, + { + "epoch": 0.34, + "eval_loss": 0.07621680200099945, + "eval_runtime": 2276.0275, + "eval_samples_per_second": 0.632, + "eval_steps_per_second": 0.632, + "eval_wer": 34.56552207428171, + "step": 1000 + }, + { + "epoch": 0.34, + "grad_norm": 2.2931532859802246, + "learning_rate": 9.947537688442212e-06, + "loss": 0.1285, + "step": 1025 + }, + { + "epoch": 0.35, + "grad_norm": 2.337979793548584, + "learning_rate": 9.945025125628141e-06, + "loss": 0.1299, + "step": 1050 + }, + { + "epoch": 0.36, + "grad_norm": 1.749815821647644, + "learning_rate": 9.94251256281407e-06, + "loss": 0.1202, + "step": 1075 + }, + { + "epoch": 0.37, + "grad_norm": 2.114003896713257, + "learning_rate": 9.940000000000001e-06, + "loss": 0.127, + "step": 1100 + }, + { + "epoch": 0.38, + "grad_norm": 2.3560590744018555, + "learning_rate": 9.93748743718593e-06, + "loss": 0.128, + "step": 1125 + }, + { + "epoch": 0.39, + "grad_norm": 2.498201847076416, + "learning_rate": 9.93497487437186e-06, + "loss": 0.1234, + "step": 1150 + }, + { + "epoch": 0.39, + "grad_norm": 2.401297092437744, + "learning_rate": 9.93246231155779e-06, + "loss": 0.1204, + "step": 1175 + }, + { + "epoch": 0.4, + "grad_norm": 1.7143532037734985, + "learning_rate": 9.929949748743719e-06, + "loss": 0.1235, + "step": 1200 + }, + { + "epoch": 0.41, + "grad_norm": 1.8615599870681763, + "learning_rate": 9.92743718592965e-06, + "loss": 0.1244, + "step": 1225 + }, + { + "epoch": 0.42, + "grad_norm": 2.2458105087280273, + "learning_rate": 9.924924623115579e-06, + "loss": 0.1224, + "step": 1250 + }, + { + "epoch": 0.43, + "grad_norm": 1.7599648237228394, + "learning_rate": 9.922412060301508e-06, + "loss": 0.1214, + "step": 1275 + }, + { + "epoch": 0.44, + "grad_norm": 1.9716145992279053, + "learning_rate": 9.91989949748744e-06, + "loss": 0.1196, + "step": 1300 + }, + { + "epoch": 0.44, + "grad_norm": 1.9741605520248413, + "learning_rate": 9.917386934673367e-06, + "loss": 0.1221, + "step": 1325 + }, + { + "epoch": 0.45, + "grad_norm": 2.42278790473938, + "learning_rate": 9.914874371859298e-06, + "loss": 0.1199, + "step": 1350 + }, + { + "epoch": 0.46, + "grad_norm": 1.792262077331543, + "learning_rate": 9.912361809045227e-06, + "loss": 0.1168, + "step": 1375 + }, + { + "epoch": 0.47, + "grad_norm": 2.1270103454589844, + "learning_rate": 9.909849246231157e-06, + "loss": 0.1175, + "step": 1400 + }, + { + "epoch": 0.48, + "grad_norm": 1.8129425048828125, + "learning_rate": 9.907336683417086e-06, + "loss": 0.1166, + "step": 1425 + }, + { + "epoch": 0.49, + "grad_norm": 1.9687169790267944, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1156, + "step": 1450 + }, + { + "epoch": 0.5, + "grad_norm": 1.9565420150756836, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1089, + "step": 1475 + }, + { + "epoch": 0.5, + "grad_norm": 2.1018614768981934, + "learning_rate": 9.899798994974876e-06, + "loss": 0.1082, + "step": 1500 + }, + { + "epoch": 0.51, + "grad_norm": 2.0656349658966064, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1156, + "step": 1525 + }, + { + "epoch": 0.52, + "grad_norm": 1.8497759103775024, + "learning_rate": 9.894773869346734e-06, + "loss": 0.112, + "step": 1550 + }, + { + "epoch": 0.53, + "grad_norm": 1.5840171575546265, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1134, + "step": 1575 + }, + { + "epoch": 0.54, + "grad_norm": 2.09623384475708, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1099, + "step": 1600 + }, + { + "epoch": 0.55, + "grad_norm": 1.7288364171981812, + "learning_rate": 9.887236180904524e-06, + "loss": 0.11, + "step": 1625 + }, + { + "epoch": 0.55, + "grad_norm": 2.4955129623413086, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1086, + "step": 1650 + }, + { + "epoch": 0.56, + "grad_norm": 2.2839179039001465, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1049, + "step": 1675 + }, + { + "epoch": 0.57, + "grad_norm": 1.9825496673583984, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1028, + "step": 1700 + }, + { + "epoch": 0.58, + "grad_norm": 2.0077452659606934, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1038, + "step": 1725 + }, + { + "epoch": 0.59, + "grad_norm": 1.7164676189422607, + "learning_rate": 9.874673366834172e-06, + "loss": 0.111, + "step": 1750 + }, + { + "epoch": 0.6, + "grad_norm": 2.1434521675109863, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1079, + "step": 1775 + }, + { + "epoch": 0.6, + "grad_norm": 1.460113763809204, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1052, + "step": 1800 + }, + { + "epoch": 0.61, + "grad_norm": 1.7322907447814941, + "learning_rate": 9.86713567839196e-06, + "loss": 0.0987, + "step": 1825 + }, + { + "epoch": 0.62, + "grad_norm": 1.6619248390197754, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1094, + "step": 1850 + }, + { + "epoch": 0.63, + "grad_norm": 1.9155193567276, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1056, + "step": 1875 + }, + { + "epoch": 0.64, + "grad_norm": 2.330946922302246, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1065, + "step": 1900 + }, + { + "epoch": 0.65, + "grad_norm": 1.804771065711975, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1003, + "step": 1925 + }, + { + "epoch": 0.65, + "grad_norm": 1.6926372051239014, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1012, + "step": 1950 + }, + { + "epoch": 0.66, + "grad_norm": 1.5719518661499023, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1037, + "step": 1975 + }, + { + "epoch": 0.67, + "grad_norm": 1.9551806449890137, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1035, + "step": 2000 + }, + { + "epoch": 0.67, + "eval_loss": 0.06008382514119148, + "eval_runtime": 2211.618, + "eval_samples_per_second": 0.65, + "eval_steps_per_second": 0.65, + "eval_wer": 26.585494043447795, + "step": 2000 + }, + { + "epoch": 0.68, + "grad_norm": 2.013542652130127, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1024, + "step": 2025 + }, + { + "epoch": 0.69, + "grad_norm": 1.6302798986434937, + "learning_rate": 9.844522613065328e-06, + "loss": 0.0997, + "step": 2050 + }, + { + "epoch": 0.7, + "grad_norm": 1.9915053844451904, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1019, + "step": 2075 + }, + { + "epoch": 0.71, + "grad_norm": 2.324920415878296, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1017, + "step": 2100 + }, + { + "epoch": 0.71, + "grad_norm": 1.572554111480713, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1005, + "step": 2125 + }, + { + "epoch": 0.72, + "grad_norm": 1.4730935096740723, + "learning_rate": 9.834472361809047e-06, + "loss": 0.0946, + "step": 2150 + }, + { + "epoch": 0.73, + "grad_norm": 2.2633378505706787, + "learning_rate": 9.831959798994976e-06, + "loss": 0.0959, + "step": 2175 + }, + { + "epoch": 0.74, + "grad_norm": 1.6513453722000122, + "learning_rate": 9.829447236180905e-06, + "loss": 0.094, + "step": 2200 + }, + { + "epoch": 0.75, + "grad_norm": 1.747527837753296, + "learning_rate": 9.826934673366834e-06, + "loss": 0.0992, + "step": 2225 + }, + { + "epoch": 0.76, + "grad_norm": 1.8063627481460571, + "learning_rate": 9.824422110552766e-06, + "loss": 0.0998, + "step": 2250 + }, + { + "epoch": 0.76, + "grad_norm": 1.720791220664978, + "learning_rate": 9.821909547738693e-06, + "loss": 0.0999, + "step": 2275 + }, + { + "epoch": 0.77, + "grad_norm": 1.7780026197433472, + "learning_rate": 9.819396984924624e-06, + "loss": 0.0974, + "step": 2300 + }, + { + "epoch": 0.78, + "grad_norm": 1.9727784395217896, + "learning_rate": 9.816884422110553e-06, + "loss": 0.0963, + "step": 2325 + }, + { + "epoch": 0.79, + "grad_norm": 1.98048734664917, + "learning_rate": 9.814371859296483e-06, + "loss": 0.0968, + "step": 2350 + }, + { + "epoch": 0.8, + "grad_norm": 1.720804214477539, + "learning_rate": 9.811859296482414e-06, + "loss": 0.0958, + "step": 2375 + }, + { + "epoch": 0.81, + "grad_norm": 1.6495928764343262, + "learning_rate": 9.809346733668343e-06, + "loss": 0.095, + "step": 2400 + }, + { + "epoch": 0.81, + "grad_norm": 1.5512957572937012, + "learning_rate": 9.806834170854272e-06, + "loss": 0.0967, + "step": 2425 + }, + { + "epoch": 0.82, + "grad_norm": 2.0224955081939697, + "learning_rate": 9.804321608040202e-06, + "loss": 0.0937, + "step": 2450 + }, + { + "epoch": 0.83, + "grad_norm": 1.7854912281036377, + "learning_rate": 9.801809045226131e-06, + "loss": 0.0953, + "step": 2475 + }, + { + "epoch": 0.84, + "grad_norm": 1.8577855825424194, + "learning_rate": 9.79929648241206e-06, + "loss": 0.0938, + "step": 2500 + }, + { + "epoch": 0.85, + "grad_norm": 1.8208414316177368, + "learning_rate": 9.796783919597991e-06, + "loss": 0.0897, + "step": 2525 + }, + { + "epoch": 0.86, + "grad_norm": 1.408633828163147, + "learning_rate": 9.79427135678392e-06, + "loss": 0.0917, + "step": 2550 + }, + { + "epoch": 0.86, + "grad_norm": 1.8168509006500244, + "learning_rate": 9.79175879396985e-06, + "loss": 0.0909, + "step": 2575 + }, + { + "epoch": 0.87, + "grad_norm": 1.3138091564178467, + "learning_rate": 9.78924623115578e-06, + "loss": 0.0891, + "step": 2600 + }, + { + "epoch": 0.88, + "grad_norm": 1.6888105869293213, + "learning_rate": 9.786733668341709e-06, + "loss": 0.0915, + "step": 2625 + }, + { + "epoch": 0.89, + "grad_norm": 1.4557052850723267, + "learning_rate": 9.78422110552764e-06, + "loss": 0.0866, + "step": 2650 + }, + { + "epoch": 0.9, + "grad_norm": 2.0429742336273193, + "learning_rate": 9.781708542713569e-06, + "loss": 0.0921, + "step": 2675 + }, + { + "epoch": 0.91, + "grad_norm": 1.6368101835250854, + "learning_rate": 9.779195979899498e-06, + "loss": 0.0906, + "step": 2700 + }, + { + "epoch": 0.92, + "grad_norm": 1.6119431257247925, + "learning_rate": 9.776683417085428e-06, + "loss": 0.0918, + "step": 2725 + }, + { + "epoch": 0.92, + "grad_norm": 1.8471872806549072, + "learning_rate": 9.774170854271357e-06, + "loss": 0.0923, + "step": 2750 + }, + { + "epoch": 0.93, + "grad_norm": 2.208280324935913, + "learning_rate": 9.771658291457288e-06, + "loss": 0.0906, + "step": 2775 + }, + { + "epoch": 0.94, + "grad_norm": 1.9980486631393433, + "learning_rate": 9.769145728643217e-06, + "loss": 0.0894, + "step": 2800 + }, + { + "epoch": 0.95, + "grad_norm": 1.8759722709655762, + "learning_rate": 9.766633165829147e-06, + "loss": 0.0889, + "step": 2825 + }, + { + "epoch": 0.96, + "grad_norm": 1.5572909116744995, + "learning_rate": 9.764120603015076e-06, + "loss": 0.0912, + "step": 2850 + }, + { + "epoch": 0.97, + "grad_norm": 1.6484200954437256, + "learning_rate": 9.761608040201005e-06, + "loss": 0.095, + "step": 2875 + }, + { + "epoch": 0.97, + "grad_norm": 1.8597869873046875, + "learning_rate": 9.759095477386935e-06, + "loss": 0.0881, + "step": 2900 + }, + { + "epoch": 0.98, + "grad_norm": 2.061100959777832, + "learning_rate": 9.756582914572866e-06, + "loss": 0.087, + "step": 2925 + }, + { + "epoch": 0.99, + "grad_norm": 1.2775734663009644, + "learning_rate": 9.754070351758795e-06, + "loss": 0.0897, + "step": 2950 + }, + { + "epoch": 1.0, + "grad_norm": 1.3132025003433228, + "learning_rate": 9.751557788944724e-06, + "loss": 0.0893, + "step": 2975 + }, + { + "epoch": 1.01, + "grad_norm": 1.2416397333145142, + "learning_rate": 9.749045226130654e-06, + "loss": 0.0758, + "step": 3000 + }, + { + "epoch": 1.01, + "eval_loss": 0.05583362653851509, + "eval_runtime": 2248.3469, + "eval_samples_per_second": 0.64, + "eval_steps_per_second": 0.64, + "eval_wer": 24.99124036440084, + "step": 3000 + }, + { + "epoch": 1.02, + "grad_norm": 1.5315955877304077, + "learning_rate": 9.746532663316583e-06, + "loss": 0.0721, + "step": 3025 + }, + { + "epoch": 1.02, + "grad_norm": 1.4425287246704102, + "learning_rate": 9.744020100502514e-06, + "loss": 0.0703, + "step": 3050 + }, + { + "epoch": 1.03, + "grad_norm": 1.7169386148452759, + "learning_rate": 9.741507537688443e-06, + "loss": 0.0717, + "step": 3075 + }, + { + "epoch": 1.04, + "grad_norm": 1.6365302801132202, + "learning_rate": 9.738994974874373e-06, + "loss": 0.0729, + "step": 3100 + }, + { + "epoch": 1.05, + "grad_norm": 1.6059309244155884, + "learning_rate": 9.736482412060302e-06, + "loss": 0.0714, + "step": 3125 + }, + { + "epoch": 1.06, + "grad_norm": 1.4160854816436768, + "learning_rate": 9.733969849246231e-06, + "loss": 0.0722, + "step": 3150 + }, + { + "epoch": 1.07, + "grad_norm": 1.4330259561538696, + "learning_rate": 9.731457286432162e-06, + "loss": 0.07, + "step": 3175 + }, + { + "epoch": 1.07, + "grad_norm": 1.688765048980713, + "learning_rate": 9.728944723618092e-06, + "loss": 0.0738, + "step": 3200 + }, + { + "epoch": 1.08, + "grad_norm": 1.9040700197219849, + "learning_rate": 9.726432160804021e-06, + "loss": 0.0689, + "step": 3225 + }, + { + "epoch": 1.09, + "grad_norm": 1.4937708377838135, + "learning_rate": 9.72391959798995e-06, + "loss": 0.0695, + "step": 3250 + }, + { + "epoch": 1.1, + "grad_norm": 1.706620693206787, + "learning_rate": 9.721407035175881e-06, + "loss": 0.0737, + "step": 3275 + }, + { + "epoch": 1.11, + "grad_norm": 1.5625635385513306, + "learning_rate": 9.718894472361809e-06, + "loss": 0.0703, + "step": 3300 + }, + { + "epoch": 1.12, + "grad_norm": 1.5054141283035278, + "learning_rate": 9.71638190954774e-06, + "loss": 0.0711, + "step": 3325 + }, + { + "epoch": 1.12, + "grad_norm": 1.4986242055892944, + "learning_rate": 9.71386934673367e-06, + "loss": 0.0677, + "step": 3350 + }, + { + "epoch": 1.13, + "grad_norm": 1.4758636951446533, + "learning_rate": 9.711356783919599e-06, + "loss": 0.0709, + "step": 3375 + }, + { + "epoch": 1.14, + "grad_norm": 1.7234348058700562, + "learning_rate": 9.70884422110553e-06, + "loss": 0.0713, + "step": 3400 + }, + { + "epoch": 1.15, + "grad_norm": 1.3207378387451172, + "learning_rate": 9.706331658291457e-06, + "loss": 0.0672, + "step": 3425 + }, + { + "epoch": 1.16, + "grad_norm": 1.4856157302856445, + "learning_rate": 9.703819095477388e-06, + "loss": 0.0713, + "step": 3450 + }, + { + "epoch": 1.17, + "grad_norm": 1.5950653553009033, + "learning_rate": 9.701306532663318e-06, + "loss": 0.0704, + "step": 3475 + }, + { + "epoch": 1.18, + "grad_norm": 1.1743134260177612, + "learning_rate": 9.698793969849247e-06, + "loss": 0.0715, + "step": 3500 + }, + { + "epoch": 1.18, + "grad_norm": 1.413001537322998, + "learning_rate": 9.696281407035176e-06, + "loss": 0.0702, + "step": 3525 + }, + { + "epoch": 1.19, + "grad_norm": 1.457444667816162, + "learning_rate": 9.693768844221107e-06, + "loss": 0.072, + "step": 3550 + }, + { + "epoch": 1.2, + "grad_norm": 1.4083564281463623, + "learning_rate": 9.691256281407035e-06, + "loss": 0.0729, + "step": 3575 + }, + { + "epoch": 1.21, + "grad_norm": 1.7830222845077515, + "learning_rate": 9.688743718592966e-06, + "loss": 0.0717, + "step": 3600 + }, + { + "epoch": 1.22, + "grad_norm": 1.3286856412887573, + "learning_rate": 9.686231155778895e-06, + "loss": 0.0704, + "step": 3625 + }, + { + "epoch": 1.23, + "grad_norm": 1.8605308532714844, + "learning_rate": 9.683718592964825e-06, + "loss": 0.0725, + "step": 3650 + }, + { + "epoch": 1.23, + "grad_norm": 1.32564377784729, + "learning_rate": 9.681206030150756e-06, + "loss": 0.0689, + "step": 3675 + }, + { + "epoch": 1.24, + "grad_norm": 1.4492007493972778, + "learning_rate": 9.678693467336683e-06, + "loss": 0.0698, + "step": 3700 + }, + { + "epoch": 1.25, + "grad_norm": 1.6394785642623901, + "learning_rate": 9.676180904522614e-06, + "loss": 0.0696, + "step": 3725 + }, + { + "epoch": 1.26, + "grad_norm": 1.514087200164795, + "learning_rate": 9.673668341708544e-06, + "loss": 0.0677, + "step": 3750 + }, + { + "epoch": 1.27, + "grad_norm": 1.5464473962783813, + "learning_rate": 9.671155778894473e-06, + "loss": 0.0668, + "step": 3775 + }, + { + "epoch": 1.28, + "grad_norm": 1.5240578651428223, + "learning_rate": 9.668643216080404e-06, + "loss": 0.0669, + "step": 3800 + }, + { + "epoch": 1.28, + "grad_norm": 1.4395424127578735, + "learning_rate": 9.666130653266333e-06, + "loss": 0.068, + "step": 3825 + }, + { + "epoch": 1.29, + "grad_norm": 1.7017180919647217, + "learning_rate": 9.663618090452263e-06, + "loss": 0.0708, + "step": 3850 + }, + { + "epoch": 1.3, + "grad_norm": 1.3078066110610962, + "learning_rate": 9.661105527638192e-06, + "loss": 0.0635, + "step": 3875 + }, + { + "epoch": 1.31, + "grad_norm": 1.2067632675170898, + "learning_rate": 9.658592964824121e-06, + "loss": 0.0671, + "step": 3900 + }, + { + "epoch": 1.32, + "grad_norm": 1.6356010437011719, + "learning_rate": 9.65608040201005e-06, + "loss": 0.0657, + "step": 3925 + }, + { + "epoch": 1.33, + "grad_norm": 1.656516194343567, + "learning_rate": 9.653567839195982e-06, + "loss": 0.0716, + "step": 3950 + }, + { + "epoch": 1.33, + "grad_norm": 1.3678832054138184, + "learning_rate": 9.651055276381909e-06, + "loss": 0.0662, + "step": 3975 + }, + { + "epoch": 1.34, + "grad_norm": 1.2949965000152588, + "learning_rate": 9.64854271356784e-06, + "loss": 0.0681, + "step": 4000 + }, + { + "epoch": 1.34, + "eval_loss": 0.05457675829529762, + "eval_runtime": 2193.2675, + "eval_samples_per_second": 0.656, + "eval_steps_per_second": 0.656, + "eval_wer": 27.627890679747725, + "step": 4000 + }, + { + "epoch": 1.35, + "grad_norm": 1.1665066480636597, + "learning_rate": 9.64603015075377e-06, + "loss": 0.0655, + "step": 4025 + }, + { + "epoch": 1.36, + "grad_norm": 1.4676045179367065, + "learning_rate": 9.643517587939699e-06, + "loss": 0.0659, + "step": 4050 + }, + { + "epoch": 1.37, + "grad_norm": 1.527917504310608, + "learning_rate": 9.64100502512563e-06, + "loss": 0.0673, + "step": 4075 + }, + { + "epoch": 1.38, + "grad_norm": 1.4785560369491577, + "learning_rate": 9.638492462311559e-06, + "loss": 0.0655, + "step": 4100 + }, + { + "epoch": 1.39, + "grad_norm": 1.4437861442565918, + "learning_rate": 9.635979899497488e-06, + "loss": 0.0663, + "step": 4125 + }, + { + "epoch": 1.39, + "grad_norm": 1.1895109415054321, + "learning_rate": 9.633467336683418e-06, + "loss": 0.0673, + "step": 4150 + }, + { + "epoch": 1.4, + "grad_norm": 1.1187784671783447, + "learning_rate": 9.630954773869347e-06, + "loss": 0.0655, + "step": 4175 + }, + { + "epoch": 1.41, + "grad_norm": 1.287685513496399, + "learning_rate": 9.628442211055276e-06, + "loss": 0.0664, + "step": 4200 + }, + { + "epoch": 1.42, + "grad_norm": 1.5795636177062988, + "learning_rate": 9.625929648241207e-06, + "loss": 0.0681, + "step": 4225 + }, + { + "epoch": 1.43, + "grad_norm": 1.3589763641357422, + "learning_rate": 9.623417085427137e-06, + "loss": 0.0685, + "step": 4250 + }, + { + "epoch": 1.44, + "grad_norm": 1.4575799703598022, + "learning_rate": 9.620904522613066e-06, + "loss": 0.0686, + "step": 4275 + }, + { + "epoch": 1.44, + "grad_norm": 1.6060283184051514, + "learning_rate": 9.618391959798995e-06, + "loss": 0.0678, + "step": 4300 + }, + { + "epoch": 1.45, + "grad_norm": 1.43328058719635, + "learning_rate": 9.615879396984925e-06, + "loss": 0.0664, + "step": 4325 + }, + { + "epoch": 1.46, + "grad_norm": 1.4207333326339722, + "learning_rate": 9.613366834170856e-06, + "loss": 0.0633, + "step": 4350 + }, + { + "epoch": 1.47, + "grad_norm": 1.650315284729004, + "learning_rate": 9.610854271356785e-06, + "loss": 0.0665, + "step": 4375 + }, + { + "epoch": 1.48, + "grad_norm": 1.8327782154083252, + "learning_rate": 9.608341708542714e-06, + "loss": 0.0627, + "step": 4400 + }, + { + "epoch": 1.49, + "grad_norm": 1.3812270164489746, + "learning_rate": 9.605829145728644e-06, + "loss": 0.0685, + "step": 4425 + }, + { + "epoch": 1.49, + "grad_norm": 1.5655412673950195, + "learning_rate": 9.603316582914573e-06, + "loss": 0.0677, + "step": 4450 + }, + { + "epoch": 1.5, + "grad_norm": 1.2979263067245483, + "learning_rate": 9.600804020100504e-06, + "loss": 0.0644, + "step": 4475 + }, + { + "epoch": 1.51, + "grad_norm": 1.3595566749572754, + "learning_rate": 9.598291457286433e-06, + "loss": 0.0687, + "step": 4500 + }, + { + "epoch": 1.52, + "grad_norm": 1.352481722831726, + "learning_rate": 9.595778894472363e-06, + "loss": 0.0655, + "step": 4525 + }, + { + "epoch": 1.53, + "grad_norm": 1.5599027872085571, + "learning_rate": 9.593266331658292e-06, + "loss": 0.0639, + "step": 4550 + }, + { + "epoch": 1.54, + "grad_norm": 1.548474669456482, + "learning_rate": 9.590753768844221e-06, + "loss": 0.0671, + "step": 4575 + }, + { + "epoch": 1.54, + "grad_norm": 1.419819951057434, + "learning_rate": 9.58824120603015e-06, + "loss": 0.0622, + "step": 4600 + }, + { + "epoch": 1.55, + "grad_norm": 1.5403597354888916, + "learning_rate": 9.585728643216082e-06, + "loss": 0.0652, + "step": 4625 + }, + { + "epoch": 1.56, + "grad_norm": 1.5437499284744263, + "learning_rate": 9.583216080402011e-06, + "loss": 0.0662, + "step": 4650 + }, + { + "epoch": 1.57, + "grad_norm": 1.476802945137024, + "learning_rate": 9.58070351758794e-06, + "loss": 0.0648, + "step": 4675 + }, + { + "epoch": 1.58, + "grad_norm": 1.7093713283538818, + "learning_rate": 9.57819095477387e-06, + "loss": 0.0654, + "step": 4700 + }, + { + "epoch": 1.59, + "grad_norm": 1.2631677389144897, + "learning_rate": 9.575678391959799e-06, + "loss": 0.0642, + "step": 4725 + }, + { + "epoch": 1.6, + "grad_norm": 1.4027448892593384, + "learning_rate": 9.57316582914573e-06, + "loss": 0.0629, + "step": 4750 + }, + { + "epoch": 1.6, + "grad_norm": 1.7192860841751099, + "learning_rate": 9.57065326633166e-06, + "loss": 0.0658, + "step": 4775 + }, + { + "epoch": 1.61, + "grad_norm": 1.7765398025512695, + "learning_rate": 9.568140703517589e-06, + "loss": 0.0654, + "step": 4800 + }, + { + "epoch": 1.62, + "grad_norm": 1.2420257329940796, + "learning_rate": 9.565628140703518e-06, + "loss": 0.0651, + "step": 4825 + }, + { + "epoch": 1.63, + "grad_norm": 1.178631067276001, + "learning_rate": 9.563115577889447e-06, + "loss": 0.0661, + "step": 4850 + }, + { + "epoch": 1.64, + "grad_norm": 1.65060293674469, + "learning_rate": 9.560603015075378e-06, + "loss": 0.0633, + "step": 4875 + }, + { + "epoch": 1.65, + "grad_norm": 1.7282522916793823, + "learning_rate": 9.558090452261308e-06, + "loss": 0.0622, + "step": 4900 + }, + { + "epoch": 1.65, + "grad_norm": 1.5822139978408813, + "learning_rate": 9.555577889447237e-06, + "loss": 0.0638, + "step": 4925 + }, + { + "epoch": 1.66, + "grad_norm": 1.4373313188552856, + "learning_rate": 9.553065326633166e-06, + "loss": 0.0653, + "step": 4950 + }, + { + "epoch": 1.67, + "grad_norm": 1.348206639289856, + "learning_rate": 9.550552763819096e-06, + "loss": 0.0611, + "step": 4975 + }, + { + "epoch": 1.68, + "grad_norm": 1.4973466396331787, + "learning_rate": 9.548040201005025e-06, + "loss": 0.0636, + "step": 5000 + }, + { + "epoch": 1.68, + "eval_loss": 0.05344749614596367, + "eval_runtime": 2200.63, + "eval_samples_per_second": 0.653, + "eval_steps_per_second": 0.653, + "eval_wer": 24.842326559215135, + "step": 5000 + }, + { + "epoch": 1.69, + "grad_norm": 1.403596043586731, + "learning_rate": 9.545527638190956e-06, + "loss": 0.0631, + "step": 5025 + }, + { + "epoch": 1.7, + "grad_norm": 1.2089179754257202, + "learning_rate": 9.543015075376885e-06, + "loss": 0.0617, + "step": 5050 + }, + { + "epoch": 1.7, + "grad_norm": 1.3374905586242676, + "learning_rate": 9.540502512562815e-06, + "loss": 0.0625, + "step": 5075 + }, + { + "epoch": 1.71, + "grad_norm": 1.1531893014907837, + "learning_rate": 9.537989949748746e-06, + "loss": 0.0628, + "step": 5100 + }, + { + "epoch": 1.72, + "grad_norm": 1.2645255327224731, + "learning_rate": 9.535477386934673e-06, + "loss": 0.0635, + "step": 5125 + }, + { + "epoch": 1.73, + "grad_norm": 1.2286183834075928, + "learning_rate": 9.532964824120604e-06, + "loss": 0.0613, + "step": 5150 + }, + { + "epoch": 1.74, + "grad_norm": 1.7261433601379395, + "learning_rate": 9.530452261306534e-06, + "loss": 0.0624, + "step": 5175 + }, + { + "epoch": 1.75, + "grad_norm": 1.5666346549987793, + "learning_rate": 9.527939698492463e-06, + "loss": 0.0625, + "step": 5200 + }, + { + "epoch": 1.75, + "grad_norm": 1.6719902753829956, + "learning_rate": 9.525427135678392e-06, + "loss": 0.0624, + "step": 5225 + }, + { + "epoch": 1.76, + "grad_norm": 1.2648427486419678, + "learning_rate": 9.522914572864322e-06, + "loss": 0.0596, + "step": 5250 + }, + { + "epoch": 1.77, + "grad_norm": 1.4514129161834717, + "learning_rate": 9.520402010050253e-06, + "loss": 0.0624, + "step": 5275 + }, + { + "epoch": 1.78, + "grad_norm": 1.3863974809646606, + "learning_rate": 9.517889447236182e-06, + "loss": 0.0599, + "step": 5300 + }, + { + "epoch": 1.79, + "grad_norm": 1.3497973680496216, + "learning_rate": 9.515376884422111e-06, + "loss": 0.0645, + "step": 5325 + }, + { + "epoch": 1.8, + "grad_norm": 1.4993109703063965, + "learning_rate": 9.51286432160804e-06, + "loss": 0.0584, + "step": 5350 + }, + { + "epoch": 1.8, + "grad_norm": 1.5340311527252197, + "learning_rate": 9.510351758793972e-06, + "loss": 0.0611, + "step": 5375 + }, + { + "epoch": 1.81, + "grad_norm": 1.6285430192947388, + "learning_rate": 9.5078391959799e-06, + "loss": 0.0658, + "step": 5400 + }, + { + "epoch": 1.82, + "grad_norm": 1.299028992652893, + "learning_rate": 9.50532663316583e-06, + "loss": 0.06, + "step": 5425 + }, + { + "epoch": 1.83, + "grad_norm": 1.51743483543396, + "learning_rate": 9.50281407035176e-06, + "loss": 0.0609, + "step": 5450 + }, + { + "epoch": 1.84, + "grad_norm": 1.3315948247909546, + "learning_rate": 9.500301507537689e-06, + "loss": 0.06, + "step": 5475 + }, + { + "epoch": 1.85, + "grad_norm": 1.2751985788345337, + "learning_rate": 9.49778894472362e-06, + "loss": 0.0619, + "step": 5500 + }, + { + "epoch": 1.86, + "grad_norm": 1.4583544731140137, + "learning_rate": 9.49527638190955e-06, + "loss": 0.0621, + "step": 5525 + }, + { + "epoch": 1.86, + "grad_norm": 1.140053391456604, + "learning_rate": 9.492763819095479e-06, + "loss": 0.0614, + "step": 5550 + }, + { + "epoch": 1.87, + "grad_norm": 1.4022659063339233, + "learning_rate": 9.490251256281408e-06, + "loss": 0.0617, + "step": 5575 + }, + { + "epoch": 1.88, + "grad_norm": 1.331568956375122, + "learning_rate": 9.487738693467337e-06, + "loss": 0.0606, + "step": 5600 + }, + { + "epoch": 1.89, + "grad_norm": 1.3748645782470703, + "learning_rate": 9.485226130653267e-06, + "loss": 0.0576, + "step": 5625 + }, + { + "epoch": 1.9, + "grad_norm": 1.6632026433944702, + "learning_rate": 9.482713567839198e-06, + "loss": 0.06, + "step": 5650 + }, + { + "epoch": 1.91, + "grad_norm": 1.6703838109970093, + "learning_rate": 9.480201005025125e-06, + "loss": 0.0606, + "step": 5675 + }, + { + "epoch": 1.91, + "grad_norm": 1.1467951536178589, + "learning_rate": 9.477688442211056e-06, + "loss": 0.061, + "step": 5700 + }, + { + "epoch": 1.92, + "grad_norm": 1.6099380254745483, + "learning_rate": 9.475175879396985e-06, + "loss": 0.0554, + "step": 5725 + }, + { + "epoch": 1.93, + "grad_norm": 1.3673566579818726, + "learning_rate": 9.472663316582915e-06, + "loss": 0.0575, + "step": 5750 + }, + { + "epoch": 1.94, + "grad_norm": 1.2057411670684814, + "learning_rate": 9.470150753768846e-06, + "loss": 0.0585, + "step": 5775 + }, + { + "epoch": 1.95, + "grad_norm": 1.3576023578643799, + "learning_rate": 9.467638190954775e-06, + "loss": 0.0623, + "step": 5800 + }, + { + "epoch": 1.96, + "grad_norm": 1.4387112855911255, + "learning_rate": 9.465125628140704e-06, + "loss": 0.0582, + "step": 5825 + }, + { + "epoch": 1.96, + "grad_norm": 1.3731517791748047, + "learning_rate": 9.462613065326634e-06, + "loss": 0.0563, + "step": 5850 + }, + { + "epoch": 1.97, + "grad_norm": 1.6258333921432495, + "learning_rate": 9.460100502512563e-06, + "loss": 0.0595, + "step": 5875 + }, + { + "epoch": 1.98, + "grad_norm": 1.3161087036132812, + "learning_rate": 9.457587939698494e-06, + "loss": 0.0609, + "step": 5900 + }, + { + "epoch": 1.99, + "grad_norm": 1.6884005069732666, + "learning_rate": 9.455075376884423e-06, + "loss": 0.0595, + "step": 5925 + }, + { + "epoch": 2.0, + "grad_norm": 1.4603941440582275, + "learning_rate": 9.452562814070353e-06, + "loss": 0.0578, + "step": 5950 + }, + { + "epoch": 2.01, + "grad_norm": 1.2040290832519531, + "learning_rate": 9.450050251256282e-06, + "loss": 0.0474, + "step": 5975 + }, + { + "epoch": 2.01, + "grad_norm": 1.2578160762786865, + "learning_rate": 9.447537688442211e-06, + "loss": 0.0399, + "step": 6000 + }, + { + "epoch": 2.01, + "eval_loss": 0.05515974387526512, + "eval_runtime": 2149.0297, + "eval_samples_per_second": 0.669, + "eval_steps_per_second": 0.669, + "eval_wer": 28.311142256482132, + "step": 6000 + }, + { + "epoch": 2.02, + "grad_norm": 1.1583691835403442, + "learning_rate": 9.44502512562814e-06, + "loss": 0.0404, + "step": 6025 + }, + { + "epoch": 2.03, + "grad_norm": 1.1341968774795532, + "learning_rate": 9.442512562814072e-06, + "loss": 0.0422, + "step": 6050 + }, + { + "epoch": 2.04, + "grad_norm": 1.2784273624420166, + "learning_rate": 9.440000000000001e-06, + "loss": 0.0437, + "step": 6075 + }, + { + "epoch": 2.05, + "grad_norm": 1.1828523874282837, + "learning_rate": 9.43748743718593e-06, + "loss": 0.0395, + "step": 6100 + }, + { + "epoch": 2.06, + "grad_norm": 1.2141575813293457, + "learning_rate": 9.43497487437186e-06, + "loss": 0.0423, + "step": 6125 + }, + { + "epoch": 2.07, + "grad_norm": 1.345527172088623, + "learning_rate": 9.432462311557789e-06, + "loss": 0.0408, + "step": 6150 + }, + { + "epoch": 2.07, + "grad_norm": 1.5167073011398315, + "learning_rate": 9.42994974874372e-06, + "loss": 0.0419, + "step": 6175 + }, + { + "epoch": 2.08, + "grad_norm": 1.0509347915649414, + "learning_rate": 9.42743718592965e-06, + "loss": 0.0412, + "step": 6200 + }, + { + "epoch": 2.09, + "grad_norm": 1.3531029224395752, + "learning_rate": 9.424924623115579e-06, + "loss": 0.0397, + "step": 6225 + }, + { + "epoch": 2.1, + "grad_norm": 1.1723557710647583, + "learning_rate": 9.422412060301508e-06, + "loss": 0.0425, + "step": 6250 + }, + { + "epoch": 2.11, + "grad_norm": 1.3973031044006348, + "learning_rate": 9.419899497487437e-06, + "loss": 0.0415, + "step": 6275 + }, + { + "epoch": 2.12, + "grad_norm": 1.0195714235305786, + "learning_rate": 9.417386934673367e-06, + "loss": 0.0403, + "step": 6300 + }, + { + "epoch": 2.12, + "grad_norm": 1.1668431758880615, + "learning_rate": 9.414874371859298e-06, + "loss": 0.0414, + "step": 6325 + }, + { + "epoch": 2.13, + "grad_norm": 1.5427662134170532, + "learning_rate": 9.412361809045227e-06, + "loss": 0.0392, + "step": 6350 + }, + { + "epoch": 2.14, + "grad_norm": 1.1287201642990112, + "learning_rate": 9.409849246231156e-06, + "loss": 0.041, + "step": 6375 + }, + { + "epoch": 2.15, + "grad_norm": 1.2142645120620728, + "learning_rate": 9.407336683417086e-06, + "loss": 0.0417, + "step": 6400 + }, + { + "epoch": 2.16, + "grad_norm": 1.2047431468963623, + "learning_rate": 9.404824120603015e-06, + "loss": 0.0427, + "step": 6425 + }, + { + "epoch": 2.17, + "grad_norm": 1.3130313158035278, + "learning_rate": 9.402311557788946e-06, + "loss": 0.0424, + "step": 6450 + }, + { + "epoch": 2.17, + "grad_norm": 1.4417389631271362, + "learning_rate": 9.399798994974875e-06, + "loss": 0.04, + "step": 6475 + }, + { + "epoch": 2.18, + "grad_norm": 1.1858882904052734, + "learning_rate": 9.397286432160805e-06, + "loss": 0.0408, + "step": 6500 + }, + { + "epoch": 2.19, + "grad_norm": 1.2733402252197266, + "learning_rate": 9.394773869346736e-06, + "loss": 0.0423, + "step": 6525 + }, + { + "epoch": 2.2, + "grad_norm": 1.317887783050537, + "learning_rate": 9.392261306532663e-06, + "loss": 0.0422, + "step": 6550 + }, + { + "epoch": 2.21, + "grad_norm": 1.1487162113189697, + "learning_rate": 9.389748743718594e-06, + "loss": 0.0388, + "step": 6575 + }, + { + "epoch": 2.22, + "grad_norm": 1.2091419696807861, + "learning_rate": 9.387236180904524e-06, + "loss": 0.041, + "step": 6600 + }, + { + "epoch": 2.22, + "grad_norm": 1.0671520233154297, + "learning_rate": 9.384723618090453e-06, + "loss": 0.0412, + "step": 6625 + }, + { + "epoch": 2.23, + "grad_norm": 1.522486686706543, + "learning_rate": 9.382211055276382e-06, + "loss": 0.0407, + "step": 6650 + }, + { + "epoch": 2.24, + "grad_norm": 1.225422978401184, + "learning_rate": 9.379698492462312e-06, + "loss": 0.0397, + "step": 6675 + }, + { + "epoch": 2.25, + "grad_norm": 1.7508400678634644, + "learning_rate": 9.377185929648241e-06, + "loss": 0.0413, + "step": 6700 + }, + { + "epoch": 2.26, + "grad_norm": 1.5747374296188354, + "learning_rate": 9.374673366834172e-06, + "loss": 0.0412, + "step": 6725 + }, + { + "epoch": 2.27, + "grad_norm": 1.268565058708191, + "learning_rate": 9.372160804020101e-06, + "loss": 0.0385, + "step": 6750 + }, + { + "epoch": 2.28, + "grad_norm": 1.2624239921569824, + "learning_rate": 9.36964824120603e-06, + "loss": 0.0396, + "step": 6775 + }, + { + "epoch": 2.28, + "grad_norm": 1.2683887481689453, + "learning_rate": 9.367135678391962e-06, + "loss": 0.0418, + "step": 6800 + }, + { + "epoch": 2.29, + "grad_norm": 1.058915376663208, + "learning_rate": 9.36462311557789e-06, + "loss": 0.0385, + "step": 6825 + }, + { + "epoch": 2.3, + "grad_norm": 1.4409563541412354, + "learning_rate": 9.36211055276382e-06, + "loss": 0.0398, + "step": 6850 + }, + { + "epoch": 2.31, + "grad_norm": 1.3603601455688477, + "learning_rate": 9.35959798994975e-06, + "loss": 0.0396, + "step": 6875 + }, + { + "epoch": 2.32, + "grad_norm": 1.3363993167877197, + "learning_rate": 9.357085427135679e-06, + "loss": 0.0402, + "step": 6900 + }, + { + "epoch": 2.33, + "grad_norm": 1.4132412672042847, + "learning_rate": 9.354572864321608e-06, + "loss": 0.041, + "step": 6925 + }, + { + "epoch": 2.33, + "grad_norm": 1.6067790985107422, + "learning_rate": 9.352060301507538e-06, + "loss": 0.0405, + "step": 6950 + }, + { + "epoch": 2.34, + "grad_norm": 1.297771692276001, + "learning_rate": 9.349547738693469e-06, + "loss": 0.0395, + "step": 6975 + }, + { + "epoch": 2.35, + "grad_norm": 1.3299397230148315, + "learning_rate": 9.347035175879398e-06, + "loss": 0.0398, + "step": 7000 + }, + { + "epoch": 2.35, + "eval_loss": 0.058693744242191315, + "eval_runtime": 2184.7163, + "eval_samples_per_second": 0.658, + "eval_steps_per_second": 0.658, + "eval_wer": 28.512613875262787, + "step": 7000 + }, + { + "epoch": 2.36, + "grad_norm": 1.3611425161361694, + "learning_rate": 9.344522613065327e-06, + "loss": 0.0387, + "step": 7025 + }, + { + "epoch": 2.37, + "grad_norm": 1.075129508972168, + "learning_rate": 9.342010050251257e-06, + "loss": 0.0383, + "step": 7050 + }, + { + "epoch": 2.38, + "grad_norm": 1.2161786556243896, + "learning_rate": 9.339497487437188e-06, + "loss": 0.0397, + "step": 7075 + }, + { + "epoch": 2.38, + "grad_norm": 1.1765416860580444, + "learning_rate": 9.336984924623115e-06, + "loss": 0.0397, + "step": 7100 + }, + { + "epoch": 2.39, + "grad_norm": 1.375571608543396, + "learning_rate": 9.334472361809046e-06, + "loss": 0.0392, + "step": 7125 + }, + { + "epoch": 2.4, + "grad_norm": 1.2378919124603271, + "learning_rate": 9.331959798994976e-06, + "loss": 0.0406, + "step": 7150 + }, + { + "epoch": 2.41, + "grad_norm": 1.4299308061599731, + "learning_rate": 9.329447236180905e-06, + "loss": 0.0384, + "step": 7175 + }, + { + "epoch": 2.42, + "grad_norm": 1.066771149635315, + "learning_rate": 9.326934673366836e-06, + "loss": 0.0405, + "step": 7200 + }, + { + "epoch": 2.43, + "grad_norm": 1.3687233924865723, + "learning_rate": 9.324422110552764e-06, + "loss": 0.0398, + "step": 7225 + }, + { + "epoch": 2.43, + "grad_norm": 1.0777060985565186, + "learning_rate": 9.321909547738695e-06, + "loss": 0.0377, + "step": 7250 + }, + { + "epoch": 2.44, + "grad_norm": 1.261992335319519, + "learning_rate": 9.319396984924624e-06, + "loss": 0.0424, + "step": 7275 + }, + { + "epoch": 2.45, + "grad_norm": 1.2443946599960327, + "learning_rate": 9.316884422110553e-06, + "loss": 0.0391, + "step": 7300 + }, + { + "epoch": 2.46, + "grad_norm": 1.1624213457107544, + "learning_rate": 9.314371859296483e-06, + "loss": 0.0356, + "step": 7325 + }, + { + "epoch": 2.47, + "grad_norm": 1.412139892578125, + "learning_rate": 9.311859296482414e-06, + "loss": 0.0384, + "step": 7350 + }, + { + "epoch": 2.48, + "grad_norm": 1.092795968055725, + "learning_rate": 9.309346733668343e-06, + "loss": 0.0409, + "step": 7375 + }, + { + "epoch": 2.48, + "grad_norm": 1.305854082107544, + "learning_rate": 9.306834170854272e-06, + "loss": 0.04, + "step": 7400 + }, + { + "epoch": 2.49, + "grad_norm": 1.5060044527053833, + "learning_rate": 9.304321608040201e-06, + "loss": 0.0381, + "step": 7425 + }, + { + "epoch": 2.5, + "grad_norm": 1.3781262636184692, + "learning_rate": 9.30180904522613e-06, + "loss": 0.0376, + "step": 7450 + }, + { + "epoch": 2.51, + "grad_norm": 1.3776516914367676, + "learning_rate": 9.299296482412062e-06, + "loss": 0.0405, + "step": 7475 + }, + { + "epoch": 2.52, + "grad_norm": 1.2429224252700806, + "learning_rate": 9.296783919597991e-06, + "loss": 0.0367, + "step": 7500 + }, + { + "epoch": 2.53, + "grad_norm": 1.194620966911316, + "learning_rate": 9.29427135678392e-06, + "loss": 0.0375, + "step": 7525 + }, + { + "epoch": 2.54, + "grad_norm": 1.097766637802124, + "learning_rate": 9.29175879396985e-06, + "loss": 0.0372, + "step": 7550 + }, + { + "epoch": 2.54, + "grad_norm": 1.3045562505722046, + "learning_rate": 9.289246231155779e-06, + "loss": 0.0365, + "step": 7575 + }, + { + "epoch": 2.55, + "grad_norm": 1.3821510076522827, + "learning_rate": 9.28673366834171e-06, + "loss": 0.0387, + "step": 7600 + }, + { + "epoch": 2.56, + "grad_norm": 1.241038203239441, + "learning_rate": 9.28422110552764e-06, + "loss": 0.0383, + "step": 7625 + }, + { + "epoch": 2.57, + "grad_norm": 1.4758143424987793, + "learning_rate": 9.281708542713569e-06, + "loss": 0.0375, + "step": 7650 + }, + { + "epoch": 2.58, + "grad_norm": 1.464656114578247, + "learning_rate": 9.279195979899498e-06, + "loss": 0.0381, + "step": 7675 + }, + { + "epoch": 2.59, + "grad_norm": 1.2333124876022339, + "learning_rate": 9.276683417085427e-06, + "loss": 0.0354, + "step": 7700 + }, + { + "epoch": 2.59, + "grad_norm": 1.5094608068466187, + "learning_rate": 9.274170854271357e-06, + "loss": 0.0385, + "step": 7725 + }, + { + "epoch": 2.6, + "grad_norm": 1.379455327987671, + "learning_rate": 9.271658291457288e-06, + "loss": 0.0387, + "step": 7750 + }, + { + "epoch": 2.61, + "grad_norm": 1.3588508367538452, + "learning_rate": 9.269145728643217e-06, + "loss": 0.0374, + "step": 7775 + }, + { + "epoch": 2.62, + "grad_norm": 1.3320001363754272, + "learning_rate": 9.266633165829146e-06, + "loss": 0.037, + "step": 7800 + }, + { + "epoch": 2.63, + "grad_norm": 1.1679881811141968, + "learning_rate": 9.264120603015076e-06, + "loss": 0.0379, + "step": 7825 + }, + { + "epoch": 2.64, + "grad_norm": 1.121272087097168, + "learning_rate": 9.261608040201005e-06, + "loss": 0.0373, + "step": 7850 + }, + { + "epoch": 2.64, + "grad_norm": 1.4823328256607056, + "learning_rate": 9.259095477386936e-06, + "loss": 0.0371, + "step": 7875 + }, + { + "epoch": 2.65, + "grad_norm": 1.6181988716125488, + "learning_rate": 9.256582914572865e-06, + "loss": 0.0373, + "step": 7900 + }, + { + "epoch": 2.66, + "grad_norm": 0.8980041146278381, + "learning_rate": 9.254070351758795e-06, + "loss": 0.0376, + "step": 7925 + }, + { + "epoch": 2.67, + "grad_norm": 1.2680145502090454, + "learning_rate": 9.251557788944724e-06, + "loss": 0.038, + "step": 7950 + }, + { + "epoch": 2.68, + "grad_norm": 1.2755582332611084, + "learning_rate": 9.249045226130653e-06, + "loss": 0.0367, + "step": 7975 + }, + { + "epoch": 2.69, + "grad_norm": 1.2242423295974731, + "learning_rate": 9.246532663316584e-06, + "loss": 0.0382, + "step": 8000 + }, + { + "epoch": 2.69, + "eval_loss": 0.05650435760617256, + "eval_runtime": 2178.1343, + "eval_samples_per_second": 0.66, + "eval_steps_per_second": 0.66, + "eval_wer": 26.261387526278906, + "step": 8000 + }, + { + "epoch": 2.69, + "grad_norm": 1.3400089740753174, + "learning_rate": 9.244020100502514e-06, + "loss": 0.0379, + "step": 8025 + }, + { + "epoch": 2.7, + "grad_norm": 1.370528221130371, + "learning_rate": 9.241507537688443e-06, + "loss": 0.0362, + "step": 8050 + }, + { + "epoch": 2.71, + "grad_norm": 1.332824945449829, + "learning_rate": 9.238994974874372e-06, + "loss": 0.0371, + "step": 8075 + }, + { + "epoch": 2.72, + "grad_norm": 1.1182409524917603, + "learning_rate": 9.236482412060302e-06, + "loss": 0.0349, + "step": 8100 + }, + { + "epoch": 2.73, + "grad_norm": 1.254818320274353, + "learning_rate": 9.233969849246231e-06, + "loss": 0.0378, + "step": 8125 + }, + { + "epoch": 2.74, + "grad_norm": 1.3172489404678345, + "learning_rate": 9.231457286432162e-06, + "loss": 0.0371, + "step": 8150 + }, + { + "epoch": 2.75, + "grad_norm": 1.3246852159500122, + "learning_rate": 9.228944723618091e-06, + "loss": 0.0401, + "step": 8175 + }, + { + "epoch": 2.75, + "grad_norm": 1.1008661985397339, + "learning_rate": 9.22643216080402e-06, + "loss": 0.0358, + "step": 8200 + }, + { + "epoch": 2.76, + "grad_norm": 1.6776241064071655, + "learning_rate": 9.223919597989952e-06, + "loss": 0.0381, + "step": 8225 + }, + { + "epoch": 2.77, + "grad_norm": 1.2852270603179932, + "learning_rate": 9.22140703517588e-06, + "loss": 0.0388, + "step": 8250 + }, + { + "epoch": 2.78, + "grad_norm": 1.1616917848587036, + "learning_rate": 9.21889447236181e-06, + "loss": 0.0363, + "step": 8275 + }, + { + "epoch": 2.79, + "grad_norm": 1.3984943628311157, + "learning_rate": 9.21638190954774e-06, + "loss": 0.0385, + "step": 8300 + }, + { + "epoch": 2.8, + "grad_norm": 1.3237658739089966, + "learning_rate": 9.213869346733669e-06, + "loss": 0.0378, + "step": 8325 + }, + { + "epoch": 2.8, + "grad_norm": 1.4625046253204346, + "learning_rate": 9.211356783919598e-06, + "loss": 0.0357, + "step": 8350 + }, + { + "epoch": 2.81, + "grad_norm": 1.1589903831481934, + "learning_rate": 9.208844221105528e-06, + "loss": 0.0367, + "step": 8375 + }, + { + "epoch": 2.82, + "grad_norm": 1.1017351150512695, + "learning_rate": 9.206331658291459e-06, + "loss": 0.0363, + "step": 8400 + }, + { + "epoch": 2.83, + "grad_norm": 1.2461220026016235, + "learning_rate": 9.203819095477388e-06, + "loss": 0.0362, + "step": 8425 + }, + { + "epoch": 2.84, + "grad_norm": 1.4164782762527466, + "learning_rate": 9.201306532663317e-06, + "loss": 0.0372, + "step": 8450 + }, + { + "epoch": 2.85, + "grad_norm": 1.4098445177078247, + "learning_rate": 9.198793969849247e-06, + "loss": 0.0369, + "step": 8475 + }, + { + "epoch": 2.85, + "grad_norm": 1.4906907081604004, + "learning_rate": 9.196281407035178e-06, + "loss": 0.0376, + "step": 8500 + }, + { + "epoch": 2.86, + "grad_norm": 1.3866301774978638, + "learning_rate": 9.193768844221105e-06, + "loss": 0.0356, + "step": 8525 + }, + { + "epoch": 2.87, + "grad_norm": 1.538131594657898, + "learning_rate": 9.191256281407036e-06, + "loss": 0.0363, + "step": 8550 + }, + { + "epoch": 2.88, + "grad_norm": 1.3459762334823608, + "learning_rate": 9.188743718592966e-06, + "loss": 0.0352, + "step": 8575 + }, + { + "epoch": 2.89, + "grad_norm": 1.2548068761825562, + "learning_rate": 9.186231155778895e-06, + "loss": 0.0342, + "step": 8600 + }, + { + "epoch": 2.9, + "grad_norm": 1.3523027896881104, + "learning_rate": 9.183718592964826e-06, + "loss": 0.0336, + "step": 8625 + }, + { + "epoch": 2.9, + "grad_norm": 1.4613780975341797, + "learning_rate": 9.181206030150754e-06, + "loss": 0.0359, + "step": 8650 + }, + { + "epoch": 2.91, + "grad_norm": 1.3093277215957642, + "learning_rate": 9.178693467336685e-06, + "loss": 0.0366, + "step": 8675 + }, + { + "epoch": 2.92, + "grad_norm": 1.5090382099151611, + "learning_rate": 9.176180904522614e-06, + "loss": 0.0371, + "step": 8700 + }, + { + "epoch": 2.93, + "grad_norm": 1.3228135108947754, + "learning_rate": 9.173668341708543e-06, + "loss": 0.0332, + "step": 8725 + }, + { + "epoch": 2.94, + "grad_norm": 1.3964694738388062, + "learning_rate": 9.171155778894473e-06, + "loss": 0.0373, + "step": 8750 + }, + { + "epoch": 2.95, + "grad_norm": 1.3430994749069214, + "learning_rate": 9.168643216080404e-06, + "loss": 0.035, + "step": 8775 + }, + { + "epoch": 2.96, + "grad_norm": 1.3736649751663208, + "learning_rate": 9.166130653266331e-06, + "loss": 0.0366, + "step": 8800 + }, + { + "epoch": 2.96, + "grad_norm": 1.468558430671692, + "learning_rate": 9.163618090452262e-06, + "loss": 0.0352, + "step": 8825 + }, + { + "epoch": 2.97, + "grad_norm": 1.3824299573898315, + "learning_rate": 9.161105527638192e-06, + "loss": 0.0341, + "step": 8850 + }, + { + "epoch": 2.98, + "grad_norm": 1.4083284139633179, + "learning_rate": 9.158592964824121e-06, + "loss": 0.035, + "step": 8875 + }, + { + "epoch": 2.99, + "grad_norm": 0.9604634046554565, + "learning_rate": 9.156080402010052e-06, + "loss": 0.035, + "step": 8900 + }, + { + "epoch": 3.0, + "grad_norm": 1.2706329822540283, + "learning_rate": 9.15356783919598e-06, + "loss": 0.0347, + "step": 8925 + }, + { + "epoch": 3.01, + "grad_norm": 0.9310716390609741, + "learning_rate": 9.15105527638191e-06, + "loss": 0.0286, + "step": 8950 + }, + { + "epoch": 3.01, + "grad_norm": 1.156808614730835, + "learning_rate": 9.14854271356784e-06, + "loss": 0.0209, + "step": 8975 + }, + { + "epoch": 3.02, + "grad_norm": 1.1196666955947876, + "learning_rate": 9.14603015075377e-06, + "loss": 0.0205, + "step": 9000 + }, + { + "epoch": 3.02, + "eval_loss": 0.0635053962469101, + "eval_runtime": 2185.1357, + "eval_samples_per_second": 0.658, + "eval_steps_per_second": 0.658, + "eval_wer": 25.87596355991591, + "step": 9000 + }, + { + "epoch": 3.03, + "grad_norm": 1.4327765703201294, + "learning_rate": 9.1435175879397e-06, + "loss": 0.0202, + "step": 9025 + }, + { + "epoch": 3.04, + "grad_norm": 1.3396183252334595, + "learning_rate": 9.14100502512563e-06, + "loss": 0.0212, + "step": 9050 + }, + { + "epoch": 3.05, + "grad_norm": 1.025956392288208, + "learning_rate": 9.138492462311559e-06, + "loss": 0.0199, + "step": 9075 + }, + { + "epoch": 3.06, + "grad_norm": 1.254798412322998, + "learning_rate": 9.135979899497488e-06, + "loss": 0.0213, + "step": 9100 + }, + { + "epoch": 3.06, + "grad_norm": 1.3673968315124512, + "learning_rate": 9.133467336683417e-06, + "loss": 0.0204, + "step": 9125 + }, + { + "epoch": 3.07, + "grad_norm": 1.0758674144744873, + "learning_rate": 9.130954773869347e-06, + "loss": 0.021, + "step": 9150 + }, + { + "epoch": 3.08, + "grad_norm": 0.9783607721328735, + "learning_rate": 9.128442211055278e-06, + "loss": 0.0214, + "step": 9175 + }, + { + "epoch": 3.09, + "grad_norm": 1.1596916913986206, + "learning_rate": 9.125929648241205e-06, + "loss": 0.0208, + "step": 9200 + }, + { + "epoch": 3.1, + "grad_norm": 1.1976155042648315, + "learning_rate": 9.123417085427136e-06, + "loss": 0.0218, + "step": 9225 + }, + { + "epoch": 3.11, + "grad_norm": 1.141706943511963, + "learning_rate": 9.120904522613066e-06, + "loss": 0.0203, + "step": 9250 + }, + { + "epoch": 3.11, + "grad_norm": 1.3297709226608276, + "learning_rate": 9.118391959798995e-06, + "loss": 0.0195, + "step": 9275 + }, + { + "epoch": 3.12, + "grad_norm": 1.1250659227371216, + "learning_rate": 9.115879396984926e-06, + "loss": 0.0206, + "step": 9300 + }, + { + "epoch": 3.13, + "grad_norm": 0.8867733478546143, + "learning_rate": 9.113366834170855e-06, + "loss": 0.0207, + "step": 9325 + }, + { + "epoch": 3.14, + "grad_norm": 1.0141582489013672, + "learning_rate": 9.110854271356785e-06, + "loss": 0.0214, + "step": 9350 + }, + { + "epoch": 3.15, + "grad_norm": 1.3556747436523438, + "learning_rate": 9.108341708542714e-06, + "loss": 0.0207, + "step": 9375 + }, + { + "epoch": 3.16, + "grad_norm": 0.9926304221153259, + "learning_rate": 9.105829145728643e-06, + "loss": 0.0214, + "step": 9400 + }, + { + "epoch": 3.16, + "grad_norm": 1.402377724647522, + "learning_rate": 9.103316582914573e-06, + "loss": 0.0206, + "step": 9425 + }, + { + "epoch": 3.17, + "grad_norm": 0.9551749229431152, + "learning_rate": 9.100804020100504e-06, + "loss": 0.0206, + "step": 9450 + }, + { + "epoch": 3.18, + "grad_norm": 1.2923333644866943, + "learning_rate": 9.098291457286433e-06, + "loss": 0.0195, + "step": 9475 + }, + { + "epoch": 3.19, + "grad_norm": 1.0486456155776978, + "learning_rate": 9.095778894472362e-06, + "loss": 0.0215, + "step": 9500 + }, + { + "epoch": 3.2, + "grad_norm": 0.9971963763237, + "learning_rate": 9.093266331658292e-06, + "loss": 0.0215, + "step": 9525 + }, + { + "epoch": 3.21, + "grad_norm": 1.1273738145828247, + "learning_rate": 9.090753768844221e-06, + "loss": 0.0215, + "step": 9550 + }, + { + "epoch": 3.22, + "grad_norm": 1.2288731336593628, + "learning_rate": 9.088241206030152e-06, + "loss": 0.0201, + "step": 9575 + }, + { + "epoch": 3.22, + "grad_norm": 0.9889112114906311, + "learning_rate": 9.085728643216081e-06, + "loss": 0.0215, + "step": 9600 + }, + { + "epoch": 3.23, + "grad_norm": 1.0797648429870605, + "learning_rate": 9.08321608040201e-06, + "loss": 0.0199, + "step": 9625 + }, + { + "epoch": 3.24, + "grad_norm": 1.1388682126998901, + "learning_rate": 9.08070351758794e-06, + "loss": 0.0202, + "step": 9650 + }, + { + "epoch": 3.25, + "grad_norm": 1.1548923254013062, + "learning_rate": 9.07819095477387e-06, + "loss": 0.0209, + "step": 9675 + }, + { + "epoch": 3.26, + "grad_norm": 0.9999666213989258, + "learning_rate": 9.0756783919598e-06, + "loss": 0.021, + "step": 9700 + }, + { + "epoch": 3.27, + "grad_norm": 1.0486820936203003, + "learning_rate": 9.07316582914573e-06, + "loss": 0.0199, + "step": 9725 + }, + { + "epoch": 3.27, + "grad_norm": 0.9334202408790588, + "learning_rate": 9.070653266331659e-06, + "loss": 0.0202, + "step": 9750 + }, + { + "epoch": 3.28, + "grad_norm": 1.1530152559280396, + "learning_rate": 9.068140703517588e-06, + "loss": 0.0206, + "step": 9775 + }, + { + "epoch": 3.29, + "grad_norm": 1.3730748891830444, + "learning_rate": 9.065628140703518e-06, + "loss": 0.0201, + "step": 9800 + }, + { + "epoch": 3.3, + "grad_norm": 1.0372618436813354, + "learning_rate": 9.063115577889447e-06, + "loss": 0.0193, + "step": 9825 + }, + { + "epoch": 3.31, + "grad_norm": 1.283441424369812, + "learning_rate": 9.060603015075378e-06, + "loss": 0.0213, + "step": 9850 + }, + { + "epoch": 3.32, + "grad_norm": 1.1138241291046143, + "learning_rate": 9.058090452261307e-06, + "loss": 0.0212, + "step": 9875 + }, + { + "epoch": 3.32, + "grad_norm": 1.1349443197250366, + "learning_rate": 9.055577889447237e-06, + "loss": 0.0211, + "step": 9900 + }, + { + "epoch": 3.33, + "grad_norm": 0.9560257792472839, + "learning_rate": 9.053065326633168e-06, + "loss": 0.0196, + "step": 9925 + }, + { + "epoch": 3.34, + "grad_norm": 1.356440544128418, + "learning_rate": 9.050552763819095e-06, + "loss": 0.0202, + "step": 9950 + }, + { + "epoch": 3.35, + "grad_norm": 1.2191940546035767, + "learning_rate": 9.048040201005026e-06, + "loss": 0.0226, + "step": 9975 + }, + { + "epoch": 3.36, + "grad_norm": 1.2141897678375244, + "learning_rate": 9.045527638190956e-06, + "loss": 0.0194, + "step": 10000 + }, + { + "epoch": 3.36, + "eval_loss": 0.07093147188425064, + "eval_runtime": 2203.2793, + "eval_samples_per_second": 0.653, + "eval_steps_per_second": 0.653, + "eval_wer": 26.524176594253678, + "step": 10000 + }, + { + "epoch": 3.37, + "grad_norm": 1.1654024124145508, + "learning_rate": 9.043015075376885e-06, + "loss": 0.0212, + "step": 10025 + }, + { + "epoch": 3.37, + "grad_norm": 1.2797439098358154, + "learning_rate": 9.040502512562814e-06, + "loss": 0.0207, + "step": 10050 + }, + { + "epoch": 3.38, + "grad_norm": 1.0568851232528687, + "learning_rate": 9.037989949748744e-06, + "loss": 0.0199, + "step": 10075 + }, + { + "epoch": 3.39, + "grad_norm": 1.2576158046722412, + "learning_rate": 9.035477386934675e-06, + "loss": 0.0197, + "step": 10100 + }, + { + "epoch": 3.4, + "grad_norm": 1.0588635206222534, + "learning_rate": 9.032964824120604e-06, + "loss": 0.0212, + "step": 10125 + }, + { + "epoch": 3.41, + "grad_norm": 1.187954306602478, + "learning_rate": 9.030452261306533e-06, + "loss": 0.021, + "step": 10150 + }, + { + "epoch": 3.42, + "grad_norm": 1.495354175567627, + "learning_rate": 9.027939698492463e-06, + "loss": 0.0222, + "step": 10175 + }, + { + "epoch": 3.43, + "grad_norm": 1.1584885120391846, + "learning_rate": 9.025427135678394e-06, + "loss": 0.0205, + "step": 10200 + }, + { + "epoch": 3.43, + "grad_norm": 1.053249478340149, + "learning_rate": 9.022914572864321e-06, + "loss": 0.0199, + "step": 10225 + }, + { + "epoch": 3.44, + "grad_norm": 1.1618053913116455, + "learning_rate": 9.020402010050252e-06, + "loss": 0.0195, + "step": 10250 + }, + { + "epoch": 3.45, + "grad_norm": 1.1426554918289185, + "learning_rate": 9.017889447236182e-06, + "loss": 0.0206, + "step": 10275 + }, + { + "epoch": 3.46, + "grad_norm": 1.0312869548797607, + "learning_rate": 9.015376884422111e-06, + "loss": 0.0205, + "step": 10300 + }, + { + "epoch": 3.47, + "grad_norm": 1.024728536605835, + "learning_rate": 9.012864321608042e-06, + "loss": 0.0189, + "step": 10325 + }, + { + "epoch": 3.48, + "grad_norm": 0.9234597086906433, + "learning_rate": 9.01035175879397e-06, + "loss": 0.021, + "step": 10350 + }, + { + "epoch": 3.48, + "grad_norm": 1.5802397727966309, + "learning_rate": 9.0078391959799e-06, + "loss": 0.0196, + "step": 10375 + }, + { + "epoch": 3.49, + "grad_norm": 0.9125033020973206, + "learning_rate": 9.00532663316583e-06, + "loss": 0.0217, + "step": 10400 + }, + { + "epoch": 3.5, + "grad_norm": 0.9372380971908569, + "learning_rate": 9.00281407035176e-06, + "loss": 0.0198, + "step": 10425 + }, + { + "epoch": 3.51, + "grad_norm": 1.144573450088501, + "learning_rate": 9.000301507537689e-06, + "loss": 0.0207, + "step": 10450 + }, + { + "epoch": 3.52, + "grad_norm": 1.5041736364364624, + "learning_rate": 8.99778894472362e-06, + "loss": 0.0222, + "step": 10475 + }, + { + "epoch": 3.53, + "grad_norm": 1.2791879177093506, + "learning_rate": 8.995276381909549e-06, + "loss": 0.0214, + "step": 10500 + }, + { + "epoch": 3.53, + "grad_norm": 1.284491777420044, + "learning_rate": 8.992763819095478e-06, + "loss": 0.0201, + "step": 10525 + }, + { + "epoch": 3.54, + "grad_norm": 1.1425551176071167, + "learning_rate": 8.990251256281408e-06, + "loss": 0.0201, + "step": 10550 + }, + { + "epoch": 3.55, + "grad_norm": 0.9736939072608948, + "learning_rate": 8.987738693467337e-06, + "loss": 0.0197, + "step": 10575 + }, + { + "epoch": 3.56, + "grad_norm": 0.9984138011932373, + "learning_rate": 8.98532663316583e-06, + "loss": 0.0206, + "step": 10600 + }, + { + "epoch": 3.57, + "grad_norm": 1.4503074884414673, + "learning_rate": 8.98281407035176e-06, + "loss": 0.0195, + "step": 10625 + }, + { + "epoch": 3.58, + "grad_norm": 1.1678603887557983, + "learning_rate": 8.980301507537689e-06, + "loss": 0.0213, + "step": 10650 + }, + { + "epoch": 3.58, + "grad_norm": 1.0813093185424805, + "learning_rate": 8.977788944723618e-06, + "loss": 0.0197, + "step": 10675 + }, + { + "epoch": 3.59, + "grad_norm": 1.4429411888122559, + "learning_rate": 8.975276381909549e-06, + "loss": 0.0196, + "step": 10700 + }, + { + "epoch": 3.6, + "grad_norm": 1.4884651899337769, + "learning_rate": 8.972763819095478e-06, + "loss": 0.0205, + "step": 10725 + }, + { + "epoch": 3.61, + "grad_norm": 1.4401743412017822, + "learning_rate": 8.970251256281408e-06, + "loss": 0.0198, + "step": 10750 + }, + { + "epoch": 3.62, + "grad_norm": 1.2085216045379639, + "learning_rate": 8.967738693467337e-06, + "loss": 0.02, + "step": 10775 + }, + { + "epoch": 3.63, + "grad_norm": 1.544008731842041, + "learning_rate": 8.965226130653268e-06, + "loss": 0.0197, + "step": 10800 + }, + { + "epoch": 3.63, + "grad_norm": 1.2484915256500244, + "learning_rate": 8.962713567839196e-06, + "loss": 0.0217, + "step": 10825 + }, + { + "epoch": 3.64, + "grad_norm": 1.400039792060852, + "learning_rate": 8.960201005025127e-06, + "loss": 0.019, + "step": 10850 + }, + { + "epoch": 3.65, + "grad_norm": 0.9738909602165222, + "learning_rate": 8.957688442211056e-06, + "loss": 0.0209, + "step": 10875 + }, + { + "epoch": 3.66, + "grad_norm": 1.314047932624817, + "learning_rate": 8.955175879396985e-06, + "loss": 0.0195, + "step": 10900 + }, + { + "epoch": 3.67, + "grad_norm": 1.3660850524902344, + "learning_rate": 8.952663316582916e-06, + "loss": 0.0202, + "step": 10925 + }, + { + "epoch": 3.68, + "grad_norm": 1.1629201173782349, + "learning_rate": 8.950150753768844e-06, + "loss": 0.0192, + "step": 10950 + }, + { + "epoch": 3.69, + "grad_norm": 1.0940172672271729, + "learning_rate": 8.947638190954775e-06, + "loss": 0.0207, + "step": 10975 + }, + { + "epoch": 3.69, + "grad_norm": 1.1368433237075806, + "learning_rate": 8.945125628140704e-06, + "loss": 0.0208, + "step": 11000 + }, + { + "epoch": 3.69, + "eval_loss": 0.0689547061920166, + "eval_runtime": 2225.8789, + "eval_samples_per_second": 0.646, + "eval_steps_per_second": 0.646, + "eval_wer": 26.31394533987386, + "step": 11000 + }, + { + "epoch": 3.7, + "grad_norm": 1.216958999633789, + "learning_rate": 8.942613065326634e-06, + "loss": 0.0187, + "step": 11025 + }, + { + "epoch": 3.71, + "grad_norm": 1.0632380247116089, + "learning_rate": 8.940100502512563e-06, + "loss": 0.0197, + "step": 11050 + }, + { + "epoch": 3.72, + "grad_norm": 1.1978670358657837, + "learning_rate": 8.937587939698494e-06, + "loss": 0.0188, + "step": 11075 + }, + { + "epoch": 3.73, + "grad_norm": 1.2583171129226685, + "learning_rate": 8.935075376884423e-06, + "loss": 0.0191, + "step": 11100 + }, + { + "epoch": 3.74, + "grad_norm": 1.1806267499923706, + "learning_rate": 8.932562814070353e-06, + "loss": 0.0207, + "step": 11125 + }, + { + "epoch": 3.74, + "grad_norm": 1.2513149976730347, + "learning_rate": 8.930050251256282e-06, + "loss": 0.0193, + "step": 11150 + }, + { + "epoch": 3.75, + "grad_norm": 1.3589982986450195, + "learning_rate": 8.927537688442211e-06, + "loss": 0.022, + "step": 11175 + }, + { + "epoch": 3.76, + "grad_norm": 1.2002583742141724, + "learning_rate": 8.925025125628142e-06, + "loss": 0.0201, + "step": 11200 + }, + { + "epoch": 3.77, + "grad_norm": 1.006583333015442, + "learning_rate": 8.92251256281407e-06, + "loss": 0.0192, + "step": 11225 + }, + { + "epoch": 3.78, + "grad_norm": 1.2450827360153198, + "learning_rate": 8.920000000000001e-06, + "loss": 0.019, + "step": 11250 + }, + { + "epoch": 3.79, + "grad_norm": 1.3628928661346436, + "learning_rate": 8.91748743718593e-06, + "loss": 0.0212, + "step": 11275 + }, + { + "epoch": 3.79, + "grad_norm": 1.1357342004776, + "learning_rate": 8.91497487437186e-06, + "loss": 0.0207, + "step": 11300 + }, + { + "epoch": 3.8, + "grad_norm": 1.1282624006271362, + "learning_rate": 8.91246231155779e-06, + "loss": 0.0191, + "step": 11325 + }, + { + "epoch": 3.81, + "grad_norm": 1.0978237390518188, + "learning_rate": 8.90994974874372e-06, + "loss": 0.0184, + "step": 11350 + }, + { + "epoch": 3.82, + "grad_norm": 1.1369669437408447, + "learning_rate": 8.90743718592965e-06, + "loss": 0.0205, + "step": 11375 + }, + { + "epoch": 3.83, + "grad_norm": 1.4883924722671509, + "learning_rate": 8.904924623115579e-06, + "loss": 0.0216, + "step": 11400 + }, + { + "epoch": 3.84, + "grad_norm": 1.3058867454528809, + "learning_rate": 8.902412060301508e-06, + "loss": 0.0189, + "step": 11425 + }, + { + "epoch": 3.84, + "grad_norm": 1.3146320581436157, + "learning_rate": 8.899899497487437e-06, + "loss": 0.0191, + "step": 11450 + }, + { + "epoch": 3.85, + "grad_norm": 1.1274038553237915, + "learning_rate": 8.897386934673368e-06, + "loss": 0.02, + "step": 11475 + }, + { + "epoch": 3.86, + "grad_norm": 1.1110966205596924, + "learning_rate": 8.894874371859296e-06, + "loss": 0.0213, + "step": 11500 + }, + { + "epoch": 3.87, + "grad_norm": 1.4053088426589966, + "learning_rate": 8.892361809045227e-06, + "loss": 0.0214, + "step": 11525 + }, + { + "epoch": 3.88, + "grad_norm": 1.317447543144226, + "learning_rate": 8.889849246231156e-06, + "loss": 0.0199, + "step": 11550 + }, + { + "epoch": 3.89, + "grad_norm": 1.065755844116211, + "learning_rate": 8.887336683417086e-06, + "loss": 0.0173, + "step": 11575 + }, + { + "epoch": 3.9, + "grad_norm": 1.2681610584259033, + "learning_rate": 8.884824120603017e-06, + "loss": 0.0187, + "step": 11600 + }, + { + "epoch": 3.9, + "grad_norm": 1.0049889087677002, + "learning_rate": 8.882311557788946e-06, + "loss": 0.02, + "step": 11625 + }, + { + "epoch": 3.91, + "grad_norm": 1.0553700923919678, + "learning_rate": 8.879798994974875e-06, + "loss": 0.0198, + "step": 11650 + }, + { + "epoch": 3.92, + "grad_norm": 1.1015956401824951, + "learning_rate": 8.877286432160805e-06, + "loss": 0.0189, + "step": 11675 + }, + { + "epoch": 3.93, + "grad_norm": 1.3183544874191284, + "learning_rate": 8.874773869346734e-06, + "loss": 0.0185, + "step": 11700 + }, + { + "epoch": 3.94, + "grad_norm": 1.1317213773727417, + "learning_rate": 8.872261306532665e-06, + "loss": 0.0205, + "step": 11725 + }, + { + "epoch": 3.95, + "grad_norm": 1.2522101402282715, + "learning_rate": 8.869748743718594e-06, + "loss": 0.0202, + "step": 11750 + }, + { + "epoch": 3.95, + "grad_norm": 1.2194178104400635, + "learning_rate": 8.867236180904524e-06, + "loss": 0.0209, + "step": 11775 + }, + { + "epoch": 3.96, + "grad_norm": 1.118335247039795, + "learning_rate": 8.864723618090453e-06, + "loss": 0.0205, + "step": 11800 + }, + { + "epoch": 3.97, + "grad_norm": 1.1367900371551514, + "learning_rate": 8.862211055276382e-06, + "loss": 0.0203, + "step": 11825 + }, + { + "epoch": 3.98, + "grad_norm": 1.4416879415512085, + "learning_rate": 8.859698492462312e-06, + "loss": 0.0195, + "step": 11850 + }, + { + "epoch": 3.99, + "grad_norm": 1.1529754400253296, + "learning_rate": 8.857185929648243e-06, + "loss": 0.0213, + "step": 11875 + }, + { + "epoch": 4.0, + "grad_norm": 1.152527928352356, + "learning_rate": 8.854673366834172e-06, + "loss": 0.0185, + "step": 11900 + }, + { + "epoch": 4.0, + "grad_norm": 1.2627662420272827, + "learning_rate": 8.852160804020101e-06, + "loss": 0.0147, + "step": 11925 + }, + { + "epoch": 4.01, + "grad_norm": 0.630208432674408, + "learning_rate": 8.849648241206032e-06, + "loss": 0.0097, + "step": 11950 + }, + { + "epoch": 4.02, + "grad_norm": 0.9759485721588135, + "learning_rate": 8.84713567839196e-06, + "loss": 0.0097, + "step": 11975 + }, + { + "epoch": 4.03, + "grad_norm": 1.0542025566101074, + "learning_rate": 8.844623115577891e-06, + "loss": 0.0097, + "step": 12000 + }, + { + "epoch": 4.03, + "eval_loss": 0.07671801000833511, + "eval_runtime": 2218.0909, + "eval_samples_per_second": 0.648, + "eval_steps_per_second": 0.648, + "eval_wer": 25.718290119131044, + "step": 12000 + }, + { + "epoch": 4.04, + "grad_norm": 1.0903425216674805, + "learning_rate": 8.84211055276382e-06, + "loss": 0.0093, + "step": 12025 + }, + { + "epoch": 4.05, + "grad_norm": 0.9077092409133911, + "learning_rate": 8.83959798994975e-06, + "loss": 0.009, + "step": 12050 + }, + { + "epoch": 4.05, + "grad_norm": 1.0161209106445312, + "learning_rate": 8.837085427135679e-06, + "loss": 0.0103, + "step": 12075 + }, + { + "epoch": 4.06, + "grad_norm": 0.9369003772735596, + "learning_rate": 8.834572864321608e-06, + "loss": 0.0096, + "step": 12100 + }, + { + "epoch": 4.07, + "grad_norm": 0.7047864198684692, + "learning_rate": 8.832060301507537e-06, + "loss": 0.0096, + "step": 12125 + }, + { + "epoch": 4.08, + "grad_norm": 0.9905818104743958, + "learning_rate": 8.829547738693468e-06, + "loss": 0.0096, + "step": 12150 + }, + { + "epoch": 4.09, + "grad_norm": 0.8698284029960632, + "learning_rate": 8.827035175879398e-06, + "loss": 0.0096, + "step": 12175 + }, + { + "epoch": 4.1, + "grad_norm": 0.8159314393997192, + "learning_rate": 8.824522613065327e-06, + "loss": 0.01, + "step": 12200 + }, + { + "epoch": 4.11, + "grad_norm": 1.2592952251434326, + "learning_rate": 8.822010050251258e-06, + "loss": 0.0102, + "step": 12225 + }, + { + "epoch": 4.11, + "grad_norm": 0.7465018630027771, + "learning_rate": 8.819497487437186e-06, + "loss": 0.0092, + "step": 12250 + }, + { + "epoch": 4.12, + "grad_norm": 0.984822690486908, + "learning_rate": 8.816984924623117e-06, + "loss": 0.01, + "step": 12275 + }, + { + "epoch": 4.13, + "grad_norm": 1.1753443479537964, + "learning_rate": 8.814472361809046e-06, + "loss": 0.01, + "step": 12300 + }, + { + "epoch": 4.14, + "grad_norm": 1.0591659545898438, + "learning_rate": 8.811959798994975e-06, + "loss": 0.0102, + "step": 12325 + }, + { + "epoch": 4.15, + "grad_norm": 0.9369853138923645, + "learning_rate": 8.809447236180905e-06, + "loss": 0.0102, + "step": 12350 + }, + { + "epoch": 4.16, + "grad_norm": 0.9273166060447693, + "learning_rate": 8.806934673366834e-06, + "loss": 0.0097, + "step": 12375 + }, + { + "epoch": 4.16, + "grad_norm": 0.8711921572685242, + "learning_rate": 8.804422110552765e-06, + "loss": 0.0109, + "step": 12400 + }, + { + "epoch": 4.17, + "grad_norm": 1.0128122568130493, + "learning_rate": 8.801909547738694e-06, + "loss": 0.0106, + "step": 12425 + }, + { + "epoch": 4.18, + "grad_norm": 1.1748135089874268, + "learning_rate": 8.799396984924624e-06, + "loss": 0.0099, + "step": 12450 + }, + { + "epoch": 4.19, + "grad_norm": 1.01030695438385, + "learning_rate": 8.796884422110553e-06, + "loss": 0.0108, + "step": 12475 + }, + { + "epoch": 4.2, + "grad_norm": 0.9983416795730591, + "learning_rate": 8.794371859296484e-06, + "loss": 0.0101, + "step": 12500 + }, + { + "epoch": 4.21, + "grad_norm": 1.2152098417282104, + "learning_rate": 8.791859296482412e-06, + "loss": 0.0098, + "step": 12525 + }, + { + "epoch": 4.21, + "grad_norm": 1.2319709062576294, + "learning_rate": 8.789346733668343e-06, + "loss": 0.0104, + "step": 12550 + }, + { + "epoch": 4.22, + "grad_norm": 1.3658519983291626, + "learning_rate": 8.786834170854272e-06, + "loss": 0.0108, + "step": 12575 + }, + { + "epoch": 4.23, + "grad_norm": 1.9016681909561157, + "learning_rate": 8.784321608040201e-06, + "loss": 0.0099, + "step": 12600 + }, + { + "epoch": 4.24, + "grad_norm": 0.9044590592384338, + "learning_rate": 8.781809045226132e-06, + "loss": 0.0104, + "step": 12625 + }, + { + "epoch": 4.25, + "grad_norm": 1.0562890768051147, + "learning_rate": 8.77929648241206e-06, + "loss": 0.0109, + "step": 12650 + }, + { + "epoch": 4.26, + "grad_norm": 1.1044700145721436, + "learning_rate": 8.776783919597991e-06, + "loss": 0.0116, + "step": 12675 + }, + { + "epoch": 4.26, + "grad_norm": 1.037757396697998, + "learning_rate": 8.77427135678392e-06, + "loss": 0.0105, + "step": 12700 + }, + { + "epoch": 4.27, + "grad_norm": 0.9418870210647583, + "learning_rate": 8.77175879396985e-06, + "loss": 0.0114, + "step": 12725 + }, + { + "epoch": 4.28, + "grad_norm": 1.2983318567276, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0121, + "step": 12750 + }, + { + "epoch": 4.29, + "grad_norm": 1.0516164302825928, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0108, + "step": 12775 + }, + { + "epoch": 4.3, + "grad_norm": 1.3343629837036133, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0116, + "step": 12800 + }, + { + "epoch": 4.31, + "grad_norm": 1.0538562536239624, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0108, + "step": 12825 + }, + { + "epoch": 4.31, + "grad_norm": 0.9237893223762512, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0114, + "step": 12850 + }, + { + "epoch": 4.32, + "grad_norm": 0.9574481248855591, + "learning_rate": 8.756783919597991e-06, + "loss": 0.011, + "step": 12875 + }, + { + "epoch": 4.33, + "grad_norm": 1.0971503257751465, + "learning_rate": 8.75427135678392e-06, + "loss": 0.011, + "step": 12900 + }, + { + "epoch": 4.34, + "grad_norm": 0.8400355577468872, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0101, + "step": 12925 + }, + { + "epoch": 4.35, + "grad_norm": 0.8911335468292236, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0105, + "step": 12950 + }, + { + "epoch": 4.36, + "grad_norm": 1.2806458473205566, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0103, + "step": 12975 + }, + { + "epoch": 4.37, + "grad_norm": 0.8399025201797485, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0099, + "step": 13000 + }, + { + "epoch": 4.37, + "eval_loss": 0.0814567357301712, + "eval_runtime": 2184.9526, + "eval_samples_per_second": 0.658, + "eval_steps_per_second": 0.658, + "eval_wer": 26.594253679046954, + "step": 13000 + }, + { + "epoch": 4.37, + "grad_norm": 1.1353908777236938, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0103, + "step": 13025 + }, + { + "epoch": 4.38, + "grad_norm": 1.1343674659729004, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0116, + "step": 13050 + }, + { + "epoch": 4.39, + "grad_norm": 0.9133092761039734, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0114, + "step": 13075 + }, + { + "epoch": 4.4, + "grad_norm": 0.9172067642211914, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0105, + "step": 13100 + }, + { + "epoch": 4.41, + "grad_norm": 0.8791282176971436, + "learning_rate": 8.731658291457286e-06, + "loss": 0.0111, + "step": 13125 + }, + { + "epoch": 4.42, + "grad_norm": 1.261222004890442, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0113, + "step": 13150 + }, + { + "epoch": 4.42, + "grad_norm": 1.037269949913025, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0105, + "step": 13175 + }, + { + "epoch": 4.43, + "grad_norm": 0.9850906729698181, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0112, + "step": 13200 + }, + { + "epoch": 4.44, + "grad_norm": 1.4384980201721191, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0114, + "step": 13225 + }, + { + "epoch": 4.45, + "grad_norm": 0.843083918094635, + "learning_rate": 8.719095477386934e-06, + "loss": 0.0106, + "step": 13250 + }, + { + "epoch": 4.46, + "grad_norm": 1.214704990386963, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0121, + "step": 13275 + }, + { + "epoch": 4.47, + "grad_norm": 1.1973131895065308, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0105, + "step": 13300 + }, + { + "epoch": 4.47, + "grad_norm": 0.8236217498779297, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0114, + "step": 13325 + }, + { + "epoch": 4.48, + "grad_norm": 0.7302666902542114, + "learning_rate": 8.709045226130653e-06, + "loss": 0.0108, + "step": 13350 + }, + { + "epoch": 4.49, + "grad_norm": 0.8673868179321289, + "learning_rate": 8.706532663316584e-06, + "loss": 0.011, + "step": 13375 + }, + { + "epoch": 4.5, + "grad_norm": 1.2909928560256958, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0112, + "step": 13400 + }, + { + "epoch": 4.51, + "grad_norm": 0.9593124985694885, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0107, + "step": 13425 + }, + { + "epoch": 4.52, + "grad_norm": 1.0825756788253784, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0109, + "step": 13450 + }, + { + "epoch": 4.52, + "grad_norm": 0.8791283965110779, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0101, + "step": 13475 + }, + { + "epoch": 4.53, + "grad_norm": 1.0033494234085083, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0115, + "step": 13500 + }, + { + "epoch": 4.54, + "grad_norm": 1.1432744264602661, + "learning_rate": 8.69145728643216e-06, + "loss": 0.0111, + "step": 13525 + }, + { + "epoch": 4.55, + "grad_norm": 0.8269137144088745, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0113, + "step": 13550 + }, + { + "epoch": 4.56, + "grad_norm": 1.2070598602294922, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0106, + "step": 13575 + }, + { + "epoch": 4.57, + "grad_norm": 1.2351694107055664, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0106, + "step": 13600 + }, + { + "epoch": 4.58, + "grad_norm": 0.9835574626922607, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0108, + "step": 13625 + }, + { + "epoch": 4.58, + "grad_norm": 1.0029293298721313, + "learning_rate": 8.67889447236181e-06, + "loss": 0.012, + "step": 13650 + }, + { + "epoch": 4.59, + "grad_norm": 1.0094988346099854, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0129, + "step": 13675 + }, + { + "epoch": 4.6, + "grad_norm": 1.1272472143173218, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0098, + "step": 13700 + }, + { + "epoch": 4.61, + "grad_norm": 1.0893923044204712, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0109, + "step": 13725 + }, + { + "epoch": 4.62, + "grad_norm": 1.2676475048065186, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0113, + "step": 13750 + }, + { + "epoch": 4.63, + "grad_norm": 1.2751808166503906, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0106, + "step": 13775 + }, + { + "epoch": 4.63, + "grad_norm": 0.9188461899757385, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0107, + "step": 13800 + }, + { + "epoch": 4.64, + "grad_norm": 1.101523756980896, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0111, + "step": 13825 + }, + { + "epoch": 4.65, + "grad_norm": 1.0097473859786987, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0113, + "step": 13850 + }, + { + "epoch": 4.66, + "grad_norm": 0.9993265271186829, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0103, + "step": 13875 + }, + { + "epoch": 4.67, + "grad_norm": 1.0219284296035767, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0113, + "step": 13900 + }, + { + "epoch": 4.68, + "grad_norm": 1.0482418537139893, + "learning_rate": 8.651256281407036e-06, + "loss": 0.01, + "step": 13925 + }, + { + "epoch": 4.68, + "grad_norm": 1.2051645517349243, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0119, + "step": 13950 + }, + { + "epoch": 4.69, + "grad_norm": 1.0789493322372437, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0116, + "step": 13975 + }, + { + "epoch": 4.7, + "grad_norm": 1.0950485467910767, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0112, + "step": 14000 + }, + { + "epoch": 4.7, + "eval_loss": 0.08312532305717468, + "eval_runtime": 2186.931, + "eval_samples_per_second": 0.658, + "eval_steps_per_second": 0.658, + "eval_wer": 26.129992992291523, + "step": 14000 + }, + { + "epoch": 4.71, + "grad_norm": 0.9560486078262329, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0117, + "step": 14025 + }, + { + "epoch": 4.72, + "grad_norm": 1.2727199792861938, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0113, + "step": 14050 + }, + { + "epoch": 4.73, + "grad_norm": 1.2321146726608276, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0117, + "step": 14075 + }, + { + "epoch": 4.73, + "grad_norm": 0.9502439498901367, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0112, + "step": 14100 + }, + { + "epoch": 4.74, + "grad_norm": 1.1416475772857666, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0113, + "step": 14125 + }, + { + "epoch": 4.75, + "grad_norm": 1.085327386856079, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0116, + "step": 14150 + }, + { + "epoch": 4.76, + "grad_norm": 1.0319383144378662, + "learning_rate": 8.626130653266333e-06, + "loss": 0.011, + "step": 14175 + }, + { + "epoch": 4.77, + "grad_norm": 1.1536074876785278, + "learning_rate": 8.623618090452262e-06, + "loss": 0.011, + "step": 14200 + }, + { + "epoch": 4.78, + "grad_norm": 1.141798734664917, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0104, + "step": 14225 + }, + { + "epoch": 4.79, + "grad_norm": 1.0708343982696533, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0113, + "step": 14250 + }, + { + "epoch": 4.79, + "grad_norm": 1.1060547828674316, + "learning_rate": 8.61608040201005e-06, + "loss": 0.012, + "step": 14275 + }, + { + "epoch": 4.8, + "grad_norm": 1.1168172359466553, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0114, + "step": 14300 + }, + { + "epoch": 4.81, + "grad_norm": 1.1789426803588867, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0106, + "step": 14325 + }, + { + "epoch": 4.82, + "grad_norm": 0.9686365127563477, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0101, + "step": 14350 + }, + { + "epoch": 4.83, + "grad_norm": 1.6355328559875488, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0112, + "step": 14375 + }, + { + "epoch": 4.84, + "grad_norm": 0.9446016550064087, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0112, + "step": 14400 + }, + { + "epoch": 4.84, + "grad_norm": 1.2103934288024902, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0105, + "step": 14425 + }, + { + "epoch": 4.85, + "grad_norm": 0.8974562883377075, + "learning_rate": 8.598492462311559e-06, + "loss": 0.0114, + "step": 14450 + }, + { + "epoch": 4.86, + "grad_norm": 1.3880276679992676, + "learning_rate": 8.595979899497488e-06, + "loss": 0.0121, + "step": 14475 + }, + { + "epoch": 4.87, + "grad_norm": 0.9181758165359497, + "learning_rate": 8.593467336683418e-06, + "loss": 0.0112, + "step": 14500 + }, + { + "epoch": 4.88, + "grad_norm": 1.5292812585830688, + "learning_rate": 8.590954773869347e-06, + "loss": 0.011, + "step": 14525 + }, + { + "epoch": 4.89, + "grad_norm": 1.1014959812164307, + "learning_rate": 8.588442211055276e-06, + "loss": 0.0115, + "step": 14550 + }, + { + "epoch": 4.89, + "grad_norm": 0.8538802266120911, + "learning_rate": 8.585929648241207e-06, + "loss": 0.0117, + "step": 14575 + }, + { + "epoch": 4.9, + "grad_norm": 0.7399652004241943, + "learning_rate": 8.583417085427137e-06, + "loss": 0.0115, + "step": 14600 + }, + { + "epoch": 4.91, + "grad_norm": 0.7713037729263306, + "learning_rate": 8.580904522613066e-06, + "loss": 0.0102, + "step": 14625 + }, + { + "epoch": 4.92, + "grad_norm": 0.7357013821601868, + "learning_rate": 8.578391959798997e-06, + "loss": 0.0107, + "step": 14650 + }, + { + "epoch": 4.93, + "grad_norm": 1.0477229356765747, + "learning_rate": 8.575879396984925e-06, + "loss": 0.0115, + "step": 14675 + }, + { + "epoch": 4.94, + "grad_norm": 1.1834394931793213, + "learning_rate": 8.573366834170856e-06, + "loss": 0.0104, + "step": 14700 + }, + { + "epoch": 4.94, + "grad_norm": 1.4713468551635742, + "learning_rate": 8.570854271356785e-06, + "loss": 0.0123, + "step": 14725 + }, + { + "epoch": 4.95, + "grad_norm": 1.037386178970337, + "learning_rate": 8.568341708542714e-06, + "loss": 0.0106, + "step": 14750 + }, + { + "epoch": 4.96, + "grad_norm": 1.0278129577636719, + "learning_rate": 8.565829145728644e-06, + "loss": 0.0111, + "step": 14775 + }, + { + "epoch": 4.97, + "grad_norm": 0.8479203581809998, + "learning_rate": 8.563316582914573e-06, + "loss": 0.01, + "step": 14800 + }, + { + "epoch": 4.98, + "grad_norm": 1.02461576461792, + "learning_rate": 8.560804020100502e-06, + "loss": 0.0109, + "step": 14825 + }, + { + "epoch": 4.99, + "grad_norm": 0.8603954911231995, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0104, + "step": 14850 + }, + { + "epoch": 4.99, + "grad_norm": 0.9691808819770813, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0107, + "step": 14875 + }, + { + "epoch": 5.0, + "grad_norm": 0.5671743750572205, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0089, + "step": 14900 + }, + { + "epoch": 5.01, + "grad_norm": 0.7860472202301025, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0055, + "step": 14925 + }, + { + "epoch": 5.02, + "grad_norm": 0.7847031354904175, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0051, + "step": 14950 + }, + { + "epoch": 5.03, + "grad_norm": 0.7335106730461121, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0053, + "step": 14975 + }, + { + "epoch": 5.04, + "grad_norm": 0.5943393707275391, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0055, + "step": 15000 + }, + { + "epoch": 5.04, + "eval_loss": 0.08730152994394302, + "eval_runtime": 2202.8821, + "eval_samples_per_second": 0.653, + "eval_steps_per_second": 0.653, + "eval_wer": 26.23510861948143, + "step": 15000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 1000, + "total_flos": 1.384979053215744e+20, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-small/telugu/checkpoint-15000/training_args.bin b/checkpoints/whisper-small/telugu/checkpoint-15000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c399a963304b21623aa77bef1761cea72ac14868 --- /dev/null +++ b/checkpoints/whisper-small/telugu/checkpoint-15000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec09d90290931996f70757cca6b46529dc08cede70f6e4d9eda09604737feee +size 4667 diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/config.json b/checkpoints/whisper-tiny/bengali/checkpoint-24000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..016474873b18aa450490c38e41d6e46d52905326 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50302 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/generation_config.json b/checkpoints/whisper-tiny/bengali/checkpoint-24000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/model.safetensors b/checkpoints/whisper-tiny/bengali/checkpoint-24000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f4f5e9ffafa8e9b269a2576111fd8646a4ab565 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cac10ad4d4f9af918a596f32a706c4ed042ca20f31eea96204a1504348592b +size 151061672 diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/optimizer.pt b/checkpoints/whisper-tiny/bengali/checkpoint-24000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5450bd9a366a2d3b75462296935645d45923b4f0 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253af6c7d40e0e1fbc51f53fdb89045127e9f7ee646e46e7acd8a7d3cfb43e97 +size 297615749 diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/preprocessor_config.json b/checkpoints/whisper-tiny/bengali/checkpoint-24000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/rng_state.pth b/checkpoints/whisper-tiny/bengali/checkpoint-24000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb0c199e751e3eecfa1e31dede88c6faa0315830 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6af54fa6568f862fc89bdc093a98576f63e5a2bea29d38494d1c52a412a63db +size 14575 diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/scheduler.pt b/checkpoints/whisper-tiny/bengali/checkpoint-24000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f125a2a3a0e1e24b444e82e2c4cbfb087fc9168 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc672aa3c9e2038c7eab9551798d77eff82ab1cb587df10c2323892c34b0ff2 +size 627 diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/trainer_state.json b/checkpoints/whisper-tiny/bengali/checkpoint-24000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e90b34f11f4405ff30f013b203b790c57481e175 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/trainer_state.json @@ -0,0 +1,6957 @@ +{ + "best_metric": 31.940068613036477, + "best_model_checkpoint": "results/whisper-tiny/bengali/checkpoint-14000", + "epoch": 17.89709172259508, + "eval_steps": 1000, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 86.77481842041016, + "learning_rate": 4.4e-07, + "loss": 3.1552, + "step": 25 + }, + { + "epoch": 0.04, + "grad_norm": 31.03898811340332, + "learning_rate": 9.400000000000001e-07, + "loss": 2.6774, + "step": 50 + }, + { + "epoch": 0.06, + "grad_norm": 11.466958045959473, + "learning_rate": 1.44e-06, + "loss": 2.1626, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 7.000813961029053, + "learning_rate": 1.94e-06, + "loss": 1.7902, + "step": 100 + }, + { + "epoch": 0.09, + "grad_norm": 6.864625453948975, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.5376, + "step": 125 + }, + { + "epoch": 0.11, + "grad_norm": 8.48773193359375, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.394, + "step": 150 + }, + { + "epoch": 0.13, + "grad_norm": 7.0890278816223145, + "learning_rate": 3.44e-06, + "loss": 1.3294, + "step": 175 + }, + { + "epoch": 0.15, + "grad_norm": 6.769587993621826, + "learning_rate": 3.94e-06, + "loss": 1.2935, + "step": 200 + }, + { + "epoch": 0.17, + "grad_norm": 9.4171781539917, + "learning_rate": 4.440000000000001e-06, + "loss": 1.261, + "step": 225 + }, + { + "epoch": 0.19, + "grad_norm": 7.843432426452637, + "learning_rate": 4.94e-06, + "loss": 1.2313, + "step": 250 + }, + { + "epoch": 0.21, + "grad_norm": 7.717562198638916, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.2028, + "step": 275 + }, + { + "epoch": 0.22, + "grad_norm": 8.582967758178711, + "learning_rate": 5.94e-06, + "loss": 1.1842, + "step": 300 + }, + { + "epoch": 0.24, + "grad_norm": 8.355676651000977, + "learning_rate": 6.440000000000001e-06, + "loss": 1.1579, + "step": 325 + }, + { + "epoch": 0.26, + "grad_norm": 8.123638153076172, + "learning_rate": 6.9400000000000005e-06, + "loss": 1.1327, + "step": 350 + }, + { + "epoch": 0.28, + "grad_norm": 8.312814712524414, + "learning_rate": 7.440000000000001e-06, + "loss": 1.1038, + "step": 375 + }, + { + "epoch": 0.3, + "grad_norm": 10.63394832611084, + "learning_rate": 7.94e-06, + "loss": 1.0431, + "step": 400 + }, + { + "epoch": 0.32, + "grad_norm": 13.0150785446167, + "learning_rate": 8.44e-06, + "loss": 0.9093, + "step": 425 + }, + { + "epoch": 0.34, + "grad_norm": 12.24046516418457, + "learning_rate": 8.94e-06, + "loss": 0.7252, + "step": 450 + }, + { + "epoch": 0.35, + "grad_norm": 9.706815719604492, + "learning_rate": 9.440000000000001e-06, + "loss": 0.5819, + "step": 475 + }, + { + "epoch": 0.37, + "grad_norm": 6.65410041809082, + "learning_rate": 9.940000000000001e-06, + "loss": 0.4929, + "step": 500 + }, + { + "epoch": 0.39, + "grad_norm": 6.4019012451171875, + "learning_rate": 9.997788944723618e-06, + "loss": 0.4392, + "step": 525 + }, + { + "epoch": 0.41, + "grad_norm": 6.811336994171143, + "learning_rate": 9.99527638190955e-06, + "loss": 0.4035, + "step": 550 + }, + { + "epoch": 0.43, + "grad_norm": 6.439798355102539, + "learning_rate": 9.992763819095477e-06, + "loss": 0.3763, + "step": 575 + }, + { + "epoch": 0.45, + "grad_norm": 4.595059394836426, + "learning_rate": 9.990251256281408e-06, + "loss": 0.361, + "step": 600 + }, + { + "epoch": 0.47, + "grad_norm": 4.745821475982666, + "learning_rate": 9.987738693467337e-06, + "loss": 0.3459, + "step": 625 + }, + { + "epoch": 0.48, + "grad_norm": 5.3555779457092285, + "learning_rate": 9.985226130653267e-06, + "loss": 0.3253, + "step": 650 + }, + { + "epoch": 0.5, + "grad_norm": 4.771772861480713, + "learning_rate": 9.982713567839198e-06, + "loss": 0.3231, + "step": 675 + }, + { + "epoch": 0.52, + "grad_norm": 6.4770612716674805, + "learning_rate": 9.980201005025127e-06, + "loss": 0.3113, + "step": 700 + }, + { + "epoch": 0.54, + "grad_norm": 4.604433059692383, + "learning_rate": 9.977688442211056e-06, + "loss": 0.304, + "step": 725 + }, + { + "epoch": 0.56, + "grad_norm": 4.439276218414307, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2958, + "step": 750 + }, + { + "epoch": 0.58, + "grad_norm": 6.557934761047363, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2869, + "step": 775 + }, + { + "epoch": 0.6, + "grad_norm": 4.368564128875732, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2831, + "step": 800 + }, + { + "epoch": 0.62, + "grad_norm": 5.274904251098633, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2777, + "step": 825 + }, + { + "epoch": 0.63, + "grad_norm": 3.7388675212860107, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2734, + "step": 850 + }, + { + "epoch": 0.65, + "grad_norm": 4.695866107940674, + "learning_rate": 9.962613065326634e-06, + "loss": 0.2666, + "step": 875 + }, + { + "epoch": 0.67, + "grad_norm": 7.215860366821289, + "learning_rate": 9.960100502512563e-06, + "loss": 0.2629, + "step": 900 + }, + { + "epoch": 0.69, + "grad_norm": 5.912736415863037, + "learning_rate": 9.957587939698493e-06, + "loss": 0.2574, + "step": 925 + }, + { + "epoch": 0.71, + "grad_norm": 4.312127113342285, + "learning_rate": 9.955075376884424e-06, + "loss": 0.2572, + "step": 950 + }, + { + "epoch": 0.73, + "grad_norm": 6.4269490242004395, + "learning_rate": 9.952562814070353e-06, + "loss": 0.2454, + "step": 975 + }, + { + "epoch": 0.75, + "grad_norm": 6.280921936035156, + "learning_rate": 9.950050251256282e-06, + "loss": 0.2516, + "step": 1000 + }, + { + "epoch": 0.75, + "eval_loss": 0.20643219351768494, + "eval_runtime": 1158.3972, + "eval_samples_per_second": 1.295, + "eval_steps_per_second": 1.295, + "eval_wer": 57.3408947700063, + "step": 1000 + }, + { + "epoch": 0.76, + "grad_norm": 4.765142917633057, + "learning_rate": 9.947537688442212e-06, + "loss": 0.2448, + "step": 1025 + }, + { + "epoch": 0.78, + "grad_norm": 4.306541919708252, + "learning_rate": 9.945025125628141e-06, + "loss": 0.24, + "step": 1050 + }, + { + "epoch": 0.8, + "grad_norm": 5.98037576675415, + "learning_rate": 9.94251256281407e-06, + "loss": 0.2388, + "step": 1075 + }, + { + "epoch": 0.82, + "grad_norm": 4.161713123321533, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2383, + "step": 1100 + }, + { + "epoch": 0.84, + "grad_norm": 5.226271629333496, + "learning_rate": 9.93748743718593e-06, + "loss": 0.2265, + "step": 1125 + }, + { + "epoch": 0.86, + "grad_norm": 4.507941722869873, + "learning_rate": 9.93497487437186e-06, + "loss": 0.2367, + "step": 1150 + }, + { + "epoch": 0.88, + "grad_norm": 4.829678058624268, + "learning_rate": 9.93246231155779e-06, + "loss": 0.2283, + "step": 1175 + }, + { + "epoch": 0.89, + "grad_norm": 6.902363300323486, + "learning_rate": 9.929949748743719e-06, + "loss": 0.2281, + "step": 1200 + }, + { + "epoch": 0.91, + "grad_norm": 4.969267845153809, + "learning_rate": 9.92743718592965e-06, + "loss": 0.2273, + "step": 1225 + }, + { + "epoch": 0.93, + "grad_norm": 3.942322254180908, + "learning_rate": 9.924924623115579e-06, + "loss": 0.2221, + "step": 1250 + }, + { + "epoch": 0.95, + "grad_norm": 3.712048292160034, + "learning_rate": 9.922412060301508e-06, + "loss": 0.2214, + "step": 1275 + }, + { + "epoch": 0.97, + "grad_norm": 4.119071960449219, + "learning_rate": 9.91989949748744e-06, + "loss": 0.215, + "step": 1300 + }, + { + "epoch": 0.99, + "grad_norm": 5.248748779296875, + "learning_rate": 9.917386934673367e-06, + "loss": 0.2148, + "step": 1325 + }, + { + "epoch": 1.01, + "grad_norm": 4.173226356506348, + "learning_rate": 9.914874371859298e-06, + "loss": 0.2128, + "step": 1350 + }, + { + "epoch": 1.03, + "grad_norm": 5.430807590484619, + "learning_rate": 9.912361809045227e-06, + "loss": 0.2075, + "step": 1375 + }, + { + "epoch": 1.04, + "grad_norm": 3.387723445892334, + "learning_rate": 9.909849246231157e-06, + "loss": 0.2045, + "step": 1400 + }, + { + "epoch": 1.06, + "grad_norm": 3.2586870193481445, + "learning_rate": 9.907336683417086e-06, + "loss": 0.204, + "step": 1425 + }, + { + "epoch": 1.08, + "grad_norm": 4.557243824005127, + "learning_rate": 9.904824120603015e-06, + "loss": 0.1982, + "step": 1450 + }, + { + "epoch": 1.1, + "grad_norm": 6.699730396270752, + "learning_rate": 9.902311557788945e-06, + "loss": 0.1992, + "step": 1475 + }, + { + "epoch": 1.12, + "grad_norm": 5.1028337478637695, + "learning_rate": 9.899798994974876e-06, + "loss": 0.2005, + "step": 1500 + }, + { + "epoch": 1.14, + "grad_norm": 4.508855819702148, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1916, + "step": 1525 + }, + { + "epoch": 1.16, + "grad_norm": 4.233107566833496, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1956, + "step": 1550 + }, + { + "epoch": 1.17, + "grad_norm": 4.481318950653076, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1901, + "step": 1575 + }, + { + "epoch": 1.19, + "grad_norm": 3.6122169494628906, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1898, + "step": 1600 + }, + { + "epoch": 1.21, + "grad_norm": 4.238250255584717, + "learning_rate": 9.887236180904524e-06, + "loss": 0.189, + "step": 1625 + }, + { + "epoch": 1.23, + "grad_norm": 5.4916558265686035, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1877, + "step": 1650 + }, + { + "epoch": 1.25, + "grad_norm": 4.582568645477295, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1913, + "step": 1675 + }, + { + "epoch": 1.27, + "grad_norm": 4.710480690002441, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1855, + "step": 1700 + }, + { + "epoch": 1.29, + "grad_norm": 3.208433151245117, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1846, + "step": 1725 + }, + { + "epoch": 1.3, + "grad_norm": 6.394577503204346, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1857, + "step": 1750 + }, + { + "epoch": 1.32, + "grad_norm": 4.682005882263184, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1805, + "step": 1775 + }, + { + "epoch": 1.34, + "grad_norm": 6.934329509735107, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1832, + "step": 1800 + }, + { + "epoch": 1.36, + "grad_norm": 4.456650733947754, + "learning_rate": 9.86713567839196e-06, + "loss": 0.1792, + "step": 1825 + }, + { + "epoch": 1.38, + "grad_norm": 4.386739730834961, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1829, + "step": 1850 + }, + { + "epoch": 1.4, + "grad_norm": 3.6534488201141357, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1765, + "step": 1875 + }, + { + "epoch": 1.42, + "grad_norm": 6.135868549346924, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1805, + "step": 1900 + }, + { + "epoch": 1.44, + "grad_norm": 4.06704044342041, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1837, + "step": 1925 + }, + { + "epoch": 1.45, + "grad_norm": 5.943455219268799, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1802, + "step": 1950 + }, + { + "epoch": 1.47, + "grad_norm": 3.6257193088531494, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1743, + "step": 1975 + }, + { + "epoch": 1.49, + "grad_norm": 3.4661548137664795, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1741, + "step": 2000 + }, + { + "epoch": 1.49, + "eval_loss": 0.148699089884758, + "eval_runtime": 982.0203, + "eval_samples_per_second": 1.527, + "eval_steps_per_second": 1.527, + "eval_wer": 47.602044388433804, + "step": 2000 + }, + { + "epoch": 1.51, + "grad_norm": 3.295217990875244, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1711, + "step": 2025 + }, + { + "epoch": 1.53, + "grad_norm": 4.375446796417236, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1761, + "step": 2050 + }, + { + "epoch": 1.55, + "grad_norm": 5.860068321228027, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1729, + "step": 2075 + }, + { + "epoch": 1.57, + "grad_norm": 5.135326862335205, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1779, + "step": 2100 + }, + { + "epoch": 1.58, + "grad_norm": 5.165567874908447, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1692, + "step": 2125 + }, + { + "epoch": 1.6, + "grad_norm": 3.2267580032348633, + "learning_rate": 9.834572864321609e-06, + "loss": 0.1732, + "step": 2150 + }, + { + "epoch": 1.62, + "grad_norm": 3.2158052921295166, + "learning_rate": 9.832060301507538e-06, + "loss": 0.1692, + "step": 2175 + }, + { + "epoch": 1.64, + "grad_norm": 4.236759662628174, + "learning_rate": 9.829547738693467e-06, + "loss": 0.1647, + "step": 2200 + }, + { + "epoch": 1.66, + "grad_norm": 4.310646057128906, + "learning_rate": 9.827035175879398e-06, + "loss": 0.1707, + "step": 2225 + }, + { + "epoch": 1.68, + "grad_norm": 4.7067108154296875, + "learning_rate": 9.824522613065328e-06, + "loss": 0.1695, + "step": 2250 + }, + { + "epoch": 1.7, + "grad_norm": 3.3700754642486572, + "learning_rate": 9.822010050251257e-06, + "loss": 0.1672, + "step": 2275 + }, + { + "epoch": 1.72, + "grad_norm": 4.211385726928711, + "learning_rate": 9.819497487437186e-06, + "loss": 0.1665, + "step": 2300 + }, + { + "epoch": 1.73, + "grad_norm": 4.043334007263184, + "learning_rate": 9.816984924623116e-06, + "loss": 0.1666, + "step": 2325 + }, + { + "epoch": 1.75, + "grad_norm": 3.1525399684906006, + "learning_rate": 9.814472361809047e-06, + "loss": 0.1653, + "step": 2350 + }, + { + "epoch": 1.77, + "grad_norm": 3.392470359802246, + "learning_rate": 9.811959798994976e-06, + "loss": 0.1659, + "step": 2375 + }, + { + "epoch": 1.79, + "grad_norm": 3.508985996246338, + "learning_rate": 9.809447236180905e-06, + "loss": 0.1645, + "step": 2400 + }, + { + "epoch": 1.81, + "grad_norm": 4.224145889282227, + "learning_rate": 9.806934673366835e-06, + "loss": 0.1619, + "step": 2425 + }, + { + "epoch": 1.83, + "grad_norm": 4.105640888214111, + "learning_rate": 9.804422110552764e-06, + "loss": 0.166, + "step": 2450 + }, + { + "epoch": 1.85, + "grad_norm": 6.54763650894165, + "learning_rate": 9.801909547738693e-06, + "loss": 0.1597, + "step": 2475 + }, + { + "epoch": 1.86, + "grad_norm": 3.7038025856018066, + "learning_rate": 9.799396984924624e-06, + "loss": 0.1576, + "step": 2500 + }, + { + "epoch": 1.88, + "grad_norm": 6.584764003753662, + "learning_rate": 9.796884422110554e-06, + "loss": 0.1596, + "step": 2525 + }, + { + "epoch": 1.9, + "grad_norm": 3.1333792209625244, + "learning_rate": 9.794371859296483e-06, + "loss": 0.1607, + "step": 2550 + }, + { + "epoch": 1.92, + "grad_norm": 4.786000728607178, + "learning_rate": 9.791859296482414e-06, + "loss": 0.158, + "step": 2575 + }, + { + "epoch": 1.94, + "grad_norm": 4.721449851989746, + "learning_rate": 9.789346733668342e-06, + "loss": 0.1629, + "step": 2600 + }, + { + "epoch": 1.96, + "grad_norm": 3.9033567905426025, + "learning_rate": 9.786834170854273e-06, + "loss": 0.1557, + "step": 2625 + }, + { + "epoch": 1.98, + "grad_norm": 3.953597068786621, + "learning_rate": 9.784321608040202e-06, + "loss": 0.1557, + "step": 2650 + }, + { + "epoch": 1.99, + "grad_norm": 3.8848977088928223, + "learning_rate": 9.781809045226131e-06, + "loss": 0.1519, + "step": 2675 + }, + { + "epoch": 2.01, + "grad_norm": 3.4725067615509033, + "learning_rate": 9.77929648241206e-06, + "loss": 0.1494, + "step": 2700 + }, + { + "epoch": 2.03, + "grad_norm": 4.614296913146973, + "learning_rate": 9.77678391959799e-06, + "loss": 0.1435, + "step": 2725 + }, + { + "epoch": 2.05, + "grad_norm": 3.526057243347168, + "learning_rate": 9.774271356783921e-06, + "loss": 0.1452, + "step": 2750 + }, + { + "epoch": 2.07, + "grad_norm": 3.55466365814209, + "learning_rate": 9.77175879396985e-06, + "loss": 0.1478, + "step": 2775 + }, + { + "epoch": 2.09, + "grad_norm": 3.407679319381714, + "learning_rate": 9.76924623115578e-06, + "loss": 0.1426, + "step": 2800 + }, + { + "epoch": 2.11, + "grad_norm": 3.1033308506011963, + "learning_rate": 9.766733668341709e-06, + "loss": 0.1522, + "step": 2825 + }, + { + "epoch": 2.13, + "grad_norm": 4.427501201629639, + "learning_rate": 9.76422110552764e-06, + "loss": 0.141, + "step": 2850 + }, + { + "epoch": 2.14, + "grad_norm": 4.598984718322754, + "learning_rate": 9.761708542713568e-06, + "loss": 0.1424, + "step": 2875 + }, + { + "epoch": 2.16, + "grad_norm": 2.9090452194213867, + "learning_rate": 9.759195979899499e-06, + "loss": 0.1393, + "step": 2900 + }, + { + "epoch": 2.18, + "grad_norm": 3.3033642768859863, + "learning_rate": 9.756683417085428e-06, + "loss": 0.1437, + "step": 2925 + }, + { + "epoch": 2.2, + "grad_norm": 4.2938337326049805, + "learning_rate": 9.754170854271357e-06, + "loss": 0.1431, + "step": 2950 + }, + { + "epoch": 2.22, + "grad_norm": 3.623213052749634, + "learning_rate": 9.751658291457288e-06, + "loss": 0.1431, + "step": 2975 + }, + { + "epoch": 2.24, + "grad_norm": 4.266867637634277, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1421, + "step": 3000 + }, + { + "epoch": 2.24, + "eval_loss": 0.12692005932331085, + "eval_runtime": 977.4002, + "eval_samples_per_second": 1.535, + "eval_steps_per_second": 1.535, + "eval_wer": 43.562276832598194, + "step": 3000 + }, + { + "epoch": 2.26, + "grad_norm": 4.4215087890625, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1429, + "step": 3025 + }, + { + "epoch": 2.27, + "grad_norm": 3.8426625728607178, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1398, + "step": 3050 + }, + { + "epoch": 2.29, + "grad_norm": 4.037528991699219, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1404, + "step": 3075 + }, + { + "epoch": 2.31, + "grad_norm": 4.368360996246338, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1374, + "step": 3100 + }, + { + "epoch": 2.33, + "grad_norm": 3.4746017456054688, + "learning_rate": 9.736582914572866e-06, + "loss": 0.143, + "step": 3125 + }, + { + "epoch": 2.35, + "grad_norm": 2.9636313915252686, + "learning_rate": 9.734070351758794e-06, + "loss": 0.137, + "step": 3150 + }, + { + "epoch": 2.37, + "grad_norm": 3.382560968399048, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1383, + "step": 3175 + }, + { + "epoch": 2.39, + "grad_norm": 3.2588999271392822, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1365, + "step": 3200 + }, + { + "epoch": 2.4, + "grad_norm": 3.0374488830566406, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1384, + "step": 3225 + }, + { + "epoch": 2.42, + "grad_norm": 6.621469020843506, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1369, + "step": 3250 + }, + { + "epoch": 2.44, + "grad_norm": 3.6496496200561523, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1382, + "step": 3275 + }, + { + "epoch": 2.46, + "grad_norm": 3.0143158435821533, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1402, + "step": 3300 + }, + { + "epoch": 2.48, + "grad_norm": 3.539630174636841, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1353, + "step": 3325 + }, + { + "epoch": 2.5, + "grad_norm": 3.2626328468322754, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1348, + "step": 3350 + }, + { + "epoch": 2.52, + "grad_norm": 3.385319948196411, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1375, + "step": 3375 + }, + { + "epoch": 2.54, + "grad_norm": 2.935211181640625, + "learning_rate": 9.708944723618092e-06, + "loss": 0.136, + "step": 3400 + }, + { + "epoch": 2.55, + "grad_norm": 4.809500217437744, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1353, + "step": 3425 + }, + { + "epoch": 2.57, + "grad_norm": 3.515108346939087, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1344, + "step": 3450 + }, + { + "epoch": 2.59, + "grad_norm": 3.5611541271209717, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1378, + "step": 3475 + }, + { + "epoch": 2.61, + "grad_norm": 3.026041269302368, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1347, + "step": 3500 + }, + { + "epoch": 2.63, + "grad_norm": 3.2884161472320557, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1334, + "step": 3525 + }, + { + "epoch": 2.65, + "grad_norm": 4.46406888961792, + "learning_rate": 9.69386934673367e-06, + "loss": 0.1366, + "step": 3550 + }, + { + "epoch": 2.67, + "grad_norm": 4.556339263916016, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1294, + "step": 3575 + }, + { + "epoch": 2.68, + "grad_norm": 3.9441299438476562, + "learning_rate": 9.688844221105528e-06, + "loss": 0.135, + "step": 3600 + }, + { + "epoch": 2.7, + "grad_norm": 3.093155860900879, + "learning_rate": 9.686331658291457e-06, + "loss": 0.13, + "step": 3625 + }, + { + "epoch": 2.72, + "grad_norm": 4.64517879486084, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1326, + "step": 3650 + }, + { + "epoch": 2.74, + "grad_norm": 5.574061393737793, + "learning_rate": 9.681306532663318e-06, + "loss": 0.131, + "step": 3675 + }, + { + "epoch": 2.76, + "grad_norm": 3.4427688121795654, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1309, + "step": 3700 + }, + { + "epoch": 2.78, + "grad_norm": 2.958904504776001, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1286, + "step": 3725 + }, + { + "epoch": 2.8, + "grad_norm": 3.8922340869903564, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1326, + "step": 3750 + }, + { + "epoch": 2.82, + "grad_norm": 3.769803524017334, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1323, + "step": 3775 + }, + { + "epoch": 2.83, + "grad_norm": 2.791001558303833, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1294, + "step": 3800 + }, + { + "epoch": 2.85, + "grad_norm": 3.000278949737549, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1251, + "step": 3825 + }, + { + "epoch": 2.87, + "grad_norm": 3.5593388080596924, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1286, + "step": 3850 + }, + { + "epoch": 2.89, + "grad_norm": 4.313052654266357, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1263, + "step": 3875 + }, + { + "epoch": 2.91, + "grad_norm": 2.9328017234802246, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1274, + "step": 3900 + }, + { + "epoch": 2.93, + "grad_norm": 3.3465633392333984, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1274, + "step": 3925 + }, + { + "epoch": 2.95, + "grad_norm": 4.0553131103515625, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1276, + "step": 3950 + }, + { + "epoch": 2.96, + "grad_norm": 2.859919309616089, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1258, + "step": 3975 + }, + { + "epoch": 2.98, + "grad_norm": 3.8674328327178955, + "learning_rate": 9.648643216080404e-06, + "loss": 0.125, + "step": 4000 + }, + { + "epoch": 2.98, + "eval_loss": 0.1145973727107048, + "eval_runtime": 966.7203, + "eval_samples_per_second": 1.552, + "eval_steps_per_second": 1.552, + "eval_wer": 40.71273541972975, + "step": 4000 + }, + { + "epoch": 3.0, + "grad_norm": 3.36872935295105, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1284, + "step": 4025 + }, + { + "epoch": 3.02, + "grad_norm": 2.6161723136901855, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1163, + "step": 4050 + }, + { + "epoch": 3.04, + "grad_norm": 4.413143157958984, + "learning_rate": 9.641105527638192e-06, + "loss": 0.118, + "step": 4075 + }, + { + "epoch": 3.06, + "grad_norm": 2.698725700378418, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1162, + "step": 4100 + }, + { + "epoch": 3.08, + "grad_norm": 5.349793910980225, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1176, + "step": 4125 + }, + { + "epoch": 3.09, + "grad_norm": 3.1425278186798096, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1159, + "step": 4150 + }, + { + "epoch": 3.11, + "grad_norm": 2.651480197906494, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1167, + "step": 4175 + }, + { + "epoch": 3.13, + "grad_norm": 2.931429862976074, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1156, + "step": 4200 + }, + { + "epoch": 3.15, + "grad_norm": 2.817364454269409, + "learning_rate": 9.62603015075377e-06, + "loss": 0.115, + "step": 4225 + }, + { + "epoch": 3.17, + "grad_norm": 3.2311747074127197, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1128, + "step": 4250 + }, + { + "epoch": 3.19, + "grad_norm": 3.497722864151001, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1177, + "step": 4275 + }, + { + "epoch": 3.21, + "grad_norm": 3.0804145336151123, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1126, + "step": 4300 + }, + { + "epoch": 3.23, + "grad_norm": 3.734846830368042, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1181, + "step": 4325 + }, + { + "epoch": 3.24, + "grad_norm": 3.648483991622925, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1149, + "step": 4350 + }, + { + "epoch": 3.26, + "grad_norm": 3.1508536338806152, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1187, + "step": 4375 + }, + { + "epoch": 3.28, + "grad_norm": 3.452153444290161, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1143, + "step": 4400 + }, + { + "epoch": 3.3, + "grad_norm": 4.818159103393555, + "learning_rate": 9.605929648241206e-06, + "loss": 0.116, + "step": 4425 + }, + { + "epoch": 3.32, + "grad_norm": 3.1910297870635986, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1157, + "step": 4450 + }, + { + "epoch": 3.34, + "grad_norm": 3.4651875495910645, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1139, + "step": 4475 + }, + { + "epoch": 3.36, + "grad_norm": 2.86487078666687, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1143, + "step": 4500 + }, + { + "epoch": 3.37, + "grad_norm": 3.3568084239959717, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1097, + "step": 4525 + }, + { + "epoch": 3.39, + "grad_norm": 2.9622488021850586, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1146, + "step": 4550 + }, + { + "epoch": 3.41, + "grad_norm": 3.594905138015747, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1146, + "step": 4575 + }, + { + "epoch": 3.43, + "grad_norm": 3.208555221557617, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1146, + "step": 4600 + }, + { + "epoch": 3.45, + "grad_norm": 3.5209977626800537, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1149, + "step": 4625 + }, + { + "epoch": 3.47, + "grad_norm": 3.144443988800049, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1136, + "step": 4650 + }, + { + "epoch": 3.49, + "grad_norm": 3.1589362621307373, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1174, + "step": 4675 + }, + { + "epoch": 3.5, + "grad_norm": 3.0426950454711914, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1142, + "step": 4700 + }, + { + "epoch": 3.52, + "grad_norm": 3.5835533142089844, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1139, + "step": 4725 + }, + { + "epoch": 3.54, + "grad_norm": 3.933162212371826, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1139, + "step": 4750 + }, + { + "epoch": 3.56, + "grad_norm": 3.745992660522461, + "learning_rate": 9.570753768844222e-06, + "loss": 0.113, + "step": 4775 + }, + { + "epoch": 3.58, + "grad_norm": 6.814022541046143, + "learning_rate": 9.568341708542715e-06, + "loss": 0.1136, + "step": 4800 + }, + { + "epoch": 3.6, + "grad_norm": 3.4825470447540283, + "learning_rate": 9.565829145728644e-06, + "loss": 0.1133, + "step": 4825 + }, + { + "epoch": 3.62, + "grad_norm": 3.908992052078247, + "learning_rate": 9.563316582914573e-06, + "loss": 0.1118, + "step": 4850 + }, + { + "epoch": 3.64, + "grad_norm": 2.949420928955078, + "learning_rate": 9.560804020100504e-06, + "loss": 0.1126, + "step": 4875 + }, + { + "epoch": 3.65, + "grad_norm": 4.322375297546387, + "learning_rate": 9.558291457286432e-06, + "loss": 0.1158, + "step": 4900 + }, + { + "epoch": 3.67, + "grad_norm": 3.289518356323242, + "learning_rate": 9.555778894472363e-06, + "loss": 0.1098, + "step": 4925 + }, + { + "epoch": 3.69, + "grad_norm": 3.4194021224975586, + "learning_rate": 9.553266331658292e-06, + "loss": 0.1127, + "step": 4950 + }, + { + "epoch": 3.71, + "grad_norm": 3.2885825634002686, + "learning_rate": 9.550753768844222e-06, + "loss": 0.1075, + "step": 4975 + }, + { + "epoch": 3.73, + "grad_norm": 3.5224156379699707, + "learning_rate": 9.548241206030151e-06, + "loss": 0.1098, + "step": 5000 + }, + { + "epoch": 3.73, + "eval_loss": 0.10625702142715454, + "eval_runtime": 990.234, + "eval_samples_per_second": 1.515, + "eval_steps_per_second": 1.515, + "eval_wer": 36.45592662605895, + "step": 5000 + }, + { + "epoch": 3.75, + "grad_norm": 2.747551202774048, + "learning_rate": 9.54572864321608e-06, + "loss": 0.11, + "step": 5025 + }, + { + "epoch": 3.77, + "grad_norm": 3.113426446914673, + "learning_rate": 9.543216080402011e-06, + "loss": 0.1109, + "step": 5050 + }, + { + "epoch": 3.78, + "grad_norm": 3.2000625133514404, + "learning_rate": 9.54070351758794e-06, + "loss": 0.11, + "step": 5075 + }, + { + "epoch": 3.8, + "grad_norm": 4.045854091644287, + "learning_rate": 9.53819095477387e-06, + "loss": 0.1102, + "step": 5100 + }, + { + "epoch": 3.82, + "grad_norm": 4.9057183265686035, + "learning_rate": 9.5356783919598e-06, + "loss": 0.1088, + "step": 5125 + }, + { + "epoch": 3.84, + "grad_norm": 4.639792442321777, + "learning_rate": 9.53316582914573e-06, + "loss": 0.1073, + "step": 5150 + }, + { + "epoch": 3.86, + "grad_norm": 3.7225656509399414, + "learning_rate": 9.530653266331658e-06, + "loss": 0.1108, + "step": 5175 + }, + { + "epoch": 3.88, + "grad_norm": 3.4669575691223145, + "learning_rate": 9.528140703517589e-06, + "loss": 0.1109, + "step": 5200 + }, + { + "epoch": 3.9, + "grad_norm": 2.9355220794677734, + "learning_rate": 9.525628140703518e-06, + "loss": 0.1089, + "step": 5225 + }, + { + "epoch": 3.91, + "grad_norm": 4.149091720581055, + "learning_rate": 9.523115577889448e-06, + "loss": 0.1117, + "step": 5250 + }, + { + "epoch": 3.93, + "grad_norm": 2.9327330589294434, + "learning_rate": 9.520603015075379e-06, + "loss": 0.1079, + "step": 5275 + }, + { + "epoch": 3.95, + "grad_norm": 3.010422468185425, + "learning_rate": 9.518090452261306e-06, + "loss": 0.1065, + "step": 5300 + }, + { + "epoch": 3.97, + "grad_norm": 2.7165846824645996, + "learning_rate": 9.515577889447237e-06, + "loss": 0.1067, + "step": 5325 + }, + { + "epoch": 3.99, + "grad_norm": 3.8210859298706055, + "learning_rate": 9.513065326633167e-06, + "loss": 0.1121, + "step": 5350 + }, + { + "epoch": 4.01, + "grad_norm": 4.379154682159424, + "learning_rate": 9.510552763819096e-06, + "loss": 0.104, + "step": 5375 + }, + { + "epoch": 4.03, + "grad_norm": 3.7906744480133057, + "learning_rate": 9.508040201005025e-06, + "loss": 0.0956, + "step": 5400 + }, + { + "epoch": 4.05, + "grad_norm": 3.575955390930176, + "learning_rate": 9.505527638190956e-06, + "loss": 0.0999, + "step": 5425 + }, + { + "epoch": 4.06, + "grad_norm": 4.781909942626953, + "learning_rate": 9.503015075376886e-06, + "loss": 0.0975, + "step": 5450 + }, + { + "epoch": 4.08, + "grad_norm": 2.889066457748413, + "learning_rate": 9.500502512562815e-06, + "loss": 0.097, + "step": 5475 + }, + { + "epoch": 4.1, + "grad_norm": 3.012756109237671, + "learning_rate": 9.497989949748744e-06, + "loss": 0.0985, + "step": 5500 + }, + { + "epoch": 4.12, + "grad_norm": 3.0829362869262695, + "learning_rate": 9.495477386934674e-06, + "loss": 0.0992, + "step": 5525 + }, + { + "epoch": 4.14, + "grad_norm": 2.8998055458068848, + "learning_rate": 9.492964824120605e-06, + "loss": 0.097, + "step": 5550 + }, + { + "epoch": 4.16, + "grad_norm": 3.6682493686676025, + "learning_rate": 9.490452261306532e-06, + "loss": 0.0934, + "step": 5575 + }, + { + "epoch": 4.18, + "grad_norm": 3.4019980430603027, + "learning_rate": 9.487939698492463e-06, + "loss": 0.0987, + "step": 5600 + }, + { + "epoch": 4.19, + "grad_norm": 2.9836018085479736, + "learning_rate": 9.485427135678393e-06, + "loss": 0.102, + "step": 5625 + }, + { + "epoch": 4.21, + "grad_norm": 3.6548168659210205, + "learning_rate": 9.482914572864322e-06, + "loss": 0.0944, + "step": 5650 + }, + { + "epoch": 4.23, + "grad_norm": 2.87402081489563, + "learning_rate": 9.480402010050253e-06, + "loss": 0.0981, + "step": 5675 + }, + { + "epoch": 4.25, + "grad_norm": 2.9002676010131836, + "learning_rate": 9.477889447236182e-06, + "loss": 0.1013, + "step": 5700 + }, + { + "epoch": 4.27, + "grad_norm": 3.0780720710754395, + "learning_rate": 9.475376884422112e-06, + "loss": 0.0982, + "step": 5725 + }, + { + "epoch": 4.29, + "grad_norm": 3.4772732257843018, + "learning_rate": 9.472864321608041e-06, + "loss": 0.0997, + "step": 5750 + }, + { + "epoch": 4.31, + "grad_norm": 3.805375814437866, + "learning_rate": 9.47035175879397e-06, + "loss": 0.0966, + "step": 5775 + }, + { + "epoch": 4.33, + "grad_norm": 2.432993173599243, + "learning_rate": 9.4678391959799e-06, + "loss": 0.0978, + "step": 5800 + }, + { + "epoch": 4.34, + "grad_norm": 3.1627695560455322, + "learning_rate": 9.46532663316583e-06, + "loss": 0.099, + "step": 5825 + }, + { + "epoch": 4.36, + "grad_norm": 3.060960292816162, + "learning_rate": 9.462814070351758e-06, + "loss": 0.0975, + "step": 5850 + }, + { + "epoch": 4.38, + "grad_norm": 3.1228978633880615, + "learning_rate": 9.46030150753769e-06, + "loss": 0.0996, + "step": 5875 + }, + { + "epoch": 4.4, + "grad_norm": 3.501793384552002, + "learning_rate": 9.457788944723619e-06, + "loss": 0.0976, + "step": 5900 + }, + { + "epoch": 4.42, + "grad_norm": 3.3384509086608887, + "learning_rate": 9.455276381909548e-06, + "loss": 0.0943, + "step": 5925 + }, + { + "epoch": 4.44, + "grad_norm": 3.4676194190979004, + "learning_rate": 9.452763819095479e-06, + "loss": 0.098, + "step": 5950 + }, + { + "epoch": 4.46, + "grad_norm": 2.8938655853271484, + "learning_rate": 9.450251256281408e-06, + "loss": 0.0981, + "step": 5975 + }, + { + "epoch": 4.47, + "grad_norm": 3.701399564743042, + "learning_rate": 9.447738693467338e-06, + "loss": 0.0976, + "step": 6000 + }, + { + "epoch": 4.47, + "eval_loss": 0.10364207625389099, + "eval_runtime": 976.7188, + "eval_samples_per_second": 1.536, + "eval_steps_per_second": 1.536, + "eval_wer": 34.39053420149828, + "step": 6000 + }, + { + "epoch": 4.49, + "grad_norm": 2.885831832885742, + "learning_rate": 9.445226130653267e-06, + "loss": 0.0955, + "step": 6025 + }, + { + "epoch": 4.51, + "grad_norm": 3.1397621631622314, + "learning_rate": 9.442713567839196e-06, + "loss": 0.0985, + "step": 6050 + }, + { + "epoch": 4.53, + "grad_norm": 2.836707592010498, + "learning_rate": 9.440201005025127e-06, + "loss": 0.0964, + "step": 6075 + }, + { + "epoch": 4.55, + "grad_norm": 3.1758687496185303, + "learning_rate": 9.437688442211057e-06, + "loss": 0.0962, + "step": 6100 + }, + { + "epoch": 4.57, + "grad_norm": 3.0306639671325684, + "learning_rate": 9.435175879396986e-06, + "loss": 0.098, + "step": 6125 + }, + { + "epoch": 4.59, + "grad_norm": 4.048830986022949, + "learning_rate": 9.432663316582915e-06, + "loss": 0.0983, + "step": 6150 + }, + { + "epoch": 4.6, + "grad_norm": 4.1528754234313965, + "learning_rate": 9.430150753768845e-06, + "loss": 0.0971, + "step": 6175 + }, + { + "epoch": 4.62, + "grad_norm": 3.259349822998047, + "learning_rate": 9.427638190954774e-06, + "loss": 0.0983, + "step": 6200 + }, + { + "epoch": 4.64, + "grad_norm": 3.266369581222534, + "learning_rate": 9.425125628140705e-06, + "loss": 0.0977, + "step": 6225 + }, + { + "epoch": 4.66, + "grad_norm": 2.7385644912719727, + "learning_rate": 9.422613065326634e-06, + "loss": 0.0972, + "step": 6250 + }, + { + "epoch": 4.68, + "grad_norm": 3.5642640590667725, + "learning_rate": 9.420100502512564e-06, + "loss": 0.096, + "step": 6275 + }, + { + "epoch": 4.7, + "grad_norm": 3.3383567333221436, + "learning_rate": 9.417587939698495e-06, + "loss": 0.0946, + "step": 6300 + }, + { + "epoch": 4.72, + "grad_norm": 3.162820339202881, + "learning_rate": 9.415075376884422e-06, + "loss": 0.0983, + "step": 6325 + }, + { + "epoch": 4.74, + "grad_norm": 3.035754919052124, + "learning_rate": 9.412562814070353e-06, + "loss": 0.0959, + "step": 6350 + }, + { + "epoch": 4.75, + "grad_norm": 2.7970900535583496, + "learning_rate": 9.410050251256282e-06, + "loss": 0.0953, + "step": 6375 + }, + { + "epoch": 4.77, + "grad_norm": 3.539583683013916, + "learning_rate": 9.407537688442212e-06, + "loss": 0.0933, + "step": 6400 + }, + { + "epoch": 4.79, + "grad_norm": 2.706026315689087, + "learning_rate": 9.405025125628141e-06, + "loss": 0.096, + "step": 6425 + }, + { + "epoch": 4.81, + "grad_norm": 3.601503372192383, + "learning_rate": 9.40251256281407e-06, + "loss": 0.0957, + "step": 6450 + }, + { + "epoch": 4.83, + "grad_norm": 3.3797447681427, + "learning_rate": 9.4e-06, + "loss": 0.0933, + "step": 6475 + }, + { + "epoch": 4.85, + "grad_norm": 3.2108614444732666, + "learning_rate": 9.39748743718593e-06, + "loss": 0.0958, + "step": 6500 + }, + { + "epoch": 4.87, + "grad_norm": 2.8835432529449463, + "learning_rate": 9.39497487437186e-06, + "loss": 0.0968, + "step": 6525 + }, + { + "epoch": 4.88, + "grad_norm": 2.7169957160949707, + "learning_rate": 9.39246231155779e-06, + "loss": 0.093, + "step": 6550 + }, + { + "epoch": 4.9, + "grad_norm": 2.7604525089263916, + "learning_rate": 9.38994974874372e-06, + "loss": 0.0938, + "step": 6575 + }, + { + "epoch": 4.92, + "grad_norm": 3.6646928787231445, + "learning_rate": 9.387437185929648e-06, + "loss": 0.0946, + "step": 6600 + }, + { + "epoch": 4.94, + "grad_norm": 3.6720802783966064, + "learning_rate": 9.384924623115579e-06, + "loss": 0.0969, + "step": 6625 + }, + { + "epoch": 4.96, + "grad_norm": 3.600146532058716, + "learning_rate": 9.382412060301508e-06, + "loss": 0.0904, + "step": 6650 + }, + { + "epoch": 4.98, + "grad_norm": 2.9608378410339355, + "learning_rate": 9.379899497487438e-06, + "loss": 0.0945, + "step": 6675 + }, + { + "epoch": 5.0, + "grad_norm": 3.736933469772339, + "learning_rate": 9.377386934673369e-06, + "loss": 0.0936, + "step": 6700 + }, + { + "epoch": 5.01, + "grad_norm": 3.064671516418457, + "learning_rate": 9.374874371859296e-06, + "loss": 0.0856, + "step": 6725 + }, + { + "epoch": 5.03, + "grad_norm": 2.8640642166137695, + "learning_rate": 9.372361809045227e-06, + "loss": 0.0853, + "step": 6750 + }, + { + "epoch": 5.05, + "grad_norm": 2.9311769008636475, + "learning_rate": 9.369849246231157e-06, + "loss": 0.0841, + "step": 6775 + }, + { + "epoch": 5.07, + "grad_norm": 3.0264854431152344, + "learning_rate": 9.367336683417086e-06, + "loss": 0.0846, + "step": 6800 + }, + { + "epoch": 5.09, + "grad_norm": 3.242312431335449, + "learning_rate": 9.364824120603015e-06, + "loss": 0.0858, + "step": 6825 + }, + { + "epoch": 5.11, + "grad_norm": 3.493234872817993, + "learning_rate": 9.362311557788946e-06, + "loss": 0.0841, + "step": 6850 + }, + { + "epoch": 5.13, + "grad_norm": 3.2190945148468018, + "learning_rate": 9.359798994974874e-06, + "loss": 0.084, + "step": 6875 + }, + { + "epoch": 5.15, + "grad_norm": 3.3341429233551025, + "learning_rate": 9.357286432160805e-06, + "loss": 0.0833, + "step": 6900 + }, + { + "epoch": 5.16, + "grad_norm": 3.226764440536499, + "learning_rate": 9.354773869346734e-06, + "loss": 0.0858, + "step": 6925 + }, + { + "epoch": 5.18, + "grad_norm": 2.716411590576172, + "learning_rate": 9.352261306532664e-06, + "loss": 0.0849, + "step": 6950 + }, + { + "epoch": 5.2, + "grad_norm": 2.5581178665161133, + "learning_rate": 9.349748743718595e-06, + "loss": 0.0851, + "step": 6975 + }, + { + "epoch": 5.22, + "grad_norm": 2.713921546936035, + "learning_rate": 9.347236180904522e-06, + "loss": 0.0861, + "step": 7000 + }, + { + "epoch": 5.22, + "eval_loss": 0.09906119108200073, + "eval_runtime": 981.473, + "eval_samples_per_second": 1.528, + "eval_steps_per_second": 1.528, + "eval_wer": 33.70440383672898, + "step": 7000 + }, + { + "epoch": 5.24, + "grad_norm": 2.9551596641540527, + "learning_rate": 9.344723618090453e-06, + "loss": 0.0837, + "step": 7025 + }, + { + "epoch": 5.26, + "grad_norm": 2.6876981258392334, + "learning_rate": 9.342211055276383e-06, + "loss": 0.084, + "step": 7050 + }, + { + "epoch": 5.28, + "grad_norm": 2.8718082904815674, + "learning_rate": 9.339698492462312e-06, + "loss": 0.0843, + "step": 7075 + }, + { + "epoch": 5.29, + "grad_norm": 3.2987959384918213, + "learning_rate": 9.337185929648241e-06, + "loss": 0.0868, + "step": 7100 + }, + { + "epoch": 5.31, + "grad_norm": 3.432389974594116, + "learning_rate": 9.334673366834172e-06, + "loss": 0.0837, + "step": 7125 + }, + { + "epoch": 5.33, + "grad_norm": 2.9370830059051514, + "learning_rate": 9.332160804020102e-06, + "loss": 0.0816, + "step": 7150 + }, + { + "epoch": 5.35, + "grad_norm": 2.775156259536743, + "learning_rate": 9.329648241206031e-06, + "loss": 0.0846, + "step": 7175 + }, + { + "epoch": 5.37, + "grad_norm": 3.0964481830596924, + "learning_rate": 9.32713567839196e-06, + "loss": 0.0807, + "step": 7200 + }, + { + "epoch": 5.39, + "grad_norm": 2.8320086002349854, + "learning_rate": 9.32462311557789e-06, + "loss": 0.0848, + "step": 7225 + }, + { + "epoch": 5.41, + "grad_norm": 2.908804416656494, + "learning_rate": 9.32211055276382e-06, + "loss": 0.0869, + "step": 7250 + }, + { + "epoch": 5.43, + "grad_norm": 3.0360639095306396, + "learning_rate": 9.319597989949748e-06, + "loss": 0.0861, + "step": 7275 + }, + { + "epoch": 5.44, + "grad_norm": 2.543710708618164, + "learning_rate": 9.31708542713568e-06, + "loss": 0.0822, + "step": 7300 + }, + { + "epoch": 5.46, + "grad_norm": 3.3147566318511963, + "learning_rate": 9.314572864321609e-06, + "loss": 0.0862, + "step": 7325 + }, + { + "epoch": 5.48, + "grad_norm": 2.942568778991699, + "learning_rate": 9.312060301507538e-06, + "loss": 0.0858, + "step": 7350 + }, + { + "epoch": 5.5, + "grad_norm": 3.200373411178589, + "learning_rate": 9.309547738693469e-06, + "loss": 0.0841, + "step": 7375 + }, + { + "epoch": 5.52, + "grad_norm": 2.5632355213165283, + "learning_rate": 9.307035175879398e-06, + "loss": 0.0853, + "step": 7400 + }, + { + "epoch": 5.54, + "grad_norm": 3.1872360706329346, + "learning_rate": 9.304522613065328e-06, + "loss": 0.0857, + "step": 7425 + }, + { + "epoch": 5.56, + "grad_norm": 2.425218105316162, + "learning_rate": 9.302010050251257e-06, + "loss": 0.0834, + "step": 7450 + }, + { + "epoch": 5.57, + "grad_norm": 2.7362546920776367, + "learning_rate": 9.299497487437186e-06, + "loss": 0.0837, + "step": 7475 + }, + { + "epoch": 5.59, + "grad_norm": 3.2834975719451904, + "learning_rate": 9.296984924623116e-06, + "loss": 0.0819, + "step": 7500 + }, + { + "epoch": 5.61, + "grad_norm": 3.05967378616333, + "learning_rate": 9.294472361809047e-06, + "loss": 0.0851, + "step": 7525 + }, + { + "epoch": 5.63, + "grad_norm": 3.0693886280059814, + "learning_rate": 9.291959798994976e-06, + "loss": 0.084, + "step": 7550 + }, + { + "epoch": 5.65, + "grad_norm": 2.7909011840820312, + "learning_rate": 9.289447236180905e-06, + "loss": 0.0824, + "step": 7575 + }, + { + "epoch": 5.67, + "grad_norm": 3.538365602493286, + "learning_rate": 9.286934673366835e-06, + "loss": 0.0833, + "step": 7600 + }, + { + "epoch": 5.69, + "grad_norm": 3.3091254234313965, + "learning_rate": 9.284422110552764e-06, + "loss": 0.0841, + "step": 7625 + }, + { + "epoch": 5.7, + "grad_norm": 2.901761054992676, + "learning_rate": 9.281909547738695e-06, + "loss": 0.0826, + "step": 7650 + }, + { + "epoch": 5.72, + "grad_norm": 2.5421626567840576, + "learning_rate": 9.279396984924624e-06, + "loss": 0.0834, + "step": 7675 + }, + { + "epoch": 5.74, + "grad_norm": 2.7622311115264893, + "learning_rate": 9.276884422110554e-06, + "loss": 0.0814, + "step": 7700 + }, + { + "epoch": 5.76, + "grad_norm": 2.5512490272521973, + "learning_rate": 9.274371859296483e-06, + "loss": 0.085, + "step": 7725 + }, + { + "epoch": 5.78, + "grad_norm": 3.2723119258880615, + "learning_rate": 9.271859296482412e-06, + "loss": 0.0826, + "step": 7750 + }, + { + "epoch": 5.8, + "grad_norm": 2.391287326812744, + "learning_rate": 9.269346733668343e-06, + "loss": 0.0851, + "step": 7775 + }, + { + "epoch": 5.82, + "grad_norm": 3.2524197101593018, + "learning_rate": 9.266834170854273e-06, + "loss": 0.0837, + "step": 7800 + }, + { + "epoch": 5.84, + "grad_norm": 2.9285120964050293, + "learning_rate": 9.264321608040202e-06, + "loss": 0.0813, + "step": 7825 + }, + { + "epoch": 5.85, + "grad_norm": 3.5253846645355225, + "learning_rate": 9.261809045226131e-06, + "loss": 0.083, + "step": 7850 + }, + { + "epoch": 5.87, + "grad_norm": 2.8845863342285156, + "learning_rate": 9.25929648241206e-06, + "loss": 0.0845, + "step": 7875 + }, + { + "epoch": 5.89, + "grad_norm": 2.894850969314575, + "learning_rate": 9.25678391959799e-06, + "loss": 0.0826, + "step": 7900 + }, + { + "epoch": 5.91, + "grad_norm": 3.492852210998535, + "learning_rate": 9.254271356783921e-06, + "loss": 0.0805, + "step": 7925 + }, + { + "epoch": 5.93, + "grad_norm": 2.590986490249634, + "learning_rate": 9.25175879396985e-06, + "loss": 0.0841, + "step": 7950 + }, + { + "epoch": 5.95, + "grad_norm": 3.3909456729888916, + "learning_rate": 9.24924623115578e-06, + "loss": 0.0832, + "step": 7975 + }, + { + "epoch": 5.97, + "grad_norm": 2.7903807163238525, + "learning_rate": 9.24673366834171e-06, + "loss": 0.0813, + "step": 8000 + }, + { + "epoch": 5.97, + "eval_loss": 0.09740345925092697, + "eval_runtime": 992.4464, + "eval_samples_per_second": 1.511, + "eval_steps_per_second": 1.511, + "eval_wer": 33.06028145347616, + "step": 8000 + }, + { + "epoch": 5.98, + "grad_norm": 2.8432505130767822, + "learning_rate": 9.244221105527638e-06, + "loss": 0.0835, + "step": 8025 + }, + { + "epoch": 6.0, + "grad_norm": 3.0541317462921143, + "learning_rate": 9.24170854271357e-06, + "loss": 0.079, + "step": 8050 + }, + { + "epoch": 6.02, + "grad_norm": 3.0062572956085205, + "learning_rate": 9.239195979899498e-06, + "loss": 0.075, + "step": 8075 + }, + { + "epoch": 6.04, + "grad_norm": 2.757521867752075, + "learning_rate": 9.236683417085428e-06, + "loss": 0.0736, + "step": 8100 + }, + { + "epoch": 6.06, + "grad_norm": 2.510943651199341, + "learning_rate": 9.234170854271357e-06, + "loss": 0.0718, + "step": 8125 + }, + { + "epoch": 6.08, + "grad_norm": 2.8263604640960693, + "learning_rate": 9.231658291457286e-06, + "loss": 0.0715, + "step": 8150 + }, + { + "epoch": 6.1, + "grad_norm": 2.7902908325195312, + "learning_rate": 9.229145728643217e-06, + "loss": 0.0744, + "step": 8175 + }, + { + "epoch": 6.11, + "grad_norm": 2.6819334030151367, + "learning_rate": 9.226633165829147e-06, + "loss": 0.073, + "step": 8200 + }, + { + "epoch": 6.13, + "grad_norm": 2.502225399017334, + "learning_rate": 9.224120603015076e-06, + "loss": 0.0713, + "step": 8225 + }, + { + "epoch": 6.15, + "grad_norm": 2.910869836807251, + "learning_rate": 9.221608040201005e-06, + "loss": 0.0733, + "step": 8250 + }, + { + "epoch": 6.17, + "grad_norm": 3.235252618789673, + "learning_rate": 9.219095477386936e-06, + "loss": 0.0755, + "step": 8275 + }, + { + "epoch": 6.19, + "grad_norm": 2.8991072177886963, + "learning_rate": 9.216582914572864e-06, + "loss": 0.0737, + "step": 8300 + }, + { + "epoch": 6.21, + "grad_norm": 3.0763397216796875, + "learning_rate": 9.214070351758795e-06, + "loss": 0.0746, + "step": 8325 + }, + { + "epoch": 6.23, + "grad_norm": 2.4867208003997803, + "learning_rate": 9.211557788944724e-06, + "loss": 0.0723, + "step": 8350 + }, + { + "epoch": 6.25, + "grad_norm": 2.7545785903930664, + "learning_rate": 9.209045226130654e-06, + "loss": 0.0682, + "step": 8375 + }, + { + "epoch": 6.26, + "grad_norm": 3.296511173248291, + "learning_rate": 9.206532663316585e-06, + "loss": 0.0726, + "step": 8400 + }, + { + "epoch": 6.28, + "grad_norm": 3.025658369064331, + "learning_rate": 9.204020100502512e-06, + "loss": 0.0712, + "step": 8425 + }, + { + "epoch": 6.3, + "grad_norm": 3.283193349838257, + "learning_rate": 9.201507537688443e-06, + "loss": 0.0753, + "step": 8450 + }, + { + "epoch": 6.32, + "grad_norm": 3.1915130615234375, + "learning_rate": 9.198994974874373e-06, + "loss": 0.074, + "step": 8475 + }, + { + "epoch": 6.34, + "grad_norm": 2.7137105464935303, + "learning_rate": 9.196482412060302e-06, + "loss": 0.0726, + "step": 8500 + }, + { + "epoch": 6.36, + "grad_norm": 3.2077836990356445, + "learning_rate": 9.193969849246231e-06, + "loss": 0.0712, + "step": 8525 + }, + { + "epoch": 6.38, + "grad_norm": 2.7213048934936523, + "learning_rate": 9.191457286432162e-06, + "loss": 0.0758, + "step": 8550 + }, + { + "epoch": 6.39, + "grad_norm": 2.7132604122161865, + "learning_rate": 9.188944723618092e-06, + "loss": 0.0733, + "step": 8575 + }, + { + "epoch": 6.41, + "grad_norm": 3.0986220836639404, + "learning_rate": 9.186432160804021e-06, + "loss": 0.0751, + "step": 8600 + }, + { + "epoch": 6.43, + "grad_norm": 2.7947447299957275, + "learning_rate": 9.18391959798995e-06, + "loss": 0.0727, + "step": 8625 + }, + { + "epoch": 6.45, + "grad_norm": 3.246277332305908, + "learning_rate": 9.18140703517588e-06, + "loss": 0.0734, + "step": 8650 + }, + { + "epoch": 6.47, + "grad_norm": 3.050459623336792, + "learning_rate": 9.17889447236181e-06, + "loss": 0.0729, + "step": 8675 + }, + { + "epoch": 6.49, + "grad_norm": 3.0157370567321777, + "learning_rate": 9.176381909547738e-06, + "loss": 0.0735, + "step": 8700 + }, + { + "epoch": 6.51, + "grad_norm": 3.357234001159668, + "learning_rate": 9.17386934673367e-06, + "loss": 0.0738, + "step": 8725 + }, + { + "epoch": 6.52, + "grad_norm": 3.099461078643799, + "learning_rate": 9.171356783919599e-06, + "loss": 0.0735, + "step": 8750 + }, + { + "epoch": 6.54, + "grad_norm": 3.3645858764648438, + "learning_rate": 9.168844221105528e-06, + "loss": 0.0705, + "step": 8775 + }, + { + "epoch": 6.56, + "grad_norm": 3.1133999824523926, + "learning_rate": 9.166331658291459e-06, + "loss": 0.0728, + "step": 8800 + }, + { + "epoch": 6.58, + "grad_norm": 3.3950767517089844, + "learning_rate": 9.163819095477388e-06, + "loss": 0.0737, + "step": 8825 + }, + { + "epoch": 6.6, + "grad_norm": 2.904306173324585, + "learning_rate": 9.161306532663318e-06, + "loss": 0.0734, + "step": 8850 + }, + { + "epoch": 6.62, + "grad_norm": 2.572333812713623, + "learning_rate": 9.15889447236181e-06, + "loss": 0.0745, + "step": 8875 + }, + { + "epoch": 6.64, + "grad_norm": 3.0234625339508057, + "learning_rate": 9.156381909547739e-06, + "loss": 0.0725, + "step": 8900 + }, + { + "epoch": 6.66, + "grad_norm": 2.611837387084961, + "learning_rate": 9.15386934673367e-06, + "loss": 0.0712, + "step": 8925 + }, + { + "epoch": 6.67, + "grad_norm": 3.362272262573242, + "learning_rate": 9.151356783919599e-06, + "loss": 0.0726, + "step": 8950 + }, + { + "epoch": 6.69, + "grad_norm": 2.2291293144226074, + "learning_rate": 9.148844221105528e-06, + "loss": 0.0709, + "step": 8975 + }, + { + "epoch": 6.71, + "grad_norm": 3.122919797897339, + "learning_rate": 9.14633165829146e-06, + "loss": 0.0747, + "step": 9000 + }, + { + "epoch": 6.71, + "eval_loss": 0.09830078482627869, + "eval_runtime": 990.4791, + "eval_samples_per_second": 1.514, + "eval_steps_per_second": 1.514, + "eval_wer": 32.2061191626409, + "step": 9000 + }, + { + "epoch": 6.73, + "grad_norm": 2.4696991443634033, + "learning_rate": 9.143819095477387e-06, + "loss": 0.073, + "step": 9025 + }, + { + "epoch": 6.75, + "grad_norm": 3.1644959449768066, + "learning_rate": 9.141306532663318e-06, + "loss": 0.0727, + "step": 9050 + }, + { + "epoch": 6.77, + "grad_norm": 2.957810401916504, + "learning_rate": 9.138793969849247e-06, + "loss": 0.0725, + "step": 9075 + }, + { + "epoch": 6.79, + "grad_norm": 2.617823600769043, + "learning_rate": 9.136281407035177e-06, + "loss": 0.0735, + "step": 9100 + }, + { + "epoch": 6.8, + "grad_norm": 3.1562681198120117, + "learning_rate": 9.133768844221106e-06, + "loss": 0.0716, + "step": 9125 + }, + { + "epoch": 6.82, + "grad_norm": 2.3564324378967285, + "learning_rate": 9.131256281407037e-06, + "loss": 0.0698, + "step": 9150 + }, + { + "epoch": 6.84, + "grad_norm": 3.1773228645324707, + "learning_rate": 9.128743718592964e-06, + "loss": 0.0723, + "step": 9175 + }, + { + "epoch": 6.86, + "grad_norm": 3.045433282852173, + "learning_rate": 9.126231155778896e-06, + "loss": 0.0737, + "step": 9200 + }, + { + "epoch": 6.88, + "grad_norm": 2.69769024848938, + "learning_rate": 9.123718592964825e-06, + "loss": 0.0717, + "step": 9225 + }, + { + "epoch": 6.9, + "grad_norm": 2.905315399169922, + "learning_rate": 9.121206030150754e-06, + "loss": 0.0731, + "step": 9250 + }, + { + "epoch": 6.92, + "grad_norm": 2.5915000438690186, + "learning_rate": 9.118693467336685e-06, + "loss": 0.0736, + "step": 9275 + }, + { + "epoch": 6.94, + "grad_norm": 2.906013250350952, + "learning_rate": 9.116180904522613e-06, + "loss": 0.0733, + "step": 9300 + }, + { + "epoch": 6.95, + "grad_norm": 2.6286356449127197, + "learning_rate": 9.113668341708544e-06, + "loss": 0.0731, + "step": 9325 + }, + { + "epoch": 6.97, + "grad_norm": 2.7987544536590576, + "learning_rate": 9.111155778894473e-06, + "loss": 0.0728, + "step": 9350 + }, + { + "epoch": 6.99, + "grad_norm": 2.8682780265808105, + "learning_rate": 9.108643216080402e-06, + "loss": 0.0731, + "step": 9375 + }, + { + "epoch": 7.01, + "grad_norm": 2.5095415115356445, + "learning_rate": 9.106130653266333e-06, + "loss": 0.0676, + "step": 9400 + }, + { + "epoch": 7.03, + "grad_norm": 3.153899908065796, + "learning_rate": 9.103618090452263e-06, + "loss": 0.0642, + "step": 9425 + }, + { + "epoch": 7.05, + "grad_norm": 2.9896531105041504, + "learning_rate": 9.101105527638192e-06, + "loss": 0.0659, + "step": 9450 + }, + { + "epoch": 7.07, + "grad_norm": 2.548534393310547, + "learning_rate": 9.098592964824121e-06, + "loss": 0.0629, + "step": 9475 + }, + { + "epoch": 7.08, + "grad_norm": 2.403630018234253, + "learning_rate": 9.09608040201005e-06, + "loss": 0.0644, + "step": 9500 + }, + { + "epoch": 7.1, + "grad_norm": 2.717442274093628, + "learning_rate": 9.09356783919598e-06, + "loss": 0.0616, + "step": 9525 + }, + { + "epoch": 7.12, + "grad_norm": 2.7704763412475586, + "learning_rate": 9.091055276381911e-06, + "loss": 0.0613, + "step": 9550 + }, + { + "epoch": 7.14, + "grad_norm": 2.490023374557495, + "learning_rate": 9.088542713567839e-06, + "loss": 0.0608, + "step": 9575 + }, + { + "epoch": 7.16, + "grad_norm": 2.893310546875, + "learning_rate": 9.08603015075377e-06, + "loss": 0.0632, + "step": 9600 + }, + { + "epoch": 7.18, + "grad_norm": 2.5858371257781982, + "learning_rate": 9.083517587939699e-06, + "loss": 0.063, + "step": 9625 + }, + { + "epoch": 7.2, + "grad_norm": 2.809083938598633, + "learning_rate": 9.081005025125628e-06, + "loss": 0.0644, + "step": 9650 + }, + { + "epoch": 7.21, + "grad_norm": 3.398388624191284, + "learning_rate": 9.07849246231156e-06, + "loss": 0.064, + "step": 9675 + }, + { + "epoch": 7.23, + "grad_norm": 2.946995973587036, + "learning_rate": 9.075979899497489e-06, + "loss": 0.062, + "step": 9700 + }, + { + "epoch": 7.25, + "grad_norm": 3.179126024246216, + "learning_rate": 9.073467336683418e-06, + "loss": 0.0623, + "step": 9725 + }, + { + "epoch": 7.27, + "grad_norm": 2.7881975173950195, + "learning_rate": 9.070954773869347e-06, + "loss": 0.062, + "step": 9750 + }, + { + "epoch": 7.29, + "grad_norm": 2.541093587875366, + "learning_rate": 9.068442211055277e-06, + "loss": 0.0644, + "step": 9775 + }, + { + "epoch": 7.31, + "grad_norm": 2.692385673522949, + "learning_rate": 9.065929648241206e-06, + "loss": 0.0623, + "step": 9800 + }, + { + "epoch": 7.33, + "grad_norm": 3.003221035003662, + "learning_rate": 9.063417085427137e-06, + "loss": 0.0641, + "step": 9825 + }, + { + "epoch": 7.35, + "grad_norm": 2.868403673171997, + "learning_rate": 9.060904522613066e-06, + "loss": 0.0629, + "step": 9850 + }, + { + "epoch": 7.36, + "grad_norm": 2.8488333225250244, + "learning_rate": 9.058391959798996e-06, + "loss": 0.0659, + "step": 9875 + }, + { + "epoch": 7.38, + "grad_norm": 2.658318519592285, + "learning_rate": 9.055879396984925e-06, + "loss": 0.0638, + "step": 9900 + }, + { + "epoch": 7.4, + "grad_norm": 2.549736261367798, + "learning_rate": 9.053366834170854e-06, + "loss": 0.0657, + "step": 9925 + }, + { + "epoch": 7.42, + "grad_norm": 2.3940868377685547, + "learning_rate": 9.050854271356785e-06, + "loss": 0.0637, + "step": 9950 + }, + { + "epoch": 7.44, + "grad_norm": 2.845806360244751, + "learning_rate": 9.048341708542715e-06, + "loss": 0.0643, + "step": 9975 + }, + { + "epoch": 7.46, + "grad_norm": 2.919236660003662, + "learning_rate": 9.045829145728644e-06, + "loss": 0.0641, + "step": 10000 + }, + { + "epoch": 7.46, + "eval_loss": 0.09652712196111679, + "eval_runtime": 979.29, + "eval_samples_per_second": 1.532, + "eval_steps_per_second": 1.532, + "eval_wer": 32.09409787859693, + "step": 10000 + }, + { + "epoch": 7.48, + "grad_norm": 2.520693302154541, + "learning_rate": 9.043316582914573e-06, + "loss": 0.0671, + "step": 10025 + }, + { + "epoch": 7.49, + "grad_norm": 3.0288069248199463, + "learning_rate": 9.040804020100503e-06, + "loss": 0.0634, + "step": 10050 + }, + { + "epoch": 7.51, + "grad_norm": 2.675863742828369, + "learning_rate": 9.038291457286434e-06, + "loss": 0.0658, + "step": 10075 + }, + { + "epoch": 7.53, + "grad_norm": 2.5510778427124023, + "learning_rate": 9.035778894472363e-06, + "loss": 0.065, + "step": 10100 + }, + { + "epoch": 7.55, + "grad_norm": 2.5188705921173096, + "learning_rate": 9.033266331658292e-06, + "loss": 0.0627, + "step": 10125 + }, + { + "epoch": 7.57, + "grad_norm": 2.984208106994629, + "learning_rate": 9.030753768844222e-06, + "loss": 0.0624, + "step": 10150 + }, + { + "epoch": 7.59, + "grad_norm": 4.029730319976807, + "learning_rate": 9.028241206030151e-06, + "loss": 0.064, + "step": 10175 + }, + { + "epoch": 7.61, + "grad_norm": 2.5905139446258545, + "learning_rate": 9.02572864321608e-06, + "loss": 0.0642, + "step": 10200 + }, + { + "epoch": 7.62, + "grad_norm": 2.7279016971588135, + "learning_rate": 9.023216080402011e-06, + "loss": 0.0617, + "step": 10225 + }, + { + "epoch": 7.64, + "grad_norm": 2.3911263942718506, + "learning_rate": 9.02070351758794e-06, + "loss": 0.066, + "step": 10250 + }, + { + "epoch": 7.66, + "grad_norm": 2.3298537731170654, + "learning_rate": 9.01819095477387e-06, + "loss": 0.0645, + "step": 10275 + }, + { + "epoch": 7.68, + "grad_norm": 2.924443006515503, + "learning_rate": 9.0156783919598e-06, + "loss": 0.0652, + "step": 10300 + }, + { + "epoch": 7.7, + "grad_norm": 2.9539990425109863, + "learning_rate": 9.013165829145729e-06, + "loss": 0.0629, + "step": 10325 + }, + { + "epoch": 7.72, + "grad_norm": 2.5762901306152344, + "learning_rate": 9.01065326633166e-06, + "loss": 0.0634, + "step": 10350 + }, + { + "epoch": 7.74, + "grad_norm": 2.887725591659546, + "learning_rate": 9.008140703517589e-06, + "loss": 0.0659, + "step": 10375 + }, + { + "epoch": 7.76, + "grad_norm": 3.138012409210205, + "learning_rate": 9.005628140703518e-06, + "loss": 0.0655, + "step": 10400 + }, + { + "epoch": 7.77, + "grad_norm": 2.738485336303711, + "learning_rate": 9.003115577889448e-06, + "loss": 0.0643, + "step": 10425 + }, + { + "epoch": 7.79, + "grad_norm": 2.50685977935791, + "learning_rate": 9.000603015075377e-06, + "loss": 0.064, + "step": 10450 + }, + { + "epoch": 7.81, + "grad_norm": 3.3891656398773193, + "learning_rate": 8.998090452261308e-06, + "loss": 0.0621, + "step": 10475 + }, + { + "epoch": 7.83, + "grad_norm": 2.862926959991455, + "learning_rate": 8.995577889447237e-06, + "loss": 0.0624, + "step": 10500 + }, + { + "epoch": 7.85, + "grad_norm": 2.5449576377868652, + "learning_rate": 8.993065326633167e-06, + "loss": 0.0627, + "step": 10525 + }, + { + "epoch": 7.87, + "grad_norm": 3.1202919483184814, + "learning_rate": 8.990552763819096e-06, + "loss": 0.064, + "step": 10550 + }, + { + "epoch": 7.89, + "grad_norm": 2.526926279067993, + "learning_rate": 8.988040201005025e-06, + "loss": 0.0653, + "step": 10575 + }, + { + "epoch": 7.9, + "grad_norm": 2.954207181930542, + "learning_rate": 8.985527638190955e-06, + "loss": 0.0627, + "step": 10600 + }, + { + "epoch": 7.92, + "grad_norm": 2.5419247150421143, + "learning_rate": 8.983015075376886e-06, + "loss": 0.0644, + "step": 10625 + }, + { + "epoch": 7.94, + "grad_norm": 2.6540229320526123, + "learning_rate": 8.980502512562815e-06, + "loss": 0.0632, + "step": 10650 + }, + { + "epoch": 7.96, + "grad_norm": 2.9723212718963623, + "learning_rate": 8.977989949748744e-06, + "loss": 0.066, + "step": 10675 + }, + { + "epoch": 7.98, + "grad_norm": 3.0502119064331055, + "learning_rate": 8.975477386934675e-06, + "loss": 0.0635, + "step": 10700 + }, + { + "epoch": 8.0, + "grad_norm": 2.8327510356903076, + "learning_rate": 8.972964824120603e-06, + "loss": 0.0631, + "step": 10725 + }, + { + "epoch": 8.02, + "grad_norm": 2.5462911128997803, + "learning_rate": 8.970452261306534e-06, + "loss": 0.054, + "step": 10750 + }, + { + "epoch": 8.04, + "grad_norm": 2.294522523880005, + "learning_rate": 8.967939698492463e-06, + "loss": 0.054, + "step": 10775 + }, + { + "epoch": 8.05, + "grad_norm": 2.775587320327759, + "learning_rate": 8.965427135678393e-06, + "loss": 0.0573, + "step": 10800 + }, + { + "epoch": 8.07, + "grad_norm": 3.1233370304107666, + "learning_rate": 8.962914572864322e-06, + "loss": 0.0536, + "step": 10825 + }, + { + "epoch": 8.09, + "grad_norm": 2.6669986248016357, + "learning_rate": 8.960402010050251e-06, + "loss": 0.0537, + "step": 10850 + }, + { + "epoch": 8.11, + "grad_norm": 2.7004992961883545, + "learning_rate": 8.957889447236182e-06, + "loss": 0.0556, + "step": 10875 + }, + { + "epoch": 8.13, + "grad_norm": 2.3690073490142822, + "learning_rate": 8.955376884422112e-06, + "loss": 0.0554, + "step": 10900 + }, + { + "epoch": 8.15, + "grad_norm": 3.217271327972412, + "learning_rate": 8.95286432160804e-06, + "loss": 0.0544, + "step": 10925 + }, + { + "epoch": 8.17, + "grad_norm": 3.1186749935150146, + "learning_rate": 8.95035175879397e-06, + "loss": 0.0551, + "step": 10950 + }, + { + "epoch": 8.18, + "grad_norm": 2.2177891731262207, + "learning_rate": 8.947839195979901e-06, + "loss": 0.0534, + "step": 10975 + }, + { + "epoch": 8.2, + "grad_norm": 3.0183002948760986, + "learning_rate": 8.945326633165829e-06, + "loss": 0.0549, + "step": 11000 + }, + { + "epoch": 8.2, + "eval_loss": 0.10014721751213074, + "eval_runtime": 978.1323, + "eval_samples_per_second": 1.534, + "eval_steps_per_second": 1.534, + "eval_wer": 32.75922425260799, + "step": 11000 + }, + { + "epoch": 8.22, + "grad_norm": 3.2706174850463867, + "learning_rate": 8.94281407035176e-06, + "loss": 0.0551, + "step": 11025 + }, + { + "epoch": 8.24, + "grad_norm": 2.38909649848938, + "learning_rate": 8.940301507537689e-06, + "loss": 0.0564, + "step": 11050 + }, + { + "epoch": 8.26, + "grad_norm": 2.643484115600586, + "learning_rate": 8.937788944723618e-06, + "loss": 0.0567, + "step": 11075 + }, + { + "epoch": 8.28, + "grad_norm": 2.5971357822418213, + "learning_rate": 8.93527638190955e-06, + "loss": 0.0561, + "step": 11100 + }, + { + "epoch": 8.3, + "grad_norm": 3.0723023414611816, + "learning_rate": 8.932763819095477e-06, + "loss": 0.0529, + "step": 11125 + }, + { + "epoch": 8.31, + "grad_norm": 2.734071969985962, + "learning_rate": 8.930251256281408e-06, + "loss": 0.0571, + "step": 11150 + }, + { + "epoch": 8.33, + "grad_norm": 2.5458946228027344, + "learning_rate": 8.927738693467337e-06, + "loss": 0.0566, + "step": 11175 + }, + { + "epoch": 8.35, + "grad_norm": 3.319462537765503, + "learning_rate": 8.925226130653267e-06, + "loss": 0.0567, + "step": 11200 + }, + { + "epoch": 8.37, + "grad_norm": 2.833836317062378, + "learning_rate": 8.922713567839196e-06, + "loss": 0.0555, + "step": 11225 + }, + { + "epoch": 8.39, + "grad_norm": 2.4368691444396973, + "learning_rate": 8.920201005025127e-06, + "loss": 0.0564, + "step": 11250 + }, + { + "epoch": 8.41, + "grad_norm": 2.6237728595733643, + "learning_rate": 8.917688442211055e-06, + "loss": 0.0557, + "step": 11275 + }, + { + "epoch": 8.43, + "grad_norm": 2.3838891983032227, + "learning_rate": 8.915175879396986e-06, + "loss": 0.0586, + "step": 11300 + }, + { + "epoch": 8.45, + "grad_norm": 2.5601353645324707, + "learning_rate": 8.912663316582915e-06, + "loss": 0.0566, + "step": 11325 + }, + { + "epoch": 8.46, + "grad_norm": 2.306411027908325, + "learning_rate": 8.910150753768844e-06, + "loss": 0.0538, + "step": 11350 + }, + { + "epoch": 8.48, + "grad_norm": 2.762286424636841, + "learning_rate": 8.907638190954775e-06, + "loss": 0.057, + "step": 11375 + }, + { + "epoch": 8.5, + "grad_norm": 2.7822818756103516, + "learning_rate": 8.905125628140705e-06, + "loss": 0.0548, + "step": 11400 + }, + { + "epoch": 8.52, + "grad_norm": 2.678818464279175, + "learning_rate": 8.902613065326634e-06, + "loss": 0.0562, + "step": 11425 + }, + { + "epoch": 8.54, + "grad_norm": 2.8526322841644287, + "learning_rate": 8.900100502512563e-06, + "loss": 0.0549, + "step": 11450 + }, + { + "epoch": 8.56, + "grad_norm": 2.569629430770874, + "learning_rate": 8.897587939698493e-06, + "loss": 0.0574, + "step": 11475 + }, + { + "epoch": 8.58, + "grad_norm": 2.898500919342041, + "learning_rate": 8.895075376884424e-06, + "loss": 0.0556, + "step": 11500 + }, + { + "epoch": 8.59, + "grad_norm": 3.0148086547851562, + "learning_rate": 8.892562814070353e-06, + "loss": 0.0537, + "step": 11525 + }, + { + "epoch": 8.61, + "grad_norm": 2.704127550125122, + "learning_rate": 8.890050251256282e-06, + "loss": 0.0565, + "step": 11550 + }, + { + "epoch": 8.63, + "grad_norm": 2.5527191162109375, + "learning_rate": 8.887537688442212e-06, + "loss": 0.0552, + "step": 11575 + }, + { + "epoch": 8.65, + "grad_norm": 2.5493600368499756, + "learning_rate": 8.885025125628141e-06, + "loss": 0.0549, + "step": 11600 + }, + { + "epoch": 8.67, + "grad_norm": 2.955414056777954, + "learning_rate": 8.88251256281407e-06, + "loss": 0.0556, + "step": 11625 + }, + { + "epoch": 8.69, + "grad_norm": 2.574990749359131, + "learning_rate": 8.880000000000001e-06, + "loss": 0.0535, + "step": 11650 + }, + { + "epoch": 8.71, + "grad_norm": 2.601780652999878, + "learning_rate": 8.87748743718593e-06, + "loss": 0.0542, + "step": 11675 + }, + { + "epoch": 8.72, + "grad_norm": 2.6815454959869385, + "learning_rate": 8.87497487437186e-06, + "loss": 0.0559, + "step": 11700 + }, + { + "epoch": 8.74, + "grad_norm": 2.87306809425354, + "learning_rate": 8.87246231155779e-06, + "loss": 0.0557, + "step": 11725 + }, + { + "epoch": 8.76, + "grad_norm": 3.1633527278900146, + "learning_rate": 8.869949748743719e-06, + "loss": 0.0568, + "step": 11750 + }, + { + "epoch": 8.78, + "grad_norm": 3.3581395149230957, + "learning_rate": 8.86743718592965e-06, + "loss": 0.0576, + "step": 11775 + }, + { + "epoch": 8.8, + "grad_norm": 3.02787709236145, + "learning_rate": 8.864924623115579e-06, + "loss": 0.0566, + "step": 11800 + }, + { + "epoch": 8.82, + "grad_norm": 2.753390073776245, + "learning_rate": 8.862412060301508e-06, + "loss": 0.0566, + "step": 11825 + }, + { + "epoch": 8.84, + "grad_norm": 2.28647518157959, + "learning_rate": 8.859899497487438e-06, + "loss": 0.0583, + "step": 11850 + }, + { + "epoch": 8.86, + "grad_norm": 3.049910306930542, + "learning_rate": 8.857386934673367e-06, + "loss": 0.056, + "step": 11875 + }, + { + "epoch": 8.87, + "grad_norm": 2.8235912322998047, + "learning_rate": 8.854874371859296e-06, + "loss": 0.0556, + "step": 11900 + }, + { + "epoch": 8.89, + "grad_norm": 2.701807975769043, + "learning_rate": 8.852361809045227e-06, + "loss": 0.0555, + "step": 11925 + }, + { + "epoch": 8.91, + "grad_norm": 3.087759256362915, + "learning_rate": 8.849849246231157e-06, + "loss": 0.0578, + "step": 11950 + }, + { + "epoch": 8.93, + "grad_norm": 2.8260905742645264, + "learning_rate": 8.847336683417086e-06, + "loss": 0.055, + "step": 11975 + }, + { + "epoch": 8.95, + "grad_norm": 2.4795854091644287, + "learning_rate": 8.844824120603015e-06, + "loss": 0.0568, + "step": 12000 + }, + { + "epoch": 8.95, + "eval_loss": 0.09952405840158463, + "eval_runtime": 986.3501, + "eval_samples_per_second": 1.521, + "eval_steps_per_second": 1.521, + "eval_wer": 32.129104529860676, + "step": 12000 + }, + { + "epoch": 8.97, + "grad_norm": 3.354093551635742, + "learning_rate": 8.842311557788945e-06, + "loss": 0.0559, + "step": 12025 + }, + { + "epoch": 8.99, + "grad_norm": 3.225405216217041, + "learning_rate": 8.839798994974876e-06, + "loss": 0.0541, + "step": 12050 + }, + { + "epoch": 9.0, + "grad_norm": 2.5506389141082764, + "learning_rate": 8.837286432160805e-06, + "loss": 0.0536, + "step": 12075 + }, + { + "epoch": 9.02, + "grad_norm": 2.859487295150757, + "learning_rate": 8.834773869346734e-06, + "loss": 0.0444, + "step": 12100 + }, + { + "epoch": 9.04, + "grad_norm": 2.8366897106170654, + "learning_rate": 8.832261306532665e-06, + "loss": 0.0487, + "step": 12125 + }, + { + "epoch": 9.06, + "grad_norm": 2.697500228881836, + "learning_rate": 8.829748743718593e-06, + "loss": 0.0451, + "step": 12150 + }, + { + "epoch": 9.08, + "grad_norm": 2.4420406818389893, + "learning_rate": 8.827236180904524e-06, + "loss": 0.045, + "step": 12175 + }, + { + "epoch": 9.1, + "grad_norm": 2.5096065998077393, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0471, + "step": 12200 + }, + { + "epoch": 9.12, + "grad_norm": 2.755467653274536, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0487, + "step": 12225 + }, + { + "epoch": 9.13, + "grad_norm": 2.202929735183716, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0465, + "step": 12250 + }, + { + "epoch": 9.15, + "grad_norm": 2.323772668838501, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0492, + "step": 12275 + }, + { + "epoch": 9.17, + "grad_norm": 2.747600793838501, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0463, + "step": 12300 + }, + { + "epoch": 9.19, + "grad_norm": 2.6838107109069824, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0487, + "step": 12325 + }, + { + "epoch": 9.21, + "grad_norm": 2.692500114440918, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0488, + "step": 12350 + }, + { + "epoch": 9.23, + "grad_norm": 2.5534379482269287, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0473, + "step": 12375 + }, + { + "epoch": 9.25, + "grad_norm": 2.942004442214966, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0481, + "step": 12400 + }, + { + "epoch": 9.27, + "grad_norm": 3.044095754623413, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0475, + "step": 12425 + }, + { + "epoch": 9.28, + "grad_norm": 2.4278528690338135, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0485, + "step": 12450 + }, + { + "epoch": 9.3, + "grad_norm": 2.6863367557525635, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0477, + "step": 12475 + }, + { + "epoch": 9.32, + "grad_norm": 2.252525806427002, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0474, + "step": 12500 + }, + { + "epoch": 9.34, + "grad_norm": 2.963892698287964, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0482, + "step": 12525 + }, + { + "epoch": 9.36, + "grad_norm": 2.5950465202331543, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0479, + "step": 12550 + }, + { + "epoch": 9.38, + "grad_norm": 2.468050956726074, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0491, + "step": 12575 + }, + { + "epoch": 9.4, + "grad_norm": 3.3994994163513184, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0494, + "step": 12600 + }, + { + "epoch": 9.41, + "grad_norm": 2.911231756210327, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0497, + "step": 12625 + }, + { + "epoch": 9.43, + "grad_norm": 3.431365489959717, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0497, + "step": 12650 + }, + { + "epoch": 9.45, + "grad_norm": 2.093797206878662, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0484, + "step": 12675 + }, + { + "epoch": 9.47, + "grad_norm": 2.5852432250976562, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0481, + "step": 12700 + }, + { + "epoch": 9.49, + "grad_norm": 3.1304728984832764, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0504, + "step": 12725 + }, + { + "epoch": 9.51, + "grad_norm": 2.6161022186279297, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0479, + "step": 12750 + }, + { + "epoch": 9.53, + "grad_norm": 2.9081978797912598, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0476, + "step": 12775 + }, + { + "epoch": 9.55, + "grad_norm": 2.9430465698242188, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0479, + "step": 12800 + }, + { + "epoch": 9.56, + "grad_norm": 2.9035348892211914, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0514, + "step": 12825 + }, + { + "epoch": 9.58, + "grad_norm": 2.58109450340271, + "learning_rate": 8.759396984924624e-06, + "loss": 0.049, + "step": 12850 + }, + { + "epoch": 9.6, + "grad_norm": 2.151991128921509, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0479, + "step": 12875 + }, + { + "epoch": 9.62, + "grad_norm": 2.7676093578338623, + "learning_rate": 8.754472361809045e-06, + "loss": 0.0487, + "step": 12900 + }, + { + "epoch": 9.64, + "grad_norm": 2.506352424621582, + "learning_rate": 8.751959798994976e-06, + "loss": 0.0479, + "step": 12925 + }, + { + "epoch": 9.66, + "grad_norm": 2.9588358402252197, + "learning_rate": 8.749447236180905e-06, + "loss": 0.0497, + "step": 12950 + }, + { + "epoch": 9.68, + "grad_norm": 2.459167718887329, + "learning_rate": 8.746934673366835e-06, + "loss": 0.0503, + "step": 12975 + }, + { + "epoch": 9.69, + "grad_norm": 2.3731677532196045, + "learning_rate": 8.744422110552766e-06, + "loss": 0.0514, + "step": 13000 + }, + { + "epoch": 9.69, + "eval_loss": 0.10116799920797348, + "eval_runtime": 983.2692, + "eval_samples_per_second": 1.526, + "eval_steps_per_second": 1.526, + "eval_wer": 31.947069943289225, + "step": 13000 + }, + { + "epoch": 9.71, + "grad_norm": 2.814642906188965, + "learning_rate": 8.741909547738693e-06, + "loss": 0.0496, + "step": 13025 + }, + { + "epoch": 9.73, + "grad_norm": 2.5198683738708496, + "learning_rate": 8.739396984924624e-06, + "loss": 0.0509, + "step": 13050 + }, + { + "epoch": 9.75, + "grad_norm": 2.6404521465301514, + "learning_rate": 8.736884422110554e-06, + "loss": 0.0496, + "step": 13075 + }, + { + "epoch": 9.77, + "grad_norm": 2.6061878204345703, + "learning_rate": 8.734371859296483e-06, + "loss": 0.0483, + "step": 13100 + }, + { + "epoch": 9.79, + "grad_norm": 2.4351799488067627, + "learning_rate": 8.731859296482412e-06, + "loss": 0.0505, + "step": 13125 + }, + { + "epoch": 9.81, + "grad_norm": 2.9255177974700928, + "learning_rate": 8.729346733668342e-06, + "loss": 0.0492, + "step": 13150 + }, + { + "epoch": 9.82, + "grad_norm": 2.719418525695801, + "learning_rate": 8.726834170854273e-06, + "loss": 0.0479, + "step": 13175 + }, + { + "epoch": 9.84, + "grad_norm": 2.907984733581543, + "learning_rate": 8.724321608040202e-06, + "loss": 0.0498, + "step": 13200 + }, + { + "epoch": 9.86, + "grad_norm": 2.5703907012939453, + "learning_rate": 8.721809045226131e-06, + "loss": 0.0494, + "step": 13225 + }, + { + "epoch": 9.88, + "grad_norm": 3.058525323867798, + "learning_rate": 8.71929648241206e-06, + "loss": 0.0504, + "step": 13250 + }, + { + "epoch": 9.9, + "grad_norm": 2.3988988399505615, + "learning_rate": 8.716783919597992e-06, + "loss": 0.0483, + "step": 13275 + }, + { + "epoch": 9.92, + "grad_norm": 3.078761577606201, + "learning_rate": 8.71427135678392e-06, + "loss": 0.048, + "step": 13300 + }, + { + "epoch": 9.94, + "grad_norm": 3.758516550064087, + "learning_rate": 8.71175879396985e-06, + "loss": 0.0501, + "step": 13325 + }, + { + "epoch": 9.96, + "grad_norm": 3.1110596656799316, + "learning_rate": 8.70924623115578e-06, + "loss": 0.0488, + "step": 13350 + }, + { + "epoch": 9.97, + "grad_norm": 2.7019145488739014, + "learning_rate": 8.706733668341709e-06, + "loss": 0.047, + "step": 13375 + }, + { + "epoch": 9.99, + "grad_norm": 2.389631748199463, + "learning_rate": 8.70422110552764e-06, + "loss": 0.0483, + "step": 13400 + }, + { + "epoch": 10.01, + "grad_norm": 2.371737480163574, + "learning_rate": 8.701708542713568e-06, + "loss": 0.0433, + "step": 13425 + }, + { + "epoch": 10.03, + "grad_norm": 2.2495388984680176, + "learning_rate": 8.699195979899499e-06, + "loss": 0.0396, + "step": 13450 + }, + { + "epoch": 10.05, + "grad_norm": 2.543123245239258, + "learning_rate": 8.696683417085428e-06, + "loss": 0.0407, + "step": 13475 + }, + { + "epoch": 10.07, + "grad_norm": 2.9471771717071533, + "learning_rate": 8.694170854271357e-06, + "loss": 0.0416, + "step": 13500 + }, + { + "epoch": 10.09, + "grad_norm": 2.6326029300689697, + "learning_rate": 8.691658291457287e-06, + "loss": 0.0403, + "step": 13525 + }, + { + "epoch": 10.1, + "grad_norm": 2.2728137969970703, + "learning_rate": 8.689145728643218e-06, + "loss": 0.0394, + "step": 13550 + }, + { + "epoch": 10.12, + "grad_norm": 2.9023725986480713, + "learning_rate": 8.686633165829147e-06, + "loss": 0.0409, + "step": 13575 + }, + { + "epoch": 10.14, + "grad_norm": 2.8963992595672607, + "learning_rate": 8.684120603015076e-06, + "loss": 0.0397, + "step": 13600 + }, + { + "epoch": 10.16, + "grad_norm": 2.5825443267822266, + "learning_rate": 8.681608040201006e-06, + "loss": 0.0417, + "step": 13625 + }, + { + "epoch": 10.18, + "grad_norm": 2.352583885192871, + "learning_rate": 8.679095477386935e-06, + "loss": 0.0407, + "step": 13650 + }, + { + "epoch": 10.2, + "grad_norm": 2.5204579830169678, + "learning_rate": 8.676582914572866e-06, + "loss": 0.0401, + "step": 13675 + }, + { + "epoch": 10.22, + "grad_norm": 2.3940703868865967, + "learning_rate": 8.674070351758794e-06, + "loss": 0.0411, + "step": 13700 + }, + { + "epoch": 10.23, + "grad_norm": 2.2686827182769775, + "learning_rate": 8.671557788944725e-06, + "loss": 0.0385, + "step": 13725 + }, + { + "epoch": 10.25, + "grad_norm": 2.7726383209228516, + "learning_rate": 8.669045226130654e-06, + "loss": 0.0398, + "step": 13750 + }, + { + "epoch": 10.27, + "grad_norm": 2.424121379852295, + "learning_rate": 8.666532663316583e-06, + "loss": 0.0431, + "step": 13775 + }, + { + "epoch": 10.29, + "grad_norm": 2.94406795501709, + "learning_rate": 8.664020100502514e-06, + "loss": 0.0423, + "step": 13800 + }, + { + "epoch": 10.31, + "grad_norm": 2.6730449199676514, + "learning_rate": 8.661507537688444e-06, + "loss": 0.0412, + "step": 13825 + }, + { + "epoch": 10.33, + "grad_norm": 2.556748390197754, + "learning_rate": 8.658994974874373e-06, + "loss": 0.0403, + "step": 13850 + }, + { + "epoch": 10.35, + "grad_norm": 2.869516134262085, + "learning_rate": 8.656482412060302e-06, + "loss": 0.0419, + "step": 13875 + }, + { + "epoch": 10.37, + "grad_norm": 2.7538869380950928, + "learning_rate": 8.653969849246231e-06, + "loss": 0.0458, + "step": 13900 + }, + { + "epoch": 10.38, + "grad_norm": 2.456782817840576, + "learning_rate": 8.65145728643216e-06, + "loss": 0.0439, + "step": 13925 + }, + { + "epoch": 10.4, + "grad_norm": 2.6303584575653076, + "learning_rate": 8.648944723618092e-06, + "loss": 0.0411, + "step": 13950 + }, + { + "epoch": 10.42, + "grad_norm": 2.5896425247192383, + "learning_rate": 8.64643216080402e-06, + "loss": 0.0432, + "step": 13975 + }, + { + "epoch": 10.44, + "grad_norm": 2.3941783905029297, + "learning_rate": 8.64391959798995e-06, + "loss": 0.0447, + "step": 14000 + }, + { + "epoch": 10.44, + "eval_loss": 0.10543013364076614, + "eval_runtime": 983.7235, + "eval_samples_per_second": 1.525, + "eval_steps_per_second": 1.525, + "eval_wer": 31.940068613036477, + "step": 14000 + }, + { + "epoch": 10.46, + "grad_norm": 2.4298009872436523, + "learning_rate": 8.64140703517588e-06, + "loss": 0.0429, + "step": 14025 + }, + { + "epoch": 10.48, + "grad_norm": 2.5128190517425537, + "learning_rate": 8.638894472361809e-06, + "loss": 0.0435, + "step": 14050 + }, + { + "epoch": 10.5, + "grad_norm": 2.113642454147339, + "learning_rate": 8.63638190954774e-06, + "loss": 0.0405, + "step": 14075 + }, + { + "epoch": 10.51, + "grad_norm": 2.33547306060791, + "learning_rate": 8.63386934673367e-06, + "loss": 0.0409, + "step": 14100 + }, + { + "epoch": 10.53, + "grad_norm": 2.553858995437622, + "learning_rate": 8.631356783919599e-06, + "loss": 0.0417, + "step": 14125 + }, + { + "epoch": 10.55, + "grad_norm": 2.5846853256225586, + "learning_rate": 8.628844221105528e-06, + "loss": 0.0429, + "step": 14150 + }, + { + "epoch": 10.57, + "grad_norm": 2.5041072368621826, + "learning_rate": 8.626331658291457e-06, + "loss": 0.043, + "step": 14175 + }, + { + "epoch": 10.59, + "grad_norm": 2.587430238723755, + "learning_rate": 8.623819095477388e-06, + "loss": 0.0432, + "step": 14200 + }, + { + "epoch": 10.61, + "grad_norm": 2.6839005947113037, + "learning_rate": 8.621306532663318e-06, + "loss": 0.0425, + "step": 14225 + }, + { + "epoch": 10.63, + "grad_norm": 2.552704095840454, + "learning_rate": 8.618793969849247e-06, + "loss": 0.0421, + "step": 14250 + }, + { + "epoch": 10.65, + "grad_norm": 2.1958324909210205, + "learning_rate": 8.616281407035176e-06, + "loss": 0.0408, + "step": 14275 + }, + { + "epoch": 10.66, + "grad_norm": 2.64367938041687, + "learning_rate": 8.613768844221106e-06, + "loss": 0.0427, + "step": 14300 + }, + { + "epoch": 10.68, + "grad_norm": 2.889110565185547, + "learning_rate": 8.611256281407035e-06, + "loss": 0.0418, + "step": 14325 + }, + { + "epoch": 10.7, + "grad_norm": 3.006659984588623, + "learning_rate": 8.608743718592966e-06, + "loss": 0.0424, + "step": 14350 + }, + { + "epoch": 10.72, + "grad_norm": 2.810326099395752, + "learning_rate": 8.606231155778895e-06, + "loss": 0.043, + "step": 14375 + }, + { + "epoch": 10.74, + "grad_norm": 2.8459999561309814, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0426, + "step": 14400 + }, + { + "epoch": 10.76, + "grad_norm": 3.0856242179870605, + "learning_rate": 8.601206030150756e-06, + "loss": 0.0408, + "step": 14425 + }, + { + "epoch": 10.78, + "grad_norm": 2.6378345489501953, + "learning_rate": 8.598693467336683e-06, + "loss": 0.0437, + "step": 14450 + }, + { + "epoch": 10.79, + "grad_norm": 2.8206963539123535, + "learning_rate": 8.596180904522614e-06, + "loss": 0.0443, + "step": 14475 + }, + { + "epoch": 10.81, + "grad_norm": 2.7250428199768066, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0438, + "step": 14500 + }, + { + "epoch": 10.83, + "grad_norm": 2.585697889328003, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0432, + "step": 14525 + }, + { + "epoch": 10.85, + "grad_norm": 2.4110260009765625, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0417, + "step": 14550 + }, + { + "epoch": 10.87, + "grad_norm": 2.242178201675415, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0405, + "step": 14575 + }, + { + "epoch": 10.89, + "grad_norm": 2.80928373336792, + "learning_rate": 8.583618090452261e-06, + "loss": 0.043, + "step": 14600 + }, + { + "epoch": 10.91, + "grad_norm": 2.9590799808502197, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0437, + "step": 14625 + }, + { + "epoch": 10.92, + "grad_norm": 2.6904921531677246, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0421, + "step": 14650 + }, + { + "epoch": 10.94, + "grad_norm": 2.5186736583709717, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0434, + "step": 14675 + }, + { + "epoch": 10.96, + "grad_norm": 3.10874342918396, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0437, + "step": 14700 + }, + { + "epoch": 10.98, + "grad_norm": 2.724733591079712, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0408, + "step": 14725 + }, + { + "epoch": 11.0, + "grad_norm": 2.8046507835388184, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0429, + "step": 14750 + }, + { + "epoch": 11.02, + "grad_norm": 2.272761106491089, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0339, + "step": 14775 + }, + { + "epoch": 11.04, + "grad_norm": 2.357423782348633, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0333, + "step": 14800 + }, + { + "epoch": 11.06, + "grad_norm": 2.353254556655884, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0348, + "step": 14825 + }, + { + "epoch": 11.07, + "grad_norm": 2.3883306980133057, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0344, + "step": 14850 + }, + { + "epoch": 11.09, + "grad_norm": 2.1833341121673584, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0328, + "step": 14875 + }, + { + "epoch": 11.11, + "grad_norm": 2.0716607570648193, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0344, + "step": 14900 + }, + { + "epoch": 11.13, + "grad_norm": 2.0776782035827637, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0361, + "step": 14925 + }, + { + "epoch": 11.15, + "grad_norm": 2.598073720932007, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0351, + "step": 14950 + }, + { + "epoch": 11.17, + "grad_norm": 2.3758792877197266, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0354, + "step": 14975 + }, + { + "epoch": 11.19, + "grad_norm": 2.5444610118865967, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0362, + "step": 15000 + }, + { + "epoch": 11.19, + "eval_loss": 0.11122602224349976, + "eval_runtime": 985.633, + "eval_samples_per_second": 1.522, + "eval_steps_per_second": 1.522, + "eval_wer": 33.26332003080586, + "step": 15000 + }, + { + "epoch": 11.2, + "grad_norm": 2.4064242839813232, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0358, + "step": 15025 + }, + { + "epoch": 11.22, + "grad_norm": 2.600449323654175, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0357, + "step": 15050 + }, + { + "epoch": 11.24, + "grad_norm": 2.550495147705078, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0375, + "step": 15075 + }, + { + "epoch": 11.26, + "grad_norm": 2.295846939086914, + "learning_rate": 8.533366834170856e-06, + "loss": 0.035, + "step": 15100 + }, + { + "epoch": 11.28, + "grad_norm": 2.265958309173584, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0349, + "step": 15125 + }, + { + "epoch": 11.3, + "grad_norm": 2.2195990085601807, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0353, + "step": 15150 + }, + { + "epoch": 11.32, + "grad_norm": 2.561805009841919, + "learning_rate": 8.525929648241206e-06, + "loss": 0.0374, + "step": 15175 + }, + { + "epoch": 11.33, + "grad_norm": 2.560361862182617, + "learning_rate": 8.523417085427135e-06, + "loss": 0.0356, + "step": 15200 + }, + { + "epoch": 11.35, + "grad_norm": 2.919983148574829, + "learning_rate": 8.520904522613066e-06, + "loss": 0.0358, + "step": 15225 + }, + { + "epoch": 11.37, + "grad_norm": 2.575775146484375, + "learning_rate": 8.518391959798996e-06, + "loss": 0.0371, + "step": 15250 + }, + { + "epoch": 11.39, + "grad_norm": 2.4455316066741943, + "learning_rate": 8.515879396984925e-06, + "loss": 0.0362, + "step": 15275 + }, + { + "epoch": 11.41, + "grad_norm": 2.1297264099121094, + "learning_rate": 8.513366834170856e-06, + "loss": 0.0359, + "step": 15300 + }, + { + "epoch": 11.43, + "grad_norm": 2.219409704208374, + "learning_rate": 8.510854271356784e-06, + "loss": 0.0364, + "step": 15325 + }, + { + "epoch": 11.45, + "grad_norm": 2.7272024154663086, + "learning_rate": 8.508341708542715e-06, + "loss": 0.036, + "step": 15350 + }, + { + "epoch": 11.47, + "grad_norm": 2.3709800243377686, + "learning_rate": 8.505829145728644e-06, + "loss": 0.0362, + "step": 15375 + }, + { + "epoch": 11.48, + "grad_norm": 2.563309669494629, + "learning_rate": 8.503316582914573e-06, + "loss": 0.0358, + "step": 15400 + }, + { + "epoch": 11.5, + "grad_norm": 2.762065887451172, + "learning_rate": 8.500804020100504e-06, + "loss": 0.037, + "step": 15425 + }, + { + "epoch": 11.52, + "grad_norm": 2.4129960536956787, + "learning_rate": 8.498291457286432e-06, + "loss": 0.0354, + "step": 15450 + }, + { + "epoch": 11.54, + "grad_norm": 2.785916328430176, + "learning_rate": 8.495778894472363e-06, + "loss": 0.0371, + "step": 15475 + }, + { + "epoch": 11.56, + "grad_norm": 2.4233784675598145, + "learning_rate": 8.493266331658292e-06, + "loss": 0.0372, + "step": 15500 + }, + { + "epoch": 11.58, + "grad_norm": 2.582101821899414, + "learning_rate": 8.490753768844222e-06, + "loss": 0.0354, + "step": 15525 + }, + { + "epoch": 11.6, + "grad_norm": 2.580369710922241, + "learning_rate": 8.488241206030151e-06, + "loss": 0.0369, + "step": 15550 + }, + { + "epoch": 11.61, + "grad_norm": 2.5387279987335205, + "learning_rate": 8.485728643216082e-06, + "loss": 0.0373, + "step": 15575 + }, + { + "epoch": 11.63, + "grad_norm": 2.291714668273926, + "learning_rate": 8.48321608040201e-06, + "loss": 0.0362, + "step": 15600 + }, + { + "epoch": 11.65, + "grad_norm": 3.2521932125091553, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0354, + "step": 15625 + }, + { + "epoch": 11.67, + "grad_norm": 2.6111679077148438, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0364, + "step": 15650 + }, + { + "epoch": 11.69, + "grad_norm": 2.853839874267578, + "learning_rate": 8.4756783919598e-06, + "loss": 0.0388, + "step": 15675 + }, + { + "epoch": 11.71, + "grad_norm": 2.6592774391174316, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0371, + "step": 15700 + }, + { + "epoch": 11.73, + "grad_norm": 2.529933452606201, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0377, + "step": 15725 + }, + { + "epoch": 11.74, + "grad_norm": 2.2929601669311523, + "learning_rate": 8.468140703517589e-06, + "loss": 0.0382, + "step": 15750 + }, + { + "epoch": 11.76, + "grad_norm": 2.8813633918762207, + "learning_rate": 8.465628140703518e-06, + "loss": 0.0374, + "step": 15775 + }, + { + "epoch": 11.78, + "grad_norm": 2.4930412769317627, + "learning_rate": 8.463115577889448e-06, + "loss": 0.0372, + "step": 15800 + }, + { + "epoch": 11.8, + "grad_norm": 3.093811273574829, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0368, + "step": 15825 + }, + { + "epoch": 11.82, + "grad_norm": 2.7937066555023193, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0373, + "step": 15850 + }, + { + "epoch": 11.84, + "grad_norm": 2.5498275756835938, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0372, + "step": 15875 + }, + { + "epoch": 11.86, + "grad_norm": 2.395296335220337, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0359, + "step": 15900 + }, + { + "epoch": 11.88, + "grad_norm": 2.2825675010681152, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0339, + "step": 15925 + }, + { + "epoch": 11.89, + "grad_norm": 2.5040225982666016, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0357, + "step": 15950 + }, + { + "epoch": 11.91, + "grad_norm": 2.358630418777466, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0369, + "step": 15975 + }, + { + "epoch": 11.93, + "grad_norm": 2.525726318359375, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0367, + "step": 16000 + }, + { + "epoch": 11.93, + "eval_loss": 0.11301389336585999, + "eval_runtime": 966.6668, + "eval_samples_per_second": 1.552, + "eval_steps_per_second": 1.552, + "eval_wer": 32.2201218231464, + "step": 16000 + }, + { + "epoch": 11.95, + "grad_norm": 2.822854518890381, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0362, + "step": 16025 + }, + { + "epoch": 11.97, + "grad_norm": 2.5917813777923584, + "learning_rate": 8.437989949748744e-06, + "loss": 0.0391, + "step": 16050 + }, + { + "epoch": 11.99, + "grad_norm": 2.6897950172424316, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0374, + "step": 16075 + }, + { + "epoch": 12.01, + "grad_norm": 2.37255859375, + "learning_rate": 8.432964824120605e-06, + "loss": 0.0351, + "step": 16100 + }, + { + "epoch": 12.02, + "grad_norm": 1.9258285760879517, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0297, + "step": 16125 + }, + { + "epoch": 12.04, + "grad_norm": 2.0240468978881836, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0298, + "step": 16150 + }, + { + "epoch": 12.06, + "grad_norm": 2.1883256435394287, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0293, + "step": 16175 + }, + { + "epoch": 12.08, + "grad_norm": 2.712958574295044, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0285, + "step": 16200 + }, + { + "epoch": 12.1, + "grad_norm": 2.3346736431121826, + "learning_rate": 8.420402010050251e-06, + "loss": 0.028, + "step": 16225 + }, + { + "epoch": 12.12, + "grad_norm": 2.317542791366577, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0296, + "step": 16250 + }, + { + "epoch": 12.14, + "grad_norm": 2.213181257247925, + "learning_rate": 8.415376884422112e-06, + "loss": 0.0278, + "step": 16275 + }, + { + "epoch": 12.16, + "grad_norm": 2.4293649196624756, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0306, + "step": 16300 + }, + { + "epoch": 12.17, + "grad_norm": 2.542948007583618, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0309, + "step": 16325 + }, + { + "epoch": 12.19, + "grad_norm": 2.207859992980957, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0324, + "step": 16350 + }, + { + "epoch": 12.21, + "grad_norm": 2.201171398162842, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0296, + "step": 16375 + }, + { + "epoch": 12.23, + "grad_norm": 2.472698211669922, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0298, + "step": 16400 + }, + { + "epoch": 12.25, + "grad_norm": 2.2383639812469482, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0297, + "step": 16425 + }, + { + "epoch": 12.27, + "grad_norm": 2.404345750808716, + "learning_rate": 8.397788944723619e-06, + "loss": 0.0303, + "step": 16450 + }, + { + "epoch": 12.29, + "grad_norm": 2.5482709407806396, + "learning_rate": 8.395276381909548e-06, + "loss": 0.03, + "step": 16475 + }, + { + "epoch": 12.3, + "grad_norm": 2.250312566757202, + "learning_rate": 8.392763819095479e-06, + "loss": 0.0308, + "step": 16500 + }, + { + "epoch": 12.32, + "grad_norm": 2.4988269805908203, + "learning_rate": 8.390251256281408e-06, + "loss": 0.0306, + "step": 16525 + }, + { + "epoch": 12.34, + "grad_norm": 2.4547650814056396, + "learning_rate": 8.387738693467338e-06, + "loss": 0.0309, + "step": 16550 + }, + { + "epoch": 12.36, + "grad_norm": 2.4281766414642334, + "learning_rate": 8.385226130653267e-06, + "loss": 0.031, + "step": 16575 + }, + { + "epoch": 12.38, + "grad_norm": 2.3334124088287354, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0292, + "step": 16600 + }, + { + "epoch": 12.4, + "grad_norm": 2.224034070968628, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0301, + "step": 16625 + }, + { + "epoch": 12.42, + "grad_norm": 2.313802719116211, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0317, + "step": 16650 + }, + { + "epoch": 12.43, + "grad_norm": 2.5924999713897705, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0318, + "step": 16675 + }, + { + "epoch": 12.45, + "grad_norm": 2.385542392730713, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0311, + "step": 16700 + }, + { + "epoch": 12.47, + "grad_norm": 2.573235273361206, + "learning_rate": 8.370150753768845e-06, + "loss": 0.0312, + "step": 16725 + }, + { + "epoch": 12.49, + "grad_norm": 2.3993160724639893, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0314, + "step": 16750 + }, + { + "epoch": 12.51, + "grad_norm": 2.366703510284424, + "learning_rate": 8.365125628140705e-06, + "loss": 0.03, + "step": 16775 + }, + { + "epoch": 12.53, + "grad_norm": 2.68515944480896, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0319, + "step": 16800 + }, + { + "epoch": 12.55, + "grad_norm": 2.3880069255828857, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0297, + "step": 16825 + }, + { + "epoch": 12.57, + "grad_norm": 2.3045992851257324, + "learning_rate": 8.357587939698493e-06, + "loss": 0.03, + "step": 16850 + }, + { + "epoch": 12.58, + "grad_norm": 2.5263333320617676, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0316, + "step": 16875 + }, + { + "epoch": 12.6, + "grad_norm": 2.6109235286712646, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0308, + "step": 16900 + }, + { + "epoch": 12.62, + "grad_norm": 2.5221216678619385, + "learning_rate": 8.350050251256282e-06, + "loss": 0.0319, + "step": 16925 + }, + { + "epoch": 12.64, + "grad_norm": 2.394711494445801, + "learning_rate": 8.347537688442212e-06, + "loss": 0.0324, + "step": 16950 + }, + { + "epoch": 12.66, + "grad_norm": 2.415128469467163, + "learning_rate": 8.345025125628141e-06, + "loss": 0.0322, + "step": 16975 + }, + { + "epoch": 12.68, + "grad_norm": 2.6447741985321045, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0316, + "step": 17000 + }, + { + "epoch": 12.68, + "eval_loss": 0.11787088960409164, + "eval_runtime": 964.7629, + "eval_samples_per_second": 1.555, + "eval_steps_per_second": 1.555, + "eval_wer": 32.69621228033326, + "step": 17000 + }, + { + "epoch": 12.7, + "grad_norm": 2.299283027648926, + "learning_rate": 8.34e-06, + "loss": 0.0338, + "step": 17025 + }, + { + "epoch": 12.71, + "grad_norm": 2.6792774200439453, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0323, + "step": 17050 + }, + { + "epoch": 12.73, + "grad_norm": 2.6819100379943848, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0302, + "step": 17075 + }, + { + "epoch": 12.75, + "grad_norm": 2.3408172130584717, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0313, + "step": 17100 + }, + { + "epoch": 12.77, + "grad_norm": 2.55956768989563, + "learning_rate": 8.32994974874372e-06, + "loss": 0.031, + "step": 17125 + }, + { + "epoch": 12.79, + "grad_norm": 2.3811748027801514, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0328, + "step": 17150 + }, + { + "epoch": 12.81, + "grad_norm": 2.9043076038360596, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0329, + "step": 17175 + }, + { + "epoch": 12.83, + "grad_norm": 2.5691237449645996, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0321, + "step": 17200 + }, + { + "epoch": 12.84, + "grad_norm": 2.2787132263183594, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0318, + "step": 17225 + }, + { + "epoch": 12.86, + "grad_norm": 2.4713504314422607, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0311, + "step": 17250 + }, + { + "epoch": 12.88, + "grad_norm": 2.348418712615967, + "learning_rate": 8.314874371859298e-06, + "loss": 0.0323, + "step": 17275 + }, + { + "epoch": 12.9, + "grad_norm": 2.223663330078125, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0313, + "step": 17300 + }, + { + "epoch": 12.92, + "grad_norm": 2.386359453201294, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0325, + "step": 17325 + }, + { + "epoch": 12.94, + "grad_norm": 3.227418899536133, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0323, + "step": 17350 + }, + { + "epoch": 12.96, + "grad_norm": 2.8610100746154785, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0325, + "step": 17375 + }, + { + "epoch": 12.98, + "grad_norm": 2.4445221424102783, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0315, + "step": 17400 + }, + { + "epoch": 12.99, + "grad_norm": 2.7373239994049072, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0317, + "step": 17425 + }, + { + "epoch": 13.01, + "grad_norm": 2.7063283920288086, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0258, + "step": 17450 + }, + { + "epoch": 13.03, + "grad_norm": 2.675114393234253, + "learning_rate": 8.294773869346734e-06, + "loss": 0.025, + "step": 17475 + }, + { + "epoch": 13.05, + "grad_norm": 1.8393616676330566, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0245, + "step": 17500 + }, + { + "epoch": 13.07, + "grad_norm": 2.294482946395874, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0239, + "step": 17525 + }, + { + "epoch": 13.09, + "grad_norm": 2.205127716064453, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0257, + "step": 17550 + }, + { + "epoch": 13.11, + "grad_norm": 2.368788480758667, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0235, + "step": 17575 + }, + { + "epoch": 13.12, + "grad_norm": 2.4354734420776367, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0251, + "step": 17600 + }, + { + "epoch": 13.14, + "grad_norm": 2.027024745941162, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0248, + "step": 17625 + }, + { + "epoch": 13.16, + "grad_norm": 2.4112343788146973, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0263, + "step": 17650 + }, + { + "epoch": 13.18, + "grad_norm": 2.392082929611206, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0252, + "step": 17675 + }, + { + "epoch": 13.2, + "grad_norm": 2.2729547023773193, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0238, + "step": 17700 + }, + { + "epoch": 13.22, + "grad_norm": 2.178835153579712, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0257, + "step": 17725 + }, + { + "epoch": 13.24, + "grad_norm": 2.67167067527771, + "learning_rate": 8.26713567839196e-06, + "loss": 0.027, + "step": 17750 + }, + { + "epoch": 13.26, + "grad_norm": 2.5054757595062256, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0258, + "step": 17775 + }, + { + "epoch": 13.27, + "grad_norm": 2.4703803062438965, + "learning_rate": 8.26211055276382e-06, + "loss": 0.024, + "step": 17800 + }, + { + "epoch": 13.29, + "grad_norm": 3.0355288982391357, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0241, + "step": 17825 + }, + { + "epoch": 13.31, + "grad_norm": 2.6055102348327637, + "learning_rate": 8.25708542713568e-06, + "loss": 0.026, + "step": 17850 + }, + { + "epoch": 13.33, + "grad_norm": 2.566568613052368, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0273, + "step": 17875 + }, + { + "epoch": 13.35, + "grad_norm": 2.46081805229187, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0254, + "step": 17900 + }, + { + "epoch": 13.37, + "grad_norm": 2.4750471115112305, + "learning_rate": 8.249547738693467e-06, + "loss": 0.027, + "step": 17925 + }, + { + "epoch": 13.39, + "grad_norm": 2.1886961460113525, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0263, + "step": 17950 + }, + { + "epoch": 13.4, + "grad_norm": 2.4685685634613037, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0266, + "step": 17975 + }, + { + "epoch": 13.42, + "grad_norm": 2.1025149822235107, + "learning_rate": 8.242010050251257e-06, + "loss": 0.0253, + "step": 18000 + }, + { + "epoch": 13.42, + "eval_loss": 0.12431913614273071, + "eval_runtime": 960.0947, + "eval_samples_per_second": 1.562, + "eval_steps_per_second": 1.562, + "eval_wer": 33.1092907652454, + "step": 18000 + }, + { + "epoch": 13.44, + "grad_norm": 2.4917831420898438, + "learning_rate": 8.239497487437186e-06, + "loss": 0.0273, + "step": 18025 + }, + { + "epoch": 13.46, + "grad_norm": 2.939317464828491, + "learning_rate": 8.236984924623116e-06, + "loss": 0.0272, + "step": 18050 + }, + { + "epoch": 13.48, + "grad_norm": 2.603675603866577, + "learning_rate": 8.234472361809047e-06, + "loss": 0.0276, + "step": 18075 + }, + { + "epoch": 13.5, + "grad_norm": 2.719520092010498, + "learning_rate": 8.231959798994976e-06, + "loss": 0.026, + "step": 18100 + }, + { + "epoch": 13.52, + "grad_norm": 2.309699773788452, + "learning_rate": 8.229447236180905e-06, + "loss": 0.0249, + "step": 18125 + }, + { + "epoch": 13.53, + "grad_norm": 3.0950756072998047, + "learning_rate": 8.226934673366835e-06, + "loss": 0.0248, + "step": 18150 + }, + { + "epoch": 13.55, + "grad_norm": 2.453995704650879, + "learning_rate": 8.224422110552764e-06, + "loss": 0.0266, + "step": 18175 + }, + { + "epoch": 13.57, + "grad_norm": 2.563749313354492, + "learning_rate": 8.221909547738695e-06, + "loss": 0.0258, + "step": 18200 + }, + { + "epoch": 13.59, + "grad_norm": 2.8392231464385986, + "learning_rate": 8.219396984924624e-06, + "loss": 0.0263, + "step": 18225 + }, + { + "epoch": 13.61, + "grad_norm": 2.0023348331451416, + "learning_rate": 8.216884422110554e-06, + "loss": 0.0265, + "step": 18250 + }, + { + "epoch": 13.63, + "grad_norm": 2.600170850753784, + "learning_rate": 8.214371859296483e-06, + "loss": 0.0285, + "step": 18275 + }, + { + "epoch": 13.65, + "grad_norm": 2.9098427295684814, + "learning_rate": 8.211859296482412e-06, + "loss": 0.0265, + "step": 18300 + }, + { + "epoch": 13.67, + "grad_norm": 2.0197994709014893, + "learning_rate": 8.209346733668342e-06, + "loss": 0.026, + "step": 18325 + }, + { + "epoch": 13.68, + "grad_norm": 2.754202127456665, + "learning_rate": 8.206834170854273e-06, + "loss": 0.0258, + "step": 18350 + }, + { + "epoch": 13.7, + "grad_norm": 3.1637275218963623, + "learning_rate": 8.204321608040202e-06, + "loss": 0.026, + "step": 18375 + }, + { + "epoch": 13.72, + "grad_norm": 2.467360734939575, + "learning_rate": 8.201809045226131e-06, + "loss": 0.0271, + "step": 18400 + }, + { + "epoch": 13.74, + "grad_norm": 3.000537633895874, + "learning_rate": 8.19929648241206e-06, + "loss": 0.0281, + "step": 18425 + }, + { + "epoch": 13.76, + "grad_norm": 2.671025276184082, + "learning_rate": 8.19678391959799e-06, + "loss": 0.0277, + "step": 18450 + }, + { + "epoch": 13.78, + "grad_norm": 2.780348300933838, + "learning_rate": 8.194271356783921e-06, + "loss": 0.0258, + "step": 18475 + }, + { + "epoch": 13.8, + "grad_norm": 2.078457832336426, + "learning_rate": 8.19175879396985e-06, + "loss": 0.0252, + "step": 18500 + }, + { + "epoch": 13.81, + "grad_norm": 2.70040225982666, + "learning_rate": 8.18924623115578e-06, + "loss": 0.0272, + "step": 18525 + }, + { + "epoch": 13.83, + "grad_norm": 2.292285680770874, + "learning_rate": 8.186733668341709e-06, + "loss": 0.0269, + "step": 18550 + }, + { + "epoch": 13.85, + "grad_norm": 2.305100202560425, + "learning_rate": 8.184221105527638e-06, + "loss": 0.027, + "step": 18575 + }, + { + "epoch": 13.87, + "grad_norm": 2.794457197189331, + "learning_rate": 8.18170854271357e-06, + "loss": 0.0295, + "step": 18600 + }, + { + "epoch": 13.89, + "grad_norm": 2.753180980682373, + "learning_rate": 8.179195979899498e-06, + "loss": 0.0275, + "step": 18625 + }, + { + "epoch": 13.91, + "grad_norm": 2.5390548706054688, + "learning_rate": 8.176683417085428e-06, + "loss": 0.0271, + "step": 18650 + }, + { + "epoch": 13.93, + "grad_norm": 2.4420042037963867, + "learning_rate": 8.174170854271357e-06, + "loss": 0.027, + "step": 18675 + }, + { + "epoch": 13.94, + "grad_norm": 2.1948373317718506, + "learning_rate": 8.171658291457286e-06, + "loss": 0.0275, + "step": 18700 + }, + { + "epoch": 13.96, + "grad_norm": 2.374937057495117, + "learning_rate": 8.169145728643216e-06, + "loss": 0.027, + "step": 18725 + }, + { + "epoch": 13.98, + "grad_norm": 2.525052547454834, + "learning_rate": 8.166633165829147e-06, + "loss": 0.0289, + "step": 18750 + }, + { + "epoch": 14.0, + "grad_norm": 1.8312453031539917, + "learning_rate": 8.164120603015076e-06, + "loss": 0.0284, + "step": 18775 + }, + { + "epoch": 14.02, + "grad_norm": 1.8313333988189697, + "learning_rate": 8.161608040201005e-06, + "loss": 0.0213, + "step": 18800 + }, + { + "epoch": 14.04, + "grad_norm": 2.235586404800415, + "learning_rate": 8.159095477386936e-06, + "loss": 0.0214, + "step": 18825 + }, + { + "epoch": 14.06, + "grad_norm": 2.9958503246307373, + "learning_rate": 8.156582914572864e-06, + "loss": 0.0205, + "step": 18850 + }, + { + "epoch": 14.08, + "grad_norm": 2.11965274810791, + "learning_rate": 8.154070351758795e-06, + "loss": 0.0213, + "step": 18875 + }, + { + "epoch": 14.09, + "grad_norm": 2.2701382637023926, + "learning_rate": 8.151557788944724e-06, + "loss": 0.021, + "step": 18900 + }, + { + "epoch": 14.11, + "grad_norm": 2.152397632598877, + "learning_rate": 8.149045226130654e-06, + "loss": 0.0204, + "step": 18925 + }, + { + "epoch": 14.13, + "grad_norm": 1.9499527215957642, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0207, + "step": 18950 + }, + { + "epoch": 14.15, + "grad_norm": 1.8760676383972168, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0202, + "step": 18975 + }, + { + "epoch": 14.17, + "grad_norm": 2.155259609222412, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0204, + "step": 19000 + }, + { + "epoch": 14.17, + "eval_loss": 0.13031300902366638, + "eval_runtime": 966.0448, + "eval_samples_per_second": 1.553, + "eval_steps_per_second": 1.553, + "eval_wer": 32.60519498704754, + "step": 19000 + }, + { + "epoch": 14.19, + "grad_norm": 3.0113847255706787, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0218, + "step": 19025 + }, + { + "epoch": 14.21, + "grad_norm": 2.679889440536499, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0227, + "step": 19050 + }, + { + "epoch": 14.22, + "grad_norm": 1.983940839767456, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0208, + "step": 19075 + }, + { + "epoch": 14.24, + "grad_norm": 2.231234073638916, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0214, + "step": 19100 + }, + { + "epoch": 14.26, + "grad_norm": 2.6263012886047363, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0208, + "step": 19125 + }, + { + "epoch": 14.28, + "grad_norm": 2.3262438774108887, + "learning_rate": 8.126432160804021e-06, + "loss": 0.0225, + "step": 19150 + }, + { + "epoch": 14.3, + "grad_norm": 2.1026012897491455, + "learning_rate": 8.124020100502513e-06, + "loss": 0.022, + "step": 19175 + }, + { + "epoch": 14.32, + "grad_norm": 2.324632406234741, + "learning_rate": 8.121507537688444e-06, + "loss": 0.0206, + "step": 19200 + }, + { + "epoch": 14.34, + "grad_norm": 2.6240227222442627, + "learning_rate": 8.118994974874373e-06, + "loss": 0.0233, + "step": 19225 + }, + { + "epoch": 14.35, + "grad_norm": 1.8270357847213745, + "learning_rate": 8.116482412060302e-06, + "loss": 0.0218, + "step": 19250 + }, + { + "epoch": 14.37, + "grad_norm": 1.8880401849746704, + "learning_rate": 8.113969849246232e-06, + "loss": 0.0214, + "step": 19275 + }, + { + "epoch": 14.39, + "grad_norm": 2.090501546859741, + "learning_rate": 8.111457286432161e-06, + "loss": 0.0212, + "step": 19300 + }, + { + "epoch": 14.41, + "grad_norm": 2.4207563400268555, + "learning_rate": 8.10894472361809e-06, + "loss": 0.0216, + "step": 19325 + }, + { + "epoch": 14.43, + "grad_norm": 2.8410065174102783, + "learning_rate": 8.106432160804021e-06, + "loss": 0.0229, + "step": 19350 + }, + { + "epoch": 14.45, + "grad_norm": 2.5641822814941406, + "learning_rate": 8.10391959798995e-06, + "loss": 0.0218, + "step": 19375 + }, + { + "epoch": 14.47, + "grad_norm": 2.480525016784668, + "learning_rate": 8.10140703517588e-06, + "loss": 0.022, + "step": 19400 + }, + { + "epoch": 14.49, + "grad_norm": 2.083963632583618, + "learning_rate": 8.098894472361811e-06, + "loss": 0.0215, + "step": 19425 + }, + { + "epoch": 14.5, + "grad_norm": 2.725046157836914, + "learning_rate": 8.096381909547739e-06, + "loss": 0.0229, + "step": 19450 + }, + { + "epoch": 14.52, + "grad_norm": 2.3301069736480713, + "learning_rate": 8.09386934673367e-06, + "loss": 0.0219, + "step": 19475 + }, + { + "epoch": 14.54, + "grad_norm": 2.8076956272125244, + "learning_rate": 8.091356783919599e-06, + "loss": 0.0226, + "step": 19500 + }, + { + "epoch": 14.56, + "grad_norm": 2.387206792831421, + "learning_rate": 8.088844221105528e-06, + "loss": 0.0229, + "step": 19525 + }, + { + "epoch": 14.58, + "grad_norm": 2.6538498401641846, + "learning_rate": 8.086331658291458e-06, + "loss": 0.0233, + "step": 19550 + }, + { + "epoch": 14.6, + "grad_norm": 2.888737916946411, + "learning_rate": 8.083819095477387e-06, + "loss": 0.0239, + "step": 19575 + }, + { + "epoch": 14.62, + "grad_norm": 2.595784902572632, + "learning_rate": 8.081306532663318e-06, + "loss": 0.0238, + "step": 19600 + }, + { + "epoch": 14.63, + "grad_norm": 2.498194694519043, + "learning_rate": 8.078793969849247e-06, + "loss": 0.0217, + "step": 19625 + }, + { + "epoch": 14.65, + "grad_norm": 3.2817816734313965, + "learning_rate": 8.076281407035177e-06, + "loss": 0.0225, + "step": 19650 + }, + { + "epoch": 14.67, + "grad_norm": 3.2992820739746094, + "learning_rate": 8.073768844221106e-06, + "loss": 0.0237, + "step": 19675 + }, + { + "epoch": 14.69, + "grad_norm": 2.8622806072235107, + "learning_rate": 8.071256281407037e-06, + "loss": 0.0229, + "step": 19700 + }, + { + "epoch": 14.71, + "grad_norm": 2.3647477626800537, + "learning_rate": 8.068743718592964e-06, + "loss": 0.0213, + "step": 19725 + }, + { + "epoch": 14.73, + "grad_norm": 2.4072659015655518, + "learning_rate": 8.066231155778895e-06, + "loss": 0.0231, + "step": 19750 + }, + { + "epoch": 14.75, + "grad_norm": 2.923861265182495, + "learning_rate": 8.063718592964825e-06, + "loss": 0.0226, + "step": 19775 + }, + { + "epoch": 14.77, + "grad_norm": 2.25437068939209, + "learning_rate": 8.061206030150754e-06, + "loss": 0.0232, + "step": 19800 + }, + { + "epoch": 14.78, + "grad_norm": 2.5043392181396484, + "learning_rate": 8.058693467336685e-06, + "loss": 0.0234, + "step": 19825 + }, + { + "epoch": 14.8, + "grad_norm": 1.85288667678833, + "learning_rate": 8.056180904522613e-06, + "loss": 0.0223, + "step": 19850 + }, + { + "epoch": 14.82, + "grad_norm": 2.796004295349121, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0229, + "step": 19875 + }, + { + "epoch": 14.84, + "grad_norm": 2.4344449043273926, + "learning_rate": 8.051155778894473e-06, + "loss": 0.0221, + "step": 19900 + }, + { + "epoch": 14.86, + "grad_norm": 2.8644461631774902, + "learning_rate": 8.048643216080402e-06, + "loss": 0.0231, + "step": 19925 + }, + { + "epoch": 14.88, + "grad_norm": 2.608741044998169, + "learning_rate": 8.046130653266332e-06, + "loss": 0.024, + "step": 19950 + }, + { + "epoch": 14.9, + "grad_norm": 2.9157910346984863, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0238, + "step": 19975 + }, + { + "epoch": 14.91, + "grad_norm": 2.142277240753174, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0232, + "step": 20000 + }, + { + "epoch": 14.91, + "eval_loss": 0.13146443665027618, + "eval_runtime": 970.7625, + "eval_samples_per_second": 1.545, + "eval_steps_per_second": 1.545, + "eval_wer": 33.26332003080586, + "step": 20000 + }, + { + "epoch": 14.93, + "grad_norm": 3.1528232097625732, + "learning_rate": 8.038592964824121e-06, + "loss": 0.0237, + "step": 20025 + }, + { + "epoch": 14.95, + "grad_norm": 2.538665533065796, + "learning_rate": 8.03608040201005e-06, + "loss": 0.0229, + "step": 20050 + }, + { + "epoch": 14.97, + "grad_norm": 2.5796642303466797, + "learning_rate": 8.03356783919598e-06, + "loss": 0.0237, + "step": 20075 + }, + { + "epoch": 14.99, + "grad_norm": 2.3634748458862305, + "learning_rate": 8.031055276381911e-06, + "loss": 0.0228, + "step": 20100 + }, + { + "epoch": 15.01, + "grad_norm": 2.143162250518799, + "learning_rate": 8.028542713567839e-06, + "loss": 0.0198, + "step": 20125 + }, + { + "epoch": 15.03, + "grad_norm": 1.9507482051849365, + "learning_rate": 8.02603015075377e-06, + "loss": 0.0176, + "step": 20150 + }, + { + "epoch": 15.04, + "grad_norm": 2.084425210952759, + "learning_rate": 8.023517587939699e-06, + "loss": 0.0175, + "step": 20175 + }, + { + "epoch": 15.06, + "grad_norm": 2.005608081817627, + "learning_rate": 8.021005025125628e-06, + "loss": 0.0164, + "step": 20200 + }, + { + "epoch": 15.08, + "grad_norm": 1.978476643562317, + "learning_rate": 8.01849246231156e-06, + "loss": 0.0162, + "step": 20225 + }, + { + "epoch": 15.1, + "grad_norm": 1.9234790802001953, + "learning_rate": 8.015979899497489e-06, + "loss": 0.0179, + "step": 20250 + }, + { + "epoch": 15.12, + "grad_norm": 2.139409303665161, + "learning_rate": 8.013467336683418e-06, + "loss": 0.0182, + "step": 20275 + }, + { + "epoch": 15.14, + "grad_norm": 2.2407007217407227, + "learning_rate": 8.010954773869347e-06, + "loss": 0.0179, + "step": 20300 + }, + { + "epoch": 15.16, + "grad_norm": 2.244349479675293, + "learning_rate": 8.008442211055277e-06, + "loss": 0.0179, + "step": 20325 + }, + { + "epoch": 15.18, + "grad_norm": 2.2500510215759277, + "learning_rate": 8.005929648241206e-06, + "loss": 0.0186, + "step": 20350 + }, + { + "epoch": 15.19, + "grad_norm": 2.2667760848999023, + "learning_rate": 8.003417085427137e-06, + "loss": 0.0183, + "step": 20375 + }, + { + "epoch": 15.21, + "grad_norm": 2.4621121883392334, + "learning_rate": 8.000904522613065e-06, + "loss": 0.0177, + "step": 20400 + }, + { + "epoch": 15.23, + "grad_norm": 2.11179256439209, + "learning_rate": 7.998391959798996e-06, + "loss": 0.018, + "step": 20425 + }, + { + "epoch": 15.25, + "grad_norm": 2.3915293216705322, + "learning_rate": 7.995879396984925e-06, + "loss": 0.0187, + "step": 20450 + }, + { + "epoch": 15.27, + "grad_norm": 1.6997781991958618, + "learning_rate": 7.993366834170854e-06, + "loss": 0.0178, + "step": 20475 + }, + { + "epoch": 15.29, + "grad_norm": 2.012526512145996, + "learning_rate": 7.990854271356785e-06, + "loss": 0.0178, + "step": 20500 + }, + { + "epoch": 15.31, + "grad_norm": 2.4469387531280518, + "learning_rate": 7.988341708542715e-06, + "loss": 0.0182, + "step": 20525 + }, + { + "epoch": 15.32, + "grad_norm": 2.031877040863037, + "learning_rate": 7.985829145728644e-06, + "loss": 0.019, + "step": 20550 + }, + { + "epoch": 15.34, + "grad_norm": 2.5539660453796387, + "learning_rate": 7.983316582914573e-06, + "loss": 0.0183, + "step": 20575 + }, + { + "epoch": 15.36, + "grad_norm": 2.0725064277648926, + "learning_rate": 7.980804020100503e-06, + "loss": 0.0187, + "step": 20600 + }, + { + "epoch": 15.38, + "grad_norm": 2.037991523742676, + "learning_rate": 7.978291457286432e-06, + "loss": 0.0186, + "step": 20625 + }, + { + "epoch": 15.4, + "grad_norm": 1.964439868927002, + "learning_rate": 7.975778894472363e-06, + "loss": 0.018, + "step": 20650 + }, + { + "epoch": 15.42, + "grad_norm": 1.9162101745605469, + "learning_rate": 7.973266331658292e-06, + "loss": 0.0199, + "step": 20675 + }, + { + "epoch": 15.44, + "grad_norm": 2.071002721786499, + "learning_rate": 7.970753768844222e-06, + "loss": 0.0187, + "step": 20700 + }, + { + "epoch": 15.45, + "grad_norm": 2.1936488151550293, + "learning_rate": 7.968241206030151e-06, + "loss": 0.0196, + "step": 20725 + }, + { + "epoch": 15.47, + "grad_norm": 2.305346965789795, + "learning_rate": 7.96572864321608e-06, + "loss": 0.0181, + "step": 20750 + }, + { + "epoch": 15.49, + "grad_norm": 2.4075098037719727, + "learning_rate": 7.963216080402011e-06, + "loss": 0.0179, + "step": 20775 + }, + { + "epoch": 15.51, + "grad_norm": 1.9996544122695923, + "learning_rate": 7.96070351758794e-06, + "loss": 0.0198, + "step": 20800 + }, + { + "epoch": 15.53, + "grad_norm": 1.9924840927124023, + "learning_rate": 7.95819095477387e-06, + "loss": 0.0191, + "step": 20825 + }, + { + "epoch": 15.55, + "grad_norm": 2.1631603240966797, + "learning_rate": 7.955678391959801e-06, + "loss": 0.0187, + "step": 20850 + }, + { + "epoch": 15.57, + "grad_norm": 2.4710841178894043, + "learning_rate": 7.953165829145729e-06, + "loss": 0.0185, + "step": 20875 + }, + { + "epoch": 15.59, + "grad_norm": 2.433011531829834, + "learning_rate": 7.95065326633166e-06, + "loss": 0.0185, + "step": 20900 + }, + { + "epoch": 15.6, + "grad_norm": 2.867295026779175, + "learning_rate": 7.948140703517589e-06, + "loss": 0.0187, + "step": 20925 + }, + { + "epoch": 15.62, + "grad_norm": 1.865167260169983, + "learning_rate": 7.945628140703518e-06, + "loss": 0.0185, + "step": 20950 + }, + { + "epoch": 15.64, + "grad_norm": 2.51581072807312, + "learning_rate": 7.943115577889448e-06, + "loss": 0.0184, + "step": 20975 + }, + { + "epoch": 15.66, + "grad_norm": 2.5112855434417725, + "learning_rate": 7.940603015075377e-06, + "loss": 0.0195, + "step": 21000 + }, + { + "epoch": 15.66, + "eval_loss": 0.1381530612707138, + "eval_runtime": 973.2777, + "eval_samples_per_second": 1.541, + "eval_steps_per_second": 1.541, + "eval_wer": 33.38934397535532, + "step": 21000 + }, + { + "epoch": 15.68, + "grad_norm": 2.3672492504119873, + "learning_rate": 7.938090452261306e-06, + "loss": 0.0197, + "step": 21025 + }, + { + "epoch": 15.7, + "grad_norm": 2.0841610431671143, + "learning_rate": 7.935577889447237e-06, + "loss": 0.019, + "step": 21050 + }, + { + "epoch": 15.72, + "grad_norm": 2.094703197479248, + "learning_rate": 7.933065326633167e-06, + "loss": 0.0195, + "step": 21075 + }, + { + "epoch": 15.73, + "grad_norm": 2.3787906169891357, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0192, + "step": 21100 + }, + { + "epoch": 15.75, + "grad_norm": 2.1654438972473145, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0192, + "step": 21125 + }, + { + "epoch": 15.77, + "grad_norm": 2.525909900665283, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0193, + "step": 21150 + }, + { + "epoch": 15.79, + "grad_norm": 2.03261661529541, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0198, + "step": 21175 + }, + { + "epoch": 15.81, + "grad_norm": 2.389220952987671, + "learning_rate": 7.920603015075377e-06, + "loss": 0.0193, + "step": 21200 + }, + { + "epoch": 15.83, + "grad_norm": 2.507336378097534, + "learning_rate": 7.918090452261306e-06, + "loss": 0.0198, + "step": 21225 + }, + { + "epoch": 15.85, + "grad_norm": 2.1838316917419434, + "learning_rate": 7.915577889447237e-06, + "loss": 0.0198, + "step": 21250 + }, + { + "epoch": 15.87, + "grad_norm": 2.358428716659546, + "learning_rate": 7.913065326633167e-06, + "loss": 0.0187, + "step": 21275 + }, + { + "epoch": 15.88, + "grad_norm": 2.237143039703369, + "learning_rate": 7.910552763819096e-06, + "loss": 0.0188, + "step": 21300 + }, + { + "epoch": 15.9, + "grad_norm": 2.642364025115967, + "learning_rate": 7.908040201005025e-06, + "loss": 0.0188, + "step": 21325 + }, + { + "epoch": 15.92, + "grad_norm": 2.524646043777466, + "learning_rate": 7.905527638190955e-06, + "loss": 0.0192, + "step": 21350 + }, + { + "epoch": 15.94, + "grad_norm": 2.07145357131958, + "learning_rate": 7.903015075376886e-06, + "loss": 0.02, + "step": 21375 + }, + { + "epoch": 15.96, + "grad_norm": 2.5146021842956543, + "learning_rate": 7.900502512562815e-06, + "loss": 0.0201, + "step": 21400 + }, + { + "epoch": 15.98, + "grad_norm": 2.6486377716064453, + "learning_rate": 7.897989949748744e-06, + "loss": 0.0189, + "step": 21425 + }, + { + "epoch": 16.0, + "grad_norm": 2.1782066822052, + "learning_rate": 7.895477386934674e-06, + "loss": 0.0182, + "step": 21450 + }, + { + "epoch": 16.01, + "grad_norm": 1.9258722066879272, + "learning_rate": 7.892964824120603e-06, + "loss": 0.0159, + "step": 21475 + }, + { + "epoch": 16.03, + "grad_norm": 2.241471290588379, + "learning_rate": 7.890452261306534e-06, + "loss": 0.0137, + "step": 21500 + }, + { + "epoch": 16.05, + "grad_norm": 3.325223207473755, + "learning_rate": 7.887939698492463e-06, + "loss": 0.0133, + "step": 21525 + }, + { + "epoch": 16.07, + "grad_norm": 1.8376882076263428, + "learning_rate": 7.885427135678393e-06, + "loss": 0.0141, + "step": 21550 + }, + { + "epoch": 16.09, + "grad_norm": 2.037733554840088, + "learning_rate": 7.882914572864322e-06, + "loss": 0.0141, + "step": 21575 + }, + { + "epoch": 16.11, + "grad_norm": 1.9613585472106934, + "learning_rate": 7.880402010050251e-06, + "loss": 0.0146, + "step": 21600 + }, + { + "epoch": 16.13, + "grad_norm": 2.037428140640259, + "learning_rate": 7.87788944723618e-06, + "loss": 0.0143, + "step": 21625 + }, + { + "epoch": 16.14, + "grad_norm": 1.6798501014709473, + "learning_rate": 7.875376884422112e-06, + "loss": 0.0144, + "step": 21650 + }, + { + "epoch": 16.16, + "grad_norm": 2.9304885864257812, + "learning_rate": 7.872864321608041e-06, + "loss": 0.0151, + "step": 21675 + }, + { + "epoch": 16.18, + "grad_norm": 2.07914662361145, + "learning_rate": 7.87035175879397e-06, + "loss": 0.0148, + "step": 21700 + }, + { + "epoch": 16.2, + "grad_norm": 1.4654254913330078, + "learning_rate": 7.867839195979901e-06, + "loss": 0.0153, + "step": 21725 + }, + { + "epoch": 16.22, + "grad_norm": 1.9239964485168457, + "learning_rate": 7.865326633165829e-06, + "loss": 0.0154, + "step": 21750 + }, + { + "epoch": 16.24, + "grad_norm": 2.4603769779205322, + "learning_rate": 7.86281407035176e-06, + "loss": 0.0149, + "step": 21775 + }, + { + "epoch": 16.26, + "grad_norm": 2.202749490737915, + "learning_rate": 7.86030150753769e-06, + "loss": 0.0152, + "step": 21800 + }, + { + "epoch": 16.28, + "grad_norm": 2.0669233798980713, + "learning_rate": 7.857788944723619e-06, + "loss": 0.0155, + "step": 21825 + }, + { + "epoch": 16.29, + "grad_norm": 1.9414321184158325, + "learning_rate": 7.855276381909548e-06, + "loss": 0.0155, + "step": 21850 + }, + { + "epoch": 16.31, + "grad_norm": 1.966099739074707, + "learning_rate": 7.852763819095477e-06, + "loss": 0.0145, + "step": 21875 + }, + { + "epoch": 16.33, + "grad_norm": 2.047466278076172, + "learning_rate": 7.850251256281408e-06, + "loss": 0.0144, + "step": 21900 + }, + { + "epoch": 16.35, + "grad_norm": 2.3069145679473877, + "learning_rate": 7.847738693467338e-06, + "loss": 0.0156, + "step": 21925 + }, + { + "epoch": 16.37, + "grad_norm": 2.095024585723877, + "learning_rate": 7.845226130653267e-06, + "loss": 0.0156, + "step": 21950 + }, + { + "epoch": 16.39, + "grad_norm": 2.251624584197998, + "learning_rate": 7.842713567839196e-06, + "loss": 0.0158, + "step": 21975 + }, + { + "epoch": 16.41, + "grad_norm": 1.8011457920074463, + "learning_rate": 7.840201005025127e-06, + "loss": 0.0151, + "step": 22000 + }, + { + "epoch": 16.41, + "eval_loss": 0.14575624465942383, + "eval_runtime": 969.8802, + "eval_samples_per_second": 1.547, + "eval_steps_per_second": 1.547, + "eval_wer": 33.14429741650914, + "step": 22000 + }, + { + "epoch": 16.42, + "grad_norm": 2.2493553161621094, + "learning_rate": 7.837688442211055e-06, + "loss": 0.0152, + "step": 22025 + }, + { + "epoch": 16.44, + "grad_norm": 2.520751476287842, + "learning_rate": 7.835175879396986e-06, + "loss": 0.0163, + "step": 22050 + }, + { + "epoch": 16.46, + "grad_norm": 2.245366096496582, + "learning_rate": 7.832663316582915e-06, + "loss": 0.0162, + "step": 22075 + }, + { + "epoch": 16.48, + "grad_norm": 1.7499501705169678, + "learning_rate": 7.830150753768845e-06, + "loss": 0.015, + "step": 22100 + }, + { + "epoch": 16.5, + "grad_norm": 2.375232458114624, + "learning_rate": 7.827638190954776e-06, + "loss": 0.016, + "step": 22125 + }, + { + "epoch": 16.52, + "grad_norm": 2.235980987548828, + "learning_rate": 7.825125628140703e-06, + "loss": 0.0155, + "step": 22150 + }, + { + "epoch": 16.54, + "grad_norm": 2.5591914653778076, + "learning_rate": 7.822613065326634e-06, + "loss": 0.0157, + "step": 22175 + }, + { + "epoch": 16.55, + "grad_norm": 2.162997245788574, + "learning_rate": 7.820100502512564e-06, + "loss": 0.0167, + "step": 22200 + }, + { + "epoch": 16.57, + "grad_norm": 1.8830596208572388, + "learning_rate": 7.817587939698493e-06, + "loss": 0.0158, + "step": 22225 + }, + { + "epoch": 16.59, + "grad_norm": 2.430551528930664, + "learning_rate": 7.815075376884422e-06, + "loss": 0.0153, + "step": 22250 + }, + { + "epoch": 16.61, + "grad_norm": 1.9371283054351807, + "learning_rate": 7.812562814070353e-06, + "loss": 0.0149, + "step": 22275 + }, + { + "epoch": 16.63, + "grad_norm": 2.3505172729492188, + "learning_rate": 7.810050251256283e-06, + "loss": 0.0152, + "step": 22300 + }, + { + "epoch": 16.65, + "grad_norm": 1.8498647212982178, + "learning_rate": 7.807537688442212e-06, + "loss": 0.0165, + "step": 22325 + }, + { + "epoch": 16.67, + "grad_norm": 1.9203202724456787, + "learning_rate": 7.805025125628141e-06, + "loss": 0.0166, + "step": 22350 + }, + { + "epoch": 16.69, + "grad_norm": 2.3050386905670166, + "learning_rate": 7.80251256281407e-06, + "loss": 0.0163, + "step": 22375 + }, + { + "epoch": 16.7, + "grad_norm": 2.5182175636291504, + "learning_rate": 7.800000000000002e-06, + "loss": 0.0165, + "step": 22400 + }, + { + "epoch": 16.72, + "grad_norm": 2.285977363586426, + "learning_rate": 7.79748743718593e-06, + "loss": 0.017, + "step": 22425 + }, + { + "epoch": 16.74, + "grad_norm": 2.180234432220459, + "learning_rate": 7.79497487437186e-06, + "loss": 0.0167, + "step": 22450 + }, + { + "epoch": 16.76, + "grad_norm": 2.7478532791137695, + "learning_rate": 7.79246231155779e-06, + "loss": 0.0167, + "step": 22475 + }, + { + "epoch": 16.78, + "grad_norm": 2.5909104347229004, + "learning_rate": 7.789949748743719e-06, + "loss": 0.0161, + "step": 22500 + }, + { + "epoch": 16.8, + "grad_norm": 1.912582278251648, + "learning_rate": 7.78743718592965e-06, + "loss": 0.0166, + "step": 22525 + }, + { + "epoch": 16.82, + "grad_norm": 2.2607853412628174, + "learning_rate": 7.78492462311558e-06, + "loss": 0.0155, + "step": 22550 + }, + { + "epoch": 16.83, + "grad_norm": 2.2137200832366943, + "learning_rate": 7.782412060301509e-06, + "loss": 0.0162, + "step": 22575 + }, + { + "epoch": 16.85, + "grad_norm": 2.707362651824951, + "learning_rate": 7.779899497487438e-06, + "loss": 0.0166, + "step": 22600 + }, + { + "epoch": 16.87, + "grad_norm": 2.1094329357147217, + "learning_rate": 7.777386934673367e-06, + "loss": 0.0162, + "step": 22625 + }, + { + "epoch": 16.89, + "grad_norm": 2.7931389808654785, + "learning_rate": 7.774874371859296e-06, + "loss": 0.0159, + "step": 22650 + }, + { + "epoch": 16.91, + "grad_norm": 2.40301513671875, + "learning_rate": 7.772361809045227e-06, + "loss": 0.0159, + "step": 22675 + }, + { + "epoch": 16.93, + "grad_norm": 1.9981954097747803, + "learning_rate": 7.769849246231155e-06, + "loss": 0.0167, + "step": 22700 + }, + { + "epoch": 16.95, + "grad_norm": 2.4317140579223633, + "learning_rate": 7.767336683417086e-06, + "loss": 0.0159, + "step": 22725 + }, + { + "epoch": 16.96, + "grad_norm": 2.975865125656128, + "learning_rate": 7.764824120603015e-06, + "loss": 0.0164, + "step": 22750 + }, + { + "epoch": 16.98, + "grad_norm": 2.714162826538086, + "learning_rate": 7.762311557788945e-06, + "loss": 0.017, + "step": 22775 + }, + { + "epoch": 17.0, + "grad_norm": 1.508992314338684, + "learning_rate": 7.759798994974876e-06, + "loss": 0.0153, + "step": 22800 + }, + { + "epoch": 17.02, + "grad_norm": 1.9678831100463867, + "learning_rate": 7.757286432160805e-06, + "loss": 0.0114, + "step": 22825 + }, + { + "epoch": 17.04, + "grad_norm": 2.6066534519195557, + "learning_rate": 7.754773869346734e-06, + "loss": 0.0117, + "step": 22850 + }, + { + "epoch": 17.06, + "grad_norm": 1.8610827922821045, + "learning_rate": 7.752261306532664e-06, + "loss": 0.0113, + "step": 22875 + }, + { + "epoch": 17.08, + "grad_norm": 1.692564606666565, + "learning_rate": 7.749748743718593e-06, + "loss": 0.012, + "step": 22900 + }, + { + "epoch": 17.1, + "grad_norm": 1.891134262084961, + "learning_rate": 7.747236180904524e-06, + "loss": 0.0122, + "step": 22925 + }, + { + "epoch": 17.11, + "grad_norm": 1.8932124376296997, + "learning_rate": 7.744723618090453e-06, + "loss": 0.0115, + "step": 22950 + }, + { + "epoch": 17.13, + "grad_norm": 1.7240322828292847, + "learning_rate": 7.742211055276383e-06, + "loss": 0.0118, + "step": 22975 + }, + { + "epoch": 17.15, + "grad_norm": 1.7123637199401855, + "learning_rate": 7.739698492462312e-06, + "loss": 0.0125, + "step": 23000 + }, + { + "epoch": 17.15, + "eval_loss": 0.15314434468746185, + "eval_runtime": 969.6316, + "eval_samples_per_second": 1.547, + "eval_steps_per_second": 1.547, + "eval_wer": 33.56437723167402, + "step": 23000 + }, + { + "epoch": 17.17, + "grad_norm": 2.0029029846191406, + "learning_rate": 7.737185929648241e-06, + "loss": 0.0121, + "step": 23025 + }, + { + "epoch": 17.19, + "grad_norm": 2.6654932498931885, + "learning_rate": 7.73467336683417e-06, + "loss": 0.0114, + "step": 23050 + }, + { + "epoch": 17.21, + "grad_norm": 1.9739927053451538, + "learning_rate": 7.732160804020102e-06, + "loss": 0.0125, + "step": 23075 + }, + { + "epoch": 17.23, + "grad_norm": 2.5358164310455322, + "learning_rate": 7.729648241206031e-06, + "loss": 0.0134, + "step": 23100 + }, + { + "epoch": 17.24, + "grad_norm": 2.044454574584961, + "learning_rate": 7.72713567839196e-06, + "loss": 0.0127, + "step": 23125 + }, + { + "epoch": 17.26, + "grad_norm": 1.9000550508499146, + "learning_rate": 7.724623115577891e-06, + "loss": 0.012, + "step": 23150 + }, + { + "epoch": 17.28, + "grad_norm": 2.30303955078125, + "learning_rate": 7.722110552763819e-06, + "loss": 0.0127, + "step": 23175 + }, + { + "epoch": 17.3, + "grad_norm": 2.000797748565674, + "learning_rate": 7.71959798994975e-06, + "loss": 0.0124, + "step": 23200 + }, + { + "epoch": 17.32, + "grad_norm": 2.4026284217834473, + "learning_rate": 7.71708542713568e-06, + "loss": 0.0131, + "step": 23225 + }, + { + "epoch": 17.34, + "grad_norm": 2.1855828762054443, + "learning_rate": 7.714572864321609e-06, + "loss": 0.0128, + "step": 23250 + }, + { + "epoch": 17.36, + "grad_norm": 1.6409577131271362, + "learning_rate": 7.712060301507538e-06, + "loss": 0.013, + "step": 23275 + }, + { + "epoch": 17.38, + "grad_norm": 1.8143736124038696, + "learning_rate": 7.709547738693467e-06, + "loss": 0.0128, + "step": 23300 + }, + { + "epoch": 17.39, + "grad_norm": 2.066652297973633, + "learning_rate": 7.707035175879397e-06, + "loss": 0.0131, + "step": 23325 + }, + { + "epoch": 17.41, + "grad_norm": 2.3731346130371094, + "learning_rate": 7.704522613065328e-06, + "loss": 0.014, + "step": 23350 + }, + { + "epoch": 17.43, + "grad_norm": 2.0847504138946533, + "learning_rate": 7.702010050251257e-06, + "loss": 0.0128, + "step": 23375 + }, + { + "epoch": 17.45, + "grad_norm": 2.4992780685424805, + "learning_rate": 7.699497487437186e-06, + "loss": 0.0134, + "step": 23400 + }, + { + "epoch": 17.47, + "grad_norm": 1.6949553489685059, + "learning_rate": 7.696984924623117e-06, + "loss": 0.0132, + "step": 23425 + }, + { + "epoch": 17.49, + "grad_norm": 2.239267110824585, + "learning_rate": 7.694472361809045e-06, + "loss": 0.0133, + "step": 23450 + }, + { + "epoch": 17.51, + "grad_norm": 2.3655240535736084, + "learning_rate": 7.691959798994976e-06, + "loss": 0.0137, + "step": 23475 + }, + { + "epoch": 17.52, + "grad_norm": 1.8351551294326782, + "learning_rate": 7.689447236180905e-06, + "loss": 0.0126, + "step": 23500 + }, + { + "epoch": 17.54, + "grad_norm": 1.985090970993042, + "learning_rate": 7.686934673366835e-06, + "loss": 0.0129, + "step": 23525 + }, + { + "epoch": 17.56, + "grad_norm": 2.0521388053894043, + "learning_rate": 7.684422110552766e-06, + "loss": 0.0132, + "step": 23550 + }, + { + "epoch": 17.58, + "grad_norm": 1.9500457048416138, + "learning_rate": 7.681909547738693e-06, + "loss": 0.013, + "step": 23575 + }, + { + "epoch": 17.6, + "grad_norm": 2.2526726722717285, + "learning_rate": 7.679396984924624e-06, + "loss": 0.0125, + "step": 23600 + }, + { + "epoch": 17.62, + "grad_norm": 2.0612640380859375, + "learning_rate": 7.676884422110554e-06, + "loss": 0.0137, + "step": 23625 + }, + { + "epoch": 17.64, + "grad_norm": 1.8333067893981934, + "learning_rate": 7.674371859296483e-06, + "loss": 0.0142, + "step": 23650 + }, + { + "epoch": 17.65, + "grad_norm": 1.822888970375061, + "learning_rate": 7.671859296482412e-06, + "loss": 0.0123, + "step": 23675 + }, + { + "epoch": 17.67, + "grad_norm": 2.0566201210021973, + "learning_rate": 7.669346733668343e-06, + "loss": 0.0129, + "step": 23700 + }, + { + "epoch": 17.69, + "grad_norm": 2.4392213821411133, + "learning_rate": 7.666834170854271e-06, + "loss": 0.0132, + "step": 23725 + }, + { + "epoch": 17.71, + "grad_norm": 2.1865992546081543, + "learning_rate": 7.664321608040202e-06, + "loss": 0.0134, + "step": 23750 + }, + { + "epoch": 17.73, + "grad_norm": 2.0196189880371094, + "learning_rate": 7.661809045226131e-06, + "loss": 0.0139, + "step": 23775 + }, + { + "epoch": 17.75, + "grad_norm": 2.1822714805603027, + "learning_rate": 7.65929648241206e-06, + "loss": 0.0134, + "step": 23800 + }, + { + "epoch": 17.77, + "grad_norm": 2.3028154373168945, + "learning_rate": 7.656884422110554e-06, + "loss": 0.014, + "step": 23825 + }, + { + "epoch": 17.79, + "grad_norm": 2.3591182231903076, + "learning_rate": 7.654371859296483e-06, + "loss": 0.0143, + "step": 23850 + }, + { + "epoch": 17.8, + "grad_norm": 2.1942739486694336, + "learning_rate": 7.651859296482412e-06, + "loss": 0.0135, + "step": 23875 + }, + { + "epoch": 17.82, + "grad_norm": 2.3814451694488525, + "learning_rate": 7.649346733668342e-06, + "loss": 0.0138, + "step": 23900 + }, + { + "epoch": 17.84, + "grad_norm": 2.091588258743286, + "learning_rate": 7.646834170854271e-06, + "loss": 0.014, + "step": 23925 + }, + { + "epoch": 17.86, + "grad_norm": 2.1415188312530518, + "learning_rate": 7.644321608040202e-06, + "loss": 0.0136, + "step": 23950 + }, + { + "epoch": 17.88, + "grad_norm": 2.615408420562744, + "learning_rate": 7.641809045226131e-06, + "loss": 0.0138, + "step": 23975 + }, + { + "epoch": 17.9, + "grad_norm": 2.471316337585449, + "learning_rate": 7.63929648241206e-06, + "loss": 0.0141, + "step": 24000 + }, + { + "epoch": 17.9, + "eval_loss": 0.15876464545726776, + "eval_runtime": 975.3325, + "eval_samples_per_second": 1.538, + "eval_steps_per_second": 1.538, + "eval_wer": 33.53637191066303, + "step": 24000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 75, + "save_steps": 1000, + "total_flos": 3.780456303919104e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/bengali/checkpoint-24000/training_args.bin b/checkpoints/whisper-tiny/bengali/checkpoint-24000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..461536254ec32c9d9812ec20b842ff6b9e9db1b9 --- /dev/null +++ b/checkpoints/whisper-tiny/bengali/checkpoint-24000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594eddb7f87cdf4bb55675a8f3d8010c01cc7a90258346f1486908fe2db725ac +size 4667 diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/config.json b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c47e7ae5f6c65847b8952aa0e827c7f13a489891 --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/generation_config.json b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/model.safetensors b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2ee1993b7b5394b780829e491482ac927b1d72e --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dae0a19b12813fba6b2504bda8931ad32c00e8e76983beb3a22d2c89e72e2f7 +size 151061672 diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/optimizer.pt b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..31be4293c94a91a59319f139a4a1784fd5be510f --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa657101e3fd0cb7e934de01ab03c607c3601ec164396925407974cc30b8f0e +size 297615749 diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/preprocessor_config.json b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/rng_state.pth b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e385a347368cee441a67343c57bcc352cbe9c9dc --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6510c25e2e9cfe854cd3187c44894dad0388d38169ebc5d34f6d054b53e2777c +size 14575 diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/scheduler.pt b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b5db5e519f4668cfca578bbdc8da10a0f40821d --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a54a11b92af0098a08a5f24c7ca905131539cf0fed97b3b9febed573384f5c2 +size 627 diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/trainer_state.json b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..714d2cb593fceacc80cf784a36ff3de5324d85e9 --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/trainer_state.json @@ -0,0 +1,6379 @@ +{ + "best_metric": 22.849513637230594, + "best_model_checkpoint": "results/whisper-tiny/bhojpuri/checkpoint-12000", + "epoch": 14.775016789791806, + "eval_steps": 1000, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 93.86495971679688, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8433, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 36.642452239990234, + "learning_rate": 9.200000000000001e-07, + "loss": 3.2495, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 14.583272933959961, + "learning_rate": 1.42e-06, + "loss": 2.4154, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 9.200639724731445, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.8445, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 7.3153076171875, + "learning_rate": 2.42e-06, + "loss": 1.4241, + "step": 125 + }, + { + "epoch": 0.1, + "grad_norm": 6.031806945800781, + "learning_rate": 2.92e-06, + "loss": 1.1806, + "step": 150 + }, + { + "epoch": 0.12, + "grad_norm": 5.5370097160339355, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.0051, + "step": 175 + }, + { + "epoch": 0.13, + "grad_norm": 5.470795154571533, + "learning_rate": 3.920000000000001e-06, + "loss": 0.8696, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 5.800634384155273, + "learning_rate": 4.42e-06, + "loss": 0.7843, + "step": 225 + }, + { + "epoch": 0.17, + "grad_norm": 5.190384387969971, + "learning_rate": 4.92e-06, + "loss": 0.7328, + "step": 250 + }, + { + "epoch": 0.18, + "grad_norm": 4.960707187652588, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6819, + "step": 275 + }, + { + "epoch": 0.2, + "grad_norm": 5.073268413543701, + "learning_rate": 5.92e-06, + "loss": 0.6311, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 5.326979160308838, + "learning_rate": 6.42e-06, + "loss": 0.6084, + "step": 325 + }, + { + "epoch": 0.24, + "grad_norm": 4.908077239990234, + "learning_rate": 6.92e-06, + "loss": 0.5755, + "step": 350 + }, + { + "epoch": 0.25, + "grad_norm": 4.982746601104736, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5526, + "step": 375 + }, + { + "epoch": 0.27, + "grad_norm": 5.369379997253418, + "learning_rate": 7.92e-06, + "loss": 0.5384, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 4.967843055725098, + "learning_rate": 8.42e-06, + "loss": 0.5164, + "step": 425 + }, + { + "epoch": 0.3, + "grad_norm": 5.226546287536621, + "learning_rate": 8.920000000000001e-06, + "loss": 0.4977, + "step": 450 + }, + { + "epoch": 0.32, + "grad_norm": 5.117010593414307, + "learning_rate": 9.42e-06, + "loss": 0.4826, + "step": 475 + }, + { + "epoch": 0.34, + "grad_norm": 4.946351528167725, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4619, + "step": 500 + }, + { + "epoch": 0.35, + "grad_norm": 4.794252872467041, + "learning_rate": 9.997889447236182e-06, + "loss": 0.4601, + "step": 525 + }, + { + "epoch": 0.37, + "grad_norm": 4.325902938842773, + "learning_rate": 9.995376884422112e-06, + "loss": 0.4608, + "step": 550 + }, + { + "epoch": 0.39, + "grad_norm": 5.033708095550537, + "learning_rate": 9.992864321608041e-06, + "loss": 0.4396, + "step": 575 + }, + { + "epoch": 0.4, + "grad_norm": 4.8322553634643555, + "learning_rate": 9.99035175879397e-06, + "loss": 0.4265, + "step": 600 + }, + { + "epoch": 0.42, + "grad_norm": 4.581171989440918, + "learning_rate": 9.9878391959799e-06, + "loss": 0.4203, + "step": 625 + }, + { + "epoch": 0.44, + "grad_norm": 5.21734619140625, + "learning_rate": 9.98532663316583e-06, + "loss": 0.4164, + "step": 650 + }, + { + "epoch": 0.45, + "grad_norm": 5.352939128875732, + "learning_rate": 9.98281407035176e-06, + "loss": 0.4149, + "step": 675 + }, + { + "epoch": 0.47, + "grad_norm": 4.945975303649902, + "learning_rate": 9.98030150753769e-06, + "loss": 0.4028, + "step": 700 + }, + { + "epoch": 0.49, + "grad_norm": 4.5429277420043945, + "learning_rate": 9.977788944723619e-06, + "loss": 0.3901, + "step": 725 + }, + { + "epoch": 0.5, + "grad_norm": 4.99998140335083, + "learning_rate": 9.975276381909548e-06, + "loss": 0.3903, + "step": 750 + }, + { + "epoch": 0.52, + "grad_norm": 4.785731315612793, + "learning_rate": 9.972763819095477e-06, + "loss": 0.3882, + "step": 775 + }, + { + "epoch": 0.54, + "grad_norm": 4.536025047302246, + "learning_rate": 9.970251256281408e-06, + "loss": 0.3735, + "step": 800 + }, + { + "epoch": 0.55, + "grad_norm": 4.807986259460449, + "learning_rate": 9.967738693467338e-06, + "loss": 0.3768, + "step": 825 + }, + { + "epoch": 0.57, + "grad_norm": 4.798527717590332, + "learning_rate": 9.965226130653267e-06, + "loss": 0.3575, + "step": 850 + }, + { + "epoch": 0.59, + "grad_norm": 4.359195232391357, + "learning_rate": 9.962713567839198e-06, + "loss": 0.3595, + "step": 875 + }, + { + "epoch": 0.6, + "grad_norm": 4.056990146636963, + "learning_rate": 9.960201005025126e-06, + "loss": 0.3566, + "step": 900 + }, + { + "epoch": 0.62, + "grad_norm": 4.493648052215576, + "learning_rate": 9.957688442211057e-06, + "loss": 0.35, + "step": 925 + }, + { + "epoch": 0.64, + "grad_norm": 5.328251361846924, + "learning_rate": 9.955175879396986e-06, + "loss": 0.3531, + "step": 950 + }, + { + "epoch": 0.65, + "grad_norm": 4.6250128746032715, + "learning_rate": 9.952663316582915e-06, + "loss": 0.3464, + "step": 975 + }, + { + "epoch": 0.67, + "grad_norm": 4.900501728057861, + "learning_rate": 9.950150753768845e-06, + "loss": 0.3393, + "step": 1000 + }, + { + "epoch": 0.67, + "eval_loss": 0.24503503739833832, + "eval_runtime": 557.0141, + "eval_samples_per_second": 2.693, + "eval_steps_per_second": 2.693, + "eval_wer": 35.215207578358445, + "step": 1000 + }, + { + "epoch": 0.69, + "grad_norm": 4.576656341552734, + "learning_rate": 9.947638190954774e-06, + "loss": 0.3418, + "step": 1025 + }, + { + "epoch": 0.71, + "grad_norm": 4.09963321685791, + "learning_rate": 9.945125628140703e-06, + "loss": 0.3447, + "step": 1050 + }, + { + "epoch": 0.72, + "grad_norm": 4.740942478179932, + "learning_rate": 9.942613065326634e-06, + "loss": 0.3381, + "step": 1075 + }, + { + "epoch": 0.74, + "grad_norm": 5.114734172821045, + "learning_rate": 9.940100502512564e-06, + "loss": 0.3362, + "step": 1100 + }, + { + "epoch": 0.76, + "grad_norm": 3.942279577255249, + "learning_rate": 9.937587939698493e-06, + "loss": 0.3249, + "step": 1125 + }, + { + "epoch": 0.77, + "grad_norm": 4.875680923461914, + "learning_rate": 9.935075376884424e-06, + "loss": 0.3301, + "step": 1150 + }, + { + "epoch": 0.79, + "grad_norm": 5.077151775360107, + "learning_rate": 9.932562814070352e-06, + "loss": 0.3165, + "step": 1175 + }, + { + "epoch": 0.81, + "grad_norm": 4.760519504547119, + "learning_rate": 9.930050251256283e-06, + "loss": 0.3172, + "step": 1200 + }, + { + "epoch": 0.82, + "grad_norm": 4.142284870147705, + "learning_rate": 9.927537688442212e-06, + "loss": 0.3063, + "step": 1225 + }, + { + "epoch": 0.84, + "grad_norm": 5.10738468170166, + "learning_rate": 9.925025125628141e-06, + "loss": 0.3218, + "step": 1250 + }, + { + "epoch": 0.86, + "grad_norm": 3.8499255180358887, + "learning_rate": 9.922512562814072e-06, + "loss": 0.3055, + "step": 1275 + }, + { + "epoch": 0.87, + "grad_norm": 4.915480136871338, + "learning_rate": 9.920000000000002e-06, + "loss": 0.3125, + "step": 1300 + }, + { + "epoch": 0.89, + "grad_norm": 4.258148670196533, + "learning_rate": 9.917487437185931e-06, + "loss": 0.313, + "step": 1325 + }, + { + "epoch": 0.91, + "grad_norm": 4.383869171142578, + "learning_rate": 9.91497487437186e-06, + "loss": 0.3088, + "step": 1350 + }, + { + "epoch": 0.92, + "grad_norm": 4.475909233093262, + "learning_rate": 9.91246231155779e-06, + "loss": 0.3049, + "step": 1375 + }, + { + "epoch": 0.94, + "grad_norm": 4.367929458618164, + "learning_rate": 9.909949748743719e-06, + "loss": 0.3007, + "step": 1400 + }, + { + "epoch": 0.96, + "grad_norm": 4.255512237548828, + "learning_rate": 9.90743718592965e-06, + "loss": 0.2975, + "step": 1425 + }, + { + "epoch": 0.97, + "grad_norm": 5.021753311157227, + "learning_rate": 9.904924623115578e-06, + "loss": 0.3091, + "step": 1450 + }, + { + "epoch": 0.99, + "grad_norm": 4.327148914337158, + "learning_rate": 9.902412060301509e-06, + "loss": 0.2957, + "step": 1475 + }, + { + "epoch": 1.01, + "grad_norm": 4.765533447265625, + "learning_rate": 9.899899497487438e-06, + "loss": 0.2907, + "step": 1500 + }, + { + "epoch": 1.02, + "grad_norm": 4.4516167640686035, + "learning_rate": 9.897386934673367e-06, + "loss": 0.276, + "step": 1525 + }, + { + "epoch": 1.04, + "grad_norm": 4.525915145874023, + "learning_rate": 9.894874371859298e-06, + "loss": 0.2719, + "step": 1550 + }, + { + "epoch": 1.06, + "grad_norm": 4.117883205413818, + "learning_rate": 9.892361809045228e-06, + "loss": 0.275, + "step": 1575 + }, + { + "epoch": 1.07, + "grad_norm": 4.974952697753906, + "learning_rate": 9.889849246231157e-06, + "loss": 0.275, + "step": 1600 + }, + { + "epoch": 1.09, + "grad_norm": 4.063706874847412, + "learning_rate": 9.887336683417086e-06, + "loss": 0.2705, + "step": 1625 + }, + { + "epoch": 1.11, + "grad_norm": 4.389718055725098, + "learning_rate": 9.884824120603015e-06, + "loss": 0.2696, + "step": 1650 + }, + { + "epoch": 1.12, + "grad_norm": 4.082433223724365, + "learning_rate": 9.882311557788945e-06, + "loss": 0.276, + "step": 1675 + }, + { + "epoch": 1.14, + "grad_norm": 4.3471784591674805, + "learning_rate": 9.879798994974876e-06, + "loss": 0.27, + "step": 1700 + }, + { + "epoch": 1.16, + "grad_norm": 3.9222960472106934, + "learning_rate": 9.877286432160805e-06, + "loss": 0.2705, + "step": 1725 + }, + { + "epoch": 1.18, + "grad_norm": 4.525430679321289, + "learning_rate": 9.874773869346734e-06, + "loss": 0.2713, + "step": 1750 + }, + { + "epoch": 1.19, + "grad_norm": 4.413844585418701, + "learning_rate": 9.872261306532664e-06, + "loss": 0.278, + "step": 1775 + }, + { + "epoch": 1.21, + "grad_norm": 4.307147026062012, + "learning_rate": 9.869748743718593e-06, + "loss": 0.269, + "step": 1800 + }, + { + "epoch": 1.23, + "grad_norm": 3.638044834136963, + "learning_rate": 9.867236180904524e-06, + "loss": 0.2651, + "step": 1825 + }, + { + "epoch": 1.24, + "grad_norm": 4.082454681396484, + "learning_rate": 9.864723618090453e-06, + "loss": 0.2601, + "step": 1850 + }, + { + "epoch": 1.26, + "grad_norm": 4.468039512634277, + "learning_rate": 9.862211055276383e-06, + "loss": 0.2583, + "step": 1875 + }, + { + "epoch": 1.28, + "grad_norm": 4.431980609893799, + "learning_rate": 9.859698492462312e-06, + "loss": 0.2587, + "step": 1900 + }, + { + "epoch": 1.29, + "grad_norm": 4.469857692718506, + "learning_rate": 9.857185929648241e-06, + "loss": 0.2604, + "step": 1925 + }, + { + "epoch": 1.31, + "grad_norm": 3.7308578491210938, + "learning_rate": 9.854673366834172e-06, + "loss": 0.257, + "step": 1950 + }, + { + "epoch": 1.33, + "grad_norm": 4.491827964782715, + "learning_rate": 9.852160804020102e-06, + "loss": 0.2518, + "step": 1975 + }, + { + "epoch": 1.34, + "grad_norm": 4.016375541687012, + "learning_rate": 9.849648241206031e-06, + "loss": 0.2568, + "step": 2000 + }, + { + "epoch": 1.34, + "eval_loss": 0.19339896738529205, + "eval_runtime": 523.6956, + "eval_samples_per_second": 2.864, + "eval_steps_per_second": 2.864, + "eval_wer": 28.33619429080043, + "step": 2000 + }, + { + "epoch": 1.36, + "grad_norm": 4.35910701751709, + "learning_rate": 9.84713567839196e-06, + "loss": 0.2582, + "step": 2025 + }, + { + "epoch": 1.38, + "grad_norm": 4.6040825843811035, + "learning_rate": 9.84462311557789e-06, + "loss": 0.2509, + "step": 2050 + }, + { + "epoch": 1.39, + "grad_norm": 4.547247886657715, + "learning_rate": 9.842110552763819e-06, + "loss": 0.2513, + "step": 2075 + }, + { + "epoch": 1.41, + "grad_norm": 4.306642532348633, + "learning_rate": 9.83959798994975e-06, + "loss": 0.2515, + "step": 2100 + }, + { + "epoch": 1.43, + "grad_norm": 4.359508991241455, + "learning_rate": 9.83708542713568e-06, + "loss": 0.2523, + "step": 2125 + }, + { + "epoch": 1.44, + "grad_norm": 3.9355013370513916, + "learning_rate": 9.834572864321609e-06, + "loss": 0.2567, + "step": 2150 + }, + { + "epoch": 1.46, + "grad_norm": 4.31467866897583, + "learning_rate": 9.832060301507538e-06, + "loss": 0.2446, + "step": 2175 + }, + { + "epoch": 1.48, + "grad_norm": 4.036788463592529, + "learning_rate": 9.829547738693467e-06, + "loss": 0.2482, + "step": 2200 + }, + { + "epoch": 1.49, + "grad_norm": 4.066323280334473, + "learning_rate": 9.827035175879398e-06, + "loss": 0.2479, + "step": 2225 + }, + { + "epoch": 1.51, + "grad_norm": 4.225846767425537, + "learning_rate": 9.824522613065328e-06, + "loss": 0.2425, + "step": 2250 + }, + { + "epoch": 1.53, + "grad_norm": 4.04661226272583, + "learning_rate": 9.822010050251257e-06, + "loss": 0.2475, + "step": 2275 + }, + { + "epoch": 1.54, + "grad_norm": 3.9240965843200684, + "learning_rate": 9.819497487437186e-06, + "loss": 0.2465, + "step": 2300 + }, + { + "epoch": 1.56, + "grad_norm": 3.3833847045898438, + "learning_rate": 9.816984924623116e-06, + "loss": 0.2455, + "step": 2325 + }, + { + "epoch": 1.58, + "grad_norm": 4.136446952819824, + "learning_rate": 9.814472361809047e-06, + "loss": 0.2455, + "step": 2350 + }, + { + "epoch": 1.6, + "grad_norm": 4.159794330596924, + "learning_rate": 9.811959798994976e-06, + "loss": 0.2431, + "step": 2375 + }, + { + "epoch": 1.61, + "grad_norm": 4.149526596069336, + "learning_rate": 9.809447236180905e-06, + "loss": 0.2418, + "step": 2400 + }, + { + "epoch": 1.63, + "grad_norm": 4.135148525238037, + "learning_rate": 9.806934673366835e-06, + "loss": 0.2448, + "step": 2425 + }, + { + "epoch": 1.65, + "grad_norm": 4.220961570739746, + "learning_rate": 9.804422110552764e-06, + "loss": 0.245, + "step": 2450 + }, + { + "epoch": 1.66, + "grad_norm": 4.032737731933594, + "learning_rate": 9.801909547738693e-06, + "loss": 0.2409, + "step": 2475 + }, + { + "epoch": 1.68, + "grad_norm": 4.216493129730225, + "learning_rate": 9.799396984924624e-06, + "loss": 0.2422, + "step": 2500 + }, + { + "epoch": 1.7, + "grad_norm": 3.8624844551086426, + "learning_rate": 9.796884422110554e-06, + "loss": 0.2428, + "step": 2525 + }, + { + "epoch": 1.71, + "grad_norm": 4.267681121826172, + "learning_rate": 9.794371859296483e-06, + "loss": 0.2401, + "step": 2550 + }, + { + "epoch": 1.73, + "grad_norm": 4.177845478057861, + "learning_rate": 9.791859296482414e-06, + "loss": 0.2395, + "step": 2575 + }, + { + "epoch": 1.75, + "grad_norm": 3.7445781230926514, + "learning_rate": 9.789346733668342e-06, + "loss": 0.2301, + "step": 2600 + }, + { + "epoch": 1.76, + "grad_norm": 4.072554588317871, + "learning_rate": 9.786834170854273e-06, + "loss": 0.2298, + "step": 2625 + }, + { + "epoch": 1.78, + "grad_norm": 4.0833868980407715, + "learning_rate": 9.784321608040202e-06, + "loss": 0.2403, + "step": 2650 + }, + { + "epoch": 1.8, + "grad_norm": 4.180334091186523, + "learning_rate": 9.781809045226131e-06, + "loss": 0.2375, + "step": 2675 + }, + { + "epoch": 1.81, + "grad_norm": 4.024491786956787, + "learning_rate": 9.77929648241206e-06, + "loss": 0.2361, + "step": 2700 + }, + { + "epoch": 1.83, + "grad_norm": 3.74503231048584, + "learning_rate": 9.77678391959799e-06, + "loss": 0.2336, + "step": 2725 + }, + { + "epoch": 1.85, + "grad_norm": 4.0356903076171875, + "learning_rate": 9.774271356783921e-06, + "loss": 0.232, + "step": 2750 + }, + { + "epoch": 1.86, + "grad_norm": 4.234691619873047, + "learning_rate": 9.77175879396985e-06, + "loss": 0.2359, + "step": 2775 + }, + { + "epoch": 1.88, + "grad_norm": 4.122931003570557, + "learning_rate": 9.76924623115578e-06, + "loss": 0.2311, + "step": 2800 + }, + { + "epoch": 1.9, + "grad_norm": 3.84405255317688, + "learning_rate": 9.766733668341709e-06, + "loss": 0.2296, + "step": 2825 + }, + { + "epoch": 1.91, + "grad_norm": 3.5006816387176514, + "learning_rate": 9.76422110552764e-06, + "loss": 0.2272, + "step": 2850 + }, + { + "epoch": 1.93, + "grad_norm": 4.376037120819092, + "learning_rate": 9.761708542713568e-06, + "loss": 0.2268, + "step": 2875 + }, + { + "epoch": 1.95, + "grad_norm": 4.146139621734619, + "learning_rate": 9.759195979899499e-06, + "loss": 0.2295, + "step": 2900 + }, + { + "epoch": 1.96, + "grad_norm": 3.523825168609619, + "learning_rate": 9.756683417085428e-06, + "loss": 0.2298, + "step": 2925 + }, + { + "epoch": 1.98, + "grad_norm": 4.570476531982422, + "learning_rate": 9.754170854271357e-06, + "loss": 0.2339, + "step": 2950 + }, + { + "epoch": 2.0, + "grad_norm": 3.915780544281006, + "learning_rate": 9.751658291457288e-06, + "loss": 0.2264, + "step": 2975 + }, + { + "epoch": 2.01, + "grad_norm": 4.270796298980713, + "learning_rate": 9.749145728643216e-06, + "loss": 0.2094, + "step": 3000 + }, + { + "epoch": 2.01, + "eval_loss": 0.17294400930404663, + "eval_runtime": 525.8054, + "eval_samples_per_second": 2.853, + "eval_steps_per_second": 2.853, + "eval_wer": 26.60054676076038, + "step": 3000 + }, + { + "epoch": 2.03, + "grad_norm": 4.024322986602783, + "learning_rate": 9.746633165829147e-06, + "loss": 0.2065, + "step": 3025 + }, + { + "epoch": 2.05, + "grad_norm": 3.5738699436187744, + "learning_rate": 9.744120603015076e-06, + "loss": 0.2057, + "step": 3050 + }, + { + "epoch": 2.07, + "grad_norm": 3.299962282180786, + "learning_rate": 9.741608040201006e-06, + "loss": 0.2069, + "step": 3075 + }, + { + "epoch": 2.08, + "grad_norm": 4.297542095184326, + "learning_rate": 9.739095477386935e-06, + "loss": 0.2106, + "step": 3100 + }, + { + "epoch": 2.1, + "grad_norm": 3.977832317352295, + "learning_rate": 9.736582914572866e-06, + "loss": 0.2088, + "step": 3125 + }, + { + "epoch": 2.12, + "grad_norm": 4.090686798095703, + "learning_rate": 9.734070351758794e-06, + "loss": 0.2093, + "step": 3150 + }, + { + "epoch": 2.13, + "grad_norm": 4.0888848304748535, + "learning_rate": 9.731557788944725e-06, + "loss": 0.2095, + "step": 3175 + }, + { + "epoch": 2.15, + "grad_norm": 3.4452478885650635, + "learning_rate": 9.729045226130654e-06, + "loss": 0.205, + "step": 3200 + }, + { + "epoch": 2.17, + "grad_norm": 4.072412967681885, + "learning_rate": 9.726532663316583e-06, + "loss": 0.2, + "step": 3225 + }, + { + "epoch": 2.18, + "grad_norm": 3.86810564994812, + "learning_rate": 9.724020100502514e-06, + "loss": 0.2099, + "step": 3250 + }, + { + "epoch": 2.2, + "grad_norm": 3.9418833255767822, + "learning_rate": 9.721507537688444e-06, + "loss": 0.2045, + "step": 3275 + }, + { + "epoch": 2.22, + "grad_norm": 4.120280742645264, + "learning_rate": 9.718994974874373e-06, + "loss": 0.201, + "step": 3300 + }, + { + "epoch": 2.23, + "grad_norm": 3.843355655670166, + "learning_rate": 9.716482412060302e-06, + "loss": 0.2024, + "step": 3325 + }, + { + "epoch": 2.25, + "grad_norm": 4.146347522735596, + "learning_rate": 9.713969849246232e-06, + "loss": 0.2014, + "step": 3350 + }, + { + "epoch": 2.27, + "grad_norm": 3.8597683906555176, + "learning_rate": 9.711457286432163e-06, + "loss": 0.2038, + "step": 3375 + }, + { + "epoch": 2.28, + "grad_norm": 3.2864789962768555, + "learning_rate": 9.708944723618092e-06, + "loss": 0.2055, + "step": 3400 + }, + { + "epoch": 2.3, + "grad_norm": 3.3450353145599365, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1973, + "step": 3425 + }, + { + "epoch": 2.32, + "grad_norm": 3.5855422019958496, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1928, + "step": 3450 + }, + { + "epoch": 2.33, + "grad_norm": 3.5417439937591553, + "learning_rate": 9.70140703517588e-06, + "loss": 0.2011, + "step": 3475 + }, + { + "epoch": 2.35, + "grad_norm": 3.7623558044433594, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1978, + "step": 3500 + }, + { + "epoch": 2.37, + "grad_norm": 4.645803928375244, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1977, + "step": 3525 + }, + { + "epoch": 2.38, + "grad_norm": 3.9017233848571777, + "learning_rate": 9.69386934673367e-06, + "loss": 0.2034, + "step": 3550 + }, + { + "epoch": 2.4, + "grad_norm": 4.497140884399414, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1999, + "step": 3575 + }, + { + "epoch": 2.42, + "grad_norm": 3.3209054470062256, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1916, + "step": 3600 + }, + { + "epoch": 2.43, + "grad_norm": 3.5207507610321045, + "learning_rate": 9.686331658291457e-06, + "loss": 0.1997, + "step": 3625 + }, + { + "epoch": 2.45, + "grad_norm": 3.8511710166931152, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1922, + "step": 3650 + }, + { + "epoch": 2.47, + "grad_norm": 3.6267056465148926, + "learning_rate": 9.681306532663318e-06, + "loss": 0.2002, + "step": 3675 + }, + { + "epoch": 2.48, + "grad_norm": 3.7468578815460205, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1983, + "step": 3700 + }, + { + "epoch": 2.5, + "grad_norm": 3.8952009677886963, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1944, + "step": 3725 + }, + { + "epoch": 2.52, + "grad_norm": 3.952911615371704, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1971, + "step": 3750 + }, + { + "epoch": 2.54, + "grad_norm": 3.7936244010925293, + "learning_rate": 9.671256281407035e-06, + "loss": 0.2007, + "step": 3775 + }, + { + "epoch": 2.55, + "grad_norm": 3.7978813648223877, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1987, + "step": 3800 + }, + { + "epoch": 2.57, + "grad_norm": 4.050068378448486, + "learning_rate": 9.666231155778895e-06, + "loss": 0.2025, + "step": 3825 + }, + { + "epoch": 2.59, + "grad_norm": 4.271592617034912, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1926, + "step": 3850 + }, + { + "epoch": 2.6, + "grad_norm": 4.0325398445129395, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1984, + "step": 3875 + }, + { + "epoch": 2.62, + "grad_norm": 3.879234790802002, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1992, + "step": 3900 + }, + { + "epoch": 2.64, + "grad_norm": 3.843987226486206, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1946, + "step": 3925 + }, + { + "epoch": 2.65, + "grad_norm": 4.271450996398926, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1981, + "step": 3950 + }, + { + "epoch": 2.67, + "grad_norm": 3.955652952194214, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1889, + "step": 3975 + }, + { + "epoch": 2.69, + "grad_norm": 3.9547550678253174, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1915, + "step": 4000 + }, + { + "epoch": 2.69, + "eval_loss": 0.16053272783756256, + "eval_runtime": 535.5172, + "eval_samples_per_second": 2.801, + "eval_steps_per_second": 2.801, + "eval_wer": 24.140123339055247, + "step": 4000 + }, + { + "epoch": 2.7, + "grad_norm": 3.53666090965271, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1921, + "step": 4025 + }, + { + "epoch": 2.72, + "grad_norm": 3.416452407836914, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1968, + "step": 4050 + }, + { + "epoch": 2.74, + "grad_norm": 3.490684747695923, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1857, + "step": 4075 + }, + { + "epoch": 2.75, + "grad_norm": 3.653369426727295, + "learning_rate": 9.638592964824121e-06, + "loss": 0.193, + "step": 4100 + }, + { + "epoch": 2.77, + "grad_norm": 4.1537275314331055, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1925, + "step": 4125 + }, + { + "epoch": 2.79, + "grad_norm": 3.6067721843719482, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1897, + "step": 4150 + }, + { + "epoch": 2.8, + "grad_norm": 3.677155017852783, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1874, + "step": 4175 + }, + { + "epoch": 2.82, + "grad_norm": 4.1185173988342285, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1939, + "step": 4200 + }, + { + "epoch": 2.84, + "grad_norm": 4.170986175537109, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1895, + "step": 4225 + }, + { + "epoch": 2.85, + "grad_norm": 3.946958065032959, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1894, + "step": 4250 + }, + { + "epoch": 2.87, + "grad_norm": 3.661827802658081, + "learning_rate": 9.62100502512563e-06, + "loss": 0.194, + "step": 4275 + }, + { + "epoch": 2.89, + "grad_norm": 4.1762309074401855, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1952, + "step": 4300 + }, + { + "epoch": 2.9, + "grad_norm": 3.9049060344696045, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1944, + "step": 4325 + }, + { + "epoch": 2.92, + "grad_norm": 4.298369407653809, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1931, + "step": 4350 + }, + { + "epoch": 2.94, + "grad_norm": 3.9322237968444824, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1906, + "step": 4375 + }, + { + "epoch": 2.96, + "grad_norm": 4.344098091125488, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1883, + "step": 4400 + }, + { + "epoch": 2.97, + "grad_norm": 4.100615501403809, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1856, + "step": 4425 + }, + { + "epoch": 2.99, + "grad_norm": 3.869065284729004, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1857, + "step": 4450 + }, + { + "epoch": 3.01, + "grad_norm": 3.521681547164917, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1788, + "step": 4475 + }, + { + "epoch": 3.02, + "grad_norm": 3.541217803955078, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1695, + "step": 4500 + }, + { + "epoch": 3.04, + "grad_norm": 3.708303689956665, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1672, + "step": 4525 + }, + { + "epoch": 3.06, + "grad_norm": 3.6521341800689697, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1683, + "step": 4550 + }, + { + "epoch": 3.07, + "grad_norm": 3.3679146766662598, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1657, + "step": 4575 + }, + { + "epoch": 3.09, + "grad_norm": 3.454932689666748, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1725, + "step": 4600 + }, + { + "epoch": 3.11, + "grad_norm": 3.9628400802612305, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1684, + "step": 4625 + }, + { + "epoch": 3.12, + "grad_norm": 3.66259503364563, + "learning_rate": 9.583316582914573e-06, + "loss": 0.171, + "step": 4650 + }, + { + "epoch": 3.14, + "grad_norm": 3.386406421661377, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1677, + "step": 4675 + }, + { + "epoch": 3.16, + "grad_norm": 3.990445613861084, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1704, + "step": 4700 + }, + { + "epoch": 3.17, + "grad_norm": 3.9500279426574707, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1668, + "step": 4725 + }, + { + "epoch": 3.19, + "grad_norm": 3.404106378555298, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1697, + "step": 4750 + }, + { + "epoch": 3.21, + "grad_norm": 3.4902865886688232, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1655, + "step": 4775 + }, + { + "epoch": 3.22, + "grad_norm": 3.1782569885253906, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1671, + "step": 4800 + }, + { + "epoch": 3.24, + "grad_norm": 3.9952385425567627, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1731, + "step": 4825 + }, + { + "epoch": 3.26, + "grad_norm": 3.627669334411621, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1694, + "step": 4850 + }, + { + "epoch": 3.27, + "grad_norm": 4.337616443634033, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1677, + "step": 4875 + }, + { + "epoch": 3.29, + "grad_norm": 3.4955434799194336, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1731, + "step": 4900 + }, + { + "epoch": 3.31, + "grad_norm": 3.6910362243652344, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1626, + "step": 4925 + }, + { + "epoch": 3.32, + "grad_norm": 3.5826051235198975, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1613, + "step": 4950 + }, + { + "epoch": 3.34, + "grad_norm": 4.079131603240967, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1664, + "step": 4975 + }, + { + "epoch": 3.36, + "grad_norm": 3.497771978378296, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1721, + "step": 5000 + }, + { + "epoch": 3.36, + "eval_loss": 0.15562395751476288, + "eval_runtime": 528.9765, + "eval_samples_per_second": 2.836, + "eval_steps_per_second": 2.836, + "eval_wer": 24.051115773412167, + "step": 5000 + }, + { + "epoch": 3.37, + "grad_norm": 3.598926544189453, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1637, + "step": 5025 + }, + { + "epoch": 3.39, + "grad_norm": 3.5462443828582764, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1681, + "step": 5050 + }, + { + "epoch": 3.41, + "grad_norm": 4.153628349304199, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1691, + "step": 5075 + }, + { + "epoch": 3.43, + "grad_norm": 3.390615940093994, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1687, + "step": 5100 + }, + { + "epoch": 3.44, + "grad_norm": 3.897580862045288, + "learning_rate": 9.535577889447237e-06, + "loss": 0.1629, + "step": 5125 + }, + { + "epoch": 3.46, + "grad_norm": 3.2517032623291016, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1683, + "step": 5150 + }, + { + "epoch": 3.48, + "grad_norm": 3.3776931762695312, + "learning_rate": 9.530552763819096e-06, + "loss": 0.165, + "step": 5175 + }, + { + "epoch": 3.49, + "grad_norm": 3.77608323097229, + "learning_rate": 9.528040201005025e-06, + "loss": 0.1685, + "step": 5200 + }, + { + "epoch": 3.51, + "grad_norm": 3.482889413833618, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1691, + "step": 5225 + }, + { + "epoch": 3.53, + "grad_norm": 3.990103006362915, + "learning_rate": 9.523015075376885e-06, + "loss": 0.167, + "step": 5250 + }, + { + "epoch": 3.54, + "grad_norm": 3.7123677730560303, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1643, + "step": 5275 + }, + { + "epoch": 3.56, + "grad_norm": 3.493159532546997, + "learning_rate": 9.517989949748744e-06, + "loss": 0.1663, + "step": 5300 + }, + { + "epoch": 3.58, + "grad_norm": 3.689345598220825, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1643, + "step": 5325 + }, + { + "epoch": 3.59, + "grad_norm": 3.335120439529419, + "learning_rate": 9.512964824120604e-06, + "loss": 0.164, + "step": 5350 + }, + { + "epoch": 3.61, + "grad_norm": 3.7256486415863037, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1632, + "step": 5375 + }, + { + "epoch": 3.63, + "grad_norm": 4.001396656036377, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1655, + "step": 5400 + }, + { + "epoch": 3.64, + "grad_norm": 3.6571249961853027, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1695, + "step": 5425 + }, + { + "epoch": 3.66, + "grad_norm": 3.379574775695801, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1642, + "step": 5450 + }, + { + "epoch": 3.68, + "grad_norm": 4.23717737197876, + "learning_rate": 9.500402010050253e-06, + "loss": 0.1662, + "step": 5475 + }, + { + "epoch": 3.69, + "grad_norm": 3.5864949226379395, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1658, + "step": 5500 + }, + { + "epoch": 3.71, + "grad_norm": 3.587003707885742, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1639, + "step": 5525 + }, + { + "epoch": 3.73, + "grad_norm": 4.125646591186523, + "learning_rate": 9.49286432160804e-06, + "loss": 0.1678, + "step": 5550 + }, + { + "epoch": 3.74, + "grad_norm": 3.5048937797546387, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1643, + "step": 5575 + }, + { + "epoch": 3.76, + "grad_norm": 3.347818613052368, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1589, + "step": 5600 + }, + { + "epoch": 3.78, + "grad_norm": 3.8734469413757324, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1675, + "step": 5625 + }, + { + "epoch": 3.79, + "grad_norm": 3.782532215118408, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1619, + "step": 5650 + }, + { + "epoch": 3.81, + "grad_norm": 3.64475417137146, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1628, + "step": 5675 + }, + { + "epoch": 3.83, + "grad_norm": 3.863438606262207, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1616, + "step": 5700 + }, + { + "epoch": 3.84, + "grad_norm": 3.8744213581085205, + "learning_rate": 9.475276381909548e-06, + "loss": 0.165, + "step": 5725 + }, + { + "epoch": 3.86, + "grad_norm": 3.275042772293091, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1641, + "step": 5750 + }, + { + "epoch": 3.88, + "grad_norm": 4.030235290527344, + "learning_rate": 9.470251256281408e-06, + "loss": 0.1624, + "step": 5775 + }, + { + "epoch": 3.9, + "grad_norm": 3.390822649002075, + "learning_rate": 9.467738693467337e-06, + "loss": 0.1598, + "step": 5800 + }, + { + "epoch": 3.91, + "grad_norm": 3.500739812850952, + "learning_rate": 9.465226130653267e-06, + "loss": 0.1595, + "step": 5825 + }, + { + "epoch": 3.93, + "grad_norm": 3.5276894569396973, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1669, + "step": 5850 + }, + { + "epoch": 3.95, + "grad_norm": 3.8741538524627686, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1593, + "step": 5875 + }, + { + "epoch": 3.96, + "grad_norm": 3.9853954315185547, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1586, + "step": 5900 + }, + { + "epoch": 3.98, + "grad_norm": 4.14481782913208, + "learning_rate": 9.455175879396986e-06, + "loss": 0.165, + "step": 5925 + }, + { + "epoch": 4.0, + "grad_norm": 3.37463641166687, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1592, + "step": 5950 + }, + { + "epoch": 4.01, + "grad_norm": 3.4719796180725098, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1461, + "step": 5975 + }, + { + "epoch": 4.03, + "grad_norm": 2.9724063873291016, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1458, + "step": 6000 + }, + { + "epoch": 4.03, + "eval_loss": 0.152227982878685, + "eval_runtime": 522.7909, + "eval_samples_per_second": 2.869, + "eval_steps_per_second": 2.869, + "eval_wer": 22.894017420052133, + "step": 6000 + }, + { + "epoch": 4.05, + "grad_norm": 3.552699565887451, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1449, + "step": 6025 + }, + { + "epoch": 4.06, + "grad_norm": 3.233750343322754, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1377, + "step": 6050 + }, + { + "epoch": 4.08, + "grad_norm": 3.8562185764312744, + "learning_rate": 9.440100502512563e-06, + "loss": 0.143, + "step": 6075 + }, + { + "epoch": 4.1, + "grad_norm": 3.666841745376587, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1419, + "step": 6100 + }, + { + "epoch": 4.11, + "grad_norm": 3.7837460041046143, + "learning_rate": 9.435075376884422e-06, + "loss": 0.1421, + "step": 6125 + }, + { + "epoch": 4.13, + "grad_norm": 3.5664258003234863, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1418, + "step": 6150 + }, + { + "epoch": 4.15, + "grad_norm": 3.5211708545684814, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1443, + "step": 6175 + }, + { + "epoch": 4.16, + "grad_norm": 3.563143253326416, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1425, + "step": 6200 + }, + { + "epoch": 4.18, + "grad_norm": 3.902482748031616, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1443, + "step": 6225 + }, + { + "epoch": 4.2, + "grad_norm": 3.4230475425720215, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1478, + "step": 6250 + }, + { + "epoch": 4.21, + "grad_norm": 3.680765390396118, + "learning_rate": 9.42e-06, + "loss": 0.1415, + "step": 6275 + }, + { + "epoch": 4.23, + "grad_norm": 3.4559743404388428, + "learning_rate": 9.41748743718593e-06, + "loss": 0.1438, + "step": 6300 + }, + { + "epoch": 4.25, + "grad_norm": 3.4006574153900146, + "learning_rate": 9.41497487437186e-06, + "loss": 0.1458, + "step": 6325 + }, + { + "epoch": 4.26, + "grad_norm": 3.941504716873169, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1423, + "step": 6350 + }, + { + "epoch": 4.28, + "grad_norm": 3.3677427768707275, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1397, + "step": 6375 + }, + { + "epoch": 4.3, + "grad_norm": 3.6690163612365723, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1397, + "step": 6400 + }, + { + "epoch": 4.31, + "grad_norm": 3.7739992141723633, + "learning_rate": 9.404924623115579e-06, + "loss": 0.1419, + "step": 6425 + }, + { + "epoch": 4.33, + "grad_norm": 3.5713489055633545, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1442, + "step": 6450 + }, + { + "epoch": 4.35, + "grad_norm": 3.625218629837036, + "learning_rate": 9.399899497487438e-06, + "loss": 0.151, + "step": 6475 + }, + { + "epoch": 4.37, + "grad_norm": 3.4426498413085938, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1418, + "step": 6500 + }, + { + "epoch": 4.38, + "grad_norm": 3.476062774658203, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1434, + "step": 6525 + }, + { + "epoch": 4.4, + "grad_norm": 3.774726629257202, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1424, + "step": 6550 + }, + { + "epoch": 4.42, + "grad_norm": 3.63863205909729, + "learning_rate": 9.389849246231157e-06, + "loss": 0.1474, + "step": 6575 + }, + { + "epoch": 4.43, + "grad_norm": 3.746408700942993, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1429, + "step": 6600 + }, + { + "epoch": 4.45, + "grad_norm": 3.290235757827759, + "learning_rate": 9.384824120603015e-06, + "loss": 0.1388, + "step": 6625 + }, + { + "epoch": 4.47, + "grad_norm": 3.706719160079956, + "learning_rate": 9.382311557788946e-06, + "loss": 0.1412, + "step": 6650 + }, + { + "epoch": 4.48, + "grad_norm": 3.4738802909851074, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1445, + "step": 6675 + }, + { + "epoch": 4.5, + "grad_norm": 3.6719963550567627, + "learning_rate": 9.377286432160805e-06, + "loss": 0.1433, + "step": 6700 + }, + { + "epoch": 4.52, + "grad_norm": 3.193850517272949, + "learning_rate": 9.374773869346734e-06, + "loss": 0.1419, + "step": 6725 + }, + { + "epoch": 4.53, + "grad_norm": 3.3374290466308594, + "learning_rate": 9.372261306532664e-06, + "loss": 0.1417, + "step": 6750 + }, + { + "epoch": 4.55, + "grad_norm": 3.652451515197754, + "learning_rate": 9.369748743718595e-06, + "loss": 0.1428, + "step": 6775 + }, + { + "epoch": 4.57, + "grad_norm": 3.366788148880005, + "learning_rate": 9.367236180904524e-06, + "loss": 0.142, + "step": 6800 + }, + { + "epoch": 4.58, + "grad_norm": 3.5370426177978516, + "learning_rate": 9.364723618090453e-06, + "loss": 0.1439, + "step": 6825 + }, + { + "epoch": 4.6, + "grad_norm": 4.256290912628174, + "learning_rate": 9.362211055276383e-06, + "loss": 0.146, + "step": 6850 + }, + { + "epoch": 4.62, + "grad_norm": 3.593064308166504, + "learning_rate": 9.359698492462312e-06, + "loss": 0.1448, + "step": 6875 + }, + { + "epoch": 4.63, + "grad_norm": 3.459979295730591, + "learning_rate": 9.357185929648241e-06, + "loss": 0.1461, + "step": 6900 + }, + { + "epoch": 4.65, + "grad_norm": 4.026700973510742, + "learning_rate": 9.354673366834172e-06, + "loss": 0.1456, + "step": 6925 + }, + { + "epoch": 4.67, + "grad_norm": 3.2010562419891357, + "learning_rate": 9.352160804020101e-06, + "loss": 0.1411, + "step": 6950 + }, + { + "epoch": 4.68, + "grad_norm": 3.5833041667938232, + "learning_rate": 9.34964824120603e-06, + "loss": 0.1396, + "step": 6975 + }, + { + "epoch": 4.7, + "grad_norm": 3.518792152404785, + "learning_rate": 9.34713567839196e-06, + "loss": 0.1451, + "step": 7000 + }, + { + "epoch": 4.7, + "eval_loss": 0.1500084400177002, + "eval_runtime": 524.0746, + "eval_samples_per_second": 2.862, + "eval_steps_per_second": 2.862, + "eval_wer": 23.04024413503719, + "step": 7000 + }, + { + "epoch": 4.72, + "grad_norm": 3.8232195377349854, + "learning_rate": 9.34462311557789e-06, + "loss": 0.143, + "step": 7025 + }, + { + "epoch": 4.73, + "grad_norm": 3.682361125946045, + "learning_rate": 9.34211055276382e-06, + "loss": 0.1398, + "step": 7050 + }, + { + "epoch": 4.75, + "grad_norm": 3.5071334838867188, + "learning_rate": 9.33959798994975e-06, + "loss": 0.1367, + "step": 7075 + }, + { + "epoch": 4.77, + "grad_norm": 3.8157904148101807, + "learning_rate": 9.337085427135679e-06, + "loss": 0.1455, + "step": 7100 + }, + { + "epoch": 4.79, + "grad_norm": 3.548407793045044, + "learning_rate": 9.334572864321608e-06, + "loss": 0.1425, + "step": 7125 + }, + { + "epoch": 4.8, + "grad_norm": 3.420259952545166, + "learning_rate": 9.332060301507538e-06, + "loss": 0.1443, + "step": 7150 + }, + { + "epoch": 4.82, + "grad_norm": 3.3060126304626465, + "learning_rate": 9.329547738693469e-06, + "loss": 0.1417, + "step": 7175 + }, + { + "epoch": 4.84, + "grad_norm": 3.425542116165161, + "learning_rate": 9.327035175879398e-06, + "loss": 0.1357, + "step": 7200 + }, + { + "epoch": 4.85, + "grad_norm": 3.7544076442718506, + "learning_rate": 9.324522613065327e-06, + "loss": 0.1399, + "step": 7225 + }, + { + "epoch": 4.87, + "grad_norm": 3.5834131240844727, + "learning_rate": 9.322010050251257e-06, + "loss": 0.1399, + "step": 7250 + }, + { + "epoch": 4.89, + "grad_norm": 3.256308078765869, + "learning_rate": 9.319497487437186e-06, + "loss": 0.1443, + "step": 7275 + }, + { + "epoch": 4.9, + "grad_norm": 3.996049404144287, + "learning_rate": 9.316984924623115e-06, + "loss": 0.1372, + "step": 7300 + }, + { + "epoch": 4.92, + "grad_norm": 3.7429847717285156, + "learning_rate": 9.314472361809046e-06, + "loss": 0.1436, + "step": 7325 + }, + { + "epoch": 4.94, + "grad_norm": 3.6226377487182617, + "learning_rate": 9.311959798994976e-06, + "loss": 0.1382, + "step": 7350 + }, + { + "epoch": 4.95, + "grad_norm": 4.194127082824707, + "learning_rate": 9.309447236180905e-06, + "loss": 0.1469, + "step": 7375 + }, + { + "epoch": 4.97, + "grad_norm": 3.6666419506073, + "learning_rate": 9.306934673366836e-06, + "loss": 0.1377, + "step": 7400 + }, + { + "epoch": 4.99, + "grad_norm": 3.43436861038208, + "learning_rate": 9.304422110552764e-06, + "loss": 0.1421, + "step": 7425 + }, + { + "epoch": 5.0, + "grad_norm": 3.158731698989868, + "learning_rate": 9.301909547738695e-06, + "loss": 0.133, + "step": 7450 + }, + { + "epoch": 5.02, + "grad_norm": 3.21460223197937, + "learning_rate": 9.299396984924624e-06, + "loss": 0.1247, + "step": 7475 + }, + { + "epoch": 5.04, + "grad_norm": 3.4981017112731934, + "learning_rate": 9.296884422110553e-06, + "loss": 0.1268, + "step": 7500 + }, + { + "epoch": 5.05, + "grad_norm": 3.081725835800171, + "learning_rate": 9.294371859296483e-06, + "loss": 0.1174, + "step": 7525 + }, + { + "epoch": 5.07, + "grad_norm": 3.652824878692627, + "learning_rate": 9.291859296482412e-06, + "loss": 0.1176, + "step": 7550 + }, + { + "epoch": 5.09, + "grad_norm": 3.3587093353271484, + "learning_rate": 9.289346733668343e-06, + "loss": 0.1232, + "step": 7575 + }, + { + "epoch": 5.1, + "grad_norm": 3.242204189300537, + "learning_rate": 9.286834170854272e-06, + "loss": 0.1251, + "step": 7600 + }, + { + "epoch": 5.12, + "grad_norm": 3.507436990737915, + "learning_rate": 9.284321608040202e-06, + "loss": 0.1204, + "step": 7625 + }, + { + "epoch": 5.14, + "grad_norm": 3.432806968688965, + "learning_rate": 9.281809045226131e-06, + "loss": 0.1206, + "step": 7650 + }, + { + "epoch": 5.15, + "grad_norm": 3.6819324493408203, + "learning_rate": 9.279296482412062e-06, + "loss": 0.1247, + "step": 7675 + }, + { + "epoch": 5.17, + "grad_norm": 3.679011821746826, + "learning_rate": 9.27678391959799e-06, + "loss": 0.1237, + "step": 7700 + }, + { + "epoch": 5.19, + "grad_norm": 3.618797540664673, + "learning_rate": 9.27427135678392e-06, + "loss": 0.1242, + "step": 7725 + }, + { + "epoch": 5.2, + "grad_norm": 3.7126893997192383, + "learning_rate": 9.27175879396985e-06, + "loss": 0.1226, + "step": 7750 + }, + { + "epoch": 5.22, + "grad_norm": 3.2769668102264404, + "learning_rate": 9.26924623115578e-06, + "loss": 0.1265, + "step": 7775 + }, + { + "epoch": 5.24, + "grad_norm": 3.2427358627319336, + "learning_rate": 9.26673366834171e-06, + "loss": 0.1262, + "step": 7800 + }, + { + "epoch": 5.26, + "grad_norm": 3.112070322036743, + "learning_rate": 9.264221105527638e-06, + "loss": 0.1267, + "step": 7825 + }, + { + "epoch": 5.27, + "grad_norm": 3.590747833251953, + "learning_rate": 9.261708542713569e-06, + "loss": 0.124, + "step": 7850 + }, + { + "epoch": 5.29, + "grad_norm": 3.6226933002471924, + "learning_rate": 9.259195979899498e-06, + "loss": 0.1235, + "step": 7875 + }, + { + "epoch": 5.31, + "grad_norm": 3.0846543312072754, + "learning_rate": 9.256683417085428e-06, + "loss": 0.1203, + "step": 7900 + }, + { + "epoch": 5.32, + "grad_norm": 3.4763987064361572, + "learning_rate": 9.254170854271357e-06, + "loss": 0.1238, + "step": 7925 + }, + { + "epoch": 5.34, + "grad_norm": 3.2179813385009766, + "learning_rate": 9.251658291457288e-06, + "loss": 0.1247, + "step": 7950 + }, + { + "epoch": 5.36, + "grad_norm": 3.537705659866333, + "learning_rate": 9.249145728643217e-06, + "loss": 0.1286, + "step": 7975 + }, + { + "epoch": 5.37, + "grad_norm": 3.482163667678833, + "learning_rate": 9.246633165829147e-06, + "loss": 0.1267, + "step": 8000 + }, + { + "epoch": 5.37, + "eval_loss": 0.1509549915790558, + "eval_runtime": 533.7159, + "eval_samples_per_second": 2.81, + "eval_steps_per_second": 2.81, + "eval_wer": 23.20554389980291, + "step": 8000 + }, + { + "epoch": 5.39, + "grad_norm": 3.4809768199920654, + "learning_rate": 9.244120603015076e-06, + "loss": 0.1214, + "step": 8025 + }, + { + "epoch": 5.41, + "grad_norm": 2.904242992401123, + "learning_rate": 9.241608040201005e-06, + "loss": 0.1215, + "step": 8050 + }, + { + "epoch": 5.42, + "grad_norm": 3.497542142868042, + "learning_rate": 9.239095477386936e-06, + "loss": 0.1262, + "step": 8075 + }, + { + "epoch": 5.44, + "grad_norm": 3.053152561187744, + "learning_rate": 9.236582914572864e-06, + "loss": 0.1191, + "step": 8100 + }, + { + "epoch": 5.46, + "grad_norm": 3.6195197105407715, + "learning_rate": 9.234070351758795e-06, + "loss": 0.1259, + "step": 8125 + }, + { + "epoch": 5.47, + "grad_norm": 3.1742255687713623, + "learning_rate": 9.231557788944724e-06, + "loss": 0.1268, + "step": 8150 + }, + { + "epoch": 5.49, + "grad_norm": 3.8259294033050537, + "learning_rate": 9.229045226130654e-06, + "loss": 0.1264, + "step": 8175 + }, + { + "epoch": 5.51, + "grad_norm": 3.4226787090301514, + "learning_rate": 9.226532663316585e-06, + "loss": 0.1231, + "step": 8200 + }, + { + "epoch": 5.52, + "grad_norm": 3.2176647186279297, + "learning_rate": 9.224020100502514e-06, + "loss": 0.1225, + "step": 8225 + }, + { + "epoch": 5.54, + "grad_norm": 3.659058094024658, + "learning_rate": 9.221507537688443e-06, + "loss": 0.1234, + "step": 8250 + }, + { + "epoch": 5.56, + "grad_norm": 3.3935298919677734, + "learning_rate": 9.218994974874373e-06, + "loss": 0.1221, + "step": 8275 + }, + { + "epoch": 5.57, + "grad_norm": 3.6841583251953125, + "learning_rate": 9.216482412060302e-06, + "loss": 0.1205, + "step": 8300 + }, + { + "epoch": 5.59, + "grad_norm": 3.7282485961914062, + "learning_rate": 9.213969849246231e-06, + "loss": 0.1245, + "step": 8325 + }, + { + "epoch": 5.61, + "grad_norm": 3.0772526264190674, + "learning_rate": 9.211457286432162e-06, + "loss": 0.1268, + "step": 8350 + }, + { + "epoch": 5.62, + "grad_norm": 3.1029367446899414, + "learning_rate": 9.20894472361809e-06, + "loss": 0.1264, + "step": 8375 + }, + { + "epoch": 5.64, + "grad_norm": 3.3531413078308105, + "learning_rate": 9.206432160804021e-06, + "loss": 0.1245, + "step": 8400 + }, + { + "epoch": 5.66, + "grad_norm": 3.5135834217071533, + "learning_rate": 9.20391959798995e-06, + "loss": 0.1245, + "step": 8425 + }, + { + "epoch": 5.67, + "grad_norm": 3.2247912883758545, + "learning_rate": 9.20140703517588e-06, + "loss": 0.1211, + "step": 8450 + }, + { + "epoch": 5.69, + "grad_norm": 3.61098051071167, + "learning_rate": 9.19889447236181e-06, + "loss": 0.124, + "step": 8475 + }, + { + "epoch": 5.71, + "grad_norm": 3.8434205055236816, + "learning_rate": 9.19638190954774e-06, + "loss": 0.1233, + "step": 8500 + }, + { + "epoch": 5.73, + "grad_norm": 3.2903194427490234, + "learning_rate": 9.19386934673367e-06, + "loss": 0.1238, + "step": 8525 + }, + { + "epoch": 5.74, + "grad_norm": 3.582935333251953, + "learning_rate": 9.191356783919599e-06, + "loss": 0.1232, + "step": 8550 + }, + { + "epoch": 5.76, + "grad_norm": 3.25006365776062, + "learning_rate": 9.188844221105528e-06, + "loss": 0.1183, + "step": 8575 + }, + { + "epoch": 5.78, + "grad_norm": 3.75384259223938, + "learning_rate": 9.186331658291459e-06, + "loss": 0.1223, + "step": 8600 + }, + { + "epoch": 5.79, + "grad_norm": 3.0634305477142334, + "learning_rate": 9.183819095477388e-06, + "loss": 0.1221, + "step": 8625 + }, + { + "epoch": 5.81, + "grad_norm": 3.526851177215576, + "learning_rate": 9.181306532663317e-06, + "loss": 0.1237, + "step": 8650 + }, + { + "epoch": 5.83, + "grad_norm": 2.885937213897705, + "learning_rate": 9.178793969849247e-06, + "loss": 0.1244, + "step": 8675 + }, + { + "epoch": 5.84, + "grad_norm": 3.731842279434204, + "learning_rate": 9.176281407035176e-06, + "loss": 0.1247, + "step": 8700 + }, + { + "epoch": 5.86, + "grad_norm": 3.2645487785339355, + "learning_rate": 9.173768844221105e-06, + "loss": 0.1224, + "step": 8725 + }, + { + "epoch": 5.88, + "grad_norm": 2.777378797531128, + "learning_rate": 9.171256281407036e-06, + "loss": 0.1232, + "step": 8750 + }, + { + "epoch": 5.89, + "grad_norm": 3.580059051513672, + "learning_rate": 9.168743718592966e-06, + "loss": 0.1242, + "step": 8775 + }, + { + "epoch": 5.91, + "grad_norm": 3.807997465133667, + "learning_rate": 9.166231155778895e-06, + "loss": 0.1313, + "step": 8800 + }, + { + "epoch": 5.93, + "grad_norm": 3.5003485679626465, + "learning_rate": 9.163718592964826e-06, + "loss": 0.1234, + "step": 8825 + }, + { + "epoch": 5.94, + "grad_norm": 3.740471601486206, + "learning_rate": 9.161206030150754e-06, + "loss": 0.1209, + "step": 8850 + }, + { + "epoch": 5.96, + "grad_norm": 3.201195478439331, + "learning_rate": 9.158693467336685e-06, + "loss": 0.1249, + "step": 8875 + }, + { + "epoch": 5.98, + "grad_norm": 3.6571664810180664, + "learning_rate": 9.156180904522614e-06, + "loss": 0.1234, + "step": 8900 + }, + { + "epoch": 5.99, + "grad_norm": 3.4738378524780273, + "learning_rate": 9.153668341708543e-06, + "loss": 0.1199, + "step": 8925 + }, + { + "epoch": 6.01, + "grad_norm": 3.0215229988098145, + "learning_rate": 9.151155778894473e-06, + "loss": 0.1105, + "step": 8950 + }, + { + "epoch": 6.03, + "grad_norm": 3.199676513671875, + "learning_rate": 9.148643216080402e-06, + "loss": 0.1013, + "step": 8975 + }, + { + "epoch": 6.04, + "grad_norm": 3.3466367721557617, + "learning_rate": 9.146130653266331e-06, + "loss": 0.1067, + "step": 9000 + }, + { + "epoch": 6.04, + "eval_loss": 0.1519739329814911, + "eval_runtime": 528.6667, + "eval_samples_per_second": 2.837, + "eval_steps_per_second": 2.837, + "eval_wer": 23.21825926632335, + "step": 9000 + }, + { + "epoch": 6.06, + "grad_norm": 2.9277305603027344, + "learning_rate": 9.143618090452262e-06, + "loss": 0.1036, + "step": 9025 + }, + { + "epoch": 6.08, + "grad_norm": 3.5725760459899902, + "learning_rate": 9.141105527638192e-06, + "loss": 0.1033, + "step": 9050 + }, + { + "epoch": 6.09, + "grad_norm": 3.224262237548828, + "learning_rate": 9.138592964824121e-06, + "loss": 0.1079, + "step": 9075 + }, + { + "epoch": 6.11, + "grad_norm": 3.307270050048828, + "learning_rate": 9.136080402010052e-06, + "loss": 0.1088, + "step": 9100 + }, + { + "epoch": 6.13, + "grad_norm": 3.1983275413513184, + "learning_rate": 9.13356783919598e-06, + "loss": 0.1095, + "step": 9125 + }, + { + "epoch": 6.15, + "grad_norm": 3.2087817192077637, + "learning_rate": 9.13105527638191e-06, + "loss": 0.1066, + "step": 9150 + }, + { + "epoch": 6.16, + "grad_norm": 3.339865207672119, + "learning_rate": 9.12854271356784e-06, + "loss": 0.1077, + "step": 9175 + }, + { + "epoch": 6.18, + "grad_norm": 3.7997775077819824, + "learning_rate": 9.12603015075377e-06, + "loss": 0.1081, + "step": 9200 + }, + { + "epoch": 6.2, + "grad_norm": 3.1159470081329346, + "learning_rate": 9.1235175879397e-06, + "loss": 0.1076, + "step": 9225 + }, + { + "epoch": 6.21, + "grad_norm": 3.421818494796753, + "learning_rate": 9.121005025125628e-06, + "loss": 0.1058, + "step": 9250 + }, + { + "epoch": 6.23, + "grad_norm": 3.3204290866851807, + "learning_rate": 9.118492462311559e-06, + "loss": 0.1048, + "step": 9275 + }, + { + "epoch": 6.25, + "grad_norm": 3.4838173389434814, + "learning_rate": 9.115979899497488e-06, + "loss": 0.1038, + "step": 9300 + }, + { + "epoch": 6.26, + "grad_norm": 3.1742591857910156, + "learning_rate": 9.113467336683418e-06, + "loss": 0.105, + "step": 9325 + }, + { + "epoch": 6.28, + "grad_norm": 3.874668598175049, + "learning_rate": 9.110954773869347e-06, + "loss": 0.1062, + "step": 9350 + }, + { + "epoch": 6.3, + "grad_norm": 3.6045989990234375, + "learning_rate": 9.108442211055278e-06, + "loss": 0.1065, + "step": 9375 + }, + { + "epoch": 6.31, + "grad_norm": 3.52018404006958, + "learning_rate": 9.105929648241206e-06, + "loss": 0.1097, + "step": 9400 + }, + { + "epoch": 6.33, + "grad_norm": 3.378875494003296, + "learning_rate": 9.103417085427137e-06, + "loss": 0.1087, + "step": 9425 + }, + { + "epoch": 6.35, + "grad_norm": 3.0573809146881104, + "learning_rate": 9.100904522613066e-06, + "loss": 0.1048, + "step": 9450 + }, + { + "epoch": 6.36, + "grad_norm": 3.3048455715179443, + "learning_rate": 9.098391959798995e-06, + "loss": 0.109, + "step": 9475 + }, + { + "epoch": 6.38, + "grad_norm": 2.8129143714904785, + "learning_rate": 9.095879396984926e-06, + "loss": 0.107, + "step": 9500 + }, + { + "epoch": 6.4, + "grad_norm": 3.053544282913208, + "learning_rate": 9.093366834170854e-06, + "loss": 0.1093, + "step": 9525 + }, + { + "epoch": 6.41, + "grad_norm": 3.3816685676574707, + "learning_rate": 9.090854271356785e-06, + "loss": 0.1113, + "step": 9550 + }, + { + "epoch": 6.43, + "grad_norm": 3.471092462539673, + "learning_rate": 9.088341708542714e-06, + "loss": 0.1115, + "step": 9575 + }, + { + "epoch": 6.45, + "grad_norm": 3.223968029022217, + "learning_rate": 9.085829145728644e-06, + "loss": 0.1103, + "step": 9600 + }, + { + "epoch": 6.46, + "grad_norm": 3.509681463241577, + "learning_rate": 9.083316582914573e-06, + "loss": 0.1037, + "step": 9625 + }, + { + "epoch": 6.48, + "grad_norm": 3.1730353832244873, + "learning_rate": 9.080804020100504e-06, + "loss": 0.1101, + "step": 9650 + }, + { + "epoch": 6.5, + "grad_norm": 3.4042608737945557, + "learning_rate": 9.078291457286433e-06, + "loss": 0.1085, + "step": 9675 + }, + { + "epoch": 6.51, + "grad_norm": 3.1175334453582764, + "learning_rate": 9.075778894472363e-06, + "loss": 0.1077, + "step": 9700 + }, + { + "epoch": 6.53, + "grad_norm": 3.4862351417541504, + "learning_rate": 9.073266331658292e-06, + "loss": 0.1089, + "step": 9725 + }, + { + "epoch": 6.55, + "grad_norm": 3.259397506713867, + "learning_rate": 9.070753768844221e-06, + "loss": 0.1047, + "step": 9750 + }, + { + "epoch": 6.56, + "grad_norm": 3.607745409011841, + "learning_rate": 9.068241206030152e-06, + "loss": 0.1053, + "step": 9775 + }, + { + "epoch": 6.58, + "grad_norm": 3.3122737407684326, + "learning_rate": 9.06572864321608e-06, + "loss": 0.1044, + "step": 9800 + }, + { + "epoch": 6.6, + "grad_norm": 3.451970338821411, + "learning_rate": 9.063216080402011e-06, + "loss": 0.1089, + "step": 9825 + }, + { + "epoch": 6.62, + "grad_norm": 3.42439341545105, + "learning_rate": 9.06070351758794e-06, + "loss": 0.1059, + "step": 9850 + }, + { + "epoch": 6.63, + "grad_norm": 3.752906084060669, + "learning_rate": 9.05819095477387e-06, + "loss": 0.1047, + "step": 9875 + }, + { + "epoch": 6.65, + "grad_norm": 3.5151407718658447, + "learning_rate": 9.0556783919598e-06, + "loss": 0.1058, + "step": 9900 + }, + { + "epoch": 6.67, + "grad_norm": 3.6519527435302734, + "learning_rate": 9.05316582914573e-06, + "loss": 0.1107, + "step": 9925 + }, + { + "epoch": 6.68, + "grad_norm": 3.9024133682250977, + "learning_rate": 9.05065326633166e-06, + "loss": 0.1066, + "step": 9950 + }, + { + "epoch": 6.7, + "grad_norm": 3.3172409534454346, + "learning_rate": 9.048140703517589e-06, + "loss": 0.1067, + "step": 9975 + }, + { + "epoch": 6.72, + "grad_norm": 3.981006622314453, + "learning_rate": 9.045628140703518e-06, + "loss": 0.1081, + "step": 10000 + }, + { + "epoch": 6.72, + "eval_loss": 0.15203164517879486, + "eval_runtime": 523.9703, + "eval_samples_per_second": 2.863, + "eval_steps_per_second": 2.863, + "eval_wer": 23.103820967639393, + "step": 10000 + }, + { + "epoch": 6.73, + "grad_norm": 3.7116029262542725, + "learning_rate": 9.043115577889447e-06, + "loss": 0.1108, + "step": 10025 + }, + { + "epoch": 6.75, + "grad_norm": 3.094069004058838, + "learning_rate": 9.040603015075378e-06, + "loss": 0.1098, + "step": 10050 + }, + { + "epoch": 6.77, + "grad_norm": 3.040309429168701, + "learning_rate": 9.03819095477387e-06, + "loss": 0.1114, + "step": 10075 + }, + { + "epoch": 6.78, + "grad_norm": 3.3585753440856934, + "learning_rate": 9.0356783919598e-06, + "loss": 0.1077, + "step": 10100 + }, + { + "epoch": 6.8, + "grad_norm": 3.1752421855926514, + "learning_rate": 9.033165829145728e-06, + "loss": 0.1051, + "step": 10125 + }, + { + "epoch": 6.82, + "grad_norm": 3.7417702674865723, + "learning_rate": 9.03065326633166e-06, + "loss": 0.1077, + "step": 10150 + }, + { + "epoch": 6.83, + "grad_norm": 3.4831295013427734, + "learning_rate": 9.028140703517589e-06, + "loss": 0.1074, + "step": 10175 + }, + { + "epoch": 6.85, + "grad_norm": 3.990248680114746, + "learning_rate": 9.025628140703518e-06, + "loss": 0.1065, + "step": 10200 + }, + { + "epoch": 6.87, + "grad_norm": 3.233635902404785, + "learning_rate": 9.023115577889447e-06, + "loss": 0.1098, + "step": 10225 + }, + { + "epoch": 6.88, + "grad_norm": 3.163641929626465, + "learning_rate": 9.020603015075378e-06, + "loss": 0.1082, + "step": 10250 + }, + { + "epoch": 6.9, + "grad_norm": 3.6084415912628174, + "learning_rate": 9.018090452261308e-06, + "loss": 0.11, + "step": 10275 + }, + { + "epoch": 6.92, + "grad_norm": 3.1879305839538574, + "learning_rate": 9.015577889447237e-06, + "loss": 0.1056, + "step": 10300 + }, + { + "epoch": 6.93, + "grad_norm": 3.4012796878814697, + "learning_rate": 9.013065326633166e-06, + "loss": 0.1092, + "step": 10325 + }, + { + "epoch": 6.95, + "grad_norm": 3.3554317951202393, + "learning_rate": 9.010552763819096e-06, + "loss": 0.1071, + "step": 10350 + }, + { + "epoch": 6.97, + "grad_norm": 3.3967669010162354, + "learning_rate": 9.008040201005027e-06, + "loss": 0.1073, + "step": 10375 + }, + { + "epoch": 6.98, + "grad_norm": 3.470402240753174, + "learning_rate": 9.005527638190954e-06, + "loss": 0.1077, + "step": 10400 + }, + { + "epoch": 7.0, + "grad_norm": 3.253338575363159, + "learning_rate": 9.003015075376885e-06, + "loss": 0.1054, + "step": 10425 + }, + { + "epoch": 7.02, + "grad_norm": 2.9654347896575928, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0899, + "step": 10450 + }, + { + "epoch": 7.03, + "grad_norm": 2.9675121307373047, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0937, + "step": 10475 + }, + { + "epoch": 7.05, + "grad_norm": 3.095313787460327, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0911, + "step": 10500 + }, + { + "epoch": 7.07, + "grad_norm": 2.7510364055633545, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0928, + "step": 10525 + }, + { + "epoch": 7.09, + "grad_norm": 2.8978002071380615, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0885, + "step": 10550 + }, + { + "epoch": 7.1, + "grad_norm": 2.862161159515381, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0895, + "step": 10575 + }, + { + "epoch": 7.12, + "grad_norm": 3.322599411010742, + "learning_rate": 8.985427135678392e-06, + "loss": 0.093, + "step": 10600 + }, + { + "epoch": 7.14, + "grad_norm": 3.432081937789917, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0877, + "step": 10625 + }, + { + "epoch": 7.15, + "grad_norm": 3.7976574897766113, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0964, + "step": 10650 + }, + { + "epoch": 7.17, + "grad_norm": 3.3133833408355713, + "learning_rate": 8.977889447236182e-06, + "loss": 0.088, + "step": 10675 + }, + { + "epoch": 7.19, + "grad_norm": 3.2121050357818604, + "learning_rate": 8.975376884422111e-06, + "loss": 0.095, + "step": 10700 + }, + { + "epoch": 7.2, + "grad_norm": 3.1484248638153076, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0938, + "step": 10725 + }, + { + "epoch": 7.22, + "grad_norm": 2.9462034702301025, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0947, + "step": 10750 + }, + { + "epoch": 7.24, + "grad_norm": 4.007068157196045, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0911, + "step": 10775 + }, + { + "epoch": 7.25, + "grad_norm": 3.250214099884033, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0891, + "step": 10800 + }, + { + "epoch": 7.27, + "grad_norm": 3.2990474700927734, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0921, + "step": 10825 + }, + { + "epoch": 7.29, + "grad_norm": 3.303574800491333, + "learning_rate": 8.960301507537689e-06, + "loss": 0.094, + "step": 10850 + }, + { + "epoch": 7.3, + "grad_norm": 3.0713677406311035, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0943, + "step": 10875 + }, + { + "epoch": 7.32, + "grad_norm": 3.67795467376709, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0935, + "step": 10900 + }, + { + "epoch": 7.34, + "grad_norm": 3.0342748165130615, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0919, + "step": 10925 + }, + { + "epoch": 7.35, + "grad_norm": 3.713961362838745, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0962, + "step": 10950 + }, + { + "epoch": 7.37, + "grad_norm": 3.1030521392822266, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0943, + "step": 10975 + }, + { + "epoch": 7.39, + "grad_norm": 3.1254990100860596, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0917, + "step": 11000 + }, + { + "epoch": 7.39, + "eval_loss": 0.15760564804077148, + "eval_runtime": 523.2931, + "eval_samples_per_second": 2.866, + "eval_steps_per_second": 2.866, + "eval_wer": 23.05931718481785, + "step": 11000 + }, + { + "epoch": 7.4, + "grad_norm": 3.142110824584961, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0911, + "step": 11025 + }, + { + "epoch": 7.42, + "grad_norm": 3.2968199253082275, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0909, + "step": 11050 + }, + { + "epoch": 7.44, + "grad_norm": 3.1385409832000732, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0917, + "step": 11075 + }, + { + "epoch": 7.45, + "grad_norm": 3.684631109237671, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0938, + "step": 11100 + }, + { + "epoch": 7.47, + "grad_norm": 3.794281244277954, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0943, + "step": 11125 + }, + { + "epoch": 7.49, + "grad_norm": 3.4074881076812744, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0912, + "step": 11150 + }, + { + "epoch": 7.51, + "grad_norm": 3.5372540950775146, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0923, + "step": 11175 + }, + { + "epoch": 7.52, + "grad_norm": 3.0035581588745117, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0925, + "step": 11200 + }, + { + "epoch": 7.54, + "grad_norm": 4.143095016479492, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0986, + "step": 11225 + }, + { + "epoch": 7.56, + "grad_norm": 3.611417531967163, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0954, + "step": 11250 + }, + { + "epoch": 7.57, + "grad_norm": 3.5623996257781982, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0917, + "step": 11275 + }, + { + "epoch": 7.59, + "grad_norm": 3.2755441665649414, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0925, + "step": 11300 + }, + { + "epoch": 7.61, + "grad_norm": 3.863847255706787, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0921, + "step": 11325 + }, + { + "epoch": 7.62, + "grad_norm": 3.420957326889038, + "learning_rate": 8.910050251256282e-06, + "loss": 0.094, + "step": 11350 + }, + { + "epoch": 7.64, + "grad_norm": 3.3743951320648193, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0959, + "step": 11375 + }, + { + "epoch": 7.66, + "grad_norm": 3.5184097290039062, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0935, + "step": 11400 + }, + { + "epoch": 7.67, + "grad_norm": 3.334580659866333, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0923, + "step": 11425 + }, + { + "epoch": 7.69, + "grad_norm": 3.1036903858184814, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0927, + "step": 11450 + }, + { + "epoch": 7.71, + "grad_norm": 3.1578311920166016, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0923, + "step": 11475 + }, + { + "epoch": 7.72, + "grad_norm": 3.138322114944458, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0937, + "step": 11500 + }, + { + "epoch": 7.74, + "grad_norm": 3.6078078746795654, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0922, + "step": 11525 + }, + { + "epoch": 7.76, + "grad_norm": 3.355168342590332, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0909, + "step": 11550 + }, + { + "epoch": 7.77, + "grad_norm": 3.383136749267578, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0929, + "step": 11575 + }, + { + "epoch": 7.79, + "grad_norm": 3.6381771564483643, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0939, + "step": 11600 + }, + { + "epoch": 7.81, + "grad_norm": 3.3504719734191895, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0932, + "step": 11625 + }, + { + "epoch": 7.82, + "grad_norm": 3.722961664199829, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0969, + "step": 11650 + }, + { + "epoch": 7.84, + "grad_norm": 3.478809356689453, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0941, + "step": 11675 + }, + { + "epoch": 7.86, + "grad_norm": 3.445310592651367, + "learning_rate": 8.874874371859296e-06, + "loss": 0.093, + "step": 11700 + }, + { + "epoch": 7.87, + "grad_norm": 3.5152416229248047, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0915, + "step": 11725 + }, + { + "epoch": 7.89, + "grad_norm": 3.7985150814056396, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0956, + "step": 11750 + }, + { + "epoch": 7.91, + "grad_norm": 3.5400753021240234, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0937, + "step": 11775 + }, + { + "epoch": 7.92, + "grad_norm": 3.507793426513672, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0931, + "step": 11800 + }, + { + "epoch": 7.94, + "grad_norm": 3.755192279815674, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0956, + "step": 11825 + }, + { + "epoch": 7.96, + "grad_norm": 3.2599802017211914, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0939, + "step": 11850 + }, + { + "epoch": 7.98, + "grad_norm": 3.3789443969726562, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0899, + "step": 11875 + }, + { + "epoch": 7.99, + "grad_norm": 4.048016548156738, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0967, + "step": 11900 + }, + { + "epoch": 8.01, + "grad_norm": 3.053467273712158, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0858, + "step": 11925 + }, + { + "epoch": 8.03, + "grad_norm": 3.262021064758301, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0779, + "step": 11950 + }, + { + "epoch": 8.04, + "grad_norm": 3.319021224975586, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0801, + "step": 11975 + }, + { + "epoch": 8.06, + "grad_norm": 2.8061749935150146, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0743, + "step": 12000 + }, + { + "epoch": 8.06, + "eval_loss": 0.1595088541507721, + "eval_runtime": 530.3717, + "eval_samples_per_second": 2.828, + "eval_steps_per_second": 2.828, + "eval_wer": 22.849513637230594, + "step": 12000 + }, + { + "epoch": 8.08, + "grad_norm": 3.4051458835601807, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0748, + "step": 12025 + }, + { + "epoch": 8.09, + "grad_norm": 3.370293378829956, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0815, + "step": 12050 + }, + { + "epoch": 8.11, + "grad_norm": 3.276702880859375, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0764, + "step": 12075 + }, + { + "epoch": 8.13, + "grad_norm": 3.5147619247436523, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0825, + "step": 12100 + }, + { + "epoch": 8.14, + "grad_norm": 3.2118067741394043, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0807, + "step": 12125 + }, + { + "epoch": 8.16, + "grad_norm": 3.101900339126587, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0777, + "step": 12150 + }, + { + "epoch": 8.18, + "grad_norm": 3.2511894702911377, + "learning_rate": 8.827236180904524e-06, + "loss": 0.0792, + "step": 12175 + }, + { + "epoch": 8.19, + "grad_norm": 3.685574531555176, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0801, + "step": 12200 + }, + { + "epoch": 8.21, + "grad_norm": 3.1535677909851074, + "learning_rate": 8.822211055276383e-06, + "loss": 0.081, + "step": 12225 + }, + { + "epoch": 8.23, + "grad_norm": 3.368541717529297, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0787, + "step": 12250 + }, + { + "epoch": 8.24, + "grad_norm": 3.5627801418304443, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0802, + "step": 12275 + }, + { + "epoch": 8.26, + "grad_norm": 3.359714984893799, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0806, + "step": 12300 + }, + { + "epoch": 8.28, + "grad_norm": 3.026019334793091, + "learning_rate": 8.812160804020102e-06, + "loss": 0.081, + "step": 12325 + }, + { + "epoch": 8.29, + "grad_norm": 3.3980915546417236, + "learning_rate": 8.809648241206031e-06, + "loss": 0.08, + "step": 12350 + }, + { + "epoch": 8.31, + "grad_norm": 4.118892669677734, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0819, + "step": 12375 + }, + { + "epoch": 8.33, + "grad_norm": 3.55472993850708, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0794, + "step": 12400 + }, + { + "epoch": 8.34, + "grad_norm": 2.832378625869751, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0795, + "step": 12425 + }, + { + "epoch": 8.36, + "grad_norm": 3.0574495792388916, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0778, + "step": 12450 + }, + { + "epoch": 8.38, + "grad_norm": 3.1083178520202637, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0828, + "step": 12475 + }, + { + "epoch": 8.39, + "grad_norm": 3.164285659790039, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0759, + "step": 12500 + }, + { + "epoch": 8.41, + "grad_norm": 3.803083658218384, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0822, + "step": 12525 + }, + { + "epoch": 8.43, + "grad_norm": 3.0981814861297607, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0804, + "step": 12550 + }, + { + "epoch": 8.45, + "grad_norm": 3.3646838665008545, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0822, + "step": 12575 + }, + { + "epoch": 8.46, + "grad_norm": 3.3468716144561768, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0813, + "step": 12600 + }, + { + "epoch": 8.48, + "grad_norm": 3.187147617340088, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0799, + "step": 12625 + }, + { + "epoch": 8.5, + "grad_norm": 3.593662977218628, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0791, + "step": 12650 + }, + { + "epoch": 8.51, + "grad_norm": 2.93940806388855, + "learning_rate": 8.776984924623117e-06, + "loss": 0.081, + "step": 12675 + }, + { + "epoch": 8.53, + "grad_norm": 3.682798147201538, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0801, + "step": 12700 + }, + { + "epoch": 8.55, + "grad_norm": 3.581777572631836, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0775, + "step": 12725 + }, + { + "epoch": 8.56, + "grad_norm": 3.2758090496063232, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0805, + "step": 12750 + }, + { + "epoch": 8.58, + "grad_norm": 3.4519293308258057, + "learning_rate": 8.766934673366834e-06, + "loss": 0.081, + "step": 12775 + }, + { + "epoch": 8.6, + "grad_norm": 3.2009847164154053, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0807, + "step": 12800 + }, + { + "epoch": 8.61, + "grad_norm": 3.625211000442505, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0804, + "step": 12825 + }, + { + "epoch": 8.63, + "grad_norm": 3.505444288253784, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0782, + "step": 12850 + }, + { + "epoch": 8.65, + "grad_norm": 3.376492977142334, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0786, + "step": 12875 + }, + { + "epoch": 8.66, + "grad_norm": 3.2919809818267822, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0798, + "step": 12900 + }, + { + "epoch": 8.68, + "grad_norm": 3.340373992919922, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0774, + "step": 12925 + }, + { + "epoch": 8.7, + "grad_norm": 3.158954381942749, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0815, + "step": 12950 + }, + { + "epoch": 8.71, + "grad_norm": 3.667062759399414, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0814, + "step": 12975 + }, + { + "epoch": 8.73, + "grad_norm": 3.5727102756500244, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0808, + "step": 13000 + }, + { + "epoch": 8.73, + "eval_loss": 0.16444097459316254, + "eval_runtime": 533.9596, + "eval_samples_per_second": 2.809, + "eval_steps_per_second": 2.809, + "eval_wer": 23.86038527560557, + "step": 13000 + }, + { + "epoch": 8.75, + "grad_norm": 3.3775618076324463, + "learning_rate": 8.741809045226131e-06, + "loss": 0.079, + "step": 13025 + }, + { + "epoch": 8.76, + "grad_norm": 3.2551448345184326, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0829, + "step": 13050 + }, + { + "epoch": 8.78, + "grad_norm": 3.457756757736206, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0811, + "step": 13075 + }, + { + "epoch": 8.8, + "grad_norm": 3.2216482162475586, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0772, + "step": 13100 + }, + { + "epoch": 8.81, + "grad_norm": 3.4713871479034424, + "learning_rate": 8.73175879396985e-06, + "loss": 0.079, + "step": 13125 + }, + { + "epoch": 8.83, + "grad_norm": 3.578930377960205, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0796, + "step": 13150 + }, + { + "epoch": 8.85, + "grad_norm": 4.023955345153809, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0837, + "step": 13175 + }, + { + "epoch": 8.87, + "grad_norm": 3.7292842864990234, + "learning_rate": 8.72422110552764e-06, + "loss": 0.081, + "step": 13200 + }, + { + "epoch": 8.88, + "grad_norm": 3.5397958755493164, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0792, + "step": 13225 + }, + { + "epoch": 8.9, + "grad_norm": 3.324944257736206, + "learning_rate": 8.719195979899498e-06, + "loss": 0.083, + "step": 13250 + }, + { + "epoch": 8.92, + "grad_norm": 3.645512104034424, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0827, + "step": 13275 + }, + { + "epoch": 8.93, + "grad_norm": 3.737316370010376, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0803, + "step": 13300 + }, + { + "epoch": 8.95, + "grad_norm": 3.5094964504241943, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0809, + "step": 13325 + }, + { + "epoch": 8.97, + "grad_norm": 3.4542152881622314, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0792, + "step": 13350 + }, + { + "epoch": 8.98, + "grad_norm": 3.293144464492798, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0834, + "step": 13375 + }, + { + "epoch": 9.0, + "grad_norm": 3.5231029987335205, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0786, + "step": 13400 + }, + { + "epoch": 9.02, + "grad_norm": 2.9241116046905518, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0644, + "step": 13425 + }, + { + "epoch": 9.03, + "grad_norm": 3.070408821105957, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0681, + "step": 13450 + }, + { + "epoch": 9.05, + "grad_norm": 3.106449842453003, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0685, + "step": 13475 + }, + { + "epoch": 9.07, + "grad_norm": 2.9623613357543945, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0673, + "step": 13500 + }, + { + "epoch": 9.08, + "grad_norm": 2.9884512424468994, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0698, + "step": 13525 + }, + { + "epoch": 9.1, + "grad_norm": 3.1581308841705322, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0671, + "step": 13550 + }, + { + "epoch": 9.12, + "grad_norm": 3.728541612625122, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0658, + "step": 13575 + }, + { + "epoch": 9.13, + "grad_norm": 3.1801505088806152, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0665, + "step": 13600 + }, + { + "epoch": 9.15, + "grad_norm": 3.133427381515503, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0661, + "step": 13625 + }, + { + "epoch": 9.17, + "grad_norm": 3.23095703125, + "learning_rate": 8.678994974874373e-06, + "loss": 0.067, + "step": 13650 + }, + { + "epoch": 9.18, + "grad_norm": 3.293841600418091, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0669, + "step": 13675 + }, + { + "epoch": 9.2, + "grad_norm": 3.128143787384033, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0665, + "step": 13700 + }, + { + "epoch": 9.22, + "grad_norm": 2.9551079273223877, + "learning_rate": 8.67145728643216e-06, + "loss": 0.067, + "step": 13725 + }, + { + "epoch": 9.23, + "grad_norm": 3.058426856994629, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0697, + "step": 13750 + }, + { + "epoch": 9.25, + "grad_norm": 3.19051194190979, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0669, + "step": 13775 + }, + { + "epoch": 9.27, + "grad_norm": 3.351942300796509, + "learning_rate": 8.66391959798995e-06, + "loss": 0.065, + "step": 13800 + }, + { + "epoch": 9.28, + "grad_norm": 3.4563798904418945, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0686, + "step": 13825 + }, + { + "epoch": 9.3, + "grad_norm": 3.5615477561950684, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0669, + "step": 13850 + }, + { + "epoch": 9.32, + "grad_norm": 2.921133041381836, + "learning_rate": 8.65638190954774e-06, + "loss": 0.067, + "step": 13875 + }, + { + "epoch": 9.34, + "grad_norm": 3.049394130706787, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0668, + "step": 13900 + }, + { + "epoch": 9.35, + "grad_norm": 3.2662315368652344, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0669, + "step": 13925 + }, + { + "epoch": 9.37, + "grad_norm": 3.5041072368621826, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0693, + "step": 13950 + }, + { + "epoch": 9.39, + "grad_norm": 3.1484646797180176, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0709, + "step": 13975 + }, + { + "epoch": 9.4, + "grad_norm": 3.3017284870147705, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0669, + "step": 14000 + }, + { + "epoch": 9.4, + "eval_loss": 0.1703759729862213, + "eval_runtime": 524.9084, + "eval_samples_per_second": 2.858, + "eval_steps_per_second": 2.858, + "eval_wer": 23.59972026193655, + "step": 14000 + }, + { + "epoch": 9.42, + "grad_norm": 3.1865592002868652, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0712, + "step": 14025 + }, + { + "epoch": 9.44, + "grad_norm": 3.0822553634643555, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0645, + "step": 14050 + }, + { + "epoch": 9.45, + "grad_norm": 3.784472942352295, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0681, + "step": 14075 + }, + { + "epoch": 9.47, + "grad_norm": 3.4210364818573, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0675, + "step": 14100 + }, + { + "epoch": 9.49, + "grad_norm": 3.6208882331848145, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0722, + "step": 14125 + }, + { + "epoch": 9.5, + "grad_norm": 3.499337911605835, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0674, + "step": 14150 + }, + { + "epoch": 9.52, + "grad_norm": 3.3727025985717773, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0707, + "step": 14175 + }, + { + "epoch": 9.54, + "grad_norm": 3.3646292686462402, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0664, + "step": 14200 + }, + { + "epoch": 9.55, + "grad_norm": 2.975327968597412, + "learning_rate": 8.621206030150756e-06, + "loss": 0.069, + "step": 14225 + }, + { + "epoch": 9.57, + "grad_norm": 3.130474090576172, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0671, + "step": 14250 + }, + { + "epoch": 9.59, + "grad_norm": 3.511791706085205, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0695, + "step": 14275 + }, + { + "epoch": 9.6, + "grad_norm": 2.9313547611236572, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0681, + "step": 14300 + }, + { + "epoch": 9.62, + "grad_norm": 2.9438576698303223, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0692, + "step": 14325 + }, + { + "epoch": 9.64, + "grad_norm": 3.139831781387329, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0696, + "step": 14350 + }, + { + "epoch": 9.65, + "grad_norm": 3.385382652282715, + "learning_rate": 8.606231155778895e-06, + "loss": 0.0686, + "step": 14375 + }, + { + "epoch": 9.67, + "grad_norm": 3.4755852222442627, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0661, + "step": 14400 + }, + { + "epoch": 9.69, + "grad_norm": 3.1254165172576904, + "learning_rate": 8.601206030150756e-06, + "loss": 0.0701, + "step": 14425 + }, + { + "epoch": 9.7, + "grad_norm": 2.7716310024261475, + "learning_rate": 8.598693467336683e-06, + "loss": 0.0692, + "step": 14450 + }, + { + "epoch": 9.72, + "grad_norm": 3.621020793914795, + "learning_rate": 8.596180904522614e-06, + "loss": 0.067, + "step": 14475 + }, + { + "epoch": 9.74, + "grad_norm": 3.138324499130249, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0686, + "step": 14500 + }, + { + "epoch": 9.75, + "grad_norm": 3.230107307434082, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0648, + "step": 14525 + }, + { + "epoch": 9.77, + "grad_norm": 3.515187978744507, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0671, + "step": 14550 + }, + { + "epoch": 9.79, + "grad_norm": 3.1676883697509766, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0714, + "step": 14575 + }, + { + "epoch": 9.81, + "grad_norm": 3.538196563720703, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0704, + "step": 14600 + }, + { + "epoch": 9.82, + "grad_norm": 3.0159358978271484, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0686, + "step": 14625 + }, + { + "epoch": 9.84, + "grad_norm": 3.2625274658203125, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0706, + "step": 14650 + }, + { + "epoch": 9.86, + "grad_norm": 3.3771753311157227, + "learning_rate": 8.57608040201005e-06, + "loss": 0.0715, + "step": 14675 + }, + { + "epoch": 9.87, + "grad_norm": 3.0817110538482666, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0695, + "step": 14700 + }, + { + "epoch": 9.89, + "grad_norm": 3.064793586730957, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0718, + "step": 14725 + }, + { + "epoch": 9.91, + "grad_norm": 3.3682265281677246, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0678, + "step": 14750 + }, + { + "epoch": 9.92, + "grad_norm": 3.33795166015625, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0711, + "step": 14775 + }, + { + "epoch": 9.94, + "grad_norm": 3.2200284004211426, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0686, + "step": 14800 + }, + { + "epoch": 9.96, + "grad_norm": 3.157776355743408, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0713, + "step": 14825 + }, + { + "epoch": 9.97, + "grad_norm": 3.2998759746551514, + "learning_rate": 8.558492462311558e-06, + "loss": 0.0686, + "step": 14850 + }, + { + "epoch": 9.99, + "grad_norm": 3.296419620513916, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0711, + "step": 14875 + }, + { + "epoch": 10.01, + "grad_norm": 3.7058863639831543, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0661, + "step": 14900 + }, + { + "epoch": 10.02, + "grad_norm": 2.8335165977478027, + "learning_rate": 8.550954773869347e-06, + "loss": 0.057, + "step": 14925 + }, + { + "epoch": 10.04, + "grad_norm": 3.0153000354766846, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0554, + "step": 14950 + }, + { + "epoch": 10.06, + "grad_norm": 3.5260610580444336, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0549, + "step": 14975 + }, + { + "epoch": 10.07, + "grad_norm": 2.98427152633667, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0541, + "step": 15000 + }, + { + "epoch": 10.07, + "eval_loss": 0.1763867735862732, + "eval_runtime": 523.8516, + "eval_samples_per_second": 2.863, + "eval_steps_per_second": 2.863, + "eval_wer": 23.83495454256469, + "step": 15000 + }, + { + "epoch": 10.09, + "grad_norm": 2.8480138778686523, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0555, + "step": 15025 + }, + { + "epoch": 10.11, + "grad_norm": 3.086395740509033, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0566, + "step": 15050 + }, + { + "epoch": 10.12, + "grad_norm": 3.1931493282318115, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0545, + "step": 15075 + }, + { + "epoch": 10.14, + "grad_norm": 2.55671763420105, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0558, + "step": 15100 + }, + { + "epoch": 10.16, + "grad_norm": 2.841392755508423, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0548, + "step": 15125 + }, + { + "epoch": 10.17, + "grad_norm": 2.8783414363861084, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0551, + "step": 15150 + }, + { + "epoch": 10.19, + "grad_norm": 2.816607713699341, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0549, + "step": 15175 + }, + { + "epoch": 10.21, + "grad_norm": 2.8094279766082764, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0582, + "step": 15200 + }, + { + "epoch": 10.22, + "grad_norm": 2.9535470008850098, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0554, + "step": 15225 + }, + { + "epoch": 10.24, + "grad_norm": 2.994957447052002, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0589, + "step": 15250 + }, + { + "epoch": 10.26, + "grad_norm": 3.430732488632202, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0563, + "step": 15275 + }, + { + "epoch": 10.28, + "grad_norm": 3.2503697872161865, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0571, + "step": 15300 + }, + { + "epoch": 10.29, + "grad_norm": 3.037585973739624, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0553, + "step": 15325 + }, + { + "epoch": 10.31, + "grad_norm": 3.266929864883423, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0572, + "step": 15350 + }, + { + "epoch": 10.33, + "grad_norm": 3.382246494293213, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0564, + "step": 15375 + }, + { + "epoch": 10.34, + "grad_norm": 2.7206432819366455, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0561, + "step": 15400 + }, + { + "epoch": 10.36, + "grad_norm": 2.924337148666382, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0577, + "step": 15425 + }, + { + "epoch": 10.38, + "grad_norm": 3.172940492630005, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0569, + "step": 15450 + }, + { + "epoch": 10.39, + "grad_norm": 3.134603261947632, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0575, + "step": 15475 + }, + { + "epoch": 10.41, + "grad_norm": 3.1646554470062256, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0576, + "step": 15500 + }, + { + "epoch": 10.43, + "grad_norm": 3.6714649200439453, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0577, + "step": 15525 + }, + { + "epoch": 10.44, + "grad_norm": 3.527547836303711, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0588, + "step": 15550 + }, + { + "epoch": 10.46, + "grad_norm": 3.169933795928955, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0591, + "step": 15575 + }, + { + "epoch": 10.48, + "grad_norm": 3.4818196296691895, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0585, + "step": 15600 + }, + { + "epoch": 10.49, + "grad_norm": 3.1078174114227295, + "learning_rate": 8.480603015075377e-06, + "loss": 0.0581, + "step": 15625 + }, + { + "epoch": 10.51, + "grad_norm": 3.1560535430908203, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0591, + "step": 15650 + }, + { + "epoch": 10.53, + "grad_norm": 3.250458240509033, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0562, + "step": 15675 + }, + { + "epoch": 10.54, + "grad_norm": 3.549868583679199, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0604, + "step": 15700 + }, + { + "epoch": 10.56, + "grad_norm": 2.7020392417907715, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0587, + "step": 15725 + }, + { + "epoch": 10.58, + "grad_norm": 3.6079905033111572, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0559, + "step": 15750 + }, + { + "epoch": 10.59, + "grad_norm": 4.109276294708252, + "learning_rate": 8.465527638190956e-06, + "loss": 0.058, + "step": 15775 + }, + { + "epoch": 10.61, + "grad_norm": 3.356374502182007, + "learning_rate": 8.463015075376885e-06, + "loss": 0.059, + "step": 15800 + }, + { + "epoch": 10.63, + "grad_norm": 3.9105756282806396, + "learning_rate": 8.460502512562815e-06, + "loss": 0.059, + "step": 15825 + }, + { + "epoch": 10.64, + "grad_norm": 3.5960891246795654, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0604, + "step": 15850 + }, + { + "epoch": 10.66, + "grad_norm": 2.927706003189087, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0607, + "step": 15875 + }, + { + "epoch": 10.68, + "grad_norm": 3.4405734539031982, + "learning_rate": 8.452964824120604e-06, + "loss": 0.0585, + "step": 15900 + }, + { + "epoch": 10.7, + "grad_norm": 3.0499801635742188, + "learning_rate": 8.450452261306534e-06, + "loss": 0.0586, + "step": 15925 + }, + { + "epoch": 10.71, + "grad_norm": 3.5506386756896973, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0574, + "step": 15950 + }, + { + "epoch": 10.73, + "grad_norm": 3.07831072807312, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0621, + "step": 15975 + }, + { + "epoch": 10.75, + "grad_norm": 3.5043013095855713, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0598, + "step": 16000 + }, + { + "epoch": 10.75, + "eval_loss": 0.17944234609603882, + "eval_runtime": 527.8067, + "eval_samples_per_second": 2.842, + "eval_steps_per_second": 2.842, + "eval_wer": 24.299065420560748, + "step": 16000 + }, + { + "epoch": 10.76, + "grad_norm": 3.2237677574157715, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0586, + "step": 16025 + }, + { + "epoch": 10.78, + "grad_norm": 4.6375579833984375, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0598, + "step": 16050 + }, + { + "epoch": 10.8, + "grad_norm": 3.4795613288879395, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0586, + "step": 16075 + }, + { + "epoch": 10.81, + "grad_norm": 3.4080770015716553, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0567, + "step": 16100 + }, + { + "epoch": 10.83, + "grad_norm": 3.0154454708099365, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0586, + "step": 16125 + }, + { + "epoch": 10.85, + "grad_norm": 3.2184910774230957, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0623, + "step": 16150 + }, + { + "epoch": 10.86, + "grad_norm": 3.691345453262329, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0617, + "step": 16175 + }, + { + "epoch": 10.88, + "grad_norm": 3.59759783744812, + "learning_rate": 8.42281407035176e-06, + "loss": 0.058, + "step": 16200 + }, + { + "epoch": 10.9, + "grad_norm": 3.6665070056915283, + "learning_rate": 8.420301507537689e-06, + "loss": 0.058, + "step": 16225 + }, + { + "epoch": 10.91, + "grad_norm": 3.3831355571746826, + "learning_rate": 8.417788944723618e-06, + "loss": 0.059, + "step": 16250 + }, + { + "epoch": 10.93, + "grad_norm": 3.108693838119507, + "learning_rate": 8.415276381909548e-06, + "loss": 0.061, + "step": 16275 + }, + { + "epoch": 10.95, + "grad_norm": 2.955349922180176, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0562, + "step": 16300 + }, + { + "epoch": 10.96, + "grad_norm": 3.5719997882843018, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0616, + "step": 16325 + }, + { + "epoch": 10.98, + "grad_norm": 3.5424628257751465, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0597, + "step": 16350 + }, + { + "epoch": 11.0, + "grad_norm": 3.626642942428589, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0576, + "step": 16375 + }, + { + "epoch": 11.01, + "grad_norm": 2.8286736011505127, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0478, + "step": 16400 + }, + { + "epoch": 11.03, + "grad_norm": 3.1407525539398193, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0446, + "step": 16425 + }, + { + "epoch": 11.05, + "grad_norm": 2.7093505859375, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0449, + "step": 16450 + }, + { + "epoch": 11.06, + "grad_norm": 2.744410753250122, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0462, + "step": 16475 + }, + { + "epoch": 11.08, + "grad_norm": 2.8199570178985596, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0461, + "step": 16500 + }, + { + "epoch": 11.1, + "grad_norm": 2.78971529006958, + "learning_rate": 8.390150753768846e-06, + "loss": 0.047, + "step": 16525 + }, + { + "epoch": 11.11, + "grad_norm": 2.914783477783203, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0481, + "step": 16550 + }, + { + "epoch": 11.13, + "grad_norm": 2.591996669769287, + "learning_rate": 8.385125628140705e-06, + "loss": 0.046, + "step": 16575 + }, + { + "epoch": 11.15, + "grad_norm": 3.0212557315826416, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0477, + "step": 16600 + }, + { + "epoch": 11.17, + "grad_norm": 2.852074384689331, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0468, + "step": 16625 + }, + { + "epoch": 11.18, + "grad_norm": 3.132408857345581, + "learning_rate": 8.377587939698493e-06, + "loss": 0.047, + "step": 16650 + }, + { + "epoch": 11.2, + "grad_norm": 3.148129940032959, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0442, + "step": 16675 + }, + { + "epoch": 11.22, + "grad_norm": 3.1252541542053223, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0454, + "step": 16700 + }, + { + "epoch": 11.23, + "grad_norm": 3.270667552947998, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0477, + "step": 16725 + }, + { + "epoch": 11.25, + "grad_norm": 2.7048935890197754, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0477, + "step": 16750 + }, + { + "epoch": 11.27, + "grad_norm": 3.114800214767456, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0463, + "step": 16775 + }, + { + "epoch": 11.28, + "grad_norm": 3.10003924369812, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0482, + "step": 16800 + }, + { + "epoch": 11.3, + "grad_norm": 3.3660390377044678, + "learning_rate": 8.36e-06, + "loss": 0.0472, + "step": 16825 + }, + { + "epoch": 11.32, + "grad_norm": 3.058326244354248, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0482, + "step": 16850 + }, + { + "epoch": 11.33, + "grad_norm": 2.949298858642578, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0482, + "step": 16875 + }, + { + "epoch": 11.35, + "grad_norm": 3.0453925132751465, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0514, + "step": 16900 + }, + { + "epoch": 11.37, + "grad_norm": 3.445995807647705, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0492, + "step": 16925 + }, + { + "epoch": 11.38, + "grad_norm": 2.9511842727661133, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0492, + "step": 16950 + }, + { + "epoch": 11.4, + "grad_norm": 2.805635452270508, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0479, + "step": 16975 + }, + { + "epoch": 11.42, + "grad_norm": 3.061455726623535, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0466, + "step": 17000 + }, + { + "epoch": 11.42, + "eval_loss": 0.18968722224235535, + "eval_runtime": 529.8154, + "eval_samples_per_second": 2.831, + "eval_steps_per_second": 2.831, + "eval_wer": 24.642380316612627, + "step": 17000 + }, + { + "epoch": 11.43, + "grad_norm": 3.1339917182922363, + "learning_rate": 8.339899497487438e-06, + "loss": 0.051, + "step": 17025 + }, + { + "epoch": 11.45, + "grad_norm": 3.173086404800415, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0489, + "step": 17050 + }, + { + "epoch": 11.47, + "grad_norm": 2.7811384201049805, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0482, + "step": 17075 + }, + { + "epoch": 11.48, + "grad_norm": 3.593419313430786, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0502, + "step": 17100 + }, + { + "epoch": 11.5, + "grad_norm": 3.3394999504089355, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0506, + "step": 17125 + }, + { + "epoch": 11.52, + "grad_norm": 2.9718403816223145, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0496, + "step": 17150 + }, + { + "epoch": 11.53, + "grad_norm": 4.067986965179443, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0489, + "step": 17175 + }, + { + "epoch": 11.55, + "grad_norm": 2.9739620685577393, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0472, + "step": 17200 + }, + { + "epoch": 11.57, + "grad_norm": 3.255797863006592, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0504, + "step": 17225 + }, + { + "epoch": 11.58, + "grad_norm": 3.0710818767547607, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0518, + "step": 17250 + }, + { + "epoch": 11.6, + "grad_norm": 3.5728416442871094, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0502, + "step": 17275 + }, + { + "epoch": 11.62, + "grad_norm": 3.5892174243927, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0517, + "step": 17300 + }, + { + "epoch": 11.64, + "grad_norm": 3.1643269062042236, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0467, + "step": 17325 + }, + { + "epoch": 11.65, + "grad_norm": 3.353294610977173, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0489, + "step": 17350 + }, + { + "epoch": 11.67, + "grad_norm": 3.494023084640503, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0499, + "step": 17375 + }, + { + "epoch": 11.69, + "grad_norm": 3.0589919090270996, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0492, + "step": 17400 + }, + { + "epoch": 11.7, + "grad_norm": 2.7905213832855225, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0503, + "step": 17425 + }, + { + "epoch": 11.72, + "grad_norm": 3.2393035888671875, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0491, + "step": 17450 + }, + { + "epoch": 11.74, + "grad_norm": 3.0723655223846436, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0488, + "step": 17475 + }, + { + "epoch": 11.75, + "grad_norm": 2.898193836212158, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0484, + "step": 17500 + }, + { + "epoch": 11.77, + "grad_norm": 2.987506866455078, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0482, + "step": 17525 + }, + { + "epoch": 11.79, + "grad_norm": 3.154498338699341, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0496, + "step": 17550 + }, + { + "epoch": 11.8, + "grad_norm": 3.053910732269287, + "learning_rate": 8.28462311557789e-06, + "loss": 0.046, + "step": 17575 + }, + { + "epoch": 11.82, + "grad_norm": 2.9024808406829834, + "learning_rate": 8.28211055276382e-06, + "loss": 0.049, + "step": 17600 + }, + { + "epoch": 11.84, + "grad_norm": 3.414426326751709, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0529, + "step": 17625 + }, + { + "epoch": 11.85, + "grad_norm": 2.9988064765930176, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0511, + "step": 17650 + }, + { + "epoch": 11.87, + "grad_norm": 2.9790713787078857, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0498, + "step": 17675 + }, + { + "epoch": 11.89, + "grad_norm": 3.700747489929199, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0503, + "step": 17700 + }, + { + "epoch": 11.9, + "grad_norm": 3.167201519012451, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0524, + "step": 17725 + }, + { + "epoch": 11.92, + "grad_norm": 3.502377510070801, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0492, + "step": 17750 + }, + { + "epoch": 11.94, + "grad_norm": 3.5718843936920166, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0497, + "step": 17775 + }, + { + "epoch": 11.95, + "grad_norm": 3.4213879108428955, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0483, + "step": 17800 + }, + { + "epoch": 11.97, + "grad_norm": 3.0552310943603516, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0475, + "step": 17825 + }, + { + "epoch": 11.99, + "grad_norm": 3.108612537384033, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0499, + "step": 17850 + }, + { + "epoch": 12.0, + "grad_norm": 3.0540547370910645, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0489, + "step": 17875 + }, + { + "epoch": 12.02, + "grad_norm": 2.9247024059295654, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0381, + "step": 17900 + }, + { + "epoch": 12.04, + "grad_norm": 2.979076862335205, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0396, + "step": 17925 + }, + { + "epoch": 12.06, + "grad_norm": 3.075889825820923, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0393, + "step": 17950 + }, + { + "epoch": 12.07, + "grad_norm": 2.895512580871582, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0375, + "step": 17975 + }, + { + "epoch": 12.09, + "grad_norm": 2.6981096267700195, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0376, + "step": 18000 + }, + { + "epoch": 12.09, + "eval_loss": 0.1978389322757721, + "eval_runtime": 527.2678, + "eval_samples_per_second": 2.845, + "eval_steps_per_second": 2.845, + "eval_wer": 25.233644859813083, + "step": 18000 + }, + { + "epoch": 12.11, + "grad_norm": 2.6519410610198975, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0384, + "step": 18025 + }, + { + "epoch": 12.12, + "grad_norm": 2.8268957138061523, + "learning_rate": 8.236884422110553e-06, + "loss": 0.038, + "step": 18050 + }, + { + "epoch": 12.14, + "grad_norm": 3.3254096508026123, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0388, + "step": 18075 + }, + { + "epoch": 12.16, + "grad_norm": 2.7252891063690186, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0382, + "step": 18100 + }, + { + "epoch": 12.17, + "grad_norm": 2.273592233657837, + "learning_rate": 8.229346733668341e-06, + "loss": 0.038, + "step": 18125 + }, + { + "epoch": 12.19, + "grad_norm": 2.82323956489563, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0388, + "step": 18150 + }, + { + "epoch": 12.21, + "grad_norm": 2.8901491165161133, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0374, + "step": 18175 + }, + { + "epoch": 12.22, + "grad_norm": 2.679884672164917, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0392, + "step": 18200 + }, + { + "epoch": 12.24, + "grad_norm": 2.8876633644104004, + "learning_rate": 8.219296482412062e-06, + "loss": 0.036, + "step": 18225 + }, + { + "epoch": 12.26, + "grad_norm": 2.3431971073150635, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0389, + "step": 18250 + }, + { + "epoch": 12.27, + "grad_norm": 3.0814380645751953, + "learning_rate": 8.21427135678392e-06, + "loss": 0.04, + "step": 18275 + }, + { + "epoch": 12.29, + "grad_norm": 3.1290934085845947, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0418, + "step": 18300 + }, + { + "epoch": 12.31, + "grad_norm": 3.116100549697876, + "learning_rate": 8.20924623115578e-06, + "loss": 0.038, + "step": 18325 + }, + { + "epoch": 12.32, + "grad_norm": 2.7955732345581055, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0384, + "step": 18350 + }, + { + "epoch": 12.34, + "grad_norm": 3.000404119491577, + "learning_rate": 8.204321608040202e-06, + "loss": 0.041, + "step": 18375 + }, + { + "epoch": 12.36, + "grad_norm": 3.2639877796173096, + "learning_rate": 8.201809045226131e-06, + "loss": 0.0405, + "step": 18400 + }, + { + "epoch": 12.37, + "grad_norm": 3.2814202308654785, + "learning_rate": 8.19929648241206e-06, + "loss": 0.0401, + "step": 18425 + }, + { + "epoch": 12.39, + "grad_norm": 3.4487671852111816, + "learning_rate": 8.19678391959799e-06, + "loss": 0.0408, + "step": 18450 + }, + { + "epoch": 12.41, + "grad_norm": 3.642317771911621, + "learning_rate": 8.194271356783921e-06, + "loss": 0.0388, + "step": 18475 + }, + { + "epoch": 12.42, + "grad_norm": 2.9732296466827393, + "learning_rate": 8.19175879396985e-06, + "loss": 0.0407, + "step": 18500 + }, + { + "epoch": 12.44, + "grad_norm": 2.721803665161133, + "learning_rate": 8.18924623115578e-06, + "loss": 0.0396, + "step": 18525 + }, + { + "epoch": 12.46, + "grad_norm": 3.185227155685425, + "learning_rate": 8.186733668341709e-06, + "loss": 0.0411, + "step": 18550 + }, + { + "epoch": 12.47, + "grad_norm": 3.0955049991607666, + "learning_rate": 8.184221105527638e-06, + "loss": 0.0393, + "step": 18575 + }, + { + "epoch": 12.49, + "grad_norm": 3.548126220703125, + "learning_rate": 8.18170854271357e-06, + "loss": 0.0408, + "step": 18600 + }, + { + "epoch": 12.51, + "grad_norm": 3.2723681926727295, + "learning_rate": 8.179195979899498e-06, + "loss": 0.0399, + "step": 18625 + }, + { + "epoch": 12.53, + "grad_norm": 3.2339518070220947, + "learning_rate": 8.176683417085428e-06, + "loss": 0.0393, + "step": 18650 + }, + { + "epoch": 12.54, + "grad_norm": 2.612379312515259, + "learning_rate": 8.174170854271357e-06, + "loss": 0.0397, + "step": 18675 + }, + { + "epoch": 12.56, + "grad_norm": 3.353308916091919, + "learning_rate": 8.171658291457286e-06, + "loss": 0.0386, + "step": 18700 + }, + { + "epoch": 12.58, + "grad_norm": 3.403552770614624, + "learning_rate": 8.169145728643216e-06, + "loss": 0.0374, + "step": 18725 + }, + { + "epoch": 12.59, + "grad_norm": 3.065211772918701, + "learning_rate": 8.166633165829147e-06, + "loss": 0.0409, + "step": 18750 + }, + { + "epoch": 12.61, + "grad_norm": 3.414477825164795, + "learning_rate": 8.164120603015076e-06, + "loss": 0.043, + "step": 18775 + }, + { + "epoch": 12.63, + "grad_norm": 3.2342004776000977, + "learning_rate": 8.161608040201005e-06, + "loss": 0.0414, + "step": 18800 + }, + { + "epoch": 12.64, + "grad_norm": 2.7432329654693604, + "learning_rate": 8.159095477386936e-06, + "loss": 0.0419, + "step": 18825 + }, + { + "epoch": 12.66, + "grad_norm": 4.053291320800781, + "learning_rate": 8.156582914572864e-06, + "loss": 0.0398, + "step": 18850 + }, + { + "epoch": 12.68, + "grad_norm": 3.6283562183380127, + "learning_rate": 8.154070351758795e-06, + "loss": 0.0418, + "step": 18875 + }, + { + "epoch": 12.69, + "grad_norm": 3.271876811981201, + "learning_rate": 8.151557788944724e-06, + "loss": 0.0406, + "step": 18900 + }, + { + "epoch": 12.71, + "grad_norm": 3.3353631496429443, + "learning_rate": 8.149045226130654e-06, + "loss": 0.041, + "step": 18925 + }, + { + "epoch": 12.73, + "grad_norm": 2.927227735519409, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0415, + "step": 18950 + }, + { + "epoch": 12.74, + "grad_norm": 3.3876254558563232, + "learning_rate": 8.144020100502512e-06, + "loss": 0.042, + "step": 18975 + }, + { + "epoch": 12.76, + "grad_norm": 3.018205165863037, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0406, + "step": 19000 + }, + { + "epoch": 12.76, + "eval_loss": 0.20187248289585114, + "eval_runtime": 526.9269, + "eval_samples_per_second": 2.847, + "eval_steps_per_second": 2.847, + "eval_wer": 25.316294742195943, + "step": 19000 + }, + { + "epoch": 12.78, + "grad_norm": 3.279440402984619, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0397, + "step": 19025 + }, + { + "epoch": 12.79, + "grad_norm": 2.9568395614624023, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0411, + "step": 19050 + }, + { + "epoch": 12.81, + "grad_norm": 3.1550333499908447, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0432, + "step": 19075 + }, + { + "epoch": 12.83, + "grad_norm": 3.4548349380493164, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0407, + "step": 19100 + }, + { + "epoch": 12.84, + "grad_norm": 3.5642495155334473, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0421, + "step": 19125 + }, + { + "epoch": 12.86, + "grad_norm": 2.8931829929351807, + "learning_rate": 8.126432160804021e-06, + "loss": 0.042, + "step": 19150 + }, + { + "epoch": 12.88, + "grad_norm": 3.754613161087036, + "learning_rate": 8.12391959798995e-06, + "loss": 0.0404, + "step": 19175 + }, + { + "epoch": 12.89, + "grad_norm": 3.0954208374023438, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0391, + "step": 19200 + }, + { + "epoch": 12.91, + "grad_norm": 3.1250925064086914, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0421, + "step": 19225 + }, + { + "epoch": 12.93, + "grad_norm": 3.2028305530548096, + "learning_rate": 8.11638190954774e-06, + "loss": 0.0407, + "step": 19250 + }, + { + "epoch": 12.94, + "grad_norm": 3.48286509513855, + "learning_rate": 8.11386934673367e-06, + "loss": 0.043, + "step": 19275 + }, + { + "epoch": 12.96, + "grad_norm": 3.6675736904144287, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0426, + "step": 19300 + }, + { + "epoch": 12.98, + "grad_norm": 2.9302334785461426, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0432, + "step": 19325 + }, + { + "epoch": 13.0, + "grad_norm": 3.1114678382873535, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0418, + "step": 19350 + }, + { + "epoch": 13.01, + "grad_norm": 2.9001448154449463, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0343, + "step": 19375 + }, + { + "epoch": 13.03, + "grad_norm": 3.0393106937408447, + "learning_rate": 8.101306532663318e-06, + "loss": 0.0311, + "step": 19400 + }, + { + "epoch": 13.05, + "grad_norm": 2.531508445739746, + "learning_rate": 8.098793969849247e-06, + "loss": 0.0304, + "step": 19425 + }, + { + "epoch": 13.06, + "grad_norm": 2.6556875705718994, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0317, + "step": 19450 + }, + { + "epoch": 13.08, + "grad_norm": 3.10502552986145, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0309, + "step": 19475 + }, + { + "epoch": 13.1, + "grad_norm": 2.639723300933838, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0303, + "step": 19500 + }, + { + "epoch": 13.11, + "grad_norm": 2.8939049243927, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0323, + "step": 19525 + }, + { + "epoch": 13.13, + "grad_norm": 3.07916522026062, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0314, + "step": 19550 + }, + { + "epoch": 13.15, + "grad_norm": 2.5476186275482178, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0311, + "step": 19575 + }, + { + "epoch": 13.16, + "grad_norm": 2.825096845626831, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0307, + "step": 19600 + }, + { + "epoch": 13.18, + "grad_norm": 2.5810999870300293, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0315, + "step": 19625 + }, + { + "epoch": 13.2, + "grad_norm": 3.012585401535034, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0318, + "step": 19650 + }, + { + "epoch": 13.21, + "grad_norm": 2.7976012229919434, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0302, + "step": 19675 + }, + { + "epoch": 13.23, + "grad_norm": 2.5770974159240723, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0304, + "step": 19700 + }, + { + "epoch": 13.25, + "grad_norm": 2.689742088317871, + "learning_rate": 8.068643216080402e-06, + "loss": 0.031, + "step": 19725 + }, + { + "epoch": 13.26, + "grad_norm": 2.8981127738952637, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0319, + "step": 19750 + }, + { + "epoch": 13.28, + "grad_norm": 2.9146578311920166, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0318, + "step": 19775 + }, + { + "epoch": 13.3, + "grad_norm": 2.2684590816497803, + "learning_rate": 8.061105527638192e-06, + "loss": 0.0306, + "step": 19800 + }, + { + "epoch": 13.31, + "grad_norm": 2.7601232528686523, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0315, + "step": 19825 + }, + { + "epoch": 13.33, + "grad_norm": 2.9197874069213867, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0325, + "step": 19850 + }, + { + "epoch": 13.35, + "grad_norm": 3.2532525062561035, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0321, + "step": 19875 + }, + { + "epoch": 13.36, + "grad_norm": 2.7587943077087402, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0306, + "step": 19900 + }, + { + "epoch": 13.38, + "grad_norm": 2.950467586517334, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0323, + "step": 19925 + }, + { + "epoch": 13.4, + "grad_norm": 2.9366438388824463, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0339, + "step": 19950 + }, + { + "epoch": 13.42, + "grad_norm": 3.4724960327148438, + "learning_rate": 8.043517587939699e-06, + "loss": 0.0348, + "step": 19975 + }, + { + "epoch": 13.43, + "grad_norm": 2.8667757511138916, + "learning_rate": 8.041005025125628e-06, + "loss": 0.033, + "step": 20000 + }, + { + "epoch": 13.43, + "eval_loss": 0.21555839478969574, + "eval_runtime": 528.0342, + "eval_samples_per_second": 2.841, + "eval_steps_per_second": 2.841, + "eval_wer": 25.265433276114184, + "step": 20000 + }, + { + "epoch": 13.45, + "grad_norm": 3.216893196105957, + "learning_rate": 8.03849246231156e-06, + "loss": 0.0318, + "step": 20025 + }, + { + "epoch": 13.47, + "grad_norm": 2.641538619995117, + "learning_rate": 8.035979899497489e-06, + "loss": 0.0326, + "step": 20050 + }, + { + "epoch": 13.48, + "grad_norm": 2.9073219299316406, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0337, + "step": 20075 + }, + { + "epoch": 13.5, + "grad_norm": 3.219444990158081, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0349, + "step": 20100 + }, + { + "epoch": 13.52, + "grad_norm": 3.1302082538604736, + "learning_rate": 8.028442211055277e-06, + "loss": 0.0316, + "step": 20125 + }, + { + "epoch": 13.53, + "grad_norm": 2.8819210529327393, + "learning_rate": 8.025929648241206e-06, + "loss": 0.034, + "step": 20150 + }, + { + "epoch": 13.55, + "grad_norm": 2.68033504486084, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0328, + "step": 20175 + }, + { + "epoch": 13.57, + "grad_norm": 2.9281654357910156, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0338, + "step": 20200 + }, + { + "epoch": 13.58, + "grad_norm": 3.1869583129882812, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0337, + "step": 20225 + }, + { + "epoch": 13.6, + "grad_norm": 3.3346383571624756, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0333, + "step": 20250 + }, + { + "epoch": 13.62, + "grad_norm": 3.4311933517456055, + "learning_rate": 8.013366834170854e-06, + "loss": 0.0349, + "step": 20275 + }, + { + "epoch": 13.63, + "grad_norm": 2.5445518493652344, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0328, + "step": 20300 + }, + { + "epoch": 13.65, + "grad_norm": 3.098491907119751, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0346, + "step": 20325 + }, + { + "epoch": 13.67, + "grad_norm": 3.2504217624664307, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0347, + "step": 20350 + }, + { + "epoch": 13.68, + "grad_norm": 2.842876672744751, + "learning_rate": 8.003417085427137e-06, + "loss": 0.0359, + "step": 20375 + }, + { + "epoch": 13.7, + "grad_norm": 3.1873013973236084, + "learning_rate": 8.000904522613065e-06, + "loss": 0.0336, + "step": 20400 + }, + { + "epoch": 13.72, + "grad_norm": 3.3582944869995117, + "learning_rate": 7.998391959798996e-06, + "loss": 0.0361, + "step": 20425 + }, + { + "epoch": 13.73, + "grad_norm": 3.2533841133117676, + "learning_rate": 7.995879396984925e-06, + "loss": 0.0338, + "step": 20450 + }, + { + "epoch": 13.75, + "grad_norm": 3.1147780418395996, + "learning_rate": 7.993366834170854e-06, + "loss": 0.0343, + "step": 20475 + }, + { + "epoch": 13.77, + "grad_norm": 3.2679054737091064, + "learning_rate": 7.990854271356785e-06, + "loss": 0.0327, + "step": 20500 + }, + { + "epoch": 13.78, + "grad_norm": 2.788322925567627, + "learning_rate": 7.988341708542715e-06, + "loss": 0.0331, + "step": 20525 + }, + { + "epoch": 13.8, + "grad_norm": 3.6216132640838623, + "learning_rate": 7.985829145728644e-06, + "loss": 0.0349, + "step": 20550 + }, + { + "epoch": 13.82, + "grad_norm": 3.1277170181274414, + "learning_rate": 7.983316582914573e-06, + "loss": 0.0322, + "step": 20575 + }, + { + "epoch": 13.83, + "grad_norm": 2.9005231857299805, + "learning_rate": 7.980804020100503e-06, + "loss": 0.0349, + "step": 20600 + }, + { + "epoch": 13.85, + "grad_norm": 3.0568697452545166, + "learning_rate": 7.978291457286432e-06, + "loss": 0.0352, + "step": 20625 + }, + { + "epoch": 13.87, + "grad_norm": 3.529184103012085, + "learning_rate": 7.975778894472363e-06, + "loss": 0.0317, + "step": 20650 + }, + { + "epoch": 13.89, + "grad_norm": 3.288602352142334, + "learning_rate": 7.973266331658292e-06, + "loss": 0.0338, + "step": 20675 + }, + { + "epoch": 13.9, + "grad_norm": 2.9690539836883545, + "learning_rate": 7.970753768844222e-06, + "loss": 0.0339, + "step": 20700 + }, + { + "epoch": 13.92, + "grad_norm": 2.77999210357666, + "learning_rate": 7.968241206030151e-06, + "loss": 0.0325, + "step": 20725 + }, + { + "epoch": 13.94, + "grad_norm": 2.7282400131225586, + "learning_rate": 7.96572864321608e-06, + "loss": 0.0303, + "step": 20750 + }, + { + "epoch": 13.95, + "grad_norm": 3.0671041011810303, + "learning_rate": 7.963216080402011e-06, + "loss": 0.0344, + "step": 20775 + }, + { + "epoch": 13.97, + "grad_norm": 3.3215699195861816, + "learning_rate": 7.96070351758794e-06, + "loss": 0.0341, + "step": 20800 + }, + { + "epoch": 13.99, + "grad_norm": 2.7825751304626465, + "learning_rate": 7.95819095477387e-06, + "loss": 0.0338, + "step": 20825 + }, + { + "epoch": 14.0, + "grad_norm": 2.2416014671325684, + "learning_rate": 7.955678391959801e-06, + "loss": 0.0332, + "step": 20850 + }, + { + "epoch": 14.02, + "grad_norm": 2.198612689971924, + "learning_rate": 7.953165829145729e-06, + "loss": 0.0255, + "step": 20875 + }, + { + "epoch": 14.04, + "grad_norm": 3.1882541179656982, + "learning_rate": 7.95065326633166e-06, + "loss": 0.0238, + "step": 20900 + }, + { + "epoch": 14.05, + "grad_norm": 2.464125633239746, + "learning_rate": 7.948140703517589e-06, + "loss": 0.0256, + "step": 20925 + }, + { + "epoch": 14.07, + "grad_norm": 2.613816261291504, + "learning_rate": 7.945628140703518e-06, + "loss": 0.0261, + "step": 20950 + }, + { + "epoch": 14.09, + "grad_norm": 3.0007505416870117, + "learning_rate": 7.943115577889448e-06, + "loss": 0.0258, + "step": 20975 + }, + { + "epoch": 14.1, + "grad_norm": 2.689883232116699, + "learning_rate": 7.940603015075377e-06, + "loss": 0.0247, + "step": 21000 + }, + { + "epoch": 14.1, + "eval_loss": 0.21880781650543213, + "eval_runtime": 530.071, + "eval_samples_per_second": 2.83, + "eval_steps_per_second": 2.83, + "eval_wer": 24.858541547460106, + "step": 21000 + }, + { + "epoch": 14.12, + "grad_norm": 3.0284547805786133, + "learning_rate": 7.938090452261306e-06, + "loss": 0.0264, + "step": 21025 + }, + { + "epoch": 14.14, + "grad_norm": 2.7010715007781982, + "learning_rate": 7.935577889447237e-06, + "loss": 0.0252, + "step": 21050 + }, + { + "epoch": 14.15, + "grad_norm": 2.5919244289398193, + "learning_rate": 7.933065326633167e-06, + "loss": 0.0246, + "step": 21075 + }, + { + "epoch": 14.17, + "grad_norm": 2.3864364624023438, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0255, + "step": 21100 + }, + { + "epoch": 14.19, + "grad_norm": 1.9557113647460938, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0257, + "step": 21125 + }, + { + "epoch": 14.2, + "grad_norm": 2.4666495323181152, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0247, + "step": 21150 + }, + { + "epoch": 14.22, + "grad_norm": 3.078350782394409, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0249, + "step": 21175 + }, + { + "epoch": 14.24, + "grad_norm": 2.8025596141815186, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0242, + "step": 21200 + }, + { + "epoch": 14.25, + "grad_norm": 2.336594820022583, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0262, + "step": 21225 + }, + { + "epoch": 14.27, + "grad_norm": 2.3702356815338135, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0259, + "step": 21250 + }, + { + "epoch": 14.29, + "grad_norm": 3.149120807647705, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0262, + "step": 21275 + }, + { + "epoch": 14.3, + "grad_norm": 2.343489408493042, + "learning_rate": 7.910452261306534e-06, + "loss": 0.026, + "step": 21300 + }, + { + "epoch": 14.32, + "grad_norm": 2.536985158920288, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0244, + "step": 21325 + }, + { + "epoch": 14.34, + "grad_norm": 2.3428032398223877, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0257, + "step": 21350 + }, + { + "epoch": 14.36, + "grad_norm": 2.834862470626831, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0266, + "step": 21375 + }, + { + "epoch": 14.37, + "grad_norm": 2.620488405227661, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0266, + "step": 21400 + }, + { + "epoch": 14.39, + "grad_norm": 2.644897699356079, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0262, + "step": 21425 + }, + { + "epoch": 14.41, + "grad_norm": 2.836057662963867, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0267, + "step": 21450 + }, + { + "epoch": 14.42, + "grad_norm": 2.670128345489502, + "learning_rate": 7.89286432160804e-06, + "loss": 0.0262, + "step": 21475 + }, + { + "epoch": 14.44, + "grad_norm": 2.603405475616455, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0259, + "step": 21500 + }, + { + "epoch": 14.46, + "grad_norm": 2.957242488861084, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0274, + "step": 21525 + }, + { + "epoch": 14.47, + "grad_norm": 2.787645101547241, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0268, + "step": 21550 + }, + { + "epoch": 14.49, + "grad_norm": 2.7353100776672363, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0252, + "step": 21575 + }, + { + "epoch": 14.51, + "grad_norm": 2.630357265472412, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0262, + "step": 21600 + }, + { + "epoch": 14.52, + "grad_norm": 2.316859483718872, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0297, + "step": 21625 + }, + { + "epoch": 14.54, + "grad_norm": 3.4617390632629395, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0277, + "step": 21650 + }, + { + "epoch": 14.56, + "grad_norm": 2.5444860458374023, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0272, + "step": 21675 + }, + { + "epoch": 14.57, + "grad_norm": 3.038832187652588, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0263, + "step": 21700 + }, + { + "epoch": 14.59, + "grad_norm": 2.8435111045837402, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0268, + "step": 21725 + }, + { + "epoch": 14.61, + "grad_norm": 2.7067975997924805, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0258, + "step": 21750 + }, + { + "epoch": 14.62, + "grad_norm": 2.7530405521392822, + "learning_rate": 7.862713567839196e-06, + "loss": 0.027, + "step": 21775 + }, + { + "epoch": 14.64, + "grad_norm": 3.0732839107513428, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0272, + "step": 21800 + }, + { + "epoch": 14.66, + "grad_norm": 2.4972004890441895, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0273, + "step": 21825 + }, + { + "epoch": 14.67, + "grad_norm": 2.874180316925049, + "learning_rate": 7.855175879396986e-06, + "loss": 0.028, + "step": 21850 + }, + { + "epoch": 14.69, + "grad_norm": 2.939277172088623, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0273, + "step": 21875 + }, + { + "epoch": 14.71, + "grad_norm": 2.6884384155273438, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0269, + "step": 21900 + }, + { + "epoch": 14.72, + "grad_norm": 2.766014337539673, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0268, + "step": 21925 + }, + { + "epoch": 14.74, + "grad_norm": 3.3808324337005615, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0285, + "step": 21950 + }, + { + "epoch": 14.76, + "grad_norm": 2.250244617462158, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0268, + "step": 21975 + }, + { + "epoch": 14.78, + "grad_norm": 2.8447093963623047, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0265, + "step": 22000 + }, + { + "epoch": 14.78, + "eval_loss": 0.22979824244976044, + "eval_runtime": 526.9417, + "eval_samples_per_second": 2.847, + "eval_steps_per_second": 2.847, + "eval_wer": 25.443448407400343, + "step": 22000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 68, + "save_steps": 1000, + "total_flos": 3.465787561869312e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/training_args.bin b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5987a45158b1f635227ea1a972f8b3e48ce989fc --- /dev/null +++ b/checkpoints/whisper-tiny/bhojpuri/checkpoint-22000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e2f578417e87f8c4d1fff61970ede76888d9cfc1a6f1d8039d2f7e06588245 +size 4667 diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/config.json b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c47e7ae5f6c65847b8952aa0e827c7f13a489891 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/generation_config.json b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/model.safetensors b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1443063035d622521ec75bb3f1e41dfacae29699 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb241ca5a55d49d92ff2f24ee42f4b155f18e06521f6ef8492d97d53428eb485 +size 151061672 diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/optimizer.pt b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb007cbf903d04a149b95eca67bed1d434c1067 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb717ff787376aee2d16064e8653bdd5a100db306cb5ab75932eac907f6494f9 +size 297615749 diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/preprocessor_config.json b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/rng_state.pth b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a69bf467cdd7bc4ee9314e4d1c6f4ad11cc456b6 --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c4f4b74e293244b942e94ddce009e9a06fcc324a4c16a37a9df9d35733868a +size 14575 diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/scheduler.pt b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..de9c429e9999f66f2a1deb8667a2dbd659fdf3df --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d376bb44853fda280719e130a08f368d538fd19cb27ef67e8eef3394f13c1673 +size 627 diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/trainer_state.json b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0d1923d0f334c833f5fa0ff39240394d5961b9f --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/trainer_state.json @@ -0,0 +1,5223 @@ +{ + "best_metric": 16.70214602151775, + "best_model_checkpoint": "results/whisper-tiny/chattisgarhi/checkpoint-8000", + "epoch": 13.422818791946309, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 70.02006530761719, + "learning_rate": 4.4e-07, + "loss": 3.6717, + "step": 25 + }, + { + "epoch": 0.04, + "grad_norm": 41.00192642211914, + "learning_rate": 9.200000000000001e-07, + "loss": 3.1345, + "step": 50 + }, + { + "epoch": 0.06, + "grad_norm": 13.001567840576172, + "learning_rate": 1.42e-06, + "loss": 2.3536, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 9.573976516723633, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.8039, + "step": 100 + }, + { + "epoch": 0.09, + "grad_norm": 6.7950944900512695, + "learning_rate": 2.42e-06, + "loss": 1.3672, + "step": 125 + }, + { + "epoch": 0.11, + "grad_norm": 5.863748073577881, + "learning_rate": 2.92e-06, + "loss": 1.0629, + "step": 150 + }, + { + "epoch": 0.13, + "grad_norm": 5.522293567657471, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.8498, + "step": 175 + }, + { + "epoch": 0.15, + "grad_norm": 4.660111427307129, + "learning_rate": 3.920000000000001e-06, + "loss": 0.7196, + "step": 200 + }, + { + "epoch": 0.17, + "grad_norm": 5.2002129554748535, + "learning_rate": 4.42e-06, + "loss": 0.6305, + "step": 225 + }, + { + "epoch": 0.19, + "grad_norm": 4.548714637756348, + "learning_rate": 4.92e-06, + "loss": 0.5627, + "step": 250 + }, + { + "epoch": 0.21, + "grad_norm": 4.328067302703857, + "learning_rate": 5.420000000000001e-06, + "loss": 0.525, + "step": 275 + }, + { + "epoch": 0.22, + "grad_norm": 4.1530680656433105, + "learning_rate": 5.92e-06, + "loss": 0.4879, + "step": 300 + }, + { + "epoch": 0.24, + "grad_norm": 3.9321088790893555, + "learning_rate": 6.42e-06, + "loss": 0.4685, + "step": 325 + }, + { + "epoch": 0.26, + "grad_norm": 4.24140739440918, + "learning_rate": 6.92e-06, + "loss": 0.4307, + "step": 350 + }, + { + "epoch": 0.28, + "grad_norm": 4.857550144195557, + "learning_rate": 7.420000000000001e-06, + "loss": 0.4124, + "step": 375 + }, + { + "epoch": 0.3, + "grad_norm": 3.55281400680542, + "learning_rate": 7.92e-06, + "loss": 0.3937, + "step": 400 + }, + { + "epoch": 0.32, + "grad_norm": 4.189201831817627, + "learning_rate": 8.42e-06, + "loss": 0.3843, + "step": 425 + }, + { + "epoch": 0.34, + "grad_norm": 3.513540267944336, + "learning_rate": 8.920000000000001e-06, + "loss": 0.3733, + "step": 450 + }, + { + "epoch": 0.35, + "grad_norm": 4.552852630615234, + "learning_rate": 9.42e-06, + "loss": 0.3559, + "step": 475 + }, + { + "epoch": 0.37, + "grad_norm": 4.108123302459717, + "learning_rate": 9.920000000000002e-06, + "loss": 0.3418, + "step": 500 + }, + { + "epoch": 0.39, + "grad_norm": 4.031134605407715, + "learning_rate": 9.997889447236182e-06, + "loss": 0.3323, + "step": 525 + }, + { + "epoch": 0.41, + "grad_norm": 4.042705535888672, + "learning_rate": 9.995376884422112e-06, + "loss": 0.3211, + "step": 550 + }, + { + "epoch": 0.43, + "grad_norm": 4.086574077606201, + "learning_rate": 9.992864321608041e-06, + "loss": 0.3162, + "step": 575 + }, + { + "epoch": 0.45, + "grad_norm": 3.878255844116211, + "learning_rate": 9.99035175879397e-06, + "loss": 0.3044, + "step": 600 + }, + { + "epoch": 0.47, + "grad_norm": 4.248219966888428, + "learning_rate": 9.9878391959799e-06, + "loss": 0.3151, + "step": 625 + }, + { + "epoch": 0.48, + "grad_norm": 3.6040706634521484, + "learning_rate": 9.98532663316583e-06, + "loss": 0.2944, + "step": 650 + }, + { + "epoch": 0.5, + "grad_norm": 3.8102729320526123, + "learning_rate": 9.98281407035176e-06, + "loss": 0.2904, + "step": 675 + }, + { + "epoch": 0.52, + "grad_norm": 3.6261520385742188, + "learning_rate": 9.98030150753769e-06, + "loss": 0.2908, + "step": 700 + }, + { + "epoch": 0.54, + "grad_norm": 3.751326084136963, + "learning_rate": 9.977788944723619e-06, + "loss": 0.2751, + "step": 725 + }, + { + "epoch": 0.56, + "grad_norm": 3.732649326324463, + "learning_rate": 9.975276381909548e-06, + "loss": 0.2782, + "step": 750 + }, + { + "epoch": 0.58, + "grad_norm": 3.414696455001831, + "learning_rate": 9.972763819095477e-06, + "loss": 0.2726, + "step": 775 + }, + { + "epoch": 0.6, + "grad_norm": 3.3486568927764893, + "learning_rate": 9.970251256281408e-06, + "loss": 0.2645, + "step": 800 + }, + { + "epoch": 0.62, + "grad_norm": 3.6677911281585693, + "learning_rate": 9.967738693467338e-06, + "loss": 0.2691, + "step": 825 + }, + { + "epoch": 0.63, + "grad_norm": 3.7658097743988037, + "learning_rate": 9.965226130653267e-06, + "loss": 0.2591, + "step": 850 + }, + { + "epoch": 0.65, + "grad_norm": 4.024987697601318, + "learning_rate": 9.962713567839198e-06, + "loss": 0.2618, + "step": 875 + }, + { + "epoch": 0.67, + "grad_norm": 3.4980733394622803, + "learning_rate": 9.960201005025126e-06, + "loss": 0.2523, + "step": 900 + }, + { + "epoch": 0.69, + "grad_norm": 3.7521917819976807, + "learning_rate": 9.957688442211057e-06, + "loss": 0.2486, + "step": 925 + }, + { + "epoch": 0.71, + "grad_norm": 3.7922661304473877, + "learning_rate": 9.955175879396986e-06, + "loss": 0.2473, + "step": 950 + }, + { + "epoch": 0.73, + "grad_norm": 3.1705334186553955, + "learning_rate": 9.952663316582915e-06, + "loss": 0.2427, + "step": 975 + }, + { + "epoch": 0.75, + "grad_norm": 3.846872329711914, + "learning_rate": 9.950150753768845e-06, + "loss": 0.238, + "step": 1000 + }, + { + "epoch": 0.75, + "eval_loss": 0.1972341388463974, + "eval_runtime": 652.5422, + "eval_samples_per_second": 2.165, + "eval_steps_per_second": 2.165, + "eval_wer": 29.63005580806628, + "step": 1000 + }, + { + "epoch": 0.76, + "grad_norm": 3.4909417629241943, + "learning_rate": 9.947638190954774e-06, + "loss": 0.2401, + "step": 1025 + }, + { + "epoch": 0.78, + "grad_norm": 4.181436061859131, + "learning_rate": 9.945125628140703e-06, + "loss": 0.2413, + "step": 1050 + }, + { + "epoch": 0.8, + "grad_norm": 3.644803047180176, + "learning_rate": 9.942613065326634e-06, + "loss": 0.2388, + "step": 1075 + }, + { + "epoch": 0.82, + "grad_norm": 3.2332091331481934, + "learning_rate": 9.940100502512564e-06, + "loss": 0.2359, + "step": 1100 + }, + { + "epoch": 0.84, + "grad_norm": 3.3667097091674805, + "learning_rate": 9.937587939698493e-06, + "loss": 0.2323, + "step": 1125 + }, + { + "epoch": 0.86, + "grad_norm": 3.790893793106079, + "learning_rate": 9.935075376884424e-06, + "loss": 0.2319, + "step": 1150 + }, + { + "epoch": 0.88, + "grad_norm": 4.438441753387451, + "learning_rate": 9.932562814070352e-06, + "loss": 0.2327, + "step": 1175 + }, + { + "epoch": 0.89, + "grad_norm": 3.682677745819092, + "learning_rate": 9.930050251256283e-06, + "loss": 0.2233, + "step": 1200 + }, + { + "epoch": 0.91, + "grad_norm": 3.3926162719726562, + "learning_rate": 9.927537688442212e-06, + "loss": 0.2266, + "step": 1225 + }, + { + "epoch": 0.93, + "grad_norm": 3.2791497707366943, + "learning_rate": 9.925025125628141e-06, + "loss": 0.2211, + "step": 1250 + }, + { + "epoch": 0.95, + "grad_norm": 3.6198647022247314, + "learning_rate": 9.922512562814072e-06, + "loss": 0.2157, + "step": 1275 + }, + { + "epoch": 0.97, + "grad_norm": 3.7074167728424072, + "learning_rate": 9.920000000000002e-06, + "loss": 0.2167, + "step": 1300 + }, + { + "epoch": 0.99, + "grad_norm": 3.6291451454162598, + "learning_rate": 9.917487437185931e-06, + "loss": 0.2168, + "step": 1325 + }, + { + "epoch": 1.01, + "grad_norm": 3.4960734844207764, + "learning_rate": 9.91497487437186e-06, + "loss": 0.2093, + "step": 1350 + }, + { + "epoch": 1.03, + "grad_norm": 3.367835760116577, + "learning_rate": 9.91246231155779e-06, + "loss": 0.2035, + "step": 1375 + }, + { + "epoch": 1.04, + "grad_norm": 3.3607234954833984, + "learning_rate": 9.909949748743719e-06, + "loss": 0.1955, + "step": 1400 + }, + { + "epoch": 1.06, + "grad_norm": 3.3430323600769043, + "learning_rate": 9.90743718592965e-06, + "loss": 0.2008, + "step": 1425 + }, + { + "epoch": 1.08, + "grad_norm": 3.6138670444488525, + "learning_rate": 9.904924623115578e-06, + "loss": 0.1993, + "step": 1450 + }, + { + "epoch": 1.1, + "grad_norm": 3.558518409729004, + "learning_rate": 9.902412060301509e-06, + "loss": 0.1946, + "step": 1475 + }, + { + "epoch": 1.12, + "grad_norm": 3.070223093032837, + "learning_rate": 9.899899497487438e-06, + "loss": 0.1954, + "step": 1500 + }, + { + "epoch": 1.14, + "grad_norm": 3.3455593585968018, + "learning_rate": 9.897386934673367e-06, + "loss": 0.1924, + "step": 1525 + }, + { + "epoch": 1.16, + "grad_norm": 3.395688772201538, + "learning_rate": 9.894874371859298e-06, + "loss": 0.1959, + "step": 1550 + }, + { + "epoch": 1.17, + "grad_norm": 3.3449766635894775, + "learning_rate": 9.892361809045228e-06, + "loss": 0.1931, + "step": 1575 + }, + { + "epoch": 1.19, + "grad_norm": 3.115980863571167, + "learning_rate": 9.889849246231157e-06, + "loss": 0.1948, + "step": 1600 + }, + { + "epoch": 1.21, + "grad_norm": 3.1577816009521484, + "learning_rate": 9.887336683417086e-06, + "loss": 0.1867, + "step": 1625 + }, + { + "epoch": 1.23, + "grad_norm": 3.2870922088623047, + "learning_rate": 9.884824120603015e-06, + "loss": 0.1903, + "step": 1650 + }, + { + "epoch": 1.25, + "grad_norm": 3.1169934272766113, + "learning_rate": 9.882311557788945e-06, + "loss": 0.1848, + "step": 1675 + }, + { + "epoch": 1.27, + "grad_norm": 3.266202688217163, + "learning_rate": 9.879798994974876e-06, + "loss": 0.1883, + "step": 1700 + }, + { + "epoch": 1.29, + "grad_norm": 3.8794736862182617, + "learning_rate": 9.877286432160805e-06, + "loss": 0.1919, + "step": 1725 + }, + { + "epoch": 1.3, + "grad_norm": 3.3005239963531494, + "learning_rate": 9.874773869346734e-06, + "loss": 0.1868, + "step": 1750 + }, + { + "epoch": 1.32, + "grad_norm": 3.5627248287200928, + "learning_rate": 9.872261306532664e-06, + "loss": 0.1855, + "step": 1775 + }, + { + "epoch": 1.34, + "grad_norm": 3.644388437271118, + "learning_rate": 9.869748743718593e-06, + "loss": 0.1799, + "step": 1800 + }, + { + "epoch": 1.36, + "grad_norm": 3.8418381214141846, + "learning_rate": 9.867236180904524e-06, + "loss": 0.1856, + "step": 1825 + }, + { + "epoch": 1.38, + "grad_norm": 3.5702195167541504, + "learning_rate": 9.864723618090453e-06, + "loss": 0.1792, + "step": 1850 + }, + { + "epoch": 1.4, + "grad_norm": 3.221525192260742, + "learning_rate": 9.862211055276383e-06, + "loss": 0.1749, + "step": 1875 + }, + { + "epoch": 1.42, + "grad_norm": 3.606254816055298, + "learning_rate": 9.859698492462312e-06, + "loss": 0.1768, + "step": 1900 + }, + { + "epoch": 1.44, + "grad_norm": 3.580653190612793, + "learning_rate": 9.857185929648241e-06, + "loss": 0.1849, + "step": 1925 + }, + { + "epoch": 1.45, + "grad_norm": 3.0190389156341553, + "learning_rate": 9.854673366834172e-06, + "loss": 0.1785, + "step": 1950 + }, + { + "epoch": 1.47, + "grad_norm": 3.406170129776001, + "learning_rate": 9.852160804020102e-06, + "loss": 0.1819, + "step": 1975 + }, + { + "epoch": 1.49, + "grad_norm": 3.1115918159484863, + "learning_rate": 9.849648241206031e-06, + "loss": 0.1825, + "step": 2000 + }, + { + "epoch": 1.49, + "eval_loss": 0.15438029170036316, + "eval_runtime": 619.7422, + "eval_samples_per_second": 2.28, + "eval_steps_per_second": 2.28, + "eval_wer": 23.704044646453024, + "step": 2000 + }, + { + "epoch": 1.51, + "grad_norm": 3.2072248458862305, + "learning_rate": 9.84713567839196e-06, + "loss": 0.1827, + "step": 2025 + }, + { + "epoch": 1.53, + "grad_norm": 2.6771724224090576, + "learning_rate": 9.84462311557789e-06, + "loss": 0.1731, + "step": 2050 + }, + { + "epoch": 1.55, + "grad_norm": 3.7248549461364746, + "learning_rate": 9.842110552763819e-06, + "loss": 0.1807, + "step": 2075 + }, + { + "epoch": 1.57, + "grad_norm": 3.272280216217041, + "learning_rate": 9.83959798994975e-06, + "loss": 0.1739, + "step": 2100 + }, + { + "epoch": 1.58, + "grad_norm": 3.2716176509857178, + "learning_rate": 9.83708542713568e-06, + "loss": 0.1742, + "step": 2125 + }, + { + "epoch": 1.6, + "grad_norm": 3.574812889099121, + "learning_rate": 9.834572864321609e-06, + "loss": 0.1659, + "step": 2150 + }, + { + "epoch": 1.62, + "grad_norm": 3.4505491256713867, + "learning_rate": 9.832060301507538e-06, + "loss": 0.1724, + "step": 2175 + }, + { + "epoch": 1.64, + "grad_norm": 2.802779197692871, + "learning_rate": 9.829547738693467e-06, + "loss": 0.1703, + "step": 2200 + }, + { + "epoch": 1.66, + "grad_norm": 3.396120071411133, + "learning_rate": 9.827035175879398e-06, + "loss": 0.1733, + "step": 2225 + }, + { + "epoch": 1.68, + "grad_norm": 3.4162375926971436, + "learning_rate": 9.824522613065328e-06, + "loss": 0.1698, + "step": 2250 + }, + { + "epoch": 1.7, + "grad_norm": 3.352968692779541, + "learning_rate": 9.822010050251257e-06, + "loss": 0.1692, + "step": 2275 + }, + { + "epoch": 1.72, + "grad_norm": 2.944345235824585, + "learning_rate": 9.819497487437186e-06, + "loss": 0.1662, + "step": 2300 + }, + { + "epoch": 1.73, + "grad_norm": 3.2152178287506104, + "learning_rate": 9.816984924623116e-06, + "loss": 0.1677, + "step": 2325 + }, + { + "epoch": 1.75, + "grad_norm": 2.8394665718078613, + "learning_rate": 9.814472361809047e-06, + "loss": 0.1659, + "step": 2350 + }, + { + "epoch": 1.77, + "grad_norm": 3.3643667697906494, + "learning_rate": 9.811959798994976e-06, + "loss": 0.1668, + "step": 2375 + }, + { + "epoch": 1.79, + "grad_norm": 3.13779616355896, + "learning_rate": 9.809447236180905e-06, + "loss": 0.1643, + "step": 2400 + }, + { + "epoch": 1.81, + "grad_norm": 3.149639844894409, + "learning_rate": 9.806934673366835e-06, + "loss": 0.1601, + "step": 2425 + }, + { + "epoch": 1.83, + "grad_norm": 3.480457067489624, + "learning_rate": 9.804422110552764e-06, + "loss": 0.1667, + "step": 2450 + }, + { + "epoch": 1.85, + "grad_norm": 3.2492711544036865, + "learning_rate": 9.801909547738693e-06, + "loss": 0.165, + "step": 2475 + }, + { + "epoch": 1.86, + "grad_norm": 3.6878271102905273, + "learning_rate": 9.799396984924624e-06, + "loss": 0.1638, + "step": 2500 + }, + { + "epoch": 1.88, + "grad_norm": 3.0366616249084473, + "learning_rate": 9.796884422110554e-06, + "loss": 0.1621, + "step": 2525 + }, + { + "epoch": 1.9, + "grad_norm": 3.1855359077453613, + "learning_rate": 9.794371859296483e-06, + "loss": 0.1646, + "step": 2550 + }, + { + "epoch": 1.92, + "grad_norm": 2.7991111278533936, + "learning_rate": 9.791859296482414e-06, + "loss": 0.1636, + "step": 2575 + }, + { + "epoch": 1.94, + "grad_norm": 3.3600916862487793, + "learning_rate": 9.789346733668342e-06, + "loss": 0.1611, + "step": 2600 + }, + { + "epoch": 1.96, + "grad_norm": 3.7731375694274902, + "learning_rate": 9.786834170854273e-06, + "loss": 0.1604, + "step": 2625 + }, + { + "epoch": 1.98, + "grad_norm": 3.259002208709717, + "learning_rate": 9.784321608040202e-06, + "loss": 0.1603, + "step": 2650 + }, + { + "epoch": 1.99, + "grad_norm": 3.4593253135681152, + "learning_rate": 9.781809045226131e-06, + "loss": 0.1591, + "step": 2675 + }, + { + "epoch": 2.01, + "grad_norm": 2.7796614170074463, + "learning_rate": 9.77929648241206e-06, + "loss": 0.1486, + "step": 2700 + }, + { + "epoch": 2.03, + "grad_norm": 2.7607486248016357, + "learning_rate": 9.77678391959799e-06, + "loss": 0.1492, + "step": 2725 + }, + { + "epoch": 2.05, + "grad_norm": 3.0492544174194336, + "learning_rate": 9.774271356783921e-06, + "loss": 0.1454, + "step": 2750 + }, + { + "epoch": 2.07, + "grad_norm": 3.018125534057617, + "learning_rate": 9.77175879396985e-06, + "loss": 0.1468, + "step": 2775 + }, + { + "epoch": 2.09, + "grad_norm": 3.7352709770202637, + "learning_rate": 9.76924623115578e-06, + "loss": 0.1418, + "step": 2800 + }, + { + "epoch": 2.11, + "grad_norm": 2.829341173171997, + "learning_rate": 9.766733668341709e-06, + "loss": 0.1403, + "step": 2825 + }, + { + "epoch": 2.13, + "grad_norm": 3.056603193283081, + "learning_rate": 9.76422110552764e-06, + "loss": 0.1446, + "step": 2850 + }, + { + "epoch": 2.14, + "grad_norm": 3.147531747817993, + "learning_rate": 9.761708542713568e-06, + "loss": 0.1424, + "step": 2875 + }, + { + "epoch": 2.16, + "grad_norm": 2.998420238494873, + "learning_rate": 9.759195979899499e-06, + "loss": 0.144, + "step": 2900 + }, + { + "epoch": 2.18, + "grad_norm": 3.006249189376831, + "learning_rate": 9.756683417085428e-06, + "loss": 0.1433, + "step": 2925 + }, + { + "epoch": 2.2, + "grad_norm": 3.428668975830078, + "learning_rate": 9.754170854271357e-06, + "loss": 0.1402, + "step": 2950 + }, + { + "epoch": 2.22, + "grad_norm": 2.9462413787841797, + "learning_rate": 9.751658291457288e-06, + "loss": 0.1457, + "step": 2975 + }, + { + "epoch": 2.24, + "grad_norm": 3.6020772457122803, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1468, + "step": 3000 + }, + { + "epoch": 2.24, + "eval_loss": 0.1377723067998886, + "eval_runtime": 622.1316, + "eval_samples_per_second": 2.271, + "eval_steps_per_second": 2.271, + "eval_wer": 20.83309360796272, + "step": 3000 + }, + { + "epoch": 2.26, + "grad_norm": 3.0949978828430176, + "learning_rate": 9.746633165829147e-06, + "loss": 0.14, + "step": 3025 + }, + { + "epoch": 2.27, + "grad_norm": 3.136107921600342, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1412, + "step": 3050 + }, + { + "epoch": 2.29, + "grad_norm": 3.032890558242798, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1387, + "step": 3075 + }, + { + "epoch": 2.31, + "grad_norm": 3.0579066276550293, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1401, + "step": 3100 + }, + { + "epoch": 2.33, + "grad_norm": 2.9747979640960693, + "learning_rate": 9.736582914572866e-06, + "loss": 0.1399, + "step": 3125 + }, + { + "epoch": 2.35, + "grad_norm": 2.9104321002960205, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1413, + "step": 3150 + }, + { + "epoch": 2.37, + "grad_norm": 3.0312438011169434, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1412, + "step": 3175 + }, + { + "epoch": 2.39, + "grad_norm": 3.1052236557006836, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1356, + "step": 3200 + }, + { + "epoch": 2.4, + "grad_norm": 2.7737526893615723, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1387, + "step": 3225 + }, + { + "epoch": 2.42, + "grad_norm": 2.7783849239349365, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1384, + "step": 3250 + }, + { + "epoch": 2.44, + "grad_norm": 3.2298660278320312, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1421, + "step": 3275 + }, + { + "epoch": 2.46, + "grad_norm": 2.9137816429138184, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1377, + "step": 3300 + }, + { + "epoch": 2.48, + "grad_norm": 3.574551582336426, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1403, + "step": 3325 + }, + { + "epoch": 2.5, + "grad_norm": 3.0157809257507324, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1344, + "step": 3350 + }, + { + "epoch": 2.52, + "grad_norm": 2.818612813949585, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1314, + "step": 3375 + }, + { + "epoch": 2.54, + "grad_norm": 2.9883813858032227, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1358, + "step": 3400 + }, + { + "epoch": 2.55, + "grad_norm": 2.5743350982666016, + "learning_rate": 9.706432160804021e-06, + "loss": 0.13, + "step": 3425 + }, + { + "epoch": 2.57, + "grad_norm": 2.929107427597046, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1369, + "step": 3450 + }, + { + "epoch": 2.59, + "grad_norm": 2.659060001373291, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1332, + "step": 3475 + }, + { + "epoch": 2.61, + "grad_norm": 2.9099462032318115, + "learning_rate": 9.698894472361809e-06, + "loss": 0.137, + "step": 3500 + }, + { + "epoch": 2.63, + "grad_norm": 3.1518211364746094, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1361, + "step": 3525 + }, + { + "epoch": 2.65, + "grad_norm": 3.211294174194336, + "learning_rate": 9.69386934673367e-06, + "loss": 0.1312, + "step": 3550 + }, + { + "epoch": 2.67, + "grad_norm": 3.115027666091919, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1287, + "step": 3575 + }, + { + "epoch": 2.68, + "grad_norm": 3.5546798706054688, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1325, + "step": 3600 + }, + { + "epoch": 2.7, + "grad_norm": 3.1676909923553467, + "learning_rate": 9.686331658291457e-06, + "loss": 0.1269, + "step": 3625 + }, + { + "epoch": 2.72, + "grad_norm": 3.1102020740509033, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1354, + "step": 3650 + }, + { + "epoch": 2.74, + "grad_norm": 2.616607427597046, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1318, + "step": 3675 + }, + { + "epoch": 2.76, + "grad_norm": 2.877201795578003, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1325, + "step": 3700 + }, + { + "epoch": 2.78, + "grad_norm": 2.7412326335906982, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1349, + "step": 3725 + }, + { + "epoch": 2.8, + "grad_norm": 2.9739856719970703, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1342, + "step": 3750 + }, + { + "epoch": 2.82, + "grad_norm": 2.7162415981292725, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1339, + "step": 3775 + }, + { + "epoch": 2.83, + "grad_norm": 2.8346199989318848, + "learning_rate": 9.668743718592966e-06, + "loss": 0.132, + "step": 3800 + }, + { + "epoch": 2.85, + "grad_norm": 2.7721316814422607, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1369, + "step": 3825 + }, + { + "epoch": 2.87, + "grad_norm": 2.6235415935516357, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1321, + "step": 3850 + }, + { + "epoch": 2.89, + "grad_norm": 3.2611429691314697, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1328, + "step": 3875 + }, + { + "epoch": 2.91, + "grad_norm": 2.7950031757354736, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1281, + "step": 3900 + }, + { + "epoch": 2.93, + "grad_norm": 2.8077211380004883, + "learning_rate": 9.656180904522614e-06, + "loss": 0.133, + "step": 3925 + }, + { + "epoch": 2.95, + "grad_norm": 2.9517579078674316, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1327, + "step": 3950 + }, + { + "epoch": 2.96, + "grad_norm": 3.1304502487182617, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1324, + "step": 3975 + }, + { + "epoch": 2.98, + "grad_norm": 3.1927201747894287, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1311, + "step": 4000 + }, + { + "epoch": 2.98, + "eval_loss": 0.1259957253932953, + "eval_runtime": 625.631, + "eval_samples_per_second": 2.259, + "eval_steps_per_second": 2.259, + "eval_wer": 18.73885277026638, + "step": 4000 + }, + { + "epoch": 3.0, + "grad_norm": 2.493851900100708, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1307, + "step": 4025 + }, + { + "epoch": 3.02, + "grad_norm": 2.628967761993408, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1124, + "step": 4050 + }, + { + "epoch": 3.04, + "grad_norm": 2.8370718955993652, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1174, + "step": 4075 + }, + { + "epoch": 3.06, + "grad_norm": 2.5665454864501953, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1163, + "step": 4100 + }, + { + "epoch": 3.08, + "grad_norm": 3.4480860233306885, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1142, + "step": 4125 + }, + { + "epoch": 3.09, + "grad_norm": 2.786684036254883, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1111, + "step": 4150 + }, + { + "epoch": 3.11, + "grad_norm": 2.5044002532958984, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1148, + "step": 4175 + }, + { + "epoch": 3.13, + "grad_norm": 2.5025031566619873, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1154, + "step": 4200 + }, + { + "epoch": 3.15, + "grad_norm": 3.005053997039795, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1136, + "step": 4225 + }, + { + "epoch": 3.17, + "grad_norm": 2.6677279472351074, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1135, + "step": 4250 + }, + { + "epoch": 3.19, + "grad_norm": 2.920172691345215, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1172, + "step": 4275 + }, + { + "epoch": 3.21, + "grad_norm": 2.61612606048584, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1144, + "step": 4300 + }, + { + "epoch": 3.23, + "grad_norm": 2.645781993865967, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1162, + "step": 4325 + }, + { + "epoch": 3.24, + "grad_norm": 3.1597182750701904, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1148, + "step": 4350 + }, + { + "epoch": 3.26, + "grad_norm": 2.9629948139190674, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1174, + "step": 4375 + }, + { + "epoch": 3.28, + "grad_norm": 2.4299910068511963, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1123, + "step": 4400 + }, + { + "epoch": 3.3, + "grad_norm": 2.5754826068878174, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1203, + "step": 4425 + }, + { + "epoch": 3.32, + "grad_norm": 2.5775868892669678, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1092, + "step": 4450 + }, + { + "epoch": 3.34, + "grad_norm": 2.6025407314300537, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1192, + "step": 4475 + }, + { + "epoch": 3.36, + "grad_norm": 2.9499661922454834, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1103, + "step": 4500 + }, + { + "epoch": 3.37, + "grad_norm": 2.6477084159851074, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1119, + "step": 4525 + }, + { + "epoch": 3.39, + "grad_norm": 2.8561007976531982, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1093, + "step": 4550 + }, + { + "epoch": 3.41, + "grad_norm": 2.9450645446777344, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1088, + "step": 4575 + }, + { + "epoch": 3.43, + "grad_norm": 2.721266031265259, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1159, + "step": 4600 + }, + { + "epoch": 3.45, + "grad_norm": 2.830723762512207, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1166, + "step": 4625 + }, + { + "epoch": 3.47, + "grad_norm": 2.6763341426849365, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1123, + "step": 4650 + }, + { + "epoch": 3.49, + "grad_norm": 2.893439769744873, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1116, + "step": 4675 + }, + { + "epoch": 3.5, + "grad_norm": 3.128810167312622, + "learning_rate": 9.578291457286432e-06, + "loss": 0.112, + "step": 4700 + }, + { + "epoch": 3.52, + "grad_norm": 2.8856942653656006, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1142, + "step": 4725 + }, + { + "epoch": 3.54, + "grad_norm": 2.902357578277588, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1116, + "step": 4750 + }, + { + "epoch": 3.56, + "grad_norm": 2.683309316635132, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1152, + "step": 4775 + }, + { + "epoch": 3.58, + "grad_norm": 2.457899808883667, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1147, + "step": 4800 + }, + { + "epoch": 3.6, + "grad_norm": 3.0307376384735107, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1144, + "step": 4825 + }, + { + "epoch": 3.62, + "grad_norm": 3.5656275749206543, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1142, + "step": 4850 + }, + { + "epoch": 3.64, + "grad_norm": 2.9088006019592285, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1148, + "step": 4875 + }, + { + "epoch": 3.65, + "grad_norm": 2.3279759883880615, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1115, + "step": 4900 + }, + { + "epoch": 3.67, + "grad_norm": 2.830371856689453, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1141, + "step": 4925 + }, + { + "epoch": 3.69, + "grad_norm": 3.1147847175598145, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1131, + "step": 4950 + }, + { + "epoch": 3.71, + "grad_norm": 2.9858827590942383, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1144, + "step": 4975 + }, + { + "epoch": 3.73, + "grad_norm": 3.0632736682891846, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1133, + "step": 5000 + }, + { + "epoch": 3.73, + "eval_loss": 0.12082495540380478, + "eval_runtime": 627.9839, + "eval_samples_per_second": 2.25, + "eval_steps_per_second": 2.25, + "eval_wer": 17.898855071629942, + "step": 5000 + }, + { + "epoch": 3.75, + "grad_norm": 2.6842565536499023, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1135, + "step": 5025 + }, + { + "epoch": 3.77, + "grad_norm": 3.0329549312591553, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1055, + "step": 5050 + }, + { + "epoch": 3.78, + "grad_norm": 2.978516101837158, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1079, + "step": 5075 + }, + { + "epoch": 3.8, + "grad_norm": 2.8893721103668213, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1084, + "step": 5100 + }, + { + "epoch": 3.82, + "grad_norm": 2.9212534427642822, + "learning_rate": 9.535577889447237e-06, + "loss": 0.1099, + "step": 5125 + }, + { + "epoch": 3.84, + "grad_norm": 2.7262823581695557, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1102, + "step": 5150 + }, + { + "epoch": 3.86, + "grad_norm": 2.4592931270599365, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1107, + "step": 5175 + }, + { + "epoch": 3.88, + "grad_norm": 2.513550043106079, + "learning_rate": 9.528040201005025e-06, + "loss": 0.1068, + "step": 5200 + }, + { + "epoch": 3.9, + "grad_norm": 2.8959248065948486, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1116, + "step": 5225 + }, + { + "epoch": 3.91, + "grad_norm": 2.741910934448242, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1072, + "step": 5250 + }, + { + "epoch": 3.93, + "grad_norm": 2.5687618255615234, + "learning_rate": 9.520502512562815e-06, + "loss": 0.106, + "step": 5275 + }, + { + "epoch": 3.95, + "grad_norm": 2.730055809020996, + "learning_rate": 9.517989949748744e-06, + "loss": 0.108, + "step": 5300 + }, + { + "epoch": 3.97, + "grad_norm": 2.6482930183410645, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1076, + "step": 5325 + }, + { + "epoch": 3.99, + "grad_norm": 2.7305054664611816, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1044, + "step": 5350 + }, + { + "epoch": 4.01, + "grad_norm": 2.5868592262268066, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1039, + "step": 5375 + }, + { + "epoch": 4.03, + "grad_norm": 2.9337029457092285, + "learning_rate": 9.507939698492463e-06, + "loss": 0.0981, + "step": 5400 + }, + { + "epoch": 4.05, + "grad_norm": 2.613283395767212, + "learning_rate": 9.505427135678392e-06, + "loss": 0.09, + "step": 5425 + }, + { + "epoch": 4.06, + "grad_norm": 2.5850093364715576, + "learning_rate": 9.502914572864322e-06, + "loss": 0.0954, + "step": 5450 + }, + { + "epoch": 4.08, + "grad_norm": 2.7797646522521973, + "learning_rate": 9.500402010050253e-06, + "loss": 0.0964, + "step": 5475 + }, + { + "epoch": 4.1, + "grad_norm": 2.49691104888916, + "learning_rate": 9.497889447236182e-06, + "loss": 0.0939, + "step": 5500 + }, + { + "epoch": 4.12, + "grad_norm": 2.49479079246521, + "learning_rate": 9.495376884422111e-06, + "loss": 0.0953, + "step": 5525 + }, + { + "epoch": 4.14, + "grad_norm": 3.1818389892578125, + "learning_rate": 9.49286432160804e-06, + "loss": 0.0945, + "step": 5550 + }, + { + "epoch": 4.16, + "grad_norm": 2.3987441062927246, + "learning_rate": 9.49035175879397e-06, + "loss": 0.0951, + "step": 5575 + }, + { + "epoch": 4.18, + "grad_norm": 2.848787307739258, + "learning_rate": 9.4878391959799e-06, + "loss": 0.0916, + "step": 5600 + }, + { + "epoch": 4.19, + "grad_norm": 2.5835928916931152, + "learning_rate": 9.48532663316583e-06, + "loss": 0.0938, + "step": 5625 + }, + { + "epoch": 4.21, + "grad_norm": 2.560399293899536, + "learning_rate": 9.48281407035176e-06, + "loss": 0.0939, + "step": 5650 + }, + { + "epoch": 4.23, + "grad_norm": 2.881833553314209, + "learning_rate": 9.480301507537689e-06, + "loss": 0.0973, + "step": 5675 + }, + { + "epoch": 4.25, + "grad_norm": 2.45007061958313, + "learning_rate": 9.47778894472362e-06, + "loss": 0.0916, + "step": 5700 + }, + { + "epoch": 4.27, + "grad_norm": 3.0262973308563232, + "learning_rate": 9.475276381909548e-06, + "loss": 0.0942, + "step": 5725 + }, + { + "epoch": 4.29, + "grad_norm": 2.3173537254333496, + "learning_rate": 9.472763819095479e-06, + "loss": 0.0941, + "step": 5750 + }, + { + "epoch": 4.31, + "grad_norm": 2.8149092197418213, + "learning_rate": 9.470251256281408e-06, + "loss": 0.0973, + "step": 5775 + }, + { + "epoch": 4.33, + "grad_norm": 2.584592342376709, + "learning_rate": 9.467738693467337e-06, + "loss": 0.0916, + "step": 5800 + }, + { + "epoch": 4.34, + "grad_norm": 2.751286745071411, + "learning_rate": 9.465226130653267e-06, + "loss": 0.096, + "step": 5825 + }, + { + "epoch": 4.36, + "grad_norm": 2.570629358291626, + "learning_rate": 9.462713567839196e-06, + "loss": 0.0951, + "step": 5850 + }, + { + "epoch": 4.38, + "grad_norm": 3.0312952995300293, + "learning_rate": 9.460201005025127e-06, + "loss": 0.0968, + "step": 5875 + }, + { + "epoch": 4.4, + "grad_norm": 2.486086368560791, + "learning_rate": 9.457688442211056e-06, + "loss": 0.0939, + "step": 5900 + }, + { + "epoch": 4.42, + "grad_norm": 2.887410879135132, + "learning_rate": 9.455175879396986e-06, + "loss": 0.0915, + "step": 5925 + }, + { + "epoch": 4.44, + "grad_norm": 2.7203288078308105, + "learning_rate": 9.452663316582915e-06, + "loss": 0.0945, + "step": 5950 + }, + { + "epoch": 4.46, + "grad_norm": 2.7018909454345703, + "learning_rate": 9.450150753768846e-06, + "loss": 0.0911, + "step": 5975 + }, + { + "epoch": 4.47, + "grad_norm": 2.6788134574890137, + "learning_rate": 9.447638190954774e-06, + "loss": 0.0902, + "step": 6000 + }, + { + "epoch": 4.47, + "eval_loss": 0.11888829618692398, + "eval_runtime": 525.7812, + "eval_samples_per_second": 2.687, + "eval_steps_per_second": 2.687, + "eval_wer": 17.277486910994764, + "step": 6000 + }, + { + "epoch": 4.49, + "grad_norm": 2.459397792816162, + "learning_rate": 9.445125628140705e-06, + "loss": 0.0921, + "step": 6025 + }, + { + "epoch": 4.51, + "grad_norm": 2.927412748336792, + "learning_rate": 9.442613065326634e-06, + "loss": 0.0941, + "step": 6050 + }, + { + "epoch": 4.53, + "grad_norm": 2.8186159133911133, + "learning_rate": 9.440100502512563e-06, + "loss": 0.0983, + "step": 6075 + }, + { + "epoch": 4.55, + "grad_norm": 2.5435428619384766, + "learning_rate": 9.437587939698494e-06, + "loss": 0.0952, + "step": 6100 + }, + { + "epoch": 4.57, + "grad_norm": 2.695784568786621, + "learning_rate": 9.435075376884422e-06, + "loss": 0.0964, + "step": 6125 + }, + { + "epoch": 4.59, + "grad_norm": 2.5206716060638428, + "learning_rate": 9.432562814070353e-06, + "loss": 0.0942, + "step": 6150 + }, + { + "epoch": 4.6, + "grad_norm": 2.9596691131591797, + "learning_rate": 9.430050251256282e-06, + "loss": 0.0959, + "step": 6175 + }, + { + "epoch": 4.62, + "grad_norm": 2.278632164001465, + "learning_rate": 9.427537688442212e-06, + "loss": 0.0964, + "step": 6200 + }, + { + "epoch": 4.64, + "grad_norm": 2.5129709243774414, + "learning_rate": 9.425025125628141e-06, + "loss": 0.0935, + "step": 6225 + }, + { + "epoch": 4.66, + "grad_norm": 2.6840085983276367, + "learning_rate": 9.422512562814072e-06, + "loss": 0.093, + "step": 6250 + }, + { + "epoch": 4.68, + "grad_norm": 2.7437448501586914, + "learning_rate": 9.42e-06, + "loss": 0.0949, + "step": 6275 + }, + { + "epoch": 4.7, + "grad_norm": 2.6373398303985596, + "learning_rate": 9.41748743718593e-06, + "loss": 0.0941, + "step": 6300 + }, + { + "epoch": 4.72, + "grad_norm": 2.7973101139068604, + "learning_rate": 9.41497487437186e-06, + "loss": 0.0931, + "step": 6325 + }, + { + "epoch": 4.74, + "grad_norm": 2.9248576164245605, + "learning_rate": 9.41246231155779e-06, + "loss": 0.0984, + "step": 6350 + }, + { + "epoch": 4.75, + "grad_norm": 2.5695719718933105, + "learning_rate": 9.40994974874372e-06, + "loss": 0.0925, + "step": 6375 + }, + { + "epoch": 4.77, + "grad_norm": 2.850865364074707, + "learning_rate": 9.407437185929648e-06, + "loss": 0.0949, + "step": 6400 + }, + { + "epoch": 4.79, + "grad_norm": 2.6445276737213135, + "learning_rate": 9.404924623115579e-06, + "loss": 0.0945, + "step": 6425 + }, + { + "epoch": 4.81, + "grad_norm": 2.5714385509490967, + "learning_rate": 9.402412060301508e-06, + "loss": 0.0938, + "step": 6450 + }, + { + "epoch": 4.83, + "grad_norm": 2.959686279296875, + "learning_rate": 9.399899497487438e-06, + "loss": 0.0917, + "step": 6475 + }, + { + "epoch": 4.85, + "grad_norm": 2.7412126064300537, + "learning_rate": 9.397386934673369e-06, + "loss": 0.0952, + "step": 6500 + }, + { + "epoch": 4.87, + "grad_norm": 2.6376516819000244, + "learning_rate": 9.394874371859298e-06, + "loss": 0.0972, + "step": 6525 + }, + { + "epoch": 4.88, + "grad_norm": 2.879997491836548, + "learning_rate": 9.392361809045227e-06, + "loss": 0.0981, + "step": 6550 + }, + { + "epoch": 4.9, + "grad_norm": 2.678006410598755, + "learning_rate": 9.389849246231157e-06, + "loss": 0.0937, + "step": 6575 + }, + { + "epoch": 4.92, + "grad_norm": 2.8364152908325195, + "learning_rate": 9.387336683417086e-06, + "loss": 0.0903, + "step": 6600 + }, + { + "epoch": 4.94, + "grad_norm": 2.4137039184570312, + "learning_rate": 9.384824120603015e-06, + "loss": 0.0927, + "step": 6625 + }, + { + "epoch": 4.96, + "grad_norm": 2.8660030364990234, + "learning_rate": 9.382311557788946e-06, + "loss": 0.0906, + "step": 6650 + }, + { + "epoch": 4.98, + "grad_norm": 2.9367213249206543, + "learning_rate": 9.379798994974874e-06, + "loss": 0.0942, + "step": 6675 + }, + { + "epoch": 5.0, + "grad_norm": 2.6545767784118652, + "learning_rate": 9.377286432160805e-06, + "loss": 0.0941, + "step": 6700 + }, + { + "epoch": 5.01, + "grad_norm": 2.2008678913116455, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0793, + "step": 6725 + }, + { + "epoch": 5.03, + "grad_norm": 2.626599073410034, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0813, + "step": 6750 + }, + { + "epoch": 5.05, + "grad_norm": 2.47953462600708, + "learning_rate": 9.369748743718595e-06, + "loss": 0.08, + "step": 6775 + }, + { + "epoch": 5.07, + "grad_norm": 2.6219120025634766, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0791, + "step": 6800 + }, + { + "epoch": 5.09, + "grad_norm": 2.6530261039733887, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0779, + "step": 6825 + }, + { + "epoch": 5.11, + "grad_norm": 2.659306287765503, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0797, + "step": 6850 + }, + { + "epoch": 5.13, + "grad_norm": 2.2885117530822754, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0817, + "step": 6875 + }, + { + "epoch": 5.15, + "grad_norm": 2.8688712120056152, + "learning_rate": 9.357185929648241e-06, + "loss": 0.077, + "step": 6900 + }, + { + "epoch": 5.16, + "grad_norm": 2.576960563659668, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0769, + "step": 6925 + }, + { + "epoch": 5.18, + "grad_norm": 2.5532870292663574, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0802, + "step": 6950 + }, + { + "epoch": 5.2, + "grad_norm": 3.135333299636841, + "learning_rate": 9.34964824120603e-06, + "loss": 0.0782, + "step": 6975 + }, + { + "epoch": 5.22, + "grad_norm": 2.7503457069396973, + "learning_rate": 9.34713567839196e-06, + "loss": 0.081, + "step": 7000 + }, + { + "epoch": 5.22, + "eval_loss": 0.11886344105005264, + "eval_runtime": 526.7517, + "eval_samples_per_second": 2.682, + "eval_steps_per_second": 2.682, + "eval_wer": 17.47310281341695, + "step": 7000 + }, + { + "epoch": 5.24, + "grad_norm": 2.478942632675171, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0764, + "step": 7025 + }, + { + "epoch": 5.26, + "grad_norm": 3.202638864517212, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0779, + "step": 7050 + }, + { + "epoch": 5.28, + "grad_norm": 2.856942653656006, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0787, + "step": 7075 + }, + { + "epoch": 5.29, + "grad_norm": 2.406794548034668, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0811, + "step": 7100 + }, + { + "epoch": 5.31, + "grad_norm": 2.759646415710449, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0802, + "step": 7125 + }, + { + "epoch": 5.33, + "grad_norm": 2.362739086151123, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0797, + "step": 7150 + }, + { + "epoch": 5.35, + "grad_norm": 2.588900327682495, + "learning_rate": 9.329547738693469e-06, + "loss": 0.0773, + "step": 7175 + }, + { + "epoch": 5.37, + "grad_norm": 2.5271754264831543, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0791, + "step": 7200 + }, + { + "epoch": 5.39, + "grad_norm": 2.596848726272583, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0814, + "step": 7225 + }, + { + "epoch": 5.41, + "grad_norm": 2.6552720069885254, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0855, + "step": 7250 + }, + { + "epoch": 5.43, + "grad_norm": 2.7991130352020264, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0805, + "step": 7275 + }, + { + "epoch": 5.44, + "grad_norm": 2.766306161880493, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0809, + "step": 7300 + }, + { + "epoch": 5.46, + "grad_norm": 2.3060312271118164, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0785, + "step": 7325 + }, + { + "epoch": 5.48, + "grad_norm": 2.705929756164551, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0808, + "step": 7350 + }, + { + "epoch": 5.5, + "grad_norm": 2.6532864570617676, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0792, + "step": 7375 + }, + { + "epoch": 5.52, + "grad_norm": 2.4048655033111572, + "learning_rate": 9.306934673366836e-06, + "loss": 0.0791, + "step": 7400 + }, + { + "epoch": 5.54, + "grad_norm": 2.630340814590454, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0761, + "step": 7425 + }, + { + "epoch": 5.56, + "grad_norm": 2.262911319732666, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0777, + "step": 7450 + }, + { + "epoch": 5.57, + "grad_norm": 2.7349486351013184, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0811, + "step": 7475 + }, + { + "epoch": 5.59, + "grad_norm": 2.899777889251709, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0838, + "step": 7500 + }, + { + "epoch": 5.61, + "grad_norm": 2.4346723556518555, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0782, + "step": 7525 + }, + { + "epoch": 5.63, + "grad_norm": 2.637939929962158, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0787, + "step": 7550 + }, + { + "epoch": 5.65, + "grad_norm": 3.0418901443481445, + "learning_rate": 9.289346733668343e-06, + "loss": 0.0802, + "step": 7575 + }, + { + "epoch": 5.67, + "grad_norm": 2.628983497619629, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0826, + "step": 7600 + }, + { + "epoch": 5.69, + "grad_norm": 2.5833048820495605, + "learning_rate": 9.284321608040202e-06, + "loss": 0.0779, + "step": 7625 + }, + { + "epoch": 5.7, + "grad_norm": 2.729011297225952, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0768, + "step": 7650 + }, + { + "epoch": 5.72, + "grad_norm": 2.94572377204895, + "learning_rate": 9.279296482412062e-06, + "loss": 0.0822, + "step": 7675 + }, + { + "epoch": 5.74, + "grad_norm": 2.6565351486206055, + "learning_rate": 9.27678391959799e-06, + "loss": 0.08, + "step": 7700 + }, + { + "epoch": 5.76, + "grad_norm": 2.5395452976226807, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0786, + "step": 7725 + }, + { + "epoch": 5.78, + "grad_norm": 2.815863847732544, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0804, + "step": 7750 + }, + { + "epoch": 5.8, + "grad_norm": 2.5001206398010254, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0817, + "step": 7775 + }, + { + "epoch": 5.82, + "grad_norm": 2.8792672157287598, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0781, + "step": 7800 + }, + { + "epoch": 5.84, + "grad_norm": 2.4145798683166504, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0815, + "step": 7825 + }, + { + "epoch": 5.85, + "grad_norm": 3.4708967208862305, + "learning_rate": 9.261708542713569e-06, + "loss": 0.0804, + "step": 7850 + }, + { + "epoch": 5.87, + "grad_norm": 2.7841672897338867, + "learning_rate": 9.259195979899498e-06, + "loss": 0.0793, + "step": 7875 + }, + { + "epoch": 5.89, + "grad_norm": 2.7582249641418457, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0789, + "step": 7900 + }, + { + "epoch": 5.91, + "grad_norm": 2.650385618209839, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0793, + "step": 7925 + }, + { + "epoch": 5.93, + "grad_norm": 2.4578394889831543, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0807, + "step": 7950 + }, + { + "epoch": 5.95, + "grad_norm": 2.9877288341522217, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0797, + "step": 7975 + }, + { + "epoch": 5.97, + "grad_norm": 2.7380425930023193, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0815, + "step": 8000 + }, + { + "epoch": 5.97, + "eval_loss": 0.11607593297958374, + "eval_runtime": 524.3746, + "eval_samples_per_second": 2.695, + "eval_steps_per_second": 2.695, + "eval_wer": 16.70214602151775, + "step": 8000 + }, + { + "epoch": 5.98, + "grad_norm": 2.818375587463379, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0801, + "step": 8025 + }, + { + "epoch": 6.0, + "grad_norm": 2.0879435539245605, + "learning_rate": 9.241608040201005e-06, + "loss": 0.0763, + "step": 8050 + }, + { + "epoch": 6.02, + "grad_norm": 2.658724069595337, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0656, + "step": 8075 + }, + { + "epoch": 6.04, + "grad_norm": 2.1113784313201904, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0634, + "step": 8100 + }, + { + "epoch": 6.06, + "grad_norm": 2.561121940612793, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0656, + "step": 8125 + }, + { + "epoch": 6.08, + "grad_norm": 2.8066837787628174, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0657, + "step": 8150 + }, + { + "epoch": 6.1, + "grad_norm": 2.3623180389404297, + "learning_rate": 9.229045226130654e-06, + "loss": 0.068, + "step": 8175 + }, + { + "epoch": 6.11, + "grad_norm": 2.712350606918335, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0697, + "step": 8200 + }, + { + "epoch": 6.13, + "grad_norm": 2.6369781494140625, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0706, + "step": 8225 + }, + { + "epoch": 6.15, + "grad_norm": 2.520799160003662, + "learning_rate": 9.221507537688443e-06, + "loss": 0.067, + "step": 8250 + }, + { + "epoch": 6.17, + "grad_norm": 2.4132792949676514, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0671, + "step": 8275 + }, + { + "epoch": 6.19, + "grad_norm": 2.770197868347168, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0651, + "step": 8300 + }, + { + "epoch": 6.21, + "grad_norm": 2.388321876525879, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0683, + "step": 8325 + }, + { + "epoch": 6.23, + "grad_norm": 2.646831750869751, + "learning_rate": 9.211457286432162e-06, + "loss": 0.0655, + "step": 8350 + }, + { + "epoch": 6.25, + "grad_norm": 2.5207486152648926, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0651, + "step": 8375 + }, + { + "epoch": 6.26, + "grad_norm": 2.63950252532959, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0689, + "step": 8400 + }, + { + "epoch": 6.28, + "grad_norm": 2.7703464031219482, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0654, + "step": 8425 + }, + { + "epoch": 6.3, + "grad_norm": 2.3458714485168457, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0688, + "step": 8450 + }, + { + "epoch": 6.32, + "grad_norm": 2.308528184890747, + "learning_rate": 9.19889447236181e-06, + "loss": 0.0662, + "step": 8475 + }, + { + "epoch": 6.34, + "grad_norm": 2.815115213394165, + "learning_rate": 9.19638190954774e-06, + "loss": 0.07, + "step": 8500 + }, + { + "epoch": 6.36, + "grad_norm": 2.394446849822998, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0691, + "step": 8525 + }, + { + "epoch": 6.38, + "grad_norm": 2.2454166412353516, + "learning_rate": 9.191356783919599e-06, + "loss": 0.067, + "step": 8550 + }, + { + "epoch": 6.39, + "grad_norm": 2.315883159637451, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0696, + "step": 8575 + }, + { + "epoch": 6.41, + "grad_norm": 2.3557519912719727, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0659, + "step": 8600 + }, + { + "epoch": 6.43, + "grad_norm": 2.296260118484497, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0672, + "step": 8625 + }, + { + "epoch": 6.45, + "grad_norm": 2.4965035915374756, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0651, + "step": 8650 + }, + { + "epoch": 6.47, + "grad_norm": 2.9689860343933105, + "learning_rate": 9.178793969849247e-06, + "loss": 0.07, + "step": 8675 + }, + { + "epoch": 6.49, + "grad_norm": 3.0257556438446045, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0666, + "step": 8700 + }, + { + "epoch": 6.51, + "grad_norm": 2.7041847705841064, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0681, + "step": 8725 + }, + { + "epoch": 6.52, + "grad_norm": 2.5931856632232666, + "learning_rate": 9.171256281407036e-06, + "loss": 0.066, + "step": 8750 + }, + { + "epoch": 6.54, + "grad_norm": 2.4164304733276367, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0665, + "step": 8775 + }, + { + "epoch": 6.56, + "grad_norm": 2.47843599319458, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0684, + "step": 8800 + }, + { + "epoch": 6.58, + "grad_norm": 3.116563081741333, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0649, + "step": 8825 + }, + { + "epoch": 6.6, + "grad_norm": 2.4568521976470947, + "learning_rate": 9.161206030150754e-06, + "loss": 0.0659, + "step": 8850 + }, + { + "epoch": 6.62, + "grad_norm": 2.624093532562256, + "learning_rate": 9.158693467336685e-06, + "loss": 0.066, + "step": 8875 + }, + { + "epoch": 6.64, + "grad_norm": 2.445589542388916, + "learning_rate": 9.156180904522614e-06, + "loss": 0.067, + "step": 8900 + }, + { + "epoch": 6.66, + "grad_norm": 2.6671736240386963, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0668, + "step": 8925 + }, + { + "epoch": 6.67, + "grad_norm": 2.806246042251587, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0658, + "step": 8950 + }, + { + "epoch": 6.69, + "grad_norm": 2.763399839401245, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0643, + "step": 8975 + }, + { + "epoch": 6.71, + "grad_norm": 2.814584255218506, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0686, + "step": 9000 + }, + { + "epoch": 6.71, + "eval_loss": 0.1219184398651123, + "eval_runtime": 528.2857, + "eval_samples_per_second": 2.675, + "eval_steps_per_second": 2.675, + "eval_wer": 17.024336919624876, + "step": 9000 + }, + { + "epoch": 6.73, + "grad_norm": 2.9625027179718018, + "learning_rate": 9.143618090452262e-06, + "loss": 0.065, + "step": 9025 + }, + { + "epoch": 6.75, + "grad_norm": 2.625436544418335, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0699, + "step": 9050 + }, + { + "epoch": 6.77, + "grad_norm": 2.832059860229492, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0683, + "step": 9075 + }, + { + "epoch": 6.79, + "grad_norm": 2.421719551086426, + "learning_rate": 9.136080402010052e-06, + "loss": 0.069, + "step": 9100 + }, + { + "epoch": 6.8, + "grad_norm": 2.4227962493896484, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0687, + "step": 9125 + }, + { + "epoch": 6.82, + "grad_norm": 2.5285165309906006, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0675, + "step": 9150 + }, + { + "epoch": 6.84, + "grad_norm": 2.8832037448883057, + "learning_rate": 9.12854271356784e-06, + "loss": 0.07, + "step": 9175 + }, + { + "epoch": 6.86, + "grad_norm": 2.8587591648101807, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0694, + "step": 9200 + }, + { + "epoch": 6.88, + "grad_norm": 2.7257273197174072, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0681, + "step": 9225 + }, + { + "epoch": 6.9, + "grad_norm": 2.755156993865967, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0697, + "step": 9250 + }, + { + "epoch": 6.92, + "grad_norm": 2.4882071018218994, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0652, + "step": 9275 + }, + { + "epoch": 6.94, + "grad_norm": 2.7772982120513916, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0653, + "step": 9300 + }, + { + "epoch": 6.95, + "grad_norm": 2.7261431217193604, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0698, + "step": 9325 + }, + { + "epoch": 6.97, + "grad_norm": 2.5697736740112305, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0655, + "step": 9350 + }, + { + "epoch": 6.99, + "grad_norm": 2.5291333198547363, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0705, + "step": 9375 + }, + { + "epoch": 7.01, + "grad_norm": 2.427992343902588, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0613, + "step": 9400 + }, + { + "epoch": 7.03, + "grad_norm": 2.3502893447875977, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0538, + "step": 9425 + }, + { + "epoch": 7.05, + "grad_norm": 2.700376272201538, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0521, + "step": 9450 + }, + { + "epoch": 7.07, + "grad_norm": 2.652817487716675, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0572, + "step": 9475 + }, + { + "epoch": 7.08, + "grad_norm": 2.4263458251953125, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0586, + "step": 9500 + }, + { + "epoch": 7.1, + "grad_norm": 2.5875256061553955, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0554, + "step": 9525 + }, + { + "epoch": 7.12, + "grad_norm": 2.599013090133667, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0596, + "step": 9550 + }, + { + "epoch": 7.14, + "grad_norm": 2.264355421066284, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0575, + "step": 9575 + }, + { + "epoch": 7.16, + "grad_norm": 2.4413743019104004, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0541, + "step": 9600 + }, + { + "epoch": 7.18, + "grad_norm": 2.583949089050293, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0551, + "step": 9625 + }, + { + "epoch": 7.2, + "grad_norm": 2.3766191005706787, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0538, + "step": 9650 + }, + { + "epoch": 7.21, + "grad_norm": 2.3384575843811035, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0552, + "step": 9675 + }, + { + "epoch": 7.23, + "grad_norm": 2.4539551734924316, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0564, + "step": 9700 + }, + { + "epoch": 7.25, + "grad_norm": 2.704414129257202, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0572, + "step": 9725 + }, + { + "epoch": 7.27, + "grad_norm": 2.3002564907073975, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0541, + "step": 9750 + }, + { + "epoch": 7.29, + "grad_norm": 2.2708096504211426, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0542, + "step": 9775 + }, + { + "epoch": 7.31, + "grad_norm": 2.4148447513580322, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0563, + "step": 9800 + }, + { + "epoch": 7.33, + "grad_norm": 2.684415102005005, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0567, + "step": 9825 + }, + { + "epoch": 7.35, + "grad_norm": 2.036191463470459, + "learning_rate": 9.06070351758794e-06, + "loss": 0.056, + "step": 9850 + }, + { + "epoch": 7.36, + "grad_norm": 2.8445303440093994, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0566, + "step": 9875 + }, + { + "epoch": 7.38, + "grad_norm": 2.451037883758545, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0566, + "step": 9900 + }, + { + "epoch": 7.4, + "grad_norm": 2.5518717765808105, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0561, + "step": 9925 + }, + { + "epoch": 7.42, + "grad_norm": 2.274202346801758, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0582, + "step": 9950 + }, + { + "epoch": 7.44, + "grad_norm": 2.502936601638794, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0573, + "step": 9975 + }, + { + "epoch": 7.46, + "grad_norm": 2.8603873252868652, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0564, + "step": 10000 + }, + { + "epoch": 7.46, + "eval_loss": 0.1263103485107422, + "eval_runtime": 524.6449, + "eval_samples_per_second": 2.693, + "eval_steps_per_second": 2.693, + "eval_wer": 17.56515735573327, + "step": 10000 + }, + { + "epoch": 7.48, + "grad_norm": 2.1221184730529785, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0547, + "step": 10025 + }, + { + "epoch": 7.49, + "grad_norm": 2.2443692684173584, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0553, + "step": 10050 + }, + { + "epoch": 7.51, + "grad_norm": 3.2224836349487305, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0588, + "step": 10075 + }, + { + "epoch": 7.53, + "grad_norm": 2.7183916568756104, + "learning_rate": 9.035577889447237e-06, + "loss": 0.0563, + "step": 10100 + }, + { + "epoch": 7.55, + "grad_norm": 2.885378122329712, + "learning_rate": 9.033065326633166e-06, + "loss": 0.0575, + "step": 10125 + }, + { + "epoch": 7.57, + "grad_norm": 2.867901563644409, + "learning_rate": 9.030552763819096e-06, + "loss": 0.0536, + "step": 10150 + }, + { + "epoch": 7.59, + "grad_norm": 2.6064682006835938, + "learning_rate": 9.028040201005027e-06, + "loss": 0.0569, + "step": 10175 + }, + { + "epoch": 7.61, + "grad_norm": 2.735574722290039, + "learning_rate": 9.025527638190956e-06, + "loss": 0.0594, + "step": 10200 + }, + { + "epoch": 7.62, + "grad_norm": 2.527926206588745, + "learning_rate": 9.023015075376885e-06, + "loss": 0.0552, + "step": 10225 + }, + { + "epoch": 7.64, + "grad_norm": 2.404273509979248, + "learning_rate": 9.020502512562815e-06, + "loss": 0.0568, + "step": 10250 + }, + { + "epoch": 7.66, + "grad_norm": 2.460718870162964, + "learning_rate": 9.017989949748744e-06, + "loss": 0.0578, + "step": 10275 + }, + { + "epoch": 7.68, + "grad_norm": 2.2130017280578613, + "learning_rate": 9.015477386934675e-06, + "loss": 0.0561, + "step": 10300 + }, + { + "epoch": 7.7, + "grad_norm": 2.4703452587127686, + "learning_rate": 9.012964824120604e-06, + "loss": 0.0568, + "step": 10325 + }, + { + "epoch": 7.72, + "grad_norm": 2.347930908203125, + "learning_rate": 9.010452261306533e-06, + "loss": 0.0563, + "step": 10350 + }, + { + "epoch": 7.74, + "grad_norm": 2.73305344581604, + "learning_rate": 9.007939698492463e-06, + "loss": 0.0581, + "step": 10375 + }, + { + "epoch": 7.76, + "grad_norm": 2.654165506362915, + "learning_rate": 9.005427135678392e-06, + "loss": 0.0565, + "step": 10400 + }, + { + "epoch": 7.77, + "grad_norm": 2.240495443344116, + "learning_rate": 9.002914572864321e-06, + "loss": 0.0558, + "step": 10425 + }, + { + "epoch": 7.79, + "grad_norm": 2.3524112701416016, + "learning_rate": 9.000502512562815e-06, + "loss": 0.058, + "step": 10450 + }, + { + "epoch": 7.81, + "grad_norm": 2.866004467010498, + "learning_rate": 8.997989949748744e-06, + "loss": 0.057, + "step": 10475 + }, + { + "epoch": 7.83, + "grad_norm": 2.3817074298858643, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0552, + "step": 10500 + }, + { + "epoch": 7.85, + "grad_norm": 2.675328493118286, + "learning_rate": 8.992964824120604e-06, + "loss": 0.057, + "step": 10525 + }, + { + "epoch": 7.87, + "grad_norm": 2.7061307430267334, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0565, + "step": 10550 + }, + { + "epoch": 7.89, + "grad_norm": 2.3317878246307373, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0566, + "step": 10575 + }, + { + "epoch": 7.9, + "grad_norm": 2.4283952713012695, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0566, + "step": 10600 + }, + { + "epoch": 7.92, + "grad_norm": 2.5792980194091797, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0567, + "step": 10625 + }, + { + "epoch": 7.94, + "grad_norm": 2.5003788471221924, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0572, + "step": 10650 + }, + { + "epoch": 7.96, + "grad_norm": 2.2855348587036133, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0574, + "step": 10675 + }, + { + "epoch": 7.98, + "grad_norm": 2.79231333732605, + "learning_rate": 8.975376884422111e-06, + "loss": 0.056, + "step": 10700 + }, + { + "epoch": 8.0, + "grad_norm": 2.5847811698913574, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0587, + "step": 10725 + }, + { + "epoch": 8.02, + "grad_norm": 2.39858341217041, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0449, + "step": 10750 + }, + { + "epoch": 8.04, + "grad_norm": 2.9097330570220947, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0445, + "step": 10775 + }, + { + "epoch": 8.05, + "grad_norm": 2.0941648483276367, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0458, + "step": 10800 + }, + { + "epoch": 8.07, + "grad_norm": 2.30513072013855, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0458, + "step": 10825 + }, + { + "epoch": 8.09, + "grad_norm": 2.406348466873169, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0443, + "step": 10850 + }, + { + "epoch": 8.11, + "grad_norm": 2.2640342712402344, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0473, + "step": 10875 + }, + { + "epoch": 8.13, + "grad_norm": 2.3050174713134766, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0484, + "step": 10900 + }, + { + "epoch": 8.15, + "grad_norm": 2.411106824874878, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0473, + "step": 10925 + }, + { + "epoch": 8.17, + "grad_norm": 2.473135471343994, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0445, + "step": 10950 + }, + { + "epoch": 8.18, + "grad_norm": 2.092379093170166, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0467, + "step": 10975 + }, + { + "epoch": 8.2, + "grad_norm": 2.36466121673584, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0479, + "step": 11000 + }, + { + "epoch": 8.2, + "eval_loss": 0.12981772422790527, + "eval_runtime": 527.5201, + "eval_samples_per_second": 2.679, + "eval_steps_per_second": 2.679, + "eval_wer": 17.41556872446925, + "step": 11000 + }, + { + "epoch": 8.22, + "grad_norm": 2.4271321296691895, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0471, + "step": 11025 + }, + { + "epoch": 8.24, + "grad_norm": 2.6245696544647217, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0484, + "step": 11050 + }, + { + "epoch": 8.26, + "grad_norm": 2.1920242309570312, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0452, + "step": 11075 + }, + { + "epoch": 8.28, + "grad_norm": 2.5347073078155518, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0478, + "step": 11100 + }, + { + "epoch": 8.3, + "grad_norm": 2.4235422611236572, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0438, + "step": 11125 + }, + { + "epoch": 8.31, + "grad_norm": 2.3191330432891846, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0449, + "step": 11150 + }, + { + "epoch": 8.33, + "grad_norm": 2.2708163261413574, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0459, + "step": 11175 + }, + { + "epoch": 8.35, + "grad_norm": 2.291452169418335, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0446, + "step": 11200 + }, + { + "epoch": 8.37, + "grad_norm": 2.3159213066101074, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0483, + "step": 11225 + }, + { + "epoch": 8.39, + "grad_norm": 2.0743844509124756, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0463, + "step": 11250 + }, + { + "epoch": 8.41, + "grad_norm": 2.056180715560913, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0472, + "step": 11275 + }, + { + "epoch": 8.43, + "grad_norm": 2.3234338760375977, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0466, + "step": 11300 + }, + { + "epoch": 8.45, + "grad_norm": 2.4676475524902344, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0464, + "step": 11325 + }, + { + "epoch": 8.46, + "grad_norm": 2.3016417026519775, + "learning_rate": 8.910050251256282e-06, + "loss": 0.046, + "step": 11350 + }, + { + "epoch": 8.48, + "grad_norm": 2.453453540802002, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0438, + "step": 11375 + }, + { + "epoch": 8.5, + "grad_norm": 2.567861318588257, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0492, + "step": 11400 + }, + { + "epoch": 8.52, + "grad_norm": 2.790985584259033, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0496, + "step": 11425 + }, + { + "epoch": 8.54, + "grad_norm": 2.6874001026153564, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0479, + "step": 11450 + }, + { + "epoch": 8.56, + "grad_norm": 2.3589653968811035, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0471, + "step": 11475 + }, + { + "epoch": 8.58, + "grad_norm": 2.821506977081299, + "learning_rate": 8.89497487437186e-06, + "loss": 0.049, + "step": 11500 + }, + { + "epoch": 8.59, + "grad_norm": 2.40663743019104, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0487, + "step": 11525 + }, + { + "epoch": 8.61, + "grad_norm": 2.2932064533233643, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0474, + "step": 11550 + }, + { + "epoch": 8.63, + "grad_norm": 2.6077702045440674, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0476, + "step": 11575 + }, + { + "epoch": 8.65, + "grad_norm": 2.4617981910705566, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0482, + "step": 11600 + }, + { + "epoch": 8.67, + "grad_norm": 2.3797831535339355, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0459, + "step": 11625 + }, + { + "epoch": 8.69, + "grad_norm": 3.409358501434326, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0475, + "step": 11650 + }, + { + "epoch": 8.71, + "grad_norm": 2.383876323699951, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0455, + "step": 11675 + }, + { + "epoch": 8.72, + "grad_norm": 2.247157573699951, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0478, + "step": 11700 + }, + { + "epoch": 8.74, + "grad_norm": 2.533935070037842, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0475, + "step": 11725 + }, + { + "epoch": 8.76, + "grad_norm": 2.3319623470306396, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0487, + "step": 11750 + }, + { + "epoch": 8.78, + "grad_norm": 2.692502975463867, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0481, + "step": 11775 + }, + { + "epoch": 8.8, + "grad_norm": 2.428013801574707, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0484, + "step": 11800 + }, + { + "epoch": 8.82, + "grad_norm": 2.6830077171325684, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0503, + "step": 11825 + }, + { + "epoch": 8.84, + "grad_norm": 2.7636404037475586, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0493, + "step": 11850 + }, + { + "epoch": 8.86, + "grad_norm": 2.7341721057891846, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0455, + "step": 11875 + }, + { + "epoch": 8.87, + "grad_norm": 2.3989248275756836, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0468, + "step": 11900 + }, + { + "epoch": 8.89, + "grad_norm": 2.5256223678588867, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0477, + "step": 11925 + }, + { + "epoch": 8.91, + "grad_norm": 2.298966646194458, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0444, + "step": 11950 + }, + { + "epoch": 8.93, + "grad_norm": 2.243450164794922, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0475, + "step": 11975 + }, + { + "epoch": 8.95, + "grad_norm": 2.559311628341675, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0481, + "step": 12000 + }, + { + "epoch": 8.95, + "eval_loss": 0.13365834951400757, + "eval_runtime": 523.18, + "eval_samples_per_second": 2.701, + "eval_steps_per_second": 2.701, + "eval_wer": 17.398308497784935, + "step": 12000 + }, + { + "epoch": 8.97, + "grad_norm": 2.4802896976470947, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0485, + "step": 12025 + }, + { + "epoch": 8.99, + "grad_norm": 2.2994909286499023, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0458, + "step": 12050 + }, + { + "epoch": 9.0, + "grad_norm": 1.8295291662216187, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0445, + "step": 12075 + }, + { + "epoch": 9.02, + "grad_norm": 2.4171090126037598, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0351, + "step": 12100 + }, + { + "epoch": 9.04, + "grad_norm": 2.173022985458374, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0372, + "step": 12125 + }, + { + "epoch": 9.06, + "grad_norm": 2.1474905014038086, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0392, + "step": 12150 + }, + { + "epoch": 9.08, + "grad_norm": 2.0238566398620605, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0359, + "step": 12175 + }, + { + "epoch": 9.1, + "grad_norm": 2.009171485900879, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0365, + "step": 12200 + }, + { + "epoch": 9.12, + "grad_norm": 2.1323530673980713, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0363, + "step": 12225 + }, + { + "epoch": 9.13, + "grad_norm": 1.9491668939590454, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0376, + "step": 12250 + }, + { + "epoch": 9.15, + "grad_norm": 2.129634380340576, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0372, + "step": 12275 + }, + { + "epoch": 9.17, + "grad_norm": 2.457087993621826, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0374, + "step": 12300 + }, + { + "epoch": 9.19, + "grad_norm": 2.1215717792510986, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0379, + "step": 12325 + }, + { + "epoch": 9.21, + "grad_norm": 2.1492676734924316, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0383, + "step": 12350 + }, + { + "epoch": 9.23, + "grad_norm": 2.5527706146240234, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0381, + "step": 12375 + }, + { + "epoch": 9.25, + "grad_norm": 2.6321067810058594, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0394, + "step": 12400 + }, + { + "epoch": 9.27, + "grad_norm": 2.058642625808716, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0384, + "step": 12425 + }, + { + "epoch": 9.28, + "grad_norm": 2.2854552268981934, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0402, + "step": 12450 + }, + { + "epoch": 9.3, + "grad_norm": 2.161755084991455, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0371, + "step": 12475 + }, + { + "epoch": 9.32, + "grad_norm": 2.008902072906494, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0388, + "step": 12500 + }, + { + "epoch": 9.34, + "grad_norm": 2.165576934814453, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0386, + "step": 12525 + }, + { + "epoch": 9.36, + "grad_norm": 2.42983341217041, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0375, + "step": 12550 + }, + { + "epoch": 9.38, + "grad_norm": 2.611006259918213, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0385, + "step": 12575 + }, + { + "epoch": 9.4, + "grad_norm": 2.73616623878479, + "learning_rate": 8.784422110552765e-06, + "loss": 0.038, + "step": 12600 + }, + { + "epoch": 9.41, + "grad_norm": 2.2862296104431152, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0372, + "step": 12625 + }, + { + "epoch": 9.43, + "grad_norm": 3.200859546661377, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0386, + "step": 12650 + }, + { + "epoch": 9.45, + "grad_norm": 2.338107109069824, + "learning_rate": 8.776884422110553e-06, + "loss": 0.038, + "step": 12675 + }, + { + "epoch": 9.47, + "grad_norm": 2.3664588928222656, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0396, + "step": 12700 + }, + { + "epoch": 9.49, + "grad_norm": 2.532569169998169, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0398, + "step": 12725 + }, + { + "epoch": 9.51, + "grad_norm": 2.475496292114258, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0388, + "step": 12750 + }, + { + "epoch": 9.53, + "grad_norm": 2.295660972595215, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0402, + "step": 12775 + }, + { + "epoch": 9.55, + "grad_norm": 2.3478331565856934, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0396, + "step": 12800 + }, + { + "epoch": 9.56, + "grad_norm": 2.2942416667938232, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0392, + "step": 12825 + }, + { + "epoch": 9.58, + "grad_norm": 2.4933559894561768, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0403, + "step": 12850 + }, + { + "epoch": 9.6, + "grad_norm": 2.589550495147705, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0378, + "step": 12875 + }, + { + "epoch": 9.62, + "grad_norm": 1.9608898162841797, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0392, + "step": 12900 + }, + { + "epoch": 9.64, + "grad_norm": 2.488668203353882, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0402, + "step": 12925 + }, + { + "epoch": 9.66, + "grad_norm": 2.8168153762817383, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0388, + "step": 12950 + }, + { + "epoch": 9.68, + "grad_norm": 2.5907788276672363, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0384, + "step": 12975 + }, + { + "epoch": 9.69, + "grad_norm": 2.6195602416992188, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0408, + "step": 13000 + }, + { + "epoch": 9.69, + "eval_loss": 0.13921278715133667, + "eval_runtime": 523.1837, + "eval_samples_per_second": 2.701, + "eval_steps_per_second": 2.701, + "eval_wer": 17.53063690236465, + "step": 13000 + }, + { + "epoch": 9.71, + "grad_norm": 2.4477555751800537, + "learning_rate": 8.741708542713569e-06, + "loss": 0.0402, + "step": 13025 + }, + { + "epoch": 9.73, + "grad_norm": 2.435985565185547, + "learning_rate": 8.739195979899498e-06, + "loss": 0.0387, + "step": 13050 + }, + { + "epoch": 9.75, + "grad_norm": 2.4475860595703125, + "learning_rate": 8.736683417085428e-06, + "loss": 0.0375, + "step": 13075 + }, + { + "epoch": 9.77, + "grad_norm": 2.8508286476135254, + "learning_rate": 8.734170854271357e-06, + "loss": 0.0404, + "step": 13100 + }, + { + "epoch": 9.79, + "grad_norm": 2.639309883117676, + "learning_rate": 8.731658291457286e-06, + "loss": 0.039, + "step": 13125 + }, + { + "epoch": 9.81, + "grad_norm": 2.2203943729400635, + "learning_rate": 8.729145728643217e-06, + "loss": 0.0397, + "step": 13150 + }, + { + "epoch": 9.82, + "grad_norm": 2.1939353942871094, + "learning_rate": 8.726633165829147e-06, + "loss": 0.0397, + "step": 13175 + }, + { + "epoch": 9.84, + "grad_norm": 2.546492099761963, + "learning_rate": 8.724120603015076e-06, + "loss": 0.0384, + "step": 13200 + }, + { + "epoch": 9.86, + "grad_norm": 2.625026226043701, + "learning_rate": 8.721608040201007e-06, + "loss": 0.0387, + "step": 13225 + }, + { + "epoch": 9.88, + "grad_norm": 2.573110818862915, + "learning_rate": 8.719095477386934e-06, + "loss": 0.039, + "step": 13250 + }, + { + "epoch": 9.9, + "grad_norm": 2.784832239151001, + "learning_rate": 8.716582914572866e-06, + "loss": 0.0394, + "step": 13275 + }, + { + "epoch": 9.92, + "grad_norm": 2.9438185691833496, + "learning_rate": 8.714070351758795e-06, + "loss": 0.0387, + "step": 13300 + }, + { + "epoch": 9.94, + "grad_norm": 2.6287431716918945, + "learning_rate": 8.711557788944724e-06, + "loss": 0.0417, + "step": 13325 + }, + { + "epoch": 9.96, + "grad_norm": 2.6113340854644775, + "learning_rate": 8.709045226130653e-06, + "loss": 0.039, + "step": 13350 + }, + { + "epoch": 9.97, + "grad_norm": 1.9732367992401123, + "learning_rate": 8.706532663316584e-06, + "loss": 0.0389, + "step": 13375 + }, + { + "epoch": 9.99, + "grad_norm": 3.0322277545928955, + "learning_rate": 8.704020100502514e-06, + "loss": 0.0407, + "step": 13400 + }, + { + "epoch": 10.01, + "grad_norm": 2.040106773376465, + "learning_rate": 8.701507537688443e-06, + "loss": 0.0341, + "step": 13425 + }, + { + "epoch": 10.03, + "grad_norm": 2.1850950717926025, + "learning_rate": 8.698994974874372e-06, + "loss": 0.0301, + "step": 13450 + }, + { + "epoch": 10.05, + "grad_norm": 2.085787296295166, + "learning_rate": 8.696482412060302e-06, + "loss": 0.0295, + "step": 13475 + }, + { + "epoch": 10.07, + "grad_norm": 2.014439582824707, + "learning_rate": 8.693969849246233e-06, + "loss": 0.0307, + "step": 13500 + }, + { + "epoch": 10.09, + "grad_norm": 2.04779052734375, + "learning_rate": 8.69145728643216e-06, + "loss": 0.03, + "step": 13525 + }, + { + "epoch": 10.1, + "grad_norm": 2.7308907508850098, + "learning_rate": 8.688944723618091e-06, + "loss": 0.0283, + "step": 13550 + }, + { + "epoch": 10.12, + "grad_norm": 2.168508529663086, + "learning_rate": 8.68643216080402e-06, + "loss": 0.0321, + "step": 13575 + }, + { + "epoch": 10.14, + "grad_norm": 2.4602952003479004, + "learning_rate": 8.68391959798995e-06, + "loss": 0.0298, + "step": 13600 + }, + { + "epoch": 10.16, + "grad_norm": 2.121040105819702, + "learning_rate": 8.681407035175881e-06, + "loss": 0.0305, + "step": 13625 + }, + { + "epoch": 10.18, + "grad_norm": 2.3157694339752197, + "learning_rate": 8.67889447236181e-06, + "loss": 0.0311, + "step": 13650 + }, + { + "epoch": 10.2, + "grad_norm": 2.092249870300293, + "learning_rate": 8.67638190954774e-06, + "loss": 0.0314, + "step": 13675 + }, + { + "epoch": 10.22, + "grad_norm": 2.029391050338745, + "learning_rate": 8.673869346733669e-06, + "loss": 0.0302, + "step": 13700 + }, + { + "epoch": 10.23, + "grad_norm": 2.2448408603668213, + "learning_rate": 8.671356783919598e-06, + "loss": 0.0309, + "step": 13725 + }, + { + "epoch": 10.25, + "grad_norm": 2.4657583236694336, + "learning_rate": 8.668844221105528e-06, + "loss": 0.0319, + "step": 13750 + }, + { + "epoch": 10.27, + "grad_norm": 2.855191707611084, + "learning_rate": 8.666331658291459e-06, + "loss": 0.0305, + "step": 13775 + }, + { + "epoch": 10.29, + "grad_norm": 2.2069427967071533, + "learning_rate": 8.663819095477388e-06, + "loss": 0.0291, + "step": 13800 + }, + { + "epoch": 10.31, + "grad_norm": 2.453369379043579, + "learning_rate": 8.661306532663317e-06, + "loss": 0.0301, + "step": 13825 + }, + { + "epoch": 10.33, + "grad_norm": 2.1747844219207764, + "learning_rate": 8.658793969849247e-06, + "loss": 0.0325, + "step": 13850 + }, + { + "epoch": 10.35, + "grad_norm": 2.405985116958618, + "learning_rate": 8.656281407035176e-06, + "loss": 0.0316, + "step": 13875 + }, + { + "epoch": 10.37, + "grad_norm": 2.3156964778900146, + "learning_rate": 8.653768844221107e-06, + "loss": 0.0315, + "step": 13900 + }, + { + "epoch": 10.38, + "grad_norm": 2.574984073638916, + "learning_rate": 8.651256281407036e-06, + "loss": 0.0323, + "step": 13925 + }, + { + "epoch": 10.4, + "grad_norm": 2.072178602218628, + "learning_rate": 8.648743718592966e-06, + "loss": 0.0315, + "step": 13950 + }, + { + "epoch": 10.42, + "grad_norm": 1.8985644578933716, + "learning_rate": 8.646231155778895e-06, + "loss": 0.0298, + "step": 13975 + }, + { + "epoch": 10.44, + "grad_norm": 3.530825138092041, + "learning_rate": 8.643718592964824e-06, + "loss": 0.0339, + "step": 14000 + }, + { + "epoch": 10.44, + "eval_loss": 0.1479218751192093, + "eval_runtime": 530.6897, + "eval_samples_per_second": 2.663, + "eval_steps_per_second": 2.663, + "eval_wer": 17.76652666705023, + "step": 14000 + }, + { + "epoch": 10.46, + "grad_norm": 2.206495761871338, + "learning_rate": 8.641206030150755e-06, + "loss": 0.0329, + "step": 14025 + }, + { + "epoch": 10.48, + "grad_norm": 1.746001124382019, + "learning_rate": 8.638693467336685e-06, + "loss": 0.0317, + "step": 14050 + }, + { + "epoch": 10.5, + "grad_norm": 2.5039286613464355, + "learning_rate": 8.636180904522614e-06, + "loss": 0.0316, + "step": 14075 + }, + { + "epoch": 10.51, + "grad_norm": 1.9761366844177246, + "learning_rate": 8.633668341708543e-06, + "loss": 0.0311, + "step": 14100 + }, + { + "epoch": 10.53, + "grad_norm": 2.4016542434692383, + "learning_rate": 8.631155778894473e-06, + "loss": 0.0316, + "step": 14125 + }, + { + "epoch": 10.55, + "grad_norm": 2.6310629844665527, + "learning_rate": 8.628643216080402e-06, + "loss": 0.0298, + "step": 14150 + }, + { + "epoch": 10.57, + "grad_norm": 2.5426523685455322, + "learning_rate": 8.626130653266333e-06, + "loss": 0.0309, + "step": 14175 + }, + { + "epoch": 10.59, + "grad_norm": 2.386469602584839, + "learning_rate": 8.623618090452262e-06, + "loss": 0.0323, + "step": 14200 + }, + { + "epoch": 10.61, + "grad_norm": 2.2023651599884033, + "learning_rate": 8.621105527638192e-06, + "loss": 0.0305, + "step": 14225 + }, + { + "epoch": 10.63, + "grad_norm": 2.4758481979370117, + "learning_rate": 8.618592964824121e-06, + "loss": 0.0312, + "step": 14250 + }, + { + "epoch": 10.65, + "grad_norm": 2.0338943004608154, + "learning_rate": 8.61608040201005e-06, + "loss": 0.0315, + "step": 14275 + }, + { + "epoch": 10.66, + "grad_norm": 2.190237045288086, + "learning_rate": 8.613567839195981e-06, + "loss": 0.0316, + "step": 14300 + }, + { + "epoch": 10.68, + "grad_norm": 2.8525807857513428, + "learning_rate": 8.61105527638191e-06, + "loss": 0.0312, + "step": 14325 + }, + { + "epoch": 10.7, + "grad_norm": 2.3655707836151123, + "learning_rate": 8.60854271356784e-06, + "loss": 0.0334, + "step": 14350 + }, + { + "epoch": 10.72, + "grad_norm": 2.7469046115875244, + "learning_rate": 8.60603015075377e-06, + "loss": 0.0321, + "step": 14375 + }, + { + "epoch": 10.74, + "grad_norm": 2.1317009925842285, + "learning_rate": 8.603517587939699e-06, + "loss": 0.0311, + "step": 14400 + }, + { + "epoch": 10.76, + "grad_norm": 2.4330532550811768, + "learning_rate": 8.601005025125628e-06, + "loss": 0.0333, + "step": 14425 + }, + { + "epoch": 10.78, + "grad_norm": 2.674049139022827, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0324, + "step": 14450 + }, + { + "epoch": 10.79, + "grad_norm": 2.2558140754699707, + "learning_rate": 8.59608040201005e-06, + "loss": 0.033, + "step": 14475 + }, + { + "epoch": 10.81, + "grad_norm": 2.6020634174346924, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0322, + "step": 14500 + }, + { + "epoch": 10.83, + "grad_norm": 2.644313097000122, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0328, + "step": 14525 + }, + { + "epoch": 10.85, + "grad_norm": 2.725046396255493, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0336, + "step": 14550 + }, + { + "epoch": 10.87, + "grad_norm": 2.506295919418335, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0316, + "step": 14575 + }, + { + "epoch": 10.89, + "grad_norm": 2.294926881790161, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0331, + "step": 14600 + }, + { + "epoch": 10.91, + "grad_norm": 2.505840301513672, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0319, + "step": 14625 + }, + { + "epoch": 10.92, + "grad_norm": 2.6005473136901855, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0311, + "step": 14650 + }, + { + "epoch": 10.94, + "grad_norm": 2.3736581802368164, + "learning_rate": 8.575979899497488e-06, + "loss": 0.033, + "step": 14675 + }, + { + "epoch": 10.96, + "grad_norm": 1.9671599864959717, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0321, + "step": 14700 + }, + { + "epoch": 10.98, + "grad_norm": 2.1572256088256836, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0313, + "step": 14725 + }, + { + "epoch": 11.0, + "grad_norm": 2.419848918914795, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0322, + "step": 14750 + }, + { + "epoch": 11.02, + "grad_norm": 1.9697504043579102, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0247, + "step": 14775 + }, + { + "epoch": 11.04, + "grad_norm": 2.745821237564087, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0245, + "step": 14800 + }, + { + "epoch": 11.06, + "grad_norm": 2.0653817653656006, + "learning_rate": 8.560904522613066e-06, + "loss": 0.025, + "step": 14825 + }, + { + "epoch": 11.07, + "grad_norm": 2.0453221797943115, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0225, + "step": 14850 + }, + { + "epoch": 11.09, + "grad_norm": 2.0905613899230957, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0233, + "step": 14875 + }, + { + "epoch": 11.11, + "grad_norm": 2.231900215148926, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0243, + "step": 14900 + }, + { + "epoch": 11.13, + "grad_norm": 1.7659270763397217, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0241, + "step": 14925 + }, + { + "epoch": 11.15, + "grad_norm": 2.011756181716919, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0236, + "step": 14950 + }, + { + "epoch": 11.17, + "grad_norm": 1.793749213218689, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0238, + "step": 14975 + }, + { + "epoch": 11.19, + "grad_norm": 1.9638214111328125, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0236, + "step": 15000 + }, + { + "epoch": 11.19, + "eval_loss": 0.15754041075706482, + "eval_runtime": 529.1543, + "eval_samples_per_second": 2.67, + "eval_steps_per_second": 2.67, + "eval_wer": 17.778033484839767, + "step": 15000 + }, + { + "epoch": 11.2, + "grad_norm": 2.186391592025757, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0253, + "step": 15025 + }, + { + "epoch": 11.22, + "grad_norm": 2.084564208984375, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0245, + "step": 15050 + }, + { + "epoch": 11.24, + "grad_norm": 2.3312909603118896, + "learning_rate": 8.535778894472363e-06, + "loss": 0.025, + "step": 15075 + }, + { + "epoch": 11.26, + "grad_norm": 1.8815104961395264, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0248, + "step": 15100 + }, + { + "epoch": 11.28, + "grad_norm": 2.1759684085845947, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0239, + "step": 15125 + }, + { + "epoch": 11.3, + "grad_norm": 1.994605541229248, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0248, + "step": 15150 + }, + { + "epoch": 11.32, + "grad_norm": 2.298074245452881, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0236, + "step": 15175 + }, + { + "epoch": 11.33, + "grad_norm": 2.180392026901245, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0241, + "step": 15200 + }, + { + "epoch": 11.35, + "grad_norm": 2.2325847148895264, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0235, + "step": 15225 + }, + { + "epoch": 11.37, + "grad_norm": 1.8550736904144287, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0248, + "step": 15250 + }, + { + "epoch": 11.39, + "grad_norm": 2.232196569442749, + "learning_rate": 8.515678391959799e-06, + "loss": 0.025, + "step": 15275 + }, + { + "epoch": 11.41, + "grad_norm": 2.383640766143799, + "learning_rate": 8.51316582914573e-06, + "loss": 0.026, + "step": 15300 + }, + { + "epoch": 11.43, + "grad_norm": 1.9078830480575562, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0251, + "step": 15325 + }, + { + "epoch": 11.45, + "grad_norm": 2.413806915283203, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0253, + "step": 15350 + }, + { + "epoch": 11.47, + "grad_norm": 2.220574378967285, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0248, + "step": 15375 + }, + { + "epoch": 11.48, + "grad_norm": 1.932139277458191, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0246, + "step": 15400 + }, + { + "epoch": 11.5, + "grad_norm": 1.8589519262313843, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0262, + "step": 15425 + }, + { + "epoch": 11.52, + "grad_norm": 2.145256757736206, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0267, + "step": 15450 + }, + { + "epoch": 11.54, + "grad_norm": 2.082838535308838, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0266, + "step": 15475 + }, + { + "epoch": 11.56, + "grad_norm": 2.3860409259796143, + "learning_rate": 8.493065326633166e-06, + "loss": 0.025, + "step": 15500 + }, + { + "epoch": 11.58, + "grad_norm": 2.3640079498291016, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0246, + "step": 15525 + }, + { + "epoch": 11.6, + "grad_norm": 2.2166640758514404, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0249, + "step": 15550 + }, + { + "epoch": 11.61, + "grad_norm": 2.043422222137451, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0272, + "step": 15575 + }, + { + "epoch": 11.63, + "grad_norm": 2.6145598888397217, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0255, + "step": 15600 + }, + { + "epoch": 11.65, + "grad_norm": 2.0896239280700684, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0271, + "step": 15625 + }, + { + "epoch": 11.67, + "grad_norm": 2.092186450958252, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0255, + "step": 15650 + }, + { + "epoch": 11.69, + "grad_norm": 2.3531675338745117, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0261, + "step": 15675 + }, + { + "epoch": 11.71, + "grad_norm": 2.47927188873291, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0249, + "step": 15700 + }, + { + "epoch": 11.73, + "grad_norm": 2.224541664123535, + "learning_rate": 8.470452261306534e-06, + "loss": 0.026, + "step": 15725 + }, + { + "epoch": 11.74, + "grad_norm": 2.464637041091919, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0263, + "step": 15750 + }, + { + "epoch": 11.76, + "grad_norm": 2.447274923324585, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0256, + "step": 15775 + }, + { + "epoch": 11.78, + "grad_norm": 2.4529521465301514, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0263, + "step": 15800 + }, + { + "epoch": 11.8, + "grad_norm": 2.1988816261291504, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0254, + "step": 15825 + }, + { + "epoch": 11.82, + "grad_norm": 2.0715203285217285, + "learning_rate": 8.457889447236182e-06, + "loss": 0.026, + "step": 15850 + }, + { + "epoch": 11.84, + "grad_norm": 2.083505630493164, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0283, + "step": 15875 + }, + { + "epoch": 11.86, + "grad_norm": 2.030698776245117, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0266, + "step": 15900 + }, + { + "epoch": 11.88, + "grad_norm": 1.8481398820877075, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0256, + "step": 15925 + }, + { + "epoch": 11.89, + "grad_norm": 2.2139275074005127, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0257, + "step": 15950 + }, + { + "epoch": 11.91, + "grad_norm": 2.2692158222198486, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0268, + "step": 15975 + }, + { + "epoch": 11.93, + "grad_norm": 2.467280387878418, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0257, + "step": 16000 + }, + { + "epoch": 11.93, + "eval_loss": 0.16068989038467407, + "eval_runtime": 531.2612, + "eval_samples_per_second": 2.66, + "eval_steps_per_second": 2.66, + "eval_wer": 17.8873482538404, + "step": 16000 + }, + { + "epoch": 11.95, + "grad_norm": 2.2023231983184814, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0272, + "step": 16025 + }, + { + "epoch": 11.97, + "grad_norm": 2.8707969188690186, + "learning_rate": 8.437788944723618e-06, + "loss": 0.026, + "step": 16050 + }, + { + "epoch": 11.99, + "grad_norm": 2.5377700328826904, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0264, + "step": 16075 + }, + { + "epoch": 12.01, + "grad_norm": 2.178784132003784, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0234, + "step": 16100 + }, + { + "epoch": 12.02, + "grad_norm": 1.767102837562561, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0185, + "step": 16125 + }, + { + "epoch": 12.04, + "grad_norm": 2.1668245792388916, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0185, + "step": 16150 + }, + { + "epoch": 12.06, + "grad_norm": 1.7037166357040405, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0186, + "step": 16175 + }, + { + "epoch": 12.08, + "grad_norm": 1.9696314334869385, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0172, + "step": 16200 + }, + { + "epoch": 12.1, + "grad_norm": 1.9075572490692139, + "learning_rate": 8.420201005025125e-06, + "loss": 0.0184, + "step": 16225 + }, + { + "epoch": 12.12, + "grad_norm": 1.4202022552490234, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0185, + "step": 16250 + }, + { + "epoch": 12.14, + "grad_norm": 1.9497981071472168, + "learning_rate": 8.415175879396985e-06, + "loss": 0.0193, + "step": 16275 + }, + { + "epoch": 12.16, + "grad_norm": 1.66078519821167, + "learning_rate": 8.412663316582915e-06, + "loss": 0.0197, + "step": 16300 + }, + { + "epoch": 12.17, + "grad_norm": 1.9186277389526367, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0195, + "step": 16325 + }, + { + "epoch": 12.19, + "grad_norm": 1.925475835800171, + "learning_rate": 8.407638190954775e-06, + "loss": 0.0197, + "step": 16350 + }, + { + "epoch": 12.21, + "grad_norm": 1.5120387077331543, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0196, + "step": 16375 + }, + { + "epoch": 12.23, + "grad_norm": 1.9534211158752441, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0185, + "step": 16400 + }, + { + "epoch": 12.25, + "grad_norm": 1.9716321229934692, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0204, + "step": 16425 + }, + { + "epoch": 12.27, + "grad_norm": 2.1148011684417725, + "learning_rate": 8.397587939698492e-06, + "loss": 0.0195, + "step": 16450 + }, + { + "epoch": 12.29, + "grad_norm": 1.692244291305542, + "learning_rate": 8.395075376884423e-06, + "loss": 0.0203, + "step": 16475 + }, + { + "epoch": 12.3, + "grad_norm": 1.2903988361358643, + "learning_rate": 8.392562814070351e-06, + "loss": 0.0199, + "step": 16500 + }, + { + "epoch": 12.32, + "grad_norm": 2.2840662002563477, + "learning_rate": 8.390050251256282e-06, + "loss": 0.02, + "step": 16525 + }, + { + "epoch": 12.34, + "grad_norm": 2.0871994495391846, + "learning_rate": 8.387537688442211e-06, + "loss": 0.0201, + "step": 16550 + }, + { + "epoch": 12.36, + "grad_norm": 2.469174385070801, + "learning_rate": 8.38502512562814e-06, + "loss": 0.0205, + "step": 16575 + }, + { + "epoch": 12.38, + "grad_norm": 1.7435818910598755, + "learning_rate": 8.382512562814072e-06, + "loss": 0.0197, + "step": 16600 + }, + { + "epoch": 12.4, + "grad_norm": 2.4080371856689453, + "learning_rate": 8.380000000000001e-06, + "loss": 0.0196, + "step": 16625 + }, + { + "epoch": 12.42, + "grad_norm": 2.2950546741485596, + "learning_rate": 8.37748743718593e-06, + "loss": 0.0208, + "step": 16650 + }, + { + "epoch": 12.43, + "grad_norm": 1.7712024450302124, + "learning_rate": 8.37497487437186e-06, + "loss": 0.02, + "step": 16675 + }, + { + "epoch": 12.45, + "grad_norm": 1.8142921924591064, + "learning_rate": 8.372462311557789e-06, + "loss": 0.0202, + "step": 16700 + }, + { + "epoch": 12.47, + "grad_norm": 2.4354050159454346, + "learning_rate": 8.36994974874372e-06, + "loss": 0.0204, + "step": 16725 + }, + { + "epoch": 12.49, + "grad_norm": 2.2695727348327637, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0195, + "step": 16750 + }, + { + "epoch": 12.51, + "grad_norm": 2.256756067276001, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0212, + "step": 16775 + }, + { + "epoch": 12.53, + "grad_norm": 2.492504596710205, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0208, + "step": 16800 + }, + { + "epoch": 12.55, + "grad_norm": 2.025836229324341, + "learning_rate": 8.36e-06, + "loss": 0.0196, + "step": 16825 + }, + { + "epoch": 12.57, + "grad_norm": 2.2775118350982666, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0203, + "step": 16850 + }, + { + "epoch": 12.58, + "grad_norm": 2.259349822998047, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0208, + "step": 16875 + }, + { + "epoch": 12.6, + "grad_norm": 2.1865718364715576, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0201, + "step": 16900 + }, + { + "epoch": 12.62, + "grad_norm": 1.971570611000061, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0204, + "step": 16925 + }, + { + "epoch": 12.64, + "grad_norm": 2.1629433631896973, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0203, + "step": 16950 + }, + { + "epoch": 12.66, + "grad_norm": 2.0858287811279297, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0206, + "step": 16975 + }, + { + "epoch": 12.68, + "grad_norm": 2.2317278385162354, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0211, + "step": 17000 + }, + { + "epoch": 12.68, + "eval_loss": 0.17009878158569336, + "eval_runtime": 623.4488, + "eval_samples_per_second": 2.266, + "eval_steps_per_second": 2.266, + "eval_wer": 18.186525516368448, + "step": 17000 + }, + { + "epoch": 12.7, + "grad_norm": 2.0520148277282715, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0219, + "step": 17025 + }, + { + "epoch": 12.71, + "grad_norm": 2.284017324447632, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0202, + "step": 17050 + }, + { + "epoch": 12.73, + "grad_norm": 2.1444568634033203, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0192, + "step": 17075 + }, + { + "epoch": 12.75, + "grad_norm": 1.5463666915893555, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0211, + "step": 17100 + }, + { + "epoch": 12.77, + "grad_norm": 1.9309874773025513, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0207, + "step": 17125 + }, + { + "epoch": 12.79, + "grad_norm": 2.5459399223327637, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0223, + "step": 17150 + }, + { + "epoch": 12.81, + "grad_norm": 2.1787266731262207, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0206, + "step": 17175 + }, + { + "epoch": 12.83, + "grad_norm": 2.0539345741271973, + "learning_rate": 8.322311557788946e-06, + "loss": 0.021, + "step": 17200 + }, + { + "epoch": 12.84, + "grad_norm": 2.2832024097442627, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0219, + "step": 17225 + }, + { + "epoch": 12.86, + "grad_norm": 1.9527398347854614, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0202, + "step": 17250 + }, + { + "epoch": 12.88, + "grad_norm": 1.9685579538345337, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0205, + "step": 17275 + }, + { + "epoch": 12.9, + "grad_norm": 1.802525281906128, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0201, + "step": 17300 + }, + { + "epoch": 12.92, + "grad_norm": 1.95085871219635, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0204, + "step": 17325 + }, + { + "epoch": 12.94, + "grad_norm": 2.0641379356384277, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0194, + "step": 17350 + }, + { + "epoch": 12.96, + "grad_norm": 2.0279335975646973, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0213, + "step": 17375 + }, + { + "epoch": 12.98, + "grad_norm": 2.145747661590576, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0205, + "step": 17400 + }, + { + "epoch": 12.99, + "grad_norm": 2.2134642601013184, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0219, + "step": 17425 + }, + { + "epoch": 13.01, + "grad_norm": 1.4274406433105469, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0167, + "step": 17450 + }, + { + "epoch": 13.03, + "grad_norm": 1.8475873470306396, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0142, + "step": 17475 + }, + { + "epoch": 13.05, + "grad_norm": 1.4130116701126099, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0138, + "step": 17500 + }, + { + "epoch": 13.07, + "grad_norm": 1.62507963180542, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0143, + "step": 17525 + }, + { + "epoch": 13.09, + "grad_norm": 1.7610334157943726, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0147, + "step": 17550 + }, + { + "epoch": 13.11, + "grad_norm": 1.8449413776397705, + "learning_rate": 8.28462311557789e-06, + "loss": 0.015, + "step": 17575 + }, + { + "epoch": 13.12, + "grad_norm": 1.8660368919372559, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0147, + "step": 17600 + }, + { + "epoch": 13.14, + "grad_norm": 1.68376624584198, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0142, + "step": 17625 + }, + { + "epoch": 13.16, + "grad_norm": 1.951582431793213, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0165, + "step": 17650 + }, + { + "epoch": 13.18, + "grad_norm": 2.091298818588257, + "learning_rate": 8.274572864321608e-06, + "loss": 0.014, + "step": 17675 + }, + { + "epoch": 13.2, + "grad_norm": 1.7085005044937134, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0163, + "step": 17700 + }, + { + "epoch": 13.22, + "grad_norm": 2.411794424057007, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0157, + "step": 17725 + }, + { + "epoch": 13.24, + "grad_norm": 1.9730066061019897, + "learning_rate": 8.267035175879398e-06, + "loss": 0.016, + "step": 17750 + }, + { + "epoch": 13.26, + "grad_norm": 1.7919137477874756, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0147, + "step": 17775 + }, + { + "epoch": 13.27, + "grad_norm": 1.5649666786193848, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0151, + "step": 17800 + }, + { + "epoch": 13.29, + "grad_norm": 1.8545132875442505, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0158, + "step": 17825 + }, + { + "epoch": 13.31, + "grad_norm": 1.8907432556152344, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0159, + "step": 17850 + }, + { + "epoch": 13.33, + "grad_norm": 1.572027564048767, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0155, + "step": 17875 + }, + { + "epoch": 13.35, + "grad_norm": 1.978667140007019, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0161, + "step": 17900 + }, + { + "epoch": 13.37, + "grad_norm": 1.7438974380493164, + "learning_rate": 8.249447236180905e-06, + "loss": 0.016, + "step": 17925 + }, + { + "epoch": 13.39, + "grad_norm": 2.624887466430664, + "learning_rate": 8.246934673366836e-06, + "loss": 0.015, + "step": 17950 + }, + { + "epoch": 13.4, + "grad_norm": 1.5545618534088135, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0156, + "step": 17975 + }, + { + "epoch": 13.42, + "grad_norm": 1.6309289932250977, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0154, + "step": 18000 + }, + { + "epoch": 13.42, + "eval_loss": 0.17835278809070587, + "eval_runtime": 621.027, + "eval_samples_per_second": 2.275, + "eval_steps_per_second": 2.275, + "eval_wer": 18.123238018525978, + "step": 18000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 75, + "save_steps": 1000, + "total_flos": 2.835327456608256e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/training_args.bin b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d7c04904d41911b4e0d6f51bda321e38e3f412b --- /dev/null +++ b/checkpoints/whisper-tiny/chattisgarhi/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06711efe37faa26e9aa5fe6cea2f2002d6d82b27ef905a72e8b3282541e1dbc +size 4667 diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/config.json b/checkpoints/whisper-tiny/hindi/checkpoint-23000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c47e7ae5f6c65847b8952aa0e827c7f13a489891 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/generation_config.json b/checkpoints/whisper-tiny/hindi/checkpoint-23000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/model.safetensors b/checkpoints/whisper-tiny/hindi/checkpoint-23000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b930820bcdeb49e7964e0efa88eb5b4113cb884a --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f848c2dd4a6a9c79643ee6e441ae9e297cd5c5558fde84148a692fd94a622c +size 151061672 diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/optimizer.pt b/checkpoints/whisper-tiny/hindi/checkpoint-23000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6953b81f976a76c9efe06fd03f3744ad99816560 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885de4ef7880b7a3adb07232b1270738577ad707b5c41004a6ffcaf103c3af74 +size 297615749 diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/preprocessor_config.json b/checkpoints/whisper-tiny/hindi/checkpoint-23000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/rng_state.pth b/checkpoints/whisper-tiny/hindi/checkpoint-23000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7a32ba8704c5abde9e985db3ed33f0e6f4e24d0 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7454959d02ca5f5c98111f4d96721b021192ab7d312b7480effa79f1d997876 +size 14575 diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/scheduler.pt b/checkpoints/whisper-tiny/hindi/checkpoint-23000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d659f3561a83198def440a136b49574d145a19a --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e9aead6f8f3eea0b041af6e0903d21767c35722a1f7886c31c8d8e271862b3 +size 627 diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/trainer_state.json b/checkpoints/whisper-tiny/hindi/checkpoint-23000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97ddb9f1cdde88dce99f5babdb5fc4ca5a300586 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/trainer_state.json @@ -0,0 +1,6668 @@ +{ + "best_metric": 21.06708742402992, + "best_model_checkpoint": "results/whisper-tiny/hindi/checkpoint-13000", + "epoch": 17.15137956748695, + "eval_steps": 1000, + "global_step": 23000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 87.41815948486328, + "learning_rate": 4.4e-07, + "loss": 3.6889, + "step": 25 + }, + { + "epoch": 0.04, + "grad_norm": 35.009037017822266, + "learning_rate": 9.200000000000001e-07, + "loss": 3.0464, + "step": 50 + }, + { + "epoch": 0.06, + "grad_norm": 13.666839599609375, + "learning_rate": 1.42e-06, + "loss": 2.2662, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 8.23681354522705, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.7415, + "step": 100 + }, + { + "epoch": 0.09, + "grad_norm": 6.896854877471924, + "learning_rate": 2.42e-06, + "loss": 1.3766, + "step": 125 + }, + { + "epoch": 0.11, + "grad_norm": 5.9597015380859375, + "learning_rate": 2.92e-06, + "loss": 1.1438, + "step": 150 + }, + { + "epoch": 0.13, + "grad_norm": 5.173588275909424, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.9888, + "step": 175 + }, + { + "epoch": 0.15, + "grad_norm": 5.153362274169922, + "learning_rate": 3.920000000000001e-06, + "loss": 0.8535, + "step": 200 + }, + { + "epoch": 0.17, + "grad_norm": 5.125275611877441, + "learning_rate": 4.42e-06, + "loss": 0.7809, + "step": 225 + }, + { + "epoch": 0.19, + "grad_norm": 5.827782154083252, + "learning_rate": 4.92e-06, + "loss": 0.7158, + "step": 250 + }, + { + "epoch": 0.21, + "grad_norm": 4.852942943572998, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6542, + "step": 275 + }, + { + "epoch": 0.22, + "grad_norm": 4.883583068847656, + "learning_rate": 5.92e-06, + "loss": 0.6229, + "step": 300 + }, + { + "epoch": 0.24, + "grad_norm": 5.2296833992004395, + "learning_rate": 6.42e-06, + "loss": 0.5852, + "step": 325 + }, + { + "epoch": 0.26, + "grad_norm": 4.740891933441162, + "learning_rate": 6.92e-06, + "loss": 0.5539, + "step": 350 + }, + { + "epoch": 0.28, + "grad_norm": 4.822671413421631, + "learning_rate": 7.420000000000001e-06, + "loss": 0.533, + "step": 375 + }, + { + "epoch": 0.3, + "grad_norm": 4.947971820831299, + "learning_rate": 7.92e-06, + "loss": 0.5191, + "step": 400 + }, + { + "epoch": 0.32, + "grad_norm": 4.511556625366211, + "learning_rate": 8.42e-06, + "loss": 0.5025, + "step": 425 + }, + { + "epoch": 0.34, + "grad_norm": 4.311334609985352, + "learning_rate": 8.920000000000001e-06, + "loss": 0.4819, + "step": 450 + }, + { + "epoch": 0.35, + "grad_norm": 4.7911787033081055, + "learning_rate": 9.42e-06, + "loss": 0.4654, + "step": 475 + }, + { + "epoch": 0.37, + "grad_norm": 4.4758477210998535, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4543, + "step": 500 + }, + { + "epoch": 0.39, + "grad_norm": 3.953019142150879, + "learning_rate": 9.997889447236182e-06, + "loss": 0.4426, + "step": 525 + }, + { + "epoch": 0.41, + "grad_norm": 5.1154093742370605, + "learning_rate": 9.995376884422112e-06, + "loss": 0.4279, + "step": 550 + }, + { + "epoch": 0.43, + "grad_norm": 4.461019039154053, + "learning_rate": 9.992864321608041e-06, + "loss": 0.4147, + "step": 575 + }, + { + "epoch": 0.45, + "grad_norm": 4.9064507484436035, + "learning_rate": 9.99035175879397e-06, + "loss": 0.4051, + "step": 600 + }, + { + "epoch": 0.47, + "grad_norm": 4.7476372718811035, + "learning_rate": 9.9878391959799e-06, + "loss": 0.3963, + "step": 625 + }, + { + "epoch": 0.48, + "grad_norm": 5.045162677764893, + "learning_rate": 9.98532663316583e-06, + "loss": 0.396, + "step": 650 + }, + { + "epoch": 0.5, + "grad_norm": 4.967711925506592, + "learning_rate": 9.98281407035176e-06, + "loss": 0.3815, + "step": 675 + }, + { + "epoch": 0.52, + "grad_norm": 4.287027835845947, + "learning_rate": 9.98030150753769e-06, + "loss": 0.3768, + "step": 700 + }, + { + "epoch": 0.54, + "grad_norm": 4.72049617767334, + "learning_rate": 9.977788944723619e-06, + "loss": 0.372, + "step": 725 + }, + { + "epoch": 0.56, + "grad_norm": 4.140354633331299, + "learning_rate": 9.975276381909548e-06, + "loss": 0.3657, + "step": 750 + }, + { + "epoch": 0.58, + "grad_norm": 4.916764259338379, + "learning_rate": 9.972763819095477e-06, + "loss": 0.3578, + "step": 775 + }, + { + "epoch": 0.6, + "grad_norm": 4.392751216888428, + "learning_rate": 9.970251256281408e-06, + "loss": 0.3555, + "step": 800 + }, + { + "epoch": 0.62, + "grad_norm": 5.812331199645996, + "learning_rate": 9.967738693467338e-06, + "loss": 0.3472, + "step": 825 + }, + { + "epoch": 0.63, + "grad_norm": 5.153542995452881, + "learning_rate": 9.965226130653267e-06, + "loss": 0.3366, + "step": 850 + }, + { + "epoch": 0.65, + "grad_norm": 5.041461944580078, + "learning_rate": 9.962713567839198e-06, + "loss": 0.3347, + "step": 875 + }, + { + "epoch": 0.67, + "grad_norm": 4.402337074279785, + "learning_rate": 9.960201005025126e-06, + "loss": 0.3336, + "step": 900 + }, + { + "epoch": 0.69, + "grad_norm": 4.338579177856445, + "learning_rate": 9.957688442211057e-06, + "loss": 0.3219, + "step": 925 + }, + { + "epoch": 0.71, + "grad_norm": 4.370389938354492, + "learning_rate": 9.955175879396986e-06, + "loss": 0.3286, + "step": 950 + }, + { + "epoch": 0.73, + "grad_norm": 4.774099349975586, + "learning_rate": 9.952663316582915e-06, + "loss": 0.3249, + "step": 975 + }, + { + "epoch": 0.75, + "grad_norm": 4.903811931610107, + "learning_rate": 9.950150753768845e-06, + "loss": 0.3165, + "step": 1000 + }, + { + "epoch": 0.75, + "eval_loss": 0.25924113392829895, + "eval_runtime": 615.3646, + "eval_samples_per_second": 2.501, + "eval_steps_per_second": 2.501, + "eval_wer": 37.184431977559605, + "step": 1000 + }, + { + "epoch": 0.76, + "grad_norm": 4.409754276275635, + "learning_rate": 9.947638190954774e-06, + "loss": 0.3167, + "step": 1025 + }, + { + "epoch": 0.78, + "grad_norm": 4.626646518707275, + "learning_rate": 9.945125628140703e-06, + "loss": 0.3074, + "step": 1050 + }, + { + "epoch": 0.8, + "grad_norm": 5.072508335113525, + "learning_rate": 9.942613065326634e-06, + "loss": 0.3088, + "step": 1075 + }, + { + "epoch": 0.82, + "grad_norm": 4.44353723526001, + "learning_rate": 9.940100502512564e-06, + "loss": 0.3058, + "step": 1100 + }, + { + "epoch": 0.84, + "grad_norm": 4.155320644378662, + "learning_rate": 9.937587939698493e-06, + "loss": 0.2995, + "step": 1125 + }, + { + "epoch": 0.86, + "grad_norm": 4.692741394042969, + "learning_rate": 9.935075376884424e-06, + "loss": 0.2968, + "step": 1150 + }, + { + "epoch": 0.88, + "grad_norm": 4.704728603363037, + "learning_rate": 9.932562814070352e-06, + "loss": 0.2905, + "step": 1175 + }, + { + "epoch": 0.89, + "grad_norm": 4.1347880363464355, + "learning_rate": 9.930050251256283e-06, + "loss": 0.2946, + "step": 1200 + }, + { + "epoch": 0.91, + "grad_norm": 4.096576690673828, + "learning_rate": 9.927537688442212e-06, + "loss": 0.2845, + "step": 1225 + }, + { + "epoch": 0.93, + "grad_norm": 4.58539342880249, + "learning_rate": 9.925025125628141e-06, + "loss": 0.2888, + "step": 1250 + }, + { + "epoch": 0.95, + "grad_norm": 4.089907169342041, + "learning_rate": 9.922512562814072e-06, + "loss": 0.28, + "step": 1275 + }, + { + "epoch": 0.97, + "grad_norm": 4.57423734664917, + "learning_rate": 9.920000000000002e-06, + "loss": 0.2883, + "step": 1300 + }, + { + "epoch": 0.99, + "grad_norm": 3.9139890670776367, + "learning_rate": 9.917487437185931e-06, + "loss": 0.2805, + "step": 1325 + }, + { + "epoch": 1.01, + "grad_norm": 4.066978454589844, + "learning_rate": 9.91497487437186e-06, + "loss": 0.2793, + "step": 1350 + }, + { + "epoch": 1.03, + "grad_norm": 4.5573954582214355, + "learning_rate": 9.91246231155779e-06, + "loss": 0.2649, + "step": 1375 + }, + { + "epoch": 1.04, + "grad_norm": 4.314223289489746, + "learning_rate": 9.909949748743719e-06, + "loss": 0.2557, + "step": 1400 + }, + { + "epoch": 1.06, + "grad_norm": 4.305091857910156, + "learning_rate": 9.90743718592965e-06, + "loss": 0.2561, + "step": 1425 + }, + { + "epoch": 1.08, + "grad_norm": 4.08223295211792, + "learning_rate": 9.904924623115578e-06, + "loss": 0.2571, + "step": 1450 + }, + { + "epoch": 1.1, + "grad_norm": 4.517662525177002, + "learning_rate": 9.902412060301509e-06, + "loss": 0.2513, + "step": 1475 + }, + { + "epoch": 1.12, + "grad_norm": 4.008381366729736, + "learning_rate": 9.899899497487438e-06, + "loss": 0.2552, + "step": 1500 + }, + { + "epoch": 1.14, + "grad_norm": 4.0856709480285645, + "learning_rate": 9.897386934673367e-06, + "loss": 0.2478, + "step": 1525 + }, + { + "epoch": 1.16, + "grad_norm": 3.9554502964019775, + "learning_rate": 9.894874371859298e-06, + "loss": 0.2543, + "step": 1550 + }, + { + "epoch": 1.17, + "grad_norm": 4.4893879890441895, + "learning_rate": 9.892361809045228e-06, + "loss": 0.2492, + "step": 1575 + }, + { + "epoch": 1.19, + "grad_norm": 4.591585159301758, + "learning_rate": 9.889849246231157e-06, + "loss": 0.2509, + "step": 1600 + }, + { + "epoch": 1.21, + "grad_norm": 4.05668830871582, + "learning_rate": 9.887336683417086e-06, + "loss": 0.2467, + "step": 1625 + }, + { + "epoch": 1.23, + "grad_norm": 4.19999361038208, + "learning_rate": 9.884824120603015e-06, + "loss": 0.2484, + "step": 1650 + }, + { + "epoch": 1.25, + "grad_norm": 4.646579742431641, + "learning_rate": 9.882311557788945e-06, + "loss": 0.2431, + "step": 1675 + }, + { + "epoch": 1.27, + "grad_norm": 4.078315258026123, + "learning_rate": 9.879798994974876e-06, + "loss": 0.2375, + "step": 1700 + }, + { + "epoch": 1.29, + "grad_norm": 4.169734954833984, + "learning_rate": 9.877286432160805e-06, + "loss": 0.2369, + "step": 1725 + }, + { + "epoch": 1.3, + "grad_norm": 4.2412590980529785, + "learning_rate": 9.874773869346734e-06, + "loss": 0.2371, + "step": 1750 + }, + { + "epoch": 1.32, + "grad_norm": 4.363594055175781, + "learning_rate": 9.872261306532664e-06, + "loss": 0.2392, + "step": 1775 + }, + { + "epoch": 1.34, + "grad_norm": 3.7578232288360596, + "learning_rate": 9.869748743718593e-06, + "loss": 0.2287, + "step": 1800 + }, + { + "epoch": 1.36, + "grad_norm": 3.997183322906494, + "learning_rate": 9.867236180904524e-06, + "loss": 0.2349, + "step": 1825 + }, + { + "epoch": 1.38, + "grad_norm": 4.028231143951416, + "learning_rate": 9.864723618090453e-06, + "loss": 0.2266, + "step": 1850 + }, + { + "epoch": 1.4, + "grad_norm": 4.515061855316162, + "learning_rate": 9.862211055276383e-06, + "loss": 0.2308, + "step": 1875 + }, + { + "epoch": 1.42, + "grad_norm": 4.001887321472168, + "learning_rate": 9.859698492462312e-06, + "loss": 0.2325, + "step": 1900 + }, + { + "epoch": 1.44, + "grad_norm": 4.515941619873047, + "learning_rate": 9.857185929648241e-06, + "loss": 0.233, + "step": 1925 + }, + { + "epoch": 1.45, + "grad_norm": 4.453752040863037, + "learning_rate": 9.854673366834172e-06, + "loss": 0.2324, + "step": 1950 + }, + { + "epoch": 1.47, + "grad_norm": 3.809417486190796, + "learning_rate": 9.852160804020102e-06, + "loss": 0.2255, + "step": 1975 + }, + { + "epoch": 1.49, + "grad_norm": 4.373373508453369, + "learning_rate": 9.849648241206031e-06, + "loss": 0.2236, + "step": 2000 + }, + { + "epoch": 1.49, + "eval_loss": 0.1916063129901886, + "eval_runtime": 575.5428, + "eval_samples_per_second": 2.674, + "eval_steps_per_second": 2.674, + "eval_wer": 30.037400654511455, + "step": 2000 + }, + { + "epoch": 1.51, + "grad_norm": 4.349323749542236, + "learning_rate": 9.84713567839196e-06, + "loss": 0.2267, + "step": 2025 + }, + { + "epoch": 1.53, + "grad_norm": 4.126310348510742, + "learning_rate": 9.84462311557789e-06, + "loss": 0.2295, + "step": 2050 + }, + { + "epoch": 1.55, + "grad_norm": 4.731456756591797, + "learning_rate": 9.842110552763819e-06, + "loss": 0.2225, + "step": 2075 + }, + { + "epoch": 1.57, + "grad_norm": 3.8882362842559814, + "learning_rate": 9.83959798994975e-06, + "loss": 0.2144, + "step": 2100 + }, + { + "epoch": 1.58, + "grad_norm": 3.9339442253112793, + "learning_rate": 9.83708542713568e-06, + "loss": 0.2198, + "step": 2125 + }, + { + "epoch": 1.6, + "grad_norm": 4.030758857727051, + "learning_rate": 9.834572864321609e-06, + "loss": 0.2199, + "step": 2150 + }, + { + "epoch": 1.62, + "grad_norm": 3.753197431564331, + "learning_rate": 9.832060301507538e-06, + "loss": 0.2143, + "step": 2175 + }, + { + "epoch": 1.64, + "grad_norm": 3.7082479000091553, + "learning_rate": 9.829547738693467e-06, + "loss": 0.2158, + "step": 2200 + }, + { + "epoch": 1.66, + "grad_norm": 4.238487720489502, + "learning_rate": 9.827035175879398e-06, + "loss": 0.2141, + "step": 2225 + }, + { + "epoch": 1.68, + "grad_norm": 3.7853856086730957, + "learning_rate": 9.824522613065328e-06, + "loss": 0.2176, + "step": 2250 + }, + { + "epoch": 1.7, + "grad_norm": 4.099862575531006, + "learning_rate": 9.822010050251257e-06, + "loss": 0.2118, + "step": 2275 + }, + { + "epoch": 1.72, + "grad_norm": 4.271936416625977, + "learning_rate": 9.819497487437186e-06, + "loss": 0.2115, + "step": 2300 + }, + { + "epoch": 1.73, + "grad_norm": 4.104953289031982, + "learning_rate": 9.816984924623116e-06, + "loss": 0.2082, + "step": 2325 + }, + { + "epoch": 1.75, + "grad_norm": 3.9651215076446533, + "learning_rate": 9.814472361809047e-06, + "loss": 0.2126, + "step": 2350 + }, + { + "epoch": 1.77, + "grad_norm": 3.9831976890563965, + "learning_rate": 9.811959798994976e-06, + "loss": 0.2088, + "step": 2375 + }, + { + "epoch": 1.79, + "grad_norm": 4.315521717071533, + "learning_rate": 9.809447236180905e-06, + "loss": 0.2133, + "step": 2400 + }, + { + "epoch": 1.81, + "grad_norm": 4.58767557144165, + "learning_rate": 9.806934673366835e-06, + "loss": 0.2063, + "step": 2425 + }, + { + "epoch": 1.83, + "grad_norm": 3.8159916400909424, + "learning_rate": 9.804422110552764e-06, + "loss": 0.2082, + "step": 2450 + }, + { + "epoch": 1.85, + "grad_norm": 3.47652268409729, + "learning_rate": 9.801909547738693e-06, + "loss": 0.2103, + "step": 2475 + }, + { + "epoch": 1.86, + "grad_norm": 4.544437408447266, + "learning_rate": 9.799396984924624e-06, + "loss": 0.207, + "step": 2500 + }, + { + "epoch": 1.88, + "grad_norm": 3.7717416286468506, + "learning_rate": 9.796884422110554e-06, + "loss": 0.2024, + "step": 2525 + }, + { + "epoch": 1.9, + "grad_norm": 4.249086856842041, + "learning_rate": 9.794371859296483e-06, + "loss": 0.2043, + "step": 2550 + }, + { + "epoch": 1.92, + "grad_norm": 3.9931509494781494, + "learning_rate": 9.791859296482414e-06, + "loss": 0.2038, + "step": 2575 + }, + { + "epoch": 1.94, + "grad_norm": 3.830528736114502, + "learning_rate": 9.789346733668342e-06, + "loss": 0.1956, + "step": 2600 + }, + { + "epoch": 1.96, + "grad_norm": 3.678571939468384, + "learning_rate": 9.786834170854273e-06, + "loss": 0.2041, + "step": 2625 + }, + { + "epoch": 1.98, + "grad_norm": 3.7052464485168457, + "learning_rate": 9.784321608040202e-06, + "loss": 0.1977, + "step": 2650 + }, + { + "epoch": 1.99, + "grad_norm": 3.7513132095336914, + "learning_rate": 9.781809045226131e-06, + "loss": 0.1995, + "step": 2675 + }, + { + "epoch": 2.01, + "grad_norm": 3.349226713180542, + "learning_rate": 9.77929648241206e-06, + "loss": 0.1875, + "step": 2700 + }, + { + "epoch": 2.03, + "grad_norm": 4.356109142303467, + "learning_rate": 9.77678391959799e-06, + "loss": 0.1769, + "step": 2725 + }, + { + "epoch": 2.05, + "grad_norm": 3.999945640563965, + "learning_rate": 9.774271356783921e-06, + "loss": 0.1807, + "step": 2750 + }, + { + "epoch": 2.07, + "grad_norm": 4.030362606048584, + "learning_rate": 9.77175879396985e-06, + "loss": 0.1745, + "step": 2775 + }, + { + "epoch": 2.09, + "grad_norm": 3.639435291290283, + "learning_rate": 9.76924623115578e-06, + "loss": 0.18, + "step": 2800 + }, + { + "epoch": 2.11, + "grad_norm": 3.786973237991333, + "learning_rate": 9.766733668341709e-06, + "loss": 0.1828, + "step": 2825 + }, + { + "epoch": 2.13, + "grad_norm": 4.052228927612305, + "learning_rate": 9.76422110552764e-06, + "loss": 0.1746, + "step": 2850 + }, + { + "epoch": 2.14, + "grad_norm": 3.4842100143432617, + "learning_rate": 9.761708542713568e-06, + "loss": 0.1767, + "step": 2875 + }, + { + "epoch": 2.16, + "grad_norm": 4.029205322265625, + "learning_rate": 9.759195979899499e-06, + "loss": 0.185, + "step": 2900 + }, + { + "epoch": 2.18, + "grad_norm": 3.4828429222106934, + "learning_rate": 9.756683417085428e-06, + "loss": 0.1753, + "step": 2925 + }, + { + "epoch": 2.2, + "grad_norm": 3.8920607566833496, + "learning_rate": 9.754170854271357e-06, + "loss": 0.1763, + "step": 2950 + }, + { + "epoch": 2.22, + "grad_norm": 3.6511199474334717, + "learning_rate": 9.751658291457288e-06, + "loss": 0.1743, + "step": 2975 + }, + { + "epoch": 2.24, + "grad_norm": 3.634979724884033, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1759, + "step": 3000 + }, + { + "epoch": 2.24, + "eval_loss": 0.16328808665275574, + "eval_runtime": 579.7601, + "eval_samples_per_second": 2.655, + "eval_steps_per_second": 2.655, + "eval_wer": 26.420056100981764, + "step": 3000 + }, + { + "epoch": 2.26, + "grad_norm": 4.122844219207764, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1743, + "step": 3025 + }, + { + "epoch": 2.27, + "grad_norm": 3.673856258392334, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1725, + "step": 3050 + }, + { + "epoch": 2.29, + "grad_norm": 4.1624579429626465, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1675, + "step": 3075 + }, + { + "epoch": 2.31, + "grad_norm": 4.554078102111816, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1737, + "step": 3100 + }, + { + "epoch": 2.33, + "grad_norm": 4.045281410217285, + "learning_rate": 9.736582914572866e-06, + "loss": 0.1719, + "step": 3125 + }, + { + "epoch": 2.35, + "grad_norm": 3.8828933238983154, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1713, + "step": 3150 + }, + { + "epoch": 2.37, + "grad_norm": 4.16017484664917, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1783, + "step": 3175 + }, + { + "epoch": 2.39, + "grad_norm": 3.866107225418091, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1712, + "step": 3200 + }, + { + "epoch": 2.4, + "grad_norm": 4.2903900146484375, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1686, + "step": 3225 + }, + { + "epoch": 2.42, + "grad_norm": 4.0998382568359375, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1724, + "step": 3250 + }, + { + "epoch": 2.44, + "grad_norm": 3.6465792655944824, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1704, + "step": 3275 + }, + { + "epoch": 2.46, + "grad_norm": 3.6287083625793457, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1635, + "step": 3300 + }, + { + "epoch": 2.48, + "grad_norm": 4.552248001098633, + "learning_rate": 9.716482412060302e-06, + "loss": 0.169, + "step": 3325 + }, + { + "epoch": 2.5, + "grad_norm": 3.657130718231201, + "learning_rate": 9.713969849246232e-06, + "loss": 0.164, + "step": 3350 + }, + { + "epoch": 2.52, + "grad_norm": 3.996135950088501, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1692, + "step": 3375 + }, + { + "epoch": 2.54, + "grad_norm": 3.7055537700653076, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1681, + "step": 3400 + }, + { + "epoch": 2.55, + "grad_norm": 3.784309148788452, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1715, + "step": 3425 + }, + { + "epoch": 2.57, + "grad_norm": 3.9796767234802246, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1616, + "step": 3450 + }, + { + "epoch": 2.59, + "grad_norm": 3.5646283626556396, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1693, + "step": 3475 + }, + { + "epoch": 2.61, + "grad_norm": 3.7155604362487793, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1623, + "step": 3500 + }, + { + "epoch": 2.63, + "grad_norm": 3.5570499897003174, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1615, + "step": 3525 + }, + { + "epoch": 2.65, + "grad_norm": 3.9085853099823, + "learning_rate": 9.69386934673367e-06, + "loss": 0.1685, + "step": 3550 + }, + { + "epoch": 2.67, + "grad_norm": 3.761129140853882, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1627, + "step": 3575 + }, + { + "epoch": 2.68, + "grad_norm": 4.001754283905029, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1695, + "step": 3600 + }, + { + "epoch": 2.7, + "grad_norm": 3.7110769748687744, + "learning_rate": 9.686331658291457e-06, + "loss": 0.1599, + "step": 3625 + }, + { + "epoch": 2.72, + "grad_norm": 3.774944305419922, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1607, + "step": 3650 + }, + { + "epoch": 2.74, + "grad_norm": 3.790067195892334, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1612, + "step": 3675 + }, + { + "epoch": 2.76, + "grad_norm": 3.9624180793762207, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1632, + "step": 3700 + }, + { + "epoch": 2.78, + "grad_norm": 3.2799196243286133, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1622, + "step": 3725 + }, + { + "epoch": 2.8, + "grad_norm": 3.3562066555023193, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1622, + "step": 3750 + }, + { + "epoch": 2.82, + "grad_norm": 3.9907753467559814, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1639, + "step": 3775 + }, + { + "epoch": 2.83, + "grad_norm": 3.3562815189361572, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1572, + "step": 3800 + }, + { + "epoch": 2.85, + "grad_norm": 3.978476047515869, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1601, + "step": 3825 + }, + { + "epoch": 2.87, + "grad_norm": 3.809995174407959, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1592, + "step": 3850 + }, + { + "epoch": 2.89, + "grad_norm": 4.045068740844727, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1613, + "step": 3875 + }, + { + "epoch": 2.91, + "grad_norm": 3.7526280879974365, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1618, + "step": 3900 + }, + { + "epoch": 2.93, + "grad_norm": 3.554475784301758, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1581, + "step": 3925 + }, + { + "epoch": 2.95, + "grad_norm": 3.790214776992798, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1543, + "step": 3950 + }, + { + "epoch": 2.96, + "grad_norm": 3.729609251022339, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1576, + "step": 3975 + }, + { + "epoch": 2.98, + "grad_norm": 3.6014366149902344, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1539, + "step": 4000 + }, + { + "epoch": 2.98, + "eval_loss": 0.14688095450401306, + "eval_runtime": 587.6728, + "eval_samples_per_second": 2.619, + "eval_steps_per_second": 2.619, + "eval_wer": 25.023375409069658, + "step": 4000 + }, + { + "epoch": 3.0, + "grad_norm": 3.0823733806610107, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1506, + "step": 4025 + }, + { + "epoch": 3.02, + "grad_norm": 3.512983798980713, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1408, + "step": 4050 + }, + { + "epoch": 3.04, + "grad_norm": 3.3957369327545166, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1397, + "step": 4075 + }, + { + "epoch": 3.06, + "grad_norm": 3.293747901916504, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1353, + "step": 4100 + }, + { + "epoch": 3.08, + "grad_norm": 3.3634533882141113, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1377, + "step": 4125 + }, + { + "epoch": 3.09, + "grad_norm": 3.3200225830078125, + "learning_rate": 9.63356783919598e-06, + "loss": 0.132, + "step": 4150 + }, + { + "epoch": 3.11, + "grad_norm": 3.8606081008911133, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1391, + "step": 4175 + }, + { + "epoch": 3.13, + "grad_norm": 3.438164710998535, + "learning_rate": 9.62854271356784e-06, + "loss": 0.136, + "step": 4200 + }, + { + "epoch": 3.15, + "grad_norm": 4.000027656555176, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1406, + "step": 4225 + }, + { + "epoch": 3.17, + "grad_norm": 3.455836057662964, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1394, + "step": 4250 + }, + { + "epoch": 3.19, + "grad_norm": 3.4425647258758545, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1402, + "step": 4275 + }, + { + "epoch": 3.21, + "grad_norm": 3.686373710632324, + "learning_rate": 9.618492462311558e-06, + "loss": 0.134, + "step": 4300 + }, + { + "epoch": 3.23, + "grad_norm": 3.138533115386963, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1315, + "step": 4325 + }, + { + "epoch": 3.24, + "grad_norm": 3.6843111515045166, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1377, + "step": 4350 + }, + { + "epoch": 3.26, + "grad_norm": 3.469071388244629, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1379, + "step": 4375 + }, + { + "epoch": 3.28, + "grad_norm": 3.524423599243164, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1376, + "step": 4400 + }, + { + "epoch": 3.3, + "grad_norm": 3.221924066543579, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1372, + "step": 4425 + }, + { + "epoch": 3.32, + "grad_norm": 3.1295394897460938, + "learning_rate": 9.603417085427137e-06, + "loss": 0.133, + "step": 4450 + }, + { + "epoch": 3.34, + "grad_norm": 4.17547607421875, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1354, + "step": 4475 + }, + { + "epoch": 3.36, + "grad_norm": 3.5507819652557373, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1373, + "step": 4500 + }, + { + "epoch": 3.37, + "grad_norm": 3.371926784515381, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1354, + "step": 4525 + }, + { + "epoch": 3.39, + "grad_norm": 3.7322168350219727, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1334, + "step": 4550 + }, + { + "epoch": 3.41, + "grad_norm": 4.184456825256348, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1356, + "step": 4575 + }, + { + "epoch": 3.43, + "grad_norm": 3.5868937969207764, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1351, + "step": 4600 + }, + { + "epoch": 3.45, + "grad_norm": 3.41158127784729, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1328, + "step": 4625 + }, + { + "epoch": 3.47, + "grad_norm": 3.7883119583129883, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1361, + "step": 4650 + }, + { + "epoch": 3.49, + "grad_norm": 3.5131418704986572, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1357, + "step": 4675 + }, + { + "epoch": 3.5, + "grad_norm": 3.53794264793396, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1355, + "step": 4700 + }, + { + "epoch": 3.52, + "grad_norm": 3.370880365371704, + "learning_rate": 9.575778894472363e-06, + "loss": 0.133, + "step": 4725 + }, + { + "epoch": 3.54, + "grad_norm": 3.702864408493042, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1344, + "step": 4750 + }, + { + "epoch": 3.56, + "grad_norm": 3.534820556640625, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1362, + "step": 4775 + }, + { + "epoch": 3.58, + "grad_norm": 3.612382650375366, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1295, + "step": 4800 + }, + { + "epoch": 3.6, + "grad_norm": 3.3591573238372803, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1314, + "step": 4825 + }, + { + "epoch": 3.62, + "grad_norm": 3.476229429244995, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1288, + "step": 4850 + }, + { + "epoch": 3.64, + "grad_norm": 3.081845283508301, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1308, + "step": 4875 + }, + { + "epoch": 3.65, + "grad_norm": 3.755112648010254, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1318, + "step": 4900 + }, + { + "epoch": 3.67, + "grad_norm": 3.234389543533325, + "learning_rate": 9.5556783919598e-06, + "loss": 0.134, + "step": 4925 + }, + { + "epoch": 3.69, + "grad_norm": 3.564835548400879, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1323, + "step": 4950 + }, + { + "epoch": 3.71, + "grad_norm": 3.182039499282837, + "learning_rate": 9.550653266331658e-06, + "loss": 0.132, + "step": 4975 + }, + { + "epoch": 3.73, + "grad_norm": 3.184821605682373, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1327, + "step": 5000 + }, + { + "epoch": 3.73, + "eval_loss": 0.13945698738098145, + "eval_runtime": 581.5247, + "eval_samples_per_second": 2.646, + "eval_steps_per_second": 2.646, + "eval_wer": 23.357877512856476, + "step": 5000 + }, + { + "epoch": 3.75, + "grad_norm": 3.2023651599884033, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1287, + "step": 5025 + }, + { + "epoch": 3.77, + "grad_norm": 3.4853386878967285, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1297, + "step": 5050 + }, + { + "epoch": 3.78, + "grad_norm": 3.5476901531219482, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1309, + "step": 5075 + }, + { + "epoch": 3.8, + "grad_norm": 3.764289617538452, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1311, + "step": 5100 + }, + { + "epoch": 3.82, + "grad_norm": 2.7996716499328613, + "learning_rate": 9.535577889447237e-06, + "loss": 0.1294, + "step": 5125 + }, + { + "epoch": 3.84, + "grad_norm": 3.1961171627044678, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1257, + "step": 5150 + }, + { + "epoch": 3.86, + "grad_norm": 3.388976812362671, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1256, + "step": 5175 + }, + { + "epoch": 3.88, + "grad_norm": 3.795241355895996, + "learning_rate": 9.528040201005025e-06, + "loss": 0.1319, + "step": 5200 + }, + { + "epoch": 3.9, + "grad_norm": 3.5693612098693848, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1291, + "step": 5225 + }, + { + "epoch": 3.91, + "grad_norm": 3.237334728240967, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1298, + "step": 5250 + }, + { + "epoch": 3.93, + "grad_norm": 3.833705425262451, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1291, + "step": 5275 + }, + { + "epoch": 3.95, + "grad_norm": 3.5801639556884766, + "learning_rate": 9.517989949748744e-06, + "loss": 0.1267, + "step": 5300 + }, + { + "epoch": 3.97, + "grad_norm": 3.3994545936584473, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1303, + "step": 5325 + }, + { + "epoch": 3.99, + "grad_norm": 3.8046116828918457, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1268, + "step": 5350 + }, + { + "epoch": 4.01, + "grad_norm": 3.1093950271606445, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1241, + "step": 5375 + }, + { + "epoch": 4.03, + "grad_norm": 3.4190762042999268, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1109, + "step": 5400 + }, + { + "epoch": 4.05, + "grad_norm": 2.984553575515747, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1103, + "step": 5425 + }, + { + "epoch": 4.06, + "grad_norm": 3.871157646179199, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1143, + "step": 5450 + }, + { + "epoch": 4.08, + "grad_norm": 3.0750741958618164, + "learning_rate": 9.500402010050253e-06, + "loss": 0.1103, + "step": 5475 + }, + { + "epoch": 4.1, + "grad_norm": 3.5931217670440674, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1101, + "step": 5500 + }, + { + "epoch": 4.12, + "grad_norm": 3.3157413005828857, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1099, + "step": 5525 + }, + { + "epoch": 4.14, + "grad_norm": 3.389704942703247, + "learning_rate": 9.49286432160804e-06, + "loss": 0.1112, + "step": 5550 + }, + { + "epoch": 4.16, + "grad_norm": 3.1446218490600586, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1101, + "step": 5575 + }, + { + "epoch": 4.18, + "grad_norm": 3.8250577449798584, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1159, + "step": 5600 + }, + { + "epoch": 4.19, + "grad_norm": 3.381117582321167, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1121, + "step": 5625 + }, + { + "epoch": 4.21, + "grad_norm": 3.3357720375061035, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1095, + "step": 5650 + }, + { + "epoch": 4.23, + "grad_norm": 3.7618515491485596, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1129, + "step": 5675 + }, + { + "epoch": 4.25, + "grad_norm": 3.5188496112823486, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1091, + "step": 5700 + }, + { + "epoch": 4.27, + "grad_norm": 3.367509126663208, + "learning_rate": 9.475276381909548e-06, + "loss": 0.1075, + "step": 5725 + }, + { + "epoch": 4.29, + "grad_norm": 3.5425543785095215, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1102, + "step": 5750 + }, + { + "epoch": 4.31, + "grad_norm": 3.612241268157959, + "learning_rate": 9.470251256281408e-06, + "loss": 0.1132, + "step": 5775 + }, + { + "epoch": 4.33, + "grad_norm": 3.579725503921509, + "learning_rate": 9.467738693467337e-06, + "loss": 0.1076, + "step": 5800 + }, + { + "epoch": 4.34, + "grad_norm": 2.9764626026153564, + "learning_rate": 9.465226130653267e-06, + "loss": 0.1094, + "step": 5825 + }, + { + "epoch": 4.36, + "grad_norm": 3.2718448638916016, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1066, + "step": 5850 + }, + { + "epoch": 4.38, + "grad_norm": 3.6336781978607178, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1112, + "step": 5875 + }, + { + "epoch": 4.4, + "grad_norm": 2.886262893676758, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1081, + "step": 5900 + }, + { + "epoch": 4.42, + "grad_norm": 3.3497533798217773, + "learning_rate": 9.455175879396986e-06, + "loss": 0.1089, + "step": 5925 + }, + { + "epoch": 4.44, + "grad_norm": 3.777381181716919, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1092, + "step": 5950 + }, + { + "epoch": 4.46, + "grad_norm": 3.6995489597320557, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1062, + "step": 5975 + }, + { + "epoch": 4.47, + "grad_norm": 3.1740853786468506, + "learning_rate": 9.447638190954774e-06, + "loss": 0.11, + "step": 6000 + }, + { + "epoch": 4.47, + "eval_loss": 0.1346590667963028, + "eval_runtime": 578.7185, + "eval_samples_per_second": 2.659, + "eval_steps_per_second": 2.659, + "eval_wer": 22.206638616175784, + "step": 6000 + }, + { + "epoch": 4.49, + "grad_norm": 3.4268691539764404, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1096, + "step": 6025 + }, + { + "epoch": 4.51, + "grad_norm": 3.5935819149017334, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1138, + "step": 6050 + }, + { + "epoch": 4.53, + "grad_norm": 3.4896225929260254, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1092, + "step": 6075 + }, + { + "epoch": 4.55, + "grad_norm": 3.754469156265259, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1099, + "step": 6100 + }, + { + "epoch": 4.57, + "grad_norm": 3.136467933654785, + "learning_rate": 9.435075376884422e-06, + "loss": 0.1082, + "step": 6125 + }, + { + "epoch": 4.59, + "grad_norm": 3.126904249191284, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1067, + "step": 6150 + }, + { + "epoch": 4.6, + "grad_norm": 3.243851900100708, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1087, + "step": 6175 + }, + { + "epoch": 4.62, + "grad_norm": 3.725548505783081, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1057, + "step": 6200 + }, + { + "epoch": 4.64, + "grad_norm": 3.6674633026123047, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1106, + "step": 6225 + }, + { + "epoch": 4.66, + "grad_norm": 4.121982574462891, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1097, + "step": 6250 + }, + { + "epoch": 4.68, + "grad_norm": 3.1855649948120117, + "learning_rate": 9.42e-06, + "loss": 0.107, + "step": 6275 + }, + { + "epoch": 4.7, + "grad_norm": 3.5082411766052246, + "learning_rate": 9.41748743718593e-06, + "loss": 0.1077, + "step": 6300 + }, + { + "epoch": 4.72, + "grad_norm": 3.2072770595550537, + "learning_rate": 9.41497487437186e-06, + "loss": 0.1113, + "step": 6325 + }, + { + "epoch": 4.74, + "grad_norm": 3.2020761966705322, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1046, + "step": 6350 + }, + { + "epoch": 4.75, + "grad_norm": 3.229517698287964, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1107, + "step": 6375 + }, + { + "epoch": 4.77, + "grad_norm": 3.6394400596618652, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1083, + "step": 6400 + }, + { + "epoch": 4.79, + "grad_norm": 3.3170835971832275, + "learning_rate": 9.404924623115579e-06, + "loss": 0.1091, + "step": 6425 + }, + { + "epoch": 4.81, + "grad_norm": 3.338113307952881, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1098, + "step": 6450 + }, + { + "epoch": 4.83, + "grad_norm": 3.080709457397461, + "learning_rate": 9.399899497487438e-06, + "loss": 0.1063, + "step": 6475 + }, + { + "epoch": 4.85, + "grad_norm": 3.196829319000244, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1052, + "step": 6500 + }, + { + "epoch": 4.87, + "grad_norm": 3.379059076309204, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1076, + "step": 6525 + }, + { + "epoch": 4.88, + "grad_norm": 3.577249765396118, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1062, + "step": 6550 + }, + { + "epoch": 4.9, + "grad_norm": 3.073493003845215, + "learning_rate": 9.389849246231157e-06, + "loss": 0.1045, + "step": 6575 + }, + { + "epoch": 4.92, + "grad_norm": 3.040921926498413, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1059, + "step": 6600 + }, + { + "epoch": 4.94, + "grad_norm": 3.779496669769287, + "learning_rate": 9.384824120603015e-06, + "loss": 0.108, + "step": 6625 + }, + { + "epoch": 4.96, + "grad_norm": 3.5758798122406006, + "learning_rate": 9.382311557788946e-06, + "loss": 0.1036, + "step": 6650 + }, + { + "epoch": 4.98, + "grad_norm": 3.7807416915893555, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1067, + "step": 6675 + }, + { + "epoch": 5.0, + "grad_norm": 3.37501859664917, + "learning_rate": 9.377286432160805e-06, + "loss": 0.107, + "step": 6700 + }, + { + "epoch": 5.01, + "grad_norm": 2.9533002376556396, + "learning_rate": 9.374773869346734e-06, + "loss": 0.0899, + "step": 6725 + }, + { + "epoch": 5.03, + "grad_norm": 3.157862901687622, + "learning_rate": 9.372261306532664e-06, + "loss": 0.0917, + "step": 6750 + }, + { + "epoch": 5.05, + "grad_norm": 3.407156229019165, + "learning_rate": 9.369748743718595e-06, + "loss": 0.0906, + "step": 6775 + }, + { + "epoch": 5.07, + "grad_norm": 2.9692556858062744, + "learning_rate": 9.367236180904524e-06, + "loss": 0.0884, + "step": 6800 + }, + { + "epoch": 5.09, + "grad_norm": 3.108790159225464, + "learning_rate": 9.364723618090453e-06, + "loss": 0.0863, + "step": 6825 + }, + { + "epoch": 5.11, + "grad_norm": 3.1673364639282227, + "learning_rate": 9.362211055276383e-06, + "loss": 0.0914, + "step": 6850 + }, + { + "epoch": 5.13, + "grad_norm": 3.152794122695923, + "learning_rate": 9.359698492462312e-06, + "loss": 0.0905, + "step": 6875 + }, + { + "epoch": 5.15, + "grad_norm": 3.3404650688171387, + "learning_rate": 9.357185929648241e-06, + "loss": 0.0871, + "step": 6900 + }, + { + "epoch": 5.16, + "grad_norm": 3.175661563873291, + "learning_rate": 9.354673366834172e-06, + "loss": 0.0923, + "step": 6925 + }, + { + "epoch": 5.18, + "grad_norm": 3.107692003250122, + "learning_rate": 9.352160804020101e-06, + "loss": 0.0901, + "step": 6950 + }, + { + "epoch": 5.2, + "grad_norm": 3.353799819946289, + "learning_rate": 9.34964824120603e-06, + "loss": 0.085, + "step": 6975 + }, + { + "epoch": 5.22, + "grad_norm": 2.901813268661499, + "learning_rate": 9.34713567839196e-06, + "loss": 0.0926, + "step": 7000 + }, + { + "epoch": 5.22, + "eval_loss": 0.1327398419380188, + "eval_runtime": 583.5442, + "eval_samples_per_second": 2.637, + "eval_steps_per_second": 2.637, + "eval_wer": 22.884525479195887, + "step": 7000 + }, + { + "epoch": 5.24, + "grad_norm": 2.9111287593841553, + "learning_rate": 9.34462311557789e-06, + "loss": 0.0891, + "step": 7025 + }, + { + "epoch": 5.26, + "grad_norm": 2.990967273712158, + "learning_rate": 9.34211055276382e-06, + "loss": 0.0912, + "step": 7050 + }, + { + "epoch": 5.28, + "grad_norm": 2.912088394165039, + "learning_rate": 9.33959798994975e-06, + "loss": 0.0904, + "step": 7075 + }, + { + "epoch": 5.29, + "grad_norm": 3.1051769256591797, + "learning_rate": 9.337085427135679e-06, + "loss": 0.0913, + "step": 7100 + }, + { + "epoch": 5.31, + "grad_norm": 3.291818141937256, + "learning_rate": 9.334572864321608e-06, + "loss": 0.0941, + "step": 7125 + }, + { + "epoch": 5.33, + "grad_norm": 3.4875032901763916, + "learning_rate": 9.332060301507538e-06, + "loss": 0.0909, + "step": 7150 + }, + { + "epoch": 5.35, + "grad_norm": 3.259011745452881, + "learning_rate": 9.329547738693469e-06, + "loss": 0.0875, + "step": 7175 + }, + { + "epoch": 5.37, + "grad_norm": 2.8745439052581787, + "learning_rate": 9.327035175879398e-06, + "loss": 0.0887, + "step": 7200 + }, + { + "epoch": 5.39, + "grad_norm": 3.000544786453247, + "learning_rate": 9.324522613065327e-06, + "loss": 0.0901, + "step": 7225 + }, + { + "epoch": 5.41, + "grad_norm": 3.3648929595947266, + "learning_rate": 9.322010050251257e-06, + "loss": 0.0885, + "step": 7250 + }, + { + "epoch": 5.43, + "grad_norm": 3.0053303241729736, + "learning_rate": 9.319497487437186e-06, + "loss": 0.0924, + "step": 7275 + }, + { + "epoch": 5.44, + "grad_norm": 2.817584753036499, + "learning_rate": 9.316984924623115e-06, + "loss": 0.0879, + "step": 7300 + }, + { + "epoch": 5.46, + "grad_norm": 3.136594772338867, + "learning_rate": 9.314472361809046e-06, + "loss": 0.0914, + "step": 7325 + }, + { + "epoch": 5.48, + "grad_norm": 3.5466651916503906, + "learning_rate": 9.311959798994976e-06, + "loss": 0.0885, + "step": 7350 + }, + { + "epoch": 5.5, + "grad_norm": 3.23614501953125, + "learning_rate": 9.309447236180905e-06, + "loss": 0.0873, + "step": 7375 + }, + { + "epoch": 5.52, + "grad_norm": 2.9725496768951416, + "learning_rate": 9.306934673366836e-06, + "loss": 0.0916, + "step": 7400 + }, + { + "epoch": 5.54, + "grad_norm": 2.7509000301361084, + "learning_rate": 9.304422110552764e-06, + "loss": 0.0912, + "step": 7425 + }, + { + "epoch": 5.56, + "grad_norm": 3.397359848022461, + "learning_rate": 9.301909547738695e-06, + "loss": 0.0872, + "step": 7450 + }, + { + "epoch": 5.57, + "grad_norm": 3.472618341445923, + "learning_rate": 9.299396984924624e-06, + "loss": 0.0894, + "step": 7475 + }, + { + "epoch": 5.59, + "grad_norm": 3.545205593109131, + "learning_rate": 9.296884422110553e-06, + "loss": 0.0908, + "step": 7500 + }, + { + "epoch": 5.61, + "grad_norm": 3.6060521602630615, + "learning_rate": 9.294371859296483e-06, + "loss": 0.0937, + "step": 7525 + }, + { + "epoch": 5.63, + "grad_norm": 3.2242794036865234, + "learning_rate": 9.291859296482412e-06, + "loss": 0.0901, + "step": 7550 + }, + { + "epoch": 5.65, + "grad_norm": 3.2847938537597656, + "learning_rate": 9.289346733668343e-06, + "loss": 0.091, + "step": 7575 + }, + { + "epoch": 5.67, + "grad_norm": 3.446183681488037, + "learning_rate": 9.286834170854272e-06, + "loss": 0.0897, + "step": 7600 + }, + { + "epoch": 5.69, + "grad_norm": 3.0921542644500732, + "learning_rate": 9.284321608040202e-06, + "loss": 0.0919, + "step": 7625 + }, + { + "epoch": 5.7, + "grad_norm": 3.3843624591827393, + "learning_rate": 9.281809045226131e-06, + "loss": 0.0929, + "step": 7650 + }, + { + "epoch": 5.72, + "grad_norm": 3.195985794067383, + "learning_rate": 9.279296482412062e-06, + "loss": 0.0919, + "step": 7675 + }, + { + "epoch": 5.74, + "grad_norm": 3.053060531616211, + "learning_rate": 9.27678391959799e-06, + "loss": 0.0883, + "step": 7700 + }, + { + "epoch": 5.76, + "grad_norm": 3.5800364017486572, + "learning_rate": 9.27427135678392e-06, + "loss": 0.0893, + "step": 7725 + }, + { + "epoch": 5.78, + "grad_norm": 3.4376699924468994, + "learning_rate": 9.27175879396985e-06, + "loss": 0.0917, + "step": 7750 + }, + { + "epoch": 5.8, + "grad_norm": 3.2491955757141113, + "learning_rate": 9.26924623115578e-06, + "loss": 0.0893, + "step": 7775 + }, + { + "epoch": 5.82, + "grad_norm": 3.224717378616333, + "learning_rate": 9.26673366834171e-06, + "loss": 0.0917, + "step": 7800 + }, + { + "epoch": 5.84, + "grad_norm": 2.9739181995391846, + "learning_rate": 9.264221105527638e-06, + "loss": 0.0922, + "step": 7825 + }, + { + "epoch": 5.85, + "grad_norm": 3.209761619567871, + "learning_rate": 9.261708542713569e-06, + "loss": 0.0899, + "step": 7850 + }, + { + "epoch": 5.87, + "grad_norm": 3.712392568588257, + "learning_rate": 9.259195979899498e-06, + "loss": 0.086, + "step": 7875 + }, + { + "epoch": 5.89, + "grad_norm": 3.2784647941589355, + "learning_rate": 9.256683417085428e-06, + "loss": 0.0871, + "step": 7900 + }, + { + "epoch": 5.91, + "grad_norm": 3.5322415828704834, + "learning_rate": 9.254170854271357e-06, + "loss": 0.0866, + "step": 7925 + }, + { + "epoch": 5.93, + "grad_norm": 3.879502773284912, + "learning_rate": 9.251658291457288e-06, + "loss": 0.0926, + "step": 7950 + }, + { + "epoch": 5.95, + "grad_norm": 3.022221326828003, + "learning_rate": 9.249145728643217e-06, + "loss": 0.0875, + "step": 7975 + }, + { + "epoch": 5.97, + "grad_norm": 2.8471436500549316, + "learning_rate": 9.246633165829147e-06, + "loss": 0.0913, + "step": 8000 + }, + { + "epoch": 5.97, + "eval_loss": 0.129538893699646, + "eval_runtime": 581.531, + "eval_samples_per_second": 2.646, + "eval_steps_per_second": 2.646, + "eval_wer": 21.63978494623656, + "step": 8000 + }, + { + "epoch": 5.98, + "grad_norm": 3.7146530151367188, + "learning_rate": 9.244120603015076e-06, + "loss": 0.0876, + "step": 8025 + }, + { + "epoch": 6.0, + "grad_norm": 2.7357022762298584, + "learning_rate": 9.241608040201005e-06, + "loss": 0.091, + "step": 8050 + }, + { + "epoch": 6.02, + "grad_norm": 2.9366495609283447, + "learning_rate": 9.239095477386936e-06, + "loss": 0.0739, + "step": 8075 + }, + { + "epoch": 6.04, + "grad_norm": 3.077509880065918, + "learning_rate": 9.236582914572864e-06, + "loss": 0.0748, + "step": 8100 + }, + { + "epoch": 6.06, + "grad_norm": 2.791722536087036, + "learning_rate": 9.234070351758795e-06, + "loss": 0.0752, + "step": 8125 + }, + { + "epoch": 6.08, + "grad_norm": 3.175020933151245, + "learning_rate": 9.231557788944724e-06, + "loss": 0.0764, + "step": 8150 + }, + { + "epoch": 6.1, + "grad_norm": 3.012601852416992, + "learning_rate": 9.229045226130654e-06, + "loss": 0.0726, + "step": 8175 + }, + { + "epoch": 6.11, + "grad_norm": 3.3020412921905518, + "learning_rate": 9.226532663316585e-06, + "loss": 0.0762, + "step": 8200 + }, + { + "epoch": 6.13, + "grad_norm": 3.018819808959961, + "learning_rate": 9.224020100502514e-06, + "loss": 0.0727, + "step": 8225 + }, + { + "epoch": 6.15, + "grad_norm": 2.6143438816070557, + "learning_rate": 9.221507537688443e-06, + "loss": 0.0754, + "step": 8250 + }, + { + "epoch": 6.17, + "grad_norm": 2.89058518409729, + "learning_rate": 9.218994974874373e-06, + "loss": 0.0724, + "step": 8275 + }, + { + "epoch": 6.19, + "grad_norm": 3.119272470474243, + "learning_rate": 9.216482412060302e-06, + "loss": 0.0737, + "step": 8300 + }, + { + "epoch": 6.21, + "grad_norm": 2.9511525630950928, + "learning_rate": 9.213969849246231e-06, + "loss": 0.0742, + "step": 8325 + }, + { + "epoch": 6.23, + "grad_norm": 3.098759174346924, + "learning_rate": 9.211457286432162e-06, + "loss": 0.077, + "step": 8350 + }, + { + "epoch": 6.25, + "grad_norm": 3.50549054145813, + "learning_rate": 9.20894472361809e-06, + "loss": 0.0725, + "step": 8375 + }, + { + "epoch": 6.26, + "grad_norm": 2.914422035217285, + "learning_rate": 9.206432160804021e-06, + "loss": 0.0747, + "step": 8400 + }, + { + "epoch": 6.28, + "grad_norm": 3.109224319458008, + "learning_rate": 9.20391959798995e-06, + "loss": 0.0746, + "step": 8425 + }, + { + "epoch": 6.3, + "grad_norm": 2.9399197101593018, + "learning_rate": 9.20140703517588e-06, + "loss": 0.0733, + "step": 8450 + }, + { + "epoch": 6.32, + "grad_norm": 2.878178596496582, + "learning_rate": 9.19889447236181e-06, + "loss": 0.075, + "step": 8475 + }, + { + "epoch": 6.34, + "grad_norm": 3.163640022277832, + "learning_rate": 9.19638190954774e-06, + "loss": 0.0726, + "step": 8500 + }, + { + "epoch": 6.36, + "grad_norm": 2.8171629905700684, + "learning_rate": 9.19386934673367e-06, + "loss": 0.0736, + "step": 8525 + }, + { + "epoch": 6.38, + "grad_norm": 3.2045319080352783, + "learning_rate": 9.191356783919599e-06, + "loss": 0.0719, + "step": 8550 + }, + { + "epoch": 6.39, + "grad_norm": 3.039659023284912, + "learning_rate": 9.188844221105528e-06, + "loss": 0.0753, + "step": 8575 + }, + { + "epoch": 6.41, + "grad_norm": 3.253077507019043, + "learning_rate": 9.186331658291459e-06, + "loss": 0.0787, + "step": 8600 + }, + { + "epoch": 6.43, + "grad_norm": 3.128434896469116, + "learning_rate": 9.183819095477388e-06, + "loss": 0.0728, + "step": 8625 + }, + { + "epoch": 6.45, + "grad_norm": 3.166520833969116, + "learning_rate": 9.181306532663317e-06, + "loss": 0.0727, + "step": 8650 + }, + { + "epoch": 6.47, + "grad_norm": 3.312800407409668, + "learning_rate": 9.178793969849247e-06, + "loss": 0.075, + "step": 8675 + }, + { + "epoch": 6.49, + "grad_norm": 3.42051100730896, + "learning_rate": 9.176281407035176e-06, + "loss": 0.0796, + "step": 8700 + }, + { + "epoch": 6.51, + "grad_norm": 2.6824166774749756, + "learning_rate": 9.173768844221105e-06, + "loss": 0.0724, + "step": 8725 + }, + { + "epoch": 6.52, + "grad_norm": 3.0126898288726807, + "learning_rate": 9.171256281407036e-06, + "loss": 0.0742, + "step": 8750 + }, + { + "epoch": 6.54, + "grad_norm": 3.119722366333008, + "learning_rate": 9.168743718592966e-06, + "loss": 0.0748, + "step": 8775 + }, + { + "epoch": 6.56, + "grad_norm": 3.195042133331299, + "learning_rate": 9.166231155778895e-06, + "loss": 0.0739, + "step": 8800 + }, + { + "epoch": 6.58, + "grad_norm": 3.652412176132202, + "learning_rate": 9.163718592964826e-06, + "loss": 0.0751, + "step": 8825 + }, + { + "epoch": 6.6, + "grad_norm": 3.4120712280273438, + "learning_rate": 9.161206030150754e-06, + "loss": 0.075, + "step": 8850 + }, + { + "epoch": 6.62, + "grad_norm": 3.3996012210845947, + "learning_rate": 9.158693467336685e-06, + "loss": 0.0746, + "step": 8875 + }, + { + "epoch": 6.64, + "grad_norm": 3.1607606410980225, + "learning_rate": 9.156180904522614e-06, + "loss": 0.0712, + "step": 8900 + }, + { + "epoch": 6.66, + "grad_norm": 3.5317158699035645, + "learning_rate": 9.153668341708543e-06, + "loss": 0.0756, + "step": 8925 + }, + { + "epoch": 6.67, + "grad_norm": 2.9540088176727295, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0743, + "step": 8950 + }, + { + "epoch": 6.69, + "grad_norm": 3.031726837158203, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0727, + "step": 8975 + }, + { + "epoch": 6.71, + "grad_norm": 2.942370653152466, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0758, + "step": 9000 + }, + { + "epoch": 6.71, + "eval_loss": 0.13152052462100983, + "eval_runtime": 583.4445, + "eval_samples_per_second": 2.638, + "eval_steps_per_second": 2.638, + "eval_wer": 21.6164095371669, + "step": 9000 + }, + { + "epoch": 6.73, + "grad_norm": 2.9992763996124268, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0768, + "step": 9025 + }, + { + "epoch": 6.75, + "grad_norm": 3.2120981216430664, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0759, + "step": 9050 + }, + { + "epoch": 6.77, + "grad_norm": 3.1878435611724854, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0769, + "step": 9075 + }, + { + "epoch": 6.79, + "grad_norm": 2.712501287460327, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0735, + "step": 9100 + }, + { + "epoch": 6.8, + "grad_norm": 3.429577112197876, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0772, + "step": 9125 + }, + { + "epoch": 6.82, + "grad_norm": 3.3869950771331787, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0739, + "step": 9150 + }, + { + "epoch": 6.84, + "grad_norm": 3.0881145000457764, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0768, + "step": 9175 + }, + { + "epoch": 6.86, + "grad_norm": 3.2937190532684326, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0754, + "step": 9200 + }, + { + "epoch": 6.88, + "grad_norm": 3.203866481781006, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0734, + "step": 9225 + }, + { + "epoch": 6.9, + "grad_norm": 3.2549185752868652, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0724, + "step": 9250 + }, + { + "epoch": 6.92, + "grad_norm": 3.0252959728240967, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0751, + "step": 9275 + }, + { + "epoch": 6.94, + "grad_norm": 3.3162567615509033, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0734, + "step": 9300 + }, + { + "epoch": 6.95, + "grad_norm": 3.1085686683654785, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0754, + "step": 9325 + }, + { + "epoch": 6.97, + "grad_norm": 3.605956792831421, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0744, + "step": 9350 + }, + { + "epoch": 6.99, + "grad_norm": 3.1098530292510986, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0723, + "step": 9375 + }, + { + "epoch": 7.01, + "grad_norm": 2.8826663494110107, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0633, + "step": 9400 + }, + { + "epoch": 7.03, + "grad_norm": 3.199134588241577, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0589, + "step": 9425 + }, + { + "epoch": 7.05, + "grad_norm": 2.9372718334198, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0613, + "step": 9450 + }, + { + "epoch": 7.07, + "grad_norm": 2.7198410034179688, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0596, + "step": 9475 + }, + { + "epoch": 7.08, + "grad_norm": 3.215033531188965, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0586, + "step": 9500 + }, + { + "epoch": 7.1, + "grad_norm": 2.704068183898926, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0609, + "step": 9525 + }, + { + "epoch": 7.12, + "grad_norm": 2.9168524742126465, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0604, + "step": 9550 + }, + { + "epoch": 7.14, + "grad_norm": 2.9030890464782715, + "learning_rate": 9.088341708542714e-06, + "loss": 0.058, + "step": 9575 + }, + { + "epoch": 7.16, + "grad_norm": 2.5047571659088135, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0589, + "step": 9600 + }, + { + "epoch": 7.18, + "grad_norm": 2.736067056655884, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0593, + "step": 9625 + }, + { + "epoch": 7.2, + "grad_norm": 3.116854667663574, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0595, + "step": 9650 + }, + { + "epoch": 7.21, + "grad_norm": 3.01143217086792, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0603, + "step": 9675 + }, + { + "epoch": 7.23, + "grad_norm": 2.859469413757324, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0621, + "step": 9700 + }, + { + "epoch": 7.25, + "grad_norm": 3.0408596992492676, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0623, + "step": 9725 + }, + { + "epoch": 7.27, + "grad_norm": 3.3987040519714355, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0634, + "step": 9750 + }, + { + "epoch": 7.29, + "grad_norm": 2.963766098022461, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0604, + "step": 9775 + }, + { + "epoch": 7.31, + "grad_norm": 3.3916544914245605, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0611, + "step": 9800 + }, + { + "epoch": 7.33, + "grad_norm": 2.8690526485443115, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0629, + "step": 9825 + }, + { + "epoch": 7.35, + "grad_norm": 3.066652536392212, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0609, + "step": 9850 + }, + { + "epoch": 7.36, + "grad_norm": 3.0055248737335205, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0616, + "step": 9875 + }, + { + "epoch": 7.38, + "grad_norm": 3.0258870124816895, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0616, + "step": 9900 + }, + { + "epoch": 7.4, + "grad_norm": 2.7785232067108154, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0593, + "step": 9925 + }, + { + "epoch": 7.42, + "grad_norm": 2.823655366897583, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0602, + "step": 9950 + }, + { + "epoch": 7.44, + "grad_norm": 3.2807633876800537, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0613, + "step": 9975 + }, + { + "epoch": 7.46, + "grad_norm": 2.9766130447387695, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0591, + "step": 10000 + }, + { + "epoch": 7.46, + "eval_loss": 0.13414278626441956, + "eval_runtime": 580.6353, + "eval_samples_per_second": 2.651, + "eval_steps_per_second": 2.651, + "eval_wer": 21.242402992052362, + "step": 10000 + }, + { + "epoch": 7.48, + "grad_norm": 2.81675386428833, + "learning_rate": 9.043115577889447e-06, + "loss": 0.061, + "step": 10025 + }, + { + "epoch": 7.49, + "grad_norm": 2.9592761993408203, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0619, + "step": 10050 + }, + { + "epoch": 7.51, + "grad_norm": 3.7855329513549805, + "learning_rate": 9.038090452261308e-06, + "loss": 0.0612, + "step": 10075 + }, + { + "epoch": 7.53, + "grad_norm": 3.2071595191955566, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0638, + "step": 10100 + }, + { + "epoch": 7.55, + "grad_norm": 2.748765707015991, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0619, + "step": 10125 + }, + { + "epoch": 7.57, + "grad_norm": 2.9931349754333496, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0608, + "step": 10150 + }, + { + "epoch": 7.59, + "grad_norm": 2.9422237873077393, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0614, + "step": 10175 + }, + { + "epoch": 7.61, + "grad_norm": 3.285006046295166, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0618, + "step": 10200 + }, + { + "epoch": 7.62, + "grad_norm": 3.100308895111084, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0635, + "step": 10225 + }, + { + "epoch": 7.64, + "grad_norm": 2.9649345874786377, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0615, + "step": 10250 + }, + { + "epoch": 7.66, + "grad_norm": 2.6549410820007324, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0628, + "step": 10275 + }, + { + "epoch": 7.68, + "grad_norm": 3.2120449542999268, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0648, + "step": 10300 + }, + { + "epoch": 7.7, + "grad_norm": 3.467102527618408, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0596, + "step": 10325 + }, + { + "epoch": 7.72, + "grad_norm": 2.8694305419921875, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0619, + "step": 10350 + }, + { + "epoch": 7.74, + "grad_norm": 3.2277913093566895, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0638, + "step": 10375 + }, + { + "epoch": 7.76, + "grad_norm": 3.360274314880371, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0621, + "step": 10400 + }, + { + "epoch": 7.77, + "grad_norm": 3.1984667778015137, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0612, + "step": 10425 + }, + { + "epoch": 7.79, + "grad_norm": 3.1755056381225586, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0605, + "step": 10450 + }, + { + "epoch": 7.81, + "grad_norm": 3.153629779815674, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0644, + "step": 10475 + }, + { + "epoch": 7.83, + "grad_norm": 2.7790756225585938, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0612, + "step": 10500 + }, + { + "epoch": 7.85, + "grad_norm": 2.994464635848999, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0609, + "step": 10525 + }, + { + "epoch": 7.87, + "grad_norm": 3.365454912185669, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0605, + "step": 10550 + }, + { + "epoch": 7.89, + "grad_norm": 3.0024936199188232, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0613, + "step": 10575 + }, + { + "epoch": 7.9, + "grad_norm": 3.366381883621216, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0623, + "step": 10600 + }, + { + "epoch": 7.92, + "grad_norm": 2.9788715839385986, + "learning_rate": 8.982914572864322e-06, + "loss": 0.0623, + "step": 10625 + }, + { + "epoch": 7.94, + "grad_norm": 3.1529853343963623, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0624, + "step": 10650 + }, + { + "epoch": 7.96, + "grad_norm": 2.8945047855377197, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0593, + "step": 10675 + }, + { + "epoch": 7.98, + "grad_norm": 2.9230380058288574, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0635, + "step": 10700 + }, + { + "epoch": 8.0, + "grad_norm": 2.887291431427002, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0623, + "step": 10725 + }, + { + "epoch": 8.02, + "grad_norm": 2.505648612976074, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0494, + "step": 10750 + }, + { + "epoch": 8.04, + "grad_norm": 2.8424465656280518, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0489, + "step": 10775 + }, + { + "epoch": 8.05, + "grad_norm": 2.823469877243042, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0462, + "step": 10800 + }, + { + "epoch": 8.07, + "grad_norm": 2.831852674484253, + "learning_rate": 8.96281407035176e-06, + "loss": 0.047, + "step": 10825 + }, + { + "epoch": 8.09, + "grad_norm": 2.764580249786377, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0482, + "step": 10850 + }, + { + "epoch": 8.11, + "grad_norm": 2.9389917850494385, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0487, + "step": 10875 + }, + { + "epoch": 8.13, + "grad_norm": 2.892709255218506, + "learning_rate": 8.95527638190955e-06, + "loss": 0.048, + "step": 10900 + }, + { + "epoch": 8.15, + "grad_norm": 2.4463553428649902, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0485, + "step": 10925 + }, + { + "epoch": 8.17, + "grad_norm": 2.9676852226257324, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0489, + "step": 10950 + }, + { + "epoch": 8.18, + "grad_norm": 2.553262948989868, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0482, + "step": 10975 + }, + { + "epoch": 8.2, + "grad_norm": 2.994267225265503, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0515, + "step": 11000 + }, + { + "epoch": 8.2, + "eval_loss": 0.13818520307540894, + "eval_runtime": 586.6832, + "eval_samples_per_second": 2.623, + "eval_steps_per_second": 2.623, + "eval_wer": 21.482000935016362, + "step": 11000 + }, + { + "epoch": 8.22, + "grad_norm": 2.780748128890991, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0483, + "step": 11025 + }, + { + "epoch": 8.24, + "grad_norm": 2.5693514347076416, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0468, + "step": 11050 + }, + { + "epoch": 8.26, + "grad_norm": 3.0335822105407715, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0498, + "step": 11075 + }, + { + "epoch": 8.28, + "grad_norm": 3.279219150543213, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0493, + "step": 11100 + }, + { + "epoch": 8.3, + "grad_norm": 3.2019317150115967, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0482, + "step": 11125 + }, + { + "epoch": 8.31, + "grad_norm": 2.788463830947876, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0515, + "step": 11150 + }, + { + "epoch": 8.33, + "grad_norm": 3.0198886394500732, + "learning_rate": 8.927638190954775e-06, + "loss": 0.051, + "step": 11175 + }, + { + "epoch": 8.35, + "grad_norm": 2.639540433883667, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0511, + "step": 11200 + }, + { + "epoch": 8.37, + "grad_norm": 2.8239197731018066, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0502, + "step": 11225 + }, + { + "epoch": 8.39, + "grad_norm": 2.8736987113952637, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0482, + "step": 11250 + }, + { + "epoch": 8.41, + "grad_norm": 2.9021623134613037, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0479, + "step": 11275 + }, + { + "epoch": 8.43, + "grad_norm": 3.075807809829712, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0498, + "step": 11300 + }, + { + "epoch": 8.45, + "grad_norm": 3.1207997798919678, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0508, + "step": 11325 + }, + { + "epoch": 8.46, + "grad_norm": 3.3176045417785645, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0515, + "step": 11350 + }, + { + "epoch": 8.48, + "grad_norm": 2.8398916721343994, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0504, + "step": 11375 + }, + { + "epoch": 8.5, + "grad_norm": 2.6343865394592285, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0515, + "step": 11400 + }, + { + "epoch": 8.52, + "grad_norm": 3.1102194786071777, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0497, + "step": 11425 + }, + { + "epoch": 8.54, + "grad_norm": 2.820125102996826, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0506, + "step": 11450 + }, + { + "epoch": 8.56, + "grad_norm": 2.951397657394409, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0504, + "step": 11475 + }, + { + "epoch": 8.58, + "grad_norm": 2.8679819107055664, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0502, + "step": 11500 + }, + { + "epoch": 8.59, + "grad_norm": 3.0331969261169434, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0516, + "step": 11525 + }, + { + "epoch": 8.61, + "grad_norm": 3.374516487121582, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0507, + "step": 11550 + }, + { + "epoch": 8.63, + "grad_norm": 2.732095718383789, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0508, + "step": 11575 + }, + { + "epoch": 8.65, + "grad_norm": 3.1193432807922363, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0493, + "step": 11600 + }, + { + "epoch": 8.67, + "grad_norm": 2.8102569580078125, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0517, + "step": 11625 + }, + { + "epoch": 8.69, + "grad_norm": 2.9370856285095215, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0521, + "step": 11650 + }, + { + "epoch": 8.71, + "grad_norm": 3.0999062061309814, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0497, + "step": 11675 + }, + { + "epoch": 8.72, + "grad_norm": 2.8342604637145996, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0528, + "step": 11700 + }, + { + "epoch": 8.74, + "grad_norm": 3.0086517333984375, + "learning_rate": 8.872361809045227e-06, + "loss": 0.051, + "step": 11725 + }, + { + "epoch": 8.76, + "grad_norm": 3.4945380687713623, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0516, + "step": 11750 + }, + { + "epoch": 8.78, + "grad_norm": 3.1250336170196533, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0523, + "step": 11775 + }, + { + "epoch": 8.8, + "grad_norm": 2.8303897380828857, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0497, + "step": 11800 + }, + { + "epoch": 8.82, + "grad_norm": 2.7749826908111572, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0499, + "step": 11825 + }, + { + "epoch": 8.84, + "grad_norm": 2.6520028114318848, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0496, + "step": 11850 + }, + { + "epoch": 8.86, + "grad_norm": 2.981229305267334, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0517, + "step": 11875 + }, + { + "epoch": 8.87, + "grad_norm": 3.4659037590026855, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0537, + "step": 11900 + }, + { + "epoch": 8.89, + "grad_norm": 2.977062702178955, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0496, + "step": 11925 + }, + { + "epoch": 8.91, + "grad_norm": 3.3194692134857178, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0508, + "step": 11950 + }, + { + "epoch": 8.93, + "grad_norm": 2.7847280502319336, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0509, + "step": 11975 + }, + { + "epoch": 8.95, + "grad_norm": 2.9476842880249023, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0507, + "step": 12000 + }, + { + "epoch": 8.95, + "eval_loss": 0.14063476026058197, + "eval_runtime": 586.6409, + "eval_samples_per_second": 2.623, + "eval_steps_per_second": 2.623, + "eval_wer": 21.35343618513324, + "step": 12000 + }, + { + "epoch": 8.97, + "grad_norm": 3.0471765995025635, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0488, + "step": 12025 + }, + { + "epoch": 8.99, + "grad_norm": 2.9426567554473877, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0548, + "step": 12050 + }, + { + "epoch": 9.0, + "grad_norm": 2.492532730102539, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0453, + "step": 12075 + }, + { + "epoch": 9.02, + "grad_norm": 2.685091018676758, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0402, + "step": 12100 + }, + { + "epoch": 9.04, + "grad_norm": 2.2960050106048584, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0377, + "step": 12125 + }, + { + "epoch": 9.06, + "grad_norm": 2.86344575881958, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0377, + "step": 12150 + }, + { + "epoch": 9.08, + "grad_norm": 2.5468199253082275, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0387, + "step": 12175 + }, + { + "epoch": 9.1, + "grad_norm": 2.2638940811157227, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0382, + "step": 12200 + }, + { + "epoch": 9.12, + "grad_norm": 2.6062216758728027, + "learning_rate": 8.82211055276382e-06, + "loss": 0.038, + "step": 12225 + }, + { + "epoch": 9.13, + "grad_norm": 2.5012047290802, + "learning_rate": 8.81959798994975e-06, + "loss": 0.0409, + "step": 12250 + }, + { + "epoch": 9.15, + "grad_norm": 2.5335803031921387, + "learning_rate": 8.817085427135679e-06, + "loss": 0.04, + "step": 12275 + }, + { + "epoch": 9.17, + "grad_norm": 2.7183868885040283, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0392, + "step": 12300 + }, + { + "epoch": 9.19, + "grad_norm": 2.759749174118042, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0396, + "step": 12325 + }, + { + "epoch": 9.21, + "grad_norm": 2.9348032474517822, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0413, + "step": 12350 + }, + { + "epoch": 9.23, + "grad_norm": 2.6073756217956543, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0375, + "step": 12375 + }, + { + "epoch": 9.25, + "grad_norm": 2.963667631149292, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0407, + "step": 12400 + }, + { + "epoch": 9.27, + "grad_norm": 2.6174774169921875, + "learning_rate": 8.802010050251257e-06, + "loss": 0.0388, + "step": 12425 + }, + { + "epoch": 9.28, + "grad_norm": 3.046851396560669, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0389, + "step": 12450 + }, + { + "epoch": 9.3, + "grad_norm": 2.523590326309204, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0411, + "step": 12475 + }, + { + "epoch": 9.32, + "grad_norm": 2.8878679275512695, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0389, + "step": 12500 + }, + { + "epoch": 9.34, + "grad_norm": 3.3341503143310547, + "learning_rate": 8.791959798994976e-06, + "loss": 0.0393, + "step": 12525 + }, + { + "epoch": 9.36, + "grad_norm": 2.5811662673950195, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0392, + "step": 12550 + }, + { + "epoch": 9.38, + "grad_norm": 3.1110525131225586, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0413, + "step": 12575 + }, + { + "epoch": 9.4, + "grad_norm": 2.619075059890747, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0405, + "step": 12600 + }, + { + "epoch": 9.41, + "grad_norm": 3.3128104209899902, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0394, + "step": 12625 + }, + { + "epoch": 9.43, + "grad_norm": 2.6949732303619385, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0391, + "step": 12650 + }, + { + "epoch": 9.45, + "grad_norm": 3.1838645935058594, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0386, + "step": 12675 + }, + { + "epoch": 9.47, + "grad_norm": 3.470961332321167, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0391, + "step": 12700 + }, + { + "epoch": 9.49, + "grad_norm": 2.6914422512054443, + "learning_rate": 8.771859296482412e-06, + "loss": 0.0415, + "step": 12725 + }, + { + "epoch": 9.51, + "grad_norm": 2.9086146354675293, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0401, + "step": 12750 + }, + { + "epoch": 9.53, + "grad_norm": 2.762099266052246, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0414, + "step": 12775 + }, + { + "epoch": 9.55, + "grad_norm": 2.6897356510162354, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0411, + "step": 12800 + }, + { + "epoch": 9.56, + "grad_norm": 2.8862860202789307, + "learning_rate": 8.761809045226131e-06, + "loss": 0.0402, + "step": 12825 + }, + { + "epoch": 9.58, + "grad_norm": 2.8265528678894043, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0397, + "step": 12850 + }, + { + "epoch": 9.6, + "grad_norm": 2.717787027359009, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0418, + "step": 12875 + }, + { + "epoch": 9.62, + "grad_norm": 2.8657326698303223, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0393, + "step": 12900 + }, + { + "epoch": 9.64, + "grad_norm": 2.863874673843384, + "learning_rate": 8.751859296482412e-06, + "loss": 0.042, + "step": 12925 + }, + { + "epoch": 9.66, + "grad_norm": 2.6851563453674316, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0413, + "step": 12950 + }, + { + "epoch": 9.68, + "grad_norm": 2.8892135620117188, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0428, + "step": 12975 + }, + { + "epoch": 9.69, + "grad_norm": 2.953158378601074, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0416, + "step": 13000 + }, + { + "epoch": 9.69, + "eval_loss": 0.14570310711860657, + "eval_runtime": 581.256, + "eval_samples_per_second": 2.648, + "eval_steps_per_second": 2.648, + "eval_wer": 21.06708742402992, + "step": 13000 + }, + { + "epoch": 9.71, + "grad_norm": 3.2315938472747803, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0419, + "step": 13025 + }, + { + "epoch": 9.73, + "grad_norm": 2.657891273498535, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0416, + "step": 13050 + }, + { + "epoch": 9.75, + "grad_norm": 2.72910213470459, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0404, + "step": 13075 + }, + { + "epoch": 9.77, + "grad_norm": 2.5925345420837402, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0409, + "step": 13100 + }, + { + "epoch": 9.79, + "grad_norm": 2.748267650604248, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0403, + "step": 13125 + }, + { + "epoch": 9.81, + "grad_norm": 3.223501682281494, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0393, + "step": 13150 + }, + { + "epoch": 9.82, + "grad_norm": 2.9742627143859863, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0443, + "step": 13175 + }, + { + "epoch": 9.84, + "grad_norm": 2.690173625946045, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0394, + "step": 13200 + }, + { + "epoch": 9.86, + "grad_norm": 2.729637622833252, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0393, + "step": 13225 + }, + { + "epoch": 9.88, + "grad_norm": 3.2454795837402344, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0412, + "step": 13250 + }, + { + "epoch": 9.9, + "grad_norm": 3.3076694011688232, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0397, + "step": 13275 + }, + { + "epoch": 9.92, + "grad_norm": 2.757852554321289, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0429, + "step": 13300 + }, + { + "epoch": 9.94, + "grad_norm": 2.9310712814331055, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0418, + "step": 13325 + }, + { + "epoch": 9.96, + "grad_norm": 2.8508920669555664, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0418, + "step": 13350 + }, + { + "epoch": 9.97, + "grad_norm": 3.071153163909912, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0414, + "step": 13375 + }, + { + "epoch": 9.99, + "grad_norm": 2.8715882301330566, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0419, + "step": 13400 + }, + { + "epoch": 10.01, + "grad_norm": 2.475613594055176, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0343, + "step": 13425 + }, + { + "epoch": 10.03, + "grad_norm": 2.269005060195923, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0301, + "step": 13450 + }, + { + "epoch": 10.05, + "grad_norm": 2.73382568359375, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0302, + "step": 13475 + }, + { + "epoch": 10.07, + "grad_norm": 2.925926923751831, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0294, + "step": 13500 + }, + { + "epoch": 10.09, + "grad_norm": 2.064060926437378, + "learning_rate": 8.691557788944724e-06, + "loss": 0.032, + "step": 13525 + }, + { + "epoch": 10.1, + "grad_norm": 2.5724496841430664, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0308, + "step": 13550 + }, + { + "epoch": 10.12, + "grad_norm": 2.3711605072021484, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0319, + "step": 13575 + }, + { + "epoch": 10.14, + "grad_norm": 2.6255903244018555, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0306, + "step": 13600 + }, + { + "epoch": 10.16, + "grad_norm": 3.0303702354431152, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0319, + "step": 13625 + }, + { + "epoch": 10.18, + "grad_norm": 2.4169704914093018, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0309, + "step": 13650 + }, + { + "epoch": 10.2, + "grad_norm": 2.815736770629883, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0329, + "step": 13675 + }, + { + "epoch": 10.22, + "grad_norm": 2.7465853691101074, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0306, + "step": 13700 + }, + { + "epoch": 10.23, + "grad_norm": 2.5515809059143066, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0314, + "step": 13725 + }, + { + "epoch": 10.25, + "grad_norm": 2.475783109664917, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0324, + "step": 13750 + }, + { + "epoch": 10.27, + "grad_norm": 2.4548418521881104, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0316, + "step": 13775 + }, + { + "epoch": 10.29, + "grad_norm": 2.794523239135742, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0315, + "step": 13800 + }, + { + "epoch": 10.31, + "grad_norm": 2.4502298831939697, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0307, + "step": 13825 + }, + { + "epoch": 10.33, + "grad_norm": 2.5723605155944824, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0329, + "step": 13850 + }, + { + "epoch": 10.35, + "grad_norm": 2.854327917098999, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0312, + "step": 13875 + }, + { + "epoch": 10.37, + "grad_norm": 2.8989133834838867, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0323, + "step": 13900 + }, + { + "epoch": 10.38, + "grad_norm": 2.6203129291534424, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0322, + "step": 13925 + }, + { + "epoch": 10.4, + "grad_norm": 2.2104990482330322, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0316, + "step": 13950 + }, + { + "epoch": 10.42, + "grad_norm": 2.5123138427734375, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0317, + "step": 13975 + }, + { + "epoch": 10.44, + "grad_norm": 2.294405221939087, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0313, + "step": 14000 + }, + { + "epoch": 10.44, + "eval_loss": 0.15456141531467438, + "eval_runtime": 585.2606, + "eval_samples_per_second": 2.63, + "eval_steps_per_second": 2.63, + "eval_wer": 21.633941093969145, + "step": 14000 + }, + { + "epoch": 10.46, + "grad_norm": 2.563169240951538, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0333, + "step": 14025 + }, + { + "epoch": 10.48, + "grad_norm": 2.7425684928894043, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0325, + "step": 14050 + }, + { + "epoch": 10.5, + "grad_norm": 2.894617795944214, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0314, + "step": 14075 + }, + { + "epoch": 10.51, + "grad_norm": 2.774939775466919, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0307, + "step": 14100 + }, + { + "epoch": 10.53, + "grad_norm": 2.662224292755127, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0326, + "step": 14125 + }, + { + "epoch": 10.55, + "grad_norm": 2.6058950424194336, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0326, + "step": 14150 + }, + { + "epoch": 10.57, + "grad_norm": 2.759054183959961, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0325, + "step": 14175 + }, + { + "epoch": 10.59, + "grad_norm": 2.4028570652008057, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0323, + "step": 14200 + }, + { + "epoch": 10.61, + "grad_norm": 2.6831328868865967, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0327, + "step": 14225 + }, + { + "epoch": 10.63, + "grad_norm": 2.7555370330810547, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0328, + "step": 14250 + }, + { + "epoch": 10.65, + "grad_norm": 2.4091877937316895, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0327, + "step": 14275 + }, + { + "epoch": 10.66, + "grad_norm": 3.6348488330841064, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0319, + "step": 14300 + }, + { + "epoch": 10.68, + "grad_norm": 2.6795148849487305, + "learning_rate": 8.611155778894473e-06, + "loss": 0.032, + "step": 14325 + }, + { + "epoch": 10.7, + "grad_norm": 2.8836655616760254, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0328, + "step": 14350 + }, + { + "epoch": 10.72, + "grad_norm": 2.3982625007629395, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0342, + "step": 14375 + }, + { + "epoch": 10.74, + "grad_norm": 2.8531017303466797, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0328, + "step": 14400 + }, + { + "epoch": 10.76, + "grad_norm": 2.7594876289367676, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0343, + "step": 14425 + }, + { + "epoch": 10.78, + "grad_norm": 2.532412528991699, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0312, + "step": 14450 + }, + { + "epoch": 10.79, + "grad_norm": 2.4836552143096924, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0316, + "step": 14475 + }, + { + "epoch": 10.81, + "grad_norm": 2.7464237213134766, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0311, + "step": 14500 + }, + { + "epoch": 10.83, + "grad_norm": 3.00374436378479, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0324, + "step": 14525 + }, + { + "epoch": 10.85, + "grad_norm": 2.243666410446167, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0329, + "step": 14550 + }, + { + "epoch": 10.87, + "grad_norm": 2.496725082397461, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0313, + "step": 14575 + }, + { + "epoch": 10.89, + "grad_norm": 2.627030372619629, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0338, + "step": 14600 + }, + { + "epoch": 10.91, + "grad_norm": 3.1479392051696777, + "learning_rate": 8.58100502512563e-06, + "loss": 0.035, + "step": 14625 + }, + { + "epoch": 10.92, + "grad_norm": 2.673372268676758, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0329, + "step": 14650 + }, + { + "epoch": 10.94, + "grad_norm": 3.368115186691284, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0327, + "step": 14675 + }, + { + "epoch": 10.96, + "grad_norm": 3.0253243446350098, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0329, + "step": 14700 + }, + { + "epoch": 10.98, + "grad_norm": 2.8820085525512695, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0325, + "step": 14725 + }, + { + "epoch": 11.0, + "grad_norm": 3.2273361682891846, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0334, + "step": 14750 + }, + { + "epoch": 11.02, + "grad_norm": 2.6551361083984375, + "learning_rate": 8.565929648241207e-06, + "loss": 0.023, + "step": 14775 + }, + { + "epoch": 11.04, + "grad_norm": 2.077531099319458, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0228, + "step": 14800 + }, + { + "epoch": 11.06, + "grad_norm": 2.0028645992279053, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0236, + "step": 14825 + }, + { + "epoch": 11.07, + "grad_norm": 2.517749786376953, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0234, + "step": 14850 + }, + { + "epoch": 11.09, + "grad_norm": 2.57539701461792, + "learning_rate": 8.555879396984925e-06, + "loss": 0.0246, + "step": 14875 + }, + { + "epoch": 11.11, + "grad_norm": 2.665876626968384, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0247, + "step": 14900 + }, + { + "epoch": 11.13, + "grad_norm": 2.446589708328247, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0246, + "step": 14925 + }, + { + "epoch": 11.15, + "grad_norm": 2.37813663482666, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0249, + "step": 14950 + }, + { + "epoch": 11.17, + "grad_norm": 2.0749411582946777, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0229, + "step": 14975 + }, + { + "epoch": 11.19, + "grad_norm": 2.318493604660034, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0235, + "step": 15000 + }, + { + "epoch": 11.19, + "eval_loss": 0.16017158329486847, + "eval_runtime": 580.9814, + "eval_samples_per_second": 2.649, + "eval_steps_per_second": 2.649, + "eval_wer": 21.388499298737727, + "step": 15000 + }, + { + "epoch": 11.2, + "grad_norm": 2.244503974914551, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0249, + "step": 15025 + }, + { + "epoch": 11.22, + "grad_norm": 2.4577200412750244, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0246, + "step": 15050 + }, + { + "epoch": 11.24, + "grad_norm": 2.5807406902313232, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0246, + "step": 15075 + }, + { + "epoch": 11.26, + "grad_norm": 4.660642623901367, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0268, + "step": 15100 + }, + { + "epoch": 11.28, + "grad_norm": 2.227071523666382, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0234, + "step": 15125 + }, + { + "epoch": 11.3, + "grad_norm": 2.185781955718994, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0247, + "step": 15150 + }, + { + "epoch": 11.32, + "grad_norm": 2.399092435836792, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0239, + "step": 15175 + }, + { + "epoch": 11.33, + "grad_norm": 2.4906694889068604, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0247, + "step": 15200 + }, + { + "epoch": 11.35, + "grad_norm": 2.74886417388916, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0257, + "step": 15225 + }, + { + "epoch": 11.37, + "grad_norm": 2.2723824977874756, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0252, + "step": 15250 + }, + { + "epoch": 11.39, + "grad_norm": 2.5348899364471436, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0248, + "step": 15275 + }, + { + "epoch": 11.41, + "grad_norm": 2.5375797748565674, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0239, + "step": 15300 + }, + { + "epoch": 11.43, + "grad_norm": 2.1369495391845703, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0261, + "step": 15325 + }, + { + "epoch": 11.45, + "grad_norm": 2.160895347595215, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0238, + "step": 15350 + }, + { + "epoch": 11.47, + "grad_norm": 2.7245688438415527, + "learning_rate": 8.505628140703518e-06, + "loss": 0.0266, + "step": 15375 + }, + { + "epoch": 11.48, + "grad_norm": 2.555924892425537, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0242, + "step": 15400 + }, + { + "epoch": 11.5, + "grad_norm": 2.615945816040039, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0257, + "step": 15425 + }, + { + "epoch": 11.52, + "grad_norm": 2.7066407203674316, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0263, + "step": 15450 + }, + { + "epoch": 11.54, + "grad_norm": 2.056220531463623, + "learning_rate": 8.495577889447237e-06, + "loss": 0.025, + "step": 15475 + }, + { + "epoch": 11.56, + "grad_norm": 2.4821150302886963, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0245, + "step": 15500 + }, + { + "epoch": 11.58, + "grad_norm": 2.8089566230773926, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0247, + "step": 15525 + }, + { + "epoch": 11.6, + "grad_norm": 2.256821393966675, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0257, + "step": 15550 + }, + { + "epoch": 11.61, + "grad_norm": 2.6438982486724854, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0262, + "step": 15575 + }, + { + "epoch": 11.63, + "grad_norm": 2.734562873840332, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0252, + "step": 15600 + }, + { + "epoch": 11.65, + "grad_norm": 2.1922998428344727, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0238, + "step": 15625 + }, + { + "epoch": 11.67, + "grad_norm": 2.2121617794036865, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0245, + "step": 15650 + }, + { + "epoch": 11.69, + "grad_norm": 2.4471170902252197, + "learning_rate": 8.475477386934673e-06, + "loss": 0.026, + "step": 15675 + }, + { + "epoch": 11.71, + "grad_norm": 2.5253961086273193, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0257, + "step": 15700 + }, + { + "epoch": 11.73, + "grad_norm": 2.622615337371826, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0245, + "step": 15725 + }, + { + "epoch": 11.74, + "grad_norm": 2.3720972537994385, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0254, + "step": 15750 + }, + { + "epoch": 11.76, + "grad_norm": 2.4467880725860596, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0254, + "step": 15775 + }, + { + "epoch": 11.78, + "grad_norm": 2.5315792560577393, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0264, + "step": 15800 + }, + { + "epoch": 11.8, + "grad_norm": 2.8129148483276367, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0266, + "step": 15825 + }, + { + "epoch": 11.82, + "grad_norm": 1.8961018323898315, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0256, + "step": 15850 + }, + { + "epoch": 11.84, + "grad_norm": 2.528249979019165, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0262, + "step": 15875 + }, + { + "epoch": 11.86, + "grad_norm": 2.792339563369751, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0252, + "step": 15900 + }, + { + "epoch": 11.88, + "grad_norm": 2.6689741611480713, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0268, + "step": 15925 + }, + { + "epoch": 11.89, + "grad_norm": 2.7561771869659424, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0268, + "step": 15950 + }, + { + "epoch": 11.91, + "grad_norm": 2.529155969619751, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0259, + "step": 15975 + }, + { + "epoch": 11.93, + "grad_norm": 2.80694317817688, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0263, + "step": 16000 + }, + { + "epoch": 11.93, + "eval_loss": 0.16326633095741272, + "eval_runtime": 585.3742, + "eval_samples_per_second": 2.629, + "eval_steps_per_second": 2.629, + "eval_wer": 21.785881252921925, + "step": 16000 + }, + { + "epoch": 11.95, + "grad_norm": 2.916386365890503, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0276, + "step": 16025 + }, + { + "epoch": 11.97, + "grad_norm": 2.3397979736328125, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0265, + "step": 16050 + }, + { + "epoch": 11.99, + "grad_norm": 2.704742908477783, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0267, + "step": 16075 + }, + { + "epoch": 12.01, + "grad_norm": 1.918897271156311, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0233, + "step": 16100 + }, + { + "epoch": 12.02, + "grad_norm": 3.354050874710083, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0177, + "step": 16125 + }, + { + "epoch": 12.04, + "grad_norm": 2.034259557723999, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0179, + "step": 16150 + }, + { + "epoch": 12.06, + "grad_norm": 1.7880001068115234, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0177, + "step": 16175 + }, + { + "epoch": 12.08, + "grad_norm": 2.7571427822113037, + "learning_rate": 8.42281407035176e-06, + "loss": 0.0182, + "step": 16200 + }, + { + "epoch": 12.1, + "grad_norm": 2.4336163997650146, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0185, + "step": 16225 + }, + { + "epoch": 12.12, + "grad_norm": 1.5725680589675903, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0186, + "step": 16250 + }, + { + "epoch": 12.14, + "grad_norm": 2.3285794258117676, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0189, + "step": 16275 + }, + { + "epoch": 12.16, + "grad_norm": 2.4096875190734863, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0177, + "step": 16300 + }, + { + "epoch": 12.17, + "grad_norm": 1.8351141214370728, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0179, + "step": 16325 + }, + { + "epoch": 12.19, + "grad_norm": 2.3434078693389893, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0186, + "step": 16350 + }, + { + "epoch": 12.21, + "grad_norm": 2.207956552505493, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0184, + "step": 16375 + }, + { + "epoch": 12.23, + "grad_norm": 2.5958642959594727, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0191, + "step": 16400 + }, + { + "epoch": 12.25, + "grad_norm": 1.676741123199463, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0188, + "step": 16425 + }, + { + "epoch": 12.27, + "grad_norm": 2.0212693214416504, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0198, + "step": 16450 + }, + { + "epoch": 12.29, + "grad_norm": 3.062551975250244, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0184, + "step": 16475 + }, + { + "epoch": 12.3, + "grad_norm": 2.772487163543701, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0187, + "step": 16500 + }, + { + "epoch": 12.32, + "grad_norm": 2.138462543487549, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0185, + "step": 16525 + }, + { + "epoch": 12.34, + "grad_norm": 2.295807361602783, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0187, + "step": 16550 + }, + { + "epoch": 12.36, + "grad_norm": 2.6196656227111816, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0208, + "step": 16575 + }, + { + "epoch": 12.38, + "grad_norm": 2.273681879043579, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0201, + "step": 16600 + }, + { + "epoch": 12.4, + "grad_norm": 2.3581721782684326, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0202, + "step": 16625 + }, + { + "epoch": 12.42, + "grad_norm": 2.0838265419006348, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0187, + "step": 16650 + }, + { + "epoch": 12.43, + "grad_norm": 3.0522046089172363, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0201, + "step": 16675 + }, + { + "epoch": 12.45, + "grad_norm": 2.4429802894592285, + "learning_rate": 8.372562814070353e-06, + "loss": 0.019, + "step": 16700 + }, + { + "epoch": 12.47, + "grad_norm": 2.169727325439453, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0184, + "step": 16725 + }, + { + "epoch": 12.49, + "grad_norm": 2.391089677810669, + "learning_rate": 8.367537688442212e-06, + "loss": 0.02, + "step": 16750 + }, + { + "epoch": 12.51, + "grad_norm": 2.3086490631103516, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0195, + "step": 16775 + }, + { + "epoch": 12.53, + "grad_norm": 2.659498691558838, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0205, + "step": 16800 + }, + { + "epoch": 12.55, + "grad_norm": 2.5223937034606934, + "learning_rate": 8.36e-06, + "loss": 0.0194, + "step": 16825 + }, + { + "epoch": 12.57, + "grad_norm": 2.1157000064849854, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0192, + "step": 16850 + }, + { + "epoch": 12.58, + "grad_norm": 2.483126401901245, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0186, + "step": 16875 + }, + { + "epoch": 12.6, + "grad_norm": 2.163722038269043, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0192, + "step": 16900 + }, + { + "epoch": 12.62, + "grad_norm": 2.246340751647949, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0208, + "step": 16925 + }, + { + "epoch": 12.64, + "grad_norm": 2.1684188842773438, + "learning_rate": 8.34743718592965e-06, + "loss": 0.02, + "step": 16950 + }, + { + "epoch": 12.66, + "grad_norm": 2.7529144287109375, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0211, + "step": 16975 + }, + { + "epoch": 12.68, + "grad_norm": 2.061251163482666, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0197, + "step": 17000 + }, + { + "epoch": 12.68, + "eval_loss": 0.17200389504432678, + "eval_runtime": 587.9392, + "eval_samples_per_second": 2.618, + "eval_steps_per_second": 2.618, + "eval_wer": 21.791725105189343, + "step": 17000 + }, + { + "epoch": 12.7, + "grad_norm": 2.7183477878570557, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0206, + "step": 17025 + }, + { + "epoch": 12.71, + "grad_norm": 2.1563830375671387, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0205, + "step": 17050 + }, + { + "epoch": 12.73, + "grad_norm": 2.2405691146850586, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0205, + "step": 17075 + }, + { + "epoch": 12.75, + "grad_norm": 2.293025255203247, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0205, + "step": 17100 + }, + { + "epoch": 12.77, + "grad_norm": 2.0758073329925537, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0194, + "step": 17125 + }, + { + "epoch": 12.79, + "grad_norm": 2.6992814540863037, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0209, + "step": 17150 + }, + { + "epoch": 12.81, + "grad_norm": 2.4190897941589355, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0195, + "step": 17175 + }, + { + "epoch": 12.83, + "grad_norm": 2.4841878414154053, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0198, + "step": 17200 + }, + { + "epoch": 12.84, + "grad_norm": 2.7904245853424072, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0197, + "step": 17225 + }, + { + "epoch": 12.86, + "grad_norm": 3.5382156372070312, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0206, + "step": 17250 + }, + { + "epoch": 12.88, + "grad_norm": 2.7995846271514893, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0208, + "step": 17275 + }, + { + "epoch": 12.9, + "grad_norm": 2.6690526008605957, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0199, + "step": 17300 + }, + { + "epoch": 12.92, + "grad_norm": 2.7140305042266846, + "learning_rate": 8.309748743718595e-06, + "loss": 0.02, + "step": 17325 + }, + { + "epoch": 12.94, + "grad_norm": 2.5674378871917725, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0206, + "step": 17350 + }, + { + "epoch": 12.96, + "grad_norm": 2.5535342693328857, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0219, + "step": 17375 + }, + { + "epoch": 12.98, + "grad_norm": 2.4910037517547607, + "learning_rate": 8.302211055276382e-06, + "loss": 0.021, + "step": 17400 + }, + { + "epoch": 12.99, + "grad_norm": 2.8390157222747803, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0185, + "step": 17425 + }, + { + "epoch": 13.01, + "grad_norm": 2.229580879211426, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0166, + "step": 17450 + }, + { + "epoch": 13.03, + "grad_norm": 1.1999571323394775, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0131, + "step": 17475 + }, + { + "epoch": 13.05, + "grad_norm": 1.4625461101531982, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0134, + "step": 17500 + }, + { + "epoch": 13.07, + "grad_norm": 1.9511964321136475, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0144, + "step": 17525 + }, + { + "epoch": 13.09, + "grad_norm": 2.2535626888275146, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0137, + "step": 17550 + }, + { + "epoch": 13.11, + "grad_norm": 2.267462968826294, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0135, + "step": 17575 + }, + { + "epoch": 13.12, + "grad_norm": 1.8903417587280273, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0138, + "step": 17600 + }, + { + "epoch": 13.14, + "grad_norm": 2.1125903129577637, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0137, + "step": 17625 + }, + { + "epoch": 13.16, + "grad_norm": 1.7965856790542603, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0132, + "step": 17650 + }, + { + "epoch": 13.18, + "grad_norm": 2.060821056365967, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0133, + "step": 17675 + }, + { + "epoch": 13.2, + "grad_norm": 1.898992896080017, + "learning_rate": 8.272060301507538e-06, + "loss": 0.0153, + "step": 17700 + }, + { + "epoch": 13.22, + "grad_norm": 1.5920830965042114, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0136, + "step": 17725 + }, + { + "epoch": 13.24, + "grad_norm": 1.9222551584243774, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0136, + "step": 17750 + }, + { + "epoch": 13.26, + "grad_norm": 1.8400964736938477, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0143, + "step": 17775 + }, + { + "epoch": 13.27, + "grad_norm": 2.0800414085388184, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0145, + "step": 17800 + }, + { + "epoch": 13.29, + "grad_norm": 2.2902474403381348, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0145, + "step": 17825 + }, + { + "epoch": 13.31, + "grad_norm": 2.158294916152954, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0141, + "step": 17850 + }, + { + "epoch": 13.33, + "grad_norm": 2.3175899982452393, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0155, + "step": 17875 + }, + { + "epoch": 13.35, + "grad_norm": 1.9599988460540771, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0149, + "step": 17900 + }, + { + "epoch": 13.37, + "grad_norm": 1.8904727697372437, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0155, + "step": 17925 + }, + { + "epoch": 13.39, + "grad_norm": 2.377938985824585, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0143, + "step": 17950 + }, + { + "epoch": 13.4, + "grad_norm": 2.185609817504883, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0148, + "step": 17975 + }, + { + "epoch": 13.42, + "grad_norm": 1.8885729312896729, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0146, + "step": 18000 + }, + { + "epoch": 13.42, + "eval_loss": 0.18107624351978302, + "eval_runtime": 585.0637, + "eval_samples_per_second": 2.63, + "eval_steps_per_second": 2.63, + "eval_wer": 21.42940626460963, + "step": 18000 + }, + { + "epoch": 13.44, + "grad_norm": 2.179326057434082, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0158, + "step": 18025 + }, + { + "epoch": 13.46, + "grad_norm": 1.8478564023971558, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0169, + "step": 18050 + }, + { + "epoch": 13.48, + "grad_norm": 2.4728593826293945, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0151, + "step": 18075 + }, + { + "epoch": 13.5, + "grad_norm": 2.417924642562866, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0151, + "step": 18100 + }, + { + "epoch": 13.52, + "grad_norm": 2.445129156112671, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0159, + "step": 18125 + }, + { + "epoch": 13.53, + "grad_norm": 1.936056137084961, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0155, + "step": 18150 + }, + { + "epoch": 13.55, + "grad_norm": 2.5618040561676025, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0155, + "step": 18175 + }, + { + "epoch": 13.57, + "grad_norm": 2.5118329524993896, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0144, + "step": 18200 + }, + { + "epoch": 13.59, + "grad_norm": 1.6736969947814941, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0141, + "step": 18225 + }, + { + "epoch": 13.61, + "grad_norm": 3.074496030807495, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0152, + "step": 18250 + }, + { + "epoch": 13.63, + "grad_norm": 1.7108162641525269, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0148, + "step": 18275 + }, + { + "epoch": 13.65, + "grad_norm": 1.9194002151489258, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0158, + "step": 18300 + }, + { + "epoch": 13.67, + "grad_norm": 2.134629964828491, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0156, + "step": 18325 + }, + { + "epoch": 13.68, + "grad_norm": 2.182422637939453, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0153, + "step": 18350 + }, + { + "epoch": 13.7, + "grad_norm": 2.316084861755371, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0161, + "step": 18375 + }, + { + "epoch": 13.72, + "grad_norm": 2.117938995361328, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0158, + "step": 18400 + }, + { + "epoch": 13.74, + "grad_norm": 1.9725098609924316, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0163, + "step": 18425 + }, + { + "epoch": 13.76, + "grad_norm": 2.1144094467163086, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0159, + "step": 18450 + }, + { + "epoch": 13.78, + "grad_norm": 2.307309627532959, + "learning_rate": 8.194170854271357e-06, + "loss": 0.0161, + "step": 18475 + }, + { + "epoch": 13.8, + "grad_norm": 2.5187482833862305, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0162, + "step": 18500 + }, + { + "epoch": 13.81, + "grad_norm": 2.5622193813323975, + "learning_rate": 8.189145728643216e-06, + "loss": 0.0158, + "step": 18525 + }, + { + "epoch": 13.83, + "grad_norm": 1.6929138898849487, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0157, + "step": 18550 + }, + { + "epoch": 13.85, + "grad_norm": 3.2649691104888916, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0149, + "step": 18575 + }, + { + "epoch": 13.87, + "grad_norm": 2.3972930908203125, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0156, + "step": 18600 + }, + { + "epoch": 13.89, + "grad_norm": 2.3530404567718506, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0145, + "step": 18625 + }, + { + "epoch": 13.91, + "grad_norm": 1.7542129755020142, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0164, + "step": 18650 + }, + { + "epoch": 13.93, + "grad_norm": 1.8659591674804688, + "learning_rate": 8.174070351758795e-06, + "loss": 0.0151, + "step": 18675 + }, + { + "epoch": 13.94, + "grad_norm": 2.4409241676330566, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0153, + "step": 18700 + }, + { + "epoch": 13.96, + "grad_norm": 2.9759609699249268, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0166, + "step": 18725 + }, + { + "epoch": 13.98, + "grad_norm": 1.8240071535110474, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0154, + "step": 18750 + }, + { + "epoch": 14.0, + "grad_norm": 1.7029523849487305, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0157, + "step": 18775 + }, + { + "epoch": 14.02, + "grad_norm": 1.2470253705978394, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0103, + "step": 18800 + }, + { + "epoch": 14.04, + "grad_norm": 1.7034786939620972, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0106, + "step": 18825 + }, + { + "epoch": 14.06, + "grad_norm": 1.9833647012710571, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0103, + "step": 18850 + }, + { + "epoch": 14.08, + "grad_norm": 1.9003299474716187, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0107, + "step": 18875 + }, + { + "epoch": 14.09, + "grad_norm": 1.9848977327346802, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0108, + "step": 18900 + }, + { + "epoch": 14.11, + "grad_norm": 1.5483384132385254, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0102, + "step": 18925 + }, + { + "epoch": 14.13, + "grad_norm": 1.6742889881134033, + "learning_rate": 8.146432160804021e-06, + "loss": 0.0102, + "step": 18950 + }, + { + "epoch": 14.15, + "grad_norm": 1.9698781967163086, + "learning_rate": 8.14391959798995e-06, + "loss": 0.0103, + "step": 18975 + }, + { + "epoch": 14.17, + "grad_norm": 1.6127244234085083, + "learning_rate": 8.14140703517588e-06, + "loss": 0.0113, + "step": 19000 + }, + { + "epoch": 14.17, + "eval_loss": 0.1869630515575409, + "eval_runtime": 587.7493, + "eval_samples_per_second": 2.618, + "eval_steps_per_second": 2.618, + "eval_wer": 21.522907900888267, + "step": 19000 + }, + { + "epoch": 14.19, + "grad_norm": 2.108584403991699, + "learning_rate": 8.13889447236181e-06, + "loss": 0.0116, + "step": 19025 + }, + { + "epoch": 14.21, + "grad_norm": 1.3327244520187378, + "learning_rate": 8.13638190954774e-06, + "loss": 0.0102, + "step": 19050 + }, + { + "epoch": 14.22, + "grad_norm": 2.1547276973724365, + "learning_rate": 8.13386934673367e-06, + "loss": 0.0111, + "step": 19075 + }, + { + "epoch": 14.24, + "grad_norm": 1.6848840713500977, + "learning_rate": 8.131356783919598e-06, + "loss": 0.0111, + "step": 19100 + }, + { + "epoch": 14.26, + "grad_norm": 2.3645429611206055, + "learning_rate": 8.128844221105528e-06, + "loss": 0.0113, + "step": 19125 + }, + { + "epoch": 14.28, + "grad_norm": 1.71700119972229, + "learning_rate": 8.126331658291457e-06, + "loss": 0.011, + "step": 19150 + }, + { + "epoch": 14.3, + "grad_norm": 1.6674860715866089, + "learning_rate": 8.123819095477388e-06, + "loss": 0.0109, + "step": 19175 + }, + { + "epoch": 14.32, + "grad_norm": 2.090177536010742, + "learning_rate": 8.121306532663317e-06, + "loss": 0.0108, + "step": 19200 + }, + { + "epoch": 14.34, + "grad_norm": 2.199313163757324, + "learning_rate": 8.118793969849247e-06, + "loss": 0.0109, + "step": 19225 + }, + { + "epoch": 14.35, + "grad_norm": 1.409297227859497, + "learning_rate": 8.116281407035178e-06, + "loss": 0.0111, + "step": 19250 + }, + { + "epoch": 14.37, + "grad_norm": 2.175391435623169, + "learning_rate": 8.113768844221105e-06, + "loss": 0.0112, + "step": 19275 + }, + { + "epoch": 14.39, + "grad_norm": 2.1688599586486816, + "learning_rate": 8.111256281407036e-06, + "loss": 0.0102, + "step": 19300 + }, + { + "epoch": 14.41, + "grad_norm": 3.070899486541748, + "learning_rate": 8.108743718592966e-06, + "loss": 0.0119, + "step": 19325 + }, + { + "epoch": 14.43, + "grad_norm": 1.9805489778518677, + "learning_rate": 8.106231155778895e-06, + "loss": 0.0116, + "step": 19350 + }, + { + "epoch": 14.45, + "grad_norm": 1.9202370643615723, + "learning_rate": 8.103718592964824e-06, + "loss": 0.0125, + "step": 19375 + }, + { + "epoch": 14.47, + "grad_norm": 2.12168288230896, + "learning_rate": 8.101206030150754e-06, + "loss": 0.0117, + "step": 19400 + }, + { + "epoch": 14.49, + "grad_norm": 2.2517611980438232, + "learning_rate": 8.098693467336685e-06, + "loss": 0.0112, + "step": 19425 + }, + { + "epoch": 14.5, + "grad_norm": 2.267876625061035, + "learning_rate": 8.096180904522614e-06, + "loss": 0.0116, + "step": 19450 + }, + { + "epoch": 14.52, + "grad_norm": 2.480056047439575, + "learning_rate": 8.093668341708543e-06, + "loss": 0.0123, + "step": 19475 + }, + { + "epoch": 14.54, + "grad_norm": 1.6950886249542236, + "learning_rate": 8.091155778894473e-06, + "loss": 0.0125, + "step": 19500 + }, + { + "epoch": 14.56, + "grad_norm": 2.288228750228882, + "learning_rate": 8.088643216080404e-06, + "loss": 0.0113, + "step": 19525 + }, + { + "epoch": 14.58, + "grad_norm": 1.8283995389938354, + "learning_rate": 8.086130653266331e-06, + "loss": 0.012, + "step": 19550 + }, + { + "epoch": 14.6, + "grad_norm": 1.998721957206726, + "learning_rate": 8.083618090452262e-06, + "loss": 0.0121, + "step": 19575 + }, + { + "epoch": 14.62, + "grad_norm": 1.9252220392227173, + "learning_rate": 8.081105527638192e-06, + "loss": 0.0119, + "step": 19600 + }, + { + "epoch": 14.63, + "grad_norm": 2.3433918952941895, + "learning_rate": 8.078592964824121e-06, + "loss": 0.0119, + "step": 19625 + }, + { + "epoch": 14.65, + "grad_norm": 2.180340051651001, + "learning_rate": 8.076080402010052e-06, + "loss": 0.011, + "step": 19650 + }, + { + "epoch": 14.67, + "grad_norm": 2.14115571975708, + "learning_rate": 8.07356783919598e-06, + "loss": 0.0116, + "step": 19675 + }, + { + "epoch": 14.69, + "grad_norm": 2.0081052780151367, + "learning_rate": 8.07105527638191e-06, + "loss": 0.0116, + "step": 19700 + }, + { + "epoch": 14.71, + "grad_norm": 1.9204707145690918, + "learning_rate": 8.06854271356784e-06, + "loss": 0.0113, + "step": 19725 + }, + { + "epoch": 14.73, + "grad_norm": 1.6993181705474854, + "learning_rate": 8.06603015075377e-06, + "loss": 0.0122, + "step": 19750 + }, + { + "epoch": 14.75, + "grad_norm": 2.3597218990325928, + "learning_rate": 8.063517587939699e-06, + "loss": 0.0119, + "step": 19775 + }, + { + "epoch": 14.77, + "grad_norm": 2.304776906967163, + "learning_rate": 8.06100502512563e-06, + "loss": 0.0124, + "step": 19800 + }, + { + "epoch": 14.78, + "grad_norm": 1.9260263442993164, + "learning_rate": 8.058492462311557e-06, + "loss": 0.0112, + "step": 19825 + }, + { + "epoch": 14.8, + "grad_norm": 2.530879020690918, + "learning_rate": 8.055979899497488e-06, + "loss": 0.012, + "step": 19850 + }, + { + "epoch": 14.82, + "grad_norm": 1.8805254697799683, + "learning_rate": 8.053467336683418e-06, + "loss": 0.0116, + "step": 19875 + }, + { + "epoch": 14.84, + "grad_norm": 2.0274715423583984, + "learning_rate": 8.050954773869347e-06, + "loss": 0.0123, + "step": 19900 + }, + { + "epoch": 14.86, + "grad_norm": 2.7650601863861084, + "learning_rate": 8.048442211055278e-06, + "loss": 0.0127, + "step": 19925 + }, + { + "epoch": 14.88, + "grad_norm": 2.345353603363037, + "learning_rate": 8.045929648241206e-06, + "loss": 0.0124, + "step": 19950 + }, + { + "epoch": 14.9, + "grad_norm": 1.8999271392822266, + "learning_rate": 8.043417085427137e-06, + "loss": 0.0116, + "step": 19975 + }, + { + "epoch": 14.91, + "grad_norm": 1.558822751045227, + "learning_rate": 8.040904522613066e-06, + "loss": 0.0118, + "step": 20000 + }, + { + "epoch": 14.91, + "eval_loss": 0.19275790452957153, + "eval_runtime": 581.1286, + "eval_samples_per_second": 2.648, + "eval_steps_per_second": 2.648, + "eval_wer": 22.031323048153343, + "step": 20000 + }, + { + "epoch": 14.93, + "grad_norm": 2.535437822341919, + "learning_rate": 8.038391959798995e-06, + "loss": 0.0122, + "step": 20025 + }, + { + "epoch": 14.95, + "grad_norm": 2.0672223567962646, + "learning_rate": 8.035879396984926e-06, + "loss": 0.0135, + "step": 20050 + }, + { + "epoch": 14.97, + "grad_norm": 2.2344706058502197, + "learning_rate": 8.033366834170856e-06, + "loss": 0.0128, + "step": 20075 + }, + { + "epoch": 14.99, + "grad_norm": 1.878891944885254, + "learning_rate": 8.030854271356785e-06, + "loss": 0.0128, + "step": 20100 + }, + { + "epoch": 15.01, + "grad_norm": 1.5701360702514648, + "learning_rate": 8.028341708542714e-06, + "loss": 0.0101, + "step": 20125 + }, + { + "epoch": 15.03, + "grad_norm": 1.3182779550552368, + "learning_rate": 8.025829145728644e-06, + "loss": 0.0074, + "step": 20150 + }, + { + "epoch": 15.04, + "grad_norm": 1.7312322854995728, + "learning_rate": 8.023316582914573e-06, + "loss": 0.0083, + "step": 20175 + }, + { + "epoch": 15.06, + "grad_norm": 1.4511685371398926, + "learning_rate": 8.020804020100504e-06, + "loss": 0.0085, + "step": 20200 + }, + { + "epoch": 15.08, + "grad_norm": 1.359107255935669, + "learning_rate": 8.018291457286432e-06, + "loss": 0.0072, + "step": 20225 + }, + { + "epoch": 15.1, + "grad_norm": 1.4807746410369873, + "learning_rate": 8.015778894472363e-06, + "loss": 0.0084, + "step": 20250 + }, + { + "epoch": 15.12, + "grad_norm": 1.7090073823928833, + "learning_rate": 8.013366834170854e-06, + "loss": 0.0083, + "step": 20275 + }, + { + "epoch": 15.14, + "grad_norm": 1.890470266342163, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0094, + "step": 20300 + }, + { + "epoch": 15.16, + "grad_norm": 1.6577571630477905, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0083, + "step": 20325 + }, + { + "epoch": 15.18, + "grad_norm": 1.9787050485610962, + "learning_rate": 8.005829145728644e-06, + "loss": 0.0081, + "step": 20350 + }, + { + "epoch": 15.19, + "grad_norm": 1.142236590385437, + "learning_rate": 8.003316582914573e-06, + "loss": 0.0087, + "step": 20375 + }, + { + "epoch": 15.21, + "grad_norm": 1.2740939855575562, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0087, + "step": 20400 + }, + { + "epoch": 15.23, + "grad_norm": 1.6506059169769287, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0082, + "step": 20425 + }, + { + "epoch": 15.25, + "grad_norm": 1.8885892629623413, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0082, + "step": 20450 + }, + { + "epoch": 15.27, + "grad_norm": 1.530429482460022, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0089, + "step": 20475 + }, + { + "epoch": 15.29, + "grad_norm": 1.5838526487350464, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0085, + "step": 20500 + }, + { + "epoch": 15.31, + "grad_norm": 2.1945927143096924, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0092, + "step": 20525 + }, + { + "epoch": 15.32, + "grad_norm": 1.6975407600402832, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0082, + "step": 20550 + }, + { + "epoch": 15.34, + "grad_norm": 1.7640454769134521, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0088, + "step": 20575 + }, + { + "epoch": 15.36, + "grad_norm": 2.03951096534729, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0086, + "step": 20600 + }, + { + "epoch": 15.38, + "grad_norm": 1.6242202520370483, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0093, + "step": 20625 + }, + { + "epoch": 15.4, + "grad_norm": 1.9065381288528442, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0082, + "step": 20650 + }, + { + "epoch": 15.42, + "grad_norm": 1.5574641227722168, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0092, + "step": 20675 + }, + { + "epoch": 15.44, + "grad_norm": 1.5223567485809326, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0091, + "step": 20700 + }, + { + "epoch": 15.45, + "grad_norm": 1.652739405632019, + "learning_rate": 7.968140703517589e-06, + "loss": 0.008, + "step": 20725 + }, + { + "epoch": 15.47, + "grad_norm": 1.4620931148529053, + "learning_rate": 7.965628140703518e-06, + "loss": 0.0092, + "step": 20750 + }, + { + "epoch": 15.49, + "grad_norm": 2.04807448387146, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0093, + "step": 20775 + }, + { + "epoch": 15.51, + "grad_norm": 2.479281187057495, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0091, + "step": 20800 + }, + { + "epoch": 15.53, + "grad_norm": 1.4453626871109009, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0089, + "step": 20825 + }, + { + "epoch": 15.55, + "grad_norm": 1.7642931938171387, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0086, + "step": 20850 + }, + { + "epoch": 15.57, + "grad_norm": 1.9889525175094604, + "learning_rate": 7.953065326633166e-06, + "loss": 0.0082, + "step": 20875 + }, + { + "epoch": 15.59, + "grad_norm": 2.764052391052246, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0089, + "step": 20900 + }, + { + "epoch": 15.6, + "grad_norm": 2.933439016342163, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0098, + "step": 20925 + }, + { + "epoch": 15.62, + "grad_norm": 1.7179831266403198, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0091, + "step": 20950 + }, + { + "epoch": 15.64, + "grad_norm": 2.032393455505371, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0096, + "step": 20975 + }, + { + "epoch": 15.66, + "grad_norm": 1.7456022500991821, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0092, + "step": 21000 + }, + { + "epoch": 15.66, + "eval_loss": 0.19948403537273407, + "eval_runtime": 582.5679, + "eval_samples_per_second": 2.642, + "eval_steps_per_second": 2.642, + "eval_wer": 21.815100514259, + "step": 21000 + }, + { + "epoch": 15.68, + "grad_norm": 2.0885918140411377, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0093, + "step": 21025 + }, + { + "epoch": 15.7, + "grad_norm": 1.7001230716705322, + "learning_rate": 7.935477386934673e-06, + "loss": 0.0085, + "step": 21050 + }, + { + "epoch": 15.72, + "grad_norm": 1.2074350118637085, + "learning_rate": 7.932964824120604e-06, + "loss": 0.0084, + "step": 21075 + }, + { + "epoch": 15.73, + "grad_norm": 1.6197521686553955, + "learning_rate": 7.930452261306534e-06, + "loss": 0.0086, + "step": 21100 + }, + { + "epoch": 15.75, + "grad_norm": 1.3899601697921753, + "learning_rate": 7.927939698492463e-06, + "loss": 0.0087, + "step": 21125 + }, + { + "epoch": 15.77, + "grad_norm": 2.0573723316192627, + "learning_rate": 7.925427135678392e-06, + "loss": 0.0097, + "step": 21150 + }, + { + "epoch": 15.79, + "grad_norm": 1.9244797229766846, + "learning_rate": 7.922914572864322e-06, + "loss": 0.0095, + "step": 21175 + }, + { + "epoch": 15.81, + "grad_norm": 1.9728559255599976, + "learning_rate": 7.920402010050253e-06, + "loss": 0.0095, + "step": 21200 + }, + { + "epoch": 15.83, + "grad_norm": 2.2746589183807373, + "learning_rate": 7.91788944723618e-06, + "loss": 0.0086, + "step": 21225 + }, + { + "epoch": 15.85, + "grad_norm": 2.4133591651916504, + "learning_rate": 7.915376884422111e-06, + "loss": 0.0093, + "step": 21250 + }, + { + "epoch": 15.87, + "grad_norm": 1.5227149724960327, + "learning_rate": 7.91286432160804e-06, + "loss": 0.0089, + "step": 21275 + }, + { + "epoch": 15.88, + "grad_norm": 1.9698967933654785, + "learning_rate": 7.91035175879397e-06, + "loss": 0.0094, + "step": 21300 + }, + { + "epoch": 15.9, + "grad_norm": 1.7919690608978271, + "learning_rate": 7.907839195979901e-06, + "loss": 0.009, + "step": 21325 + }, + { + "epoch": 15.92, + "grad_norm": 2.053586959838867, + "learning_rate": 7.90532663316583e-06, + "loss": 0.0096, + "step": 21350 + }, + { + "epoch": 15.94, + "grad_norm": 1.8401426076889038, + "learning_rate": 7.90281407035176e-06, + "loss": 0.0101, + "step": 21375 + }, + { + "epoch": 15.96, + "grad_norm": 2.4755380153656006, + "learning_rate": 7.900301507537689e-06, + "loss": 0.0098, + "step": 21400 + }, + { + "epoch": 15.98, + "grad_norm": 1.8482918739318848, + "learning_rate": 7.897788944723618e-06, + "loss": 0.0092, + "step": 21425 + }, + { + "epoch": 16.0, + "grad_norm": 2.088010549545288, + "learning_rate": 7.895276381909548e-06, + "loss": 0.0099, + "step": 21450 + }, + { + "epoch": 16.01, + "grad_norm": 1.4468934535980225, + "learning_rate": 7.892763819095479e-06, + "loss": 0.0078, + "step": 21475 + }, + { + "epoch": 16.03, + "grad_norm": 2.218935251235962, + "learning_rate": 7.890251256281408e-06, + "loss": 0.0062, + "step": 21500 + }, + { + "epoch": 16.05, + "grad_norm": 1.6065248250961304, + "learning_rate": 7.887738693467337e-06, + "loss": 0.0069, + "step": 21525 + }, + { + "epoch": 16.07, + "grad_norm": 1.3065677881240845, + "learning_rate": 7.885226130653267e-06, + "loss": 0.0061, + "step": 21550 + }, + { + "epoch": 16.09, + "grad_norm": 1.6935917139053345, + "learning_rate": 7.882713567839196e-06, + "loss": 0.0062, + "step": 21575 + }, + { + "epoch": 16.11, + "grad_norm": 3.3058383464813232, + "learning_rate": 7.880201005025127e-06, + "loss": 0.0061, + "step": 21600 + }, + { + "epoch": 16.13, + "grad_norm": 1.0785917043685913, + "learning_rate": 7.877688442211056e-06, + "loss": 0.0061, + "step": 21625 + }, + { + "epoch": 16.14, + "grad_norm": 1.997591495513916, + "learning_rate": 7.875175879396986e-06, + "loss": 0.0064, + "step": 21650 + }, + { + "epoch": 16.16, + "grad_norm": 1.8726153373718262, + "learning_rate": 7.872663316582915e-06, + "loss": 0.0061, + "step": 21675 + }, + { + "epoch": 16.18, + "grad_norm": 1.675241470336914, + "learning_rate": 7.870150753768844e-06, + "loss": 0.0062, + "step": 21700 + }, + { + "epoch": 16.2, + "grad_norm": 1.338757038116455, + "learning_rate": 7.867638190954775e-06, + "loss": 0.0068, + "step": 21725 + }, + { + "epoch": 16.22, + "grad_norm": 1.4911285638809204, + "learning_rate": 7.865125628140705e-06, + "loss": 0.0064, + "step": 21750 + }, + { + "epoch": 16.24, + "grad_norm": 1.6424635648727417, + "learning_rate": 7.862613065326634e-06, + "loss": 0.0071, + "step": 21775 + }, + { + "epoch": 16.26, + "grad_norm": 1.7161250114440918, + "learning_rate": 7.860100502512563e-06, + "loss": 0.0064, + "step": 21800 + }, + { + "epoch": 16.28, + "grad_norm": 1.6127575635910034, + "learning_rate": 7.857587939698493e-06, + "loss": 0.0068, + "step": 21825 + }, + { + "epoch": 16.29, + "grad_norm": 1.5250511169433594, + "learning_rate": 7.855075376884422e-06, + "loss": 0.0068, + "step": 21850 + }, + { + "epoch": 16.31, + "grad_norm": 1.6260666847229004, + "learning_rate": 7.852562814070353e-06, + "loss": 0.0069, + "step": 21875 + }, + { + "epoch": 16.33, + "grad_norm": 1.171433687210083, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0065, + "step": 21900 + }, + { + "epoch": 16.35, + "grad_norm": 1.8016295433044434, + "learning_rate": 7.847638190954775e-06, + "loss": 0.007, + "step": 21925 + }, + { + "epoch": 16.37, + "grad_norm": 1.445003628730774, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0069, + "step": 21950 + }, + { + "epoch": 16.39, + "grad_norm": 2.1213252544403076, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0067, + "step": 21975 + }, + { + "epoch": 16.41, + "grad_norm": 2.07407808303833, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0067, + "step": 22000 + }, + { + "epoch": 16.41, + "eval_loss": 0.21039938926696777, + "eval_runtime": 584.3904, + "eval_samples_per_second": 2.634, + "eval_steps_per_second": 2.634, + "eval_wer": 21.59303412809724, + "step": 22000 + }, + { + "epoch": 16.42, + "grad_norm": 1.563987135887146, + "learning_rate": 7.837587939698493e-06, + "loss": 0.0067, + "step": 22025 + }, + { + "epoch": 16.44, + "grad_norm": 1.6784090995788574, + "learning_rate": 7.835075376884422e-06, + "loss": 0.0076, + "step": 22050 + }, + { + "epoch": 16.46, + "grad_norm": 1.4715943336486816, + "learning_rate": 7.832562814070353e-06, + "loss": 0.0072, + "step": 22075 + }, + { + "epoch": 16.48, + "grad_norm": 2.9437522888183594, + "learning_rate": 7.830050251256282e-06, + "loss": 0.0076, + "step": 22100 + }, + { + "epoch": 16.5, + "grad_norm": 1.9213895797729492, + "learning_rate": 7.827537688442212e-06, + "loss": 0.0069, + "step": 22125 + }, + { + "epoch": 16.52, + "grad_norm": 1.8817442655563354, + "learning_rate": 7.825025125628141e-06, + "loss": 0.0068, + "step": 22150 + }, + { + "epoch": 16.54, + "grad_norm": 1.8839951753616333, + "learning_rate": 7.82251256281407e-06, + "loss": 0.0073, + "step": 22175 + }, + { + "epoch": 16.55, + "grad_norm": 1.7412254810333252, + "learning_rate": 7.820000000000001e-06, + "loss": 0.0067, + "step": 22200 + }, + { + "epoch": 16.57, + "grad_norm": 1.6018824577331543, + "learning_rate": 7.81748743718593e-06, + "loss": 0.0073, + "step": 22225 + }, + { + "epoch": 16.59, + "grad_norm": 2.0350027084350586, + "learning_rate": 7.81497487437186e-06, + "loss": 0.0075, + "step": 22250 + }, + { + "epoch": 16.61, + "grad_norm": 1.411873698234558, + "learning_rate": 7.81246231155779e-06, + "loss": 0.0077, + "step": 22275 + }, + { + "epoch": 16.63, + "grad_norm": 1.52738618850708, + "learning_rate": 7.809949748743719e-06, + "loss": 0.007, + "step": 22300 + }, + { + "epoch": 16.65, + "grad_norm": 1.8119453191757202, + "learning_rate": 7.80743718592965e-06, + "loss": 0.0072, + "step": 22325 + }, + { + "epoch": 16.67, + "grad_norm": 1.2568570375442505, + "learning_rate": 7.804924623115579e-06, + "loss": 0.0066, + "step": 22350 + }, + { + "epoch": 16.69, + "grad_norm": 2.1735363006591797, + "learning_rate": 7.802412060301508e-06, + "loss": 0.007, + "step": 22375 + }, + { + "epoch": 16.7, + "grad_norm": 1.4114694595336914, + "learning_rate": 7.799899497487438e-06, + "loss": 0.0079, + "step": 22400 + }, + { + "epoch": 16.72, + "grad_norm": 1.7781214714050293, + "learning_rate": 7.797386934673367e-06, + "loss": 0.008, + "step": 22425 + }, + { + "epoch": 16.74, + "grad_norm": 1.9170448780059814, + "learning_rate": 7.794874371859296e-06, + "loss": 0.0066, + "step": 22450 + }, + { + "epoch": 16.76, + "grad_norm": 2.2247982025146484, + "learning_rate": 7.792361809045227e-06, + "loss": 0.0073, + "step": 22475 + }, + { + "epoch": 16.78, + "grad_norm": 1.8319737911224365, + "learning_rate": 7.789849246231157e-06, + "loss": 0.007, + "step": 22500 + }, + { + "epoch": 16.8, + "grad_norm": 2.009772300720215, + "learning_rate": 7.787336683417086e-06, + "loss": 0.0074, + "step": 22525 + }, + { + "epoch": 16.82, + "grad_norm": 1.8966844081878662, + "learning_rate": 7.784824120603017e-06, + "loss": 0.0071, + "step": 22550 + }, + { + "epoch": 16.83, + "grad_norm": 1.7694721221923828, + "learning_rate": 7.782311557788945e-06, + "loss": 0.0073, + "step": 22575 + }, + { + "epoch": 16.85, + "grad_norm": 1.6632591485977173, + "learning_rate": 7.779798994974876e-06, + "loss": 0.0068, + "step": 22600 + }, + { + "epoch": 16.87, + "grad_norm": 2.6048595905303955, + "learning_rate": 7.777286432160805e-06, + "loss": 0.0075, + "step": 22625 + }, + { + "epoch": 16.89, + "grad_norm": 2.3275516033172607, + "learning_rate": 7.774773869346734e-06, + "loss": 0.0069, + "step": 22650 + }, + { + "epoch": 16.91, + "grad_norm": 1.402176022529602, + "learning_rate": 7.772261306532664e-06, + "loss": 0.0073, + "step": 22675 + }, + { + "epoch": 16.93, + "grad_norm": 1.3192116022109985, + "learning_rate": 7.769748743718593e-06, + "loss": 0.0073, + "step": 22700 + }, + { + "epoch": 16.95, + "grad_norm": 1.4128471612930298, + "learning_rate": 7.767236180904522e-06, + "loss": 0.008, + "step": 22725 + }, + { + "epoch": 16.96, + "grad_norm": 1.7826400995254517, + "learning_rate": 7.764723618090453e-06, + "loss": 0.007, + "step": 22750 + }, + { + "epoch": 16.98, + "grad_norm": 1.6689640283584595, + "learning_rate": 7.762211055276383e-06, + "loss": 0.0077, + "step": 22775 + }, + { + "epoch": 17.0, + "grad_norm": 1.2625550031661987, + "learning_rate": 7.759698492462312e-06, + "loss": 0.0067, + "step": 22800 + }, + { + "epoch": 17.02, + "grad_norm": 1.2076127529144287, + "learning_rate": 7.757185929648243e-06, + "loss": 0.0052, + "step": 22825 + }, + { + "epoch": 17.04, + "grad_norm": 1.6117262840270996, + "learning_rate": 7.75467336683417e-06, + "loss": 0.0052, + "step": 22850 + }, + { + "epoch": 17.06, + "grad_norm": 1.1802167892456055, + "learning_rate": 7.752160804020102e-06, + "loss": 0.0055, + "step": 22875 + }, + { + "epoch": 17.08, + "grad_norm": 1.4015257358551025, + "learning_rate": 7.749648241206031e-06, + "loss": 0.0049, + "step": 22900 + }, + { + "epoch": 17.1, + "grad_norm": 1.216342568397522, + "learning_rate": 7.74713567839196e-06, + "loss": 0.005, + "step": 22925 + }, + { + "epoch": 17.11, + "grad_norm": 1.106349229812622, + "learning_rate": 7.744623115577891e-06, + "loss": 0.0049, + "step": 22950 + }, + { + "epoch": 17.13, + "grad_norm": 2.354029893875122, + "learning_rate": 7.742110552763819e-06, + "loss": 0.0051, + "step": 22975 + }, + { + "epoch": 17.15, + "grad_norm": 2.2517521381378174, + "learning_rate": 7.73959798994975e-06, + "loss": 0.0048, + "step": 23000 + }, + { + "epoch": 17.15, + "eval_loss": 0.2125159502029419, + "eval_runtime": 585.3409, + "eval_samples_per_second": 2.629, + "eval_steps_per_second": 2.629, + "eval_wer": 21.522907900888267, + "step": 23000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 75, + "save_steps": 1000, + "total_flos": 3.622895439151104e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/hindi/checkpoint-23000/training_args.bin b/checkpoints/whisper-tiny/hindi/checkpoint-23000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcbd61b6b1758f0ea20d68e472d41ff902c5f679 --- /dev/null +++ b/checkpoints/whisper-tiny/hindi/checkpoint-23000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb0d3c7fe17680c45c711e779f46d2dc266a47ba4914294442bd4383cff2368 +size 4667 diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/config.json b/checkpoints/whisper-tiny/kannada/checkpoint-23000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8904a6303d55e7641f727eb10bafc9da187e11f2 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50306 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/generation_config.json b/checkpoints/whisper-tiny/kannada/checkpoint-23000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/model.safetensors b/checkpoints/whisper-tiny/kannada/checkpoint-23000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f20f46b4e3580446f679fd777ce3dcb00cd6e1ac --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e679ad5fb91858f176c283595e1201b94784e3f0890f9339039486b4b68786 +size 151061672 diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/optimizer.pt b/checkpoints/whisper-tiny/kannada/checkpoint-23000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7c5f758fc6162114c70dac2ee8ca8b35f1f86cc --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8940c05567108bc33ddac7f3beae8f2cda4e3fff6736df8556d7a64c251629cb +size 297615749 diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/preprocessor_config.json b/checkpoints/whisper-tiny/kannada/checkpoint-23000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/rng_state.pth b/checkpoints/whisper-tiny/kannada/checkpoint-23000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7a32ba8704c5abde9e985db3ed33f0e6f4e24d0 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7454959d02ca5f5c98111f4d96721b021192ab7d312b7480effa79f1d997876 +size 14575 diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/scheduler.pt b/checkpoints/whisper-tiny/kannada/checkpoint-23000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c4df4212a49d07553ec0b89198489848d8b1383 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1760ffbaa3bb511449a8d86fee85baa13c35110f1c7b476534e62ef575939478 +size 627 diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/trainer_state.json b/checkpoints/whisper-tiny/kannada/checkpoint-23000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4e8a947938878fbe4b3b57d7d47e1d207f742fa5 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/trainer_state.json @@ -0,0 +1,6668 @@ +{ + "best_metric": 42.65622368199427, + "best_model_checkpoint": "results/whisper-tiny/kannada/checkpoint-13000", + "epoch": 17.15137956748695, + "eval_steps": 1000, + "global_step": 23000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 40.25240707397461, + "learning_rate": 4.4e-07, + "loss": 2.8733, + "step": 25 + }, + { + "epoch": 0.04, + "grad_norm": 14.837316513061523, + "learning_rate": 9.400000000000001e-07, + "loss": 2.5746, + "step": 50 + }, + { + "epoch": 0.06, + "grad_norm": 7.687209606170654, + "learning_rate": 1.44e-06, + "loss": 2.2413, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 6.452078819274902, + "learning_rate": 1.94e-06, + "loss": 1.9716, + "step": 100 + }, + { + "epoch": 0.09, + "grad_norm": 4.549066543579102, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.7604, + "step": 125 + }, + { + "epoch": 0.11, + "grad_norm": 5.471508979797363, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.6618, + "step": 150 + }, + { + "epoch": 0.13, + "grad_norm": 5.876540184020996, + "learning_rate": 3.44e-06, + "loss": 1.6022, + "step": 175 + }, + { + "epoch": 0.15, + "grad_norm": 5.24710750579834, + "learning_rate": 3.94e-06, + "loss": 1.5584, + "step": 200 + }, + { + "epoch": 0.17, + "grad_norm": 6.607402324676514, + "learning_rate": 4.440000000000001e-06, + "loss": 1.5177, + "step": 225 + }, + { + "epoch": 0.19, + "grad_norm": 7.097057342529297, + "learning_rate": 4.94e-06, + "loss": 1.4865, + "step": 250 + }, + { + "epoch": 0.21, + "grad_norm": 6.310657978057861, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.4546, + "step": 275 + }, + { + "epoch": 0.22, + "grad_norm": 9.082182884216309, + "learning_rate": 5.94e-06, + "loss": 1.4274, + "step": 300 + }, + { + "epoch": 0.24, + "grad_norm": 8.356383323669434, + "learning_rate": 6.440000000000001e-06, + "loss": 1.4101, + "step": 325 + }, + { + "epoch": 0.26, + "grad_norm": 11.813643455505371, + "learning_rate": 6.9400000000000005e-06, + "loss": 1.3788, + "step": 350 + }, + { + "epoch": 0.28, + "grad_norm": 10.426423072814941, + "learning_rate": 7.440000000000001e-06, + "loss": 1.3496, + "step": 375 + }, + { + "epoch": 0.3, + "grad_norm": 8.947437286376953, + "learning_rate": 7.94e-06, + "loss": 1.3209, + "step": 400 + }, + { + "epoch": 0.32, + "grad_norm": 7.913174152374268, + "learning_rate": 8.44e-06, + "loss": 1.2748, + "step": 425 + }, + { + "epoch": 0.34, + "grad_norm": 11.724613189697266, + "learning_rate": 8.94e-06, + "loss": 1.1937, + "step": 450 + }, + { + "epoch": 0.35, + "grad_norm": 20.35866928100586, + "learning_rate": 9.440000000000001e-06, + "loss": 1.0563, + "step": 475 + }, + { + "epoch": 0.37, + "grad_norm": 9.35606575012207, + "learning_rate": 9.940000000000001e-06, + "loss": 0.9029, + "step": 500 + }, + { + "epoch": 0.39, + "grad_norm": 8.49641227722168, + "learning_rate": 9.997788944723618e-06, + "loss": 0.7288, + "step": 525 + }, + { + "epoch": 0.41, + "grad_norm": 7.754798889160156, + "learning_rate": 9.99527638190955e-06, + "loss": 0.5919, + "step": 550 + }, + { + "epoch": 0.43, + "grad_norm": 6.644587993621826, + "learning_rate": 9.992763819095477e-06, + "loss": 0.5056, + "step": 575 + }, + { + "epoch": 0.45, + "grad_norm": 8.433005332946777, + "learning_rate": 9.990251256281408e-06, + "loss": 0.4389, + "step": 600 + }, + { + "epoch": 0.47, + "grad_norm": 6.307461738586426, + "learning_rate": 9.987738693467337e-06, + "loss": 0.4068, + "step": 625 + }, + { + "epoch": 0.48, + "grad_norm": 5.551212787628174, + "learning_rate": 9.985226130653267e-06, + "loss": 0.3847, + "step": 650 + }, + { + "epoch": 0.5, + "grad_norm": 6.110048294067383, + "learning_rate": 9.982713567839198e-06, + "loss": 0.3661, + "step": 675 + }, + { + "epoch": 0.52, + "grad_norm": 6.122501373291016, + "learning_rate": 9.980201005025127e-06, + "loss": 0.3558, + "step": 700 + }, + { + "epoch": 0.54, + "grad_norm": 5.635471343994141, + "learning_rate": 9.977688442211056e-06, + "loss": 0.3448, + "step": 725 + }, + { + "epoch": 0.56, + "grad_norm": 6.95956563949585, + "learning_rate": 9.975175879396986e-06, + "loss": 0.3319, + "step": 750 + }, + { + "epoch": 0.58, + "grad_norm": 4.571022987365723, + "learning_rate": 9.972663316582915e-06, + "loss": 0.3202, + "step": 775 + }, + { + "epoch": 0.6, + "grad_norm": 4.143917083740234, + "learning_rate": 9.970150753768844e-06, + "loss": 0.311, + "step": 800 + }, + { + "epoch": 0.62, + "grad_norm": 4.9314656257629395, + "learning_rate": 9.967638190954775e-06, + "loss": 0.3046, + "step": 825 + }, + { + "epoch": 0.63, + "grad_norm": 3.9453394412994385, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2961, + "step": 850 + }, + { + "epoch": 0.65, + "grad_norm": 4.675088405609131, + "learning_rate": 9.962613065326634e-06, + "loss": 0.291, + "step": 875 + }, + { + "epoch": 0.67, + "grad_norm": 4.770447254180908, + "learning_rate": 9.960100502512563e-06, + "loss": 0.2941, + "step": 900 + }, + { + "epoch": 0.69, + "grad_norm": 4.900439262390137, + "learning_rate": 9.957587939698493e-06, + "loss": 0.284, + "step": 925 + }, + { + "epoch": 0.71, + "grad_norm": 4.612481117248535, + "learning_rate": 9.955075376884424e-06, + "loss": 0.2799, + "step": 950 + }, + { + "epoch": 0.73, + "grad_norm": 4.1810622215271, + "learning_rate": 9.952562814070353e-06, + "loss": 0.2724, + "step": 975 + }, + { + "epoch": 0.75, + "grad_norm": 4.587222099304199, + "learning_rate": 9.950050251256282e-06, + "loss": 0.276, + "step": 1000 + }, + { + "epoch": 0.75, + "eval_loss": 0.1996370255947113, + "eval_runtime": 881.1977, + "eval_samples_per_second": 1.623, + "eval_steps_per_second": 1.623, + "eval_wer": 185.35455617315142, + "step": 1000 + }, + { + "epoch": 0.76, + "grad_norm": 4.766111850738525, + "learning_rate": 9.947537688442212e-06, + "loss": 0.2691, + "step": 1025 + }, + { + "epoch": 0.78, + "grad_norm": 5.039989948272705, + "learning_rate": 9.945025125628141e-06, + "loss": 0.2732, + "step": 1050 + }, + { + "epoch": 0.8, + "grad_norm": 5.168909072875977, + "learning_rate": 9.94251256281407e-06, + "loss": 0.2677, + "step": 1075 + }, + { + "epoch": 0.82, + "grad_norm": 4.653995037078857, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2577, + "step": 1100 + }, + { + "epoch": 0.84, + "grad_norm": 5.03933048248291, + "learning_rate": 9.93748743718593e-06, + "loss": 0.2594, + "step": 1125 + }, + { + "epoch": 0.86, + "grad_norm": 4.273940563201904, + "learning_rate": 9.93497487437186e-06, + "loss": 0.2481, + "step": 1150 + }, + { + "epoch": 0.88, + "grad_norm": 4.954524993896484, + "learning_rate": 9.93246231155779e-06, + "loss": 0.2558, + "step": 1175 + }, + { + "epoch": 0.89, + "grad_norm": 3.3275134563446045, + "learning_rate": 9.929949748743719e-06, + "loss": 0.2434, + "step": 1200 + }, + { + "epoch": 0.91, + "grad_norm": 5.204573154449463, + "learning_rate": 9.92743718592965e-06, + "loss": 0.2448, + "step": 1225 + }, + { + "epoch": 0.93, + "grad_norm": 5.293919086456299, + "learning_rate": 9.924924623115579e-06, + "loss": 0.2539, + "step": 1250 + }, + { + "epoch": 0.95, + "grad_norm": 4.689702987670898, + "learning_rate": 9.922412060301508e-06, + "loss": 0.2382, + "step": 1275 + }, + { + "epoch": 0.97, + "grad_norm": 4.284536838531494, + "learning_rate": 9.91989949748744e-06, + "loss": 0.2429, + "step": 1300 + }, + { + "epoch": 0.99, + "grad_norm": 3.931393623352051, + "learning_rate": 9.917386934673367e-06, + "loss": 0.2447, + "step": 1325 + }, + { + "epoch": 1.01, + "grad_norm": 3.6255593299865723, + "learning_rate": 9.914874371859298e-06, + "loss": 0.2327, + "step": 1350 + }, + { + "epoch": 1.03, + "grad_norm": 3.935384511947632, + "learning_rate": 9.912361809045227e-06, + "loss": 0.2236, + "step": 1375 + }, + { + "epoch": 1.04, + "grad_norm": 4.718512058258057, + "learning_rate": 9.909849246231157e-06, + "loss": 0.2292, + "step": 1400 + }, + { + "epoch": 1.06, + "grad_norm": 3.8811581134796143, + "learning_rate": 9.907336683417086e-06, + "loss": 0.2306, + "step": 1425 + }, + { + "epoch": 1.08, + "grad_norm": 3.8438000679016113, + "learning_rate": 9.904824120603015e-06, + "loss": 0.2267, + "step": 1450 + }, + { + "epoch": 1.1, + "grad_norm": 3.841702699661255, + "learning_rate": 9.902311557788945e-06, + "loss": 0.217, + "step": 1475 + }, + { + "epoch": 1.12, + "grad_norm": 3.3814170360565186, + "learning_rate": 9.899798994974876e-06, + "loss": 0.2166, + "step": 1500 + }, + { + "epoch": 1.14, + "grad_norm": 4.266124725341797, + "learning_rate": 9.897286432160805e-06, + "loss": 0.2197, + "step": 1525 + }, + { + "epoch": 1.16, + "grad_norm": 3.69116473197937, + "learning_rate": 9.894773869346734e-06, + "loss": 0.2212, + "step": 1550 + }, + { + "epoch": 1.17, + "grad_norm": 3.6657826900482178, + "learning_rate": 9.892261306532665e-06, + "loss": 0.2198, + "step": 1575 + }, + { + "epoch": 1.19, + "grad_norm": 6.318333625793457, + "learning_rate": 9.889748743718593e-06, + "loss": 0.2182, + "step": 1600 + }, + { + "epoch": 1.21, + "grad_norm": 4.844775676727295, + "learning_rate": 9.887236180904524e-06, + "loss": 0.2141, + "step": 1625 + }, + { + "epoch": 1.23, + "grad_norm": 3.495378255844116, + "learning_rate": 9.884723618090453e-06, + "loss": 0.218, + "step": 1650 + }, + { + "epoch": 1.25, + "grad_norm": 3.420531988143921, + "learning_rate": 9.882211055276383e-06, + "loss": 0.212, + "step": 1675 + }, + { + "epoch": 1.27, + "grad_norm": 3.743772268295288, + "learning_rate": 9.879698492462312e-06, + "loss": 0.2073, + "step": 1700 + }, + { + "epoch": 1.29, + "grad_norm": 3.7927937507629395, + "learning_rate": 9.877185929648241e-06, + "loss": 0.2079, + "step": 1725 + }, + { + "epoch": 1.3, + "grad_norm": 4.131795406341553, + "learning_rate": 9.874673366834172e-06, + "loss": 0.2059, + "step": 1750 + }, + { + "epoch": 1.32, + "grad_norm": 3.9832723140716553, + "learning_rate": 9.872160804020102e-06, + "loss": 0.2095, + "step": 1775 + }, + { + "epoch": 1.34, + "grad_norm": 4.179487705230713, + "learning_rate": 9.869648241206031e-06, + "loss": 0.2038, + "step": 1800 + }, + { + "epoch": 1.36, + "grad_norm": 3.59555983543396, + "learning_rate": 9.86713567839196e-06, + "loss": 0.2104, + "step": 1825 + }, + { + "epoch": 1.38, + "grad_norm": 4.045324325561523, + "learning_rate": 9.864623115577891e-06, + "loss": 0.208, + "step": 1850 + }, + { + "epoch": 1.4, + "grad_norm": 3.8177411556243896, + "learning_rate": 9.862110552763819e-06, + "loss": 0.2028, + "step": 1875 + }, + { + "epoch": 1.42, + "grad_norm": 3.908677339553833, + "learning_rate": 9.85959798994975e-06, + "loss": 0.209, + "step": 1900 + }, + { + "epoch": 1.44, + "grad_norm": 4.026361465454102, + "learning_rate": 9.85708542713568e-06, + "loss": 0.2045, + "step": 1925 + }, + { + "epoch": 1.45, + "grad_norm": 3.5339152812957764, + "learning_rate": 9.854572864321609e-06, + "loss": 0.2061, + "step": 1950 + }, + { + "epoch": 1.47, + "grad_norm": 3.208533763885498, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1976, + "step": 1975 + }, + { + "epoch": 1.49, + "grad_norm": 4.069692134857178, + "learning_rate": 9.849547738693467e-06, + "loss": 0.1978, + "step": 2000 + }, + { + "epoch": 1.49, + "eval_loss": 0.14536768198013306, + "eval_runtime": 859.7299, + "eval_samples_per_second": 1.663, + "eval_steps_per_second": 1.663, + "eval_wer": 166.75088428499242, + "step": 2000 + }, + { + "epoch": 1.51, + "grad_norm": 4.06554651260376, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1988, + "step": 2025 + }, + { + "epoch": 1.53, + "grad_norm": 3.8319058418273926, + "learning_rate": 9.84462311557789e-06, + "loss": 0.1949, + "step": 2050 + }, + { + "epoch": 1.55, + "grad_norm": 3.909388780593872, + "learning_rate": 9.842110552763819e-06, + "loss": 0.1948, + "step": 2075 + }, + { + "epoch": 1.57, + "grad_norm": 3.4232914447784424, + "learning_rate": 9.83959798994975e-06, + "loss": 0.1971, + "step": 2100 + }, + { + "epoch": 1.58, + "grad_norm": 4.4802565574646, + "learning_rate": 9.83708542713568e-06, + "loss": 0.1948, + "step": 2125 + }, + { + "epoch": 1.6, + "grad_norm": 3.7474277019500732, + "learning_rate": 9.834572864321609e-06, + "loss": 0.1943, + "step": 2150 + }, + { + "epoch": 1.62, + "grad_norm": 5.71710729598999, + "learning_rate": 9.832060301507538e-06, + "loss": 0.1933, + "step": 2175 + }, + { + "epoch": 1.64, + "grad_norm": 4.31823205947876, + "learning_rate": 9.829547738693467e-06, + "loss": 0.2012, + "step": 2200 + }, + { + "epoch": 1.66, + "grad_norm": 3.359407901763916, + "learning_rate": 9.827035175879398e-06, + "loss": 0.1946, + "step": 2225 + }, + { + "epoch": 1.68, + "grad_norm": 5.8007330894470215, + "learning_rate": 9.824522613065328e-06, + "loss": 0.1956, + "step": 2250 + }, + { + "epoch": 1.7, + "grad_norm": 3.252018690109253, + "learning_rate": 9.822010050251257e-06, + "loss": 0.1899, + "step": 2275 + }, + { + "epoch": 1.72, + "grad_norm": 5.584671974182129, + "learning_rate": 9.819497487437186e-06, + "loss": 0.1925, + "step": 2300 + }, + { + "epoch": 1.73, + "grad_norm": 3.8987181186676025, + "learning_rate": 9.816984924623116e-06, + "loss": 0.1873, + "step": 2325 + }, + { + "epoch": 1.75, + "grad_norm": 4.848237037658691, + "learning_rate": 9.814472361809047e-06, + "loss": 0.1879, + "step": 2350 + }, + { + "epoch": 1.77, + "grad_norm": 3.5970137119293213, + "learning_rate": 9.811959798994976e-06, + "loss": 0.1904, + "step": 2375 + }, + { + "epoch": 1.79, + "grad_norm": 4.078869819641113, + "learning_rate": 9.809447236180905e-06, + "loss": 0.1906, + "step": 2400 + }, + { + "epoch": 1.81, + "grad_norm": 4.647728443145752, + "learning_rate": 9.806934673366835e-06, + "loss": 0.1863, + "step": 2425 + }, + { + "epoch": 1.83, + "grad_norm": 5.3670783042907715, + "learning_rate": 9.804422110552764e-06, + "loss": 0.1818, + "step": 2450 + }, + { + "epoch": 1.85, + "grad_norm": 4.0041093826293945, + "learning_rate": 9.801909547738693e-06, + "loss": 0.184, + "step": 2475 + }, + { + "epoch": 1.86, + "grad_norm": 3.1168735027313232, + "learning_rate": 9.799396984924624e-06, + "loss": 0.1849, + "step": 2500 + }, + { + "epoch": 1.88, + "grad_norm": 3.316239356994629, + "learning_rate": 9.796884422110554e-06, + "loss": 0.1841, + "step": 2525 + }, + { + "epoch": 1.9, + "grad_norm": 3.5666942596435547, + "learning_rate": 9.794371859296483e-06, + "loss": 0.1876, + "step": 2550 + }, + { + "epoch": 1.92, + "grad_norm": 3.7967987060546875, + "learning_rate": 9.791859296482414e-06, + "loss": 0.1837, + "step": 2575 + }, + { + "epoch": 1.94, + "grad_norm": 4.934964656829834, + "learning_rate": 9.789346733668342e-06, + "loss": 0.1813, + "step": 2600 + }, + { + "epoch": 1.96, + "grad_norm": 3.415073871612549, + "learning_rate": 9.786834170854273e-06, + "loss": 0.1806, + "step": 2625 + }, + { + "epoch": 1.98, + "grad_norm": 4.888739109039307, + "learning_rate": 9.784321608040202e-06, + "loss": 0.1785, + "step": 2650 + }, + { + "epoch": 1.99, + "grad_norm": 4.397167682647705, + "learning_rate": 9.781809045226131e-06, + "loss": 0.1853, + "step": 2675 + }, + { + "epoch": 2.01, + "grad_norm": 3.36556077003479, + "learning_rate": 9.77929648241206e-06, + "loss": 0.1748, + "step": 2700 + }, + { + "epoch": 2.03, + "grad_norm": 4.873407363891602, + "learning_rate": 9.77678391959799e-06, + "loss": 0.1688, + "step": 2725 + }, + { + "epoch": 2.05, + "grad_norm": 3.3307507038116455, + "learning_rate": 9.774271356783921e-06, + "loss": 0.1704, + "step": 2750 + }, + { + "epoch": 2.07, + "grad_norm": 3.619224786758423, + "learning_rate": 9.77175879396985e-06, + "loss": 0.1682, + "step": 2775 + }, + { + "epoch": 2.09, + "grad_norm": 6.015592098236084, + "learning_rate": 9.76924623115578e-06, + "loss": 0.1685, + "step": 2800 + }, + { + "epoch": 2.11, + "grad_norm": 4.117185115814209, + "learning_rate": 9.766733668341709e-06, + "loss": 0.1655, + "step": 2825 + }, + { + "epoch": 2.13, + "grad_norm": 3.6932685375213623, + "learning_rate": 9.76422110552764e-06, + "loss": 0.1696, + "step": 2850 + }, + { + "epoch": 2.14, + "grad_norm": 3.161186456680298, + "learning_rate": 9.761708542713568e-06, + "loss": 0.1676, + "step": 2875 + }, + { + "epoch": 2.16, + "grad_norm": 4.813553333282471, + "learning_rate": 9.759195979899499e-06, + "loss": 0.1669, + "step": 2900 + }, + { + "epoch": 2.18, + "grad_norm": 3.5299124717712402, + "learning_rate": 9.756683417085428e-06, + "loss": 0.1676, + "step": 2925 + }, + { + "epoch": 2.2, + "grad_norm": 3.0108706951141357, + "learning_rate": 9.754170854271357e-06, + "loss": 0.1666, + "step": 2950 + }, + { + "epoch": 2.22, + "grad_norm": 3.8795621395111084, + "learning_rate": 9.751658291457288e-06, + "loss": 0.1597, + "step": 2975 + }, + { + "epoch": 2.24, + "grad_norm": 3.7273056507110596, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1657, + "step": 3000 + }, + { + "epoch": 2.24, + "eval_loss": 0.12594151496887207, + "eval_runtime": 863.9481, + "eval_samples_per_second": 1.655, + "eval_steps_per_second": 1.655, + "eval_wer": 110.62826343271013, + "step": 3000 + }, + { + "epoch": 2.26, + "grad_norm": 3.2462053298950195, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1691, + "step": 3025 + }, + { + "epoch": 2.27, + "grad_norm": 2.944606065750122, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1639, + "step": 3050 + }, + { + "epoch": 2.29, + "grad_norm": 3.623324155807495, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1699, + "step": 3075 + }, + { + "epoch": 2.31, + "grad_norm": 4.068861484527588, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1688, + "step": 3100 + }, + { + "epoch": 2.33, + "grad_norm": 4.898096084594727, + "learning_rate": 9.736582914572866e-06, + "loss": 0.1596, + "step": 3125 + }, + { + "epoch": 2.35, + "grad_norm": 3.7359135150909424, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1629, + "step": 3150 + }, + { + "epoch": 2.37, + "grad_norm": 3.598522186279297, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1618, + "step": 3175 + }, + { + "epoch": 2.39, + "grad_norm": 3.5260026454925537, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1604, + "step": 3200 + }, + { + "epoch": 2.4, + "grad_norm": 3.492182970046997, + "learning_rate": 9.726532663316583e-06, + "loss": 0.163, + "step": 3225 + }, + { + "epoch": 2.42, + "grad_norm": 3.374004602432251, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1613, + "step": 3250 + }, + { + "epoch": 2.44, + "grad_norm": 4.1993231773376465, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1598, + "step": 3275 + }, + { + "epoch": 2.46, + "grad_norm": 5.450031757354736, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1615, + "step": 3300 + }, + { + "epoch": 2.48, + "grad_norm": 3.9467153549194336, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1646, + "step": 3325 + }, + { + "epoch": 2.5, + "grad_norm": 3.712852954864502, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1623, + "step": 3350 + }, + { + "epoch": 2.52, + "grad_norm": 3.6807360649108887, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1547, + "step": 3375 + }, + { + "epoch": 2.54, + "grad_norm": 3.671624183654785, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1561, + "step": 3400 + }, + { + "epoch": 2.55, + "grad_norm": 3.013315200805664, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1545, + "step": 3425 + }, + { + "epoch": 2.57, + "grad_norm": 3.1106438636779785, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1601, + "step": 3450 + }, + { + "epoch": 2.59, + "grad_norm": 3.231281042098999, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1623, + "step": 3475 + }, + { + "epoch": 2.61, + "grad_norm": 2.9513332843780518, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1517, + "step": 3500 + }, + { + "epoch": 2.63, + "grad_norm": 3.1729929447174072, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1544, + "step": 3525 + }, + { + "epoch": 2.65, + "grad_norm": 5.006084442138672, + "learning_rate": 9.69386934673367e-06, + "loss": 0.161, + "step": 3550 + }, + { + "epoch": 2.67, + "grad_norm": 3.6920952796936035, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1579, + "step": 3575 + }, + { + "epoch": 2.68, + "grad_norm": 3.2814626693725586, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1581, + "step": 3600 + }, + { + "epoch": 2.7, + "grad_norm": 3.5771803855895996, + "learning_rate": 9.686331658291457e-06, + "loss": 0.1573, + "step": 3625 + }, + { + "epoch": 2.72, + "grad_norm": 3.598724365234375, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1555, + "step": 3650 + }, + { + "epoch": 2.74, + "grad_norm": 3.747723340988159, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1571, + "step": 3675 + }, + { + "epoch": 2.76, + "grad_norm": 3.455930471420288, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1568, + "step": 3700 + }, + { + "epoch": 2.78, + "grad_norm": 3.2310731410980225, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1578, + "step": 3725 + }, + { + "epoch": 2.8, + "grad_norm": 3.125164031982422, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1572, + "step": 3750 + }, + { + "epoch": 2.82, + "grad_norm": 3.170626640319824, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1523, + "step": 3775 + }, + { + "epoch": 2.83, + "grad_norm": 3.85634183883667, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1592, + "step": 3800 + }, + { + "epoch": 2.85, + "grad_norm": 3.448958158493042, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1512, + "step": 3825 + }, + { + "epoch": 2.87, + "grad_norm": 2.96707820892334, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1488, + "step": 3850 + }, + { + "epoch": 2.89, + "grad_norm": 4.030455589294434, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1519, + "step": 3875 + }, + { + "epoch": 2.91, + "grad_norm": 3.8180503845214844, + "learning_rate": 9.658693467336683e-06, + "loss": 0.152, + "step": 3900 + }, + { + "epoch": 2.93, + "grad_norm": 3.0710768699645996, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1513, + "step": 3925 + }, + { + "epoch": 2.95, + "grad_norm": 3.9558193683624268, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1533, + "step": 3950 + }, + { + "epoch": 2.96, + "grad_norm": 3.843493938446045, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1548, + "step": 3975 + }, + { + "epoch": 2.98, + "grad_norm": 3.4822869300842285, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1538, + "step": 4000 + }, + { + "epoch": 2.98, + "eval_loss": 0.11330027878284454, + "eval_runtime": 868.557, + "eval_samples_per_second": 1.646, + "eval_steps_per_second": 1.646, + "eval_wer": 97.23766211891528, + "step": 4000 + }, + { + "epoch": 3.0, + "grad_norm": 3.898665428161621, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1518, + "step": 4025 + }, + { + "epoch": 3.02, + "grad_norm": 3.3252246379852295, + "learning_rate": 9.643618090452263e-06, + "loss": 0.141, + "step": 4050 + }, + { + "epoch": 3.04, + "grad_norm": 2.971804141998291, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1394, + "step": 4075 + }, + { + "epoch": 3.06, + "grad_norm": 2.5654091835021973, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1398, + "step": 4100 + }, + { + "epoch": 3.08, + "grad_norm": 3.226285457611084, + "learning_rate": 9.63608040201005e-06, + "loss": 0.142, + "step": 4125 + }, + { + "epoch": 3.09, + "grad_norm": 3.3913159370422363, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1419, + "step": 4150 + }, + { + "epoch": 3.11, + "grad_norm": 3.4066696166992188, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1407, + "step": 4175 + }, + { + "epoch": 3.13, + "grad_norm": 3.3519022464752197, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1419, + "step": 4200 + }, + { + "epoch": 3.15, + "grad_norm": 3.84570574760437, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1384, + "step": 4225 + }, + { + "epoch": 3.17, + "grad_norm": 2.4297237396240234, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1393, + "step": 4250 + }, + { + "epoch": 3.19, + "grad_norm": 2.716996192932129, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1378, + "step": 4275 + }, + { + "epoch": 3.21, + "grad_norm": 3.032196521759033, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1351, + "step": 4300 + }, + { + "epoch": 3.23, + "grad_norm": 3.2521302700042725, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1383, + "step": 4325 + }, + { + "epoch": 3.24, + "grad_norm": 4.257608890533447, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1386, + "step": 4350 + }, + { + "epoch": 3.26, + "grad_norm": 3.2418127059936523, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1392, + "step": 4375 + }, + { + "epoch": 3.28, + "grad_norm": 4.032801151275635, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1363, + "step": 4400 + }, + { + "epoch": 3.3, + "grad_norm": 3.7143077850341797, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1419, + "step": 4425 + }, + { + "epoch": 3.32, + "grad_norm": 3.218097686767578, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1386, + "step": 4450 + }, + { + "epoch": 3.34, + "grad_norm": 3.4057090282440186, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1387, + "step": 4475 + }, + { + "epoch": 3.36, + "grad_norm": 4.470545291900635, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1359, + "step": 4500 + }, + { + "epoch": 3.37, + "grad_norm": 3.358156442642212, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1379, + "step": 4525 + }, + { + "epoch": 3.39, + "grad_norm": 3.367884397506714, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1377, + "step": 4550 + }, + { + "epoch": 3.41, + "grad_norm": 3.0671370029449463, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1358, + "step": 4575 + }, + { + "epoch": 3.43, + "grad_norm": 2.9402806758880615, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1376, + "step": 4600 + }, + { + "epoch": 3.45, + "grad_norm": 3.424687623977661, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1355, + "step": 4625 + }, + { + "epoch": 3.47, + "grad_norm": 2.993894338607788, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1403, + "step": 4650 + }, + { + "epoch": 3.49, + "grad_norm": 5.100373268127441, + "learning_rate": 9.580804020100504e-06, + "loss": 0.141, + "step": 4675 + }, + { + "epoch": 3.5, + "grad_norm": 4.335752487182617, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1358, + "step": 4700 + }, + { + "epoch": 3.52, + "grad_norm": 3.1622116565704346, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1371, + "step": 4725 + }, + { + "epoch": 3.54, + "grad_norm": 3.415160894393921, + "learning_rate": 9.573266331658292e-06, + "loss": 0.134, + "step": 4750 + }, + { + "epoch": 3.56, + "grad_norm": 3.572795867919922, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1369, + "step": 4775 + }, + { + "epoch": 3.58, + "grad_norm": 3.245574951171875, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1375, + "step": 4800 + }, + { + "epoch": 3.6, + "grad_norm": 3.0904273986816406, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1396, + "step": 4825 + }, + { + "epoch": 3.62, + "grad_norm": 3.040719509124756, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1363, + "step": 4850 + }, + { + "epoch": 3.64, + "grad_norm": 3.1514360904693604, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1339, + "step": 4875 + }, + { + "epoch": 3.65, + "grad_norm": 2.5969741344451904, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1364, + "step": 4900 + }, + { + "epoch": 3.67, + "grad_norm": 3.3572769165039062, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1373, + "step": 4925 + }, + { + "epoch": 3.69, + "grad_norm": 3.377455949783325, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1318, + "step": 4950 + }, + { + "epoch": 3.71, + "grad_norm": 3.2793562412261963, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1367, + "step": 4975 + }, + { + "epoch": 3.73, + "grad_norm": 3.9434964656829834, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1335, + "step": 5000 + }, + { + "epoch": 3.73, + "eval_loss": 0.10747358202934265, + "eval_runtime": 872.5305, + "eval_samples_per_second": 1.639, + "eval_steps_per_second": 1.639, + "eval_wer": 68.68788950648475, + "step": 5000 + }, + { + "epoch": 3.75, + "grad_norm": 3.994626998901367, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1379, + "step": 5025 + }, + { + "epoch": 3.77, + "grad_norm": 3.6516830921173096, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1348, + "step": 5050 + }, + { + "epoch": 3.78, + "grad_norm": 3.349698305130005, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1347, + "step": 5075 + }, + { + "epoch": 3.8, + "grad_norm": 4.174395561218262, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1377, + "step": 5100 + }, + { + "epoch": 3.82, + "grad_norm": 3.398993968963623, + "learning_rate": 9.535577889447237e-06, + "loss": 0.134, + "step": 5125 + }, + { + "epoch": 3.84, + "grad_norm": 4.449344635009766, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1332, + "step": 5150 + }, + { + "epoch": 3.86, + "grad_norm": 4.033114910125732, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1364, + "step": 5175 + }, + { + "epoch": 3.88, + "grad_norm": 4.546070098876953, + "learning_rate": 9.528040201005025e-06, + "loss": 0.133, + "step": 5200 + }, + { + "epoch": 3.9, + "grad_norm": 3.256990909576416, + "learning_rate": 9.525527638190956e-06, + "loss": 0.136, + "step": 5225 + }, + { + "epoch": 3.91, + "grad_norm": 3.9650678634643555, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1289, + "step": 5250 + }, + { + "epoch": 3.93, + "grad_norm": 2.965697765350342, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1337, + "step": 5275 + }, + { + "epoch": 3.95, + "grad_norm": 3.3348875045776367, + "learning_rate": 9.517989949748744e-06, + "loss": 0.131, + "step": 5300 + }, + { + "epoch": 3.97, + "grad_norm": 3.765479326248169, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1342, + "step": 5325 + }, + { + "epoch": 3.99, + "grad_norm": 2.6047260761260986, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1334, + "step": 5350 + }, + { + "epoch": 4.01, + "grad_norm": 3.2198264598846436, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1278, + "step": 5375 + }, + { + "epoch": 4.03, + "grad_norm": 2.809319257736206, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1195, + "step": 5400 + }, + { + "epoch": 4.05, + "grad_norm": 3.5453593730926514, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1219, + "step": 5425 + }, + { + "epoch": 4.06, + "grad_norm": 3.748731851577759, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1225, + "step": 5450 + }, + { + "epoch": 4.08, + "grad_norm": 3.3607723712921143, + "learning_rate": 9.500402010050253e-06, + "loss": 0.123, + "step": 5475 + }, + { + "epoch": 4.1, + "grad_norm": 3.4686179161071777, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1209, + "step": 5500 + }, + { + "epoch": 4.12, + "grad_norm": 3.530701160430908, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1256, + "step": 5525 + }, + { + "epoch": 4.14, + "grad_norm": 2.734616756439209, + "learning_rate": 9.49286432160804e-06, + "loss": 0.123, + "step": 5550 + }, + { + "epoch": 4.16, + "grad_norm": 3.3604576587677, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1239, + "step": 5575 + }, + { + "epoch": 4.18, + "grad_norm": 3.060483932495117, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1225, + "step": 5600 + }, + { + "epoch": 4.19, + "grad_norm": 3.019864082336426, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1203, + "step": 5625 + }, + { + "epoch": 4.21, + "grad_norm": 3.3760275840759277, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1213, + "step": 5650 + }, + { + "epoch": 4.23, + "grad_norm": 2.7274367809295654, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1189, + "step": 5675 + }, + { + "epoch": 4.25, + "grad_norm": 3.7094032764434814, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1211, + "step": 5700 + }, + { + "epoch": 4.27, + "grad_norm": 2.889448642730713, + "learning_rate": 9.475276381909548e-06, + "loss": 0.1231, + "step": 5725 + }, + { + "epoch": 4.29, + "grad_norm": 3.1419894695281982, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1224, + "step": 5750 + }, + { + "epoch": 4.31, + "grad_norm": 2.8822243213653564, + "learning_rate": 9.470251256281408e-06, + "loss": 0.1229, + "step": 5775 + }, + { + "epoch": 4.33, + "grad_norm": 2.711674690246582, + "learning_rate": 9.467738693467337e-06, + "loss": 0.1184, + "step": 5800 + }, + { + "epoch": 4.34, + "grad_norm": 3.15612530708313, + "learning_rate": 9.465226130653267e-06, + "loss": 0.1238, + "step": 5825 + }, + { + "epoch": 4.36, + "grad_norm": 2.3605618476867676, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1235, + "step": 5850 + }, + { + "epoch": 4.38, + "grad_norm": 2.894465446472168, + "learning_rate": 9.460201005025127e-06, + "loss": 0.12, + "step": 5875 + }, + { + "epoch": 4.4, + "grad_norm": 3.396977186203003, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1219, + "step": 5900 + }, + { + "epoch": 4.42, + "grad_norm": 3.3591597080230713, + "learning_rate": 9.455175879396986e-06, + "loss": 0.1222, + "step": 5925 + }, + { + "epoch": 4.44, + "grad_norm": 3.7236545085906982, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1194, + "step": 5950 + }, + { + "epoch": 4.46, + "grad_norm": 3.719618558883667, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1235, + "step": 5975 + }, + { + "epoch": 4.47, + "grad_norm": 3.7669448852539062, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1176, + "step": 6000 + }, + { + "epoch": 4.47, + "eval_loss": 0.10224699974060059, + "eval_runtime": 859.7981, + "eval_samples_per_second": 1.663, + "eval_steps_per_second": 1.663, + "eval_wer": 70.49014653865588, + "step": 6000 + }, + { + "epoch": 4.49, + "grad_norm": 3.172691822052002, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1218, + "step": 6025 + }, + { + "epoch": 4.51, + "grad_norm": 3.501983880996704, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1205, + "step": 6050 + }, + { + "epoch": 4.53, + "grad_norm": 3.797752857208252, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1211, + "step": 6075 + }, + { + "epoch": 4.55, + "grad_norm": 2.7185802459716797, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1161, + "step": 6100 + }, + { + "epoch": 4.57, + "grad_norm": 2.667250633239746, + "learning_rate": 9.435175879396986e-06, + "loss": 0.1204, + "step": 6125 + }, + { + "epoch": 4.59, + "grad_norm": 2.558450937271118, + "learning_rate": 9.432663316582915e-06, + "loss": 0.1193, + "step": 6150 + }, + { + "epoch": 4.6, + "grad_norm": 2.86273193359375, + "learning_rate": 9.430150753768845e-06, + "loss": 0.1201, + "step": 6175 + }, + { + "epoch": 4.62, + "grad_norm": 3.511570692062378, + "learning_rate": 9.427638190954774e-06, + "loss": 0.1183, + "step": 6200 + }, + { + "epoch": 4.64, + "grad_norm": 3.2370543479919434, + "learning_rate": 9.425125628140705e-06, + "loss": 0.1173, + "step": 6225 + }, + { + "epoch": 4.66, + "grad_norm": 3.509000062942505, + "learning_rate": 9.422613065326634e-06, + "loss": 0.1181, + "step": 6250 + }, + { + "epoch": 4.68, + "grad_norm": 2.950002908706665, + "learning_rate": 9.420100502512564e-06, + "loss": 0.1216, + "step": 6275 + }, + { + "epoch": 4.7, + "grad_norm": 3.221897602081299, + "learning_rate": 9.417587939698495e-06, + "loss": 0.1184, + "step": 6300 + }, + { + "epoch": 4.72, + "grad_norm": 2.500420093536377, + "learning_rate": 9.415075376884422e-06, + "loss": 0.1194, + "step": 6325 + }, + { + "epoch": 4.74, + "grad_norm": 3.119014263153076, + "learning_rate": 9.412562814070353e-06, + "loss": 0.1176, + "step": 6350 + }, + { + "epoch": 4.75, + "grad_norm": 2.7762842178344727, + "learning_rate": 9.410050251256282e-06, + "loss": 0.1169, + "step": 6375 + }, + { + "epoch": 4.77, + "grad_norm": 3.0419962406158447, + "learning_rate": 9.407537688442212e-06, + "loss": 0.1189, + "step": 6400 + }, + { + "epoch": 4.79, + "grad_norm": 3.2145416736602783, + "learning_rate": 9.405025125628141e-06, + "loss": 0.1187, + "step": 6425 + }, + { + "epoch": 4.81, + "grad_norm": 3.7330620288848877, + "learning_rate": 9.40251256281407e-06, + "loss": 0.119, + "step": 6450 + }, + { + "epoch": 4.83, + "grad_norm": 3.1952309608459473, + "learning_rate": 9.4e-06, + "loss": 0.1186, + "step": 6475 + }, + { + "epoch": 4.85, + "grad_norm": 2.680539131164551, + "learning_rate": 9.39748743718593e-06, + "loss": 0.1161, + "step": 6500 + }, + { + "epoch": 4.87, + "grad_norm": 3.737576723098755, + "learning_rate": 9.39497487437186e-06, + "loss": 0.1167, + "step": 6525 + }, + { + "epoch": 4.88, + "grad_norm": 3.1687490940093994, + "learning_rate": 9.39246231155779e-06, + "loss": 0.119, + "step": 6550 + }, + { + "epoch": 4.9, + "grad_norm": 3.069887161254883, + "learning_rate": 9.38994974874372e-06, + "loss": 0.1187, + "step": 6575 + }, + { + "epoch": 4.92, + "grad_norm": 3.349846363067627, + "learning_rate": 9.387437185929648e-06, + "loss": 0.1181, + "step": 6600 + }, + { + "epoch": 4.94, + "grad_norm": 3.110581398010254, + "learning_rate": 9.384924623115579e-06, + "loss": 0.1202, + "step": 6625 + }, + { + "epoch": 4.96, + "grad_norm": 3.0284602642059326, + "learning_rate": 9.382412060301508e-06, + "loss": 0.1143, + "step": 6650 + }, + { + "epoch": 4.98, + "grad_norm": 3.1900837421417236, + "learning_rate": 9.379899497487438e-06, + "loss": 0.1159, + "step": 6675 + }, + { + "epoch": 5.0, + "grad_norm": 3.2235963344573975, + "learning_rate": 9.377386934673369e-06, + "loss": 0.1204, + "step": 6700 + }, + { + "epoch": 5.01, + "grad_norm": 2.49748158454895, + "learning_rate": 9.374874371859296e-06, + "loss": 0.1114, + "step": 6725 + }, + { + "epoch": 5.03, + "grad_norm": 3.261122703552246, + "learning_rate": 9.372361809045227e-06, + "loss": 0.108, + "step": 6750 + }, + { + "epoch": 5.05, + "grad_norm": 3.2039365768432617, + "learning_rate": 9.369849246231157e-06, + "loss": 0.1079, + "step": 6775 + }, + { + "epoch": 5.07, + "grad_norm": 2.9098169803619385, + "learning_rate": 9.367336683417086e-06, + "loss": 0.1104, + "step": 6800 + }, + { + "epoch": 5.09, + "grad_norm": 3.5935113430023193, + "learning_rate": 9.364824120603015e-06, + "loss": 0.1091, + "step": 6825 + }, + { + "epoch": 5.11, + "grad_norm": 3.3264999389648438, + "learning_rate": 9.362311557788946e-06, + "loss": 0.1042, + "step": 6850 + }, + { + "epoch": 5.13, + "grad_norm": 2.5229296684265137, + "learning_rate": 9.359798994974874e-06, + "loss": 0.1067, + "step": 6875 + }, + { + "epoch": 5.15, + "grad_norm": 2.907130002975464, + "learning_rate": 9.357286432160805e-06, + "loss": 0.1081, + "step": 6900 + }, + { + "epoch": 5.16, + "grad_norm": 2.8225080966949463, + "learning_rate": 9.354773869346734e-06, + "loss": 0.1069, + "step": 6925 + }, + { + "epoch": 5.18, + "grad_norm": 3.0250368118286133, + "learning_rate": 9.352261306532664e-06, + "loss": 0.1088, + "step": 6950 + }, + { + "epoch": 5.2, + "grad_norm": 3.325882911682129, + "learning_rate": 9.349748743718595e-06, + "loss": 0.108, + "step": 6975 + }, + { + "epoch": 5.22, + "grad_norm": 2.304255962371826, + "learning_rate": 9.347236180904522e-06, + "loss": 0.1047, + "step": 7000 + }, + { + "epoch": 5.22, + "eval_loss": 0.09980080276727676, + "eval_runtime": 867.172, + "eval_samples_per_second": 1.649, + "eval_steps_per_second": 1.649, + "eval_wer": 52.84655549941048, + "step": 7000 + }, + { + "epoch": 5.24, + "grad_norm": 3.4057774543762207, + "learning_rate": 9.344723618090453e-06, + "loss": 0.1044, + "step": 7025 + }, + { + "epoch": 5.26, + "grad_norm": 3.100834369659424, + "learning_rate": 9.342211055276383e-06, + "loss": 0.1079, + "step": 7050 + }, + { + "epoch": 5.28, + "grad_norm": 2.4007840156555176, + "learning_rate": 9.339698492462312e-06, + "loss": 0.1076, + "step": 7075 + }, + { + "epoch": 5.29, + "grad_norm": 3.98038387298584, + "learning_rate": 9.337185929648241e-06, + "loss": 0.1085, + "step": 7100 + }, + { + "epoch": 5.31, + "grad_norm": 2.635648727416992, + "learning_rate": 9.334673366834172e-06, + "loss": 0.1109, + "step": 7125 + }, + { + "epoch": 5.33, + "grad_norm": 2.746239423751831, + "learning_rate": 9.332160804020102e-06, + "loss": 0.1059, + "step": 7150 + }, + { + "epoch": 5.35, + "grad_norm": 2.759892702102661, + "learning_rate": 9.329648241206031e-06, + "loss": 0.1075, + "step": 7175 + }, + { + "epoch": 5.37, + "grad_norm": 2.783717393875122, + "learning_rate": 9.32713567839196e-06, + "loss": 0.1078, + "step": 7200 + }, + { + "epoch": 5.39, + "grad_norm": 2.5716328620910645, + "learning_rate": 9.32462311557789e-06, + "loss": 0.1032, + "step": 7225 + }, + { + "epoch": 5.41, + "grad_norm": 2.8785512447357178, + "learning_rate": 9.32211055276382e-06, + "loss": 0.1088, + "step": 7250 + }, + { + "epoch": 5.43, + "grad_norm": 2.9656403064727783, + "learning_rate": 9.319597989949748e-06, + "loss": 0.1062, + "step": 7275 + }, + { + "epoch": 5.44, + "grad_norm": 2.79738187789917, + "learning_rate": 9.31708542713568e-06, + "loss": 0.1101, + "step": 7300 + }, + { + "epoch": 5.46, + "grad_norm": 3.2677037715911865, + "learning_rate": 9.314572864321609e-06, + "loss": 0.1078, + "step": 7325 + }, + { + "epoch": 5.48, + "grad_norm": 2.686990737915039, + "learning_rate": 9.312060301507538e-06, + "loss": 0.1074, + "step": 7350 + }, + { + "epoch": 5.5, + "grad_norm": 3.2435858249664307, + "learning_rate": 9.309547738693469e-06, + "loss": 0.1066, + "step": 7375 + }, + { + "epoch": 5.52, + "grad_norm": 3.07719349861145, + "learning_rate": 9.307035175879398e-06, + "loss": 0.1051, + "step": 7400 + }, + { + "epoch": 5.54, + "grad_norm": 3.0376791954040527, + "learning_rate": 9.304522613065328e-06, + "loss": 0.1073, + "step": 7425 + }, + { + "epoch": 5.56, + "grad_norm": 2.8973543643951416, + "learning_rate": 9.302010050251257e-06, + "loss": 0.1037, + "step": 7450 + }, + { + "epoch": 5.57, + "grad_norm": 3.2189388275146484, + "learning_rate": 9.299497487437186e-06, + "loss": 0.1067, + "step": 7475 + }, + { + "epoch": 5.59, + "grad_norm": 2.77380108833313, + "learning_rate": 9.296984924623116e-06, + "loss": 0.1025, + "step": 7500 + }, + { + "epoch": 5.61, + "grad_norm": 2.885195255279541, + "learning_rate": 9.294472361809047e-06, + "loss": 0.1066, + "step": 7525 + }, + { + "epoch": 5.63, + "grad_norm": 2.535860776901245, + "learning_rate": 9.291959798994976e-06, + "loss": 0.1065, + "step": 7550 + }, + { + "epoch": 5.65, + "grad_norm": 2.898127794265747, + "learning_rate": 9.289447236180905e-06, + "loss": 0.1078, + "step": 7575 + }, + { + "epoch": 5.67, + "grad_norm": 2.5811409950256348, + "learning_rate": 9.286934673366835e-06, + "loss": 0.1058, + "step": 7600 + }, + { + "epoch": 5.69, + "grad_norm": 3.1811914443969727, + "learning_rate": 9.284422110552764e-06, + "loss": 0.1069, + "step": 7625 + }, + { + "epoch": 5.7, + "grad_norm": 3.5402581691741943, + "learning_rate": 9.281909547738695e-06, + "loss": 0.108, + "step": 7650 + }, + { + "epoch": 5.72, + "grad_norm": 2.753355026245117, + "learning_rate": 9.279396984924624e-06, + "loss": 0.1036, + "step": 7675 + }, + { + "epoch": 5.74, + "grad_norm": 2.7296597957611084, + "learning_rate": 9.276884422110554e-06, + "loss": 0.1033, + "step": 7700 + }, + { + "epoch": 5.76, + "grad_norm": 2.6939804553985596, + "learning_rate": 9.274371859296483e-06, + "loss": 0.1066, + "step": 7725 + }, + { + "epoch": 5.78, + "grad_norm": 3.371785879135132, + "learning_rate": 9.271859296482412e-06, + "loss": 0.1061, + "step": 7750 + }, + { + "epoch": 5.8, + "grad_norm": 2.9980766773223877, + "learning_rate": 9.269346733668343e-06, + "loss": 0.1028, + "step": 7775 + }, + { + "epoch": 5.82, + "grad_norm": 2.9186758995056152, + "learning_rate": 9.266834170854273e-06, + "loss": 0.1067, + "step": 7800 + }, + { + "epoch": 5.84, + "grad_norm": 2.6286396980285645, + "learning_rate": 9.264321608040202e-06, + "loss": 0.1103, + "step": 7825 + }, + { + "epoch": 5.85, + "grad_norm": 3.2292611598968506, + "learning_rate": 9.261809045226131e-06, + "loss": 0.1074, + "step": 7850 + }, + { + "epoch": 5.87, + "grad_norm": 3.274819850921631, + "learning_rate": 9.25929648241206e-06, + "loss": 0.1053, + "step": 7875 + }, + { + "epoch": 5.89, + "grad_norm": 3.0836021900177, + "learning_rate": 9.25678391959799e-06, + "loss": 0.1075, + "step": 7900 + }, + { + "epoch": 5.91, + "grad_norm": 3.3748483657836914, + "learning_rate": 9.254271356783921e-06, + "loss": 0.1084, + "step": 7925 + }, + { + "epoch": 5.93, + "grad_norm": 3.031656503677368, + "learning_rate": 9.25175879396985e-06, + "loss": 0.1029, + "step": 7950 + }, + { + "epoch": 5.95, + "grad_norm": 2.5397934913635254, + "learning_rate": 9.24924623115578e-06, + "loss": 0.1045, + "step": 7975 + }, + { + "epoch": 5.97, + "grad_norm": 3.0323400497436523, + "learning_rate": 9.24673366834171e-06, + "loss": 0.1059, + "step": 8000 + }, + { + "epoch": 5.97, + "eval_loss": 0.09790264070034027, + "eval_runtime": 869.5252, + "eval_samples_per_second": 1.645, + "eval_steps_per_second": 1.645, + "eval_wer": 48.07983830217282, + "step": 8000 + }, + { + "epoch": 5.98, + "grad_norm": 3.0417635440826416, + "learning_rate": 9.244221105527638e-06, + "loss": 0.1028, + "step": 8025 + }, + { + "epoch": 6.0, + "grad_norm": 2.7787845134735107, + "learning_rate": 9.24170854271357e-06, + "loss": 0.1021, + "step": 8050 + }, + { + "epoch": 6.02, + "grad_norm": 2.63568377494812, + "learning_rate": 9.239195979899498e-06, + "loss": 0.0959, + "step": 8075 + }, + { + "epoch": 6.04, + "grad_norm": 2.8485007286071777, + "learning_rate": 9.236683417085428e-06, + "loss": 0.0931, + "step": 8100 + }, + { + "epoch": 6.06, + "grad_norm": 2.5535314083099365, + "learning_rate": 9.234170854271357e-06, + "loss": 0.0977, + "step": 8125 + }, + { + "epoch": 6.08, + "grad_norm": 2.5797688961029053, + "learning_rate": 9.231658291457286e-06, + "loss": 0.093, + "step": 8150 + }, + { + "epoch": 6.1, + "grad_norm": 2.9626121520996094, + "learning_rate": 9.22924623115578e-06, + "loss": 0.0953, + "step": 8175 + }, + { + "epoch": 6.11, + "grad_norm": 3.388925313949585, + "learning_rate": 9.226733668341709e-06, + "loss": 0.0929, + "step": 8200 + }, + { + "epoch": 6.13, + "grad_norm": 3.1336917877197266, + "learning_rate": 9.224221105527638e-06, + "loss": 0.0956, + "step": 8225 + }, + { + "epoch": 6.15, + "grad_norm": 3.6391232013702393, + "learning_rate": 9.22170854271357e-06, + "loss": 0.0929, + "step": 8250 + }, + { + "epoch": 6.17, + "grad_norm": 2.8681113719940186, + "learning_rate": 9.219195979899499e-06, + "loss": 0.0976, + "step": 8275 + }, + { + "epoch": 6.19, + "grad_norm": 2.476613998413086, + "learning_rate": 9.216683417085428e-06, + "loss": 0.0947, + "step": 8300 + }, + { + "epoch": 6.21, + "grad_norm": 3.155181646347046, + "learning_rate": 9.214170854271357e-06, + "loss": 0.0954, + "step": 8325 + }, + { + "epoch": 6.23, + "grad_norm": 3.1895008087158203, + "learning_rate": 9.211658291457287e-06, + "loss": 0.0939, + "step": 8350 + }, + { + "epoch": 6.25, + "grad_norm": 2.820235252380371, + "learning_rate": 9.209145728643218e-06, + "loss": 0.0977, + "step": 8375 + }, + { + "epoch": 6.26, + "grad_norm": 2.5515928268432617, + "learning_rate": 9.206633165829147e-06, + "loss": 0.0959, + "step": 8400 + }, + { + "epoch": 6.28, + "grad_norm": 2.5176925659179688, + "learning_rate": 9.204120603015076e-06, + "loss": 0.0934, + "step": 8425 + }, + { + "epoch": 6.3, + "grad_norm": 2.5230116844177246, + "learning_rate": 9.201608040201006e-06, + "loss": 0.0957, + "step": 8450 + }, + { + "epoch": 6.32, + "grad_norm": 2.342954158782959, + "learning_rate": 9.199095477386935e-06, + "loss": 0.094, + "step": 8475 + }, + { + "epoch": 6.34, + "grad_norm": 2.912733793258667, + "learning_rate": 9.196582914572864e-06, + "loss": 0.0973, + "step": 8500 + }, + { + "epoch": 6.36, + "grad_norm": 3.1186792850494385, + "learning_rate": 9.194070351758795e-06, + "loss": 0.0936, + "step": 8525 + }, + { + "epoch": 6.38, + "grad_norm": 2.7295432090759277, + "learning_rate": 9.191557788944725e-06, + "loss": 0.0945, + "step": 8550 + }, + { + "epoch": 6.39, + "grad_norm": 2.8469088077545166, + "learning_rate": 9.189045226130654e-06, + "loss": 0.0975, + "step": 8575 + }, + { + "epoch": 6.41, + "grad_norm": 2.2491581439971924, + "learning_rate": 9.186532663316583e-06, + "loss": 0.0964, + "step": 8600 + }, + { + "epoch": 6.43, + "grad_norm": 2.673143148422241, + "learning_rate": 9.184020100502513e-06, + "loss": 0.0941, + "step": 8625 + }, + { + "epoch": 6.45, + "grad_norm": 2.620373249053955, + "learning_rate": 9.181507537688444e-06, + "loss": 0.0973, + "step": 8650 + }, + { + "epoch": 6.47, + "grad_norm": 3.0308456420898438, + "learning_rate": 9.178994974874373e-06, + "loss": 0.0926, + "step": 8675 + }, + { + "epoch": 6.49, + "grad_norm": 2.615203619003296, + "learning_rate": 9.176482412060302e-06, + "loss": 0.0955, + "step": 8700 + }, + { + "epoch": 6.51, + "grad_norm": 2.7052366733551025, + "learning_rate": 9.173969849246232e-06, + "loss": 0.0974, + "step": 8725 + }, + { + "epoch": 6.52, + "grad_norm": 4.061867713928223, + "learning_rate": 9.171457286432161e-06, + "loss": 0.0955, + "step": 8750 + }, + { + "epoch": 6.54, + "grad_norm": 3.293619394302368, + "learning_rate": 9.168944723618092e-06, + "loss": 0.096, + "step": 8775 + }, + { + "epoch": 6.56, + "grad_norm": 2.444633722305298, + "learning_rate": 9.166432160804021e-06, + "loss": 0.0975, + "step": 8800 + }, + { + "epoch": 6.58, + "grad_norm": 3.025348663330078, + "learning_rate": 9.16391959798995e-06, + "loss": 0.0953, + "step": 8825 + }, + { + "epoch": 6.6, + "grad_norm": 3.7620279788970947, + "learning_rate": 9.16140703517588e-06, + "loss": 0.0947, + "step": 8850 + }, + { + "epoch": 6.62, + "grad_norm": 2.6658921241760254, + "learning_rate": 9.15889447236181e-06, + "loss": 0.0943, + "step": 8875 + }, + { + "epoch": 6.64, + "grad_norm": 3.0233051776885986, + "learning_rate": 9.156381909547739e-06, + "loss": 0.0958, + "step": 8900 + }, + { + "epoch": 6.66, + "grad_norm": 2.543853759765625, + "learning_rate": 9.15386934673367e-06, + "loss": 0.0987, + "step": 8925 + }, + { + "epoch": 6.67, + "grad_norm": 3.436647415161133, + "learning_rate": 9.151356783919599e-06, + "loss": 0.0934, + "step": 8950 + }, + { + "epoch": 6.69, + "grad_norm": 3.208487033843994, + "learning_rate": 9.148844221105528e-06, + "loss": 0.0939, + "step": 8975 + }, + { + "epoch": 6.71, + "grad_norm": 2.282184600830078, + "learning_rate": 9.14633165829146e-06, + "loss": 0.0962, + "step": 9000 + }, + { + "epoch": 6.71, + "eval_loss": 0.09796562045812607, + "eval_runtime": 875.5369, + "eval_samples_per_second": 1.633, + "eval_steps_per_second": 1.633, + "eval_wer": 44.610072427151756, + "step": 9000 + }, + { + "epoch": 6.73, + "grad_norm": 2.6516947746276855, + "learning_rate": 9.143819095477387e-06, + "loss": 0.0959, + "step": 9025 + }, + { + "epoch": 6.75, + "grad_norm": 3.4010109901428223, + "learning_rate": 9.141306532663318e-06, + "loss": 0.0965, + "step": 9050 + }, + { + "epoch": 6.77, + "grad_norm": 2.9493467807769775, + "learning_rate": 9.138793969849247e-06, + "loss": 0.0963, + "step": 9075 + }, + { + "epoch": 6.79, + "grad_norm": 2.8108808994293213, + "learning_rate": 9.136281407035177e-06, + "loss": 0.0954, + "step": 9100 + }, + { + "epoch": 6.8, + "grad_norm": 3.271930456161499, + "learning_rate": 9.133768844221106e-06, + "loss": 0.0963, + "step": 9125 + }, + { + "epoch": 6.82, + "grad_norm": 2.78194260597229, + "learning_rate": 9.131256281407037e-06, + "loss": 0.0951, + "step": 9150 + }, + { + "epoch": 6.84, + "grad_norm": 2.7084765434265137, + "learning_rate": 9.128743718592964e-06, + "loss": 0.0978, + "step": 9175 + }, + { + "epoch": 6.86, + "grad_norm": 3.3792669773101807, + "learning_rate": 9.126231155778896e-06, + "loss": 0.0959, + "step": 9200 + }, + { + "epoch": 6.88, + "grad_norm": 2.8912172317504883, + "learning_rate": 9.123718592964825e-06, + "loss": 0.0953, + "step": 9225 + }, + { + "epoch": 6.9, + "grad_norm": 2.5888986587524414, + "learning_rate": 9.121206030150754e-06, + "loss": 0.0943, + "step": 9250 + }, + { + "epoch": 6.92, + "grad_norm": 2.8307971954345703, + "learning_rate": 9.118693467336685e-06, + "loss": 0.0916, + "step": 9275 + }, + { + "epoch": 6.94, + "grad_norm": 2.978024959564209, + "learning_rate": 9.116180904522613e-06, + "loss": 0.0961, + "step": 9300 + }, + { + "epoch": 6.95, + "grad_norm": 3.0127475261688232, + "learning_rate": 9.113668341708544e-06, + "loss": 0.0939, + "step": 9325 + }, + { + "epoch": 6.97, + "grad_norm": 2.8795394897460938, + "learning_rate": 9.111155778894473e-06, + "loss": 0.0934, + "step": 9350 + }, + { + "epoch": 6.99, + "grad_norm": 3.225996255874634, + "learning_rate": 9.108643216080402e-06, + "loss": 0.0946, + "step": 9375 + }, + { + "epoch": 7.01, + "grad_norm": 2.6519815921783447, + "learning_rate": 9.106130653266333e-06, + "loss": 0.0898, + "step": 9400 + }, + { + "epoch": 7.03, + "grad_norm": 3.109593152999878, + "learning_rate": 9.103618090452263e-06, + "loss": 0.0844, + "step": 9425 + }, + { + "epoch": 7.05, + "grad_norm": 2.5524463653564453, + "learning_rate": 9.101105527638192e-06, + "loss": 0.0851, + "step": 9450 + }, + { + "epoch": 7.07, + "grad_norm": 3.2387499809265137, + "learning_rate": 9.098592964824121e-06, + "loss": 0.0854, + "step": 9475 + }, + { + "epoch": 7.08, + "grad_norm": 2.5405783653259277, + "learning_rate": 9.09608040201005e-06, + "loss": 0.0852, + "step": 9500 + }, + { + "epoch": 7.1, + "grad_norm": 2.6179444789886475, + "learning_rate": 9.09356783919598e-06, + "loss": 0.0801, + "step": 9525 + }, + { + "epoch": 7.12, + "grad_norm": 2.5344157218933105, + "learning_rate": 9.091055276381911e-06, + "loss": 0.0861, + "step": 9550 + }, + { + "epoch": 7.14, + "grad_norm": 3.0818774700164795, + "learning_rate": 9.088542713567839e-06, + "loss": 0.0841, + "step": 9575 + }, + { + "epoch": 7.16, + "grad_norm": 2.905480146408081, + "learning_rate": 9.08603015075377e-06, + "loss": 0.0852, + "step": 9600 + }, + { + "epoch": 7.18, + "grad_norm": 2.33247971534729, + "learning_rate": 9.083517587939699e-06, + "loss": 0.0862, + "step": 9625 + }, + { + "epoch": 7.2, + "grad_norm": 4.099214553833008, + "learning_rate": 9.081005025125628e-06, + "loss": 0.0843, + "step": 9650 + }, + { + "epoch": 7.21, + "grad_norm": 2.5766942501068115, + "learning_rate": 9.07849246231156e-06, + "loss": 0.082, + "step": 9675 + }, + { + "epoch": 7.23, + "grad_norm": 2.9876275062561035, + "learning_rate": 9.075979899497489e-06, + "loss": 0.085, + "step": 9700 + }, + { + "epoch": 7.25, + "grad_norm": 2.7773313522338867, + "learning_rate": 9.073467336683418e-06, + "loss": 0.0832, + "step": 9725 + }, + { + "epoch": 7.27, + "grad_norm": 2.6343369483947754, + "learning_rate": 9.070954773869347e-06, + "loss": 0.0869, + "step": 9750 + }, + { + "epoch": 7.29, + "grad_norm": 2.740156412124634, + "learning_rate": 9.068442211055277e-06, + "loss": 0.0846, + "step": 9775 + }, + { + "epoch": 7.31, + "grad_norm": 2.4850776195526123, + "learning_rate": 9.065929648241206e-06, + "loss": 0.0859, + "step": 9800 + }, + { + "epoch": 7.33, + "grad_norm": 4.074528217315674, + "learning_rate": 9.063417085427137e-06, + "loss": 0.0834, + "step": 9825 + }, + { + "epoch": 7.35, + "grad_norm": 3.1747617721557617, + "learning_rate": 9.060904522613066e-06, + "loss": 0.0886, + "step": 9850 + }, + { + "epoch": 7.36, + "grad_norm": 2.9504904747009277, + "learning_rate": 9.058391959798996e-06, + "loss": 0.0885, + "step": 9875 + }, + { + "epoch": 7.38, + "grad_norm": 2.522550344467163, + "learning_rate": 9.055879396984925e-06, + "loss": 0.0819, + "step": 9900 + }, + { + "epoch": 7.4, + "grad_norm": 3.017293930053711, + "learning_rate": 9.053366834170854e-06, + "loss": 0.0873, + "step": 9925 + }, + { + "epoch": 7.42, + "grad_norm": 2.7242226600646973, + "learning_rate": 9.050854271356785e-06, + "loss": 0.0856, + "step": 9950 + }, + { + "epoch": 7.44, + "grad_norm": 2.6961162090301514, + "learning_rate": 9.048341708542715e-06, + "loss": 0.0828, + "step": 9975 + }, + { + "epoch": 7.46, + "grad_norm": 2.8511786460876465, + "learning_rate": 9.045829145728644e-06, + "loss": 0.0842, + "step": 10000 + }, + { + "epoch": 7.46, + "eval_loss": 0.09877289086580276, + "eval_runtime": 881.105, + "eval_samples_per_second": 1.623, + "eval_steps_per_second": 1.623, + "eval_wer": 46.81657402728651, + "step": 10000 + }, + { + "epoch": 7.48, + "grad_norm": 3.275576114654541, + "learning_rate": 9.043316582914573e-06, + "loss": 0.0852, + "step": 10025 + }, + { + "epoch": 7.49, + "grad_norm": 2.9215855598449707, + "learning_rate": 9.040804020100503e-06, + "loss": 0.0859, + "step": 10050 + }, + { + "epoch": 7.51, + "grad_norm": 3.081780195236206, + "learning_rate": 9.038291457286434e-06, + "loss": 0.0877, + "step": 10075 + }, + { + "epoch": 7.53, + "grad_norm": 2.6712520122528076, + "learning_rate": 9.035778894472363e-06, + "loss": 0.0857, + "step": 10100 + }, + { + "epoch": 7.55, + "grad_norm": 2.9668095111846924, + "learning_rate": 9.033266331658292e-06, + "loss": 0.0841, + "step": 10125 + }, + { + "epoch": 7.57, + "grad_norm": 2.4688756465911865, + "learning_rate": 9.030753768844222e-06, + "loss": 0.0833, + "step": 10150 + }, + { + "epoch": 7.59, + "grad_norm": 3.0937860012054443, + "learning_rate": 9.028241206030151e-06, + "loss": 0.0845, + "step": 10175 + }, + { + "epoch": 7.61, + "grad_norm": 3.2708189487457275, + "learning_rate": 9.02572864321608e-06, + "loss": 0.0864, + "step": 10200 + }, + { + "epoch": 7.62, + "grad_norm": 2.865565299987793, + "learning_rate": 9.023316582914574e-06, + "loss": 0.0864, + "step": 10225 + }, + { + "epoch": 7.64, + "grad_norm": 3.6417179107666016, + "learning_rate": 9.020804020100503e-06, + "loss": 0.0878, + "step": 10250 + }, + { + "epoch": 7.66, + "grad_norm": 2.6132729053497314, + "learning_rate": 9.018291457286434e-06, + "loss": 0.0869, + "step": 10275 + }, + { + "epoch": 7.68, + "grad_norm": 3.6640384197235107, + "learning_rate": 9.015778894472362e-06, + "loss": 0.0887, + "step": 10300 + }, + { + "epoch": 7.7, + "grad_norm": 2.9905197620391846, + "learning_rate": 9.013266331658293e-06, + "loss": 0.0886, + "step": 10325 + }, + { + "epoch": 7.72, + "grad_norm": 2.6526947021484375, + "learning_rate": 9.010753768844222e-06, + "loss": 0.083, + "step": 10350 + }, + { + "epoch": 7.74, + "grad_norm": 2.7942092418670654, + "learning_rate": 9.008241206030151e-06, + "loss": 0.0868, + "step": 10375 + }, + { + "epoch": 7.76, + "grad_norm": 2.4601199626922607, + "learning_rate": 9.00572864321608e-06, + "loss": 0.0848, + "step": 10400 + }, + { + "epoch": 7.77, + "grad_norm": 2.9632792472839355, + "learning_rate": 9.003216080402012e-06, + "loss": 0.0832, + "step": 10425 + }, + { + "epoch": 7.79, + "grad_norm": 3.50264310836792, + "learning_rate": 9.00070351758794e-06, + "loss": 0.0866, + "step": 10450 + }, + { + "epoch": 7.81, + "grad_norm": 2.720573663711548, + "learning_rate": 8.99819095477387e-06, + "loss": 0.0839, + "step": 10475 + }, + { + "epoch": 7.83, + "grad_norm": 3.166140556335449, + "learning_rate": 8.9956783919598e-06, + "loss": 0.0854, + "step": 10500 + }, + { + "epoch": 7.85, + "grad_norm": 3.429577589035034, + "learning_rate": 8.993165829145729e-06, + "loss": 0.0876, + "step": 10525 + }, + { + "epoch": 7.87, + "grad_norm": 2.7060961723327637, + "learning_rate": 8.99065326633166e-06, + "loss": 0.0882, + "step": 10550 + }, + { + "epoch": 7.89, + "grad_norm": 2.6174957752227783, + "learning_rate": 8.988140703517587e-06, + "loss": 0.0856, + "step": 10575 + }, + { + "epoch": 7.9, + "grad_norm": 2.4340643882751465, + "learning_rate": 8.985628140703518e-06, + "loss": 0.0855, + "step": 10600 + }, + { + "epoch": 7.92, + "grad_norm": 2.879676103591919, + "learning_rate": 8.983115577889448e-06, + "loss": 0.0864, + "step": 10625 + }, + { + "epoch": 7.94, + "grad_norm": 2.9836432933807373, + "learning_rate": 8.980603015075377e-06, + "loss": 0.0818, + "step": 10650 + }, + { + "epoch": 7.96, + "grad_norm": 2.6707403659820557, + "learning_rate": 8.978090452261308e-06, + "loss": 0.0863, + "step": 10675 + }, + { + "epoch": 7.98, + "grad_norm": 3.287686824798584, + "learning_rate": 8.975577889447237e-06, + "loss": 0.0848, + "step": 10700 + }, + { + "epoch": 8.0, + "grad_norm": 2.701420783996582, + "learning_rate": 8.973065326633167e-06, + "loss": 0.0878, + "step": 10725 + }, + { + "epoch": 8.02, + "grad_norm": 2.7174181938171387, + "learning_rate": 8.970552763819096e-06, + "loss": 0.0756, + "step": 10750 + }, + { + "epoch": 8.04, + "grad_norm": 2.4819700717926025, + "learning_rate": 8.968040201005025e-06, + "loss": 0.0744, + "step": 10775 + }, + { + "epoch": 8.05, + "grad_norm": 2.852555990219116, + "learning_rate": 8.965527638190955e-06, + "loss": 0.077, + "step": 10800 + }, + { + "epoch": 8.07, + "grad_norm": 2.653494358062744, + "learning_rate": 8.963015075376886e-06, + "loss": 0.0769, + "step": 10825 + }, + { + "epoch": 8.09, + "grad_norm": 2.4860899448394775, + "learning_rate": 8.960502512562815e-06, + "loss": 0.0776, + "step": 10850 + }, + { + "epoch": 8.11, + "grad_norm": 3.096435070037842, + "learning_rate": 8.957989949748744e-06, + "loss": 0.0762, + "step": 10875 + }, + { + "epoch": 8.13, + "grad_norm": 2.6246683597564697, + "learning_rate": 8.955477386934674e-06, + "loss": 0.0747, + "step": 10900 + }, + { + "epoch": 8.15, + "grad_norm": 2.9552509784698486, + "learning_rate": 8.952964824120603e-06, + "loss": 0.0754, + "step": 10925 + }, + { + "epoch": 8.17, + "grad_norm": 2.922856569290161, + "learning_rate": 8.950452261306534e-06, + "loss": 0.0739, + "step": 10950 + }, + { + "epoch": 8.18, + "grad_norm": 2.4575533866882324, + "learning_rate": 8.947939698492463e-06, + "loss": 0.0747, + "step": 10975 + }, + { + "epoch": 8.2, + "grad_norm": 3.29339861869812, + "learning_rate": 8.945427135678393e-06, + "loss": 0.0762, + "step": 11000 + }, + { + "epoch": 8.2, + "eval_loss": 0.10095544904470444, + "eval_runtime": 871.2194, + "eval_samples_per_second": 1.641, + "eval_steps_per_second": 1.641, + "eval_wer": 43.24574701027455, + "step": 11000 + }, + { + "epoch": 8.22, + "grad_norm": 2.4127423763275146, + "learning_rate": 8.942914572864322e-06, + "loss": 0.0745, + "step": 11025 + }, + { + "epoch": 8.24, + "grad_norm": 2.3887367248535156, + "learning_rate": 8.940402010050251e-06, + "loss": 0.0759, + "step": 11050 + }, + { + "epoch": 8.26, + "grad_norm": 2.5653114318847656, + "learning_rate": 8.937889447236182e-06, + "loss": 0.0741, + "step": 11075 + }, + { + "epoch": 8.28, + "grad_norm": 2.3605544567108154, + "learning_rate": 8.935376884422112e-06, + "loss": 0.0747, + "step": 11100 + }, + { + "epoch": 8.3, + "grad_norm": 3.295729398727417, + "learning_rate": 8.932864321608041e-06, + "loss": 0.075, + "step": 11125 + }, + { + "epoch": 8.31, + "grad_norm": 3.0733535289764404, + "learning_rate": 8.93035175879397e-06, + "loss": 0.0773, + "step": 11150 + }, + { + "epoch": 8.33, + "grad_norm": 3.013725757598877, + "learning_rate": 8.9278391959799e-06, + "loss": 0.0759, + "step": 11175 + }, + { + "epoch": 8.35, + "grad_norm": 3.0741019248962402, + "learning_rate": 8.925326633165829e-06, + "loss": 0.0756, + "step": 11200 + }, + { + "epoch": 8.37, + "grad_norm": 2.9668312072753906, + "learning_rate": 8.92281407035176e-06, + "loss": 0.0772, + "step": 11225 + }, + { + "epoch": 8.39, + "grad_norm": 3.0257551670074463, + "learning_rate": 8.92030150753769e-06, + "loss": 0.0773, + "step": 11250 + }, + { + "epoch": 8.41, + "grad_norm": 2.3677291870117188, + "learning_rate": 8.917788944723619e-06, + "loss": 0.0748, + "step": 11275 + }, + { + "epoch": 8.43, + "grad_norm": 2.7547364234924316, + "learning_rate": 8.91527638190955e-06, + "loss": 0.0785, + "step": 11300 + }, + { + "epoch": 8.45, + "grad_norm": 2.3571038246154785, + "learning_rate": 8.912763819095477e-06, + "loss": 0.0773, + "step": 11325 + }, + { + "epoch": 8.46, + "grad_norm": 2.910677671432495, + "learning_rate": 8.910251256281408e-06, + "loss": 0.0749, + "step": 11350 + }, + { + "epoch": 8.48, + "grad_norm": 2.758967399597168, + "learning_rate": 8.907738693467338e-06, + "loss": 0.0785, + "step": 11375 + }, + { + "epoch": 8.5, + "grad_norm": 4.015144348144531, + "learning_rate": 8.905226130653267e-06, + "loss": 0.0789, + "step": 11400 + }, + { + "epoch": 8.52, + "grad_norm": 2.9863064289093018, + "learning_rate": 8.902713567839196e-06, + "loss": 0.0762, + "step": 11425 + }, + { + "epoch": 8.54, + "grad_norm": 2.6029670238494873, + "learning_rate": 8.900201005025126e-06, + "loss": 0.0785, + "step": 11450 + }, + { + "epoch": 8.56, + "grad_norm": 3.145047903060913, + "learning_rate": 8.897688442211057e-06, + "loss": 0.0789, + "step": 11475 + }, + { + "epoch": 8.58, + "grad_norm": 2.8789596557617188, + "learning_rate": 8.895175879396986e-06, + "loss": 0.0763, + "step": 11500 + }, + { + "epoch": 8.59, + "grad_norm": 3.3093395233154297, + "learning_rate": 8.892663316582915e-06, + "loss": 0.076, + "step": 11525 + }, + { + "epoch": 8.61, + "grad_norm": 2.912888765335083, + "learning_rate": 8.890150753768845e-06, + "loss": 0.078, + "step": 11550 + }, + { + "epoch": 8.63, + "grad_norm": 2.856971263885498, + "learning_rate": 8.887638190954776e-06, + "loss": 0.0746, + "step": 11575 + }, + { + "epoch": 8.65, + "grad_norm": 2.38920259475708, + "learning_rate": 8.885125628140703e-06, + "loss": 0.0751, + "step": 11600 + }, + { + "epoch": 8.67, + "grad_norm": 2.6467623710632324, + "learning_rate": 8.882613065326634e-06, + "loss": 0.0792, + "step": 11625 + }, + { + "epoch": 8.69, + "grad_norm": 3.0557491779327393, + "learning_rate": 8.880100502512564e-06, + "loss": 0.0761, + "step": 11650 + }, + { + "epoch": 8.71, + "grad_norm": 2.948601484298706, + "learning_rate": 8.877587939698493e-06, + "loss": 0.0783, + "step": 11675 + }, + { + "epoch": 8.72, + "grad_norm": 2.9704699516296387, + "learning_rate": 8.875075376884424e-06, + "loss": 0.0781, + "step": 11700 + }, + { + "epoch": 8.74, + "grad_norm": 3.0390591621398926, + "learning_rate": 8.872562814070352e-06, + "loss": 0.0748, + "step": 11725 + }, + { + "epoch": 8.76, + "grad_norm": 2.658689022064209, + "learning_rate": 8.870050251256283e-06, + "loss": 0.0754, + "step": 11750 + }, + { + "epoch": 8.78, + "grad_norm": 2.9106252193450928, + "learning_rate": 8.867537688442212e-06, + "loss": 0.0777, + "step": 11775 + }, + { + "epoch": 8.8, + "grad_norm": 2.824779748916626, + "learning_rate": 8.865025125628141e-06, + "loss": 0.0764, + "step": 11800 + }, + { + "epoch": 8.82, + "grad_norm": 3.1903724670410156, + "learning_rate": 8.86251256281407e-06, + "loss": 0.0785, + "step": 11825 + }, + { + "epoch": 8.84, + "grad_norm": 3.019960880279541, + "learning_rate": 8.860000000000002e-06, + "loss": 0.0797, + "step": 11850 + }, + { + "epoch": 8.86, + "grad_norm": 2.868473529815674, + "learning_rate": 8.85748743718593e-06, + "loss": 0.0757, + "step": 11875 + }, + { + "epoch": 8.87, + "grad_norm": 2.245260238647461, + "learning_rate": 8.85497487437186e-06, + "loss": 0.0753, + "step": 11900 + }, + { + "epoch": 8.89, + "grad_norm": 2.7010350227355957, + "learning_rate": 8.85246231155779e-06, + "loss": 0.0744, + "step": 11925 + }, + { + "epoch": 8.91, + "grad_norm": 2.7405307292938232, + "learning_rate": 8.849949748743719e-06, + "loss": 0.0796, + "step": 11950 + }, + { + "epoch": 8.93, + "grad_norm": 2.9547982215881348, + "learning_rate": 8.84743718592965e-06, + "loss": 0.0756, + "step": 11975 + }, + { + "epoch": 8.95, + "grad_norm": 3.142212390899658, + "learning_rate": 8.844924623115578e-06, + "loss": 0.075, + "step": 12000 + }, + { + "epoch": 8.95, + "eval_loss": 0.10038630664348602, + "eval_runtime": 876.3963, + "eval_samples_per_second": 1.632, + "eval_steps_per_second": 1.632, + "eval_wer": 44.104766717197236, + "step": 12000 + }, + { + "epoch": 8.97, + "grad_norm": 2.5948305130004883, + "learning_rate": 8.842412060301509e-06, + "loss": 0.0753, + "step": 12025 + }, + { + "epoch": 8.99, + "grad_norm": 2.885580062866211, + "learning_rate": 8.839899497487438e-06, + "loss": 0.0767, + "step": 12050 + }, + { + "epoch": 9.0, + "grad_norm": 2.7916576862335205, + "learning_rate": 8.837386934673367e-06, + "loss": 0.0718, + "step": 12075 + }, + { + "epoch": 9.02, + "grad_norm": 2.3431386947631836, + "learning_rate": 8.834874371859298e-06, + "loss": 0.0672, + "step": 12100 + }, + { + "epoch": 9.04, + "grad_norm": 3.5959362983703613, + "learning_rate": 8.832361809045228e-06, + "loss": 0.0641, + "step": 12125 + }, + { + "epoch": 9.06, + "grad_norm": 2.409857988357544, + "learning_rate": 8.829849246231157e-06, + "loss": 0.0683, + "step": 12150 + }, + { + "epoch": 9.08, + "grad_norm": 2.503124475479126, + "learning_rate": 8.827336683417086e-06, + "loss": 0.0636, + "step": 12175 + }, + { + "epoch": 9.1, + "grad_norm": 2.3510255813598633, + "learning_rate": 8.824824120603015e-06, + "loss": 0.0647, + "step": 12200 + }, + { + "epoch": 9.12, + "grad_norm": 2.891695976257324, + "learning_rate": 8.822311557788945e-06, + "loss": 0.0679, + "step": 12225 + }, + { + "epoch": 9.13, + "grad_norm": 2.93817400932312, + "learning_rate": 8.819798994974876e-06, + "loss": 0.0666, + "step": 12250 + }, + { + "epoch": 9.15, + "grad_norm": 2.7336504459381104, + "learning_rate": 8.817286432160803e-06, + "loss": 0.0675, + "step": 12275 + }, + { + "epoch": 9.17, + "grad_norm": 2.4548795223236084, + "learning_rate": 8.814773869346734e-06, + "loss": 0.0667, + "step": 12300 + }, + { + "epoch": 9.19, + "grad_norm": 2.895308494567871, + "learning_rate": 8.812261306532664e-06, + "loss": 0.0666, + "step": 12325 + }, + { + "epoch": 9.21, + "grad_norm": 2.789829969406128, + "learning_rate": 8.809748743718593e-06, + "loss": 0.0675, + "step": 12350 + }, + { + "epoch": 9.23, + "grad_norm": 3.011406183242798, + "learning_rate": 8.807236180904524e-06, + "loss": 0.0694, + "step": 12375 + }, + { + "epoch": 9.25, + "grad_norm": 2.8584184646606445, + "learning_rate": 8.804723618090453e-06, + "loss": 0.0664, + "step": 12400 + }, + { + "epoch": 9.27, + "grad_norm": 2.9399349689483643, + "learning_rate": 8.802211055276383e-06, + "loss": 0.0682, + "step": 12425 + }, + { + "epoch": 9.28, + "grad_norm": 2.5611989498138428, + "learning_rate": 8.799698492462312e-06, + "loss": 0.0682, + "step": 12450 + }, + { + "epoch": 9.3, + "grad_norm": 2.7409863471984863, + "learning_rate": 8.797185929648241e-06, + "loss": 0.0663, + "step": 12475 + }, + { + "epoch": 9.32, + "grad_norm": 2.620455265045166, + "learning_rate": 8.79467336683417e-06, + "loss": 0.0695, + "step": 12500 + }, + { + "epoch": 9.34, + "grad_norm": 2.643861770629883, + "learning_rate": 8.792160804020102e-06, + "loss": 0.0714, + "step": 12525 + }, + { + "epoch": 9.36, + "grad_norm": 2.566765546798706, + "learning_rate": 8.789648241206031e-06, + "loss": 0.0653, + "step": 12550 + }, + { + "epoch": 9.38, + "grad_norm": 3.4304237365722656, + "learning_rate": 8.78713567839196e-06, + "loss": 0.0689, + "step": 12575 + }, + { + "epoch": 9.4, + "grad_norm": 2.5509471893310547, + "learning_rate": 8.78462311557789e-06, + "loss": 0.0658, + "step": 12600 + }, + { + "epoch": 9.41, + "grad_norm": 3.1590113639831543, + "learning_rate": 8.782110552763819e-06, + "loss": 0.0682, + "step": 12625 + }, + { + "epoch": 9.43, + "grad_norm": 2.5462427139282227, + "learning_rate": 8.77959798994975e-06, + "loss": 0.0685, + "step": 12650 + }, + { + "epoch": 9.45, + "grad_norm": 2.6460676193237305, + "learning_rate": 8.77708542713568e-06, + "loss": 0.0667, + "step": 12675 + }, + { + "epoch": 9.47, + "grad_norm": 3.086369037628174, + "learning_rate": 8.774572864321609e-06, + "loss": 0.0668, + "step": 12700 + }, + { + "epoch": 9.49, + "grad_norm": 2.6479835510253906, + "learning_rate": 8.77206030150754e-06, + "loss": 0.0673, + "step": 12725 + }, + { + "epoch": 9.51, + "grad_norm": 2.575604200363159, + "learning_rate": 8.769547738693467e-06, + "loss": 0.0688, + "step": 12750 + }, + { + "epoch": 9.53, + "grad_norm": 2.5485081672668457, + "learning_rate": 8.767035175879398e-06, + "loss": 0.0664, + "step": 12775 + }, + { + "epoch": 9.55, + "grad_norm": 2.862621545791626, + "learning_rate": 8.764522613065328e-06, + "loss": 0.0685, + "step": 12800 + }, + { + "epoch": 9.56, + "grad_norm": 2.7342214584350586, + "learning_rate": 8.762010050251257e-06, + "loss": 0.0685, + "step": 12825 + }, + { + "epoch": 9.58, + "grad_norm": 2.5740914344787598, + "learning_rate": 8.759497487437186e-06, + "loss": 0.068, + "step": 12850 + }, + { + "epoch": 9.6, + "grad_norm": 2.610764980316162, + "learning_rate": 8.756984924623116e-06, + "loss": 0.0702, + "step": 12875 + }, + { + "epoch": 9.62, + "grad_norm": 3.1418581008911133, + "learning_rate": 8.754472361809045e-06, + "loss": 0.0685, + "step": 12900 + }, + { + "epoch": 9.64, + "grad_norm": 2.6357016563415527, + "learning_rate": 8.751959798994976e-06, + "loss": 0.0668, + "step": 12925 + }, + { + "epoch": 9.66, + "grad_norm": 2.9498605728149414, + "learning_rate": 8.749447236180905e-06, + "loss": 0.069, + "step": 12950 + }, + { + "epoch": 9.68, + "grad_norm": 3.4358696937561035, + "learning_rate": 8.746934673366835e-06, + "loss": 0.0683, + "step": 12975 + }, + { + "epoch": 9.69, + "grad_norm": 2.851529598236084, + "learning_rate": 8.744422110552766e-06, + "loss": 0.0705, + "step": 13000 + }, + { + "epoch": 9.69, + "eval_loss": 0.10326718538999557, + "eval_runtime": 876.2388, + "eval_samples_per_second": 1.632, + "eval_steps_per_second": 1.632, + "eval_wer": 42.65622368199427, + "step": 13000 + }, + { + "epoch": 9.71, + "grad_norm": 3.0542941093444824, + "learning_rate": 8.741909547738693e-06, + "loss": 0.0694, + "step": 13025 + }, + { + "epoch": 9.73, + "grad_norm": 3.1901473999023438, + "learning_rate": 8.739396984924624e-06, + "loss": 0.0688, + "step": 13050 + }, + { + "epoch": 9.75, + "grad_norm": 2.64933443069458, + "learning_rate": 8.736884422110554e-06, + "loss": 0.0678, + "step": 13075 + }, + { + "epoch": 9.77, + "grad_norm": 3.004073143005371, + "learning_rate": 8.734371859296483e-06, + "loss": 0.071, + "step": 13100 + }, + { + "epoch": 9.79, + "grad_norm": 2.6631827354431152, + "learning_rate": 8.731859296482412e-06, + "loss": 0.0687, + "step": 13125 + }, + { + "epoch": 9.81, + "grad_norm": 2.720034599304199, + "learning_rate": 8.729346733668342e-06, + "loss": 0.069, + "step": 13150 + }, + { + "epoch": 9.82, + "grad_norm": 3.5523383617401123, + "learning_rate": 8.726834170854273e-06, + "loss": 0.0683, + "step": 13175 + }, + { + "epoch": 9.84, + "grad_norm": 2.6626040935516357, + "learning_rate": 8.724321608040202e-06, + "loss": 0.0702, + "step": 13200 + }, + { + "epoch": 9.86, + "grad_norm": 3.047654867172241, + "learning_rate": 8.721809045226131e-06, + "loss": 0.0682, + "step": 13225 + }, + { + "epoch": 9.88, + "grad_norm": 2.5979316234588623, + "learning_rate": 8.71929648241206e-06, + "loss": 0.0676, + "step": 13250 + }, + { + "epoch": 9.9, + "grad_norm": 3.4393625259399414, + "learning_rate": 8.716783919597992e-06, + "loss": 0.0673, + "step": 13275 + }, + { + "epoch": 9.92, + "grad_norm": 3.3265433311462402, + "learning_rate": 8.71427135678392e-06, + "loss": 0.0671, + "step": 13300 + }, + { + "epoch": 9.94, + "grad_norm": 2.559882164001465, + "learning_rate": 8.71175879396985e-06, + "loss": 0.0669, + "step": 13325 + }, + { + "epoch": 9.96, + "grad_norm": 2.55971097946167, + "learning_rate": 8.70924623115578e-06, + "loss": 0.0687, + "step": 13350 + }, + { + "epoch": 9.97, + "grad_norm": 3.147305488586426, + "learning_rate": 8.706733668341709e-06, + "loss": 0.0718, + "step": 13375 + }, + { + "epoch": 9.99, + "grad_norm": 2.222311496734619, + "learning_rate": 8.70422110552764e-06, + "loss": 0.0678, + "step": 13400 + }, + { + "epoch": 10.01, + "grad_norm": 3.410762310028076, + "learning_rate": 8.701708542713568e-06, + "loss": 0.0636, + "step": 13425 + }, + { + "epoch": 10.03, + "grad_norm": 3.420356512069702, + "learning_rate": 8.699195979899499e-06, + "loss": 0.0592, + "step": 13450 + }, + { + "epoch": 10.05, + "grad_norm": 2.3900392055511475, + "learning_rate": 8.696683417085428e-06, + "loss": 0.0571, + "step": 13475 + }, + { + "epoch": 10.07, + "grad_norm": 2.5646119117736816, + "learning_rate": 8.694170854271357e-06, + "loss": 0.0606, + "step": 13500 + }, + { + "epoch": 10.09, + "grad_norm": 2.888441801071167, + "learning_rate": 8.691658291457287e-06, + "loss": 0.057, + "step": 13525 + }, + { + "epoch": 10.1, + "grad_norm": 2.1263973712921143, + "learning_rate": 8.689145728643218e-06, + "loss": 0.0588, + "step": 13550 + }, + { + "epoch": 10.12, + "grad_norm": 2.61118221282959, + "learning_rate": 8.686633165829147e-06, + "loss": 0.0601, + "step": 13575 + }, + { + "epoch": 10.14, + "grad_norm": 2.882094621658325, + "learning_rate": 8.684120603015076e-06, + "loss": 0.0593, + "step": 13600 + }, + { + "epoch": 10.16, + "grad_norm": 2.899109363555908, + "learning_rate": 8.681608040201006e-06, + "loss": 0.0611, + "step": 13625 + }, + { + "epoch": 10.18, + "grad_norm": 2.828953504562378, + "learning_rate": 8.679095477386935e-06, + "loss": 0.0598, + "step": 13650 + }, + { + "epoch": 10.2, + "grad_norm": 2.8404812812805176, + "learning_rate": 8.676582914572866e-06, + "loss": 0.0591, + "step": 13675 + }, + { + "epoch": 10.22, + "grad_norm": 2.5158917903900146, + "learning_rate": 8.674070351758794e-06, + "loss": 0.0595, + "step": 13700 + }, + { + "epoch": 10.23, + "grad_norm": 3.024646282196045, + "learning_rate": 8.671557788944725e-06, + "loss": 0.0585, + "step": 13725 + }, + { + "epoch": 10.25, + "grad_norm": 2.863865852355957, + "learning_rate": 8.669045226130654e-06, + "loss": 0.0597, + "step": 13750 + }, + { + "epoch": 10.27, + "grad_norm": 2.6605565547943115, + "learning_rate": 8.666532663316583e-06, + "loss": 0.0608, + "step": 13775 + }, + { + "epoch": 10.29, + "grad_norm": 2.5665063858032227, + "learning_rate": 8.664020100502514e-06, + "loss": 0.057, + "step": 13800 + }, + { + "epoch": 10.31, + "grad_norm": 2.745556116104126, + "learning_rate": 8.661507537688444e-06, + "loss": 0.0585, + "step": 13825 + }, + { + "epoch": 10.33, + "grad_norm": 3.113863706588745, + "learning_rate": 8.658994974874373e-06, + "loss": 0.0583, + "step": 13850 + }, + { + "epoch": 10.35, + "grad_norm": 2.9757370948791504, + "learning_rate": 8.656482412060302e-06, + "loss": 0.0624, + "step": 13875 + }, + { + "epoch": 10.37, + "grad_norm": 2.6034061908721924, + "learning_rate": 8.653969849246231e-06, + "loss": 0.0575, + "step": 13900 + }, + { + "epoch": 10.38, + "grad_norm": 3.006136894226074, + "learning_rate": 8.65145728643216e-06, + "loss": 0.0586, + "step": 13925 + }, + { + "epoch": 10.4, + "grad_norm": 2.6489036083221436, + "learning_rate": 8.648944723618092e-06, + "loss": 0.0627, + "step": 13950 + }, + { + "epoch": 10.42, + "grad_norm": 2.67799711227417, + "learning_rate": 8.64643216080402e-06, + "loss": 0.0594, + "step": 13975 + }, + { + "epoch": 10.44, + "grad_norm": 3.2471539974212646, + "learning_rate": 8.64391959798995e-06, + "loss": 0.0619, + "step": 14000 + }, + { + "epoch": 10.44, + "eval_loss": 0.1073550432920456, + "eval_runtime": 880.6821, + "eval_samples_per_second": 1.624, + "eval_steps_per_second": 1.624, + "eval_wer": 43.00993767896244, + "step": 14000 + }, + { + "epoch": 10.46, + "grad_norm": 2.6475391387939453, + "learning_rate": 8.64140703517588e-06, + "loss": 0.0592, + "step": 14025 + }, + { + "epoch": 10.48, + "grad_norm": 3.1630160808563232, + "learning_rate": 8.638894472361809e-06, + "loss": 0.0631, + "step": 14050 + }, + { + "epoch": 10.5, + "grad_norm": 2.6944804191589355, + "learning_rate": 8.63638190954774e-06, + "loss": 0.0595, + "step": 14075 + }, + { + "epoch": 10.51, + "grad_norm": 2.741638422012329, + "learning_rate": 8.63386934673367e-06, + "loss": 0.0605, + "step": 14100 + }, + { + "epoch": 10.53, + "grad_norm": 3.0575332641601562, + "learning_rate": 8.631356783919599e-06, + "loss": 0.0614, + "step": 14125 + }, + { + "epoch": 10.55, + "grad_norm": 2.904282569885254, + "learning_rate": 8.628844221105528e-06, + "loss": 0.0598, + "step": 14150 + }, + { + "epoch": 10.57, + "grad_norm": 2.4661941528320312, + "learning_rate": 8.626331658291457e-06, + "loss": 0.0618, + "step": 14175 + }, + { + "epoch": 10.59, + "grad_norm": 2.784205913543701, + "learning_rate": 8.623819095477388e-06, + "loss": 0.0622, + "step": 14200 + }, + { + "epoch": 10.61, + "grad_norm": 2.7724556922912598, + "learning_rate": 8.621306532663318e-06, + "loss": 0.0606, + "step": 14225 + }, + { + "epoch": 10.63, + "grad_norm": 2.6208887100219727, + "learning_rate": 8.618793969849247e-06, + "loss": 0.0605, + "step": 14250 + }, + { + "epoch": 10.65, + "grad_norm": 2.284531593322754, + "learning_rate": 8.61638190954774e-06, + "loss": 0.063, + "step": 14275 + }, + { + "epoch": 10.66, + "grad_norm": 2.889529228210449, + "learning_rate": 8.613869346733668e-06, + "loss": 0.0584, + "step": 14300 + }, + { + "epoch": 10.68, + "grad_norm": 3.0093719959259033, + "learning_rate": 8.611356783919599e-06, + "loss": 0.0617, + "step": 14325 + }, + { + "epoch": 10.7, + "grad_norm": 2.6686503887176514, + "learning_rate": 8.608844221105528e-06, + "loss": 0.0617, + "step": 14350 + }, + { + "epoch": 10.72, + "grad_norm": 2.6857852935791016, + "learning_rate": 8.606331658291458e-06, + "loss": 0.0593, + "step": 14375 + }, + { + "epoch": 10.74, + "grad_norm": 2.3685672283172607, + "learning_rate": 8.603819095477389e-06, + "loss": 0.0594, + "step": 14400 + }, + { + "epoch": 10.76, + "grad_norm": 2.7583415508270264, + "learning_rate": 8.601306532663318e-06, + "loss": 0.0622, + "step": 14425 + }, + { + "epoch": 10.78, + "grad_norm": 3.1092944145202637, + "learning_rate": 8.598793969849247e-06, + "loss": 0.0596, + "step": 14450 + }, + { + "epoch": 10.79, + "grad_norm": 2.64941668510437, + "learning_rate": 8.596281407035177e-06, + "loss": 0.0634, + "step": 14475 + }, + { + "epoch": 10.81, + "grad_norm": 2.8536019325256348, + "learning_rate": 8.593768844221106e-06, + "loss": 0.061, + "step": 14500 + }, + { + "epoch": 10.83, + "grad_norm": 2.9746649265289307, + "learning_rate": 8.591256281407035e-06, + "loss": 0.0625, + "step": 14525 + }, + { + "epoch": 10.85, + "grad_norm": 2.7053146362304688, + "learning_rate": 8.588743718592966e-06, + "loss": 0.0607, + "step": 14550 + }, + { + "epoch": 10.87, + "grad_norm": 2.8970115184783936, + "learning_rate": 8.586231155778894e-06, + "loss": 0.0608, + "step": 14575 + }, + { + "epoch": 10.89, + "grad_norm": 2.5786519050598145, + "learning_rate": 8.583718592964825e-06, + "loss": 0.0604, + "step": 14600 + }, + { + "epoch": 10.91, + "grad_norm": 2.6070234775543213, + "learning_rate": 8.581206030150754e-06, + "loss": 0.0614, + "step": 14625 + }, + { + "epoch": 10.92, + "grad_norm": 3.1030519008636475, + "learning_rate": 8.578693467336684e-06, + "loss": 0.0625, + "step": 14650 + }, + { + "epoch": 10.94, + "grad_norm": 2.8987107276916504, + "learning_rate": 8.576180904522615e-06, + "loss": 0.0624, + "step": 14675 + }, + { + "epoch": 10.96, + "grad_norm": 2.558865785598755, + "learning_rate": 8.573668341708544e-06, + "loss": 0.0621, + "step": 14700 + }, + { + "epoch": 10.98, + "grad_norm": 3.0490760803222656, + "learning_rate": 8.571155778894473e-06, + "loss": 0.0598, + "step": 14725 + }, + { + "epoch": 11.0, + "grad_norm": 2.8807432651519775, + "learning_rate": 8.568643216080403e-06, + "loss": 0.0595, + "step": 14750 + }, + { + "epoch": 11.02, + "grad_norm": 2.630389451980591, + "learning_rate": 8.566130653266332e-06, + "loss": 0.0514, + "step": 14775 + }, + { + "epoch": 11.04, + "grad_norm": 2.4784369468688965, + "learning_rate": 8.563618090452263e-06, + "loss": 0.0513, + "step": 14800 + }, + { + "epoch": 11.06, + "grad_norm": 2.2122538089752197, + "learning_rate": 8.561105527638192e-06, + "loss": 0.0504, + "step": 14825 + }, + { + "epoch": 11.07, + "grad_norm": 2.8626670837402344, + "learning_rate": 8.558592964824122e-06, + "loss": 0.0518, + "step": 14850 + }, + { + "epoch": 11.09, + "grad_norm": 2.6000030040740967, + "learning_rate": 8.556080402010051e-06, + "loss": 0.0513, + "step": 14875 + }, + { + "epoch": 11.11, + "grad_norm": 2.769547462463379, + "learning_rate": 8.55356783919598e-06, + "loss": 0.0519, + "step": 14900 + }, + { + "epoch": 11.13, + "grad_norm": 2.722727060317993, + "learning_rate": 8.55105527638191e-06, + "loss": 0.053, + "step": 14925 + }, + { + "epoch": 11.15, + "grad_norm": 2.4627251625061035, + "learning_rate": 8.54854271356784e-06, + "loss": 0.0531, + "step": 14950 + }, + { + "epoch": 11.17, + "grad_norm": 3.0005409717559814, + "learning_rate": 8.54603015075377e-06, + "loss": 0.0511, + "step": 14975 + }, + { + "epoch": 11.19, + "grad_norm": 2.3783016204833984, + "learning_rate": 8.5435175879397e-06, + "loss": 0.0536, + "step": 15000 + }, + { + "epoch": 11.19, + "eval_loss": 0.11192228645086288, + "eval_runtime": 873.8668, + "eval_samples_per_second": 1.636, + "eval_steps_per_second": 1.636, + "eval_wer": 46.90921340744484, + "step": 15000 + }, + { + "epoch": 11.2, + "grad_norm": 2.7064828872680664, + "learning_rate": 8.54100502512563e-06, + "loss": 0.0495, + "step": 15025 + }, + { + "epoch": 11.22, + "grad_norm": 2.5048270225524902, + "learning_rate": 8.538492462311558e-06, + "loss": 0.0515, + "step": 15050 + }, + { + "epoch": 11.24, + "grad_norm": 3.313308000564575, + "learning_rate": 8.535979899497489e-06, + "loss": 0.0509, + "step": 15075 + }, + { + "epoch": 11.26, + "grad_norm": 2.4727749824523926, + "learning_rate": 8.533467336683418e-06, + "loss": 0.0541, + "step": 15100 + }, + { + "epoch": 11.28, + "grad_norm": 2.974033832550049, + "learning_rate": 8.530954773869347e-06, + "loss": 0.0523, + "step": 15125 + }, + { + "epoch": 11.3, + "grad_norm": 2.612154722213745, + "learning_rate": 8.528442211055277e-06, + "loss": 0.054, + "step": 15150 + }, + { + "epoch": 11.32, + "grad_norm": 2.6749489307403564, + "learning_rate": 8.525929648241206e-06, + "loss": 0.052, + "step": 15175 + }, + { + "epoch": 11.33, + "grad_norm": 2.481614589691162, + "learning_rate": 8.523417085427135e-06, + "loss": 0.0534, + "step": 15200 + }, + { + "epoch": 11.35, + "grad_norm": 2.6133806705474854, + "learning_rate": 8.520904522613066e-06, + "loss": 0.0505, + "step": 15225 + }, + { + "epoch": 11.37, + "grad_norm": 2.948282480239868, + "learning_rate": 8.518391959798996e-06, + "loss": 0.0545, + "step": 15250 + }, + { + "epoch": 11.39, + "grad_norm": 2.5260355472564697, + "learning_rate": 8.515879396984925e-06, + "loss": 0.0547, + "step": 15275 + }, + { + "epoch": 11.41, + "grad_norm": 2.566420555114746, + "learning_rate": 8.513366834170856e-06, + "loss": 0.0552, + "step": 15300 + }, + { + "epoch": 11.43, + "grad_norm": 3.0955796241760254, + "learning_rate": 8.510854271356784e-06, + "loss": 0.0545, + "step": 15325 + }, + { + "epoch": 11.45, + "grad_norm": 3.0555806159973145, + "learning_rate": 8.508341708542715e-06, + "loss": 0.0524, + "step": 15350 + }, + { + "epoch": 11.47, + "grad_norm": 2.456239700317383, + "learning_rate": 8.505829145728644e-06, + "loss": 0.054, + "step": 15375 + }, + { + "epoch": 11.48, + "grad_norm": 2.8665714263916016, + "learning_rate": 8.503316582914573e-06, + "loss": 0.053, + "step": 15400 + }, + { + "epoch": 11.5, + "grad_norm": 2.9251291751861572, + "learning_rate": 8.500804020100504e-06, + "loss": 0.0546, + "step": 15425 + }, + { + "epoch": 11.52, + "grad_norm": 2.7795040607452393, + "learning_rate": 8.498291457286432e-06, + "loss": 0.0554, + "step": 15450 + }, + { + "epoch": 11.54, + "grad_norm": 2.5011777877807617, + "learning_rate": 8.495778894472363e-06, + "loss": 0.0538, + "step": 15475 + }, + { + "epoch": 11.56, + "grad_norm": 2.7621777057647705, + "learning_rate": 8.493266331658292e-06, + "loss": 0.055, + "step": 15500 + }, + { + "epoch": 11.58, + "grad_norm": 2.877533435821533, + "learning_rate": 8.490753768844222e-06, + "loss": 0.0548, + "step": 15525 + }, + { + "epoch": 11.6, + "grad_norm": 2.7816832065582275, + "learning_rate": 8.488241206030151e-06, + "loss": 0.0535, + "step": 15550 + }, + { + "epoch": 11.61, + "grad_norm": 2.6336607933044434, + "learning_rate": 8.485728643216082e-06, + "loss": 0.0536, + "step": 15575 + }, + { + "epoch": 11.63, + "grad_norm": 2.8519816398620605, + "learning_rate": 8.48321608040201e-06, + "loss": 0.054, + "step": 15600 + }, + { + "epoch": 11.65, + "grad_norm": 2.3937814235687256, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0522, + "step": 15625 + }, + { + "epoch": 11.67, + "grad_norm": 3.410144567489624, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0525, + "step": 15650 + }, + { + "epoch": 11.69, + "grad_norm": 2.5799050331115723, + "learning_rate": 8.4756783919598e-06, + "loss": 0.0544, + "step": 15675 + }, + { + "epoch": 11.71, + "grad_norm": 2.68683123588562, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0531, + "step": 15700 + }, + { + "epoch": 11.73, + "grad_norm": 2.3904669284820557, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0526, + "step": 15725 + }, + { + "epoch": 11.74, + "grad_norm": 2.722712993621826, + "learning_rate": 8.468140703517589e-06, + "loss": 0.0536, + "step": 15750 + }, + { + "epoch": 11.76, + "grad_norm": 3.117938756942749, + "learning_rate": 8.465628140703518e-06, + "loss": 0.0528, + "step": 15775 + }, + { + "epoch": 11.78, + "grad_norm": 2.675419807434082, + "learning_rate": 8.463115577889448e-06, + "loss": 0.054, + "step": 15800 + }, + { + "epoch": 11.8, + "grad_norm": 2.925652265548706, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0547, + "step": 15825 + }, + { + "epoch": 11.82, + "grad_norm": 2.883999824523926, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0557, + "step": 15850 + }, + { + "epoch": 11.84, + "grad_norm": 3.167105197906494, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0556, + "step": 15875 + }, + { + "epoch": 11.86, + "grad_norm": 2.4618799686431885, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0523, + "step": 15900 + }, + { + "epoch": 11.88, + "grad_norm": 2.8751628398895264, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0549, + "step": 15925 + }, + { + "epoch": 11.89, + "grad_norm": 2.357063055038452, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0524, + "step": 15950 + }, + { + "epoch": 11.91, + "grad_norm": 2.768460273742676, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0524, + "step": 15975 + }, + { + "epoch": 11.93, + "grad_norm": 2.936572551727295, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0549, + "step": 16000 + }, + { + "epoch": 11.93, + "eval_loss": 0.11203037202358246, + "eval_runtime": 876.4477, + "eval_samples_per_second": 1.632, + "eval_steps_per_second": 1.632, + "eval_wer": 44.525854808826004, + "step": 16000 + }, + { + "epoch": 11.95, + "grad_norm": 2.615638256072998, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0531, + "step": 16025 + }, + { + "epoch": 11.97, + "grad_norm": 2.4253170490264893, + "learning_rate": 8.437989949748744e-06, + "loss": 0.0515, + "step": 16050 + }, + { + "epoch": 11.99, + "grad_norm": 2.9026854038238525, + "learning_rate": 8.435477386934674e-06, + "loss": 0.054, + "step": 16075 + }, + { + "epoch": 12.01, + "grad_norm": 3.067901611328125, + "learning_rate": 8.432964824120605e-06, + "loss": 0.0529, + "step": 16100 + }, + { + "epoch": 12.02, + "grad_norm": 2.4910571575164795, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0436, + "step": 16125 + }, + { + "epoch": 12.04, + "grad_norm": 2.241611957550049, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0446, + "step": 16150 + }, + { + "epoch": 12.06, + "grad_norm": 2.8241424560546875, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0452, + "step": 16175 + }, + { + "epoch": 12.08, + "grad_norm": 2.5876190662384033, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0445, + "step": 16200 + }, + { + "epoch": 12.1, + "grad_norm": 2.204615592956543, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0439, + "step": 16225 + }, + { + "epoch": 12.12, + "grad_norm": 2.666674852371216, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0458, + "step": 16250 + }, + { + "epoch": 12.14, + "grad_norm": 3.2046256065368652, + "learning_rate": 8.415376884422112e-06, + "loss": 0.0443, + "step": 16275 + }, + { + "epoch": 12.16, + "grad_norm": 2.5278756618499756, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0463, + "step": 16300 + }, + { + "epoch": 12.17, + "grad_norm": 2.957080364227295, + "learning_rate": 8.410452261306532e-06, + "loss": 0.0462, + "step": 16325 + }, + { + "epoch": 12.19, + "grad_norm": 2.471741199493408, + "learning_rate": 8.407939698492463e-06, + "loss": 0.0453, + "step": 16350 + }, + { + "epoch": 12.21, + "grad_norm": 3.0031425952911377, + "learning_rate": 8.405427135678393e-06, + "loss": 0.0465, + "step": 16375 + }, + { + "epoch": 12.23, + "grad_norm": 2.7527480125427246, + "learning_rate": 8.402914572864322e-06, + "loss": 0.0461, + "step": 16400 + }, + { + "epoch": 12.25, + "grad_norm": 2.517364740371704, + "learning_rate": 8.400402010050251e-06, + "loss": 0.0449, + "step": 16425 + }, + { + "epoch": 12.27, + "grad_norm": 2.8448832035064697, + "learning_rate": 8.39788944723618e-06, + "loss": 0.0449, + "step": 16450 + }, + { + "epoch": 12.29, + "grad_norm": 2.887563467025757, + "learning_rate": 8.395376884422112e-06, + "loss": 0.0462, + "step": 16475 + }, + { + "epoch": 12.3, + "grad_norm": 2.847557783126831, + "learning_rate": 8.392864321608041e-06, + "loss": 0.0452, + "step": 16500 + }, + { + "epoch": 12.32, + "grad_norm": 3.120497465133667, + "learning_rate": 8.39035175879397e-06, + "loss": 0.0445, + "step": 16525 + }, + { + "epoch": 12.34, + "grad_norm": 2.9528555870056152, + "learning_rate": 8.3878391959799e-06, + "loss": 0.0462, + "step": 16550 + }, + { + "epoch": 12.36, + "grad_norm": 2.6423401832580566, + "learning_rate": 8.38532663316583e-06, + "loss": 0.0451, + "step": 16575 + }, + { + "epoch": 12.38, + "grad_norm": 3.219329357147217, + "learning_rate": 8.382814070351758e-06, + "loss": 0.0467, + "step": 16600 + }, + { + "epoch": 12.4, + "grad_norm": 3.049623489379883, + "learning_rate": 8.38030150753769e-06, + "loss": 0.0485, + "step": 16625 + }, + { + "epoch": 12.42, + "grad_norm": 2.7645978927612305, + "learning_rate": 8.377788944723619e-06, + "loss": 0.0468, + "step": 16650 + }, + { + "epoch": 12.43, + "grad_norm": 2.7569730281829834, + "learning_rate": 8.375276381909548e-06, + "loss": 0.0464, + "step": 16675 + }, + { + "epoch": 12.45, + "grad_norm": 2.9958302974700928, + "learning_rate": 8.372763819095479e-06, + "loss": 0.045, + "step": 16700 + }, + { + "epoch": 12.47, + "grad_norm": 2.894927740097046, + "learning_rate": 8.370251256281407e-06, + "loss": 0.0475, + "step": 16725 + }, + { + "epoch": 12.49, + "grad_norm": 2.8208131790161133, + "learning_rate": 8.367738693467338e-06, + "loss": 0.0467, + "step": 16750 + }, + { + "epoch": 12.51, + "grad_norm": 3.0496814250946045, + "learning_rate": 8.365226130653267e-06, + "loss": 0.0489, + "step": 16775 + }, + { + "epoch": 12.53, + "grad_norm": 2.7081875801086426, + "learning_rate": 8.362713567839196e-06, + "loss": 0.0482, + "step": 16800 + }, + { + "epoch": 12.55, + "grad_norm": 3.1533963680267334, + "learning_rate": 8.360201005025126e-06, + "loss": 0.0457, + "step": 16825 + }, + { + "epoch": 12.57, + "grad_norm": 3.154571294784546, + "learning_rate": 8.357688442211057e-06, + "loss": 0.0449, + "step": 16850 + }, + { + "epoch": 12.58, + "grad_norm": 2.589935779571533, + "learning_rate": 8.355175879396986e-06, + "loss": 0.0459, + "step": 16875 + }, + { + "epoch": 12.6, + "grad_norm": 2.843365430831909, + "learning_rate": 8.352663316582915e-06, + "loss": 0.0467, + "step": 16900 + }, + { + "epoch": 12.62, + "grad_norm": 2.482649326324463, + "learning_rate": 8.350150753768845e-06, + "loss": 0.0469, + "step": 16925 + }, + { + "epoch": 12.64, + "grad_norm": 2.2285611629486084, + "learning_rate": 8.347638190954774e-06, + "loss": 0.0474, + "step": 16950 + }, + { + "epoch": 12.66, + "grad_norm": 2.874464511871338, + "learning_rate": 8.345125628140705e-06, + "loss": 0.047, + "step": 16975 + }, + { + "epoch": 12.68, + "grad_norm": 2.841932535171509, + "learning_rate": 8.342613065326633e-06, + "loss": 0.0488, + "step": 17000 + }, + { + "epoch": 12.68, + "eval_loss": 0.11660941690206528, + "eval_runtime": 878.3961, + "eval_samples_per_second": 1.628, + "eval_steps_per_second": 1.628, + "eval_wer": 45.048004042445676, + "step": 17000 + }, + { + "epoch": 12.7, + "grad_norm": 3.142643451690674, + "learning_rate": 8.340100502512564e-06, + "loss": 0.0467, + "step": 17025 + }, + { + "epoch": 12.71, + "grad_norm": 2.773329973220825, + "learning_rate": 8.337587939698493e-06, + "loss": 0.0487, + "step": 17050 + }, + { + "epoch": 12.73, + "grad_norm": 2.481163501739502, + "learning_rate": 8.335075376884422e-06, + "loss": 0.0471, + "step": 17075 + }, + { + "epoch": 12.75, + "grad_norm": 2.6322290897369385, + "learning_rate": 8.332562814070353e-06, + "loss": 0.0464, + "step": 17100 + }, + { + "epoch": 12.77, + "grad_norm": 2.888289213180542, + "learning_rate": 8.330050251256283e-06, + "loss": 0.0462, + "step": 17125 + }, + { + "epoch": 12.79, + "grad_norm": 3.1189942359924316, + "learning_rate": 8.327537688442212e-06, + "loss": 0.0472, + "step": 17150 + }, + { + "epoch": 12.81, + "grad_norm": 2.9474704265594482, + "learning_rate": 8.325025125628141e-06, + "loss": 0.0473, + "step": 17175 + }, + { + "epoch": 12.83, + "grad_norm": 2.949190616607666, + "learning_rate": 8.32251256281407e-06, + "loss": 0.0476, + "step": 17200 + }, + { + "epoch": 12.84, + "grad_norm": 2.976539373397827, + "learning_rate": 8.32e-06, + "loss": 0.0477, + "step": 17225 + }, + { + "epoch": 12.86, + "grad_norm": 2.8568050861358643, + "learning_rate": 8.317487437185931e-06, + "loss": 0.0487, + "step": 17250 + }, + { + "epoch": 12.88, + "grad_norm": 3.044435501098633, + "learning_rate": 8.31497487437186e-06, + "loss": 0.0477, + "step": 17275 + }, + { + "epoch": 12.9, + "grad_norm": 2.7982301712036133, + "learning_rate": 8.31246231155779e-06, + "loss": 0.0481, + "step": 17300 + }, + { + "epoch": 12.92, + "grad_norm": 3.075779914855957, + "learning_rate": 8.309949748743719e-06, + "loss": 0.0477, + "step": 17325 + }, + { + "epoch": 12.94, + "grad_norm": 2.6341068744659424, + "learning_rate": 8.307437185929648e-06, + "loss": 0.0471, + "step": 17350 + }, + { + "epoch": 12.96, + "grad_norm": 3.5678842067718506, + "learning_rate": 8.30492462311558e-06, + "loss": 0.0504, + "step": 17375 + }, + { + "epoch": 12.98, + "grad_norm": 2.4678618907928467, + "learning_rate": 8.302412060301509e-06, + "loss": 0.0488, + "step": 17400 + }, + { + "epoch": 12.99, + "grad_norm": 2.738983154296875, + "learning_rate": 8.299899497487438e-06, + "loss": 0.0483, + "step": 17425 + }, + { + "epoch": 13.01, + "grad_norm": 2.0846645832061768, + "learning_rate": 8.297386934673367e-06, + "loss": 0.0398, + "step": 17450 + }, + { + "epoch": 13.03, + "grad_norm": 2.4392218589782715, + "learning_rate": 8.294874371859297e-06, + "loss": 0.0371, + "step": 17475 + }, + { + "epoch": 13.05, + "grad_norm": 2.3937807083129883, + "learning_rate": 8.292361809045228e-06, + "loss": 0.0379, + "step": 17500 + }, + { + "epoch": 13.07, + "grad_norm": 2.584233283996582, + "learning_rate": 8.289849246231157e-06, + "loss": 0.0387, + "step": 17525 + }, + { + "epoch": 13.09, + "grad_norm": 2.5976123809814453, + "learning_rate": 8.287336683417086e-06, + "loss": 0.0392, + "step": 17550 + }, + { + "epoch": 13.11, + "grad_norm": 2.4918408393859863, + "learning_rate": 8.284824120603016e-06, + "loss": 0.0378, + "step": 17575 + }, + { + "epoch": 13.12, + "grad_norm": 2.4994266033172607, + "learning_rate": 8.282311557788945e-06, + "loss": 0.0381, + "step": 17600 + }, + { + "epoch": 13.14, + "grad_norm": 3.1311471462249756, + "learning_rate": 8.279798994974874e-06, + "loss": 0.0391, + "step": 17625 + }, + { + "epoch": 13.16, + "grad_norm": 2.6331562995910645, + "learning_rate": 8.277286432160805e-06, + "loss": 0.0393, + "step": 17650 + }, + { + "epoch": 13.18, + "grad_norm": 3.0139119625091553, + "learning_rate": 8.274773869346735e-06, + "loss": 0.0392, + "step": 17675 + }, + { + "epoch": 13.2, + "grad_norm": 2.712106943130493, + "learning_rate": 8.272261306532664e-06, + "loss": 0.0406, + "step": 17700 + }, + { + "epoch": 13.22, + "grad_norm": 2.958265781402588, + "learning_rate": 8.269748743718595e-06, + "loss": 0.0405, + "step": 17725 + }, + { + "epoch": 13.24, + "grad_norm": 2.717831611633301, + "learning_rate": 8.267236180904523e-06, + "loss": 0.0397, + "step": 17750 + }, + { + "epoch": 13.26, + "grad_norm": 2.927593231201172, + "learning_rate": 8.264723618090454e-06, + "loss": 0.0412, + "step": 17775 + }, + { + "epoch": 13.27, + "grad_norm": 2.675755739212036, + "learning_rate": 8.262211055276383e-06, + "loss": 0.0407, + "step": 17800 + }, + { + "epoch": 13.29, + "grad_norm": 2.620959520339966, + "learning_rate": 8.259698492462312e-06, + "loss": 0.0385, + "step": 17825 + }, + { + "epoch": 13.31, + "grad_norm": 2.6173691749572754, + "learning_rate": 8.257185929648242e-06, + "loss": 0.0396, + "step": 17850 + }, + { + "epoch": 13.33, + "grad_norm": 2.5244550704956055, + "learning_rate": 8.25467336683417e-06, + "loss": 0.0412, + "step": 17875 + }, + { + "epoch": 13.35, + "grad_norm": 3.040226697921753, + "learning_rate": 8.2521608040201e-06, + "loss": 0.04, + "step": 17900 + }, + { + "epoch": 13.37, + "grad_norm": 2.9432833194732666, + "learning_rate": 8.249648241206031e-06, + "loss": 0.0402, + "step": 17925 + }, + { + "epoch": 13.39, + "grad_norm": 2.435591459274292, + "learning_rate": 8.24713567839196e-06, + "loss": 0.04, + "step": 17950 + }, + { + "epoch": 13.4, + "grad_norm": 2.56547474861145, + "learning_rate": 8.24462311557789e-06, + "loss": 0.0402, + "step": 17975 + }, + { + "epoch": 13.42, + "grad_norm": 3.421908378601074, + "learning_rate": 8.24211055276382e-06, + "loss": 0.0418, + "step": 18000 + }, + { + "epoch": 13.42, + "eval_loss": 0.12045598775148392, + "eval_runtime": 878.8191, + "eval_samples_per_second": 1.627, + "eval_steps_per_second": 1.627, + "eval_wer": 44.887990567626744, + "step": 18000 + }, + { + "epoch": 13.44, + "grad_norm": 2.382695436477661, + "learning_rate": 8.239597989949748e-06, + "loss": 0.0405, + "step": 18025 + }, + { + "epoch": 13.46, + "grad_norm": 2.6586036682128906, + "learning_rate": 8.23708542713568e-06, + "loss": 0.041, + "step": 18050 + }, + { + "epoch": 13.48, + "grad_norm": 2.901378631591797, + "learning_rate": 8.234572864321609e-06, + "loss": 0.0408, + "step": 18075 + }, + { + "epoch": 13.5, + "grad_norm": 2.8789255619049072, + "learning_rate": 8.232060301507538e-06, + "loss": 0.0407, + "step": 18100 + }, + { + "epoch": 13.52, + "grad_norm": 2.6065218448638916, + "learning_rate": 8.229547738693467e-06, + "loss": 0.04, + "step": 18125 + }, + { + "epoch": 13.53, + "grad_norm": 2.7479989528656006, + "learning_rate": 8.227035175879397e-06, + "loss": 0.0416, + "step": 18150 + }, + { + "epoch": 13.55, + "grad_norm": 2.5820047855377197, + "learning_rate": 8.224522613065328e-06, + "loss": 0.0401, + "step": 18175 + }, + { + "epoch": 13.57, + "grad_norm": 2.7071280479431152, + "learning_rate": 8.222010050251257e-06, + "loss": 0.0411, + "step": 18200 + }, + { + "epoch": 13.59, + "grad_norm": 3.0302774906158447, + "learning_rate": 8.219497487437186e-06, + "loss": 0.0419, + "step": 18225 + }, + { + "epoch": 13.61, + "grad_norm": 2.518003463745117, + "learning_rate": 8.216984924623116e-06, + "loss": 0.0431, + "step": 18250 + }, + { + "epoch": 13.63, + "grad_norm": 2.947669744491577, + "learning_rate": 8.214472361809047e-06, + "loss": 0.0411, + "step": 18275 + }, + { + "epoch": 13.65, + "grad_norm": 2.497828960418701, + "learning_rate": 8.211959798994974e-06, + "loss": 0.0419, + "step": 18300 + }, + { + "epoch": 13.67, + "grad_norm": 2.551356554031372, + "learning_rate": 8.209447236180905e-06, + "loss": 0.041, + "step": 18325 + }, + { + "epoch": 13.68, + "grad_norm": 2.9529895782470703, + "learning_rate": 8.206934673366835e-06, + "loss": 0.0413, + "step": 18350 + }, + { + "epoch": 13.7, + "grad_norm": 2.5652713775634766, + "learning_rate": 8.204422110552764e-06, + "loss": 0.0429, + "step": 18375 + }, + { + "epoch": 13.72, + "grad_norm": 3.011899471282959, + "learning_rate": 8.201909547738695e-06, + "loss": 0.0433, + "step": 18400 + }, + { + "epoch": 13.74, + "grad_norm": 3.12748384475708, + "learning_rate": 8.199396984924623e-06, + "loss": 0.0413, + "step": 18425 + }, + { + "epoch": 13.76, + "grad_norm": 2.771794080734253, + "learning_rate": 8.196884422110554e-06, + "loss": 0.0424, + "step": 18450 + }, + { + "epoch": 13.78, + "grad_norm": 2.6435468196868896, + "learning_rate": 8.194371859296483e-06, + "loss": 0.0404, + "step": 18475 + }, + { + "epoch": 13.8, + "grad_norm": 3.0825629234313965, + "learning_rate": 8.191859296482412e-06, + "loss": 0.0416, + "step": 18500 + }, + { + "epoch": 13.81, + "grad_norm": 2.4663116931915283, + "learning_rate": 8.189346733668342e-06, + "loss": 0.0403, + "step": 18525 + }, + { + "epoch": 13.83, + "grad_norm": 2.4246201515197754, + "learning_rate": 8.186834170854273e-06, + "loss": 0.0413, + "step": 18550 + }, + { + "epoch": 13.85, + "grad_norm": 2.561300039291382, + "learning_rate": 8.184321608040202e-06, + "loss": 0.0425, + "step": 18575 + }, + { + "epoch": 13.87, + "grad_norm": 2.941437005996704, + "learning_rate": 8.181809045226131e-06, + "loss": 0.0406, + "step": 18600 + }, + { + "epoch": 13.89, + "grad_norm": 2.7284204959869385, + "learning_rate": 8.17929648241206e-06, + "loss": 0.0401, + "step": 18625 + }, + { + "epoch": 13.91, + "grad_norm": 2.472852945327759, + "learning_rate": 8.17678391959799e-06, + "loss": 0.0401, + "step": 18650 + }, + { + "epoch": 13.93, + "grad_norm": 2.8096718788146973, + "learning_rate": 8.174271356783921e-06, + "loss": 0.0405, + "step": 18675 + }, + { + "epoch": 13.94, + "grad_norm": 2.6405413150787354, + "learning_rate": 8.171758793969849e-06, + "loss": 0.0421, + "step": 18700 + }, + { + "epoch": 13.96, + "grad_norm": 2.62223482131958, + "learning_rate": 8.16924623115578e-06, + "loss": 0.0415, + "step": 18725 + }, + { + "epoch": 13.98, + "grad_norm": 2.9442789554595947, + "learning_rate": 8.166733668341709e-06, + "loss": 0.0404, + "step": 18750 + }, + { + "epoch": 14.0, + "grad_norm": 2.5075149536132812, + "learning_rate": 8.164221105527638e-06, + "loss": 0.0407, + "step": 18775 + }, + { + "epoch": 14.02, + "grad_norm": 2.4428842067718506, + "learning_rate": 8.16170854271357e-06, + "loss": 0.034, + "step": 18800 + }, + { + "epoch": 14.04, + "grad_norm": 2.307425022125244, + "learning_rate": 8.159195979899499e-06, + "loss": 0.0331, + "step": 18825 + }, + { + "epoch": 14.06, + "grad_norm": 2.5993454456329346, + "learning_rate": 8.156683417085428e-06, + "loss": 0.0335, + "step": 18850 + }, + { + "epoch": 14.08, + "grad_norm": 2.4332356452941895, + "learning_rate": 8.154170854271357e-06, + "loss": 0.0316, + "step": 18875 + }, + { + "epoch": 14.09, + "grad_norm": 2.3169617652893066, + "learning_rate": 8.151658291457287e-06, + "loss": 0.0333, + "step": 18900 + }, + { + "epoch": 14.11, + "grad_norm": 2.3596982955932617, + "learning_rate": 8.149145728643216e-06, + "loss": 0.032, + "step": 18925 + }, + { + "epoch": 14.13, + "grad_norm": 2.875915288925171, + "learning_rate": 8.146633165829147e-06, + "loss": 0.0342, + "step": 18950 + }, + { + "epoch": 14.15, + "grad_norm": 2.4446523189544678, + "learning_rate": 8.144120603015076e-06, + "loss": 0.0328, + "step": 18975 + }, + { + "epoch": 14.17, + "grad_norm": 2.6276607513427734, + "learning_rate": 8.141608040201006e-06, + "loss": 0.0353, + "step": 19000 + }, + { + "epoch": 14.17, + "eval_loss": 0.12785743176937103, + "eval_runtime": 866.567, + "eval_samples_per_second": 1.65, + "eval_steps_per_second": 1.65, + "eval_wer": 45.23328280276234, + "step": 19000 + }, + { + "epoch": 14.19, + "grad_norm": 2.6396186351776123, + "learning_rate": 8.139095477386935e-06, + "loss": 0.0336, + "step": 19025 + }, + { + "epoch": 14.21, + "grad_norm": 2.456615686416626, + "learning_rate": 8.136582914572864e-06, + "loss": 0.0346, + "step": 19050 + }, + { + "epoch": 14.22, + "grad_norm": 2.4193451404571533, + "learning_rate": 8.134070351758795e-06, + "loss": 0.0335, + "step": 19075 + }, + { + "epoch": 14.24, + "grad_norm": 2.589301347732544, + "learning_rate": 8.131557788944725e-06, + "loss": 0.0344, + "step": 19100 + }, + { + "epoch": 14.26, + "grad_norm": 2.819026470184326, + "learning_rate": 8.129045226130654e-06, + "loss": 0.0331, + "step": 19125 + }, + { + "epoch": 14.28, + "grad_norm": 3.079618215560913, + "learning_rate": 8.126532663316583e-06, + "loss": 0.0354, + "step": 19150 + }, + { + "epoch": 14.3, + "grad_norm": 2.9702916145324707, + "learning_rate": 8.124020100502513e-06, + "loss": 0.0342, + "step": 19175 + }, + { + "epoch": 14.32, + "grad_norm": 2.773499011993408, + "learning_rate": 8.121507537688444e-06, + "loss": 0.034, + "step": 19200 + }, + { + "epoch": 14.34, + "grad_norm": 2.756190538406372, + "learning_rate": 8.118994974874373e-06, + "loss": 0.0356, + "step": 19225 + }, + { + "epoch": 14.35, + "grad_norm": 2.0953781604766846, + "learning_rate": 8.116482412060302e-06, + "loss": 0.0337, + "step": 19250 + }, + { + "epoch": 14.37, + "grad_norm": 2.559443950653076, + "learning_rate": 8.113969849246232e-06, + "loss": 0.0343, + "step": 19275 + }, + { + "epoch": 14.39, + "grad_norm": 2.3746700286865234, + "learning_rate": 8.111457286432161e-06, + "loss": 0.034, + "step": 19300 + }, + { + "epoch": 14.41, + "grad_norm": 2.7488675117492676, + "learning_rate": 8.10894472361809e-06, + "loss": 0.0346, + "step": 19325 + }, + { + "epoch": 14.43, + "grad_norm": 2.894341468811035, + "learning_rate": 8.106432160804021e-06, + "loss": 0.0358, + "step": 19350 + }, + { + "epoch": 14.45, + "grad_norm": 2.5863711833953857, + "learning_rate": 8.10391959798995e-06, + "loss": 0.0364, + "step": 19375 + }, + { + "epoch": 14.47, + "grad_norm": 2.7974531650543213, + "learning_rate": 8.10140703517588e-06, + "loss": 0.0345, + "step": 19400 + }, + { + "epoch": 14.49, + "grad_norm": 2.56164813041687, + "learning_rate": 8.098894472361811e-06, + "loss": 0.036, + "step": 19425 + }, + { + "epoch": 14.5, + "grad_norm": 2.5223586559295654, + "learning_rate": 8.096381909547739e-06, + "loss": 0.036, + "step": 19450 + }, + { + "epoch": 14.52, + "grad_norm": 2.4472038745880127, + "learning_rate": 8.09386934673367e-06, + "loss": 0.0361, + "step": 19475 + }, + { + "epoch": 14.54, + "grad_norm": 2.5200254917144775, + "learning_rate": 8.091356783919599e-06, + "loss": 0.0337, + "step": 19500 + }, + { + "epoch": 14.56, + "grad_norm": 2.621685743331909, + "learning_rate": 8.088844221105528e-06, + "loss": 0.0348, + "step": 19525 + }, + { + "epoch": 14.58, + "grad_norm": 3.0153393745422363, + "learning_rate": 8.086331658291458e-06, + "loss": 0.0352, + "step": 19550 + }, + { + "epoch": 14.6, + "grad_norm": 2.689079999923706, + "learning_rate": 8.083819095477387e-06, + "loss": 0.0349, + "step": 19575 + }, + { + "epoch": 14.62, + "grad_norm": 2.801084518432617, + "learning_rate": 8.081306532663318e-06, + "loss": 0.0348, + "step": 19600 + }, + { + "epoch": 14.63, + "grad_norm": 2.7863168716430664, + "learning_rate": 8.078793969849247e-06, + "loss": 0.0368, + "step": 19625 + }, + { + "epoch": 14.65, + "grad_norm": 4.021426200866699, + "learning_rate": 8.076281407035177e-06, + "loss": 0.0369, + "step": 19650 + }, + { + "epoch": 14.67, + "grad_norm": 2.8181521892547607, + "learning_rate": 8.073768844221106e-06, + "loss": 0.0351, + "step": 19675 + }, + { + "epoch": 14.69, + "grad_norm": 3.030831813812256, + "learning_rate": 8.071256281407037e-06, + "loss": 0.0363, + "step": 19700 + }, + { + "epoch": 14.71, + "grad_norm": 2.4212806224823, + "learning_rate": 8.068743718592964e-06, + "loss": 0.0353, + "step": 19725 + }, + { + "epoch": 14.73, + "grad_norm": 2.6584081649780273, + "learning_rate": 8.066231155778895e-06, + "loss": 0.0356, + "step": 19750 + }, + { + "epoch": 14.75, + "grad_norm": 2.8886303901672363, + "learning_rate": 8.063718592964825e-06, + "loss": 0.0353, + "step": 19775 + }, + { + "epoch": 14.77, + "grad_norm": 3.4711225032806396, + "learning_rate": 8.061206030150754e-06, + "loss": 0.0352, + "step": 19800 + }, + { + "epoch": 14.78, + "grad_norm": 2.8529305458068848, + "learning_rate": 8.058693467336685e-06, + "loss": 0.0373, + "step": 19825 + }, + { + "epoch": 14.8, + "grad_norm": 2.8307113647460938, + "learning_rate": 8.056180904522613e-06, + "loss": 0.0367, + "step": 19850 + }, + { + "epoch": 14.82, + "grad_norm": 2.772686719894409, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0354, + "step": 19875 + }, + { + "epoch": 14.84, + "grad_norm": 2.689441204071045, + "learning_rate": 8.051155778894473e-06, + "loss": 0.0361, + "step": 19900 + }, + { + "epoch": 14.86, + "grad_norm": 2.8578641414642334, + "learning_rate": 8.048643216080402e-06, + "loss": 0.0374, + "step": 19925 + }, + { + "epoch": 14.88, + "grad_norm": 2.540038824081421, + "learning_rate": 8.046130653266332e-06, + "loss": 0.0352, + "step": 19950 + }, + { + "epoch": 14.9, + "grad_norm": 2.837907314300537, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0344, + "step": 19975 + }, + { + "epoch": 14.91, + "grad_norm": 3.3113791942596436, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0359, + "step": 20000 + }, + { + "epoch": 14.91, + "eval_loss": 0.12943382561206818, + "eval_runtime": 871.2652, + "eval_samples_per_second": 1.641, + "eval_steps_per_second": 1.641, + "eval_wer": 45.70490146538656, + "step": 20000 + }, + { + "epoch": 14.93, + "grad_norm": 2.5073697566986084, + "learning_rate": 8.038592964824121e-06, + "loss": 0.0362, + "step": 20025 + }, + { + "epoch": 14.95, + "grad_norm": 2.6841866970062256, + "learning_rate": 8.03608040201005e-06, + "loss": 0.0375, + "step": 20050 + }, + { + "epoch": 14.97, + "grad_norm": 2.755596160888672, + "learning_rate": 8.03356783919598e-06, + "loss": 0.0357, + "step": 20075 + }, + { + "epoch": 14.99, + "grad_norm": 3.0566914081573486, + "learning_rate": 8.031055276381911e-06, + "loss": 0.0351, + "step": 20100 + }, + { + "epoch": 15.01, + "grad_norm": 2.398969888687134, + "learning_rate": 8.028542713567839e-06, + "loss": 0.0338, + "step": 20125 + }, + { + "epoch": 15.03, + "grad_norm": 2.3726375102996826, + "learning_rate": 8.02603015075377e-06, + "loss": 0.029, + "step": 20150 + }, + { + "epoch": 15.04, + "grad_norm": 2.5769636631011963, + "learning_rate": 8.023517587939699e-06, + "loss": 0.0277, + "step": 20175 + }, + { + "epoch": 15.06, + "grad_norm": 2.630457878112793, + "learning_rate": 8.021005025125628e-06, + "loss": 0.0284, + "step": 20200 + }, + { + "epoch": 15.08, + "grad_norm": 2.880613327026367, + "learning_rate": 8.01849246231156e-06, + "loss": 0.027, + "step": 20225 + }, + { + "epoch": 15.1, + "grad_norm": 2.649902105331421, + "learning_rate": 8.015979899497489e-06, + "loss": 0.0295, + "step": 20250 + }, + { + "epoch": 15.12, + "grad_norm": 2.4288432598114014, + "learning_rate": 8.013467336683418e-06, + "loss": 0.029, + "step": 20275 + }, + { + "epoch": 15.14, + "grad_norm": 2.623577117919922, + "learning_rate": 8.010954773869347e-06, + "loss": 0.0288, + "step": 20300 + }, + { + "epoch": 15.16, + "grad_norm": 2.706085681915283, + "learning_rate": 8.008542713567839e-06, + "loss": 0.0284, + "step": 20325 + }, + { + "epoch": 15.18, + "grad_norm": 2.5566787719726562, + "learning_rate": 8.00603015075377e-06, + "loss": 0.0282, + "step": 20350 + }, + { + "epoch": 15.19, + "grad_norm": 2.7475993633270264, + "learning_rate": 8.0035175879397e-06, + "loss": 0.0294, + "step": 20375 + }, + { + "epoch": 15.21, + "grad_norm": 2.7118611335754395, + "learning_rate": 8.001005025125629e-06, + "loss": 0.0286, + "step": 20400 + }, + { + "epoch": 15.23, + "grad_norm": 2.7689175605773926, + "learning_rate": 7.99849246231156e-06, + "loss": 0.0283, + "step": 20425 + }, + { + "epoch": 15.25, + "grad_norm": 2.6991918087005615, + "learning_rate": 7.995979899497487e-06, + "loss": 0.029, + "step": 20450 + }, + { + "epoch": 15.27, + "grad_norm": 2.4419167041778564, + "learning_rate": 7.993467336683418e-06, + "loss": 0.0293, + "step": 20475 + }, + { + "epoch": 15.29, + "grad_norm": 2.408665657043457, + "learning_rate": 7.990954773869348e-06, + "loss": 0.0291, + "step": 20500 + }, + { + "epoch": 15.31, + "grad_norm": 2.898989200592041, + "learning_rate": 7.988442211055277e-06, + "loss": 0.0288, + "step": 20525 + }, + { + "epoch": 15.32, + "grad_norm": 2.5297908782958984, + "learning_rate": 7.985929648241206e-06, + "loss": 0.0294, + "step": 20550 + }, + { + "epoch": 15.34, + "grad_norm": 2.8034379482269287, + "learning_rate": 7.983417085427137e-06, + "loss": 0.0294, + "step": 20575 + }, + { + "epoch": 15.36, + "grad_norm": 2.8478405475616455, + "learning_rate": 7.980904522613065e-06, + "loss": 0.0301, + "step": 20600 + }, + { + "epoch": 15.38, + "grad_norm": 2.7582578659057617, + "learning_rate": 7.978391959798996e-06, + "loss": 0.0297, + "step": 20625 + }, + { + "epoch": 15.4, + "grad_norm": 2.5621745586395264, + "learning_rate": 7.975879396984925e-06, + "loss": 0.0302, + "step": 20650 + }, + { + "epoch": 15.42, + "grad_norm": 2.894596815109253, + "learning_rate": 7.973366834170855e-06, + "loss": 0.0291, + "step": 20675 + }, + { + "epoch": 15.44, + "grad_norm": 2.725759267807007, + "learning_rate": 7.970854271356786e-06, + "loss": 0.0298, + "step": 20700 + }, + { + "epoch": 15.45, + "grad_norm": 2.462378740310669, + "learning_rate": 7.968341708542713e-06, + "loss": 0.0302, + "step": 20725 + }, + { + "epoch": 15.47, + "grad_norm": 2.789185047149658, + "learning_rate": 7.965829145728644e-06, + "loss": 0.0297, + "step": 20750 + }, + { + "epoch": 15.49, + "grad_norm": 2.587120532989502, + "learning_rate": 7.963316582914574e-06, + "loss": 0.0298, + "step": 20775 + }, + { + "epoch": 15.51, + "grad_norm": 2.54654598236084, + "learning_rate": 7.960804020100503e-06, + "loss": 0.031, + "step": 20800 + }, + { + "epoch": 15.53, + "grad_norm": 3.1608569622039795, + "learning_rate": 7.958291457286432e-06, + "loss": 0.0301, + "step": 20825 + }, + { + "epoch": 15.55, + "grad_norm": 2.4028961658477783, + "learning_rate": 7.955778894472363e-06, + "loss": 0.0299, + "step": 20850 + }, + { + "epoch": 15.57, + "grad_norm": 2.572965383529663, + "learning_rate": 7.953266331658293e-06, + "loss": 0.0307, + "step": 20875 + }, + { + "epoch": 15.59, + "grad_norm": 2.7725861072540283, + "learning_rate": 7.950753768844222e-06, + "loss": 0.03, + "step": 20900 + }, + { + "epoch": 15.6, + "grad_norm": 2.8942480087280273, + "learning_rate": 7.948241206030151e-06, + "loss": 0.0309, + "step": 20925 + }, + { + "epoch": 15.62, + "grad_norm": 2.417304754257202, + "learning_rate": 7.94572864321608e-06, + "loss": 0.0302, + "step": 20950 + }, + { + "epoch": 15.64, + "grad_norm": 2.3493833541870117, + "learning_rate": 7.943216080402011e-06, + "loss": 0.0299, + "step": 20975 + }, + { + "epoch": 15.66, + "grad_norm": 2.494300127029419, + "learning_rate": 7.940703517587939e-06, + "loss": 0.0301, + "step": 21000 + }, + { + "epoch": 15.66, + "eval_loss": 0.1375339776277542, + "eval_runtime": 871.9748, + "eval_samples_per_second": 1.64, + "eval_steps_per_second": 1.64, + "eval_wer": 46.37864241199259, + "step": 21000 + }, + { + "epoch": 15.68, + "grad_norm": 2.878296375274658, + "learning_rate": 7.93819095477387e-06, + "loss": 0.0298, + "step": 21025 + }, + { + "epoch": 15.7, + "grad_norm": 2.557225227355957, + "learning_rate": 7.9356783919598e-06, + "loss": 0.0302, + "step": 21050 + }, + { + "epoch": 15.72, + "grad_norm": 2.3407583236694336, + "learning_rate": 7.933165829145729e-06, + "loss": 0.0311, + "step": 21075 + }, + { + "epoch": 15.73, + "grad_norm": 2.975478172302246, + "learning_rate": 7.93065326633166e-06, + "loss": 0.0319, + "step": 21100 + }, + { + "epoch": 15.75, + "grad_norm": 2.8398232460021973, + "learning_rate": 7.928140703517589e-06, + "loss": 0.0312, + "step": 21125 + }, + { + "epoch": 15.77, + "grad_norm": 2.9499335289001465, + "learning_rate": 7.925628140703518e-06, + "loss": 0.0296, + "step": 21150 + }, + { + "epoch": 15.79, + "grad_norm": 2.2356555461883545, + "learning_rate": 7.923115577889448e-06, + "loss": 0.0308, + "step": 21175 + }, + { + "epoch": 15.81, + "grad_norm": 2.9444031715393066, + "learning_rate": 7.920603015075377e-06, + "loss": 0.0308, + "step": 21200 + }, + { + "epoch": 15.83, + "grad_norm": 2.7165870666503906, + "learning_rate": 7.918090452261306e-06, + "loss": 0.0318, + "step": 21225 + }, + { + "epoch": 15.85, + "grad_norm": 2.6576056480407715, + "learning_rate": 7.915577889447237e-06, + "loss": 0.0314, + "step": 21250 + }, + { + "epoch": 15.87, + "grad_norm": 3.351743221282959, + "learning_rate": 7.913065326633167e-06, + "loss": 0.0311, + "step": 21275 + }, + { + "epoch": 15.88, + "grad_norm": 2.8830385208129883, + "learning_rate": 7.910552763819096e-06, + "loss": 0.0322, + "step": 21300 + }, + { + "epoch": 15.9, + "grad_norm": 2.5103542804718018, + "learning_rate": 7.908040201005025e-06, + "loss": 0.031, + "step": 21325 + }, + { + "epoch": 15.92, + "grad_norm": 2.431440830230713, + "learning_rate": 7.905527638190955e-06, + "loss": 0.0312, + "step": 21350 + }, + { + "epoch": 15.94, + "grad_norm": 2.7950165271759033, + "learning_rate": 7.903015075376886e-06, + "loss": 0.0281, + "step": 21375 + }, + { + "epoch": 15.96, + "grad_norm": 2.9214999675750732, + "learning_rate": 7.900502512562815e-06, + "loss": 0.0305, + "step": 21400 + }, + { + "epoch": 15.98, + "grad_norm": 2.7640044689178467, + "learning_rate": 7.897989949748744e-06, + "loss": 0.0312, + "step": 21425 + }, + { + "epoch": 16.0, + "grad_norm": 2.8408172130584717, + "learning_rate": 7.895477386934674e-06, + "loss": 0.0304, + "step": 21450 + }, + { + "epoch": 16.01, + "grad_norm": 2.303056001663208, + "learning_rate": 7.892964824120603e-06, + "loss": 0.0266, + "step": 21475 + }, + { + "epoch": 16.03, + "grad_norm": 2.054548978805542, + "learning_rate": 7.890452261306534e-06, + "loss": 0.0223, + "step": 21500 + }, + { + "epoch": 16.05, + "grad_norm": 2.2544515132904053, + "learning_rate": 7.887939698492463e-06, + "loss": 0.0235, + "step": 21525 + }, + { + "epoch": 16.07, + "grad_norm": 2.3971099853515625, + "learning_rate": 7.885427135678393e-06, + "loss": 0.0239, + "step": 21550 + }, + { + "epoch": 16.09, + "grad_norm": 2.250056266784668, + "learning_rate": 7.882914572864322e-06, + "loss": 0.0241, + "step": 21575 + }, + { + "epoch": 16.11, + "grad_norm": 2.741617202758789, + "learning_rate": 7.880402010050251e-06, + "loss": 0.0239, + "step": 21600 + }, + { + "epoch": 16.13, + "grad_norm": 2.226234197616577, + "learning_rate": 7.87788944723618e-06, + "loss": 0.0232, + "step": 21625 + }, + { + "epoch": 16.14, + "grad_norm": 3.014906644821167, + "learning_rate": 7.875376884422112e-06, + "loss": 0.0237, + "step": 21650 + }, + { + "epoch": 16.16, + "grad_norm": 2.388359546661377, + "learning_rate": 7.872864321608041e-06, + "loss": 0.0253, + "step": 21675 + }, + { + "epoch": 16.18, + "grad_norm": 2.519435167312622, + "learning_rate": 7.87035175879397e-06, + "loss": 0.0242, + "step": 21700 + }, + { + "epoch": 16.2, + "grad_norm": 2.638505697250366, + "learning_rate": 7.867839195979901e-06, + "loss": 0.0262, + "step": 21725 + }, + { + "epoch": 16.22, + "grad_norm": 2.9850778579711914, + "learning_rate": 7.865326633165829e-06, + "loss": 0.0254, + "step": 21750 + }, + { + "epoch": 16.24, + "grad_norm": 2.991757392883301, + "learning_rate": 7.86281407035176e-06, + "loss": 0.0256, + "step": 21775 + }, + { + "epoch": 16.26, + "grad_norm": 2.7221174240112305, + "learning_rate": 7.86030150753769e-06, + "loss": 0.0251, + "step": 21800 + }, + { + "epoch": 16.28, + "grad_norm": 2.793869972229004, + "learning_rate": 7.857788944723619e-06, + "loss": 0.025, + "step": 21825 + }, + { + "epoch": 16.29, + "grad_norm": 2.4716544151306152, + "learning_rate": 7.855276381909548e-06, + "loss": 0.0255, + "step": 21850 + }, + { + "epoch": 16.31, + "grad_norm": 2.3220651149749756, + "learning_rate": 7.852763819095477e-06, + "loss": 0.0246, + "step": 21875 + }, + { + "epoch": 16.33, + "grad_norm": 3.093780517578125, + "learning_rate": 7.850251256281408e-06, + "loss": 0.0258, + "step": 21900 + }, + { + "epoch": 16.35, + "grad_norm": 2.5063087940216064, + "learning_rate": 7.847738693467338e-06, + "loss": 0.0247, + "step": 21925 + }, + { + "epoch": 16.37, + "grad_norm": 2.403881788253784, + "learning_rate": 7.845226130653267e-06, + "loss": 0.0254, + "step": 21950 + }, + { + "epoch": 16.39, + "grad_norm": 2.2216076850891113, + "learning_rate": 7.842713567839196e-06, + "loss": 0.0256, + "step": 21975 + }, + { + "epoch": 16.41, + "grad_norm": 3.1980884075164795, + "learning_rate": 7.840201005025127e-06, + "loss": 0.025, + "step": 22000 + }, + { + "epoch": 16.41, + "eval_loss": 0.14710868895053864, + "eval_runtime": 878.3575, + "eval_samples_per_second": 1.628, + "eval_steps_per_second": 1.628, + "eval_wer": 45.039582280613104, + "step": 22000 + }, + { + "epoch": 16.42, + "grad_norm": 3.514983892440796, + "learning_rate": 7.837688442211055e-06, + "loss": 0.0258, + "step": 22025 + }, + { + "epoch": 16.44, + "grad_norm": 2.8000857830047607, + "learning_rate": 7.835175879396986e-06, + "loss": 0.0242, + "step": 22050 + }, + { + "epoch": 16.46, + "grad_norm": 2.8655552864074707, + "learning_rate": 7.832663316582915e-06, + "loss": 0.0255, + "step": 22075 + }, + { + "epoch": 16.48, + "grad_norm": 2.435239315032959, + "learning_rate": 7.830150753768845e-06, + "loss": 0.0249, + "step": 22100 + }, + { + "epoch": 16.5, + "grad_norm": 2.5320513248443604, + "learning_rate": 7.827638190954776e-06, + "loss": 0.0244, + "step": 22125 + }, + { + "epoch": 16.52, + "grad_norm": 2.857060432434082, + "learning_rate": 7.825125628140703e-06, + "loss": 0.0249, + "step": 22150 + }, + { + "epoch": 16.54, + "grad_norm": 2.78132963180542, + "learning_rate": 7.822613065326634e-06, + "loss": 0.0263, + "step": 22175 + }, + { + "epoch": 16.55, + "grad_norm": 2.5782041549682617, + "learning_rate": 7.820100502512564e-06, + "loss": 0.0243, + "step": 22200 + }, + { + "epoch": 16.57, + "grad_norm": 2.748749256134033, + "learning_rate": 7.817587939698493e-06, + "loss": 0.0255, + "step": 22225 + }, + { + "epoch": 16.59, + "grad_norm": 2.703235387802124, + "learning_rate": 7.815075376884422e-06, + "loss": 0.0252, + "step": 22250 + }, + { + "epoch": 16.61, + "grad_norm": 2.8718206882476807, + "learning_rate": 7.812562814070353e-06, + "loss": 0.0266, + "step": 22275 + }, + { + "epoch": 16.63, + "grad_norm": 2.35718035697937, + "learning_rate": 7.810050251256283e-06, + "loss": 0.0256, + "step": 22300 + }, + { + "epoch": 16.65, + "grad_norm": 2.5189247131347656, + "learning_rate": 7.807537688442212e-06, + "loss": 0.0265, + "step": 22325 + }, + { + "epoch": 16.67, + "grad_norm": 2.6707308292388916, + "learning_rate": 7.805025125628141e-06, + "loss": 0.0253, + "step": 22350 + }, + { + "epoch": 16.69, + "grad_norm": 2.598799705505371, + "learning_rate": 7.80251256281407e-06, + "loss": 0.0262, + "step": 22375 + }, + { + "epoch": 16.7, + "grad_norm": 3.1068062782287598, + "learning_rate": 7.800000000000002e-06, + "loss": 0.0265, + "step": 22400 + }, + { + "epoch": 16.72, + "grad_norm": 3.0019896030426025, + "learning_rate": 7.797587939698493e-06, + "loss": 0.0264, + "step": 22425 + }, + { + "epoch": 16.74, + "grad_norm": 2.391791343688965, + "learning_rate": 7.795075376884422e-06, + "loss": 0.0257, + "step": 22450 + }, + { + "epoch": 16.76, + "grad_norm": 2.6421167850494385, + "learning_rate": 7.792562814070352e-06, + "loss": 0.0265, + "step": 22475 + }, + { + "epoch": 16.78, + "grad_norm": 2.1298773288726807, + "learning_rate": 7.790050251256283e-06, + "loss": 0.0266, + "step": 22500 + }, + { + "epoch": 16.8, + "grad_norm": 2.9784798622131348, + "learning_rate": 7.787537688442212e-06, + "loss": 0.025, + "step": 22525 + }, + { + "epoch": 16.82, + "grad_norm": 2.962423801422119, + "learning_rate": 7.785025125628141e-06, + "loss": 0.0253, + "step": 22550 + }, + { + "epoch": 16.83, + "grad_norm": 2.8620307445526123, + "learning_rate": 7.78251256281407e-06, + "loss": 0.0264, + "step": 22575 + }, + { + "epoch": 16.85, + "grad_norm": 2.7692620754241943, + "learning_rate": 7.78e-06, + "loss": 0.0279, + "step": 22600 + }, + { + "epoch": 16.87, + "grad_norm": 2.234581232070923, + "learning_rate": 7.77748743718593e-06, + "loss": 0.0263, + "step": 22625 + }, + { + "epoch": 16.89, + "grad_norm": 2.3411405086517334, + "learning_rate": 7.77497487437186e-06, + "loss": 0.0263, + "step": 22650 + }, + { + "epoch": 16.91, + "grad_norm": 2.6945881843566895, + "learning_rate": 7.77246231155779e-06, + "loss": 0.0265, + "step": 22675 + }, + { + "epoch": 16.93, + "grad_norm": 2.614096164703369, + "learning_rate": 7.769949748743719e-06, + "loss": 0.0264, + "step": 22700 + }, + { + "epoch": 16.95, + "grad_norm": 3.1221401691436768, + "learning_rate": 7.76743718592965e-06, + "loss": 0.0265, + "step": 22725 + }, + { + "epoch": 16.96, + "grad_norm": 2.3062634468078613, + "learning_rate": 7.764924623115578e-06, + "loss": 0.0258, + "step": 22750 + }, + { + "epoch": 16.98, + "grad_norm": 2.862062692642212, + "learning_rate": 7.762412060301509e-06, + "loss": 0.0267, + "step": 22775 + }, + { + "epoch": 17.0, + "grad_norm": 2.654369354248047, + "learning_rate": 7.759899497487438e-06, + "loss": 0.0255, + "step": 22800 + }, + { + "epoch": 17.02, + "grad_norm": 2.3804445266723633, + "learning_rate": 7.757386934673367e-06, + "loss": 0.0188, + "step": 22825 + }, + { + "epoch": 17.04, + "grad_norm": 2.364847183227539, + "learning_rate": 7.754874371859297e-06, + "loss": 0.0205, + "step": 22850 + }, + { + "epoch": 17.06, + "grad_norm": 1.920720100402832, + "learning_rate": 7.752361809045226e-06, + "loss": 0.0188, + "step": 22875 + }, + { + "epoch": 17.08, + "grad_norm": 2.399815559387207, + "learning_rate": 7.749849246231155e-06, + "loss": 0.0205, + "step": 22900 + }, + { + "epoch": 17.1, + "grad_norm": 2.8286452293395996, + "learning_rate": 7.747336683417086e-06, + "loss": 0.02, + "step": 22925 + }, + { + "epoch": 17.11, + "grad_norm": 2.2097768783569336, + "learning_rate": 7.744824120603016e-06, + "loss": 0.0209, + "step": 22950 + }, + { + "epoch": 17.13, + "grad_norm": 2.3570361137390137, + "learning_rate": 7.742311557788945e-06, + "loss": 0.0203, + "step": 22975 + }, + { + "epoch": 17.15, + "grad_norm": 2.299593925476074, + "learning_rate": 7.739798994974876e-06, + "loss": 0.0203, + "step": 23000 + }, + { + "epoch": 17.15, + "eval_loss": 0.1516934484243393, + "eval_runtime": 875.1241, + "eval_samples_per_second": 1.634, + "eval_steps_per_second": 1.634, + "eval_wer": 45.250126326427484, + "step": 23000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 75, + "save_steps": 1000, + "total_flos": 3.622895439151104e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/kannada/checkpoint-23000/training_args.bin b/checkpoints/whisper-tiny/kannada/checkpoint-23000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c3fc6dd9a02104e7e362ad7885fa5056fb98985 --- /dev/null +++ b/checkpoints/whisper-tiny/kannada/checkpoint-23000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7693059c6fb44f4fe6b4032d08c89ada0aac450990ef5414e0cd46a99904eb71 +size 4667 diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/config.json b/checkpoints/whisper-tiny/magahi/checkpoint-20000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c47e7ae5f6c65847b8952aa0e827c7f13a489891 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/generation_config.json b/checkpoints/whisper-tiny/magahi/checkpoint-20000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/model.safetensors b/checkpoints/whisper-tiny/magahi/checkpoint-20000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63733d788700aa8d6d820a973d00d996d14072fd --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ee3bdcc4363c7d04980a11c8855bc5addb170415339c86ca0532af195af788 +size 151061672 diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/optimizer.pt b/checkpoints/whisper-tiny/magahi/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e3aecaae6e0ce4c48ae3c2d0ef0a36e54e851c0 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb71e14da6a57488dcab560236912a9d4e042ca15b755bab3b6618badb7b4da5 +size 297615749 diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/preprocessor_config.json b/checkpoints/whisper-tiny/magahi/checkpoint-20000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/rng_state.pth b/checkpoints/whisper-tiny/magahi/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f1e6022bc62ea805086e336e039eb6fec891ad0 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001dd829c061f4e1a06cddc9c203239a2e3cb058358d5726cd42527a9c3668a0 +size 14575 diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/scheduler.pt b/checkpoints/whisper-tiny/magahi/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3362a23888da3971ecbdf83bb5c0766680abb929 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4c57795bc7e8298d05a0d53fb436efd8d26c13f7d48d21b5090c941852d177 +size 627 diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/trainer_state.json b/checkpoints/whisper-tiny/magahi/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a4064b269d778d3fc7b0c8ca810f33026d6d9d3 --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/trainer_state.json @@ -0,0 +1,5801 @@ +{ + "best_metric": 29.948759439050704, + "best_model_checkpoint": "results/whisper-tiny/magahi/checkpoint-10000", + "epoch": 13.431833445265278, + "eval_steps": 1000, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 81.60848236083984, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.8537, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 36.962913513183594, + "learning_rate": 9.200000000000001e-07, + "loss": 3.2377, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 13.970915794372559, + "learning_rate": 1.42e-06, + "loss": 2.4456, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 9.826818466186523, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.8967, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 6.355788707733154, + "learning_rate": 2.42e-06, + "loss": 1.4978, + "step": 125 + }, + { + "epoch": 0.1, + "grad_norm": 5.724748134613037, + "learning_rate": 2.92e-06, + "loss": 1.2541, + "step": 150 + }, + { + "epoch": 0.12, + "grad_norm": 5.8415608406066895, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.0823, + "step": 175 + }, + { + "epoch": 0.13, + "grad_norm": 5.578986644744873, + "learning_rate": 3.920000000000001e-06, + "loss": 0.9513, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 5.194921493530273, + "learning_rate": 4.42e-06, + "loss": 0.8688, + "step": 225 + }, + { + "epoch": 0.17, + "grad_norm": 5.798856258392334, + "learning_rate": 4.92e-06, + "loss": 0.7889, + "step": 250 + }, + { + "epoch": 0.18, + "grad_norm": 4.961001873016357, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7621, + "step": 275 + }, + { + "epoch": 0.2, + "grad_norm": 5.300374984741211, + "learning_rate": 5.92e-06, + "loss": 0.7132, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 5.880134582519531, + "learning_rate": 6.42e-06, + "loss": 0.6887, + "step": 325 + }, + { + "epoch": 0.24, + "grad_norm": 4.557385444641113, + "learning_rate": 6.92e-06, + "loss": 0.6626, + "step": 350 + }, + { + "epoch": 0.25, + "grad_norm": 4.767248630523682, + "learning_rate": 7.420000000000001e-06, + "loss": 0.6286, + "step": 375 + }, + { + "epoch": 0.27, + "grad_norm": 5.60834264755249, + "learning_rate": 7.92e-06, + "loss": 0.6003, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 4.829370498657227, + "learning_rate": 8.42e-06, + "loss": 0.5789, + "step": 425 + }, + { + "epoch": 0.3, + "grad_norm": 4.650497913360596, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5829, + "step": 450 + }, + { + "epoch": 0.32, + "grad_norm": 4.928510665893555, + "learning_rate": 9.42e-06, + "loss": 0.5634, + "step": 475 + }, + { + "epoch": 0.34, + "grad_norm": 5.1075544357299805, + "learning_rate": 9.920000000000002e-06, + "loss": 0.5549, + "step": 500 + }, + { + "epoch": 0.35, + "grad_norm": 5.032267093658447, + "learning_rate": 9.997889447236182e-06, + "loss": 0.5347, + "step": 525 + }, + { + "epoch": 0.37, + "grad_norm": 4.869894027709961, + "learning_rate": 9.995376884422112e-06, + "loss": 0.5105, + "step": 550 + }, + { + "epoch": 0.39, + "grad_norm": 4.951045513153076, + "learning_rate": 9.992864321608041e-06, + "loss": 0.5154, + "step": 575 + }, + { + "epoch": 0.4, + "grad_norm": 4.784714221954346, + "learning_rate": 9.99035175879397e-06, + "loss": 0.4969, + "step": 600 + }, + { + "epoch": 0.42, + "grad_norm": 4.97604513168335, + "learning_rate": 9.9878391959799e-06, + "loss": 0.503, + "step": 625 + }, + { + "epoch": 0.44, + "grad_norm": 5.806248188018799, + "learning_rate": 9.98532663316583e-06, + "loss": 0.4963, + "step": 650 + }, + { + "epoch": 0.45, + "grad_norm": 4.283050537109375, + "learning_rate": 9.98281407035176e-06, + "loss": 0.4787, + "step": 675 + }, + { + "epoch": 0.47, + "grad_norm": 5.257750034332275, + "learning_rate": 9.98030150753769e-06, + "loss": 0.4662, + "step": 700 + }, + { + "epoch": 0.49, + "grad_norm": 4.808835506439209, + "learning_rate": 9.977788944723619e-06, + "loss": 0.4713, + "step": 725 + }, + { + "epoch": 0.5, + "grad_norm": 4.096215724945068, + "learning_rate": 9.975276381909548e-06, + "loss": 0.4612, + "step": 750 + }, + { + "epoch": 0.52, + "grad_norm": 5.140923500061035, + "learning_rate": 9.972763819095477e-06, + "loss": 0.4648, + "step": 775 + }, + { + "epoch": 0.54, + "grad_norm": 4.5748419761657715, + "learning_rate": 9.970251256281408e-06, + "loss": 0.4388, + "step": 800 + }, + { + "epoch": 0.55, + "grad_norm": 4.589542388916016, + "learning_rate": 9.967738693467338e-06, + "loss": 0.4463, + "step": 825 + }, + { + "epoch": 0.57, + "grad_norm": 4.443384647369385, + "learning_rate": 9.965226130653267e-06, + "loss": 0.4455, + "step": 850 + }, + { + "epoch": 0.59, + "grad_norm": 4.558892726898193, + "learning_rate": 9.962713567839198e-06, + "loss": 0.4427, + "step": 875 + }, + { + "epoch": 0.6, + "grad_norm": 4.095139980316162, + "learning_rate": 9.960201005025126e-06, + "loss": 0.4256, + "step": 900 + }, + { + "epoch": 0.62, + "grad_norm": 4.792667865753174, + "learning_rate": 9.957688442211057e-06, + "loss": 0.4285, + "step": 925 + }, + { + "epoch": 0.64, + "grad_norm": 4.6431427001953125, + "learning_rate": 9.955175879396986e-06, + "loss": 0.4166, + "step": 950 + }, + { + "epoch": 0.65, + "grad_norm": 4.524608612060547, + "learning_rate": 9.952663316582915e-06, + "loss": 0.4145, + "step": 975 + }, + { + "epoch": 0.67, + "grad_norm": 4.866757392883301, + "learning_rate": 9.950150753768845e-06, + "loss": 0.4235, + "step": 1000 + }, + { + "epoch": 0.67, + "eval_loss": 0.3119436800479889, + "eval_runtime": 551.2132, + "eval_samples_per_second": 2.596, + "eval_steps_per_second": 2.596, + "eval_wer": 46.00188781014023, + "step": 1000 + }, + { + "epoch": 0.69, + "grad_norm": 4.603618144989014, + "learning_rate": 9.947638190954774e-06, + "loss": 0.4122, + "step": 1025 + }, + { + "epoch": 0.71, + "grad_norm": 4.810600757598877, + "learning_rate": 9.945125628140703e-06, + "loss": 0.4031, + "step": 1050 + }, + { + "epoch": 0.72, + "grad_norm": 4.622437477111816, + "learning_rate": 9.942613065326634e-06, + "loss": 0.4008, + "step": 1075 + }, + { + "epoch": 0.74, + "grad_norm": 5.4308695793151855, + "learning_rate": 9.940100502512564e-06, + "loss": 0.402, + "step": 1100 + }, + { + "epoch": 0.76, + "grad_norm": 4.4006452560424805, + "learning_rate": 9.937587939698493e-06, + "loss": 0.4071, + "step": 1125 + }, + { + "epoch": 0.77, + "grad_norm": 4.235116958618164, + "learning_rate": 9.935075376884424e-06, + "loss": 0.399, + "step": 1150 + }, + { + "epoch": 0.79, + "grad_norm": 4.2634124755859375, + "learning_rate": 9.932562814070352e-06, + "loss": 0.3922, + "step": 1175 + }, + { + "epoch": 0.81, + "grad_norm": 4.755196571350098, + "learning_rate": 9.930050251256283e-06, + "loss": 0.3941, + "step": 1200 + }, + { + "epoch": 0.82, + "grad_norm": 4.6733317375183105, + "learning_rate": 9.927537688442212e-06, + "loss": 0.3881, + "step": 1225 + }, + { + "epoch": 0.84, + "grad_norm": 4.650494575500488, + "learning_rate": 9.925025125628141e-06, + "loss": 0.3869, + "step": 1250 + }, + { + "epoch": 0.86, + "grad_norm": 4.952308654785156, + "learning_rate": 9.922512562814072e-06, + "loss": 0.3842, + "step": 1275 + }, + { + "epoch": 0.87, + "grad_norm": 4.7242817878723145, + "learning_rate": 9.920000000000002e-06, + "loss": 0.3792, + "step": 1300 + }, + { + "epoch": 0.89, + "grad_norm": 5.240590572357178, + "learning_rate": 9.917487437185931e-06, + "loss": 0.3804, + "step": 1325 + }, + { + "epoch": 0.91, + "grad_norm": 4.585945129394531, + "learning_rate": 9.91497487437186e-06, + "loss": 0.3706, + "step": 1350 + }, + { + "epoch": 0.92, + "grad_norm": 4.993523597717285, + "learning_rate": 9.91246231155779e-06, + "loss": 0.3667, + "step": 1375 + }, + { + "epoch": 0.94, + "grad_norm": 4.7278666496276855, + "learning_rate": 9.909949748743719e-06, + "loss": 0.3723, + "step": 1400 + }, + { + "epoch": 0.96, + "grad_norm": 4.946122646331787, + "learning_rate": 9.90743718592965e-06, + "loss": 0.3653, + "step": 1425 + }, + { + "epoch": 0.97, + "grad_norm": 4.801915645599365, + "learning_rate": 9.904924623115578e-06, + "loss": 0.3639, + "step": 1450 + }, + { + "epoch": 0.99, + "grad_norm": 4.4633355140686035, + "learning_rate": 9.902412060301509e-06, + "loss": 0.3693, + "step": 1475 + }, + { + "epoch": 1.01, + "grad_norm": 5.037844181060791, + "learning_rate": 9.899899497487438e-06, + "loss": 0.3561, + "step": 1500 + }, + { + "epoch": 1.02, + "grad_norm": 3.9556565284729004, + "learning_rate": 9.897386934673367e-06, + "loss": 0.3409, + "step": 1525 + }, + { + "epoch": 1.04, + "grad_norm": 4.554931640625, + "learning_rate": 9.894874371859298e-06, + "loss": 0.3385, + "step": 1550 + }, + { + "epoch": 1.06, + "grad_norm": 5.0028767585754395, + "learning_rate": 9.892361809045228e-06, + "loss": 0.3461, + "step": 1575 + }, + { + "epoch": 1.07, + "grad_norm": 4.830097198486328, + "learning_rate": 9.889849246231157e-06, + "loss": 0.3477, + "step": 1600 + }, + { + "epoch": 1.09, + "grad_norm": 5.008993625640869, + "learning_rate": 9.887336683417086e-06, + "loss": 0.3359, + "step": 1625 + }, + { + "epoch": 1.11, + "grad_norm": 4.475279808044434, + "learning_rate": 9.884824120603015e-06, + "loss": 0.3396, + "step": 1650 + }, + { + "epoch": 1.12, + "grad_norm": 4.6282758712768555, + "learning_rate": 9.882311557788945e-06, + "loss": 0.3374, + "step": 1675 + }, + { + "epoch": 1.14, + "grad_norm": 3.9324779510498047, + "learning_rate": 9.879798994974876e-06, + "loss": 0.3398, + "step": 1700 + }, + { + "epoch": 1.16, + "grad_norm": 4.605551242828369, + "learning_rate": 9.877286432160805e-06, + "loss": 0.337, + "step": 1725 + }, + { + "epoch": 1.18, + "grad_norm": 4.509819984436035, + "learning_rate": 9.874773869346734e-06, + "loss": 0.3307, + "step": 1750 + }, + { + "epoch": 1.19, + "grad_norm": 4.387190341949463, + "learning_rate": 9.872261306532664e-06, + "loss": 0.3282, + "step": 1775 + }, + { + "epoch": 1.21, + "grad_norm": 4.236643314361572, + "learning_rate": 9.869748743718593e-06, + "loss": 0.3292, + "step": 1800 + }, + { + "epoch": 1.23, + "grad_norm": 5.186654090881348, + "learning_rate": 9.867236180904524e-06, + "loss": 0.3206, + "step": 1825 + }, + { + "epoch": 1.24, + "grad_norm": 5.238602638244629, + "learning_rate": 9.864723618090453e-06, + "loss": 0.3282, + "step": 1850 + }, + { + "epoch": 1.26, + "grad_norm": 4.52903413772583, + "learning_rate": 9.862211055276383e-06, + "loss": 0.3291, + "step": 1875 + }, + { + "epoch": 1.28, + "grad_norm": 4.356954574584961, + "learning_rate": 9.859698492462312e-06, + "loss": 0.3221, + "step": 1900 + }, + { + "epoch": 1.29, + "grad_norm": 4.341065406799316, + "learning_rate": 9.857185929648241e-06, + "loss": 0.3161, + "step": 1925 + }, + { + "epoch": 1.31, + "grad_norm": 4.98447847366333, + "learning_rate": 9.854673366834172e-06, + "loss": 0.3268, + "step": 1950 + }, + { + "epoch": 1.33, + "grad_norm": 4.600680351257324, + "learning_rate": 9.852160804020102e-06, + "loss": 0.3207, + "step": 1975 + }, + { + "epoch": 1.34, + "grad_norm": 4.508650302886963, + "learning_rate": 9.849648241206031e-06, + "loss": 0.3237, + "step": 2000 + }, + { + "epoch": 1.34, + "eval_loss": 0.24654684960842133, + "eval_runtime": 516.9369, + "eval_samples_per_second": 2.768, + "eval_steps_per_second": 2.768, + "eval_wer": 38.70685005393743, + "step": 2000 + }, + { + "epoch": 1.36, + "grad_norm": 5.697445392608643, + "learning_rate": 9.84713567839196e-06, + "loss": 0.3182, + "step": 2025 + }, + { + "epoch": 1.38, + "grad_norm": 4.22543478012085, + "learning_rate": 9.84462311557789e-06, + "loss": 0.3129, + "step": 2050 + }, + { + "epoch": 1.39, + "grad_norm": 4.219532012939453, + "learning_rate": 9.842110552763819e-06, + "loss": 0.32, + "step": 2075 + }, + { + "epoch": 1.41, + "grad_norm": 4.789764404296875, + "learning_rate": 9.83959798994975e-06, + "loss": 0.3142, + "step": 2100 + }, + { + "epoch": 1.43, + "grad_norm": 4.291354656219482, + "learning_rate": 9.83708542713568e-06, + "loss": 0.3182, + "step": 2125 + }, + { + "epoch": 1.44, + "grad_norm": 4.049543857574463, + "learning_rate": 9.834572864321609e-06, + "loss": 0.3133, + "step": 2150 + }, + { + "epoch": 1.46, + "grad_norm": 4.41581392288208, + "learning_rate": 9.832060301507538e-06, + "loss": 0.3075, + "step": 2175 + }, + { + "epoch": 1.48, + "grad_norm": 4.20607852935791, + "learning_rate": 9.829547738693467e-06, + "loss": 0.3089, + "step": 2200 + }, + { + "epoch": 1.49, + "grad_norm": 3.958967924118042, + "learning_rate": 9.827035175879398e-06, + "loss": 0.3016, + "step": 2225 + }, + { + "epoch": 1.51, + "grad_norm": 4.230363845825195, + "learning_rate": 9.824522613065328e-06, + "loss": 0.3091, + "step": 2250 + }, + { + "epoch": 1.53, + "grad_norm": 4.313295841217041, + "learning_rate": 9.822010050251257e-06, + "loss": 0.2994, + "step": 2275 + }, + { + "epoch": 1.54, + "grad_norm": 5.157324314117432, + "learning_rate": 9.819497487437186e-06, + "loss": 0.3056, + "step": 2300 + }, + { + "epoch": 1.56, + "grad_norm": 4.223514556884766, + "learning_rate": 9.816984924623116e-06, + "loss": 0.3085, + "step": 2325 + }, + { + "epoch": 1.58, + "grad_norm": 4.843072891235352, + "learning_rate": 9.814472361809047e-06, + "loss": 0.2964, + "step": 2350 + }, + { + "epoch": 1.6, + "grad_norm": 3.7074038982391357, + "learning_rate": 9.811959798994976e-06, + "loss": 0.3036, + "step": 2375 + }, + { + "epoch": 1.61, + "grad_norm": 5.545840263366699, + "learning_rate": 9.809447236180905e-06, + "loss": 0.303, + "step": 2400 + }, + { + "epoch": 1.63, + "grad_norm": 4.584773063659668, + "learning_rate": 9.806934673366835e-06, + "loss": 0.2987, + "step": 2425 + }, + { + "epoch": 1.65, + "grad_norm": 4.879556655883789, + "learning_rate": 9.804422110552764e-06, + "loss": 0.3033, + "step": 2450 + }, + { + "epoch": 1.66, + "grad_norm": 5.2190752029418945, + "learning_rate": 9.801909547738693e-06, + "loss": 0.2972, + "step": 2475 + }, + { + "epoch": 1.68, + "grad_norm": 4.443481922149658, + "learning_rate": 9.799396984924624e-06, + "loss": 0.2947, + "step": 2500 + }, + { + "epoch": 1.7, + "grad_norm": 4.317480564117432, + "learning_rate": 9.796884422110554e-06, + "loss": 0.292, + "step": 2525 + }, + { + "epoch": 1.71, + "grad_norm": 4.594488620758057, + "learning_rate": 9.794371859296483e-06, + "loss": 0.2978, + "step": 2550 + }, + { + "epoch": 1.73, + "grad_norm": 4.8549418449401855, + "learning_rate": 9.791859296482414e-06, + "loss": 0.2982, + "step": 2575 + }, + { + "epoch": 1.75, + "grad_norm": 4.091700077056885, + "learning_rate": 9.789346733668342e-06, + "loss": 0.2928, + "step": 2600 + }, + { + "epoch": 1.76, + "grad_norm": 4.379096508026123, + "learning_rate": 9.786834170854273e-06, + "loss": 0.2936, + "step": 2625 + }, + { + "epoch": 1.78, + "grad_norm": 4.24893045425415, + "learning_rate": 9.784321608040202e-06, + "loss": 0.2929, + "step": 2650 + }, + { + "epoch": 1.8, + "grad_norm": 4.743540287017822, + "learning_rate": 9.781809045226131e-06, + "loss": 0.3008, + "step": 2675 + }, + { + "epoch": 1.81, + "grad_norm": 4.754593372344971, + "learning_rate": 9.77929648241206e-06, + "loss": 0.2923, + "step": 2700 + }, + { + "epoch": 1.83, + "grad_norm": 4.649506568908691, + "learning_rate": 9.77678391959799e-06, + "loss": 0.2943, + "step": 2725 + }, + { + "epoch": 1.85, + "grad_norm": 3.8674979209899902, + "learning_rate": 9.774271356783921e-06, + "loss": 0.2891, + "step": 2750 + }, + { + "epoch": 1.86, + "grad_norm": 4.128345966339111, + "learning_rate": 9.77175879396985e-06, + "loss": 0.2849, + "step": 2775 + }, + { + "epoch": 1.88, + "grad_norm": 4.888911247253418, + "learning_rate": 9.76924623115578e-06, + "loss": 0.2883, + "step": 2800 + }, + { + "epoch": 1.9, + "grad_norm": 4.524785041809082, + "learning_rate": 9.766733668341709e-06, + "loss": 0.2845, + "step": 2825 + }, + { + "epoch": 1.91, + "grad_norm": 4.13386344909668, + "learning_rate": 9.76422110552764e-06, + "loss": 0.2854, + "step": 2850 + }, + { + "epoch": 1.93, + "grad_norm": 4.727872371673584, + "learning_rate": 9.761708542713568e-06, + "loss": 0.278, + "step": 2875 + }, + { + "epoch": 1.95, + "grad_norm": 4.263580799102783, + "learning_rate": 9.759195979899499e-06, + "loss": 0.2802, + "step": 2900 + }, + { + "epoch": 1.96, + "grad_norm": 4.340731143951416, + "learning_rate": 9.756683417085428e-06, + "loss": 0.28, + "step": 2925 + }, + { + "epoch": 1.98, + "grad_norm": 4.077319145202637, + "learning_rate": 9.754170854271357e-06, + "loss": 0.2759, + "step": 2950 + }, + { + "epoch": 2.0, + "grad_norm": 4.3723015785217285, + "learning_rate": 9.751658291457288e-06, + "loss": 0.293, + "step": 2975 + }, + { + "epoch": 2.01, + "grad_norm": 3.3506383895874023, + "learning_rate": 9.749145728643216e-06, + "loss": 0.2656, + "step": 3000 + }, + { + "epoch": 2.01, + "eval_loss": 0.2192731350660324, + "eval_runtime": 517.7457, + "eval_samples_per_second": 2.764, + "eval_steps_per_second": 2.764, + "eval_wer": 34.7559331175836, + "step": 3000 + }, + { + "epoch": 2.03, + "grad_norm": 4.1629109382629395, + "learning_rate": 9.746633165829147e-06, + "loss": 0.2592, + "step": 3025 + }, + { + "epoch": 2.05, + "grad_norm": 3.8931798934936523, + "learning_rate": 9.744120603015076e-06, + "loss": 0.2608, + "step": 3050 + }, + { + "epoch": 2.07, + "grad_norm": 4.144586086273193, + "learning_rate": 9.741608040201006e-06, + "loss": 0.261, + "step": 3075 + }, + { + "epoch": 2.08, + "grad_norm": 3.9056999683380127, + "learning_rate": 9.739095477386935e-06, + "loss": 0.2605, + "step": 3100 + }, + { + "epoch": 2.1, + "grad_norm": 3.959594488143921, + "learning_rate": 9.736582914572866e-06, + "loss": 0.2616, + "step": 3125 + }, + { + "epoch": 2.12, + "grad_norm": 3.9977684020996094, + "learning_rate": 9.734070351758794e-06, + "loss": 0.2659, + "step": 3150 + }, + { + "epoch": 2.13, + "grad_norm": 3.6124355792999268, + "learning_rate": 9.731557788944725e-06, + "loss": 0.2613, + "step": 3175 + }, + { + "epoch": 2.15, + "grad_norm": 4.071595191955566, + "learning_rate": 9.729045226130654e-06, + "loss": 0.2619, + "step": 3200 + }, + { + "epoch": 2.17, + "grad_norm": 4.441699028015137, + "learning_rate": 9.726532663316583e-06, + "loss": 0.2651, + "step": 3225 + }, + { + "epoch": 2.18, + "grad_norm": 4.801173686981201, + "learning_rate": 9.724020100502514e-06, + "loss": 0.258, + "step": 3250 + }, + { + "epoch": 2.2, + "grad_norm": 4.048007488250732, + "learning_rate": 9.721507537688444e-06, + "loss": 0.2534, + "step": 3275 + }, + { + "epoch": 2.22, + "grad_norm": 3.9200425148010254, + "learning_rate": 9.718994974874373e-06, + "loss": 0.2587, + "step": 3300 + }, + { + "epoch": 2.23, + "grad_norm": 4.207849502563477, + "learning_rate": 9.716482412060302e-06, + "loss": 0.2536, + "step": 3325 + }, + { + "epoch": 2.25, + "grad_norm": 4.019932270050049, + "learning_rate": 9.713969849246232e-06, + "loss": 0.2598, + "step": 3350 + }, + { + "epoch": 2.27, + "grad_norm": 4.2346415519714355, + "learning_rate": 9.711457286432163e-06, + "loss": 0.2644, + "step": 3375 + }, + { + "epoch": 2.28, + "grad_norm": 4.0433807373046875, + "learning_rate": 9.708944723618092e-06, + "loss": 0.2528, + "step": 3400 + }, + { + "epoch": 2.3, + "grad_norm": 4.44325590133667, + "learning_rate": 9.706432160804021e-06, + "loss": 0.2466, + "step": 3425 + }, + { + "epoch": 2.32, + "grad_norm": 4.150941371917725, + "learning_rate": 9.70391959798995e-06, + "loss": 0.2533, + "step": 3450 + }, + { + "epoch": 2.33, + "grad_norm": 4.390870571136475, + "learning_rate": 9.70140703517588e-06, + "loss": 0.2563, + "step": 3475 + }, + { + "epoch": 2.35, + "grad_norm": 3.9863109588623047, + "learning_rate": 9.698894472361809e-06, + "loss": 0.2493, + "step": 3500 + }, + { + "epoch": 2.37, + "grad_norm": 4.822299480438232, + "learning_rate": 9.69638190954774e-06, + "loss": 0.2578, + "step": 3525 + }, + { + "epoch": 2.38, + "grad_norm": 4.331967353820801, + "learning_rate": 9.69386934673367e-06, + "loss": 0.2558, + "step": 3550 + }, + { + "epoch": 2.4, + "grad_norm": 3.9770114421844482, + "learning_rate": 9.691356783919599e-06, + "loss": 0.2524, + "step": 3575 + }, + { + "epoch": 2.42, + "grad_norm": 4.360179901123047, + "learning_rate": 9.688844221105528e-06, + "loss": 0.2543, + "step": 3600 + }, + { + "epoch": 2.43, + "grad_norm": 4.046729564666748, + "learning_rate": 9.686331658291457e-06, + "loss": 0.2469, + "step": 3625 + }, + { + "epoch": 2.45, + "grad_norm": 4.182729244232178, + "learning_rate": 9.683819095477388e-06, + "loss": 0.251, + "step": 3650 + }, + { + "epoch": 2.47, + "grad_norm": 4.014837265014648, + "learning_rate": 9.681306532663318e-06, + "loss": 0.2517, + "step": 3675 + }, + { + "epoch": 2.48, + "grad_norm": 4.939628601074219, + "learning_rate": 9.678793969849247e-06, + "loss": 0.2546, + "step": 3700 + }, + { + "epoch": 2.5, + "grad_norm": 3.961083173751831, + "learning_rate": 9.676281407035176e-06, + "loss": 0.2436, + "step": 3725 + }, + { + "epoch": 2.52, + "grad_norm": 4.393324851989746, + "learning_rate": 9.673768844221106e-06, + "loss": 0.2529, + "step": 3750 + }, + { + "epoch": 2.54, + "grad_norm": 3.596787929534912, + "learning_rate": 9.671256281407035e-06, + "loss": 0.2497, + "step": 3775 + }, + { + "epoch": 2.55, + "grad_norm": 4.059005260467529, + "learning_rate": 9.668743718592966e-06, + "loss": 0.2545, + "step": 3800 + }, + { + "epoch": 2.57, + "grad_norm": 3.7618324756622314, + "learning_rate": 9.666231155778895e-06, + "loss": 0.2473, + "step": 3825 + }, + { + "epoch": 2.59, + "grad_norm": 4.647791862487793, + "learning_rate": 9.663718592964825e-06, + "loss": 0.2518, + "step": 3850 + }, + { + "epoch": 2.6, + "grad_norm": 4.5009565353393555, + "learning_rate": 9.661206030150754e-06, + "loss": 0.2469, + "step": 3875 + }, + { + "epoch": 2.62, + "grad_norm": 4.780633926391602, + "learning_rate": 9.658693467336683e-06, + "loss": 0.25, + "step": 3900 + }, + { + "epoch": 2.64, + "grad_norm": 3.617025375366211, + "learning_rate": 9.656180904522614e-06, + "loss": 0.2474, + "step": 3925 + }, + { + "epoch": 2.65, + "grad_norm": 3.9833314418792725, + "learning_rate": 9.653668341708544e-06, + "loss": 0.2483, + "step": 3950 + }, + { + "epoch": 2.67, + "grad_norm": 5.007567405700684, + "learning_rate": 9.651155778894473e-06, + "loss": 0.2517, + "step": 3975 + }, + { + "epoch": 2.69, + "grad_norm": 4.315629005432129, + "learning_rate": 9.648643216080404e-06, + "loss": 0.2474, + "step": 4000 + }, + { + "epoch": 2.69, + "eval_loss": 0.20293590426445007, + "eval_runtime": 516.9846, + "eval_samples_per_second": 2.768, + "eval_steps_per_second": 2.768, + "eval_wer": 33.06364617044228, + "step": 4000 + }, + { + "epoch": 2.7, + "grad_norm": 4.8804931640625, + "learning_rate": 9.646130653266332e-06, + "loss": 0.2449, + "step": 4025 + }, + { + "epoch": 2.72, + "grad_norm": 4.163693904876709, + "learning_rate": 9.643618090452263e-06, + "loss": 0.2461, + "step": 4050 + }, + { + "epoch": 2.74, + "grad_norm": 3.948927879333496, + "learning_rate": 9.641105527638192e-06, + "loss": 0.2425, + "step": 4075 + }, + { + "epoch": 2.75, + "grad_norm": 4.300779342651367, + "learning_rate": 9.638592964824121e-06, + "loss": 0.2406, + "step": 4100 + }, + { + "epoch": 2.77, + "grad_norm": 4.105258464813232, + "learning_rate": 9.63608040201005e-06, + "loss": 0.2475, + "step": 4125 + }, + { + "epoch": 2.79, + "grad_norm": 3.944981336593628, + "learning_rate": 9.63356783919598e-06, + "loss": 0.239, + "step": 4150 + }, + { + "epoch": 2.8, + "grad_norm": 4.136698246002197, + "learning_rate": 9.63105527638191e-06, + "loss": 0.2423, + "step": 4175 + }, + { + "epoch": 2.82, + "grad_norm": 4.178077220916748, + "learning_rate": 9.62854271356784e-06, + "loss": 0.2317, + "step": 4200 + }, + { + "epoch": 2.84, + "grad_norm": 3.7692461013793945, + "learning_rate": 9.62603015075377e-06, + "loss": 0.2461, + "step": 4225 + }, + { + "epoch": 2.85, + "grad_norm": 4.537982940673828, + "learning_rate": 9.623517587939699e-06, + "loss": 0.2355, + "step": 4250 + }, + { + "epoch": 2.87, + "grad_norm": 3.853121757507324, + "learning_rate": 9.62100502512563e-06, + "loss": 0.2303, + "step": 4275 + }, + { + "epoch": 2.89, + "grad_norm": 4.2317094802856445, + "learning_rate": 9.618492462311558e-06, + "loss": 0.2371, + "step": 4300 + }, + { + "epoch": 2.9, + "grad_norm": 4.333843231201172, + "learning_rate": 9.615979899497489e-06, + "loss": 0.2464, + "step": 4325 + }, + { + "epoch": 2.92, + "grad_norm": 3.657205104827881, + "learning_rate": 9.613467336683418e-06, + "loss": 0.2408, + "step": 4350 + }, + { + "epoch": 2.94, + "grad_norm": 4.541226863861084, + "learning_rate": 9.610954773869347e-06, + "loss": 0.2322, + "step": 4375 + }, + { + "epoch": 2.96, + "grad_norm": 4.435959815979004, + "learning_rate": 9.608442211055277e-06, + "loss": 0.244, + "step": 4400 + }, + { + "epoch": 2.97, + "grad_norm": 4.102321147918701, + "learning_rate": 9.605929648241206e-06, + "loss": 0.244, + "step": 4425 + }, + { + "epoch": 2.99, + "grad_norm": 4.229015827178955, + "learning_rate": 9.603417085427137e-06, + "loss": 0.2378, + "step": 4450 + }, + { + "epoch": 3.01, + "grad_norm": 3.373239517211914, + "learning_rate": 9.600904522613066e-06, + "loss": 0.233, + "step": 4475 + }, + { + "epoch": 3.02, + "grad_norm": 3.904553174972534, + "learning_rate": 9.598391959798996e-06, + "loss": 0.2208, + "step": 4500 + }, + { + "epoch": 3.04, + "grad_norm": 4.035851955413818, + "learning_rate": 9.595879396984925e-06, + "loss": 0.212, + "step": 4525 + }, + { + "epoch": 3.06, + "grad_norm": 4.0682454109191895, + "learning_rate": 9.593366834170856e-06, + "loss": 0.2101, + "step": 4550 + }, + { + "epoch": 3.07, + "grad_norm": 3.6210427284240723, + "learning_rate": 9.590854271356784e-06, + "loss": 0.2195, + "step": 4575 + }, + { + "epoch": 3.09, + "grad_norm": 3.864409923553467, + "learning_rate": 9.588341708542715e-06, + "loss": 0.2179, + "step": 4600 + }, + { + "epoch": 3.11, + "grad_norm": 3.8520665168762207, + "learning_rate": 9.585829145728644e-06, + "loss": 0.217, + "step": 4625 + }, + { + "epoch": 3.12, + "grad_norm": 4.255833148956299, + "learning_rate": 9.583316582914573e-06, + "loss": 0.2189, + "step": 4650 + }, + { + "epoch": 3.14, + "grad_norm": 4.133042812347412, + "learning_rate": 9.580804020100504e-06, + "loss": 0.2177, + "step": 4675 + }, + { + "epoch": 3.16, + "grad_norm": 4.218935489654541, + "learning_rate": 9.578291457286432e-06, + "loss": 0.2175, + "step": 4700 + }, + { + "epoch": 3.17, + "grad_norm": 3.9894680976867676, + "learning_rate": 9.575778894472363e-06, + "loss": 0.2253, + "step": 4725 + }, + { + "epoch": 3.19, + "grad_norm": 3.797673225402832, + "learning_rate": 9.573266331658292e-06, + "loss": 0.2191, + "step": 4750 + }, + { + "epoch": 3.21, + "grad_norm": 4.655117988586426, + "learning_rate": 9.570753768844222e-06, + "loss": 0.2162, + "step": 4775 + }, + { + "epoch": 3.22, + "grad_norm": 4.167984962463379, + "learning_rate": 9.568241206030151e-06, + "loss": 0.2144, + "step": 4800 + }, + { + "epoch": 3.24, + "grad_norm": 4.3113017082214355, + "learning_rate": 9.565728643216082e-06, + "loss": 0.2168, + "step": 4825 + }, + { + "epoch": 3.26, + "grad_norm": 4.172042369842529, + "learning_rate": 9.563216080402011e-06, + "loss": 0.2157, + "step": 4850 + }, + { + "epoch": 3.27, + "grad_norm": 3.704195737838745, + "learning_rate": 9.56070351758794e-06, + "loss": 0.2138, + "step": 4875 + }, + { + "epoch": 3.29, + "grad_norm": 3.992530584335327, + "learning_rate": 9.55819095477387e-06, + "loss": 0.2192, + "step": 4900 + }, + { + "epoch": 3.31, + "grad_norm": 3.8575868606567383, + "learning_rate": 9.5556783919598e-06, + "loss": 0.2212, + "step": 4925 + }, + { + "epoch": 3.32, + "grad_norm": 4.139746189117432, + "learning_rate": 9.55316582914573e-06, + "loss": 0.2153, + "step": 4950 + }, + { + "epoch": 3.34, + "grad_norm": 4.19871187210083, + "learning_rate": 9.550653266331658e-06, + "loss": 0.2137, + "step": 4975 + }, + { + "epoch": 3.36, + "grad_norm": 4.0228447914123535, + "learning_rate": 9.548140703517589e-06, + "loss": 0.22, + "step": 5000 + }, + { + "epoch": 3.36, + "eval_loss": 0.19356615841388702, + "eval_runtime": 523.3642, + "eval_samples_per_second": 2.734, + "eval_steps_per_second": 2.734, + "eval_wer": 32.08603020496225, + "step": 5000 + }, + { + "epoch": 3.37, + "grad_norm": 4.335786819458008, + "learning_rate": 9.545628140703518e-06, + "loss": 0.2189, + "step": 5025 + }, + { + "epoch": 3.39, + "grad_norm": 4.135392189025879, + "learning_rate": 9.543115577889448e-06, + "loss": 0.2148, + "step": 5050 + }, + { + "epoch": 3.41, + "grad_norm": 3.931087017059326, + "learning_rate": 9.540603015075379e-06, + "loss": 0.2138, + "step": 5075 + }, + { + "epoch": 3.43, + "grad_norm": 3.844536781311035, + "learning_rate": 9.538090452261308e-06, + "loss": 0.2196, + "step": 5100 + }, + { + "epoch": 3.44, + "grad_norm": 4.489307880401611, + "learning_rate": 9.535577889447237e-06, + "loss": 0.2096, + "step": 5125 + }, + { + "epoch": 3.46, + "grad_norm": 4.250260353088379, + "learning_rate": 9.533065326633166e-06, + "loss": 0.2096, + "step": 5150 + }, + { + "epoch": 3.48, + "grad_norm": 4.139286994934082, + "learning_rate": 9.530552763819096e-06, + "loss": 0.2125, + "step": 5175 + }, + { + "epoch": 3.49, + "grad_norm": 3.7281787395477295, + "learning_rate": 9.528040201005025e-06, + "loss": 0.212, + "step": 5200 + }, + { + "epoch": 3.51, + "grad_norm": 4.496286392211914, + "learning_rate": 9.525527638190956e-06, + "loss": 0.2116, + "step": 5225 + }, + { + "epoch": 3.53, + "grad_norm": 3.8112242221832275, + "learning_rate": 9.523015075376885e-06, + "loss": 0.2194, + "step": 5250 + }, + { + "epoch": 3.54, + "grad_norm": 3.8952436447143555, + "learning_rate": 9.520502512562815e-06, + "loss": 0.2138, + "step": 5275 + }, + { + "epoch": 3.56, + "grad_norm": 3.7573258876800537, + "learning_rate": 9.517989949748744e-06, + "loss": 0.2148, + "step": 5300 + }, + { + "epoch": 3.58, + "grad_norm": 4.134673595428467, + "learning_rate": 9.515477386934673e-06, + "loss": 0.209, + "step": 5325 + }, + { + "epoch": 3.59, + "grad_norm": 4.357089519500732, + "learning_rate": 9.512964824120604e-06, + "loss": 0.2175, + "step": 5350 + }, + { + "epoch": 3.61, + "grad_norm": 4.2735371589660645, + "learning_rate": 9.510452261306534e-06, + "loss": 0.206, + "step": 5375 + }, + { + "epoch": 3.63, + "grad_norm": 4.213156223297119, + "learning_rate": 9.507939698492463e-06, + "loss": 0.2112, + "step": 5400 + }, + { + "epoch": 3.64, + "grad_norm": 3.576127767562866, + "learning_rate": 9.505427135678392e-06, + "loss": 0.2092, + "step": 5425 + }, + { + "epoch": 3.66, + "grad_norm": 3.772277593612671, + "learning_rate": 9.502914572864322e-06, + "loss": 0.2107, + "step": 5450 + }, + { + "epoch": 3.68, + "grad_norm": 3.772357225418091, + "learning_rate": 9.500402010050253e-06, + "loss": 0.2103, + "step": 5475 + }, + { + "epoch": 3.69, + "grad_norm": 3.4314069747924805, + "learning_rate": 9.497889447236182e-06, + "loss": 0.2132, + "step": 5500 + }, + { + "epoch": 3.71, + "grad_norm": 3.846630096435547, + "learning_rate": 9.495376884422111e-06, + "loss": 0.213, + "step": 5525 + }, + { + "epoch": 3.73, + "grad_norm": 4.058349132537842, + "learning_rate": 9.49286432160804e-06, + "loss": 0.2126, + "step": 5550 + }, + { + "epoch": 3.74, + "grad_norm": 4.073215484619141, + "learning_rate": 9.49035175879397e-06, + "loss": 0.2161, + "step": 5575 + }, + { + "epoch": 3.76, + "grad_norm": 3.6516330242156982, + "learning_rate": 9.4878391959799e-06, + "loss": 0.2075, + "step": 5600 + }, + { + "epoch": 3.78, + "grad_norm": 4.082644939422607, + "learning_rate": 9.48532663316583e-06, + "loss": 0.2135, + "step": 5625 + }, + { + "epoch": 3.79, + "grad_norm": 4.039773464202881, + "learning_rate": 9.48281407035176e-06, + "loss": 0.2084, + "step": 5650 + }, + { + "epoch": 3.81, + "grad_norm": 3.782559394836426, + "learning_rate": 9.480301507537689e-06, + "loss": 0.2077, + "step": 5675 + }, + { + "epoch": 3.83, + "grad_norm": 4.098997592926025, + "learning_rate": 9.47778894472362e-06, + "loss": 0.2075, + "step": 5700 + }, + { + "epoch": 3.84, + "grad_norm": 4.381975173950195, + "learning_rate": 9.475276381909548e-06, + "loss": 0.2089, + "step": 5725 + }, + { + "epoch": 3.86, + "grad_norm": 3.9519519805908203, + "learning_rate": 9.472763819095479e-06, + "loss": 0.2114, + "step": 5750 + }, + { + "epoch": 3.88, + "grad_norm": 4.029834270477295, + "learning_rate": 9.470251256281408e-06, + "loss": 0.2123, + "step": 5775 + }, + { + "epoch": 3.9, + "grad_norm": 3.926435708999634, + "learning_rate": 9.467738693467337e-06, + "loss": 0.2082, + "step": 5800 + }, + { + "epoch": 3.91, + "grad_norm": 4.404311180114746, + "learning_rate": 9.465226130653267e-06, + "loss": 0.2164, + "step": 5825 + }, + { + "epoch": 3.93, + "grad_norm": 4.088269233703613, + "learning_rate": 9.462713567839196e-06, + "loss": 0.2136, + "step": 5850 + }, + { + "epoch": 3.95, + "grad_norm": 3.824185371398926, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1983, + "step": 5875 + }, + { + "epoch": 3.96, + "grad_norm": 4.1361775398254395, + "learning_rate": 9.457688442211056e-06, + "loss": 0.204, + "step": 5900 + }, + { + "epoch": 3.98, + "grad_norm": 3.670318841934204, + "learning_rate": 9.455175879396986e-06, + "loss": 0.2105, + "step": 5925 + }, + { + "epoch": 4.0, + "grad_norm": 4.021397113800049, + "learning_rate": 9.452663316582915e-06, + "loss": 0.2037, + "step": 5950 + }, + { + "epoch": 4.01, + "grad_norm": 3.5920333862304688, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1911, + "step": 5975 + }, + { + "epoch": 4.03, + "grad_norm": 4.00855827331543, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1886, + "step": 6000 + }, + { + "epoch": 4.03, + "eval_loss": 0.1894950270652771, + "eval_runtime": 521.4529, + "eval_samples_per_second": 2.744, + "eval_steps_per_second": 2.744, + "eval_wer": 30.980312837108954, + "step": 6000 + }, + { + "epoch": 4.05, + "grad_norm": 4.088005542755127, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1923, + "step": 6025 + }, + { + "epoch": 4.06, + "grad_norm": 3.7774393558502197, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1845, + "step": 6050 + }, + { + "epoch": 4.08, + "grad_norm": 3.628690481185913, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1919, + "step": 6075 + }, + { + "epoch": 4.1, + "grad_norm": 4.070960998535156, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1944, + "step": 6100 + }, + { + "epoch": 4.11, + "grad_norm": 3.8999123573303223, + "learning_rate": 9.435075376884422e-06, + "loss": 0.1856, + "step": 6125 + }, + { + "epoch": 4.13, + "grad_norm": 4.0126495361328125, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1905, + "step": 6150 + }, + { + "epoch": 4.15, + "grad_norm": 3.762664794921875, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1856, + "step": 6175 + }, + { + "epoch": 4.16, + "grad_norm": 3.986987352371216, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1941, + "step": 6200 + }, + { + "epoch": 4.18, + "grad_norm": 4.1226019859313965, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1906, + "step": 6225 + }, + { + "epoch": 4.2, + "grad_norm": 4.174398422241211, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1864, + "step": 6250 + }, + { + "epoch": 4.21, + "grad_norm": 3.9154255390167236, + "learning_rate": 9.42e-06, + "loss": 0.1873, + "step": 6275 + }, + { + "epoch": 4.23, + "grad_norm": 3.6772525310516357, + "learning_rate": 9.41748743718593e-06, + "loss": 0.1889, + "step": 6300 + }, + { + "epoch": 4.25, + "grad_norm": 4.016811847686768, + "learning_rate": 9.41497487437186e-06, + "loss": 0.1876, + "step": 6325 + }, + { + "epoch": 4.26, + "grad_norm": 4.262787818908691, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1879, + "step": 6350 + }, + { + "epoch": 4.28, + "grad_norm": 4.128027439117432, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1888, + "step": 6375 + }, + { + "epoch": 4.3, + "grad_norm": 3.4837915897369385, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1935, + "step": 6400 + }, + { + "epoch": 4.31, + "grad_norm": 3.5972988605499268, + "learning_rate": 9.404924623115579e-06, + "loss": 0.186, + "step": 6425 + }, + { + "epoch": 4.33, + "grad_norm": 3.5777218341827393, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1909, + "step": 6450 + }, + { + "epoch": 4.35, + "grad_norm": 3.8867275714874268, + "learning_rate": 9.399899497487438e-06, + "loss": 0.1834, + "step": 6475 + }, + { + "epoch": 4.37, + "grad_norm": 3.789663553237915, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1848, + "step": 6500 + }, + { + "epoch": 4.38, + "grad_norm": 4.078841686248779, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1864, + "step": 6525 + }, + { + "epoch": 4.4, + "grad_norm": 4.126681804656982, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1792, + "step": 6550 + }, + { + "epoch": 4.42, + "grad_norm": 3.509882688522339, + "learning_rate": 9.389849246231157e-06, + "loss": 0.1858, + "step": 6575 + }, + { + "epoch": 4.43, + "grad_norm": 3.845262289047241, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1868, + "step": 6600 + }, + { + "epoch": 4.45, + "grad_norm": 3.672685146331787, + "learning_rate": 9.384824120603015e-06, + "loss": 0.1855, + "step": 6625 + }, + { + "epoch": 4.47, + "grad_norm": 4.21498441696167, + "learning_rate": 9.382311557788946e-06, + "loss": 0.1866, + "step": 6650 + }, + { + "epoch": 4.48, + "grad_norm": 4.204169750213623, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1875, + "step": 6675 + }, + { + "epoch": 4.5, + "grad_norm": 4.332289695739746, + "learning_rate": 9.377286432160805e-06, + "loss": 0.1869, + "step": 6700 + }, + { + "epoch": 4.52, + "grad_norm": 3.827389717102051, + "learning_rate": 9.374773869346734e-06, + "loss": 0.1836, + "step": 6725 + }, + { + "epoch": 4.53, + "grad_norm": 3.8911168575286865, + "learning_rate": 9.372261306532664e-06, + "loss": 0.1838, + "step": 6750 + }, + { + "epoch": 4.55, + "grad_norm": 4.5454206466674805, + "learning_rate": 9.369748743718595e-06, + "loss": 0.1849, + "step": 6775 + }, + { + "epoch": 4.57, + "grad_norm": 3.978107213973999, + "learning_rate": 9.367236180904524e-06, + "loss": 0.1927, + "step": 6800 + }, + { + "epoch": 4.58, + "grad_norm": 3.946856737136841, + "learning_rate": 9.364723618090453e-06, + "loss": 0.1846, + "step": 6825 + }, + { + "epoch": 4.6, + "grad_norm": 3.528416156768799, + "learning_rate": 9.362211055276383e-06, + "loss": 0.1815, + "step": 6850 + }, + { + "epoch": 4.62, + "grad_norm": 3.661539077758789, + "learning_rate": 9.359698492462312e-06, + "loss": 0.184, + "step": 6875 + }, + { + "epoch": 4.63, + "grad_norm": 4.0110602378845215, + "learning_rate": 9.357185929648241e-06, + "loss": 0.1858, + "step": 6900 + }, + { + "epoch": 4.65, + "grad_norm": 4.164252281188965, + "learning_rate": 9.354673366834172e-06, + "loss": 0.1885, + "step": 6925 + }, + { + "epoch": 4.67, + "grad_norm": 3.6966872215270996, + "learning_rate": 9.352160804020101e-06, + "loss": 0.1858, + "step": 6950 + }, + { + "epoch": 4.68, + "grad_norm": 3.9824459552764893, + "learning_rate": 9.34964824120603e-06, + "loss": 0.1827, + "step": 6975 + }, + { + "epoch": 4.7, + "grad_norm": 4.003323078155518, + "learning_rate": 9.34713567839196e-06, + "loss": 0.1818, + "step": 7000 + }, + { + "epoch": 4.7, + "eval_loss": 0.1873241513967514, + "eval_runtime": 525.4362, + "eval_samples_per_second": 2.723, + "eval_steps_per_second": 2.723, + "eval_wer": 31.155609492988134, + "step": 7000 + }, + { + "epoch": 4.72, + "grad_norm": 3.490771770477295, + "learning_rate": 9.34462311557789e-06, + "loss": 0.1847, + "step": 7025 + }, + { + "epoch": 4.73, + "grad_norm": 4.085097312927246, + "learning_rate": 9.34211055276382e-06, + "loss": 0.1845, + "step": 7050 + }, + { + "epoch": 4.75, + "grad_norm": 3.688411235809326, + "learning_rate": 9.33959798994975e-06, + "loss": 0.1895, + "step": 7075 + }, + { + "epoch": 4.77, + "grad_norm": 3.6227452754974365, + "learning_rate": 9.337085427135679e-06, + "loss": 0.1864, + "step": 7100 + }, + { + "epoch": 4.79, + "grad_norm": 3.7417030334472656, + "learning_rate": 9.334572864321608e-06, + "loss": 0.1865, + "step": 7125 + }, + { + "epoch": 4.8, + "grad_norm": 4.023078441619873, + "learning_rate": 9.332060301507538e-06, + "loss": 0.1886, + "step": 7150 + }, + { + "epoch": 4.82, + "grad_norm": 3.916606903076172, + "learning_rate": 9.329547738693469e-06, + "loss": 0.1798, + "step": 7175 + }, + { + "epoch": 4.84, + "grad_norm": 3.924947500228882, + "learning_rate": 9.327035175879398e-06, + "loss": 0.1839, + "step": 7200 + }, + { + "epoch": 4.85, + "grad_norm": 4.0358805656433105, + "learning_rate": 9.324522613065327e-06, + "loss": 0.186, + "step": 7225 + }, + { + "epoch": 4.87, + "grad_norm": 3.8041045665740967, + "learning_rate": 9.322010050251257e-06, + "loss": 0.1874, + "step": 7250 + }, + { + "epoch": 4.89, + "grad_norm": 3.771125316619873, + "learning_rate": 9.319497487437186e-06, + "loss": 0.192, + "step": 7275 + }, + { + "epoch": 4.9, + "grad_norm": 3.7178351879119873, + "learning_rate": 9.316984924623115e-06, + "loss": 0.1831, + "step": 7300 + }, + { + "epoch": 4.92, + "grad_norm": 4.632425785064697, + "learning_rate": 9.314472361809046e-06, + "loss": 0.1834, + "step": 7325 + }, + { + "epoch": 4.94, + "grad_norm": 4.040483474731445, + "learning_rate": 9.311959798994976e-06, + "loss": 0.1822, + "step": 7350 + }, + { + "epoch": 4.95, + "grad_norm": 4.393119812011719, + "learning_rate": 9.309447236180905e-06, + "loss": 0.1823, + "step": 7375 + }, + { + "epoch": 4.97, + "grad_norm": 3.679018020629883, + "learning_rate": 9.306934673366836e-06, + "loss": 0.1771, + "step": 7400 + }, + { + "epoch": 4.99, + "grad_norm": 3.8613052368164062, + "learning_rate": 9.304422110552764e-06, + "loss": 0.1823, + "step": 7425 + }, + { + "epoch": 5.0, + "grad_norm": 3.1367316246032715, + "learning_rate": 9.301909547738695e-06, + "loss": 0.1791, + "step": 7450 + }, + { + "epoch": 5.02, + "grad_norm": 3.4209072589874268, + "learning_rate": 9.299396984924624e-06, + "loss": 0.1618, + "step": 7475 + }, + { + "epoch": 5.04, + "grad_norm": 3.3665506839752197, + "learning_rate": 9.296884422110553e-06, + "loss": 0.1616, + "step": 7500 + }, + { + "epoch": 5.05, + "grad_norm": 3.5807788372039795, + "learning_rate": 9.294371859296483e-06, + "loss": 0.1624, + "step": 7525 + }, + { + "epoch": 5.07, + "grad_norm": 3.678154706954956, + "learning_rate": 9.291859296482412e-06, + "loss": 0.1625, + "step": 7550 + }, + { + "epoch": 5.09, + "grad_norm": 3.671475887298584, + "learning_rate": 9.289346733668343e-06, + "loss": 0.1616, + "step": 7575 + }, + { + "epoch": 5.1, + "grad_norm": 3.7041079998016357, + "learning_rate": 9.286834170854272e-06, + "loss": 0.1616, + "step": 7600 + }, + { + "epoch": 5.12, + "grad_norm": 3.8696985244750977, + "learning_rate": 9.284321608040202e-06, + "loss": 0.1677, + "step": 7625 + }, + { + "epoch": 5.14, + "grad_norm": 3.842712640762329, + "learning_rate": 9.281809045226131e-06, + "loss": 0.1662, + "step": 7650 + }, + { + "epoch": 5.15, + "grad_norm": 3.8467085361480713, + "learning_rate": 9.279296482412062e-06, + "loss": 0.1635, + "step": 7675 + }, + { + "epoch": 5.17, + "grad_norm": 3.928945541381836, + "learning_rate": 9.27678391959799e-06, + "loss": 0.1655, + "step": 7700 + }, + { + "epoch": 5.19, + "grad_norm": 3.4808826446533203, + "learning_rate": 9.27427135678392e-06, + "loss": 0.161, + "step": 7725 + }, + { + "epoch": 5.2, + "grad_norm": 3.430126667022705, + "learning_rate": 9.27175879396985e-06, + "loss": 0.1654, + "step": 7750 + }, + { + "epoch": 5.22, + "grad_norm": 3.807945966720581, + "learning_rate": 9.26924623115578e-06, + "loss": 0.1647, + "step": 7775 + }, + { + "epoch": 5.24, + "grad_norm": 3.749256134033203, + "learning_rate": 9.26673366834171e-06, + "loss": 0.1695, + "step": 7800 + }, + { + "epoch": 5.26, + "grad_norm": 3.9342730045318604, + "learning_rate": 9.264221105527638e-06, + "loss": 0.1654, + "step": 7825 + }, + { + "epoch": 5.27, + "grad_norm": 3.688267946243286, + "learning_rate": 9.261708542713569e-06, + "loss": 0.1618, + "step": 7850 + }, + { + "epoch": 5.29, + "grad_norm": 4.157882213592529, + "learning_rate": 9.259195979899498e-06, + "loss": 0.1659, + "step": 7875 + }, + { + "epoch": 5.31, + "grad_norm": 3.783386468887329, + "learning_rate": 9.256683417085428e-06, + "loss": 0.1611, + "step": 7900 + }, + { + "epoch": 5.32, + "grad_norm": 4.049051284790039, + "learning_rate": 9.254170854271357e-06, + "loss": 0.1647, + "step": 7925 + }, + { + "epoch": 5.34, + "grad_norm": 3.9535739421844482, + "learning_rate": 9.251658291457288e-06, + "loss": 0.1683, + "step": 7950 + }, + { + "epoch": 5.36, + "grad_norm": 3.933593988418579, + "learning_rate": 9.249145728643217e-06, + "loss": 0.1649, + "step": 7975 + }, + { + "epoch": 5.37, + "grad_norm": 3.9193124771118164, + "learning_rate": 9.246633165829147e-06, + "loss": 0.1635, + "step": 8000 + }, + { + "epoch": 5.37, + "eval_loss": 0.18448637425899506, + "eval_runtime": 519.8929, + "eval_samples_per_second": 2.752, + "eval_steps_per_second": 2.752, + "eval_wer": 30.48139158576052, + "step": 8000 + }, + { + "epoch": 5.39, + "grad_norm": 3.881075620651245, + "learning_rate": 9.244120603015076e-06, + "loss": 0.1719, + "step": 8025 + }, + { + "epoch": 5.41, + "grad_norm": 3.9071929454803467, + "learning_rate": 9.241608040201005e-06, + "loss": 0.1741, + "step": 8050 + }, + { + "epoch": 5.42, + "grad_norm": 4.394382476806641, + "learning_rate": 9.239095477386936e-06, + "loss": 0.1648, + "step": 8075 + }, + { + "epoch": 5.44, + "grad_norm": 3.7738540172576904, + "learning_rate": 9.236683417085428e-06, + "loss": 0.1649, + "step": 8100 + }, + { + "epoch": 5.46, + "grad_norm": 3.805447816848755, + "learning_rate": 9.234170854271357e-06, + "loss": 0.1638, + "step": 8125 + }, + { + "epoch": 5.47, + "grad_norm": 3.6185922622680664, + "learning_rate": 9.231658291457286e-06, + "loss": 0.1652, + "step": 8150 + }, + { + "epoch": 5.49, + "grad_norm": 4.127386569976807, + "learning_rate": 9.229145728643217e-06, + "loss": 0.1642, + "step": 8175 + }, + { + "epoch": 5.51, + "grad_norm": 4.005887985229492, + "learning_rate": 9.226633165829147e-06, + "loss": 0.164, + "step": 8200 + }, + { + "epoch": 5.52, + "grad_norm": 4.461826801300049, + "learning_rate": 9.224120603015076e-06, + "loss": 0.1676, + "step": 8225 + }, + { + "epoch": 5.54, + "grad_norm": 3.9404098987579346, + "learning_rate": 9.221608040201005e-06, + "loss": 0.1636, + "step": 8250 + }, + { + "epoch": 5.56, + "grad_norm": 3.6533539295196533, + "learning_rate": 9.219095477386936e-06, + "loss": 0.1638, + "step": 8275 + }, + { + "epoch": 5.57, + "grad_norm": 3.8982412815093994, + "learning_rate": 9.216582914572864e-06, + "loss": 0.1702, + "step": 8300 + }, + { + "epoch": 5.59, + "grad_norm": 3.9691884517669678, + "learning_rate": 9.214070351758795e-06, + "loss": 0.1656, + "step": 8325 + }, + { + "epoch": 5.61, + "grad_norm": 3.790567636489868, + "learning_rate": 9.211557788944724e-06, + "loss": 0.1633, + "step": 8350 + }, + { + "epoch": 5.62, + "grad_norm": 3.9435319900512695, + "learning_rate": 9.209045226130654e-06, + "loss": 0.1643, + "step": 8375 + }, + { + "epoch": 5.64, + "grad_norm": 3.846001625061035, + "learning_rate": 9.206532663316585e-06, + "loss": 0.1603, + "step": 8400 + }, + { + "epoch": 5.66, + "grad_norm": 3.5040123462677, + "learning_rate": 9.204020100502512e-06, + "loss": 0.1645, + "step": 8425 + }, + { + "epoch": 5.67, + "grad_norm": 4.317410469055176, + "learning_rate": 9.201507537688443e-06, + "loss": 0.1658, + "step": 8450 + }, + { + "epoch": 5.69, + "grad_norm": 3.8459880352020264, + "learning_rate": 9.198994974874373e-06, + "loss": 0.1624, + "step": 8475 + }, + { + "epoch": 5.71, + "grad_norm": 3.7885921001434326, + "learning_rate": 9.196482412060302e-06, + "loss": 0.1586, + "step": 8500 + }, + { + "epoch": 5.73, + "grad_norm": 3.5857863426208496, + "learning_rate": 9.193969849246231e-06, + "loss": 0.1657, + "step": 8525 + }, + { + "epoch": 5.74, + "grad_norm": 3.735806941986084, + "learning_rate": 9.191457286432162e-06, + "loss": 0.1594, + "step": 8550 + }, + { + "epoch": 5.76, + "grad_norm": 3.8138506412506104, + "learning_rate": 9.188944723618092e-06, + "loss": 0.1651, + "step": 8575 + }, + { + "epoch": 5.78, + "grad_norm": 3.7869691848754883, + "learning_rate": 9.186432160804021e-06, + "loss": 0.1618, + "step": 8600 + }, + { + "epoch": 5.79, + "grad_norm": 3.880380630493164, + "learning_rate": 9.18391959798995e-06, + "loss": 0.1647, + "step": 8625 + }, + { + "epoch": 5.81, + "grad_norm": 3.979027271270752, + "learning_rate": 9.18140703517588e-06, + "loss": 0.1566, + "step": 8650 + }, + { + "epoch": 5.83, + "grad_norm": 3.6661736965179443, + "learning_rate": 9.17889447236181e-06, + "loss": 0.1604, + "step": 8675 + }, + { + "epoch": 5.84, + "grad_norm": 4.11058235168457, + "learning_rate": 9.176381909547738e-06, + "loss": 0.1657, + "step": 8700 + }, + { + "epoch": 5.86, + "grad_norm": 3.898141622543335, + "learning_rate": 9.17386934673367e-06, + "loss": 0.1606, + "step": 8725 + }, + { + "epoch": 5.88, + "grad_norm": 4.01162052154541, + "learning_rate": 9.171356783919599e-06, + "loss": 0.1672, + "step": 8750 + }, + { + "epoch": 5.89, + "grad_norm": 3.9955649375915527, + "learning_rate": 9.168844221105528e-06, + "loss": 0.1612, + "step": 8775 + }, + { + "epoch": 5.91, + "grad_norm": 3.9245057106018066, + "learning_rate": 9.166331658291459e-06, + "loss": 0.1662, + "step": 8800 + }, + { + "epoch": 5.93, + "grad_norm": 4.075827598571777, + "learning_rate": 9.163819095477388e-06, + "loss": 0.1626, + "step": 8825 + }, + { + "epoch": 5.94, + "grad_norm": 4.169577121734619, + "learning_rate": 9.161306532663318e-06, + "loss": 0.1608, + "step": 8850 + }, + { + "epoch": 5.96, + "grad_norm": 4.060514450073242, + "learning_rate": 9.158793969849247e-06, + "loss": 0.1628, + "step": 8875 + }, + { + "epoch": 5.98, + "grad_norm": 3.8610999584198, + "learning_rate": 9.156281407035176e-06, + "loss": 0.1591, + "step": 8900 + }, + { + "epoch": 5.99, + "grad_norm": 3.159731864929199, + "learning_rate": 9.153768844221106e-06, + "loss": 0.1605, + "step": 8925 + }, + { + "epoch": 6.01, + "grad_norm": 3.4550232887268066, + "learning_rate": 9.151256281407037e-06, + "loss": 0.1533, + "step": 8950 + }, + { + "epoch": 6.03, + "grad_norm": 3.5820775032043457, + "learning_rate": 9.148743718592964e-06, + "loss": 0.1462, + "step": 8975 + }, + { + "epoch": 6.04, + "grad_norm": 3.7673840522766113, + "learning_rate": 9.146231155778895e-06, + "loss": 0.1464, + "step": 9000 + }, + { + "epoch": 6.04, + "eval_loss": 0.1867646723985672, + "eval_runtime": 522.0651, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 2.741, + "eval_wer": 30.373516720604098, + "step": 9000 + }, + { + "epoch": 6.06, + "grad_norm": 3.7958223819732666, + "learning_rate": 9.143718592964825e-06, + "loss": 0.1456, + "step": 9025 + }, + { + "epoch": 6.08, + "grad_norm": 3.619978427886963, + "learning_rate": 9.141206030150754e-06, + "loss": 0.1446, + "step": 9050 + }, + { + "epoch": 6.09, + "grad_norm": 3.5674071311950684, + "learning_rate": 9.138693467336685e-06, + "loss": 0.1448, + "step": 9075 + }, + { + "epoch": 6.11, + "grad_norm": 3.3518311977386475, + "learning_rate": 9.136180904522614e-06, + "loss": 0.1411, + "step": 9100 + }, + { + "epoch": 6.13, + "grad_norm": 3.2948806285858154, + "learning_rate": 9.133668341708544e-06, + "loss": 0.141, + "step": 9125 + }, + { + "epoch": 6.15, + "grad_norm": 4.018036365509033, + "learning_rate": 9.131155778894473e-06, + "loss": 0.1463, + "step": 9150 + }, + { + "epoch": 6.16, + "grad_norm": 3.571462869644165, + "learning_rate": 9.128643216080402e-06, + "loss": 0.1472, + "step": 9175 + }, + { + "epoch": 6.18, + "grad_norm": 3.661487340927124, + "learning_rate": 9.126130653266332e-06, + "loss": 0.1443, + "step": 9200 + }, + { + "epoch": 6.2, + "grad_norm": 3.995366096496582, + "learning_rate": 9.123618090452263e-06, + "loss": 0.1451, + "step": 9225 + }, + { + "epoch": 6.21, + "grad_norm": 3.56868052482605, + "learning_rate": 9.121105527638192e-06, + "loss": 0.1456, + "step": 9250 + }, + { + "epoch": 6.23, + "grad_norm": 3.9354019165039062, + "learning_rate": 9.118592964824121e-06, + "loss": 0.1411, + "step": 9275 + }, + { + "epoch": 6.25, + "grad_norm": 3.9782636165618896, + "learning_rate": 9.11608040201005e-06, + "loss": 0.1477, + "step": 9300 + }, + { + "epoch": 6.26, + "grad_norm": 3.93599009513855, + "learning_rate": 9.11356783919598e-06, + "loss": 0.1538, + "step": 9325 + }, + { + "epoch": 6.28, + "grad_norm": 4.221218585968018, + "learning_rate": 9.111055276381911e-06, + "loss": 0.1407, + "step": 9350 + }, + { + "epoch": 6.3, + "grad_norm": 3.736660957336426, + "learning_rate": 9.10854271356784e-06, + "loss": 0.1431, + "step": 9375 + }, + { + "epoch": 6.31, + "grad_norm": 3.9257709980010986, + "learning_rate": 9.10603015075377e-06, + "loss": 0.1412, + "step": 9400 + }, + { + "epoch": 6.33, + "grad_norm": 4.14724588394165, + "learning_rate": 9.1035175879397e-06, + "loss": 0.1479, + "step": 9425 + }, + { + "epoch": 6.35, + "grad_norm": 3.960895299911499, + "learning_rate": 9.101005025125628e-06, + "loss": 0.1405, + "step": 9450 + }, + { + "epoch": 6.36, + "grad_norm": 4.3491411209106445, + "learning_rate": 9.09849246231156e-06, + "loss": 0.1458, + "step": 9475 + }, + { + "epoch": 6.38, + "grad_norm": 3.9251201152801514, + "learning_rate": 9.095979899497489e-06, + "loss": 0.1457, + "step": 9500 + }, + { + "epoch": 6.4, + "grad_norm": 4.136743545532227, + "learning_rate": 9.093467336683418e-06, + "loss": 0.1454, + "step": 9525 + }, + { + "epoch": 6.41, + "grad_norm": 4.047736167907715, + "learning_rate": 9.090954773869347e-06, + "loss": 0.1505, + "step": 9550 + }, + { + "epoch": 6.43, + "grad_norm": 4.12973165512085, + "learning_rate": 9.088442211055277e-06, + "loss": 0.1483, + "step": 9575 + }, + { + "epoch": 6.45, + "grad_norm": 4.175168514251709, + "learning_rate": 9.085929648241206e-06, + "loss": 0.145, + "step": 9600 + }, + { + "epoch": 6.46, + "grad_norm": 3.976602077484131, + "learning_rate": 9.083417085427137e-06, + "loss": 0.1392, + "step": 9625 + }, + { + "epoch": 6.48, + "grad_norm": 4.090387344360352, + "learning_rate": 9.080904522613066e-06, + "loss": 0.1469, + "step": 9650 + }, + { + "epoch": 6.5, + "grad_norm": 3.7028279304504395, + "learning_rate": 9.078391959798996e-06, + "loss": 0.1429, + "step": 9675 + }, + { + "epoch": 6.51, + "grad_norm": 4.258456707000732, + "learning_rate": 9.075879396984927e-06, + "loss": 0.1439, + "step": 9700 + }, + { + "epoch": 6.53, + "grad_norm": 3.8077964782714844, + "learning_rate": 9.073366834170854e-06, + "loss": 0.1443, + "step": 9725 + }, + { + "epoch": 6.55, + "grad_norm": 3.620601177215576, + "learning_rate": 9.070854271356785e-06, + "loss": 0.1474, + "step": 9750 + }, + { + "epoch": 6.56, + "grad_norm": 3.8309972286224365, + "learning_rate": 9.068341708542715e-06, + "loss": 0.1464, + "step": 9775 + }, + { + "epoch": 6.58, + "grad_norm": 4.041671276092529, + "learning_rate": 9.065829145728644e-06, + "loss": 0.1436, + "step": 9800 + }, + { + "epoch": 6.6, + "grad_norm": 4.29490327835083, + "learning_rate": 9.063316582914573e-06, + "loss": 0.1507, + "step": 9825 + }, + { + "epoch": 6.62, + "grad_norm": 4.407804012298584, + "learning_rate": 9.060804020100502e-06, + "loss": 0.1449, + "step": 9850 + }, + { + "epoch": 6.63, + "grad_norm": 4.063084125518799, + "learning_rate": 9.058291457286433e-06, + "loss": 0.1519, + "step": 9875 + }, + { + "epoch": 6.65, + "grad_norm": 3.6831226348876953, + "learning_rate": 9.055778894472363e-06, + "loss": 0.143, + "step": 9900 + }, + { + "epoch": 6.67, + "grad_norm": 4.614268779754639, + "learning_rate": 9.053266331658292e-06, + "loss": 0.1424, + "step": 9925 + }, + { + "epoch": 6.68, + "grad_norm": 3.930100679397583, + "learning_rate": 9.050753768844221e-06, + "loss": 0.1449, + "step": 9950 + }, + { + "epoch": 6.7, + "grad_norm": 3.9550886154174805, + "learning_rate": 9.048241206030152e-06, + "loss": 0.1392, + "step": 9975 + }, + { + "epoch": 6.72, + "grad_norm": 4.000819683074951, + "learning_rate": 9.04572864321608e-06, + "loss": 0.1503, + "step": 10000 + }, + { + "epoch": 6.72, + "eval_loss": 0.18567724525928497, + "eval_runtime": 520.2231, + "eval_samples_per_second": 2.751, + "eval_steps_per_second": 2.751, + "eval_wer": 29.948759439050704, + "step": 10000 + }, + { + "epoch": 6.73, + "grad_norm": 4.0167155265808105, + "learning_rate": 9.043216080402011e-06, + "loss": 0.1465, + "step": 10025 + }, + { + "epoch": 6.75, + "grad_norm": 3.7915775775909424, + "learning_rate": 9.04070351758794e-06, + "loss": 0.1426, + "step": 10050 + }, + { + "epoch": 6.77, + "grad_norm": 4.242292404174805, + "learning_rate": 9.03819095477387e-06, + "loss": 0.1487, + "step": 10075 + }, + { + "epoch": 6.78, + "grad_norm": 3.8141515254974365, + "learning_rate": 9.0356783919598e-06, + "loss": 0.1454, + "step": 10100 + }, + { + "epoch": 6.8, + "grad_norm": 3.645862579345703, + "learning_rate": 9.033165829145728e-06, + "loss": 0.1451, + "step": 10125 + }, + { + "epoch": 6.82, + "grad_norm": 3.5161218643188477, + "learning_rate": 9.03065326633166e-06, + "loss": 0.1413, + "step": 10150 + }, + { + "epoch": 6.83, + "grad_norm": 3.873978614807129, + "learning_rate": 9.028140703517589e-06, + "loss": 0.1426, + "step": 10175 + }, + { + "epoch": 6.85, + "grad_norm": 4.028696537017822, + "learning_rate": 9.025628140703518e-06, + "loss": 0.1417, + "step": 10200 + }, + { + "epoch": 6.87, + "grad_norm": 4.0096259117126465, + "learning_rate": 9.023115577889447e-06, + "loss": 0.1456, + "step": 10225 + }, + { + "epoch": 6.88, + "grad_norm": 3.550913095474243, + "learning_rate": 9.020603015075378e-06, + "loss": 0.1442, + "step": 10250 + }, + { + "epoch": 6.9, + "grad_norm": 3.9434022903442383, + "learning_rate": 9.018090452261308e-06, + "loss": 0.1522, + "step": 10275 + }, + { + "epoch": 6.92, + "grad_norm": 3.8213093280792236, + "learning_rate": 9.015577889447237e-06, + "loss": 0.1425, + "step": 10300 + }, + { + "epoch": 6.93, + "grad_norm": 3.8195369243621826, + "learning_rate": 9.013065326633166e-06, + "loss": 0.1458, + "step": 10325 + }, + { + "epoch": 6.95, + "grad_norm": 3.89908766746521, + "learning_rate": 9.010552763819096e-06, + "loss": 0.1414, + "step": 10350 + }, + { + "epoch": 6.97, + "grad_norm": 3.8848259449005127, + "learning_rate": 9.008040201005027e-06, + "loss": 0.1427, + "step": 10375 + }, + { + "epoch": 6.98, + "grad_norm": 3.5469753742218018, + "learning_rate": 9.005527638190954e-06, + "loss": 0.1423, + "step": 10400 + }, + { + "epoch": 7.0, + "grad_norm": 3.697833776473999, + "learning_rate": 9.003015075376885e-06, + "loss": 0.1418, + "step": 10425 + }, + { + "epoch": 7.02, + "grad_norm": 3.422151803970337, + "learning_rate": 9.000502512562815e-06, + "loss": 0.1231, + "step": 10450 + }, + { + "epoch": 7.03, + "grad_norm": 3.3331618309020996, + "learning_rate": 8.997989949748744e-06, + "loss": 0.1217, + "step": 10475 + }, + { + "epoch": 7.05, + "grad_norm": 3.9095921516418457, + "learning_rate": 8.995477386934675e-06, + "loss": 0.1266, + "step": 10500 + }, + { + "epoch": 7.07, + "grad_norm": 3.5641751289367676, + "learning_rate": 8.992964824120604e-06, + "loss": 0.1215, + "step": 10525 + }, + { + "epoch": 7.09, + "grad_norm": 3.49638032913208, + "learning_rate": 8.990452261306534e-06, + "loss": 0.1232, + "step": 10550 + }, + { + "epoch": 7.1, + "grad_norm": 3.8626105785369873, + "learning_rate": 8.987939698492463e-06, + "loss": 0.1281, + "step": 10575 + }, + { + "epoch": 7.12, + "grad_norm": 3.5636463165283203, + "learning_rate": 8.985427135678392e-06, + "loss": 0.1249, + "step": 10600 + }, + { + "epoch": 7.14, + "grad_norm": 4.087505340576172, + "learning_rate": 8.982914572864322e-06, + "loss": 0.1251, + "step": 10625 + }, + { + "epoch": 7.15, + "grad_norm": 3.7042148113250732, + "learning_rate": 8.980402010050253e-06, + "loss": 0.1294, + "step": 10650 + }, + { + "epoch": 7.17, + "grad_norm": 3.6752560138702393, + "learning_rate": 8.977889447236182e-06, + "loss": 0.1296, + "step": 10675 + }, + { + "epoch": 7.19, + "grad_norm": 3.649087429046631, + "learning_rate": 8.975376884422111e-06, + "loss": 0.1258, + "step": 10700 + }, + { + "epoch": 7.2, + "grad_norm": 4.038895130157471, + "learning_rate": 8.97286432160804e-06, + "loss": 0.1287, + "step": 10725 + }, + { + "epoch": 7.22, + "grad_norm": 3.67850399017334, + "learning_rate": 8.97035175879397e-06, + "loss": 0.1263, + "step": 10750 + }, + { + "epoch": 7.24, + "grad_norm": 3.763619899749756, + "learning_rate": 8.967839195979901e-06, + "loss": 0.127, + "step": 10775 + }, + { + "epoch": 7.25, + "grad_norm": 3.4795069694519043, + "learning_rate": 8.96532663316583e-06, + "loss": 0.1275, + "step": 10800 + }, + { + "epoch": 7.27, + "grad_norm": 3.7174344062805176, + "learning_rate": 8.96281407035176e-06, + "loss": 0.1279, + "step": 10825 + }, + { + "epoch": 7.29, + "grad_norm": 3.8897461891174316, + "learning_rate": 8.960301507537689e-06, + "loss": 0.1247, + "step": 10850 + }, + { + "epoch": 7.3, + "grad_norm": 4.073455333709717, + "learning_rate": 8.957788944723618e-06, + "loss": 0.1296, + "step": 10875 + }, + { + "epoch": 7.32, + "grad_norm": 3.9033734798431396, + "learning_rate": 8.95527638190955e-06, + "loss": 0.1312, + "step": 10900 + }, + { + "epoch": 7.34, + "grad_norm": 4.1628899574279785, + "learning_rate": 8.952763819095479e-06, + "loss": 0.1291, + "step": 10925 + }, + { + "epoch": 7.35, + "grad_norm": 3.760683059692383, + "learning_rate": 8.950251256281408e-06, + "loss": 0.1268, + "step": 10950 + }, + { + "epoch": 7.37, + "grad_norm": 3.9215078353881836, + "learning_rate": 8.947738693467337e-06, + "loss": 0.1323, + "step": 10975 + }, + { + "epoch": 7.39, + "grad_norm": 4.2469482421875, + "learning_rate": 8.945226130653267e-06, + "loss": 0.1288, + "step": 11000 + }, + { + "epoch": 7.39, + "eval_loss": 0.19037967920303345, + "eval_runtime": 519.858, + "eval_samples_per_second": 2.753, + "eval_steps_per_second": 2.753, + "eval_wer": 30.636461704422867, + "step": 11000 + }, + { + "epoch": 7.4, + "grad_norm": 3.990900993347168, + "learning_rate": 8.942713567839196e-06, + "loss": 0.1251, + "step": 11025 + }, + { + "epoch": 7.42, + "grad_norm": 4.272309303283691, + "learning_rate": 8.940201005025127e-06, + "loss": 0.1349, + "step": 11050 + }, + { + "epoch": 7.44, + "grad_norm": 4.357758045196533, + "learning_rate": 8.937688442211056e-06, + "loss": 0.128, + "step": 11075 + }, + { + "epoch": 7.45, + "grad_norm": 3.5985326766967773, + "learning_rate": 8.935175879396986e-06, + "loss": 0.1245, + "step": 11100 + }, + { + "epoch": 7.47, + "grad_norm": 4.144040107727051, + "learning_rate": 8.932663316582915e-06, + "loss": 0.1252, + "step": 11125 + }, + { + "epoch": 7.49, + "grad_norm": 3.5244176387786865, + "learning_rate": 8.930150753768844e-06, + "loss": 0.1284, + "step": 11150 + }, + { + "epoch": 7.51, + "grad_norm": 4.079344272613525, + "learning_rate": 8.927638190954775e-06, + "loss": 0.1295, + "step": 11175 + }, + { + "epoch": 7.52, + "grad_norm": 4.141294479370117, + "learning_rate": 8.925125628140705e-06, + "loss": 0.1295, + "step": 11200 + }, + { + "epoch": 7.54, + "grad_norm": 3.4114084243774414, + "learning_rate": 8.922613065326634e-06, + "loss": 0.1253, + "step": 11225 + }, + { + "epoch": 7.56, + "grad_norm": 3.77829909324646, + "learning_rate": 8.920100502512563e-06, + "loss": 0.1262, + "step": 11250 + }, + { + "epoch": 7.57, + "grad_norm": 4.175596237182617, + "learning_rate": 8.917587939698493e-06, + "loss": 0.1304, + "step": 11275 + }, + { + "epoch": 7.59, + "grad_norm": 4.313751220703125, + "learning_rate": 8.915075376884424e-06, + "loss": 0.1274, + "step": 11300 + }, + { + "epoch": 7.61, + "grad_norm": 3.971757411956787, + "learning_rate": 8.912562814070353e-06, + "loss": 0.1261, + "step": 11325 + }, + { + "epoch": 7.62, + "grad_norm": 4.1452131271362305, + "learning_rate": 8.910050251256282e-06, + "loss": 0.1279, + "step": 11350 + }, + { + "epoch": 7.64, + "grad_norm": 4.34934663772583, + "learning_rate": 8.907537688442212e-06, + "loss": 0.1278, + "step": 11375 + }, + { + "epoch": 7.66, + "grad_norm": 4.227243900299072, + "learning_rate": 8.905025125628143e-06, + "loss": 0.1282, + "step": 11400 + }, + { + "epoch": 7.67, + "grad_norm": 4.010955333709717, + "learning_rate": 8.90251256281407e-06, + "loss": 0.1305, + "step": 11425 + }, + { + "epoch": 7.69, + "grad_norm": 4.299724102020264, + "learning_rate": 8.900000000000001e-06, + "loss": 0.1277, + "step": 11450 + }, + { + "epoch": 7.71, + "grad_norm": 4.1884684562683105, + "learning_rate": 8.89748743718593e-06, + "loss": 0.1293, + "step": 11475 + }, + { + "epoch": 7.72, + "grad_norm": 3.6471798419952393, + "learning_rate": 8.89497487437186e-06, + "loss": 0.1241, + "step": 11500 + }, + { + "epoch": 7.74, + "grad_norm": 3.7731807231903076, + "learning_rate": 8.892462311557791e-06, + "loss": 0.125, + "step": 11525 + }, + { + "epoch": 7.76, + "grad_norm": 4.476314067840576, + "learning_rate": 8.889949748743718e-06, + "loss": 0.1312, + "step": 11550 + }, + { + "epoch": 7.77, + "grad_norm": 3.9213693141937256, + "learning_rate": 8.88743718592965e-06, + "loss": 0.1284, + "step": 11575 + }, + { + "epoch": 7.79, + "grad_norm": 3.8584096431732178, + "learning_rate": 8.884924623115579e-06, + "loss": 0.128, + "step": 11600 + }, + { + "epoch": 7.81, + "grad_norm": 3.8317983150482178, + "learning_rate": 8.882412060301508e-06, + "loss": 0.1294, + "step": 11625 + }, + { + "epoch": 7.82, + "grad_norm": 4.090360164642334, + "learning_rate": 8.879899497487437e-06, + "loss": 0.1325, + "step": 11650 + }, + { + "epoch": 7.84, + "grad_norm": 3.4047842025756836, + "learning_rate": 8.877386934673368e-06, + "loss": 0.1324, + "step": 11675 + }, + { + "epoch": 7.86, + "grad_norm": 4.2123870849609375, + "learning_rate": 8.874874371859296e-06, + "loss": 0.1312, + "step": 11700 + }, + { + "epoch": 7.87, + "grad_norm": 4.135654926300049, + "learning_rate": 8.872361809045227e-06, + "loss": 0.1317, + "step": 11725 + }, + { + "epoch": 7.89, + "grad_norm": 3.5492568016052246, + "learning_rate": 8.869849246231156e-06, + "loss": 0.1244, + "step": 11750 + }, + { + "epoch": 7.91, + "grad_norm": 3.8317863941192627, + "learning_rate": 8.867336683417086e-06, + "loss": 0.1293, + "step": 11775 + }, + { + "epoch": 7.92, + "grad_norm": 3.6691598892211914, + "learning_rate": 8.864824120603017e-06, + "loss": 0.1269, + "step": 11800 + }, + { + "epoch": 7.94, + "grad_norm": 3.5726985931396484, + "learning_rate": 8.862311557788944e-06, + "loss": 0.129, + "step": 11825 + }, + { + "epoch": 7.96, + "grad_norm": 4.284350872039795, + "learning_rate": 8.859798994974875e-06, + "loss": 0.1237, + "step": 11850 + }, + { + "epoch": 7.98, + "grad_norm": 3.696242570877075, + "learning_rate": 8.857286432160805e-06, + "loss": 0.1291, + "step": 11875 + }, + { + "epoch": 7.99, + "grad_norm": 4.06398344039917, + "learning_rate": 8.854773869346734e-06, + "loss": 0.1288, + "step": 11900 + }, + { + "epoch": 8.01, + "grad_norm": 3.8039207458496094, + "learning_rate": 8.852261306532665e-06, + "loss": 0.1212, + "step": 11925 + }, + { + "epoch": 8.03, + "grad_norm": 3.6124823093414307, + "learning_rate": 8.849748743718594e-06, + "loss": 0.1109, + "step": 11950 + }, + { + "epoch": 8.04, + "grad_norm": 3.471539258956909, + "learning_rate": 8.847236180904524e-06, + "loss": 0.108, + "step": 11975 + }, + { + "epoch": 8.06, + "grad_norm": 3.9906015396118164, + "learning_rate": 8.844723618090453e-06, + "loss": 0.1106, + "step": 12000 + }, + { + "epoch": 8.06, + "eval_loss": 0.19243241846561432, + "eval_runtime": 524.7446, + "eval_samples_per_second": 2.727, + "eval_steps_per_second": 2.727, + "eval_wer": 30.784789644012946, + "step": 12000 + }, + { + "epoch": 8.08, + "grad_norm": 3.3895809650421143, + "learning_rate": 8.842211055276382e-06, + "loss": 0.1123, + "step": 12025 + }, + { + "epoch": 8.09, + "grad_norm": 3.547009229660034, + "learning_rate": 8.839698492462312e-06, + "loss": 0.1118, + "step": 12050 + }, + { + "epoch": 8.11, + "grad_norm": 3.6668307781219482, + "learning_rate": 8.837185929648243e-06, + "loss": 0.1103, + "step": 12075 + }, + { + "epoch": 8.13, + "grad_norm": 3.8766493797302246, + "learning_rate": 8.834773869346734e-06, + "loss": 0.1114, + "step": 12100 + }, + { + "epoch": 8.14, + "grad_norm": 3.7508063316345215, + "learning_rate": 8.832261306532665e-06, + "loss": 0.1087, + "step": 12125 + }, + { + "epoch": 8.16, + "grad_norm": 3.3632142543792725, + "learning_rate": 8.829748743718593e-06, + "loss": 0.1138, + "step": 12150 + }, + { + "epoch": 8.18, + "grad_norm": 3.8771419525146484, + "learning_rate": 8.827236180904524e-06, + "loss": 0.1102, + "step": 12175 + }, + { + "epoch": 8.19, + "grad_norm": 3.5378189086914062, + "learning_rate": 8.824723618090453e-06, + "loss": 0.1097, + "step": 12200 + }, + { + "epoch": 8.21, + "grad_norm": 4.218005657196045, + "learning_rate": 8.822211055276383e-06, + "loss": 0.1114, + "step": 12225 + }, + { + "epoch": 8.23, + "grad_norm": 3.4406065940856934, + "learning_rate": 8.819698492462312e-06, + "loss": 0.1096, + "step": 12250 + }, + { + "epoch": 8.24, + "grad_norm": 3.978188991546631, + "learning_rate": 8.817185929648241e-06, + "loss": 0.1156, + "step": 12275 + }, + { + "epoch": 8.26, + "grad_norm": 3.540112018585205, + "learning_rate": 8.81467336683417e-06, + "loss": 0.1117, + "step": 12300 + }, + { + "epoch": 8.28, + "grad_norm": 4.328541278839111, + "learning_rate": 8.812160804020102e-06, + "loss": 0.1125, + "step": 12325 + }, + { + "epoch": 8.29, + "grad_norm": 3.8371388912200928, + "learning_rate": 8.809648241206031e-06, + "loss": 0.1126, + "step": 12350 + }, + { + "epoch": 8.31, + "grad_norm": 3.855050802230835, + "learning_rate": 8.80713567839196e-06, + "loss": 0.1135, + "step": 12375 + }, + { + "epoch": 8.33, + "grad_norm": 4.206212520599365, + "learning_rate": 8.804623115577891e-06, + "loss": 0.1129, + "step": 12400 + }, + { + "epoch": 8.34, + "grad_norm": 4.154353618621826, + "learning_rate": 8.802110552763819e-06, + "loss": 0.1154, + "step": 12425 + }, + { + "epoch": 8.36, + "grad_norm": 4.117897033691406, + "learning_rate": 8.79959798994975e-06, + "loss": 0.115, + "step": 12450 + }, + { + "epoch": 8.38, + "grad_norm": 3.8379404544830322, + "learning_rate": 8.79708542713568e-06, + "loss": 0.1127, + "step": 12475 + }, + { + "epoch": 8.39, + "grad_norm": 3.723242998123169, + "learning_rate": 8.794572864321609e-06, + "loss": 0.1121, + "step": 12500 + }, + { + "epoch": 8.41, + "grad_norm": 3.8169524669647217, + "learning_rate": 8.792060301507538e-06, + "loss": 0.1134, + "step": 12525 + }, + { + "epoch": 8.43, + "grad_norm": 3.87909197807312, + "learning_rate": 8.789547738693467e-06, + "loss": 0.1106, + "step": 12550 + }, + { + "epoch": 8.45, + "grad_norm": 3.8375084400177, + "learning_rate": 8.787035175879398e-06, + "loss": 0.1167, + "step": 12575 + }, + { + "epoch": 8.46, + "grad_norm": 3.783355236053467, + "learning_rate": 8.784522613065328e-06, + "loss": 0.11, + "step": 12600 + }, + { + "epoch": 8.48, + "grad_norm": 3.8292758464813232, + "learning_rate": 8.782010050251257e-06, + "loss": 0.1131, + "step": 12625 + }, + { + "epoch": 8.5, + "grad_norm": 3.8735578060150146, + "learning_rate": 8.779497487437186e-06, + "loss": 0.1162, + "step": 12650 + }, + { + "epoch": 8.51, + "grad_norm": 3.9293081760406494, + "learning_rate": 8.776984924623117e-06, + "loss": 0.1095, + "step": 12675 + }, + { + "epoch": 8.53, + "grad_norm": 3.9596035480499268, + "learning_rate": 8.774472361809045e-06, + "loss": 0.1137, + "step": 12700 + }, + { + "epoch": 8.55, + "grad_norm": 4.5228471755981445, + "learning_rate": 8.771959798994976e-06, + "loss": 0.1142, + "step": 12725 + }, + { + "epoch": 8.56, + "grad_norm": 4.001628398895264, + "learning_rate": 8.769447236180905e-06, + "loss": 0.1154, + "step": 12750 + }, + { + "epoch": 8.58, + "grad_norm": 3.9996211528778076, + "learning_rate": 8.766934673366834e-06, + "loss": 0.1128, + "step": 12775 + }, + { + "epoch": 8.6, + "grad_norm": 4.095664024353027, + "learning_rate": 8.764422110552765e-06, + "loss": 0.1126, + "step": 12800 + }, + { + "epoch": 8.61, + "grad_norm": 4.040421962738037, + "learning_rate": 8.761909547738693e-06, + "loss": 0.1126, + "step": 12825 + }, + { + "epoch": 8.63, + "grad_norm": 3.561824321746826, + "learning_rate": 8.759396984924624e-06, + "loss": 0.1118, + "step": 12850 + }, + { + "epoch": 8.65, + "grad_norm": 3.931749105453491, + "learning_rate": 8.756884422110553e-06, + "loss": 0.1141, + "step": 12875 + }, + { + "epoch": 8.66, + "grad_norm": 3.8188819885253906, + "learning_rate": 8.754371859296483e-06, + "loss": 0.1143, + "step": 12900 + }, + { + "epoch": 8.68, + "grad_norm": 4.068775177001953, + "learning_rate": 8.751859296482412e-06, + "loss": 0.1154, + "step": 12925 + }, + { + "epoch": 8.7, + "grad_norm": 3.881679058074951, + "learning_rate": 8.749346733668343e-06, + "loss": 0.1097, + "step": 12950 + }, + { + "epoch": 8.71, + "grad_norm": 3.8605895042419434, + "learning_rate": 8.746834170854272e-06, + "loss": 0.1072, + "step": 12975 + }, + { + "epoch": 8.73, + "grad_norm": 3.550114870071411, + "learning_rate": 8.744321608040202e-06, + "loss": 0.11, + "step": 13000 + }, + { + "epoch": 8.73, + "eval_loss": 0.1934715360403061, + "eval_runtime": 521.9984, + "eval_samples_per_second": 2.741, + "eval_steps_per_second": 2.741, + "eval_wer": 30.656688241639696, + "step": 13000 + }, + { + "epoch": 8.75, + "grad_norm": 3.651111125946045, + "learning_rate": 8.741809045226131e-06, + "loss": 0.109, + "step": 13025 + }, + { + "epoch": 8.76, + "grad_norm": 4.114384174346924, + "learning_rate": 8.73929648241206e-06, + "loss": 0.1123, + "step": 13050 + }, + { + "epoch": 8.78, + "grad_norm": 3.7194125652313232, + "learning_rate": 8.736783919597991e-06, + "loss": 0.1117, + "step": 13075 + }, + { + "epoch": 8.8, + "grad_norm": 3.789829730987549, + "learning_rate": 8.734271356783919e-06, + "loss": 0.1129, + "step": 13100 + }, + { + "epoch": 8.81, + "grad_norm": 3.899853229522705, + "learning_rate": 8.73175879396985e-06, + "loss": 0.1117, + "step": 13125 + }, + { + "epoch": 8.83, + "grad_norm": 3.770836114883423, + "learning_rate": 8.72924623115578e-06, + "loss": 0.1103, + "step": 13150 + }, + { + "epoch": 8.85, + "grad_norm": 3.759908676147461, + "learning_rate": 8.726733668341709e-06, + "loss": 0.1141, + "step": 13175 + }, + { + "epoch": 8.87, + "grad_norm": 4.557126522064209, + "learning_rate": 8.72422110552764e-06, + "loss": 0.1165, + "step": 13200 + }, + { + "epoch": 8.88, + "grad_norm": 4.287385940551758, + "learning_rate": 8.721708542713569e-06, + "loss": 0.1133, + "step": 13225 + }, + { + "epoch": 8.9, + "grad_norm": 4.241641521453857, + "learning_rate": 8.719195979899498e-06, + "loss": 0.1134, + "step": 13250 + }, + { + "epoch": 8.92, + "grad_norm": 3.602527618408203, + "learning_rate": 8.716683417085428e-06, + "loss": 0.1107, + "step": 13275 + }, + { + "epoch": 8.93, + "grad_norm": 3.8521766662597656, + "learning_rate": 8.714170854271357e-06, + "loss": 0.1126, + "step": 13300 + }, + { + "epoch": 8.95, + "grad_norm": 3.493685483932495, + "learning_rate": 8.711658291457286e-06, + "loss": 0.1117, + "step": 13325 + }, + { + "epoch": 8.97, + "grad_norm": 3.7629683017730713, + "learning_rate": 8.709145728643217e-06, + "loss": 0.1108, + "step": 13350 + }, + { + "epoch": 8.98, + "grad_norm": 4.330238342285156, + "learning_rate": 8.706633165829147e-06, + "loss": 0.1099, + "step": 13375 + }, + { + "epoch": 9.0, + "grad_norm": 4.409573554992676, + "learning_rate": 8.704120603015076e-06, + "loss": 0.1164, + "step": 13400 + }, + { + "epoch": 9.02, + "grad_norm": 3.7009077072143555, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0946, + "step": 13425 + }, + { + "epoch": 9.03, + "grad_norm": 3.161803960800171, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0978, + "step": 13450 + }, + { + "epoch": 9.05, + "grad_norm": 4.2677717208862305, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0969, + "step": 13475 + }, + { + "epoch": 9.07, + "grad_norm": 3.902024984359741, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0953, + "step": 13500 + }, + { + "epoch": 9.08, + "grad_norm": 3.634183168411255, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0976, + "step": 13525 + }, + { + "epoch": 9.1, + "grad_norm": 3.7253787517547607, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0997, + "step": 13550 + }, + { + "epoch": 9.12, + "grad_norm": 4.212484836578369, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0967, + "step": 13575 + }, + { + "epoch": 9.13, + "grad_norm": 3.986149787902832, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0969, + "step": 13600 + }, + { + "epoch": 9.15, + "grad_norm": 3.533597707748413, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0972, + "step": 13625 + }, + { + "epoch": 9.17, + "grad_norm": 3.9616947174072266, + "learning_rate": 8.678994974874373e-06, + "loss": 0.096, + "step": 13650 + }, + { + "epoch": 9.18, + "grad_norm": 4.0185227394104, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0989, + "step": 13675 + }, + { + "epoch": 9.2, + "grad_norm": 3.445549249649048, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0979, + "step": 13700 + }, + { + "epoch": 9.22, + "grad_norm": 3.303227424621582, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0945, + "step": 13725 + }, + { + "epoch": 9.23, + "grad_norm": 3.339839458465576, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0958, + "step": 13750 + }, + { + "epoch": 9.25, + "grad_norm": 3.9664270877838135, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0999, + "step": 13775 + }, + { + "epoch": 9.27, + "grad_norm": 3.428934335708618, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0963, + "step": 13800 + }, + { + "epoch": 9.28, + "grad_norm": 3.64572811126709, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0992, + "step": 13825 + }, + { + "epoch": 9.3, + "grad_norm": 3.687570333480835, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0973, + "step": 13850 + }, + { + "epoch": 9.32, + "grad_norm": 3.700096368789673, + "learning_rate": 8.65638190954774e-06, + "loss": 0.098, + "step": 13875 + }, + { + "epoch": 9.34, + "grad_norm": 3.7237064838409424, + "learning_rate": 8.65386934673367e-06, + "loss": 0.1007, + "step": 13900 + }, + { + "epoch": 9.35, + "grad_norm": 3.944458246231079, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0994, + "step": 13925 + }, + { + "epoch": 9.37, + "grad_norm": 3.877009630203247, + "learning_rate": 8.648844221105528e-06, + "loss": 0.1008, + "step": 13950 + }, + { + "epoch": 9.39, + "grad_norm": 4.268415451049805, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0943, + "step": 13975 + }, + { + "epoch": 9.4, + "grad_norm": 3.4393837451934814, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0944, + "step": 14000 + }, + { + "epoch": 9.4, + "eval_loss": 0.20406368374824524, + "eval_runtime": 524.38, + "eval_samples_per_second": 2.729, + "eval_steps_per_second": 2.729, + "eval_wer": 31.405070118662355, + "step": 14000 + }, + { + "epoch": 9.42, + "grad_norm": 3.8169572353363037, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0947, + "step": 14025 + }, + { + "epoch": 9.44, + "grad_norm": 4.168436050415039, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0935, + "step": 14050 + }, + { + "epoch": 9.45, + "grad_norm": 4.047606945037842, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0991, + "step": 14075 + }, + { + "epoch": 9.47, + "grad_norm": 3.9507668018341064, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0983, + "step": 14100 + }, + { + "epoch": 9.49, + "grad_norm": 3.9071078300476074, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0959, + "step": 14125 + }, + { + "epoch": 9.5, + "grad_norm": 3.8475658893585205, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0987, + "step": 14150 + }, + { + "epoch": 9.52, + "grad_norm": 3.9091365337371826, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0998, + "step": 14175 + }, + { + "epoch": 9.54, + "grad_norm": 4.004209518432617, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0999, + "step": 14200 + }, + { + "epoch": 9.55, + "grad_norm": 3.8058245182037354, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0983, + "step": 14225 + }, + { + "epoch": 9.57, + "grad_norm": 3.4497172832489014, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0976, + "step": 14250 + }, + { + "epoch": 9.59, + "grad_norm": 4.2004313468933105, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0991, + "step": 14275 + }, + { + "epoch": 9.6, + "grad_norm": 3.6409597396850586, + "learning_rate": 8.613668341708544e-06, + "loss": 0.1012, + "step": 14300 + }, + { + "epoch": 9.62, + "grad_norm": 3.7109363079071045, + "learning_rate": 8.611256281407035e-06, + "loss": 0.0991, + "step": 14325 + }, + { + "epoch": 9.64, + "grad_norm": 4.490045547485352, + "learning_rate": 8.608743718592966e-06, + "loss": 0.0962, + "step": 14350 + }, + { + "epoch": 9.65, + "grad_norm": 3.639218807220459, + "learning_rate": 8.606231155778895e-06, + "loss": 0.0959, + "step": 14375 + }, + { + "epoch": 9.67, + "grad_norm": 3.822178363800049, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0989, + "step": 14400 + }, + { + "epoch": 9.69, + "grad_norm": 3.8218026161193848, + "learning_rate": 8.601206030150756e-06, + "loss": 0.1007, + "step": 14425 + }, + { + "epoch": 9.7, + "grad_norm": 4.038983345031738, + "learning_rate": 8.598693467336683e-06, + "loss": 0.1071, + "step": 14450 + }, + { + "epoch": 9.72, + "grad_norm": 4.16408634185791, + "learning_rate": 8.596180904522614e-06, + "loss": 0.099, + "step": 14475 + }, + { + "epoch": 9.74, + "grad_norm": 3.7407357692718506, + "learning_rate": 8.593668341708544e-06, + "loss": 0.1001, + "step": 14500 + }, + { + "epoch": 9.75, + "grad_norm": 3.6288223266601562, + "learning_rate": 8.591155778894473e-06, + "loss": 0.1006, + "step": 14525 + }, + { + "epoch": 9.77, + "grad_norm": 3.865605354309082, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0994, + "step": 14550 + }, + { + "epoch": 9.79, + "grad_norm": 4.16700553894043, + "learning_rate": 8.586130653266332e-06, + "loss": 0.1002, + "step": 14575 + }, + { + "epoch": 9.81, + "grad_norm": 4.108582496643066, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0985, + "step": 14600 + }, + { + "epoch": 9.82, + "grad_norm": 3.601989269256592, + "learning_rate": 8.581105527638192e-06, + "loss": 0.1002, + "step": 14625 + }, + { + "epoch": 9.84, + "grad_norm": 3.893113851547241, + "learning_rate": 8.578592964824121e-06, + "loss": 0.0998, + "step": 14650 + }, + { + "epoch": 9.86, + "grad_norm": 4.066426753997803, + "learning_rate": 8.57608040201005e-06, + "loss": 0.1018, + "step": 14675 + }, + { + "epoch": 9.87, + "grad_norm": 3.8850743770599365, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0953, + "step": 14700 + }, + { + "epoch": 9.89, + "grad_norm": 3.5306355953216553, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0979, + "step": 14725 + }, + { + "epoch": 9.91, + "grad_norm": 4.356175899505615, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0936, + "step": 14750 + }, + { + "epoch": 9.92, + "grad_norm": 4.128498554229736, + "learning_rate": 8.56603015075377e-06, + "loss": 0.1007, + "step": 14775 + }, + { + "epoch": 9.94, + "grad_norm": 3.573347568511963, + "learning_rate": 8.563517587939699e-06, + "loss": 0.1026, + "step": 14800 + }, + { + "epoch": 9.96, + "grad_norm": 3.949913740158081, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0944, + "step": 14825 + }, + { + "epoch": 9.97, + "grad_norm": 4.933910369873047, + "learning_rate": 8.558492462311558e-06, + "loss": 0.1043, + "step": 14850 + }, + { + "epoch": 9.99, + "grad_norm": 3.930229902267456, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0925, + "step": 14875 + }, + { + "epoch": 10.01, + "grad_norm": 3.3385329246520996, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0921, + "step": 14900 + }, + { + "epoch": 10.02, + "grad_norm": 3.7033607959747314, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0843, + "step": 14925 + }, + { + "epoch": 10.04, + "grad_norm": 3.665747880935669, + "learning_rate": 8.548442211055277e-06, + "loss": 0.0842, + "step": 14950 + }, + { + "epoch": 10.06, + "grad_norm": 3.6828763484954834, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0801, + "step": 14975 + }, + { + "epoch": 10.07, + "grad_norm": 3.8735620975494385, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0853, + "step": 15000 + }, + { + "epoch": 10.07, + "eval_loss": 0.2068934440612793, + "eval_runtime": 517.4528, + "eval_samples_per_second": 2.765, + "eval_steps_per_second": 2.765, + "eval_wer": 31.290453074433657, + "step": 15000 + }, + { + "epoch": 10.09, + "grad_norm": 3.589425802230835, + "learning_rate": 8.540904522613066e-06, + "loss": 0.0825, + "step": 15025 + }, + { + "epoch": 10.11, + "grad_norm": 4.506229877471924, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0847, + "step": 15050 + }, + { + "epoch": 10.12, + "grad_norm": 3.8063483238220215, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0817, + "step": 15075 + }, + { + "epoch": 10.14, + "grad_norm": 3.8157806396484375, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0831, + "step": 15100 + }, + { + "epoch": 10.16, + "grad_norm": 3.6821351051330566, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0853, + "step": 15125 + }, + { + "epoch": 10.17, + "grad_norm": 3.997706890106201, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0839, + "step": 15150 + }, + { + "epoch": 10.19, + "grad_norm": 3.7132394313812256, + "learning_rate": 8.525829145728644e-06, + "loss": 0.0849, + "step": 15175 + }, + { + "epoch": 10.21, + "grad_norm": 3.757101535797119, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0829, + "step": 15200 + }, + { + "epoch": 10.22, + "grad_norm": 3.461876630783081, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0837, + "step": 15225 + }, + { + "epoch": 10.24, + "grad_norm": 3.6833889484405518, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0863, + "step": 15250 + }, + { + "epoch": 10.26, + "grad_norm": 3.6270928382873535, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0845, + "step": 15275 + }, + { + "epoch": 10.28, + "grad_norm": 4.094012260437012, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0863, + "step": 15300 + }, + { + "epoch": 10.29, + "grad_norm": 3.6051464080810547, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0852, + "step": 15325 + }, + { + "epoch": 10.31, + "grad_norm": 3.6701347827911377, + "learning_rate": 8.508241206030151e-06, + "loss": 0.0827, + "step": 15350 + }, + { + "epoch": 10.33, + "grad_norm": 4.2522501945495605, + "learning_rate": 8.505728643216082e-06, + "loss": 0.085, + "step": 15375 + }, + { + "epoch": 10.34, + "grad_norm": 3.596078634262085, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0851, + "step": 15400 + }, + { + "epoch": 10.36, + "grad_norm": 3.8116157054901123, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0882, + "step": 15425 + }, + { + "epoch": 10.38, + "grad_norm": 3.717686891555786, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0834, + "step": 15450 + }, + { + "epoch": 10.39, + "grad_norm": 4.043933868408203, + "learning_rate": 8.4956783919598e-06, + "loss": 0.084, + "step": 15475 + }, + { + "epoch": 10.41, + "grad_norm": 3.7038233280181885, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0837, + "step": 15500 + }, + { + "epoch": 10.43, + "grad_norm": 3.8469133377075195, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0898, + "step": 15525 + }, + { + "epoch": 10.44, + "grad_norm": 3.7264349460601807, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0843, + "step": 15550 + }, + { + "epoch": 10.46, + "grad_norm": 3.5248398780822754, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0812, + "step": 15575 + }, + { + "epoch": 10.48, + "grad_norm": 3.4301891326904297, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0844, + "step": 15600 + }, + { + "epoch": 10.49, + "grad_norm": 3.7679526805877686, + "learning_rate": 8.480603015075377e-06, + "loss": 0.0826, + "step": 15625 + }, + { + "epoch": 10.51, + "grad_norm": 3.818513870239258, + "learning_rate": 8.478090452261308e-06, + "loss": 0.0875, + "step": 15650 + }, + { + "epoch": 10.53, + "grad_norm": 3.678086519241333, + "learning_rate": 8.475577889447237e-06, + "loss": 0.0869, + "step": 15675 + }, + { + "epoch": 10.54, + "grad_norm": 3.691681385040283, + "learning_rate": 8.473065326633166e-06, + "loss": 0.0849, + "step": 15700 + }, + { + "epoch": 10.56, + "grad_norm": 4.105345726013184, + "learning_rate": 8.470552763819096e-06, + "loss": 0.0839, + "step": 15725 + }, + { + "epoch": 10.58, + "grad_norm": 3.996507167816162, + "learning_rate": 8.468040201005025e-06, + "loss": 0.0865, + "step": 15750 + }, + { + "epoch": 10.59, + "grad_norm": 4.219810485839844, + "learning_rate": 8.465527638190956e-06, + "loss": 0.0837, + "step": 15775 + }, + { + "epoch": 10.61, + "grad_norm": 4.238875865936279, + "learning_rate": 8.463015075376885e-06, + "loss": 0.087, + "step": 15800 + }, + { + "epoch": 10.63, + "grad_norm": 3.784708023071289, + "learning_rate": 8.460502512562815e-06, + "loss": 0.0861, + "step": 15825 + }, + { + "epoch": 10.64, + "grad_norm": 4.196766376495361, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0843, + "step": 15850 + }, + { + "epoch": 10.66, + "grad_norm": 3.8529000282287598, + "learning_rate": 8.455477386934673e-06, + "loss": 0.0867, + "step": 15875 + }, + { + "epoch": 10.68, + "grad_norm": 3.939037799835205, + "learning_rate": 8.452964824120604e-06, + "loss": 0.084, + "step": 15900 + }, + { + "epoch": 10.7, + "grad_norm": 4.0684685707092285, + "learning_rate": 8.450452261306534e-06, + "loss": 0.088, + "step": 15925 + }, + { + "epoch": 10.71, + "grad_norm": 4.006753444671631, + "learning_rate": 8.447939698492463e-06, + "loss": 0.0819, + "step": 15950 + }, + { + "epoch": 10.73, + "grad_norm": 4.7473464012146, + "learning_rate": 8.445427135678392e-06, + "loss": 0.0863, + "step": 15975 + }, + { + "epoch": 10.75, + "grad_norm": 3.692169666290283, + "learning_rate": 8.442914572864322e-06, + "loss": 0.0838, + "step": 16000 + }, + { + "epoch": 10.75, + "eval_loss": 0.21320676803588867, + "eval_runtime": 514.2938, + "eval_samples_per_second": 2.782, + "eval_steps_per_second": 2.782, + "eval_wer": 31.77588996763754, + "step": 16000 + }, + { + "epoch": 10.76, + "grad_norm": 3.927593469619751, + "learning_rate": 8.440402010050251e-06, + "loss": 0.0856, + "step": 16025 + }, + { + "epoch": 10.78, + "grad_norm": 4.0144524574279785, + "learning_rate": 8.437889447236182e-06, + "loss": 0.0841, + "step": 16050 + }, + { + "epoch": 10.8, + "grad_norm": 4.02933406829834, + "learning_rate": 8.435376884422111e-06, + "loss": 0.0891, + "step": 16075 + }, + { + "epoch": 10.81, + "grad_norm": 3.5581908226013184, + "learning_rate": 8.43286432160804e-06, + "loss": 0.0866, + "step": 16100 + }, + { + "epoch": 10.83, + "grad_norm": 4.252247333526611, + "learning_rate": 8.430351758793972e-06, + "loss": 0.0894, + "step": 16125 + }, + { + "epoch": 10.85, + "grad_norm": 3.6094064712524414, + "learning_rate": 8.4278391959799e-06, + "loss": 0.0858, + "step": 16150 + }, + { + "epoch": 10.86, + "grad_norm": 3.95078182220459, + "learning_rate": 8.42532663316583e-06, + "loss": 0.0861, + "step": 16175 + }, + { + "epoch": 10.88, + "grad_norm": 4.000092506408691, + "learning_rate": 8.42281407035176e-06, + "loss": 0.089, + "step": 16200 + }, + { + "epoch": 10.9, + "grad_norm": 4.0420403480529785, + "learning_rate": 8.420301507537689e-06, + "loss": 0.0873, + "step": 16225 + }, + { + "epoch": 10.91, + "grad_norm": 3.5944478511810303, + "learning_rate": 8.417788944723618e-06, + "loss": 0.0872, + "step": 16250 + }, + { + "epoch": 10.93, + "grad_norm": 3.625338554382324, + "learning_rate": 8.415276381909548e-06, + "loss": 0.0863, + "step": 16275 + }, + { + "epoch": 10.95, + "grad_norm": 3.951098918914795, + "learning_rate": 8.412763819095479e-06, + "loss": 0.0846, + "step": 16300 + }, + { + "epoch": 10.96, + "grad_norm": 3.828604221343994, + "learning_rate": 8.410251256281408e-06, + "loss": 0.0876, + "step": 16325 + }, + { + "epoch": 10.98, + "grad_norm": 3.8777964115142822, + "learning_rate": 8.407738693467337e-06, + "loss": 0.0868, + "step": 16350 + }, + { + "epoch": 11.0, + "grad_norm": 3.7804203033447266, + "learning_rate": 8.405226130653267e-06, + "loss": 0.0868, + "step": 16375 + }, + { + "epoch": 11.01, + "grad_norm": 3.232404947280884, + "learning_rate": 8.402713567839198e-06, + "loss": 0.0704, + "step": 16400 + }, + { + "epoch": 11.03, + "grad_norm": 3.874542474746704, + "learning_rate": 8.400201005025125e-06, + "loss": 0.0687, + "step": 16425 + }, + { + "epoch": 11.05, + "grad_norm": 3.4492666721343994, + "learning_rate": 8.397688442211056e-06, + "loss": 0.0714, + "step": 16450 + }, + { + "epoch": 11.06, + "grad_norm": 3.7225394248962402, + "learning_rate": 8.395175879396986e-06, + "loss": 0.0704, + "step": 16475 + }, + { + "epoch": 11.08, + "grad_norm": 3.4964494705200195, + "learning_rate": 8.392663316582915e-06, + "loss": 0.0689, + "step": 16500 + }, + { + "epoch": 11.1, + "grad_norm": 3.3233611583709717, + "learning_rate": 8.390150753768846e-06, + "loss": 0.0711, + "step": 16525 + }, + { + "epoch": 11.11, + "grad_norm": 3.5476720333099365, + "learning_rate": 8.387638190954774e-06, + "loss": 0.0695, + "step": 16550 + }, + { + "epoch": 11.13, + "grad_norm": 3.4129252433776855, + "learning_rate": 8.385125628140705e-06, + "loss": 0.0727, + "step": 16575 + }, + { + "epoch": 11.15, + "grad_norm": 3.5327062606811523, + "learning_rate": 8.382613065326634e-06, + "loss": 0.0699, + "step": 16600 + }, + { + "epoch": 11.17, + "grad_norm": 3.601076364517212, + "learning_rate": 8.380100502512563e-06, + "loss": 0.0727, + "step": 16625 + }, + { + "epoch": 11.18, + "grad_norm": 3.3089394569396973, + "learning_rate": 8.377587939698493e-06, + "loss": 0.0702, + "step": 16650 + }, + { + "epoch": 11.2, + "grad_norm": 3.6880123615264893, + "learning_rate": 8.375075376884424e-06, + "loss": 0.0714, + "step": 16675 + }, + { + "epoch": 11.22, + "grad_norm": 4.032638072967529, + "learning_rate": 8.372562814070353e-06, + "loss": 0.0717, + "step": 16700 + }, + { + "epoch": 11.23, + "grad_norm": 3.913663387298584, + "learning_rate": 8.370050251256282e-06, + "loss": 0.0727, + "step": 16725 + }, + { + "epoch": 11.25, + "grad_norm": 3.2770915031433105, + "learning_rate": 8.367537688442212e-06, + "loss": 0.0711, + "step": 16750 + }, + { + "epoch": 11.27, + "grad_norm": 3.490957736968994, + "learning_rate": 8.365025125628141e-06, + "loss": 0.0684, + "step": 16775 + }, + { + "epoch": 11.28, + "grad_norm": 4.257595539093018, + "learning_rate": 8.362512562814072e-06, + "loss": 0.0747, + "step": 16800 + }, + { + "epoch": 11.3, + "grad_norm": 3.4682705402374268, + "learning_rate": 8.36e-06, + "loss": 0.0762, + "step": 16825 + }, + { + "epoch": 11.32, + "grad_norm": 3.721459150314331, + "learning_rate": 8.35748743718593e-06, + "loss": 0.0738, + "step": 16850 + }, + { + "epoch": 11.33, + "grad_norm": 3.7301228046417236, + "learning_rate": 8.35497487437186e-06, + "loss": 0.0762, + "step": 16875 + }, + { + "epoch": 11.35, + "grad_norm": 3.43595814704895, + "learning_rate": 8.35246231155779e-06, + "loss": 0.0718, + "step": 16900 + }, + { + "epoch": 11.37, + "grad_norm": 3.4584271907806396, + "learning_rate": 8.34994974874372e-06, + "loss": 0.0721, + "step": 16925 + }, + { + "epoch": 11.38, + "grad_norm": 3.697406768798828, + "learning_rate": 8.34743718592965e-06, + "loss": 0.0765, + "step": 16950 + }, + { + "epoch": 11.4, + "grad_norm": 3.936040163040161, + "learning_rate": 8.344924623115579e-06, + "loss": 0.0719, + "step": 16975 + }, + { + "epoch": 11.42, + "grad_norm": 4.001731872558594, + "learning_rate": 8.342412060301508e-06, + "loss": 0.0737, + "step": 17000 + }, + { + "epoch": 11.42, + "eval_loss": 0.22083650529384613, + "eval_runtime": 513.2844, + "eval_samples_per_second": 2.788, + "eval_steps_per_second": 2.788, + "eval_wer": 31.998381877022652, + "step": 17000 + }, + { + "epoch": 11.43, + "grad_norm": 4.0719804763793945, + "learning_rate": 8.339899497487438e-06, + "loss": 0.0727, + "step": 17025 + }, + { + "epoch": 11.45, + "grad_norm": 4.128214359283447, + "learning_rate": 8.337386934673367e-06, + "loss": 0.0718, + "step": 17050 + }, + { + "epoch": 11.47, + "grad_norm": 3.991882085800171, + "learning_rate": 8.334874371859298e-06, + "loss": 0.0748, + "step": 17075 + }, + { + "epoch": 11.48, + "grad_norm": 3.8897290229797363, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0721, + "step": 17100 + }, + { + "epoch": 11.5, + "grad_norm": 4.137451171875, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0741, + "step": 17125 + }, + { + "epoch": 11.52, + "grad_norm": 4.077966213226318, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0717, + "step": 17150 + }, + { + "epoch": 11.53, + "grad_norm": 3.4739699363708496, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0749, + "step": 17175 + }, + { + "epoch": 11.55, + "grad_norm": 3.764387607574463, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0732, + "step": 17200 + }, + { + "epoch": 11.57, + "grad_norm": 3.6361474990844727, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0738, + "step": 17225 + }, + { + "epoch": 11.58, + "grad_norm": 3.635225772857666, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0772, + "step": 17250 + }, + { + "epoch": 11.6, + "grad_norm": 3.676656723022461, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0727, + "step": 17275 + }, + { + "epoch": 11.62, + "grad_norm": 3.6993844509124756, + "learning_rate": 8.312261306532663e-06, + "loss": 0.0726, + "step": 17300 + }, + { + "epoch": 11.64, + "grad_norm": 3.6938228607177734, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0721, + "step": 17325 + }, + { + "epoch": 11.65, + "grad_norm": 3.891838312149048, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0762, + "step": 17350 + }, + { + "epoch": 11.67, + "grad_norm": 3.730008602142334, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0739, + "step": 17375 + }, + { + "epoch": 11.69, + "grad_norm": 3.9901716709136963, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0765, + "step": 17400 + }, + { + "epoch": 11.7, + "grad_norm": 4.361217021942139, + "learning_rate": 8.299698492462312e-06, + "loss": 0.074, + "step": 17425 + }, + { + "epoch": 11.72, + "grad_norm": 3.491443634033203, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0715, + "step": 17450 + }, + { + "epoch": 11.74, + "grad_norm": 4.188762664794922, + "learning_rate": 8.294673366834172e-06, + "loss": 0.074, + "step": 17475 + }, + { + "epoch": 11.75, + "grad_norm": 3.507265567779541, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0737, + "step": 17500 + }, + { + "epoch": 11.77, + "grad_norm": 3.9274561405181885, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0724, + "step": 17525 + }, + { + "epoch": 11.79, + "grad_norm": 3.7808096408843994, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0746, + "step": 17550 + }, + { + "epoch": 11.8, + "grad_norm": 3.5119686126708984, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0739, + "step": 17575 + }, + { + "epoch": 11.82, + "grad_norm": 4.072975158691406, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0768, + "step": 17600 + }, + { + "epoch": 11.84, + "grad_norm": 3.5535285472869873, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0755, + "step": 17625 + }, + { + "epoch": 11.85, + "grad_norm": 3.8786537647247314, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0743, + "step": 17650 + }, + { + "epoch": 11.87, + "grad_norm": 3.4643757343292236, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0736, + "step": 17675 + }, + { + "epoch": 11.89, + "grad_norm": 3.5160932540893555, + "learning_rate": 8.272060301507538e-06, + "loss": 0.078, + "step": 17700 + }, + { + "epoch": 11.9, + "grad_norm": 3.9371297359466553, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0757, + "step": 17725 + }, + { + "epoch": 11.92, + "grad_norm": 3.9696269035339355, + "learning_rate": 8.267035175879398e-06, + "loss": 0.074, + "step": 17750 + }, + { + "epoch": 11.94, + "grad_norm": 4.303014755249023, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0743, + "step": 17775 + }, + { + "epoch": 11.95, + "grad_norm": 4.200374603271484, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0763, + "step": 17800 + }, + { + "epoch": 11.97, + "grad_norm": 4.343672275543213, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0758, + "step": 17825 + }, + { + "epoch": 11.99, + "grad_norm": 3.9587512016296387, + "learning_rate": 8.256984924623115e-06, + "loss": 0.0756, + "step": 17850 + }, + { + "epoch": 12.0, + "grad_norm": 3.4314370155334473, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0719, + "step": 17875 + }, + { + "epoch": 12.02, + "grad_norm": 3.144179344177246, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0579, + "step": 17900 + }, + { + "epoch": 12.04, + "grad_norm": 3.7249393463134766, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0565, + "step": 17925 + }, + { + "epoch": 12.06, + "grad_norm": 3.5111277103424072, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0579, + "step": 17950 + }, + { + "epoch": 12.07, + "grad_norm": 3.397099256515503, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0602, + "step": 17975 + }, + { + "epoch": 12.09, + "grad_norm": 3.352158784866333, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0613, + "step": 18000 + }, + { + "epoch": 12.09, + "eval_loss": 0.23096969723701477, + "eval_runtime": 512.4613, + "eval_samples_per_second": 2.792, + "eval_steps_per_second": 2.792, + "eval_wer": 32.80070118662351, + "step": 18000 + }, + { + "epoch": 12.11, + "grad_norm": 3.94340181350708, + "learning_rate": 8.239396984924624e-06, + "loss": 0.059, + "step": 18025 + }, + { + "epoch": 12.12, + "grad_norm": 3.9054033756256104, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0609, + "step": 18050 + }, + { + "epoch": 12.14, + "grad_norm": 3.1687161922454834, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0595, + "step": 18075 + }, + { + "epoch": 12.16, + "grad_norm": 3.580167770385742, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0623, + "step": 18100 + }, + { + "epoch": 12.17, + "grad_norm": 3.5732204914093018, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0605, + "step": 18125 + }, + { + "epoch": 12.19, + "grad_norm": 4.061110019683838, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0607, + "step": 18150 + }, + { + "epoch": 12.21, + "grad_norm": 3.8575000762939453, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0597, + "step": 18175 + }, + { + "epoch": 12.22, + "grad_norm": 3.2625975608825684, + "learning_rate": 8.221809045226131e-06, + "loss": 0.061, + "step": 18200 + }, + { + "epoch": 12.24, + "grad_norm": 3.329768657684326, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0593, + "step": 18225 + }, + { + "epoch": 12.26, + "grad_norm": 3.4822239875793457, + "learning_rate": 8.21678391959799e-06, + "loss": 0.058, + "step": 18250 + }, + { + "epoch": 12.27, + "grad_norm": 3.8274590969085693, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0628, + "step": 18275 + }, + { + "epoch": 12.29, + "grad_norm": 3.949167490005493, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0593, + "step": 18300 + }, + { + "epoch": 12.31, + "grad_norm": 3.5620148181915283, + "learning_rate": 8.209346733668342e-06, + "loss": 0.0595, + "step": 18325 + }, + { + "epoch": 12.32, + "grad_norm": 3.922238349914551, + "learning_rate": 8.206834170854273e-06, + "loss": 0.0643, + "step": 18350 + }, + { + "epoch": 12.34, + "grad_norm": 3.2374136447906494, + "learning_rate": 8.204321608040202e-06, + "loss": 0.0619, + "step": 18375 + }, + { + "epoch": 12.36, + "grad_norm": 3.6573832035064697, + "learning_rate": 8.201809045226131e-06, + "loss": 0.0649, + "step": 18400 + }, + { + "epoch": 12.37, + "grad_norm": 3.654667377471924, + "learning_rate": 8.19929648241206e-06, + "loss": 0.0643, + "step": 18425 + }, + { + "epoch": 12.39, + "grad_norm": 3.6674280166625977, + "learning_rate": 8.19678391959799e-06, + "loss": 0.0644, + "step": 18450 + }, + { + "epoch": 12.41, + "grad_norm": 3.713149070739746, + "learning_rate": 8.194271356783921e-06, + "loss": 0.0631, + "step": 18475 + }, + { + "epoch": 12.42, + "grad_norm": 3.520317792892456, + "learning_rate": 8.19175879396985e-06, + "loss": 0.0604, + "step": 18500 + }, + { + "epoch": 12.44, + "grad_norm": 3.6236910820007324, + "learning_rate": 8.18924623115578e-06, + "loss": 0.0609, + "step": 18525 + }, + { + "epoch": 12.46, + "grad_norm": 3.7595889568328857, + "learning_rate": 8.186733668341709e-06, + "loss": 0.0612, + "step": 18550 + }, + { + "epoch": 12.47, + "grad_norm": 3.2042977809906006, + "learning_rate": 8.184221105527638e-06, + "loss": 0.0608, + "step": 18575 + }, + { + "epoch": 12.49, + "grad_norm": 3.5839831829071045, + "learning_rate": 8.18170854271357e-06, + "loss": 0.0605, + "step": 18600 + }, + { + "epoch": 12.51, + "grad_norm": 3.4118340015411377, + "learning_rate": 8.179195979899498e-06, + "loss": 0.0626, + "step": 18625 + }, + { + "epoch": 12.53, + "grad_norm": 3.53318452835083, + "learning_rate": 8.176683417085428e-06, + "loss": 0.0636, + "step": 18650 + }, + { + "epoch": 12.54, + "grad_norm": 3.73972749710083, + "learning_rate": 8.174170854271357e-06, + "loss": 0.0647, + "step": 18675 + }, + { + "epoch": 12.56, + "grad_norm": 3.85784912109375, + "learning_rate": 8.171658291457286e-06, + "loss": 0.0607, + "step": 18700 + }, + { + "epoch": 12.58, + "grad_norm": 3.6849751472473145, + "learning_rate": 8.169145728643216e-06, + "loss": 0.0653, + "step": 18725 + }, + { + "epoch": 12.59, + "grad_norm": 3.4816763401031494, + "learning_rate": 8.166633165829147e-06, + "loss": 0.0632, + "step": 18750 + }, + { + "epoch": 12.61, + "grad_norm": 3.7751224040985107, + "learning_rate": 8.164120603015076e-06, + "loss": 0.0633, + "step": 18775 + }, + { + "epoch": 12.63, + "grad_norm": 3.649627208709717, + "learning_rate": 8.161608040201005e-06, + "loss": 0.0622, + "step": 18800 + }, + { + "epoch": 12.64, + "grad_norm": 4.23603630065918, + "learning_rate": 8.159095477386936e-06, + "loss": 0.0637, + "step": 18825 + }, + { + "epoch": 12.66, + "grad_norm": 3.636678695678711, + "learning_rate": 8.156582914572864e-06, + "loss": 0.0647, + "step": 18850 + }, + { + "epoch": 12.68, + "grad_norm": 3.7286479473114014, + "learning_rate": 8.154070351758795e-06, + "loss": 0.062, + "step": 18875 + }, + { + "epoch": 12.69, + "grad_norm": 3.8103444576263428, + "learning_rate": 8.151557788944724e-06, + "loss": 0.0644, + "step": 18900 + }, + { + "epoch": 12.71, + "grad_norm": 3.5667850971221924, + "learning_rate": 8.149045226130654e-06, + "loss": 0.0649, + "step": 18925 + }, + { + "epoch": 12.73, + "grad_norm": 3.7628564834594727, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0633, + "step": 18950 + }, + { + "epoch": 12.74, + "grad_norm": 4.083582401275635, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0632, + "step": 18975 + }, + { + "epoch": 12.76, + "grad_norm": 4.067498207092285, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0651, + "step": 19000 + }, + { + "epoch": 12.76, + "eval_loss": 0.2384849190711975, + "eval_runtime": 510.0081, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 2.806, + "eval_wer": 32.477076591154265, + "step": 19000 + }, + { + "epoch": 12.78, + "grad_norm": 3.6416635513305664, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0647, + "step": 19025 + }, + { + "epoch": 12.79, + "grad_norm": 3.600551128387451, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0621, + "step": 19050 + }, + { + "epoch": 12.81, + "grad_norm": 4.586661338806152, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0626, + "step": 19075 + }, + { + "epoch": 12.83, + "grad_norm": 4.498733997344971, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0657, + "step": 19100 + }, + { + "epoch": 12.84, + "grad_norm": 3.6843016147613525, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0644, + "step": 19125 + }, + { + "epoch": 12.86, + "grad_norm": 3.938875198364258, + "learning_rate": 8.126432160804021e-06, + "loss": 0.0639, + "step": 19150 + }, + { + "epoch": 12.88, + "grad_norm": 3.8744261264801025, + "learning_rate": 8.12391959798995e-06, + "loss": 0.0623, + "step": 19175 + }, + { + "epoch": 12.89, + "grad_norm": 4.295655727386475, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0634, + "step": 19200 + }, + { + "epoch": 12.91, + "grad_norm": 4.014922618865967, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0645, + "step": 19225 + }, + { + "epoch": 12.93, + "grad_norm": 3.778974771499634, + "learning_rate": 8.11638190954774e-06, + "loss": 0.0656, + "step": 19250 + }, + { + "epoch": 12.94, + "grad_norm": 3.9158711433410645, + "learning_rate": 8.11386934673367e-06, + "loss": 0.0654, + "step": 19275 + }, + { + "epoch": 12.96, + "grad_norm": 3.5712993144989014, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0656, + "step": 19300 + }, + { + "epoch": 12.98, + "grad_norm": 4.457583904266357, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0653, + "step": 19325 + }, + { + "epoch": 13.0, + "grad_norm": 3.9733550548553467, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0673, + "step": 19350 + }, + { + "epoch": 13.01, + "grad_norm": 3.3184280395507812, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0557, + "step": 19375 + }, + { + "epoch": 13.03, + "grad_norm": 3.5367844104766846, + "learning_rate": 8.10140703517588e-06, + "loss": 0.0519, + "step": 19400 + }, + { + "epoch": 13.05, + "grad_norm": 3.3739562034606934, + "learning_rate": 8.098894472361811e-06, + "loss": 0.0492, + "step": 19425 + }, + { + "epoch": 13.06, + "grad_norm": 3.0413777828216553, + "learning_rate": 8.096381909547739e-06, + "loss": 0.0515, + "step": 19450 + }, + { + "epoch": 13.08, + "grad_norm": 3.3823771476745605, + "learning_rate": 8.09386934673367e-06, + "loss": 0.0512, + "step": 19475 + }, + { + "epoch": 13.1, + "grad_norm": 3.6641411781311035, + "learning_rate": 8.091356783919599e-06, + "loss": 0.049, + "step": 19500 + }, + { + "epoch": 13.11, + "grad_norm": 3.041918992996216, + "learning_rate": 8.088844221105528e-06, + "loss": 0.0489, + "step": 19525 + }, + { + "epoch": 13.13, + "grad_norm": 3.109720230102539, + "learning_rate": 8.086331658291458e-06, + "loss": 0.0497, + "step": 19550 + }, + { + "epoch": 13.15, + "grad_norm": 3.4649603366851807, + "learning_rate": 8.083819095477387e-06, + "loss": 0.0499, + "step": 19575 + }, + { + "epoch": 13.16, + "grad_norm": 3.5612220764160156, + "learning_rate": 8.081306532663318e-06, + "loss": 0.0501, + "step": 19600 + }, + { + "epoch": 13.18, + "grad_norm": 4.157169342041016, + "learning_rate": 8.078793969849247e-06, + "loss": 0.051, + "step": 19625 + }, + { + "epoch": 13.2, + "grad_norm": 3.4699630737304688, + "learning_rate": 8.076281407035177e-06, + "loss": 0.0506, + "step": 19650 + }, + { + "epoch": 13.21, + "grad_norm": 3.2808892726898193, + "learning_rate": 8.073768844221106e-06, + "loss": 0.0522, + "step": 19675 + }, + { + "epoch": 13.23, + "grad_norm": 3.4986891746520996, + "learning_rate": 8.071256281407037e-06, + "loss": 0.0511, + "step": 19700 + }, + { + "epoch": 13.25, + "grad_norm": 3.341071605682373, + "learning_rate": 8.068743718592964e-06, + "loss": 0.0523, + "step": 19725 + }, + { + "epoch": 13.26, + "grad_norm": 4.219409465789795, + "learning_rate": 8.066231155778895e-06, + "loss": 0.0545, + "step": 19750 + }, + { + "epoch": 13.28, + "grad_norm": 3.7513458728790283, + "learning_rate": 8.063718592964825e-06, + "loss": 0.0522, + "step": 19775 + }, + { + "epoch": 13.3, + "grad_norm": 3.603447914123535, + "learning_rate": 8.061206030150754e-06, + "loss": 0.051, + "step": 19800 + }, + { + "epoch": 13.31, + "grad_norm": 3.5638389587402344, + "learning_rate": 8.058693467336685e-06, + "loss": 0.0532, + "step": 19825 + }, + { + "epoch": 13.33, + "grad_norm": 3.6939656734466553, + "learning_rate": 8.056180904522613e-06, + "loss": 0.0524, + "step": 19850 + }, + { + "epoch": 13.35, + "grad_norm": 3.5885846614837646, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0537, + "step": 19875 + }, + { + "epoch": 13.36, + "grad_norm": 3.144559144973755, + "learning_rate": 8.051155778894473e-06, + "loss": 0.0529, + "step": 19900 + }, + { + "epoch": 13.38, + "grad_norm": 3.166619062423706, + "learning_rate": 8.048643216080402e-06, + "loss": 0.0544, + "step": 19925 + }, + { + "epoch": 13.4, + "grad_norm": 3.5741381645202637, + "learning_rate": 8.046130653266332e-06, + "loss": 0.054, + "step": 19950 + }, + { + "epoch": 13.42, + "grad_norm": 4.110526084899902, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0537, + "step": 19975 + }, + { + "epoch": 13.43, + "grad_norm": 3.5379180908203125, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0535, + "step": 20000 + }, + { + "epoch": 13.43, + "eval_loss": 0.24679304659366608, + "eval_runtime": 501.8206, + "eval_samples_per_second": 2.852, + "eval_steps_per_second": 2.852, + "eval_wer": 32.69956850053937, + "step": 20000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 68, + "save_steps": 1000, + "total_flos": 3.150705222549504e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/magahi/checkpoint-20000/training_args.bin b/checkpoints/whisper-tiny/magahi/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e73f53693022af2d58e28dca992b8b97aa0e55bc --- /dev/null +++ b/checkpoints/whisper-tiny/magahi/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f9fc81e4ad892b06bff91eb0504f5266c0cc6343eb470452100d594e12f895 +size 4667 diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/config.json b/checkpoints/whisper-tiny/maithili/checkpoint-20000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c47e7ae5f6c65847b8952aa0e827c7f13a489891 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50276 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/generation_config.json b/checkpoints/whisper-tiny/maithili/checkpoint-20000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/model.safetensors b/checkpoints/whisper-tiny/maithili/checkpoint-20000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f64d5c4882477d3af3f99f0ac4df296d27dd8430 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a05fa64681f95274a57ed1f9ee935f0d97b64faca634571edec40b845470880 +size 151061672 diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/optimizer.pt b/checkpoints/whisper-tiny/maithili/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e850ddf533500066bd747ac5ee69e105fe021d4d --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d005203c34e0019929248c5ec36dd4cf0edec52feb2c6c063453e0f4a6069b +size 297615749 diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/preprocessor_config.json b/checkpoints/whisper-tiny/maithili/checkpoint-20000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/rng_state.pth b/checkpoints/whisper-tiny/maithili/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f1e6022bc62ea805086e336e039eb6fec891ad0 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001dd829c061f4e1a06cddc9c203239a2e3cb058358d5726cd42527a9c3668a0 +size 14575 diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/scheduler.pt b/checkpoints/whisper-tiny/maithili/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3362a23888da3971ecbdf83bb5c0766680abb929 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4c57795bc7e8298d05a0d53fb436efd8d26c13f7d48d21b5090c941852d177 +size 627 diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/trainer_state.json b/checkpoints/whisper-tiny/maithili/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b48625da23ff4d10ed7209ea08f49799efa3e70c --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/trainer_state.json @@ -0,0 +1,5801 @@ +{ + "best_metric": 28.12175717744725, + "best_model_checkpoint": "results/whisper-tiny/maithili/checkpoint-10000", + "epoch": 13.431833445265278, + "eval_steps": 1000, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 83.32074737548828, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.9641, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 38.8628044128418, + "learning_rate": 9.200000000000001e-07, + "loss": 3.3025, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 15.426881790161133, + "learning_rate": 1.42e-06, + "loss": 2.4904, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 8.933202743530273, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.8978, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 6.8524699211120605, + "learning_rate": 2.42e-06, + "loss": 1.4681, + "step": 125 + }, + { + "epoch": 0.1, + "grad_norm": 5.693584442138672, + "learning_rate": 2.92e-06, + "loss": 1.2126, + "step": 150 + }, + { + "epoch": 0.12, + "grad_norm": 6.243007659912109, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.0182, + "step": 175 + }, + { + "epoch": 0.13, + "grad_norm": 5.5485687255859375, + "learning_rate": 3.920000000000001e-06, + "loss": 0.8814, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 4.904171466827393, + "learning_rate": 4.42e-06, + "loss": 0.7901, + "step": 225 + }, + { + "epoch": 0.17, + "grad_norm": 4.8001532554626465, + "learning_rate": 4.92e-06, + "loss": 0.7144, + "step": 250 + }, + { + "epoch": 0.18, + "grad_norm": 4.733066082000732, + "learning_rate": 5.420000000000001e-06, + "loss": 0.6554, + "step": 275 + }, + { + "epoch": 0.2, + "grad_norm": 4.648881435394287, + "learning_rate": 5.92e-06, + "loss": 0.631, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 5.993784427642822, + "learning_rate": 6.42e-06, + "loss": 0.5917, + "step": 325 + }, + { + "epoch": 0.24, + "grad_norm": 4.416947364807129, + "learning_rate": 6.92e-06, + "loss": 0.5582, + "step": 350 + }, + { + "epoch": 0.25, + "grad_norm": 4.789530277252197, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5416, + "step": 375 + }, + { + "epoch": 0.27, + "grad_norm": 4.663525104522705, + "learning_rate": 7.92e-06, + "loss": 0.5243, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 5.562720775604248, + "learning_rate": 8.42e-06, + "loss": 0.4935, + "step": 425 + }, + { + "epoch": 0.3, + "grad_norm": 4.6928534507751465, + "learning_rate": 8.920000000000001e-06, + "loss": 0.4914, + "step": 450 + }, + { + "epoch": 0.32, + "grad_norm": 4.314043998718262, + "learning_rate": 9.42e-06, + "loss": 0.4676, + "step": 475 + }, + { + "epoch": 0.34, + "grad_norm": 4.993198394775391, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4539, + "step": 500 + }, + { + "epoch": 0.35, + "grad_norm": 4.943511009216309, + "learning_rate": 9.997889447236182e-06, + "loss": 0.4411, + "step": 525 + }, + { + "epoch": 0.37, + "grad_norm": 4.5246124267578125, + "learning_rate": 9.995376884422112e-06, + "loss": 0.4344, + "step": 550 + }, + { + "epoch": 0.39, + "grad_norm": 5.379785537719727, + "learning_rate": 9.992864321608041e-06, + "loss": 0.4228, + "step": 575 + }, + { + "epoch": 0.4, + "grad_norm": 5.210270404815674, + "learning_rate": 9.99035175879397e-06, + "loss": 0.4159, + "step": 600 + }, + { + "epoch": 0.42, + "grad_norm": 4.314879417419434, + "learning_rate": 9.9878391959799e-06, + "loss": 0.4087, + "step": 625 + }, + { + "epoch": 0.44, + "grad_norm": 4.224247932434082, + "learning_rate": 9.98532663316583e-06, + "loss": 0.3901, + "step": 650 + }, + { + "epoch": 0.45, + "grad_norm": 6.062074184417725, + "learning_rate": 9.98281407035176e-06, + "loss": 0.3871, + "step": 675 + }, + { + "epoch": 0.47, + "grad_norm": 4.350323677062988, + "learning_rate": 9.98030150753769e-06, + "loss": 0.386, + "step": 700 + }, + { + "epoch": 0.49, + "grad_norm": 4.867780685424805, + "learning_rate": 9.977788944723619e-06, + "loss": 0.3851, + "step": 725 + }, + { + "epoch": 0.5, + "grad_norm": 4.49179744720459, + "learning_rate": 9.975276381909548e-06, + "loss": 0.3772, + "step": 750 + }, + { + "epoch": 0.52, + "grad_norm": 4.474670886993408, + "learning_rate": 9.972763819095477e-06, + "loss": 0.3642, + "step": 775 + }, + { + "epoch": 0.54, + "grad_norm": 4.740733623504639, + "learning_rate": 9.970251256281408e-06, + "loss": 0.3551, + "step": 800 + }, + { + "epoch": 0.55, + "grad_norm": 4.2420549392700195, + "learning_rate": 9.967738693467338e-06, + "loss": 0.3637, + "step": 825 + }, + { + "epoch": 0.57, + "grad_norm": 4.520359039306641, + "learning_rate": 9.965226130653267e-06, + "loss": 0.3537, + "step": 850 + }, + { + "epoch": 0.59, + "grad_norm": 3.9906153678894043, + "learning_rate": 9.962713567839198e-06, + "loss": 0.3412, + "step": 875 + }, + { + "epoch": 0.6, + "grad_norm": 4.288099765777588, + "learning_rate": 9.960201005025126e-06, + "loss": 0.3466, + "step": 900 + }, + { + "epoch": 0.62, + "grad_norm": 4.125555992126465, + "learning_rate": 9.957688442211057e-06, + "loss": 0.3421, + "step": 925 + }, + { + "epoch": 0.64, + "grad_norm": 4.692856788635254, + "learning_rate": 9.955175879396986e-06, + "loss": 0.3415, + "step": 950 + }, + { + "epoch": 0.65, + "grad_norm": 5.192648410797119, + "learning_rate": 9.952663316582915e-06, + "loss": 0.3337, + "step": 975 + }, + { + "epoch": 0.67, + "grad_norm": 4.478278160095215, + "learning_rate": 9.950150753768845e-06, + "loss": 0.3283, + "step": 1000 + }, + { + "epoch": 0.67, + "eval_loss": 0.28899532556533813, + "eval_runtime": 538.2218, + "eval_samples_per_second": 2.618, + "eval_steps_per_second": 2.618, + "eval_wer": 44.904877205119334, + "step": 1000 + }, + { + "epoch": 0.69, + "grad_norm": 4.19979190826416, + "learning_rate": 9.947638190954774e-06, + "loss": 0.3293, + "step": 1025 + }, + { + "epoch": 0.71, + "grad_norm": 5.217833042144775, + "learning_rate": 9.945125628140703e-06, + "loss": 0.3261, + "step": 1050 + }, + { + "epoch": 0.72, + "grad_norm": 4.803160190582275, + "learning_rate": 9.942613065326634e-06, + "loss": 0.3202, + "step": 1075 + }, + { + "epoch": 0.74, + "grad_norm": 4.6511359214782715, + "learning_rate": 9.940100502512564e-06, + "loss": 0.3189, + "step": 1100 + }, + { + "epoch": 0.76, + "grad_norm": 4.849580764770508, + "learning_rate": 9.937587939698493e-06, + "loss": 0.3106, + "step": 1125 + }, + { + "epoch": 0.77, + "grad_norm": 3.984544277191162, + "learning_rate": 9.935075376884424e-06, + "loss": 0.3085, + "step": 1150 + }, + { + "epoch": 0.79, + "grad_norm": 4.383615016937256, + "learning_rate": 9.932562814070352e-06, + "loss": 0.3104, + "step": 1175 + }, + { + "epoch": 0.81, + "grad_norm": 4.594172477722168, + "learning_rate": 9.930050251256283e-06, + "loss": 0.3035, + "step": 1200 + }, + { + "epoch": 0.82, + "grad_norm": 3.7613401412963867, + "learning_rate": 9.927537688442212e-06, + "loss": 0.3025, + "step": 1225 + }, + { + "epoch": 0.84, + "grad_norm": 4.686855792999268, + "learning_rate": 9.925025125628141e-06, + "loss": 0.307, + "step": 1250 + }, + { + "epoch": 0.86, + "grad_norm": 4.406073570251465, + "learning_rate": 9.922512562814072e-06, + "loss": 0.3024, + "step": 1275 + }, + { + "epoch": 0.87, + "grad_norm": 4.2036004066467285, + "learning_rate": 9.920000000000002e-06, + "loss": 0.2993, + "step": 1300 + }, + { + "epoch": 0.89, + "grad_norm": 4.8607177734375, + "learning_rate": 9.917487437185931e-06, + "loss": 0.2924, + "step": 1325 + }, + { + "epoch": 0.91, + "grad_norm": 4.137043476104736, + "learning_rate": 9.91497487437186e-06, + "loss": 0.2953, + "step": 1350 + }, + { + "epoch": 0.92, + "grad_norm": 4.247952461242676, + "learning_rate": 9.91246231155779e-06, + "loss": 0.2831, + "step": 1375 + }, + { + "epoch": 0.94, + "grad_norm": 4.612934589385986, + "learning_rate": 9.909949748743719e-06, + "loss": 0.2815, + "step": 1400 + }, + { + "epoch": 0.96, + "grad_norm": 4.575165748596191, + "learning_rate": 9.90743718592965e-06, + "loss": 0.2865, + "step": 1425 + }, + { + "epoch": 0.97, + "grad_norm": 4.751337051391602, + "learning_rate": 9.904924623115578e-06, + "loss": 0.2728, + "step": 1450 + }, + { + "epoch": 0.99, + "grad_norm": 4.269663333892822, + "learning_rate": 9.902412060301509e-06, + "loss": 0.2804, + "step": 1475 + }, + { + "epoch": 1.01, + "grad_norm": 3.966280221939087, + "learning_rate": 9.899899497487438e-06, + "loss": 0.2721, + "step": 1500 + }, + { + "epoch": 1.02, + "grad_norm": 4.3141984939575195, + "learning_rate": 9.897386934673367e-06, + "loss": 0.2732, + "step": 1525 + }, + { + "epoch": 1.04, + "grad_norm": 4.059327602386475, + "learning_rate": 9.894874371859298e-06, + "loss": 0.2596, + "step": 1550 + }, + { + "epoch": 1.06, + "grad_norm": 3.9442734718322754, + "learning_rate": 9.892361809045228e-06, + "loss": 0.2598, + "step": 1575 + }, + { + "epoch": 1.07, + "grad_norm": 3.946674346923828, + "learning_rate": 9.889849246231157e-06, + "loss": 0.2661, + "step": 1600 + }, + { + "epoch": 1.09, + "grad_norm": 4.30720853805542, + "learning_rate": 9.887336683417086e-06, + "loss": 0.2564, + "step": 1625 + }, + { + "epoch": 1.11, + "grad_norm": 4.806241035461426, + "learning_rate": 9.884824120603015e-06, + "loss": 0.2627, + "step": 1650 + }, + { + "epoch": 1.12, + "grad_norm": 4.289256572723389, + "learning_rate": 9.882311557788945e-06, + "loss": 0.2511, + "step": 1675 + }, + { + "epoch": 1.14, + "grad_norm": 3.732201337814331, + "learning_rate": 9.879798994974876e-06, + "loss": 0.254, + "step": 1700 + }, + { + "epoch": 1.16, + "grad_norm": 3.8544976711273193, + "learning_rate": 9.877286432160805e-06, + "loss": 0.251, + "step": 1725 + }, + { + "epoch": 1.18, + "grad_norm": 3.483417510986328, + "learning_rate": 9.874773869346734e-06, + "loss": 0.2492, + "step": 1750 + }, + { + "epoch": 1.19, + "grad_norm": 4.441720485687256, + "learning_rate": 9.872261306532664e-06, + "loss": 0.2546, + "step": 1775 + }, + { + "epoch": 1.21, + "grad_norm": 3.9507439136505127, + "learning_rate": 9.869748743718593e-06, + "loss": 0.2487, + "step": 1800 + }, + { + "epoch": 1.23, + "grad_norm": 3.5427682399749756, + "learning_rate": 9.867236180904524e-06, + "loss": 0.2466, + "step": 1825 + }, + { + "epoch": 1.24, + "grad_norm": 3.8757991790771484, + "learning_rate": 9.864723618090453e-06, + "loss": 0.2472, + "step": 1850 + }, + { + "epoch": 1.26, + "grad_norm": 4.069082260131836, + "learning_rate": 9.862211055276383e-06, + "loss": 0.2504, + "step": 1875 + }, + { + "epoch": 1.28, + "grad_norm": 4.411264896392822, + "learning_rate": 9.859698492462312e-06, + "loss": 0.2399, + "step": 1900 + }, + { + "epoch": 1.29, + "grad_norm": 4.063783645629883, + "learning_rate": 9.857185929648241e-06, + "loss": 0.2414, + "step": 1925 + }, + { + "epoch": 1.31, + "grad_norm": 4.256843566894531, + "learning_rate": 9.854673366834172e-06, + "loss": 0.2393, + "step": 1950 + }, + { + "epoch": 1.33, + "grad_norm": 3.9724340438842773, + "learning_rate": 9.852160804020102e-06, + "loss": 0.2413, + "step": 1975 + }, + { + "epoch": 1.34, + "grad_norm": 4.0315752029418945, + "learning_rate": 9.849648241206031e-06, + "loss": 0.2423, + "step": 2000 + }, + { + "epoch": 1.34, + "eval_loss": 0.22521595656871796, + "eval_runtime": 501.9391, + "eval_samples_per_second": 2.807, + "eval_steps_per_second": 2.807, + "eval_wer": 36.63784157730889, + "step": 2000 + }, + { + "epoch": 1.36, + "grad_norm": 3.7031657695770264, + "learning_rate": 9.84713567839196e-06, + "loss": 0.2422, + "step": 2025 + }, + { + "epoch": 1.38, + "grad_norm": 3.9322640895843506, + "learning_rate": 9.84462311557789e-06, + "loss": 0.2358, + "step": 2050 + }, + { + "epoch": 1.39, + "grad_norm": 4.5099639892578125, + "learning_rate": 9.842110552763819e-06, + "loss": 0.2351, + "step": 2075 + }, + { + "epoch": 1.41, + "grad_norm": 4.933638572692871, + "learning_rate": 9.83959798994975e-06, + "loss": 0.2402, + "step": 2100 + }, + { + "epoch": 1.43, + "grad_norm": 4.435012340545654, + "learning_rate": 9.83708542713568e-06, + "loss": 0.2345, + "step": 2125 + }, + { + "epoch": 1.44, + "grad_norm": 4.261793613433838, + "learning_rate": 9.834572864321609e-06, + "loss": 0.2334, + "step": 2150 + }, + { + "epoch": 1.46, + "grad_norm": 3.4005892276763916, + "learning_rate": 9.832060301507538e-06, + "loss": 0.2366, + "step": 2175 + }, + { + "epoch": 1.48, + "grad_norm": 4.037840843200684, + "learning_rate": 9.829547738693467e-06, + "loss": 0.2352, + "step": 2200 + }, + { + "epoch": 1.49, + "grad_norm": 4.4088873863220215, + "learning_rate": 9.827035175879398e-06, + "loss": 0.2311, + "step": 2225 + }, + { + "epoch": 1.51, + "grad_norm": 5.249969482421875, + "learning_rate": 9.824522613065328e-06, + "loss": 0.2292, + "step": 2250 + }, + { + "epoch": 1.53, + "grad_norm": 3.866811752319336, + "learning_rate": 9.822010050251257e-06, + "loss": 0.2307, + "step": 2275 + }, + { + "epoch": 1.54, + "grad_norm": 4.279900074005127, + "learning_rate": 9.819497487437186e-06, + "loss": 0.2334, + "step": 2300 + }, + { + "epoch": 1.56, + "grad_norm": 3.9198098182678223, + "learning_rate": 9.816984924623116e-06, + "loss": 0.2248, + "step": 2325 + }, + { + "epoch": 1.58, + "grad_norm": 4.273867130279541, + "learning_rate": 9.814472361809047e-06, + "loss": 0.2287, + "step": 2350 + }, + { + "epoch": 1.6, + "grad_norm": 3.9743666648864746, + "learning_rate": 9.811959798994976e-06, + "loss": 0.2311, + "step": 2375 + }, + { + "epoch": 1.61, + "grad_norm": 3.9278736114501953, + "learning_rate": 9.809447236180905e-06, + "loss": 0.2275, + "step": 2400 + }, + { + "epoch": 1.63, + "grad_norm": 3.819561243057251, + "learning_rate": 9.806934673366835e-06, + "loss": 0.224, + "step": 2425 + }, + { + "epoch": 1.65, + "grad_norm": 4.277536869049072, + "learning_rate": 9.804422110552764e-06, + "loss": 0.2241, + "step": 2450 + }, + { + "epoch": 1.66, + "grad_norm": 3.7323555946350098, + "learning_rate": 9.801909547738693e-06, + "loss": 0.2222, + "step": 2475 + }, + { + "epoch": 1.68, + "grad_norm": 4.7595014572143555, + "learning_rate": 9.799396984924624e-06, + "loss": 0.2229, + "step": 2500 + }, + { + "epoch": 1.7, + "grad_norm": 3.9532127380371094, + "learning_rate": 9.796884422110554e-06, + "loss": 0.2215, + "step": 2525 + }, + { + "epoch": 1.71, + "grad_norm": 4.128178596496582, + "learning_rate": 9.794371859296483e-06, + "loss": 0.2198, + "step": 2550 + }, + { + "epoch": 1.73, + "grad_norm": 4.2043561935424805, + "learning_rate": 9.791859296482414e-06, + "loss": 0.2166, + "step": 2575 + }, + { + "epoch": 1.75, + "grad_norm": 4.0248918533325195, + "learning_rate": 9.789346733668342e-06, + "loss": 0.2194, + "step": 2600 + }, + { + "epoch": 1.76, + "grad_norm": 4.235128402709961, + "learning_rate": 9.786834170854273e-06, + "loss": 0.2198, + "step": 2625 + }, + { + "epoch": 1.78, + "grad_norm": 3.745103120803833, + "learning_rate": 9.784321608040202e-06, + "loss": 0.213, + "step": 2650 + }, + { + "epoch": 1.8, + "grad_norm": 3.817952871322632, + "learning_rate": 9.781809045226131e-06, + "loss": 0.2158, + "step": 2675 + }, + { + "epoch": 1.81, + "grad_norm": 4.436026573181152, + "learning_rate": 9.77929648241206e-06, + "loss": 0.2142, + "step": 2700 + }, + { + "epoch": 1.83, + "grad_norm": 3.990562677383423, + "learning_rate": 9.77678391959799e-06, + "loss": 0.2175, + "step": 2725 + }, + { + "epoch": 1.85, + "grad_norm": 3.7376487255096436, + "learning_rate": 9.774271356783921e-06, + "loss": 0.2116, + "step": 2750 + }, + { + "epoch": 1.86, + "grad_norm": 3.7319161891937256, + "learning_rate": 9.77175879396985e-06, + "loss": 0.2125, + "step": 2775 + }, + { + "epoch": 1.88, + "grad_norm": 3.8331363201141357, + "learning_rate": 9.76924623115578e-06, + "loss": 0.2129, + "step": 2800 + }, + { + "epoch": 1.9, + "grad_norm": 3.8604419231414795, + "learning_rate": 9.766733668341709e-06, + "loss": 0.2152, + "step": 2825 + }, + { + "epoch": 1.91, + "grad_norm": 3.878577947616577, + "learning_rate": 9.76422110552764e-06, + "loss": 0.2119, + "step": 2850 + }, + { + "epoch": 1.93, + "grad_norm": 3.766144037246704, + "learning_rate": 9.761708542713568e-06, + "loss": 0.2141, + "step": 2875 + }, + { + "epoch": 1.95, + "grad_norm": 4.006993770599365, + "learning_rate": 9.759195979899499e-06, + "loss": 0.2064, + "step": 2900 + }, + { + "epoch": 1.96, + "grad_norm": 3.8346142768859863, + "learning_rate": 9.756683417085428e-06, + "loss": 0.2078, + "step": 2925 + }, + { + "epoch": 1.98, + "grad_norm": 3.907681703567505, + "learning_rate": 9.754170854271357e-06, + "loss": 0.2096, + "step": 2950 + }, + { + "epoch": 2.0, + "grad_norm": 3.8826911449432373, + "learning_rate": 9.751658291457288e-06, + "loss": 0.2075, + "step": 2975 + }, + { + "epoch": 2.01, + "grad_norm": 4.0189208984375, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1952, + "step": 3000 + }, + { + "epoch": 2.01, + "eval_loss": 0.1993221491575241, + "eval_runtime": 506.086, + "eval_samples_per_second": 2.784, + "eval_steps_per_second": 2.784, + "eval_wer": 34.78381182981668, + "step": 3000 + }, + { + "epoch": 2.03, + "grad_norm": 3.4568824768066406, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1871, + "step": 3025 + }, + { + "epoch": 2.05, + "grad_norm": 3.4548001289367676, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1921, + "step": 3050 + }, + { + "epoch": 2.07, + "grad_norm": 3.6934814453125, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1911, + "step": 3075 + }, + { + "epoch": 2.08, + "grad_norm": 3.56278133392334, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1909, + "step": 3100 + }, + { + "epoch": 2.1, + "grad_norm": 3.7794251441955566, + "learning_rate": 9.736582914572866e-06, + "loss": 0.1922, + "step": 3125 + }, + { + "epoch": 2.12, + "grad_norm": 4.016905784606934, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1872, + "step": 3150 + }, + { + "epoch": 2.13, + "grad_norm": 3.4843788146972656, + "learning_rate": 9.731557788944725e-06, + "loss": 0.187, + "step": 3175 + }, + { + "epoch": 2.15, + "grad_norm": 3.5570619106292725, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1867, + "step": 3200 + }, + { + "epoch": 2.17, + "grad_norm": 3.652226209640503, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1924, + "step": 3225 + }, + { + "epoch": 2.18, + "grad_norm": 3.7155370712280273, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1953, + "step": 3250 + }, + { + "epoch": 2.2, + "grad_norm": 3.7845969200134277, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1859, + "step": 3275 + }, + { + "epoch": 2.22, + "grad_norm": 3.329834461212158, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1866, + "step": 3300 + }, + { + "epoch": 2.23, + "grad_norm": 3.741820812225342, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1885, + "step": 3325 + }, + { + "epoch": 2.25, + "grad_norm": 3.828986644744873, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1875, + "step": 3350 + }, + { + "epoch": 2.27, + "grad_norm": 3.369107246398926, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1917, + "step": 3375 + }, + { + "epoch": 2.28, + "grad_norm": 3.2843613624572754, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1832, + "step": 3400 + }, + { + "epoch": 2.3, + "grad_norm": 3.8559796810150146, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1798, + "step": 3425 + }, + { + "epoch": 2.32, + "grad_norm": 4.124207496643066, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1895, + "step": 3450 + }, + { + "epoch": 2.33, + "grad_norm": 3.7531955242156982, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1861, + "step": 3475 + }, + { + "epoch": 2.35, + "grad_norm": 3.9688479900360107, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1815, + "step": 3500 + }, + { + "epoch": 2.37, + "grad_norm": 4.048210620880127, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1813, + "step": 3525 + }, + { + "epoch": 2.38, + "grad_norm": 3.9658586978912354, + "learning_rate": 9.69386934673367e-06, + "loss": 0.1829, + "step": 3550 + }, + { + "epoch": 2.4, + "grad_norm": 4.282639980316162, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1829, + "step": 3575 + }, + { + "epoch": 2.42, + "grad_norm": 3.5770888328552246, + "learning_rate": 9.688844221105528e-06, + "loss": 0.177, + "step": 3600 + }, + { + "epoch": 2.43, + "grad_norm": 3.636948585510254, + "learning_rate": 9.686331658291457e-06, + "loss": 0.1809, + "step": 3625 + }, + { + "epoch": 2.45, + "grad_norm": 3.558443546295166, + "learning_rate": 9.683819095477388e-06, + "loss": 0.183, + "step": 3650 + }, + { + "epoch": 2.47, + "grad_norm": 3.943390369415283, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1851, + "step": 3675 + }, + { + "epoch": 2.48, + "grad_norm": 3.551934003829956, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1799, + "step": 3700 + }, + { + "epoch": 2.5, + "grad_norm": 3.7090535163879395, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1806, + "step": 3725 + }, + { + "epoch": 2.52, + "grad_norm": 3.5795400142669678, + "learning_rate": 9.673768844221106e-06, + "loss": 0.1832, + "step": 3750 + }, + { + "epoch": 2.54, + "grad_norm": 3.2557222843170166, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1803, + "step": 3775 + }, + { + "epoch": 2.55, + "grad_norm": 3.438840627670288, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1734, + "step": 3800 + }, + { + "epoch": 2.57, + "grad_norm": 3.4316484928131104, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1783, + "step": 3825 + }, + { + "epoch": 2.59, + "grad_norm": 3.9053733348846436, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1803, + "step": 3850 + }, + { + "epoch": 2.6, + "grad_norm": 3.600428581237793, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1803, + "step": 3875 + }, + { + "epoch": 2.62, + "grad_norm": 3.9253036975860596, + "learning_rate": 9.658693467336683e-06, + "loss": 0.178, + "step": 3900 + }, + { + "epoch": 2.64, + "grad_norm": 3.82502818107605, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1775, + "step": 3925 + }, + { + "epoch": 2.65, + "grad_norm": 3.5559029579162598, + "learning_rate": 9.653668341708544e-06, + "loss": 0.171, + "step": 3950 + }, + { + "epoch": 2.67, + "grad_norm": 3.3937253952026367, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1707, + "step": 3975 + }, + { + "epoch": 2.69, + "grad_norm": 3.5934906005859375, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1782, + "step": 4000 + }, + { + "epoch": 2.69, + "eval_loss": 0.1855400949716568, + "eval_runtime": 501.0751, + "eval_samples_per_second": 2.812, + "eval_steps_per_second": 2.812, + "eval_wer": 31.85748875821515, + "step": 4000 + }, + { + "epoch": 2.7, + "grad_norm": 3.875981569290161, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1732, + "step": 4025 + }, + { + "epoch": 2.72, + "grad_norm": 3.575018882751465, + "learning_rate": 9.643618090452263e-06, + "loss": 0.172, + "step": 4050 + }, + { + "epoch": 2.74, + "grad_norm": 3.6058952808380127, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1736, + "step": 4075 + }, + { + "epoch": 2.75, + "grad_norm": 3.6790058612823486, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1816, + "step": 4100 + }, + { + "epoch": 2.77, + "grad_norm": 3.738783359527588, + "learning_rate": 9.63608040201005e-06, + "loss": 0.172, + "step": 4125 + }, + { + "epoch": 2.79, + "grad_norm": 4.019230365753174, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1713, + "step": 4150 + }, + { + "epoch": 2.8, + "grad_norm": 3.8548195362091064, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1785, + "step": 4175 + }, + { + "epoch": 2.82, + "grad_norm": 3.492232322692871, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1715, + "step": 4200 + }, + { + "epoch": 2.84, + "grad_norm": 3.3461267948150635, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1694, + "step": 4225 + }, + { + "epoch": 2.85, + "grad_norm": 3.983288526535034, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1713, + "step": 4250 + }, + { + "epoch": 2.87, + "grad_norm": 4.382197380065918, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1741, + "step": 4275 + }, + { + "epoch": 2.89, + "grad_norm": 4.118824481964111, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1655, + "step": 4300 + }, + { + "epoch": 2.9, + "grad_norm": 3.5951993465423584, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1723, + "step": 4325 + }, + { + "epoch": 2.92, + "grad_norm": 4.276073455810547, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1748, + "step": 4350 + }, + { + "epoch": 2.94, + "grad_norm": 3.650339126586914, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1719, + "step": 4375 + }, + { + "epoch": 2.96, + "grad_norm": 3.4903323650360107, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1727, + "step": 4400 + }, + { + "epoch": 2.97, + "grad_norm": 3.7405972480773926, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1693, + "step": 4425 + }, + { + "epoch": 2.99, + "grad_norm": 3.651560068130493, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1667, + "step": 4450 + }, + { + "epoch": 3.01, + "grad_norm": 3.7319529056549072, + "learning_rate": 9.600904522613066e-06, + "loss": 0.161, + "step": 4475 + }, + { + "epoch": 3.02, + "grad_norm": 3.5703518390655518, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1543, + "step": 4500 + }, + { + "epoch": 3.04, + "grad_norm": 3.5607829093933105, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1513, + "step": 4525 + }, + { + "epoch": 3.06, + "grad_norm": 3.972504138946533, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1475, + "step": 4550 + }, + { + "epoch": 3.07, + "grad_norm": 3.5525929927825928, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1515, + "step": 4575 + }, + { + "epoch": 3.09, + "grad_norm": 3.5524590015411377, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1584, + "step": 4600 + }, + { + "epoch": 3.11, + "grad_norm": 3.23596453666687, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1441, + "step": 4625 + }, + { + "epoch": 3.12, + "grad_norm": 3.6133830547332764, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1515, + "step": 4650 + }, + { + "epoch": 3.14, + "grad_norm": 3.294287919998169, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1541, + "step": 4675 + }, + { + "epoch": 3.16, + "grad_norm": 3.3605408668518066, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1529, + "step": 4700 + }, + { + "epoch": 3.17, + "grad_norm": 3.1740756034851074, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1547, + "step": 4725 + }, + { + "epoch": 3.19, + "grad_norm": 3.333665132522583, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1531, + "step": 4750 + }, + { + "epoch": 3.21, + "grad_norm": 3.4780566692352295, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1529, + "step": 4775 + }, + { + "epoch": 3.22, + "grad_norm": 3.233739137649536, + "learning_rate": 9.568241206030151e-06, + "loss": 0.154, + "step": 4800 + }, + { + "epoch": 3.24, + "grad_norm": 4.083475589752197, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1537, + "step": 4825 + }, + { + "epoch": 3.26, + "grad_norm": 3.8085060119628906, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1545, + "step": 4850 + }, + { + "epoch": 3.27, + "grad_norm": 3.908021926879883, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1508, + "step": 4875 + }, + { + "epoch": 3.29, + "grad_norm": 3.436661958694458, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1469, + "step": 4900 + }, + { + "epoch": 3.31, + "grad_norm": 3.2997820377349854, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1442, + "step": 4925 + }, + { + "epoch": 3.32, + "grad_norm": 3.606473684310913, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1545, + "step": 4950 + }, + { + "epoch": 3.34, + "grad_norm": 3.2949883937835693, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1513, + "step": 4975 + }, + { + "epoch": 3.36, + "grad_norm": 3.6132426261901855, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1498, + "step": 5000 + }, + { + "epoch": 3.36, + "eval_loss": 0.17742596566677094, + "eval_runtime": 502.1958, + "eval_samples_per_second": 2.806, + "eval_steps_per_second": 2.806, + "eval_wer": 31.45624351435489, + "step": 5000 + }, + { + "epoch": 3.37, + "grad_norm": 3.245905637741089, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1502, + "step": 5025 + }, + { + "epoch": 3.39, + "grad_norm": 3.4925742149353027, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1479, + "step": 5050 + }, + { + "epoch": 3.41, + "grad_norm": 3.5079145431518555, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1512, + "step": 5075 + }, + { + "epoch": 3.43, + "grad_norm": 3.842787265777588, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1498, + "step": 5100 + }, + { + "epoch": 3.44, + "grad_norm": 3.209733247756958, + "learning_rate": 9.535577889447237e-06, + "loss": 0.1466, + "step": 5125 + }, + { + "epoch": 3.46, + "grad_norm": 3.6466197967529297, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1478, + "step": 5150 + }, + { + "epoch": 3.48, + "grad_norm": 3.8729169368743896, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1519, + "step": 5175 + }, + { + "epoch": 3.49, + "grad_norm": 3.3529200553894043, + "learning_rate": 9.528040201005025e-06, + "loss": 0.148, + "step": 5200 + }, + { + "epoch": 3.51, + "grad_norm": 4.148180961608887, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1497, + "step": 5225 + }, + { + "epoch": 3.53, + "grad_norm": 3.4012880325317383, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1471, + "step": 5250 + }, + { + "epoch": 3.54, + "grad_norm": 3.541660785675049, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1466, + "step": 5275 + }, + { + "epoch": 3.56, + "grad_norm": 3.375972032546997, + "learning_rate": 9.517989949748744e-06, + "loss": 0.1446, + "step": 5300 + }, + { + "epoch": 3.58, + "grad_norm": 3.4109225273132324, + "learning_rate": 9.515477386934673e-06, + "loss": 0.15, + "step": 5325 + }, + { + "epoch": 3.59, + "grad_norm": 3.340024709701538, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1568, + "step": 5350 + }, + { + "epoch": 3.61, + "grad_norm": 3.2707488536834717, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1442, + "step": 5375 + }, + { + "epoch": 3.63, + "grad_norm": 3.839214324951172, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1521, + "step": 5400 + }, + { + "epoch": 3.64, + "grad_norm": 3.2995080947875977, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1463, + "step": 5425 + }, + { + "epoch": 3.66, + "grad_norm": 3.5799291133880615, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1416, + "step": 5450 + }, + { + "epoch": 3.68, + "grad_norm": 3.6219823360443115, + "learning_rate": 9.500402010050253e-06, + "loss": 0.148, + "step": 5475 + }, + { + "epoch": 3.69, + "grad_norm": 3.393965005874634, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1468, + "step": 5500 + }, + { + "epoch": 3.71, + "grad_norm": 3.571566104888916, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1462, + "step": 5525 + }, + { + "epoch": 3.73, + "grad_norm": 3.5705130100250244, + "learning_rate": 9.49286432160804e-06, + "loss": 0.1483, + "step": 5550 + }, + { + "epoch": 3.74, + "grad_norm": 3.7848360538482666, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1506, + "step": 5575 + }, + { + "epoch": 3.76, + "grad_norm": 3.352940082550049, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1457, + "step": 5600 + }, + { + "epoch": 3.78, + "grad_norm": 3.7227752208709717, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1456, + "step": 5625 + }, + { + "epoch": 3.79, + "grad_norm": 3.6076254844665527, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1464, + "step": 5650 + }, + { + "epoch": 3.81, + "grad_norm": 3.4364395141601562, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1434, + "step": 5675 + }, + { + "epoch": 3.83, + "grad_norm": 4.120806694030762, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1469, + "step": 5700 + }, + { + "epoch": 3.84, + "grad_norm": 3.919128179550171, + "learning_rate": 9.475276381909548e-06, + "loss": 0.1477, + "step": 5725 + }, + { + "epoch": 3.86, + "grad_norm": 3.2233633995056152, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1472, + "step": 5750 + }, + { + "epoch": 3.88, + "grad_norm": 3.701591968536377, + "learning_rate": 9.470251256281408e-06, + "loss": 0.1431, + "step": 5775 + }, + { + "epoch": 3.9, + "grad_norm": 3.7108662128448486, + "learning_rate": 9.467738693467337e-06, + "loss": 0.142, + "step": 5800 + }, + { + "epoch": 3.91, + "grad_norm": 3.4114067554473877, + "learning_rate": 9.465226130653267e-06, + "loss": 0.141, + "step": 5825 + }, + { + "epoch": 3.93, + "grad_norm": 3.3884594440460205, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1471, + "step": 5850 + }, + { + "epoch": 3.95, + "grad_norm": 3.6920559406280518, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1396, + "step": 5875 + }, + { + "epoch": 3.96, + "grad_norm": 3.3707456588745117, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1409, + "step": 5900 + }, + { + "epoch": 3.98, + "grad_norm": 3.5900766849517822, + "learning_rate": 9.455175879396986e-06, + "loss": 0.1408, + "step": 5925 + }, + { + "epoch": 4.0, + "grad_norm": 3.5266335010528564, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1404, + "step": 5950 + }, + { + "epoch": 4.01, + "grad_norm": 3.628702402114868, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1273, + "step": 5975 + }, + { + "epoch": 4.03, + "grad_norm": 3.3968164920806885, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1244, + "step": 6000 + }, + { + "epoch": 4.03, + "eval_loss": 0.17096446454524994, + "eval_runtime": 510.5065, + "eval_samples_per_second": 2.76, + "eval_steps_per_second": 2.76, + "eval_wer": 31.2902109996541, + "step": 6000 + }, + { + "epoch": 4.05, + "grad_norm": 3.511183261871338, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1229, + "step": 6025 + }, + { + "epoch": 4.06, + "grad_norm": 3.7338268756866455, + "learning_rate": 9.442613065326634e-06, + "loss": 0.126, + "step": 6050 + }, + { + "epoch": 4.08, + "grad_norm": 3.313610792160034, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1288, + "step": 6075 + }, + { + "epoch": 4.1, + "grad_norm": 3.235154151916504, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1246, + "step": 6100 + }, + { + "epoch": 4.11, + "grad_norm": 3.490407705307007, + "learning_rate": 9.435075376884422e-06, + "loss": 0.1321, + "step": 6125 + }, + { + "epoch": 4.13, + "grad_norm": 3.39567232131958, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1275, + "step": 6150 + }, + { + "epoch": 4.15, + "grad_norm": 3.357705593109131, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1312, + "step": 6175 + }, + { + "epoch": 4.16, + "grad_norm": 3.194918155670166, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1241, + "step": 6200 + }, + { + "epoch": 4.18, + "grad_norm": 3.4928500652313232, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1275, + "step": 6225 + }, + { + "epoch": 4.2, + "grad_norm": 2.884371280670166, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1265, + "step": 6250 + }, + { + "epoch": 4.21, + "grad_norm": 2.933730125427246, + "learning_rate": 9.42e-06, + "loss": 0.1222, + "step": 6275 + }, + { + "epoch": 4.23, + "grad_norm": 3.588804006576538, + "learning_rate": 9.41748743718593e-06, + "loss": 0.127, + "step": 6300 + }, + { + "epoch": 4.25, + "grad_norm": 3.483628273010254, + "learning_rate": 9.41497487437186e-06, + "loss": 0.1282, + "step": 6325 + }, + { + "epoch": 4.26, + "grad_norm": 3.363340139389038, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1272, + "step": 6350 + }, + { + "epoch": 4.28, + "grad_norm": 3.393373489379883, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1252, + "step": 6375 + }, + { + "epoch": 4.3, + "grad_norm": 3.599374532699585, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1264, + "step": 6400 + }, + { + "epoch": 4.31, + "grad_norm": 3.465827465057373, + "learning_rate": 9.404924623115579e-06, + "loss": 0.1296, + "step": 6425 + }, + { + "epoch": 4.33, + "grad_norm": 3.071708917617798, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1237, + "step": 6450 + }, + { + "epoch": 4.35, + "grad_norm": 3.3425467014312744, + "learning_rate": 9.399899497487438e-06, + "loss": 0.1292, + "step": 6475 + }, + { + "epoch": 4.37, + "grad_norm": 3.2368080615997314, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1278, + "step": 6500 + }, + { + "epoch": 4.38, + "grad_norm": 3.4730188846588135, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1286, + "step": 6525 + }, + { + "epoch": 4.4, + "grad_norm": 3.747457981109619, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1219, + "step": 6550 + }, + { + "epoch": 4.42, + "grad_norm": 3.635645866394043, + "learning_rate": 9.389849246231157e-06, + "loss": 0.128, + "step": 6575 + }, + { + "epoch": 4.43, + "grad_norm": 3.5323641300201416, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1275, + "step": 6600 + }, + { + "epoch": 4.45, + "grad_norm": 3.428246259689331, + "learning_rate": 9.384824120603015e-06, + "loss": 0.1268, + "step": 6625 + }, + { + "epoch": 4.47, + "grad_norm": 3.7653777599334717, + "learning_rate": 9.382311557788946e-06, + "loss": 0.129, + "step": 6650 + }, + { + "epoch": 4.48, + "grad_norm": 3.6948444843292236, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1247, + "step": 6675 + }, + { + "epoch": 4.5, + "grad_norm": 3.5169332027435303, + "learning_rate": 9.377286432160805e-06, + "loss": 0.1305, + "step": 6700 + }, + { + "epoch": 4.52, + "grad_norm": 3.3148910999298096, + "learning_rate": 9.374773869346734e-06, + "loss": 0.1253, + "step": 6725 + }, + { + "epoch": 4.53, + "grad_norm": 3.5133206844329834, + "learning_rate": 9.372261306532664e-06, + "loss": 0.1281, + "step": 6750 + }, + { + "epoch": 4.55, + "grad_norm": 3.1394548416137695, + "learning_rate": 9.369748743718595e-06, + "loss": 0.1275, + "step": 6775 + }, + { + "epoch": 4.57, + "grad_norm": 3.8831162452697754, + "learning_rate": 9.367236180904524e-06, + "loss": 0.1232, + "step": 6800 + }, + { + "epoch": 4.58, + "grad_norm": 3.013374090194702, + "learning_rate": 9.364723618090453e-06, + "loss": 0.1239, + "step": 6825 + }, + { + "epoch": 4.6, + "grad_norm": 4.17524528503418, + "learning_rate": 9.362211055276383e-06, + "loss": 0.1288, + "step": 6850 + }, + { + "epoch": 4.62, + "grad_norm": 3.58526349067688, + "learning_rate": 9.359698492462312e-06, + "loss": 0.1276, + "step": 6875 + }, + { + "epoch": 4.63, + "grad_norm": 3.1176724433898926, + "learning_rate": 9.357185929648241e-06, + "loss": 0.1219, + "step": 6900 + }, + { + "epoch": 4.65, + "grad_norm": 3.605490207672119, + "learning_rate": 9.354673366834172e-06, + "loss": 0.1215, + "step": 6925 + }, + { + "epoch": 4.67, + "grad_norm": 3.646974802017212, + "learning_rate": 9.352160804020101e-06, + "loss": 0.1243, + "step": 6950 + }, + { + "epoch": 4.68, + "grad_norm": 3.325100898742676, + "learning_rate": 9.34964824120603e-06, + "loss": 0.1236, + "step": 6975 + }, + { + "epoch": 4.7, + "grad_norm": 3.2043683528900146, + "learning_rate": 9.34713567839196e-06, + "loss": 0.1213, + "step": 7000 + }, + { + "epoch": 4.7, + "eval_loss": 0.16865763068199158, + "eval_runtime": 507.6393, + "eval_samples_per_second": 2.776, + "eval_steps_per_second": 2.776, + "eval_wer": 28.94500172950536, + "step": 7000 + }, + { + "epoch": 4.72, + "grad_norm": 3.095010757446289, + "learning_rate": 9.34462311557789e-06, + "loss": 0.1174, + "step": 7025 + }, + { + "epoch": 4.73, + "grad_norm": 3.4582784175872803, + "learning_rate": 9.34211055276382e-06, + "loss": 0.1227, + "step": 7050 + }, + { + "epoch": 4.75, + "grad_norm": 3.361680269241333, + "learning_rate": 9.33959798994975e-06, + "loss": 0.1253, + "step": 7075 + }, + { + "epoch": 4.77, + "grad_norm": 3.7100329399108887, + "learning_rate": 9.337085427135679e-06, + "loss": 0.1256, + "step": 7100 + }, + { + "epoch": 4.79, + "grad_norm": 3.3510217666625977, + "learning_rate": 9.334572864321608e-06, + "loss": 0.1248, + "step": 7125 + }, + { + "epoch": 4.8, + "grad_norm": 3.22613525390625, + "learning_rate": 9.332060301507538e-06, + "loss": 0.1227, + "step": 7150 + }, + { + "epoch": 4.82, + "grad_norm": 3.123006820678711, + "learning_rate": 9.329547738693469e-06, + "loss": 0.1206, + "step": 7175 + }, + { + "epoch": 4.84, + "grad_norm": 3.68377947807312, + "learning_rate": 9.327035175879398e-06, + "loss": 0.1197, + "step": 7200 + }, + { + "epoch": 4.85, + "grad_norm": 3.1306381225585938, + "learning_rate": 9.324522613065327e-06, + "loss": 0.1208, + "step": 7225 + }, + { + "epoch": 4.87, + "grad_norm": 3.268972635269165, + "learning_rate": 9.322010050251257e-06, + "loss": 0.1222, + "step": 7250 + }, + { + "epoch": 4.89, + "grad_norm": 3.4340620040893555, + "learning_rate": 9.319497487437186e-06, + "loss": 0.1243, + "step": 7275 + }, + { + "epoch": 4.9, + "grad_norm": 3.4942822456359863, + "learning_rate": 9.316984924623115e-06, + "loss": 0.1241, + "step": 7300 + }, + { + "epoch": 4.92, + "grad_norm": 3.602111577987671, + "learning_rate": 9.314472361809046e-06, + "loss": 0.125, + "step": 7325 + }, + { + "epoch": 4.94, + "grad_norm": 3.1872684955596924, + "learning_rate": 9.311959798994976e-06, + "loss": 0.1226, + "step": 7350 + }, + { + "epoch": 4.95, + "grad_norm": 3.0820956230163574, + "learning_rate": 9.309447236180905e-06, + "loss": 0.1187, + "step": 7375 + }, + { + "epoch": 4.97, + "grad_norm": 3.4358787536621094, + "learning_rate": 9.306934673366836e-06, + "loss": 0.1205, + "step": 7400 + }, + { + "epoch": 4.99, + "grad_norm": 3.4491493701934814, + "learning_rate": 9.304422110552764e-06, + "loss": 0.12, + "step": 7425 + }, + { + "epoch": 5.0, + "grad_norm": 3.160081624984741, + "learning_rate": 9.301909547738695e-06, + "loss": 0.1239, + "step": 7450 + }, + { + "epoch": 5.02, + "grad_norm": 2.9199459552764893, + "learning_rate": 9.299396984924624e-06, + "loss": 0.1072, + "step": 7475 + }, + { + "epoch": 5.04, + "grad_norm": 3.235931396484375, + "learning_rate": 9.296884422110553e-06, + "loss": 0.1038, + "step": 7500 + }, + { + "epoch": 5.05, + "grad_norm": 3.1781842708587646, + "learning_rate": 9.294371859296483e-06, + "loss": 0.1042, + "step": 7525 + }, + { + "epoch": 5.07, + "grad_norm": 3.2865490913391113, + "learning_rate": 9.291859296482412e-06, + "loss": 0.1051, + "step": 7550 + }, + { + "epoch": 5.09, + "grad_norm": 3.2022526264190674, + "learning_rate": 9.289346733668343e-06, + "loss": 0.1087, + "step": 7575 + }, + { + "epoch": 5.1, + "grad_norm": 3.3950209617614746, + "learning_rate": 9.286834170854272e-06, + "loss": 0.1114, + "step": 7600 + }, + { + "epoch": 5.12, + "grad_norm": 3.342078924179077, + "learning_rate": 9.284321608040202e-06, + "loss": 0.1103, + "step": 7625 + }, + { + "epoch": 5.14, + "grad_norm": 3.537217378616333, + "learning_rate": 9.281809045226131e-06, + "loss": 0.1065, + "step": 7650 + }, + { + "epoch": 5.15, + "grad_norm": 3.1118323802948, + "learning_rate": 9.279296482412062e-06, + "loss": 0.1075, + "step": 7675 + }, + { + "epoch": 5.17, + "grad_norm": 2.9323408603668213, + "learning_rate": 9.27678391959799e-06, + "loss": 0.1029, + "step": 7700 + }, + { + "epoch": 5.19, + "grad_norm": 3.127558946609497, + "learning_rate": 9.27427135678392e-06, + "loss": 0.1034, + "step": 7725 + }, + { + "epoch": 5.2, + "grad_norm": 3.6522057056427, + "learning_rate": 9.27175879396985e-06, + "loss": 0.1094, + "step": 7750 + }, + { + "epoch": 5.22, + "grad_norm": 3.2726082801818848, + "learning_rate": 9.26924623115578e-06, + "loss": 0.1061, + "step": 7775 + }, + { + "epoch": 5.24, + "grad_norm": 3.391132354736328, + "learning_rate": 9.26673366834171e-06, + "loss": 0.1114, + "step": 7800 + }, + { + "epoch": 5.26, + "grad_norm": 3.2813234329223633, + "learning_rate": 9.264221105527638e-06, + "loss": 0.1057, + "step": 7825 + }, + { + "epoch": 5.27, + "grad_norm": 3.506495237350464, + "learning_rate": 9.261708542713569e-06, + "loss": 0.1096, + "step": 7850 + }, + { + "epoch": 5.29, + "grad_norm": 3.3548014163970947, + "learning_rate": 9.259195979899498e-06, + "loss": 0.1062, + "step": 7875 + }, + { + "epoch": 5.31, + "grad_norm": 3.262648582458496, + "learning_rate": 9.256683417085428e-06, + "loss": 0.1055, + "step": 7900 + }, + { + "epoch": 5.32, + "grad_norm": 3.2068886756896973, + "learning_rate": 9.254170854271357e-06, + "loss": 0.1051, + "step": 7925 + }, + { + "epoch": 5.34, + "grad_norm": 3.2230381965637207, + "learning_rate": 9.251658291457288e-06, + "loss": 0.1068, + "step": 7950 + }, + { + "epoch": 5.36, + "grad_norm": 3.6877520084381104, + "learning_rate": 9.249145728643217e-06, + "loss": 0.1071, + "step": 7975 + }, + { + "epoch": 5.37, + "grad_norm": 3.3297247886657715, + "learning_rate": 9.246633165829147e-06, + "loss": 0.1071, + "step": 8000 + }, + { + "epoch": 5.37, + "eval_loss": 0.17124322056770325, + "eval_runtime": 508.0847, + "eval_samples_per_second": 2.773, + "eval_steps_per_second": 2.773, + "eval_wer": 29.616049809754408, + "step": 8000 + }, + { + "epoch": 5.39, + "grad_norm": 3.547956705093384, + "learning_rate": 9.244120603015076e-06, + "loss": 0.1068, + "step": 8025 + }, + { + "epoch": 5.41, + "grad_norm": 3.5101535320281982, + "learning_rate": 9.241608040201005e-06, + "loss": 0.1055, + "step": 8050 + }, + { + "epoch": 5.42, + "grad_norm": 3.4105026721954346, + "learning_rate": 9.239095477386936e-06, + "loss": 0.1068, + "step": 8075 + }, + { + "epoch": 5.44, + "grad_norm": 3.110013008117676, + "learning_rate": 9.236582914572864e-06, + "loss": 0.1053, + "step": 8100 + }, + { + "epoch": 5.46, + "grad_norm": 3.2478489875793457, + "learning_rate": 9.234070351758795e-06, + "loss": 0.1079, + "step": 8125 + }, + { + "epoch": 5.47, + "grad_norm": 3.324193239212036, + "learning_rate": 9.231557788944724e-06, + "loss": 0.1069, + "step": 8150 + }, + { + "epoch": 5.49, + "grad_norm": 3.107780694961548, + "learning_rate": 9.229045226130654e-06, + "loss": 0.1113, + "step": 8175 + }, + { + "epoch": 5.51, + "grad_norm": 3.4343416690826416, + "learning_rate": 9.226532663316585e-06, + "loss": 0.1027, + "step": 8200 + }, + { + "epoch": 5.52, + "grad_norm": 3.3404057025909424, + "learning_rate": 9.224020100502514e-06, + "loss": 0.1055, + "step": 8225 + }, + { + "epoch": 5.54, + "grad_norm": 3.924722671508789, + "learning_rate": 9.221507537688443e-06, + "loss": 0.1047, + "step": 8250 + }, + { + "epoch": 5.56, + "grad_norm": 3.3845882415771484, + "learning_rate": 9.218994974874373e-06, + "loss": 0.1025, + "step": 8275 + }, + { + "epoch": 5.57, + "grad_norm": 3.430337905883789, + "learning_rate": 9.216482412060302e-06, + "loss": 0.1058, + "step": 8300 + }, + { + "epoch": 5.59, + "grad_norm": 3.3585777282714844, + "learning_rate": 9.213969849246231e-06, + "loss": 0.1045, + "step": 8325 + }, + { + "epoch": 5.61, + "grad_norm": 3.30911922454834, + "learning_rate": 9.211457286432162e-06, + "loss": 0.1052, + "step": 8350 + }, + { + "epoch": 5.62, + "grad_norm": 3.383086919784546, + "learning_rate": 9.20894472361809e-06, + "loss": 0.107, + "step": 8375 + }, + { + "epoch": 5.64, + "grad_norm": 3.5503170490264893, + "learning_rate": 9.206432160804021e-06, + "loss": 0.1036, + "step": 8400 + }, + { + "epoch": 5.66, + "grad_norm": 3.4624392986297607, + "learning_rate": 9.20391959798995e-06, + "loss": 0.1063, + "step": 8425 + }, + { + "epoch": 5.67, + "grad_norm": 3.284414052963257, + "learning_rate": 9.20140703517588e-06, + "loss": 0.1069, + "step": 8450 + }, + { + "epoch": 5.69, + "grad_norm": 3.623403787612915, + "learning_rate": 9.19889447236181e-06, + "loss": 0.111, + "step": 8475 + }, + { + "epoch": 5.71, + "grad_norm": 3.654534339904785, + "learning_rate": 9.19638190954774e-06, + "loss": 0.1061, + "step": 8500 + }, + { + "epoch": 5.73, + "grad_norm": 3.876286745071411, + "learning_rate": 9.19386934673367e-06, + "loss": 0.1085, + "step": 8525 + }, + { + "epoch": 5.74, + "grad_norm": 3.7261335849761963, + "learning_rate": 9.191356783919599e-06, + "loss": 0.1071, + "step": 8550 + }, + { + "epoch": 5.76, + "grad_norm": 3.63120174407959, + "learning_rate": 9.188844221105528e-06, + "loss": 0.1055, + "step": 8575 + }, + { + "epoch": 5.78, + "grad_norm": 3.371018648147583, + "learning_rate": 9.186331658291459e-06, + "loss": 0.1069, + "step": 8600 + }, + { + "epoch": 5.79, + "grad_norm": 2.954195261001587, + "learning_rate": 9.183819095477388e-06, + "loss": 0.1022, + "step": 8625 + }, + { + "epoch": 5.81, + "grad_norm": 3.309741497039795, + "learning_rate": 9.181306532663317e-06, + "loss": 0.1075, + "step": 8650 + }, + { + "epoch": 5.83, + "grad_norm": 3.3323915004730225, + "learning_rate": 9.178793969849247e-06, + "loss": 0.1081, + "step": 8675 + }, + { + "epoch": 5.84, + "grad_norm": 2.7809927463531494, + "learning_rate": 9.176281407035176e-06, + "loss": 0.1081, + "step": 8700 + }, + { + "epoch": 5.86, + "grad_norm": 3.5699234008789062, + "learning_rate": 9.173768844221105e-06, + "loss": 0.1066, + "step": 8725 + }, + { + "epoch": 5.88, + "grad_norm": 3.6759111881256104, + "learning_rate": 9.171256281407036e-06, + "loss": 0.1058, + "step": 8750 + }, + { + "epoch": 5.89, + "grad_norm": 3.1996591091156006, + "learning_rate": 9.168743718592966e-06, + "loss": 0.1033, + "step": 8775 + }, + { + "epoch": 5.91, + "grad_norm": 3.4699742794036865, + "learning_rate": 9.166231155778895e-06, + "loss": 0.1047, + "step": 8800 + }, + { + "epoch": 5.93, + "grad_norm": 3.0533335208892822, + "learning_rate": 9.163718592964826e-06, + "loss": 0.1044, + "step": 8825 + }, + { + "epoch": 5.94, + "grad_norm": 3.2533061504364014, + "learning_rate": 9.161206030150754e-06, + "loss": 0.1068, + "step": 8850 + }, + { + "epoch": 5.96, + "grad_norm": 3.4915833473205566, + "learning_rate": 9.158693467336685e-06, + "loss": 0.1039, + "step": 8875 + }, + { + "epoch": 5.98, + "grad_norm": 3.404642343521118, + "learning_rate": 9.156180904522614e-06, + "loss": 0.1049, + "step": 8900 + }, + { + "epoch": 5.99, + "grad_norm": 3.2542970180511475, + "learning_rate": 9.153668341708543e-06, + "loss": 0.1027, + "step": 8925 + }, + { + "epoch": 6.01, + "grad_norm": 3.248486042022705, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0978, + "step": 8950 + }, + { + "epoch": 6.03, + "grad_norm": 3.3420162200927734, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0862, + "step": 8975 + }, + { + "epoch": 6.04, + "grad_norm": 3.206451654434204, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0933, + "step": 9000 + }, + { + "epoch": 6.04, + "eval_loss": 0.17191056907176971, + "eval_runtime": 508.9161, + "eval_samples_per_second": 2.769, + "eval_steps_per_second": 2.769, + "eval_wer": 28.765133171912833, + "step": 9000 + }, + { + "epoch": 6.06, + "grad_norm": 3.2574267387390137, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0891, + "step": 9025 + }, + { + "epoch": 6.08, + "grad_norm": 3.8183400630950928, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0907, + "step": 9050 + }, + { + "epoch": 6.09, + "grad_norm": 3.0673763751983643, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0886, + "step": 9075 + }, + { + "epoch": 6.11, + "grad_norm": 3.0169191360473633, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0877, + "step": 9100 + }, + { + "epoch": 6.13, + "grad_norm": 3.136153221130371, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0921, + "step": 9125 + }, + { + "epoch": 6.15, + "grad_norm": 3.5610499382019043, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0914, + "step": 9150 + }, + { + "epoch": 6.16, + "grad_norm": 3.125319719314575, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0923, + "step": 9175 + }, + { + "epoch": 6.18, + "grad_norm": 3.1054294109344482, + "learning_rate": 9.12603015075377e-06, + "loss": 0.0906, + "step": 9200 + }, + { + "epoch": 6.2, + "grad_norm": 3.4676826000213623, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0943, + "step": 9225 + }, + { + "epoch": 6.21, + "grad_norm": 3.0213968753814697, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0907, + "step": 9250 + }, + { + "epoch": 6.23, + "grad_norm": 3.278038501739502, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0896, + "step": 9275 + }, + { + "epoch": 6.25, + "grad_norm": 3.228107452392578, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0886, + "step": 9300 + }, + { + "epoch": 6.26, + "grad_norm": 3.23997163772583, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0896, + "step": 9325 + }, + { + "epoch": 6.28, + "grad_norm": 2.835906982421875, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0906, + "step": 9350 + }, + { + "epoch": 6.3, + "grad_norm": 3.2943994998931885, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0875, + "step": 9375 + }, + { + "epoch": 6.31, + "grad_norm": 3.3675384521484375, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0929, + "step": 9400 + }, + { + "epoch": 6.33, + "grad_norm": 3.464437246322632, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0895, + "step": 9425 + }, + { + "epoch": 6.35, + "grad_norm": 3.696207284927368, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0924, + "step": 9450 + }, + { + "epoch": 6.36, + "grad_norm": 3.1954405307769775, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0915, + "step": 9475 + }, + { + "epoch": 6.38, + "grad_norm": 3.6132760047912598, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0902, + "step": 9500 + }, + { + "epoch": 6.4, + "grad_norm": 3.4867594242095947, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0881, + "step": 9525 + }, + { + "epoch": 6.41, + "grad_norm": 3.544745683670044, + "learning_rate": 9.090854271356785e-06, + "loss": 0.094, + "step": 9550 + }, + { + "epoch": 6.43, + "grad_norm": 3.3103139400482178, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0904, + "step": 9575 + }, + { + "epoch": 6.45, + "grad_norm": 3.3092243671417236, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0895, + "step": 9600 + }, + { + "epoch": 6.46, + "grad_norm": 3.100635528564453, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0901, + "step": 9625 + }, + { + "epoch": 6.48, + "grad_norm": 3.3440194129943848, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0908, + "step": 9650 + }, + { + "epoch": 6.5, + "grad_norm": 3.386597156524658, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0928, + "step": 9675 + }, + { + "epoch": 6.51, + "grad_norm": 3.621265172958374, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0907, + "step": 9700 + }, + { + "epoch": 6.53, + "grad_norm": 3.9119040966033936, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0894, + "step": 9725 + }, + { + "epoch": 6.55, + "grad_norm": 3.462559223175049, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0891, + "step": 9750 + }, + { + "epoch": 6.56, + "grad_norm": 3.5488765239715576, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0896, + "step": 9775 + }, + { + "epoch": 6.58, + "grad_norm": 3.224942922592163, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0891, + "step": 9800 + }, + { + "epoch": 6.6, + "grad_norm": 3.1307756900787354, + "learning_rate": 9.063216080402011e-06, + "loss": 0.0893, + "step": 9825 + }, + { + "epoch": 6.62, + "grad_norm": 3.251023292541504, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0903, + "step": 9850 + }, + { + "epoch": 6.63, + "grad_norm": 3.177859306335449, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0956, + "step": 9875 + }, + { + "epoch": 6.65, + "grad_norm": 3.1978774070739746, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0911, + "step": 9900 + }, + { + "epoch": 6.67, + "grad_norm": 2.7492542266845703, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0873, + "step": 9925 + }, + { + "epoch": 6.68, + "grad_norm": 3.136077880859375, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0906, + "step": 9950 + }, + { + "epoch": 6.7, + "grad_norm": 3.527278184890747, + "learning_rate": 9.048140703517589e-06, + "loss": 0.092, + "step": 9975 + }, + { + "epoch": 6.72, + "grad_norm": 3.2761435508728027, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0908, + "step": 10000 + }, + { + "epoch": 6.72, + "eval_loss": 0.17029492557048798, + "eval_runtime": 510.4229, + "eval_samples_per_second": 2.76, + "eval_steps_per_second": 2.76, + "eval_wer": 28.12175717744725, + "step": 10000 + }, + { + "epoch": 6.73, + "grad_norm": 3.230163812637329, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0899, + "step": 10025 + }, + { + "epoch": 6.75, + "grad_norm": 3.5742313861846924, + "learning_rate": 9.040603015075378e-06, + "loss": 0.0927, + "step": 10050 + }, + { + "epoch": 6.77, + "grad_norm": 3.380689859390259, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0887, + "step": 10075 + }, + { + "epoch": 6.78, + "grad_norm": 3.4047091007232666, + "learning_rate": 9.0356783919598e-06, + "loss": 0.088, + "step": 10100 + }, + { + "epoch": 6.8, + "grad_norm": 3.3065056800842285, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0892, + "step": 10125 + }, + { + "epoch": 6.82, + "grad_norm": 3.664566993713379, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0904, + "step": 10150 + }, + { + "epoch": 6.83, + "grad_norm": 3.5351150035858154, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0905, + "step": 10175 + }, + { + "epoch": 6.85, + "grad_norm": 3.6764650344848633, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0902, + "step": 10200 + }, + { + "epoch": 6.87, + "grad_norm": 3.3056864738464355, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0866, + "step": 10225 + }, + { + "epoch": 6.88, + "grad_norm": 2.84043025970459, + "learning_rate": 9.020603015075378e-06, + "loss": 0.093, + "step": 10250 + }, + { + "epoch": 6.9, + "grad_norm": 3.714857578277588, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0875, + "step": 10275 + }, + { + "epoch": 6.92, + "grad_norm": 3.1573572158813477, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0895, + "step": 10300 + }, + { + "epoch": 6.93, + "grad_norm": 3.662036180496216, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0907, + "step": 10325 + }, + { + "epoch": 6.95, + "grad_norm": 3.429009199142456, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0865, + "step": 10350 + }, + { + "epoch": 6.97, + "grad_norm": 3.921877861022949, + "learning_rate": 9.008040201005027e-06, + "loss": 0.092, + "step": 10375 + }, + { + "epoch": 6.98, + "grad_norm": 3.3963305950164795, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0887, + "step": 10400 + }, + { + "epoch": 7.0, + "grad_norm": 2.715726613998413, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0848, + "step": 10425 + }, + { + "epoch": 7.02, + "grad_norm": 2.7307467460632324, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0739, + "step": 10450 + }, + { + "epoch": 7.03, + "grad_norm": 2.9492616653442383, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0746, + "step": 10475 + }, + { + "epoch": 7.05, + "grad_norm": 2.7080578804016113, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0758, + "step": 10500 + }, + { + "epoch": 7.07, + "grad_norm": 3.0745952129364014, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0754, + "step": 10525 + }, + { + "epoch": 7.09, + "grad_norm": 2.8382179737091064, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0748, + "step": 10550 + }, + { + "epoch": 7.1, + "grad_norm": 3.3266615867614746, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0768, + "step": 10575 + }, + { + "epoch": 7.12, + "grad_norm": 3.281311273574829, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0715, + "step": 10600 + }, + { + "epoch": 7.14, + "grad_norm": 2.937974452972412, + "learning_rate": 8.982914572864322e-06, + "loss": 0.073, + "step": 10625 + }, + { + "epoch": 7.15, + "grad_norm": 3.3671152591705322, + "learning_rate": 8.980402010050253e-06, + "loss": 0.0759, + "step": 10650 + }, + { + "epoch": 7.17, + "grad_norm": 3.638820171356201, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0741, + "step": 10675 + }, + { + "epoch": 7.19, + "grad_norm": 2.8787953853607178, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0748, + "step": 10700 + }, + { + "epoch": 7.2, + "grad_norm": 3.1332554817199707, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0764, + "step": 10725 + }, + { + "epoch": 7.22, + "grad_norm": 3.090695858001709, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0785, + "step": 10750 + }, + { + "epoch": 7.24, + "grad_norm": 3.604449510574341, + "learning_rate": 8.967839195979901e-06, + "loss": 0.077, + "step": 10775 + }, + { + "epoch": 7.25, + "grad_norm": 3.2391767501831055, + "learning_rate": 8.96532663316583e-06, + "loss": 0.0756, + "step": 10800 + }, + { + "epoch": 7.27, + "grad_norm": 3.2017974853515625, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0752, + "step": 10825 + }, + { + "epoch": 7.29, + "grad_norm": 3.580592155456543, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0767, + "step": 10850 + }, + { + "epoch": 7.3, + "grad_norm": 3.331475019454956, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0761, + "step": 10875 + }, + { + "epoch": 7.32, + "grad_norm": 2.882138967514038, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0738, + "step": 10900 + }, + { + "epoch": 7.34, + "grad_norm": 3.2594470977783203, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0775, + "step": 10925 + }, + { + "epoch": 7.35, + "grad_norm": 3.5899250507354736, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0738, + "step": 10950 + }, + { + "epoch": 7.37, + "grad_norm": 3.113407611846924, + "learning_rate": 8.947738693467337e-06, + "loss": 0.0799, + "step": 10975 + }, + { + "epoch": 7.39, + "grad_norm": 3.1238062381744385, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0764, + "step": 11000 + }, + { + "epoch": 7.39, + "eval_loss": 0.17724904417991638, + "eval_runtime": 511.738, + "eval_samples_per_second": 2.753, + "eval_steps_per_second": 2.753, + "eval_wer": 29.06260809408509, + "step": 11000 + }, + { + "epoch": 7.4, + "grad_norm": 3.3441638946533203, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0754, + "step": 11025 + }, + { + "epoch": 7.42, + "grad_norm": 3.426607131958008, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0754, + "step": 11050 + }, + { + "epoch": 7.44, + "grad_norm": 3.2632927894592285, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0758, + "step": 11075 + }, + { + "epoch": 7.45, + "grad_norm": 2.913177490234375, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0768, + "step": 11100 + }, + { + "epoch": 7.47, + "grad_norm": 3.321467161178589, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0814, + "step": 11125 + }, + { + "epoch": 7.49, + "grad_norm": 3.190857172012329, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0766, + "step": 11150 + }, + { + "epoch": 7.51, + "grad_norm": 3.290775775909424, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0766, + "step": 11175 + }, + { + "epoch": 7.52, + "grad_norm": 3.9134840965270996, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0778, + "step": 11200 + }, + { + "epoch": 7.54, + "grad_norm": 3.0009334087371826, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0747, + "step": 11225 + }, + { + "epoch": 7.56, + "grad_norm": 3.578972816467285, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0794, + "step": 11250 + }, + { + "epoch": 7.57, + "grad_norm": 2.6400928497314453, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0762, + "step": 11275 + }, + { + "epoch": 7.59, + "grad_norm": 3.749530076980591, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0787, + "step": 11300 + }, + { + "epoch": 7.61, + "grad_norm": 3.4759154319763184, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0759, + "step": 11325 + }, + { + "epoch": 7.62, + "grad_norm": 3.50630521774292, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0733, + "step": 11350 + }, + { + "epoch": 7.64, + "grad_norm": 3.326730728149414, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0754, + "step": 11375 + }, + { + "epoch": 7.66, + "grad_norm": 3.661982774734497, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0794, + "step": 11400 + }, + { + "epoch": 7.67, + "grad_norm": 3.375459909439087, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0767, + "step": 11425 + }, + { + "epoch": 7.69, + "grad_norm": 3.2290966510772705, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0761, + "step": 11450 + }, + { + "epoch": 7.71, + "grad_norm": 3.0712223052978516, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0766, + "step": 11475 + }, + { + "epoch": 7.72, + "grad_norm": 3.504812717437744, + "learning_rate": 8.89497487437186e-06, + "loss": 0.0773, + "step": 11500 + }, + { + "epoch": 7.74, + "grad_norm": 3.4784443378448486, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0796, + "step": 11525 + }, + { + "epoch": 7.76, + "grad_norm": 3.2101492881774902, + "learning_rate": 8.889949748743718e-06, + "loss": 0.0765, + "step": 11550 + }, + { + "epoch": 7.77, + "grad_norm": 3.276008367538452, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0757, + "step": 11575 + }, + { + "epoch": 7.79, + "grad_norm": 3.388629198074341, + "learning_rate": 8.884924623115579e-06, + "loss": 0.078, + "step": 11600 + }, + { + "epoch": 7.81, + "grad_norm": 3.0041682720184326, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0737, + "step": 11625 + }, + { + "epoch": 7.82, + "grad_norm": 3.183945894241333, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0766, + "step": 11650 + }, + { + "epoch": 7.84, + "grad_norm": 2.99658465385437, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0757, + "step": 11675 + }, + { + "epoch": 7.86, + "grad_norm": 3.5441689491271973, + "learning_rate": 8.874874371859296e-06, + "loss": 0.077, + "step": 11700 + }, + { + "epoch": 7.87, + "grad_norm": 3.34417986869812, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0761, + "step": 11725 + }, + { + "epoch": 7.89, + "grad_norm": 2.9885261058807373, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0777, + "step": 11750 + }, + { + "epoch": 7.91, + "grad_norm": 3.308939218521118, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0745, + "step": 11775 + }, + { + "epoch": 7.92, + "grad_norm": 2.95430064201355, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0773, + "step": 11800 + }, + { + "epoch": 7.94, + "grad_norm": 3.582934617996216, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0778, + "step": 11825 + }, + { + "epoch": 7.96, + "grad_norm": 3.065767765045166, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0781, + "step": 11850 + }, + { + "epoch": 7.98, + "grad_norm": 3.6940627098083496, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0768, + "step": 11875 + }, + { + "epoch": 7.99, + "grad_norm": 3.6962196826934814, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0757, + "step": 11900 + }, + { + "epoch": 8.01, + "grad_norm": 2.8643910884857178, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0682, + "step": 11925 + }, + { + "epoch": 8.03, + "grad_norm": 2.79264235496521, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0602, + "step": 11950 + }, + { + "epoch": 8.04, + "grad_norm": 3.17783260345459, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0648, + "step": 11975 + }, + { + "epoch": 8.06, + "grad_norm": 3.4043726921081543, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0621, + "step": 12000 + }, + { + "epoch": 8.06, + "eval_loss": 0.18032993376255035, + "eval_runtime": 510.4059, + "eval_samples_per_second": 2.761, + "eval_steps_per_second": 2.761, + "eval_wer": 28.785887236250435, + "step": 12000 + }, + { + "epoch": 8.08, + "grad_norm": 3.1424877643585205, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0625, + "step": 12025 + }, + { + "epoch": 8.09, + "grad_norm": 2.9311633110046387, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0626, + "step": 12050 + }, + { + "epoch": 8.11, + "grad_norm": 3.3612778186798096, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0602, + "step": 12075 + }, + { + "epoch": 8.13, + "grad_norm": 3.378889799118042, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0621, + "step": 12100 + }, + { + "epoch": 8.14, + "grad_norm": 3.024562358856201, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0631, + "step": 12125 + }, + { + "epoch": 8.16, + "grad_norm": 3.3630809783935547, + "learning_rate": 8.82964824120603e-06, + "loss": 0.0586, + "step": 12150 + }, + { + "epoch": 8.18, + "grad_norm": 2.9870553016662598, + "learning_rate": 8.82713567839196e-06, + "loss": 0.0637, + "step": 12175 + }, + { + "epoch": 8.19, + "grad_norm": 3.198687791824341, + "learning_rate": 8.824623115577891e-06, + "loss": 0.0639, + "step": 12200 + }, + { + "epoch": 8.21, + "grad_norm": 2.8470616340637207, + "learning_rate": 8.82211055276382e-06, + "loss": 0.0628, + "step": 12225 + }, + { + "epoch": 8.23, + "grad_norm": 3.0749526023864746, + "learning_rate": 8.81959798994975e-06, + "loss": 0.063, + "step": 12250 + }, + { + "epoch": 8.24, + "grad_norm": 3.1890945434570312, + "learning_rate": 8.817085427135679e-06, + "loss": 0.0596, + "step": 12275 + }, + { + "epoch": 8.26, + "grad_norm": 3.2999472618103027, + "learning_rate": 8.814572864321608e-06, + "loss": 0.0625, + "step": 12300 + }, + { + "epoch": 8.28, + "grad_norm": 2.686805009841919, + "learning_rate": 8.812060301507538e-06, + "loss": 0.0638, + "step": 12325 + }, + { + "epoch": 8.29, + "grad_norm": 2.7559542655944824, + "learning_rate": 8.809547738693469e-06, + "loss": 0.0601, + "step": 12350 + }, + { + "epoch": 8.31, + "grad_norm": 3.118020534515381, + "learning_rate": 8.807035175879398e-06, + "loss": 0.0626, + "step": 12375 + }, + { + "epoch": 8.33, + "grad_norm": 3.2046866416931152, + "learning_rate": 8.804522613065327e-06, + "loss": 0.0626, + "step": 12400 + }, + { + "epoch": 8.34, + "grad_norm": 3.025376796722412, + "learning_rate": 8.802010050251257e-06, + "loss": 0.063, + "step": 12425 + }, + { + "epoch": 8.36, + "grad_norm": 2.8445382118225098, + "learning_rate": 8.799497487437186e-06, + "loss": 0.0642, + "step": 12450 + }, + { + "epoch": 8.38, + "grad_norm": 3.525177240371704, + "learning_rate": 8.796984924623117e-06, + "loss": 0.0644, + "step": 12475 + }, + { + "epoch": 8.39, + "grad_norm": 3.397125244140625, + "learning_rate": 8.794472361809046e-06, + "loss": 0.0607, + "step": 12500 + }, + { + "epoch": 8.41, + "grad_norm": 2.8300137519836426, + "learning_rate": 8.791959798994976e-06, + "loss": 0.065, + "step": 12525 + }, + { + "epoch": 8.43, + "grad_norm": 3.177786111831665, + "learning_rate": 8.789447236180905e-06, + "loss": 0.0609, + "step": 12550 + }, + { + "epoch": 8.45, + "grad_norm": 3.142648458480835, + "learning_rate": 8.786934673366834e-06, + "loss": 0.0629, + "step": 12575 + }, + { + "epoch": 8.46, + "grad_norm": 2.9772539138793945, + "learning_rate": 8.784422110552765e-06, + "loss": 0.0647, + "step": 12600 + }, + { + "epoch": 8.48, + "grad_norm": 3.7154743671417236, + "learning_rate": 8.781909547738695e-06, + "loss": 0.0649, + "step": 12625 + }, + { + "epoch": 8.5, + "grad_norm": 3.272456645965576, + "learning_rate": 8.779396984924624e-06, + "loss": 0.0653, + "step": 12650 + }, + { + "epoch": 8.51, + "grad_norm": 2.978908061981201, + "learning_rate": 8.776884422110553e-06, + "loss": 0.0631, + "step": 12675 + }, + { + "epoch": 8.53, + "grad_norm": 2.990621566772461, + "learning_rate": 8.774371859296483e-06, + "loss": 0.0637, + "step": 12700 + }, + { + "epoch": 8.55, + "grad_norm": 2.6106889247894287, + "learning_rate": 8.771859296482412e-06, + "loss": 0.064, + "step": 12725 + }, + { + "epoch": 8.56, + "grad_norm": 2.959925413131714, + "learning_rate": 8.769346733668343e-06, + "loss": 0.0616, + "step": 12750 + }, + { + "epoch": 8.58, + "grad_norm": 3.4228014945983887, + "learning_rate": 8.766834170854272e-06, + "loss": 0.0637, + "step": 12775 + }, + { + "epoch": 8.6, + "grad_norm": 3.4024524688720703, + "learning_rate": 8.764321608040202e-06, + "loss": 0.0641, + "step": 12800 + }, + { + "epoch": 8.61, + "grad_norm": 3.3007500171661377, + "learning_rate": 8.761809045226131e-06, + "loss": 0.064, + "step": 12825 + }, + { + "epoch": 8.63, + "grad_norm": 3.561375379562378, + "learning_rate": 8.75929648241206e-06, + "loss": 0.0689, + "step": 12850 + }, + { + "epoch": 8.65, + "grad_norm": 2.9627270698547363, + "learning_rate": 8.756783919597991e-06, + "loss": 0.0639, + "step": 12875 + }, + { + "epoch": 8.66, + "grad_norm": 3.5544731616973877, + "learning_rate": 8.75427135678392e-06, + "loss": 0.0652, + "step": 12900 + }, + { + "epoch": 8.68, + "grad_norm": 3.6626393795013428, + "learning_rate": 8.75175879396985e-06, + "loss": 0.0684, + "step": 12925 + }, + { + "epoch": 8.7, + "grad_norm": 3.2799363136291504, + "learning_rate": 8.74924623115578e-06, + "loss": 0.0634, + "step": 12950 + }, + { + "epoch": 8.71, + "grad_norm": 3.6406378746032715, + "learning_rate": 8.746733668341709e-06, + "loss": 0.0653, + "step": 12975 + }, + { + "epoch": 8.73, + "grad_norm": 3.4915060997009277, + "learning_rate": 8.74422110552764e-06, + "loss": 0.0658, + "step": 13000 + }, + { + "epoch": 8.73, + "eval_loss": 0.18315483629703522, + "eval_runtime": 509.7702, + "eval_samples_per_second": 2.764, + "eval_steps_per_second": 2.764, + "eval_wer": 29.91352473192667, + "step": 13000 + }, + { + "epoch": 8.75, + "grad_norm": 2.7121729850769043, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0639, + "step": 13025 + }, + { + "epoch": 8.76, + "grad_norm": 3.2380919456481934, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0641, + "step": 13050 + }, + { + "epoch": 8.78, + "grad_norm": 3.229346990585327, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0691, + "step": 13075 + }, + { + "epoch": 8.8, + "grad_norm": 2.9300754070281982, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0672, + "step": 13100 + }, + { + "epoch": 8.81, + "grad_norm": 3.952948570251465, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0637, + "step": 13125 + }, + { + "epoch": 8.83, + "grad_norm": 3.3095152378082275, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0651, + "step": 13150 + }, + { + "epoch": 8.85, + "grad_norm": 3.537675142288208, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0607, + "step": 13175 + }, + { + "epoch": 8.87, + "grad_norm": 3.9727022647857666, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0665, + "step": 13200 + }, + { + "epoch": 8.88, + "grad_norm": 3.0853493213653564, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0633, + "step": 13225 + }, + { + "epoch": 8.9, + "grad_norm": 3.1359477043151855, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0645, + "step": 13250 + }, + { + "epoch": 8.92, + "grad_norm": 3.4040563106536865, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0651, + "step": 13275 + }, + { + "epoch": 8.93, + "grad_norm": 3.2836079597473145, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0623, + "step": 13300 + }, + { + "epoch": 8.95, + "grad_norm": 3.7122316360473633, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0642, + "step": 13325 + }, + { + "epoch": 8.97, + "grad_norm": 2.9786834716796875, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0661, + "step": 13350 + }, + { + "epoch": 8.98, + "grad_norm": 3.3601863384246826, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0651, + "step": 13375 + }, + { + "epoch": 9.0, + "grad_norm": 3.556716203689575, + "learning_rate": 8.704120603015076e-06, + "loss": 0.067, + "step": 13400 + }, + { + "epoch": 9.02, + "grad_norm": 2.550269365310669, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0516, + "step": 13425 + }, + { + "epoch": 9.03, + "grad_norm": 2.7437970638275146, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0514, + "step": 13450 + }, + { + "epoch": 9.05, + "grad_norm": 2.7179758548736572, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0488, + "step": 13475 + }, + { + "epoch": 9.07, + "grad_norm": 2.2934176921844482, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0522, + "step": 13500 + }, + { + "epoch": 9.08, + "grad_norm": 2.7907474040985107, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0518, + "step": 13525 + }, + { + "epoch": 9.1, + "grad_norm": 2.927288770675659, + "learning_rate": 8.689045226130654e-06, + "loss": 0.053, + "step": 13550 + }, + { + "epoch": 9.12, + "grad_norm": 3.4830610752105713, + "learning_rate": 8.686532663316583e-06, + "loss": 0.0526, + "step": 13575 + }, + { + "epoch": 9.13, + "grad_norm": 2.820234537124634, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0516, + "step": 13600 + }, + { + "epoch": 9.15, + "grad_norm": 3.2226998805999756, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0501, + "step": 13625 + }, + { + "epoch": 9.17, + "grad_norm": 3.1339826583862305, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0518, + "step": 13650 + }, + { + "epoch": 9.18, + "grad_norm": 3.39349365234375, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0528, + "step": 13675 + }, + { + "epoch": 9.2, + "grad_norm": 2.9372856616973877, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0556, + "step": 13700 + }, + { + "epoch": 9.22, + "grad_norm": 2.7705941200256348, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0519, + "step": 13725 + }, + { + "epoch": 9.23, + "grad_norm": 2.9308881759643555, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0486, + "step": 13750 + }, + { + "epoch": 9.25, + "grad_norm": 3.1928322315216064, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0501, + "step": 13775 + }, + { + "epoch": 9.27, + "grad_norm": 2.571592092514038, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0499, + "step": 13800 + }, + { + "epoch": 9.28, + "grad_norm": 3.330294132232666, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0518, + "step": 13825 + }, + { + "epoch": 9.3, + "grad_norm": 3.7702674865722656, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0516, + "step": 13850 + }, + { + "epoch": 9.32, + "grad_norm": 3.7358484268188477, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0534, + "step": 13875 + }, + { + "epoch": 9.34, + "grad_norm": 2.7643566131591797, + "learning_rate": 8.65386934673367e-06, + "loss": 0.056, + "step": 13900 + }, + { + "epoch": 9.35, + "grad_norm": 2.894335985183716, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0547, + "step": 13925 + }, + { + "epoch": 9.37, + "grad_norm": 2.96712064743042, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0523, + "step": 13950 + }, + { + "epoch": 9.39, + "grad_norm": 3.296905040740967, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0509, + "step": 13975 + }, + { + "epoch": 9.4, + "grad_norm": 3.1295838356018066, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0518, + "step": 14000 + }, + { + "epoch": 9.4, + "eval_loss": 0.19353525340557098, + "eval_runtime": 510.3065, + "eval_samples_per_second": 2.761, + "eval_steps_per_second": 2.761, + "eval_wer": 29.090280179868554, + "step": 14000 + }, + { + "epoch": 9.42, + "grad_norm": 3.1721503734588623, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0545, + "step": 14025 + }, + { + "epoch": 9.44, + "grad_norm": 3.2175495624542236, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0532, + "step": 14050 + }, + { + "epoch": 9.45, + "grad_norm": 2.9887442588806152, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0538, + "step": 14075 + }, + { + "epoch": 9.47, + "grad_norm": 2.981250047683716, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0544, + "step": 14100 + }, + { + "epoch": 9.49, + "grad_norm": 3.1379947662353516, + "learning_rate": 8.631256281407035e-06, + "loss": 0.053, + "step": 14125 + }, + { + "epoch": 9.5, + "grad_norm": 3.1135265827178955, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0515, + "step": 14150 + }, + { + "epoch": 9.52, + "grad_norm": 2.8551738262176514, + "learning_rate": 8.626231155778895e-06, + "loss": 0.0548, + "step": 14175 + }, + { + "epoch": 9.54, + "grad_norm": 3.264505624771118, + "learning_rate": 8.623718592964825e-06, + "loss": 0.052, + "step": 14200 + }, + { + "epoch": 9.55, + "grad_norm": 3.2367053031921387, + "learning_rate": 8.621206030150756e-06, + "loss": 0.052, + "step": 14225 + }, + { + "epoch": 9.57, + "grad_norm": 3.5439274311065674, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0515, + "step": 14250 + }, + { + "epoch": 9.59, + "grad_norm": 3.412789821624756, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0547, + "step": 14275 + }, + { + "epoch": 9.6, + "grad_norm": 3.6014373302459717, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0533, + "step": 14300 + }, + { + "epoch": 9.62, + "grad_norm": 3.224170684814453, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0524, + "step": 14325 + }, + { + "epoch": 9.64, + "grad_norm": 3.330113172531128, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0537, + "step": 14350 + }, + { + "epoch": 9.65, + "grad_norm": 3.19978666305542, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0524, + "step": 14375 + }, + { + "epoch": 9.67, + "grad_norm": 4.063028812408447, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0539, + "step": 14400 + }, + { + "epoch": 9.69, + "grad_norm": 3.1024320125579834, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0535, + "step": 14425 + }, + { + "epoch": 9.7, + "grad_norm": 3.0538790225982666, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0531, + "step": 14450 + }, + { + "epoch": 9.72, + "grad_norm": 3.585303783416748, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0538, + "step": 14475 + }, + { + "epoch": 9.74, + "grad_norm": 3.149641990661621, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0528, + "step": 14500 + }, + { + "epoch": 9.75, + "grad_norm": 3.390556573867798, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0527, + "step": 14525 + }, + { + "epoch": 9.77, + "grad_norm": 2.823974132537842, + "learning_rate": 8.58854271356784e-06, + "loss": 0.0527, + "step": 14550 + }, + { + "epoch": 9.79, + "grad_norm": 3.134711265563965, + "learning_rate": 8.58603015075377e-06, + "loss": 0.0542, + "step": 14575 + }, + { + "epoch": 9.81, + "grad_norm": 3.110089063644409, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0528, + "step": 14600 + }, + { + "epoch": 9.82, + "grad_norm": 3.1524767875671387, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0517, + "step": 14625 + }, + { + "epoch": 9.84, + "grad_norm": 2.90120530128479, + "learning_rate": 8.578492462311559e-06, + "loss": 0.0515, + "step": 14650 + }, + { + "epoch": 9.86, + "grad_norm": 5.170886993408203, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0549, + "step": 14675 + }, + { + "epoch": 9.87, + "grad_norm": 3.319868326187134, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0531, + "step": 14700 + }, + { + "epoch": 9.89, + "grad_norm": 3.2302260398864746, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0521, + "step": 14725 + }, + { + "epoch": 9.91, + "grad_norm": 2.753457546234131, + "learning_rate": 8.568442211055276e-06, + "loss": 0.052, + "step": 14750 + }, + { + "epoch": 9.92, + "grad_norm": 3.5706660747528076, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0559, + "step": 14775 + }, + { + "epoch": 9.94, + "grad_norm": 3.3832499980926514, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0509, + "step": 14800 + }, + { + "epoch": 9.96, + "grad_norm": 3.235295295715332, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0554, + "step": 14825 + }, + { + "epoch": 9.97, + "grad_norm": 3.2667226791381836, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0522, + "step": 14850 + }, + { + "epoch": 9.99, + "grad_norm": 3.3643698692321777, + "learning_rate": 8.555879396984925e-06, + "loss": 0.057, + "step": 14875 + }, + { + "epoch": 10.01, + "grad_norm": 2.965453863143921, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0476, + "step": 14900 + }, + { + "epoch": 10.02, + "grad_norm": 2.819925546646118, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0401, + "step": 14925 + }, + { + "epoch": 10.04, + "grad_norm": 2.694479465484619, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0416, + "step": 14950 + }, + { + "epoch": 10.06, + "grad_norm": 3.1868343353271484, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0422, + "step": 14975 + }, + { + "epoch": 10.07, + "grad_norm": 2.852847099304199, + "learning_rate": 8.543316582914573e-06, + "loss": 0.0413, + "step": 15000 + }, + { + "epoch": 10.07, + "eval_loss": 0.20033271610736847, + "eval_runtime": 508.9241, + "eval_samples_per_second": 2.769, + "eval_steps_per_second": 2.769, + "eval_wer": 28.97959183673469, + "step": 15000 + }, + { + "epoch": 10.09, + "grad_norm": 2.6403441429138184, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0398, + "step": 15025 + }, + { + "epoch": 10.11, + "grad_norm": 3.0076358318328857, + "learning_rate": 8.538291457286433e-06, + "loss": 0.04, + "step": 15050 + }, + { + "epoch": 10.12, + "grad_norm": 2.6404025554656982, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0421, + "step": 15075 + }, + { + "epoch": 10.14, + "grad_norm": 2.900808572769165, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0413, + "step": 15100 + }, + { + "epoch": 10.16, + "grad_norm": 2.8935415744781494, + "learning_rate": 8.530753768844221e-06, + "loss": 0.041, + "step": 15125 + }, + { + "epoch": 10.17, + "grad_norm": 2.9496779441833496, + "learning_rate": 8.52824120603015e-06, + "loss": 0.0414, + "step": 15150 + }, + { + "epoch": 10.19, + "grad_norm": 3.303175926208496, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0431, + "step": 15175 + }, + { + "epoch": 10.21, + "grad_norm": 2.9223127365112305, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0428, + "step": 15200 + }, + { + "epoch": 10.22, + "grad_norm": 2.385010242462158, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0407, + "step": 15225 + }, + { + "epoch": 10.24, + "grad_norm": 2.7351412773132324, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0408, + "step": 15250 + }, + { + "epoch": 10.26, + "grad_norm": 2.8547720909118652, + "learning_rate": 8.515678391959799e-06, + "loss": 0.044, + "step": 15275 + }, + { + "epoch": 10.28, + "grad_norm": 2.6377203464508057, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0415, + "step": 15300 + }, + { + "epoch": 10.29, + "grad_norm": 2.6137449741363525, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0437, + "step": 15325 + }, + { + "epoch": 10.31, + "grad_norm": 3.0624136924743652, + "learning_rate": 8.508140703517589e-06, + "loss": 0.041, + "step": 15350 + }, + { + "epoch": 10.33, + "grad_norm": 3.235222816467285, + "learning_rate": 8.505628140703518e-06, + "loss": 0.041, + "step": 15375 + }, + { + "epoch": 10.34, + "grad_norm": 3.127394199371338, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0437, + "step": 15400 + }, + { + "epoch": 10.36, + "grad_norm": 2.9560129642486572, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0424, + "step": 15425 + }, + { + "epoch": 10.38, + "grad_norm": 2.8424625396728516, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0417, + "step": 15450 + }, + { + "epoch": 10.39, + "grad_norm": 3.3747715950012207, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0433, + "step": 15475 + }, + { + "epoch": 10.41, + "grad_norm": 3.1818647384643555, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0426, + "step": 15500 + }, + { + "epoch": 10.43, + "grad_norm": 2.781809091567993, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0442, + "step": 15525 + }, + { + "epoch": 10.44, + "grad_norm": 2.716920852661133, + "learning_rate": 8.488040201005025e-06, + "loss": 0.0432, + "step": 15550 + }, + { + "epoch": 10.46, + "grad_norm": 2.764256477355957, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0433, + "step": 15575 + }, + { + "epoch": 10.48, + "grad_norm": 2.6743826866149902, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0437, + "step": 15600 + }, + { + "epoch": 10.49, + "grad_norm": 3.2498996257781982, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0432, + "step": 15625 + }, + { + "epoch": 10.51, + "grad_norm": 3.112048864364624, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0414, + "step": 15650 + }, + { + "epoch": 10.53, + "grad_norm": 2.9476828575134277, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0425, + "step": 15675 + }, + { + "epoch": 10.54, + "grad_norm": 2.9141814708709717, + "learning_rate": 8.472964824120604e-06, + "loss": 0.041, + "step": 15700 + }, + { + "epoch": 10.56, + "grad_norm": 2.969512462615967, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0425, + "step": 15725 + }, + { + "epoch": 10.58, + "grad_norm": 3.2587733268737793, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0439, + "step": 15750 + }, + { + "epoch": 10.59, + "grad_norm": 3.059880256652832, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0448, + "step": 15775 + }, + { + "epoch": 10.61, + "grad_norm": 3.435535430908203, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0471, + "step": 15800 + }, + { + "epoch": 10.63, + "grad_norm": 3.2924277782440186, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0452, + "step": 15825 + }, + { + "epoch": 10.64, + "grad_norm": 3.082156181335449, + "learning_rate": 8.457889447236182e-06, + "loss": 0.0435, + "step": 15850 + }, + { + "epoch": 10.66, + "grad_norm": 2.9143271446228027, + "learning_rate": 8.455376884422111e-06, + "loss": 0.0421, + "step": 15875 + }, + { + "epoch": 10.68, + "grad_norm": 3.1731033325195312, + "learning_rate": 8.45286432160804e-06, + "loss": 0.0432, + "step": 15900 + }, + { + "epoch": 10.7, + "grad_norm": 3.1878859996795654, + "learning_rate": 8.450351758793972e-06, + "loss": 0.0435, + "step": 15925 + }, + { + "epoch": 10.71, + "grad_norm": 3.052755355834961, + "learning_rate": 8.4478391959799e-06, + "loss": 0.0438, + "step": 15950 + }, + { + "epoch": 10.73, + "grad_norm": 3.448249340057373, + "learning_rate": 8.44532663316583e-06, + "loss": 0.0432, + "step": 15975 + }, + { + "epoch": 10.75, + "grad_norm": 3.0805633068084717, + "learning_rate": 8.44281407035176e-06, + "loss": 0.0456, + "step": 16000 + }, + { + "epoch": 10.75, + "eval_loss": 0.20624487102031708, + "eval_runtime": 506.9324, + "eval_samples_per_second": 2.779, + "eval_steps_per_second": 2.779, + "eval_wer": 29.67139398132134, + "step": 16000 + }, + { + "epoch": 10.76, + "grad_norm": 2.8858835697174072, + "learning_rate": 8.440301507537689e-06, + "loss": 0.0444, + "step": 16025 + }, + { + "epoch": 10.78, + "grad_norm": 2.7300102710723877, + "learning_rate": 8.437788944723618e-06, + "loss": 0.0449, + "step": 16050 + }, + { + "epoch": 10.8, + "grad_norm": 3.4515113830566406, + "learning_rate": 8.43527638190955e-06, + "loss": 0.0441, + "step": 16075 + }, + { + "epoch": 10.81, + "grad_norm": 2.9812541007995605, + "learning_rate": 8.432763819095479e-06, + "loss": 0.0443, + "step": 16100 + }, + { + "epoch": 10.83, + "grad_norm": 3.2973012924194336, + "learning_rate": 8.430251256281408e-06, + "loss": 0.0454, + "step": 16125 + }, + { + "epoch": 10.85, + "grad_norm": 3.0193145275115967, + "learning_rate": 8.427738693467337e-06, + "loss": 0.0444, + "step": 16150 + }, + { + "epoch": 10.86, + "grad_norm": 2.9764163494110107, + "learning_rate": 8.425226130653266e-06, + "loss": 0.0437, + "step": 16175 + }, + { + "epoch": 10.88, + "grad_norm": 3.2184228897094727, + "learning_rate": 8.422713567839198e-06, + "loss": 0.0444, + "step": 16200 + }, + { + "epoch": 10.9, + "grad_norm": 3.2065048217773438, + "learning_rate": 8.420201005025125e-06, + "loss": 0.0423, + "step": 16225 + }, + { + "epoch": 10.91, + "grad_norm": 3.27622652053833, + "learning_rate": 8.417688442211056e-06, + "loss": 0.0445, + "step": 16250 + }, + { + "epoch": 10.93, + "grad_norm": 3.2937915325164795, + "learning_rate": 8.415175879396985e-06, + "loss": 0.0441, + "step": 16275 + }, + { + "epoch": 10.95, + "grad_norm": 3.310680627822876, + "learning_rate": 8.412663316582915e-06, + "loss": 0.0448, + "step": 16300 + }, + { + "epoch": 10.96, + "grad_norm": 3.2154619693756104, + "learning_rate": 8.410150753768846e-06, + "loss": 0.0442, + "step": 16325 + }, + { + "epoch": 10.98, + "grad_norm": 3.290130138397217, + "learning_rate": 8.407638190954775e-06, + "loss": 0.0439, + "step": 16350 + }, + { + "epoch": 11.0, + "grad_norm": 3.004760503768921, + "learning_rate": 8.405125628140704e-06, + "loss": 0.0434, + "step": 16375 + }, + { + "epoch": 11.01, + "grad_norm": 2.8138742446899414, + "learning_rate": 8.402613065326634e-06, + "loss": 0.0329, + "step": 16400 + }, + { + "epoch": 11.03, + "grad_norm": 2.536019802093506, + "learning_rate": 8.400100502512563e-06, + "loss": 0.0339, + "step": 16425 + }, + { + "epoch": 11.05, + "grad_norm": 2.8082315921783447, + "learning_rate": 8.397587939698492e-06, + "loss": 0.0332, + "step": 16450 + }, + { + "epoch": 11.06, + "grad_norm": 2.3799972534179688, + "learning_rate": 8.395075376884423e-06, + "loss": 0.033, + "step": 16475 + }, + { + "epoch": 11.08, + "grad_norm": 3.4198269844055176, + "learning_rate": 8.392562814070351e-06, + "loss": 0.0331, + "step": 16500 + }, + { + "epoch": 11.1, + "grad_norm": 2.7005763053894043, + "learning_rate": 8.390050251256282e-06, + "loss": 0.0313, + "step": 16525 + }, + { + "epoch": 11.11, + "grad_norm": 2.602388381958008, + "learning_rate": 8.387537688442211e-06, + "loss": 0.0339, + "step": 16550 + }, + { + "epoch": 11.13, + "grad_norm": 2.7407968044281006, + "learning_rate": 8.38502512562814e-06, + "loss": 0.0325, + "step": 16575 + }, + { + "epoch": 11.15, + "grad_norm": 2.967979669570923, + "learning_rate": 8.382512562814072e-06, + "loss": 0.0339, + "step": 16600 + }, + { + "epoch": 11.17, + "grad_norm": 2.7870914936065674, + "learning_rate": 8.380000000000001e-06, + "loss": 0.0331, + "step": 16625 + }, + { + "epoch": 11.18, + "grad_norm": 2.4689157009124756, + "learning_rate": 8.37748743718593e-06, + "loss": 0.0323, + "step": 16650 + }, + { + "epoch": 11.2, + "grad_norm": 2.7460992336273193, + "learning_rate": 8.37497487437186e-06, + "loss": 0.0353, + "step": 16675 + }, + { + "epoch": 11.22, + "grad_norm": 2.964063882827759, + "learning_rate": 8.372462311557789e-06, + "loss": 0.0333, + "step": 16700 + }, + { + "epoch": 11.23, + "grad_norm": 3.015428304672241, + "learning_rate": 8.36994974874372e-06, + "loss": 0.0333, + "step": 16725 + }, + { + "epoch": 11.25, + "grad_norm": 2.875497817993164, + "learning_rate": 8.36743718592965e-06, + "loss": 0.0347, + "step": 16750 + }, + { + "epoch": 11.27, + "grad_norm": 2.6037163734436035, + "learning_rate": 8.364924623115579e-06, + "loss": 0.0344, + "step": 16775 + }, + { + "epoch": 11.28, + "grad_norm": 3.212022304534912, + "learning_rate": 8.362412060301508e-06, + "loss": 0.0338, + "step": 16800 + }, + { + "epoch": 11.3, + "grad_norm": 2.321396589279175, + "learning_rate": 8.359899497487437e-06, + "loss": 0.0338, + "step": 16825 + }, + { + "epoch": 11.32, + "grad_norm": 2.2769739627838135, + "learning_rate": 8.357386934673367e-06, + "loss": 0.0334, + "step": 16850 + }, + { + "epoch": 11.33, + "grad_norm": 3.2137532234191895, + "learning_rate": 8.354874371859298e-06, + "loss": 0.0342, + "step": 16875 + }, + { + "epoch": 11.35, + "grad_norm": 2.795146942138672, + "learning_rate": 8.352361809045227e-06, + "loss": 0.0363, + "step": 16900 + }, + { + "epoch": 11.37, + "grad_norm": 3.4696481227874756, + "learning_rate": 8.349849246231156e-06, + "loss": 0.0348, + "step": 16925 + }, + { + "epoch": 11.38, + "grad_norm": 2.4808151721954346, + "learning_rate": 8.347336683417087e-06, + "loss": 0.0343, + "step": 16950 + }, + { + "epoch": 11.4, + "grad_norm": 2.720414638519287, + "learning_rate": 8.344824120603015e-06, + "loss": 0.0345, + "step": 16975 + }, + { + "epoch": 11.42, + "grad_norm": 3.196791887283325, + "learning_rate": 8.342311557788946e-06, + "loss": 0.0345, + "step": 17000 + }, + { + "epoch": 11.42, + "eval_loss": 0.21900735795497894, + "eval_runtime": 506.8922, + "eval_samples_per_second": 2.78, + "eval_steps_per_second": 2.78, + "eval_wer": 30.335524040124522, + "step": 17000 + }, + { + "epoch": 11.43, + "grad_norm": 3.169095277786255, + "learning_rate": 8.339798994974875e-06, + "loss": 0.0351, + "step": 17025 + }, + { + "epoch": 11.45, + "grad_norm": 2.580080509185791, + "learning_rate": 8.337286432160805e-06, + "loss": 0.0341, + "step": 17050 + }, + { + "epoch": 11.47, + "grad_norm": 3.1775014400482178, + "learning_rate": 8.334773869346734e-06, + "loss": 0.0346, + "step": 17075 + }, + { + "epoch": 11.48, + "grad_norm": 2.957369089126587, + "learning_rate": 8.332361809045226e-06, + "loss": 0.0356, + "step": 17100 + }, + { + "epoch": 11.5, + "grad_norm": 2.850900650024414, + "learning_rate": 8.329849246231157e-06, + "loss": 0.0355, + "step": 17125 + }, + { + "epoch": 11.52, + "grad_norm": 3.018730401992798, + "learning_rate": 8.327336683417086e-06, + "loss": 0.0344, + "step": 17150 + }, + { + "epoch": 11.53, + "grad_norm": 2.547421932220459, + "learning_rate": 8.324824120603015e-06, + "loss": 0.0357, + "step": 17175 + }, + { + "epoch": 11.55, + "grad_norm": 3.110640287399292, + "learning_rate": 8.322311557788946e-06, + "loss": 0.0352, + "step": 17200 + }, + { + "epoch": 11.57, + "grad_norm": 2.897852659225464, + "learning_rate": 8.319798994974876e-06, + "loss": 0.0354, + "step": 17225 + }, + { + "epoch": 11.58, + "grad_norm": 2.9520862102508545, + "learning_rate": 8.317286432160805e-06, + "loss": 0.0338, + "step": 17250 + }, + { + "epoch": 11.6, + "grad_norm": 3.08408784866333, + "learning_rate": 8.314773869346734e-06, + "loss": 0.0329, + "step": 17275 + }, + { + "epoch": 11.62, + "grad_norm": 2.8934521675109863, + "learning_rate": 8.312261306532663e-06, + "loss": 0.034, + "step": 17300 + }, + { + "epoch": 11.64, + "grad_norm": 3.054666519165039, + "learning_rate": 8.309748743718595e-06, + "loss": 0.0353, + "step": 17325 + }, + { + "epoch": 11.65, + "grad_norm": 3.0845742225646973, + "learning_rate": 8.307236180904524e-06, + "loss": 0.0367, + "step": 17350 + }, + { + "epoch": 11.67, + "grad_norm": 2.6850764751434326, + "learning_rate": 8.304723618090453e-06, + "loss": 0.0361, + "step": 17375 + }, + { + "epoch": 11.69, + "grad_norm": 3.040971279144287, + "learning_rate": 8.302211055276382e-06, + "loss": 0.0356, + "step": 17400 + }, + { + "epoch": 11.7, + "grad_norm": 3.5542678833007812, + "learning_rate": 8.299698492462312e-06, + "loss": 0.0352, + "step": 17425 + }, + { + "epoch": 11.72, + "grad_norm": 2.9630064964294434, + "learning_rate": 8.297185929648241e-06, + "loss": 0.0364, + "step": 17450 + }, + { + "epoch": 11.74, + "grad_norm": 2.8122315406799316, + "learning_rate": 8.294673366834172e-06, + "loss": 0.0382, + "step": 17475 + }, + { + "epoch": 11.75, + "grad_norm": 3.4405579566955566, + "learning_rate": 8.292160804020101e-06, + "loss": 0.0347, + "step": 17500 + }, + { + "epoch": 11.77, + "grad_norm": 2.946769952774048, + "learning_rate": 8.28964824120603e-06, + "loss": 0.0347, + "step": 17525 + }, + { + "epoch": 11.79, + "grad_norm": 3.351062536239624, + "learning_rate": 8.287135678391962e-06, + "loss": 0.0349, + "step": 17550 + }, + { + "epoch": 11.8, + "grad_norm": 3.410038709640503, + "learning_rate": 8.28462311557789e-06, + "loss": 0.0368, + "step": 17575 + }, + { + "epoch": 11.82, + "grad_norm": 2.7303621768951416, + "learning_rate": 8.28211055276382e-06, + "loss": 0.0345, + "step": 17600 + }, + { + "epoch": 11.84, + "grad_norm": 3.1485795974731445, + "learning_rate": 8.27959798994975e-06, + "loss": 0.0343, + "step": 17625 + }, + { + "epoch": 11.85, + "grad_norm": 3.757327079772949, + "learning_rate": 8.277085427135679e-06, + "loss": 0.0368, + "step": 17650 + }, + { + "epoch": 11.87, + "grad_norm": 3.2278878688812256, + "learning_rate": 8.274572864321608e-06, + "loss": 0.0356, + "step": 17675 + }, + { + "epoch": 11.89, + "grad_norm": 2.9767661094665527, + "learning_rate": 8.272060301507538e-06, + "loss": 0.038, + "step": 17700 + }, + { + "epoch": 11.9, + "grad_norm": 2.9485790729522705, + "learning_rate": 8.269547738693467e-06, + "loss": 0.0345, + "step": 17725 + }, + { + "epoch": 11.92, + "grad_norm": 3.2627480030059814, + "learning_rate": 8.267035175879398e-06, + "loss": 0.0391, + "step": 17750 + }, + { + "epoch": 11.94, + "grad_norm": 3.0980026721954346, + "learning_rate": 8.264522613065327e-06, + "loss": 0.0331, + "step": 17775 + }, + { + "epoch": 11.95, + "grad_norm": 2.398850679397583, + "learning_rate": 8.262010050251257e-06, + "loss": 0.0355, + "step": 17800 + }, + { + "epoch": 11.97, + "grad_norm": 2.830465078353882, + "learning_rate": 8.259497487437188e-06, + "loss": 0.0365, + "step": 17825 + }, + { + "epoch": 11.99, + "grad_norm": 2.837803363800049, + "learning_rate": 8.256984924623115e-06, + "loss": 0.035, + "step": 17850 + }, + { + "epoch": 12.0, + "grad_norm": 2.5481348037719727, + "learning_rate": 8.254472361809046e-06, + "loss": 0.0322, + "step": 17875 + }, + { + "epoch": 12.02, + "grad_norm": 2.396738290786743, + "learning_rate": 8.251959798994976e-06, + "loss": 0.0245, + "step": 17900 + }, + { + "epoch": 12.04, + "grad_norm": 2.375900983810425, + "learning_rate": 8.249447236180905e-06, + "loss": 0.0257, + "step": 17925 + }, + { + "epoch": 12.06, + "grad_norm": 2.5473861694335938, + "learning_rate": 8.246934673366836e-06, + "loss": 0.0254, + "step": 17950 + }, + { + "epoch": 12.07, + "grad_norm": 2.4333953857421875, + "learning_rate": 8.244422110552764e-06, + "loss": 0.0258, + "step": 17975 + }, + { + "epoch": 12.09, + "grad_norm": 2.229600667953491, + "learning_rate": 8.241909547738695e-06, + "loss": 0.0255, + "step": 18000 + }, + { + "epoch": 12.09, + "eval_loss": 0.22930583357810974, + "eval_runtime": 509.2183, + "eval_samples_per_second": 2.767, + "eval_steps_per_second": 2.767, + "eval_wer": 30.280179868557592, + "step": 18000 + }, + { + "epoch": 12.11, + "grad_norm": 2.598322868347168, + "learning_rate": 8.239396984924624e-06, + "loss": 0.0263, + "step": 18025 + }, + { + "epoch": 12.12, + "grad_norm": 2.4724090099334717, + "learning_rate": 8.236884422110553e-06, + "loss": 0.0259, + "step": 18050 + }, + { + "epoch": 12.14, + "grad_norm": 2.860166549682617, + "learning_rate": 8.234371859296483e-06, + "loss": 0.0266, + "step": 18075 + }, + { + "epoch": 12.16, + "grad_norm": 2.4977660179138184, + "learning_rate": 8.231859296482414e-06, + "loss": 0.0261, + "step": 18100 + }, + { + "epoch": 12.17, + "grad_norm": 2.9457530975341797, + "learning_rate": 8.229346733668341e-06, + "loss": 0.0263, + "step": 18125 + }, + { + "epoch": 12.19, + "grad_norm": 2.5766079425811768, + "learning_rate": 8.226834170854272e-06, + "loss": 0.0256, + "step": 18150 + }, + { + "epoch": 12.21, + "grad_norm": 2.7472805976867676, + "learning_rate": 8.224321608040202e-06, + "loss": 0.0259, + "step": 18175 + }, + { + "epoch": 12.22, + "grad_norm": 2.5744516849517822, + "learning_rate": 8.221809045226131e-06, + "loss": 0.0264, + "step": 18200 + }, + { + "epoch": 12.24, + "grad_norm": 2.3059563636779785, + "learning_rate": 8.219296482412062e-06, + "loss": 0.0277, + "step": 18225 + }, + { + "epoch": 12.26, + "grad_norm": 3.2585506439208984, + "learning_rate": 8.21678391959799e-06, + "loss": 0.0279, + "step": 18250 + }, + { + "epoch": 12.27, + "grad_norm": 2.4447991847991943, + "learning_rate": 8.21427135678392e-06, + "loss": 0.0272, + "step": 18275 + }, + { + "epoch": 12.29, + "grad_norm": 2.5891377925872803, + "learning_rate": 8.21175879396985e-06, + "loss": 0.0273, + "step": 18300 + }, + { + "epoch": 12.31, + "grad_norm": 2.6267025470733643, + "learning_rate": 8.20924623115578e-06, + "loss": 0.0257, + "step": 18325 + }, + { + "epoch": 12.32, + "grad_norm": 2.7671260833740234, + "learning_rate": 8.206733668341709e-06, + "loss": 0.0282, + "step": 18350 + }, + { + "epoch": 12.34, + "grad_norm": 3.5493524074554443, + "learning_rate": 8.20422110552764e-06, + "loss": 0.0276, + "step": 18375 + }, + { + "epoch": 12.36, + "grad_norm": 2.510411024093628, + "learning_rate": 8.201708542713569e-06, + "loss": 0.0268, + "step": 18400 + }, + { + "epoch": 12.37, + "grad_norm": 2.765998363494873, + "learning_rate": 8.199195979899498e-06, + "loss": 0.0265, + "step": 18425 + }, + { + "epoch": 12.39, + "grad_norm": 3.095228433609009, + "learning_rate": 8.196683417085428e-06, + "loss": 0.0288, + "step": 18450 + }, + { + "epoch": 12.41, + "grad_norm": 3.233299970626831, + "learning_rate": 8.194170854271357e-06, + "loss": 0.0275, + "step": 18475 + }, + { + "epoch": 12.42, + "grad_norm": 2.947317123413086, + "learning_rate": 8.191658291457288e-06, + "loss": 0.0278, + "step": 18500 + }, + { + "epoch": 12.44, + "grad_norm": 2.8245813846588135, + "learning_rate": 8.189145728643216e-06, + "loss": 0.0285, + "step": 18525 + }, + { + "epoch": 12.46, + "grad_norm": 3.2293148040771484, + "learning_rate": 8.186633165829147e-06, + "loss": 0.0276, + "step": 18550 + }, + { + "epoch": 12.47, + "grad_norm": 2.8926360607147217, + "learning_rate": 8.184120603015076e-06, + "loss": 0.0274, + "step": 18575 + }, + { + "epoch": 12.49, + "grad_norm": 2.5087268352508545, + "learning_rate": 8.181608040201005e-06, + "loss": 0.0273, + "step": 18600 + }, + { + "epoch": 12.51, + "grad_norm": 2.4943645000457764, + "learning_rate": 8.179095477386936e-06, + "loss": 0.0272, + "step": 18625 + }, + { + "epoch": 12.53, + "grad_norm": 2.6638033390045166, + "learning_rate": 8.176582914572866e-06, + "loss": 0.0283, + "step": 18650 + }, + { + "epoch": 12.54, + "grad_norm": 2.891900062561035, + "learning_rate": 8.174070351758795e-06, + "loss": 0.0278, + "step": 18675 + }, + { + "epoch": 12.56, + "grad_norm": 3.337963819503784, + "learning_rate": 8.171557788944724e-06, + "loss": 0.0271, + "step": 18700 + }, + { + "epoch": 12.58, + "grad_norm": 2.388216972351074, + "learning_rate": 8.169045226130654e-06, + "loss": 0.0272, + "step": 18725 + }, + { + "epoch": 12.59, + "grad_norm": 2.7136969566345215, + "learning_rate": 8.166532663316583e-06, + "loss": 0.0287, + "step": 18750 + }, + { + "epoch": 12.61, + "grad_norm": 2.954904794692993, + "learning_rate": 8.164020100502514e-06, + "loss": 0.0285, + "step": 18775 + }, + { + "epoch": 12.63, + "grad_norm": 2.485668659210205, + "learning_rate": 8.161507537688443e-06, + "loss": 0.0276, + "step": 18800 + }, + { + "epoch": 12.64, + "grad_norm": 2.5536584854125977, + "learning_rate": 8.158994974874373e-06, + "loss": 0.0279, + "step": 18825 + }, + { + "epoch": 12.66, + "grad_norm": 2.758317470550537, + "learning_rate": 8.156482412060302e-06, + "loss": 0.0287, + "step": 18850 + }, + { + "epoch": 12.68, + "grad_norm": 2.8779945373535156, + "learning_rate": 8.153969849246231e-06, + "loss": 0.0277, + "step": 18875 + }, + { + "epoch": 12.69, + "grad_norm": 2.4216692447662354, + "learning_rate": 8.151457286432162e-06, + "loss": 0.0261, + "step": 18900 + }, + { + "epoch": 12.71, + "grad_norm": 2.7389280796051025, + "learning_rate": 8.148944723618092e-06, + "loss": 0.0284, + "step": 18925 + }, + { + "epoch": 12.73, + "grad_norm": 2.737917900085449, + "learning_rate": 8.146432160804021e-06, + "loss": 0.0283, + "step": 18950 + }, + { + "epoch": 12.74, + "grad_norm": 3.025228261947632, + "learning_rate": 8.14391959798995e-06, + "loss": 0.0287, + "step": 18975 + }, + { + "epoch": 12.76, + "grad_norm": 3.156458854675293, + "learning_rate": 8.14140703517588e-06, + "loss": 0.0278, + "step": 19000 + }, + { + "epoch": 12.76, + "eval_loss": 0.23426826298236847, + "eval_runtime": 503.7909, + "eval_samples_per_second": 2.797, + "eval_steps_per_second": 2.797, + "eval_wer": 29.664475959875475, + "step": 19000 + }, + { + "epoch": 12.78, + "grad_norm": 2.5011305809020996, + "learning_rate": 8.13889447236181e-06, + "loss": 0.0275, + "step": 19025 + }, + { + "epoch": 12.79, + "grad_norm": 2.918154001235962, + "learning_rate": 8.13638190954774e-06, + "loss": 0.0296, + "step": 19050 + }, + { + "epoch": 12.81, + "grad_norm": 2.684267520904541, + "learning_rate": 8.13386934673367e-06, + "loss": 0.0272, + "step": 19075 + }, + { + "epoch": 12.83, + "grad_norm": 2.9294755458831787, + "learning_rate": 8.131356783919598e-06, + "loss": 0.0303, + "step": 19100 + }, + { + "epoch": 12.84, + "grad_norm": 2.906726360321045, + "learning_rate": 8.128844221105528e-06, + "loss": 0.0278, + "step": 19125 + }, + { + "epoch": 12.86, + "grad_norm": 2.8053345680236816, + "learning_rate": 8.126331658291457e-06, + "loss": 0.0274, + "step": 19150 + }, + { + "epoch": 12.88, + "grad_norm": 2.7568044662475586, + "learning_rate": 8.12391959798995e-06, + "loss": 0.0287, + "step": 19175 + }, + { + "epoch": 12.89, + "grad_norm": 2.920099973678589, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0276, + "step": 19200 + }, + { + "epoch": 12.91, + "grad_norm": 2.634989023208618, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0275, + "step": 19225 + }, + { + "epoch": 12.93, + "grad_norm": 2.596900701522827, + "learning_rate": 8.11638190954774e-06, + "loss": 0.0274, + "step": 19250 + }, + { + "epoch": 12.94, + "grad_norm": 3.2636964321136475, + "learning_rate": 8.11386934673367e-06, + "loss": 0.0294, + "step": 19275 + }, + { + "epoch": 12.96, + "grad_norm": 3.6109907627105713, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0297, + "step": 19300 + }, + { + "epoch": 12.98, + "grad_norm": 2.874441623687744, + "learning_rate": 8.108844221105528e-06, + "loss": 0.03, + "step": 19325 + }, + { + "epoch": 13.0, + "grad_norm": 3.0763564109802246, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0289, + "step": 19350 + }, + { + "epoch": 13.01, + "grad_norm": 2.2706706523895264, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0223, + "step": 19375 + }, + { + "epoch": 13.03, + "grad_norm": 1.984673023223877, + "learning_rate": 8.101306532663318e-06, + "loss": 0.0201, + "step": 19400 + }, + { + "epoch": 13.05, + "grad_norm": 2.2425191402435303, + "learning_rate": 8.098793969849247e-06, + "loss": 0.0204, + "step": 19425 + }, + { + "epoch": 13.06, + "grad_norm": 2.154808282852173, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0192, + "step": 19450 + }, + { + "epoch": 13.08, + "grad_norm": 2.65604305267334, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0198, + "step": 19475 + }, + { + "epoch": 13.1, + "grad_norm": 2.0501763820648193, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0196, + "step": 19500 + }, + { + "epoch": 13.11, + "grad_norm": 2.3358802795410156, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0203, + "step": 19525 + }, + { + "epoch": 13.13, + "grad_norm": 2.126844882965088, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0203, + "step": 19550 + }, + { + "epoch": 13.15, + "grad_norm": 2.44891357421875, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0214, + "step": 19575 + }, + { + "epoch": 13.16, + "grad_norm": 2.3258211612701416, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0212, + "step": 19600 + }, + { + "epoch": 13.18, + "grad_norm": 2.3395602703094482, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0211, + "step": 19625 + }, + { + "epoch": 13.2, + "grad_norm": 2.4894628524780273, + "learning_rate": 8.076180904522614e-06, + "loss": 0.021, + "step": 19650 + }, + { + "epoch": 13.21, + "grad_norm": 2.812272071838379, + "learning_rate": 8.073668341708544e-06, + "loss": 0.0229, + "step": 19675 + }, + { + "epoch": 13.23, + "grad_norm": 2.0549569129943848, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0204, + "step": 19700 + }, + { + "epoch": 13.25, + "grad_norm": 3.1301002502441406, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0211, + "step": 19725 + }, + { + "epoch": 13.26, + "grad_norm": 2.128800392150879, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0213, + "step": 19750 + }, + { + "epoch": 13.28, + "grad_norm": 2.3359644412994385, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0214, + "step": 19775 + }, + { + "epoch": 13.3, + "grad_norm": 2.4811410903930664, + "learning_rate": 8.061105527638192e-06, + "loss": 0.0214, + "step": 19800 + }, + { + "epoch": 13.31, + "grad_norm": 2.270700693130493, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0209, + "step": 19825 + }, + { + "epoch": 13.33, + "grad_norm": 2.5120649337768555, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0213, + "step": 19850 + }, + { + "epoch": 13.35, + "grad_norm": 2.5744411945343018, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0195, + "step": 19875 + }, + { + "epoch": 13.36, + "grad_norm": 2.5161170959472656, + "learning_rate": 8.051155778894473e-06, + "loss": 0.0202, + "step": 19900 + }, + { + "epoch": 13.38, + "grad_norm": 2.6120686531066895, + "learning_rate": 8.048643216080402e-06, + "loss": 0.0207, + "step": 19925 + }, + { + "epoch": 13.4, + "grad_norm": 3.0308547019958496, + "learning_rate": 8.046130653266332e-06, + "loss": 0.0204, + "step": 19950 + }, + { + "epoch": 13.42, + "grad_norm": 2.0303311347961426, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0221, + "step": 19975 + }, + { + "epoch": 13.43, + "grad_norm": 2.3271639347076416, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0209, + "step": 20000 + }, + { + "epoch": 13.43, + "eval_loss": 0.24769054353237152, + "eval_runtime": 511.0893, + "eval_samples_per_second": 2.757, + "eval_steps_per_second": 2.757, + "eval_wer": 30.086475268073333, + "step": 20000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 68, + "save_steps": 1000, + "total_flos": 3.150705222549504e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/maithili/checkpoint-20000/training_args.bin b/checkpoints/whisper-tiny/maithili/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b37920541fe0821c1469a27116147b54da297211 --- /dev/null +++ b/checkpoints/whisper-tiny/maithili/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3712b71cdd8cdb16bb060b60ce4e6db0bdef689a92e28edafb957644c57f61b5 +size 4667 diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/config.json b/checkpoints/whisper-tiny/marathi/checkpoint-22000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a041e85a2a18587cbe3f0ba14ca9b9956a08d08 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50320 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/generation_config.json b/checkpoints/whisper-tiny/marathi/checkpoint-22000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/model.safetensors b/checkpoints/whisper-tiny/marathi/checkpoint-22000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9405daf8bde0ef42ad573c7d16b3aef5e41f57db --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2e6b77f11bd548d17084926838ccddbc8545a50ceda7898506b09ccd9f59fb +size 151061672 diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/optimizer.pt b/checkpoints/whisper-tiny/marathi/checkpoint-22000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..854a233998940279ea7f67aadd72c640d16dbd75 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96247a12da6ab6ea97e164565b728e70e2cf3c5f7fd7570c31b0864ba85406b +size 297615749 diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/preprocessor_config.json b/checkpoints/whisper-tiny/marathi/checkpoint-22000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/rng_state.pth b/checkpoints/whisper-tiny/marathi/checkpoint-22000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e385a347368cee441a67343c57bcc352cbe9c9dc --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6510c25e2e9cfe854cd3187c44894dad0388d38169ebc5d34f6d054b53e2777c +size 14575 diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/scheduler.pt b/checkpoints/whisper-tiny/marathi/checkpoint-22000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b5db5e519f4668cfca578bbdc8da10a0f40821d --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a54a11b92af0098a08a5f24c7ca905131539cf0fed97b3b9febed573384f5c2 +size 627 diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/trainer_state.json b/checkpoints/whisper-tiny/marathi/checkpoint-22000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..30636719362cb5dc6d3e536613fbefd576fd5ba3 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/trainer_state.json @@ -0,0 +1,6379 @@ +{ + "best_metric": 26.328800988875155, + "best_model_checkpoint": "results/whisper-tiny/marathi/checkpoint-12000", + "epoch": 14.775016789791806, + "eval_steps": 1000, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 74.15543365478516, + "learning_rate": 4.4e-07, + "loss": 3.7766, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 39.68334197998047, + "learning_rate": 9.200000000000001e-07, + "loss": 3.2026, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 14.481256484985352, + "learning_rate": 1.42e-06, + "loss": 2.4672, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 8.309144020080566, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.9195, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 6.247703552246094, + "learning_rate": 2.42e-06, + "loss": 1.5361, + "step": 125 + }, + { + "epoch": 0.1, + "grad_norm": 5.86753511428833, + "learning_rate": 2.92e-06, + "loss": 1.2775, + "step": 150 + }, + { + "epoch": 0.12, + "grad_norm": 5.9364752769470215, + "learning_rate": 3.4200000000000007e-06, + "loss": 1.0933, + "step": 175 + }, + { + "epoch": 0.13, + "grad_norm": 5.17349910736084, + "learning_rate": 3.920000000000001e-06, + "loss": 0.9505, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 5.331369400024414, + "learning_rate": 4.42e-06, + "loss": 0.8507, + "step": 225 + }, + { + "epoch": 0.17, + "grad_norm": 5.466459274291992, + "learning_rate": 4.92e-06, + "loss": 0.7655, + "step": 250 + }, + { + "epoch": 0.18, + "grad_norm": 4.921384811401367, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7153, + "step": 275 + }, + { + "epoch": 0.2, + "grad_norm": 5.227000713348389, + "learning_rate": 5.92e-06, + "loss": 0.6886, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 4.924015045166016, + "learning_rate": 6.42e-06, + "loss": 0.6324, + "step": 325 + }, + { + "epoch": 0.24, + "grad_norm": 4.0168986320495605, + "learning_rate": 6.92e-06, + "loss": 0.6107, + "step": 350 + }, + { + "epoch": 0.25, + "grad_norm": 4.831826686859131, + "learning_rate": 7.420000000000001e-06, + "loss": 0.5784, + "step": 375 + }, + { + "epoch": 0.27, + "grad_norm": 4.7476935386657715, + "learning_rate": 7.92e-06, + "loss": 0.5509, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 4.2020978927612305, + "learning_rate": 8.42e-06, + "loss": 0.5442, + "step": 425 + }, + { + "epoch": 0.3, + "grad_norm": 4.830783843994141, + "learning_rate": 8.920000000000001e-06, + "loss": 0.5297, + "step": 450 + }, + { + "epoch": 0.32, + "grad_norm": 4.747669696807861, + "learning_rate": 9.42e-06, + "loss": 0.5059, + "step": 475 + }, + { + "epoch": 0.34, + "grad_norm": 4.504109859466553, + "learning_rate": 9.920000000000002e-06, + "loss": 0.4927, + "step": 500 + }, + { + "epoch": 0.35, + "grad_norm": 4.707924842834473, + "learning_rate": 9.997889447236182e-06, + "loss": 0.4721, + "step": 525 + }, + { + "epoch": 0.37, + "grad_norm": 4.621720790863037, + "learning_rate": 9.995376884422112e-06, + "loss": 0.464, + "step": 550 + }, + { + "epoch": 0.39, + "grad_norm": 4.50490140914917, + "learning_rate": 9.992864321608041e-06, + "loss": 0.4518, + "step": 575 + }, + { + "epoch": 0.4, + "grad_norm": 4.592816352844238, + "learning_rate": 9.99035175879397e-06, + "loss": 0.4335, + "step": 600 + }, + { + "epoch": 0.42, + "grad_norm": 4.791091442108154, + "learning_rate": 9.9878391959799e-06, + "loss": 0.4348, + "step": 625 + }, + { + "epoch": 0.44, + "grad_norm": 4.221704959869385, + "learning_rate": 9.98532663316583e-06, + "loss": 0.4203, + "step": 650 + }, + { + "epoch": 0.45, + "grad_norm": 4.549515724182129, + "learning_rate": 9.98281407035176e-06, + "loss": 0.4086, + "step": 675 + }, + { + "epoch": 0.47, + "grad_norm": 4.485387802124023, + "learning_rate": 9.98030150753769e-06, + "loss": 0.405, + "step": 700 + }, + { + "epoch": 0.49, + "grad_norm": 4.758955001831055, + "learning_rate": 9.977788944723619e-06, + "loss": 0.4016, + "step": 725 + }, + { + "epoch": 0.5, + "grad_norm": 4.615067005157471, + "learning_rate": 9.975276381909548e-06, + "loss": 0.393, + "step": 750 + }, + { + "epoch": 0.52, + "grad_norm": 4.661777019500732, + "learning_rate": 9.972763819095477e-06, + "loss": 0.3843, + "step": 775 + }, + { + "epoch": 0.54, + "grad_norm": 4.5793609619140625, + "learning_rate": 9.970251256281408e-06, + "loss": 0.3832, + "step": 800 + }, + { + "epoch": 0.55, + "grad_norm": 5.030839443206787, + "learning_rate": 9.967738693467338e-06, + "loss": 0.3789, + "step": 825 + }, + { + "epoch": 0.57, + "grad_norm": 4.351238250732422, + "learning_rate": 9.965226130653267e-06, + "loss": 0.3576, + "step": 850 + }, + { + "epoch": 0.59, + "grad_norm": 4.560535907745361, + "learning_rate": 9.962713567839198e-06, + "loss": 0.3589, + "step": 875 + }, + { + "epoch": 0.6, + "grad_norm": 4.39430046081543, + "learning_rate": 9.960201005025126e-06, + "loss": 0.3554, + "step": 900 + }, + { + "epoch": 0.62, + "grad_norm": 4.813572883605957, + "learning_rate": 9.957688442211057e-06, + "loss": 0.3561, + "step": 925 + }, + { + "epoch": 0.64, + "grad_norm": 3.895594358444214, + "learning_rate": 9.955175879396986e-06, + "loss": 0.3532, + "step": 950 + }, + { + "epoch": 0.65, + "grad_norm": 4.176882266998291, + "learning_rate": 9.952663316582915e-06, + "loss": 0.3501, + "step": 975 + }, + { + "epoch": 0.67, + "grad_norm": 4.483668327331543, + "learning_rate": 9.950150753768845e-06, + "loss": 0.3485, + "step": 1000 + }, + { + "epoch": 0.67, + "eval_loss": 0.23381204903125763, + "eval_runtime": 566.2474, + "eval_samples_per_second": 2.448, + "eval_steps_per_second": 2.448, + "eval_wer": 47.28059332509271, + "step": 1000 + }, + { + "epoch": 0.69, + "grad_norm": 4.013958930969238, + "learning_rate": 9.947638190954774e-06, + "loss": 0.3408, + "step": 1025 + }, + { + "epoch": 0.71, + "grad_norm": 4.734582424163818, + "learning_rate": 9.945125628140703e-06, + "loss": 0.3313, + "step": 1050 + }, + { + "epoch": 0.72, + "grad_norm": 4.5922722816467285, + "learning_rate": 9.942613065326634e-06, + "loss": 0.3364, + "step": 1075 + }, + { + "epoch": 0.74, + "grad_norm": 3.997859001159668, + "learning_rate": 9.940100502512564e-06, + "loss": 0.3283, + "step": 1100 + }, + { + "epoch": 0.76, + "grad_norm": 4.432836532592773, + "learning_rate": 9.937587939698493e-06, + "loss": 0.3258, + "step": 1125 + }, + { + "epoch": 0.77, + "grad_norm": 4.074716091156006, + "learning_rate": 9.935075376884424e-06, + "loss": 0.3338, + "step": 1150 + }, + { + "epoch": 0.79, + "grad_norm": 4.509114742279053, + "learning_rate": 9.932562814070352e-06, + "loss": 0.3121, + "step": 1175 + }, + { + "epoch": 0.81, + "grad_norm": 4.589898586273193, + "learning_rate": 9.930050251256283e-06, + "loss": 0.3161, + "step": 1200 + }, + { + "epoch": 0.82, + "grad_norm": 4.0301079750061035, + "learning_rate": 9.927537688442212e-06, + "loss": 0.3248, + "step": 1225 + }, + { + "epoch": 0.84, + "grad_norm": 4.21639347076416, + "learning_rate": 9.925025125628141e-06, + "loss": 0.3096, + "step": 1250 + }, + { + "epoch": 0.86, + "grad_norm": 4.40596866607666, + "learning_rate": 9.922512562814072e-06, + "loss": 0.3136, + "step": 1275 + }, + { + "epoch": 0.87, + "grad_norm": 4.144809722900391, + "learning_rate": 9.920000000000002e-06, + "loss": 0.3068, + "step": 1300 + }, + { + "epoch": 0.89, + "grad_norm": 3.97633695602417, + "learning_rate": 9.917487437185931e-06, + "loss": 0.3044, + "step": 1325 + }, + { + "epoch": 0.91, + "grad_norm": 4.247403144836426, + "learning_rate": 9.91497487437186e-06, + "loss": 0.307, + "step": 1350 + }, + { + "epoch": 0.92, + "grad_norm": 3.9145348072052, + "learning_rate": 9.91246231155779e-06, + "loss": 0.3007, + "step": 1375 + }, + { + "epoch": 0.94, + "grad_norm": 4.151167869567871, + "learning_rate": 9.909949748743719e-06, + "loss": 0.2931, + "step": 1400 + }, + { + "epoch": 0.96, + "grad_norm": 4.783816337585449, + "learning_rate": 9.90743718592965e-06, + "loss": 0.2939, + "step": 1425 + }, + { + "epoch": 0.97, + "grad_norm": 4.319779872894287, + "learning_rate": 9.904924623115578e-06, + "loss": 0.294, + "step": 1450 + }, + { + "epoch": 0.99, + "grad_norm": 4.233304500579834, + "learning_rate": 9.902412060301509e-06, + "loss": 0.2918, + "step": 1475 + }, + { + "epoch": 1.01, + "grad_norm": 5.045380592346191, + "learning_rate": 9.899899497487438e-06, + "loss": 0.2839, + "step": 1500 + }, + { + "epoch": 1.02, + "grad_norm": 4.171890735626221, + "learning_rate": 9.897386934673367e-06, + "loss": 0.277, + "step": 1525 + }, + { + "epoch": 1.04, + "grad_norm": 5.11909818649292, + "learning_rate": 9.894874371859298e-06, + "loss": 0.2708, + "step": 1550 + }, + { + "epoch": 1.06, + "grad_norm": 4.329667568206787, + "learning_rate": 9.892361809045228e-06, + "loss": 0.278, + "step": 1575 + }, + { + "epoch": 1.07, + "grad_norm": 4.358795166015625, + "learning_rate": 9.889849246231157e-06, + "loss": 0.2702, + "step": 1600 + }, + { + "epoch": 1.09, + "grad_norm": 4.983689308166504, + "learning_rate": 9.887336683417086e-06, + "loss": 0.2594, + "step": 1625 + }, + { + "epoch": 1.11, + "grad_norm": 4.065433502197266, + "learning_rate": 9.884824120603015e-06, + "loss": 0.2628, + "step": 1650 + }, + { + "epoch": 1.12, + "grad_norm": 4.138759136199951, + "learning_rate": 9.882311557788945e-06, + "loss": 0.2692, + "step": 1675 + }, + { + "epoch": 1.14, + "grad_norm": 4.43567419052124, + "learning_rate": 9.879798994974876e-06, + "loss": 0.2688, + "step": 1700 + }, + { + "epoch": 1.16, + "grad_norm": 4.089324951171875, + "learning_rate": 9.877286432160805e-06, + "loss": 0.2641, + "step": 1725 + }, + { + "epoch": 1.18, + "grad_norm": 4.174434661865234, + "learning_rate": 9.874773869346734e-06, + "loss": 0.2638, + "step": 1750 + }, + { + "epoch": 1.19, + "grad_norm": 4.324215888977051, + "learning_rate": 9.872261306532664e-06, + "loss": 0.2621, + "step": 1775 + }, + { + "epoch": 1.21, + "grad_norm": 4.167600631713867, + "learning_rate": 9.869748743718593e-06, + "loss": 0.2568, + "step": 1800 + }, + { + "epoch": 1.23, + "grad_norm": 4.090190410614014, + "learning_rate": 9.867236180904524e-06, + "loss": 0.2579, + "step": 1825 + }, + { + "epoch": 1.24, + "grad_norm": 3.862471580505371, + "learning_rate": 9.864723618090453e-06, + "loss": 0.2549, + "step": 1850 + }, + { + "epoch": 1.26, + "grad_norm": 3.9046545028686523, + "learning_rate": 9.862211055276383e-06, + "loss": 0.2512, + "step": 1875 + }, + { + "epoch": 1.28, + "grad_norm": 3.973026990890503, + "learning_rate": 9.859698492462312e-06, + "loss": 0.2535, + "step": 1900 + }, + { + "epoch": 1.29, + "grad_norm": 3.875776529312134, + "learning_rate": 9.857185929648241e-06, + "loss": 0.2454, + "step": 1925 + }, + { + "epoch": 1.31, + "grad_norm": 3.815830707550049, + "learning_rate": 9.854673366834172e-06, + "loss": 0.2509, + "step": 1950 + }, + { + "epoch": 1.33, + "grad_norm": 3.9826467037200928, + "learning_rate": 9.852160804020102e-06, + "loss": 0.2469, + "step": 1975 + }, + { + "epoch": 1.34, + "grad_norm": 4.199316024780273, + "learning_rate": 9.849648241206031e-06, + "loss": 0.2543, + "step": 2000 + }, + { + "epoch": 1.34, + "eval_loss": 0.1726374477148056, + "eval_runtime": 531.5792, + "eval_samples_per_second": 2.607, + "eval_steps_per_second": 2.607, + "eval_wer": 38.892812996644885, + "step": 2000 + }, + { + "epoch": 1.36, + "grad_norm": 4.188065052032471, + "learning_rate": 9.84713567839196e-06, + "loss": 0.253, + "step": 2025 + }, + { + "epoch": 1.38, + "grad_norm": 4.348769187927246, + "learning_rate": 9.84462311557789e-06, + "loss": 0.2439, + "step": 2050 + }, + { + "epoch": 1.39, + "grad_norm": 4.025571823120117, + "learning_rate": 9.842110552763819e-06, + "loss": 0.2499, + "step": 2075 + }, + { + "epoch": 1.41, + "grad_norm": 3.441206216812134, + "learning_rate": 9.83959798994975e-06, + "loss": 0.2419, + "step": 2100 + }, + { + "epoch": 1.43, + "grad_norm": 4.062358856201172, + "learning_rate": 9.83708542713568e-06, + "loss": 0.2428, + "step": 2125 + }, + { + "epoch": 1.44, + "grad_norm": 4.701034069061279, + "learning_rate": 9.834572864321609e-06, + "loss": 0.2435, + "step": 2150 + }, + { + "epoch": 1.46, + "grad_norm": 4.011937618255615, + "learning_rate": 9.832060301507538e-06, + "loss": 0.2429, + "step": 2175 + }, + { + "epoch": 1.48, + "grad_norm": 3.9073057174682617, + "learning_rate": 9.829547738693467e-06, + "loss": 0.2371, + "step": 2200 + }, + { + "epoch": 1.49, + "grad_norm": 4.053809642791748, + "learning_rate": 9.827035175879398e-06, + "loss": 0.236, + "step": 2225 + }, + { + "epoch": 1.51, + "grad_norm": 3.983830690383911, + "learning_rate": 9.824522613065328e-06, + "loss": 0.2393, + "step": 2250 + }, + { + "epoch": 1.53, + "grad_norm": 4.095301151275635, + "learning_rate": 9.822010050251257e-06, + "loss": 0.2329, + "step": 2275 + }, + { + "epoch": 1.54, + "grad_norm": 3.980642318725586, + "learning_rate": 9.819497487437186e-06, + "loss": 0.24, + "step": 2300 + }, + { + "epoch": 1.56, + "grad_norm": 4.499876976013184, + "learning_rate": 9.816984924623116e-06, + "loss": 0.2307, + "step": 2325 + }, + { + "epoch": 1.58, + "grad_norm": 4.50550651550293, + "learning_rate": 9.814472361809047e-06, + "loss": 0.2336, + "step": 2350 + }, + { + "epoch": 1.6, + "grad_norm": 4.186766147613525, + "learning_rate": 9.811959798994976e-06, + "loss": 0.233, + "step": 2375 + }, + { + "epoch": 1.61, + "grad_norm": 4.362492084503174, + "learning_rate": 9.809447236180905e-06, + "loss": 0.2281, + "step": 2400 + }, + { + "epoch": 1.63, + "grad_norm": 4.026979446411133, + "learning_rate": 9.806934673366835e-06, + "loss": 0.227, + "step": 2425 + }, + { + "epoch": 1.65, + "grad_norm": 3.556326389312744, + "learning_rate": 9.804422110552764e-06, + "loss": 0.2331, + "step": 2450 + }, + { + "epoch": 1.66, + "grad_norm": 4.109285831451416, + "learning_rate": 9.801909547738693e-06, + "loss": 0.2369, + "step": 2475 + }, + { + "epoch": 1.68, + "grad_norm": 4.070573329925537, + "learning_rate": 9.799396984924624e-06, + "loss": 0.2307, + "step": 2500 + }, + { + "epoch": 1.7, + "grad_norm": 4.282459735870361, + "learning_rate": 9.796884422110554e-06, + "loss": 0.2276, + "step": 2525 + }, + { + "epoch": 1.71, + "grad_norm": 3.989485263824463, + "learning_rate": 9.794371859296483e-06, + "loss": 0.2325, + "step": 2550 + }, + { + "epoch": 1.73, + "grad_norm": 4.217010021209717, + "learning_rate": 9.791859296482414e-06, + "loss": 0.222, + "step": 2575 + }, + { + "epoch": 1.75, + "grad_norm": 4.2022199630737305, + "learning_rate": 9.789346733668342e-06, + "loss": 0.2281, + "step": 2600 + }, + { + "epoch": 1.76, + "grad_norm": 3.7386114597320557, + "learning_rate": 9.786834170854273e-06, + "loss": 0.2165, + "step": 2625 + }, + { + "epoch": 1.78, + "grad_norm": 4.048258304595947, + "learning_rate": 9.784321608040202e-06, + "loss": 0.2218, + "step": 2650 + }, + { + "epoch": 1.8, + "grad_norm": 4.0867133140563965, + "learning_rate": 9.781809045226131e-06, + "loss": 0.2197, + "step": 2675 + }, + { + "epoch": 1.81, + "grad_norm": 3.8676252365112305, + "learning_rate": 9.77929648241206e-06, + "loss": 0.2261, + "step": 2700 + }, + { + "epoch": 1.83, + "grad_norm": 3.8840291500091553, + "learning_rate": 9.77678391959799e-06, + "loss": 0.2188, + "step": 2725 + }, + { + "epoch": 1.85, + "grad_norm": 4.130185127258301, + "learning_rate": 9.774271356783921e-06, + "loss": 0.2176, + "step": 2750 + }, + { + "epoch": 1.86, + "grad_norm": 3.8641357421875, + "learning_rate": 9.77175879396985e-06, + "loss": 0.2165, + "step": 2775 + }, + { + "epoch": 1.88, + "grad_norm": 3.8261783123016357, + "learning_rate": 9.76924623115578e-06, + "loss": 0.2149, + "step": 2800 + }, + { + "epoch": 1.9, + "grad_norm": 3.861722946166992, + "learning_rate": 9.766733668341709e-06, + "loss": 0.2122, + "step": 2825 + }, + { + "epoch": 1.91, + "grad_norm": 4.013296127319336, + "learning_rate": 9.76422110552764e-06, + "loss": 0.2183, + "step": 2850 + }, + { + "epoch": 1.93, + "grad_norm": 3.78545880317688, + "learning_rate": 9.761708542713568e-06, + "loss": 0.2151, + "step": 2875 + }, + { + "epoch": 1.95, + "grad_norm": 4.247804641723633, + "learning_rate": 9.759195979899499e-06, + "loss": 0.2213, + "step": 2900 + }, + { + "epoch": 1.96, + "grad_norm": 4.548637390136719, + "learning_rate": 9.756683417085428e-06, + "loss": 0.2198, + "step": 2925 + }, + { + "epoch": 1.98, + "grad_norm": 3.617631435394287, + "learning_rate": 9.754170854271357e-06, + "loss": 0.2103, + "step": 2950 + }, + { + "epoch": 2.0, + "grad_norm": 3.9520044326782227, + "learning_rate": 9.751658291457288e-06, + "loss": 0.2053, + "step": 2975 + }, + { + "epoch": 2.01, + "grad_norm": 3.903465747833252, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1961, + "step": 3000 + }, + { + "epoch": 2.01, + "eval_loss": 0.1471080482006073, + "eval_runtime": 533.9926, + "eval_samples_per_second": 2.596, + "eval_steps_per_second": 2.596, + "eval_wer": 33.365707222320324, + "step": 3000 + }, + { + "epoch": 2.03, + "grad_norm": 3.3298327922821045, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1968, + "step": 3025 + }, + { + "epoch": 2.05, + "grad_norm": 3.907670259475708, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1938, + "step": 3050 + }, + { + "epoch": 2.07, + "grad_norm": 3.819309711456299, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1879, + "step": 3075 + }, + { + "epoch": 2.08, + "grad_norm": 4.644184112548828, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1933, + "step": 3100 + }, + { + "epoch": 2.1, + "grad_norm": 3.5478782653808594, + "learning_rate": 9.736582914572866e-06, + "loss": 0.19, + "step": 3125 + }, + { + "epoch": 2.12, + "grad_norm": 3.4926066398620605, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1929, + "step": 3150 + }, + { + "epoch": 2.13, + "grad_norm": 3.6318588256835938, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1921, + "step": 3175 + }, + { + "epoch": 2.15, + "grad_norm": 4.020270824432373, + "learning_rate": 9.729045226130654e-06, + "loss": 0.19, + "step": 3200 + }, + { + "epoch": 2.17, + "grad_norm": 3.391878128051758, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1912, + "step": 3225 + }, + { + "epoch": 2.18, + "grad_norm": 3.8649306297302246, + "learning_rate": 9.724020100502514e-06, + "loss": 0.1965, + "step": 3250 + }, + { + "epoch": 2.2, + "grad_norm": 3.8927695751190186, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1901, + "step": 3275 + }, + { + "epoch": 2.22, + "grad_norm": 3.7473957538604736, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1932, + "step": 3300 + }, + { + "epoch": 2.23, + "grad_norm": 3.2613677978515625, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1925, + "step": 3325 + }, + { + "epoch": 2.25, + "grad_norm": 4.175868988037109, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1898, + "step": 3350 + }, + { + "epoch": 2.27, + "grad_norm": 4.236743450164795, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1899, + "step": 3375 + }, + { + "epoch": 2.28, + "grad_norm": 4.136856555938721, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1912, + "step": 3400 + }, + { + "epoch": 2.3, + "grad_norm": 3.826167345046997, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1909, + "step": 3425 + }, + { + "epoch": 2.32, + "grad_norm": 3.949150323867798, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1889, + "step": 3450 + }, + { + "epoch": 2.33, + "grad_norm": 4.023538589477539, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1903, + "step": 3475 + }, + { + "epoch": 2.35, + "grad_norm": 3.7844576835632324, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1928, + "step": 3500 + }, + { + "epoch": 2.37, + "grad_norm": 3.364312171936035, + "learning_rate": 9.69638190954774e-06, + "loss": 0.193, + "step": 3525 + }, + { + "epoch": 2.38, + "grad_norm": 3.4202849864959717, + "learning_rate": 9.69386934673367e-06, + "loss": 0.186, + "step": 3550 + }, + { + "epoch": 2.4, + "grad_norm": 3.6285476684570312, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1863, + "step": 3575 + }, + { + "epoch": 2.42, + "grad_norm": 4.26074743270874, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1831, + "step": 3600 + }, + { + "epoch": 2.43, + "grad_norm": 3.6059014797210693, + "learning_rate": 9.686331658291457e-06, + "loss": 0.182, + "step": 3625 + }, + { + "epoch": 2.45, + "grad_norm": 3.773573637008667, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1824, + "step": 3650 + }, + { + "epoch": 2.47, + "grad_norm": 4.112974643707275, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1828, + "step": 3675 + }, + { + "epoch": 2.48, + "grad_norm": 3.467885732650757, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1819, + "step": 3700 + }, + { + "epoch": 2.5, + "grad_norm": 3.418673038482666, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1831, + "step": 3725 + }, + { + "epoch": 2.52, + "grad_norm": 4.263250350952148, + "learning_rate": 9.673768844221106e-06, + "loss": 0.186, + "step": 3750 + }, + { + "epoch": 2.54, + "grad_norm": 3.5524044036865234, + "learning_rate": 9.671256281407035e-06, + "loss": 0.1889, + "step": 3775 + }, + { + "epoch": 2.55, + "grad_norm": 3.693559408187866, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1842, + "step": 3800 + }, + { + "epoch": 2.57, + "grad_norm": 3.682617425918579, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1826, + "step": 3825 + }, + { + "epoch": 2.59, + "grad_norm": 3.4766149520874023, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1766, + "step": 3850 + }, + { + "epoch": 2.6, + "grad_norm": 3.3245768547058105, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1816, + "step": 3875 + }, + { + "epoch": 2.62, + "grad_norm": 4.028345584869385, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1803, + "step": 3900 + }, + { + "epoch": 2.64, + "grad_norm": 3.665334463119507, + "learning_rate": 9.656180904522614e-06, + "loss": 0.178, + "step": 3925 + }, + { + "epoch": 2.65, + "grad_norm": 4.212314128875732, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1818, + "step": 3950 + }, + { + "epoch": 2.67, + "grad_norm": 4.093043804168701, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1784, + "step": 3975 + }, + { + "epoch": 2.69, + "grad_norm": 3.8363521099090576, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1786, + "step": 4000 + }, + { + "epoch": 2.69, + "eval_loss": 0.1332446187734604, + "eval_runtime": 541.0077, + "eval_samples_per_second": 2.562, + "eval_steps_per_second": 2.562, + "eval_wer": 30.64630054741303, + "step": 4000 + }, + { + "epoch": 2.7, + "grad_norm": 3.6020116806030273, + "learning_rate": 9.646130653266332e-06, + "loss": 0.1755, + "step": 4025 + }, + { + "epoch": 2.72, + "grad_norm": 3.253662586212158, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1767, + "step": 4050 + }, + { + "epoch": 2.74, + "grad_norm": 3.3012306690216064, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1813, + "step": 4075 + }, + { + "epoch": 2.75, + "grad_norm": 3.7597391605377197, + "learning_rate": 9.638592964824121e-06, + "loss": 0.1806, + "step": 4100 + }, + { + "epoch": 2.77, + "grad_norm": 3.914498805999756, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1781, + "step": 4125 + }, + { + "epoch": 2.79, + "grad_norm": 3.5466084480285645, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1784, + "step": 4150 + }, + { + "epoch": 2.8, + "grad_norm": 3.5035176277160645, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1693, + "step": 4175 + }, + { + "epoch": 2.82, + "grad_norm": 3.6211013793945312, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1831, + "step": 4200 + }, + { + "epoch": 2.84, + "grad_norm": 3.355555772781372, + "learning_rate": 9.62603015075377e-06, + "loss": 0.1737, + "step": 4225 + }, + { + "epoch": 2.85, + "grad_norm": 4.14884614944458, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1773, + "step": 4250 + }, + { + "epoch": 2.87, + "grad_norm": 3.896099805831909, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1712, + "step": 4275 + }, + { + "epoch": 2.89, + "grad_norm": 3.69228196144104, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1747, + "step": 4300 + }, + { + "epoch": 2.9, + "grad_norm": 3.4385323524475098, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1806, + "step": 4325 + }, + { + "epoch": 2.92, + "grad_norm": 4.37261438369751, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1705, + "step": 4350 + }, + { + "epoch": 2.94, + "grad_norm": 3.549129009246826, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1683, + "step": 4375 + }, + { + "epoch": 2.96, + "grad_norm": 3.8860154151916504, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1732, + "step": 4400 + }, + { + "epoch": 2.97, + "grad_norm": 3.8280348777770996, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1739, + "step": 4425 + }, + { + "epoch": 2.99, + "grad_norm": 3.9021239280700684, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1744, + "step": 4450 + }, + { + "epoch": 3.01, + "grad_norm": 3.436377763748169, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1674, + "step": 4475 + }, + { + "epoch": 3.02, + "grad_norm": 3.5898520946502686, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1546, + "step": 4500 + }, + { + "epoch": 3.04, + "grad_norm": 3.295307159423828, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1543, + "step": 4525 + }, + { + "epoch": 3.06, + "grad_norm": 3.3402857780456543, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1585, + "step": 4550 + }, + { + "epoch": 3.07, + "grad_norm": 3.4992740154266357, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1584, + "step": 4575 + }, + { + "epoch": 3.09, + "grad_norm": 3.301234245300293, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1556, + "step": 4600 + }, + { + "epoch": 3.11, + "grad_norm": 3.2298898696899414, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1557, + "step": 4625 + }, + { + "epoch": 3.12, + "grad_norm": 3.81208872795105, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1488, + "step": 4650 + }, + { + "epoch": 3.14, + "grad_norm": 3.7610058784484863, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1537, + "step": 4675 + }, + { + "epoch": 3.16, + "grad_norm": 3.394169569015503, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1546, + "step": 4700 + }, + { + "epoch": 3.17, + "grad_norm": 3.5936498641967773, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1544, + "step": 4725 + }, + { + "epoch": 3.19, + "grad_norm": 3.714808464050293, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1554, + "step": 4750 + }, + { + "epoch": 3.21, + "grad_norm": 3.731008768081665, + "learning_rate": 9.570753768844222e-06, + "loss": 0.157, + "step": 4775 + }, + { + "epoch": 3.22, + "grad_norm": 3.4987032413482666, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1506, + "step": 4800 + }, + { + "epoch": 3.24, + "grad_norm": 3.487567186355591, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1553, + "step": 4825 + }, + { + "epoch": 3.26, + "grad_norm": 3.537971258163452, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1518, + "step": 4850 + }, + { + "epoch": 3.27, + "grad_norm": 3.5595390796661377, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1519, + "step": 4875 + }, + { + "epoch": 3.29, + "grad_norm": 3.397580146789551, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1516, + "step": 4900 + }, + { + "epoch": 3.31, + "grad_norm": 3.758497714996338, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1562, + "step": 4925 + }, + { + "epoch": 3.32, + "grad_norm": 3.1611812114715576, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1522, + "step": 4950 + }, + { + "epoch": 3.34, + "grad_norm": 3.4654600620269775, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1544, + "step": 4975 + }, + { + "epoch": 3.36, + "grad_norm": 3.0207717418670654, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1574, + "step": 5000 + }, + { + "epoch": 3.36, + "eval_loss": 0.1250012367963791, + "eval_runtime": 532.3886, + "eval_samples_per_second": 2.603, + "eval_steps_per_second": 2.603, + "eval_wer": 28.50962387427159, + "step": 5000 + }, + { + "epoch": 3.37, + "grad_norm": 3.4317171573638916, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1563, + "step": 5025 + }, + { + "epoch": 3.39, + "grad_norm": 3.7062742710113525, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1559, + "step": 5050 + }, + { + "epoch": 3.41, + "grad_norm": 3.580521821975708, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1521, + "step": 5075 + }, + { + "epoch": 3.43, + "grad_norm": 3.364760160446167, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1637, + "step": 5100 + }, + { + "epoch": 3.44, + "grad_norm": 3.812782049179077, + "learning_rate": 9.535577889447237e-06, + "loss": 0.1564, + "step": 5125 + }, + { + "epoch": 3.46, + "grad_norm": 3.065197229385376, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1484, + "step": 5150 + }, + { + "epoch": 3.48, + "grad_norm": 3.456214427947998, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1517, + "step": 5175 + }, + { + "epoch": 3.49, + "grad_norm": 3.731849193572998, + "learning_rate": 9.528040201005025e-06, + "loss": 0.1499, + "step": 5200 + }, + { + "epoch": 3.51, + "grad_norm": 4.133150577545166, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1475, + "step": 5225 + }, + { + "epoch": 3.53, + "grad_norm": 3.353069543838501, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1479, + "step": 5250 + }, + { + "epoch": 3.54, + "grad_norm": 3.885782480239868, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1458, + "step": 5275 + }, + { + "epoch": 3.56, + "grad_norm": 3.5367889404296875, + "learning_rate": 9.517989949748744e-06, + "loss": 0.1531, + "step": 5300 + }, + { + "epoch": 3.58, + "grad_norm": 3.8021907806396484, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1498, + "step": 5325 + }, + { + "epoch": 3.59, + "grad_norm": 3.308176279067993, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1519, + "step": 5350 + }, + { + "epoch": 3.61, + "grad_norm": 3.668410539627075, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1526, + "step": 5375 + }, + { + "epoch": 3.63, + "grad_norm": 3.228257417678833, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1509, + "step": 5400 + }, + { + "epoch": 3.64, + "grad_norm": 3.433962345123291, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1521, + "step": 5425 + }, + { + "epoch": 3.66, + "grad_norm": 3.707969903945923, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1439, + "step": 5450 + }, + { + "epoch": 3.68, + "grad_norm": 3.447314739227295, + "learning_rate": 9.500402010050253e-06, + "loss": 0.1473, + "step": 5475 + }, + { + "epoch": 3.69, + "grad_norm": 3.579751968383789, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1507, + "step": 5500 + }, + { + "epoch": 3.71, + "grad_norm": 3.470454454421997, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1473, + "step": 5525 + }, + { + "epoch": 3.73, + "grad_norm": 3.2754967212677, + "learning_rate": 9.49286432160804e-06, + "loss": 0.1458, + "step": 5550 + }, + { + "epoch": 3.74, + "grad_norm": 3.724622964859009, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1474, + "step": 5575 + }, + { + "epoch": 3.76, + "grad_norm": 3.176765203475952, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1484, + "step": 5600 + }, + { + "epoch": 3.78, + "grad_norm": 3.3496909141540527, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1485, + "step": 5625 + }, + { + "epoch": 3.79, + "grad_norm": 3.3814542293548584, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1491, + "step": 5650 + }, + { + "epoch": 3.81, + "grad_norm": 3.9236228466033936, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1522, + "step": 5675 + }, + { + "epoch": 3.83, + "grad_norm": 3.3441123962402344, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1442, + "step": 5700 + }, + { + "epoch": 3.84, + "grad_norm": 3.3952231407165527, + "learning_rate": 9.475276381909548e-06, + "loss": 0.1487, + "step": 5725 + }, + { + "epoch": 3.86, + "grad_norm": 3.1410765647888184, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1522, + "step": 5750 + }, + { + "epoch": 3.88, + "grad_norm": 3.267335891723633, + "learning_rate": 9.470251256281408e-06, + "loss": 0.1425, + "step": 5775 + }, + { + "epoch": 3.9, + "grad_norm": 3.547773838043213, + "learning_rate": 9.467738693467337e-06, + "loss": 0.1416, + "step": 5800 + }, + { + "epoch": 3.91, + "grad_norm": 3.4462673664093018, + "learning_rate": 9.465226130653267e-06, + "loss": 0.1453, + "step": 5825 + }, + { + "epoch": 3.93, + "grad_norm": 3.4584672451019287, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1445, + "step": 5850 + }, + { + "epoch": 3.95, + "grad_norm": 3.501403331756592, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1423, + "step": 5875 + }, + { + "epoch": 3.96, + "grad_norm": 3.912052631378174, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1481, + "step": 5900 + }, + { + "epoch": 3.98, + "grad_norm": 3.257798433303833, + "learning_rate": 9.455175879396986e-06, + "loss": 0.1385, + "step": 5925 + }, + { + "epoch": 4.0, + "grad_norm": 3.7633163928985596, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1444, + "step": 5950 + }, + { + "epoch": 4.01, + "grad_norm": 3.1884329319000244, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1314, + "step": 5975 + }, + { + "epoch": 4.03, + "grad_norm": 3.5105104446411133, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1284, + "step": 6000 + }, + { + "epoch": 4.03, + "eval_loss": 0.11968862265348434, + "eval_runtime": 533.7582, + "eval_samples_per_second": 2.597, + "eval_steps_per_second": 2.597, + "eval_wer": 27.644358114073814, + "step": 6000 + }, + { + "epoch": 4.05, + "grad_norm": 3.4444563388824463, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1278, + "step": 6025 + }, + { + "epoch": 4.06, + "grad_norm": 3.446941375732422, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1316, + "step": 6050 + }, + { + "epoch": 4.08, + "grad_norm": 3.251770496368408, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1289, + "step": 6075 + }, + { + "epoch": 4.1, + "grad_norm": 3.1929450035095215, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1257, + "step": 6100 + }, + { + "epoch": 4.11, + "grad_norm": 3.137993097305298, + "learning_rate": 9.435075376884422e-06, + "loss": 0.1259, + "step": 6125 + }, + { + "epoch": 4.13, + "grad_norm": 3.5924248695373535, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1246, + "step": 6150 + }, + { + "epoch": 4.15, + "grad_norm": 3.7657840251922607, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1263, + "step": 6175 + }, + { + "epoch": 4.16, + "grad_norm": 3.8803839683532715, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1278, + "step": 6200 + }, + { + "epoch": 4.18, + "grad_norm": 3.049147844314575, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1248, + "step": 6225 + }, + { + "epoch": 4.2, + "grad_norm": 3.5847809314727783, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1326, + "step": 6250 + }, + { + "epoch": 4.21, + "grad_norm": 3.208193063735962, + "learning_rate": 9.42e-06, + "loss": 0.1278, + "step": 6275 + }, + { + "epoch": 4.23, + "grad_norm": 3.787940740585327, + "learning_rate": 9.41748743718593e-06, + "loss": 0.1286, + "step": 6300 + }, + { + "epoch": 4.25, + "grad_norm": 2.801053762435913, + "learning_rate": 9.41497487437186e-06, + "loss": 0.1309, + "step": 6325 + }, + { + "epoch": 4.26, + "grad_norm": 3.1014838218688965, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1265, + "step": 6350 + }, + { + "epoch": 4.28, + "grad_norm": 3.3319690227508545, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1273, + "step": 6375 + }, + { + "epoch": 4.3, + "grad_norm": 3.366464376449585, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1266, + "step": 6400 + }, + { + "epoch": 4.31, + "grad_norm": 3.5356907844543457, + "learning_rate": 9.404924623115579e-06, + "loss": 0.13, + "step": 6425 + }, + { + "epoch": 4.33, + "grad_norm": 3.325680732727051, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1262, + "step": 6450 + }, + { + "epoch": 4.35, + "grad_norm": 3.4266843795776367, + "learning_rate": 9.399899497487438e-06, + "loss": 0.1284, + "step": 6475 + }, + { + "epoch": 4.37, + "grad_norm": 3.1395492553710938, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1276, + "step": 6500 + }, + { + "epoch": 4.38, + "grad_norm": 3.323065757751465, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1246, + "step": 6525 + }, + { + "epoch": 4.4, + "grad_norm": 3.3577070236206055, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1326, + "step": 6550 + }, + { + "epoch": 4.42, + "grad_norm": 3.4483211040496826, + "learning_rate": 9.389849246231157e-06, + "loss": 0.1287, + "step": 6575 + }, + { + "epoch": 4.43, + "grad_norm": 3.939202308654785, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1295, + "step": 6600 + }, + { + "epoch": 4.45, + "grad_norm": 3.5882346630096436, + "learning_rate": 9.384824120603015e-06, + "loss": 0.1257, + "step": 6625 + }, + { + "epoch": 4.47, + "grad_norm": 3.9268131256103516, + "learning_rate": 9.382311557788946e-06, + "loss": 0.1308, + "step": 6650 + }, + { + "epoch": 4.48, + "grad_norm": 3.2181479930877686, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1289, + "step": 6675 + }, + { + "epoch": 4.5, + "grad_norm": 3.4467923641204834, + "learning_rate": 9.377286432160805e-06, + "loss": 0.1286, + "step": 6700 + }, + { + "epoch": 4.52, + "grad_norm": 3.227398157119751, + "learning_rate": 9.374773869346734e-06, + "loss": 0.1279, + "step": 6725 + }, + { + "epoch": 4.53, + "grad_norm": 3.5086820125579834, + "learning_rate": 9.372261306532664e-06, + "loss": 0.1295, + "step": 6750 + }, + { + "epoch": 4.55, + "grad_norm": 3.0712101459503174, + "learning_rate": 9.369748743718595e-06, + "loss": 0.1285, + "step": 6775 + }, + { + "epoch": 4.57, + "grad_norm": 3.3961784839630127, + "learning_rate": 9.367236180904524e-06, + "loss": 0.1311, + "step": 6800 + }, + { + "epoch": 4.58, + "grad_norm": 3.6800429821014404, + "learning_rate": 9.364723618090453e-06, + "loss": 0.1284, + "step": 6825 + }, + { + "epoch": 4.6, + "grad_norm": 3.6793227195739746, + "learning_rate": 9.362211055276383e-06, + "loss": 0.1311, + "step": 6850 + }, + { + "epoch": 4.62, + "grad_norm": 3.1020681858062744, + "learning_rate": 9.359698492462312e-06, + "loss": 0.1287, + "step": 6875 + }, + { + "epoch": 4.63, + "grad_norm": 3.738802909851074, + "learning_rate": 9.357185929648241e-06, + "loss": 0.1241, + "step": 6900 + }, + { + "epoch": 4.65, + "grad_norm": 3.344667911529541, + "learning_rate": 9.354673366834172e-06, + "loss": 0.1247, + "step": 6925 + }, + { + "epoch": 4.67, + "grad_norm": 3.011655330657959, + "learning_rate": 9.352160804020101e-06, + "loss": 0.1237, + "step": 6950 + }, + { + "epoch": 4.68, + "grad_norm": 3.486971139907837, + "learning_rate": 9.34964824120603e-06, + "loss": 0.1275, + "step": 6975 + }, + { + "epoch": 4.7, + "grad_norm": 3.265568971633911, + "learning_rate": 9.34713567839196e-06, + "loss": 0.1216, + "step": 7000 + }, + { + "epoch": 4.7, + "eval_loss": 0.11660390347242355, + "eval_runtime": 534.0823, + "eval_samples_per_second": 2.595, + "eval_steps_per_second": 2.595, + "eval_wer": 26.823238566131025, + "step": 7000 + }, + { + "epoch": 4.72, + "grad_norm": 3.0894901752471924, + "learning_rate": 9.34462311557789e-06, + "loss": 0.1258, + "step": 7025 + }, + { + "epoch": 4.73, + "grad_norm": 3.5530054569244385, + "learning_rate": 9.34211055276382e-06, + "loss": 0.1294, + "step": 7050 + }, + { + "epoch": 4.75, + "grad_norm": 3.127763271331787, + "learning_rate": 9.33959798994975e-06, + "loss": 0.129, + "step": 7075 + }, + { + "epoch": 4.77, + "grad_norm": 3.453204393386841, + "learning_rate": 9.337085427135679e-06, + "loss": 0.1264, + "step": 7100 + }, + { + "epoch": 4.79, + "grad_norm": 3.470991611480713, + "learning_rate": 9.334572864321608e-06, + "loss": 0.1272, + "step": 7125 + }, + { + "epoch": 4.8, + "grad_norm": 3.498213768005371, + "learning_rate": 9.332060301507538e-06, + "loss": 0.1257, + "step": 7150 + }, + { + "epoch": 4.82, + "grad_norm": 3.052225351333618, + "learning_rate": 9.329547738693469e-06, + "loss": 0.1242, + "step": 7175 + }, + { + "epoch": 4.84, + "grad_norm": 2.9512875080108643, + "learning_rate": 9.327035175879398e-06, + "loss": 0.1226, + "step": 7200 + }, + { + "epoch": 4.85, + "grad_norm": 3.124257802963257, + "learning_rate": 9.324522613065327e-06, + "loss": 0.1276, + "step": 7225 + }, + { + "epoch": 4.87, + "grad_norm": 3.763948678970337, + "learning_rate": 9.322010050251257e-06, + "loss": 0.123, + "step": 7250 + }, + { + "epoch": 4.89, + "grad_norm": 3.859360694885254, + "learning_rate": 9.319497487437186e-06, + "loss": 0.1288, + "step": 7275 + }, + { + "epoch": 4.9, + "grad_norm": 3.406261682510376, + "learning_rate": 9.316984924623115e-06, + "loss": 0.1268, + "step": 7300 + }, + { + "epoch": 4.92, + "grad_norm": 3.5981762409210205, + "learning_rate": 9.314472361809046e-06, + "loss": 0.1267, + "step": 7325 + }, + { + "epoch": 4.94, + "grad_norm": 3.2677414417266846, + "learning_rate": 9.311959798994976e-06, + "loss": 0.1228, + "step": 7350 + }, + { + "epoch": 4.95, + "grad_norm": 3.4176025390625, + "learning_rate": 9.309447236180905e-06, + "loss": 0.1292, + "step": 7375 + }, + { + "epoch": 4.97, + "grad_norm": 3.702085018157959, + "learning_rate": 9.306934673366836e-06, + "loss": 0.1212, + "step": 7400 + }, + { + "epoch": 4.99, + "grad_norm": 3.075143337249756, + "learning_rate": 9.304422110552764e-06, + "loss": 0.1208, + "step": 7425 + }, + { + "epoch": 5.0, + "grad_norm": 2.96437406539917, + "learning_rate": 9.301909547738695e-06, + "loss": 0.1223, + "step": 7450 + }, + { + "epoch": 5.02, + "grad_norm": 3.359867572784424, + "learning_rate": 9.299396984924624e-06, + "loss": 0.1083, + "step": 7475 + }, + { + "epoch": 5.04, + "grad_norm": 3.1340601444244385, + "learning_rate": 9.296884422110553e-06, + "loss": 0.1088, + "step": 7500 + }, + { + "epoch": 5.05, + "grad_norm": 3.4933323860168457, + "learning_rate": 9.294371859296483e-06, + "loss": 0.1103, + "step": 7525 + }, + { + "epoch": 5.07, + "grad_norm": 2.8419055938720703, + "learning_rate": 9.291859296482412e-06, + "loss": 0.1074, + "step": 7550 + }, + { + "epoch": 5.09, + "grad_norm": 2.699908971786499, + "learning_rate": 9.289346733668343e-06, + "loss": 0.1074, + "step": 7575 + }, + { + "epoch": 5.1, + "grad_norm": 3.4752280712127686, + "learning_rate": 9.286834170854272e-06, + "loss": 0.1074, + "step": 7600 + }, + { + "epoch": 5.12, + "grad_norm": 3.5037472248077393, + "learning_rate": 9.284321608040202e-06, + "loss": 0.1114, + "step": 7625 + }, + { + "epoch": 5.14, + "grad_norm": 3.3195717334747314, + "learning_rate": 9.281809045226131e-06, + "loss": 0.1111, + "step": 7650 + }, + { + "epoch": 5.15, + "grad_norm": 3.210256338119507, + "learning_rate": 9.279296482412062e-06, + "loss": 0.1095, + "step": 7675 + }, + { + "epoch": 5.17, + "grad_norm": 3.4619410037994385, + "learning_rate": 9.27678391959799e-06, + "loss": 0.1118, + "step": 7700 + }, + { + "epoch": 5.19, + "grad_norm": 3.2132604122161865, + "learning_rate": 9.27427135678392e-06, + "loss": 0.1065, + "step": 7725 + }, + { + "epoch": 5.2, + "grad_norm": 3.379657030105591, + "learning_rate": 9.27175879396985e-06, + "loss": 0.1121, + "step": 7750 + }, + { + "epoch": 5.22, + "grad_norm": 3.6748008728027344, + "learning_rate": 9.26924623115578e-06, + "loss": 0.1062, + "step": 7775 + }, + { + "epoch": 5.24, + "grad_norm": 3.063694715499878, + "learning_rate": 9.26673366834171e-06, + "loss": 0.1077, + "step": 7800 + }, + { + "epoch": 5.26, + "grad_norm": 3.237032413482666, + "learning_rate": 9.264221105527638e-06, + "loss": 0.1079, + "step": 7825 + }, + { + "epoch": 5.27, + "grad_norm": 3.2364072799682617, + "learning_rate": 9.261708542713569e-06, + "loss": 0.1075, + "step": 7850 + }, + { + "epoch": 5.29, + "grad_norm": 3.2186496257781982, + "learning_rate": 9.259195979899498e-06, + "loss": 0.107, + "step": 7875 + }, + { + "epoch": 5.31, + "grad_norm": 3.249338150024414, + "learning_rate": 9.256683417085428e-06, + "loss": 0.1101, + "step": 7900 + }, + { + "epoch": 5.32, + "grad_norm": 2.9037253856658936, + "learning_rate": 9.254170854271357e-06, + "loss": 0.1049, + "step": 7925 + }, + { + "epoch": 5.34, + "grad_norm": 3.467984914779663, + "learning_rate": 9.251658291457288e-06, + "loss": 0.1164, + "step": 7950 + }, + { + "epoch": 5.36, + "grad_norm": 3.047340154647827, + "learning_rate": 9.249145728643217e-06, + "loss": 0.1079, + "step": 7975 + }, + { + "epoch": 5.37, + "grad_norm": 3.2782435417175293, + "learning_rate": 9.246633165829147e-06, + "loss": 0.1063, + "step": 8000 + }, + { + "epoch": 5.37, + "eval_loss": 0.11793605983257294, + "eval_runtime": 535.0689, + "eval_samples_per_second": 2.59, + "eval_steps_per_second": 2.59, + "eval_wer": 27.19406674907293, + "step": 8000 + }, + { + "epoch": 5.39, + "grad_norm": 3.045055389404297, + "learning_rate": 9.244120603015076e-06, + "loss": 0.1068, + "step": 8025 + }, + { + "epoch": 5.41, + "grad_norm": 3.3729374408721924, + "learning_rate": 9.241608040201005e-06, + "loss": 0.1097, + "step": 8050 + }, + { + "epoch": 5.42, + "grad_norm": 3.581709861755371, + "learning_rate": 9.239095477386936e-06, + "loss": 0.109, + "step": 8075 + }, + { + "epoch": 5.44, + "grad_norm": 3.690354585647583, + "learning_rate": 9.236582914572864e-06, + "loss": 0.1105, + "step": 8100 + }, + { + "epoch": 5.46, + "grad_norm": 3.4395689964294434, + "learning_rate": 9.234070351758795e-06, + "loss": 0.1047, + "step": 8125 + }, + { + "epoch": 5.47, + "grad_norm": 3.5904619693756104, + "learning_rate": 9.231557788944724e-06, + "loss": 0.1098, + "step": 8150 + }, + { + "epoch": 5.49, + "grad_norm": 3.4449338912963867, + "learning_rate": 9.229045226130654e-06, + "loss": 0.1094, + "step": 8175 + }, + { + "epoch": 5.51, + "grad_norm": 3.081770181655884, + "learning_rate": 9.226532663316585e-06, + "loss": 0.1046, + "step": 8200 + }, + { + "epoch": 5.52, + "grad_norm": 3.2109663486480713, + "learning_rate": 9.224020100502514e-06, + "loss": 0.11, + "step": 8225 + }, + { + "epoch": 5.54, + "grad_norm": 3.8002219200134277, + "learning_rate": 9.221507537688443e-06, + "loss": 0.1103, + "step": 8250 + }, + { + "epoch": 5.56, + "grad_norm": 3.423508644104004, + "learning_rate": 9.218994974874373e-06, + "loss": 0.1046, + "step": 8275 + }, + { + "epoch": 5.57, + "grad_norm": 3.408816337585449, + "learning_rate": 9.216482412060302e-06, + "loss": 0.1137, + "step": 8300 + }, + { + "epoch": 5.59, + "grad_norm": 3.349015235900879, + "learning_rate": 9.213969849246231e-06, + "loss": 0.1063, + "step": 8325 + }, + { + "epoch": 5.61, + "grad_norm": 3.255462169647217, + "learning_rate": 9.211457286432162e-06, + "loss": 0.1081, + "step": 8350 + }, + { + "epoch": 5.62, + "grad_norm": 3.0760374069213867, + "learning_rate": 9.20894472361809e-06, + "loss": 0.1124, + "step": 8375 + }, + { + "epoch": 5.64, + "grad_norm": 3.469221830368042, + "learning_rate": 9.206432160804021e-06, + "loss": 0.1095, + "step": 8400 + }, + { + "epoch": 5.66, + "grad_norm": 3.20563006401062, + "learning_rate": 9.20391959798995e-06, + "loss": 0.1091, + "step": 8425 + }, + { + "epoch": 5.67, + "grad_norm": 3.58202862739563, + "learning_rate": 9.20140703517588e-06, + "loss": 0.1084, + "step": 8450 + }, + { + "epoch": 5.69, + "grad_norm": 3.2912611961364746, + "learning_rate": 9.19889447236181e-06, + "loss": 0.109, + "step": 8475 + }, + { + "epoch": 5.71, + "grad_norm": 3.2603135108947754, + "learning_rate": 9.19638190954774e-06, + "loss": 0.1051, + "step": 8500 + }, + { + "epoch": 5.73, + "grad_norm": 3.3398075103759766, + "learning_rate": 9.19386934673367e-06, + "loss": 0.1086, + "step": 8525 + }, + { + "epoch": 5.74, + "grad_norm": 3.480815887451172, + "learning_rate": 9.191356783919599e-06, + "loss": 0.1059, + "step": 8550 + }, + { + "epoch": 5.76, + "grad_norm": 3.1898598670959473, + "learning_rate": 9.188844221105528e-06, + "loss": 0.104, + "step": 8575 + }, + { + "epoch": 5.78, + "grad_norm": 3.3440845012664795, + "learning_rate": 9.186331658291459e-06, + "loss": 0.1126, + "step": 8600 + }, + { + "epoch": 5.79, + "grad_norm": 3.6762936115264893, + "learning_rate": 9.183819095477388e-06, + "loss": 0.1064, + "step": 8625 + }, + { + "epoch": 5.81, + "grad_norm": 3.66489315032959, + "learning_rate": 9.181306532663317e-06, + "loss": 0.1096, + "step": 8650 + }, + { + "epoch": 5.83, + "grad_norm": 3.3494789600372314, + "learning_rate": 9.178793969849247e-06, + "loss": 0.1111, + "step": 8675 + }, + { + "epoch": 5.84, + "grad_norm": 3.4388012886047363, + "learning_rate": 9.176281407035176e-06, + "loss": 0.1107, + "step": 8700 + }, + { + "epoch": 5.86, + "grad_norm": 3.2451605796813965, + "learning_rate": 9.173768844221105e-06, + "loss": 0.1089, + "step": 8725 + }, + { + "epoch": 5.88, + "grad_norm": 3.6606147289276123, + "learning_rate": 9.171256281407036e-06, + "loss": 0.1084, + "step": 8750 + }, + { + "epoch": 5.89, + "grad_norm": 3.2145121097564697, + "learning_rate": 9.168743718592966e-06, + "loss": 0.1063, + "step": 8775 + }, + { + "epoch": 5.91, + "grad_norm": 3.6518869400024414, + "learning_rate": 9.166231155778895e-06, + "loss": 0.1043, + "step": 8800 + }, + { + "epoch": 5.93, + "grad_norm": 2.9405784606933594, + "learning_rate": 9.163718592964826e-06, + "loss": 0.1117, + "step": 8825 + }, + { + "epoch": 5.94, + "grad_norm": 3.5626678466796875, + "learning_rate": 9.161206030150754e-06, + "loss": 0.1048, + "step": 8850 + }, + { + "epoch": 5.96, + "grad_norm": 3.2351441383361816, + "learning_rate": 9.158693467336685e-06, + "loss": 0.1093, + "step": 8875 + }, + { + "epoch": 5.98, + "grad_norm": 3.439530372619629, + "learning_rate": 9.156180904522614e-06, + "loss": 0.1073, + "step": 8900 + }, + { + "epoch": 5.99, + "grad_norm": 3.4655144214630127, + "learning_rate": 9.153668341708543e-06, + "loss": 0.1067, + "step": 8925 + }, + { + "epoch": 6.01, + "grad_norm": 2.794478178024292, + "learning_rate": 9.151155778894473e-06, + "loss": 0.0995, + "step": 8950 + }, + { + "epoch": 6.03, + "grad_norm": 3.5291810035705566, + "learning_rate": 9.148643216080402e-06, + "loss": 0.0925, + "step": 8975 + }, + { + "epoch": 6.04, + "grad_norm": 2.9376721382141113, + "learning_rate": 9.146130653266331e-06, + "loss": 0.0879, + "step": 9000 + }, + { + "epoch": 6.04, + "eval_loss": 0.1166120246052742, + "eval_runtime": 531.7233, + "eval_samples_per_second": 2.607, + "eval_steps_per_second": 2.607, + "eval_wer": 26.876214020837015, + "step": 9000 + }, + { + "epoch": 6.06, + "grad_norm": 3.121159076690674, + "learning_rate": 9.143618090452262e-06, + "loss": 0.0925, + "step": 9025 + }, + { + "epoch": 6.08, + "grad_norm": 3.190279722213745, + "learning_rate": 9.141105527638192e-06, + "loss": 0.0966, + "step": 9050 + }, + { + "epoch": 6.09, + "grad_norm": 2.9551713466644287, + "learning_rate": 9.138592964824121e-06, + "loss": 0.0927, + "step": 9075 + }, + { + "epoch": 6.11, + "grad_norm": 2.6916284561157227, + "learning_rate": 9.136080402010052e-06, + "loss": 0.0905, + "step": 9100 + }, + { + "epoch": 6.13, + "grad_norm": 3.1297528743743896, + "learning_rate": 9.13356783919598e-06, + "loss": 0.0966, + "step": 9125 + }, + { + "epoch": 6.15, + "grad_norm": 3.3253660202026367, + "learning_rate": 9.13105527638191e-06, + "loss": 0.0977, + "step": 9150 + }, + { + "epoch": 6.16, + "grad_norm": 3.1732029914855957, + "learning_rate": 9.12854271356784e-06, + "loss": 0.0943, + "step": 9175 + }, + { + "epoch": 6.18, + "grad_norm": 3.00846791267395, + "learning_rate": 9.12603015075377e-06, + "loss": 0.095, + "step": 9200 + }, + { + "epoch": 6.2, + "grad_norm": 3.4318153858184814, + "learning_rate": 9.1235175879397e-06, + "loss": 0.0918, + "step": 9225 + }, + { + "epoch": 6.21, + "grad_norm": 2.615586519241333, + "learning_rate": 9.121005025125628e-06, + "loss": 0.0918, + "step": 9250 + }, + { + "epoch": 6.23, + "grad_norm": 3.2654173374176025, + "learning_rate": 9.118492462311559e-06, + "loss": 0.0892, + "step": 9275 + }, + { + "epoch": 6.25, + "grad_norm": 3.255948066711426, + "learning_rate": 9.115979899497488e-06, + "loss": 0.0955, + "step": 9300 + }, + { + "epoch": 6.26, + "grad_norm": 3.593632221221924, + "learning_rate": 9.113467336683418e-06, + "loss": 0.0951, + "step": 9325 + }, + { + "epoch": 6.28, + "grad_norm": 3.3398244380950928, + "learning_rate": 9.110954773869347e-06, + "loss": 0.0926, + "step": 9350 + }, + { + "epoch": 6.3, + "grad_norm": 3.4789888858795166, + "learning_rate": 9.108442211055278e-06, + "loss": 0.0946, + "step": 9375 + }, + { + "epoch": 6.31, + "grad_norm": 3.1585254669189453, + "learning_rate": 9.105929648241206e-06, + "loss": 0.0921, + "step": 9400 + }, + { + "epoch": 6.33, + "grad_norm": 3.3125743865966797, + "learning_rate": 9.103417085427137e-06, + "loss": 0.0912, + "step": 9425 + }, + { + "epoch": 6.35, + "grad_norm": 2.899616241455078, + "learning_rate": 9.100904522613066e-06, + "loss": 0.0935, + "step": 9450 + }, + { + "epoch": 6.36, + "grad_norm": 2.9725539684295654, + "learning_rate": 9.098391959798995e-06, + "loss": 0.0934, + "step": 9475 + }, + { + "epoch": 6.38, + "grad_norm": 3.340712070465088, + "learning_rate": 9.095879396984926e-06, + "loss": 0.0968, + "step": 9500 + }, + { + "epoch": 6.4, + "grad_norm": 3.4166252613067627, + "learning_rate": 9.093366834170854e-06, + "loss": 0.0902, + "step": 9525 + }, + { + "epoch": 6.41, + "grad_norm": 3.42030930519104, + "learning_rate": 9.090854271356785e-06, + "loss": 0.0903, + "step": 9550 + }, + { + "epoch": 6.43, + "grad_norm": 2.913060188293457, + "learning_rate": 9.088341708542714e-06, + "loss": 0.0967, + "step": 9575 + }, + { + "epoch": 6.45, + "grad_norm": 2.9808599948883057, + "learning_rate": 9.085829145728644e-06, + "loss": 0.0898, + "step": 9600 + }, + { + "epoch": 6.46, + "grad_norm": 3.318812847137451, + "learning_rate": 9.083316582914573e-06, + "loss": 0.0964, + "step": 9625 + }, + { + "epoch": 6.48, + "grad_norm": 2.8281571865081787, + "learning_rate": 9.080804020100504e-06, + "loss": 0.0925, + "step": 9650 + }, + { + "epoch": 6.5, + "grad_norm": 3.3148748874664307, + "learning_rate": 9.078291457286433e-06, + "loss": 0.0955, + "step": 9675 + }, + { + "epoch": 6.51, + "grad_norm": 3.047445297241211, + "learning_rate": 9.075778894472363e-06, + "loss": 0.0907, + "step": 9700 + }, + { + "epoch": 6.53, + "grad_norm": 3.201747417449951, + "learning_rate": 9.073266331658292e-06, + "loss": 0.0891, + "step": 9725 + }, + { + "epoch": 6.55, + "grad_norm": 3.5526840686798096, + "learning_rate": 9.070753768844221e-06, + "loss": 0.0937, + "step": 9750 + }, + { + "epoch": 6.56, + "grad_norm": 3.3490021228790283, + "learning_rate": 9.068241206030152e-06, + "loss": 0.0924, + "step": 9775 + }, + { + "epoch": 6.58, + "grad_norm": 3.195934534072876, + "learning_rate": 9.06572864321608e-06, + "loss": 0.0941, + "step": 9800 + }, + { + "epoch": 6.6, + "grad_norm": 3.1133546829223633, + "learning_rate": 9.063216080402011e-06, + "loss": 0.093, + "step": 9825 + }, + { + "epoch": 6.62, + "grad_norm": 3.5979671478271484, + "learning_rate": 9.06070351758794e-06, + "loss": 0.0881, + "step": 9850 + }, + { + "epoch": 6.63, + "grad_norm": 3.7291669845581055, + "learning_rate": 9.05819095477387e-06, + "loss": 0.0955, + "step": 9875 + }, + { + "epoch": 6.65, + "grad_norm": 3.2835400104522705, + "learning_rate": 9.0556783919598e-06, + "loss": 0.0902, + "step": 9900 + }, + { + "epoch": 6.67, + "grad_norm": 3.1277029514312744, + "learning_rate": 9.05316582914573e-06, + "loss": 0.0936, + "step": 9925 + }, + { + "epoch": 6.68, + "grad_norm": 3.2376766204833984, + "learning_rate": 9.05065326633166e-06, + "loss": 0.0896, + "step": 9950 + }, + { + "epoch": 6.7, + "grad_norm": 2.698474168777466, + "learning_rate": 9.048140703517589e-06, + "loss": 0.0915, + "step": 9975 + }, + { + "epoch": 6.72, + "grad_norm": 3.623647451400757, + "learning_rate": 9.045628140703518e-06, + "loss": 0.0924, + "step": 10000 + }, + { + "epoch": 6.72, + "eval_loss": 0.1171552762389183, + "eval_runtime": 533.9147, + "eval_samples_per_second": 2.596, + "eval_steps_per_second": 2.596, + "eval_wer": 26.558361292601095, + "step": 10000 + }, + { + "epoch": 6.73, + "grad_norm": 3.608774423599243, + "learning_rate": 9.043115577889447e-06, + "loss": 0.0927, + "step": 10025 + }, + { + "epoch": 6.75, + "grad_norm": Infinity, + "learning_rate": 9.04070351758794e-06, + "loss": 0.0952, + "step": 10050 + }, + { + "epoch": 6.77, + "grad_norm": 2.832880735397339, + "learning_rate": 9.03819095477387e-06, + "loss": 0.0909, + "step": 10075 + }, + { + "epoch": 6.78, + "grad_norm": 3.0156736373901367, + "learning_rate": 9.0356783919598e-06, + "loss": 0.0944, + "step": 10100 + }, + { + "epoch": 6.8, + "grad_norm": 3.3390650749206543, + "learning_rate": 9.033165829145728e-06, + "loss": 0.0919, + "step": 10125 + }, + { + "epoch": 6.82, + "grad_norm": 3.394937515258789, + "learning_rate": 9.03065326633166e-06, + "loss": 0.0932, + "step": 10150 + }, + { + "epoch": 6.83, + "grad_norm": 3.443366765975952, + "learning_rate": 9.028140703517589e-06, + "loss": 0.0934, + "step": 10175 + }, + { + "epoch": 6.85, + "grad_norm": 3.167790174484253, + "learning_rate": 9.025628140703518e-06, + "loss": 0.0934, + "step": 10200 + }, + { + "epoch": 6.87, + "grad_norm": 3.151536464691162, + "learning_rate": 9.023115577889447e-06, + "loss": 0.0935, + "step": 10225 + }, + { + "epoch": 6.88, + "grad_norm": 3.475541114807129, + "learning_rate": 9.020603015075378e-06, + "loss": 0.0924, + "step": 10250 + }, + { + "epoch": 6.9, + "grad_norm": 3.254150629043579, + "learning_rate": 9.018090452261308e-06, + "loss": 0.0946, + "step": 10275 + }, + { + "epoch": 6.92, + "grad_norm": 3.126755714416504, + "learning_rate": 9.015577889447237e-06, + "loss": 0.0921, + "step": 10300 + }, + { + "epoch": 6.93, + "grad_norm": 3.1626737117767334, + "learning_rate": 9.013065326633166e-06, + "loss": 0.0908, + "step": 10325 + }, + { + "epoch": 6.95, + "grad_norm": 3.488074779510498, + "learning_rate": 9.010552763819096e-06, + "loss": 0.0956, + "step": 10350 + }, + { + "epoch": 6.97, + "grad_norm": 3.0085911750793457, + "learning_rate": 9.008040201005027e-06, + "loss": 0.0915, + "step": 10375 + }, + { + "epoch": 6.98, + "grad_norm": 3.424804925918579, + "learning_rate": 9.005527638190954e-06, + "loss": 0.0968, + "step": 10400 + }, + { + "epoch": 7.0, + "grad_norm": 3.1618521213531494, + "learning_rate": 9.003015075376885e-06, + "loss": 0.0911, + "step": 10425 + }, + { + "epoch": 7.02, + "grad_norm": 3.355823040008545, + "learning_rate": 9.000502512562815e-06, + "loss": 0.0775, + "step": 10450 + }, + { + "epoch": 7.03, + "grad_norm": 2.7716736793518066, + "learning_rate": 8.997989949748744e-06, + "loss": 0.0776, + "step": 10475 + }, + { + "epoch": 7.05, + "grad_norm": 2.89070987701416, + "learning_rate": 8.995477386934675e-06, + "loss": 0.0803, + "step": 10500 + }, + { + "epoch": 7.07, + "grad_norm": 3.0273945331573486, + "learning_rate": 8.992964824120604e-06, + "loss": 0.0731, + "step": 10525 + }, + { + "epoch": 7.09, + "grad_norm": 2.902979612350464, + "learning_rate": 8.990452261306534e-06, + "loss": 0.0805, + "step": 10550 + }, + { + "epoch": 7.1, + "grad_norm": 2.9858810901641846, + "learning_rate": 8.987939698492463e-06, + "loss": 0.0761, + "step": 10575 + }, + { + "epoch": 7.12, + "grad_norm": 2.780200958251953, + "learning_rate": 8.985427135678392e-06, + "loss": 0.0786, + "step": 10600 + }, + { + "epoch": 7.14, + "grad_norm": 3.0452048778533936, + "learning_rate": 8.982914572864322e-06, + "loss": 0.078, + "step": 10625 + }, + { + "epoch": 7.15, + "grad_norm": 3.0429253578186035, + "learning_rate": 8.980402010050253e-06, + "loss": 0.078, + "step": 10650 + }, + { + "epoch": 7.17, + "grad_norm": 2.758443593978882, + "learning_rate": 8.977889447236182e-06, + "loss": 0.0787, + "step": 10675 + }, + { + "epoch": 7.19, + "grad_norm": 3.3259782791137695, + "learning_rate": 8.975376884422111e-06, + "loss": 0.0818, + "step": 10700 + }, + { + "epoch": 7.2, + "grad_norm": 3.1599812507629395, + "learning_rate": 8.97286432160804e-06, + "loss": 0.0788, + "step": 10725 + }, + { + "epoch": 7.22, + "grad_norm": 3.163283348083496, + "learning_rate": 8.97035175879397e-06, + "loss": 0.0801, + "step": 10750 + }, + { + "epoch": 7.24, + "grad_norm": 3.883058547973633, + "learning_rate": 8.967839195979901e-06, + "loss": 0.0818, + "step": 10775 + }, + { + "epoch": 7.25, + "grad_norm": 3.0166139602661133, + "learning_rate": 8.96532663316583e-06, + "loss": 0.079, + "step": 10800 + }, + { + "epoch": 7.27, + "grad_norm": 3.532127857208252, + "learning_rate": 8.96281407035176e-06, + "loss": 0.0764, + "step": 10825 + }, + { + "epoch": 7.29, + "grad_norm": 2.8934993743896484, + "learning_rate": 8.960301507537689e-06, + "loss": 0.0791, + "step": 10850 + }, + { + "epoch": 7.3, + "grad_norm": 3.4274938106536865, + "learning_rate": 8.957788944723618e-06, + "loss": 0.0788, + "step": 10875 + }, + { + "epoch": 7.32, + "grad_norm": 2.964526891708374, + "learning_rate": 8.95527638190955e-06, + "loss": 0.0781, + "step": 10900 + }, + { + "epoch": 7.34, + "grad_norm": 3.1131231784820557, + "learning_rate": 8.952763819095479e-06, + "loss": 0.0776, + "step": 10925 + }, + { + "epoch": 7.35, + "grad_norm": 2.757322072982788, + "learning_rate": 8.950251256281408e-06, + "loss": 0.0793, + "step": 10950 + }, + { + "epoch": 7.37, + "grad_norm": 2.8853962421417236, + "learning_rate": 8.947738693467337e-06, + "loss": 0.08, + "step": 10975 + }, + { + "epoch": 7.39, + "grad_norm": 3.2388052940368652, + "learning_rate": 8.945226130653267e-06, + "loss": 0.0837, + "step": 11000 + }, + { + "epoch": 7.39, + "eval_loss": 0.11983851343393326, + "eval_runtime": 541.1838, + "eval_samples_per_second": 2.561, + "eval_steps_per_second": 2.561, + "eval_wer": 27.052798869856964, + "step": 11000 + }, + { + "epoch": 7.4, + "grad_norm": 3.431065559387207, + "learning_rate": 8.942713567839196e-06, + "loss": 0.0797, + "step": 11025 + }, + { + "epoch": 7.42, + "grad_norm": 3.1514389514923096, + "learning_rate": 8.940201005025127e-06, + "loss": 0.0809, + "step": 11050 + }, + { + "epoch": 7.44, + "grad_norm": 3.1348989009857178, + "learning_rate": 8.937688442211056e-06, + "loss": 0.0796, + "step": 11075 + }, + { + "epoch": 7.45, + "grad_norm": 3.4892783164978027, + "learning_rate": 8.935175879396986e-06, + "loss": 0.0813, + "step": 11100 + }, + { + "epoch": 7.47, + "grad_norm": 3.3423171043395996, + "learning_rate": 8.932663316582915e-06, + "loss": 0.0768, + "step": 11125 + }, + { + "epoch": 7.49, + "grad_norm": 3.119539499282837, + "learning_rate": 8.930150753768844e-06, + "loss": 0.0833, + "step": 11150 + }, + { + "epoch": 7.51, + "grad_norm": 3.181475877761841, + "learning_rate": 8.927638190954775e-06, + "loss": 0.0803, + "step": 11175 + }, + { + "epoch": 7.52, + "grad_norm": 3.3543057441711426, + "learning_rate": 8.925125628140705e-06, + "loss": 0.0806, + "step": 11200 + }, + { + "epoch": 7.54, + "grad_norm": 3.1575417518615723, + "learning_rate": 8.922613065326634e-06, + "loss": 0.0812, + "step": 11225 + }, + { + "epoch": 7.56, + "grad_norm": 3.0198452472686768, + "learning_rate": 8.920100502512563e-06, + "loss": 0.0805, + "step": 11250 + }, + { + "epoch": 7.57, + "grad_norm": 2.9735798835754395, + "learning_rate": 8.917587939698493e-06, + "loss": 0.0791, + "step": 11275 + }, + { + "epoch": 7.59, + "grad_norm": 3.363503932952881, + "learning_rate": 8.915075376884424e-06, + "loss": 0.0817, + "step": 11300 + }, + { + "epoch": 7.61, + "grad_norm": 3.10579514503479, + "learning_rate": 8.912562814070353e-06, + "loss": 0.0833, + "step": 11325 + }, + { + "epoch": 7.62, + "grad_norm": 3.5427165031433105, + "learning_rate": 8.910050251256282e-06, + "loss": 0.0827, + "step": 11350 + }, + { + "epoch": 7.64, + "grad_norm": 2.9739034175872803, + "learning_rate": 8.907537688442212e-06, + "loss": 0.0795, + "step": 11375 + }, + { + "epoch": 7.66, + "grad_norm": 3.0262250900268555, + "learning_rate": 8.905025125628143e-06, + "loss": 0.0777, + "step": 11400 + }, + { + "epoch": 7.67, + "grad_norm": 2.9359376430511475, + "learning_rate": 8.90251256281407e-06, + "loss": 0.0807, + "step": 11425 + }, + { + "epoch": 7.69, + "grad_norm": 3.158572196960449, + "learning_rate": 8.900000000000001e-06, + "loss": 0.0772, + "step": 11450 + }, + { + "epoch": 7.71, + "grad_norm": 3.330089807510376, + "learning_rate": 8.89748743718593e-06, + "loss": 0.0793, + "step": 11475 + }, + { + "epoch": 7.72, + "grad_norm": 3.2174530029296875, + "learning_rate": 8.89497487437186e-06, + "loss": 0.079, + "step": 11500 + }, + { + "epoch": 7.74, + "grad_norm": 3.673243522644043, + "learning_rate": 8.892462311557791e-06, + "loss": 0.0775, + "step": 11525 + }, + { + "epoch": 7.76, + "grad_norm": 3.3094096183776855, + "learning_rate": 8.889949748743718e-06, + "loss": 0.078, + "step": 11550 + }, + { + "epoch": 7.77, + "grad_norm": 3.426079273223877, + "learning_rate": 8.88743718592965e-06, + "loss": 0.0777, + "step": 11575 + }, + { + "epoch": 7.79, + "grad_norm": 3.517086982727051, + "learning_rate": 8.884924623115579e-06, + "loss": 0.0776, + "step": 11600 + }, + { + "epoch": 7.81, + "grad_norm": 2.9824516773223877, + "learning_rate": 8.882412060301508e-06, + "loss": 0.0805, + "step": 11625 + }, + { + "epoch": 7.82, + "grad_norm": 2.965653896331787, + "learning_rate": 8.879899497487437e-06, + "loss": 0.0786, + "step": 11650 + }, + { + "epoch": 7.84, + "grad_norm": 2.9882099628448486, + "learning_rate": 8.877386934673368e-06, + "loss": 0.0822, + "step": 11675 + }, + { + "epoch": 7.86, + "grad_norm": 3.118823289871216, + "learning_rate": 8.874874371859296e-06, + "loss": 0.0773, + "step": 11700 + }, + { + "epoch": 7.87, + "grad_norm": 4.2748188972473145, + "learning_rate": 8.872361809045227e-06, + "loss": 0.0812, + "step": 11725 + }, + { + "epoch": 7.89, + "grad_norm": 3.5226612091064453, + "learning_rate": 8.869849246231156e-06, + "loss": 0.0801, + "step": 11750 + }, + { + "epoch": 7.91, + "grad_norm": 3.2962095737457275, + "learning_rate": 8.867336683417086e-06, + "loss": 0.0779, + "step": 11775 + }, + { + "epoch": 7.92, + "grad_norm": 3.037177801132202, + "learning_rate": 8.864824120603017e-06, + "loss": 0.0811, + "step": 11800 + }, + { + "epoch": 7.94, + "grad_norm": 3.207000255584717, + "learning_rate": 8.862311557788944e-06, + "loss": 0.0813, + "step": 11825 + }, + { + "epoch": 7.96, + "grad_norm": 3.5045995712280273, + "learning_rate": 8.859798994974875e-06, + "loss": 0.0793, + "step": 11850 + }, + { + "epoch": 7.98, + "grad_norm": 2.9062917232513428, + "learning_rate": 8.857286432160805e-06, + "loss": 0.0808, + "step": 11875 + }, + { + "epoch": 7.99, + "grad_norm": 3.086449146270752, + "learning_rate": 8.854773869346734e-06, + "loss": 0.0757, + "step": 11900 + }, + { + "epoch": 8.01, + "grad_norm": 3.4503021240234375, + "learning_rate": 8.852261306532665e-06, + "loss": 0.0698, + "step": 11925 + }, + { + "epoch": 8.03, + "grad_norm": 2.755633592605591, + "learning_rate": 8.849748743718594e-06, + "loss": 0.0619, + "step": 11950 + }, + { + "epoch": 8.04, + "grad_norm": 3.3875789642333984, + "learning_rate": 8.847236180904524e-06, + "loss": 0.0651, + "step": 11975 + }, + { + "epoch": 8.06, + "grad_norm": 2.697042465209961, + "learning_rate": 8.844723618090453e-06, + "loss": 0.0654, + "step": 12000 + }, + { + "epoch": 8.06, + "eval_loss": 0.12158209830522537, + "eval_runtime": 532.8467, + "eval_samples_per_second": 2.601, + "eval_steps_per_second": 2.601, + "eval_wer": 26.328800988875155, + "step": 12000 + }, + { + "epoch": 8.08, + "grad_norm": 2.8202855587005615, + "learning_rate": 8.842211055276382e-06, + "loss": 0.0658, + "step": 12025 + }, + { + "epoch": 8.09, + "grad_norm": 2.7945172786712646, + "learning_rate": 8.839698492462312e-06, + "loss": 0.0627, + "step": 12050 + }, + { + "epoch": 8.11, + "grad_norm": 3.1584692001342773, + "learning_rate": 8.837185929648243e-06, + "loss": 0.0673, + "step": 12075 + }, + { + "epoch": 8.13, + "grad_norm": 3.1642470359802246, + "learning_rate": 8.83467336683417e-06, + "loss": 0.0683, + "step": 12100 + }, + { + "epoch": 8.14, + "grad_norm": 2.9188601970672607, + "learning_rate": 8.832160804020101e-06, + "loss": 0.0682, + "step": 12125 + }, + { + "epoch": 8.16, + "grad_norm": 3.276679039001465, + "learning_rate": 8.829748743718593e-06, + "loss": 0.0656, + "step": 12150 + }, + { + "epoch": 8.18, + "grad_norm": 2.683711051940918, + "learning_rate": 8.827236180904524e-06, + "loss": 0.0625, + "step": 12175 + }, + { + "epoch": 8.19, + "grad_norm": 3.232003688812256, + "learning_rate": 8.824723618090453e-06, + "loss": 0.066, + "step": 12200 + }, + { + "epoch": 8.21, + "grad_norm": 2.7374961376190186, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0647, + "step": 12225 + }, + { + "epoch": 8.23, + "grad_norm": 3.423482656478882, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0673, + "step": 12250 + }, + { + "epoch": 8.24, + "grad_norm": 2.9813687801361084, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0685, + "step": 12275 + }, + { + "epoch": 8.26, + "grad_norm": 3.047753095626831, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0658, + "step": 12300 + }, + { + "epoch": 8.28, + "grad_norm": 3.4329652786254883, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0662, + "step": 12325 + }, + { + "epoch": 8.29, + "grad_norm": 3.080573081970215, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0674, + "step": 12350 + }, + { + "epoch": 8.31, + "grad_norm": 2.828704833984375, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0694, + "step": 12375 + }, + { + "epoch": 8.33, + "grad_norm": 3.132976531982422, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0685, + "step": 12400 + }, + { + "epoch": 8.34, + "grad_norm": 3.154456615447998, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0679, + "step": 12425 + }, + { + "epoch": 8.36, + "grad_norm": 3.4193313121795654, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0674, + "step": 12450 + }, + { + "epoch": 8.38, + "grad_norm": 3.2318356037139893, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0658, + "step": 12475 + }, + { + "epoch": 8.39, + "grad_norm": 2.9559836387634277, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0647, + "step": 12500 + }, + { + "epoch": 8.41, + "grad_norm": 3.459628105163574, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0693, + "step": 12525 + }, + { + "epoch": 8.43, + "grad_norm": 3.2934398651123047, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0696, + "step": 12550 + }, + { + "epoch": 8.45, + "grad_norm": 3.2100000381469727, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0703, + "step": 12575 + }, + { + "epoch": 8.46, + "grad_norm": 3.280884265899658, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0661, + "step": 12600 + }, + { + "epoch": 8.48, + "grad_norm": 3.1474897861480713, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0663, + "step": 12625 + }, + { + "epoch": 8.5, + "grad_norm": 2.9876487255096436, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0693, + "step": 12650 + }, + { + "epoch": 8.51, + "grad_norm": 3.278313159942627, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0699, + "step": 12675 + }, + { + "epoch": 8.53, + "grad_norm": 3.023169755935669, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0712, + "step": 12700 + }, + { + "epoch": 8.55, + "grad_norm": 3.168148994445801, + "learning_rate": 8.771959798994976e-06, + "loss": 0.0698, + "step": 12725 + }, + { + "epoch": 8.56, + "grad_norm": 3.177262544631958, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0686, + "step": 12750 + }, + { + "epoch": 8.58, + "grad_norm": 3.1487865447998047, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0684, + "step": 12775 + }, + { + "epoch": 8.6, + "grad_norm": 2.9590165615081787, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0691, + "step": 12800 + }, + { + "epoch": 8.61, + "grad_norm": 3.0423812866210938, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0682, + "step": 12825 + }, + { + "epoch": 8.63, + "grad_norm": 3.3768019676208496, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0709, + "step": 12850 + }, + { + "epoch": 8.65, + "grad_norm": 3.7296512126922607, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0701, + "step": 12875 + }, + { + "epoch": 8.66, + "grad_norm": 3.148634433746338, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0634, + "step": 12900 + }, + { + "epoch": 8.68, + "grad_norm": 2.908444881439209, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0659, + "step": 12925 + }, + { + "epoch": 8.7, + "grad_norm": 3.3164865970611572, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0655, + "step": 12950 + }, + { + "epoch": 8.71, + "grad_norm": 2.9725685119628906, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0659, + "step": 12975 + }, + { + "epoch": 8.73, + "grad_norm": 3.171374797821045, + "learning_rate": 8.744321608040202e-06, + "loss": 0.068, + "step": 13000 + }, + { + "epoch": 8.73, + "eval_loss": 0.12423743307590485, + "eval_runtime": 533.8353, + "eval_samples_per_second": 2.596, + "eval_steps_per_second": 2.596, + "eval_wer": 26.86738477838602, + "step": 13000 + }, + { + "epoch": 8.75, + "grad_norm": 3.3160324096679688, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0692, + "step": 13025 + }, + { + "epoch": 8.76, + "grad_norm": 3.2802672386169434, + "learning_rate": 8.73929648241206e-06, + "loss": 0.067, + "step": 13050 + }, + { + "epoch": 8.78, + "grad_norm": 3.2849535942077637, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0654, + "step": 13075 + }, + { + "epoch": 8.8, + "grad_norm": 3.685974359512329, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0687, + "step": 13100 + }, + { + "epoch": 8.81, + "grad_norm": 2.9581081867218018, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0658, + "step": 13125 + }, + { + "epoch": 8.83, + "grad_norm": 3.3408470153808594, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0726, + "step": 13150 + }, + { + "epoch": 8.85, + "grad_norm": 3.5375308990478516, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0688, + "step": 13175 + }, + { + "epoch": 8.87, + "grad_norm": 2.7572827339172363, + "learning_rate": 8.72422110552764e-06, + "loss": 0.0688, + "step": 13200 + }, + { + "epoch": 8.88, + "grad_norm": 3.0948410034179688, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0686, + "step": 13225 + }, + { + "epoch": 8.9, + "grad_norm": 3.076904773712158, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0683, + "step": 13250 + }, + { + "epoch": 8.92, + "grad_norm": 3.060412645339966, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0692, + "step": 13275 + }, + { + "epoch": 8.93, + "grad_norm": 3.1852357387542725, + "learning_rate": 8.714170854271357e-06, + "loss": 0.0647, + "step": 13300 + }, + { + "epoch": 8.95, + "grad_norm": 3.427971601486206, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0675, + "step": 13325 + }, + { + "epoch": 8.97, + "grad_norm": 3.221360683441162, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0702, + "step": 13350 + }, + { + "epoch": 8.98, + "grad_norm": 3.490898847579956, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0693, + "step": 13375 + }, + { + "epoch": 9.0, + "grad_norm": 3.1776282787323, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0725, + "step": 13400 + }, + { + "epoch": 9.02, + "grad_norm": 2.52174973487854, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0542, + "step": 13425 + }, + { + "epoch": 9.03, + "grad_norm": 2.8436169624328613, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0543, + "step": 13450 + }, + { + "epoch": 9.05, + "grad_norm": 3.0883164405822754, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0565, + "step": 13475 + }, + { + "epoch": 9.07, + "grad_norm": 3.2945592403411865, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0554, + "step": 13500 + }, + { + "epoch": 9.08, + "grad_norm": 3.1277835369110107, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0575, + "step": 13525 + }, + { + "epoch": 9.1, + "grad_norm": 2.555258274078369, + "learning_rate": 8.689045226130654e-06, + "loss": 0.0557, + "step": 13550 + }, + { + "epoch": 9.12, + "grad_norm": 2.6981780529022217, + "learning_rate": 8.686532663316583e-06, + "loss": 0.056, + "step": 13575 + }, + { + "epoch": 9.13, + "grad_norm": 2.9988884925842285, + "learning_rate": 8.684020100502514e-06, + "loss": 0.0575, + "step": 13600 + }, + { + "epoch": 9.15, + "grad_norm": 2.7814390659332275, + "learning_rate": 8.681507537688443e-06, + "loss": 0.0543, + "step": 13625 + }, + { + "epoch": 9.17, + "grad_norm": 2.8165695667266846, + "learning_rate": 8.678994974874373e-06, + "loss": 0.0542, + "step": 13650 + }, + { + "epoch": 9.18, + "grad_norm": 2.8924388885498047, + "learning_rate": 8.676482412060302e-06, + "loss": 0.0584, + "step": 13675 + }, + { + "epoch": 9.2, + "grad_norm": 2.8846709728240967, + "learning_rate": 8.673969849246231e-06, + "loss": 0.0546, + "step": 13700 + }, + { + "epoch": 9.22, + "grad_norm": 3.0931618213653564, + "learning_rate": 8.67145728643216e-06, + "loss": 0.0541, + "step": 13725 + }, + { + "epoch": 9.23, + "grad_norm": 3.0044896602630615, + "learning_rate": 8.668944723618092e-06, + "loss": 0.0566, + "step": 13750 + }, + { + "epoch": 9.25, + "grad_norm": 2.992866039276123, + "learning_rate": 8.666432160804021e-06, + "loss": 0.0568, + "step": 13775 + }, + { + "epoch": 9.27, + "grad_norm": 3.3243565559387207, + "learning_rate": 8.66391959798995e-06, + "loss": 0.0575, + "step": 13800 + }, + { + "epoch": 9.28, + "grad_norm": 3.164736747741699, + "learning_rate": 8.661407035175881e-06, + "loss": 0.0565, + "step": 13825 + }, + { + "epoch": 9.3, + "grad_norm": 2.89432430267334, + "learning_rate": 8.658894472361809e-06, + "loss": 0.0571, + "step": 13850 + }, + { + "epoch": 9.32, + "grad_norm": 3.053514242172241, + "learning_rate": 8.65638190954774e-06, + "loss": 0.0582, + "step": 13875 + }, + { + "epoch": 9.34, + "grad_norm": 2.7615840435028076, + "learning_rate": 8.65386934673367e-06, + "loss": 0.0566, + "step": 13900 + }, + { + "epoch": 9.35, + "grad_norm": 3.1976537704467773, + "learning_rate": 8.651356783919599e-06, + "loss": 0.0578, + "step": 13925 + }, + { + "epoch": 9.37, + "grad_norm": 3.1072587966918945, + "learning_rate": 8.648844221105528e-06, + "loss": 0.0577, + "step": 13950 + }, + { + "epoch": 9.39, + "grad_norm": 3.4911906719207764, + "learning_rate": 8.646331658291457e-06, + "loss": 0.0548, + "step": 13975 + }, + { + "epoch": 9.4, + "grad_norm": 2.923501968383789, + "learning_rate": 8.643819095477388e-06, + "loss": 0.0586, + "step": 14000 + }, + { + "epoch": 9.4, + "eval_loss": 0.1282009482383728, + "eval_runtime": 533.8178, + "eval_samples_per_second": 2.596, + "eval_steps_per_second": 2.596, + "eval_wer": 26.982164930248985, + "step": 14000 + }, + { + "epoch": 9.42, + "grad_norm": 3.0205700397491455, + "learning_rate": 8.641306532663318e-06, + "loss": 0.0552, + "step": 14025 + }, + { + "epoch": 9.44, + "grad_norm": 3.022747278213501, + "learning_rate": 8.638793969849247e-06, + "loss": 0.0574, + "step": 14050 + }, + { + "epoch": 9.45, + "grad_norm": 3.2978105545043945, + "learning_rate": 8.636281407035176e-06, + "loss": 0.0571, + "step": 14075 + }, + { + "epoch": 9.47, + "grad_norm": 3.0741355419158936, + "learning_rate": 8.633768844221107e-06, + "loss": 0.0556, + "step": 14100 + }, + { + "epoch": 9.49, + "grad_norm": 2.8877174854278564, + "learning_rate": 8.631256281407035e-06, + "loss": 0.0538, + "step": 14125 + }, + { + "epoch": 9.5, + "grad_norm": 3.618729591369629, + "learning_rate": 8.628743718592966e-06, + "loss": 0.0592, + "step": 14150 + }, + { + "epoch": 9.52, + "grad_norm": 3.005646228790283, + "learning_rate": 8.626231155778895e-06, + "loss": 0.057, + "step": 14175 + }, + { + "epoch": 9.54, + "grad_norm": 3.3048083782196045, + "learning_rate": 8.623718592964825e-06, + "loss": 0.0564, + "step": 14200 + }, + { + "epoch": 9.55, + "grad_norm": 3.2562224864959717, + "learning_rate": 8.621206030150756e-06, + "loss": 0.0571, + "step": 14225 + }, + { + "epoch": 9.57, + "grad_norm": 2.980013608932495, + "learning_rate": 8.618693467336683e-06, + "loss": 0.0564, + "step": 14250 + }, + { + "epoch": 9.59, + "grad_norm": 3.220036745071411, + "learning_rate": 8.616180904522614e-06, + "loss": 0.0588, + "step": 14275 + }, + { + "epoch": 9.6, + "grad_norm": 3.4643850326538086, + "learning_rate": 8.613668341708544e-06, + "loss": 0.0565, + "step": 14300 + }, + { + "epoch": 9.62, + "grad_norm": 3.2021632194519043, + "learning_rate": 8.611155778894473e-06, + "loss": 0.0586, + "step": 14325 + }, + { + "epoch": 9.64, + "grad_norm": 3.2279539108276367, + "learning_rate": 8.608643216080402e-06, + "loss": 0.0562, + "step": 14350 + }, + { + "epoch": 9.65, + "grad_norm": 3.429431438446045, + "learning_rate": 8.606130653266333e-06, + "loss": 0.0585, + "step": 14375 + }, + { + "epoch": 9.67, + "grad_norm": 3.278526544570923, + "learning_rate": 8.60361809045226e-06, + "loss": 0.0584, + "step": 14400 + }, + { + "epoch": 9.69, + "grad_norm": 3.5569005012512207, + "learning_rate": 8.601105527638192e-06, + "loss": 0.0587, + "step": 14425 + }, + { + "epoch": 9.7, + "grad_norm": 3.0540413856506348, + "learning_rate": 8.598592964824121e-06, + "loss": 0.0582, + "step": 14450 + }, + { + "epoch": 9.72, + "grad_norm": 2.9771244525909424, + "learning_rate": 8.59608040201005e-06, + "loss": 0.0544, + "step": 14475 + }, + { + "epoch": 9.74, + "grad_norm": 3.271925926208496, + "learning_rate": 8.593567839195981e-06, + "loss": 0.0556, + "step": 14500 + }, + { + "epoch": 9.75, + "grad_norm": 3.2107813358306885, + "learning_rate": 8.591055276381909e-06, + "loss": 0.0556, + "step": 14525 + }, + { + "epoch": 9.77, + "grad_norm": 2.9411368370056152, + "learning_rate": 8.58854271356784e-06, + "loss": 0.06, + "step": 14550 + }, + { + "epoch": 9.79, + "grad_norm": 2.9419991970062256, + "learning_rate": 8.58603015075377e-06, + "loss": 0.055, + "step": 14575 + }, + { + "epoch": 9.81, + "grad_norm": 3.3104031085968018, + "learning_rate": 8.583517587939699e-06, + "loss": 0.0586, + "step": 14600 + }, + { + "epoch": 9.82, + "grad_norm": 3.488868236541748, + "learning_rate": 8.58100502512563e-06, + "loss": 0.0608, + "step": 14625 + }, + { + "epoch": 9.84, + "grad_norm": 2.7537827491760254, + "learning_rate": 8.578492462311559e-06, + "loss": 0.061, + "step": 14650 + }, + { + "epoch": 9.86, + "grad_norm": 2.967761278152466, + "learning_rate": 8.575979899497488e-06, + "loss": 0.0616, + "step": 14675 + }, + { + "epoch": 9.87, + "grad_norm": 2.6756021976470947, + "learning_rate": 8.573467336683418e-06, + "loss": 0.0572, + "step": 14700 + }, + { + "epoch": 9.89, + "grad_norm": 3.6669530868530273, + "learning_rate": 8.570954773869347e-06, + "loss": 0.0545, + "step": 14725 + }, + { + "epoch": 9.91, + "grad_norm": 3.402998208999634, + "learning_rate": 8.568442211055276e-06, + "loss": 0.0595, + "step": 14750 + }, + { + "epoch": 9.92, + "grad_norm": 3.397134304046631, + "learning_rate": 8.565929648241207e-06, + "loss": 0.0582, + "step": 14775 + }, + { + "epoch": 9.94, + "grad_norm": 3.193824291229248, + "learning_rate": 8.563417085427135e-06, + "loss": 0.0558, + "step": 14800 + }, + { + "epoch": 9.96, + "grad_norm": 3.0948803424835205, + "learning_rate": 8.560904522613066e-06, + "loss": 0.0572, + "step": 14825 + }, + { + "epoch": 9.97, + "grad_norm": 3.6509146690368652, + "learning_rate": 8.558391959798995e-06, + "loss": 0.0595, + "step": 14850 + }, + { + "epoch": 9.99, + "grad_norm": 3.0662288665771484, + "learning_rate": 8.555879396984925e-06, + "loss": 0.057, + "step": 14875 + }, + { + "epoch": 10.01, + "grad_norm": 2.2760088443756104, + "learning_rate": 8.553366834170856e-06, + "loss": 0.0524, + "step": 14900 + }, + { + "epoch": 10.02, + "grad_norm": 2.8303427696228027, + "learning_rate": 8.550854271356785e-06, + "loss": 0.0494, + "step": 14925 + }, + { + "epoch": 10.04, + "grad_norm": 3.1542868614196777, + "learning_rate": 8.548341708542714e-06, + "loss": 0.0445, + "step": 14950 + }, + { + "epoch": 10.06, + "grad_norm": 2.8265697956085205, + "learning_rate": 8.545829145728644e-06, + "loss": 0.0464, + "step": 14975 + }, + { + "epoch": 10.07, + "grad_norm": 3.163896322250366, + "learning_rate": 8.543316582914573e-06, + "loss": 0.047, + "step": 15000 + }, + { + "epoch": 10.07, + "eval_loss": 0.13359740376472473, + "eval_runtime": 533.7428, + "eval_samples_per_second": 2.597, + "eval_steps_per_second": 2.597, + "eval_wer": 27.405968567896878, + "step": 15000 + }, + { + "epoch": 10.09, + "grad_norm": 2.813354253768921, + "learning_rate": 8.540804020100502e-06, + "loss": 0.0476, + "step": 15025 + }, + { + "epoch": 10.11, + "grad_norm": 2.448727607727051, + "learning_rate": 8.538291457286433e-06, + "loss": 0.0448, + "step": 15050 + }, + { + "epoch": 10.12, + "grad_norm": 2.798645257949829, + "learning_rate": 8.535778894472363e-06, + "loss": 0.0458, + "step": 15075 + }, + { + "epoch": 10.14, + "grad_norm": 2.969273090362549, + "learning_rate": 8.533266331658292e-06, + "loss": 0.0442, + "step": 15100 + }, + { + "epoch": 10.16, + "grad_norm": 2.901127576828003, + "learning_rate": 8.530753768844221e-06, + "loss": 0.0431, + "step": 15125 + }, + { + "epoch": 10.17, + "grad_norm": 3.0042836666107178, + "learning_rate": 8.52824120603015e-06, + "loss": 0.049, + "step": 15150 + }, + { + "epoch": 10.19, + "grad_norm": 2.694744825363159, + "learning_rate": 8.525728643216082e-06, + "loss": 0.0474, + "step": 15175 + }, + { + "epoch": 10.21, + "grad_norm": 2.79301118850708, + "learning_rate": 8.523216080402011e-06, + "loss": 0.0459, + "step": 15200 + }, + { + "epoch": 10.22, + "grad_norm": 3.328848123550415, + "learning_rate": 8.52070351758794e-06, + "loss": 0.0481, + "step": 15225 + }, + { + "epoch": 10.24, + "grad_norm": 3.0490903854370117, + "learning_rate": 8.518190954773871e-06, + "loss": 0.0467, + "step": 15250 + }, + { + "epoch": 10.26, + "grad_norm": 2.891860246658325, + "learning_rate": 8.515678391959799e-06, + "loss": 0.0482, + "step": 15275 + }, + { + "epoch": 10.28, + "grad_norm": 3.29339599609375, + "learning_rate": 8.51316582914573e-06, + "loss": 0.0468, + "step": 15300 + }, + { + "epoch": 10.29, + "grad_norm": 2.871262550354004, + "learning_rate": 8.51065326633166e-06, + "loss": 0.0465, + "step": 15325 + }, + { + "epoch": 10.31, + "grad_norm": 2.673008680343628, + "learning_rate": 8.508140703517589e-06, + "loss": 0.0457, + "step": 15350 + }, + { + "epoch": 10.33, + "grad_norm": 2.5940115451812744, + "learning_rate": 8.505628140703518e-06, + "loss": 0.049, + "step": 15375 + }, + { + "epoch": 10.34, + "grad_norm": 2.8226072788238525, + "learning_rate": 8.503115577889447e-06, + "loss": 0.0472, + "step": 15400 + }, + { + "epoch": 10.36, + "grad_norm": 2.800179958343506, + "learning_rate": 8.500603015075377e-06, + "loss": 0.0477, + "step": 15425 + }, + { + "epoch": 10.38, + "grad_norm": 3.0697898864746094, + "learning_rate": 8.498090452261308e-06, + "loss": 0.0448, + "step": 15450 + }, + { + "epoch": 10.39, + "grad_norm": 2.9394161701202393, + "learning_rate": 8.495577889447237e-06, + "loss": 0.0464, + "step": 15475 + }, + { + "epoch": 10.41, + "grad_norm": 3.055058479309082, + "learning_rate": 8.493065326633166e-06, + "loss": 0.0486, + "step": 15500 + }, + { + "epoch": 10.43, + "grad_norm": 3.4436676502227783, + "learning_rate": 8.490552763819097e-06, + "loss": 0.0479, + "step": 15525 + }, + { + "epoch": 10.44, + "grad_norm": 3.167590379714966, + "learning_rate": 8.488040201005025e-06, + "loss": 0.049, + "step": 15550 + }, + { + "epoch": 10.46, + "grad_norm": 2.786879539489746, + "learning_rate": 8.485527638190956e-06, + "loss": 0.0476, + "step": 15575 + }, + { + "epoch": 10.48, + "grad_norm": 3.0949158668518066, + "learning_rate": 8.483015075376885e-06, + "loss": 0.0463, + "step": 15600 + }, + { + "epoch": 10.49, + "grad_norm": 3.426304340362549, + "learning_rate": 8.480502512562815e-06, + "loss": 0.0475, + "step": 15625 + }, + { + "epoch": 10.51, + "grad_norm": 3.1173408031463623, + "learning_rate": 8.477989949748744e-06, + "loss": 0.0476, + "step": 15650 + }, + { + "epoch": 10.53, + "grad_norm": 2.856600046157837, + "learning_rate": 8.475477386934673e-06, + "loss": 0.0471, + "step": 15675 + }, + { + "epoch": 10.54, + "grad_norm": 3.2512564659118652, + "learning_rate": 8.472964824120604e-06, + "loss": 0.0483, + "step": 15700 + }, + { + "epoch": 10.56, + "grad_norm": 3.3549506664276123, + "learning_rate": 8.470452261306534e-06, + "loss": 0.0462, + "step": 15725 + }, + { + "epoch": 10.58, + "grad_norm": 2.7729334831237793, + "learning_rate": 8.467939698492463e-06, + "loss": 0.0472, + "step": 15750 + }, + { + "epoch": 10.59, + "grad_norm": 2.711257219314575, + "learning_rate": 8.465427135678392e-06, + "loss": 0.0472, + "step": 15775 + }, + { + "epoch": 10.61, + "grad_norm": 3.229771375656128, + "learning_rate": 8.462914572864323e-06, + "loss": 0.0479, + "step": 15800 + }, + { + "epoch": 10.63, + "grad_norm": 3.0402400493621826, + "learning_rate": 8.460402010050251e-06, + "loss": 0.0503, + "step": 15825 + }, + { + "epoch": 10.64, + "grad_norm": 2.9210867881774902, + "learning_rate": 8.457989949748744e-06, + "loss": 0.0497, + "step": 15850 + }, + { + "epoch": 10.66, + "grad_norm": 3.3483831882476807, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0475, + "step": 15875 + }, + { + "epoch": 10.68, + "grad_norm": 3.053593873977661, + "learning_rate": 8.453065326633167e-06, + "loss": 0.046, + "step": 15900 + }, + { + "epoch": 10.7, + "grad_norm": 3.136958599090576, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0509, + "step": 15925 + }, + { + "epoch": 10.71, + "grad_norm": 3.1040425300598145, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0504, + "step": 15950 + }, + { + "epoch": 10.73, + "grad_norm": 2.8489692211151123, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0484, + "step": 15975 + }, + { + "epoch": 10.75, + "grad_norm": 2.8868560791015625, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0475, + "step": 16000 + }, + { + "epoch": 10.75, + "eval_loss": 0.1362370103597641, + "eval_runtime": 536.1147, + "eval_samples_per_second": 2.585, + "eval_steps_per_second": 2.585, + "eval_wer": 27.441285537700864, + "step": 16000 + }, + { + "epoch": 10.76, + "grad_norm": 3.188688039779663, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0502, + "step": 16025 + }, + { + "epoch": 10.78, + "grad_norm": 2.4469282627105713, + "learning_rate": 8.437989949748744e-06, + "loss": 0.0459, + "step": 16050 + }, + { + "epoch": 10.8, + "grad_norm": 2.948697328567505, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0472, + "step": 16075 + }, + { + "epoch": 10.81, + "grad_norm": 3.236891508102417, + "learning_rate": 8.432964824120605e-06, + "loss": 0.0494, + "step": 16100 + }, + { + "epoch": 10.83, + "grad_norm": 3.0507919788360596, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0494, + "step": 16125 + }, + { + "epoch": 10.85, + "grad_norm": 2.8577802181243896, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0487, + "step": 16150 + }, + { + "epoch": 10.86, + "grad_norm": 3.035109758377075, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0486, + "step": 16175 + }, + { + "epoch": 10.88, + "grad_norm": 3.5497820377349854, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0497, + "step": 16200 + }, + { + "epoch": 10.9, + "grad_norm": 2.838867664337158, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0451, + "step": 16225 + }, + { + "epoch": 10.91, + "grad_norm": 3.316819190979004, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0489, + "step": 16250 + }, + { + "epoch": 10.93, + "grad_norm": 3.3198862075805664, + "learning_rate": 8.415376884422112e-06, + "loss": 0.0528, + "step": 16275 + }, + { + "epoch": 10.95, + "grad_norm": 3.4924492835998535, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0492, + "step": 16300 + }, + { + "epoch": 10.96, + "grad_norm": 3.0983831882476807, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0498, + "step": 16325 + }, + { + "epoch": 10.98, + "grad_norm": 3.4345991611480713, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0483, + "step": 16350 + }, + { + "epoch": 11.0, + "grad_norm": 3.294377326965332, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0485, + "step": 16375 + }, + { + "epoch": 11.01, + "grad_norm": 2.1766245365142822, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0371, + "step": 16400 + }, + { + "epoch": 11.03, + "grad_norm": 2.683638334274292, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0355, + "step": 16425 + }, + { + "epoch": 11.05, + "grad_norm": 2.8458847999572754, + "learning_rate": 8.397788944723619e-06, + "loss": 0.038, + "step": 16450 + }, + { + "epoch": 11.06, + "grad_norm": 2.7042036056518555, + "learning_rate": 8.395276381909548e-06, + "loss": 0.0375, + "step": 16475 + }, + { + "epoch": 11.08, + "grad_norm": 2.0865659713745117, + "learning_rate": 8.392763819095479e-06, + "loss": 0.039, + "step": 16500 + }, + { + "epoch": 11.1, + "grad_norm": 2.3241260051727295, + "learning_rate": 8.390251256281408e-06, + "loss": 0.0365, + "step": 16525 + }, + { + "epoch": 11.11, + "grad_norm": 2.7509355545043945, + "learning_rate": 8.387738693467338e-06, + "loss": 0.0392, + "step": 16550 + }, + { + "epoch": 11.13, + "grad_norm": 2.3158955574035645, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0399, + "step": 16575 + }, + { + "epoch": 11.15, + "grad_norm": 2.368791103363037, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0366, + "step": 16600 + }, + { + "epoch": 11.17, + "grad_norm": 3.157816171646118, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0386, + "step": 16625 + }, + { + "epoch": 11.18, + "grad_norm": 2.391731023788452, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0409, + "step": 16650 + }, + { + "epoch": 11.2, + "grad_norm": 2.881032943725586, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0399, + "step": 16675 + }, + { + "epoch": 11.22, + "grad_norm": 2.8162527084350586, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0386, + "step": 16700 + }, + { + "epoch": 11.23, + "grad_norm": 2.798832654953003, + "learning_rate": 8.370150753768845e-06, + "loss": 0.0389, + "step": 16725 + }, + { + "epoch": 11.25, + "grad_norm": 2.4073362350463867, + "learning_rate": 8.367638190954774e-06, + "loss": 0.038, + "step": 16750 + }, + { + "epoch": 11.27, + "grad_norm": 3.539222002029419, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0385, + "step": 16775 + }, + { + "epoch": 11.28, + "grad_norm": 3.047471761703491, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0386, + "step": 16800 + }, + { + "epoch": 11.3, + "grad_norm": 2.62675142288208, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0388, + "step": 16825 + }, + { + "epoch": 11.32, + "grad_norm": 2.6403391361236572, + "learning_rate": 8.357587939698493e-06, + "loss": 0.041, + "step": 16850 + }, + { + "epoch": 11.33, + "grad_norm": 2.7048850059509277, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0405, + "step": 16875 + }, + { + "epoch": 11.35, + "grad_norm": 2.8291220664978027, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0375, + "step": 16900 + }, + { + "epoch": 11.37, + "grad_norm": 2.9671170711517334, + "learning_rate": 8.350050251256282e-06, + "loss": 0.0377, + "step": 16925 + }, + { + "epoch": 11.38, + "grad_norm": 3.0989413261413574, + "learning_rate": 8.347537688442212e-06, + "loss": 0.039, + "step": 16950 + }, + { + "epoch": 11.4, + "grad_norm": 2.738807201385498, + "learning_rate": 8.345025125628141e-06, + "loss": 0.0399, + "step": 16975 + }, + { + "epoch": 11.42, + "grad_norm": 2.9761691093444824, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0402, + "step": 17000 + }, + { + "epoch": 11.42, + "eval_loss": 0.13800786435604095, + "eval_runtime": 531.8418, + "eval_samples_per_second": 2.606, + "eval_steps_per_second": 2.606, + "eval_wer": 27.76796750838778, + "step": 17000 + }, + { + "epoch": 11.43, + "grad_norm": 3.1192235946655273, + "learning_rate": 8.34e-06, + "loss": 0.0389, + "step": 17025 + }, + { + "epoch": 11.45, + "grad_norm": 3.019216299057007, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0413, + "step": 17050 + }, + { + "epoch": 11.47, + "grad_norm": 2.6235885620117188, + "learning_rate": 8.33497487437186e-06, + "loss": 0.043, + "step": 17075 + }, + { + "epoch": 11.48, + "grad_norm": 3.3072292804718018, + "learning_rate": 8.33246231155779e-06, + "loss": 0.0384, + "step": 17100 + }, + { + "epoch": 11.5, + "grad_norm": 3.032578706741333, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0394, + "step": 17125 + }, + { + "epoch": 11.52, + "grad_norm": 3.0692577362060547, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0402, + "step": 17150 + }, + { + "epoch": 11.53, + "grad_norm": 3.113739252090454, + "learning_rate": 8.324924623115579e-06, + "loss": 0.038, + "step": 17175 + }, + { + "epoch": 11.55, + "grad_norm": 3.1510965824127197, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0423, + "step": 17200 + }, + { + "epoch": 11.57, + "grad_norm": 3.110407590866089, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0381, + "step": 17225 + }, + { + "epoch": 11.58, + "grad_norm": 2.9603676795959473, + "learning_rate": 8.317386934673367e-06, + "loss": 0.0421, + "step": 17250 + }, + { + "epoch": 11.6, + "grad_norm": 2.7330162525177, + "learning_rate": 8.314874371859298e-06, + "loss": 0.04, + "step": 17275 + }, + { + "epoch": 11.62, + "grad_norm": 3.783348798751831, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0428, + "step": 17300 + }, + { + "epoch": 11.64, + "grad_norm": 3.3141326904296875, + "learning_rate": 8.309849246231157e-06, + "loss": 0.04, + "step": 17325 + }, + { + "epoch": 11.65, + "grad_norm": 3.1341404914855957, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0389, + "step": 17350 + }, + { + "epoch": 11.67, + "grad_norm": 2.5702879428863525, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0411, + "step": 17375 + }, + { + "epoch": 11.69, + "grad_norm": 2.7597875595092773, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0387, + "step": 17400 + }, + { + "epoch": 11.7, + "grad_norm": 3.1602911949157715, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0401, + "step": 17425 + }, + { + "epoch": 11.72, + "grad_norm": 2.9719858169555664, + "learning_rate": 8.297286432160805e-06, + "loss": 0.04, + "step": 17450 + }, + { + "epoch": 11.74, + "grad_norm": 2.7361767292022705, + "learning_rate": 8.294773869346734e-06, + "loss": 0.041, + "step": 17475 + }, + { + "epoch": 11.75, + "grad_norm": 2.7034785747528076, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0413, + "step": 17500 + }, + { + "epoch": 11.77, + "grad_norm": 3.2431066036224365, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0396, + "step": 17525 + }, + { + "epoch": 11.79, + "grad_norm": 2.7960753440856934, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0406, + "step": 17550 + }, + { + "epoch": 11.8, + "grad_norm": 3.0115575790405273, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0395, + "step": 17575 + }, + { + "epoch": 11.82, + "grad_norm": 2.4014508724212646, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0404, + "step": 17600 + }, + { + "epoch": 11.84, + "grad_norm": 3.1004748344421387, + "learning_rate": 8.279698492462312e-06, + "loss": 0.0385, + "step": 17625 + }, + { + "epoch": 11.85, + "grad_norm": 2.5941948890686035, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0398, + "step": 17650 + }, + { + "epoch": 11.87, + "grad_norm": 2.6056137084960938, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0381, + "step": 17675 + }, + { + "epoch": 11.89, + "grad_norm": 2.8399932384490967, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0401, + "step": 17700 + }, + { + "epoch": 11.9, + "grad_norm": 2.9396562576293945, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0409, + "step": 17725 + }, + { + "epoch": 11.92, + "grad_norm": 3.1237053871154785, + "learning_rate": 8.26713567839196e-06, + "loss": 0.039, + "step": 17750 + }, + { + "epoch": 11.94, + "grad_norm": 3.0028700828552246, + "learning_rate": 8.26462311557789e-06, + "loss": 0.0421, + "step": 17775 + }, + { + "epoch": 11.95, + "grad_norm": 3.055807590484619, + "learning_rate": 8.26211055276382e-06, + "loss": 0.0405, + "step": 17800 + }, + { + "epoch": 11.97, + "grad_norm": 3.251986026763916, + "learning_rate": 8.25959798994975e-06, + "loss": 0.0433, + "step": 17825 + }, + { + "epoch": 11.99, + "grad_norm": 2.845550537109375, + "learning_rate": 8.25708542713568e-06, + "loss": 0.0385, + "step": 17850 + }, + { + "epoch": 12.0, + "grad_norm": 2.913346290588379, + "learning_rate": 8.254572864321609e-06, + "loss": 0.0378, + "step": 17875 + }, + { + "epoch": 12.02, + "grad_norm": 2.3991270065307617, + "learning_rate": 8.252060301507538e-06, + "loss": 0.0294, + "step": 17900 + }, + { + "epoch": 12.04, + "grad_norm": 2.4414055347442627, + "learning_rate": 8.249547738693467e-06, + "loss": 0.0306, + "step": 17925 + }, + { + "epoch": 12.06, + "grad_norm": 2.274725914001465, + "learning_rate": 8.247035175879398e-06, + "loss": 0.0295, + "step": 17950 + }, + { + "epoch": 12.07, + "grad_norm": 2.767655849456787, + "learning_rate": 8.244522613065328e-06, + "loss": 0.0307, + "step": 17975 + }, + { + "epoch": 12.09, + "grad_norm": 2.5598373413085938, + "learning_rate": 8.242010050251257e-06, + "loss": 0.0307, + "step": 18000 + }, + { + "epoch": 12.09, + "eval_loss": 0.1446864753961563, + "eval_runtime": 537.4834, + "eval_samples_per_second": 2.579, + "eval_steps_per_second": 2.579, + "eval_wer": 27.238212961327918, + "step": 18000 + }, + { + "epoch": 12.11, + "grad_norm": 2.654730796813965, + "learning_rate": 8.239497487437186e-06, + "loss": 0.0303, + "step": 18025 + }, + { + "epoch": 12.12, + "grad_norm": 2.6578266620635986, + "learning_rate": 8.236984924623116e-06, + "loss": 0.0298, + "step": 18050 + }, + { + "epoch": 12.14, + "grad_norm": 3.2597641944885254, + "learning_rate": 8.234472361809047e-06, + "loss": 0.0307, + "step": 18075 + }, + { + "epoch": 12.16, + "grad_norm": 3.1756911277770996, + "learning_rate": 8.231959798994976e-06, + "loss": 0.0303, + "step": 18100 + }, + { + "epoch": 12.17, + "grad_norm": 2.3517801761627197, + "learning_rate": 8.229447236180905e-06, + "loss": 0.0299, + "step": 18125 + }, + { + "epoch": 12.19, + "grad_norm": 2.7081449031829834, + "learning_rate": 8.226934673366835e-06, + "loss": 0.0317, + "step": 18150 + }, + { + "epoch": 12.21, + "grad_norm": 2.9442265033721924, + "learning_rate": 8.224422110552764e-06, + "loss": 0.0309, + "step": 18175 + }, + { + "epoch": 12.22, + "grad_norm": 2.202742099761963, + "learning_rate": 8.221909547738695e-06, + "loss": 0.0299, + "step": 18200 + }, + { + "epoch": 12.24, + "grad_norm": 2.683105230331421, + "learning_rate": 8.219396984924624e-06, + "loss": 0.0303, + "step": 18225 + }, + { + "epoch": 12.26, + "grad_norm": 2.4034810066223145, + "learning_rate": 8.216884422110554e-06, + "loss": 0.0319, + "step": 18250 + }, + { + "epoch": 12.27, + "grad_norm": 2.621290683746338, + "learning_rate": 8.214371859296483e-06, + "loss": 0.0318, + "step": 18275 + }, + { + "epoch": 12.29, + "grad_norm": 2.842874765396118, + "learning_rate": 8.211859296482412e-06, + "loss": 0.0332, + "step": 18300 + }, + { + "epoch": 12.31, + "grad_norm": 2.4797563552856445, + "learning_rate": 8.209346733668342e-06, + "loss": 0.0325, + "step": 18325 + }, + { + "epoch": 12.32, + "grad_norm": 2.8069446086883545, + "learning_rate": 8.206834170854273e-06, + "loss": 0.033, + "step": 18350 + }, + { + "epoch": 12.34, + "grad_norm": 2.9851083755493164, + "learning_rate": 8.204321608040202e-06, + "loss": 0.0321, + "step": 18375 + }, + { + "epoch": 12.36, + "grad_norm": 2.948084592819214, + "learning_rate": 8.201809045226131e-06, + "loss": 0.0338, + "step": 18400 + }, + { + "epoch": 12.37, + "grad_norm": 2.7898919582366943, + "learning_rate": 8.19929648241206e-06, + "loss": 0.0315, + "step": 18425 + }, + { + "epoch": 12.39, + "grad_norm": 2.366434097290039, + "learning_rate": 8.19678391959799e-06, + "loss": 0.032, + "step": 18450 + }, + { + "epoch": 12.41, + "grad_norm": 2.9562463760375977, + "learning_rate": 8.194271356783921e-06, + "loss": 0.0334, + "step": 18475 + }, + { + "epoch": 12.42, + "grad_norm": 2.5975656509399414, + "learning_rate": 8.19175879396985e-06, + "loss": 0.0331, + "step": 18500 + }, + { + "epoch": 12.44, + "grad_norm": 2.8374183177948, + "learning_rate": 8.18924623115578e-06, + "loss": 0.0318, + "step": 18525 + }, + { + "epoch": 12.46, + "grad_norm": 2.839860439300537, + "learning_rate": 8.186733668341709e-06, + "loss": 0.0324, + "step": 18550 + }, + { + "epoch": 12.47, + "grad_norm": 2.800180196762085, + "learning_rate": 8.184221105527638e-06, + "loss": 0.0309, + "step": 18575 + }, + { + "epoch": 12.49, + "grad_norm": 2.644583225250244, + "learning_rate": 8.18170854271357e-06, + "loss": 0.0331, + "step": 18600 + }, + { + "epoch": 12.51, + "grad_norm": 3.0358402729034424, + "learning_rate": 8.179195979899498e-06, + "loss": 0.0327, + "step": 18625 + }, + { + "epoch": 12.53, + "grad_norm": 2.807608127593994, + "learning_rate": 8.176683417085428e-06, + "loss": 0.032, + "step": 18650 + }, + { + "epoch": 12.54, + "grad_norm": 3.115736961364746, + "learning_rate": 8.174170854271357e-06, + "loss": 0.034, + "step": 18675 + }, + { + "epoch": 12.56, + "grad_norm": 2.563960313796997, + "learning_rate": 8.171658291457286e-06, + "loss": 0.0325, + "step": 18700 + }, + { + "epoch": 12.58, + "grad_norm": 2.6218457221984863, + "learning_rate": 8.169145728643216e-06, + "loss": 0.0312, + "step": 18725 + }, + { + "epoch": 12.59, + "grad_norm": 2.6230452060699463, + "learning_rate": 8.166633165829147e-06, + "loss": 0.0318, + "step": 18750 + }, + { + "epoch": 12.61, + "grad_norm": 3.0028395652770996, + "learning_rate": 8.164120603015076e-06, + "loss": 0.0339, + "step": 18775 + }, + { + "epoch": 12.63, + "grad_norm": 2.810173273086548, + "learning_rate": 8.161608040201005e-06, + "loss": 0.0337, + "step": 18800 + }, + { + "epoch": 12.64, + "grad_norm": 2.7154364585876465, + "learning_rate": 8.159095477386936e-06, + "loss": 0.0315, + "step": 18825 + }, + { + "epoch": 12.66, + "grad_norm": 2.9645156860351562, + "learning_rate": 8.156582914572864e-06, + "loss": 0.0341, + "step": 18850 + }, + { + "epoch": 12.68, + "grad_norm": 2.558562755584717, + "learning_rate": 8.154070351758795e-06, + "loss": 0.0321, + "step": 18875 + }, + { + "epoch": 12.69, + "grad_norm": 3.045975923538208, + "learning_rate": 8.151557788944724e-06, + "loss": 0.0328, + "step": 18900 + }, + { + "epoch": 12.71, + "grad_norm": 2.605736494064331, + "learning_rate": 8.149045226130654e-06, + "loss": 0.0338, + "step": 18925 + }, + { + "epoch": 12.73, + "grad_norm": 2.6503992080688477, + "learning_rate": 8.146532663316583e-06, + "loss": 0.0349, + "step": 18950 + }, + { + "epoch": 12.74, + "grad_norm": 2.7485363483428955, + "learning_rate": 8.144020100502512e-06, + "loss": 0.0331, + "step": 18975 + }, + { + "epoch": 12.76, + "grad_norm": 3.0558133125305176, + "learning_rate": 8.141507537688443e-06, + "loss": 0.0331, + "step": 19000 + }, + { + "epoch": 12.76, + "eval_loss": 0.15126191079616547, + "eval_runtime": 542.0176, + "eval_samples_per_second": 2.557, + "eval_steps_per_second": 2.557, + "eval_wer": 28.129966448878683, + "step": 19000 + }, + { + "epoch": 12.78, + "grad_norm": 3.117704391479492, + "learning_rate": 8.138994974874373e-06, + "loss": 0.0336, + "step": 19025 + }, + { + "epoch": 12.79, + "grad_norm": 2.7645487785339355, + "learning_rate": 8.136482412060302e-06, + "loss": 0.0324, + "step": 19050 + }, + { + "epoch": 12.81, + "grad_norm": 2.742771625518799, + "learning_rate": 8.133969849246231e-06, + "loss": 0.0331, + "step": 19075 + }, + { + "epoch": 12.83, + "grad_norm": 2.8407609462738037, + "learning_rate": 8.131457286432162e-06, + "loss": 0.0317, + "step": 19100 + }, + { + "epoch": 12.84, + "grad_norm": 2.5845396518707275, + "learning_rate": 8.12894472361809e-06, + "loss": 0.0335, + "step": 19125 + }, + { + "epoch": 12.86, + "grad_norm": 2.8739688396453857, + "learning_rate": 8.126432160804021e-06, + "loss": 0.0333, + "step": 19150 + }, + { + "epoch": 12.88, + "grad_norm": 3.1160261631011963, + "learning_rate": 8.12391959798995e-06, + "loss": 0.033, + "step": 19175 + }, + { + "epoch": 12.89, + "grad_norm": 2.978895902633667, + "learning_rate": 8.12140703517588e-06, + "loss": 0.0358, + "step": 19200 + }, + { + "epoch": 12.91, + "grad_norm": 3.0800576210021973, + "learning_rate": 8.11889447236181e-06, + "loss": 0.0335, + "step": 19225 + }, + { + "epoch": 12.93, + "grad_norm": 2.4890170097351074, + "learning_rate": 8.11638190954774e-06, + "loss": 0.034, + "step": 19250 + }, + { + "epoch": 12.94, + "grad_norm": 2.8995964527130127, + "learning_rate": 8.11386934673367e-06, + "loss": 0.0342, + "step": 19275 + }, + { + "epoch": 12.96, + "grad_norm": 2.8822238445281982, + "learning_rate": 8.111356783919599e-06, + "loss": 0.0338, + "step": 19300 + }, + { + "epoch": 12.98, + "grad_norm": 2.3847439289093018, + "learning_rate": 8.108844221105528e-06, + "loss": 0.0345, + "step": 19325 + }, + { + "epoch": 13.0, + "grad_norm": 2.5077168941497803, + "learning_rate": 8.106331658291457e-06, + "loss": 0.0323, + "step": 19350 + }, + { + "epoch": 13.01, + "grad_norm": 2.0860869884490967, + "learning_rate": 8.103819095477388e-06, + "loss": 0.0256, + "step": 19375 + }, + { + "epoch": 13.03, + "grad_norm": 2.4186856746673584, + "learning_rate": 8.101306532663318e-06, + "loss": 0.025, + "step": 19400 + }, + { + "epoch": 13.05, + "grad_norm": 2.169545888900757, + "learning_rate": 8.098793969849247e-06, + "loss": 0.024, + "step": 19425 + }, + { + "epoch": 13.06, + "grad_norm": 2.250295877456665, + "learning_rate": 8.096281407035176e-06, + "loss": 0.0227, + "step": 19450 + }, + { + "epoch": 13.08, + "grad_norm": 2.8207223415374756, + "learning_rate": 8.093768844221106e-06, + "loss": 0.0254, + "step": 19475 + }, + { + "epoch": 13.1, + "grad_norm": 2.4845900535583496, + "learning_rate": 8.091256281407037e-06, + "loss": 0.0251, + "step": 19500 + }, + { + "epoch": 13.11, + "grad_norm": 2.9678895473480225, + "learning_rate": 8.088743718592966e-06, + "loss": 0.0255, + "step": 19525 + }, + { + "epoch": 13.13, + "grad_norm": 3.0639657974243164, + "learning_rate": 8.086231155778895e-06, + "loss": 0.0266, + "step": 19550 + }, + { + "epoch": 13.15, + "grad_norm": 2.5778753757476807, + "learning_rate": 8.083718592964825e-06, + "loss": 0.0258, + "step": 19575 + }, + { + "epoch": 13.16, + "grad_norm": 2.3090131282806396, + "learning_rate": 8.081206030150754e-06, + "loss": 0.0234, + "step": 19600 + }, + { + "epoch": 13.18, + "grad_norm": 2.645989418029785, + "learning_rate": 8.078693467336685e-06, + "loss": 0.0243, + "step": 19625 + }, + { + "epoch": 13.2, + "grad_norm": 2.4817280769348145, + "learning_rate": 8.076180904522614e-06, + "loss": 0.0274, + "step": 19650 + }, + { + "epoch": 13.21, + "grad_norm": 2.17031192779541, + "learning_rate": 8.073668341708544e-06, + "loss": 0.024, + "step": 19675 + }, + { + "epoch": 13.23, + "grad_norm": 2.587280035018921, + "learning_rate": 8.071155778894473e-06, + "loss": 0.0258, + "step": 19700 + }, + { + "epoch": 13.25, + "grad_norm": 2.3844306468963623, + "learning_rate": 8.068643216080402e-06, + "loss": 0.0264, + "step": 19725 + }, + { + "epoch": 13.26, + "grad_norm": 2.440300226211548, + "learning_rate": 8.066130653266332e-06, + "loss": 0.0259, + "step": 19750 + }, + { + "epoch": 13.28, + "grad_norm": 2.120274543762207, + "learning_rate": 8.063618090452263e-06, + "loss": 0.0253, + "step": 19775 + }, + { + "epoch": 13.3, + "grad_norm": 2.412203073501587, + "learning_rate": 8.061105527638192e-06, + "loss": 0.0256, + "step": 19800 + }, + { + "epoch": 13.31, + "grad_norm": 2.3215441703796387, + "learning_rate": 8.058592964824121e-06, + "loss": 0.0247, + "step": 19825 + }, + { + "epoch": 13.33, + "grad_norm": 2.0729939937591553, + "learning_rate": 8.05608040201005e-06, + "loss": 0.0248, + "step": 19850 + }, + { + "epoch": 13.35, + "grad_norm": 2.622880697250366, + "learning_rate": 8.05356783919598e-06, + "loss": 0.0271, + "step": 19875 + }, + { + "epoch": 13.36, + "grad_norm": 2.5304481983184814, + "learning_rate": 8.051055276381911e-06, + "loss": 0.0255, + "step": 19900 + }, + { + "epoch": 13.38, + "grad_norm": 2.6204922199249268, + "learning_rate": 8.04854271356784e-06, + "loss": 0.0261, + "step": 19925 + }, + { + "epoch": 13.4, + "grad_norm": 2.284783363342285, + "learning_rate": 8.04603015075377e-06, + "loss": 0.0257, + "step": 19950 + }, + { + "epoch": 13.42, + "grad_norm": 3.0914671421051025, + "learning_rate": 8.043517587939699e-06, + "loss": 0.027, + "step": 19975 + }, + { + "epoch": 13.43, + "grad_norm": 2.8612654209136963, + "learning_rate": 8.041005025125628e-06, + "loss": 0.0258, + "step": 20000 + }, + { + "epoch": 13.43, + "eval_loss": 0.15857619047164917, + "eval_runtime": 534.77, + "eval_samples_per_second": 2.592, + "eval_steps_per_second": 2.592, + "eval_wer": 28.809818117605506, + "step": 20000 + }, + { + "epoch": 13.45, + "grad_norm": 3.074786424636841, + "learning_rate": 8.03849246231156e-06, + "loss": 0.026, + "step": 20025 + }, + { + "epoch": 13.47, + "grad_norm": 2.40915584564209, + "learning_rate": 8.035979899497489e-06, + "loss": 0.029, + "step": 20050 + }, + { + "epoch": 13.48, + "grad_norm": 2.7619211673736572, + "learning_rate": 8.033467336683418e-06, + "loss": 0.0261, + "step": 20075 + }, + { + "epoch": 13.5, + "grad_norm": 2.8454036712646484, + "learning_rate": 8.030954773869347e-06, + "loss": 0.0257, + "step": 20100 + }, + { + "epoch": 13.52, + "grad_norm": 2.519239664077759, + "learning_rate": 8.028442211055277e-06, + "loss": 0.0255, + "step": 20125 + }, + { + "epoch": 13.53, + "grad_norm": 2.798295736312866, + "learning_rate": 8.025929648241206e-06, + "loss": 0.0256, + "step": 20150 + }, + { + "epoch": 13.55, + "grad_norm": 2.658249855041504, + "learning_rate": 8.023417085427137e-06, + "loss": 0.0252, + "step": 20175 + }, + { + "epoch": 13.57, + "grad_norm": 2.55195689201355, + "learning_rate": 8.020904522613066e-06, + "loss": 0.0281, + "step": 20200 + }, + { + "epoch": 13.58, + "grad_norm": 2.282550096511841, + "learning_rate": 8.018391959798996e-06, + "loss": 0.0262, + "step": 20225 + }, + { + "epoch": 13.6, + "grad_norm": 2.6260697841644287, + "learning_rate": 8.015879396984927e-06, + "loss": 0.0249, + "step": 20250 + }, + { + "epoch": 13.62, + "grad_norm": 2.61671781539917, + "learning_rate": 8.013366834170854e-06, + "loss": 0.0276, + "step": 20275 + }, + { + "epoch": 13.63, + "grad_norm": 2.5859358310699463, + "learning_rate": 8.010854271356785e-06, + "loss": 0.0265, + "step": 20300 + }, + { + "epoch": 13.65, + "grad_norm": 2.6100573539733887, + "learning_rate": 8.008341708542714e-06, + "loss": 0.0258, + "step": 20325 + }, + { + "epoch": 13.67, + "grad_norm": 2.5182266235351562, + "learning_rate": 8.005829145728644e-06, + "loss": 0.028, + "step": 20350 + }, + { + "epoch": 13.68, + "grad_norm": 3.105220317840576, + "learning_rate": 8.003316582914573e-06, + "loss": 0.027, + "step": 20375 + }, + { + "epoch": 13.7, + "grad_norm": 2.7697339057922363, + "learning_rate": 8.000804020100502e-06, + "loss": 0.0274, + "step": 20400 + }, + { + "epoch": 13.72, + "grad_norm": 2.74824857711792, + "learning_rate": 7.998291457286432e-06, + "loss": 0.0264, + "step": 20425 + }, + { + "epoch": 13.73, + "grad_norm": 2.1460442543029785, + "learning_rate": 7.995778894472363e-06, + "loss": 0.0266, + "step": 20450 + }, + { + "epoch": 13.75, + "grad_norm": 2.700098991394043, + "learning_rate": 7.993266331658292e-06, + "loss": 0.0271, + "step": 20475 + }, + { + "epoch": 13.77, + "grad_norm": 3.0646328926086426, + "learning_rate": 7.990753768844221e-06, + "loss": 0.0273, + "step": 20500 + }, + { + "epoch": 13.78, + "grad_norm": 2.4817585945129395, + "learning_rate": 7.988241206030152e-06, + "loss": 0.0267, + "step": 20525 + }, + { + "epoch": 13.8, + "grad_norm": 2.383892059326172, + "learning_rate": 7.98572864321608e-06, + "loss": 0.0281, + "step": 20550 + }, + { + "epoch": 13.82, + "grad_norm": 2.6712028980255127, + "learning_rate": 7.983216080402011e-06, + "loss": 0.0262, + "step": 20575 + }, + { + "epoch": 13.83, + "grad_norm": 2.8054888248443604, + "learning_rate": 7.98070351758794e-06, + "loss": 0.0277, + "step": 20600 + }, + { + "epoch": 13.85, + "grad_norm": 2.520451545715332, + "learning_rate": 7.97819095477387e-06, + "loss": 0.0256, + "step": 20625 + }, + { + "epoch": 13.87, + "grad_norm": 2.6715471744537354, + "learning_rate": 7.975678391959799e-06, + "loss": 0.0271, + "step": 20650 + }, + { + "epoch": 13.89, + "grad_norm": 2.936898946762085, + "learning_rate": 7.973165829145728e-06, + "loss": 0.0271, + "step": 20675 + }, + { + "epoch": 13.9, + "grad_norm": 2.5876598358154297, + "learning_rate": 7.97065326633166e-06, + "loss": 0.0254, + "step": 20700 + }, + { + "epoch": 13.92, + "grad_norm": 2.576573133468628, + "learning_rate": 7.968140703517589e-06, + "loss": 0.0268, + "step": 20725 + }, + { + "epoch": 13.94, + "grad_norm": 2.962134838104248, + "learning_rate": 7.965628140703518e-06, + "loss": 0.028, + "step": 20750 + }, + { + "epoch": 13.95, + "grad_norm": 2.4978857040405273, + "learning_rate": 7.963115577889447e-06, + "loss": 0.0268, + "step": 20775 + }, + { + "epoch": 13.97, + "grad_norm": 2.7507359981536865, + "learning_rate": 7.960603015075378e-06, + "loss": 0.0264, + "step": 20800 + }, + { + "epoch": 13.99, + "grad_norm": 2.290602922439575, + "learning_rate": 7.958090452261306e-06, + "loss": 0.0268, + "step": 20825 + }, + { + "epoch": 14.0, + "grad_norm": 1.895709753036499, + "learning_rate": 7.955577889447237e-06, + "loss": 0.0267, + "step": 20850 + }, + { + "epoch": 14.02, + "grad_norm": 2.577284097671509, + "learning_rate": 7.953065326633166e-06, + "loss": 0.02, + "step": 20875 + }, + { + "epoch": 14.04, + "grad_norm": 2.139061450958252, + "learning_rate": 7.950552763819096e-06, + "loss": 0.0182, + "step": 20900 + }, + { + "epoch": 14.05, + "grad_norm": 2.31142520904541, + "learning_rate": 7.948040201005027e-06, + "loss": 0.0189, + "step": 20925 + }, + { + "epoch": 14.07, + "grad_norm": 2.4628167152404785, + "learning_rate": 7.945527638190954e-06, + "loss": 0.0191, + "step": 20950 + }, + { + "epoch": 14.09, + "grad_norm": 2.2550642490386963, + "learning_rate": 7.943015075376885e-06, + "loss": 0.0205, + "step": 20975 + }, + { + "epoch": 14.1, + "grad_norm": 2.5067131519317627, + "learning_rate": 7.940502512562815e-06, + "loss": 0.0193, + "step": 21000 + }, + { + "epoch": 14.1, + "eval_loss": 0.16441361606121063, + "eval_runtime": 532.0683, + "eval_samples_per_second": 2.605, + "eval_steps_per_second": 2.605, + "eval_wer": 28.28006357054565, + "step": 21000 + }, + { + "epoch": 14.12, + "grad_norm": 2.0792436599731445, + "learning_rate": 7.937989949748744e-06, + "loss": 0.0202, + "step": 21025 + }, + { + "epoch": 14.14, + "grad_norm": 2.0055572986602783, + "learning_rate": 7.935477386934673e-06, + "loss": 0.02, + "step": 21050 + }, + { + "epoch": 14.15, + "grad_norm": 2.557342052459717, + "learning_rate": 7.932964824120604e-06, + "loss": 0.0202, + "step": 21075 + }, + { + "epoch": 14.17, + "grad_norm": 2.351605176925659, + "learning_rate": 7.930452261306534e-06, + "loss": 0.0205, + "step": 21100 + }, + { + "epoch": 14.19, + "grad_norm": 2.4522876739501953, + "learning_rate": 7.927939698492463e-06, + "loss": 0.0197, + "step": 21125 + }, + { + "epoch": 14.2, + "grad_norm": 1.9259110689163208, + "learning_rate": 7.925527638190955e-06, + "loss": 0.019, + "step": 21150 + }, + { + "epoch": 14.22, + "grad_norm": 2.6869237422943115, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0191, + "step": 21175 + }, + { + "epoch": 14.24, + "grad_norm": 2.1610636711120605, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0199, + "step": 21200 + }, + { + "epoch": 14.25, + "grad_norm": 2.3419833183288574, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0205, + "step": 21225 + }, + { + "epoch": 14.27, + "grad_norm": 2.655822277069092, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0211, + "step": 21250 + }, + { + "epoch": 14.29, + "grad_norm": 2.3895249366760254, + "learning_rate": 7.912964824120603e-06, + "loss": 0.02, + "step": 21275 + }, + { + "epoch": 14.3, + "grad_norm": 2.626079559326172, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0204, + "step": 21300 + }, + { + "epoch": 14.32, + "grad_norm": 2.4946000576019287, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0211, + "step": 21325 + }, + { + "epoch": 14.34, + "grad_norm": 2.2254092693328857, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0209, + "step": 21350 + }, + { + "epoch": 14.36, + "grad_norm": 2.813023328781128, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0205, + "step": 21375 + }, + { + "epoch": 14.37, + "grad_norm": 2.3448939323425293, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0204, + "step": 21400 + }, + { + "epoch": 14.39, + "grad_norm": 2.1861133575439453, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0211, + "step": 21425 + }, + { + "epoch": 14.41, + "grad_norm": 2.1422207355499268, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0224, + "step": 21450 + }, + { + "epoch": 14.42, + "grad_norm": 2.713761329650879, + "learning_rate": 7.89286432160804e-06, + "loss": 0.02, + "step": 21475 + }, + { + "epoch": 14.44, + "grad_norm": 2.430680274963379, + "learning_rate": 7.89035175879397e-06, + "loss": 0.0218, + "step": 21500 + }, + { + "epoch": 14.46, + "grad_norm": 2.974393606185913, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0197, + "step": 21525 + }, + { + "epoch": 14.47, + "grad_norm": 2.530994415283203, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0221, + "step": 21550 + }, + { + "epoch": 14.49, + "grad_norm": 2.5071282386779785, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0214, + "step": 21575 + }, + { + "epoch": 14.51, + "grad_norm": 2.2111854553222656, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0208, + "step": 21600 + }, + { + "epoch": 14.52, + "grad_norm": 2.194091320037842, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0203, + "step": 21625 + }, + { + "epoch": 14.54, + "grad_norm": 2.2206263542175293, + "learning_rate": 7.875276381909548e-06, + "loss": 0.0221, + "step": 21650 + }, + { + "epoch": 14.56, + "grad_norm": 2.425065279006958, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0211, + "step": 21675 + }, + { + "epoch": 14.57, + "grad_norm": 2.6152865886688232, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0207, + "step": 21700 + }, + { + "epoch": 14.59, + "grad_norm": 2.2612714767456055, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0209, + "step": 21725 + }, + { + "epoch": 14.61, + "grad_norm": 2.1470086574554443, + "learning_rate": 7.865226130653267e-06, + "loss": 0.021, + "step": 21750 + }, + { + "epoch": 14.62, + "grad_norm": 2.484851598739624, + "learning_rate": 7.862713567839196e-06, + "loss": 0.02, + "step": 21775 + }, + { + "epoch": 14.64, + "grad_norm": 2.4667041301727295, + "learning_rate": 7.860201005025127e-06, + "loss": 0.0206, + "step": 21800 + }, + { + "epoch": 14.66, + "grad_norm": 2.9903693199157715, + "learning_rate": 7.857688442211055e-06, + "loss": 0.0219, + "step": 21825 + }, + { + "epoch": 14.67, + "grad_norm": 2.6542530059814453, + "learning_rate": 7.855175879396986e-06, + "loss": 0.0213, + "step": 21850 + }, + { + "epoch": 14.69, + "grad_norm": 2.333191394805908, + "learning_rate": 7.852663316582915e-06, + "loss": 0.0214, + "step": 21875 + }, + { + "epoch": 14.71, + "grad_norm": 2.71769380569458, + "learning_rate": 7.850150753768844e-06, + "loss": 0.0215, + "step": 21900 + }, + { + "epoch": 14.72, + "grad_norm": 2.4674861431121826, + "learning_rate": 7.847638190954775e-06, + "loss": 0.0211, + "step": 21925 + }, + { + "epoch": 14.74, + "grad_norm": 2.931941270828247, + "learning_rate": 7.845125628140705e-06, + "loss": 0.0244, + "step": 21950 + }, + { + "epoch": 14.76, + "grad_norm": 2.738786458969116, + "learning_rate": 7.842613065326634e-06, + "loss": 0.0218, + "step": 21975 + }, + { + "epoch": 14.78, + "grad_norm": 2.375138521194458, + "learning_rate": 7.840100502512563e-06, + "loss": 0.0219, + "step": 22000 + }, + { + "epoch": 14.78, + "eval_loss": 0.16828645765781403, + "eval_runtime": 539.6518, + "eval_samples_per_second": 2.568, + "eval_steps_per_second": 2.568, + "eval_wer": 28.11230796397669, + "step": 22000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 68, + "save_steps": 1000, + "total_flos": 3.465787561869312e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/marathi/checkpoint-22000/training_args.bin b/checkpoints/whisper-tiny/marathi/checkpoint-22000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2a45f92945f10e170154791b44b8de98ac8a773 --- /dev/null +++ b/checkpoints/whisper-tiny/marathi/checkpoint-22000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c639b8936ce64e8fe2c00349809456d0d9751777a6f5e4f03b3e1169d23591 +size 4667 diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/config.json b/checkpoints/whisper-tiny/telugu/checkpoint-24000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6ab7422b017bbfc651e54c45dd3214e03c9e5a9 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/config.json @@ -0,0 +1,154 @@ +{ + "_name_or_path": "openai/whisper-tiny", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 384, + "decoder_attention_heads": 6, + "decoder_ffn_dim": 1536, + "decoder_layerdrop": 0.0, + "decoder_layers": 4, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 6, + "encoder_ffn_dim": 1536, + "encoder_layerdrop": 0.0, + "encoder_layers": 4, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50299 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 4, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.39.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/generation_config.json b/checkpoints/whisper-tiny/telugu/checkpoint-24000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4857895fba6cdefb862460b5d33969e1892aa71 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/generation_config.json @@ -0,0 +1,248 @@ +{ + "alignment_heads": [ + [ + 2, + 2 + ], + [ + 3, + 0 + ], + [ + 3, + 2 + ], + [ + 3, + 3 + ], + [ + 3, + 4 + ], + [ + 3, + 5 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.39.2" +} diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/model.safetensors b/checkpoints/whisper-tiny/telugu/checkpoint-24000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66914de9cfc6b6aa1174b58db5cfbd4d1e6b6ac4 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323da069f1a1d619c79b3f1cd6c62f7f3b5be6677ccbc3225092e530b1dafaf6 +size 151061672 diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/optimizer.pt b/checkpoints/whisper-tiny/telugu/checkpoint-24000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..874bf3d4c74645b20d3bf974b1ac061f1f6d3567 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25bad8130a48b8a2d37ed224866c9766172efcf42bdd7525df82a5274e42c92 +size 297615749 diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/preprocessor_config.json b/checkpoints/whisper-tiny/telugu/checkpoint-24000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/rng_state.pth b/checkpoints/whisper-tiny/telugu/checkpoint-24000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff8b928a3d16994a2a6b2980f571a40c8f0cfdf6 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51c77b0bb4c7ce6fba25fe67744115465a5531c1e4f88a6c67d0ec9aeb94175 +size 14575 diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/scheduler.pt b/checkpoints/whisper-tiny/telugu/checkpoint-24000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f125a2a3a0e1e24b444e82e2c4cbfb087fc9168 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc672aa3c9e2038c7eab9551798d77eff82ab1cb587df10c2323892c34b0ff2 +size 627 diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/trainer_state.json b/checkpoints/whisper-tiny/telugu/checkpoint-24000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ad7b1a344589f8df2ee3559d99e54beccae4cfe --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/trainer_state.json @@ -0,0 +1,6957 @@ +{ + "best_metric": 35.51156271899089, + "best_model_checkpoint": "results/whisper-tiny/telugu/checkpoint-14000", + "epoch": 16.118200134318336, + "eval_steps": 1000, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 46.288719177246094, + "learning_rate": 4.4e-07, + "loss": 2.6373, + "step": 25 + }, + { + "epoch": 0.03, + "grad_norm": 19.318984985351562, + "learning_rate": 9.400000000000001e-07, + "loss": 2.355, + "step": 50 + }, + { + "epoch": 0.05, + "grad_norm": 8.56235122680664, + "learning_rate": 1.44e-06, + "loss": 2.0219, + "step": 75 + }, + { + "epoch": 0.07, + "grad_norm": 6.064026355743408, + "learning_rate": 1.94e-06, + "loss": 1.7403, + "step": 100 + }, + { + "epoch": 0.08, + "grad_norm": 6.795860290527344, + "learning_rate": 2.4400000000000004e-06, + "loss": 1.5409, + "step": 125 + }, + { + "epoch": 0.1, + "grad_norm": 5.613386631011963, + "learning_rate": 2.9400000000000002e-06, + "loss": 1.4447, + "step": 150 + }, + { + "epoch": 0.12, + "grad_norm": 5.609338760375977, + "learning_rate": 3.44e-06, + "loss": 1.3978, + "step": 175 + }, + { + "epoch": 0.13, + "grad_norm": 7.090567588806152, + "learning_rate": 3.94e-06, + "loss": 1.3598, + "step": 200 + }, + { + "epoch": 0.15, + "grad_norm": 8.041254997253418, + "learning_rate": 4.440000000000001e-06, + "loss": 1.3315, + "step": 225 + }, + { + "epoch": 0.17, + "grad_norm": 9.281234741210938, + "learning_rate": 4.94e-06, + "loss": 1.3065, + "step": 250 + }, + { + "epoch": 0.18, + "grad_norm": 7.175929546356201, + "learning_rate": 5.4400000000000004e-06, + "loss": 1.2769, + "step": 275 + }, + { + "epoch": 0.2, + "grad_norm": 7.125080585479736, + "learning_rate": 5.94e-06, + "loss": 1.2559, + "step": 300 + }, + { + "epoch": 0.22, + "grad_norm": 10.941713333129883, + "learning_rate": 6.440000000000001e-06, + "loss": 1.2276, + "step": 325 + }, + { + "epoch": 0.24, + "grad_norm": 9.61944580078125, + "learning_rate": 6.9400000000000005e-06, + "loss": 1.1937, + "step": 350 + }, + { + "epoch": 0.25, + "grad_norm": 11.414420127868652, + "learning_rate": 7.440000000000001e-06, + "loss": 1.1516, + "step": 375 + }, + { + "epoch": 0.27, + "grad_norm": 18.75218963623047, + "learning_rate": 7.94e-06, + "loss": 1.0752, + "step": 400 + }, + { + "epoch": 0.29, + "grad_norm": 16.333126068115234, + "learning_rate": 8.44e-06, + "loss": 0.9224, + "step": 425 + }, + { + "epoch": 0.3, + "grad_norm": 9.628554344177246, + "learning_rate": 8.94e-06, + "loss": 0.7448, + "step": 450 + }, + { + "epoch": 0.32, + "grad_norm": 7.029972553253174, + "learning_rate": 9.440000000000001e-06, + "loss": 0.6051, + "step": 475 + }, + { + "epoch": 0.34, + "grad_norm": 8.085220336914062, + "learning_rate": 9.940000000000001e-06, + "loss": 0.5029, + "step": 500 + }, + { + "epoch": 0.35, + "grad_norm": 8.469658851623535, + "learning_rate": 9.997788944723618e-06, + "loss": 0.4404, + "step": 525 + }, + { + "epoch": 0.37, + "grad_norm": 13.255173683166504, + "learning_rate": 9.99527638190955e-06, + "loss": 0.3904, + "step": 550 + }, + { + "epoch": 0.39, + "grad_norm": 7.986926078796387, + "learning_rate": 9.992763819095477e-06, + "loss": 0.3707, + "step": 575 + }, + { + "epoch": 0.4, + "grad_norm": 5.75927734375, + "learning_rate": 9.990251256281408e-06, + "loss": 0.3469, + "step": 600 + }, + { + "epoch": 0.42, + "grad_norm": 6.482667922973633, + "learning_rate": 9.987738693467337e-06, + "loss": 0.3373, + "step": 625 + }, + { + "epoch": 0.44, + "grad_norm": 3.9817612171173096, + "learning_rate": 9.985226130653267e-06, + "loss": 0.3208, + "step": 650 + }, + { + "epoch": 0.45, + "grad_norm": 5.0060296058654785, + "learning_rate": 9.982713567839198e-06, + "loss": 0.3144, + "step": 675 + }, + { + "epoch": 0.47, + "grad_norm": 6.076388359069824, + "learning_rate": 9.980201005025127e-06, + "loss": 0.3032, + "step": 700 + }, + { + "epoch": 0.49, + "grad_norm": 5.037066459655762, + "learning_rate": 9.977688442211056e-06, + "loss": 0.2962, + "step": 725 + }, + { + "epoch": 0.5, + "grad_norm": 5.0189528465271, + "learning_rate": 9.975175879396986e-06, + "loss": 0.2805, + "step": 750 + }, + { + "epoch": 0.52, + "grad_norm": 7.7649383544921875, + "learning_rate": 9.972663316582915e-06, + "loss": 0.2807, + "step": 775 + }, + { + "epoch": 0.54, + "grad_norm": 4.730064392089844, + "learning_rate": 9.970150753768844e-06, + "loss": 0.2771, + "step": 800 + }, + { + "epoch": 0.55, + "grad_norm": 4.841135501861572, + "learning_rate": 9.967638190954775e-06, + "loss": 0.2746, + "step": 825 + }, + { + "epoch": 0.57, + "grad_norm": 5.047031879425049, + "learning_rate": 9.965125628140703e-06, + "loss": 0.2626, + "step": 850 + }, + { + "epoch": 0.59, + "grad_norm": 5.136034965515137, + "learning_rate": 9.962613065326634e-06, + "loss": 0.2623, + "step": 875 + }, + { + "epoch": 0.6, + "grad_norm": 4.8938446044921875, + "learning_rate": 9.960100502512563e-06, + "loss": 0.2605, + "step": 900 + }, + { + "epoch": 0.62, + "grad_norm": 6.293755531311035, + "learning_rate": 9.957587939698493e-06, + "loss": 0.2558, + "step": 925 + }, + { + "epoch": 0.64, + "grad_norm": 5.07853364944458, + "learning_rate": 9.955075376884424e-06, + "loss": 0.2556, + "step": 950 + }, + { + "epoch": 0.65, + "grad_norm": 4.405387878417969, + "learning_rate": 9.952562814070353e-06, + "loss": 0.2464, + "step": 975 + }, + { + "epoch": 0.67, + "grad_norm": 6.849056720733643, + "learning_rate": 9.950050251256282e-06, + "loss": 0.248, + "step": 1000 + }, + { + "epoch": 0.67, + "eval_loss": 0.15763895213603973, + "eval_runtime": 890.7513, + "eval_samples_per_second": 1.614, + "eval_steps_per_second": 1.614, + "eval_wer": 137.5437981779958, + "step": 1000 + }, + { + "epoch": 0.69, + "grad_norm": 5.960962295532227, + "learning_rate": 9.947537688442212e-06, + "loss": 0.2437, + "step": 1025 + }, + { + "epoch": 0.71, + "grad_norm": 4.051809310913086, + "learning_rate": 9.945025125628141e-06, + "loss": 0.2423, + "step": 1050 + }, + { + "epoch": 0.72, + "grad_norm": 4.354046821594238, + "learning_rate": 9.94251256281407e-06, + "loss": 0.237, + "step": 1075 + }, + { + "epoch": 0.74, + "grad_norm": 4.964585781097412, + "learning_rate": 9.940000000000001e-06, + "loss": 0.2322, + "step": 1100 + }, + { + "epoch": 0.76, + "grad_norm": 4.258399486541748, + "learning_rate": 9.93748743718593e-06, + "loss": 0.2351, + "step": 1125 + }, + { + "epoch": 0.77, + "grad_norm": 6.372684478759766, + "learning_rate": 9.93497487437186e-06, + "loss": 0.2363, + "step": 1150 + }, + { + "epoch": 0.79, + "grad_norm": 3.22149658203125, + "learning_rate": 9.93246231155779e-06, + "loss": 0.2304, + "step": 1175 + }, + { + "epoch": 0.81, + "grad_norm": 4.906097888946533, + "learning_rate": 9.929949748743719e-06, + "loss": 0.2304, + "step": 1200 + }, + { + "epoch": 0.82, + "grad_norm": 5.798041820526123, + "learning_rate": 9.92743718592965e-06, + "loss": 0.2269, + "step": 1225 + }, + { + "epoch": 0.84, + "grad_norm": 4.752237319946289, + "learning_rate": 9.924924623115579e-06, + "loss": 0.2218, + "step": 1250 + }, + { + "epoch": 0.86, + "grad_norm": 4.795834541320801, + "learning_rate": 9.922412060301508e-06, + "loss": 0.2197, + "step": 1275 + }, + { + "epoch": 0.87, + "grad_norm": 3.58443546295166, + "learning_rate": 9.91989949748744e-06, + "loss": 0.2159, + "step": 1300 + }, + { + "epoch": 0.89, + "grad_norm": 4.739859580993652, + "learning_rate": 9.917386934673367e-06, + "loss": 0.2173, + "step": 1325 + }, + { + "epoch": 0.91, + "grad_norm": 4.105286121368408, + "learning_rate": 9.914874371859298e-06, + "loss": 0.2189, + "step": 1350 + }, + { + "epoch": 0.92, + "grad_norm": 3.9577555656433105, + "learning_rate": 9.912361809045227e-06, + "loss": 0.2177, + "step": 1375 + }, + { + "epoch": 0.94, + "grad_norm": 4.78550910949707, + "learning_rate": 9.909849246231157e-06, + "loss": 0.212, + "step": 1400 + }, + { + "epoch": 0.96, + "grad_norm": 4.1616435050964355, + "learning_rate": 9.907336683417086e-06, + "loss": 0.2142, + "step": 1425 + }, + { + "epoch": 0.97, + "grad_norm": 4.921288967132568, + "learning_rate": 9.904824120603015e-06, + "loss": 0.2138, + "step": 1450 + }, + { + "epoch": 0.99, + "grad_norm": 4.49397611618042, + "learning_rate": 9.902311557788945e-06, + "loss": 0.2111, + "step": 1475 + }, + { + "epoch": 1.01, + "grad_norm": 4.925095081329346, + "learning_rate": 9.899798994974876e-06, + "loss": 0.2065, + "step": 1500 + }, + { + "epoch": 1.02, + "grad_norm": 5.786722660064697, + "learning_rate": 9.897286432160805e-06, + "loss": 0.1993, + "step": 1525 + }, + { + "epoch": 1.04, + "grad_norm": 4.955239772796631, + "learning_rate": 9.894773869346734e-06, + "loss": 0.1967, + "step": 1550 + }, + { + "epoch": 1.06, + "grad_norm": 3.772550344467163, + "learning_rate": 9.892261306532665e-06, + "loss": 0.1995, + "step": 1575 + }, + { + "epoch": 1.07, + "grad_norm": 3.3915085792541504, + "learning_rate": 9.889748743718593e-06, + "loss": 0.1959, + "step": 1600 + }, + { + "epoch": 1.09, + "grad_norm": 4.938091278076172, + "learning_rate": 9.887236180904524e-06, + "loss": 0.1906, + "step": 1625 + }, + { + "epoch": 1.11, + "grad_norm": 5.589095115661621, + "learning_rate": 9.884723618090453e-06, + "loss": 0.1977, + "step": 1650 + }, + { + "epoch": 1.12, + "grad_norm": 5.237961769104004, + "learning_rate": 9.882211055276383e-06, + "loss": 0.1901, + "step": 1675 + }, + { + "epoch": 1.14, + "grad_norm": 3.3004534244537354, + "learning_rate": 9.879698492462312e-06, + "loss": 0.1947, + "step": 1700 + }, + { + "epoch": 1.16, + "grad_norm": 4.175780773162842, + "learning_rate": 9.877185929648241e-06, + "loss": 0.1897, + "step": 1725 + }, + { + "epoch": 1.18, + "grad_norm": 3.922126054763794, + "learning_rate": 9.874673366834172e-06, + "loss": 0.1928, + "step": 1750 + }, + { + "epoch": 1.19, + "grad_norm": 4.713805198669434, + "learning_rate": 9.872160804020102e-06, + "loss": 0.1897, + "step": 1775 + }, + { + "epoch": 1.21, + "grad_norm": 3.903756618499756, + "learning_rate": 9.869648241206031e-06, + "loss": 0.1928, + "step": 1800 + }, + { + "epoch": 1.23, + "grad_norm": 4.778044700622559, + "learning_rate": 9.86713567839196e-06, + "loss": 0.192, + "step": 1825 + }, + { + "epoch": 1.24, + "grad_norm": 3.893132448196411, + "learning_rate": 9.864623115577891e-06, + "loss": 0.1839, + "step": 1850 + }, + { + "epoch": 1.26, + "grad_norm": 5.725592613220215, + "learning_rate": 9.862110552763819e-06, + "loss": 0.1821, + "step": 1875 + }, + { + "epoch": 1.28, + "grad_norm": 4.511555194854736, + "learning_rate": 9.85959798994975e-06, + "loss": 0.1816, + "step": 1900 + }, + { + "epoch": 1.29, + "grad_norm": 5.316925525665283, + "learning_rate": 9.85708542713568e-06, + "loss": 0.1874, + "step": 1925 + }, + { + "epoch": 1.31, + "grad_norm": 3.552487850189209, + "learning_rate": 9.854572864321609e-06, + "loss": 0.1787, + "step": 1950 + }, + { + "epoch": 1.33, + "grad_norm": 3.510145425796509, + "learning_rate": 9.85206030150754e-06, + "loss": 0.1846, + "step": 1975 + }, + { + "epoch": 1.34, + "grad_norm": 4.14070987701416, + "learning_rate": 9.849547738693467e-06, + "loss": 0.177, + "step": 2000 + }, + { + "epoch": 1.34, + "eval_loss": 0.11255700886249542, + "eval_runtime": 840.3778, + "eval_samples_per_second": 1.711, + "eval_steps_per_second": 1.711, + "eval_wer": 113.9540995094604, + "step": 2000 + }, + { + "epoch": 1.36, + "grad_norm": 3.559063196182251, + "learning_rate": 9.847035175879398e-06, + "loss": 0.1786, + "step": 2025 + }, + { + "epoch": 1.38, + "grad_norm": 3.4146175384521484, + "learning_rate": 9.844522613065328e-06, + "loss": 0.1822, + "step": 2050 + }, + { + "epoch": 1.39, + "grad_norm": 4.3564910888671875, + "learning_rate": 9.842010050251257e-06, + "loss": 0.1811, + "step": 2075 + }, + { + "epoch": 1.41, + "grad_norm": 5.131686210632324, + "learning_rate": 9.839497487437186e-06, + "loss": 0.1798, + "step": 2100 + }, + { + "epoch": 1.43, + "grad_norm": 3.9800162315368652, + "learning_rate": 9.836984924623117e-06, + "loss": 0.1821, + "step": 2125 + }, + { + "epoch": 1.44, + "grad_norm": 3.412644386291504, + "learning_rate": 9.834472361809047e-06, + "loss": 0.1805, + "step": 2150 + }, + { + "epoch": 1.46, + "grad_norm": 4.895207405090332, + "learning_rate": 9.831959798994976e-06, + "loss": 0.1736, + "step": 2175 + }, + { + "epoch": 1.48, + "grad_norm": 3.850755453109741, + "learning_rate": 9.829447236180905e-06, + "loss": 0.171, + "step": 2200 + }, + { + "epoch": 1.49, + "grad_norm": 5.936442852020264, + "learning_rate": 9.826934673366834e-06, + "loss": 0.1806, + "step": 2225 + }, + { + "epoch": 1.51, + "grad_norm": 3.3060200214385986, + "learning_rate": 9.824422110552766e-06, + "loss": 0.1775, + "step": 2250 + }, + { + "epoch": 1.53, + "grad_norm": 3.466420888900757, + "learning_rate": 9.821909547738693e-06, + "loss": 0.1728, + "step": 2275 + }, + { + "epoch": 1.54, + "grad_norm": 2.88674259185791, + "learning_rate": 9.819396984924624e-06, + "loss": 0.1748, + "step": 2300 + }, + { + "epoch": 1.56, + "grad_norm": 3.2692856788635254, + "learning_rate": 9.816884422110553e-06, + "loss": 0.1764, + "step": 2325 + }, + { + "epoch": 1.58, + "grad_norm": 4.058603763580322, + "learning_rate": 9.814371859296483e-06, + "loss": 0.1744, + "step": 2350 + }, + { + "epoch": 1.6, + "grad_norm": 3.3817245960235596, + "learning_rate": 9.811859296482414e-06, + "loss": 0.1724, + "step": 2375 + }, + { + "epoch": 1.61, + "grad_norm": 3.797555923461914, + "learning_rate": 9.809346733668343e-06, + "loss": 0.1699, + "step": 2400 + }, + { + "epoch": 1.63, + "grad_norm": 2.828004837036133, + "learning_rate": 9.806834170854272e-06, + "loss": 0.1729, + "step": 2425 + }, + { + "epoch": 1.65, + "grad_norm": 3.694157361984253, + "learning_rate": 9.804321608040202e-06, + "loss": 0.1716, + "step": 2450 + }, + { + "epoch": 1.66, + "grad_norm": 4.082045078277588, + "learning_rate": 9.801809045226131e-06, + "loss": 0.1722, + "step": 2475 + }, + { + "epoch": 1.68, + "grad_norm": 3.213883399963379, + "learning_rate": 9.79929648241206e-06, + "loss": 0.1659, + "step": 2500 + }, + { + "epoch": 1.7, + "grad_norm": 3.308229684829712, + "learning_rate": 9.796783919597991e-06, + "loss": 0.1672, + "step": 2525 + }, + { + "epoch": 1.71, + "grad_norm": 3.3312532901763916, + "learning_rate": 9.79427135678392e-06, + "loss": 0.1659, + "step": 2550 + }, + { + "epoch": 1.73, + "grad_norm": 4.818289756774902, + "learning_rate": 9.79175879396985e-06, + "loss": 0.1673, + "step": 2575 + }, + { + "epoch": 1.75, + "grad_norm": 3.281080722808838, + "learning_rate": 9.78924623115578e-06, + "loss": 0.1674, + "step": 2600 + }, + { + "epoch": 1.76, + "grad_norm": 3.1090762615203857, + "learning_rate": 9.786733668341709e-06, + "loss": 0.1635, + "step": 2625 + }, + { + "epoch": 1.78, + "grad_norm": 4.347409248352051, + "learning_rate": 9.78422110552764e-06, + "loss": 0.166, + "step": 2650 + }, + { + "epoch": 1.8, + "grad_norm": 3.5327181816101074, + "learning_rate": 9.781708542713569e-06, + "loss": 0.1632, + "step": 2675 + }, + { + "epoch": 1.81, + "grad_norm": 3.060777187347412, + "learning_rate": 9.779195979899498e-06, + "loss": 0.1668, + "step": 2700 + }, + { + "epoch": 1.83, + "grad_norm": 3.9147114753723145, + "learning_rate": 9.776683417085428e-06, + "loss": 0.1608, + "step": 2725 + }, + { + "epoch": 1.85, + "grad_norm": 3.5742897987365723, + "learning_rate": 9.774170854271357e-06, + "loss": 0.1625, + "step": 2750 + }, + { + "epoch": 1.86, + "grad_norm": 3.483430862426758, + "learning_rate": 9.771658291457288e-06, + "loss": 0.1635, + "step": 2775 + }, + { + "epoch": 1.88, + "grad_norm": 3.913830041885376, + "learning_rate": 9.769145728643217e-06, + "loss": 0.1637, + "step": 2800 + }, + { + "epoch": 1.9, + "grad_norm": 3.7740983963012695, + "learning_rate": 9.766733668341709e-06, + "loss": 0.1626, + "step": 2825 + }, + { + "epoch": 1.91, + "grad_norm": 3.780313014984131, + "learning_rate": 9.76422110552764e-06, + "loss": 0.1615, + "step": 2850 + }, + { + "epoch": 1.93, + "grad_norm": 2.9504480361938477, + "learning_rate": 9.761708542713568e-06, + "loss": 0.1575, + "step": 2875 + }, + { + "epoch": 1.95, + "grad_norm": 3.9463906288146973, + "learning_rate": 9.759195979899499e-06, + "loss": 0.1602, + "step": 2900 + }, + { + "epoch": 1.96, + "grad_norm": 4.844554901123047, + "learning_rate": 9.756683417085428e-06, + "loss": 0.158, + "step": 2925 + }, + { + "epoch": 1.98, + "grad_norm": 2.893134355545044, + "learning_rate": 9.754170854271357e-06, + "loss": 0.1588, + "step": 2950 + }, + { + "epoch": 2.0, + "grad_norm": 4.4972825050354, + "learning_rate": 9.751658291457288e-06, + "loss": 0.1588, + "step": 2975 + }, + { + "epoch": 2.01, + "grad_norm": 3.745360851287842, + "learning_rate": 9.749145728643216e-06, + "loss": 0.1496, + "step": 3000 + }, + { + "epoch": 2.01, + "eval_loss": 0.09570121765136719, + "eval_runtime": 879.7603, + "eval_samples_per_second": 1.635, + "eval_steps_per_second": 1.635, + "eval_wer": 84.76699369306236, + "step": 3000 + }, + { + "epoch": 2.03, + "grad_norm": 2.871485948562622, + "learning_rate": 9.746633165829147e-06, + "loss": 0.1489, + "step": 3025 + }, + { + "epoch": 2.05, + "grad_norm": 2.8597240447998047, + "learning_rate": 9.744120603015076e-06, + "loss": 0.1496, + "step": 3050 + }, + { + "epoch": 2.07, + "grad_norm": 3.686061143875122, + "learning_rate": 9.741608040201006e-06, + "loss": 0.1504, + "step": 3075 + }, + { + "epoch": 2.08, + "grad_norm": 4.051623344421387, + "learning_rate": 9.739095477386935e-06, + "loss": 0.1482, + "step": 3100 + }, + { + "epoch": 2.1, + "grad_norm": 3.5273308753967285, + "learning_rate": 9.736582914572866e-06, + "loss": 0.1488, + "step": 3125 + }, + { + "epoch": 2.12, + "grad_norm": 2.896193265914917, + "learning_rate": 9.734070351758794e-06, + "loss": 0.1451, + "step": 3150 + }, + { + "epoch": 2.13, + "grad_norm": 4.335400581359863, + "learning_rate": 9.731557788944725e-06, + "loss": 0.1458, + "step": 3175 + }, + { + "epoch": 2.15, + "grad_norm": 4.603724002838135, + "learning_rate": 9.729045226130654e-06, + "loss": 0.1501, + "step": 3200 + }, + { + "epoch": 2.17, + "grad_norm": 5.055929183959961, + "learning_rate": 9.726532663316583e-06, + "loss": 0.1478, + "step": 3225 + }, + { + "epoch": 2.18, + "grad_norm": 3.657083511352539, + "learning_rate": 9.724020100502514e-06, + "loss": 0.148, + "step": 3250 + }, + { + "epoch": 2.2, + "grad_norm": 3.7822017669677734, + "learning_rate": 9.721507537688444e-06, + "loss": 0.1485, + "step": 3275 + }, + { + "epoch": 2.22, + "grad_norm": 2.9088945388793945, + "learning_rate": 9.718994974874373e-06, + "loss": 0.1449, + "step": 3300 + }, + { + "epoch": 2.23, + "grad_norm": 5.0420918464660645, + "learning_rate": 9.716482412060302e-06, + "loss": 0.1468, + "step": 3325 + }, + { + "epoch": 2.25, + "grad_norm": 3.5268824100494385, + "learning_rate": 9.713969849246232e-06, + "loss": 0.1474, + "step": 3350 + }, + { + "epoch": 2.27, + "grad_norm": 2.949197292327881, + "learning_rate": 9.711457286432163e-06, + "loss": 0.1455, + "step": 3375 + }, + { + "epoch": 2.28, + "grad_norm": 3.0619781017303467, + "learning_rate": 9.708944723618092e-06, + "loss": 0.1418, + "step": 3400 + }, + { + "epoch": 2.3, + "grad_norm": 2.6028056144714355, + "learning_rate": 9.706432160804021e-06, + "loss": 0.1404, + "step": 3425 + }, + { + "epoch": 2.32, + "grad_norm": 4.112977981567383, + "learning_rate": 9.70391959798995e-06, + "loss": 0.1457, + "step": 3450 + }, + { + "epoch": 2.33, + "grad_norm": 3.902012825012207, + "learning_rate": 9.70140703517588e-06, + "loss": 0.1459, + "step": 3475 + }, + { + "epoch": 2.35, + "grad_norm": 3.593888282775879, + "learning_rate": 9.698894472361809e-06, + "loss": 0.1436, + "step": 3500 + }, + { + "epoch": 2.37, + "grad_norm": 3.7610585689544678, + "learning_rate": 9.69638190954774e-06, + "loss": 0.1402, + "step": 3525 + }, + { + "epoch": 2.38, + "grad_norm": 2.8161423206329346, + "learning_rate": 9.69386934673367e-06, + "loss": 0.1429, + "step": 3550 + }, + { + "epoch": 2.4, + "grad_norm": 5.7642316818237305, + "learning_rate": 9.691356783919599e-06, + "loss": 0.1441, + "step": 3575 + }, + { + "epoch": 2.42, + "grad_norm": 2.900308132171631, + "learning_rate": 9.688844221105528e-06, + "loss": 0.1411, + "step": 3600 + }, + { + "epoch": 2.43, + "grad_norm": 2.7447474002838135, + "learning_rate": 9.686331658291457e-06, + "loss": 0.14, + "step": 3625 + }, + { + "epoch": 2.45, + "grad_norm": 4.141810894012451, + "learning_rate": 9.683819095477388e-06, + "loss": 0.1447, + "step": 3650 + }, + { + "epoch": 2.47, + "grad_norm": 3.8563168048858643, + "learning_rate": 9.681306532663318e-06, + "loss": 0.1391, + "step": 3675 + }, + { + "epoch": 2.48, + "grad_norm": 2.9455509185791016, + "learning_rate": 9.678793969849247e-06, + "loss": 0.1408, + "step": 3700 + }, + { + "epoch": 2.5, + "grad_norm": 3.729257583618164, + "learning_rate": 9.676281407035176e-06, + "loss": 0.1394, + "step": 3725 + }, + { + "epoch": 2.52, + "grad_norm": 3.146662950515747, + "learning_rate": 9.673768844221106e-06, + "loss": 0.141, + "step": 3750 + }, + { + "epoch": 2.54, + "grad_norm": 2.9390599727630615, + "learning_rate": 9.671256281407035e-06, + "loss": 0.139, + "step": 3775 + }, + { + "epoch": 2.55, + "grad_norm": 2.8737688064575195, + "learning_rate": 9.668743718592966e-06, + "loss": 0.1391, + "step": 3800 + }, + { + "epoch": 2.57, + "grad_norm": 3.0958399772644043, + "learning_rate": 9.666231155778895e-06, + "loss": 0.1387, + "step": 3825 + }, + { + "epoch": 2.59, + "grad_norm": 2.8722758293151855, + "learning_rate": 9.663718592964825e-06, + "loss": 0.1377, + "step": 3850 + }, + { + "epoch": 2.6, + "grad_norm": 3.010615110397339, + "learning_rate": 9.661206030150754e-06, + "loss": 0.1377, + "step": 3875 + }, + { + "epoch": 2.62, + "grad_norm": 3.0470664501190186, + "learning_rate": 9.658693467336683e-06, + "loss": 0.1367, + "step": 3900 + }, + { + "epoch": 2.64, + "grad_norm": 3.4866979122161865, + "learning_rate": 9.656180904522614e-06, + "loss": 0.1379, + "step": 3925 + }, + { + "epoch": 2.65, + "grad_norm": 4.0438714027404785, + "learning_rate": 9.653668341708544e-06, + "loss": 0.1383, + "step": 3950 + }, + { + "epoch": 2.67, + "grad_norm": 3.6789968013763428, + "learning_rate": 9.651155778894473e-06, + "loss": 0.1361, + "step": 3975 + }, + { + "epoch": 2.69, + "grad_norm": 3.431304454803467, + "learning_rate": 9.648643216080404e-06, + "loss": 0.1379, + "step": 4000 + }, + { + "epoch": 2.69, + "eval_loss": 0.08822890371084213, + "eval_runtime": 892.2237, + "eval_samples_per_second": 1.612, + "eval_steps_per_second": 1.612, + "eval_wer": 84.48668535388929, + "step": 4000 + }, + { + "epoch": 2.7, + "grad_norm": 3.76167893409729, + "learning_rate": 9.646130653266332e-06, + "loss": 0.135, + "step": 4025 + }, + { + "epoch": 2.72, + "grad_norm": 4.518193244934082, + "learning_rate": 9.643618090452263e-06, + "loss": 0.1362, + "step": 4050 + }, + { + "epoch": 2.74, + "grad_norm": 3.3439040184020996, + "learning_rate": 9.641105527638192e-06, + "loss": 0.1369, + "step": 4075 + }, + { + "epoch": 2.75, + "grad_norm": 2.795503616333008, + "learning_rate": 9.638592964824121e-06, + "loss": 0.136, + "step": 4100 + }, + { + "epoch": 2.77, + "grad_norm": 3.676111936569214, + "learning_rate": 9.63608040201005e-06, + "loss": 0.1416, + "step": 4125 + }, + { + "epoch": 2.79, + "grad_norm": 3.0340778827667236, + "learning_rate": 9.63356783919598e-06, + "loss": 0.1358, + "step": 4150 + }, + { + "epoch": 2.8, + "grad_norm": 3.244170904159546, + "learning_rate": 9.63105527638191e-06, + "loss": 0.1386, + "step": 4175 + }, + { + "epoch": 2.82, + "grad_norm": 3.1988685131073, + "learning_rate": 9.62854271356784e-06, + "loss": 0.1348, + "step": 4200 + }, + { + "epoch": 2.84, + "grad_norm": 3.8628764152526855, + "learning_rate": 9.62603015075377e-06, + "loss": 0.138, + "step": 4225 + }, + { + "epoch": 2.85, + "grad_norm": 4.065423011779785, + "learning_rate": 9.623517587939699e-06, + "loss": 0.1376, + "step": 4250 + }, + { + "epoch": 2.87, + "grad_norm": 2.8142220973968506, + "learning_rate": 9.62100502512563e-06, + "loss": 0.1376, + "step": 4275 + }, + { + "epoch": 2.89, + "grad_norm": 2.8501029014587402, + "learning_rate": 9.618492462311558e-06, + "loss": 0.1332, + "step": 4300 + }, + { + "epoch": 2.9, + "grad_norm": 3.389183521270752, + "learning_rate": 9.615979899497489e-06, + "loss": 0.1344, + "step": 4325 + }, + { + "epoch": 2.92, + "grad_norm": 2.7283239364624023, + "learning_rate": 9.613467336683418e-06, + "loss": 0.1357, + "step": 4350 + }, + { + "epoch": 2.94, + "grad_norm": 3.0676822662353516, + "learning_rate": 9.610954773869347e-06, + "loss": 0.1328, + "step": 4375 + }, + { + "epoch": 2.96, + "grad_norm": 4.118182182312012, + "learning_rate": 9.608442211055277e-06, + "loss": 0.1357, + "step": 4400 + }, + { + "epoch": 2.97, + "grad_norm": 2.7560715675354004, + "learning_rate": 9.605929648241206e-06, + "loss": 0.1353, + "step": 4425 + }, + { + "epoch": 2.99, + "grad_norm": 2.9807188510894775, + "learning_rate": 9.603417085427137e-06, + "loss": 0.1314, + "step": 4450 + }, + { + "epoch": 3.01, + "grad_norm": 2.4613709449768066, + "learning_rate": 9.600904522613066e-06, + "loss": 0.1306, + "step": 4475 + }, + { + "epoch": 3.02, + "grad_norm": 3.1161227226257324, + "learning_rate": 9.598391959798996e-06, + "loss": 0.1233, + "step": 4500 + }, + { + "epoch": 3.04, + "grad_norm": 3.521268129348755, + "learning_rate": 9.595879396984925e-06, + "loss": 0.1249, + "step": 4525 + }, + { + "epoch": 3.06, + "grad_norm": 3.160971164703369, + "learning_rate": 9.593366834170856e-06, + "loss": 0.1241, + "step": 4550 + }, + { + "epoch": 3.07, + "grad_norm": 3.0447280406951904, + "learning_rate": 9.590854271356784e-06, + "loss": 0.1252, + "step": 4575 + }, + { + "epoch": 3.09, + "grad_norm": 2.6389529705047607, + "learning_rate": 9.588341708542715e-06, + "loss": 0.1269, + "step": 4600 + }, + { + "epoch": 3.11, + "grad_norm": 3.629293918609619, + "learning_rate": 9.585829145728644e-06, + "loss": 0.1228, + "step": 4625 + }, + { + "epoch": 3.12, + "grad_norm": 3.6926896572113037, + "learning_rate": 9.583316582914573e-06, + "loss": 0.1227, + "step": 4650 + }, + { + "epoch": 3.14, + "grad_norm": 2.5895748138427734, + "learning_rate": 9.580804020100504e-06, + "loss": 0.1234, + "step": 4675 + }, + { + "epoch": 3.16, + "grad_norm": 4.536306381225586, + "learning_rate": 9.578291457286432e-06, + "loss": 0.1234, + "step": 4700 + }, + { + "epoch": 3.17, + "grad_norm": 2.6335320472717285, + "learning_rate": 9.575778894472363e-06, + "loss": 0.1209, + "step": 4725 + }, + { + "epoch": 3.19, + "grad_norm": 2.8081154823303223, + "learning_rate": 9.573266331658292e-06, + "loss": 0.1209, + "step": 4750 + }, + { + "epoch": 3.21, + "grad_norm": 3.3498728275299072, + "learning_rate": 9.570753768844222e-06, + "loss": 0.1244, + "step": 4775 + }, + { + "epoch": 3.22, + "grad_norm": 3.1684329509735107, + "learning_rate": 9.568241206030151e-06, + "loss": 0.1211, + "step": 4800 + }, + { + "epoch": 3.24, + "grad_norm": 3.1564972400665283, + "learning_rate": 9.565728643216082e-06, + "loss": 0.1243, + "step": 4825 + }, + { + "epoch": 3.26, + "grad_norm": 3.7097954750061035, + "learning_rate": 9.563216080402011e-06, + "loss": 0.1235, + "step": 4850 + }, + { + "epoch": 3.27, + "grad_norm": 3.3534088134765625, + "learning_rate": 9.56070351758794e-06, + "loss": 0.1208, + "step": 4875 + }, + { + "epoch": 3.29, + "grad_norm": 3.123911142349243, + "learning_rate": 9.55819095477387e-06, + "loss": 0.1217, + "step": 4900 + }, + { + "epoch": 3.31, + "grad_norm": 2.8602004051208496, + "learning_rate": 9.5556783919598e-06, + "loss": 0.1194, + "step": 4925 + }, + { + "epoch": 3.32, + "grad_norm": 4.2586846351623535, + "learning_rate": 9.55316582914573e-06, + "loss": 0.1238, + "step": 4950 + }, + { + "epoch": 3.34, + "grad_norm": 2.9559895992279053, + "learning_rate": 9.550653266331658e-06, + "loss": 0.1173, + "step": 4975 + }, + { + "epoch": 3.36, + "grad_norm": 3.430467128753662, + "learning_rate": 9.548140703517589e-06, + "loss": 0.1242, + "step": 5000 + }, + { + "epoch": 3.36, + "eval_loss": 0.08331552892923355, + "eval_runtime": 906.4848, + "eval_samples_per_second": 1.586, + "eval_steps_per_second": 1.586, + "eval_wer": 79.43237561317449, + "step": 5000 + }, + { + "epoch": 3.37, + "grad_norm": 3.843204975128174, + "learning_rate": 9.545628140703518e-06, + "loss": 0.1207, + "step": 5025 + }, + { + "epoch": 3.39, + "grad_norm": 4.088472843170166, + "learning_rate": 9.543115577889448e-06, + "loss": 0.1165, + "step": 5050 + }, + { + "epoch": 3.41, + "grad_norm": 4.112658977508545, + "learning_rate": 9.540603015075379e-06, + "loss": 0.1198, + "step": 5075 + }, + { + "epoch": 3.43, + "grad_norm": 4.109236240386963, + "learning_rate": 9.538090452261308e-06, + "loss": 0.1215, + "step": 5100 + }, + { + "epoch": 3.44, + "grad_norm": 3.567878246307373, + "learning_rate": 9.535577889447237e-06, + "loss": 0.122, + "step": 5125 + }, + { + "epoch": 3.46, + "grad_norm": 2.913968801498413, + "learning_rate": 9.533065326633166e-06, + "loss": 0.1213, + "step": 5150 + }, + { + "epoch": 3.48, + "grad_norm": 3.458784818649292, + "learning_rate": 9.530552763819096e-06, + "loss": 0.1195, + "step": 5175 + }, + { + "epoch": 3.49, + "grad_norm": 3.110161542892456, + "learning_rate": 9.528040201005025e-06, + "loss": 0.1224, + "step": 5200 + }, + { + "epoch": 3.51, + "grad_norm": 3.8568274974823, + "learning_rate": 9.525527638190956e-06, + "loss": 0.1195, + "step": 5225 + }, + { + "epoch": 3.53, + "grad_norm": 3.053230047225952, + "learning_rate": 9.523015075376885e-06, + "loss": 0.1215, + "step": 5250 + }, + { + "epoch": 3.54, + "grad_norm": 3.0793192386627197, + "learning_rate": 9.520502512562815e-06, + "loss": 0.1199, + "step": 5275 + }, + { + "epoch": 3.56, + "grad_norm": 2.8230350017547607, + "learning_rate": 9.517989949748744e-06, + "loss": 0.119, + "step": 5300 + }, + { + "epoch": 3.58, + "grad_norm": 2.637842893600464, + "learning_rate": 9.515477386934673e-06, + "loss": 0.1206, + "step": 5325 + }, + { + "epoch": 3.59, + "grad_norm": 2.722827911376953, + "learning_rate": 9.512964824120604e-06, + "loss": 0.1185, + "step": 5350 + }, + { + "epoch": 3.61, + "grad_norm": 2.8432233333587646, + "learning_rate": 9.510452261306534e-06, + "loss": 0.1213, + "step": 5375 + }, + { + "epoch": 3.63, + "grad_norm": 2.5972678661346436, + "learning_rate": 9.507939698492463e-06, + "loss": 0.1188, + "step": 5400 + }, + { + "epoch": 3.64, + "grad_norm": 2.9363596439361572, + "learning_rate": 9.505427135678392e-06, + "loss": 0.1184, + "step": 5425 + }, + { + "epoch": 3.66, + "grad_norm": 3.3370134830474854, + "learning_rate": 9.502914572864322e-06, + "loss": 0.1206, + "step": 5450 + }, + { + "epoch": 3.68, + "grad_norm": 2.8531062602996826, + "learning_rate": 9.500402010050253e-06, + "loss": 0.1197, + "step": 5475 + }, + { + "epoch": 3.69, + "grad_norm": 2.996798038482666, + "learning_rate": 9.497889447236182e-06, + "loss": 0.1183, + "step": 5500 + }, + { + "epoch": 3.71, + "grad_norm": 2.7696945667266846, + "learning_rate": 9.495376884422111e-06, + "loss": 0.1205, + "step": 5525 + }, + { + "epoch": 3.73, + "grad_norm": 2.8554718494415283, + "learning_rate": 9.49286432160804e-06, + "loss": 0.1176, + "step": 5550 + }, + { + "epoch": 3.74, + "grad_norm": 3.669240951538086, + "learning_rate": 9.49035175879397e-06, + "loss": 0.1186, + "step": 5575 + }, + { + "epoch": 3.76, + "grad_norm": 2.7125723361968994, + "learning_rate": 9.4878391959799e-06, + "loss": 0.1203, + "step": 5600 + }, + { + "epoch": 3.78, + "grad_norm": 2.618934392929077, + "learning_rate": 9.48532663316583e-06, + "loss": 0.1164, + "step": 5625 + }, + { + "epoch": 3.79, + "grad_norm": 3.904175281524658, + "learning_rate": 9.48281407035176e-06, + "loss": 0.1219, + "step": 5650 + }, + { + "epoch": 3.81, + "grad_norm": 3.5440423488616943, + "learning_rate": 9.480301507537689e-06, + "loss": 0.1162, + "step": 5675 + }, + { + "epoch": 3.83, + "grad_norm": 2.8084070682525635, + "learning_rate": 9.47778894472362e-06, + "loss": 0.1186, + "step": 5700 + }, + { + "epoch": 3.84, + "grad_norm": 2.6558783054351807, + "learning_rate": 9.475276381909548e-06, + "loss": 0.1144, + "step": 5725 + }, + { + "epoch": 3.86, + "grad_norm": 3.752830743789673, + "learning_rate": 9.472763819095479e-06, + "loss": 0.1167, + "step": 5750 + }, + { + "epoch": 3.88, + "grad_norm": 4.293878078460693, + "learning_rate": 9.470251256281408e-06, + "loss": 0.119, + "step": 5775 + }, + { + "epoch": 3.9, + "grad_norm": 2.8852791786193848, + "learning_rate": 9.467738693467337e-06, + "loss": 0.1158, + "step": 5800 + }, + { + "epoch": 3.91, + "grad_norm": 2.745896577835083, + "learning_rate": 9.465226130653267e-06, + "loss": 0.1192, + "step": 5825 + }, + { + "epoch": 3.93, + "grad_norm": 3.4636952877044678, + "learning_rate": 9.462713567839196e-06, + "loss": 0.1153, + "step": 5850 + }, + { + "epoch": 3.95, + "grad_norm": 2.9681363105773926, + "learning_rate": 9.460201005025127e-06, + "loss": 0.1206, + "step": 5875 + }, + { + "epoch": 3.96, + "grad_norm": 2.9947328567504883, + "learning_rate": 9.457688442211056e-06, + "loss": 0.1159, + "step": 5900 + }, + { + "epoch": 3.98, + "grad_norm": 2.8335916996002197, + "learning_rate": 9.455175879396986e-06, + "loss": 0.1145, + "step": 5925 + }, + { + "epoch": 4.0, + "grad_norm": 3.053276300430298, + "learning_rate": 9.452663316582915e-06, + "loss": 0.1146, + "step": 5950 + }, + { + "epoch": 4.01, + "grad_norm": 2.810828447341919, + "learning_rate": 9.450150753768846e-06, + "loss": 0.1103, + "step": 5975 + }, + { + "epoch": 4.03, + "grad_norm": 4.065425395965576, + "learning_rate": 9.447638190954774e-06, + "loss": 0.1075, + "step": 6000 + }, + { + "epoch": 4.03, + "eval_loss": 0.07984987646341324, + "eval_runtime": 914.6167, + "eval_samples_per_second": 1.572, + "eval_steps_per_second": 1.572, + "eval_wer": 57.5332866152768, + "step": 6000 + }, + { + "epoch": 4.05, + "grad_norm": 3.2369909286499023, + "learning_rate": 9.445125628140705e-06, + "loss": 0.1073, + "step": 6025 + }, + { + "epoch": 4.06, + "grad_norm": 3.32157826423645, + "learning_rate": 9.442613065326634e-06, + "loss": 0.1047, + "step": 6050 + }, + { + "epoch": 4.08, + "grad_norm": 3.9391729831695557, + "learning_rate": 9.440100502512563e-06, + "loss": 0.1036, + "step": 6075 + }, + { + "epoch": 4.1, + "grad_norm": 3.063920736312866, + "learning_rate": 9.437587939698494e-06, + "loss": 0.1046, + "step": 6100 + }, + { + "epoch": 4.11, + "grad_norm": 2.6293845176696777, + "learning_rate": 9.435075376884422e-06, + "loss": 0.107, + "step": 6125 + }, + { + "epoch": 4.13, + "grad_norm": 2.7756454944610596, + "learning_rate": 9.432562814070353e-06, + "loss": 0.1073, + "step": 6150 + }, + { + "epoch": 4.15, + "grad_norm": 2.763529062271118, + "learning_rate": 9.430050251256282e-06, + "loss": 0.1078, + "step": 6175 + }, + { + "epoch": 4.16, + "grad_norm": 3.1121578216552734, + "learning_rate": 9.427537688442212e-06, + "loss": 0.1082, + "step": 6200 + }, + { + "epoch": 4.18, + "grad_norm": 2.825221061706543, + "learning_rate": 9.425025125628141e-06, + "loss": 0.1071, + "step": 6225 + }, + { + "epoch": 4.2, + "grad_norm": 3.468634605407715, + "learning_rate": 9.422512562814072e-06, + "loss": 0.1083, + "step": 6250 + }, + { + "epoch": 4.21, + "grad_norm": 2.75702166557312, + "learning_rate": 9.42e-06, + "loss": 0.1053, + "step": 6275 + }, + { + "epoch": 4.23, + "grad_norm": 3.145787000656128, + "learning_rate": 9.41748743718593e-06, + "loss": 0.1051, + "step": 6300 + }, + { + "epoch": 4.25, + "grad_norm": 2.607553720474243, + "learning_rate": 9.41497487437186e-06, + "loss": 0.105, + "step": 6325 + }, + { + "epoch": 4.26, + "grad_norm": 2.7493605613708496, + "learning_rate": 9.41246231155779e-06, + "loss": 0.1084, + "step": 6350 + }, + { + "epoch": 4.28, + "grad_norm": 2.865753412246704, + "learning_rate": 9.40994974874372e-06, + "loss": 0.1095, + "step": 6375 + }, + { + "epoch": 4.3, + "grad_norm": 3.4291553497314453, + "learning_rate": 9.407437185929648e-06, + "loss": 0.1074, + "step": 6400 + }, + { + "epoch": 4.31, + "grad_norm": 2.664867639541626, + "learning_rate": 9.404924623115579e-06, + "loss": 0.1058, + "step": 6425 + }, + { + "epoch": 4.33, + "grad_norm": 3.18866229057312, + "learning_rate": 9.402412060301508e-06, + "loss": 0.1053, + "step": 6450 + }, + { + "epoch": 4.35, + "grad_norm": 2.751782178878784, + "learning_rate": 9.399899497487438e-06, + "loss": 0.108, + "step": 6475 + }, + { + "epoch": 4.37, + "grad_norm": 3.0513594150543213, + "learning_rate": 9.397386934673369e-06, + "loss": 0.1053, + "step": 6500 + }, + { + "epoch": 4.38, + "grad_norm": 2.6278483867645264, + "learning_rate": 9.394874371859298e-06, + "loss": 0.1079, + "step": 6525 + }, + { + "epoch": 4.4, + "grad_norm": 2.737006187438965, + "learning_rate": 9.392361809045227e-06, + "loss": 0.1065, + "step": 6550 + }, + { + "epoch": 4.42, + "grad_norm": 3.074719190597534, + "learning_rate": 9.389849246231157e-06, + "loss": 0.1057, + "step": 6575 + }, + { + "epoch": 4.43, + "grad_norm": 2.827303171157837, + "learning_rate": 9.387336683417086e-06, + "loss": 0.1076, + "step": 6600 + }, + { + "epoch": 4.45, + "grad_norm": 2.663496732711792, + "learning_rate": 9.384824120603015e-06, + "loss": 0.105, + "step": 6625 + }, + { + "epoch": 4.47, + "grad_norm": 2.6089656352996826, + "learning_rate": 9.382311557788946e-06, + "loss": 0.1059, + "step": 6650 + }, + { + "epoch": 4.48, + "grad_norm": 2.7915990352630615, + "learning_rate": 9.379798994974874e-06, + "loss": 0.1068, + "step": 6675 + }, + { + "epoch": 4.5, + "grad_norm": 3.7253527641296387, + "learning_rate": 9.377286432160805e-06, + "loss": 0.1093, + "step": 6700 + }, + { + "epoch": 4.52, + "grad_norm": 3.905001163482666, + "learning_rate": 9.374773869346734e-06, + "loss": 0.1035, + "step": 6725 + }, + { + "epoch": 4.53, + "grad_norm": 2.698610782623291, + "learning_rate": 9.372261306532664e-06, + "loss": 0.1053, + "step": 6750 + }, + { + "epoch": 4.55, + "grad_norm": 2.6254777908325195, + "learning_rate": 9.369748743718595e-06, + "loss": 0.1054, + "step": 6775 + }, + { + "epoch": 4.57, + "grad_norm": 3.4177279472351074, + "learning_rate": 9.367236180904524e-06, + "loss": 0.1032, + "step": 6800 + }, + { + "epoch": 4.58, + "grad_norm": 2.763265609741211, + "learning_rate": 9.364723618090453e-06, + "loss": 0.1083, + "step": 6825 + }, + { + "epoch": 4.6, + "grad_norm": 2.5962061882019043, + "learning_rate": 9.362211055276383e-06, + "loss": 0.1062, + "step": 6850 + }, + { + "epoch": 4.62, + "grad_norm": 3.1983237266540527, + "learning_rate": 9.359698492462312e-06, + "loss": 0.1068, + "step": 6875 + }, + { + "epoch": 4.63, + "grad_norm": 3.396357774734497, + "learning_rate": 9.357185929648241e-06, + "loss": 0.1032, + "step": 6900 + }, + { + "epoch": 4.65, + "grad_norm": 2.495387315750122, + "learning_rate": 9.354673366834172e-06, + "loss": 0.1026, + "step": 6925 + }, + { + "epoch": 4.67, + "grad_norm": 2.6819722652435303, + "learning_rate": 9.352261306532664e-06, + "loss": 0.1051, + "step": 6950 + }, + { + "epoch": 4.68, + "grad_norm": 2.7940011024475098, + "learning_rate": 9.349748743718595e-06, + "loss": 0.1047, + "step": 6975 + }, + { + "epoch": 4.7, + "grad_norm": 2.944594144821167, + "learning_rate": 9.347236180904522e-06, + "loss": 0.1074, + "step": 7000 + }, + { + "epoch": 4.7, + "eval_loss": 0.07687355577945709, + "eval_runtime": 929.395, + "eval_samples_per_second": 1.547, + "eval_steps_per_second": 1.547, + "eval_wer": 53.46881569726699, + "step": 7000 + }, + { + "epoch": 4.72, + "grad_norm": 2.9731857776641846, + "learning_rate": 9.344723618090453e-06, + "loss": 0.1058, + "step": 7025 + }, + { + "epoch": 4.73, + "grad_norm": 3.146904706954956, + "learning_rate": 9.342211055276383e-06, + "loss": 0.1054, + "step": 7050 + }, + { + "epoch": 4.75, + "grad_norm": 2.6802990436553955, + "learning_rate": 9.339698492462312e-06, + "loss": 0.1064, + "step": 7075 + }, + { + "epoch": 4.77, + "grad_norm": 2.875955820083618, + "learning_rate": 9.337185929648241e-06, + "loss": 0.1017, + "step": 7100 + }, + { + "epoch": 4.79, + "grad_norm": 2.767763614654541, + "learning_rate": 9.334673366834172e-06, + "loss": 0.103, + "step": 7125 + }, + { + "epoch": 4.8, + "grad_norm": 2.8487796783447266, + "learning_rate": 9.332160804020102e-06, + "loss": 0.1047, + "step": 7150 + }, + { + "epoch": 4.82, + "grad_norm": 2.7425243854522705, + "learning_rate": 9.329648241206031e-06, + "loss": 0.1007, + "step": 7175 + }, + { + "epoch": 4.84, + "grad_norm": 2.4976718425750732, + "learning_rate": 9.32713567839196e-06, + "loss": 0.103, + "step": 7200 + }, + { + "epoch": 4.85, + "grad_norm": 2.9242231845855713, + "learning_rate": 9.32462311557789e-06, + "loss": 0.103, + "step": 7225 + }, + { + "epoch": 4.87, + "grad_norm": 2.5247926712036133, + "learning_rate": 9.32211055276382e-06, + "loss": 0.1041, + "step": 7250 + }, + { + "epoch": 4.89, + "grad_norm": 2.655942678451538, + "learning_rate": 9.319597989949748e-06, + "loss": 0.1006, + "step": 7275 + }, + { + "epoch": 4.9, + "grad_norm": 4.131620407104492, + "learning_rate": 9.31708542713568e-06, + "loss": 0.1041, + "step": 7300 + }, + { + "epoch": 4.92, + "grad_norm": 2.5244383811950684, + "learning_rate": 9.314572864321609e-06, + "loss": 0.1025, + "step": 7325 + }, + { + "epoch": 4.94, + "grad_norm": 3.20101261138916, + "learning_rate": 9.312060301507538e-06, + "loss": 0.1064, + "step": 7350 + }, + { + "epoch": 4.95, + "grad_norm": 2.8850972652435303, + "learning_rate": 9.309547738693469e-06, + "loss": 0.1089, + "step": 7375 + }, + { + "epoch": 4.97, + "grad_norm": 2.491765260696411, + "learning_rate": 9.307035175879398e-06, + "loss": 0.105, + "step": 7400 + }, + { + "epoch": 4.99, + "grad_norm": 3.3025479316711426, + "learning_rate": 9.304522613065328e-06, + "loss": 0.105, + "step": 7425 + }, + { + "epoch": 5.0, + "grad_norm": 2.931335687637329, + "learning_rate": 9.302010050251257e-06, + "loss": 0.1029, + "step": 7450 + }, + { + "epoch": 5.02, + "grad_norm": 2.7513043880462646, + "learning_rate": 9.299497487437186e-06, + "loss": 0.0936, + "step": 7475 + }, + { + "epoch": 5.04, + "grad_norm": 2.629145622253418, + "learning_rate": 9.296984924623116e-06, + "loss": 0.0949, + "step": 7500 + }, + { + "epoch": 5.05, + "grad_norm": 2.7287440299987793, + "learning_rate": 9.294472361809047e-06, + "loss": 0.0934, + "step": 7525 + }, + { + "epoch": 5.07, + "grad_norm": 2.982724666595459, + "learning_rate": 9.291959798994976e-06, + "loss": 0.0941, + "step": 7550 + }, + { + "epoch": 5.09, + "grad_norm": 2.6540138721466064, + "learning_rate": 9.289447236180905e-06, + "loss": 0.0926, + "step": 7575 + }, + { + "epoch": 5.1, + "grad_norm": 2.3603038787841797, + "learning_rate": 9.286934673366835e-06, + "loss": 0.0957, + "step": 7600 + }, + { + "epoch": 5.12, + "grad_norm": 3.1355912685394287, + "learning_rate": 9.284422110552764e-06, + "loss": 0.0981, + "step": 7625 + }, + { + "epoch": 5.14, + "grad_norm": 2.345231771469116, + "learning_rate": 9.281909547738695e-06, + "loss": 0.0958, + "step": 7650 + }, + { + "epoch": 5.15, + "grad_norm": 2.71669340133667, + "learning_rate": 9.279396984924624e-06, + "loss": 0.0936, + "step": 7675 + }, + { + "epoch": 5.17, + "grad_norm": 3.0384445190429688, + "learning_rate": 9.276884422110554e-06, + "loss": 0.0957, + "step": 7700 + }, + { + "epoch": 5.19, + "grad_norm": 2.6773102283477783, + "learning_rate": 9.274371859296483e-06, + "loss": 0.0939, + "step": 7725 + }, + { + "epoch": 5.2, + "grad_norm": 2.9012200832366943, + "learning_rate": 9.271859296482412e-06, + "loss": 0.0949, + "step": 7750 + }, + { + "epoch": 5.22, + "grad_norm": 2.8586056232452393, + "learning_rate": 9.269346733668343e-06, + "loss": 0.098, + "step": 7775 + }, + { + "epoch": 5.24, + "grad_norm": 2.8841850757598877, + "learning_rate": 9.266834170854273e-06, + "loss": 0.0931, + "step": 7800 + }, + { + "epoch": 5.26, + "grad_norm": 2.5372838973999023, + "learning_rate": 9.264321608040202e-06, + "loss": 0.0941, + "step": 7825 + }, + { + "epoch": 5.27, + "grad_norm": 3.1618082523345947, + "learning_rate": 9.261809045226131e-06, + "loss": 0.093, + "step": 7850 + }, + { + "epoch": 5.29, + "grad_norm": 3.331390142440796, + "learning_rate": 9.25929648241206e-06, + "loss": 0.0933, + "step": 7875 + }, + { + "epoch": 5.31, + "grad_norm": 2.2937328815460205, + "learning_rate": 9.25678391959799e-06, + "loss": 0.0922, + "step": 7900 + }, + { + "epoch": 5.32, + "grad_norm": 2.4022207260131836, + "learning_rate": 9.254271356783921e-06, + "loss": 0.0932, + "step": 7925 + }, + { + "epoch": 5.34, + "grad_norm": 2.6664276123046875, + "learning_rate": 9.25175879396985e-06, + "loss": 0.095, + "step": 7950 + }, + { + "epoch": 5.36, + "grad_norm": 2.866859197616577, + "learning_rate": 9.24924623115578e-06, + "loss": 0.0956, + "step": 7975 + }, + { + "epoch": 5.37, + "grad_norm": 2.672640323638916, + "learning_rate": 9.24673366834171e-06, + "loss": 0.0916, + "step": 8000 + }, + { + "epoch": 5.37, + "eval_loss": 0.07515992224216461, + "eval_runtime": 933.6113, + "eval_samples_per_second": 1.54, + "eval_steps_per_second": 1.54, + "eval_wer": 46.049404344779255, + "step": 8000 + }, + { + "epoch": 5.39, + "grad_norm": 2.710423231124878, + "learning_rate": 9.244221105527638e-06, + "loss": 0.0943, + "step": 8025 + }, + { + "epoch": 5.41, + "grad_norm": 2.5025436878204346, + "learning_rate": 9.24170854271357e-06, + "loss": 0.0919, + "step": 8050 + }, + { + "epoch": 5.42, + "grad_norm": 2.6012582778930664, + "learning_rate": 9.239195979899498e-06, + "loss": 0.0955, + "step": 8075 + }, + { + "epoch": 5.44, + "grad_norm": 2.6384871006011963, + "learning_rate": 9.236683417085428e-06, + "loss": 0.0939, + "step": 8100 + }, + { + "epoch": 5.46, + "grad_norm": 2.5509018898010254, + "learning_rate": 9.234170854271357e-06, + "loss": 0.0949, + "step": 8125 + }, + { + "epoch": 5.47, + "grad_norm": 3.7073445320129395, + "learning_rate": 9.231658291457286e-06, + "loss": 0.0954, + "step": 8150 + }, + { + "epoch": 5.49, + "grad_norm": 2.231095314025879, + "learning_rate": 9.229145728643217e-06, + "loss": 0.0947, + "step": 8175 + }, + { + "epoch": 5.51, + "grad_norm": 2.5014142990112305, + "learning_rate": 9.226633165829147e-06, + "loss": 0.098, + "step": 8200 + }, + { + "epoch": 5.52, + "grad_norm": 2.8307981491088867, + "learning_rate": 9.224120603015076e-06, + "loss": 0.093, + "step": 8225 + }, + { + "epoch": 5.54, + "grad_norm": 3.688793182373047, + "learning_rate": 9.221608040201005e-06, + "loss": 0.0976, + "step": 8250 + }, + { + "epoch": 5.56, + "grad_norm": 2.5747296810150146, + "learning_rate": 9.219095477386936e-06, + "loss": 0.0928, + "step": 8275 + }, + { + "epoch": 5.57, + "grad_norm": 2.6798343658447266, + "learning_rate": 9.216582914572864e-06, + "loss": 0.0938, + "step": 8300 + }, + { + "epoch": 5.59, + "grad_norm": 3.1839914321899414, + "learning_rate": 9.214070351758795e-06, + "loss": 0.0951, + "step": 8325 + }, + { + "epoch": 5.61, + "grad_norm": 2.474449872970581, + "learning_rate": 9.211557788944724e-06, + "loss": 0.0947, + "step": 8350 + }, + { + "epoch": 5.62, + "grad_norm": 2.6252379417419434, + "learning_rate": 9.209045226130654e-06, + "loss": 0.0943, + "step": 8375 + }, + { + "epoch": 5.64, + "grad_norm": 2.6871514320373535, + "learning_rate": 9.206532663316585e-06, + "loss": 0.0928, + "step": 8400 + }, + { + "epoch": 5.66, + "grad_norm": 3.0898377895355225, + "learning_rate": 9.204020100502512e-06, + "loss": 0.093, + "step": 8425 + }, + { + "epoch": 5.67, + "grad_norm": 2.8474881649017334, + "learning_rate": 9.201507537688443e-06, + "loss": 0.0944, + "step": 8450 + }, + { + "epoch": 5.69, + "grad_norm": 2.3759796619415283, + "learning_rate": 9.198994974874373e-06, + "loss": 0.0974, + "step": 8475 + }, + { + "epoch": 5.71, + "grad_norm": 2.7885348796844482, + "learning_rate": 9.196482412060302e-06, + "loss": 0.0926, + "step": 8500 + }, + { + "epoch": 5.73, + "grad_norm": 3.531207799911499, + "learning_rate": 9.193969849246231e-06, + "loss": 0.0927, + "step": 8525 + }, + { + "epoch": 5.74, + "grad_norm": 2.9926812648773193, + "learning_rate": 9.191457286432162e-06, + "loss": 0.0952, + "step": 8550 + }, + { + "epoch": 5.76, + "grad_norm": 2.675708532333374, + "learning_rate": 9.188944723618092e-06, + "loss": 0.091, + "step": 8575 + }, + { + "epoch": 5.78, + "grad_norm": 3.062901735305786, + "learning_rate": 9.186432160804021e-06, + "loss": 0.0918, + "step": 8600 + }, + { + "epoch": 5.79, + "grad_norm": 2.8128740787506104, + "learning_rate": 9.18391959798995e-06, + "loss": 0.0956, + "step": 8625 + }, + { + "epoch": 5.81, + "grad_norm": 2.575350046157837, + "learning_rate": 9.18140703517588e-06, + "loss": 0.0926, + "step": 8650 + }, + { + "epoch": 5.83, + "grad_norm": 3.0187032222747803, + "learning_rate": 9.17889447236181e-06, + "loss": 0.0904, + "step": 8675 + }, + { + "epoch": 5.84, + "grad_norm": 2.8932690620422363, + "learning_rate": 9.176381909547738e-06, + "loss": 0.0933, + "step": 8700 + }, + { + "epoch": 5.86, + "grad_norm": 2.5999746322631836, + "learning_rate": 9.17386934673367e-06, + "loss": 0.0924, + "step": 8725 + }, + { + "epoch": 5.88, + "grad_norm": 2.621938467025757, + "learning_rate": 9.171356783919599e-06, + "loss": 0.0924, + "step": 8750 + }, + { + "epoch": 5.89, + "grad_norm": 2.5686533451080322, + "learning_rate": 9.168844221105528e-06, + "loss": 0.0919, + "step": 8775 + }, + { + "epoch": 5.91, + "grad_norm": 2.7545671463012695, + "learning_rate": 9.166331658291459e-06, + "loss": 0.0914, + "step": 8800 + }, + { + "epoch": 5.93, + "grad_norm": 2.763503313064575, + "learning_rate": 9.163819095477388e-06, + "loss": 0.0919, + "step": 8825 + }, + { + "epoch": 5.94, + "grad_norm": 2.5559895038604736, + "learning_rate": 9.161306532663318e-06, + "loss": 0.095, + "step": 8850 + }, + { + "epoch": 5.96, + "grad_norm": 2.411494255065918, + "learning_rate": 9.158793969849247e-06, + "loss": 0.0904, + "step": 8875 + }, + { + "epoch": 5.98, + "grad_norm": 2.722850799560547, + "learning_rate": 9.156281407035176e-06, + "loss": 0.0932, + "step": 8900 + }, + { + "epoch": 5.99, + "grad_norm": 2.1574954986572266, + "learning_rate": 9.153768844221106e-06, + "loss": 0.095, + "step": 8925 + }, + { + "epoch": 6.01, + "grad_norm": 2.5372064113616943, + "learning_rate": 9.151256281407037e-06, + "loss": 0.0851, + "step": 8950 + }, + { + "epoch": 6.03, + "grad_norm": 2.320565700531006, + "learning_rate": 9.148743718592964e-06, + "loss": 0.0828, + "step": 8975 + }, + { + "epoch": 6.04, + "grad_norm": 2.7525405883789062, + "learning_rate": 9.146231155778895e-06, + "loss": 0.0833, + "step": 9000 + }, + { + "epoch": 6.04, + "eval_loss": 0.07459180802106857, + "eval_runtime": 938.7058, + "eval_samples_per_second": 1.532, + "eval_steps_per_second": 1.532, + "eval_wer": 48.12543798177995, + "step": 9000 + }, + { + "epoch": 6.06, + "grad_norm": 2.663193941116333, + "learning_rate": 9.143718592964825e-06, + "loss": 0.0831, + "step": 9025 + }, + { + "epoch": 6.08, + "grad_norm": 2.717726469039917, + "learning_rate": 9.141206030150754e-06, + "loss": 0.083, + "step": 9050 + }, + { + "epoch": 6.09, + "grad_norm": 2.820401906967163, + "learning_rate": 9.138693467336685e-06, + "loss": 0.0839, + "step": 9075 + }, + { + "epoch": 6.11, + "grad_norm": 2.7127275466918945, + "learning_rate": 9.136180904522614e-06, + "loss": 0.0831, + "step": 9100 + }, + { + "epoch": 6.13, + "grad_norm": 2.4587132930755615, + "learning_rate": 9.133668341708544e-06, + "loss": 0.082, + "step": 9125 + }, + { + "epoch": 6.15, + "grad_norm": 2.659923553466797, + "learning_rate": 9.131155778894473e-06, + "loss": 0.0844, + "step": 9150 + }, + { + "epoch": 6.16, + "grad_norm": 2.549023151397705, + "learning_rate": 9.128643216080402e-06, + "loss": 0.0837, + "step": 9175 + }, + { + "epoch": 6.18, + "grad_norm": 2.6309587955474854, + "learning_rate": 9.126130653266332e-06, + "loss": 0.0854, + "step": 9200 + }, + { + "epoch": 6.2, + "grad_norm": 2.7024123668670654, + "learning_rate": 9.123618090452263e-06, + "loss": 0.0847, + "step": 9225 + }, + { + "epoch": 6.21, + "grad_norm": 3.7530219554901123, + "learning_rate": 9.121105527638192e-06, + "loss": 0.083, + "step": 9250 + }, + { + "epoch": 6.23, + "grad_norm": 2.680955648422241, + "learning_rate": 9.118592964824121e-06, + "loss": 0.0832, + "step": 9275 + }, + { + "epoch": 6.25, + "grad_norm": 2.6058413982391357, + "learning_rate": 9.11608040201005e-06, + "loss": 0.0856, + "step": 9300 + }, + { + "epoch": 6.26, + "grad_norm": 2.389058828353882, + "learning_rate": 9.11356783919598e-06, + "loss": 0.0838, + "step": 9325 + }, + { + "epoch": 6.28, + "grad_norm": 2.699385166168213, + "learning_rate": 9.111055276381911e-06, + "loss": 0.0862, + "step": 9350 + }, + { + "epoch": 6.3, + "grad_norm": 2.527519702911377, + "learning_rate": 9.10854271356784e-06, + "loss": 0.0866, + "step": 9375 + }, + { + "epoch": 6.31, + "grad_norm": 2.4927728176116943, + "learning_rate": 9.10603015075377e-06, + "loss": 0.084, + "step": 9400 + }, + { + "epoch": 6.33, + "grad_norm": 3.1683754920959473, + "learning_rate": 9.1035175879397e-06, + "loss": 0.0823, + "step": 9425 + }, + { + "epoch": 6.35, + "grad_norm": 2.3609166145324707, + "learning_rate": 9.101005025125628e-06, + "loss": 0.0865, + "step": 9450 + }, + { + "epoch": 6.36, + "grad_norm": 2.4730277061462402, + "learning_rate": 9.09849246231156e-06, + "loss": 0.0863, + "step": 9475 + }, + { + "epoch": 6.38, + "grad_norm": 2.8320670127868652, + "learning_rate": 9.095979899497489e-06, + "loss": 0.0865, + "step": 9500 + }, + { + "epoch": 6.4, + "grad_norm": 3.0311648845672607, + "learning_rate": 9.09356783919598e-06, + "loss": 0.0858, + "step": 9525 + }, + { + "epoch": 6.41, + "grad_norm": 2.4911820888519287, + "learning_rate": 9.091055276381911e-06, + "loss": 0.0837, + "step": 9550 + }, + { + "epoch": 6.43, + "grad_norm": 2.6998291015625, + "learning_rate": 9.088542713567839e-06, + "loss": 0.0843, + "step": 9575 + }, + { + "epoch": 6.45, + "grad_norm": 2.284531593322754, + "learning_rate": 9.08603015075377e-06, + "loss": 0.0859, + "step": 9600 + }, + { + "epoch": 6.46, + "grad_norm": 2.8543198108673096, + "learning_rate": 9.083517587939699e-06, + "loss": 0.0851, + "step": 9625 + }, + { + "epoch": 6.48, + "grad_norm": 2.7972476482391357, + "learning_rate": 9.081005025125628e-06, + "loss": 0.0839, + "step": 9650 + }, + { + "epoch": 6.5, + "grad_norm": 3.091080665588379, + "learning_rate": 9.07849246231156e-06, + "loss": 0.0856, + "step": 9675 + }, + { + "epoch": 6.51, + "grad_norm": 2.6846652030944824, + "learning_rate": 9.075979899497489e-06, + "loss": 0.0836, + "step": 9700 + }, + { + "epoch": 6.53, + "grad_norm": 2.55631160736084, + "learning_rate": 9.073467336683418e-06, + "loss": 0.0859, + "step": 9725 + }, + { + "epoch": 6.55, + "grad_norm": 2.745532989501953, + "learning_rate": 9.070954773869347e-06, + "loss": 0.0841, + "step": 9750 + }, + { + "epoch": 6.56, + "grad_norm": 2.4253811836242676, + "learning_rate": 9.068442211055277e-06, + "loss": 0.0835, + "step": 9775 + }, + { + "epoch": 6.58, + "grad_norm": 2.6354739665985107, + "learning_rate": 9.065929648241206e-06, + "loss": 0.0854, + "step": 9800 + }, + { + "epoch": 6.6, + "grad_norm": 3.109679698944092, + "learning_rate": 9.063417085427137e-06, + "loss": 0.0841, + "step": 9825 + }, + { + "epoch": 6.62, + "grad_norm": 2.7716126441955566, + "learning_rate": 9.060904522613066e-06, + "loss": 0.0851, + "step": 9850 + }, + { + "epoch": 6.63, + "grad_norm": 2.8653435707092285, + "learning_rate": 9.058391959798996e-06, + "loss": 0.082, + "step": 9875 + }, + { + "epoch": 6.65, + "grad_norm": 2.604787826538086, + "learning_rate": 9.055879396984925e-06, + "loss": 0.0842, + "step": 9900 + }, + { + "epoch": 6.67, + "grad_norm": 2.8881208896636963, + "learning_rate": 9.053366834170854e-06, + "loss": 0.0865, + "step": 9925 + }, + { + "epoch": 6.68, + "grad_norm": 2.84915828704834, + "learning_rate": 9.050854271356785e-06, + "loss": 0.0829, + "step": 9950 + }, + { + "epoch": 6.7, + "grad_norm": 2.54783034324646, + "learning_rate": 9.048341708542715e-06, + "loss": 0.0848, + "step": 9975 + }, + { + "epoch": 6.72, + "grad_norm": 2.642479658126831, + "learning_rate": 9.045829145728644e-06, + "loss": 0.0851, + "step": 10000 + }, + { + "epoch": 6.72, + "eval_loss": 0.07398388534784317, + "eval_runtime": 940.9612, + "eval_samples_per_second": 1.528, + "eval_steps_per_second": 1.528, + "eval_wer": 38.03433777154871, + "step": 10000 + }, + { + "epoch": 6.73, + "grad_norm": 2.4956440925598145, + "learning_rate": 9.043316582914573e-06, + "loss": 0.0848, + "step": 10025 + }, + { + "epoch": 6.75, + "grad_norm": 2.9228978157043457, + "learning_rate": 9.040804020100503e-06, + "loss": 0.0811, + "step": 10050 + }, + { + "epoch": 6.77, + "grad_norm": 2.9106268882751465, + "learning_rate": 9.038291457286434e-06, + "loss": 0.0827, + "step": 10075 + }, + { + "epoch": 6.78, + "grad_norm": 2.4934680461883545, + "learning_rate": 9.035778894472363e-06, + "loss": 0.0816, + "step": 10100 + }, + { + "epoch": 6.8, + "grad_norm": 2.345693349838257, + "learning_rate": 9.033266331658292e-06, + "loss": 0.0811, + "step": 10125 + }, + { + "epoch": 6.82, + "grad_norm": 2.3857240676879883, + "learning_rate": 9.030753768844222e-06, + "loss": 0.0849, + "step": 10150 + }, + { + "epoch": 6.83, + "grad_norm": 2.5776798725128174, + "learning_rate": 9.028241206030151e-06, + "loss": 0.0853, + "step": 10175 + }, + { + "epoch": 6.85, + "grad_norm": 2.6006035804748535, + "learning_rate": 9.02572864321608e-06, + "loss": 0.0842, + "step": 10200 + }, + { + "epoch": 6.87, + "grad_norm": 2.258542776107788, + "learning_rate": 9.023216080402011e-06, + "loss": 0.0823, + "step": 10225 + }, + { + "epoch": 6.88, + "grad_norm": 2.793701410293579, + "learning_rate": 9.02070351758794e-06, + "loss": 0.0822, + "step": 10250 + }, + { + "epoch": 6.9, + "grad_norm": 2.9286675453186035, + "learning_rate": 9.01819095477387e-06, + "loss": 0.0851, + "step": 10275 + }, + { + "epoch": 6.92, + "grad_norm": 2.917487621307373, + "learning_rate": 9.0156783919598e-06, + "loss": 0.084, + "step": 10300 + }, + { + "epoch": 6.93, + "grad_norm": 3.1733992099761963, + "learning_rate": 9.013165829145729e-06, + "loss": 0.0859, + "step": 10325 + }, + { + "epoch": 6.95, + "grad_norm": 2.997939109802246, + "learning_rate": 9.01065326633166e-06, + "loss": 0.0853, + "step": 10350 + }, + { + "epoch": 6.97, + "grad_norm": 3.0148122310638428, + "learning_rate": 9.008140703517589e-06, + "loss": 0.0833, + "step": 10375 + }, + { + "epoch": 6.98, + "grad_norm": 2.7705020904541016, + "learning_rate": 9.005628140703518e-06, + "loss": 0.0841, + "step": 10400 + }, + { + "epoch": 7.0, + "grad_norm": 2.3565280437469482, + "learning_rate": 9.003115577889448e-06, + "loss": 0.0833, + "step": 10425 + }, + { + "epoch": 7.02, + "grad_norm": 2.8147268295288086, + "learning_rate": 9.000603015075377e-06, + "loss": 0.0736, + "step": 10450 + }, + { + "epoch": 7.03, + "grad_norm": 2.1311087608337402, + "learning_rate": 8.998090452261308e-06, + "loss": 0.0749, + "step": 10475 + }, + { + "epoch": 7.05, + "grad_norm": 3.1434128284454346, + "learning_rate": 8.995577889447237e-06, + "loss": 0.074, + "step": 10500 + }, + { + "epoch": 7.07, + "grad_norm": 2.4007229804992676, + "learning_rate": 8.993065326633167e-06, + "loss": 0.0749, + "step": 10525 + }, + { + "epoch": 7.09, + "grad_norm": 2.493443489074707, + "learning_rate": 8.990552763819096e-06, + "loss": 0.0753, + "step": 10550 + }, + { + "epoch": 7.1, + "grad_norm": 2.7775678634643555, + "learning_rate": 8.988040201005025e-06, + "loss": 0.0737, + "step": 10575 + }, + { + "epoch": 7.12, + "grad_norm": 2.423232316970825, + "learning_rate": 8.985527638190955e-06, + "loss": 0.0765, + "step": 10600 + }, + { + "epoch": 7.14, + "grad_norm": 2.3441214561462402, + "learning_rate": 8.983015075376886e-06, + "loss": 0.0727, + "step": 10625 + }, + { + "epoch": 7.15, + "grad_norm": 2.8226394653320312, + "learning_rate": 8.980502512562815e-06, + "loss": 0.0748, + "step": 10650 + }, + { + "epoch": 7.17, + "grad_norm": 3.1403701305389404, + "learning_rate": 8.977989949748744e-06, + "loss": 0.0751, + "step": 10675 + }, + { + "epoch": 7.19, + "grad_norm": 2.4939680099487305, + "learning_rate": 8.975477386934675e-06, + "loss": 0.0761, + "step": 10700 + }, + { + "epoch": 7.2, + "grad_norm": 2.57086443901062, + "learning_rate": 8.972964824120603e-06, + "loss": 0.0746, + "step": 10725 + }, + { + "epoch": 7.22, + "grad_norm": 2.5829505920410156, + "learning_rate": 8.970452261306534e-06, + "loss": 0.0746, + "step": 10750 + }, + { + "epoch": 7.24, + "grad_norm": 2.1644701957702637, + "learning_rate": 8.967939698492463e-06, + "loss": 0.0754, + "step": 10775 + }, + { + "epoch": 7.25, + "grad_norm": 2.6177899837493896, + "learning_rate": 8.965427135678393e-06, + "loss": 0.0757, + "step": 10800 + }, + { + "epoch": 7.27, + "grad_norm": 2.649995803833008, + "learning_rate": 8.962914572864322e-06, + "loss": 0.0763, + "step": 10825 + }, + { + "epoch": 7.29, + "grad_norm": 2.885385513305664, + "learning_rate": 8.960402010050251e-06, + "loss": 0.0716, + "step": 10850 + }, + { + "epoch": 7.3, + "grad_norm": 2.490011215209961, + "learning_rate": 8.957889447236182e-06, + "loss": 0.0753, + "step": 10875 + }, + { + "epoch": 7.32, + "grad_norm": 2.5891458988189697, + "learning_rate": 8.955376884422112e-06, + "loss": 0.0751, + "step": 10900 + }, + { + "epoch": 7.34, + "grad_norm": 2.5575714111328125, + "learning_rate": 8.95286432160804e-06, + "loss": 0.0781, + "step": 10925 + }, + { + "epoch": 7.35, + "grad_norm": 2.4237771034240723, + "learning_rate": 8.95035175879397e-06, + "loss": 0.076, + "step": 10950 + }, + { + "epoch": 7.37, + "grad_norm": 2.8854238986968994, + "learning_rate": 8.947839195979901e-06, + "loss": 0.0776, + "step": 10975 + }, + { + "epoch": 7.39, + "grad_norm": 3.1436028480529785, + "learning_rate": 8.945326633165829e-06, + "loss": 0.0747, + "step": 11000 + }, + { + "epoch": 7.39, + "eval_loss": 0.07451890408992767, + "eval_runtime": 944.9651, + "eval_samples_per_second": 1.522, + "eval_steps_per_second": 1.522, + "eval_wer": 37.75402943237561, + "step": 11000 + }, + { + "epoch": 7.4, + "grad_norm": 2.762936592102051, + "learning_rate": 8.94281407035176e-06, + "loss": 0.078, + "step": 11025 + }, + { + "epoch": 7.42, + "grad_norm": 2.804004669189453, + "learning_rate": 8.940301507537689e-06, + "loss": 0.0741, + "step": 11050 + }, + { + "epoch": 7.44, + "grad_norm": 2.684346914291382, + "learning_rate": 8.937788944723618e-06, + "loss": 0.0746, + "step": 11075 + }, + { + "epoch": 7.45, + "grad_norm": 3.3459744453430176, + "learning_rate": 8.93527638190955e-06, + "loss": 0.0762, + "step": 11100 + }, + { + "epoch": 7.47, + "grad_norm": 2.971228837966919, + "learning_rate": 8.932763819095477e-06, + "loss": 0.0779, + "step": 11125 + }, + { + "epoch": 7.49, + "grad_norm": 2.6844675540924072, + "learning_rate": 8.930251256281408e-06, + "loss": 0.0743, + "step": 11150 + }, + { + "epoch": 7.51, + "grad_norm": 2.520510196685791, + "learning_rate": 8.927738693467337e-06, + "loss": 0.0757, + "step": 11175 + }, + { + "epoch": 7.52, + "grad_norm": 2.496900796890259, + "learning_rate": 8.925226130653267e-06, + "loss": 0.0745, + "step": 11200 + }, + { + "epoch": 7.54, + "grad_norm": 2.5680129528045654, + "learning_rate": 8.922713567839196e-06, + "loss": 0.0773, + "step": 11225 + }, + { + "epoch": 7.56, + "grad_norm": 2.736950397491455, + "learning_rate": 8.920201005025127e-06, + "loss": 0.0762, + "step": 11250 + }, + { + "epoch": 7.57, + "grad_norm": 2.5988550186157227, + "learning_rate": 8.917688442211055e-06, + "loss": 0.0758, + "step": 11275 + }, + { + "epoch": 7.59, + "grad_norm": 2.5270214080810547, + "learning_rate": 8.915175879396986e-06, + "loss": 0.0754, + "step": 11300 + }, + { + "epoch": 7.61, + "grad_norm": 2.5261621475219727, + "learning_rate": 8.912663316582915e-06, + "loss": 0.0758, + "step": 11325 + }, + { + "epoch": 7.62, + "grad_norm": 2.587200403213501, + "learning_rate": 8.910150753768844e-06, + "loss": 0.077, + "step": 11350 + }, + { + "epoch": 7.64, + "grad_norm": 2.415273904800415, + "learning_rate": 8.907638190954775e-06, + "loss": 0.0736, + "step": 11375 + }, + { + "epoch": 7.66, + "grad_norm": 2.9976024627685547, + "learning_rate": 8.905125628140705e-06, + "loss": 0.0745, + "step": 11400 + }, + { + "epoch": 7.67, + "grad_norm": 2.8108954429626465, + "learning_rate": 8.902613065326634e-06, + "loss": 0.0773, + "step": 11425 + }, + { + "epoch": 7.69, + "grad_norm": 2.2607011795043945, + "learning_rate": 8.900100502512563e-06, + "loss": 0.0732, + "step": 11450 + }, + { + "epoch": 7.71, + "grad_norm": 2.678086757659912, + "learning_rate": 8.897587939698493e-06, + "loss": 0.0748, + "step": 11475 + }, + { + "epoch": 7.72, + "grad_norm": 2.4147794246673584, + "learning_rate": 8.895075376884424e-06, + "loss": 0.0742, + "step": 11500 + }, + { + "epoch": 7.74, + "grad_norm": 2.6311733722686768, + "learning_rate": 8.892562814070353e-06, + "loss": 0.0761, + "step": 11525 + }, + { + "epoch": 7.76, + "grad_norm": 2.679243326187134, + "learning_rate": 8.890050251256282e-06, + "loss": 0.0758, + "step": 11550 + }, + { + "epoch": 7.77, + "grad_norm": 2.5631027221679688, + "learning_rate": 8.887537688442212e-06, + "loss": 0.0747, + "step": 11575 + }, + { + "epoch": 7.79, + "grad_norm": 2.5809240341186523, + "learning_rate": 8.885025125628141e-06, + "loss": 0.077, + "step": 11600 + }, + { + "epoch": 7.81, + "grad_norm": 2.5820209980010986, + "learning_rate": 8.88251256281407e-06, + "loss": 0.0737, + "step": 11625 + }, + { + "epoch": 7.82, + "grad_norm": 3.0778956413269043, + "learning_rate": 8.880000000000001e-06, + "loss": 0.0753, + "step": 11650 + }, + { + "epoch": 7.84, + "grad_norm": 2.952677011489868, + "learning_rate": 8.87748743718593e-06, + "loss": 0.0802, + "step": 11675 + }, + { + "epoch": 7.86, + "grad_norm": 2.6686625480651855, + "learning_rate": 8.87497487437186e-06, + "loss": 0.0771, + "step": 11700 + }, + { + "epoch": 7.87, + "grad_norm": 2.6133458614349365, + "learning_rate": 8.87246231155779e-06, + "loss": 0.0735, + "step": 11725 + }, + { + "epoch": 7.89, + "grad_norm": 2.7653019428253174, + "learning_rate": 8.869949748743719e-06, + "loss": 0.0777, + "step": 11750 + }, + { + "epoch": 7.91, + "grad_norm": 2.53521728515625, + "learning_rate": 8.86743718592965e-06, + "loss": 0.0777, + "step": 11775 + }, + { + "epoch": 7.92, + "grad_norm": 2.804986000061035, + "learning_rate": 8.864924623115579e-06, + "loss": 0.0781, + "step": 11800 + }, + { + "epoch": 7.94, + "grad_norm": 2.3068668842315674, + "learning_rate": 8.862412060301508e-06, + "loss": 0.0748, + "step": 11825 + }, + { + "epoch": 7.96, + "grad_norm": 2.1941022872924805, + "learning_rate": 8.859899497487438e-06, + "loss": 0.0748, + "step": 11850 + }, + { + "epoch": 7.98, + "grad_norm": 2.287976026535034, + "learning_rate": 8.857386934673367e-06, + "loss": 0.0726, + "step": 11875 + }, + { + "epoch": 7.99, + "grad_norm": 2.6045656204223633, + "learning_rate": 8.854874371859296e-06, + "loss": 0.0772, + "step": 11900 + }, + { + "epoch": 8.01, + "grad_norm": 2.3343260288238525, + "learning_rate": 8.852361809045227e-06, + "loss": 0.0709, + "step": 11925 + }, + { + "epoch": 8.03, + "grad_norm": 2.3608927726745605, + "learning_rate": 8.849849246231157e-06, + "loss": 0.0663, + "step": 11950 + }, + { + "epoch": 8.04, + "grad_norm": 2.5067567825317383, + "learning_rate": 8.847336683417086e-06, + "loss": 0.07, + "step": 11975 + }, + { + "epoch": 8.06, + "grad_norm": 2.20392107963562, + "learning_rate": 8.844824120603015e-06, + "loss": 0.0643, + "step": 12000 + }, + { + "epoch": 8.06, + "eval_loss": 0.07609053701162338, + "eval_runtime": 947.5186, + "eval_samples_per_second": 1.518, + "eval_steps_per_second": 1.518, + "eval_wer": 35.97582340574632, + "step": 12000 + }, + { + "epoch": 8.08, + "grad_norm": 2.6891331672668457, + "learning_rate": 8.842311557788945e-06, + "loss": 0.0693, + "step": 12025 + }, + { + "epoch": 8.09, + "grad_norm": 2.647749662399292, + "learning_rate": 8.839798994974876e-06, + "loss": 0.0664, + "step": 12050 + }, + { + "epoch": 8.11, + "grad_norm": 2.5699245929718018, + "learning_rate": 8.837286432160805e-06, + "loss": 0.067, + "step": 12075 + }, + { + "epoch": 8.13, + "grad_norm": 2.467653274536133, + "learning_rate": 8.834773869346734e-06, + "loss": 0.0648, + "step": 12100 + }, + { + "epoch": 8.14, + "grad_norm": 2.6818325519561768, + "learning_rate": 8.832261306532665e-06, + "loss": 0.0679, + "step": 12125 + }, + { + "epoch": 8.16, + "grad_norm": 3.3654887676239014, + "learning_rate": 8.829748743718593e-06, + "loss": 0.0675, + "step": 12150 + }, + { + "epoch": 8.18, + "grad_norm": 2.3903543949127197, + "learning_rate": 8.827236180904524e-06, + "loss": 0.0687, + "step": 12175 + }, + { + "epoch": 8.19, + "grad_norm": 2.6501457691192627, + "learning_rate": 8.824723618090453e-06, + "loss": 0.0662, + "step": 12200 + }, + { + "epoch": 8.21, + "grad_norm": 2.909879207611084, + "learning_rate": 8.822211055276383e-06, + "loss": 0.0695, + "step": 12225 + }, + { + "epoch": 8.23, + "grad_norm": 3.0803229808807373, + "learning_rate": 8.819698492462312e-06, + "loss": 0.0658, + "step": 12250 + }, + { + "epoch": 8.24, + "grad_norm": 2.59801983833313, + "learning_rate": 8.817185929648241e-06, + "loss": 0.0662, + "step": 12275 + }, + { + "epoch": 8.26, + "grad_norm": 2.374279022216797, + "learning_rate": 8.81467336683417e-06, + "loss": 0.0664, + "step": 12300 + }, + { + "epoch": 8.28, + "grad_norm": 2.24104905128479, + "learning_rate": 8.812160804020102e-06, + "loss": 0.0658, + "step": 12325 + }, + { + "epoch": 8.29, + "grad_norm": 2.9232263565063477, + "learning_rate": 8.809648241206031e-06, + "loss": 0.0694, + "step": 12350 + }, + { + "epoch": 8.31, + "grad_norm": 2.6230671405792236, + "learning_rate": 8.80713567839196e-06, + "loss": 0.0681, + "step": 12375 + }, + { + "epoch": 8.33, + "grad_norm": 2.4782748222351074, + "learning_rate": 8.804623115577891e-06, + "loss": 0.0676, + "step": 12400 + }, + { + "epoch": 8.34, + "grad_norm": 2.668426990509033, + "learning_rate": 8.802110552763819e-06, + "loss": 0.0668, + "step": 12425 + }, + { + "epoch": 8.36, + "grad_norm": 2.3836426734924316, + "learning_rate": 8.79959798994975e-06, + "loss": 0.0694, + "step": 12450 + }, + { + "epoch": 8.38, + "grad_norm": 2.4502406120300293, + "learning_rate": 8.79708542713568e-06, + "loss": 0.0702, + "step": 12475 + }, + { + "epoch": 8.39, + "grad_norm": 3.2457191944122314, + "learning_rate": 8.794572864321609e-06, + "loss": 0.0677, + "step": 12500 + }, + { + "epoch": 8.41, + "grad_norm": 2.3833024501800537, + "learning_rate": 8.792060301507538e-06, + "loss": 0.0679, + "step": 12525 + }, + { + "epoch": 8.43, + "grad_norm": 3.653550624847412, + "learning_rate": 8.789547738693467e-06, + "loss": 0.0664, + "step": 12550 + }, + { + "epoch": 8.45, + "grad_norm": 2.4735491275787354, + "learning_rate": 8.787035175879398e-06, + "loss": 0.0674, + "step": 12575 + }, + { + "epoch": 8.46, + "grad_norm": 2.5834007263183594, + "learning_rate": 8.784522613065328e-06, + "loss": 0.0684, + "step": 12600 + }, + { + "epoch": 8.48, + "grad_norm": 2.7656002044677734, + "learning_rate": 8.782010050251257e-06, + "loss": 0.0658, + "step": 12625 + }, + { + "epoch": 8.5, + "grad_norm": 2.700043201446533, + "learning_rate": 8.779497487437186e-06, + "loss": 0.0685, + "step": 12650 + }, + { + "epoch": 8.51, + "grad_norm": 2.4712941646575928, + "learning_rate": 8.776984924623117e-06, + "loss": 0.0684, + "step": 12675 + }, + { + "epoch": 8.53, + "grad_norm": 2.314640998840332, + "learning_rate": 8.774472361809045e-06, + "loss": 0.0685, + "step": 12700 + }, + { + "epoch": 8.55, + "grad_norm": 2.6900835037231445, + "learning_rate": 8.771959798994976e-06, + "loss": 0.068, + "step": 12725 + }, + { + "epoch": 8.56, + "grad_norm": 2.5131781101226807, + "learning_rate": 8.769447236180905e-06, + "loss": 0.0688, + "step": 12750 + }, + { + "epoch": 8.58, + "grad_norm": 2.605616331100464, + "learning_rate": 8.766934673366834e-06, + "loss": 0.0677, + "step": 12775 + }, + { + "epoch": 8.6, + "grad_norm": 2.8637475967407227, + "learning_rate": 8.764422110552765e-06, + "loss": 0.0685, + "step": 12800 + }, + { + "epoch": 8.61, + "grad_norm": 3.07174015045166, + "learning_rate": 8.761909547738693e-06, + "loss": 0.0681, + "step": 12825 + }, + { + "epoch": 8.63, + "grad_norm": 2.485527992248535, + "learning_rate": 8.759396984924624e-06, + "loss": 0.0695, + "step": 12850 + }, + { + "epoch": 8.65, + "grad_norm": 2.476440906524658, + "learning_rate": 8.756884422110553e-06, + "loss": 0.0688, + "step": 12875 + }, + { + "epoch": 8.66, + "grad_norm": 2.517463445663452, + "learning_rate": 8.754371859296483e-06, + "loss": 0.0682, + "step": 12900 + }, + { + "epoch": 8.68, + "grad_norm": 2.298196315765381, + "learning_rate": 8.751859296482412e-06, + "loss": 0.0671, + "step": 12925 + }, + { + "epoch": 8.7, + "grad_norm": 2.5696184635162354, + "learning_rate": 8.749346733668343e-06, + "loss": 0.0678, + "step": 12950 + }, + { + "epoch": 8.71, + "grad_norm": 2.541201591491699, + "learning_rate": 8.746834170854272e-06, + "loss": 0.0676, + "step": 12975 + }, + { + "epoch": 8.73, + "grad_norm": 2.5252506732940674, + "learning_rate": 8.744321608040202e-06, + "loss": 0.0694, + "step": 13000 + }, + { + "epoch": 8.73, + "eval_loss": 0.0756089985370636, + "eval_runtime": 938.6151, + "eval_samples_per_second": 1.532, + "eval_steps_per_second": 1.532, + "eval_wer": 36.1510161177295, + "step": 13000 + }, + { + "epoch": 8.75, + "grad_norm": 2.6718802452087402, + "learning_rate": 8.741809045226131e-06, + "loss": 0.0687, + "step": 13025 + }, + { + "epoch": 8.76, + "grad_norm": 2.4813454151153564, + "learning_rate": 8.73929648241206e-06, + "loss": 0.0693, + "step": 13050 + }, + { + "epoch": 8.78, + "grad_norm": 2.5022711753845215, + "learning_rate": 8.736783919597991e-06, + "loss": 0.0682, + "step": 13075 + }, + { + "epoch": 8.8, + "grad_norm": 2.8723132610321045, + "learning_rate": 8.734271356783919e-06, + "loss": 0.0689, + "step": 13100 + }, + { + "epoch": 8.81, + "grad_norm": 2.2734808921813965, + "learning_rate": 8.73175879396985e-06, + "loss": 0.0683, + "step": 13125 + }, + { + "epoch": 8.83, + "grad_norm": 3.583946943283081, + "learning_rate": 8.72924623115578e-06, + "loss": 0.0677, + "step": 13150 + }, + { + "epoch": 8.85, + "grad_norm": 2.2414391040802, + "learning_rate": 8.726733668341709e-06, + "loss": 0.0683, + "step": 13175 + }, + { + "epoch": 8.87, + "grad_norm": 2.2894062995910645, + "learning_rate": 8.72422110552764e-06, + "loss": 0.067, + "step": 13200 + }, + { + "epoch": 8.88, + "grad_norm": 2.9999337196350098, + "learning_rate": 8.721708542713569e-06, + "loss": 0.0677, + "step": 13225 + }, + { + "epoch": 8.9, + "grad_norm": 2.736943006515503, + "learning_rate": 8.719195979899498e-06, + "loss": 0.0697, + "step": 13250 + }, + { + "epoch": 8.92, + "grad_norm": 2.2958357334136963, + "learning_rate": 8.716683417085428e-06, + "loss": 0.0685, + "step": 13275 + }, + { + "epoch": 8.93, + "grad_norm": 2.6933298110961914, + "learning_rate": 8.714170854271357e-06, + "loss": 0.068, + "step": 13300 + }, + { + "epoch": 8.95, + "grad_norm": 2.872922658920288, + "learning_rate": 8.711658291457286e-06, + "loss": 0.0647, + "step": 13325 + }, + { + "epoch": 8.97, + "grad_norm": 3.174562454223633, + "learning_rate": 8.709145728643217e-06, + "loss": 0.0703, + "step": 13350 + }, + { + "epoch": 8.98, + "grad_norm": 2.847895383834839, + "learning_rate": 8.706633165829147e-06, + "loss": 0.0662, + "step": 13375 + }, + { + "epoch": 9.0, + "grad_norm": 2.649989366531372, + "learning_rate": 8.704120603015076e-06, + "loss": 0.0685, + "step": 13400 + }, + { + "epoch": 9.02, + "grad_norm": 2.3739471435546875, + "learning_rate": 8.701608040201005e-06, + "loss": 0.0597, + "step": 13425 + }, + { + "epoch": 9.03, + "grad_norm": 2.5196468830108643, + "learning_rate": 8.699095477386935e-06, + "loss": 0.0594, + "step": 13450 + }, + { + "epoch": 9.05, + "grad_norm": 2.634519577026367, + "learning_rate": 8.696582914572866e-06, + "loss": 0.0585, + "step": 13475 + }, + { + "epoch": 9.07, + "grad_norm": 2.3203775882720947, + "learning_rate": 8.694070351758795e-06, + "loss": 0.0588, + "step": 13500 + }, + { + "epoch": 9.08, + "grad_norm": 2.374537467956543, + "learning_rate": 8.691557788944724e-06, + "loss": 0.0587, + "step": 13525 + }, + { + "epoch": 9.1, + "grad_norm": 2.575796604156494, + "learning_rate": 8.689145728643218e-06, + "loss": 0.0606, + "step": 13550 + }, + { + "epoch": 9.12, + "grad_norm": 2.760568141937256, + "learning_rate": 8.686633165829147e-06, + "loss": 0.0593, + "step": 13575 + }, + { + "epoch": 9.13, + "grad_norm": 3.001232147216797, + "learning_rate": 8.684120603015076e-06, + "loss": 0.0596, + "step": 13600 + }, + { + "epoch": 9.15, + "grad_norm": 2.5632381439208984, + "learning_rate": 8.681608040201006e-06, + "loss": 0.0595, + "step": 13625 + }, + { + "epoch": 9.17, + "grad_norm": 2.7728652954101562, + "learning_rate": 8.679095477386935e-06, + "loss": 0.0625, + "step": 13650 + }, + { + "epoch": 9.18, + "grad_norm": 2.6438934803009033, + "learning_rate": 8.676582914572866e-06, + "loss": 0.0607, + "step": 13675 + }, + { + "epoch": 9.2, + "grad_norm": 2.527219295501709, + "learning_rate": 8.674070351758794e-06, + "loss": 0.059, + "step": 13700 + }, + { + "epoch": 9.22, + "grad_norm": 3.258898973464966, + "learning_rate": 8.671557788944725e-06, + "loss": 0.0591, + "step": 13725 + }, + { + "epoch": 9.23, + "grad_norm": 2.7250688076019287, + "learning_rate": 8.669045226130654e-06, + "loss": 0.0599, + "step": 13750 + }, + { + "epoch": 9.25, + "grad_norm": 2.429975748062134, + "learning_rate": 8.666532663316583e-06, + "loss": 0.0589, + "step": 13775 + }, + { + "epoch": 9.27, + "grad_norm": 2.5147359371185303, + "learning_rate": 8.664020100502514e-06, + "loss": 0.0596, + "step": 13800 + }, + { + "epoch": 9.28, + "grad_norm": 2.559971332550049, + "learning_rate": 8.661507537688444e-06, + "loss": 0.06, + "step": 13825 + }, + { + "epoch": 9.3, + "grad_norm": 2.3673105239868164, + "learning_rate": 8.658994974874373e-06, + "loss": 0.0624, + "step": 13850 + }, + { + "epoch": 9.32, + "grad_norm": 2.6384220123291016, + "learning_rate": 8.656482412060302e-06, + "loss": 0.0626, + "step": 13875 + }, + { + "epoch": 9.34, + "grad_norm": 2.719146966934204, + "learning_rate": 8.653969849246231e-06, + "loss": 0.0604, + "step": 13900 + }, + { + "epoch": 9.35, + "grad_norm": 2.488280773162842, + "learning_rate": 8.65145728643216e-06, + "loss": 0.0619, + "step": 13925 + }, + { + "epoch": 9.37, + "grad_norm": 2.646446466445923, + "learning_rate": 8.648944723618092e-06, + "loss": 0.0592, + "step": 13950 + }, + { + "epoch": 9.39, + "grad_norm": 2.2520694732666016, + "learning_rate": 8.64643216080402e-06, + "loss": 0.0598, + "step": 13975 + }, + { + "epoch": 9.4, + "grad_norm": 3.0429489612579346, + "learning_rate": 8.64391959798995e-06, + "loss": 0.0608, + "step": 14000 + }, + { + "epoch": 9.4, + "eval_loss": 0.07920407503843307, + "eval_runtime": 946.2073, + "eval_samples_per_second": 1.52, + "eval_steps_per_second": 1.52, + "eval_wer": 35.51156271899089, + "step": 14000 + }, + { + "epoch": 9.42, + "grad_norm": 2.448462724685669, + "learning_rate": 8.64140703517588e-06, + "loss": 0.061, + "step": 14025 + }, + { + "epoch": 9.44, + "grad_norm": 2.6913626194000244, + "learning_rate": 8.638894472361809e-06, + "loss": 0.061, + "step": 14050 + }, + { + "epoch": 9.45, + "grad_norm": 2.397061586380005, + "learning_rate": 8.63638190954774e-06, + "loss": 0.0576, + "step": 14075 + }, + { + "epoch": 9.47, + "grad_norm": 2.5650970935821533, + "learning_rate": 8.63386934673367e-06, + "loss": 0.0613, + "step": 14100 + }, + { + "epoch": 9.49, + "grad_norm": 2.3487985134124756, + "learning_rate": 8.631356783919599e-06, + "loss": 0.0611, + "step": 14125 + }, + { + "epoch": 9.5, + "grad_norm": 2.8183300495147705, + "learning_rate": 8.628844221105528e-06, + "loss": 0.0618, + "step": 14150 + }, + { + "epoch": 9.52, + "grad_norm": 2.2249584197998047, + "learning_rate": 8.626331658291457e-06, + "loss": 0.063, + "step": 14175 + }, + { + "epoch": 9.54, + "grad_norm": 2.5605950355529785, + "learning_rate": 8.623819095477388e-06, + "loss": 0.061, + "step": 14200 + }, + { + "epoch": 9.55, + "grad_norm": 2.8595635890960693, + "learning_rate": 8.621306532663318e-06, + "loss": 0.0627, + "step": 14225 + }, + { + "epoch": 9.57, + "grad_norm": 2.2395496368408203, + "learning_rate": 8.618793969849247e-06, + "loss": 0.0619, + "step": 14250 + }, + { + "epoch": 9.59, + "grad_norm": 2.4124205112457275, + "learning_rate": 8.616281407035176e-06, + "loss": 0.0602, + "step": 14275 + }, + { + "epoch": 9.6, + "grad_norm": 2.6336212158203125, + "learning_rate": 8.613768844221106e-06, + "loss": 0.0607, + "step": 14300 + }, + { + "epoch": 9.62, + "grad_norm": 3.0532987117767334, + "learning_rate": 8.611256281407035e-06, + "loss": 0.0626, + "step": 14325 + }, + { + "epoch": 9.64, + "grad_norm": 2.452667713165283, + "learning_rate": 8.608743718592966e-06, + "loss": 0.0623, + "step": 14350 + }, + { + "epoch": 9.65, + "grad_norm": 2.486095666885376, + "learning_rate": 8.606231155778895e-06, + "loss": 0.0626, + "step": 14375 + }, + { + "epoch": 9.67, + "grad_norm": 2.1423239707946777, + "learning_rate": 8.603718592964825e-06, + "loss": 0.0603, + "step": 14400 + }, + { + "epoch": 9.69, + "grad_norm": 2.6517510414123535, + "learning_rate": 8.601206030150756e-06, + "loss": 0.0603, + "step": 14425 + }, + { + "epoch": 9.7, + "grad_norm": 2.513198137283325, + "learning_rate": 8.598693467336683e-06, + "loss": 0.0624, + "step": 14450 + }, + { + "epoch": 9.72, + "grad_norm": 2.406611442565918, + "learning_rate": 8.596180904522614e-06, + "loss": 0.0606, + "step": 14475 + }, + { + "epoch": 9.74, + "grad_norm": 3.116328477859497, + "learning_rate": 8.593668341708544e-06, + "loss": 0.0625, + "step": 14500 + }, + { + "epoch": 9.75, + "grad_norm": 2.720670700073242, + "learning_rate": 8.591155778894473e-06, + "loss": 0.0603, + "step": 14525 + }, + { + "epoch": 9.77, + "grad_norm": 2.718442440032959, + "learning_rate": 8.588643216080402e-06, + "loss": 0.0618, + "step": 14550 + }, + { + "epoch": 9.79, + "grad_norm": 2.624940872192383, + "learning_rate": 8.586130653266332e-06, + "loss": 0.0608, + "step": 14575 + }, + { + "epoch": 9.81, + "grad_norm": 2.48469614982605, + "learning_rate": 8.583618090452261e-06, + "loss": 0.0602, + "step": 14600 + }, + { + "epoch": 9.82, + "grad_norm": 2.7031617164611816, + "learning_rate": 8.581105527638192e-06, + "loss": 0.0599, + "step": 14625 + }, + { + "epoch": 9.84, + "grad_norm": 2.4318370819091797, + "learning_rate": 8.578592964824121e-06, + "loss": 0.063, + "step": 14650 + }, + { + "epoch": 9.86, + "grad_norm": 2.2842206954956055, + "learning_rate": 8.57608040201005e-06, + "loss": 0.063, + "step": 14675 + }, + { + "epoch": 9.87, + "grad_norm": 2.384749174118042, + "learning_rate": 8.573567839195982e-06, + "loss": 0.0602, + "step": 14700 + }, + { + "epoch": 9.89, + "grad_norm": 2.5647854804992676, + "learning_rate": 8.57105527638191e-06, + "loss": 0.0618, + "step": 14725 + }, + { + "epoch": 9.91, + "grad_norm": 2.1962180137634277, + "learning_rate": 8.56854271356784e-06, + "loss": 0.0602, + "step": 14750 + }, + { + "epoch": 9.92, + "grad_norm": 2.9758098125457764, + "learning_rate": 8.56603015075377e-06, + "loss": 0.0618, + "step": 14775 + }, + { + "epoch": 9.94, + "grad_norm": 2.527043104171753, + "learning_rate": 8.563517587939699e-06, + "loss": 0.0616, + "step": 14800 + }, + { + "epoch": 9.96, + "grad_norm": 2.5038506984710693, + "learning_rate": 8.56100502512563e-06, + "loss": 0.0583, + "step": 14825 + }, + { + "epoch": 9.97, + "grad_norm": 2.4725499153137207, + "learning_rate": 8.558492462311558e-06, + "loss": 0.06, + "step": 14850 + }, + { + "epoch": 9.99, + "grad_norm": 2.5974104404449463, + "learning_rate": 8.555979899497489e-06, + "loss": 0.0627, + "step": 14875 + }, + { + "epoch": 10.01, + "grad_norm": 2.327585458755493, + "learning_rate": 8.553467336683418e-06, + "loss": 0.0577, + "step": 14900 + }, + { + "epoch": 10.02, + "grad_norm": 2.5683743953704834, + "learning_rate": 8.550954773869347e-06, + "loss": 0.0525, + "step": 14925 + }, + { + "epoch": 10.04, + "grad_norm": 2.233187198638916, + "learning_rate": 8.548442211055277e-06, + "loss": 0.054, + "step": 14950 + }, + { + "epoch": 10.06, + "grad_norm": 2.317199468612671, + "learning_rate": 8.545929648241208e-06, + "loss": 0.0536, + "step": 14975 + }, + { + "epoch": 10.07, + "grad_norm": 2.7724609375, + "learning_rate": 8.543417085427135e-06, + "loss": 0.0506, + "step": 15000 + }, + { + "epoch": 10.07, + "eval_loss": 0.0795641764998436, + "eval_runtime": 951.9232, + "eval_samples_per_second": 1.511, + "eval_steps_per_second": 1.511, + "eval_wer": 37.1583742116328, + "step": 15000 + }, + { + "epoch": 10.09, + "grad_norm": 2.1854188442230225, + "learning_rate": 8.540904522613066e-06, + "loss": 0.053, + "step": 15025 + }, + { + "epoch": 10.11, + "grad_norm": 2.9276561737060547, + "learning_rate": 8.538391959798996e-06, + "loss": 0.0525, + "step": 15050 + }, + { + "epoch": 10.12, + "grad_norm": 2.3880844116210938, + "learning_rate": 8.535879396984925e-06, + "loss": 0.0531, + "step": 15075 + }, + { + "epoch": 10.14, + "grad_norm": 2.4319684505462646, + "learning_rate": 8.533366834170856e-06, + "loss": 0.0541, + "step": 15100 + }, + { + "epoch": 10.16, + "grad_norm": 2.4318885803222656, + "learning_rate": 8.530854271356784e-06, + "loss": 0.0537, + "step": 15125 + }, + { + "epoch": 10.17, + "grad_norm": 2.1942214965820312, + "learning_rate": 8.528341708542715e-06, + "loss": 0.0538, + "step": 15150 + }, + { + "epoch": 10.19, + "grad_norm": 2.346592426300049, + "learning_rate": 8.525829145728644e-06, + "loss": 0.053, + "step": 15175 + }, + { + "epoch": 10.21, + "grad_norm": 2.5984833240509033, + "learning_rate": 8.523316582914573e-06, + "loss": 0.0533, + "step": 15200 + }, + { + "epoch": 10.22, + "grad_norm": 2.636021852493286, + "learning_rate": 8.520804020100503e-06, + "loss": 0.0522, + "step": 15225 + }, + { + "epoch": 10.24, + "grad_norm": 2.155094623565674, + "learning_rate": 8.518291457286434e-06, + "loss": 0.0521, + "step": 15250 + }, + { + "epoch": 10.26, + "grad_norm": 2.6423208713531494, + "learning_rate": 8.515778894472363e-06, + "loss": 0.0534, + "step": 15275 + }, + { + "epoch": 10.28, + "grad_norm": 2.7121756076812744, + "learning_rate": 8.513266331658292e-06, + "loss": 0.0525, + "step": 15300 + }, + { + "epoch": 10.29, + "grad_norm": 2.7865710258483887, + "learning_rate": 8.510753768844222e-06, + "loss": 0.0543, + "step": 15325 + }, + { + "epoch": 10.31, + "grad_norm": 2.961301565170288, + "learning_rate": 8.508241206030151e-06, + "loss": 0.054, + "step": 15350 + }, + { + "epoch": 10.33, + "grad_norm": 2.719883441925049, + "learning_rate": 8.505728643216082e-06, + "loss": 0.0534, + "step": 15375 + }, + { + "epoch": 10.34, + "grad_norm": 2.588083028793335, + "learning_rate": 8.50321608040201e-06, + "loss": 0.0538, + "step": 15400 + }, + { + "epoch": 10.36, + "grad_norm": 2.5165112018585205, + "learning_rate": 8.50070351758794e-06, + "loss": 0.0569, + "step": 15425 + }, + { + "epoch": 10.38, + "grad_norm": 2.9085943698883057, + "learning_rate": 8.49819095477387e-06, + "loss": 0.0546, + "step": 15450 + }, + { + "epoch": 10.39, + "grad_norm": 2.655588150024414, + "learning_rate": 8.4956783919598e-06, + "loss": 0.0524, + "step": 15475 + }, + { + "epoch": 10.41, + "grad_norm": 2.4400410652160645, + "learning_rate": 8.49316582914573e-06, + "loss": 0.0541, + "step": 15500 + }, + { + "epoch": 10.43, + "grad_norm": 2.8765523433685303, + "learning_rate": 8.49065326633166e-06, + "loss": 0.0551, + "step": 15525 + }, + { + "epoch": 10.44, + "grad_norm": 3.0199999809265137, + "learning_rate": 8.488140703517589e-06, + "loss": 0.0555, + "step": 15550 + }, + { + "epoch": 10.46, + "grad_norm": 2.695577621459961, + "learning_rate": 8.485628140703518e-06, + "loss": 0.0549, + "step": 15575 + }, + { + "epoch": 10.48, + "grad_norm": 2.4532010555267334, + "learning_rate": 8.483115577889447e-06, + "loss": 0.0551, + "step": 15600 + }, + { + "epoch": 10.49, + "grad_norm": 2.728224515914917, + "learning_rate": 8.48070351758794e-06, + "loss": 0.0577, + "step": 15625 + }, + { + "epoch": 10.51, + "grad_norm": 2.5855214595794678, + "learning_rate": 8.47819095477387e-06, + "loss": 0.0544, + "step": 15650 + }, + { + "epoch": 10.53, + "grad_norm": 2.2945587635040283, + "learning_rate": 8.4756783919598e-06, + "loss": 0.053, + "step": 15675 + }, + { + "epoch": 10.54, + "grad_norm": 2.846177101135254, + "learning_rate": 8.47316582914573e-06, + "loss": 0.0556, + "step": 15700 + }, + { + "epoch": 10.56, + "grad_norm": 2.7254414558410645, + "learning_rate": 8.470653266331658e-06, + "loss": 0.0549, + "step": 15725 + }, + { + "epoch": 10.58, + "grad_norm": 2.395880699157715, + "learning_rate": 8.468140703517589e-06, + "loss": 0.052, + "step": 15750 + }, + { + "epoch": 10.59, + "grad_norm": 2.5893428325653076, + "learning_rate": 8.465628140703518e-06, + "loss": 0.056, + "step": 15775 + }, + { + "epoch": 10.61, + "grad_norm": 2.5432846546173096, + "learning_rate": 8.463115577889448e-06, + "loss": 0.0533, + "step": 15800 + }, + { + "epoch": 10.63, + "grad_norm": 2.8422675132751465, + "learning_rate": 8.460603015075377e-06, + "loss": 0.0549, + "step": 15825 + }, + { + "epoch": 10.64, + "grad_norm": 2.6458046436309814, + "learning_rate": 8.458090452261308e-06, + "loss": 0.0541, + "step": 15850 + }, + { + "epoch": 10.66, + "grad_norm": 2.541419744491577, + "learning_rate": 8.455577889447237e-06, + "loss": 0.0563, + "step": 15875 + }, + { + "epoch": 10.68, + "grad_norm": 2.5031700134277344, + "learning_rate": 8.453065326633167e-06, + "loss": 0.0551, + "step": 15900 + }, + { + "epoch": 10.7, + "grad_norm": 2.77070951461792, + "learning_rate": 8.450552763819096e-06, + "loss": 0.0543, + "step": 15925 + }, + { + "epoch": 10.71, + "grad_norm": 2.660856246948242, + "learning_rate": 8.448040201005025e-06, + "loss": 0.0547, + "step": 15950 + }, + { + "epoch": 10.73, + "grad_norm": 2.3509740829467773, + "learning_rate": 8.445527638190956e-06, + "loss": 0.0524, + "step": 15975 + }, + { + "epoch": 10.75, + "grad_norm": 2.630680561065674, + "learning_rate": 8.443015075376884e-06, + "loss": 0.0558, + "step": 16000 + }, + { + "epoch": 10.75, + "eval_loss": 0.0806279182434082, + "eval_runtime": 955.4004, + "eval_samples_per_second": 1.505, + "eval_steps_per_second": 1.505, + "eval_wer": 36.948142957252976, + "step": 16000 + }, + { + "epoch": 10.76, + "grad_norm": 2.735692262649536, + "learning_rate": 8.440502512562815e-06, + "loss": 0.0541, + "step": 16025 + }, + { + "epoch": 10.78, + "grad_norm": 2.319406509399414, + "learning_rate": 8.437989949748744e-06, + "loss": 0.0542, + "step": 16050 + }, + { + "epoch": 10.8, + "grad_norm": 2.7495503425598145, + "learning_rate": 8.435477386934674e-06, + "loss": 0.0558, + "step": 16075 + }, + { + "epoch": 10.81, + "grad_norm": 2.655841112136841, + "learning_rate": 8.432964824120605e-06, + "loss": 0.0551, + "step": 16100 + }, + { + "epoch": 10.83, + "grad_norm": 2.3896639347076416, + "learning_rate": 8.430452261306534e-06, + "loss": 0.0534, + "step": 16125 + }, + { + "epoch": 10.85, + "grad_norm": 2.549037218093872, + "learning_rate": 8.427939698492463e-06, + "loss": 0.0545, + "step": 16150 + }, + { + "epoch": 10.86, + "grad_norm": 2.999101400375366, + "learning_rate": 8.425427135678393e-06, + "loss": 0.0546, + "step": 16175 + }, + { + "epoch": 10.88, + "grad_norm": 2.5211498737335205, + "learning_rate": 8.422914572864322e-06, + "loss": 0.0555, + "step": 16200 + }, + { + "epoch": 10.9, + "grad_norm": 2.7481014728546143, + "learning_rate": 8.420402010050251e-06, + "loss": 0.0579, + "step": 16225 + }, + { + "epoch": 10.91, + "grad_norm": 2.6747305393218994, + "learning_rate": 8.417889447236182e-06, + "loss": 0.0552, + "step": 16250 + }, + { + "epoch": 10.93, + "grad_norm": 2.445272922515869, + "learning_rate": 8.415376884422112e-06, + "loss": 0.0562, + "step": 16275 + }, + { + "epoch": 10.95, + "grad_norm": 2.8092739582061768, + "learning_rate": 8.412864321608041e-06, + "loss": 0.0542, + "step": 16300 + }, + { + "epoch": 10.96, + "grad_norm": 2.5945794582366943, + "learning_rate": 8.41035175879397e-06, + "loss": 0.0539, + "step": 16325 + }, + { + "epoch": 10.98, + "grad_norm": 2.5719878673553467, + "learning_rate": 8.4078391959799e-06, + "loss": 0.0554, + "step": 16350 + }, + { + "epoch": 11.0, + "grad_norm": 2.4250590801239014, + "learning_rate": 8.40532663316583e-06, + "loss": 0.0562, + "step": 16375 + }, + { + "epoch": 11.01, + "grad_norm": 2.162781000137329, + "learning_rate": 8.40281407035176e-06, + "loss": 0.0477, + "step": 16400 + }, + { + "epoch": 11.03, + "grad_norm": 2.256786823272705, + "learning_rate": 8.40030150753769e-06, + "loss": 0.0459, + "step": 16425 + }, + { + "epoch": 11.05, + "grad_norm": 2.5522096157073975, + "learning_rate": 8.397788944723619e-06, + "loss": 0.0467, + "step": 16450 + }, + { + "epoch": 11.06, + "grad_norm": 2.6226308345794678, + "learning_rate": 8.395276381909548e-06, + "loss": 0.0472, + "step": 16475 + }, + { + "epoch": 11.08, + "grad_norm": 2.2303457260131836, + "learning_rate": 8.392763819095479e-06, + "loss": 0.0466, + "step": 16500 + }, + { + "epoch": 11.1, + "grad_norm": 2.2399849891662598, + "learning_rate": 8.390251256281408e-06, + "loss": 0.047, + "step": 16525 + }, + { + "epoch": 11.11, + "grad_norm": 2.2633492946624756, + "learning_rate": 8.387738693467338e-06, + "loss": 0.0454, + "step": 16550 + }, + { + "epoch": 11.13, + "grad_norm": 2.3870925903320312, + "learning_rate": 8.385226130653267e-06, + "loss": 0.0475, + "step": 16575 + }, + { + "epoch": 11.15, + "grad_norm": 2.827336549758911, + "learning_rate": 8.382713567839196e-06, + "loss": 0.0476, + "step": 16600 + }, + { + "epoch": 11.17, + "grad_norm": 2.692314863204956, + "learning_rate": 8.380201005025126e-06, + "loss": 0.0476, + "step": 16625 + }, + { + "epoch": 11.18, + "grad_norm": 2.828364849090576, + "learning_rate": 8.377688442211057e-06, + "loss": 0.0483, + "step": 16650 + }, + { + "epoch": 11.2, + "grad_norm": 2.3477985858917236, + "learning_rate": 8.375175879396986e-06, + "loss": 0.0457, + "step": 16675 + }, + { + "epoch": 11.22, + "grad_norm": 2.728351593017578, + "learning_rate": 8.372663316582915e-06, + "loss": 0.0484, + "step": 16700 + }, + { + "epoch": 11.23, + "grad_norm": 3.0237677097320557, + "learning_rate": 8.370150753768845e-06, + "loss": 0.047, + "step": 16725 + }, + { + "epoch": 11.25, + "grad_norm": 2.3787364959716797, + "learning_rate": 8.367638190954774e-06, + "loss": 0.0455, + "step": 16750 + }, + { + "epoch": 11.27, + "grad_norm": 2.203397035598755, + "learning_rate": 8.365125628140705e-06, + "loss": 0.0483, + "step": 16775 + }, + { + "epoch": 11.28, + "grad_norm": 2.3077335357666016, + "learning_rate": 8.362613065326634e-06, + "loss": 0.0494, + "step": 16800 + }, + { + "epoch": 11.3, + "grad_norm": 2.618177652359009, + "learning_rate": 8.360100502512563e-06, + "loss": 0.0488, + "step": 16825 + }, + { + "epoch": 11.32, + "grad_norm": 2.249844789505005, + "learning_rate": 8.357587939698493e-06, + "loss": 0.0462, + "step": 16850 + }, + { + "epoch": 11.33, + "grad_norm": 2.8220443725585938, + "learning_rate": 8.355075376884422e-06, + "loss": 0.0496, + "step": 16875 + }, + { + "epoch": 11.35, + "grad_norm": 2.5732083320617676, + "learning_rate": 8.352562814070353e-06, + "loss": 0.0491, + "step": 16900 + }, + { + "epoch": 11.37, + "grad_norm": 2.5625510215759277, + "learning_rate": 8.350050251256282e-06, + "loss": 0.0479, + "step": 16925 + }, + { + "epoch": 11.38, + "grad_norm": 2.61069655418396, + "learning_rate": 8.347537688442212e-06, + "loss": 0.0473, + "step": 16950 + }, + { + "epoch": 11.4, + "grad_norm": 2.487173080444336, + "learning_rate": 8.345025125628141e-06, + "loss": 0.05, + "step": 16975 + }, + { + "epoch": 11.42, + "grad_norm": 2.5279154777526855, + "learning_rate": 8.34251256281407e-06, + "loss": 0.0495, + "step": 17000 + }, + { + "epoch": 11.42, + "eval_loss": 0.08420177549123764, + "eval_runtime": 944.1019, + "eval_samples_per_second": 1.523, + "eval_steps_per_second": 1.523, + "eval_wer": 38.27960756832516, + "step": 17000 + }, + { + "epoch": 11.43, + "grad_norm": 2.383618116378784, + "learning_rate": 8.34e-06, + "loss": 0.0477, + "step": 17025 + }, + { + "epoch": 11.45, + "grad_norm": 2.7168374061584473, + "learning_rate": 8.33748743718593e-06, + "loss": 0.0489, + "step": 17050 + }, + { + "epoch": 11.47, + "grad_norm": 2.2538793087005615, + "learning_rate": 8.33497487437186e-06, + "loss": 0.0476, + "step": 17075 + }, + { + "epoch": 11.48, + "grad_norm": 2.3224775791168213, + "learning_rate": 8.33246231155779e-06, + "loss": 0.048, + "step": 17100 + }, + { + "epoch": 11.5, + "grad_norm": 2.3509902954101562, + "learning_rate": 8.32994974874372e-06, + "loss": 0.0489, + "step": 17125 + }, + { + "epoch": 11.52, + "grad_norm": 2.6135170459747314, + "learning_rate": 8.327437185929648e-06, + "loss": 0.0478, + "step": 17150 + }, + { + "epoch": 11.53, + "grad_norm": 2.5049493312835693, + "learning_rate": 8.324924623115579e-06, + "loss": 0.0471, + "step": 17175 + }, + { + "epoch": 11.55, + "grad_norm": 2.4074366092681885, + "learning_rate": 8.322412060301508e-06, + "loss": 0.0481, + "step": 17200 + }, + { + "epoch": 11.57, + "grad_norm": 2.9850168228149414, + "learning_rate": 8.319899497487438e-06, + "loss": 0.0471, + "step": 17225 + }, + { + "epoch": 11.58, + "grad_norm": 2.18269681930542, + "learning_rate": 8.317386934673367e-06, + "loss": 0.048, + "step": 17250 + }, + { + "epoch": 11.6, + "grad_norm": 2.563279867172241, + "learning_rate": 8.314874371859298e-06, + "loss": 0.047, + "step": 17275 + }, + { + "epoch": 11.62, + "grad_norm": 2.5818583965301514, + "learning_rate": 8.312361809045226e-06, + "loss": 0.0483, + "step": 17300 + }, + { + "epoch": 11.64, + "grad_norm": 2.9220376014709473, + "learning_rate": 8.309849246231157e-06, + "loss": 0.0489, + "step": 17325 + }, + { + "epoch": 11.65, + "grad_norm": 2.416315793991089, + "learning_rate": 8.307336683417086e-06, + "loss": 0.0489, + "step": 17350 + }, + { + "epoch": 11.67, + "grad_norm": 2.442135810852051, + "learning_rate": 8.304824120603015e-06, + "loss": 0.0511, + "step": 17375 + }, + { + "epoch": 11.69, + "grad_norm": 2.347637891769409, + "learning_rate": 8.302311557788946e-06, + "loss": 0.0477, + "step": 17400 + }, + { + "epoch": 11.7, + "grad_norm": 2.8717072010040283, + "learning_rate": 8.299798994974874e-06, + "loss": 0.0485, + "step": 17425 + }, + { + "epoch": 11.72, + "grad_norm": 2.1800575256347656, + "learning_rate": 8.297286432160805e-06, + "loss": 0.0478, + "step": 17450 + }, + { + "epoch": 11.74, + "grad_norm": 2.239169120788574, + "learning_rate": 8.294773869346734e-06, + "loss": 0.0481, + "step": 17475 + }, + { + "epoch": 11.75, + "grad_norm": 2.5761425495147705, + "learning_rate": 8.292261306532664e-06, + "loss": 0.0504, + "step": 17500 + }, + { + "epoch": 11.77, + "grad_norm": 2.475437641143799, + "learning_rate": 8.289748743718595e-06, + "loss": 0.0489, + "step": 17525 + }, + { + "epoch": 11.79, + "grad_norm": 2.5709142684936523, + "learning_rate": 8.287236180904524e-06, + "loss": 0.0487, + "step": 17550 + }, + { + "epoch": 11.8, + "grad_norm": 2.7514986991882324, + "learning_rate": 8.284723618090453e-06, + "loss": 0.0469, + "step": 17575 + }, + { + "epoch": 11.82, + "grad_norm": 3.034162759780884, + "learning_rate": 8.282211055276383e-06, + "loss": 0.0498, + "step": 17600 + }, + { + "epoch": 11.84, + "grad_norm": 2.3932578563690186, + "learning_rate": 8.279698492462312e-06, + "loss": 0.048, + "step": 17625 + }, + { + "epoch": 11.85, + "grad_norm": 2.710620403289795, + "learning_rate": 8.277185929648241e-06, + "loss": 0.0498, + "step": 17650 + }, + { + "epoch": 11.87, + "grad_norm": 2.268237590789795, + "learning_rate": 8.274673366834172e-06, + "loss": 0.0489, + "step": 17675 + }, + { + "epoch": 11.89, + "grad_norm": 2.4965016841888428, + "learning_rate": 8.2721608040201e-06, + "loss": 0.0497, + "step": 17700 + }, + { + "epoch": 11.9, + "grad_norm": 2.4899039268493652, + "learning_rate": 8.269648241206031e-06, + "loss": 0.0496, + "step": 17725 + }, + { + "epoch": 11.92, + "grad_norm": 2.6754138469696045, + "learning_rate": 8.267236180904523e-06, + "loss": 0.0499, + "step": 17750 + }, + { + "epoch": 11.94, + "grad_norm": 2.5295770168304443, + "learning_rate": 8.264723618090454e-06, + "loss": 0.0501, + "step": 17775 + }, + { + "epoch": 11.95, + "grad_norm": 2.402968406677246, + "learning_rate": 8.262211055276383e-06, + "loss": 0.0482, + "step": 17800 + }, + { + "epoch": 11.97, + "grad_norm": 2.4143357276916504, + "learning_rate": 8.259698492462312e-06, + "loss": 0.0511, + "step": 17825 + }, + { + "epoch": 11.99, + "grad_norm": 2.4003748893737793, + "learning_rate": 8.257185929648242e-06, + "loss": 0.0488, + "step": 17850 + }, + { + "epoch": 12.0, + "grad_norm": 2.1996049880981445, + "learning_rate": 8.25467336683417e-06, + "loss": 0.0479, + "step": 17875 + }, + { + "epoch": 12.02, + "grad_norm": 2.3562393188476562, + "learning_rate": 8.2521608040201e-06, + "loss": 0.0406, + "step": 17900 + }, + { + "epoch": 12.04, + "grad_norm": 2.7702817916870117, + "learning_rate": 8.249648241206031e-06, + "loss": 0.0407, + "step": 17925 + }, + { + "epoch": 12.06, + "grad_norm": 2.107628345489502, + "learning_rate": 8.24713567839196e-06, + "loss": 0.0407, + "step": 17950 + }, + { + "epoch": 12.07, + "grad_norm": 2.395982027053833, + "learning_rate": 8.24462311557789e-06, + "loss": 0.041, + "step": 17975 + }, + { + "epoch": 12.09, + "grad_norm": 2.1188511848449707, + "learning_rate": 8.24211055276382e-06, + "loss": 0.0391, + "step": 18000 + }, + { + "epoch": 12.09, + "eval_loss": 0.08676249533891678, + "eval_runtime": 950.6783, + "eval_samples_per_second": 1.513, + "eval_steps_per_second": 1.513, + "eval_wer": 37.70147161878066, + "step": 18000 + }, + { + "epoch": 12.11, + "grad_norm": 2.2308688163757324, + "learning_rate": 8.239597989949748e-06, + "loss": 0.04, + "step": 18025 + }, + { + "epoch": 12.12, + "grad_norm": 2.6389808654785156, + "learning_rate": 8.23708542713568e-06, + "loss": 0.0421, + "step": 18050 + }, + { + "epoch": 12.14, + "grad_norm": 2.655792713165283, + "learning_rate": 8.234572864321609e-06, + "loss": 0.0413, + "step": 18075 + }, + { + "epoch": 12.16, + "grad_norm": 2.614919662475586, + "learning_rate": 8.232060301507538e-06, + "loss": 0.0416, + "step": 18100 + }, + { + "epoch": 12.17, + "grad_norm": 2.929002285003662, + "learning_rate": 8.229547738693467e-06, + "loss": 0.0409, + "step": 18125 + }, + { + "epoch": 12.19, + "grad_norm": 2.468407392501831, + "learning_rate": 8.227035175879397e-06, + "loss": 0.0391, + "step": 18150 + }, + { + "epoch": 12.21, + "grad_norm": 2.2625582218170166, + "learning_rate": 8.224522613065328e-06, + "loss": 0.0427, + "step": 18175 + }, + { + "epoch": 12.22, + "grad_norm": 2.3319356441497803, + "learning_rate": 8.222010050251257e-06, + "loss": 0.0409, + "step": 18200 + }, + { + "epoch": 12.24, + "grad_norm": 2.662274122238159, + "learning_rate": 8.219497487437186e-06, + "loss": 0.0417, + "step": 18225 + }, + { + "epoch": 12.26, + "grad_norm": 2.637533187866211, + "learning_rate": 8.216984924623116e-06, + "loss": 0.0418, + "step": 18250 + }, + { + "epoch": 12.27, + "grad_norm": 2.462749481201172, + "learning_rate": 8.214472361809047e-06, + "loss": 0.0431, + "step": 18275 + }, + { + "epoch": 12.29, + "grad_norm": 2.2730648517608643, + "learning_rate": 8.211959798994974e-06, + "loss": 0.0421, + "step": 18300 + }, + { + "epoch": 12.31, + "grad_norm": 2.62898850440979, + "learning_rate": 8.209447236180905e-06, + "loss": 0.0425, + "step": 18325 + }, + { + "epoch": 12.32, + "grad_norm": 2.405099391937256, + "learning_rate": 8.206934673366835e-06, + "loss": 0.0409, + "step": 18350 + }, + { + "epoch": 12.34, + "grad_norm": 2.4923086166381836, + "learning_rate": 8.204422110552764e-06, + "loss": 0.043, + "step": 18375 + }, + { + "epoch": 12.36, + "grad_norm": 2.3909108638763428, + "learning_rate": 8.201909547738695e-06, + "loss": 0.0437, + "step": 18400 + }, + { + "epoch": 12.37, + "grad_norm": 2.163416862487793, + "learning_rate": 8.199396984924623e-06, + "loss": 0.0411, + "step": 18425 + }, + { + "epoch": 12.39, + "grad_norm": 2.6367242336273193, + "learning_rate": 8.196884422110554e-06, + "loss": 0.0423, + "step": 18450 + }, + { + "epoch": 12.41, + "grad_norm": 2.4411003589630127, + "learning_rate": 8.194371859296483e-06, + "loss": 0.0417, + "step": 18475 + }, + { + "epoch": 12.42, + "grad_norm": 2.217989921569824, + "learning_rate": 8.191859296482412e-06, + "loss": 0.0426, + "step": 18500 + }, + { + "epoch": 12.44, + "grad_norm": 2.399442195892334, + "learning_rate": 8.189346733668342e-06, + "loss": 0.043, + "step": 18525 + }, + { + "epoch": 12.46, + "grad_norm": 2.2447445392608643, + "learning_rate": 8.186834170854273e-06, + "loss": 0.0418, + "step": 18550 + }, + { + "epoch": 12.47, + "grad_norm": 2.6561951637268066, + "learning_rate": 8.184321608040202e-06, + "loss": 0.0432, + "step": 18575 + }, + { + "epoch": 12.49, + "grad_norm": 2.832292318344116, + "learning_rate": 8.181809045226131e-06, + "loss": 0.044, + "step": 18600 + }, + { + "epoch": 12.51, + "grad_norm": 2.4265310764312744, + "learning_rate": 8.17929648241206e-06, + "loss": 0.0419, + "step": 18625 + }, + { + "epoch": 12.53, + "grad_norm": 2.6496670246124268, + "learning_rate": 8.17678391959799e-06, + "loss": 0.0434, + "step": 18650 + }, + { + "epoch": 12.54, + "grad_norm": 2.6289494037628174, + "learning_rate": 8.174271356783921e-06, + "loss": 0.0433, + "step": 18675 + }, + { + "epoch": 12.56, + "grad_norm": 2.4116311073303223, + "learning_rate": 8.171758793969849e-06, + "loss": 0.0432, + "step": 18700 + }, + { + "epoch": 12.58, + "grad_norm": 2.5447356700897217, + "learning_rate": 8.16924623115578e-06, + "loss": 0.0447, + "step": 18725 + }, + { + "epoch": 12.59, + "grad_norm": 2.356588125228882, + "learning_rate": 8.166733668341709e-06, + "loss": 0.0428, + "step": 18750 + }, + { + "epoch": 12.61, + "grad_norm": 2.3651928901672363, + "learning_rate": 8.164221105527638e-06, + "loss": 0.0422, + "step": 18775 + }, + { + "epoch": 12.63, + "grad_norm": 2.5740935802459717, + "learning_rate": 8.16170854271357e-06, + "loss": 0.0446, + "step": 18800 + }, + { + "epoch": 12.64, + "grad_norm": 2.7281911373138428, + "learning_rate": 8.159195979899499e-06, + "loss": 0.0435, + "step": 18825 + }, + { + "epoch": 12.66, + "grad_norm": 2.4611990451812744, + "learning_rate": 8.156683417085428e-06, + "loss": 0.0457, + "step": 18850 + }, + { + "epoch": 12.68, + "grad_norm": 2.712991237640381, + "learning_rate": 8.154170854271357e-06, + "loss": 0.0429, + "step": 18875 + }, + { + "epoch": 12.69, + "grad_norm": 2.493391275405884, + "learning_rate": 8.151658291457287e-06, + "loss": 0.043, + "step": 18900 + }, + { + "epoch": 12.71, + "grad_norm": 2.752633810043335, + "learning_rate": 8.149145728643216e-06, + "loss": 0.0448, + "step": 18925 + }, + { + "epoch": 12.73, + "grad_norm": 2.8290460109710693, + "learning_rate": 8.146633165829147e-06, + "loss": 0.0429, + "step": 18950 + }, + { + "epoch": 12.74, + "grad_norm": 2.81404972076416, + "learning_rate": 8.144120603015076e-06, + "loss": 0.0424, + "step": 18975 + }, + { + "epoch": 12.76, + "grad_norm": 2.9670522212982178, + "learning_rate": 8.141608040201006e-06, + "loss": 0.0422, + "step": 19000 + }, + { + "epoch": 12.76, + "eval_loss": 0.08924829959869385, + "eval_runtime": 955.1852, + "eval_samples_per_second": 1.505, + "eval_steps_per_second": 1.505, + "eval_wer": 38.411002102312544, + "step": 19000 + }, + { + "epoch": 12.78, + "grad_norm": 2.2908473014831543, + "learning_rate": 8.139095477386935e-06, + "loss": 0.0434, + "step": 19025 + }, + { + "epoch": 12.79, + "grad_norm": 2.7769930362701416, + "learning_rate": 8.136582914572864e-06, + "loss": 0.0427, + "step": 19050 + }, + { + "epoch": 12.81, + "grad_norm": 2.22310471534729, + "learning_rate": 8.134070351758795e-06, + "loss": 0.0441, + "step": 19075 + }, + { + "epoch": 12.83, + "grad_norm": 2.597101926803589, + "learning_rate": 8.131557788944725e-06, + "loss": 0.0454, + "step": 19100 + }, + { + "epoch": 12.84, + "grad_norm": 2.368563413619995, + "learning_rate": 8.129045226130654e-06, + "loss": 0.0437, + "step": 19125 + }, + { + "epoch": 12.86, + "grad_norm": 2.6715641021728516, + "learning_rate": 8.126532663316583e-06, + "loss": 0.0427, + "step": 19150 + }, + { + "epoch": 12.88, + "grad_norm": 2.2254865169525146, + "learning_rate": 8.124020100502513e-06, + "loss": 0.0428, + "step": 19175 + }, + { + "epoch": 12.89, + "grad_norm": 2.636401891708374, + "learning_rate": 8.121507537688444e-06, + "loss": 0.0443, + "step": 19200 + }, + { + "epoch": 12.91, + "grad_norm": 2.5772204399108887, + "learning_rate": 8.118994974874373e-06, + "loss": 0.0432, + "step": 19225 + }, + { + "epoch": 12.93, + "grad_norm": 2.5020699501037598, + "learning_rate": 8.116482412060302e-06, + "loss": 0.0416, + "step": 19250 + }, + { + "epoch": 12.94, + "grad_norm": 3.158520460128784, + "learning_rate": 8.113969849246232e-06, + "loss": 0.0451, + "step": 19275 + }, + { + "epoch": 12.96, + "grad_norm": 2.4919381141662598, + "learning_rate": 8.111457286432161e-06, + "loss": 0.0434, + "step": 19300 + }, + { + "epoch": 12.98, + "grad_norm": 2.604158878326416, + "learning_rate": 8.10894472361809e-06, + "loss": 0.0451, + "step": 19325 + }, + { + "epoch": 13.0, + "grad_norm": 2.432056427001953, + "learning_rate": 8.106432160804021e-06, + "loss": 0.0442, + "step": 19350 + }, + { + "epoch": 13.01, + "grad_norm": 2.7511301040649414, + "learning_rate": 8.10391959798995e-06, + "loss": 0.0385, + "step": 19375 + }, + { + "epoch": 13.03, + "grad_norm": 2.4156081676483154, + "learning_rate": 8.10140703517588e-06, + "loss": 0.0336, + "step": 19400 + }, + { + "epoch": 13.05, + "grad_norm": 2.2334141731262207, + "learning_rate": 8.098894472361811e-06, + "loss": 0.0356, + "step": 19425 + }, + { + "epoch": 13.06, + "grad_norm": 2.1928906440734863, + "learning_rate": 8.096381909547739e-06, + "loss": 0.036, + "step": 19450 + }, + { + "epoch": 13.08, + "grad_norm": 2.1651523113250732, + "learning_rate": 8.09386934673367e-06, + "loss": 0.0369, + "step": 19475 + }, + { + "epoch": 13.1, + "grad_norm": 2.4520890712738037, + "learning_rate": 8.091356783919599e-06, + "loss": 0.0363, + "step": 19500 + }, + { + "epoch": 13.11, + "grad_norm": 2.5533361434936523, + "learning_rate": 8.088844221105528e-06, + "loss": 0.036, + "step": 19525 + }, + { + "epoch": 13.13, + "grad_norm": 2.108170986175537, + "learning_rate": 8.086331658291458e-06, + "loss": 0.0364, + "step": 19550 + }, + { + "epoch": 13.15, + "grad_norm": 2.758329391479492, + "learning_rate": 8.083819095477387e-06, + "loss": 0.0377, + "step": 19575 + }, + { + "epoch": 13.16, + "grad_norm": 2.817915439605713, + "learning_rate": 8.081306532663318e-06, + "loss": 0.0373, + "step": 19600 + }, + { + "epoch": 13.18, + "grad_norm": 2.287584066390991, + "learning_rate": 8.078793969849247e-06, + "loss": 0.0353, + "step": 19625 + }, + { + "epoch": 13.2, + "grad_norm": 2.5290281772613525, + "learning_rate": 8.076281407035177e-06, + "loss": 0.0369, + "step": 19650 + }, + { + "epoch": 13.21, + "grad_norm": 2.33791184425354, + "learning_rate": 8.073768844221106e-06, + "loss": 0.0376, + "step": 19675 + }, + { + "epoch": 13.23, + "grad_norm": 2.299699544906616, + "learning_rate": 8.071256281407037e-06, + "loss": 0.0358, + "step": 19700 + }, + { + "epoch": 13.25, + "grad_norm": 2.591982841491699, + "learning_rate": 8.068743718592964e-06, + "loss": 0.0376, + "step": 19725 + }, + { + "epoch": 13.26, + "grad_norm": 2.7202000617980957, + "learning_rate": 8.066231155778895e-06, + "loss": 0.0379, + "step": 19750 + }, + { + "epoch": 13.28, + "grad_norm": 2.5300393104553223, + "learning_rate": 8.063718592964825e-06, + "loss": 0.0356, + "step": 19775 + }, + { + "epoch": 13.3, + "grad_norm": 2.203085422515869, + "learning_rate": 8.061206030150754e-06, + "loss": 0.0377, + "step": 19800 + }, + { + "epoch": 13.31, + "grad_norm": 2.3006625175476074, + "learning_rate": 8.058693467336685e-06, + "loss": 0.0373, + "step": 19825 + }, + { + "epoch": 13.33, + "grad_norm": 2.495436191558838, + "learning_rate": 8.056180904522613e-06, + "loss": 0.0363, + "step": 19850 + }, + { + "epoch": 13.35, + "grad_norm": 2.682196617126465, + "learning_rate": 8.053668341708544e-06, + "loss": 0.0385, + "step": 19875 + }, + { + "epoch": 13.36, + "grad_norm": 2.494319438934326, + "learning_rate": 8.051155778894473e-06, + "loss": 0.0371, + "step": 19900 + }, + { + "epoch": 13.38, + "grad_norm": 2.0060739517211914, + "learning_rate": 8.048643216080402e-06, + "loss": 0.0365, + "step": 19925 + }, + { + "epoch": 13.4, + "grad_norm": 2.2405190467834473, + "learning_rate": 8.046130653266332e-06, + "loss": 0.0367, + "step": 19950 + }, + { + "epoch": 13.42, + "grad_norm": 2.3366434574127197, + "learning_rate": 8.043618090452263e-06, + "loss": 0.0381, + "step": 19975 + }, + { + "epoch": 13.43, + "grad_norm": 2.352342367172241, + "learning_rate": 8.04110552763819e-06, + "loss": 0.0359, + "step": 20000 + }, + { + "epoch": 13.43, + "eval_loss": 0.09295900166034698, + "eval_runtime": 945.1259, + "eval_samples_per_second": 1.521, + "eval_steps_per_second": 1.521, + "eval_wer": 38.01681850035038, + "step": 20000 + }, + { + "epoch": 13.45, + "grad_norm": 2.7983577251434326, + "learning_rate": 8.038592964824121e-06, + "loss": 0.037, + "step": 20025 + }, + { + "epoch": 13.47, + "grad_norm": 2.7423253059387207, + "learning_rate": 8.03608040201005e-06, + "loss": 0.0386, + "step": 20050 + }, + { + "epoch": 13.48, + "grad_norm": 2.5691864490509033, + "learning_rate": 8.03356783919598e-06, + "loss": 0.0378, + "step": 20075 + }, + { + "epoch": 13.5, + "grad_norm": 2.373624324798584, + "learning_rate": 8.031055276381911e-06, + "loss": 0.0394, + "step": 20100 + }, + { + "epoch": 13.52, + "grad_norm": 2.4439280033111572, + "learning_rate": 8.028542713567839e-06, + "loss": 0.0381, + "step": 20125 + }, + { + "epoch": 13.53, + "grad_norm": 2.6071155071258545, + "learning_rate": 8.02603015075377e-06, + "loss": 0.0381, + "step": 20150 + }, + { + "epoch": 13.55, + "grad_norm": 2.6824278831481934, + "learning_rate": 8.023517587939699e-06, + "loss": 0.0381, + "step": 20175 + }, + { + "epoch": 13.57, + "grad_norm": 3.054227828979492, + "learning_rate": 8.021005025125628e-06, + "loss": 0.0379, + "step": 20200 + }, + { + "epoch": 13.58, + "grad_norm": 2.1759233474731445, + "learning_rate": 8.01849246231156e-06, + "loss": 0.0369, + "step": 20225 + }, + { + "epoch": 13.6, + "grad_norm": 2.484517812728882, + "learning_rate": 8.015979899497489e-06, + "loss": 0.0373, + "step": 20250 + }, + { + "epoch": 13.62, + "grad_norm": 2.475522756576538, + "learning_rate": 8.013467336683418e-06, + "loss": 0.037, + "step": 20275 + }, + { + "epoch": 13.63, + "grad_norm": 2.6449432373046875, + "learning_rate": 8.010954773869347e-06, + "loss": 0.038, + "step": 20300 + }, + { + "epoch": 13.65, + "grad_norm": 2.3792076110839844, + "learning_rate": 8.008442211055277e-06, + "loss": 0.0366, + "step": 20325 + }, + { + "epoch": 13.67, + "grad_norm": 2.5500106811523438, + "learning_rate": 8.005929648241206e-06, + "loss": 0.0377, + "step": 20350 + }, + { + "epoch": 13.68, + "grad_norm": 2.8283488750457764, + "learning_rate": 8.003417085427137e-06, + "loss": 0.0379, + "step": 20375 + }, + { + "epoch": 13.7, + "grad_norm": 2.622807264328003, + "learning_rate": 8.000904522613065e-06, + "loss": 0.039, + "step": 20400 + }, + { + "epoch": 13.72, + "grad_norm": 2.7982845306396484, + "learning_rate": 7.998391959798996e-06, + "loss": 0.0394, + "step": 20425 + }, + { + "epoch": 13.73, + "grad_norm": 2.711775779724121, + "learning_rate": 7.995879396984925e-06, + "loss": 0.0395, + "step": 20450 + }, + { + "epoch": 13.75, + "grad_norm": 2.3508825302124023, + "learning_rate": 7.993366834170854e-06, + "loss": 0.0378, + "step": 20475 + }, + { + "epoch": 13.77, + "grad_norm": 2.2336623668670654, + "learning_rate": 7.990854271356785e-06, + "loss": 0.0388, + "step": 20500 + }, + { + "epoch": 13.78, + "grad_norm": 2.2531332969665527, + "learning_rate": 7.988341708542715e-06, + "loss": 0.0371, + "step": 20525 + }, + { + "epoch": 13.8, + "grad_norm": 2.659705638885498, + "learning_rate": 7.985829145728644e-06, + "loss": 0.0363, + "step": 20550 + }, + { + "epoch": 13.82, + "grad_norm": 2.4200754165649414, + "learning_rate": 7.983316582914573e-06, + "loss": 0.0394, + "step": 20575 + }, + { + "epoch": 13.83, + "grad_norm": 2.7003097534179688, + "learning_rate": 7.980804020100503e-06, + "loss": 0.0372, + "step": 20600 + }, + { + "epoch": 13.85, + "grad_norm": 2.5731041431427, + "learning_rate": 7.978291457286432e-06, + "loss": 0.0376, + "step": 20625 + }, + { + "epoch": 13.87, + "grad_norm": 2.343764543533325, + "learning_rate": 7.975778894472363e-06, + "loss": 0.0382, + "step": 20650 + }, + { + "epoch": 13.89, + "grad_norm": 2.402113914489746, + "learning_rate": 7.973266331658292e-06, + "loss": 0.038, + "step": 20675 + }, + { + "epoch": 13.9, + "grad_norm": 2.6608850955963135, + "learning_rate": 7.970753768844222e-06, + "loss": 0.0395, + "step": 20700 + }, + { + "epoch": 13.92, + "grad_norm": 2.682255744934082, + "learning_rate": 7.968241206030151e-06, + "loss": 0.0379, + "step": 20725 + }, + { + "epoch": 13.94, + "grad_norm": 2.935173273086548, + "learning_rate": 7.96572864321608e-06, + "loss": 0.0387, + "step": 20750 + }, + { + "epoch": 13.95, + "grad_norm": 2.7305967807769775, + "learning_rate": 7.963216080402011e-06, + "loss": 0.0384, + "step": 20775 + }, + { + "epoch": 13.97, + "grad_norm": 2.341463327407837, + "learning_rate": 7.96070351758794e-06, + "loss": 0.0383, + "step": 20800 + }, + { + "epoch": 13.99, + "grad_norm": 2.2973952293395996, + "learning_rate": 7.95819095477387e-06, + "loss": 0.0393, + "step": 20825 + }, + { + "epoch": 14.0, + "grad_norm": 2.16302227973938, + "learning_rate": 7.955678391959801e-06, + "loss": 0.0371, + "step": 20850 + }, + { + "epoch": 14.02, + "grad_norm": 2.322252035140991, + "learning_rate": 7.953165829145729e-06, + "loss": 0.0318, + "step": 20875 + }, + { + "epoch": 14.04, + "grad_norm": 2.2239387035369873, + "learning_rate": 7.95065326633166e-06, + "loss": 0.0311, + "step": 20900 + }, + { + "epoch": 14.05, + "grad_norm": 2.3184633255004883, + "learning_rate": 7.948140703517589e-06, + "loss": 0.0312, + "step": 20925 + }, + { + "epoch": 14.07, + "grad_norm": 2.35105037689209, + "learning_rate": 7.945628140703518e-06, + "loss": 0.0305, + "step": 20950 + }, + { + "epoch": 14.09, + "grad_norm": 2.3514328002929688, + "learning_rate": 7.943115577889448e-06, + "loss": 0.0318, + "step": 20975 + }, + { + "epoch": 14.1, + "grad_norm": 2.480410099029541, + "learning_rate": 7.940603015075377e-06, + "loss": 0.0314, + "step": 21000 + }, + { + "epoch": 14.1, + "eval_loss": 0.09592559933662415, + "eval_runtime": 943.5426, + "eval_samples_per_second": 1.524, + "eval_steps_per_second": 1.524, + "eval_wer": 37.570077084793276, + "step": 21000 + }, + { + "epoch": 14.12, + "grad_norm": 2.5358049869537354, + "learning_rate": 7.938090452261306e-06, + "loss": 0.0317, + "step": 21025 + }, + { + "epoch": 14.14, + "grad_norm": 2.5162100791931152, + "learning_rate": 7.935577889447237e-06, + "loss": 0.0307, + "step": 21050 + }, + { + "epoch": 14.15, + "grad_norm": 2.27839994430542, + "learning_rate": 7.933065326633167e-06, + "loss": 0.0318, + "step": 21075 + }, + { + "epoch": 14.17, + "grad_norm": 2.642482042312622, + "learning_rate": 7.930552763819096e-06, + "loss": 0.0323, + "step": 21100 + }, + { + "epoch": 14.19, + "grad_norm": 2.2753946781158447, + "learning_rate": 7.928040201005027e-06, + "loss": 0.0315, + "step": 21125 + }, + { + "epoch": 14.2, + "grad_norm": 2.8006997108459473, + "learning_rate": 7.925527638190955e-06, + "loss": 0.0328, + "step": 21150 + }, + { + "epoch": 14.22, + "grad_norm": 2.847626209259033, + "learning_rate": 7.923015075376886e-06, + "loss": 0.0318, + "step": 21175 + }, + { + "epoch": 14.24, + "grad_norm": 2.722684621810913, + "learning_rate": 7.920502512562815e-06, + "loss": 0.0318, + "step": 21200 + }, + { + "epoch": 14.25, + "grad_norm": 2.292083978652954, + "learning_rate": 7.917989949748744e-06, + "loss": 0.0322, + "step": 21225 + }, + { + "epoch": 14.27, + "grad_norm": 2.461219310760498, + "learning_rate": 7.915477386934674e-06, + "loss": 0.0319, + "step": 21250 + }, + { + "epoch": 14.29, + "grad_norm": 2.5110058784484863, + "learning_rate": 7.912964824120603e-06, + "loss": 0.0324, + "step": 21275 + }, + { + "epoch": 14.3, + "grad_norm": 2.1377546787261963, + "learning_rate": 7.910452261306534e-06, + "loss": 0.0315, + "step": 21300 + }, + { + "epoch": 14.32, + "grad_norm": 2.3189284801483154, + "learning_rate": 7.907939698492463e-06, + "loss": 0.0339, + "step": 21325 + }, + { + "epoch": 14.34, + "grad_norm": 2.116954803466797, + "learning_rate": 7.905427135678393e-06, + "loss": 0.0345, + "step": 21350 + }, + { + "epoch": 14.36, + "grad_norm": 2.1134145259857178, + "learning_rate": 7.902914572864322e-06, + "loss": 0.0321, + "step": 21375 + }, + { + "epoch": 14.37, + "grad_norm": 2.318699359893799, + "learning_rate": 7.900402010050253e-06, + "loss": 0.0335, + "step": 21400 + }, + { + "epoch": 14.39, + "grad_norm": 2.424032211303711, + "learning_rate": 7.89788944723618e-06, + "loss": 0.0318, + "step": 21425 + }, + { + "epoch": 14.41, + "grad_norm": 2.271960735321045, + "learning_rate": 7.895376884422111e-06, + "loss": 0.0334, + "step": 21450 + }, + { + "epoch": 14.42, + "grad_norm": 2.7182435989379883, + "learning_rate": 7.89286432160804e-06, + "loss": 0.0335, + "step": 21475 + }, + { + "epoch": 14.44, + "grad_norm": 2.4806132316589355, + "learning_rate": 7.89035175879397e-06, + "loss": 0.032, + "step": 21500 + }, + { + "epoch": 14.46, + "grad_norm": 2.322605609893799, + "learning_rate": 7.887839195979901e-06, + "loss": 0.0314, + "step": 21525 + }, + { + "epoch": 14.47, + "grad_norm": 2.24501633644104, + "learning_rate": 7.885326633165829e-06, + "loss": 0.0331, + "step": 21550 + }, + { + "epoch": 14.49, + "grad_norm": 2.5784947872161865, + "learning_rate": 7.88281407035176e-06, + "loss": 0.0331, + "step": 21575 + }, + { + "epoch": 14.51, + "grad_norm": 2.280972957611084, + "learning_rate": 7.880301507537689e-06, + "loss": 0.0344, + "step": 21600 + }, + { + "epoch": 14.52, + "grad_norm": 2.2095541954040527, + "learning_rate": 7.877788944723618e-06, + "loss": 0.0325, + "step": 21625 + }, + { + "epoch": 14.54, + "grad_norm": 2.238990068435669, + "learning_rate": 7.875276381909548e-06, + "loss": 0.032, + "step": 21650 + }, + { + "epoch": 14.56, + "grad_norm": 2.205785036087036, + "learning_rate": 7.872763819095479e-06, + "loss": 0.0338, + "step": 21675 + }, + { + "epoch": 14.57, + "grad_norm": 2.3384366035461426, + "learning_rate": 7.870251256281408e-06, + "loss": 0.0321, + "step": 21700 + }, + { + "epoch": 14.59, + "grad_norm": 2.450470209121704, + "learning_rate": 7.867738693467337e-06, + "loss": 0.0322, + "step": 21725 + }, + { + "epoch": 14.61, + "grad_norm": 2.680314302444458, + "learning_rate": 7.865226130653267e-06, + "loss": 0.0349, + "step": 21750 + }, + { + "epoch": 14.62, + "grad_norm": 2.2746779918670654, + "learning_rate": 7.862713567839196e-06, + "loss": 0.0332, + "step": 21775 + }, + { + "epoch": 14.64, + "grad_norm": 2.3216638565063477, + "learning_rate": 7.86030150753769e-06, + "loss": 0.0327, + "step": 21800 + }, + { + "epoch": 14.66, + "grad_norm": 2.2493793964385986, + "learning_rate": 7.857788944723619e-06, + "loss": 0.0331, + "step": 21825 + }, + { + "epoch": 14.67, + "grad_norm": 2.3709781169891357, + "learning_rate": 7.855276381909548e-06, + "loss": 0.0331, + "step": 21850 + }, + { + "epoch": 14.69, + "grad_norm": 2.499533176422119, + "learning_rate": 7.852763819095477e-06, + "loss": 0.0326, + "step": 21875 + }, + { + "epoch": 14.71, + "grad_norm": 2.5715808868408203, + "learning_rate": 7.850251256281408e-06, + "loss": 0.0336, + "step": 21900 + }, + { + "epoch": 14.72, + "grad_norm": 2.55898380279541, + "learning_rate": 7.847738693467338e-06, + "loss": 0.0328, + "step": 21925 + }, + { + "epoch": 14.74, + "grad_norm": 2.4711601734161377, + "learning_rate": 7.845226130653267e-06, + "loss": 0.0324, + "step": 21950 + }, + { + "epoch": 14.76, + "grad_norm": 2.296672821044922, + "learning_rate": 7.842713567839196e-06, + "loss": 0.0327, + "step": 21975 + }, + { + "epoch": 14.78, + "grad_norm": 2.247776508331299, + "learning_rate": 7.840201005025127e-06, + "loss": 0.033, + "step": 22000 + }, + { + "epoch": 14.78, + "eval_loss": 0.09800489991903305, + "eval_runtime": 948.3267, + "eval_samples_per_second": 1.516, + "eval_steps_per_second": 1.516, + "eval_wer": 36.974421864050456, + "step": 22000 + }, + { + "epoch": 14.79, + "grad_norm": 2.339890956878662, + "learning_rate": 7.837688442211055e-06, + "loss": 0.032, + "step": 22025 + }, + { + "epoch": 14.81, + "grad_norm": 2.305504322052002, + "learning_rate": 7.835175879396986e-06, + "loss": 0.0335, + "step": 22050 + }, + { + "epoch": 14.83, + "grad_norm": 2.2204623222351074, + "learning_rate": 7.832663316582915e-06, + "loss": 0.0341, + "step": 22075 + }, + { + "epoch": 14.84, + "grad_norm": 2.3129026889801025, + "learning_rate": 7.830150753768845e-06, + "loss": 0.0324, + "step": 22100 + }, + { + "epoch": 14.86, + "grad_norm": 2.556502342224121, + "learning_rate": 7.827638190954776e-06, + "loss": 0.0348, + "step": 22125 + }, + { + "epoch": 14.88, + "grad_norm": 2.7629687786102295, + "learning_rate": 7.825125628140703e-06, + "loss": 0.0338, + "step": 22150 + }, + { + "epoch": 14.89, + "grad_norm": 2.4208903312683105, + "learning_rate": 7.822613065326634e-06, + "loss": 0.0341, + "step": 22175 + }, + { + "epoch": 14.91, + "grad_norm": 2.677117347717285, + "learning_rate": 7.820100502512564e-06, + "loss": 0.0331, + "step": 22200 + }, + { + "epoch": 14.93, + "grad_norm": 2.5515668392181396, + "learning_rate": 7.817587939698493e-06, + "loss": 0.0337, + "step": 22225 + }, + { + "epoch": 14.94, + "grad_norm": 2.599891185760498, + "learning_rate": 7.815075376884422e-06, + "loss": 0.0326, + "step": 22250 + }, + { + "epoch": 14.96, + "grad_norm": 2.7202401161193848, + "learning_rate": 7.812562814070353e-06, + "loss": 0.0333, + "step": 22275 + }, + { + "epoch": 14.98, + "grad_norm": 2.6786723136901855, + "learning_rate": 7.810050251256283e-06, + "loss": 0.035, + "step": 22300 + }, + { + "epoch": 14.99, + "grad_norm": 2.3007864952087402, + "learning_rate": 7.807537688442212e-06, + "loss": 0.0327, + "step": 22325 + }, + { + "epoch": 15.01, + "grad_norm": 1.9121522903442383, + "learning_rate": 7.805025125628141e-06, + "loss": 0.0274, + "step": 22350 + }, + { + "epoch": 15.03, + "grad_norm": 2.48038649559021, + "learning_rate": 7.80251256281407e-06, + "loss": 0.0274, + "step": 22375 + }, + { + "epoch": 15.04, + "grad_norm": 2.2166085243225098, + "learning_rate": 7.800000000000002e-06, + "loss": 0.0262, + "step": 22400 + }, + { + "epoch": 15.06, + "grad_norm": 2.224414825439453, + "learning_rate": 7.79748743718593e-06, + "loss": 0.0269, + "step": 22425 + }, + { + "epoch": 15.08, + "grad_norm": 2.150346279144287, + "learning_rate": 7.79497487437186e-06, + "loss": 0.0265, + "step": 22450 + }, + { + "epoch": 15.09, + "grad_norm": 2.3419864177703857, + "learning_rate": 7.79246231155779e-06, + "loss": 0.0274, + "step": 22475 + }, + { + "epoch": 15.11, + "grad_norm": 2.4363386631011963, + "learning_rate": 7.789949748743719e-06, + "loss": 0.0271, + "step": 22500 + }, + { + "epoch": 15.13, + "grad_norm": 1.7033381462097168, + "learning_rate": 7.78743718592965e-06, + "loss": 0.0273, + "step": 22525 + }, + { + "epoch": 15.14, + "grad_norm": 2.5488929748535156, + "learning_rate": 7.78492462311558e-06, + "loss": 0.0275, + "step": 22550 + }, + { + "epoch": 15.16, + "grad_norm": 2.422987699508667, + "learning_rate": 7.782412060301509e-06, + "loss": 0.0272, + "step": 22575 + }, + { + "epoch": 15.18, + "grad_norm": 2.3573038578033447, + "learning_rate": 7.779899497487438e-06, + "loss": 0.0276, + "step": 22600 + }, + { + "epoch": 15.19, + "grad_norm": 2.7141470909118652, + "learning_rate": 7.777386934673367e-06, + "loss": 0.0272, + "step": 22625 + }, + { + "epoch": 15.21, + "grad_norm": 2.489651679992676, + "learning_rate": 7.774874371859296e-06, + "loss": 0.0272, + "step": 22650 + }, + { + "epoch": 15.23, + "grad_norm": 2.4006781578063965, + "learning_rate": 7.772361809045227e-06, + "loss": 0.0277, + "step": 22675 + }, + { + "epoch": 15.25, + "grad_norm": 1.9095662832260132, + "learning_rate": 7.769849246231155e-06, + "loss": 0.0282, + "step": 22700 + }, + { + "epoch": 15.26, + "grad_norm": 2.5083181858062744, + "learning_rate": 7.767336683417086e-06, + "loss": 0.0275, + "step": 22725 + }, + { + "epoch": 15.28, + "grad_norm": 2.201840877532959, + "learning_rate": 7.764824120603015e-06, + "loss": 0.0286, + "step": 22750 + }, + { + "epoch": 15.3, + "grad_norm": 2.3138089179992676, + "learning_rate": 7.762311557788945e-06, + "loss": 0.0283, + "step": 22775 + }, + { + "epoch": 15.31, + "grad_norm": 2.146266460418701, + "learning_rate": 7.759798994974876e-06, + "loss": 0.028, + "step": 22800 + }, + { + "epoch": 15.33, + "grad_norm": 2.4032161235809326, + "learning_rate": 7.757286432160805e-06, + "loss": 0.0273, + "step": 22825 + }, + { + "epoch": 15.35, + "grad_norm": 2.389676570892334, + "learning_rate": 7.754773869346734e-06, + "loss": 0.028, + "step": 22850 + }, + { + "epoch": 15.36, + "grad_norm": 2.2575244903564453, + "learning_rate": 7.752261306532664e-06, + "loss": 0.0281, + "step": 22875 + }, + { + "epoch": 15.38, + "grad_norm": 2.510605812072754, + "learning_rate": 7.749748743718593e-06, + "loss": 0.0282, + "step": 22900 + }, + { + "epoch": 15.4, + "grad_norm": 2.544074058532715, + "learning_rate": 7.747236180904524e-06, + "loss": 0.0296, + "step": 22925 + }, + { + "epoch": 15.41, + "grad_norm": 2.5623300075531006, + "learning_rate": 7.744723618090453e-06, + "loss": 0.0278, + "step": 22950 + }, + { + "epoch": 15.43, + "grad_norm": 2.4195051193237305, + "learning_rate": 7.742211055276383e-06, + "loss": 0.0285, + "step": 22975 + }, + { + "epoch": 15.45, + "grad_norm": 2.2963266372680664, + "learning_rate": 7.739698492462312e-06, + "loss": 0.0285, + "step": 23000 + }, + { + "epoch": 15.45, + "eval_loss": 0.10142482072114944, + "eval_runtime": 936.2833, + "eval_samples_per_second": 1.536, + "eval_steps_per_second": 1.536, + "eval_wer": 36.764190609670635, + "step": 23000 + }, + { + "epoch": 15.46, + "grad_norm": 2.2131433486938477, + "learning_rate": 7.737185929648241e-06, + "loss": 0.0282, + "step": 23025 + }, + { + "epoch": 15.48, + "grad_norm": 2.5363659858703613, + "learning_rate": 7.73467336683417e-06, + "loss": 0.028, + "step": 23050 + }, + { + "epoch": 15.5, + "grad_norm": 2.436103343963623, + "learning_rate": 7.732160804020102e-06, + "loss": 0.0286, + "step": 23075 + }, + { + "epoch": 15.51, + "grad_norm": 2.647289514541626, + "learning_rate": 7.729648241206031e-06, + "loss": 0.0306, + "step": 23100 + }, + { + "epoch": 15.53, + "grad_norm": 2.235051155090332, + "learning_rate": 7.72713567839196e-06, + "loss": 0.0307, + "step": 23125 + }, + { + "epoch": 15.55, + "grad_norm": 2.1627144813537598, + "learning_rate": 7.724623115577891e-06, + "loss": 0.0281, + "step": 23150 + }, + { + "epoch": 15.56, + "grad_norm": 2.1198933124542236, + "learning_rate": 7.722110552763819e-06, + "loss": 0.0294, + "step": 23175 + }, + { + "epoch": 15.58, + "grad_norm": 2.3785107135772705, + "learning_rate": 7.71959798994975e-06, + "loss": 0.0288, + "step": 23200 + }, + { + "epoch": 15.6, + "grad_norm": 2.362521171569824, + "learning_rate": 7.71708542713568e-06, + "loss": 0.0279, + "step": 23225 + }, + { + "epoch": 15.61, + "grad_norm": 2.689411163330078, + "learning_rate": 7.714572864321609e-06, + "loss": 0.028, + "step": 23250 + }, + { + "epoch": 15.63, + "grad_norm": 2.3612396717071533, + "learning_rate": 7.712060301507538e-06, + "loss": 0.0281, + "step": 23275 + }, + { + "epoch": 15.65, + "grad_norm": 2.718661308288574, + "learning_rate": 7.709547738693467e-06, + "loss": 0.0279, + "step": 23300 + }, + { + "epoch": 15.66, + "grad_norm": 2.3702728748321533, + "learning_rate": 7.707035175879397e-06, + "loss": 0.0288, + "step": 23325 + }, + { + "epoch": 15.68, + "grad_norm": 2.525615692138672, + "learning_rate": 7.704522613065328e-06, + "loss": 0.0299, + "step": 23350 + }, + { + "epoch": 15.7, + "grad_norm": 2.99353289604187, + "learning_rate": 7.702010050251257e-06, + "loss": 0.0289, + "step": 23375 + }, + { + "epoch": 15.72, + "grad_norm": 2.498053550720215, + "learning_rate": 7.699497487437186e-06, + "loss": 0.0304, + "step": 23400 + }, + { + "epoch": 15.73, + "grad_norm": 2.5506653785705566, + "learning_rate": 7.696984924623117e-06, + "loss": 0.0287, + "step": 23425 + }, + { + "epoch": 15.75, + "grad_norm": 2.4572904109954834, + "learning_rate": 7.694472361809045e-06, + "loss": 0.0287, + "step": 23450 + }, + { + "epoch": 15.77, + "grad_norm": 2.4675610065460205, + "learning_rate": 7.691959798994976e-06, + "loss": 0.0293, + "step": 23475 + }, + { + "epoch": 15.78, + "grad_norm": 2.5318410396575928, + "learning_rate": 7.689447236180905e-06, + "loss": 0.0295, + "step": 23500 + }, + { + "epoch": 15.8, + "grad_norm": 2.5657405853271484, + "learning_rate": 7.686934673366835e-06, + "loss": 0.0289, + "step": 23525 + }, + { + "epoch": 15.82, + "grad_norm": 2.246229887008667, + "learning_rate": 7.684422110552766e-06, + "loss": 0.0281, + "step": 23550 + }, + { + "epoch": 15.83, + "grad_norm": 2.613318681716919, + "learning_rate": 7.681909547738693e-06, + "loss": 0.0287, + "step": 23575 + }, + { + "epoch": 15.85, + "grad_norm": 2.242007255554199, + "learning_rate": 7.679396984924624e-06, + "loss": 0.0283, + "step": 23600 + }, + { + "epoch": 15.87, + "grad_norm": 2.559636354446411, + "learning_rate": 7.676884422110554e-06, + "loss": 0.0285, + "step": 23625 + }, + { + "epoch": 15.88, + "grad_norm": 2.1081771850585938, + "learning_rate": 7.674371859296483e-06, + "loss": 0.0294, + "step": 23650 + }, + { + "epoch": 15.9, + "grad_norm": 2.597853183746338, + "learning_rate": 7.671859296482412e-06, + "loss": 0.03, + "step": 23675 + }, + { + "epoch": 15.92, + "grad_norm": 2.076571226119995, + "learning_rate": 7.669346733668343e-06, + "loss": 0.0294, + "step": 23700 + }, + { + "epoch": 15.93, + "grad_norm": 2.259345054626465, + "learning_rate": 7.666834170854271e-06, + "loss": 0.0294, + "step": 23725 + }, + { + "epoch": 15.95, + "grad_norm": 2.719036102294922, + "learning_rate": 7.664321608040202e-06, + "loss": 0.0296, + "step": 23750 + }, + { + "epoch": 15.97, + "grad_norm": 2.2839863300323486, + "learning_rate": 7.661809045226131e-06, + "loss": 0.0301, + "step": 23775 + }, + { + "epoch": 15.98, + "grad_norm": 2.4373881816864014, + "learning_rate": 7.659396984924625e-06, + "loss": 0.0309, + "step": 23800 + }, + { + "epoch": 16.0, + "grad_norm": 1.7572460174560547, + "learning_rate": 7.656884422110554e-06, + "loss": 0.0274, + "step": 23825 + }, + { + "epoch": 16.02, + "grad_norm": 2.1525707244873047, + "learning_rate": 7.654371859296483e-06, + "loss": 0.0224, + "step": 23850 + }, + { + "epoch": 16.03, + "grad_norm": 1.7920043468475342, + "learning_rate": 7.651859296482412e-06, + "loss": 0.0235, + "step": 23875 + }, + { + "epoch": 16.05, + "grad_norm": 2.176175355911255, + "learning_rate": 7.649346733668342e-06, + "loss": 0.0224, + "step": 23900 + }, + { + "epoch": 16.07, + "grad_norm": 1.9781831502914429, + "learning_rate": 7.646834170854271e-06, + "loss": 0.0229, + "step": 23925 + }, + { + "epoch": 16.08, + "grad_norm": 2.437086582183838, + "learning_rate": 7.644321608040202e-06, + "loss": 0.0247, + "step": 23950 + }, + { + "epoch": 16.1, + "grad_norm": 2.2544217109680176, + "learning_rate": 7.641809045226131e-06, + "loss": 0.0226, + "step": 23975 + }, + { + "epoch": 16.12, + "grad_norm": 2.208923101425171, + "learning_rate": 7.63929648241206e-06, + "loss": 0.0239, + "step": 24000 + }, + { + "epoch": 16.12, + "eval_loss": 0.1079203188419342, + "eval_runtime": 923.1824, + "eval_samples_per_second": 1.558, + "eval_steps_per_second": 1.558, + "eval_wer": 37.90294323756132, + "step": 24000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 68, + "save_steps": 1000, + "total_flos": 3.780830510972928e+19, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoints/whisper-tiny/telugu/checkpoint-24000/training_args.bin b/checkpoints/whisper-tiny/telugu/checkpoint-24000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdfdf0727284286c3b856a7cff922862527e5f99 --- /dev/null +++ b/checkpoints/whisper-tiny/telugu/checkpoint-24000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ca3f5e6196d2badc60c6d19d7ce33e3f532d686735a8d1aa26532ba532bff3 +size 4667