RafatK/Whisper_AZ
- README.md +63 -0
- config.json +61 -0
- generation_config.json +233 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +0 -0
- preprocessor_config.json +14 -0
- trainer_state.json +1455 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,63 @@
---
library_name: transformers
license: apache-2.0
base_model: openai/whisper-large-v2
tags:
- generated_from_trainer
model-index:
- name: second2
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# second2

This model is a fine-tuned version of [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) on an unknown dataset.
It achieves the following results on the evaluation set:
- eval_loss: 0.3327
- eval_cer: 5.4203
- eval_wer: 20.4638
- eval_bleu: 0.6173
- eval_runtime: 1022.0953
- eval_samples_per_second: 0.898
- eval_steps_per_second: 0.225
- epoch: 0.2134
- step: 1000

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 1e-05
- train_batch_size: 8
- eval_batch_size: 4
- seed: 420
- gradient_accumulation_steps: 8
- total_train_batch_size: 64
- optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
- lr_scheduler_type: linear
- lr_scheduler_warmup_steps: 2
- training_steps: 9500
- mixed_precision_training: Native AMP

### Framework versions

- Transformers 4.48.0
- Pytorch 2.9.1+cu126
- Datasets 3.6.0
- Tokenizers 0.21.4
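The card leaves usage blank, so here is a minimal inference sketch, not part of the generated card: it assumes the checkpoint resolves under the repo id from the page header (RafatK/Whisper_AZ) and that `audio.wav` is a local speech file; librosa is just one way to load audio at 16 kHz.

```python
# Minimal usage sketch (assumptions: repo id RafatK/Whisper_AZ from the page
# header; audio.wav is a hypothetical local file; librosa is one loader option).
import librosa
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("RafatK/Whisper_AZ")
model = WhisperForConditionalGeneration.from_pretrained("RafatK/Whisper_AZ")

speech, _ = librosa.load("audio.wav", sr=16000)  # resample to the 16 kHz the model expects
inputs = processor(speech, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    ids = model.generate(inputs.input_features, language="azerbaijani", task="transcribe")
print(processor.batch_decode(ids, skip_special_tokens=True)[0])
```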
config.json
ADDED
@@ -0,0 +1,61 @@
{
  "_name_or_path": "openai/whisper-large-v2",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "apply_spec_augment": true,
  "architectures": ["WhisperForConditionalGeneration"],
  "attention_dropout": 0.0,
  "begin_suppress_tokens": null,
  "bos_token_id": 50257,
  "classifier_proj_size": 256,
  "d_model": 1280,
  "decoder_attention_heads": 20,
  "decoder_ffn_dim": 5120,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 32,
  "decoder_start_token_id": 50258,
  "dropout": 0.1,
  "encoder_attention_heads": 20,
  "encoder_ffn_dim": 5120,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 32,
  "eos_token_id": 50257,
  "forced_decoder_ids": [[1, 50304], [2, 50359], [3, 50363]],
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_prob": 0.05,
  "max_length": null,
  "max_source_positions": 1500,
  "max_target_positions": 448,
  "median_filter_width": 7,
  "model_type": "whisper",
  "num_hidden_layers": 32,
  "num_mel_bins": 80,
  "pad_token_id": 50257,
  "scale_embedding": false,
  "torch_dtype": "float32",
  "transformers_version": "4.48.0",
  "use_cache": true,
  "use_weighted_layer_sum": false,
  "vocab_size": 51865
}
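The `apply_spec_augment`, `mask_time_*` and `mask_feature_*` fields above record the SpecAugment-style masking used during fine-tuning (5% time-mask probability with mask length 10, feature masking disabled); everything else is the stock whisper-large-v2 geometry. A small sketch, assuming the checkpoint resolves under the repo id from the page header, reads those fields back:

```python
# Sketch (assumption: repo id RafatK/Whisper_AZ from the page header).
from transformers import WhisperConfig

cfg = WhisperConfig.from_pretrained("RafatK/Whisper_AZ")
assert cfg.apply_spec_augment is True
assert cfg.mask_time_prob == 0.05 and cfg.mask_time_length == 10
assert cfg.mask_feature_prob == 0.0  # mel-channel masking disabled

# The remaining fields are the stock whisper-large-v2 geometry.
print(cfg.encoder_layers, cfg.decoder_layers, cfg.d_model)  # 32 32 1280
```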
generation_config.json
ADDED
@@ -0,0 +1,233 @@
{
  "alignment_heads": [
    [10, 12], [13, 17], [16, 11], [16, 12], [16, 13], [17, 15], [17, 16],
    [18, 4], [18, 11], [18, 19], [19, 11], [21, 2], [21, 3], [22, 3],
    [22, 9], [22, 12], [23, 5], [23, 7], [23, 13], [25, 5], [26, 1],
    [26, 12], [27, 15]
  ],
  "begin_suppress_tokens": [220, 50257],
  "bos_token_id": 50257,
  "decoder_start_token_id": 50258,
  "eos_token_id": 50257,
  "forced_decoder_ids": [[1, 50304], [2, 50359], [3, 50363]],
  "is_multilingual": true,
  "lang_to_id": {
    "<|af|>": 50327, "<|am|>": 50334, "<|ar|>": 50272, "<|as|>": 50350,
    "<|az|>": 50304, "<|ba|>": 50355, "<|be|>": 50330, "<|bg|>": 50292,
    "<|bn|>": 50302, "<|bo|>": 50347, "<|br|>": 50309, "<|bs|>": 50315,
    "<|ca|>": 50270, "<|cs|>": 50283, "<|cy|>": 50297, "<|da|>": 50285,
    "<|de|>": 50261, "<|el|>": 50281, "<|en|>": 50259, "<|es|>": 50262,
    "<|et|>": 50307, "<|eu|>": 50310, "<|fa|>": 50300, "<|fi|>": 50277,
    "<|fo|>": 50338, "<|fr|>": 50265, "<|gl|>": 50319, "<|gu|>": 50333,
    "<|haw|>": 50352, "<|ha|>": 50354, "<|he|>": 50279, "<|hi|>": 50276,
    "<|hr|>": 50291, "<|ht|>": 50339, "<|hu|>": 50286, "<|hy|>": 50312,
    "<|id|>": 50275, "<|is|>": 50311, "<|it|>": 50274, "<|ja|>": 50266,
    "<|jw|>": 50356, "<|ka|>": 50329, "<|kk|>": 50316, "<|km|>": 50323,
    "<|kn|>": 50306, "<|ko|>": 50264, "<|la|>": 50294, "<|lb|>": 50345,
    "<|ln|>": 50353, "<|lo|>": 50336, "<|lt|>": 50293, "<|lv|>": 50301,
    "<|mg|>": 50349, "<|mi|>": 50295, "<|mk|>": 50308, "<|ml|>": 50296,
    "<|mn|>": 50314, "<|mr|>": 50320, "<|ms|>": 50282, "<|mt|>": 50343,
    "<|my|>": 50346, "<|ne|>": 50313, "<|nl|>": 50271, "<|nn|>": 50342,
    "<|no|>": 50288, "<|oc|>": 50328, "<|pa|>": 50321, "<|pl|>": 50269,
    "<|ps|>": 50340, "<|pt|>": 50267, "<|ro|>": 50284, "<|ru|>": 50263,
    "<|sa|>": 50344, "<|sd|>": 50332, "<|si|>": 50322, "<|sk|>": 50298,
    "<|sl|>": 50305, "<|sn|>": 50324, "<|so|>": 50326, "<|sq|>": 50317,
    "<|sr|>": 50303, "<|su|>": 50357, "<|sv|>": 50273, "<|sw|>": 50318,
    "<|ta|>": 50287, "<|te|>": 50299, "<|tg|>": 50331, "<|th|>": 50289,
    "<|tk|>": 50341, "<|tl|>": 50348, "<|tr|>": 50268, "<|tt|>": 50351,
    "<|uk|>": 50280, "<|ur|>": 50290, "<|uz|>": 50337, "<|vi|>": 50278,
    "<|yi|>": 50335, "<|yo|>": 50325, "<|zh|>": 50260
  },
  "language": "azerbaijani",
  "max_initial_timestamp_index": 50,
  "max_length": 448,
  "no_timestamps_token_id": 50363,
  "pad_token_id": 50257,
  "prev_sot_token_id": 50361,
  "return_timestamps": false,
  "suppress_tokens": [],
  "task": "transcribe",
  "task_to_id": {"transcribe": 50359, "translate": 50358},
  "transformers_version": "4.48.0"
}
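The `forced_decoder_ids` triple `[[1, 50304], [2, 50359], [3, 50363]]` is exactly `<|az|>`, `<|transcribe|>` and `<|notimestamps|>` per the `lang_to_id`, `task_to_id` and `no_timestamps_token_id` entries above, so decoding is pinned to Azerbaijani transcription without timestamps. A sketch (repo id assumed from the page header) shows the processor rebuilding the same triple:

```python
# Sketch (assumption: repo id RafatK/Whisper_AZ from the page header).
from transformers import WhisperProcessor

processor = WhisperProcessor.from_pretrained("RafatK/Whisper_AZ")
prompt_ids = processor.get_decoder_prompt_ids(language="azerbaijani", task="transcribe")
print(prompt_ids)  # expected: [(1, 50304), (2, 50359), (3, 50363)]
```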
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1f4da568a59ac91b8552e6f3ea528ad37c777855838670fc3060f614befbe564
size 4992706480
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ad925562bc89a272373ae30b8cecda8339794ff8bb9035d5dcdbdfc5e8927e3b
size 1180663192
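Both `.safetensors` entries are Git LFS pointer files rather than the weights themselves: a version line, the SHA-256 `oid` of the real payload, and the payload size in bytes. A sketch for checking a downloaded shard against its pointer (the local path is hypothetical; adjust as needed):

```python
# Sketch: verify a downloaded shard against the LFS pointer above.
# The oid is the SHA-256 of the actual file contents.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

digest = sha256_of("model-00002-of-00002.safetensors")  # hypothetical local path
assert digest == "ad925562bc89a272373ae30b8cecda8339794ff8bb9035d5dcdbdfc5e8927e3b"
```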
model.safetensors.index.json
ADDED
The diff for this file is too large to render. See raw diff.
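Although this file's contents are not rendered, a standard `model.safetensors.index.json` is a JSON object with a `metadata` block and a `weight_map` from tensor names to the two shard files above. A hedged sketch of how such an index is typically consumed (the keys reflect the generic format, not this file's verbatim contents):

```python
# Sketch of the generic safetensors index layout; this repo's actual index is
# not shown above, so treat "metadata"/"weight_map" as the standard format.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Count how many tensors live in each of the two shards listed above.
print(Counter(index["weight_map"].values()))
```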
preprocessor_config.json
ADDED
@@ -0,0 +1,14 @@
{
  "chunk_length": 30,
  "feature_extractor_type": "WhisperFeatureExtractor",
  "feature_size": 80,
  "hop_length": 160,
  "n_fft": 400,
  "n_samples": 480000,
  "nb_max_frames": 3000,
  "padding_side": "right",
  "padding_value": 0.0,
  "processor_class": "WhisperProcessor",
  "return_attention_mask": false,
  "sampling_rate": 16000
}
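These values are mutually consistent: `n_samples = chunk_length x sampling_rate = 30 x 16000 = 480000`, and `nb_max_frames = n_samples / hop_length = 480000 / 160 = 3000`, so each 30-second window becomes an 80 x 3000 log-mel matrix. A sketch checking that arithmetic through the feature extractor (repo id assumed from the page header):

```python
# Sketch (assumption: repo id RafatK/Whisper_AZ from the page header).
import numpy as np
from transformers import WhisperFeatureExtractor

fe = WhisperFeatureExtractor.from_pretrained("RafatK/Whisper_AZ")
assert fe.n_samples == fe.chunk_length * fe.sampling_rate   # 30 * 16000 = 480000
assert fe.nb_max_frames == fe.n_samples // fe.hop_length    # 480000 / 160 = 3000

one_second = np.zeros(fe.sampling_rate, dtype=np.float32)   # 1 s of silence
feats = fe(one_second, sampling_rate=16000, return_tensors="np").input_features
print(feats.shape)  # (1, 80, 3000): feature_size x nb_max_frames after padding
```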
trainer_state.json
ADDED
@@ -0,0 +1,1455 @@
{
  "best_metric": 22.283451007404857,
  "best_model_checkpoint": "./whisper-large-v2/second2/checkpoint-500",
  "epoch": 0.2133902373966391,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0010669511869831954, "grad_norm": 19.418699264526367, "learning_rate": 3.3333333333333334e-08, "loss": 2.4187, "step": 5},
    {"epoch": 0.002133902373966391, "grad_norm": 21.75494956970215, "learning_rate": 6.666666666666667e-08, "loss": 2.3362, "step": 10},
    {"epoch": 0.0032008535609495867, "grad_norm": 21.226938247680664, "learning_rate": 1e-07, "loss": 2.4288, "step": 15},
    {"epoch": 0.004267804747932782, "grad_norm": 27.21125602722168, "learning_rate": 1.3333333333333334e-07, "loss": 2.4004, "step": 20},
    {"epoch": 0.005334755934915978, "grad_norm": 13.559316635131836, "learning_rate": 1.6666666666666665e-07, "loss": 2.1607, "step": 25},
    {"epoch": 0.0064017071218991735, "grad_norm": 13.044219970703125, "learning_rate": 1.9333333333333332e-07, "loss": 2.1591, "step": 30},
    {"epoch": 0.007468658308882369, "grad_norm": 13.89301872253418, "learning_rate": 2.2666666666666663e-07, "loss": 2.0855, "step": 35},
    {"epoch": 0.008535609495865563, "grad_norm": 14.154369354248047, "learning_rate": 2.6e-07, "loss": 1.9557, "step": 40},
    {"epoch": 0.00960256068284876, "grad_norm": 9.126311302185059, "learning_rate": 2.933333333333333e-07, "loss": 1.8661, "step": 45},
    {"epoch": 0.010669511869831956, "grad_norm": 13.619270324707031, "learning_rate": 3.2666666666666663e-07, "loss": 1.8424, "step": 50},
    {"epoch": 0.01173646305681515, "grad_norm": 9.875323295593262, "learning_rate": 3.6e-07, "loss": 1.6208, "step": 55},
    {"epoch": 0.012803414243798347, "grad_norm": 11.143688201904297, "learning_rate": 3.933333333333333e-07, "loss": 1.5396, "step": 60},
    {"epoch": 0.013870365430781541, "grad_norm": 10.829161643981934, "learning_rate": 4.266666666666667e-07, "loss": 1.4174, "step": 65},
    {"epoch": 0.014937316617764738, "grad_norm": 7.393435001373291, "learning_rate": 4.6e-07, "loss": 1.1693, "step": 70},
    {"epoch": 0.016004267804747934, "grad_norm": 4.182456016540527, "learning_rate": 4.933333333333333e-07, "loss": 1.1779, "step": 75},
    {"epoch": 0.017071218991731127, "grad_norm": 8.4662504196167, "learning_rate": 5.266666666666666e-07, "loss": 1.1281, "step": 80},
    {"epoch": 0.018138170178714323, "grad_norm": 4.043624401092529, "learning_rate": 5.6e-07, "loss": 1.0614, "step": 85},
    {"epoch": 0.01920512136569752, "grad_norm": 3.8517227172851562, "learning_rate": 5.933333333333334e-07, "loss": 1.0066, "step": 90},
    {"epoch": 0.020272072552680716, "grad_norm": 4.19964075088501, "learning_rate": 6.266666666666667e-07, "loss": 1.0659, "step": 95},
    {"epoch": 0.021339023739663912, "grad_norm": 4.7087178230285645, "learning_rate": 6.6e-07, "loss": 0.9793, "step": 100},
    {"epoch": 0.022405974926647105, "grad_norm": 4.864803314208984, "learning_rate": 6.933333333333333e-07, "loss": 0.9675, "step": 105},
    {"epoch": 0.0234729261136303, "grad_norm": 3.8939764499664307, "learning_rate": 7.266666666666667e-07, "loss": 0.9513, "step": 110},
    {"epoch": 0.024539877300613498, "grad_norm": 3.557497978210449, "learning_rate": 7.599999999999999e-07, "loss": 0.9164, "step": 115},
    {"epoch": 0.025606828487596694, "grad_norm": 4.033596038818359, "learning_rate": 7.933333333333333e-07, "loss": 0.9069, "step": 120},
    {"epoch": 0.026673779674579887, "grad_norm": 4.726081371307373, "learning_rate": 8.266666666666667e-07, "loss": 0.8976, "step": 125},
    {"epoch": 0.027740730861563083, "grad_norm": 18.711572647094727, "learning_rate": 8.599999999999999e-07, "loss": 0.9157, "step": 130},
    {"epoch": 0.02880768204854628, "grad_norm": 4.13236141204834, "learning_rate": 8.933333333333333e-07, "loss": 0.8808, "step": 135},
    {"epoch": 0.029874633235529476, "grad_norm": 3.7335939407348633, "learning_rate": 9.266666666666665e-07, "loss": 0.7707, "step": 140},
    {"epoch": 0.03094158442251267, "grad_norm": 4.5814313888549805, "learning_rate": 9.6e-07, "loss": 0.8349, "step": 145},
    {"epoch": 0.03200853560949587, "grad_norm": 4.993039608001709, "learning_rate": 9.933333333333333e-07, "loss": 0.7002, "step": 150},
    {"epoch": 0.033075486796479064, "grad_norm": 5.326510906219482, "learning_rate": 9.996791443850267e-07, "loss": 0.6418, "step": 155},
    {"epoch": 0.034142437983462254, "grad_norm": 4.4001994132995605, "learning_rate": 9.99144385026738e-07, "loss": 0.5632, "step": 160},
    {"epoch": 0.03520938917044545, "grad_norm": 71.39772033691406, "learning_rate": 9.98716577540107e-07, "loss": 0.6374, "step": 165},
    {"epoch": 0.036276340357428646, "grad_norm": 9.561308860778809, "learning_rate": 9.98288770053476e-07, "loss": 0.5493, "step": 170},
    {"epoch": 0.03734329154441184, "grad_norm": 2.866039991378784, "learning_rate": 9.97754010695187e-07, "loss": 0.473, "step": 175},
    {"epoch": 0.03841024273139504, "grad_norm": 3.0788004398345947, "learning_rate": 9.972192513368983e-07, "loss": 0.4903, "step": 180},
    {"epoch": 0.039477193918378235, "grad_norm": 3.4059643745422363, "learning_rate": 9.966844919786097e-07, "loss": 0.5017, "step": 185},
    {"epoch": 0.04054414510536143, "grad_norm": 2.9747097492218018, "learning_rate": 9.961497326203208e-07, "loss": 0.4269, "step": 190},
    {"epoch": 0.04161109629234463, "grad_norm": 3.476583242416382, "learning_rate": 9.95614973262032e-07, "loss": 0.4214, "step": 195},
    {"epoch": 0.042678047479327824, "grad_norm": 3.0810964107513428, "learning_rate": 9.950802139037432e-07, "loss": 0.4291, "step": 200},
    {"epoch": 0.043744998666311014, "grad_norm": 87.03646850585938, "learning_rate": 9.945454545454544e-07, "loss": 0.4035, "step": 205},
    {"epoch": 0.04481194985329421, "grad_norm": 2.6879165172576904, "learning_rate": 9.940106951871658e-07, "loss": 0.428, "step": 210},
    {"epoch": 0.045878901040277406, "grad_norm": 3.602217197418213, "learning_rate": 9.93475935828877e-07, "loss": 0.4534, "step": 215},
    {"epoch": 0.0469458522272606, "grad_norm": 2.9568989276885986, "learning_rate": 9.929411764705881e-07, "loss": 0.42, "step": 220},
    {"epoch": 0.0480128034142438, "grad_norm": 2.7602076530456543, "learning_rate": 9.924064171122995e-07, "loss": 0.448, "step": 225},
    {"epoch": 0.049079754601226995, "grad_norm": 2.7191200256347656, "learning_rate": 9.918716577540107e-07, "loss": 0.4046, "step": 230},
    {"epoch": 0.05014670578821019, "grad_norm": 2.7233424186706543, "learning_rate": 9.913368983957219e-07, "loss": 0.4028, "step": 235},
    {"epoch": 0.05121365697519339, "grad_norm": 2.730400323867798, "learning_rate": 9.90802139037433e-07, "loss": 0.4361, "step": 240},
    {"epoch": 0.05228060816217658, "grad_norm": 4.667561054229736, "learning_rate": 9.902673796791442e-07, "loss": 0.4183, "step": 245},
    {"epoch": 0.05334755934915977, "grad_norm": 2.981497049331665, "learning_rate": 9.897326203208556e-07, "loss": 0.4148, "step": 250},
    {"epoch": 0.05441451053614297, "grad_norm": 9.336112976074219, "learning_rate": 9.891978609625668e-07, "loss": 0.4386, "step": 255},
    {"epoch": 0.055481461723126166, "grad_norm": 4.434041500091553, "learning_rate": 9.88663101604278e-07, "loss": 0.4235, "step": 260},
    {"epoch": 0.05654841291010936, "grad_norm": 2.695866346359253, "learning_rate": 9.881283422459892e-07, "loss": 0.4424, "step": 265},
    {"epoch": 0.05761536409709256, "grad_norm": 2.8247873783111572, "learning_rate": 9.875935828877004e-07, "loss": 0.387, "step": 270},
    {"epoch": 0.058682315284075755, "grad_norm": 3.4680376052856445, "learning_rate": 9.870588235294118e-07, "loss": 0.4294, "step": 275},
    {"epoch": 0.05974926647105895, "grad_norm": 2.6994473934173584, "learning_rate": 9.86524064171123e-07, "loss": 0.4043, "step": 280},
    {"epoch": 0.06081621765804215, "grad_norm": 2.6048827171325684, "learning_rate": 9.859893048128341e-07, "loss": 0.4064, "step": 285},
    {"epoch": 0.06188316884502534, "grad_norm": 3.028503179550171, "learning_rate": 9.854545454545455e-07, "loss": 0.4269, "step": 290},
    {"epoch": 0.06295012003200853, "grad_norm": 3.3406739234924316, "learning_rate": 9.849197860962567e-07, "loss": 0.4339, "step": 295},
    {"epoch": 0.06401707121899174, "grad_norm": 3.1800878047943115, "learning_rate": 9.843850267379679e-07, "loss": 0.3972, "step": 300},
    {"epoch": 0.06508402240597493, "grad_norm": 2.818852424621582, "learning_rate": 9.83850267379679e-07, "loss": 0.3898, "step": 305},
    {"epoch": 0.06615097359295813, "grad_norm": 3.256840705871582, "learning_rate": 9.833155080213902e-07, "loss": 0.4377, "step": 310},
    {"epoch": 0.06721792477994132, "grad_norm": 3.388472318649292, "learning_rate": 9.827807486631016e-07, "loss": 0.4187, "step": 315},
    {"epoch": 0.06828487596692451, "grad_norm": 4.434413909912109, "learning_rate": 9.822459893048128e-07, "loss": 0.4009, "step": 320},
    {"epoch": 0.06935182715390771, "grad_norm": 3.08677077293396, "learning_rate": 9.81711229946524e-07, "loss": 0.3837, "step": 325},
    {"epoch": 0.0704187783408909, "grad_norm": 3.7852623462677, "learning_rate": 9.811764705882352e-07, "loss": 0.3999, "step": 330},
    {"epoch": 0.0714857295278741, "grad_norm": 2.947720766067505, "learning_rate": 9.806417112299463e-07, "loss": 0.3677, "step": 335},
    {"epoch": 0.07255268071485729, "grad_norm": 3.1685659885406494, "learning_rate": 9.801069518716577e-07, "loss": 0.4035, "step": 340},
    {"epoch": 0.0736196319018405, "grad_norm": 2.661599636077881, "learning_rate": 9.79572192513369e-07, "loss": 0.3658, "step": 345},
    {"epoch": 0.07468658308882369, "grad_norm": 2.6443653106689453, "learning_rate": 9.7903743315508e-07, "loss": 0.3805, "step": 350},
    {"epoch": 0.07575353427580689, "grad_norm": 3.653778314590454, "learning_rate": 9.785026737967915e-07, "loss": 0.407, "step": 355},
    {"epoch": 0.07682048546279008, "grad_norm": 2.991504430770874, "learning_rate": 9.779679144385027e-07, "loss": 0.4135, "step": 360},
    {"epoch": 0.07788743664977327, "grad_norm": 2.7770955562591553, "learning_rate": 9.774331550802139e-07, "loss": 0.3934, "step": 365},
    {"epoch": 0.07895438783675647, "grad_norm": 2.686368703842163, "learning_rate": 9.76898395721925e-07, "loss": 0.409, "step": 370},
    {"epoch": 0.08002133902373966, "grad_norm": 3.307638168334961, "learning_rate": 9.763636363636362e-07, "loss": 0.3872, "step": 375},
    {"epoch": 0.08108829021072286, "grad_norm": 8.76164722442627, "learning_rate": 9.758288770053476e-07, "loss": 0.3882, "step": 380},
    {"epoch": 0.08215524139770605, "grad_norm": 2.868077039718628, "learning_rate": 9.752941176470588e-07, "loss": 0.4123, "step": 385},
    {"epoch": 0.08322219258468926, "grad_norm": 4.677827835083008, "learning_rate": 9.7475935828877e-07, "loss": 0.362, "step": 390},
    {"epoch": 0.08428914377167245, "grad_norm": 3.836914539337158, "learning_rate": 9.742245989304812e-07, "loss": 0.4023, "step": 395},
    {"epoch": 0.08535609495865565, "grad_norm": 2.7120203971862793, "learning_rate": 9.736898395721923e-07, "loss": 0.419, "step": 400},
    {"epoch": 0.08642304614563884, "grad_norm": 3.1341233253479004, "learning_rate": 9.731550802139037e-07, "loss": 0.4275, "step": 405},
    {"epoch": 0.08748999733262203, "grad_norm": 3.0901923179626465, "learning_rate": 9.72620320855615e-07, "loss": 0.3799, "step": 410},
    {"epoch": 0.08855694851960523, "grad_norm": 2.7124781608581543, "learning_rate": 9.72085561497326e-07, "loss": 0.3788, "step": 415},
    {"epoch": 0.08962389970658842, "grad_norm": 2.8177292346954346, "learning_rate": 9.715508021390375e-07, "loss": 0.3436, "step": 420},
    {"epoch": 0.09069085089357162, "grad_norm": 3.063669204711914, "learning_rate": 9.710160427807487e-07, "loss": 0.4312, "step": 425},
    {"epoch": 0.09175780208055481, "grad_norm": 2.78320574760437, "learning_rate": 9.704812834224598e-07, "loss": 0.4122, "step": 430},
    {"epoch": 0.09282475326753802, "grad_norm": 3.080400228500366, "learning_rate": 9.69946524064171e-07, "loss": 0.3917, "step": 435},
    {"epoch": 0.0938917044545212, "grad_norm": 3.02703857421875, "learning_rate": 9.694117647058822e-07, "loss": 0.3546, "step": 440},
    {"epoch": 0.09495865564150441, "grad_norm": 2.6559834480285645, "learning_rate": 9.688770053475936e-07, "loss": 0.3536, "step": 445},
    {"epoch": 0.0960256068284876, "grad_norm": 3.939589738845825, "learning_rate": 9.683422459893048e-07, "loss": 0.3729, "step": 450},
    {"epoch": 0.09709255801547079, "grad_norm": 2.419799327850342, "learning_rate": 9.67807486631016e-07, "loss": 0.3474, "step": 455},
    {"epoch": 0.09815950920245399, "grad_norm": 2.237165927886963, "learning_rate": 9.672727272727271e-07, "loss": 0.3387, "step": 460},
    {"epoch": 0.09922646038943718, "grad_norm": 2.6400506496429443, "learning_rate": 9.667379679144385e-07, "loss": 0.3462, "step": 465},
    {"epoch": 0.10029341157642038, "grad_norm": 3.294222354888916, "learning_rate": 9.662032085561497e-07, "loss": 0.3724, "step": 470},
    {"epoch": 0.10136036276340357, "grad_norm": 2.5825111865997314, "learning_rate": 9.656684491978609e-07, "loss": 0.3652, "step": 475},
    {"epoch": 0.10242731395038678, "grad_norm": 2.6807165145874023, "learning_rate": 9.65133689839572e-07, "loss": 0.3824, "step": 480},
    {"epoch": 0.10349426513736996, "grad_norm": 2.541398048400879, "learning_rate": 9.645989304812835e-07, "loss": 0.3509, "step": 485},
    {"epoch": 0.10456121632435315, "grad_norm": 3.071420192718506, "learning_rate": 9.640641711229946e-07, "loss": 0.3511, "step": 490},
    {"epoch": 0.10562816751133636, "grad_norm": 3.117623805999756, "learning_rate": 9.635294117647058e-07, "loss": 0.3823, "step": 495},
    {"epoch": 0.10669511869831955, "grad_norm": 2.9759390354156494, "learning_rate": 9.62994652406417e-07, "loss": 0.3595, "step": 500},
    {"epoch": 0.10669511869831955, "eval_bleu": 0.5858525890811711, "eval_cer": 5.81035140115651, "eval_loss": 0.35532379150390625, "eval_runtime": 1038.2775, "eval_samples_per_second": 0.884, "eval_steps_per_second": 0.222, "eval_wer": 22.283451007404857, "step": 500},
    {"epoch": 0.10776206988530275, "grad_norm": 2.9231984615325928, "learning_rate": 9.475679090334807e-07, "loss": 0.3783, "step": 505},
    {"epoch": 0.10882902107228594, "grad_norm": 2.2739880084991455, "learning_rate": 9.470414824173509e-07, "loss": 0.3238, "step": 510},
    {"epoch": 0.10989597225926914, "grad_norm": 2.603306531906128, "learning_rate": 9.465150558012212e-07, "loss": 0.3538, "step": 515},
    {"epoch": 0.11096292344625233, "grad_norm": 2.871011972427368, "learning_rate": 9.459886291850916e-07, "loss": 0.3955, "step": 520},
    {"epoch": 0.11202987463323553, "grad_norm": 3.134646415710449, "learning_rate": 9.454622025689618e-07, "loss": 0.3508, "step": 525},
    {"epoch": 0.11309682582021872, "grad_norm": 3.014796733856201, "learning_rate": 9.449357759528321e-07, "loss": 0.3664, "step": 530},
    {"epoch": 0.11416377700720191, "grad_norm": 3.0675926208496094, "learning_rate": 9.444093493367024e-07, "loss": 0.3497, "step": 535},
    {"epoch": 0.11523072819418512, "grad_norm": 4.508389472961426, "learning_rate": 9.438829227205727e-07, "loss": 0.3172, "step": 540},
    {"epoch": 0.1162976793811683, "grad_norm": 2.528317451477051, "learning_rate": 9.43356496104443e-07, "loss": 0.3495, "step": 545},
    {"epoch": 0.11736463056815151, "grad_norm": 2.759575843811035, "learning_rate": 9.428300694883133e-07, "loss": 0.3326, "step": 550},
    {"epoch": 0.1184315817551347, "grad_norm": 6.699812889099121, "learning_rate": 9.423036428721835e-07, "loss": 0.4124, "step": 555},
    {"epoch": 0.1194985329421179, "grad_norm": 2.358922243118286, "learning_rate": 9.417772162560539e-07, "loss": 0.3292, "step": 560},
    {"epoch": 0.12056548412910109, "grad_norm": 2.3635692596435547, "learning_rate": 9.412507896399241e-07, "loss": 0.3715, "step": 565},
    {"epoch": 0.1216324353160843, "grad_norm": 2.8696706295013428, "learning_rate": 9.407243630237944e-07, "loss": 0.3348, "step": 570},
    {"epoch": 0.12269938650306748, "grad_norm": 3.717510223388672, "learning_rate": 9.401979364076647e-07, "loss": 0.355, "step": 575},
    {"epoch": 0.12376633769005067, "grad_norm": 3.9285202026367188, "learning_rate": 9.39671509791535e-07, "loss": 0.3752, "step": 580},
    {"epoch": 0.12483328887703388, "grad_norm": 3.560582399368286, "learning_rate": 9.391450831754053e-07, "loss": 0.3422, "step": 585},
    {"epoch": 0.12590024006401707, "grad_norm": 6.333406925201416, "learning_rate": 9.386186565592757e-07, "loss": 0.3436, "step": 590},
    {"epoch": 0.12696719125100026, "grad_norm": 2.94331431388855, "learning_rate": 9.380922299431458e-07, "loss": 0.3346, "step": 595},
    {"epoch": 0.12803414243798347, "grad_norm": 2.933142900466919, "learning_rate": 9.375658033270162e-07, "loss": 0.3414, "step": 600},
    {"epoch": 0.12910109362496666, "grad_norm": 3.2017970085144043, "learning_rate": 9.370393767108865e-07, "loss": 0.3731, "step": 605},
    {"epoch": 0.13016804481194985, "grad_norm": 3.2127702236175537, "learning_rate": 9.365129500947567e-07, "loss": 0.3399, "step": 610},
    {"epoch": 0.13123499599893304, "grad_norm": 3.1433818340301514, "learning_rate": 9.35986523478627e-07, "loss": 0.342, "step": 615},
    {"epoch": 0.13230194718591626, "grad_norm": 3.0913007259368896, "learning_rate": 9.354600968624973e-07, "loss": 0.3543, "step": 620},
    {"epoch": 0.13336889837289945, "grad_norm": 3.459428310394287, "learning_rate": 9.349336702463676e-07, "loss": 0.292, "step": 625},
    {"epoch": 0.13443584955988264, "grad_norm": 2.965162754058838, "learning_rate": 9.34407243630238e-07, "loss": 0.3591, "step": 630},
    {"epoch": 0.13550280074686583, "grad_norm": 2.476099967956543, "learning_rate": 9.338808170141081e-07, "loss": 0.3351, "step": 635},
    {"epoch": 0.13656975193384902, "grad_norm": 2.853848695755005, "learning_rate": 9.333543903979785e-07, "loss": 0.3828, "step": 640},
    {"epoch": 0.13763670312083223, "grad_norm": 2.560877561569214, "learning_rate": 9.328279637818488e-07, "loss": 0.3183, "step": 645},
    {"epoch": 0.13870365430781542, "grad_norm": 2.7191262245178223, "learning_rate": 9.32301537165719e-07, "loss": 0.3572, "step": 650},
    {"epoch": 0.1397706054947986, "grad_norm": 3.388456106185913, "learning_rate": 9.317751105495893e-07, "loss": 0.3207, "step": 655},
    {"epoch": 0.1408375566817818, "grad_norm": 2.827470302581787, "learning_rate": 9.312486839334597e-07, "loss": 0.3576, "step": 660},
    {"epoch": 0.14190450786876502, "grad_norm": 2.630094528198242, "learning_rate": 9.307222573173299e-07, "loss": 0.3616, "step": 665},
    {"epoch": 0.1429714590557482, "grad_norm": 2.7217891216278076, "learning_rate": 9.301958307012003e-07, "loss": 0.3434, "step": 670},
    {"epoch": 0.1440384102427314, "grad_norm": 2.558335065841675, "learning_rate": 9.296694040850705e-07, "loss": 0.3516, "step": 675},
    {"epoch": 0.14510536142971459, "grad_norm": 3.1991679668426514, "learning_rate": 9.291429774689408e-07, "loss": 0.338, "step": 680},
    {"epoch": 0.14617231261669777, "grad_norm": 4.686666011810303, "learning_rate": 9.286165508528111e-07, "loss": 0.3539, "step": 685},
    {"epoch": 0.147239263803681, "grad_norm": 3.0826447010040283, "learning_rate": 9.280901242366814e-07, "loss": 0.3313, "step": 690},
    {"epoch": 0.14830621499066418, "grad_norm": 2.656141757965088, "learning_rate": 9.275636976205516e-07, "loss": 0.3454, "step": 695},
    {"epoch": 0.14937316617764737, "grad_norm": 4.859818935394287, "learning_rate": 9.27037271004422e-07, "loss": 0.3331, "step": 700},
    {"epoch": 0.15044011736463056, "grad_norm": 5.376903057098389, "learning_rate": 9.265108443882922e-07, "loss": 0.3377, "step": 705},
    {"epoch": 0.15150706855161378, "grad_norm": 2.7976577281951904, "learning_rate": 9.259844177721626e-07, "loss": 0.3768, "step": 710},
    {"epoch": 0.15257401973859697, "grad_norm": 2.993427038192749, "learning_rate": 9.254579911560328e-07, "loss": 0.3509, "step": 715},
    {"epoch": 0.15364097092558016, "grad_norm": 2.8581597805023193, "learning_rate": 9.249315645399031e-07, "loss": 0.3338, "step": 720},
    {"epoch": 0.15470792211256335, "grad_norm": 2.8063721656799316, "learning_rate": 9.244051379237734e-07, "loss": 0.3597, "step": 725},
    {"epoch": 0.15577487329954653, "grad_norm": 2.7333686351776123, "learning_rate": 9.238787113076438e-07, "loss": 0.3563, "step": 730},
    {"epoch": 0.15684182448652975, "grad_norm": 3.4143598079681396, "learning_rate": 9.233522846915139e-07, "loss": 0.336, "step": 735},
    {"epoch": 0.15790877567351294, "grad_norm": 2.84298038482666, "learning_rate": 9.228258580753843e-07, "loss": 0.3651, "step": 740},
    {"epoch": 0.15897572686049613, "grad_norm": 5.912104606628418, "learning_rate": 9.222994314592545e-07, "loss": 0.3531, "step": 745},
    {"epoch": 0.16004267804747932, "grad_norm": 3.0039525032043457, "learning_rate": 9.217730048431249e-07, "loss": 0.3212, "step": 750},
    {"epoch": 0.16110962923446254, "grad_norm": 3.2109341621398926, "learning_rate": 9.212465782269951e-07, "loss": 0.3206, "step": 755},
    {"epoch": 0.16217658042144573, "grad_norm": 2.7238008975982666, "learning_rate": 9.207201516108654e-07, "loss": 0.3696, "step": 760},
    {"epoch": 0.16324353160842892, "grad_norm": 2.6420886516571045, "learning_rate": 9.201937249947357e-07, "loss": 0.3339, "step": 765},
    {"epoch": 0.1643104827954121, "grad_norm": 2.8822038173675537, "learning_rate": 9.196672983786061e-07, "loss": 0.3611, "step": 770},
    {"epoch": 0.1653774339823953, "grad_norm": 2.558979034423828, "learning_rate": 9.191408717624762e-07, "loss": 0.3247, "step": 775},
    {"epoch": 0.1664443851693785, "grad_norm": 2.649867057800293, "learning_rate": 9.186144451463465e-07, "loss": 0.3146, "step": 780},
    {"epoch": 0.1675113363563617, "grad_norm": 2.6174542903900146, "learning_rate": 9.180880185302169e-07, "loss": 0.3319, "step": 785},
    {"epoch": 0.1685782875433449, "grad_norm": 2.65977144241333, "learning_rate": 9.175615919140871e-07, "loss": 0.3643, "step": 790},
    {"epoch": 0.16964523873032808, "grad_norm": 3.2722222805023193, "learning_rate": 9.170351652979574e-07, "loss": 0.3349, "step": 795},
    {"epoch": 0.1707121899173113, "grad_norm": 2.346200466156006, "learning_rate": 9.165087386818277e-07, "loss": 0.3028, "step": 800},
    {"epoch": 0.17177914110429449, "grad_norm": 2.675050735473633, "learning_rate": 9.15982312065698e-07, "loss": 0.3256, "step": 805},
    {"epoch": 0.17284609229127768, "grad_norm": 2.4576005935668945, "learning_rate": 9.154558854495683e-07, "loss": 0.3127, "step": 810},
    {"epoch": 0.17391304347826086, "grad_norm": 2.73311710357666, "learning_rate": 9.150347441566645e-07, "loss": 0.3508, "step": 815},
    {"epoch": 0.17497999466524405, "grad_norm": 2.6573374271392822, "learning_rate": 9.145083175405348e-07, "loss": 0.3643, "step": 820},
    {"epoch": 0.17604694585222727, "grad_norm": 2.7585701942443848, "learning_rate": 9.139818909244052e-07, "loss": 0.3321, "step": 825},
    {"epoch": 0.17711389703921046, "grad_norm": 2.8242616653442383, "learning_rate": 9.134554643082753e-07, "loss": 0.3175, "step": 830},
    {"epoch": 0.17818084822619365, "grad_norm": 3.1244609355926514, "learning_rate": 9.129290376921457e-07, "loss": 0.3641, "step": 835},
    {"epoch": 0.17924779941317684, "grad_norm": 2.4807212352752686, "learning_rate": 9.12402611076016e-07, "loss": 0.3464, "step": 840},
    {"epoch": 0.18031475060016006, "grad_norm": 4.587203502655029, "learning_rate": 9.118761844598863e-07, "loss": 0.3199, "step": 845},
    {"epoch": 0.18138170178714325, "grad_norm": 3.1150426864624023, "learning_rate": 9.113497578437566e-07, "loss": 0.3129, "step": 850},
    {"epoch": 0.18244865297412644, "grad_norm": 2.80098557472229, "learning_rate": 9.108233312276268e-07, "loss": 0.3296, "step": 855},
    {"epoch": 0.18351560416110962, "grad_norm": 2.6048800945281982, "learning_rate": 9.102969046114971e-07, "loss": 0.3426, "step": 860},
    {"epoch": 0.1845825553480928, "grad_norm": 2.4776954650878906,
|
| 1229 |
+
"learning_rate": 9.097704779953675e-07,
|
| 1230 |
+
"loss": 0.3389,
|
| 1231 |
+
"step": 865
|
| 1232 |
+
},
|
| 1233 |
+
{
|
| 1234 |
+
"epoch": 0.18564950653507603,
|
| 1235 |
+
"grad_norm": 3.662856101989746,
|
| 1236 |
+
"learning_rate": 9.092440513792377e-07,
|
| 1237 |
+
"loss": 0.3191,
|
| 1238 |
+
"step": 870
|
| 1239 |
+
},
|
| 1240 |
+
{
|
| 1241 |
+
"epoch": 0.18671645772205922,
|
| 1242 |
+
"grad_norm": 5.855990886688232,
|
| 1243 |
+
"learning_rate": 9.08717624763108e-07,
|
| 1244 |
+
"loss": 0.3491,
|
| 1245 |
+
"step": 875
|
| 1246 |
+
},
|
| 1247 |
+
{
|
| 1248 |
+
"epoch": 0.1877834089090424,
|
| 1249 |
+
"grad_norm": 2.6081950664520264,
|
| 1250 |
+
"learning_rate": 9.081911981469783e-07,
|
| 1251 |
+
"loss": 0.3256,
|
| 1252 |
+
"step": 880
|
| 1253 |
+
},
|
| 1254 |
+
{
|
| 1255 |
+
"epoch": 0.1888503600960256,
|
| 1256 |
+
"grad_norm": 3.206470251083374,
|
| 1257 |
+
"learning_rate": 9.076647715308485e-07,
|
| 1258 |
+
"loss": 0.3419,
|
| 1259 |
+
"step": 885
|
| 1260 |
+
},
|
| 1261 |
+
{
|
| 1262 |
+
"epoch": 0.18991731128300882,
|
| 1263 |
+
"grad_norm": 3.7217066287994385,
|
| 1264 |
+
"learning_rate": 9.071383449147189e-07,
|
| 1265 |
+
"loss": 0.3369,
|
| 1266 |
+
"step": 890
|
| 1267 |
+
},
|
| 1268 |
+
{
|
| 1269 |
+
"epoch": 0.190984262469992,
|
| 1270 |
+
"grad_norm": 4.7247633934021,
|
| 1271 |
+
"learning_rate": 9.066119182985891e-07,
|
| 1272 |
+
"loss": 0.3664,
|
| 1273 |
+
"step": 895
|
| 1274 |
+
},
|
| 1275 |
+
{
|
| 1276 |
+
"epoch": 0.1920512136569752,
|
| 1277 |
+
"grad_norm": 3.455446481704712,
|
| 1278 |
+
"learning_rate": 9.060854916824594e-07,
|
| 1279 |
+
"loss": 0.3171,
|
| 1280 |
+
"step": 900
|
| 1281 |
+
},
|
| 1282 |
+
{
|
| 1283 |
+
"epoch": 0.19311816484395838,
|
| 1284 |
+
"grad_norm": 2.6066224575042725,
|
| 1285 |
+
"learning_rate": 9.055590650663297e-07,
|
| 1286 |
+
"loss": 0.3266,
|
| 1287 |
+
"step": 905
|
| 1288 |
+
},
|
| 1289 |
+
{
|
| 1290 |
+
"epoch": 0.19418511603094157,
|
| 1291 |
+
"grad_norm": 3.2545228004455566,
|
| 1292 |
+
"learning_rate": 9.050326384502e-07,
|
| 1293 |
+
"loss": 0.3201,
|
| 1294 |
+
"step": 910
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"epoch": 0.1952520672179248,
|
| 1298 |
+
"grad_norm": 3.8174829483032227,
|
| 1299 |
+
"learning_rate": 9.045062118340702e-07,
|
| 1300 |
+
"loss": 0.3537,
|
| 1301 |
+
"step": 915
|
| 1302 |
+
},
|
| 1303 |
+
{
|
| 1304 |
+
"epoch": 0.19631901840490798,
|
| 1305 |
+
"grad_norm": 3.725991725921631,
|
| 1306 |
+
"learning_rate": 9.039797852179406e-07,
|
| 1307 |
+
"loss": 0.3373,
|
| 1308 |
+
"step": 920
|
| 1309 |
+
},
|
| 1310 |
+
{
|
| 1311 |
+
"epoch": 0.19738596959189117,
|
| 1312 |
+
"grad_norm": 2.5402047634124756,
|
| 1313 |
+
"learning_rate": 9.034533586018108e-07,
|
| 1314 |
+
"loss": 0.3107,
|
| 1315 |
+
"step": 925
|
| 1316 |
+
},
|
| 1317 |
+
{
|
| 1318 |
+
"epoch": 0.19845292077887436,
|
| 1319 |
+
"grad_norm": 2.3887853622436523,
|
| 1320 |
+
"learning_rate": 9.029269319856812e-07,
|
| 1321 |
+
"loss": 0.3255,
|
| 1322 |
+
"step": 930
|
| 1323 |
+
},
|
| 1324 |
+
{
|
| 1325 |
+
"epoch": 0.19951987196585758,
|
| 1326 |
+
"grad_norm": 2.83178973197937,
|
| 1327 |
+
"learning_rate": 9.024005053695514e-07,
|
| 1328 |
+
"loss": 0.3192,
|
| 1329 |
+
"step": 935
|
| 1330 |
+
},
|
| 1331 |
+
{
|
| 1332 |
+
"epoch": 0.20058682315284077,
|
| 1333 |
+
"grad_norm": 2.540933132171631,
|
| 1334 |
+
"learning_rate": 9.018740787534217e-07,
|
| 1335 |
+
"loss": 0.3414,
|
| 1336 |
+
"step": 940
|
| 1337 |
+
},
|
| 1338 |
+
{
|
| 1339 |
+
"epoch": 0.20165377433982395,
|
| 1340 |
+
"grad_norm": 2.799577474594116,
|
| 1341 |
+
"learning_rate": 9.01347652137292e-07,
|
| 1342 |
+
"loss": 0.3383,
|
| 1343 |
+
"step": 945
|
| 1344 |
+
},
|
| 1345 |
+
{
|
| 1346 |
+
"epoch": 0.20272072552680714,
|
| 1347 |
+
"grad_norm": 2.5945661067962646,
|
| 1348 |
+
"learning_rate": 9.008212255211624e-07,
|
| 1349 |
+
"loss": 0.3414,
|
| 1350 |
+
"step": 950
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"epoch": 0.20378767671379033,
|
| 1354 |
+
"grad_norm": 3.102372407913208,
|
| 1355 |
+
"learning_rate": 9.002947989050325e-07,
|
| 1356 |
+
"loss": 0.33,
|
| 1357 |
+
"step": 955
|
| 1358 |
+
},
|
| 1359 |
+
{
|
| 1360 |
+
"epoch": 0.20485462790077355,
|
| 1361 |
+
"grad_norm": 3.1992921829223633,
|
| 1362 |
+
"learning_rate": 8.997683722889029e-07,
|
| 1363 |
+
"loss": 0.3719,
|
| 1364 |
+
"step": 960
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"epoch": 0.20592157908775674,
|
| 1368 |
+
"grad_norm": 3.082578420639038,
|
| 1369 |
+
"learning_rate": 8.992419456727732e-07,
|
| 1370 |
+
"loss": 0.3412,
|
| 1371 |
+
"step": 965
|
| 1372 |
+
},
|
| 1373 |
+
{
|
| 1374 |
+
"epoch": 0.20698853027473993,
|
| 1375 |
+
"grad_norm": 2.406475067138672,
|
| 1376 |
+
"learning_rate": 8.987155190566435e-07,
|
| 1377 |
+
"loss": 0.3069,
|
| 1378 |
+
"step": 970
|
| 1379 |
+
},
|
| 1380 |
+
{
|
| 1381 |
+
"epoch": 0.20805548146172312,
|
| 1382 |
+
"grad_norm": 3.033055067062378,
|
| 1383 |
+
"learning_rate": 8.981890924405137e-07,
|
| 1384 |
+
"loss": 0.3683,
|
| 1385 |
+
"step": 975
|
| 1386 |
+
},
|
| 1387 |
+
{
|
| 1388 |
+
"epoch": 0.2091224326487063,
|
| 1389 |
+
"grad_norm": 2.807874917984009,
|
| 1390 |
+
"learning_rate": 8.97662665824384e-07,
|
| 1391 |
+
"loss": 0.3024,
|
| 1392 |
+
"step": 980
|
| 1393 |
+
},
|
| 1394 |
+
{
|
| 1395 |
+
"epoch": 0.21018938383568952,
|
| 1396 |
+
"grad_norm": 2.6849870681762695,
|
| 1397 |
+
"learning_rate": 8.971362392082543e-07,
|
| 1398 |
+
"loss": 0.3248,
|
| 1399 |
+
"step": 985
|
| 1400 |
+
},
|
| 1401 |
+
{
|
| 1402 |
+
"epoch": 0.21125633502267271,
|
| 1403 |
+
"grad_norm": 3.144150495529175,
|
| 1404 |
+
"learning_rate": 8.966098125921247e-07,
|
| 1405 |
+
"loss": 0.3158,
|
| 1406 |
+
"step": 990
|
| 1407 |
+
},
|
| 1408 |
+
{
|
| 1409 |
+
"epoch": 0.2123232862096559,
|
| 1410 |
+
"grad_norm": 2.495368003845215,
|
| 1411 |
+
"learning_rate": 8.960833859759948e-07,
|
| 1412 |
+
"loss": 0.312,
|
| 1413 |
+
"step": 995
|
| 1414 |
+
},
|
| 1415 |
+
{
|
| 1416 |
+
"epoch": 0.2133902373966391,
|
| 1417 |
+
"grad_norm": 2.2597124576568604,
|
| 1418 |
+
"learning_rate": 8.955569593598652e-07,
|
| 1419 |
+
"loss": 0.3053,
|
| 1420 |
+
"step": 1000
|
| 1421 |
+
},
|
| 1422 |
+
{
|
| 1423 |
+
"epoch": 0.2133902373966391,
|
| 1424 |
+
"eval_bleu": 0.6173395426659279,
|
| 1425 |
+
"eval_cer": 5.420346953040605,
|
| 1426 |
+
"eval_loss": 0.3327235281467438,
|
| 1427 |
+
"eval_runtime": 1022.0953,
|
| 1428 |
+
"eval_samples_per_second": 0.898,
|
| 1429 |
+
"eval_steps_per_second": 0.225,
|
| 1430 |
+
"eval_wer": 20.46380804775845,
|
| 1431 |
+
"step": 1000
|
| 1432 |
+
}
|
| 1433 |
+
],
|
| 1434 |
+
"logging_steps": 5,
|
| 1435 |
+
"max_steps": 9500,
|
| 1436 |
+
"num_input_tokens_seen": 0,
|
| 1437 |
+
"num_train_epochs": 3,
|
| 1438 |
+
"save_steps": 500,
|
| 1439 |
+
"stateful_callbacks": {
|
| 1440 |
+
"TrainerControl": {
|
| 1441 |
+
"args": {
|
| 1442 |
+
"should_epoch_stop": false,
|
| 1443 |
+
"should_evaluate": false,
|
| 1444 |
+
"should_log": false,
|
| 1445 |
+
"should_save": true,
|
| 1446 |
+
"should_training_stop": false
|
| 1447 |
+
},
|
| 1448 |
+
"attributes": {}
|
| 1449 |
+
}
|
| 1450 |
+
},
|
| 1451 |
+
"total_flos": 1.358829453312e+20,
|
| 1452 |
+
"train_batch_size": 8,
|
| 1453 |
+
"trial_name": null,
|
| 1454 |
+
"trial_params": null
|
| 1455 |
+
}
|
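The `log_history` array above is plain JSON, so the loss curve and the step-1000 evaluation can be inspected without loading the model at all. A minimal sketch, assuming the file has been downloaded locally as `trainer_state.json` (the key names follow the entries shown above; nothing else is assumed):

```python
import json

# Load the state file written by transformers.Trainer alongside checkpoints.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_*" keys.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
print(f"{len(train_logs)} training points, last loss {losses[-1]:.4f} at step {steps[-1]}")

for e in eval_logs:
    print(f'step {e["step"]}: eval_loss={e["eval_loss"]:.4f}, '
          f'WER={e["eval_wer"]:.2f}, CER={e["eval_cer"]:.2f}, BLEU={e["eval_bleu"]:.4f}')
```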
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:20adc179b6f798df62ecd414b871514d9da075bfe8a22514841288c4b0d3adc4
size 5841
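The three lines above are a Git LFS pointer, not the binary itself: `oid` is the SHA-256 of the real `training_args.bin` and `size` is its byte length, so a local download can be checked against the pointer. A small sketch, assuming the file has already been fetched (e.g. via `huggingface_hub`) into the working directory:

```python
import hashlib

# Values copied from the LFS pointer above.
EXPECTED_OID = "20adc179b6f798df62ecd414b871514d9da075bfe8a22514841288c4b0d3adc4"
EXPECTED_SIZE = 5841

with open("training_args.bin", "rb") as f:
    data = f.read()

# A resolved LFS object must match both the advertised size and sha256.
assert len(data) == EXPECTED_SIZE, f"size mismatch: {len(data)} bytes"
assert hashlib.sha256(data).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("training_args.bin matches the LFS pointer")
```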