End of training

Browse files

Files changed (6) hide show

README.md +15 -2
all_results.json +13 -13
eval_results.json +7 -7
runs/Aug09_05-15-33_30e2a19b1988/events.out.tfevents.1754729893.30e2a19b1988.3712.1 +3 -0
train_results.json +7 -7
trainer_state.json +1212 -345

README.md CHANGED Viewed

@@ -4,11 +4,24 @@ license: apache-2.0
 base_model: openai/whisper-medium
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
 - name: whisper-medium-ph
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,7 +29,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-medium-ph
-This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.2901
 - Wer: 0.1147

 base_model: openai/whisper-medium
 tags:
 - generated_from_trainer
+datasets:
+- rbcurzon/ph_dialect_asr
 metrics:
 - wer
 model-index:
 - name: whisper-medium-ph
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: rbcurzon/ph_dialect_asr all
+      type: rbcurzon/ph_dialect_asr
+      args: all
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.1146545827633379
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # whisper-medium-ph
+This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the rbcurzon/ph_dialect_asr all dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.2901
 - Wer: 0.1147

all_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-    "epoch": 2.466091245376079,
-    "eval_loss": 0.3112793266773224,
-    "eval_runtime": 1173.9261,
-    "eval_samples": 3612,
-    "eval_samples_per_second": 3.077,
-    "eval_steps_per_second": 0.193,
-    "eval_wer": 0.12829864835872132,
-    "total_flos": 3.265323341119488e+19,
-    "train_loss": 0.2021937195956707,
-    "train_runtime": 9527.7148,
-    "train_samples": 12973,
-    "train_samples_per_second": 3.359,
-    "train_steps_per_second": 0.21
 }

 {
+    "epoch": 7.407709414381023,
+    "eval_loss": 0.29011788964271545,
+    "eval_runtime": 736.3871,
+    "eval_samples": 2885,
+    "eval_samples_per_second": 3.918,
+    "eval_steps_per_second": 0.49,
+    "eval_wer": 0.1146545827633379,
+    "total_flos": 8.155551755501568e+19,
+    "train_loss": 0.10907779041565954,
+    "train_runtime": 12394.4337,
+    "train_samples": 10787,
+    "train_samples_per_second": 6.455,
+    "train_steps_per_second": 0.403
 }

eval_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 2.466091245376079,
-    "eval_loss": 0.3112793266773224,
-    "eval_runtime": 1173.9261,
-    "eval_samples": 3612,
-    "eval_samples_per_second": 3.077,
-    "eval_steps_per_second": 0.193,
-    "eval_wer": 0.12829864835872132
 }

 {
+    "epoch": 7.407709414381023,
+    "eval_loss": 0.29011788964271545,
+    "eval_runtime": 736.3871,
+    "eval_samples": 2885,
+    "eval_samples_per_second": 3.918,
+    "eval_steps_per_second": 0.49,
+    "eval_wer": 0.1146545827633379
 }

runs/Aug09_05-15-33_30e2a19b1988/events.out.tfevents.1754729893.30e2a19b1988.3712.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fc589040fcd11263525855db0ac5e3ae941083ed659ce852614b4c40555680d
+size 406

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 2.466091245376079,
-    "total_flos": 3.265323341119488e+19,
-    "train_loss": 0.2021937195956707,
-    "train_runtime": 9527.7148,
-    "train_samples": 12973,
-    "train_samples_per_second": 3.359,
-    "train_steps_per_second": 0.21
 }

 {
+    "epoch": 7.407709414381023,
+    "total_flos": 8.155551755501568e+19,
+    "train_loss": 0.10907779041565954,
+    "train_runtime": 12394.4337,
+    "train_samples": 10787,
+    "train_samples_per_second": 6.455,
+    "train_steps_per_second": 0.403
 }

trainer_state.json CHANGED Viewed

@@ -2,605 +2,1472 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.466091245376079,
   "eval_steps": 1000,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.030826140567200986,
-      "grad_norm": 11.499724388122559,
-      "learning_rate": 4.4e-07,
-      "loss": 1.2454,
       "step": 25
     },
     {
-      "epoch": 0.06165228113440197,
-      "grad_norm": 8.96716594696045,
-      "learning_rate": 9.400000000000001e-07,
-      "loss": 1.0189,
       "step": 50
     },
     {
-      "epoch": 0.09247842170160296,
-      "grad_norm": 7.870485305786133,
-      "learning_rate": 1.44e-06,
-      "loss": 0.8525,
       "step": 75
     },
     {
-      "epoch": 0.12330456226880394,
-      "grad_norm": 6.292770862579346,
-      "learning_rate": 1.94e-06,
-      "loss": 0.6987,
       "step": 100
     },
     {
-      "epoch": 0.15413070283600494,
-      "grad_norm": 6.656809329986572,
-      "learning_rate": 2.4400000000000004e-06,
-      "loss": 0.6285,
       "step": 125
     },
     {
-      "epoch": 0.18495684340320592,
-      "grad_norm": 6.315510272979736,
-      "learning_rate": 2.9400000000000002e-06,
-      "loss": 0.6055,
       "step": 150
     },
     {
-      "epoch": 0.2157829839704069,
-      "grad_norm": 7.333935737609863,
-      "learning_rate": 3.44e-06,
-      "loss": 0.5517,
       "step": 175
     },
     {
-      "epoch": 0.2466091245376079,
-      "grad_norm": 7.557521820068359,
-      "learning_rate": 3.94e-06,
-      "loss": 0.4966,
       "step": 200
     },
     {
-      "epoch": 0.27743526510480887,
-      "grad_norm": 5.930201053619385,
-      "learning_rate": 4.440000000000001e-06,
-      "loss": 0.4821,
       "step": 225
     },
     {
-      "epoch": 0.3082614056720099,
-      "grad_norm": 6.367243766784668,
-      "learning_rate": 4.94e-06,
-      "loss": 0.4661,
       "step": 250
     },
     {
-      "epoch": 0.33908754623921084,
-      "grad_norm": 6.376411437988281,
-      "learning_rate": 5.4400000000000004e-06,
-      "loss": 0.4417,
       "step": 275
     },
     {
-      "epoch": 0.36991368680641185,
-      "grad_norm": 5.1631574630737305,
-      "learning_rate": 5.94e-06,
-      "loss": 0.4297,
       "step": 300
     },
     {
-      "epoch": 0.4007398273736128,
-      "grad_norm": 5.0071330070495605,
-      "learning_rate": 6.440000000000001e-06,
-      "loss": 0.3914,
       "step": 325
     },
     {
-      "epoch": 0.4315659679408138,
-      "grad_norm": 4.759220123291016,
-      "learning_rate": 6.9400000000000005e-06,
-      "loss": 0.3548,
       "step": 350
     },
     {
-      "epoch": 0.4623921085080148,
-      "grad_norm": 5.040701389312744,
-      "learning_rate": 7.440000000000001e-06,
-      "loss": 0.3546,
       "step": 375
     },
     {
-      "epoch": 0.4932182490752158,
-      "grad_norm": 5.04915189743042,
-      "learning_rate": 7.94e-06,
-      "loss": 0.3388,
       "step": 400
     },
     {
-      "epoch": 0.5240443896424167,
-      "grad_norm": 5.2604546546936035,
-      "learning_rate": 8.44e-06,
-      "loss": 0.3048,
       "step": 425
     },
     {
-      "epoch": 0.5548705302096177,
-      "grad_norm": 5.409047603607178,
-      "learning_rate": 8.94e-06,
-      "loss": 0.3204,
       "step": 450
     },
     {
-      "epoch": 0.5856966707768188,
-      "grad_norm": 7.095681190490723,
-      "learning_rate": 9.440000000000001e-06,
-      "loss": 0.2962,
       "step": 475
     },
     {
-      "epoch": 0.6165228113440198,
-      "grad_norm": 5.518855571746826,
-      "learning_rate": 9.940000000000001e-06,
-      "loss": 0.2854,
       "step": 500
     },
     {
-      "epoch": 0.6473489519112207,
-      "grad_norm": 5.083781719207764,
-      "learning_rate": 9.853333333333334e-06,
-      "loss": 0.2816,
       "step": 525
     },
     {
-      "epoch": 0.6781750924784217,
-      "grad_norm": 4.599233150482178,
-      "learning_rate": 9.686666666666668e-06,
-      "loss": 0.2804,
       "step": 550
     },
     {
-      "epoch": 0.7090012330456227,
-      "grad_norm": 5.882887840270996,
-      "learning_rate": 9.52e-06,
-      "loss": 0.254,
       "step": 575
     },
     {
-      "epoch": 0.7398273736128237,
-      "grad_norm": 3.953178644180298,
-      "learning_rate": 9.353333333333334e-06,
-      "loss": 0.2522,
       "step": 600
     },
     {
-      "epoch": 0.7706535141800247,
-      "grad_norm": 3.6629250049591064,
-      "learning_rate": 9.186666666666666e-06,
-      "loss": 0.2217,
       "step": 625
     },
     {
-      "epoch": 0.8014796547472256,
-      "grad_norm": 5.168231010437012,
-      "learning_rate": 9.020000000000002e-06,
-      "loss": 0.2451,
       "step": 650
     },
     {
-      "epoch": 0.8323057953144266,
-      "grad_norm": 4.211165904998779,
-      "learning_rate": 8.853333333333334e-06,
-      "loss": 0.216,
       "step": 675
     },
     {
-      "epoch": 0.8631319358816276,
-      "grad_norm": 4.873836994171143,
-      "learning_rate": 8.686666666666668e-06,
-      "loss": 0.2027,
       "step": 700
     },
     {
-      "epoch": 0.8939580764488286,
-      "grad_norm": 4.061721324920654,
-      "learning_rate": 8.52e-06,
-      "loss": 0.2184,
       "step": 725
     },
     {
-      "epoch": 0.9247842170160296,
-      "grad_norm": 5.536831855773926,
-      "learning_rate": 8.353333333333335e-06,
-      "loss": 0.2212,
       "step": 750
     },
     {
-      "epoch": 0.9556103575832305,
-      "grad_norm": 3.8625235557556152,
-      "learning_rate": 8.186666666666667e-06,
-      "loss": 0.1874,
       "step": 775
     },
     {
-      "epoch": 0.9864364981504316,
-      "grad_norm": 4.030850887298584,
-      "learning_rate": 8.020000000000001e-06,
-      "loss": 0.1962,
       "step": 800
     },
     {
-      "epoch": 1.0172626387176325,
-      "grad_norm": 3.3085479736328125,
-      "learning_rate": 7.853333333333333e-06,
-      "loss": 0.1508,
       "step": 825
     },
     {
-      "epoch": 1.0480887792848335,
-      "grad_norm": 3.4279379844665527,
-      "learning_rate": 7.686666666666667e-06,
-      "loss": 0.0925,
       "step": 850
     },
     {
-      "epoch": 1.0789149198520345,
-      "grad_norm": 3.8404757976531982,
-      "learning_rate": 7.520000000000001e-06,
-      "loss": 0.1182,
       "step": 875
     },
     {
-      "epoch": 1.1097410604192355,
-      "grad_norm": 3.7630670070648193,
-      "learning_rate": 7.353333333333334e-06,
-      "loss": 0.1136,
       "step": 900
     },
     {
-      "epoch": 1.1405672009864365,
-      "grad_norm": 3.491826295852661,
-      "learning_rate": 7.186666666666668e-06,
-      "loss": 0.1069,
       "step": 925
     },
     {
-      "epoch": 1.1713933415536375,
-      "grad_norm": 2.2027463912963867,
-      "learning_rate": 7.0200000000000006e-06,
-      "loss": 0.1048,
       "step": 950
     },
     {
-      "epoch": 1.2022194821208385,
-      "grad_norm": 4.981961250305176,
-      "learning_rate": 6.853333333333334e-06,
-      "loss": 0.1141,
       "step": 975
     },
     {
-      "epoch": 1.2330456226880395,
-      "grad_norm": 3.4486851692199707,
-      "learning_rate": 6.6866666666666665e-06,
-      "loss": 0.1001,
       "step": 1000
     },
     {
-      "epoch": 1.2330456226880395,
-      "eval_loss": 0.30403050780296326,
-      "eval_runtime": 1181.2571,
-      "eval_samples_per_second": 3.058,
-      "eval_steps_per_second": 0.191,
-      "eval_wer": 0.14332880402393383,
       "step": 1000
     },
     {
-      "epoch": 1.2638717632552403,
-      "grad_norm": 2.620025396347046,
-      "learning_rate": 6.520000000000001e-06,
-      "loss": 0.0916,
       "step": 1025
     },
     {
-      "epoch": 1.2946979038224415,
-      "grad_norm": 3.629256248474121,
-      "learning_rate": 6.353333333333333e-06,
-      "loss": 0.0949,
       "step": 1050
     },
     {
-      "epoch": 1.3255240443896423,
-      "grad_norm": 2.832113265991211,
-      "learning_rate": 6.186666666666668e-06,
-      "loss": 0.084,
       "step": 1075
     },
     {
-      "epoch": 1.3563501849568433,
-      "grad_norm": 3.5080323219299316,
-      "learning_rate": 6.02e-06,
-      "loss": 0.0881,
       "step": 1100
     },
     {
-      "epoch": 1.3871763255240444,
-      "grad_norm": 3.9893321990966797,
-      "learning_rate": 5.853333333333335e-06,
-      "loss": 0.0781,
       "step": 1125
     },
     {
-      "epoch": 1.4180024660912454,
-      "grad_norm": 2.786031723022461,
-      "learning_rate": 5.686666666666667e-06,
-      "loss": 0.0852,
       "step": 1150
     },
     {
-      "epoch": 1.4488286066584464,
-      "grad_norm": 2.333205461502075,
-      "learning_rate": 5.5200000000000005e-06,
-      "loss": 0.0759,
       "step": 1175
     },
     {
-      "epoch": 1.4796547472256474,
-      "grad_norm": 3.204261302947998,
-      "learning_rate": 5.3533333333333335e-06,
-      "loss": 0.0865,
       "step": 1200
     },
     {
-      "epoch": 1.5104808877928484,
-      "grad_norm": 3.2963826656341553,
-      "learning_rate": 5.186666666666667e-06,
-      "loss": 0.0757,
       "step": 1225
     },
     {
-      "epoch": 1.5413070283600492,
-      "grad_norm": 2.5825254917144775,
-      "learning_rate": 5.02e-06,
-      "loss": 0.0717,
       "step": 1250
     },
     {
-      "epoch": 1.5721331689272504,
-      "grad_norm": 2.7192881107330322,
-      "learning_rate": 4.853333333333334e-06,
-      "loss": 0.0722,
       "step": 1275
     },
     {
-      "epoch": 1.6029593094944512,
-      "grad_norm": 2.911716938018799,
-      "learning_rate": 4.686666666666667e-06,
-      "loss": 0.0757,
       "step": 1300
     },
     {
-      "epoch": 1.6337854500616524,
-      "grad_norm": 2.1598548889160156,
-      "learning_rate": 4.520000000000001e-06,
-      "loss": 0.0704,
       "step": 1325
     },
     {
-      "epoch": 1.6646115906288532,
-      "grad_norm": 2.1722934246063232,
-      "learning_rate": 4.353333333333334e-06,
-      "loss": 0.0621,
       "step": 1350
     },
     {
-      "epoch": 1.6954377311960542,
-      "grad_norm": 2.0885589122772217,
-      "learning_rate": 4.1866666666666675e-06,
-      "loss": 0.0737,
       "step": 1375
     },
     {
-      "epoch": 1.7262638717632552,
-      "grad_norm": 3.2038285732269287,
-      "learning_rate": 4.0200000000000005e-06,
-      "loss": 0.0729,
       "step": 1400
     },
     {
-      "epoch": 1.7570900123304563,
-      "grad_norm": 2.2708075046539307,
-      "learning_rate": 3.853333333333334e-06,
-      "loss": 0.0571,
       "step": 1425
     },
     {
-      "epoch": 1.7879161528976573,
-      "grad_norm": 1.496151089668274,
-      "learning_rate": 3.686666666666667e-06,
-      "loss": 0.0532,
       "step": 1450
     },
     {
-      "epoch": 1.818742293464858,
-      "grad_norm": 1.8642607927322388,
-      "learning_rate": 3.52e-06,
-      "loss": 0.0544,
       "step": 1475
     },
     {
-      "epoch": 1.8495684340320593,
-      "grad_norm": 2.1302435398101807,
-      "learning_rate": 3.3533333333333336e-06,
-      "loss": 0.0425,
       "step": 1500
     },
     {
-      "epoch": 1.88039457459926,
-      "grad_norm": 1.2720330953598022,
-      "learning_rate": 3.186666666666667e-06,
-      "loss": 0.0486,
       "step": 1525
     },
     {
-      "epoch": 1.9112207151664613,
-      "grad_norm": 1.3626000881195068,
-      "learning_rate": 3.0200000000000003e-06,
-      "loss": 0.0482,
       "step": 1550
     },
     {
-      "epoch": 1.942046855733662,
-      "grad_norm": 2.652956485748291,
-      "learning_rate": 2.8533333333333337e-06,
-      "loss": 0.047,
       "step": 1575
     },
     {
-      "epoch": 1.972872996300863,
-      "grad_norm": 2.178326368331909,
-      "learning_rate": 2.686666666666667e-06,
-      "loss": 0.0543,
       "step": 1600
     },
     {
-      "epoch": 2.003699136868064,
-      "grad_norm": 0.6113713979721069,
-      "learning_rate": 2.52e-06,
-      "loss": 0.0416,
       "step": 1625
     },
     {
-      "epoch": 2.034525277435265,
-      "grad_norm": 1.6302359104156494,
-      "learning_rate": 2.3533333333333334e-06,
-      "loss": 0.0167,
       "step": 1650
     },
     {
-      "epoch": 2.065351418002466,
-      "grad_norm": 0.9459154605865479,
-      "learning_rate": 2.1866666666666668e-06,
-      "loss": 0.0137,
       "step": 1675
     },
     {
-      "epoch": 2.096177558569667,
-      "grad_norm": 1.4943691492080688,
-      "learning_rate": 2.02e-06,
-      "loss": 0.0159,
       "step": 1700
     },
     {
-      "epoch": 2.127003699136868,
-      "grad_norm": 0.5425832867622375,
-      "learning_rate": 1.8533333333333333e-06,
-      "loss": 0.0152,
       "step": 1725
     },
     {
-      "epoch": 2.157829839704069,
-      "grad_norm": 1.4946790933609009,
-      "learning_rate": 1.6866666666666667e-06,
-      "loss": 0.0146,
       "step": 1750
     },
     {
-      "epoch": 2.18865598027127,
-      "grad_norm": 0.9100169539451599,
-      "learning_rate": 1.52e-06,
-      "loss": 0.0191,
       "step": 1775
     },
     {
-      "epoch": 2.219482120838471,
-      "grad_norm": 1.2448313236236572,
-      "learning_rate": 1.3533333333333334e-06,
-      "loss": 0.0159,
       "step": 1800
     },
     {
-      "epoch": 2.250308261405672,
-      "grad_norm": 0.9976411461830139,
-      "learning_rate": 1.1866666666666668e-06,
-      "loss": 0.0203,
       "step": 1825
     },
     {
-      "epoch": 2.281134401972873,
-      "grad_norm": 1.358780860900879,
-      "learning_rate": 1.02e-06,
-      "loss": 0.0139,
       "step": 1850
     },
     {
-      "epoch": 2.311960542540074,
-      "grad_norm": 1.2800226211547852,
-      "learning_rate": 8.533333333333334e-07,
-      "loss": 0.0175,
       "step": 1875
     },
     {
-      "epoch": 2.342786683107275,
-      "grad_norm": 1.007161021232605,
-      "learning_rate": 6.866666666666667e-07,
-      "loss": 0.013,
       "step": 1900
     },
     {
-      "epoch": 2.373612823674476,
-      "grad_norm": 1.1838051080703735,
-      "learning_rate": 5.2e-07,
-      "loss": 0.0144,
       "step": 1925
     },
     {
-      "epoch": 2.404438964241677,
-      "grad_norm": 1.2872673273086548,
-      "learning_rate": 3.533333333333334e-07,
-      "loss": 0.0134,
       "step": 1950
     },
     {
-      "epoch": 2.435265104808878,
-      "grad_norm": 0.7094443440437317,
-      "learning_rate": 1.866666666666667e-07,
-      "loss": 0.0123,
       "step": 1975
     },
     {
-      "epoch": 2.466091245376079,
-      "grad_norm": 0.6137486696243286,
-      "learning_rate": 2e-08,
-      "loss": 0.0125,
       "step": 2000
     },
     {
-      "epoch": 2.466091245376079,
-      "eval_loss": 0.3112793266773224,
-      "eval_runtime": 1182.7026,
-      "eval_samples_per_second": 3.054,
-      "eval_steps_per_second": 0.191,
-      "eval_wer": 0.12829864835872132,
       "step": 2000
     },
     {
-      "epoch": 2.466091245376079,
-      "step": 2000,
-      "total_flos": 3.265323341119488e+19,
-      "train_loss": 0.2021937195956707,
-      "train_runtime": 9527.7148,
-      "train_samples_per_second": 3.359,
-      "train_steps_per_second": 0.21
     }
   ],
   "logging_steps": 25,
-  "max_steps": 2000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 1000,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -614,8 +1481,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.265323341119488e+19,
-  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 7.407709414381023,
   "eval_steps": 1000,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.037064492216456635,
+      "grad_norm": 14.615763664245605,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 1.4607,
       "step": 25
     },
     {
+      "epoch": 0.07412898443291327,
+      "grad_norm": 10.59756851196289,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 1.176,
       "step": 50
     },
     {
+      "epoch": 0.1111934766493699,
+      "grad_norm": 7.142136573791504,
+      "learning_rate": 1.48e-06,
+      "loss": 0.9267,
       "step": 75
     },
     {
+      "epoch": 0.14825796886582654,
+      "grad_norm": 9.191902160644531,
+      "learning_rate": 1.98e-06,
+      "loss": 0.7253,
       "step": 100
     },
     {
+      "epoch": 0.18532246108228317,
+      "grad_norm": 10.320201873779297,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 0.7047,
       "step": 125
     },
     {
+      "epoch": 0.2223869532987398,
+      "grad_norm": 8.486912727355957,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 0.6634,
       "step": 150
     },
     {
+      "epoch": 0.25945144551519644,
+      "grad_norm": 9.802300453186035,
+      "learning_rate": 3.48e-06,
+      "loss": 0.5786,
       "step": 175
     },
     {
+      "epoch": 0.2965159377316531,
+      "grad_norm": 9.568249702453613,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 0.5857,
       "step": 200
     },
     {
+      "epoch": 0.3335804299481097,
+      "grad_norm": 7.968526840209961,
+      "learning_rate": 4.48e-06,
+      "loss": 0.5385,
       "step": 225
     },
     {
+      "epoch": 0.37064492216456635,
+      "grad_norm": 7.507795810699463,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 0.5151,
       "step": 250
     },
     {
+      "epoch": 0.407709414381023,
+      "grad_norm": 6.258375644683838,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 0.4649,
       "step": 275
     },
     {
+      "epoch": 0.4447739065974796,
+      "grad_norm": 9.89697551727295,
+      "learning_rate": 5.98e-06,
+      "loss": 0.4209,
       "step": 300
     },
     {
+      "epoch": 0.48183839881393625,
+      "grad_norm": 7.8507490158081055,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 0.4459,
       "step": 325
     },
     {
+      "epoch": 0.5189028910303929,
+      "grad_norm": 5.835811138153076,
+      "learning_rate": 6.98e-06,
+      "loss": 0.4141,
       "step": 350
     },
     {
+      "epoch": 0.5559673832468495,
+      "grad_norm": 6.767547607421875,
+      "learning_rate": 7.48e-06,
+      "loss": 0.4108,
       "step": 375
     },
     {
+      "epoch": 0.5930318754633062,
+      "grad_norm": 5.9475884437561035,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 0.41,
       "step": 400
     },
     {
+      "epoch": 0.6300963676797627,
+      "grad_norm": 7.767906188964844,
+      "learning_rate": 8.48e-06,
+      "loss": 0.3781,
       "step": 425
     },
     {
+      "epoch": 0.6671608598962194,
+      "grad_norm": 6.990137100219727,
+      "learning_rate": 8.98e-06,
+      "loss": 0.39,
       "step": 450
     },
     {
+      "epoch": 0.704225352112676,
+      "grad_norm": 5.607441425323486,
+      "learning_rate": 9.48e-06,
+      "loss": 0.3783,
       "step": 475
     },
     {
+      "epoch": 0.7412898443291327,
+      "grad_norm": 6.288857936859131,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 0.3559,
       "step": 500
     },
     {
+      "epoch": 0.7783543365455893,
+      "grad_norm": 6.985698699951172,
+      "learning_rate": 9.946666666666667e-06,
+      "loss": 0.3595,
       "step": 525
     },
     {
+      "epoch": 0.815418828762046,
+      "grad_norm": 6.037854194641113,
+      "learning_rate": 9.891111111111113e-06,
+      "loss": 0.3163,
       "step": 550
     },
     {
+      "epoch": 0.8524833209785025,
+      "grad_norm": 5.8710784912109375,
+      "learning_rate": 9.835555555555556e-06,
+      "loss": 0.3502,
       "step": 575
     },
     {
+      "epoch": 0.8895478131949592,
+      "grad_norm": 6.342834949493408,
+      "learning_rate": 9.780000000000001e-06,
+      "loss": 0.317,
       "step": 600
     },
     {
+      "epoch": 0.9266123054114158,
+      "grad_norm": 5.589534759521484,
+      "learning_rate": 9.724444444444445e-06,
+      "loss": 0.3228,
       "step": 625
     },
     {
+      "epoch": 0.9636767976278725,
+      "grad_norm": 7.743918418884277,
+      "learning_rate": 9.66888888888889e-06,
+      "loss": 0.3144,
       "step": 650
     },
     {
+      "epoch": 1.0,
+      "grad_norm": 10.073568344116211,
+      "learning_rate": 9.613333333333335e-06,
+      "loss": 0.2939,
       "step": 675
     },
     {
+      "epoch": 1.0370644922164567,
+      "grad_norm": 4.640520095825195,
+      "learning_rate": 9.557777777777777e-06,
+      "loss": 0.1939,
       "step": 700
     },
     {
+      "epoch": 1.0741289844329134,
+      "grad_norm": 3.2049508094787598,
+      "learning_rate": 9.502222222222223e-06,
+      "loss": 0.1929,
       "step": 725
     },
     {
+      "epoch": 1.1111934766493698,
+      "grad_norm": 3.9065611362457275,
+      "learning_rate": 9.446666666666667e-06,
+      "loss": 0.1998,
       "step": 750
     },
     {
+      "epoch": 1.1482579688658265,
+      "grad_norm": 3.7471649646759033,
+      "learning_rate": 9.391111111111111e-06,
+      "loss": 0.2007,
       "step": 775
     },
     {
+      "epoch": 1.1853224610822832,
+      "grad_norm": 3.952751874923706,
+      "learning_rate": 9.335555555555557e-06,
+      "loss": 0.1863,
       "step": 800
     },
     {
+      "epoch": 1.2223869532987397,
+      "grad_norm": 5.39549446105957,
+      "learning_rate": 9.280000000000001e-06,
+      "loss": 0.1953,
       "step": 825
     },
     {
+      "epoch": 1.2594514455151964,
+      "grad_norm": 4.03216552734375,
+      "learning_rate": 9.224444444444445e-06,
+      "loss": 0.2065,
       "step": 850
     },
     {
+      "epoch": 1.296515937731653,
+      "grad_norm": 3.854651689529419,
+      "learning_rate": 9.168888888888889e-06,
+      "loss": 0.1703,
       "step": 875
     },
     {
+      "epoch": 1.3335804299481098,
+      "grad_norm": 4.835360050201416,
+      "learning_rate": 9.113333333333335e-06,
+      "loss": 0.1692,
       "step": 900
     },
     {
+      "epoch": 1.3706449221645665,
+      "grad_norm": 5.247130393981934,
+      "learning_rate": 9.057777777777779e-06,
+      "loss": 0.1982,
       "step": 925
     },
     {
+      "epoch": 1.407709414381023,
+      "grad_norm": 3.9537737369537354,
+      "learning_rate": 9.002222222222223e-06,
+      "loss": 0.1661,
       "step": 950
     },
     {
+      "epoch": 1.4447739065974796,
+      "grad_norm": 4.887810230255127,
+      "learning_rate": 8.946666666666669e-06,
+      "loss": 0.1836,
       "step": 975
     },
     {
+      "epoch": 1.4818383988139363,
+      "grad_norm": 3.6338751316070557,
+      "learning_rate": 8.891111111111111e-06,
+      "loss": 0.1822,
       "step": 1000
     },
     {
+      "epoch": 1.4818383988139363,
+      "eval_loss": 0.2655850648880005,
+      "eval_runtime": 730.9503,
+      "eval_samples_per_second": 3.947,
+      "eval_steps_per_second": 0.494,
+      "eval_wer": 0.14449384404924762,
       "step": 1000
     },
     {
+      "epoch": 1.5189028910303928,
+      "grad_norm": 4.078255653381348,
+      "learning_rate": 8.835555555555557e-06,
+      "loss": 0.1661,
       "step": 1025
     },
     {
+      "epoch": 1.5559673832468495,
+      "grad_norm": 3.9311952590942383,
+      "learning_rate": 8.78e-06,
+      "loss": 0.1725,
       "step": 1050
     },
     {
+      "epoch": 1.5930318754633062,
+      "grad_norm": 4.800196170806885,
+      "learning_rate": 8.724444444444445e-06,
+      "loss": 0.1704,
       "step": 1075
     },
     {
+      "epoch": 1.6300963676797626,
+      "grad_norm": 4.550530910491943,
+      "learning_rate": 8.66888888888889e-06,
+      "loss": 0.1793,
       "step": 1100
     },
     {
+      "epoch": 1.6671608598962195,
+      "grad_norm": 6.508624076843262,
+      "learning_rate": 8.613333333333333e-06,
+      "loss": 0.1619,
       "step": 1125
     },
     {
+      "epoch": 1.704225352112676,
+      "grad_norm": 4.16792106628418,
+      "learning_rate": 8.557777777777778e-06,
+      "loss": 0.1652,
       "step": 1150
     },
     {
+      "epoch": 1.7412898443291327,
+      "grad_norm": 4.420657157897949,
+      "learning_rate": 8.502222222222223e-06,
+      "loss": 0.16,
       "step": 1175
     },
     {
+      "epoch": 1.7783543365455894,
+      "grad_norm": 4.781569004058838,
+      "learning_rate": 8.446666666666668e-06,
+      "loss": 0.1695,
       "step": 1200
     },
     {
+      "epoch": 1.8154188287620459,
+      "grad_norm": 3.877307176589966,
+      "learning_rate": 8.391111111111112e-06,
+      "loss": 0.1529,
       "step": 1225
     },
     {
+      "epoch": 1.8524833209785025,
+      "grad_norm": 4.159163475036621,
+      "learning_rate": 8.335555555555556e-06,
+      "loss": 0.1619,
       "step": 1250
     },
     {
+      "epoch": 1.8895478131949592,
+      "grad_norm": 3.6631579399108887,
+      "learning_rate": 8.28e-06,
+      "loss": 0.1654,
       "step": 1275
     },
     {
+      "epoch": 1.9266123054114157,
+      "grad_norm": 4.1784210205078125,
+      "learning_rate": 8.224444444444444e-06,
+      "loss": 0.1494,
       "step": 1300
     },
     {
+      "epoch": 1.9636767976278726,
+      "grad_norm": 5.867852210998535,
+      "learning_rate": 8.16888888888889e-06,
+      "loss": 0.1443,
       "step": 1325
     },
     {
+      "epoch": 2.0,
+      "grad_norm": 5.817214012145996,
+      "learning_rate": 8.113333333333334e-06,
+      "loss": 0.139,
       "step": 1350
     },
     {
+      "epoch": 2.0370644922164565,
+      "grad_norm": 2.3572022914886475,
+      "learning_rate": 8.057777777777778e-06,
+      "loss": 0.0614,
       "step": 1375
     },
     {
+      "epoch": 2.0741289844329134,
+      "grad_norm": 2.2769412994384766,
+      "learning_rate": 8.002222222222222e-06,
+      "loss": 0.0606,
       "step": 1400
     },
     {
+      "epoch": 2.11119347664937,
+      "grad_norm": 2.474583864212036,
+      "learning_rate": 7.946666666666666e-06,
+      "loss": 0.0716,
       "step": 1425
     },
     {
+      "epoch": 2.1482579688658268,
+      "grad_norm": 2.5783841609954834,
+      "learning_rate": 7.891111111111112e-06,
+      "loss": 0.065,
       "step": 1450
     },
     {
+      "epoch": 2.1853224610822832,
+      "grad_norm": 1.6132420301437378,
+      "learning_rate": 7.835555555555556e-06,
+      "loss": 0.067,
       "step": 1475
     },
     {
+      "epoch": 2.2223869532987397,
+      "grad_norm": 3.8042001724243164,
+      "learning_rate": 7.78e-06,
+      "loss": 0.0724,
       "step": 1500
     },
     {
+      "epoch": 2.2594514455151966,
+      "grad_norm": 2.2419843673706055,
+      "learning_rate": 7.724444444444446e-06,
+      "loss": 0.0761,
       "step": 1525
     },
     {
+      "epoch": 2.296515937731653,
+      "grad_norm": 2.706354856491089,
+      "learning_rate": 7.66888888888889e-06,
+      "loss": 0.0659,
       "step": 1550
     },
     {
+      "epoch": 2.3335804299481095,
+      "grad_norm": 2.8394265174865723,
+      "learning_rate": 7.613333333333334e-06,
+      "loss": 0.0688,
       "step": 1575
     },
     {
+      "epoch": 2.3706449221645665,
+      "grad_norm": 2.383784770965576,
+      "learning_rate": 7.557777777777779e-06,
+      "loss": 0.0729,
       "step": 1600
     },
     {
+      "epoch": 2.407709414381023,
+      "grad_norm": 3.0959832668304443,
+      "learning_rate": 7.502222222222223e-06,
+      "loss": 0.0626,
       "step": 1625
     },
     {
+      "epoch": 2.4447739065974794,
+      "grad_norm": 2.927393913269043,
+      "learning_rate": 7.446666666666668e-06,
+      "loss": 0.0677,
       "step": 1650
     },
     {
+      "epoch": 2.4818383988139363,
+      "grad_norm": 2.644434928894043,
+      "learning_rate": 7.3911111111111125e-06,
+      "loss": 0.0644,
       "step": 1675
     },
     {
+      "epoch": 2.5189028910303928,
+      "grad_norm": 2.9071755409240723,
+      "learning_rate": 7.335555555555556e-06,
+      "loss": 0.061,
       "step": 1700
     },
     {
+      "epoch": 2.5559673832468492,
+      "grad_norm": 2.6862034797668457,
+      "learning_rate": 7.280000000000001e-06,
+      "loss": 0.0615,
       "step": 1725
     },
     {
+      "epoch": 2.593031875463306,
+      "grad_norm": 3.1184046268463135,
+      "learning_rate": 7.224444444444445e-06,
+      "loss": 0.0714,
       "step": 1750
     },
     {
+      "epoch": 2.6300963676797626,
+      "grad_norm": 1.7592053413391113,
+      "learning_rate": 7.1688888888888895e-06,
+      "loss": 0.0704,
       "step": 1775
     },
     {
+      "epoch": 2.6671608598962195,
+      "grad_norm": 2.9316508769989014,
+      "learning_rate": 7.113333333333334e-06,
+      "loss": 0.0689,
       "step": 1800
     },
     {
+      "epoch": 2.704225352112676,
+      "grad_norm": 2.1934666633605957,
+      "learning_rate": 7.057777777777778e-06,
+      "loss": 0.0721,
       "step": 1825
     },
     {
+      "epoch": 2.741289844329133,
+      "grad_norm": 3.4919371604919434,
+      "learning_rate": 7.0022222222222225e-06,
+      "loss": 0.0638,
       "step": 1850
     },
     {
+      "epoch": 2.7783543365455894,
+      "grad_norm": 2.723252058029175,
+      "learning_rate": 6.946666666666667e-06,
+      "loss": 0.0598,
       "step": 1875
     },
     {
+      "epoch": 2.815418828762046,
+      "grad_norm": 1.8668267726898193,
+      "learning_rate": 6.891111111111111e-06,
+      "loss": 0.0607,
       "step": 1900
     },
     {
+      "epoch": 2.8524833209785028,
+      "grad_norm": 2.0989866256713867,
+      "learning_rate": 6.835555555555556e-06,
+      "loss": 0.0821,
       "step": 1925
     },
     {
+      "epoch": 2.8895478131949592,
+      "grad_norm": 2.9375364780426025,
+      "learning_rate": 6.780000000000001e-06,
+      "loss": 0.0636,
       "step": 1950
     },
     {
+      "epoch": 2.9266123054114157,
+      "grad_norm": 2.1375315189361572,
+      "learning_rate": 6.724444444444444e-06,
+      "loss": 0.0723,
       "step": 1975
     },
     {
+      "epoch": 2.9636767976278726,
+      "grad_norm": 2.5874264240264893,
+      "learning_rate": 6.668888888888889e-06,
+      "loss": 0.0706,
       "step": 2000
     },
     {
+      "epoch": 2.9636767976278726,
+      "eval_loss": 0.2490690052509308,
+      "eval_runtime": 730.2087,
+      "eval_samples_per_second": 3.951,
+      "eval_steps_per_second": 0.494,
+      "eval_wer": 0.12696648426812585,
       "step": 2000
     },
     {
+      "epoch": 3.0,
+      "grad_norm": 6.509148597717285,
+      "learning_rate": 6.613333333333334e-06,
+      "loss": 0.0587,
+      "step": 2025
+    },
+    {
+      "epoch": 3.0370644922164565,
+      "grad_norm": 1.9590086936950684,
+      "learning_rate": 6.557777777777778e-06,
+      "loss": 0.0241,
+      "step": 2050
+    },
+    {
+      "epoch": 3.0741289844329134,
+      "grad_norm": 1.4612740278244019,
+      "learning_rate": 6.502222222222223e-06,
+      "loss": 0.0267,
+      "step": 2075
+    },
+    {
+      "epoch": 3.11119347664937,
+      "grad_norm": 0.9522780179977417,
+      "learning_rate": 6.446666666666668e-06,
+      "loss": 0.023,
+      "step": 2100
+    },
+    {
+      "epoch": 3.1482579688658268,
+      "grad_norm": 1.891400694847107,
+      "learning_rate": 6.391111111111111e-06,
+      "loss": 0.0281,
+      "step": 2125
+    },
+    {
+      "epoch": 3.1853224610822832,
+      "grad_norm": 1.0783302783966064,
+      "learning_rate": 6.335555555555556e-06,
+      "loss": 0.0246,
+      "step": 2150
+    },
+    {
+      "epoch": 3.2223869532987397,
+      "grad_norm": 1.3504562377929688,
+      "learning_rate": 6.280000000000001e-06,
+      "loss": 0.0244,
+      "step": 2175
+    },
+    {
+      "epoch": 3.2594514455151966,
+      "grad_norm": 1.8768439292907715,
+      "learning_rate": 6.224444444444445e-06,
+      "loss": 0.0264,
+      "step": 2200
+    },
+    {
+      "epoch": 3.296515937731653,
+      "grad_norm": 1.5083887577056885,
+      "learning_rate": 6.16888888888889e-06,
+      "loss": 0.0248,
+      "step": 2225
+    },
+    {
+      "epoch": 3.3335804299481095,
+      "grad_norm": 3.5768120288848877,
+      "learning_rate": 6.113333333333333e-06,
+      "loss": 0.0316,
+      "step": 2250
+    },
+    {
+      "epoch": 3.3706449221645665,
+      "grad_norm": 1.1493444442749023,
+      "learning_rate": 6.057777777777778e-06,
+      "loss": 0.0294,
+      "step": 2275
+    },
+    {
+      "epoch": 3.407709414381023,
+      "grad_norm": 2.3746306896209717,
+      "learning_rate": 6.002222222222223e-06,
+      "loss": 0.0263,
+      "step": 2300
+    },
+    {
+      "epoch": 3.4447739065974794,
+      "grad_norm": 2.144634485244751,
+      "learning_rate": 5.946666666666668e-06,
+      "loss": 0.0348,
+      "step": 2325
+    },
+    {
+      "epoch": 3.4818383988139363,
+      "grad_norm": 1.5002686977386475,
+      "learning_rate": 5.891111111111112e-06,
+      "loss": 0.0228,
+      "step": 2350
+    },
+    {
+      "epoch": 3.5189028910303928,
+      "grad_norm": 1.6059187650680542,
+      "learning_rate": 5.8355555555555565e-06,
+      "loss": 0.0239,
+      "step": 2375
+    },
+    {
+      "epoch": 3.5559673832468492,
+      "grad_norm": 2.757420778274536,
+      "learning_rate": 5.78e-06,
+      "loss": 0.0277,
+      "step": 2400
+    },
+    {
+      "epoch": 3.593031875463306,
+      "grad_norm": 1.3977222442626953,
+      "learning_rate": 5.724444444444445e-06,
+      "loss": 0.0224,
+      "step": 2425
+    },
+    {
+      "epoch": 3.6300963676797626,
+      "grad_norm": 1.9618048667907715,
+      "learning_rate": 5.6688888888888895e-06,
+      "loss": 0.026,
+      "step": 2450
+    },
+    {
+      "epoch": 3.6671608598962195,
+      "grad_norm": 0.898245632648468,
+      "learning_rate": 5.613333333333334e-06,
+      "loss": 0.0326,
+      "step": 2475
+    },
+    {
+      "epoch": 3.704225352112676,
+      "grad_norm": 1.8148616552352905,
+      "learning_rate": 5.557777777777778e-06,
+      "loss": 0.0213,
+      "step": 2500
+    },
+    {
+      "epoch": 3.741289844329133,
+      "grad_norm": 1.308030366897583,
+      "learning_rate": 5.5022222222222224e-06,
+      "loss": 0.0192,
+      "step": 2525
+    },
+    {
+      "epoch": 3.7783543365455894,
+      "grad_norm": 1.6680744886398315,
+      "learning_rate": 5.4466666666666665e-06,
+      "loss": 0.027,
+      "step": 2550
+    },
+    {
+      "epoch": 3.815418828762046,
+      "grad_norm": 3.235917568206787,
+      "learning_rate": 5.391111111111111e-06,
+      "loss": 0.0242,
+      "step": 2575
+    },
+    {
+      "epoch": 3.8524833209785028,
+      "grad_norm": 2.096780300140381,
+      "learning_rate": 5.335555555555556e-06,
+      "loss": 0.0243,
+      "step": 2600
+    },
+    {
+      "epoch": 3.8895478131949592,
+      "grad_norm": 1.8445031642913818,
+      "learning_rate": 5.28e-06,
+      "loss": 0.024,
+      "step": 2625
+    },
+    {
+      "epoch": 3.9266123054114157,
+      "grad_norm": 1.357937216758728,
+      "learning_rate": 5.224444444444445e-06,
+      "loss": 0.0244,
+      "step": 2650
+    },
+    {
+      "epoch": 3.9636767976278726,
+      "grad_norm": 1.0413466691970825,
+      "learning_rate": 5.168888888888889e-06,
+      "loss": 0.0221,
+      "step": 2675
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 3.0572996139526367,
+      "learning_rate": 5.113333333333333e-06,
+      "loss": 0.0206,
+      "step": 2700
+    },
+    {
+      "epoch": 4.037064492216457,
+      "grad_norm": 0.9961848258972168,
+      "learning_rate": 5.057777777777778e-06,
+      "loss": 0.0136,
+      "step": 2725
+    },
+    {
+      "epoch": 4.074128984432913,
+      "grad_norm": 1.0248702764511108,
+      "learning_rate": 5.002222222222223e-06,
+      "loss": 0.009,
+      "step": 2750
+    },
+    {
+      "epoch": 4.11119347664937,
+      "grad_norm": 0.6142157912254333,
+      "learning_rate": 4.946666666666667e-06,
+      "loss": 0.0113,
+      "step": 2775
+    },
+    {
+      "epoch": 4.148257968865827,
+      "grad_norm": 0.27292531728744507,
+      "learning_rate": 4.891111111111111e-06,
+      "loss": 0.009,
+      "step": 2800
+    },
+    {
+      "epoch": 4.185322461082283,
+      "grad_norm": 2.2906312942504883,
+      "learning_rate": 4.835555555555556e-06,
+      "loss": 0.0073,
+      "step": 2825
+    },
+    {
+      "epoch": 4.22238695329874,
+      "grad_norm": 1.0498850345611572,
+      "learning_rate": 4.78e-06,
+      "loss": 0.0093,
+      "step": 2850
+    },
+    {
+      "epoch": 4.259451445515197,
+      "grad_norm": 1.1574844121932983,
+      "learning_rate": 4.724444444444445e-06,
+      "loss": 0.0159,
+      "step": 2875
+    },
+    {
+      "epoch": 4.2965159377316535,
+      "grad_norm": 0.7209671139717102,
+      "learning_rate": 4.66888888888889e-06,
+      "loss": 0.0088,
+      "step": 2900
+    },
+    {
+      "epoch": 4.3335804299481095,
+      "grad_norm": 1.168841004371643,
+      "learning_rate": 4.613333333333334e-06,
+      "loss": 0.0094,
+      "step": 2925
+    },
+    {
+      "epoch": 4.3706449221645665,
+      "grad_norm": 0.6153778433799744,
+      "learning_rate": 4.557777777777778e-06,
+      "loss": 0.009,
+      "step": 2950
+    },
+    {
+      "epoch": 4.407709414381023,
+      "grad_norm": 1.5705232620239258,
+      "learning_rate": 4.502222222222223e-06,
+      "loss": 0.0085,
+      "step": 2975
+    },
+    {
+      "epoch": 4.444773906597479,
+      "grad_norm": 0.24448032677173615,
+      "learning_rate": 4.446666666666667e-06,
+      "loss": 0.0072,
+      "step": 3000
+    },
+    {
+      "epoch": 4.444773906597479,
+      "eval_loss": 0.27286583185195923,
+      "eval_runtime": 739.8615,
+      "eval_samples_per_second": 3.899,
+      "eval_steps_per_second": 0.488,
+      "eval_wer": 0.11913474692202462,
+      "step": 3000
+    },
+    {
+      "epoch": 4.481838398813936,
+      "grad_norm": 1.2278587818145752,
+      "learning_rate": 4.391111111111112e-06,
+      "loss": 0.0146,
+      "step": 3025
+    },
+    {
+      "epoch": 4.518902891030393,
+      "grad_norm": 0.6478213667869568,
+      "learning_rate": 4.3355555555555565e-06,
+      "loss": 0.014,
+      "step": 3050
+    },
+    {
+      "epoch": 4.555967383246849,
+      "grad_norm": 0.7865190505981445,
+      "learning_rate": 4.2800000000000005e-06,
+      "loss": 0.0079,
+      "step": 3075
+    },
+    {
+      "epoch": 4.593031875463306,
+      "grad_norm": 2.3078877925872803,
+      "learning_rate": 4.2244444444444446e-06,
+      "loss": 0.009,
+      "step": 3100
+    },
+    {
+      "epoch": 4.630096367679763,
+      "grad_norm": 0.9625842571258545,
+      "learning_rate": 4.168888888888889e-06,
+      "loss": 0.0096,
+      "step": 3125
+    },
+    {
+      "epoch": 4.667160859896219,
+      "grad_norm": 0.7619579434394836,
+      "learning_rate": 4.1133333333333335e-06,
+      "loss": 0.0096,
+      "step": 3150
+    },
+    {
+      "epoch": 4.704225352112676,
+      "grad_norm": 1.5049270391464233,
+      "learning_rate": 4.057777777777778e-06,
+      "loss": 0.0099,
+      "step": 3175
+    },
+    {
+      "epoch": 4.741289844329133,
+      "grad_norm": 1.1056573390960693,
+      "learning_rate": 4.002222222222222e-06,
+      "loss": 0.0065,
+      "step": 3200
+    },
+    {
+      "epoch": 4.778354336545589,
+      "grad_norm": 0.7983392477035522,
+      "learning_rate": 3.946666666666667e-06,
+      "loss": 0.0105,
+      "step": 3225
+    },
+    {
+      "epoch": 4.815418828762046,
+      "grad_norm": 1.1153795719146729,
+      "learning_rate": 3.891111111111111e-06,
+      "loss": 0.0075,
+      "step": 3250
+    },
+    {
+      "epoch": 4.852483320978503,
+      "grad_norm": 0.9730608463287354,
+      "learning_rate": 3.835555555555555e-06,
+      "loss": 0.0087,
+      "step": 3275
+    },
+    {
+      "epoch": 4.889547813194959,
+      "grad_norm": 0.5694206953048706,
+      "learning_rate": 3.7800000000000002e-06,
+      "loss": 0.0071,
+      "step": 3300
+    },
+    {
+      "epoch": 4.926612305411416,
+      "grad_norm": 0.2520028352737427,
+      "learning_rate": 3.724444444444445e-06,
+      "loss": 0.0081,
+      "step": 3325
+    },
+    {
+      "epoch": 4.963676797627873,
+      "grad_norm": 0.436355322599411,
+      "learning_rate": 3.668888888888889e-06,
+      "loss": 0.0078,
+      "step": 3350
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.798361599445343,
+      "learning_rate": 3.6133333333333336e-06,
+      "loss": 0.0075,
+      "step": 3375
+    },
+    {
+      "epoch": 5.037064492216457,
+      "grad_norm": 1.3702267408370972,
+      "learning_rate": 3.5577777777777785e-06,
+      "loss": 0.005,
+      "step": 3400
+    },
+    {
+      "epoch": 5.074128984432913,
+      "grad_norm": 0.2790464162826538,
+      "learning_rate": 3.5022222222222225e-06,
+      "loss": 0.0032,
+      "step": 3425
+    },
+    {
+      "epoch": 5.11119347664937,
+      "grad_norm": 0.15111476182937622,
+      "learning_rate": 3.446666666666667e-06,
+      "loss": 0.0046,
+      "step": 3450
+    },
+    {
+      "epoch": 5.148257968865827,
+      "grad_norm": 0.09985285252332687,
+      "learning_rate": 3.391111111111111e-06,
+      "loss": 0.0035,
+      "step": 3475
+    },
+    {
+      "epoch": 5.185322461082283,
+      "grad_norm": 0.5352105498313904,
+      "learning_rate": 3.335555555555556e-06,
+      "loss": 0.0031,
+      "step": 3500
+    },
+    {
+      "epoch": 5.22238695329874,
+      "grad_norm": 0.9406213760375977,
+      "learning_rate": 3.2800000000000004e-06,
+      "loss": 0.0035,
+      "step": 3525
+    },
+    {
+      "epoch": 5.259451445515197,
+      "grad_norm": 0.7073507905006409,
+      "learning_rate": 3.2244444444444444e-06,
+      "loss": 0.0035,
+      "step": 3550
+    },
+    {
+      "epoch": 5.2965159377316535,
+      "grad_norm": 0.07916448265314102,
+      "learning_rate": 3.1688888888888893e-06,
+      "loss": 0.0035,
+      "step": 3575
+    },
+    {
+      "epoch": 5.3335804299481095,
+      "grad_norm": 0.5285120606422424,
+      "learning_rate": 3.1133333333333337e-06,
+      "loss": 0.0027,
+      "step": 3600
+    },
+    {
+      "epoch": 5.3706449221645665,
+      "grad_norm": 0.09832775592803955,
+      "learning_rate": 3.0577777777777778e-06,
+      "loss": 0.0036,
+      "step": 3625
+    },
+    {
+      "epoch": 5.407709414381023,
+      "grad_norm": 0.21083103120326996,
+      "learning_rate": 3.0022222222222227e-06,
+      "loss": 0.0041,
+      "step": 3650
+    },
+    {
+      "epoch": 5.444773906597479,
+      "grad_norm": 0.6747980713844299,
+      "learning_rate": 2.946666666666667e-06,
+      "loss": 0.003,
+      "step": 3675
+    },
+    {
+      "epoch": 5.481838398813936,
+      "grad_norm": 0.5111549496650696,
+      "learning_rate": 2.891111111111111e-06,
+      "loss": 0.0028,
+      "step": 3700
+    },
+    {
+      "epoch": 5.518902891030393,
+      "grad_norm": 0.6502516269683838,
+      "learning_rate": 2.835555555555556e-06,
+      "loss": 0.0045,
+      "step": 3725
+    },
+    {
+      "epoch": 5.555967383246849,
+      "grad_norm": 0.4688964784145355,
+      "learning_rate": 2.7800000000000005e-06,
+      "loss": 0.0036,
+      "step": 3750
+    },
+    {
+      "epoch": 5.593031875463306,
+      "grad_norm": 0.281994104385376,
+      "learning_rate": 2.7244444444444445e-06,
+      "loss": 0.0021,
+      "step": 3775
+    },
+    {
+      "epoch": 5.630096367679763,
+      "grad_norm": 0.11583279073238373,
+      "learning_rate": 2.6688888888888894e-06,
+      "loss": 0.0041,
+      "step": 3800
+    },
+    {
+      "epoch": 5.667160859896219,
+      "grad_norm": 0.22941534221172333,
+      "learning_rate": 2.6133333333333334e-06,
+      "loss": 0.0022,
+      "step": 3825
+    },
+    {
+      "epoch": 5.704225352112676,
+      "grad_norm": 0.13950073719024658,
+      "learning_rate": 2.557777777777778e-06,
+      "loss": 0.003,
+      "step": 3850
+    },
+    {
+      "epoch": 5.741289844329133,
+      "grad_norm": 0.6869206428527832,
+      "learning_rate": 2.5022222222222224e-06,
+      "loss": 0.0024,
+      "step": 3875
+    },
+    {
+      "epoch": 5.778354336545589,
+      "grad_norm": 0.09893081337213516,
+      "learning_rate": 2.446666666666667e-06,
+      "loss": 0.0029,
+      "step": 3900
+    },
+    {
+      "epoch": 5.815418828762046,
+      "grad_norm": 0.1264762133359909,
+      "learning_rate": 2.3911111111111113e-06,
+      "loss": 0.0033,
+      "step": 3925
+    },
+    {
+      "epoch": 5.852483320978503,
+      "grad_norm": 0.15489889681339264,
+      "learning_rate": 2.3355555555555557e-06,
+      "loss": 0.003,
+      "step": 3950
+    },
+    {
+      "epoch": 5.889547813194959,
+      "grad_norm": 0.5875250697135925,
+      "learning_rate": 2.28e-06,
+      "loss": 0.0022,
+      "step": 3975
+    },
+    {
+      "epoch": 5.926612305411416,
+      "grad_norm": 0.06691984087228775,
+      "learning_rate": 2.2244444444444447e-06,
+      "loss": 0.005,
+      "step": 4000
+    },
+    {
+      "epoch": 5.926612305411416,
+      "eval_loss": 0.28099098801612854,
+      "eval_runtime": 734.9707,
+      "eval_samples_per_second": 3.925,
+      "eval_steps_per_second": 0.491,
+      "eval_wer": 0.11566347469220246,
+      "step": 4000
+    },
+    {
+      "epoch": 5.963676797627873,
+      "grad_norm": 0.2645249664783478,
+      "learning_rate": 2.168888888888889e-06,
+      "loss": 0.0026,
+      "step": 4025
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.3361597955226898,
+      "learning_rate": 2.1133333333333336e-06,
+      "loss": 0.0023,
+      "step": 4050
+    },
+    {
+      "epoch": 6.037064492216457,
+      "grad_norm": 0.059147898107767105,
+      "learning_rate": 2.057777777777778e-06,
+      "loss": 0.0015,
+      "step": 4075
+    },
+    {
+      "epoch": 6.074128984432913,
+      "grad_norm": 0.1158735603094101,
+      "learning_rate": 2.0022222222222225e-06,
+      "loss": 0.0016,
+      "step": 4100
+    },
+    {
+      "epoch": 6.11119347664937,
+      "grad_norm": 1.3564985990524292,
+      "learning_rate": 1.9466666666666665e-06,
+      "loss": 0.0014,
+      "step": 4125
+    },
+    {
+      "epoch": 6.148257968865827,
+      "grad_norm": 0.5956087112426758,
+      "learning_rate": 1.8911111111111114e-06,
+      "loss": 0.0018,
+      "step": 4150
+    },
+    {
+      "epoch": 6.185322461082283,
+      "grad_norm": 0.09224885702133179,
+      "learning_rate": 1.8355555555555557e-06,
+      "loss": 0.0017,
+      "step": 4175
+    },
+    {
+      "epoch": 6.22238695329874,
+      "grad_norm": 0.06868930906057358,
+      "learning_rate": 1.7800000000000001e-06,
+      "loss": 0.0017,
+      "step": 4200
+    },
+    {
+      "epoch": 6.259451445515197,
+      "grad_norm": 0.06657718122005463,
+      "learning_rate": 1.7244444444444448e-06,
+      "loss": 0.0014,
+      "step": 4225
+    },
+    {
+      "epoch": 6.2965159377316535,
+      "grad_norm": 0.05459928885102272,
+      "learning_rate": 1.668888888888889e-06,
+      "loss": 0.0017,
+      "step": 4250
+    },
+    {
+      "epoch": 6.3335804299481095,
+      "grad_norm": 0.05795517563819885,
+      "learning_rate": 1.6133333333333335e-06,
+      "loss": 0.0027,
+      "step": 4275
+    },
+    {
+      "epoch": 6.3706449221645665,
+      "grad_norm": 0.06204914301633835,
+      "learning_rate": 1.5577777777777777e-06,
+      "loss": 0.0012,
+      "step": 4300
+    },
+    {
+      "epoch": 6.407709414381023,
+      "grad_norm": 0.0820712074637413,
+      "learning_rate": 1.5022222222222224e-06,
+      "loss": 0.0012,
+      "step": 4325
+    },
+    {
+      "epoch": 6.444773906597479,
+      "grad_norm": 0.056523606181144714,
+      "learning_rate": 1.4466666666666669e-06,
+      "loss": 0.0013,
+      "step": 4350
+    },
+    {
+      "epoch": 6.481838398813936,
+      "grad_norm": 0.07985592633485794,
+      "learning_rate": 1.3911111111111111e-06,
+      "loss": 0.0014,
+      "step": 4375
+    },
+    {
+      "epoch": 6.518902891030393,
+      "grad_norm": 0.044111426919698715,
+      "learning_rate": 1.3355555555555558e-06,
+      "loss": 0.0012,
+      "step": 4400
+    },
+    {
+      "epoch": 6.555967383246849,
+      "grad_norm": 0.05683915689587593,
+      "learning_rate": 1.28e-06,
+      "loss": 0.0014,
+      "step": 4425
+    },
+    {
+      "epoch": 6.593031875463306,
+      "grad_norm": 0.08568093180656433,
+      "learning_rate": 1.2244444444444445e-06,
+      "loss": 0.0012,
+      "step": 4450
+    },
+    {
+      "epoch": 6.630096367679763,
+      "grad_norm": 0.054062824696302414,
+      "learning_rate": 1.168888888888889e-06,
+      "loss": 0.0011,
+      "step": 4475
+    },
+    {
+      "epoch": 6.667160859896219,
+      "grad_norm": 0.0509476363658905,
+      "learning_rate": 1.1133333333333334e-06,
+      "loss": 0.0013,
+      "step": 4500
+    },
+    {
+      "epoch": 6.704225352112676,
+      "grad_norm": 0.04927874356508255,
+      "learning_rate": 1.0577777777777779e-06,
+      "loss": 0.0012,
+      "step": 4525
+    },
+    {
+      "epoch": 6.741289844329133,
+      "grad_norm": 0.08598697185516357,
+      "learning_rate": 1.0022222222222223e-06,
+      "loss": 0.0011,
+      "step": 4550
+    },
+    {
+      "epoch": 6.778354336545589,
+      "grad_norm": 0.3571934700012207,
+      "learning_rate": 9.466666666666667e-07,
+      "loss": 0.0016,
+      "step": 4575
+    },
+    {
+      "epoch": 6.815418828762046,
+      "grad_norm": 0.05977300554513931,
+      "learning_rate": 8.911111111111112e-07,
+      "loss": 0.001,
+      "step": 4600
+    },
+    {
+      "epoch": 6.852483320978503,
+      "grad_norm": 0.05966237559914589,
+      "learning_rate": 8.355555555555556e-07,
+      "loss": 0.001,
+      "step": 4625
+    },
+    {
+      "epoch": 6.889547813194959,
+      "grad_norm": 0.05432112514972687,
+      "learning_rate": 7.8e-07,
+      "loss": 0.001,
+      "step": 4650
+    },
+    {
+      "epoch": 6.926612305411416,
+      "grad_norm": 0.06741122156381607,
+      "learning_rate": 7.244444444444446e-07,
+      "loss": 0.0019,
+      "step": 4675
+    },
+    {
+      "epoch": 6.963676797627873,
+      "grad_norm": 0.04723643884062767,
+      "learning_rate": 6.68888888888889e-07,
+      "loss": 0.0012,
+      "step": 4700
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.07329325377941132,
+      "learning_rate": 6.133333333333333e-07,
+      "loss": 0.001,
+      "step": 4725
+    },
+    {
+      "epoch": 7.037064492216457,
+      "grad_norm": 0.06389188766479492,
+      "learning_rate": 5.577777777777779e-07,
+      "loss": 0.001,
+      "step": 4750
+    },
+    {
+      "epoch": 7.074128984432913,
+      "grad_norm": 0.03797365352511406,
+      "learning_rate": 5.022222222222222e-07,
+      "loss": 0.001,
+      "step": 4775
+    },
+    {
+      "epoch": 7.11119347664937,
+      "grad_norm": 0.04686768725514412,
+      "learning_rate": 4.466666666666667e-07,
+      "loss": 0.0009,
+      "step": 4800
+    },
+    {
+      "epoch": 7.148257968865827,
+      "grad_norm": 0.06883518397808075,
+      "learning_rate": 3.9111111111111115e-07,
+      "loss": 0.001,
+      "step": 4825
+    },
+    {
+      "epoch": 7.185322461082283,
+      "grad_norm": 0.02842629700899124,
+      "learning_rate": 3.3555555555555556e-07,
+      "loss": 0.0009,
+      "step": 4850
+    },
+    {
+      "epoch": 7.22238695329874,
+      "grad_norm": 0.04749394953250885,
+      "learning_rate": 2.8e-07,
+      "loss": 0.001,
+      "step": 4875
+    },
+    {
+      "epoch": 7.259451445515197,
+      "grad_norm": 0.04491546377539635,
+      "learning_rate": 2.2444444444444445e-07,
+      "loss": 0.001,
+      "step": 4900
+    },
+    {
+      "epoch": 7.2965159377316535,
+      "grad_norm": 0.056013334542512894,
+      "learning_rate": 1.6888888888888888e-07,
+      "loss": 0.001,
+      "step": 4925
+    },
+    {
+      "epoch": 7.3335804299481095,
+      "grad_norm": 0.057778194546699524,
+      "learning_rate": 1.1333333333333336e-07,
+      "loss": 0.0011,
+      "step": 4950
+    },
+    {
+      "epoch": 7.3706449221645665,
+      "grad_norm": 0.051241885870695114,
+      "learning_rate": 5.777777777777778e-08,
+      "loss": 0.0011,
+      "step": 4975
+    },
+    {
+      "epoch": 7.407709414381023,
+      "grad_norm": 0.06301814317703247,
+      "learning_rate": 2.2222222222222225e-09,
+      "loss": 0.0009,
+      "step": 5000
+    },
+    {
+      "epoch": 7.407709414381023,
+      "eval_loss": 0.29011788964271545,
+      "eval_runtime": 732.4342,
+      "eval_samples_per_second": 3.939,
+      "eval_steps_per_second": 0.493,
+      "eval_wer": 0.1146545827633379,
+      "step": 5000
+    },
+    {
+      "epoch": 7.407709414381023,
+      "step": 5000,
+      "total_flos": 8.155551755501568e+19,
+      "train_loss": 0.10907779041565954,
+      "train_runtime": 12394.4337,
+      "train_samples_per_second": 6.455,
+      "train_steps_per_second": 0.403
     }
   ],
   "logging_steps": 25,
+  "max_steps": 5000,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
   "save_steps": 1000,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 8.155551755501568e+19,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }