Leonel-Maia committed on
Commit 83d274b · verified · 1 Parent(s): 3e1e4cd

End of training

Files changed (5)
  1. README.md +16 -4
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +1691 -0
README.md CHANGED
@@ -4,11 +4,23 @@ license: apache-2.0
 base_model: Leonel-Maia/fongbe-whisper-small
 tags:
 - generated_from_trainer
+datasets:
+- Leonel-Maia/ewe_dataset_splitted
 metrics:
 - wer
 model-index:
 - name: whisper-small-transfer
-  results: []
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: Leonel-Maia/ewe_dataset_splitted
+      type: Leonel-Maia/ewe_dataset_splitted
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.21356341934578732
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # whisper-small-transfer
 
-This model is a fine-tuned version of [Leonel-Maia/fongbe-whisper-small](https://huggingface.co/Leonel-Maia/fongbe-whisper-small) on an unknown dataset.
+This model is a fine-tuned version of [Leonel-Maia/fongbe-whisper-small](https://huggingface.co/Leonel-Maia/fongbe-whisper-small) on the Leonel-Maia/ewe_dataset_splitted dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2720
-- Wer: 0.2185
+- Loss: 0.2392
+- Wer: 0.2136
 
 ## Model description
 
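The Wer values above are word error rates. As a minimal, illustrative sketch (not the Trainer's own implementation, which relies on an evaluation library such as `evaluate`/`jiwer`), WER is the word-level Levenshtein distance between hypothesis and reference, divided by the number of reference words:

```python
def wer(reference: str, hypothesis: str) -> float:
    """Word error rate: word-level edit distance (substitutions,
    insertions, deletions) divided by the reference word count."""
    ref, hyp = reference.split(), hypothesis.split()
    # d[i][j] = edit distance between ref[:i] and hyp[:j]
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i  # delete all i reference words
    for j in range(len(hyp) + 1):
        d[0][j] = j  # insert all j hypothesis words
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            sub = d[i - 1][j - 1] + (ref[i - 1] != hyp[j - 1])
            d[i][j] = min(sub, d[i - 1][j] + 1, d[i][j - 1] + 1)
    return d[len(ref)][len(hyp)] / len(ref)

# One substitution ("sat" -> "sit") and one deletion ("the") over 6 words:
print(wer("the cat sat on the mat", "the cat sit on mat"))  # 0.333...
```

A reported Wer of 0.2136 therefore means roughly one word-level error per five reference words on the evaluation set.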
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+  "epoch": 6.641930618401207,
+  "eval_loss": 0.23916485905647278,
+  "eval_runtime": 2446.3074,
+  "eval_samples": 3315,
+  "eval_samples_per_second": 1.355,
+  "eval_steps_per_second": 0.339,
+  "eval_wer": 0.21356341934578732,
+  "total_flos": 5.08256607043584e+19,
+  "train_loss": 0.25465128779411317,
+  "train_runtime": 113391.8356,
+  "train_samples": 26517,
+  "train_samples_per_second": 14.031,
+  "train_steps_per_second": 0.438
+}
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 6.641930618401207,
+  "eval_loss": 0.23916485905647278,
+  "eval_runtime": 2446.3074,
+  "eval_samples": 3315,
+  "eval_samples_per_second": 1.355,
+  "eval_steps_per_second": 0.339,
+  "eval_wer": 0.21356341934578732
+}
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+  "epoch": 6.641930618401207,
+  "total_flos": 5.08256607043584e+19,
+  "train_loss": 0.25465128779411317,
+  "train_runtime": 113391.8356,
+  "train_samples": 26517,
+  "train_samples_per_second": 14.031,
+  "train_steps_per_second": 0.438
+}
trainer_state.json ADDED
@@ -0,0 +1,1691 @@
+{
+  "best_global_step": 3000,
+  "best_metric": 0.23916485905647278,
+  "best_model_checkpoint": "./whisper-small-transfer/checkpoint-3000",
+  "epoch": 6.641930618401207,
+  "eval_steps": 500,
+  "global_step": 5500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.030165912518853696,
+      "grad_norm": 26.211389541625977,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 4.3528,
+      "step": 25
+    },
+    {
+      "epoch": 0.06033182503770739,
+      "grad_norm": 16.204137802124023,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 3.4167,
+      "step": 50
+    },
+    {
+      "epoch": 0.09049773755656108,
+      "grad_norm": 10.840243339538574,
+      "learning_rate": 1.48e-06,
+      "loss": 2.3923,
+      "step": 75
+    },
+    {
+      "epoch": 0.12066365007541478,
+      "grad_norm": 7.455395698547363,
+      "learning_rate": 1.98e-06,
+      "loss": 1.7404,
+      "step": 100
+    },
+    {
+      "epoch": 0.15082956259426847,
+      "grad_norm": 5.6613593101501465,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.3135,
+      "step": 125
+    },
+    {
+      "epoch": 0.18099547511312217,
+      "grad_norm": 4.958397388458252,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.0967,
+      "step": 150
+    },
+    {
+      "epoch": 0.21116138763197587,
+      "grad_norm": 4.388453006744385,
+      "learning_rate": 3.48e-06,
+      "loss": 0.9164,
+      "step": 175
+    },
+    {
+      "epoch": 0.24132730015082957,
+      "grad_norm": 4.023995399475098,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 0.8129,
+      "step": 200
+    },
+    {
+      "epoch": 0.27149321266968324,
+      "grad_norm": 3.9679815769195557,
+      "learning_rate": 4.48e-06,
+      "loss": 0.7049,
+      "step": 225
+    },
+    {
+      "epoch": 0.30165912518853694,
+      "grad_norm": 4.201240062713623,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 0.6658,
+      "step": 250
+    },
+    {
+      "epoch": 0.33182503770739064,
+      "grad_norm": 4.384641647338867,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 0.6323,
+      "step": 275
+    },
+    {
+      "epoch": 0.36199095022624433,
+      "grad_norm": 3.6530189514160156,
+      "learning_rate": 5.98e-06,
+      "loss": 0.5616,
+      "step": 300
+    },
+    {
+      "epoch": 0.39215686274509803,
+      "grad_norm": 4.075361728668213,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 0.5227,
+      "step": 325
+    },
+    {
+      "epoch": 0.42232277526395173,
+      "grad_norm": 4.275745868682861,
+      "learning_rate": 6.98e-06,
+      "loss": 0.5387,
+      "step": 350
+    },
+    {
+      "epoch": 0.45248868778280543,
+      "grad_norm": 3.494539737701416,
+      "learning_rate": 7.48e-06,
+      "loss": 0.5019,
+      "step": 375
+    },
+    {
+      "epoch": 0.48265460030165913,
+      "grad_norm": 3.5438411235809326,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 0.4708,
+      "step": 400
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 3.6554458141326904,
+      "learning_rate": 8.48e-06,
+      "loss": 0.441,
+      "step": 425
+    },
+    {
+      "epoch": 0.5429864253393665,
+      "grad_norm": 3.1188902854919434,
+      "learning_rate": 8.98e-06,
+      "loss": 0.4363,
+      "step": 450
+    },
+    {
+      "epoch": 0.5731523378582202,
+      "grad_norm": 3.156195878982544,
+      "learning_rate": 9.48e-06,
+      "loss": 0.4245,
+      "step": 475
+    },
+    {
+      "epoch": 0.6033182503770739,
+      "grad_norm": 3.006938934326172,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 0.3978,
+      "step": 500
+    },
+    {
+      "epoch": 0.6033182503770739,
+      "eval_loss": 0.3847188353538513,
+      "eval_runtime": 2461.5392,
+      "eval_samples_per_second": 1.347,
+      "eval_steps_per_second": 0.337,
+      "eval_wer": 0.34837741876301787,
+      "step": 500
+    },
+    {
+      "epoch": 0.6334841628959276,
+      "grad_norm": 3.300306797027588,
+      "learning_rate": 9.99511996746645e-06,
+      "loss": 0.4099,
+      "step": 525
+    },
+    {
+      "epoch": 0.6636500754147813,
+      "grad_norm": 2.924450635910034,
+      "learning_rate": 9.990036600244002e-06,
+      "loss": 0.3736,
+      "step": 550
+    },
+    {
+      "epoch": 0.693815987933635,
+      "grad_norm": 3.143441677093506,
+      "learning_rate": 9.984953233021555e-06,
+      "loss": 0.3785,
+      "step": 575
+    },
+    {
+      "epoch": 0.7239819004524887,
+      "grad_norm": 3.0799312591552734,
+      "learning_rate": 9.979869865799107e-06,
+      "loss": 0.3559,
+      "step": 600
+    },
+    {
+      "epoch": 0.7541478129713424,
+      "grad_norm": 2.933881998062134,
+      "learning_rate": 9.974786498576659e-06,
+      "loss": 0.3554,
+      "step": 625
+    },
+    {
+      "epoch": 0.7843137254901961,
+      "grad_norm": 2.6328113079071045,
+      "learning_rate": 9.96970313135421e-06,
+      "loss": 0.3393,
+      "step": 650
+    },
+    {
+      "epoch": 0.8144796380090498,
+      "grad_norm": 2.767172336578369,
+      "learning_rate": 9.964619764131762e-06,
+      "loss": 0.3539,
+      "step": 675
+    },
+    {
+      "epoch": 0.8446455505279035,
+      "grad_norm": 3.040672779083252,
+      "learning_rate": 9.959536396909314e-06,
+      "loss": 0.3347,
+      "step": 700
+    },
+    {
+      "epoch": 0.8748114630467572,
+      "grad_norm": 2.708042860031128,
+      "learning_rate": 9.954453029686866e-06,
+      "loss": 0.3208,
+      "step": 725
+    },
+    {
+      "epoch": 0.9049773755656109,
+      "grad_norm": 2.599907875061035,
+      "learning_rate": 9.949369662464417e-06,
+      "loss": 0.3314,
+      "step": 750
+    },
+    {
+      "epoch": 0.9351432880844646,
+      "grad_norm": 2.7246906757354736,
+      "learning_rate": 9.944286295241969e-06,
+      "loss": 0.3212,
+      "step": 775
+    },
+    {
+      "epoch": 0.9653092006033183,
+      "grad_norm": 2.789820909500122,
+      "learning_rate": 9.93920292801952e-06,
+      "loss": 0.3197,
+      "step": 800
+    },
+    {
+      "epoch": 0.995475113122172,
+      "grad_norm": 2.7562570571899414,
+      "learning_rate": 9.934119560797073e-06,
+      "loss": 0.3075,
+      "step": 825
+    },
+    {
+      "epoch": 1.0265460030165912,
+      "grad_norm": 2.808879852294922,
+      "learning_rate": 9.929036193574624e-06,
+      "loss": 0.2848,
+      "step": 850
+    },
+    {
+      "epoch": 1.056711915535445,
+      "grad_norm": 2.3943305015563965,
+      "learning_rate": 9.923952826352176e-06,
+      "loss": 0.2741,
+      "step": 875
+    },
+    {
+      "epoch": 1.0868778280542986,
+      "grad_norm": 2.515653133392334,
+      "learning_rate": 9.91886945912973e-06,
+      "loss": 0.2812,
+      "step": 900
+    },
+    {
+      "epoch": 1.1170437405731524,
+      "grad_norm": 2.7121636867523193,
+      "learning_rate": 9.913786091907281e-06,
+      "loss": 0.2805,
+      "step": 925
+    },
+    {
+      "epoch": 1.147209653092006,
+      "grad_norm": 2.8646414279937744,
+      "learning_rate": 9.908702724684833e-06,
+      "loss": 0.2726,
+      "step": 950
+    },
+    {
+      "epoch": 1.1773755656108598,
+      "grad_norm": 2.6251399517059326,
+      "learning_rate": 9.903619357462384e-06,
+      "loss": 0.2747,
+      "step": 975
+    },
+    {
+      "epoch": 1.2075414781297134,
+      "grad_norm": 2.397496461868286,
+      "learning_rate": 9.898535990239936e-06,
+      "loss": 0.249,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2075414781297134,
+      "eval_loss": 0.28903013467788696,
+      "eval_runtime": 2466.5989,
+      "eval_samples_per_second": 1.344,
+      "eval_steps_per_second": 0.336,
+      "eval_wer": 0.25850141915153085,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2377073906485672,
+      "grad_norm": 2.6788296699523926,
+      "learning_rate": 9.893452623017488e-06,
+      "loss": 0.2737,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2678733031674208,
+      "grad_norm": 2.5809574127197266,
+      "learning_rate": 9.88836925579504e-06,
+      "loss": 0.2609,
+      "step": 1050
+    },
+    {
+      "epoch": 1.2980392156862746,
+      "grad_norm": 2.419163227081299,
+      "learning_rate": 9.883285888572591e-06,
+      "loss": 0.2673,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3282051282051281,
+      "grad_norm": 2.5561363697052,
+      "learning_rate": 9.878202521350143e-06,
+      "loss": 0.2535,
+      "step": 1100
+    },
+    {
+      "epoch": 1.358371040723982,
+      "grad_norm": 2.6312174797058105,
+      "learning_rate": 9.873119154127695e-06,
+      "loss": 0.2693,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3885369532428355,
+      "grad_norm": 2.4085021018981934,
+      "learning_rate": 9.868035786905246e-06,
+      "loss": 0.2598,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4187028657616894,
+      "grad_norm": 2.352027654647827,
+      "learning_rate": 9.862952419682798e-06,
+      "loss": 0.2713,
+      "step": 1175
+    },
+    {
+      "epoch": 1.448868778280543,
+      "grad_norm": 2.478139877319336,
+      "learning_rate": 9.85786905246035e-06,
+      "loss": 0.2783,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4790346907993968,
+      "grad_norm": 2.542982816696167,
+      "learning_rate": 9.852785685237901e-06,
+      "loss": 0.2445,
+      "step": 1225
+    },
+    {
+      "epoch": 1.5092006033182503,
+      "grad_norm": 2.4919915199279785,
+      "learning_rate": 9.847702318015455e-06,
+      "loss": 0.2654,
+      "step": 1250
+    },
+    {
+      "epoch": 1.539366515837104,
+      "grad_norm": 2.59543776512146,
+      "learning_rate": 9.842618950793007e-06,
+      "loss": 0.2477,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5695324283559577,
+      "grad_norm": 2.4258153438568115,
+      "learning_rate": 9.837535583570558e-06,
+      "loss": 0.2544,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5996983408748116,
+      "grad_norm": 2.4069764614105225,
+      "learning_rate": 9.83245221634811e-06,
+      "loss": 0.2383,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6298642533936651,
+      "grad_norm": 2.3214974403381348,
+      "learning_rate": 9.827368849125662e-06,
+      "loss": 0.2652,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6600301659125187,
+      "grad_norm": 2.626494884490967,
+      "learning_rate": 9.822285481903213e-06,
+      "loss": 0.2501,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6901960784313725,
+      "grad_norm": 2.589961290359497,
+      "learning_rate": 9.817202114680765e-06,
+      "loss": 0.2465,
+      "step": 1400
+    },
+    {
+      "epoch": 1.7203619909502263,
+      "grad_norm": 2.246035099029541,
+      "learning_rate": 9.812118747458317e-06,
+      "loss": 0.2399,
+      "step": 1425
+    },
+    {
+      "epoch": 1.75052790346908,
+      "grad_norm": 2.430635452270508,
+      "learning_rate": 9.807035380235868e-06,
+      "loss": 0.2421,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7806938159879335,
+      "grad_norm": 2.1688945293426514,
+      "learning_rate": 9.80195201301342e-06,
+      "loss": 0.2433,
+      "step": 1475
+    },
+    {
+      "epoch": 1.8108597285067873,
+      "grad_norm": 2.5152928829193115,
+      "learning_rate": 9.796868645790972e-06,
+      "loss": 0.2481,
+      "step": 1500
+    },
+    {
+      "epoch": 1.8108597285067873,
+      "eval_loss": 0.2585034966468811,
+      "eval_runtime": 2401.9254,
+      "eval_samples_per_second": 1.38,
+      "eval_steps_per_second": 0.345,
+      "eval_wer": 0.23873042596129979,
+      "step": 1500
+    },
+    {
+      "epoch": 1.8410256410256411,
+      "grad_norm": 2.3025524616241455,
+      "learning_rate": 9.791785278568524e-06,
+      "loss": 0.2371,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8711915535444947,
+      "grad_norm": 2.131627082824707,
+      "learning_rate": 9.786701911346075e-06,
+      "loss": 0.2294,
+      "step": 1550
+    },
+    {
+      "epoch": 1.9013574660633483,
+      "grad_norm": 2.1070120334625244,
+      "learning_rate": 9.781618544123627e-06,
+      "loss": 0.2593,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9315233785822021,
+      "grad_norm": 2.209496021270752,
+      "learning_rate": 9.77653517690118e-06,
+      "loss": 0.2415,
+      "step": 1600
+    },
+    {
+      "epoch": 1.961689291101056,
+      "grad_norm": 2.3032071590423584,
+      "learning_rate": 9.771451809678732e-06,
+      "loss": 0.2419,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9918552036199095,
+      "grad_norm": 2.4967117309570312,
+      "learning_rate": 9.766368442456284e-06,
+      "loss": 0.2474,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0229260935143287,
+      "grad_norm": 2.2161521911621094,
+      "learning_rate": 9.761285075233836e-06,
+      "loss": 0.2136,
+      "step": 1675
+    },
+    {
+      "epoch": 2.0530920060331823,
+      "grad_norm": 2.219045400619507,
+      "learning_rate": 9.756201708011387e-06,
+      "loss": 0.1956,
+      "step": 1700
+    },
+    {
+      "epoch": 2.0832579185520363,
+      "grad_norm": 2.001629590988159,
+      "learning_rate": 9.75111834078894e-06,
+      "loss": 0.1886,
+      "step": 1725
+    },
+    {
+      "epoch": 2.11342383107089,
+      "grad_norm": 2.072310209274292,
+      "learning_rate": 9.746034973566492e-06,
+      "loss": 0.2012,
+      "step": 1750
+    },
+    {
+      "epoch": 2.1435897435897435,
+      "grad_norm": 2.7330238819122314,
+      "learning_rate": 9.740951606344044e-06,
+      "loss": 0.2154,
+      "step": 1775
+    },
+    {
+      "epoch": 2.173755656108597,
+      "grad_norm": 2.282186985015869,
+      "learning_rate": 9.735868239121596e-06,
+      "loss": 0.2028,
+      "step": 1800
+    },
+    {
+      "epoch": 2.203921568627451,
+      "grad_norm": 2.1114888191223145,
+      "learning_rate": 9.730784871899147e-06,
+      "loss": 0.2026,
+      "step": 1825
+    },
+    {
+      "epoch": 2.2340874811463047,
+      "grad_norm": 2.320906400680542,
+      "learning_rate": 9.725701504676699e-06,
+      "loss": 0.211,
+      "step": 1850
+    },
+    {
+      "epoch": 2.2642533936651583,
+      "grad_norm": 2.435915231704712,
+      "learning_rate": 9.72061813745425e-06,
+      "loss": 0.1983,
+      "step": 1875
+    },
+    {
+      "epoch": 2.294419306184012,
+      "grad_norm": 2.0894196033477783,
+      "learning_rate": 9.715534770231803e-06,
+      "loss": 0.1852,
+      "step": 1900
+    },
+    {
+      "epoch": 2.324585218702866,
+      "grad_norm": 1.7594900131225586,
+      "learning_rate": 9.710451403009354e-06,
+      "loss": 0.1864,
+      "step": 1925
+    },
+    {
+      "epoch": 2.3547511312217195,
+      "grad_norm": 2.0977182388305664,
+      "learning_rate": 9.705368035786906e-06,
+      "loss": 0.1942,
+      "step": 1950
+    },
+    {
+      "epoch": 2.384917043740573,
+      "grad_norm": 2.048466205596924,
+      "learning_rate": 9.700284668564458e-06,
+      "loss": 0.2083,
+      "step": 1975
+    },
+    {
+      "epoch": 2.4150829562594267,
+      "grad_norm": 2.275794506072998,
+      "learning_rate": 9.69520130134201e-06,
+      "loss": 0.1996,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4150829562594267,
+      "eval_loss": 0.24703706800937653,
+      "eval_runtime": 2360.2803,
+      "eval_samples_per_second": 1.404,
+      "eval_steps_per_second": 0.351,
+      "eval_wer": 0.2232870355381444,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4452488687782807,
+      "grad_norm": 2.1452808380126953,
+      "learning_rate": 9.690117934119561e-06,
+      "loss": 0.1945,
+      "step": 2025
+    },
+    {
+      "epoch": 2.4754147812971343,
+      "grad_norm": 2.2425966262817383,
+      "learning_rate": 9.685034566897113e-06,
+      "loss": 0.1885,
+      "step": 2050
+    },
+    {
+      "epoch": 2.505580693815988,
+      "grad_norm": 2.123422861099243,
+      "learning_rate": 9.679951199674666e-06,
+      "loss": 0.1817,
+      "step": 2075
+    },
+    {
+      "epoch": 2.5357466063348415,
+      "grad_norm": 1.747390866279602,
+      "learning_rate": 9.674867832452218e-06,
+      "loss": 0.1999,
+      "step": 2100
+    },
+    {
+      "epoch": 2.565912518853695,
+      "grad_norm": 1.9593695402145386,
+      "learning_rate": 9.66978446522977e-06,
+      "loss": 0.188,
+      "step": 2125
+    },
+    {
+      "epoch": 2.596078431372549,
+      "grad_norm": 2.0385262966156006,
+      "learning_rate": 9.664701098007321e-06,
+      "loss": 0.2004,
+      "step": 2150
+    },
+    {
+      "epoch": 2.6262443438914027,
+      "grad_norm": 1.9144082069396973,
+      "learning_rate": 9.659617730784873e-06,
+      "loss": 0.2062,
+      "step": 2175
+    },
+    {
+      "epoch": 2.6564102564102563,
+      "grad_norm": 2.052720308303833,
+      "learning_rate": 9.654534363562425e-06,
+      "loss": 0.1839,
+      "step": 2200
+    },
+    {
+      "epoch": 2.6865761689291103,
+      "grad_norm": 1.9951667785644531,
+      "learning_rate": 9.649450996339976e-06,
+      "loss": 0.1798,
+      "step": 2225
+    },
+    {
+      "epoch": 2.716742081447964,
+      "grad_norm": 2.5916664600372314,
+      "learning_rate": 9.644367629117528e-06,
+      "loss": 0.2058,
+      "step": 2250
+    },
+    {
+      "epoch": 2.7469079939668175,
+      "grad_norm": 2.3864409923553467,
+      "learning_rate": 9.63928426189508e-06,
+      "loss": 0.1993,
+      "step": 2275
+    },
+    {
+      "epoch": 2.777073906485671,
+      "grad_norm": 2.1544036865234375,
+      "learning_rate": 9.634200894672631e-06,
+      "loss": 0.1916,
+      "step": 2300
+    },
+    {
+      "epoch": 2.8072398190045247,
+      "grad_norm": 2.513857364654541,
+      "learning_rate": 9.629117527450183e-06,
+      "loss": 0.1979,
+      "step": 2325
+    },
+    {
+      "epoch": 2.8374057315233787,
+      "grad_norm": 2.702158212661743,
+      "learning_rate": 9.624034160227735e-06,
+      "loss": 0.1976,
+      "step": 2350
+    },
+    {
+      "epoch": 2.8675716440422323,
+      "grad_norm": 1.9018394947052002,
+      "learning_rate": 9.618950793005287e-06,
+      "loss": 0.2007,
+      "step": 2375
+    },
+    {
+      "epoch": 2.897737556561086,
+      "grad_norm": 2.1460797786712646,
+      "learning_rate": 9.613867425782838e-06,
+      "loss": 0.191,
+      "step": 2400
+    },
+    {
+      "epoch": 2.92790346907994,
+      "grad_norm": 2.0922977924346924,
+      "learning_rate": 9.608784058560392e-06,
+      "loss": 0.1956,
+      "step": 2425
+    },
+    {
+      "epoch": 2.9580693815987935,
+      "grad_norm": 2.323761463165283,
+      "learning_rate": 9.603700691337943e-06,
+      "loss": 0.1927,
+      "step": 2450
+    },
+    {
+      "epoch": 2.988235294117647,
+      "grad_norm": 2.0030786991119385,
+      "learning_rate": 9.598617324115495e-06,
+      "loss": 0.1923,
+      "step": 2475
+    },
+    {
+      "epoch": 3.0193061840120663,
+      "grad_norm": 1.9809656143188477,
+      "learning_rate": 9.593533956893047e-06,
+      "loss": 0.1669,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0193061840120663,
+      "eval_loss": 0.241033673286438,
+      "eval_runtime": 2297.6931,
+      "eval_samples_per_second": 1.443,
+      "eval_steps_per_second": 0.361,
+      "eval_wer": 0.2156894486353482,
+      "step": 2500
+    },
+    {
+      "epoch": 3.04947209653092,
+      "grad_norm": 1.9472345113754272,
+      "learning_rate": 9.588450589670599e-06,
+      "loss": 0.1578,
+      "step": 2525
+    },
+    {
+      "epoch": 3.079638009049774,
+      "grad_norm": 2.215965509414673,
+      "learning_rate": 9.58336722244815e-06,
+      "loss": 0.1564,
+      "step": 2550
+    },
+    {
+      "epoch": 3.1098039215686275,
+      "grad_norm": 1.8719356060028076,
+      "learning_rate": 9.578283855225702e-06,
+      "loss": 0.1646,
+      "step": 2575
+    },
+    {
+      "epoch": 3.139969834087481,
+      "grad_norm": 1.7623282670974731,
+      "learning_rate": 9.573200488003254e-06,
+      "loss": 0.149,
+      "step": 2600
+    },
+    {
+      "epoch": 3.1701357466063347,
+      "grad_norm": 1.8403270244598389,
+      "learning_rate": 9.568117120780805e-06,
+      "loss": 0.1543,
+      "step": 2625
+    },
+    {
+      "epoch": 3.2003016591251887,
+      "grad_norm": 2.092792510986328,
+      "learning_rate": 9.563033753558359e-06,
+      "loss": 0.1626,
+      "step": 2650
+    },
+    {
+      "epoch": 3.2304675716440423,
+      "grad_norm": 1.7507753372192383,
+      "learning_rate": 9.55795038633591e-06,
+      "loss": 0.1516,
+      "step": 2675
+    },
+    {
+      "epoch": 3.260633484162896,
+      "grad_norm": 1.7713559865951538,
+      "learning_rate": 9.552867019113462e-06,
+      "loss": 0.1565,
+      "step": 2700
+    },
+    {
+      "epoch": 3.2907993966817495,
+      "grad_norm": 2.3640658855438232,
+      "learning_rate": 9.547783651891014e-06,
+      "loss": 0.1583,
+      "step": 2725
+    },
+    {
+      "epoch": 3.3209653092006035,
+      "grad_norm": 1.9633855819702148,
+      "learning_rate": 9.542700284668566e-06,
+      "loss": 0.1612,
+      "step": 2750
+    },
+    {
+      "epoch": 3.351131221719457,
+      "grad_norm": 1.8450348377227783,
+      "learning_rate": 9.537616917446117e-06,
+      "loss": 0.1573,
+      "step": 2775
+    },
+    {
+      "epoch": 3.3812971342383107,
+      "grad_norm": 1.913599967956543,
+      "learning_rate": 9.532533550223669e-06,
+      "loss": 0.1623,
+      "step": 2800
+    },
+    {
+      "epoch": 3.4114630467571643,
+      "grad_norm": 2.1844587326049805,
+      "learning_rate": 9.52745018300122e-06,
+      "loss": 0.1562,
+      "step": 2825
+    },
+    {
+      "epoch": 3.4416289592760183,
+      "grad_norm": 1.827317237854004,
+      "learning_rate": 9.522366815778772e-06,
+      "loss": 0.1598,
+      "step": 2850
+    },
+    {
+      "epoch": 3.471794871794872,
+      "grad_norm": 1.7931983470916748,
+      "learning_rate": 9.517283448556324e-06,
+      "loss": 0.1557,
+      "step": 2875
+    },
+    {
+      "epoch": 3.5019607843137255,
+      "grad_norm": 1.85126793384552,
+      "learning_rate": 9.512200081333877e-06,
+      "loss": 0.1588,
+      "step": 2900
+    },
+    {
+      "epoch": 3.532126696832579,
+      "grad_norm": 2.0126168727874756,
+      "learning_rate": 9.50711671411143e-06,
+      "loss": 0.161,
+      "step": 2925
+    },
+    {
+      "epoch": 3.5622926093514327,
+      "grad_norm": 1.982439637184143,
+      "learning_rate": 9.502033346888981e-06,
+      "loss": 0.1477,
+      "step": 2950
+    },
+    {
+      "epoch": 3.5924585218702867,
+      "grad_norm": 2.315864086151123,
+      "learning_rate": 9.496949979666533e-06,
+      "loss": 0.157,
+      "step": 2975
+    },
+    {
+      "epoch": 3.6226244343891403,
+      "grad_norm": 1.939032793045044,
+      "learning_rate": 9.491866612444084e-06,
+      "loss": 0.1535,
+      "step": 3000
+    },
+    {
+      "epoch": 3.6226244343891403,
+      "eval_loss": 0.23916485905647278,
+      "eval_runtime": 2282.7909,
+      "eval_samples_per_second": 1.452,
+      "eval_steps_per_second": 0.363,
+      "eval_wer": 0.21356341934578732,
+      "step": 3000
+    },
+    {
+      "epoch": 3.652790346907994,
+      "grad_norm": 1.8340437412261963,
+      "learning_rate": 9.486783245221636e-06,
+      "loss": 0.1555,
+      "step": 3025
+    },
+    {
+      "epoch": 3.682956259426848,
+      "grad_norm": 2.115091562271118,
+      "learning_rate": 9.481699877999188e-06,
+      "loss": 0.1589,
+      "step": 3050
+    },
+    {
+      "epoch": 3.7131221719457015,
+      "grad_norm": 2.074758768081665,
+      "learning_rate": 9.47661651077674e-06,
+      "loss": 0.1506,
+      "step": 3075
+    },
+    {
+      "epoch": 3.743288084464555,
+      "grad_norm": 2.0023365020751953,
+      "learning_rate": 9.471533143554291e-06,
+      "loss": 0.1537,
+      "step": 3100
+    },
+    {
+      "epoch": 3.7734539969834087,
+      "grad_norm": 1.883928656578064,
+      "learning_rate": 9.466449776331843e-06,
+      "loss": 0.1598,
+      "step": 3125
+    },
+    {
+      "epoch": 3.8036199095022623,
+      "grad_norm": 2.637164354324341,
+      "learning_rate": 9.461366409109394e-06,
+      "loss": 0.1725,
+      "step": 3150
+    },
+    {
+      "epoch": 3.8337858220211163,
+      "grad_norm": 2.0999221801757812,
+      "learning_rate": 9.456283041886946e-06,
+      "loss": 0.1591,
+      "step": 3175
+    },
+    {
+      "epoch": 3.86395173453997,
+      "grad_norm": 2.033339500427246,
+      "learning_rate": 9.451199674664498e-06,
+      "loss": 0.1622,
+      "step": 3200
+    },
+    {
+      "epoch": 3.8941176470588235,
+      "grad_norm": 2.1740732192993164,
+      "learning_rate": 9.44611630744205e-06,
+      "loss": 0.1606,
+      "step": 3225
+    },
+    {
+      "epoch": 3.9242835595776775,
+      "grad_norm": 2.3770601749420166,
+      "learning_rate": 9.441032940219603e-06,
+      "loss": 0.1593,
+      "step": 3250
+    },
+    {
+      "epoch": 3.954449472096531,
+      "grad_norm": 1.9333163499832153,
+      "learning_rate": 9.435949572997155e-06,
+      "loss": 0.1579,
+      "step": 3275
+    },
+    {
+      "epoch": 3.9846153846153847,
+      "grad_norm": 2.231935501098633,
+      "learning_rate": 9.430866205774706e-06,
+      "loss": 0.165,
+      "step": 3300
+    },
+    {
+      "epoch": 4.015686274509804,
+      "grad_norm": 1.8732832670211792,
+      "learning_rate": 9.425782838552258e-06,
+      "loss": 0.1401,
+      "step": 3325
+    },
+    {
+      "epoch": 4.0458521870286575,
+      "grad_norm": 1.96370530128479,
+      "learning_rate": 9.42069947132981e-06,
+      "loss": 0.1241,
+      "step": 3350
+    },
+    {
+      "epoch": 4.076018099547511,
+      "grad_norm": 2.0420496463775635,
+      "learning_rate": 9.415616104107362e-06,
+      "loss": 0.1198,
+      "step": 3375
+    },
+    {
+      "epoch": 4.106184012066365,
+      "grad_norm": 1.689680576324463,
+      "learning_rate": 9.410532736884913e-06,
+      "loss": 0.1225,
+      "step": 3400
+    },
+    {
+      "epoch": 4.136349924585219,
+      "grad_norm": 2.194132089614868,
+      "learning_rate": 9.405449369662465e-06,
+      "loss": 0.1333,
+      "step": 3425
+    },
+    {
+      "epoch": 4.166515837104073,
+      "grad_norm": 1.8354153633117676,
+      "learning_rate": 9.400366002440017e-06,
+      "loss": 0.119,
+      "step": 3450
+    },
+    {
+      "epoch": 4.196681749622926,
+      "grad_norm": 1.5466539859771729,
+      "learning_rate": 9.395282635217568e-06,
+      "loss": 0.1245,
+      "step": 3475
+    },
+    {
+      "epoch": 4.22684766214178,
+      "grad_norm": 2.1299631595611572,
+      "learning_rate": 9.39019926799512e-06,
+      "loss": 0.1272,
+      "step": 3500
+    },
+    {
+      "epoch": 4.22684766214178,
+      "eval_loss": 0.24591179192066193,
+      "eval_runtime": 2249.976,
+      "eval_samples_per_second": 1.473,
+      "eval_steps_per_second": 0.368,
+      "eval_wer": 0.21497717486321105,
+      "step": 3500
+    },
+    {
+      "epoch": 4.2570135746606335,
+      "grad_norm": 2.316577911376953,
+      "learning_rate": 9.385115900772672e-06,
+      "loss": 0.1274,
+      "step": 3525
+    },
+    {
+      "epoch": 4.287179487179487,
+      "grad_norm": 1.9104264974594116,
+      "learning_rate": 9.380032533550223e-06,
+      "loss": 0.1231,
+      "step": 3550
+    },
+    {
+      "epoch": 4.317345399698341,
+      "grad_norm": 2.457646369934082,
+      "learning_rate": 9.374949166327775e-06,
+      "loss": 0.1342,
+      "step": 3575
1075
+ },
1076
+ {
1077
+ "epoch": 4.347511312217194,
1078
+ "grad_norm": 2.089953660964966,
1079
+ "learning_rate": 9.369865799105327e-06,
1080
+ "loss": 0.1186,
1081
+ "step": 3600
1082
+ },
1083
+ {
1084
+ "epoch": 4.377677224736049,
1085
+ "grad_norm": 2.036520004272461,
1086
+ "learning_rate": 9.36478243188288e-06,
1087
+ "loss": 0.1309,
1088
+ "step": 3625
1089
+ },
1090
+ {
1091
+ "epoch": 4.407843137254902,
1092
+ "grad_norm": 1.8452465534210205,
1093
+ "learning_rate": 9.359699064660432e-06,
1094
+ "loss": 0.1337,
1095
+ "step": 3650
1096
+ },
1097
+ {
1098
+ "epoch": 4.438009049773756,
1099
+ "grad_norm": 2.151616096496582,
1100
+ "learning_rate": 9.354615697437984e-06,
1101
+ "loss": 0.1238,
1102
+ "step": 3675
1103
+ },
1104
+ {
1105
+ "epoch": 4.4681749622926095,
1106
+ "grad_norm": 2.0973825454711914,
1107
+ "learning_rate": 9.349532330215535e-06,
1108
+ "loss": 0.1329,
1109
+ "step": 3700
1110
+ },
1111
+ {
1112
+ "epoch": 4.498340874811463,
1113
+ "grad_norm": 2.1247801780700684,
1114
+ "learning_rate": 9.344448962993089e-06,
1115
+ "loss": 0.1289,
1116
+ "step": 3725
1117
+ },
1118
+ {
1119
+ "epoch": 4.528506787330317,
1120
+ "grad_norm": 2.375617027282715,
1121
+ "learning_rate": 9.33936559577064e-06,
1122
+ "loss": 0.1306,
1123
+ "step": 3750
1124
+ },
1125
+ {
1126
+ "epoch": 4.55867269984917,
1127
+ "grad_norm": 1.6577335596084595,
1128
+ "learning_rate": 9.334282228548192e-06,
1129
+ "loss": 0.1225,
1130
+ "step": 3775
1131
+ },
1132
+ {
1133
+ "epoch": 4.588838612368024,
1134
+ "grad_norm": 1.7088857889175415,
1135
+ "learning_rate": 9.329198861325744e-06,
1136
+ "loss": 0.1199,
1137
+ "step": 3800
1138
+ },
1139
+ {
1140
+ "epoch": 4.619004524886877,
1141
+ "grad_norm": 2.1655218601226807,
1142
+ "learning_rate": 9.324115494103296e-06,
1143
+ "loss": 0.1214,
1144
+ "step": 3825
1145
+ },
1146
+ {
1147
+ "epoch": 4.649170437405732,
1148
+ "grad_norm": 1.614488959312439,
1149
+ "learning_rate": 9.319032126880847e-06,
1150
+ "loss": 0.1129,
1151
+ "step": 3850
1152
+ },
1153
+ {
1154
+ "epoch": 4.6793363499245855,
1155
+ "grad_norm": 2.1687254905700684,
1156
+ "learning_rate": 9.313948759658399e-06,
1157
+ "loss": 0.1329,
1158
+ "step": 3875
1159
+ },
1160
+ {
1161
+ "epoch": 4.709502262443439,
1162
+ "grad_norm": 1.8908534049987793,
1163
+ "learning_rate": 9.30886539243595e-06,
1164
+ "loss": 0.1273,
1165
+ "step": 3900
1166
+ },
1167
+ {
1168
+ "epoch": 4.739668174962293,
1169
+ "grad_norm": 1.9584071636199951,
1170
+ "learning_rate": 9.303782025213502e-06,
1171
+ "loss": 0.1234,
1172
+ "step": 3925
1173
+ },
1174
+ {
1175
+ "epoch": 4.769834087481146,
1176
+ "grad_norm": 1.8992727994918823,
1177
+ "learning_rate": 9.298698657991054e-06,
1178
+ "loss": 0.1202,
1179
+ "step": 3950
1180
+ },
1181
+ {
1182
+ "epoch": 4.8,
1183
+ "grad_norm": 2.2295639514923096,
1184
+ "learning_rate": 9.293615290768606e-06,
1185
+ "loss": 0.12,
1186
+ "step": 3975
1187
+ },
1188
+ {
1189
+ "epoch": 4.830165912518853,
1190
+ "grad_norm": 1.7892181873321533,
1191
+ "learning_rate": 9.288531923546157e-06,
1192
+ "loss": 0.1226,
1193
+ "step": 4000
1194
+ },
1195
+ {
1196
+ "epoch": 4.830165912518853,
1197
+ "eval_loss": 0.24282999336719513,
1198
+ "eval_runtime": 2246.6955,
1199
+ "eval_samples_per_second": 1.476,
1200
+ "eval_steps_per_second": 0.369,
1201
+ "eval_wer": 0.21186907113024897,
1202
+ "step": 4000
1203
+ },
1204
+ {
+ "epoch": 4.860331825037708,
+ "grad_norm": 1.6192424297332764,
+ "learning_rate": 9.28344855632371e-06,
+ "loss": 0.1275,
+ "step": 4025
+ },
+ {
+ "epoch": 4.8904977375565615,
+ "grad_norm": 2.2194361686706543,
+ "learning_rate": 9.278365189101261e-06,
+ "loss": 0.1344,
+ "step": 4050
+ },
+ {
+ "epoch": 4.920663650075415,
+ "grad_norm": 1.8603276014328003,
+ "learning_rate": 9.273281821878813e-06,
+ "loss": 0.1271,
+ "step": 4075
+ },
+ {
+ "epoch": 4.950829562594269,
+ "grad_norm": 2.0720746517181396,
+ "learning_rate": 9.268198454656366e-06,
+ "loss": 0.1348,
+ "step": 4100
+ },
+ {
+ "epoch": 4.980995475113122,
+ "grad_norm": 2.0119757652282715,
+ "learning_rate": 9.263115087433918e-06,
+ "loss": 0.1246,
+ "step": 4125
+ },
+ {
+ "epoch": 5.012066365007541,
+ "grad_norm": 1.3096059560775757,
+ "learning_rate": 9.25803172021147e-06,
+ "loss": 0.1155,
+ "step": 4150
+ },
+ {
+ "epoch": 5.042232277526395,
+ "grad_norm": 1.489650011062622,
+ "learning_rate": 9.252948352989021e-06,
+ "loss": 0.0973,
+ "step": 4175
+ },
+ {
+ "epoch": 5.072398190045249,
+ "grad_norm": 2.003122568130493,
+ "learning_rate": 9.247864985766573e-06,
+ "loss": 0.0949,
+ "step": 4200
+ },
+ {
+ "epoch": 5.102564102564102,
+ "grad_norm": 1.655964970588684,
+ "learning_rate": 9.242781618544125e-06,
+ "loss": 0.0907,
+ "step": 4225
+ },
+ {
+ "epoch": 5.132730015082957,
+ "grad_norm": 2.134763717651367,
+ "learning_rate": 9.237698251321676e-06,
+ "loss": 0.0955,
+ "step": 4250
+ },
+ {
+ "epoch": 5.16289592760181,
+ "grad_norm": 2.039686918258667,
+ "learning_rate": 9.232614884099228e-06,
+ "loss": 0.0959,
+ "step": 4275
+ },
+ {
+ "epoch": 5.193061840120664,
+ "grad_norm": 2.1623027324676514,
+ "learning_rate": 9.22753151687678e-06,
+ "loss": 0.0971,
+ "step": 4300
+ },
+ {
+ "epoch": 5.223227752639517,
+ "grad_norm": 2.3452537059783936,
+ "learning_rate": 9.222448149654331e-06,
+ "loss": 0.0882,
+ "step": 4325
+ },
+ {
+ "epoch": 5.253393665158371,
+ "grad_norm": 1.7960082292556763,
+ "learning_rate": 9.217364782431883e-06,
+ "loss": 0.0936,
+ "step": 4350
+ },
+ {
+ "epoch": 5.283559577677225,
+ "grad_norm": 1.9322994947433472,
+ "learning_rate": 9.212281415209435e-06,
+ "loss": 0.099,
+ "step": 4375
+ },
+ {
+ "epoch": 5.313725490196078,
+ "grad_norm": 2.040149688720703,
+ "learning_rate": 9.207198047986986e-06,
+ "loss": 0.0974,
+ "step": 4400
+ },
+ {
+ "epoch": 5.343891402714932,
+ "grad_norm": 2.1404929161071777,
+ "learning_rate": 9.202114680764538e-06,
+ "loss": 0.0886,
+ "step": 4425
+ },
+ {
+ "epoch": 5.374057315233786,
+ "grad_norm": 1.9556715488433838,
+ "learning_rate": 9.197031313542092e-06,
+ "loss": 0.1003,
+ "step": 4450
+ },
+ {
+ "epoch": 5.40422322775264,
+ "grad_norm": 2.070523500442505,
+ "learning_rate": 9.191947946319643e-06,
+ "loss": 0.0972,
+ "step": 4475
+ },
+ {
+ "epoch": 5.4343891402714934,
+ "grad_norm": 1.8254801034927368,
+ "learning_rate": 9.186864579097195e-06,
+ "loss": 0.0939,
+ "step": 4500
+ },
+ {
+ "epoch": 5.4343891402714934,
+ "eval_loss": 0.2541460692882538,
+ "eval_runtime": 2247.1988,
+ "eval_samples_per_second": 1.475,
+ "eval_steps_per_second": 0.369,
+ "eval_wer": 0.2141785648762694,
+ "step": 4500
+ },
+ {
+ "epoch": 5.464555052790347,
+ "grad_norm": 2.025676965713501,
+ "learning_rate": 9.181781211874747e-06,
+ "loss": 0.0941,
+ "step": 4525
+ },
+ {
+ "epoch": 5.494720965309201,
+ "grad_norm": 2.0886032581329346,
+ "learning_rate": 9.176697844652298e-06,
+ "loss": 0.0976,
+ "step": 4550
+ },
+ {
+ "epoch": 5.524886877828054,
+ "grad_norm": 2.048823118209839,
+ "learning_rate": 9.17161447742985e-06,
+ "loss": 0.1008,
+ "step": 4575
+ },
+ {
+ "epoch": 5.555052790346908,
+ "grad_norm": 2.661174774169922,
+ "learning_rate": 9.166531110207402e-06,
+ "loss": 0.0971,
+ "step": 4600
+ },
+ {
+ "epoch": 5.585218702865761,
+ "grad_norm": 2.3181405067443848,
+ "learning_rate": 9.161447742984953e-06,
+ "loss": 0.1005,
+ "step": 4625
+ },
+ {
+ "epoch": 5.615384615384615,
+ "grad_norm": 1.9508439302444458,
+ "learning_rate": 9.156364375762505e-06,
+ "loss": 0.098,
+ "step": 4650
+ },
+ {
+ "epoch": 5.6455505279034695,
+ "grad_norm": 2.297891139984131,
+ "learning_rate": 9.151281008540057e-06,
+ "loss": 0.0994,
+ "step": 4675
+ },
+ {
+ "epoch": 5.675716440422323,
+ "grad_norm": 1.9096143245697021,
+ "learning_rate": 9.146197641317609e-06,
+ "loss": 0.1021,
+ "step": 4700
+ },
+ {
+ "epoch": 5.705882352941177,
+ "grad_norm": 2.3850667476654053,
+ "learning_rate": 9.14111427409516e-06,
+ "loss": 0.0988,
+ "step": 4725
+ },
+ {
+ "epoch": 5.73604826546003,
+ "grad_norm": 1.954728126525879,
+ "learning_rate": 9.136030906872714e-06,
+ "loss": 0.1021,
+ "step": 4750
+ },
+ {
+ "epoch": 5.766214177978884,
+ "grad_norm": 2.3807568550109863,
+ "learning_rate": 9.130947539650265e-06,
+ "loss": 0.0891,
+ "step": 4775
+ },
+ {
+ "epoch": 5.796380090497737,
+ "grad_norm": 2.0435431003570557,
+ "learning_rate": 9.125864172427817e-06,
+ "loss": 0.1055,
+ "step": 4800
+ },
+ {
+ "epoch": 5.826546003016591,
+ "grad_norm": 2.7044458389282227,
+ "learning_rate": 9.120780805205369e-06,
+ "loss": 0.0979,
+ "step": 4825
+ },
+ {
+ "epoch": 5.856711915535445,
+ "grad_norm": 1.9672693014144897,
+ "learning_rate": 9.11569743798292e-06,
+ "loss": 0.1003,
+ "step": 4850
+ },
+ {
+ "epoch": 5.886877828054299,
+ "grad_norm": 1.9601274728775024,
+ "learning_rate": 9.110614070760472e-06,
+ "loss": 0.0987,
+ "step": 4875
+ },
+ {
+ "epoch": 5.917043740573153,
+ "grad_norm": 2.0287139415740967,
+ "learning_rate": 9.105530703538024e-06,
+ "loss": 0.0963,
+ "step": 4900
+ },
+ {
+ "epoch": 5.947209653092006,
+ "grad_norm": 1.9941165447235107,
+ "learning_rate": 9.100447336315577e-06,
+ "loss": 0.1005,
+ "step": 4925
+ },
+ {
+ "epoch": 5.97737556561086,
+ "grad_norm": 1.8804293870925903,
+ "learning_rate": 9.095363969093129e-06,
+ "loss": 0.1101,
+ "step": 4950
+ },
+ {
+ "epoch": 6.008446455505279,
+ "grad_norm": 1.3988499641418457,
+ "learning_rate": 9.09028060187068e-06,
+ "loss": 0.0912,
+ "step": 4975
+ },
+ {
+ "epoch": 6.038612368024133,
+ "grad_norm": 1.9083130359649658,
+ "learning_rate": 9.085197234648232e-06,
+ "loss": 0.0665,
+ "step": 5000
+ },
+ {
+ "epoch": 6.038612368024133,
+ "eval_loss": 0.2640438675880432,
+ "eval_runtime": 2326.5234,
+ "eval_samples_per_second": 1.425,
+ "eval_steps_per_second": 0.356,
+ "eval_wer": 0.21559232039369314,
+ "step": 5000
+ },
+ {
+ "epoch": 6.068778280542986,
+ "grad_norm": 1.7908351421356201,
+ "learning_rate": 9.080113867425784e-06,
+ "loss": 0.068,
+ "step": 5025
+ },
+ {
+ "epoch": 6.09894419306184,
+ "grad_norm": 2.0591347217559814,
+ "learning_rate": 9.075030500203336e-06,
+ "loss": 0.0761,
+ "step": 5050
+ },
+ {
+ "epoch": 6.129110105580694,
+ "grad_norm": 2.0180952548980713,
+ "learning_rate": 9.069947132980888e-06,
+ "loss": 0.0651,
+ "step": 5075
+ },
+ {
+ "epoch": 6.159276018099548,
+ "grad_norm": 1.842887282371521,
+ "learning_rate": 9.06486376575844e-06,
+ "loss": 0.0712,
+ "step": 5100
+ },
+ {
+ "epoch": 6.189441930618401,
+ "grad_norm": 1.8542691469192505,
+ "learning_rate": 9.059780398535991e-06,
+ "loss": 0.0685,
+ "step": 5125
+ },
+ {
+ "epoch": 6.219607843137255,
+ "grad_norm": 1.998128890991211,
+ "learning_rate": 9.054697031313543e-06,
+ "loss": 0.0663,
+ "step": 5150
+ },
+ {
+ "epoch": 6.249773755656109,
+ "grad_norm": 2.074847459793091,
+ "learning_rate": 9.049613664091094e-06,
+ "loss": 0.0691,
+ "step": 5175
+ },
+ {
+ "epoch": 6.279939668174962,
+ "grad_norm": 2.1815929412841797,
+ "learning_rate": 9.044530296868646e-06,
+ "loss": 0.0646,
+ "step": 5200
+ },
+ {
+ "epoch": 6.310105580693816,
+ "grad_norm": 1.8308629989624023,
+ "learning_rate": 9.039446929646198e-06,
+ "loss": 0.079,
+ "step": 5225
+ },
+ {
+ "epoch": 6.340271493212669,
+ "grad_norm": 1.6758077144622803,
+ "learning_rate": 9.03436356242375e-06,
+ "loss": 0.0747,
+ "step": 5250
+ },
+ {
+ "epoch": 6.370437405731524,
+ "grad_norm": 1.7715494632720947,
+ "learning_rate": 9.029280195201303e-06,
+ "loss": 0.0699,
+ "step": 5275
+ },
+ {
+ "epoch": 6.400603318250377,
+ "grad_norm": 1.9695020914077759,
+ "learning_rate": 9.024196827978855e-06,
+ "loss": 0.0706,
+ "step": 5300
+ },
+ {
+ "epoch": 6.430769230769231,
+ "grad_norm": 2.174950122833252,
+ "learning_rate": 9.019113460756406e-06,
+ "loss": 0.0735,
+ "step": 5325
+ },
+ {
+ "epoch": 6.460935143288085,
+ "grad_norm": 1.3423354625701904,
+ "learning_rate": 9.014030093533958e-06,
+ "loss": 0.0687,
+ "step": 5350
+ },
+ {
+ "epoch": 6.491101055806938,
+ "grad_norm": 1.8988721370697021,
+ "learning_rate": 9.00894672631151e-06,
+ "loss": 0.0684,
+ "step": 5375
+ },
+ {
+ "epoch": 6.521266968325792,
+ "grad_norm": 2.1522958278656006,
+ "learning_rate": 9.003863359089061e-06,
+ "loss": 0.0703,
+ "step": 5400
+ },
+ {
+ "epoch": 6.551432880844645,
+ "grad_norm": 2.2434656620025635,
+ "learning_rate": 8.998779991866613e-06,
+ "loss": 0.0725,
+ "step": 5425
+ },
+ {
+ "epoch": 6.581598793363499,
+ "grad_norm": 1.9670661687850952,
+ "learning_rate": 8.993696624644165e-06,
+ "loss": 0.073,
+ "step": 5450
+ },
+ {
+ "epoch": 6.6117647058823525,
+ "grad_norm": 1.9538111686706543,
+ "learning_rate": 8.988613257421716e-06,
+ "loss": 0.0755,
+ "step": 5475
+ },
+ {
+ "epoch": 6.641930618401207,
+ "grad_norm": 1.7768446207046509,
+ "learning_rate": 8.983529890199268e-06,
+ "loss": 0.0717,
+ "step": 5500
+ },
+ {
+ "epoch": 6.641930618401207,
+ "eval_loss": 0.27195391058921814,
+ "eval_runtime": 2447.6492,
+ "eval_samples_per_second": 1.354,
+ "eval_steps_per_second": 0.339,
+ "eval_wer": 0.2185061676433451,
+ "step": 5500
+ },
+ {
+ "epoch": 6.641930618401207,
+ "step": 5500,
+ "total_flos": 5.08256607043584e+19,
+ "train_loss": 0.25465128779411317,
+ "train_runtime": 113391.8356,
+ "train_samples_per_second": 14.031,
+ "train_steps_per_second": 0.438
+ }
+ ],
+ "logging_steps": 25,
+ "max_steps": 49680,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 60,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 5,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 5
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5.08256607043584e+19,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
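The `stateful_callbacks` section records why the run stopped at step 5500 of a scheduled 49680: `EarlyStoppingCallback` counted 5 consecutive evaluations without improvement past its threshold, matching its patience of 5, so `should_training_stop` was set. A minimal sketch (assuming the standard Hugging Face `trainer_state.json` layout, where evaluation records in `log_history` carry an `eval_wer` key) for locating the evaluation with the lowest WER; `best_checkpoint` is an illustrative helper, not part of any library:

```python
def best_checkpoint(state):
    """Return (step, eval_wer) for the log entry with the lowest WER."""
    # Only evaluation records carry "eval_wer"; training records carry "loss".
    evals = [e for e in state["log_history"] if "eval_wer" in e]
    best = min(evals, key=lambda e: e["eval_wer"])
    return best["step"], best["eval_wer"]

# In practice: state = json.load(open("trainer_state.json"))
# Abridged sample mirroring the evaluations logged above:
state = {"log_history": [
    {"step": 3500, "eval_wer": 0.21497717486321105},
    {"step": 4000, "eval_wer": 0.21186907113024897},
    {"step": 4500, "eval_wer": 0.2141785648762694},
    {"step": 5000, "eval_wer": 0.21559232039369314},
    {"step": 5500, "eval_wer": 0.2185061676433451},
]}
step, wer = best_checkpoint(state)
print(f"best WER {wer:.4f} at step {step}")  # → best WER 0.2119 at step 4000
```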