Upload 9 files
Browse files
- config.json +101 -5
- generation_config.json +1 -28
- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +625 -1209
- training_args.bin +2 -2
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "/kaggle/working/whisper-ft-2/checkpoint-600/model.safetensors",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
@@ -25,7 +25,12 @@
   "encoder_layerdrop": 0.0,
   "encoder_layers": 6,
   "eos_token_id": 50256,
-  "forced_decoder_ids":
+  "forced_decoder_ids": [
+    [
+      1,
+      50362
+    ]
+  ],
   "init_std": 0.02,
   "is_encoder_decoder": true,
   "mask_feature_length": 10,
@@ -43,10 +48,101 @@
   "num_mel_bins": 80,
   "pad_token_id": 50256,
   "scale_embedding": false,
-  "suppress_tokens": [
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    357,
+    366,
+    438,
+    532,
+    685,
+    705,
+    796,
+    930,
+    1058,
+    1220,
+    1267,
+    1279,
+    1303,
+    1343,
+    1377,
+    1391,
+    1635,
+    1782,
+    1875,
+    2162,
+    2361,
+    2488,
+    3467,
+    4008,
+    4211,
+    4600,
+    4808,
+    5299,
+    5855,
+    6329,
+    7203,
+    9609,
+    9959,
+    10563,
+    10786,
+    11420,
+    11709,
+    11907,
+    13163,
+    13697,
+    13700,
+    14808,
+    15306,
+    16410,
+    16791,
+    17992,
+    19203,
+    19510,
+    20724,
+    22305,
+    22935,
+    27007,
+    30109,
+    30420,
+    33409,
+    34949,
+    40283,
+    40493,
+    40549,
+    47282,
+    49146,
+    50257,
+    50357,
+    50358,
+    50359,
+    50360,
+    50361
+  ],
   "torch_dtype": "float32",
-  "transformers_version": "4.
-  "use_cache":
+  "transformers_version": "4.42.3",
+  "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51864
 }
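Note: the updated config.json pins the decoding constraints directly into the model config. Below is a minimal sketch of how to inspect them; the local path is hypothetical, and the reading of token 50362 follows the no_timestamps_token_id that the old generation_config.json (next diff) declared for this English-only Whisper vocabulary.

import json

# Hypothetical local path; point this at your own checkpoint directory.
with open("checkpoint-600/config.json") as f:
    cfg = json.load(f)

# [[1, 50362]]: at decoding position 1, force token 50362
# (the no-timestamps token id in this English-only Whisper vocab).
print(cfg["forced_decoder_ids"])

# Token ids the decoder is never allowed to emit during generation.
print(len(cfg["suppress_tokens"]), "suppressed tokens")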
generation_config.json
CHANGED
@@ -1,26 +1,4 @@
 {
-  "alignment_heads": [
-    [
-      3,
-      3
-    ],
-    [
-      4,
-      7
-    ],
-    [
-      5,
-      1
-    ],
-    [
-      5,
-      5
-    ],
-    [
-      5,
-      7
-    ]
-  ],
   "begin_suppress_tokens": [
     220,
     50256
@@ -34,13 +12,8 @@
       50362
     ]
   ],
-  "is_multilingual": false,
-  "max_initial_timestamp_index": 50,
   "max_length": 448,
-  "no_timestamps_token_id": 50362,
   "pad_token_id": 50256,
-  "prev_sot_token_id": 50360,
-  "return_timestamps": false,
   "suppress_tokens": [
     1,
     2,
@@ -133,5 +106,5 @@
     50360,
     50361
   ],
-  "transformers_version": "4.
+  "transformers_version": "4.42.3"
 }
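Note: this commit removes the fields behind Whisper's word-level timestamp alignment (alignment_heads) and timestamp decoding (no_timestamps_token_id, max_initial_timestamp_index, return_timestamps, prev_sot_token_id), keeping only the token-suppression settings. A minimal sketch of loading the trimmed file with transformers' GenerationConfig, assuming a hypothetical local checkpoint directory:

from transformers import GenerationConfig

# Hypothetical local directory containing the generation_config.json above.
gen_cfg = GenerationConfig.from_pretrained("checkpoint-600")

# The removed fields are simply absent from the loaded config after
# this commit, so timestamp alignment is no longer declared here.
print(getattr(gen_cfg, "alignment_heads", None))  # expected: None
print(gen_cfg.max_length, gen_cfg.pad_token_id)   # expected: 448 50256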
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:22aa2c1a7ff282ac49e8b7b35970dc582ab111868362fb5d325524b1e181646a
 size 290401888
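Note: model.safetensors and the .pt/.pth files below are stored with Git LFS, so the diff only shows the pointer file: a spec version, the sha256 oid of the real blob, and its byte size. A minimal sketch for checking a downloaded blob against the new pointer; the local filename is an assumption.

import hashlib

def file_sha256(path: str, chunk: int = 1 << 20) -> str:
    # Stream the file so large checkpoints need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk):
            h.update(block)
    return h.hexdigest()

# Digest from the new pointer above; "model.safetensors" is assumed to
# be the fully downloaded blob, not the LFS pointer text itself.
expected = "22aa2c1a7ff282ac49e8b7b35970dc582ab111868362fb5d325524b1e181646a"
print(file_sha256("model.safetensors") == expected)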
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e858eb0107d3a7caf2bce9b6dcbae6286b6e0670211dd4e01d9772be12056a79
 size 574807418
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:49f603c8d0789e2771e72e17a35d09664975138faec34bc1a21798b82a6ebaa3
 size 14244
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:78f763231c165c2571b07da4d26c7965824aff598ab2cc82420929859dcb97f0
 size 1064
trainer_state.json
CHANGED
|
@@ -1,1466 +1,882 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric":
|
| 3 |
-
"best_model_checkpoint": "./whisper-ft-2/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
-
"eval_steps":
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch": 0.
|
| 13 |
-
"grad_norm":
|
| 14 |
-
"learning_rate":
|
| 15 |
-
"loss": 0.
|
| 16 |
-
"step":
|
| 17 |
-
},
|
| 18 |
-
{
|
| 19 |
-
"epoch": 0.01,
|
| 20 |
-
"grad_norm": 5.579243183135986,
|
| 21 |
-
"learning_rate": 9.96376811594203e-06,
|
| 22 |
-
"loss": 0.1739,
|
| 23 |
-
"step": 50
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"epoch": 0.01,
|
| 27 |
-
"grad_norm": 4.500240325927734,
|
| 28 |
-
"learning_rate": 9.913446054750404e-06,
|
| 29 |
-
"loss": 0.1795,
|
| 30 |
-
"step": 75
|
| 31 |
-
},
|
| 32 |
-
{
|
| 33 |
-
"epoch": 0.02,
|
| 34 |
-
"grad_norm": 2.341679811477661,
|
| 35 |
-
"learning_rate": 9.863123993558778e-06,
|
| 36 |
-
"loss": 0.1617,
|
| 37 |
-
"step": 100
|
| 38 |
-
},
|
| 39 |
-
{
|
| 40 |
-
"epoch": 0.03,
|
| 41 |
-
"grad_norm": 5.624894618988037,
|
| 42 |
-
"learning_rate": 9.81280193236715e-06,
|
| 43 |
-
"loss": 0.1669,
|
| 44 |
-
"step": 125
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"epoch": 0.03,
|
| 48 |
-
"grad_norm": 6.136338710784912,
|
| 49 |
-
"learning_rate": 9.762479871175523e-06,
|
| 50 |
-
"loss": 0.1635,
|
| 51 |
-
"step": 150
|
| 52 |
-
},
|
| 53 |
-
{
|
| 54 |
-
"epoch": 0.04,
|
| 55 |
-
"grad_norm": 4.855004787445068,
|
| 56 |
-
"learning_rate": 9.712157809983898e-06,
|
| 57 |
-
"loss": 0.1611,
|
| 58 |
-
"step": 175
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"epoch": 0.04,
|
| 62 |
-
"grad_norm": 6.826947212219238,
|
| 63 |
-
"learning_rate": 9.66183574879227e-06,
|
| 64 |
-
"loss": 0.1661,
|
| 65 |
-
"step": 200
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"epoch": 0.04,
|
| 69 |
-
"grad_norm": 4.076170921325684,
|
| 70 |
-
"learning_rate": 9.611513687600645e-06,
|
| 71 |
-
"loss": 0.1754,
|
| 72 |
-
"step": 225
|
| 73 |
-
},
|
| 74 |
-
{
|
| 75 |
-
"epoch": 0.05,
|
| 76 |
-
"grad_norm": 7.027714729309082,
|
| 77 |
-
"learning_rate": 9.56119162640902e-06,
|
| 78 |
-
"loss": 0.2,
|
| 79 |
-
"step": 250
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"epoch": 0.06,
|
| 83 |
-
"grad_norm": 4.252183437347412,
|
| 84 |
-
"learning_rate": 9.510869565217392e-06,
|
| 85 |
-
"loss": 0.1827,
|
| 86 |
-
"step": 275
|
| 87 |
},
|
| 88 |
{
|
| 89 |
-
"epoch": 0.
|
| 90 |
-
"grad_norm":
|
| 91 |
-
"learning_rate":
|
| 92 |
-
"loss": 0.
|
| 93 |
-
"step":
|
| 94 |
-
},
|
| 95 |
-
{
|
| 96 |
-
"epoch": 0.07,
|
| 97 |
-
"grad_norm": 5.446494102478027,
|
| 98 |
-
"learning_rate": 9.41022544283414e-06,
|
| 99 |
-
"loss": 0.1949,
|
| 100 |
-
"step": 325
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"epoch": 0.07,
|
| 104 |
-
"grad_norm": 4.943406581878662,
|
| 105 |
-
"learning_rate": 9.359903381642514e-06,
|
| 106 |
-
"loss": 0.1732,
|
| 107 |
-
"step": 350
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"epoch": 0.07,
|
| 111 |
-
"grad_norm": 6.656269073486328,
|
| 112 |
-
"learning_rate": 9.309581320450886e-06,
|
| 113 |
-
"loss": 0.1742,
|
| 114 |
-
"step": 375
|
| 115 |
-
},
|
| 116 |
-
{
|
| 117 |
-
"epoch": 0.08,
|
| 118 |
-
"grad_norm": 4.511006832122803,
|
| 119 |
-
"learning_rate": 9.25925925925926e-06,
|
| 120 |
-
"loss": 0.163,
|
| 121 |
-
"step": 400
|
| 122 |
-
},
|
| 123 |
-
{
|
| 124 |
-
"epoch": 0.09,
|
| 125 |
-
"grad_norm": 4.760077953338623,
|
| 126 |
-
"learning_rate": 9.208937198067634e-06,
|
| 127 |
-
"loss": 0.1564,
|
| 128 |
-
"step": 425
|
| 129 |
-
},
|
| 130 |
-
{
|
| 131 |
-
"epoch": 0.09,
|
| 132 |
-
"grad_norm": 5.462475299835205,
|
| 133 |
-
"learning_rate": 9.160628019323673e-06,
|
| 134 |
-
"loss": 0.1381,
|
| 135 |
-
"step": 450
|
| 136 |
},
|
| 137 |
{
|
| 138 |
"epoch": 0.1,
|
| 139 |
-
"grad_norm":
|
| 140 |
-
"learning_rate": 9.
|
| 141 |
-
"loss": 0.
|
| 142 |
-
"step":
|
| 143 |
},
|
| 144 |
{
|
| 145 |
-
"epoch": 0.
|
| 146 |
-
"grad_norm":
|
| 147 |
-
"learning_rate": 9.
|
| 148 |
-
"loss": 0.
|
| 149 |
-
"step":
|
| 150 |
},
|
| 151 |
{
|
| 152 |
-
"epoch": 0.
|
| 153 |
-
"grad_norm":
|
| 154 |
-
"learning_rate": 9.
|
| 155 |
-
"loss": 0.
|
| 156 |
-
"step":
|
| 157 |
-
},
|
| 158 |
-
{
|
| 159 |
-
"epoch": 0.11,
|
| 160 |
-
"grad_norm": 5.756856918334961,
|
| 161 |
-
"learning_rate": 8.959339774557167e-06,
|
| 162 |
-
"loss": 0.1043,
|
| 163 |
-
"step": 550
|
| 164 |
-
},
|
| 165 |
-
{
|
| 166 |
-
"epoch": 0.12,
|
| 167 |
-
"grad_norm": 4.615521430969238,
|
| 168 |
-
"learning_rate": 8.90901771336554e-06,
|
| 169 |
-
"loss": 0.1617,
|
| 170 |
-
"step": 575
|
| 171 |
-
},
|
| 172 |
-
{
|
| 173 |
-
"epoch": 0.12,
|
| 174 |
-
"grad_norm": 7.172885894775391,
|
| 175 |
-
"learning_rate": 8.858695652173914e-06,
|
| 176 |
-
"loss": 0.166,
|
| 177 |
-
"step": 600
|
| 178 |
-
},
|
| 179 |
-
{
|
| 180 |
-
"epoch": 0.12,
|
| 181 |
-
"grad_norm": 4.559473514556885,
|
| 182 |
-
"learning_rate": 8.808373590982288e-06,
|
| 183 |
-
"loss": 0.1274,
|
| 184 |
-
"step": 625
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"epoch": 0.13,
|
| 188 |
-
"grad_norm": 4.201319694519043,
|
| 189 |
-
"learning_rate": 8.758051529790661e-06,
|
| 190 |
-
"loss": 0.1382,
|
| 191 |
-
"step": 650
|
| 192 |
-
},
|
| 193 |
-
{
|
| 194 |
-
"epoch": 0.14,
|
| 195 |
-
"grad_norm": 4.814964771270752,
|
| 196 |
-
"learning_rate": 8.707729468599034e-06,
|
| 197 |
-
"loss": 0.1448,
|
| 198 |
-
"step": 675
|
| 199 |
-
},
|
| 200 |
-
{
|
| 201 |
-
"epoch": 0.14,
|
| 202 |
-
"grad_norm": 5.314846992492676,
|
| 203 |
-
"learning_rate": 8.657407407407408e-06,
|
| 204 |
-
"loss": 0.1513,
|
| 205 |
-
"step": 700
|
| 206 |
-
},
|
| 207 |
-
{
|
| 208 |
-
"epoch": 0.14,
|
| 209 |
-
"grad_norm": 3.898376941680908,
|
| 210 |
-
"learning_rate": 8.607085346215783e-06,
|
| 211 |
-
"loss": 0.1634,
|
| 212 |
-
"step": 725
|
| 213 |
-
},
|
| 214 |
-
{
|
| 215 |
-
"epoch": 0.15,
|
| 216 |
-
"grad_norm": 5.028552055358887,
|
| 217 |
-
"learning_rate": 8.556763285024155e-06,
|
| 218 |
-
"loss": 0.1584,
|
| 219 |
-
"step": 750
|
| 220 |
-
},
|
| 221 |
-
{
|
| 222 |
-
"epoch": 0.15,
|
| 223 |
-
"grad_norm": 5.497000694274902,
|
| 224 |
-
"learning_rate": 8.506441223832528e-06,
|
| 225 |
-
"loss": 0.1642,
|
| 226 |
-
"step": 775
|
| 227 |
-
},
|
| 228 |
-
{
|
| 229 |
-
"epoch": 0.16,
|
| 230 |
-
"grad_norm": 4.642214298248291,
|
| 231 |
-
"learning_rate": 8.456119162640902e-06,
|
| 232 |
-
"loss": 0.174,
|
| 233 |
-
"step": 800
|
| 234 |
-
},
|
| 235 |
-
{
|
| 236 |
-
"epoch": 0.17,
|
| 237 |
-
"grad_norm": 4.980218887329102,
|
| 238 |
-
"learning_rate": 8.405797101449275e-06,
|
| 239 |
-
"loss": 0.1885,
|
| 240 |
-
"step": 825
|
| 241 |
-
},
|
| 242 |
-
{
|
| 243 |
-
"epoch": 0.17,
|
| 244 |
-
"grad_norm": 4.9581618309021,
|
| 245 |
-
"learning_rate": 8.35547504025765e-06,
|
| 246 |
-
"loss": 0.1928,
|
| 247 |
-
"step": 850
|
| 248 |
-
},
|
| 249 |
-
{
|
| 250 |
-
"epoch": 0.17,
|
| 251 |
-
"grad_norm": 4.621245861053467,
|
| 252 |
-
"learning_rate": 8.305152979066024e-06,
|
| 253 |
-
"loss": 0.1629,
|
| 254 |
-
"step": 875
|
| 255 |
-
},
|
| 256 |
-
{
|
| 257 |
-
"epoch": 0.18,
|
| 258 |
-
"grad_norm": 4.990960121154785,
|
| 259 |
-
"learning_rate": 8.254830917874397e-06,
|
| 260 |
-
"loss": 0.206,
|
| 261 |
-
"step": 900
|
| 262 |
-
},
|
| 263 |
-
{
|
| 264 |
-
"epoch": 0.18,
|
| 265 |
-
"grad_norm": 4.905004501342773,
|
| 266 |
-
"learning_rate": 8.20450885668277e-06,
|
| 267 |
-
"loss": 0.232,
|
| 268 |
-
"step": 925
|
| 269 |
-
},
|
| 270 |
-
{
|
| 271 |
-
"epoch": 0.19,
|
| 272 |
-
"grad_norm": 6.192000865936279,
|
| 273 |
-
"learning_rate": 8.154186795491144e-06,
|
| 274 |
-
"loss": 0.2406,
|
| 275 |
-
"step": 950
|
| 276 |
-
},
|
| 277 |
-
{
|
| 278 |
-
"epoch": 0.2,
|
| 279 |
-
"grad_norm": 4.241088390350342,
|
| 280 |
-
"learning_rate": 8.103864734299518e-06,
|
| 281 |
-
"loss": 0.206,
|
| 282 |
-
"step": 975
|
| 283 |
-
},
|
| 284 |
-
{
|
| 285 |
-
"epoch": 0.2,
|
| 286 |
-
"grad_norm": 5.220916271209717,
|
| 287 |
-
"learning_rate": 8.053542673107891e-06,
|
| 288 |
-
"loss": 0.2176,
|
| 289 |
-
"step": 1000
|
| 290 |
-
},
|
| 291 |
-
{
|
| 292 |
-
"epoch": 0.2,
|
| 293 |
-
"eval_loss": 0.3194602429866791,
|
| 294 |
-
"eval_runtime": 281.282,
|
| 295 |
-
"eval_samples_per_second": 3.555,
|
| 296 |
-
"eval_steps_per_second": 0.444,
|
| 297 |
-
"eval_wer": 61.69442848937392,
|
| 298 |
-
"step": 1000
|
| 299 |
},
|
| 300 |
{
|
| 301 |
"epoch": 0.2,
|
| 302 |
-
"grad_norm":
|
| 303 |
-
"learning_rate":
|
| 304 |
-
"loss": 0.
|
| 305 |
-
"step":
|
| 306 |
-
},
|
| 307 |
-
{
|
| 308 |
-
"epoch": 0.21,
|
| 309 |
-
"grad_norm": 6.92851448059082,
|
| 310 |
-
"learning_rate": 7.952898550724638e-06,
|
| 311 |
-
"loss": 0.1915,
|
| 312 |
-
"step": 1050
|
| 313 |
-
},
|
| 314 |
-
{
|
| 315 |
-
"epoch": 0.21,
|
| 316 |
-
"grad_norm": 6.4899725914001465,
|
| 317 |
-
"learning_rate": 7.90257648953301e-06,
|
| 318 |
-
"loss": 0.2366,
|
| 319 |
-
"step": 1075
|
| 320 |
-
},
|
| 321 |
-
{
|
| 322 |
-
"epoch": 0.22,
|
| 323 |
-
"grad_norm": 6.207301139831543,
|
| 324 |
-
"learning_rate": 7.852254428341385e-06,
|
| 325 |
-
"loss": 0.2334,
|
| 326 |
-
"step": 1100
|
| 327 |
-
},
|
| 328 |
-
{
|
| 329 |
-
"epoch": 0.23,
|
| 330 |
-
"grad_norm": 7.392577648162842,
|
| 331 |
-
"learning_rate": 7.80193236714976e-06,
|
| 332 |
-
"loss": 0.2388,
|
| 333 |
-
"step": 1125
|
| 334 |
-
},
|
| 335 |
-
{
|
| 336 |
-
"epoch": 0.23,
|
| 337 |
-
"grad_norm": 4.038721084594727,
|
| 338 |
-
"learning_rate": 7.751610305958132e-06,
|
| 339 |
-
"loss": 0.2316,
|
| 340 |
-
"step": 1150
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"epoch": 0.23,
|
| 344 |
-
"grad_norm": 5.786244869232178,
|
| 345 |
-
"learning_rate": 7.701288244766507e-06,
|
| 346 |
-
"loss": 0.2238,
|
| 347 |
-
"step": 1175
|
| 348 |
-
},
|
| 349 |
-
{
|
| 350 |
-
"epoch": 0.24,
|
| 351 |
-
"grad_norm": 7.598459243774414,
|
| 352 |
-
"learning_rate": 7.65096618357488e-06,
|
| 353 |
-
"loss": 0.217,
|
| 354 |
-
"step": 1200
|
| 355 |
-
},
|
| 356 |
-
{
|
| 357 |
-
"epoch": 0.24,
|
| 358 |
-
"grad_norm": 8.268415451049805,
|
| 359 |
-
"learning_rate": 7.600644122383254e-06,
|
| 360 |
-
"loss": 0.2337,
|
| 361 |
-
"step": 1225
|
| 362 |
-
},
|
| 363 |
-
{
|
| 364 |
-
"epoch": 0.25,
|
| 365 |
-
"grad_norm": 7.092071533203125,
|
| 366 |
-
"learning_rate": 7.5503220611916275e-06,
|
| 367 |
-
"loss": 0.2106,
|
| 368 |
-
"step": 1250
|
| 369 |
-
},
|
| 370 |
-
{
|
| 371 |
-
"epoch": 0.26,
|
| 372 |
-
"grad_norm": 5.1447954177856445,
|
| 373 |
-
"learning_rate": 7.500000000000001e-06,
|
| 374 |
-
"loss": 0.2088,
|
| 375 |
-
"step": 1275
|
| 376 |
-
},
|
| 377 |
-
{
|
| 378 |
-
"epoch": 0.26,
|
| 379 |
-
"grad_norm": 6.847914695739746,
|
| 380 |
-
"learning_rate": 7.449677938808374e-06,
|
| 381 |
-
"loss": 0.2151,
|
| 382 |
-
"step": 1300
|
| 383 |
-
},
|
| 384 |
-
{
|
| 385 |
-
"epoch": 0.27,
|
| 386 |
-
"grad_norm": 5.531068801879883,
|
| 387 |
-
"learning_rate": 7.399355877616747e-06,
|
| 388 |
-
"loss": 0.2031,
|
| 389 |
-
"step": 1325
|
| 390 |
-
},
|
| 391 |
-
{
|
| 392 |
-
"epoch": 0.27,
|
| 393 |
-
"grad_norm": 5.454577445983887,
|
| 394 |
-
"learning_rate": 7.349033816425122e-06,
|
| 395 |
-
"loss": 0.1933,
|
| 396 |
-
"step": 1350
|
| 397 |
-
},
|
| 398 |
-
{
|
| 399 |
-
"epoch": 0.28,
|
| 400 |
-
"grad_norm": 5.625954627990723,
|
| 401 |
-
"learning_rate": 7.298711755233495e-06,
|
| 402 |
-
"loss": 0.2437,
|
| 403 |
-
"step": 1375
|
| 404 |
-
},
|
| 405 |
-
{
|
| 406 |
-
"epoch": 0.28,
|
| 407 |
-
"grad_norm": 5.812635898590088,
|
| 408 |
-
"learning_rate": 7.248389694041869e-06,
|
| 409 |
-
"loss": 0.2787,
|
| 410 |
-
"step": 1400
|
| 411 |
-
},
|
| 412 |
-
{
|
| 413 |
-
"epoch": 0.28,
|
| 414 |
-
"grad_norm": 6.263451099395752,
|
| 415 |
-
"learning_rate": 7.1980676328502416e-06,
|
| 416 |
-
"loss": 0.3031,
|
| 417 |
-
"step": 1425
|
| 418 |
-
},
|
| 419 |
-
{
|
| 420 |
-
"epoch": 0.29,
|
| 421 |
-
"grad_norm": 5.698704242706299,
|
| 422 |
-
"learning_rate": 7.147745571658615e-06,
|
| 423 |
-
"loss": 0.2982,
|
| 424 |
-
"step": 1450
|
| 425 |
},
|
| 426 |
{
|
| 427 |
-
"epoch": 0.
|
| 428 |
-
"grad_norm":
|
| 429 |
-
"learning_rate":
|
| 430 |
-
"loss": 0.
|
| 431 |
-
"step":
|
| 432 |
},
|
| 433 |
{
|
| 434 |
-
"epoch": 0.
|
| 435 |
-
"grad_norm":
|
| 436 |
-
"learning_rate":
|
| 437 |
-
"loss": 0.
|
| 438 |
-
"step":
|
| 439 |
},
|
| 440 |
{
|
| 441 |
"epoch": 0.3,
|
| 442 |
-
"grad_norm":
|
| 443 |
-
"learning_rate":
|
| 444 |
-
"loss": 0.
|
| 445 |
-
"step":
|
| 446 |
-
},
|
| 447 |
-
{
|
| 448 |
-
"epoch": 0.31,
|
| 449 |
-
"grad_norm": 8.562033653259277,
|
| 450 |
-
"learning_rate": 6.94645732689211e-06,
|
| 451 |
-
"loss": 0.2733,
|
| 452 |
-
"step": 1550
|
| 453 |
-
},
|
| 454 |
-
{
|
| 455 |
-
"epoch": 0.32,
|
| 456 |
-
"grad_norm": 6.5205888748168945,
|
| 457 |
-
"learning_rate": 6.896135265700483e-06,
|
| 458 |
-
"loss": 0.2294,
|
| 459 |
-
"step": 1575
|
| 460 |
-
},
|
| 461 |
-
{
|
| 462 |
-
"epoch": 0.32,
|
| 463 |
-
"grad_norm": 5.271300792694092,
|
| 464 |
-
"learning_rate": 6.845813204508857e-06,
|
| 465 |
-
"loss": 0.2271,
|
| 466 |
-
"step": 1600
|
| 467 |
-
},
|
| 468 |
-
{
|
| 469 |
-
"epoch": 0.33,
|
| 470 |
-
"grad_norm": 9.590472221374512,
|
| 471 |
-
"learning_rate": 6.795491143317231e-06,
|
| 472 |
-
"loss": 0.2366,
|
| 473 |
-
"step": 1625
|
| 474 |
-
},
|
| 475 |
-
{
|
| 476 |
-
"epoch": 0.33,
|
| 477 |
-
"grad_norm": 5.548666000366211,
|
| 478 |
-
"learning_rate": 6.7451690821256045e-06,
|
| 479 |
-
"loss": 0.188,
|
| 480 |
-
"step": 1650
|
| 481 |
-
},
|
| 482 |
-
{
|
| 483 |
-
"epoch": 0.34,
|
| 484 |
-
"grad_norm": 7.139528751373291,
|
| 485 |
-
"learning_rate": 6.694847020933978e-06,
|
| 486 |
-
"loss": 0.1934,
|
| 487 |
-
"step": 1675
|
| 488 |
-
},
|
| 489 |
-
{
|
| 490 |
-
"epoch": 0.34,
|
| 491 |
-
"grad_norm": 7.057426929473877,
|
| 492 |
-
"learning_rate": 6.644524959742351e-06,
|
| 493 |
-
"loss": 0.2008,
|
| 494 |
-
"step": 1700
|
| 495 |
-
},
|
| 496 |
-
{
|
| 497 |
-
"epoch": 0.34,
|
| 498 |
-
"grad_norm": 5.961209774017334,
|
| 499 |
-
"learning_rate": 6.594202898550725e-06,
|
| 500 |
-
"loss": 0.2003,
|
| 501 |
-
"step": 1725
|
| 502 |
-
},
|
| 503 |
-
{
|
| 504 |
-
"epoch": 0.35,
|
| 505 |
-
"grad_norm": 6.465278625488281,
|
| 506 |
-
"learning_rate": 6.543880837359099e-06,
|
| 507 |
-
"loss": 0.1979,
|
| 508 |
-
"step": 1750
|
| 509 |
-
},
|
| 510 |
-
{
|
| 511 |
-
"epoch": 0.35,
|
| 512 |
-
"grad_norm": 4.620067119598389,
|
| 513 |
-
"learning_rate": 6.493558776167472e-06,
|
| 514 |
-
"loss": 0.2111,
|
| 515 |
-
"step": 1775
|
| 516 |
-
},
|
| 517 |
-
{
|
| 518 |
-
"epoch": 0.36,
|
| 519 |
-
"grad_norm": 6.613126754760742,
|
| 520 |
-
"learning_rate": 6.443236714975846e-06,
|
| 521 |
-
"loss": 0.2045,
|
| 522 |
-
"step": 1800
|
| 523 |
-
},
|
| 524 |
-
{
|
| 525 |
-
"epoch": 0.36,
|
| 526 |
-
"grad_norm": 4.759819507598877,
|
| 527 |
-
"learning_rate": 6.3929146537842194e-06,
|
| 528 |
-
"loss": 0.1908,
|
| 529 |
-
"step": 1825
|
| 530 |
-
},
|
| 531 |
-
{
|
| 532 |
-
"epoch": 0.37,
|
| 533 |
-
"grad_norm": 7.598659515380859,
|
| 534 |
-
"learning_rate": 6.342592592592594e-06,
|
| 535 |
-
"loss": 0.2148,
|
| 536 |
-
"step": 1850
|
| 537 |
-
},
|
| 538 |
-
{
|
| 539 |
-
"epoch": 0.38,
|
| 540 |
-
"grad_norm": 7.476439952850342,
|
| 541 |
-
"learning_rate": 6.2922705314009666e-06,
|
| 542 |
-
"loss": 0.2213,
|
| 543 |
-
"step": 1875
|
| 544 |
-
},
|
| 545 |
-
{
|
| 546 |
-
"epoch": 0.38,
|
| 547 |
-
"grad_norm": 5.949714183807373,
|
| 548 |
-
"learning_rate": 6.24194847020934e-06,
|
| 549 |
-
"loss": 0.2106,
|
| 550 |
-
"step": 1900
|
| 551 |
-
},
|
| 552 |
-
{
|
| 553 |
-
"epoch": 0.39,
|
| 554 |
-
"grad_norm": 4.579805374145508,
|
| 555 |
-
"learning_rate": 6.191626409017714e-06,
|
| 556 |
-
"loss": 0.2128,
|
| 557 |
-
"step": 1925
|
| 558 |
},
|
| 559 |
{
|
| 560 |
-
"epoch": 0.
|
| 561 |
-
"grad_norm":
|
| 562 |
-
"learning_rate":
|
| 563 |
-
"loss": 0.
|
| 564 |
-
"step":
|
| 565 |
-
},
|
| 566 |
-
{
|
| 567 |
-
"epoch": 0.4,
|
| 568 |
-
"grad_norm": 4.678502559661865,
|
| 569 |
-
"learning_rate": 6.090982286634462e-06,
|
| 570 |
-
"loss": 0.2089,
|
| 571 |
-
"step": 1975
|
| 572 |
},
|
| 573 |
{
|
| 574 |
-
"epoch": 0.
|
| 575 |
-
"grad_norm":
|
| 576 |
-
"learning_rate":
|
| 577 |
-
"loss": 0.
|
| 578 |
-
"step":
|
| 579 |
},
|
| 580 |
{
|
| 581 |
"epoch": 0.4,
|
| 582 |
-
"
|
| 583 |
-
"
|
| 584 |
-
"
|
| 585 |
-
"
|
| 586 |
-
"eval_wer": 48.40896036760483,
|
| 587 |
-
"step": 2000
|
| 588 |
-
},
|
| 589 |
-
{
|
| 590 |
-
"epoch": 0.41,
|
| 591 |
-
"grad_norm": 8.851235389709473,
|
| 592 |
-
"learning_rate": 5.990338164251208e-06,
|
| 593 |
-
"loss": 0.1833,
|
| 594 |
-
"step": 2025
|
| 595 |
-
},
|
| 596 |
-
{
|
| 597 |
-
"epoch": 0.41,
|
| 598 |
-
"grad_norm": 7.610278606414795,
|
| 599 |
-
"learning_rate": 5.9400161030595815e-06,
|
| 600 |
-
"loss": 0.2345,
|
| 601 |
-
"step": 2050
|
| 602 |
},
|
| 603 |
{
|
| 604 |
-
"epoch": 0.
|
| 605 |
-
"grad_norm":
|
| 606 |
-
"learning_rate":
|
| 607 |
-
"loss": 0.
|
| 608 |
-
"step":
|
| 609 |
},
|
| 610 |
{
|
| 611 |
-
"epoch": 0.
|
| 612 |
-
"grad_norm":
|
| 613 |
-
"learning_rate":
|
| 614 |
-
"loss": 0.
|
| 615 |
-
"step":
|
| 616 |
},
|
| 617 |
{
|
| 618 |
-
"epoch": 0.
|
| 619 |
-
"grad_norm":
|
| 620 |
-
"learning_rate":
|
| 621 |
-
"loss": 0.
|
| 622 |
-
"step":
|
| 623 |
-
},
|
| 624 |
-
{
|
| 625 |
-
"epoch": 0.43,
|
| 626 |
-
"grad_norm": 5.895429611206055,
|
| 627 |
-
"learning_rate": 5.738727858293076e-06,
|
| 628 |
-
"loss": 0.2227,
|
| 629 |
-
"step": 2150
|
| 630 |
-
},
|
| 631 |
-
{
|
| 632 |
-
"epoch": 0.43,
|
| 633 |
-
"grad_norm": 6.225393295288086,
|
| 634 |
-
"learning_rate": 5.688405797101449e-06,
|
| 635 |
-
"loss": 0.2254,
|
| 636 |
-
"step": 2175
|
| 637 |
-
},
|
| 638 |
-
{
|
| 639 |
-
"epoch": 0.44,
|
| 640 |
-
"grad_norm": 4.9755401611328125,
|
| 641 |
-
"learning_rate": 5.638083735909823e-06,
|
| 642 |
-
"loss": 0.2268,
|
| 643 |
-
"step": 2200
|
| 644 |
-
},
|
| 645 |
-
{
|
| 646 |
-
"epoch": 0.45,
|
| 647 |
-
"grad_norm": 4.857475757598877,
|
| 648 |
-
"learning_rate": 5.587761674718197e-06,
|
| 649 |
-
"loss": 0.2554,
|
| 650 |
-
"step": 2225
|
| 651 |
-
},
|
| 652 |
-
{
|
| 653 |
-
"epoch": 0.45,
|
| 654 |
-
"grad_norm": 6.710099697113037,
|
| 655 |
-
"learning_rate": 5.537439613526571e-06,
|
| 656 |
-
"loss": 0.2674,
|
| 657 |
-
"step": 2250
|
| 658 |
-
},
|
| 659 |
-
{
|
| 660 |
-
"epoch": 0.46,
|
| 661 |
-
"grad_norm": 6.126322269439697,
|
| 662 |
-
"learning_rate": 5.4871175523349444e-06,
|
| 663 |
-
"loss": 0.2679,
|
| 664 |
-
"step": 2275
|
| 665 |
-
},
|
| 666 |
-
{
|
| 667 |
-
"epoch": 0.46,
|
| 668 |
-
"grad_norm": 8.071053504943848,
|
| 669 |
-
"learning_rate": 5.436795491143317e-06,
|
| 670 |
-
"loss": 0.2781,
|
| 671 |
-
"step": 2300
|
| 672 |
-
},
|
| 673 |
-
{
|
| 674 |
-
"epoch": 0.47,
|
| 675 |
-
"grad_norm": 6.979907035827637,
|
| 676 |
-
"learning_rate": 5.386473429951691e-06,
|
| 677 |
-
"loss": 0.2821,
|
| 678 |
-
"step": 2325
|
| 679 |
-
},
|
| 680 |
-
{
|
| 681 |
-
"epoch": 0.47,
|
| 682 |
-
"grad_norm": 7.387816429138184,
|
| 683 |
-
"learning_rate": 5.336151368760065e-06,
|
| 684 |
-
"loss": 0.3021,
|
| 685 |
-
"step": 2350
|
| 686 |
-
},
|
| 687 |
-
{
|
| 688 |
-
"epoch": 0.47,
|
| 689 |
-
"grad_norm": 4.772555351257324,
|
| 690 |
-
"learning_rate": 5.285829307568439e-06,
|
| 691 |
-
"loss": 0.2713,
|
| 692 |
-
"step": 2375
|
| 693 |
-
},
|
| 694 |
-
{
|
| 695 |
-
"epoch": 0.48,
|
| 696 |
-
"grad_norm": 6.187594890594482,
|
| 697 |
-
"learning_rate": 5.235507246376812e-06,
|
| 698 |
-
"loss": 0.2674,
|
| 699 |
-
"step": 2400
|
| 700 |
},
|
| 701 |
{
|
| 702 |
-
"epoch": 0.
|
| 703 |
-
"grad_norm":
|
| 704 |
-
"learning_rate":
|
| 705 |
-
"loss": 0.
|
| 706 |
-
"step":
|
| 707 |
},
|
| 708 |
{
|
| 709 |
-
"epoch": 0.
|
| 710 |
-
"grad_norm":
|
| 711 |
-
"learning_rate":
|
| 712 |
-
"loss": 0.
|
| 713 |
-
"step":
|
| 714 |
},
|
| 715 |
{
|
| 716 |
-
"epoch": 0.
|
| 717 |
-
"grad_norm":
|
| 718 |
-
"learning_rate":
|
| 719 |
-
"loss": 0.
|
| 720 |
-
"step":
|
| 721 |
},
|
| 722 |
{
|
| 723 |
-
"epoch": 0.
|
| 724 |
-
"grad_norm":
|
| 725 |
-
"learning_rate":
|
| 726 |
-
"loss": 0.
|
| 727 |
-
"step":
|
| 728 |
},
|
| 729 |
{
|
| 730 |
-
"epoch": 0.
|
| 731 |
-
"grad_norm":
|
| 732 |
-
"learning_rate":
|
| 733 |
-
"loss": 0.
|
| 734 |
-
"step":
|
| 735 |
},
|
| 736 |
{
|
| 737 |
-
"epoch": 0.
|
| 738 |
-
"grad_norm":
|
| 739 |
-
"learning_rate":
|
| 740 |
-
"loss": 0.
|
| 741 |
-
"step":
|
| 742 |
},
|
| 743 |
{
|
| 744 |
-
"epoch": 0.
|
| 745 |
-
"grad_norm":
|
| 746 |
-
"learning_rate":
|
| 747 |
-
"loss": 0.
|
| 748 |
-
"step":
|
| 749 |
},
|
| 750 |
{
|
| 751 |
-
"epoch": 0.
|
| 752 |
-
"grad_norm":
|
| 753 |
-
"learning_rate":
|
| 754 |
-
"loss": 0.
|
| 755 |
-
"step":
|
| 756 |
},
|
| 757 |
{
|
| 758 |
-
"epoch": 0.
|
| 759 |
-
"grad_norm": 4.
|
| 760 |
-
"learning_rate":
|
| 761 |
-
"loss": 0.
|
| 762 |
-
"step":
|
| 763 |
},
|
| 764 |
{
|
| 765 |
-
"epoch": 0.
|
| 766 |
-
"grad_norm":
|
| 767 |
-
"learning_rate":
|
| 768 |
-
"loss": 0.
|
| 769 |
-
"step":
|
| 770 |
},
|
| 771 |
{
|
| 772 |
-
"epoch": 0.
|
| 773 |
-
"grad_norm":
|
| 774 |
-
"learning_rate":
|
| 775 |
-
"loss": 0.
|
| 776 |
-
"step":
|
| 777 |
},
|
| 778 |
{
|
| 779 |
-
"epoch": 0.
|
| 780 |
-
"grad_norm":
|
| 781 |
-
"learning_rate":
|
| 782 |
-
"loss": 0.
|
| 783 |
-
"step":
|
| 784 |
},
|
| 785 |
{
|
| 786 |
-
"epoch": 0.
|
| 787 |
-
"grad_norm":
|
| 788 |
-
"learning_rate":
|
| 789 |
-
"loss": 0.
|
| 790 |
-
"step":
|
| 791 |
},
|
| 792 |
{
|
| 793 |
-
"epoch": 0.
|
| 794 |
-
"grad_norm":
|
| 795 |
-
"learning_rate":
|
| 796 |
-
"loss": 0.
|
| 797 |
-
"step":
|
| 798 |
},
|
| 799 |
{
|
| 800 |
-
"epoch": 0
|
| 801 |
-
"grad_norm":
|
| 802 |
-
"learning_rate":
|
| 803 |
-
"loss": 0.
|
| 804 |
-
"step":
|
| 805 |
},
|
| 806 |
{
|
| 807 |
-
"epoch":
|
| 808 |
-
"grad_norm":
|
| 809 |
-
"learning_rate":
|
| 810 |
-
"loss": 0.
|
| 811 |
-
"step":
|
| 812 |
},
|
| 813 |
{
|
| 814 |
-
"epoch":
|
| 815 |
-
"grad_norm":
|
| 816 |
-
"learning_rate":
|
| 817 |
-
"loss": 0.
|
| 818 |
-
"step":
|
| 819 |
},
|
| 820 |
{
|
| 821 |
-
"epoch":
|
| 822 |
-
"grad_norm":
|
| 823 |
-
"learning_rate":
|
| 824 |
-
"loss": 0.
|
| 825 |
-
"step":
|
| 826 |
},
|
| 827 |
{
|
| 828 |
-
"epoch":
|
| 829 |
-
"grad_norm":
|
| 830 |
-
"learning_rate":
|
| 831 |
-
"loss": 0.
|
| 832 |
-
"step":
|
| 833 |
},
|
| 834 |
{
|
| 835 |
-
"epoch":
|
| 836 |
-
"grad_norm":
|
| 837 |
-
"learning_rate":
|
| 838 |
-
"loss": 0.
|
| 839 |
-
"step":
|
| 840 |
},
|
| 841 |
{
|
| 842 |
-
"epoch":
|
| 843 |
-
"grad_norm":
|
| 844 |
-
"learning_rate":
|
| 845 |
-
"loss": 0.
|
| 846 |
-
"step":
|
| 847 |
},
|
| 848 |
{
|
| 849 |
-
"epoch":
|
| 850 |
-
"grad_norm":
|
| 851 |
-
"learning_rate":
|
| 852 |
-
"loss": 0.
|
| 853 |
-
"step":
|
| 854 |
},
|
| 855 |
{
|
| 856 |
-
"epoch":
|
| 857 |
-
"grad_norm":
|
| 858 |
-
"learning_rate":
|
| 859 |
-
"loss": 0.
|
| 860 |
-
"step":
|
| 861 |
},
|
| 862 |
{
|
| 863 |
-
"epoch":
|
| 864 |
-
"grad_norm":
|
| 865 |
-
"learning_rate":
|
| 866 |
-
"loss": 0.
|
| 867 |
-
"step":
|
| 868 |
},
|
| 869 |
{
|
| 870 |
-
"epoch":
|
| 871 |
-
"
|
| 872 |
-
"
|
| 873 |
-
"
|
| 874 |
-
"
|
| 875 |
-
"eval_wer": 42.12521539345204,
|
| 876 |
-
"step": 3000
|
| 877 |
-
},
|
| 878 |
-
{
|
| 879 |
-
"epoch": 0.6,
|
| 880 |
-
"grad_norm": 5.306319713592529,
|
| 881 |
-
"learning_rate": 3.979468599033817e-06,
|
| 882 |
-
"loss": 0.1559,
|
| 883 |
-
"step": 3025
|
| 884 |
},
|
| 885 |
{
|
| 886 |
-
"epoch":
|
| 887 |
-
"grad_norm":
|
| 888 |
-
"learning_rate":
|
| 889 |
-
"loss": 0.
|
| 890 |
-
"step":
|
| 891 |
},
|
| 892 |
{
|
| 893 |
-
"epoch":
|
| 894 |
-
"grad_norm":
|
| 895 |
-
"learning_rate":
|
| 896 |
-
"loss": 0.
|
| 897 |
-
"step":
|
| 898 |
},
|
| 899 |
{
|
| 900 |
-
"epoch":
|
| 901 |
-
"grad_norm":
|
| 902 |
-
"learning_rate":
|
| 903 |
-
"loss": 0.
|
| 904 |
-
"step":
|
| 905 |
},
|
| 906 |
{
|
| 907 |
-
"epoch":
|
| 908 |
-
"grad_norm":
|
| 909 |
-
"learning_rate":
|
| 910 |
-
"loss": 0.
|
| 911 |
-
"step":
|
| 912 |
},
|
| 913 |
{
|
| 914 |
-
"epoch":
|
| 915 |
-
"grad_norm":
|
| 916 |
-
"learning_rate":
|
| 917 |
-
"loss": 0.
|
| 918 |
-
"step":
|
| 919 |
},
|
| 920 |
{
|
| 921 |
-
"epoch":
|
| 922 |
-
"grad_norm":
|
| 923 |
-
"learning_rate":
|
| 924 |
-
"loss": 0.
|
| 925 |
-
"step":
|
| 926 |
},
|
| 927 |
{
|
| 928 |
-
"epoch":
|
| 929 |
-
"grad_norm":
|
| 930 |
-
"learning_rate":
|
| 931 |
-
"loss": 0.
|
| 932 |
-
"step":
|
| 933 |
},
|
| 934 |
{
|
| 935 |
-
"epoch":
|
| 936 |
-
"grad_norm":
|
| 937 |
-
"learning_rate":
|
| 938 |
-
"loss": 0.
|
| 939 |
-
"step":
|
| 940 |
},
|
| 941 |
{
|
| 942 |
-
"epoch":
|
| 943 |
-
"grad_norm":
|
| 944 |
-
"learning_rate":
|
| 945 |
-
"loss": 0.
|
| 946 |
-
"step":
|
| 947 |
},
|
| 948 |
{
|
| 949 |
-
"epoch":
|
| 950 |
-
"grad_norm":
|
| 951 |
-
"learning_rate":
|
| 952 |
-
"loss": 0.
|
| 953 |
-
"step":
|
| 954 |
},
|
| 955 |
{
|
| 956 |
-
"epoch":
|
| 957 |
-
"grad_norm":
|
| 958 |
-
"learning_rate":
|
| 959 |
-
"loss": 0.
|
| 960 |
-
"step":
|
| 961 |
},
|
| 962 |
{
|
| 963 |
-
"epoch":
|
| 964 |
-
"grad_norm":
|
| 965 |
-
"learning_rate":
|
| 966 |
-
"loss": 0.
|
| 967 |
-
"step":
|
| 968 |
},
|
| 969 |
{
|
| 970 |
-
"epoch":
|
| 971 |
-
"grad_norm":
|
| 972 |
-
"learning_rate":
|
| 973 |
-
"loss": 0.
|
| 974 |
-
"step":
|
| 975 |
},
|
| 976 |
{
|
| 977 |
-
"epoch":
|
| 978 |
-
"grad_norm":
|
| 979 |
-
"learning_rate":
|
| 980 |
-
"loss": 0.
|
| 981 |
-
"step":
|
| 982 |
},
|
| 983 |
{
|
| 984 |
-
"epoch":
|
| 985 |
-
"grad_norm":
|
| 986 |
-
"learning_rate":
|
| 987 |
-
"loss": 0.
|
| 988 |
-
"step":
|
| 989 |
},
|
| 990 |
{
|
| 991 |
-
"epoch":
|
| 992 |
-
"grad_norm":
|
| 993 |
-
"learning_rate":
|
| 994 |
-
"loss": 0.
|
| 995 |
-
"step":
|
| 996 |
},
|
| 997 |
{
|
| 998 |
-
"epoch":
|
| 999 |
-
"grad_norm":
|
| 1000 |
-
"learning_rate":
|
| 1001 |
-
"loss": 0.
|
| 1002 |
-
"step":
|
| 1003 |
},
|
| 1004 |
{
|
| 1005 |
-
"epoch":
|
| 1006 |
-
"grad_norm":
|
| 1007 |
-
"learning_rate":
|
| 1008 |
-
"loss": 0.
|
| 1009 |
-
"step":
|
| 1010 |
},
|
| 1011 |
{
|
| 1012 |
-
"epoch":
|
| 1013 |
-
"grad_norm":
|
| 1014 |
-
"learning_rate":
|
| 1015 |
-
"loss": 0.
|
| 1016 |
-
"step":
|
| 1017 |
},
|
| 1018 |
{
|
| 1019 |
-
"epoch": 0
|
| 1020 |
-
"grad_norm":
|
| 1021 |
-
"learning_rate":
|
| 1022 |
-
"loss": 0.
|
| 1023 |
-
"step":
|
| 1024 |
},
|
| 1025 |
{
|
| 1026 |
-
"epoch":
|
| 1027 |
-
"grad_norm":
|
| 1028 |
-
"learning_rate":
|
| 1029 |
-
"loss": 0.
|
| 1030 |
-
"step":
|
| 1031 |
},
|
| 1032 |
{
|
| 1033 |
-
"epoch":
|
| 1034 |
-
"grad_norm":
|
| 1035 |
-
"learning_rate":
|
| 1036 |
-
"loss": 0.
|
| 1037 |
-
"step":
|
| 1038 |
},
|
| 1039 |
{
|
| 1040 |
-
"epoch":
|
| 1041 |
-
"grad_norm":
|
| 1042 |
-
"learning_rate":
|
| 1043 |
-
"loss": 0.
|
| 1044 |
-
"step":
|
| 1045 |
},
|
| 1046 |
{
|
| 1047 |
-
"epoch":
|
| 1048 |
-
"grad_norm":
|
| 1049 |
-
"learning_rate":
|
| 1050 |
-
"loss": 0.
|
| 1051 |
-
"step":
|
| 1052 |
},
|
| 1053 |
{
|
| 1054 |
-
"epoch":
|
| 1055 |
-
"grad_norm":
|
| 1056 |
-
"learning_rate":
|
| 1057 |
-
"loss": 0.
|
| 1058 |
-
"step":
|
| 1059 |
},
|
| 1060 |
{
|
| 1061 |
-
"epoch":
|
| 1062 |
-
"grad_norm":
|
| 1063 |
-
"learning_rate":
|
| 1064 |
-
"loss": 0.
|
| 1065 |
-
"step":
|
| 1066 |
},
|
| 1067 |
{
|
| 1068 |
-
"epoch":
|
| 1069 |
-
"grad_norm":
|
| 1070 |
-
"learning_rate":
|
| 1071 |
-
"loss": 0.
|
| 1072 |
-
"step":
|
| 1073 |
},
|
| 1074 |
{
|
| 1075 |
-
"epoch":
|
| 1076 |
-
"grad_norm":
|
| 1077 |
-
"learning_rate":
|
| 1078 |
-
"loss": 0.
|
| 1079 |
-
"step":
|
| 1080 |
},
|
| 1081 |
{
|
| 1082 |
-
"epoch":
|
| 1083 |
-
"grad_norm":
|
| 1084 |
-
"learning_rate":
|
| 1085 |
-
"loss": 0.
|
| 1086 |
-
"step":
|
| 1087 |
},
|
| 1088 |
{
|
| 1089 |
-
"epoch":
|
| 1090 |
-
"grad_norm":
|
| 1091 |
-
"learning_rate":
|
| 1092 |
-
"loss": 0.
|
| 1093 |
-
"step":
|
| 1094 |
},
|
| 1095 |
{
|
| 1096 |
-
"epoch":
|
| 1097 |
-
"grad_norm":
|
| 1098 |
-
"learning_rate":
|
| 1099 |
-
"loss": 0.
|
| 1100 |
-
"step":
|
| 1101 |
},
|
| 1102 |
{
|
| 1103 |
-
"epoch":
|
| 1104 |
-
"grad_norm":
|
| 1105 |
-
"learning_rate":
|
| 1106 |
-
"loss": 0.
|
| 1107 |
-
"step":
|
| 1108 |
},
|
| 1109 |
{
|
| 1110 |
-
"epoch":
|
| 1111 |
-
"grad_norm":
|
| 1112 |
-
"learning_rate":
|
| 1113 |
-
"loss": 0.
|
| 1114 |
-
"step":
|
| 1115 |
},
|
| 1116 |
{
|
| 1117 |
-
"epoch":
|
| 1118 |
-
"grad_norm":
|
| 1119 |
-
"learning_rate":
|
| 1120 |
-
"loss": 0.
|
| 1121 |
-
"step":
|
| 1122 |
},
|
| 1123 |
{
|
| 1124 |
-
"epoch":
|
| 1125 |
-
"grad_norm":
|
| 1126 |
-
"learning_rate":
|
| 1127 |
-
"loss": 0.
|
| 1128 |
-
"step":
|
| 1129 |
},
|
| 1130 |
{
|
| 1131 |
-
"epoch":
|
| 1132 |
-
"grad_norm":
|
| 1133 |
-
"learning_rate":
|
| 1134 |
-
"loss": 0.
|
| 1135 |
-
"step":
|
| 1136 |
},
|
| 1137 |
{
|
| 1138 |
-
"epoch":
|
| 1139 |
-
"grad_norm":
|
| 1140 |
-
"learning_rate":
|
| 1141 |
-
"loss": 0.
|
| 1142 |
-
"step":
|
| 1143 |
},
|
| 1144 |
{
|
| 1145 |
-
"epoch":
|
| 1146 |
-
"grad_norm":
|
| 1147 |
-
"learning_rate":
|
| 1148 |
-
"loss": 0.
|
| 1149 |
-
"step":
|
| 1150 |
},
|
| 1151 |
{
|
| 1152 |
-
"epoch":
|
| 1153 |
-
"grad_norm":
|
| 1154 |
-
"learning_rate":
|
| 1155 |
-
"loss": 0.
|
| 1156 |
-
"step":
|
| 1157 |
},
|
| 1158 |
{
|
| 1159 |
-
"epoch":
|
| 1160 |
-
"
|
| 1161 |
-
"
|
| 1162 |
-
"
|
| 1163 |
-
"
|
| 1164 |
-
"eval_wer": 42.05628948879954,
|
| 1165 |
-
"step": 4000
|
| 1166 |
},
|
| 1167 |
{
|
| 1168 |
-
"epoch":
|
| 1169 |
-
"grad_norm":
|
| 1170 |
-
"learning_rate":
|
| 1171 |
-
"loss": 0.
|
| 1172 |
-
"step":
|
| 1173 |
},
|
| 1174 |
{
|
| 1175 |
-
"epoch":
|
| 1176 |
-
"grad_norm":
|
| 1177 |
-
"learning_rate":
|
| 1178 |
-
"loss": 0.
|
| 1179 |
-
"step":
|
| 1180 |
},
|
| 1181 |
{
|
| 1182 |
-
"epoch":
|
| 1183 |
-
"grad_norm":
|
| 1184 |
-
"learning_rate":
|
| 1185 |
-
"loss": 0.
|
| 1186 |
-
"step":
|
| 1187 |
},
|
| 1188 |
{
|
| 1189 |
-
"epoch":
|
| 1190 |
-
"grad_norm":
|
| 1191 |
-
"learning_rate":
|
| 1192 |
-
"loss": 0.
|
| 1193 |
-
"step":
|
| 1194 |
},
|
| 1195 |
{
|
| 1196 |
-
"epoch":
|
| 1197 |
-
"grad_norm":
|
| 1198 |
-
"learning_rate":
|
| 1199 |
-
"loss": 0.
|
| 1200 |
-
"step":
|
| 1201 |
},
|
| 1202 |
{
|
| 1203 |
-
"epoch":
|
| 1204 |
-
"grad_norm":
|
| 1205 |
-
"learning_rate":
|
| 1206 |
-
"loss": 0.
|
| 1207 |
-
"step":
|
| 1208 |
},
|
| 1209 |
{
|
| 1210 |
-
"epoch":
|
| 1211 |
-
"grad_norm":
|
| 1212 |
-
"learning_rate":
|
| 1213 |
-
"loss": 0.
|
| 1214 |
-
"step":
|
| 1215 |
},
|
| 1216 |
{
|
| 1217 |
-
"epoch":
|
| 1218 |
-
"grad_norm":
|
| 1219 |
-
"learning_rate":
|
| 1220 |
-
"loss": 0.
|
| 1221 |
-
"step":
|
| 1222 |
},
|
| 1223 |
{
|
| 1224 |
-
"epoch":
|
| 1225 |
-
"grad_norm":
|
| 1226 |
-
"learning_rate":
|
| 1227 |
-
"loss": 0.
|
| 1228 |
-
"step":
|
| 1229 |
},
|
| 1230 |
{
|
| 1231 |
-
"epoch": 0
|
| 1232 |
-
"grad_norm":
|
| 1233 |
-
"learning_rate":
|
| 1234 |
-
"loss": 0.
|
| 1235 |
-
"step":
|
| 1236 |
},
|
| 1237 |
{
|
| 1238 |
-
"epoch":
|
| 1239 |
-
"grad_norm":
|
| 1240 |
-
"learning_rate":
|
| 1241 |
-
"loss": 0.
|
| 1242 |
-
"step":
|
| 1243 |
},
|
| 1244 |
{
|
| 1245 |
-
"epoch":
|
| 1246 |
-
"grad_norm":
|
| 1247 |
-
"learning_rate":
|
| 1248 |
-
"loss": 0.
|
| 1249 |
-
"step":
|
| 1250 |
},
|
| 1251 |
{
|
| 1252 |
-
"epoch":
|
| 1253 |
-
"grad_norm":
|
| 1254 |
-
"learning_rate":
|
| 1255 |
-
"loss": 0.
|
| 1256 |
-
"step":
|
| 1257 |
},
|
| 1258 |
{
|
| 1259 |
-
"epoch":
|
| 1260 |
-
"grad_norm":
|
| 1261 |
-
"learning_rate":
|
| 1262 |
-
"loss": 0.
|
| 1263 |
-
"step":
|
| 1264 |
},
|
| 1265 |
{
|
| 1266 |
-
"epoch":
|
| 1267 |
-
"grad_norm":
|
| 1268 |
-
"learning_rate":
|
| 1269 |
-
"loss": 0.
|
| 1270 |
-
"step":
|
| 1271 |
},
|
| 1272 |
{
|
| 1273 |
-
"epoch":
|
| 1274 |
-
"grad_norm":
|
| 1275 |
-
"learning_rate":
|
| 1276 |
-
"loss": 0.
|
| 1277 |
-
"step":
|
| 1278 |
},
|
| 1279 |
{
|
| 1280 |
-
"epoch":
|
| 1281 |
-
"grad_norm":
|
| 1282 |
-
"learning_rate":
|
| 1283 |
-
"loss": 0.
|
| 1284 |
-
"step":
|
| 1285 |
},
|
| 1286 |
{
|
| 1287 |
-
"epoch":
|
| 1288 |
-
"grad_norm":
|
| 1289 |
-
"learning_rate":
|
| 1290 |
-
"loss": 0.
|
| 1291 |
-
"step":
|
| 1292 |
},
|
| 1293 |
{
|
| 1294 |
-
"epoch":
|
| 1295 |
-
"grad_norm":
|
| 1296 |
-
"learning_rate":
|
| 1297 |
-
"loss": 0.
|
| 1298 |
-
"step":
|
| 1299 |
},
|
| 1300 |
{
|
| 1301 |
-
"epoch":
|
| 1302 |
-
"grad_norm":
|
| 1303 |
-
"learning_rate":
|
| 1304 |
-
"loss": 0.
|
| 1305 |
-
"step":
|
| 1306 |
},
|
| 1307 |
{
|
| 1308 |
-
"epoch":
|
| 1309 |
-
"grad_norm":
|
| 1310 |
-
"learning_rate":
|
| 1311 |
-
"loss": 0.
|
| 1312 |
-
"step":
|
| 1313 |
},
|
| 1314 |
{
|
| 1315 |
-
"epoch":
|
| 1316 |
-
"grad_norm":
|
| 1317 |
-
"learning_rate":
|
| 1318 |
-
"loss": 0.
|
| 1319 |
-
"step":
|
| 1320 |
},
|
| 1321 |
{
|
| 1322 |
-
"epoch":
|
| 1323 |
-
"grad_norm":
|
| 1324 |
-
"learning_rate":
|
| 1325 |
-
"loss": 0.
|
| 1326 |
-
"step":
|
| 1327 |
},
|
| 1328 |
{
|
| 1329 |
-
"epoch":
|
| 1330 |
-
"grad_norm":
|
| 1331 |
-
"learning_rate":
|
| 1332 |
-
"loss": 0.
|
| 1333 |
-
"step":
|
| 1334 |
},
|
| 1335 |
{
|
| 1336 |
-
"epoch":
|
| 1337 |
-
"grad_norm":
|
| 1338 |
-
"learning_rate":
|
| 1339 |
-
"loss": 0.
|
| 1340 |
-
"step":
|
| 1341 |
},
|
| 1342 |
{
|
| 1343 |
-
"epoch":
|
| 1344 |
-
"grad_norm":
|
| 1345 |
-
"learning_rate":
|
| 1346 |
-
"loss": 0.
|
| 1347 |
-
"step":
|
| 1348 |
},
|
| 1349 |
{
|
| 1350 |
-
"epoch":
|
| 1351 |
-
"grad_norm":
|
| 1352 |
-
"learning_rate":
|
| 1353 |
-
"loss": 0.
|
| 1354 |
-
"step":
|
| 1355 |
},
|
| 1356 |
{
|
| 1357 |
-
"epoch":
|
| 1358 |
-
"grad_norm":
|
| 1359 |
-
"learning_rate":
|
| 1360 |
-
"loss": 0.
|
| 1361 |
-
"step":
|
| 1362 |
},
|
| 1363 |
{
|
| 1364 |
-
"epoch":
|
| 1365 |
-
"grad_norm":
|
| 1366 |
-
"learning_rate":
|
| 1367 |
-
"loss": 0.
|
| 1368 |
-
"step":
|
| 1369 |
},
|
| 1370 |
{
|
| 1371 |
-
"epoch":
|
| 1372 |
-
"grad_norm":
|
| 1373 |
-
"learning_rate":
|
| 1374 |
-
"loss": 0.
|
| 1375 |
-
"step":
|
| 1376 |
},
|
| 1377 |
{
|
| 1378 |
-
"epoch":
|
| 1379 |
-
"grad_norm":
|
| 1380 |
-
"learning_rate":
|
| 1381 |
-
"loss": 0.
|
| 1382 |
-
"step":
|
| 1383 |
},
|
| 1384 |
{
|
| 1385 |
-
"epoch":
|
| 1386 |
-
"grad_norm":
|
| 1387 |
-
"learning_rate":
|
| 1388 |
-
"loss": 0.
|
| 1389 |
-
"step":
|
| 1390 |
},
|
| 1391 |
{
|
| 1392 |
-
"epoch":
|
| 1393 |
-
"grad_norm":
|
| 1394 |
-
"learning_rate": 3.
|
| 1395 |
-
"loss": 0.
|
| 1396 |
-
"step":
|
| 1397 |
},
|
| 1398 |
{
|
| 1399 |
-
"epoch":
|
| 1400 |
-
"grad_norm":
|
| 1401 |
-
"learning_rate": 3.
|
| 1402 |
-
"loss": 0.
|
| 1403 |
-
"step":
|
| 1404 |
},
|
| 1405 |
{
|
| 1406 |
-
"epoch":
|
| 1407 |
-
"grad_norm":
|
| 1408 |
-
"learning_rate":
|
| 1409 |
-
"loss": 0.
|
| 1410 |
-
"step":
|
| 1411 |
},
|
| 1412 |
{
|
| 1413 |
-
"epoch":
|
| 1414 |
-
"grad_norm":
|
| 1415 |
-
"learning_rate":
|
| 1416 |
-
"loss": 0.
|
| 1417 |
-
"step":
|
| 1418 |
},
|
| 1419 |
{
|
| 1420 |
-
"epoch":
|
| 1421 |
-
"grad_norm":
|
| 1422 |
-
"learning_rate":
|
| 1423 |
-
"loss": 0.
|
| 1424 |
-
"step":
|
| 1425 |
},
|
| 1426 |
{
|
| 1427 |
-
"epoch":
|
| 1428 |
-
"grad_norm":
|
| 1429 |
-
"learning_rate":
|
| 1430 |
-
"loss": 0.
|
| 1431 |
-
"step":
|
| 1432 |
},
|
| 1433 |
{
|
| 1434 |
-
"epoch":
|
| 1435 |
-
"grad_norm":
|
| 1436 |
-
"learning_rate":
|
| 1437 |
-
"loss": 0.
|
| 1438 |
-
"step":
|
| 1439 |
},
|
| 1440 |
{
|
| 1441 |
-
"epoch":
|
| 1442 |
-
"grad_norm":
|
| 1443 |
-
"learning_rate":
|
| 1444 |
-
"loss": 0.
|
| 1445 |
-
"step":
|
| 1446 |
},
|
| 1447 |
{
|
| 1448 |
-
"epoch":
|
| 1449 |
-
"eval_loss": 0.
|
| 1450 |
-
"eval_runtime": 287.
|
| 1451 |
-
"eval_samples_per_second": 3.
|
| 1452 |
"eval_steps_per_second": 0.435,
|
| 1453 |
-
"eval_wer":
|
| 1454 |
-
"step":
|
| 1455 |
}
|
| 1456 |
],
|
| 1457 |
-
"logging_steps":
|
| 1458 |
-
"max_steps":
|
| 1459 |
"num_input_tokens_seen": 0,
|
| 1460 |
-
"num_train_epochs":
|
| 1461 |
-
"save_steps":
|
| 1462 |
-
"
|
| 1463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1464 |
"trial_name": null,
|
| 1465 |
"trial_params": null
|
| 1466 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 19.701321079839175,
|
| 3 |
+
"best_model_checkpoint": "./whisper-ft-2/checkpoint-600",
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
+
"eval_steps": 600,
|
| 6 |
+
"global_step": 600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 0.03333333333333333,
|
| 13 |
+
"grad_norm": 1.419047236442566,
|
| 14 |
+
"learning_rate": 5e-06,
|
| 15 |
+
"loss": 0.0144,
|
| 16 |
+
"step": 5
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
},
|
| 18 |
{
|
| 19 |
+
"epoch": 0.06666666666666667,
|
| 20 |
+
"grad_norm": 2.3382816314697266,
|
| 21 |
+
"learning_rate": 1e-05,
|
| 22 |
+
"loss": 0.0143,
|
| 23 |
+
"step": 10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.1,
|
| 27 |
+
"grad_norm": 0.8841551542282104,
|
| 28 |
+
"learning_rate": 9.943820224719102e-06,
|
| 29 |
+
"loss": 0.0126,
|
| 30 |
+
"step": 15
|
| 31 |
},
|
| 32 |
{
|
| 33 |
+
"epoch": 0.13333333333333333,
|
| 34 |
+
"grad_norm": 2.564495325088501,
|
| 35 |
+
"learning_rate": 9.887640449438202e-06,
|
| 36 |
+
"loss": 0.0144,
|
| 37 |
+
"step": 20
|
| 38 |
},
|
| 39 |
{
|
| 40 |
+
"epoch": 0.16666666666666666,
|
| 41 |
+
"grad_norm": 1.858008623123169,
|
| 42 |
+
"learning_rate": 9.831460674157303e-06,
|
| 43 |
+
"loss": 0.0097,
|
| 44 |
+
"step": 25
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.2,
|
| 48 |
+
"grad_norm": 1.3031666278839111,
|
| 49 |
+
"learning_rate": 9.775280898876405e-06,
|
| 50 |
+
"loss": 0.0129,
|
| 51 |
+
"step": 30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
+
"epoch": 0.23333333333333334,
|
| 55 |
+
"grad_norm": 3.633366584777832,
|
| 56 |
+
"learning_rate": 9.719101123595506e-06,
|
| 57 |
+
"loss": 0.0083,
|
| 58 |
+
"step": 35
|
| 59 |
},
|
| 60 |
{
|
| 61 |
+
"epoch": 0.26666666666666666,
|
| 62 |
+
"grad_norm": 1.9989734888076782,
|
| 63 |
+
"learning_rate": 9.662921348314608e-06,
|
| 64 |
+
"loss": 0.0122,
|
| 65 |
+
"step": 40
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"epoch": 0.3,
|
| 69 |
+
"grad_norm": 0.9710230827331543,
|
| 70 |
+
"learning_rate": 9.60674157303371e-06,
|
| 71 |
+
"loss": 0.007,
|
| 72 |
+
"step": 45
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
},
|
| 74 |
{
|
| 75 |
+
"epoch": 0.3333333333333333,
|
| 76 |
+
"grad_norm": 1.8736152648925781,
|
| 77 |
+
"learning_rate": 9.55056179775281e-06,
|
| 78 |
+
"loss": 0.0122,
|
| 79 |
+
"step": 50
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
},
|
| 81 |
{
|
| 82 |
+
"epoch": 0.36666666666666664,
|
| 83 |
+
"grad_norm": 0.9838809370994568,
|
| 84 |
+
"learning_rate": 9.49438202247191e-06,
|
| 85 |
+
"loss": 0.005,
|
| 86 |
+
"step": 55
|
| 87 |
},
|
| 88 |
{
|
| 89 |
"epoch": 0.4,
|
| 90 |
+
"grad_norm": 2.429737091064453,
|
| 91 |
+
"learning_rate": 9.438202247191012e-06,
|
| 92 |
+
"loss": 0.0115,
|
| 93 |
+
"step": 60
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
},
|
| 95 |
{
|
| 96 |
+
"epoch": 0.43333333333333335,
|
| 97 |
+
"grad_norm": 1.108884572982788,
|
| 98 |
+
"learning_rate": 9.382022471910113e-06,
|
| 99 |
+
"loss": 0.0058,
|
| 100 |
+
"step": 65
|
| 101 |
},
|
| 102 |
{
|
| 103 |
+
"epoch": 0.4666666666666667,
|
| 104 |
+
"grad_norm": 1.1713858842849731,
|
| 105 |
+
"learning_rate": 9.325842696629213e-06,
|
| 106 |
+
"loss": 0.0058,
|
| 107 |
+
"step": 70
|
| 108 |
},
|
| 109 |
{
|
| 110 |
+
"epoch": 0.5,
|
| 111 |
+
"grad_norm": 3.6589481830596924,
|
| 112 |
+
"learning_rate": 9.269662921348316e-06,
|
| 113 |
+
"loss": 0.0094,
|
| 114 |
+
"step": 75
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
},
|
| 116 |
{
|
| 117 |
+
"epoch": 0.5333333333333333,
|
| 118 |
+
"grad_norm": 1.9582005739212036,
|
| 119 |
+
"learning_rate": 9.213483146067417e-06,
|
| 120 |
+
"loss": 0.0076,
|
| 121 |
+
"step": 80
|
| 122 |
},
|
| 123 |
{
|
| 124 |
+
"epoch": 0.5666666666666667,
|
| 125 |
+
"grad_norm": 1.3726775646209717,
|
| 126 |
+
"learning_rate": 9.157303370786517e-06,
|
| 127 |
+
"loss": 0.0054,
|
| 128 |
+
"step": 85
|
| 129 |
},
|
| 130 |
{
|
| 131 |
+
"epoch": 0.6,
|
| 132 |
+
"grad_norm": 1.7600703239440918,
|
| 133 |
+
"learning_rate": 9.101123595505619e-06,
|
| 134 |
+
"loss": 0.0099,
|
| 135 |
+
"step": 90
|
| 136 |
},
|
| 137 |
{
|
| 138 |
+
"epoch": 0.6333333333333333,
|
| 139 |
+
"grad_norm": 0.9456138610839844,
|
| 140 |
+
"learning_rate": 9.04494382022472e-06,
|
| 141 |
+
"loss": 0.0047,
|
| 142 |
+
"step": 95
|
| 143 |
},
|
| 144 |
{
|
| 145 |
+
"epoch": 0.6666666666666666,
|
| 146 |
+
"grad_norm": 0.9388735294342041,
|
| 147 |
+
"learning_rate": 8.988764044943822e-06,
|
| 148 |
+
"loss": 0.0089,
|
| 149 |
+
"step": 100
|
| 150 |
},
|
| 151 |
{
|
| 152 |
+
"epoch": 0.7,
|
| 153 |
+
"grad_norm": 2.800154685974121,
|
| 154 |
+
"learning_rate": 8.932584269662921e-06,
|
| 155 |
+
"loss": 0.0074,
|
| 156 |
+
"step": 105
|
| 157 |
},
|
| 158 |
{
|
| 159 |
+
"epoch": 0.7333333333333333,
|
| 160 |
+
"grad_norm": 4.255383491516113,
|
| 161 |
+
"learning_rate": 8.876404494382023e-06,
|
| 162 |
+
"loss": 0.0057,
|
| 163 |
+
"step": 110
|
| 164 |
},
|
| 165 |
{
|
| 166 |
+
"epoch": 0.7666666666666667,
|
| 167 |
+
"grad_norm": 1.3430100679397583,
|
| 168 |
+
"learning_rate": 8.820224719101124e-06,
|
| 169 |
+
"loss": 0.0042,
|
| 170 |
+
"step": 115
|
| 171 |
},
|
| 172 |
{
|
| 173 |
+
"epoch": 0.8,
|
| 174 |
+
"grad_norm": 4.184587478637695,
|
| 175 |
+
"learning_rate": 8.764044943820226e-06,
|
| 176 |
+
"loss": 0.0063,
|
| 177 |
+
"step": 120
|
| 178 |
},
|
| 179 |
{
|
| 180 |
+
"epoch": 0.8333333333333334,
|
| 181 |
+
"grad_norm": 1.3114601373672485,
|
| 182 |
+
"learning_rate": 8.707865168539327e-06,
|
| 183 |
+
"loss": 0.0085,
|
| 184 |
+
"step": 125
|
| 185 |
},
|
| 186 |
{
|
| 187 |
+
"epoch": 0.8666666666666667,
|
| 188 |
+
"grad_norm": 1.0539401769638062,
|
| 189 |
+
"learning_rate": 8.651685393258428e-06,
|
| 190 |
+
"loss": 0.0072,
|
| 191 |
+
"step": 130
|
| 192 |
},
|
| 193 |
{
|
| 194 |
+
"epoch": 0.9,
|
| 195 |
+
"grad_norm": 4.378466606140137,
|
| 196 |
+
"learning_rate": 8.595505617977528e-06,
|
| 197 |
+
"loss": 0.0049,
|
| 198 |
+
"step": 135
|
| 199 |
},
|
| 200 |
{
|
| 201 |
+
"epoch": 0.9333333333333333,
|
| 202 |
+
"grad_norm": 1.5044472217559814,
|
| 203 |
+
"learning_rate": 8.53932584269663e-06,
|
| 204 |
+
"loss": 0.0067,
|
| 205 |
+
"step": 140
|
| 206 |
},
|
| 207 |
{
|
| 208 |
+
"epoch": 0.9666666666666667,
|
| 209 |
+
"grad_norm": 2.2182376384735107,
|
| 210 |
+
"learning_rate": 8.483146067415731e-06,
|
| 211 |
+
"loss": 0.0091,
|
| 212 |
+
"step": 145
|
| 213 |
},
|
| 214 |
{
|
| 215 |
+
"epoch": 1.0,
|
| 216 |
+
"grad_norm": 0.24589230120182037,
|
| 217 |
+
"learning_rate": 8.426966292134832e-06,
|
| 218 |
+
"loss": 0.0062,
|
| 219 |
+
"step": 150
|
| 220 |
},
|
| 221 |
{
|
| 222 |
+
"epoch": 1.0333333333333334,
|
| 223 |
+
"grad_norm": 0.48602986335754395,
|
| 224 |
+
"learning_rate": 8.370786516853934e-06,
|
| 225 |
+
"loss": 0.0056,
|
| 226 |
+
"step": 155
|
| 227 |
},
|
| 228 |
{
|
| 229 |
+
"epoch": 1.0666666666666667,
|
| 230 |
+
"grad_norm": 0.2779291570186615,
|
| 231 |
+
"learning_rate": 8.314606741573035e-06,
|
| 232 |
+
"loss": 0.0031,
|
| 233 |
+
"step": 160
|
| 234 |
},
|
| 235 |
{
|
| 236 |
+
"epoch": 1.1,
|
| 237 |
+
"grad_norm": 2.2703235149383545,
|
| 238 |
+
"learning_rate": 8.258426966292135e-06,
|
| 239 |
+
"loss": 0.0069,
|
| 240 |
+
"step": 165
|
| 241 |
},
|
| 242 |
{
|
| 243 |
+
"epoch": 1.1333333333333333,
|
| 244 |
+
"grad_norm": 1.3132015466690063,
|
| 245 |
+
"learning_rate": 8.202247191011237e-06,
|
| 246 |
+
"loss": 0.006,
|
| 247 |
+
"step": 170
|
| 248 |
},
|
| 249 |
{
|
| 250 |
+
"epoch": 1.1666666666666667,
|
| 251 |
+
"grad_norm": 0.24997250735759735,
|
| 252 |
+
"learning_rate": 8.146067415730338e-06,
|
| 253 |
+
"loss": 0.0027,
|
| 254 |
+
"step": 175
|
| 255 |
},
|
| 256 |
{
|
| 257 |
+
"epoch": 1.2,
|
| 258 |
+
"grad_norm": 0.14482256770133972,
|
| 259 |
+
"learning_rate": 8.08988764044944e-06,
|
| 260 |
+
"loss": 0.0023,
|
| 261 |
+
"step": 180
|
| 262 |
},
|
| 263 |
{
|
| 264 |
+
"epoch": 1.2333333333333334,
|
| 265 |
+
"grad_norm": 0.7420951128005981,
|
| 266 |
+
"learning_rate": 8.033707865168539e-06,
|
| 267 |
+
"loss": 0.0064,
|
| 268 |
+
"step": 185
|
| 269 |
},
|
| 270 |
{
|
| 271 |
+
"epoch": 1.2666666666666666,
|
| 272 |
+
"grad_norm": 0.21342052519321442,
|
| 273 |
+
"learning_rate": 7.97752808988764e-06,
|
| 274 |
+
"loss": 0.0026,
|
| 275 |
+
"step": 190
|
| 276 |
},
|
| 277 |
{
|
| 278 |
+
"epoch": 1.3,
|
| 279 |
+
"grad_norm": 0.2786453664302826,
|
| 280 |
+
"learning_rate": 7.921348314606742e-06,
|
| 281 |
+
"loss": 0.0036,
|
| 282 |
+
"step": 195
|
| 283 |
},
|
| 284 |
{
|
| 285 |
+
"epoch": 1.3333333333333333,
|
| 286 |
+
"grad_norm": 0.36970847845077515,
|
| 287 |
+
"learning_rate": 7.865168539325843e-06,
|
| 288 |
+
"loss": 0.0046,
|
| 289 |
+
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
    },
    {
+      "epoch": 1.3666666666666667,
+      "grad_norm": 0.17185050249099731,
+      "learning_rate": 7.808988764044945e-06,
+      "loss": 0.0022,
+      "step": 205
    },
    {
+      "epoch": 1.4,
+      "grad_norm": 1.4263725280761719,
+      "learning_rate": 7.752808988764046e-06,
+      "loss": 0.0034,
+      "step": 210
    },
    {
+      "epoch": 1.4333333333333333,
+      "grad_norm": 0.5556924939155579,
+      "learning_rate": 7.696629213483146e-06,
+      "loss": 0.005,
+      "step": 215
    },
    {
+      "epoch": 1.4666666666666668,
+      "grad_norm": 1.5778443813323975,
+      "learning_rate": 7.640449438202247e-06,
+      "loss": 0.0048,
+      "step": 220
    },
    {
+      "epoch": 1.5,
+      "grad_norm": 0.39948800206184387,
+      "learning_rate": 7.584269662921349e-06,
+      "loss": 0.0056,
+      "step": 225
    },
    {
+      "epoch": 1.5333333333333332,
+      "grad_norm": 1.9470425844192505,
+      "learning_rate": 7.5280898876404495e-06,
+      "loss": 0.0052,
+      "step": 230
    },
    {
+      "epoch": 1.5666666666666667,
+      "grad_norm": 1.3367302417755127,
+      "learning_rate": 7.471910112359552e-06,
+      "loss": 0.0056,
+      "step": 235
    },
    {
+      "epoch": 1.6,
+      "grad_norm": 2.2041380405426025,
+      "learning_rate": 7.415730337078652e-06,
+      "loss": 0.0026,
+      "step": 240
    },
    {
+      "epoch": 1.6333333333333333,
+      "grad_norm": 1.1062074899673462,
+      "learning_rate": 7.359550561797754e-06,
+      "loss": 0.0058,
+      "step": 245
    },
    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 1.558339238166809,
+      "learning_rate": 7.303370786516854e-06,
+      "loss": 0.0036,
+      "step": 250
    },
    {
+      "epoch": 1.7,
+      "grad_norm": 1.4909693002700806,
+      "learning_rate": 7.247191011235956e-06,
+      "loss": 0.0081,
+      "step": 255
    },
    {
+      "epoch": 1.7333333333333334,
+      "grad_norm": 1.0885131359100342,
+      "learning_rate": 7.191011235955056e-06,
+      "loss": 0.0035,
+      "step": 260
    },
    {
+      "epoch": 1.7666666666666666,
+      "grad_norm": 0.34988316893577576,
+      "learning_rate": 7.134831460674158e-06,
+      "loss": 0.0066,
+      "step": 265
    },
    {
+      "epoch": 1.8,
+      "grad_norm": 0.39315518736839294,
+      "learning_rate": 7.078651685393258e-06,
+      "loss": 0.0036,
+      "step": 270
    },
    {
+      "epoch": 1.8333333333333335,
+      "grad_norm": 0.4730512797832489,
+      "learning_rate": 7.022471910112361e-06,
+      "loss": 0.0025,
+      "step": 275
    },
    {
+      "epoch": 1.8666666666666667,
+      "grad_norm": 1.0061614513397217,
+      "learning_rate": 6.966292134831461e-06,
+      "loss": 0.0058,
+      "step": 280
    },
    {
+      "epoch": 1.9,
+      "grad_norm": 0.6261555552482605,
+      "learning_rate": 6.910112359550563e-06,
+      "loss": 0.0035,
+      "step": 285
    },
    {
+      "epoch": 1.9333333333333333,
+      "grad_norm": 0.54555344581604,
+      "learning_rate": 6.853932584269663e-06,
+      "loss": 0.0064,
+      "step": 290
    },
    {
+      "epoch": 1.9666666666666668,
+      "grad_norm": 0.26168444752693176,
+      "learning_rate": 6.797752808988765e-06,
+      "loss": 0.0063,
+      "step": 295
    },
    {
+      "epoch": 2.0,
+      "grad_norm": 2.1794660091400146,
+      "learning_rate": 6.741573033707865e-06,
+      "loss": 0.0047,
+      "step": 300
    },
    {
+      "epoch": 2.033333333333333,
+      "grad_norm": 0.07394399493932724,
+      "learning_rate": 6.685393258426967e-06,
+      "loss": 0.0033,
+      "step": 305
    },
    {
+      "epoch": 2.066666666666667,
+      "grad_norm": 0.32680198550224304,
+      "learning_rate": 6.629213483146067e-06,
+      "loss": 0.0033,
+      "step": 310
    },
    {
+      "epoch": 2.1,
+      "grad_norm": 1.1588655710220337,
+      "learning_rate": 6.57303370786517e-06,
+      "loss": 0.0044,
+      "step": 315
    },
    {
+      "epoch": 2.1333333333333333,
+      "grad_norm": 0.12549975514411926,
+      "learning_rate": 6.51685393258427e-06,
+      "loss": 0.0025,
+      "step": 320
    },
    {
+      "epoch": 2.1666666666666665,
+      "grad_norm": 1.6806613206863403,
+      "learning_rate": 6.460674157303372e-06,
+      "loss": 0.0073,
+      "step": 325
    },
    {
+      "epoch": 2.2,
+      "grad_norm": 1.2238233089447021,
+      "learning_rate": 6.404494382022472e-06,
+      "loss": 0.0063,
+      "step": 330
    },
    {
+      "epoch": 2.2333333333333334,
+      "grad_norm": 0.13195298612117767,
+      "learning_rate": 6.348314606741574e-06,
+      "loss": 0.0029,
+      "step": 335
    },
    {
+      "epoch": 2.2666666666666666,
+      "grad_norm": 0.6085399985313416,
+      "learning_rate": 6.292134831460674e-06,
+      "loss": 0.0014,
+      "step": 340
    },
    {
+      "epoch": 2.3,
+      "grad_norm": 0.1822354644536972,
+      "learning_rate": 6.235955056179776e-06,
+      "loss": 0.0017,
+      "step": 345
    },
    {
+      "epoch": 2.3333333333333335,
+      "grad_norm": 1.526226282119751,
+      "learning_rate": 6.179775280898876e-06,
+      "loss": 0.0016,
+      "step": 350
    },
    {
+      "epoch": 2.3666666666666667,
+      "grad_norm": 0.3068194091320038,
+      "learning_rate": 6.1235955056179785e-06,
+      "loss": 0.0053,
+      "step": 355
    },
    {
+      "epoch": 2.4,
+      "grad_norm": 0.3344336748123169,
+      "learning_rate": 6.06741573033708e-06,
+      "loss": 0.0034,
+      "step": 360
    },
    {
+      "epoch": 2.4333333333333336,
+      "grad_norm": 0.38717201352119446,
+      "learning_rate": 6.0112359550561805e-06,
+      "loss": 0.0032,
+      "step": 365
    },
    {
+      "epoch": 2.466666666666667,
+      "grad_norm": 3.577847957611084,
+      "learning_rate": 5.955056179775281e-06,
+      "loss": 0.0046,
+      "step": 370
    },
    {
+      "epoch": 2.5,
+      "grad_norm": 0.1945178508758545,
+      "learning_rate": 5.8988764044943826e-06,
+      "loss": 0.0016,
+      "step": 375
    },
    {
+      "epoch": 2.533333333333333,
+      "grad_norm": 0.12505494058132172,
+      "learning_rate": 5.842696629213483e-06,
+      "loss": 0.0021,
+      "step": 380
    },
    {
+      "epoch": 2.5666666666666664,
+      "grad_norm": 0.23506364226341248,
+      "learning_rate": 5.786516853932585e-06,
+      "loss": 0.0025,
+      "step": 385
    },
    {
+      "epoch": 2.6,
+      "grad_norm": 0.48507827520370483,
+      "learning_rate": 5.730337078651685e-06,
+      "loss": 0.0016,
+      "step": 390
    },
    {
+      "epoch": 2.6333333333333333,
+      "grad_norm": 0.14861613512039185,
+      "learning_rate": 5.6741573033707874e-06,
+      "loss": 0.0019,
+      "step": 395
    },
    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.07135830074548721,
+      "learning_rate": 5.617977528089889e-06,
+      "loss": 0.0018,
+      "step": 400
    },
    {
+      "epoch": 2.7,
+      "grad_norm": 0.35108867287635803,
+      "learning_rate": 5.5617977528089895e-06,
+      "loss": 0.0011,
+      "step": 405
    },
    {
+      "epoch": 2.7333333333333334,
+      "grad_norm": 1.0602957010269165,
+      "learning_rate": 5.50561797752809e-06,
+      "loss": 0.0015,
+      "step": 410
    },
    {
+      "epoch": 2.7666666666666666,
+      "grad_norm": 0.14372961223125458,
+      "learning_rate": 5.4494382022471915e-06,
+      "loss": 0.0023,
+      "step": 415
    },
    {
+      "epoch": 2.8,
+      "grad_norm": 1.5078669786453247,
+      "learning_rate": 5.393258426966292e-06,
+      "loss": 0.0017,
+      "step": 420
    },
    {
+      "epoch": 2.8333333333333335,
+      "grad_norm": 0.08180749416351318,
+      "learning_rate": 5.3370786516853935e-06,
+      "loss": 0.0022,
+      "step": 425
    },
    {
+      "epoch": 2.8666666666666667,
+      "grad_norm": 0.27330687642097473,
+      "learning_rate": 5.280898876404494e-06,
+      "loss": 0.0016,
+      "step": 430
    },
    {
+      "epoch": 2.9,
+      "grad_norm": 0.06831669807434082,
+      "learning_rate": 5.224719101123596e-06,
+      "loss": 0.0008,
+      "step": 435
    },
    {
+      "epoch": 2.9333333333333336,
+      "grad_norm": 0.3877633213996887,
+      "learning_rate": 5.168539325842698e-06,
+      "loss": 0.0022,
+      "step": 440
    },
    {
+      "epoch": 2.966666666666667,
+      "grad_norm": 2.5412757396698,
+      "learning_rate": 5.112359550561798e-06,
+      "loss": 0.003,
+      "step": 445
    },
    {
+      "epoch": 3.0,
+      "grad_norm": 0.09176287055015564,
+      "learning_rate": 5.0561797752809e-06,
+      "loss": 0.0011,
+      "step": 450
    },
    {
+      "epoch": 3.033333333333333,
+      "grad_norm": 0.03548486530780792,
+      "learning_rate": 5e-06,
+      "loss": 0.0016,
+      "step": 455
    },
    {
+      "epoch": 3.066666666666667,
+      "grad_norm": 0.11006509512662888,
+      "learning_rate": 4.943820224719101e-06,
+      "loss": 0.006,
+      "step": 460
    },
    {
+      "epoch": 3.1,
+      "grad_norm": 0.19691240787506104,
+      "learning_rate": 4.8876404494382024e-06,
+      "loss": 0.002,
+      "step": 465
    },
    {
+      "epoch": 3.1333333333333333,
+      "grad_norm": 0.30039843916893005,
+      "learning_rate": 4.831460674157304e-06,
+      "loss": 0.001,
+      "step": 470
    },
    {
+      "epoch": 3.1666666666666665,
+      "grad_norm": 0.0753551796078682,
+      "learning_rate": 4.775280898876405e-06,
+      "loss": 0.0008,
+      "step": 475
    },
    {
+      "epoch": 3.2,
+      "grad_norm": 0.12786687910556793,
+      "learning_rate": 4.719101123595506e-06,
+      "loss": 0.0026,
+      "step": 480
    },
    {
+      "epoch": 3.2333333333333334,
+      "grad_norm": 0.07095532864332199,
+      "learning_rate": 4.6629213483146065e-06,
+      "loss": 0.0004,
+      "step": 485
    },
    {
+      "epoch": 3.2666666666666666,
+      "grad_norm": 0.1347319781780243,
+      "learning_rate": 4.606741573033709e-06,
+      "loss": 0.0013,
+      "step": 490
    },
    {
+      "epoch": 3.3,
+      "grad_norm": 0.11095249652862549,
+      "learning_rate": 4.550561797752809e-06,
+      "loss": 0.0012,
+      "step": 495
    },
    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.10048757493495941,
+      "learning_rate": 4.494382022471911e-06,
+      "loss": 0.0014,
+      "step": 500
    },
    {
+      "epoch": 3.3666666666666667,
+      "grad_norm": 0.04148377105593681,
+      "learning_rate": 4.438202247191011e-06,
+      "loss": 0.0006,
+      "step": 505
    },
    {
+      "epoch": 3.4,
+      "grad_norm": 0.17400866746902466,
+      "learning_rate": 4.382022471910113e-06,
+      "loss": 0.0008,
+      "step": 510
    },
    {
+      "epoch": 3.4333333333333336,
+      "grad_norm": 0.2563508450984955,
+      "learning_rate": 4.325842696629214e-06,
+      "loss": 0.0027,
+      "step": 515
    },
    {
+      "epoch": 3.466666666666667,
+      "grad_norm": 0.06805615872144699,
+      "learning_rate": 4.269662921348315e-06,
+      "loss": 0.0011,
+      "step": 520
    },
    {
+      "epoch": 3.5,
+      "grad_norm": 0.20213201642036438,
+      "learning_rate": 4.213483146067416e-06,
+      "loss": 0.0031,
+      "step": 525
    },
    {
+      "epoch": 3.533333333333333,
+      "grad_norm": 0.03789810836315155,
+      "learning_rate": 4.157303370786518e-06,
+      "loss": 0.0038,
+      "step": 530
    },
    {
+      "epoch": 3.5666666666666664,
+      "grad_norm": 0.13470155000686646,
+      "learning_rate": 4.101123595505618e-06,
+      "loss": 0.0017,
+      "step": 535
    },
    {
+      "epoch": 3.6,
+      "grad_norm": 0.046678509563207626,
+      "learning_rate": 4.04494382022472e-06,
+      "loss": 0.0005,
+      "step": 540
    },
    {
+      "epoch": 3.6333333333333333,
+      "grad_norm": 0.05889086052775383,
+      "learning_rate": 3.98876404494382e-06,
+      "loss": 0.0036,
+      "step": 545
    },
    {
+      "epoch": 3.6666666666666665,
+      "grad_norm": 0.03523600473999977,
+      "learning_rate": 3.932584269662922e-06,
+      "loss": 0.0011,
+      "step": 550
    },
    {
+      "epoch": 3.7,
+      "grad_norm": 0.03970940411090851,
+      "learning_rate": 3.876404494382023e-06,
+      "loss": 0.0011,
+      "step": 555
    },
    {
+      "epoch": 3.7333333333333334,
+      "grad_norm": 0.2314375936985016,
+      "learning_rate": 3.820224719101124e-06,
+      "loss": 0.0026,
+      "step": 560
    },
    {
+      "epoch": 3.7666666666666666,
+      "grad_norm": 0.07075604051351547,
+      "learning_rate": 3.7640449438202247e-06,
+      "loss": 0.0031,
+      "step": 565
    },
    {
+      "epoch": 3.8,
+      "grad_norm": 0.09040886908769608,
+      "learning_rate": 3.707865168539326e-06,
+      "loss": 0.0004,
+      "step": 570
    },
    {
+      "epoch": 3.8333333333333335,
+      "grad_norm": 0.11175351589918137,
+      "learning_rate": 3.651685393258427e-06,
+      "loss": 0.0012,
+      "step": 575
    },
    {
+      "epoch": 3.8666666666666667,
+      "grad_norm": 0.5862079858779907,
+      "learning_rate": 3.595505617977528e-06,
+      "loss": 0.001,
+      "step": 580
    },
    {
+      "epoch": 3.9,
+      "grad_norm": 0.4760936200618744,
+      "learning_rate": 3.539325842696629e-06,
+      "loss": 0.0008,
+      "step": 585
    },
    {
+      "epoch": 3.9333333333333336,
+      "grad_norm": 0.18229596316814423,
+      "learning_rate": 3.4831460674157306e-06,
+      "loss": 0.001,
+      "step": 590
    },
    {
+      "epoch": 3.966666666666667,
+      "grad_norm": 0.033338598906993866,
+      "learning_rate": 3.4269662921348316e-06,
+      "loss": 0.002,
+      "step": 595
    },
    {
+      "epoch": 4.0,
+      "grad_norm": 0.19226883351802826,
+      "learning_rate": 3.3707865168539327e-06,
+      "loss": 0.0006,
+      "step": 600
    },
    {
+      "epoch": 4.0,
+      "eval_loss": 0.3506176173686981,
+      "eval_runtime": 287.3723,
+      "eval_samples_per_second": 3.48,
      "eval_steps_per_second": 0.435,
+      "eval_wer": 19.701321079839175,
+      "step": 600
    }
  ],
+  "logging_steps": 5,
+  "max_steps": 900,
  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 600,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.7831995392e+17,
+  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}
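The log_history array above records one training entry every 5 optimizer steps (per "logging_steps": 5) and a single evaluation entry at the save point, step 600, where the model reaches an eval WER of about 19.7. A minimal sketch for pulling those numbers out of the file, assuming a locally downloaded copy of this checkpoint (the path is illustrative, not part of the commit):

import json

# Read the checkpoint's trainer_state.json and summarize its log_history.
# Training entries carry a "loss" key; the evaluation entry carries
# "eval_loss"/"eval_wer" instead, so the two are easy to separate.
with open("checkpoint-600/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_wer" in e]

print(f"{len(train_logs)} training entries, final loss {train_logs[-1]['loss']}")
for e in eval_logs:
    print(f"step {e['step']}: eval_wer {e['eval_wer']:.2f}, eval_loss {e['eval_loss']:.4f}")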
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:db804bff1d34b49eeb93672a0155a7e464e05406a85421985e0ebf298394549d
+size 5240
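training_args.bin is stored through Git LFS, so this commit changes only the pointer file: the sha256 oid of the new blob and its size in bytes (5240). A minimal sketch for checking a downloaded copy against that pointer, using only the standard library and assuming the file sits in the current directory:

import hashlib
from pathlib import Path

# Expected values copied from the LFS pointer in the diff above.
EXPECTED_OID = "db804bff1d34b49eeb93672a0155a7e464e05406a85421985e0ebf298394549d"
EXPECTED_SIZE = 5240

blob = Path("training_args.bin").read_bytes()
assert len(blob) == EXPECTED_SIZE, f"unexpected size: {len(blob)} bytes"
assert hashlib.sha256(blob).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("training_args.bin matches the committed LFS pointer")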