Training in progress, step 20000, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +2 -2
last-checkpoint/global_step20000/mp_rank_00_model_states.pt +2 -2
last-checkpoint/latest +1 -1
last-checkpoint/model.safetensors +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +876 -6

last-checkpoint/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ade9c3e43b2a1550492ecf4b91e9228af429dcf0d7b1c09aea81ebc7a5842d20
-size 761059696

 version https://git-lfs.github.com/spec/v1
+oid sha256:9867ea7f15881e7bed68bb3b7781fc3c4f5646e0a9aec63231d97c009a1c403f
+size 5117197020

last-checkpoint/global_step20000/mp_rank_00_model_states.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58f46b37e83d56bff8e8b49fc01d48e56f7c2f6034abd01b65de03f862980853
-size 129965712

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d8bf456bdf7cfa2a6fed84991a6fc983b6fea67864bf0474df258a8f8c7541
+size 859127504

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step17000~~


1	+ global_step20000

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa4f82ea40fb0305db931cf7a54215d8c646ba708abad07172d476a907b2dad4
 size 962205216

 version https://git-lfs.github.com/spec/v1
+oid sha256:069fbc2b96ff55558de2b6621d0406b4fbcbc7edffe8d2472bb8b992e0abdb14
 size 962205216

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be21ff914d7590ad2180b18bca69f62255c4deee5c5c2b727794908b9d148dcc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef2e75134b208d60f6f9b30cef29e49813797dfcda4ce7d7e2cabca76bb3fa47
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4aa6830d6aa63edbea9a9fa4aac3b79365984a3d18eed4b014dcec7309b75dc2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:29c7a79b53a589de48d3b7a21df9c0d024be4dea79f68869f72fdc01ae3b212a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 82.65912305516267,
-  "best_model_checkpoint": "./iteboshi_temp/checkpoint-16000",
-  "epoch": 18.722466960352424,
   "eval_steps": 1000,
-  "global_step": 17000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4937,6 +4937,876 @@
       "eval_steps_per_second": 1.554,
       "eval_wer": 82.998585572843,
       "step": 17000
     }
   ],
   "logging_steps": 25,
@@ -4951,12 +5821,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.920843417033166e+20,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 82.34794908062236,
+  "best_model_checkpoint": "./iteboshi_temp/checkpoint-20000",
+  "epoch": 22.026431718061673,
   "eval_steps": 1000,
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_steps_per_second": 1.554,
       "eval_wer": 82.998585572843,
       "step": 17000
+    },
+    {
+      "epoch": 18.75,
+      "grad_norm": 0.04777693375945091,
+      "learning_rate": 3.051282051282052e-06,
+      "loss": 0.0023,
+      "step": 17025
+    },
+    {
+      "epoch": 18.777533039647576,
+      "grad_norm": 0.012851621024310589,
+      "learning_rate": 3.0256410256410256e-06,
+      "loss": 0.0016,
+      "step": 17050
+    },
+    {
+      "epoch": 18.805066079295155,
+      "grad_norm": 0.07990699261426926,
+      "learning_rate": 3e-06,
+      "loss": 0.0016,
+      "step": 17075
+    },
+    {
+      "epoch": 18.83259911894273,
+      "grad_norm": 0.011805381625890732,
+      "learning_rate": 2.9743589743589746e-06,
+      "loss": 0.0027,
+      "step": 17100
+    },
+    {
+      "epoch": 18.860132158590307,
+      "grad_norm": 0.14670372009277344,
+      "learning_rate": 2.948717948717949e-06,
+      "loss": 0.0026,
+      "step": 17125
+    },
+    {
+      "epoch": 18.887665198237887,
+      "grad_norm": 0.023519041016697884,
+      "learning_rate": 2.9230769230769236e-06,
+      "loss": 0.0028,
+      "step": 17150
+    },
+    {
+      "epoch": 18.915198237885463,
+      "grad_norm": 0.021847659721970558,
+      "learning_rate": 2.897435897435898e-06,
+      "loss": 0.0015,
+      "step": 17175
+    },
+    {
+      "epoch": 18.94273127753304,
+      "grad_norm": 0.013796437531709671,
+      "learning_rate": 2.8717948717948717e-06,
+      "loss": 0.0023,
+      "step": 17200
+    },
+    {
+      "epoch": 18.970264317180618,
+      "grad_norm": 0.1518554836511612,
+      "learning_rate": 2.846153846153846e-06,
+      "loss": 0.0016,
+      "step": 17225
+    },
+    {
+      "epoch": 18.997797356828194,
+      "grad_norm": 0.012883415445685387,
+      "learning_rate": 2.8205128205128207e-06,
+      "loss": 0.0019,
+      "step": 17250
+    },
+    {
+      "epoch": 19.02533039647577,
+      "grad_norm": 0.01099941972643137,
+      "learning_rate": 2.794871794871795e-06,
+      "loss": 0.0022,
+      "step": 17275
+    },
+    {
+      "epoch": 19.05286343612335,
+      "grad_norm": 0.006992665119469166,
+      "learning_rate": 2.7692307692307697e-06,
+      "loss": 0.0011,
+      "step": 17300
+    },
+    {
+      "epoch": 19.080396475770925,
+      "grad_norm": 0.012264972552657127,
+      "learning_rate": 2.743589743589744e-06,
+      "loss": 0.0014,
+      "step": 17325
+    },
+    {
+      "epoch": 19.1079295154185,
+      "grad_norm": 0.04312492161989212,
+      "learning_rate": 2.717948717948718e-06,
+      "loss": 0.0012,
+      "step": 17350
+    },
+    {
+      "epoch": 19.13546255506608,
+      "grad_norm": 0.008214226923882961,
+      "learning_rate": 2.6923076923076923e-06,
+      "loss": 0.0011,
+      "step": 17375
+    },
+    {
+      "epoch": 19.162995594713657,
+      "grad_norm": 0.009182457812130451,
+      "learning_rate": 2.666666666666667e-06,
+      "loss": 0.0011,
+      "step": 17400
+    },
+    {
+      "epoch": 19.190528634361232,
+      "grad_norm": 0.009743117727339268,
+      "learning_rate": 2.6410256410256413e-06,
+      "loss": 0.001,
+      "step": 17425
+    },
+    {
+      "epoch": 19.218061674008812,
+      "grad_norm": 0.011959163472056389,
+      "learning_rate": 2.615384615384616e-06,
+      "loss": 0.001,
+      "step": 17450
+    },
+    {
+      "epoch": 19.245594713656388,
+      "grad_norm": 0.033681828528642654,
+      "learning_rate": 2.5897435897435903e-06,
+      "loss": 0.0019,
+      "step": 17475
+    },
+    {
+      "epoch": 19.273127753303964,
+      "grad_norm": 0.012354315258562565,
+      "learning_rate": 2.564102564102564e-06,
+      "loss": 0.0028,
+      "step": 17500
+    },
+    {
+      "epoch": 19.300660792951543,
+      "grad_norm": 0.01059970073401928,
+      "learning_rate": 2.5384615384615385e-06,
+      "loss": 0.0018,
+      "step": 17525
+    },
+    {
+      "epoch": 19.32819383259912,
+      "grad_norm": 0.007629127707332373,
+      "learning_rate": 2.512820512820513e-06,
+      "loss": 0.001,
+      "step": 17550
+    },
+    {
+      "epoch": 19.355726872246695,
+      "grad_norm": 0.0125362453982234,
+      "learning_rate": 2.4871794871794875e-06,
+      "loss": 0.001,
+      "step": 17575
+    },
+    {
+      "epoch": 19.383259911894275,
+      "grad_norm": 0.01261002104729414,
+      "learning_rate": 2.461538461538462e-06,
+      "loss": 0.0014,
+      "step": 17600
+    },
+    {
+      "epoch": 19.41079295154185,
+      "grad_norm": 0.010447504930198193,
+      "learning_rate": 2.435897435897436e-06,
+      "loss": 0.0021,
+      "step": 17625
+    },
+    {
+      "epoch": 19.438325991189426,
+      "grad_norm": 0.009724145755171776,
+      "learning_rate": 2.4102564102564105e-06,
+      "loss": 0.0021,
+      "step": 17650
+    },
+    {
+      "epoch": 19.465859030837006,
+      "grad_norm": 0.008591737598180771,
+      "learning_rate": 2.384615384615385e-06,
+      "loss": 0.0013,
+      "step": 17675
+    },
+    {
+      "epoch": 19.493392070484582,
+      "grad_norm": 0.008385499939322472,
+      "learning_rate": 2.358974358974359e-06,
+      "loss": 0.0017,
+      "step": 17700
+    },
+    {
+      "epoch": 19.520925110132158,
+      "grad_norm": 0.04597390815615654,
+      "learning_rate": 2.3333333333333336e-06,
+      "loss": 0.0013,
+      "step": 17725
+    },
+    {
+      "epoch": 19.548458149779737,
+      "grad_norm": 0.00930617842823267,
+      "learning_rate": 2.307692307692308e-06,
+      "loss": 0.0016,
+      "step": 17750
+    },
+    {
+      "epoch": 19.575991189427313,
+      "grad_norm": 0.009862055070698261,
+      "learning_rate": 2.282051282051282e-06,
+      "loss": 0.0014,
+      "step": 17775
+    },
+    {
+      "epoch": 19.60352422907489,
+      "grad_norm": 0.01388918049633503,
+      "learning_rate": 2.2564102564102566e-06,
+      "loss": 0.0011,
+      "step": 17800
+    },
+    {
+      "epoch": 19.63105726872247,
+      "grad_norm": 0.010380508378148079,
+      "learning_rate": 2.230769230769231e-06,
+      "loss": 0.0022,
+      "step": 17825
+    },
+    {
+      "epoch": 19.658590308370044,
+      "grad_norm": 0.003493061987683177,
+      "learning_rate": 2.2051282051282052e-06,
+      "loss": 0.001,
+      "step": 17850
+    },
+    {
+      "epoch": 19.68612334801762,
+      "grad_norm": 0.00607143621891737,
+      "learning_rate": 2.1794871794871797e-06,
+      "loss": 0.0016,
+      "step": 17875
+    },
+    {
+      "epoch": 19.7136563876652,
+      "grad_norm": 0.007698683068156242,
+      "learning_rate": 2.153846153846154e-06,
+      "loss": 0.0029,
+      "step": 17900
+    },
+    {
+      "epoch": 19.741189427312776,
+      "grad_norm": 0.007107453886419535,
+      "learning_rate": 2.1282051282051283e-06,
+      "loss": 0.0018,
+      "step": 17925
+    },
+    {
+      "epoch": 19.76872246696035,
+      "grad_norm": 0.0059033227153122425,
+      "learning_rate": 2.1025641025641028e-06,
+      "loss": 0.001,
+      "step": 17950
+    },
+    {
+      "epoch": 19.79625550660793,
+      "grad_norm": 0.005275961942970753,
+      "learning_rate": 2.0769230769230773e-06,
+      "loss": 0.0026,
+      "step": 17975
+    },
+    {
+      "epoch": 19.823788546255507,
+      "grad_norm": 0.016638007014989853,
+      "learning_rate": 2.0512820512820513e-06,
+      "loss": 0.0019,
+      "step": 18000
+    },
+    {
+      "epoch": 19.823788546255507,
+      "eval_cer": 22.66344158747263,
+      "eval_loss": 0.8900153040885925,
+      "eval_runtime": 1717.0751,
+      "eval_samples_per_second": 6.162,
+      "eval_steps_per_second": 1.541,
+      "eval_wer": 82.50825082508251,
+      "step": 18000
+    },
+    {
+      "epoch": 19.851321585903083,
+      "grad_norm": 0.0051730177365243435,
+      "learning_rate": 2.025641025641026e-06,
+      "loss": 0.0013,
+      "step": 18025
+    },
+    {
+      "epoch": 19.878854625550662,
+      "grad_norm": 0.00516405189409852,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.0018,
+      "step": 18050
+    },
+    {
+      "epoch": 19.90638766519824,
+      "grad_norm": 0.006816135719418526,
+      "learning_rate": 1.9743589743589744e-06,
+      "loss": 0.001,
+      "step": 18075
+    },
+    {
+      "epoch": 19.933920704845814,
+      "grad_norm": 0.005780714098364115,
+      "learning_rate": 1.948717948717949e-06,
+      "loss": 0.0009,
+      "step": 18100
+    },
+    {
+      "epoch": 19.961453744493394,
+      "grad_norm": 0.007895824499428272,
+      "learning_rate": 1.9230769230769234e-06,
+      "loss": 0.0011,
+      "step": 18125
+    },
+    {
+      "epoch": 19.98898678414097,
+      "grad_norm": 0.00839215237647295,
+      "learning_rate": 1.8974358974358975e-06,
+      "loss": 0.0011,
+      "step": 18150
+    },
+    {
+      "epoch": 20.016519823788546,
+      "grad_norm": 0.0035141175612807274,
+      "learning_rate": 1.871794871794872e-06,
+      "loss": 0.0011,
+      "step": 18175
+    },
+    {
+      "epoch": 20.044052863436125,
+      "grad_norm": 0.008937545120716095,
+      "learning_rate": 1.8461538461538465e-06,
+      "loss": 0.0009,
+      "step": 18200
+    },
+    {
+      "epoch": 20.0715859030837,
+      "grad_norm": 0.0037842292804270983,
+      "learning_rate": 1.8205128205128205e-06,
+      "loss": 0.0011,
+      "step": 18225
+    },
+    {
+      "epoch": 20.099118942731277,
+      "grad_norm": 0.003870155429467559,
+      "learning_rate": 1.794871794871795e-06,
+      "loss": 0.0009,
+      "step": 18250
+    },
+    {
+      "epoch": 20.126651982378856,
+      "grad_norm": 0.003817240707576275,
+      "learning_rate": 1.7692307692307695e-06,
+      "loss": 0.0009,
+      "step": 18275
+    },
+    {
+      "epoch": 20.154185022026432,
+      "grad_norm": 0.007133571431040764,
+      "learning_rate": 1.7435897435897436e-06,
+      "loss": 0.0008,
+      "step": 18300
+    },
+    {
+      "epoch": 20.181718061674008,
+      "grad_norm": 0.011461510322988033,
+      "learning_rate": 1.717948717948718e-06,
+      "loss": 0.0007,
+      "step": 18325
+    },
+    {
+      "epoch": 20.209251101321588,
+      "grad_norm": 0.003969813231378794,
+      "learning_rate": 1.6923076923076926e-06,
+      "loss": 0.001,
+      "step": 18350
+    },
+    {
+      "epoch": 20.236784140969164,
+      "grad_norm": 0.007272036280483007,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 0.001,
+      "step": 18375
+    },
+    {
+      "epoch": 20.26431718061674,
+      "grad_norm": 0.006936676800251007,
+      "learning_rate": 1.6410256410256412e-06,
+      "loss": 0.0009,
+      "step": 18400
+    },
+    {
+      "epoch": 20.291850220264315,
+      "grad_norm": 0.005403169430792332,
+      "learning_rate": 1.6153846153846157e-06,
+      "loss": 0.0007,
+      "step": 18425
+    },
+    {
+      "epoch": 20.319383259911895,
+      "grad_norm": 0.009516764432191849,
+      "learning_rate": 1.5897435897435897e-06,
+      "loss": 0.0029,
+      "step": 18450
+    },
+    {
+      "epoch": 20.34691629955947,
+      "grad_norm": 0.003727905685082078,
+      "learning_rate": 1.5641025641025642e-06,
+      "loss": 0.0008,
+      "step": 18475
+    },
+    {
+      "epoch": 20.374449339207047,
+      "grad_norm": 0.006022660061717033,
+      "learning_rate": 1.5384615384615387e-06,
+      "loss": 0.002,
+      "step": 18500
+    },
+    {
+      "epoch": 20.401982378854626,
+      "grad_norm": 0.004205208737403154,
+      "learning_rate": 1.5128205128205128e-06,
+      "loss": 0.001,
+      "step": 18525
+    },
+    {
+      "epoch": 20.429515418502202,
+      "grad_norm": 0.10070935636758804,
+      "learning_rate": 1.4871794871794873e-06,
+      "loss": 0.0009,
+      "step": 18550
+    },
+    {
+      "epoch": 20.457048458149778,
+      "grad_norm": 0.004871605895459652,
+      "learning_rate": 1.4615384615384618e-06,
+      "loss": 0.0009,
+      "step": 18575
+    },
+    {
+      "epoch": 20.484581497797357,
+      "grad_norm": 0.005528348032385111,
+      "learning_rate": 1.4358974358974359e-06,
+      "loss": 0.0008,
+      "step": 18600
+    },
+    {
+      "epoch": 20.512114537444933,
+      "grad_norm": 0.007922505959868431,
+      "learning_rate": 1.4102564102564104e-06,
+      "loss": 0.0007,
+      "step": 18625
+    },
+    {
+      "epoch": 20.53964757709251,
+      "grad_norm": 0.004503941163420677,
+      "learning_rate": 1.3846153846153848e-06,
+      "loss": 0.001,
+      "step": 18650
+    },
+    {
+      "epoch": 20.56718061674009,
+      "grad_norm": 0.04012945666909218,
+      "learning_rate": 1.358974358974359e-06,
+      "loss": 0.0011,
+      "step": 18675
+    },
+    {
+      "epoch": 20.594713656387665,
+      "grad_norm": 0.011533623561263084,
+      "learning_rate": 1.3333333333333334e-06,
+      "loss": 0.0011,
+      "step": 18700
+    },
+    {
+      "epoch": 20.62224669603524,
+      "grad_norm": 0.008248466067016125,
+      "learning_rate": 1.307692307692308e-06,
+      "loss": 0.0009,
+      "step": 18725
+    },
+    {
+      "epoch": 20.64977973568282,
+      "grad_norm": 0.004799861926585436,
+      "learning_rate": 1.282051282051282e-06,
+      "loss": 0.0007,
+      "step": 18750
+    },
+    {
+      "epoch": 20.677312775330396,
+      "grad_norm": 0.006359547842293978,
+      "learning_rate": 1.2564102564102565e-06,
+      "loss": 0.0007,
+      "step": 18775
+    },
+    {
+      "epoch": 20.704845814977972,
+      "grad_norm": 0.006216075737029314,
+      "learning_rate": 1.230769230769231e-06,
+      "loss": 0.001,
+      "step": 18800
+    },
+    {
+      "epoch": 20.73237885462555,
+      "grad_norm": 0.08518233150243759,
+      "learning_rate": 1.2051282051282053e-06,
+      "loss": 0.0012,
+      "step": 18825
+    },
+    {
+      "epoch": 20.759911894273127,
+      "grad_norm": 0.004133372101932764,
+      "learning_rate": 1.1794871794871795e-06,
+      "loss": 0.001,
+      "step": 18850
+    },
+    {
+      "epoch": 20.787444933920703,
+      "grad_norm": 0.006971430499106646,
+      "learning_rate": 1.153846153846154e-06,
+      "loss": 0.0014,
+      "step": 18875
+    },
+    {
+      "epoch": 20.814977973568283,
+      "grad_norm": 0.005109596531838179,
+      "learning_rate": 1.1282051282051283e-06,
+      "loss": 0.0011,
+      "step": 18900
+    },
+    {
+      "epoch": 20.84251101321586,
+      "grad_norm": 0.038249921053647995,
+      "learning_rate": 1.1025641025641026e-06,
+      "loss": 0.0012,
+      "step": 18925
+    },
+    {
+      "epoch": 20.870044052863435,
+      "grad_norm": 0.008875112980604172,
+      "learning_rate": 1.076923076923077e-06,
+      "loss": 0.0007,
+      "step": 18950
+    },
+    {
+      "epoch": 20.897577092511014,
+      "grad_norm": 0.0044938609935343266,
+      "learning_rate": 1.0512820512820514e-06,
+      "loss": 0.0011,
+      "step": 18975
+    },
+    {
+      "epoch": 20.92511013215859,
+      "grad_norm": 0.07247400283813477,
+      "learning_rate": 1.0256410256410257e-06,
+      "loss": 0.0008,
+      "step": 19000
+    },
+    {
+      "epoch": 20.92511013215859,
+      "eval_cer": 22.58778214666468,
+      "eval_loss": 0.892371654510498,
+      "eval_runtime": 1719.93,
+      "eval_samples_per_second": 6.152,
+      "eval_steps_per_second": 1.538,
+      "eval_wer": 82.47996228194248,
+      "step": 19000
+    },
+    {
+      "epoch": 20.952643171806166,
+      "grad_norm": 0.006040550768375397,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.0007,
+      "step": 19025
+    },
+    {
+      "epoch": 20.980176211453745,
+      "grad_norm": 0.00338306394405663,
+      "learning_rate": 9.743589743589745e-07,
+      "loss": 0.001,
+      "step": 19050
+    },
+    {
+      "epoch": 21.00770925110132,
+      "grad_norm": 0.007667516358196735,
+      "learning_rate": 9.487179487179487e-07,
+      "loss": 0.0012,
+      "step": 19075
+    },
+    {
+      "epoch": 21.035242290748897,
+      "grad_norm": 0.0036987056955695152,
+      "learning_rate": 9.230769230769232e-07,
+      "loss": 0.0006,
+      "step": 19100
+    },
+    {
+      "epoch": 21.062775330396477,
+      "grad_norm": 0.0036683231592178345,
+      "learning_rate": 8.974358974358975e-07,
+      "loss": 0.0011,
+      "step": 19125
+    },
+    {
+      "epoch": 21.090308370044053,
+      "grad_norm": 0.007168483920395374,
+      "learning_rate": 8.717948717948718e-07,
+      "loss": 0.0009,
+      "step": 19150
+    },
+    {
+      "epoch": 21.11784140969163,
+      "grad_norm": 0.0029900213703513145,
+      "learning_rate": 8.461538461538463e-07,
+      "loss": 0.0017,
+      "step": 19175
+    },
+    {
+      "epoch": 21.145374449339208,
+      "grad_norm": 0.00418079923838377,
+      "learning_rate": 8.205128205128206e-07,
+      "loss": 0.0009,
+      "step": 19200
+    },
+    {
+      "epoch": 21.172907488986784,
+      "grad_norm": 0.003424633527174592,
+      "learning_rate": 7.948717948717949e-07,
+      "loss": 0.0007,
+      "step": 19225
+    },
+    {
+      "epoch": 21.20044052863436,
+      "grad_norm": 0.0028422100003808737,
+      "learning_rate": 7.692307692307694e-07,
+      "loss": 0.0006,
+      "step": 19250
+    },
+    {
+      "epoch": 21.22797356828194,
+      "grad_norm": 0.004691548179835081,
+      "learning_rate": 7.435897435897436e-07,
+      "loss": 0.0006,
+      "step": 19275
+    },
+    {
+      "epoch": 21.255506607929515,
+      "grad_norm": 0.004589064046740532,
+      "learning_rate": 7.179487179487179e-07,
+      "loss": 0.0005,
+      "step": 19300
+    },
+    {
+      "epoch": 21.28303964757709,
+      "grad_norm": 0.005557245574891567,
+      "learning_rate": 6.923076923076924e-07,
+      "loss": 0.0011,
+      "step": 19325
+    },
+    {
+      "epoch": 21.31057268722467,
+      "grad_norm": 0.0031431138049811125,
+      "learning_rate": 6.666666666666667e-07,
+      "loss": 0.0006,
+      "step": 19350
+    },
+    {
+      "epoch": 21.338105726872246,
+      "grad_norm": 0.004688850603997707,
+      "learning_rate": 6.41025641025641e-07,
+      "loss": 0.0007,
+      "step": 19375
+    },
+    {
+      "epoch": 21.365638766519822,
+      "grad_norm": 0.007398667279630899,
+      "learning_rate": 6.153846153846155e-07,
+      "loss": 0.0006,
+      "step": 19400
+    },
+    {
+      "epoch": 21.393171806167402,
+      "grad_norm": 0.005217025522142649,
+      "learning_rate": 5.897435897435898e-07,
+      "loss": 0.0008,
+      "step": 19425
+    },
+    {
+      "epoch": 21.420704845814978,
+      "grad_norm": 0.004331599920988083,
+      "learning_rate": 5.641025641025642e-07,
+      "loss": 0.0006,
+      "step": 19450
+    },
+    {
+      "epoch": 21.448237885462554,
+      "grad_norm": 0.004927519708871841,
+      "learning_rate": 5.384615384615386e-07,
+      "loss": 0.0009,
+      "step": 19475
+    },
+    {
+      "epoch": 21.475770925110133,
+      "grad_norm": 0.0034796635154634714,
+      "learning_rate": 5.128205128205128e-07,
+      "loss": 0.001,
+      "step": 19500
+    },
+    {
+      "epoch": 21.50330396475771,
+      "grad_norm": 0.00347193144261837,
+      "learning_rate": 4.871794871794872e-07,
+      "loss": 0.0006,
+      "step": 19525
+    },
+    {
+      "epoch": 21.530837004405285,
+      "grad_norm": 0.0074023474007844925,
+      "learning_rate": 4.615384615384616e-07,
+      "loss": 0.0006,
+      "step": 19550
+    },
+    {
+      "epoch": 21.558370044052865,
+      "grad_norm": 0.0036716184113174677,
+      "learning_rate": 4.358974358974359e-07,
+      "loss": 0.0006,
+      "step": 19575
+    },
+    {
+      "epoch": 21.58590308370044,
+      "grad_norm": 0.006558453664183617,
+      "learning_rate": 4.102564102564103e-07,
+      "loss": 0.0007,
+      "step": 19600
+    },
+    {
+      "epoch": 21.613436123348016,
+      "grad_norm": 0.0030144904740154743,
+      "learning_rate": 3.846153846153847e-07,
+      "loss": 0.0007,
+      "step": 19625
+    },
+    {
+      "epoch": 21.640969162995596,
+      "grad_norm": 0.0037687935400754213,
+      "learning_rate": 3.5897435897435896e-07,
+      "loss": 0.0007,
+      "step": 19650
+    },
+    {
+      "epoch": 21.66850220264317,
+      "grad_norm": 0.0722261294722557,
+      "learning_rate": 3.3333333333333335e-07,
+      "loss": 0.0007,
+      "step": 19675
+    },
+    {
+      "epoch": 21.696035242290748,
+      "grad_norm": 0.0034861781168729067,
+      "learning_rate": 3.0769230769230774e-07,
+      "loss": 0.0007,
+      "step": 19700
+    },
+    {
+      "epoch": 21.723568281938327,
+      "grad_norm": 0.004740406293421984,
+      "learning_rate": 2.820512820512821e-07,
+      "loss": 0.0009,
+      "step": 19725
+    },
+    {
+      "epoch": 21.751101321585903,
+      "grad_norm": 0.0040426794439554214,
+      "learning_rate": 2.564102564102564e-07,
+      "loss": 0.0007,
+      "step": 19750
+    },
+    {
+      "epoch": 21.77863436123348,
+      "grad_norm": 0.005103557836264372,
+      "learning_rate": 2.307692307692308e-07,
+      "loss": 0.0006,
+      "step": 19775
+    },
+    {
+      "epoch": 21.80616740088106,
+      "grad_norm": 0.007594733498990536,
+      "learning_rate": 2.0512820512820514e-07,
+      "loss": 0.0006,
+      "step": 19800
+    },
+    {
+      "epoch": 21.833700440528634,
+      "grad_norm": 0.004270041361451149,
+      "learning_rate": 1.7948717948717948e-07,
+      "loss": 0.0007,
+      "step": 19825
+    },
+    {
+      "epoch": 21.86123348017621,
+      "grad_norm": 0.00658000260591507,
+      "learning_rate": 1.5384615384615387e-07,
+      "loss": 0.0006,
+      "step": 19850
+    },
+    {
+      "epoch": 21.88876651982379,
+      "grad_norm": 0.004829788114875555,
+      "learning_rate": 1.282051282051282e-07,
+      "loss": 0.0005,
+      "step": 19875
+    },
+    {
+      "epoch": 21.916299559471366,
+      "grad_norm": 0.004017261788249016,
+      "learning_rate": 1.0256410256410257e-07,
+      "loss": 0.0006,
+      "step": 19900
+    },
+    {
+      "epoch": 21.94383259911894,
+      "grad_norm": 0.005543394014239311,
+      "learning_rate": 7.692307692307694e-08,
+      "loss": 0.0009,
+      "step": 19925
+    },
+    {
+      "epoch": 21.97136563876652,
+      "grad_norm": 0.006894242484122515,
+      "learning_rate": 5.1282051282051286e-08,
+      "loss": 0.0006,
+      "step": 19950
+    },
+    {
+      "epoch": 21.998898678414097,
+      "grad_norm": 0.004000292159616947,
+      "learning_rate": 2.5641025641025643e-08,
+      "loss": 0.0007,
+      "step": 19975
+    },
+    {
+      "epoch": 22.026431718061673,
+      "grad_norm": 0.004270936828106642,
+      "learning_rate": 0.0,
+      "loss": 0.0006,
+      "step": 20000
+    },
+    {
+      "epoch": 22.026431718061673,
+      "eval_cer": 22.62675822223241,
+      "eval_loss": 0.8947405219078064,
+      "eval_runtime": 1706.5603,
+      "eval_samples_per_second": 6.2,
+      "eval_steps_per_second": 1.55,
+      "eval_wer": 82.34794908062236,
+      "step": 20000
     }
   ],
   "logging_steps": 25,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.4362863729801953e+20,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null