iteboshi-tiny / trainer_state.json
kiritan's picture
训练结束,上传最终模型
d1626f0 verified
{
"best_metric": 115.08722300801509,
"best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-19000",
"epoch": 22.026431718061673,
"eval_steps": 1000,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02753303964757709,
"grad_norm": 15.011414527893066,
"learning_rate": 1.0000000000000002e-06,
"loss": 3.3645,
"step": 25
},
{
"epoch": 0.05506607929515418,
"grad_norm": 4.264221668243408,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.6877,
"step": 50
},
{
"epoch": 0.08259911894273128,
"grad_norm": 2.5181801319122314,
"learning_rate": 3e-06,
"loss": 1.6827,
"step": 75
},
{
"epoch": 0.11013215859030837,
"grad_norm": 1.4112682342529297,
"learning_rate": 4.000000000000001e-06,
"loss": 1.1058,
"step": 100
},
{
"epoch": 0.13766519823788545,
"grad_norm": 1.4806125164031982,
"learning_rate": 5e-06,
"loss": 0.9359,
"step": 125
},
{
"epoch": 0.16519823788546256,
"grad_norm": 1.3319814205169678,
"learning_rate": 6e-06,
"loss": 0.9862,
"step": 150
},
{
"epoch": 0.19273127753303965,
"grad_norm": 1.4169176816940308,
"learning_rate": 7e-06,
"loss": 0.8884,
"step": 175
},
{
"epoch": 0.22026431718061673,
"grad_norm": 1.5349735021591187,
"learning_rate": 8.000000000000001e-06,
"loss": 0.9031,
"step": 200
},
{
"epoch": 0.24779735682819384,
"grad_norm": 1.4419125318527222,
"learning_rate": 9e-06,
"loss": 0.8748,
"step": 225
},
{
"epoch": 0.2753303964757709,
"grad_norm": 1.49337637424469,
"learning_rate": 1e-05,
"loss": 0.8685,
"step": 250
},
{
"epoch": 0.30286343612334804,
"grad_norm": 1.4557123184204102,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.8104,
"step": 275
},
{
"epoch": 0.3303964757709251,
"grad_norm": 1.2917561531066895,
"learning_rate": 1.2e-05,
"loss": 0.818,
"step": 300
},
{
"epoch": 0.3579295154185022,
"grad_norm": 1.221163034439087,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.8098,
"step": 325
},
{
"epoch": 0.3854625550660793,
"grad_norm": 1.2775193452835083,
"learning_rate": 1.4e-05,
"loss": 0.8419,
"step": 350
},
{
"epoch": 0.4129955947136564,
"grad_norm": 1.3367918729782104,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.7582,
"step": 375
},
{
"epoch": 0.44052863436123346,
"grad_norm": 1.5160075426101685,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.8009,
"step": 400
},
{
"epoch": 0.46806167400881055,
"grad_norm": 1.368986964225769,
"learning_rate": 1.7e-05,
"loss": 0.7772,
"step": 425
},
{
"epoch": 0.4955947136563877,
"grad_norm": 1.3891838788986206,
"learning_rate": 1.8e-05,
"loss": 0.7927,
"step": 450
},
{
"epoch": 0.5231277533039648,
"grad_norm": 1.1749062538146973,
"learning_rate": 1.9e-05,
"loss": 0.7827,
"step": 475
},
{
"epoch": 0.5506607929515418,
"grad_norm": 1.3586732149124146,
"learning_rate": 2e-05,
"loss": 0.7839,
"step": 500
},
{
"epoch": 0.5781938325991189,
"grad_norm": 1.2230278253555298,
"learning_rate": 1.9974358974358975e-05,
"loss": 0.7555,
"step": 525
},
{
"epoch": 0.6057268722466961,
"grad_norm": 1.2390245199203491,
"learning_rate": 1.994871794871795e-05,
"loss": 0.7625,
"step": 550
},
{
"epoch": 0.6332599118942731,
"grad_norm": 1.3164693117141724,
"learning_rate": 1.9923076923076926e-05,
"loss": 0.7323,
"step": 575
},
{
"epoch": 0.6607929515418502,
"grad_norm": 1.3300975561141968,
"learning_rate": 1.98974358974359e-05,
"loss": 0.7453,
"step": 600
},
{
"epoch": 0.6883259911894273,
"grad_norm": 1.4462056159973145,
"learning_rate": 1.9871794871794873e-05,
"loss": 0.7177,
"step": 625
},
{
"epoch": 0.7158590308370044,
"grad_norm": 1.4526742696762085,
"learning_rate": 1.9846153846153847e-05,
"loss": 0.7449,
"step": 650
},
{
"epoch": 0.7433920704845814,
"grad_norm": 1.0672987699508667,
"learning_rate": 1.9820512820512824e-05,
"loss": 0.7286,
"step": 675
},
{
"epoch": 0.7709251101321586,
"grad_norm": 1.0843076705932617,
"learning_rate": 1.9794871794871798e-05,
"loss": 0.7252,
"step": 700
},
{
"epoch": 0.7984581497797357,
"grad_norm": 1.2082732915878296,
"learning_rate": 1.976923076923077e-05,
"loss": 0.7562,
"step": 725
},
{
"epoch": 0.8259911894273128,
"grad_norm": 1.2885624170303345,
"learning_rate": 1.9743589743589745e-05,
"loss": 0.7521,
"step": 750
},
{
"epoch": 0.8535242290748899,
"grad_norm": 1.263953447341919,
"learning_rate": 1.9717948717948722e-05,
"loss": 0.688,
"step": 775
},
{
"epoch": 0.8810572687224669,
"grad_norm": 1.4190903902053833,
"learning_rate": 1.9692307692307696e-05,
"loss": 0.7404,
"step": 800
},
{
"epoch": 0.9085903083700441,
"grad_norm": 1.023093342781067,
"learning_rate": 1.9666666666666666e-05,
"loss": 0.6488,
"step": 825
},
{
"epoch": 0.9361233480176211,
"grad_norm": 1.170280933380127,
"learning_rate": 1.9641025641025643e-05,
"loss": 0.6808,
"step": 850
},
{
"epoch": 0.9636563876651982,
"grad_norm": 0.9678378105163574,
"learning_rate": 1.9615384615384617e-05,
"loss": 0.7139,
"step": 875
},
{
"epoch": 0.9911894273127754,
"grad_norm": 1.1732635498046875,
"learning_rate": 1.958974358974359e-05,
"loss": 0.6886,
"step": 900
},
{
"epoch": 1.0187224669603525,
"grad_norm": 0.9801719784736633,
"learning_rate": 1.9564102564102564e-05,
"loss": 0.6295,
"step": 925
},
{
"epoch": 1.0462555066079295,
"grad_norm": 1.0827771425247192,
"learning_rate": 1.953846153846154e-05,
"loss": 0.575,
"step": 950
},
{
"epoch": 1.0737885462555066,
"grad_norm": 1.4441510438919067,
"learning_rate": 1.9512820512820515e-05,
"loss": 0.5829,
"step": 975
},
{
"epoch": 1.1013215859030836,
"grad_norm": 1.0029908418655396,
"learning_rate": 1.9487179487179488e-05,
"loss": 0.5915,
"step": 1000
},
{
"epoch": 1.1013215859030836,
"eval_cer": 65.05164330012725,
"eval_loss": 0.7333521246910095,
"eval_runtime": 476.6198,
"eval_samples_per_second": 22.2,
"eval_steps_per_second": 5.552,
"eval_wer": 158.28382838283827,
"step": 1000
},
{
"epoch": 1.1288546255506609,
"grad_norm": 1.2138437032699585,
"learning_rate": 1.9461538461538462e-05,
"loss": 0.5968,
"step": 1025
},
{
"epoch": 1.1563876651982379,
"grad_norm": 1.1982746124267578,
"learning_rate": 1.943589743589744e-05,
"loss": 0.6058,
"step": 1050
},
{
"epoch": 1.183920704845815,
"grad_norm": 1.052754282951355,
"learning_rate": 1.9410256410256413e-05,
"loss": 0.5817,
"step": 1075
},
{
"epoch": 1.2114537444933922,
"grad_norm": 1.165330171585083,
"learning_rate": 1.9384615384615386e-05,
"loss": 0.557,
"step": 1100
},
{
"epoch": 1.2389867841409692,
"grad_norm": 0.9782461524009705,
"learning_rate": 1.935897435897436e-05,
"loss": 0.6146,
"step": 1125
},
{
"epoch": 1.2665198237885462,
"grad_norm": 1.2477244138717651,
"learning_rate": 1.9333333333333333e-05,
"loss": 0.575,
"step": 1150
},
{
"epoch": 1.2940528634361232,
"grad_norm": 0.9106314778327942,
"learning_rate": 1.930769230769231e-05,
"loss": 0.5746,
"step": 1175
},
{
"epoch": 1.3215859030837005,
"grad_norm": 1.1797159910202026,
"learning_rate": 1.9282051282051284e-05,
"loss": 0.5875,
"step": 1200
},
{
"epoch": 1.3491189427312775,
"grad_norm": 1.2631396055221558,
"learning_rate": 1.9256410256410258e-05,
"loss": 0.5728,
"step": 1225
},
{
"epoch": 1.3766519823788546,
"grad_norm": 1.024104118347168,
"learning_rate": 1.923076923076923e-05,
"loss": 0.6005,
"step": 1250
},
{
"epoch": 1.4041850220264318,
"grad_norm": 1.0408469438552856,
"learning_rate": 1.920512820512821e-05,
"loss": 0.5665,
"step": 1275
},
{
"epoch": 1.4317180616740088,
"grad_norm": 1.000183343887329,
"learning_rate": 1.9179487179487182e-05,
"loss": 0.5731,
"step": 1300
},
{
"epoch": 1.4592511013215859,
"grad_norm": 1.0863183736801147,
"learning_rate": 1.9153846153846156e-05,
"loss": 0.6083,
"step": 1325
},
{
"epoch": 1.4867841409691631,
"grad_norm": 1.1477770805358887,
"learning_rate": 1.912820512820513e-05,
"loss": 0.6137,
"step": 1350
},
{
"epoch": 1.51431718061674,
"grad_norm": 1.1528939008712769,
"learning_rate": 1.9102564102564106e-05,
"loss": 0.5898,
"step": 1375
},
{
"epoch": 1.5418502202643172,
"grad_norm": 0.9799731373786926,
"learning_rate": 1.907692307692308e-05,
"loss": 0.6014,
"step": 1400
},
{
"epoch": 1.5693832599118944,
"grad_norm": 1.0169357061386108,
"learning_rate": 1.905128205128205e-05,
"loss": 0.6065,
"step": 1425
},
{
"epoch": 1.5969162995594712,
"grad_norm": 1.0970255136489868,
"learning_rate": 1.9025641025641027e-05,
"loss": 0.5905,
"step": 1450
},
{
"epoch": 1.6244493392070485,
"grad_norm": 1.1324760913848877,
"learning_rate": 1.9e-05,
"loss": 0.5664,
"step": 1475
},
{
"epoch": 1.6519823788546255,
"grad_norm": 1.0170447826385498,
"learning_rate": 1.8974358974358975e-05,
"loss": 0.5502,
"step": 1500
},
{
"epoch": 1.6795154185022025,
"grad_norm": 1.145984172821045,
"learning_rate": 1.894871794871795e-05,
"loss": 0.5548,
"step": 1525
},
{
"epoch": 1.7070484581497798,
"grad_norm": 1.0441069602966309,
"learning_rate": 1.8923076923076925e-05,
"loss": 0.5479,
"step": 1550
},
{
"epoch": 1.7345814977973568,
"grad_norm": 1.049399495124817,
"learning_rate": 1.88974358974359e-05,
"loss": 0.5633,
"step": 1575
},
{
"epoch": 1.7621145374449338,
"grad_norm": 1.0441521406173706,
"learning_rate": 1.8871794871794873e-05,
"loss": 0.5587,
"step": 1600
},
{
"epoch": 1.789647577092511,
"grad_norm": 1.2298874855041504,
"learning_rate": 1.8846153846153846e-05,
"loss": 0.6115,
"step": 1625
},
{
"epoch": 1.8171806167400881,
"grad_norm": 1.3439756631851196,
"learning_rate": 1.8820512820512823e-05,
"loss": 0.6115,
"step": 1650
},
{
"epoch": 1.8447136563876652,
"grad_norm": 1.3676002025604248,
"learning_rate": 1.8794871794871797e-05,
"loss": 0.611,
"step": 1675
},
{
"epoch": 1.8722466960352424,
"grad_norm": 0.9913831949234009,
"learning_rate": 1.876923076923077e-05,
"loss": 0.58,
"step": 1700
},
{
"epoch": 1.8997797356828194,
"grad_norm": 0.9699712991714478,
"learning_rate": 1.8743589743589744e-05,
"loss": 0.5701,
"step": 1725
},
{
"epoch": 1.9273127753303965,
"grad_norm": 1.0531800985336304,
"learning_rate": 1.8717948717948718e-05,
"loss": 0.5763,
"step": 1750
},
{
"epoch": 1.9548458149779737,
"grad_norm": 1.2362020015716553,
"learning_rate": 1.8692307692307695e-05,
"loss": 0.5716,
"step": 1775
},
{
"epoch": 1.9823788546255505,
"grad_norm": 1.1315600872039795,
"learning_rate": 1.866666666666667e-05,
"loss": 0.5633,
"step": 1800
},
{
"epoch": 2.0099118942731278,
"grad_norm": 1.0342429876327515,
"learning_rate": 1.8641025641025642e-05,
"loss": 0.5612,
"step": 1825
},
{
"epoch": 2.037444933920705,
"grad_norm": 0.8838308453559875,
"learning_rate": 1.8615384615384616e-05,
"loss": 0.4806,
"step": 1850
},
{
"epoch": 2.064977973568282,
"grad_norm": 1.3863153457641602,
"learning_rate": 1.8589743589743593e-05,
"loss": 0.4645,
"step": 1875
},
{
"epoch": 2.092511013215859,
"grad_norm": 0.6479698419570923,
"learning_rate": 1.8564102564102567e-05,
"loss": 0.4494,
"step": 1900
},
{
"epoch": 2.1200440528634363,
"grad_norm": 1.058168649673462,
"learning_rate": 1.853846153846154e-05,
"loss": 0.5045,
"step": 1925
},
{
"epoch": 2.147577092511013,
"grad_norm": 1.0008894205093384,
"learning_rate": 1.8512820512820514e-05,
"loss": 0.437,
"step": 1950
},
{
"epoch": 2.1751101321585904,
"grad_norm": 1.0125486850738525,
"learning_rate": 1.848717948717949e-05,
"loss": 0.4645,
"step": 1975
},
{
"epoch": 2.202643171806167,
"grad_norm": 0.9113588929176331,
"learning_rate": 1.8461538461538465e-05,
"loss": 0.4754,
"step": 2000
},
{
"epoch": 2.202643171806167,
"eval_cer": 66.80212763518392,
"eval_loss": 0.680008590221405,
"eval_runtime": 471.9604,
"eval_samples_per_second": 22.419,
"eval_steps_per_second": 5.606,
"eval_wer": 176.77510608203676,
"step": 2000
},
{
"epoch": 2.2301762114537445,
"grad_norm": 0.8999997973442078,
"learning_rate": 1.8435897435897435e-05,
"loss": 0.4651,
"step": 2025
},
{
"epoch": 2.2577092511013217,
"grad_norm": 1.2379090785980225,
"learning_rate": 1.8410256410256412e-05,
"loss": 0.4663,
"step": 2050
},
{
"epoch": 2.2852422907488985,
"grad_norm": 0.773048460483551,
"learning_rate": 1.8384615384615386e-05,
"loss": 0.4431,
"step": 2075
},
{
"epoch": 2.3127753303964758,
"grad_norm": 0.908449649810791,
"learning_rate": 1.835897435897436e-05,
"loss": 0.4742,
"step": 2100
},
{
"epoch": 2.340308370044053,
"grad_norm": 0.7912779450416565,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.4306,
"step": 2125
},
{
"epoch": 2.36784140969163,
"grad_norm": 1.0128527879714966,
"learning_rate": 1.830769230769231e-05,
"loss": 0.452,
"step": 2150
},
{
"epoch": 2.395374449339207,
"grad_norm": 0.9093798398971558,
"learning_rate": 1.8282051282051284e-05,
"loss": 0.4623,
"step": 2175
},
{
"epoch": 2.4229074889867843,
"grad_norm": 0.8354513049125671,
"learning_rate": 1.8256410256410257e-05,
"loss": 0.4436,
"step": 2200
},
{
"epoch": 2.450440528634361,
"grad_norm": 0.9825035929679871,
"learning_rate": 1.823076923076923e-05,
"loss": 0.4354,
"step": 2225
},
{
"epoch": 2.4779735682819384,
"grad_norm": 0.9167904257774353,
"learning_rate": 1.8205128205128208e-05,
"loss": 0.4573,
"step": 2250
},
{
"epoch": 2.505506607929515,
"grad_norm": 0.855309009552002,
"learning_rate": 1.817948717948718e-05,
"loss": 0.4975,
"step": 2275
},
{
"epoch": 2.5330396475770924,
"grad_norm": 0.9728146195411682,
"learning_rate": 1.8153846153846155e-05,
"loss": 0.4755,
"step": 2300
},
{
"epoch": 2.5605726872246697,
"grad_norm": 0.9784273505210876,
"learning_rate": 1.812820512820513e-05,
"loss": 0.4574,
"step": 2325
},
{
"epoch": 2.5881057268722465,
"grad_norm": 1.0068365335464478,
"learning_rate": 1.8102564102564102e-05,
"loss": 0.468,
"step": 2350
},
{
"epoch": 2.6156387665198237,
"grad_norm": 0.883303701877594,
"learning_rate": 1.807692307692308e-05,
"loss": 0.4711,
"step": 2375
},
{
"epoch": 2.643171806167401,
"grad_norm": 0.9032068252563477,
"learning_rate": 1.8051282051282053e-05,
"loss": 0.446,
"step": 2400
},
{
"epoch": 2.670704845814978,
"grad_norm": 0.887459397315979,
"learning_rate": 1.8025641025641027e-05,
"loss": 0.441,
"step": 2425
},
{
"epoch": 2.698237885462555,
"grad_norm": 0.9685478210449219,
"learning_rate": 1.8e-05,
"loss": 0.4884,
"step": 2450
},
{
"epoch": 2.7257709251101323,
"grad_norm": 0.9034000039100647,
"learning_rate": 1.7974358974358977e-05,
"loss": 0.419,
"step": 2475
},
{
"epoch": 2.753303964757709,
"grad_norm": 1.1092092990875244,
"learning_rate": 1.794871794871795e-05,
"loss": 0.4934,
"step": 2500
},
{
"epoch": 2.7808370044052864,
"grad_norm": 0.9451773166656494,
"learning_rate": 1.7923076923076925e-05,
"loss": 0.4584,
"step": 2525
},
{
"epoch": 2.8083700440528636,
"grad_norm": 0.9756285548210144,
"learning_rate": 1.78974358974359e-05,
"loss": 0.4273,
"step": 2550
},
{
"epoch": 2.8359030837004404,
"grad_norm": 1.0345897674560547,
"learning_rate": 1.7871794871794875e-05,
"loss": 0.4462,
"step": 2575
},
{
"epoch": 2.8634361233480177,
"grad_norm": 0.7886134386062622,
"learning_rate": 1.784615384615385e-05,
"loss": 0.4645,
"step": 2600
},
{
"epoch": 2.890969162995595,
"grad_norm": 1.0247100591659546,
"learning_rate": 1.7820512820512823e-05,
"loss": 0.4285,
"step": 2625
},
{
"epoch": 2.9185022026431717,
"grad_norm": 1.1661251783370972,
"learning_rate": 1.7794871794871796e-05,
"loss": 0.4543,
"step": 2650
},
{
"epoch": 2.946035242290749,
"grad_norm": 0.8847468495368958,
"learning_rate": 1.776923076923077e-05,
"loss": 0.4672,
"step": 2675
},
{
"epoch": 2.9735682819383262,
"grad_norm": 1.077468991279602,
"learning_rate": 1.7743589743589744e-05,
"loss": 0.4421,
"step": 2700
},
{
"epoch": 3.001101321585903,
"grad_norm": 0.8612393140792847,
"learning_rate": 1.7717948717948717e-05,
"loss": 0.4854,
"step": 2725
},
{
"epoch": 3.0286343612334803,
"grad_norm": 0.7957535982131958,
"learning_rate": 1.7692307692307694e-05,
"loss": 0.3886,
"step": 2750
},
{
"epoch": 3.056167400881057,
"grad_norm": 0.8133636713027954,
"learning_rate": 1.7666666666666668e-05,
"loss": 0.3436,
"step": 2775
},
{
"epoch": 3.0837004405286343,
"grad_norm": 0.9280221462249756,
"learning_rate": 1.7641025641025642e-05,
"loss": 0.3586,
"step": 2800
},
{
"epoch": 3.1112334801762116,
"grad_norm": 0.6199328303337097,
"learning_rate": 1.7615384615384615e-05,
"loss": 0.3373,
"step": 2825
},
{
"epoch": 3.1387665198237884,
"grad_norm": 0.8616262674331665,
"learning_rate": 1.7589743589743592e-05,
"loss": 0.3515,
"step": 2850
},
{
"epoch": 3.1662995594713657,
"grad_norm": 0.8298392295837402,
"learning_rate": 1.7564102564102566e-05,
"loss": 0.3781,
"step": 2875
},
{
"epoch": 3.193832599118943,
"grad_norm": 1.0421444177627563,
"learning_rate": 1.753846153846154e-05,
"loss": 0.3673,
"step": 2900
},
{
"epoch": 3.2213656387665197,
"grad_norm": 0.769826352596283,
"learning_rate": 1.7512820512820513e-05,
"loss": 0.3726,
"step": 2925
},
{
"epoch": 3.248898678414097,
"grad_norm": 0.9118036031723022,
"learning_rate": 1.7487179487179487e-05,
"loss": 0.3702,
"step": 2950
},
{
"epoch": 3.2764317180616738,
"grad_norm": 0.8753936290740967,
"learning_rate": 1.7461538461538464e-05,
"loss": 0.3637,
"step": 2975
},
{
"epoch": 3.303964757709251,
"grad_norm": 0.8670012354850769,
"learning_rate": 1.7435897435897438e-05,
"loss": 0.3484,
"step": 3000
},
{
"epoch": 3.303964757709251,
"eval_cer": 86.51599738631023,
"eval_loss": 0.6673649549484253,
"eval_runtime": 498.8483,
"eval_samples_per_second": 21.211,
"eval_steps_per_second": 5.304,
"eval_wer": 250.1933050447902,
"step": 3000
},
{
"epoch": 3.3314977973568283,
"grad_norm": 0.8442785143852234,
"learning_rate": 1.741025641025641e-05,
"loss": 0.3744,
"step": 3025
},
{
"epoch": 3.359030837004405,
"grad_norm": 0.9680752754211426,
"learning_rate": 1.7384615384615385e-05,
"loss": 0.4015,
"step": 3050
},
{
"epoch": 3.3865638766519823,
"grad_norm": 1.012505292892456,
"learning_rate": 1.7358974358974362e-05,
"loss": 0.3609,
"step": 3075
},
{
"epoch": 3.4140969162995596,
"grad_norm": 0.838257908821106,
"learning_rate": 1.7333333333333336e-05,
"loss": 0.425,
"step": 3100
},
{
"epoch": 3.4416299559471364,
"grad_norm": 1.0413908958435059,
"learning_rate": 1.730769230769231e-05,
"loss": 0.3912,
"step": 3125
},
{
"epoch": 3.4691629955947136,
"grad_norm": 0.6918802261352539,
"learning_rate": 1.7282051282051283e-05,
"loss": 0.3665,
"step": 3150
},
{
"epoch": 3.496696035242291,
"grad_norm": 0.8139140009880066,
"learning_rate": 1.725641025641026e-05,
"loss": 0.3797,
"step": 3175
},
{
"epoch": 3.5242290748898677,
"grad_norm": 0.8543463349342346,
"learning_rate": 1.7230769230769234e-05,
"loss": 0.3906,
"step": 3200
},
{
"epoch": 3.551762114537445,
"grad_norm": 0.8312181234359741,
"learning_rate": 1.7205128205128207e-05,
"loss": 0.3728,
"step": 3225
},
{
"epoch": 3.579295154185022,
"grad_norm": 0.8691410422325134,
"learning_rate": 1.717948717948718e-05,
"loss": 0.3696,
"step": 3250
},
{
"epoch": 3.606828193832599,
"grad_norm": 0.9111345410346985,
"learning_rate": 1.7153846153846155e-05,
"loss": 0.4036,
"step": 3275
},
{
"epoch": 3.6343612334801763,
"grad_norm": 1.0638726949691772,
"learning_rate": 1.7128205128205128e-05,
"loss": 0.4002,
"step": 3300
},
{
"epoch": 3.6618942731277535,
"grad_norm": 0.650422215461731,
"learning_rate": 1.7102564102564102e-05,
"loss": 0.345,
"step": 3325
},
{
"epoch": 3.6894273127753303,
"grad_norm": 0.9388235211372375,
"learning_rate": 1.707692307692308e-05,
"loss": 0.3814,
"step": 3350
},
{
"epoch": 3.7169603524229076,
"grad_norm": 0.843593955039978,
"learning_rate": 1.7051282051282053e-05,
"loss": 0.3333,
"step": 3375
},
{
"epoch": 3.744493392070485,
"grad_norm": 0.9046334028244019,
"learning_rate": 1.7025641025641026e-05,
"loss": 0.3803,
"step": 3400
},
{
"epoch": 3.7720264317180616,
"grad_norm": 1.0686695575714111,
"learning_rate": 1.7e-05,
"loss": 0.3584,
"step": 3425
},
{
"epoch": 3.799559471365639,
"grad_norm": 0.9424968361854553,
"learning_rate": 1.6974358974358977e-05,
"loss": 0.3596,
"step": 3450
},
{
"epoch": 3.827092511013216,
"grad_norm": 0.8426197171211243,
"learning_rate": 1.694871794871795e-05,
"loss": 0.3648,
"step": 3475
},
{
"epoch": 3.854625550660793,
"grad_norm": 1.1227201223373413,
"learning_rate": 1.6923076923076924e-05,
"loss": 0.4158,
"step": 3500
},
{
"epoch": 3.88215859030837,
"grad_norm": 0.8253980278968811,
"learning_rate": 1.6897435897435898e-05,
"loss": 0.3581,
"step": 3525
},
{
"epoch": 3.909691629955947,
"grad_norm": 1.038388729095459,
"learning_rate": 1.687179487179487e-05,
"loss": 0.3824,
"step": 3550
},
{
"epoch": 3.9372246696035242,
"grad_norm": 0.847663164138794,
"learning_rate": 1.684615384615385e-05,
"loss": 0.4023,
"step": 3575
},
{
"epoch": 3.964757709251101,
"grad_norm": 1.0508993864059448,
"learning_rate": 1.6820512820512822e-05,
"loss": 0.3857,
"step": 3600
},
{
"epoch": 3.9922907488986783,
"grad_norm": 0.8296692371368408,
"learning_rate": 1.6794871794871796e-05,
"loss": 0.3799,
"step": 3625
},
{
"epoch": 4.0198237885462555,
"grad_norm": 0.7697169184684753,
"learning_rate": 1.676923076923077e-05,
"loss": 0.2988,
"step": 3650
},
{
"epoch": 4.047356828193832,
"grad_norm": 0.8309630751609802,
"learning_rate": 1.6743589743589747e-05,
"loss": 0.3086,
"step": 3675
},
{
"epoch": 4.07488986784141,
"grad_norm": 0.9464417099952698,
"learning_rate": 1.671794871794872e-05,
"loss": 0.2595,
"step": 3700
},
{
"epoch": 4.102422907488987,
"grad_norm": 0.6952372789382935,
"learning_rate": 1.6692307692307694e-05,
"loss": 0.3095,
"step": 3725
},
{
"epoch": 4.129955947136564,
"grad_norm": 1.4975730180740356,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.3056,
"step": 3750
},
{
"epoch": 4.157488986784141,
"grad_norm": 0.7223972082138062,
"learning_rate": 1.6641025641025645e-05,
"loss": 0.2897,
"step": 3775
},
{
"epoch": 4.185022026431718,
"grad_norm": 0.5949111580848694,
"learning_rate": 1.6615384615384618e-05,
"loss": 0.2548,
"step": 3800
},
{
"epoch": 4.212555066079295,
"grad_norm": 0.7186844348907471,
"learning_rate": 1.6589743589743592e-05,
"loss": 0.2927,
"step": 3825
},
{
"epoch": 4.240088105726873,
"grad_norm": 0.8656429648399353,
"learning_rate": 1.6564102564102565e-05,
"loss": 0.2867,
"step": 3850
},
{
"epoch": 4.2676211453744495,
"grad_norm": 0.7154597640037537,
"learning_rate": 1.653846153846154e-05,
"loss": 0.3131,
"step": 3875
},
{
"epoch": 4.295154185022026,
"grad_norm": 0.7212648987770081,
"learning_rate": 1.6512820512820513e-05,
"loss": 0.305,
"step": 3900
},
{
"epoch": 4.322687224669604,
"grad_norm": 0.9249489307403564,
"learning_rate": 1.6487179487179486e-05,
"loss": 0.3191,
"step": 3925
},
{
"epoch": 4.350220264317181,
"grad_norm": 0.8987734317779541,
"learning_rate": 1.6461538461538463e-05,
"loss": 0.3007,
"step": 3950
},
{
"epoch": 4.377753303964758,
"grad_norm": 0.8961289525032043,
"learning_rate": 1.6435897435897437e-05,
"loss": 0.3249,
"step": 3975
},
{
"epoch": 4.405286343612334,
"grad_norm": 1.0475411415100098,
"learning_rate": 1.641025641025641e-05,
"loss": 0.3012,
"step": 4000
},
{
"epoch": 4.405286343612334,
"eval_cer": 143.75523024543463,
"eval_loss": 0.673335075378418,
"eval_runtime": 578.457,
"eval_samples_per_second": 18.292,
"eval_steps_per_second": 4.574,
"eval_wer": 390.6647807637907,
"step": 4000
},
{
"epoch": 4.432819383259912,
"grad_norm": 0.6653734445571899,
"learning_rate": 1.6384615384615384e-05,
"loss": 0.3257,
"step": 4025
},
{
"epoch": 4.460352422907489,
"grad_norm": 1.0824356079101562,
"learning_rate": 1.635897435897436e-05,
"loss": 0.3307,
"step": 4050
},
{
"epoch": 4.487885462555066,
"grad_norm": 1.0432815551757812,
"learning_rate": 1.6333333333333335e-05,
"loss": 0.3184,
"step": 4075
},
{
"epoch": 4.515418502202643,
"grad_norm": 0.6064091324806213,
"learning_rate": 1.630769230769231e-05,
"loss": 0.2876,
"step": 4100
},
{
"epoch": 4.54295154185022,
"grad_norm": 0.9204082489013672,
"learning_rate": 1.6282051282051282e-05,
"loss": 0.3239,
"step": 4125
},
{
"epoch": 4.570484581497797,
"grad_norm": 0.9674654006958008,
"learning_rate": 1.625641025641026e-05,
"loss": 0.3295,
"step": 4150
},
{
"epoch": 4.598017621145375,
"grad_norm": 1.0070710182189941,
"learning_rate": 1.6230769230769233e-05,
"loss": 0.3171,
"step": 4175
},
{
"epoch": 4.6255506607929515,
"grad_norm": 0.7143537998199463,
"learning_rate": 1.6205128205128207e-05,
"loss": 0.299,
"step": 4200
},
{
"epoch": 4.653083700440528,
"grad_norm": 1.4927059412002563,
"learning_rate": 1.617948717948718e-05,
"loss": 0.3133,
"step": 4225
},
{
"epoch": 4.680616740088106,
"grad_norm": 0.9383150339126587,
"learning_rate": 1.6153846153846154e-05,
"loss": 0.2923,
"step": 4250
},
{
"epoch": 4.708149779735683,
"grad_norm": 0.8235558271408081,
"learning_rate": 1.612820512820513e-05,
"loss": 0.3061,
"step": 4275
},
{
"epoch": 4.73568281938326,
"grad_norm": 1.0017234086990356,
"learning_rate": 1.6102564102564105e-05,
"loss": 0.3159,
"step": 4300
},
{
"epoch": 4.763215859030837,
"grad_norm": 0.8471269607543945,
"learning_rate": 1.607692307692308e-05,
"loss": 0.3082,
"step": 4325
},
{
"epoch": 4.790748898678414,
"grad_norm": 0.9471202492713928,
"learning_rate": 1.6051282051282052e-05,
"loss": 0.3103,
"step": 4350
},
{
"epoch": 4.818281938325991,
"grad_norm": 1.0645701885223389,
"learning_rate": 1.602564102564103e-05,
"loss": 0.3185,
"step": 4375
},
{
"epoch": 4.845814977973569,
"grad_norm": 0.6874596476554871,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.2969,
"step": 4400
},
{
"epoch": 4.8733480176211454,
"grad_norm": 0.6609659194946289,
"learning_rate": 1.5974358974358976e-05,
"loss": 0.2947,
"step": 4425
},
{
"epoch": 4.900881057268722,
"grad_norm": 0.9104480743408203,
"learning_rate": 1.594871794871795e-05,
"loss": 0.3108,
"step": 4450
},
{
"epoch": 4.9284140969163,
"grad_norm": 0.7762302756309509,
"learning_rate": 1.5923076923076924e-05,
"loss": 0.2803,
"step": 4475
},
{
"epoch": 4.955947136563877,
"grad_norm": 0.8123212456703186,
"learning_rate": 1.5897435897435897e-05,
"loss": 0.2983,
"step": 4500
},
{
"epoch": 4.983480176211454,
"grad_norm": 0.8753710985183716,
"learning_rate": 1.587179487179487e-05,
"loss": 0.3076,
"step": 4525
},
{
"epoch": 5.011013215859031,
"grad_norm": 0.8646990060806274,
"learning_rate": 1.5846153846153848e-05,
"loss": 0.2629,
"step": 4550
},
{
"epoch": 5.038546255506608,
"grad_norm": 0.45370516180992126,
"learning_rate": 1.582051282051282e-05,
"loss": 0.2463,
"step": 4575
},
{
"epoch": 5.066079295154185,
"grad_norm": 0.8293446898460388,
"learning_rate": 1.5794871794871795e-05,
"loss": 0.2204,
"step": 4600
},
{
"epoch": 5.093612334801762,
"grad_norm": 0.5812169909477234,
"learning_rate": 1.576923076923077e-05,
"loss": 0.2584,
"step": 4625
},
{
"epoch": 5.121145374449339,
"grad_norm": 0.6977860331535339,
"learning_rate": 1.5743589743589746e-05,
"loss": 0.2581,
"step": 4650
},
{
"epoch": 5.148678414096916,
"grad_norm": 0.5691882967948914,
"learning_rate": 1.571794871794872e-05,
"loss": 0.2561,
"step": 4675
},
{
"epoch": 5.176211453744493,
"grad_norm": 0.6710711121559143,
"learning_rate": 1.5692307692307693e-05,
"loss": 0.2287,
"step": 4700
},
{
"epoch": 5.203744493392071,
"grad_norm": 0.6122124195098877,
"learning_rate": 1.5666666666666667e-05,
"loss": 0.2539,
"step": 4725
},
{
"epoch": 5.2312775330396475,
"grad_norm": 0.8912720680236816,
"learning_rate": 1.5641025641025644e-05,
"loss": 0.2261,
"step": 4750
},
{
"epoch": 5.258810572687224,
"grad_norm": 0.7192656993865967,
"learning_rate": 1.5615384615384618e-05,
"loss": 0.277,
"step": 4775
},
{
"epoch": 5.286343612334802,
"grad_norm": 0.5232201814651489,
"learning_rate": 1.558974358974359e-05,
"loss": 0.2315,
"step": 4800
},
{
"epoch": 5.313876651982379,
"grad_norm": 0.5389770865440369,
"learning_rate": 1.5564102564102565e-05,
"loss": 0.2157,
"step": 4825
},
{
"epoch": 5.341409691629956,
"grad_norm": 0.7740320563316345,
"learning_rate": 1.553846153846154e-05,
"loss": 0.2374,
"step": 4850
},
{
"epoch": 5.368942731277533,
"grad_norm": 0.8149337768554688,
"learning_rate": 1.5512820512820516e-05,
"loss": 0.2459,
"step": 4875
},
{
"epoch": 5.39647577092511,
"grad_norm": 0.8045769929885864,
"learning_rate": 1.548717948717949e-05,
"loss": 0.2262,
"step": 4900
},
{
"epoch": 5.424008810572687,
"grad_norm": 0.8608861565589905,
"learning_rate": 1.5461538461538463e-05,
"loss": 0.2812,
"step": 4925
},
{
"epoch": 5.451541850220265,
"grad_norm": 0.6250303387641907,
"learning_rate": 1.5435897435897436e-05,
"loss": 0.2565,
"step": 4950
},
{
"epoch": 5.479074889867841,
"grad_norm": 0.6353731155395508,
"learning_rate": 1.5410256410256414e-05,
"loss": 0.2348,
"step": 4975
},
{
"epoch": 5.506607929515418,
"grad_norm": 0.6314155459403992,
"learning_rate": 1.5384615384615387e-05,
"loss": 0.2416,
"step": 5000
},
{
"epoch": 5.506607929515418,
"eval_cer": 89.07064986874234,
"eval_loss": 0.685695230960846,
"eval_runtime": 499.9069,
"eval_samples_per_second": 21.166,
"eval_steps_per_second": 5.293,
"eval_wer": 259.84912776991985,
"step": 5000
},
{
"epoch": 5.534140969162996,
"grad_norm": 0.7577415704727173,
"learning_rate": 1.535897435897436e-05,
"loss": 0.2469,
"step": 5025
},
{
"epoch": 5.561674008810573,
"grad_norm": 0.8051561713218689,
"learning_rate": 1.5333333333333334e-05,
"loss": 0.2706,
"step": 5050
},
{
"epoch": 5.5892070484581495,
"grad_norm": 0.7317938208580017,
"learning_rate": 1.5307692307692308e-05,
"loss": 0.251,
"step": 5075
},
{
"epoch": 5.616740088105727,
"grad_norm": 0.9899778962135315,
"learning_rate": 1.5282051282051282e-05,
"loss": 0.2415,
"step": 5100
},
{
"epoch": 5.644273127753304,
"grad_norm": 1.2906497716903687,
"learning_rate": 1.5256410256410257e-05,
"loss": 0.2477,
"step": 5125
},
{
"epoch": 5.671806167400881,
"grad_norm": 0.7807902693748474,
"learning_rate": 1.523076923076923e-05,
"loss": 0.2629,
"step": 5150
},
{
"epoch": 5.6993392070484585,
"grad_norm": 0.7599391937255859,
"learning_rate": 1.5205128205128206e-05,
"loss": 0.229,
"step": 5175
},
{
"epoch": 5.726872246696035,
"grad_norm": 0.71393221616745,
"learning_rate": 1.517948717948718e-05,
"loss": 0.2768,
"step": 5200
},
{
"epoch": 5.754405286343612,
"grad_norm": 0.6856432557106018,
"learning_rate": 1.5153846153846155e-05,
"loss": 0.2434,
"step": 5225
},
{
"epoch": 5.78193832599119,
"grad_norm": 1.1420562267303467,
"learning_rate": 1.5128205128205129e-05,
"loss": 0.2724,
"step": 5250
},
{
"epoch": 5.809471365638767,
"grad_norm": 0.7609044909477234,
"learning_rate": 1.5102564102564104e-05,
"loss": 0.2538,
"step": 5275
},
{
"epoch": 5.8370044052863435,
"grad_norm": 0.9725056290626526,
"learning_rate": 1.5076923076923078e-05,
"loss": 0.2455,
"step": 5300
},
{
"epoch": 5.864537444933921,
"grad_norm": 0.8432466387748718,
"learning_rate": 1.5051282051282053e-05,
"loss": 0.2502,
"step": 5325
},
{
"epoch": 5.892070484581498,
"grad_norm": 0.784191370010376,
"learning_rate": 1.5025641025641027e-05,
"loss": 0.2559,
"step": 5350
},
{
"epoch": 5.919603524229075,
"grad_norm": 0.6561942100524902,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.2337,
"step": 5375
},
{
"epoch": 5.9471365638766525,
"grad_norm": 0.9330358505249023,
"learning_rate": 1.4974358974358976e-05,
"loss": 0.2369,
"step": 5400
},
{
"epoch": 5.974669603524229,
"grad_norm": 0.8283205628395081,
"learning_rate": 1.494871794871795e-05,
"loss": 0.2332,
"step": 5425
},
{
"epoch": 6.002202643171806,
"grad_norm": 0.7662353515625,
"learning_rate": 1.4923076923076925e-05,
"loss": 0.2154,
"step": 5450
},
{
"epoch": 6.029735682819383,
"grad_norm": 0.6226630210876465,
"learning_rate": 1.4897435897435898e-05,
"loss": 0.1824,
"step": 5475
},
{
"epoch": 6.057268722466961,
"grad_norm": 0.7479391098022461,
"learning_rate": 1.4871794871794874e-05,
"loss": 0.1676,
"step": 5500
},
{
"epoch": 6.084801762114537,
"grad_norm": 0.8184050917625427,
"learning_rate": 1.4846153846153847e-05,
"loss": 0.1946,
"step": 5525
},
{
"epoch": 6.112334801762114,
"grad_norm": 0.6436644196510315,
"learning_rate": 1.4820512820512823e-05,
"loss": 0.2017,
"step": 5550
},
{
"epoch": 6.139867841409692,
"grad_norm": 0.5338073968887329,
"learning_rate": 1.4794871794871796e-05,
"loss": 0.1875,
"step": 5575
},
{
"epoch": 6.167400881057269,
"grad_norm": 0.8311201930046082,
"learning_rate": 1.4769230769230772e-05,
"loss": 0.209,
"step": 5600
},
{
"epoch": 6.1949339207048455,
"grad_norm": 0.5197294354438782,
"learning_rate": 1.4743589743589745e-05,
"loss": 0.1704,
"step": 5625
},
{
"epoch": 6.222466960352423,
"grad_norm": 0.8122308850288391,
"learning_rate": 1.471794871794872e-05,
"loss": 0.2048,
"step": 5650
},
{
"epoch": 6.25,
"grad_norm": 0.6813929677009583,
"learning_rate": 1.4692307692307694e-05,
"loss": 0.1971,
"step": 5675
},
{
"epoch": 6.277533039647577,
"grad_norm": 0.5382255911827087,
"learning_rate": 1.4666666666666666e-05,
"loss": 0.1629,
"step": 5700
},
{
"epoch": 6.3050660792951545,
"grad_norm": 0.5841939449310303,
"learning_rate": 1.4641025641025642e-05,
"loss": 0.1939,
"step": 5725
},
{
"epoch": 6.332599118942731,
"grad_norm": 0.6143937706947327,
"learning_rate": 1.4615384615384615e-05,
"loss": 0.1787,
"step": 5750
},
{
"epoch": 6.360132158590308,
"grad_norm": 0.6379638314247131,
"learning_rate": 1.458974358974359e-05,
"loss": 0.1867,
"step": 5775
},
{
"epoch": 6.387665198237886,
"grad_norm": 0.6888879537582397,
"learning_rate": 1.4564102564102564e-05,
"loss": 0.1941,
"step": 5800
},
{
"epoch": 6.415198237885463,
"grad_norm": 0.645486056804657,
"learning_rate": 1.453846153846154e-05,
"loss": 0.1823,
"step": 5825
},
{
"epoch": 6.442731277533039,
"grad_norm": 0.5782633423805237,
"learning_rate": 1.4512820512820513e-05,
"loss": 0.2033,
"step": 5850
},
{
"epoch": 6.470264317180617,
"grad_norm": 0.81694495677948,
"learning_rate": 1.4487179487179489e-05,
"loss": 0.2154,
"step": 5875
},
{
"epoch": 6.497797356828194,
"grad_norm": 0.7232884168624878,
"learning_rate": 1.4461538461538462e-05,
"loss": 0.1968,
"step": 5900
},
{
"epoch": 6.525330396475771,
"grad_norm": 0.7122112512588501,
"learning_rate": 1.4435897435897438e-05,
"loss": 0.217,
"step": 5925
},
{
"epoch": 6.5528634361233475,
"grad_norm": 0.5657493472099304,
"learning_rate": 1.4410256410256411e-05,
"loss": 0.1989,
"step": 5950
},
{
"epoch": 6.580396475770925,
"grad_norm": 0.7421569228172302,
"learning_rate": 1.4384615384615387e-05,
"loss": 0.1993,
"step": 5975
},
{
"epoch": 6.607929515418502,
"grad_norm": 0.7630535364151001,
"learning_rate": 1.435897435897436e-05,
"loss": 0.194,
"step": 6000
},
{
"epoch": 6.607929515418502,
"eval_cer": 75.63250146160283,
"eval_loss": 0.7101058959960938,
"eval_runtime": 501.2734,
"eval_samples_per_second": 21.108,
"eval_steps_per_second": 5.279,
"eval_wer": 197.0768505421971,
"step": 6000
},
{
"epoch": 6.635462555066079,
"grad_norm": 0.5665035843849182,
"learning_rate": 1.4333333333333334e-05,
"loss": 0.1686,
"step": 6025
},
{
"epoch": 6.6629955947136565,
"grad_norm": 0.5344381928443909,
"learning_rate": 1.430769230769231e-05,
"loss": 0.2208,
"step": 6050
},
{
"epoch": 6.690528634361233,
"grad_norm": 0.8236050605773926,
"learning_rate": 1.4282051282051283e-05,
"loss": 0.228,
"step": 6075
},
{
"epoch": 6.71806167400881,
"grad_norm": 0.8301483988761902,
"learning_rate": 1.4256410256410258e-05,
"loss": 0.2233,
"step": 6100
},
{
"epoch": 6.745594713656388,
"grad_norm": 0.5735576748847961,
"learning_rate": 1.4230769230769232e-05,
"loss": 0.2112,
"step": 6125
},
{
"epoch": 6.773127753303965,
"grad_norm": 0.7066081762313843,
"learning_rate": 1.4205128205128207e-05,
"loss": 0.1978,
"step": 6150
},
{
"epoch": 6.8006607929515415,
"grad_norm": 0.6695354580879211,
"learning_rate": 1.4179487179487181e-05,
"loss": 0.2082,
"step": 6175
},
{
"epoch": 6.828193832599119,
"grad_norm": 0.6278955340385437,
"learning_rate": 1.4153846153846156e-05,
"loss": 0.1864,
"step": 6200
},
{
"epoch": 6.855726872246696,
"grad_norm": 0.7320701479911804,
"learning_rate": 1.412820512820513e-05,
"loss": 0.2168,
"step": 6225
},
{
"epoch": 6.883259911894273,
"grad_norm": 0.612777829170227,
"learning_rate": 1.4102564102564105e-05,
"loss": 0.1938,
"step": 6250
},
{
"epoch": 6.9107929515418505,
"grad_norm": 0.7008684873580933,
"learning_rate": 1.4076923076923079e-05,
"loss": 0.1862,
"step": 6275
},
{
"epoch": 6.938325991189427,
"grad_norm": 0.5621137619018555,
"learning_rate": 1.405128205128205e-05,
"loss": 0.1923,
"step": 6300
},
{
"epoch": 6.965859030837004,
"grad_norm": 0.8963515758514404,
"learning_rate": 1.4025641025641026e-05,
"loss": 0.1986,
"step": 6325
},
{
"epoch": 6.993392070484582,
"grad_norm": 0.6735339760780334,
"learning_rate": 1.4e-05,
"loss": 0.1991,
"step": 6350
},
{
"epoch": 7.020925110132159,
"grad_norm": 0.4790953993797302,
"learning_rate": 1.3974358974358975e-05,
"loss": 0.1505,
"step": 6375
},
{
"epoch": 7.048458149779735,
"grad_norm": 0.4763677716255188,
"learning_rate": 1.3948717948717949e-05,
"loss": 0.1424,
"step": 6400
},
{
"epoch": 7.075991189427313,
"grad_norm": 0.41467663645744324,
"learning_rate": 1.3923076923076924e-05,
"loss": 0.1336,
"step": 6425
},
{
"epoch": 7.10352422907489,
"grad_norm": 0.5693683624267578,
"learning_rate": 1.3897435897435898e-05,
"loss": 0.1341,
"step": 6450
},
{
"epoch": 7.131057268722467,
"grad_norm": 0.6672292947769165,
"learning_rate": 1.3871794871794873e-05,
"loss": 0.1446,
"step": 6475
},
{
"epoch": 7.158590308370044,
"grad_norm": 0.6646426320075989,
"learning_rate": 1.3846153846153847e-05,
"loss": 0.1573,
"step": 6500
},
{
"epoch": 7.186123348017621,
"grad_norm": 1.0004900693893433,
"learning_rate": 1.3820512820512822e-05,
"loss": 0.1445,
"step": 6525
},
{
"epoch": 7.213656387665198,
"grad_norm": 0.8498390913009644,
"learning_rate": 1.3794871794871796e-05,
"loss": 0.1453,
"step": 6550
},
{
"epoch": 7.241189427312776,
"grad_norm": 0.43505725264549255,
"learning_rate": 1.3769230769230771e-05,
"loss": 0.1508,
"step": 6575
},
{
"epoch": 7.2687224669603525,
"grad_norm": 0.717443585395813,
"learning_rate": 1.3743589743589745e-05,
"loss": 0.1517,
"step": 6600
},
{
"epoch": 7.296255506607929,
"grad_norm": 0.6070489287376404,
"learning_rate": 1.3717948717948718e-05,
"loss": 0.1646,
"step": 6625
},
{
"epoch": 7.323788546255507,
"grad_norm": 1.4384660720825195,
"learning_rate": 1.3692307692307694e-05,
"loss": 0.1566,
"step": 6650
},
{
"epoch": 7.351321585903084,
"grad_norm": 0.6029159426689148,
"learning_rate": 1.3666666666666667e-05,
"loss": 0.1683,
"step": 6675
},
{
"epoch": 7.378854625550661,
"grad_norm": 0.5382408499717712,
"learning_rate": 1.3641025641025643e-05,
"loss": 0.1366,
"step": 6700
},
{
"epoch": 7.406387665198238,
"grad_norm": 0.4797053039073944,
"learning_rate": 1.3615384615384616e-05,
"loss": 0.1342,
"step": 6725
},
{
"epoch": 7.433920704845815,
"grad_norm": 0.6972818970680237,
"learning_rate": 1.3589743589743592e-05,
"loss": 0.155,
"step": 6750
},
{
"epoch": 7.461453744493392,
"grad_norm": 0.5016514658927917,
"learning_rate": 1.3564102564102565e-05,
"loss": 0.1663,
"step": 6775
},
{
"epoch": 7.48898678414097,
"grad_norm": 0.618401050567627,
"learning_rate": 1.353846153846154e-05,
"loss": 0.1597,
"step": 6800
},
{
"epoch": 7.516519823788546,
"grad_norm": 0.675841748714447,
"learning_rate": 1.3512820512820514e-05,
"loss": 0.1482,
"step": 6825
},
{
"epoch": 7.544052863436123,
"grad_norm": 0.4411374628543854,
"learning_rate": 1.348717948717949e-05,
"loss": 0.1497,
"step": 6850
},
{
"epoch": 7.5715859030837,
"grad_norm": 0.7848430275917053,
"learning_rate": 1.3461538461538463e-05,
"loss": 0.1439,
"step": 6875
},
{
"epoch": 7.599118942731278,
"grad_norm": 0.6942805051803589,
"learning_rate": 1.3435897435897435e-05,
"loss": 0.178,
"step": 6900
},
{
"epoch": 7.6266519823788546,
"grad_norm": 0.48058149218559265,
"learning_rate": 1.341025641025641e-05,
"loss": 0.1429,
"step": 6925
},
{
"epoch": 7.654185022026431,
"grad_norm": 0.7218976020812988,
"learning_rate": 1.3384615384615384e-05,
"loss": 0.1804,
"step": 6950
},
{
"epoch": 7.681718061674009,
"grad_norm": 0.6133447885513306,
"learning_rate": 1.335897435897436e-05,
"loss": 0.1686,
"step": 6975
},
{
"epoch": 7.709251101321586,
"grad_norm": 0.8538093566894531,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.1436,
"step": 7000
},
{
"epoch": 7.709251101321586,
"eval_cer": 103.36913782628135,
"eval_loss": 0.7327257394790649,
"eval_runtime": 545.0853,
"eval_samples_per_second": 19.412,
"eval_steps_per_second": 4.854,
"eval_wer": 235.48326261197548,
"step": 7000
},
{
"epoch": 7.736784140969163,
"grad_norm": 0.8791314363479614,
"learning_rate": 1.3307692307692309e-05,
"loss": 0.1747,
"step": 7025
},
{
"epoch": 7.76431718061674,
"grad_norm": 0.7194722294807434,
"learning_rate": 1.3282051282051282e-05,
"loss": 0.152,
"step": 7050
},
{
"epoch": 7.791850220264317,
"grad_norm": 0.6934202313423157,
"learning_rate": 1.3256410256410258e-05,
"loss": 0.1532,
"step": 7075
},
{
"epoch": 7.819383259911894,
"grad_norm": 0.8959735631942749,
"learning_rate": 1.3230769230769231e-05,
"loss": 0.1377,
"step": 7100
},
{
"epoch": 7.846916299559472,
"grad_norm": 0.6112185716629028,
"learning_rate": 1.3205128205128207e-05,
"loss": 0.1407,
"step": 7125
},
{
"epoch": 7.8744493392070485,
"grad_norm": 0.6644231081008911,
"learning_rate": 1.317948717948718e-05,
"loss": 0.1777,
"step": 7150
},
{
"epoch": 7.901982378854625,
"grad_norm": 0.4906589388847351,
"learning_rate": 1.3153846153846156e-05,
"loss": 0.1619,
"step": 7175
},
{
"epoch": 7.929515418502203,
"grad_norm": 0.6490347981452942,
"learning_rate": 1.312820512820513e-05,
"loss": 0.1727,
"step": 7200
},
{
"epoch": 7.95704845814978,
"grad_norm": 0.709720253944397,
"learning_rate": 1.3102564102564103e-05,
"loss": 0.1646,
"step": 7225
},
{
"epoch": 7.984581497797357,
"grad_norm": 0.694337785243988,
"learning_rate": 1.3076923076923078e-05,
"loss": 0.1604,
"step": 7250
},
{
"epoch": 8.012114537444933,
"grad_norm": 0.41181373596191406,
"learning_rate": 1.3051282051282052e-05,
"loss": 0.1716,
"step": 7275
},
{
"epoch": 8.039647577092511,
"grad_norm": 0.49692800641059875,
"learning_rate": 1.3025641025641027e-05,
"loss": 0.1231,
"step": 7300
},
{
"epoch": 8.067180616740089,
"grad_norm": 0.4960375130176544,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.1202,
"step": 7325
},
{
"epoch": 8.094713656387665,
"grad_norm": 0.5013170838356018,
"learning_rate": 1.2974358974358976e-05,
"loss": 0.1137,
"step": 7350
},
{
"epoch": 8.122246696035242,
"grad_norm": 0.579422116279602,
"learning_rate": 1.294871794871795e-05,
"loss": 0.1129,
"step": 7375
},
{
"epoch": 8.14977973568282,
"grad_norm": 0.542116641998291,
"learning_rate": 1.2923076923076925e-05,
"loss": 0.1098,
"step": 7400
},
{
"epoch": 8.177312775330396,
"grad_norm": 0.2935909926891327,
"learning_rate": 1.2897435897435899e-05,
"loss": 0.1197,
"step": 7425
},
{
"epoch": 8.204845814977974,
"grad_norm": 0.4438793957233429,
"learning_rate": 1.2871794871794874e-05,
"loss": 0.1086,
"step": 7450
},
{
"epoch": 8.232378854625551,
"grad_norm": 0.6815393567085266,
"learning_rate": 1.2846153846153848e-05,
"loss": 0.1396,
"step": 7475
},
{
"epoch": 8.259911894273127,
"grad_norm": 0.4968509376049042,
"learning_rate": 1.2820512820512823e-05,
"loss": 0.1129,
"step": 7500
},
{
"epoch": 8.287444933920705,
"grad_norm": 0.9515237212181091,
"learning_rate": 1.2794871794871795e-05,
"loss": 0.1132,
"step": 7525
},
{
"epoch": 8.314977973568283,
"grad_norm": 0.4827808141708374,
"learning_rate": 1.2769230769230769e-05,
"loss": 0.1072,
"step": 7550
},
{
"epoch": 8.342511013215859,
"grad_norm": 0.7047274112701416,
"learning_rate": 1.2743589743589744e-05,
"loss": 0.1177,
"step": 7575
},
{
"epoch": 8.370044052863436,
"grad_norm": 0.4887017011642456,
"learning_rate": 1.2717948717948718e-05,
"loss": 0.1051,
"step": 7600
},
{
"epoch": 8.397577092511014,
"grad_norm": 0.6869075894355774,
"learning_rate": 1.2692307692307693e-05,
"loss": 0.1272,
"step": 7625
},
{
"epoch": 8.42511013215859,
"grad_norm": 0.4369036853313446,
"learning_rate": 1.2666666666666667e-05,
"loss": 0.106,
"step": 7650
},
{
"epoch": 8.452643171806168,
"grad_norm": 0.6985669732093811,
"learning_rate": 1.2641025641025642e-05,
"loss": 0.1185,
"step": 7675
},
{
"epoch": 8.480176211453745,
"grad_norm": 0.5139929056167603,
"learning_rate": 1.2615384615384616e-05,
"loss": 0.1146,
"step": 7700
},
{
"epoch": 8.507709251101321,
"grad_norm": 0.5673744082450867,
"learning_rate": 1.2589743589743591e-05,
"loss": 0.1166,
"step": 7725
},
{
"epoch": 8.535242290748899,
"grad_norm": 0.7198874354362488,
"learning_rate": 1.2564102564102565e-05,
"loss": 0.1278,
"step": 7750
},
{
"epoch": 8.562775330396477,
"grad_norm": 0.6545113325119019,
"learning_rate": 1.253846153846154e-05,
"loss": 0.1245,
"step": 7775
},
{
"epoch": 8.590308370044053,
"grad_norm": 0.667356014251709,
"learning_rate": 1.2512820512820514e-05,
"loss": 0.1314,
"step": 7800
},
{
"epoch": 8.61784140969163,
"grad_norm": 0.32821300625801086,
"learning_rate": 1.2487179487179487e-05,
"loss": 0.1325,
"step": 7825
},
{
"epoch": 8.645374449339208,
"grad_norm": 0.7761093378067017,
"learning_rate": 1.2461538461538463e-05,
"loss": 0.1069,
"step": 7850
},
{
"epoch": 8.672907488986784,
"grad_norm": 0.5627309679985046,
"learning_rate": 1.2435897435897436e-05,
"loss": 0.1046,
"step": 7875
},
{
"epoch": 8.700440528634362,
"grad_norm": 0.8482288122177124,
"learning_rate": 1.2410256410256412e-05,
"loss": 0.1305,
"step": 7900
},
{
"epoch": 8.72797356828194,
"grad_norm": 0.4097133278846741,
"learning_rate": 1.2384615384615385e-05,
"loss": 0.1262,
"step": 7925
},
{
"epoch": 8.755506607929515,
"grad_norm": 0.5583866834640503,
"learning_rate": 1.235897435897436e-05,
"loss": 0.1227,
"step": 7950
},
{
"epoch": 8.783039647577093,
"grad_norm": 0.5424619317054749,
"learning_rate": 1.2333333333333334e-05,
"loss": 0.1258,
"step": 7975
},
{
"epoch": 8.810572687224669,
"grad_norm": 0.7977835536003113,
"learning_rate": 1.230769230769231e-05,
"loss": 0.135,
"step": 8000
},
{
"epoch": 8.810572687224669,
"eval_cer": 96.63028899613678,
"eval_loss": 0.7634754180908203,
"eval_runtime": 560.1673,
"eval_samples_per_second": 18.889,
"eval_steps_per_second": 4.724,
"eval_wer": 223.13059877416316,
"step": 8000
},
{
"epoch": 8.838105726872246,
"grad_norm": 0.5561416745185852,
"learning_rate": 1.2282051282051283e-05,
"loss": 0.1158,
"step": 8025
},
{
"epoch": 8.865638766519824,
"grad_norm": 0.5646144151687622,
"learning_rate": 1.2256410256410259e-05,
"loss": 0.1298,
"step": 8050
},
{
"epoch": 8.8931718061674,
"grad_norm": 0.547580897808075,
"learning_rate": 1.2230769230769232e-05,
"loss": 0.1194,
"step": 8075
},
{
"epoch": 8.920704845814978,
"grad_norm": 0.3888353407382965,
"learning_rate": 1.2205128205128208e-05,
"loss": 0.1367,
"step": 8100
},
{
"epoch": 8.948237885462555,
"grad_norm": 0.734626054763794,
"learning_rate": 1.217948717948718e-05,
"loss": 0.1156,
"step": 8125
},
{
"epoch": 8.975770925110131,
"grad_norm": 0.4471361041069031,
"learning_rate": 1.2153846153846153e-05,
"loss": 0.1089,
"step": 8150
},
{
"epoch": 9.003303964757709,
"grad_norm": 0.4151979982852936,
"learning_rate": 1.2128205128205129e-05,
"loss": 0.1235,
"step": 8175
},
{
"epoch": 9.030837004405287,
"grad_norm": 0.6531796455383301,
"learning_rate": 1.2102564102564102e-05,
"loss": 0.0797,
"step": 8200
},
{
"epoch": 9.058370044052863,
"grad_norm": 0.31330060958862305,
"learning_rate": 1.2076923076923078e-05,
"loss": 0.0821,
"step": 8225
},
{
"epoch": 9.08590308370044,
"grad_norm": 0.6132084131240845,
"learning_rate": 1.2051282051282051e-05,
"loss": 0.0751,
"step": 8250
},
{
"epoch": 9.113436123348018,
"grad_norm": 0.6006444692611694,
"learning_rate": 1.2025641025641027e-05,
"loss": 0.0848,
"step": 8275
},
{
"epoch": 9.140969162995594,
"grad_norm": 0.4292917549610138,
"learning_rate": 1.2e-05,
"loss": 0.0933,
"step": 8300
},
{
"epoch": 9.168502202643172,
"grad_norm": 0.4369455873966217,
"learning_rate": 1.1974358974358976e-05,
"loss": 0.0744,
"step": 8325
},
{
"epoch": 9.19603524229075,
"grad_norm": 0.5301995277404785,
"learning_rate": 1.194871794871795e-05,
"loss": 0.0932,
"step": 8350
},
{
"epoch": 9.223568281938325,
"grad_norm": 0.42414310574531555,
"learning_rate": 1.1923076923076925e-05,
"loss": 0.0841,
"step": 8375
},
{
"epoch": 9.251101321585903,
"grad_norm": 0.45556119084358215,
"learning_rate": 1.1897435897435898e-05,
"loss": 0.0835,
"step": 8400
},
{
"epoch": 9.27863436123348,
"grad_norm": 0.4080977141857147,
"learning_rate": 1.1871794871794872e-05,
"loss": 0.0842,
"step": 8425
},
{
"epoch": 9.306167400881057,
"grad_norm": 0.4317820966243744,
"learning_rate": 1.1846153846153847e-05,
"loss": 0.097,
"step": 8450
},
{
"epoch": 9.333700440528634,
"grad_norm": 0.5324757695198059,
"learning_rate": 1.1820512820512821e-05,
"loss": 0.0853,
"step": 8475
},
{
"epoch": 9.361233480176212,
"grad_norm": 0.336733877658844,
"learning_rate": 1.1794871794871796e-05,
"loss": 0.0933,
"step": 8500
},
{
"epoch": 9.388766519823788,
"grad_norm": 0.28637856245040894,
"learning_rate": 1.176923076923077e-05,
"loss": 0.0891,
"step": 8525
},
{
"epoch": 9.416299559471366,
"grad_norm": 0.480830579996109,
"learning_rate": 1.1743589743589745e-05,
"loss": 0.0945,
"step": 8550
},
{
"epoch": 9.443832599118943,
"grad_norm": 0.44252699613571167,
"learning_rate": 1.1717948717948719e-05,
"loss": 0.0886,
"step": 8575
},
{
"epoch": 9.47136563876652,
"grad_norm": 0.5835415720939636,
"learning_rate": 1.1692307692307694e-05,
"loss": 0.0904,
"step": 8600
},
{
"epoch": 9.498898678414097,
"grad_norm": 0.6899629235267639,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.1032,
"step": 8625
},
{
"epoch": 9.526431718061675,
"grad_norm": 0.3765935003757477,
"learning_rate": 1.1641025641025643e-05,
"loss": 0.105,
"step": 8650
},
{
"epoch": 9.55396475770925,
"grad_norm": 0.5741687417030334,
"learning_rate": 1.1615384615384617e-05,
"loss": 0.0963,
"step": 8675
},
{
"epoch": 9.581497797356828,
"grad_norm": 0.6885107159614563,
"learning_rate": 1.1589743589743592e-05,
"loss": 0.079,
"step": 8700
},
{
"epoch": 9.609030837004406,
"grad_norm": 0.3543364405632019,
"learning_rate": 1.1564102564102566e-05,
"loss": 0.1011,
"step": 8725
},
{
"epoch": 9.636563876651982,
"grad_norm": 0.5241756439208984,
"learning_rate": 1.1538461538461538e-05,
"loss": 0.0965,
"step": 8750
},
{
"epoch": 9.66409691629956,
"grad_norm": 0.5131816864013672,
"learning_rate": 1.1512820512820513e-05,
"loss": 0.089,
"step": 8775
},
{
"epoch": 9.691629955947137,
"grad_norm": 0.6563847064971924,
"learning_rate": 1.1487179487179487e-05,
"loss": 0.0776,
"step": 8800
},
{
"epoch": 9.719162995594713,
"grad_norm": 0.6348633766174316,
"learning_rate": 1.1461538461538462e-05,
"loss": 0.1008,
"step": 8825
},
{
"epoch": 9.746696035242291,
"grad_norm": 0.5632328987121582,
"learning_rate": 1.1435897435897436e-05,
"loss": 0.1074,
"step": 8850
},
{
"epoch": 9.774229074889869,
"grad_norm": 0.5217724442481995,
"learning_rate": 1.1410256410256411e-05,
"loss": 0.1016,
"step": 8875
},
{
"epoch": 9.801762114537445,
"grad_norm": 0.8553673624992371,
"learning_rate": 1.1384615384615385e-05,
"loss": 0.0864,
"step": 8900
},
{
"epoch": 9.829295154185022,
"grad_norm": 0.548801839351654,
"learning_rate": 1.135897435897436e-05,
"loss": 0.1079,
"step": 8925
},
{
"epoch": 9.8568281938326,
"grad_norm": 0.35070112347602844,
"learning_rate": 1.1333333333333334e-05,
"loss": 0.0976,
"step": 8950
},
{
"epoch": 9.884361233480176,
"grad_norm": 0.5965908765792847,
"learning_rate": 1.1307692307692309e-05,
"loss": 0.0864,
"step": 8975
},
{
"epoch": 9.911894273127754,
"grad_norm": 0.6857784390449524,
"learning_rate": 1.1282051282051283e-05,
"loss": 0.0854,
"step": 9000
},
{
"epoch": 9.911894273127754,
"eval_cer": 96.66926507170452,
"eval_loss": 0.784762442111969,
"eval_runtime": 542.6886,
"eval_samples_per_second": 19.497,
"eval_steps_per_second": 4.876,
"eval_wer": 235.66242338519567,
"step": 9000
},
{
"epoch": 9.939427312775331,
"grad_norm": 0.3563746213912964,
"learning_rate": 1.1256410256410258e-05,
"loss": 0.09,
"step": 9025
},
{
"epoch": 9.966960352422907,
"grad_norm": 0.8333278298377991,
"learning_rate": 1.1230769230769232e-05,
"loss": 0.1009,
"step": 9050
},
{
"epoch": 9.994493392070485,
"grad_norm": 0.5625563263893127,
"learning_rate": 1.1205128205128205e-05,
"loss": 0.0924,
"step": 9075
},
{
"epoch": 10.022026431718063,
"grad_norm": 0.27612215280532837,
"learning_rate": 1.117948717948718e-05,
"loss": 0.0642,
"step": 9100
},
{
"epoch": 10.049559471365638,
"grad_norm": 0.44427192211151123,
"learning_rate": 1.1153846153846154e-05,
"loss": 0.0609,
"step": 9125
},
{
"epoch": 10.077092511013216,
"grad_norm": 0.8023830652236938,
"learning_rate": 1.112820512820513e-05,
"loss": 0.078,
"step": 9150
},
{
"epoch": 10.104625550660794,
"grad_norm": 0.6242303252220154,
"learning_rate": 1.1102564102564103e-05,
"loss": 0.0553,
"step": 9175
},
{
"epoch": 10.13215859030837,
"grad_norm": 0.5606359243392944,
"learning_rate": 1.1076923076923079e-05,
"loss": 0.0673,
"step": 9200
},
{
"epoch": 10.159691629955947,
"grad_norm": 0.25088736414909363,
"learning_rate": 1.1051282051282052e-05,
"loss": 0.062,
"step": 9225
},
{
"epoch": 10.187224669603523,
"grad_norm": 0.4773547947406769,
"learning_rate": 1.1025641025641028e-05,
"loss": 0.0745,
"step": 9250
},
{
"epoch": 10.214757709251101,
"grad_norm": 0.19335705041885376,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0553,
"step": 9275
},
{
"epoch": 10.242290748898679,
"grad_norm": 0.32268404960632324,
"learning_rate": 1.0974358974358977e-05,
"loss": 0.0573,
"step": 9300
},
{
"epoch": 10.269823788546255,
"grad_norm": 0.6676753759384155,
"learning_rate": 1.094871794871795e-05,
"loss": 0.0686,
"step": 9325
},
{
"epoch": 10.297356828193832,
"grad_norm": 0.45940276980400085,
"learning_rate": 1.0923076923076922e-05,
"loss": 0.0699,
"step": 9350
},
{
"epoch": 10.32488986784141,
"grad_norm": 0.3542877435684204,
"learning_rate": 1.0897435897435898e-05,
"loss": 0.0839,
"step": 9375
},
{
"epoch": 10.352422907488986,
"grad_norm": 0.5135036110877991,
"learning_rate": 1.0871794871794871e-05,
"loss": 0.0592,
"step": 9400
},
{
"epoch": 10.379955947136564,
"grad_norm": 0.3040463328361511,
"learning_rate": 1.0846153846153847e-05,
"loss": 0.0794,
"step": 9425
},
{
"epoch": 10.407488986784141,
"grad_norm": 0.3832766115665436,
"learning_rate": 1.082051282051282e-05,
"loss": 0.0684,
"step": 9450
},
{
"epoch": 10.435022026431717,
"grad_norm": 0.3733726739883423,
"learning_rate": 1.0794871794871796e-05,
"loss": 0.0594,
"step": 9475
},
{
"epoch": 10.462555066079295,
"grad_norm": 0.41105160117149353,
"learning_rate": 1.076923076923077e-05,
"loss": 0.0608,
"step": 9500
},
{
"epoch": 10.490088105726873,
"grad_norm": 0.5909921526908875,
"learning_rate": 1.0743589743589745e-05,
"loss": 0.0815,
"step": 9525
},
{
"epoch": 10.517621145374449,
"grad_norm": 0.3582472801208496,
"learning_rate": 1.0717948717948718e-05,
"loss": 0.0612,
"step": 9550
},
{
"epoch": 10.545154185022026,
"grad_norm": 0.30810996890068054,
"learning_rate": 1.0692307692307694e-05,
"loss": 0.0669,
"step": 9575
},
{
"epoch": 10.572687224669604,
"grad_norm": 0.44238853454589844,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.0648,
"step": 9600
},
{
"epoch": 10.60022026431718,
"grad_norm": 0.6048989295959473,
"learning_rate": 1.0641025641025643e-05,
"loss": 0.071,
"step": 9625
},
{
"epoch": 10.627753303964758,
"grad_norm": 0.3572470545768738,
"learning_rate": 1.0615384615384616e-05,
"loss": 0.0725,
"step": 9650
},
{
"epoch": 10.655286343612335,
"grad_norm": 0.4658237099647522,
"learning_rate": 1.058974358974359e-05,
"loss": 0.0815,
"step": 9675
},
{
"epoch": 10.682819383259911,
"grad_norm": 1.4714276790618896,
"learning_rate": 1.0564102564102565e-05,
"loss": 0.0769,
"step": 9700
},
{
"epoch": 10.710352422907489,
"grad_norm": 0.3880234360694885,
"learning_rate": 1.0538461538461539e-05,
"loss": 0.0779,
"step": 9725
},
{
"epoch": 10.737885462555067,
"grad_norm": 0.28868013620376587,
"learning_rate": 1.0512820512820514e-05,
"loss": 0.0737,
"step": 9750
},
{
"epoch": 10.765418502202643,
"grad_norm": 0.2880098521709442,
"learning_rate": 1.0487179487179488e-05,
"loss": 0.0634,
"step": 9775
},
{
"epoch": 10.79295154185022,
"grad_norm": 0.5987436175346375,
"learning_rate": 1.0461538461538463e-05,
"loss": 0.0744,
"step": 9800
},
{
"epoch": 10.820484581497798,
"grad_norm": 0.58491051197052,
"learning_rate": 1.0435897435897437e-05,
"loss": 0.0688,
"step": 9825
},
{
"epoch": 10.848017621145374,
"grad_norm": 0.3418164551258087,
"learning_rate": 1.0410256410256412e-05,
"loss": 0.0643,
"step": 9850
},
{
"epoch": 10.875550660792952,
"grad_norm": 0.475784569978714,
"learning_rate": 1.0384615384615386e-05,
"loss": 0.0729,
"step": 9875
},
{
"epoch": 10.90308370044053,
"grad_norm": 0.713233232498169,
"learning_rate": 1.0358974358974361e-05,
"loss": 0.0531,
"step": 9900
},
{
"epoch": 10.930616740088105,
"grad_norm": 0.5004772543907166,
"learning_rate": 1.0333333333333335e-05,
"loss": 0.0573,
"step": 9925
},
{
"epoch": 10.958149779735683,
"grad_norm": 0.3708213269710541,
"learning_rate": 1.0307692307692307e-05,
"loss": 0.0716,
"step": 9950
},
{
"epoch": 10.98568281938326,
"grad_norm": 0.41214606165885925,
"learning_rate": 1.0282051282051282e-05,
"loss": 0.0793,
"step": 9975
},
{
"epoch": 11.013215859030836,
"grad_norm": 0.15892280638217926,
"learning_rate": 1.0256410256410256e-05,
"loss": 0.062,
"step": 10000
},
{
"epoch": 11.013215859030836,
"eval_cer": 83.99286967088143,
"eval_loss": 0.8101971745491028,
"eval_runtime": 534.2549,
"eval_samples_per_second": 19.805,
"eval_steps_per_second": 4.953,
"eval_wer": 199.8114097123998,
"step": 10000
},
{
"epoch": 11.040748898678414,
"grad_norm": 0.3245684504508972,
"learning_rate": 1.0230769230769231e-05,
"loss": 0.0495,
"step": 10025
},
{
"epoch": 11.068281938325992,
"grad_norm": 0.3588270843029022,
"learning_rate": 1.0205128205128205e-05,
"loss": 0.0501,
"step": 10050
},
{
"epoch": 11.095814977973568,
"grad_norm": 0.2821900248527527,
"learning_rate": 1.017948717948718e-05,
"loss": 0.0522,
"step": 10075
},
{
"epoch": 11.123348017621145,
"grad_norm": 0.22381822764873505,
"learning_rate": 1.0153846153846154e-05,
"loss": 0.0511,
"step": 10100
},
{
"epoch": 11.150881057268723,
"grad_norm": 0.5374503135681152,
"learning_rate": 1.012820512820513e-05,
"loss": 0.0423,
"step": 10125
},
{
"epoch": 11.178414096916299,
"grad_norm": 0.6062394380569458,
"learning_rate": 1.0102564102564103e-05,
"loss": 0.0473,
"step": 10150
},
{
"epoch": 11.205947136563877,
"grad_norm": 0.47851529717445374,
"learning_rate": 1.0076923076923078e-05,
"loss": 0.0472,
"step": 10175
},
{
"epoch": 11.233480176211454,
"grad_norm": 0.3368567228317261,
"learning_rate": 1.0051282051282052e-05,
"loss": 0.048,
"step": 10200
},
{
"epoch": 11.26101321585903,
"grad_norm": 0.37278836965560913,
"learning_rate": 1.0025641025641027e-05,
"loss": 0.0475,
"step": 10225
},
{
"epoch": 11.288546255506608,
"grad_norm": 0.5743754506111145,
"learning_rate": 1e-05,
"loss": 0.0416,
"step": 10250
},
{
"epoch": 11.316079295154186,
"grad_norm": 0.3918323814868927,
"learning_rate": 9.974358974358974e-06,
"loss": 0.0511,
"step": 10275
},
{
"epoch": 11.343612334801762,
"grad_norm": 0.36391782760620117,
"learning_rate": 9.94871794871795e-06,
"loss": 0.0506,
"step": 10300
},
{
"epoch": 11.37114537444934,
"grad_norm": 0.24007482826709747,
"learning_rate": 9.923076923076923e-06,
"loss": 0.0451,
"step": 10325
},
{
"epoch": 11.398678414096917,
"grad_norm": 0.2574377954006195,
"learning_rate": 9.897435897435899e-06,
"loss": 0.0516,
"step": 10350
},
{
"epoch": 11.426211453744493,
"grad_norm": 0.212848961353302,
"learning_rate": 9.871794871794872e-06,
"loss": 0.0435,
"step": 10375
},
{
"epoch": 11.45374449339207,
"grad_norm": 0.2388388067483902,
"learning_rate": 9.846153846153848e-06,
"loss": 0.0571,
"step": 10400
},
{
"epoch": 11.481277533039648,
"grad_norm": 0.441074013710022,
"learning_rate": 9.820512820512821e-06,
"loss": 0.0475,
"step": 10425
},
{
"epoch": 11.508810572687224,
"grad_norm": 0.23038113117218018,
"learning_rate": 9.794871794871795e-06,
"loss": 0.053,
"step": 10450
},
{
"epoch": 11.536343612334802,
"grad_norm": 0.42329710721969604,
"learning_rate": 9.76923076923077e-06,
"loss": 0.0411,
"step": 10475
},
{
"epoch": 11.56387665198238,
"grad_norm": 0.2476469725370407,
"learning_rate": 9.743589743589744e-06,
"loss": 0.0468,
"step": 10500
},
{
"epoch": 11.591409691629956,
"grad_norm": 0.3586748540401459,
"learning_rate": 9.71794871794872e-06,
"loss": 0.0551,
"step": 10525
},
{
"epoch": 11.618942731277533,
"grad_norm": 0.465305894613266,
"learning_rate": 9.692307692307693e-06,
"loss": 0.0504,
"step": 10550
},
{
"epoch": 11.646475770925111,
"grad_norm": 0.5798656344413757,
"learning_rate": 9.666666666666667e-06,
"loss": 0.0526,
"step": 10575
},
{
"epoch": 11.674008810572687,
"grad_norm": 0.21707002818584442,
"learning_rate": 9.641025641025642e-06,
"loss": 0.0441,
"step": 10600
},
{
"epoch": 11.701541850220265,
"grad_norm": 0.2580870985984802,
"learning_rate": 9.615384615384616e-06,
"loss": 0.0505,
"step": 10625
},
{
"epoch": 11.729074889867842,
"grad_norm": 0.23660093545913696,
"learning_rate": 9.589743589743591e-06,
"loss": 0.0502,
"step": 10650
},
{
"epoch": 11.756607929515418,
"grad_norm": 0.40983569622039795,
"learning_rate": 9.564102564102565e-06,
"loss": 0.0506,
"step": 10675
},
{
"epoch": 11.784140969162996,
"grad_norm": 0.5480314493179321,
"learning_rate": 9.53846153846154e-06,
"loss": 0.0538,
"step": 10700
},
{
"epoch": 11.811674008810574,
"grad_norm": 0.23049433529376984,
"learning_rate": 9.512820512820514e-06,
"loss": 0.0441,
"step": 10725
},
{
"epoch": 11.83920704845815,
"grad_norm": 0.2648046314716339,
"learning_rate": 9.487179487179487e-06,
"loss": 0.0573,
"step": 10750
},
{
"epoch": 11.866740088105727,
"grad_norm": 0.19929298758506775,
"learning_rate": 9.461538461538463e-06,
"loss": 0.0437,
"step": 10775
},
{
"epoch": 11.894273127753303,
"grad_norm": 0.43784064054489136,
"learning_rate": 9.435897435897436e-06,
"loss": 0.0506,
"step": 10800
},
{
"epoch": 11.92180616740088,
"grad_norm": 0.33534079790115356,
"learning_rate": 9.410256410256412e-06,
"loss": 0.0447,
"step": 10825
},
{
"epoch": 11.949339207048459,
"grad_norm": 0.41424134373664856,
"learning_rate": 9.384615384615385e-06,
"loss": 0.0479,
"step": 10850
},
{
"epoch": 11.976872246696034,
"grad_norm": 0.46953722834587097,
"learning_rate": 9.358974358974359e-06,
"loss": 0.0642,
"step": 10875
},
{
"epoch": 12.004405286343612,
"grad_norm": 0.2814580798149109,
"learning_rate": 9.333333333333334e-06,
"loss": 0.0539,
"step": 10900
},
{
"epoch": 12.03193832599119,
"grad_norm": 0.3269638419151306,
"learning_rate": 9.307692307692308e-06,
"loss": 0.0319,
"step": 10925
},
{
"epoch": 12.059471365638766,
"grad_norm": 0.3257371783256531,
"learning_rate": 9.282051282051283e-06,
"loss": 0.0341,
"step": 10950
},
{
"epoch": 12.087004405286343,
"grad_norm": 0.3003399074077606,
"learning_rate": 9.256410256410257e-06,
"loss": 0.0334,
"step": 10975
},
{
"epoch": 12.114537444933921,
"grad_norm": 0.23329438269138336,
"learning_rate": 9.230769230769232e-06,
"loss": 0.0299,
"step": 11000
},
{
"epoch": 12.114537444933921,
"eval_cer": 102.80570426329486,
"eval_loss": 0.8363927602767944,
"eval_runtime": 573.3468,
"eval_samples_per_second": 18.455,
"eval_steps_per_second": 4.615,
"eval_wer": 177.04856199905706,
"step": 11000
},
{
"epoch": 12.142070484581497,
"grad_norm": 0.3134128749370575,
"learning_rate": 9.205128205128206e-06,
"loss": 0.0351,
"step": 11025
},
{
"epoch": 12.169603524229075,
"grad_norm": 0.19677399098873138,
"learning_rate": 9.17948717948718e-06,
"loss": 0.0308,
"step": 11050
},
{
"epoch": 12.197136563876652,
"grad_norm": 0.22807055711746216,
"learning_rate": 9.153846153846155e-06,
"loss": 0.0398,
"step": 11075
},
{
"epoch": 12.224669603524228,
"grad_norm": 0.23452354967594147,
"learning_rate": 9.128205128205129e-06,
"loss": 0.0337,
"step": 11100
},
{
"epoch": 12.252202643171806,
"grad_norm": 0.21507778763771057,
"learning_rate": 9.102564102564104e-06,
"loss": 0.0343,
"step": 11125
},
{
"epoch": 12.279735682819384,
"grad_norm": 0.5566434264183044,
"learning_rate": 9.076923076923078e-06,
"loss": 0.0339,
"step": 11150
},
{
"epoch": 12.30726872246696,
"grad_norm": 0.17659461498260498,
"learning_rate": 9.051282051282051e-06,
"loss": 0.0325,
"step": 11175
},
{
"epoch": 12.334801762114537,
"grad_norm": 0.2903195023536682,
"learning_rate": 9.025641025641027e-06,
"loss": 0.0332,
"step": 11200
},
{
"epoch": 12.362334801762115,
"grad_norm": 0.20203354954719543,
"learning_rate": 9e-06,
"loss": 0.0279,
"step": 11225
},
{
"epoch": 12.389867841409691,
"grad_norm": 0.25275635719299316,
"learning_rate": 8.974358974358976e-06,
"loss": 0.0329,
"step": 11250
},
{
"epoch": 12.417400881057269,
"grad_norm": 0.16091254353523254,
"learning_rate": 8.94871794871795e-06,
"loss": 0.0322,
"step": 11275
},
{
"epoch": 12.444933920704846,
"grad_norm": 0.2583257853984833,
"learning_rate": 8.923076923076925e-06,
"loss": 0.0323,
"step": 11300
},
{
"epoch": 12.472466960352422,
"grad_norm": 0.3744211494922638,
"learning_rate": 8.897435897435898e-06,
"loss": 0.035,
"step": 11325
},
{
"epoch": 12.5,
"grad_norm": 0.37432727217674255,
"learning_rate": 8.871794871794872e-06,
"loss": 0.0408,
"step": 11350
},
{
"epoch": 12.527533039647578,
"grad_norm": 0.22354626655578613,
"learning_rate": 8.846153846153847e-06,
"loss": 0.0394,
"step": 11375
},
{
"epoch": 12.555066079295154,
"grad_norm": 0.20891696214675903,
"learning_rate": 8.820512820512821e-06,
"loss": 0.0412,
"step": 11400
},
{
"epoch": 12.582599118942731,
"grad_norm": 0.31616201996803284,
"learning_rate": 8.794871794871796e-06,
"loss": 0.0395,
"step": 11425
},
{
"epoch": 12.610132158590309,
"grad_norm": 0.2666582465171814,
"learning_rate": 8.76923076923077e-06,
"loss": 0.0345,
"step": 11450
},
{
"epoch": 12.637665198237885,
"grad_norm": 0.41860589385032654,
"learning_rate": 8.743589743589743e-06,
"loss": 0.0374,
"step": 11475
},
{
"epoch": 12.665198237885463,
"grad_norm": 1.188323974609375,
"learning_rate": 8.717948717948719e-06,
"loss": 0.0382,
"step": 11500
},
{
"epoch": 12.69273127753304,
"grad_norm": 0.4103492498397827,
"learning_rate": 8.692307692307692e-06,
"loss": 0.0363,
"step": 11525
},
{
"epoch": 12.720264317180616,
"grad_norm": 0.19665437936782837,
"learning_rate": 8.666666666666668e-06,
"loss": 0.038,
"step": 11550
},
{
"epoch": 12.747797356828194,
"grad_norm": 0.2887498140335083,
"learning_rate": 8.641025641025641e-06,
"loss": 0.0343,
"step": 11575
},
{
"epoch": 12.775330396475772,
"grad_norm": 0.34571361541748047,
"learning_rate": 8.615384615384617e-06,
"loss": 0.0363,
"step": 11600
},
{
"epoch": 12.802863436123348,
"grad_norm": 0.4487113058567047,
"learning_rate": 8.58974358974359e-06,
"loss": 0.0374,
"step": 11625
},
{
"epoch": 12.830396475770925,
"grad_norm": 0.3439520001411438,
"learning_rate": 8.564102564102564e-06,
"loss": 0.0336,
"step": 11650
},
{
"epoch": 12.857929515418503,
"grad_norm": 0.4911608099937439,
"learning_rate": 8.53846153846154e-06,
"loss": 0.0346,
"step": 11675
},
{
"epoch": 12.885462555066079,
"grad_norm": 0.15864339470863342,
"learning_rate": 8.512820512820513e-06,
"loss": 0.0377,
"step": 11700
},
{
"epoch": 12.912995594713657,
"grad_norm": 0.20187804102897644,
"learning_rate": 8.487179487179488e-06,
"loss": 0.0379,
"step": 11725
},
{
"epoch": 12.940528634361234,
"grad_norm": 0.402381956577301,
"learning_rate": 8.461538461538462e-06,
"loss": 0.0432,
"step": 11750
},
{
"epoch": 12.96806167400881,
"grad_norm": 0.39829185605049133,
"learning_rate": 8.435897435897436e-06,
"loss": 0.0394,
"step": 11775
},
{
"epoch": 12.995594713656388,
"grad_norm": 0.28365182876586914,
"learning_rate": 8.410256410256411e-06,
"loss": 0.0416,
"step": 11800
},
{
"epoch": 13.023127753303966,
"grad_norm": 0.20419412851333618,
"learning_rate": 8.384615384615385e-06,
"loss": 0.0281,
"step": 11825
},
{
"epoch": 13.050660792951541,
"grad_norm": 0.6308184266090393,
"learning_rate": 8.35897435897436e-06,
"loss": 0.0274,
"step": 11850
},
{
"epoch": 13.07819383259912,
"grad_norm": 0.14284294843673706,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0262,
"step": 11875
},
{
"epoch": 13.105726872246697,
"grad_norm": 0.25530532002449036,
"learning_rate": 8.307692307692309e-06,
"loss": 0.0243,
"step": 11900
},
{
"epoch": 13.133259911894273,
"grad_norm": 0.22208963334560394,
"learning_rate": 8.282051282051283e-06,
"loss": 0.0271,
"step": 11925
},
{
"epoch": 13.16079295154185,
"grad_norm": 0.14056318998336792,
"learning_rate": 8.256410256410256e-06,
"loss": 0.0235,
"step": 11950
},
{
"epoch": 13.188325991189428,
"grad_norm": 0.5111184120178223,
"learning_rate": 8.230769230769232e-06,
"loss": 0.0255,
"step": 11975
},
{
"epoch": 13.215859030837004,
"grad_norm": 0.40052205324172974,
"learning_rate": 8.205128205128205e-06,
"loss": 0.0254,
"step": 12000
},
{
"epoch": 13.215859030837004,
"eval_cer": 85.54675409535383,
"eval_loss": 0.8552005290985107,
"eval_runtime": 512.197,
"eval_samples_per_second": 20.658,
"eval_steps_per_second": 5.166,
"eval_wer": 176.0867515322961,
"step": 12000
},
{
"epoch": 13.243392070484582,
"grad_norm": 0.37046974897384644,
"learning_rate": 8.17948717948718e-06,
"loss": 0.0281,
"step": 12025
},
{
"epoch": 13.270925110132158,
"grad_norm": 0.18737167119979858,
"learning_rate": 8.153846153846154e-06,
"loss": 0.0212,
"step": 12050
},
{
"epoch": 13.298458149779735,
"grad_norm": 0.22969557344913483,
"learning_rate": 8.12820512820513e-06,
"loss": 0.0233,
"step": 12075
},
{
"epoch": 13.325991189427313,
"grad_norm": 0.24471865594387054,
"learning_rate": 8.102564102564103e-06,
"loss": 0.0273,
"step": 12100
},
{
"epoch": 13.353524229074889,
"grad_norm": 0.1323070228099823,
"learning_rate": 8.076923076923077e-06,
"loss": 0.0273,
"step": 12125
},
{
"epoch": 13.381057268722467,
"grad_norm": 0.19890151917934418,
"learning_rate": 8.051282051282052e-06,
"loss": 0.0267,
"step": 12150
},
{
"epoch": 13.408590308370044,
"grad_norm": 0.18089306354522705,
"learning_rate": 8.025641025641026e-06,
"loss": 0.0256,
"step": 12175
},
{
"epoch": 13.43612334801762,
"grad_norm": 0.3312578499317169,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0238,
"step": 12200
},
{
"epoch": 13.463656387665198,
"grad_norm": 0.372565358877182,
"learning_rate": 7.974358974358975e-06,
"loss": 0.0279,
"step": 12225
},
{
"epoch": 13.491189427312776,
"grad_norm": 0.16830548644065857,
"learning_rate": 7.948717948717949e-06,
"loss": 0.023,
"step": 12250
},
{
"epoch": 13.518722466960352,
"grad_norm": 0.16041933000087738,
"learning_rate": 7.923076923076924e-06,
"loss": 0.027,
"step": 12275
},
{
"epoch": 13.54625550660793,
"grad_norm": 0.3963809013366699,
"learning_rate": 7.897435897435898e-06,
"loss": 0.0307,
"step": 12300
},
{
"epoch": 13.573788546255507,
"grad_norm": 0.2639409899711609,
"learning_rate": 7.871794871794873e-06,
"loss": 0.024,
"step": 12325
},
{
"epoch": 13.601321585903083,
"grad_norm": 0.15676841139793396,
"learning_rate": 7.846153846153847e-06,
"loss": 0.0239,
"step": 12350
},
{
"epoch": 13.62885462555066,
"grad_norm": 0.314523309469223,
"learning_rate": 7.820512820512822e-06,
"loss": 0.0274,
"step": 12375
},
{
"epoch": 13.656387665198238,
"grad_norm": 0.20265546441078186,
"learning_rate": 7.794871794871796e-06,
"loss": 0.0264,
"step": 12400
},
{
"epoch": 13.683920704845814,
"grad_norm": 0.11325351148843765,
"learning_rate": 7.76923076923077e-06,
"loss": 0.0248,
"step": 12425
},
{
"epoch": 13.711453744493392,
"grad_norm": 0.16151660680770874,
"learning_rate": 7.743589743589745e-06,
"loss": 0.0261,
"step": 12450
},
{
"epoch": 13.73898678414097,
"grad_norm": 0.2732401192188263,
"learning_rate": 7.717948717948718e-06,
"loss": 0.0267,
"step": 12475
},
{
"epoch": 13.766519823788546,
"grad_norm": 0.37177756428718567,
"learning_rate": 7.692307692307694e-06,
"loss": 0.0293,
"step": 12500
},
{
"epoch": 13.794052863436123,
"grad_norm": 0.12377744168043137,
"learning_rate": 7.666666666666667e-06,
"loss": 0.0254,
"step": 12525
},
{
"epoch": 13.821585903083701,
"grad_norm": 0.3608620762825012,
"learning_rate": 7.641025641025641e-06,
"loss": 0.0296,
"step": 12550
},
{
"epoch": 13.849118942731277,
"grad_norm": 0.17188109457492828,
"learning_rate": 7.615384615384615e-06,
"loss": 0.0212,
"step": 12575
},
{
"epoch": 13.876651982378855,
"grad_norm": 0.1362937092781067,
"learning_rate": 7.58974358974359e-06,
"loss": 0.0259,
"step": 12600
},
{
"epoch": 13.904185022026432,
"grad_norm": 0.1876504123210907,
"learning_rate": 7.564102564102564e-06,
"loss": 0.0273,
"step": 12625
},
{
"epoch": 13.931718061674008,
"grad_norm": 0.14184896647930145,
"learning_rate": 7.538461538461539e-06,
"loss": 0.0233,
"step": 12650
},
{
"epoch": 13.959251101321586,
"grad_norm": 0.17070193588733673,
"learning_rate": 7.512820512820513e-06,
"loss": 0.0235,
"step": 12675
},
{
"epoch": 13.986784140969164,
"grad_norm": 0.3402119576931,
"learning_rate": 7.487179487179488e-06,
"loss": 0.0294,
"step": 12700
},
{
"epoch": 14.01431718061674,
"grad_norm": 0.11597840487957001,
"learning_rate": 7.461538461538462e-06,
"loss": 0.0217,
"step": 12725
},
{
"epoch": 14.041850220264317,
"grad_norm": 0.26662012934684753,
"learning_rate": 7.435897435897437e-06,
"loss": 0.0162,
"step": 12750
},
{
"epoch": 14.069383259911895,
"grad_norm": 0.09315423667430878,
"learning_rate": 7.410256410256411e-06,
"loss": 0.0184,
"step": 12775
},
{
"epoch": 14.09691629955947,
"grad_norm": 0.34267550706863403,
"learning_rate": 7.384615384615386e-06,
"loss": 0.0169,
"step": 12800
},
{
"epoch": 14.124449339207048,
"grad_norm": 0.2209981232881546,
"learning_rate": 7.35897435897436e-06,
"loss": 0.0163,
"step": 12825
},
{
"epoch": 14.151982378854626,
"grad_norm": 0.2604495882987976,
"learning_rate": 7.333333333333333e-06,
"loss": 0.0194,
"step": 12850
},
{
"epoch": 14.179515418502202,
"grad_norm": 0.10924118757247925,
"learning_rate": 7.307692307692308e-06,
"loss": 0.0149,
"step": 12875
},
{
"epoch": 14.20704845814978,
"grad_norm": 0.10360319912433624,
"learning_rate": 7.282051282051282e-06,
"loss": 0.0177,
"step": 12900
},
{
"epoch": 14.234581497797357,
"grad_norm": 0.20702053606510162,
"learning_rate": 7.256410256410257e-06,
"loss": 0.0179,
"step": 12925
},
{
"epoch": 14.262114537444933,
"grad_norm": 0.22959311306476593,
"learning_rate": 7.230769230769231e-06,
"loss": 0.017,
"step": 12950
},
{
"epoch": 14.289647577092511,
"grad_norm": 0.10817913711071014,
"learning_rate": 7.205128205128206e-06,
"loss": 0.018,
"step": 12975
},
{
"epoch": 14.317180616740089,
"grad_norm": 0.1359902173280716,
"learning_rate": 7.17948717948718e-06,
"loss": 0.0196,
"step": 13000
},
{
"epoch": 14.317180616740089,
"eval_cer": 60.44272236424288,
"eval_loss": 0.8670655488967896,
"eval_runtime": 479.7498,
"eval_samples_per_second": 22.055,
"eval_steps_per_second": 5.515,
"eval_wer": 126.28005657708627,
"step": 13000
},
{
"epoch": 14.344713656387665,
"grad_norm": 0.21668480336666107,
"learning_rate": 7.153846153846155e-06,
"loss": 0.0185,
"step": 13025
},
{
"epoch": 14.372246696035242,
"grad_norm": 0.12029585987329483,
"learning_rate": 7.128205128205129e-06,
"loss": 0.0181,
"step": 13050
},
{
"epoch": 14.39977973568282,
"grad_norm": 0.2249136120080948,
"learning_rate": 7.102564102564104e-06,
"loss": 0.0194,
"step": 13075
},
{
"epoch": 14.427312775330396,
"grad_norm": 0.1638566255569458,
"learning_rate": 7.076923076923078e-06,
"loss": 0.0198,
"step": 13100
},
{
"epoch": 14.454845814977974,
"grad_norm": 0.10072775185108185,
"learning_rate": 7.051282051282053e-06,
"loss": 0.0196,
"step": 13125
},
{
"epoch": 14.482378854625551,
"grad_norm": 0.3090708553791046,
"learning_rate": 7.025641025641025e-06,
"loss": 0.018,
"step": 13150
},
{
"epoch": 14.509911894273127,
"grad_norm": 0.2770833373069763,
"learning_rate": 7e-06,
"loss": 0.0186,
"step": 13175
},
{
"epoch": 14.537444933920705,
"grad_norm": 0.37222012877464294,
"learning_rate": 6.974358974358974e-06,
"loss": 0.022,
"step": 13200
},
{
"epoch": 14.564977973568283,
"grad_norm": 0.11628738045692444,
"learning_rate": 6.948717948717949e-06,
"loss": 0.019,
"step": 13225
},
{
"epoch": 14.592511013215859,
"grad_norm": 0.3900822401046753,
"learning_rate": 6.923076923076923e-06,
"loss": 0.0205,
"step": 13250
},
{
"epoch": 14.620044052863436,
"grad_norm": 0.1428934782743454,
"learning_rate": 6.897435897435898e-06,
"loss": 0.0186,
"step": 13275
},
{
"epoch": 14.647577092511014,
"grad_norm": 0.25180259346961975,
"learning_rate": 6.871794871794872e-06,
"loss": 0.0214,
"step": 13300
},
{
"epoch": 14.67511013215859,
"grad_norm": 0.2877309024333954,
"learning_rate": 6.846153846153847e-06,
"loss": 0.0218,
"step": 13325
},
{
"epoch": 14.702643171806168,
"grad_norm": 0.21245427429676056,
"learning_rate": 6.820512820512821e-06,
"loss": 0.0192,
"step": 13350
},
{
"epoch": 14.730176211453745,
"grad_norm": 0.33086642622947693,
"learning_rate": 6.794871794871796e-06,
"loss": 0.0194,
"step": 13375
},
{
"epoch": 14.757709251101321,
"grad_norm": 0.21538333594799042,
"learning_rate": 6.76923076923077e-06,
"loss": 0.0208,
"step": 13400
},
{
"epoch": 14.785242290748899,
"grad_norm": 0.1178901195526123,
"learning_rate": 6.743589743589745e-06,
"loss": 0.0177,
"step": 13425
},
{
"epoch": 14.812775330396477,
"grad_norm": 0.13401082158088684,
"learning_rate": 6.717948717948718e-06,
"loss": 0.0204,
"step": 13450
},
{
"epoch": 14.840308370044053,
"grad_norm": 0.1349448412656784,
"learning_rate": 6.692307692307692e-06,
"loss": 0.0151,
"step": 13475
},
{
"epoch": 14.86784140969163,
"grad_norm": 0.10409779101610184,
"learning_rate": 6.666666666666667e-06,
"loss": 0.021,
"step": 13500
},
{
"epoch": 14.895374449339208,
"grad_norm": 0.11943039298057556,
"learning_rate": 6.641025641025641e-06,
"loss": 0.0188,
"step": 13525
},
{
"epoch": 14.922907488986784,
"grad_norm": 0.15288065373897552,
"learning_rate": 6.615384615384616e-06,
"loss": 0.0166,
"step": 13550
},
{
"epoch": 14.950440528634362,
"grad_norm": 0.3485194146633148,
"learning_rate": 6.58974358974359e-06,
"loss": 0.0194,
"step": 13575
},
{
"epoch": 14.97797356828194,
"grad_norm": 0.23302164673805237,
"learning_rate": 6.564102564102565e-06,
"loss": 0.0198,
"step": 13600
},
{
"epoch": 15.005506607929515,
"grad_norm": 0.18940000236034393,
"learning_rate": 6.538461538461539e-06,
"loss": 0.0211,
"step": 13625
},
{
"epoch": 15.033039647577093,
"grad_norm": 0.17743512988090515,
"learning_rate": 6.512820512820514e-06,
"loss": 0.0147,
"step": 13650
},
{
"epoch": 15.060572687224669,
"grad_norm": 0.1025228500366211,
"learning_rate": 6.487179487179488e-06,
"loss": 0.0137,
"step": 13675
},
{
"epoch": 15.088105726872246,
"grad_norm": 0.08102612942457199,
"learning_rate": 6.461538461538463e-06,
"loss": 0.0134,
"step": 13700
},
{
"epoch": 15.115638766519824,
"grad_norm": 0.11834702640771866,
"learning_rate": 6.435897435897437e-06,
"loss": 0.0136,
"step": 13725
},
{
"epoch": 15.1431718061674,
"grad_norm": 0.09455480426549911,
"learning_rate": 6.410256410256412e-06,
"loss": 0.014,
"step": 13750
},
{
"epoch": 15.170704845814978,
"grad_norm": 0.10078572481870651,
"learning_rate": 6.384615384615384e-06,
"loss": 0.012,
"step": 13775
},
{
"epoch": 15.198237885462555,
"grad_norm": 0.10887516289949417,
"learning_rate": 6.358974358974359e-06,
"loss": 0.0131,
"step": 13800
},
{
"epoch": 15.225770925110131,
"grad_norm": 0.07745672762393951,
"learning_rate": 6.333333333333333e-06,
"loss": 0.0149,
"step": 13825
},
{
"epoch": 15.253303964757709,
"grad_norm": 0.5340884923934937,
"learning_rate": 6.307692307692308e-06,
"loss": 0.0165,
"step": 13850
},
{
"epoch": 15.280837004405287,
"grad_norm": 0.105720154941082,
"learning_rate": 6.282051282051282e-06,
"loss": 0.0131,
"step": 13875
},
{
"epoch": 15.308370044052863,
"grad_norm": 0.2738574743270874,
"learning_rate": 6.256410256410257e-06,
"loss": 0.0127,
"step": 13900
},
{
"epoch": 15.33590308370044,
"grad_norm": 0.2029091864824295,
"learning_rate": 6.230769230769231e-06,
"loss": 0.0127,
"step": 13925
},
{
"epoch": 15.363436123348018,
"grad_norm": 0.09055199474096298,
"learning_rate": 6.205128205128206e-06,
"loss": 0.0153,
"step": 13950
},
{
"epoch": 15.390969162995594,
"grad_norm": 0.33734768629074097,
"learning_rate": 6.17948717948718e-06,
"loss": 0.0142,
"step": 13975
},
{
"epoch": 15.418502202643172,
"grad_norm": 0.11411110311746597,
"learning_rate": 6.153846153846155e-06,
"loss": 0.0136,
"step": 14000
},
{
"epoch": 15.418502202643172,
"eval_cer": 73.25610720713492,
"eval_loss": 0.8812981843948364,
"eval_runtime": 442.8713,
"eval_samples_per_second": 23.892,
"eval_steps_per_second": 5.975,
"eval_wer": 177.97265440829796,
"step": 14000
},
{
"epoch": 15.44603524229075,
"grad_norm": 0.21195538341999054,
"learning_rate": 6.128205128205129e-06,
"loss": 0.0142,
"step": 14025
},
{
"epoch": 15.473568281938325,
"grad_norm": 0.09307265281677246,
"learning_rate": 6.102564102564104e-06,
"loss": 0.0121,
"step": 14050
},
{
"epoch": 15.501101321585903,
"grad_norm": 0.18475420773029327,
"learning_rate": 6.076923076923077e-06,
"loss": 0.0133,
"step": 14075
},
{
"epoch": 15.52863436123348,
"grad_norm": 0.12336278706789017,
"learning_rate": 6.051282051282051e-06,
"loss": 0.0126,
"step": 14100
},
{
"epoch": 15.556167400881057,
"grad_norm": 0.18600626289844513,
"learning_rate": 6.025641025641026e-06,
"loss": 0.0122,
"step": 14125
},
{
"epoch": 15.583700440528634,
"grad_norm": 0.22498783469200134,
"learning_rate": 6e-06,
"loss": 0.0138,
"step": 14150
},
{
"epoch": 15.611233480176212,
"grad_norm": 0.10272740572690964,
"learning_rate": 5.974358974358975e-06,
"loss": 0.0146,
"step": 14175
},
{
"epoch": 15.638766519823788,
"grad_norm": 0.27834802865982056,
"learning_rate": 5.948717948717949e-06,
"loss": 0.013,
"step": 14200
},
{
"epoch": 15.666299559471366,
"grad_norm": 0.21032482385635376,
"learning_rate": 5.923076923076924e-06,
"loss": 0.0122,
"step": 14225
},
{
"epoch": 15.693832599118943,
"grad_norm": 0.12462333589792252,
"learning_rate": 5.897435897435898e-06,
"loss": 0.0142,
"step": 14250
},
{
"epoch": 15.72136563876652,
"grad_norm": 0.14421270787715912,
"learning_rate": 5.871794871794873e-06,
"loss": 0.0133,
"step": 14275
},
{
"epoch": 15.748898678414097,
"grad_norm": 0.0854870080947876,
"learning_rate": 5.846153846153847e-06,
"loss": 0.0141,
"step": 14300
},
{
"epoch": 15.776431718061675,
"grad_norm": 0.3137272298336029,
"learning_rate": 5.820512820512822e-06,
"loss": 0.0141,
"step": 14325
},
{
"epoch": 15.80396475770925,
"grad_norm": 0.42760950326919556,
"learning_rate": 5.794871794871796e-06,
"loss": 0.0162,
"step": 14350
},
{
"epoch": 15.831497797356828,
"grad_norm": 0.08184482902288437,
"learning_rate": 5.769230769230769e-06,
"loss": 0.0143,
"step": 14375
},
{
"epoch": 15.859030837004406,
"grad_norm": 0.2387475222349167,
"learning_rate": 5.743589743589743e-06,
"loss": 0.0139,
"step": 14400
},
{
"epoch": 15.886563876651982,
"grad_norm": 0.14585834741592407,
"learning_rate": 5.717948717948718e-06,
"loss": 0.0137,
"step": 14425
},
{
"epoch": 15.91409691629956,
"grad_norm": 0.22786743938922882,
"learning_rate": 5.692307692307692e-06,
"loss": 0.0142,
"step": 14450
},
{
"epoch": 15.941629955947137,
"grad_norm": 0.11104258894920349,
"learning_rate": 5.666666666666667e-06,
"loss": 0.0135,
"step": 14475
},
{
"epoch": 15.969162995594713,
"grad_norm": 0.12056852877140045,
"learning_rate": 5.641025641025641e-06,
"loss": 0.0123,
"step": 14500
},
{
"epoch": 15.996696035242291,
"grad_norm": 0.12010839581489563,
"learning_rate": 5.615384615384616e-06,
"loss": 0.0142,
"step": 14525
},
{
"epoch": 16.024229074889867,
"grad_norm": 0.14887858927249908,
"learning_rate": 5.58974358974359e-06,
"loss": 0.01,
"step": 14550
},
{
"epoch": 16.051762114537446,
"grad_norm": 0.06393441557884216,
"learning_rate": 5.564102564102565e-06,
"loss": 0.0107,
"step": 14575
},
{
"epoch": 16.079295154185022,
"grad_norm": 0.05813656747341156,
"learning_rate": 5.538461538461539e-06,
"loss": 0.0098,
"step": 14600
},
{
"epoch": 16.106828193832598,
"grad_norm": 0.30714789032936096,
"learning_rate": 5.512820512820514e-06,
"loss": 0.0103,
"step": 14625
},
{
"epoch": 16.134361233480178,
"grad_norm": 0.06188270449638367,
"learning_rate": 5.487179487179488e-06,
"loss": 0.0095,
"step": 14650
},
{
"epoch": 16.161894273127754,
"grad_norm": 0.16568152606487274,
"learning_rate": 5.461538461538461e-06,
"loss": 0.0093,
"step": 14675
},
{
"epoch": 16.18942731277533,
"grad_norm": 0.07314164191484451,
"learning_rate": 5.435897435897436e-06,
"loss": 0.0101,
"step": 14700
},
{
"epoch": 16.21696035242291,
"grad_norm": 0.06874439120292664,
"learning_rate": 5.41025641025641e-06,
"loss": 0.0102,
"step": 14725
},
{
"epoch": 16.244493392070485,
"grad_norm": 0.0864795446395874,
"learning_rate": 5.384615384615385e-06,
"loss": 0.0091,
"step": 14750
},
{
"epoch": 16.27202643171806,
"grad_norm": 0.09835877269506454,
"learning_rate": 5.358974358974359e-06,
"loss": 0.011,
"step": 14775
},
{
"epoch": 16.29955947136564,
"grad_norm": 0.18435481190681458,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0106,
"step": 14800
},
{
"epoch": 16.327092511013216,
"grad_norm": 0.06160463020205498,
"learning_rate": 5.307692307692308e-06,
"loss": 0.0094,
"step": 14825
},
{
"epoch": 16.354625550660792,
"grad_norm": 0.06980791687965393,
"learning_rate": 5.282051282051283e-06,
"loss": 0.0107,
"step": 14850
},
{
"epoch": 16.38215859030837,
"grad_norm": 0.09515848010778427,
"learning_rate": 5.256410256410257e-06,
"loss": 0.0115,
"step": 14875
},
{
"epoch": 16.409691629955947,
"grad_norm": 0.11992871761322021,
"learning_rate": 5.230769230769232e-06,
"loss": 0.0102,
"step": 14900
},
{
"epoch": 16.437224669603523,
"grad_norm": 0.08134716004133224,
"learning_rate": 5.205128205128206e-06,
"loss": 0.0103,
"step": 14925
},
{
"epoch": 16.464757709251103,
"grad_norm": 0.5592033863067627,
"learning_rate": 5.179487179487181e-06,
"loss": 0.0118,
"step": 14950
},
{
"epoch": 16.49229074889868,
"grad_norm": 0.06395772844552994,
"learning_rate": 5.1538461538461534e-06,
"loss": 0.012,
"step": 14975
},
{
"epoch": 16.519823788546255,
"grad_norm": 0.24540168046951294,
"learning_rate": 5.128205128205128e-06,
"loss": 0.0102,
"step": 15000
},
{
"epoch": 16.519823788546255,
"eval_cer": 57.35444155308198,
"eval_loss": 0.8929909467697144,
"eval_runtime": 475.0118,
"eval_samples_per_second": 22.275,
"eval_steps_per_second": 5.57,
"eval_wer": 142.6968411126827,
"step": 15000
},
{
"epoch": 16.547356828193834,
"grad_norm": 0.1699555218219757,
"learning_rate": 5.1025641025641024e-06,
"loss": 0.0104,
"step": 15025
},
{
"epoch": 16.57488986784141,
"grad_norm": 0.12570585310459137,
"learning_rate": 5.076923076923077e-06,
"loss": 0.0092,
"step": 15050
},
{
"epoch": 16.602422907488986,
"grad_norm": 0.07865352183580399,
"learning_rate": 5.051282051282051e-06,
"loss": 0.0098,
"step": 15075
},
{
"epoch": 16.629955947136565,
"grad_norm": 0.12875448167324066,
"learning_rate": 5.025641025641026e-06,
"loss": 0.0104,
"step": 15100
},
{
"epoch": 16.65748898678414,
"grad_norm": 0.14089557528495789,
"learning_rate": 5e-06,
"loss": 0.0109,
"step": 15125
},
{
"epoch": 16.685022026431717,
"grad_norm": 0.07757396996021271,
"learning_rate": 4.974358974358975e-06,
"loss": 0.0092,
"step": 15150
},
{
"epoch": 16.712555066079297,
"grad_norm": 0.07462769746780396,
"learning_rate": 4.948717948717949e-06,
"loss": 0.0097,
"step": 15175
},
{
"epoch": 16.740088105726873,
"grad_norm": 0.17445862293243408,
"learning_rate": 4.923076923076924e-06,
"loss": 0.0108,
"step": 15200
},
{
"epoch": 16.76762114537445,
"grad_norm": 0.06989112496376038,
"learning_rate": 4.8974358974358975e-06,
"loss": 0.0107,
"step": 15225
},
{
"epoch": 16.795154185022028,
"grad_norm": 0.10867713391780853,
"learning_rate": 4.871794871794872e-06,
"loss": 0.0094,
"step": 15250
},
{
"epoch": 16.822687224669604,
"grad_norm": 0.08284565061330795,
"learning_rate": 4.8461538461538465e-06,
"loss": 0.0096,
"step": 15275
},
{
"epoch": 16.85022026431718,
"grad_norm": 0.40096861124038696,
"learning_rate": 4.820512820512821e-06,
"loss": 0.0106,
"step": 15300
},
{
"epoch": 16.87775330396476,
"grad_norm": 0.20616371929645538,
"learning_rate": 4.7948717948717955e-06,
"loss": 0.0098,
"step": 15325
},
{
"epoch": 16.905286343612335,
"grad_norm": 0.09715402871370316,
"learning_rate": 4.76923076923077e-06,
"loss": 0.009,
"step": 15350
},
{
"epoch": 16.93281938325991,
"grad_norm": 0.19870775938034058,
"learning_rate": 4.743589743589744e-06,
"loss": 0.0097,
"step": 15375
},
{
"epoch": 16.96035242290749,
"grad_norm": 0.07990965992212296,
"learning_rate": 4.717948717948718e-06,
"loss": 0.0103,
"step": 15400
},
{
"epoch": 16.987885462555067,
"grad_norm": 0.1826823353767395,
"learning_rate": 4.692307692307693e-06,
"loss": 0.0119,
"step": 15425
},
{
"epoch": 17.015418502202643,
"grad_norm": 0.07441161572933197,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0091,
"step": 15450
},
{
"epoch": 17.042951541850222,
"grad_norm": 0.05266563221812248,
"learning_rate": 4.641025641025642e-06,
"loss": 0.0092,
"step": 15475
},
{
"epoch": 17.070484581497798,
"grad_norm": 0.06553443521261215,
"learning_rate": 4.615384615384616e-06,
"loss": 0.0068,
"step": 15500
},
{
"epoch": 17.098017621145374,
"grad_norm": 0.04783637821674347,
"learning_rate": 4.58974358974359e-06,
"loss": 0.0076,
"step": 15525
},
{
"epoch": 17.125550660792953,
"grad_norm": 0.06203090399503708,
"learning_rate": 4.564102564102564e-06,
"loss": 0.0076,
"step": 15550
},
{
"epoch": 17.15308370044053,
"grad_norm": 0.18787582218647003,
"learning_rate": 4.538461538461539e-06,
"loss": 0.0087,
"step": 15575
},
{
"epoch": 17.180616740088105,
"grad_norm": 0.054731931537389755,
"learning_rate": 4.512820512820513e-06,
"loss": 0.007,
"step": 15600
},
{
"epoch": 17.208149779735685,
"grad_norm": 0.06515911966562271,
"learning_rate": 4.487179487179488e-06,
"loss": 0.0091,
"step": 15625
},
{
"epoch": 17.23568281938326,
"grad_norm": 0.12270718812942505,
"learning_rate": 4.461538461538462e-06,
"loss": 0.0082,
"step": 15650
},
{
"epoch": 17.263215859030836,
"grad_norm": 0.09646424651145935,
"learning_rate": 4.435897435897436e-06,
"loss": 0.0082,
"step": 15675
},
{
"epoch": 17.290748898678412,
"grad_norm": 0.13605421781539917,
"learning_rate": 4.4102564102564104e-06,
"loss": 0.0071,
"step": 15700
},
{
"epoch": 17.318281938325992,
"grad_norm": 0.18393711745738983,
"learning_rate": 4.384615384615385e-06,
"loss": 0.0078,
"step": 15725
},
{
"epoch": 17.345814977973568,
"grad_norm": 0.053975436836481094,
"learning_rate": 4.358974358974359e-06,
"loss": 0.0085,
"step": 15750
},
{
"epoch": 17.373348017621144,
"grad_norm": 0.05142604932188988,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0072,
"step": 15775
},
{
"epoch": 17.400881057268723,
"grad_norm": 0.09333042055368423,
"learning_rate": 4.307692307692308e-06,
"loss": 0.0071,
"step": 15800
},
{
"epoch": 17.4284140969163,
"grad_norm": 0.0760270357131958,
"learning_rate": 4.282051282051282e-06,
"loss": 0.0076,
"step": 15825
},
{
"epoch": 17.455947136563875,
"grad_norm": 0.07667813450098038,
"learning_rate": 4.2564102564102566e-06,
"loss": 0.0086,
"step": 15850
},
{
"epoch": 17.483480176211454,
"grad_norm": 0.07268601655960083,
"learning_rate": 4.230769230769231e-06,
"loss": 0.008,
"step": 15875
},
{
"epoch": 17.51101321585903,
"grad_norm": 0.05997084081172943,
"learning_rate": 4.2051282051282055e-06,
"loss": 0.0084,
"step": 15900
},
{
"epoch": 17.538546255506606,
"grad_norm": 0.08455850183963776,
"learning_rate": 4.17948717948718e-06,
"loss": 0.0079,
"step": 15925
},
{
"epoch": 17.566079295154186,
"grad_norm": 0.05682849884033203,
"learning_rate": 4.1538461538461545e-06,
"loss": 0.009,
"step": 15950
},
{
"epoch": 17.59361233480176,
"grad_norm": 0.13320918381214142,
"learning_rate": 4.128205128205128e-06,
"loss": 0.0091,
"step": 15975
},
{
"epoch": 17.621145374449338,
"grad_norm": 0.08159907907247543,
"learning_rate": 4.102564102564103e-06,
"loss": 0.0079,
"step": 16000
},
{
"epoch": 17.621145374449338,
"eval_cer": 59.87355702543762,
"eval_loss": 0.9064295291900635,
"eval_runtime": 473.5233,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 5.588,
"eval_wer": 132.6166902404526,
"step": 16000
},
{
"epoch": 17.648678414096917,
"grad_norm": 0.08243115991353989,
"learning_rate": 4.076923076923077e-06,
"loss": 0.0075,
"step": 16025
},
{
"epoch": 17.676211453744493,
"grad_norm": 0.06684228777885437,
"learning_rate": 4.051282051282052e-06,
"loss": 0.0086,
"step": 16050
},
{
"epoch": 17.70374449339207,
"grad_norm": 0.07782474905252457,
"learning_rate": 4.025641025641026e-06,
"loss": 0.0081,
"step": 16075
},
{
"epoch": 17.73127753303965,
"grad_norm": 0.09877069294452667,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0082,
"step": 16100
},
{
"epoch": 17.758810572687224,
"grad_norm": 0.1733996719121933,
"learning_rate": 3.974358974358974e-06,
"loss": 0.008,
"step": 16125
},
{
"epoch": 17.7863436123348,
"grad_norm": 0.05951946601271629,
"learning_rate": 3.948717948717949e-06,
"loss": 0.0089,
"step": 16150
},
{
"epoch": 17.81387665198238,
"grad_norm": 0.09281080961227417,
"learning_rate": 3.923076923076923e-06,
"loss": 0.0085,
"step": 16175
},
{
"epoch": 17.841409691629956,
"grad_norm": 0.17630000412464142,
"learning_rate": 3.897435897435898e-06,
"loss": 0.0091,
"step": 16200
},
{
"epoch": 17.86894273127753,
"grad_norm": 0.058690495789051056,
"learning_rate": 3.871794871794872e-06,
"loss": 0.008,
"step": 16225
},
{
"epoch": 17.89647577092511,
"grad_norm": 0.09863056242465973,
"learning_rate": 3.846153846153847e-06,
"loss": 0.0089,
"step": 16250
},
{
"epoch": 17.924008810572687,
"grad_norm": 0.09190403670072556,
"learning_rate": 3.8205128205128204e-06,
"loss": 0.0087,
"step": 16275
},
{
"epoch": 17.951541850220263,
"grad_norm": 0.06220954656600952,
"learning_rate": 3.794871794871795e-06,
"loss": 0.008,
"step": 16300
},
{
"epoch": 17.979074889867842,
"grad_norm": 0.0854143351316452,
"learning_rate": 3.7692307692307694e-06,
"loss": 0.008,
"step": 16325
},
{
"epoch": 18.006607929515418,
"grad_norm": 0.10065086930990219,
"learning_rate": 3.743589743589744e-06,
"loss": 0.0077,
"step": 16350
},
{
"epoch": 18.034140969162994,
"grad_norm": 0.07576055824756622,
"learning_rate": 3.7179487179487184e-06,
"loss": 0.0066,
"step": 16375
},
{
"epoch": 18.061674008810574,
"grad_norm": 0.04324162006378174,
"learning_rate": 3.692307692307693e-06,
"loss": 0.0065,
"step": 16400
},
{
"epoch": 18.08920704845815,
"grad_norm": 0.07214418798685074,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0061,
"step": 16425
},
{
"epoch": 18.116740088105725,
"grad_norm": 0.05964656174182892,
"learning_rate": 3.641025641025641e-06,
"loss": 0.006,
"step": 16450
},
{
"epoch": 18.144273127753305,
"grad_norm": 0.06809210777282715,
"learning_rate": 3.6153846153846156e-06,
"loss": 0.0068,
"step": 16475
},
{
"epoch": 18.17180616740088,
"grad_norm": 0.04498510807752609,
"learning_rate": 3.58974358974359e-06,
"loss": 0.0065,
"step": 16500
},
{
"epoch": 18.199339207048457,
"grad_norm": 0.05225253850221634,
"learning_rate": 3.5641025641025646e-06,
"loss": 0.0064,
"step": 16525
},
{
"epoch": 18.226872246696036,
"grad_norm": 0.05612196773290634,
"learning_rate": 3.538461538461539e-06,
"loss": 0.0069,
"step": 16550
},
{
"epoch": 18.254405286343612,
"grad_norm": 0.05375833064317703,
"learning_rate": 3.5128205128205127e-06,
"loss": 0.0063,
"step": 16575
},
{
"epoch": 18.281938325991188,
"grad_norm": 0.08952938765287399,
"learning_rate": 3.487179487179487e-06,
"loss": 0.0063,
"step": 16600
},
{
"epoch": 18.309471365638768,
"grad_norm": 0.05091915279626846,
"learning_rate": 3.4615384615384617e-06,
"loss": 0.0064,
"step": 16625
},
{
"epoch": 18.337004405286343,
"grad_norm": 0.05258096382021904,
"learning_rate": 3.435897435897436e-06,
"loss": 0.0066,
"step": 16650
},
{
"epoch": 18.36453744493392,
"grad_norm": 0.09490983188152313,
"learning_rate": 3.4102564102564107e-06,
"loss": 0.0063,
"step": 16675
},
{
"epoch": 18.3920704845815,
"grad_norm": 0.05754420533776283,
"learning_rate": 3.384615384615385e-06,
"loss": 0.0066,
"step": 16700
},
{
"epoch": 18.419603524229075,
"grad_norm": 0.045870695263147354,
"learning_rate": 3.358974358974359e-06,
"loss": 0.0062,
"step": 16725
},
{
"epoch": 18.44713656387665,
"grad_norm": 0.05464649200439453,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.007,
"step": 16750
},
{
"epoch": 18.47466960352423,
"grad_norm": 0.09517217427492142,
"learning_rate": 3.307692307692308e-06,
"loss": 0.0066,
"step": 16775
},
{
"epoch": 18.502202643171806,
"grad_norm": 0.05817287415266037,
"learning_rate": 3.2820512820512823e-06,
"loss": 0.0066,
"step": 16800
},
{
"epoch": 18.529735682819382,
"grad_norm": 0.06281236559152603,
"learning_rate": 3.256410256410257e-06,
"loss": 0.0069,
"step": 16825
},
{
"epoch": 18.55726872246696,
"grad_norm": 0.06776443868875504,
"learning_rate": 3.2307692307692313e-06,
"loss": 0.0071,
"step": 16850
},
{
"epoch": 18.584801762114537,
"grad_norm": 0.07165702432394028,
"learning_rate": 3.205128205128206e-06,
"loss": 0.0067,
"step": 16875
},
{
"epoch": 18.612334801762113,
"grad_norm": 0.05155620351433754,
"learning_rate": 3.1794871794871795e-06,
"loss": 0.0063,
"step": 16900
},
{
"epoch": 18.639867841409693,
"grad_norm": 0.05491460859775543,
"learning_rate": 3.153846153846154e-06,
"loss": 0.0062,
"step": 16925
},
{
"epoch": 18.66740088105727,
"grad_norm": 0.05498324707150459,
"learning_rate": 3.1282051282051284e-06,
"loss": 0.0068,
"step": 16950
},
{
"epoch": 18.694933920704845,
"grad_norm": 0.06264489144086838,
"learning_rate": 3.102564102564103e-06,
"loss": 0.0063,
"step": 16975
},
{
"epoch": 18.722466960352424,
"grad_norm": 0.058767594397068024,
"learning_rate": 3.0769230769230774e-06,
"loss": 0.0074,
"step": 17000
},
{
"epoch": 18.722466960352424,
"eval_cer": 55.902582738183945,
"eval_loss": 0.9159504175186157,
"eval_runtime": 454.2093,
"eval_samples_per_second": 23.295,
"eval_steps_per_second": 5.826,
"eval_wer": 125.60113154172561,
"step": 17000
},
{
"epoch": 18.75,
"grad_norm": 0.07268258184194565,
"learning_rate": 3.051282051282052e-06,
"loss": 0.0072,
"step": 17025
},
{
"epoch": 18.777533039647576,
"grad_norm": 0.1571418195962906,
"learning_rate": 3.0256410256410256e-06,
"loss": 0.0066,
"step": 17050
},
{
"epoch": 18.805066079295155,
"grad_norm": 0.054754406213760376,
"learning_rate": 3e-06,
"loss": 0.0065,
"step": 17075
},
{
"epoch": 18.83259911894273,
"grad_norm": 0.06480716168880463,
"learning_rate": 2.9743589743589746e-06,
"loss": 0.0061,
"step": 17100
},
{
"epoch": 18.860132158590307,
"grad_norm": 0.08004415780305862,
"learning_rate": 2.948717948717949e-06,
"loss": 0.0064,
"step": 17125
},
{
"epoch": 18.887665198237887,
"grad_norm": 0.05629754438996315,
"learning_rate": 2.9230769230769236e-06,
"loss": 0.0069,
"step": 17150
},
{
"epoch": 18.915198237885463,
"grad_norm": 0.05493941903114319,
"learning_rate": 2.897435897435898e-06,
"loss": 0.0068,
"step": 17175
},
{
"epoch": 18.94273127753304,
"grad_norm": 0.05299900844693184,
"learning_rate": 2.8717948717948717e-06,
"loss": 0.0067,
"step": 17200
},
{
"epoch": 18.970264317180618,
"grad_norm": 0.06607411801815033,
"learning_rate": 2.846153846153846e-06,
"loss": 0.0073,
"step": 17225
},
{
"epoch": 18.997797356828194,
"grad_norm": 0.06345100700855255,
"learning_rate": 2.8205128205128207e-06,
"loss": 0.0068,
"step": 17250
},
{
"epoch": 19.02533039647577,
"grad_norm": 0.04356463998556137,
"learning_rate": 2.794871794871795e-06,
"loss": 0.0057,
"step": 17275
},
{
"epoch": 19.05286343612335,
"grad_norm": 0.044814836233854294,
"learning_rate": 2.7692307692307697e-06,
"loss": 0.0055,
"step": 17300
},
{
"epoch": 19.080396475770925,
"grad_norm": 0.07849572598934174,
"learning_rate": 2.743589743589744e-06,
"loss": 0.0055,
"step": 17325
},
{
"epoch": 19.1079295154185,
"grad_norm": 0.07459200173616409,
"learning_rate": 2.717948717948718e-06,
"loss": 0.0054,
"step": 17350
},
{
"epoch": 19.13546255506608,
"grad_norm": 0.04626765102148056,
"learning_rate": 2.6923076923076923e-06,
"loss": 0.0051,
"step": 17375
},
{
"epoch": 19.162995594713657,
"grad_norm": 0.04221516102552414,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0053,
"step": 17400
},
{
"epoch": 19.190528634361232,
"grad_norm": 0.05490809306502342,
"learning_rate": 2.6410256410256413e-06,
"loss": 0.0055,
"step": 17425
},
{
"epoch": 19.218061674008812,
"grad_norm": 0.058104030787944794,
"learning_rate": 2.615384615384616e-06,
"loss": 0.0055,
"step": 17450
},
{
"epoch": 19.245594713656388,
"grad_norm": 0.0437290221452713,
"learning_rate": 2.5897435897435903e-06,
"loss": 0.0058,
"step": 17475
},
{
"epoch": 19.273127753303964,
"grad_norm": 0.048149868845939636,
"learning_rate": 2.564102564102564e-06,
"loss": 0.0053,
"step": 17500
},
{
"epoch": 19.300660792951543,
"grad_norm": 0.05037374794483185,
"learning_rate": 2.5384615384615385e-06,
"loss": 0.006,
"step": 17525
},
{
"epoch": 19.32819383259912,
"grad_norm": 0.06295998394489288,
"learning_rate": 2.512820512820513e-06,
"loss": 0.0057,
"step": 17550
},
{
"epoch": 19.355726872246695,
"grad_norm": 0.04326135665178299,
"learning_rate": 2.4871794871794875e-06,
"loss": 0.0053,
"step": 17575
},
{
"epoch": 19.383259911894275,
"grad_norm": 0.06319163739681244,
"learning_rate": 2.461538461538462e-06,
"loss": 0.0059,
"step": 17600
},
{
"epoch": 19.41079295154185,
"grad_norm": 0.03665575757622719,
"learning_rate": 2.435897435897436e-06,
"loss": 0.0054,
"step": 17625
},
{
"epoch": 19.438325991189426,
"grad_norm": 0.07812593877315521,
"learning_rate": 2.4102564102564105e-06,
"loss": 0.0056,
"step": 17650
},
{
"epoch": 19.465859030837006,
"grad_norm": 0.0484769307076931,
"learning_rate": 2.384615384615385e-06,
"loss": 0.006,
"step": 17675
},
{
"epoch": 19.493392070484582,
"grad_norm": 0.04405777528882027,
"learning_rate": 2.358974358974359e-06,
"loss": 0.0056,
"step": 17700
},
{
"epoch": 19.520925110132158,
"grad_norm": 0.04773370921611786,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0054,
"step": 17725
},
{
"epoch": 19.548458149779737,
"grad_norm": 0.04407154396176338,
"learning_rate": 2.307692307692308e-06,
"loss": 0.0059,
"step": 17750
},
{
"epoch": 19.575991189427313,
"grad_norm": 0.05238990858197212,
"learning_rate": 2.282051282051282e-06,
"loss": 0.0058,
"step": 17775
},
{
"epoch": 19.60352422907489,
"grad_norm": 0.06105871871113777,
"learning_rate": 2.2564102564102566e-06,
"loss": 0.0055,
"step": 17800
},
{
"epoch": 19.63105726872247,
"grad_norm": 0.15811942517757416,
"learning_rate": 2.230769230769231e-06,
"loss": 0.0062,
"step": 17825
},
{
"epoch": 19.658590308370044,
"grad_norm": 0.036435484886169434,
"learning_rate": 2.2051282051282052e-06,
"loss": 0.0053,
"step": 17850
},
{
"epoch": 19.68612334801762,
"grad_norm": 0.042688727378845215,
"learning_rate": 2.1794871794871797e-06,
"loss": 0.0056,
"step": 17875
},
{
"epoch": 19.7136563876652,
"grad_norm": 0.04815078526735306,
"learning_rate": 2.153846153846154e-06,
"loss": 0.0054,
"step": 17900
},
{
"epoch": 19.741189427312776,
"grad_norm": 0.04753319174051285,
"learning_rate": 2.1282051282051283e-06,
"loss": 0.0056,
"step": 17925
},
{
"epoch": 19.76872246696035,
"grad_norm": 0.04775834083557129,
"learning_rate": 2.1025641025641028e-06,
"loss": 0.0058,
"step": 17950
},
{
"epoch": 19.79625550660793,
"grad_norm": 0.04497678205370903,
"learning_rate": 2.0769230769230773e-06,
"loss": 0.0054,
"step": 17975
},
{
"epoch": 19.823788546255507,
"grad_norm": 0.06292616575956345,
"learning_rate": 2.0512820512820513e-06,
"loss": 0.0053,
"step": 18000
},
{
"epoch": 19.823788546255507,
"eval_cer": 50.16278243325347,
"eval_loss": 0.9244782328605652,
"eval_runtime": 438.8708,
"eval_samples_per_second": 24.11,
"eval_steps_per_second": 6.029,
"eval_wer": 116.01131541725603,
"step": 18000
},
{
"epoch": 19.851321585903083,
"grad_norm": 0.04525560513138771,
"learning_rate": 2.025641025641026e-06,
"loss": 0.0061,
"step": 18025
},
{
"epoch": 19.878854625550662,
"grad_norm": 0.04258139431476593,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0055,
"step": 18050
},
{
"epoch": 19.90638766519824,
"grad_norm": 0.05663159489631653,
"learning_rate": 1.9743589743589744e-06,
"loss": 0.0056,
"step": 18075
},
{
"epoch": 19.933920704845814,
"grad_norm": 0.05438832566142082,
"learning_rate": 1.948717948717949e-06,
"loss": 0.0057,
"step": 18100
},
{
"epoch": 19.961453744493394,
"grad_norm": 0.049876242876052856,
"learning_rate": 1.9230769230769234e-06,
"loss": 0.0057,
"step": 18125
},
{
"epoch": 19.98898678414097,
"grad_norm": 0.04682116210460663,
"learning_rate": 1.8974358974358975e-06,
"loss": 0.0054,
"step": 18150
},
{
"epoch": 20.016519823788546,
"grad_norm": 0.031594615429639816,
"learning_rate": 1.871794871794872e-06,
"loss": 0.0049,
"step": 18175
},
{
"epoch": 20.044052863436125,
"grad_norm": 0.03813030198216438,
"learning_rate": 1.8461538461538465e-06,
"loss": 0.0047,
"step": 18200
},
{
"epoch": 20.0715859030837,
"grad_norm": 0.0341855026781559,
"learning_rate": 1.8205128205128205e-06,
"loss": 0.005,
"step": 18225
},
{
"epoch": 20.099118942731277,
"grad_norm": 0.03425971418619156,
"learning_rate": 1.794871794871795e-06,
"loss": 0.0048,
"step": 18250
},
{
"epoch": 20.126651982378856,
"grad_norm": 0.03382967412471771,
"learning_rate": 1.7692307692307695e-06,
"loss": 0.0046,
"step": 18275
},
{
"epoch": 20.154185022026432,
"grad_norm": 0.0427679605782032,
"learning_rate": 1.7435897435897436e-06,
"loss": 0.0046,
"step": 18300
},
{
"epoch": 20.181718061674008,
"grad_norm": 0.053978513926267624,
"learning_rate": 1.717948717948718e-06,
"loss": 0.005,
"step": 18325
},
{
"epoch": 20.209251101321588,
"grad_norm": 0.03782325237989426,
"learning_rate": 1.6923076923076926e-06,
"loss": 0.0048,
"step": 18350
},
{
"epoch": 20.236784140969164,
"grad_norm": 0.03709937259554863,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0046,
"step": 18375
},
{
"epoch": 20.26431718061674,
"grad_norm": 0.037801649421453476,
"learning_rate": 1.6410256410256412e-06,
"loss": 0.0045,
"step": 18400
},
{
"epoch": 20.291850220264315,
"grad_norm": 0.04301599785685539,
"learning_rate": 1.6153846153846157e-06,
"loss": 0.0049,
"step": 18425
},
{
"epoch": 20.319383259911895,
"grad_norm": 0.052962783724069595,
"learning_rate": 1.5897435897435897e-06,
"loss": 0.0054,
"step": 18450
},
{
"epoch": 20.34691629955947,
"grad_norm": 0.033712126314640045,
"learning_rate": 1.5641025641025642e-06,
"loss": 0.0046,
"step": 18475
},
{
"epoch": 20.374449339207047,
"grad_norm": 0.04511284828186035,
"learning_rate": 1.5384615384615387e-06,
"loss": 0.0055,
"step": 18500
},
{
"epoch": 20.401982378854626,
"grad_norm": 0.04226896911859512,
"learning_rate": 1.5128205128205128e-06,
"loss": 0.005,
"step": 18525
},
{
"epoch": 20.429515418502202,
"grad_norm": 0.05907629802823067,
"learning_rate": 1.4871794871794873e-06,
"loss": 0.0053,
"step": 18550
},
{
"epoch": 20.457048458149778,
"grad_norm": 0.03855994716286659,
"learning_rate": 1.4615384615384618e-06,
"loss": 0.0048,
"step": 18575
},
{
"epoch": 20.484581497797357,
"grad_norm": 0.03888246417045593,
"learning_rate": 1.4358974358974359e-06,
"loss": 0.0044,
"step": 18600
},
{
"epoch": 20.512114537444933,
"grad_norm": 0.04747318476438522,
"learning_rate": 1.4102564102564104e-06,
"loss": 0.005,
"step": 18625
},
{
"epoch": 20.53964757709251,
"grad_norm": 0.03958306834101677,
"learning_rate": 1.3846153846153848e-06,
"loss": 0.0049,
"step": 18650
},
{
"epoch": 20.56718061674009,
"grad_norm": 0.03587072342634201,
"learning_rate": 1.358974358974359e-06,
"loss": 0.005,
"step": 18675
},
{
"epoch": 20.594713656387665,
"grad_norm": 0.05696781352162361,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0047,
"step": 18700
},
{
"epoch": 20.62224669603524,
"grad_norm": 0.04611218348145485,
"learning_rate": 1.307692307692308e-06,
"loss": 0.0049,
"step": 18725
},
{
"epoch": 20.64977973568282,
"grad_norm": 0.039149776101112366,
"learning_rate": 1.282051282051282e-06,
"loss": 0.0047,
"step": 18750
},
{
"epoch": 20.677312775330396,
"grad_norm": 0.041222602128982544,
"learning_rate": 1.2564102564102565e-06,
"loss": 0.0049,
"step": 18775
},
{
"epoch": 20.704845814977972,
"grad_norm": 0.04520060867071152,
"learning_rate": 1.230769230769231e-06,
"loss": 0.005,
"step": 18800
},
{
"epoch": 20.73237885462555,
"grad_norm": 0.2069810926914215,
"learning_rate": 1.2051282051282053e-06,
"loss": 0.0051,
"step": 18825
},
{
"epoch": 20.759911894273127,
"grad_norm": 0.04224303737282753,
"learning_rate": 1.1794871794871795e-06,
"loss": 0.0049,
"step": 18850
},
{
"epoch": 20.787444933920703,
"grad_norm": 0.040397610515356064,
"learning_rate": 1.153846153846154e-06,
"loss": 0.0045,
"step": 18875
},
{
"epoch": 20.814977973568283,
"grad_norm": 0.037870801985263824,
"learning_rate": 1.1282051282051283e-06,
"loss": 0.0048,
"step": 18900
},
{
"epoch": 20.84251101321586,
"grad_norm": 0.04810772091150284,
"learning_rate": 1.1025641025641026e-06,
"loss": 0.0051,
"step": 18925
},
{
"epoch": 20.870044052863435,
"grad_norm": 0.045735545456409454,
"learning_rate": 1.076923076923077e-06,
"loss": 0.0047,
"step": 18950
},
{
"epoch": 20.897577092511014,
"grad_norm": 0.03935140371322632,
"learning_rate": 1.0512820512820514e-06,
"loss": 0.0048,
"step": 18975
},
{
"epoch": 20.92511013215859,
"grad_norm": 0.05066705495119095,
"learning_rate": 1.0256410256410257e-06,
"loss": 0.0052,
"step": 19000
},
{
"epoch": 20.92511013215859,
"eval_cer": 47.776644159893614,
"eval_loss": 0.9299447536468506,
"eval_runtime": 436.4027,
"eval_samples_per_second": 24.246,
"eval_steps_per_second": 6.063,
"eval_wer": 115.08722300801509,
"step": 19000
},
{
"epoch": 20.952643171806166,
"grad_norm": 0.04411695525050163,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0049,
"step": 19025
},
{
"epoch": 20.980176211453745,
"grad_norm": 0.041007477790117264,
"learning_rate": 9.743589743589745e-07,
"loss": 0.0049,
"step": 19050
},
{
"epoch": 21.00770925110132,
"grad_norm": 0.03803296014666557,
"learning_rate": 9.487179487179487e-07,
"loss": 0.0048,
"step": 19075
},
{
"epoch": 21.035242290748897,
"grad_norm": 0.03167716786265373,
"learning_rate": 9.230769230769232e-07,
"loss": 0.0043,
"step": 19100
},
{
"epoch": 21.062775330396477,
"grad_norm": 0.04057995602488518,
"learning_rate": 8.974358974358975e-07,
"loss": 0.0044,
"step": 19125
},
{
"epoch": 21.090308370044053,
"grad_norm": 0.042665161192417145,
"learning_rate": 8.717948717948718e-07,
"loss": 0.0044,
"step": 19150
},
{
"epoch": 21.11784140969163,
"grad_norm": 0.031034432351589203,
"learning_rate": 8.461538461538463e-07,
"loss": 0.0042,
"step": 19175
},
{
"epoch": 21.145374449339208,
"grad_norm": 0.03610997274518013,
"learning_rate": 8.205128205128206e-07,
"loss": 0.0043,
"step": 19200
},
{
"epoch": 21.172907488986784,
"grad_norm": 0.03419085592031479,
"learning_rate": 7.948717948717949e-07,
"loss": 0.0044,
"step": 19225
},
{
"epoch": 21.20044052863436,
"grad_norm": 0.03195258602499962,
"learning_rate": 7.692307692307694e-07,
"loss": 0.0043,
"step": 19250
},
{
"epoch": 21.22797356828194,
"grad_norm": 0.033934228122234344,
"learning_rate": 7.435897435897436e-07,
"loss": 0.0044,
"step": 19275
},
{
"epoch": 21.255506607929515,
"grad_norm": 0.038121603429317474,
"learning_rate": 7.179487179487179e-07,
"loss": 0.0042,
"step": 19300
},
{
"epoch": 21.28303964757709,
"grad_norm": 0.042483534663915634,
"learning_rate": 6.923076923076924e-07,
"loss": 0.0043,
"step": 19325
},
{
"epoch": 21.31057268722467,
"grad_norm": 0.03081641159951687,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0045,
"step": 19350
},
{
"epoch": 21.338105726872246,
"grad_norm": 0.038055986166000366,
"learning_rate": 6.41025641025641e-07,
"loss": 0.0045,
"step": 19375
},
{
"epoch": 21.365638766519822,
"grad_norm": 0.047620829194784164,
"learning_rate": 6.153846153846155e-07,
"loss": 0.0045,
"step": 19400
},
{
"epoch": 21.393171806167402,
"grad_norm": 0.04037508741021156,
"learning_rate": 5.897435897435898e-07,
"loss": 0.0046,
"step": 19425
},
{
"epoch": 21.420704845814978,
"grad_norm": 0.031555745750665665,
"learning_rate": 5.641025641025642e-07,
"loss": 0.0044,
"step": 19450
},
{
"epoch": 21.448237885462554,
"grad_norm": 0.040886688977479935,
"learning_rate": 5.384615384615386e-07,
"loss": 0.0045,
"step": 19475
},
{
"epoch": 21.475770925110133,
"grad_norm": 0.034373532980680466,
"learning_rate": 5.128205128205128e-07,
"loss": 0.0045,
"step": 19500
},
{
"epoch": 21.50330396475771,
"grad_norm": 0.03271722421050072,
"learning_rate": 4.871794871794872e-07,
"loss": 0.0044,
"step": 19525
},
{
"epoch": 21.530837004405285,
"grad_norm": 0.03823432698845863,
"learning_rate": 4.615384615384616e-07,
"loss": 0.0043,
"step": 19550
},
{
"epoch": 21.558370044052865,
"grad_norm": 0.03353292867541313,
"learning_rate": 4.358974358974359e-07,
"loss": 0.0041,
"step": 19575
},
{
"epoch": 21.58590308370044,
"grad_norm": 0.04605744779109955,
"learning_rate": 4.102564102564103e-07,
"loss": 0.0046,
"step": 19600
},
{
"epoch": 21.613436123348016,
"grad_norm": 0.0359153151512146,
"learning_rate": 3.846153846153847e-07,
"loss": 0.0047,
"step": 19625
},
{
"epoch": 21.640969162995596,
"grad_norm": 0.03662218898534775,
"learning_rate": 3.5897435897435896e-07,
"loss": 0.0044,
"step": 19650
},
{
"epoch": 21.66850220264317,
"grad_norm": 0.03287964314222336,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0043,
"step": 19675
},
{
"epoch": 21.696035242290748,
"grad_norm": 0.031979408115148544,
"learning_rate": 3.0769230769230774e-07,
"loss": 0.0043,
"step": 19700
},
{
"epoch": 21.723568281938327,
"grad_norm": 0.03703833371400833,
"learning_rate": 2.820512820512821e-07,
"loss": 0.005,
"step": 19725
},
{
"epoch": 21.751101321585903,
"grad_norm": 0.037790607661008835,
"learning_rate": 2.564102564102564e-07,
"loss": 0.0044,
"step": 19750
},
{
"epoch": 21.77863436123348,
"grad_norm": 0.04017505794763565,
"learning_rate": 2.307692307692308e-07,
"loss": 0.0044,
"step": 19775
},
{
"epoch": 21.80616740088106,
"grad_norm": 0.045976828783750534,
"learning_rate": 2.0512820512820514e-07,
"loss": 0.0043,
"step": 19800
},
{
"epoch": 21.833700440528634,
"grad_norm": 0.035648688673973083,
"learning_rate": 1.7948717948717948e-07,
"loss": 0.0043,
"step": 19825
},
{
"epoch": 21.86123348017621,
"grad_norm": 0.041394732892513275,
"learning_rate": 1.5384615384615387e-07,
"loss": 0.0044,
"step": 19850
},
{
"epoch": 21.88876651982379,
"grad_norm": 0.03750582039356232,
"learning_rate": 1.282051282051282e-07,
"loss": 0.0043,
"step": 19875
},
{
"epoch": 21.916299559471366,
"grad_norm": 0.037499021738767624,
"learning_rate": 1.0256410256410257e-07,
"loss": 0.0042,
"step": 19900
},
{
"epoch": 21.94383259911894,
"grad_norm": 0.04036805406212807,
"learning_rate": 7.692307692307694e-08,
"loss": 0.0044,
"step": 19925
},
{
"epoch": 21.97136563876652,
"grad_norm": 0.03988456726074219,
"learning_rate": 5.1282051282051286e-08,
"loss": 0.0043,
"step": 19950
},
{
"epoch": 21.998898678414097,
"grad_norm": 0.03900681063532829,
"learning_rate": 2.5641025641025643e-08,
"loss": 0.0047,
"step": 19975
},
{
"epoch": 22.026431718061673,
"grad_norm": 0.03660197928547859,
"learning_rate": 0.0,
"loss": 0.0043,
"step": 20000
},
{
"epoch": 22.026431718061673,
"eval_cer": 50.12380635768574,
"eval_loss": 0.932576060295105,
"eval_runtime": 448.5885,
"eval_samples_per_second": 23.587,
"eval_steps_per_second": 5.899,
"eval_wer": 115.25695426685525,
"step": 20000
},
{
"epoch": 22.026431718061673,
"step": 20000,
"total_flos": 1.5756085807389082e+19,
"train_loss": 0.1688191306591034,
"train_runtime": 17105.7932,
"train_samples_per_second": 37.414,
"train_steps_per_second": 1.169
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 23,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5756085807389082e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}