DEMO_TRAIN_AC_NTUT / trainer_state.json
gymeee's picture
Update from DEMO_TRAIN_AC_NYUT
f6cd2b5
raw
history blame
160 kB
{
"best_metric": 0.2921061041866411,
"best_model_checkpoint": "./save/jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn/checkpoint-60480",
"epoch": 40.0,
"global_step": 60480,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.9961144179894184e-05,
"loss": 3.0163,
"step": 50
},
{
"epoch": 0.07,
"learning_rate": 4.9920634920634924e-05,
"loss": 2.0988,
"step": 100
},
{
"epoch": 0.1,
"learning_rate": 4.987929894179895e-05,
"loss": 1.9504,
"step": 150
},
{
"epoch": 0.13,
"learning_rate": 4.983796296296296e-05,
"loss": 1.8016,
"step": 200
},
{
"epoch": 0.17,
"learning_rate": 4.9796626984126986e-05,
"loss": 1.5856,
"step": 250
},
{
"epoch": 0.2,
"learning_rate": 4.975529100529101e-05,
"loss": 1.5416,
"step": 300
},
{
"epoch": 0.23,
"learning_rate": 4.971395502645503e-05,
"loss": 1.5159,
"step": 350
},
{
"epoch": 0.26,
"learning_rate": 4.967261904761905e-05,
"loss": 1.4587,
"step": 400
},
{
"epoch": 0.3,
"learning_rate": 4.963128306878307e-05,
"loss": 1.3937,
"step": 450
},
{
"epoch": 0.33,
"learning_rate": 4.958994708994709e-05,
"loss": 1.522,
"step": 500
},
{
"epoch": 0.36,
"learning_rate": 4.954861111111112e-05,
"loss": 1.4178,
"step": 550
},
{
"epoch": 0.4,
"learning_rate": 4.950727513227513e-05,
"loss": 1.3444,
"step": 600
},
{
"epoch": 0.43,
"learning_rate": 4.9465939153439155e-05,
"loss": 1.3562,
"step": 650
},
{
"epoch": 0.46,
"learning_rate": 4.942460317460318e-05,
"loss": 1.2731,
"step": 700
},
{
"epoch": 0.5,
"learning_rate": 4.93832671957672e-05,
"loss": 1.3772,
"step": 750
},
{
"epoch": 0.53,
"learning_rate": 4.934193121693122e-05,
"loss": 1.1955,
"step": 800
},
{
"epoch": 0.56,
"learning_rate": 4.930059523809524e-05,
"loss": 1.1987,
"step": 850
},
{
"epoch": 0.6,
"learning_rate": 4.925925925925926e-05,
"loss": 1.234,
"step": 900
},
{
"epoch": 0.63,
"learning_rate": 4.9217923280423286e-05,
"loss": 1.1588,
"step": 950
},
{
"epoch": 0.66,
"learning_rate": 4.9176587301587305e-05,
"loss": 1.2147,
"step": 1000
},
{
"epoch": 0.69,
"learning_rate": 4.913525132275132e-05,
"loss": 1.1531,
"step": 1050
},
{
"epoch": 0.73,
"learning_rate": 4.909391534391535e-05,
"loss": 1.2003,
"step": 1100
},
{
"epoch": 0.76,
"learning_rate": 4.905257936507937e-05,
"loss": 1.2157,
"step": 1150
},
{
"epoch": 0.79,
"learning_rate": 4.9011243386243385e-05,
"loss": 1.2028,
"step": 1200
},
{
"epoch": 0.83,
"learning_rate": 4.896990740740741e-05,
"loss": 1.1683,
"step": 1250
},
{
"epoch": 0.86,
"learning_rate": 4.892857142857143e-05,
"loss": 1.1867,
"step": 1300
},
{
"epoch": 0.89,
"learning_rate": 4.8887235449735454e-05,
"loss": 1.1559,
"step": 1350
},
{
"epoch": 0.93,
"learning_rate": 4.8846726190476194e-05,
"loss": 1.1128,
"step": 1400
},
{
"epoch": 0.96,
"learning_rate": 4.880539021164021e-05,
"loss": 1.0949,
"step": 1450
},
{
"epoch": 0.99,
"learning_rate": 4.876405423280424e-05,
"loss": 1.1256,
"step": 1500
},
{
"epoch": 1.0,
"eval_cer": 0.40424816185304435,
"eval_loss": 1.3202868700027466,
"eval_mer": 0.4189837008628955,
"eval_runtime": 95.4069,
"eval_samples_per_second": 11.813,
"eval_steps_per_second": 1.478,
"step": 1512
},
{
"epoch": 1.03,
"learning_rate": 4.8722718253968256e-05,
"loss": 0.9742,
"step": 1550
},
{
"epoch": 1.06,
"learning_rate": 4.8681382275132275e-05,
"loss": 0.9366,
"step": 1600
},
{
"epoch": 1.09,
"learning_rate": 4.86400462962963e-05,
"loss": 0.9154,
"step": 1650
},
{
"epoch": 1.12,
"learning_rate": 4.859871031746032e-05,
"loss": 0.9173,
"step": 1700
},
{
"epoch": 1.16,
"learning_rate": 4.855737433862434e-05,
"loss": 0.9022,
"step": 1750
},
{
"epoch": 1.19,
"learning_rate": 4.851603835978836e-05,
"loss": 0.9119,
"step": 1800
},
{
"epoch": 1.22,
"learning_rate": 4.847470238095239e-05,
"loss": 0.9003,
"step": 1850
},
{
"epoch": 1.26,
"learning_rate": 4.8433366402116406e-05,
"loss": 0.9018,
"step": 1900
},
{
"epoch": 1.29,
"learning_rate": 4.8392030423280425e-05,
"loss": 0.9132,
"step": 1950
},
{
"epoch": 1.32,
"learning_rate": 4.835069444444444e-05,
"loss": 0.873,
"step": 2000
},
{
"epoch": 1.36,
"learning_rate": 4.830935846560847e-05,
"loss": 0.8774,
"step": 2050
},
{
"epoch": 1.39,
"learning_rate": 4.8268022486772494e-05,
"loss": 0.862,
"step": 2100
},
{
"epoch": 1.42,
"learning_rate": 4.8226686507936506e-05,
"loss": 0.964,
"step": 2150
},
{
"epoch": 1.46,
"learning_rate": 4.818535052910053e-05,
"loss": 0.9131,
"step": 2200
},
{
"epoch": 1.49,
"learning_rate": 4.8144014550264556e-05,
"loss": 0.9175,
"step": 2250
},
{
"epoch": 1.52,
"learning_rate": 4.8102678571428575e-05,
"loss": 0.9015,
"step": 2300
},
{
"epoch": 1.55,
"learning_rate": 4.806134259259259e-05,
"loss": 0.9043,
"step": 2350
},
{
"epoch": 1.59,
"learning_rate": 4.802000661375662e-05,
"loss": 0.8006,
"step": 2400
},
{
"epoch": 1.62,
"learning_rate": 4.797867063492064e-05,
"loss": 0.8803,
"step": 2450
},
{
"epoch": 1.65,
"learning_rate": 4.793733465608466e-05,
"loss": 0.8212,
"step": 2500
},
{
"epoch": 1.69,
"learning_rate": 4.7895998677248674e-05,
"loss": 0.855,
"step": 2550
},
{
"epoch": 1.72,
"learning_rate": 4.78546626984127e-05,
"loss": 0.8908,
"step": 2600
},
{
"epoch": 1.75,
"learning_rate": 4.7813326719576725e-05,
"loss": 0.8804,
"step": 2650
},
{
"epoch": 1.79,
"learning_rate": 4.777199074074074e-05,
"loss": 0.9983,
"step": 2700
},
{
"epoch": 1.82,
"learning_rate": 4.773065476190476e-05,
"loss": 0.8771,
"step": 2750
},
{
"epoch": 1.85,
"learning_rate": 4.768931878306879e-05,
"loss": 0.9337,
"step": 2800
},
{
"epoch": 1.88,
"learning_rate": 4.7647982804232805e-05,
"loss": 0.8288,
"step": 2850
},
{
"epoch": 1.92,
"learning_rate": 4.760664682539683e-05,
"loss": 0.9049,
"step": 2900
},
{
"epoch": 1.95,
"learning_rate": 4.756531084656085e-05,
"loss": 0.8074,
"step": 2950
},
{
"epoch": 1.98,
"learning_rate": 4.752397486772487e-05,
"loss": 0.8368,
"step": 3000
},
{
"epoch": 2.0,
"eval_cer": 0.3695516363112115,
"eval_loss": 1.2670589685440063,
"eval_mer": 0.39829018855864495,
"eval_runtime": 95.1955,
"eval_samples_per_second": 11.839,
"eval_steps_per_second": 1.481,
"step": 3024
},
{
"epoch": 2.02,
"learning_rate": 4.748263888888889e-05,
"loss": 0.7209,
"step": 3050
},
{
"epoch": 2.05,
"learning_rate": 4.744130291005292e-05,
"loss": 0.6765,
"step": 3100
},
{
"epoch": 2.08,
"learning_rate": 4.739996693121693e-05,
"loss": 0.6969,
"step": 3150
},
{
"epoch": 2.12,
"learning_rate": 4.7358630952380955e-05,
"loss": 0.718,
"step": 3200
},
{
"epoch": 2.15,
"learning_rate": 4.7317294973544974e-05,
"loss": 0.699,
"step": 3250
},
{
"epoch": 2.18,
"learning_rate": 4.7275958994709e-05,
"loss": 0.7062,
"step": 3300
},
{
"epoch": 2.22,
"learning_rate": 4.723462301587302e-05,
"loss": 0.6871,
"step": 3350
},
{
"epoch": 2.25,
"learning_rate": 4.7193287037037036e-05,
"loss": 0.7622,
"step": 3400
},
{
"epoch": 2.28,
"learning_rate": 4.715195105820106e-05,
"loss": 0.7401,
"step": 3450
},
{
"epoch": 2.31,
"learning_rate": 4.711061507936509e-05,
"loss": 0.6804,
"step": 3500
},
{
"epoch": 2.35,
"learning_rate": 4.70692791005291e-05,
"loss": 0.6523,
"step": 3550
},
{
"epoch": 2.38,
"learning_rate": 4.7027943121693124e-05,
"loss": 0.7141,
"step": 3600
},
{
"epoch": 2.41,
"learning_rate": 4.698660714285715e-05,
"loss": 0.6962,
"step": 3650
},
{
"epoch": 2.45,
"learning_rate": 4.694527116402117e-05,
"loss": 0.708,
"step": 3700
},
{
"epoch": 2.48,
"learning_rate": 4.6903935185185186e-05,
"loss": 0.6723,
"step": 3750
},
{
"epoch": 2.51,
"learning_rate": 4.6862599206349204e-05,
"loss": 0.6947,
"step": 3800
},
{
"epoch": 2.55,
"learning_rate": 4.682126322751323e-05,
"loss": 0.6948,
"step": 3850
},
{
"epoch": 2.58,
"learning_rate": 4.6779927248677255e-05,
"loss": 0.7409,
"step": 3900
},
{
"epoch": 2.61,
"learning_rate": 4.673859126984127e-05,
"loss": 0.7444,
"step": 3950
},
{
"epoch": 2.65,
"learning_rate": 4.669725529100529e-05,
"loss": 0.6634,
"step": 4000
},
{
"epoch": 2.68,
"learning_rate": 4.665591931216932e-05,
"loss": 0.6559,
"step": 4050
},
{
"epoch": 2.71,
"learning_rate": 4.6614583333333336e-05,
"loss": 0.6702,
"step": 4100
},
{
"epoch": 2.74,
"learning_rate": 4.6573247354497354e-05,
"loss": 0.6344,
"step": 4150
},
{
"epoch": 2.78,
"learning_rate": 4.653191137566138e-05,
"loss": 0.7009,
"step": 4200
},
{
"epoch": 2.81,
"learning_rate": 4.64905753968254e-05,
"loss": 0.6868,
"step": 4250
},
{
"epoch": 2.84,
"learning_rate": 4.6449239417989423e-05,
"loss": 0.6605,
"step": 4300
},
{
"epoch": 2.88,
"learning_rate": 4.640790343915344e-05,
"loss": 0.6451,
"step": 4350
},
{
"epoch": 2.91,
"learning_rate": 4.636656746031746e-05,
"loss": 0.6496,
"step": 4400
},
{
"epoch": 2.94,
"learning_rate": 4.6325231481481486e-05,
"loss": 0.7193,
"step": 4450
},
{
"epoch": 2.98,
"learning_rate": 4.6283895502645504e-05,
"loss": 0.6488,
"step": 4500
},
{
"epoch": 3.0,
"eval_cer": 0.33317314623480226,
"eval_loss": 1.2416309118270874,
"eval_mer": 0.36792905081495686,
"eval_runtime": 95.6118,
"eval_samples_per_second": 11.787,
"eval_steps_per_second": 1.475,
"step": 4536
},
{
"epoch": 3.01,
"learning_rate": 4.624255952380952e-05,
"loss": 0.6896,
"step": 4550
},
{
"epoch": 3.04,
"learning_rate": 4.620122354497355e-05,
"loss": 0.5454,
"step": 4600
},
{
"epoch": 3.08,
"learning_rate": 4.6159887566137567e-05,
"loss": 0.5877,
"step": 4650
},
{
"epoch": 3.11,
"learning_rate": 4.611855158730159e-05,
"loss": 0.5971,
"step": 4700
},
{
"epoch": 3.14,
"learning_rate": 4.607721560846561e-05,
"loss": 0.5895,
"step": 4750
},
{
"epoch": 3.17,
"learning_rate": 4.603587962962963e-05,
"loss": 0.5444,
"step": 4800
},
{
"epoch": 3.21,
"learning_rate": 4.5994543650793654e-05,
"loss": 0.5482,
"step": 4850
},
{
"epoch": 3.24,
"learning_rate": 4.595320767195768e-05,
"loss": 0.5129,
"step": 4900
},
{
"epoch": 3.27,
"learning_rate": 4.591187169312169e-05,
"loss": 0.5814,
"step": 4950
},
{
"epoch": 3.31,
"learning_rate": 4.5870535714285716e-05,
"loss": 0.5138,
"step": 5000
},
{
"epoch": 3.34,
"learning_rate": 4.5829199735449735e-05,
"loss": 0.5813,
"step": 5050
},
{
"epoch": 3.37,
"learning_rate": 4.578786375661376e-05,
"loss": 0.5406,
"step": 5100
},
{
"epoch": 3.41,
"learning_rate": 4.574652777777778e-05,
"loss": 0.5675,
"step": 5150
},
{
"epoch": 3.44,
"learning_rate": 4.57051917989418e-05,
"loss": 0.584,
"step": 5200
},
{
"epoch": 3.47,
"learning_rate": 4.566385582010582e-05,
"loss": 0.5844,
"step": 5250
},
{
"epoch": 3.51,
"learning_rate": 4.562251984126985e-05,
"loss": 0.555,
"step": 5300
},
{
"epoch": 3.54,
"learning_rate": 4.558118386243386e-05,
"loss": 0.5929,
"step": 5350
},
{
"epoch": 3.57,
"learning_rate": 4.5539847883597885e-05,
"loss": 0.6313,
"step": 5400
},
{
"epoch": 3.6,
"learning_rate": 4.549851190476191e-05,
"loss": 0.5634,
"step": 5450
},
{
"epoch": 3.64,
"learning_rate": 4.545717592592593e-05,
"loss": 0.5763,
"step": 5500
},
{
"epoch": 3.67,
"learning_rate": 4.541583994708995e-05,
"loss": 0.565,
"step": 5550
},
{
"epoch": 3.7,
"learning_rate": 4.537450396825397e-05,
"loss": 0.4958,
"step": 5600
},
{
"epoch": 3.74,
"learning_rate": 4.533316798941799e-05,
"loss": 0.5628,
"step": 5650
},
{
"epoch": 3.77,
"learning_rate": 4.5291832010582016e-05,
"loss": 0.5922,
"step": 5700
},
{
"epoch": 3.8,
"learning_rate": 4.525049603174603e-05,
"loss": 0.5687,
"step": 5750
},
{
"epoch": 3.84,
"learning_rate": 4.520916005291005e-05,
"loss": 0.5546,
"step": 5800
},
{
"epoch": 3.87,
"learning_rate": 4.516782407407408e-05,
"loss": 0.5814,
"step": 5850
},
{
"epoch": 3.9,
"learning_rate": 4.51264880952381e-05,
"loss": 0.5326,
"step": 5900
},
{
"epoch": 3.94,
"learning_rate": 4.5085152116402116e-05,
"loss": 0.5213,
"step": 5950
},
{
"epoch": 3.97,
"learning_rate": 4.504381613756614e-05,
"loss": 0.5707,
"step": 6000
},
{
"epoch": 4.0,
"eval_cer": 0.3132779085972416,
"eval_loss": 1.2135156393051147,
"eval_mer": 0.3548258229466283,
"eval_runtime": 95.1191,
"eval_samples_per_second": 11.848,
"eval_steps_per_second": 1.482,
"step": 6048
},
{
"epoch": 4.0,
"learning_rate": 4.500330687830688e-05,
"loss": 0.522,
"step": 6050
},
{
"epoch": 4.03,
"learning_rate": 4.49619708994709e-05,
"loss": 0.4544,
"step": 6100
},
{
"epoch": 4.07,
"learning_rate": 4.4920634920634924e-05,
"loss": 0.4676,
"step": 6150
},
{
"epoch": 4.1,
"learning_rate": 4.487929894179894e-05,
"loss": 0.4998,
"step": 6200
},
{
"epoch": 4.13,
"learning_rate": 4.483796296296297e-05,
"loss": 0.4847,
"step": 6250
},
{
"epoch": 4.17,
"learning_rate": 4.479662698412699e-05,
"loss": 0.51,
"step": 6300
},
{
"epoch": 4.2,
"learning_rate": 4.4755291005291005e-05,
"loss": 0.4724,
"step": 6350
},
{
"epoch": 4.23,
"learning_rate": 4.471395502645503e-05,
"loss": 0.5023,
"step": 6400
},
{
"epoch": 4.27,
"learning_rate": 4.467261904761905e-05,
"loss": 0.4922,
"step": 6450
},
{
"epoch": 4.3,
"learning_rate": 4.463128306878307e-05,
"loss": 0.4087,
"step": 6500
},
{
"epoch": 4.33,
"learning_rate": 4.458994708994709e-05,
"loss": 0.4532,
"step": 6550
},
{
"epoch": 4.37,
"learning_rate": 4.454861111111112e-05,
"loss": 0.4796,
"step": 6600
},
{
"epoch": 4.4,
"learning_rate": 4.4507275132275137e-05,
"loss": 0.4327,
"step": 6650
},
{
"epoch": 4.43,
"learning_rate": 4.4465939153439155e-05,
"loss": 0.4628,
"step": 6700
},
{
"epoch": 4.46,
"learning_rate": 4.4424603174603174e-05,
"loss": 0.4486,
"step": 6750
},
{
"epoch": 4.5,
"learning_rate": 4.43832671957672e-05,
"loss": 0.4523,
"step": 6800
},
{
"epoch": 4.53,
"learning_rate": 4.434193121693122e-05,
"loss": 0.4582,
"step": 6850
},
{
"epoch": 4.56,
"learning_rate": 4.4300595238095236e-05,
"loss": 0.4913,
"step": 6900
},
{
"epoch": 4.6,
"learning_rate": 4.425925925925926e-05,
"loss": 0.4474,
"step": 6950
},
{
"epoch": 4.63,
"learning_rate": 4.4217923280423286e-05,
"loss": 0.4896,
"step": 7000
},
{
"epoch": 4.66,
"learning_rate": 4.4176587301587305e-05,
"loss": 0.4632,
"step": 7050
},
{
"epoch": 4.7,
"learning_rate": 4.4135251322751323e-05,
"loss": 0.4396,
"step": 7100
},
{
"epoch": 4.73,
"learning_rate": 4.409391534391535e-05,
"loss": 0.4866,
"step": 7150
},
{
"epoch": 4.76,
"learning_rate": 4.405257936507937e-05,
"loss": 0.404,
"step": 7200
},
{
"epoch": 4.79,
"learning_rate": 4.401124338624339e-05,
"loss": 0.4391,
"step": 7250
},
{
"epoch": 4.83,
"learning_rate": 4.3969907407407404e-05,
"loss": 0.4816,
"step": 7300
},
{
"epoch": 4.86,
"learning_rate": 4.392857142857143e-05,
"loss": 0.4727,
"step": 7350
},
{
"epoch": 4.89,
"learning_rate": 4.3887235449735455e-05,
"loss": 0.4271,
"step": 7400
},
{
"epoch": 4.93,
"learning_rate": 4.3846726190476194e-05,
"loss": 0.5031,
"step": 7450
},
{
"epoch": 4.96,
"learning_rate": 4.380539021164021e-05,
"loss": 0.4445,
"step": 7500
},
{
"epoch": 4.99,
"learning_rate": 4.376488095238095e-05,
"loss": 0.4489,
"step": 7550
},
{
"epoch": 5.0,
"eval_cer": 0.29967802393195253,
"eval_loss": 1.1896605491638184,
"eval_mer": 0.33940556088207097,
"eval_runtime": 95.1963,
"eval_samples_per_second": 11.839,
"eval_steps_per_second": 1.481,
"step": 7560
},
{
"epoch": 5.03,
"learning_rate": 4.372354497354498e-05,
"loss": 0.4105,
"step": 7600
},
{
"epoch": 5.06,
"learning_rate": 4.3682208994708996e-05,
"loss": 0.397,
"step": 7650
},
{
"epoch": 5.09,
"learning_rate": 4.364087301587302e-05,
"loss": 0.351,
"step": 7700
},
{
"epoch": 5.13,
"learning_rate": 4.359953703703704e-05,
"loss": 0.3586,
"step": 7750
},
{
"epoch": 5.16,
"learning_rate": 4.355820105820106e-05,
"loss": 0.357,
"step": 7800
},
{
"epoch": 5.19,
"learning_rate": 4.3516865079365084e-05,
"loss": 0.3978,
"step": 7850
},
{
"epoch": 5.22,
"learning_rate": 4.34755291005291e-05,
"loss": 0.3838,
"step": 7900
},
{
"epoch": 5.26,
"learning_rate": 4.343419312169312e-05,
"loss": 0.4029,
"step": 7950
},
{
"epoch": 5.29,
"learning_rate": 4.3392857142857146e-05,
"loss": 0.4344,
"step": 8000
},
{
"epoch": 5.32,
"learning_rate": 4.3351521164021165e-05,
"loss": 0.3906,
"step": 8050
},
{
"epoch": 5.36,
"learning_rate": 4.331018518518519e-05,
"loss": 0.3507,
"step": 8100
},
{
"epoch": 5.39,
"learning_rate": 4.326884920634921e-05,
"loss": 0.4054,
"step": 8150
},
{
"epoch": 5.42,
"learning_rate": 4.322751322751323e-05,
"loss": 0.3825,
"step": 8200
},
{
"epoch": 5.46,
"learning_rate": 4.318617724867725e-05,
"loss": 0.4181,
"step": 8250
},
{
"epoch": 5.49,
"learning_rate": 4.314484126984127e-05,
"loss": 0.3644,
"step": 8300
},
{
"epoch": 5.52,
"learning_rate": 4.310350529100529e-05,
"loss": 0.3331,
"step": 8350
},
{
"epoch": 5.56,
"learning_rate": 4.3062169312169315e-05,
"loss": 0.3559,
"step": 8400
},
{
"epoch": 5.59,
"learning_rate": 4.302083333333334e-05,
"loss": 0.4191,
"step": 8450
},
{
"epoch": 5.62,
"learning_rate": 4.297949735449736e-05,
"loss": 0.3387,
"step": 8500
},
{
"epoch": 5.65,
"learning_rate": 4.293816137566138e-05,
"loss": 0.3725,
"step": 8550
},
{
"epoch": 5.69,
"learning_rate": 4.2896825396825396e-05,
"loss": 0.429,
"step": 8600
},
{
"epoch": 5.72,
"learning_rate": 4.285548941798942e-05,
"loss": 0.4053,
"step": 8650
},
{
"epoch": 5.75,
"learning_rate": 4.281415343915344e-05,
"loss": 0.3546,
"step": 8700
},
{
"epoch": 5.79,
"learning_rate": 4.277281746031746e-05,
"loss": 0.3662,
"step": 8750
},
{
"epoch": 5.82,
"learning_rate": 4.273148148148148e-05,
"loss": 0.3845,
"step": 8800
},
{
"epoch": 5.85,
"learning_rate": 4.269014550264551e-05,
"loss": 0.3852,
"step": 8850
},
{
"epoch": 5.89,
"learning_rate": 4.264880952380953e-05,
"loss": 0.3892,
"step": 8900
},
{
"epoch": 5.92,
"learning_rate": 4.2607473544973545e-05,
"loss": 0.4143,
"step": 8950
},
{
"epoch": 5.95,
"learning_rate": 4.256613756613757e-05,
"loss": 0.4316,
"step": 9000
},
{
"epoch": 5.99,
"learning_rate": 4.252480158730159e-05,
"loss": 0.3446,
"step": 9050
},
{
"epoch": 6.0,
"eval_cer": 0.28814455283771445,
"eval_loss": 1.2278224229812622,
"eval_mer": 0.3327740492170022,
"eval_runtime": 95.4916,
"eval_samples_per_second": 11.802,
"eval_steps_per_second": 1.477,
"step": 9072
},
{
"epoch": 6.02,
"learning_rate": 4.2483465608465615e-05,
"loss": 0.3236,
"step": 9100
},
{
"epoch": 6.05,
"learning_rate": 4.2442129629629626e-05,
"loss": 0.3461,
"step": 9150
},
{
"epoch": 6.08,
"learning_rate": 4.240079365079365e-05,
"loss": 0.2918,
"step": 9200
},
{
"epoch": 6.12,
"learning_rate": 4.235945767195768e-05,
"loss": 0.33,
"step": 9250
},
{
"epoch": 6.15,
"learning_rate": 4.2318121693121695e-05,
"loss": 0.3389,
"step": 9300
},
{
"epoch": 6.18,
"learning_rate": 4.2276785714285714e-05,
"loss": 0.2903,
"step": 9350
},
{
"epoch": 6.22,
"learning_rate": 4.223544973544974e-05,
"loss": 0.3372,
"step": 9400
},
{
"epoch": 6.25,
"learning_rate": 4.219411375661376e-05,
"loss": 0.3382,
"step": 9450
},
{
"epoch": 6.28,
"learning_rate": 4.215277777777778e-05,
"loss": 0.311,
"step": 9500
},
{
"epoch": 6.32,
"learning_rate": 4.21114417989418e-05,
"loss": 0.3391,
"step": 9550
},
{
"epoch": 6.35,
"learning_rate": 4.207010582010582e-05,
"loss": 0.3012,
"step": 9600
},
{
"epoch": 6.38,
"learning_rate": 4.2028769841269845e-05,
"loss": 0.2694,
"step": 9650
},
{
"epoch": 6.42,
"learning_rate": 4.1987433862433864e-05,
"loss": 0.3103,
"step": 9700
},
{
"epoch": 6.45,
"learning_rate": 4.194609788359788e-05,
"loss": 0.2985,
"step": 9750
},
{
"epoch": 6.48,
"learning_rate": 4.190476190476191e-05,
"loss": 0.3454,
"step": 9800
},
{
"epoch": 6.51,
"learning_rate": 4.1863425925925926e-05,
"loss": 0.3204,
"step": 9850
},
{
"epoch": 6.55,
"learning_rate": 4.182208994708995e-05,
"loss": 0.3376,
"step": 9900
},
{
"epoch": 6.58,
"learning_rate": 4.178075396825397e-05,
"loss": 0.3401,
"step": 9950
},
{
"epoch": 6.61,
"learning_rate": 4.173941798941799e-05,
"loss": 0.3583,
"step": 10000
},
{
"epoch": 6.65,
"learning_rate": 4.1698082010582014e-05,
"loss": 0.3418,
"step": 10050
},
{
"epoch": 6.68,
"learning_rate": 4.165674603174604e-05,
"loss": 0.3342,
"step": 10100
},
{
"epoch": 6.71,
"learning_rate": 4.161541005291005e-05,
"loss": 0.2981,
"step": 10150
},
{
"epoch": 6.75,
"learning_rate": 4.1574074074074076e-05,
"loss": 0.3674,
"step": 10200
},
{
"epoch": 6.78,
"learning_rate": 4.15327380952381e-05,
"loss": 0.2771,
"step": 10250
},
{
"epoch": 6.81,
"learning_rate": 4.149140211640212e-05,
"loss": 0.2884,
"step": 10300
},
{
"epoch": 6.85,
"learning_rate": 4.145006613756614e-05,
"loss": 0.3426,
"step": 10350
},
{
"epoch": 6.88,
"learning_rate": 4.1408730158730164e-05,
"loss": 0.3942,
"step": 10400
},
{
"epoch": 6.91,
"learning_rate": 4.136739417989418e-05,
"loss": 0.3126,
"step": 10450
},
{
"epoch": 6.94,
"learning_rate": 4.132605820105821e-05,
"loss": 0.3148,
"step": 10500
},
{
"epoch": 6.98,
"learning_rate": 4.128472222222222e-05,
"loss": 0.3123,
"step": 10550
},
{
"epoch": 7.0,
"eval_cer": 0.27867749531452735,
"eval_loss": 1.1994494199752808,
"eval_mer": 0.32790028763183127,
"eval_runtime": 95.3614,
"eval_samples_per_second": 11.818,
"eval_steps_per_second": 1.479,
"step": 10584
},
{
"epoch": 7.01,
"learning_rate": 4.1243386243386244e-05,
"loss": 0.2994,
"step": 10600
},
{
"epoch": 7.04,
"learning_rate": 4.120205026455027e-05,
"loss": 0.2945,
"step": 10650
},
{
"epoch": 7.08,
"learning_rate": 4.116071428571429e-05,
"loss": 0.2497,
"step": 10700
},
{
"epoch": 7.11,
"learning_rate": 4.111937830687831e-05,
"loss": 0.2542,
"step": 10750
},
{
"epoch": 7.14,
"learning_rate": 4.107804232804233e-05,
"loss": 0.2905,
"step": 10800
},
{
"epoch": 7.18,
"learning_rate": 4.103670634920635e-05,
"loss": 0.2664,
"step": 10850
},
{
"epoch": 7.21,
"learning_rate": 4.0995370370370376e-05,
"loss": 0.2808,
"step": 10900
},
{
"epoch": 7.24,
"learning_rate": 4.0954034391534394e-05,
"loss": 0.2606,
"step": 10950
},
{
"epoch": 7.28,
"learning_rate": 4.091269841269841e-05,
"loss": 0.2613,
"step": 11000
},
{
"epoch": 7.31,
"learning_rate": 4.087136243386244e-05,
"loss": 0.2391,
"step": 11050
},
{
"epoch": 7.34,
"learning_rate": 4.0830026455026457e-05,
"loss": 0.262,
"step": 11100
},
{
"epoch": 7.37,
"learning_rate": 4.0788690476190475e-05,
"loss": 0.2515,
"step": 11150
},
{
"epoch": 7.41,
"learning_rate": 4.07473544973545e-05,
"loss": 0.2881,
"step": 11200
},
{
"epoch": 7.44,
"learning_rate": 4.070601851851852e-05,
"loss": 0.2491,
"step": 11250
},
{
"epoch": 7.47,
"learning_rate": 4.0664682539682544e-05,
"loss": 0.314,
"step": 11300
},
{
"epoch": 7.51,
"learning_rate": 4.062334656084656e-05,
"loss": 0.2802,
"step": 11350
},
{
"epoch": 7.54,
"learning_rate": 4.058201058201058e-05,
"loss": 0.3154,
"step": 11400
},
{
"epoch": 7.57,
"learning_rate": 4.0540674603174606e-05,
"loss": 0.2618,
"step": 11450
},
{
"epoch": 7.61,
"learning_rate": 4.049933862433863e-05,
"loss": 0.3068,
"step": 11500
},
{
"epoch": 7.64,
"learning_rate": 4.0458002645502643e-05,
"loss": 0.3046,
"step": 11550
},
{
"epoch": 7.67,
"learning_rate": 4.041666666666667e-05,
"loss": 0.2958,
"step": 11600
},
{
"epoch": 7.71,
"learning_rate": 4.0375330687830694e-05,
"loss": 0.2863,
"step": 11650
},
{
"epoch": 7.74,
"learning_rate": 4.033399470899471e-05,
"loss": 0.2811,
"step": 11700
},
{
"epoch": 7.77,
"learning_rate": 4.029265873015873e-05,
"loss": 0.295,
"step": 11750
},
{
"epoch": 7.8,
"learning_rate": 4.025132275132275e-05,
"loss": 0.2951,
"step": 11800
},
{
"epoch": 7.84,
"learning_rate": 4.0209986772486775e-05,
"loss": 0.3515,
"step": 11850
},
{
"epoch": 7.87,
"learning_rate": 4.01686507936508e-05,
"loss": 0.2698,
"step": 11900
},
{
"epoch": 7.9,
"learning_rate": 4.012731481481481e-05,
"loss": 0.2944,
"step": 11950
},
{
"epoch": 7.94,
"learning_rate": 4.008597883597884e-05,
"loss": 0.2623,
"step": 12000
},
{
"epoch": 7.97,
"learning_rate": 4.004464285714286e-05,
"loss": 0.2652,
"step": 12050
},
{
"epoch": 8.0,
"eval_cer": 0.27622663270700176,
"eval_loss": 1.283096432685852,
"eval_mer": 0.32941834451901564,
"eval_runtime": 95.1411,
"eval_samples_per_second": 11.846,
"eval_steps_per_second": 1.482,
"step": 12096
},
{
"epoch": 8.0,
"learning_rate": 4.000330687830688e-05,
"loss": 0.271,
"step": 12100
},
{
"epoch": 8.04,
"learning_rate": 3.99619708994709e-05,
"loss": 0.2162,
"step": 12150
},
{
"epoch": 8.07,
"learning_rate": 3.9920634920634925e-05,
"loss": 0.2076,
"step": 12200
},
{
"epoch": 8.1,
"learning_rate": 3.9880125661375664e-05,
"loss": 0.2134,
"step": 12250
},
{
"epoch": 8.13,
"learning_rate": 3.983878968253968e-05,
"loss": 0.195,
"step": 12300
},
{
"epoch": 8.17,
"learning_rate": 3.979745370370371e-05,
"loss": 0.2551,
"step": 12350
},
{
"epoch": 8.2,
"learning_rate": 3.975611772486773e-05,
"loss": 0.2689,
"step": 12400
},
{
"epoch": 8.23,
"learning_rate": 3.9714781746031745e-05,
"loss": 0.2537,
"step": 12450
},
{
"epoch": 8.27,
"learning_rate": 3.967344576719577e-05,
"loss": 0.2378,
"step": 12500
},
{
"epoch": 8.3,
"learning_rate": 3.963210978835979e-05,
"loss": 0.2394,
"step": 12550
},
{
"epoch": 8.33,
"learning_rate": 3.9590773809523814e-05,
"loss": 0.2524,
"step": 12600
},
{
"epoch": 8.37,
"learning_rate": 3.954943783068783e-05,
"loss": 0.239,
"step": 12650
},
{
"epoch": 8.4,
"learning_rate": 3.950810185185185e-05,
"loss": 0.2676,
"step": 12700
},
{
"epoch": 8.43,
"learning_rate": 3.9466765873015877e-05,
"loss": 0.2486,
"step": 12750
},
{
"epoch": 8.47,
"learning_rate": 3.9426256613756616e-05,
"loss": 0.2128,
"step": 12800
},
{
"epoch": 8.5,
"learning_rate": 3.9384920634920635e-05,
"loss": 0.2372,
"step": 12850
},
{
"epoch": 8.53,
"learning_rate": 3.934358465608466e-05,
"loss": 0.2173,
"step": 12900
},
{
"epoch": 8.56,
"learning_rate": 3.930224867724868e-05,
"loss": 0.2739,
"step": 12950
},
{
"epoch": 8.6,
"learning_rate": 3.92609126984127e-05,
"loss": 0.1962,
"step": 13000
},
{
"epoch": 8.63,
"learning_rate": 3.921957671957672e-05,
"loss": 0.2531,
"step": 13050
},
{
"epoch": 8.66,
"learning_rate": 3.917824074074074e-05,
"loss": 0.2906,
"step": 13100
},
{
"epoch": 8.7,
"learning_rate": 3.9136904761904766e-05,
"loss": 0.2504,
"step": 13150
},
{
"epoch": 8.73,
"learning_rate": 3.9095568783068785e-05,
"loss": 0.2303,
"step": 13200
},
{
"epoch": 8.76,
"learning_rate": 3.90542328042328e-05,
"loss": 0.2311,
"step": 13250
},
{
"epoch": 8.8,
"learning_rate": 3.901289682539683e-05,
"loss": 0.2085,
"step": 13300
},
{
"epoch": 8.83,
"learning_rate": 3.8971560846560854e-05,
"loss": 0.2324,
"step": 13350
},
{
"epoch": 8.86,
"learning_rate": 3.8930224867724865e-05,
"loss": 0.2308,
"step": 13400
},
{
"epoch": 8.9,
"learning_rate": 3.888888888888889e-05,
"loss": 0.2319,
"step": 13450
},
{
"epoch": 8.93,
"learning_rate": 3.8847552910052916e-05,
"loss": 0.2344,
"step": 13500
},
{
"epoch": 8.96,
"learning_rate": 3.8806216931216935e-05,
"loss": 0.2414,
"step": 13550
},
{
"epoch": 8.99,
"learning_rate": 3.876488095238095e-05,
"loss": 0.2293,
"step": 13600
},
{
"epoch": 9.0,
"eval_cer": 0.26805709068191647,
"eval_loss": 1.3081992864608765,
"eval_mer": 0.3170341962288271,
"eval_runtime": 95.4776,
"eval_samples_per_second": 11.804,
"eval_steps_per_second": 1.477,
"step": 13608
},
{
"epoch": 9.03,
"learning_rate": 3.872354497354497e-05,
"loss": 0.2161,
"step": 13650
},
{
"epoch": 9.06,
"learning_rate": 3.8682208994709e-05,
"loss": 0.2318,
"step": 13700
},
{
"epoch": 9.09,
"learning_rate": 3.864087301587302e-05,
"loss": 0.1939,
"step": 13750
},
{
"epoch": 9.13,
"learning_rate": 3.8599537037037034e-05,
"loss": 0.1953,
"step": 13800
},
{
"epoch": 9.16,
"learning_rate": 3.855820105820106e-05,
"loss": 0.2249,
"step": 13850
},
{
"epoch": 9.19,
"learning_rate": 3.8516865079365084e-05,
"loss": 0.2156,
"step": 13900
},
{
"epoch": 9.23,
"learning_rate": 3.84755291005291e-05,
"loss": 0.1648,
"step": 13950
},
{
"epoch": 9.26,
"learning_rate": 3.843419312169312e-05,
"loss": 0.214,
"step": 14000
},
{
"epoch": 9.29,
"learning_rate": 3.839285714285715e-05,
"loss": 0.1713,
"step": 14050
},
{
"epoch": 9.33,
"learning_rate": 3.8351521164021165e-05,
"loss": 0.1595,
"step": 14100
},
{
"epoch": 9.36,
"learning_rate": 3.831018518518519e-05,
"loss": 0.2019,
"step": 14150
},
{
"epoch": 9.39,
"learning_rate": 3.82688492063492e-05,
"loss": 0.184,
"step": 14200
},
{
"epoch": 9.42,
"learning_rate": 3.822751322751323e-05,
"loss": 0.1627,
"step": 14250
},
{
"epoch": 9.46,
"learning_rate": 3.818617724867725e-05,
"loss": 0.1824,
"step": 14300
},
{
"epoch": 9.49,
"learning_rate": 3.814484126984127e-05,
"loss": 0.1818,
"step": 14350
},
{
"epoch": 9.52,
"learning_rate": 3.810350529100529e-05,
"loss": 0.1956,
"step": 14400
},
{
"epoch": 9.56,
"learning_rate": 3.8062169312169315e-05,
"loss": 0.1918,
"step": 14450
},
{
"epoch": 9.59,
"learning_rate": 3.8020833333333334e-05,
"loss": 0.2195,
"step": 14500
},
{
"epoch": 9.62,
"learning_rate": 3.797949735449736e-05,
"loss": 0.2231,
"step": 14550
},
{
"epoch": 9.66,
"learning_rate": 3.793816137566138e-05,
"loss": 0.237,
"step": 14600
},
{
"epoch": 9.69,
"learning_rate": 3.7896825396825396e-05,
"loss": 0.1696,
"step": 14650
},
{
"epoch": 9.72,
"learning_rate": 3.785548941798942e-05,
"loss": 0.2176,
"step": 14700
},
{
"epoch": 9.76,
"learning_rate": 3.7814153439153447e-05,
"loss": 0.218,
"step": 14750
},
{
"epoch": 9.79,
"learning_rate": 3.777281746031746e-05,
"loss": 0.1682,
"step": 14800
},
{
"epoch": 9.82,
"learning_rate": 3.7731481481481484e-05,
"loss": 0.2033,
"step": 14850
},
{
"epoch": 9.85,
"learning_rate": 3.76901455026455e-05,
"loss": 0.1718,
"step": 14900
},
{
"epoch": 9.89,
"learning_rate": 3.764880952380953e-05,
"loss": 0.2017,
"step": 14950
},
{
"epoch": 9.92,
"learning_rate": 3.7607473544973546e-05,
"loss": 0.2159,
"step": 15000
},
{
"epoch": 9.95,
"learning_rate": 3.7566137566137564e-05,
"loss": 0.2239,
"step": 15050
},
{
"epoch": 9.99,
"learning_rate": 3.752480158730159e-05,
"loss": 0.193,
"step": 15100
},
{
"epoch": 10.0,
"eval_cer": 0.2699312797347302,
"eval_loss": 1.3139551877975464,
"eval_mer": 0.321348673697667,
"eval_runtime": 95.5158,
"eval_samples_per_second": 11.799,
"eval_steps_per_second": 1.476,
"step": 15120
},
{
"epoch": 10.02,
"learning_rate": 3.7483465608465615e-05,
"loss": 0.2884,
"step": 15150
},
{
"epoch": 10.05,
"learning_rate": 3.744212962962963e-05,
"loss": 0.1595,
"step": 15200
},
{
"epoch": 10.09,
"learning_rate": 3.740079365079365e-05,
"loss": 0.1742,
"step": 15250
},
{
"epoch": 10.12,
"learning_rate": 3.735945767195768e-05,
"loss": 0.1798,
"step": 15300
},
{
"epoch": 10.15,
"learning_rate": 3.7318121693121696e-05,
"loss": 0.1903,
"step": 15350
},
{
"epoch": 10.19,
"learning_rate": 3.7276785714285714e-05,
"loss": 0.1614,
"step": 15400
},
{
"epoch": 10.22,
"learning_rate": 3.723544973544973e-05,
"loss": 0.1825,
"step": 15450
},
{
"epoch": 10.25,
"learning_rate": 3.719411375661376e-05,
"loss": 0.1698,
"step": 15500
},
{
"epoch": 10.28,
"learning_rate": 3.715277777777778e-05,
"loss": 0.1904,
"step": 15550
},
{
"epoch": 10.32,
"learning_rate": 3.7111441798941795e-05,
"loss": 0.1538,
"step": 15600
},
{
"epoch": 10.35,
"learning_rate": 3.707010582010582e-05,
"loss": 0.174,
"step": 15650
},
{
"epoch": 10.38,
"learning_rate": 3.7028769841269846e-05,
"loss": 0.1665,
"step": 15700
},
{
"epoch": 10.42,
"learning_rate": 3.6987433862433864e-05,
"loss": 0.131,
"step": 15750
},
{
"epoch": 10.45,
"learning_rate": 3.694609788359788e-05,
"loss": 0.159,
"step": 15800
},
{
"epoch": 10.48,
"learning_rate": 3.690476190476191e-05,
"loss": 0.1944,
"step": 15850
},
{
"epoch": 10.52,
"learning_rate": 3.6863425925925926e-05,
"loss": 0.1465,
"step": 15900
},
{
"epoch": 10.55,
"learning_rate": 3.682208994708995e-05,
"loss": 0.1805,
"step": 15950
},
{
"epoch": 10.58,
"learning_rate": 3.678075396825397e-05,
"loss": 0.1621,
"step": 16000
},
{
"epoch": 10.62,
"learning_rate": 3.673941798941799e-05,
"loss": 0.1634,
"step": 16050
},
{
"epoch": 10.65,
"learning_rate": 3.6698082010582014e-05,
"loss": 0.1662,
"step": 16100
},
{
"epoch": 10.68,
"learning_rate": 3.665674603174603e-05,
"loss": 0.1637,
"step": 16150
},
{
"epoch": 10.71,
"learning_rate": 3.661541005291005e-05,
"loss": 0.1821,
"step": 16200
},
{
"epoch": 10.75,
"learning_rate": 3.6574074074074076e-05,
"loss": 0.1786,
"step": 16250
},
{
"epoch": 10.78,
"learning_rate": 3.6532738095238095e-05,
"loss": 0.2009,
"step": 16300
},
{
"epoch": 10.81,
"learning_rate": 3.649140211640212e-05,
"loss": 0.1684,
"step": 16350
},
{
"epoch": 10.85,
"learning_rate": 3.645006613756614e-05,
"loss": 0.1789,
"step": 16400
},
{
"epoch": 10.88,
"learning_rate": 3.640873015873016e-05,
"loss": 0.1689,
"step": 16450
},
{
"epoch": 10.91,
"learning_rate": 3.636739417989418e-05,
"loss": 0.186,
"step": 16500
},
{
"epoch": 10.95,
"learning_rate": 3.632605820105821e-05,
"loss": 0.1654,
"step": 16550
},
{
"epoch": 10.98,
"learning_rate": 3.628472222222222e-05,
"loss": 0.1764,
"step": 16600
},
{
"epoch": 11.0,
"eval_cer": 0.2717093565284252,
"eval_loss": 1.4910989999771118,
"eval_mer": 0.317193991690636,
"eval_runtime": 95.4079,
"eval_samples_per_second": 11.812,
"eval_steps_per_second": 1.478,
"step": 16632
},
{
"epoch": 11.01,
"learning_rate": 3.6243386243386245e-05,
"loss": 0.1815,
"step": 16650
},
{
"epoch": 11.04,
"learning_rate": 3.620205026455027e-05,
"loss": 0.1643,
"step": 16700
},
{
"epoch": 11.08,
"learning_rate": 3.616071428571429e-05,
"loss": 0.1514,
"step": 16750
},
{
"epoch": 11.11,
"learning_rate": 3.611937830687831e-05,
"loss": 0.1271,
"step": 16800
},
{
"epoch": 11.14,
"learning_rate": 3.6078042328042326e-05,
"loss": 0.1278,
"step": 16850
},
{
"epoch": 11.18,
"learning_rate": 3.603670634920635e-05,
"loss": 0.1299,
"step": 16900
},
{
"epoch": 11.21,
"learning_rate": 3.5995370370370376e-05,
"loss": 0.1328,
"step": 16950
},
{
"epoch": 11.24,
"learning_rate": 3.5954034391534395e-05,
"loss": 0.1526,
"step": 17000
},
{
"epoch": 11.28,
"learning_rate": 3.591269841269841e-05,
"loss": 0.1405,
"step": 17050
},
{
"epoch": 11.31,
"learning_rate": 3.587136243386244e-05,
"loss": 0.1658,
"step": 17100
},
{
"epoch": 11.34,
"learning_rate": 3.583002645502646e-05,
"loss": 0.1468,
"step": 17150
},
{
"epoch": 11.38,
"learning_rate": 3.5788690476190476e-05,
"loss": 0.1491,
"step": 17200
},
{
"epoch": 11.41,
"learning_rate": 3.57473544973545e-05,
"loss": 0.1357,
"step": 17250
},
{
"epoch": 11.44,
"learning_rate": 3.570601851851852e-05,
"loss": 0.1315,
"step": 17300
},
{
"epoch": 11.47,
"learning_rate": 3.5664682539682545e-05,
"loss": 0.142,
"step": 17350
},
{
"epoch": 11.51,
"learning_rate": 3.562334656084656e-05,
"loss": 0.1875,
"step": 17400
},
{
"epoch": 11.54,
"learning_rate": 3.558201058201058e-05,
"loss": 0.1791,
"step": 17450
},
{
"epoch": 11.57,
"learning_rate": 3.554067460317461e-05,
"loss": 0.1419,
"step": 17500
},
{
"epoch": 11.61,
"learning_rate": 3.5499338624338625e-05,
"loss": 0.164,
"step": 17550
},
{
"epoch": 11.64,
"learning_rate": 3.5458002645502644e-05,
"loss": 0.145,
"step": 17600
},
{
"epoch": 11.67,
"learning_rate": 3.541666666666667e-05,
"loss": 0.1569,
"step": 17650
},
{
"epoch": 11.71,
"learning_rate": 3.537533068783069e-05,
"loss": 0.1391,
"step": 17700
},
{
"epoch": 11.74,
"learning_rate": 3.533399470899471e-05,
"loss": 0.1433,
"step": 17750
},
{
"epoch": 11.77,
"learning_rate": 3.529265873015873e-05,
"loss": 0.1513,
"step": 17800
},
{
"epoch": 11.81,
"learning_rate": 3.525132275132275e-05,
"loss": 0.155,
"step": 17850
},
{
"epoch": 11.84,
"learning_rate": 3.5209986772486775e-05,
"loss": 0.1508,
"step": 17900
},
{
"epoch": 11.87,
"learning_rate": 3.51686507936508e-05,
"loss": 0.1335,
"step": 17950
},
{
"epoch": 11.9,
"learning_rate": 3.512731481481481e-05,
"loss": 0.1423,
"step": 18000
},
{
"epoch": 11.94,
"learning_rate": 3.508597883597884e-05,
"loss": 0.1639,
"step": 18050
},
{
"epoch": 11.97,
"learning_rate": 3.5044642857142856e-05,
"loss": 0.1589,
"step": 18100
},
{
"epoch": 12.0,
"eval_cer": 0.26536594742659425,
"eval_loss": 1.4262381792068481,
"eval_mer": 0.3113614573346117,
"eval_runtime": 95.2065,
"eval_samples_per_second": 11.837,
"eval_steps_per_second": 1.481,
"step": 18144
},
{
"epoch": 12.0,
"learning_rate": 3.500330687830688e-05,
"loss": 0.1555,
"step": 18150
},
{
"epoch": 12.04,
"learning_rate": 3.49619708994709e-05,
"loss": 0.1179,
"step": 18200
},
{
"epoch": 12.07,
"learning_rate": 3.492063492063492e-05,
"loss": 0.1147,
"step": 18250
},
{
"epoch": 12.1,
"learning_rate": 3.4879298941798944e-05,
"loss": 0.1089,
"step": 18300
},
{
"epoch": 12.14,
"learning_rate": 3.483796296296297e-05,
"loss": 0.1184,
"step": 18350
},
{
"epoch": 12.17,
"learning_rate": 3.479662698412699e-05,
"loss": 0.1376,
"step": 18400
},
{
"epoch": 12.2,
"learning_rate": 3.4755291005291006e-05,
"loss": 0.1161,
"step": 18450
},
{
"epoch": 12.24,
"learning_rate": 3.471395502645503e-05,
"loss": 0.1675,
"step": 18500
},
{
"epoch": 12.27,
"learning_rate": 3.467261904761905e-05,
"loss": 0.1258,
"step": 18550
},
{
"epoch": 12.3,
"learning_rate": 3.463128306878307e-05,
"loss": 0.1666,
"step": 18600
},
{
"epoch": 12.33,
"learning_rate": 3.458994708994709e-05,
"loss": 0.1135,
"step": 18650
},
{
"epoch": 12.37,
"learning_rate": 3.454861111111111e-05,
"loss": 0.1168,
"step": 18700
},
{
"epoch": 12.4,
"learning_rate": 3.450727513227514e-05,
"loss": 0.1185,
"step": 18750
},
{
"epoch": 12.43,
"learning_rate": 3.4465939153439156e-05,
"loss": 0.1419,
"step": 18800
},
{
"epoch": 12.47,
"learning_rate": 3.4424603174603174e-05,
"loss": 0.1187,
"step": 18850
},
{
"epoch": 12.5,
"learning_rate": 3.43832671957672e-05,
"loss": 0.119,
"step": 18900
},
{
"epoch": 12.53,
"learning_rate": 3.434193121693122e-05,
"loss": 0.1236,
"step": 18950
},
{
"epoch": 12.57,
"learning_rate": 3.430059523809524e-05,
"loss": 0.1439,
"step": 19000
},
{
"epoch": 12.6,
"learning_rate": 3.425925925925926e-05,
"loss": 0.1223,
"step": 19050
},
{
"epoch": 12.63,
"learning_rate": 3.421792328042328e-05,
"loss": 0.1216,
"step": 19100
},
{
"epoch": 12.67,
"learning_rate": 3.4176587301587306e-05,
"loss": 0.1318,
"step": 19150
},
{
"epoch": 12.7,
"learning_rate": 3.4135251322751324e-05,
"loss": 0.1241,
"step": 19200
},
{
"epoch": 12.73,
"learning_rate": 3.409391534391534e-05,
"loss": 0.1333,
"step": 19250
},
{
"epoch": 12.76,
"learning_rate": 3.405257936507937e-05,
"loss": 0.1597,
"step": 19300
},
{
"epoch": 12.8,
"learning_rate": 3.401124338624339e-05,
"loss": 0.1408,
"step": 19350
},
{
"epoch": 12.83,
"learning_rate": 3.396990740740741e-05,
"loss": 0.1329,
"step": 19400
},
{
"epoch": 12.86,
"learning_rate": 3.392857142857143e-05,
"loss": 0.1635,
"step": 19450
},
{
"epoch": 12.9,
"learning_rate": 3.388723544973545e-05,
"loss": 0.1078,
"step": 19500
},
{
"epoch": 12.93,
"learning_rate": 3.3845899470899474e-05,
"loss": 0.1148,
"step": 19550
},
{
"epoch": 12.96,
"learning_rate": 3.380456349206349e-05,
"loss": 0.1319,
"step": 19600
},
{
"epoch": 13.0,
"learning_rate": 3.376322751322751e-05,
"loss": 0.1383,
"step": 19650
},
{
"epoch": 13.0,
"eval_cer": 0.26767264164544186,
"eval_loss": 1.3802485466003418,
"eval_mer": 0.31559603707254713,
"eval_runtime": 95.5715,
"eval_samples_per_second": 11.792,
"eval_steps_per_second": 1.475,
"step": 19656
},
{
"epoch": 13.03,
"learning_rate": 3.3721891534391537e-05,
"loss": 0.1022,
"step": 19700
},
{
"epoch": 13.06,
"learning_rate": 3.368055555555556e-05,
"loss": 0.1163,
"step": 19750
},
{
"epoch": 13.1,
"learning_rate": 3.363921957671958e-05,
"loss": 0.1356,
"step": 19800
},
{
"epoch": 13.13,
"learning_rate": 3.35978835978836e-05,
"loss": 0.1295,
"step": 19850
},
{
"epoch": 13.16,
"learning_rate": 3.355654761904762e-05,
"loss": 0.101,
"step": 19900
},
{
"epoch": 13.19,
"learning_rate": 3.351521164021164e-05,
"loss": 0.0865,
"step": 19950
},
{
"epoch": 13.23,
"learning_rate": 3.347387566137566e-05,
"loss": 0.1259,
"step": 20000
},
{
"epoch": 13.26,
"learning_rate": 3.343336640211641e-05,
"loss": 0.1116,
"step": 20050
},
{
"epoch": 13.29,
"learning_rate": 3.3392030423280426e-05,
"loss": 0.1036,
"step": 20100
},
{
"epoch": 13.33,
"learning_rate": 3.3350694444444445e-05,
"loss": 0.119,
"step": 20150
},
{
"epoch": 13.36,
"learning_rate": 3.330935846560846e-05,
"loss": 0.0993,
"step": 20200
},
{
"epoch": 13.39,
"learning_rate": 3.326802248677249e-05,
"loss": 0.0969,
"step": 20250
},
{
"epoch": 13.43,
"learning_rate": 3.3226686507936514e-05,
"loss": 0.1363,
"step": 20300
},
{
"epoch": 13.46,
"learning_rate": 3.3185350529100525e-05,
"loss": 0.102,
"step": 20350
},
{
"epoch": 13.49,
"learning_rate": 3.314401455026455e-05,
"loss": 0.1003,
"step": 20400
},
{
"epoch": 13.53,
"learning_rate": 3.3102678571428576e-05,
"loss": 0.1085,
"step": 20450
},
{
"epoch": 13.56,
"learning_rate": 3.3061342592592594e-05,
"loss": 0.097,
"step": 20500
},
{
"epoch": 13.59,
"learning_rate": 3.302000661375661e-05,
"loss": 0.1041,
"step": 20550
},
{
"epoch": 13.62,
"learning_rate": 3.297867063492064e-05,
"loss": 0.1397,
"step": 20600
},
{
"epoch": 13.66,
"learning_rate": 3.293733465608466e-05,
"loss": 0.1392,
"step": 20650
},
{
"epoch": 13.69,
"learning_rate": 3.289599867724868e-05,
"loss": 0.1033,
"step": 20700
},
{
"epoch": 13.72,
"learning_rate": 3.28546626984127e-05,
"loss": 0.1033,
"step": 20750
},
{
"epoch": 13.76,
"learning_rate": 3.281332671957672e-05,
"loss": 0.1166,
"step": 20800
},
{
"epoch": 13.79,
"learning_rate": 3.2771990740740744e-05,
"loss": 0.1026,
"step": 20850
},
{
"epoch": 13.82,
"learning_rate": 3.273065476190476e-05,
"loss": 0.1188,
"step": 20900
},
{
"epoch": 13.86,
"learning_rate": 3.268931878306878e-05,
"loss": 0.1172,
"step": 20950
},
{
"epoch": 13.89,
"learning_rate": 3.264798280423281e-05,
"loss": 0.1047,
"step": 21000
},
{
"epoch": 13.92,
"learning_rate": 3.2606646825396825e-05,
"loss": 0.1297,
"step": 21050
},
{
"epoch": 13.96,
"learning_rate": 3.256531084656085e-05,
"loss": 0.1389,
"step": 21100
},
{
"epoch": 13.99,
"learning_rate": 3.252397486772487e-05,
"loss": 0.0984,
"step": 21150
},
{
"epoch": 14.0,
"eval_cer": 0.2665673506655774,
"eval_loss": 1.548509955406189,
"eval_mer": 0.3131991051454139,
"eval_runtime": 95.2722,
"eval_samples_per_second": 11.829,
"eval_steps_per_second": 1.48,
"step": 21168
},
{
"epoch": 14.02,
"learning_rate": 3.248263888888889e-05,
"loss": 0.1301,
"step": 21200
},
{
"epoch": 14.05,
"learning_rate": 3.244130291005291e-05,
"loss": 0.0825,
"step": 21250
},
{
"epoch": 14.09,
"learning_rate": 3.239996693121694e-05,
"loss": 0.0821,
"step": 21300
},
{
"epoch": 14.12,
"learning_rate": 3.235863095238095e-05,
"loss": 0.082,
"step": 21350
},
{
"epoch": 14.15,
"learning_rate": 3.2317294973544975e-05,
"loss": 0.0921,
"step": 21400
},
{
"epoch": 14.19,
"learning_rate": 3.2275958994708994e-05,
"loss": 0.112,
"step": 21450
},
{
"epoch": 14.22,
"learning_rate": 3.223462301587302e-05,
"loss": 0.094,
"step": 21500
},
{
"epoch": 14.25,
"learning_rate": 3.219328703703704e-05,
"loss": 0.0755,
"step": 21550
},
{
"epoch": 14.29,
"learning_rate": 3.2151951058201056e-05,
"loss": 0.1018,
"step": 21600
},
{
"epoch": 14.32,
"learning_rate": 3.211061507936508e-05,
"loss": 0.1401,
"step": 21650
},
{
"epoch": 14.35,
"learning_rate": 3.2069279100529106e-05,
"loss": 0.112,
"step": 21700
},
{
"epoch": 14.38,
"learning_rate": 3.202794312169312e-05,
"loss": 0.1021,
"step": 21750
},
{
"epoch": 14.42,
"learning_rate": 3.1986607142857144e-05,
"loss": 0.0935,
"step": 21800
},
{
"epoch": 14.45,
"learning_rate": 3.194527116402117e-05,
"loss": 0.1096,
"step": 21850
},
{
"epoch": 14.48,
"learning_rate": 3.190393518518519e-05,
"loss": 0.071,
"step": 21900
},
{
"epoch": 14.52,
"learning_rate": 3.1862599206349206e-05,
"loss": 0.1179,
"step": 21950
},
{
"epoch": 14.55,
"learning_rate": 3.182126322751323e-05,
"loss": 0.117,
"step": 22000
},
{
"epoch": 14.58,
"learning_rate": 3.177992724867725e-05,
"loss": 0.1019,
"step": 22050
},
{
"epoch": 14.62,
"learning_rate": 3.1738591269841275e-05,
"loss": 0.0977,
"step": 22100
},
{
"epoch": 14.65,
"learning_rate": 3.169725529100529e-05,
"loss": 0.1142,
"step": 22150
},
{
"epoch": 14.68,
"learning_rate": 3.165591931216931e-05,
"loss": 0.095,
"step": 22200
},
{
"epoch": 14.72,
"learning_rate": 3.161458333333334e-05,
"loss": 0.0999,
"step": 22250
},
{
"epoch": 14.75,
"learning_rate": 3.1573247354497356e-05,
"loss": 0.083,
"step": 22300
},
{
"epoch": 14.78,
"learning_rate": 3.1531911375661374e-05,
"loss": 0.0886,
"step": 22350
},
{
"epoch": 14.81,
"learning_rate": 3.14905753968254e-05,
"loss": 0.1007,
"step": 22400
},
{
"epoch": 14.85,
"learning_rate": 3.144923941798942e-05,
"loss": 0.1098,
"step": 22450
},
{
"epoch": 14.88,
"learning_rate": 3.140790343915344e-05,
"loss": 0.0803,
"step": 22500
},
{
"epoch": 14.91,
"learning_rate": 3.136656746031746e-05,
"loss": 0.0979,
"step": 22550
},
{
"epoch": 14.95,
"learning_rate": 3.132523148148148e-05,
"loss": 0.1089,
"step": 22600
},
{
"epoch": 14.98,
"learning_rate": 3.1283895502645506e-05,
"loss": 0.1087,
"step": 22650
},
{
"epoch": 15.0,
"eval_cer": 0.2658465087221875,
"eval_loss": 1.6277092695236206,
"eval_mer": 0.31208053691275167,
"eval_runtime": 95.3346,
"eval_samples_per_second": 11.822,
"eval_steps_per_second": 1.479,
"step": 22680
},
{
"epoch": 15.01,
"learning_rate": 3.1242559523809524e-05,
"loss": 0.0883,
"step": 22700
},
{
"epoch": 15.05,
"learning_rate": 3.1202050264550264e-05,
"loss": 0.082,
"step": 22750
},
{
"epoch": 15.08,
"learning_rate": 3.116071428571429e-05,
"loss": 0.0914,
"step": 22800
},
{
"epoch": 15.11,
"learning_rate": 3.111937830687831e-05,
"loss": 0.0944,
"step": 22850
},
{
"epoch": 15.15,
"learning_rate": 3.1078042328042326e-05,
"loss": 0.0956,
"step": 22900
},
{
"epoch": 15.18,
"learning_rate": 3.103670634920635e-05,
"loss": 0.1169,
"step": 22950
},
{
"epoch": 15.21,
"learning_rate": 3.099537037037038e-05,
"loss": 0.0861,
"step": 23000
},
{
"epoch": 15.24,
"learning_rate": 3.0954034391534395e-05,
"loss": 0.0864,
"step": 23050
},
{
"epoch": 15.28,
"learning_rate": 3.0912698412698414e-05,
"loss": 0.0824,
"step": 23100
},
{
"epoch": 15.31,
"learning_rate": 3.087136243386243e-05,
"loss": 0.0825,
"step": 23150
},
{
"epoch": 15.34,
"learning_rate": 3.083002645502646e-05,
"loss": 0.1068,
"step": 23200
},
{
"epoch": 15.38,
"learning_rate": 3.0788690476190476e-05,
"loss": 0.0813,
"step": 23250
},
{
"epoch": 15.41,
"learning_rate": 3.0747354497354494e-05,
"loss": 0.0767,
"step": 23300
},
{
"epoch": 15.44,
"learning_rate": 3.070601851851852e-05,
"loss": 0.0837,
"step": 23350
},
{
"epoch": 15.48,
"learning_rate": 3.0664682539682545e-05,
"loss": 0.0827,
"step": 23400
},
{
"epoch": 15.51,
"learning_rate": 3.0623346560846564e-05,
"loss": 0.1133,
"step": 23450
},
{
"epoch": 15.54,
"learning_rate": 3.058201058201058e-05,
"loss": 0.0896,
"step": 23500
},
{
"epoch": 15.58,
"learning_rate": 3.054067460317461e-05,
"loss": 0.0805,
"step": 23550
},
{
"epoch": 15.61,
"learning_rate": 3.0499338624338626e-05,
"loss": 0.0787,
"step": 23600
},
{
"epoch": 15.64,
"learning_rate": 3.0458002645502648e-05,
"loss": 0.083,
"step": 23650
},
{
"epoch": 15.67,
"learning_rate": 3.0416666666666666e-05,
"loss": 0.0755,
"step": 23700
},
{
"epoch": 15.71,
"learning_rate": 3.0375330687830688e-05,
"loss": 0.0967,
"step": 23750
},
{
"epoch": 15.74,
"learning_rate": 3.0333994708994713e-05,
"loss": 0.1132,
"step": 23800
},
{
"epoch": 15.77,
"learning_rate": 3.029265873015873e-05,
"loss": 0.0864,
"step": 23850
},
{
"epoch": 15.81,
"learning_rate": 3.0251322751322754e-05,
"loss": 0.0755,
"step": 23900
},
{
"epoch": 15.84,
"learning_rate": 3.0209986772486776e-05,
"loss": 0.0923,
"step": 23950
},
{
"epoch": 15.87,
"learning_rate": 3.0168650793650794e-05,
"loss": 0.1026,
"step": 24000
},
{
"epoch": 15.91,
"learning_rate": 3.0127314814814816e-05,
"loss": 0.0838,
"step": 24050
},
{
"epoch": 15.94,
"learning_rate": 3.0085978835978838e-05,
"loss": 0.0779,
"step": 24100
},
{
"epoch": 15.97,
"learning_rate": 3.0044642857142857e-05,
"loss": 0.0995,
"step": 24150
},
{
"epoch": 16.0,
"eval_cer": 0.2597433802681532,
"eval_loss": 1.6507114171981812,
"eval_mer": 0.30616810482582296,
"eval_runtime": 95.5806,
"eval_samples_per_second": 11.791,
"eval_steps_per_second": 1.475,
"step": 24192
},
{
"epoch": 16.01,
"learning_rate": 3.0003306878306882e-05,
"loss": 0.0771,
"step": 24200
},
{
"epoch": 16.04,
"learning_rate": 2.9961970899470904e-05,
"loss": 0.0677,
"step": 24250
},
{
"epoch": 16.07,
"learning_rate": 2.9920634920634922e-05,
"loss": 0.0908,
"step": 24300
},
{
"epoch": 16.1,
"learning_rate": 2.9879298941798944e-05,
"loss": 0.0712,
"step": 24350
},
{
"epoch": 16.14,
"learning_rate": 2.9837962962962963e-05,
"loss": 0.0687,
"step": 24400
},
{
"epoch": 16.17,
"learning_rate": 2.9796626984126985e-05,
"loss": 0.0732,
"step": 24450
},
{
"epoch": 16.2,
"learning_rate": 2.975529100529101e-05,
"loss": 0.076,
"step": 24500
},
{
"epoch": 16.24,
"learning_rate": 2.9713955026455025e-05,
"loss": 0.079,
"step": 24550
},
{
"epoch": 16.27,
"learning_rate": 2.967261904761905e-05,
"loss": 0.0713,
"step": 24600
},
{
"epoch": 16.3,
"learning_rate": 2.9631283068783072e-05,
"loss": 0.0877,
"step": 24650
},
{
"epoch": 16.34,
"learning_rate": 2.958994708994709e-05,
"loss": 0.0826,
"step": 24700
},
{
"epoch": 16.37,
"learning_rate": 2.9548611111111113e-05,
"loss": 0.0752,
"step": 24750
},
{
"epoch": 16.4,
"learning_rate": 2.9507275132275138e-05,
"loss": 0.0609,
"step": 24800
},
{
"epoch": 16.44,
"learning_rate": 2.9465939153439153e-05,
"loss": 0.0847,
"step": 24850
},
{
"epoch": 16.47,
"learning_rate": 2.9424603174603178e-05,
"loss": 0.0669,
"step": 24900
},
{
"epoch": 16.5,
"learning_rate": 2.9383267195767193e-05,
"loss": 0.0749,
"step": 24950
},
{
"epoch": 16.53,
"learning_rate": 2.934193121693122e-05,
"loss": 0.0842,
"step": 25000
},
{
"epoch": 16.57,
"learning_rate": 2.930059523809524e-05,
"loss": 0.0741,
"step": 25050
},
{
"epoch": 16.6,
"learning_rate": 2.925925925925926e-05,
"loss": 0.0771,
"step": 25100
},
{
"epoch": 16.63,
"learning_rate": 2.921792328042328e-05,
"loss": 0.0911,
"step": 25150
},
{
"epoch": 16.67,
"learning_rate": 2.9176587301587306e-05,
"loss": 0.0712,
"step": 25200
},
{
"epoch": 16.7,
"learning_rate": 2.913525132275132e-05,
"loss": 0.1093,
"step": 25250
},
{
"epoch": 16.73,
"learning_rate": 2.9093915343915347e-05,
"loss": 0.0875,
"step": 25300
},
{
"epoch": 16.77,
"learning_rate": 2.905257936507937e-05,
"loss": 0.0843,
"step": 25350
},
{
"epoch": 16.8,
"learning_rate": 2.9011243386243387e-05,
"loss": 0.0822,
"step": 25400
},
{
"epoch": 16.83,
"learning_rate": 2.896990740740741e-05,
"loss": 0.0842,
"step": 25450
},
{
"epoch": 16.87,
"learning_rate": 2.8928571428571434e-05,
"loss": 0.0883,
"step": 25500
},
{
"epoch": 16.9,
"learning_rate": 2.888723544973545e-05,
"loss": 0.0705,
"step": 25550
},
{
"epoch": 16.93,
"learning_rate": 2.8845899470899475e-05,
"loss": 0.0838,
"step": 25600
},
{
"epoch": 16.96,
"learning_rate": 2.880456349206349e-05,
"loss": 0.0978,
"step": 25650
},
{
"epoch": 17.0,
"learning_rate": 2.8763227513227515e-05,
"loss": 0.0788,
"step": 25700
},
{
"epoch": 17.0,
"eval_cer": 0.25623528281032243,
"eval_loss": 1.5880810022354126,
"eval_mer": 0.3044103547459252,
"eval_runtime": 95.3504,
"eval_samples_per_second": 11.82,
"eval_steps_per_second": 1.479,
"step": 25704
},
{
"epoch": 17.03,
"learning_rate": 2.8721891534391537e-05,
"loss": 0.0621,
"step": 25750
},
{
"epoch": 17.06,
"learning_rate": 2.8680555555555555e-05,
"loss": 0.063,
"step": 25800
},
{
"epoch": 17.1,
"learning_rate": 2.8639219576719577e-05,
"loss": 0.0947,
"step": 25850
},
{
"epoch": 17.13,
"learning_rate": 2.8597883597883603e-05,
"loss": 0.0601,
"step": 25900
},
{
"epoch": 17.16,
"learning_rate": 2.8556547619047618e-05,
"loss": 0.0607,
"step": 25950
},
{
"epoch": 17.2,
"learning_rate": 2.8515211640211643e-05,
"loss": 0.0796,
"step": 26000
},
{
"epoch": 17.23,
"learning_rate": 2.8473875661375665e-05,
"loss": 0.1042,
"step": 26050
},
{
"epoch": 17.26,
"learning_rate": 2.8432539682539683e-05,
"loss": 0.0635,
"step": 26100
},
{
"epoch": 17.29,
"learning_rate": 2.8391203703703705e-05,
"loss": 0.0562,
"step": 26150
},
{
"epoch": 17.33,
"learning_rate": 2.8349867724867724e-05,
"loss": 0.059,
"step": 26200
},
{
"epoch": 17.36,
"learning_rate": 2.8308531746031746e-05,
"loss": 0.0935,
"step": 26250
},
{
"epoch": 17.39,
"learning_rate": 2.826719576719577e-05,
"loss": 0.0742,
"step": 26300
},
{
"epoch": 17.43,
"learning_rate": 2.8225859788359786e-05,
"loss": 0.0697,
"step": 26350
},
{
"epoch": 17.46,
"learning_rate": 2.818452380952381e-05,
"loss": 0.0943,
"step": 26400
},
{
"epoch": 17.49,
"learning_rate": 2.8143187830687833e-05,
"loss": 0.0549,
"step": 26450
},
{
"epoch": 17.53,
"learning_rate": 2.8101851851851852e-05,
"loss": 0.0824,
"step": 26500
},
{
"epoch": 17.56,
"learning_rate": 2.8060515873015874e-05,
"loss": 0.0747,
"step": 26550
},
{
"epoch": 17.59,
"learning_rate": 2.80191798941799e-05,
"loss": 0.0656,
"step": 26600
},
{
"epoch": 17.63,
"learning_rate": 2.7977843915343914e-05,
"loss": 0.069,
"step": 26650
},
{
"epoch": 17.66,
"learning_rate": 2.793650793650794e-05,
"loss": 0.0574,
"step": 26700
},
{
"epoch": 17.69,
"learning_rate": 2.789517195767196e-05,
"loss": 0.0655,
"step": 26750
},
{
"epoch": 17.72,
"learning_rate": 2.785383597883598e-05,
"loss": 0.0768,
"step": 26800
},
{
"epoch": 17.76,
"learning_rate": 2.7812500000000002e-05,
"loss": 0.0708,
"step": 26850
},
{
"epoch": 17.79,
"learning_rate": 2.777116402116402e-05,
"loss": 0.094,
"step": 26900
},
{
"epoch": 17.82,
"learning_rate": 2.7729828042328042e-05,
"loss": 0.0657,
"step": 26950
},
{
"epoch": 17.86,
"learning_rate": 2.7688492063492067e-05,
"loss": 0.0613,
"step": 27000
},
{
"epoch": 17.89,
"learning_rate": 2.7647156084656083e-05,
"loss": 0.0839,
"step": 27050
},
{
"epoch": 17.92,
"learning_rate": 2.7605820105820108e-05,
"loss": 0.0819,
"step": 27100
},
{
"epoch": 17.96,
"learning_rate": 2.756448412698413e-05,
"loss": 0.08,
"step": 27150
},
{
"epoch": 17.99,
"learning_rate": 2.752314814814815e-05,
"loss": 0.047,
"step": 27200
},
{
"epoch": 18.0,
"eval_cer": 0.2579653034744582,
"eval_loss": 1.709592342376709,
"eval_mer": 0.304010866091403,
"eval_runtime": 95.8001,
"eval_samples_per_second": 11.764,
"eval_steps_per_second": 1.472,
"step": 27216
},
{
"epoch": 18.02,
"learning_rate": 2.748181216931217e-05,
"loss": 0.0856,
"step": 27250
},
{
"epoch": 18.06,
"learning_rate": 2.7440476190476195e-05,
"loss": 0.0732,
"step": 27300
},
{
"epoch": 18.09,
"learning_rate": 2.739914021164021e-05,
"loss": 0.0664,
"step": 27350
},
{
"epoch": 18.12,
"learning_rate": 2.7358630952380954e-05,
"loss": 0.0797,
"step": 27400
},
{
"epoch": 18.15,
"learning_rate": 2.7317294973544976e-05,
"loss": 0.062,
"step": 27450
},
{
"epoch": 18.19,
"learning_rate": 2.7275958994708994e-05,
"loss": 0.0553,
"step": 27500
},
{
"epoch": 18.22,
"learning_rate": 2.7234623015873016e-05,
"loss": 0.071,
"step": 27550
},
{
"epoch": 18.25,
"learning_rate": 2.719328703703704e-05,
"loss": 0.0815,
"step": 27600
},
{
"epoch": 18.29,
"learning_rate": 2.715195105820106e-05,
"loss": 0.0647,
"step": 27650
},
{
"epoch": 18.32,
"learning_rate": 2.711061507936508e-05,
"loss": 0.059,
"step": 27700
},
{
"epoch": 18.35,
"learning_rate": 2.70692791005291e-05,
"loss": 0.0598,
"step": 27750
},
{
"epoch": 18.39,
"learning_rate": 2.7027943121693122e-05,
"loss": 0.0635,
"step": 27800
},
{
"epoch": 18.42,
"learning_rate": 2.6986607142857144e-05,
"loss": 0.0592,
"step": 27850
},
{
"epoch": 18.45,
"learning_rate": 2.6945271164021162e-05,
"loss": 0.0697,
"step": 27900
},
{
"epoch": 18.49,
"learning_rate": 2.6903935185185188e-05,
"loss": 0.0549,
"step": 27950
},
{
"epoch": 18.52,
"learning_rate": 2.686259920634921e-05,
"loss": 0.0546,
"step": 28000
},
{
"epoch": 18.55,
"learning_rate": 2.6821263227513228e-05,
"loss": 0.0474,
"step": 28050
},
{
"epoch": 18.58,
"learning_rate": 2.677992724867725e-05,
"loss": 0.0618,
"step": 28100
},
{
"epoch": 18.62,
"learning_rate": 2.6738591269841272e-05,
"loss": 0.0523,
"step": 28150
},
{
"epoch": 18.65,
"learning_rate": 2.669725529100529e-05,
"loss": 0.0578,
"step": 28200
},
{
"epoch": 18.68,
"learning_rate": 2.6655919312169312e-05,
"loss": 0.0661,
"step": 28250
},
{
"epoch": 18.72,
"learning_rate": 2.6614583333333338e-05,
"loss": 0.0802,
"step": 28300
},
{
"epoch": 18.75,
"learning_rate": 2.6573247354497356e-05,
"loss": 0.0608,
"step": 28350
},
{
"epoch": 18.78,
"learning_rate": 2.6531911375661378e-05,
"loss": 0.0766,
"step": 28400
},
{
"epoch": 18.82,
"learning_rate": 2.6490575396825397e-05,
"loss": 0.069,
"step": 28450
},
{
"epoch": 18.85,
"learning_rate": 2.644923941798942e-05,
"loss": 0.0602,
"step": 28500
},
{
"epoch": 18.88,
"learning_rate": 2.640790343915344e-05,
"loss": 0.0664,
"step": 28550
},
{
"epoch": 18.92,
"learning_rate": 2.636656746031746e-05,
"loss": 0.0594,
"step": 28600
},
{
"epoch": 18.95,
"learning_rate": 2.6325231481481484e-05,
"loss": 0.0639,
"step": 28650
},
{
"epoch": 18.98,
"learning_rate": 2.6283895502645506e-05,
"loss": 0.0614,
"step": 28700
},
{
"epoch": 19.0,
"eval_cer": 0.2567158441059157,
"eval_loss": 1.677333950996399,
"eval_mer": 0.30576861617130074,
"eval_runtime": 95.5429,
"eval_samples_per_second": 11.796,
"eval_steps_per_second": 1.476,
"step": 28728
},
{
"epoch": 19.01,
"learning_rate": 2.6242559523809525e-05,
"loss": 0.0618,
"step": 28750
},
{
"epoch": 19.05,
"learning_rate": 2.6201223544973546e-05,
"loss": 0.0516,
"step": 28800
},
{
"epoch": 19.08,
"learning_rate": 2.615988756613757e-05,
"loss": 0.0557,
"step": 28850
},
{
"epoch": 19.11,
"learning_rate": 2.6118551587301587e-05,
"loss": 0.0547,
"step": 28900
},
{
"epoch": 19.15,
"learning_rate": 2.6077215608465612e-05,
"loss": 0.0727,
"step": 28950
},
{
"epoch": 19.18,
"learning_rate": 2.6035879629629627e-05,
"loss": 0.0476,
"step": 29000
},
{
"epoch": 19.21,
"learning_rate": 2.5994543650793653e-05,
"loss": 0.0597,
"step": 29050
},
{
"epoch": 19.25,
"learning_rate": 2.5953207671957674e-05,
"loss": 0.0419,
"step": 29100
},
{
"epoch": 19.28,
"learning_rate": 2.5911871693121693e-05,
"loss": 0.053,
"step": 29150
},
{
"epoch": 19.31,
"learning_rate": 2.5870535714285715e-05,
"loss": 0.0432,
"step": 29200
},
{
"epoch": 19.35,
"learning_rate": 2.5829199735449737e-05,
"loss": 0.0744,
"step": 29250
},
{
"epoch": 19.38,
"learning_rate": 2.5787863756613755e-05,
"loss": 0.0579,
"step": 29300
},
{
"epoch": 19.41,
"learning_rate": 2.574652777777778e-05,
"loss": 0.0459,
"step": 29350
},
{
"epoch": 19.44,
"learning_rate": 2.5705191798941802e-05,
"loss": 0.059,
"step": 29400
},
{
"epoch": 19.48,
"learning_rate": 2.566385582010582e-05,
"loss": 0.0661,
"step": 29450
},
{
"epoch": 19.51,
"learning_rate": 2.5622519841269843e-05,
"loss": 0.0722,
"step": 29500
},
{
"epoch": 19.54,
"learning_rate": 2.5581183862433865e-05,
"loss": 0.0608,
"step": 29550
},
{
"epoch": 19.58,
"learning_rate": 2.5539847883597883e-05,
"loss": 0.0671,
"step": 29600
},
{
"epoch": 19.61,
"learning_rate": 2.549851190476191e-05,
"loss": 0.0659,
"step": 29650
},
{
"epoch": 19.64,
"learning_rate": 2.5457175925925924e-05,
"loss": 0.0536,
"step": 29700
},
{
"epoch": 19.68,
"learning_rate": 2.541583994708995e-05,
"loss": 0.0792,
"step": 29750
},
{
"epoch": 19.71,
"learning_rate": 2.537450396825397e-05,
"loss": 0.0534,
"step": 29800
},
{
"epoch": 19.74,
"learning_rate": 2.533316798941799e-05,
"loss": 0.0723,
"step": 29850
},
{
"epoch": 19.78,
"learning_rate": 2.529183201058201e-05,
"loss": 0.0619,
"step": 29900
},
{
"epoch": 19.81,
"learning_rate": 2.5250496031746033e-05,
"loss": 0.0501,
"step": 29950
},
{
"epoch": 19.84,
"learning_rate": 2.520916005291005e-05,
"loss": 0.0648,
"step": 30000
},
{
"epoch": 19.87,
"learning_rate": 2.5167824074074077e-05,
"loss": 0.0671,
"step": 30050
},
{
"epoch": 19.91,
"learning_rate": 2.51264880952381e-05,
"loss": 0.0612,
"step": 30100
},
{
"epoch": 19.94,
"learning_rate": 2.5085152116402117e-05,
"loss": 0.0658,
"step": 30150
},
{
"epoch": 19.97,
"learning_rate": 2.504381613756614e-05,
"loss": 0.0527,
"step": 30200
},
{
"epoch": 20.0,
"eval_cer": 0.2609928396366957,
"eval_loss": 1.7759464979171753,
"eval_mer": 0.30936401406200065,
"eval_runtime": 95.3318,
"eval_samples_per_second": 11.822,
"eval_steps_per_second": 1.479,
"step": 30240
},
{
"epoch": 20.01,
"learning_rate": 2.500248015873016e-05,
"loss": 0.0508,
"step": 30250
},
{
"epoch": 20.04,
"learning_rate": 2.496114417989418e-05,
"loss": 0.0557,
"step": 30300
},
{
"epoch": 20.07,
"learning_rate": 2.4919808201058205e-05,
"loss": 0.0451,
"step": 30350
},
{
"epoch": 20.11,
"learning_rate": 2.4878472222222223e-05,
"loss": 0.0559,
"step": 30400
},
{
"epoch": 20.14,
"learning_rate": 2.4837136243386245e-05,
"loss": 0.0579,
"step": 30450
},
{
"epoch": 20.17,
"learning_rate": 2.4795800264550264e-05,
"loss": 0.0575,
"step": 30500
},
{
"epoch": 20.21,
"learning_rate": 2.475446428571429e-05,
"loss": 0.0502,
"step": 30550
},
{
"epoch": 20.24,
"learning_rate": 2.4713128306878308e-05,
"loss": 0.0627,
"step": 30600
},
{
"epoch": 20.27,
"learning_rate": 2.467179232804233e-05,
"loss": 0.0428,
"step": 30650
},
{
"epoch": 20.3,
"learning_rate": 2.463045634920635e-05,
"loss": 0.0368,
"step": 30700
},
{
"epoch": 20.34,
"learning_rate": 2.4589120370370373e-05,
"loss": 0.053,
"step": 30750
},
{
"epoch": 20.37,
"learning_rate": 2.4547784391534392e-05,
"loss": 0.0494,
"step": 30800
},
{
"epoch": 20.4,
"learning_rate": 2.4506448412698414e-05,
"loss": 0.0616,
"step": 30850
},
{
"epoch": 20.44,
"learning_rate": 2.4465112433862436e-05,
"loss": 0.065,
"step": 30900
},
{
"epoch": 20.47,
"learning_rate": 2.4423776455026458e-05,
"loss": 0.0531,
"step": 30950
},
{
"epoch": 20.5,
"learning_rate": 2.4382440476190476e-05,
"loss": 0.0517,
"step": 31000
},
{
"epoch": 20.54,
"learning_rate": 2.4341104497354498e-05,
"loss": 0.0573,
"step": 31050
},
{
"epoch": 20.57,
"learning_rate": 2.429976851851852e-05,
"loss": 0.0588,
"step": 31100
},
{
"epoch": 20.6,
"learning_rate": 2.4258432539682542e-05,
"loss": 0.071,
"step": 31150
},
{
"epoch": 20.63,
"learning_rate": 2.421709656084656e-05,
"loss": 0.0667,
"step": 31200
},
{
"epoch": 20.67,
"learning_rate": 2.4176587301587303e-05,
"loss": 0.0514,
"step": 31250
},
{
"epoch": 20.7,
"learning_rate": 2.4135251322751322e-05,
"loss": 0.0508,
"step": 31300
},
{
"epoch": 20.73,
"learning_rate": 2.4093915343915347e-05,
"loss": 0.0514,
"step": 31350
},
{
"epoch": 20.77,
"learning_rate": 2.4052579365079366e-05,
"loss": 0.048,
"step": 31400
},
{
"epoch": 20.8,
"learning_rate": 2.4011243386243388e-05,
"loss": 0.0461,
"step": 31450
},
{
"epoch": 20.83,
"learning_rate": 2.3969907407407406e-05,
"loss": 0.0498,
"step": 31500
},
{
"epoch": 20.87,
"learning_rate": 2.392857142857143e-05,
"loss": 0.0568,
"step": 31550
},
{
"epoch": 20.9,
"learning_rate": 2.388723544973545e-05,
"loss": 0.0436,
"step": 31600
},
{
"epoch": 20.93,
"learning_rate": 2.3845899470899472e-05,
"loss": 0.0577,
"step": 31650
},
{
"epoch": 20.97,
"learning_rate": 2.380456349206349e-05,
"loss": 0.0611,
"step": 31700
},
{
"epoch": 21.0,
"learning_rate": 2.3763227513227516e-05,
"loss": 0.0471,
"step": 31750
},
{
"epoch": 21.0,
"eval_cer": 0.254841655053102,
"eval_loss": 1.827702522277832,
"eval_mer": 0.3022531160115053,
"eval_runtime": 95.2583,
"eval_samples_per_second": 11.831,
"eval_steps_per_second": 1.48,
"step": 31752
},
{
"epoch": 21.03,
"learning_rate": 2.3721891534391534e-05,
"loss": 0.0539,
"step": 31800
},
{
"epoch": 21.06,
"learning_rate": 2.3680555555555556e-05,
"loss": 0.0556,
"step": 31850
},
{
"epoch": 21.1,
"learning_rate": 2.36400462962963e-05,
"loss": 0.0524,
"step": 31900
},
{
"epoch": 21.13,
"learning_rate": 2.3598710317460317e-05,
"loss": 0.0377,
"step": 31950
},
{
"epoch": 21.16,
"learning_rate": 2.355737433862434e-05,
"loss": 0.0411,
"step": 32000
},
{
"epoch": 21.2,
"learning_rate": 2.351603835978836e-05,
"loss": 0.0463,
"step": 32050
},
{
"epoch": 21.23,
"learning_rate": 2.3474702380952383e-05,
"loss": 0.0353,
"step": 32100
},
{
"epoch": 21.26,
"learning_rate": 2.34333664021164e-05,
"loss": 0.0483,
"step": 32150
},
{
"epoch": 21.3,
"learning_rate": 2.3392030423280427e-05,
"loss": 0.0443,
"step": 32200
},
{
"epoch": 21.33,
"learning_rate": 2.3350694444444445e-05,
"loss": 0.0551,
"step": 32250
},
{
"epoch": 21.36,
"learning_rate": 2.3309358465608467e-05,
"loss": 0.0423,
"step": 32300
},
{
"epoch": 21.4,
"learning_rate": 2.3268022486772486e-05,
"loss": 0.045,
"step": 32350
},
{
"epoch": 21.43,
"learning_rate": 2.322668650793651e-05,
"loss": 0.0493,
"step": 32400
},
{
"epoch": 21.46,
"learning_rate": 2.318535052910053e-05,
"loss": 0.0611,
"step": 32450
},
{
"epoch": 21.49,
"learning_rate": 2.314401455026455e-05,
"loss": 0.0344,
"step": 32500
},
{
"epoch": 21.53,
"learning_rate": 2.3102678571428573e-05,
"loss": 0.0441,
"step": 32550
},
{
"epoch": 21.56,
"learning_rate": 2.3061342592592595e-05,
"loss": 0.0608,
"step": 32600
},
{
"epoch": 21.59,
"learning_rate": 2.3020006613756614e-05,
"loss": 0.0443,
"step": 32650
},
{
"epoch": 21.63,
"learning_rate": 2.2978670634920636e-05,
"loss": 0.0704,
"step": 32700
},
{
"epoch": 21.66,
"learning_rate": 2.2937334656084658e-05,
"loss": 0.0441,
"step": 32750
},
{
"epoch": 21.69,
"learning_rate": 2.289599867724868e-05,
"loss": 0.0766,
"step": 32800
},
{
"epoch": 21.73,
"learning_rate": 2.2854662698412698e-05,
"loss": 0.0419,
"step": 32850
},
{
"epoch": 21.76,
"learning_rate": 2.2813326719576723e-05,
"loss": 0.0641,
"step": 32900
},
{
"epoch": 21.79,
"learning_rate": 2.2771990740740742e-05,
"loss": 0.0389,
"step": 32950
},
{
"epoch": 21.83,
"learning_rate": 2.2730654761904764e-05,
"loss": 0.0512,
"step": 33000
},
{
"epoch": 21.86,
"learning_rate": 2.2689318783068782e-05,
"loss": 0.0552,
"step": 33050
},
{
"epoch": 21.89,
"learning_rate": 2.2647982804232808e-05,
"loss": 0.0388,
"step": 33100
},
{
"epoch": 21.92,
"learning_rate": 2.2606646825396826e-05,
"loss": 0.0547,
"step": 33150
},
{
"epoch": 21.96,
"learning_rate": 2.2565310846560848e-05,
"loss": 0.0466,
"step": 33200
},
{
"epoch": 21.99,
"learning_rate": 2.2523974867724866e-05,
"loss": 0.0493,
"step": 33250
},
{
"epoch": 22.0,
"eval_cer": 0.2501802104858475,
"eval_loss": 1.7990031242370605,
"eval_mer": 0.2956216043464366,
"eval_runtime": 95.4115,
"eval_samples_per_second": 11.812,
"eval_steps_per_second": 1.478,
"step": 33264
},
{
"epoch": 22.02,
"learning_rate": 2.2482638888888892e-05,
"loss": 0.0445,
"step": 33300
},
{
"epoch": 22.06,
"learning_rate": 2.244130291005291e-05,
"loss": 0.0512,
"step": 33350
},
{
"epoch": 22.09,
"learning_rate": 2.2399966931216932e-05,
"loss": 0.0572,
"step": 33400
},
{
"epoch": 22.12,
"learning_rate": 2.2358630952380954e-05,
"loss": 0.0354,
"step": 33450
},
{
"epoch": 22.16,
"learning_rate": 2.2317294973544976e-05,
"loss": 0.0467,
"step": 33500
},
{
"epoch": 22.19,
"learning_rate": 2.2275958994708994e-05,
"loss": 0.0634,
"step": 33550
},
{
"epoch": 22.22,
"learning_rate": 2.2234623015873016e-05,
"loss": 0.0508,
"step": 33600
},
{
"epoch": 22.26,
"learning_rate": 2.2193287037037038e-05,
"loss": 0.0474,
"step": 33650
},
{
"epoch": 22.29,
"learning_rate": 2.215195105820106e-05,
"loss": 0.0342,
"step": 33700
},
{
"epoch": 22.32,
"learning_rate": 2.211061507936508e-05,
"loss": 0.0306,
"step": 33750
},
{
"epoch": 22.35,
"learning_rate": 2.2069279100529104e-05,
"loss": 0.0316,
"step": 33800
},
{
"epoch": 22.39,
"learning_rate": 2.2027943121693122e-05,
"loss": 0.0394,
"step": 33850
},
{
"epoch": 22.42,
"learning_rate": 2.1986607142857144e-05,
"loss": 0.034,
"step": 33900
},
{
"epoch": 22.45,
"learning_rate": 2.1945271164021163e-05,
"loss": 0.0395,
"step": 33950
},
{
"epoch": 22.49,
"learning_rate": 2.1903935185185188e-05,
"loss": 0.0421,
"step": 34000
},
{
"epoch": 22.52,
"learning_rate": 2.1862599206349207e-05,
"loss": 0.0421,
"step": 34050
},
{
"epoch": 22.55,
"learning_rate": 2.182126322751323e-05,
"loss": 0.0364,
"step": 34100
},
{
"epoch": 22.59,
"learning_rate": 2.177992724867725e-05,
"loss": 0.0368,
"step": 34150
},
{
"epoch": 22.62,
"learning_rate": 2.1738591269841272e-05,
"loss": 0.0448,
"step": 34200
},
{
"epoch": 22.65,
"learning_rate": 2.169725529100529e-05,
"loss": 0.037,
"step": 34250
},
{
"epoch": 22.69,
"learning_rate": 2.1655919312169313e-05,
"loss": 0.0477,
"step": 34300
},
{
"epoch": 22.72,
"learning_rate": 2.1614583333333335e-05,
"loss": 0.0426,
"step": 34350
},
{
"epoch": 22.75,
"learning_rate": 2.1573247354497357e-05,
"loss": 0.0408,
"step": 34400
},
{
"epoch": 22.78,
"learning_rate": 2.1531911375661375e-05,
"loss": 0.0556,
"step": 34450
},
{
"epoch": 22.82,
"learning_rate": 2.14905753968254e-05,
"loss": 0.0429,
"step": 34500
},
{
"epoch": 22.85,
"learning_rate": 2.144923941798942e-05,
"loss": 0.0372,
"step": 34550
},
{
"epoch": 22.88,
"learning_rate": 2.140790343915344e-05,
"loss": 0.0466,
"step": 34600
},
{
"epoch": 22.92,
"learning_rate": 2.136656746031746e-05,
"loss": 0.0538,
"step": 34650
},
{
"epoch": 22.95,
"learning_rate": 2.1325231481481485e-05,
"loss": 0.057,
"step": 34700
},
{
"epoch": 22.98,
"learning_rate": 2.1283895502645503e-05,
"loss": 0.0571,
"step": 34750
},
{
"epoch": 23.0,
"eval_cer": 0.25291940987072903,
"eval_loss": 1.9801671504974365,
"eval_mer": 0.2965004793863854,
"eval_runtime": 95.7109,
"eval_samples_per_second": 11.775,
"eval_steps_per_second": 1.473,
"step": 34776
},
{
"epoch": 23.02,
"learning_rate": 2.1242559523809525e-05,
"loss": 0.0379,
"step": 34800
},
{
"epoch": 23.05,
"learning_rate": 2.1201223544973544e-05,
"loss": 0.0542,
"step": 34850
},
{
"epoch": 23.08,
"learning_rate": 2.115988756613757e-05,
"loss": 0.0378,
"step": 34900
},
{
"epoch": 23.12,
"learning_rate": 2.1118551587301587e-05,
"loss": 0.0363,
"step": 34950
},
{
"epoch": 23.15,
"learning_rate": 2.107721560846561e-05,
"loss": 0.0471,
"step": 35000
},
{
"epoch": 23.18,
"learning_rate": 2.103587962962963e-05,
"loss": 0.0433,
"step": 35050
},
{
"epoch": 23.21,
"learning_rate": 2.0994543650793653e-05,
"loss": 0.0562,
"step": 35100
},
{
"epoch": 23.25,
"learning_rate": 2.095320767195767e-05,
"loss": 0.0339,
"step": 35150
},
{
"epoch": 23.28,
"learning_rate": 2.0911871693121693e-05,
"loss": 0.0353,
"step": 35200
},
{
"epoch": 23.31,
"learning_rate": 2.0870535714285715e-05,
"loss": 0.0464,
"step": 35250
},
{
"epoch": 23.35,
"learning_rate": 2.0829199735449737e-05,
"loss": 0.0364,
"step": 35300
},
{
"epoch": 23.38,
"learning_rate": 2.0787863756613756e-05,
"loss": 0.0505,
"step": 35350
},
{
"epoch": 23.41,
"learning_rate": 2.074652777777778e-05,
"loss": 0.0434,
"step": 35400
},
{
"epoch": 23.45,
"learning_rate": 2.07051917989418e-05,
"loss": 0.044,
"step": 35450
},
{
"epoch": 23.48,
"learning_rate": 2.066385582010582e-05,
"loss": 0.0478,
"step": 35500
},
{
"epoch": 23.51,
"learning_rate": 2.062251984126984e-05,
"loss": 0.0434,
"step": 35550
},
{
"epoch": 23.54,
"learning_rate": 2.0581183862433865e-05,
"loss": 0.038,
"step": 35600
},
{
"epoch": 23.58,
"learning_rate": 2.0539847883597884e-05,
"loss": 0.0424,
"step": 35650
},
{
"epoch": 23.61,
"learning_rate": 2.0499338624338627e-05,
"loss": 0.0367,
"step": 35700
},
{
"epoch": 23.64,
"learning_rate": 2.0458002645502645e-05,
"loss": 0.0498,
"step": 35750
},
{
"epoch": 23.68,
"learning_rate": 2.0416666666666667e-05,
"loss": 0.0725,
"step": 35800
},
{
"epoch": 23.71,
"learning_rate": 2.037615740740741e-05,
"loss": 0.0389,
"step": 35850
},
{
"epoch": 23.74,
"learning_rate": 2.033482142857143e-05,
"loss": 0.0448,
"step": 35900
},
{
"epoch": 23.78,
"learning_rate": 2.029348544973545e-05,
"loss": 0.0342,
"step": 35950
},
{
"epoch": 23.81,
"learning_rate": 2.0252149470899472e-05,
"loss": 0.0338,
"step": 36000
},
{
"epoch": 23.84,
"learning_rate": 2.0210813492063494e-05,
"loss": 0.0378,
"step": 36050
},
{
"epoch": 23.88,
"learning_rate": 2.0169477513227513e-05,
"loss": 0.0273,
"step": 36100
},
{
"epoch": 23.91,
"learning_rate": 2.0128141534391535e-05,
"loss": 0.0298,
"step": 36150
},
{
"epoch": 23.94,
"learning_rate": 2.0086805555555557e-05,
"loss": 0.0431,
"step": 36200
},
{
"epoch": 23.97,
"learning_rate": 2.004546957671958e-05,
"loss": 0.0635,
"step": 36250
},
{
"epoch": 24.0,
"eval_cer": 0.2512374453361526,
"eval_loss": 1.8666701316833496,
"eval_mer": 0.29729945669542984,
"eval_runtime": 95.5045,
"eval_samples_per_second": 11.8,
"eval_steps_per_second": 1.476,
"step": 36288
},
{
"epoch": 24.01,
"learning_rate": 2.0004133597883597e-05,
"loss": 0.0334,
"step": 36300
},
{
"epoch": 24.04,
"learning_rate": 1.9962797619047622e-05,
"loss": 0.0515,
"step": 36350
},
{
"epoch": 24.07,
"learning_rate": 1.992146164021164e-05,
"loss": 0.0484,
"step": 36400
},
{
"epoch": 24.11,
"learning_rate": 1.9880125661375663e-05,
"loss": 0.0407,
"step": 36450
},
{
"epoch": 24.14,
"learning_rate": 1.983878968253968e-05,
"loss": 0.0325,
"step": 36500
},
{
"epoch": 24.17,
"learning_rate": 1.9797453703703707e-05,
"loss": 0.0413,
"step": 36550
},
{
"epoch": 24.21,
"learning_rate": 1.9756117724867725e-05,
"loss": 0.0433,
"step": 36600
},
{
"epoch": 24.24,
"learning_rate": 1.9714781746031747e-05,
"loss": 0.0363,
"step": 36650
},
{
"epoch": 24.27,
"learning_rate": 1.967344576719577e-05,
"loss": 0.0409,
"step": 36700
},
{
"epoch": 24.31,
"learning_rate": 1.963210978835979e-05,
"loss": 0.0311,
"step": 36750
},
{
"epoch": 24.34,
"learning_rate": 1.959077380952381e-05,
"loss": 0.0345,
"step": 36800
},
{
"epoch": 24.37,
"learning_rate": 1.954943783068783e-05,
"loss": 0.0413,
"step": 36850
},
{
"epoch": 24.4,
"learning_rate": 1.9508101851851853e-05,
"loss": 0.033,
"step": 36900
},
{
"epoch": 24.44,
"learning_rate": 1.9466765873015875e-05,
"loss": 0.0342,
"step": 36950
},
{
"epoch": 24.47,
"learning_rate": 1.9425429894179893e-05,
"loss": 0.0374,
"step": 37000
},
{
"epoch": 24.5,
"learning_rate": 1.9384093915343915e-05,
"loss": 0.0389,
"step": 37050
},
{
"epoch": 24.54,
"learning_rate": 1.9342757936507937e-05,
"loss": 0.0319,
"step": 37100
},
{
"epoch": 24.57,
"learning_rate": 1.930142195767196e-05,
"loss": 0.0341,
"step": 37150
},
{
"epoch": 24.6,
"learning_rate": 1.9260085978835978e-05,
"loss": 0.0396,
"step": 37200
},
{
"epoch": 24.64,
"learning_rate": 1.9218750000000003e-05,
"loss": 0.0355,
"step": 37250
},
{
"epoch": 24.67,
"learning_rate": 1.917741402116402e-05,
"loss": 0.0463,
"step": 37300
},
{
"epoch": 24.7,
"learning_rate": 1.9136078042328043e-05,
"loss": 0.0425,
"step": 37350
},
{
"epoch": 24.74,
"learning_rate": 1.9094742063492062e-05,
"loss": 0.0424,
"step": 37400
},
{
"epoch": 24.77,
"learning_rate": 1.9053406084656087e-05,
"loss": 0.0674,
"step": 37450
},
{
"epoch": 24.8,
"learning_rate": 1.9012070105820106e-05,
"loss": 0.0329,
"step": 37500
},
{
"epoch": 24.83,
"learning_rate": 1.8970734126984128e-05,
"loss": 0.0326,
"step": 37550
},
{
"epoch": 24.87,
"learning_rate": 1.892939814814815e-05,
"loss": 0.0636,
"step": 37600
},
{
"epoch": 24.9,
"learning_rate": 1.888806216931217e-05,
"loss": 0.0349,
"step": 37650
},
{
"epoch": 24.93,
"learning_rate": 1.884672619047619e-05,
"loss": 0.042,
"step": 37700
},
{
"epoch": 24.97,
"learning_rate": 1.8805390211640212e-05,
"loss": 0.0329,
"step": 37750
},
{
"epoch": 25.0,
"learning_rate": 1.8764054232804234e-05,
"loss": 0.0229,
"step": 37800
},
{
"epoch": 25.0,
"eval_cer": 0.25133355759527126,
"eval_loss": 1.8796802759170532,
"eval_mer": 0.2953020134228188,
"eval_runtime": 95.7023,
"eval_samples_per_second": 11.776,
"eval_steps_per_second": 1.473,
"step": 37800
},
{
"epoch": 25.03,
"learning_rate": 1.8722718253968256e-05,
"loss": 0.0347,
"step": 37850
},
{
"epoch": 25.07,
"learning_rate": 1.8681382275132274e-05,
"loss": 0.0269,
"step": 37900
},
{
"epoch": 25.1,
"learning_rate": 1.86400462962963e-05,
"loss": 0.0521,
"step": 37950
},
{
"epoch": 25.13,
"learning_rate": 1.8598710317460318e-05,
"loss": 0.0442,
"step": 38000
},
{
"epoch": 25.17,
"learning_rate": 1.855737433862434e-05,
"loss": 0.047,
"step": 38050
},
{
"epoch": 25.2,
"learning_rate": 1.8516038359788358e-05,
"loss": 0.0304,
"step": 38100
},
{
"epoch": 25.23,
"learning_rate": 1.8474702380952384e-05,
"loss": 0.0382,
"step": 38150
},
{
"epoch": 25.26,
"learning_rate": 1.8433366402116402e-05,
"loss": 0.0333,
"step": 38200
},
{
"epoch": 25.3,
"learning_rate": 1.8392030423280424e-05,
"loss": 0.0283,
"step": 38250
},
{
"epoch": 25.33,
"learning_rate": 1.8350694444444443e-05,
"loss": 0.0295,
"step": 38300
},
{
"epoch": 25.36,
"learning_rate": 1.8309358465608468e-05,
"loss": 0.0336,
"step": 38350
},
{
"epoch": 25.4,
"learning_rate": 1.8268022486772486e-05,
"loss": 0.0449,
"step": 38400
},
{
"epoch": 25.43,
"learning_rate": 1.8226686507936508e-05,
"loss": 0.0367,
"step": 38450
},
{
"epoch": 25.46,
"learning_rate": 1.818535052910053e-05,
"loss": 0.0301,
"step": 38500
},
{
"epoch": 25.5,
"learning_rate": 1.8144014550264552e-05,
"loss": 0.0366,
"step": 38550
},
{
"epoch": 25.53,
"learning_rate": 1.810267857142857e-05,
"loss": 0.0406,
"step": 38600
},
{
"epoch": 25.56,
"learning_rate": 1.8061342592592592e-05,
"loss": 0.0242,
"step": 38650
},
{
"epoch": 25.6,
"learning_rate": 1.8020006613756614e-05,
"loss": 0.0382,
"step": 38700
},
{
"epoch": 25.63,
"learning_rate": 1.7978670634920636e-05,
"loss": 0.0268,
"step": 38750
},
{
"epoch": 25.66,
"learning_rate": 1.7937334656084655e-05,
"loss": 0.0286,
"step": 38800
},
{
"epoch": 25.69,
"learning_rate": 1.789599867724868e-05,
"loss": 0.0344,
"step": 38850
},
{
"epoch": 25.73,
"learning_rate": 1.78546626984127e-05,
"loss": 0.0249,
"step": 38900
},
{
"epoch": 25.76,
"learning_rate": 1.781332671957672e-05,
"loss": 0.031,
"step": 38950
},
{
"epoch": 25.79,
"learning_rate": 1.777199074074074e-05,
"loss": 0.0343,
"step": 39000
},
{
"epoch": 25.83,
"learning_rate": 1.7730654761904764e-05,
"loss": 0.0348,
"step": 39050
},
{
"epoch": 25.86,
"learning_rate": 1.7689318783068783e-05,
"loss": 0.0311,
"step": 39100
},
{
"epoch": 25.89,
"learning_rate": 1.7647982804232805e-05,
"loss": 0.0379,
"step": 39150
},
{
"epoch": 25.93,
"learning_rate": 1.7606646825396827e-05,
"loss": 0.0296,
"step": 39200
},
{
"epoch": 25.96,
"learning_rate": 1.756531084656085e-05,
"loss": 0.0263,
"step": 39250
},
{
"epoch": 25.99,
"learning_rate": 1.7523974867724867e-05,
"loss": 0.0285,
"step": 39300
},
{
"epoch": 26.0,
"eval_cer": 0.2528713537411697,
"eval_loss": 1.9864203929901123,
"eval_mer": 0.2975391498881432,
"eval_runtime": 95.3176,
"eval_samples_per_second": 11.824,
"eval_steps_per_second": 1.479,
"step": 39312
},
{
"epoch": 26.03,
"learning_rate": 1.748263888888889e-05,
"loss": 0.041,
"step": 39350
},
{
"epoch": 26.06,
"learning_rate": 1.744130291005291e-05,
"loss": 0.0329,
"step": 39400
},
{
"epoch": 26.09,
"learning_rate": 1.7399966931216933e-05,
"loss": 0.0272,
"step": 39450
},
{
"epoch": 26.12,
"learning_rate": 1.735863095238095e-05,
"loss": 0.0314,
"step": 39500
},
{
"epoch": 26.16,
"learning_rate": 1.7317294973544973e-05,
"loss": 0.0393,
"step": 39550
},
{
"epoch": 26.19,
"learning_rate": 1.7275958994708995e-05,
"loss": 0.03,
"step": 39600
},
{
"epoch": 26.22,
"learning_rate": 1.7234623015873017e-05,
"loss": 0.0269,
"step": 39650
},
{
"epoch": 26.26,
"learning_rate": 1.7193287037037035e-05,
"loss": 0.0297,
"step": 39700
},
{
"epoch": 26.29,
"learning_rate": 1.715195105820106e-05,
"loss": 0.0312,
"step": 39750
},
{
"epoch": 26.32,
"learning_rate": 1.711061507936508e-05,
"loss": 0.0405,
"step": 39800
},
{
"epoch": 26.36,
"learning_rate": 1.70692791005291e-05,
"loss": 0.0358,
"step": 39850
},
{
"epoch": 26.39,
"learning_rate": 1.7028769841269844e-05,
"loss": 0.0375,
"step": 39900
},
{
"epoch": 26.42,
"learning_rate": 1.6987433862433863e-05,
"loss": 0.036,
"step": 39950
},
{
"epoch": 26.46,
"learning_rate": 1.6946097883597884e-05,
"loss": 0.0402,
"step": 40000
},
{
"epoch": 26.49,
"learning_rate": 1.6904761904761906e-05,
"loss": 0.0294,
"step": 40050
},
{
"epoch": 26.52,
"learning_rate": 1.6863425925925928e-05,
"loss": 0.029,
"step": 40100
},
{
"epoch": 26.55,
"learning_rate": 1.6822089947089947e-05,
"loss": 0.0323,
"step": 40150
},
{
"epoch": 26.59,
"learning_rate": 1.678075396825397e-05,
"loss": 0.0413,
"step": 40200
},
{
"epoch": 26.62,
"learning_rate": 1.673941798941799e-05,
"loss": 0.0523,
"step": 40250
},
{
"epoch": 26.65,
"learning_rate": 1.6698082010582012e-05,
"loss": 0.0294,
"step": 40300
},
{
"epoch": 26.69,
"learning_rate": 1.665674603174603e-05,
"loss": 0.0252,
"step": 40350
},
{
"epoch": 26.72,
"learning_rate": 1.6615410052910056e-05,
"loss": 0.0293,
"step": 40400
},
{
"epoch": 26.75,
"learning_rate": 1.6574074074074075e-05,
"loss": 0.0212,
"step": 40450
},
{
"epoch": 26.79,
"learning_rate": 1.6532738095238097e-05,
"loss": 0.0318,
"step": 40500
},
{
"epoch": 26.82,
"learning_rate": 1.6491402116402115e-05,
"loss": 0.0325,
"step": 40550
},
{
"epoch": 26.85,
"learning_rate": 1.645006613756614e-05,
"loss": 0.0321,
"step": 40600
},
{
"epoch": 26.88,
"learning_rate": 1.640873015873016e-05,
"loss": 0.0404,
"step": 40650
},
{
"epoch": 26.92,
"learning_rate": 1.636739417989418e-05,
"loss": 0.0349,
"step": 40700
},
{
"epoch": 26.95,
"learning_rate": 1.6326058201058203e-05,
"loss": 0.0219,
"step": 40750
},
{
"epoch": 26.98,
"learning_rate": 1.6284722222222225e-05,
"loss": 0.0253,
"step": 40800
},
{
"epoch": 27.0,
"eval_cer": 0.25527416021913596,
"eval_loss": 1.9873512983322144,
"eval_mer": 0.30433045701502076,
"eval_runtime": 95.6318,
"eval_samples_per_second": 11.785,
"eval_steps_per_second": 1.474,
"step": 40824
},
{
"epoch": 27.02,
"learning_rate": 1.6243386243386243e-05,
"loss": 0.0265,
"step": 40850
},
{
"epoch": 27.05,
"learning_rate": 1.6202050264550265e-05,
"loss": 0.025,
"step": 40900
},
{
"epoch": 27.08,
"learning_rate": 1.6160714285714287e-05,
"loss": 0.0273,
"step": 40950
},
{
"epoch": 27.12,
"learning_rate": 1.611937830687831e-05,
"loss": 0.0378,
"step": 41000
},
{
"epoch": 27.15,
"learning_rate": 1.6078042328042327e-05,
"loss": 0.023,
"step": 41050
},
{
"epoch": 27.18,
"learning_rate": 1.6036706349206353e-05,
"loss": 0.0379,
"step": 41100
},
{
"epoch": 27.22,
"learning_rate": 1.599537037037037e-05,
"loss": 0.0329,
"step": 41150
},
{
"epoch": 27.25,
"learning_rate": 1.5954034391534393e-05,
"loss": 0.0357,
"step": 41200
},
{
"epoch": 27.28,
"learning_rate": 1.591269841269841e-05,
"loss": 0.0224,
"step": 41250
},
{
"epoch": 27.31,
"learning_rate": 1.5871362433862437e-05,
"loss": 0.0231,
"step": 41300
},
{
"epoch": 27.35,
"learning_rate": 1.5830026455026455e-05,
"loss": 0.0358,
"step": 41350
},
{
"epoch": 27.38,
"learning_rate": 1.5788690476190477e-05,
"loss": 0.0233,
"step": 41400
},
{
"epoch": 27.41,
"learning_rate": 1.5747354497354496e-05,
"loss": 0.0194,
"step": 41450
},
{
"epoch": 27.45,
"learning_rate": 1.570601851851852e-05,
"loss": 0.022,
"step": 41500
},
{
"epoch": 27.48,
"learning_rate": 1.566468253968254e-05,
"loss": 0.032,
"step": 41550
},
{
"epoch": 27.51,
"learning_rate": 1.562334656084656e-05,
"loss": 0.0367,
"step": 41600
},
{
"epoch": 27.55,
"learning_rate": 1.5582010582010583e-05,
"loss": 0.0204,
"step": 41650
},
{
"epoch": 27.58,
"learning_rate": 1.5540674603174605e-05,
"loss": 0.0421,
"step": 41700
},
{
"epoch": 27.61,
"learning_rate": 1.5499338624338624e-05,
"loss": 0.0345,
"step": 41750
},
{
"epoch": 27.65,
"learning_rate": 1.5458002645502646e-05,
"loss": 0.0395,
"step": 41800
},
{
"epoch": 27.68,
"learning_rate": 1.5416666666666668e-05,
"loss": 0.0216,
"step": 41850
},
{
"epoch": 27.71,
"learning_rate": 1.537533068783069e-05,
"loss": 0.0347,
"step": 41900
},
{
"epoch": 27.74,
"learning_rate": 1.5333994708994708e-05,
"loss": 0.0376,
"step": 41950
},
{
"epoch": 27.78,
"learning_rate": 1.5292658730158733e-05,
"loss": 0.0275,
"step": 42000
},
{
"epoch": 27.81,
"learning_rate": 1.5251322751322752e-05,
"loss": 0.0254,
"step": 42050
},
{
"epoch": 27.84,
"learning_rate": 1.5209986772486774e-05,
"loss": 0.0277,
"step": 42100
},
{
"epoch": 27.88,
"learning_rate": 1.5168650793650794e-05,
"loss": 0.0313,
"step": 42150
},
{
"epoch": 27.91,
"learning_rate": 1.5127314814814816e-05,
"loss": 0.0248,
"step": 42200
},
{
"epoch": 27.94,
"learning_rate": 1.5085978835978836e-05,
"loss": 0.0293,
"step": 42250
},
{
"epoch": 27.98,
"learning_rate": 1.5044642857142858e-05,
"loss": 0.0357,
"step": 42300
},
{
"epoch": 28.0,
"eval_cer": 0.25561055312605124,
"eval_loss": 2.020660400390625,
"eval_mer": 0.30233301374240973,
"eval_runtime": 95.342,
"eval_samples_per_second": 11.821,
"eval_steps_per_second": 1.479,
"step": 42336
},
{
"epoch": 28.01,
"learning_rate": 1.500330687830688e-05,
"loss": 0.0262,
"step": 42350
},
{
"epoch": 28.04,
"learning_rate": 1.49619708994709e-05,
"loss": 0.0317,
"step": 42400
},
{
"epoch": 28.08,
"learning_rate": 1.4920634920634922e-05,
"loss": 0.0281,
"step": 42450
},
{
"epoch": 28.11,
"learning_rate": 1.4879298941798942e-05,
"loss": 0.025,
"step": 42500
},
{
"epoch": 28.14,
"learning_rate": 1.4837962962962964e-05,
"loss": 0.0251,
"step": 42550
},
{
"epoch": 28.17,
"learning_rate": 1.4796626984126984e-05,
"loss": 0.0233,
"step": 42600
},
{
"epoch": 28.21,
"learning_rate": 1.4755291005291006e-05,
"loss": 0.0253,
"step": 42650
},
{
"epoch": 28.24,
"learning_rate": 1.4713955026455028e-05,
"loss": 0.0302,
"step": 42700
},
{
"epoch": 28.27,
"learning_rate": 1.4672619047619048e-05,
"loss": 0.03,
"step": 42750
},
{
"epoch": 28.31,
"learning_rate": 1.463128306878307e-05,
"loss": 0.0406,
"step": 42800
},
{
"epoch": 28.34,
"learning_rate": 1.458994708994709e-05,
"loss": 0.027,
"step": 42850
},
{
"epoch": 28.37,
"learning_rate": 1.4548611111111112e-05,
"loss": 0.0227,
"step": 42900
},
{
"epoch": 28.41,
"learning_rate": 1.4507275132275132e-05,
"loss": 0.0234,
"step": 42950
},
{
"epoch": 28.44,
"learning_rate": 1.4465939153439154e-05,
"loss": 0.0223,
"step": 43000
},
{
"epoch": 28.47,
"learning_rate": 1.4424603174603174e-05,
"loss": 0.0285,
"step": 43050
},
{
"epoch": 28.51,
"learning_rate": 1.4383267195767196e-05,
"loss": 0.0284,
"step": 43100
},
{
"epoch": 28.54,
"learning_rate": 1.4341931216931218e-05,
"loss": 0.0221,
"step": 43150
},
{
"epoch": 28.57,
"learning_rate": 1.4300595238095238e-05,
"loss": 0.0323,
"step": 43200
},
{
"epoch": 28.6,
"learning_rate": 1.425925925925926e-05,
"loss": 0.0271,
"step": 43250
},
{
"epoch": 28.64,
"learning_rate": 1.4217923280423282e-05,
"loss": 0.0405,
"step": 43300
},
{
"epoch": 28.67,
"learning_rate": 1.4176587301587302e-05,
"loss": 0.025,
"step": 43350
},
{
"epoch": 28.7,
"learning_rate": 1.4135251322751323e-05,
"loss": 0.0291,
"step": 43400
},
{
"epoch": 28.74,
"learning_rate": 1.4093915343915345e-05,
"loss": 0.0376,
"step": 43450
},
{
"epoch": 28.77,
"learning_rate": 1.4052579365079366e-05,
"loss": 0.0343,
"step": 43500
},
{
"epoch": 28.8,
"learning_rate": 1.4011243386243387e-05,
"loss": 0.0247,
"step": 43550
},
{
"epoch": 28.84,
"learning_rate": 1.3969907407407409e-05,
"loss": 0.0201,
"step": 43600
},
{
"epoch": 28.87,
"learning_rate": 1.392857142857143e-05,
"loss": 0.0252,
"step": 43650
},
{
"epoch": 28.9,
"learning_rate": 1.388723544973545e-05,
"loss": 0.0255,
"step": 43700
},
{
"epoch": 28.94,
"learning_rate": 1.3845899470899471e-05,
"loss": 0.0224,
"step": 43750
},
{
"epoch": 28.97,
"learning_rate": 1.3804563492063493e-05,
"loss": 0.0189,
"step": 43800
},
{
"epoch": 29.0,
"eval_cer": 0.25166995050218655,
"eval_loss": 1.9557234048843384,
"eval_mer": 0.29594119527005436,
"eval_runtime": 95.5416,
"eval_samples_per_second": 11.796,
"eval_steps_per_second": 1.476,
"step": 43848
},
{
"epoch": 29.0,
"learning_rate": 1.3763227513227515e-05,
"loss": 0.042,
"step": 43850
},
{
"epoch": 29.03,
"learning_rate": 1.3721891534391535e-05,
"loss": 0.0391,
"step": 43900
},
{
"epoch": 29.07,
"learning_rate": 1.3680555555555557e-05,
"loss": 0.0343,
"step": 43950
},
{
"epoch": 29.1,
"learning_rate": 1.3639219576719579e-05,
"loss": 0.0308,
"step": 44000
},
{
"epoch": 29.13,
"learning_rate": 1.3597883597883599e-05,
"loss": 0.0229,
"step": 44050
},
{
"epoch": 29.17,
"learning_rate": 1.3556547619047619e-05,
"loss": 0.0235,
"step": 44100
},
{
"epoch": 29.2,
"learning_rate": 1.3515211640211643e-05,
"loss": 0.0202,
"step": 44150
},
{
"epoch": 29.23,
"learning_rate": 1.3473875661375663e-05,
"loss": 0.0297,
"step": 44200
},
{
"epoch": 29.27,
"learning_rate": 1.3432539682539683e-05,
"loss": 0.0323,
"step": 44250
},
{
"epoch": 29.3,
"learning_rate": 1.3391203703703703e-05,
"loss": 0.0262,
"step": 44300
},
{
"epoch": 29.33,
"learning_rate": 1.3349867724867727e-05,
"loss": 0.0179,
"step": 44350
},
{
"epoch": 29.37,
"learning_rate": 1.3308531746031747e-05,
"loss": 0.0331,
"step": 44400
},
{
"epoch": 29.4,
"learning_rate": 1.3267195767195767e-05,
"loss": 0.0299,
"step": 44450
},
{
"epoch": 29.43,
"learning_rate": 1.3225859788359791e-05,
"loss": 0.0303,
"step": 44500
},
{
"epoch": 29.46,
"learning_rate": 1.3184523809523811e-05,
"loss": 0.0234,
"step": 44550
},
{
"epoch": 29.5,
"learning_rate": 1.3143187830687831e-05,
"loss": 0.0256,
"step": 44600
},
{
"epoch": 29.53,
"learning_rate": 1.3101851851851852e-05,
"loss": 0.0165,
"step": 44650
},
{
"epoch": 29.56,
"learning_rate": 1.3060515873015875e-05,
"loss": 0.022,
"step": 44700
},
{
"epoch": 29.6,
"learning_rate": 1.3019179894179895e-05,
"loss": 0.028,
"step": 44750
},
{
"epoch": 29.63,
"learning_rate": 1.2977843915343916e-05,
"loss": 0.0334,
"step": 44800
},
{
"epoch": 29.66,
"learning_rate": 1.2936507936507939e-05,
"loss": 0.0287,
"step": 44850
},
{
"epoch": 29.7,
"learning_rate": 1.289517195767196e-05,
"loss": 0.0335,
"step": 44900
},
{
"epoch": 29.73,
"learning_rate": 1.285383597883598e-05,
"loss": 0.0309,
"step": 44950
},
{
"epoch": 29.76,
"learning_rate": 1.28125e-05,
"loss": 0.0349,
"step": 45000
},
{
"epoch": 29.79,
"learning_rate": 1.2771164021164023e-05,
"loss": 0.021,
"step": 45050
},
{
"epoch": 29.83,
"learning_rate": 1.2729828042328044e-05,
"loss": 0.03,
"step": 45100
},
{
"epoch": 29.86,
"learning_rate": 1.2688492063492064e-05,
"loss": 0.0281,
"step": 45150
},
{
"epoch": 29.89,
"learning_rate": 1.2647156084656087e-05,
"loss": 0.0272,
"step": 45200
},
{
"epoch": 29.93,
"learning_rate": 1.2605820105820108e-05,
"loss": 0.0226,
"step": 45250
},
{
"epoch": 29.96,
"learning_rate": 1.2564484126984128e-05,
"loss": 0.026,
"step": 45300
},
{
"epoch": 29.99,
"learning_rate": 1.2523148148148148e-05,
"loss": 0.0218,
"step": 45350
},
{
"epoch": 30.0,
"eval_cer": 0.25152578211350857,
"eval_loss": 2.031803846359253,
"eval_mer": 0.29442313838286993,
"eval_runtime": 95.6299,
"eval_samples_per_second": 11.785,
"eval_steps_per_second": 1.474,
"step": 45360
},
{
"epoch": 30.03,
"learning_rate": 1.248181216931217e-05,
"loss": 0.0276,
"step": 45400
},
{
"epoch": 30.06,
"learning_rate": 1.2440476190476192e-05,
"loss": 0.0227,
"step": 45450
},
{
"epoch": 30.09,
"learning_rate": 1.2399140211640212e-05,
"loss": 0.0186,
"step": 45500
},
{
"epoch": 30.13,
"learning_rate": 1.2357804232804234e-05,
"loss": 0.0353,
"step": 45550
},
{
"epoch": 30.16,
"learning_rate": 1.2316468253968256e-05,
"loss": 0.0244,
"step": 45600
},
{
"epoch": 30.19,
"learning_rate": 1.2275132275132276e-05,
"loss": 0.0252,
"step": 45650
},
{
"epoch": 30.22,
"learning_rate": 1.2233796296296298e-05,
"loss": 0.0288,
"step": 45700
},
{
"epoch": 30.26,
"learning_rate": 1.2192460317460318e-05,
"loss": 0.029,
"step": 45750
},
{
"epoch": 30.29,
"learning_rate": 1.215112433862434e-05,
"loss": 0.0155,
"step": 45800
},
{
"epoch": 30.32,
"learning_rate": 1.210978835978836e-05,
"loss": 0.0255,
"step": 45850
},
{
"epoch": 30.36,
"learning_rate": 1.2068452380952382e-05,
"loss": 0.0195,
"step": 45900
},
{
"epoch": 30.39,
"learning_rate": 1.2027116402116402e-05,
"loss": 0.0249,
"step": 45950
},
{
"epoch": 30.42,
"learning_rate": 1.1985780423280424e-05,
"loss": 0.0292,
"step": 46000
},
{
"epoch": 30.46,
"learning_rate": 1.1944444444444446e-05,
"loss": 0.0234,
"step": 46050
},
{
"epoch": 30.49,
"learning_rate": 1.1903935185185186e-05,
"loss": 0.0343,
"step": 46100
},
{
"epoch": 30.52,
"learning_rate": 1.1862599206349206e-05,
"loss": 0.0265,
"step": 46150
},
{
"epoch": 30.56,
"learning_rate": 1.1821263227513228e-05,
"loss": 0.031,
"step": 46200
},
{
"epoch": 30.59,
"learning_rate": 1.1779927248677248e-05,
"loss": 0.0191,
"step": 46250
},
{
"epoch": 30.62,
"learning_rate": 1.173859126984127e-05,
"loss": 0.0272,
"step": 46300
},
{
"epoch": 30.65,
"learning_rate": 1.1697255291005292e-05,
"loss": 0.0262,
"step": 46350
},
{
"epoch": 30.69,
"learning_rate": 1.1655919312169312e-05,
"loss": 0.0212,
"step": 46400
},
{
"epoch": 30.72,
"learning_rate": 1.1614583333333334e-05,
"loss": 0.0265,
"step": 46450
},
{
"epoch": 30.75,
"learning_rate": 1.1573247354497354e-05,
"loss": 0.0312,
"step": 46500
},
{
"epoch": 30.79,
"learning_rate": 1.1531911375661376e-05,
"loss": 0.0212,
"step": 46550
},
{
"epoch": 30.82,
"learning_rate": 1.1490575396825396e-05,
"loss": 0.019,
"step": 46600
},
{
"epoch": 30.85,
"learning_rate": 1.1449239417989418e-05,
"loss": 0.0295,
"step": 46650
},
{
"epoch": 30.89,
"learning_rate": 1.140790343915344e-05,
"loss": 0.0387,
"step": 46700
},
{
"epoch": 30.92,
"learning_rate": 1.136656746031746e-05,
"loss": 0.0274,
"step": 46750
},
{
"epoch": 30.95,
"learning_rate": 1.1325231481481482e-05,
"loss": 0.0213,
"step": 46800
},
{
"epoch": 30.99,
"learning_rate": 1.1283895502645502e-05,
"loss": 0.0218,
"step": 46850
},
{
"epoch": 31.0,
"eval_cer": 0.25066077178144075,
"eval_loss": 2.0200111865997314,
"eval_mer": 0.2976190476190476,
"eval_runtime": 95.7498,
"eval_samples_per_second": 11.77,
"eval_steps_per_second": 1.473,
"step": 46872
},
{
"epoch": 31.02,
"learning_rate": 1.1242559523809524e-05,
"loss": 0.0279,
"step": 46900
},
{
"epoch": 31.05,
"learning_rate": 1.1201223544973544e-05,
"loss": 0.0278,
"step": 46950
},
{
"epoch": 31.08,
"learning_rate": 1.1159887566137566e-05,
"loss": 0.0158,
"step": 47000
},
{
"epoch": 31.12,
"learning_rate": 1.1118551587301586e-05,
"loss": 0.02,
"step": 47050
},
{
"epoch": 31.15,
"learning_rate": 1.1077215608465608e-05,
"loss": 0.0148,
"step": 47100
},
{
"epoch": 31.18,
"learning_rate": 1.103587962962963e-05,
"loss": 0.0153,
"step": 47150
},
{
"epoch": 31.22,
"learning_rate": 1.099454365079365e-05,
"loss": 0.0343,
"step": 47200
},
{
"epoch": 31.25,
"learning_rate": 1.0953207671957672e-05,
"loss": 0.0245,
"step": 47250
},
{
"epoch": 31.28,
"learning_rate": 1.0911871693121693e-05,
"loss": 0.0177,
"step": 47300
},
{
"epoch": 31.32,
"learning_rate": 1.0870535714285714e-05,
"loss": 0.0304,
"step": 47350
},
{
"epoch": 31.35,
"learning_rate": 1.0829199735449735e-05,
"loss": 0.0206,
"step": 47400
},
{
"epoch": 31.38,
"learning_rate": 1.0787863756613757e-05,
"loss": 0.026,
"step": 47450
},
{
"epoch": 31.42,
"learning_rate": 1.0746527777777778e-05,
"loss": 0.0267,
"step": 47500
},
{
"epoch": 31.45,
"learning_rate": 1.0705191798941799e-05,
"loss": 0.0194,
"step": 47550
},
{
"epoch": 31.48,
"learning_rate": 1.066385582010582e-05,
"loss": 0.0219,
"step": 47600
},
{
"epoch": 31.51,
"learning_rate": 1.062251984126984e-05,
"loss": 0.0286,
"step": 47650
},
{
"epoch": 31.55,
"learning_rate": 1.0581183862433863e-05,
"loss": 0.0223,
"step": 47700
},
{
"epoch": 31.58,
"learning_rate": 1.0539847883597883e-05,
"loss": 0.0156,
"step": 47750
},
{
"epoch": 31.61,
"learning_rate": 1.0498511904761905e-05,
"loss": 0.0166,
"step": 47800
},
{
"epoch": 31.65,
"learning_rate": 1.0457175925925925e-05,
"loss": 0.0251,
"step": 47850
},
{
"epoch": 31.68,
"learning_rate": 1.0415839947089947e-05,
"loss": 0.0212,
"step": 47900
},
{
"epoch": 31.71,
"learning_rate": 1.0374503968253969e-05,
"loss": 0.0394,
"step": 47950
},
{
"epoch": 31.75,
"learning_rate": 1.0333167989417989e-05,
"loss": 0.0244,
"step": 48000
},
{
"epoch": 31.78,
"learning_rate": 1.0291832010582011e-05,
"loss": 0.0237,
"step": 48050
},
{
"epoch": 31.81,
"learning_rate": 1.0250496031746031e-05,
"loss": 0.0243,
"step": 48100
},
{
"epoch": 31.85,
"learning_rate": 1.0209986772486772e-05,
"loss": 0.0247,
"step": 48150
},
{
"epoch": 31.88,
"learning_rate": 1.0168650793650794e-05,
"loss": 0.0199,
"step": 48200
},
{
"epoch": 31.91,
"learning_rate": 1.0127314814814816e-05,
"loss": 0.0209,
"step": 48250
},
{
"epoch": 31.94,
"learning_rate": 1.0085978835978836e-05,
"loss": 0.0311,
"step": 48300
},
{
"epoch": 31.98,
"learning_rate": 1.0045469576719576e-05,
"loss": 0.0162,
"step": 48350
},
{
"epoch": 32.0,
"eval_cer": 0.24782546013744053,
"eval_loss": 2.0407798290252686,
"eval_mer": 0.2951422179610099,
"eval_runtime": 95.8171,
"eval_samples_per_second": 11.762,
"eval_steps_per_second": 1.472,
"step": 48384
},
{
"epoch": 32.01,
"learning_rate": 1.0004133597883598e-05,
"loss": 0.0198,
"step": 48400
},
{
"epoch": 32.04,
"learning_rate": 9.96279761904762e-06,
"loss": 0.0143,
"step": 48450
},
{
"epoch": 32.08,
"learning_rate": 9.92146164021164e-06,
"loss": 0.0216,
"step": 48500
},
{
"epoch": 32.11,
"learning_rate": 9.880125661375662e-06,
"loss": 0.0137,
"step": 48550
},
{
"epoch": 32.14,
"learning_rate": 9.838789682539682e-06,
"loss": 0.0361,
"step": 48600
},
{
"epoch": 32.18,
"learning_rate": 9.797453703703704e-06,
"loss": 0.0242,
"step": 48650
},
{
"epoch": 32.21,
"learning_rate": 9.756117724867724e-06,
"loss": 0.0219,
"step": 48700
},
{
"epoch": 32.24,
"learning_rate": 9.714781746031746e-06,
"loss": 0.0152,
"step": 48750
},
{
"epoch": 32.28,
"learning_rate": 9.673445767195766e-06,
"loss": 0.0254,
"step": 48800
},
{
"epoch": 32.31,
"learning_rate": 9.632109788359788e-06,
"loss": 0.0141,
"step": 48850
},
{
"epoch": 32.34,
"learning_rate": 9.59077380952381e-06,
"loss": 0.0179,
"step": 48900
},
{
"epoch": 32.37,
"learning_rate": 9.54943783068783e-06,
"loss": 0.0156,
"step": 48950
},
{
"epoch": 32.41,
"learning_rate": 9.508101851851852e-06,
"loss": 0.0153,
"step": 49000
},
{
"epoch": 32.44,
"learning_rate": 9.466765873015872e-06,
"loss": 0.0189,
"step": 49050
},
{
"epoch": 32.47,
"learning_rate": 9.425429894179894e-06,
"loss": 0.0194,
"step": 49100
},
{
"epoch": 32.51,
"learning_rate": 9.384093915343915e-06,
"loss": 0.0254,
"step": 49150
},
{
"epoch": 32.54,
"learning_rate": 9.342757936507936e-06,
"loss": 0.0275,
"step": 49200
},
{
"epoch": 32.57,
"learning_rate": 9.301421957671957e-06,
"loss": 0.0212,
"step": 49250
},
{
"epoch": 32.61,
"learning_rate": 9.260085978835979e-06,
"loss": 0.0261,
"step": 49300
},
{
"epoch": 32.64,
"learning_rate": 9.21875e-06,
"loss": 0.0201,
"step": 49350
},
{
"epoch": 32.67,
"learning_rate": 9.17741402116402e-06,
"loss": 0.014,
"step": 49400
},
{
"epoch": 32.71,
"learning_rate": 9.136078042328043e-06,
"loss": 0.0164,
"step": 49450
},
{
"epoch": 32.74,
"learning_rate": 9.094742063492063e-06,
"loss": 0.0214,
"step": 49500
},
{
"epoch": 32.77,
"learning_rate": 9.053406084656085e-06,
"loss": 0.0207,
"step": 49550
},
{
"epoch": 32.8,
"learning_rate": 9.012070105820105e-06,
"loss": 0.0316,
"step": 49600
},
{
"epoch": 32.84,
"learning_rate": 8.970734126984127e-06,
"loss": 0.036,
"step": 49650
},
{
"epoch": 32.87,
"learning_rate": 8.929398148148149e-06,
"loss": 0.0225,
"step": 49700
},
{
"epoch": 32.9,
"learning_rate": 8.888062169312169e-06,
"loss": 0.0237,
"step": 49750
},
{
"epoch": 32.94,
"learning_rate": 8.84672619047619e-06,
"loss": 0.0161,
"step": 49800
},
{
"epoch": 32.97,
"learning_rate": 8.805390211640211e-06,
"loss": 0.0271,
"step": 49850
},
{
"epoch": 33.0,
"eval_cer": 0.2503724350040848,
"eval_loss": 2.087434768676758,
"eval_mer": 0.29618088846276763,
"eval_runtime": 95.4454,
"eval_samples_per_second": 11.808,
"eval_steps_per_second": 1.477,
"step": 49896
},
{
"epoch": 33.0,
"learning_rate": 8.764054232804233e-06,
"loss": 0.0302,
"step": 49900
},
{
"epoch": 33.04,
"learning_rate": 8.722718253968255e-06,
"loss": 0.016,
"step": 49950
},
{
"epoch": 33.07,
"learning_rate": 8.681382275132275e-06,
"loss": 0.0175,
"step": 50000
},
{
"epoch": 33.1,
"learning_rate": 8.640046296296297e-06,
"loss": 0.0172,
"step": 50050
},
{
"epoch": 33.13,
"learning_rate": 8.598710317460317e-06,
"loss": 0.0163,
"step": 50100
},
{
"epoch": 33.17,
"learning_rate": 8.557374338624339e-06,
"loss": 0.0199,
"step": 50150
},
{
"epoch": 33.2,
"learning_rate": 8.51603835978836e-06,
"loss": 0.0174,
"step": 50200
},
{
"epoch": 33.23,
"learning_rate": 8.474702380952381e-06,
"loss": 0.0119,
"step": 50250
},
{
"epoch": 33.27,
"learning_rate": 8.433366402116403e-06,
"loss": 0.0205,
"step": 50300
},
{
"epoch": 33.3,
"learning_rate": 8.392030423280423e-06,
"loss": 0.0261,
"step": 50350
},
{
"epoch": 33.33,
"learning_rate": 8.350694444444445e-06,
"loss": 0.0221,
"step": 50400
},
{
"epoch": 33.37,
"learning_rate": 8.309358465608465e-06,
"loss": 0.0161,
"step": 50450
},
{
"epoch": 33.4,
"learning_rate": 8.268022486772487e-06,
"loss": 0.0251,
"step": 50500
},
{
"epoch": 33.43,
"learning_rate": 8.226686507936509e-06,
"loss": 0.0212,
"step": 50550
},
{
"epoch": 33.47,
"learning_rate": 8.18535052910053e-06,
"loss": 0.0148,
"step": 50600
},
{
"epoch": 33.5,
"learning_rate": 8.144014550264551e-06,
"loss": 0.0149,
"step": 50650
},
{
"epoch": 33.53,
"learning_rate": 8.102678571428571e-06,
"loss": 0.0144,
"step": 50700
},
{
"epoch": 33.56,
"learning_rate": 8.061342592592593e-06,
"loss": 0.0204,
"step": 50750
},
{
"epoch": 33.6,
"learning_rate": 8.020006613756613e-06,
"loss": 0.0193,
"step": 50800
},
{
"epoch": 33.63,
"learning_rate": 7.978670634920635e-06,
"loss": 0.0156,
"step": 50850
},
{
"epoch": 33.66,
"learning_rate": 7.937334656084657e-06,
"loss": 0.0173,
"step": 50900
},
{
"epoch": 33.7,
"learning_rate": 7.895998677248677e-06,
"loss": 0.0142,
"step": 50950
},
{
"epoch": 33.73,
"learning_rate": 7.8546626984127e-06,
"loss": 0.022,
"step": 51000
},
{
"epoch": 33.76,
"learning_rate": 7.81332671957672e-06,
"loss": 0.0217,
"step": 51050
},
{
"epoch": 33.8,
"learning_rate": 7.771990740740741e-06,
"loss": 0.0228,
"step": 51100
},
{
"epoch": 33.83,
"learning_rate": 7.731481481481481e-06,
"loss": 0.0264,
"step": 51150
},
{
"epoch": 33.86,
"learning_rate": 7.690145502645503e-06,
"loss": 0.0187,
"step": 51200
},
{
"epoch": 33.9,
"learning_rate": 7.648809523809525e-06,
"loss": 0.0191,
"step": 51250
},
{
"epoch": 33.93,
"learning_rate": 7.607473544973545e-06,
"loss": 0.0238,
"step": 51300
},
{
"epoch": 33.96,
"learning_rate": 7.566137566137567e-06,
"loss": 0.0191,
"step": 51350
},
{
"epoch": 33.99,
"learning_rate": 7.524801587301587e-06,
"loss": 0.0505,
"step": 51400
},
{
"epoch": 34.0,
"eval_cer": 0.24705656206449134,
"eval_loss": 2.0537052154541016,
"eval_mer": 0.2943432406519655,
"eval_runtime": 95.6816,
"eval_samples_per_second": 11.779,
"eval_steps_per_second": 1.474,
"step": 51408
},
{
"epoch": 34.03,
"learning_rate": 7.483465608465609e-06,
"loss": 0.0101,
"step": 51450
},
{
"epoch": 34.06,
"learning_rate": 7.442129629629629e-06,
"loss": 0.0199,
"step": 51500
},
{
"epoch": 34.09,
"learning_rate": 7.400793650793651e-06,
"loss": 0.0197,
"step": 51550
},
{
"epoch": 34.13,
"learning_rate": 7.359457671957673e-06,
"loss": 0.0204,
"step": 51600
},
{
"epoch": 34.16,
"learning_rate": 7.318121693121693e-06,
"loss": 0.0165,
"step": 51650
},
{
"epoch": 34.19,
"learning_rate": 7.276785714285715e-06,
"loss": 0.0356,
"step": 51700
},
{
"epoch": 34.23,
"learning_rate": 7.2354497354497354e-06,
"loss": 0.0193,
"step": 51750
},
{
"epoch": 34.26,
"learning_rate": 7.194113756613757e-06,
"loss": 0.0234,
"step": 51800
},
{
"epoch": 34.29,
"learning_rate": 7.1527777777777775e-06,
"loss": 0.0199,
"step": 51850
},
{
"epoch": 34.33,
"learning_rate": 7.1114417989417994e-06,
"loss": 0.0167,
"step": 51900
},
{
"epoch": 34.36,
"learning_rate": 7.07010582010582e-06,
"loss": 0.0132,
"step": 51950
},
{
"epoch": 34.39,
"learning_rate": 7.0287698412698415e-06,
"loss": 0.0127,
"step": 52000
},
{
"epoch": 34.42,
"learning_rate": 6.9874338624338634e-06,
"loss": 0.0118,
"step": 52050
},
{
"epoch": 34.46,
"learning_rate": 6.946097883597884e-06,
"loss": 0.0136,
"step": 52100
},
{
"epoch": 34.49,
"learning_rate": 6.9047619047619055e-06,
"loss": 0.024,
"step": 52150
},
{
"epoch": 34.52,
"learning_rate": 6.863425925925926e-06,
"loss": 0.0136,
"step": 52200
},
{
"epoch": 34.56,
"learning_rate": 6.822089947089948e-06,
"loss": 0.0169,
"step": 52250
},
{
"epoch": 34.59,
"learning_rate": 6.780753968253968e-06,
"loss": 0.0196,
"step": 52300
},
{
"epoch": 34.62,
"learning_rate": 6.73941798941799e-06,
"loss": 0.0158,
"step": 52350
},
{
"epoch": 34.66,
"learning_rate": 6.69808201058201e-06,
"loss": 0.0192,
"step": 52400
},
{
"epoch": 34.69,
"learning_rate": 6.656746031746032e-06,
"loss": 0.0167,
"step": 52450
},
{
"epoch": 34.72,
"learning_rate": 6.615410052910054e-06,
"loss": 0.0208,
"step": 52500
},
{
"epoch": 34.76,
"learning_rate": 6.574074074074074e-06,
"loss": 0.018,
"step": 52550
},
{
"epoch": 34.79,
"learning_rate": 6.532738095238096e-06,
"loss": 0.0302,
"step": 52600
},
{
"epoch": 34.82,
"learning_rate": 6.491402116402116e-06,
"loss": 0.0216,
"step": 52650
},
{
"epoch": 34.85,
"learning_rate": 6.450066137566138e-06,
"loss": 0.0282,
"step": 52700
},
{
"epoch": 34.89,
"learning_rate": 6.408730158730158e-06,
"loss": 0.0221,
"step": 52750
},
{
"epoch": 34.92,
"learning_rate": 6.36739417989418e-06,
"loss": 0.0172,
"step": 52800
},
{
"epoch": 34.95,
"learning_rate": 6.326058201058202e-06,
"loss": 0.0175,
"step": 52850
},
{
"epoch": 34.99,
"learning_rate": 6.284722222222222e-06,
"loss": 0.0156,
"step": 52900
},
{
"epoch": 35.0,
"eval_cer": 0.2496515930606949,
"eval_loss": 2.1400859355926514,
"eval_mer": 0.2947427293064877,
"eval_runtime": 95.5907,
"eval_samples_per_second": 11.79,
"eval_steps_per_second": 1.475,
"step": 52920
},
{
"epoch": 35.02,
"learning_rate": 6.243386243386243e-06,
"loss": 0.0261,
"step": 52950
},
{
"epoch": 35.05,
"learning_rate": 6.202050264550264e-06,
"loss": 0.0187,
"step": 53000
},
{
"epoch": 35.09,
"learning_rate": 6.160714285714286e-06,
"loss": 0.0159,
"step": 53050
},
{
"epoch": 35.12,
"learning_rate": 6.119378306878307e-06,
"loss": 0.0232,
"step": 53100
},
{
"epoch": 35.15,
"learning_rate": 6.078042328042328e-06,
"loss": 0.0108,
"step": 53150
},
{
"epoch": 35.19,
"learning_rate": 6.036706349206349e-06,
"loss": 0.0226,
"step": 53200
},
{
"epoch": 35.22,
"learning_rate": 5.99537037037037e-06,
"loss": 0.0155,
"step": 53250
},
{
"epoch": 35.25,
"learning_rate": 5.954034391534391e-06,
"loss": 0.0174,
"step": 53300
},
{
"epoch": 35.28,
"learning_rate": 5.9126984126984124e-06,
"loss": 0.0298,
"step": 53350
},
{
"epoch": 35.32,
"learning_rate": 5.8713624338624335e-06,
"loss": 0.0108,
"step": 53400
},
{
"epoch": 35.35,
"learning_rate": 5.830026455026455e-06,
"loss": 0.0172,
"step": 53450
},
{
"epoch": 35.38,
"learning_rate": 5.7886904761904764e-06,
"loss": 0.0198,
"step": 53500
},
{
"epoch": 35.42,
"learning_rate": 5.7473544973544975e-06,
"loss": 0.0118,
"step": 53550
},
{
"epoch": 35.45,
"learning_rate": 5.7060185185185186e-06,
"loss": 0.0119,
"step": 53600
},
{
"epoch": 35.48,
"learning_rate": 5.66468253968254e-06,
"loss": 0.0232,
"step": 53650
},
{
"epoch": 35.52,
"learning_rate": 5.623346560846561e-06,
"loss": 0.0181,
"step": 53700
},
{
"epoch": 35.55,
"learning_rate": 5.582010582010582e-06,
"loss": 0.0307,
"step": 53750
},
{
"epoch": 35.58,
"learning_rate": 5.540674603174604e-06,
"loss": 0.0149,
"step": 53800
},
{
"epoch": 35.62,
"learning_rate": 5.499338624338625e-06,
"loss": 0.0194,
"step": 53850
},
{
"epoch": 35.65,
"learning_rate": 5.458002645502646e-06,
"loss": 0.0175,
"step": 53900
},
{
"epoch": 35.68,
"learning_rate": 5.416666666666667e-06,
"loss": 0.0147,
"step": 53950
},
{
"epoch": 35.71,
"learning_rate": 5.375330687830688e-06,
"loss": 0.0164,
"step": 54000
},
{
"epoch": 35.75,
"learning_rate": 5.333994708994709e-06,
"loss": 0.0131,
"step": 54050
},
{
"epoch": 35.78,
"learning_rate": 5.292658730158731e-06,
"loss": 0.022,
"step": 54100
},
{
"epoch": 35.81,
"learning_rate": 5.251322751322752e-06,
"loss": 0.0226,
"step": 54150
},
{
"epoch": 35.85,
"learning_rate": 5.209986772486773e-06,
"loss": 0.0109,
"step": 54200
},
{
"epoch": 35.88,
"learning_rate": 5.168650793650794e-06,
"loss": 0.034,
"step": 54250
},
{
"epoch": 35.91,
"learning_rate": 5.127314814814815e-06,
"loss": 0.0111,
"step": 54300
},
{
"epoch": 35.95,
"learning_rate": 5.085978835978836e-06,
"loss": 0.0147,
"step": 54350
},
{
"epoch": 35.98,
"learning_rate": 5.044642857142858e-06,
"loss": 0.0147,
"step": 54400
},
{
"epoch": 36.0,
"eval_cer": 0.24859435821038975,
"eval_loss": 2.1733622550964355,
"eval_mer": 0.2936241610738255,
"eval_runtime": 95.6305,
"eval_samples_per_second": 11.785,
"eval_steps_per_second": 1.474,
"step": 54432
},
{
"epoch": 36.01,
"learning_rate": 5.003306878306879e-06,
"loss": 0.0188,
"step": 54450
},
{
"epoch": 36.04,
"learning_rate": 4.9619708994709e-06,
"loss": 0.0274,
"step": 54500
},
{
"epoch": 36.08,
"learning_rate": 4.920634920634921e-06,
"loss": 0.0154,
"step": 54550
},
{
"epoch": 36.11,
"learning_rate": 4.879298941798942e-06,
"loss": 0.0192,
"step": 54600
},
{
"epoch": 36.14,
"learning_rate": 4.837962962962963e-06,
"loss": 0.013,
"step": 54650
},
{
"epoch": 36.18,
"learning_rate": 4.796626984126985e-06,
"loss": 0.0173,
"step": 54700
},
{
"epoch": 36.21,
"learning_rate": 4.755291005291006e-06,
"loss": 0.0337,
"step": 54750
},
{
"epoch": 36.24,
"learning_rate": 4.713955026455027e-06,
"loss": 0.0155,
"step": 54800
},
{
"epoch": 36.28,
"learning_rate": 4.672619047619048e-06,
"loss": 0.0207,
"step": 54850
},
{
"epoch": 36.31,
"learning_rate": 4.631283068783069e-06,
"loss": 0.0183,
"step": 54900
},
{
"epoch": 36.34,
"learning_rate": 4.58994708994709e-06,
"loss": 0.0149,
"step": 54950
},
{
"epoch": 36.38,
"learning_rate": 4.548611111111111e-06,
"loss": 0.0196,
"step": 55000
},
{
"epoch": 36.41,
"learning_rate": 4.507275132275132e-06,
"loss": 0.0142,
"step": 55050
},
{
"epoch": 36.44,
"learning_rate": 4.465939153439154e-06,
"loss": 0.0164,
"step": 55100
},
{
"epoch": 36.47,
"learning_rate": 4.424603174603175e-06,
"loss": 0.0147,
"step": 55150
},
{
"epoch": 36.51,
"learning_rate": 4.383267195767196e-06,
"loss": 0.0149,
"step": 55200
},
{
"epoch": 36.54,
"learning_rate": 4.3419312169312175e-06,
"loss": 0.0168,
"step": 55250
},
{
"epoch": 36.57,
"learning_rate": 4.3005952380952385e-06,
"loss": 0.027,
"step": 55300
},
{
"epoch": 36.61,
"learning_rate": 4.2592592592592596e-06,
"loss": 0.0114,
"step": 55350
},
{
"epoch": 36.64,
"learning_rate": 4.217923280423281e-06,
"loss": 0.0204,
"step": 55400
},
{
"epoch": 36.67,
"learning_rate": 4.176587301587302e-06,
"loss": 0.0146,
"step": 55450
},
{
"epoch": 36.71,
"learning_rate": 4.135251322751324e-06,
"loss": 0.0238,
"step": 55500
},
{
"epoch": 36.74,
"learning_rate": 4.093915343915345e-06,
"loss": 0.0147,
"step": 55550
},
{
"epoch": 36.77,
"learning_rate": 4.052579365079366e-06,
"loss": 0.0251,
"step": 55600
},
{
"epoch": 36.81,
"learning_rate": 4.012070105820106e-06,
"loss": 0.0099,
"step": 55650
},
{
"epoch": 36.84,
"learning_rate": 3.970734126984127e-06,
"loss": 0.0137,
"step": 55700
},
{
"epoch": 36.87,
"learning_rate": 3.929398148148148e-06,
"loss": 0.0099,
"step": 55750
},
{
"epoch": 36.9,
"learning_rate": 3.88806216931217e-06,
"loss": 0.0167,
"step": 55800
},
{
"epoch": 36.94,
"learning_rate": 3.846726190476191e-06,
"loss": 0.0271,
"step": 55850
},
{
"epoch": 36.97,
"learning_rate": 3.805390211640212e-06,
"loss": 0.0344,
"step": 55900
},
{
"epoch": 37.0,
"eval_cer": 0.24748906723052524,
"eval_loss": 2.1714305877685547,
"eval_mer": 0.2932246724193033,
"eval_runtime": 95.3301,
"eval_samples_per_second": 11.822,
"eval_steps_per_second": 1.479,
"step": 55944
},
{
"epoch": 37.0,
"learning_rate": 3.764054232804233e-06,
"loss": 0.0195,
"step": 55950
},
{
"epoch": 37.04,
"learning_rate": 3.722718253968254e-06,
"loss": 0.0094,
"step": 56000
},
{
"epoch": 37.07,
"learning_rate": 3.681382275132275e-06,
"loss": 0.0277,
"step": 56050
},
{
"epoch": 37.1,
"learning_rate": 3.6400462962962965e-06,
"loss": 0.0121,
"step": 56100
},
{
"epoch": 37.14,
"learning_rate": 3.5987103174603175e-06,
"loss": 0.0125,
"step": 56150
},
{
"epoch": 37.17,
"learning_rate": 3.557374338624339e-06,
"loss": 0.0127,
"step": 56200
},
{
"epoch": 37.2,
"learning_rate": 3.51603835978836e-06,
"loss": 0.0147,
"step": 56250
},
{
"epoch": 37.24,
"learning_rate": 3.474702380952381e-06,
"loss": 0.0143,
"step": 56300
},
{
"epoch": 37.27,
"learning_rate": 3.433366402116402e-06,
"loss": 0.0141,
"step": 56350
},
{
"epoch": 37.3,
"learning_rate": 3.3928571428571426e-06,
"loss": 0.0138,
"step": 56400
},
{
"epoch": 37.33,
"learning_rate": 3.3515211640211637e-06,
"loss": 0.0285,
"step": 56450
},
{
"epoch": 37.37,
"learning_rate": 3.3101851851851856e-06,
"loss": 0.022,
"step": 56500
},
{
"epoch": 37.4,
"learning_rate": 3.2688492063492066e-06,
"loss": 0.0196,
"step": 56550
},
{
"epoch": 37.43,
"learning_rate": 3.2275132275132277e-06,
"loss": 0.0228,
"step": 56600
},
{
"epoch": 37.47,
"learning_rate": 3.1861772486772487e-06,
"loss": 0.013,
"step": 56650
},
{
"epoch": 37.5,
"learning_rate": 3.1448412698412698e-06,
"loss": 0.0087,
"step": 56700
},
{
"epoch": 37.53,
"learning_rate": 3.103505291005291e-06,
"loss": 0.0113,
"step": 56750
},
{
"epoch": 37.57,
"learning_rate": 3.0621693121693123e-06,
"loss": 0.0163,
"step": 56800
},
{
"epoch": 37.6,
"learning_rate": 3.0208333333333334e-06,
"loss": 0.0223,
"step": 56850
},
{
"epoch": 37.63,
"learning_rate": 2.9794973544973544e-06,
"loss": 0.0192,
"step": 56900
},
{
"epoch": 37.67,
"learning_rate": 2.9381613756613755e-06,
"loss": 0.0136,
"step": 56950
},
{
"epoch": 37.7,
"learning_rate": 2.896825396825397e-06,
"loss": 0.0192,
"step": 57000
},
{
"epoch": 37.73,
"learning_rate": 2.855489417989418e-06,
"loss": 0.0109,
"step": 57050
},
{
"epoch": 37.76,
"learning_rate": 2.814153439153439e-06,
"loss": 0.0351,
"step": 57100
},
{
"epoch": 37.8,
"learning_rate": 2.7728174603174605e-06,
"loss": 0.0192,
"step": 57150
},
{
"epoch": 37.83,
"learning_rate": 2.7314814814814816e-06,
"loss": 0.0107,
"step": 57200
},
{
"epoch": 37.86,
"learning_rate": 2.6901455026455026e-06,
"loss": 0.0125,
"step": 57250
},
{
"epoch": 37.9,
"learning_rate": 2.648809523809524e-06,
"loss": 0.013,
"step": 57300
},
{
"epoch": 37.93,
"learning_rate": 2.607473544973545e-06,
"loss": 0.0163,
"step": 57350
},
{
"epoch": 37.96,
"learning_rate": 2.566137566137566e-06,
"loss": 0.0154,
"step": 57400
},
{
"epoch": 38.0,
"learning_rate": 2.5248015873015877e-06,
"loss": 0.0208,
"step": 57450
},
{
"epoch": 38.0,
"eval_cer": 0.2472487865827286,
"eval_loss": 2.1860885620117188,
"eval_mer": 0.29218600191754557,
"eval_runtime": 95.511,
"eval_samples_per_second": 11.8,
"eval_steps_per_second": 1.476,
"step": 57456
},
{
"epoch": 38.03,
"learning_rate": 2.4834656084656087e-06,
"loss": 0.015,
"step": 57500
},
{
"epoch": 38.06,
"learning_rate": 2.4421296296296298e-06,
"loss": 0.0111,
"step": 57550
},
{
"epoch": 38.1,
"learning_rate": 2.4007936507936512e-06,
"loss": 0.0127,
"step": 57600
},
{
"epoch": 38.13,
"learning_rate": 2.3594576719576723e-06,
"loss": 0.0282,
"step": 57650
},
{
"epoch": 38.16,
"learning_rate": 2.3181216931216933e-06,
"loss": 0.0102,
"step": 57700
},
{
"epoch": 38.19,
"learning_rate": 2.2767857142857144e-06,
"loss": 0.0242,
"step": 57750
},
{
"epoch": 38.23,
"learning_rate": 2.235449735449736e-06,
"loss": 0.0124,
"step": 57800
},
{
"epoch": 38.26,
"learning_rate": 2.194113756613757e-06,
"loss": 0.014,
"step": 57850
},
{
"epoch": 38.29,
"learning_rate": 2.152777777777778e-06,
"loss": 0.0186,
"step": 57900
},
{
"epoch": 38.33,
"learning_rate": 2.111441798941799e-06,
"loss": 0.0187,
"step": 57950
},
{
"epoch": 38.36,
"learning_rate": 2.0701058201058205e-06,
"loss": 0.0176,
"step": 58000
},
{
"epoch": 38.39,
"learning_rate": 2.0287698412698415e-06,
"loss": 0.0134,
"step": 58050
},
{
"epoch": 38.43,
"learning_rate": 1.9874338624338626e-06,
"loss": 0.0173,
"step": 58100
},
{
"epoch": 38.46,
"learning_rate": 1.9460978835978836e-06,
"loss": 0.0167,
"step": 58150
},
{
"epoch": 38.49,
"learning_rate": 1.9047619047619051e-06,
"loss": 0.0194,
"step": 58200
},
{
"epoch": 38.53,
"learning_rate": 1.8634259259259262e-06,
"loss": 0.0322,
"step": 58250
},
{
"epoch": 38.56,
"learning_rate": 1.8220899470899472e-06,
"loss": 0.0153,
"step": 58300
},
{
"epoch": 38.59,
"learning_rate": 1.7807539682539683e-06,
"loss": 0.0132,
"step": 58350
},
{
"epoch": 38.62,
"learning_rate": 1.7394179894179893e-06,
"loss": 0.0135,
"step": 58400
},
{
"epoch": 38.66,
"learning_rate": 1.6980820105820108e-06,
"loss": 0.0249,
"step": 58450
},
{
"epoch": 38.69,
"learning_rate": 1.6567460317460318e-06,
"loss": 0.0106,
"step": 58500
},
{
"epoch": 38.72,
"learning_rate": 1.615410052910053e-06,
"loss": 0.0167,
"step": 58550
},
{
"epoch": 38.76,
"learning_rate": 1.574074074074074e-06,
"loss": 0.022,
"step": 58600
},
{
"epoch": 38.79,
"learning_rate": 1.5327380952380952e-06,
"loss": 0.016,
"step": 58650
},
{
"epoch": 38.82,
"learning_rate": 1.4914021164021165e-06,
"loss": 0.0169,
"step": 58700
},
{
"epoch": 38.86,
"learning_rate": 1.4500661375661375e-06,
"loss": 0.0278,
"step": 58750
},
{
"epoch": 38.89,
"learning_rate": 1.4087301587301588e-06,
"loss": 0.0143,
"step": 58800
},
{
"epoch": 38.92,
"learning_rate": 1.3673941798941798e-06,
"loss": 0.0086,
"step": 58850
},
{
"epoch": 38.96,
"learning_rate": 1.326058201058201e-06,
"loss": 0.0184,
"step": 58900
},
{
"epoch": 38.99,
"learning_rate": 1.2847222222222222e-06,
"loss": 0.0125,
"step": 58950
},
{
"epoch": 39.0,
"eval_cer": 0.2483060214330338,
"eval_loss": 2.2433042526245117,
"eval_mer": 0.29338446788111217,
"eval_runtime": 95.3127,
"eval_samples_per_second": 11.824,
"eval_steps_per_second": 1.479,
"step": 58968
},
{
"epoch": 39.02,
"learning_rate": 1.2433862433862434e-06,
"loss": 0.0167,
"step": 59000
},
{
"epoch": 39.05,
"learning_rate": 1.2020502645502645e-06,
"loss": 0.015,
"step": 59050
},
{
"epoch": 39.09,
"learning_rate": 1.1607142857142857e-06,
"loss": 0.0161,
"step": 59100
},
{
"epoch": 39.12,
"learning_rate": 1.1193783068783068e-06,
"loss": 0.015,
"step": 59150
},
{
"epoch": 39.15,
"learning_rate": 1.078042328042328e-06,
"loss": 0.008,
"step": 59200
},
{
"epoch": 39.19,
"learning_rate": 1.0367063492063493e-06,
"loss": 0.0118,
"step": 59250
},
{
"epoch": 39.22,
"learning_rate": 9.953703703703704e-07,
"loss": 0.0089,
"step": 59300
},
{
"epoch": 39.25,
"learning_rate": 9.540343915343916e-07,
"loss": 0.0153,
"step": 59350
},
{
"epoch": 39.29,
"learning_rate": 9.126984126984128e-07,
"loss": 0.0199,
"step": 59400
},
{
"epoch": 39.32,
"learning_rate": 8.713624338624338e-07,
"loss": 0.0106,
"step": 59450
},
{
"epoch": 39.35,
"learning_rate": 8.300264550264551e-07,
"loss": 0.0218,
"step": 59500
},
{
"epoch": 39.38,
"learning_rate": 7.886904761904763e-07,
"loss": 0.0187,
"step": 59550
},
{
"epoch": 39.42,
"learning_rate": 7.473544973544974e-07,
"loss": 0.0119,
"step": 59600
},
{
"epoch": 39.45,
"learning_rate": 7.060185185185186e-07,
"loss": 0.0147,
"step": 59650
},
{
"epoch": 39.48,
"learning_rate": 6.646825396825397e-07,
"loss": 0.0134,
"step": 59700
},
{
"epoch": 39.52,
"learning_rate": 6.233465608465609e-07,
"loss": 0.01,
"step": 59750
},
{
"epoch": 39.55,
"learning_rate": 5.82010582010582e-07,
"loss": 0.0108,
"step": 59800
},
{
"epoch": 39.58,
"learning_rate": 5.406746031746032e-07,
"loss": 0.0164,
"step": 59850
},
{
"epoch": 39.62,
"learning_rate": 4.993386243386244e-07,
"loss": 0.0118,
"step": 59900
},
{
"epoch": 39.65,
"learning_rate": 4.580026455026455e-07,
"loss": 0.0158,
"step": 59950
},
{
"epoch": 39.68,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0124,
"step": 60000
},
{
"epoch": 39.72,
"learning_rate": 3.753306878306878e-07,
"loss": 0.0215,
"step": 60050
},
{
"epoch": 39.75,
"learning_rate": 3.3399470899470903e-07,
"loss": 0.0101,
"step": 60100
},
{
"epoch": 39.78,
"learning_rate": 2.926587301587302e-07,
"loss": 0.0315,
"step": 60150
},
{
"epoch": 39.81,
"learning_rate": 2.5132275132275135e-07,
"loss": 0.0247,
"step": 60200
},
{
"epoch": 39.85,
"learning_rate": 2.099867724867725e-07,
"loss": 0.0135,
"step": 60250
},
{
"epoch": 39.88,
"learning_rate": 1.6865079365079366e-07,
"loss": 0.0141,
"step": 60300
},
{
"epoch": 39.91,
"learning_rate": 1.2731481481481484e-07,
"loss": 0.0165,
"step": 60350
},
{
"epoch": 39.95,
"learning_rate": 8.597883597883599e-08,
"loss": 0.0148,
"step": 60400
},
{
"epoch": 39.98,
"learning_rate": 4.4642857142857145e-08,
"loss": 0.0104,
"step": 60450
},
{
"epoch": 40.0,
"eval_cer": 0.24820990917391514,
"eval_loss": 2.2275092601776123,
"eval_mer": 0.2921061041866411,
"eval_runtime": 95.499,
"eval_samples_per_second": 11.801,
"eval_steps_per_second": 1.476,
"step": 60480
},
{
"epoch": 40.0,
"step": 60480,
"total_flos": 5.93018274691843e+19,
"train_loss": 0.1760966865744974,
"train_runtime": 18436.4964,
"train_samples_per_second": 26.231,
"train_steps_per_second": 3.28
}
],
"max_steps": 60480,
"num_train_epochs": 40,
"total_flos": 5.93018274691843e+19,
"trial_name": null,
"trial_params": null
}